Diffstat (limited to 'lib')
465 files changed, 17513 insertions, 14723 deletions
diff --git a/lib/Analysis/ConstantFolding.cpp b/lib/Analysis/ConstantFolding.cpp
index 4ae8859..808e6fa 100644
--- a/lib/Analysis/ConstantFolding.cpp
+++ b/lib/Analysis/ConstantFolding.cpp
@@ -80,7 +80,7 @@ static Constant *FoldBitCast(Constant *C, const Type *DestTy,
   // First thing is first. We only want to think about integer here, so if
   // we have something in FP form, recast it as integer.
-  if (DstEltTy->isFloatingPoint()) {
+  if (DstEltTy->isFloatingPointTy()) {
     // Fold to a vector of integers with same size as our FP type.
     unsigned FPWidth = DstEltTy->getPrimitiveSizeInBits();
     const Type *DestIVTy =
@@ -95,7 +95,7 @@ static Constant *FoldBitCast(Constant *C, const Type *DestTy,
   // Okay, we know the destination is integer, if the input is FP, convert
   // it to integer first.
-  if (SrcEltTy->isFloatingPoint()) {
+  if (SrcEltTy->isFloatingPointTy()) {
     unsigned FPWidth = SrcEltTy->getPrimitiveSizeInBits();
     const Type *SrcIVTy =
       VectorType::get(IntegerType::get(C->getContext(), FPWidth), NumSrcElt);
@@ -517,6 +517,42 @@ static Constant *SymbolicallyEvaluateBinop(unsigned Opc, Constant *Op0,
   return 0;
 }

+/// CastGEPIndices - If array indices are not pointer-sized integers,
+/// explicitly cast them so that they aren't implicitly casted by the
+/// getelementptr.
+static Constant *CastGEPIndices(Constant *const *Ops, unsigned NumOps,
+                                const Type *ResultTy,
+                                const TargetData *TD) {
+  if (!TD) return 0;
+  const Type *IntPtrTy = TD->getIntPtrType(ResultTy->getContext());
+
+  bool Any = false;
+  SmallVector<Constant*, 32> NewIdxs;
+  for (unsigned i = 1; i != NumOps; ++i) {
+    if ((i == 1 ||
+         !isa<StructType>(GetElementPtrInst::getIndexedType(Ops[0]->getType(),
+                                        reinterpret_cast<Value *const *>(Ops+1),
+                                                            i-1))) &&
+        Ops[i]->getType() != IntPtrTy) {
+      Any = true;
+      NewIdxs.push_back(ConstantExpr::getCast(CastInst::getCastOpcode(Ops[i],
+                                                                      true,
+                                                                      IntPtrTy,
+                                                                      true),
+                                              Ops[i], IntPtrTy));
+    } else
+      NewIdxs.push_back(Ops[i]);
+  }
+  if (!Any) return 0;
+
+  Constant *C =
+    ConstantExpr::getGetElementPtr(Ops[0], &NewIdxs[0], NewIdxs.size());
+  if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C))
+    if (Constant *Folded = ConstantFoldConstantExpression(CE, TD))
+      C = Folded;
+  return C;
+}
+
 /// SymbolicallyEvaluateGEP - If we can symbolically evaluate the specified GEP
 /// constant expression, do so.
 static Constant *SymbolicallyEvaluateGEP(Constant *const *Ops, unsigned NumOps,
@@ -676,10 +712,10 @@ Constant *llvm::ConstantFoldInstruction(Instruction *I, const TargetData *TD) {
 /// ConstantFoldConstantExpression - Attempt to fold the constant expression
 /// using the specified TargetData. If successful, the constant result is
 /// returned, if not, null is returned.
-Constant *llvm::ConstantFoldConstantExpression(ConstantExpr *CE,
+Constant *llvm::ConstantFoldConstantExpression(const ConstantExpr *CE,
                                                const TargetData *TD) {
   SmallVector<Constant*, 8> Ops;
-  for (User::op_iterator i = CE->op_begin(), e = CE->op_end(); i != e; ++i) {
+  for (User::const_op_iterator i = CE->op_begin(), e = CE->op_end(); i != e; ++i) {
     Constant *NewC = cast<Constant>(*i);
     // Recursively fold the ConstantExpr's operands.
     if (ConstantExpr *NewCE = dyn_cast<ConstantExpr>(NewC))
@@ -810,6 +846,8 @@ Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, const Type *DestTy,
   case Instruction::ShuffleVector:
     return ConstantExpr::getShuffleVector(Ops[0], Ops[1], Ops[2]);
   case Instruction::GetElementPtr:
+    if (Constant *C = CastGEPIndices(Ops, NumOps, DestTy, TD))
+      return C;
     if (Constant *C = SymbolicallyEvaluateGEP(Ops, NumOps, DestTy, TD))
       return C;
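The new CastGEPIndices step is what makes constant GEPs with mixed-width indices fold consistently: every array index is promoted to the target's pointer-sized integer before the fold is retried. A minimal sketch of driving this through the public entry point, assuming this era's C++ API and hypothetical names Ctx (an LLVMContext) and G (a global's Constant*); this is illustrative, not code from this patch:

    // On a target with 64-bit pointers, an i32-indexed constant GEP has its
    // indices promoted to i64 before folding.
    TargetData TD("e-p:64:64:64");
    const Type *I32 = Type::getInt32Ty(Ctx);
    Constant *Idxs[] = { ConstantInt::get(I32, 0), ConstantInt::get(I32, 3) };
    Constant *GEP = ConstantExpr::getGetElementPtr(G, Idxs, 2);
    if (ConstantExpr *CE = dyn_cast<ConstantExpr>(GEP))
      if (Constant *Folded = ConstantFoldConstantExpression(CE, &TD))
        GEP = Folded;  // equivalent GEP, now with pointer-sized indices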
diff --git a/lib/Analysis/DebugInfo.cpp b/lib/Analysis/DebugInfo.cpp
index 4ba837a..258f1db 100644
--- a/lib/Analysis/DebugInfo.cpp
+++ b/lib/Analysis/DebugInfo.cpp
@@ -725,6 +725,29 @@ DIBasicType DIFactory::CreateBasicTypeEx(DIDescriptor Context,
   return DIBasicType(MDNode::get(VMContext, &Elts[0], 10));
 }

+/// CreateArtificialType - Create a new DIType with "artificial" flag set.
+DIType DIFactory::CreateArtificialType(DIType Ty) {
+  if (Ty.isArtificial())
+    return Ty;
+
+  SmallVector<Value *, 9> Elts;
+  MDNode *N = Ty.getNode();
+  assert (N && "Unexpected input DIType!");
+  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+    if (Value *V = N->getOperand(i))
+      Elts.push_back(V);
+    else
+      Elts.push_back(Constant::getNullValue(Type::getInt32Ty(VMContext)));
+  }
+
+  unsigned CurFlags = Ty.getFlags();
+  CurFlags = CurFlags | DIType::FlagArtificial;
+
+  // Flags are stored at this slot.
+  Elts[8] = ConstantInt::get(Type::getInt32Ty(VMContext), CurFlags);
+
+  return DIType(MDNode::get(VMContext, Elts.data(), Elts.size()));
+}

 /// CreateDerivedType - Create a derived type like const qualified type,
 /// pointer, typedef, etc.
@@ -794,7 +817,8 @@ DICompositeType DIFactory::CreateCompositeType(unsigned Tag,
                                                unsigned Flags,
                                                DIType DerivedFrom,
                                                DIArray Elements,
-                                               unsigned RuntimeLang) {
+                                               unsigned RuntimeLang,
+                                               MDNode *ContainingType) {

   Value *Elts[] = {
     GetTagConstant(Tag),
@@ -808,9 +832,10 @@ DICompositeType DIFactory::CreateCompositeType(unsigned Tag,
     ConstantInt::get(Type::getInt32Ty(VMContext), Flags),
     DerivedFrom.getNode(),
     Elements.getNode(),
-    ConstantInt::get(Type::getInt32Ty(VMContext), RuntimeLang)
+    ConstantInt::get(Type::getInt32Ty(VMContext), RuntimeLang),
+    ContainingType
   };
-  return DICompositeType(MDNode::get(VMContext, &Elts[0], 12));
+  return DICompositeType(MDNode::get(VMContext, &Elts[0], 13));
 }


@@ -858,7 +883,8 @@ DISubprogram DIFactory::CreateSubprogram(DIDescriptor Context,
                                          bool isLocalToUnit,
                                          bool isDefinition,
                                          unsigned VK, unsigned VIndex,
-                                         DIType ContainingType) {
+                                         DIType ContainingType,
+                                         bool isArtificial) {

   Value *Elts[] = {
     GetTagConstant(dwarf::DW_TAG_subprogram),
@@ -874,9 +900,10 @@ DISubprogram DIFactory::CreateSubprogram(DIDescriptor Context,
     ConstantInt::get(Type::getInt1Ty(VMContext), isDefinition),
     ConstantInt::get(Type::getInt32Ty(VMContext), (unsigned)VK),
     ConstantInt::get(Type::getInt32Ty(VMContext), VIndex),
-    ContainingType.getNode()
+    ContainingType.getNode(),
+    ConstantInt::get(Type::getInt1Ty(VMContext), isArtificial)
   };
-  return DISubprogram(MDNode::get(VMContext, &Elts[0], 14));
+  return DISubprogram(MDNode::get(VMContext, &Elts[0], 15));
 }

 /// CreateSubprogramDefinition - Create new subprogram descriptor for the
@@ -900,9 +927,10 @@ DISubprogram DIFactory::CreateSubprogramDefinition(DISubprogram &SPDeclaration)
     ConstantInt::get(Type::getInt1Ty(VMContext), true),
     DeclNode->getOperand(11), // Virtuality
     DeclNode->getOperand(12), // VIndex
-    DeclNode->getOperand(13)  // Containting Type
+    DeclNode->getOperand(13), // Containing Type
+    DeclNode->getOperand(14)  // isArtificial
   };
-  return DISubprogram(MDNode::get(VMContext, &Elts[0], 14));
+  return DISubprogram(MDNode::get(VMContext, &Elts[0], 15));
 }

 /// CreateGlobalVariable - Create a new descriptor for the specified global.
@@ -1033,6 +1061,8 @@ DILocation DIFactory::CreateLocation(unsigned LineNo, unsigned ColumnNo,
 /// InsertDeclare - Insert a new llvm.dbg.declare intrinsic call.
 Instruction *DIFactory::InsertDeclare(Value *Storage, DIVariable D,
                                       Instruction *InsertBefore) {
+  assert(Storage && "no storage passed to dbg.declare");
+  assert(D.getNode() && "empty DIVariable passed to dbg.declare");
   if (!DeclareFn)
     DeclareFn = Intrinsic::getDeclaration(&M, Intrinsic::dbg_declare);

@@ -1044,19 +1074,27 @@ Instruction *DIFactory::InsertDeclare(Value *Storage, DIVariable D,
 /// InsertDeclare - Insert a new llvm.dbg.declare intrinsic call.
 Instruction *DIFactory::InsertDeclare(Value *Storage, DIVariable D,
                                       BasicBlock *InsertAtEnd) {
+  assert(Storage && "no storage passed to dbg.declare");
+  assert(D.getNode() && "empty DIVariable passed to dbg.declare");
   if (!DeclareFn)
     DeclareFn = Intrinsic::getDeclaration(&M, Intrinsic::dbg_declare);

   Value *Args[] = { MDNode::get(Storage->getContext(), &Storage, 1),
                     D.getNode() };
-  return CallInst::Create(DeclareFn, Args, Args+2, "", InsertAtEnd);
-}
+
+  // If this block already has a terminator then insert this intrinsic
+  // before the terminator.
+  if (TerminatorInst *T = InsertAtEnd->getTerminator())
+    return CallInst::Create(DeclareFn, Args, Args+2, "", T);
+  else
+    return CallInst::Create(DeclareFn, Args, Args+2, "", InsertAtEnd);
+}

 /// InsertDbgValueIntrinsic - Insert a new llvm.dbg.value intrinsic call.
 Instruction *DIFactory::InsertDbgValueIntrinsic(Value *V, uint64_t Offset,
                                                 DIVariable D,
                                                 Instruction *InsertBefore) {
   assert(V && "no value passed to dbg.value");
+  assert(D.getNode() && "empty DIVariable passed to dbg.value");
   if (!ValueFn)
     ValueFn = Intrinsic::getDeclaration(&M, Intrinsic::dbg_value);

@@ -1071,6 +1109,7 @@ Instruction *DIFactory::InsertDbgValueIntrinsic(Value *V, uint64_t Offset,
                                                 DIVariable D,
                                                 BasicBlock *InsertAtEnd) {
   assert(V && "no value passed to dbg.value");
+  assert(D.getNode() && "empty DIVariable passed to dbg.value");
   if (!ValueFn)
     ValueFn = Intrinsic::getDeclaration(&M, Intrinsic::dbg_value);
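CreateArtificialType copies the type's operands verbatim and ORs DIType::FlagArtificial into the flags slot, so applying it twice is a no-op. A sketch of the intended use from a frontend, with assumed names M (the current Module) and ThisTy (an already-built DIType, e.g. for an implicit C++ 'this' parameter); illustrative only, not code from this patch:

    DIFactory DIF(M);
    // Mark the implicit parameter's type as compiler-generated so that
    // debuggers can de-emphasize or hide it.
    DIType ArtTy = DIF.CreateArtificialType(ThisTy);
    assert(ArtTy.isArtificial() && "flag is stored in operand slot 8");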
diff --git a/lib/Analysis/IPA/GlobalsModRef.cpp b/lib/Analysis/IPA/GlobalsModRef.cpp
index e803a48..ec94bc8 100644
--- a/lib/Analysis/IPA/GlobalsModRef.cpp
+++ b/lib/Analysis/IPA/GlobalsModRef.cpp
@@ -486,7 +486,7 @@ GlobalsModRef::alias(const Value *V1, unsigned V1Size,
   if (GV1 && !NonAddressTakenGlobals.count(GV1)) GV1 = 0;
   if (GV2 && !NonAddressTakenGlobals.count(GV2)) GV2 = 0;

-  // If the the two pointers are derived from two different non-addr-taken
+  // If the two pointers are derived from two different non-addr-taken
   // globals, or if one is and the other isn't, we know these can't alias.
   if ((GV1 || GV2) && GV1 != GV2)
     return NoAlias;
diff --git a/lib/Analysis/IPA/Makefile b/lib/Analysis/IPA/Makefile
index da719ba..b850c9f 100644
--- a/lib/Analysis/IPA/Makefile
+++ b/lib/Analysis/IPA/Makefile
@@ -10,7 +10,6 @@
 LEVEL = ../../..
 LIBRARYNAME = LLVMipa
 BUILD_ARCHIVE = 1
-CXXFLAGS = -fno-rtti

 include $(LEVEL)/Makefile.common
diff --git a/lib/Analysis/IVUsers.cpp b/lib/Analysis/IVUsers.cpp
index 38611cc..4ce6868 100644
--- a/lib/Analysis/IVUsers.cpp
+++ b/lib/Analysis/IVUsers.cpp
@@ -21,6 +21,7 @@
 #include "llvm/Analysis/Dominators.h"
 #include "llvm/Analysis/LoopPass.h"
 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/Assembly/AsmAnnotationWriter.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
@@ -35,42 +36,30 @@ Pass *llvm::createIVUsersPass() {
   return new IVUsers();
 }

-/// containsAddRecFromDifferentLoop - Determine whether expression S involves a
-/// subexpression that is an AddRec from a loop other than L.  An outer loop
-/// of L is OK, but not an inner loop nor a disjoint loop.
-static bool containsAddRecFromDifferentLoop(const SCEV *S, Loop *L) {
-  // This is very common, put it first.
-  if (isa<SCEVConstant>(S))
-    return false;
-  if (const SCEVCommutativeExpr *AE = dyn_cast<SCEVCommutativeExpr>(S)) {
-    for (unsigned int i=0; i< AE->getNumOperands(); i++)
-      if (containsAddRecFromDifferentLoop(AE->getOperand(i), L))
-        return true;
-    return false;
-  }
-  if (const SCEVAddRecExpr *AE = dyn_cast<SCEVAddRecExpr>(S)) {
-    if (const Loop *newLoop = AE->getLoop()) {
-      if (newLoop == L)
-        return false;
-      // if newLoop is an outer loop of L, this is OK.
-      if (newLoop->contains(L))
-        return false;
+/// CollectSubexprs - Split S into subexpressions which can be pulled out into
+/// separate registers.
+static void CollectSubexprs(const SCEV *S,
+                            SmallVectorImpl<const SCEV *> &Ops,
+                            ScalarEvolution &SE) {
+  if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
+    // Break out add operands.
+    for (SCEVAddExpr::op_iterator I = Add->op_begin(), E = Add->op_end();
+         I != E; ++I)
+      CollectSubexprs(*I, Ops, SE);
+    return;
+  } else if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
+    // Split a non-zero base out of an addrec.
+    if (!AR->getStart()->isZero()) {
+      CollectSubexprs(AR->getStart(), Ops, SE);
+      CollectSubexprs(SE.getAddRecExpr(SE.getIntegerSCEV(0, AR->getType()),
+                                       AR->getStepRecurrence(SE),
+                                       AR->getLoop()), Ops, SE);
+      return;
     }
-  }
-  if (const SCEVUDivExpr *DE = dyn_cast<SCEVUDivExpr>(S))
-    return containsAddRecFromDifferentLoop(DE->getLHS(), L) ||
-           containsAddRecFromDifferentLoop(DE->getRHS(), L);
-#if 0
-  // SCEVSDivExpr has been backed out temporarily, but will be back; we'll
-  // need this when it is.
-  if (const SCEVSDivExpr *DE = dyn_cast<SCEVSDivExpr>(S))
-    return containsAddRecFromDifferentLoop(DE->getLHS(), L) ||
-           containsAddRecFromDifferentLoop(DE->getRHS(), L);
-#endif
-  if (const SCEVCastExpr *CE = dyn_cast<SCEVCastExpr>(S))
-    return containsAddRecFromDifferentLoop(CE->getOperand(), L);
-  return false;
+
+  // Otherwise use the value itself.
+  Ops.push_back(S);
 }

 /// getSCEVStartAndStride - Compute the start and stride of this expression,
@@ -89,35 +78,42 @@ static bool getSCEVStartAndStride(const SCEV *&SH, Loop *L, Loop *UseLoop,
   if (const SCEVAddExpr *AE = dyn_cast<SCEVAddExpr>(SH)) {
     for (unsigned i = 0, e = AE->getNumOperands(); i != e; ++i)
       if (const SCEVAddRecExpr *AddRec =
-              dyn_cast<SCEVAddRecExpr>(AE->getOperand(i))) {
-        if (AddRec->getLoop() == L)
-          TheAddRec = SE->getAddExpr(AddRec, TheAddRec);
-        else
-          return false;  // Nested IV of some sort?
-      } else {
+              dyn_cast<SCEVAddRecExpr>(AE->getOperand(i)))
+        TheAddRec = SE->getAddExpr(AddRec, TheAddRec);
+      else
         Start = SE->getAddExpr(Start, AE->getOperand(i));
-      }
   } else if (isa<SCEVAddRecExpr>(SH)) {
     TheAddRec = SH;
   } else {
     return false;  // not analyzable.
   }

-  const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(TheAddRec);
-  if (!AddRec || AddRec->getLoop() != L) return false;
+  // Break down TheAddRec into its component parts.
+  SmallVector<const SCEV *, 4> Subexprs;
+  CollectSubexprs(TheAddRec, Subexprs, *SE);
+
+  // Look for an addrec on the current loop among the parts.
+  const SCEV *AddRecStride = 0;
+  for (SmallVectorImpl<const SCEV *>::iterator I = Subexprs.begin(),
+       E = Subexprs.end(); I != E; ++I) {
+    const SCEV *S = *I;
+    if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S))
+      if (AR->getLoop() == L) {
+        *I = AR->getStart();
+        AddRecStride = AR->getStepRecurrence(*SE);
+        break;
+      }
+  }
+  if (!AddRecStride)
+    return false;
+
+  // Add up everything else into a start value (which may not be
+  // loop-invariant).
+  const SCEV *AddRecStart = SE->getAddExpr(Subexprs);

   // Use getSCEVAtScope to attempt to simplify other loops out of
   // the picture.
-  const SCEV *AddRecStart = AddRec->getStart();
   AddRecStart = SE->getSCEVAtScope(AddRecStart, UseLoop);
-  const SCEV *AddRecStride = AddRec->getStepRecurrence(*SE);
-
-  // FIXME: If Start contains an SCEVAddRecExpr from a different loop, other
-  // than an outer loop of the current loop, reject it.  LSR has no concept of
-  // operating on more than one loop at a time so don't confuse it with such
-  // expressions.
-  if (containsAddRecFromDifferentLoop(AddRecStart, L))
-    return false;

   Start = SE->getAddExpr(Start, AddRecStart);

@@ -130,7 +126,7 @@ static bool getSCEVStartAndStride(const SCEV *&SH, Loop *L, Loop *UseLoop,
     DEBUG(dbgs() << "[";
           WriteAsOperand(dbgs(), L->getHeader(), /*PrintType=*/false);
-          dbgs() << "] Variable stride: " << *AddRec << "\n");
+          dbgs() << "] Variable stride: " << *AddRecStride << "\n");
   }

   Stride = AddRecStride;
@@ -246,14 +242,6 @@ bool IVUsers::AddUsersIfInteresting(Instruction *I) {
   }

   if (AddUserToIVUsers) {
-    IVUsersOfOneStride *StrideUses = IVUsesByStride[Stride];
-    if (!StrideUses) {    // First occurrence of this stride?
-      StrideOrder.push_back(Stride);
-      StrideUses = new IVUsersOfOneStride(Stride);
-      IVUses.push_back(StrideUses);
-      IVUsesByStride[Stride] = StrideUses;
-    }
-
     // Okay, we found a user that we cannot reduce.  Analyze the instruction
     // and decide what to do with it.  If we are a use inside of the loop, use
     // the value before incrementation, otherwise use it after incrementation.
@@ -261,27 +249,21 @@ bool IVUsers::AddUsersIfInteresting(Instruction *I) {
       // The value used will be incremented by the stride more than we are
       // expecting, so subtract this off.
       const SCEV *NewStart = SE->getMinusSCEV(Start, Stride);
-      StrideUses->addUser(NewStart, User, I);
-      StrideUses->Users.back().setIsUseOfPostIncrementedValue(true);
+      IVUses.push_back(new IVStrideUse(this, Stride, NewStart, User, I));
+      IVUses.back().setIsUseOfPostIncrementedValue(true);
       DEBUG(dbgs() << "   USING POSTINC SCEV, START=" << *NewStart<< "\n");
     } else {
-      StrideUses->addUser(Start, User, I);
+      IVUses.push_back(new IVStrideUse(this, Stride, Start, User, I));
     }
   }
   return true;
 }

-void IVUsers::AddUser(const SCEV *Stride, const SCEV *Offset,
-                      Instruction *User, Value *Operand) {
-  IVUsersOfOneStride *StrideUses = IVUsesByStride[Stride];
-  if (!StrideUses) {    // First occurrence of this stride?
-    StrideOrder.push_back(Stride);
-    StrideUses = new IVUsersOfOneStride(Stride);
-    IVUses.push_back(StrideUses);
-    IVUsesByStride[Stride] = StrideUses;
-  }
-  IVUsesByStride[Stride]->addUser(Offset, User, Operand);
+IVStrideUse &IVUsers::AddUser(const SCEV *Stride, const SCEV *Offset,
+                              Instruction *User, Value *Operand) {
+  IVUses.push_back(new IVStrideUse(this, Stride, Offset, User, Operand));
+  return IVUses.back();
 }

 IVUsers::IVUsers()
@@ -315,15 +297,15 @@ bool IVUsers::runOnLoop(Loop *l, LPPassManager &LPM) {
 /// value of the OperandValToReplace of the given IVStrideUse.
 const SCEV *IVUsers::getReplacementExpr(const IVStrideUse &U) const {
   // Start with zero.
-  const SCEV *RetVal = SE->getIntegerSCEV(0, U.getParent()->Stride->getType());
+  const SCEV *RetVal = SE->getIntegerSCEV(0, U.getStride()->getType());
   // Create the basic add recurrence.
-  RetVal = SE->getAddRecExpr(RetVal, U.getParent()->Stride, L);
+  RetVal = SE->getAddRecExpr(RetVal, U.getStride(), L);
   // Add the offset in a separate step, because it may be loop-variant.
   RetVal = SE->getAddExpr(RetVal, U.getOffset());
   // For uses of post-incremented values, add an extra stride to compute
   // the actual replacement value.
   if (U.isUseOfPostIncrementedValue())
-    RetVal = SE->getAddExpr(RetVal, U.getParent()->Stride);
+    RetVal = SE->getAddExpr(RetVal, U.getStride());
   return RetVal;
 }

@@ -332,9 +314,9 @@ const SCEV *IVUsers::getReplacementExpr(const IVStrideUse &U) const {
 /// isUseOfPostIncrementedValue flag.
 const SCEV *IVUsers::getCanonicalExpr(const IVStrideUse &U) const {
   // Start with zero.
-  const SCEV *RetVal = SE->getIntegerSCEV(0, U.getParent()->Stride->getType());
+  const SCEV *RetVal = SE->getIntegerSCEV(0, U.getStride()->getType());
   // Create the basic add recurrence.
-  RetVal = SE->getAddRecExpr(RetVal, U.getParent()->Stride, L);
+  RetVal = SE->getAddRecExpr(RetVal, U.getStride(), L);
   // Add the offset in a separate step, because it may be loop-variant.
   RetVal = SE->getAddExpr(RetVal, U.getOffset());
   return RetVal;
@@ -349,24 +331,20 @@ void IVUsers::print(raw_ostream &OS, const Module *M) const {
   }
   OS << ":\n";

-  for (unsigned Stride = 0, e = StrideOrder.size(); Stride != e; ++Stride) {
-    std::map<const SCEV *, IVUsersOfOneStride*>::const_iterator SI =
-      IVUsesByStride.find(StrideOrder[Stride]);
-    assert(SI != IVUsesByStride.end() && "Stride doesn't exist!");
-    OS << "  Stride " << *SI->first->getType() << " " << *SI->first << ":\n";
-
-    for (ilist<IVStrideUse>::const_iterator UI = SI->second->Users.begin(),
-         E = SI->second->Users.end(); UI != E; ++UI) {
-      OS << "    ";
-      WriteAsOperand(OS, UI->getOperandValToReplace(), false);
-      OS << " = ";
-      OS << *getReplacementExpr(*UI);
-      if (UI->isUseOfPostIncrementedValue())
-        OS << " (post-inc)";
-      OS << " in ";
-      UI->getUser()->print(OS);
-      OS << '\n';
-    }
+  // Use a default AssemblyAnnotationWriter to suppress the default info
+  // comments, which aren't relevant here.
+  AssemblyAnnotationWriter Annotator;
+  for (ilist<IVStrideUse>::const_iterator UI = IVUses.begin(),
+       E = IVUses.end(); UI != E; ++UI) {
+    OS << "  ";
+    WriteAsOperand(OS, UI->getOperandValToReplace(), false);
+    OS << " = "
+       << *getReplacementExpr(*UI);
+    if (UI->isUseOfPostIncrementedValue())
+      OS << " (post-inc)";
+    OS << " in ";
+    UI->getUser()->print(OS, &Annotator);
+    OS << '\n';
   }
 }

@@ -375,14 +353,12 @@ void IVUsers::dump() const {
 }

 void IVUsers::releaseMemory() {
-  IVUsesByStride.clear();
-  StrideOrder.clear();
   Processed.clear();
   IVUses.clear();
 }

 void IVStrideUse::deleted() {
   // Remove this user from the list.
-  Parent->Users.erase(this);
+  Parent->IVUses.erase(this);
   // this now dangles!
 }
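CollectSubexprs flattens an expression into the additive pieces that can be considered as separate registers. An illustrative walk-through of the helper above, with assumed names S (the SCEV being split) and SE (a ScalarEvolution reference); not code from this patch:

    // For S = {(4 + %n),+,2}<%loop>, the non-zero start is split out of the
    // addrec and the add is broken apart, so Parts ends up holding three
    // entries: 4, %n, and {0,+,2}<%loop>.
    SmallVector<const SCEV *, 4> Parts;
    CollectSubexprs(S, Parts, SE);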
diff --git a/lib/Analysis/InlineCost.cpp b/lib/Analysis/InlineCost.cpp
index 651c918..972d034 100644
--- a/lib/Analysis/InlineCost.cpp
+++ b/lib/Analysis/InlineCost.cpp
@@ -25,26 +25,28 @@ unsigned InlineCostAnalyzer::FunctionInfo::
          CountCodeReductionForConstant(Value *V) {
   unsigned Reduction = 0;
   for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E; ++UI)
-    if (isa<BranchInst>(*UI))
-      Reduction += 40;          // Eliminating a conditional branch is a big win
-    else if (SwitchInst *SI = dyn_cast<SwitchInst>(*UI))
-      // Eliminating a switch is a big win, proportional to the number of edges
-      // deleted.
-      Reduction += (SI->getNumSuccessors()-1) * 40;
-    else if (isa<IndirectBrInst>(*UI))
-      // Eliminating an indirect branch is a big win.
-      Reduction += 200;
-    else if (CallInst *CI = dyn_cast<CallInst>(*UI)) {
+    if (isa<BranchInst>(*UI) || isa<SwitchInst>(*UI)) {
+      // We will be able to eliminate all but one of the successors.
+      const TerminatorInst &TI = cast<TerminatorInst>(**UI);
+      const unsigned NumSucc = TI.getNumSuccessors();
+      unsigned Instrs = 0;
+      for (unsigned I = 0; I != NumSucc; ++I)
+        Instrs += TI.getSuccessor(I)->size();
+      // We don't know which blocks will be eliminated, so use the average size.
+      Reduction += InlineConstants::InstrCost*Instrs*(NumSucc-1)/NumSucc;
+    } else if (CallInst *CI = dyn_cast<CallInst>(*UI)) {
       // Turning an indirect call into a direct call is a BIG win
-      Reduction += CI->getCalledValue() == V ? 500 : 0;
+      if (CI->getCalledValue() == V)
+        Reduction += InlineConstants::IndirectCallBonus;
     } else if (InvokeInst *II = dyn_cast<InvokeInst>(*UI)) {
       // Turning an indirect call into a direct call is a BIG win
-      Reduction += II->getCalledValue() == V ? 500 : 0;
+      if (II->getCalledValue() == V)
+        Reduction += InlineConstants::IndirectCallBonus;
     } else {
       // Figure out if this instruction will be removed due to simple constant
       // propagation.
       Instruction &Inst = cast<Instruction>(**UI);
-
+
       // We can't constant propagate instructions which have effects or
       // read memory.
       //
@@ -53,7 +55,7 @@ unsigned InlineCostAnalyzer::FunctionInfo::
       // Unfortunately, we don't know the pointer that may get propagated here,
       // so we can't make this decision.
       if (Inst.mayReadFromMemory() || Inst.mayHaveSideEffects() ||
-          isa<AllocaInst>(Inst)) 
+          isa<AllocaInst>(Inst))
         continue;

       bool AllOperandsConstant = true;
@@ -65,7 +67,7 @@ unsigned InlineCostAnalyzer::FunctionInfo::

       if (AllOperandsConstant) {
         // We will get to remove this instruction...
-        Reduction += 7;
+        Reduction += InlineConstants::InstrCost;

         // And any other instructions that use it which become constants
         // themselves.
@@ -87,11 +89,14 @@ unsigned InlineCostAnalyzer::FunctionInfo::
   for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;++UI){
     Instruction *I = cast<Instruction>(*UI);
     if (isa<LoadInst>(I) || isa<StoreInst>(I))
-      Reduction += 10;
+      Reduction += InlineConstants::InstrCost;
     else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I)) {
       // If the GEP has variable indices, we won't be able to do much with it.
-      if (!GEP->hasAllConstantIndices())
-        Reduction += CountCodeReductionForAlloca(GEP)+15;
+      if (GEP->hasAllConstantIndices())
+        Reduction += CountCodeReductionForAlloca(GEP);
+    } else if (BitCastInst *BCI = dyn_cast<BitCastInst>(I)) {
+      // Track pointer through bitcasts.
+      Reduction += CountCodeReductionForAlloca(BCI);
     } else {
       // If there is some other strange instruction, we're not going to be able
       // to do much if we inline this.
@@ -158,10 +163,11 @@ void CodeMetrics::analyzeBasicBlock(const BasicBlock *BB) {
           (F->getName() == "setjmp" || F->getName() == "_setjmp"))
         NeverInline = true;

-      // Calls often compile into many machine instructions.  Bump up their
-      // cost to reflect this.
-      if (!isa<IntrinsicInst>(II) && !callIsSmall(CS.getCalledFunction()))
-        NumInsts += InlineConstants::CallPenalty;
+      if (!isa<IntrinsicInst>(II) && !callIsSmall(CS.getCalledFunction())) {
+        // Each argument to a call takes on average one instruction to set up.
+        NumInsts += CS.arg_size();
+        ++NumCalls;
+      }
     }

     if (const AllocaInst *AI = dyn_cast<AllocaInst>(II)) {
@@ -223,8 +229,14 @@ void InlineCostAnalyzer::FunctionInfo::analyzeFunction(Function *F) {
   if (Metrics.NumRets==1)
     --Metrics.NumInsts;

+  // Don't bother calculating argument weights if we are never going to inline
+  // the function anyway.
+  if (Metrics.NeverInline)
+    return;
+
   // Check out all of the arguments to the function, figuring out how much
   // code can be eliminated if one of the arguments is a constant.
+  ArgumentWeights.reserve(F->arg_size());
   for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E; ++I)
     ArgumentWeights.push_back(ArgInfo(CountCodeReductionForConstant(I),
                                       CountCodeReductionForAlloca(I)));
@@ -313,23 +325,18 @@ InlineCost InlineCostAnalyzer::getInlineCost(CallSite CS,
   for (CallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end();
        I != E; ++I, ++ArgNo) {
     // Each argument passed in has a cost at both the caller and the callee
-    // sides.  This favors functions that take many arguments over functions
-    // that take few arguments.
-    InlineCost -= 20;
-
-    // If this is a function being passed in, it is very likely that we will be
-    // able to turn an indirect function call into a direct function call.
-    if (isa<Function>(I))
-      InlineCost -= 100;
-
+    // sides.  Measurements show that each argument costs about the same as an
+    // instruction.
+    InlineCost -= InlineConstants::InstrCost;
+
     // If an alloca is passed in, inlining this function is likely to allow
     // significant future optimization possibilities (like scalar promotion, and
     // scalarization), so encourage the inlining of the function.
     //
-    else if (isa<AllocaInst>(I)) {
+    if (isa<AllocaInst>(I)) {
       if (ArgNo < CalleeFI.ArgumentWeights.size())
         InlineCost -= CalleeFI.ArgumentWeights[ArgNo].AllocaWeight;
-
+
     // If this is a constant being passed into the function, use the argument
     // weights calculated for the callee to determine how much will be folded
     // away with this information.
@@ -341,14 +348,17 @@ InlineCost InlineCostAnalyzer::getInlineCost(CallSite CS,

   // Now that we have considered all of the factors that make the call site more
   // likely to be inlined, look at factors that make us not want to inline it.
-
+
+  // Calls usually take a long time, so they make the inlining gain smaller.
+  InlineCost += CalleeFI.Metrics.NumCalls * InlineConstants::CallPenalty;
+
   // Don't inline into something too big, which would make it bigger.
   // "size" here is the number of basic blocks, not instructions.
   //
   InlineCost += Caller->size()/15;

   // Look at the size of the callee.  Each instruction counts as 5.
-  InlineCost += CalleeFI.Metrics.NumInsts*5;
+  InlineCost += CalleeFI.Metrics.NumInsts*InlineConstants::InstrCost;

   return llvm::InlineCost::get(InlineCost);
 }
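The rewritten branch heuristic prices eliminated code in instructions rather than fixed magic numbers: when a constant argument decides an N-way branch, all but one successor dies on average. Worked numbers, assuming InlineConstants::InstrCost has the value of 5 that the nearby "counts as 5" comment suggests; illustrative only:

    // A two-way branch whose successors hold 12 and 8 instructions.
    unsigned Instrs = 12 + 8;
    unsigned NumSucc = 2;
    unsigned Reduction =
      InlineConstants::InstrCost * Instrs * (NumSucc - 1) / NumSucc;  // 50,
    // i.e. half of the 20 successor instructions are expected to fold away.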
diff --git a/lib/Analysis/LiveValues.cpp b/lib/Analysis/LiveValues.cpp
index 02ec7d3..1b91d93 100644
--- a/lib/Analysis/LiveValues.cpp
+++ b/lib/Analysis/LiveValues.cpp
@@ -184,7 +184,7 @@ LiveValues::Memo &LiveValues::compute(const Value *V) {
     }
   }

-  // If the value was never used outside the the block in which it was
+  // If the value was never used outside the block in which it was
   // defined, it's killed in that block.
   if (!LiveOutOfDefBB)
     M.Killed.insert(DefBB);
diff --git a/lib/Analysis/Makefile b/lib/Analysis/Makefile
index f61b8aa..4af6d35 100644
--- a/lib/Analysis/Makefile
+++ b/lib/Analysis/Makefile
@@ -11,7 +11,6 @@
 LEVEL = ../..
 LIBRARYNAME = LLVMAnalysis
 DIRS = IPA
 BUILD_ARCHIVE = 1
-CXXFLAGS = -fno-rtti

 include $(LEVEL)/Makefile.common
diff --git a/lib/Analysis/MemoryBuiltins.cpp b/lib/Analysis/MemoryBuiltins.cpp
index b448628..297b588 100644
--- a/lib/Analysis/MemoryBuiltins.cpp
+++ b/lib/Analysis/MemoryBuiltins.cpp
@@ -24,7 +24,7 @@ using namespace llvm;
 //  malloc Call Utility Functions.
 //

-/// isMalloc - Returns true if the the value is either a malloc call or a
+/// isMalloc - Returns true if the value is either a malloc call or a
 /// bitcast of the result of a malloc call.
 bool llvm::isMalloc(const Value *I) {
   return extractMallocCall(I) || extractMallocCallFromBitCast(I);
@@ -183,7 +183,7 @@ Value *llvm::getMallocArraySize(CallInst *CI, const TargetData *TD,
 //  free Call Utility Functions.
 //

-/// isFreeCall - Returns true if the the value is a call to the builtin free()
+/// isFreeCall - Returns true if the value is a call to the builtin free()
 bool llvm::isFreeCall(const Value *I) {
   const CallInst *CI = dyn_cast<CallInst>(I);
   if (!CI)
diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp
index 2f44913..9ee7d3a 100644
--- a/lib/Analysis/ScalarEvolution.cpp
+++ b/lib/Analysis/ScalarEvolution.cpp
@@ -214,8 +214,8 @@ bool SCEVCastExpr::properlyDominates(BasicBlock *BB, DominatorTree *DT) const {
 SCEVTruncateExpr::SCEVTruncateExpr(const FoldingSetNodeID &ID,
                                    const SCEV *op, const Type *ty)
   : SCEVCastExpr(ID, scTruncate, op, ty) {
-  assert((Op->getType()->isInteger() || isa<PointerType>(Op->getType())) &&
-         (Ty->isInteger() || isa<PointerType>(Ty)) &&
+  assert((Op->getType()->isIntegerTy() || isa<PointerType>(Op->getType())) &&
+         (Ty->isIntegerTy() || isa<PointerType>(Ty)) &&
          "Cannot truncate non-integer value!");
 }

@@ -226,8 +226,8 @@ void SCEVTruncateExpr::print(raw_ostream &OS) const {
 SCEVZeroExtendExpr::SCEVZeroExtendExpr(const FoldingSetNodeID &ID,
                                        const SCEV *op, const Type *ty)
   : SCEVCastExpr(ID, scZeroExtend, op, ty) {
-  assert((Op->getType()->isInteger() || isa<PointerType>(Op->getType())) &&
-         (Ty->isInteger() || isa<PointerType>(Ty)) &&
+  assert((Op->getType()->isIntegerTy() || isa<PointerType>(Op->getType())) &&
+         (Ty->isIntegerTy() || isa<PointerType>(Ty)) &&
          "Cannot zero extend non-integer value!");
 }

@@ -238,8 +238,8 @@ void SCEVZeroExtendExpr::print(raw_ostream &OS) const {
 SCEVSignExtendExpr::SCEVSignExtendExpr(const FoldingSetNodeID &ID,
                                        const SCEV *op, const Type *ty)
   : SCEVCastExpr(ID, scSignExtend, op, ty) {
-  assert((Op->getType()->isInteger() || isa<PointerType>(Op->getType())) &&
-         (Ty->isInteger() || isa<PointerType>(Ty)) &&
+  assert((Op->getType()->isIntegerTy() || isa<PointerType>(Op->getType())) &&
+         (Ty->isIntegerTy() || isa<PointerType>(Ty)) &&
          "Cannot sign extend non-integer value!");
 }

@@ -312,6 +312,21 @@ bool SCEVAddRecExpr::isLoopInvariant(const Loop *QueryLoop) const {
   return true;
 }

+bool
+SCEVAddRecExpr::dominates(BasicBlock *BB, DominatorTree *DT) const {
+  return DT->dominates(L->getHeader(), BB) &&
+         SCEVNAryExpr::dominates(BB, DT);
+}
+
+bool
+SCEVAddRecExpr::properlyDominates(BasicBlock *BB, DominatorTree *DT) const {
+  // This uses a "dominates" query instead of "properly dominates" query because
+  // the instruction which produces the addrec's value is a PHI, and a PHI
+  // effectively properly dominates its entire containing block.
+  return DT->dominates(L->getHeader(), BB) &&
+         SCEVNAryExpr::properlyDominates(BB, DT);
+}
+
 void SCEVAddRecExpr::print(raw_ostream &OS) const {
   OS << "{" << *Operands[0];
   for (unsigned i = 1, e = Operands.size(); i != e; ++i)
@@ -321,15 +336,6 @@ void SCEVAddRecExpr::print(raw_ostream &OS) const {
   OS << ">";
 }

-void SCEVFieldOffsetExpr::print(raw_ostream &OS) const {
-  // LLVM struct fields don't have names, so just print the field number.
-  OS << "offsetof(" << *STy << ", " << FieldNo << ")";
-}
-
-void SCEVAllocSizeExpr::print(raw_ostream &OS) const {
-  OS << "sizeof(" << *AllocTy << ")";
-}
-
 bool SCEVUnknown::isLoopInvariant(const Loop *L) const {
   // All non-instruction values are loop invariant.  All instructions are loop
   // invariant if they are not contained in the specified loop.
@@ -356,7 +362,91 @@ const Type *SCEVUnknown::getType() const {
   return V->getType();
 }

+bool SCEVUnknown::isSizeOf(const Type *&AllocTy) const {
+  if (ConstantExpr *VCE = dyn_cast<ConstantExpr>(V))
+    if (VCE->getOpcode() == Instruction::PtrToInt)
+      if (ConstantExpr *CE = dyn_cast<ConstantExpr>(VCE->getOperand(0)))
+        if (CE->getOpcode() == Instruction::GetElementPtr &&
+            CE->getOperand(0)->isNullValue() &&
+            CE->getNumOperands() == 2)
+          if (ConstantInt *CI = dyn_cast<ConstantInt>(CE->getOperand(1)))
+            if (CI->isOne()) {
+              AllocTy = cast<PointerType>(CE->getOperand(0)->getType())
+                                 ->getElementType();
+              return true;
+            }
+
+  return false;
+}
+
+bool SCEVUnknown::isAlignOf(const Type *&AllocTy) const {
+  if (ConstantExpr *VCE = dyn_cast<ConstantExpr>(V))
+    if (VCE->getOpcode() == Instruction::PtrToInt)
+      if (ConstantExpr *CE = dyn_cast<ConstantExpr>(VCE->getOperand(0)))
+        if (CE->getOpcode() == Instruction::GetElementPtr &&
+            CE->getOperand(0)->isNullValue()) {
+          const Type *Ty =
+            cast<PointerType>(CE->getOperand(0)->getType())->getElementType();
+          if (const StructType *STy = dyn_cast<StructType>(Ty))
+            if (!STy->isPacked() &&
+                CE->getNumOperands() == 3 &&
+                CE->getOperand(1)->isNullValue()) {
+              if (ConstantInt *CI = dyn_cast<ConstantInt>(CE->getOperand(2)))
+                if (CI->isOne() &&
+                    STy->getNumElements() == 2 &&
+                    STy->getElementType(0)->isIntegerTy(1)) {
+                  AllocTy = STy->getElementType(1);
+                  return true;
+                }
+            }
+        }
+
+  return false;
+}
+
+bool SCEVUnknown::isOffsetOf(const Type *&CTy, Constant *&FieldNo) const {
+  if (ConstantExpr *VCE = dyn_cast<ConstantExpr>(V))
+    if (VCE->getOpcode() == Instruction::PtrToInt)
+      if (ConstantExpr *CE = dyn_cast<ConstantExpr>(VCE->getOperand(0)))
+        if (CE->getOpcode() == Instruction::GetElementPtr &&
+            CE->getNumOperands() == 3 &&
+            CE->getOperand(0)->isNullValue() &&
+            CE->getOperand(1)->isNullValue()) {
+          const Type *Ty =
+            cast<PointerType>(CE->getOperand(0)->getType())->getElementType();
+          // Ignore vector types here so that ScalarEvolutionExpander doesn't
+          // emit getelementptrs that index into vectors.
+          if (isa<StructType>(Ty) || isa<ArrayType>(Ty)) {
+            CTy = Ty;
+            FieldNo = CE->getOperand(2);
+            return true;
+          }
+        }
+
+  return false;
+}
+
 void SCEVUnknown::print(raw_ostream &OS) const {
+  const Type *AllocTy;
+  if (isSizeOf(AllocTy)) {
+    OS << "sizeof(" << *AllocTy << ")";
+    return;
+  }
+  if (isAlignOf(AllocTy)) {
+    OS << "alignof(" << *AllocTy << ")";
+    return;
+  }
+
+  const Type *CTy;
+  Constant *FieldNo;
+  if (isOffsetOf(CTy, FieldNo)) {
+    OS << "offsetof(" << *CTy << ", ";
+    WriteAsOperand(OS, FieldNo, false);
+    OS << ")";
+    return;
+  }
+
+  // Otherwise just print it normally.
   WriteAsOperand(OS, V, false);
 }

@@ -515,21 +605,6 @@ namespace {
         return operator()(LC->getOperand(), RC->getOperand());
       }

-      // Compare offsetof expressions.
-      if (const SCEVFieldOffsetExpr *LA = dyn_cast<SCEVFieldOffsetExpr>(LHS)) {
-        const SCEVFieldOffsetExpr *RA = cast<SCEVFieldOffsetExpr>(RHS);
-        if (CompareTypes(LA->getStructType(), RA->getStructType()) ||
-            CompareTypes(RA->getStructType(), LA->getStructType()))
-          return CompareTypes(LA->getStructType(), RA->getStructType());
-        return LA->getFieldNo() < RA->getFieldNo();
-      }
-
-      // Compare sizeof expressions by the allocation type.
-      if (const SCEVAllocSizeExpr *LA = dyn_cast<SCEVAllocSizeExpr>(LHS)) {
-        const SCEVAllocSizeExpr *RA = cast<SCEVAllocSizeExpr>(RHS);
-        return CompareTypes(LA->getAllocType(), RA->getAllocType());
-      }
-
       llvm_unreachable("Unknown SCEV kind!");
       return false;
     }
@@ -2172,74 +2247,38 @@ const SCEV *ScalarEvolution::getUMinExpr(const SCEV *LHS,
   return getNotSCEV(getUMaxExpr(getNotSCEV(LHS), getNotSCEV(RHS)));
 }

-const SCEV *ScalarEvolution::getFieldOffsetExpr(const StructType *STy,
-                                                unsigned FieldNo) {
-  // If we have TargetData we can determine the constant offset.
-  if (TD) {
-    const Type *IntPtrTy = TD->getIntPtrType(getContext());
-    const StructLayout &SL = *TD->getStructLayout(STy);
-    uint64_t Offset = SL.getElementOffset(FieldNo);
-    return getIntegerSCEV(Offset, IntPtrTy);
-  }
+const SCEV *ScalarEvolution::getSizeOfExpr(const Type *AllocTy) {
+  Constant *C = ConstantExpr::getSizeOf(AllocTy);
+  if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C))
+    C = ConstantFoldConstantExpression(CE, TD);
+  const Type *Ty = getEffectiveSCEVType(PointerType::getUnqual(AllocTy));
+  return getTruncateOrZeroExtend(getSCEV(C), Ty);
+}

-  // Field 0 is always at offset 0.
-  if (FieldNo == 0) {
-    const Type *Ty = getEffectiveSCEVType(PointerType::getUnqual(STy));
-    return getIntegerSCEV(0, Ty);
-  }
+const SCEV *ScalarEvolution::getAlignOfExpr(const Type *AllocTy) {
+  Constant *C = ConstantExpr::getAlignOf(AllocTy);
+  if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C))
+    C = ConstantFoldConstantExpression(CE, TD);
+  const Type *Ty = getEffectiveSCEVType(PointerType::getUnqual(AllocTy));
+  return getTruncateOrZeroExtend(getSCEV(C), Ty);
+}

-  // Okay, it looks like we really DO need an offsetof expr.  Check to see if we
-  // already have one, otherwise create a new one.
-  FoldingSetNodeID ID;
-  ID.AddInteger(scFieldOffset);
-  ID.AddPointer(STy);
-  ID.AddInteger(FieldNo);
-  void *IP = 0;
-  if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
-  SCEV *S = SCEVAllocator.Allocate<SCEVFieldOffsetExpr>();
+const SCEV *ScalarEvolution::getOffsetOfExpr(const StructType *STy,
+                                             unsigned FieldNo) {
+  Constant *C = ConstantExpr::getOffsetOf(STy, FieldNo);
+  if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C))
+    C = ConstantFoldConstantExpression(CE, TD);
   const Type *Ty = getEffectiveSCEVType(PointerType::getUnqual(STy));
-  new (S) SCEVFieldOffsetExpr(ID, Ty, STy, FieldNo);
-  UniqueSCEVs.InsertNode(S, IP);
-  return S;
+  return getTruncateOrZeroExtend(getSCEV(C), Ty);
 }

-const SCEV *ScalarEvolution::getAllocSizeExpr(const Type *AllocTy) {
-  // If we have TargetData we can determine the constant size.
-  if (TD && AllocTy->isSized()) {
-    const Type *IntPtrTy = TD->getIntPtrType(getContext());
-    return getIntegerSCEV(TD->getTypeAllocSize(AllocTy), IntPtrTy);
-  }
-
-  // Expand an array size into the element size times the number
-  // of elements.
-  if (const ArrayType *ATy = dyn_cast<ArrayType>(AllocTy)) {
-    const SCEV *E = getAllocSizeExpr(ATy->getElementType());
-    return getMulExpr(
-      E, getConstant(ConstantInt::get(cast<IntegerType>(E->getType()),
-                                      ATy->getNumElements())));
-  }
-
-  // Expand a vector size into the element size times the number
-  // of elements.
-  if (const VectorType *VTy = dyn_cast<VectorType>(AllocTy)) {
-    const SCEV *E = getAllocSizeExpr(VTy->getElementType());
-    return getMulExpr(
-      E, getConstant(ConstantInt::get(cast<IntegerType>(E->getType()),
-                                      VTy->getNumElements())));
-  }
-
-  // Okay, it looks like we really DO need a sizeof expr.  Check to see if we
-  // already have one, otherwise create a new one.
-  FoldingSetNodeID ID;
-  ID.AddInteger(scAllocSize);
-  ID.AddPointer(AllocTy);
-  void *IP = 0;
-  if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
-  SCEV *S = SCEVAllocator.Allocate<SCEVAllocSizeExpr>();
-  const Type *Ty = getEffectiveSCEVType(PointerType::getUnqual(AllocTy));
-  new (S) SCEVAllocSizeExpr(ID, Ty, AllocTy);
-  UniqueSCEVs.InsertNode(S, IP);
-  return S;
+const SCEV *ScalarEvolution::getOffsetOfExpr(const Type *CTy,
+                                             Constant *FieldNo) {
+  Constant *C = ConstantExpr::getOffsetOf(CTy, FieldNo);
+  if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C))
+    C = ConstantFoldConstantExpression(CE, TD);
+  const Type *Ty = getEffectiveSCEVType(PointerType::getUnqual(CTy));
+  return getTruncateOrZeroExtend(getSCEV(C), Ty);
 }
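The replacement for the removed SCEVFieldOffsetExpr/SCEVAllocSizeExpr node kinds is the target-independent constant expression: ConstantExpr::getSizeOf(Ty) denotes, in IR terms,

    ptrtoint (Ty* getelementptr (Ty* null, i32 1) to iN)

which is exactly the shape SCEVUnknown::isSizeOf pattern-matches above. A short sketch with assumed in-scope names SE (a ScalarEvolution reference) and Ty (a Type*); illustrative only:

    const SCEV *Size = SE.getSizeOfExpr(Ty);
    // With TargetData available, ConstantFoldConstantExpression collapses
    // this to a plain integer constant; without it, the SCEV stays symbolic
    // and prints as "sizeof(Ty)".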
 const SCEV *ScalarEvolution::getUnknown(Value *V) {
@@ -2269,7 +2308,7 @@
 /// has access to target-specific information.
 bool ScalarEvolution::isSCEVable(const Type *Ty) const {
   // Integers and pointers are always SCEVable.
-  return Ty->isInteger() || isa<PointerType>(Ty);
+  return Ty->isIntegerTy() || isa<PointerType>(Ty);
 }

 /// getTypeSizeInBits - Return the size in bits of the specified type,
@@ -2282,7 +2321,7 @@ uint64_t ScalarEvolution::getTypeSizeInBits(const Type *Ty) const {
     return TD->getTypeSizeInBits(Ty);

   // Integer types have fixed sizes.
-  if (Ty->isInteger())
+  if (Ty->isIntegerTy())
     return Ty->getPrimitiveSizeInBits();

   // The only other supported type is pointer.  Without TargetData, conservatively
@@ -2298,7 +2337,7 @@ uint64_t ScalarEvolution::getTypeSizeInBits(const Type *Ty) const {
 const Type *ScalarEvolution::getEffectiveSCEVType(const Type *Ty) const {
   assert(isSCEVable(Ty) && "Type is not SCEVable!");

-  if (Ty->isInteger())
+  if (Ty->isIntegerTy())
     return Ty;

   // The only other supported type is pointer.
@@ -2327,7 +2366,7 @@ const SCEV *ScalarEvolution::getSCEV(Value *V) {

 /// getIntegerSCEV - Given a SCEVable type, create a constant for the
 /// specified signed integer value and return a SCEV for the constant.
-const SCEV *ScalarEvolution::getIntegerSCEV(int Val, const Type *Ty) {
+const SCEV *ScalarEvolution::getIntegerSCEV(int64_t Val, const Type *Ty) {
   const IntegerType *ITy = cast<IntegerType>(getEffectiveSCEVType(Ty));
   return getConstant(ConstantInt::get(ITy, Val));
 }
@@ -2373,8 +2412,8 @@ const SCEV *
 ScalarEvolution::getTruncateOrZeroExtend(const SCEV *V,
                                          const Type *Ty) {
   const Type *SrcTy = V->getType();
-  assert((SrcTy->isInteger() || isa<PointerType>(SrcTy)) &&
-         (Ty->isInteger() || isa<PointerType>(Ty)) &&
+  assert((SrcTy->isIntegerTy() || isa<PointerType>(SrcTy)) &&
+         (Ty->isIntegerTy() || isa<PointerType>(Ty)) &&
          "Cannot truncate or zero extend with non-integer arguments!");
   if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty))
     return V;  // No conversion
@@ -2390,8 +2429,8 @@ const SCEV *
 ScalarEvolution::getTruncateOrSignExtend(const SCEV *V,
                                          const Type *Ty) {
   const Type *SrcTy = V->getType();
-  assert((SrcTy->isInteger() || isa<PointerType>(SrcTy)) &&
-         (Ty->isInteger() || isa<PointerType>(Ty)) &&
+  assert((SrcTy->isIntegerTy() || isa<PointerType>(SrcTy)) &&
+         (Ty->isIntegerTy() || isa<PointerType>(Ty)) &&
          "Cannot truncate or zero extend with non-integer arguments!");
   if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty))
     return V;  // No conversion
@@ -2406,8 +2445,8 @@ ScalarEvolution::getTruncateOrSignExtend(const SCEV *V,
 const SCEV *
 ScalarEvolution::getNoopOrZeroExtend(const SCEV *V, const Type *Ty) {
   const Type *SrcTy = V->getType();
-  assert((SrcTy->isInteger() || isa<PointerType>(SrcTy)) &&
-         (Ty->isInteger() || isa<PointerType>(Ty)) &&
+  assert((SrcTy->isIntegerTy() || isa<PointerType>(SrcTy)) &&
+         (Ty->isIntegerTy() || isa<PointerType>(Ty)) &&
          "Cannot noop or zero extend with non-integer arguments!");
   assert(getTypeSizeInBits(SrcTy) <= getTypeSizeInBits(Ty) &&
          "getNoopOrZeroExtend cannot truncate!");
@@ -2422,8 +2461,8 @@ ScalarEvolution::getNoopOrZeroExtend(const SCEV *V, const Type *Ty) {
 const SCEV *
 ScalarEvolution::getNoopOrSignExtend(const SCEV *V, const Type *Ty) {
   const Type *SrcTy = V->getType();
-  assert((SrcTy->isInteger() || isa<PointerType>(SrcTy)) &&
-         (Ty->isInteger() || isa<PointerType>(Ty)) &&
+  assert((SrcTy->isIntegerTy() || isa<PointerType>(SrcTy)) &&
+         (Ty->isIntegerTy() || isa<PointerType>(Ty)) &&
          "Cannot noop or sign extend with non-integer arguments!");
   assert(getTypeSizeInBits(SrcTy) <= getTypeSizeInBits(Ty) &&
          "getNoopOrSignExtend cannot truncate!");
@@ -2439,8 +2478,8 @@ ScalarEvolution::getNoopOrSignExtend(const SCEV *V, const Type *Ty) {
 const SCEV *
 ScalarEvolution::getNoopOrAnyExtend(const SCEV *V, const Type *Ty) {
   const Type *SrcTy = V->getType();
-  assert((SrcTy->isInteger() || isa<PointerType>(SrcTy)) &&
-         (Ty->isInteger() || isa<PointerType>(Ty)) &&
+  assert((SrcTy->isIntegerTy() || isa<PointerType>(SrcTy)) &&
+         (Ty->isIntegerTy() || isa<PointerType>(Ty)) &&
          "Cannot noop or any extend with non-integer arguments!");
   assert(getTypeSizeInBits(SrcTy) <= getTypeSizeInBits(Ty) &&
          "getNoopOrAnyExtend cannot truncate!");
@@ -2454,8 +2493,8 @@ ScalarEvolution::getNoopOrAnyExtend(const SCEV *V, const Type *Ty) {
 const SCEV *
 ScalarEvolution::getTruncateOrNoop(const SCEV *V, const Type *Ty) {
   const Type *SrcTy = V->getType();
-  assert((SrcTy->isInteger() || isa<PointerType>(SrcTy)) &&
-         (Ty->isInteger() || isa<PointerType>(Ty)) &&
+  assert((SrcTy->isIntegerTy() || isa<PointerType>(SrcTy)) &&
+         (Ty->isIntegerTy() || isa<PointerType>(Ty)) &&
          "Cannot truncate or noop with non-integer arguments!");
   assert(getTypeSizeInBits(SrcTy) >= getTypeSizeInBits(Ty) &&
          "getTruncateOrNoop cannot extend!");
@@ -2527,7 +2566,7 @@ ScalarEvolution::ForgetSymbolicName(Instruction *I, const SCEV *SymName) {
     if (It != Scalars.end()) {
       // Short-circuit the def-use traversal if the symbolic name
       // ceases to appear in expressions.
-      if (!It->second->hasOperand(SymName))
+      if (It->second != SymName && !It->second->hasOperand(SymName))
         continue;

       // SCEVUnknown for a PHI either means that it has an unrecognized
@@ -2689,16 +2728,15 @@ const SCEV *ScalarEvolution::createNodeForGEP(GEPOperator *GEP) {
       // For a struct, add the member offset.
       unsigned FieldNo = cast<ConstantInt>(Index)->getZExtValue();
       TotalOffset = getAddExpr(TotalOffset,
-                               getFieldOffsetExpr(STy, FieldNo),
+                               getOffsetOfExpr(STy, FieldNo),
                                /*HasNUW=*/false, /*HasNSW=*/InBounds);
     } else {
       // For an array, add the element offset, explicitly scaled.
       const SCEV *LocalOffset = getSCEV(Index);
-      if (!isa<PointerType>(LocalOffset->getType()))
-        // Getelementptr indicies are signed.
-        LocalOffset = getTruncateOrSignExtend(LocalOffset, IntPtrTy);
+      // Getelementptr indices are signed.
+      LocalOffset = getTruncateOrSignExtend(LocalOffset, IntPtrTy);
       // Lower "inbounds" GEPs to NSW arithmetic.
-      LocalOffset = getMulExpr(LocalOffset, getAllocSizeExpr(*GTI),
+      LocalOffset = getMulExpr(LocalOffset, getSizeOfExpr(*GTI),
                                /*HasNUW=*/false, /*HasNSW=*/InBounds);
       TotalOffset = getAddExpr(TotalOffset, LocalOffset,
                                /*HasNUW=*/false, /*HasNSW=*/InBounds);
@@ -2797,62 +2835,67 @@ ScalarEvolution::getUnsignedRange(const SCEV *S) {
   if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S))
     return ConstantRange(C->getValue()->getValue());

+  unsigned BitWidth = getTypeSizeInBits(S->getType());
+  ConstantRange ConservativeResult(BitWidth, /*isFullSet=*/true);
+
+  // If the value has known zeros, the maximum unsigned value will have those
+  // known zeros as well.
+  uint32_t TZ = GetMinTrailingZeros(S);
+  if (TZ != 0)
+    ConservativeResult =
+      ConstantRange(APInt::getMinValue(BitWidth),
+                    APInt::getMaxValue(BitWidth).lshr(TZ).shl(TZ) + 1);
+
   if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
     ConstantRange X = getUnsignedRange(Add->getOperand(0));
     for (unsigned i = 1, e = Add->getNumOperands(); i != e; ++i)
       X = X.add(getUnsignedRange(Add->getOperand(i)));
-    return X;
+    return ConservativeResult.intersectWith(X);
   }

   if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(S)) {
     ConstantRange X = getUnsignedRange(Mul->getOperand(0));
     for (unsigned i = 1, e = Mul->getNumOperands(); i != e; ++i)
       X = X.multiply(getUnsignedRange(Mul->getOperand(i)));
-    return X;
+    return ConservativeResult.intersectWith(X);
   }

   if (const SCEVSMaxExpr *SMax = dyn_cast<SCEVSMaxExpr>(S)) {
     ConstantRange X = getUnsignedRange(SMax->getOperand(0));
     for (unsigned i = 1, e = SMax->getNumOperands(); i != e; ++i)
       X = X.smax(getUnsignedRange(SMax->getOperand(i)));
-    return X;
+    return ConservativeResult.intersectWith(X);
   }

   if (const SCEVUMaxExpr *UMax = dyn_cast<SCEVUMaxExpr>(S)) {
     ConstantRange X = getUnsignedRange(UMax->getOperand(0));
     for (unsigned i = 1, e = UMax->getNumOperands(); i != e; ++i)
       X = X.umax(getUnsignedRange(UMax->getOperand(i)));
-    return X;
+    return ConservativeResult.intersectWith(X);
   }

   if (const SCEVUDivExpr *UDiv = dyn_cast<SCEVUDivExpr>(S)) {
     ConstantRange X = getUnsignedRange(UDiv->getLHS());
     ConstantRange Y = getUnsignedRange(UDiv->getRHS());
-    return X.udiv(Y);
+    return ConservativeResult.intersectWith(X.udiv(Y));
   }

   if (const SCEVZeroExtendExpr *ZExt = dyn_cast<SCEVZeroExtendExpr>(S)) {
     ConstantRange X = getUnsignedRange(ZExt->getOperand());
-    return X.zeroExtend(cast<IntegerType>(ZExt->getType())->getBitWidth());
+    return ConservativeResult.intersectWith(X.zeroExtend(BitWidth));
   }

   if (const SCEVSignExtendExpr *SExt = dyn_cast<SCEVSignExtendExpr>(S)) {
     ConstantRange X = getUnsignedRange(SExt->getOperand());
-    return X.signExtend(cast<IntegerType>(SExt->getType())->getBitWidth());
+    return ConservativeResult.intersectWith(X.signExtend(BitWidth));
   }

   if (const SCEVTruncateExpr *Trunc = dyn_cast<SCEVTruncateExpr>(S)) {
     ConstantRange X = getUnsignedRange(Trunc->getOperand());
-    return X.truncate(cast<IntegerType>(Trunc->getType())->getBitWidth());
+    return ConservativeResult.intersectWith(X.truncate(BitWidth));
   }

-  ConstantRange FullSet(getTypeSizeInBits(S->getType()), true);
-
   if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(S)) {
-    const SCEV *T = getBackedgeTakenCount(AddRec->getLoop());
-    const SCEVConstant *Trip = dyn_cast<SCEVConstant>(T);
-    ConstantRange ConservativeResult = FullSet;
-
     // If there's no unsigned wrap, the value will never be less than its
     // initial value.
     if (AddRec->hasNoUnsignedWrap())
@@ -2862,10 +2905,11 @@ ScalarEvolution::getUnsignedRange(const SCEV *S) {
           APInt(getTypeSizeInBits(C->getType()), 0));

     // TODO: non-affine addrec
-    if (Trip && AddRec->isAffine()) {
+    if (AddRec->isAffine()) {
       const Type *Ty = AddRec->getType();
       const SCEV *MaxBECount = getMaxBackedgeTakenCount(AddRec->getLoop());
-      if (getTypeSizeInBits(MaxBECount->getType()) <= getTypeSizeInBits(Ty)) {
+      if (!isa<SCEVCouldNotCompute>(MaxBECount) &&
+          getTypeSizeInBits(MaxBECount->getType()) <= BitWidth) {
         MaxBECount = getNoopOrZeroExtend(MaxBECount, Ty);

         const SCEV *Start = AddRec->getStart();
@@ -2883,7 +2927,7 @@ ScalarEvolution::getUnsignedRange(const SCEV *S) {
                            EndRange.getUnsignedMax());
         if (Min.isMinValue() && Max.isMaxValue())
           return ConservativeResult;
-        return ConstantRange(Min, Max+1);
+        return ConservativeResult.intersectWith(ConstantRange(Min, Max+1));
       }
     }

@@ -2897,11 +2941,11 @@ ScalarEvolution::getUnsignedRange(const SCEV *S) {
     APInt Zeros(BitWidth, 0), Ones(BitWidth, 0);
     ComputeMaskedBits(U->getValue(), Mask, Zeros, Ones, TD);
     if (Ones == ~Zeros + 1)
-      return FullSet;
-    return ConstantRange(Ones, ~Zeros + 1);
+      return ConservativeResult;
+    return ConservativeResult.intersectWith(ConstantRange(Ones, ~Zeros + 1));
   }

-  return FullSet;
+  return ConservativeResult;
 }
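Every range query now starts from a ConservativeResult seeded with GetMinTrailingZeros, and each case intersects its own estimate with it. The arithmetic for the unsigned side, illustrated with APInt for a value known to be a multiple of 8 at 32 bits (TZ = 3); illustrative only:

    unsigned BW = 32, TZ = 3;
    APInt UMax = APInt::getMaxValue(BW).lshr(TZ).shl(TZ);  // 0xFFFFFFF8
    // Half-open range covering 0 through 0xFFFFFFF8, the largest multiple
    // of 8 representable in 32 bits.
    ConstantRange R(APInt::getMinValue(BW), UMax + 1);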
+ uint32_t TZ = GetMinTrailingZeros(S); + if (TZ != 0) + ConservativeResult = + ConstantRange(APInt::getSignedMinValue(BitWidth), + APInt::getSignedMaxValue(BitWidth).ashr(TZ).shl(TZ) + 1); + if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) { ConstantRange X = getSignedRange(Add->getOperand(0)); for (unsigned i = 1, e = Add->getNumOperands(); i != e; ++i) X = X.add(getSignedRange(Add->getOperand(i))); - return X; + return ConservativeResult.intersectWith(X); } if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(S)) { ConstantRange X = getSignedRange(Mul->getOperand(0)); for (unsigned i = 1, e = Mul->getNumOperands(); i != e; ++i) X = X.multiply(getSignedRange(Mul->getOperand(i))); - return X; + return ConservativeResult.intersectWith(X); } if (const SCEVSMaxExpr *SMax = dyn_cast<SCEVSMaxExpr>(S)) { ConstantRange X = getSignedRange(SMax->getOperand(0)); for (unsigned i = 1, e = SMax->getNumOperands(); i != e; ++i) X = X.smax(getSignedRange(SMax->getOperand(i))); - return X; + return ConservativeResult.intersectWith(X); } if (const SCEVUMaxExpr *UMax = dyn_cast<SCEVUMaxExpr>(S)) { ConstantRange X = getSignedRange(UMax->getOperand(0)); for (unsigned i = 1, e = UMax->getNumOperands(); i != e; ++i) X = X.umax(getSignedRange(UMax->getOperand(i))); - return X; + return ConservativeResult.intersectWith(X); } if (const SCEVUDivExpr *UDiv = dyn_cast<SCEVUDivExpr>(S)) { ConstantRange X = getSignedRange(UDiv->getLHS()); ConstantRange Y = getSignedRange(UDiv->getRHS()); - return X.udiv(Y); + return ConservativeResult.intersectWith(X.udiv(Y)); } if (const SCEVZeroExtendExpr *ZExt = dyn_cast<SCEVZeroExtendExpr>(S)) { ConstantRange X = getSignedRange(ZExt->getOperand()); - return X.zeroExtend(cast<IntegerType>(ZExt->getType())->getBitWidth()); + return ConservativeResult.intersectWith(X.zeroExtend(BitWidth)); } if (const SCEVSignExtendExpr *SExt = dyn_cast<SCEVSignExtendExpr>(S)) { ConstantRange X = getSignedRange(SExt->getOperand()); - return X.signExtend(cast<IntegerType>(SExt->getType())->getBitWidth()); + return ConservativeResult.intersectWith(X.signExtend(BitWidth)); } if (const SCEVTruncateExpr *Trunc = dyn_cast<SCEVTruncateExpr>(S)) { ConstantRange X = getSignedRange(Trunc->getOperand()); - return X.truncate(cast<IntegerType>(Trunc->getType())->getBitWidth()); + return ConservativeResult.intersectWith(X.truncate(BitWidth)); } - ConstantRange FullSet(getTypeSizeInBits(S->getType()), true); - if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(S)) { - const SCEV *T = getBackedgeTakenCount(AddRec->getLoop()); - const SCEVConstant *Trip = dyn_cast<SCEVConstant>(T); - ConstantRange ConservativeResult = FullSet; - // If there's no signed wrap, and all the operands have the same sign or // zero, the value won't ever change sign. 
if (AddRec->hasNoSignedWrap()) { @@ -2977,20 +3026,22 @@ ScalarEvolution::getSignedRange(const SCEV *S) { if (!isKnownNonNegative(AddRec->getOperand(i))) AllNonNeg = false; if (!isKnownNonPositive(AddRec->getOperand(i))) AllNonPos = false; } - unsigned BitWidth = getTypeSizeInBits(AddRec->getType()); if (AllNonNeg) - ConservativeResult = ConstantRange(APInt(BitWidth, 0), - APInt::getSignedMinValue(BitWidth)); + ConservativeResult = ConservativeResult.intersectWith( + ConstantRange(APInt(BitWidth, 0), + APInt::getSignedMinValue(BitWidth))); else if (AllNonPos) - ConservativeResult = ConstantRange(APInt::getSignedMinValue(BitWidth), - APInt(BitWidth, 1)); + ConservativeResult = ConservativeResult.intersectWith( + ConstantRange(APInt::getSignedMinValue(BitWidth), + APInt(BitWidth, 1))); } // TODO: non-affine addrec - if (Trip && AddRec->isAffine()) { + if (AddRec->isAffine()) { const Type *Ty = AddRec->getType(); const SCEV *MaxBECount = getMaxBackedgeTakenCount(AddRec->getLoop()); - if (getTypeSizeInBits(MaxBECount->getType()) <= getTypeSizeInBits(Ty)) { + if (!isa<SCEVCouldNotCompute>(MaxBECount) && + getTypeSizeInBits(MaxBECount->getType()) <= BitWidth) { MaxBECount = getNoopOrZeroExtend(MaxBECount, Ty); const SCEV *Start = AddRec->getStart(); @@ -3008,7 +3059,7 @@ ScalarEvolution::getSignedRange(const SCEV *S) { EndRange.getSignedMax()); if (Min.isMinSignedValue() && Max.isMaxSignedValue()) return ConservativeResult; - return ConstantRange(Min, Max+1); + return ConservativeResult.intersectWith(ConstantRange(Min, Max+1)); } } @@ -3017,18 +3068,17 @@ ScalarEvolution::getSignedRange(const SCEV *S) { if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) { // For a SCEVUnknown, ask ValueTracking. - unsigned BitWidth = getTypeSizeInBits(U->getType()); - if (!U->getValue()->getType()->isInteger() && !TD) - return FullSet; + if (!U->getValue()->getType()->isIntegerTy() && !TD) + return ConservativeResult; unsigned NS = ComputeNumSignBits(U->getValue(), TD); if (NS == 1) - return FullSet; - return + return ConservativeResult; + return ConservativeResult.intersectWith( ConstantRange(APInt::getSignedMinValue(BitWidth).ashr(NS - 1), - APInt::getSignedMaxValue(BitWidth).ashr(NS - 1)+1); + APInt::getSignedMaxValue(BitWidth).ashr(NS - 1)+1)); } - return FullSet; + return ConservativeResult; } /// createSCEV - We know that there is no SCEV for the specified value. @@ -3179,7 +3229,7 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) { case Instruction::Shl: // Turn shift left of a constant amount into a multiply. if (ConstantInt *SA = dyn_cast<ConstantInt>(U->getOperand(1))) { - uint32_t BitWidth = cast<IntegerType>(V->getType())->getBitWidth(); + uint32_t BitWidth = cast<IntegerType>(U->getType())->getBitWidth(); Constant *X = ConstantInt::get(getContext(), APInt(BitWidth, 1).shl(SA->getLimitedValue(BitWidth))); return getMulExpr(getSCEV(U->getOperand(0)), getSCEV(X)); @@ -3189,7 +3239,7 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) { case Instruction::LShr: // Turn logical shift right of a constant into a unsigned divide. 
if (ConstantInt *SA = dyn_cast<ConstantInt>(U->getOperand(1))) { - uint32_t BitWidth = cast<IntegerType>(V->getType())->getBitWidth(); + uint32_t BitWidth = cast<IntegerType>(U->getType())->getBitWidth(); Constant *X = ConstantInt::get(getContext(), APInt(BitWidth, 1).shl(SA->getLimitedValue(BitWidth))); return getUDivExpr(getSCEV(U->getOperand(0)), getSCEV(X)); @@ -3230,10 +3280,10 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) { return getSCEV(U->getOperand(0)); break; - // It's tempting to handle inttoptr and ptrtoint, however this can - // lead to pointer expressions which cannot be expanded to GEPs - // (because they may overflow). For now, the only pointer-typed - // expressions we handle are GEPs and address literals. + // It's tempting to handle inttoptr and ptrtoint as no-ops, however this can + // lead to pointer expressions which cannot safely be expanded to GEPs, + // because ScalarEvolution doesn't respect the GEP aliasing rules when + // simplifying integer expressions. case Instruction::GetElementPtr: return createNodeForGEP(cast<GEPOperator>(U)); @@ -3350,19 +3400,19 @@ ScalarEvolution::getBackedgeTakenInfo(const Loop *L) { std::pair<std::map<const Loop *, BackedgeTakenInfo>::iterator, bool> Pair = BackedgeTakenCounts.insert(std::make_pair(L, getCouldNotCompute())); if (Pair.second) { - BackedgeTakenInfo ItCount = ComputeBackedgeTakenCount(L); - if (ItCount.Exact != getCouldNotCompute()) { - assert(ItCount.Exact->isLoopInvariant(L) && - ItCount.Max->isLoopInvariant(L) && - "Computed trip count isn't loop invariant for loop!"); + BackedgeTakenInfo BECount = ComputeBackedgeTakenCount(L); + if (BECount.Exact != getCouldNotCompute()) { + assert(BECount.Exact->isLoopInvariant(L) && + BECount.Max->isLoopInvariant(L) && + "Computed backedge-taken count isn't loop invariant for loop!"); ++NumTripCountsComputed; // Update the value in the map. - Pair.first->second = ItCount; + Pair.first->second = BECount; } else { - if (ItCount.Max != getCouldNotCompute()) + if (BECount.Max != getCouldNotCompute()) // Update the value in the map. - Pair.first->second = ItCount; + Pair.first->second = BECount; if (isa<PHINode>(L->getHeader()->begin())) // Only count loops that have phi nodes as not being computable. ++NumTripCountsNotComputed; @@ -3373,7 +3423,7 @@ ScalarEvolution::getBackedgeTakenInfo(const Loop *L) { // conservative estimates made without the benefit of trip count // information. This is similar to the code in forgetLoop, except that // it handles SCEVUnknown PHI nodes specially. - if (ItCount.hasAnyInfo()) { + if (BECount.hasAnyInfo()) { SmallVector<Instruction *, 16> Worklist; PushLoopPHIs(L, Worklist); @@ -4230,9 +4280,6 @@ const SCEV *ScalarEvolution::computeSCEVAtScope(const SCEV *V, const Loop *L) { return getTruncateExpr(Op, Cast->getType()); } - if (isa<SCEVTargetDataConstant>(V)) - return V; - llvm_unreachable("Unknown SCEV type!"); return 0; } @@ -4403,7 +4450,7 @@ const SCEV *ScalarEvolution::HowFarToZero(const SCEV *V, const Loop *L) { -StartC->getValue()->getValue(), *this); } - } else if (AddRec->isQuadratic() && AddRec->getType()->isInteger()) { + } else if (AddRec->isQuadratic() && AddRec->getType()->isIntegerTy()) { // If this is a quadratic (3-term) AddRec {L,+,M,+,N}, find the roots of // the quadratic equation to solve it. 
    std::pair<const SCEV *,const SCEV *> Roots = SolveQuadraticEquation(AddRec,
@@ -4947,6 +4994,9 @@ const SCEV *ScalarEvolution::getBECount(const SCEV *Start,
                                        const SCEV *End,
                                        const SCEV *Step,
                                        bool NoWrap) {
+  assert(!isKnownNegative(Step) &&
+         "This code doesn't handle negative strides yet!");
+
   const Type *Ty = Start->getType();
   const SCEV *NegOne = getIntegerSCEV(-1, Ty);
   const SCEV *Diff = getMinusSCEV(End, Start);
@@ -4989,39 +5039,35 @@ ScalarEvolution::HowManyLessThans(const SCEV *LHS, const SCEV *RHS,
                       AddRec->hasNoUnsignedWrap();
 
     if (AddRec->isAffine()) {
-      // FORNOW: We only support unit strides.
       unsigned BitWidth = getTypeSizeInBits(AddRec->getType());
       const SCEV *Step = AddRec->getStepRecurrence(*this);
-      // TODO: handle non-constant strides.
-      const SCEVConstant *CStep = dyn_cast<SCEVConstant>(Step);
-      if (!CStep || CStep->isZero())
+      if (Step->isZero())
        return getCouldNotCompute();
-      if (CStep->isOne()) {
+      if (Step->isOne()) {
        // With unit stride, the iteration never steps past the limit value.
-      } else if (CStep->getValue()->getValue().isStrictlyPositive()) {
-        if (NoWrap) {
-          // We know the iteration won't step past the maximum value for its type.
-          ;
-        } else if (const SCEVConstant *CLimit = dyn_cast<SCEVConstant>(RHS)) {
-          // Test whether a positive iteration iteration can step past the limit
-          // value and past the maximum value for its type in a single step.
-          if (isSigned) {
-            APInt Max = APInt::getSignedMaxValue(BitWidth);
-            if ((Max - CStep->getValue()->getValue())
-                  .slt(CLimit->getValue()->getValue()))
-              return getCouldNotCompute();
-          } else {
-            APInt Max = APInt::getMaxValue(BitWidth);
-            if ((Max - CStep->getValue()->getValue())
-                  .ult(CLimit->getValue()->getValue()))
-              return getCouldNotCompute();
-          }
-        } else
-          // TODO: handle non-constant limit values below.
-          return getCouldNotCompute();
+      } else if (isKnownPositive(Step)) {
+        // Test whether a positive iteration can step past the limit
+        // value and past the maximum value for its type in a single step.
+        // Note that it's not sufficient to check NoWrap here, because even
+        // though the value after a wrap is undefined, it's not undefined
+        // behavior, so if wrap does occur, the loop could either terminate or
+        // loop infinitely, but in either case, the loop is guaranteed to
+        // iterate at least until the iteration where the wrapping occurs.
+        const SCEV *One = getIntegerSCEV(1, Step->getType());
+        if (isSigned) {
+          APInt Max = APInt::getSignedMaxValue(BitWidth);
+          if ((Max - getSignedRange(getMinusSCEV(Step, One)).getSignedMax())
+                .slt(getSignedRange(RHS).getSignedMax()))
+            return getCouldNotCompute();
+        } else {
+          APInt Max = APInt::getMaxValue(BitWidth);
+          if ((Max - getUnsignedRange(getMinusSCEV(Step, One)).getUnsignedMax())
+                .ult(getUnsignedRange(RHS).getUnsignedMax()))
+            return getCouldNotCompute();
+        }
       } else
-        // TODO: handle negative strides below.
+        // TODO: Handle negative strides here and below.
        return getCouldNotCompute();
 
       // We know the LHS is of the form {n,+,s} and the RHS is some loop-invariant
@@ -5054,6 +5100,20 @@ ScalarEvolution::HowManyLessThans(const SCEV *LHS, const SCEV *RHS,
                                getSignedRange(End).getSignedMax() :
                                getUnsignedRange(End).getUnsignedMax());
 
+      // If MaxEnd is within a step of the maximum integer value in its type,
+      // adjust it down to the minimum value which would produce the same effect.
+      // This allows the subsequent ceiling division of (N+(step-1))/step to
+      // compute the correct value.
+ const SCEV *StepMinusOne = getMinusSCEV(Step, + getIntegerSCEV(1, Step->getType())); + MaxEnd = isSigned ? + getSMinExpr(MaxEnd, + getMinusSCEV(getConstant(APInt::getSignedMaxValue(BitWidth)), + StepMinusOne)) : + getUMinExpr(MaxEnd, + getMinusSCEV(getConstant(APInt::getMaxValue(BitWidth)), + StepMinusOne)); + // Finally, we subtract these two values and divide, rounding up, to get // the number of times the backedge is executed. const SCEV *BECount = getBECount(Start, End, Step, NoWrap); diff --git a/lib/Analysis/ScalarEvolutionExpander.cpp b/lib/Analysis/ScalarEvolutionExpander.cpp index a72f58f..c2e1f89 100644 --- a/lib/Analysis/ScalarEvolutionExpander.cpp +++ b/lib/Analysis/ScalarEvolutionExpander.cpp @@ -365,31 +365,33 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin, // the indices index into the element or field type selected by the // preceding index. for (;;) { - const SCEV *ElSize = SE.getAllocSizeExpr(ElTy); // If the scale size is not 0, attempt to factor out a scale for // array indexing. SmallVector<const SCEV *, 8> ScaledOps; - if (ElTy->isSized() && !ElSize->isZero()) { - SmallVector<const SCEV *, 8> NewOps; - for (unsigned i = 0, e = Ops.size(); i != e; ++i) { - const SCEV *Op = Ops[i]; - const SCEV *Remainder = SE.getIntegerSCEV(0, Ty); - if (FactorOutConstant(Op, Remainder, ElSize, SE, SE.TD)) { - // Op now has ElSize factored out. - ScaledOps.push_back(Op); - if (!Remainder->isZero()) - NewOps.push_back(Remainder); - AnyNonZeroIndices = true; - } else { - // The operand was not divisible, so add it to the list of operands - // we'll scan next iteration. - NewOps.push_back(Ops[i]); + if (ElTy->isSized()) { + const SCEV *ElSize = SE.getSizeOfExpr(ElTy); + if (!ElSize->isZero()) { + SmallVector<const SCEV *, 8> NewOps; + for (unsigned i = 0, e = Ops.size(); i != e; ++i) { + const SCEV *Op = Ops[i]; + const SCEV *Remainder = SE.getIntegerSCEV(0, Ty); + if (FactorOutConstant(Op, Remainder, ElSize, SE, SE.TD)) { + // Op now has ElSize factored out. + ScaledOps.push_back(Op); + if (!Remainder->isZero()) + NewOps.push_back(Remainder); + AnyNonZeroIndices = true; + } else { + // The operand was not divisible, so add it to the list of operands + // we'll scan next iteration. + NewOps.push_back(Ops[i]); + } + } + // If we made any changes, update Ops. + if (!ScaledOps.empty()) { + Ops = NewOps; + SimplifyAddOperands(Ops, Ty, SE); } - } - // If we made any changes, update Ops. - if (!ScaledOps.empty()) { - Ops = NewOps; - SimplifyAddOperands(Ops, Ty, SE); } } @@ -427,22 +429,22 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin, } } } else { - // Without TargetData, just check for a SCEVFieldOffsetExpr of the + // Without TargetData, just check for an offsetof expression of the // appropriate struct type. 
for (unsigned i = 0, e = Ops.size(); i != e; ++i) - if (const SCEVFieldOffsetExpr *FO = - dyn_cast<SCEVFieldOffsetExpr>(Ops[i])) - if (FO->getStructType() == STy) { - unsigned FieldNo = FO->getFieldNo(); - GepIndices.push_back( - ConstantInt::get(Type::getInt32Ty(Ty->getContext()), - FieldNo)); - ElTy = STy->getTypeAtIndex(FieldNo); + if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(Ops[i])) { + const Type *CTy; + Constant *FieldNo; + if (U->isOffsetOf(CTy, FieldNo) && CTy == STy) { + GepIndices.push_back(FieldNo); + ElTy = + STy->getTypeAtIndex(cast<ConstantInt>(FieldNo)->getZExtValue()); Ops[i] = SE.getConstant(Ty, 0); AnyNonZeroIndices = true; FoundFieldNo = true; break; } + } } // If no struct field offsets were found, tentatively assume that // field zero was selected (since the zero offset would obviously @@ -639,8 +641,52 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized, // Reuse a previously-inserted PHI, if present. for (BasicBlock::iterator I = L->getHeader()->begin(); PHINode *PN = dyn_cast<PHINode>(I); ++I) - if (isInsertedInstruction(PN) && SE.getSCEV(PN) == Normalized) - return PN; + if (SE.isSCEVable(PN->getType()) && + (SE.getEffectiveSCEVType(PN->getType()) == + SE.getEffectiveSCEVType(Normalized->getType())) && + SE.getSCEV(PN) == Normalized) + if (BasicBlock *LatchBlock = L->getLoopLatch()) { + Instruction *IncV = + cast<Instruction>(PN->getIncomingValueForBlock(LatchBlock)); + + // Determine if this is a well-behaved chain of instructions leading + // back to the PHI. It probably will be, if we're scanning an inner + // loop already visited by LSR for example, but it wouldn't have + // to be. + do { + if (IncV->getNumOperands() == 0 || isa<PHINode>(IncV)) { + IncV = 0; + break; + } + IncV = dyn_cast<Instruction>(IncV->getOperand(0)); + if (!IncV) + break; + if (IncV->mayHaveSideEffects()) { + IncV = 0; + break; + } + } while (IncV != PN); + + if (IncV) { + // Ok, the add recurrence looks usable. + // Remember this PHI, even in post-inc mode. + InsertedValues.insert(PN); + // Remember the increment. + IncV = cast<Instruction>(PN->getIncomingValueForBlock(LatchBlock)); + rememberInstruction(IncV); + if (L == IVIncInsertLoop) + do { + if (SE.DT->dominates(IncV, IVIncInsertPos)) + break; + // Make sure the increment is where we want it. But don't move it + // down past a potential existing post-inc user. + IncV->moveBefore(IVIncInsertPos); + IVIncInsertPos = IncV; + IncV = cast<Instruction>(IncV->getOperand(0)); + } while (IncV != PN); + return PN; + } + } // Save the original insertion point so we can restore it when we're done. BasicBlock *SaveInsertBB = Builder.GetInsertBlock(); @@ -711,7 +757,7 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized, // Restore the original insert point. if (SaveInsertBB) - Builder.SetInsertPoint(SaveInsertBB, SaveInsertPt); + restoreInsertPoint(SaveInsertBB, SaveInsertPt); // Remember this PHI, even in post-inc mode. InsertedValues.insert(PN); @@ -774,6 +820,7 @@ Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) { // Re-apply any non-loop-dominating scale. 
  if (PostLoopScale) {
+    Result = InsertNoopCastOfTo(Result, IntTy);
     Result = Builder.CreateMul(Result,
                                expandCodeFor(PostLoopScale, IntTy));
     rememberInstruction(Result);
@@ -785,6 +832,7 @@ Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) {
       const SCEV *const OffsetArray[1] = { PostLoopOffset };
       Result = expandAddToGEP(OffsetArray, OffsetArray+1, PTy, IntTy, Result);
     } else {
+      Result = InsertNoopCastOfTo(Result, IntTy);
       Result = Builder.CreateAdd(Result,
                                  expandCodeFor(PostLoopOffset, IntTy));
       rememberInstruction(Result);
@@ -825,7 +873,7 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) {
     while (isa<PHINode>(NewInsertPt)) ++NewInsertPt;
     V = expandCodeFor(SE.getTruncateExpr(SE.getUnknown(V), Ty), 0, NewInsertPt);
-    Builder.SetInsertPoint(SaveInsertBB, SaveInsertPt);
+    restoreInsertPoint(SaveInsertBB, SaveInsertPt);
     return V;
   }
 
@@ -1001,14 +1049,6 @@ Value *SCEVExpander::visitUMaxExpr(const SCEVUMaxExpr *S) {
   return LHS;
 }
 
-Value *SCEVExpander::visitFieldOffsetExpr(const SCEVFieldOffsetExpr *S) {
-  return ConstantExpr::getOffsetOf(S->getStructType(), S->getFieldNo());
-}
-
-Value *SCEVExpander::visitAllocSizeExpr(const SCEVAllocSizeExpr *S) {
-  return ConstantExpr::getSizeOf(S->getAllocType());
-}
-
 Value *SCEVExpander::expandCodeFor(const SCEV *SH, const Type *Ty) {
   // Expand the code for this SCEV.
   Value *V = expand(SH);
@@ -1059,10 +1099,32 @@ Value *SCEVExpander::expand(const SCEV *S) {
   if (!PostIncLoop)
     InsertedExpressions[std::make_pair(S, InsertPt)] = V;
 
-  Builder.SetInsertPoint(SaveInsertBB, SaveInsertPt);
+  restoreInsertPoint(SaveInsertBB, SaveInsertPt);
   return V;
 }
 
+void SCEVExpander::rememberInstruction(Value *I) {
+  if (!PostIncLoop)
+    InsertedValues.insert(I);
+
+  // If we just claimed an existing instruction and that instruction had
+  // been the insert point, adjust the insert point forward so that
+  // subsequently inserted code will be dominated.
+  if (Builder.GetInsertPoint() == I) {
+    BasicBlock::iterator It = cast<Instruction>(I);
+    do { ++It; } while (isInsertedInstruction(It));
+    Builder.SetInsertPoint(Builder.GetInsertBlock(), It);
+  }
+}
+
+void SCEVExpander::restoreInsertPoint(BasicBlock *BB, BasicBlock::iterator I) {
+  // If we acquired more instructions since the old insert point was saved,
+  // advance past them.
+  while (isInsertedInstruction(I)) ++I;
+
+  Builder.SetInsertPoint(BB, I);
+}
+
 /// getOrInsertCanonicalInductionVariable - This method returns the
 /// canonical induction variable of the specified type for the specified
 /// loop (inserting one if there is none).  A canonical induction variable
@@ -1070,13 +1132,13 @@ Value *SCEVExpander::expand(const SCEV *S) {
 Value *
 SCEVExpander::getOrInsertCanonicalInductionVariable(const Loop *L,
                                                     const Type *Ty) {
-  assert(Ty->isInteger() && "Can only insert integer induction variables!");
+  assert(Ty->isIntegerTy() && "Can only insert integer induction variables!");
   const SCEV *H = SE.getAddRecExpr(SE.getIntegerSCEV(0, Ty),
                                    SE.getIntegerSCEV(1, Ty), L);
   BasicBlock *SaveInsertBB = Builder.GetInsertBlock();
   BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint();
   Value *V = expandCodeFor(H, 0, L->getHeader()->begin());
   if (SaveInsertBB)
-    Builder.SetInsertPoint(SaveInsertBB, SaveInsertPt);
+    restoreInsertPoint(SaveInsertBB, SaveInsertPt);
   return V;
 }
diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp
index 91e5bc3..7cc9c0d 100644
--- a/lib/Analysis/ValueTracking.cpp
+++ b/lib/Analysis/ValueTracking.cpp
@@ -49,11 +49,11 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask,
   assert(V && "No Value?");
   assert(Depth <= MaxDepth && "Limit Search Depth");
   unsigned BitWidth = Mask.getBitWidth();
-  assert((V->getType()->isIntOrIntVector() || isa<PointerType>(V->getType())) &&
-         "Not integer or pointer type!");
+  assert((V->getType()->isIntOrIntVectorTy() || isa<PointerType>(V->getType()))
+         && "Not integer or pointer type!");
   assert((!TD ||
          TD->getTypeSizeInBits(V->getType()->getScalarType()) == BitWidth) &&
-         (!V->getType()->isIntOrIntVector() ||
+         (!V->getType()->isIntOrIntVectorTy() ||
          V->getType()->getScalarSizeInBits() == BitWidth) &&
         KnownZero.getBitWidth() == BitWidth &&
         KnownOne.getBitWidth() == BitWidth &&
@@ -269,7 +269,7 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask,
   }
   case Instruction::BitCast: {
     const Type *SrcTy = I->getOperand(0)->getType();
-    if ((SrcTy->isInteger() || isa<PointerType>(SrcTy)) &&
+    if ((SrcTy->isIntegerTy() || isa<PointerType>(SrcTy)) &&
         // TODO: For now, not handling conversions like:
         // (bitcast i64 %x to <2 x i32>)
         !isa<VectorType>(I->getType())) {
@@ -421,20 +421,29 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask,
   }
   case Instruction::SRem:
     if (ConstantInt *Rem = dyn_cast<ConstantInt>(I->getOperand(1))) {
-      APInt RA = Rem->getValue();
-      if (RA.isPowerOf2() || (-RA).isPowerOf2()) {
-        APInt LowBits = RA.isStrictlyPositive() ? (RA - 1) : ~RA;
+      APInt RA = Rem->getValue().abs();
+      if (RA.isPowerOf2()) {
+        APInt LowBits = RA - 1;
         APInt Mask2 = LowBits | APInt::getSignBit(BitWidth);
         ComputeMaskedBits(I->getOperand(0), Mask2, KnownZero2, KnownOne2, TD,
                           Depth+1);
-        // If the sign bit of the first operand is zero, the sign bit of
-        // the result is zero. If the first operand has no one bits below
-        // the second operand's single 1 bit, its sign will be zero.
+        // The low bits of the first operand are unchanged by the srem.
+        KnownZero = KnownZero2 & LowBits;
+        KnownOne = KnownOne2 & LowBits;
+
+        // If the first operand is non-negative or has all low bits zero, then
+        // the upper bits are all zero.
         if (KnownZero2[BitWidth-1] || ((KnownZero2 & LowBits) == LowBits))
-          KnownZero2 |= ~LowBits;
+          KnownZero |= ~LowBits;
 
-        KnownZero |= KnownZero2 & Mask;
+        // If the first operand is negative and not all low bits are zero, then
+        // the upper bits are all one.
+ if (KnownOne2[BitWidth-1] && ((KnownOne2 & LowBits) != 0)) + KnownOne |= ~LowBits; + + KnownZero &= Mask; + KnownOne &= Mask; assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); } @@ -640,7 +649,7 @@ bool llvm::MaskedValueIsZero(Value *V, const APInt &Mask, /// unsigned llvm::ComputeNumSignBits(Value *V, const TargetData *TD, unsigned Depth) { - assert((TD || V->getType()->isIntOrIntVector()) && + assert((TD || V->getType()->isIntOrIntVectorTy()) && "ComputeNumSignBits requires a TargetData object to operate " "on non-integer values!"); const Type *Ty = V->getType(); @@ -814,7 +823,7 @@ bool llvm::ComputeMultiple(Value *V, unsigned Base, Value *&Multiple, assert(V && "No Value?"); assert(Depth <= MaxDepth && "Limit Search Depth"); - assert(V->getType()->isInteger() && "Not integer or pointer type!"); + assert(V->getType()->isIntegerTy() && "Not integer or pointer type!"); const Type *T = V->getType(); @@ -1363,7 +1372,7 @@ bool llvm::GetConstantStringInfo(Value *V, std::string &Str, uint64_t Offset, // Make sure the index-ee is a pointer to array of i8. const PointerType *PT = cast<PointerType>(GEP->getOperand(0)->getType()); const ArrayType *AT = dyn_cast<ArrayType>(PT->getElementType()); - if (AT == 0 || !AT->getElementType()->isInteger(8)) + if (AT == 0 || !AT->getElementType()->isIntegerTy(8)) return false; // Check to make sure that the first operand of the GEP is an integer and @@ -1402,7 +1411,7 @@ bool llvm::GetConstantStringInfo(Value *V, std::string &Str, uint64_t Offset, // Must be a Constant Array ConstantArray *Array = dyn_cast<ConstantArray>(GlobalInit); - if (Array == 0 || !Array->getType()->getElementType()->isInteger(8)) + if (Array == 0 || !Array->getType()->getElementType()->isIntegerTy(8)) return false; // Get the number of elements in the array diff --git a/lib/Archive/Archive.cpp b/lib/Archive/Archive.cpp index 00778d9..f4f8a43 100644 --- a/lib/Archive/Archive.cpp +++ b/lib/Archive/Archive.cpp @@ -14,7 +14,6 @@ #include "ArchiveInternals.h" #include "llvm/Bitcode/ReaderWriter.h" -#include "llvm/ModuleProvider.h" #include "llvm/Module.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/System/Process.h" @@ -173,8 +172,8 @@ void Archive::cleanUpMemory() { foreignST = 0; } - // Delete any ModuleProviders and ArchiveMember's we've allocated as a result - // of symbol table searches. + // Delete any Modules and ArchiveMember's we've allocated as a result of + // symbol table searches. for (ModuleMap::iterator I=modules.begin(), E=modules.end(); I != E; ++I ) { delete I->second.first; delete I->second.second; @@ -221,51 +220,37 @@ bool llvm::GetBitcodeSymbols(const sys::Path& fName, return true; } - ModuleProvider *MP = getBitcodeModuleProvider(Buffer.get(), Context, ErrMsg); - if (!MP) + Module *M = ParseBitcodeFile(Buffer.get(), Context, ErrMsg); + if (!M) return true; - // Get the module from the provider - Module* M = MP->materializeModule(); - if (M == 0) { - delete MP; - return true; - } - // Get the symbols getSymbols(M, symbols); // Done with the module. - delete MP; + delete M; return true; } -ModuleProvider* +Module* llvm::GetBitcodeSymbols(const unsigned char *BufPtr, unsigned Length, const std::string& ModuleID, LLVMContext& Context, std::vector<std::string>& symbols, std::string* ErrMsg) { - // Get the module provider - MemoryBuffer *Buffer =MemoryBuffer::getNewMemBuffer(Length, ModuleID.c_str()); + // Get the module. 
+ std::auto_ptr<MemoryBuffer> Buffer( + MemoryBuffer::getNewMemBuffer(Length, ModuleID.c_str())); memcpy((char*)Buffer->getBufferStart(), BufPtr, Length); - ModuleProvider *MP = getBitcodeModuleProvider(Buffer, Context, ErrMsg); - if (!MP) + Module *M = ParseBitcodeFile(Buffer.get(), Context, ErrMsg); + if (!M) return 0; - // Get the module from the provider - Module* M = MP->materializeModule(); - if (M == 0) { - delete MP; - return 0; - } - // Get the symbols getSymbols(M, symbols); - // Done with the module. Note that ModuleProvider will delete the - // Module when it is deleted. Also note that its the caller's responsibility - // to delete the ModuleProvider. - return MP; + // Done with the module. Note that it's the caller's responsibility to delete + // the Module. + return M; } diff --git a/lib/Archive/ArchiveInternals.h b/lib/Archive/ArchiveInternals.h index d187ed9..baea544 100644 --- a/lib/Archive/ArchiveInternals.h +++ b/lib/Archive/ArchiveInternals.h @@ -77,11 +77,11 @@ namespace llvm { std::vector<std::string>& symbols, std::string* ErrMsg); - ModuleProvider* GetBitcodeSymbols(const unsigned char*Buffer,unsigned Length, - const std::string& ModuleID, - LLVMContext& Context, - std::vector<std::string>& symbols, - std::string* ErrMsg); + Module* GetBitcodeSymbols(const unsigned char*Buffer,unsigned Length, + const std::string& ModuleID, + LLVMContext& Context, + std::vector<std::string>& symbols, + std::string* ErrMsg); } #endif diff --git a/lib/Archive/ArchiveReader.cpp b/lib/Archive/ArchiveReader.cpp index 74895d8..3ef15d2 100644 --- a/lib/Archive/ArchiveReader.cpp +++ b/lib/Archive/ArchiveReader.cpp @@ -120,7 +120,8 @@ Archive::parseMemberHeader(const char*& At, const char* End, std::string* error) if (Hdr->name[1] == '1' && Hdr->name[2] == '/') { if (isdigit(Hdr->name[3])) { unsigned len = atoi(&Hdr->name[3]); - pathname.assign(At, len); + const char *nulp = (const char *)memchr(At, '\0', len); + pathname.assign(At, nulp != 0 ? nulp - At : len); At += len; MemberSize -= len; flags |= ArchiveMember::HasLongFilenameFlag; @@ -452,9 +453,9 @@ Archive* Archive::OpenAndLoadSymbols(const sys::Path& file, return result.release(); } -// Look up one symbol in the symbol table and return a ModuleProvider for the -// module that defines that symbol. -ModuleProvider* +// Look up one symbol in the symbol table and return the module that defines +// that symbol. +Module* Archive::findModuleDefiningSymbol(const std::string& symbol, std::string* ErrMsg) { SymTabType::iterator SI = symTab.find(symbol); @@ -483,27 +484,27 @@ Archive::findModuleDefiningSymbol(const std::string& symbol, if (!mbr) return 0; - // Now, load the bitcode module to get the ModuleProvider + // Now, load the bitcode module to get the Module. std::string FullMemberName = archPath.str() + "(" + mbr->getPath().str() + ")"; MemoryBuffer *Buffer =MemoryBuffer::getNewMemBuffer(mbr->getSize(), FullMemberName.c_str()); memcpy((char*)Buffer->getBufferStart(), mbr->getData(), mbr->getSize()); - ModuleProvider *mp = getBitcodeModuleProvider(Buffer, Context, ErrMsg); - if (!mp) + Module *m = getLazyBitcodeModule(Buffer, Context, ErrMsg); + if (!m) return 0; - modules.insert(std::make_pair(fileOffset, std::make_pair(mp, mbr))); + modules.insert(std::make_pair(fileOffset, std::make_pair(m, mbr))); - return mp; + return m; } // Look up multiple symbols in the symbol table and return a set of -// ModuleProviders that define those symbols. +// Modules that define those symbols. 
bool Archive::findModulesDefiningSymbols(std::set<std::string>& symbols, - std::set<ModuleProvider*>& result, + std::set<Module*>& result, std::string* error) { if (!mapfile || !base) { if (error) @@ -536,19 +537,19 @@ Archive::findModulesDefiningSymbols(std::set<std::string>& symbols, std::vector<std::string> symbols; std::string FullMemberName = archPath.str() + "(" + mbr->getPath().str() + ")"; - ModuleProvider* MP = + Module* M = GetBitcodeSymbols((const unsigned char*)At, mbr->getSize(), FullMemberName, Context, symbols, error); - if (MP) { + if (M) { // Insert the module's symbols into the symbol table for (std::vector<std::string>::iterator I = symbols.begin(), E=symbols.end(); I != E; ++I ) { symTab.insert(std::make_pair(*I, offset)); } - // Insert the ModuleProvider and the ArchiveMember into the table of + // Insert the Module and the ArchiveMember into the table of // modules. - modules.insert(std::make_pair(offset, std::make_pair(MP, mbr))); + modules.insert(std::make_pair(offset, std::make_pair(M, mbr))); } else { if (error) *error = "Can't parse bitcode member: " + @@ -571,11 +572,11 @@ Archive::findModulesDefiningSymbols(std::set<std::string>& symbols, for (std::set<std::string>::iterator I=symbols.begin(), E=symbols.end(); I != E;) { // See if this symbol exists - ModuleProvider* mp = findModuleDefiningSymbol(*I,error); - if (mp) { - // The symbol exists, insert the ModuleProvider into our result, - // duplicates wil be ignored - result.insert(mp); + Module* m = findModuleDefiningSymbol(*I,error); + if (m) { + // The symbol exists, insert the Module into our result, duplicates will + // be ignored. + result.insert(m); // Remove the symbol now that its been resolved, being careful to // post-increment the iterator. diff --git a/lib/Archive/ArchiveWriter.cpp b/lib/Archive/ArchiveWriter.cpp index d17f6b5..58fbbf4 100644 --- a/lib/Archive/ArchiveWriter.cpp +++ b/lib/Archive/ArchiveWriter.cpp @@ -12,12 +12,12 @@ //===----------------------------------------------------------------------===// #include "ArchiveInternals.h" -#include "llvm/Bitcode/ReaderWriter.h" +#include "llvm/Module.h" #include "llvm/ADT/OwningPtr.h" +#include "llvm/Bitcode/ReaderWriter.h" #include "llvm/Support/MemoryBuffer.h" -#include "llvm/System/Signals.h" #include "llvm/System/Process.h" -#include "llvm/ModuleProvider.h" +#include "llvm/System/Signals.h" #include <fstream> #include <ostream> #include <iomanip> @@ -225,12 +225,12 @@ Archive::writeMember( std::vector<std::string> symbols; std::string FullMemberName = archPath.str() + "(" + member.getPath().str() + ")"; - ModuleProvider* MP = + Module* M = GetBitcodeSymbols((const unsigned char*)data,fSize, FullMemberName, Context, symbols, ErrMsg); // If the bitcode parsed successfully - if ( MP ) { + if ( M ) { for (std::vector<std::string>::iterator SI = symbols.begin(), SE = symbols.end(); SI != SE; ++SI) { @@ -244,7 +244,7 @@ Archive::writeMember( } } // We don't need this module any more. - delete MP; + delete M; } else { delete mFile; if (ErrMsg) diff --git a/lib/Archive/Makefile b/lib/Archive/Makefile index 1256e1c..da97804 100644 --- a/lib/Archive/Makefile +++ b/lib/Archive/Makefile @@ -9,7 +9,6 @@ LEVEL = ../.. LIBRARYNAME = LLVMArchive -CXXFLAGS = -fno-rtti # We only want an archive so only those modules actually used by a tool are # included. 
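[Editorial note] The ArchiveReader.cpp hunk above also quietly hardens long-filename parsing: instead of blindly copying all `len` bytes of the name field, it now stops at the first NUL. A minimal standalone sketch of that idiom follows; the helper name is illustrative, not part of the patch:

  #include <cstring>
  #include <string>

  // The ar long-name region is fixed-width and may be NUL-padded; copying all
  // `len` bytes would embed trailing '\0' characters in the resulting path.
  // Clamping at the first NUL matches the pathname.assign() fix above.
  static std::string readLongName(const char *At, unsigned len) {
    const char *nulp = (const char *)memchr(At, '\0', len);
    return std::string(At, nulp != 0 ? size_t(nulp - At) : size_t(len));
  }
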
diff --git a/lib/AsmParser/LLLexer.cpp b/lib/AsmParser/LLLexer.cpp index 2a926d2..46f3cbc 100644 --- a/lib/AsmParser/LLLexer.cpp +++ b/lib/AsmParser/LLLexer.cpp @@ -558,6 +558,7 @@ lltok::Kind LLLexer::LexIdentifier() { KEYWORD(readnone); KEYWORD(readonly); + KEYWORD(inlinehint); KEYWORD(noinline); KEYWORD(alwaysinline); KEYWORD(optsize); @@ -569,6 +570,7 @@ lltok::Kind LLLexer::LexIdentifier() { KEYWORD(type); KEYWORD(opaque); + KEYWORD(union); KEYWORD(eq); KEYWORD(ne); KEYWORD(slt); KEYWORD(sgt); KEYWORD(sle); KEYWORD(sge); KEYWORD(ult); KEYWORD(ugt); KEYWORD(ule); KEYWORD(uge); diff --git a/lib/AsmParser/LLParser.cpp b/lib/AsmParser/LLParser.cpp index 04a5263c..9cae0d2 100644 --- a/lib/AsmParser/LLParser.cpp +++ b/lib/AsmParser/LLParser.cpp @@ -947,6 +947,7 @@ bool LLParser::ParseOptionalAttrs(unsigned &Attrs, unsigned AttrKind) { case lltok::kw_noinline: Attrs |= Attribute::NoInline; break; case lltok::kw_readnone: Attrs |= Attribute::ReadNone; break; case lltok::kw_readonly: Attrs |= Attribute::ReadOnly; break; + case lltok::kw_inlinehint: Attrs |= Attribute::InlineHint; break; case lltok::kw_alwaysinline: Attrs |= Attribute::AlwaysInline; break; case lltok::kw_optsize: Attrs |= Attribute::OptimizeForSize; break; case lltok::kw_ssp: Attrs |= Attribute::StackProtect; break; @@ -955,6 +956,14 @@ bool LLParser::ParseOptionalAttrs(unsigned &Attrs, unsigned AttrKind) { case lltok::kw_noimplicitfloat: Attrs |= Attribute::NoImplicitFloat; break; case lltok::kw_naked: Attrs |= Attribute::Naked; break; + case lltok::kw_alignstack: { + unsigned Alignment; + if (ParseOptionalStackAlignment(Alignment)) + return true; + Attrs |= Attribute::constructStackAlignmentFromInt(Alignment); + continue; + } + case lltok::kw_align: { unsigned Alignment; if (ParseOptionalAlignment(Alignment)) @@ -962,6 +971,7 @@ bool LLParser::ParseOptionalAttrs(unsigned &Attrs, unsigned AttrKind) { Attrs |= Attribute::constructAlignmentFromInt(Alignment); continue; } + } Lex.Lex(); } @@ -1130,6 +1140,25 @@ bool LLParser::ParseOptionalCommaAlign(unsigned &Alignment, return false; } +/// ParseOptionalStackAlignment +/// ::= /* empty */ +/// ::= 'alignstack' '(' 4 ')' +bool LLParser::ParseOptionalStackAlignment(unsigned &Alignment) { + Alignment = 0; + if (!EatIfPresent(lltok::kw_alignstack)) + return false; + LocTy ParenLoc = Lex.getLoc(); + if (!EatIfPresent(lltok::lparen)) + return Error(ParenLoc, "expected '('"); + LocTy AlignLoc = Lex.getLoc(); + if (ParseUInt32(Alignment)) return true; + ParenLoc = Lex.getLoc(); + if (!EatIfPresent(lltok::rparen)) + return Error(ParenLoc, "expected ')'"); + if (!isPowerOf2_32(Alignment)) + return Error(AlignLoc, "stack alignment is not a power of two"); + return false; +} /// ParseIndexList - This parses the index list for an insert/extractvalue /// instruction. This sets AteExtraComma in the case where we eat an extra @@ -1266,6 +1295,11 @@ bool LLParser::ParseTypeRec(PATypeHolder &Result) { if (ParseStructType(Result, false)) return true; break; + case lltok::kw_union: + // TypeRec ::= 'union' '{' ... '}' + if (ParseUnionType(Result)) + return true; + break; case lltok::lsquare: // TypeRec ::= '[' ... ']' Lex.Lex(); // eat the lsquare. 
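[Editorial note] The `union` keyword added to the lexer above feeds the ParseUnionType production in the next hunk ('union' '{' TypeRec (',' TypeRec)* '}'). A rough sketch of what that production ultimately builds for `union { i32, float }`, calling UnionType::get exactly as this patch does; the driver function and header paths are assumptions for illustration:

  #include "llvm/DerivedTypes.h"
  #include "llvm/LLVMContext.h"
  using namespace llvm;

  // Build the type for "union { i32, float }" the way ParseUnionType does:
  // collect the element types, then hand them to UnionType::get.
  const Type *buildExampleUnion(LLVMContext &Context) {
    const Type *Elts[] = { Type::getInt32Ty(Context),
                           Type::getFloatTy(Context) };
    return UnionType::get(Elts, 2);
  }
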
@@ -1575,6 +1609,38 @@ bool LLParser::ParseStructType(PATypeHolder &Result, bool Packed) { return false; } +/// ParseUnionType +/// TypeRec +/// ::= 'union' '{' TypeRec (',' TypeRec)* '}' +bool LLParser::ParseUnionType(PATypeHolder &Result) { + assert(Lex.getKind() == lltok::kw_union); + Lex.Lex(); // Consume the 'union' + + if (ParseToken(lltok::lbrace, "'{' expected after 'union'")) return true; + + SmallVector<PATypeHolder, 8> ParamsList; + do { + LocTy EltTyLoc = Lex.getLoc(); + if (ParseTypeRec(Result)) return true; + ParamsList.push_back(Result); + + if (Result->isVoidTy()) + return Error(EltTyLoc, "union element can not have void type"); + if (!UnionType::isValidElementType(Result)) + return Error(EltTyLoc, "invalid element type for union"); + + } while (EatIfPresent(lltok::comma)) ; + + if (ParseToken(lltok::rbrace, "expected '}' at end of union")) + return true; + + SmallVector<const Type*, 8> ParamsListTy; + for (unsigned i = 0, e = ParamsList.size(); i != e; ++i) + ParamsListTy.push_back(ParamsList[i].get()); + Result = HandleUpRefs(UnionType::get(&ParamsListTy[0], ParamsListTy.size())); + return false; +} + /// ParseArrayVectorType - Parse an array or vector type, assuming the first /// token has already been consumed. /// TypeRec @@ -1991,8 +2057,8 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) { if (Elts.empty()) return Error(ID.Loc, "constant vector must not be empty"); - if (!Elts[0]->getType()->isInteger() && - !Elts[0]->getType()->isFloatingPoint()) + if (!Elts[0]->getType()->isIntegerTy() && + !Elts[0]->getType()->isFloatingPointTy()) return Error(FirstEltLoc, "vector elements must have integer or floating point type"); @@ -2134,8 +2200,8 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) { ParseToken(lltok::rparen, "expected ')' in extractvalue constantexpr")) return true; - if (!isa<StructType>(Val->getType()) && !isa<ArrayType>(Val->getType())) - return Error(ID.Loc, "extractvalue operand must be array or struct"); + if (!Val->getType()->isAggregateType()) + return Error(ID.Loc, "extractvalue operand must be aggregate type"); if (!ExtractValueInst::getIndexedType(Val->getType(), Indices.begin(), Indices.end())) return Error(ID.Loc, "invalid indices for extractvalue"); @@ -2155,8 +2221,8 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) { ParseIndexList(Indices) || ParseToken(lltok::rparen, "expected ')' in insertvalue constantexpr")) return true; - if (!isa<StructType>(Val0->getType()) && !isa<ArrayType>(Val0->getType())) - return Error(ID.Loc, "extractvalue operand must be array or struct"); + if (!Val0->getType()->isAggregateType()) + return Error(ID.Loc, "insertvalue operand must be aggregate type"); if (!ExtractValueInst::getIndexedType(Val0->getType(), Indices.begin(), Indices.end())) return Error(ID.Loc, "invalid indices for insertvalue"); @@ -2184,12 +2250,12 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) { CmpInst::Predicate Pred = (CmpInst::Predicate)PredVal; if (Opc == Instruction::FCmp) { - if (!Val0->getType()->isFPOrFPVector()) + if (!Val0->getType()->isFPOrFPVectorTy()) return Error(ID.Loc, "fcmp requires floating point operands"); ID.ConstantVal = ConstantExpr::getFCmp(Pred, Val0, Val1); } else { assert(Opc == Instruction::ICmp && "Unexpected opcode for CmpInst!"); - if (!Val0->getType()->isIntOrIntVector() && + if (!Val0->getType()->isIntOrIntVectorTy() && !isa<PointerType>(Val0->getType())) return Error(ID.Loc, "icmp requires pointer or integer operands"); ID.ConstantVal = 
ConstantExpr::getICmp(Pred, Val0, Val1); @@ -2240,7 +2306,7 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) { return true; if (Val0->getType() != Val1->getType()) return Error(ID.Loc, "operands of constexpr must have same type"); - if (!Val0->getType()->isIntOrIntVector()) { + if (!Val0->getType()->isIntOrIntVectorTy()) { if (NUW) return Error(ModifierLoc, "nuw only applies to integer operations"); if (NSW) @@ -2248,8 +2314,8 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) { } // API compatibility: Accept either integer or floating-point types with // add, sub, and mul. - if (!Val0->getType()->isIntOrIntVector() && - !Val0->getType()->isFPOrFPVector()) + if (!Val0->getType()->isIntOrIntVectorTy() && + !Val0->getType()->isFPOrFPVectorTy()) return Error(ID.Loc,"constexpr requires integer, fp, or vector operands"); unsigned Flags = 0; if (NUW) Flags |= OverflowingBinaryOperator::NoUnsignedWrap; @@ -2279,7 +2345,7 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) { return true; if (Val0->getType() != Val1->getType()) return Error(ID.Loc, "operands of constexpr must have same type"); - if (!Val0->getType()->isIntOrIntVector()) + if (!Val0->getType()->isIntOrIntVectorTy()) return Error(ID.Loc, "constexpr requires integer or integer vector operands"); ID.ConstantVal = ConstantExpr::get(Opc, Val0, Val1); @@ -2449,7 +2515,7 @@ bool LLParser::ConvertValIDToValue(const Type *Ty, ValID &ID, Value *&V, V = ConstantInt::get(Context, ID.APSIntVal); return false; case ValID::t_APFloat: - if (!Ty->isFloatingPoint() || + if (!Ty->isFloatingPointTy() || !ConstantFP::isValueValidForType(Ty, ID.APFloatVal)) return Error(ID.Loc, "floating point constant invalid for type"); @@ -2492,8 +2558,17 @@ bool LLParser::ConvertValIDToValue(const Type *Ty, ValID &ID, Value *&V, V = Constant::getNullValue(Ty); return false; case ValID::t_Constant: - if (ID.ConstantVal->getType() != Ty) + if (ID.ConstantVal->getType() != Ty) { + // Allow a constant struct with a single member to be converted + // to a union, if the union has a member which is the same type + // as the struct member. 
+ if (const UnionType* utype = dyn_cast<UnionType>(Ty)) { + return ParseUnionValue(utype, ID, V); + } + return Error(ID.Loc, "constant expression type mismatch"); + } + V = ID.ConstantVal; return false; } @@ -2523,6 +2598,22 @@ bool LLParser::ParseTypeAndBasicBlock(BasicBlock *&BB, LocTy &Loc, return false; } +bool LLParser::ParseUnionValue(const UnionType* utype, ValID &ID, Value *&V) { + if (const StructType* stype = dyn_cast<StructType>(ID.ConstantVal->getType())) { + if (stype->getNumContainedTypes() != 1) + return Error(ID.Loc, "constant expression type mismatch"); + int index = utype->getElementTypeIndex(stype->getContainedType(0)); + if (index < 0) + return Error(ID.Loc, "initializer type is not a member of the union"); + + V = ConstantUnion::get( + utype, cast<Constant>(ID.ConstantVal->getOperand(0))); + return false; + } + + return Error(ID.Loc, "constant expression type mismatch"); +} + /// FunctionHeader /// ::= OptionalLinkage OptionalVisibility OptionalCallingConv OptRetAttrs @@ -2566,7 +2657,6 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) { return Error(LinkageLoc, "invalid linkage for function declaration"); break; case GlobalValue::AppendingLinkage: - case GlobalValue::GhostLinkage: case GlobalValue::CommonLinkage: return Error(LinkageLoc, "invalid function linkage type"); } @@ -2873,7 +2963,7 @@ int LLParser::ParseInstruction(Instruction *&Inst, BasicBlock *BB, // API compatibility: Accept either integer or floating-point types. bool Result = ParseArithmetic(Inst, PFS, KeywordVal, 0); if (!Result) { - if (!Inst->getType()->isIntOrIntVector()) { + if (!Inst->getType()->isIntOrIntVectorTy()) { if (NUW) return Error(ModifierLoc, "nuw only applies to integer operations"); if (NSW) @@ -3292,11 +3382,11 @@ bool LLParser::ParseArithmetic(Instruction *&Inst, PerFunctionState &PFS, switch (OperandType) { default: llvm_unreachable("Unknown operand type!"); case 0: // int or FP. 
- Valid = LHS->getType()->isIntOrIntVector() || - LHS->getType()->isFPOrFPVector(); + Valid = LHS->getType()->isIntOrIntVectorTy() || + LHS->getType()->isFPOrFPVectorTy(); break; - case 1: Valid = LHS->getType()->isIntOrIntVector(); break; - case 2: Valid = LHS->getType()->isFPOrFPVector(); break; + case 1: Valid = LHS->getType()->isIntOrIntVectorTy(); break; + case 2: Valid = LHS->getType()->isFPOrFPVectorTy(); break; } if (!Valid) @@ -3316,7 +3406,7 @@ bool LLParser::ParseLogical(Instruction *&Inst, PerFunctionState &PFS, ParseValue(LHS->getType(), RHS, PFS)) return true; - if (!LHS->getType()->isIntOrIntVector()) + if (!LHS->getType()->isIntOrIntVectorTy()) return Error(Loc,"instruction requires integer or integer vector operands"); Inst = BinaryOperator::Create((Instruction::BinaryOps)Opc, LHS, RHS); @@ -3340,12 +3430,12 @@ bool LLParser::ParseCompare(Instruction *&Inst, PerFunctionState &PFS, return true; if (Opc == Instruction::FCmp) { - if (!LHS->getType()->isFPOrFPVector()) + if (!LHS->getType()->isFPOrFPVectorTy()) return Error(Loc, "fcmp requires floating point operands"); Inst = new FCmpInst(CmpInst::Predicate(Pred), LHS, RHS); } else { assert(Opc == Instruction::ICmp && "Unknown opcode for CmpInst!"); - if (!LHS->getType()->isIntOrIntVector() && + if (!LHS->getType()->isIntOrIntVectorTy() && !isa<PointerType>(LHS->getType())) return Error(Loc, "icmp requires integer operands"); Inst = new ICmpInst(CmpInst::Predicate(Pred), LHS, RHS); @@ -3643,7 +3733,7 @@ int LLParser::ParseAlloc(Instruction *&Inst, PerFunctionState &PFS, } } - if (Size && !Size->getType()->isInteger(32)) + if (Size && !Size->getType()->isIntegerTy(32)) return Error(SizeLoc, "element count must be i32"); if (isAlloca) { @@ -3783,8 +3873,8 @@ int LLParser::ParseExtractValue(Instruction *&Inst, PerFunctionState &PFS) { ParseIndexList(Indices, AteExtraComma)) return true; - if (!isa<StructType>(Val->getType()) && !isa<ArrayType>(Val->getType())) - return Error(Loc, "extractvalue operand must be array or struct"); + if (!Val->getType()->isAggregateType()) + return Error(Loc, "extractvalue operand must be aggregate type"); if (!ExtractValueInst::getIndexedType(Val->getType(), Indices.begin(), Indices.end())) @@ -3805,8 +3895,8 @@ int LLParser::ParseInsertValue(Instruction *&Inst, PerFunctionState &PFS) { ParseIndexList(Indices, AteExtraComma)) return true; - if (!isa<StructType>(Val0->getType()) && !isa<ArrayType>(Val0->getType())) - return Error(Loc0, "extractvalue operand must be array or struct"); + if (!Val0->getType()->isAggregateType()) + return Error(Loc0, "insertvalue operand must be aggregate type"); if (!ExtractValueInst::getIndexedType(Val0->getType(), Indices.begin(), Indices.end())) diff --git a/lib/AsmParser/LLParser.h b/lib/AsmParser/LLParser.h index 85c07ff..9abe404 100644 --- a/lib/AsmParser/LLParser.h +++ b/lib/AsmParser/LLParser.h @@ -31,6 +31,7 @@ namespace llvm { class GlobalValue; class MDString; class MDNode; + class UnionType; /// ValID - Represents a reference of a definition of some sort with no type. 
/// There are several cases where we have to parse the value but where the @@ -169,6 +170,7 @@ namespace llvm { bool ParseOptionalVisibility(unsigned &Visibility); bool ParseOptionalCallingConv(CallingConv::ID &CC); bool ParseOptionalAlignment(unsigned &Alignment); + bool ParseOptionalStackAlignment(unsigned &Alignment); bool ParseInstructionMetadata(SmallVectorImpl<std::pair<unsigned, MDNode *> > &); bool ParseOptionalCommaAlign(unsigned &Alignment, bool &AteExtraComma); @@ -211,6 +213,7 @@ namespace llvm { } bool ParseTypeRec(PATypeHolder &H); bool ParseStructType(PATypeHolder &H, bool Packed); + bool ParseUnionType(PATypeHolder &H); bool ParseArrayVectorType(PATypeHolder &H, bool isVector); bool ParseFunctionType(PATypeHolder &Result); PATypeHolder HandleUpRefs(const Type *Ty); @@ -279,6 +282,8 @@ namespace llvm { return ParseTypeAndBasicBlock(BB, Loc, PFS); } + bool ParseUnionValue(const UnionType* utype, ValID &ID, Value *&V); + struct ParamInfo { LocTy Loc; Value *V; diff --git a/lib/AsmParser/LLToken.h b/lib/AsmParser/LLToken.h index 80eb194..3ac9169 100644 --- a/lib/AsmParser/LLToken.h +++ b/lib/AsmParser/LLToken.h @@ -85,6 +85,7 @@ namespace lltok { kw_readnone, kw_readonly, + kw_inlinehint, kw_noinline, kw_alwaysinline, kw_optsize, @@ -96,6 +97,7 @@ namespace lltok { kw_type, kw_opaque, + kw_union, kw_eq, kw_ne, kw_slt, kw_sgt, kw_sle, kw_sge, kw_ult, kw_ugt, kw_ule, kw_uge, kw_oeq, kw_one, kw_olt, kw_ogt, kw_ole, kw_oge, kw_ord, kw_uno, diff --git a/lib/AsmParser/Makefile b/lib/AsmParser/Makefile index 7b53a87..995bb0e 100644 --- a/lib/AsmParser/Makefile +++ b/lib/AsmParser/Makefile @@ -10,6 +10,5 @@ LEVEL = ../.. LIBRARYNAME := LLVMAsmParser BUILD_ARCHIVE = 1 -CXXFLAGS = -fno-rtti include $(LEVEL)/Makefile.common diff --git a/lib/Bitcode/Reader/BitReader.cpp b/lib/Bitcode/Reader/BitReader.cpp index 32b97e8..7537435 100644 --- a/lib/Bitcode/Reader/BitReader.cpp +++ b/lib/Bitcode/Reader/BitReader.cpp @@ -21,17 +21,8 @@ using namespace llvm; Optionally returns a human-readable error message via OutMessage. 
*/ LLVMBool LLVMParseBitcode(LLVMMemoryBufferRef MemBuf, LLVMModuleRef *OutModule, char **OutMessage) { - std::string Message; - - *OutModule = wrap(ParseBitcodeFile(unwrap(MemBuf), getGlobalContext(), - &Message)); - if (!*OutModule) { - if (OutMessage) - *OutMessage = strdup(Message.c_str()); - return 1; - } - - return 0; + return LLVMParseBitcodeInContext(wrap(&getGlobalContext()), MemBuf, OutModule, + OutMessage); } LLVMBool LLVMParseBitcodeInContext(LLVMContextRef ContextRef, @@ -57,18 +48,8 @@ LLVMBool LLVMParseBitcodeInContext(LLVMContextRef ContextRef, LLVMBool LLVMGetBitcodeModuleProvider(LLVMMemoryBufferRef MemBuf, LLVMModuleProviderRef *OutMP, char **OutMessage) { - std::string Message; - - *OutMP = wrap(getBitcodeModuleProvider(unwrap(MemBuf), getGlobalContext(), - &Message)); - - if (!*OutMP) { - if (OutMessage) - *OutMessage = strdup(Message.c_str()); - return 1; - } - - return 0; + return LLVMGetBitcodeModuleProviderInContext(wrap(&getGlobalContext()), + MemBuf, OutMP, OutMessage); } LLVMBool LLVMGetBitcodeModuleProviderInContext(LLVMContextRef ContextRef, @@ -77,8 +58,8 @@ LLVMBool LLVMGetBitcodeModuleProviderInContext(LLVMContextRef ContextRef, char **OutMessage) { std::string Message; - *OutMP = wrap(getBitcodeModuleProvider(unwrap(MemBuf), *unwrap(ContextRef), - &Message)); + *OutMP = reinterpret_cast<LLVMModuleProviderRef>( + getLazyBitcodeModule(unwrap(MemBuf), *unwrap(ContextRef), &Message)); if (!*OutMP) { if (OutMessage) *OutMessage = strdup(Message.c_str()); diff --git a/lib/Bitcode/Reader/BitcodeReader.cpp b/lib/Bitcode/Reader/BitcodeReader.cpp index 2549a51..cebfbf6 100644 --- a/lib/Bitcode/Reader/BitcodeReader.cpp +++ b/lib/Bitcode/Reader/BitcodeReader.cpp @@ -28,7 +28,8 @@ using namespace llvm; void BitcodeReader::FreeState() { - delete Buffer; + if (BufferOwned) + delete Buffer; Buffer = 0; std::vector<PATypeHolder>().swap(TypeList); ValueList.clear(); @@ -107,17 +108,17 @@ static int GetDecodedBinaryOpcode(unsigned Val, const Type *Ty) { switch (Val) { default: return -1; case bitc::BINOP_ADD: - return Ty->isFPOrFPVector() ? Instruction::FAdd : Instruction::Add; + return Ty->isFPOrFPVectorTy() ? Instruction::FAdd : Instruction::Add; case bitc::BINOP_SUB: - return Ty->isFPOrFPVector() ? Instruction::FSub : Instruction::Sub; + return Ty->isFPOrFPVectorTy() ? Instruction::FSub : Instruction::Sub; case bitc::BINOP_MUL: - return Ty->isFPOrFPVector() ? Instruction::FMul : Instruction::Mul; + return Ty->isFPOrFPVectorTy() ? Instruction::FMul : Instruction::Mul; case bitc::BINOP_UDIV: return Instruction::UDiv; case bitc::BINOP_SDIV: - return Ty->isFPOrFPVector() ? Instruction::FDiv : Instruction::SDiv; + return Ty->isFPOrFPVectorTy() ? Instruction::FDiv : Instruction::SDiv; case bitc::BINOP_UREM: return Instruction::URem; case bitc::BINOP_SREM: - return Ty->isFPOrFPVector() ? Instruction::FRem : Instruction::SRem; + return Ty->isFPOrFPVectorTy() ? 
Instruction::FRem : Instruction::SRem; case bitc::BINOP_SHL: return Instruction::Shl; case bitc::BINOP_LSHR: return Instruction::LShr; case bitc::BINOP_ASHR: return Instruction::AShr; @@ -584,6 +585,13 @@ bool BitcodeReader::ParseTypeTable() { ResultTy = StructType::get(Context, EltTys, Record[0]); break; } + case bitc::TYPE_CODE_UNION: { // UNION: [eltty x N] + SmallVector<const Type*, 8> EltTys; + for (unsigned i = 0, e = Record.size(); i != e; ++i) + EltTys.push_back(getTypeByID(Record[i], true)); + ResultTy = UnionType::get(&EltTys[0], EltTys.size()); + break; + } case bitc::TYPE_CODE_ARRAY: // ARRAY: [numelts, eltty] if (Record.size() < 2) return Error("Invalid ARRAY type record"); @@ -1167,7 +1175,7 @@ bool BitcodeReader::ParseConstants() { Constant *Op0 = ValueList.getConstantFwdRef(Record[1], OpTy); Constant *Op1 = ValueList.getConstantFwdRef(Record[2], OpTy); - if (OpTy->isFPOrFPVector()) + if (OpTy->isFPOrFPVectorTy()) V = ConstantExpr::getFCmp(Record[3], Op0, Op1); else V = ConstantExpr::getICmp(Record[3], Op0, Op1); @@ -1241,11 +1249,7 @@ bool BitcodeReader::RememberAndSkipFunctionBody() { // Save the current stream state. uint64_t CurBit = Stream.GetCurrentBitNo(); - DeferredFunctionInfo[Fn] = std::make_pair(CurBit, Fn->getLinkage()); - - // Set the functions linkage to GhostLinkage so we know it is lazily - // deserialized. - Fn->setLinkage(GlobalValue::GhostLinkage); + DeferredFunctionInfo[Fn] = CurBit; // Skip over the function block for now. if (Stream.SkipBlock()) @@ -1253,17 +1257,10 @@ bool BitcodeReader::RememberAndSkipFunctionBody() { return false; } -bool BitcodeReader::ParseModule(const std::string &ModuleID) { - // Reject multiple MODULE_BLOCK's in a single bitstream. - if (TheModule) - return Error("Multiple MODULE_BLOCKs in same stream"); - +bool BitcodeReader::ParseModule() { if (Stream.EnterSubBlock(bitc::MODULE_BLOCK_ID)) return Error("Malformed block record"); - // Otherwise, create the module. - TheModule = new Module(ModuleID, Context); - SmallVector<uint64_t, 64> Record; std::vector<std::string> SectionTable; std::vector<std::string> GCTable; @@ -1520,7 +1517,7 @@ bool BitcodeReader::ParseModule(const std::string &ModuleID) { return Error("Premature end of bitstream"); } -bool BitcodeReader::ParseBitcode() { +bool BitcodeReader::ParseBitcodeInto(Module *M) { TheModule = 0; if (Buffer->getBufferSize() & 3) @@ -1564,7 +1561,11 @@ bool BitcodeReader::ParseBitcode() { return Error("Malformed BlockInfoBlock"); break; case bitc::MODULE_BLOCK_ID: - if (ParseModule(Buffer->getBufferIdentifier())) + // Reject multiple MODULE_BLOCK's in a single bitstream. 
+ if (TheModule) + return Error("Multiple MODULE_BLOCKs in same stream"); + TheModule = M; + if (ParseModule()) return true; break; default: @@ -1702,12 +1703,12 @@ bool BitcodeReader::ParseFunctionBody(Function *F) { if (Opc == Instruction::Add || Opc == Instruction::Sub || Opc == Instruction::Mul) { - if (Record[3] & (1 << bitc::OBO_NO_SIGNED_WRAP)) + if (Record[OpNum] & (1 << bitc::OBO_NO_SIGNED_WRAP)) cast<BinaryOperator>(I)->setHasNoSignedWrap(true); - if (Record[3] & (1 << bitc::OBO_NO_UNSIGNED_WRAP)) + if (Record[OpNum] & (1 << bitc::OBO_NO_UNSIGNED_WRAP)) cast<BinaryOperator>(I)->setHasNoUnsignedWrap(true); } else if (Opc == Instruction::SDiv) { - if (Record[3] & (1 << bitc::SDIV_EXACT)) + if (Record[OpNum] & (1 << bitc::SDIV_EXACT)) cast<BinaryOperator>(I)->setIsExact(true); } } @@ -1891,7 +1892,7 @@ bool BitcodeReader::ParseFunctionBody(Function *F) { OpNum+1 != Record.size()) return Error("Invalid CMP record"); - if (LHS->getType()->isFPOrFPVector()) + if (LHS->getType()->isFPOrFPVectorTy()) I = new FCmpInst((FCmpInst::Predicate)Record[OpNum], LHS, RHS); else I = new ICmpInst((ICmpInst::Predicate)Record[OpNum], LHS, RHS); @@ -2299,22 +2300,28 @@ bool BitcodeReader::ParseFunctionBody(Function *F) { } //===----------------------------------------------------------------------===// -// ModuleProvider implementation +// GVMaterializer implementation //===----------------------------------------------------------------------===// -bool BitcodeReader::materializeFunction(Function *F, std::string *ErrInfo) { - // If it already is material, ignore the request. - if (!F->hasNotBeenReadFromBitcode()) return false; +bool BitcodeReader::isMaterializable(const GlobalValue *GV) const { + if (const Function *F = dyn_cast<Function>(GV)) { + return F->isDeclaration() && + DeferredFunctionInfo.count(const_cast<Function*>(F)); + } + return false; +} - DenseMap<Function*, std::pair<uint64_t, unsigned> >::iterator DFII = - DeferredFunctionInfo.find(F); +bool BitcodeReader::Materialize(GlobalValue *GV, std::string *ErrInfo) { + Function *F = dyn_cast<Function>(GV); + // If it's not a function or is already material, ignore the request. + if (!F || !F->isMaterializable()) return false; + + DenseMap<Function*, uint64_t>::iterator DFII = DeferredFunctionInfo.find(F); assert(DFII != DeferredFunctionInfo.end() && "Deferred function not found!"); - // Move the bit stream to the saved position of the deferred function body and - // restore the real linkage type for the function. - Stream.JumpToBit(DFII->second.first); - F->setLinkage((GlobalValue::LinkageTypes)DFII->second.second); + // Move the bit stream to the saved position of the deferred function body. + Stream.JumpToBit(DFII->second); if (ParseFunctionBody(F)) { if (ErrInfo) *ErrInfo = ErrorString; @@ -2336,27 +2343,36 @@ bool BitcodeReader::materializeFunction(Function *F, std::string *ErrInfo) { return false; } -void BitcodeReader::dematerializeFunction(Function *F) { - // If this function isn't materialized, or if it is a proto, this is a noop. - if (F->hasNotBeenReadFromBitcode() || F->isDeclaration()) +bool BitcodeReader::isDematerializable(const GlobalValue *GV) const { + const Function *F = dyn_cast<Function>(GV); + if (!F || F->isDeclaration()) + return false; + return DeferredFunctionInfo.count(const_cast<Function*>(F)); +} + +void BitcodeReader::Dematerialize(GlobalValue *GV) { + Function *F = dyn_cast<Function>(GV); + // If this function isn't dematerializable, this is a noop. 
+ if (!F || !isDematerializable(F)) return; assert(DeferredFunctionInfo.count(F) && "No info to read function later?"); // Just forget the function body, we can remat it later. F->deleteBody(); - F->setLinkage(GlobalValue::GhostLinkage); } -Module *BitcodeReader::materializeModule(std::string *ErrInfo) { +bool BitcodeReader::MaterializeModule(Module *M, std::string *ErrInfo) { + assert(M == TheModule && + "Can only Materialize the Module this BitcodeReader is attached to."); // Iterate over the module, deserializing any functions that are still on // disk. for (Module::iterator F = TheModule->begin(), E = TheModule->end(); F != E; ++F) - if (F->hasNotBeenReadFromBitcode() && - materializeFunction(F, ErrInfo)) - return 0; + if (F->isMaterializable() && + Materialize(F, ErrInfo)) + return true; // Upgrade any intrinsic calls that slipped through (should not happen!) and // delete the old functions to clean up. We can't do this unless the entire @@ -2380,19 +2396,7 @@ Module *BitcodeReader::materializeModule(std::string *ErrInfo) { // Check debug info intrinsics. CheckDebugInfoIntrinsics(TheModule); - return TheModule; -} - - -/// This method is provided by the parent ModuleProvde class and overriden -/// here. It simply releases the module from its provided and frees up our -/// state. -/// @brief Release our hold on the generated module -Module *BitcodeReader::releaseModule(std::string *ErrInfo) { - // Since we're losing control of this Module, we must hand it back complete - Module *M = ModuleProvider::releaseModule(ErrInfo); - FreeState(); - return M; + return false; } @@ -2400,45 +2404,41 @@ Module *BitcodeReader::releaseModule(std::string *ErrInfo) { // External interface //===----------------------------------------------------------------------===// -/// getBitcodeModuleProvider - lazy function-at-a-time loading from a file. +/// getLazyBitcodeModule - lazy function-at-a-time loading from a file. /// -ModuleProvider *llvm::getBitcodeModuleProvider(MemoryBuffer *Buffer, - LLVMContext& Context, - std::string *ErrMsg) { +Module *llvm::getLazyBitcodeModule(MemoryBuffer *Buffer, + LLVMContext& Context, + std::string *ErrMsg) { + Module *M = new Module(Buffer->getBufferIdentifier(), Context); BitcodeReader *R = new BitcodeReader(Buffer, Context); - if (R->ParseBitcode()) { + M->setMaterializer(R); + if (R->ParseBitcodeInto(M)) { if (ErrMsg) *ErrMsg = R->getErrorString(); - // Don't let the BitcodeReader dtor delete 'Buffer'. - R->releaseMemoryBuffer(); - delete R; + delete M; // Also deletes R. return 0; } - return R; + // Have the BitcodeReader dtor delete 'Buffer'. + R->setBufferOwned(true); + return M; } /// ParseBitcodeFile - Read the specified bitcode file, returning the module. /// If an error occurs, return null and fill in *ErrMsg if non-null. Module *llvm::ParseBitcodeFile(MemoryBuffer *Buffer, LLVMContext& Context, std::string *ErrMsg){ - BitcodeReader *R; - R = static_cast<BitcodeReader*>(getBitcodeModuleProvider(Buffer, Context, - ErrMsg)); - if (!R) return 0; - - // Read in the entire module. - Module *M = R->materializeModule(ErrMsg); + Module *M = getLazyBitcodeModule(Buffer, Context, ErrMsg); + if (!M) return 0; // Don't let the BitcodeReader dtor delete 'Buffer', regardless of whether // there was an error. - R->releaseMemoryBuffer(); + static_cast<BitcodeReader*>(M->getMaterializer())->setBufferOwned(false); - // If there was no error, tell ModuleProvider not to delete it when its dtor - // is run. 
- if (M) - M = R->releaseModule(ErrMsg); - - delete R; + // Read in the entire module, and destroy the BitcodeReader. + if (M->MaterializeAllPermanently(ErrMsg)) { + delete M; + return NULL; + } return M; } diff --git a/lib/Bitcode/Reader/BitcodeReader.h b/lib/Bitcode/Reader/BitcodeReader.h index bb3961a..55c71f7 100644 --- a/lib/Bitcode/Reader/BitcodeReader.h +++ b/lib/Bitcode/Reader/BitcodeReader.h @@ -14,7 +14,7 @@ #ifndef BITCODE_READER_H #define BITCODE_READER_H -#include "llvm/ModuleProvider.h" +#include "llvm/GVMaterializer.h" #include "llvm/Attributes.h" #include "llvm/Type.h" #include "llvm/OperandTraits.h" @@ -121,9 +121,11 @@ public: void AssignValue(Value *V, unsigned Idx); }; -class BitcodeReader : public ModuleProvider { +class BitcodeReader : public GVMaterializer { LLVMContext &Context; + Module *TheModule; MemoryBuffer *Buffer; + bool BufferOwned; BitstreamReader StreamFile; BitstreamCursor Stream; @@ -160,9 +162,9 @@ class BitcodeReader : public ModuleProvider { bool HasReversedFunctionsWithBodies; /// DeferredFunctionInfo - When function bodies are initially scanned, this - /// map contains info about where to find deferred function body (in the - /// stream) and what linkage the original function had. - DenseMap<Function*, std::pair<uint64_t, unsigned> > DeferredFunctionInfo; + /// map contains info about where to find deferred function body in the + /// stream. + DenseMap<Function*, uint64_t> DeferredFunctionInfo; /// BlockAddrFwdRefs - These are blockaddr references to basic blocks. These /// are resolved lazily when functions are loaded. @@ -171,7 +173,8 @@ class BitcodeReader : public ModuleProvider { public: explicit BitcodeReader(MemoryBuffer *buffer, LLVMContext &C) - : Context(C), Buffer(buffer), ErrorString(0), ValueList(C), MDValueList(C) { + : Context(C), TheModule(0), Buffer(buffer), BufferOwned(false), + ErrorString(0), ValueList(C), MDValueList(C) { HasReversedFunctionsWithBodies = false; } ~BitcodeReader() { @@ -180,17 +183,15 @@ public: void FreeState(); - /// releaseMemoryBuffer - This causes the reader to completely forget about - /// the memory buffer it contains, which prevents the buffer from being - /// destroyed when it is deleted. - void releaseMemoryBuffer() { - Buffer = 0; - } + /// setBufferOwned - If this is true, the reader will destroy the MemoryBuffer + /// when the reader is destroyed. + void setBufferOwned(bool Owned) { BufferOwned = Owned; } - virtual bool materializeFunction(Function *F, std::string *ErrInfo = 0); - virtual Module *materializeModule(std::string *ErrInfo = 0); - virtual void dematerializeFunction(Function *F); - virtual Module *releaseModule(std::string *ErrInfo = 0); + virtual bool isMaterializable(const GlobalValue *GV) const; + virtual bool isDematerializable(const GlobalValue *GV) const; + virtual bool Materialize(GlobalValue *GV, std::string *ErrInfo = 0); + virtual bool MaterializeModule(Module *M, std::string *ErrInfo = 0); + virtual void Dematerialize(GlobalValue *GV); bool Error(const char *Str) { ErrorString = Str; @@ -200,7 +201,7 @@ public: /// @brief Main interface to parsing a bitcode buffer. /// @returns true if an error occurred. 
- bool ParseBitcode(); + bool ParseBitcodeInto(Module *M); private: const Type *getTypeByID(unsigned ID, bool isTypeTable = false); Value *getFnValueByID(unsigned ID, const Type *Ty) { @@ -248,7 +249,7 @@ private: } - bool ParseModule(const std::string &ModuleID); + bool ParseModule(); bool ParseAttributeBlock(); bool ParseTypeTable(); bool ParseTypeSymbolTable(); diff --git a/lib/Bitcode/Reader/Makefile b/lib/Bitcode/Reader/Makefile index 0aae3bf..59af8d53 100644 --- a/lib/Bitcode/Reader/Makefile +++ b/lib/Bitcode/Reader/Makefile @@ -10,7 +10,6 @@ LEVEL = ../../.. LIBRARYNAME = LLVMBitReader BUILD_ARCHIVE = 1 -CXXFLAGS = -fno-rtti include $(LEVEL)/Makefile.common diff --git a/lib/Bitcode/Writer/BitcodeWriter.cpp b/lib/Bitcode/Writer/BitcodeWriter.cpp index 5a4a1b2..82e73b5 100644 --- a/lib/Bitcode/Writer/BitcodeWriter.cpp +++ b/lib/Bitcode/Writer/BitcodeWriter.cpp @@ -181,6 +181,14 @@ static void WriteTypeTable(const ValueEnumerator &VE, BitstreamWriter &Stream) { Log2_32_Ceil(VE.getTypes().size()+1))); unsigned StructAbbrev = Stream.EmitAbbrev(Abbv); + // Abbrev for TYPE_CODE_UNION. + Abbv = new BitCodeAbbrev(); + Abbv->Add(BitCodeAbbrevOp(bitc::TYPE_CODE_UNION)); + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, + Log2_32_Ceil(VE.getTypes().size()+1))); + unsigned UnionAbbrev = Stream.EmitAbbrev(Abbv); + // Abbrev for TYPE_CODE_ARRAY. Abbv = new BitCodeAbbrev(); Abbv->Add(BitCodeAbbrevOp(bitc::TYPE_CODE_ARRAY)); @@ -250,6 +258,17 @@ static void WriteTypeTable(const ValueEnumerator &VE, BitstreamWriter &Stream) { AbbrevToUse = StructAbbrev; break; } + case Type::UnionTyID: { + const UnionType *UT = cast<UnionType>(T); + // UNION: [eltty x N] + Code = bitc::TYPE_CODE_UNION; + // Output all of the element types. + for (UnionType::element_iterator I = UT->element_begin(), + E = UT->element_end(); I != E; ++I) + TypeVals.push_back(VE.getTypeID(*I)); + AbbrevToUse = UnionAbbrev; + break; + } case Type::ArrayTyID: { const ArrayType *AT = cast<ArrayType>(T); // ARRAY: [numelts, eltty] @@ -280,7 +299,6 @@ static void WriteTypeTable(const ValueEnumerator &VE, BitstreamWriter &Stream) { static unsigned getEncodedLinkage(const GlobalValue *GV) { switch (GV->getLinkage()) { default: llvm_unreachable("Invalid linkage!"); - case GlobalValue::GhostLinkage: // Map ghost linkage onto external. 
case GlobalValue::ExternalLinkage: return 0; case GlobalValue::WeakAnyLinkage: return 1; case GlobalValue::AppendingLinkage: return 2; @@ -499,7 +517,7 @@ static void WriteModuleMetadata(const ValueEnumerator &VE, for (unsigned i = 0, e = Vals.size(); i != e; ++i) { if (const MDNode *N = dyn_cast<MDNode>(Vals[i].first)) { - if (!N->isFunctionLocal()) { + if (!N->isFunctionLocal() || !N->getFunction()) { if (!StartedMetadataBlock) { Stream.EnterSubblock(bitc::METADATA_BLOCK_ID, 3); StartedMetadataBlock = true; @@ -563,7 +581,7 @@ static void WriteFunctionLocalMetadata(const Function &F, for (unsigned i = 0, e = Vals.size(); i != e; ++i) if (const MDNode *N = dyn_cast<MDNode>(Vals[i].first)) - if (N->getFunction() == &F) { + if (N->isFunctionLocal() && N->getFunction() == &F) { if (!StartedMetadataBlock) { Stream.EnterSubblock(bitc::METADATA_BLOCK_ID, 3); StartedMetadataBlock = true; @@ -790,7 +808,7 @@ static void WriteConstants(unsigned FirstVal, unsigned LastVal, else if (isCStr7) AbbrevToUse = CString7Abbrev; } else if (isa<ConstantArray>(C) || isa<ConstantStruct>(V) || - isa<ConstantVector>(V)) { + isa<ConstantUnion>(C) || isa<ConstantVector>(V)) { Code = bitc::CST_CODE_AGGREGATE; for (unsigned i = 0, e = C->getNumOperands(); i != e; ++i) Record.push_back(VE.getValueID(C->getOperand(i))); @@ -1511,16 +1529,50 @@ enum { DarwinBCHeaderSize = 5*4 }; +/// isARMTriplet - Return true if the triplet looks like: +/// arm-*, thumb-*, armv[0-9]-*, thumbv[0-9]-*, armv5te-*, or armv6t2-*. +static bool isARMTriplet(const std::string &TT) { + size_t Pos = 0; + size_t Size = TT.size(); + if (Size >= 6 && + TT[0] == 't' && TT[1] == 'h' && TT[2] == 'u' && + TT[3] == 'm' && TT[4] == 'b') + Pos = 5; + else if (Size >= 4 && TT[0] == 'a' && TT[1] == 'r' && TT[2] == 'm') + Pos = 3; + else + return false; + + if (TT[Pos] == '-') + return true; + else if (TT[Pos] == 'v') { + if (Size >= Pos+4 && + TT[Pos+1] == '6' && TT[Pos+2] == 't' && TT[Pos+3] == '2') + return true; + else if (Size >= Pos+4 && + TT[Pos+1] == '5' && TT[Pos+2] == 't' && TT[Pos+3] == 'e') + return true; + } else + return false; + while (++Pos < Size && TT[Pos] != '-') { + if (!isdigit(TT[Pos])) + return false; + } + return true; +} + static void EmitDarwinBCHeader(BitstreamWriter &Stream, const std::string &TT) { unsigned CPUType = ~0U; - // Match x86_64-*, i[3-9]86-*, powerpc-*, powerpc64-*. The CPUType is a - // magic number from /usr/include/mach/machine.h. It is ok to reproduce the + // Match x86_64-*, i[3-9]86-*, powerpc-*, powerpc64-*, arm-*, thumb-*, + // armv[0-9]-*, thumbv[0-9]-*, armv5te-*, or armv6t2-*. The CPUType is a magic + // number from /usr/include/mach/machine.h. It is ok to reproduce the // specific constants here because they are implicitly part of the Darwin ABI. enum { DARWIN_CPU_ARCH_ABI64 = 0x01000000, DARWIN_CPU_TYPE_X86 = 7, + DARWIN_CPU_TYPE_ARM = 12, DARWIN_CPU_TYPE_POWERPC = 18 }; @@ -1533,6 +1585,8 @@ static void EmitDarwinBCHeader(BitstreamWriter &Stream, CPUType = DARWIN_CPU_TYPE_POWERPC; else if (TT.find("powerpc64-") == 0) CPUType = DARWIN_CPU_TYPE_POWERPC | DARWIN_CPU_ARCH_ABI64; + else if (isARMTriplet(TT)) + CPUType = DARWIN_CPU_TYPE_ARM; // Traditional Bitcode starts after header. unsigned BCOffset = DarwinBCHeaderSize; diff --git a/lib/Bitcode/Writer/Makefile b/lib/Bitcode/Writer/Makefile index 5f9742e..7b0bd72 100644 --- a/lib/Bitcode/Writer/Makefile +++ b/lib/Bitcode/Writer/Makefile @@ -10,7 +10,6 @@ LEVEL = ../../.. 
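Stepping back to the isARMTriplet matcher above, a few hand-worked inputs show what it accepts (illustrative, not exhaustive):

    // "arm-apple-darwin10"     -> true   ('-' directly after "arm")
    // "thumb-apple-darwin10"   -> true   ('-' directly after "thumb")
    // "armv6-apple-darwin10"   -> true   (only digits between 'v' and '-')
    // "armv5te-apple-darwin10" -> true   (explicit v5te special case)
    // "thumbv6t2-elf"          -> true   (explicit v6t2 special case)
    // "armv7a-eabi"            -> false  ('a' after the digits is not a digit)
    // "arm64-apple-darwin"     -> false  ('6' after "arm" is neither '-' nor 'v')

Any triple that matches selects DARWIN_CPU_TYPE_ARM for the Darwin bitcode wrapper header.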
LIBRARYNAME = LLVMBitWriter BUILD_ARCHIVE = 1 -CXXFLAGS = -fno-rtti include $(LEVEL)/Makefile.common diff --git a/lib/Bitcode/Writer/ValueEnumerator.cpp b/lib/Bitcode/Writer/ValueEnumerator.cpp index c46d735..595497f 100644 --- a/lib/Bitcode/Writer/ValueEnumerator.cpp +++ b/lib/Bitcode/Writer/ValueEnumerator.cpp @@ -93,7 +93,7 @@ ValueEnumerator::ValueEnumerator(const Module *M) { for (User::const_op_iterator OI = I->op_begin(), E = I->op_end(); OI != E; ++OI) { if (MDNode *MD = dyn_cast<MDNode>(*OI)) - if (MD->isFunctionLocal()) + if (MD->isFunctionLocal() && MD->getFunction()) // These will get enumerated during function-incorporation. continue; EnumerateOperandType(*OI); @@ -408,21 +408,25 @@ void ValueEnumerator::incorporateFunction(const Function &F) { FirstInstID = Values.size(); + SmallVector<MDNode *, 8> FunctionLocalMDs; // Add all of the instructions. for (Function::const_iterator BB = F.begin(), E = F.end(); BB != E; ++BB) { for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I!=E; ++I) { for (User::const_op_iterator OI = I->op_begin(), E = I->op_end(); OI != E; ++OI) { if (MDNode *MD = dyn_cast<MDNode>(*OI)) - if (!MD->isFunctionLocal()) - // These were already enumerated during ValueEnumerator creation. - continue; - EnumerateOperandType(*OI); + if (MD->isFunctionLocal() && MD->getFunction()) + // Enumerate metadata after the instructions they might refer to. + FunctionLocalMDs.push_back(MD); } if (!I->getType()->isVoidTy()) EnumerateValue(I); } } + + // Add all of the function-local metadata. + for (unsigned i = 0, e = FunctionLocalMDs.size(); i != e; ++i) + EnumerateOperandType(FunctionLocalMDs[i]); } void ValueEnumerator::purgeFunction() { diff --git a/lib/CodeGen/AggressiveAntiDepBreaker.cpp b/lib/CodeGen/AggressiveAntiDepBreaker.cpp index ca1f4a3..8840622f 100644 --- a/lib/CodeGen/AggressiveAntiDepBreaker.cpp +++ b/lib/CodeGen/AggressiveAntiDepBreaker.cpp @@ -425,8 +425,7 @@ void AggressiveAntiDepBreaker::PrescanInstruction(MachineInstr *MI, unsigned Reg = MO.getReg(); if (Reg == 0) continue; // Ignore KILLs and passthru registers for liveness... - if ((MI->getOpcode() == TargetInstrInfo::KILL) || - (PassthruRegs.count(Reg) != 0)) + if (MI->isKill() || (PassthruRegs.count(Reg) != 0)) continue; // Update def for Reg and aliases. @@ -481,7 +480,7 @@ void AggressiveAntiDepBreaker::ScanInstruction(MachineInstr *MI, // Form a group of all defs and uses of a KILL instruction to ensure // that all registers are renamed as a group. - if (MI->getOpcode() == TargetInstrInfo::KILL) { + if (MI->isKill()) { DEBUG(dbgs() << "\tKill Group:"); unsigned FirstReg = 0; @@ -792,7 +791,7 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies( // Ignore KILL instructions (they form a group in ScanInstruction // but don't cause any anti-dependence breaking themselves) - if (MI->getOpcode() != TargetInstrInfo::KILL) { + if (!MI->isKill()) { // Attempt to break each anti-dependency... 
for (unsigned i = 0, e = Edges.size(); i != e; ++i) { SDep *Edge = Edges[i]; diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index f4d8864..fc08384 100644 --- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -11,6 +11,7 @@ // //===----------------------------------------------------------------------===// +#define DEBUG_TYPE "asm-printer" #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/Assembly/Writer.h" #include "llvm/DerivedTypes.h" @@ -24,6 +25,7 @@ #include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/DebugInfo.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" @@ -31,10 +33,6 @@ #include "llvm/MC/MCSection.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/Format.h" -#include "llvm/Support/FormattedStream.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/Target/Mangler.h" #include "llvm/Target/TargetData.h" @@ -45,37 +43,27 @@ #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallString.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/FormattedStream.h" #include <cerrno> using namespace llvm; -static cl::opt<cl::boolOrDefault> -AsmVerbose("asm-verbose", cl::desc("Add comments to directives."), - cl::init(cl::BOU_UNSET)); - -static bool getVerboseAsm(bool VDef) { - switch (AsmVerbose) { - default: - case cl::BOU_UNSET: return VDef; - case cl::BOU_TRUE: return true; - case cl::BOU_FALSE: return false; - } -} +STATISTIC(EmittedInsts, "Number of machine instrs printed"); char AsmPrinter::ID = 0; AsmPrinter::AsmPrinter(formatted_raw_ostream &o, TargetMachine &tm, - const MCAsmInfo *T, bool VDef) - : MachineFunctionPass(&ID), FunctionNumber(0), O(o), + MCContext &Ctx, MCStreamer &Streamer, + const MCAsmInfo *T) + : MachineFunctionPass(&ID), O(o), TM(tm), MAI(T), TRI(tm.getRegisterInfo()), - - OutContext(*new MCContext()), - // FIXME: Pass instprinter to streamer. - OutStreamer(*createAsmStreamer(OutContext, O, *T, - TM.getTargetData()->isLittleEndian(), - getVerboseAsm(VDef), 0)), - + OutContext(Ctx), OutStreamer(Streamer), LastMI(0), LastFn(0), Counter(~0U), PrevDLT(NULL) { DW = 0; MMI = 0; - VerboseAsm = getVerboseAsm(VDef); + VerboseAsm = Streamer.isVerboseAsm(); } AsmPrinter::~AsmPrinter() { @@ -87,6 +75,12 @@ AsmPrinter::~AsmPrinter() { delete &OutContext; } +/// getFunctionNumber - Return a unique ID for the current function. +/// +unsigned AsmPrinter::getFunctionNumber() const { + return MF->getFunctionNumber(); +} + TargetLoweringObjectFile &AsmPrinter::getObjFileLowering() const { return TM.getTargetLowering()->getObjFileLowering(); } @@ -115,11 +109,11 @@ bool AsmPrinter::doInitialization(Module &M) { // Allow the target to emit any magic that it wants at the start of the file. EmitStartOfAsmFile(M); + // Very minimal debug info. It is ignored if we emit actual debug info. If we + // don't, this at least helps the user find where a global came from. if (MAI->hasSingleParameterDotFile()) { - // Very minimal debug info. It is ignored if we emit actual - // debug info. 
If we don't, this at least helps the user find where - // a function came from. - O << "\t.file\t\"" << M.getModuleIdentifier() << "\"\n"; + // .file "foo.c" + OutStreamer.EmitFileDirective(M.getModuleIdentifier()); } GCModuleInfo *MI = getAnalysisIfAvailable<GCModuleInfo>(); @@ -144,6 +138,52 @@ bool AsmPrinter::doInitialization(Module &M) { return false; } +void AsmPrinter::EmitLinkage(unsigned Linkage, MCSymbol *GVSym) const { + switch ((GlobalValue::LinkageTypes)Linkage) { + case GlobalValue::CommonLinkage: + case GlobalValue::LinkOnceAnyLinkage: + case GlobalValue::LinkOnceODRLinkage: + case GlobalValue::WeakAnyLinkage: + case GlobalValue::WeakODRLinkage: + case GlobalValue::LinkerPrivateLinkage: + if (MAI->getWeakDefDirective() != 0) { + // .globl _foo + OutStreamer.EmitSymbolAttribute(GVSym, MCSA_Global); + // .weak_definition _foo + OutStreamer.EmitSymbolAttribute(GVSym, MCSA_WeakDefinition); + } else if (const char *LinkOnce = MAI->getLinkOnceDirective()) { + // .globl _foo + OutStreamer.EmitSymbolAttribute(GVSym, MCSA_Global); + // FIXME: linkonce should be a section attribute, handled by COFF Section + // assignment. + // http://sourceware.org/binutils/docs-2.20/as/Linkonce.html#Linkonce + // .linkonce discard + // FIXME: It would be nice to use .linkonce samesize for non-common + // globals. + O << LinkOnce; + } else { + // .weak _foo + OutStreamer.EmitSymbolAttribute(GVSym, MCSA_Weak); + } + break; + case GlobalValue::DLLExportLinkage: + case GlobalValue::AppendingLinkage: + // FIXME: appending linkage variables should go into a section of + // their name or something. For now, just emit them as external. + case GlobalValue::ExternalLinkage: + // If external or appending, declare as a global symbol. + // .globl _foo + OutStreamer.EmitSymbolAttribute(GVSym, MCSA_Global); + break; + case GlobalValue::PrivateLinkage: + case GlobalValue::InternalLinkage: + break; + default: + llvm_unreachable("Unknown linkage type!"); + } +} + + /// EmitGlobalVariable - Emit the specified global variable to the .s file. void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { if (!GV->hasInitializer()) // External globals require no code. @@ -154,15 +194,10 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { return; MCSymbol *GVSym = GetGlobalValueSymbol(GV); - printVisibility(GVSym, GV->getVisibility()); + EmitVisibility(GVSym, GV->getVisibility()); - if (MAI->hasDotTypeDotSizeDirective()) { - O << "\t.type\t" << *GVSym; - if (MAI->getCommentString()[0] != '@') - O << ",@object\n"; - else - O << ",%object\n"; - } + if (MAI->hasDotTypeDotSizeDirective()) + OutStreamer.EmitSymbolAttribute(GVSym, MCSA_ELF_TypeObject); SectionKind GVKind = TargetLoweringObjectFile::getKindForGlobal(GV, TM); @@ -224,47 +259,9 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { OutStreamer.SwitchSection(TheSection); - // TODO: Factor into an 'emit linkage' thing that is shared with function - // bodies. 
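The TODO deleted just above asked for exactly the factoring this patch performs: global and function emission now share one helper. Condensed from the surrounding hunks (not compilable on its own):

    // In EmitGlobalVariable(), after switching to the global's section:
    EmitLinkage(GV->getLinkage(), GVSym);

    // In the new EmitFunctionHeader(), after switching to the function's
    // section:
    EmitLinkage(F->getLinkage(), CurrentFnSym);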
- switch (GV->getLinkage()) { - case GlobalValue::CommonLinkage: - case GlobalValue::LinkOnceAnyLinkage: - case GlobalValue::LinkOnceODRLinkage: - case GlobalValue::WeakAnyLinkage: - case GlobalValue::WeakODRLinkage: - case GlobalValue::LinkerPrivateLinkage: - if (MAI->getWeakDefDirective() != 0) { - // .globl _foo - OutStreamer.EmitSymbolAttribute(GVSym, MCSA_Global); - // .weak_definition _foo - OutStreamer.EmitSymbolAttribute(GVSym, MCSA_WeakDefinition); - } else if (const char *LinkOnce = MAI->getLinkOnceDirective()) { - // .globl _foo - OutStreamer.EmitSymbolAttribute(GVSym, MCSA_Global); - // .linkonce same_size - O << LinkOnce; - } else { - // .weak _foo - OutStreamer.EmitSymbolAttribute(GVSym, MCSA_Weak); - } - break; - case GlobalValue::DLLExportLinkage: - case GlobalValue::AppendingLinkage: - // FIXME: appending linkage variables should go into a section of - // their name or something. For now, just emit them as external. - case GlobalValue::ExternalLinkage: - // If external or appending, declare as a global symbol. - // .globl _foo - OutStreamer.EmitSymbolAttribute(GVSym, MCSA_Global); - break; - case GlobalValue::PrivateLinkage: - case GlobalValue::InternalLinkage: - break; - default: - llvm_unreachable("Unknown linkage type!"); - } - + EmitLinkage(GV->getLinkage(), GVSym); EmitAlignment(AlignLog, GV); + if (VerboseAsm) { WriteAsOperand(OutStreamer.GetCommentOS(), GV, /*PrintType=*/false, GV->getParent()); @@ -275,7 +272,185 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { EmitGlobalConstant(GV->getInitializer()); if (MAI->hasDotTypeDotSizeDirective()) - O << "\t.size\t" << *GVSym << ", " << Size << '\n'; + // .size foo, 42 + OutStreamer.EmitELFSize(GVSym, MCConstantExpr::Create(Size, OutContext)); + + OutStreamer.AddBlankLine(); +} + +/// EmitFunctionHeader - This method emits the header for the current +/// function. +void AsmPrinter::EmitFunctionHeader() { + // Print out constants referenced by the function + EmitConstantPool(); + + // Print the 'header' of function. + const Function *F = MF->getFunction(); + + OutStreamer.SwitchSection(getObjFileLowering().SectionForGlobal(F, Mang, TM)); + EmitVisibility(CurrentFnSym, F->getVisibility()); + + EmitLinkage(F->getLinkage(), CurrentFnSym); + EmitAlignment(MF->getAlignment(), F); + + if (MAI->hasDotTypeDotSizeDirective()) + OutStreamer.EmitSymbolAttribute(CurrentFnSym, MCSA_ELF_TypeFunction); + + if (VerboseAsm) { + WriteAsOperand(OutStreamer.GetCommentOS(), F, + /*PrintType=*/false, F->getParent()); + OutStreamer.GetCommentOS() << '\n'; + } + + // Emit the CurrentFnSym. This is a virtual function to allow targets to + // do their wild and crazy things as required. + EmitFunctionEntryLabel(); + + // Add some workaround for linkonce linkage on Cygwin\MinGW. + if (MAI->getLinkOnceDirective() != 0 && + (F->hasLinkOnceLinkage() || F->hasWeakLinkage())) + // FIXME: What is this? + O << "Lllvm$workaround$fake$stub$" << *CurrentFnSym << ":\n"; + + // Emit pre-function debug and/or EH information. + if (MAI->doesSupportDebugInformation() || MAI->doesSupportExceptionHandling()) + DW->BeginFunction(MF); +} + +/// EmitFunctionEntryLabel - Emit the label that is the entrypoint for the +/// function. This can be overridden by targets as required to do custom stuff. +void AsmPrinter::EmitFunctionEntryLabel() { + OutStreamer.EmitLabel(CurrentFnSym); +} + + +/// EmitComments - Pretty-print comments for instructions. 
+static void EmitComments(const MachineInstr &MI, raw_ostream &CommentOS) { + const MachineFunction *MF = MI.getParent()->getParent(); + const TargetMachine &TM = MF->getTarget(); + + if (!MI.getDebugLoc().isUnknown()) { + DILocation DLT = MF->getDILocation(MI.getDebugLoc()); + + // Print source line info. + DIScope Scope = DLT.getScope(); + // Omit the directory, because it's likely to be long and uninteresting. + if (!Scope.isNull()) + CommentOS << Scope.getFilename(); + else + CommentOS << "<unknown>"; + CommentOS << ':' << DLT.getLineNumber(); + if (DLT.getColumnNumber() != 0) + CommentOS << ':' << DLT.getColumnNumber(); + CommentOS << '\n'; + } + + // Check for spills and reloads + int FI; + + const MachineFrameInfo *FrameInfo = MF->getFrameInfo(); + + // We assume a single instruction only has a spill or reload, not + // both. + const MachineMemOperand *MMO; + if (TM.getInstrInfo()->isLoadFromStackSlotPostFE(&MI, FI)) { + if (FrameInfo->isSpillSlotObjectIndex(FI)) { + MMO = *MI.memoperands_begin(); + CommentOS << MMO->getSize() << "-byte Reload\n"; + } + } else if (TM.getInstrInfo()->hasLoadFromStackSlot(&MI, MMO, FI)) { + if (FrameInfo->isSpillSlotObjectIndex(FI)) + CommentOS << MMO->getSize() << "-byte Folded Reload\n"; + } else if (TM.getInstrInfo()->isStoreToStackSlotPostFE(&MI, FI)) { + if (FrameInfo->isSpillSlotObjectIndex(FI)) { + MMO = *MI.memoperands_begin(); + CommentOS << MMO->getSize() << "-byte Spill\n"; + } + } else if (TM.getInstrInfo()->hasStoreToStackSlot(&MI, MMO, FI)) { + if (FrameInfo->isSpillSlotObjectIndex(FI)) + CommentOS << MMO->getSize() << "-byte Folded Spill\n"; + } + + // Check for spill-induced copies + unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx; + if (TM.getInstrInfo()->isMoveInstr(MI, SrcReg, DstReg, + SrcSubIdx, DstSubIdx)) { + if (MI.getAsmPrinterFlag(MachineInstr::ReloadReuse)) + CommentOS << " Reload Reuse\n"; + } +} + + + +/// EmitFunctionBody - This method emits the body and trailer for a +/// function. +void AsmPrinter::EmitFunctionBody() { + // Emit target-specific gunk before the function body. + EmitFunctionBodyStart(); + + // Print out code for the function. + bool HasAnyRealCode = false; + for (MachineFunction::const_iterator I = MF->begin(), E = MF->end(); + I != E; ++I) { + // Print a label for the basic block. + EmitBasicBlockStart(I); + for (MachineBasicBlock::const_iterator II = I->begin(), IE = I->end(); + II != IE; ++II) { + // Print the assembly for the instruction. + if (!II->isLabel()) + HasAnyRealCode = true; + + ++EmittedInsts; + + // FIXME: Clean up processDebugLoc. + processDebugLoc(II, true); + + if (VerboseAsm) + EmitComments(*II, OutStreamer.GetCommentOS()); + + switch (II->getOpcode()) { + case TargetOpcode::DBG_LABEL: + case TargetOpcode::EH_LABEL: + case TargetOpcode::GC_LABEL: + printLabelInst(II); + break; + case TargetOpcode::INLINEASM: + printInlineAsm(II); + break; + case TargetOpcode::IMPLICIT_DEF: + printImplicitDef(II); + break; + case TargetOpcode::KILL: + printKill(II); + break; + default: + EmitInstruction(II); + break; + } + + // FIXME: Clean up processDebugLoc. + processDebugLoc(II, false); + } + } + + // If the function is empty and the object file uses .subsections_via_symbols, + // then we need to emit *something* to the function body to prevent the + // labels from collapsing together. Just emit a 0 byte. + if (MAI->hasSubsectionsViaSymbols() && !HasAnyRealCode) + OutStreamer.EmitIntValue(0, 1, 0/*addrspace*/); + + // Emit target-specific gunk after the function body. 
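The loop above fixes the division of labor: the AsmPrinter base class walks blocks and instructions and handles the target-independent pseudo-instructions itself, while a target printer fills in the bracketing hooks and per-instruction emission. A sketch of the target side (FooAsmPrinter is hypothetical; only the hook names and the dispatch behavior come from the code above):

    class FooAsmPrinter : public AsmPrinter {
    public:
      // Bracket the instruction loop in EmitFunctionBody().
      virtual void EmitFunctionBodyStart(); // e.g. prologue directives
      virtual void EmitFunctionBodyEnd();   // e.g. literal pools
      // Receives every instruction not handled generically above
      // (DBG/EH/GC labels, INLINEASM, IMPLICIT_DEF, KILL).
      virtual void EmitInstruction(const MachineInstr *MI);
    };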
+ EmitFunctionBodyEnd(); + + if (MAI->hasDotTypeDotSizeDirective()) + O << "\t.size\t" << *CurrentFnSym << ", .-" << *CurrentFnSym << '\n'; + + // Emit post-function debug information. + if (MAI->doesSupportDebugInformation() || MAI->doesSupportExceptionHandling()) + DW->EndFunction(MF); + + // Print out jump tables referenced by the function. + EmitJumpTableInfo(); OutStreamer.AddBlankLine(); } @@ -313,7 +488,7 @@ bool AsmPrinter::doFinalization(Module &M) { } } - if (MAI->getSetDirective()) { + if (MAI->hasSetDirective()) { OutStreamer.AddBlankLine(); for (Module::const_alias_iterator I = M.alias_begin(), E = M.alias_end(); I != E; ++I) { @@ -329,9 +504,11 @@ bool AsmPrinter::doFinalization(Module &M) { else assert(I->hasLocalLinkage() && "Invalid alias linkage"); - printVisibility(Name, I->getVisibility()); + EmitVisibility(Name, I->getVisibility()); - O << MAI->getSetDirective() << ' ' << *Name << ", " << *Target << '\n'; + // Emit the directives as assignments aka .set: + OutStreamer.EmitAssignment(Name, + MCSymbolRefExpr::Create(Target, OutContext)); } } @@ -360,9 +537,9 @@ bool AsmPrinter::doFinalization(Module &M) { } void AsmPrinter::SetupMachineFunction(MachineFunction &MF) { + this->MF = &MF; // Get the function symbol. CurrentFnSym = GetGlobalValueSymbol(MF.getFunction()); - IncrementFunctionNumber(); if (VerboseAsm) LI = &getAnalysis<MachineLoopInfo>(); @@ -383,7 +560,8 @@ namespace { /// used to print out constants which have been "spilled to memory" by /// the code generator. /// -void AsmPrinter::EmitConstantPool(MachineConstantPool *MCP) { +void AsmPrinter::EmitConstantPool() { + const MachineConstantPool *MCP = MF->getConstantPool(); const std::vector<MachineConstantPoolEntry> &CP = MCP->getConstants(); if (CP.empty()) return; @@ -470,27 +648,26 @@ void AsmPrinter::EmitConstantPool(MachineConstantPool *MCP) { /// EmitJumpTableInfo - Print assembly representations of the jump tables used /// by the current function to the current output stream. /// -void AsmPrinter::EmitJumpTableInfo(MachineJumpTableInfo *MJTI, - MachineFunction &MF) { +void AsmPrinter::EmitJumpTableInfo() { + const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo(); + if (MJTI == 0) return; const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables(); if (JT.empty()) return; - bool IsPic = TM.getRelocationModel() == Reloc::PIC_; - // Pick the directive to use to print the jump table entries, and switch to // the appropriate section. - TargetLowering *LoweringInfo = TM.getTargetLowering(); - - const Function *F = MF.getFunction(); + const Function *F = MF->getFunction(); bool JTInDiffSection = false; - if (F->isWeakForLinker() || - (IsPic && !LoweringInfo->usesGlobalOffsetTable())) { - // In PIC mode, we need to emit the jump table to the same section as the - // function body itself, otherwise the label differences won't make sense. - // We should also do if the section name is NULL or function is declared in - // discardable section. - OutStreamer.SwitchSection(getObjFileLowering().SectionForGlobal(F, Mang, - TM)); + if (// In PIC mode, we need to emit the jump table to the same section as the + // function body itself, otherwise the label differences won't make sense. + // FIXME: Need a better predicate for this: what about custom entries? 
+          MJTI->getEntryKind() == MachineJumpTableInfo::EK_LabelDifference32 ||
+      // We should also do this if the section name is NULL or the function is
+      // declared in a discardable section.
+      // FIXME: this isn't the right predicate, should be based on the MCSection
+      // for the function.
+      F->isWeakForLinker()) {
+    OutStreamer.SwitchSection(getObjFileLowering().SectionForGlobal(F,Mang,TM));
   } else {
     // Otherwise, drop it in the readonly section.
     const MCSection *ReadOnlySection =
@@ -498,73 +675,106 @@ void AsmPrinter::EmitJumpTableInfo(MachineJumpTableInfo *MJTI,
     OutStreamer.SwitchSection(ReadOnlySection);
     JTInDiffSection = true;
   }
+
+  EmitAlignment(Log2_32(MJTI->getEntryAlignment(*TM.getTargetData())));
-  EmitAlignment(Log2_32(MJTI->getAlignment()));
-
-  for (unsigned i = 0, e = JT.size(); i != e; ++i) {
-    const std::vector<MachineBasicBlock*> &JTBBs = JT[i].MBBs;
+  for (unsigned JTI = 0, e = JT.size(); JTI != e; ++JTI) {
+    const std::vector<MachineBasicBlock*> &JTBBs = JT[JTI].MBBs;

     // If this jump table was deleted, ignore it.
     if (JTBBs.empty()) continue;

-    // For PIC codegen, if possible we want to use the SetDirective to reduce
-    // the number of relocations the assembler will generate for the jump table.
-    // Set directives are all printed before the jump table itself.
-    SmallPtrSet<MachineBasicBlock*, 16> EmittedSets;
-    if (MAI->getSetDirective() && IsPic)
-      for (unsigned ii = 0, ee = JTBBs.size(); ii != ee; ++ii)
-        if (EmittedSets.insert(JTBBs[ii]))
-          printPICJumpTableSetLabel(i, JTBBs[ii]);
+    // For the EK_LabelDifference32 entry, if the target supports .set, emit a
+    // .set directive for each unique entry. This reduces the number of
+    // relocations the assembler will generate for the jump table.
+    if (MJTI->getEntryKind() == MachineJumpTableInfo::EK_LabelDifference32 &&
+        MAI->hasSetDirective()) {
+      SmallPtrSet<const MachineBasicBlock*, 16> EmittedSets;
+      const TargetLowering *TLI = TM.getTargetLowering();
+      const MCExpr *Base = TLI->getPICJumpTableRelocBaseExpr(MF,JTI,OutContext);
+      for (unsigned ii = 0, ee = JTBBs.size(); ii != ee; ++ii) {
+        const MachineBasicBlock *MBB = JTBBs[ii];
+        if (!EmittedSets.insert(MBB)) continue;
+
+        // .set LJTSet, LBB32-base
+        const MCExpr *LHS =
+          MCSymbolRefExpr::Create(MBB->getSymbol(OutContext), OutContext);
+        OutStreamer.EmitAssignment(GetJTSetSymbol(JTI, MBB->getNumber()),
+                                MCBinaryExpr::CreateSub(LHS, Base, OutContext));
+      }
+    }

     // On some targets (e.g. Darwin) we want to emit two consecutive labels
     // before each jump table. The first label is never referenced, but tells
     // the assembler and linker the extents of the jump table object. The
     // second label is actually referenced by the code.
     if (JTInDiffSection && MAI->getLinkerPrivateGlobalPrefix()[0])
-      OutStreamer.EmitLabel(GetJTISymbol(i, true));
+      // FIXME: This doesn't have to have any specific name, just any randomly
+      // named and numbered 'l' label would work. Simplify GetJTISymbol.
+ OutStreamer.EmitLabel(GetJTISymbol(JTI, true)); - OutStreamer.EmitLabel(GetJTISymbol(i)); + OutStreamer.EmitLabel(GetJTISymbol(JTI)); - for (unsigned ii = 0, ee = JTBBs.size(); ii != ee; ++ii) { - printPICJumpTableEntry(MJTI, JTBBs[ii], i); - O << '\n'; - } + for (unsigned ii = 0, ee = JTBBs.size(); ii != ee; ++ii) + EmitJumpTableEntry(MJTI, JTBBs[ii], JTI); } } -void AsmPrinter::printPICJumpTableEntry(const MachineJumpTableInfo *MJTI, - const MachineBasicBlock *MBB, - unsigned uid) const { - bool isPIC = TM.getRelocationModel() == Reloc::PIC_; - - // Use JumpTableDirective otherwise honor the entry size from the jump table - // info. - const char *JTEntryDirective = MAI->getJumpTableDirective(isPIC); - bool HadJTEntryDirective = JTEntryDirective != NULL; - if (!HadJTEntryDirective) { - JTEntryDirective = MJTI->getEntrySize() == 4 ? - MAI->getData32bitsDirective() : MAI->getData64bitsDirective(); +/// EmitJumpTableEntry - Emit a jump table entry for the specified MBB to the +/// current stream. +void AsmPrinter::EmitJumpTableEntry(const MachineJumpTableInfo *MJTI, + const MachineBasicBlock *MBB, + unsigned UID) const { + const MCExpr *Value = 0; + switch (MJTI->getEntryKind()) { + case MachineJumpTableInfo::EK_Custom32: + Value = TM.getTargetLowering()->LowerCustomJumpTableEntry(MJTI, MBB, UID, + OutContext); + break; + case MachineJumpTableInfo::EK_BlockAddress: + // EK_BlockAddress - Each entry is a plain address of block, e.g.: + // .word LBB123 + Value = MCSymbolRefExpr::Create(MBB->getSymbol(OutContext), OutContext); + break; + case MachineJumpTableInfo::EK_GPRel32BlockAddress: { + // EK_GPRel32BlockAddress - Each entry is an address of block, encoded + // with a relocation as gp-relative, e.g.: + // .gprel32 LBB123 + MCSymbol *MBBSym = MBB->getSymbol(OutContext); + OutStreamer.EmitGPRel32Value(MCSymbolRefExpr::Create(MBBSym, OutContext)); + return; } - O << JTEntryDirective << ' '; - - // If we have emitted set directives for the jump table entries, print - // them rather than the entries themselves. If we're emitting PIC, then - // emit the table entries as differences between two text section labels. - // If we're emitting non-PIC code, then emit the entries as direct - // references to the target basic blocks. - if (!isPIC) { - O << *GetMBBSymbol(MBB->getNumber()); - } else if (MAI->getSetDirective()) { - O << MAI->getPrivateGlobalPrefix() << getFunctionNumber() - << '_' << uid << "_set_" << MBB->getNumber(); - } else { - O << *GetMBBSymbol(MBB->getNumber()); - // If the arch uses custom Jump Table directives, don't calc relative to - // JT. - if (!HadJTEntryDirective) - O << '-' << *GetJTISymbol(uid); + case MachineJumpTableInfo::EK_LabelDifference32: { + // EK_LabelDifference32 - Each entry is the address of the block minus + // the address of the jump table. This is used for PIC jump tables where + // gprel32 is not supported. e.g.: + // .word LBB123 - LJTI1_2 + // If the .set directive is supported, this is emitted as: + // .set L4_5_set_123, LBB123 - LJTI1_2 + // .word L4_5_set_123 + + // If we have emitted set directives for the jump table entries, print + // them rather than the entries themselves. If we're emitting PIC, then + // emit the table entries as differences between two text section labels. + if (MAI->hasSetDirective()) { + // If we used .set, reference the .set's symbol. + Value = MCSymbolRefExpr::Create(GetJTSetSymbol(UID, MBB->getNumber()), + OutContext); + break; + } + // Otherwise, use the difference as the jump table entry. 
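The EK_Custom32 case at the top of this switch defers entirely to the target, which builds the MCExpr itself. A hypothetical override (the signature is inferred from the call site above; the body is invented for illustration):

    const MCExpr *FooTargetLowering::
    LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI,
                              const MachineBasicBlock *MBB,
                              unsigned UID, MCContext &Ctx) const {
      // Simplest possible custom entry: a bare reference to the block's
      // symbol, equivalent to what EK_BlockAddress produces.
      return MCSymbolRefExpr::Create(MBB->getSymbol(Ctx), Ctx);
    }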
+ Value = MCSymbolRefExpr::Create(MBB->getSymbol(OutContext), OutContext); + const MCExpr *JTI = MCSymbolRefExpr::Create(GetJTISymbol(UID), OutContext); + Value = MCBinaryExpr::CreateSub(Value, JTI, OutContext); + break; + } } + + assert(Value && "Unknown entry kind!"); + + unsigned EntrySize = MJTI->getEntrySize(*TM.getTargetData()); + OutStreamer.EmitValue(Value, EntrySize, /*addrspace*/0); } @@ -683,48 +893,6 @@ void AsmPrinter::EmitInt64(uint64_t Value) const { OutStreamer.EmitIntValue(Value, 8, 0/*addrspace*/); } - -/// toOctal - Convert the low order bits of X into an octal digit. -/// -static inline char toOctal(int X) { - return (X&7)+'0'; -} - -/// printStringChar - Print a char, escaped if necessary. -/// -static void printStringChar(formatted_raw_ostream &O, unsigned char C) { - if (C == '"') { - O << "\\\""; - } else if (C == '\\') { - O << "\\\\"; - } else if (isprint((unsigned char)C)) { - O << C; - } else { - switch(C) { - case '\b': O << "\\b"; break; - case '\f': O << "\\f"; break; - case '\n': O << "\\n"; break; - case '\r': O << "\\r"; break; - case '\t': O << "\\t"; break; - default: - O << '\\'; - O << toOctal(C >> 6); - O << toOctal(C >> 3); - O << toOctal(C >> 0); - break; - } - } -} - -/// EmitFile - Emit a .file directive. -void AsmPrinter::EmitFile(unsigned Number, StringRef Name) const { - O << "\t.file\t" << Number << " \""; - for (unsigned i = 0, N = Name.size(); i < N; ++i) - printStringChar(O, Name[i]); - O << '\"'; -} - - //===----------------------------------------------------------------------===// // EmitAlignment - Emit an alignment directive to the specified power of @@ -779,15 +947,18 @@ static const MCExpr *LowerConstant(const Constant *CV, AsmPrinter &AP) { } switch (CE->getOpcode()) { - case Instruction::ZExt: - case Instruction::SExt: - case Instruction::FPTrunc: - case Instruction::FPExt: - case Instruction::UIToFP: - case Instruction::SIToFP: - case Instruction::FPToUI: - case Instruction::FPToSI: - default: llvm_unreachable("FIXME: Don't support this constant cast expr"); + default: + // If the code isn't optimized, there may be outstanding folding + // opportunities. Attempt to fold the expression using TargetData as a + // last resort before giving up. + if (Constant *C = + ConstantFoldConstantExpression(CE, AP.TM.getTargetData())) + if (C != CE) + return LowerConstant(C, AP); +#ifndef NDEBUG + CE->dump(); +#endif + llvm_unreachable("FIXME: Don't support this constant expr"); case Instruction::GetElementPtr: { const TargetData &TD = *AP.TM.getTargetData(); // Generate a symbolic expression for the byte address @@ -851,8 +1022,14 @@ static const MCExpr *LowerConstant(const Constant *CV, AsmPrinter &AP) { return MCBinaryExpr::CreateAnd(OpExpr, MaskExpr, Ctx); } + // The MC library also has a right-shift operator, but it isn't consistently + // signed or unsigned between different targets. 
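Before the binary-operator cases pick up below, note what the rewritten default case above buys: instead of aborting on a cast it cannot express, LowerConstant first retries ConstantFoldConstantExpression() with TargetData. A hand-worked example of what that rescues (assuming TargetData allocates 4 bytes for i32):

    // There is no MCExpr for a zero-extension, so this constant used to hit
    // the llvm_unreachable path:
    //   i64 zext (i32 ptrtoint (i32* getelementptr (i32* null, i32 1) to i32)
    //             to i64)
    // With TargetData, the inner "sizeof(i32)" idiom folds to i32 4, the
    // zext folds to i64 4, and LowerConstant re-runs on a plain ConstantInt.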
case Instruction::Add: case Instruction::Sub: + case Instruction::Mul: + case Instruction::SDiv: + case Instruction::SRem: + case Instruction::Shl: case Instruction::And: case Instruction::Or: case Instruction::Xor: { @@ -862,6 +1039,10 @@ static const MCExpr *LowerConstant(const Constant *CV, AsmPrinter &AP) { default: llvm_unreachable("Unknown binary operator constant cast expr"); case Instruction::Add: return MCBinaryExpr::CreateAdd(LHS, RHS, Ctx); case Instruction::Sub: return MCBinaryExpr::CreateSub(LHS, RHS, Ctx); + case Instruction::Mul: return MCBinaryExpr::CreateMul(LHS, RHS, Ctx); + case Instruction::SDiv: return MCBinaryExpr::CreateDiv(LHS, RHS, Ctx); + case Instruction::SRem: return MCBinaryExpr::CreateMod(LHS, RHS, Ctx); + case Instruction::Shl: return MCBinaryExpr::CreateShl(LHS, RHS, Ctx); case Instruction::And: return MCBinaryExpr::CreateAnd(LHS, RHS, Ctx); case Instruction::Or: return MCBinaryExpr::CreateOr (LHS, RHS, Ctx); case Instruction::Xor: return MCBinaryExpr::CreateXor(LHS, RHS, Ctx); @@ -1012,6 +1193,7 @@ static void EmitGlobalConstantLargeInt(const ConstantInt *CI, void AsmPrinter::EmitGlobalConstant(const Constant *CV, unsigned AddrSpace) { if (isa<ConstantAggregateZero>(CV) || isa<UndefValue>(CV)) { uint64_t Size = TM.getTargetData()->getTypeAllocSize(CV->getType()); + if (Size == 0) Size = 1; // An empty "_foo:" followed by a section is undef. return OutStreamer.EmitZeros(Size, AddrSpace); } @@ -1295,7 +1477,7 @@ void AsmPrinter::printInlineAsm(const MachineInstr *MI) const { ++OpNo; // Skip over the ID number. if (Modifier[0] == 'l') // labels are target independent - O << *GetMBBSymbol(MI->getOperand(OpNo).getMBB()->getNumber()); + O << *MI->getOperand(OpNo).getMBB()->getSymbol(OutContext); else { AsmPrinter *AP = const_cast<AsmPrinter*>(this); if ((OpFlags & 7) == 4) { @@ -1320,6 +1502,7 @@ void AsmPrinter::printInlineAsm(const MachineInstr *MI) const { } } O << "\n\t" << MAI->getCommentString() << MAI->getInlineAsmEnd(); + OutStreamer.AddBlankLine(); } /// printImplicitDef - This method prints the specified machine instruction @@ -1329,6 +1512,7 @@ void AsmPrinter::printImplicitDef(const MachineInstr *MI) const { O.PadToColumn(MAI->getCommentColumn()); O << MAI->getCommentString() << " implicit-def: " << TRI->getName(MI->getOperand(0).getReg()); + OutStreamer.AddBlankLine(); } void AsmPrinter::printKill(const MachineInstr *MI) const { @@ -1340,12 +1524,14 @@ void AsmPrinter::printKill(const MachineInstr *MI) const { assert(op.isReg() && "KILL instruction must have only register operands"); O << ' ' << TRI->getName(op.getReg()) << (op.isDef() ? "<def>" : "<kill>"); } + OutStreamer.AddBlankLine(); } /// printLabel - This method prints a local label used by debug and /// exception handling tables. 
-void AsmPrinter::printLabel(const MachineInstr *MI) const { +void AsmPrinter::printLabelInst(const MachineInstr *MI) const { printLabel(MI->getOperand(0).getImm()); + OutStreamer.AddBlankLine(); } void AsmPrinter::printLabel(unsigned Id) const { @@ -1368,14 +1554,12 @@ bool AsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo, return true; } -MCSymbol *AsmPrinter::GetBlockAddressSymbol(const BlockAddress *BA, - const char *Suffix) const { - return GetBlockAddressSymbol(BA->getFunction(), BA->getBasicBlock(), Suffix); +MCSymbol *AsmPrinter::GetBlockAddressSymbol(const BlockAddress *BA) const { + return GetBlockAddressSymbol(BA->getFunction(), BA->getBasicBlock()); } MCSymbol *AsmPrinter::GetBlockAddressSymbol(const Function *F, - const BasicBlock *BB, - const char *Suffix) const { + const BasicBlock *BB) const { assert(BB->hasName() && "Address of anonymous basic block not supported yet!"); @@ -1389,19 +1573,12 @@ MCSymbol *AsmPrinter::GetBlockAddressSymbol(const Function *F, SmallString<60> NameResult; Mang->getNameWithPrefix(NameResult, StringRef("BA") + Twine((unsigned)FnName.size()) + - "_" + FnName.str() + "_" + BB->getName() + Suffix, + "_" + FnName.str() + "_" + BB->getName(), Mangler::Private); return OutContext.GetOrCreateSymbol(NameResult.str()); } -MCSymbol *AsmPrinter::GetMBBSymbol(unsigned MBBID) const { - SmallString<60> Name; - raw_svector_ostream(Name) << MAI->getPrivateGlobalPrefix() << "BB" - << getFunctionNumber() << '_' << MBBID; - return OutContext.GetOrCreateSymbol(Name.str()); -} - /// GetCPISymbol - Return the symbol for the specified constant pool entry. MCSymbol *AsmPrinter::GetCPISymbol(unsigned CPID) const { SmallString<60> Name; @@ -1412,11 +1589,15 @@ MCSymbol *AsmPrinter::GetCPISymbol(unsigned CPID) const { /// GetJTISymbol - Return the symbol for the specified jump table entry. MCSymbol *AsmPrinter::GetJTISymbol(unsigned JTID, bool isLinkerPrivate) const { - const char *Prefix = isLinkerPrivate ? MAI->getLinkerPrivateGlobalPrefix() : - MAI->getPrivateGlobalPrefix(); + return MF->getJTISymbol(JTID, OutContext, isLinkerPrivate); +} + +/// GetJTSetSymbol - Return the symbol for the specified jump table .set +/// FIXME: privatize to AsmPrinter. +MCSymbol *AsmPrinter::GetJTSetSymbol(unsigned UID, unsigned MBBID) const { SmallString<60> Name; - raw_svector_ostream(Name) << Prefix << "JTI" << getFunctionNumber() << '_' - << JTID; + raw_svector_ostream(Name) << MAI->getPrivateGlobalPrefix() + << getFunctionNumber() << '_' << UID << "_set_" << MBBID; return OutContext.GetOrCreateSymbol(Name.str()); } @@ -1476,7 +1657,7 @@ static void PrintChildLoopComment(raw_ostream &OS, const MachineLoop *Loop, } } -/// EmitComments - Pretty-print comments for basic blocks. +/// PrintBasicBlockLoopComments - Pretty-print comments for basic blocks. static void PrintBasicBlockLoopComments(const MachineBasicBlock &MBB, const MachineLoopInfo *LI, const AsmPrinter &AP) { @@ -1555,37 +1736,11 @@ void AsmPrinter::EmitBasicBlockStart(const MachineBasicBlock *MBB) const { PrintBasicBlockLoopComments(*MBB, LI, *this); } - OutStreamer.EmitLabel(GetMBBSymbol(MBB->getNumber())); + OutStreamer.EmitLabel(MBB->getSymbol(OutContext)); } } -/// printPICJumpTableSetLabel - This method prints a set label for the -/// specified MachineBasicBlock for a jumptable entry. 
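For orientation, the private-symbol helpers above produce names of this shape, assuming Darwin's "L" private-global prefix and function number 4 (the strings are conventions inferred from the code and comments above, not guarantees):

    // GetCPISymbol(7)        -> "LCPI4_7"       constant pool entry
    // GetJTISymbol(5)        -> "LJTI4_5"       jump table (via MF->getJTISymbol)
    // GetJTSetSymbol(5, 123) -> "L4_5_set_123"  .set helper for PIC jump tables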
-void AsmPrinter::printPICJumpTableSetLabel(unsigned uid, - const MachineBasicBlock *MBB) const { - if (!MAI->getSetDirective()) - return; - - O << MAI->getSetDirective() << ' ' << MAI->getPrivateGlobalPrefix() - << getFunctionNumber() << '_' << uid << "_set_" << MBB->getNumber() << ',' - << *GetMBBSymbol(MBB->getNumber()) - << '-' << *GetJTISymbol(uid) << '\n'; -} - -void AsmPrinter::printPICJumpTableSetLabel(unsigned uid, unsigned uid2, - const MachineBasicBlock *MBB) const { - if (!MAI->getSetDirective()) - return; - - O << MAI->getSetDirective() << ' ' << MAI->getPrivateGlobalPrefix() - << getFunctionNumber() << '_' << uid << '_' << uid2 - << "_set_" << MBB->getNumber() << ',' - << *GetMBBSymbol(MBB->getNumber()) - << '-' << MAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber() - << '_' << uid << '_' << uid2 << '\n'; -} - -void AsmPrinter::printVisibility(MCSymbol *Sym, unsigned Visibility) const { +void AsmPrinter::EmitVisibility(MCSymbol *Sym, unsigned Visibility) const { MCSymbolAttr Attr = MCSA_Invalid; switch (Visibility) { @@ -1633,86 +1788,3 @@ GCMetadataPrinter *AsmPrinter::GetOrCreateGCPrinter(GCStrategy *S) { return 0; } -/// EmitComments - Pretty-print comments for instructions -void AsmPrinter::EmitComments(const MachineInstr &MI) const { - if (!VerboseAsm) - return; - - bool Newline = false; - - if (!MI.getDebugLoc().isUnknown()) { - DILocation DLT = MF->getDILocation(MI.getDebugLoc()); - - // Print source line info. - O.PadToColumn(MAI->getCommentColumn()); - O << MAI->getCommentString() << ' '; - DIScope Scope = DLT.getScope(); - // Omit the directory, because it's likely to be long and uninteresting. - if (!Scope.isNull()) - O << Scope.getFilename(); - else - O << "<unknown>"; - O << ':' << DLT.getLineNumber(); - if (DLT.getColumnNumber() != 0) - O << ':' << DLT.getColumnNumber(); - Newline = true; - } - - // Check for spills and reloads - int FI; - - const MachineFrameInfo *FrameInfo = - MI.getParent()->getParent()->getFrameInfo(); - - // We assume a single instruction only has a spill or reload, not - // both. 
- const MachineMemOperand *MMO; - if (TM.getInstrInfo()->isLoadFromStackSlotPostFE(&MI, FI)) { - if (FrameInfo->isSpillSlotObjectIndex(FI)) { - MMO = *MI.memoperands_begin(); - if (Newline) O << '\n'; - O.PadToColumn(MAI->getCommentColumn()); - O << MAI->getCommentString() << ' ' << MMO->getSize() << "-byte Reload"; - Newline = true; - } - } - else if (TM.getInstrInfo()->hasLoadFromStackSlot(&MI, MMO, FI)) { - if (FrameInfo->isSpillSlotObjectIndex(FI)) { - if (Newline) O << '\n'; - O.PadToColumn(MAI->getCommentColumn()); - O << MAI->getCommentString() << ' ' - << MMO->getSize() << "-byte Folded Reload"; - Newline = true; - } - } - else if (TM.getInstrInfo()->isStoreToStackSlotPostFE(&MI, FI)) { - if (FrameInfo->isSpillSlotObjectIndex(FI)) { - MMO = *MI.memoperands_begin(); - if (Newline) O << '\n'; - O.PadToColumn(MAI->getCommentColumn()); - O << MAI->getCommentString() << ' ' << MMO->getSize() << "-byte Spill"; - Newline = true; - } - } - else if (TM.getInstrInfo()->hasStoreToStackSlot(&MI, MMO, FI)) { - if (FrameInfo->isSpillSlotObjectIndex(FI)) { - if (Newline) O << '\n'; - O.PadToColumn(MAI->getCommentColumn()); - O << MAI->getCommentString() << ' ' - << MMO->getSize() << "-byte Folded Spill"; - Newline = true; - } - } - - // Check for spill-induced copies - unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx; - if (TM.getInstrInfo()->isMoveInstr(MI, SrcReg, DstReg, - SrcSubIdx, DstSubIdx)) { - if (MI.getAsmPrinterFlag(ReloadReuse)) { - if (Newline) O << '\n'; - O.PadToColumn(MAI->getCommentColumn()); - O << MAI->getCommentString() << " Reload Reuse"; - } - } -} - diff --git a/lib/CodeGen/AsmPrinter/DIE.cpp b/lib/CodeGen/AsmPrinter/DIE.cpp index 349e0ac..63360c0 100644 --- a/lib/CodeGen/AsmPrinter/DIE.cpp +++ b/lib/CodeGen/AsmPrinter/DIE.cpp @@ -313,6 +313,7 @@ void DIESectionOffset::EmitValue(DwarfPrinter *D, unsigned Form) const { D->EmitSectionOffset(Label.getTag(), Section.getTag(), Label.getNumber(), Section.getNumber(), IsSmall, IsEH, UseSet); + D->getAsm()->O << '\n'; // FIXME: Necesssary? } /// SizeOf - Determine size of delta value in bytes. diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index 513987f..5093dd9 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -26,6 +26,7 @@ #include "llvm/ADT/StringExtras.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/ValueHandle.h" #include "llvm/Support/FormattedStream.h" #include "llvm/Support/Timer.h" #include "llvm/System/Path.h" @@ -166,7 +167,8 @@ public: class DbgScope { DbgScope *Parent; // Parent to this scope. DIDescriptor Desc; // Debug info descriptor for scope. - MDNode * InlinedAtLocation; // Location at which scope is inlined. + // Location at which this scope is inlined. + AssertingVH<MDNode> InlinedAtLocation; bool AbstractScope; // Abstract Scope unsigned StartLabelID; // Label ID of the beginning of scope. unsigned EndLabelID; // Label ID of the end of scope. @@ -189,7 +191,7 @@ public: void setParent(DbgScope *P) { Parent = P; } DIDescriptor getDesc() const { return Desc; } MDNode *getInlinedAt() const { - return dyn_cast_or_null<MDNode>(InlinedAtLocation); + return InlinedAtLocation; } MDNode *getScopeNode() const { return Desc.getNode(); } unsigned getStartLabelID() const { return StartLabelID; } @@ -616,7 +618,7 @@ void DwarfDebug::addComplexAddress(DbgVariable *&DV, DIE *Die, 1). Add the offset of the forwarding field. - 2). 
Follow that pointer to get the the real __Block_byref_x_VarName + 2). Follow that pointer to get the real __Block_byref_x_VarName struct to use (the real one may have been copied onto the heap). 3). Add the offset for the field VarName, to find the actual variable. @@ -937,7 +939,16 @@ void DwarfDebug::constructTypeDIE(DIE &Buffer, DICompositeType CTy) { DIE *ElemDie = NULL; if (Element.getTag() == dwarf::DW_TAG_subprogram) ElemDie = createSubprogramDIE(DISubprogram(Element.getNode())); - else + else if (Element.getTag() == dwarf::DW_TAG_auto_variable) { + DIVariable DV(Element.getNode()); + ElemDie = new DIE(dwarf::DW_TAG_variable); + addString(ElemDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, + DV.getName()); + addType(ElemDie, DV.getType()); + addUInt(ElemDie, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1); + addUInt(ElemDie, dwarf::DW_AT_external, dwarf::DW_FORM_flag, 1); + addSourceLine(ElemDie, &DV); + } else ElemDie = createMemberDIE(DIDerivedType(Element.getNode())); Buffer.addChild(ElemDie); } @@ -949,6 +960,11 @@ void DwarfDebug::constructTypeDIE(DIE &Buffer, DICompositeType CTy) { if (RLang) addUInt(&Buffer, dwarf::DW_AT_APPLE_runtime_class, dwarf::DW_FORM_data1, RLang); + + DICompositeType ContainingType = CTy.getContainingType(); + if (!ContainingType.isNull()) + addDIEEntry(&Buffer, dwarf::DW_AT_containing_type, dwarf::DW_FORM_ref4, + getOrCreateTypeDIE(DIType(ContainingType.getNode()))); break; } default: @@ -959,7 +975,7 @@ void DwarfDebug::constructTypeDIE(DIE &Buffer, DICompositeType CTy) { if (!Name.empty()) addString(&Buffer, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name); - if (Tag == dwarf::DW_TAG_enumeration_type || + if (Tag == dwarf::DW_TAG_enumeration_type || Tag == dwarf::DW_TAG_class_type || Tag == dwarf::DW_TAG_structure_type || Tag == dwarf::DW_TAG_union_type) { // Add size if non-zero (derived types might be zero-sized.) if (Size) @@ -1107,7 +1123,26 @@ DIE *DwarfDebug::createMemberDIE(const DIDerivedType &DT) { // This is not a bitfield. addUInt(MemLocationDie, 0, dwarf::DW_FORM_udata, DT.getOffsetInBits() >> 3); - addBlock(MemberDie, dwarf::DW_AT_data_member_location, 0, MemLocationDie); + if (DT.getTag() == dwarf::DW_TAG_inheritance + && DT.isVirtual()) { + + // For C++, virtual base classes are not at fixed offset. Use following + // expression to extract appropriate offset from vtable. 
+        // BaseAddr = ObAddr + *((*ObAddr) - Offset)
+
+        DIEBlock *VBaseLocationDie = new DIEBlock();
+        addUInt(VBaseLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_dup);
+        addUInt(VBaseLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_deref);
+        addUInt(VBaseLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_constu);
+        addUInt(VBaseLocationDie, 0, dwarf::DW_FORM_udata, DT.getOffsetInBits());
+        addUInt(VBaseLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_minus);
+        addUInt(VBaseLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_deref);
+        addUInt(VBaseLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus);
+
+        addBlock(MemberDie, dwarf::DW_AT_data_member_location, 0,
+                 VBaseLocationDie);
+      } else
+        addBlock(MemberDie, dwarf::DW_AT_data_member_location, 0, MemLocationDie);

   if (DT.isProtected())
     addUInt(MemberDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_flag,
@@ -1179,12 +1214,17 @@ DIE *DwarfDebug::createSubprogramDIE(const DISubprogram &SP, bool MakeDecl) {
     if (SPTag == dwarf::DW_TAG_subroutine_type)
       for (unsigned i = 1, N = Args.getNumElements(); i < N; ++i) {
         DIE *Arg = new DIE(dwarf::DW_TAG_formal_parameter);
-        addType(Arg, DIType(Args.getElement(i).getNode()));
-        addUInt(Arg, dwarf::DW_AT_artificial, dwarf::DW_FORM_flag, 1); // ??
+        DIType ATy = DIType(DIType(Args.getElement(i).getNode()));
+        addType(Arg, ATy);
+        if (ATy.isArtificial())
+          addUInt(Arg, dwarf::DW_AT_artificial, dwarf::DW_FORM_flag, 1);
         SPDie->addChild(Arg);
       }
   }

+  if (SP.isArtificial())
+    addUInt(SPDie, dwarf::DW_AT_artificial, dwarf::DW_FORM_flag, 1);
+
   // DW_TAG_inlined_subroutine may refer to this DIE.
   ModuleCU->insertDIE(SP.getNode(), SPDie);
   return SPDie;
 }
@@ -1289,7 +1329,13 @@ DIE *DwarfDebug::updateSubprogramScopeDIE(MDNode *SPNode) {
   DIE *SPDie = ModuleCU->getDIE(SPNode);
   assert (SPDie && "Unable to find subprogram DIE!");
   DISubprogram SP(SPNode);
-  if (SP.isDefinition() && !SP.getContext().isCompileUnit()) {
+  // There is no need to generate a specification DIE for a function
+  // defined at the compile unit level. If a function is defined inside
+  // another function then gdb prefers the definition at the top level but
+  // does not expect a specification DIE in the parent function. So avoid
+  // creating a specification DIE for a function defined inside a function.
+  if (SP.isDefinition() && !SP.getContext().isCompileUnit()
+      && !SP.getContext().isSubprogram()) {
     addUInt(SPDie, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1);

     // Add arguments.
     DICompositeType SPTy = SP.getType();
@@ -1298,8 +1344,10 @@ DIE *DwarfDebug::updateSubprogramScopeDIE(MDNode *SPNode) {
     if (SPTag == dwarf::DW_TAG_subroutine_type)
       for (unsigned i = 1, N = Args.getNumElements(); i < N; ++i) {
         DIE *Arg = new DIE(dwarf::DW_TAG_formal_parameter);
-        addType(Arg, DIType(Args.getElement(i).getNode()));
-        addUInt(Arg, dwarf::DW_AT_artificial, dwarf::DW_FORM_flag, 1); // ??
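The virtual-base location block a few hunks above reads naturally as a stack program. Assuming the DWARF evaluator starts with the object address (ObAddr) pushed, the stack evolves like this:

    // DW_OP_dup              ObAddr ObAddr
    // DW_OP_deref            ObAddr VTbl          (load the vtable pointer)
    // DW_OP_constu <Offset>  ObAddr VTbl Off      (Off = DT.getOffsetInBits())
    // DW_OP_minus            ObAddr (VTbl-Off)
    // DW_OP_deref            ObAddr Delta         (load the stored base offset)
    // DW_OP_plus             ObAddr+Delta         (address of the virtual base)
    //
    // i.e. BaseAddr = ObAddr + *((*ObAddr) - Offset), matching the comment.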
+        DIType ATy = DIType(DIType(Args.getElement(i).getNode()));
+        addType(Arg, ATy);
+        if (ATy.isArtificial())
+          addUInt(Arg, dwarf::DW_AT_artificial, dwarf::DW_FORM_flag, 1);
         SPDie->addChild(Arg);
       }
     DIE *SPDeclDie = SPDie;
@@ -1308,7 +1356,7 @@ DIE *DwarfDebug::updateSubprogramScopeDIE(MDNode *SPNode) {
                SPDeclDie);
     ModuleCU->addDie(SPDie);
   }
-
+
   addLabel(SPDie, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr,
            DWLabel("func_begin", SubprogramCount));
   addLabel(SPDie, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr,
@@ -1471,6 +1519,9 @@ DIE *DwarfDebug::constructVariableDIE(DbgVariable *DV, DbgScope *Scope) {
     else
       addAddress(VariableDie, dwarf::DW_AT_location, Location);
   }
+
+  if (Tag == dwarf::DW_TAG_formal_parameter && VD.getType().isArtificial())
+    addUInt(VariableDie, dwarf::DW_AT_artificial, dwarf::DW_FORM_flag, 1);

   DV->setDIE(VariableDie);
   return VariableDie;
@@ -1669,6 +1720,7 @@ void DwarfDebug::constructGlobalVariableDIE(MDNode *N) {
     addObjectLabel(Block, 0, dwarf::DW_FORM_udata,
                    Asm->GetGlobalValueSymbol(DI_GV.getGlobal()));
     addBlock(VariableSpecDIE, dwarf::DW_AT_location, 0, Block);
+    addUInt(VariableDie, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1);
     ModuleCU->addDie(VariableSpecDIE);
   } else {
     DIEBlock *Block = new DIEBlock();
@@ -1779,8 +1831,7 @@ void DwarfDebug::beginModule(Module *M, MachineModuleInfo *mmi) {
       FullPath.appendComponent(getSourceFileName(Id.second));
       assert(AppendOk && "Could not append filename to directory!");
       AppendOk = false;
-      Asm->EmitFile(i, FullPath.str());
-      Asm->O << '\n';
+      Asm->OutStreamer.EmitDwarfFileDirective(i, FullPath.str());
     }
   }
@@ -1986,7 +2037,7 @@ void DwarfDebug::createDbgScope(MDNode *Scope, MDNode *InlinedAt) {

 /// extractScopeInformation - Scan machine instructions in this function
 /// and collect DbgScopes. Return true if at least one scope was found.
-bool DwarfDebug::extractScopeInformation(MachineFunction *MF) {
+bool DwarfDebug::extractScopeInformation() {
   // If scope information was extracted using .dbg intrinsics then there is
   // no need to extract this information by scanning each instruction.
   if (!DbgScopeMap.empty())
@@ -2081,7 +2132,7 @@ bool DwarfDebug::extractScopeInformation(MachineFunction *MF) {

 /// beginFunction - Gather pre-function debug information. Assumes being
 /// emitted immediately after the function entry point.
-void DwarfDebug::beginFunction(MachineFunction *MF) {
+void DwarfDebug::beginFunction(const MachineFunction *MF) {
   this->MF = MF;
   if (!ShouldEmitDwarfDebug()) return;
@@ -2089,14 +2140,11 @@ void DwarfDebug::beginFunction(MachineFunction *MF) {
   if (TimePassesIsEnabled)
     DebugTimer->startTimer();

-  if (!extractScopeInformation(MF))
+  if (!extractScopeInformation())
     return;

   collectVariableInfo();

-  // Begin accumulating function debug information.
-  MMI->BeginFunction(MF);
-
   // Assumes in correct section after the entry point.
   EmitLabel("func_begin", ++SubprogramCount);

@@ -2123,7 +2171,7 @@ void DwarfDebug::beginFunction(MachineFunction *MF) {

 /// endFunction - Gather and emit post-function debug information.
 ///
-void DwarfDebug::endFunction(MachineFunction *MF) {
+void DwarfDebug::endFunction(const MachineFunction *MF) {
   if (!ShouldEmitDwarfDebug()) return;
   if (TimePassesIsEnabled)
@@ -2366,6 +2414,9 @@ void DwarfDebug::emitDIE(DIE *Die) {
     unsigned Form = AbbrevData[i].getForm();
     assert(Form && "Too many attributes for DIE (check abbreviation)");

+    if (Asm->VerboseAsm)
+      Asm->OutStreamer.AddComment(dwarf::AttributeString(Attr));
+
     switch (Attr) {
     case dwarf::DW_AT_sibling:
       Asm->EmitInt32(Die->getSiblingOffset());
@@ -2380,10 +2431,9 @@
     default:
      // Emit an attribute using the defined form.
      Values[i]->EmitValue(this, Form);
+      O << "\n"; // REMOVE This once all EmitValue impls emit their own newline.
      break;
    }
-
-    EOL(dwarf::AttributeString(Attr));
   }

   // Emit the DIE children if any.
@@ -2767,7 +2817,8 @@ void DwarfDebug::emitDebugPubTypes() {

   EmitLabel("pubtypes_begin", ModuleCU->getID());

-  Asm->EmitInt16(dwarf::DWARF_VERSION); EOL("DWARF Version");
+  if (Asm->VerboseAsm) Asm->OutStreamer.AddComment("DWARF Version");
+  Asm->EmitInt16(dwarf::DWARF_VERSION);

   EmitSectionOffset("info_begin", "section_info",
                     ModuleCU->getID(), 0, true, false);
@@ -2783,10 +2834,11 @@
     const char *Name = GI->getKeyData();
     DIE * Entity = GI->second;

-    Asm->EmitInt32(Entity->getOffset()); EOL("DIE offset");
+    if (Asm->VerboseAsm) Asm->OutStreamer.AddComment("DIE offset");
+    Asm->EmitInt32(Entity->getOffset());

     if (Asm->VerboseAsm) Asm->OutStreamer.AddComment("External Name");
-    Asm->OutStreamer.EmitBytes(StringRef(Name, strlen(Name)), 0);
+    Asm->OutStreamer.EmitBytes(StringRef(Name, GI->getKeyLength()+1), 0);
   }

   Asm->EmitInt32(0); EOL("End Mark");
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.h b/lib/CodeGen/AsmPrinter/DwarfDebug.h
index e723621..55baa92 100644
--- a/lib/CodeGen/AsmPrinter/DwarfDebug.h
+++ b/lib/CodeGen/AsmPrinter/DwarfDebug.h
@@ -103,7 +103,7 @@ class DwarfDebug : public DwarfPrinter {
   ///
   SmallVector<std::pair<unsigned, unsigned>, 8> SourceIds;

-  /// Lines - List of of source line correspondence.
+  /// Lines - List of source line correspondence.
   std::vector<SrcLineInfo> Lines;

   /// DIEValues - A list of all the unique values in use.
@@ -523,11 +523,11 @@ public:

   /// beginFunction - Gather pre-function debug information. Assumes being
   /// emitted immediately after the function entry point.
-  void beginFunction(MachineFunction *MF);
+  void beginFunction(const MachineFunction *MF);

   /// endFunction - Gather and emit post-function debug information.
   ///
-  void endFunction(MachineFunction *MF);
+  void endFunction(const MachineFunction *MF);

   /// recordSourceLine - Records location information and associates it with a
   /// label. Returns a unique label ID used to generate a label and provide
@@ -550,7 +550,7 @@ public:

   /// extractScopeInformation - Scan machine instructions in this function
   /// and collect DbgScopes. Return true if at least one scope was found.
-  bool extractScopeInformation(MachineFunction *MF);
+  bool extractScopeInformation();

   /// collectVariableInfo - Populate DbgScope entries with variables' info.
void collectVariableInfo(); diff --git a/lib/CodeGen/AsmPrinter/DwarfException.cpp b/lib/CodeGen/AsmPrinter/DwarfException.cpp index 2ae16c0..c6c59f5 100644 --- a/lib/CodeGen/AsmPrinter/DwarfException.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfException.cpp @@ -34,6 +34,7 @@ #include "llvm/Support/Timer.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/Twine.h" using namespace llvm; DwarfException::DwarfException(raw_ostream &OS, AsmPrinter *A, @@ -49,26 +50,6 @@ DwarfException::~DwarfException() { delete ExceptionTimer; } -/// SizeOfEncodedValue - Return the size of the encoding in bytes. -unsigned DwarfException::SizeOfEncodedValue(unsigned Encoding) { - if (Encoding == dwarf::DW_EH_PE_omit) - return 0; - - switch (Encoding & 0x07) { - case dwarf::DW_EH_PE_absptr: - return TD->getPointerSize(); - case dwarf::DW_EH_PE_udata2: - return 2; - case dwarf::DW_EH_PE_udata4: - return 4; - case dwarf::DW_EH_PE_udata8: - return 8; - } - - assert(0 && "Invalid encoded value."); - return 0; -} - /// CreateLabelDiff - Emit a label and subtract it from the expression we /// already have. This is equivalent to emitting "foo - .", but we have to emit /// the label for "." directly. @@ -99,7 +80,7 @@ void DwarfException::EmitCIE(const Function *PersonalityFn, unsigned Index) { TD->getPointerSize() : -TD->getPointerSize(); const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering(); - + // Begin eh frame section. Asm->OutStreamer.SwitchSection(TLOF.getEHFrameSection()); @@ -127,30 +108,16 @@ void DwarfException::EmitCIE(const Function *PersonalityFn, unsigned Index) { // The personality presence indicates that language specific information will // show up in the eh frame. Find out how we are supposed to lower the // personality function reference: - const MCExpr *PersonalityRef = 0; - bool IsPersonalityIndirect = false, IsPersonalityPCRel = false; - if (PersonalityFn) { - // FIXME: HANDLE STATIC CODEGEN MODEL HERE. - - // In non-static mode, ask the object file how to represent this reference. - PersonalityRef = - TLOF.getSymbolForDwarfGlobalReference(PersonalityFn, Asm->Mang, - Asm->MMI, - IsPersonalityIndirect, - IsPersonalityPCRel); - } - - unsigned PerEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4; - if (IsPersonalityIndirect) - PerEncoding |= dwarf::DW_EH_PE_indirect; - unsigned LSDAEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4; - unsigned FDEEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4; + + unsigned LSDAEncoding = TLOF.getLSDAEncoding(); + unsigned FDEEncoding = TLOF.getFDEEncoding(); + unsigned PerEncoding = TLOF.getPersonalityEncoding(); char Augmentation[6] = { 0 }; unsigned AugmentationSize = 0; char *APtr = Augmentation + 1; - if (PersonalityRef) { + if (PersonalityFn) { // There is a personality function. *APtr++ = 'P'; AugmentationSize += 1 + SizeOfEncodedValue(PerEncoding); @@ -180,20 +147,19 @@ void DwarfException::EmitCIE(const Function *PersonalityFn, unsigned Index) { Asm->EmitInt8(RI->getDwarfRegNum(RI->getRARegister(), true)); EOL("CIE Return Address Column"); - EmitULEB128(AugmentationSize, "Augmentation Size"); - EmitEncodingByte(PerEncoding, "Personality"); - - // If there is a personality, we need to indicate the function's location. 
- if (PersonalityRef) { - if (!IsPersonalityPCRel) - PersonalityRef = CreateLabelDiff(PersonalityRef, "personalityref_addr", - Index); - - O << MAI->getData32bitsDirective() << *PersonalityRef; - EOL("Personality"); + if (Augmentation[0]) { + EmitULEB128(AugmentationSize, "Augmentation Size"); - EmitEncodingByte(LSDAEncoding, "LSDA"); - EmitEncodingByte(FDEEncoding, "FDE"); + // If there is a personality, we need to indicate the function's location. + if (PersonalityFn) { + EmitEncodingByte(PerEncoding, "Personality"); + EmitReference(PersonalityFn, PerEncoding); + EOL("Personality"); + } + if (UsesLSDA[Index]) + EmitEncodingByte(LSDAEncoding, "LSDA"); + if (FDEEncoding != dwarf::DW_EH_PE_absptr) + EmitEncodingByte(FDEEncoding, "FDE"); } // Indicate locations of general callee saved registers in frame. @@ -215,8 +181,12 @@ void DwarfException::EmitFDE(const FunctionEHFrameInfo &EHFrameInfo) { "Should not emit 'available externally' functions at all"); const Function *TheFunc = EHFrameInfo.function; + const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering(); - Asm->OutStreamer.SwitchSection(Asm->getObjFileLowering().getEHFrameSection()); + unsigned LSDAEncoding = TLOF.getLSDAEncoding(); + unsigned FDEEncoding = TLOF.getFDEEncoding(); + + Asm->OutStreamer.SwitchSection(TLOF.getEHFrameSection()); // Externally visible entry into the function's eh frame info. If the // corresponding function is static, this should not be externally visible. @@ -254,7 +224,8 @@ void DwarfException::EmitFDE(const FunctionEHFrameInfo &EHFrameInfo) { // EH frame header. EmitDifference("eh_frame_end", EHFrameInfo.Number, - "eh_frame_begin", EHFrameInfo.Number, true); + "eh_frame_begin", EHFrameInfo.Number, + true); EOL("Length of Frame Information Entry"); EmitLabel("eh_frame_begin", EHFrameInfo.Number); @@ -265,33 +236,23 @@ void DwarfException::EmitFDE(const FunctionEHFrameInfo &EHFrameInfo) { EOL("FDE CIE offset"); - EmitReference("eh_func_begin", EHFrameInfo.Number, true, true); + EmitReference("eh_func_begin", EHFrameInfo.Number, FDEEncoding); EOL("FDE initial location"); EmitDifference("eh_func_end", EHFrameInfo.Number, - "eh_func_begin", EHFrameInfo.Number, true); + "eh_func_begin", EHFrameInfo.Number, + SizeOfEncodedValue(FDEEncoding) == 4); EOL("FDE address range"); // If there is a personality and landing pads then point to the language // specific data area in the exception table.
if (MMI->getPersonalities()[0] != NULL) { + unsigned Size = SizeOfEncodedValue(LSDAEncoding); - if (Asm->TM.getLSDAEncoding() != DwarfLSDAEncoding::EightByte) { - EmitULEB128(4, "Augmentation size"); - - if (EHFrameInfo.hasLandingPads) - EmitReference("exception", EHFrameInfo.Number, true, true); - else - Asm->OutStreamer.EmitIntValue(0, 4/*size*/, 0/*addrspace*/); - } else { - EmitULEB128(TD->getPointerSize(), "Augmentation size"); - - if (EHFrameInfo.hasLandingPads) { - EmitReference("exception", EHFrameInfo.Number, true, false); - } else { - Asm->OutStreamer.EmitIntValue(0, TD->getPointerSize(), - 0/*addrspace*/); - } - } + EmitULEB128(Size, "Augmentation size"); + if (EHFrameInfo.hasLandingPads) + EmitReference("exception", EHFrameInfo.Number, LSDAEncoding); + else + Asm->OutStreamer.EmitIntValue(0, Size/*size*/, 0/*addrspace*/); EOL("Language Specific Data Area"); } else { @@ -406,20 +367,22 @@ ComputeActionsTable(const SmallVectorImpl<const LandingPadInfo*> &LandingPads, if (NumShared < TypeIds.size()) { unsigned SizeAction = 0; - ActionEntry *PrevAction = 0; + unsigned PrevAction = (unsigned)-1; if (NumShared) { const unsigned SizePrevIds = PrevLPI->TypeIds.size(); assert(Actions.size()); - PrevAction = &Actions.back(); - SizeAction = MCAsmInfo::getSLEB128Size(PrevAction->NextAction) + - MCAsmInfo::getSLEB128Size(PrevAction->ValueForTypeID); + PrevAction = Actions.size() - 1; + SizeAction = + MCAsmInfo::getSLEB128Size(Actions[PrevAction].NextAction) + + MCAsmInfo::getSLEB128Size(Actions[PrevAction].ValueForTypeID); for (unsigned j = NumShared; j != SizePrevIds; ++j) { + assert(PrevAction != (unsigned)-1 && "PrevAction is invalid!"); SizeAction -= - MCAsmInfo::getSLEB128Size(PrevAction->ValueForTypeID); - SizeAction += -PrevAction->NextAction; - PrevAction = PrevAction->Previous; + MCAsmInfo::getSLEB128Size(Actions[PrevAction].ValueForTypeID); + SizeAction += -Actions[PrevAction].NextAction; + PrevAction = Actions[PrevAction].Previous; } } @@ -436,7 +399,7 @@ ComputeActionsTable(const SmallVectorImpl<const LandingPadInfo*> &LandingPads, ActionEntry Action = { ValueForTypeID, NextAction, PrevAction }; Actions.push_back(Action); - PrevAction = &Actions.back(); + PrevAction = Actions.size() - 1; } // Record the first action of the landing pad site. @@ -446,7 +409,7 @@ ComputeActionsTable(const SmallVectorImpl<const LandingPadInfo*> &LandingPads, // Information used when created the call-site table. The action record // field of the call site record is the offset of the first associated // action record, relative to the start of the actions table. This value is - // biased by 1 (1 in dicating the start of the actions table), and 0 + // biased by 1 (1 indicating the start of the actions table), and 0 // indicates that there are no actions. FirstActions.push_back(FirstAction); @@ -579,7 +542,16 @@ ComputeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites, } // Otherwise, create a new call-site. - CallSites.push_back(Site); + if (MAI->getExceptionHandlingType() == ExceptionHandling::Dwarf) + CallSites.push_back(Site); + else { + // SjLj EH must maintain the call sites in the order assigned + // to them by the SjLjPrepare pass. + unsigned SiteNo = MMI->getCallSiteBeginLabel(BeginLabel); + if (CallSites.size() < SiteNo) + CallSites.resize(SiteNo); + CallSites[SiteNo - 1] = Site; + } PreviousIsInvoke = true; } else { // Create a gap. @@ -638,8 +610,7 @@ void DwarfException::EmitExceptionTable() { // landing pad site. 
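// The SjLj branch added to ComputeCallSiteTable above must honor the site
// numbering assigned by the SjLjPrepare pass rather than visitation order.
// A minimal standalone sketch of that placement; CallSiteEntrySketch is a
// hypothetical stand-in for the real CallSiteEntry:
#include <vector>

struct CallSiteEntrySketch {
  unsigned BeginLabel, EndLabel, PadLabel, Action;
};

// SiteNo is the 1-based index assigned by SjLjPrepare. The table grows on
// demand, and the site always lands at slot SiteNo-1 no matter what order
// the invokes are visited in.
static void placeSjLjCallSite(std::vector<CallSiteEntrySketch> &CallSites,
                              unsigned SiteNo,
                              const CallSiteEntrySketch &Site) {
  if (CallSites.size() < SiteNo)
    CallSites.resize(SiteNo);
  CallSites[SiteNo - 1] = Site;
}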
SmallVector<ActionEntry, 32> Actions; SmallVector<unsigned, 64> FirstActions; - unsigned SizeActions = ComputeActionsTable(LandingPads, Actions, - FirstActions); + unsigned SizeActions=ComputeActionsTable(LandingPads, Actions, FirstActions); // Invokes and nounwind calls have entries in PadMap (due to being bracketed // by try-range labels when lowered). Ordinary calls do not, so appropriate @@ -683,13 +654,13 @@ void DwarfException::EmitExceptionTable() { // Type infos. const MCSection *LSDASection = Asm->getObjFileLowering().getLSDASection(); - unsigned TTypeFormat; + unsigned TTypeEncoding; unsigned TypeFormatSize; if (!HaveTTData) { // For SjLj exceptions, if there is no TypeInfo, then we just explicitly say // that we're omitting that bit. - TTypeFormat = dwarf::DW_EH_PE_omit; + TTypeEncoding = dwarf::DW_EH_PE_omit; TypeFormatSize = SizeOfEncodedValue(dwarf::DW_EH_PE_absptr); } else { // Okay, we have actual filters or typeinfos to emit. As such, we need to @@ -719,14 +690,8 @@ void DwarfException::EmitExceptionTable() { // somewhere. This predicate should be moved to a shared location that is // in target-independent code. // - if (LSDASection->getKind().isWriteable() || - Asm->TM.getRelocationModel() == Reloc::Static) - TTypeFormat = dwarf::DW_EH_PE_absptr; - else - TTypeFormat = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel | - dwarf::DW_EH_PE_sdata4; - - TypeFormatSize = SizeOfEncodedValue(TTypeFormat); + TTypeEncoding = Asm->getObjFileLowering().getTTypeEncoding(); + TypeFormatSize = SizeOfEncodedValue(TTypeEncoding); } // Begin the exception table. @@ -752,7 +717,7 @@ void DwarfException::EmitExceptionTable() { // does, instead output it before the table. unsigned SizeTypes = TypeInfos.size() * TypeFormatSize; unsigned TyOffset = sizeof(int8_t) + // Call site format - MCAsmInfo::getULEB128Size(SizeSites) + // Call-site table length + MCAsmInfo::getULEB128Size(SizeSites) + // Call site table length SizeSites + SizeActions + SizeTypes; unsigned TotalSize = sizeof(int8_t) + // LPStart format sizeof(int8_t) + // TType format @@ -777,7 +742,7 @@ void DwarfException::EmitExceptionTable() { // Emit the header. EmitEncodingByte(dwarf::DW_EH_PE_omit, "@LPStart"); - EmitEncodingByte(TTypeFormat, "@TType"); + EmitEncodingByte(TTypeEncoding, "@TType"); if (HaveTTData) EmitULEB128(TyOffset, "@TType base offset"); @@ -826,7 +791,7 @@ void DwarfException::EmitExceptionTable() { // Emit the landing pad call site table. EmitEncodingByte(dwarf::DW_EH_PE_udata4, "Call site"); - EmitULEB128(SizeSites, "Call site table size"); + EmitULEB128(SizeSites, "Call site table length"); for (SmallVectorImpl<CallSiteEntry>::const_iterator I = CallSites.begin(), E = CallSites.end(); I != E; ++I) { @@ -859,13 +824,14 @@ void DwarfException::EmitExceptionTable() { // Offset of the landing pad, counted in 16-byte bundles relative to the // @LPStart address. - if (!S.PadLabel) + if (!S.PadLabel) { + Asm->OutStreamer.AddComment("Landing pad"); Asm->OutStreamer.EmitIntValue(0, 4/*size*/, 0/*addrspace*/); - else + } else { EmitSectionOffset("label", "eh_func_begin", S.PadLabel, SubprogramCount, true, true); - - EOL("Landing pad"); + EOL("Landing pad"); + } // Offset of the first associated action record, relative to the start of // the action table. This value is biased by 1 (1 indicates the start of @@ -875,38 +841,43 @@ void DwarfException::EmitExceptionTable() { } // Emit the Action Table. 
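// The action-table length arithmetic above leans on SLEB128/ULEB128 sizes.
// A standalone sketch of the underlying LEB128 size rules (not the
// MCAsmInfo::getULEB128Size/getSLEB128Size helpers themselves):
#include <cstdint>

// Unsigned LEB128: seven payload bits per byte, so the size is the number
// of 7-bit groups needed to hold the value.
static unsigned sizeULEB128(uint64_t Value) {
  unsigned Size = 0;
  do {
    Value >>= 7;
    ++Size;
  } while (Value != 0);
  return Size;
}

// Signed LEB128: stop once the remaining value is all sign bits and the
// sign bit of the last emitted byte agrees with it.
static unsigned sizeSLEB128(int64_t Value) {
  unsigned Size = 0;
  bool More;
  do {
    uint8_t Byte = Value & 0x7f;
    Value >>= 7; // arithmetic shift assumed, as in common implementations
    More = !((Value == 0 && (Byte & 0x40) == 0) ||
             (Value == -1 && (Byte & 0x40) != 0));
    ++Size;
  } while (More);
  return Size;
}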
+ if (Actions.size() != 0) EOL("-- Action Record Table --"); for (SmallVectorImpl<ActionEntry>::const_iterator I = Actions.begin(), E = Actions.end(); I != E; ++I) { const ActionEntry &Action = *I; + EOL("Action Record:"); // Type Filter // // Used by the runtime to match the type of the thrown exception to the // type of the catch clauses or the types in the exception specification. - EmitSLEB128(Action.ValueForTypeID, "TypeInfo index"); + EmitSLEB128(Action.ValueForTypeID, " TypeInfo index"); // Action Record // // Self-relative signed displacement in bytes of the next action record, // or 0 if there is no next action record. - EmitSLEB128(Action.NextAction, "Next action"); + EmitSLEB128(Action.NextAction, " Next action"); } // Emit the Catch TypeInfos. + if (!TypeInfos.empty()) EOL("-- Catch TypeInfos --"); for (std::vector<GlobalVariable *>::const_reverse_iterator I = TypeInfos.rbegin(), E = TypeInfos.rend(); I != E; ++I) { const GlobalVariable *GV = *I; - PrintRelDirective(); - if (GV) - O << *Asm->GetGlobalValueSymbol(GV); - else + if (GV) { + EmitReference(GV, TTypeEncoding); + EOL("TypeInfo"); + } else { + PrintRelDirective(TTypeEncoding); O << "0x0"; - - EOL("TypeInfo"); + EOL(""); + } } // Emit the Exception Specifications. + if (!FilterIds.empty()) EOL("-- Filter IDs --"); for (std::vector<unsigned>::const_iterator I = FilterIds.begin(), E = FilterIds.end(); I < E; ++I) { unsigned TypeID = *I; @@ -943,7 +914,7 @@ void DwarfException::EndModule() { /// BeginFunction - Gather pre-function exception information. Assumes it's /// being emitted immediately after the function entry point. -void DwarfException::BeginFunction(MachineFunction *MF) { +void DwarfException::BeginFunction(const MachineFunction *MF) { if (!MMI || !MAI->doesSupportExceptionHandling()) return; if (TimePassesIsEnabled) diff --git a/lib/CodeGen/AsmPrinter/DwarfException.h b/lib/CodeGen/AsmPrinter/DwarfException.h index 3921e91..3db1a00 100644 --- a/lib/CodeGen/AsmPrinter/DwarfException.h +++ b/lib/CodeGen/AsmPrinter/DwarfException.h @@ -76,9 +76,6 @@ class DwarfException : public DwarfPrinter { /// ExceptionTimer - Timer for the Dwarf exception writer. Timer *ExceptionTimer; - /// SizeOfEncodedValue - Return the size of the encoding in bytes. - unsigned SizeOfEncodedValue(unsigned Encoding); - /// EmitCIE - Emit a Common Information Entry (CIE). This holds information /// that is shared among many Frame Description Entries. There is at least /// one CIE in every non-empty .debug_frame section. @@ -103,7 +100,7 @@ class DwarfException : public DwarfPrinter { /// exception. If it matches then the exception and type id are passed /// on to the landing pad. Otherwise the next action is looked up. This /// chain is terminated with a next action of zero. If no type id is - /// found the the frame is unwound and handling continues. + /// found the frame is unwound and handling continues. /// 3. Type id table contains references to all the C++ typeinfo for all /// catches in the function. This table is reverse indexed, base 1. @@ -135,7 +132,7 @@ class DwarfException : public DwarfPrinter { struct ActionEntry { int ValueForTypeID; // The value to write - may not be equal to the type id. int NextAction; - struct ActionEntry *Previous; + unsigned Previous; }; /// CallSiteEntry - Structure describing an entry in the call-site table. @@ -197,7 +194,7 @@ public: /// BeginFunction - Gather pre-function exception information. Assumes being /// emitted immediately after the function entry point.
- void BeginFunction(MachineFunction *MF); + void BeginFunction(const MachineFunction *MF); /// EndFunction - Gather and emit post-function exception information. void EndFunction(); diff --git a/lib/CodeGen/AsmPrinter/DwarfPrinter.cpp b/lib/CodeGen/AsmPrinter/DwarfPrinter.cpp index d204bba..1299d04 100644 --- a/lib/CodeGen/AsmPrinter/DwarfPrinter.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfPrinter.cpp @@ -8,7 +8,7 @@ //===----------------------------------------------------------------------===// // // Emit general DWARF directives. -// +// //===----------------------------------------------------------------------===// #include "DwarfPrinter.h" @@ -18,13 +18,17 @@ #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCExpr.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetFrameInfo.h" +#include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Support/Dwarf.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/ADT/SmallString.h" using namespace llvm; DwarfPrinter::DwarfPrinter(raw_ostream &OS, AsmPrinter *A, const MCAsmInfo *T, @@ -33,6 +37,26 @@ DwarfPrinter::DwarfPrinter(raw_ostream &OS, AsmPrinter *A, const MCAsmInfo *T, RI(Asm->TM.getRegisterInfo()), M(NULL), MF(NULL), MMI(NULL), SubprogramCount(0), Flavor(flavor), SetCounter(1) {} +/// SizeOfEncodedValue - Return the size of the encoding in bytes. +unsigned DwarfPrinter::SizeOfEncodedValue(unsigned Encoding) const { + if (Encoding == dwarf::DW_EH_PE_omit) + return 0; + + switch (Encoding & 0x07) { + case dwarf::DW_EH_PE_absptr: + return TD->getPointerSize(); + case dwarf::DW_EH_PE_udata2: + return 2; + case dwarf::DW_EH_PE_udata4: + return 4; + case dwarf::DW_EH_PE_udata8: + return 8; + } + + assert(0 && "Invalid encoded value."); + return 0; +} + void DwarfPrinter::PrintRelDirective(bool Force32Bit, bool isInSection) const { if (isInSection && MAI->getDwarfSectionOffsetDirective()) O << MAI->getDwarfSectionOffsetDirective(); @@ -42,6 +66,14 @@ void DwarfPrinter::PrintRelDirective(bool Force32Bit, bool isInSection) const { O << MAI->getData64bitsDirective(); } +void DwarfPrinter::PrintRelDirective(unsigned Encoding) const { + unsigned Size = SizeOfEncodedValue(Encoding); + assert((Size == 4 || Size == 8) && "Do not support other types or rels!"); + + O << (Size == 4 ? + MAI->getData32bitsDirective() : MAI->getData64bitsDirective()); +} + /// EOL - Print a newline character to asm stream. If a comment is present /// then it will be printed first. Comments should not contain '\n'. void DwarfPrinter::EOL(const Twine &Comment) const { @@ -195,13 +227,38 @@ void DwarfPrinter::EmitReference(const MCSymbol *Sym, bool IsPCRelative, if (IsPCRelative) O << "-" << MAI->getPCSymbol(); } -/// EmitDifference - Emit the difference between two labels. Some assemblers do -/// not behave with absolute expressions with data directives, so there is an -/// option (needsSet) to use an intermediary set expression. 
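// SizeOfEncodedValue above keys off only the low three bits of a DW_EH_PE
// byte. A small standalone illustration, with constants written out per the
// DWARF exception-handling ABI (this is not LLVM's dwarf.h enum):
#include <cassert>

enum : unsigned {
  DW_EH_PE_absptr   = 0x00,
  DW_EH_PE_udata4   = 0x03,
  DW_EH_PE_sdata4   = 0x0B,
  DW_EH_PE_pcrel    = 0x10,
  DW_EH_PE_indirect = 0x80
};

int main() {
  // The application bits (pcrel, indirect) only change how the value is
  // interpreted; the low three format bits alone determine its size.
  unsigned Enc = DW_EH_PE_indirect | DW_EH_PE_pcrel | DW_EH_PE_sdata4;
  assert((Enc & 0x07) == (DW_EH_PE_udata4 & 0x07)); // both 4 bytes wide
  return 0;
}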
+void DwarfPrinter::EmitReference(const char *Tag, unsigned Number, + unsigned Encoding) const { + SmallString<64> Name; + raw_svector_ostream(Name) << MAI->getPrivateGlobalPrefix() + << Tag << Number; + + MCSymbol *Sym = Asm->OutContext.GetOrCreateSymbol(Name.str()); + EmitReference(Sym, Encoding); +} + +void DwarfPrinter::EmitReference(const MCSymbol *Sym, unsigned Encoding) const { + const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering(); + + PrintRelDirective(Encoding); + O << *TLOF.getSymbolForDwarfReference(Sym, Asm->MMI, Encoding); +} + +void DwarfPrinter::EmitReference(const GlobalValue *GV, unsigned Encoding) const { + const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering(); + + PrintRelDirective(Encoding); + O << *TLOF.getSymbolForDwarfGlobalReference(GV, Asm->Mang, + Asm->MMI, Encoding); +} + +/// EmitDifference - Emit the difference between two labels. If this assembler +/// supports .set, we emit a .set of a temporary and then use it in the .word. void DwarfPrinter::EmitDifference(const char *TagHi, unsigned NumberHi, const char *TagLo, unsigned NumberLo, bool IsSmall) { - if (MAI->needsSet()) { + if (MAI->hasSetDirective()) { + // FIXME: switch to OutStreamer.EmitAssignment. O << "\t.set\t"; PrintLabelName("set", SetCounter, Flavor); O << ","; @@ -232,7 +289,8 @@ void DwarfPrinter::EmitSectionOffset(const char* Label, const char* Section, else printAbsolute = MAI->isAbsoluteDebugSectionOffsets(); - if (MAI->needsSet() && useSet) { + if (MAI->hasSetDirective() && useSet) { + // FIXME: switch to OutStreamer.EmitAssignment. O << "\t.set\t"; PrintLabelName("set", SetCounter, Flavor); O << ","; diff --git a/lib/CodeGen/AsmPrinter/DwarfPrinter.h b/lib/CodeGen/AsmPrinter/DwarfPrinter.h index 86fe2ab..73de398 100644 --- a/lib/CodeGen/AsmPrinter/DwarfPrinter.h +++ b/lib/CodeGen/AsmPrinter/DwarfPrinter.h @@ -28,11 +28,14 @@ class Module; class MCAsmInfo; class TargetData; class TargetRegisterInfo; +class GlobalValue; class MCSymbol; class Twine; class DwarfPrinter { protected: + ~DwarfPrinter() {} + //===-------------------------------------------------------------==---===// // Core attributes used by the DWARF printer. // @@ -56,7 +59,7 @@ protected: Module *M; /// MF - Current machine function. - MachineFunction *MF; + const MachineFunction *MF; /// MMI - Collected machine module information. MachineModuleInfo *MMI; @@ -83,6 +86,10 @@ public: const MCAsmInfo *getMCAsmInfo() const { return MAI; } const TargetData *getTargetData() const { return TD; } + /// SizeOfEncodedValue - Return the size of the encoding in bytes. + unsigned SizeOfEncodedValue(unsigned Encoding) const; + + void PrintRelDirective(unsigned Encoding) const; void PrintRelDirective(bool Force32Bit = false, bool isInSection = false) const; @@ -138,9 +145,11 @@ public: void EmitReference(const MCSymbol *Sym, bool IsPCRelative = false, bool Force32Bit = false) const; - /// EmitDifference - Emit the difference between two labels. Some - /// assemblers do not behave with absolute expressions with data directives, - /// so there is an option (needsSet) to use an intermediary set expression. + void EmitReference(const char *Tag, unsigned Number, unsigned Encoding) const; + void EmitReference(const MCSymbol *Sym, unsigned Encoding) const; + void EmitReference(const GlobalValue *GV, unsigned Encoding) const; + + /// EmitDifference - Emit the difference between two labels.
void EmitDifference(const DWLabel &LabelHi, const DWLabel &LabelLo, bool IsSmall = false) { EmitDifference(LabelHi.getTag(), LabelHi.getNumber(), diff --git a/lib/CodeGen/AsmPrinter/DwarfWriter.cpp b/lib/CodeGen/AsmPrinter/DwarfWriter.cpp index dd8d88a..08e1bbc 100644 --- a/lib/CodeGen/AsmPrinter/DwarfWriter.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfWriter.cpp @@ -57,14 +57,14 @@ void DwarfWriter::EndModule() { /// BeginFunction - Gather pre-function debug information. Assumes being /// emitted immediately after the function entry point. -void DwarfWriter::BeginFunction(MachineFunction *MF) { +void DwarfWriter::BeginFunction(const MachineFunction *MF) { DE->BeginFunction(MF); DD->beginFunction(MF); } /// EndFunction - Gather and emit post-function debug information. /// -void DwarfWriter::EndFunction(MachineFunction *MF) { +void DwarfWriter::EndFunction(const MachineFunction *MF) { DD->endFunction(MF); DE->EndFunction(); diff --git a/lib/CodeGen/AsmPrinter/Makefile b/lib/CodeGen/AsmPrinter/Makefile index b0071d0..60aa6cb 100644 --- a/lib/CodeGen/AsmPrinter/Makefile +++ b/lib/CodeGen/AsmPrinter/Makefile @@ -9,6 +9,5 @@ LEVEL = ../../.. LIBRARYNAME = LLVMAsmPrinter -CXXFLAGS = -fno-rtti include $(LEVEL)/Makefile.common diff --git a/lib/CodeGen/BranchFolding.cpp b/lib/CodeGen/BranchFolding.cpp index 92849d3..faf4d95 100644 --- a/lib/CodeGen/BranchFolding.cpp +++ b/lib/CodeGen/BranchFolding.cpp @@ -133,7 +133,7 @@ bool BranchFolder::OptimizeImpDefsBlock(MachineBasicBlock *MBB) { SmallSet<unsigned, 4> ImpDefRegs; MachineBasicBlock::iterator I = MBB->begin(); while (I != MBB->end()) { - if (I->getOpcode() != TargetInstrInfo::IMPLICIT_DEF) + if (!I->isImplicitDef()) break; unsigned Reg = I->getOperand(0).getReg(); ImpDefRegs.insert(Reg); @@ -206,53 +206,56 @@ bool BranchFolder::OptimizeFunction(MachineFunction &MF, // See if any jump tables have become mergable or dead as the code generator // did its thing. MachineJumpTableInfo *JTI = MF.getJumpTableInfo(); + if (JTI == 0) { + delete RS; + return MadeChange; + } + const std::vector<MachineJumpTableEntry> &JTs = JTI->getJumpTables(); - if (!JTs.empty()) { - // Figure out how these jump tables should be merged. - std::vector<unsigned> JTMapping; - JTMapping.reserve(JTs.size()); - - // We always keep the 0th jump table. - JTMapping.push_back(0); - - // Scan the jump tables, seeing if there are any duplicates. Note that this - // is N^2, which should be fixed someday. - for (unsigned i = 1, e = JTs.size(); i != e; ++i) { - if (JTs[i].MBBs.empty()) - JTMapping.push_back(i); - else - JTMapping.push_back(JTI->getJumpTableIndex(JTs[i].MBBs)); - } - - // If a jump table was merge with another one, walk the function rewriting - // references to jump tables to reference the new JT ID's. Keep track of - // whether we see a jump table idx, if not, we can delete the JT. - BitVector JTIsLive(JTs.size()); - for (MachineFunction::iterator BB = MF.begin(), E = MF.end(); - BB != E; ++BB) { - for (MachineBasicBlock::iterator I = BB->begin(), E = BB->end(); - I != E; ++I) - for (unsigned op = 0, e = I->getNumOperands(); op != e; ++op) { - MachineOperand &Op = I->getOperand(op); - if (!Op.isJTI()) continue; - unsigned NewIdx = JTMapping[Op.getIndex()]; - Op.setIndex(NewIdx); - - // Remember that this JT is live. - JTIsLive.set(NewIdx); - } - } + // Figure out how these jump tables should be merged. + std::vector<unsigned> JTMapping; + JTMapping.reserve(JTs.size()); + + // We always keep the 0th jump table. 
+ JTMapping.push_back(0); + + // Scan the jump tables, seeing if there are any duplicates. Note that this + // is N^2, which should be fixed someday. + for (unsigned i = 1, e = JTs.size(); i != e; ++i) { + if (JTs[i].MBBs.empty()) + JTMapping.push_back(i); + else + JTMapping.push_back(JTI->getJumpTableIndex(JTs[i].MBBs)); + } - // Finally, remove dead jump tables. This happens either because the - // indirect jump was unreachable (and thus deleted) or because the jump - // table was merged with some other one. - for (unsigned i = 0, e = JTIsLive.size(); i != e; ++i) - if (!JTIsLive.test(i)) { - JTI->RemoveJumpTable(i); - MadeChange = true; + // If a jump table was merged with another one, walk the function rewriting + // references to jump tables to reference the new JT ID's. Keep track of + // whether we see a jump table idx, if not, we can delete the JT. + BitVector JTIsLive(JTs.size()); + for (MachineFunction::iterator BB = MF.begin(), E = MF.end(); + BB != E; ++BB) { + for (MachineBasicBlock::iterator I = BB->begin(), E = BB->end(); + I != E; ++I) + for (unsigned op = 0, e = I->getNumOperands(); op != e; ++op) { + MachineOperand &Op = I->getOperand(op); + if (!Op.isJTI()) continue; + unsigned NewIdx = JTMapping[Op.getIndex()]; + Op.setIndex(NewIdx); + + // Remember that this JT is live. + JTIsLive.set(NewIdx); } } + // Finally, remove dead jump tables. This happens either because the + // indirect jump was unreachable (and thus deleted) or because the jump + // table was merged with some other one. + for (unsigned i = 0, e = JTIsLive.size(); i != e; ++i) + if (!JTIsLive.test(i)) { + JTI->RemoveJumpTable(i); + MadeChange = true; + } + delete RS; return MadeChange; } @@ -337,7 +340,7 @@ static unsigned ComputeCommonTailLength(MachineBasicBlock *MBB1, // relative order. This is untenable because normal compiler // optimizations (like this one) may reorder and/or merge these // directives. - I1->getOpcode() == TargetInstrInfo::INLINEASM) { + I1->isInlineAsm()) { ++I1; ++I2; break; } @@ -957,7 +960,8 @@ ReoptimizeBlock: } // If MBB was the target of a jump table, update jump tables to go to the // fallthrough instead. - MF.getJumpTableInfo()->ReplaceMBBInJumpTables(MBB, FallThrough); + if (MachineJumpTableInfo *MJTI = MF.getJumpTableInfo()) + MJTI->ReplaceMBBInJumpTables(MBB, FallThrough); MadeChange = true; } return MadeChange; @@ -1191,7 +1195,8 @@ ReoptimizeBlock: } // Change any jumptables to go to the new MBB.
- MF.getJumpTableInfo()->ReplaceMBBInJumpTables(MBB, CurTBB); + if (MachineJumpTableInfo *MJTI = MF.getJumpTableInfo()) + MJTI->ReplaceMBBInJumpTables(MBB, CurTBB); if (DidChange) { ++NumBranchOpts; MadeChange = true; diff --git a/lib/CodeGen/CMakeLists.txt b/lib/CodeGen/CMakeLists.txt index 17072d3..62cf339 100644 --- a/lib/CodeGen/CMakeLists.txt +++ b/lib/CodeGen/CMakeLists.txt @@ -21,7 +21,6 @@ add_llvm_library(LLVMCodeGen LiveStackAnalysis.cpp LiveVariables.cpp LowerSubregs.cpp - MachOWriter.cpp MachineBasicBlock.cpp MachineDominators.cpp MachineFunction.cpp @@ -40,6 +39,7 @@ add_llvm_library(LLVMCodeGen ObjectCodeEmitter.cpp OcamlGC.cpp OptimizeExts.cpp + OptimizePHIs.cpp PHIElimination.cpp Passes.cpp PostRASchedulerList.cpp @@ -67,6 +67,7 @@ add_llvm_library(LLVMCodeGen StrongPHIElimination.cpp TailDuplication.cpp TargetInstrInfoImpl.cpp + TargetLoweringObjectFileImpl.cpp TwoAddressInstructionPass.cpp UnreachableBlockElim.cpp VirtRegMap.cpp diff --git a/lib/CodeGen/CalcSpillWeights.cpp b/lib/CodeGen/CalcSpillWeights.cpp index b8ef219..2bedd04 100644 --- a/lib/CodeGen/CalcSpillWeights.cpp +++ b/lib/CodeGen/CalcSpillWeights.cpp @@ -20,8 +20,8 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" - using namespace llvm; char CalculateSpillWeights::ID = 0; @@ -58,10 +58,7 @@ bool CalculateSpillWeights::runOnMachineFunction(MachineFunction &fn) { for (MachineBasicBlock::const_iterator mii = mbb->begin(), mie = mbb->end(); mii != mie; ++mii) { const MachineInstr *mi = mii; - if (tii->isIdentityCopy(*mi)) - continue; - - if (mi->getOpcode() == TargetInstrInfo::IMPLICIT_DEF) + if (tii->isIdentityCopy(*mi) || mi->isImplicitDef() || mi->isDebugValue()) continue; for (unsigned i = 0, e = mi->getNumOperands(); i != e; ++i) { diff --git a/lib/CodeGen/CodePlacementOpt.cpp b/lib/CodeGen/CodePlacementOpt.cpp index 126700b..a13a310 100644 --- a/lib/CodeGen/CodePlacementOpt.cpp +++ b/lib/CodeGen/CodePlacementOpt.cpp @@ -106,7 +106,7 @@ bool CodePlacementOpt::HasAnalyzableTerminator(MachineBasicBlock *MBB) { // At the time of this writing, there are blocks which AnalyzeBranch // thinks end in single uncoditional branches, yet which have two CFG // successors. Code in this file is not prepared to reason about such things. - if (!MBB->empty() && MBB->back().getOpcode() == TargetInstrInfo::EH_LABEL) + if (!MBB->empty() && MBB->back().isEHLabel()) return false; // Aggressively handle return blocks and similar constructs. @@ -115,7 +115,7 @@ bool CodePlacementOpt::HasAnalyzableTerminator(MachineBasicBlock *MBB) { // Ask the target's AnalyzeBranch if it can handle this block. MachineBasicBlock *TBB = 0, *FBB = 0; SmallVector<MachineOperand, 4> Cond; - // Make the the terminator is understood. + // Make sure the terminator is understood. if (TII->AnalyzeBranch(*MBB, TBB, FBB, Cond)) return false; // Make sure we have the option of reversing the condition. 
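// The jump-table merging that BranchFolding now guards with a null check
// reduces to two steps: map each table to its first identical twin, then
// rewrite operands through the map. A standalone sketch under simplified
// types, where a table is just its list of targets (the real code uses
// MachineJumpTableInfo::getJumpTableIndex, which also finds duplicates):
#include <vector>

// Map each jump table to the first table with identical targets. Quadratic,
// as the comment in the patch concedes ("N^2, which should be fixed
// someday").
static std::vector<unsigned>
buildJTMapping(const std::vector<std::vector<int> > &JTs) {
  std::vector<unsigned> JTMapping;
  JTMapping.reserve(JTs.size());
  for (unsigned i = 0, e = JTs.size(); i != e; ++i) {
    unsigned Canon = i;
    for (unsigned j = 0; j != i; ++j)
      if (JTs[j] == JTs[i]) { Canon = j; break; }
    JTMapping.push_back(Canon);
  }
  return JTMapping;
}

// Every jump-table operand is then rewritten through JTMapping, and any
// table whose index is never seen afterwards can be removed as dead.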
diff --git a/lib/CodeGen/DeadMachineInstructionElim.cpp b/lib/CodeGen/DeadMachineInstructionElim.cpp index 0982eab..a215a19 100644 --- a/lib/CodeGen/DeadMachineInstructionElim.cpp +++ b/lib/CodeGen/DeadMachineInstructionElim.cpp @@ -11,6 +11,7 @@ // //===----------------------------------------------------------------------===// +#define DEBUG_TYPE "codegen-dce" #include "llvm/CodeGen/Passes.h" #include "llvm/Pass.h" #include "llvm/CodeGen/MachineFunctionPass.h" @@ -19,8 +20,11 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" +#include "llvm/ADT/Statistic.h" using namespace llvm; +STATISTIC(NumDeletes, "Number of dead instructions deleted"); + namespace { class DeadMachineInstructionElim : public MachineFunctionPass { virtual bool runOnMachineFunction(MachineFunction &MF); @@ -51,7 +55,7 @@ FunctionPass *llvm::createDeadMachineInstructionElimPass() { bool DeadMachineInstructionElim::isDead(const MachineInstr *MI) const { // Don't delete instructions with side effects. bool SawStore = false; - if (!MI->isSafeToMove(TII, SawStore, 0)) + if (!MI->isSafeToMove(TII, SawStore, 0) && !MI->isPHI()) return false; // Examine each operand. @@ -60,8 +64,8 @@ bool DeadMachineInstructionElim::isDead(const MachineInstr *MI) const { if (MO.isReg() && MO.isDef()) { unsigned Reg = MO.getReg(); if (TargetRegisterInfo::isPhysicalRegister(Reg) ? - LivePhysRegs[Reg] : !MRI->use_empty(Reg)) { - // This def has a use. Don't delete the instruction! + LivePhysRegs[Reg] : !MRI->use_nodbg_empty(Reg)) { + // This def has a non-debug use. Don't delete the instruction! return false; } } @@ -110,8 +114,31 @@ bool DeadMachineInstructionElim::runOnMachineFunction(MachineFunction &MF) { // If the instruction is dead, delete it! if (isDead(MI)) { DEBUG(dbgs() << "DeadMachineInstructionElim: DELETING: " << *MI); + // It is possible that some DBG_VALUE instructions refer to this + // instruction. Examine each def operand for such references; + // if found, mark the DBG_VALUE as undef (but don't delete it). + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg() || !MO.isDef()) + continue; + unsigned Reg = MO.getReg(); + if (!TargetRegisterInfo::isVirtualRegister(Reg)) + continue; + MachineRegisterInfo::use_iterator nextI; + for (MachineRegisterInfo::use_iterator I = MRI->use_begin(Reg), + E = MRI->use_end(); I!=E; I=nextI) { + nextI = llvm::next(I); // I is invalidated by the setReg + MachineOperand& Use = I.getOperand(); + MachineInstr *UseMI = Use.getParent(); + if (UseMI==MI) + continue; + assert(Use.isDebug()); + UseMI->getOperand(0).setReg(0U); + } + } AnyChanges = true; MI->eraseFromParent(); + ++NumDeletes; MIE = MBB->rend(); // MII is now pointing to the next instruction to process, // so don't increment it. diff --git a/lib/CodeGen/ELFCodeEmitter.cpp b/lib/CodeGen/ELFCodeEmitter.cpp index 11a85a0..8416d3b 100644 --- a/lib/CodeGen/ELFCodeEmitter.cpp +++ b/lib/CodeGen/ELFCodeEmitter.cpp @@ -62,7 +62,8 @@ void ELFCodeEmitter::startFunction(MachineFunction &MF) { // They need to be emitted before the function because in some targets // the latter may reference JT or CP entry address.
emitConstantPool(MF.getConstantPool()); - emitJumpTables(MF.getJumpTableInfo()); + if (MF.getJumpTableInfo()) + emitJumpTables(MF.getJumpTableInfo()); } /// finishFunction - This callback is invoked after the function is completely @@ -84,7 +85,7 @@ bool ELFCodeEmitter::finishFunction(MachineFunction &MF) { // Patch up Jump Table Section relocations to use the real MBBs offsets // now that the MBB label offsets inside the function are known. - if (!MF.getJumpTableInfo()->isEmpty()) { + if (MF.getJumpTableInfo()) { ELFSection &JTSection = EW.getJumpTableSection(); for (std::vector<MachineRelocation>::iterator MRI = JTRelocations.begin(), MRE = JTRelocations.end(); MRI != MRE; ++MRI) { @@ -172,7 +173,7 @@ void ELFCodeEmitter::emitJumpTables(MachineJumpTableInfo *MJTI) { "PIC codegen not yet handled for elf jump tables!"); const TargetELFWriterInfo *TEW = TM.getELFWriterInfo(); - unsigned EntrySize = MJTI->getEntrySize(); + unsigned EntrySize = 4; //MJTI->getEntrySize(); // Get the ELF Section to emit the jump table ELFSection &JTSection = EW.getJumpTableSection(); diff --git a/lib/CodeGen/ELFWriter.cpp b/lib/CodeGen/ELFWriter.cpp index de45e09..0979c04 100644 --- a/lib/CodeGen/ELFWriter.cpp +++ b/lib/CodeGen/ELFWriter.cpp @@ -37,7 +37,6 @@ #include "llvm/PassManager.h" #include "llvm/DerivedTypes.h" #include "llvm/CodeGen/BinaryObject.h" -#include "llvm/CodeGen/FileWriters.h" #include "llvm/CodeGen/MachineCodeEmitter.h" #include "llvm/CodeGen/ObjectCodeEmitter.h" #include "llvm/CodeGen/MachineCodeEmitter.h" @@ -59,15 +58,6 @@ using namespace llvm; char ELFWriter::ID = 0; -/// AddELFWriter - Add the ELF writer to the function pass manager -ObjectCodeEmitter *llvm::AddELFWriter(PassManagerBase &PM, - raw_ostream &O, - TargetMachine &TM) { - ELFWriter *EW = new ELFWriter(O, TM); - PM.add(EW); - return EW->getObjectCodeEmitter(); -} - //===----------------------------------------------------------------------===// // ELFWriter Implementation //===----------------------------------------------------------------------===// diff --git a/lib/CodeGen/ExactHazardRecognizer.cpp b/lib/CodeGen/ExactHazardRecognizer.cpp index 266c74c..61959bb 100644 --- a/lib/CodeGen/ExactHazardRecognizer.cpp +++ b/lib/CodeGen/ExactHazardRecognizer.cpp @@ -7,7 +7,7 @@ // //===----------------------------------------------------------------------===// // -// This implements a a hazard recognizer using the instructions itineraries +// This implements a hazard recognizer using the instructions itineraries // defined for the current target. // //===----------------------------------------------------------------------===// diff --git a/lib/CodeGen/GCStrategy.cpp b/lib/CodeGen/GCStrategy.cpp index 79b2986..b5006fd 100644 --- a/lib/CodeGen/GCStrategy.cpp +++ b/lib/CodeGen/GCStrategy.cpp @@ -335,7 +335,7 @@ unsigned MachineCodeAnalysis::InsertLabel(MachineBasicBlock &MBB, unsigned Label = MMI->NextLabelID(); BuildMI(MBB, MI, DL, - TII->get(TargetInstrInfo::GC_LABEL)).addImm(Label); + TII->get(TargetOpcode::GC_LABEL)).addImm(Label); return Label; } diff --git a/lib/CodeGen/IntrinsicLowering.cpp b/lib/CodeGen/IntrinsicLowering.cpp index 9997a48..87ab7ef 100644 --- a/lib/CodeGen/IntrinsicLowering.cpp +++ b/lib/CodeGen/IntrinsicLowering.cpp @@ -155,7 +155,7 @@ void IntrinsicLowering::AddPrototypes(Module &M) { /// LowerBSWAP - Emit the code to lower bswap of V before the specified /// instruction IP. 
static Value *LowerBSWAP(LLVMContext &Context, Value *V, Instruction *IP) { - assert(V->getType()->isInteger() && "Can't bswap a non-integer type!"); + assert(V->getType()->isIntegerTy() && "Can't bswap a non-integer type!"); unsigned BitSize = V->getType()->getPrimitiveSizeInBits(); @@ -251,7 +251,7 @@ static Value *LowerBSWAP(LLVMContext &Context, Value *V, Instruction *IP) { /// LowerCTPOP - Emit the code to lower ctpop of V before the specified /// instruction IP. static Value *LowerCTPOP(LLVMContext &Context, Value *V, Instruction *IP) { - assert(V->getType()->isInteger() && "Can't ctpop a non-integer type!"); + assert(V->getType()->isIntegerTy() && "Can't ctpop a non-integer type!"); static const uint64_t MaskValues[6] = { 0x5555555555555555ULL, 0x3333333333333333ULL, diff --git a/lib/CodeGen/LLVMTargetMachine.cpp b/lib/CodeGen/LLVMTargetMachine.cpp index 837e184..278de02 100644 --- a/lib/CodeGen/LLVMTargetMachine.cpp +++ b/lib/CodeGen/LLVMTargetMachine.cpp @@ -14,16 +14,20 @@ #include "llvm/Target/TargetMachine.h" #include "llvm/PassManager.h" #include "llvm/Pass.h" +#include "llvm/Analysis/Verifier.h" #include "llvm/Assembly/PrintModulePass.h" #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/CodeGen/Passes.h" -#include "llvm/CodeGen/FileWriters.h" #include "llvm/CodeGen/GCStrategy.h" #include "llvm/CodeGen/MachineFunctionAnalysis.h" #include "llvm/Target/TargetOptions.h" #include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/Target/TargetData.h" #include "llvm/Target/TargetRegistry.h" #include "llvm/Transforms/Scalar.h" +#include "llvm/ADT/OwningPtr.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/FormattedStream.h" @@ -57,14 +61,24 @@ static cl::opt<bool> PrintLSR("print-lsr-output", cl::Hidden, cl::desc("Print LLVM IR produced by the loop-reduce pass")); static cl::opt<bool> PrintISelInput("print-isel-input", cl::Hidden, cl::desc("Print LLVM IR input to isel pass")); -static cl::opt<bool> PrintEmittedAsm("print-emitted-asm", cl::Hidden, - cl::desc("Dump emitter generated instructions as assembly")); static cl::opt<bool> PrintGCInfo("print-gc", cl::Hidden, cl::desc("Dump garbage collector data")); static cl::opt<bool> VerifyMachineCode("verify-machineinstrs", cl::Hidden, cl::desc("Verify generated machine code"), cl::init(getenv("LLVM_VERIFY_MACHINEINSTRS")!=NULL)); +static cl::opt<cl::boolOrDefault> +AsmVerbose("asm-verbose", cl::desc("Add comments to directives."), + cl::init(cl::BOU_UNSET)); + +static bool getVerboseAsm() { + switch (AsmVerbose) { + default: + case cl::BOU_UNSET: return TargetMachine::getAsmVerbosityDefault(); + case cl::BOU_TRUE: return true; + case cl::BOU_FALSE: return false; + } +} // Enable or disable FastISel. Both options are needed, because // FastISel is enabled by default with -fast, and we wish to be @@ -98,139 +112,81 @@ LLVMTargetMachine::setCodeModelForStatic() { setCodeModel(CodeModel::Small); } -FileModel::Model -LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM, - formatted_raw_ostream &Out, - CodeGenFileType FileType, - CodeGenOpt::Level OptLevel) { +bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM, + formatted_raw_ostream &Out, + CodeGenFileType FileType, + CodeGenOpt::Level OptLevel) { // Add common CodeGen passes. 
if (addCommonCodeGenPasses(PM, OptLevel)) - return FileModel::Error; + return true; + OwningPtr<MCContext> Context(new MCContext()); + OwningPtr<MCStreamer> AsmStreamer; + + formatted_raw_ostream *LegacyOutput; switch (FileType) { - default: + default: return true; + case CGFT_AssemblyFile: { + const MCAsmInfo &MAI = *getMCAsmInfo(); + MCInstPrinter *InstPrinter = + getTarget().createMCInstPrinter(MAI.getAssemblerDialect(), MAI, Out); + AsmStreamer.reset(createAsmStreamer(*Context, Out, MAI, + getTargetData()->isLittleEndian(), + getVerboseAsm(), InstPrinter, + /*codeemitter*/0)); + // Set the AsmPrinter's "O" to the output file. + LegacyOutput = &Out; break; - case TargetMachine::AssemblyFile: - if (addAssemblyEmitter(PM, OptLevel, getAsmVerbosityDefault(), Out)) - return FileModel::Error; - return FileModel::AsmFile; - case TargetMachine::ObjectFile: - if (!addObjectFileEmitter(PM, OptLevel, Out)) - return FileModel::MachOFile; - else if (getELFWriterInfo()) - return FileModel::ElfFile; } - return FileModel::Error; -} - -bool LLVMTargetMachine::addAssemblyEmitter(PassManagerBase &PM, - CodeGenOpt::Level OptLevel, - bool Verbose, - formatted_raw_ostream &Out) { + case CGFT_ObjectFile: { + // Create the code emitter for the target if it exists. If not, .o file + // emission fails. + MCCodeEmitter *MCE = getTarget().createCodeEmitter(*this, *Context); + if (MCE == 0) + return true; + + AsmStreamer.reset(createMachOStreamer(*Context, Out, MCE)); + + // Any output to the asmprinter's "O" stream is bad and needs to be fixed, + // force it to come out on stderr. + // FIXME: this is horrible and leaks, eventually remove the raw_ostream from + // asmprinter. + LegacyOutput = new formatted_raw_ostream(errs()); + break; + } + case CGFT_Null: + // The Null output is intended for performance analysis and testing, + // not for real users. + AsmStreamer.reset(createNullStreamer(*Context)); + // Any output to the asmprinter's "O" stream is bad and needs to be fixed, + // force it to come out on stderr. + // FIXME: this is horrible and leaks, eventually remove the raw_ostream from + // asmprinter. + LegacyOutput = new formatted_raw_ostream(errs()); + break; + } + + // Create the AsmPrinter, which takes ownership of Context and AsmStreamer + // if successful. FunctionPass *Printer = - getTarget().createAsmPrinter(Out, *this, getMCAsmInfo(), Verbose); - if (!Printer) - return true; - - PM.add(Printer); - return false; -} - -bool LLVMTargetMachine::addObjectFileEmitter(PassManagerBase &PM, - CodeGenOpt::Level OptLevel, - formatted_raw_ostream &Out) { - MCCodeEmitter *Emitter = getTarget().createCodeEmitter(*this); - if (!Emitter) + getTarget().createAsmPrinter(*LegacyOutput, *this, *Context, *AsmStreamer, + getMCAsmInfo()); + if (Printer == 0) return true; - PM.add(createMachOWriter(Out, *this, getMCAsmInfo(), Emitter)); - return false; -} - -/// addPassesToEmitFileFinish - If the passes to emit the specified file had to -/// be split up (e.g., to add an object writer pass), this method can be used to -/// finish up adding passes to emit the file, if necessary. -bool LLVMTargetMachine::addPassesToEmitFileFinish(PassManagerBase &PM, - MachineCodeEmitter *MCE, - CodeGenOpt::Level OptLevel) { - // Make sure the code model is set. - setCodeModelForStatic(); + // If successful, createAsmPrinter took ownership of AsmStreamer and Context.
+ Context.take(); AsmStreamer.take(); - if (MCE) - addSimpleCodeEmitter(PM, OptLevel, *MCE); - if (PrintEmittedAsm) - addAssemblyEmitter(PM, OptLevel, true, ferrs()); - - PM.add(createGCInfoDeleter()); - - return false; // success! -} - -/// addPassesToEmitFileFinish - If the passes to emit the specified file had to -/// be split up (e.g., to add an object writer pass), this method can be used to -/// finish up adding passes to emit the file, if necessary. -bool LLVMTargetMachine::addPassesToEmitFileFinish(PassManagerBase &PM, - JITCodeEmitter *JCE, - CodeGenOpt::Level OptLevel) { - // Make sure the code model is set. - setCodeModelForJIT(); + PM.add(Printer); - if (JCE) - addSimpleCodeEmitter(PM, OptLevel, *JCE); - if (PrintEmittedAsm) - addAssemblyEmitter(PM, OptLevel, true, ferrs()); - - PM.add(createGCInfoDeleter()); - - return false; // success! -} - -/// addPassesToEmitFileFinish - If the passes to emit the specified file had to -/// be split up (e.g., to add an object writer pass), this method can be used to -/// finish up adding passes to emit the file, if necessary. -bool LLVMTargetMachine::addPassesToEmitFileFinish(PassManagerBase &PM, - ObjectCodeEmitter *OCE, - CodeGenOpt::Level OptLevel) { // Make sure the code model is set. setCodeModelForStatic(); - - if (OCE) - addSimpleCodeEmitter(PM, OptLevel, *OCE); - if (PrintEmittedAsm) - addAssemblyEmitter(PM, OptLevel, true, ferrs()); - - PM.add(createGCInfoDeleter()); - - return false; // success! -} - -/// addPassesToEmitMachineCode - Add passes to the specified pass manager to -/// get machine code emitted. This uses a MachineCodeEmitter object to handle -/// actually outputting the machine code and resolving things like the address -/// of functions. This method should returns true if machine code emission is -/// not supported. -/// -bool LLVMTargetMachine::addPassesToEmitMachineCode(PassManagerBase &PM, - MachineCodeEmitter &MCE, - CodeGenOpt::Level OptLevel) { - // Make sure the code model is set. - setCodeModelForJIT(); - - // Add common CodeGen passes. - if (addCommonCodeGenPasses(PM, OptLevel)) - return true; - - addCodeEmitter(PM, OptLevel, MCE); - if (PrintEmittedAsm) - addAssemblyEmitter(PM, OptLevel, true, ferrs()); - - PM.add(createGCInfoDeleter()); - - return false; // success! + return false; } /// addPassesToEmitMachineCode - Add passes to the specified pass manager to -/// get machine code emitted. This uses a MachineCodeEmitter object to handle +/// get machine code emitted. This uses a JITCodeEmitter object to handle /// actually outputting the machine code and resolving things like the address /// of functions. This method should return true if machine code emission is /// not supported. @@ -246,9 +202,6 @@ bool LLVMTargetMachine::addPassesToEmitMachineCode(PassManagerBase &PM, return true; addCodeEmitter(PM, OptLevel, JCE); - if (PrintEmittedAsm) - addAssemblyEmitter(PM, OptLevel, true, ferrs()); - PM.add(createGCInfoDeleter()); return false; // success!
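// The -asm-verbose plumbing above resolves a tri-state command-line value
// against a target default. A minimal sketch of the same pattern with
// hypothetical names (the real code uses cl::boolOrDefault and
// TargetMachine::getAsmVerbosityDefault):

// Tri-state: the flag may be explicitly true, explicitly false, or unset.
enum BoolOrDefault { BOU_UNSET, BOU_TRUE, BOU_FALSE };

static bool resolveVerboseAsm(BoolOrDefault Flag, bool TargetDefault) {
  switch (Flag) {
  case BOU_TRUE:  return true;          // user asked for comments
  case BOU_FALSE: return false;         // user suppressed comments
  case BOU_UNSET:
  default:        return TargetDefault; // fall back to the target's choice
  }
}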
@@ -282,6 +235,9 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM, PM.add(createLoopStrengthReducePass(getTargetLowering())); if (PrintLSR) PM.add(createPrintFunctionPass("\n\n*** Code after LSR ***\n", &dbgs())); +#ifndef NDEBUG + PM.add(createVerifierPass()); +#endif } // Turn exception handling constructs into something the code generators can @@ -339,6 +295,16 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM, printAndVerify(PM, "After Instruction Selection", /* allowDoubleDefs= */ true); + // Optimize PHIs before DCE: removing dead PHI cycles may make more + // instructions dead. + if (OptLevel != CodeGenOpt::None) + PM.add(createOptimizePHIsPass()); + + // Delete dead machine instructions regardless of optimization level. + PM.add(createDeadMachineInstructionElimPass()); + printAndVerify(PM, "After codegen DCE pass", + /* allowDoubleDefs= */ true); + if (OptLevel != CodeGenOpt::None) { PM.add(createOptimizeExtsPass()); if (!DisableMachineLICM) diff --git a/lib/CodeGen/LiveIntervalAnalysis.cpp b/lib/CodeGen/LiveIntervalAnalysis.cpp index 8746bf9..f6bf433 100644 --- a/lib/CodeGen/LiveIntervalAnalysis.cpp +++ b/lib/CodeGen/LiveIntervalAnalysis.cpp @@ -140,7 +140,7 @@ void LiveIntervals::printInstrs(raw_ostream &OS) const { << ":\t\t# derived from " << mbbi->getName() << "\n"; for (MachineBasicBlock::iterator mii = mbbi->begin(), mie = mbbi->end(); mii != mie; ++mii) { - if (mii->getOpcode()==TargetInstrInfo::DEBUG_VALUE) + if (mii->isDebugValue()) OS << SlotIndex::getEmptyKey() << '\t' << *mii; else OS << getInstructionIndex(mii) << '\t' << *mii; @@ -288,9 +288,7 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb, VNInfo *ValNo; MachineInstr *CopyMI = NULL; unsigned SrcReg, DstReg, SrcSubReg, DstSubReg; - if (mi->getOpcode() == TargetInstrInfo::EXTRACT_SUBREG || - mi->getOpcode() == TargetInstrInfo::INSERT_SUBREG || - mi->getOpcode() == TargetInstrInfo::SUBREG_TO_REG || + if (mi->isExtractSubreg() || mi->isInsertSubreg() || mi->isSubregToReg() || tii_->isMoveInstr(*mi, SrcReg, DstReg, SrcSubReg, DstSubReg)) CopyMI = mi; // Earlyclobbers move back one. @@ -460,9 +458,7 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb, VNInfo *ValNo; MachineInstr *CopyMI = NULL; unsigned SrcReg, DstReg, SrcSubReg, DstSubReg; - if (mi->getOpcode() == TargetInstrInfo::EXTRACT_SUBREG || - mi->getOpcode() == TargetInstrInfo::INSERT_SUBREG || - mi->getOpcode() == TargetInstrInfo::SUBREG_TO_REG || + if (mi->isExtractSubreg() || mi->isInsertSubreg() || mi->isSubregToReg()|| tii_->isMoveInstr(*mi, SrcReg, DstReg, SrcSubReg, DstSubReg)) CopyMI = mi; ValNo = interval.getNextValue(defIndex, CopyMI, true, VNInfoAllocator); @@ -516,6 +512,8 @@ void LiveIntervals::handlePhysicalRegisterDef(MachineBasicBlock *MBB, baseIndex = baseIndex.getNextIndex(); while (++mi != MBB->end()) { + if (mi->isDebugValue()) + continue; if (getInstructionFromIndex(baseIndex) == 0) baseIndex = indexes_->getNextNonNullIndex(baseIndex); @@ -531,8 +529,8 @@ void LiveIntervals::handlePhysicalRegisterDef(MachineBasicBlock *MBB, end = baseIndex.getDefIndex(); } else { // Another instruction redefines the register before it is ever read. - // Then the register is essentially dead at the instruction that defines - // it. Hence its interval is: + // Then the register is essentially dead at the instruction that + // defines it. 
Hence its interval is: // [defSlot(def), defSlot(def)+1) DEBUG(dbgs() << " dead"); end = start.getStoreIndex(); @@ -577,9 +575,7 @@ void LiveIntervals::handleRegisterDef(MachineBasicBlock *MBB, else if (allocatableRegs_[MO.getReg()]) { MachineInstr *CopyMI = NULL; unsigned SrcReg, DstReg, SrcSubReg, DstSubReg; - if (MI->getOpcode() == TargetInstrInfo::EXTRACT_SUBREG || - MI->getOpcode() == TargetInstrInfo::INSERT_SUBREG || - MI->getOpcode() == TargetInstrInfo::SUBREG_TO_REG || + if (MI->isExtractSubreg() || MI->isInsertSubreg() || MI->isSubregToReg() || tii_->isMoveInstr(*MI, SrcReg, DstReg, SrcSubReg, DstSubReg)) CopyMI = MI; handlePhysicalRegisterDef(MBB, MI, MIIdx, MO, @@ -612,8 +608,16 @@ void LiveIntervals::handleLiveInRegister(MachineBasicBlock *MBB, SlotIndex end = baseIndex; bool SeenDefUse = false; - - while (mi != MBB->end()) { + + MachineBasicBlock::iterator E = MBB->end(); + while (mi != E) { + if (mi->isDebugValue()) { + ++mi; + if (mi != E && !mi->isDebugValue()) { + baseIndex = indexes_->getNextNonNullIndex(baseIndex); + } + continue; + } if (mi->killsRegister(interval.reg, tri_)) { DEBUG(dbgs() << " killed"); end = baseIndex.getDefIndex(); @@ -631,7 +635,7 @@ void LiveIntervals::handleLiveInRegister(MachineBasicBlock *MBB, } ++mi; - if (mi != MBB->end()) { + if (mi != E && !mi->isDebugValue()) { baseIndex = indexes_->getNextNonNullIndex(baseIndex); } } @@ -671,6 +675,9 @@ void LiveIntervals::computeIntervals() { for (MachineFunction::iterator MBBI = mf_->begin(), E = mf_->end(); MBBI != E; ++MBBI) { MachineBasicBlock *MBB = MBBI; + if (MBB->empty()) + continue; + // Track the index of the current machine instr. SlotIndex MIIndex = getMBBStartIdx(MBB); DEBUG(dbgs() << MBB->getName() << ":\n"); @@ -693,7 +700,7 @@ void LiveIntervals::computeIntervals() { for (MachineBasicBlock::iterator MI = MBB->begin(), miEnd = MBB->end(); MI != miEnd; ++MI) { DEBUG(dbgs() << MIIndex << "\t" << *MI); - if (MI->getOpcode()==TargetInstrInfo::DEBUG_VALUE) + if (MI->isDebugValue()) continue; // Handle defs. @@ -742,7 +749,7 @@ unsigned LiveIntervals::getVNInfoSourceReg(const VNInfo *VNI) const { if (!VNI->getCopy()) return 0; - if (VNI->getCopy()->getOpcode() == TargetInstrInfo::EXTRACT_SUBREG) { + if (VNI->getCopy()->isExtractSubreg()) { // If it's extracting out of a physical register, return the sub-register. unsigned Reg = VNI->getCopy()->getOperand(1).getReg(); if (TargetRegisterInfo::isPhysicalRegister(Reg)) { @@ -756,8 +763,8 @@ unsigned LiveIntervals::getVNInfoSourceReg(const VNInfo *VNI) const { Reg = tri_->getSubReg(Reg, VNI->getCopy()->getOperand(2).getImm()); } return Reg; - } else if (VNI->getCopy()->getOpcode() == TargetInstrInfo::INSERT_SUBREG || - VNI->getCopy()->getOpcode() == TargetInstrInfo::SUBREG_TO_REG) + } else if (VNI->getCopy()->isInsertSubreg() || + VNI->getCopy()->isSubregToReg()) return VNI->getCopy()->getOperand(2).getReg(); unsigned SrcReg, DstReg, SrcSubReg, DstSubReg; @@ -919,7 +926,7 @@ bool LiveIntervals::tryFoldMemoryOperand(MachineInstr* &MI, SmallVector<unsigned, 2> &Ops, bool isSS, int Slot, unsigned Reg) { // If it is an implicit def instruction, just delete it. - if (MI->getOpcode() == TargetInstrInfo::IMPLICIT_DEF) { + if (MI->isImplicitDef()) { RemoveMachineInstrFromMaps(MI); vrm.RemoveMachineInstrFromMaps(MI); MI->eraseFromParent(); @@ -1059,7 +1066,7 @@ rewriteInstructionForSpills(const LiveInterval &li, const VNInfo *VNI, // If this is the rematerializable definition MI itself and // all of its uses are rematerialized, simply delete it. 
if (MI == ReMatOrigDefMI && CanDelete) { - DEBUG(dbgs() << "\t\t\t\tErasing re-materlizable def: " + DEBUG(dbgs() << "\t\t\t\tErasing re-materializable def: " << MI << '\n'); RemoveMachineInstrFromMaps(MI); vrm.RemoveMachineInstrFromMaps(MI); @@ -1302,6 +1309,12 @@ rewriteInstructionsForSpills(const LiveInterval &li, bool TrySplit, MachineInstr *MI = &*ri; MachineOperand &O = ri.getOperand(); ++ri; + if (MI->isDebugValue()) { + // Remove debug info for now. + O.setReg(0U); + DEBUG(dbgs() << "Removing debug info due to spill:" << "\t" << *MI); + continue; + } assert(!O.isImplicit() && "Spilling register that's used as implicit use?"); SlotIndex index = getInstructionIndex(MI); if (index < start || index >= end) @@ -1525,7 +1538,7 @@ LiveIntervals::handleSpilledImpDefs(const LiveInterval &li, VirtRegMap &vrm, MachineInstr *MI = &*ri; ++ri; if (O.isDef()) { - assert(MI->getOpcode() == TargetInstrInfo::IMPLICIT_DEF && + assert(MI->isImplicitDef() && "Register def was not rewritten?"); RemoveMachineInstrFromMaps(MI); vrm.RemoveMachineInstrFromMaps(MI); @@ -2056,7 +2069,7 @@ bool LiveIntervals::spillPhysRegAroundRegDefsUses(const LiveInterval &li, std::string msg; raw_string_ostream Msg(msg); Msg << "Ran out of registers during register allocation!"; - if (MI->getOpcode() == TargetInstrInfo::INLINEASM) { + if (MI->isInlineAsm()) { Msg << "\nPlease check your inline asm statement for invalid " << "constraints:\n"; MI->print(Msg, tm_); diff --git a/lib/CodeGen/LiveVariables.cpp b/lib/CodeGen/LiveVariables.cpp index b44a220..8a124dc 100644 --- a/lib/CodeGen/LiveVariables.cpp +++ b/lib/CodeGen/LiveVariables.cpp @@ -543,6 +543,8 @@ bool LiveVariables::runOnMachineFunction(MachineFunction &mf) { for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E; ++I) { MachineInstr *MI = I; + if (MI->isDebugValue()) + continue; DistanceMap.insert(std::make_pair(MI, Dist++)); // Process all of the operands of the instruction... @@ -550,7 +552,7 @@ bool LiveVariables::runOnMachineFunction(MachineFunction &mf) { // Unless it is a PHI node. In this case, ONLY process the DEF, not any // of the uses. They will be handled in other basic blocks. - if (MI->getOpcode() == TargetInstrInfo::PHI) + if (MI->isPHI()) NumOperandsToProcess = 1; SmallVector<unsigned, 4> UseRegs; @@ -692,7 +694,7 @@ void LiveVariables::analyzePHINodes(const MachineFunction& Fn) { for (MachineFunction::const_iterator I = Fn.begin(), E = Fn.end(); I != E; ++I) for (MachineBasicBlock::const_iterator BBI = I->begin(), BBE = I->end(); - BBI != BBE && BBI->getOpcode() == TargetInstrInfo::PHI; ++BBI) + BBI != BBE && BBI->isPHI(); ++BBI) for (unsigned i = 1, e = BBI->getNumOperands(); i != e; i += 2) PHIVarInfo[BBI->getOperand(i + 1).getMBB()->getNumber()] .push_back(BBI->getOperand(i).getReg()); @@ -771,8 +773,7 @@ void LiveVariables::addNewBlock(MachineBasicBlock *BB, // All registers used by PHI nodes in SuccBB must be live through BB. 
for (MachineBasicBlock::const_iterator BBI = SuccBB->begin(), - BBE = SuccBB->end(); - BBI != BBE && BBI->getOpcode() == TargetInstrInfo::PHI; ++BBI) + BBE = SuccBB->end(); BBI != BBE && BBI->isPHI(); ++BBI) for (unsigned i = 1, e = BBI->getNumOperands(); i != e; i += 2) if (BBI->getOperand(i+1).getMBB() == BB) getVarInfo(BBI->getOperand(i).getReg()).AliveBlocks.set(NumNew); diff --git a/lib/CodeGen/LowerSubregs.cpp b/lib/CodeGen/LowerSubregs.cpp index 1121d9b..b4ef648 100644 --- a/lib/CodeGen/LowerSubregs.cpp +++ b/lib/CodeGen/LowerSubregs.cpp @@ -129,7 +129,7 @@ bool LowerSubregsInstructionPass::LowerExtract(MachineInstr *MI) { if (MI->getOperand(1).isKill()) { // We must make sure the super-register gets killed. Replace the // instruction with KILL. - MI->setDesc(TII->get(TargetInstrInfo::KILL)); + MI->setDesc(TII->get(TargetOpcode::KILL)); MI->RemoveOperand(2); // SubIdx DEBUG(dbgs() << "subreg: replace by: " << *MI); return true; @@ -242,7 +242,7 @@ bool LowerSubregsInstructionPass::LowerInsert(MachineInstr *MI) { // <undef>, we need to make sure it is alive by inserting a KILL if (MI->getOperand(1).isUndef() && !MI->getOperand(0).isDead()) { MachineInstrBuilder MIB = BuildMI(*MBB, MI, MI->getDebugLoc(), - TII->get(TargetInstrInfo::KILL), DstReg); + TII->get(TargetOpcode::KILL), DstReg); if (MI->getOperand(2).isUndef()) MIB.addReg(InsReg, RegState::Undef); else @@ -260,7 +260,7 @@ bool LowerSubregsInstructionPass::LowerInsert(MachineInstr *MI) { // If the source register being inserted is undef, then this becomes a // KILL. BuildMI(*MBB, MI, MI->getDebugLoc(), - TII->get(TargetInstrInfo::KILL), DstSubReg); + TII->get(TargetOpcode::KILL), DstSubReg); else { bool Emitted = TII->copyRegToReg(*MBB, MI, DstSubReg, InsReg, TRC0, TRC1); (void)Emitted; @@ -314,11 +314,11 @@ bool LowerSubregsInstructionPass::runOnMachineFunction(MachineFunction &MF) { mi != me;) { MachineBasicBlock::iterator nmi = llvm::next(mi); MachineInstr *MI = mi; - if (MI->getOpcode() == TargetInstrInfo::EXTRACT_SUBREG) { + if (MI->isExtractSubreg()) { MadeChange |= LowerExtract(MI); - } else if (MI->getOpcode() == TargetInstrInfo::INSERT_SUBREG) { + } else if (MI->isInsertSubreg()) { MadeChange |= LowerInsert(MI); - } else if (MI->getOpcode() == TargetInstrInfo::SUBREG_TO_REG) { + } else if (MI->isSubregToReg()) { MadeChange |= LowerSubregToReg(MI); } mi = nmi; diff --git a/lib/CodeGen/MachOWriter.cpp b/lib/CodeGen/MachOWriter.cpp deleted file mode 100644 index e8bbe21..0000000 --- a/lib/CodeGen/MachOWriter.cpp +++ /dev/null @@ -1,125 +0,0 @@ -//===-- MachOWriter.cpp - Target-independent Mach-O Writer code -----------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements the target-independent Mach-O writer. 
This file writes -// out the Mach-O file in the following order: -// -// #1 FatHeader (universal-only) -// #2 FatArch (universal-only, 1 per universal arch) -// Per arch: -// #3 Header -// #4 Load Commands -// #5 Sections -// #6 Relocations -// #7 Symbols -// #8 Strings -// -//===----------------------------------------------------------------------===// - -#include "MachOWriter.h" -#include "llvm/Function.h" -#include "llvm/CodeGen/FileWriters.h" -#include "llvm/CodeGen/MachineFunction.h" -#include "llvm/MC/MCAsmInfo.h" -#include "llvm/MC/MCContext.h" -#include "llvm/MC/MCCodeEmitter.h" -#include "llvm/MC/MCInst.h" -#include "llvm/MC/MCStreamer.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/FormattedStream.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/Target/Mangler.h" -#include "llvm/Target/TargetData.h" -#include "llvm/Target/TargetLowering.h" -#include "llvm/Target/TargetLoweringObjectFile.h" -using namespace llvm; - -namespace llvm { -MachineFunctionPass *createMachOWriter(formatted_raw_ostream &O, - TargetMachine &TM, - const MCAsmInfo *T, - MCCodeEmitter *MCE) { - return new MachOWriter(O, TM, T, MCE); -} -} - -//===----------------------------------------------------------------------===// -// MachOWriter Implementation -//===----------------------------------------------------------------------===// - -char MachOWriter::ID = 0; - -MachOWriter::MachOWriter(formatted_raw_ostream &o, TargetMachine &tm, - const MCAsmInfo *T, MCCodeEmitter *MCE) - : MachineFunctionPass(&ID), O(o), TM(tm), MAI(T), MCCE(MCE), - OutContext(*new MCContext()), - OutStreamer(*createMachOStreamer(OutContext, O, MCCE)) { -} - -MachOWriter::~MachOWriter() { - delete &OutStreamer; - delete &OutContext; - delete MCCE; -} - -bool MachOWriter::doInitialization(Module &M) { - // Initialize TargetLoweringObjectFile. - TM.getTargetLowering()->getObjFileLowering().Initialize(OutContext, TM); - - return false; -} - -/// doFinalization - Now that the module has been completely processed, emit -/// the Mach-O file to 'O'. -bool MachOWriter::doFinalization(Module &M) { - OutStreamer.Finish(); - return false; -} - -bool MachOWriter::runOnMachineFunction(MachineFunction &MF) { - const Function *F = MF.getFunction(); - TargetLoweringObjectFile &TLOF = TM.getTargetLowering()->getObjFileLowering(); - const MCSection *S = TLOF.SectionForGlobal(F, Mang, TM); - OutStreamer.SwitchSection(S); - - for (MachineFunction::const_iterator I = MF.begin(), E = MF.end(); - I != E; ++I) { - // Print a label for the basic block. - for (MachineBasicBlock::const_iterator II = I->begin(), IE = I->end(); - II != IE; ++II) { - const MachineInstr *MI = II; - MCInst OutMI; - OutMI.setOpcode(MI->getOpcode()); - - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - const MachineOperand &MO = MI->getOperand(i); - MCOperand MCOp; - - switch (MO.getType()) { - default: - MI->dump(); - llvm_unreachable("unknown operand type"); - case MachineOperand::MO_Register: - // Ignore all implicit register operands. 
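That operand switch was the entire lowering story for this writer; restated as a standalone helper (hypothetical name, same two supported operand kinds as the deleted code):

static bool lowerToMCOperand(const MachineOperand &MO, MCOperand &MCOp) {
  switch (MO.getType()) {
  case MachineOperand::MO_Register:
    if (MO.isImplicit())
      return false;                       // implicit register operands are dropped
    MCOp = MCOperand::CreateReg(MO.getReg());
    return true;
  case MachineOperand::MO_Immediate:
    MCOp = MCOperand::CreateImm(MO.getImm());
    return true;
  default:
    return false;                         // anything else was unsupported here
  }
}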
- if (MO.isImplicit()) continue; - MCOp = MCOperand::CreateReg(MO.getReg()); - break; - case MachineOperand::MO_Immediate: - MCOp = MCOperand::CreateImm(MO.getImm()); - break; - } - OutMI.addOperand(MCOp); - } - - OutStreamer.EmitInstruction(OutMI); - } - } - - return false; -} diff --git a/lib/CodeGen/MachOWriter.h b/lib/CodeGen/MachOWriter.h deleted file mode 100644 index 2e7e67d..0000000 --- a/lib/CodeGen/MachOWriter.h +++ /dev/null @@ -1,88 +0,0 @@ -//=== MachOWriter.h - Target-independent Mach-O writer support --*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file defines the MachOWriter class. -// -//===----------------------------------------------------------------------===// - -#ifndef MACHOWRITER_H -#define MACHOWRITER_H - -#include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/Target/TargetMachine.h" - -namespace llvm { - class GlobalVariable; - class Mangler; - class MCCodeEmitter; - class MCContext; - class MCStreamer; - - /// MachOWriter - This class implements the common target-independent code for - /// writing Mach-O files. Targets should derive a class from this to - /// parameterize the output format. - /// - class MachOWriter : public MachineFunctionPass { - static char ID; - - protected: - /// Output stream to send the resultant object file to. - /// - formatted_raw_ostream &O; - - /// Target machine description. - /// - TargetMachine &TM; - - /// Target Asm Printer information. - /// - const MCAsmInfo *MAI; - - /// MCCE - The MCCodeEmitter object that we are exposing to emit machine - /// code for functions to the .o file. - MCCodeEmitter *MCCE; - - /// OutContext - This is the context for the output file that we are - /// streaming. This owns all of the global MC-related objects for the - /// generated translation unit. - MCContext &OutContext; - - /// OutStreamer - This is the MCStreamer object for the file we are - /// generating. This contains the transient state for the current - /// translation unit that we are generating (such as the current section - /// etc). - MCStreamer &OutStreamer; - - /// Name-mangler for global names. - /// - Mangler *Mang; - - /// doInitialization - Emit the file header and all of the global variables - /// for the module to the Mach-O file. - bool doInitialization(Module &M); - - /// doFinalization - Now that the module has been completely processed, emit - /// the Mach-O file to 'O'. 
- bool doFinalization(Module &M); - - bool runOnMachineFunction(MachineFunction &MF); - - public: - explicit MachOWriter(formatted_raw_ostream &O, TargetMachine &TM, - const MCAsmInfo *T, MCCodeEmitter *MCE); - - virtual ~MachOWriter(); - - virtual const char *getPassName() const { - return "Mach-O Writer"; - } - }; -} - -#endif diff --git a/lib/CodeGen/MachineBasicBlock.cpp b/lib/CodeGen/MachineBasicBlock.cpp index 9215bd5..655a0bf 100644 --- a/lib/CodeGen/MachineBasicBlock.cpp +++ b/lib/CodeGen/MachineBasicBlock.cpp @@ -14,15 +14,18 @@ #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/BasicBlock.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCContext.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetInstrDesc.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" +#include "llvm/Assembly/Writer.h" +#include "llvm/ADT/SmallString.h" #include "llvm/Support/Debug.h" #include "llvm/Support/LeakDetector.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Assembly/Writer.h" #include <algorithm> using namespace llvm; @@ -36,6 +39,18 @@ MachineBasicBlock::~MachineBasicBlock() { LeakDetector::removeGarbageObject(this); } +/// getSymbol - Return the MCSymbol for this basic block. +/// +MCSymbol *MachineBasicBlock::getSymbol(MCContext &Ctx) const { + SmallString<60> Name; + const MachineFunction *MF = getParent(); + raw_svector_ostream(Name) + << MF->getTarget().getMCAsmInfo()->getPrivateGlobalPrefix() << "BB" + << MF->getFunctionNumber() << '_' << getNumber(); + return Ctx.GetOrCreateSymbol(Name.str()); +} + + raw_ostream &llvm::operator<<(raw_ostream &OS, const MachineBasicBlock &MBB) { MBB.print(OS); return OS; @@ -525,7 +540,7 @@ bool MachineBasicBlock::CorrectExtraCFGEdges(MachineBasicBlock *DestA, } /// findDebugLoc - find the next valid DebugLoc starting at MBBI, skipping -/// any DEBUG_VALUE instructions. Return UnknownLoc if there is none. +/// any DBG_VALUE instructions. Return UnknownLoc if there is none. DebugLoc MachineBasicBlock::findDebugLoc(MachineBasicBlock::iterator &MBBI) { DebugLoc DL; @@ -533,8 +548,7 @@ MachineBasicBlock::findDebugLoc(MachineBasicBlock::iterator &MBBI) { if (MBBI != E) { // Skip debug declarations, we don't want a DebugLoc from them. 
MachineBasicBlock::iterator MBBI2 = MBBI; - while (MBBI2 != E && - MBBI2->getOpcode()==TargetInstrInfo::DEBUG_VALUE) + while (MBBI2 != E && MBBI2->isDebugValue()) MBBI2++; if (MBBI2 != E) DL = MBBI2->getDebugLoc(); diff --git a/lib/CodeGen/MachineFunction.cpp b/lib/CodeGen/MachineFunction.cpp index 1e3e314..f141c56 100644 --- a/lib/CodeGen/MachineFunction.cpp +++ b/lib/CodeGen/MachineFunction.cpp @@ -16,7 +16,6 @@ #include "llvm/DerivedTypes.h" #include "llvm/Function.h" #include "llvm/Instructions.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/Config/config.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFunction.h" @@ -26,12 +25,16 @@ #include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCContext.h" #include "llvm/Analysis/DebugInfo.h" #include "llvm/Support/Debug.h" #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetFrameInfo.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/Support/GraphWriter.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -70,9 +73,9 @@ FunctionPass *llvm::createMachineFunctionPrinterPass(raw_ostream &OS, return new Printer(OS, Banner); } -//===---------------------------------------------------------------------===// +//===----------------------------------------------------------------------===// // MachineFunction implementation -//===---------------------------------------------------------------------===// +//===----------------------------------------------------------------------===// // Out of line virtual method. MachineFunctionInfo::~MachineFunctionInfo() {} @@ -81,8 +84,8 @@ void ilist_traits<MachineBasicBlock>::deleteNode(MachineBasicBlock *MBB) { MBB->getParent()->DeleteMachineBasicBlock(MBB); } -MachineFunction::MachineFunction(Function *F, - const TargetMachine &TM) +MachineFunction::MachineFunction(Function *F, const TargetMachine &TM, + unsigned FunctionNum) : Fn(F), Target(TM) { if (TM.getRegisterInfo()) RegInfo = new (Allocator.Allocate<MachineRegisterInfo>()) @@ -95,16 +98,8 @@ MachineFunction::MachineFunction(Function *F, ConstantPool = new (Allocator.Allocate<MachineConstantPool>()) MachineConstantPool(TM.getTargetData()); Alignment = TM.getTargetLowering()->getFunctionAlignment(F); - - // Set up jump table. - const TargetData &TD = *TM.getTargetData(); - bool IsPic = TM.getRelocationModel() == Reloc::PIC_; - unsigned EntrySize = IsPic ? 4 : TD.getPointerSize(); - unsigned TyAlignment = IsPic ? - TD.getABITypeAlignment(Type::getInt32Ty(F->getContext())) - : TD.getPointerABIAlignment(); - JumpTableInfo = new (Allocator.Allocate<MachineJumpTableInfo>()) - MachineJumpTableInfo(EntrySize, TyAlignment); + FunctionNumber = FunctionNum; + JumpTableInfo = 0; } MachineFunction::~MachineFunction() { @@ -121,9 +116,23 @@ } FrameInfo->~MachineFrameInfo(); Allocator.Deallocate(FrameInfo); ConstantPool->~MachineConstantPool(); Allocator.Deallocate(ConstantPool); - JumpTableInfo->~MachineJumpTableInfo(); Allocator.Deallocate(JumpTableInfo); + + if (JumpTableInfo) { + JumpTableInfo->~MachineJumpTableInfo(); + Allocator.Deallocate(JumpTableInfo); + } } +/// getOrCreateJumpTableInfo - Get the JumpTableInfo for this function, if it +/// does not already exist, allocate one.
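A hypothetical call site, to show the lazy-creation pattern the implementation below enables (the entry kind and destination list are illustrative only):

// Jump table info is now created on first use instead of in the constructor.
MachineJumpTableInfo *JTI =
  MF.getOrCreateJumpTableInfo(MachineJumpTableInfo::EK_LabelDifference32);
std::vector<MachineBasicBlock*> Dests(4, DefaultMBB);  // placeholder targets
unsigned JTIdx = JTI->getJumpTableIndex(Dests);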
+MachineJumpTableInfo *MachineFunction:: +getOrCreateJumpTableInfo(unsigned EntryKind) { + if (JumpTableInfo) return JumpTableInfo; + + JumpTableInfo = new (Allocator.Allocate<MachineJumpTableInfo>()) + MachineJumpTableInfo((MachineJumpTableInfo::JTEntryKind)EntryKind); + return JumpTableInfo; +} /// RenumberBlocks - This discards all of the MachineBasicBlock numbers and /// recomputes them. This guarantees that the MBB numbers are sequential, @@ -178,7 +187,7 @@ MachineFunction::CreateMachineInstr(const TargetInstrDesc &TID, } /// CloneMachineInstr - Create a new MachineInstr which is a copy of the -/// 'Orig' instruction, identical in all ways except the the instruction +/// 'Orig' instruction, identical in all ways except the instruction /// has no parent, prev, or next. /// MachineInstr * @@ -311,7 +320,8 @@ void MachineFunction::print(raw_ostream &OS) const { FrameInfo->print(*this, OS); // Print JumpTable Information - JumpTableInfo->print(OS); + if (JumpTableInfo) + JumpTableInfo->print(OS); // Print Constant Pool ConstantPool->print(OS); @@ -435,6 +445,26 @@ DILocation MachineFunction::getDILocation(DebugLoc DL) const { return DILocation(DebugLocInfo.DebugLocations[Idx]); } + +/// getJTISymbol - Return the MCSymbol for the specified non-empty jump table. +/// If isLinkerPrivate is specified, an 'l' label is returned, otherwise a +/// normal 'L' label is returned. +MCSymbol *MachineFunction::getJTISymbol(unsigned JTI, MCContext &Ctx, + bool isLinkerPrivate) const { + assert(JumpTableInfo && "No jump tables"); + + assert(JTI < JumpTableInfo->getJumpTables().size() && "Invalid JTI!"); + const MCAsmInfo &MAI = *getTarget().getMCAsmInfo(); + + const char *Prefix = isLinkerPrivate ? MAI.getLinkerPrivateGlobalPrefix() : + MAI.getPrivateGlobalPrefix(); + SmallString<60> Name; + raw_svector_ostream(Name) + << Prefix << "JTI" << getFunctionNumber() << '_' << JTI; + return Ctx.GetOrCreateSymbol(Name.str()); +} + + //===----------------------------------------------------------------------===// // MachineFrameInfo implementation //===----------------------------------------------------------------------===// @@ -528,6 +558,39 @@ void MachineFrameInfo::dump(const MachineFunction &MF) const { // MachineJumpTableInfo implementation //===----------------------------------------------------------------------===// +/// getEntrySize - Return the size of each entry in the jump table. +unsigned MachineJumpTableInfo::getEntrySize(const TargetData &TD) const { + // The size of a jump table entry is 4 bytes unless the entry is just the + // address of a block, in which case it is the pointer size. + switch (getEntryKind()) { + case MachineJumpTableInfo::EK_BlockAddress: + return TD.getPointerSize(); + case MachineJumpTableInfo::EK_GPRel32BlockAddress: + case MachineJumpTableInfo::EK_LabelDifference32: + case MachineJumpTableInfo::EK_Custom32: + return 4; + } + assert(0 && "Unknown jump table encoding!"); + return ~0; +} + +/// getEntryAlignment - Return the alignment of each entry in the jump table. +unsigned MachineJumpTableInfo::getEntryAlignment(const TargetData &TD) const { + // The alignment of a jump table entry is the alignment of int32 unless the + // entry is just the address of a block, in which case it is the pointer + // alignment. 
+ switch (getEntryKind()) { + case MachineJumpTableInfo::EK_BlockAddress: + return TD.getPointerABIAlignment(); + case MachineJumpTableInfo::EK_GPRel32BlockAddress: + case MachineJumpTableInfo::EK_LabelDifference32: + case MachineJumpTableInfo::EK_Custom32: + return TD.getABIIntegerTypeAlignment(32); + } + assert(0 && "Unknown jump table encoding!"); + return ~0; +} + /// getJumpTableIndex - Create a new jump table entry in the jump table info /// or return an existing one. /// @@ -538,11 +601,11 @@ unsigned MachineJumpTableInfo::getJumpTableIndex( return JumpTables.size()-1; } + /// ReplaceMBBInJumpTables - If Old is the target of any jump tables, update /// the jump tables to branch to New instead. -bool -MachineJumpTableInfo::ReplaceMBBInJumpTables(MachineBasicBlock *Old, - MachineBasicBlock *New) { +bool MachineJumpTableInfo::ReplaceMBBInJumpTables(MachineBasicBlock *Old, + MachineBasicBlock *New) { assert(Old != New && "Not making a change?"); bool MadeChange = false; for (size_t i = 0, e = JumpTables.size(); i != e; ++i) @@ -552,10 +615,9 @@ MachineJumpTableInfo::ReplaceMBBInJumpTables(MachineBasicBlock *Old, /// ReplaceMBBInJumpTable - If Old is a target of the jump tables, update /// the jump table to branch to New instead. -bool -MachineJumpTableInfo::ReplaceMBBInJumpTable(unsigned Idx, - MachineBasicBlock *Old, - MachineBasicBlock *New) { +bool MachineJumpTableInfo::ReplaceMBBInJumpTable(unsigned Idx, + MachineBasicBlock *Old, + MachineBasicBlock *New) { assert(Old != New && "Not making a change?"); bool MadeChange = false; MachineJumpTableEntry &JTE = JumpTables[Idx]; diff --git a/lib/CodeGen/MachineFunctionAnalysis.cpp b/lib/CodeGen/MachineFunctionAnalysis.cpp index f5febc5..8d87e3e 100644 --- a/lib/CodeGen/MachineFunctionAnalysis.cpp +++ b/lib/CodeGen/MachineFunctionAnalysis.cpp @@ -36,7 +36,7 @@ MachineFunctionAnalysis::~MachineFunctionAnalysis() { bool MachineFunctionAnalysis::runOnFunction(Function &F) { assert(!MF && "MachineFunctionAnalysis already initialized!"); - MF = new MachineFunction(&F, TM); + MF = new MachineFunction(&F, TM, NextFnNum++); return false; } diff --git a/lib/CodeGen/MachineInstr.cpp b/lib/CodeGen/MachineInstr.cpp index ef2fcee..b6d98e8 100644 --- a/lib/CodeGen/MachineInstr.cpp +++ b/lib/CodeGen/MachineInstr.cpp @@ -127,7 +127,8 @@ void MachineOperand::ChangeToImmediate(int64_t ImmVal) { /// the specified value. If an operand is known to be an register already, /// the setReg method should be used. void MachineOperand::ChangeToRegister(unsigned Reg, bool isDef, bool isImp, - bool isKill, bool isDead, bool isUndef) { + bool isKill, bool isDead, bool isUndef, + bool isDebug) { // If this operand is already a register operand, use setReg to update the // register's use/def lists. 
if (isReg()) { @@ -152,6 +153,7 @@ void MachineOperand::ChangeToRegister(unsigned Reg, bool isDef, bool isImp, IsDead = isDead; IsUndef = isUndef; IsEarlyClobber = false; + IsDebug = isDebug; SubReg = 0; } @@ -303,7 +305,7 @@ void MachineOperand::print(raw_ostream &OS, const TargetMachine *TM) const { MachineMemOperand::MachineMemOperand(const Value *v, unsigned int f, int64_t o, uint64_t s, unsigned int a) : Offset(o), Size(s), V(v), - Flags((f & 7) | ((Log2_32(a) + 1) << 3)) { + Flags((f & ((1 << MOMaxBits) - 1)) | ((Log2_32(a) + 1) << MOMaxBits)) { assert(getBaseAlignment() == a && "Alignment is not a power of 2!"); assert((isLoad() || isStore()) && "Not a load/store!"); } @@ -325,7 +327,8 @@ void MachineMemOperand::refineAlignment(const MachineMemOperand *MMO) { if (MMO->getBaseAlignment() >= getBaseAlignment()) { // Update the alignment value. - Flags = (Flags & 7) | ((Log2_32(MMO->getBaseAlignment()) + 1) << 3); + Flags = (Flags & ((1 << MOMaxBits) - 1)) | + ((Log2_32(MMO->getBaseAlignment()) + 1) << MOMaxBits); // Also update the base and offset, because the new alignment may // not be applicable with the old ones. V = MMO->getValue(); @@ -740,20 +743,6 @@ unsigned MachineInstr::getNumExplicitOperands() const { } -/// isLabel - Returns true if the MachineInstr represents a label. -/// -bool MachineInstr::isLabel() const { - return getOpcode() == TargetInstrInfo::DBG_LABEL || - getOpcode() == TargetInstrInfo::EH_LABEL || - getOpcode() == TargetInstrInfo::GC_LABEL; -} - -/// isDebugLabel - Returns true if the MachineInstr represents a debug label. -/// -bool MachineInstr::isDebugLabel() const { - return getOpcode() == TargetInstrInfo::DBG_LABEL; -} - /// findRegisterUseOperandIdx() - Returns the MachineOperand that is a use of /// the specific register or -1 if it is not found. It further tightens /// the search criteria to a use that kills the register if isKill is true. @@ -819,7 +808,7 @@ int MachineInstr::findFirstPredOperandIdx() const { /// first tied use operand index by reference if UseOpIdx is not null. bool MachineInstr:: isRegTiedToUseOperand(unsigned DefOpIdx, unsigned *UseOpIdx) const { - if (getOpcode() == TargetInstrInfo::INLINEASM) { + if (isInlineAsm()) { assert(DefOpIdx >= 2); const MachineOperand &MO = getOperand(DefOpIdx); if (!MO.isReg() || !MO.isDef() || MO.getReg() == 0) @@ -878,7 +867,7 @@ isRegTiedToUseOperand(unsigned DefOpIdx, unsigned *UseOpIdx) const { /// operand index by reference. bool MachineInstr:: isRegTiedToDefOperand(unsigned UseOpIdx, unsigned *DefOpIdx) const { - if (getOpcode() == TargetInstrInfo::INLINEASM) { + if (isInlineAsm()) { const MachineOperand &MO = getOperand(UseOpIdx); if (!MO.isReg() || !MO.isUse() || MO.getReg() == 0) return false; @@ -1046,7 +1035,7 @@ bool MachineInstr::hasVolatileMemoryRef() const { /// isInvariantLoad - Return true if this instruction is loading from a /// location whose value is invariant across the function. For example, -/// loading a value from the constant pool or from from the argument area +/// loading a value from the constant pool or from the argument area /// of a function if it does not change. This should only return true if /// *all* loads the instruction does are invariant (if it does multiple loads). bool MachineInstr::isInvariantLoad(AliasAnalysis *AA) const { @@ -1088,7 +1077,7 @@ bool MachineInstr::isInvariantLoad(AliasAnalysis *AA) const { /// merges together the same virtual register, return the register, otherwise /// return 0.
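For instance, %v = PHI %a, <bb0>, %a, <bb1> merges a single register, so the function returns %a. A hypothetical caller that folds such PHIs away:

static bool foldConstantValuePHI(MachineInstr *PHI, MachineRegisterInfo *MRI) {
  if (unsigned Reg = PHI->isConstantValuePHI()) {
    // Every incoming value is the same register: forward it and drop the PHI.
    MRI->replaceRegWith(PHI->getOperand(0).getReg(), Reg);
    PHI->eraseFromParent();
    return true;
  }
  return false;
}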
unsigned MachineInstr::isConstantValuePHI() const { - if (getOpcode() != TargetInstrInfo::PHI) + if (!isPHI()) return 0; assert(getNumOperands() >= 3 && "It's illegal to have a PHI without source operands"); diff --git a/lib/CodeGen/MachineLICM.cpp b/lib/CodeGen/MachineLICM.cpp index ffcc8ab..92c84f3 100644 --- a/lib/CodeGen/MachineLICM.cpp +++ b/lib/CodeGen/MachineLICM.cpp @@ -336,7 +336,7 @@ static bool HasPHIUses(unsigned Reg, MachineRegisterInfo *RegInfo) { for (MachineRegisterInfo::use_iterator UI = RegInfo->use_begin(Reg), UE = RegInfo->use_end(); UI != UE; ++UI) { MachineInstr *UseMI = &*UI; - if (UseMI->getOpcode() == TargetInstrInfo::PHI) + if (UseMI->isPHI()) return true; } return false; @@ -363,7 +363,7 @@ bool MachineLICM::isLoadFromConstantMemory(MachineInstr *MI) { /// IsProfitableToHoist - Return true if it is potentially profitable to hoist /// the given loop invariant. bool MachineLICM::IsProfitableToHoist(MachineInstr &MI) { - if (MI.getOpcode() == TargetInstrInfo::IMPLICIT_DEF) + if (MI.isImplicitDef()) return false; // FIXME: For now, only hoist re-materializable instructions. LICM will diff --git a/lib/CodeGen/MachineModuleInfo.cpp b/lib/CodeGen/MachineModuleInfo.cpp index ed5bb5e..5052af7 100644 --- a/lib/CodeGen/MachineModuleInfo.cpp +++ b/lib/CodeGen/MachineModuleInfo.cpp @@ -40,6 +40,7 @@ MachineModuleInfoImpl::~MachineModuleInfoImpl() {} MachineModuleInfo::MachineModuleInfo() : ImmutablePass(&ID) , ObjFileMMI(0) +, CurCallSite(0) , CallsEHReturn(0) , CallsUnwindInit(0) , DbgInfoAvailable(false) { @@ -71,6 +72,7 @@ void MachineModuleInfo::EndFunction() { // Clean up exception info. LandingPads.clear(); + CallSiteMap.clear(); TypeInfos.clear(); FilterIds.clear(); FilterEnds.clear(); diff --git a/lib/CodeGen/MachineModuleInfoImpls.cpp b/lib/CodeGen/MachineModuleInfoImpls.cpp index 7a62929..39d2c75 100644 --- a/lib/CodeGen/MachineModuleInfoImpls.cpp +++ b/lib/CodeGen/MachineModuleInfoImpls.cpp @@ -22,22 +22,23 @@ using namespace llvm; // Out of line virtual method. void MachineModuleInfoMachO::Anchor() {} - +void MachineModuleInfoELF::Anchor() {} static int SortSymbolPair(const void *LHS, const void *RHS) { const MCSymbol *LHSS = - ((const std::pair<const MCSymbol*, const MCSymbol*>*)LHS)->first; + ((const std::pair<MCSymbol*, MCSymbol*>*)LHS)->first; const MCSymbol *RHSS = - ((const std::pair<const MCSymbol*, const MCSymbol*>*)RHS)->first; + ((const std::pair<MCSymbol*, MCSymbol*>*)RHS)->first; return LHSS->getName().compare(RHSS->getName()); } /// GetSortedStubs - Return the entries from a DenseMap in a deterministic /// sorted order.
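DenseMap iteration order depends on hashing and insertion history, so emitting stubs straight out of the map would make the assembler output nondeterministic; sorting the flattened list by symbol name restores stable output. The same comparison written for std::sort rather than the qsort call above (an illustrative alternative, not the patch's code):

typedef std::pair<MCSymbol*, MCSymbol*> StubPair;
static bool stubNameLess(const StubPair &LHS, const StubPair &RHS) {
  // Order purely by symbol name so repeated runs emit identical stub lists.
  return LHS.first->getName() < RHS.first->getName();
}
// usage: std::sort(List.begin(), List.end(), stubNameLess);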
-MachineModuleInfoMachO::SymbolListTy -MachineModuleInfoMachO::GetSortedStubs(const DenseMap<const MCSymbol*, - const MCSymbol*> &Map) { - MachineModuleInfoMachO::SymbolListTy List(Map.begin(), Map.end()); +MachineModuleInfoImpl::SymbolListTy +MachineModuleInfoImpl::GetSortedStubs(const DenseMap<MCSymbol*, + MCSymbol*> &Map) { + MachineModuleInfoImpl::SymbolListTy List(Map.begin(), Map.end()); + if (!List.empty()) qsort(&List[0], List.size(), sizeof(List[0]), SortSymbolPair); return List; diff --git a/lib/CodeGen/MachineSSAUpdater.cpp b/lib/CodeGen/MachineSSAUpdater.cpp index 467ea5d..2255dc3 100644 --- a/lib/CodeGen/MachineSSAUpdater.cpp +++ b/lib/CodeGen/MachineSSAUpdater.cpp @@ -20,6 +20,7 @@ #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" @@ -92,13 +93,13 @@ unsigned LookForIdenticalPHI(MachineBasicBlock *BB, return 0; MachineBasicBlock::iterator I = BB->front(); - if (I->getOpcode() != TargetInstrInfo::PHI) + if (!I->isPHI()) return 0; AvailableValsTy AVals; for (unsigned i = 0, e = PredValues.size(); i != e; ++i) AVals[PredValues[i].first] = PredValues[i].second; - while (I != BB->end() && I->getOpcode() == TargetInstrInfo::PHI) { + while (I != BB->end() && I->isPHI()) { bool Same = true; for (unsigned i = 1, e = I->getNumOperands(); i != e; i += 2) { unsigned SrcReg = I->getOperand(i).getReg(); @@ -155,7 +156,7 @@ unsigned MachineSSAUpdater::GetValueInMiddleOfBlock(MachineBasicBlock *BB) { // If there are no predecessors, just return undef. if (BB->pred_empty()) { // Insert an implicit_def to represent an undef value. - MachineInstr *NewDef = InsertNewDef(TargetInstrInfo::IMPLICIT_DEF, + MachineInstr *NewDef = InsertNewDef(TargetOpcode::IMPLICIT_DEF, BB, BB->getFirstTerminator(), VRC, MRI, TII); return NewDef->getOperand(0).getReg(); @@ -192,7 +193,7 @@ unsigned MachineSSAUpdater::GetValueInMiddleOfBlock(MachineBasicBlock *BB) { // Otherwise, we do need a PHI: insert one now. MachineBasicBlock::iterator Loc = BB->empty() ? BB->end() : BB->front(); - MachineInstr *InsertedPHI = InsertNewDef(TargetInstrInfo::PHI, BB, + MachineInstr *InsertedPHI = InsertNewDef(TargetOpcode::PHI, BB, Loc, VRC, MRI, TII); // Fill in all the predecessors of the PHI. @@ -231,7 +232,7 @@ MachineBasicBlock *findCorrespondingPred(const MachineInstr *MI, void MachineSSAUpdater::RewriteUse(MachineOperand &U) { MachineInstr *UseMI = U.getParent(); unsigned NewVR = 0; - if (UseMI->getOpcode() == TargetInstrInfo::PHI) { + if (UseMI->isPHI()) { MachineBasicBlock *SourceBB = findCorrespondingPred(UseMI, &U); NewVR = GetValueAtEndOfBlockInternal(SourceBB); } else { @@ -277,7 +278,7 @@ unsigned MachineSSAUpdater::GetValueAtEndOfBlockInternal(MachineBasicBlock *BB){ // it. When we get back to the first instance of the recursion we will fill // in the PHI node. MachineBasicBlock::iterator Loc = BB->empty() ? BB->end() : BB->front(); - MachineInstr *NewPHI = InsertNewDef(TargetInstrInfo::PHI, BB, Loc, + MachineInstr *NewPHI = InsertNewDef(TargetOpcode::PHI, BB, Loc, VRC, MRI,TII); unsigned NewVR = NewPHI->getOperand(0).getReg(); InsertRes.first->second = NewVR; @@ -289,7 +290,7 @@ unsigned MachineSSAUpdater::GetValueAtEndOfBlockInternal(MachineBasicBlock *BB){ // be invalidated. if (BB->pred_empty()) { // Insert an implicit_def to represent an undef value. 
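InsertNewDef is a static helper earlier in this file that the hunks do not show; paraphrased, it allocates a fresh virtual register and builds the requested definition at the given point:

// Paraphrase of the helper's shape, not the file's exact code.
static MachineInstr *InsertNewDef(unsigned Opcode, MachineBasicBlock *BB,
                                  MachineBasicBlock::iterator I,
                                  const TargetRegisterClass *RC,
                                  MachineRegisterInfo *MRI,
                                  const TargetInstrInfo *TII) {
  unsigned NewVR = MRI->createVirtualRegister(RC);
  return BuildMI(*BB, I, DebugLoc(), TII->get(Opcode), NewVR);
}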
- MachineInstr *NewDef = InsertNewDef(TargetInstrInfo::IMPLICIT_DEF, + MachineInstr *NewDef = InsertNewDef(TargetOpcode::IMPLICIT_DEF, BB, BB->getFirstTerminator(), VRC, MRI, TII); return InsertRes.first->second = NewDef->getOperand(0).getReg(); @@ -358,7 +359,7 @@ unsigned MachineSSAUpdater::GetValueAtEndOfBlockInternal(MachineBasicBlock *BB){ MachineInstr *InsertedPHI; if (InsertedVal == 0) { MachineBasicBlock::iterator Loc = BB->empty() ? BB->end() : BB->front(); - InsertedPHI = InsertNewDef(TargetInstrInfo::PHI, BB, Loc, + InsertedPHI = InsertNewDef(TargetOpcode::PHI, BB, Loc, VRC, MRI, TII); InsertedVal = InsertedPHI->getOperand(0).getReg(); } else { diff --git a/lib/CodeGen/MachineSink.cpp b/lib/CodeGen/MachineSink.cpp index c177e3c..c391576 100644 --- a/lib/CodeGen/MachineSink.cpp +++ b/lib/CodeGen/MachineSink.cpp @@ -77,7 +77,7 @@ bool MachineSinking::AllUsesDominatedByBlock(unsigned Reg, // Determine the block of the use. MachineInstr *UseInst = &*I; MachineBasicBlock *UseBlock = UseInst->getParent(); - if (UseInst->getOpcode() == TargetInstrInfo::PHI) { + if (UseInst->isPHI()) { // PHI nodes use the operand in the predecessor block, not the block with // the PHI. UseBlock = UseInst->getOperand(I.getOperandNo()+1).getMBB(); @@ -269,8 +269,7 @@ bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) { // Determine where to insert into. Skip phi nodes. MachineBasicBlock::iterator InsertPos = SuccToSinkTo->begin(); - while (InsertPos != SuccToSinkTo->end() && - InsertPos->getOpcode() == TargetInstrInfo::PHI) + while (InsertPos != SuccToSinkTo->end() && InsertPos->isPHI()) ++InsertPos; // Move the instruction. diff --git a/lib/CodeGen/MachineVerifier.cpp b/lib/CodeGen/MachineVerifier.cpp index 584c21b..434a1e8 100644 --- a/lib/CodeGen/MachineVerifier.cpp +++ b/lib/CodeGen/MachineVerifier.cpp @@ -590,7 +590,7 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) { // must be live in. PHI instructions are handled separately. if (MInfo.regsKilled.count(Reg)) report("Using a killed virtual register", MO, MONum); - else if (MI->getOpcode() != TargetInstrInfo::PHI) + else if (!MI->isPHI()) MInfo.vregsLiveIn.insert(std::make_pair(Reg, MI)); } } @@ -650,10 +650,8 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) { } case MachineOperand::MO_MachineBasicBlock: - if (MI->getOpcode() == TargetInstrInfo::PHI) { - if (!MO->getMBB()->isSuccessor(MI->getParent())) - report("PHI operand is not in the CFG", MO, MONum); - } + if (MI->isPHI() && !MO->getMBB()->isSuccessor(MI->getParent())) + report("PHI operand is not in the CFG", MO, MONum); break; default: @@ -783,7 +781,7 @@ void MachineVerifier::calcRegsRequired() { // calcRegsPassed has been run so BBInfo::isLiveOut is valid. void MachineVerifier::checkPHIOps(const MachineBasicBlock *MBB) { for (MachineBasicBlock::const_iterator BBI = MBB->begin(), BBE = MBB->end(); - BBI != BBE && BBI->getOpcode() == TargetInstrInfo::PHI; ++BBI) { + BBI != BBE && BBI->isPHI(); ++BBI) { DenseSet<const MachineBasicBlock*> seen; for (unsigned i = 1, e = BBI->getNumOperands(); i != e; i += 2) { diff --git a/lib/CodeGen/Makefile b/lib/CodeGen/Makefile index 8c0204c..4ab3e3c 100644 --- a/lib/CodeGen/Makefile +++ b/lib/CodeGen/Makefile @@ -11,7 +11,6 @@ LEVEL = ../.. 
LIBRARYNAME = LLVMCodeGen PARALLEL_DIRS = SelectionDAG AsmPrinter BUILD_ARCHIVE = 1 -CXXFLAGS = -fno-rtti include $(LEVEL)/Makefile.common diff --git a/lib/CodeGen/OptimizeExts.cpp b/lib/CodeGen/OptimizeExts.cpp index 096f9d4..acb6869 100644 --- a/lib/CodeGen/OptimizeExts.cpp +++ b/lib/CodeGen/OptimizeExts.cpp @@ -110,7 +110,7 @@ bool OptimizeExts::OptimizeInstr(MachineInstr *MI, MachineBasicBlock *MBB, MachineInstr *UseMI = &*UI; if (UseMI == MI) continue; - if (UseMI->getOpcode() == TargetInstrInfo::PHI) { + if (UseMI->isPHI()) { ExtendLife = false; continue; } @@ -150,7 +150,7 @@ bool OptimizeExts::OptimizeInstr(MachineInstr *MI, MachineBasicBlock *MBB, UI = MRI->use_begin(DstReg); for (MachineRegisterInfo::use_iterator UE = MRI->use_end(); UI != UE; ++UI) - if (UI->getOpcode() == TargetInstrInfo::PHI) + if (UI->isPHI()) PHIBBs.insert(UI->getParent()); const TargetRegisterClass *RC = MRI->getRegClass(SrcReg); @@ -162,7 +162,7 @@ bool OptimizeExts::OptimizeInstr(MachineInstr *MI, MachineBasicBlock *MBB, continue; unsigned NewVR = MRI->createVirtualRegister(RC); BuildMI(*UseMBB, UseMI, UseMI->getDebugLoc(), - TII->get(TargetInstrInfo::EXTRACT_SUBREG), NewVR) + TII->get(TargetOpcode::EXTRACT_SUBREG), NewVR) .addReg(DstReg).addImm(SubIdx); UseMO->setReg(NewVR); ++NumReuse; diff --git a/lib/CodeGen/OptimizePHIs.cpp b/lib/CodeGen/OptimizePHIs.cpp new file mode 100644 index 0000000..2717d4d --- /dev/null +++ b/lib/CodeGen/OptimizePHIs.cpp @@ -0,0 +1,189 @@ +//===-- OptimizePHIs.cpp - Optimize machine instruction PHIs --------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass optimizes machine instruction PHIs to take advantage of +// opportunities created during DAG legalization. 
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "phi-opt" +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Function.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/Statistic.h" +using namespace llvm; + +STATISTIC(NumPHICycles, "Number of PHI cycles replaced"); +STATISTIC(NumDeadPHICycles, "Number of dead PHI cycles"); + +namespace { + class OptimizePHIs : public MachineFunctionPass { + MachineRegisterInfo *MRI; + const TargetInstrInfo *TII; + + public: + static char ID; // Pass identification + OptimizePHIs() : MachineFunctionPass(&ID) {} + + virtual bool runOnMachineFunction(MachineFunction &MF); + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); + MachineFunctionPass::getAnalysisUsage(AU); + } + + private: + typedef SmallPtrSet<MachineInstr*, 16> InstrSet; + typedef SmallPtrSetIterator<MachineInstr*> InstrSetIterator; + + bool IsSingleValuePHICycle(MachineInstr *MI, unsigned &SingleValReg, + InstrSet &PHIsInCycle); + bool IsDeadPHICycle(MachineInstr *MI, InstrSet &PHIsInCycle); + bool OptimizeBB(MachineBasicBlock &MBB); + }; +} + +char OptimizePHIs::ID = 0; +static RegisterPass<OptimizePHIs> +X("opt-phis", "Optimize machine instruction PHIs"); + +FunctionPass *llvm::createOptimizePHIsPass() { return new OptimizePHIs(); } + +bool OptimizePHIs::runOnMachineFunction(MachineFunction &Fn) { + MRI = &Fn.getRegInfo(); + TII = Fn.getTarget().getInstrInfo(); + + // Find dead PHI cycles and PHI cycles that can be replaced by a single + // value. InstCombine does these optimizations, but DAG legalization may + // introduce new opportunities, e.g., when i64 values are split up for + // 32-bit targets. + bool Changed = false; + for (MachineFunction::iterator I = Fn.begin(), E = Fn.end(); I != E; ++I) + Changed |= OptimizeBB(*I); + + return Changed; +} + +/// IsSingleValuePHICycle - Check if MI is a PHI where all the source operands +/// are copies of SingleValReg, possibly via copies through other PHIs. If +/// SingleValReg is zero on entry, it is set to the register with the single +/// non-copy value. PHIsInCycle is a set used to keep track of the PHIs that +/// have been scanned. +bool OptimizePHIs::IsSingleValuePHICycle(MachineInstr *MI, + unsigned &SingleValReg, + InstrSet &PHIsInCycle) { + assert(MI->isPHI() && "IsSingleValuePHICycle expects a PHI instruction"); + unsigned DstReg = MI->getOperand(0).getReg(); + + // See if we already saw this register. + if (!PHIsInCycle.insert(MI)) + return true; + + // Don't scan crazily complex things. + if (PHIsInCycle.size() == 16) + return false; + + // Scan the PHI operands. + for (unsigned i = 1; i != MI->getNumOperands(); i += 2) { + unsigned SrcReg = MI->getOperand(i).getReg(); + if (SrcReg == DstReg) + continue; + MachineInstr *SrcMI = MRI->getVRegDef(SrcReg); + + // Skip over register-to-register moves. + unsigned MvSrcReg, MvDstReg, SrcSubIdx, DstSubIdx; + if (SrcMI && + TII->isMoveInstr(*SrcMI, MvSrcReg, MvDstReg, SrcSubIdx, DstSubIdx) && + SrcSubIdx == 0 && DstSubIdx == 0 && + TargetRegisterInfo::isVirtualRegister(MvSrcReg)) + SrcMI = MRI->getVRegDef(MvSrcReg); + if (!SrcMI) + return false; + + if (SrcMI->isPHI()) { + if (!IsSingleValuePHICycle(SrcMI, SingleValReg, PHIsInCycle)) + return false; + } else { + // Fail if there is more than one non-phi/non-move register. 
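To make the cycle check concrete: the i64 split on a 32-bit target mentioned above typically leaves a shape like the following, where both PHIs only ever see one real value (register names illustrative):

//   %v1 = PHI %real, <bb0>, %v2, <bb1>
//   %v2 = PHI %v1, <bb2>, %v1, <bb3>
// IsSingleValuePHICycle on %v1's PHI recurses through %v2's PHI, skips the
// self-references, and returns true with SingleValReg = %real, so %v1 can be
// replaced by %real outright.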
+ if (SingleValReg != 0) + return false; + SingleValReg = SrcReg; + } + } + return true; +} + +/// IsDeadPHICycle - Check if the register defined by a PHI is only used by +/// other PHIs in a cycle. +bool OptimizePHIs::IsDeadPHICycle(MachineInstr *MI, InstrSet &PHIsInCycle) { + assert(MI->isPHI() && "IsDeadPHICycle expects a PHI instruction"); + unsigned DstReg = MI->getOperand(0).getReg(); + assert(TargetRegisterInfo::isVirtualRegister(DstReg) && + "PHI destination is not a virtual register"); + + // See if we already saw this register. + if (!PHIsInCycle.insert(MI)) + return true; + + // Don't scan crazily complex things. + if (PHIsInCycle.size() == 16) + return false; + + for (MachineRegisterInfo::use_iterator I = MRI->use_begin(DstReg), + E = MRI->use_end(); I != E; ++I) { + MachineInstr *UseMI = &*I; + if (!UseMI->isPHI() || !IsDeadPHICycle(UseMI, PHIsInCycle)) + return false; + } + + return true; +} + +/// OptimizeBB - Remove dead PHI cycles and PHI cycles that can be replaced by +/// a single value. +bool OptimizePHIs::OptimizeBB(MachineBasicBlock &MBB) { + bool Changed = false; + for (MachineBasicBlock::iterator + MII = MBB.begin(), E = MBB.end(); MII != E; ) { + MachineInstr *MI = &*MII++; + if (!MI->isPHI()) + break; + + // Check for single-value PHI cycles. + unsigned SingleValReg = 0; + InstrSet PHIsInCycle; + if (IsSingleValuePHICycle(MI, SingleValReg, PHIsInCycle) && + SingleValReg != 0) { + MRI->replaceRegWith(MI->getOperand(0).getReg(), SingleValReg); + MI->eraseFromParent(); + ++NumPHICycles; + Changed = true; + continue; + } + + // Check for dead PHI cycles. + PHIsInCycle.clear(); + if (IsDeadPHICycle(MI, PHIsInCycle)) { + for (InstrSetIterator PI = PHIsInCycle.begin(), PE = PHIsInCycle.end(); + PI != PE; ++PI) { + MachineInstr *PhiMI = *PI; + if (&*MII == PhiMI) + ++MII; + PhiMI->eraseFromParent(); + } + ++NumDeadPHICycles; + Changed = true; + } + } + return Changed; +} diff --git a/lib/CodeGen/PBQP/AnnotatedGraph.h b/lib/CodeGen/PBQP/AnnotatedGraph.h deleted file mode 100644 index 738dea0..0000000 --- a/lib/CodeGen/PBQP/AnnotatedGraph.h +++ /dev/null @@ -1,184 +0,0 @@ -//===-- AnnotatedGraph.h - Annotated PBQP Graph -----------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// Annotated PBQP Graph class. This class is used internally by the PBQP solver -// to cache information to speed up reduction. 
-// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_CODEGEN_PBQP_ANNOTATEDGRAPH_H -#define LLVM_CODEGEN_PBQP_ANNOTATEDGRAPH_H - -#include "GraphBase.h" - -namespace PBQP { - - -template <typename NodeData, typename EdgeData> class AnnotatedEdge; - -template <typename NodeData, typename EdgeData> -class AnnotatedNode : public NodeBase<AnnotatedNode<NodeData, EdgeData>, - AnnotatedEdge<NodeData, EdgeData> > { -private: - - NodeData nodeData; - -public: - - AnnotatedNode(const Vector &costs, const NodeData &nodeData) : - NodeBase<AnnotatedNode<NodeData, EdgeData>, - AnnotatedEdge<NodeData, EdgeData> >(costs), - nodeData(nodeData) {} - - NodeData& getNodeData() { return nodeData; } - const NodeData& getNodeData() const { return nodeData; } - -}; - -template <typename NodeData, typename EdgeData> -class AnnotatedEdge : public EdgeBase<AnnotatedNode<NodeData, EdgeData>, - AnnotatedEdge<NodeData, EdgeData> > { -private: - - typedef typename GraphBase<AnnotatedNode<NodeData, EdgeData>, - AnnotatedEdge<NodeData, EdgeData> >::NodeIterator - NodeIterator; - - EdgeData edgeData; - -public: - - - AnnotatedEdge(const NodeIterator &node1Itr, const NodeIterator &node2Itr, - const Matrix &costs, const EdgeData &edgeData) : - EdgeBase<AnnotatedNode<NodeData, EdgeData>, - AnnotatedEdge<NodeData, EdgeData> >(node1Itr, node2Itr, costs), - edgeData(edgeData) {} - - EdgeData& getEdgeData() { return edgeData; } - const EdgeData& getEdgeData() const { return edgeData; } - -}; - -template <typename NodeData, typename EdgeData> -class AnnotatedGraph : public GraphBase<AnnotatedNode<NodeData, EdgeData>, - AnnotatedEdge<NodeData, EdgeData> > { -private: - - typedef GraphBase<AnnotatedNode<NodeData, EdgeData>, - AnnotatedEdge<NodeData, EdgeData> > PGraph; - - typedef AnnotatedNode<NodeData, EdgeData> NodeEntry; - typedef AnnotatedEdge<NodeData, EdgeData> EdgeEntry; - - - void copyFrom(const AnnotatedGraph &other) { - if (!other.areNodeIDsValid()) { - other.assignNodeIDs(); - } - std::vector<NodeIterator> newNodeItrs(other.getNumNodes()); - - for (ConstNodeIterator nItr = other.nodesBegin(), nEnd = other.nodesEnd(); - nItr != nEnd; ++nItr) { - newNodeItrs[other.getNodeID(nItr)] = addNode(other.getNodeCosts(nItr)); - } - - for (ConstEdgeIterator eItr = other.edgesBegin(), eEnd = other.edgesEnd(); - eItr != eEnd; ++eItr) { - - unsigned node1ID = other.getNodeID(other.getEdgeNode1(eItr)), - node2ID = other.getNodeID(other.getEdgeNode2(eItr)); - - addEdge(newNodeItrs[node1ID], newNodeItrs[node2ID], - other.getEdgeCosts(eItr), other.getEdgeData(eItr)); - } - - } - -public: - - typedef typename PGraph::NodeIterator NodeIterator; - typedef typename PGraph::ConstNodeIterator ConstNodeIterator; - typedef typename PGraph::EdgeIterator EdgeIterator; - typedef typename PGraph::ConstEdgeIterator ConstEdgeIterator; - - AnnotatedGraph() {} - - AnnotatedGraph(const AnnotatedGraph &other) { - copyFrom(other); - } - - AnnotatedGraph& operator=(const AnnotatedGraph &other) { - PGraph::clear(); - copyFrom(other); - return *this; - } - - NodeIterator addNode(const Vector &costs, const NodeData &data) { - return PGraph::addConstructedNode(NodeEntry(costs, data)); - } - - EdgeIterator addEdge(const NodeIterator &node1Itr, - const NodeIterator &node2Itr, - const Matrix &costs, const EdgeData &data) { - return PGraph::addConstructedEdge(EdgeEntry(node1Itr, node2Itr, - costs, data)); - } - - NodeData& getNodeData(const NodeIterator &nodeItr) { - return PGraph::getNodeEntry(nodeItr).getNodeData(); 
- } - - const NodeData& getNodeData(const NodeIterator &nodeItr) const { - return PGraph::getNodeEntry(nodeItr).getNodeData(); - } - - EdgeData& getEdgeData(const EdgeIterator &edgeItr) { - return PGraph::getEdgeEntry(edgeItr).getEdgeData(); - } - - const EdgeEntry& getEdgeData(const EdgeIterator &edgeItr) const { - return PGraph::getEdgeEntry(edgeItr).getEdgeData(); - } - - SimpleGraph toSimpleGraph() const { - SimpleGraph g; - - if (!PGraph::areNodeIDsValid()) { - PGraph::assignNodeIDs(); - } - std::vector<SimpleGraph::NodeIterator> newNodeItrs(PGraph::getNumNodes()); - - for (ConstNodeIterator nItr = PGraph::nodesBegin(), - nEnd = PGraph::nodesEnd(); - nItr != nEnd; ++nItr) { - - newNodeItrs[getNodeID(nItr)] = g.addNode(getNodeCosts(nItr)); - } - - for (ConstEdgeIterator - eItr = PGraph::edgesBegin(), eEnd = PGraph::edgesEnd(); - eItr != eEnd; ++eItr) { - - unsigned node1ID = getNodeID(getEdgeNode1(eItr)), - node2ID = getNodeID(getEdgeNode2(eItr)); - - g.addEdge(newNodeItrs[node1ID], newNodeItrs[node2ID], - getEdgeCosts(eItr)); - } - - return g; - } - -}; - - -} - -#endif // LLVM_CODEGEN_PBQP_ANNOTATEDGRAPH_H diff --git a/lib/CodeGen/PBQP/ExhaustiveSolver.h b/lib/CodeGen/PBQP/ExhaustiveSolver.h deleted file mode 100644 index 35ec4f1..0000000 --- a/lib/CodeGen/PBQP/ExhaustiveSolver.h +++ /dev/null @@ -1,110 +0,0 @@ -//===-- ExhaustiveSolver.h - Brute Force PBQP Solver ------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// Uses a trivial brute force algorithm to solve a PBQP problem. -// PBQP is NP-HARD - This solver should only be used for debugging small -// problems. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_CODEGEN_PBQP_EXHAUSTIVESOLVER_H -#define LLVM_CODEGEN_PBQP_EXHAUSTIVESOLVER_H - -#include "Solver.h" - -namespace PBQP { - -/// A brute force PBQP solver. This solver takes exponential time. It should -/// only be used for debugging purposes. -class ExhaustiveSolverImpl { -private: - - const SimpleGraph &g; - - PBQPNum getSolutionCost(const Solution &solution) const { - PBQPNum cost = 0.0; - - for (SimpleGraph::ConstNodeIterator - nodeItr = g.nodesBegin(), nodeEnd = g.nodesEnd(); - nodeItr != nodeEnd; ++nodeItr) { - - unsigned nodeId = g.getNodeID(nodeItr); - - cost += g.getNodeCosts(nodeItr)[solution.getSelection(nodeId)]; - } - - for (SimpleGraph::ConstEdgeIterator - edgeItr = g.edgesBegin(), edgeEnd = g.edgesEnd(); - edgeItr != edgeEnd; ++edgeItr) { - - SimpleGraph::ConstNodeIterator n1 = g.getEdgeNode1Itr(edgeItr), - n2 = g.getEdgeNode2Itr(edgeItr); - unsigned sol1 = solution.getSelection(g.getNodeID(n1)), - sol2 = solution.getSelection(g.getNodeID(n2)); - - cost += g.getEdgeCosts(edgeItr)[sol1][sol2]; - } - - return cost; - } - -public: - - ExhaustiveSolverImpl(const SimpleGraph &g) : g(g) {} - - Solution solve() const { - Solution current(g.getNumNodes(), true), optimal(current); - - PBQPNum bestCost = std::numeric_limits<PBQPNum>::infinity(); - bool finished = false; - - while (!finished) { - PBQPNum currentCost = getSolutionCost(current); - - if (currentCost < bestCost) { - optimal = current; - bestCost = currentCost; - } - - // assume we're done. 
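The loop below is a mixed-radix odometer: increment the lowest digit that still has room and reset every digit that overflows; once all digits wrap, every assignment has been visited. A standalone restatement of that stepping logic (not the deleted file's code):

static bool nextAssignment(std::vector<unsigned> &Sel,
                           const std::vector<unsigned> &Radix) {
  for (unsigned i = 0, e = Sel.size(); i != e; ++i) {
    if (++Sel[i] < Radix[i])
      return true;   // this digit had room; a new assignment is ready
    Sel[i] = 0;      // overflow: reset and carry into the next digit
  }
  return false;      // all digits wrapped: enumeration is complete
}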
- finished = true; - - for (unsigned i = 0; i < g.getNumNodes(); ++i) { - if (current.getSelection(i) == - (g.getNodeCosts(g.getNodeItr(i)).getLength() - 1)) { - current.setSelection(i, 0); - } - else { - current.setSelection(i, current.getSelection(i) + 1); - finished = false; - break; - } - } - - } - - optimal.setSolutionCost(bestCost); - - return optimal; - } - -}; - -class ExhaustiveSolver : public Solver { -public: - ~ExhaustiveSolver() {} - Solution solve(const SimpleGraph &g) const { - ExhaustiveSolverImpl solver(g); - return solver.solve(); - } -}; - -} - -#endif // LLVM_CODGEN_PBQP_EXHAUSTIVESOLVER_HPP diff --git a/lib/CodeGen/PBQP/Graph.h b/lib/CodeGen/PBQP/Graph.h new file mode 100644 index 0000000..b2224cb --- /dev/null +++ b/lib/CodeGen/PBQP/Graph.h @@ -0,0 +1,425 @@ +//===-------------------- Graph.h - PBQP Graph ------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// PBQP Graph class. +// +//===----------------------------------------------------------------------===// + + +#ifndef LLVM_CODEGEN_PBQP_GRAPH_H +#define LLVM_CODEGEN_PBQP_GRAPH_H + +#include "Math.h" + +#include <list> +#include <vector> +#include <map> + +namespace PBQP { + + /// PBQP Graph class. + /// Instances of this class describe PBQP problems. + class Graph { + private: + + // ----- TYPEDEFS ----- + class NodeEntry; + class EdgeEntry; + + typedef std::list<NodeEntry> NodeList; + typedef std::list<EdgeEntry> EdgeList; + + public: + + typedef NodeList::iterator NodeItr; + typedef NodeList::const_iterator ConstNodeItr; + + typedef EdgeList::iterator EdgeItr; + typedef EdgeList::const_iterator ConstEdgeItr; + + private: + + typedef std::list<EdgeItr> AdjEdgeList; + + public: + + typedef AdjEdgeList::iterator AdjEdgeItr; + + private: + + class NodeEntry { + private: + Vector costs; + AdjEdgeList adjEdges; + unsigned degree; + void *data; + public: + NodeEntry(const Vector &costs) : costs(costs), degree(0) {} + Vector& getCosts() { return costs; } + const Vector& getCosts() const { return costs; } + unsigned getDegree() const { return degree; } + AdjEdgeItr edgesBegin() { return adjEdges.begin(); } + AdjEdgeItr edgesEnd() { return adjEdges.end(); } + AdjEdgeItr addEdge(EdgeItr e) { + ++degree; + return adjEdges.insert(adjEdges.end(), e); + } + void removeEdge(AdjEdgeItr ae) { + --degree; + adjEdges.erase(ae); + } + void setData(void *data) { this->data = data; } + void* getData() { return data; } + }; + + class EdgeEntry { + private: + NodeItr node1, node2; + Matrix costs; + AdjEdgeItr node1AEItr, node2AEItr; + void *data; + public: + EdgeEntry(NodeItr node1, NodeItr node2, const Matrix &costs) + : node1(node1), node2(node2), costs(costs) {} + NodeItr getNode1() const { return node1; } + NodeItr getNode2() const { return node2; } + Matrix& getCosts() { return costs; } + const Matrix& getCosts() const { return costs; } + void setNode1AEItr(AdjEdgeItr ae) { node1AEItr = ae; } + AdjEdgeItr getNode1AEItr() { return node1AEItr; } + void setNode2AEItr(AdjEdgeItr ae) { node2AEItr = ae; } + AdjEdgeItr getNode2AEItr() { return node2AEItr; } + void setData(void *data) { this->data = data; } + void *getData() { return data; } + }; + + // ----- MEMBERS ----- + + NodeList nodes; + unsigned numNodes; + + EdgeList edges; + unsigned numEdges; + + // ----- INTERNAL METHODS ----- + + NodeEntry& 
getNode(NodeItr nItr) { return *nItr; } + const NodeEntry& getNode(ConstNodeItr nItr) const { return *nItr; } + + EdgeEntry& getEdge(EdgeItr eItr) { return *eItr; } + const EdgeEntry& getEdge(ConstEdgeItr eItr) const { return *eItr; } + + NodeItr addConstructedNode(const NodeEntry &n) { + ++numNodes; + return nodes.insert(nodes.end(), n); + } + + EdgeItr addConstructedEdge(const EdgeEntry &e) { + assert(findEdge(e.getNode1(), e.getNode2()) == edges.end() && + "Attempt to add duplicate edge."); + ++numEdges; + EdgeItr edgeItr = edges.insert(edges.end(), e); + EdgeEntry &ne = getEdge(edgeItr); + NodeEntry &n1 = getNode(ne.getNode1()); + NodeEntry &n2 = getNode(ne.getNode2()); + // Sanity check on matrix dimensions: + assert((n1.getCosts().getLength() == ne.getCosts().getRows()) && + (n2.getCosts().getLength() == ne.getCosts().getCols()) && + "Edge cost dimensions do not match node costs dimensions."); + ne.setNode1AEItr(n1.addEdge(edgeItr)); + ne.setNode2AEItr(n2.addEdge(edgeItr)); + return edgeItr; + } + + inline void copyFrom(const Graph &other); + public: + + /// \brief Construct an empty PBQP graph. + Graph() : numNodes(0), numEdges(0) {} + + /// \brief Copy construct this graph from "other". Note: Does not copy node + /// and edge data, only graph structure and costs. + /// @param other Source graph to copy from. + Graph(const Graph &other) : numNodes(0), numEdges(0) { + copyFrom(other); + } + + /// \brief Make this graph a copy of "other". Note: Does not copy node and + /// edge data, only graph structure and costs. + /// @param other The graph to copy from. + /// @return A reference to this graph. + /// + /// This will clear the current graph, erasing any nodes and edges added, + /// before copying from other. + Graph& operator=(const Graph &other) { + clear(); + copyFrom(other); + return *this; + } + + /// \brief Add a node with the given costs. + /// @param costs Cost vector for the new node. + /// @return Node iterator for the added node. + NodeItr addNode(const Vector &costs) { + return addConstructedNode(NodeEntry(costs)); + } + + /// \brief Add an edge between the given nodes with the given costs. + /// @param n1Itr First node. + /// @param n2Itr Second node. + /// @return Edge iterator for the added edge. + EdgeItr addEdge(Graph::NodeItr n1Itr, Graph::NodeItr n2Itr, + const Matrix &costs) { + assert(getNodeCosts(n1Itr).getLength() == costs.getRows() && + getNodeCosts(n2Itr).getLength() == costs.getCols() && + "Matrix dimensions mismatch."); + return addConstructedEdge(EdgeEntry(n1Itr, n2Itr, costs)); + } + + /// \brief Get the number of nodes in the graph. + /// @return Number of nodes in the graph. + unsigned getNumNodes() const { return numNodes; } + + /// \brief Get the number of edges in the graph. + /// @return Number of edges in the graph. + unsigned getNumEdges() const { return numEdges; } + + /// \brief Get a node's cost vector. + /// @param nItr Node iterator. + /// @return Node cost vector. + Vector& getNodeCosts(NodeItr nItr) { return getNode(nItr).getCosts(); } + + /// \brief Get a node's cost vector (const version). + /// @param nItr Node iterator. + /// @return Node cost vector. + const Vector& getNodeCosts(ConstNodeItr nItr) const { + return getNode(nItr).getCosts(); + } + + /// \brief Set a node's data pointer. + /// @param nItr Node iterator. + /// @param data Pointer to node data. + /// + /// Typically used by a PBQP solver to attach data to aid in solution. 
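As a usage sketch of the interface defined so far (assuming the Vector and Matrix constructors from Math.h take a size/shape plus a fill value):

PBQP::Graph g;
PBQP::Vector nodeCosts(2, 0.0);              // two options, zero node cost
PBQP::Graph::NodeItr n1 = g.addNode(nodeCosts);
PBQP::Graph::NodeItr n2 = g.addNode(nodeCosts);
PBQP::Matrix conflict(2, 2, 0.0);
conflict[0][0] = conflict[1][1] = 1.0;       // penalize matching choices
g.addEdge(n1, n2, conflict);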
+ void setNodeData(NodeItr nItr, void *data) { getNode(nItr).setData(data); } + + /// \brief Get the node's data pointer. + /// @param nItr Node iterator. + /// @return Pointer to node data. + void* getNodeData(NodeItr nItr) { return getNode(nItr).getData(); } + + /// \brief Get an edge's cost matrix. + /// @param eItr Edge iterator. + /// @return Edge cost matrix. + Matrix& getEdgeCosts(EdgeItr eItr) { return getEdge(eItr).getCosts(); } + + /// \brief Get an edge's cost matrix (const version). + /// @param eItr Edge iterator. + /// @return Edge cost matrix. + const Matrix& getEdgeCosts(ConstEdgeItr eItr) const { + return getEdge(eItr).getCosts(); + } + + /// \brief Set an edge's data pointer. + /// @param eItr Edge iterator. + /// @param data Pointer to edge data. + /// + /// Typically used by a PBQP solver to attach data to aid in solution. + void setEdgeData(EdgeItr eItr, void *data) { getEdge(eItr).setData(data); } + + /// \brief Get an edge's data pointer. + /// @param eItr Edge iterator. + /// @return Pointer to edge data. + void* getEdgeData(EdgeItr eItr) { return getEdge(eItr).getData(); } + + /// \brief Get a node's degree. + /// @param nItr Node iterator. + /// @return The degree of the node. + unsigned getNodeDegree(NodeItr nItr) const { + return getNode(nItr).getDegree(); + } + + /// \brief Begin iterator for node set. + NodeItr nodesBegin() { return nodes.begin(); } + + /// \brief Begin const iterator for node set. + ConstNodeItr nodesBegin() const { return nodes.begin(); } + + /// \brief End iterator for node set. + NodeItr nodesEnd() { return nodes.end(); } + + /// \brief End const iterator for node set. + ConstNodeItr nodesEnd() const { return nodes.end(); } + + /// \brief Begin iterator for edge set. + EdgeItr edgesBegin() { return edges.begin(); } + + /// \brief End iterator for edge set. + EdgeItr edgesEnd() { return edges.end(); } + + /// \brief Get begin iterator for adjacent edge set. + /// @param nItr Node iterator. + /// @return Begin iterator for the set of edges connected to the given node. + AdjEdgeItr adjEdgesBegin(NodeItr nItr) { + return getNode(nItr).edgesBegin(); + } + + /// \brief Get end iterator for adjacent edge set. + /// @param nItr Node iterator. + /// @return End iterator for the set of edges connected to the given node. + AdjEdgeItr adjEdgesEnd(NodeItr nItr) { + return getNode(nItr).edgesEnd(); + } + + /// \brief Get the first node connected to this edge. + /// @param eItr Edge iterator. + /// @return The first node connected to the given edge. + NodeItr getEdgeNode1(EdgeItr eItr) { + return getEdge(eItr).getNode1(); + } + + /// \brief Get the second node connected to this edge. + /// @param eItr Edge iterator. + /// @return The second node connected to the given edge. + NodeItr getEdgeNode2(EdgeItr eItr) { + return getEdge(eItr).getNode2(); + } + + /// \brief Get the "other" node connected to this edge. + /// @param eItr Edge iterator. + /// @param nItr Node iterator for the "given" node. + /// @return The iterator for the "other" node connected to this edge. + NodeItr getEdgeOtherNode(EdgeItr eItr, NodeItr nItr) { + EdgeEntry &e = getEdge(eItr); + if (e.getNode1() == nItr) { + return e.getNode2(); + } // else + return e.getNode1(); + } + + /// \brief Get the edge connecting two nodes. + /// @param n1Itr First node iterator. + /// @param n2Itr Second node iterator. + /// @return An iterator for edge (n1Itr, n2Itr) if such an edge exists, + /// otherwise returns edgesEnd(). 
+    EdgeItr findEdge(NodeItr n1Itr, NodeItr n2Itr) {
+      for (AdjEdgeItr aeItr = adjEdgesBegin(n1Itr), aeEnd = adjEdgesEnd(n1Itr);
+           aeItr != aeEnd; ++aeItr) {
+        if ((getEdgeNode1(*aeItr) == n2Itr) ||
+            (getEdgeNode2(*aeItr) == n2Itr)) {
+          return *aeItr;
+        }
+      }
+      return edges.end();
+    }
+
+    /// \brief Remove a node from the graph.
+    /// @param nItr Node iterator.
+    void removeNode(NodeItr nItr) {
+      NodeEntry &n = getNode(nItr);
+      for (AdjEdgeItr itr = n.edgesBegin(), end = n.edgesEnd(); itr != end;) {
+        EdgeItr eItr = *itr;
+        ++itr;
+        removeEdge(eItr);
+      }
+      nodes.erase(nItr);
+      --numNodes;
+    }
+
+    /// \brief Remove an edge from the graph.
+    /// @param eItr Edge iterator.
+    void removeEdge(EdgeItr eItr) {
+      EdgeEntry &e = getEdge(eItr);
+      NodeEntry &n1 = getNode(e.getNode1());
+      NodeEntry &n2 = getNode(e.getNode2());
+      n1.removeEdge(e.getNode1AEItr());
+      n2.removeEdge(e.getNode2AEItr());
+      edges.erase(eItr);
+      --numEdges;
+    }
+
+    /// \brief Remove all nodes and edges from the graph.
+    void clear() {
+      nodes.clear();
+      edges.clear();
+      numNodes = numEdges = 0;
+    }
+
+    /// \brief Print a representation of this graph in DOT format.
+    /// @param os Output stream to print on.
+    template <typename OStream>
+    void printDot(OStream &os) {
+
+      os << "graph {\n";
+
+      for (NodeItr nodeItr = nodesBegin(), nodeEnd = nodesEnd();
+           nodeItr != nodeEnd; ++nodeItr) {
+
+        // Node addresses are used as stable printable identifiers (the list
+        // iterators themselves are not streamable).
+        os << "  node" << &*nodeItr << " [ label=\""
+           << &*nodeItr << ": " << getNodeCosts(nodeItr) << "\" ]\n";
+      }
+
+      os << "  edge [ len=" << getNumNodes() << " ]\n";
+
+      for (EdgeItr edgeItr = edgesBegin(), edgeEnd = edgesEnd();
+           edgeItr != edgeEnd; ++edgeItr) {
+
+        os << "  node" << &*getEdgeNode1(edgeItr)
+           << " -- node" << &*getEdgeNode2(edgeItr)
+           << " [ label=\"";
+
+        const Matrix &edgeCosts = getEdgeCosts(edgeItr);
+
+        for (unsigned i = 0; i < edgeCosts.getRows(); ++i) {
+          os << edgeCosts.getRowAsVector(i) << "\\n";
+        }
+        os << "\" ]\n";
+      }
+      os << "}\n";
+    }
+
+  };
+
+  class NodeItrComparator {
+  public:
+    bool operator()(Graph::NodeItr n1, Graph::NodeItr n2) const {
+      return &*n1 < &*n2;
+    }
+
+    bool operator()(Graph::ConstNodeItr n1, Graph::ConstNodeItr n2) const {
+      return &*n1 < &*n2;
+    }
+  };
+
+  class EdgeItrComparator {
+  public:
+    bool operator()(Graph::EdgeItr e1, Graph::EdgeItr e2) const {
+      return &*e1 < &*e2;
+    }
+
+    bool operator()(Graph::ConstEdgeItr e1, Graph::ConstEdgeItr e2) const {
+      return &*e1 < &*e2;
+    }
+  };
+
+  void Graph::copyFrom(const Graph &other) {
+    std::map<Graph::ConstNodeItr, Graph::NodeItr,
+             NodeItrComparator> nodeMap;
+
+    for (Graph::ConstNodeItr nItr = other.nodesBegin(),
+                             nEnd = other.nodesEnd();
+         nItr != nEnd; ++nItr) {
+      nodeMap[nItr] = addNode(other.getNodeCosts(nItr));
+    }
+
+    // Copy the edges too, otherwise only the node set would survive the
+    // copy. The edge endpoint accessors have no const overloads, so cast
+    // away constness for the traversal ("other" is not modified).
+    Graph &ncOther = const_cast<Graph&>(other);
+    for (Graph::EdgeItr eItr = ncOther.edgesBegin(), eEnd = ncOther.edgesEnd();
+         eItr != eEnd; ++eItr) {
+      addEdge(nodeMap[ncOther.getEdgeNode1(eItr)],
+              nodeMap[ncOther.getEdgeNode2(eItr)],
+              ncOther.getEdgeCosts(eItr));
+    }
+  }
+
+}
+
+#endif // LLVM_CODEGEN_PBQP_GRAPH_HPP
diff --git a/lib/CodeGen/PBQP/GraphBase.h b/lib/CodeGen/PBQP/GraphBase.h
deleted file mode 100644
index becd98a..0000000
--- a/lib/CodeGen/PBQP/GraphBase.h
+++ /dev/null
@@ -1,582 +0,0 @@
-//===-- GraphBase.h - Abstract Base PBQP Graph ------------------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// Base class for PBQP Graphs.
-// -//===----------------------------------------------------------------------===// - - -#ifndef LLVM_CODEGEN_PBQP_GRAPHBASE_H -#define LLVM_CODEGEN_PBQP_GRAPHBASE_H - -#include "PBQPMath.h" - -#include <list> -#include <vector> - -namespace PBQP { - -// UGLY, but I'm not sure there's a good way around this: We need to be able to -// look up a Node's "adjacent edge list" structure type before the Node type is -// fully constructed. We can enable this by pushing the choice of data type -// out into this traits class. -template <typename Graph> -class NodeBaseTraits { - public: - typedef std::list<typename Graph::EdgeIterator> AdjEdgeList; - typedef typename AdjEdgeList::iterator AdjEdgeIterator; - typedef typename AdjEdgeList::const_iterator ConstAdjEdgeIterator; -}; - -/// \brief Base for concrete graph classes. Provides a basic set of graph -/// operations which are useful for PBQP solvers. -template <typename NodeEntry, typename EdgeEntry> -class GraphBase { -private: - - typedef GraphBase<NodeEntry, EdgeEntry> ThisGraphT; - - typedef std::list<NodeEntry> NodeList; - typedef std::list<EdgeEntry> EdgeList; - - NodeList nodeList; - unsigned nodeListSize; - - EdgeList edgeList; - unsigned edgeListSize; - - GraphBase(const ThisGraphT &other) { abort(); } - void operator=(const ThisGraphT &other) { abort(); } - -public: - - /// \brief Iterates over the nodes of a graph. - typedef typename NodeList::iterator NodeIterator; - /// \brief Iterates over the nodes of a const graph. - typedef typename NodeList::const_iterator ConstNodeIterator; - /// \brief Iterates over the edges of a graph. - typedef typename EdgeList::iterator EdgeIterator; - /// \brief Iterates over the edges of a const graph. - typedef typename EdgeList::const_iterator ConstEdgeIterator; - - /// \brief Iterates over the edges attached to a node. - typedef typename NodeBaseTraits<ThisGraphT>::AdjEdgeIterator - AdjEdgeIterator; - - /// \brief Iterates over the edges attached to a node in a const graph. - typedef typename NodeBaseTraits<ThisGraphT>::ConstAdjEdgeIterator - ConstAdjEdgeIterator; - -private: - - typedef std::vector<NodeIterator> IDToNodeMap; - - IDToNodeMap idToNodeMap; - bool nodeIDsValid; - - void invalidateNodeIDs() { - if (nodeIDsValid) { - idToNodeMap.clear(); - nodeIDsValid = false; - } - } - - template <typename ItrT> - bool iteratorInRange(ItrT itr, const ItrT &begin, const ItrT &end) { - for (ItrT t = begin; t != end; ++t) { - if (itr == t) - return true; - } - - return false; - } - -protected: - - GraphBase() : nodeListSize(0), edgeListSize(0), nodeIDsValid(false) {} - - NodeEntry& getNodeEntry(const NodeIterator &nodeItr) { return *nodeItr; } - const NodeEntry& getNodeEntry(const ConstNodeIterator &nodeItr) const { - return *nodeItr; - } - - EdgeEntry& getEdgeEntry(const EdgeIterator &edgeItr) { return *edgeItr; } - const EdgeEntry& getEdgeEntry(const ConstEdgeIterator &edgeItr) const { - return *edgeItr; - } - - NodeIterator addConstructedNode(const NodeEntry &nodeEntry) { - ++nodeListSize; - - invalidateNodeIDs(); - - NodeIterator newNodeItr = nodeList.insert(nodeList.end(), nodeEntry); - - return newNodeItr; - } - - EdgeIterator addConstructedEdge(const EdgeEntry &edgeEntry) { - - assert((findEdge(edgeEntry.getNode1Itr(), edgeEntry.getNode2Itr()) - == edgeList.end()) && "Attempt to add duplicate edge."); - - ++edgeListSize; - - // Add the edge to the graph. - EdgeIterator edgeItr = edgeList.insert(edgeList.end(), edgeEntry); - - // Get a reference to the version in the graph. 
- EdgeEntry &newEdgeEntry = getEdgeEntry(edgeItr); - - // Node entries: - NodeEntry &node1Entry = getNodeEntry(newEdgeEntry.getNode1Itr()), - &node2Entry = getNodeEntry(newEdgeEntry.getNode2Itr()); - - // Sanity check on matrix dimensions. - assert((node1Entry.getCosts().getLength() == - newEdgeEntry.getCosts().getRows()) && - (node2Entry.getCosts().getLength() == - newEdgeEntry.getCosts().getCols()) && - "Matrix dimensions do not match cost vector dimensions."); - - // Create links between nodes and edges. - newEdgeEntry.setNode1ThisEdgeItr( - node1Entry.addAdjEdge(edgeItr)); - newEdgeEntry.setNode2ThisEdgeItr( - node2Entry.addAdjEdge(edgeItr)); - - return edgeItr; - } - -public: - - /// \brief Returns the number of nodes in this graph. - unsigned getNumNodes() const { return nodeListSize; } - - /// \brief Returns the number of edges in this graph. - unsigned getNumEdges() const { return edgeListSize; } - - /// \brief Return the cost vector for the given node. - Vector& getNodeCosts(const NodeIterator &nodeItr) { - return getNodeEntry(nodeItr).getCosts(); - } - - /// \brief Return the cost vector for the give node. - const Vector& getNodeCosts(const ConstNodeIterator &nodeItr) const { - return getNodeEntry(nodeItr).getCosts(); - } - - /// \brief Return the degree of the given node. - unsigned getNodeDegree(const NodeIterator &nodeItr) const { - return getNodeEntry(nodeItr).getDegree(); - } - - /// \brief Assigns sequential IDs to the nodes, starting at 0, which - /// remain valid until the next addition or removal of a node. - void assignNodeIDs() { - unsigned curID = 0; - idToNodeMap.resize(getNumNodes()); - for (NodeIterator nodeItr = nodesBegin(), nodeEnd = nodesEnd(); - nodeItr != nodeEnd; ++nodeItr, ++curID) { - getNodeEntry(nodeItr).setID(curID); - idToNodeMap[curID] = nodeItr; - } - nodeIDsValid = true; - } - - /// \brief Assigns sequential IDs to the nodes using the ordering of the - /// given vector. - void assignNodeIDs(const std::vector<NodeIterator> &nodeOrdering) { - assert((getNumNodes() == nodeOrdering.size()) && - "Wrong number of nodes in node ordering."); - idToNodeMap = nodeOrdering; - for (unsigned nodeID = 0; nodeID < idToNodeMap.size(); ++nodeID) { - getNodeEntry(idToNodeMap[nodeID]).setID(nodeID); - } - nodeIDsValid = true; - } - - /// \brief Returns true if valid node IDs are assigned, false otherwise. - bool areNodeIDsValid() const { return nodeIDsValid; } - - /// \brief Return the numeric ID of the given node. - /// - /// Calls to this method will result in an assertion failure if there have - /// been any node additions or removals since the last call to - /// assignNodeIDs(). - unsigned getNodeID(const ConstNodeIterator &nodeItr) const { - assert(nodeIDsValid && "Attempt to retrieve invalid ID."); - return getNodeEntry(nodeItr).getID(); - } - - /// \brief Returns the iterator associated with the given node ID. - NodeIterator getNodeItr(unsigned nodeID) { - assert(nodeIDsValid && "Attempt to retrieve iterator with invalid ID."); - return idToNodeMap[nodeID]; - } - - /// \brief Returns the iterator associated with the given node ID. - ConstNodeIterator getNodeItr(unsigned nodeID) const { - assert(nodeIDsValid && "Attempt to retrieve iterator with invalid ID."); - return idToNodeMap[nodeID]; - } - - /// \brief Removes the given node (and all attached edges) from the graph. 
- void removeNode(const NodeIterator &nodeItr) { - assert(iteratorInRange(nodeItr, nodeList.begin(), nodeList.end()) && - "Iterator does not belong to this graph!"); - - invalidateNodeIDs(); - - NodeEntry &nodeEntry = getNodeEntry(nodeItr); - - // We need to copy this out because it will be destroyed as the edges are - // removed. - typedef std::vector<EdgeIterator> AdjEdgeList; - typedef typename AdjEdgeList::iterator AdjEdgeListItr; - - AdjEdgeList adjEdges; - adjEdges.reserve(nodeEntry.getDegree()); - std::copy(nodeEntry.adjEdgesBegin(), nodeEntry.adjEdgesEnd(), - std::back_inserter(adjEdges)); - - // Iterate over the copied out edges and remove them from the graph. - for (AdjEdgeListItr itr = adjEdges.begin(), end = adjEdges.end(); - itr != end; ++itr) { - removeEdge(*itr); - } - - // Erase the node from the nodelist. - nodeList.erase(nodeItr); - --nodeListSize; - } - - NodeIterator nodesBegin() { return nodeList.begin(); } - ConstNodeIterator nodesBegin() const { return nodeList.begin(); } - NodeIterator nodesEnd() { return nodeList.end(); } - ConstNodeIterator nodesEnd() const { return nodeList.end(); } - - AdjEdgeIterator adjEdgesBegin(const NodeIterator &nodeItr) { - return getNodeEntry(nodeItr).adjEdgesBegin(); - } - - ConstAdjEdgeIterator adjEdgesBegin(const ConstNodeIterator &nodeItr) const { - return getNodeEntry(nodeItr).adjEdgesBegin(); - } - - AdjEdgeIterator adjEdgesEnd(const NodeIterator &nodeItr) { - return getNodeEntry(nodeItr).adjEdgesEnd(); - } - - ConstAdjEdgeIterator adjEdgesEnd(const ConstNodeIterator &nodeItr) const { - getNodeEntry(nodeItr).adjEdgesEnd(); - } - - EdgeIterator findEdge(const NodeIterator &node1Itr, - const NodeIterator &node2Itr) { - - for (AdjEdgeIterator adjEdgeItr = adjEdgesBegin(node1Itr), - adjEdgeEnd = adjEdgesEnd(node1Itr); - adjEdgeItr != adjEdgeEnd; ++adjEdgeItr) { - if ((getEdgeNode1Itr(*adjEdgeItr) == node2Itr) || - (getEdgeNode2Itr(*adjEdgeItr) == node2Itr)) { - return *adjEdgeItr; - } - } - - return edgeList.end(); - } - - ConstEdgeIterator findEdge(const ConstNodeIterator &node1Itr, - const ConstNodeIterator &node2Itr) const { - - for (ConstAdjEdgeIterator adjEdgeItr = adjEdgesBegin(node1Itr), - adjEdgeEnd = adjEdgesEnd(node1Itr); - adjEdgeItr != adjEdgeEnd; ++adjEdgeItr) { - if ((getEdgeNode1Itr(*adjEdgeItr) == node2Itr) || - (getEdgeNode2Itr(*adjEdgeItr) == node2Itr)) { - return *adjEdgeItr; - } - } - - return edgeList.end(); - } - - Matrix& getEdgeCosts(const EdgeIterator &edgeItr) { - return getEdgeEntry(edgeItr).getCosts(); - } - - const Matrix& getEdgeCosts(const ConstEdgeIterator &edgeItr) const { - return getEdgeEntry(edgeItr).getCosts(); - } - - NodeIterator getEdgeNode1Itr(const EdgeIterator &edgeItr) { - return getEdgeEntry(edgeItr).getNode1Itr(); - } - - ConstNodeIterator getEdgeNode1Itr(const ConstEdgeIterator &edgeItr) const { - return getEdgeEntry(edgeItr).getNode1Itr(); - } - - NodeIterator getEdgeNode2Itr(const EdgeIterator &edgeItr) { - return getEdgeEntry(edgeItr).getNode2Itr(); - } - - ConstNodeIterator getEdgeNode2Itr(const ConstEdgeIterator &edgeItr) const { - return getEdgeEntry(edgeItr).getNode2Itr(); - } - - NodeIterator getEdgeOtherNode(const EdgeIterator &edgeItr, - const NodeIterator &nodeItr) { - - EdgeEntry &edgeEntry = getEdgeEntry(edgeItr); - if (nodeItr == edgeEntry.getNode1Itr()) { - return edgeEntry.getNode2Itr(); - } - //else - return edgeEntry.getNode1Itr(); - } - - ConstNodeIterator getEdgeOtherNode(const ConstEdgeIterator &edgeItr, - const ConstNodeIterator &nodeItr) const { - - const EdgeEntry 
&edgeEntry = getEdgeEntry(edgeItr); - if (nodeItr == edgeEntry.getNode1Itr()) { - return edgeEntry.getNode2Itr(); - } - //else - return edgeEntry.getNode1Itr(); - } - - void removeEdge(const EdgeIterator &edgeItr) { - assert(iteratorInRange(edgeItr, edgeList.begin(), edgeList.end()) && - "Iterator does not belong to this graph!"); - - --edgeListSize; - - // Get the edge entry. - EdgeEntry &edgeEntry = getEdgeEntry(edgeItr); - - // Get the nodes entry. - NodeEntry &node1Entry(getNodeEntry(edgeEntry.getNode1Itr())), - &node2Entry(getNodeEntry(edgeEntry.getNode2Itr())); - - // Disconnect the edge from the nodes. - node1Entry.removeAdjEdge(edgeEntry.getNode1ThisEdgeItr()); - node2Entry.removeAdjEdge(edgeEntry.getNode2ThisEdgeItr()); - - // Remove the edge from the graph. - edgeList.erase(edgeItr); - } - - EdgeIterator edgesBegin() { return edgeList.begin(); } - ConstEdgeIterator edgesBegin() const { return edgeList.begin(); } - EdgeIterator edgesEnd() { return edgeList.end(); } - ConstEdgeIterator edgesEnd() const { return edgeList.end(); } - - void clear() { - nodeList.clear(); - nodeListSize = 0; - edgeList.clear(); - edgeListSize = 0; - idToNodeMap.clear(); - } - - template <typename OStream> - void printDot(OStream &os) const { - - assert(areNodeIDsValid() && - "Cannot print a .dot of a graph unless IDs have been assigned."); - - os << "graph {\n"; - - for (ConstNodeIterator nodeItr = nodesBegin(), nodeEnd = nodesEnd(); - nodeItr != nodeEnd; ++nodeItr) { - - os << " node" << getNodeID(nodeItr) << " [ label=\"" - << getNodeID(nodeItr) << ": " << getNodeCosts(nodeItr) << "\" ]\n"; - } - - os << " edge [ len=" << getNumNodes() << " ]\n"; - - for (ConstEdgeIterator edgeItr = edgesBegin(), edgeEnd = edgesEnd(); - edgeItr != edgeEnd; ++edgeItr) { - - os << " node" << getNodeID(getEdgeNode1Itr(edgeItr)) - << " -- node" << getNodeID(getEdgeNode2Itr(edgeItr)) - << " [ label=\""; - - const Matrix &edgeCosts = getEdgeCosts(edgeItr); - - for (unsigned i = 0; i < edgeCosts.getRows(); ++i) { - os << edgeCosts.getRowAsVector(i) << "\\n"; - } - - os << "\" ]\n"; - } - - os << "}\n"; - } - - template <typename OStream> - void printDot(OStream &os) { - if (!areNodeIDsValid()) { - assignNodeIDs(); - } - - const_cast<const ThisGraphT*>(this)->printDot(os); - } - - template <typename OStream> - void dumpTo(OStream &os) const { - typedef ConstNodeIterator ConstNodeID; - - assert(areNodeIDsValid() && - "Cannot dump a graph unless IDs have been assigned."); - - for (ConstNodeIterator nItr = nodesBegin(), nEnd = nodesEnd(); - nItr != nEnd; ++nItr) { - os << getNodeID(nItr) << "\n"; - } - - unsigned edgeNumber = 1; - for (ConstEdgeIterator eItr = edgesBegin(), eEnd = edgesEnd(); - eItr != eEnd; ++eItr) { - - os << edgeNumber++ << ": { " - << getNodeID(getEdgeNode1Itr(eItr)) << ", " - << getNodeID(getEdgeNode2Itr(eItr)) << " }\n"; - } - - } - - template <typename OStream> - void dumpTo(OStream &os) { - if (!areNodeIDsValid()) { - assignNodeIDs(); - } - - const_cast<const ThisGraphT*>(this)->dumpTo(os); - } - -}; - -/// \brief Provides a base from which to derive nodes for GraphBase. 
-template <typename NodeImpl, typename EdgeImpl>
-class NodeBase {
-private:
-
-  typedef GraphBase<NodeImpl, EdgeImpl> GraphBaseT;
-  typedef NodeBaseTraits<GraphBaseT> ThisNodeBaseTraits;
-
-public:
-  typedef typename GraphBaseT::EdgeIterator EdgeIterator;
-
-private:
-  typedef typename ThisNodeBaseTraits::AdjEdgeList AdjEdgeList;
-
-  unsigned degree, id;
-  Vector costs;
-  AdjEdgeList adjEdges;
-
-  void operator=(const NodeBase& other) {
-    assert(false && "Can't assign NodeEntrys.");
-  }
-
-public:
-
-  typedef typename ThisNodeBaseTraits::AdjEdgeIterator AdjEdgeIterator;
-  typedef typename ThisNodeBaseTraits::ConstAdjEdgeIterator
-    ConstAdjEdgeIterator;
-
-  NodeBase(const Vector &costs) : degree(0), costs(costs) {
-    assert((costs.getLength() > 0) && "Can't have zero-length cost vector.");
-  }
-
-  Vector& getCosts() { return costs; }
-  const Vector& getCosts() const { return costs; }
-
-  unsigned getDegree() const { return degree; }
-
-  void setID(unsigned id) { this->id = id; }
-  unsigned getID() const { return id; }
-
-  AdjEdgeIterator addAdjEdge(const EdgeIterator &edgeItr) {
-    ++degree;
-    return adjEdges.insert(adjEdges.end(), edgeItr);
-  }
-
-  void removeAdjEdge(const AdjEdgeIterator &adjEdgeItr) {
-    --degree;
-    adjEdges.erase(adjEdgeItr);
-  }
-
-  AdjEdgeIterator adjEdgesBegin() { return adjEdges.begin(); }
-  ConstAdjEdgeIterator adjEdgesBegin() const { return adjEdges.begin(); }
-  AdjEdgeIterator adjEdgesEnd() { return adjEdges.end(); }
-  ConstAdjEdgeIterator adjEdgesEnd() const { return adjEdges.end(); }
-
-};
-
-template <typename NodeImpl, typename EdgeImpl>
-class EdgeBase {
-public:
-  typedef typename GraphBase<NodeImpl, EdgeImpl>::NodeIterator NodeIterator;
-  typedef typename GraphBase<NodeImpl, EdgeImpl>::EdgeIterator EdgeIterator;
-
-  typedef typename NodeImpl::AdjEdgeIterator NodeAdjEdgeIterator;
-
-private:
-
-  NodeIterator node1Itr, node2Itr;
-  NodeAdjEdgeIterator node1ThisEdgeItr, node2ThisEdgeItr;
-  Matrix costs;
-
-  void operator=(const EdgeBase &other) {
-    assert(false && "Can't assign EdgeEntrys.");
-  }
-
-public:
-
-  EdgeBase(const NodeIterator &node1Itr, const NodeIterator &node2Itr,
-           const Matrix &costs) :
-    node1Itr(node1Itr), node2Itr(node2Itr), costs(costs) {
-
-    assert((costs.getRows() > 0) && (costs.getCols() > 0) &&
-           "Can't have zero-dimensioned cost matrices");
-  }
-
-  Matrix& getCosts() { return costs; }
-  const Matrix& getCosts() const { return costs; }
-
-  const NodeIterator& getNode1Itr() const { return node1Itr; }
-  const NodeIterator& getNode2Itr() const { return node2Itr; }
-
-  void setNode1ThisEdgeItr(const NodeAdjEdgeIterator &node1ThisEdgeItr) {
-    this->node1ThisEdgeItr = node1ThisEdgeItr;
-  }
-
-  const NodeAdjEdgeIterator& getNode1ThisEdgeItr() const {
-    return node1ThisEdgeItr;
-  }
-
-  void setNode2ThisEdgeItr(const NodeAdjEdgeIterator &node2ThisEdgeItr) {
-    this->node2ThisEdgeItr = node2ThisEdgeItr;
-  }
-
-  const NodeAdjEdgeIterator& getNode2ThisEdgeItr() const {
-    return node2ThisEdgeItr;
-  }
-
-};
-
-
-}
-
-#endif // LLVM_CODEGEN_PBQP_GRAPHBASE_HPP
diff --git a/lib/CodeGen/PBQP/HeuristicBase.h b/lib/CodeGen/PBQP/HeuristicBase.h
new file mode 100644
index 0000000..3bb24e1
--- /dev/null
+++ b/lib/CodeGen/PBQP/HeuristicBase.h
@@ -0,0 +1,242 @@
+//===-- HeuristicBase.h - Heuristic base class for PBQP ---------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_PBQP_HEURISTICBASE_H
+#define LLVM_CODEGEN_PBQP_HEURISTICBASE_H
+
+#include "HeuristicSolver.h"
+
+namespace PBQP {
+
+  /// \brief Abstract base class for heuristic implementations.
+  ///
+  /// This class provides a handy base for heuristic implementations, with
+  /// common solver behaviour already implemented for a number of methods.
+  ///
+  /// To implement your own heuristic using this class as a base you'll have to
+  /// implement, as a minimum, the following methods:
+  /// <ul>
+  ///   <li> void addToHeuristicReduceList(Graph::NodeItr) : Add a node to the
+  ///        heuristic reduction list.
+  ///   <li> bool heuristicReduce() : Perform a single heuristic reduction,
+  ///        returning true if a reduction was performed.
+  ///   <li> void preUpdateEdgeCosts(Graph::EdgeItr) : Handle the (imminent)
+  ///        change to the cost matrix on the given edge (by R2).
+  ///   <li> void postUpdateEdgeCosts(Graph::EdgeItr) : Handle the new
+  ///        costs on the given edge.
+  ///   <li> void handleAddEdge(Graph::EdgeItr) : Handle the addition of a new
+  ///        edge into the PBQP graph (by R2).
+  ///   <li> void handleRemoveEdge(Graph::EdgeItr, Graph::NodeItr) : Handle the
+  ///        disconnection of the given edge from the given node.
+  ///   <li> A constructor for your derived class : to pass back a reference to
+  ///        the solver which is using this heuristic.
+  /// </ul>
+  ///
+  /// These methods are implemented in this class for documentation purposes,
+  /// but will assert if called.
+  ///
+  /// Note that this class uses the curiously recurring template idiom to
+  /// forward calls to the derived class. These methods need not be made
+  /// virtual, and indeed probably shouldn't for performance reasons.
+  ///
+  /// You'll also need to provide NodeData and EdgeData structs in your class.
+  /// These can be used to attach data relevant to your heuristic to each
+  /// node/edge in the PBQP graph.
+
+  template <typename HImpl>
+  class HeuristicBase {
+  private:
+
+    typedef std::list<Graph::NodeItr> OptimalList;
+
+    HeuristicSolverImpl<HImpl> &s;
+    Graph &g;
+    OptimalList optimalList;
+
+    // Return a reference to the derived heuristic.
+    HImpl& impl() { return static_cast<HImpl&>(*this); }
+
+    // Add the given node to the optimal reductions list.
+    void addToOptimalReductionList(Graph::NodeItr nItr) {
+      optimalList.insert(optimalList.end(), nItr);
+    }
+
+  public:
+
+    /// \brief Construct an instance with a reference to the given solver.
+    /// @param solver The solver which is using this heuristic instance.
+    HeuristicBase(HeuristicSolverImpl<HImpl> &solver)
+      : s(solver), g(s.getGraph()) { }
+
+    /// \brief Get the solver which is using this heuristic instance.
+    /// @return The solver which is using this heuristic instance.
+    ///
+    /// You can use this method to get access to the solver in your derived
+    /// heuristic implementation.
+    HeuristicSolverImpl<HImpl>& getSolver() { return s; }
+
+    /// \brief Get the graph representing the problem to be solved.
+    /// @return The graph representing the problem to be solved.
+    Graph& getGraph() { return g; }
+
+    /// \brief Tell the solver to simplify the graph before the reduction phase.
+    /// @return Whether or not the solver should run a simplification phase
+    ///         prior to the main setup and reduction.
+    ///
+    /// HeuristicBase returns true from this method as it's a sensible default,
+    /// however you can over-ride it in your derived class if you want different
+    /// behaviour.
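+    ///
+    /// For example, a derived heuristic that wanted to skip the
+    /// simplification phase could over-ride it as follows (MyHeuristic is
+    /// hypothetical):
+    /// \code
+    ///   class MyHeuristic : public HeuristicBase<MyHeuristic> {
+    ///   public:
+    ///     bool solverRunSimplify() const { return false; }
+    ///     // ... remaining heuristic methods ...
+    ///   };
+    /// \endcode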
+    bool solverRunSimplify() const { return true; }
+
+    /// \brief Decide whether a node should be optimally or heuristically
+    ///        reduced.
+    /// @return Whether or not the given node should be listed for optimal
+    ///         reduction (via R0, R1 or R2).
+    ///
+    /// HeuristicBase returns true for any node with degree less than 3. This
+    /// is sane and sensible for many situations, but not all. You can
+    /// over-ride this method in your derived class if you want different
+    /// selection criteria. Note, however, that your criteria for selecting
+    /// optimal nodes should be <i>at least</i> as strong as this, i.e. nodes
+    /// of degree 3 or higher should not be selected under any circumstances.
+    bool shouldOptimallyReduce(Graph::NodeItr nItr) {
+      return g.getNodeDegree(nItr) < 3;
+    }
+
+    /// \brief Add the given node to the list of nodes to be optimally reduced.
+    /// @param nItr Node iterator to be added.
+    ///
+    /// You probably don't want to over-ride this, except perhaps to record
+    /// statistics before calling this implementation. HeuristicBase relies on
+    /// its behaviour.
+    void addToOptimalReduceList(Graph::NodeItr nItr) {
+      optimalList.push_back(nItr);
+    }
+
+    /// \brief Initialise the heuristic.
+    ///
+    /// HeuristicBase iterates over all nodes in the problem and adds them to
+    /// the appropriate list using addToOptimalReduceList or
+    /// addToHeuristicReduceList based on the result of shouldOptimallyReduce.
+    ///
+    /// This behaviour should be fine for most situations.
+    void setup() {
+      for (Graph::NodeItr nItr = g.nodesBegin(), nEnd = g.nodesEnd();
+           nItr != nEnd; ++nItr) {
+        if (impl().shouldOptimallyReduce(nItr)) {
+          addToOptimalReduceList(nItr);
+        } else {
+          impl().addToHeuristicReduceList(nItr);
+        }
+      }
+    }
+
+    /// \brief Optimally reduce one of the nodes in the optimal reduce list.
+    /// @return True if a reduction takes place, false if the optimal reduce
+    ///         list is empty.
+    ///
+    /// Selects a node from the optimal reduce list and removes it, applying
+    /// R0, R1 or R2 as appropriate based on the selected node's degree.
+    bool optimalReduce() {
+      if (optimalList.empty())
+        return false;
+
+      Graph::NodeItr nItr = optimalList.front();
+      optimalList.pop_front();
+
+      switch (s.getSolverDegree(nItr)) {
+        case 0: s.applyR0(nItr); break;
+        case 1: s.applyR1(nItr); break;
+        case 2: s.applyR2(nItr); break;
+        default: assert(false &&
+                        "Optimal reductions of degree > 2 nodes are invalid.");
+      }
+
+      return true;
+    }
+
+    /// \brief Perform the PBQP reduction process.
+    ///
+    /// Reduces the problem to the empty graph by repeated application of the
+    /// reduction rules R0, R1, R2 and RN.
+    /// R0, R1 or R2 are always applied if possible before RN is used.
+    void reduce() {
+      bool finished = false;
+
+      while (!finished) {
+        if (!optimalReduce())
+          if (!impl().heuristicReduce())
+            finished = true;
+      }
+    }
+
+    /// \brief Add a node to the heuristic reduce list.
+    /// @param nItr Node iterator to add to the heuristic reduce list.
+    void addToHeuristicReduceList(Graph::NodeItr nItr) {
+      assert(false && "Must be implemented in derived class.");
+    }
+
+    /// \brief Heuristically reduce one of the nodes in the heuristic
+    ///        reduce list.
+    /// @return True if a reduction takes place, false if the heuristic reduce
+    ///         list is empty.
+    bool heuristicReduce() {
+      assert(false && "Must be implemented in derived class.");
+      return false;
+    }
+
+    /// \brief Prepare a change in the costs on the given edge.
+    /// @param eItr Edge iterator.
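+    ///
+    /// The solver calls this immediately before an R2 application adds a
+    /// delta matrix into the cost matrix of an existing edge, giving the
+    /// heuristic a chance to tear down any data which depends on the old
+    /// costs.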
+    void preUpdateEdgeCosts(Graph::EdgeItr eItr) {
+      assert(false && "Must be implemented in derived class.");
+    }
+
+    /// \brief Handle the change in the costs on the given edge.
+    /// @param eItr Edge iterator.
+    void postUpdateEdgeCosts(Graph::EdgeItr eItr) {
+      assert(false && "Must be implemented in derived class.");
+    }
+
+    /// \brief Handle the addition of a new edge into the PBQP graph.
+    /// @param eItr Edge iterator for the added edge.
+    void handleAddEdge(Graph::EdgeItr eItr) {
+      assert(false && "Must be implemented in derived class.");
+    }
+
+    /// \brief Handle disconnection of an edge from a node.
+    /// @param eItr Edge iterator for edge being disconnected.
+    /// @param nItr Node iterator for the node being disconnected from.
+    ///
+    /// Edges are frequently removed due to the removal of a node. This
+    /// method allows for the effect to be computed only for the remaining
+    /// node in the graph.
+    void handleRemoveEdge(Graph::EdgeItr eItr, Graph::NodeItr nItr) {
+      assert(false && "Must be implemented in derived class.");
+    }
+
+    /// \brief Clean up any structures used by HeuristicBase.
+    ///
+    /// At present this just performs a sanity check: that the optimal reduce
+    /// list is empty now that reduction has completed.
+    ///
+    /// If your derived class has more complex structures which need tearing
+    /// down you should over-ride this method but include a call back to this
+    /// implementation.
+    void cleanup() {
+      assert(optimalList.empty() && "Nodes left over in optimal reduce list?");
+    }
+
+  };
+
+}
+
+
+#endif // LLVM_CODEGEN_PBQP_HEURISTICBASE_H
diff --git a/lib/CodeGen/PBQP/HeuristicSolver.h b/lib/CodeGen/PBQP/HeuristicSolver.h
index f78a58a..c156264 100644
--- a/lib/CodeGen/PBQP/HeuristicSolver.h
+++ b/lib/CodeGen/PBQP/HeuristicSolver.h
@@ -1,4 +1,4 @@
-//===-- HeuristicSolver.h - Heuristic PBQP Solver ---------------*- C++ -*-===//
+//===-- HeuristicSolver.h - Heuristic PBQP Solver --------------*- C++ -*-===//
 //
 // The LLVM Compiler Infrastructure
 //
@@ -9,780 +9,598 @@
 //
 // Heuristic PBQP solver. This solver is able to perform optimal reductions for
 // nodes of degree 0, 1 or 2. For nodes of degree >2 a plugable heuristic is
-// used to to select a node for reduction.
+// used to select a node for reduction.
 //
 //===----------------------------------------------------------------------===//
 
 #ifndef LLVM_CODEGEN_PBQP_HEURISTICSOLVER_H
 #define LLVM_CODEGEN_PBQP_HEURISTICSOLVER_H
 
-#include "Solver.h"
-#include "AnnotatedGraph.h"
-#include "llvm/Support/raw_ostream.h"
+#include "Graph.h"
+#include "Solution.h"
+#include <vector>
 #include <limits>
 
 namespace PBQP {
 
-/// \brief Important types for the HeuristicSolverImpl.
-///
-/// Declared seperately to allow access to heuristic classes before the solver
-/// is fully constructed.
-template <typename HeuristicNodeData, typename HeuristicEdgeData>
-class HSITypes {
-public:
-
-  class NodeData;
-  class EdgeData;
-
-  typedef AnnotatedGraph<NodeData, EdgeData> SolverGraph;
-  typedef typename SolverGraph::NodeIterator GraphNodeIterator;
-  typedef typename SolverGraph::EdgeIterator GraphEdgeIterator;
-  typedef typename SolverGraph::AdjEdgeIterator GraphAdjEdgeIterator;
-
-  typedef std::list<GraphNodeIterator> NodeList;
-  typedef typename NodeList::iterator NodeListIterator;
-
-  typedef std::vector<GraphNodeIterator> NodeStack;
-  typedef typename NodeStack::iterator NodeStackIterator;
-
-  class NodeData {
-    friend class EdgeData;
-
+  /// \brief Heuristic PBQP solver implementation.
+ /// + /// This class should usually be created (and destroyed) indirectly via a call + /// to HeuristicSolver<HImpl>::solve(Graph&). + /// See the comments for HeuristicSolver. + /// + /// HeuristicSolverImpl provides the R0, R1 and R2 reduction rules, + /// backpropagation phase, and maintains the internal copy of the graph on + /// which the reduction is carried out (the original being kept to facilitate + /// backpropagation). + template <typename HImpl> + class HeuristicSolverImpl { private: - typedef std::list<GraphEdgeIterator> LinksList; + typedef typename HImpl::NodeData HeuristicNodeData; + typedef typename HImpl::EdgeData HeuristicEdgeData; - unsigned numLinks; - LinksList links, solvedLinks; - NodeListIterator bucketItr; - HeuristicNodeData heuristicData; + typedef std::list<Graph::EdgeItr> SolverEdges; public: - - typedef typename LinksList::iterator AdjLinkIterator; + + /// \brief Iterator type for edges in the solver graph. + typedef SolverEdges::iterator SolverEdgeItr; private: - AdjLinkIterator addLink(const GraphEdgeIterator &edgeItr) { - ++numLinks; - return links.insert(links.end(), edgeItr); - } + class NodeData { + public: + NodeData() : solverDegree(0) {} - void delLink(const AdjLinkIterator &adjLinkItr) { - --numLinks; - links.erase(adjLinkItr); - } + HeuristicNodeData& getHeuristicData() { return hData; } - public: - - NodeData() : numLinks(0) {} - - unsigned getLinkDegree() const { return numLinks; } - - HeuristicNodeData& getHeuristicData() { return heuristicData; } - const HeuristicNodeData& getHeuristicData() const { - return heuristicData; - } + SolverEdgeItr addSolverEdge(Graph::EdgeItr eItr) { + ++solverDegree; + return solverEdges.insert(solverEdges.end(), eItr); + } - void setBucketItr(const NodeListIterator &bucketItr) { - this->bucketItr = bucketItr; - } + void removeSolverEdge(SolverEdgeItr seItr) { + --solverDegree; + solverEdges.erase(seItr); + } - const NodeListIterator& getBucketItr() const { - return bucketItr; - } + SolverEdgeItr solverEdgesBegin() { return solverEdges.begin(); } + SolverEdgeItr solverEdgesEnd() { return solverEdges.end(); } + unsigned getSolverDegree() const { return solverDegree; } + void clearSolverEdges() { + solverDegree = 0; + solverEdges.clear(); + } + + private: + HeuristicNodeData hData; + unsigned solverDegree; + SolverEdges solverEdges; + }; + + class EdgeData { + public: + HeuristicEdgeData& getHeuristicData() { return hData; } + + void setN1SolverEdgeItr(SolverEdgeItr n1SolverEdgeItr) { + this->n1SolverEdgeItr = n1SolverEdgeItr; + } - AdjLinkIterator adjLinksBegin() { - return links.begin(); - } + SolverEdgeItr getN1SolverEdgeItr() { return n1SolverEdgeItr; } - AdjLinkIterator adjLinksEnd() { - return links.end(); - } + void setN2SolverEdgeItr(SolverEdgeItr n2SolverEdgeItr){ + this->n2SolverEdgeItr = n2SolverEdgeItr; + } - void addSolvedLink(const GraphEdgeIterator &solvedLinkItr) { - solvedLinks.push_back(solvedLinkItr); - } + SolverEdgeItr getN2SolverEdgeItr() { return n2SolverEdgeItr; } - AdjLinkIterator solvedLinksBegin() { - return solvedLinks.begin(); - } + private: - AdjLinkIterator solvedLinksEnd() { - return solvedLinks.end(); - } + HeuristicEdgeData hData; + SolverEdgeItr n1SolverEdgeItr, n2SolverEdgeItr; + }; - }; + Graph &g; + HImpl h; + Solution s; + std::vector<Graph::NodeItr> stack; - class EdgeData { - private: + typedef std::list<NodeData> NodeDataList; + NodeDataList nodeDataList; - SolverGraph &g; - GraphNodeIterator node1Itr, node2Itr; - HeuristicEdgeData heuristicData; - typename 
NodeData::AdjLinkIterator node1ThisEdgeItr, node2ThisEdgeItr; + typedef std::list<EdgeData> EdgeDataList; + EdgeDataList edgeDataList; public: - EdgeData(SolverGraph &g) : g(g) {} - - HeuristicEdgeData& getHeuristicData() { return heuristicData; } - const HeuristicEdgeData& getHeuristicData() const { - return heuristicData; - } - - void setup(const GraphEdgeIterator &thisEdgeItr) { - node1Itr = g.getEdgeNode1Itr(thisEdgeItr); - node2Itr = g.getEdgeNode2Itr(thisEdgeItr); - - node1ThisEdgeItr = g.getNodeData(node1Itr).addLink(thisEdgeItr); - node2ThisEdgeItr = g.getNodeData(node2Itr).addLink(thisEdgeItr); - } - - void unlink() { - g.getNodeData(node1Itr).delLink(node1ThisEdgeItr); - g.getNodeData(node2Itr).delLink(node2ThisEdgeItr); - } - - }; - -}; - -template <typename Heuristic> -class HeuristicSolverImpl { -public: - // Typedefs to make life easier: - typedef HSITypes<typename Heuristic::NodeData, - typename Heuristic::EdgeData> HSIT; - typedef typename HSIT::SolverGraph SolverGraph; - typedef typename HSIT::NodeData NodeData; - typedef typename HSIT::EdgeData EdgeData; - typedef typename HSIT::GraphNodeIterator GraphNodeIterator; - typedef typename HSIT::GraphEdgeIterator GraphEdgeIterator; - typedef typename HSIT::GraphAdjEdgeIterator GraphAdjEdgeIterator; - - typedef typename HSIT::NodeList NodeList; - typedef typename HSIT::NodeListIterator NodeListIterator; - - typedef std::vector<GraphNodeIterator> NodeStack; - typedef typename NodeStack::iterator NodeStackIterator; - - /// \brief Constructor, which performs all the actual solver work. - HeuristicSolverImpl(const SimpleGraph &orig) : - solution(orig.getNumNodes(), true) - { - copyGraph(orig); - simplify(); - setup(); - computeSolution(); - computeSolutionCost(orig); - } - - /// \brief Returns the graph for this solver. - SolverGraph& getGraph() { return g; } - - /// \brief Return the solution found by this solver. - const Solution& getSolution() const { return solution; } - -private: - - /// \brief Add the given node to the appropriate bucket for its link - /// degree. - void addToBucket(const GraphNodeIterator &nodeItr) { - NodeData &nodeData = g.getNodeData(nodeItr); - - switch (nodeData.getLinkDegree()) { - case 0: nodeData.setBucketItr( - r0Bucket.insert(r0Bucket.end(), nodeItr)); - break; - case 1: nodeData.setBucketItr( - r1Bucket.insert(r1Bucket.end(), nodeItr)); - break; - case 2: nodeData.setBucketItr( - r2Bucket.insert(r2Bucket.end(), nodeItr)); - break; - default: heuristic.addToRNBucket(nodeItr); - break; - } - } - - /// \brief Remove the given node from the appropriate bucket for its link - /// degree. - void removeFromBucket(const GraphNodeIterator &nodeItr) { - NodeData &nodeData = g.getNodeData(nodeItr); - - switch (nodeData.getLinkDegree()) { - case 0: r0Bucket.erase(nodeData.getBucketItr()); break; - case 1: r1Bucket.erase(nodeData.getBucketItr()); break; - case 2: r2Bucket.erase(nodeData.getBucketItr()); break; - default: heuristic.removeFromRNBucket(nodeItr); break; - } - } - -public: - - /// \brief Add a link. - void addLink(const GraphEdgeIterator &edgeItr) { - g.getEdgeData(edgeItr).setup(edgeItr); - - if ((g.getNodeData(g.getEdgeNode1Itr(edgeItr)).getLinkDegree() > 2) || - (g.getNodeData(g.getEdgeNode2Itr(edgeItr)).getLinkDegree() > 2)) { - heuristic.handleAddLink(edgeItr); - } - } - - /// \brief Remove link, update info for node. - /// - /// Only updates information for the given node, since usually the other - /// is about to be removed. 
-  void removeLink(const GraphEdgeIterator &edgeItr,
-                  const GraphNodeIterator &nodeItr) {
-
-    if (g.getNodeData(nodeItr).getLinkDegree() > 2) {
-      heuristic.handleRemoveLink(edgeItr, nodeItr);
-    }
-    g.getEdgeData(edgeItr).unlink();
-  }
-
-  /// \brief Remove link, update info for both nodes. Useful for R2 only.
-  void removeLinkR2(const GraphEdgeIterator &edgeItr) {
-    GraphNodeIterator node1Itr = g.getEdgeNode1Itr(edgeItr);
-
-    if (g.getNodeData(node1Itr).getLinkDegree() > 2) {
-      heuristic.handleRemoveLink(edgeItr, node1Itr);
+    /// \brief Construct a heuristic solver implementation to solve the given
+    ///        graph.
+    /// @param g The graph representing the problem instance to be solved.
+    HeuristicSolverImpl(Graph &g) : g(g), h(*this) {}
+
+    /// \brief Get the graph being solved by this solver.
+    /// @return The graph representing the problem instance being solved by
+    ///         this solver.
+    Graph& getGraph() { return g; }
+
+    /// \brief Get the heuristic data attached to the given node.
+    /// @param nItr Node iterator.
+    /// @return The heuristic data attached to the given node.
+    HeuristicNodeData& getHeuristicNodeData(Graph::NodeItr nItr) {
+      return getSolverNodeData(nItr).getHeuristicData();
+    }
+
+    /// \brief Get the heuristic data attached to the given edge.
+    /// @param eItr Edge iterator.
+    /// @return The heuristic data attached to the given edge.
+    HeuristicEdgeData& getHeuristicEdgeData(Graph::EdgeItr eItr) {
+      return getSolverEdgeData(eItr).getHeuristicData();
+    }
+
+    /// \brief Begin iterator for the set of edges adjacent to the given node
+    ///        in the solver graph.
+    /// @param nItr Node iterator.
+    /// @return Begin iterator for the set of edges adjacent to the given node
+    ///         in the solver graph.
+    SolverEdgeItr solverEdgesBegin(Graph::NodeItr nItr) {
+      return getSolverNodeData(nItr).solverEdgesBegin();
+    }
+
+    /// \brief End iterator for the set of edges adjacent to the given node in
+    ///        the solver graph.
+    /// @param nItr Node iterator.
+    /// @return End iterator for the set of edges adjacent to the given node in
+    ///         the solver graph.
+    SolverEdgeItr solverEdgesEnd(Graph::NodeItr nItr) {
+      return getSolverNodeData(nItr).solverEdgesEnd();
+    }
+
+    /// \brief Remove an edge from the solver graph.
+    /// @param eItr Edge iterator for the edge to be removed.
+    ///
+    /// Does <i>not</i> notify the heuristic of the removal. That should be
+    /// done manually if necessary.
+    void removeSolverEdge(Graph::EdgeItr eItr) {
+      EdgeData &eData = getSolverEdgeData(eItr);
+      NodeData &n1Data = getSolverNodeData(g.getEdgeNode1(eItr)),
+               &n2Data = getSolverNodeData(g.getEdgeNode2(eItr));
+
+      n1Data.removeSolverEdge(eData.getN1SolverEdgeItr());
+      n2Data.removeSolverEdge(eData.getN2SolverEdgeItr());
+    }
+
+    /// \brief Compute a solution to the PBQP problem instance with which this
+    ///        heuristic solver was constructed.
+    /// @return A solution to the PBQP problem.
+    ///
+    /// Performs the full PBQP heuristic solver algorithm, including setup,
+    /// calls to the heuristic (which will call back to the reduction rules in
+    /// this class), and cleanup.
+    Solution computeSolution() {
+      setup();
+      h.setup();
+      h.reduce();
+      backpropagate();
+      // cleanup() also invokes h.cleanup() to tear down the heuristic.
+      cleanup();
+      return s;
+    }
+
+    /// \brief Add a node to the end of the reduction stack.
+    /// @param nItr Node iterator to add to the reduction stack.
+    void pushToStack(Graph::NodeItr nItr) {
+      getSolverNodeData(nItr).clearSolverEdges();
+      stack.push_back(nItr);
+    }
+
+    /// \brief Returns the solver degree of the given node.
+ /// @param nItr Node iterator for which degree is requested. + /// @return Node degree in the <i>solver</i> graph (not the original graph). + unsigned getSolverDegree(Graph::NodeItr nItr) { + return getSolverNodeData(nItr).getSolverDegree(); + } + + /// \brief Set the solution of the given node. + /// @param nItr Node iterator to set solution for. + /// @param selection Selection for node. + void setSolution(const Graph::NodeItr &nItr, unsigned selection) { + s.setSelection(nItr, selection); + + for (Graph::AdjEdgeItr aeItr = g.adjEdgesBegin(nItr), + aeEnd = g.adjEdgesEnd(nItr); + aeItr != aeEnd; ++aeItr) { + Graph::EdgeItr eItr(*aeItr); + Graph::NodeItr anItr(g.getEdgeOtherNode(eItr, nItr)); + getSolverNodeData(anItr).addSolverEdge(eItr); + } } - removeLink(edgeItr, g.getEdgeNode2Itr(edgeItr)); - } - - /// \brief Removes all links connected to the given node. - void unlinkNode(const GraphNodeIterator &nodeItr) { - NodeData &nodeData = g.getNodeData(nodeItr); - - typedef std::vector<GraphEdgeIterator> TempEdgeList; - TempEdgeList edgesToUnlink; - edgesToUnlink.reserve(nodeData.getLinkDegree()); + /// \brief Apply rule R0. + /// @param nItr Node iterator for node to apply R0 to. + /// + /// Node will be automatically pushed to the solver stack. + void applyR0(Graph::NodeItr nItr) { + assert(getSolverNodeData(nItr).getSolverDegree() == 0 && + "R0 applied to node with degree != 0."); - // Copy adj edges into a temp vector. We want to destroy them during - // the unlink, and we can't do that while we're iterating over them. - std::copy(nodeData.adjLinksBegin(), nodeData.adjLinksEnd(), - std::back_inserter(edgesToUnlink)); - - for (typename TempEdgeList::iterator - edgeItr = edgesToUnlink.begin(), edgeEnd = edgesToUnlink.end(); - edgeItr != edgeEnd; ++edgeItr) { - - GraphNodeIterator otherNode = g.getEdgeOtherNode(*edgeItr, nodeItr); - - removeFromBucket(otherNode); - removeLink(*edgeItr, otherNode); - addToBucket(otherNode); + // Nothing to do. Just push the node onto the reduction stack. + pushToStack(nItr); } - } - - /// \brief Push the given node onto the stack to be solved with - /// backpropagation. - void pushStack(const GraphNodeIterator &nodeItr) { - stack.push_back(nodeItr); - } - - /// \brief Set the solution of the given node. - void setSolution(const GraphNodeIterator &nodeItr, unsigned solIndex) { - solution.setSelection(g.getNodeID(nodeItr), solIndex); - - for (GraphAdjEdgeIterator adjEdgeItr = g.adjEdgesBegin(nodeItr), - adjEdgeEnd = g.adjEdgesEnd(nodeItr); - adjEdgeItr != adjEdgeEnd; ++adjEdgeItr) { - GraphEdgeIterator edgeItr(*adjEdgeItr); - GraphNodeIterator adjNodeItr(g.getEdgeOtherNode(edgeItr, nodeItr)); - g.getNodeData(adjNodeItr).addSolvedLink(edgeItr); - } - } - -private: - SolverGraph g; - Heuristic heuristic; - Solution solution; + /// \brief Apply rule R1. + /// @param nItr Node iterator for node to apply R1 to. + /// + /// Node will be automatically pushed to the solver stack. + void applyR1(Graph::NodeItr xnItr) { + NodeData &nd = getSolverNodeData(xnItr); + assert(nd.getSolverDegree() == 1 && + "R1 applied to node with degree != 1."); - NodeList r0Bucket, - r1Bucket, - r2Bucket; + Graph::EdgeItr eItr = *nd.solverEdgesBegin(); - NodeStack stack; - - // Copy the SimpleGraph into an annotated graph which we can use for reduction. 
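+      // R1: x has exactly one neighbour y. Fold x's costs into y's cost
+      // vector, y[j] += min_i (edge[i][j] + x[i]), after which x can be
+      // removed and solved by backpropagation.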
- void copyGraph(const SimpleGraph &orig) { - - assert((g.getNumEdges() == 0) && (g.getNumNodes() == 0) && - "Graph should be empty prior to solver setup."); - - assert(orig.areNodeIDsValid() && - "Cannot copy from a graph with invalid node IDs."); - - std::vector<GraphNodeIterator> newNodeItrs; - - for (unsigned nodeID = 0; nodeID < orig.getNumNodes(); ++nodeID) { - newNodeItrs.push_back( - g.addNode(orig.getNodeCosts(orig.getNodeItr(nodeID)), NodeData())); + const Matrix &eCosts = g.getEdgeCosts(eItr); + const Vector &xCosts = g.getNodeCosts(xnItr); + + // Duplicate a little to avoid transposing matrices. + if (xnItr == g.getEdgeNode1(eItr)) { + Graph::NodeItr ynItr = g.getEdgeNode2(eItr); + Vector &yCosts = g.getNodeCosts(ynItr); + for (unsigned j = 0; j < yCosts.getLength(); ++j) { + PBQPNum min = eCosts[0][j] + xCosts[0]; + for (unsigned i = 1; i < xCosts.getLength(); ++i) { + PBQPNum c = eCosts[i][j] + xCosts[i]; + if (c < min) + min = c; + } + yCosts[j] += min; + } + h.handleRemoveEdge(eItr, ynItr); + } else { + Graph::NodeItr ynItr = g.getEdgeNode1(eItr); + Vector &yCosts = g.getNodeCosts(ynItr); + for (unsigned i = 0; i < yCosts.getLength(); ++i) { + PBQPNum min = eCosts[i][0] + xCosts[0]; + for (unsigned j = 1; j < xCosts.getLength(); ++j) { + PBQPNum c = eCosts[i][j] + xCosts[j]; + if (c < min) + min = c; + } + yCosts[i] += min; + } + h.handleRemoveEdge(eItr, ynItr); + } + removeSolverEdge(eItr); + assert(nd.getSolverDegree() == 0 && + "Degree 1 with edge removed should be 0."); + pushToStack(xnItr); } - for (SimpleGraph::ConstEdgeIterator - origEdgeItr = orig.edgesBegin(), origEdgeEnd = orig.edgesEnd(); - origEdgeItr != origEdgeEnd; ++origEdgeItr) { - - unsigned id1 = orig.getNodeID(orig.getEdgeNode1Itr(origEdgeItr)), - id2 = orig.getNodeID(orig.getEdgeNode2Itr(origEdgeItr)); + /// \brief Apply rule R2. + /// @param nItr Node iterator for node to apply R2 to. + /// + /// Node will be automatically pushed to the solver stack. + void applyR2(Graph::NodeItr xnItr) { + assert(getSolverNodeData(xnItr).getSolverDegree() == 2 && + "R2 applied to node with degree != 2."); - g.addEdge(newNodeItrs[id1], newNodeItrs[id2], - orig.getEdgeCosts(origEdgeItr), EdgeData(g)); - } + NodeData &nd = getSolverNodeData(xnItr); + const Vector &xCosts = g.getNodeCosts(xnItr); - // Assign IDs to the new nodes using the ordering from the old graph, - // this will lead to nodes in the new graph getting the same ID as the - // corresponding node in the old graph. - g.assignNodeIDs(newNodeItrs); - } + SolverEdgeItr aeItr = nd.solverEdgesBegin(); + Graph::EdgeItr yxeItr = *aeItr, + zxeItr = *(++aeItr); - // Simplify the annotated graph by eliminating independent edges and trivial - // nodes. - void simplify() { - disconnectTrivialNodes(); - eliminateIndependentEdges(); - } + Graph::NodeItr ynItr = g.getEdgeOtherNode(yxeItr, xnItr), + znItr = g.getEdgeOtherNode(zxeItr, xnItr); - // Eliminate trivial nodes. - void disconnectTrivialNodes() { - for (GraphNodeIterator nodeItr = g.nodesBegin(), nodeEnd = g.nodesEnd(); - nodeItr != nodeEnd; ++nodeItr) { + bool flipEdge1 = (g.getEdgeNode1(yxeItr) == xnItr), + flipEdge2 = (g.getEdgeNode1(zxeItr) == xnItr); - if (g.getNodeCosts(nodeItr).getLength() == 1) { + const Matrix *yxeCosts = flipEdge1 ? + new Matrix(g.getEdgeCosts(yxeItr).transpose()) : + &g.getEdgeCosts(yxeItr); - std::vector<GraphEdgeIterator> edgesToRemove; + const Matrix *zxeCosts = flipEdge2 ? 
+ new Matrix(g.getEdgeCosts(zxeItr).transpose()) : + &g.getEdgeCosts(zxeItr); - for (GraphAdjEdgeIterator adjEdgeItr = g.adjEdgesBegin(nodeItr), - adjEdgeEnd = g.adjEdgesEnd(nodeItr); - adjEdgeItr != adjEdgeEnd; ++adjEdgeItr) { + unsigned xLen = xCosts.getLength(), + yLen = yxeCosts->getRows(), + zLen = zxeCosts->getRows(); + + Matrix delta(yLen, zLen); - GraphEdgeIterator edgeItr = *adjEdgeItr; - - if (g.getEdgeNode1Itr(edgeItr) == nodeItr) { - GraphNodeIterator otherNodeItr = g.getEdgeNode2Itr(edgeItr); - g.getNodeCosts(otherNodeItr) += - g.getEdgeCosts(edgeItr).getRowAsVector(0); - } - else { - GraphNodeIterator otherNodeItr = g.getEdgeNode1Itr(edgeItr); - g.getNodeCosts(otherNodeItr) += - g.getEdgeCosts(edgeItr).getColAsVector(0); + for (unsigned i = 0; i < yLen; ++i) { + for (unsigned j = 0; j < zLen; ++j) { + PBQPNum min = (*yxeCosts)[i][0] + (*zxeCosts)[j][0] + xCosts[0]; + for (unsigned k = 1; k < xLen; ++k) { + PBQPNum c = (*yxeCosts)[i][k] + (*zxeCosts)[j][k] + xCosts[k]; + if (c < min) { + min = c; + } } - - edgesToRemove.push_back(edgeItr); + delta[i][j] = min; } + } - while (!edgesToRemove.empty()) { - g.removeEdge(edgesToRemove.back()); - edgesToRemove.pop_back(); + if (flipEdge1) + delete yxeCosts; + + if (flipEdge2) + delete zxeCosts; + + Graph::EdgeItr yzeItr = g.findEdge(ynItr, znItr); + bool addedEdge = false; + + if (yzeItr == g.edgesEnd()) { + yzeItr = g.addEdge(ynItr, znItr, delta); + addedEdge = true; + } else { + Matrix &yzeCosts = g.getEdgeCosts(yzeItr); + h.preUpdateEdgeCosts(yzeItr); + if (ynItr == g.getEdgeNode1(yzeItr)) { + yzeCosts += delta; + } else { + yzeCosts += delta.transpose(); } } - } - } - void eliminateIndependentEdges() { - std::vector<GraphEdgeIterator> edgesToProcess; + bool nullCostEdge = tryNormaliseEdgeMatrix(yzeItr); - for (GraphEdgeIterator edgeItr = g.edgesBegin(), edgeEnd = g.edgesEnd(); - edgeItr != edgeEnd; ++edgeItr) { - edgesToProcess.push_back(edgeItr); - } - - while (!edgesToProcess.empty()) { - tryToEliminateEdge(edgesToProcess.back()); - edgesToProcess.pop_back(); - } - } - - void tryToEliminateEdge(const GraphEdgeIterator &edgeItr) { - if (tryNormaliseEdgeMatrix(edgeItr)) { - g.removeEdge(edgeItr); - } - } - - bool tryNormaliseEdgeMatrix(const GraphEdgeIterator &edgeItr) { - - Matrix &edgeCosts = g.getEdgeCosts(edgeItr); - Vector &uCosts = g.getNodeCosts(g.getEdgeNode1Itr(edgeItr)), - &vCosts = g.getNodeCosts(g.getEdgeNode2Itr(edgeItr)); - - for (unsigned r = 0; r < edgeCosts.getRows(); ++r) { - PBQPNum rowMin = edgeCosts.getRowMin(r); - uCosts[r] += rowMin; - if (rowMin != std::numeric_limits<PBQPNum>::infinity()) { - edgeCosts.subFromRow(r, rowMin); + if (!addedEdge) { + // If we modified the edge costs let the heuristic know. + h.postUpdateEdgeCosts(yzeItr); } - else { - edgeCosts.setRow(r, 0); + + if (nullCostEdge) { + // If this edge ended up null remove it. + if (!addedEdge) { + // We didn't just add it, so we need to notify the heuristic + // and remove it from the solver. + h.handleRemoveEdge(yzeItr, ynItr); + h.handleRemoveEdge(yzeItr, znItr); + removeSolverEdge(yzeItr); + } + g.removeEdge(yzeItr); + } else if (addedEdge) { + // If the edge was added, and non-null, finish setting it up, add it to + // the solver & notify heuristic. 
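+        // (The EdgeData object must outlive the edge: it is owned by
+        // edgeDataList, and the graph stores only a pointer to it.)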
+ edgeDataList.push_back(EdgeData()); + g.setEdgeData(yzeItr, &edgeDataList.back()); + addSolverEdge(yzeItr); + h.handleAddEdge(yzeItr); } - } - for (unsigned c = 0; c < edgeCosts.getCols(); ++c) { - PBQPNum colMin = edgeCosts.getColMin(c); - vCosts[c] += colMin; - if (colMin != std::numeric_limits<PBQPNum>::infinity()) { - edgeCosts.subFromCol(c, colMin); - } - else { - edgeCosts.setCol(c, 0); - } - } + h.handleRemoveEdge(yxeItr, ynItr); + removeSolverEdge(yxeItr); + h.handleRemoveEdge(zxeItr, znItr); + removeSolverEdge(zxeItr); - return edgeCosts.isZero(); - } + pushToStack(xnItr); + } - void setup() { - setupLinks(); - heuristic.initialise(*this); - setupBuckets(); - } + private: - void setupLinks() { - for (GraphEdgeIterator edgeItr = g.edgesBegin(), edgeEnd = g.edgesEnd(); - edgeItr != edgeEnd; ++edgeItr) { - g.getEdgeData(edgeItr).setup(edgeItr); + NodeData& getSolverNodeData(Graph::NodeItr nItr) { + return *static_cast<NodeData*>(g.getNodeData(nItr)); } - } - void setupBuckets() { - for (GraphNodeIterator nodeItr = g.nodesBegin(), nodeEnd = g.nodesEnd(); - nodeItr != nodeEnd; ++nodeItr) { - addToBucket(nodeItr); - } - } - - void computeSolution() { - assert(g.areNodeIDsValid() && - "Nodes cannot be added/removed during reduction."); - - reduce(); - computeTrivialSolutions(); - backpropagate(); - } - - void printNode(const GraphNodeIterator &nodeItr) { - llvm::errs() << "Node " << g.getNodeID(nodeItr) << " (" << &*nodeItr << "):\n" - << " costs = " << g.getNodeCosts(nodeItr) << "\n" - << " link degree = " << g.getNodeData(nodeItr).getLinkDegree() << "\n" - << " links = [ "; - - for (typename HSIT::NodeData::AdjLinkIterator - aeItr = g.getNodeData(nodeItr).adjLinksBegin(), - aeEnd = g.getNodeData(nodeItr).adjLinksEnd(); - aeItr != aeEnd; ++aeItr) { - llvm::errs() << "(" << g.getNodeID(g.getEdgeNode1Itr(*aeItr)) - << ", " << g.getNodeID(g.getEdgeNode2Itr(*aeItr)) - << ") "; + EdgeData& getSolverEdgeData(Graph::EdgeItr eItr) { + return *static_cast<EdgeData*>(g.getEdgeData(eItr)); } - llvm::errs() << "]\n"; - } - void dumpState() { - llvm::errs() << "\n"; + void addSolverEdge(Graph::EdgeItr eItr) { + EdgeData &eData = getSolverEdgeData(eItr); + NodeData &n1Data = getSolverNodeData(g.getEdgeNode1(eItr)), + &n2Data = getSolverNodeData(g.getEdgeNode2(eItr)); - for (GraphNodeIterator nodeItr = g.nodesBegin(), nodeEnd = g.nodesEnd(); - nodeItr != nodeEnd; ++nodeItr) { - printNode(nodeItr); + eData.setN1SolverEdgeItr(n1Data.addSolverEdge(eItr)); + eData.setN2SolverEdgeItr(n2Data.addSolverEdge(eItr)); } - NodeList* buckets[] = { &r0Bucket, &r1Bucket, &r2Bucket }; - - for (unsigned b = 0; b < 3; ++b) { - NodeList &bucket = *buckets[b]; - - llvm::errs() << "Bucket " << b << ": [ "; - - for (NodeListIterator nItr = bucket.begin(), nEnd = bucket.end(); - nItr != nEnd; ++nItr) { - llvm::errs() << g.getNodeID(*nItr) << " "; + void setup() { + if (h.solverRunSimplify()) { + simplify(); } - llvm::errs() << "]\n"; - } + // Create node data objects. + for (Graph::NodeItr nItr = g.nodesBegin(), nEnd = g.nodesEnd(); + nItr != nEnd; ++nItr) { + nodeDataList.push_back(NodeData()); + g.setNodeData(nItr, &nodeDataList.back()); + } - llvm::errs() << "Stack: [ "; - for (NodeStackIterator nsItr = stack.begin(), nsEnd = stack.end(); - nsItr != nsEnd; ++nsItr) { - llvm::errs() << g.getNodeID(*nsItr) << " "; + // Create edge data objects. 
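+      // (addSolverEdge also registers the edge in the solver adjacency
+      // lists of both endpoint nodes.)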
+ for (Graph::EdgeItr eItr = g.edgesBegin(), eEnd = g.edgesEnd(); + eItr != eEnd; ++eItr) { + edgeDataList.push_back(EdgeData()); + g.setEdgeData(eItr, &edgeDataList.back()); + addSolverEdge(eItr); + } } - llvm::errs() << "]\n"; - } - - void reduce() { - bool reductionFinished = r1Bucket.empty() && r2Bucket.empty() && - heuristic.rNBucketEmpty(); - while (!reductionFinished) { - - if (!r1Bucket.empty()) { - processR1(); - } - else if (!r2Bucket.empty()) { - processR2(); - } - else if (!heuristic.rNBucketEmpty()) { - solution.setProvedOptimal(false); - solution.incRNReductions(); - heuristic.processRN(); - } - else reductionFinished = true; + void simplify() { + disconnectTrivialNodes(); + eliminateIndependentEdges(); } - - } - void processR1() { + // Eliminate trivial nodes. + void disconnectTrivialNodes() { + unsigned numDisconnected = 0; - // Remove the first node in the R0 bucket: - GraphNodeIterator xNodeItr = r1Bucket.front(); - r1Bucket.pop_front(); + for (Graph::NodeItr nItr = g.nodesBegin(), nEnd = g.nodesEnd(); + nItr != nEnd; ++nItr) { - solution.incR1Reductions(); + if (g.getNodeCosts(nItr).getLength() == 1) { - //llvm::errs() << "Applying R1 to " << g.getNodeID(xNodeItr) << "\n"; + std::vector<Graph::EdgeItr> edgesToRemove; - assert((g.getNodeData(xNodeItr).getLinkDegree() == 1) && - "Node in R1 bucket has degree != 1"); + for (Graph::AdjEdgeItr aeItr = g.adjEdgesBegin(nItr), + aeEnd = g.adjEdgesEnd(nItr); + aeItr != aeEnd; ++aeItr) { - GraphEdgeIterator edgeItr = *g.getNodeData(xNodeItr).adjLinksBegin(); + Graph::EdgeItr eItr = *aeItr; - const Matrix &edgeCosts = g.getEdgeCosts(edgeItr); + if (g.getEdgeNode1(eItr) == nItr) { + Graph::NodeItr otherNodeItr = g.getEdgeNode2(eItr); + g.getNodeCosts(otherNodeItr) += + g.getEdgeCosts(eItr).getRowAsVector(0); + } + else { + Graph::NodeItr otherNodeItr = g.getEdgeNode1(eItr); + g.getNodeCosts(otherNodeItr) += + g.getEdgeCosts(eItr).getColAsVector(0); + } - const Vector &xCosts = g.getNodeCosts(xNodeItr); - unsigned xLen = xCosts.getLength(); + edgesToRemove.push_back(eItr); + } - // Duplicate a little code to avoid transposing matrices: - if (xNodeItr == g.getEdgeNode1Itr(edgeItr)) { - GraphNodeIterator yNodeItr = g.getEdgeNode2Itr(edgeItr); - Vector &yCosts = g.getNodeCosts(yNodeItr); - unsigned yLen = yCosts.getLength(); + if (!edgesToRemove.empty()) + ++numDisconnected; - for (unsigned j = 0; j < yLen; ++j) { - PBQPNum min = edgeCosts[0][j] + xCosts[0]; - for (unsigned i = 1; i < xLen; ++i) { - PBQPNum c = edgeCosts[i][j] + xCosts[i]; - if (c < min) - min = c; + while (!edgesToRemove.empty()) { + g.removeEdge(edgesToRemove.back()); + edgesToRemove.pop_back(); + } } - yCosts[j] += min; } } - else { - GraphNodeIterator yNodeItr = g.getEdgeNode1Itr(edgeItr); - Vector &yCosts = g.getNodeCosts(yNodeItr); - unsigned yLen = yCosts.getLength(); - for (unsigned i = 0; i < yLen; ++i) { - PBQPNum min = edgeCosts[i][0] + xCosts[0]; + void eliminateIndependentEdges() { + std::vector<Graph::EdgeItr> edgesToProcess; + unsigned numEliminated = 0; - for (unsigned j = 1; j < xLen; ++j) { - PBQPNum c = edgeCosts[i][j] + xCosts[j]; - if (c < min) - min = c; - } - yCosts[i] += min; + for (Graph::EdgeItr eItr = g.edgesBegin(), eEnd = g.edgesEnd(); + eItr != eEnd; ++eItr) { + edgesToProcess.push_back(eItr); } - } - - unlinkNode(xNodeItr); - pushStack(xNodeItr); - } - - void processR2() { - - GraphNodeIterator xNodeItr = r2Bucket.front(); - r2Bucket.pop_front(); - - solution.incR2Reductions(); - - // Unlink is unsafe here. 
At some point it may optimistically more a node - // to a lower-degree list when its degree will later rise, or vice versa, - // violating the assumption that node degrees monotonically decrease - // during the reduction phase. Instead we'll bucket shuffle manually. - pushStack(xNodeItr); - - assert((g.getNodeData(xNodeItr).getLinkDegree() == 2) && - "Node in R2 bucket has degree != 2"); - - const Vector &xCosts = g.getNodeCosts(xNodeItr); - typename NodeData::AdjLinkIterator tempItr = - g.getNodeData(xNodeItr).adjLinksBegin(); - - GraphEdgeIterator yxEdgeItr = *tempItr, - zxEdgeItr = *(++tempItr); + while (!edgesToProcess.empty()) { + if (tryToEliminateEdge(edgesToProcess.back())) + ++numEliminated; + edgesToProcess.pop_back(); + } + } - GraphNodeIterator yNodeItr = g.getEdgeOtherNode(yxEdgeItr, xNodeItr), - zNodeItr = g.getEdgeOtherNode(zxEdgeItr, xNodeItr); + bool tryToEliminateEdge(Graph::EdgeItr eItr) { + if (tryNormaliseEdgeMatrix(eItr)) { + g.removeEdge(eItr); + return true; + } + return false; + } - removeFromBucket(yNodeItr); - removeFromBucket(zNodeItr); + bool tryNormaliseEdgeMatrix(Graph::EdgeItr &eItr) { - removeLink(yxEdgeItr, yNodeItr); - removeLink(zxEdgeItr, zNodeItr); + const PBQPNum infinity = std::numeric_limits<PBQPNum>::infinity(); - // Graph some of the costs: - bool flipEdge1 = (g.getEdgeNode1Itr(yxEdgeItr) == xNodeItr), - flipEdge2 = (g.getEdgeNode1Itr(zxEdgeItr) == xNodeItr); + Matrix &edgeCosts = g.getEdgeCosts(eItr); + Vector &uCosts = g.getNodeCosts(g.getEdgeNode1(eItr)), + &vCosts = g.getNodeCosts(g.getEdgeNode2(eItr)); - const Matrix *yxCosts = flipEdge1 ? - new Matrix(g.getEdgeCosts(yxEdgeItr).transpose()) : - &g.getEdgeCosts(yxEdgeItr), - *zxCosts = flipEdge2 ? - new Matrix(g.getEdgeCosts(zxEdgeItr).transpose()) : - &g.getEdgeCosts(zxEdgeItr); + for (unsigned r = 0; r < edgeCosts.getRows(); ++r) { + PBQPNum rowMin = infinity; - unsigned xLen = xCosts.getLength(), - yLen = yxCosts->getRows(), - zLen = zxCosts->getRows(); + for (unsigned c = 0; c < edgeCosts.getCols(); ++c) { + if (vCosts[c] != infinity && edgeCosts[r][c] < rowMin) + rowMin = edgeCosts[r][c]; + } - // Compute delta: - Matrix delta(yLen, zLen); + uCosts[r] += rowMin; - for (unsigned i = 0; i < yLen; ++i) { - for (unsigned j = 0; j < zLen; ++j) { - PBQPNum min = (*yxCosts)[i][0] + (*zxCosts)[j][0] + xCosts[0]; - for (unsigned k = 1; k < xLen; ++k) { - PBQPNum c = (*yxCosts)[i][k] + (*zxCosts)[j][k] + xCosts[k]; - if (c < min) { - min = c; - } + if (rowMin != infinity) { + edgeCosts.subFromRow(r, rowMin); + } + else { + edgeCosts.setRow(r, 0); } - delta[i][j] = min; } - } - - if (flipEdge1) - delete yxCosts; - if (flipEdge2) - delete zxCosts; + for (unsigned c = 0; c < edgeCosts.getCols(); ++c) { + PBQPNum colMin = infinity; - // Deal with the potentially induced yz edge. - GraphEdgeIterator yzEdgeItr = g.findEdge(yNodeItr, zNodeItr); - if (yzEdgeItr == g.edgesEnd()) { - yzEdgeItr = g.addEdge(yNodeItr, zNodeItr, delta, EdgeData(g)); - } - else { - // There was an edge, but we're going to screw with it. Delete the old - // link, update the costs. We'll re-link it later. - removeLinkR2(yzEdgeItr); - g.getEdgeCosts(yzEdgeItr) += - (yNodeItr == g.getEdgeNode1Itr(yzEdgeItr)) ? - delta : delta.transpose(); - } + for (unsigned r = 0; r < edgeCosts.getRows(); ++r) { + if (uCosts[r] != infinity && edgeCosts[r][c] < colMin) + colMin = edgeCosts[r][c]; + } - bool nullCostEdge = tryNormaliseEdgeMatrix(yzEdgeItr); + vCosts[c] += colMin; - // Nulled the edge, remove it entirely. 
- if (nullCostEdge) { - g.removeEdge(yzEdgeItr); - } - else { - // Edge remains - re-link it. - addLink(yzEdgeItr); - } + if (colMin != infinity) { + edgeCosts.subFromCol(c, colMin); + } + else { + edgeCosts.setCol(c, 0); + } + } - addToBucket(yNodeItr); - addToBucket(zNodeItr); + return edgeCosts.isZero(); } - void computeTrivialSolutions() { - - for (NodeListIterator r0Itr = r0Bucket.begin(), r0End = r0Bucket.end(); - r0Itr != r0End; ++r0Itr) { - GraphNodeIterator nodeItr = *r0Itr; - - solution.incR0Reductions(); - setSolution(nodeItr, g.getNodeCosts(nodeItr).minIndex()); + void backpropagate() { + while (!stack.empty()) { + computeSolution(stack.back()); + stack.pop_back(); + } } - } - - void backpropagate() { - while (!stack.empty()) { - computeSolution(stack.back()); - stack.pop_back(); - } - } + void computeSolution(Graph::NodeItr nItr) { - void computeSolution(const GraphNodeIterator &nodeItr) { + NodeData &nodeData = getSolverNodeData(nItr); - NodeData &nodeData = g.getNodeData(nodeItr); + Vector v(g.getNodeCosts(nItr)); - Vector v(g.getNodeCosts(nodeItr)); + // Solve based on existing solved edges. + for (SolverEdgeItr solvedEdgeItr = nodeData.solverEdgesBegin(), + solvedEdgeEnd = nodeData.solverEdgesEnd(); + solvedEdgeItr != solvedEdgeEnd; ++solvedEdgeItr) { - // Solve based on existing links. - for (typename NodeData::AdjLinkIterator - solvedLinkItr = nodeData.solvedLinksBegin(), - solvedLinkEnd = nodeData.solvedLinksEnd(); - solvedLinkItr != solvedLinkEnd; ++solvedLinkItr) { + Graph::EdgeItr eItr(*solvedEdgeItr); + Matrix &edgeCosts = g.getEdgeCosts(eItr); - GraphEdgeIterator solvedEdgeItr(*solvedLinkItr); - Matrix &edgeCosts = g.getEdgeCosts(solvedEdgeItr); + if (nItr == g.getEdgeNode1(eItr)) { + Graph::NodeItr adjNode(g.getEdgeNode2(eItr)); + unsigned adjSolution = s.getSelection(adjNode); + v += edgeCosts.getColAsVector(adjSolution); + } + else { + Graph::NodeItr adjNode(g.getEdgeNode1(eItr)); + unsigned adjSolution = s.getSelection(adjNode); + v += edgeCosts.getRowAsVector(adjSolution); + } - if (nodeItr == g.getEdgeNode1Itr(solvedEdgeItr)) { - GraphNodeIterator adjNode(g.getEdgeNode2Itr(solvedEdgeItr)); - unsigned adjSolution = - solution.getSelection(g.getNodeID(adjNode)); - v += edgeCosts.getColAsVector(adjSolution); - } - else { - GraphNodeIterator adjNode(g.getEdgeNode1Itr(solvedEdgeItr)); - unsigned adjSolution = - solution.getSelection(g.getNodeID(adjNode)); - v += edgeCosts.getRowAsVector(adjSolution); } + setSolution(nItr, v.minIndex()); } - setSolution(nodeItr, v.minIndex()); - } - - void computeSolutionCost(const SimpleGraph &orig) { - PBQPNum cost = 0.0; - - for (SimpleGraph::ConstNodeIterator - nodeItr = orig.nodesBegin(), nodeEnd = orig.nodesEnd(); - nodeItr != nodeEnd; ++nodeItr) { - - unsigned nodeId = orig.getNodeID(nodeItr); - - cost += orig.getNodeCosts(nodeItr)[solution.getSelection(nodeId)]; + void cleanup() { + h.cleanup(); + nodeDataList.clear(); + edgeDataList.clear(); } + }; - for (SimpleGraph::ConstEdgeIterator - edgeItr = orig.edgesBegin(), edgeEnd = orig.edgesEnd(); - edgeItr != edgeEnd; ++edgeItr) { - - SimpleGraph::ConstNodeIterator n1 = orig.getEdgeNode1Itr(edgeItr), - n2 = orig.getEdgeNode2Itr(edgeItr); - unsigned sol1 = solution.getSelection(orig.getNodeID(n1)), - sol2 = solution.getSelection(orig.getNodeID(n2)); - - cost += orig.getEdgeCosts(edgeItr)[sol1][sol2]; + /// \brief PBQP heuristic solver class. 
+  ///
+  /// Given a PBQP Graph g representing a PBQP problem, you can find a solution
+  /// by calling
+  /// <tt>Solution s = HeuristicSolver<H>::solve(g);</tt>
+  ///
+  /// The choice of heuristic for the H parameter will affect both the solver
+  /// speed and solution quality. The heuristic should be chosen based on the
+  /// nature of the problem being solved.
+  /// Currently the only solver included with LLVM is the Briggs heuristic for
+  /// register allocation.
+  template <typename HImpl>
+  class HeuristicSolver {
+  public:
+    static Solution solve(Graph &g) {
+      HeuristicSolverImpl<HImpl> hs(g);
+      return hs.computeSolution();
+    }
-
-    solution.setSolutionCost(cost);
-  }
-
-};
-
-template <typename Heuristic>
-class HeuristicSolver : public Solver {
-public:
-  Solution solve(const SimpleGraph &g) const {
-    HeuristicSolverImpl<Heuristic> solverImpl(g);
-    return solverImpl.getSolution();
-  }
-};
+  };
 
 }
diff --git a/lib/CodeGen/PBQP/Heuristics/Briggs.h b/lib/CodeGen/PBQP/Heuristics/Briggs.h
index 1228f65..30d34d9 100644
--- a/lib/CodeGen/PBQP/Heuristics/Briggs.h
+++ b/lib/CodeGen/PBQP/Heuristics/Briggs.h
@@ -18,365 +18,447 @@
 #ifndef LLVM_CODEGEN_PBQP_HEURISTICS_BRIGGS_H
 #define LLVM_CODEGEN_PBQP_HEURISTICS_BRIGGS_H
 
+#include "llvm/Support/Compiler.h"
 #include "../HeuristicSolver.h"
+#include "../HeuristicBase.h"
 
 #include <set>
+#include <limits>
 
 namespace PBQP {
-namespace Heuristics {
-
-class Briggs {
-  public:
-
-    class NodeData;
-    class EdgeData;
-
-  private:
-
-    typedef HeuristicSolverImpl<Briggs> Solver;
-    typedef HSITypes<NodeData, EdgeData> HSIT;
-    typedef HSIT::SolverGraph SolverGraph;
-    typedef HSIT::GraphNodeIterator GraphNodeIterator;
-    typedef HSIT::GraphEdgeIterator GraphEdgeIterator;
-
-    class LinkDegreeComparator {
+  namespace Heuristics {
+
+    /// \brief PBQP Heuristic which applies an allocability test based on
+    /// Briggs.
+    ///
+    /// This heuristic assumes that the elements of cost vectors in the PBQP
+    /// problem represent storage options, with the first being the spill
+    /// option and subsequent elements representing legal registers for the
+    /// corresponding node. Edge cost matrices are likewise assumed to represent
+    /// register constraints.
+    /// If one or more nodes can be proven allocable by this heuristic (by
+    /// inspection of their constraint matrices) then the allocable node of
+    /// highest degree is selected for the next reduction and pushed to the
+    /// solver stack. If no nodes can be proven allocable then the node with
+    /// the lowest estimated spill cost is selected and pushed to the solver stack
+    /// instead.
+    ///
+    /// This implementation is built on top of HeuristicBase.
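+    ///
+    /// A minimal usage sketch (illustrative only; the populated graph g and
+    /// the node iterator nItr are assumptions, not part of this patch):
+    /// <tt>Solution s = HeuristicSolver<Heuristics::Briggs>::solve(g);</tt>
+    /// <tt>unsigned sel = s.getSelection(nItr);</tt>
+    /// Under the cost-vector convention above, a selection of 0 denotes the
+    /// spill option for the corresponding node.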
+ class Briggs : public HeuristicBase<Briggs> { + private: + + class LinkDegreeComparator { public: - LinkDegreeComparator() : g(0) {} - LinkDegreeComparator(SolverGraph *g) : g(g) {} - - bool operator()(const GraphNodeIterator &node1Itr, - const GraphNodeIterator &node2Itr) const { - assert((g != 0) && "Graph object not set, cannot access node data."); - unsigned n1Degree = g->getNodeData(node1Itr).getLinkDegree(), - n2Degree = g->getNodeData(node2Itr).getLinkDegree(); - if (n1Degree > n2Degree) { + LinkDegreeComparator(HeuristicSolverImpl<Briggs> &s) : s(&s) {} + bool operator()(Graph::NodeItr n1Itr, Graph::NodeItr n2Itr) const { + if (s->getSolverDegree(n1Itr) > s->getSolverDegree(n2Itr)) return true; - } - else if (n1Degree < n2Degree) { + if (s->getSolverDegree(n1Itr) < s->getSolverDegree(n2Itr)) return false; - } - // else they're "equal" by degree, differentiate based on ID. - return g->getNodeID(node1Itr) < g->getNodeID(node2Itr); + return (&*n1Itr < &*n2Itr); } - private: - SolverGraph *g; - }; + HeuristicSolverImpl<Briggs> *s; + }; - class SpillPriorityComparator { + class SpillCostComparator { public: - SpillPriorityComparator() : g(0) {} - SpillPriorityComparator(SolverGraph *g) : g(g) {} - - bool operator()(const GraphNodeIterator &node1Itr, - const GraphNodeIterator &node2Itr) const { - assert((g != 0) && "Graph object not set, cannot access node data."); - PBQPNum cost1 = - g->getNodeCosts(node1Itr)[0] / - g->getNodeData(node1Itr).getLinkDegree(), - cost2 = - g->getNodeCosts(node2Itr)[0] / - g->getNodeData(node2Itr).getLinkDegree(); - - if (cost1 < cost2) { + SpillCostComparator(HeuristicSolverImpl<Briggs> &s) + : s(&s), g(&s.getGraph()) {} + bool operator()(Graph::NodeItr n1Itr, Graph::NodeItr n2Itr) const { + PBQPNum cost1 = g->getNodeCosts(n1Itr)[0] / s->getSolverDegree(n1Itr), + cost2 = g->getNodeCosts(n2Itr)[0] / s->getSolverDegree(n2Itr); + if (cost1 < cost2) return true; - } - else if (cost1 > cost2) { + if (cost1 > cost2) return false; - } - // else they'er "equal" again, differentiate based on address again. - return g->getNodeID(node1Itr) < g->getNodeID(node2Itr); + return (&*n1Itr < &*n2Itr); } private: - SolverGraph *g; - }; - - typedef std::set<GraphNodeIterator, LinkDegreeComparator> - RNAllocableNodeList; - typedef RNAllocableNodeList::iterator RNAllocableNodeListIterator; - - typedef std::set<GraphNodeIterator, SpillPriorityComparator> - RNUnallocableNodeList; - typedef RNUnallocableNodeList::iterator RNUnallocableNodeListIterator; - - public: - - class NodeData { - private: - RNAllocableNodeListIterator rNAllocableNodeListItr; - RNUnallocableNodeListIterator rNUnallocableNodeListItr; - unsigned numRegOptions, numDenied, numSafe; - std::vector<unsigned> unsafeDegrees; - bool allocable; - - void addRemoveLink(SolverGraph &g, const GraphNodeIterator &nodeItr, - const GraphEdgeIterator &edgeItr, bool add) { + HeuristicSolverImpl<Briggs> *s; + Graph *g; + }; - //assume we're adding... 
- unsigned udTarget = 0, dir = 1; + typedef std::list<Graph::NodeItr> RNAllocableList; + typedef RNAllocableList::iterator RNAllocableListItr; - if (!add) { - udTarget = 1; - dir = ~0; - } + typedef std::list<Graph::NodeItr> RNUnallocableList; + typedef RNUnallocableList::iterator RNUnallocableListItr; - EdgeData &linkEdgeData = g.getEdgeData(edgeItr).getHeuristicData(); + public: - EdgeData::ConstUnsafeIterator edgeUnsafeBegin, edgeUnsafeEnd; + struct NodeData { + typedef std::vector<unsigned> UnsafeDegreesArray; + bool isHeuristic, isAllocable, isInitialized; + unsigned numDenied, numSafe; + UnsafeDegreesArray unsafeDegrees; + RNAllocableListItr rnaItr; + RNUnallocableListItr rnuItr; - if (nodeItr == g.getEdgeNode1Itr(edgeItr)) { - numDenied += (dir * linkEdgeData.getWorstDegree()); - edgeUnsafeBegin = linkEdgeData.unsafeBegin(); - edgeUnsafeEnd = linkEdgeData.unsafeEnd(); - } - else { - numDenied += (dir * linkEdgeData.getReverseWorstDegree()); - edgeUnsafeBegin = linkEdgeData.reverseUnsafeBegin(); - edgeUnsafeEnd = linkEdgeData.reverseUnsafeEnd(); - } + NodeData() + : isHeuristic(false), isAllocable(false), isInitialized(false), + numDenied(0), numSafe(0) { } + }; - assert((unsafeDegrees.size() == - static_cast<unsigned>( - std::distance(edgeUnsafeBegin, edgeUnsafeEnd))) - && "Unsafe array size mismatch."); - - std::vector<unsigned>::iterator unsafeDegreesItr = - unsafeDegrees.begin(); - - for (EdgeData::ConstUnsafeIterator edgeUnsafeItr = edgeUnsafeBegin; - edgeUnsafeItr != edgeUnsafeEnd; - ++edgeUnsafeItr, ++unsafeDegreesItr) { - - if ((*edgeUnsafeItr == 1) && (*unsafeDegreesItr == udTarget)) { - numSafe -= dir; - } - *unsafeDegreesItr += (dir * (*edgeUnsafeItr)); - } - - allocable = (numDenied < numRegOptions) || (numSafe > 0); - } - - public: - - void setup(SolverGraph &g, const GraphNodeIterator &nodeItr) { - - numRegOptions = g.getNodeCosts(nodeItr).getLength() - 1; - - numSafe = numRegOptions; // Optimistic, correct below. - numDenied = 0; // Also optimistic. - unsafeDegrees.resize(numRegOptions, 0); - - HSIT::NodeData &nodeData = g.getNodeData(nodeItr); - - for (HSIT::NodeData::AdjLinkIterator - adjLinkItr = nodeData.adjLinksBegin(), - adjLinkEnd = nodeData.adjLinksEnd(); - adjLinkItr != adjLinkEnd; ++adjLinkItr) { - - addRemoveLink(g, nodeItr, *adjLinkItr, true); - } - } - - bool isAllocable() const { return allocable; } - - void handleAddLink(SolverGraph &g, const GraphNodeIterator &nodeItr, - const GraphEdgeIterator &adjEdge) { - addRemoveLink(g, nodeItr, adjEdge, true); + struct EdgeData { + typedef std::vector<unsigned> UnsafeArray; + unsigned worst, reverseWorst; + UnsafeArray unsafe, reverseUnsafe; + bool isUpToDate; + + EdgeData() : worst(0), reverseWorst(0), isUpToDate(false) {} + }; + + /// \brief Construct an instance of the Briggs heuristic. + /// @param solver A reference to the solver which is using this heuristic. + Briggs(HeuristicSolverImpl<Briggs> &solver) : + HeuristicBase<Briggs>(solver) {} + + /// \brief Determine whether a node should be reduced using optimal + /// reduction. + /// @param nItr Node iterator to be considered. + /// @return True if the given node should be optimally reduced, false + /// otherwise. + /// + /// Selects nodes of degree 0, 1 or 2 for optimal reduction, with one + /// exception. Nodes whose spill cost (element 0 of their cost vector) is + /// infinite are checked for allocability first. Allocable nodes may be + /// optimally reduced, but nodes whose allocability cannot be proven are + /// selected for heuristic reduction instead. 
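+      ///
+      /// A worked example (illustrative): a node with two incident solver
+      /// edges has solver degree 2 and, per the degree test in the body
+      /// below, is sent down the optimal reduction path; once a third edge
+      /// is attached it is handed to addToHeuristicReduceList instead.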
+ bool shouldOptimallyReduce(Graph::NodeItr nItr) { + if (getSolver().getSolverDegree(nItr) < 3) { + return true; } - - void handleRemoveLink(SolverGraph &g, const GraphNodeIterator &nodeItr, - const GraphEdgeIterator &adjEdge) { - addRemoveLink(g, nodeItr, adjEdge, false); + // else + return false; + } + + /// \brief Add a node to the heuristic reduce list. + /// @param nItr Node iterator to add to the heuristic reduce list. + void addToHeuristicReduceList(Graph::NodeItr nItr) { + NodeData &nd = getHeuristicNodeData(nItr); + initializeNode(nItr); + nd.isHeuristic = true; + if (nd.isAllocable) { + nd.rnaItr = rnAllocableList.insert(rnAllocableList.end(), nItr); + } else { + nd.rnuItr = rnUnallocableList.insert(rnUnallocableList.end(), nItr); } - - void setRNAllocableNodeListItr( - const RNAllocableNodeListIterator &rNAllocableNodeListItr) { - - this->rNAllocableNodeListItr = rNAllocableNodeListItr; + } + + /// \brief Heuristically reduce one of the nodes in the heuristic + /// reduce list. + /// @return True if a reduction takes place, false if the heuristic reduce + /// list is empty. + /// + /// If the list of allocable nodes is non-empty a node is selected + /// from it and pushed to the stack. Otherwise if the non-allocable list + /// is non-empty a node is selected from it and pushed to the stack. + /// If both lists are empty the method simply returns false with no action + /// taken. + bool heuristicReduce() { + if (!rnAllocableList.empty()) { + RNAllocableListItr rnaItr = + min_element(rnAllocableList.begin(), rnAllocableList.end(), + LinkDegreeComparator(getSolver())); + Graph::NodeItr nItr = *rnaItr; + rnAllocableList.erase(rnaItr); + handleRemoveNode(nItr); + getSolver().pushToStack(nItr); + return true; + } else if (!rnUnallocableList.empty()) { + RNUnallocableListItr rnuItr = + min_element(rnUnallocableList.begin(), rnUnallocableList.end(), + SpillCostComparator(getSolver())); + Graph::NodeItr nItr = *rnuItr; + rnUnallocableList.erase(rnuItr); + handleRemoveNode(nItr); + getSolver().pushToStack(nItr); + return true; } - - RNAllocableNodeListIterator getRNAllocableNodeListItr() const { - return rNAllocableNodeListItr; + // else + return false; + } + + /// \brief Prepare a change in the costs on the given edge. + /// @param eItr Edge iterator. + void preUpdateEdgeCosts(Graph::EdgeItr eItr) { + Graph &g = getGraph(); + Graph::NodeItr n1Itr = g.getEdgeNode1(eItr), + n2Itr = g.getEdgeNode2(eItr); + NodeData &n1 = getHeuristicNodeData(n1Itr), + &n2 = getHeuristicNodeData(n2Itr); + + if (n1.isHeuristic) + subtractEdgeContributions(eItr, getGraph().getEdgeNode1(eItr)); + if (n2.isHeuristic) + subtractEdgeContributions(eItr, getGraph().getEdgeNode2(eItr)); + + EdgeData &ed = getHeuristicEdgeData(eItr); + ed.isUpToDate = false; + } + + /// \brief Handle the change in the costs on the given edge. + /// @param eItr Edge iterator. + void postUpdateEdgeCosts(Graph::EdgeItr eItr) { + // This is effectively the same as adding a new edge now, since + // we've factored out the costs of the old one. + handleAddEdge(eItr); + } + + /// \brief Handle the addition of a new edge into the PBQP graph. + /// @param eItr Edge iterator for the added edge. + /// + /// Updates allocability of any nodes connected by this edge which are + /// being managed by the heuristic. If allocability changes they are + /// moved to the appropriate list. 
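+      ///
+      /// For example (illustrative): if n1 sat on rnAllocableList and this
+      /// edge's contributions raise numDenied to numRegs while numSafe drops
+      /// to 0, updateAllocability clears isAllocable and n1 is moved to
+      /// rnUnallocableList, exactly as the body below does.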
+ void handleAddEdge(Graph::EdgeItr eItr) { + Graph &g = getGraph(); + Graph::NodeItr n1Itr = g.getEdgeNode1(eItr), + n2Itr = g.getEdgeNode2(eItr); + NodeData &n1 = getHeuristicNodeData(n1Itr), + &n2 = getHeuristicNodeData(n2Itr); + + // If neither node is managed by the heuristic there's nothing to be + // done. + if (!n1.isHeuristic && !n2.isHeuristic) + return; + + // Ok - we need to update at least one node. + computeEdgeContributions(eItr); + + // Update node 1 if it's managed by the heuristic. + if (n1.isHeuristic) { + bool n1WasAllocable = n1.isAllocable; + addEdgeContributions(eItr, n1Itr); + updateAllocability(n1Itr); + if (n1WasAllocable && !n1.isAllocable) { + rnAllocableList.erase(n1.rnaItr); + n1.rnuItr = + rnUnallocableList.insert(rnUnallocableList.end(), n1Itr); + } } - void setRNUnallocableNodeListItr( - const RNUnallocableNodeListIterator &rNUnallocableNodeListItr) { - - this->rNUnallocableNodeListItr = rNUnallocableNodeListItr; + // Likewise for node 2. + if (n2.isHeuristic) { + bool n2WasAllocable = n2.isAllocable; + addEdgeContributions(eItr, n2Itr); + updateAllocability(n2Itr); + if (n2WasAllocable && !n2.isAllocable) { + rnAllocableList.erase(n2.rnaItr); + n2.rnuItr = + rnUnallocableList.insert(rnUnallocableList.end(), n2Itr); + } } - - RNUnallocableNodeListIterator getRNUnallocableNodeListItr() const { - return rNUnallocableNodeListItr; + } + + /// \brief Handle disconnection of an edge from a node. + /// @param eItr Edge iterator for edge being disconnected. + /// @param nItr Node iterator for the node being disconnected from. + /// + /// Updates allocability of the given node and, if appropriate, moves the + /// node to a new list. + void handleRemoveEdge(Graph::EdgeItr eItr, Graph::NodeItr nItr) { + NodeData &nd = getHeuristicNodeData(nItr); + + // If the node is not managed by the heuristic there's nothing to be + // done. + if (!nd.isHeuristic) + return; + + EdgeData &ed ATTRIBUTE_UNUSED = getHeuristicEdgeData(eItr); + + assert(ed.isUpToDate && "Edge data is not up to date."); + + // Update node. + bool ndWasAllocable = nd.isAllocable; + subtractEdgeContributions(eItr, nItr); + updateAllocability(nItr); + + // If the node has gone optimal... + if (shouldOptimallyReduce(nItr)) { + nd.isHeuristic = false; + addToOptimalReduceList(nItr); + if (ndWasAllocable) { + rnAllocableList.erase(nd.rnaItr); + } else { + rnUnallocableList.erase(nd.rnuItr); + } + } else { + // Node didn't go optimal, but we might have to move it + // from "unallocable" to "allocable". + if (!ndWasAllocable && nd.isAllocable) { + rnUnallocableList.erase(nd.rnuItr); + nd.rnaItr = rnAllocableList.insert(rnAllocableList.end(), nItr); + } } + } + private: - }; + NodeData& getHeuristicNodeData(Graph::NodeItr nItr) { + return getSolver().getHeuristicNodeData(nItr); + } - class EdgeData { - private: + EdgeData& getHeuristicEdgeData(Graph::EdgeItr eItr) { + return getSolver().getHeuristicEdgeData(eItr); + } - typedef std::vector<unsigned> UnsafeArray; + // Work out what this edge will contribute to the allocability of the + // nodes connected to it. + void computeEdgeContributions(Graph::EdgeItr eItr) { + EdgeData &ed = getHeuristicEdgeData(eItr); - unsigned worstDegree, - reverseWorstDegree; - UnsafeArray unsafe, reverseUnsafe; + if (ed.isUpToDate) + return; // Edge data is already up to date. 
- public: + Matrix &eCosts = getGraph().getEdgeCosts(eItr); - EdgeData() : worstDegree(0), reverseWorstDegree(0) {} + unsigned numRegs = eCosts.getRows() - 1, + numReverseRegs = eCosts.getCols() - 1; - typedef UnsafeArray::const_iterator ConstUnsafeIterator; + std::vector<unsigned> rowInfCounts(numRegs, 0), + colInfCounts(numReverseRegs, 0); - void setup(SolverGraph &g, const GraphEdgeIterator &edgeItr) { - const Matrix &edgeCosts = g.getEdgeCosts(edgeItr); - unsigned numRegs = edgeCosts.getRows() - 1, - numReverseRegs = edgeCosts.getCols() - 1; + ed.worst = 0; + ed.reverseWorst = 0; + ed.unsafe.clear(); + ed.unsafe.resize(numRegs, 0); + ed.reverseUnsafe.clear(); + ed.reverseUnsafe.resize(numReverseRegs, 0); - unsafe.resize(numRegs, 0); - reverseUnsafe.resize(numReverseRegs, 0); + for (unsigned i = 0; i < numRegs; ++i) { + for (unsigned j = 0; j < numReverseRegs; ++j) { + if (eCosts[i + 1][j + 1] == + std::numeric_limits<PBQPNum>::infinity()) { + ed.unsafe[i] = 1; + ed.reverseUnsafe[j] = 1; + ++rowInfCounts[i]; + ++colInfCounts[j]; - std::vector<unsigned> rowInfCounts(numRegs, 0), - colInfCounts(numReverseRegs, 0); + if (colInfCounts[j] > ed.worst) { + ed.worst = colInfCounts[j]; + } - for (unsigned i = 0; i < numRegs; ++i) { - for (unsigned j = 0; j < numReverseRegs; ++j) { - if (edgeCosts[i + 1][j + 1] == - std::numeric_limits<PBQPNum>::infinity()) { - unsafe[i] = 1; - reverseUnsafe[j] = 1; - ++rowInfCounts[i]; - ++colInfCounts[j]; - - if (colInfCounts[j] > worstDegree) { - worstDegree = colInfCounts[j]; - } - - if (rowInfCounts[i] > reverseWorstDegree) { - reverseWorstDegree = rowInfCounts[i]; - } + if (rowInfCounts[i] > ed.reverseWorst) { + ed.reverseWorst = rowInfCounts[i]; } } } } - unsigned getWorstDegree() const { return worstDegree; } - unsigned getReverseWorstDegree() const { return reverseWorstDegree; } - ConstUnsafeIterator unsafeBegin() const { return unsafe.begin(); } - ConstUnsafeIterator unsafeEnd() const { return unsafe.end(); } - ConstUnsafeIterator reverseUnsafeBegin() const { - return reverseUnsafe.begin(); + ed.isUpToDate = true; + } + + // Add the contributions of the given edge to the given node's + // numDenied and safe members. No action is taken other than to update + // these member values. Once updated these numbers can be used by clients + // to update the node's allocability. + void addEdgeContributions(Graph::EdgeItr eItr, Graph::NodeItr nItr) { + EdgeData &ed = getHeuristicEdgeData(eItr); + + assert(ed.isUpToDate && "Using out-of-date edge numbers."); + + NodeData &nd = getHeuristicNodeData(nItr); + unsigned numRegs = getGraph().getNodeCosts(nItr).getLength() - 1; + + bool nIsNode1 = nItr == getGraph().getEdgeNode1(eItr); + EdgeData::UnsafeArray &unsafe = + nIsNode1 ? ed.unsafe : ed.reverseUnsafe; + nd.numDenied += nIsNode1 ? ed.worst : ed.reverseWorst; + + for (unsigned r = 0; r < numRegs; ++r) { + if (unsafe[r]) { + if (nd.unsafeDegrees[r]==0) { + --nd.numSafe; + } + ++nd.unsafeDegrees[r]; + } } - ConstUnsafeIterator reverseUnsafeEnd() const { - return reverseUnsafe.end(); + } + + // Subtract the contributions of the given edge to the given node's + // numDenied and safe members. No action is taken other than to update + // these member values. Once updated these numbers can be used by clients + // to update the node's allocability. 
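+      // Example (illustrative): if register option r was made unsafe only by
+      // this edge (unsafeDegrees[r] == 1), subtracting the edge's
+      // contribution increments numSafe before unsafeDegrees[r] drops back
+      // to 0.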
+ void subtractEdgeContributions(Graph::EdgeItr eItr, Graph::NodeItr nItr) { + EdgeData &ed = getHeuristicEdgeData(eItr); + + assert(ed.isUpToDate && "Using out-of-date edge numbers."); + + NodeData &nd = getHeuristicNodeData(nItr); + unsigned numRegs = getGraph().getNodeCosts(nItr).getLength() - 1; + + bool nIsNode1 = nItr == getGraph().getEdgeNode1(eItr); + EdgeData::UnsafeArray &unsafe = + nIsNode1 ? ed.unsafe : ed.reverseUnsafe; + nd.numDenied -= nIsNode1 ? ed.worst : ed.reverseWorst; + + for (unsigned r = 0; r < numRegs; ++r) { + if (unsafe[r]) { + if (nd.unsafeDegrees[r] == 1) { + ++nd.numSafe; + } + --nd.unsafeDegrees[r]; + } } - }; - - void initialise(Solver &solver) { - this->s = &solver; - g = &s->getGraph(); - rNAllocableBucket = RNAllocableNodeList(LinkDegreeComparator(g)); - rNUnallocableBucket = - RNUnallocableNodeList(SpillPriorityComparator(g)); - - for (GraphEdgeIterator - edgeItr = g->edgesBegin(), edgeEnd = g->edgesEnd(); - edgeItr != edgeEnd; ++edgeItr) { - - g->getEdgeData(edgeItr).getHeuristicData().setup(*g, edgeItr); - } - - for (GraphNodeIterator - nodeItr = g->nodesBegin(), nodeEnd = g->nodesEnd(); - nodeItr != nodeEnd; ++nodeItr) { - - g->getNodeData(nodeItr).getHeuristicData().setup(*g, nodeItr); - } - } - - void addToRNBucket(const GraphNodeIterator &nodeItr) { - NodeData &nodeData = g->getNodeData(nodeItr).getHeuristicData(); - - if (nodeData.isAllocable()) { - nodeData.setRNAllocableNodeListItr( - rNAllocableBucket.insert(rNAllocableBucket.begin(), nodeItr)); - } - else { - nodeData.setRNUnallocableNodeListItr( - rNUnallocableBucket.insert(rNUnallocableBucket.begin(), nodeItr)); - } - } + } - void removeFromRNBucket(const GraphNodeIterator &nodeItr) { - NodeData &nodeData = g->getNodeData(nodeItr).getHeuristicData(); - - if (nodeData.isAllocable()) { - rNAllocableBucket.erase(nodeData.getRNAllocableNodeListItr()); - } - else { - rNUnallocableBucket.erase(nodeData.getRNUnallocableNodeListItr()); - } - } + void updateAllocability(Graph::NodeItr nItr) { + NodeData &nd = getHeuristicNodeData(nItr); + unsigned numRegs = getGraph().getNodeCosts(nItr).getLength() - 1; + nd.isAllocable = nd.numDenied < numRegs || nd.numSafe > 0; + } - void handleAddLink(const GraphEdgeIterator &edgeItr) { - // We assume that if we got here this edge is attached to at least - // one high degree node. - g->getEdgeData(edgeItr).getHeuristicData().setup(*g, edgeItr); - - GraphNodeIterator n1Itr = g->getEdgeNode1Itr(edgeItr), - n2Itr = g->getEdgeNode2Itr(edgeItr); - - HSIT::NodeData &n1Data = g->getNodeData(n1Itr), - &n2Data = g->getNodeData(n2Itr); - - if (n1Data.getLinkDegree() > 2) { - n1Data.getHeuristicData().handleAddLink(*g, n1Itr, edgeItr); - } - if (n2Data.getLinkDegree() > 2) { - n2Data.getHeuristicData().handleAddLink(*g, n2Itr, edgeItr); - } - } + void initializeNode(Graph::NodeItr nItr) { + NodeData &nd = getHeuristicNodeData(nItr); - void handleRemoveLink(const GraphEdgeIterator &edgeItr, - const GraphNodeIterator &nodeItr) { - NodeData &nodeData = g->getNodeData(nodeItr).getHeuristicData(); - nodeData.handleRemoveLink(*g, nodeItr, edgeItr); - } + if (nd.isInitialized) + return; // Node data is already up to date. 
- void processRN() { - - if (!rNAllocableBucket.empty()) { - GraphNodeIterator selectedNodeItr = *rNAllocableBucket.begin(); - //std::cerr << "RN safely pushing " << g->getNodeID(selectedNodeItr) << "\n"; - rNAllocableBucket.erase(rNAllocableBucket.begin()); - s->pushStack(selectedNodeItr); - s->unlinkNode(selectedNodeItr); - } - else { - GraphNodeIterator selectedNodeItr = *rNUnallocableBucket.begin(); - //std::cerr << "RN optimistically pushing " << g->getNodeID(selectedNodeItr) << "\n"; - rNUnallocableBucket.erase(rNUnallocableBucket.begin()); - s->pushStack(selectedNodeItr); - s->unlinkNode(selectedNodeItr); - } - - } + unsigned numRegs = getGraph().getNodeCosts(nItr).getLength() - 1; - bool rNBucketEmpty() const { - return (rNAllocableBucket.empty() && rNUnallocableBucket.empty()); - } + nd.numDenied = 0; + nd.numSafe = numRegs; + nd.unsafeDegrees.resize(numRegs, 0); -private: + typedef HeuristicSolverImpl<Briggs>::SolverEdgeItr SolverEdgeItr; - Solver *s; - SolverGraph *g; - RNAllocableNodeList rNAllocableBucket; - RNUnallocableNodeList rNUnallocableBucket; -}; + for (SolverEdgeItr aeItr = getSolver().solverEdgesBegin(nItr), + aeEnd = getSolver().solverEdgesEnd(nItr); + aeItr != aeEnd; ++aeItr) { + + Graph::EdgeItr eItr = *aeItr; + computeEdgeContributions(eItr); + addEdgeContributions(eItr, nItr); + } + updateAllocability(nItr); + nd.isInitialized = true; + } + + void handleRemoveNode(Graph::NodeItr xnItr) { + typedef HeuristicSolverImpl<Briggs>::SolverEdgeItr SolverEdgeItr; + std::vector<Graph::EdgeItr> edgesToRemove; + for (SolverEdgeItr aeItr = getSolver().solverEdgesBegin(xnItr), + aeEnd = getSolver().solverEdgesEnd(xnItr); + aeItr != aeEnd; ++aeItr) { + Graph::NodeItr ynItr = getGraph().getEdgeOtherNode(*aeItr, xnItr); + handleRemoveEdge(*aeItr, ynItr); + edgesToRemove.push_back(*aeItr); + } + while (!edgesToRemove.empty()) { + getSolver().removeSolverEdge(edgesToRemove.back()); + edgesToRemove.pop_back(); + } + } + RNAllocableList rnAllocableList; + RNUnallocableList rnUnallocableList; + }; -} + } } diff --git a/lib/CodeGen/PBQP/PBQPMath.h b/lib/CodeGen/PBQP/Math.h index 20737a2..e7598bf 100644 --- a/lib/CodeGen/PBQP/PBQPMath.h +++ b/lib/CodeGen/PBQP/Math.h @@ -1,4 +1,4 @@ -//===-- PBQPMath.h - PBQP Vector and Matrix classes -------------*- C++ -*-===// +//===------ Math.h - PBQP Vector and Matrix classes -------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -7,8 +7,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_CODEGEN_PBQP_PBQPMATH_H -#define LLVM_CODEGEN_PBQP_PBQPMATH_H +#ifndef LLVM_CODEGEN_PBQP_MATH_H +#define LLVM_CODEGEN_PBQP_MATH_H #include <cassert> #include <algorithm> @@ -16,7 +16,7 @@ namespace PBQP { -typedef double PBQPNum; +typedef float PBQPNum; /// \brief PBQP Vector class. class Vector { @@ -285,4 +285,4 @@ OStream& operator<<(OStream &os, const Matrix &m) { } -#endif // LLVM_CODEGEN_PBQP_PBQPMATH_HPP +#endif // LLVM_CODEGEN_PBQP_MATH_H diff --git a/lib/CodeGen/PBQP/SimpleGraph.h b/lib/CodeGen/PBQP/SimpleGraph.h deleted file mode 100644 index 13e63ce..0000000 --- a/lib/CodeGen/PBQP/SimpleGraph.h +++ /dev/null @@ -1,100 +0,0 @@ -//===-- SimpleGraph.h - Simple PBQP Graph -----------------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. 
-// -//===----------------------------------------------------------------------===// -// -// Simple PBQP graph class representing a PBQP problem. Graphs of this type -// can be passed to a PBQPSolver instance to solve the PBQP problem. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_CODEGEN_PBQP_SIMPLEGRAPH_H -#define LLVM_CODEGEN_PBQP_SIMPLEGRAPH_H - -#include "GraphBase.h" - -namespace PBQP { - -class SimpleEdge; - -class SimpleNode : public NodeBase<SimpleNode, SimpleEdge> { -public: - SimpleNode(const Vector &costs) : - NodeBase<SimpleNode, SimpleEdge>(costs) {} -}; - -class SimpleEdge : public EdgeBase<SimpleNode, SimpleEdge> { -public: - SimpleEdge(const NodeIterator &node1Itr, const NodeIterator &node2Itr, - const Matrix &costs) : - EdgeBase<SimpleNode, SimpleEdge>(node1Itr, node2Itr, costs) {} -}; - -class SimpleGraph : public GraphBase<SimpleNode, SimpleEdge> { -private: - - typedef GraphBase<SimpleNode, SimpleEdge> PGraph; - - void copyFrom(const SimpleGraph &other) { - assert(other.areNodeIDsValid() && - "Cannot copy from another graph unless IDs have been assigned."); - - std::vector<NodeIterator> newNodeItrs(other.getNumNodes()); - - for (ConstNodeIterator nItr = other.nodesBegin(), nEnd = other.nodesEnd(); - nItr != nEnd; ++nItr) { - newNodeItrs[other.getNodeID(nItr)] = addNode(other.getNodeCosts(nItr)); - } - - for (ConstEdgeIterator eItr = other.edgesBegin(), eEnd = other.edgesEnd(); - eItr != eEnd; ++eItr) { - - unsigned node1ID = other.getNodeID(other.getEdgeNode1Itr(eItr)), - node2ID = other.getNodeID(other.getEdgeNode2Itr(eItr)); - - addEdge(newNodeItrs[node1ID], newNodeItrs[node2ID], - other.getEdgeCosts(eItr)); - } - } - - void copyFrom(SimpleGraph &other) { - if (!other.areNodeIDsValid()) { - other.assignNodeIDs(); - } - copyFrom(const_cast<const SimpleGraph&>(other)); - } - -public: - - SimpleGraph() {} - - - SimpleGraph(const SimpleGraph &other) : PGraph() { - copyFrom(other); - } - - SimpleGraph& operator=(const SimpleGraph &other) { - clear(); - copyFrom(other); - return *this; - } - - NodeIterator addNode(const Vector &costs) { - return PGraph::addConstructedNode(SimpleNode(costs)); - } - - EdgeIterator addEdge(const NodeIterator &node1Itr, - const NodeIterator &node2Itr, - const Matrix &costs) { - return PGraph::addConstructedEdge(SimpleEdge(node1Itr, node2Itr, costs)); - } - -}; - -} - -#endif // LLVM_CODEGEN_PBQP_SIMPLEGRAPH_H diff --git a/lib/CodeGen/PBQP/Solution.h b/lib/CodeGen/PBQP/Solution.h index aee684d..294b537 100644 --- a/lib/CodeGen/PBQP/Solution.h +++ b/lib/CodeGen/PBQP/Solution.h @@ -7,81 +7,51 @@ // //===----------------------------------------------------------------------===// // -// Annotated PBQP Graph class. This class is used internally by the PBQP solver -// to cache information to speed up reduction. +// PBQP Solution class. 
 //
 //===----------------------------------------------------------------------===//
 
 #ifndef LLVM_CODEGEN_PBQP_SOLUTION_H
 #define LLVM_CODEGEN_PBQP_SOLUTION_H
 
-#include "PBQPMath.h"
+#include "Math.h"
+#include "Graph.h"
 
-namespace PBQP {
-
-class Solution {
-
-  friend class SolverImplementation;
-
-private:
-
-  std::vector<unsigned> selections;
-  PBQPNum solutionCost;
-  bool provedOptimal;
-  unsigned r0Reductions, r1Reductions,
-           r2Reductions, rNReductions;
-
-public:
-
-  Solution() :
-    solutionCost(0.0), provedOptimal(false),
-    r0Reductions(0), r1Reductions(0), r2Reductions(0), rNReductions(0) {}
-
-  Solution(unsigned length, bool assumeOptimal) :
-    selections(length), solutionCost(0.0), provedOptimal(assumeOptimal),
-    r0Reductions(0), r1Reductions(0), r2Reductions(0), rNReductions(0) {}
-
-  void setProvedOptimal(bool provedOptimal) {
-    this->provedOptimal = provedOptimal;
-  }
-
-  void setSelection(unsigned nodeID, unsigned selection) {
-    selections[nodeID] = selection;
-  }
+#include <map>
 
-  void setSolutionCost(PBQPNum solutionCost) {
-    this->solutionCost = solutionCost;
-  }
-
-  void incR0Reductions() { ++r0Reductions; }
-  void incR1Reductions() { ++r1Reductions; }
-  void incR2Reductions() { ++r2Reductions; }
-  void incRNReductions() { ++rNReductions; }
-
-  unsigned numNodes() const { return selections.size(); }
-
-  unsigned getSelection(unsigned nodeID) const {
-    return selections[nodeID];
-  }
-
-  PBQPNum getCost() const { return solutionCost; }
-
-  bool isProvedOptimal() const { return provedOptimal; }
-
-  unsigned getR0Reductions() const { return r0Reductions; }
-  unsigned getR1Reductions() const { return r1Reductions; }
-  unsigned getR2Reductions() const { return r2Reductions; }
-  unsigned getRNReductions() const { return rNReductions; }
-
-  bool operator==(const Solution &other) const {
-    return (selections == other.selections);
-  }
-
-  bool operator!=(const Solution &other) const {
-    return !(*this == other);
-  }
+namespace PBQP {
-};
+  /// \brief Represents a solution to a PBQP problem.
+  ///
+  /// To get the selection for each node in the problem, use the getSelection method.
+  class Solution {
+  private:
+    typedef std::map<Graph::NodeItr, unsigned, NodeItrComparator> SelectionsMap;
+    SelectionsMap selections;
+
+  public:
+
+    /// \brief Number of nodes for which selections have been made.
+    /// @return Number of nodes for which selections have been made.
+    unsigned numNodes() const { return selections.size(); }
+
+    /// \brief Set the selection for a given node.
+    /// @param nItr Node iterator.
+    /// @param selection Selection for nItr.
+    void setSelection(Graph::NodeItr nItr, unsigned selection) {
+      selections[nItr] = selection;
+    }
+
+    /// \brief Get a node's selection.
+    /// @param nItr Node iterator.
+    /// @return The selection for nItr.
+    unsigned getSelection(Graph::NodeItr nItr) const {
+      SelectionsMap::const_iterator sItr = selections.find(nItr);
+      assert(sItr != selections.end() && "No selection for node.");
+      return sItr->second;
+    }
+
+  };
 
 }
diff --git a/lib/CodeGen/PBQP/Solver.h b/lib/CodeGen/PBQP/Solver.h
deleted file mode 100644
index a445de8..0000000
--- a/lib/CodeGen/PBQP/Solver.h
+++ /dev/null
@@ -1,31 +0,0 @@
-//===-- Solver.h ------- PBQP solver interface ------------------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-// -//===----------------------------------------------------------------------===// - - -#ifndef LLVM_CODEGEN_PBQP_SOLVER_H -#define LLVM_CODEGEN_PBQP_SOLVER_H - -#include "SimpleGraph.h" -#include "Solution.h" - -namespace PBQP { - -/// \brief Interface for solver classes. -class Solver { -public: - - virtual ~Solver() = 0; - virtual Solution solve(const SimpleGraph &orig) const = 0; -}; - -Solver::~Solver() {} - -} - -#endif // LLVM_CODEGEN_PBQP_SOLVER_H diff --git a/lib/CodeGen/PHIElimination.cpp b/lib/CodeGen/PHIElimination.cpp index 365df30..b740c68 100644 --- a/lib/CodeGen/PHIElimination.cpp +++ b/lib/CodeGen/PHIElimination.cpp @@ -21,6 +21,7 @@ #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Function.h" #include "llvm/Target/TargetMachine.h" #include "llvm/ADT/SmallPtrSet.h" @@ -95,14 +96,14 @@ bool llvm::PHIElimination::runOnMachineFunction(MachineFunction &Fn) { /// bool llvm::PHIElimination::EliminatePHINodes(MachineFunction &MF, MachineBasicBlock &MBB) { - if (MBB.empty() || MBB.front().getOpcode() != TargetInstrInfo::PHI) + if (MBB.empty() || !MBB.front().isPHI()) return false; // Quick exit for basic blocks without PHIs. // Get an iterator to the first instruction after the last PHI node (this may // also be the end of the basic block). MachineBasicBlock::iterator AfterPHIsIt = SkipPHIsAndLabels(MBB, MBB.begin()); - while (MBB.front().getOpcode() == TargetInstrInfo::PHI) + while (MBB.front().isPHI()) LowerAtomicPHINode(MBB, AfterPHIsIt); return true; @@ -115,7 +116,7 @@ static bool isSourceDefinedByImplicitDef(const MachineInstr *MPhi, for (unsigned i = 1; i != MPhi->getNumOperands(); i += 2) { unsigned SrcReg = MPhi->getOperand(i).getReg(); const MachineInstr *DefMI = MRI->getVRegDef(SrcReg); - if (!DefMI || DefMI->getOpcode() != TargetInstrInfo::IMPLICIT_DEF) + if (!DefMI || !DefMI->isImplicitDef()) return false; } return true; @@ -197,7 +198,7 @@ void llvm::PHIElimination::LowerAtomicPHINode( // If all sources of a PHI node are implicit_def, just emit an // implicit_def instead of a copy. BuildMI(MBB, AfterPHIsIt, MPhi->getDebugLoc(), - TII->get(TargetInstrInfo::IMPLICIT_DEF), DestReg); + TII->get(TargetOpcode::IMPLICIT_DEF), DestReg); else { // Can we reuse an earlier PHI node? This only happens for critical edges, // typically those created by tail duplication. @@ -281,7 +282,7 @@ void llvm::PHIElimination::LowerAtomicPHINode( // If source is defined by an implicit def, there is no need to insert a // copy. 
MachineInstr *DefMI = MRI->getVRegDef(SrcReg); - if (DefMI->getOpcode() == TargetInstrInfo::IMPLICIT_DEF) { + if (DefMI->isImplicitDef()) { ImpDefs.insert(DefMI); continue; } @@ -375,7 +376,7 @@ void llvm::PHIElimination::analyzePHINodes(const MachineFunction& Fn) { for (MachineFunction::const_iterator I = Fn.begin(), E = Fn.end(); I != E; ++I) for (MachineBasicBlock::const_iterator BBI = I->begin(), BBE = I->end(); - BBI != BBE && BBI->getOpcode() == TargetInstrInfo::PHI; ++BBI) + BBI != BBE && BBI->isPHI(); ++BBI) for (unsigned i = 1, e = BBI->getNumOperands(); i != e; i += 2) ++VRegPHIUseCount[BBVRegPair(BBI->getOperand(i+1).getMBB()->getNumber(), BBI->getOperand(i).getReg())]; @@ -384,12 +385,11 @@ void llvm::PHIElimination::analyzePHINodes(const MachineFunction& Fn) { bool llvm::PHIElimination::SplitPHIEdges(MachineFunction &MF, MachineBasicBlock &MBB, LiveVariables &LV) { - if (MBB.empty() || MBB.front().getOpcode() != TargetInstrInfo::PHI || - MBB.isLandingPad()) + if (MBB.empty() || !MBB.front().isPHI() || MBB.isLandingPad()) return false; // Quick exit for basic blocks without PHIs. for (MachineBasicBlock::const_iterator BBI = MBB.begin(), BBE = MBB.end(); - BBI != BBE && BBI->getOpcode() == TargetInstrInfo::PHI; ++BBI) { + BBI != BBE && BBI->isPHI(); ++BBI) { for (unsigned i = 1, e = BBI->getNumOperands(); i != e; i += 2) { unsigned Reg = BBI->getOperand(i).getReg(); MachineBasicBlock *PreMBB = BBI->getOperand(i+1).getMBB(); @@ -438,7 +438,7 @@ MachineBasicBlock *PHIElimination::SplitCriticalEdge(MachineBasicBlock *A, // Fix PHI nodes in B so they refer to NMBB instead of A for (MachineBasicBlock::iterator i = B->begin(), e = B->end(); - i != e && i->getOpcode() == TargetInstrInfo::PHI; ++i) + i != e && i->isPHI(); ++i) for (unsigned ni = 1, ne = i->getNumOperands(); ni != ne; ni += 2) if (i->getOperand(ni+1).getMBB() == A) i->getOperand(ni+1).setMBB(NMBB); diff --git a/lib/CodeGen/PHIElimination.h b/lib/CodeGen/PHIElimination.h index 1bcc9dc..f3ab9e2 100644 --- a/lib/CodeGen/PHIElimination.h +++ b/lib/CodeGen/PHIElimination.h @@ -14,10 +14,11 @@ #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" namespace llvm { - + class LiveVariables; + /// Lower PHI instructions to copies. class PHIElimination : public MachineFunctionPass { MachineRegisterInfo *MRI; // Machine register information @@ -108,13 +109,29 @@ namespace llvm { // SkipPHIsAndLabels - Copies need to be inserted after phi nodes and // also after any exception handling labels: in landing pads execution // starts at the label, so any copies placed before it won't be executed! + // We also deal with DBG_VALUEs, which are a bit tricky: + // PHI + // DBG_VALUE + // LABEL + // Here the DBG_VALUE needs to be skipped, and if it refers to a PHI it + // needs to be annulled or, better, moved to follow the label, as well. + // PHI + // DBG_VALUE + // no label + // Here it is not a good idea to skip the DBG_VALUE. + // FIXME: For now we skip and annul all DBG_VALUEs, maximally simple and + // maximally stupid. MachineBasicBlock::iterator SkipPHIsAndLabels(MachineBasicBlock &MBB, MachineBasicBlock::iterator I) { // Rather than assuming that EH labels come before other kinds of labels, // just skip all labels. 
- while (I != MBB.end() && - (I->getOpcode() == TargetInstrInfo::PHI || I->isLabel())) + while (I != MBB.end() && + (I->isPHI() || I->isLabel() || I->isDebugValue())) { + if (I->isDebugValue() && I->getNumOperands()==3 && + I->getOperand(0).isReg()) + I->getOperand(0).setReg(0U); ++I; + } return I; } diff --git a/lib/CodeGen/PreAllocSplitting.cpp b/lib/CodeGen/PreAllocSplitting.cpp index 8cbc8c2..70e91aa 100644 --- a/lib/CodeGen/PreAllocSplitting.cpp +++ b/lib/CodeGen/PreAllocSplitting.cpp @@ -686,8 +686,7 @@ void PreAllocSplitting::ReconstructLiveInterval(LiveInterval* LI) { SlotIndex DefIdx = LIs->getInstructionIndex(&*DI); DefIdx = DefIdx.getDefIndex(); - assert(DI->getOpcode() != TargetInstrInfo::PHI && - "PHI instr in code during pre-alloc splitting."); + assert(!DI->isPHI() && "PHI instr in code during pre-alloc splitting."); VNInfo* NewVN = LI->getNextValue(DefIdx, 0, true, Alloc); // If the def is a move, set the copy field. diff --git a/lib/CodeGen/ProcessImplicitDefs.cpp b/lib/CodeGen/ProcessImplicitDefs.cpp index a00f450..d7179b3 100644 --- a/lib/CodeGen/ProcessImplicitDefs.cpp +++ b/lib/CodeGen/ProcessImplicitDefs.cpp @@ -49,9 +49,9 @@ bool ProcessImplicitDefs::CanTurnIntoImplicitDef(MachineInstr *MI, Reg == SrcReg) return true; - if (OpIdx == 2 && MI->getOpcode() == TargetInstrInfo::SUBREG_TO_REG) + if (OpIdx == 2 && MI->isSubregToReg()) return true; - if (OpIdx == 1 && MI->getOpcode() == TargetInstrInfo::EXTRACT_SUBREG) + if (OpIdx == 1 && MI->isExtractSubreg()) return true; return false; } @@ -88,7 +88,7 @@ bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &fn) { I != E; ) { MachineInstr *MI = &*I; ++I; - if (MI->getOpcode() == TargetInstrInfo::IMPLICIT_DEF) { + if (MI->isImplicitDef()) { unsigned Reg = MI->getOperand(0).getReg(); ImpDefRegs.insert(Reg); if (TargetRegisterInfo::isPhysicalRegister(Reg)) { @@ -99,7 +99,7 @@ bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &fn) { continue; } - if (MI->getOpcode() == TargetInstrInfo::INSERT_SUBREG) { + if (MI->isInsertSubreg()) { MachineOperand &MO = MI->getOperand(2); if (ImpDefRegs.count(MO.getReg())) { // %reg1032<def> = INSERT_SUBREG %reg1032, undef, 2 @@ -127,7 +127,7 @@ bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &fn) { // Use is a copy, just turn it into an implicit_def. if (CanTurnIntoImplicitDef(MI, Reg, i, tii_)) { bool isKill = MO.isKill(); - MI->setDesc(tii_->get(TargetInstrInfo::IMPLICIT_DEF)); + MI->setDesc(tii_->get(TargetOpcode::IMPLICIT_DEF)); for (int j = MI->getNumOperands() - 1, ee = 0; j > ee; --j) MI->RemoveOperand(j); if (isKill) { @@ -187,7 +187,7 @@ bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &fn) { for (MachineRegisterInfo::def_iterator DI = mri_->def_begin(Reg), DE = mri_->def_end(); DI != DE; ++DI) { MachineInstr *DeadImpDef = &*DI; - if (DeadImpDef->getOpcode() != TargetInstrInfo::IMPLICIT_DEF) { + if (!DeadImpDef->isImplicitDef()) { Skip = true; break; } @@ -205,10 +205,9 @@ bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &fn) { // Process each use instruction once. 
for (MachineRegisterInfo::use_iterator UI = mri_->use_begin(Reg), UE = mri_->use_end(); UI != UE; ++UI) { - MachineInstr *RMI = &*UI; - MachineBasicBlock *RMBB = RMI->getParent(); - if (RMBB == MBB) + if (UI.getOperand().isUndef()) continue; + MachineInstr *RMI = &*UI; if (ModInsts.insert(RMI)) RUses.push_back(RMI); } @@ -220,7 +219,7 @@ bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &fn) { unsigned SrcReg, DstReg, SrcSubReg, DstSubReg; if (tii_->isMoveInstr(*RMI, SrcReg, DstReg, SrcSubReg, DstSubReg) && Reg == SrcReg) { - RMI->setDesc(tii_->get(TargetInstrInfo::IMPLICIT_DEF)); + RMI->setDesc(tii_->get(TargetOpcode::IMPLICIT_DEF)); bool isKill = false; SmallVector<unsigned, 4> Ops; @@ -264,8 +263,8 @@ bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &fn) { } } RUses.clear(); + ModInsts.clear(); } - ModInsts.clear(); ImpDefRegs.clear(); ImpDefMIs.clear(); } diff --git a/lib/CodeGen/PrologEpilogInserter.cpp b/lib/CodeGen/PrologEpilogInserter.cpp index 709d46a..040259e 100644 --- a/lib/CodeGen/PrologEpilogInserter.cpp +++ b/lib/CodeGen/PrologEpilogInserter.cpp @@ -161,7 +161,7 @@ void PEI::calculateCallsInformation(MachineFunction &Fn) { if (Size > MaxCallFrameSize) MaxCallFrameSize = Size; HasCalls = true; FrameSDOps.push_back(I); - } else if (I->getOpcode() == TargetInstrInfo::INLINEASM) { + } else if (I->isInlineAsm()) { // An InlineAsm might be a call; assume it is to get the stack frame // aligned correctly for calls. HasCalls = true; @@ -476,8 +476,6 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { // Loop over all of the stack objects, assigning sequential addresses... MachineFrameInfo *FFI = Fn.getFrameInfo(); - unsigned MaxAlign = 1; - // Start at the beginning of the local area. // The Offset is the distance from the stack top in the direction // of stack growth -- so it's always nonnegative. @@ -517,9 +515,6 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { Offset += FFI->getObjectSize(i); unsigned Align = FFI->getObjectAlignment(i); - // If the alignment of this object is greater than that of the stack, - // then increase the stack alignment to match. - MaxAlign = std::max(MaxAlign, Align); // Adjust to alignment boundary Offset = (Offset+Align-1)/Align*Align; @@ -529,9 +524,6 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { int MaxCSFI = MaxCSFrameIndex, MinCSFI = MinCSFrameIndex; for (int i = MaxCSFI; i >= MinCSFI ; --i) { unsigned Align = FFI->getObjectAlignment(i); - // If the alignment of this object is greater than that of the stack, - // then increase the stack alignment to match. - MaxAlign = std::max(MaxAlign, Align); // Adjust to alignment boundary Offset = (Offset+Align-1)/Align*Align; @@ -540,6 +532,8 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { } } + unsigned MaxAlign = FFI->getMaxAlignment(); + // Make sure the special register scavenging spill slot is closest to the // frame pointer if a frame pointer is required. const TargetRegisterInfo *RegInfo = Fn.getTarget().getRegisterInfo(); @@ -605,11 +599,6 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { // Update frame info to pretend that this is part of the stack... FFI->setStackSize(Offset - LocalAreaOffset); - - // Remember the required stack alignment in case targets need it to perform - // dynamic stack alignment. 
- if (MaxAlign > FFI->getMaxAlignment()) - FFI->setMaxAlignment(MaxAlign); } diff --git a/lib/CodeGen/RegAllocLocal.cpp b/lib/CodeGen/RegAllocLocal.cpp index cbb5826..04303cf 100644 --- a/lib/CodeGen/RegAllocLocal.cpp +++ b/lib/CodeGen/RegAllocLocal.cpp @@ -490,9 +490,12 @@ MachineInstr *RALocal::reloadVirtReg(MachineBasicBlock &MBB, MachineInstr *MI, // If the virtual register is already available, just update the instruction // and return. if (unsigned PR = getVirt2PhysRegMapSlot(VirtReg)) { - MarkPhysRegRecentlyUsed(PR); // Already have this value available! MI->getOperand(OpNum).setReg(PR); // Assign the input register - getVirtRegLastUse(VirtReg) = std::make_pair(MI, OpNum); + if (!MI->isDebugValue()) { + // Do not do these for DBG_VALUE as they can affect codegen. + MarkPhysRegRecentlyUsed(PR); // Already have this value available! + getVirtRegLastUse(VirtReg) = std::make_pair(MI, OpNum); + } return MI; } @@ -531,7 +534,7 @@ MachineInstr *RALocal::reloadVirtReg(MachineBasicBlock &MBB, MachineInstr *MI, std::string msg; raw_string_ostream Msg(msg); Msg << "Ran out of registers during register allocation!"; - if (MI->getOpcode() == TargetInstrInfo::INLINEASM) { + if (MI->isInlineAsm()) { Msg << "\nPlease check your inline asm statement for invalid " << "constraints:\n"; MI->print(Msg, TM); @@ -544,7 +547,7 @@ MachineInstr *RALocal::reloadVirtReg(MachineBasicBlock &MBB, MachineInstr *MI, std::string msg; raw_string_ostream Msg(msg); Msg << "Ran out of registers during register allocation!"; - if (MI->getOpcode() == TargetInstrInfo::INLINEASM) { + if (MI->isInlineAsm()) { Msg << "\nPlease check your inline asm statement for invalid " << "constraints:\n"; MI->print(Msg, TM); @@ -609,6 +612,8 @@ void RALocal::ComputeLocalLiveness(MachineBasicBlock& MBB) { DenseMap<unsigned, std::pair<MachineInstr*, unsigned> > LastUseDef; for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); I != E; ++I) { + if (I->isDebugValue()) + continue; for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) { MachineOperand& MO = I->getOperand(i); // Uses don't trigger any flags, but we need to save @@ -691,7 +696,13 @@ void RALocal::ComputeLocalLiveness(MachineBasicBlock& MBB) { bool usedOutsideBlock = isPhysReg ? false : UsedInMultipleBlocks.test(MO.getReg() - TargetRegisterInfo::FirstVirtualRegister); - if (!isPhysReg && !usedOutsideBlock) + if (!isPhysReg && !usedOutsideBlock) { + // DBG_VALUE complicates this: if the only refs of a register outside + // this block are DBG_VALUE, we can't keep the reg live just for that, + // as it will cause the reg to be spilled at the end of this block when + // it wouldn't have been otherwise. Nullify the DBG_VALUEs when that + // happens. + bool UsedByDebugValueOnly = false; for (MachineRegisterInfo::reg_iterator UI = MRI.reg_begin(MO.getReg()), UE = MRI.reg_end(); UI != UE; ++UI) // Two cases: @@ -699,12 +710,26 @@ void RALocal::ComputeLocalLiveness(MachineBasicBlock& MBB) { // - used in the same block before it is defined (loop) if (UI->getParent() != &MBB || (MO.isDef() && UI.getOperand().isUse() && precedes(&*UI, MI))) { + if (UI->isDebugValue()) { + UsedByDebugValueOnly = true; + continue; + } + // A non-DBG_VALUE use means we can leave DBG_VALUE uses alone. 
UsedInMultipleBlocks.set(MO.getReg() - TargetRegisterInfo::FirstVirtualRegister); usedOutsideBlock = true; + UsedByDebugValueOnly = false; break; } - + if (UsedByDebugValueOnly) + for (MachineRegisterInfo::reg_iterator UI = MRI.reg_begin(MO.getReg()), + UE = MRI.reg_end(); UI != UE; ++UI) + if (UI->isDebugValue() && + (UI->getParent() != &MBB || + (MO.isDef() && precedes(&*UI, MI)))) + UI.getOperand().setReg(0U); + } + // Physical registers and those that are not live-out of the block // are killed/dead at their last use/def within this block. if (isPhysReg || !usedOutsideBlock) { @@ -764,8 +789,11 @@ void RALocal::AllocateBasicBlock(MachineBasicBlock &MBB) { // Determine whether this is a copy instruction. The cases where the // source or destination are phys regs are handled specially. unsigned SrcCopyReg, DstCopyReg, SrcCopySubReg, DstCopySubReg; + unsigned SrcCopyPhysReg = 0U; bool isCopy = TII->isMoveInstr(*MI, SrcCopyReg, DstCopyReg, SrcCopySubReg, DstCopySubReg); + if (isCopy && TargetRegisterInfo::isVirtualRegister(SrcCopyReg)) + SrcCopyPhysReg = getVirt2PhysRegMapSlot(SrcCopyReg); // Loop over the implicit uses, making sure that they are at the head of the // use order list, so they don't get reallocated. @@ -793,7 +821,7 @@ void RALocal::AllocateBasicBlock(MachineBasicBlock &MBB) { // have in them, then mark them unallocatable. // If any virtual regs are earlyclobber, allocate them now (before // freeing inputs that are killed). - if (MI->getOpcode()==TargetInstrInfo::INLINEASM) { + if (MI->isInlineAsm()) { for (unsigned i = 0; i != MI->getNumOperands(); ++i) { MachineOperand& MO = MI->getOperand(i); if (MO.isReg() && MO.isDef() && MO.isEarlyClobber() && @@ -838,6 +866,18 @@ void RALocal::AllocateBasicBlock(MachineBasicBlock &MBB) { } } + // If a DBG_VALUE says something is located in a spilled register, + // change the DBG_VALUE to be undef, which prevents the register + // from being reloaded here. Doing that would change the generated + // code, unless another use immediately follows this instruction. + if (MI->isDebugValue() && + MI->getNumOperands()==3 && MI->getOperand(0).isReg()) { + unsigned VirtReg = MI->getOperand(0).getReg(); + if (VirtReg && TargetRegisterInfo::isVirtualRegister(VirtReg) && + !getVirt2PhysRegMapSlot(VirtReg)) + MI->getOperand(0).setReg(0U); + } + // Get the used operands into registers. This has the potential to spill // incoming values if we are out of registers. Note that we completely // ignore physical register uses here. We assume that if an explicit @@ -965,13 +1005,26 @@ void RALocal::AllocateBasicBlock(MachineBasicBlock &MBB) { // If DestVirtReg already has a value, use it. if (!(DestPhysReg = getVirt2PhysRegMapSlot(DestVirtReg))) { + // If this is a copy try to reuse the input as the output; + // that will make the copy go away. // If this is a copy, the source reg is a phys reg, and // that reg is available, use that phys reg for DestPhysReg. + // If this is a copy, the source reg is a virtual reg, and + // the phys reg that was assigned to that virtual reg is now + // available, use that phys reg for DestPhysReg. (If it's now + // available that means this was the last use of the source.) 
if (isCopy && TargetRegisterInfo::isPhysicalRegister(SrcCopyReg) && isPhysRegAvailable(SrcCopyReg)) { DestPhysReg = SrcCopyReg; assignVirtToPhysReg(DestVirtReg, DestPhysReg); + } else if (isCopy && + TargetRegisterInfo::isVirtualRegister(SrcCopyReg) && + SrcCopyPhysReg && isPhysRegAvailable(SrcCopyPhysReg) && + MF->getRegInfo().getRegClass(DestVirtReg)-> + contains(SrcCopyPhysReg)) { + DestPhysReg = SrcCopyPhysReg; + assignVirtToPhysReg(DestVirtReg, DestPhysReg); } else DestPhysReg = getReg(MBB, MI, DestVirtReg); } diff --git a/lib/CodeGen/RegAllocPBQP.cpp b/lib/CodeGen/RegAllocPBQP.cpp index fc59653..2701faf 100644 --- a/lib/CodeGen/RegAllocPBQP.cpp +++ b/lib/CodeGen/RegAllocPBQP.cpp @@ -32,7 +32,7 @@ #define DEBUG_TYPE "regalloc" #include "PBQP/HeuristicSolver.h" -#include "PBQP/SimpleGraph.h" +#include "PBQP/Graph.h" #include "PBQP/Heuristics/Briggs.h" #include "VirtRegMap.h" #include "VirtRegRewriter.h" @@ -58,12 +58,12 @@ using namespace llvm; static RegisterRegAlloc registerPBQPRepAlloc("pbqp", "PBQP register allocator.", - llvm::createPBQPRegisterAllocator); + llvm::createPBQPRegisterAllocator); static cl::opt<bool> pbqpCoalescing("pbqp-coalescing", - cl::desc("Attempt coalescing during PBQP register allocation."), - cl::init(false), cl::Hidden); + cl::desc("Attempt coalescing during PBQP register allocation."), + cl::init(false), cl::Hidden); namespace { @@ -114,6 +114,8 @@ namespace { typedef std::set<LiveInterval*> LiveIntervalSet; + typedef std::vector<PBQP::Graph::NodeItr> NodeVector; + MachineFunction *mf; const TargetMachine *tm; const TargetRegisterInfo *tri; @@ -130,6 +132,7 @@ namespace { AllowedSetMap allowedSets; LiveIntervalSet vregIntervalsToAlloc, emptyVRegIntervals; + NodeVector problemNodes; /// Builds a PBQP cost vector. @@ -174,7 +177,7 @@ namespace { /// allocation problem for this function. /// /// @return a PBQP solver object for the register allocation problem. - PBQP::SimpleGraph constructPBQPProblem(); + PBQP::Graph constructPBQPProblem(); /// \brief Adds a stack interval if the given live interval has been /// spilled. Used to support stack slot coloring. @@ -408,16 +411,16 @@ PBQPRegAlloc::CoalesceMap PBQPRegAlloc::findCoalesces() { // We also need any physical regs to be allocable, coalescing with // a non-allocable register is invalid. if (srcRegIsPhysical) { - if (std::find(srcRegClass->allocation_order_begin(*mf), - srcRegClass->allocation_order_end(*mf), srcReg) == - srcRegClass->allocation_order_end(*mf)) + if (std::find(dstRegClass->allocation_order_begin(*mf), + dstRegClass->allocation_order_end(*mf), srcReg) == + dstRegClass->allocation_order_end(*mf)) continue; } if (dstRegIsPhysical) { - if (std::find(dstRegClass->allocation_order_begin(*mf), - dstRegClass->allocation_order_end(*mf), dstReg) == - dstRegClass->allocation_order_end(*mf)) + if (std::find(srcRegClass->allocation_order_begin(*mf), + srcRegClass->allocation_order_end(*mf), dstReg) == + srcRegClass->allocation_order_end(*mf)) continue; } @@ -439,6 +442,12 @@ PBQPRegAlloc::CoalesceMap PBQPRegAlloc::findCoalesces() { vniItr = srcLI->vni_begin(), vniEnd = srcLI->vni_end(); vniItr != vniEnd; ++vniItr) { + // If we find a poorly defined def we err on the side of caution. + if (!(*vniItr)->def.isValid()) { + badDef = true; + break; + } + // If we find a def that kills the coalescing opportunity then // record it and break from the loop. 
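// [Editor's sketch] The copy-reuse heuristic just added, reduced to its
// decision: for a copy whose virtual source had a physical register that is
// free again (meaning the copy was the last use of the source), hand that
// same physical register to the destination so the copy becomes a no-op.
// The boolean parameters stand in for the allocator queries used above:
inline unsigned chooseDestPhys(bool IsCopy, unsigned SrcCopyPhysReg,
                               bool SrcPhysNowFree, bool DestClassContainsIt,
                               unsigned NormallyAllocatedReg) {
  if (IsCopy && SrcCopyPhysReg && SrcPhysNowFree && DestClassContainsIt)
    return SrcCopyPhysReg;     // Source and dest share a register: dead copy.
  return NormallyAllocatedReg; // Otherwise fall back to getReg().
}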
if (dstLI->liveAt((*vniItr)->def)) { @@ -460,6 +469,11 @@ PBQPRegAlloc::CoalesceMap PBQPRegAlloc::findCoalesces() { if ((*vniItr)->getCopy() == instr) continue; + if (!(*vniItr)->def.isValid()) { + badDef = true; + break; + } + if (srcLI->liveAt((*vniItr)->def)) { badDef = true; break; @@ -510,11 +524,10 @@ void PBQPRegAlloc::findVRegIntervalsToAlloc() { } } -PBQP::SimpleGraph PBQPRegAlloc::constructPBQPProblem() { +PBQP::Graph PBQPRegAlloc::constructPBQPProblem() { typedef std::vector<const LiveInterval*> LIVector; typedef std::vector<unsigned> RegVector; - typedef std::vector<PBQP::SimpleGraph::NodeIterator> NodeVector; // This will store the physical intervals for easy reference. LIVector physIntervals; @@ -553,8 +566,8 @@ PBQP::SimpleGraph PBQPRegAlloc::constructPBQPProblem() { } // Construct a PBQP solver for this problem - PBQP::SimpleGraph problem; - NodeVector problemNodes(vregIntervalsToAlloc.size()); + PBQP::Graph problem; + problemNodes.resize(vregIntervalsToAlloc.size()); // Resize allowedSets container appropriately. allowedSets.resize(vregIntervalsToAlloc.size()); @@ -657,12 +670,7 @@ PBQP::SimpleGraph PBQPRegAlloc::constructPBQPProblem() { } } - problem.assignNodeIDs(); - assert(problem.getNumNodes() == allowedSets.size()); - for (unsigned i = 0; i < allowedSets.size(); ++i) { - assert(problem.getNodeItr(i) == problemNodes[i]); - } /* std::cerr << "Allocating for " << problem.getNumNodes() << " nodes, " << problem.getNumEdges() << " edges.\n"; @@ -696,10 +704,6 @@ void PBQPRegAlloc::addStackInterval(const LiveInterval *spilled, bool PBQPRegAlloc::mapPBQPToRegAlloc(const PBQP::Solution &solution) { - // Assert that this is a valid solution to the regalloc problem. - assert(solution.getCost() != std::numeric_limits<PBQP::PBQPNum>::infinity() && - "Invalid (infinite cost) solution for PBQP problem."); - // Set to true if we have any spills bool anotherRoundNeeded = false; @@ -709,7 +713,7 @@ bool PBQPRegAlloc::mapPBQPToRegAlloc(const PBQP::Solution &solution) { // Iterate over the nodes mapping the PBQP solution to a register assignment. for (unsigned node = 0; node < node2LI.size(); ++node) { unsigned virtReg = node2LI[node]->reg, - allocSelection = solution.getSelection(node); + allocSelection = solution.getSelection(problemNodes[node]); // If the PBQP solution is non-zero it's a physical register... 
@@ -849,7 +853,7 @@ bool PBQPRegAlloc::runOnMachineFunction(MachineFunction &MF) { vrm = &getAnalysis<VirtRegMap>(); - DEBUG(dbgs() << "PBQP2 Register Allocating for " << mf->getFunction()->getName() << "\n"); + DEBUG(dbgs() << "PBQP Register Allocating for " << mf->getFunction()->getName() << "\n"); // Allocator main loop: // @@ -876,10 +880,9 @@ bool PBQPRegAlloc::runOnMachineFunction(MachineFunction &MF) { while (!pbqpAllocComplete) { DEBUG(dbgs() << " PBQP Regalloc round " << round << ":\n"); - PBQP::SimpleGraph problem = constructPBQPProblem(); - PBQP::HeuristicSolver<PBQP::Heuristics::Briggs> solver; - problem.assignNodeIDs(); - PBQP::Solution solution = solver.solve(problem); + PBQP::Graph problem = constructPBQPProblem(); + PBQP::Solution solution = + PBQP::HeuristicSolver<PBQP::Heuristics::Briggs>::solve(problem); pbqpAllocComplete = mapPBQPToRegAlloc(solution); @@ -895,6 +898,7 @@ bool PBQPRegAlloc::runOnMachineFunction(MachineFunction &MF) { li2Node.clear(); node2LI.clear(); allowedSets.clear(); + problemNodes.clear(); DEBUG(dbgs() << "Post alloc VirtRegMap:\n" << *vrm << "\n"); diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 8883064..7da7848 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -1881,7 +1881,8 @@ SDValue DAGCombiner::visitAND(SDNode *N) { LN0->getChain(), LN0->getBasePtr(), LN0->getSrcValue(), LN0->getSrcValueOffset(), MemVT, - LN0->isVolatile(), LN0->getAlignment()); + LN0->isVolatile(), LN0->isNonTemporal(), + LN0->getAlignment()); AddToWorkList(N); CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1)); return SDValue(N, 0); // Return N so it doesn't get rechecked! @@ -1903,7 +1904,8 @@ SDValue DAGCombiner::visitAND(SDNode *N) { LN0->getChain(), LN0->getBasePtr(), LN0->getSrcValue(), LN0->getSrcValueOffset(), MemVT, - LN0->isVolatile(), LN0->getAlignment()); + LN0->isVolatile(), LN0->isNonTemporal(), + LN0->getAlignment()); AddToWorkList(N); CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1)); return SDValue(N, 0); // Return N so it doesn't get rechecked! @@ -1935,7 +1937,8 @@ SDValue DAGCombiner::visitAND(SDNode *N) { DAG.getExtLoad(ISD::ZEXTLOAD, LN0->getDebugLoc(), LoadResultTy, LN0->getChain(), LN0->getBasePtr(), LN0->getSrcValue(), LN0->getSrcValueOffset(), - ExtVT, LN0->isVolatile(), LN0->getAlignment()); + ExtVT, LN0->isVolatile(), LN0->isNonTemporal(), + LN0->getAlignment()); AddToWorkList(N); CombineTo(LN0, NewLoad, NewLoad.getValue(1)); return SDValue(N, 0); // Return N so it doesn't get rechecked! @@ -1970,7 +1973,8 @@ SDValue DAGCombiner::visitAND(SDNode *N) { DAG.getExtLoad(ISD::ZEXTLOAD, LN0->getDebugLoc(), LoadResultTy, LN0->getChain(), NewPtr, LN0->getSrcValue(), LN0->getSrcValueOffset(), - ExtVT, LN0->isVolatile(), Alignment); + ExtVT, LN0->isVolatile(), LN0->isNonTemporal(), + Alignment); AddToWorkList(N); CombineTo(LN0, Load, Load.getValue(1)); return SDValue(N, 0); // Return N so it doesn't get rechecked! @@ -2640,7 +2644,7 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { // If the shift is not a no-op (in which case this should be just a sign // extend already), the truncated to type is legal, sign_extend is legal - // on that type, and the the truncate to that type is both legal and free, + // on that type, and the truncate to that type is both legal and free, // perform the transform. 
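// [Editor's note] The long run of mechanical DAGCombiner hunks that follows
// threads the new isNonTemporal() flag through every place a load or store
// node is rebuilt. A sketch of the underlying hazard with positional bool
// parameters, and why carrying the flags as one value is harder to get
// wrong. Hypothetical types, not the SelectionDAG API:
struct MemFlags {
  bool Volatile;
  bool NonTemporal;
  unsigned Alignment;
};
struct MemNodeModel {
  MemFlags Flags;
  explicit MemNodeModel(const MemFlags &F) : Flags(F) {}
  // Copying the whole struct cannot silently drop NonTemporal the way an
  // extra `bool` argument can be forgotten at one of dozens of call sites.
  MemNodeModel rebuildWithAlign(unsigned NewAlign) const {
    MemNodeModel N(*this);
    N.Flags.Alignment = NewAlign;
    return N;
  }
};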
if ((ShiftAmt > 0) && TLI.isOperationLegalOrCustom(ISD::SIGN_EXTEND, TruncVT) && @@ -3143,7 +3147,8 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { LN0->getBasePtr(), LN0->getSrcValue(), LN0->getSrcValueOffset(), N0.getValueType(), - LN0->isVolatile(), LN0->getAlignment()); + LN0->isVolatile(), LN0->isNonTemporal(), + LN0->getAlignment()); CombineTo(N, ExtLoad); SDValue Trunc = DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(), N0.getValueType(), ExtLoad); @@ -3185,7 +3190,8 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { LN0->getChain(), LN0->getBasePtr(), LN0->getSrcValue(), LN0->getSrcValueOffset(), MemVT, - LN0->isVolatile(), LN0->getAlignment()); + LN0->isVolatile(), LN0->isNonTemporal(), + LN0->getAlignment()); CombineTo(N, ExtLoad); CombineTo(N0.getNode(), DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(), @@ -3220,6 +3226,14 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { NegOne, DAG.getConstant(0, VT), cast<CondCodeSDNode>(N0.getOperand(2))->get(), true); if (SCC.getNode()) return SCC; + if (!LegalOperations || + TLI.isOperationLegal(ISD::SETCC, TLI.getSetCCResultType(VT))) + return DAG.getNode(ISD::SELECT, N->getDebugLoc(), VT, + DAG.getSetCC(N->getDebugLoc(), + TLI.getSetCCResultType(VT), + N0.getOperand(0), N0.getOperand(1), + cast<CondCodeSDNode>(N0.getOperand(2))->get()), + NegOne, DAG.getConstant(0, VT)); } @@ -3307,7 +3321,8 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { LN0->getBasePtr(), LN0->getSrcValue(), LN0->getSrcValueOffset(), N0.getValueType(), - LN0->isVolatile(), LN0->getAlignment()); + LN0->isVolatile(), LN0->isNonTemporal(), + LN0->getAlignment()); CombineTo(N, ExtLoad); SDValue Trunc = DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(), N0.getValueType(), ExtLoad); @@ -3349,7 +3364,8 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { LN0->getChain(), LN0->getBasePtr(), LN0->getSrcValue(), LN0->getSrcValueOffset(), MemVT, - LN0->isVolatile(), LN0->getAlignment()); + LN0->isVolatile(), LN0->isNonTemporal(), + LN0->getAlignment()); CombineTo(N, ExtLoad); CombineTo(N0.getNode(), DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(), N0.getValueType(), @@ -3463,7 +3479,8 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { LN0->getBasePtr(), LN0->getSrcValue(), LN0->getSrcValueOffset(), N0.getValueType(), - LN0->isVolatile(), LN0->getAlignment()); + LN0->isVolatile(), LN0->isNonTemporal(), + LN0->getAlignment()); CombineTo(N, ExtLoad); SDValue Trunc = DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(), N0.getValueType(), ExtLoad); @@ -3505,7 +3522,8 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { VT, LN0->getChain(), LN0->getBasePtr(), LN0->getSrcValue(), LN0->getSrcValueOffset(), MemVT, - LN0->isVolatile(), LN0->getAlignment()); + LN0->isVolatile(), LN0->isNonTemporal(), + LN0->getAlignment()); CombineTo(N, ExtLoad); CombineTo(N0.getNode(), DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(), @@ -3628,10 +3646,11 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) { SDValue Load = (ExtType == ISD::NON_EXTLOAD) ? DAG.getLoad(VT, N0.getDebugLoc(), LN0->getChain(), NewPtr, LN0->getSrcValue(), LN0->getSrcValueOffset() + PtrOff, - LN0->isVolatile(), NewAlign) + LN0->isVolatile(), LN0->isNonTemporal(), NewAlign) : DAG.getExtLoad(ExtType, N0.getDebugLoc(), VT, LN0->getChain(), NewPtr, LN0->getSrcValue(), LN0->getSrcValueOffset() + PtrOff, - ExtVT, LN0->isVolatile(), NewAlign); + ExtVT, LN0->isVolatile(), LN0->isNonTemporal(), + NewAlign); // Replace the old load's chain with the new load's chain. 
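// [Editor's sketch] The new visitSIGN_EXTEND fallback above rests on the
// identity sext(setcc(a, b)) == select(setcc(a, b), -1, 0): sign-extending
// a 1-bit true yields all ones. Scalar model, with `<` standing in for an
// arbitrary condition code:
#include <cstdint>
inline int32_t sextOfSetcc(int32_t a, int32_t b) {
  bool cc = (a < b);           // the setcc
  return cc ? int32_t(-1) : 0; // sext(i1) == select(cc, -1, 0)
}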
WorkListRemover DeadNodes(*this); @@ -3718,7 +3737,8 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { LN0->getChain(), LN0->getBasePtr(), LN0->getSrcValue(), LN0->getSrcValueOffset(), EVT, - LN0->isVolatile(), LN0->getAlignment()); + LN0->isVolatile(), LN0->isNonTemporal(), + LN0->getAlignment()); CombineTo(N, ExtLoad); CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1)); return SDValue(N, 0); // Return N so it doesn't get rechecked! @@ -3734,7 +3754,8 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { LN0->getChain(), LN0->getBasePtr(), LN0->getSrcValue(), LN0->getSrcValueOffset(), EVT, - LN0->isVolatile(), LN0->getAlignment()); + LN0->isVolatile(), LN0->isNonTemporal(), + LN0->getAlignment()); CombineTo(N, ExtLoad); CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1)); return SDValue(N, 0); // Return N so it doesn't get rechecked! @@ -3818,7 +3839,7 @@ SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) { (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT))) return DAG.getLoad(VT, N->getDebugLoc(), LD1->getChain(), LD1->getBasePtr(), LD1->getSrcValue(), - LD1->getSrcValueOffset(), false, Align); + LD1->getSrcValueOffset(), false, false, Align); } return SDValue(); @@ -3888,7 +3909,8 @@ SDValue DAGCombiner::visitBIT_CONVERT(SDNode *N) { SDValue Load = DAG.getLoad(VT, N->getDebugLoc(), LN0->getChain(), LN0->getBasePtr(), LN0->getSrcValue(), LN0->getSrcValueOffset(), - LN0->isVolatile(), OrigAlign); + LN0->isVolatile(), LN0->isNonTemporal(), + OrigAlign); AddToWorkList(N); CombineTo(N0.getNode(), DAG.getNode(ISD::BIT_CONVERT, N0.getDebugLoc(), @@ -4484,7 +4506,8 @@ SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) { LN0->getBasePtr(), LN0->getSrcValue(), LN0->getSrcValueOffset(), N0.getValueType(), - LN0->isVolatile(), LN0->getAlignment()); + LN0->isVolatile(), LN0->isNonTemporal(), + LN0->getAlignment()); CombineTo(N, ExtLoad); CombineTo(N0.getNode(), DAG.getNode(ISD::FP_ROUND, N0.getDebugLoc(), @@ -4952,7 +4975,7 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) { LD->getValueType(0), Chain, Ptr, LD->getSrcValue(), LD->getSrcValueOffset(), LD->getMemoryVT(), - LD->isVolatile(), Align); + LD->isVolatile(), LD->isNonTemporal(), Align); } } @@ -5034,7 +5057,8 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) { ReplLoad = DAG.getLoad(N->getValueType(0), LD->getDebugLoc(), BetterChain, Ptr, LD->getSrcValue(), LD->getSrcValueOffset(), - LD->isVolatile(), LD->getAlignment()); + LD->isVolatile(), LD->isNonTemporal(), + LD->getAlignment()); } else { ReplLoad = DAG.getExtLoad(LD->getExtensionType(), LD->getDebugLoc(), LD->getValueType(0), @@ -5042,6 +5066,7 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) { LD->getSrcValueOffset(), LD->getMemoryVT(), LD->isVolatile(), + LD->isNonTemporal(), LD->getAlignment()); } @@ -5141,13 +5166,14 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) { SDValue NewLD = DAG.getLoad(NewVT, N0.getDebugLoc(), LD->getChain(), NewPtr, LD->getSrcValue(), LD->getSrcValueOffset(), - LD->isVolatile(), NewAlign); + LD->isVolatile(), LD->isNonTemporal(), + NewAlign); SDValue NewVal = DAG.getNode(Opc, Value.getDebugLoc(), NewVT, NewLD, DAG.getConstant(NewImm, NewVT)); SDValue NewST = DAG.getStore(Chain, N->getDebugLoc(), NewVal, NewPtr, ST->getSrcValue(), ST->getSrcValueOffset(), - false, NewAlign); + false, false, NewAlign); AddToWorkList(NewPtr.getNode()); AddToWorkList(NewLD.getNode()); @@ -5176,7 +5202,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { return DAG.getTruncStore(Chain, N->getDebugLoc(), Value, Ptr, ST->getSrcValue(), 
ST->getSrcValueOffset(), ST->getMemoryVT(), - ST->isVolatile(), Align); + ST->isVolatile(), ST->isNonTemporal(), Align); } } @@ -5193,7 +5219,8 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { TLI.isOperationLegalOrCustom(ISD::STORE, SVT))) return DAG.getStore(Chain, N->getDebugLoc(), Value.getOperand(0), Ptr, ST->getSrcValue(), - ST->getSrcValueOffset(), ST->isVolatile(), OrigAlign); + ST->getSrcValueOffset(), ST->isVolatile(), + ST->isNonTemporal(), OrigAlign); } // Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr' @@ -5219,7 +5246,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { return DAG.getStore(Chain, N->getDebugLoc(), Tmp, Ptr, ST->getSrcValue(), ST->getSrcValueOffset(), ST->isVolatile(), - ST->getAlignment()); + ST->isNonTemporal(), ST->getAlignment()); } break; case MVT::f64: @@ -5231,7 +5258,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { return DAG.getStore(Chain, N->getDebugLoc(), Tmp, Ptr, ST->getSrcValue(), ST->getSrcValueOffset(), ST->isVolatile(), - ST->getAlignment()); + ST->isNonTemporal(), ST->getAlignment()); } else if (!ST->isVolatile() && TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) { // Many FP stores are not made apparent until after legalize, e.g. for @@ -5245,18 +5272,21 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { int SVOffset = ST->getSrcValueOffset(); unsigned Alignment = ST->getAlignment(); bool isVolatile = ST->isVolatile(); + bool isNonTemporal = ST->isNonTemporal(); SDValue St0 = DAG.getStore(Chain, ST->getDebugLoc(), Lo, Ptr, ST->getSrcValue(), ST->getSrcValueOffset(), - isVolatile, ST->getAlignment()); + isVolatile, isNonTemporal, + ST->getAlignment()); Ptr = DAG.getNode(ISD::ADD, N->getDebugLoc(), Ptr.getValueType(), Ptr, DAG.getConstant(4, Ptr.getValueType())); SVOffset += 4; Alignment = MinAlign(Alignment, 4U); SDValue St1 = DAG.getStore(Chain, ST->getDebugLoc(), Hi, Ptr, ST->getSrcValue(), - SVOffset, isVolatile, Alignment); + SVOffset, isVolatile, isNonTemporal, + Alignment); return DAG.getNode(ISD::TokenFactor, N->getDebugLoc(), MVT::Other, St0, St1); } @@ -5278,12 +5308,13 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { if (ST->isTruncatingStore()) { ReplStore = DAG.getTruncStore(BetterChain, N->getDebugLoc(), Value, Ptr, ST->getSrcValue(),ST->getSrcValueOffset(), - ST->getMemoryVT(), - ST->isVolatile(), ST->getAlignment()); + ST->getMemoryVT(), ST->isVolatile(), + ST->isNonTemporal(), ST->getAlignment()); } else { ReplStore = DAG.getStore(BetterChain, N->getDebugLoc(), Value, Ptr, ST->getSrcValue(), ST->getSrcValueOffset(), - ST->isVolatile(), ST->getAlignment()); + ST->isVolatile(), ST->isNonTemporal(), + ST->getAlignment()); } // Create token to keep both nodes around. @@ -5317,7 +5348,8 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { return DAG.getTruncStore(Chain, N->getDebugLoc(), Shorter, Ptr, ST->getSrcValue(), ST->getSrcValueOffset(), ST->getMemoryVT(), - ST->isVolatile(), ST->getAlignment()); + ST->isVolatile(), ST->isNonTemporal(), + ST->getAlignment()); // Otherwise, see if we can simplify the operation with // SimplifyDemandedBits, which only works if the value has a single use. 
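// [Editor's sketch] The f64 path above, as plain memory operations: when
// only 32-bit integer stores are legal, a 64-bit FP store becomes two
// 32-bit stores at offsets 0 and 4, the second with alignment clamped to
// MinAlign(orig, 4). Little-endian host assumed for the lo/hi order:
#include <cstdint>
#include <cstring>
inline void storeF64AsTwoI32(double V, unsigned char *Ptr) {
  uint64_t Bits;
  std::memcpy(&Bits, &V, 8);          // bitcast f64 -> i64
  uint32_t Lo = uint32_t(Bits);       // low word
  uint32_t Hi = uint32_t(Bits >> 32); // high word
  std::memcpy(Ptr, &Lo, 4);           // store at SVOffset
  std::memcpy(Ptr + 4, &Hi, 4);       // store at SVOffset + 4
}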
@@ -5350,7 +5382,8 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { return DAG.getTruncStore(Chain, N->getDebugLoc(), Value.getOperand(0), Ptr, ST->getSrcValue(), ST->getSrcValueOffset(), ST->getMemoryVT(), - ST->isVolatile(), ST->getAlignment()); + ST->isVolatile(), ST->isNonTemporal(), + ST->getAlignment()); } return ReduceLoadOpStoreWidth(N); @@ -5395,12 +5428,16 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { SDValue InVec = N->getOperand(0); if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR) { - // If the operand is wider than the vector element type then it is implicitly - // truncated. Make that explicit here. + // Check if the result type doesn't match the inserted element type. A + // SCALAR_TO_VECTOR may truncate the inserted element and the + // EXTRACT_VECTOR_ELT may widen the extracted vector. EVT EltVT = InVec.getValueType().getVectorElementType(); SDValue InOp = InVec.getOperand(0); - if (InOp.getValueType() != EltVT) - return DAG.getNode(ISD::TRUNCATE, InVec.getDebugLoc(), EltVT, InOp); + EVT NVT = N->getValueType(0); + if (InOp.getValueType() != NVT) { + assert(InOp.getValueType().isInteger() && NVT.isInteger()); + return DAG.getSExtOrTrunc(InOp, InVec.getDebugLoc(), NVT); + } return InOp; } @@ -5491,7 +5528,7 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { return DAG.getLoad(LVT, N->getDebugLoc(), LN0->getChain(), NewPtr, LN0->getSrcValue(), LN0->getSrcValueOffset(), - LN0->isVolatile(), Align); + LN0->isVolatile(), LN0->isNonTemporal(), Align); } return SDValue(); @@ -5871,6 +5908,7 @@ bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS, LLD->getChain(), Addr, 0, 0, LLD->isVolatile(), + LLD->isNonTemporal(), LLD->getAlignment()); } else { Load = DAG.getExtLoad(LLD->getExtensionType(), @@ -5879,6 +5917,7 @@ bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS, LLD->getChain(), Addr, 0, 0, LLD->getMemoryVT(), LLD->isVolatile(), + LLD->isNonTemporal(), LLD->getAlignment()); } @@ -5986,7 +6025,7 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1, CstOffset); return DAG.getLoad(TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx, PseudoSourceValue::getConstantPool(), 0, false, - Alignment); + false, Alignment); } } diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp index 09fd657..35ef5b7 100644 --- a/lib/CodeGen/SelectionDAG/FastISel.cpp +++ b/lib/CodeGen/SelectionDAG/FastISel.cpp @@ -121,7 +121,7 @@ unsigned FastISel::getRegForValue(Value *V) { Reg = LocalValueMap[CE]; } else if (isa<UndefValue>(V)) { Reg = createResultReg(TLI.getRegClassFor(VT)); - BuildMI(MBB, DL, TII.get(TargetInstrInfo::IMPLICIT_DEF), Reg); + BuildMI(MBB, DL, TII.get(TargetOpcode::IMPLICIT_DEF), Reg); } // If target-independent code couldn't handle the value, give target-specific @@ -332,6 +332,8 @@ bool FastISel::SelectCall(User *I) { return true; Value *Address = DI->getAddress(); + if (!Address) + return true; AllocaInst *AI = dyn_cast<AllocaInst>(Address); // Don't handle byval struct arguments or VLAs, for example. if (!AI) break; @@ -343,6 +345,9 @@ bool FastISel::SelectCall(User *I) { if (MDNode *Dbg = DI->getMetadata("dbg")) MMI->setVariableDbgInfo(DI->getVariable(), FI, Dbg); } + // Building the map above is target independent. Generating DBG_VALUE + // inline is target dependent; do this now. 
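// [Editor's sketch] getSExtOrTrunc, as used in the EXTRACT_VECTOR_ELT hunk
// below: if the demanded type is wider than the inserted scalar the value
// is sign-extended, if narrower it is truncated. Integer model over bit
// widths (0 < bits <= 64); the final cast models reinterpreting the kept
// bits as a signed value:
#include <cstdint>
inline int64_t sextOrTrunc(uint64_t V, unsigned FromBits, unsigned ToBits) {
  unsigned Keep = ToBits < FromBits ? ToBits : FromBits;
  uint64_t Mask = Keep >= 64 ? ~0ULL : ((1ULL << Keep) - 1);
  uint64_t Low = V & Mask;               // truncate to Keep bits
  uint64_t Sign = 1ULL << (Keep - 1);
  return (int64_t)((Low ^ Sign) - Sign); // then sign-extend from Keep bits
}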
+ (void)TargetSelectInstruction(cast<Instruction>(I)); return true; } case Intrinsic::eh_exception: { @@ -966,7 +971,7 @@ unsigned FastISel::FastEmitInst_extractsubreg(MVT RetVT, const TargetRegisterClass* RC = MRI.getRegClass(Op0); unsigned ResultReg = createResultReg(TLI.getRegClassFor(RetVT)); - const TargetInstrDesc &II = TII.get(TargetInstrInfo::EXTRACT_SUBREG); + const TargetInstrDesc &II = TII.get(TargetOpcode::EXTRACT_SUBREG); if (II.getNumDefs() >= 1) BuildMI(MBB, DL, II, ResultReg).addReg(Op0).addImm(Idx); diff --git a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp index dc7d82d..50f4c32 100644 --- a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp +++ b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp @@ -227,7 +227,7 @@ void FunctionLoweringInfo::set(Function &fn, MachineFunction &mf, unsigned NumRegisters = TLI.getNumRegisters(Fn->getContext(), VT); const TargetInstrInfo *TII = MF->getTarget().getInstrInfo(); for (unsigned i = 0; i != NumRegisters; ++i) - BuildMI(MBB, DL, TII->get(TargetInstrInfo::PHI), PHIReg + i); + BuildMI(MBB, DL, TII->get(TargetOpcode::PHI), PHIReg + i); PHIReg += NumRegisters; } } diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp index 9c50936..02fe85d 100644 --- a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp +++ b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp @@ -178,7 +178,7 @@ void InstrEmitter::CreateVirtualRegisters(SDNode *Node, MachineInstr *MI, const TargetInstrDesc &II, bool IsClone, bool IsCloned, DenseMap<SDValue, unsigned> &VRBaseMap) { - assert(Node->getMachineOpcode() != TargetInstrInfo::IMPLICIT_DEF && + assert(Node->getMachineOpcode() != TargetOpcode::IMPLICIT_DEF && "IMPLICIT_DEF should have been handled as a special case elsewhere!"); for (unsigned i = 0; i < II.getNumDefs(); ++i) { @@ -236,7 +236,7 @@ void InstrEmitter::CreateVirtualRegisters(SDNode *Node, MachineInstr *MI, unsigned InstrEmitter::getVR(SDValue Op, DenseMap<SDValue, unsigned> &VRBaseMap) { if (Op.isMachineOpcode() && - Op.getMachineOpcode() == TargetInstrInfo::IMPLICIT_DEF) { + Op.getMachineOpcode() == TargetOpcode::IMPLICIT_DEF) { // Add an IMPLICIT_DEF instruction before every use. unsigned VReg = getDstOfOnlyCopyToRegUse(Op.getNode(), Op.getResNo()); // IMPLICIT_DEF can produce any type of result so its TargetInstrDesc @@ -246,7 +246,7 @@ unsigned InstrEmitter::getVR(SDValue Op, VReg = MRI->createVirtualRegister(RC); } BuildMI(MBB, Op.getDebugLoc(), - TII->get(TargetInstrInfo::IMPLICIT_DEF), VReg); + TII->get(TargetOpcode::IMPLICIT_DEF), VReg); return VReg; } @@ -396,12 +396,12 @@ void InstrEmitter::EmitSubregNode(SDNode *Node, } } - if (Opc == TargetInstrInfo::EXTRACT_SUBREG) { + if (Opc == TargetOpcode::EXTRACT_SUBREG) { unsigned SubIdx = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue(); // Create the extract_subreg machine instruction. MachineInstr *MI = BuildMI(*MF, Node->getDebugLoc(), - TII->get(TargetInstrInfo::EXTRACT_SUBREG)); + TII->get(TargetOpcode::EXTRACT_SUBREG)); // Figure out the register class to create for the destreg. 
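// [Editor's sketch] The dbg_declare guard above in isolation: after
// optimization the intrinsic's address operand can be null, so the handler
// must bail out (returning true, i.e. "handled, nothing to emit") before
// any cast of the address. Hypothetical types:
struct DbgDeclareModel { void *Address; };
inline bool selectDbgDeclare(const DbgDeclareModel &DI) {
  if (!DI.Address)
    return true; // No location survived optimization: emit nothing.
  // ...record the frame index for the variable, then give the target a
  // chance to emit a DBG_VALUE inline (the TargetSelectInstruction call).
  return true;
}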
unsigned VReg = getVR(Node->getOperand(0), VRBaseMap); @@ -424,8 +424,8 @@ void InstrEmitter::EmitSubregNode(SDNode *Node, AddOperand(MI, Node->getOperand(0), 0, 0, VRBaseMap); MI->addOperand(MachineOperand::CreateImm(SubIdx)); MBB->insert(InsertPos, MI); - } else if (Opc == TargetInstrInfo::INSERT_SUBREG || - Opc == TargetInstrInfo::SUBREG_TO_REG) { + } else if (Opc == TargetOpcode::INSERT_SUBREG || + Opc == TargetOpcode::SUBREG_TO_REG) { SDValue N0 = Node->getOperand(0); SDValue N1 = Node->getOperand(1); SDValue N2 = Node->getOperand(2); @@ -452,7 +452,7 @@ void InstrEmitter::EmitSubregNode(SDNode *Node, // If creating a subreg_to_reg, then the first input operand // is an implicit value immediate, otherwise it's a register - if (Opc == TargetInstrInfo::SUBREG_TO_REG) { + if (Opc == TargetOpcode::SUBREG_TO_REG) { const ConstantSDNode *SD = cast<ConstantSDNode>(N0); MI->addOperand(MachineOperand::CreateImm(SD->getZExtValue())); } else @@ -507,20 +507,20 @@ void InstrEmitter::EmitNode(SDNode *Node, bool IsClone, bool IsCloned, unsigned Opc = Node->getMachineOpcode(); // Handle subreg insert/extract specially - if (Opc == TargetInstrInfo::EXTRACT_SUBREG || - Opc == TargetInstrInfo::INSERT_SUBREG || - Opc == TargetInstrInfo::SUBREG_TO_REG) { + if (Opc == TargetOpcode::EXTRACT_SUBREG || + Opc == TargetOpcode::INSERT_SUBREG || + Opc == TargetOpcode::SUBREG_TO_REG) { EmitSubregNode(Node, VRBaseMap); return; } // Handle COPY_TO_REGCLASS specially. - if (Opc == TargetInstrInfo::COPY_TO_REGCLASS) { + if (Opc == TargetOpcode::COPY_TO_REGCLASS) { EmitCopyToRegClassNode(Node, VRBaseMap); return; } - if (Opc == TargetInstrInfo::IMPLICIT_DEF) + if (Opc == TargetOpcode::IMPLICIT_DEF) // We want a unique VR for each IMPLICIT_DEF use. return; @@ -640,7 +640,7 @@ void InstrEmitter::EmitNode(SDNode *Node, bool IsClone, bool IsCloned, // Create the inline asm machine instruction. MachineInstr *MI = BuildMI(*MF, Node->getDebugLoc(), - TII->get(TargetInstrInfo::INLINEASM)); + TII->get(TargetOpcode::INLINEASM)); // Add the asm string as an external symbol operand. const char *AsmStr = diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index 5e3f58a..e9321da 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -377,9 +377,10 @@ static SDValue ExpandConstantFP(ConstantFPSDNode *CFP, bool UseCP, return DAG.getExtLoad(ISD::EXTLOAD, dl, OrigVT, DAG.getEntryNode(), CPIdx, PseudoSourceValue::getConstantPool(), - 0, VT, false, Alignment); + 0, VT, false, false, Alignment); return DAG.getLoad(OrigVT, dl, DAG.getEntryNode(), CPIdx, - PseudoSourceValue::getConstantPool(), 0, false, Alignment); + PseudoSourceValue::getConstantPool(), 0, false, false, + Alignment); } /// ExpandUnalignedStore - Expands an unaligned store to 2 half-size stores. @@ -402,7 +403,8 @@ SDValue ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG, // FIXME: Does not handle truncating floating point stores! SDValue Result = DAG.getNode(ISD::BIT_CONVERT, dl, intVT, Val); return DAG.getStore(Chain, dl, Result, Ptr, ST->getSrcValue(), - SVOffset, ST->isVolatile(), Alignment); + SVOffset, ST->isVolatile(), ST->isNonTemporal(), + Alignment); } else { // Do a (aligned) store to a stack slot, then copy from the stack slot // to the final destination using (unaligned) integer loads and stores. @@ -418,7 +420,8 @@ SDValue ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG, // Perform the original store, only redirected to the stack slot. 
SDValue Store = DAG.getTruncStore(Chain, dl, - Val, StackPtr, NULL, 0, StoredVT); + Val, StackPtr, NULL, 0, StoredVT, + false, false, 0); SDValue Increment = DAG.getConstant(RegBytes, TLI.getPointerTy()); SmallVector<SDValue, 8> Stores; unsigned Offset = 0; @@ -426,11 +429,12 @@ SDValue ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG, // Do all but one copies using the full register width. for (unsigned i = 1; i < NumRegs; i++) { // Load one integer register's worth from the stack slot. - SDValue Load = DAG.getLoad(RegVT, dl, Store, StackPtr, NULL, 0); + SDValue Load = DAG.getLoad(RegVT, dl, Store, StackPtr, NULL, 0, + false, false, 0); // Store it to the final location. Remember the store. Stores.push_back(DAG.getStore(Load.getValue(1), dl, Load, Ptr, ST->getSrcValue(), SVOffset + Offset, - ST->isVolatile(), + ST->isVolatile(), ST->isNonTemporal(), MinAlign(ST->getAlignment(), Offset))); // Increment the pointers. Offset += RegBytes; @@ -446,11 +450,12 @@ SDValue ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG, // Load from the stack slot. SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, RegVT, Store, StackPtr, - NULL, 0, MemVT); + NULL, 0, MemVT, false, false, 0); Stores.push_back(DAG.getTruncStore(Load.getValue(1), dl, Load, Ptr, ST->getSrcValue(), SVOffset + Offset, MemVT, ST->isVolatile(), + ST->isNonTemporal(), MinAlign(ST->getAlignment(), Offset))); // The order of the stores doesn't matter - say it with a TokenFactor. return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &Stores[0], @@ -474,13 +479,14 @@ SDValue ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG, SDValue Store1, Store2; Store1 = DAG.getTruncStore(Chain, dl, TLI.isLittleEndian()?Lo:Hi, Ptr, ST->getSrcValue(), SVOffset, NewStoredVT, - ST->isVolatile(), Alignment); + ST->isVolatile(), ST->isNonTemporal(), Alignment); Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, DAG.getConstant(IncrementSize, TLI.getPointerTy())); Alignment = MinAlign(Alignment, IncrementSize); Store2 = DAG.getTruncStore(Chain, dl, TLI.isLittleEndian()?Hi:Lo, Ptr, ST->getSrcValue(), SVOffset + IncrementSize, - NewStoredVT, ST->isVolatile(), Alignment); + NewStoredVT, ST->isVolatile(), ST->isNonTemporal(), + Alignment); return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Store1, Store2); } @@ -502,7 +508,7 @@ SDValue ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG, // then bitconvert to floating point or vector. SDValue newLoad = DAG.getLoad(intVT, dl, Chain, Ptr, LD->getSrcValue(), SVOffset, LD->isVolatile(), - LD->getAlignment()); + LD->isNonTemporal(), LD->getAlignment()); SDValue Result = DAG.getNode(ISD::BIT_CONVERT, dl, LoadedVT, newLoad); if (VT.isFloatingPoint() && LoadedVT != VT) Result = DAG.getNode(ISD::FP_EXTEND, dl, VT, Result); @@ -530,10 +536,11 @@ SDValue ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG, // Load one integer register's worth from the original location. SDValue Load = DAG.getLoad(RegVT, dl, Chain, Ptr, LD->getSrcValue(), SVOffset + Offset, LD->isVolatile(), + LD->isNonTemporal(), MinAlign(LD->getAlignment(), Offset)); // Follow the load with a store to the stack slot. Remember the store. Stores.push_back(DAG.getStore(Load.getValue(1), dl, Load, StackPtr, - NULL, 0)); + NULL, 0, false, false, 0)); // Increment the pointers. 
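// [Editor's sketch] The chunked copy above as byte operations: an
// unaligned store is done by spilling to an aligned stack slot, then
// copying to the true destination in integer-register-sized pieces, with
// one final narrower piece for the remainder (the extload/truncstore pair):
#include <cstring>
inline void unalignedStoreViaStack(const void *AlignedSlot, void *Dst,
                                   unsigned StoredBytes, unsigned RegBytes) {
  unsigned Offset = 0;
  while (StoredBytes - Offset > RegBytes) { // all but the last piece
    std::memcpy(static_cast<char *>(Dst) + Offset,
                static_cast<const char *>(AlignedSlot) + Offset, RegBytes);
    Offset += RegBytes;
  }
  std::memcpy(static_cast<char *>(Dst) + Offset, // the remaining bytes
              static_cast<const char *>(AlignedSlot) + Offset,
              StoredBytes - Offset);
}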
Offset += RegBytes; Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment); @@ -546,12 +553,13 @@ SDValue ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG, SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, RegVT, Chain, Ptr, LD->getSrcValue(), SVOffset + Offset, MemVT, LD->isVolatile(), + LD->isNonTemporal(), MinAlign(LD->getAlignment(), Offset)); // Follow the load with a store to the stack slot. Remember the store. // On big-endian machines this requires a truncating store to ensure // that the bits end up in the right place. Stores.push_back(DAG.getTruncStore(Load.getValue(1), dl, Load, StackPtr, - NULL, 0, MemVT)); + NULL, 0, MemVT, false, false, 0)); // The order of the stores doesn't matter - say it with a TokenFactor. SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &Stores[0], @@ -559,7 +567,7 @@ SDValue ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG, // Finally, perform the original load only redirected to the stack slot. Load = DAG.getExtLoad(LD->getExtensionType(), dl, VT, TF, StackBase, - NULL, 0, LoadedVT); + NULL, 0, LoadedVT, false, false, 0); // Callers expect a MERGE_VALUES node. SDValue Ops[] = { Load, TF }; @@ -588,20 +596,22 @@ SDValue ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG, SDValue Lo, Hi; if (TLI.isLittleEndian()) { Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr, LD->getSrcValue(), - SVOffset, NewLoadedVT, LD->isVolatile(), Alignment); + SVOffset, NewLoadedVT, LD->isVolatile(), + LD->isNonTemporal(), Alignment); Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, DAG.getConstant(IncrementSize, TLI.getPointerTy())); Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr, LD->getSrcValue(), SVOffset + IncrementSize, NewLoadedVT, LD->isVolatile(), - MinAlign(Alignment, IncrementSize)); + LD->isNonTemporal(), MinAlign(Alignment, IncrementSize)); } else { Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr, LD->getSrcValue(), - SVOffset, NewLoadedVT, LD->isVolatile(), Alignment); + SVOffset, NewLoadedVT, LD->isVolatile(), + LD->isNonTemporal(), Alignment); Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, DAG.getConstant(IncrementSize, TLI.getPointerTy())); Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr, LD->getSrcValue(), SVOffset + IncrementSize, NewLoadedVT, LD->isVolatile(), - MinAlign(Alignment, IncrementSize)); + LD->isNonTemporal(), MinAlign(Alignment, IncrementSize)); } // aggregate the two parts @@ -643,7 +653,8 @@ PerformInsertVectorEltInMemory(SDValue Vec, SDValue Val, SDValue Idx, // Store the vector. SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, Tmp1, StackPtr, - PseudoSourceValue::getFixedStack(SPFI), 0); + PseudoSourceValue::getFixedStack(SPFI), 0, + false, false, 0); // Truncate or zero extend offset to target pointer type. unsigned CastOpc = IdxVT.bitsGT(PtrVT) ? ISD::TRUNCATE : ISD::ZERO_EXTEND; @@ -654,10 +665,12 @@ PerformInsertVectorEltInMemory(SDValue Vec, SDValue Val, SDValue Idx, SDValue StackPtr2 = DAG.getNode(ISD::ADD, dl, IdxVT, Tmp3, StackPtr); // Store the scalar value. Ch = DAG.getTruncStore(Ch, dl, Tmp2, StackPtr2, - PseudoSourceValue::getFixedStack(SPFI), 0, EltVT); + PseudoSourceValue::getFixedStack(SPFI), 0, EltVT, + false, false, 0); // Load the updated vector. 
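// [Editor's sketch] PerformInsertVectorEltInMemory below, as three memory
// operations: spill the vector to a stack temporary, overwrite element Idx
// at base + Idx * eltSize, then reload the whole vector:
#include <cstring>
inline void insertEltViaStack(void *Vec, unsigned VecBytes, const void *Elt,
                              unsigned EltBytes, unsigned Idx,
                              void *StackSlot) {
  std::memcpy(StackSlot, Vec, VecBytes);                    // store vector
  std::memcpy(static_cast<char *>(StackSlot) + Idx * EltBytes,
              Elt, EltBytes);                               // store scalar
  std::memcpy(Vec, StackSlot, VecBytes);                    // reload vector
}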
return DAG.getLoad(VT, dl, Ch, StackPtr, - PseudoSourceValue::getFixedStack(SPFI), 0); + PseudoSourceValue::getFixedStack(SPFI), 0, + false, false, 0); } @@ -702,6 +715,7 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) { int SVOffset = ST->getSrcValueOffset(); unsigned Alignment = ST->getAlignment(); bool isVolatile = ST->isVolatile(); + bool isNonTemporal = ST->isNonTemporal(); DebugLoc dl = ST->getDebugLoc(); if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(ST->getValue())) { if (CFP->getValueType(0) == MVT::f32 && @@ -710,14 +724,14 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) { bitcastToAPInt().zextOrTrunc(32), MVT::i32); return DAG.getStore(Tmp1, dl, Tmp3, Tmp2, ST->getSrcValue(), - SVOffset, isVolatile, Alignment); + SVOffset, isVolatile, isNonTemporal, Alignment); } else if (CFP->getValueType(0) == MVT::f64) { // If this target supports 64-bit registers, do a single 64-bit store. if (getTypeAction(MVT::i64) == Legal) { Tmp3 = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt(). zextOrTrunc(64), MVT::i64); return DAG.getStore(Tmp1, dl, Tmp3, Tmp2, ST->getSrcValue(), - SVOffset, isVolatile, Alignment); + SVOffset, isVolatile, isNonTemporal, Alignment); } else if (getTypeAction(MVT::i32) == Legal && !ST->isVolatile()) { // Otherwise, if the target supports 32-bit registers, use 2 32-bit // stores. If the target supports neither 32- nor 64-bits, this @@ -728,11 +742,11 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) { if (TLI.isBigEndian()) std::swap(Lo, Hi); Lo = DAG.getStore(Tmp1, dl, Lo, Tmp2, ST->getSrcValue(), - SVOffset, isVolatile, Alignment); + SVOffset, isVolatile, isNonTemporal, Alignment); Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2, DAG.getIntPtrConstant(4)); Hi = DAG.getStore(Tmp1, dl, Hi, Tmp2, ST->getSrcValue(), SVOffset+4, - isVolatile, MinAlign(Alignment, 4U)); + isVolatile, isNonTemporal, MinAlign(Alignment, 4U)); return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi); } @@ -1108,7 +1122,8 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { Tmp1 = DAG.getLoad(NVT, dl, Tmp1, Tmp2, LD->getSrcValue(), LD->getSrcValueOffset(), - LD->isVolatile(), LD->getAlignment()); + LD->isVolatile(), LD->isNonTemporal(), + LD->getAlignment()); Tmp3 = LegalizeOp(DAG.getNode(ISD::BIT_CONVERT, dl, VT, Tmp1)); Tmp4 = LegalizeOp(Tmp1.getValue(1)); break; @@ -1125,6 +1140,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { int SVOffset = LD->getSrcValueOffset(); unsigned Alignment = LD->getAlignment(); bool isVolatile = LD->isVolatile(); + bool isNonTemporal = LD->isNonTemporal(); if (SrcWidth != SrcVT.getStoreSizeInBits() && // Some targets pretend to have an i1 loading operation, and actually @@ -1150,7 +1166,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { Result = DAG.getExtLoad(NewExtType, dl, Node->getValueType(0), Tmp1, Tmp2, LD->getSrcValue(), SVOffset, - NVT, isVolatile, Alignment); + NVT, isVolatile, isNonTemporal, Alignment); Ch = Result.getValue(1); // The chain. @@ -1187,7 +1203,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, Node->getValueType(0), Tmp1, Tmp2, LD->getSrcValue(), SVOffset, RoundVT, isVolatile, - Alignment); + isNonTemporal, Alignment); // Load the remaining ExtraWidth bits. 
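// [Editor's sketch] OptimizeFloatStore's f32 case above: a store of a float
// constant is rewritten as a store of its 32-bit integer image, avoiding an
// FP register and a constant-pool load. Host-side model of the bit image:
#include <cstdint>
#include <cstring>
inline uint32_t floatBitsToStore(float F) {
  uint32_t Bits;
  std::memcpy(&Bits, &F, 4); // the bitcastToAPInt().zextOrTrunc(32) above
  return Bits;               // store this i32 in place of the f32
}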
IncrementSize = RoundWidth / 8; @@ -1195,7 +1211,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { DAG.getIntPtrConstant(IncrementSize)); Hi = DAG.getExtLoad(ExtType, dl, Node->getValueType(0), Tmp1, Tmp2, LD->getSrcValue(), SVOffset + IncrementSize, - ExtraVT, isVolatile, + ExtraVT, isVolatile, isNonTemporal, MinAlign(Alignment, IncrementSize)); // Build a factor node to remember that this load is independent of the @@ -1215,7 +1231,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { // Load the top RoundWidth bits. Hi = DAG.getExtLoad(ExtType, dl, Node->getValueType(0), Tmp1, Tmp2, LD->getSrcValue(), SVOffset, RoundVT, isVolatile, - Alignment); + isNonTemporal, Alignment); // Load the remaining ExtraWidth bits. IncrementSize = RoundWidth / 8; @@ -1224,7 +1240,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, Node->getValueType(0), Tmp1, Tmp2, LD->getSrcValue(), SVOffset + IncrementSize, - ExtraVT, isVolatile, + ExtraVT, isVolatile, isNonTemporal, MinAlign(Alignment, IncrementSize)); // Build a factor node to remember that this load is independent of the @@ -1284,7 +1300,8 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { (SrcVT == MVT::f64 && Node->getValueType(0) == MVT::f128)) { SDValue Load = DAG.getLoad(SrcVT, dl, Tmp1, Tmp2, LD->getSrcValue(), LD->getSrcValueOffset(), - LD->isVolatile(), LD->getAlignment()); + LD->isVolatile(), LD->isNonTemporal(), + LD->getAlignment()); Result = DAG.getNode(ISD::FP_EXTEND, dl, Node->getValueType(0), Load); Tmp1 = LegalizeOp(Result); // Relegalize new nodes. @@ -1297,7 +1314,8 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { Result = DAG.getExtLoad(ISD::EXTLOAD, dl, Node->getValueType(0), Tmp1, Tmp2, LD->getSrcValue(), LD->getSrcValueOffset(), SrcVT, - LD->isVolatile(), LD->getAlignment()); + LD->isVolatile(), LD->isNonTemporal(), + LD->getAlignment()); SDValue ValRes; if (ExtType == ISD::SEXTLOAD) ValRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, @@ -1325,6 +1343,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { int SVOffset = ST->getSrcValueOffset(); unsigned Alignment = ST->getAlignment(); bool isVolatile = ST->isVolatile(); + bool isNonTemporal = ST->isNonTemporal(); if (!ST->isTruncatingStore()) { if (SDNode *OptStore = OptimizeFloatStore(ST).getNode()) { @@ -1361,7 +1380,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { TLI.getTypeToPromoteTo(ISD::STORE, VT), Tmp3); Result = DAG.getStore(Tmp1, dl, Tmp3, Tmp2, ST->getSrcValue(), SVOffset, isVolatile, - Alignment); + isNonTemporal, Alignment); break; } break; @@ -1379,7 +1398,8 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { EVT NVT = EVT::getIntegerVT(*DAG.getContext(), StVT.getStoreSizeInBits()); Tmp3 = DAG.getZeroExtendInReg(Tmp3, dl, StVT); Result = DAG.getTruncStore(Tmp1, dl, Tmp3, Tmp2, ST->getSrcValue(), - SVOffset, NVT, isVolatile, Alignment); + SVOffset, NVT, isVolatile, isNonTemporal, + Alignment); } else if (StWidth & (StWidth - 1)) { // If not storing a power-of-2 number of bits, expand as two stores. assert(!StVT.isVector() && "Unsupported truncstore!"); @@ -1399,7 +1419,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { // Store the bottom RoundWidth bits. Lo = DAG.getTruncStore(Tmp1, dl, Tmp3, Tmp2, ST->getSrcValue(), SVOffset, RoundVT, - isVolatile, Alignment); + isVolatile, isNonTemporal, Alignment); // Store the remaining ExtraWidth bits. 
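// [Editor's sketch] The round/extra split above: a load of a
// non-power-of-2 width (say i24) becomes a power-of-2 "round" part plus an
// "extra" part at byte offset RoundWidth/8, recombined with a shift.
// Little-endian model; widths in bits, multiples of 8 and below 64:
#include <cstdint>
#include <cstring>
inline uint64_t loadOddWidthLE(const unsigned char *P, unsigned Width,
                               unsigned RoundWidth) {
  unsigned ExtraWidth = Width - RoundWidth; // e.g. 24 = 16 (round) + 8
  uint64_t Lo = 0, Hi = 0;
  std::memcpy(&Lo, P, RoundWidth / 8);                  // round part
  std::memcpy(&Hi, P + RoundWidth / 8, ExtraWidth / 8); // extra part
  return Lo | (Hi << RoundWidth);           // shift hi into position
}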
IncrementSize = RoundWidth / 8; @@ -1409,6 +1429,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { DAG.getConstant(RoundWidth, TLI.getShiftAmountTy())); Hi = DAG.getTruncStore(Tmp1, dl, Hi, Tmp2, ST->getSrcValue(), SVOffset + IncrementSize, ExtraVT, isVolatile, + isNonTemporal, MinAlign(Alignment, IncrementSize)); } else { // Big endian - avoid unaligned stores. @@ -1417,7 +1438,8 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { Hi = DAG.getNode(ISD::SRL, dl, Tmp3.getValueType(), Tmp3, DAG.getConstant(ExtraWidth, TLI.getShiftAmountTy())); Hi = DAG.getTruncStore(Tmp1, dl, Hi, Tmp2, ST->getSrcValue(), - SVOffset, RoundVT, isVolatile, Alignment); + SVOffset, RoundVT, isVolatile, isNonTemporal, + Alignment); // Store the remaining ExtraWidth bits. IncrementSize = RoundWidth / 8; @@ -1425,6 +1447,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { DAG.getIntPtrConstant(IncrementSize)); Lo = DAG.getTruncStore(Tmp1, dl, Tmp3, Tmp2, ST->getSrcValue(), SVOffset + IncrementSize, ExtraVT, isVolatile, + isNonTemporal, MinAlign(Alignment, IncrementSize)); } @@ -1457,7 +1480,8 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { assert(isTypeLegal(StVT) && "Do not know how to expand this store!"); Tmp3 = DAG.getNode(ISD::TRUNCATE, dl, StVT, Tmp3); Result = DAG.getStore(Tmp1, dl, Tmp3, Tmp2, ST->getSrcValue(), - SVOffset, isVolatile, Alignment); + SVOffset, isVolatile, isNonTemporal, + Alignment); break; } } @@ -1484,7 +1508,8 @@ SDValue SelectionDAGLegalize::ExpandExtractFromVectorThroughStack(SDValue Op) { DebugLoc dl = Op.getDebugLoc(); // Store the value to a temporary stack slot, then LOAD the returned part. SDValue StackPtr = DAG.CreateStackTemporary(Vec.getValueType()); - SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, NULL, 0); + SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, NULL, 0, + false, false, 0); // Add the offset to the index. unsigned EltSize = @@ -1500,10 +1525,12 @@ SDValue SelectionDAGLegalize::ExpandExtractFromVectorThroughStack(SDValue Op) { StackPtr = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), Idx, StackPtr); if (Op.getValueType().isVector()) - return DAG.getLoad(Op.getValueType(), dl, Ch, StackPtr, NULL, 0); + return DAG.getLoad(Op.getValueType(), dl, Ch, StackPtr, NULL, 0, + false, false, 0); else return DAG.getExtLoad(ISD::EXTLOAD, dl, Op.getValueType(), Ch, StackPtr, - NULL, 0, Vec.getValueType().getVectorElementType()); + NULL, 0, Vec.getValueType().getVectorElementType(), + false, false, 0); } SDValue SelectionDAGLegalize::ExpandVectorBuildThroughStack(SDNode* Node) { @@ -1533,12 +1560,14 @@ SDValue SelectionDAGLegalize::ExpandVectorBuildThroughStack(SDNode* Node) { Idx = DAG.getNode(ISD::ADD, dl, FIPtr.getValueType(), FIPtr, Idx); // If EltVT smaller than OpVT, only store the bits necessary. - if (EltVT.bitsLT(OpVT)) + if (!OpVT.isVector() && EltVT.bitsLT(OpVT)) { Stores.push_back(DAG.getTruncStore(DAG.getEntryNode(), dl, - Node->getOperand(i), Idx, SV, Offset, EltVT)); - else + Node->getOperand(i), Idx, SV, Offset, + EltVT, false, false, 0)); + } else Stores.push_back(DAG.getStore(DAG.getEntryNode(), dl, - Node->getOperand(i), Idx, SV, Offset)); + Node->getOperand(i), Idx, SV, Offset, + false, false, 0)); } SDValue StoreChain; @@ -1549,7 +1578,7 @@ SDValue SelectionDAGLegalize::ExpandVectorBuildThroughStack(SDNode* Node) { StoreChain = DAG.getEntryNode(); // Result is a load from the stack slot. 
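// [Editor's sketch] ExpandExtractFromVectorThroughStack below: with no
// native extract, the vector is spilled to a stack temporary and the one
// element is loaded back from base + Idx * eltSize:
#include <cstring>
inline void extractEltViaStack(const void *Vec, unsigned VecBytes,
                               unsigned EltBytes, unsigned Idx, void *EltOut,
                               void *StackSlot) {
  std::memcpy(StackSlot, Vec, VecBytes); // store the whole vector
  std::memcpy(EltOut,
              static_cast<const char *>(StackSlot) + Idx * EltBytes,
              EltBytes);                 // load just the element
}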
- return DAG.getLoad(VT, dl, StoreChain, FIPtr, SV, 0); + return DAG.getLoad(VT, dl, StoreChain, FIPtr, SV, 0, false, false, 0); } SDValue SelectionDAGLegalize::ExpandFCOPYSIGN(SDNode* Node) { @@ -1572,12 +1601,14 @@ SDValue SelectionDAGLegalize::ExpandFCOPYSIGN(SDNode* Node) { SDValue StackPtr = DAG.CreateStackTemporary(Tmp2.getValueType()); SDValue StorePtr = StackPtr, LoadPtr = StackPtr; SDValue Ch = - DAG.getStore(DAG.getEntryNode(), dl, Tmp2, StorePtr, NULL, 0); + DAG.getStore(DAG.getEntryNode(), dl, Tmp2, StorePtr, NULL, 0, + false, false, 0); if (Tmp2.getValueType() == MVT::f64 && TLI.isLittleEndian()) LoadPtr = DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), LoadPtr, DAG.getIntPtrConstant(4)); SignBit = DAG.getExtLoad(ISD::SEXTLOAD, dl, TLI.getPointerTy(), - Ch, LoadPtr, NULL, 0, MVT::i32); + Ch, LoadPtr, NULL, 0, MVT::i32, + false, false, 0); } SignBit = DAG.getSetCC(dl, TLI.getSetCCResultType(SignBit.getValueType()), @@ -1701,20 +1732,21 @@ SDValue SelectionDAGLegalize::EmitStackConvert(SDValue SrcOp, if (SrcSize > SlotSize) Store = DAG.getTruncStore(DAG.getEntryNode(), dl, SrcOp, FIPtr, - SV, 0, SlotVT, false, SrcAlign); + SV, 0, SlotVT, false, false, SrcAlign); else { assert(SrcSize == SlotSize && "Invalid store"); Store = DAG.getStore(DAG.getEntryNode(), dl, SrcOp, FIPtr, - SV, 0, false, SrcAlign); + SV, 0, false, false, SrcAlign); } // Result is a load from the stack slot. if (SlotSize == DestSize) - return DAG.getLoad(DestVT, dl, Store, FIPtr, SV, 0, false, DestAlign); + return DAG.getLoad(DestVT, dl, Store, FIPtr, SV, 0, false, false, + DestAlign); assert(SlotSize < DestSize && "Unknown extension!"); return DAG.getExtLoad(ISD::EXTLOAD, dl, DestVT, Store, FIPtr, SV, 0, SlotVT, - false, DestAlign); + false, false, DestAlign); } SDValue SelectionDAGLegalize::ExpandSCALAR_TO_VECTOR(SDNode *Node) { @@ -1729,9 +1761,11 @@ SDValue SelectionDAGLegalize::ExpandSCALAR_TO_VECTOR(SDNode *Node) { SDValue Ch = DAG.getTruncStore(DAG.getEntryNode(), dl, Node->getOperand(0), StackPtr, PseudoSourceValue::getFixedStack(SPFI), 0, - Node->getValueType(0).getVectorElementType()); + Node->getValueType(0).getVectorElementType(), + false, false, 0); return DAG.getLoad(Node->getValueType(0), dl, Ch, StackPtr, - PseudoSourceValue::getFixedStack(SPFI), 0); + PseudoSourceValue::getFixedStack(SPFI), 0, + false, false, 0); } @@ -1805,7 +1839,7 @@ SDValue SelectionDAGLegalize::ExpandBUILD_VECTOR(SDNode *Node) { unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment(); return DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx, PseudoSourceValue::getConstantPool(), 0, - false, Alignment); + false, false, Alignment); } if (!MoreThanTwoValues) { @@ -1943,13 +1977,16 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, } // store the lo of the constructed double - based on integer input SDValue Store1 = DAG.getStore(DAG.getEntryNode(), dl, - Op0Mapped, Lo, NULL, 0); + Op0Mapped, Lo, NULL, 0, + false, false, 0); // initial hi portion of constructed double SDValue InitialHi = DAG.getConstant(0x43300000u, MVT::i32); // store the hi of the constructed double - biased exponent - SDValue Store2=DAG.getStore(Store1, dl, InitialHi, Hi, NULL, 0); + SDValue Store2=DAG.getStore(Store1, dl, InitialHi, Hi, NULL, 0, + false, false, 0); // load the constructed double - SDValue Load = DAG.getLoad(MVT::f64, dl, Store2, StackSlot, NULL, 0); + SDValue Load = DAG.getLoad(MVT::f64, dl, Store2, StackSlot, NULL, 0, + false, false, 0); // FP constant to bias correct the final result SDValue Bias = 
DAG.getConstantFP(isSigned ? BitsToDouble(0x4330000080000000ULL) : @@ -2004,13 +2041,13 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, if (DestVT == MVT::f32) FudgeInReg = DAG.getLoad(MVT::f32, dl, DAG.getEntryNode(), CPIdx, PseudoSourceValue::getConstantPool(), 0, - false, Alignment); + false, false, Alignment); else { FudgeInReg = LegalizeOp(DAG.getExtLoad(ISD::EXTLOAD, dl, DestVT, DAG.getEntryNode(), CPIdx, PseudoSourceValue::getConstantPool(), 0, - MVT::f32, false, Alignment)); + MVT::f32, false, false, Alignment)); } return DAG.getNode(ISD::FADD, dl, DestVT, Tmp1, FudgeInReg); @@ -2350,16 +2387,19 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, EVT VT = Node->getValueType(0); Tmp1 = Node->getOperand(0); Tmp2 = Node->getOperand(1); - SDValue VAList = DAG.getLoad(TLI.getPointerTy(), dl, Tmp1, Tmp2, V, 0); + SDValue VAList = DAG.getLoad(TLI.getPointerTy(), dl, Tmp1, Tmp2, V, 0, + false, false, 0); // Increment the pointer, VAList, to the next vaarg Tmp3 = DAG.getNode(ISD::ADD, dl, TLI.getPointerTy(), VAList, DAG.getConstant(TLI.getTargetData()-> getTypeAllocSize(VT.getTypeForEVT(*DAG.getContext())), TLI.getPointerTy())); // Store the incremented VAList to the legalized pointer - Tmp3 = DAG.getStore(VAList.getValue(1), dl, Tmp3, Tmp2, V, 0); + Tmp3 = DAG.getStore(VAList.getValue(1), dl, Tmp3, Tmp2, V, 0, + false, false, 0); // Load the actual argument out of the pointer VAList - Results.push_back(DAG.getLoad(VT, dl, Tmp3, VAList, NULL, 0)); + Results.push_back(DAG.getLoad(VT, dl, Tmp3, VAList, NULL, 0, + false, false, 0)); Results.push_back(Results[0].getValue(1)); break; } @@ -2369,8 +2409,9 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, const Value *VD = cast<SrcValueSDNode>(Node->getOperand(3))->getValue(); const Value *VS = cast<SrcValueSDNode>(Node->getOperand(4))->getValue(); Tmp1 = DAG.getLoad(TLI.getPointerTy(), dl, Node->getOperand(0), - Node->getOperand(2), VS, 0); - Tmp1 = DAG.getStore(Tmp1.getValue(1), dl, Tmp1, Node->getOperand(1), VD, 0); + Node->getOperand(2), VS, 0, false, false, 0); + Tmp1 = DAG.getStore(Tmp1.getValue(1), dl, Tmp1, Node->getOperand(1), VD, 0, + false, false, 0); Results.push_back(Tmp1); break; } @@ -2767,7 +2808,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, DAG.getIntPtrConstant(1)); } else { // FIXME: We should be able to fall back to a libcall with an illegal - // type in some cases cases. + // type in some cases. // Also, we can fall back to a division in some cases, but that's a big // performance hit in the general case. 
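// [Editor's sketch] The double-construction trick completed above: writing
// a 32-bit integer X into the low word of a double whose high word is
// 0x43300000 yields the value 2^52 + X exactly; subtracting the bias 2^52
// recovers X as a double. (The signed variant above flips the input's sign
// bit and uses bias 2^52 + 2^31, i.e. 0x4330000080000000.) Unsigned case,
// little-endian host assumed:
#include <cstdint>
#include <cstring>
inline double uint32ToDouble(uint32_t X) {
  uint64_t Bits = (uint64_t(0x43300000) << 32) | X; // hi = exponent word
  double D, Bias;
  std::memcpy(&D, &Bits, 8);                 // "load the constructed double"
  uint64_t BiasBits = 0x4330000000000000ULL; // 2^52 as a double
  std::memcpy(&Bias, &BiasBits, 8);
  return D - Bias;                           // FSUB strips the implicit 2^52
}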
llvm_unreachable("Don't know how to expand this operation yet!"); @@ -2816,15 +2857,19 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, SDValue Index = Node->getOperand(2); EVT PTy = TLI.getPointerTy(); - MachineFunction &MF = DAG.getMachineFunction(); - unsigned EntrySize = MF.getJumpTableInfo()->getEntrySize(); - Index= DAG.getNode(ISD::MUL, dl, PTy, + + const TargetData &TD = *TLI.getTargetData(); + unsigned EntrySize = + DAG.getMachineFunction().getJumpTableInfo()->getEntrySize(TD); + + Index = DAG.getNode(ISD::MUL, dl, PTy, Index, DAG.getConstant(EntrySize, PTy)); SDValue Addr = DAG.getNode(ISD::ADD, dl, PTy, Index, Table); EVT MemVT = EVT::getIntegerVT(*DAG.getContext(), EntrySize * 8); SDValue LD = DAG.getExtLoad(ISD::SEXTLOAD, dl, PTy, Chain, Addr, - PseudoSourceValue::getJumpTable(), 0, MemVT); + PseudoSourceValue::getJumpTable(), 0, MemVT, + false, false, 0); Addr = LD; if (TLI.getTargetMachine().getRelocationModel() == Reloc::PIC_) { // For PIC, the sequence is: diff --git a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp index 4f0fce7..35a7c7c 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp @@ -444,7 +444,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_LOAD(SDNode *N) { NewL = DAG.getLoad(L->getAddressingMode(), dl, L->getExtensionType(), NVT, L->getChain(), L->getBasePtr(), L->getOffset(), L->getSrcValue(), L->getSrcValueOffset(), NVT, - L->isVolatile(), L->getAlignment()); + L->isVolatile(), L->isNonTemporal(), L->getAlignment()); // Legalized the chain result - switch anything that used the old chain to // use the new one. ReplaceValueWith(SDValue(N, 1), NewL.getValue(1)); @@ -456,8 +456,8 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_LOAD(SDNode *N) { L->getMemoryVT(), L->getChain(), L->getBasePtr(), L->getOffset(), L->getSrcValue(), L->getSrcValueOffset(), - L->getMemoryVT(), - L->isVolatile(), L->getAlignment()); + L->getMemoryVT(), L->isVolatile(), + L->isNonTemporal(), L->getAlignment()); // Legalized the chain result - switch anything that used the old chain to // use the new one. ReplaceValueWith(SDValue(N, 1), NewL.getValue(1)); @@ -755,7 +755,8 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_STORE(SDNode *N, unsigned OpNo) { return DAG.getStore(ST->getChain(), dl, Val, ST->getBasePtr(), ST->getSrcValue(), ST->getSrcValueOffset(), - ST->isVolatile(), ST->getAlignment()); + ST->isVolatile(), ST->isNonTemporal(), + ST->getAlignment()); } @@ -1073,8 +1074,8 @@ void DAGTypeLegalizer::ExpandFloatRes_LOAD(SDNode *N, SDValue &Lo, Hi = DAG.getExtLoad(LD->getExtensionType(), dl, NVT, Chain, Ptr, LD->getSrcValue(), LD->getSrcValueOffset(), - LD->getMemoryVT(), - LD->isVolatile(), LD->getAlignment()); + LD->getMemoryVT(), LD->isVolatile(), + LD->isNonTemporal(), LD->getAlignment()); // Remember the chain. 
Chain = Hi.getValue(1); @@ -1382,6 +1383,6 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_STORE(SDNode *N, unsigned OpNo) { return DAG.getTruncStore(Chain, N->getDebugLoc(), Hi, Ptr, ST->getSrcValue(), ST->getSrcValueOffset(), - ST->getMemoryVT(), - ST->isVolatile(), ST->getAlignment()); + ST->getMemoryVT(), ST->isVolatile(), + ST->isNonTemporal(), ST->getAlignment()); } diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index 9932cf4..e4d123f 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -359,7 +359,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_LOAD(LoadSDNode *N) { SDValue Res = DAG.getExtLoad(ExtType, dl, NVT, N->getChain(), N->getBasePtr(), N->getSrcValue(), N->getSrcValueOffset(), N->getMemoryVT(), N->isVolatile(), - N->getAlignment()); + N->isNonTemporal(), N->getAlignment()); // Legalized the chain result - switch anything that used the old chain to // use the new one. @@ -873,6 +873,7 @@ SDValue DAGTypeLegalizer::PromoteIntOp_STORE(StoreSDNode *N, unsigned OpNo){ int SVOffset = N->getSrcValueOffset(); unsigned Alignment = N->getAlignment(); bool isVolatile = N->isVolatile(); + bool isNonTemporal = N->isNonTemporal(); DebugLoc dl = N->getDebugLoc(); SDValue Val = GetPromotedInteger(N->getValue()); // Get promoted value. @@ -880,7 +881,7 @@ SDValue DAGTypeLegalizer::PromoteIntOp_STORE(StoreSDNode *N, unsigned OpNo){ // Truncate the value and store the result. return DAG.getTruncStore(Ch, dl, Val, Ptr, N->getSrcValue(), SVOffset, N->getMemoryVT(), - isVolatile, Alignment); + isVolatile, isNonTemporal, Alignment); } SDValue DAGTypeLegalizer::PromoteIntOp_TRUNCATE(SDNode *N) { @@ -1500,6 +1501,7 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N, int SVOffset = N->getSrcValueOffset(); unsigned Alignment = N->getAlignment(); bool isVolatile = N->isVolatile(); + bool isNonTemporal = N->isNonTemporal(); DebugLoc dl = N->getDebugLoc(); assert(NVT.isByteSized() && "Expanded type not byte sized!"); @@ -1508,7 +1510,7 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N, EVT MemVT = N->getMemoryVT(); Lo = DAG.getExtLoad(ExtType, dl, NVT, Ch, Ptr, N->getSrcValue(), SVOffset, - MemVT, isVolatile, Alignment); + MemVT, isVolatile, isNonTemporal, Alignment); // Remember the chain. Ch = Lo.getValue(1); @@ -1530,7 +1532,7 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N, } else if (TLI.isLittleEndian()) { // Little-endian - low bits are at low addresses. Lo = DAG.getLoad(NVT, dl, Ch, Ptr, N->getSrcValue(), SVOffset, - isVolatile, Alignment); + isVolatile, isNonTemporal, Alignment); unsigned ExcessBits = N->getMemoryVT().getSizeInBits() - NVT.getSizeInBits(); @@ -1542,7 +1544,8 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N, DAG.getIntPtrConstant(IncrementSize)); Hi = DAG.getExtLoad(ExtType, dl, NVT, Ch, Ptr, N->getSrcValue(), SVOffset+IncrementSize, NEVT, - isVolatile, MinAlign(Alignment, IncrementSize)); + isVolatile, isNonTemporal, + MinAlign(Alignment, IncrementSize)); // Build a factor node to remember that this load is independent of the // other one. @@ -1560,7 +1563,7 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N, Hi = DAG.getExtLoad(ExtType, dl, NVT, Ch, Ptr, N->getSrcValue(), SVOffset, EVT::getIntegerVT(*DAG.getContext(), MemVT.getSizeInBits() - ExcessBits), - isVolatile, Alignment); + isVolatile, isNonTemporal, Alignment); // Increment the pointer to the other half. 
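// [Editor's sketch] ExpandIntRes_LOAD's little-endian path above: a load
// wider than the largest legal integer type splits into a Lo load at the
// original address and a Hi (possibly extending) load at base +
// IncrementSize, later reassembled as a pair. Model for a 128-bit value on
// a 64-bit target:
#include <cstdint>
#include <cstring>
inline void expandLoadLE(const unsigned char *P, uint64_t &Lo, uint64_t &Hi) {
  std::memcpy(&Lo, P, 8);     // low half at the original pointer
  std::memcpy(&Hi, P + 8, 8); // high half at base + IncrementSize
}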
Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, @@ -1569,7 +1572,8 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N, Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, NVT, Ch, Ptr, N->getSrcValue(), SVOffset+IncrementSize, EVT::getIntegerVT(*DAG.getContext(), ExcessBits), - isVolatile, MinAlign(Alignment, IncrementSize)); + isVolatile, isNonTemporal, + MinAlign(Alignment, IncrementSize)); // Build a factor node to remember that this load is independent of the // other one. @@ -2212,6 +2216,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) { int SVOffset = N->getSrcValueOffset(); unsigned Alignment = N->getAlignment(); bool isVolatile = N->isVolatile(); + bool isNonTemporal = N->isNonTemporal(); DebugLoc dl = N->getDebugLoc(); SDValue Lo, Hi; @@ -2220,13 +2225,14 @@ SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) { if (N->getMemoryVT().bitsLE(NVT)) { GetExpandedInteger(N->getValue(), Lo, Hi); return DAG.getTruncStore(Ch, dl, Lo, Ptr, N->getSrcValue(), SVOffset, - N->getMemoryVT(), isVolatile, Alignment); + N->getMemoryVT(), isVolatile, isNonTemporal, + Alignment); } else if (TLI.isLittleEndian()) { // Little-endian - low bits are at low addresses. GetExpandedInteger(N->getValue(), Lo, Hi); Lo = DAG.getStore(Ch, dl, Lo, Ptr, N->getSrcValue(), SVOffset, - isVolatile, Alignment); + isVolatile, isNonTemporal, Alignment); unsigned ExcessBits = N->getMemoryVT().getSizeInBits() - NVT.getSizeInBits(); @@ -2238,7 +2244,8 @@ SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) { DAG.getIntPtrConstant(IncrementSize)); Hi = DAG.getTruncStore(Ch, dl, Hi, Ptr, N->getSrcValue(), SVOffset+IncrementSize, NEVT, - isVolatile, MinAlign(Alignment, IncrementSize)); + isVolatile, isNonTemporal, + MinAlign(Alignment, IncrementSize)); return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi); } else { // Big-endian - high bits are at low addresses. Favor aligned stores at @@ -2264,7 +2271,8 @@ SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) { // Store both the high bits and maybe some of the low bits. Hi = DAG.getTruncStore(Ch, dl, Hi, Ptr, N->getSrcValue(), - SVOffset, HiVT, isVolatile, Alignment); + SVOffset, HiVT, isVolatile, isNonTemporal, + Alignment); // Increment the pointer to the other half. Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, @@ -2273,7 +2281,8 @@ SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) { Lo = DAG.getTruncStore(Ch, dl, Lo, Ptr, N->getSrcValue(), SVOffset+IncrementSize, EVT::getIntegerVT(*DAG.getContext(), ExcessBits), - isVolatile, MinAlign(Alignment, IncrementSize)); + isVolatile, isNonTemporal, + MinAlign(Alignment, IncrementSize)); return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi); } } @@ -2341,7 +2350,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_UINT_TO_FP(SDNode *N) { // FIXME: Avoid the extend by constructing the right constant pool? SDValue Fudge = DAG.getExtLoad(ISD::EXTLOAD, dl, DstVT, DAG.getEntryNode(), FudgePtr, NULL, 0, MVT::f32, - false, Alignment); + false, false, Alignment); return DAG.getNode(ISD::FADD, dl, DstVT, SignedConv, Fudge); } diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp index 37f36a3..0d929f1 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp @@ -871,9 +871,10 @@ SDValue DAGTypeLegalizer::CreateStackStoreLoad(SDValue Op, // the source and destination types. 
SDValue StackPtr = DAG.CreateStackTemporary(Op.getValueType(), DestVT); // Emit a store to the stack slot. - SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Op, StackPtr, NULL, 0); + SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Op, StackPtr, NULL, 0, + false, false, 0); // Result is a load from the stack slot. - return DAG.getLoad(DestVT, dl, Store, StackPtr, NULL, 0); + return DAG.getLoad(DestVT, dl, Store, StackPtr, NULL, 0, false, false, 0); } /// CustomLowerNode - Replace the node's results with custom code provided diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/lib/CodeGen/SelectionDAG/LegalizeTypes.h index b5dbd41..b0af357 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -609,6 +609,7 @@ private: SDValue WidenVecRes_SIGN_EXTEND_INREG(SDNode* N); SDValue WidenVecRes_SELECT(SDNode* N); SDValue WidenVecRes_SELECT_CC(SDNode* N); + SDValue WidenVecRes_SETCC(SDNode* N); SDValue WidenVecRes_UNDEF(SDNode *N); SDValue WidenVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N); SDValue WidenVecRes_VSETCC(SDNode* N); @@ -633,43 +634,33 @@ private: // Vector Widening Utilities Support: LegalizeVectorTypes.cpp //===--------------------------------------------------------------------===// - /// Helper genWidenVectorLoads - Helper function to generate a set of + /// Helper GenWidenVectorLoads - Helper function to generate a set of /// loads to load a vector with a resulting wider type. It takes - /// ExtType: Extension type - /// LdChain: list of chains for the load we have generated. - /// Chain: incoming chain for the ld vector. - /// BasePtr: base pointer to load from. - /// SV: memory disambiguation source value. - /// SVOffset: memory disambiugation offset. - /// Alignment: alignment of the memory. - /// isVolatile: volatile load. - /// LdWidth: width of memory that we want to load. - /// ResType: the wider result result type for the resulting vector. - /// dl: DebugLoc to be applied to new nodes - SDValue GenWidenVectorLoads(SmallVector<SDValue, 16>& LdChain, SDValue Chain, - SDValue BasePtr, const Value *SV, - int SVOffset, unsigned Alignment, - bool isVolatile, unsigned LdWidth, - EVT ResType, DebugLoc dl); + /// LdChain: list of chains for the load to be generated. + /// Ld: load to widen + SDValue GenWidenVectorLoads(SmallVector<SDValue, 16>& LdChain, + LoadSDNode *LD); + + /// GenWidenVectorExtLoads - Helper function to generate a set of extension + /// loads to load a vector with a resulting wider type. It takes + /// LdChain: list of chains for the load to be generated.
+ /// Ld: load to widen + /// ExtType: extension element type + SDValue GenWidenVectorExtLoads(SmallVector<SDValue, 16>& LdChain, + LoadSDNode *LD, ISD::LoadExtType ExtType); /// Helper genWidenVectorStores - Helper function to generate a set of /// stores to store a widen vector into non widen memory - /// It takes /// StChain: list of chains for the stores we have generated - /// Chain: incoming chain for the ld vector - /// BasePtr: base pointer to load from - /// SV: memory disambiguation source value - /// SVOffset: memory disambiugation offset - /// Alignment: alignment of the memory - /// isVolatile: volatile lod - /// ValOp: value to store - /// StWidth: width of memory that we want to store - /// dl: DebugLoc to be applied to new nodes - void GenWidenVectorStores(SmallVector<SDValue, 16>& StChain, SDValue Chain, - SDValue BasePtr, const Value *SV, - int SVOffset, unsigned Alignment, - bool isVolatile, SDValue ValOp, - unsigned StWidth, DebugLoc dl); + /// ST: store of a widened value + void GenWidenVectorStores(SmallVector<SDValue, 16>& StChain, StoreSDNode *ST); + + /// Helper genWidenVectorTruncStores - Helper function to generate a set of + /// stores to store a truncated widened vector into non-widened memory + /// StChain: list of chains for the stores we have generated + /// ST: store of a widened value + void GenWidenVectorTruncStores(SmallVector<SDValue, 16>& StChain, + StoreSDNode *ST); /// Modifies a vector input (widen or narrows) to a vector of NVT. The /// input vector must have the same element type as NVT. diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp index a1b6ced..5e83b4b 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp @@ -122,10 +122,11 @@ void DAGTypeLegalizer::ExpandRes_BIT_CONVERT(SDNode *N, SDValue &Lo, const Value *SV = PseudoSourceValue::getFixedStack(SPFI); // Emit a store to the stack slot. - SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, InOp, StackPtr, SV, 0); + SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, InOp, StackPtr, SV, 0, + false, false, 0); // Load the first half from the stack slot. - Lo = DAG.getLoad(NOutVT, dl, Store, StackPtr, SV, 0); + Lo = DAG.getLoad(NOutVT, dl, Store, StackPtr, SV, 0, false, false, 0); // Increment the pointer to the other half. unsigned IncrementSize = NOutVT.getSizeInBits() / 8; @@ -134,7 +135,7 @@ void DAGTypeLegalizer::ExpandRes_BIT_CONVERT(SDNode *N, SDValue &Lo, // Load the second half from the stack slot. Hi = DAG.getLoad(NOutVT, dl, Store, StackPtr, SV, IncrementSize, false, - MinAlign(Alignment, IncrementSize)); + false, MinAlign(Alignment, IncrementSize)); // Handle endianness of the load. if (TLI.isBigEndian()) @@ -205,11 +206,12 @@ void DAGTypeLegalizer::ExpandRes_NormalLoad(SDNode *N, SDValue &Lo, int SVOffset = LD->getSrcValueOffset(); unsigned Alignment = LD->getAlignment(); bool isVolatile = LD->isVolatile(); + bool isNonTemporal = LD->isNonTemporal(); assert(NVT.isByteSized() && "Expanded type not byte sized!"); Lo = DAG.getLoad(NVT, dl, Chain, Ptr, LD->getSrcValue(), SVOffset, - isVolatile, Alignment); + isVolatile, isNonTemporal, Alignment); // Increment the pointer to the other half.
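The LegalizeTypes.h hunk above also shows the shape of the companion refactor: the widening helpers now take the memory node itself instead of a long list of loose parameters, since a LoadSDNode or StoreSDNode already carries the chain, base pointer, source value, offset, alignment, volatility, and the new non-temporal flag. Both declarations are taken from the hunk:

    // Old: the caller had to unpack every detail of the load.
    SDValue GenWidenVectorLoads(SmallVector<SDValue, 16>& LdChain, SDValue Chain,
                                SDValue BasePtr, const Value *SV,
                                int SVOffset, unsigned Alignment,
                                bool isVolatile, unsigned LdWidth,
                                EVT ResType, DebugLoc dl);
    // New: the node carries all of that state itself.
    SDValue GenWidenVectorLoads(SmallVector<SDValue, 16>& LdChain,
                                LoadSDNode *LD);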
unsigned IncrementSize = NVT.getSizeInBits() / 8; @@ -217,7 +219,8 @@ void DAGTypeLegalizer::ExpandRes_NormalLoad(SDNode *N, SDValue &Lo, DAG.getIntPtrConstant(IncrementSize)); Hi = DAG.getLoad(NVT, dl, Chain, Ptr, LD->getSrcValue(), SVOffset+IncrementSize, - isVolatile, MinAlign(Alignment, IncrementSize)); + isVolatile, isNonTemporal, + MinAlign(Alignment, IncrementSize)); // Build a factor node to remember that this load is independent of the // other one. @@ -383,6 +386,7 @@ SDValue DAGTypeLegalizer::ExpandOp_NormalStore(SDNode *N, unsigned OpNo) { int SVOffset = St->getSrcValueOffset(); unsigned Alignment = St->getAlignment(); bool isVolatile = St->isVolatile(); + bool isNonTemporal = St->isNonTemporal(); assert(NVT.isByteSized() && "Expanded type not byte sized!"); unsigned IncrementSize = NVT.getSizeInBits() / 8; @@ -394,14 +398,15 @@ SDValue DAGTypeLegalizer::ExpandOp_NormalStore(SDNode *N, unsigned OpNo) { std::swap(Lo, Hi); Lo = DAG.getStore(Chain, dl, Lo, Ptr, St->getSrcValue(), SVOffset, - isVolatile, Alignment); + isVolatile, isNonTemporal, Alignment); Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, DAG.getIntPtrConstant(IncrementSize)); assert(isTypeLegal(Ptr.getValueType()) && "Pointers must be legal!"); Hi = DAG.getStore(Chain, dl, Hi, Ptr, St->getSrcValue(), SVOffset + IncrementSize, - isVolatile, MinAlign(Alignment, IncrementSize)); + isVolatile, isNonTemporal, + MinAlign(Alignment, IncrementSize)); return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi); } diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 808bac7..8363c3a 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -172,7 +172,8 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_LOAD(LoadSDNode *N) { DAG.getUNDEF(N->getBasePtr().getValueType()), N->getSrcValue(), N->getSrcValueOffset(), N->getMemoryVT().getVectorElementType(), - N->isVolatile(), N->getOriginalAlignment()); + N->isVolatile(), N->isNonTemporal(), + N->getOriginalAlignment()); // Legalized the chain result - switch anything that used the old chain to // use the new one. @@ -366,11 +367,13 @@ SDValue DAGTypeLegalizer::ScalarizeVecOp_STORE(StoreSDNode *N, unsigned OpNo){ N->getBasePtr(), N->getSrcValue(), N->getSrcValueOffset(), N->getMemoryVT().getVectorElementType(), - N->isVolatile(), N->getAlignment()); + N->isVolatile(), N->isNonTemporal(), + N->getAlignment()); return DAG.getStore(N->getChain(), dl, GetScalarizedVector(N->getOperand(1)), N->getBasePtr(), N->getSrcValue(), N->getSrcValueOffset(), - N->isVolatile(), N->getOriginalAlignment()); + N->isVolatile(), N->isNonTemporal(), + N->getOriginalAlignment()); } @@ -696,17 +699,20 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, EVT VecVT = Vec.getValueType(); EVT EltVT = VecVT.getVectorElementType(); SDValue StackPtr = DAG.CreateStackTemporary(VecVT); - SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, NULL, 0); + SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, NULL, 0, + false, false, 0); // Store the new element. This may be larger than the vector element type, // so use a truncating store. 
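When one access is expanded into two, only the half at the original address keeps the original alignment; the half at BasePtr + IncrementSize may only assume whatever both the base alignment and the offset guarantee, which is what MinAlign computes. A worked case (my numbers, not from the patch): an i64 load at 8-byte alignment split into two i32 halves leaves the high half with MinAlign(8, 4) == 4.

    // Low half: original alignment. High half: alignment implied by the offset.
    Lo = DAG.getLoad(NVT, dl, Chain, Ptr, LD->getSrcValue(), SVOffset,
                     isVolatile, isNonTemporal, Alignment);
    Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
                      DAG.getIntPtrConstant(IncrementSize));
    Hi = DAG.getLoad(NVT, dl, Chain, Ptr, LD->getSrcValue(),
                     SVOffset + IncrementSize, isVolatile, isNonTemporal,
                     MinAlign(Alignment, IncrementSize));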
SDValue EltPtr = GetVectorElementPointer(StackPtr, EltVT, Idx); unsigned Alignment = TLI.getTargetData()->getPrefTypeAlignment(VecVT.getTypeForEVT(*DAG.getContext())); - Store = DAG.getTruncStore(Store, dl, Elt, EltPtr, NULL, 0, EltVT); + Store = DAG.getTruncStore(Store, dl, Elt, EltPtr, NULL, 0, EltVT, + false, false, 0); // Load the Lo part from the stack slot. - Lo = DAG.getLoad(Lo.getValueType(), dl, Store, StackPtr, NULL, 0); + Lo = DAG.getLoad(Lo.getValueType(), dl, Store, StackPtr, NULL, 0, + false, false, 0); // Increment the pointer to the other part. unsigned IncrementSize = Lo.getValueType().getSizeInBits() / 8; @@ -715,7 +721,7 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, // Load the Hi part from the stack slot. Hi = DAG.getLoad(Hi.getValueType(), dl, Store, StackPtr, NULL, 0, false, - MinAlign(Alignment, IncrementSize)); + false, MinAlign(Alignment, IncrementSize)); } void DAGTypeLegalizer::SplitVecRes_SCALAR_TO_VECTOR(SDNode *N, SDValue &Lo, @@ -743,19 +749,20 @@ void DAGTypeLegalizer::SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo, EVT MemoryVT = LD->getMemoryVT(); unsigned Alignment = LD->getOriginalAlignment(); bool isVolatile = LD->isVolatile(); + bool isNonTemporal = LD->isNonTemporal(); EVT LoMemVT, HiMemVT; GetSplitDestVTs(MemoryVT, LoMemVT, HiMemVT); Lo = DAG.getLoad(ISD::UNINDEXED, dl, ExtType, LoVT, Ch, Ptr, Offset, - SV, SVOffset, LoMemVT, isVolatile, Alignment); + SV, SVOffset, LoMemVT, isVolatile, isNonTemporal, Alignment); unsigned IncrementSize = LoMemVT.getSizeInBits()/8; Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, DAG.getIntPtrConstant(IncrementSize)); SVOffset += IncrementSize; Hi = DAG.getLoad(ISD::UNINDEXED, dl, ExtType, HiVT, Ch, Ptr, Offset, - SV, SVOffset, HiMemVT, isVolatile, Alignment); + SV, SVOffset, HiMemVT, isVolatile, isNonTemporal, Alignment); // Build a factor node to remember that this load is independent of the // other one. @@ -1086,12 +1093,13 @@ SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N) { SDValue StackPtr = DAG.CreateStackTemporary(VecVT); int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex(); const Value *SV = PseudoSourceValue::getFixedStack(SPFI); - SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, SV, 0); + SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, SV, 0, + false, false, 0); // Load back the required element. StackPtr = GetVectorElementPointer(StackPtr, EltVT, Idx); return DAG.getExtLoad(ISD::EXTLOAD, dl, N->getValueType(0), Store, StackPtr, - SV, 0, EltVT); + SV, 0, EltVT, false, false, 0); } SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) { @@ -1106,6 +1114,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) { EVT MemoryVT = N->getMemoryVT(); unsigned Alignment = N->getOriginalAlignment(); bool isVol = N->isVolatile(); + bool isNT = N->isNonTemporal(); SDValue Lo, Hi; GetSplitVector(N->getOperand(1), Lo, Hi); @@ -1116,10 +1125,10 @@ SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) { if (isTruncating) Lo = DAG.getTruncStore(Ch, dl, Lo, Ptr, N->getSrcValue(), SVOffset, - LoMemVT, isVol, Alignment); + LoMemVT, isVol, isNT, Alignment); else Lo = DAG.getStore(Ch, dl, Lo, Ptr, N->getSrcValue(), SVOffset, - isVol, Alignment); + isVol, isNT, Alignment); // Increment the pointer to the other half. 
Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, @@ -1128,10 +1137,10 @@ SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) { if (isTruncating) Hi = DAG.getTruncStore(Ch, dl, Hi, Ptr, N->getSrcValue(), SVOffset, - HiMemVT, isVol, Alignment); + HiMemVT, isVol, isNT, Alignment); else Hi = DAG.getStore(Ch, dl, Hi, Ptr, N->getSrcValue(), SVOffset, - isVol, Alignment); + isVol, isNT, Alignment); return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi); } @@ -1172,6 +1181,7 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { case ISD::SIGN_EXTEND_INREG: Res = WidenVecRes_InregOp(N); break; case ISD::SELECT: Res = WidenVecRes_SELECT(N); break; case ISD::SELECT_CC: Res = WidenVecRes_SELECT_CC(N); break; + case ISD::SETCC: Res = WidenVecRes_SETCC(N); break; case ISD::UNDEF: Res = WidenVecRes_UNDEF(N); break; case ISD::VECTOR_SHUFFLE: Res = WidenVecRes_VECTOR_SHUFFLE(cast<ShuffleVectorSDNode>(N)); @@ -1241,10 +1251,96 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { SDValue DAGTypeLegalizer::WidenVecRes_Binary(SDNode *N) { // Binary op widening. + unsigned Opcode = N->getOpcode(); + DebugLoc dl = N->getDebugLoc(); EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - SDValue InOp1 = GetWidenedVector(N->getOperand(0)); - SDValue InOp2 = GetWidenedVector(N->getOperand(1)); - return DAG.getNode(N->getOpcode(), N->getDebugLoc(), WidenVT, InOp1, InOp2); + EVT WidenEltVT = WidenVT.getVectorElementType(); + EVT VT = WidenVT; + unsigned NumElts = VT.getVectorNumElements(); + while (!TLI.isTypeLegal(VT) && NumElts != 1) { + NumElts = NumElts / 2; + VT = EVT::getVectorVT(*DAG.getContext(), WidenEltVT, NumElts); + } + + if (NumElts != 1 && !TLI.canOpTrap(N->getOpcode(), VT)) { + // Operation doesn't trap so just widen as normal. + SDValue InOp1 = GetWidenedVector(N->getOperand(0)); + SDValue InOp2 = GetWidenedVector(N->getOperand(1)); + return DAG.getNode(N->getOpcode(), dl, WidenVT, InOp1, InOp2); + } else if (NumElts == 1) { + // No legal vector version so unroll the vector operation and then widen. + return DAG.UnrollVectorOp(N, WidenVT.getVectorNumElements()); + } else { + // Since the operation can trap, apply operation on the original vector. + SDValue InOp1 = GetWidenedVector(N->getOperand(0)); + SDValue InOp2 = GetWidenedVector(N->getOperand(1)); + unsigned CurNumElts = N->getValueType(0).getVectorNumElements(); + + SmallVector<SDValue, 16> ConcatOps(CurNumElts); + unsigned ConcatEnd = 0; // Current ConcatOps index. + unsigned Idx = 0; // Current Idx into input vectors. + while (CurNumElts != 0) { + while (CurNumElts >= NumElts) { + SDValue EOp1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, InOp1, + DAG.getIntPtrConstant(Idx)); + SDValue EOp2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, InOp2, + DAG.getIntPtrConstant(Idx)); + ConcatOps[ConcatEnd++] = DAG.getNode(Opcode, dl, VT, EOp1, EOp2); + Idx += NumElts; + CurNumElts -= NumElts; + } + EVT PrevVecVT = VT; + do { + NumElts = NumElts / 2; + VT = EVT::getVectorVT(*DAG.getContext(), WidenEltVT, NumElts); + } while (!TLI.isTypeLegal(VT) && NumElts != 1); + + if (NumElts == 1) { + // Since we are using concat vector, build a vector from the scalar ops. 
+ SDValue VecOp = DAG.getUNDEF(PrevVecVT); + for (unsigned i = 0; i != CurNumElts; ++i, ++Idx) { + SDValue EOp1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, WidenEltVT, + InOp1, DAG.getIntPtrConstant(Idx)); + SDValue EOp2 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, WidenEltVT, + InOp2, DAG.getIntPtrConstant(Idx)); + VecOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, PrevVecVT, VecOp, + DAG.getNode(Opcode, dl, WidenEltVT, EOp1, EOp2), + DAG.getIntPtrConstant(i)); + } + CurNumElts = 0; + ConcatOps[ConcatEnd++] = VecOp; + } + } + + // Check to see if we have a single operation with the widen type. + if (ConcatEnd == 1) { + VT = ConcatOps[0].getValueType(); + if (VT == WidenVT) + return ConcatOps[0]; + } + + // Rebuild vector to one with the widen type + Idx = ConcatEnd - 1; + while (Idx != 0) { + VT = ConcatOps[Idx--].getValueType(); + while (Idx != 0 && ConcatOps[Idx].getValueType() == VT) + --Idx; + if (Idx != 0) { + VT = ConcatOps[Idx].getValueType(); + ConcatOps[Idx+1] = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, + &ConcatOps[Idx+1], ConcatEnd - Idx - 1); + ConcatEnd = Idx + 2; + } + } + + unsigned NumOps = WidenVT.getVectorNumElements()/VT.getVectorNumElements(); + if (NumOps != ConcatEnd ) { + SDValue UndefVal = DAG.getUNDEF(VT); + for (unsigned j = ConcatEnd; j < NumOps; ++j) + ConcatOps[j] = UndefVal; + } + return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, &ConcatOps[0], NumOps); + } } SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) { @@ -1655,68 +1751,24 @@ SDValue DAGTypeLegalizer::WidenVecRes_INSERT_VECTOR_ELT(SDNode *N) { SDValue DAGTypeLegalizer::WidenVecRes_LOAD(SDNode *N) { LoadSDNode *LD = cast<LoadSDNode>(N); - EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), LD->getValueType(0)); - EVT LdVT = LD->getMemoryVT(); - DebugLoc dl = N->getDebugLoc(); - assert(LdVT.isVector() && WidenVT.isVector()); - - // Load information - SDValue Chain = LD->getChain(); - SDValue BasePtr = LD->getBasePtr(); - int SVOffset = LD->getSrcValueOffset(); - unsigned Align = LD->getAlignment(); - bool isVolatile = LD->isVolatile(); - const Value *SV = LD->getSrcValue(); ISD::LoadExtType ExtType = LD->getExtensionType(); SDValue Result; SmallVector<SDValue, 16> LdChain; // Chain for the series of load - if (ExtType != ISD::NON_EXTLOAD) { - // For extension loads, we can not play the tricks of chopping legal - // vector types and bit cast it to the right type. Instead, we unroll - // the load and build a vector. 
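The WidenVecRes_Binary rewrite above exists because plain widening is unsound for opcodes that can trap: the extra lanes introduced by widening are undef, and an integer division in a padded lane could fault. A hypothetical trace (my numbers, assuming the widened type is v4i32 and v2i32 is the largest legal narrower vector):

    // widen (udiv v3i32 %a, %b) -- lane 3 would be undef after widening
    // VT = v2i32, NumElts = 2, CurNumElts = 3
    //   EXTRACT_SUBVECTOR v2i32 at index 0 from each operand;
    //   ConcatOps[0] = udiv v2i32           // covers lanes 0..1 safely
    // CurNumElts = 1, so fall back to scalars:
    //   lane 2 via EXTRACT_VECTOR_ELT, scalar udiv, INSERT_VECTOR_ELT
    // CONCAT_VECTORS then rebuilds a v4i32, padding the tail with undef.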
- EVT EltVT = WidenVT.getVectorElementType(); - EVT LdEltVT = LdVT.getVectorElementType(); - unsigned NumElts = LdVT.getVectorNumElements(); - - // Load each element and widen - unsigned WidenNumElts = WidenVT.getVectorNumElements(); - SmallVector<SDValue, 16> Ops(WidenNumElts); - unsigned Increment = LdEltVT.getSizeInBits() / 8; - Ops[0] = DAG.getExtLoad(ExtType, dl, EltVT, Chain, BasePtr, SV, SVOffset, - LdEltVT, isVolatile, Align); - LdChain.push_back(Ops[0].getValue(1)); - unsigned i = 0, Offset = Increment; - for (i=1; i < NumElts; ++i, Offset += Increment) { - SDValue NewBasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), - BasePtr, DAG.getIntPtrConstant(Offset)); - Ops[i] = DAG.getExtLoad(ExtType, dl, EltVT, Chain, NewBasePtr, SV, - SVOffset + Offset, LdEltVT, isVolatile, Align); - LdChain.push_back(Ops[i].getValue(1)); - } - - // Fill the rest with undefs - SDValue UndefVal = DAG.getUNDEF(EltVT); - for (; i != WidenNumElts; ++i) - Ops[i] = UndefVal; - - Result = DAG.getNode(ISD::BUILD_VECTOR, dl, WidenVT, &Ops[0], Ops.size()); - } else { - assert(LdVT.getVectorElementType() == WidenVT.getVectorElementType()); - unsigned int LdWidth = LdVT.getSizeInBits(); - Result = GenWidenVectorLoads(LdChain, Chain, BasePtr, SV, SVOffset, - Align, isVolatile, LdWidth, WidenVT, dl); - } - - // If we generate a single load, we can use that for the chain. Otherwise, - // build a factor node to remember the multiple loads are independent and - // chain to that. - SDValue NewChain; - if (LdChain.size() == 1) - NewChain = LdChain[0]; - else - NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &LdChain[0], - LdChain.size()); + if (ExtType != ISD::NON_EXTLOAD) + Result = GenWidenVectorExtLoads(LdChain, LD, ExtType); + else + Result = GenWidenVectorLoads(LdChain, LD); + + // If we generate a single load, we can use that for the chain. Otherwise, + // build a factor node to remember the multiple loads are independent and + // chain to that. + SDValue NewChain; + if (LdChain.size() == 1) + NewChain = LdChain[0]; + else + NewChain = DAG.getNode(ISD::TokenFactor, LD->getDebugLoc(), MVT::Other, + &LdChain[0], LdChain.size()); // Modified the chain - switch anything that used the old chain to use // the new one. @@ -1762,6 +1814,14 @@ SDValue DAGTypeLegalizer::WidenVecRes_SELECT_CC(SDNode *N) { N->getOperand(1), InOp1, InOp2, N->getOperand(4)); } +SDValue DAGTypeLegalizer::WidenVecRes_SETCC(SDNode *N) { + EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue InOp1 = GetWidenedVector(N->getOperand(0)); + SDValue InOp2 = GetWidenedVector(N->getOperand(1)); + return DAG.getNode(ISD::SETCC, N->getDebugLoc(), WidenVT, + InOp1, InOp2, N->getOperand(2)); +} + SDValue DAGTypeLegalizer::WidenVecRes_UNDEF(SDNode *N) { EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); return DAG.getUNDEF(WidenVT); @@ -1954,57 +2014,17 @@ SDValue DAGTypeLegalizer::WidenVecOp_STORE(SDNode *N) { // We have to widen the value but we want only to store the original // vector type. 
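Both load paths above hand back one chain per load they emit. A single load donates its chain result directly; several independent loads are joined with a TokenFactor so that anything ordered after the widened load is ordered after all of its pieces. The merged tail of WidenVecRes_LOAD, as it reads after the hunk:

    SDValue NewChain;
    if (LdChain.size() == 1)
      NewChain = LdChain[0];  // one load: reuse its chain
    else                      // several loads, none ordered before another
      NewChain = DAG.getNode(ISD::TokenFactor, LD->getDebugLoc(), MVT::Other,
                             &LdChain[0], LdChain.size());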
StoreSDNode *ST = cast<StoreSDNode>(N); - SDValue Chain = ST->getChain(); - SDValue BasePtr = ST->getBasePtr(); - const Value *SV = ST->getSrcValue(); - int SVOffset = ST->getSrcValueOffset(); - unsigned Align = ST->getAlignment(); - bool isVolatile = ST->isVolatile(); - SDValue ValOp = GetWidenedVector(ST->getValue()); - DebugLoc dl = N->getDebugLoc(); - - EVT StVT = ST->getMemoryVT(); - EVT ValVT = ValOp.getValueType(); - // It must be true that we the widen vector type is bigger than where - // we need to store. - assert(StVT.isVector() && ValOp.getValueType().isVector()); - assert(StVT.bitsLT(ValOp.getValueType())); SmallVector<SDValue, 16> StChain; - if (ST->isTruncatingStore()) { - // For truncating stores, we can not play the tricks of chopping legal - // vector types and bit cast it to the right type. Instead, we unroll - // the store. - EVT StEltVT = StVT.getVectorElementType(); - EVT ValEltVT = ValVT.getVectorElementType(); - unsigned Increment = ValEltVT.getSizeInBits() / 8; - unsigned NumElts = StVT.getVectorNumElements(); - SDValue EOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ValEltVT, ValOp, - DAG.getIntPtrConstant(0)); - StChain.push_back(DAG.getTruncStore(Chain, dl, EOp, BasePtr, SV, - SVOffset, StEltVT, - isVolatile, Align)); - unsigned Offset = Increment; - for (unsigned i=1; i < NumElts; ++i, Offset += Increment) { - SDValue NewBasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), - BasePtr, DAG.getIntPtrConstant(Offset)); - SDValue EOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ValEltVT, ValOp, - DAG.getIntPtrConstant(0)); - StChain.push_back(DAG.getTruncStore(Chain, dl, EOp, NewBasePtr, SV, - SVOffset + Offset, StEltVT, - isVolatile, MinAlign(Align, Offset))); - } - } - else { - assert(StVT.getVectorElementType() == ValVT.getVectorElementType()); - // Store value - GenWidenVectorStores(StChain, Chain, BasePtr, SV, SVOffset, - Align, isVolatile, ValOp, StVT.getSizeInBits(), dl); - } + if (ST->isTruncatingStore()) + GenWidenVectorTruncStores(StChain, ST); + else + GenWidenVectorStores(StChain, ST); + if (StChain.size() == 1) return StChain[0]; else - return DAG.getNode(ISD::TokenFactor, dl, + return DAG.getNode(ISD::TokenFactor, ST->getDebugLoc(), MVT::Other,&StChain[0],StChain.size()); } @@ -2012,179 +2032,390 @@ SDValue DAGTypeLegalizer::WidenVecOp_STORE(SDNode *N) { // Vector Widening Utilities //===----------------------------------------------------------------------===// +// Utility function to find the type to chop up a widened vector for load/store +// TLI: Target lowering used to determine legal types. +// Width: Width left to load/store. +// WidenVT: The widened vector type to load to/store from +// Align: If 0, don't allow use of a wider type +// WidenEx: If Align is not 0, the additional amount we can load/store. + +static EVT FindMemType(SelectionDAG& DAG, const TargetLowering &TLI, + unsigned Width, EVT WidenVT, + unsigned Align = 0, unsigned WidenEx = 0) { + EVT WidenEltVT = WidenVT.getVectorElementType(); + unsigned WidenWidth = WidenVT.getSizeInBits(); + unsigned WidenEltWidth = WidenEltVT.getSizeInBits(); + unsigned AlignInBits = Align*8; + + // If we have one element to load/store, return it.
+ EVT RetVT = WidenEltVT; + if (Width == WidenEltWidth) + return RetVT; + + // See if there is a larger legal integer type than the element type to load/store + unsigned VT; + for (VT = (unsigned)MVT::LAST_INTEGER_VALUETYPE; + VT >= (unsigned)MVT::FIRST_INTEGER_VALUETYPE; --VT) { + EVT MemVT((MVT::SimpleValueType) VT); + unsigned MemVTWidth = MemVT.getSizeInBits(); + if (MemVT.getSizeInBits() <= WidenEltWidth) + break; + if (TLI.isTypeLegal(MemVT) && (WidenWidth % MemVTWidth) == 0 && + (MemVTWidth <= Width || + (Align!=0 && MemVTWidth<=AlignInBits && MemVTWidth<=Width+WidenEx))) { + RetVT = MemVT; + break; + } + } -// Utility function to find a vector type and its associated element -// type from a preferred width and whose vector type must be the same size -// as the VecVT. -// TLI: Target lowering used to determine legal types. -// Width: Preferred width to store. -// VecVT: Vector value type whose size we must match. -// Returns NewVecVT and NewEltVT - the vector type and its associated -// element type. -static void FindAssocWidenVecType(SelectionDAG& DAG, - const TargetLowering &TLI, unsigned Width, - EVT VecVT, - EVT& NewEltVT, EVT& NewVecVT) { - unsigned EltWidth = Width + 1; - if (TLI.isTypeLegal(VecVT)) { - // We start with the preferred with, making it a power of 2 and find a - // legal vector type of that width. If not, we reduce it by another of 2. - // For incoming type is legal, this process will end as a vector of the - // smallest loadable type should always be legal. - do { - assert(EltWidth > 0); - EltWidth = 1 << Log2_32(EltWidth - 1); - NewEltVT = EVT::getIntegerVT(*DAG.getContext(), EltWidth); - unsigned NumElts = VecVT.getSizeInBits() / EltWidth; - NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewEltVT, NumElts); - } while (!TLI.isTypeLegal(NewVecVT) || - VecVT.getSizeInBits() != NewVecVT.getSizeInBits()); - } else { - // The incoming vector type is illegal and is the result of widening - // a vector to a power of 2. In this case, we will use the preferred - // with as long as it is a multiple of the incoming vector length. - // The legalization process will eventually make this into a legal type - // and remove the illegal bit converts (which would turn to stack converts - // if they are allow to exist). - do { - assert(EltWidth > 0); - EltWidth = 1 << Log2_32(EltWidth - 1); - NewEltVT = EVT::getIntegerVT(*DAG.getContext(), EltWidth); - unsigned NumElts = VecVT.getSizeInBits() / EltWidth; - NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewEltVT, NumElts); - } while (!TLI.isTypeLegal(NewEltVT) || - VecVT.getSizeInBits() != NewVecVT.getSizeInBits()); + // See if there is a larger vector type to load/store that has the same vector + // element type and evenly divides the WidenVT. + for (VT = (unsigned)MVT::LAST_VECTOR_VALUETYPE; + VT >= (unsigned)MVT::FIRST_VECTOR_VALUETYPE; --VT) { + EVT MemVT = (MVT::SimpleValueType) VT; + unsigned MemVTWidth = MemVT.getSizeInBits(); + if (TLI.isTypeLegal(MemVT) && WidenEltVT == MemVT.getVectorElementType() && + (WidenWidth % MemVTWidth) == 0 && + (MemVTWidth <= Width || + (Align!=0 && MemVTWidth<=AlignInBits && MemVTWidth<=Width+WidenEx))) { + if (RetVT.getSizeInBits() < MemVTWidth || MemVT == WidenVT) + return MemVT; + } } + + return RetVT; +} + +// Builds a vector from scalar loads +// VecTy: Resulting vector type +// LdOps: Load operators to build the vector from +// [Start,End): the range of loads to use.
+static SDValue BuildVectorFromScalar(SelectionDAG& DAG, EVT VecTy, + SmallVector<SDValue, 16>& LdOps, + unsigned Start, unsigned End) { + DebugLoc dl = LdOps[Start].getDebugLoc(); + EVT LdTy = LdOps[Start].getValueType(); + unsigned Width = VecTy.getSizeInBits(); + unsigned NumElts = Width / LdTy.getSizeInBits(); + EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), LdTy, NumElts); + + unsigned Idx = 1; + SDValue VecOp = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NewVecVT,LdOps[Start]); + + for (unsigned i = Start + 1; i != End; ++i) { + EVT NewLdTy = LdOps[i].getValueType(); + if (NewLdTy != LdTy) { + NumElts = Width / NewLdTy.getSizeInBits(); + NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewLdTy, NumElts); + VecOp = DAG.getNode(ISD::BIT_CONVERT, dl, NewVecVT, VecOp); + // Readjust position and vector position based on new load type + Idx = Idx * LdTy.getSizeInBits() / NewLdTy.getSizeInBits(); + LdTy = NewLdTy; + } + VecOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, NewVecVT, VecOp, LdOps[i], + DAG.getIntPtrConstant(Idx++)); + } + return DAG.getNode(ISD::BIT_CONVERT, dl, VecTy, VecOp); } SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVector<SDValue, 16>& LdChain, - SDValue Chain, - SDValue BasePtr, - const Value *SV, - int SVOffset, - unsigned Alignment, - bool isVolatile, - unsigned LdWidth, - EVT ResType, - DebugLoc dl) { + LoadSDNode * LD) { // The strategy assumes that we can efficiently load powers of two widths. - // The routines chops the vector into the largest power of 2 load and - // can be inserted into a legal vector and then cast the result into the - // vector type we want. This avoids unnecessary stack converts. + // The routine chops the vector into the largest vector loads with the same + // element type, or scalar loads, and then recombines them into the widened + // vector type. + EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), LD->getValueType(0)); + unsigned WidenWidth = WidenVT.getSizeInBits(); + EVT LdVT = LD->getMemoryVT(); + DebugLoc dl = LD->getDebugLoc(); + assert(LdVT.isVector() && WidenVT.isVector()); + assert(LdVT.getVectorElementType() == WidenVT.getVectorElementType()); - // TODO: If the Ldwidth is legal, alignment is the same as the LdWidth, and - // the load is nonvolatile, we an use a wider load for the value. + // Load information + SDValue Chain = LD->getChain(); + SDValue BasePtr = LD->getBasePtr(); + int SVOffset = LD->getSrcValueOffset(); + unsigned Align = LD->getAlignment(); + bool isVolatile = LD->isVolatile(); + bool isNonTemporal = LD->isNonTemporal(); + const Value *SV = LD->getSrcValue(); + + int LdWidth = LdVT.getSizeInBits(); + int WidthDiff = WidenWidth - LdWidth; // Difference + unsigned LdAlign = (isVolatile) ? 0 : Align; // Allow wider loads // Find the vector type that can load from.
- EVT NewEltVT, NewVecVT; - unsigned NewEltVTWidth; - FindAssocWidenVecType(DAG, TLI, LdWidth, ResType, NewEltVT, NewVecVT); - NewEltVTWidth = NewEltVT.getSizeInBits(); - - SDValue LdOp = DAG.getLoad(NewEltVT, dl, Chain, BasePtr, SV, SVOffset, - isVolatile, Alignment); - SDValue VecOp = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NewVecVT, LdOp); + EVT NewVT = FindMemType(DAG, TLI, LdWidth, WidenVT, LdAlign, WidthDiff); + int NewVTWidth = NewVT.getSizeInBits(); + SDValue LdOp = DAG.getLoad(NewVT, dl, Chain, BasePtr, SV, SVOffset, + isVolatile, isNonTemporal, Align); LdChain.push_back(LdOp.getValue(1)); // Check if we can load the element with one instruction - if (LdWidth == NewEltVTWidth) { - return DAG.getNode(ISD::BIT_CONVERT, dl, ResType, VecOp); + if (LdWidth <= NewVTWidth) { + if (NewVT.isVector()) { + if (NewVT != WidenVT) { + assert(WidenWidth % NewVTWidth == 0); + unsigned NumConcat = WidenWidth / NewVTWidth; + SmallVector<SDValue, 16> ConcatOps(NumConcat); + SDValue UndefVal = DAG.getUNDEF(NewVT); + ConcatOps[0] = LdOp; + for (unsigned i = 1; i != NumConcat; ++i) + ConcatOps[i] = UndefVal; + return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, &ConcatOps[0], + NumConcat); + } else + return LdOp; + } else { + unsigned NumElts = WidenWidth / LdWidth; + EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewVT, NumElts); + SDValue VecOp = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NewVecVT, LdOp); + return DAG.getNode(ISD::BIT_CONVERT, dl, WidenVT, VecOp); + } } - unsigned Idx = 1; - LdWidth -= NewEltVTWidth; + // Load the vector by using multiple loads, from largest vector to scalar + SmallVector<SDValue, 16> LdOps; + LdOps.push_back(LdOp); + + LdWidth -= NewVTWidth; unsigned Offset = 0; while (LdWidth > 0) { - unsigned Increment = NewEltVTWidth / 8; + unsigned Increment = NewVTWidth / 8; Offset += Increment; BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, DAG.getIntPtrConstant(Increment)); - if (LdWidth < NewEltVTWidth) { - // Our current type we are using is too large, use a smaller size by - // using a smaller power of 2 - unsigned oNewEltVTWidth = NewEltVTWidth; - FindAssocWidenVecType(DAG, TLI, LdWidth, ResType, NewEltVT, NewVecVT); - NewEltVTWidth = NewEltVT.getSizeInBits(); - // Readjust position and vector position based on new load type - Idx = Idx * (oNewEltVTWidth/NewEltVTWidth); - VecOp = DAG.getNode(ISD::BIT_CONVERT, dl, NewVecVT, VecOp); + if (LdWidth < NewVTWidth) { + // The type we are currently using is too large; find a better size + NewVT = FindMemType(DAG, TLI, LdWidth, WidenVT, LdAlign, WidthDiff); + NewVTWidth = NewVT.getSizeInBits(); } - SDValue LdOp = DAG.getLoad(NewEltVT, dl, Chain, BasePtr, SV, - SVOffset+Offset, isVolatile, - MinAlign(Alignment, Offset)); + SDValue LdOp = DAG.getLoad(NewVT, dl, Chain, BasePtr, SV, + SVOffset+Offset, isVolatile, + isNonTemporal, MinAlign(Align, Increment)); LdChain.push_back(LdOp.getValue(1)); - VecOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, NewVecVT, VecOp, LdOp, - DAG.getIntPtrConstant(Idx++)); + LdOps.push_back(LdOp); - LdWidth -= NewEltVTWidth; + LdWidth -= NewVTWidth; } - return DAG.getNode(ISD::BIT_CONVERT, dl, ResType, VecOp); -} + // Build the vector from the load operations + unsigned End = LdOps.size(); + if (LdOps[0].getValueType().isVector()) { + // If the loads include vector types, build the vector using CONCAT_VECTORS. + // All of the vectors used by the loads are a power of 2, and the scalar + // loads can be combined to make a power of 2 vector.
+ SmallVector<SDValue, 16> ConcatOps(End); + int i = End - 1; + int Idx = End; + EVT LdTy = LdOps[i].getValueType(); + // First combine the scalar loads to a vector + if (!LdTy.isVector()) { + for (--i; i >= 0; --i) { + LdTy = LdOps[i].getValueType(); + if (LdTy.isVector()) + break; + } + ConcatOps[--Idx] = BuildVectorFromScalar(DAG, LdTy, LdOps, i+1, End); + } + ConcatOps[--Idx] = LdOps[i]; + for (--i; i >= 0; --i) { + EVT NewLdTy = LdOps[i].getValueType(); + if (NewLdTy != LdTy) { + // Create a larger vector + ConcatOps[End-1] = DAG.getNode(ISD::CONCAT_VECTORS, dl, NewLdTy, + &ConcatOps[Idx], End - Idx); + Idx = End - 1; + LdTy = NewLdTy; + } + ConcatOps[--Idx] = LdOps[i]; + } -void DAGTypeLegalizer::GenWidenVectorStores(SmallVector<SDValue, 16>& StChain, - SDValue Chain, - SDValue BasePtr, - const Value *SV, - int SVOffset, - unsigned Alignment, - bool isVolatile, - SDValue ValOp, - unsigned StWidth, - DebugLoc dl) { - // Breaks the stores into a series of power of 2 width stores. For any - // width, we convert the vector to the vector of element size that we - // want to store. This avoids requiring a stack convert. - - // Find a width of the element type we can store with - EVT WidenVT = ValOp.getValueType(); - EVT NewEltVT, NewVecVT; - - FindAssocWidenVecType(DAG, TLI, StWidth, WidenVT, NewEltVT, NewVecVT); - unsigned NewEltVTWidth = NewEltVT.getSizeInBits(); - - SDValue VecOp = DAG.getNode(ISD::BIT_CONVERT, dl, NewVecVT, ValOp); - SDValue EOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NewEltVT, VecOp, - DAG.getIntPtrConstant(0)); - SDValue StOp = DAG.getStore(Chain, dl, EOp, BasePtr, SV, SVOffset, - isVolatile, Alignment); - StChain.push_back(StOp); + if (WidenWidth != LdTy.getSizeInBits()*(End - Idx)) { + // We need to fill the rest with undefs to build the vector + unsigned NumOps = WidenWidth / LdTy.getSizeInBits(); + SmallVector<SDValue, 16> WidenOps(NumOps); + SDValue UndefVal = DAG.getUNDEF(LdTy); + unsigned i = 0; + for (; i != End-Idx; ++i) + WidenOps[i] = ConcatOps[Idx+i]; + for (; i != NumOps; ++i) + WidenOps[i] = UndefVal; + return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, &WidenOps[0],NumOps); + } else + return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, + &ConcatOps[Idx], End - Idx); + } else // All the loads are scalar loads. + return BuildVectorFromScalar(DAG, WidenVT, LdOps, 0, End); +} + +SDValue +DAGTypeLegalizer::GenWidenVectorExtLoads(SmallVector<SDValue, 16>& LdChain, + LoadSDNode * LD, + ISD::LoadExtType ExtType) { + // For extension loads, it may not be more efficient to chop up the vector + // and then extend it. Instead, we unroll the load and build a new vector.
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(),LD->getValueType(0)); + EVT LdVT = LD->getMemoryVT(); + DebugLoc dl = LD->getDebugLoc(); + assert(LdVT.isVector() && WidenVT.isVector()); - // Load information + SDValue Chain = LD->getChain(); + SDValue BasePtr = LD->getBasePtr(); + int SVOffset = LD->getSrcValueOffset(); + unsigned Align = LD->getAlignment(); + bool isVolatile = LD->isVolatile(); + bool isNonTemporal = LD->isNonTemporal(); + const Value *SV = LD->getSrcValue(); + + EVT EltVT = WidenVT.getVectorElementType(); + EVT LdEltVT = LdVT.getVectorElementType(); + unsigned NumElts = LdVT.getVectorNumElements(); + + // Load each element and widen + unsigned WidenNumElts = WidenVT.getVectorNumElements(); + SmallVector<SDValue, 16> Ops(WidenNumElts); + unsigned Increment = LdEltVT.getSizeInBits() / 8; + Ops[0] = DAG.getExtLoad(ExtType, dl, EltVT, Chain, BasePtr, SV, SVOffset, + LdEltVT, isVolatile, isNonTemporal, Align); + LdChain.push_back(Ops[0].getValue(1)); + unsigned i = 0, Offset = Increment; + for (i=1; i < NumElts; ++i, Offset += Increment) { + SDValue NewBasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), + BasePtr, DAG.getIntPtrConstant(Offset)); + Ops[i] = DAG.getExtLoad(ExtType, dl, EltVT, Chain, NewBasePtr, SV, + SVOffset + Offset, LdEltVT, isVolatile, + isNonTemporal, Align); + LdChain.push_back(Ops[i].getValue(1)); } - unsigned Idx = 1; - StWidth -= NewEltVTWidth; - unsigned Offset = 0; + // Fill the rest with undefs + SDValue UndefVal = DAG.getUNDEF(EltVT); + for (; i != WidenNumElts; ++i) + Ops[i] = UndefVal; - while (StWidth > 0) { - unsigned Increment = NewEltVTWidth / 8; - Offset += Increment; - BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, - DAG.getIntPtrConstant(Increment)); + return DAG.getNode(ISD::BUILD_VECTOR, dl, WidenVT, &Ops[0], Ops.size()); +} - if (StWidth < NewEltVTWidth) { - // Our current type we are using is too large, use a smaller size by - // using a smaller power of 2 - unsigned oNewEltVTWidth = NewEltVTWidth; - FindAssocWidenVecType(DAG, TLI, StWidth, WidenVT, NewEltVT, NewVecVT); - NewEltVTWidth = NewEltVT.getSizeInBits(); - // Readjust position and vector position based on new load type - Idx = Idx * (oNewEltVTWidth/NewEltVTWidth); - VecOp = DAG.getNode(ISD::BIT_CONVERT, dl, NewVecVT, VecOp); - } - EOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NewEltVT, VecOp, +void DAGTypeLegalizer::GenWidenVectorStores(SmallVector<SDValue, 16>& StChain, + StoreSDNode *ST) { + // The strategy assumes that we can efficiently store powers of two widths. + // The routine chops the vector into the largest vector stores with the same + // element type, or scalar stores.
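A hypothetical trace of the chopping loop that follows (my numbers, assuming i64 and i32 are legal scalar types): storing v3i32 (StWidth = 96) from a value widened to v4i32.

    // FindMemType(96, v4i32) -> i64 (the largest type fitting in 96 bits):
    //   bitcast the v4i32 value to v2i64, EXTRACT_VECTOR_ELT index 0,
    //   8-byte store; StWidth 96 -> 32, Offset 0 -> 8, Idx restored to 2
    // FindMemType(32, v4i32) -> i32:
    //   bitcast to v4i32 (a no-op here), EXTRACT_VECTOR_ELT index 2,
    //   4-byte store; StWidth 32 -> 0 and the loop terminates.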
+ SDValue Chain = ST->getChain(); + SDValue BasePtr = ST->getBasePtr(); + const Value *SV = ST->getSrcValue(); + int SVOffset = ST->getSrcValueOffset(); + unsigned Align = ST->getAlignment(); + bool isVolatile = ST->isVolatile(); + bool isNonTemporal = ST->isNonTemporal(); + SDValue ValOp = GetWidenedVector(ST->getValue()); + DebugLoc dl = ST->getDebugLoc(); + + EVT StVT = ST->getMemoryVT(); + unsigned StWidth = StVT.getSizeInBits(); + EVT ValVT = ValOp.getValueType(); + unsigned ValWidth = ValVT.getSizeInBits(); + EVT ValEltVT = ValVT.getVectorElementType(); + unsigned ValEltWidth = ValEltVT.getSizeInBits(); + assert(StVT.getVectorElementType() == ValEltVT); + + int Idx = 0; // current index to store + unsigned Offset = 0; // offset from base to store + while (StWidth != 0) { + // Find the largest vector type we can store with + EVT NewVT = FindMemType(DAG, TLI, StWidth, ValVT); + unsigned NewVTWidth = NewVT.getSizeInBits(); + unsigned Increment = NewVTWidth / 8; + if (NewVT.isVector()) { + unsigned NumVTElts = NewVT.getVectorNumElements(); + do { + SDValue EOp = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, NewVT, ValOp, + DAG.getIntPtrConstant(Idx)); + StChain.push_back(DAG.getStore(Chain, dl, EOp, BasePtr, SV, + SVOffset + Offset, isVolatile, + isNonTemporal, + MinAlign(Align, Offset))); + StWidth -= NewVTWidth; + Offset += Increment; + Idx += NumVTElts; + BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, + DAG.getIntPtrConstant(Increment)); + } while (StWidth != 0 && StWidth >= NewVTWidth); + } else { + // Cast the vector to the scalar type we can store + unsigned NumElts = ValWidth / NewVTWidth; + EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewVT, NumElts); + SDValue VecOp = DAG.getNode(ISD::BIT_CONVERT, dl, NewVecVT, ValOp); + // Readjust index position based on new vector type + Idx = Idx * ValEltWidth / NewVTWidth; + do { + SDValue EOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NewVT, VecOp, DAG.getIntPtrConstant(Idx++)); + StChain.push_back(DAG.getStore(Chain, dl, EOp, BasePtr, SV, + SVOffset + Offset, isVolatile, + isNonTemporal, MinAlign(Align, Offset))); + StWidth -= NewVTWidth; + Offset += Increment; + BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, + DAG.getIntPtrConstant(Increment)); + } while (StWidth != 0 && StWidth >= NewVTWidth); + // Restore index back to be relative to the original widened element type + Idx = Idx * NewVTWidth / ValEltWidth; + } + } +} + +void +DAGTypeLegalizer::GenWidenVectorTruncStores(SmallVector<SDValue, 16>& StChain, + StoreSDNode *ST) { + // For truncating stores, it may not be more efficient to truncate the vector + // and then store it. Instead, we extract each element and then store it. + SDValue Chain = ST->getChain(); + SDValue BasePtr = ST->getBasePtr(); + const Value *SV = ST->getSrcValue(); + int SVOffset = ST->getSrcValueOffset(); + unsigned Align = ST->getAlignment(); + bool isVolatile = ST->isVolatile(); + bool isNonTemporal = ST->isNonTemporal(); + SDValue ValOp = GetWidenedVector(ST->getValue()); + DebugLoc dl = ST->getDebugLoc(); + + EVT StVT = ST->getMemoryVT(); + EVT ValVT = ValOp.getValueType(); + + // It must be the case that the widened vector type is bigger than the + // type we need to store.
+ assert(StVT.isVector() && ValOp.getValueType().isVector()); + assert(StVT.bitsLT(ValOp.getValueType())); + + // For truncating stores, we can not play the tricks of chopping legal + // vector types and bit cast it to the right type. Instead, we unroll + // the store. + EVT StEltVT = StVT.getVectorElementType(); + EVT ValEltVT = ValVT.getVectorElementType(); + unsigned Increment = ValEltVT.getSizeInBits() / 8; + unsigned NumElts = StVT.getVectorNumElements(); + SDValue EOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ValEltVT, ValOp, + DAG.getIntPtrConstant(0)); + StChain.push_back(DAG.getTruncStore(Chain, dl, EOp, BasePtr, SV, + SVOffset, StEltVT, + isVolatile, isNonTemporal, Align)); + unsigned Offset = Increment; + for (unsigned i=1; i < NumElts; ++i, Offset += Increment) { + SDValue NewBasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), + BasePtr, DAG.getIntPtrConstant(Offset)); + SDValue EOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ValEltVT, ValOp, + DAG.getIntPtrConstant(0)); + StChain.push_back(DAG.getTruncStore(Chain, dl, EOp, NewBasePtr, SV, + SVOffset + Offset, StEltVT, + isVolatile, isNonTemporal, + MinAlign(Align, Offset))); } } diff --git a/lib/CodeGen/SelectionDAG/Makefile b/lib/CodeGen/SelectionDAG/Makefile index 4706e68..ea716fd 100644 --- a/lib/CodeGen/SelectionDAG/Makefile +++ b/lib/CodeGen/SelectionDAG/Makefile @@ -9,6 +9,5 @@ LEVEL = ../../.. LIBRARYNAME = LLVMSelectionDAG -CXXFLAGS = -fno-rtti include $(LEVEL)/Makefile.common diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp index dea5993..3f1766d 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp @@ -345,6 +345,15 @@ void ScheduleDAGRRList::BacktrackBottomUp(SUnit *SU, unsigned BtCycle, ++NumBacktracks; } +static bool isOperandOf(const SUnit *SU, SDNode *N) { + for (const SDNode *SUNode = SU->getNode(); SUNode; + SUNode = SUNode->getFlaggedNode()) { + if (SUNode->isOperandOf(N)) + return true; + } + return false; +} + /// CopyAndMoveSuccessors - Clone the specified node and move its scheduled /// successors to the newly created node. SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) { @@ -427,8 +436,7 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) { I != E; ++I) { if (I->isCtrl()) ChainPreds.push_back(*I); - else if (I->getSUnit()->getNode() && - I->getSUnit()->getNode()->isOperandOf(LoadNode)) + else if (isOperandOf(I->getSUnit(), LoadNode)) LoadPreds.push_back(*I); else NodePreds.push_back(*I); @@ -1034,9 +1042,9 @@ namespace { // CopyToReg should be close to its uses to facilitate coalescing and // avoid spilling. return 0; - if (Opc == TargetInstrInfo::EXTRACT_SUBREG || - Opc == TargetInstrInfo::SUBREG_TO_REG || - Opc == TargetInstrInfo::INSERT_SUBREG) + if (Opc == TargetOpcode::EXTRACT_SUBREG || + Opc == TargetOpcode::SUBREG_TO_REG || + Opc == TargetOpcode::INSERT_SUBREG) // EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG nodes should be // close to their uses to facilitate coalescing. return 0; @@ -1437,7 +1445,7 @@ void RegReductionPriorityQueue<SF>::AddPseudoTwoAddrDeps() { while (SuccSU->Succs.size() == 1 && SuccSU->getNode()->isMachineOpcode() && SuccSU->getNode()->getMachineOpcode() == - TargetInstrInfo::COPY_TO_REGCLASS) + TargetOpcode::COPY_TO_REGCLASS) SuccSU = SuccSU->Succs.front().getSUnit(); // Don't constrain non-instruction nodes. 
if (!SuccSU->getNode() || !SuccSU->getNode()->isMachineOpcode()) @@ -1451,9 +1459,9 @@ void RegReductionPriorityQueue<SF>::AddPseudoTwoAddrDeps() { // Don't constrain EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG; // these may be coalesced away. We want them close to their uses. unsigned SuccOpc = SuccSU->getNode()->getMachineOpcode(); - if (SuccOpc == TargetInstrInfo::EXTRACT_SUBREG || - SuccOpc == TargetInstrInfo::INSERT_SUBREG || - SuccOpc == TargetInstrInfo::SUBREG_TO_REG) + if (SuccOpc == TargetOpcode::EXTRACT_SUBREG || + SuccOpc == TargetOpcode::INSERT_SUBREG || + SuccOpc == TargetOpcode::SUBREG_TO_REG) continue; if ((!canClobber(SuccSU, DUSU) || (hasCopyToRegUse(SU) && !hasCopyToRegUse(SuccSU)) || diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index f1b6f1e..43cf37e 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -829,6 +829,7 @@ void SelectionDAG::clear() { EntryNode.UseList = 0; AllNodes.push_back(&EntryNode); Root = getEntryNode(); + delete Ordering; Ordering = new SDNodeOrdering(); } @@ -1925,19 +1926,28 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, } case ISD::SREM: if (ConstantSDNode *Rem = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { - const APInt &RA = Rem->getAPIntValue(); - if (RA.isPowerOf2() || (-RA).isPowerOf2()) { - APInt LowBits = RA.isStrictlyPositive() ? (RA - 1) : ~RA; + const APInt &RA = Rem->getAPIntValue().abs(); + if (RA.isPowerOf2()) { + APInt LowBits = RA - 1; APInt Mask2 = LowBits | APInt::getSignBit(BitWidth); ComputeMaskedBits(Op.getOperand(0), Mask2,KnownZero2,KnownOne2,Depth+1); - // If the sign bit of the first operand is zero, the sign bit of - // the result is zero. If the first operand has no one bits below - // the second operand's single 1 bit, its sign will be zero. + // The low bits of the first operand are unchanged by the srem. + KnownZero = KnownZero2 & LowBits; + KnownOne = KnownOne2 & LowBits; + + // If the first operand is non-negative or has all low bits zero, then + // the upper bits are all zero. if (KnownZero2[BitWidth-1] || ((KnownZero2 & LowBits) == LowBits)) - KnownZero2 |= ~LowBits; + KnownZero |= ~LowBits; - KnownZero |= KnownZero2 & Mask; + // If the first operand is negative and not all low bits are zero, then + // the upper bits are all one. + if (KnownOne2[BitWidth-1] && ((KnownOne2 & LowBits) != 0)) + KnownOne |= ~LowBits; + + KnownZero &= Mask; + KnownOne &= Mask; assert((KnownZero & KnownOne) == 0&&"Bits known to be one AND zero?"); } @@ -2755,13 +2765,16 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, // EXTRACT_VECTOR_ELT of INSERT_VECTOR_ELT is often formed when vector // operations are lowered to scalars. if (N1.getOpcode() == ISD::INSERT_VECTOR_ELT) { - // If the indices are the same, return the inserted element. - if (N1.getOperand(2) == N2) - return N1.getOperand(1); - // If the indices are known different, extract the element from + // If the indices are the same, return the inserted element else + // if the indices are known different, extract the element from // the original vector. 
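The reworked SREM case in the SelectionDAG.cpp hunk above is easier to check with a concrete width (a worked instance, not from the patch). For N = (srem x, 8) the result satisfies result == x (mod 8) and takes the sign of x, which is exactly what the three steps compute:

    // BitWidth = 32, RA = abs(8) is a power of two, LowBits = 7
    // 1. Low bits pass through unchanged:
    //      KnownZero = KnownZero2 & 7;  KnownOne = KnownOne2 & 7;
    // 2. x known non-negative, or (x & 7) known zero:
    //      result is in [0, 7]   -> KnownZero |= ~7 (upper bits zero)
    // 3. x known negative and some bit of (x & 7) known one:
    //      result is in [-7, -1] -> KnownOne |= ~7 (upper bits one)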
- else if (isa<ConstantSDNode>(N1.getOperand(2)) && - isa<ConstantSDNode>(N2)) + if (N1.getOperand(2) == N2) { + if (VT == N1.getOperand(1).getValueType()) + return N1.getOperand(1); + else + return getSExtOrTrunc(N1.getOperand(1), DL, VT); + } else if (isa<ConstantSDNode>(N1.getOperand(2)) && + isa<ConstantSDNode>(N2)) return getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, N1.getOperand(0), N2); } break; @@ -3287,7 +3300,7 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl, Value = getMemsetStringVal(VT, dl, DAG, TLI, Str, SrcOff); Store = DAG.getStore(Chain, dl, Value, getMemBasePlusOffset(Dst, DstOff, DAG), - DstSV, DstSVOff + DstOff, false, DstAlign); + DstSV, DstSVOff + DstOff, false, false, DstAlign); } else { // The type might not be legal for the target. This should only happen // if the type is smaller than a legal type, as on PPC, so the right @@ -3298,10 +3311,11 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl, assert(NVT.bitsGE(VT)); Value = DAG.getExtLoad(ISD::EXTLOAD, dl, NVT, Chain, getMemBasePlusOffset(Src, SrcOff, DAG), - SrcSV, SrcSVOff + SrcOff, VT, false, Align); + SrcSV, SrcSVOff + SrcOff, VT, false, false, Align); Store = DAG.getTruncStore(Chain, dl, Value, - getMemBasePlusOffset(Dst, DstOff, DAG), - DstSV, DstSVOff + DstOff, VT, false, DstAlign); + getMemBasePlusOffset(Dst, DstOff, DAG), + DstSV, DstSVOff + DstOff, VT, false, false, + DstAlign); } OutChains.push_back(Store); SrcOff += VTSize; @@ -3346,7 +3360,7 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, DebugLoc dl, Value = DAG.getLoad(VT, dl, Chain, getMemBasePlusOffset(Src, SrcOff, DAG), - SrcSV, SrcSVOff + SrcOff, false, Align); + SrcSV, SrcSVOff + SrcOff, false, false, Align); LoadValues.push_back(Value); LoadChains.push_back(Value.getValue(1)); SrcOff += VTSize; @@ -3361,7 +3375,7 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, DebugLoc dl, Store = DAG.getStore(Chain, dl, LoadValues[i], getMemBasePlusOffset(Dst, DstOff, DAG), - DstSV, DstSVOff + DstOff, false, DstAlign); + DstSV, DstSVOff + DstOff, false, false, DstAlign); OutChains.push_back(Store); DstOff += VTSize; } @@ -3396,7 +3410,7 @@ static SDValue getMemsetStores(SelectionDAG &DAG, DebugLoc dl, SDValue Value = getMemsetValue(Src, VT, DAG, dl); SDValue Store = DAG.getStore(Chain, dl, Value, getMemBasePlusOffset(Dst, DstOff, DAG), - DstSV, DstSVOff + DstOff); + DstSV, DstSVOff + DstOff, false, false, 0); OutChains.push_back(Store); DstOff += VTSize; } @@ -3776,7 +3790,8 @@ SelectionDAG::getLoad(ISD::MemIndexedMode AM, DebugLoc dl, ISD::LoadExtType ExtType, EVT VT, SDValue Chain, SDValue Ptr, SDValue Offset, const Value *SV, int SVOffset, EVT MemVT, - bool isVolatile, unsigned Alignment) { + bool isVolatile, bool isNonTemporal, + unsigned Alignment) { if (Alignment == 0) // Ensure that codegen never sees alignment 0 Alignment = getEVTAlignment(VT); @@ -3790,6 +3805,8 @@ SelectionDAG::getLoad(ISD::MemIndexedMode AM, DebugLoc dl, unsigned Flags = MachineMemOperand::MOLoad; if (isVolatile) Flags |= MachineMemOperand::MOVolatile; + if (isNonTemporal) + Flags |= MachineMemOperand::MONonTemporal; MachineMemOperand *MMO = MF.getMachineMemOperand(SV, Flags, SVOffset, MemVT.getStoreSize(), Alignment); @@ -3844,20 +3861,22 @@ SelectionDAG::getLoad(ISD::MemIndexedMode AM, DebugLoc dl, SDValue SelectionDAG::getLoad(EVT VT, DebugLoc dl, SDValue Chain, SDValue Ptr, const Value *SV, int SVOffset, - bool isVolatile, unsigned Alignment) { + bool isVolatile, bool isNonTemporal, + unsigned Alignment) { 
SDValue Undef = getUNDEF(Ptr.getValueType()); return getLoad(ISD::UNINDEXED, dl, ISD::NON_EXTLOAD, VT, Chain, Ptr, Undef, - SV, SVOffset, VT, isVolatile, Alignment); + SV, SVOffset, VT, isVolatile, isNonTemporal, Alignment); } SDValue SelectionDAG::getExtLoad(ISD::LoadExtType ExtType, DebugLoc dl, EVT VT, SDValue Chain, SDValue Ptr, const Value *SV, int SVOffset, EVT MemVT, - bool isVolatile, unsigned Alignment) { + bool isVolatile, bool isNonTemporal, + unsigned Alignment) { SDValue Undef = getUNDEF(Ptr.getValueType()); return getLoad(ISD::UNINDEXED, dl, ExtType, VT, Chain, Ptr, Undef, - SV, SVOffset, MemVT, isVolatile, Alignment); + SV, SVOffset, MemVT, isVolatile, isNonTemporal, Alignment); } SDValue @@ -3869,12 +3888,13 @@ SelectionDAG::getIndexedLoad(SDValue OrigLoad, DebugLoc dl, SDValue Base, return getLoad(AM, dl, LD->getExtensionType(), OrigLoad.getValueType(), LD->getChain(), Base, Offset, LD->getSrcValue(), LD->getSrcValueOffset(), LD->getMemoryVT(), - LD->isVolatile(), LD->getAlignment()); + LD->isVolatile(), LD->isNonTemporal(), LD->getAlignment()); } SDValue SelectionDAG::getStore(SDValue Chain, DebugLoc dl, SDValue Val, SDValue Ptr, const Value *SV, int SVOffset, - bool isVolatile, unsigned Alignment) { + bool isVolatile, bool isNonTemporal, + unsigned Alignment) { if (Alignment == 0) // Ensure that codegen never sees alignment 0 Alignment = getEVTAlignment(Val.getValueType()); @@ -3888,6 +3908,8 @@ SDValue SelectionDAG::getStore(SDValue Chain, DebugLoc dl, SDValue Val, unsigned Flags = MachineMemOperand::MOStore; if (isVolatile) Flags |= MachineMemOperand::MOVolatile; + if (isNonTemporal) + Flags |= MachineMemOperand::MONonTemporal; MachineMemOperand *MMO = MF.getMachineMemOperand(SV, Flags, SVOffset, Val.getValueType().getStoreSize(), Alignment); @@ -3920,7 +3942,8 @@ SDValue SelectionDAG::getStore(SDValue Chain, DebugLoc dl, SDValue Val, SDValue SelectionDAG::getTruncStore(SDValue Chain, DebugLoc dl, SDValue Val, SDValue Ptr, const Value *SV, int SVOffset, EVT SVT, - bool isVolatile, unsigned Alignment) { + bool isVolatile, bool isNonTemporal, + unsigned Alignment) { if (Alignment == 0) // Ensure that codegen never sees alignment 0 Alignment = getEVTAlignment(SVT); @@ -3934,6 +3957,8 @@ SDValue SelectionDAG::getTruncStore(SDValue Chain, DebugLoc dl, SDValue Val, unsigned Flags = MachineMemOperand::MOStore; if (isVolatile) Flags |= MachineMemOperand::MOVolatile; + if (isNonTemporal) + Flags |= MachineMemOperand::MONonTemporal; MachineMemOperand *MMO = MF.getMachineMemOperand(SV, Flags, SVOffset, SVT.getStoreSize(), Alignment); @@ -4860,23 +4885,23 @@ SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc DL, SDVTList VTs, } /// getTargetExtractSubreg - A convenience function for creating -/// TargetInstrInfo::EXTRACT_SUBREG nodes. +/// TargetOpcode::EXTRACT_SUBREG nodes. SDValue SelectionDAG::getTargetExtractSubreg(int SRIdx, DebugLoc DL, EVT VT, SDValue Operand) { SDValue SRIdxVal = getTargetConstant(SRIdx, MVT::i32); - SDNode *Subreg = getMachineNode(TargetInstrInfo::EXTRACT_SUBREG, DL, + SDNode *Subreg = getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, VT, Operand, SRIdxVal); return SDValue(Subreg, 0); } /// getTargetInsertSubreg - A convenience function for creating -/// TargetInstrInfo::INSERT_SUBREG nodes. +/// TargetOpcode::INSERT_SUBREG nodes. 
SDValue SelectionDAG::getTargetInsertSubreg(int SRIdx, DebugLoc DL, EVT VT, SDValue Operand, SDValue Subreg) { SDValue SRIdxVal = getTargetConstant(SRIdx, MVT::i32); - SDNode *Result = getMachineNode(TargetInstrInfo::INSERT_SUBREG, DL, + SDNode *Result = getMachineNode(TargetOpcode::INSERT_SUBREG, DL, VT, Operand, Subreg, SRIdxVal); return SDValue(Result, 0); } @@ -5212,11 +5237,12 @@ unsigned SelectionDAG::AssignTopologicalOrder() { } } if (I == SortedPos) { - allnodes_iterator J = I; - SDNode *S = ++J; - dbgs() << "Offending node:\n"; +#ifndef NDEBUG + SDNode *S = ++I; + dbgs() << "Overran sorted position:\n"; S->dumprFull(); - assert(0 && "Overran sorted position"); +#endif + llvm_unreachable(0); } } @@ -5237,7 +5263,7 @@ unsigned SelectionDAG::AssignTopologicalOrder() { } /// AssignOrdering - Assign an order to the SDNode. -void SelectionDAG::AssignOrdering(SDNode *SD, unsigned Order) { +void SelectionDAG::AssignOrdering(const SDNode *SD, unsigned Order) { assert(SD && "Trying to assign an order to a null node!"); Ordering->add(SD, Order); } diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 23c7059..85ecb95 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -131,6 +131,17 @@ namespace { } } + /// areValueTypesLegal - Return true if types of all the values are legal. + bool areValueTypesLegal() { + for (unsigned Value = 0, e = ValueVTs.size(); Value != e; ++Value) { + EVT RegisterVT = RegVTs[Value]; + if (!TLI->isTypeLegal(RegisterVT)) + return false; + } + return true; + } + + /// append - Add the specified values to this one. void append(const RegsForValue &RHS) { TLI = RHS.TLI; @@ -176,7 +187,6 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc dl, unsigned Order, assert(NumParts > 0 && "No parts to assemble!"); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); SDValue Val = Parts[0]; - DAG.AssignOrdering(Val.getNode(), Order); if (NumParts > 1) { // Assemble the value from multiple parts. @@ -209,10 +219,6 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc dl, unsigned Order, Val = DAG.getNode(ISD::BUILD_PAIR, dl, RoundVT, Lo, Hi); - DAG.AssignOrdering(Lo.getNode(), Order); - DAG.AssignOrdering(Hi.getNode(), Order); - DAG.AssignOrdering(Val.getNode(), Order); - if (RoundParts < NumParts) { // Assemble the trailing non-power-of-2 part. unsigned OddParts = NumParts - RoundParts; @@ -226,15 +232,11 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc dl, unsigned Order, std::swap(Lo, Hi); EVT TotalVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits); Hi = DAG.getNode(ISD::ANY_EXTEND, dl, TotalVT, Hi); - DAG.AssignOrdering(Hi.getNode(), Order); Hi = DAG.getNode(ISD::SHL, dl, TotalVT, Hi, DAG.getConstant(Lo.getValueType().getSizeInBits(), TLI.getPointerTy())); - DAG.AssignOrdering(Hi.getNode(), Order); Lo = DAG.getNode(ISD::ZERO_EXTEND, dl, TotalVT, Lo); - DAG.AssignOrdering(Lo.getNode(), Order); Val = DAG.getNode(ISD::OR, dl, TotalVT, Lo, Hi); - DAG.AssignOrdering(Val.getNode(), Order); } } else if (ValueVT.isVector()) { // Handle a multi-element vector. @@ -275,7 +277,6 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc dl, unsigned Order, Val = DAG.getNode(IntermediateVT.isVector() ? 
ISD::CONCAT_VECTORS : ISD::BUILD_VECTOR, dl, ValueVT, &Ops[0], NumIntermediates); - DAG.AssignOrdering(Val.getNode(), Order); } else if (PartVT.isFloatingPoint()) { // FP split into multiple FP parts (for ppcf128) assert(ValueVT == EVT(MVT::ppcf128) && PartVT == EVT(MVT::f64) && @@ -286,10 +287,6 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc dl, unsigned Order, if (TLI.isBigEndian()) std::swap(Lo, Hi); Val = DAG.getNode(ISD::BUILD_PAIR, dl, ValueVT, Lo, Hi); - - DAG.AssignOrdering(Hi.getNode(), Order); - DAG.AssignOrdering(Lo.getNode(), Order); - DAG.AssignOrdering(Val.getNode(), Order); } else { // FP split into integer parts (soft fp) assert(ValueVT.isFloatingPoint() && PartVT.isInteger() && @@ -307,18 +304,14 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc dl, unsigned Order, if (PartVT.isVector()) { assert(ValueVT.isVector() && "Unknown vector conversion!"); - SDValue Res = DAG.getNode(ISD::BIT_CONVERT, dl, ValueVT, Val); - DAG.AssignOrdering(Res.getNode(), Order); - return Res; + return DAG.getNode(ISD::BIT_CONVERT, dl, ValueVT, Val); } if (ValueVT.isVector()) { assert(ValueVT.getVectorElementType() == PartVT && ValueVT.getVectorNumElements() == 1 && "Only trivial scalar-to-vector conversions should get here!"); - SDValue Res = DAG.getNode(ISD::BUILD_VECTOR, dl, ValueVT, Val); - DAG.AssignOrdering(Res.getNode(), Order); - return Res; + return DAG.getNode(ISD::BUILD_VECTOR, dl, ValueVT, Val); } if (PartVT.isInteger() && @@ -330,36 +323,24 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc dl, unsigned Order, if (AssertOp != ISD::DELETED_NODE) Val = DAG.getNode(AssertOp, dl, PartVT, Val, DAG.getValueType(ValueVT)); - DAG.AssignOrdering(Val.getNode(), Order); - Val = DAG.getNode(ISD::TRUNCATE, dl, ValueVT, Val); - DAG.AssignOrdering(Val.getNode(), Order); - return Val; + return DAG.getNode(ISD::TRUNCATE, dl, ValueVT, Val); } else { - Val = DAG.getNode(ISD::ANY_EXTEND, dl, ValueVT, Val); - DAG.AssignOrdering(Val.getNode(), Order); - return Val; + return DAG.getNode(ISD::ANY_EXTEND, dl, ValueVT, Val); } } if (PartVT.isFloatingPoint() && ValueVT.isFloatingPoint()) { if (ValueVT.bitsLT(Val.getValueType())) { // FP_ROUND's are always exact here. - Val = DAG.getNode(ISD::FP_ROUND, dl, ValueVT, Val, - DAG.getIntPtrConstant(1)); - DAG.AssignOrdering(Val.getNode(), Order); - return Val; + return DAG.getNode(ISD::FP_ROUND, dl, ValueVT, Val, + DAG.getIntPtrConstant(1)); } - Val = DAG.getNode(ISD::FP_EXTEND, dl, ValueVT, Val); - DAG.AssignOrdering(Val.getNode(), Order); - return Val; + return DAG.getNode(ISD::FP_EXTEND, dl, ValueVT, Val); } - if (PartVT.getSizeInBits() == ValueVT.getSizeInBits()) { - Val = DAG.getNode(ISD::BIT_CONVERT, dl, ValueVT, Val); - DAG.AssignOrdering(Val.getNode(), Order); - return Val; - } + if (PartVT.getSizeInBits() == ValueVT.getSizeInBits()) + return DAG.getNode(ISD::BIT_CONVERT, dl, ValueVT, Val); llvm_unreachable("Unknown mismatch!"); return SDValue(); @@ -414,8 +395,6 @@ static void getCopyToParts(SelectionDAG &DAG, DebugLoc dl, unsigned Order, } } - DAG.AssignOrdering(Val.getNode(), Order); - // The value may have changed - recompute ValueVT. 
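The getCopyFromParts hunks above delete the interleaved ordering calls but leave the reassembly arithmetic untouched: the low part is zero-extended, the high part any-extended and shifted left by the low part's width, and the two are OR'd together. The same computation on concrete scalar types, as a plain C++ sketch:

    #include <cstdint>
    #include <cstdio>

    // Reassemble a 64-bit value from two 32-bit parts the way
    // getCopyFromParts combines Lo and Hi for an expanded integer.
    uint64_t assembleFromParts(uint32_t Lo, uint32_t Hi) {
      uint64_t WideLo = static_cast<uint64_t>(Lo);        // ZERO_EXTEND
      uint64_t WideHi = static_cast<uint64_t>(Hi) << 32;  // ANY_EXTEND + SHL
      return WideLo | WideHi;                             // OR
    }

    int main() {
      std::printf("%llx\n", (unsigned long long)
                  assembleFromParts(0xdeadbeefu, 0x1234u));  // 1234deadbeef
      return 0;
    }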
ValueVT = Val.getValueType(); assert(NumParts * PartBits == ValueVT.getSizeInBits() && @@ -448,9 +427,6 @@ static void getCopyToParts(SelectionDAG &DAG, DebugLoc dl, unsigned Order, NumParts = RoundParts; ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits); Val = DAG.getNode(ISD::TRUNCATE, dl, ValueVT, Val); - - DAG.AssignOrdering(OddVal.getNode(), Order); - DAG.AssignOrdering(Val.getNode(), Order); } // The number of parts is a power of 2. Repeatedly bisect the value using @@ -460,8 +436,6 @@ static void getCopyToParts(SelectionDAG &DAG, DebugLoc dl, unsigned Order, ValueVT.getSizeInBits()), Val); - DAG.AssignOrdering(Parts[0].getNode(), Order); - for (unsigned StepSize = NumParts; StepSize > 1; StepSize /= 2) { for (unsigned i = 0; i < NumParts; i += StepSize) { unsigned ThisBits = StepSize * PartBits / 2; @@ -476,16 +450,11 @@ static void getCopyToParts(SelectionDAG &DAG, DebugLoc dl, unsigned Order, ThisVT, Part0, DAG.getConstant(0, PtrVT)); - DAG.AssignOrdering(Part0.getNode(), Order); - DAG.AssignOrdering(Part1.getNode(), Order); - if (ThisBits == PartBits && ThisVT != PartVT) { Part0 = DAG.getNode(ISD::BIT_CONVERT, dl, PartVT, Part0); Part1 = DAG.getNode(ISD::BIT_CONVERT, dl, PartVT, Part1); - DAG.AssignOrdering(Part0.getNode(), Order); - DAG.AssignOrdering(Part1.getNode(), Order); } } } @@ -511,7 +480,6 @@ static void getCopyToParts(SelectionDAG &DAG, DebugLoc dl, unsigned Order, } } - DAG.AssignOrdering(Val.getNode(), Order); Parts[0] = Val; return; } @@ -539,8 +507,6 @@ static void getCopyToParts(SelectionDAG &DAG, DebugLoc dl, unsigned Order, Ops[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, IntermediateVT, Val, DAG.getConstant(i, PtrVT)); - - DAG.AssignOrdering(Ops[i].getNode(), Order); } // Split the intermediate operands into legal parts. @@ -638,23 +604,34 @@ SDValue SelectionDAGBuilder::getControlRoot() { return Root; } +void SelectionDAGBuilder::AssignOrderingToNode(const SDNode *Node) { + if (DAG.GetOrdering(Node) != 0) return; // Already has ordering. + DAG.AssignOrdering(Node, SDNodeOrder); + + for (unsigned I = 0, E = Node->getNumOperands(); I != E; ++I) + AssignOrderingToNode(Node->getOperand(I).getNode()); +} + void SelectionDAGBuilder::visit(Instruction &I) { visit(I.getOpcode(), I); } void SelectionDAGBuilder::visit(unsigned Opcode, User &I) { - // We're processing a new instruction. - ++SDNodeOrder; - // Note: this doesn't use InstVisitor, because it has to work with // ConstantExpr's in addition to instructions. switch (Opcode) { default: llvm_unreachable("Unknown instruction type encountered!"); // Build the switch statement using the Instruction.def file. #define HANDLE_INST(NUM, OPCODE, CLASS) \ - case Instruction::OPCODE: return visit##OPCODE((CLASS&)I); + case Instruction::OPCODE: visit##OPCODE((CLASS&)I); break; #include "llvm/Instruction.def" } + + // Assign the ordering to the freshly created DAG nodes. 
+ if (NodeMap.count(&I)) { + ++SDNodeOrder; + AssignOrderingToNode(getValue(&I).getNode()); + } } SDValue SelectionDAGBuilder::getValue(const Value *V) { @@ -699,10 +676,8 @@ SDValue SelectionDAGBuilder::getValue(const Value *V) { Constants.push_back(SDValue(Val, i)); } - SDValue Res = DAG.getMergeValues(&Constants[0], Constants.size(), - getCurDebugLoc()); - DAG.AssignOrdering(Res.getNode(), SDNodeOrder); - return Res; + return DAG.getMergeValues(&Constants[0], Constants.size(), + getCurDebugLoc()); } if (isa<StructType>(C->getType()) || isa<ArrayType>(C->getType())) { @@ -725,10 +700,8 @@ SDValue SelectionDAGBuilder::getValue(const Value *V) { Constants[i] = DAG.getConstant(0, EltVT); } - SDValue Res = DAG.getMergeValues(&Constants[0], NumElts, - getCurDebugLoc()); - DAG.AssignOrdering(Res.getNode(), SDNodeOrder); - return Res; + return DAG.getMergeValues(&Constants[0], NumElts, + getCurDebugLoc()); } if (BlockAddress *BA = dyn_cast<BlockAddress>(C)) @@ -756,10 +729,8 @@ SDValue SelectionDAGBuilder::getValue(const Value *V) { } // Create a BUILD_VECTOR node. - SDValue Res = DAG.getNode(ISD::BUILD_VECTOR, getCurDebugLoc(), - VT, &Ops[0], Ops.size()); - DAG.AssignOrdering(Res.getNode(), SDNodeOrder); - return NodeMap[V] = Res; + return NodeMap[V] = DAG.getNode(ISD::BUILD_VECTOR, getCurDebugLoc(), + VT, &Ops[0], Ops.size()); } // If this is a static alloca, generate it as the frameindex instead of @@ -873,16 +844,11 @@ void SelectionDAGBuilder::visitRet(ReturnInst &I) { Chains[i] = DAG.getStore(Chain, getCurDebugLoc(), SDValue(RetOp.getNode(), RetOp.getResNo() + i), - Add, NULL, Offsets[i], false, 0); - - DAG.AssignOrdering(Add.getNode(), SDNodeOrder); - DAG.AssignOrdering(Chains[i].getNode(), SDNodeOrder); + Add, NULL, Offsets[i], false, false, 0); } Chain = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(), MVT::Other, &Chains[0], NumValues); - - DAG.AssignOrdering(Chain.getNode(), SDNodeOrder); } else { for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i) { SmallVector<EVT, 4> ValueVTs; @@ -948,7 +914,6 @@ void SelectionDAGBuilder::visitRet(ReturnInst &I) { // Update the DAG with the new chain value resulting from return lowering. DAG.setRoot(Chain); - DAG.AssignOrdering(Chain.getNode(), SDNodeOrder); } /// CopyToExportRegsIfNeeded - If the given value has virtual registers @@ -1209,13 +1174,10 @@ void SelectionDAGBuilder::visitBr(BranchInst &I) { CurMBB->addSuccessor(Succ0MBB); // If this is not a fall-through branch, emit the branch. 
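This hunk is the heart of the refactoring that removes the scattered DAG.AssignOrdering calls throughout the rest of the file: visit() now bumps SDNodeOrder once per instruction, and AssignOrderingToNode recursively stamps every not-yet-ordered node reachable from the instruction's result, stopping where an order already exists. A self-contained model of the traversal, with a plain struct standing in for SDNode:

    #include <cstdio>
    #include <unordered_map>
    #include <vector>

    struct Node {                       // stand-in for SDNode
      std::vector<Node*> operands;
    };

    std::unordered_map<const Node*, unsigned> Ordering;  // 0 = unassigned

    // Mirrors SelectionDAGBuilder::AssignOrderingToNode: stamp this node,
    // then recurse into operands, stopping at already-ordered nodes
    // (they were built for an earlier instruction).
    void assignOrderingToNode(const Node* N, unsigned Order) {
      if (Ordering[N] != 0) return;
      Ordering[N] = Order;
      for (const Node* Op : N->operands)
        assignOrderingToNode(Op, Order);
    }

    int main() {
      Node A, B{{&A}}, C{{&A, &B}};
      assignOrderingToNode(&B, 7);      // instruction 7 built A and B
      assignOrderingToNode(&C, 8);      // instruction 8 built only C
      std::printf("A=%u B=%u C=%u\n", Ordering[&A], Ordering[&B], Ordering[&C]);
      // Prints A=7 B=7 C=8: nodes keep the order of the instruction
      // that first created them.
      return 0;
    }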
- if (Succ0MBB != NextBlock) { - SDValue V = DAG.getNode(ISD::BR, getCurDebugLoc(), + if (Succ0MBB != NextBlock) + DAG.setRoot(DAG.getNode(ISD::BR, getCurDebugLoc(), MVT::Other, getControlRoot(), - DAG.getBasicBlock(Succ0MBB)); - DAG.setRoot(V); - DAG.AssignOrdering(V.getNode(), SDNodeOrder); - } + DAG.getBasicBlock(Succ0MBB))); return; } @@ -1321,8 +1283,6 @@ void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB) { } } - DAG.AssignOrdering(Cond.getNode(), SDNodeOrder); - // Update successor info CurMBB->addSuccessor(CB.TrueBB); CurMBB->addSuccessor(CB.FalseBB); @@ -1340,13 +1300,11 @@ void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB) { std::swap(CB.TrueBB, CB.FalseBB); SDValue True = DAG.getConstant(1, Cond.getValueType()); Cond = DAG.getNode(ISD::XOR, dl, Cond.getValueType(), Cond, True); - DAG.AssignOrdering(Cond.getNode(), SDNodeOrder); } SDValue BrCond = DAG.getNode(ISD::BRCOND, dl, MVT::Other, getControlRoot(), Cond, DAG.getBasicBlock(CB.TrueBB)); - DAG.AssignOrdering(BrCond.getNode(), SDNodeOrder); // If the branch was constant folded, fix up the CFG. if (BrCond.getOpcode() == ISD::BR) { @@ -1356,12 +1314,9 @@ void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB) { if (BrCond == getControlRoot()) CurMBB->removeSuccessor(CB.TrueBB); - if (CB.FalseBB != NextBlock) { + if (CB.FalseBB != NextBlock) BrCond = DAG.getNode(ISD::BR, dl, MVT::Other, BrCond, DAG.getBasicBlock(CB.FalseBB)); - - DAG.AssignOrdering(BrCond.getNode(), SDNodeOrder); - } } DAG.setRoot(BrCond); @@ -1379,10 +1334,6 @@ void SelectionDAGBuilder::visitJumpTable(JumpTable &JT) { MVT::Other, Index.getValue(1), Table, Index); DAG.setRoot(BrJumpTable); - - DAG.AssignOrdering(Index.getNode(), SDNodeOrder); - DAG.AssignOrdering(Table.getNode(), SDNodeOrder); - DAG.AssignOrdering(BrJumpTable.getNode(), SDNodeOrder); } /// visitJumpTableHeader - This function emits necessary code to produce index @@ -1398,7 +1349,7 @@ void SelectionDAGBuilder::visitJumpTableHeader(JumpTable &JT, DAG.getConstant(JTH.First, VT)); // The SDNode we just created, which holds the value being switched on minus - // the the smallest case value, needs to be copied to a virtual register so it + // the smallest case value, needs to be copied to a virtual register so it // can be used as an index into the jump table in a subsequent basic block. // This value may be smaller or larger than the target's pointer type, and // therefore require extension or truncating. @@ -1417,11 +1368,6 @@ void SelectionDAGBuilder::visitJumpTableHeader(JumpTable &JT, DAG.getConstant(JTH.Last-JTH.First,VT), ISD::SETUGT); - DAG.AssignOrdering(Sub.getNode(), SDNodeOrder); - DAG.AssignOrdering(SwitchOp.getNode(), SDNodeOrder); - DAG.AssignOrdering(CopyTo.getNode(), SDNodeOrder); - DAG.AssignOrdering(CMP.getNode(), SDNodeOrder); - // Set NextBlock to be the MBB immediately after the current one, if any. // This is used to avoid emitting unnecessary branches to the next block. 
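Two lowering details recur in the visitBr and visitSwitchCase hunks above: an unconditional branch whose target is the next block in layout is simply not emitted, and when the true successor is the fall-through block the successors are swapped and the condition is inverted by XOR'ing with 1. A toy model of that emission logic (block numbers are hypothetical):

    #include <cstdio>
    #include <utility>

    // If the true successor is the fall-through block, swap successors
    // and invert the condition (the DAG-level XOR with 1); then elide
    // the unconditional branch when it targets the next block anyway.
    void emitCondBranch(int trueBB, int falseBB, int nextBB) {
      bool negated = false;
      if (trueBB == nextBB) {
        std::swap(trueBB, falseBB);
        negated = true;                 // Cond = XOR(Cond, 1)
      }
      std::printf("  brcond %scond -> BB%d\n", negated ? "!" : "", trueBB);
      if (falseBB != nextBB)
        std::printf("  br -> BB%d\n", falseBB);
    }

    int main() {
      emitCondBranch(2, 3, /*nextBB=*/2);
      // Prints only "brcond !cond -> BB3"; falling through reaches BB2.
      return 0;
    }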
MachineBasicBlock *NextBlock = 0; @@ -1434,13 +1380,9 @@ void SelectionDAGBuilder::visitJumpTableHeader(JumpTable &JT, MVT::Other, CopyTo, CMP, DAG.getBasicBlock(JT.Default)); - DAG.AssignOrdering(BrCond.getNode(), SDNodeOrder); - - if (JT.MBB != NextBlock) { + if (JT.MBB != NextBlock) BrCond = DAG.getNode(ISD::BR, getCurDebugLoc(), MVT::Other, BrCond, DAG.getBasicBlock(JT.MBB)); - DAG.AssignOrdering(BrCond.getNode(), SDNodeOrder); - } DAG.setRoot(BrCond); } @@ -1467,11 +1409,6 @@ void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B) { SDValue CopyTo = DAG.getCopyToReg(getControlRoot(), getCurDebugLoc(), B.Reg, ShiftOp); - DAG.AssignOrdering(Sub.getNode(), SDNodeOrder); - DAG.AssignOrdering(RangeCmp.getNode(), SDNodeOrder); - DAG.AssignOrdering(ShiftOp.getNode(), SDNodeOrder); - DAG.AssignOrdering(CopyTo.getNode(), SDNodeOrder); - // Set NextBlock to be the MBB immediately after the current one, if any. // This is used to avoid emitting unnecessary branches to the next block. MachineBasicBlock *NextBlock = 0; @@ -1488,13 +1425,9 @@ void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B) { MVT::Other, CopyTo, RangeCmp, DAG.getBasicBlock(B.Default)); - DAG.AssignOrdering(BrRange.getNode(), SDNodeOrder); - - if (MBB != NextBlock) { + if (MBB != NextBlock) BrRange = DAG.getNode(ISD::BR, getCurDebugLoc(), MVT::Other, CopyTo, DAG.getBasicBlock(MBB)); - DAG.AssignOrdering(BrRange.getNode(), SDNodeOrder); - } DAG.setRoot(BrRange); } @@ -1520,11 +1453,6 @@ void SelectionDAGBuilder::visitBitTestCase(MachineBasicBlock* NextMBB, AndOp, DAG.getConstant(0, TLI.getPointerTy()), ISD::SETNE); - DAG.AssignOrdering(ShiftOp.getNode(), SDNodeOrder); - DAG.AssignOrdering(SwitchVal.getNode(), SDNodeOrder); - DAG.AssignOrdering(AndOp.getNode(), SDNodeOrder); - DAG.AssignOrdering(AndCmp.getNode(), SDNodeOrder); - CurMBB->addSuccessor(B.TargetBB); CurMBB->addSuccessor(NextMBB); @@ -1532,8 +1460,6 @@ void SelectionDAGBuilder::visitBitTestCase(MachineBasicBlock* NextMBB, MVT::Other, getControlRoot(), AndCmp, DAG.getBasicBlock(B.TargetBB)); - DAG.AssignOrdering(BrAnd.getNode(), SDNodeOrder); - // Set NextBlock to be the MBB immediately after the current one, if any. // This is used to avoid emitting unnecessary branches to the next block. MachineBasicBlock *NextBlock = 0; @@ -1541,11 +1467,9 @@ void SelectionDAGBuilder::visitBitTestCase(MachineBasicBlock* NextMBB, if (++BBI != FuncInfo.MF->end()) NextBlock = BBI; - if (NextMBB != NextBlock) { + if (NextMBB != NextBlock) BrAnd = DAG.getNode(ISD::BR, getCurDebugLoc(), MVT::Other, BrAnd, DAG.getBasicBlock(NextMBB)); - DAG.AssignOrdering(BrAnd.getNode(), SDNodeOrder); - } DAG.setRoot(BrAnd); } @@ -1570,11 +1494,9 @@ void SelectionDAGBuilder::visitInvoke(InvokeInst &I) { CurMBB->addSuccessor(LandingPad); // Drop into normal successor. 
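visitBitTestHeader and visitBitTestCase above implement the bit-test expansion for clustered switch cases: bias the switch value by the smallest case (the SUB node), range-check the biased value with one unsigned comparison, then test (1 << biased) against each destination's case mask (the SHL, AND and SETNE nodes). The same idea in plain C++, with hypothetical case values 10, 12 and 15:

    #include <cstdint>
    #include <cstdio>

    // One shift, one AND, one test handle `case 10: case 12: case 15:`.
    const char* classify(uint32_t x) {
      const uint32_t First = 10, Last = 15;
      if (x - First > Last - First)        // unsigned wrap makes this a
        return "default";                  // combined lower+upper bound check
      uint32_t Bit = 1u << (x - First);    // the SHL built in the header
      const uint32_t Mask = (1u << 0) | (1u << 2) | (1u << 5);  // 10, 12, 15
      return (Bit & Mask) ? "matched" : "default";  // AND + SETNE
    }

    int main() {
      std::printf("%s %s %s\n", classify(12), classify(11), classify(99));
      // matched default default
      return 0;
    }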
- SDValue Branch = DAG.getNode(ISD::BR, getCurDebugLoc(), - MVT::Other, getControlRoot(), - DAG.getBasicBlock(Return)); - DAG.setRoot(Branch); - DAG.AssignOrdering(Branch.getNode(), SDNodeOrder); + DAG.setRoot(DAG.getNode(ISD::BR, getCurDebugLoc(), + MVT::Other, getControlRoot(), + DAG.getBasicBlock(Return))); } void SelectionDAGBuilder::visitUnwind(UnwindInst &I) { @@ -1733,8 +1655,8 @@ bool SelectionDAGBuilder::handleJTSwitchCase(CaseRec& CR, std::vector<MachineBasicBlock*> DestBBs; APInt TEI = First; for (CaseItr I = CR.Range.first, E = CR.Range.second; I != E; ++TEI) { - const APInt& Low = cast<ConstantInt>(I->Low)->getValue(); - const APInt& High = cast<ConstantInt>(I->High)->getValue(); + const APInt &Low = cast<ConstantInt>(I->Low)->getValue(); + const APInt &High = cast<ConstantInt>(I->High)->getValue(); if (Low.sle(TEI) && TEI.sle(High)) { DestBBs.push_back(I->BB); @@ -1757,7 +1679,9 @@ bool SelectionDAGBuilder::handleJTSwitchCase(CaseRec& CR, // Create a jump table index for this jump table, or return an existing // one. - unsigned JTI = CurMF->getJumpTableInfo()->getJumpTableIndex(DestBBs); + unsigned JTEncoding = TLI.getJumpTableEncoding(); + unsigned JTI = CurMF->getOrCreateJumpTableInfo(JTEncoding) + ->getJumpTableIndex(DestBBs); // Set the jump table information so that we can codegen it as a second // MachineBasicBlock @@ -2086,13 +2010,10 @@ void SelectionDAGBuilder::visitSwitch(SwitchInst &SI) { // If this is not a fall-through branch, emit the branch. CurMBB->addSuccessor(Default); - if (Default != NextBlock) { - SDValue Res = DAG.getNode(ISD::BR, getCurDebugLoc(), - MVT::Other, getControlRoot(), - DAG.getBasicBlock(Default)); - DAG.setRoot(Res); - DAG.AssignOrdering(Res.getNode(), SDNodeOrder); - } + if (Default != NextBlock) + DAG.setRoot(DAG.getNode(ISD::BR, getCurDebugLoc(), + MVT::Other, getControlRoot(), + DAG.getBasicBlock(Default))); return; } @@ -2141,15 +2062,19 @@ void SelectionDAGBuilder::visitSwitch(SwitchInst &SI) { } void SelectionDAGBuilder::visitIndirectBr(IndirectBrInst &I) { - // Update machine-CFG edges. + // Update machine-CFG edges with unique successors. 
+ SmallVector<BasicBlock*, 32> succs; + succs.reserve(I.getNumSuccessors()); for (unsigned i = 0, e = I.getNumSuccessors(); i != e; ++i) - CurMBB->addSuccessor(FuncInfo.MBBMap[I.getSuccessor(i)]); - - SDValue Res = DAG.getNode(ISD::BRIND, getCurDebugLoc(), - MVT::Other, getControlRoot(), - getValue(I.getAddress())); - DAG.setRoot(Res); - DAG.AssignOrdering(Res.getNode(), SDNodeOrder); + succs.push_back(I.getSuccessor(i)); + array_pod_sort(succs.begin(), succs.end()); + succs.erase(std::unique(succs.begin(), succs.end()), succs.end()); + for (unsigned i = 0, e = succs.size(); i != e; ++i) + CurMBB->addSuccessor(FuncInfo.MBBMap[succs[i]]); + + DAG.setRoot(DAG.getNode(ISD::BRIND, getCurDebugLoc(), + MVT::Other, getControlRoot(), + getValue(I.getAddress()))); } void SelectionDAGBuilder::visitFSub(User &I) { @@ -2164,10 +2089,8 @@ void SelectionDAGBuilder::visitFSub(User &I) { Constant *CNZ = ConstantVector::get(&NZ[0], NZ.size()); if (CV == CNZ) { SDValue Op2 = getValue(I.getOperand(1)); - SDValue Res = DAG.getNode(ISD::FNEG, getCurDebugLoc(), - Op2.getValueType(), Op2); - setValue(&I, Res); - DAG.AssignOrdering(Res.getNode(), SDNodeOrder); + setValue(&I, DAG.getNode(ISD::FNEG, getCurDebugLoc(), + Op2.getValueType(), Op2)); return; } } @@ -2176,10 +2099,8 @@ void SelectionDAGBuilder::visitFSub(User &I) { if (ConstantFP *CFP = dyn_cast<ConstantFP>(I.getOperand(0))) if (CFP->isExactlyValue(ConstantFP::getNegativeZero(Ty)->getValueAPF())) { SDValue Op2 = getValue(I.getOperand(1)); - SDValue Res = DAG.getNode(ISD::FNEG, getCurDebugLoc(), - Op2.getValueType(), Op2); - setValue(&I, Res); - DAG.AssignOrdering(Res.getNode(), SDNodeOrder); + setValue(&I, DAG.getNode(ISD::FNEG, getCurDebugLoc(), + Op2.getValueType(), Op2)); return; } @@ -2189,10 +2110,8 @@ void SelectionDAGBuilder::visitFSub(User &I) { void SelectionDAGBuilder::visitBinary(User &I, unsigned OpCode) { SDValue Op1 = getValue(I.getOperand(0)); SDValue Op2 = getValue(I.getOperand(1)); - SDValue Res = DAG.getNode(OpCode, getCurDebugLoc(), - Op1.getValueType(), Op1, Op2); - setValue(&I, Res); - DAG.AssignOrdering(Res.getNode(), SDNodeOrder); + setValue(&I, DAG.getNode(OpCode, getCurDebugLoc(), + Op1.getValueType(), Op1, Op2)); } void SelectionDAGBuilder::visitShift(User &I, unsigned Opcode) { @@ -2225,12 +2144,8 @@ void SelectionDAGBuilder::visitShift(User &I, unsigned Opcode) { TLI.getPointerTy(), Op2); } - SDValue Res = DAG.getNode(Opcode, getCurDebugLoc(), - Op1.getValueType(), Op1, Op2); - setValue(&I, Res); - DAG.AssignOrdering(Op1.getNode(), SDNodeOrder); - DAG.AssignOrdering(Op2.getNode(), SDNodeOrder); - DAG.AssignOrdering(Res.getNode(), SDNodeOrder); + setValue(&I, DAG.getNode(Opcode, getCurDebugLoc(), + Op1.getValueType(), Op1, Op2)); } void SelectionDAGBuilder::visitICmp(User &I) { @@ -2244,9 +2159,7 @@ void SelectionDAGBuilder::visitICmp(User &I) { ISD::CondCode Opcode = getICmpCondCode(predicate); EVT DestVT = TLI.getValueType(I.getType()); - SDValue Res = DAG.getSetCC(getCurDebugLoc(), DestVT, Op1, Op2, Opcode); - setValue(&I, Res); - DAG.AssignOrdering(Res.getNode(), SDNodeOrder); + setValue(&I, DAG.getSetCC(getCurDebugLoc(), DestVT, Op1, Op2, Opcode)); } void SelectionDAGBuilder::visitFCmp(User &I) { @@ -2259,9 +2172,7 @@ void SelectionDAGBuilder::visitFCmp(User &I) { SDValue Op2 = getValue(I.getOperand(1)); ISD::CondCode Condition = getFCmpCondCode(predicate); EVT DestVT = TLI.getValueType(I.getType()); - SDValue Res = DAG.getSetCC(getCurDebugLoc(), DestVT, Op1, Op2, Condition); - setValue(&I, Res); - 
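The visitIndirectBr rewrite above fixes duplicate CFG edges: an indirectbr may list the same destination block several times, so the successors are sorted and uniqued before addSuccessor is called. The standard-library equivalent of that array_pod_sort idiom:

    #include <algorithm>
    #include <cstdio>
    #include <vector>

    int main() {
      // Successor list of a hypothetical indirectbr; block 2 appears twice.
      std::vector<int> succs = {4, 2, 7, 2};

      // The same sort + unique + erase sequence the patch performs.
      std::sort(succs.begin(), succs.end());
      succs.erase(std::unique(succs.begin(), succs.end()), succs.end());

      for (int bb : succs)
        std::printf("addSuccessor(BB%d)\n", bb);  // each edge added once
      return 0;
    }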
DAG.AssignOrdering(Res.getNode(), SDNodeOrder); + setValue(&I, DAG.getSetCC(getCurDebugLoc(), DestVT, Op1, Op2, Condition)); } void SelectionDAGBuilder::visitSelect(User &I) { @@ -2275,7 +2186,7 @@ void SelectionDAGBuilder::visitSelect(User &I) { SDValue TrueVal = getValue(I.getOperand(1)); SDValue FalseVal = getValue(I.getOperand(2)); - for (unsigned i = 0; i != NumValues; ++i) { + for (unsigned i = 0; i != NumValues; ++i) Values[i] = DAG.getNode(ISD::SELECT, getCurDebugLoc(), TrueVal.getNode()->getValueType(i), Cond, SDValue(TrueVal.getNode(), @@ -2283,23 +2194,16 @@ void SelectionDAGBuilder::visitSelect(User &I) { SDValue(FalseVal.getNode(), FalseVal.getResNo() + i)); - DAG.AssignOrdering(Values[i].getNode(), SDNodeOrder); - } - - SDValue Res = DAG.getNode(ISD::MERGE_VALUES, getCurDebugLoc(), - DAG.getVTList(&ValueVTs[0], NumValues), - &Values[0], NumValues); - setValue(&I, Res); - DAG.AssignOrdering(Res.getNode(), SDNodeOrder); + setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurDebugLoc(), + DAG.getVTList(&ValueVTs[0], NumValues), + &Values[0], NumValues)); } void SelectionDAGBuilder::visitTrunc(User &I) { // TruncInst cannot be a no-op cast because sizeof(src) > sizeof(dest). SDValue N = getValue(I.getOperand(0)); EVT DestVT = TLI.getValueType(I.getType()); - SDValue Res = DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(), DestVT, N); - setValue(&I, Res); - DAG.AssignOrdering(Res.getNode(), SDNodeOrder); + setValue(&I, DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(), DestVT, N)); } void SelectionDAGBuilder::visitZExt(User &I) { @@ -2307,9 +2211,7 @@ void SelectionDAGBuilder::visitZExt(User &I) { // ZExt also can't be a cast to bool for same reason. So, nothing much to do SDValue N = getValue(I.getOperand(0)); EVT DestVT = TLI.getValueType(I.getType()); - SDValue Res = DAG.getNode(ISD::ZERO_EXTEND, getCurDebugLoc(), DestVT, N); - setValue(&I, Res); - DAG.AssignOrdering(Res.getNode(), SDNodeOrder); + setValue(&I, DAG.getNode(ISD::ZERO_EXTEND, getCurDebugLoc(), DestVT, N)); } void SelectionDAGBuilder::visitSExt(User &I) { @@ -2317,64 +2219,50 @@ void SelectionDAGBuilder::visitSExt(User &I) { // SExt also can't be a cast to bool for same reason. 
So, nothing much to do SDValue N = getValue(I.getOperand(0)); EVT DestVT = TLI.getValueType(I.getType()); - SDValue Res = DAG.getNode(ISD::SIGN_EXTEND, getCurDebugLoc(), DestVT, N); - setValue(&I, Res); - DAG.AssignOrdering(Res.getNode(), SDNodeOrder); + setValue(&I, DAG.getNode(ISD::SIGN_EXTEND, getCurDebugLoc(), DestVT, N)); } void SelectionDAGBuilder::visitFPTrunc(User &I) { // FPTrunc is never a no-op cast, no need to check SDValue N = getValue(I.getOperand(0)); EVT DestVT = TLI.getValueType(I.getType()); - SDValue Res = DAG.getNode(ISD::FP_ROUND, getCurDebugLoc(), - DestVT, N, DAG.getIntPtrConstant(0)); - setValue(&I, Res); - DAG.AssignOrdering(Res.getNode(), SDNodeOrder); + setValue(&I, DAG.getNode(ISD::FP_ROUND, getCurDebugLoc(), + DestVT, N, DAG.getIntPtrConstant(0))); } void SelectionDAGBuilder::visitFPExt(User &I){ // FPTrunc is never a no-op cast, no need to check SDValue N = getValue(I.getOperand(0)); EVT DestVT = TLI.getValueType(I.getType()); - SDValue Res = DAG.getNode(ISD::FP_EXTEND, getCurDebugLoc(), DestVT, N); - setValue(&I, Res); - DAG.AssignOrdering(Res.getNode(), SDNodeOrder); + setValue(&I, DAG.getNode(ISD::FP_EXTEND, getCurDebugLoc(), DestVT, N)); } void SelectionDAGBuilder::visitFPToUI(User &I) { // FPToUI is never a no-op cast, no need to check SDValue N = getValue(I.getOperand(0)); EVT DestVT = TLI.getValueType(I.getType()); - SDValue Res = DAG.getNode(ISD::FP_TO_UINT, getCurDebugLoc(), DestVT, N); - setValue(&I, Res); - DAG.AssignOrdering(Res.getNode(), SDNodeOrder); + setValue(&I, DAG.getNode(ISD::FP_TO_UINT, getCurDebugLoc(), DestVT, N)); } void SelectionDAGBuilder::visitFPToSI(User &I) { // FPToSI is never a no-op cast, no need to check SDValue N = getValue(I.getOperand(0)); EVT DestVT = TLI.getValueType(I.getType()); - SDValue Res = DAG.getNode(ISD::FP_TO_SINT, getCurDebugLoc(), DestVT, N); - setValue(&I, Res); - DAG.AssignOrdering(Res.getNode(), SDNodeOrder); + setValue(&I, DAG.getNode(ISD::FP_TO_SINT, getCurDebugLoc(), DestVT, N)); } void SelectionDAGBuilder::visitUIToFP(User &I) { // UIToFP is never a no-op cast, no need to check SDValue N = getValue(I.getOperand(0)); EVT DestVT = TLI.getValueType(I.getType()); - SDValue Res = DAG.getNode(ISD::UINT_TO_FP, getCurDebugLoc(), DestVT, N); - setValue(&I, Res); - DAG.AssignOrdering(Res.getNode(), SDNodeOrder); + setValue(&I, DAG.getNode(ISD::UINT_TO_FP, getCurDebugLoc(), DestVT, N)); } void SelectionDAGBuilder::visitSIToFP(User &I){ // SIToFP is never a no-op cast, no need to check SDValue N = getValue(I.getOperand(0)); EVT DestVT = TLI.getValueType(I.getType()); - SDValue Res = DAG.getNode(ISD::SINT_TO_FP, getCurDebugLoc(), DestVT, N); - setValue(&I, Res); - DAG.AssignOrdering(Res.getNode(), SDNodeOrder); + setValue(&I, DAG.getNode(ISD::SINT_TO_FP, getCurDebugLoc(), DestVT, N)); } void SelectionDAGBuilder::visitPtrToInt(User &I) { @@ -2383,9 +2271,7 @@ void SelectionDAGBuilder::visitPtrToInt(User &I) { SDValue N = getValue(I.getOperand(0)); EVT SrcVT = N.getValueType(); EVT DestVT = TLI.getValueType(I.getType()); - SDValue Res = DAG.getZExtOrTrunc(N, getCurDebugLoc(), DestVT); - setValue(&I, Res); - DAG.AssignOrdering(Res.getNode(), SDNodeOrder); + setValue(&I, DAG.getZExtOrTrunc(N, getCurDebugLoc(), DestVT)); } void SelectionDAGBuilder::visitIntToPtr(User &I) { @@ -2394,9 +2280,7 @@ void SelectionDAGBuilder::visitIntToPtr(User &I) { SDValue N = getValue(I.getOperand(0)); EVT SrcVT = N.getValueType(); EVT DestVT = TLI.getValueType(I.getType()); - SDValue Res = DAG.getZExtOrTrunc(N, getCurDebugLoc(), DestVT); - 
setValue(&I, Res); - DAG.AssignOrdering(Res.getNode(), SDNodeOrder); + setValue(&I, DAG.getZExtOrTrunc(N, getCurDebugLoc(), DestVT)); } void SelectionDAGBuilder::visitBitCast(User &I) { @@ -2405,14 +2289,11 @@ void SelectionDAGBuilder::visitBitCast(User &I) { // BitCast assures us that source and destination are the same size so this is // either a BIT_CONVERT or a no-op. - if (DestVT != N.getValueType()) { - SDValue Res = DAG.getNode(ISD::BIT_CONVERT, getCurDebugLoc(), - DestVT, N); // convert types. - setValue(&I, Res); - DAG.AssignOrdering(Res.getNode(), SDNodeOrder); - } else { + if (DestVT != N.getValueType()) + setValue(&I, DAG.getNode(ISD::BIT_CONVERT, getCurDebugLoc(), + DestVT, N)); // convert types. + else setValue(&I, N); // noop cast. - } } void SelectionDAGBuilder::visitInsertElement(User &I) { @@ -2421,13 +2302,9 @@ void SelectionDAGBuilder::visitInsertElement(User &I) { SDValue InIdx = DAG.getNode(ISD::ZERO_EXTEND, getCurDebugLoc(), TLI.getPointerTy(), getValue(I.getOperand(2))); - SDValue Res = DAG.getNode(ISD::INSERT_VECTOR_ELT, getCurDebugLoc(), - TLI.getValueType(I.getType()), - InVec, InVal, InIdx); - setValue(&I, Res); - - DAG.AssignOrdering(InIdx.getNode(), SDNodeOrder); - DAG.AssignOrdering(Res.getNode(), SDNodeOrder); + setValue(&I, DAG.getNode(ISD::INSERT_VECTOR_ELT, getCurDebugLoc(), + TLI.getValueType(I.getType()), + InVec, InVal, InIdx)); } void SelectionDAGBuilder::visitExtractElement(User &I) { @@ -2435,15 +2312,10 @@ void SelectionDAGBuilder::visitExtractElement(User &I) { SDValue InIdx = DAG.getNode(ISD::ZERO_EXTEND, getCurDebugLoc(), TLI.getPointerTy(), getValue(I.getOperand(1))); - SDValue Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, getCurDebugLoc(), - TLI.getValueType(I.getType()), InVec, InIdx); - setValue(&I, Res); - - DAG.AssignOrdering(InIdx.getNode(), SDNodeOrder); - DAG.AssignOrdering(Res.getNode(), SDNodeOrder); + setValue(&I, DAG.getNode(ISD::EXTRACT_VECTOR_ELT, getCurDebugLoc(), + TLI.getValueType(I.getType()), InVec, InIdx)); } - // Utility for visitShuffleVector - Returns true if the mask is mask starting // from SIndx and increasing to the element length (undefs are allowed). static bool SequentialMask(SmallVectorImpl<int> &Mask, unsigned SIndx) { @@ -2462,8 +2334,7 @@ void SelectionDAGBuilder::visitShuffleVector(User &I) { // Convert the ConstantVector mask operand into an array of ints, with -1 // representing undef values. SmallVector<Constant*, 8> MaskElts; - cast<Constant>(I.getOperand(2))->getVectorElements(*DAG.getContext(), - MaskElts); + cast<Constant>(I.getOperand(2))->getVectorElements(MaskElts); unsigned MaskNumElts = MaskElts.size(); for (unsigned i = 0; i != MaskNumElts; ++i) { if (isa<UndefValue>(MaskElts[i])) @@ -2477,10 +2348,8 @@ void SelectionDAGBuilder::visitShuffleVector(User &I) { unsigned SrcNumElts = SrcVT.getVectorNumElements(); if (SrcNumElts == MaskNumElts) { - SDValue Res = DAG.getVectorShuffle(VT, getCurDebugLoc(), Src1, Src2, - &Mask[0]); - setValue(&I, Res); - DAG.AssignOrdering(Res.getNode(), SDNodeOrder); + setValue(&I, DAG.getVectorShuffle(VT, getCurDebugLoc(), Src1, Src2, + &Mask[0])); return; } @@ -2491,10 +2360,8 @@ void SelectionDAGBuilder::visitShuffleVector(User &I) { // lengths match. if (SrcNumElts*2 == MaskNumElts && SequentialMask(Mask, 0)) { // The shuffle is concatenating two vectors together. 
- SDValue Res = DAG.getNode(ISD::CONCAT_VECTORS, getCurDebugLoc(), - VT, Src1, Src2); - setValue(&I, Res); - DAG.AssignOrdering(Res.getNode(), SDNodeOrder); + setValue(&I, DAG.getNode(ISD::CONCAT_VECTORS, getCurDebugLoc(), + VT, Src1, Src2)); return; } @@ -2526,12 +2393,8 @@ void SelectionDAGBuilder::visitShuffleVector(User &I) { MappedOps.push_back(Idx + MaskNumElts - SrcNumElts); } - SDValue Res = DAG.getVectorShuffle(VT, getCurDebugLoc(), Src1, Src2, - &MappedOps[0]); - setValue(&I, Res); - DAG.AssignOrdering(Src1.getNode(), SDNodeOrder); - DAG.AssignOrdering(Src2.getNode(), SDNodeOrder); - DAG.AssignOrdering(Res.getNode(), SDNodeOrder); + setValue(&I, DAG.getVectorShuffle(VT, getCurDebugLoc(), Src1, Src2, + &MappedOps[0])); return; } @@ -2583,9 +2446,7 @@ void SelectionDAGBuilder::visitShuffleVector(User &I) { } if (RangeUse[0] == 0 && RangeUse[1] == 0) { - SDValue Res = DAG.getUNDEF(VT); - setValue(&I, Res); // Vectors are not used. - DAG.AssignOrdering(Res.getNode(), SDNodeOrder); + setValue(&I, DAG.getUNDEF(VT)); // Vectors are not used. return; } else if (RangeUse[0] < 2 && RangeUse[1] < 2) { @@ -2597,8 +2458,6 @@ void SelectionDAGBuilder::visitShuffleVector(User &I) { else Src = DAG.getNode(ISD::EXTRACT_SUBVECTOR, getCurDebugLoc(), VT, Src, DAG.getIntPtrConstant(StartIdx[Input])); - - DAG.AssignOrdering(Src.getNode(), SDNodeOrder); } // Calculate new mask. @@ -2613,10 +2472,8 @@ void SelectionDAGBuilder::visitShuffleVector(User &I) { MappedOps.push_back(Idx - SrcNumElts - StartIdx[1] + MaskNumElts); } - SDValue Res = DAG.getVectorShuffle(VT, getCurDebugLoc(), Src1, Src2, - &MappedOps[0]); - setValue(&I, Res); - DAG.AssignOrdering(Res.getNode(), SDNodeOrder); + setValue(&I, DAG.getVectorShuffle(VT, getCurDebugLoc(), Src1, Src2, + &MappedOps[0])); return; } } @@ -2643,14 +2500,11 @@ void SelectionDAGBuilder::visitShuffleVector(User &I) { DAG.getConstant(Idx - SrcNumElts, PtrVT)); Ops.push_back(Res); - DAG.AssignOrdering(Res.getNode(), SDNodeOrder); } } - SDValue Res = DAG.getNode(ISD::BUILD_VECTOR, getCurDebugLoc(), - VT, &Ops[0], Ops.size()); - setValue(&I, Res); - DAG.AssignOrdering(Res.getNode(), SDNodeOrder); + setValue(&I, DAG.getNode(ISD::BUILD_VECTOR, getCurDebugLoc(), + VT, &Ops[0], Ops.size())); } void SelectionDAGBuilder::visitInsertValue(InsertValueInst &I) { @@ -2689,11 +2543,9 @@ void SelectionDAGBuilder::visitInsertValue(InsertValueInst &I) { Values[i] = IntoUndef ? 
DAG.getUNDEF(AggValueVTs[i]) : SDValue(Agg.getNode(), Agg.getResNo() + i); - SDValue Res = DAG.getNode(ISD::MERGE_VALUES, getCurDebugLoc(), - DAG.getVTList(&AggValueVTs[0], NumAggValues), - &Values[0], NumAggValues); - setValue(&I, Res); - DAG.AssignOrdering(Res.getNode(), SDNodeOrder); + setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurDebugLoc(), + DAG.getVTList(&AggValueVTs[0], NumAggValues), + &Values[0], NumAggValues)); } void SelectionDAGBuilder::visitExtractValue(ExtractValueInst &I) { @@ -2719,11 +2571,9 @@ void SelectionDAGBuilder::visitExtractValue(ExtractValueInst &I) { DAG.getUNDEF(Agg.getNode()->getValueType(Agg.getResNo() + i)) : SDValue(Agg.getNode(), Agg.getResNo() + i); - SDValue Res = DAG.getNode(ISD::MERGE_VALUES, getCurDebugLoc(), - DAG.getVTList(&ValValueVTs[0], NumValValues), - &Values[0], NumValValues); - setValue(&I, Res); - DAG.AssignOrdering(Res.getNode(), SDNodeOrder); + setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurDebugLoc(), + DAG.getVTList(&ValValueVTs[0], NumValValues), + &Values[0], NumValValues)); } void SelectionDAGBuilder::visitGetElementPtr(User &I) { @@ -2740,7 +2590,6 @@ void SelectionDAGBuilder::visitGetElementPtr(User &I) { uint64_t Offset = TD->getStructLayout(StTy)->getElementOffset(Field); N = DAG.getNode(ISD::ADD, getCurDebugLoc(), N.getValueType(), N, DAG.getIntPtrConstant(Offset)); - DAG.AssignOrdering(N.getNode(), SDNodeOrder); } Ty = StTy->getElementType(Field); @@ -2764,9 +2613,6 @@ void SelectionDAGBuilder::visitGetElementPtr(User &I) { N = DAG.getNode(ISD::ADD, getCurDebugLoc(), N.getValueType(), N, OffsVal); - - DAG.AssignOrdering(OffsVal.getNode(), SDNodeOrder); - DAG.AssignOrdering(N.getNode(), SDNodeOrder); continue; } @@ -2792,13 +2638,10 @@ void SelectionDAGBuilder::visitGetElementPtr(User &I) { IdxN = DAG.getNode(ISD::MUL, getCurDebugLoc(), N.getValueType(), IdxN, Scale); } - - DAG.AssignOrdering(IdxN.getNode(), SDNodeOrder); } N = DAG.getNode(ISD::ADD, getCurDebugLoc(), N.getValueType(), N, IdxN); - DAG.AssignOrdering(N.getNode(), SDNodeOrder); } } @@ -2823,11 +2666,8 @@ void SelectionDAGBuilder::visitAlloca(AllocaInst &I) { AllocSize, DAG.getConstant(TySize, AllocSize.getValueType())); - DAG.AssignOrdering(AllocSize.getNode(), SDNodeOrder); - EVT IntPtr = TLI.getPointerTy(); AllocSize = DAG.getZExtOrTrunc(AllocSize, getCurDebugLoc(), IntPtr); - DAG.AssignOrdering(AllocSize.getNode(), SDNodeOrder); // Handle alignment. If the requested alignment is less than or equal to // the stack alignment, ignore it. If the size is greater than or equal to @@ -2842,13 +2682,11 @@ void SelectionDAGBuilder::visitAlloca(AllocaInst &I) { AllocSize = DAG.getNode(ISD::ADD, getCurDebugLoc(), AllocSize.getValueType(), AllocSize, DAG.getIntPtrConstant(StackAlign-1)); - DAG.AssignOrdering(AllocSize.getNode(), SDNodeOrder); // Mask out the low bits for alignment purposes. AllocSize = DAG.getNode(ISD::AND, getCurDebugLoc(), AllocSize.getValueType(), AllocSize, DAG.getIntPtrConstant(~(uint64_t)(StackAlign-1))); - DAG.AssignOrdering(AllocSize.getNode(), SDNodeOrder); SDValue Ops[] = { getRoot(), AllocSize, DAG.getIntPtrConstant(Align) }; SDVTList VTs = DAG.getVTList(AllocSize.getValueType(), MVT::Other); @@ -2856,7 +2694,6 @@ void SelectionDAGBuilder::visitAlloca(AllocaInst &I) { VTs, Ops, 3); setValue(&I, DSA); DAG.setRoot(DSA.getValue(1)); - DAG.AssignOrdering(DSA.getNode(), SDNodeOrder); // Inform the Frame Information that we have just allocated a variable-sized // object. 
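The dynamic-alloca sizing in visitAlloca above uses the standard power-of-two align-up: over-allocate by StackAlign - 1 (the ADD node), then mask off the low bits (the AND node with ~(StackAlign - 1)). In isolation:

    #include <cstdint>
    #include <cstdio>

    // Round Size up to a multiple of Align (a power of two), exactly the
    // ADD/AND pair visitAlloca builds around AllocSize.
    uint64_t alignUp(uint64_t Size, uint64_t Align) {
      return (Size + Align - 1) & ~(Align - 1);
    }

    int main() {
      std::printf("%llu %llu\n",
                  (unsigned long long)alignUp(37, 16),   // 48
                  (unsigned long long)alignUp(64, 16));  // 64
      return 0;
    }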
@@ -2868,7 +2705,9 @@ void SelectionDAGBuilder::visitLoad(LoadInst &I) { SDValue Ptr = getValue(SV); const Type *Ty = I.getType(); + bool isVolatile = I.isVolatile(); + bool isNonTemporal = I.getMetadata("nontemporal") != 0; unsigned Alignment = I.getAlignment(); SmallVector<EVT, 4> ValueVTs; @@ -2900,13 +2739,11 @@ void SelectionDAGBuilder::visitLoad(LoadInst &I) { PtrVT, Ptr, DAG.getConstant(Offsets[i], PtrVT)); SDValue L = DAG.getLoad(ValueVTs[i], getCurDebugLoc(), Root, - A, SV, Offsets[i], isVolatile, Alignment); + A, SV, Offsets[i], isVolatile, + isNonTemporal, Alignment); Values[i] = L; Chains[i] = L.getValue(1); - - DAG.AssignOrdering(A.getNode(), SDNodeOrder); - DAG.AssignOrdering(L.getNode(), SDNodeOrder); } if (!ConstantMemory) { @@ -2916,15 +2753,11 @@ void SelectionDAGBuilder::visitLoad(LoadInst &I) { DAG.setRoot(Chain); else PendingLoads.push_back(Chain); - - DAG.AssignOrdering(Chain.getNode(), SDNodeOrder); } - SDValue Res = DAG.getNode(ISD::MERGE_VALUES, getCurDebugLoc(), - DAG.getVTList(&ValueVTs[0], NumValues), - &Values[0], NumValues); - setValue(&I, Res); - DAG.AssignOrdering(Res.getNode(), SDNodeOrder); + setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurDebugLoc(), + DAG.getVTList(&ValueVTs[0], NumValues), + &Values[0], NumValues)); } void SelectionDAGBuilder::visitStore(StoreInst &I) { @@ -2948,6 +2781,7 @@ void SelectionDAGBuilder::visitStore(StoreInst &I) { SmallVector<SDValue, 4> Chains(NumValues); EVT PtrVT = Ptr.getValueType(); bool isVolatile = I.isVolatile(); + bool isNonTemporal = I.getMetadata("nontemporal") != 0; unsigned Alignment = I.getAlignment(); for (unsigned i = 0; i != NumValues; ++i) { @@ -2955,16 +2789,12 @@ void SelectionDAGBuilder::visitStore(StoreInst &I) { DAG.getConstant(Offsets[i], PtrVT)); Chains[i] = DAG.getStore(Root, getCurDebugLoc(), SDValue(Src.getNode(), Src.getResNo() + i), - Add, PtrV, Offsets[i], isVolatile, Alignment); - - DAG.AssignOrdering(Add.getNode(), SDNodeOrder); - DAG.AssignOrdering(Chains[i].getNode(), SDNodeOrder); + Add, PtrV, Offsets[i], isVolatile, + isNonTemporal, Alignment); } - SDValue Res = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(), - MVT::Other, &Chains[0], NumValues); - DAG.setRoot(Res); - DAG.AssignOrdering(Res.getNode(), SDNodeOrder); + DAG.setRoot(DAG.getNode(ISD::TokenFactor, getCurDebugLoc(), + MVT::Other, &Chains[0], NumValues)); } /// visitTargetIntrinsic - Lower a call of a target intrinsic to an INTRINSIC @@ -3035,8 +2865,6 @@ void SelectionDAGBuilder::visitTargetIntrinsic(CallInst &I, VTs, &Ops[0], Ops.size()); } - DAG.AssignOrdering(Result.getNode(), SDNodeOrder); - if (HasChain) { SDValue Chain = Result.getValue(Result.getNode()->getNumValues()-1); if (OnlyLoad) @@ -3049,7 +2877,6 @@ void SelectionDAGBuilder::visitTargetIntrinsic(CallInst &I, if (const VectorType *PTy = dyn_cast<VectorType>(I.getType())) { EVT VT = TLI.getValueType(PTy); Result = DAG.getNode(ISD::BIT_CONVERT, getCurDebugLoc(), VT, Result); - DAG.AssignOrdering(Result.getNode(), SDNodeOrder); } setValue(&I, Result); @@ -3068,12 +2895,7 @@ GetSignificand(SelectionDAG &DAG, SDValue Op, DebugLoc dl, unsigned Order) { DAG.getConstant(0x007fffff, MVT::i32)); SDValue t2 = DAG.getNode(ISD::OR, dl, MVT::i32, t1, DAG.getConstant(0x3f800000, MVT::i32)); - SDValue Res = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f32, t2); - - DAG.AssignOrdering(t1.getNode(), Order); - DAG.AssignOrdering(t2.getNode(), Order); - DAG.AssignOrdering(Res.getNode(), Order); - return Res; + return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f32, t2); } /// GetExponent - 
Get the exponent: @@ -3090,13 +2912,7 @@ GetExponent(SelectionDAG &DAG, SDValue Op, const TargetLowering &TLI, DAG.getConstant(23, TLI.getPointerTy())); SDValue t2 = DAG.getNode(ISD::SUB, dl, MVT::i32, t1, DAG.getConstant(127, MVT::i32)); - SDValue Res = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, t2); - - DAG.AssignOrdering(t0.getNode(), Order); - DAG.AssignOrdering(t1.getNode(), Order); - DAG.AssignOrdering(t2.getNode(), Order); - DAG.AssignOrdering(Res.getNode(), Order); - return Res; + return DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, t2); } /// getF32Constant - Get 32-bit floating point constant. @@ -3120,7 +2936,6 @@ SelectionDAGBuilder::implVisitBinaryAtomic(CallInst& I, ISD::NodeType Op) { I.getOperand(1)); setValue(&I, L); DAG.setRoot(L.getValue(1)); - DAG.AssignOrdering(L.getNode(), SDNodeOrder); return 0; } @@ -3131,10 +2946,7 @@ SelectionDAGBuilder::implVisitAluOverflow(CallInst &I, ISD::NodeType Op) { SDValue Op2 = getValue(I.getOperand(2)); SDVTList VTs = DAG.getVTList(Op1.getValueType(), MVT::i1); - SDValue Result = DAG.getNode(Op, getCurDebugLoc(), VTs, Op1, Op2); - - setValue(&I, Result); - DAG.AssignOrdering(Result.getNode(), SDNodeOrder); + setValue(&I, DAG.getNode(Op, getCurDebugLoc(), VTs, Op1, Op2)); return 0; } @@ -3162,15 +2974,9 @@ SelectionDAGBuilder::visitExp(CallInst &I) { SDValue t1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, IntegerPartOfX); SDValue X = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0, t1); - DAG.AssignOrdering(t0.getNode(), SDNodeOrder); - DAG.AssignOrdering(IntegerPartOfX.getNode(), SDNodeOrder); - DAG.AssignOrdering(t1.getNode(), SDNodeOrder); - DAG.AssignOrdering(X.getNode(), SDNodeOrder); - // IntegerPartOfX <<= 23; IntegerPartOfX = DAG.getNode(ISD::SHL, dl, MVT::i32, IntegerPartOfX, DAG.getConstant(23, TLI.getPointerTy())); - DAG.AssignOrdering(IntegerPartOfX.getNode(), SDNodeOrder); if (LimitFloatPrecision <= 6) { // For floating-point precision of 6: @@ -3194,14 +3000,6 @@ SelectionDAGBuilder::visitExp(CallInst &I) { TwoToFracPartOfX, IntegerPartOfX); result = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f32, t6); - - DAG.AssignOrdering(t2.getNode(), SDNodeOrder); - DAG.AssignOrdering(t3.getNode(), SDNodeOrder); - DAG.AssignOrdering(t4.getNode(), SDNodeOrder); - DAG.AssignOrdering(t5.getNode(), SDNodeOrder); - DAG.AssignOrdering(t6.getNode(), SDNodeOrder); - DAG.AssignOrdering(TwoToFracPartOfX.getNode(), SDNodeOrder); - DAG.AssignOrdering(result.getNode(), SDNodeOrder); } else if (LimitFloatPrecision > 6 && LimitFloatPrecision <= 12) { // For floating-point precision of 12: // @@ -3228,16 +3026,6 @@ SelectionDAGBuilder::visitExp(CallInst &I) { TwoToFracPartOfX, IntegerPartOfX); result = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f32, t8); - - DAG.AssignOrdering(t2.getNode(), SDNodeOrder); - DAG.AssignOrdering(t3.getNode(), SDNodeOrder); - DAG.AssignOrdering(t4.getNode(), SDNodeOrder); - DAG.AssignOrdering(t5.getNode(), SDNodeOrder); - DAG.AssignOrdering(t6.getNode(), SDNodeOrder); - DAG.AssignOrdering(t7.getNode(), SDNodeOrder); - DAG.AssignOrdering(t8.getNode(), SDNodeOrder); - DAG.AssignOrdering(TwoToFracPartOfX.getNode(), SDNodeOrder); - DAG.AssignOrdering(result.getNode(), SDNodeOrder); } else { // LimitFloatPrecision > 12 && LimitFloatPrecision <= 18 // For floating-point precision of 18: // @@ -3277,29 +3065,12 @@ SelectionDAGBuilder::visitExp(CallInst &I) { TwoToFracPartOfX, IntegerPartOfX); result = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f32, t14); - - DAG.AssignOrdering(t2.getNode(), SDNodeOrder); - DAG.AssignOrdering(t3.getNode(), SDNodeOrder); 
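GetSignificand and GetExponent above take an IEEE-754 single apart with integer operations: keep the 23 mantissa bits and OR in the biased exponent for 1.0 (0x3f800000) to obtain a value in [1, 2), or shift the exponent field down by 23 and subtract the bias of 127. These two helpers feed the log/log2/log10 expansions that follow, which compute log(x) as the exponent times log(2) plus a polynomial in the significand. The same bit-twiddling in ordinary C++, valid for positive normal inputs, with memcpy standing in for the BIT_CONVERT nodes:

    #include <cstdint>
    #include <cstdio>
    #include <cstring>

    // Mirrors GetSignificand: mantissa bits with a forced exponent of 0,
    // i.e. a float in [1.0, 2.0).
    float getSignificand(float Op) {
      uint32_t bits;
      std::memcpy(&bits, &Op, sizeof bits);         // BIT_CONVERT f32 -> i32
      uint32_t t = (bits & 0x007fffffu) | 0x3f800000u;
      float r;
      std::memcpy(&r, &t, sizeof r);                // BIT_CONVERT i32 -> f32
      return r;
    }

    // Mirrors GetExponent: shift out the mantissa, remove the bias.
    float getExponent(float Op) {
      uint32_t bits;
      std::memcpy(&bits, &Op, sizeof bits);
      return (float)(int)((bits >> 23) - 127);      // SRL, SUB, SINT_TO_FP
    }

    int main() {
      float x = 12.0f;                              // 12 = 1.5 * 2^3
      std::printf("significand=%g exponent=%g\n",
                  getSignificand(x), getExponent(x));  // 1.5 and 3
      return 0;
    }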
- DAG.AssignOrdering(t4.getNode(), SDNodeOrder); - DAG.AssignOrdering(t5.getNode(), SDNodeOrder); - DAG.AssignOrdering(t6.getNode(), SDNodeOrder); - DAG.AssignOrdering(t7.getNode(), SDNodeOrder); - DAG.AssignOrdering(t8.getNode(), SDNodeOrder); - DAG.AssignOrdering(t9.getNode(), SDNodeOrder); - DAG.AssignOrdering(t10.getNode(), SDNodeOrder); - DAG.AssignOrdering(t11.getNode(), SDNodeOrder); - DAG.AssignOrdering(t12.getNode(), SDNodeOrder); - DAG.AssignOrdering(t13.getNode(), SDNodeOrder); - DAG.AssignOrdering(t14.getNode(), SDNodeOrder); - DAG.AssignOrdering(TwoToFracPartOfX.getNode(), SDNodeOrder); - DAG.AssignOrdering(result.getNode(), SDNodeOrder); } } else { // No special expansion. result = DAG.getNode(ISD::FEXP, dl, getValue(I.getOperand(1)).getValueType(), getValue(I.getOperand(1))); - DAG.AssignOrdering(result.getNode(), SDNodeOrder); } setValue(&I, result); @@ -3317,15 +3088,11 @@ SelectionDAGBuilder::visitLog(CallInst &I) { SDValue Op = getValue(I.getOperand(1)); SDValue Op1 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, Op); - DAG.AssignOrdering(Op1.getNode(), SDNodeOrder); - // Scale the exponent by log(2) [0.69314718f]. SDValue Exp = GetExponent(DAG, Op1, TLI, dl, SDNodeOrder); SDValue LogOfExponent = DAG.getNode(ISD::FMUL, dl, MVT::f32, Exp, getF32Constant(DAG, 0x3f317218)); - DAG.AssignOrdering(LogOfExponent.getNode(), SDNodeOrder); - // Get the significand and build it into a floating-point number with // exponent of 1. SDValue X = GetSignificand(DAG, Op1, dl, SDNodeOrder); @@ -3348,12 +3115,6 @@ SelectionDAGBuilder::visitLog(CallInst &I) { result = DAG.getNode(ISD::FADD, dl, MVT::f32, LogOfExponent, LogOfMantissa); - - DAG.AssignOrdering(t0.getNode(), SDNodeOrder); - DAG.AssignOrdering(t1.getNode(), SDNodeOrder); - DAG.AssignOrdering(t2.getNode(), SDNodeOrder); - DAG.AssignOrdering(LogOfMantissa.getNode(), SDNodeOrder); - DAG.AssignOrdering(result.getNode(), SDNodeOrder); } else if (LimitFloatPrecision > 6 && LimitFloatPrecision <= 12) { // For floating-point precision of 12: // @@ -3380,16 +3141,6 @@ SelectionDAGBuilder::visitLog(CallInst &I) { result = DAG.getNode(ISD::FADD, dl, MVT::f32, LogOfExponent, LogOfMantissa); - - DAG.AssignOrdering(t0.getNode(), SDNodeOrder); - DAG.AssignOrdering(t1.getNode(), SDNodeOrder); - DAG.AssignOrdering(t2.getNode(), SDNodeOrder); - DAG.AssignOrdering(t3.getNode(), SDNodeOrder); - DAG.AssignOrdering(t4.getNode(), SDNodeOrder); - DAG.AssignOrdering(t5.getNode(), SDNodeOrder); - DAG.AssignOrdering(t6.getNode(), SDNodeOrder); - DAG.AssignOrdering(LogOfMantissa.getNode(), SDNodeOrder); - DAG.AssignOrdering(result.getNode(), SDNodeOrder); } else { // LimitFloatPrecision > 12 && LimitFloatPrecision <= 18 // For floating-point precision of 18: // @@ -3424,27 +3175,12 @@ SelectionDAGBuilder::visitLog(CallInst &I) { result = DAG.getNode(ISD::FADD, dl, MVT::f32, LogOfExponent, LogOfMantissa); - - DAG.AssignOrdering(t0.getNode(), SDNodeOrder); - DAG.AssignOrdering(t1.getNode(), SDNodeOrder); - DAG.AssignOrdering(t2.getNode(), SDNodeOrder); - DAG.AssignOrdering(t3.getNode(), SDNodeOrder); - DAG.AssignOrdering(t4.getNode(), SDNodeOrder); - DAG.AssignOrdering(t5.getNode(), SDNodeOrder); - DAG.AssignOrdering(t6.getNode(), SDNodeOrder); - DAG.AssignOrdering(t7.getNode(), SDNodeOrder); - DAG.AssignOrdering(t8.getNode(), SDNodeOrder); - DAG.AssignOrdering(t9.getNode(), SDNodeOrder); - DAG.AssignOrdering(t10.getNode(), SDNodeOrder); - DAG.AssignOrdering(LogOfMantissa.getNode(), SDNodeOrder); - DAG.AssignOrdering(result.getNode(), SDNodeOrder); } } else { 
// No special expansion. result = DAG.getNode(ISD::FLOG, dl, getValue(I.getOperand(1)).getValueType(), getValue(I.getOperand(1))); - DAG.AssignOrdering(result.getNode(), SDNodeOrder); } setValue(&I, result); @@ -3462,13 +3198,9 @@ SelectionDAGBuilder::visitLog2(CallInst &I) { SDValue Op = getValue(I.getOperand(1)); SDValue Op1 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, Op); - DAG.AssignOrdering(Op1.getNode(), SDNodeOrder); - // Get the exponent. SDValue LogOfExponent = GetExponent(DAG, Op1, TLI, dl, SDNodeOrder); - DAG.AssignOrdering(LogOfExponent.getNode(), SDNodeOrder); - // Get the significand and build it into a floating-point number with // exponent of 1. SDValue X = GetSignificand(DAG, Op1, dl, SDNodeOrder); @@ -3491,12 +3223,6 @@ SelectionDAGBuilder::visitLog2(CallInst &I) { result = DAG.getNode(ISD::FADD, dl, MVT::f32, LogOfExponent, Log2ofMantissa); - - DAG.AssignOrdering(t0.getNode(), SDNodeOrder); - DAG.AssignOrdering(t1.getNode(), SDNodeOrder); - DAG.AssignOrdering(t2.getNode(), SDNodeOrder); - DAG.AssignOrdering(Log2ofMantissa.getNode(), SDNodeOrder); - DAG.AssignOrdering(result.getNode(), SDNodeOrder); } else if (LimitFloatPrecision > 6 && LimitFloatPrecision <= 12) { // For floating-point precision of 12: // @@ -3523,16 +3249,6 @@ SelectionDAGBuilder::visitLog2(CallInst &I) { result = DAG.getNode(ISD::FADD, dl, MVT::f32, LogOfExponent, Log2ofMantissa); - - DAG.AssignOrdering(t0.getNode(), SDNodeOrder); - DAG.AssignOrdering(t1.getNode(), SDNodeOrder); - DAG.AssignOrdering(t2.getNode(), SDNodeOrder); - DAG.AssignOrdering(t3.getNode(), SDNodeOrder); - DAG.AssignOrdering(t4.getNode(), SDNodeOrder); - DAG.AssignOrdering(t5.getNode(), SDNodeOrder); - DAG.AssignOrdering(t6.getNode(), SDNodeOrder); - DAG.AssignOrdering(Log2ofMantissa.getNode(), SDNodeOrder); - DAG.AssignOrdering(result.getNode(), SDNodeOrder); } else { // LimitFloatPrecision > 12 && LimitFloatPrecision <= 18 // For floating-point precision of 18: // @@ -3568,27 +3284,12 @@ SelectionDAGBuilder::visitLog2(CallInst &I) { result = DAG.getNode(ISD::FADD, dl, MVT::f32, LogOfExponent, Log2ofMantissa); - - DAG.AssignOrdering(t0.getNode(), SDNodeOrder); - DAG.AssignOrdering(t1.getNode(), SDNodeOrder); - DAG.AssignOrdering(t2.getNode(), SDNodeOrder); - DAG.AssignOrdering(t3.getNode(), SDNodeOrder); - DAG.AssignOrdering(t4.getNode(), SDNodeOrder); - DAG.AssignOrdering(t5.getNode(), SDNodeOrder); - DAG.AssignOrdering(t6.getNode(), SDNodeOrder); - DAG.AssignOrdering(t7.getNode(), SDNodeOrder); - DAG.AssignOrdering(t8.getNode(), SDNodeOrder); - DAG.AssignOrdering(t9.getNode(), SDNodeOrder); - DAG.AssignOrdering(t10.getNode(), SDNodeOrder); - DAG.AssignOrdering(Log2ofMantissa.getNode(), SDNodeOrder); - DAG.AssignOrdering(result.getNode(), SDNodeOrder); } } else { // No special expansion. result = DAG.getNode(ISD::FLOG2, dl, getValue(I.getOperand(1)).getValueType(), getValue(I.getOperand(1))); - DAG.AssignOrdering(result.getNode(), SDNodeOrder); } setValue(&I, result); @@ -3606,15 +3307,11 @@ SelectionDAGBuilder::visitLog10(CallInst &I) { SDValue Op = getValue(I.getOperand(1)); SDValue Op1 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, Op); - DAG.AssignOrdering(Op1.getNode(), SDNodeOrder); - // Scale the exponent by log10(2) [0.30102999f]. 
SDValue Exp = GetExponent(DAG, Op1, TLI, dl, SDNodeOrder); SDValue LogOfExponent = DAG.getNode(ISD::FMUL, dl, MVT::f32, Exp, getF32Constant(DAG, 0x3e9a209a)); - DAG.AssignOrdering(LogOfExponent.getNode(), SDNodeOrder); - // Get the significand and build it into a floating-point number with // exponent of 1. SDValue X = GetSignificand(DAG, Op1, dl, SDNodeOrder); @@ -3637,12 +3334,6 @@ SelectionDAGBuilder::visitLog10(CallInst &I) { result = DAG.getNode(ISD::FADD, dl, MVT::f32, LogOfExponent, Log10ofMantissa); - - DAG.AssignOrdering(t0.getNode(), SDNodeOrder); - DAG.AssignOrdering(t1.getNode(), SDNodeOrder); - DAG.AssignOrdering(t2.getNode(), SDNodeOrder); - DAG.AssignOrdering(Log10ofMantissa.getNode(), SDNodeOrder); - DAG.AssignOrdering(result.getNode(), SDNodeOrder); } else if (LimitFloatPrecision > 6 && LimitFloatPrecision <= 12) { // For floating-point precision of 12: // @@ -3665,14 +3356,6 @@ SelectionDAGBuilder::visitLog10(CallInst &I) { result = DAG.getNode(ISD::FADD, dl, MVT::f32, LogOfExponent, Log10ofMantissa); - - DAG.AssignOrdering(t0.getNode(), SDNodeOrder); - DAG.AssignOrdering(t1.getNode(), SDNodeOrder); - DAG.AssignOrdering(t2.getNode(), SDNodeOrder); - DAG.AssignOrdering(t3.getNode(), SDNodeOrder); - DAG.AssignOrdering(t4.getNode(), SDNodeOrder); - DAG.AssignOrdering(Log10ofMantissa.getNode(), SDNodeOrder); - DAG.AssignOrdering(result.getNode(), SDNodeOrder); } else { // LimitFloatPrecision > 12 && LimitFloatPrecision <= 18 // For floating-point precision of 18: // @@ -3703,25 +3386,12 @@ SelectionDAGBuilder::visitLog10(CallInst &I) { result = DAG.getNode(ISD::FADD, dl, MVT::f32, LogOfExponent, Log10ofMantissa); - - DAG.AssignOrdering(t0.getNode(), SDNodeOrder); - DAG.AssignOrdering(t1.getNode(), SDNodeOrder); - DAG.AssignOrdering(t2.getNode(), SDNodeOrder); - DAG.AssignOrdering(t3.getNode(), SDNodeOrder); - DAG.AssignOrdering(t4.getNode(), SDNodeOrder); - DAG.AssignOrdering(t5.getNode(), SDNodeOrder); - DAG.AssignOrdering(t6.getNode(), SDNodeOrder); - DAG.AssignOrdering(t7.getNode(), SDNodeOrder); - DAG.AssignOrdering(t8.getNode(), SDNodeOrder); - DAG.AssignOrdering(Log10ofMantissa.getNode(), SDNodeOrder); - DAG.AssignOrdering(result.getNode(), SDNodeOrder); } } else { // No special expansion. 
result = DAG.getNode(ISD::FLOG10, dl, getValue(I.getOperand(1)).getValueType(), getValue(I.getOperand(1))); - DAG.AssignOrdering(result.getNode(), SDNodeOrder); } setValue(&I, result); @@ -3740,8 +3410,6 @@ SelectionDAGBuilder::visitExp2(CallInst &I) { SDValue IntegerPartOfX = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Op); - DAG.AssignOrdering(IntegerPartOfX.getNode(), SDNodeOrder); - // FractionalPartOfX = x - (float)IntegerPartOfX; SDValue t1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, IntegerPartOfX); SDValue X = DAG.getNode(ISD::FSUB, dl, MVT::f32, Op, t1); @@ -3750,10 +3418,6 @@ SelectionDAGBuilder::visitExp2(CallInst &I) { IntegerPartOfX = DAG.getNode(ISD::SHL, dl, MVT::i32, IntegerPartOfX, DAG.getConstant(23, TLI.getPointerTy())); - DAG.AssignOrdering(t1.getNode(), SDNodeOrder); - DAG.AssignOrdering(X.getNode(), SDNodeOrder); - DAG.AssignOrdering(IntegerPartOfX.getNode(), SDNodeOrder); - if (LimitFloatPrecision <= 6) { // For floating-point precision of 6: // @@ -3775,14 +3439,6 @@ SelectionDAGBuilder::visitExp2(CallInst &I) { result = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f32, TwoToFractionalPartOfX); - - DAG.AssignOrdering(t2.getNode(), SDNodeOrder); - DAG.AssignOrdering(t3.getNode(), SDNodeOrder); - DAG.AssignOrdering(t4.getNode(), SDNodeOrder); - DAG.AssignOrdering(t5.getNode(), SDNodeOrder); - DAG.AssignOrdering(t6.getNode(), SDNodeOrder); - DAG.AssignOrdering(TwoToFractionalPartOfX.getNode(), SDNodeOrder); - DAG.AssignOrdering(result.getNode(), SDNodeOrder); } else if (LimitFloatPrecision > 6 && LimitFloatPrecision <= 12) { // For floating-point precision of 12: // @@ -3808,16 +3464,6 @@ SelectionDAGBuilder::visitExp2(CallInst &I) { result = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f32, TwoToFractionalPartOfX); - - DAG.AssignOrdering(t2.getNode(), SDNodeOrder); - DAG.AssignOrdering(t3.getNode(), SDNodeOrder); - DAG.AssignOrdering(t4.getNode(), SDNodeOrder); - DAG.AssignOrdering(t5.getNode(), SDNodeOrder); - DAG.AssignOrdering(t6.getNode(), SDNodeOrder); - DAG.AssignOrdering(t7.getNode(), SDNodeOrder); - DAG.AssignOrdering(t8.getNode(), SDNodeOrder); - DAG.AssignOrdering(TwoToFractionalPartOfX.getNode(), SDNodeOrder); - DAG.AssignOrdering(result.getNode(), SDNodeOrder); } else { // LimitFloatPrecision > 12 && LimitFloatPrecision <= 18 // For floating-point precision of 18: // @@ -3854,29 +3500,12 @@ SelectionDAGBuilder::visitExp2(CallInst &I) { result = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f32, TwoToFractionalPartOfX); - - DAG.AssignOrdering(t2.getNode(), SDNodeOrder); - DAG.AssignOrdering(t3.getNode(), SDNodeOrder); - DAG.AssignOrdering(t4.getNode(), SDNodeOrder); - DAG.AssignOrdering(t5.getNode(), SDNodeOrder); - DAG.AssignOrdering(t6.getNode(), SDNodeOrder); - DAG.AssignOrdering(t7.getNode(), SDNodeOrder); - DAG.AssignOrdering(t8.getNode(), SDNodeOrder); - DAG.AssignOrdering(t9.getNode(), SDNodeOrder); - DAG.AssignOrdering(t10.getNode(), SDNodeOrder); - DAG.AssignOrdering(t11.getNode(), SDNodeOrder); - DAG.AssignOrdering(t12.getNode(), SDNodeOrder); - DAG.AssignOrdering(t13.getNode(), SDNodeOrder); - DAG.AssignOrdering(t14.getNode(), SDNodeOrder); - DAG.AssignOrdering(TwoToFractionalPartOfX.getNode(), SDNodeOrder); - DAG.AssignOrdering(result.getNode(), SDNodeOrder); } } else { // No special expansion. 
result = DAG.getNode(ISD::FEXP2, dl, getValue(I.getOperand(1)).getValueType(), getValue(I.getOperand(1))); - DAG.AssignOrdering(result.getNode(), SDNodeOrder); } setValue(&I, result); @@ -3918,17 +3547,10 @@ SelectionDAGBuilder::visitPow(CallInst &I) { SDValue t1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, IntegerPartOfX); SDValue X = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0, t1); - DAG.AssignOrdering(t0.getNode(), SDNodeOrder); - DAG.AssignOrdering(t1.getNode(), SDNodeOrder); - DAG.AssignOrdering(IntegerPartOfX.getNode(), SDNodeOrder); - DAG.AssignOrdering(X.getNode(), SDNodeOrder); - // IntegerPartOfX <<= 23; IntegerPartOfX = DAG.getNode(ISD::SHL, dl, MVT::i32, IntegerPartOfX, DAG.getConstant(23, TLI.getPointerTy())); - DAG.AssignOrdering(IntegerPartOfX.getNode(), SDNodeOrder); - if (LimitFloatPrecision <= 6) { // For floating-point precision of 6: // @@ -3950,14 +3572,6 @@ SelectionDAGBuilder::visitPow(CallInst &I) { result = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f32, TwoToFractionalPartOfX); - - DAG.AssignOrdering(t2.getNode(), SDNodeOrder); - DAG.AssignOrdering(t3.getNode(), SDNodeOrder); - DAG.AssignOrdering(t4.getNode(), SDNodeOrder); - DAG.AssignOrdering(t5.getNode(), SDNodeOrder); - DAG.AssignOrdering(t6.getNode(), SDNodeOrder); - DAG.AssignOrdering(TwoToFractionalPartOfX.getNode(), SDNodeOrder); - DAG.AssignOrdering(result.getNode(), SDNodeOrder); } else if (LimitFloatPrecision > 6 && LimitFloatPrecision <= 12) { // For floating-point precision of 12: // @@ -3983,16 +3597,6 @@ SelectionDAGBuilder::visitPow(CallInst &I) { result = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f32, TwoToFractionalPartOfX); - - DAG.AssignOrdering(t2.getNode(), SDNodeOrder); - DAG.AssignOrdering(t3.getNode(), SDNodeOrder); - DAG.AssignOrdering(t4.getNode(), SDNodeOrder); - DAG.AssignOrdering(t5.getNode(), SDNodeOrder); - DAG.AssignOrdering(t6.getNode(), SDNodeOrder); - DAG.AssignOrdering(t7.getNode(), SDNodeOrder); - DAG.AssignOrdering(t8.getNode(), SDNodeOrder); - DAG.AssignOrdering(TwoToFractionalPartOfX.getNode(), SDNodeOrder); - DAG.AssignOrdering(result.getNode(), SDNodeOrder); } else { // LimitFloatPrecision > 12 && LimitFloatPrecision <= 18 // For floating-point precision of 18: // @@ -4029,22 +3633,6 @@ SelectionDAGBuilder::visitPow(CallInst &I) { result = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f32, TwoToFractionalPartOfX); - - DAG.AssignOrdering(t2.getNode(), SDNodeOrder); - DAG.AssignOrdering(t3.getNode(), SDNodeOrder); - DAG.AssignOrdering(t4.getNode(), SDNodeOrder); - DAG.AssignOrdering(t5.getNode(), SDNodeOrder); - DAG.AssignOrdering(t6.getNode(), SDNodeOrder); - DAG.AssignOrdering(t7.getNode(), SDNodeOrder); - DAG.AssignOrdering(t8.getNode(), SDNodeOrder); - DAG.AssignOrdering(t9.getNode(), SDNodeOrder); - DAG.AssignOrdering(t10.getNode(), SDNodeOrder); - DAG.AssignOrdering(t11.getNode(), SDNodeOrder); - DAG.AssignOrdering(t12.getNode(), SDNodeOrder); - DAG.AssignOrdering(t13.getNode(), SDNodeOrder); - DAG.AssignOrdering(t14.getNode(), SDNodeOrder); - DAG.AssignOrdering(TwoToFractionalPartOfX.getNode(), SDNodeOrder); - DAG.AssignOrdering(result.getNode(), SDNodeOrder); } } else { // No special expansion. 
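[All of these DAG.AssignOrdering deletions share one rationale: rather than stamping every intermediate SDValue by hand, and routinely missing some, the builder now assigns the current SDNodeOrder in a single place per visited instruction. The declaration of that helper, AssignOrderingToNode, appears in the SelectionDAGBuilder.h hunk further down. Its body is not part of this excerpt; a plausible sketch, assuming DAG.GetOrdering returns 0 for nodes that have not been stamped yet:]

    // Hedged sketch only: stamp a node with the current source order,
    // then recurse into operands so the whole subtree built for one IR
    // instruction gets the same order. The exact body is an assumption.
    void SelectionDAGBuilder::AssignOrderingToNode(const SDNode *Node) {
      if (!Node) return;
      if (DAG.GetOrdering(Node)) return;       // already stamped
      DAG.AssignOrdering(Node, SDNodeOrder);   // the one remaining call site
      for (unsigned I = 0, E = Node->getNumOperands(); I != E; ++I)
        AssignOrderingToNode(Node->getOperand(I).getNode());
    }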
@@ -4052,7 +3640,6 @@ SelectionDAGBuilder::visitPow(CallInst &I) { getValue(I.getOperand(1)).getValueType(), getValue(I.getOperand(1)), getValue(I.getOperand(2))); - DAG.AssignOrdering(result.getNode(), SDNodeOrder); } setValue(&I, result); @@ -4129,16 +3716,12 @@ SelectionDAGBuilder::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) { case Intrinsic::vaend: visitVAEnd(I); return 0; case Intrinsic::vacopy: visitVACopy(I); return 0; case Intrinsic::returnaddress: - Res = DAG.getNode(ISD::RETURNADDR, dl, TLI.getPointerTy(), - getValue(I.getOperand(1))); - setValue(&I, Res); - DAG.AssignOrdering(Res.getNode(), SDNodeOrder); + setValue(&I, DAG.getNode(ISD::RETURNADDR, dl, TLI.getPointerTy(), + getValue(I.getOperand(1)))); return 0; case Intrinsic::frameaddress: - Res = DAG.getNode(ISD::FRAMEADDR, dl, TLI.getPointerTy(), - getValue(I.getOperand(1))); - setValue(&I, Res); - DAG.AssignOrdering(Res.getNode(), SDNodeOrder); + setValue(&I, DAG.getNode(ISD::FRAMEADDR, dl, TLI.getPointerTy(), + getValue(I.getOperand(1)))); return 0; case Intrinsic::setjmp: return "_setjmp"+!TLI.usesUnderscoreSetJmp(); @@ -4149,10 +3732,8 @@ SelectionDAGBuilder::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) { SDValue Op2 = getValue(I.getOperand(2)); SDValue Op3 = getValue(I.getOperand(3)); unsigned Align = cast<ConstantInt>(I.getOperand(4))->getZExtValue(); - Res = DAG.getMemcpy(getRoot(), dl, Op1, Op2, Op3, Align, false, - I.getOperand(1), 0, I.getOperand(2), 0); - DAG.setRoot(Res); - DAG.AssignOrdering(Res.getNode(), SDNodeOrder); + DAG.setRoot(DAG.getMemcpy(getRoot(), dl, Op1, Op2, Op3, Align, false, + I.getOperand(1), 0, I.getOperand(2), 0)); return 0; } case Intrinsic::memset: { @@ -4160,10 +3741,8 @@ SelectionDAGBuilder::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) { SDValue Op2 = getValue(I.getOperand(2)); SDValue Op3 = getValue(I.getOperand(3)); unsigned Align = cast<ConstantInt>(I.getOperand(4))->getZExtValue(); - Res = DAG.getMemset(getRoot(), dl, Op1, Op2, Op3, Align, - I.getOperand(1), 0); - DAG.setRoot(Res); - DAG.AssignOrdering(Res.getNode(), SDNodeOrder); + DAG.setRoot(DAG.getMemset(getRoot(), dl, Op1, Op2, Op3, Align, + I.getOperand(1), 0)); return 0; } case Intrinsic::memmove: { @@ -4179,20 +3758,18 @@ SelectionDAGBuilder::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) { Size = C->getZExtValue(); if (AA->alias(I.getOperand(1), Size, I.getOperand(2), Size) == AliasAnalysis::NoAlias) { - Res = DAG.getMemcpy(getRoot(), dl, Op1, Op2, Op3, Align, false, - I.getOperand(1), 0, I.getOperand(2), 0); - DAG.setRoot(Res); - DAG.AssignOrdering(Res.getNode(), SDNodeOrder); + DAG.setRoot(DAG.getMemcpy(getRoot(), dl, Op1, Op2, Op3, Align, false, + I.getOperand(1), 0, I.getOperand(2), 0)); return 0; } - Res = DAG.getMemmove(getRoot(), dl, Op1, Op2, Op3, Align, - I.getOperand(1), 0, I.getOperand(2), 0); - DAG.setRoot(Res); - DAG.AssignOrdering(Res.getNode(), SDNodeOrder); + DAG.setRoot(DAG.getMemmove(getRoot(), dl, Op1, Op2, Op3, Align, + I.getOperand(1), 0, I.getOperand(2), 0)); return 0; } case Intrinsic::dbg_declare: { + // FIXME: currently, we get here only if OptLevel != CodeGenOpt::None. + // The real handling of this intrinsic is in FastISel. if (OptLevel != CodeGenOpt::None) // FIXME: Variable debug info is not supported here. 
return 0; @@ -4205,6 +3782,8 @@ SelectionDAGBuilder::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) { MDNode *Variable = DI.getVariable(); Value *Address = DI.getAddress(); + if (!Address) + return 0; if (BitCastInst *BCI = dyn_cast<BitCastInst>(Address)) Address = BCI->getOperand(0); AllocaInst *AI = dyn_cast<AllocaInst>(Address); @@ -4222,6 +3801,39 @@ SelectionDAGBuilder::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) { MMI->setVariableDbgInfo(Variable, FI, Dbg); return 0; } + case Intrinsic::dbg_value: { + // FIXME: currently, we get here only if OptLevel != CodeGenOpt::None. + // The real handling of this intrinsic is in FastISel. + if (OptLevel != CodeGenOpt::None) + // FIXME: Variable debug info is not supported here. + return 0; + DwarfWriter *DW = DAG.getDwarfWriter(); + if (!DW) + return 0; + DbgValueInst &DI = cast<DbgValueInst>(I); + if (!DIDescriptor::ValidDebugInfo(DI.getVariable(), CodeGenOpt::None)) + return 0; + + MDNode *Variable = DI.getVariable(); + Value *V = DI.getValue(); + if (!V) + return 0; + if (BitCastInst *BCI = dyn_cast<BitCastInst>(V)) + V = BCI->getOperand(0); + AllocaInst *AI = dyn_cast<AllocaInst>(V); + // Don't handle byval struct arguments or VLAs, for example. + if (!AI) + return 0; + DenseMap<const AllocaInst*, int>::iterator SI = + FuncInfo.StaticAllocaMap.find(AI); + if (SI == FuncInfo.StaticAllocaMap.end()) + return 0; // VLAs. + int FI = SI->second; + if (MachineModuleInfo *MMI = DAG.getMachineModuleInfo()) + if (MDNode *Dbg = DI.getMetadata("dbg")) + MMI->setVariableDbgInfo(Variable, FI, Dbg); + return 0; + } case Intrinsic::eh_exception: { // Insert the EXCEPTIONADDR instruction. assert(CurMBB->isLandingPad() &&"Call to eh.exception not in landing pad!"); @@ -4231,7 +3843,6 @@ SelectionDAGBuilder::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) { SDValue Op = DAG.getNode(ISD::EXCEPTIONADDR, dl, VTs, Ops, 1); setValue(&I, Op); DAG.setRoot(Op.getValue(1)); - DAG.AssignOrdering(Op.getNode(), SDNodeOrder); return 0; } @@ -4255,13 +3866,8 @@ SelectionDAGBuilder::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) { Ops[0] = getValue(I.getOperand(1)); Ops[1] = getRoot(); SDValue Op = DAG.getNode(ISD::EHSELECTION, dl, VTs, Ops, 2); - DAG.setRoot(Op.getValue(1)); - - Res = DAG.getSExtOrTrunc(Op, dl, MVT::i32); - setValue(&I, Res); - DAG.AssignOrdering(Op.getNode(), SDNodeOrder); - DAG.AssignOrdering(Res.getNode(), SDNodeOrder); + setValue(&I, DAG.getSExtOrTrunc(Op, dl, MVT::i32)); return 0; } @@ -4279,7 +3885,6 @@ SelectionDAGBuilder::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) { } setValue(&I, Res); - DAG.AssignOrdering(Res.getNode(), SDNodeOrder); return 0; } @@ -4287,13 +3892,11 @@ SelectionDAGBuilder::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) { case Intrinsic::eh_return_i64: if (MachineModuleInfo *MMI = DAG.getMachineModuleInfo()) { MMI->setCallsEHReturn(true); - Res = DAG.getNode(ISD::EH_RETURN, dl, - MVT::Other, - getControlRoot(), - getValue(I.getOperand(1)), - getValue(I.getOperand(2))); - DAG.setRoot(Res); - DAG.AssignOrdering(Res.getNode(), SDNodeOrder); + DAG.setRoot(DAG.getNode(ISD::EH_RETURN, dl, + MVT::Other, + getControlRoot(), + getValue(I.getOperand(1)), + getValue(I.getOperand(2)))); } else { setValue(&I, DAG.getConstant(0, TLI.getPointerTy())); } @@ -4316,15 +3919,20 @@ SelectionDAGBuilder::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) { SDValue FA = DAG.getNode(ISD::FRAMEADDR, dl, TLI.getPointerTy(), DAG.getConstant(0, TLI.getPointerTy())); - Res = DAG.getNode(ISD::ADD, dl, TLI.getPointerTy(), - 
FA, Offset); - setValue(&I, Res); - DAG.AssignOrdering(CfaArg.getNode(), SDNodeOrder); - DAG.AssignOrdering(Offset.getNode(), SDNodeOrder); - DAG.AssignOrdering(FA.getNode(), SDNodeOrder); - DAG.AssignOrdering(Res.getNode(), SDNodeOrder); + setValue(&I, DAG.getNode(ISD::ADD, dl, TLI.getPointerTy(), + FA, Offset)); + return 0; + } + case Intrinsic::eh_sjlj_callsite: { + MachineModuleInfo *MMI = DAG.getMachineModuleInfo(); + ConstantInt *CI = dyn_cast<ConstantInt>(I.getOperand(1)); + assert(CI && "Non-constant call site value in eh.sjlj.callsite!"); + assert(MMI->getCurrentCallSite() == 0 && "Overlapping call sites!"); + + MMI->setCurrentCallSite(CI->getZExtValue()); return 0; } + case Intrinsic::convertff: case Intrinsic::convertfsi: case Intrinsic::convertfui: @@ -4355,35 +3963,26 @@ SelectionDAGBuilder::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) { getValue(I.getOperand(3)), Code); setValue(&I, Res); - DAG.AssignOrdering(Res.getNode(), SDNodeOrder); return 0; } case Intrinsic::sqrt: - Res = DAG.getNode(ISD::FSQRT, dl, - getValue(I.getOperand(1)).getValueType(), - getValue(I.getOperand(1))); - setValue(&I, Res); - DAG.AssignOrdering(Res.getNode(), SDNodeOrder); + setValue(&I, DAG.getNode(ISD::FSQRT, dl, + getValue(I.getOperand(1)).getValueType(), + getValue(I.getOperand(1)))); return 0; case Intrinsic::powi: - Res = ExpandPowI(dl, getValue(I.getOperand(1)), getValue(I.getOperand(2)), - DAG); - setValue(&I, Res); - DAG.AssignOrdering(Res.getNode(), SDNodeOrder); + setValue(&I, ExpandPowI(dl, getValue(I.getOperand(1)), + getValue(I.getOperand(2)), DAG)); return 0; case Intrinsic::sin: - Res = DAG.getNode(ISD::FSIN, dl, - getValue(I.getOperand(1)).getValueType(), - getValue(I.getOperand(1))); - setValue(&I, Res); - DAG.AssignOrdering(Res.getNode(), SDNodeOrder); + setValue(&I, DAG.getNode(ISD::FSIN, dl, + getValue(I.getOperand(1)).getValueType(), + getValue(I.getOperand(1)))); return 0; case Intrinsic::cos: - Res = DAG.getNode(ISD::FCOS, dl, - getValue(I.getOperand(1)).getValueType(), - getValue(I.getOperand(1))); - setValue(&I, Res); - DAG.AssignOrdering(Res.getNode(), SDNodeOrder); + setValue(&I, DAG.getNode(ISD::FCOS, dl, + getValue(I.getOperand(1)).getValueType(), + getValue(I.getOperand(1)))); return 0; case Intrinsic::log: visitLog(I); @@ -4405,9 +4004,7 @@ SelectionDAGBuilder::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) { return 0; case Intrinsic::pcmarker: { SDValue Tmp = getValue(I.getOperand(1)); - Res = DAG.getNode(ISD::PCMARKER, dl, MVT::Other, getRoot(), Tmp); - DAG.setRoot(Res); - DAG.AssignOrdering(Res.getNode(), SDNodeOrder); + DAG.setRoot(DAG.getNode(ISD::PCMARKER, dl, MVT::Other, getRoot(), Tmp)); return 0; } case Intrinsic::readcyclecounter: { @@ -4417,38 +4014,29 @@ SelectionDAGBuilder::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) { &Op, 1); setValue(&I, Res); DAG.setRoot(Res.getValue(1)); - DAG.AssignOrdering(Res.getNode(), SDNodeOrder); return 0; } case Intrinsic::bswap: - Res = DAG.getNode(ISD::BSWAP, dl, - getValue(I.getOperand(1)).getValueType(), - getValue(I.getOperand(1))); - setValue(&I, Res); - DAG.AssignOrdering(Res.getNode(), SDNodeOrder); + setValue(&I, DAG.getNode(ISD::BSWAP, dl, + getValue(I.getOperand(1)).getValueType(), + getValue(I.getOperand(1)))); return 0; case Intrinsic::cttz: { SDValue Arg = getValue(I.getOperand(1)); EVT Ty = Arg.getValueType(); - Res = DAG.getNode(ISD::CTTZ, dl, Ty, Arg); - setValue(&I, Res); - DAG.AssignOrdering(Res.getNode(), SDNodeOrder); + setValue(&I, DAG.getNode(ISD::CTTZ, dl, Ty, Arg)); return 0; } case 
Intrinsic::ctlz: { SDValue Arg = getValue(I.getOperand(1)); EVT Ty = Arg.getValueType(); - Res = DAG.getNode(ISD::CTLZ, dl, Ty, Arg); - setValue(&I, Res); - DAG.AssignOrdering(Res.getNode(), SDNodeOrder); + setValue(&I, DAG.getNode(ISD::CTLZ, dl, Ty, Arg)); return 0; } case Intrinsic::ctpop: { SDValue Arg = getValue(I.getOperand(1)); EVT Ty = Arg.getValueType(); - Res = DAG.getNode(ISD::CTPOP, dl, Ty, Arg); - setValue(&I, Res); - DAG.AssignOrdering(Res.getNode(), SDNodeOrder); + setValue(&I, DAG.getNode(ISD::CTPOP, dl, Ty, Arg)); return 0; } case Intrinsic::stacksave: { @@ -4457,14 +4045,11 @@ SelectionDAGBuilder::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) { DAG.getVTList(TLI.getPointerTy(), MVT::Other), &Op, 1); setValue(&I, Res); DAG.setRoot(Res.getValue(1)); - DAG.AssignOrdering(Res.getNode(), SDNodeOrder); return 0; } case Intrinsic::stackrestore: { Res = getValue(I.getOperand(1)); - Res = DAG.getNode(ISD::STACKRESTORE, dl, MVT::Other, getRoot(), Res); - DAG.setRoot(Res); - DAG.AssignOrdering(Res.getNode(), SDNodeOrder); + DAG.setRoot(DAG.getNode(ISD::STACKRESTORE, dl, MVT::Other, getRoot(), Res)); return 0; } case Intrinsic::stackprotector: { @@ -4484,10 +4069,9 @@ SelectionDAGBuilder::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) { // Store the stack protector onto the stack. Res = DAG.getStore(getRoot(), getCurDebugLoc(), Src, FIN, PseudoSourceValue::getFixedStack(FI), - 0, true); + 0, true, false, 0); setValue(&I, Res); DAG.setRoot(Res); - DAG.AssignOrdering(Res.getNode(), SDNodeOrder); return 0; } case Intrinsic::objectsize: { @@ -4505,7 +4089,6 @@ SelectionDAGBuilder::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) { Res = DAG.getConstant(0, Ty); setValue(&I, Res); - DAG.AssignOrdering(Res.getNode(), SDNodeOrder); return 0; } case Intrinsic::var_annotation: @@ -4529,7 +4112,6 @@ SelectionDAGBuilder::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) { setValue(&I, Res); DAG.setRoot(Res.getValue(1)); - DAG.AssignOrdering(Res.getNode(), SDNodeOrder); return 0; } case Intrinsic::gcroot: @@ -4546,14 +4128,10 @@ SelectionDAGBuilder::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) { llvm_unreachable("GC failed to lower gcread/gcwrite intrinsics!"); return 0; case Intrinsic::flt_rounds: - Res = DAG.getNode(ISD::FLT_ROUNDS_, dl, MVT::i32); - setValue(&I, Res); - DAG.AssignOrdering(Res.getNode(), SDNodeOrder); + setValue(&I, DAG.getNode(ISD::FLT_ROUNDS_, dl, MVT::i32)); return 0; case Intrinsic::trap: - Res = DAG.getNode(ISD::TRAP, dl,MVT::Other, getRoot()); - DAG.setRoot(Res); - DAG.AssignOrdering(Res.getNode(), SDNodeOrder); + DAG.setRoot(DAG.getNode(ISD::TRAP, dl,MVT::Other, getRoot())); return 0; case Intrinsic::uadd_with_overflow: return implVisitAluOverflow(I, ISD::UADDO); @@ -4574,9 +4152,7 @@ SelectionDAGBuilder::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) { Ops[1] = getValue(I.getOperand(1)); Ops[2] = getValue(I.getOperand(2)); Ops[3] = getValue(I.getOperand(3)); - Res = DAG.getNode(ISD::PREFETCH, dl, MVT::Other, &Ops[0], 4); - DAG.setRoot(Res); - DAG.AssignOrdering(Res.getNode(), SDNodeOrder); + DAG.setRoot(DAG.getNode(ISD::PREFETCH, dl, MVT::Other, &Ops[0], 4)); return 0; } @@ -4586,9 +4162,7 @@ SelectionDAGBuilder::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) { for (int x = 1; x < 6; ++x) Ops[x] = getValue(I.getOperand(x)); - Res = DAG.getNode(ISD::MEMBARRIER, dl, MVT::Other, &Ops[0], 6); - DAG.setRoot(Res); - DAG.AssignOrdering(Res.getNode(), SDNodeOrder); + DAG.setRoot(DAG.getNode(ISD::MEMBARRIER, dl, MVT::Other, &Ops[0], 6)); return 0; 
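[A second mechanical change threads through these intrinsic hunks: getStore and getLoad grew an isNonTemporal flag, so call sites gain an extra false (or a trailing "false, 0") between the volatile flag and the alignment, as in the stack-protector store above. Spelled out with the stack-protector operands, this is a fragment in the builder's context, with the parameter names reconstructed from the call sites rather than quoted from a header:]

    // Assumed shape after this patch:
    //   getStore(Chain, dl, Val, Ptr, SrcValue, SVOffset,
    //            isVolatile, isNonTemporal, Alignment)
    SDValue Store = DAG.getStore(getRoot(), getCurDebugLoc(), Src, FIN,
                                 PseudoSourceValue::getFixedStack(FI),
                                 /*SVOffset=*/0,
                                 /*isVolatile=*/true,
                                 /*isNonTemporal=*/false,
                                 /*Alignment=*/0);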
} case Intrinsic::atomic_cmp_swap: { @@ -4603,7 +4177,6 @@ SelectionDAGBuilder::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) { I.getOperand(1)); setValue(&I, L); DAG.setRoot(L.getValue(1)); - DAG.AssignOrdering(L.getNode(), SDNodeOrder); return 0; } case Intrinsic::atomic_load_add: @@ -4632,9 +4205,7 @@ SelectionDAGBuilder::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) { case Intrinsic::invariant_start: case Intrinsic::lifetime_start: // Discard region information. - Res = DAG.getUNDEF(TLI.getPointerTy()); - setValue(&I, Res); - DAG.AssignOrdering(Res.getNode(), SDNodeOrder); + setValue(&I, DAG.getUNDEF(TLI.getPointerTy())); return 0; case Intrinsic::invariant_end: case Intrinsic::lifetime_end: @@ -4649,19 +4220,25 @@ SelectionDAGBuilder::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) { /// between it and the return. /// /// This function only tests target-independent requirements. -/// For target-dependent requirements, a target should override -/// TargetLowering::IsEligibleForTailCallOptimization. -/// static bool -isInTailCallPosition(const Instruction *I, Attributes CalleeRetAttr, +isInTailCallPosition(CallSite CS, Attributes CalleeRetAttr, const TargetLowering &TLI) { + const Instruction *I = CS.getInstruction(); const BasicBlock *ExitBB = I->getParent(); const TerminatorInst *Term = ExitBB->getTerminator(); const ReturnInst *Ret = dyn_cast<ReturnInst>(Term); const Function *F = ExitBB->getParent(); - // The block must end in a return statement or an unreachable. - if (!Ret && !isa<UnreachableInst>(Term)) return false; + // The block must end in a return statement or unreachable. + // + // FIXME: Decline tailcall if it's not guaranteed and if the block ends in + // an unreachable, for now. The way tailcall optimization is currently + // implemented means it will add an epilogue followed by a jump. That is + // not profitable. Also, if the callee is a special function (e.g. + // longjmp on x86), it can end up causing miscompilation that has not + // been fully understood. + if (!Ret && + (!GuaranteedTailCallOpt || !isa<UnreachableInst>(Term))) return false; // If I will have a chain, make sure no other instruction that will have a // chain interposes between I and the return. @@ -4690,6 +4267,10 @@ isInTailCallPosition(const Instruction *I, Attributes CalleeRetAttr, if ((CalleeRetAttr ^ CallerRetAttr) & ~Attribute::NoAlias) return false; + // It's not safe to eliminate the sign / zero extension of the return value. + if ((CallerRetAttr & Attribute::ZExt) || (CallerRetAttr & Attribute::SExt)) + return false; + // Otherwise, make sure the unmodified return value of I is the return value. for (const Instruction *U = dyn_cast<Instruction>(Ret->getOperand(0)); ; U = dyn_cast<Instruction>(U->getOperand(0))) { @@ -4785,6 +4366,15 @@ void SelectionDAGBuilder::LowerCallTo(CallSite CS, SDValue Callee, // used to detect deletion of the invoke via the MachineModuleInfo. BeginLabel = MMI->NextLabelID(); + // For SjLj, keep track of which landing pads go with which invokes + // so as to maintain the ordering of pads in the LSDA. + unsigned CallSiteIndex = MMI->getCurrentCallSite(); + if (CallSiteIndex) { + MMI->setCallSiteBeginLabel(BeginLabel, CallSiteIndex); + // Now that the call site is handled, stop tracking it. + MMI->setCurrentCallSite(0); + } + // Both PendingLoads and PendingExports must be flushed here; // this call might not return. 
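[The isInTailCallPosition rewrite above tightens two things: a call followed by unreachable is tail-call eligible only under GuaranteedTailCallOpt, and a caller whose return is attributed zeroext/signext may not tail call at all, because jumping straight to the callee would skip the extension the caller promised. The attribute logic in isolation, distilled from the hunk:]

    // Attributes is the 2.x-era bitmask type used in the hunk above.
    static bool retAttrsAllowTailCall(Attributes CallerRetAttr,
                                      Attributes CalleeRetAttr) {
      // Caller and callee must agree on return-value attributes,
      // modulo noalias, or the transformation changes behavior.
      if ((CalleeRetAttr ^ CallerRetAttr) & ~Attribute::NoAlias)
        return false;
      // A promised zeroext/signext return cannot be produced by the
      // callee's raw return value, so decline.
      if ((CallerRetAttr & Attribute::ZExt) ||
          (CallerRetAttr & Attribute::SExt))
        return false;
      return true;
    }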
(void)getRoot(); @@ -4795,9 +4385,7 @@ void SelectionDAGBuilder::LowerCallTo(CallSite CS, SDValue Callee, // Check if target-independent constraints permit a tail call here. // Target-dependent constraints are checked within TLI.LowerCallTo. if (isTailCall && - !isInTailCallPosition(CS.getInstruction(), - CS.getAttributes().getRetAttributes(), - TLI)) + !isInTailCallPosition(CS, CS.getAttributes().getRetAttributes(), TLI)) isTailCall = false; std::pair<SDValue,SDValue> Result = @@ -4815,7 +4403,6 @@ void SelectionDAGBuilder::LowerCallTo(CallSite CS, SDValue Callee, "Null value expected with tail call!"); if (Result.first.getNode()) { setValue(CS.getInstruction(), Result.first); - DAG.AssignOrdering(Result.first.getNode(), SDNodeOrder); } else if (!CanLowerReturn && Result.second.getNode()) { // The instruction result is the result of loading from the // hidden sret parameter. @@ -4834,7 +4421,7 @@ void SelectionDAGBuilder::LowerCallTo(CallSite CS, SDValue Callee, DemoteStackSlot, DAG.getConstant(Offsets[i], PtrVT)); SDValue L = DAG.getLoad(OutVTs[i], getCurDebugLoc(), Result.second, - Add, NULL, Offsets[i], false, 1); + Add, NULL, Offsets[i], false, false, 1); Values[i] = L; Chains[i] = L.getValue(1); } @@ -4860,27 +4447,22 @@ void SelectionDAGBuilder::LowerCallTo(CallSite CS, SDValue Callee, getCopyFromParts(DAG, getCurDebugLoc(), SDNodeOrder, &Values[CurReg], NumRegs, RegisterVT, VT, AssertOp); ReturnValues.push_back(ReturnValue); - DAG.AssignOrdering(ReturnValue.getNode(), SDNodeOrder); CurReg += NumRegs; } - SDValue Res = DAG.getNode(ISD::MERGE_VALUES, getCurDebugLoc(), - DAG.getVTList(&RetTys[0], RetTys.size()), - &ReturnValues[0], ReturnValues.size()); - setValue(CS.getInstruction(), Res); + setValue(CS.getInstruction(), + DAG.getNode(ISD::MERGE_VALUES, getCurDebugLoc(), + DAG.getVTList(&RetTys[0], RetTys.size()), + &ReturnValues[0], ReturnValues.size())); - DAG.AssignOrdering(Chain.getNode(), SDNodeOrder); - DAG.AssignOrdering(Res.getNode(), SDNodeOrder); } // As a special case, a null chain means that a tail call has been emitted and // the DAG root is already updated. - if (Result.second.getNode()) { + if (Result.second.getNode()) DAG.setRoot(Result.second); - DAG.AssignOrdering(Result.second.getNode(), SDNodeOrder); - } else { + else HasTailCall = true; - } if (LandingPad && MMI) { // Insert a label at the end of the invoke call to mark the try range. This @@ -4941,7 +4523,8 @@ static SDValue getMemCmpLoad(Value *PtrVal, MVT LoadVT, const Type *LoadTy, SDValue Ptr = Builder.getValue(PtrVal); SDValue LoadVal = Builder.DAG.getLoad(LoadVT, Builder.getCurDebugLoc(), Root, Ptr, PtrVal /*SrcValue*/, 0/*SVOffset*/, - false /*volatile*/, 1 /* align=1 */); + false /*volatile*/, + false /*nontemporal*/, 1 /* align=1 */); if (!ConstantMemory) Builder.PendingLoads.push_back(LoadVal.getValue(1)); @@ -5054,7 +4637,7 @@ void SelectionDAGBuilder::visitCall(CallInst &I) { StringRef Name = F->getName(); if (Name == "copysign" || Name == "copysignf") { if (I.getNumOperands() == 3 && // Basic sanity checks. - I.getOperand(1)->getType()->isFloatingPoint() && + I.getOperand(1)->getType()->isFloatingPointTy() && I.getType() == I.getOperand(1)->getType() && I.getType() == I.getOperand(2)->getType()) { SDValue LHS = getValue(I.getOperand(1)); @@ -5065,7 +4648,7 @@ void SelectionDAGBuilder::visitCall(CallInst &I) { } } else if (Name == "fabs" || Name == "fabsf" || Name == "fabsl") { if (I.getNumOperands() == 2 && // Basic sanity checks. 
- I.getOperand(1)->getType()->isFloatingPoint() && + I.getOperand(1)->getType()->isFloatingPointTy() && I.getType() == I.getOperand(1)->getType()) { SDValue Tmp = getValue(I.getOperand(1)); setValue(&I, DAG.getNode(ISD::FABS, getCurDebugLoc(), @@ -5074,7 +4657,7 @@ void SelectionDAGBuilder::visitCall(CallInst &I) { } } else if (Name == "sin" || Name == "sinf" || Name == "sinl") { if (I.getNumOperands() == 2 && // Basic sanity checks. - I.getOperand(1)->getType()->isFloatingPoint() && + I.getOperand(1)->getType()->isFloatingPointTy() && I.getType() == I.getOperand(1)->getType() && I.onlyReadsMemory()) { SDValue Tmp = getValue(I.getOperand(1)); @@ -5084,7 +4667,7 @@ void SelectionDAGBuilder::visitCall(CallInst &I) { } } else if (Name == "cos" || Name == "cosf" || Name == "cosl") { if (I.getNumOperands() == 2 && // Basic sanity checks. - I.getOperand(1)->getType()->isFloatingPoint() && + I.getOperand(1)->getType()->isFloatingPointTy() && I.getType() == I.getOperand(1)->getType() && I.onlyReadsMemory()) { SDValue Tmp = getValue(I.getOperand(1)); @@ -5094,7 +4677,7 @@ void SelectionDAGBuilder::visitCall(CallInst &I) { } } else if (Name == "sqrt" || Name == "sqrtf" || Name == "sqrtl") { if (I.getNumOperands() == 2 && // Basic sanity checks. - I.getOperand(1)->getType()->isFloatingPoint() && + I.getOperand(1)->getType()->isFloatingPointTy() && I.getType() == I.getOperand(1)->getType() && I.onlyReadsMemory()) { SDValue Tmp = getValue(I.getOperand(1)); @@ -5120,9 +4703,7 @@ void SelectionDAGBuilder::visitCall(CallInst &I) { // Check if we can potentially perform a tail call. More detailed checking is // be done within LowerCallTo, after more information about the call is known. - bool isTailCall = PerformTailCallOpt && I.isTailCall(); - - LowerCallTo(&I, Callee, isTailCall); + LowerCallTo(&I, Callee, I.isTailCall()); } /// getCopyFromRegs - Emit a series of CopyFromReg nodes that copies from @@ -5152,7 +4733,6 @@ SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG, DebugLoc dl, } Chain = P.getValue(1); - DAG.AssignOrdering(P.getNode(), Order); // If the source register was virtual and if we know something about it, // add an assert node. @@ -5188,11 +4768,9 @@ SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG, DebugLoc dl, else if (NumZeroBits >= RegSize-32) isSExt = false, FromVT = MVT::i32; // ASSERT ZEXT 32 - if (FromVT != MVT::Other) { + if (FromVT != MVT::Other) P = DAG.getNode(isSExt ? 
ISD::AssertSext : ISD::AssertZext, dl, RegisterVT, P, DAG.getValueType(FromVT)); - DAG.AssignOrdering(P.getNode(), Order); - } } } @@ -5201,16 +4779,13 @@ SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG, DebugLoc dl, Values[Value] = getCopyFromParts(DAG, dl, Order, Parts.begin(), NumRegs, RegisterVT, ValueVT); - DAG.AssignOrdering(Values[Value].getNode(), Order); Part += NumRegs; Parts.clear(); } - SDValue Res = DAG.getNode(ISD::MERGE_VALUES, dl, - DAG.getVTList(&ValueVTs[0], ValueVTs.size()), - &Values[0], ValueVTs.size()); - DAG.AssignOrdering(Res.getNode(), Order); - return Res; + return DAG.getNode(ISD::MERGE_VALUES, dl, + DAG.getVTList(&ValueVTs[0], ValueVTs.size()), + &Values[0], ValueVTs.size()); } /// getCopyToRegs - Emit a series of CopyToReg nodes that copies the @@ -5246,7 +4821,6 @@ void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG, DebugLoc dl, } Chains[i] = Part.getValue(0); - DAG.AssignOrdering(Part.getNode(), Order); } if (NumRegs == 1 || Flag) @@ -5263,8 +4837,6 @@ void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG, DebugLoc dl, Chain = Chains[NumRegs-1]; else Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &Chains[0], NumRegs); - - DAG.AssignOrdering(Chain.getNode(), Order); } /// AddInlineAsmOperands - Add this value to the specified inlineasm node @@ -5281,16 +4853,12 @@ void RegsForValue::AddInlineAsmOperands(unsigned Code, SDValue Res = DAG.getTargetConstant(Flag, MVT::i32); Ops.push_back(Res); - DAG.AssignOrdering(Res.getNode(), Order); - for (unsigned Value = 0, Reg = 0, e = ValueVTs.size(); Value != e; ++Value) { unsigned NumRegs = TLI->getNumRegisters(*DAG.getContext(), ValueVTs[Value]); EVT RegisterVT = RegVTs[Value]; for (unsigned i = 0; i != NumRegs; ++i) { assert(Reg < Regs.size() && "Mismatch in # registers expected"); - SDValue Res = DAG.getRegister(Regs[Reg++], RegisterVT); - Ops.push_back(Res); - DAG.AssignOrdering(Res.getNode(), Order); + Ops.push_back(DAG.getRegister(Regs[Reg++], RegisterVT)); } } } @@ -5309,7 +4877,7 @@ isAllocatableRegister(unsigned Reg, MachineFunction &MF, EVT ThisVT = MVT::Other; const TargetRegisterClass *RC = *RCI; - // If none of the the value types for this register class are valid, we + // If none of the value types for this register class are valid, we // can't use it. For example, 64-bit reg classes on 32-bit targets. for (TargetRegisterClass::vt_iterator I = RC->vt_begin(), E = RC->vt_end(); I != E; ++I) { @@ -5509,8 +5077,6 @@ GetRegistersForValue(SDISelAsmOperandInfo &OpInfo, RegVT, OpInfo.CallOperand); OpInfo.ConstraintVT = RegVT; } - - DAG.AssignOrdering(OpInfo.CallOperand.getNode(), SDNodeOrder); } NumRegs = TLI.getNumRegisters(Context, OpInfo.ConstraintVT); @@ -5776,7 +5342,8 @@ void SelectionDAGBuilder::visitInlineAsm(CallSite CS) { int SSFI = MF.getFrameInfo()->CreateStackObject(TySize, Align, false); SDValue StackSlot = DAG.getFrameIndex(SSFI, TLI.getPointerTy()); Chain = DAG.getStore(Chain, getCurDebugLoc(), - OpInfo.CallOperand, StackSlot, NULL, 0); + OpInfo.CallOperand, StackSlot, NULL, 0, + false, false, 0); OpInfo.CallOperand = StackSlot; } @@ -5972,7 +5539,8 @@ void SelectionDAGBuilder::visitInlineAsm(CallSite CS) { "Don't know how to handle indirect register inputs yet!"); // Copy the input into the appropriate registers. 
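[Earlier in the visitInlineAsm hunks, indirect memory operands get an address materialized for them: a by-value operand is stored into a fresh stack object (note the widened getStore signature again) and the frame index replaces it as the asm operand. Condensed from the hunk, with comments added:]

    int SSFI = MF.getFrameInfo()->CreateStackObject(TySize, Align, false);
    SDValue StackSlot = DAG.getFrameIndex(SSFI, TLI.getPointerTy());
    // Spill the value; the asm only knows how to take an address here.
    Chain = DAG.getStore(Chain, getCurDebugLoc(), OpInfo.CallOperand,
                         StackSlot, NULL, 0, false, false, 0);
    OpInfo.CallOperand = StackSlot;   // the asm now sees memory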
- if (OpInfo.AssignedRegs.Regs.empty()) { + if (OpInfo.AssignedRegs.Regs.empty() || + !OpInfo.AssignedRegs.areValueTypesLegal()) { llvm_report_error("Couldn't allocate input reg for" " constraint '"+ OpInfo.ConstraintCode +"'!"); } @@ -6061,7 +5629,8 @@ void SelectionDAGBuilder::visitInlineAsm(CallSite CS) { SDValue Val = DAG.getStore(Chain, getCurDebugLoc(), StoresToEmit[i].first, getValue(StoresToEmit[i].second), - StoresToEmit[i].second, 0); + StoresToEmit[i].second, 0, + false, false, 0); OutChains.push_back(Val); } @@ -6116,9 +5685,6 @@ TargetLowering::LowerCallTo(SDValue Chain, const Type *RetTy, SDValue Callee, ArgListTy &Args, SelectionDAG &DAG, DebugLoc dl, unsigned Order) { - assert((!isTailCall || PerformTailCallOpt) && - "isTailCall set when tail-call optimizations are disabled!"); - // Handle all of the outgoing arguments. SmallVector<ISD::OutputArg, 32> Outs; for (unsigned i = 0, e = Args.size(); i != e; ++i) { @@ -6207,12 +5773,6 @@ TargetLowering::LowerCallTo(SDValue Chain, const Type *RetTy, } } - // Check if target-dependent constraints permit a tail call here. - // Target-independent constraints should be checked by the caller. - if (isTailCall && - !IsEligibleForTailCallOptimization(Callee, CallConv, isVarArg, Ins, DAG)) - isTailCall = false; - SmallVector<SDValue, 4> InVals; Chain = LowerCall(Chain, Callee, CallConv, isVarArg, isTailCall, Outs, Ins, dl, DAG, InVals); @@ -6231,8 +5791,6 @@ TargetLowering::LowerCallTo(SDValue Chain, const Type *RetTy, "LowerCall emitted a value with the wrong type!"); }); - DAG.AssignOrdering(Chain.getNode(), Order); - // For a tail call, the return value is merely live-out and there aren't // any nodes in the DAG representing it. Return a special value to // indicate that a tail call has been emitted and no more Instructions @@ -6256,11 +5814,9 @@ TargetLowering::LowerCallTo(SDValue Chain, const Type *RetTy, EVT RegisterVT = getRegisterType(RetTy->getContext(), VT); unsigned NumRegs = getNumRegisters(RetTy->getContext(), VT); - SDValue ReturnValue = - getCopyFromParts(DAG, dl, Order, &InVals[CurReg], NumRegs, - RegisterVT, VT, AssertOp); - ReturnValues.push_back(ReturnValue); - DAG.AssignOrdering(ReturnValue.getNode(), Order); + ReturnValues.push_back(getCopyFromParts(DAG, dl, Order, &InVals[CurReg], + NumRegs, RegisterVT, VT, + AssertOp)); CurReg += NumRegs; } @@ -6273,7 +5829,6 @@ TargetLowering::LowerCallTo(SDValue Chain, const Type *RetTy, SDValue Res = DAG.getNode(ISD::MERGE_VALUES, dl, DAG.getVTList(&RetTys[0], RetTys.size()), &ReturnValues[0], ReturnValues.size()); - DAG.AssignOrdering(Res.getNode(), Order); return std::make_pair(Res, Chain); } diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h index db656e3..bc4b33d 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h @@ -342,6 +342,11 @@ public: void CopyValueToVirtualRegister(Value *V, unsigned Reg); + /// AssignOrderingToNode - Assign an ordering to the node. The order is gotten + /// from how the code appeared in the source. The ordering is used by the + /// scheduler to effectively turn off scheduling. 
+ void AssignOrderingToNode(const SDNode *Node); + void visit(Instruction &I); void visit(unsigned Opcode, User &I); diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index 2bec964..eead526 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -457,6 +457,21 @@ public: }; } +/// TrivialTruncElim - Eliminate some trivial nops that can result from +/// ShrinkDemandedOps: (trunc (ext n)) -> n. +static bool TrivialTruncElim(SDValue Op, + TargetLowering::TargetLoweringOpt &TLO) { + SDValue N0 = Op.getOperand(0); + EVT VT = Op.getValueType(); + if ((N0.getOpcode() == ISD::ZERO_EXTEND || + N0.getOpcode() == ISD::SIGN_EXTEND || + N0.getOpcode() == ISD::ANY_EXTEND) && + N0.getOperand(0).getValueType() == VT) { + return TLO.CombineTo(Op, N0.getOperand(0)); + } + return false; +} + /// ShrinkDemandedOps - A late transformation pass that shrink expressions /// using TargetLowering::TargetLoweringOpt::ShrinkDemandedOp. It converts /// x+y to (VT)((SmallVT)x+(SmallVT)y) if the casts are free. @@ -489,7 +504,9 @@ void SelectionDAGISel::ShrinkDemandedOps() { APInt Demanded = APInt::getAllOnesValue(BitWidth); APInt KnownZero, KnownOne; if (TLI.SimplifyDemandedBits(SDValue(N, 0), Demanded, - KnownZero, KnownOne, TLO)) { + KnownZero, KnownOne, TLO) || + (N->getOpcode() == ISD::TRUNCATE && + TrivialTruncElim(SDValue(N, 0), TLO))) { // Revisit the node. Worklist.erase(std::remove(Worklist.begin(), Worklist.end(), N), Worklist.end()); @@ -801,7 +818,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(Function &Fn, // landing pad can thus be detected via the MachineModuleInfo. unsigned LabelID = MMI->addLandingPad(BB); - const TargetInstrDesc &II = TII.get(TargetInstrInfo::EH_LABEL); + const TargetInstrDesc &II = TII.get(TargetOpcode::EH_LABEL); BuildMI(BB, SDB->getCurDebugLoc(), II).addImm(LabelID); // Mark exception register as live in. @@ -953,7 +970,7 @@ SelectionDAGISel::FinishBasicBlock() { SDB->BitTestCases.empty()) { for (unsigned i = 0, e = SDB->PHINodesToUpdate.size(); i != e; ++i) { MachineInstr *PHI = SDB->PHINodesToUpdate[i].first; - assert(PHI->getOpcode() == TargetInstrInfo::PHI && + assert(PHI->isPHI() && "This is not a machine PHI node that we are updating!"); PHI->addOperand(MachineOperand::CreateReg(SDB->PHINodesToUpdate[i].second, false)); @@ -1000,7 +1017,7 @@ SelectionDAGISel::FinishBasicBlock() { for (unsigned pi = 0, pe = SDB->PHINodesToUpdate.size(); pi != pe; ++pi) { MachineInstr *PHI = SDB->PHINodesToUpdate[pi].first; MachineBasicBlock *PHIBB = PHI->getParent(); - assert(PHI->getOpcode() == TargetInstrInfo::PHI && + assert(PHI->isPHI() && "This is not a machine PHI node that we are updating!"); // This is "default" BB. We have two jumps to it. From "header" BB and // from last "case" BB. @@ -1056,7 +1073,7 @@ SelectionDAGISel::FinishBasicBlock() { for (unsigned pi = 0, pe = SDB->PHINodesToUpdate.size(); pi != pe; ++pi) { MachineInstr *PHI = SDB->PHINodesToUpdate[pi].first; MachineBasicBlock *PHIBB = PHI->getParent(); - assert(PHI->getOpcode() == TargetInstrInfo::PHI && + assert(PHI->isPHI() && "This is not a machine PHI node that we are updating!"); // "default" BB. We can go there only from header BB. if (PHIBB == SDB->JTCases[i].second.Default) { @@ -1079,7 +1096,7 @@ SelectionDAGISel::FinishBasicBlock() { // need to update PHI nodes in that block. 
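[TrivialTruncElim above catches a nop that ShrinkDemandedOps itself creates: once an operation has been narrowed, the extend that fed it may end up consumed by a truncate back to the original type, and (trunc (ext n)) with matching inner and outer types is just n. In scalar terms:]

    #include <cassert>
    #include <cstdint>

    // Extending and truncating through a wider type returns the
    // original value, so the two DAG nodes can be bypassed.
    uint32_t roundTrip(uint32_t n) {
      uint64_t wide = (uint64_t)n;   // (zext i32 n) left by shrinking
      return (uint32_t)wide;         // (trunc i64 wide) == n
    }

    int main() { assert(roundTrip(0xdeadbeefu) == 0xdeadbeefu); }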
for (unsigned i = 0, e = SDB->PHINodesToUpdate.size(); i != e; ++i) { MachineInstr *PHI = SDB->PHINodesToUpdate[i].first; - assert(PHI->getOpcode() == TargetInstrInfo::PHI && + assert(PHI->isPHI() && "This is not a machine PHI node that we are updating!"); if (BB->isSuccessor(PHI->getParent())) { PHI->addOperand(MachineOperand::CreateReg(SDB->PHINodesToUpdate[i].second, @@ -1116,7 +1133,7 @@ SelectionDAGISel::FinishBasicBlock() { // BB may have been removed from the CFG if a branch was constant folded. if (ThisBB->isSuccessor(BB)) { for (MachineBasicBlock::iterator Phi = BB->begin(); - Phi != BB->end() && Phi->getOpcode() == TargetInstrInfo::PHI; + Phi != BB->end() && Phi->isPHI(); ++Phi) { // This value for this PHI node is recorded in PHINodesToUpdate. for (unsigned pn = 0; ; ++pn) { @@ -1324,8 +1341,7 @@ static bool findNonImmUse(SDNode *Use, SDNode* Def, SDNode *ImmedUse, /// isNonImmUse - Start searching from Root up the DAG to check is Def can /// be reached. Return true if that's the case. However, ignore direct uses /// by ImmedUse (which would be U in the example illustrated in -/// IsLegalAndProfitableToFold) and by Root (which can happen in the store -/// case). +/// IsLegalToFold) and by Root (which can happen in the store case). /// FIXME: to be really generic, we should allow direct use by any node /// that is being folded. But realisticly since we only fold loads which /// have one non-chain use, we only need to watch out for load/op/store @@ -1336,11 +1352,17 @@ static inline bool isNonImmUse(SDNode *Root, SDNode *Def, SDNode *ImmedUse) { return findNonImmUse(Root, Def, ImmedUse, Root, Visited); } -/// IsLegalAndProfitableToFold - Returns true if the specific operand node N of -/// U can be folded during instruction selection that starts at Root and -/// folding N is profitable. -bool SelectionDAGISel::IsLegalAndProfitableToFold(SDNode *N, SDNode *U, - SDNode *Root) const { +/// IsProfitableToFold - Returns true if it's profitable to fold the specific +/// operand node N of U during instruction selection that starts at Root. +bool SelectionDAGISel::IsProfitableToFold(SDValue N, SDNode *U, + SDNode *Root) const { + if (OptLevel == CodeGenOpt::None) return false; + return N.hasOneUse(); +} + +/// IsLegalToFold - Returns true if the specific operand node N of +/// U can be folded during instruction selection that starts at Root. 
+bool SelectionDAGISel::IsLegalToFold(SDValue N, SDNode *U, SDNode *Root) const { if (OptLevel == CodeGenOpt::None) return false; // If Root use can somehow reach N through a path that that doesn't contain @@ -1394,7 +1416,7 @@ bool SelectionDAGISel::IsLegalAndProfitableToFold(SDNode *N, SDNode *U, VT = Root->getValueType(Root->getNumValues()-1); } - return !isNonImmUse(Root, N, U); + return !isNonImmUse(Root, N.getNode(), U); } SDNode *SelectionDAGISel::Select_INLINEASM(SDNode *N) { @@ -1410,15 +1432,14 @@ SDNode *SelectionDAGISel::Select_INLINEASM(SDNode *N) { } SDNode *SelectionDAGISel::Select_UNDEF(SDNode *N) { - return CurDAG->SelectNodeTo(N, TargetInstrInfo::IMPLICIT_DEF, - N->getValueType(0)); + return CurDAG->SelectNodeTo(N, TargetOpcode::IMPLICIT_DEF,N->getValueType(0)); } SDNode *SelectionDAGISel::Select_EH_LABEL(SDNode *N) { SDValue Chain = N->getOperand(0); unsigned C = cast<LabelSDNode>(N)->getLabelID(); SDValue Tmp = CurDAG->getTargetConstant(C, MVT::i32); - return CurDAG->SelectNodeTo(N, TargetInstrInfo::EH_LABEL, + return CurDAG->SelectNodeTo(N, TargetOpcode::EH_LABEL, MVT::Other, Tmp, Chain); } diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 81c51c4..e88af4f 100644 --- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -13,6 +13,7 @@ #include "llvm/Target/TargetLowering.h" #include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCExpr.h" #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetMachine.h" @@ -21,6 +22,8 @@ #include "llvm/GlobalVariable.h" #include "llvm/DerivedTypes.h" #include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineJumpTableInfo.h" +#include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/ADT/STLExtras.h" #include "llvm/Support/ErrorHandling.h" @@ -507,7 +510,6 @@ TargetLowering::TargetLowering(TargetMachine &tm,TargetLoweringObjectFile *tlof) setOperationAction(ISD::TRAP, MVT::Other, Expand); IsLittleEndian = TD->isLittleEndian(); - UsesGlobalOffsetTable = false; ShiftAmountTy = PointerTy = MVT::getIntegerVT(8*TD->getPointerSize()); memset(RegClassForVT, 0,MVT::LAST_VALUETYPE*sizeof(TargetRegisterClass*)); memset(TargetDAGCombineArray, 0, array_lengthof(TargetDAGCombineArray)); @@ -538,6 +540,24 @@ TargetLowering::~TargetLowering() { delete &TLOF; } +/// canOpTrap - Returns true if the operation can trap for the value type. +/// VT must be a legal type. 
+bool TargetLowering::canOpTrap(unsigned Op, EVT VT) const { + assert(isTypeLegal(VT)); + switch (Op) { + default: + return false; + case ISD::FDIV: + case ISD::FREM: + case ISD::SDIV: + case ISD::UDIV: + case ISD::SREM: + case ISD::UREM: + return true; + } +} + + static unsigned getVectorTypeBreakdownMVT(MVT VT, MVT &IntermediateVT, unsigned &NumIntermediates, EVT &RegisterVT, @@ -682,7 +702,7 @@ void TargetLowering::computeRegisterProperties() { for (unsigned nVT = i+1; nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) { EVT SVT = (MVT::SimpleValueType)nVT; if (isTypeLegal(SVT) && SVT.getVectorElementType() == EltVT && - SVT.getVectorNumElements() > NElts) { + SVT.getVectorNumElements() > NElts && NElts != 1) { TransformToType[i] = SVT; ValueTypeActions.setTypeAction(VT, Promote); IsLegalWiderType = true; @@ -793,13 +813,40 @@ unsigned TargetLowering::getByValTypeAlignment(const Type *Ty) const { return TD->getCallFrameTypeAlignment(Ty); } +/// getJumpTableEncoding - Return the entry encoding for a jump table in the +/// current function. The returned value is a member of the +/// MachineJumpTableInfo::JTEntryKind enum. +unsigned TargetLowering::getJumpTableEncoding() const { + // In non-pic modes, just use the address of a block. + if (getTargetMachine().getRelocationModel() != Reloc::PIC_) + return MachineJumpTableInfo::EK_BlockAddress; + + // In PIC mode, if the target supports a GPRel32 directive, use it. + if (getTargetMachine().getMCAsmInfo()->getGPRel32Directive() != 0) + return MachineJumpTableInfo::EK_GPRel32BlockAddress; + + // Otherwise, use a label difference. + return MachineJumpTableInfo::EK_LabelDifference32; +} + SDValue TargetLowering::getPICJumpTableRelocBase(SDValue Table, SelectionDAG &DAG) const { - if (usesGlobalOffsetTable()) + // If our PIC model is GP relative, use the global offset table as the base. + if (getJumpTableEncoding() == MachineJumpTableInfo::EK_GPRel32BlockAddress) return DAG.getGLOBAL_OFFSET_TABLE(getPointerTy()); return Table; } +/// getPICJumpTableRelocBaseExpr - This returns the relocation base for the +/// given PIC jumptable, the same as getPICJumpTableRelocBase, but as an +/// MCExpr. +const MCExpr * +TargetLowering::getPICJumpTableRelocBaseExpr(const MachineFunction *MF, + unsigned JTI,MCContext &Ctx) const{ + // The normal PIC reloc base is the label at the start of the jump table. + return MCSymbolRefExpr::Create(MF->getJTISymbol(JTI, Ctx), Ctx); +} + bool TargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const { // Assume that everything is safe in static mode. @@ -1669,7 +1716,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, SDValue NewLoad = DAG.getLoad(newVT, dl, Lod->getChain(), Ptr, Lod->getSrcValue(), Lod->getSrcValueOffset() + bestOffset, - false, NewAlign); + false, false, NewAlign); return DAG.getSetCC(dl, VT, DAG.getNode(ISD::AND, dl, newVT, NewLoad, DAG.getConstant(bestMask.trunc(bestWidth), @@ -2337,7 +2384,7 @@ getRegForInlineAsmConstraint(const std::string &Constraint, E = RI->regclass_end(); RCI != E; ++RCI) { const TargetRegisterClass *RC = *RCI; - // If none of the the value types for this register class are valid, we + // If none of the value types for this register class are valid, we // can't use it. For example, 64-bit reg classes on 32-bit targets. 
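[Two additions in these TargetLowering.cpp hunks deserve a gloss. canOpTrap gives passes a way to ask whether an operation may fault (the divisions and remainders, which can divide by zero), and getJumpTableEncoding replaces the old UsesGlobalOffsetTable flag with a three-way entry-kind policy. A hypothetical caller of the first, assuming TLI is a TargetLowering reference; the helper name is invented for illustration:]

    bool canSpeculate(const TargetLowering &TLI, unsigned Opcode, EVT VT) {
      // canOpTrap asserts the type is legal, so check that first;
      // hoisting a trapping op past control flow is never safe.
      return TLI.isTypeLegal(VT) && !TLI.canOpTrap(Opcode, VT);
    }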
bool isLegal = false; for (TargetRegisterClass::vt_iterator I = RC->vt_begin(), E = RC->vt_end(); diff --git a/lib/CodeGen/SimpleRegisterCoalescing.cpp b/lib/CodeGen/SimpleRegisterCoalescing.cpp index 27d429b..e7b0cff 100644 --- a/lib/CodeGen/SimpleRegisterCoalescing.cpp +++ b/lib/CodeGen/SimpleRegisterCoalescing.cpp @@ -197,7 +197,7 @@ bool SimpleRegisterCoalescing::AdjustCopiesBackFrom(LiveInterval &IntA, SlotIndex FillerStart = ValLR->end, FillerEnd = BLR->start; // We are about to delete CopyMI, so need to remove it as the 'instruction - // that defines this value #'. Update the the valnum with the new defining + // that defines this value #'. Update the valnum with the new defining // instruction #. BValNo->def = FillerStart; BValNo->setCopy(0); @@ -375,8 +375,9 @@ bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(LiveInterval &IntA, // If some of the uses of IntA.reg is already coalesced away, return false. // It's not possible to determine whether it's safe to perform the coalescing. - for (MachineRegisterInfo::use_iterator UI = mri_->use_begin(IntA.reg), - UE = mri_->use_end(); UI != UE; ++UI) { + for (MachineRegisterInfo::use_nodbg_iterator UI = + mri_->use_nodbg_begin(IntA.reg), + UE = mri_->use_nodbg_end(); UI != UE; ++UI) { MachineInstr *UseMI = &*UI; SlotIndex UseIdx = li_->getInstructionIndex(UseMI); LiveInterval::iterator ULR = IntA.FindLiveRangeContaining(UseIdx); @@ -430,6 +431,12 @@ bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(LiveInterval &IntA, ++UI; if (JoinedCopies.count(UseMI)) continue; + if (UseMI->isDebugValue()) { + // FIXME These don't have an instruction index. Not clear we have enough + // info to decide whether to do this replacement or not. For now do it. + UseMO.setReg(NewReg); + continue; + } SlotIndex UseIdx = li_->getInstructionIndex(UseMI).getUseIndex(); LiveInterval::iterator ULR = IntA.FindLiveRangeContaining(UseIdx); if (ULR == IntA.end() || ULR->valno != AValNo) @@ -659,7 +666,7 @@ bool SimpleRegisterCoalescing::ReMaterializeTrivialDef(LiveInterval &SrcInt, return false; if (TID.getNumDefs() != 1) return false; - if (DefMI->getOpcode() != TargetInstrInfo::IMPLICIT_DEF) { + if (!DefMI->isImplicitDef()) { // Make sure the copy destination register class fits the instruction // definition register class. The mismatch can happen as a result of earlier // extract_subreg, insert_subreg, subreg_to_reg coalescing. @@ -764,11 +771,16 @@ SimpleRegisterCoalescing::UpdateRegDefsUses(unsigned SrcReg, unsigned DstReg, SubIdx = 0; } + // Copy the register use-list before traversing it. We may be adding operands + // and invalidating pointers. + SmallVector<std::pair<MachineInstr*, unsigned>, 32> reglist; for (MachineRegisterInfo::reg_iterator I = mri_->reg_begin(SrcReg), - E = mri_->reg_end(); I != E; ) { - MachineOperand &O = I.getOperand(); - MachineInstr *UseMI = &*I; - ++I; + E = mri_->reg_end(); I != E; ++I) + reglist.push_back(std::make_pair(&*I, I.getOperandNo())); + + for (unsigned N=0; N != reglist.size(); ++N) { + MachineInstr *UseMI = reglist[N].first; + MachineOperand &O = UseMI->getOperand(reglist[N].second); unsigned OldSubIdx = O.getSubReg(); if (DstIsPhys) { unsigned UseDstReg = DstReg; @@ -789,6 +801,19 @@ SimpleRegisterCoalescing::UpdateRegDefsUses(unsigned SrcReg, unsigned DstReg, O.setReg(UseDstReg); O.setSubReg(0); + if (OldSubIdx) { + // Def and kill of subregister of a virtual register actually defs and + // kills the whole register. Add imp-defs and imp-kills as needed. 
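[The UpdateRegDefsUses change just above is a classic iterator-invalidation fix: rewriting operands may add implicit operands to an instruction, which can invalidate the register use-list iterators being walked, so the list is snapshotted into a SmallVector first and the copy is indexed instead. The same defensive pattern in miniature:]

    #include <cstddef>
    #include <vector>

    // Iterating a container while the loop body may grow it
    // invalidates iterators; copy what you need up front and index
    // into the stable copy instead.
    void drainSafely(std::vector<int> &worklist) {
      std::vector<int> snapshot(worklist);       // snapshot first
      for (std::size_t n = 0; n != snapshot.size(); ++n)
        if (snapshot[n] % 2 == 0)
          worklist.push_back(snapshot[n] / 2);   // growth cannot bite us
    }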
+ if (O.isDef()) { + if(O.isDead()) + UseMI->addRegisterDead(DstReg, tri_, true); + else + UseMI->addRegisterDefined(DstReg, tri_); + } else if (!O.isUndef() && + (O.isKill() || + UseMI->isRegTiedToDefOperand(&O-&UseMI->getOperand(0)))) + UseMI->addRegisterKilled(DstReg, tri_, true); + } continue; } @@ -1029,8 +1054,9 @@ SimpleRegisterCoalescing::isWinToJoinVRWithSrcPhysReg(MachineInstr *CopyMI, unsigned Threshold = allocatableRCRegs_[RC].count() * 2; unsigned Length = li_->getApproximateInstructionCount(DstInt); if (Length > Threshold && - (((float)std::distance(mri_->use_begin(DstInt.reg), - mri_->use_end()) / Length) < (1.0 / Threshold))) + (((float)std::distance(mri_->use_nodbg_begin(DstInt.reg), + mri_->use_nodbg_end()) / Length) < + (1.0 / Threshold))) return false; // If the virtual register live interval extends into a loop, turn down @@ -1079,15 +1105,16 @@ SimpleRegisterCoalescing::isWinToJoinVRWithDstPhysReg(MachineInstr *CopyMI, MachineBasicBlock *CopyMBB, LiveInterval &DstInt, LiveInterval &SrcInt) { - // If the virtual register live interval is long but it has low use desity, + // If the virtual register live interval is long but it has low use density, // do not join them, instead mark the physical register as its allocation // preference. const TargetRegisterClass *RC = mri_->getRegClass(SrcInt.reg); unsigned Threshold = allocatableRCRegs_[RC].count() * 2; unsigned Length = li_->getApproximateInstructionCount(SrcInt); if (Length > Threshold && - (((float)std::distance(mri_->use_begin(SrcInt.reg), - mri_->use_end()) / Length) < (1.0 / Threshold))) + (((float)std::distance(mri_->use_nodbg_begin(SrcInt.reg), + mri_->use_nodbg_end()) / Length) < + (1.0 / Threshold))) return false; if (SrcInt.empty()) @@ -1139,12 +1166,14 @@ SimpleRegisterCoalescing::isWinToJoinCrossClass(unsigned LargeReg, LiveInterval &SmallInt = li_->getInterval(SmallReg); unsigned LargeSize = li_->getApproximateInstructionCount(LargeInt); unsigned SmallSize = li_->getApproximateInstructionCount(SmallInt); - if (SmallSize > Threshold || LargeSize > Threshold) - if ((float)std::distance(mri_->use_begin(SmallReg), - mri_->use_end()) / SmallSize < - (float)std::distance(mri_->use_begin(LargeReg), - mri_->use_end()) / LargeSize) + if (LargeSize > Threshold) { + unsigned SmallUses = std::distance(mri_->use_nodbg_begin(SmallReg), + mri_->use_nodbg_end()); + unsigned LargeUses = std::distance(mri_->use_nodbg_begin(LargeReg), + mri_->use_nodbg_end()); + if (SmallUses*LargeSize < LargeUses*SmallSize) return false; + } return true; } @@ -1164,13 +1193,15 @@ SimpleRegisterCoalescing::HasIncompatibleSubRegDefUse(MachineInstr *CopyMI, for (MachineRegisterInfo::reg_iterator I = mri_->reg_begin(VirtReg), E = mri_->reg_end(); I != E; ++I) { MachineOperand &O = I.getOperand(); + if (O.isDebug()) + continue; MachineInstr *MI = &*I; if (MI == CopyMI || JoinedCopies.count(MI)) continue; unsigned SubIdx = O.getSubReg(); if (SubIdx && !tri_->getSubReg(PhysReg, SubIdx)) return true; - if (MI->getOpcode() == TargetInstrInfo::EXTRACT_SUBREG) { + if (MI->isExtractSubreg()) { SubIdx = MI->getOperand(2).getImm(); if (O.isUse() && !tri_->getSubReg(PhysReg, SubIdx)) return true; @@ -1184,8 +1215,7 @@ SimpleRegisterCoalescing::HasIncompatibleSubRegDefUse(MachineInstr *CopyMI, return true; } } - if (MI->getOpcode() == TargetInstrInfo::INSERT_SUBREG || - MI->getOpcode() == TargetInstrInfo::SUBREG_TO_REG) { + if (MI->isInsertSubreg() || MI->isSubregToReg()) { SubIdx = MI->getOperand(3).getImm(); if (VirtReg == MI->getOperand(0).getReg()) { if 
(!tri_->getSubReg(PhysReg, SubIdx)) @@ -1296,9 +1326,9 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { DEBUG(dbgs() << li_->getInstructionIndex(CopyMI) << '\t' << *CopyMI); unsigned SrcReg, DstReg, SrcSubIdx = 0, DstSubIdx = 0; - bool isExtSubReg = CopyMI->getOpcode() == TargetInstrInfo::EXTRACT_SUBREG; - bool isInsSubReg = CopyMI->getOpcode() == TargetInstrInfo::INSERT_SUBREG; - bool isSubRegToReg = CopyMI->getOpcode() == TargetInstrInfo::SUBREG_TO_REG; + bool isExtSubReg = CopyMI->isExtractSubreg(); + bool isInsSubReg = CopyMI->isInsertSubreg(); + bool isSubRegToReg = CopyMI->isSubregToReg(); unsigned SubIdx = 0; if (isExtSubReg) { DstReg = CopyMI->getOperand(0).getReg(); @@ -1551,7 +1581,10 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { (isExtSubReg || DstRC->isASubClass()) && !isWinToJoinCrossClass(LargeReg, SmallReg, allocatableRCRegs_[NewRC].count())) { - DEBUG(dbgs() << "\tSrc/Dest are different register classes.\n"); + DEBUG(dbgs() << "\tSrc/Dest are different register classes: " + << SrcRC->getName() << "/" + << DstRC->getName() << " -> " + << NewRC->getName() << ".\n"); // Allow the coalescer to try again in case either side gets coalesced to // a physical register that's compatible with the other side. e.g. // r1024 = MOV32to32_ r1025 @@ -1631,8 +1664,8 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { unsigned Length = li_->getApproximateInstructionCount(JoinVInt); float Ratio = 1.0 / Threshold; if (Length > Threshold && - (((float)std::distance(mri_->use_begin(JoinVReg), - mri_->use_end()) / Length) < Ratio)) { + (((float)std::distance(mri_->use_nodbg_begin(JoinVReg), + mri_->use_nodbg_end()) / Length) < Ratio)) { mri_->setRegAllocationHint(JoinVInt.reg, 0, JoinPReg); ++numAborts; DEBUG(dbgs() << "\tMay tie down a physical register, abort!\n"); @@ -1755,6 +1788,23 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { UpdateRegDefsUses(SrcReg, DstReg, SubIdx); + // If we have extended the live range of a physical register, make sure we + // update live-in lists as well. + if (TargetRegisterInfo::isPhysicalRegister(DstReg)) { + const LiveInterval &VRegInterval = li_->getInterval(SrcReg); + SmallVector<MachineBasicBlock*, 16> BlockSeq; + for (LiveInterval::const_iterator I = VRegInterval.begin(), + E = VRegInterval.end(); I != E; ++I ) { + li_->findLiveInMBBs(I->start, I->end, BlockSeq); + for (unsigned idx = 0, size = BlockSeq.size(); idx != size; ++idx) { + MachineBasicBlock &block = *BlockSeq[idx]; + if (!block.isLiveIn(DstReg)) + block.addLiveIn(DstReg); + } + BlockSeq.clear(); + } + } + // SrcReg is guarateed to be the register whose live interval that is // being merged. 
li_->removeInterval(SrcReg); @@ -1849,11 +1899,11 @@ static bool isValNoDefMove(const MachineInstr *MI, unsigned DR, unsigned SR, unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx; if (TII->isMoveInstr(*MI, SrcReg, DstReg, SrcSubIdx, DstSubIdx)) ; - else if (MI->getOpcode() == TargetInstrInfo::EXTRACT_SUBREG) { + else if (MI->isExtractSubreg()) { DstReg = MI->getOperand(0).getReg(); SrcReg = MI->getOperand(1).getReg(); - } else if (MI->getOpcode() == TargetInstrInfo::SUBREG_TO_REG || - MI->getOpcode() == TargetInstrInfo::INSERT_SUBREG) { + } else if (MI->isSubregToReg() || + MI->isInsertSubreg()) { DstReg = MI->getOperand(0).getReg(); SrcReg = MI->getOperand(2).getReg(); } else @@ -2425,16 +2475,15 @@ void SimpleRegisterCoalescing::CopyCoalesceInMBB(MachineBasicBlock *MBB, // If this isn't a copy nor a extract_subreg, we can't join intervals. unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx; bool isInsUndef = false; - if (Inst->getOpcode() == TargetInstrInfo::EXTRACT_SUBREG) { + if (Inst->isExtractSubreg()) { DstReg = Inst->getOperand(0).getReg(); SrcReg = Inst->getOperand(1).getReg(); - } else if (Inst->getOpcode() == TargetInstrInfo::INSERT_SUBREG) { + } else if (Inst->isInsertSubreg()) { DstReg = Inst->getOperand(0).getReg(); SrcReg = Inst->getOperand(2).getReg(); if (Inst->getOperand(1).isUndef()) isInsUndef = true; - } else if (Inst->getOpcode() == TargetInstrInfo::INSERT_SUBREG || - Inst->getOpcode() == TargetInstrInfo::SUBREG_TO_REG) { + } else if (Inst->isInsertSubreg() || Inst->isSubregToReg()) { DstReg = Inst->getOperand(0).getReg(); SrcReg = Inst->getOperand(2).getReg(); } else if (!tii_->isMoveInstr(*Inst, SrcReg, DstReg, SrcSubIdx, DstSubIdx)) @@ -2549,8 +2598,8 @@ SimpleRegisterCoalescing::differingRegisterClasses(unsigned RegA, return !RegClassA->contains(RegB); } -/// lastRegisterUse - Returns the last use of the specific register between -/// cycles Start and End or NULL if there are no uses. +/// lastRegisterUse - Returns the last (non-debug) use of the specific register +/// between cycles Start and End or NULL if there are no uses. MachineOperand * SimpleRegisterCoalescing::lastRegisterUse(SlotIndex Start, SlotIndex End, @@ -2559,8 +2608,8 @@ SimpleRegisterCoalescing::lastRegisterUse(SlotIndex Start, UseIdx = SlotIndex(); if (TargetRegisterInfo::isVirtualRegister(Reg)) { MachineOperand *LastUse = NULL; - for (MachineRegisterInfo::use_iterator I = mri_->use_begin(Reg), - E = mri_->use_end(); I != E; ++I) { + for (MachineRegisterInfo::use_nodbg_iterator I = mri_->use_nodbg_begin(Reg), + E = mri_->use_nodbg_end(); I != E; ++I) { MachineOperand &Use = I.getOperand(); MachineInstr *UseMI = Use.getParent(); unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx; @@ -2670,10 +2719,8 @@ bool SimpleRegisterCoalescing::runOnMachineFunction(MachineFunction &fn) { // Delete all coalesced copies. 
bool DoDelete = true; if (!tii_->isMoveInstr(*MI, SrcReg, DstReg, SrcSubIdx, DstSubIdx)) { - assert((MI->getOpcode() == TargetInstrInfo::EXTRACT_SUBREG || - MI->getOpcode() == TargetInstrInfo::INSERT_SUBREG || - MI->getOpcode() == TargetInstrInfo::SUBREG_TO_REG) && - "Unrecognized copy instruction"); + assert((MI->isExtractSubreg() || MI->isInsertSubreg() || + MI->isSubregToReg()) && "Unrecognized copy instruction"); DstReg = MI->getOperand(0).getReg(); if (TargetRegisterInfo::isPhysicalRegister(DstReg)) // Do not delete extract_subreg, insert_subreg of physical diff --git a/lib/CodeGen/SjLjEHPrepare.cpp b/lib/CodeGen/SjLjEHPrepare.cpp index 9558933..8d4d1b2 100644 --- a/lib/CodeGen/SjLjEHPrepare.cpp +++ b/lib/CodeGen/SjLjEHPrepare.cpp @@ -51,6 +51,7 @@ namespace { Value *PersonalityFn; Constant *SelectorFn; Constant *ExceptionFn; + Constant *CallSiteFn; Value *CallSite; public: @@ -116,6 +117,7 @@ bool SjLjEHPass::doInitialization(Module &M) { LSDAAddrFn = Intrinsic::getDeclaration(&M, Intrinsic::eh_sjlj_lsda); SelectorFn = Intrinsic::getDeclaration(&M, Intrinsic::eh_selector); ExceptionFn = Intrinsic::getDeclaration(&M, Intrinsic::eh_exception); + CallSiteFn = Intrinsic::getDeclaration(&M, Intrinsic::eh_sjlj_callsite); PersonalityFn = 0; return true; @@ -143,15 +145,14 @@ void SjLjEHPass::markInvokeCallSite(InvokeInst *II, unsigned InvokeNo, } } - // Insert a store of the invoke num before the invoke and store zero into the - // location afterward. + // Insert a store of the invoke num before the invoke new StoreInst(CallSiteNoC, CallSite, true, II); // volatile + CallInst::Create(CallSiteFn, CallSiteNoC, "", II); // Add a switch case to our unwind block. CatchSwitch->addCase(SwitchValC, II->getUnwindDest()); - // We still want this to look like an invoke so we emit the LSDA properly - // FIXME: ??? Or will this cause strangeness with mis-matched IDs like - // when it was in the front end? + // We still want this to look like an invoke so we emit the LSDA properly, + // so we don't transform the invoke into a call here. } /// MarkBlocksLiveIn - Insert BB and all of its predecessors into LiveBBs until diff --git a/lib/CodeGen/SlotIndexes.cpp b/lib/CodeGen/SlotIndexes.cpp index a23efb2..6110ef5 100644 --- a/lib/CodeGen/SlotIndexes.cpp +++ b/lib/CodeGen/SlotIndexes.cpp @@ -95,7 +95,7 @@ bool SlotIndexes::runOnMachineFunction(MachineFunction &fn) { push_back(createEntry(0, index)); - // Iterate over the the function. + // Iterate over the function. for (MachineFunction::iterator mbbItr = mf->begin(), mbbEnd = mf->end(); mbbItr != mbbEnd; ++mbbItr) { MachineBasicBlock *mbb = &*mbbItr; @@ -107,8 +107,8 @@ bool SlotIndexes::runOnMachineFunction(MachineFunction &fn) { for (MachineBasicBlock::iterator miItr = mbb->begin(), miEnd = mbb->end(); miItr != miEnd; ++miItr) { - MachineInstr *mi = &*miItr; - if (mi->getOpcode()==TargetInstrInfo::DEBUG_VALUE) + MachineInstr *mi = miItr; + if (mi->isDebugValue()) continue; if (miItr == mbb->getFirstTerminator()) { diff --git a/lib/CodeGen/StackProtector.cpp b/lib/CodeGen/StackProtector.cpp index 48bb5af..8a6a727 100644 --- a/lib/CodeGen/StackProtector.cpp +++ b/lib/CodeGen/StackProtector.cpp @@ -113,7 +113,7 @@ bool StackProtector::RequiresStackProtector() const { if (const ArrayType *AT = dyn_cast<ArrayType>(AI->getAllocatedType())) { // We apparently only care about character arrays.
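
The MarkBlocksLiveIn helper whose doc comment appears above is a classic fixed-point worklist: insert a block and its transitive predecessors until the set stops growing. A generic sketch of that shape over a plain predecessor map, with int block ids standing in for MachineBasicBlock:

#include <map>
#include <set>
#include <vector>

// Insert Start and all of its transitive predecessors into Live,
// stopping once the set stops growing -- the same fixed-point idea
// as MarkBlocksLiveIn, over a plain predecessor map.
static void markLiveIn(int Start,
                       const std::map<int, std::vector<int>> &Preds,
                       std::set<int> &Live) {
  std::vector<int> Worklist(1, Start);
  while (!Worklist.empty()) {
    int BB = Worklist.back();
    Worklist.pop_back();
    if (!Live.insert(BB).second)
      continue; // already visited; nothing new to add from here
    std::map<int, std::vector<int>>::const_iterator It = Preds.find(BB);
    if (It != Preds.end())
      for (int P : It->second)
        Worklist.push_back(P);
  }
}
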
- if (!AT->getElementType()->isInteger(8)) + if (!AT->getElementType()->isIntegerTy(8)) continue; // If an array has more than SSPBufferSize bytes of allocated space, diff --git a/lib/CodeGen/StackSlotColoring.cpp b/lib/CodeGen/StackSlotColoring.cpp index 2170703..12d38f0 100644 --- a/lib/CodeGen/StackSlotColoring.cpp +++ b/lib/CodeGen/StackSlotColoring.cpp @@ -504,10 +504,8 @@ bool StackSlotColoring::PropagateBackward(MachineBasicBlock::iterator MII, // Abort if the use is actually a sub-register def. We don't have enough // information to figure out if it is really legal. - if (MO.getSubReg() || - TID.getOpcode() == TargetInstrInfo::EXTRACT_SUBREG || - TID.getOpcode() == TargetInstrInfo::INSERT_SUBREG || - TID.getOpcode() == TargetInstrInfo::SUBREG_TO_REG) + if (MO.getSubReg() || MII->isExtractSubreg() || + MII->isInsertSubreg() || MII->isSubregToReg()) return false; const TargetRegisterClass *RC = TID.OpInfo[i].getRegClass(TRI); @@ -569,8 +567,7 @@ bool StackSlotColoring::PropagateForward(MachineBasicBlock::iterator MII, // Abort if the use is actually a sub-register use. We don't have enough // information to figure out if it is really legal. - if (MO.getSubReg() || - TID.getOpcode() == TargetInstrInfo::EXTRACT_SUBREG) + if (MO.getSubReg() || MII->isExtractSubreg()) return false; const TargetRegisterClass *RC = TID.OpInfo[i].getRegClass(TRI); diff --git a/lib/CodeGen/StrongPHIElimination.cpp b/lib/CodeGen/StrongPHIElimination.cpp index bd7cb75..f8f6a55 100644 --- a/lib/CodeGen/StrongPHIElimination.cpp +++ b/lib/CodeGen/StrongPHIElimination.cpp @@ -49,7 +49,7 @@ namespace { std::map<unsigned, std::vector<unsigned> > Stacks; // Registers in UsedByAnother are PHI nodes that are themselves - // used as operands to another another PHI node + // used as operands to another PHI node std::set<unsigned> UsedByAnother; // RenameSets is a map from a PHI-defined register @@ -419,7 +419,7 @@ void StrongPHIElimination::processBlock(MachineBasicBlock* MBB) { // Iterate over all the PHI nodes in this block MachineBasicBlock::iterator P = MBB->begin(); - while (P != MBB->end() && P->getOpcode() == TargetInstrInfo::PHI) { + while (P != MBB->end() && P->isPHI()) { unsigned DestReg = P->getOperand(0).getReg(); // Don't bother doing PHI elimination for dead PHIs. @@ -452,7 +452,7 @@ void StrongPHIElimination::processBlock(MachineBasicBlock* MBB) { // We don't need to insert copies for implicit_defs.
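
The PHI-handling loops in these passes step through operands two at a time because a machine PHI stores its def followed by (value, predecessor-block) operand pairs. A toy illustration of that layout, with ints standing in for registers and blocks:

#include <cstdio>
#include <vector>

// A machine PHI's operand list is [def, val0, pred0, val1, pred1, ...],
// which is why the passes above iterate with i += 2 starting at 1.
static void dumpPhiIncoming(const std::vector<int> &Operands) {
  for (size_t i = 1; i + 1 < Operands.size(); i += 2)
    std::printf("value %d from block %d\n", Operands[i], Operands[i + 1]);
}
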
MachineInstr* DefMI = MRI.getVRegDef(SrcReg); - if (DefMI->getOpcode() == TargetInstrInfo::IMPLICIT_DEF) + if (DefMI->isImplicitDef()) ProcessedNames.insert(SrcReg); // Check for trivial interferences via liveness information, allowing us @@ -470,7 +470,7 @@ void StrongPHIElimination::processBlock(MachineBasicBlock* MBB) { if (isLiveIn(SrcReg, P->getParent(), LI) || isLiveOut(P->getOperand(0).getReg(), MRI.getVRegDef(SrcReg)->getParent(), LI) || - ( MRI.getVRegDef(SrcReg)->getOpcode() == TargetInstrInfo::PHI && + ( MRI.getVRegDef(SrcReg)->isPHI() && isLiveIn(P->getOperand(0).getReg(), MRI.getVRegDef(SrcReg)->getParent(), LI) ) || ProcessedNames.count(SrcReg) || @@ -810,7 +810,7 @@ void StrongPHIElimination::InsertCopies(MachineDomTreeNode* MDTN, // Rewrite register uses from Stacks for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E; ++I) { - if (I->getOpcode() == TargetInstrInfo::PHI) + if (I->isPHI()) continue; for (unsigned i = 0; i < I->getNumOperands(); ++i) @@ -907,8 +907,7 @@ bool StrongPHIElimination::runOnMachineFunction(MachineFunction &Fn) { // Determine which phi node operands need copies for (MachineFunction::iterator I = Fn.begin(), E = Fn.end(); I != E; ++I) - if (!I->empty() && - I->begin()->getOpcode() == TargetInstrInfo::PHI) + if (!I->empty() && I->begin()->isPHI()) processBlock(I); // Break interferences where two different phis want to coalesce @@ -996,7 +995,7 @@ bool StrongPHIElimination::runOnMachineFunction(MachineFunction &Fn) { for (MachineFunction::iterator I = Fn.begin(), E = Fn.end(); I != E; ++I) { for (MachineBasicBlock::iterator BI = I->begin(), BE = I->end(); BI != BE; ++BI) - if (BI->getOpcode() == TargetInstrInfo::PHI) + if (BI->isPHI()) phis.push_back(BI); } diff --git a/lib/CodeGen/TailDuplication.cpp b/lib/CodeGen/TailDuplication.cpp index d6860bc..3223e53 100644 --- a/lib/CodeGen/TailDuplication.cpp +++ b/lib/CodeGen/TailDuplication.cpp @@ -121,7 +121,7 @@ static void VerifyPHIs(MachineFunction &MF, bool CheckExtra) { MBB->pred_end()); MachineBasicBlock::iterator MI = MBB->begin(); while (MI != MBB->end()) { - if (MI->getOpcode() != TargetInstrInfo::PHI) + if (!MI->isPHI()) break; for (SmallSetVector<MachineBasicBlock *, 8>::iterator PI = Preds.begin(), PE = Preds.end(); PI != PE; ++PI) { @@ -378,7 +378,7 @@ TailDuplicatePass::UpdateSuccessorsPHIs(MachineBasicBlock *FromBB, bool isDead, MachineBasicBlock *SuccBB = *SI; for (MachineBasicBlock::iterator II = SuccBB->begin(), EE = SuccBB->end(); II != EE; ++II) { - if (II->getOpcode() != TargetInstrInfo::PHI) + if (!II->isPHI()) break; unsigned Idx = 0; for (unsigned i = 1, e = II->getNumOperands(); i != e; i += 2) { @@ -403,26 +403,45 @@ TailDuplicatePass::UpdateSuccessorsPHIs(MachineBasicBlock *FromBB, bool isDead, II->RemoveOperand(i); } } - II->RemoveOperand(Idx+1); - II->RemoveOperand(Idx); - } + } else + Idx = 0; + + // If Idx is set, the operands at Idx and Idx+1 must be removed. + // We reuse the location to avoid expensive RemoveOperand calls. + DenseMap<unsigned,AvailableValsTy>::iterator LI=SSAUpdateVals.find(Reg); if (LI != SSAUpdateVals.end()) { // This register is defined in the tail block. 
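
The UpdateSuccessorsPHIs rewrite just below overwrites an existing operand pair in place via setReg()/setMBB() instead of calling RemoveOperand and re-adding, since removing an operand shifts every later operand down. The trade-off in miniature, on a plain vector:

#include <vector>

struct Operand { int Reg; int Block; };

// Expensive form: erase shifts all trailing elements, and push_back may
// reallocate -- analogous to RemoveOperand followed by addOperand.
static void replaceSlow(std::vector<Operand> &Ops, size_t Idx, Operand New) {
  Ops.erase(Ops.begin() + Idx);
  Ops.push_back(New);
}

// Cheap form: reuse the slot, as the patch does with setReg()/setMBB().
static void replaceFast(std::vector<Operand> &Ops, size_t Idx, Operand New) {
  Ops[Idx] = New;
}
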
for (unsigned j = 0, ee = LI->second.size(); j != ee; ++j) { MachineBasicBlock *SrcBB = LI->second[j].first; unsigned SrcReg = LI->second[j].second; - II->addOperand(MachineOperand::CreateReg(SrcReg, false)); - II->addOperand(MachineOperand::CreateMBB(SrcBB)); + if (Idx != 0) { + II->getOperand(Idx).setReg(SrcReg); + II->getOperand(Idx+1).setMBB(SrcBB); + Idx = 0; + } else { + II->addOperand(MachineOperand::CreateReg(SrcReg, false)); + II->addOperand(MachineOperand::CreateMBB(SrcBB)); + } } } else { // Live in tail block, must also be live in predecessors. for (unsigned j = 0, ee = TDBBs.size(); j != ee; ++j) { MachineBasicBlock *SrcBB = TDBBs[j]; - II->addOperand(MachineOperand::CreateReg(Reg, false)); - II->addOperand(MachineOperand::CreateMBB(SrcBB)); + if (Idx != 0) { + II->getOperand(Idx).setReg(Reg); + II->getOperand(Idx+1).setMBB(SrcBB); + Idx = 0; + } else { + II->addOperand(MachineOperand::CreateReg(Reg, false)); + II->addOperand(MachineOperand::CreateMBB(SrcBB)); + } } } + if (Idx != 0) { + II->RemoveOperand(Idx+1); + II->RemoveOperand(Idx); + } } } } @@ -476,7 +495,7 @@ TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB, MachineFunction &MF, if (InstrCount == MaxDuplicateCount) return false; // Remember if we saw a call. if (I->getDesc().isCall()) HasCall = true; - if (I->getOpcode() != TargetInstrInfo::PHI) + if (!I->isPHI()) InstrCount += 1; } // Heuristically, don't tail-duplicate calls if it would expand code size, @@ -528,7 +547,7 @@ TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB, MachineFunction &MF, while (I != TailBB->end()) { MachineInstr *MI = &*I; ++I; - if (MI->getOpcode() == TargetInstrInfo::PHI) { + if (MI->isPHI()) { // Replace the uses of the def of the PHI with the register coming // from PredBB. ProcessPHI(MI, TailBB, PredBB, LocalVRMap, CopyInfos); @@ -580,7 +599,7 @@ TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB, MachineFunction &MF, SmallVector<std::pair<unsigned,unsigned>, 4> CopyInfos; MachineBasicBlock::iterator I = TailBB->begin(); // Process PHI instructions first. - while (I != TailBB->end() && I->getOpcode() == TargetInstrInfo::PHI) { + while (I != TailBB->end() && I->isPHI()) { // Replace the uses of the def of the PHI with the register coming // from PredBB. MachineInstr *MI = &*I++; diff --git a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp new file mode 100644 index 0000000..190b533 --- /dev/null +++ b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp @@ -0,0 +1,874 @@ +//===-- llvm/CodeGen/TargetLoweringObjectFileImpl.cpp - Object File Info --===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements classes used to handle lowerings specific to common +// object file formats. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" +#include "llvm/Constants.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Function.h" +#include "llvm/GlobalVariable.h" +#include "llvm/CodeGen/MachineModuleInfoImpls.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCSectionMachO.h" +#include "llvm/MC/MCSectionELF.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/Target/Mangler.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/Support/Dwarf.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/StringExtras.h" +using namespace llvm; + +//===----------------------------------------------------------------------===// +// ELF +//===----------------------------------------------------------------------===// +typedef StringMap<const MCSectionELF*> ELFUniqueMapTy; + +TargetLoweringObjectFileELF::~TargetLoweringObjectFileELF() { + // If we have the section uniquing map, free it. + delete (ELFUniqueMapTy*)UniquingMap; +} + +const MCSection *TargetLoweringObjectFileELF:: +getELFSection(StringRef Section, unsigned Type, unsigned Flags, + SectionKind Kind, bool IsExplicit) const { + if (UniquingMap == 0) + UniquingMap = new ELFUniqueMapTy(); + ELFUniqueMapTy &Map = *(ELFUniqueMapTy*)UniquingMap; + + // Do the lookup, if we have a hit, return it. + const MCSectionELF *&Entry = Map[Section]; + if (Entry) return Entry; + + return Entry = MCSectionELF::Create(Section, Type, Flags, Kind, IsExplicit, + getContext()); +} + +void TargetLoweringObjectFileELF::Initialize(MCContext &Ctx, + const TargetMachine &TM) { + if (UniquingMap != 0) + ((ELFUniqueMapTy*)UniquingMap)->clear(); + TargetLoweringObjectFile::Initialize(Ctx, TM); + + BSSSection = + getELFSection(".bss", MCSectionELF::SHT_NOBITS, + MCSectionELF::SHF_WRITE | MCSectionELF::SHF_ALLOC, + SectionKind::getBSS()); + + TextSection = + getELFSection(".text", MCSectionELF::SHT_PROGBITS, + MCSectionELF::SHF_EXECINSTR | MCSectionELF::SHF_ALLOC, + SectionKind::getText()); + + DataSection = + getELFSection(".data", MCSectionELF::SHT_PROGBITS, + MCSectionELF::SHF_WRITE | MCSectionELF::SHF_ALLOC, + SectionKind::getDataRel()); + + ReadOnlySection = + getELFSection(".rodata", MCSectionELF::SHT_PROGBITS, + MCSectionELF::SHF_ALLOC, + SectionKind::getReadOnly()); + + TLSDataSection = + getELFSection(".tdata", MCSectionELF::SHT_PROGBITS, + MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_TLS | + MCSectionELF::SHF_WRITE, SectionKind::getThreadData()); + + TLSBSSSection = + getELFSection(".tbss", MCSectionELF::SHT_NOBITS, + MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_TLS | + MCSectionELF::SHF_WRITE, SectionKind::getThreadBSS()); + + DataRelSection = + getELFSection(".data.rel", MCSectionELF::SHT_PROGBITS, + MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_WRITE, + SectionKind::getDataRel()); + + DataRelLocalSection = + getELFSection(".data.rel.local", MCSectionELF::SHT_PROGBITS, + MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_WRITE, + SectionKind::getDataRelLocal()); + + DataRelROSection = + getELFSection(".data.rel.ro", MCSectionELF::SHT_PROGBITS, + MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_WRITE, + SectionKind::getReadOnlyWithRel()); + + DataRelROLocalSection = + getELFSection(".data.rel.ro.local", MCSectionELF::SHT_PROGBITS, + MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_WRITE, + 
SectionKind::getReadOnlyWithRelLocal()); + + MergeableConst4Section = + getELFSection(".rodata.cst4", MCSectionELF::SHT_PROGBITS, + MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_MERGE, + SectionKind::getMergeableConst4()); + + MergeableConst8Section = + getELFSection(".rodata.cst8", MCSectionELF::SHT_PROGBITS, + MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_MERGE, + SectionKind::getMergeableConst8()); + + MergeableConst16Section = + getELFSection(".rodata.cst16", MCSectionELF::SHT_PROGBITS, + MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_MERGE, + SectionKind::getMergeableConst16()); + + StaticCtorSection = + getELFSection(".ctors", MCSectionELF::SHT_PROGBITS, + MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_WRITE, + SectionKind::getDataRel()); + + StaticDtorSection = + getELFSection(".dtors", MCSectionELF::SHT_PROGBITS, + MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_WRITE, + SectionKind::getDataRel()); + + // Exception Handling Sections. + + // FIXME: We're emitting LSDA info into a readonly section on ELF, even though + // it contains relocatable pointers. In PIC mode, this is probably a big + // runtime hit for C++ apps. Either the contents of the LSDA need to be + // adjusted or this should be a data section. + LSDASection = + getELFSection(".gcc_except_table", MCSectionELF::SHT_PROGBITS, + MCSectionELF::SHF_ALLOC, SectionKind::getReadOnly()); + EHFrameSection = + getELFSection(".eh_frame", MCSectionELF::SHT_PROGBITS, + MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_WRITE, + SectionKind::getDataRel()); + + // Debug Info Sections. + DwarfAbbrevSection = + getELFSection(".debug_abbrev", MCSectionELF::SHT_PROGBITS, 0, + SectionKind::getMetadata()); + DwarfInfoSection = + getELFSection(".debug_info", MCSectionELF::SHT_PROGBITS, 0, + SectionKind::getMetadata()); + DwarfLineSection = + getELFSection(".debug_line", MCSectionELF::SHT_PROGBITS, 0, + SectionKind::getMetadata()); + DwarfFrameSection = + getELFSection(".debug_frame", MCSectionELF::SHT_PROGBITS, 0, + SectionKind::getMetadata()); + DwarfPubNamesSection = + getELFSection(".debug_pubnames", MCSectionELF::SHT_PROGBITS, 0, + SectionKind::getMetadata()); + DwarfPubTypesSection = + getELFSection(".debug_pubtypes", MCSectionELF::SHT_PROGBITS, 0, + SectionKind::getMetadata()); + DwarfStrSection = + getELFSection(".debug_str", MCSectionELF::SHT_PROGBITS, 0, + SectionKind::getMetadata()); + DwarfLocSection = + getELFSection(".debug_loc", MCSectionELF::SHT_PROGBITS, 0, + SectionKind::getMetadata()); + DwarfARangesSection = + getELFSection(".debug_aranges", MCSectionELF::SHT_PROGBITS, 0, + SectionKind::getMetadata()); + DwarfRangesSection = + getELFSection(".debug_ranges", MCSectionELF::SHT_PROGBITS, 0, + SectionKind::getMetadata()); + DwarfMacroInfoSection = + getELFSection(".debug_macinfo", MCSectionELF::SHT_PROGBITS, 0, + SectionKind::getMetadata()); +} + + +static SectionKind +getELFKindForNamedSection(StringRef Name, SectionKind K) { + if (Name.empty() || Name[0] != '.') return K; + + // Some lame default implementation based on some magic section names. 
+ if (Name == ".bss" || + Name.startswith(".bss.") || + Name.startswith(".gnu.linkonce.b.") || + Name.startswith(".llvm.linkonce.b.") || + Name == ".sbss" || + Name.startswith(".sbss.") || + Name.startswith(".gnu.linkonce.sb.") || + Name.startswith(".llvm.linkonce.sb.")) + return SectionKind::getBSS(); + + if (Name == ".tdata" || + Name.startswith(".tdata.") || + Name.startswith(".gnu.linkonce.td.") || + Name.startswith(".llvm.linkonce.td.")) + return SectionKind::getThreadData(); + + if (Name == ".tbss" || + Name.startswith(".tbss.") || + Name.startswith(".gnu.linkonce.tb.") || + Name.startswith(".llvm.linkonce.tb.")) + return SectionKind::getThreadBSS(); + + return K; +} + + +static unsigned getELFSectionType(StringRef Name, SectionKind K) { + + if (Name == ".init_array") + return MCSectionELF::SHT_INIT_ARRAY; + + if (Name == ".fini_array") + return MCSectionELF::SHT_FINI_ARRAY; + + if (Name == ".preinit_array") + return MCSectionELF::SHT_PREINIT_ARRAY; + + if (K.isBSS() || K.isThreadBSS()) + return MCSectionELF::SHT_NOBITS; + + return MCSectionELF::SHT_PROGBITS; +} + + +static unsigned +getELFSectionFlags(SectionKind K) { + unsigned Flags = 0; + + if (!K.isMetadata()) + Flags |= MCSectionELF::SHF_ALLOC; + + if (K.isText()) + Flags |= MCSectionELF::SHF_EXECINSTR; + + if (K.isWriteable()) + Flags |= MCSectionELF::SHF_WRITE; + + if (K.isThreadLocal()) + Flags |= MCSectionELF::SHF_TLS; + + // K.isMergeableConst() is left out to honour PR4650 + if (K.isMergeableCString() || K.isMergeableConst4() || + K.isMergeableConst8() || K.isMergeableConst16()) + Flags |= MCSectionELF::SHF_MERGE; + + if (K.isMergeableCString()) + Flags |= MCSectionELF::SHF_STRINGS; + + return Flags; +} + + +const MCSection *TargetLoweringObjectFileELF:: +getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind, + Mangler *Mang, const TargetMachine &TM) const { + StringRef SectionName = GV->getSection(); + + // Infer section flags from the section name if we can. + Kind = getELFKindForNamedSection(SectionName, Kind); + + return getELFSection(SectionName, + getELFSectionType(SectionName, Kind), + getELFSectionFlags(Kind), Kind, true); +} + +static const char *getSectionPrefixForUniqueGlobal(SectionKind Kind) { + if (Kind.isText()) return ".gnu.linkonce.t."; + if (Kind.isReadOnly()) return ".gnu.linkonce.r."; + + if (Kind.isThreadData()) return ".gnu.linkonce.td."; + if (Kind.isThreadBSS()) return ".gnu.linkonce.tb."; + + if (Kind.isDataNoRel()) return ".gnu.linkonce.d."; + if (Kind.isDataRelLocal()) return ".gnu.linkonce.d.rel.local."; + if (Kind.isDataRel()) return ".gnu.linkonce.d.rel."; + if (Kind.isReadOnlyWithRelLocal()) return ".gnu.linkonce.d.rel.ro.local."; + + assert(Kind.isReadOnlyWithRel() && "Unknown section kind"); + return ".gnu.linkonce.d.rel.ro."; +} + +const MCSection *TargetLoweringObjectFileELF:: +SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, + Mangler *Mang, const TargetMachine &TM) const { + + // If this global is linkonce/weak and the target handles this by emitting it + // into a 'uniqued' section name, create and return the section now. 
+ if (GV->isWeakForLinker() && !Kind.isCommon() && !Kind.isBSS()) { + const char *Prefix = getSectionPrefixForUniqueGlobal(Kind); + SmallString<128> Name; + Name.append(Prefix, Prefix+strlen(Prefix)); + Mang->getNameWithPrefix(Name, GV, false); + return getELFSection(Name.str(), getELFSectionType(Name.str(), Kind), + getELFSectionFlags(Kind), Kind); + } + + if (Kind.isText()) return TextSection; + + if (Kind.isMergeable1ByteCString() || + Kind.isMergeable2ByteCString() || + Kind.isMergeable4ByteCString()) { + + // We also need alignment here. + // FIXME: this is getting the alignment of the character, not the + // alignment of the global! + unsigned Align = + TM.getTargetData()->getPreferredAlignment(cast<GlobalVariable>(GV)); + + const char *SizeSpec = ".rodata.str1."; + if (Kind.isMergeable2ByteCString()) + SizeSpec = ".rodata.str2."; + else if (Kind.isMergeable4ByteCString()) + SizeSpec = ".rodata.str4."; + else + assert(Kind.isMergeable1ByteCString() && "unknown string width"); + + + std::string Name = SizeSpec + utostr(Align); + return getELFSection(Name, MCSectionELF::SHT_PROGBITS, + MCSectionELF::SHF_ALLOC | + MCSectionELF::SHF_MERGE | + MCSectionELF::SHF_STRINGS, + Kind); + } + + if (Kind.isMergeableConst()) { + if (Kind.isMergeableConst4() && MergeableConst4Section) + return MergeableConst4Section; + if (Kind.isMergeableConst8() && MergeableConst8Section) + return MergeableConst8Section; + if (Kind.isMergeableConst16() && MergeableConst16Section) + return MergeableConst16Section; + return ReadOnlySection; // .const + } + + if (Kind.isReadOnly()) return ReadOnlySection; + + if (Kind.isThreadData()) return TLSDataSection; + if (Kind.isThreadBSS()) return TLSBSSSection; + + // Note: we claim that common symbols are put in BSSSection, but they are + // really emitted with the magic .comm directive, which creates a symbol table + // entry but not a section. + if (Kind.isBSS() || Kind.isCommon()) return BSSSection; + + if (Kind.isDataNoRel()) return DataSection; + if (Kind.isDataRelLocal()) return DataRelLocalSection; + if (Kind.isDataRel()) return DataRelSection; + if (Kind.isReadOnlyWithRelLocal()) return DataRelROLocalSection; + + assert(Kind.isReadOnlyWithRel() && "Unknown section kind"); + return DataRelROSection; +} + +/// getSectionForConstant - Given a mergeable constant with the +/// specified size and relocation information, return a section that it +/// should be placed in. +const MCSection *TargetLoweringObjectFileELF:: +getSectionForConstant(SectionKind Kind) const { + if (Kind.isMergeableConst4() && MergeableConst4Section) + return MergeableConst4Section; + if (Kind.isMergeableConst8() && MergeableConst8Section) + return MergeableConst8Section; + if (Kind.isMergeableConst16() && MergeableConst16Section) + return MergeableConst16Section; + if (Kind.isReadOnly()) + return ReadOnlySection; + + if (Kind.isReadOnlyWithRelLocal()) return DataRelROLocalSection; + assert(Kind.isReadOnlyWithRel() && "Unknown section kind"); + return DataRelROSection; +} + +const MCExpr *TargetLoweringObjectFileELF:: +getSymbolForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang, + MachineModuleInfo *MMI, unsigned Encoding) const { + + if (Encoding & dwarf::DW_EH_PE_indirect) { + MachineModuleInfoELF &ELFMMI = MMI->getObjFileInfo<MachineModuleInfoELF>(); + + SmallString<128> Name; + Mang->getNameWithPrefix(Name, GV, true); + Name += ".DW.stub"; + + // Add information about the stub reference to ELFMMI so that the stub + // gets emitted by the asmprinter. 
+ MCSymbol *Sym = getContext().GetOrCreateSymbol(Name.str()); + MCSymbol *&StubSym = ELFMMI.getGVStubEntry(Sym); + if (StubSym == 0) { + Name.clear(); + Mang->getNameWithPrefix(Name, GV, false); + StubSym = getContext().GetOrCreateSymbol(Name.str()); + } + + return TargetLoweringObjectFile:: + getSymbolForDwarfReference(Sym, MMI, + Encoding & ~dwarf::DW_EH_PE_indirect); + } + + return TargetLoweringObjectFile:: + getSymbolForDwarfGlobalReference(GV, Mang, MMI, Encoding); +} + +//===----------------------------------------------------------------------===// +// MachO +//===----------------------------------------------------------------------===// + +typedef StringMap<const MCSectionMachO*> MachOUniqueMapTy; + +TargetLoweringObjectFileMachO::~TargetLoweringObjectFileMachO() { + // If we have the MachO uniquing map, free it. + delete (MachOUniqueMapTy*)UniquingMap; +} + + +const MCSectionMachO *TargetLoweringObjectFileMachO:: +getMachOSection(StringRef Segment, StringRef Section, + unsigned TypeAndAttributes, + unsigned Reserved2, SectionKind Kind) const { + // We unique sections by their segment/section pair. The returned section + // may not have the same flags as the requested section, if so this should be + // diagnosed by the client as an error. + + // Create the map if it doesn't already exist. + if (UniquingMap == 0) + UniquingMap = new MachOUniqueMapTy(); + MachOUniqueMapTy &Map = *(MachOUniqueMapTy*)UniquingMap; + + // Form the name to look up. + SmallString<64> Name; + Name += Segment; + Name.push_back(','); + Name += Section; + + // Do the lookup, if we have a hit, return it. + const MCSectionMachO *&Entry = Map[Name.str()]; + if (Entry) return Entry; + + // Otherwise, return a new section. + return Entry = MCSectionMachO::Create(Segment, Section, TypeAndAttributes, + Reserved2, Kind, getContext()); +} + + +void TargetLoweringObjectFileMachO::Initialize(MCContext &Ctx, + const TargetMachine &TM) { + if (UniquingMap != 0) + ((MachOUniqueMapTy*)UniquingMap)->clear(); + TargetLoweringObjectFile::Initialize(Ctx, TM); + + TextSection // .text + = getMachOSection("__TEXT", "__text", + MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS, + SectionKind::getText()); + DataSection // .data + = getMachOSection("__DATA", "__data", 0, SectionKind::getDataRel()); + + CStringSection // .cstring + = getMachOSection("__TEXT", "__cstring", MCSectionMachO::S_CSTRING_LITERALS, + SectionKind::getMergeable1ByteCString()); + UStringSection + = getMachOSection("__TEXT","__ustring", 0, + SectionKind::getMergeable2ByteCString()); + FourByteConstantSection // .literal4 + = getMachOSection("__TEXT", "__literal4", MCSectionMachO::S_4BYTE_LITERALS, + SectionKind::getMergeableConst4()); + EightByteConstantSection // .literal8 + = getMachOSection("__TEXT", "__literal8", MCSectionMachO::S_8BYTE_LITERALS, + SectionKind::getMergeableConst8()); + + // ld_classic doesn't support .literal16 in 32-bit mode, and ld64 falls back + // to using it in -static mode. 
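
Each object-file lowering above lazily uniques sections through a string-keyed map, and Mach-O keys on the combined "segment,section" name. A standard-library sketch of the same get-or-create cache, with simplified stand-in types:

#include <memory>
#include <string>
#include <unordered_map>

struct Section { std::string Segment, Name; };

class SectionCache {
  // Keyed on "segment,section", like getMachOSection's lookup name.
  std::unordered_map<std::string, std::unique_ptr<Section>> Map;
public:
  const Section *get(const std::string &Segment, const std::string &Name) {
    std::string Key = Segment + ',' + Name;
    std::unique_ptr<Section> &Entry = Map[Key]; // null on first use
    if (!Entry)
      Entry.reset(new Section{Segment, Name});
    return Entry.get();
  }
};

Returning the cached pointer on a hit is what lets the client compare section identity by pointer, and it is also why mismatched flags for an existing name must be diagnosed rather than silently creating a second section.
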
+ SixteenByteConstantSection = 0; + if (TM.getRelocationModel() != Reloc::Static && + TM.getTargetData()->getPointerSize() == 32) + SixteenByteConstantSection = // .literal16 + getMachOSection("__TEXT", "__literal16",MCSectionMachO::S_16BYTE_LITERALS, + SectionKind::getMergeableConst16()); + + ReadOnlySection // .const + = getMachOSection("__TEXT", "__const", 0, SectionKind::getReadOnly()); + + TextCoalSection + = getMachOSection("__TEXT", "__textcoal_nt", + MCSectionMachO::S_COALESCED | + MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS, + SectionKind::getText()); + ConstTextCoalSection + = getMachOSection("__TEXT", "__const_coal", MCSectionMachO::S_COALESCED, + SectionKind::getText()); + ConstDataCoalSection + = getMachOSection("__DATA","__const_coal", MCSectionMachO::S_COALESCED, + SectionKind::getText()); + ConstDataSection // .const_data + = getMachOSection("__DATA", "__const", 0, + SectionKind::getReadOnlyWithRel()); + DataCoalSection + = getMachOSection("__DATA","__datacoal_nt", MCSectionMachO::S_COALESCED, + SectionKind::getDataRel()); + DataCommonSection + = getMachOSection("__DATA","__common", MCSectionMachO::S_ZEROFILL, + SectionKind::getBSS()); + DataBSSSection + = getMachOSection("__DATA","__bss", MCSectionMachO::S_ZEROFILL, + SectionKind::getBSS()); + + + LazySymbolPointerSection + = getMachOSection("__DATA", "__la_symbol_ptr", + MCSectionMachO::S_LAZY_SYMBOL_POINTERS, + SectionKind::getMetadata()); + NonLazySymbolPointerSection + = getMachOSection("__DATA", "__nl_symbol_ptr", + MCSectionMachO::S_NON_LAZY_SYMBOL_POINTERS, + SectionKind::getMetadata()); + + if (TM.getRelocationModel() == Reloc::Static) { + StaticCtorSection + = getMachOSection("__TEXT", "__constructor", 0,SectionKind::getDataRel()); + StaticDtorSection + = getMachOSection("__TEXT", "__destructor", 0, SectionKind::getDataRel()); + } else { + StaticCtorSection + = getMachOSection("__DATA", "__mod_init_func", + MCSectionMachO::S_MOD_INIT_FUNC_POINTERS, + SectionKind::getDataRel()); + StaticDtorSection + = getMachOSection("__DATA", "__mod_term_func", + MCSectionMachO::S_MOD_TERM_FUNC_POINTERS, + SectionKind::getDataRel()); + } + + // Exception Handling. + LSDASection = getMachOSection("__DATA", "__gcc_except_tab", 0, + SectionKind::getDataRel()); + EHFrameSection = + getMachOSection("__TEXT", "__eh_frame", + MCSectionMachO::S_COALESCED | + MCSectionMachO::S_ATTR_NO_TOC | + MCSectionMachO::S_ATTR_STRIP_STATIC_SYMS | + MCSectionMachO::S_ATTR_LIVE_SUPPORT, + SectionKind::getReadOnly()); + + // Debug Information. 
+ DwarfAbbrevSection = + getMachOSection("__DWARF", "__debug_abbrev", MCSectionMachO::S_ATTR_DEBUG, + SectionKind::getMetadata()); + DwarfInfoSection = + getMachOSection("__DWARF", "__debug_info", MCSectionMachO::S_ATTR_DEBUG, + SectionKind::getMetadata()); + DwarfLineSection = + getMachOSection("__DWARF", "__debug_line", MCSectionMachO::S_ATTR_DEBUG, + SectionKind::getMetadata()); + DwarfFrameSection = + getMachOSection("__DWARF", "__debug_frame", MCSectionMachO::S_ATTR_DEBUG, + SectionKind::getMetadata()); + DwarfPubNamesSection = + getMachOSection("__DWARF", "__debug_pubnames", MCSectionMachO::S_ATTR_DEBUG, + SectionKind::getMetadata()); + DwarfPubTypesSection = + getMachOSection("__DWARF", "__debug_pubtypes", MCSectionMachO::S_ATTR_DEBUG, + SectionKind::getMetadata()); + DwarfStrSection = + getMachOSection("__DWARF", "__debug_str", MCSectionMachO::S_ATTR_DEBUG, + SectionKind::getMetadata()); + DwarfLocSection = + getMachOSection("__DWARF", "__debug_loc", MCSectionMachO::S_ATTR_DEBUG, + SectionKind::getMetadata()); + DwarfARangesSection = + getMachOSection("__DWARF", "__debug_aranges", MCSectionMachO::S_ATTR_DEBUG, + SectionKind::getMetadata()); + DwarfRangesSection = + getMachOSection("__DWARF", "__debug_ranges", MCSectionMachO::S_ATTR_DEBUG, + SectionKind::getMetadata()); + DwarfMacroInfoSection = + getMachOSection("__DWARF", "__debug_macinfo", MCSectionMachO::S_ATTR_DEBUG, + SectionKind::getMetadata()); + DwarfDebugInlineSection = + getMachOSection("__DWARF", "__debug_inlined", MCSectionMachO::S_ATTR_DEBUG, + SectionKind::getMetadata()); +} + +const MCSection *TargetLoweringObjectFileMachO:: +getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind, + Mangler *Mang, const TargetMachine &TM) const { + // Parse the section specifier and create it if valid. + StringRef Segment, Section; + unsigned TAA, StubSize; + std::string ErrorCode = + MCSectionMachO::ParseSectionSpecifier(GV->getSection(), Segment, Section, + TAA, StubSize); + if (!ErrorCode.empty()) { + // If invalid, report the error with llvm_report_error. + llvm_report_error("Global variable '" + GV->getNameStr() + + "' has an invalid section specifier '" + GV->getSection()+ + "': " + ErrorCode + "."); + // Fall back to dropping it into the data section. + return DataSection; + } + + // Get the section. + const MCSectionMachO *S = + getMachOSection(Segment, Section, TAA, StubSize, Kind); + + // Okay, now that we got the section, verify that the TAA & StubSize agree. + // If the user declared multiple globals with different section flags, we need + // to reject it here. + if (S->getTypeAndAttributes() != TAA || S->getStubSize() != StubSize) { + // If invalid, report the error with llvm_report_error. + llvm_report_error("Global variable '" + GV->getNameStr() + + "' section type or attributes does not match previous" + " section specifier"); + } + + return S; +} + +const MCSection *TargetLoweringObjectFileMachO:: +SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, + Mangler *Mang, const TargetMachine &TM) const { + assert(!Kind.isThreadLocal() && "Darwin doesn't support TLS"); + + if (Kind.isText()) + return GV->isWeakForLinker() ? TextCoalSection : TextSection; + + // If this is weak/linkonce, put this in a coalescable section, either in text + // or data depending on if it is writable. + if (GV->isWeakForLinker()) { + if (Kind.isReadOnly()) + return ConstTextCoalSection; + return DataCoalSection; + } + + // FIXME: Alignment check should be handled by section classifier. 
+ if (Kind.isMergeable1ByteCString() || + Kind.isMergeable2ByteCString()) { + if (TM.getTargetData()->getPreferredAlignment( + cast<GlobalVariable>(GV)) < 32) { + if (Kind.isMergeable1ByteCString()) + return CStringSection; + assert(Kind.isMergeable2ByteCString()); + return UStringSection; + } + } + + if (Kind.isMergeableConst()) { + if (Kind.isMergeableConst4()) + return FourByteConstantSection; + if (Kind.isMergeableConst8()) + return EightByteConstantSection; + if (Kind.isMergeableConst16() && SixteenByteConstantSection) + return SixteenByteConstantSection; + } + + // Otherwise, if it is readonly, but not something we can specially optimize, + // just drop it in .const. + if (Kind.isReadOnly()) + return ReadOnlySection; + + // If this is marked const, put it into a const section. But if the dynamic + // linker needs to write to it, put it in the data segment. + if (Kind.isReadOnlyWithRel()) + return ConstDataSection; + + // Put zero initialized globals with strong external linkage in the + // DATA, __common section with the .zerofill directive. + if (Kind.isBSSExtern()) + return DataCommonSection; + + // Put zero initialized globals with local linkage in __DATA,__bss directive + // with the .zerofill directive (aka .lcomm). + if (Kind.isBSSLocal()) + return DataBSSSection; + + // Otherwise, just drop the variable in the normal data section. + return DataSection; +} + +const MCSection * +TargetLoweringObjectFileMachO::getSectionForConstant(SectionKind Kind) const { + // If this constant requires a relocation, we have to put it in the data + // segment, not in the text segment. + if (Kind.isDataRel() || Kind.isReadOnlyWithRel()) + return ConstDataSection; + + if (Kind.isMergeableConst4()) + return FourByteConstantSection; + if (Kind.isMergeableConst8()) + return EightByteConstantSection; + if (Kind.isMergeableConst16() && SixteenByteConstantSection) + return SixteenByteConstantSection; + return ReadOnlySection; // .const +} + +/// shouldEmitUsedDirectiveFor - This hook allows targets to selectively decide +/// not to emit the UsedDirective for some symbols in llvm.used. +// FIXME: REMOVE this (rdar://7071300) +bool TargetLoweringObjectFileMachO:: +shouldEmitUsedDirectiveFor(const GlobalValue *GV, Mangler *Mang) const { + /// On Darwin, internally linked data beginning with "L" or "l" does not have + /// the directive emitted (this occurs in ObjC metadata). + if (!GV) return false; + + // Check whether the mangled name has the "Private" or "LinkerPrivate" prefix. + if (GV->hasLocalLinkage() && !isa<Function>(GV)) { + // FIXME: ObjC metadata is currently emitted as internal symbols that have + // \1L and \0l prefixes on them. Fix them to be Private/LinkerPrivate and + // this horrible hack can go away. + SmallString<64> Name; + Mang->getNameWithPrefix(Name, GV, false); + if (Name[0] == 'L' || Name[0] == 'l') + return false; + } + + return true; +} + +const MCExpr *TargetLoweringObjectFileMachO:: +getSymbolForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang, + MachineModuleInfo *MMI, unsigned Encoding) const { + // The mach-o version of this method defaults to returning a stub reference. 
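
shouldEmitUsedDirectiveFor above suppresses the used directive for Darwin's assembler-local names, which are recognized purely by the first character of the mangled name. The prefix test in isolation, assuming the mangled name is already available as a string:

#include <string>

// On Darwin, symbols whose mangled names begin with 'L' or 'l' are
// assembler-local labels (common for ObjC metadata), so emitting a
// directive that references them would be meaningless.
static bool isAssemblerLocalName(const std::string &Mangled) {
  return !Mangled.empty() && (Mangled[0] == 'L' || Mangled[0] == 'l');
}
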
+ + if (Encoding & dwarf::DW_EH_PE_indirect) { + SmallString<128> Name; + Mang->getNameWithPrefix(Name, GV, true); + Name += "$non_lazy_ptr"; + MCSymbol *Sym = getContext().GetOrCreateSymbol(Name.str()); + + return TargetLoweringObjectFile:: + getSymbolForDwarfReference(Sym, MMI, + Encoding & ~dwarf::DW_EH_PE_indirect); + } + + return TargetLoweringObjectFile:: + getSymbolForDwarfGlobalReference(GV, Mang, MMI, Encoding); +} + + +//===----------------------------------------------------------------------===// +// COFF +//===----------------------------------------------------------------------===// + +typedef StringMap<const MCSectionCOFF*> COFFUniqueMapTy; + +TargetLoweringObjectFileCOFF::~TargetLoweringObjectFileCOFF() { + delete (COFFUniqueMapTy*)UniquingMap; +} + + +const MCSection *TargetLoweringObjectFileCOFF:: +getCOFFSection(StringRef Name, bool isDirective, SectionKind Kind) const { + // Create the map if it doesn't already exist. + if (UniquingMap == 0) + UniquingMap = new MachOUniqueMapTy(); + COFFUniqueMapTy &Map = *(COFFUniqueMapTy*)UniquingMap; + + // Do the lookup, if we have a hit, return it. + const MCSectionCOFF *&Entry = Map[Name]; + if (Entry) return Entry; + + return Entry = MCSectionCOFF::Create(Name, isDirective, Kind, getContext()); +} + +void TargetLoweringObjectFileCOFF::Initialize(MCContext &Ctx, + const TargetMachine &TM) { + if (UniquingMap != 0) + ((COFFUniqueMapTy*)UniquingMap)->clear(); + TargetLoweringObjectFile::Initialize(Ctx, TM); + TextSection = getCOFFSection("\t.text", true, SectionKind::getText()); + DataSection = getCOFFSection("\t.data", true, SectionKind::getDataRel()); + StaticCtorSection = + getCOFFSection(".ctors", false, SectionKind::getDataRel()); + StaticDtorSection = + getCOFFSection(".dtors", false, SectionKind::getDataRel()); + + // FIXME: We're emitting LSDA info into a readonly section on COFF, even + // though it contains relocatable pointers. In PIC mode, this is probably a + // big runtime hit for C++ apps. Either the contents of the LSDA need to be + // adjusted or this should be a data section. + LSDASection = + getCOFFSection(".gcc_except_table", false, SectionKind::getReadOnly()); + EHFrameSection = + getCOFFSection(".eh_frame", false, SectionKind::getDataRel()); + + // Debug info. + // FIXME: Don't use 'directive' mode here. 
+ DwarfAbbrevSection = + getCOFFSection("\t.section\t.debug_abbrev,\"dr\"", + true, SectionKind::getMetadata()); + DwarfInfoSection = + getCOFFSection("\t.section\t.debug_info,\"dr\"", + true, SectionKind::getMetadata()); + DwarfLineSection = + getCOFFSection("\t.section\t.debug_line,\"dr\"", + true, SectionKind::getMetadata()); + DwarfFrameSection = + getCOFFSection("\t.section\t.debug_frame,\"dr\"", + true, SectionKind::getMetadata()); + DwarfPubNamesSection = + getCOFFSection("\t.section\t.debug_pubnames,\"dr\"", + true, SectionKind::getMetadata()); + DwarfPubTypesSection = + getCOFFSection("\t.section\t.debug_pubtypes,\"dr\"", + true, SectionKind::getMetadata()); + DwarfStrSection = + getCOFFSection("\t.section\t.debug_str,\"dr\"", + true, SectionKind::getMetadata()); + DwarfLocSection = + getCOFFSection("\t.section\t.debug_loc,\"dr\"", + true, SectionKind::getMetadata()); + DwarfARangesSection = + getCOFFSection("\t.section\t.debug_aranges,\"dr\"", + true, SectionKind::getMetadata()); + DwarfRangesSection = + getCOFFSection("\t.section\t.debug_ranges,\"dr\"", + true, SectionKind::getMetadata()); + DwarfMacroInfoSection = + getCOFFSection("\t.section\t.debug_macinfo,\"dr\"", + true, SectionKind::getMetadata()); +} + +const MCSection *TargetLoweringObjectFileCOFF:: +getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind, + Mangler *Mang, const TargetMachine &TM) const { + return getCOFFSection(GV->getSection(), false, Kind); +} + +static const char *getCOFFSectionPrefixForUniqueGlobal(SectionKind Kind) { + if (Kind.isText()) + return ".text$linkonce"; + if (Kind.isWriteable()) + return ".data$linkonce"; + return ".rdata$linkonce"; +} + + +const MCSection *TargetLoweringObjectFileCOFF:: +SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, + Mangler *Mang, const TargetMachine &TM) const { + assert(!Kind.isThreadLocal() && "Doesn't support TLS"); + + // If this global is linkonce/weak and the target handles this by emitting it + // into a 'uniqued' section name, create and return the section now. + if (GV->isWeakForLinker()) { + const char *Prefix = getCOFFSectionPrefixForUniqueGlobal(Kind); + SmallString<128> Name(Prefix, Prefix+strlen(Prefix)); + Mang->getNameWithPrefix(Name, GV, false); + return getCOFFSection(Name.str(), false, Kind); + } + + if (Kind.isText()) + return getTextSection(); + + return getDataSection(); +} + diff --git a/lib/CodeGen/TwoAddressInstructionPass.cpp b/lib/CodeGen/TwoAddressInstructionPass.cpp index a3f6364..6f4ca82 100644 --- a/lib/CodeGen/TwoAddressInstructionPass.cpp +++ b/lib/CodeGen/TwoAddressInstructionPass.cpp @@ -213,6 +213,9 @@ bool TwoAddressInstructionPass::Sink3AddrInstruction(MachineBasicBlock *MBB, unsigned NumVisited = 0; for (MachineBasicBlock::iterator I = llvm::next(OldPos); I != KillPos; ++I) { MachineInstr *OtherMI = I; + // DBG_VALUE cannot be counted against the limit. + if (OtherMI->isDebugValue()) + continue; if (NumVisited > 30) // FIXME: Arbitrary limit to reduce compile time cost. 
return false; ++NumVisited; @@ -316,7 +319,7 @@ bool TwoAddressInstructionPass::NoUseAfterLastDef(unsigned Reg, E = MRI->reg_end(); I != E; ++I) { MachineOperand &MO = I.getOperand(); MachineInstr *MI = MO.getParent(); - if (MI->getParent() != MBB) + if (MI->getParent() != MBB || MI->isDebugValue()) continue; DenseMap<MachineInstr*, unsigned>::iterator DI = DistanceMap.find(MI); if (DI == DistanceMap.end()) @@ -339,7 +342,7 @@ MachineInstr *TwoAddressInstructionPass::FindLastUseInMBB(unsigned Reg, E = MRI->reg_end(); I != E; ++I) { MachineOperand &MO = I.getOperand(); MachineInstr *MI = MO.getParent(); - if (MI->getParent() != MBB) + if (MI->getParent() != MBB || MI->isDebugValue()) continue; DenseMap<MachineInstr*, unsigned>::iterator DI = DistanceMap.find(MI); if (DI == DistanceMap.end()) @@ -365,13 +368,13 @@ static bool isCopyToReg(MachineInstr &MI, const TargetInstrInfo *TII, DstReg = 0; unsigned SrcSubIdx, DstSubIdx; if (!TII->isMoveInstr(MI, SrcReg, DstReg, SrcSubIdx, DstSubIdx)) { - if (MI.getOpcode() == TargetInstrInfo::EXTRACT_SUBREG) { + if (MI.isExtractSubreg()) { DstReg = MI.getOperand(0).getReg(); SrcReg = MI.getOperand(1).getReg(); - } else if (MI.getOpcode() == TargetInstrInfo::INSERT_SUBREG) { + } else if (MI.isInsertSubreg()) { DstReg = MI.getOperand(0).getReg(); SrcReg = MI.getOperand(2).getReg(); - } else if (MI.getOpcode() == TargetInstrInfo::SUBREG_TO_REG) { + } else if (MI.isSubregToReg()) { DstReg = MI.getOperand(0).getReg(); SrcReg = MI.getOperand(2).getReg(); } @@ -429,8 +432,7 @@ static bool isKilled(MachineInstr &MI, unsigned Reg, /// as a two-address use. If so, return the destination register by reference. static bool isTwoAddrUse(MachineInstr &MI, unsigned Reg, unsigned &DstReg) { const TargetInstrDesc &TID = MI.getDesc(); - unsigned NumOps = (MI.getOpcode() == TargetInstrInfo::INLINEASM) - ? MI.getNumOperands() : TID.getNumOperands(); + unsigned NumOps = MI.isInlineAsm() ? MI.getNumOperands():TID.getNumOperands(); for (unsigned i = 0; i != NumOps; ++i) { const MachineOperand &MO = MI.getOperand(i); if (!MO.isReg() || !MO.isUse() || MO.getReg() != Reg) @@ -452,11 +454,11 @@ MachineInstr *findOnlyInterestingUse(unsigned Reg, MachineBasicBlock *MBB, const TargetInstrInfo *TII, bool &IsCopy, unsigned &DstReg, bool &IsDstPhys) { - MachineRegisterInfo::use_iterator UI = MRI->use_begin(Reg); - if (UI == MRI->use_end()) + MachineRegisterInfo::use_nodbg_iterator UI = MRI->use_nodbg_begin(Reg); + if (UI == MRI->use_nodbg_end()) return 0; MachineInstr &UseMI = *UI; - if (++UI != MRI->use_end()) + if (++UI != MRI->use_nodbg_end()) // More than one use. return 0; if (UseMI.getParent() != MBB) @@ -924,6 +926,10 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) { for (MachineBasicBlock::iterator mi = mbbi->begin(), me = mbbi->end(); mi != me; ) { MachineBasicBlock::iterator nmi = llvm::next(mi); + if (mi->isDebugValue()) { + mi = nmi; + continue; + } const TargetInstrDesc &TID = mi->getDesc(); bool FirstTied = true; @@ -933,7 +939,7 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) { // First scan through all the tied register uses in this instruction // and record a list of pairs of tied operands for each register. - unsigned NumOps = (mi->getOpcode() == TargetInstrInfo::INLINEASM) + unsigned NumOps = mi->isInlineAsm() ? 
mi->getNumOperands() : TID.getNumOperands(); for (unsigned SrcIdx = 0; SrcIdx < NumOps; ++SrcIdx) { unsigned DstIdx = 0; diff --git a/lib/CodeGen/UnreachableBlockElim.cpp b/lib/CodeGen/UnreachableBlockElim.cpp index 6ab5db2..b0f0a07 100644 --- a/lib/CodeGen/UnreachableBlockElim.cpp +++ b/lib/CodeGen/UnreachableBlockElim.cpp @@ -148,8 +148,7 @@ bool UnreachableMachineBlockElim::runOnMachineFunction(MachineFunction &F) { MachineBasicBlock* succ = *BB->succ_begin(); MachineBasicBlock::iterator start = succ->begin(); - while (start != succ->end() && - start->getOpcode() == TargetInstrInfo::PHI) { + while (start != succ->end() && start->isPHI()) { for (unsigned i = start->getNumOperands() - 1; i >= 2; i-=2) if (start->getOperand(i).isMBB() && start->getOperand(i).getMBB() == BB) { @@ -188,8 +187,7 @@ bool UnreachableMachineBlockElim::runOnMachineFunction(MachineFunction &F) { SmallPtrSet<MachineBasicBlock*, 8> preds(BB->pred_begin(), BB->pred_end()); MachineBasicBlock::iterator phi = BB->begin(); - while (phi != BB->end() && - phi->getOpcode() == TargetInstrInfo::PHI) { + while (phi != BB->end() && phi->isPHI()) { for (unsigned i = phi->getNumOperands() - 1; i >= 2; i-=2) if (!preds.count(phi->getOperand(i).getMBB())) { phi->RemoveOperand(i); diff --git a/lib/CodeGen/VirtRegMap.cpp b/lib/CodeGen/VirtRegMap.cpp index d4fb2e4..5956b61 100644 --- a/lib/CodeGen/VirtRegMap.cpp +++ b/lib/CodeGen/VirtRegMap.cpp @@ -9,7 +9,7 @@ // // This file implements the VirtRegMap class. // -// It also contains implementations of the the Spiller interface, which, given a +// It also contains implementations of the Spiller interface, which, given a // virtual register map and a machine function, eliminates all virtual // references by replacing them with physical register references - adding spill // code as necessary. diff --git a/lib/CodeGen/VirtRegRewriter.cpp b/lib/CodeGen/VirtRegRewriter.cpp index df2b8d2..84e0398 100644 --- a/lib/CodeGen/VirtRegRewriter.cpp +++ b/lib/CodeGen/VirtRegRewriter.cpp @@ -62,6 +62,7 @@ VirtRegRewriter::~VirtRegRewriter() {} /// substitutePhysReg - Replace virtual register in MachineOperand with a /// physical register. Do the right thing with the sub-register index. +/// Note that operands may be added, so the MO reference is no longer valid. static void substitutePhysReg(MachineOperand &MO, unsigned Reg, const TargetRegisterInfo &TRI) { if (unsigned SubIdx = MO.getSubReg()) { @@ -123,14 +124,15 @@ struct TrivialRewriter : public VirtRegRewriter { continue; unsigned pReg = VRM.getPhys(reg); mri->setPhysRegUsed(pReg); - for (MachineRegisterInfo::reg_iterator regItr = mri->reg_begin(reg), - regEnd = mri->reg_end(); regItr != regEnd;) { - MachineOperand &mop = regItr.getOperand(); - assert(mop.isReg() && mop.getReg() == reg && "reg_iterator broken?"); - ++regItr; - substitutePhysReg(mop, pReg, *tri); - changed = true; - } + // Copy the register use-list before traversing it. + SmallVector<std::pair<MachineInstr*, unsigned>, 32> reglist; + for (MachineRegisterInfo::reg_iterator I = mri->reg_begin(reg), + E = mri->reg_end(); I != E; ++I) + reglist.push_back(std::make_pair(&*I, I.getOperandNo())); + for (unsigned N=0; N != reglist.size(); ++N) + substitutePhysReg(reglist[N].first->getOperand(reglist[N].second), + pReg, *tri); + changed |= !reglist.empty(); } } @@ -1759,7 +1761,7 @@ private: // Mark is killed. 
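
The TrivialRewriter fix above snapshots a register's use list before rewriting it, because substitutePhysReg can add operands and thereby invalidate live iterators. The same defensive pattern with standard containers, where Operand is a hypothetical stand-in:

#include <list>
#include <vector>

struct Operand { int Reg; };

// If rewriting an operand can add or remove entries in the use list
// (substitutePhysReg may add operands), iterating the list directly is
// unsafe. Snapshot the entries first and walk the copy instead.
static void rewriteAllUses(std::list<Operand *> &Uses,
                           void (*Rewrite)(Operand *, std::list<Operand *> &)) {
  std::vector<Operand *> Snapshot(Uses.begin(), Uses.end());
  for (Operand *O : Snapshot)
    Rewrite(O, Uses); // may mutate Uses; the snapshot is unaffected
}
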
MachineInstr *CopyMI = prior(InsertLoc); - CopyMI->setAsmPrinterFlag(AsmPrinter::ReloadReuse); + CopyMI->setAsmPrinterFlag(MachineInstr::ReloadReuse); MachineOperand *KillOpnd = CopyMI->findRegisterUseOperand(InReg); KillOpnd->setIsKill(); UpdateKills(*CopyMI, TRI, RegKills, KillOps); @@ -1850,31 +1852,30 @@ private: KilledMIRegs.clear(); for (unsigned j = 0, e = VirtUseOps.size(); j != e; ++j) { unsigned i = VirtUseOps[j]; - MachineOperand &MO = MI.getOperand(i); - unsigned VirtReg = MO.getReg(); + unsigned VirtReg = MI.getOperand(i).getReg(); assert(TargetRegisterInfo::isVirtualRegister(VirtReg) && "Not a virtual register?"); - unsigned SubIdx = MO.getSubReg(); + unsigned SubIdx = MI.getOperand(i).getSubReg(); if (VRM.isAssignedReg(VirtReg)) { // This virtual register was assigned a physreg! unsigned Phys = VRM.getPhys(VirtReg); RegInfo->setPhysRegUsed(Phys); - if (MO.isDef()) + if (MI.getOperand(i).isDef()) ReusedOperands.markClobbered(Phys); - substitutePhysReg(MO, Phys, *TRI); + substitutePhysReg(MI.getOperand(i), Phys, *TRI); if (VRM.isImplicitlyDefined(VirtReg)) // FIXME: Is this needed? BuildMI(MBB, &MI, MI.getDebugLoc(), - TII->get(TargetInstrInfo::IMPLICIT_DEF), Phys); + TII->get(TargetOpcode::IMPLICIT_DEF), Phys); continue; } // This virtual register is now known to be a spilled value. - if (!MO.isUse()) + if (!MI.getOperand(i).isUse()) continue; // Handle defs in the loop below (handle use&def here though) - bool AvoidReload = MO.isUndef(); + bool AvoidReload = MI.getOperand(i).isUndef(); // Check if it is defined by an implicit def. It should not be spilled. // Note, this is for correctness reason. e.g. // 8 %reg1024<def> = IMPLICIT_DEF @@ -1902,8 +1903,7 @@ private: // = EXTRACT_SUBREG fi#1 // fi#1 is available in EDI, but it cannot be reused because it's not in // the right register file. - if (PhysReg && !AvoidReload && - (SubIdx || MI.getOpcode() == TargetInstrInfo::EXTRACT_SUBREG)) { + if (PhysReg && !AvoidReload && (SubIdx || MI.isExtractSubreg())) { const TargetRegisterClass* RC = RegInfo->getRegClass(VirtReg); if (!RC->contains(PhysReg)) PhysReg = 0; @@ -2038,7 +2038,7 @@ private: TII->copyRegToReg(MBB, InsertLoc, DesignatedReg, PhysReg, RC, RC); MachineInstr *CopyMI = prior(InsertLoc); - CopyMI->setAsmPrinterFlag(AsmPrinter::ReloadReuse); + CopyMI->setAsmPrinterFlag(MachineInstr::ReloadReuse); UpdateKills(*CopyMI, TRI, RegKills, KillOps); // This invalidates DesignatedReg. @@ -2167,7 +2167,7 @@ private: // virtual or needing to clobber any values if it's physical). NextMII = &MI; --NextMII; // backtrack to the copy. - NextMII->setAsmPrinterFlag(AsmPrinter::ReloadReuse); + NextMII->setAsmPrinterFlag(MachineInstr::ReloadReuse); // Propagate the sub-register index over. 
if (SubIdx) { DefMO = NextMII->findRegisterDefOperand(DestReg); diff --git a/lib/CompilerDriver/CompilationGraph.cpp b/lib/CompilerDriver/CompilationGraph.cpp index 56e5b9c..524607b 100644 --- a/lib/CompilerDriver/CompilationGraph.cpp +++ b/lib/CompilerDriver/CompilationGraph.cpp @@ -33,9 +33,11 @@ using namespace llvmc; namespace llvmc { const std::string& LanguageMap::GetLanguage(const sys::Path& File) const { - LanguageMap::const_iterator Lang = this->find(File.getSuffix()); + StringRef suf = File.getSuffix(); + LanguageMap::const_iterator Lang = this->find(suf); if (Lang == this->end()) - throw std::runtime_error(("Unknown suffix: " + File.getSuffix()).str()); + throw std::runtime_error("File '" + File.str() + + "' has unknown suffix '" + suf.str() + '\''); return Lang->second; } } diff --git a/lib/CompilerDriver/Makefile b/lib/CompilerDriver/Makefile index a5ecfd5..66c6d11 100644 --- a/lib/CompilerDriver/Makefile +++ b/lib/CompilerDriver/Makefile @@ -22,6 +22,7 @@ else endif REQUIRES_EH := 1 +REQUIRES_RTTI := 1 include $(LEVEL)/Makefile.common diff --git a/lib/ExecutionEngine/ExecutionEngine.cpp b/lib/ExecutionEngine/ExecutionEngine.cpp index 89c4290..6db3ef9 100644 --- a/lib/ExecutionEngine/ExecutionEngine.cpp +++ b/lib/ExecutionEngine/ExecutionEngine.cpp @@ -18,7 +18,6 @@ #include "llvm/Constants.h" #include "llvm/DerivedTypes.h" #include "llvm/Module.h" -#include "llvm/ModuleProvider.h" #include "llvm/ExecutionEngine/GenericValue.h" #include "llvm/ADT/Statistic.h" #include "llvm/Support/Debug.h" @@ -36,25 +35,29 @@ using namespace llvm; STATISTIC(NumInitBytes, "Number of bytes of global vars initialized"); STATISTIC(NumGlobals , "Number of global vars initialized"); -ExecutionEngine *(*ExecutionEngine::JITCtor)(ModuleProvider *MP, - std::string *ErrorStr, - JITMemoryManager *JMM, - CodeGenOpt::Level OptLevel, - bool GVsWithCode, - CodeModel::Model CMM) = 0; -ExecutionEngine *(*ExecutionEngine::InterpCtor)(ModuleProvider *MP, +ExecutionEngine *(*ExecutionEngine::JITCtor)( + Module *M, + std::string *ErrorStr, + JITMemoryManager *JMM, + CodeGenOpt::Level OptLevel, + bool GVsWithCode, + CodeModel::Model CMM, + StringRef MArch, + StringRef MCPU, + const SmallVectorImpl<std::string>& MAttrs) = 0; +ExecutionEngine *(*ExecutionEngine::InterpCtor)(Module *M, std::string *ErrorStr) = 0; ExecutionEngine::EERegisterFn ExecutionEngine::ExceptionTableRegister = 0; -ExecutionEngine::ExecutionEngine(ModuleProvider *P) +ExecutionEngine::ExecutionEngine(Module *M) : EEState(*this), LazyFunctionCreator(0) { CompilingLazily = false; GVCompilationDisabled = false; SymbolSearchingDisabled = false; - Modules.push_back(P); - assert(P && "ModuleProvider is null?"); + Modules.push_back(M); + assert(M && "Module is null?"); } ExecutionEngine::~ExecutionEngine() { @@ -69,38 +72,18 @@ char* ExecutionEngine::getMemoryForGV(const GlobalVariable* GV) { return new char[GVSize]; } -/// removeModuleProvider - Remove a ModuleProvider from the list of modules. -/// Relases the Module from the ModuleProvider, materializing it in the -/// process, and returns the materialized Module. -Module* ExecutionEngine::removeModuleProvider(ModuleProvider *P, - std::string *ErrInfo) { - for(SmallVector<ModuleProvider *, 1>::iterator I = Modules.begin(), +/// removeModule - Remove a Module from the list of modules. 
+bool ExecutionEngine::removeModule(Module *M) { + for(SmallVector<Module *, 1>::iterator I = Modules.begin(), E = Modules.end(); I != E; ++I) { - ModuleProvider *MP = *I; - if (MP == P) { + Module *Found = *I; + if (Found == M) { Modules.erase(I); - clearGlobalMappingsFromModule(MP->getModule()); - return MP->releaseModule(ErrInfo); - } - } - return NULL; -} - -/// deleteModuleProvider - Remove a ModuleProvider from the list of modules, -/// and deletes the ModuleProvider and owned Module. Avoids materializing -/// the underlying module. -void ExecutionEngine::deleteModuleProvider(ModuleProvider *P, - std::string *ErrInfo) { - for(SmallVector<ModuleProvider *, 1>::iterator I = Modules.begin(), - E = Modules.end(); I != E; ++I) { - ModuleProvider *MP = *I; - if (MP == P) { - Modules.erase(I); - clearGlobalMappingsFromModule(MP->getModule()); - delete MP; - return; + clearGlobalMappingsFromModule(M); + return true; } } + return false; } /// FindFunctionNamed - Search all of the active modules to find the one that @@ -108,7 +91,7 @@ void ExecutionEngine::deleteModuleProvider(ModuleProvider *P, /// general code. Function *ExecutionEngine::FindFunctionNamed(const char *FnName) { for (unsigned i = 0, e = Modules.size(); i != e; ++i) { - if (Function *F = Modules[i]->getModule()->getFunction(FnName)) + if (Function *F = Modules[i]->getFunction(FnName)) return F; } return 0; @@ -316,7 +299,7 @@ void ExecutionEngine::runStaticConstructorsDestructors(Module *module, void ExecutionEngine::runStaticConstructorsDestructors(bool isDtors) { // Execute global ctors/dtors for each module in the program. for (unsigned m = 0, e = Modules.size(); m != e; ++m) - runStaticConstructorsDestructors(Modules[m]->getModule(), isDtors); + runStaticConstructorsDestructors(Modules[m], isDtors); } #ifndef NDEBUG @@ -356,7 +339,7 @@ int ExecutionEngine::runFunctionAsMain(Function *Fn, } // FALLS THROUGH case 1: - if (!FTy->getParamType(0)->isInteger(32)) { + if (!FTy->getParamType(0)->isIntegerTy(32)) { llvm_report_error("Invalid type for first argument of main() supplied"); } // FALLS THROUGH @@ -393,12 +376,12 @@ int ExecutionEngine::runFunctionAsMain(Function *Fn, /// Interpreter or there's an error. If even an Interpreter cannot be created, /// NULL is returned. /// -ExecutionEngine *ExecutionEngine::create(ModuleProvider *MP, +ExecutionEngine *ExecutionEngine::create(Module *M, bool ForceInterpreter, std::string *ErrorStr, CodeGenOpt::Level OptLevel, bool GVsWithCode) { - return EngineBuilder(MP) + return EngineBuilder(M) .setEngineKind(ForceInterpreter ? EngineKind::Interpreter : EngineKind::JIT) @@ -408,16 +391,6 @@ ExecutionEngine *ExecutionEngine::create(ModuleProvider *MP, .create(); } -ExecutionEngine *ExecutionEngine::create(Module *M) { - return EngineBuilder(M).create(); -} - -/// EngineBuilder - Overloaded constructor that automatically creates an -/// ExistingModuleProvider for an existing module. -EngineBuilder::EngineBuilder(Module *m) : MP(new ExistingModuleProvider(m)) { - InitEngine(); -} - ExecutionEngine *EngineBuilder::create() { // Make sure we can resolve symbols in the program as well. The zero arg // to the function tells DynamicLibrary to load the program, not a library. 
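
The new removeModule above is a plain find-and-erase over a small vector, returning whether anything was removed. The equivalent shape with the standard library, using an opaque stand-in Module type:

#include <algorithm>
#include <vector>

struct Module; // opaque stand-in; ownership stays with the caller

// Find-and-erase over a small vector of pointers; returns true if M was
// present, mirroring the contract of ExecutionEngine::removeModule.
static bool removeModule(std::vector<Module *> &Modules, Module *M) {
  std::vector<Module *>::iterator It =
      std::find(Modules.begin(), Modules.end(), M);
  if (It == Modules.end())
    return false;
  Modules.erase(It);
  return true;
}
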
@@ -442,8 +415,9 @@ ExecutionEngine *EngineBuilder::create() { if (WhichEngine & EngineKind::JIT) { if (ExecutionEngine::JITCtor) { ExecutionEngine *EE = - ExecutionEngine::JITCtor(MP, ErrorStr, JMM, OptLevel, - AllocateGVsWithCode, CMModel); + ExecutionEngine::JITCtor(M, ErrorStr, JMM, OptLevel, + AllocateGVsWithCode, CMModel, + MArch, MCPU, MAttrs); if (EE) return EE; } } @@ -452,7 +426,7 @@ ExecutionEngine *EngineBuilder::create() { // an interpreter instead. if (WhichEngine & EngineKind::Interpreter) { if (ExecutionEngine::InterpCtor) - return ExecutionEngine::InterpCtor(MP, ErrorStr); + return ExecutionEngine::InterpCtor(M, ErrorStr); if (ErrorStr) *ErrorStr = "Interpreter has not been linked in."; return 0; @@ -625,18 +599,18 @@ GenericValue ExecutionEngine::getConstantValue(const Constant *C) { switch (Op0->getType()->getTypeID()) { default: llvm_unreachable("Invalid bitcast operand"); case Type::IntegerTyID: - assert(DestTy->isFloatingPoint() && "invalid bitcast"); + assert(DestTy->isFloatingPointTy() && "invalid bitcast"); if (DestTy->isFloatTy()) GV.FloatVal = GV.IntVal.bitsToFloat(); else if (DestTy->isDoubleTy()) GV.DoubleVal = GV.IntVal.bitsToDouble(); break; case Type::FloatTyID: - assert(DestTy->isInteger(32) && "Invalid bitcast"); + assert(DestTy->isIntegerTy(32) && "Invalid bitcast"); GV.IntVal.floatToBits(GV.FloatVal); break; case Type::DoubleTyID: - assert(DestTy->isInteger(64) && "Invalid bitcast"); + assert(DestTy->isIntegerTy(64) && "Invalid bitcast"); GV.IntVal.doubleToBits(GV.DoubleVal); break; case Type::PointerTyID: @@ -968,7 +942,7 @@ void ExecutionEngine::emitGlobals() { if (Modules.size() != 1) { for (unsigned m = 0, e = Modules.size(); m != e; ++m) { - Module &M = *Modules[m]->getModule(); + Module &M = *Modules[m]; for (Module::const_global_iterator I = M.global_begin(), E = M.global_end(); I != E; ++I) { const GlobalValue *GV = I; @@ -1002,7 +976,7 @@ void ExecutionEngine::emitGlobals() { std::vector<const GlobalValue*> NonCanonicalGlobals; for (unsigned m = 0, e = Modules.size(); m != e; ++m) { - Module &M = *Modules[m]->getModule(); + Module &M = *Modules[m]; for (Module::const_global_iterator I = M.global_begin(), E = M.global_end(); I != E; ++I) { // In the multi-module case, see what this global maps to. 
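That concludes the core ExecutionEngine.cpp changes. Since JITCtor now threads the -march/-mcpu/-mattr selections through EngineBuilder instead of reading global cl::opts, an embedder selects the target when building the engine. A hedged sketch: the setMArch/setMCPU/setMAttrs setters are inferred from the new JITCtor signature above and are not shown in these hunks, and the arch, cpu, and attribute strings are placeholders.

  #include "llvm/Module.h"
  #include "llvm/ADT/SmallVector.h"
  #include "llvm/ExecutionEngine/ExecutionEngine.h"
  #include "llvm/Support/raw_ostream.h"
  using namespace llvm;

  ExecutionEngine *makeJIT(Module *M) {
    SmallVector<std::string, 2> Attrs;
    Attrs.push_back("+sse2");                    // placeholder attribute
    std::string Err;
    ExecutionEngine *EE = EngineBuilder(M)
                              .setEngineKind(EngineKind::JIT)
                              .setErrorStr(&Err)
                              .setMArch("x86")   // placeholder arch
                              .setMCPU("core2")  // placeholder cpu
                              .setMAttrs(Attrs)
                              .create();
    if (!EE)
      errs() << "engine creation failed: " << Err << "\n";
    return EE;
  }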
diff --git a/lib/ExecutionEngine/ExecutionEngineBindings.cpp b/lib/ExecutionEngine/ExecutionEngineBindings.cpp index 412b493..141cb27 100644 --- a/lib/ExecutionEngine/ExecutionEngineBindings.cpp +++ b/lib/ExecutionEngine/ExecutionEngineBindings.cpp @@ -174,20 +174,16 @@ void LLVMFreeMachineCodeForFunction(LLVMExecutionEngineRef EE, LLVMValueRef F) { } void LLVMAddModuleProvider(LLVMExecutionEngineRef EE, LLVMModuleProviderRef MP){ - unwrap(EE)->addModuleProvider(unwrap(MP)); + unwrap(EE)->addModule(unwrap(MP)); } LLVMBool LLVMRemoveModuleProvider(LLVMExecutionEngineRef EE, LLVMModuleProviderRef MP, LLVMModuleRef *OutMod, char **OutError) { - std::string Error; - if (Module *Gone = unwrap(EE)->removeModuleProvider(unwrap(MP), &Error)) { - *OutMod = wrap(Gone); - return 0; - } - if (OutError) - *OutError = strdup(Error.c_str()); - return 1; + Module *M = unwrap(MP); + unwrap(EE)->removeModule(M); + *OutMod = wrap(M); + return 0; } LLVMBool LLVMFindFunction(LLVMExecutionEngineRef EE, const char *Name, diff --git a/lib/ExecutionEngine/Interpreter/Execution.cpp b/lib/ExecutionEngine/Interpreter/Execution.cpp index 73f5558..e234cf1 100644 --- a/lib/ExecutionEngine/Interpreter/Execution.cpp +++ b/lib/ExecutionEngine/Interpreter/Execution.cpp @@ -591,7 +591,7 @@ void Interpreter::popStackAndReturnValueToCaller(const Type *RetTy, ECStack.pop_back(); if (ECStack.empty()) { // Finished main. Put result into exit code... - if (RetTy && RetTy->isInteger()) { // Nonvoid return type? + if (RetTy && RetTy->isIntegerTy()) { // Nonvoid return type? ExitValue = Result; // Capture the exit value of the program } else { memset(&ExitValue.Untyped, 0, sizeof(ExitValue.Untyped)); @@ -979,7 +979,7 @@ GenericValue Interpreter::executeFPToUIInst(Value *SrcVal, const Type *DstTy, const Type *SrcTy = SrcVal->getType(); uint32_t DBitWidth = cast<IntegerType>(DstTy)->getBitWidth(); GenericValue Dest, Src = getOperandValue(SrcVal, SF); - assert(SrcTy->isFloatingPoint() && "Invalid FPToUI instruction"); + assert(SrcTy->isFloatingPointTy() && "Invalid FPToUI instruction"); if (SrcTy->getTypeID() == Type::FloatTyID) Dest.IntVal = APIntOps::RoundFloatToAPInt(Src.FloatVal, DBitWidth); @@ -993,7 +993,7 @@ GenericValue Interpreter::executeFPToSIInst(Value *SrcVal, const Type *DstTy, const Type *SrcTy = SrcVal->getType(); uint32_t DBitWidth = cast<IntegerType>(DstTy)->getBitWidth(); GenericValue Dest, Src = getOperandValue(SrcVal, SF); - assert(SrcTy->isFloatingPoint() && "Invalid FPToSI instruction"); + assert(SrcTy->isFloatingPointTy() && "Invalid FPToSI instruction"); if (SrcTy->getTypeID() == Type::FloatTyID) Dest.IntVal = APIntOps::RoundFloatToAPInt(Src.FloatVal, DBitWidth); @@ -1005,7 +1005,7 @@ GenericValue Interpreter::executeFPToSIInst(Value *SrcVal, const Type *DstTy, GenericValue Interpreter::executeUIToFPInst(Value *SrcVal, const Type *DstTy, ExecutionContext &SF) { GenericValue Dest, Src = getOperandValue(SrcVal, SF); - assert(DstTy->isFloatingPoint() && "Invalid UIToFP instruction"); + assert(DstTy->isFloatingPointTy() && "Invalid UIToFP instruction"); if (DstTy->getTypeID() == Type::FloatTyID) Dest.FloatVal = APIntOps::RoundAPIntToFloat(Src.IntVal); @@ -1017,7 +1017,7 @@ GenericValue Interpreter::executeUIToFPInst(Value *SrcVal, const Type *DstTy, GenericValue Interpreter::executeSIToFPInst(Value *SrcVal, const Type *DstTy, ExecutionContext &SF) { GenericValue Dest, Src = getOperandValue(SrcVal, SF); - assert(DstTy->isFloatingPoint() && "Invalid SIToFP instruction"); + assert(DstTy->isFloatingPointTy() && "Invalid 
SIToFP instruction"); if (DstTy->getTypeID() == Type::FloatTyID) Dest.FloatVal = APIntOps::RoundSignedAPIntToFloat(Src.IntVal); @@ -1058,24 +1058,24 @@ GenericValue Interpreter::executeBitCastInst(Value *SrcVal, const Type *DstTy, if (isa<PointerType>(DstTy)) { assert(isa<PointerType>(SrcTy) && "Invalid BitCast"); Dest.PointerVal = Src.PointerVal; - } else if (DstTy->isInteger()) { + } else if (DstTy->isIntegerTy()) { if (SrcTy->isFloatTy()) { Dest.IntVal.zext(sizeof(Src.FloatVal) * CHAR_BIT); Dest.IntVal.floatToBits(Src.FloatVal); } else if (SrcTy->isDoubleTy()) { Dest.IntVal.zext(sizeof(Src.DoubleVal) * CHAR_BIT); Dest.IntVal.doubleToBits(Src.DoubleVal); - } else if (SrcTy->isInteger()) { + } else if (SrcTy->isIntegerTy()) { Dest.IntVal = Src.IntVal; } else llvm_unreachable("Invalid BitCast"); } else if (DstTy->isFloatTy()) { - if (SrcTy->isInteger()) + if (SrcTy->isIntegerTy()) Dest.FloatVal = Src.IntVal.bitsToFloat(); else Dest.FloatVal = Src.FloatVal; } else if (DstTy->isDoubleTy()) { - if (SrcTy->isInteger()) + if (SrcTy->isIntegerTy()) Dest.DoubleVal = Src.IntVal.bitsToDouble(); else Dest.DoubleVal = Src.DoubleVal; diff --git a/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp b/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp index c02d84f..7b061d3 100644 --- a/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp +++ b/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp @@ -368,7 +368,7 @@ GenericValue lle_X_sprintf(const FunctionType *FT, switch (Last) { case '%': - strcpy(Buffer, "%"); break; + memcpy(Buffer, "%", 2); break; case 'c': sprintf(Buffer, FmtBuf, uint32_t(Args[ArgNo++].IntVal.getZExtValue())); break; @@ -400,8 +400,9 @@ GenericValue lle_X_sprintf(const FunctionType *FT, errs() << "<unknown printf code '" << *FmtStr << "'!>"; ArgNo++; break; } - strcpy(OutputBuffer, Buffer); - OutputBuffer += strlen(Buffer); + size_t Len = strlen(Buffer); + memcpy(OutputBuffer, Buffer, Len + 1); + OutputBuffer += Len; } break; } diff --git a/lib/ExecutionEngine/Interpreter/Interpreter.cpp b/lib/ExecutionEngine/Interpreter/Interpreter.cpp index 9be6a92..43e3453 100644 --- a/lib/ExecutionEngine/Interpreter/Interpreter.cpp +++ b/lib/ExecutionEngine/Interpreter/Interpreter.cpp @@ -17,7 +17,6 @@ #include "llvm/CodeGen/IntrinsicLowering.h" #include "llvm/DerivedTypes.h" #include "llvm/Module.h" -#include "llvm/ModuleProvider.h" #include <cstring> using namespace llvm; @@ -33,20 +32,20 @@ extern "C" void LLVMLinkInInterpreter() { } /// create - Create a new interpreter object. This can never fail. /// -ExecutionEngine *Interpreter::create(ModuleProvider *MP, std::string* ErrStr) { - // Tell this ModuleProvide to materialize and release the module - if (!MP->materializeModule(ErrStr)) +ExecutionEngine *Interpreter::create(Module *M, std::string* ErrStr) { + // Tell this Module to materialize everything and release the GVMaterializer. 
+ if (M->MaterializeAllPermanently(ErrStr)) // We got an error, just return 0 return 0; - return new Interpreter(MP); + return new Interpreter(M); } //===----------------------------------------------------------------------===// // Interpreter ctor - Initialize stuff // -Interpreter::Interpreter(ModuleProvider *M) - : ExecutionEngine(M), TD(M->getModule()) { +Interpreter::Interpreter(Module *M) + : ExecutionEngine(M), TD(M) { memset(&ExitValue.Untyped, 0, sizeof(ExitValue.Untyped)); setTargetData(&TD); diff --git a/lib/ExecutionEngine/Interpreter/Interpreter.h b/lib/ExecutionEngine/Interpreter/Interpreter.h index 038830c..bc4200b 100644 --- a/lib/ExecutionEngine/Interpreter/Interpreter.h +++ b/lib/ExecutionEngine/Interpreter/Interpreter.h @@ -94,7 +94,7 @@ class Interpreter : public ExecutionEngine, public InstVisitor<Interpreter> { std::vector<Function*> AtExitHandlers; public: - explicit Interpreter(ModuleProvider *M); + explicit Interpreter(Module *M); ~Interpreter(); /// runAtExitHandlers - Run any functions registered by the program's calls to @@ -108,7 +108,7 @@ public: /// create - Create an interpreter ExecutionEngine. This can never fail. /// - static ExecutionEngine *create(ModuleProvider *M, std::string *ErrorStr = 0); + static ExecutionEngine *create(Module *M, std::string *ErrorStr = 0); /// run - Start execution with the specified function and arguments. /// diff --git a/lib/ExecutionEngine/Interpreter/Makefile b/lib/ExecutionEngine/Interpreter/Makefile index 4df38ea..5def136 100644 --- a/lib/ExecutionEngine/Interpreter/Makefile +++ b/lib/ExecutionEngine/Interpreter/Makefile @@ -9,6 +9,5 @@ LEVEL = ../../.. LIBRARYNAME = LLVMInterpreter -CXXFLAGS = -fno-rtti include $(LEVEL)/Makefile.common diff --git a/lib/ExecutionEngine/JIT/JIT.cpp b/lib/ExecutionEngine/JIT/JIT.cpp index faf724f..18a996e 100644 --- a/lib/ExecutionEngine/JIT/JIT.cpp +++ b/lib/ExecutionEngine/JIT/JIT.cpp @@ -18,7 +18,7 @@ #include "llvm/Function.h" #include "llvm/GlobalVariable.h" #include "llvm/Instructions.h" -#include "llvm/ModuleProvider.h" +#include "llvm/ADT/SmallPtrSet.h" #include "llvm/CodeGen/JITCodeEmitter.h" #include "llvm/CodeGen/MachineCodeInfo.h" #include "llvm/ExecutionEngine/GenericValue.h" @@ -28,6 +28,7 @@ #include "llvm/Target/TargetJITInfo.h" #include "llvm/Support/Dwarf.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/ManagedStatic.h" #include "llvm/Support/MutexGuard.h" #include "llvm/System/DynamicLibrary.h" #include "llvm/Config/config.h" @@ -172,7 +173,7 @@ void DarwinRegisterFrame(void* FrameBegin) { ob->encoding.i = 0; ob->encoding.b.encoding = llvm::dwarf::DW_EH_PE_omit; - // Put the info on both places, as libgcc uses the first or the the second + // Put the info on both places, as libgcc uses the first or the second // field. Note that we rely on having two pointers here. If fde_end was a // char, things would get complicated. ob->fde_end = (char*)LOI->unseenObjects; @@ -193,35 +194,44 @@ void DarwinRegisterFrame(void* FrameBegin) { /// createJIT - This is the factory method for creating a JIT for the current /// machine, it does not fall back to the interpreter. This takes ownership -/// of the module provider. -ExecutionEngine *ExecutionEngine::createJIT(ModuleProvider *MP, +/// of the module. 
+ExecutionEngine *ExecutionEngine::createJIT(Module *M,
 std::string *ErrorStr, JITMemoryManager *JMM, CodeGenOpt::Level OptLevel, bool GVsWithCode,
- CodeModel::Model CMM) {
- return JIT::createJIT(MP, ErrorStr, JMM, OptLevel, GVsWithCode, CMM);
+ CodeModel::Model CMM) {
+ // Use the defaults for extra parameters. Users can use EngineBuilder to
+ // set them.
+ StringRef MArch = "";
+ StringRef MCPU = "";
+ SmallVector<std::string, 1> MAttrs;
+ return JIT::createJIT(M, ErrorStr, JMM, OptLevel, GVsWithCode, CMM,
+ MArch, MCPU, MAttrs);
}

-ExecutionEngine *JIT::createJIT(ModuleProvider *MP,
+ExecutionEngine *JIT::createJIT(Module *M,
 std::string *ErrorStr, JITMemoryManager *JMM, CodeGenOpt::Level OptLevel, bool GVsWithCode,
- CodeModel::Model CMM) {
+ CodeModel::Model CMM,
+ StringRef MArch,
+ StringRef MCPU,
+ const SmallVectorImpl<std::string>& MAttrs) {
 // Make sure we can resolve symbols in the program as well. The zero arg
 // to the function tells DynamicLibrary to load the program, not a library.
 if (sys::DynamicLibrary::LoadLibraryPermanently(0, ErrorStr))
 return 0;

 // Pick a target either via -march or by guessing the native arch.
- TargetMachine *TM = JIT::selectTarget(MP, ErrorStr);
+ TargetMachine *TM = JIT::selectTarget(M, MArch, MCPU, MAttrs, ErrorStr);
 if (!TM || (ErrorStr && ErrorStr->length() > 0)) return 0;
 TM->setCodeModel(CMM);

 // If the target supports JIT code generation, create the JIT.
 if (TargetJITInfo *TJ = TM->getJITInfo()) {
- return new JIT(MP, *TM, *TJ, JMM, OptLevel, GVsWithCode);
+ return new JIT(M, *TM, *TJ, JMM, OptLevel, GVsWithCode);
 } else {
 if (ErrorStr)
 *ErrorStr = "target does not support JIT code generation";
@@ -229,16 +239,63 @@ ExecutionEngine *JIT::createJIT(ModuleProvider *MP,
 }
}

-JIT::JIT(ModuleProvider *MP, TargetMachine &tm, TargetJITInfo &tji,
+namespace {
+/// This class supports the global getPointerToNamedFunction(), which allows
+/// bugpoint or gdb users to search for a function by name without any context.
+class JitPool {
+ SmallPtrSet<JIT*, 1> JITs; // Optimize for process containing just 1 JIT.
+ mutable sys::Mutex Lock;
+public:
+ void Add(JIT *jit) {
+ MutexGuard guard(Lock);
+ JITs.insert(jit);
+ }
+ void Remove(JIT *jit) {
+ MutexGuard guard(Lock);
+ JITs.erase(jit);
+ }
+ void *getPointerToNamedFunction(const char *Name) const {
+ MutexGuard guard(Lock);
+ assert(JITs.size() != 0 && "No Jit registered");
+ // Search for the function in every instance of JIT.
+ for (SmallPtrSet<JIT*, 1>::const_iterator Jit = JITs.begin(),
+ end = JITs.end();
+ Jit != end; ++Jit) {
+ if (Function *F = (*Jit)->FindFunctionNamed(Name))
+ return (*Jit)->getPointerToFunction(F);
+ }
+ // The function is not available: fall back to the first created JIT (it
+ // will search the symbols of the current program/library).
+ return (*JITs.begin())->getPointerToNamedFunction(Name);
+ }
+};
+ManagedStatic<JitPool> AllJits;
+}
+extern "C" {
+ // getPointerToNamedFunction - This function is used as a global wrapper to
+ // JIT::getPointerToNamedFunction for the purpose of resolving symbols when
+ // bugpoint is debugging the JIT. In that scenario, we are loading an .so and
+ // need to resolve function(s) that are being mis-codegenerated, so we need to
+ // resolve their addresses at runtime, and this is the way to do it.
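Because the wrapper defined next has C linkage, a debugger or a tool that loads the JIT as a shared object can resolve it by name and probe every live JIT through the JitPool above. A hedged sketch of such a lookup; SearchForAddressOfSymbol is the stock sys::DynamicLibrary API of this tree, and the probed function name is a placeholder:

  #include "llvm/System/DynamicLibrary.h"

  typedef void *(*LookupFn)(const char *);
  LookupFn Lookup = (LookupFn)(intptr_t)
      llvm::sys::DynamicLibrary::SearchForAddressOfSymbol("getPointerToNamedFunction");
  if (Lookup) {
    void *Addr = Lookup("main");   // placeholder function name
    (void)Addr;
  }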
+ void *getPointerToNamedFunction(const char *Name) {
+ return AllJits->getPointerToNamedFunction(Name);
+ }
+}
+
+JIT::JIT(Module *M, TargetMachine &tm, TargetJITInfo &tji,
 JITMemoryManager *JMM, CodeGenOpt::Level OptLevel, bool GVsWithCode)
- : ExecutionEngine(MP), TM(tm), TJI(tji), AllocateGVsWithCode(GVsWithCode) {
+ : ExecutionEngine(M), TM(tm), TJI(tji), AllocateGVsWithCode(GVsWithCode),
+ isAlreadyCodeGenerating(false) {
 setTargetData(TM.getTargetData());

- jitstate = new JITState(MP);
+ jitstate = new JITState(M);

 // Initialize JCE
 JCE = createEmitter(*this, JMM, TM);

+ // Register in the global list of all JITs.
+ AllJits->Add(this);
+
 // Add target data
 MutexGuard locked(lock);
 FunctionPassManager &PM = jitstate->getPM(locked);
@@ -273,21 +330,21 @@ JIT::JIT(ModuleProvider *MP, TargetMachine &tm, TargetJITInfo &tji,
 }

 JIT::~JIT() {
+ AllJits->Remove(this);
 delete jitstate;
 delete JCE;
 delete &TM;
}

-/// addModuleProvider - Add a new ModuleProvider to the JIT. If we previously
-/// removed the last ModuleProvider, we need re-initialize jitstate with a valid
-/// ModuleProvider.
-void JIT::addModuleProvider(ModuleProvider *MP) {
+/// addModule - Add a new Module to the JIT. If we previously removed the last
+/// Module, we need to re-initialize jitstate with a valid Module.
+void JIT::addModule(Module *M) {
 MutexGuard locked(lock);

 if (Modules.empty()) {
 assert(!jitstate && "jitstate should be NULL if Modules vector is empty!");

- jitstate = new JITState(MP);
+ jitstate = new JITState(M);

 FunctionPassManager &PM = jitstate->getPM(locked);
 PM.add(new TargetData(*TM.getTargetData()));
@@ -302,18 +359,17 @@ void JIT::addModuleProvider(ModuleProvider *MP) {
 PM.doInitialization();
 }

- ExecutionEngine::addModuleProvider(MP);
+ ExecutionEngine::addModule(M);
}

-/// removeModuleProvider - If we are removing the last ModuleProvider,
-/// invalidate the jitstate since the PassManager it contains references a
-/// released ModuleProvider.
-Module *JIT::removeModuleProvider(ModuleProvider *MP, std::string *E) {
- Module *result = ExecutionEngine::removeModuleProvider(MP, E);
+/// removeModule - If we are removing the last Module, invalidate the jitstate
+/// since the PassManager it contains references a released Module.
+bool JIT::removeModule(Module *M) {
+ bool result = ExecutionEngine::removeModule(M);

 MutexGuard locked(lock);

- if (jitstate->getMP() == MP) {
+ if (jitstate->getModule() == M) {
 delete jitstate;
 jitstate = 0;
 }
@@ -336,62 +392,6 @@ Module *JIT::removeModuleProvider(ModuleProvider *MP, std::string *E) {
 return result;
}

-/// deleteModuleProvider - Remove a ModuleProvider from the list of modules,
-/// and deletes the ModuleProvider and owned Module. Avoids materializing
-/// the underlying module.
-void JIT::deleteModuleProvider(ModuleProvider *MP, std::string *E) {
- ExecutionEngine::deleteModuleProvider(MP, E);
-
- MutexGuard locked(lock);
-
- if (jitstate->getMP() == MP) {
- delete jitstate;
- jitstate = 0;
- }
-
- if (!jitstate && !Modules.empty()) {
- jitstate = new JITState(Modules[0]);
-
- FunctionPassManager &PM = jitstate->getPM(locked);
- PM.add(new TargetData(*TM.getTargetData()));
-
- // Turn the machine code intermediate representation into bytes in memory
- // that may be executed.
- if (TM.addPassesToEmitMachineCode(PM, *JCE, CodeGenOpt::Default)) {
- llvm_report_error("Target does not support machine code emission!");
- }
-
- // Initialize passes.
- PM.doInitialization();
- }
-}
-
-/// materializeFunction - make sure the given function is fully read.
If the -/// module is corrupt, this returns true and fills in the optional string with -/// information about the problem. If successful, this returns false. -bool JIT::materializeFunction(Function *F, std::string *ErrInfo) { - // Read in the function if it exists in this Module. - if (F->hasNotBeenReadFromBitcode()) { - // Determine the module provider this function is provided by. - Module *M = F->getParent(); - ModuleProvider *MP = 0; - for (unsigned i = 0, e = Modules.size(); i != e; ++i) { - if (Modules[i]->getModule() == M) { - MP = Modules[i]; - break; - } - } - if (MP) - return MP->materializeFunction(F, ErrInfo); - - if (ErrInfo) - *ErrInfo = "Function isn't in a module we know about!"; - return true; - } - // Succeed if the function is already read. - return false; -} - /// run - Start execution with the specified function and arguments. /// GenericValue JIT::runFunction(Function *F, @@ -411,10 +411,10 @@ GenericValue JIT::runFunction(Function *F, // Handle some common cases first. These cases correspond to common `main' // prototypes. - if (RetTy->isInteger(32) || RetTy->isVoidTy()) { + if (RetTy->isIntegerTy(32) || RetTy->isVoidTy()) { switch (ArgValues.size()) { case 3: - if (FTy->getParamType(0)->isInteger(32) && + if (FTy->getParamType(0)->isIntegerTy(32) && isa<PointerType>(FTy->getParamType(1)) && isa<PointerType>(FTy->getParamType(2))) { int (*PF)(int, char **, const char **) = @@ -429,7 +429,7 @@ GenericValue JIT::runFunction(Function *F, } break; case 2: - if (FTy->getParamType(0)->isInteger(32) && + if (FTy->getParamType(0)->isIntegerTy(32) && isa<PointerType>(FTy->getParamType(1))) { int (*PF)(int, char **) = (int(*)(int, char **))(intptr_t)FPtr; @@ -442,7 +442,7 @@ GenericValue JIT::runFunction(Function *F, break; case 1: if (FTy->getNumParams() == 1 && - FTy->getParamType(0)->isInteger(32)) { + FTy->getParamType(0)->isIntegerTy(32)) { GenericValue rv; int (*PF)(int) = (int(*)(int))(intptr_t)FPtr; rv.IntVal = APInt(32, PF(ArgValues[0].IntVal.getZExtValue())); @@ -553,8 +553,12 @@ GenericValue JIT::runFunction(Function *F, else ReturnInst::Create(F->getContext(), StubBB); // Just return void. - // Finally, return the value returned by our nullary stub function. - return runFunction(Stub, std::vector<GenericValue>()); + // Finally, call our nullary stub function. + GenericValue Result = runFunction(Stub, std::vector<GenericValue>()); + // Erase it, since no other function can have a reference to it. + Stub->eraseFromParent(); + // And return the result. + return Result; } void JIT::RegisterJITEventListener(JITEventListener *L) { @@ -620,7 +624,6 @@ void JIT::runJITOnFunction(Function *F, MachineCodeInfo *MCI) { } void JIT::runJITOnFunctionUnlocked(Function *F, const MutexGuard &locked) { - static bool isAlreadyCodeGenerating = false; assert(!isAlreadyCodeGenerating && "Error: Recursive compilation detected!"); // JIT the function @@ -661,7 +664,7 @@ void *JIT::getPointerToFunction(Function *F) { // Now that this thread owns the lock, make sure we read in the function if it // exists in this Module. 
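Worth noting before the call below: Function::Materialize() subsumes the JIT::materializeFunction() removed above; a Function now reaches its own GVMaterializer through its parent Module, so the old search over ModuleProviders is unnecessary. The new idiom in isolation (the error wording is ours):

  std::string Err;
  if (F->Materialize(&Err))   // true if the bitcode is corrupt
    llvm_report_error("cannot materialize function: " + Err);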
std::string ErrorMsg;
- if (materializeFunction(F, &ErrorMsg)) {
+ if (F->Materialize(&ErrorMsg)) {
 llvm_report_error("Error reading function '" + F->getName()+
 "' from bitcode file: " + ErrorMsg);
 }
diff --git a/lib/ExecutionEngine/JIT/JIT.h b/lib/ExecutionEngine/JIT/JIT.h index b6f74ff..edae719 100644 --- a/lib/ExecutionEngine/JIT/JIT.h +++ b/lib/ExecutionEngine/JIT/JIT.h
@@ -30,20 +30,20 @@ class TargetMachine;
 class JITState {
 private:
 FunctionPassManager PM; // Passes to compile a function
- ModuleProvider *MP; // ModuleProvider used to create the PM
+ Module *M; // Module used to create the PM

 /// PendingFunctions - Functions which have not been code generated yet, but
 /// were called from a function being code generated.
 std::vector<AssertingVH<Function> > PendingFunctions;

public:
- explicit JITState(ModuleProvider *MP) : PM(MP), MP(MP) {}
+ explicit JITState(Module *M) : PM(M), M(M) {}

 FunctionPassManager &getPM(const MutexGuard &L) {
 return PM;
 }

- ModuleProvider *getMP() const { return MP; }
+ Module *getModule() const { return M; }
 std::vector<AssertingVH<Function> > &getPendingFunctions(const MutexGuard &L){
 return PendingFunctions;
 }
@@ -61,16 +61,20 @@ class JIT : public ExecutionEngine {
 /// should be set to true. Doing so breaks freeMachineCodeForFunction.
 bool AllocateGVsWithCode;

+ /// True while the JIT is generating code. Used to assert against recursive
+ /// entry.
+ bool isAlreadyCodeGenerating;
+
 JITState *jitstate;

- JIT(ModuleProvider *MP, TargetMachine &tm, TargetJITInfo &tji,
+ JIT(Module *M, TargetMachine &tm, TargetJITInfo &tji,
 JITMemoryManager *JMM, CodeGenOpt::Level OptLevel,
 bool AllocateGVsWithCode);
public:
 ~JIT();

 static void Register() {
- JITCtor = create;
+ JITCtor = createJIT;
 }

 /// getJITInfo - Return the target JIT information structure.
@@ -80,35 +84,22 @@ public:
 /// create - Create and return a new JIT compiler if there is one available
 /// for the current target. Otherwise, return null.
 ///
- static ExecutionEngine *create(ModuleProvider *MP,
+ static ExecutionEngine *create(Module *M,
 std::string *Err,
 JITMemoryManager *JMM,
 CodeGenOpt::Level OptLevel = CodeGenOpt::Default,
 bool GVsWithCode = true,
 CodeModel::Model CMM = CodeModel::Default) {
- return ExecutionEngine::createJIT(MP, Err, JMM, OptLevel, GVsWithCode,
+ return ExecutionEngine::createJIT(M, Err, JMM, OptLevel, GVsWithCode,
 CMM);
 }

- virtual void addModuleProvider(ModuleProvider *MP);
+ virtual void addModule(Module *M);

- /// removeModuleProvider - Remove a ModuleProvider from the list of modules.
- /// Relases the Module from the ModuleProvider, materializing it in the
- /// process, and returns the materialized Module.
- virtual Module *removeModuleProvider(ModuleProvider *MP,
- std::string *ErrInfo = 0);
-
- /// deleteModuleProvider - Remove a ModuleProvider from the list of modules,
- /// and deletes the ModuleProvider and owned Module. Avoids materializing
- /// the underlying module.
- virtual void deleteModuleProvider(ModuleProvider *P,std::string *ErrInfo = 0);
-
- /// materializeFunction - make sure the given function is fully read. If the
- /// module is corrupt, this returns true and fills in the optional string with
- /// information about the problem. If successful, this returns false.
- ///
- bool materializeFunction(Function *F, std::string *ErrInfo = 0);
+ /// removeModule - Remove a Module from the list of modules. Returns true if
+ /// M is found.
+ virtual bool removeModule(Module *M);

 /// runFunction - Start execution with the specified function and arguments.
/// @@ -177,14 +168,21 @@ public: /// selectTarget - Pick a target either via -march or by guessing the native /// arch. Add any CPU features specified via -mcpu or -mattr. - static TargetMachine *selectTarget(ModuleProvider *MP, std::string *Err); + static TargetMachine *selectTarget(Module *M, + StringRef MArch, + StringRef MCPU, + const SmallVectorImpl<std::string>& MAttrs, + std::string *Err); - static ExecutionEngine *createJIT(ModuleProvider *MP, + static ExecutionEngine *createJIT(Module *M, std::string *ErrorStr, JITMemoryManager *JMM, CodeGenOpt::Level OptLevel, bool GVsWithCode, - CodeModel::Model CMM); + CodeModel::Model CMM, + StringRef MArch, + StringRef MCPU, + const SmallVectorImpl<std::string>& MAttrs); // Run the JIT on F and return information about the generated code void runJITOnFunction(Function *F, MachineCodeInfo *MCI = 0); diff --git a/lib/ExecutionEngine/JIT/JITDwarfEmitter.cpp b/lib/ExecutionEngine/JIT/JITDwarfEmitter.cpp index c1051a9..946351b 100644 --- a/lib/ExecutionEngine/JIT/JITDwarfEmitter.cpp +++ b/lib/ExecutionEngine/JIT/JITDwarfEmitter.cpp @@ -522,7 +522,11 @@ JITDwarfEmitter::EmitCommonEHFrame(const Function* Personality) const { JCE->emitInt64(((intptr_t)Jit.getPointerToGlobal(Personality))); } - JCE->emitULEB128Bytes(dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4); + // LSDA encoding: This must match the encoding used in EmitEHFrame () + if (PointerSize == 4) + JCE->emitULEB128Bytes(dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4); + else + JCE->emitULEB128Bytes(dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata8); JCE->emitULEB128Bytes(dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4); } else { JCE->emitULEB128Bytes(1); diff --git a/lib/ExecutionEngine/JIT/JITEmitter.cpp b/lib/ExecutionEngine/JIT/JITEmitter.cpp index 4dc119d..57c4375 100644 --- a/lib/ExecutionEngine/JIT/JITEmitter.cpp +++ b/lib/ExecutionEngine/JIT/JITEmitter.cpp @@ -37,6 +37,7 @@ #include "llvm/Target/TargetOptions.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/ManagedStatic.h" #include "llvm/Support/MutexGuard.h" #include "llvm/Support/ValueHandle.h" #include "llvm/Support/raw_ostream.h" @@ -57,13 +58,12 @@ using namespace llvm; STATISTIC(NumBytes, "Number of bytes of machine code compiled"); STATISTIC(NumRelos, "Number of relocations applied"); STATISTIC(NumRetries, "Number of retries with more memory"); -static JIT *TheJIT = 0; // A declaration may stop being a declaration once it's fully read from bitcode. // This function returns true if F is fully read and is still a declaration. static bool isNonGhostDeclaration(const Function *F) { - return F->isDeclaration() && !F->hasNotBeenReadFromBitcode(); + return F->isDeclaration() && !F->isMaterializable(); } //===----------------------------------------------------------------------===// @@ -109,9 +109,13 @@ namespace { /// particular GlobalVariable so that we can reuse them if necessary. GlobalToIndirectSymMapTy GlobalToIndirectSymMap; + /// Instance of the JIT this ResolverState serves. 
+ JIT *TheJIT; + public: - JITResolverState() : FunctionToLazyStubMap(this), - FunctionToCallSitesMap(this) {} + JITResolverState(JIT *jit) : FunctionToLazyStubMap(this), + FunctionToCallSitesMap(this), + TheJIT(jit) {} FunctionToLazyStubMapTy& getFunctionToLazyStubMap( const MutexGuard& locked) { @@ -227,18 +231,13 @@ namespace { JITEmitter &JE; - static JITResolver *TheJITResolver; - public: - explicit JITResolver(JIT &jit, JITEmitter &je) : nextGOTIndex(0), JE(je) { - TheJIT = &jit; + /// Instance of JIT corresponding to this Resolver. + JIT *TheJIT; + public: + explicit JITResolver(JIT &jit, JITEmitter &je) + : state(&jit), nextGOTIndex(0), JE(je), TheJIT(&jit) { LazyResolverFn = jit.getJITInfo().getLazyResolverFunction(JITCompilerFn); - assert(TheJITResolver == 0 && "Multiple JIT resolvers?"); - TheJITResolver = this; - } - - ~JITResolver() { - TheJITResolver = 0; } /// getLazyFunctionStubIfAvailable - This returns a pointer to a function's @@ -273,6 +272,44 @@ namespace { static void *JITCompilerFn(void *Stub); }; + class StubToResolverMapTy { + /// Map a stub address to a specific instance of a JITResolver so that + /// lazily-compiled functions can find the right resolver to use. + /// + /// Guarded by Lock. + std::map<void*, JITResolver*> Map; + + /// Guards Map from concurrent accesses. + mutable sys::Mutex Lock; + + public: + /// Registers a Stub to be resolved by Resolver. + void RegisterStubResolver(void *Stub, JITResolver *Resolver) { + MutexGuard guard(Lock); + Map.insert(std::make_pair(Stub, Resolver)); + } + /// Unregisters the Stub when it's invalidated. + void UnregisterStubResolver(void *Stub) { + MutexGuard guard(Lock); + Map.erase(Stub); + } + /// Returns the JITResolver instance that owns the Stub. + JITResolver *getResolverFromStub(void *Stub) const { + MutexGuard guard(Lock); + // The address given to us for the stub may not be exactly right, it might + // be a little bit after the stub. As such, use upper_bound to find it. + // This is the same trick as in LookupFunctionFromCallSite from + // JITResolverState. + std::map<void*, JITResolver*>::const_iterator I = Map.upper_bound(Stub); + assert(I != Map.begin() && "This is not a known stub!"); + --I; + return I->second; + } + }; + /// This needs to be static so that a lazy call stub can access it with no + /// context except the address of the stub. + ManagedStatic<StubToResolverMapTy> StubToResolverMap; + /// JITEmitter - The JIT implementation of the MachineCodeEmitter, which is /// used to output functions to memory for execution. class JITEmitter : public JITCodeEmitter { @@ -371,10 +408,13 @@ namespace { DILocation PrevDLT; + /// Instance of the JIT + JIT *TheJIT; + public: JITEmitter(JIT &jit, JITMemoryManager *JMM, TargetMachine &TM) : SizeEstimate(0), Resolver(jit, *this), MMI(0), CurFn(0), - EmittedFunctions(this), PrevDLT(NULL) { + EmittedFunctions(this), PrevDLT(NULL), TheJIT(&jit) { MemMgr = JMM ? JMM : JITMemoryManager::CreateDefaultMemManager(); if (jit.getJITInfo().needsGOT()) { MemMgr->AllocateGOT(); @@ -495,8 +535,6 @@ namespace { }; } -JITResolver *JITResolver::TheJITResolver = 0; - void CallSiteValueMapConfig::onDelete(JITResolverState *JRS, Function *F) { JRS->EraseAllCallSitesPrelocked(F); } @@ -551,6 +589,10 @@ void *JITResolver::getLazyFunctionStub(Function *F) { DEBUG(dbgs() << "JIT: Lazy stub emitted at [" << Stub << "] for function '" << F->getName() << "'\n"); + // Register this JITResolver as the one corresponding to this call site so + // JITCompilerFn will be able to find it. 
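The registration just below pairs with StubToResolverMapTy::getResolverFromStub() above, whose upper_bound trick deserves a standalone statement: the address handed to the resolver may point a few bytes past the stub's start, so the entry wanted is the one with the greatest key less than or equal to that address. A self-contained sketch, with names of our choosing:

  #include <map>
  #include <cassert>

  template <typename T>
  T lookupByAddress(const std::map<void*, T> &M, void *Addr) {
    // upper_bound yields the first key strictly greater than Addr; stepping
    // back one entry gives the greatest key <= Addr, i.e. the owning stub.
    typename std::map<void*, T>::const_iterator I = M.upper_bound(Addr);
    assert(I != M.begin() && "Addr is below every registered stub!");
    --I;
    return I->second;
  }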
+ StubToResolverMap->RegisterStubResolver(Stub, this); + // Finally, keep track of the stub-to-Function mapping so that the // JITCompilerFn knows which function to compile! state.AddCallSite(locked, Stub, F); @@ -637,6 +679,9 @@ void JITResolver::getRelocatableGVs(SmallVectorImpl<GlobalValue*> &GVs, GlobalValue *JITResolver::invalidateStub(void *Stub) { MutexGuard locked(TheJIT->lock); + // Remove the stub from the StubToResolverMap. + StubToResolverMap->UnregisterStubResolver(Stub); + GlobalToIndirectSymMapTy &GM = state.getGlobalToIndirectSymMap(locked); // Look up the cheap way first, to see if it's a function stub we are @@ -671,7 +716,8 @@ GlobalValue *JITResolver::invalidateStub(void *Stub) { /// been entered. It looks up which function this stub corresponds to, compiles /// it if necessary, then returns the resultant function pointer. void *JITResolver::JITCompilerFn(void *Stub) { - JITResolver &JR = *TheJITResolver; + JITResolver *JR = StubToResolverMap->getResolverFromStub(Stub); + assert(JR && "Unable to find the corresponding JITResolver to the call site"); Function* F = 0; void* ActualPtr = 0; @@ -680,24 +726,24 @@ void *JITResolver::JITCompilerFn(void *Stub) { // Only lock for getting the Function. The call getPointerToFunction made // in this function might trigger function materializing, which requires // JIT lock to be unlocked. - MutexGuard locked(TheJIT->lock); + MutexGuard locked(JR->TheJIT->lock); // The address given to us for the stub may not be exactly right, it might // be a little bit after the stub. As such, use upper_bound to find it. pair<void*, Function*> I = - JR.state.LookupFunctionFromCallSite(locked, Stub); + JR->state.LookupFunctionFromCallSite(locked, Stub); F = I.second; ActualPtr = I.first; } // If we have already code generated the function, just return the address. - void *Result = TheJIT->getPointerToGlobalIfAvailable(F); + void *Result = JR->TheJIT->getPointerToGlobalIfAvailable(F); if (!Result) { // Otherwise we don't have it, do lazy compilation now. // If lazy compilation is disabled, emit a useful error message and abort. - if (!TheJIT->isCompilingLazily()) { + if (!JR->TheJIT->isCompilingLazily()) { llvm_report_error("LLVM JIT requested to do lazy compilation of function '" + F->getName() + "' when lazy compiles are disabled!"); } @@ -706,11 +752,11 @@ void *JITResolver::JITCompilerFn(void *Stub) { << "' In stub ptr = " << Stub << " actual ptr = " << ActualPtr << "\n"); - Result = TheJIT->getPointerToFunction(F); + Result = JR->TheJIT->getPointerToFunction(F); } // Reacquire the lock to update the GOT map. - MutexGuard locked(TheJIT->lock); + MutexGuard locked(JR->TheJIT->lock); // We might like to remove the call site from the CallSiteToFunction map, but // we can't do that! Multiple threads could be stuck, waiting to acquire the @@ -725,8 +771,8 @@ void *JITResolver::JITCompilerFn(void *Stub) { // if they see it still using the stub address. 
// Note: this is done so the Resolver doesn't have to manage GOT memory
 // Do this without allocating map space if the target isn't using a GOT
- if(JR.revGOTMap.find(Stub) != JR.revGOTMap.end())
- JR.revGOTMap[Result] = JR.revGOTMap[Stub];
+ if(JR->revGOTMap.find(Stub) != JR->revGOTMap.end())
+ JR->revGOTMap[Result] = JR->revGOTMap[Stub];

 return Result;
}
@@ -839,7 +885,7 @@ static unsigned GetConstantPoolSizeInBytes(MachineConstantPool *MCP,
 return Size;
}

-static unsigned GetJumpTableSizeInBytes(MachineJumpTableInfo *MJTI) {
+static unsigned GetJumpTableSizeInBytes(MachineJumpTableInfo *MJTI, JIT *jit) {
 const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
 if (JT.empty()) return 0;
@@ -847,9 +893,7 @@
 for (unsigned i = 0, e = JT.size(); i != e; ++i)
 NumEntries += JT[i].MBBs.size();

- unsigned EntrySize = MJTI->getEntrySize();
-
- return NumEntries * EntrySize;
+ return NumEntries * MJTI->getEntrySize(*jit->getTargetData());
}

static uintptr_t RoundUpToAlign(uintptr_t Size, unsigned Alignment) {
@@ -1017,7 +1061,6 @@ void JITEmitter::startFunction(MachineFunction &F) {
 if (MemMgr->NeedsExactSize()) {
 DEBUG(dbgs() << "JIT: ExactSize\n");
 const TargetInstrInfo* TII = F.getTarget().getInstrInfo();
- MachineJumpTableInfo *MJTI = F.getJumpTableInfo();
 MachineConstantPool *MCP = F.getConstantPool();

 // Ensure the constant pool/jump table info is at least 4-byte aligned.
@@ -1029,11 +1072,14 @@
 // Add the constant pool size
 ActualSize += GetConstantPoolSizeInBytes(MCP, TheJIT->getTargetData());

- // Add the aligment of the jump table info
- ActualSize = RoundUpToAlign(ActualSize, MJTI->getAlignment());
+ if (MachineJumpTableInfo *MJTI = F.getJumpTableInfo()) {
+ // Add the alignment of the jump table info
+ ActualSize = RoundUpToAlign(ActualSize,
+ MJTI->getEntryAlignment(*TheJIT->getTargetData()));

- // Add the jump table size
- ActualSize += GetJumpTableSizeInBytes(MJTI);
+ // Add the jump table size
+ ActualSize += GetJumpTableSizeInBytes(MJTI, TheJIT);
+ }

 // Add the alignment for the function
 ActualSize = RoundUpToAlign(ActualSize,
@@ -1062,7 +1108,8 @@ void JITEmitter::startFunction(MachineFunction &F) {
 emitAlignment(16);

 emitConstantPool(F.getConstantPool());
- initJumpTableInfo(F.getJumpTableInfo());
+ if (MachineJumpTableInfo *MJTI = F.getJumpTableInfo())
+ initJumpTableInfo(MJTI);

 // About to start emitting the machine code for the function.
 emitAlignment(std::max(F.getFunction()->getAlignment(), 8U));
@@ -1084,7 +1131,8 @@ bool JITEmitter::finishFunction(MachineFunction &F) {
 return true;
 }

- emitJumpTableInfo(F.getJumpTableInfo());
+ if (MachineJumpTableInfo *MJTI = F.getJumpTableInfo())
+ emitJumpTableInfo(MJTI);

 // FnStart is the start of the text, not the start of the constant pool and
 // other per-function data.
@@ -1404,13 +1452,14 @@ void JITEmitter::initJumpTableInfo(MachineJumpTableInfo *MJTI) {
 for (unsigned i = 0, e = JT.size(); i != e; ++i)
 NumEntries += JT[i].MBBs.size();

- unsigned EntrySize = MJTI->getEntrySize();
+ unsigned EntrySize = MJTI->getEntrySize(*TheJIT->getTargetData());

 // Just allocate space for all the jump tables now. We will fix up the actual
 // MBB entries in the tables after we emit the code for each block, since then
 // we will know the final locations of the MBBs in memory.
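A worked example of the sizing just computed, under assumed numbers: two jump tables with 4 and 6 targets on a 64-bit target using pointer-sized EK_BlockAddress entries give

  unsigned NumEntries = 4 + 6;               // targets across both tables
  unsigned EntrySize = 8;                    // getEntrySize(TD) == sizeof(void*) here
  unsigned Bytes = NumEntries * EntrySize;   // == 80, handed to allocateSpace()
                                             // after aligning to getEntryAlignment(TD)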
JumpTable = MJTI;
- JumpTableBase = allocateSpace(NumEntries * EntrySize, MJTI->getAlignment());
+ JumpTableBase = allocateSpace(NumEntries * EntrySize,
+ MJTI->getEntryAlignment(*TheJIT->getTargetData()));
}

void JITEmitter::emitJumpTableInfo(MachineJumpTableInfo *MJTI) {
@@ -1420,8 +1469,32 @@
 const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
 if (JT.empty() || JumpTableBase == 0) return;

- if (TargetMachine::getRelocationModel() == Reloc::PIC_) {
- assert(MJTI->getEntrySize() == 4 && "Cross JIT'ing?");
+
+ switch (MJTI->getEntryKind()) {
+ case MachineJumpTableInfo::EK_BlockAddress: {
+ // EK_BlockAddress - Each entry is a plain address of a block, e.g.:
+ // .word LBB123
+ assert(MJTI->getEntrySize(*TheJIT->getTargetData()) == sizeof(void*) &&
+ "Cross JIT'ing?");
+
+ // For each jump table, map each target in the jump table to the address of
+ // an emitted MachineBasicBlock.
+ intptr_t *SlotPtr = (intptr_t*)JumpTableBase;
+
+ for (unsigned i = 0, e = JT.size(); i != e; ++i) {
+ const std::vector<MachineBasicBlock*> &MBBs = JT[i].MBBs;
+ // Store the address of the basic block for this jump table slot in the
+ // memory we allocated for the jump table in 'initJumpTableInfo'
+ for (unsigned mi = 0, me = MBBs.size(); mi != me; ++mi)
+ *SlotPtr++ = getMachineBasicBlockAddress(MBBs[mi]);
+ }
+ break;
+ }
+
+ case MachineJumpTableInfo::EK_Custom32:
+ case MachineJumpTableInfo::EK_GPRel32BlockAddress:
+ case MachineJumpTableInfo::EK_LabelDifference32: {
+ assert(MJTI->getEntrySize(*TheJIT->getTargetData()) == 4 && "Cross JIT'ing?");
 // For each jump table, place the offset from the beginning of the table
 // to the target address.
 int *SlotPtr = (int*)JumpTableBase;
@@ -1433,23 +1506,12 @@
 uintptr_t Base = (uintptr_t)SlotPtr;
 for (unsigned mi = 0, me = MBBs.size(); mi != me; ++mi) {
 uintptr_t MBBAddr = getMachineBasicBlockAddress(MBBs[mi]);
+ /// FIXME: Use EntryKind instead of magic "getPICJumpTableEntry" hook.
 *SlotPtr++ = TheJIT->getJITInfo().getPICJumpTableEntry(MBBAddr, Base);
 }
 }
- } else {
- assert(MJTI->getEntrySize() == sizeof(void*) && "Cross JIT'ing?");
-
- // For each jump table, map each target in the jump table to the address of
- // an emitted MachineBasicBlock.
- intptr_t *SlotPtr = (intptr_t*)JumpTableBase; - - for (unsigned i = 0, e = JT.size(); i != e; ++i) { - const std::vector<MachineBasicBlock*> &MBBs = JT[i].MBBs; - // Store the address of the basic block for this jump table slot in the - // memory we allocated for the jump table in 'initJumpTableInfo' - for (unsigned mi = 0, me = MBBs.size(); mi != me; ++mi) - *SlotPtr++ = getMachineBasicBlockAddress(MBBs[mi]); - } + break; + } } } @@ -1505,9 +1567,9 @@ uintptr_t JITEmitter::getJumpTableEntryAddress(unsigned Index) const { const std::vector<MachineJumpTableEntry> &JT = JumpTable->getJumpTables(); assert(Index < JT.size() && "Invalid jump table index!"); - unsigned Offset = 0; - unsigned EntrySize = JumpTable->getEntrySize(); + unsigned EntrySize = JumpTable->getEntrySize(*TheJIT->getTargetData()); + unsigned Offset = 0; for (unsigned i = 0; i < Index; ++i) Offset += JT[i].MBBs.size(); @@ -1536,19 +1598,6 @@ JITCodeEmitter *JIT::createEmitter(JIT &jit, JITMemoryManager *JMM, return new JITEmitter(jit, JMM, tm); } -// getPointerToNamedFunction - This function is used as a global wrapper to -// JIT::getPointerToNamedFunction for the purpose of resolving symbols when -// bugpoint is debugging the JIT. In that scenario, we are loading an .so and -// need to resolve function(s) that are being mis-codegenerated, so we need to -// resolve their addresses at runtime, and this is the way to do it. -extern "C" { - void *getPointerToNamedFunction(const char *Name) { - if (Function *F = TheJIT->FindFunctionNamed(Name)) - return TheJIT->getPointerToFunction(F); - return TheJIT->getPointerToNamedFunction(Name); - } -} - // getPointerToFunctionOrStub - If the specified function has been // code-gen'd, return a pointer to the function. If not, compile it, or use // a stub to implement lazy compilation if available. diff --git a/lib/ExecutionEngine/JIT/Makefile b/lib/ExecutionEngine/JIT/Makefile index 1c93c06..aafa3d9 100644 --- a/lib/ExecutionEngine/JIT/Makefile +++ b/lib/ExecutionEngine/JIT/Makefile @@ -9,7 +9,6 @@ LEVEL = ../../.. LIBRARYNAME = LLVMJIT -CXXFLAGS = -fno-rtti # Get the $(ARCH) setting include $(LEVEL)/Makefile.config diff --git a/lib/ExecutionEngine/JIT/TargetSelect.cpp b/lib/ExecutionEngine/JIT/TargetSelect.cpp index 8bed33b..3349c33 100644 --- a/lib/ExecutionEngine/JIT/TargetSelect.cpp +++ b/lib/ExecutionEngine/JIT/TargetSelect.cpp @@ -15,7 +15,6 @@ #include "JIT.h" #include "llvm/Module.h" -#include "llvm/ModuleProvider.h" #include "llvm/ADT/Triple.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/raw_ostream.h" @@ -25,28 +24,14 @@ #include "llvm/Target/TargetRegistry.h" using namespace llvm; -static cl::opt<std::string> -MArch("march", - cl::desc("Architecture to generate assembly for (see --version)")); - -static cl::opt<std::string> -MCPU("mcpu", - cl::desc("Target a specific cpu type (-mcpu=help for details)"), - cl::value_desc("cpu-name"), - cl::init("")); - -static cl::list<std::string> -MAttrs("mattr", - cl::CommaSeparated, - cl::desc("Target specific attributes (-mattr=help for details)"), - cl::value_desc("a1,+a2,-a3,...")); - /// selectTarget - Pick a target either via -march or by guessing the native /// arch. Add any CPU features specified via -mcpu or -mattr. 
-TargetMachine *JIT::selectTarget(ModuleProvider *MP, std::string *ErrorStr) { - Module &Mod = *MP->getModule(); - - Triple TheTriple(Mod.getTargetTriple()); +TargetMachine *JIT::selectTarget(Module *Mod, + StringRef MArch, + StringRef MCPU, + const SmallVectorImpl<std::string>& MAttrs, + std::string *ErrorStr) { + Triple TheTriple(Mod->getTargetTriple()); if (TheTriple.getTriple().empty()) TheTriple.setTriple(sys::getHostTriple()); diff --git a/lib/ExecutionEngine/Makefile b/lib/ExecutionEngine/Makefile index 2387b0e..e0e050e 100644 --- a/lib/ExecutionEngine/Makefile +++ b/lib/ExecutionEngine/Makefile @@ -9,6 +9,5 @@ LEVEL = ../.. LIBRARYNAME = LLVMExecutionEngine PARALLEL_DIRS = Interpreter JIT -CXXFLAGS = -fno-rtti include $(LEVEL)/Makefile.common diff --git a/lib/Linker/LinkArchives.cpp b/lib/Linker/LinkArchives.cpp index 365ec05..2c4ed7f 100644 --- a/lib/Linker/LinkArchives.cpp +++ b/lib/Linker/LinkArchives.cpp @@ -14,7 +14,6 @@ #include "llvm/Linker.h" #include "llvm/Module.h" -#include "llvm/ModuleProvider.h" #include "llvm/ADT/SetOperations.h" #include "llvm/Bitcode/Archive.h" #include "llvm/Config/config.h" @@ -139,8 +138,10 @@ Linker::LinkInArchive(const sys::Path &Filename, bool &is_native) { do { CurrentlyUndefinedSymbols = UndefinedSymbols; - // Find the modules we need to link into the target module - std::set<ModuleProvider*> Modules; + // Find the modules we need to link into the target module. Note that arch + // keeps ownership of these modules and may return the same Module* from a + // subsequent call. + std::set<Module*> Modules; if (!arch->findModulesDefiningSymbols(UndefinedSymbols, Modules, &ErrMsg)) return error("Cannot find symbols in '" + Filename.str() + "': " + ErrMsg); @@ -156,19 +157,17 @@ Linker::LinkInArchive(const sys::Path &Filename, bool &is_native) { NotDefinedByArchive.insert(UndefinedSymbols.begin(), UndefinedSymbols.end()); - // Loop over all the ModuleProviders that we got back from the archive - for (std::set<ModuleProvider*>::iterator I=Modules.begin(), E=Modules.end(); + // Loop over all the Modules that we got back from the archive + for (std::set<Module*>::iterator I=Modules.begin(), E=Modules.end(); I != E; ++I) { // Get the module we must link in. 
std::string moduleErrorMsg; - std::auto_ptr<Module> AutoModule((*I)->releaseModule( &moduleErrorMsg )); - if (!moduleErrorMsg.empty()) - return error("Could not load a module: " + moduleErrorMsg); - - Module* aModule = AutoModule.get(); - + Module* aModule = *I; if (aModule != NULL) { + if (aModule->MaterializeAll(&moduleErrorMsg)) + return error("Could not load a module: " + moduleErrorMsg); + verbose(" Linking in module: " + aModule->getModuleIdentifier()); // Link it in diff --git a/lib/Linker/LinkModules.cpp b/lib/Linker/LinkModules.cpp index e2cd47a..7f441b0 100644 --- a/lib/Linker/LinkModules.cpp +++ b/lib/Linker/LinkModules.cpp @@ -392,9 +392,20 @@ static Value *RemapOperand(const Value *In, assert(!isa<GlobalValue>(CPV) && "Unmapped global?"); llvm_unreachable("Unknown type of derived type constant value!"); } - } else if (isa<MDNode>(In) || isa<MDString>(In)) { - Result = const_cast<Value*>(In); - } else if (isa<InlineAsm>(In)) { + } else if (const MDNode *MD = dyn_cast<MDNode>(In)) { + if (MD->isFunctionLocal()) { + SmallVector<Value*, 4> Elts; + for (unsigned i = 0, e = MD->getNumOperands(); i != e; ++i) { + if (MD->getOperand(i)) + Elts.push_back(RemapOperand(MD->getOperand(i), ValueMap)); + else + Elts.push_back(NULL); + } + Result = MDNode::get(In->getContext(), Elts.data(), MD->getNumOperands()); + } else { + Result = const_cast<Value*>(In); + } + } else if (isa<MDString>(In) || isa<InlineAsm>(In) || isa<Instruction>(In)) { Result = const_cast<Value*>(In); } diff --git a/lib/Linker/Makefile b/lib/Linker/Makefile index 2179fd2..19e646b 100644 --- a/lib/Linker/Makefile +++ b/lib/Linker/Makefile @@ -10,7 +10,6 @@ LEVEL = ../.. LIBRARYNAME = LLVMLinker BUILD_ARCHIVE := 1 -CXXFLAGS = -fno-rtti include $(LEVEL)/Makefile.common diff --git a/lib/MC/MCAsmInfo.cpp b/lib/MC/MCAsmInfo.cpp index 547f904..f3f063f 100644 --- a/lib/MC/MCAsmInfo.cpp +++ b/lib/MC/MCAsmInfo.cpp @@ -22,11 +22,10 @@ MCAsmInfo::MCAsmInfo() { HasSubsectionsViaSymbols = false; HasMachoZeroFillDirective = false; HasStaticCtorDtorReferenceInStaticMode = false; - NeedsSet = false; MaxInstLength = 4; PCSymbol = "$"; SeparatorChar = ';'; - CommentColumn = 60; + CommentColumn = 40; CommentString = "#"; GlobalPrefix = ""; PrivateGlobalPrefix = "."; @@ -48,12 +47,11 @@ MCAsmInfo::MCAsmInfo() { AlignDirective = "\t.align\t"; AlignmentIsInBytes = true; TextAlignFillValue = 0; - JumpTableDirective = 0; - PICJumpTableDirective = 0; + GPRel32Directive = 0; GlobalDirective = "\t.globl\t"; - SetDirective = 0; + HasSetDirective = true; HasLCOMMDirective = false; - COMMDirectiveTakesAlignment = true; + COMMDirectiveAlignmentIsInBytes = true; HasDotTypeDotSizeDirective = true; HasSingleParameterDotFile = true; HasNoDeadStrip = false; diff --git a/lib/MC/MCAsmInfoCOFF.cpp b/lib/MC/MCAsmInfoCOFF.cpp index e395acd..9130493 100644 --- a/lib/MC/MCAsmInfoCOFF.cpp +++ b/lib/MC/MCAsmInfoCOFF.cpp @@ -18,14 +18,13 @@ using namespace llvm; MCAsmInfoCOFF::MCAsmInfoCOFF() { GlobalPrefix = "_"; + COMMDirectiveAlignmentIsInBytes = false; HasLCOMMDirective = true; - COMMDirectiveTakesAlignment = false; HasDotTypeDotSizeDirective = false; HasSingleParameterDotFile = false; PrivateGlobalPrefix = "L"; // Prefix for private global symbols WeakRefDirective = "\t.weak\t"; - LinkOnceDirective = "\t.linkonce same_size\n"; - SetDirective = "\t.set\t"; + LinkOnceDirective = "\t.linkonce discard\n"; // Doesn't support visibility: HiddenVisibilityAttr = ProtectedVisibilityAttr = MCSA_Invalid; @@ -37,4 +36,3 @@ MCAsmInfoCOFF::MCAsmInfoCOFF() { 
SupportsDebugInformation = true; DwarfSectionOffsetDirective = "\t.secrel32\t"; } - diff --git a/lib/MC/MCAsmInfoDarwin.cpp b/lib/MC/MCAsmInfoDarwin.cpp index 9902f50..da865ad 100644 --- a/lib/MC/MCAsmInfoDarwin.cpp +++ b/lib/MC/MCAsmInfoDarwin.cpp @@ -21,12 +21,12 @@ MCAsmInfoDarwin::MCAsmInfoDarwin() { GlobalPrefix = "_"; PrivateGlobalPrefix = "L"; LinkerPrivateGlobalPrefix = "l"; - NeedsSet = true; AllowQuotesInName = true; HasSingleParameterDotFile = false; HasSubsectionsViaSymbols = true; AlignmentIsInBytes = false; + COMMDirectiveAlignmentIsInBytes = false; InlineAsmStart = " InlineAsm Start"; InlineAsmEnd = " InlineAsm End"; @@ -36,7 +36,6 @@ MCAsmInfoDarwin::MCAsmInfoDarwin() { ZeroDirective = "\t.space\t"; // ".space N" emits N zeros. HasMachoZeroFillDirective = true; // Uses .zerofill HasStaticCtorDtorReferenceInStaticMode = true; - SetDirective = "\t.set"; HiddenVisibilityAttr = MCSA_PrivateExtern; // Doesn't support protected visibility. diff --git a/lib/MC/MCAsmStreamer.cpp b/lib/MC/MCAsmStreamer.cpp index bf39239..6add1b4 100644 --- a/lib/MC/MCAsmStreamer.cpp +++ b/lib/MC/MCAsmStreamer.cpp @@ -29,25 +29,32 @@ namespace { class MCAsmStreamer : public MCStreamer { formatted_raw_ostream &OS; const MCAsmInfo &MAI; - bool IsLittleEndian, IsVerboseAsm; MCInstPrinter *InstPrinter; MCCodeEmitter *Emitter; SmallString<128> CommentToEmit; raw_svector_ostream CommentStream; + + unsigned IsLittleEndian : 1; + unsigned IsVerboseAsm : 1; + unsigned ShowInst : 1; + public: MCAsmStreamer(MCContext &Context, formatted_raw_ostream &os, const MCAsmInfo &mai, bool isLittleEndian, bool isVerboseAsm, MCInstPrinter *printer, - MCCodeEmitter *emitter) - : MCStreamer(Context), OS(os), MAI(mai), IsLittleEndian(isLittleEndian), - IsVerboseAsm(isVerboseAsm), InstPrinter(printer), Emitter(emitter), - CommentStream(CommentToEmit) {} + MCCodeEmitter *emitter, bool showInst) + : MCStreamer(Context), OS(os), MAI(mai), InstPrinter(printer), + Emitter(emitter), CommentStream(CommentToEmit), + IsLittleEndian(isLittleEndian), IsVerboseAsm(isVerboseAsm), + ShowInst(showInst) { + if (InstPrinter && IsVerboseAsm) + InstPrinter->setCommentStream(CommentStream); + } ~MCAsmStreamer() {} bool isLittleEndian() const { return IsLittleEndian; } - - + inline void EmitEOL() { // If we don't have any comments, just emit a \n. if (!IsVerboseAsm) { @@ -57,13 +64,20 @@ public: EmitCommentsAndEOL(); } void EmitCommentsAndEOL(); - + + /// isVerboseAsm - Return true if this streamer supports verbose assembly at + /// all. + virtual bool isVerboseAsm() const { return IsVerboseAsm; } + /// AddComment - Add a comment that can be emitted to the generated .s /// file if applicable as a QoI issue to make the output of the compiler /// more readable. This only affects the MCAsmStreamer, and only when /// verbose assembly output is enabled. virtual void AddComment(const Twine &T); - + + /// AddEncodingComment - Add a comment showing the encoding of an instruction. + virtual void AddEncodingComment(const MCInst &Inst); + /// GetCommentOS - Return a raw_ostream that comments can be written to. /// Unlike AddComment, you are required to terminate comments with \n if you /// use this method. @@ -72,12 +86,12 @@ public: return nulls(); // Discard comments unless in verbose asm mode. return CommentStream; } - + /// AddBlankLine - Emit a blank line to a .s file to pretty it up. 
virtual void AddBlankLine() { EmitEOL(); } - + /// @name MCStreamer Interface /// @{ @@ -93,6 +107,7 @@ public: virtual void EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue); + virtual void EmitELFSize(MCSymbol *Symbol, const MCExpr *Value); virtual void EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size, unsigned ByteAlignment); @@ -109,6 +124,8 @@ public: virtual void EmitValue(const MCExpr *Value, unsigned Size,unsigned AddrSpace); virtual void EmitIntValue(uint64_t Value, unsigned Size, unsigned AddrSpace); + virtual void EmitGPRel32Value(const MCExpr *Value); + virtual void EmitFill(uint64_t NumBytes, uint8_t FillValue, unsigned AddrSpace); @@ -119,9 +136,12 @@ public: virtual void EmitValueToOffset(const MCExpr *Offset, unsigned char Value = 0); - - virtual void EmitInstruction(const MCInst &Inst); + virtual void EmitFileDirective(StringRef Filename); + virtual void EmitDwarfFileDirective(unsigned FileNo, StringRef Filename); + + virtual void EmitInstruction(const MCInst &Inst); + virtual void Finish(); /// @} @@ -178,12 +198,6 @@ static inline int64_t truncateToSize(int64_t Value, unsigned Bytes) { return Value & ((uint64_t) (int64_t) -1 >> (64 - Bytes * 8)); } -static inline const MCExpr *truncateToSize(const MCExpr *Value, - unsigned Bytes) { - // FIXME: Do we really need this routine? - return Value; -} - void MCAsmStreamer::SwitchSection(const MCSection *Section) { assert(Section && "Cannot switch to a null section!"); if (Section != CurSection) { @@ -226,7 +240,29 @@ void MCAsmStreamer::EmitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute) { switch (Attribute) { case MCSA_Invalid: assert(0 && "Invalid symbol attribute"); - case MCSA_Global: OS << MAI.getGlobalDirective(); break; // .globl + case MCSA_ELF_TypeFunction: /// .type _foo, STT_FUNC # aka @function + case MCSA_ELF_TypeIndFunction: /// .type _foo, STT_GNU_IFUNC + case MCSA_ELF_TypeObject: /// .type _foo, STT_OBJECT # aka @object + case MCSA_ELF_TypeTLS: /// .type _foo, STT_TLS # aka @tls_object + case MCSA_ELF_TypeCommon: /// .type _foo, STT_COMMON # aka @common + case MCSA_ELF_TypeNoType: /// .type _foo, STT_NOTYPE # aka @notype + assert(MAI.hasDotTypeDotSizeDirective() && "Symbol Attr not supported"); + OS << "\t.type\t" << *Symbol << ',' + << ((MAI.getCommentString()[0] != '@') ? 
'@' : '%'); + switch (Attribute) { + default: assert(0 && "Unknown ELF .type"); + case MCSA_ELF_TypeFunction: OS << "function"; break; + case MCSA_ELF_TypeIndFunction: OS << "gnu_indirect_function"; break; + case MCSA_ELF_TypeObject: OS << "object"; break; + case MCSA_ELF_TypeTLS: OS << "tls_object"; break; + case MCSA_ELF_TypeCommon: OS << "common"; break; + case MCSA_ELF_TypeNoType: OS << "no_type"; break; + } + EmitEOL(); + return; + case MCSA_Global: // .globl/.global + OS << MAI.getGlobalDirective(); + break; case MCSA_Hidden: OS << ".hidden "; break; case MCSA_IndirectSymbol: OS << ".indirect_symbol "; break; case MCSA_Internal: OS << ".internal "; break; @@ -251,11 +287,16 @@ void MCAsmStreamer::EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue) { EmitEOL(); } +void MCAsmStreamer::EmitELFSize(MCSymbol *Symbol, const MCExpr *Value) { + assert(MAI.hasDotTypeDotSizeDirective()); + OS << "\t.size\t" << *Symbol << ", " << *Value << '\n'; +} + void MCAsmStreamer::EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size, unsigned ByteAlignment) { OS << "\t.comm\t" << *Symbol << ',' << Size; - if (ByteAlignment != 0 && MAI.getCOMMDirectiveTakesAlignment()) { - if (MAI.getAlignmentIsInBytes()) + if (ByteAlignment != 0) { + if (MAI.getCOMMDirectiveAlignmentIsInBytes()) OS << ',' << ByteAlignment; else OS << ',' << Log2_32(ByteAlignment); @@ -292,6 +333,40 @@ void MCAsmStreamer::EmitZerofill(const MCSection *Section, MCSymbol *Symbol, static inline char toOctal(int X) { return (X&7)+'0'; } +static void PrintQuotedString(StringRef Data, raw_ostream &OS) { + OS << '"'; + + for (unsigned i = 0, e = Data.size(); i != e; ++i) { + unsigned char C = Data[i]; + if (C == '"' || C == '\\') { + OS << '\\' << (char)C; + continue; + } + + if (isprint((unsigned char)C)) { + OS << (char)C; + continue; + } + + switch (C) { + case '\b': OS << "\\b"; break; + case '\f': OS << "\\f"; break; + case '\n': OS << "\\n"; break; + case '\r': OS << "\\r"; break; + case '\t': OS << "\\t"; break; + default: + OS << '\\'; + OS << toOctal(C >> 6); + OS << toOctal(C >> 3); + OS << toOctal(C >> 0); + break; + } + } + + OS << '"'; +} + + void MCAsmStreamer::EmitBytes(StringRef Data, unsigned AddrSpace) { assert(CurSection && "Cannot emit contents before setting section!"); if (Data.empty()) return; @@ -312,34 +387,8 @@ void MCAsmStreamer::EmitBytes(StringRef Data, unsigned AddrSpace) { OS << MAI.getAsciiDirective(); } - OS << " \""; - for (unsigned i = 0, e = Data.size(); i != e; ++i) { - unsigned char C = Data[i]; - if (C == '"' || C == '\\') { - OS << '\\' << (char)C; - continue; - } - - if (isprint((unsigned char)C)) { - OS << (char)C; - continue; - } - - switch (C) { - case '\b': OS << "\\b"; break; - case '\f': OS << "\\f"; break; - case '\n': OS << "\\n"; break; - case '\r': OS << "\\r"; break; - case '\t': OS << "\\t"; break; - default: - OS << '\\'; - OS << toOctal(C >> 6); - OS << toOctal(C >> 3); - OS << toOctal(C >> 0); - break; - } - } - OS << '"'; + OS << ' '; + PrintQuotedString(Data, OS); EmitEOL(); } @@ -386,10 +435,17 @@ void MCAsmStreamer::EmitValue(const MCExpr *Value, unsigned Size, } assert(Directive && "Invalid size for machine code value!"); - OS << Directive << *truncateToSize(Value, Size); + OS << Directive << *Value; + EmitEOL(); +} + +void MCAsmStreamer::EmitGPRel32Value(const MCExpr *Value) { + assert(MAI.getGPRel32Directive() != 0); + OS << MAI.getGPRel32Directive() << *Value; EmitEOL(); } + /// EmitFill - Emit NumBytes bytes worth of the value specified by /// FillValue. 
This implements directives such as '.space'. void MCAsmStreamer::EmitFill(uint64_t NumBytes, uint8_t FillValue, @@ -464,49 +520,133 @@ void MCAsmStreamer::EmitValueToOffset(const MCExpr *Offset, EmitEOL(); } -void MCAsmStreamer::EmitInstruction(const MCInst &Inst) { - assert(CurSection && "Cannot emit contents before setting section!"); - // If we have an AsmPrinter, use that to print. - if (InstPrinter) { - InstPrinter->printInst(&Inst); - EmitEOL(); +void MCAsmStreamer::EmitFileDirective(StringRef Filename) { + assert(MAI.hasSingleParameterDotFile()); + OS << "\t.file\t"; + PrintQuotedString(Filename, OS); + EmitEOL(); +} - // Show the encoding if we have a code emitter. - if (Emitter) { - SmallString<256> Code; - raw_svector_ostream VecOS(Code); - Emitter->EncodeInstruction(Inst, VecOS); - VecOS.flush(); - - OS.indent(20); - OS << " # encoding: ["; - for (unsigned i = 0, e = Code.size(); i != e; ++i) { - if (i) - OS << ','; - OS << format("%#04x", uint8_t(Code[i])); +void MCAsmStreamer::EmitDwarfFileDirective(unsigned FileNo, StringRef Filename){ + OS << "\t.file\t" << FileNo << ' '; + PrintQuotedString(Filename, OS); + EmitEOL(); +} + +void MCAsmStreamer::AddEncodingComment(const MCInst &Inst) { + raw_ostream &OS = GetCommentOS(); + SmallString<256> Code; + SmallVector<MCFixup, 4> Fixups; + raw_svector_ostream VecOS(Code); + Emitter->EncodeInstruction(Inst, VecOS, Fixups); + VecOS.flush(); + + // If we are showing fixups, create symbolic markers in the encoded + // representation. We do this by making a per-bit map to the fixup item index, + // then trying to display it as nicely as possible. + SmallVector<uint8_t, 64> FixupMap; + FixupMap.resize(Code.size() * 8); + for (unsigned i = 0, e = Code.size() * 8; i != e; ++i) + FixupMap[i] = 0; + + for (unsigned i = 0, e = Fixups.size(); i != e; ++i) { + MCFixup &F = Fixups[i]; + const MCFixupKindInfo &Info = Emitter->getFixupKindInfo(F.getKind()); + for (unsigned j = 0; j != Info.TargetSize; ++j) { + unsigned Index = F.getOffset() * 8 + Info.TargetOffset + j; + assert(Index < Code.size() * 8 && "Invalid offset in fixup!"); + FixupMap[Index] = 1 + i; + } + } + + OS << "encoding: ["; + for (unsigned i = 0, e = Code.size(); i != e; ++i) { + if (i) + OS << ','; + + // See if all bits are the same map entry. + uint8_t MapEntry = FixupMap[i * 8 + 0]; + for (unsigned j = 1; j != 8; ++j) { + if (FixupMap[i * 8 + j] == MapEntry) + continue; + + MapEntry = uint8_t(~0U); + break; + } + + if (MapEntry != uint8_t(~0U)) { + if (MapEntry == 0) { + OS << format("0x%02x", uint8_t(Code[i])); + } else { + assert(Code[i] == 0 && "Encoder wrote into fixed up bit!"); + OS << char('A' + MapEntry - 1); + } + } else { + // Otherwise, write out in binary. + OS << "0b"; + for (unsigned j = 8; j--;) { + unsigned Bit = (Code[i] >> j) & 1; + if (uint8_t MapEntry = FixupMap[i * 8 + j]) { + assert(Bit == 0 && "Encoder wrote into fixed up bit!"); + OS << char('A' + MapEntry - 1); + } else + OS << Bit; } - OS << "]\n"; } + } + OS << "]\n"; - return; + for (unsigned i = 0, e = Fixups.size(); i != e; ++i) { + MCFixup &F = Fixups[i]; + const MCFixupKindInfo &Info = Emitter->getFixupKindInfo(F.getKind()); + OS << " fixup " << char('A' + i) << " - " << "offset: " << F.getOffset() + << ", value: " << *F.getValue() << ", kind: " << Info.Name << "\n"; } +} + +void MCAsmStreamer::EmitInstruction(const MCInst &Inst) { + assert(CurSection && "Cannot emit contents before setting section!"); - // Otherwise fall back to a structural printing for now. 
Eventually we should - // always have access to the target specific printer. - Inst.print(OS, &MAI); + // Show the encoding in a comment if we have a code emitter. + if (Emitter) + AddEncodingComment(Inst); + + // Show the MCInst if enabled. + if (ShowInst) { + raw_ostream &OS = GetCommentOS(); + OS << "<MCInst #" << Inst.getOpcode(); + + StringRef InstName; + if (InstPrinter) + InstName = InstPrinter->getOpcodeName(Inst.getOpcode()); + if (!InstName.empty()) + OS << ' ' << InstName; + + for (unsigned i = 0, e = Inst.getNumOperands(); i != e; ++i) { + OS << "\n "; + Inst.getOperand(i).print(OS, &MAI); + } + OS << ">\n"; + } + + // If we have an AsmPrinter, use that to print, otherwise dump the MCInst. + if (InstPrinter) + InstPrinter->printInst(&Inst); + else + Inst.print(OS, &MAI); EmitEOL(); } void MCAsmStreamer::Finish() { OS.flush(); } - + MCStreamer *llvm::createAsmStreamer(MCContext &Context, formatted_raw_ostream &OS, const MCAsmInfo &MAI, bool isLittleEndian, bool isVerboseAsm, MCInstPrinter *IP, - MCCodeEmitter *CE) { + MCCodeEmitter *CE, bool ShowInst) { return new MCAsmStreamer(Context, OS, MAI, isLittleEndian, isVerboseAsm, - IP, CE); + IP, CE, ShowInst); } diff --git a/lib/MC/MCAssembler.cpp b/lib/MC/MCAssembler.cpp index f0f5a47..653fbf2 100644 --- a/lib/MC/MCAssembler.cpp +++ b/lib/MC/MCAssembler.cpp @@ -13,14 +13,20 @@ #include "llvm/MC/MCSectionMachO.h" #include "llvm/MC/MCSymbol.h" #include "llvm/MC/MCValue.h" -#include "llvm/Target/TargetMachOWriterInfo.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/Statistic.h" +#include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/Twine.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MachO.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Support/Debug.h" + +// FIXME: Gross. +#include "../Target/X86/X86FixupKinds.h" + #include <vector> using namespace llvm; @@ -45,6 +51,30 @@ static bool isVirtualSection(const MCSection &Section) { return (Type == MCSectionMachO::S_ZEROFILL); } +static unsigned getFixupKindLog2Size(MCFixupKind Kind) { + switch (Kind) { + default: llvm_unreachable("invalid fixup kind!"); + case X86::reloc_pcrel_1byte: + case FK_Data_1: return 0; + case FK_Data_2: return 1; + case X86::reloc_pcrel_4byte: + case X86::reloc_riprel_4byte: + case FK_Data_4: return 2; + case FK_Data_8: return 3; + } +} + +static bool isFixupKindPCRel(MCFixupKind Kind) { + switch (Kind) { + default: + return false; + case X86::reloc_pcrel_1byte: + case X86::reloc_pcrel_4byte: + case X86::reloc_riprel_4byte: + return true; + } +} + class MachObjectWriter { // See <mach-o/loader.h>. enum { @@ -203,9 +233,9 @@ public: Write32(Header_Magic32); // FIXME: Support cputype. - Write32(TargetMachOWriterInfo::HDR_CPU_TYPE_I386); + Write32(MachO::CPUTypeI386); // FIXME: Support cpusubtype. - Write32(TargetMachOWriterInfo::HDR_CPU_SUBTYPE_I386_ALL); + Write32(MachO::CPUSubType_I386_ALL); Write32(HFT_Object); Write32(NumLoadCommands); // Object files have a single load command, the // segment. @@ -266,11 +296,15 @@ public: Write32(SD.getSize()); // size Write32(FileOffset); + unsigned Flags = Section.getTypeAndAttributes(); + if (SD.hasInstructions()) + Flags |= MCSectionMachO::S_ATTR_SOME_INSTRUCTIONS; + assert(isPowerOf2_32(SD.getAlignment()) && "Invalid alignment!"); Write32(Log2_32(SD.getAlignment())); Write32(NumRelocations ? 
RelocationsStart : 0); Write32(NumRelocations); - Write32(Section.getTypeAndAttributes()); + Write32(Flags); Write32(0); // reserved1 Write32(Section.getStubSize()); // reserved2 @@ -398,12 +432,12 @@ public: uint32_t Word0; uint32_t Word1; }; - void ComputeScatteredRelocationInfo(MCAssembler &Asm, - MCSectionData::Fixup &Fixup, + void ComputeScatteredRelocationInfo(MCAssembler &Asm, MCFragment &Fragment, + MCAsmFixup &Fixup, const MCValue &Target, DenseMap<const MCSymbol*,MCSymbolData*> &SymbolMap, std::vector<MachRelocationEntry> &Relocs) { - uint32_t Address = Fixup.Fragment->getOffset() + Fixup.Offset; + uint32_t Address = Fragment.getOffset() + Fixup.Offset; unsigned IsPCRel = 0; unsigned Type = RIT_Vanilla; @@ -420,11 +454,14 @@ public: Value2 = SD->getFragment()->getAddress() + SD->getOffset(); } - unsigned Log2Size = Log2_32(Fixup.Size); - assert((1U << Log2Size) == Fixup.Size && "Invalid fixup size!"); + unsigned Log2Size = getFixupKindLog2Size(Fixup.Kind); // The value which goes in the fixup is current value of the expression. Fixup.FixedValue = Value - Value2 + Target.getConstant(); + if (isFixupKindPCRel(Fixup.Kind)) { + Fixup.FixedValue -= Address + (1 << Log2Size); + IsPCRel = 1; + } MachRelocationEntry MRE; MRE.Word0 = ((Address << 0) | @@ -449,8 +486,8 @@ public: } } - void ComputeRelocationInfo(MCAssembler &Asm, - MCSectionData::Fixup &Fixup, + void ComputeRelocationInfo(MCAssembler &Asm, MCDataFragment &Fragment, + MCAsmFixup &Fixup, DenseMap<const MCSymbol*,MCSymbolData*> &SymbolMap, std::vector<MachRelocationEntry> &Relocs) { MCValue Target; @@ -462,11 +499,11 @@ public: if (Target.getSymB() || (Target.getSymA() && !Target.getSymA()->isUndefined() && Target.getConstant())) - return ComputeScatteredRelocationInfo(Asm, Fixup, Target, + return ComputeScatteredRelocationInfo(Asm, Fragment, Fixup, Target, SymbolMap, Relocs); // See <reloc.h>. - uint32_t Address = Fixup.Fragment->getOffset() + Fixup.Offset; + uint32_t Address = Fragment.getOffset() + Fixup.Offset; uint32_t Value = 0; unsigned Index = 0; unsigned IsPCRel = 0; @@ -475,6 +512,8 @@ public: if (Target.isAbsolute()) { // constant // SymbolNum of 0 indicates the absolute section. + // + // FIXME: When is this generated? Type = RIT_Vanilla; Value = 0; llvm_unreachable("FIXME: Not yet implemented!"); @@ -491,10 +530,11 @@ public: // // FIXME: O(N) Index = 1; - for (MCAssembler::iterator it = Asm.begin(), - ie = Asm.end(); it != ie; ++it, ++Index) + MCAssembler::iterator it = Asm.begin(), ie = Asm.end(); + for (; it != ie; ++it, ++Index) if (&*it == SD->getFragment()->getParent()) break; + assert(it != ie && "Unable to find section index!"); Value = SD->getFragment()->getAddress() + SD->getOffset(); } @@ -504,8 +544,12 @@ public: // The value which goes in the fixup is current value of the expression. Fixup.FixedValue = Value + Target.getConstant(); - unsigned Log2Size = Log2_32(Fixup.Size); - assert((1U << Log2Size) == Fixup.Size && "Invalid fixup size!"); + unsigned Log2Size = getFixupKindLog2Size(Fixup.Kind); + + if (isFixupKindPCRel(Fixup.Kind)) { + Fixup.FixedValue -= Address + (1<<Log2Size); + IsPCRel = 1; + } // struct relocation_info (8 bytes) MachRelocationEntry MRE; @@ -742,7 +786,7 @@ public: SD.getAddress() + SD.getFileSize()); } - // The section data is passed to 4 bytes. + // The section data is padded to 4 bytes. // // FIXME: Is this machine dependent? unsigned SectionDataPadding = OffsetToAlignment(SectionDataFileSize, 4); @@ -757,22 +801,25 @@ public: // ... and then the section headers. 
// // We also compute the section relocations while we do this. Note that - // compute relocation info will also update the fixup to have the correct - // value; this will be overwrite the appropriate data in the fragment when - // it is written. + // computing relocation info will also update the fixup to have the correct + // value; this will overwrite the appropriate data in the fragment when it + // is written. std::vector<MachRelocationEntry> RelocInfos; uint64_t RelocTableEnd = SectionDataStart + SectionDataFileSize; - for (MCAssembler::iterator it = Asm.begin(), ie = Asm.end(); it != ie; - ++it) { + for (MCAssembler::iterator it = Asm.begin(), + ie = Asm.end(); it != ie; ++it) { MCSectionData &SD = *it; // The assembler writes relocations in the reverse order they were seen. // // FIXME: It is probably more complicated than this. unsigned NumRelocsStart = RelocInfos.size(); - for (unsigned i = 0, e = SD.fixup_size(); i != e; ++i) - ComputeRelocationInfo(Asm, SD.getFixups()[e - i - 1], SymbolMap, - RelocInfos); + for (MCSectionData::reverse_iterator it2 = SD.rbegin(), + ie2 = SD.rend(); it2 != ie2; ++it2) + if (MCDataFragment *DF = dyn_cast<MCDataFragment>(&*it2)) + for (unsigned i = 0, e = DF->fixup_size(); i != e; ++i) + ComputeRelocationInfo(Asm, *DF, DF->getFixups()[e - i - 1], + SymbolMap, RelocInfos); unsigned NumRelocs = RelocInfos.size() - NumRelocsStart; uint64_t SectionStart = SectionDataStart + SD.getAddress(); @@ -867,6 +914,16 @@ public: OS << StringTable.str(); } } + + void ApplyFixup(const MCAsmFixup &Fixup, MCDataFragment &DF) { + unsigned Size = 1 << getFixupKindLog2Size(Fixup.Kind); + + // FIXME: Endianness assumption. + assert(Fixup.Offset + Size <= DF.getContents().size() && + "Invalid fixup offset!"); + for (unsigned i = 0; i != Size; ++i) + DF.getContents()[Fixup.Offset + i] = uint8_t(Fixup.FixedValue >> (i * 8)); + } }; /* *** */ @@ -901,34 +958,12 @@ MCSectionData::MCSectionData(const MCSection &_Section, MCAssembler *A) Address(~UINT64_C(0)), Size(~UINT64_C(0)), FileSize(~UINT64_C(0)), - LastFixupLookup(~0) + HasInstructions(false) { if (A) A->getSectionList().push_back(this); } -const MCSectionData::Fixup * -MCSectionData::LookupFixup(const MCFragment *Fragment, uint64_t Offset) const { - // Use a one level cache to turn the common case of accessing the fixups in - // order into O(1) instead of O(N). - unsigned i = LastFixupLookup, Count = Fixups.size(), End = Fixups.size(); - if (i >= End) - i = 0; - while (Count--) { - const Fixup &F = Fixups[i]; - if (F.Fragment == Fragment && F.Offset == Offset) { - LastFixupLookup = i; - return &F; - } - - ++i; - if (i == End) - i = 0; - } - - return 0; -} - /* *** */ MCSymbolData::MCSymbolData() : Symbol(0) {} @@ -975,31 +1010,10 @@ void MCAssembler::LayoutSection(MCSectionData &SD) { } case MCFragment::FT_Data: + case MCFragment::FT_Fill: F.setFileSize(F.getMaxFileSize()); break; - case MCFragment::FT_Fill: { - MCFillFragment &FF = cast<MCFillFragment>(F); - - F.setFileSize(F.getMaxFileSize()); - - MCValue Target; - if (!FF.getValue().EvaluateAsRelocatable(Target)) - llvm_report_error("expected relocatable expression"); - - // If the fill value is constant, thats it. - if (Target.isAbsolute()) - break; - - // Otherwise, add fixups for the values. 
- for (uint64_t i = 0, e = FF.getCount(); i != e; ++i) { - MCSectionData::Fixup Fix(F, i * FF.getValueSize(), - FF.getValue(),FF.getValueSize()); - SD.getFixups().push_back(Fix); - } - break; - } - case MCFragment::FT_Org: { MCOrgFragment &OF = cast<MCOrgFragment>(F); @@ -1082,39 +1096,30 @@ static void WriteFileData(raw_ostream &OS, const MCFragment &F, break; } - case MCFragment::FT_Data: + case MCFragment::FT_Data: { + MCDataFragment &DF = cast<MCDataFragment>(F); + + // Apply the fixups. + // + // FIXME: Move elsewhere. + for (MCDataFragment::const_fixup_iterator it = DF.fixup_begin(), + ie = DF.fixup_end(); it != ie; ++it) + MOW.ApplyFixup(*it, DF); + OS << cast<MCDataFragment>(F).getContents().str(); break; + } case MCFragment::FT_Fill: { MCFillFragment &FF = cast<MCFillFragment>(F); - - int64_t Value = 0; - - MCValue Target; - if (!FF.getValue().EvaluateAsRelocatable(Target)) - llvm_report_error("expected relocatable expression"); - - if (Target.isAbsolute()) - Value = Target.getConstant(); for (uint64_t i = 0, e = FF.getCount(); i != e; ++i) { - if (!Target.isAbsolute()) { - // Find the fixup. - // - // FIXME: Find a better way to write in the fixes. - const MCSectionData::Fixup *Fixup = - F.getParent()->LookupFixup(&F, i * FF.getValueSize()); - assert(Fixup && "Missing fixup for fill value!"); - Value = Fixup->FixedValue; - } - switch (FF.getValueSize()) { default: assert(0 && "Invalid size!"); - case 1: MOW.Write8 (uint8_t (Value)); break; - case 2: MOW.Write16(uint16_t(Value)); break; - case 4: MOW.Write32(uint32_t(Value)); break; - case 8: MOW.Write64(uint64_t(Value)); break; + case 1: MOW.Write8 (uint8_t (FF.getValue())); break; + case 2: MOW.Write16(uint16_t(FF.getValue())); break; + case 4: MOW.Write32(uint32_t(FF.getValue())); break; + case 8: MOW.Write64(uint64_t(FF.getValue())); break; } } break; @@ -1162,6 +1167,10 @@ static void WriteFileData(raw_ostream &OS, const MCSectionData &SD, } void MCAssembler::Finish() { + DEBUG_WITH_TYPE("mc-dump", { + llvm::errs() << "assembler backend - pre-layout\n--\n"; + dump(); }); + // Layout the concrete sections and fragments. uint64_t Address = 0; MCSectionData *Prev = 0; @@ -1200,9 +1209,149 @@ void MCAssembler::Finish() { Address += SD.getSize(); } + DEBUG_WITH_TYPE("mc-dump", { + llvm::errs() << "assembler backend - post-layout\n--\n"; + dump(); }); + // Write the object file. 
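A minimal standalone sketch of the fixed-value rule the relocation code above applies (illustrative names, not LLVM API): for a pc-relative fixup the encoded field holds the distance from the end of the fixup field itself, which is why both ComputeRelocationInfo paths subtract Address + (1 << Log2Size).

    #include <cstdint>

    // Resolve a fixup to the raw value that gets written into section data.
    // TargetAddr + Addend mirrors Value + Target.getConstant() above; FixupAddr
    // is the fixup's address within the section (fragment offset + offset).
    uint64_t resolveFixup(uint64_t TargetAddr, int64_t Addend,
                          uint64_t FixupAddr, unsigned Log2Size, bool IsPCRel) {
      uint64_t Value = TargetAddr + Addend;
      if (IsPCRel)
        Value -= FixupAddr + (1ull << Log2Size); // pc points past the field
      return Value;
    }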
MachObjectWriter MOW(OS); MOW.WriteObject(*this); OS.flush(); } + + +// Debugging methods + +namespace llvm { + +raw_ostream &operator<<(raw_ostream &OS, const MCAsmFixup &AF) { + OS << "<MCAsmFixup" << " Offset:" << AF.Offset << " Value:" << *AF.Value + << " Kind:" << AF.Kind << ">"; + return OS; +} + +} + +void MCFragment::dump() { + raw_ostream &OS = llvm::errs(); + + OS << "<MCFragment " << (void*) this << " Offset:" << Offset + << " FileSize:" << FileSize; + + OS << ">"; +} + +void MCAlignFragment::dump() { + raw_ostream &OS = llvm::errs(); + + OS << "<MCAlignFragment "; + this->MCFragment::dump(); + OS << "\n "; + OS << " Alignment:" << getAlignment() + << " Value:" << getValue() << " ValueSize:" << getValueSize() + << " MaxBytesToEmit:" << getMaxBytesToEmit() << ">"; +} + +void MCDataFragment::dump() { + raw_ostream &OS = llvm::errs(); + + OS << "<MCDataFragment "; + this->MCFragment::dump(); + OS << "\n "; + OS << " Contents:["; + for (unsigned i = 0, e = getContents().size(); i != e; ++i) { + if (i) OS << ","; + OS << hexdigit((Contents[i] >> 4) & 0xF) << hexdigit(Contents[i] & 0xF); + } + OS << "] (" << getContents().size() << " bytes)"; + + if (!getFixups().empty()) { + OS << ",\n "; + OS << " Fixups:["; + for (fixup_iterator it = fixup_begin(), ie = fixup_end(); it != ie; ++it) { + if (it != fixup_begin()) OS << ",\n "; + OS << *it; + } + OS << "]"; + } + + OS << ">"; +} + +void MCFillFragment::dump() { + raw_ostream &OS = llvm::errs(); + + OS << "<MCFillFragment "; + this->MCFragment::dump(); + OS << "\n "; + OS << " Value:" << getValue() << " ValueSize:" << getValueSize() + << " Count:" << getCount() << ">"; +} + +void MCOrgFragment::dump() { + raw_ostream &OS = llvm::errs(); + + OS << "<MCOrgFragment "; + this->MCFragment::dump(); + OS << "\n "; + OS << " Offset:" << getOffset() << " Value:" << getValue() << ">"; +} + +void MCZeroFillFragment::dump() { + raw_ostream &OS = llvm::errs(); + + OS << "<MCZeroFillFragment "; + this->MCFragment::dump(); + OS << "\n "; + OS << " Size:" << getSize() << " Alignment:" << getAlignment() << ">"; +} + +void MCSectionData::dump() { + raw_ostream &OS = llvm::errs(); + + OS << "<MCSectionData"; + OS << " Alignment:" << getAlignment() << " Address:" << Address + << " Size:" << Size << " FileSize:" << FileSize + << " Fragments:["; + for (iterator it = begin(), ie = end(); it != ie; ++it) { + if (it != begin()) OS << ",\n "; + it->dump(); + } + OS << "]>"; +} + +void MCSymbolData::dump() { + raw_ostream &OS = llvm::errs(); + + OS << "<MCSymbolData Symbol:" << getSymbol() + << " Fragment:" << getFragment() << " Offset:" << getOffset() + << " Flags:" << getFlags() << " Index:" << getIndex(); + if (isCommon()) + OS << " (common, size:" << getCommonSize() + << " align: " << getCommonAlignment() << ")"; + if (isExternal()) + OS << " (external)"; + if (isPrivateExtern()) + OS << " (private extern)"; + OS << ">"; +} + +void MCAssembler::dump() { + raw_ostream &OS = llvm::errs(); + + OS << "<MCAssembler\n"; + OS << " Sections:["; + for (iterator it = begin(), ie = end(); it != ie; ++it) { + if (it != begin()) OS << ",\n "; + it->dump(); + } + OS << "],\n"; + OS << " Symbols:["; + + for (symbol_iterator it = symbol_begin(), ie = symbol_end(); it != ie; ++it) { + if (it != symbol_begin()) OS << ",\n "; + it->dump(); + } + OS << "]>\n"; +} diff --git a/lib/MC/MCCodeEmitter.cpp b/lib/MC/MCCodeEmitter.cpp index c122763..accb06c 100644 --- a/lib/MC/MCCodeEmitter.cpp +++ b/lib/MC/MCCodeEmitter.cpp @@ -16,3 +16,15 @@ MCCodeEmitter::MCCodeEmitter() { 
MCCodeEmitter::~MCCodeEmitter() { } + +const MCFixupKindInfo &MCCodeEmitter::getFixupKindInfo(MCFixupKind Kind) const { + static const MCFixupKindInfo Builtins[] = { + { "FK_Data_1", 0, 8 }, + { "FK_Data_2", 0, 16 }, + { "FK_Data_4", 0, 32 }, + { "FK_Data_8", 0, 64 } + }; + + assert(Kind <= 3 && "Unknown fixup kind"); + return Builtins[Kind]; +} diff --git a/lib/MC/MCExpr.cpp b/lib/MC/MCExpr.cpp index 1ee1b1b..e419043 100644 --- a/lib/MC/MCExpr.cpp +++ b/lib/MC/MCExpr.cpp @@ -17,6 +17,8 @@ using namespace llvm; void MCExpr::print(raw_ostream &OS) const { switch (getKind()) { + case MCExpr::Target: + return cast<MCTargetExpr>(this)->PrintImpl(OS); case MCExpr::Constant: OS << cast<MCConstantExpr>(*this).getValue(); return; @@ -131,6 +133,7 @@ const MCSymbolRefExpr *MCSymbolRefExpr::Create(StringRef Name, MCContext &Ctx) { return Create(Ctx.GetOrCreateSymbol(Name), Ctx); } +void MCTargetExpr::Anchor() {} /* *** */ @@ -168,6 +171,9 @@ static bool EvaluateSymbolicAdd(const MCValue &LHS, const MCSymbol *RHS_A, bool MCExpr::EvaluateAsRelocatable(MCValue &Res) const { switch (getKind()) { + case Target: + return cast<MCTargetExpr>(this)->EvaluateAsRelocatableImpl(Res); + case Constant: Res = MCValue::get(cast<MCConstantExpr>(this)->getValue()); return true; @@ -246,8 +252,8 @@ bool MCExpr::EvaluateAsRelocatable(MCValue &Res) const { } // FIXME: We need target hooks for the evaluation. It may be limited in - // width, and gas defines the result of comparisons differently from Apple - // as (the result is sign extended). + // width, and gas defines the result of comparisons and right shifts + // differently from Apple as. int64_t LHS = LHSValue.getConstant(), RHS = RHSValue.getConstant(); int64_t Result = 0; switch (ABE->getOpcode()) { diff --git a/lib/MC/MCInstPrinter.cpp b/lib/MC/MCInstPrinter.cpp index e90c03c..92a7154 100644 --- a/lib/MC/MCInstPrinter.cpp +++ b/lib/MC/MCInstPrinter.cpp @@ -8,7 +8,14 @@ //===----------------------------------------------------------------------===// #include "llvm/MC/MCInstPrinter.h" +#include "llvm/ADT/StringRef.h" using namespace llvm; MCInstPrinter::~MCInstPrinter() { } + +/// getOpcodeName - Return the name of the specified opcode enum (e.g. +/// "MOV32ri") or empty if we can't resolve it. 
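+/// The base implementation below has no opcode table, so it simply returns the
+/// empty string; target instruction printers are expected to override it,
+/// typically backed by their TableGen'd instruction name table.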
+StringRef MCInstPrinter::getOpcodeName(unsigned Opcode) const {
+  return "";
+}
diff --git a/lib/MC/MCMachOStreamer.cpp b/lib/MC/MCMachOStreamer.cpp
index e559c65..0c9627d 100644
--- a/lib/MC/MCMachOStreamer.cpp
+++ b/lib/MC/MCMachOStreamer.cpp
@@ -46,13 +46,9 @@ class MCMachOStreamer : public MCStreamer {
 private:
   MCAssembler Assembler;
-
   MCCodeEmitter *Emitter;
-
   MCSectionData *CurSectionData;
-
   DenseMap<const MCSection*, MCSectionData*> SectionMap;
-
   DenseMap<const MCSymbol*, MCSymbolData*> SymbolMap;

 private:
@@ -91,6 +87,7 @@ public:
   const MCExpr *AddValueSymbols(const MCExpr *Value) {
     switch (Value->getKind()) {
+    case MCExpr::Target: assert(0 && "Can't handle target exprs yet!");
     case MCExpr::Constant:
       break;
@@ -124,6 +121,9 @@ public:
   virtual void EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue);
   virtual void EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size,
                                 unsigned ByteAlignment);
+  virtual void EmitELFSize(MCSymbol *Symbol, const MCExpr *Value) {
+    assert(0 && "macho doesn't support this directive");
+  }
   virtual void EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size) {
     assert(0 && "macho doesn't support this directive");
   }
@@ -131,11 +131,22 @@ public:
                             unsigned Size = 0, unsigned ByteAlignment = 0);
   virtual void EmitBytes(StringRef Data, unsigned AddrSpace);
   virtual void EmitValue(const MCExpr *Value, unsigned Size,unsigned AddrSpace);
+  virtual void EmitGPRel32Value(const MCExpr *Value) {
+    assert(0 && "macho doesn't support this directive");
+  }
   virtual void EmitValueToAlignment(unsigned ByteAlignment, int64_t Value = 0,
                                     unsigned ValueSize = 1,
                                     unsigned MaxBytesToEmit = 0);
   virtual void EmitValueToOffset(const MCExpr *Offset,
                                  unsigned char Value = 0);
+
+  virtual void EmitFileDirective(StringRef Filename) {
+    errs() << "FIXME: MCMachoStreamer:EmitFileDirective not implemented\n";
+  }
+  virtual void EmitDwarfFileDirective(unsigned FileNo, StringRef Filename) {
+    errs() << "FIXME: MCMachoStreamer:EmitDwarfFileDirective not implemented\n";
+  }
+
   virtual void EmitInstruction(const MCInst &Inst);
   virtual void Finish();
@@ -220,6 +231,12 @@ void MCMachOStreamer::EmitSymbolAttribute(MCSymbol *Symbol,
   // defined.
   switch (Attribute) {
   case MCSA_Invalid:
+  case MCSA_ELF_TypeFunction:
+  case MCSA_ELF_TypeIndFunction:
+  case MCSA_ELF_TypeObject:
+  case MCSA_ELF_TypeTLS:
+  case MCSA_ELF_TypeCommon:
+  case MCSA_ELF_TypeNoType:
   case MCSA_IndirectSymbol:
   case MCSA_Hidden:
   case MCSA_Internal:
@@ -316,7 +333,24 @@ void MCMachOStreamer::EmitBytes(StringRef Data, unsigned AddrSpace) {
 void MCMachOStreamer::EmitValue(const MCExpr *Value, unsigned Size,
                                 unsigned AddrSpace) {
-  new MCFillFragment(*AddValueSymbols(Value), Size, 1, CurSectionData);
+  // Assume the front-end will have evaluated absolute expressions already, so
+  // just create data + fixup.
+  MCDataFragment *DF = dyn_cast_or_null<MCDataFragment>(getCurrentFragment());
+  if (!DF)
+    DF = new MCDataFragment(CurSectionData);
+
+  // Avoid fixups when possible.
+  int64_t AbsValue;
+  if (Value->EvaluateAsAbsolute(AbsValue)) {
+    // FIXME: Endianness assumption.
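+    // (Least significant byte first: e.g. AbsValue = 0x12345678 with Size = 4
+    // appends 0x78, 0x56, 0x34, 0x12 to the fragment.)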
+ for (unsigned i = 0; i != Size; ++i) + DF->getContents().push_back(uint8_t(AbsValue >> (i * 8))); + } else { + DF->getFixups().push_back(MCAsmFixup(DF->getContents().size(), + *AddValueSymbols(Value), + MCFixup::getKindForSize(Size))); + DF->getContents().resize(DF->getContents().size() + Size, 0); + } } void MCMachOStreamer::EmitValueToAlignment(unsigned ByteAlignment, @@ -346,13 +380,25 @@ void MCMachOStreamer::EmitInstruction(const MCInst &Inst) { if (!Emitter) llvm_unreachable("no code emitter available!"); - // FIXME: Emitting an instruction should cause S_ATTR_SOME_INSTRUCTIONS to - // be set for the current section. - // FIXME: Relocations! + CurSectionData->setHasInstructions(true); + + SmallVector<MCFixup, 4> Fixups; SmallString<256> Code; raw_svector_ostream VecOS(Code); - Emitter->EncodeInstruction(Inst, VecOS); - EmitBytes(VecOS.str(), 0); + Emitter->EncodeInstruction(Inst, VecOS, Fixups); + VecOS.flush(); + + // Add the fixups and data. + MCDataFragment *DF = dyn_cast_or_null<MCDataFragment>(getCurrentFragment()); + if (!DF) + DF = new MCDataFragment(CurSectionData); + for (unsigned i = 0, e = Fixups.size(); i != e; ++i) { + MCFixup &F = Fixups[i]; + DF->getFixups().push_back(MCAsmFixup(DF->getContents().size()+F.getOffset(), + *F.getValue(), + F.getKind())); + } + DF->getContents().append(Code.begin(), Code.end()); } void MCMachOStreamer::Finish() { diff --git a/lib/MC/MCNullStreamer.cpp b/lib/MC/MCNullStreamer.cpp index 7c219b3..46e9ebf 100644 --- a/lib/MC/MCNullStreamer.cpp +++ b/lib/MC/MCNullStreamer.cpp @@ -38,7 +38,7 @@ namespace { virtual void EmitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute){} virtual void EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue) {} - + virtual void EmitELFSize(MCSymbol *Symbol, const MCExpr *Value) {} virtual void EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size, unsigned ByteAlignment) {} virtual void EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size) {} @@ -50,7 +50,7 @@ namespace { virtual void EmitValue(const MCExpr *Value, unsigned Size, unsigned AddrSpace) {} - + virtual void EmitGPRel32Value(const MCExpr *Value) {} virtual void EmitValueToAlignment(unsigned ByteAlignment, int64_t Value = 0, unsigned ValueSize = 1, unsigned MaxBytesToEmit = 0) {} @@ -58,6 +58,8 @@ namespace { virtual void EmitValueToOffset(const MCExpr *Offset, unsigned char Value = 0) {} + virtual void EmitFileDirective(StringRef Filename) {} + virtual void EmitDwarfFileDirective(unsigned FileNo,StringRef Filename) {} virtual void EmitInstruction(const MCInst &Inst) {} virtual void Finish() {} diff --git a/lib/MC/MCParser/AsmParser.cpp b/lib/MC/MCParser/AsmParser.cpp index dd438b7..51ad5d1 100644 --- a/lib/MC/MCParser/AsmParser.cpp +++ b/lib/MC/MCParser/AsmParser.cpp @@ -325,18 +325,25 @@ bool AsmParser::ParseExpression(const MCExpr *&Res) { /// expr ::= primaryexpr /// bool AsmParser::ParseExpression(const MCExpr *&Res, SMLoc &EndLoc) { + // Parse the expression. Res = 0; - return ParsePrimaryExpr(Res, EndLoc) || - ParseBinOpRHS(1, Res, EndLoc); -} - -bool AsmParser::ParseParenExpression(const MCExpr *&Res, SMLoc &EndLoc) { - if (ParseParenExpr(Res, EndLoc)) + if (ParsePrimaryExpr(Res, EndLoc) || ParseBinOpRHS(1, Res, EndLoc)) return true; + // Try to constant fold it up front, if possible. 
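+  // (For example, "(3+5)*2" evaluates absolutely to 16 and is replaced by a
+  // single MCConstantExpr instead of a tree of binary expressions.)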
+  int64_t Value;
+  if (Res->EvaluateAsAbsolute(Value))
+    Res = MCConstantExpr::Create(Value, getContext());
+
   return false;
 }

+bool AsmParser::ParseParenExpression(const MCExpr *&Res, SMLoc &EndLoc) {
+  Res = 0;
+  return ParseParenExpr(Res, EndLoc) ||
+         ParseBinOpRHS(1, Res, EndLoc);
+}
+
 bool AsmParser::ParseAbsoluteExpression(int64_t &Res) {
   const MCExpr *Expr;
@@ -1709,14 +1716,18 @@ bool AsmParser::ParseDirectiveFile(StringRef, SMLoc DirectiveLoc) {
   if (Lexer.isNot(AsmToken::String))
     return TokError("unexpected token in '.file' directive");

-  StringRef ATTRIBUTE_UNUSED FileName = getTok().getString();
+  StringRef Filename = getTok().getString();
+  Filename = Filename.substr(1, Filename.size()-2);
   Lex();

   if (Lexer.isNot(AsmToken::EndOfStatement))
     return TokError("unexpected token in '.file' directive");

-  // FIXME: Do something with the .file.
-
+  if (FileNumber == -1)
+    Out.EmitFileDirective(Filename);
+  else
+    Out.EmitDwarfFileDirective(FileNumber, Filename);
+
   return false;
 }
diff --git a/lib/MC/MCParser/Makefile b/lib/MC/MCParser/Makefile
index e4eb483..4477757 100644
--- a/lib/MC/MCParser/Makefile
+++ b/lib/MC/MCParser/Makefile
@@ -10,7 +10,6 @@ LEVEL = ../../..
 LIBRARYNAME = LLVMMCParser
 BUILD_ARCHIVE := 1
-CXXFLAGS = -fno-rtti

 include $(LEVEL)/Makefile.common
diff --git a/lib/MC/Makefile b/lib/MC/Makefile
index 371776f..a661fa6 100644
--- a/lib/MC/Makefile
+++ b/lib/MC/Makefile
@@ -11,7 +11,6 @@ LEVEL = ../..
 LIBRARYNAME = LLVMMC
 BUILD_ARCHIVE := 1
 PARALLEL_DIRS := MCParser
-CXXFLAGS = -fno-rtti

 include $(LEVEL)/Makefile.common
diff --git a/lib/Support/APInt.cpp b/lib/Support/APInt.cpp
index 9d14684..3bce3f3 100644
--- a/lib/Support/APInt.cpp
+++ b/lib/Support/APInt.cpp
@@ -273,7 +273,7 @@ APInt& APInt::operator-=(const APInt& RHS) {
   return clearUnusedBits();
 }

-/// Multiplies an integer array, x by a a uint64_t integer and places the result
+/// Multiplies an integer array, x, by a uint64_t integer and places the result
 /// into dest.
 /// @returns the carry out of the multiplication.
 /// @brief Multiply a multi-digit APInt by a single digit (64-bit) integer.
@@ -767,8 +767,23 @@ bool APInt::isPowerOf2() const {
 }

 unsigned APInt::countLeadingZerosSlowCase() const {
-  unsigned Count = 0;
-  for (unsigned i = getNumWords(); i > 0u; --i) {
+  // Treat the most significant word differently because it might have
+  // meaningless bits set beyond the precision.
+  unsigned BitsInMSW = BitWidth % APINT_BITS_PER_WORD;
+  integerPart MSWMask;
+  if (BitsInMSW) MSWMask = (integerPart(1) << BitsInMSW) - 1;
+  else {
+    MSWMask = ~integerPart(0);
+    BitsInMSW = APINT_BITS_PER_WORD;
+  }
+
+  unsigned i = getNumWords();
+  integerPart MSW = pVal[i-1] & MSWMask;
+  if (MSW)
+    return CountLeadingZeros_64(MSW) - (APINT_BITS_PER_WORD - BitsInMSW);
+
+  unsigned Count = BitsInMSW;
+  for (--i; i > 0u; --i) {
     if (pVal[i-1] == 0)
       Count += APINT_BITS_PER_WORD;
     else {
@@ -776,10 +791,7 @@ unsigned APInt::countLeadingZerosSlowCase() const {
       break;
     }
   }
-  unsigned remainder = BitWidth % APINT_BITS_PER_WORD;
-  if (remainder)
-    Count -= APINT_BITS_PER_WORD - remainder;
-  return std::min(Count, BitWidth);
+  return Count;
 }

 static unsigned countLeadingOnes_64(uint64_t V, unsigned skip) {
@@ -1754,7 +1766,7 @@ void APInt::divide(const APInt LHS, unsigned lhsWords,
   // First, compose the values into an array of 32-bit words instead of
   // 64-bit words.
This is a necessity of both the "short division" algorithm - // and the the Knuth "classical algorithm" which requires there to be native + // and the Knuth "classical algorithm" which requires there to be native // operations for +, -, and * on an m bit value with an m*2 bit result. We // can't use 64-bit operands here because we don't have native results of // 128-bits. Furthermore, casting the 64-bit values to 32-bit values won't diff --git a/lib/Support/CommandLine.cpp b/lib/Support/CommandLine.cpp index fa692be8..961dc1f 100644 --- a/lib/Support/CommandLine.cpp +++ b/lib/Support/CommandLine.cpp @@ -507,8 +507,9 @@ void cl::ParseCommandLineOptions(int argc, char **argv, // Copy the program name into ProgName, making sure not to overflow it. std::string ProgName = sys::Path(argv[0]).getLast(); - if (ProgName.size() > 79) ProgName.resize(79); - strcpy(ProgramName, ProgName.c_str()); + size_t Len = std::min(ProgName.size(), size_t(79)); + memcpy(ProgramName, ProgName.data(), Len); + ProgramName[Len] = '\0'; ProgramOverview = Overview; bool ErrorParsing = false; diff --git a/lib/Support/ConstantRange.cpp b/lib/Support/ConstantRange.cpp index ddf14e3..2746f7a 100644 --- a/lib/Support/ConstantRange.cpp +++ b/lib/Support/ConstantRange.cpp @@ -540,6 +540,11 @@ ConstantRange::add(const ConstantRange &Other) const { ConstantRange ConstantRange::multiply(const ConstantRange &Other) const { + // TODO: If either operand is a single element and the multiply is known to + // be non-wrapping, round the result min and max value to the appropriate + // multiple of that element. If wrapping is possible, at least adjust the + // range according to the greatest power-of-two factor of the single element. + if (isEmptySet() || Other.isEmptySet()) return ConstantRange(getBitWidth(), /*isFullSet=*/false); if (isFullSet() || Other.isFullSet()) @@ -650,7 +655,12 @@ ConstantRange::lshr(const ConstantRange &Amount) const { /// print - Print out the bounds to a stream... /// void ConstantRange::print(raw_ostream &OS) const { - OS << "[" << Lower << "," << Upper << ")"; + if (isFullSet()) + OS << "full-set"; + else if (isEmptySet()) + OS << "empty-set"; + else + OS << "[" << Lower << "," << Upper << ")"; } /// dump - Allow printing from a debugger easily... diff --git a/lib/Support/FileUtilities.cpp b/lib/Support/FileUtilities.cpp index 21080b6..095395f 100644 --- a/lib/Support/FileUtilities.cpp +++ b/lib/Support/FileUtilities.cpp @@ -13,11 +13,11 @@ //===----------------------------------------------------------------------===// #include "llvm/Support/FileUtilities.h" -#include "llvm/System/Path.h" #include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/System/Path.h" #include "llvm/ADT/OwningPtr.h" #include "llvm/ADT/SmallString.h" -#include "llvm/ADT/StringExtras.h" #include <cstdlib> #include <cstring> #include <cctype> @@ -139,11 +139,11 @@ static bool CompareNumbers(const char *&F1P, const char *&F2P, Diff = 0; // Both zero. if (Diff > RelTolerance) { if (ErrorMsg) { - *ErrorMsg = "Compared: " + ftostr(V1) + " and " + ftostr(V2) + "\n"; - *ErrorMsg += "abs. diff = " + ftostr(std::abs(V1-V2)) + - " rel.diff = " + ftostr(Diff) + "\n"; - *ErrorMsg += "Out of tolerance: rel/abs: " + ftostr(RelTolerance) + - "/" + ftostr(AbsTolerance); + raw_string_ostream(*ErrorMsg) + << "Compared: " << V1 << " and " << V2 << '\n' + << "abs. 
diff = " << std::abs(V1-V2) << " rel.diff = " << Diff << '\n' + << "Out of tolerance: rel/abs: " << RelTolerance << '/' + << AbsTolerance; } return true; } diff --git a/lib/Support/FormattedStream.cpp b/lib/Support/FormattedStream.cpp index 9ab3666..39b6cb3 100644 --- a/lib/Support/FormattedStream.cpp +++ b/lib/Support/FormattedStream.cpp @@ -59,12 +59,13 @@ void formatted_raw_ostream::ComputeColumn(const char *Ptr, size_t Size) { /// \param MinPad - The minimum space to give after the most recent /// I/O, even if the current column + minpad > newcol. /// -void formatted_raw_ostream::PadToColumn(unsigned NewCol) { +formatted_raw_ostream &formatted_raw_ostream::PadToColumn(unsigned NewCol) { // Figure out what's in the buffer and add it to the column count. ComputeColumn(getBufferStart(), GetNumBytesInBuffer()); // Output spaces until we reach the desired column. indent(std::max(int(NewCol - ColumnScanned), 1)); + return *this; } void formatted_raw_ostream::write_impl(const char *Ptr, size_t Size) { diff --git a/lib/Support/SourceMgr.cpp b/lib/Support/SourceMgr.cpp index bdc637a..83c7964 100644 --- a/lib/Support/SourceMgr.cpp +++ b/lib/Support/SourceMgr.cpp @@ -35,7 +35,7 @@ SourceMgr::~SourceMgr() { // Delete the line # cache if allocated. if (LineNoCacheTy *Cache = getCache(LineNoCache)) delete Cache; - + while (!Buffers.empty()) { delete Buffers.back().Buffer; Buffers.pop_back(); @@ -47,7 +47,7 @@ SourceMgr::~SourceMgr() { /// ~0, otherwise it returns the buffer ID of the stacked file. unsigned SourceMgr::AddIncludeFile(const std::string &Filename, SMLoc IncludeLoc) { - + MemoryBuffer *NewBuf = MemoryBuffer::getFile(Filename.c_str()); // If the file didn't exist directly, see if it's in an include path. @@ -55,7 +55,7 @@ unsigned SourceMgr::AddIncludeFile(const std::string &Filename, std::string IncFile = IncludeDirectories[i] + "/" + Filename; NewBuf = MemoryBuffer::getFile(IncFile.c_str()); } - + if (NewBuf == 0) return ~0U; return AddNewSourceBuffer(NewBuf, IncludeLoc); @@ -79,20 +79,20 @@ int SourceMgr::FindBufferContainingLoc(SMLoc Loc) const { unsigned SourceMgr::FindLineNumber(SMLoc Loc, int BufferID) const { if (BufferID == -1) BufferID = FindBufferContainingLoc(Loc); assert(BufferID != -1 && "Invalid Location!"); - + MemoryBuffer *Buff = getBufferInfo(BufferID).Buffer; - + // Count the number of \n's between the start of the file and the specified // location. unsigned LineNo = 1; - + const char *Ptr = Buff->getBufferStart(); // If we have a line number cache, and if the query is to a later point in the // same file, start searching from the last query location. This optimizes // for the case when multiple diagnostics come out of one file in order. if (LineNoCacheTy *Cache = getCache(LineNoCache)) - if (Cache->LastQueryBufferID == BufferID && + if (Cache->LastQueryBufferID == BufferID && Cache->LastQuery <= Loc.getPointer()) { Ptr = Cache->LastQuery; LineNo = Cache->LineNoOfQuery; @@ -102,12 +102,12 @@ unsigned SourceMgr::FindLineNumber(SMLoc Loc, int BufferID) const { // we see. for (; SMLoc::getFromPointer(Ptr) != Loc; ++Ptr) if (*Ptr == '\n') ++LineNo; - - + + // Allocate the line number cache if it doesn't exist. if (LineNoCache == 0) LineNoCache = new LineNoCacheTy(); - + // Update the line # cache. 
LineNoCacheTy &Cache = *getCache(LineNoCache); Cache.LastQueryBufferID = BufferID; @@ -118,12 +118,12 @@ unsigned SourceMgr::FindLineNumber(SMLoc Loc, int BufferID) const { void SourceMgr::PrintIncludeStack(SMLoc IncludeLoc, raw_ostream &OS) const { if (IncludeLoc == SMLoc()) return; // Top of stack. - + int CurBuf = FindBufferContainingLoc(IncludeLoc); assert(CurBuf != -1 && "Invalid or unspecified location!"); PrintIncludeStack(getBufferInfo(CurBuf).IncludeLoc, OS); - + OS << "Included from " << getBufferInfo(CurBuf).Buffer->getBufferIdentifier() << ":" << FindLineNumber(IncludeLoc, CurBuf) << ":\n"; @@ -137,12 +137,12 @@ void SourceMgr::PrintIncludeStack(SMLoc IncludeLoc, raw_ostream &OS) const { /// prefixed to the message. SMDiagnostic SourceMgr::GetMessage(SMLoc Loc, const std::string &Msg, const char *Type, bool ShowLine) const { - + // First thing to do: find the current buffer containing the specified // location. int CurBuf = FindBufferContainingLoc(Loc); assert(CurBuf != -1 && "Invalid or unspecified location!"); - + MemoryBuffer *CurMB = getBufferInfo(CurBuf).Buffer; // Scan backward to find the start of the line. @@ -160,7 +160,7 @@ SMDiagnostic SourceMgr::GetMessage(SMLoc Loc, const std::string &Msg, ++LineEnd; LineStr = std::string(LineStart, LineEnd); } - + std::string PrintedMsg; if (Type) { PrintedMsg = Type; @@ -173,7 +173,7 @@ SMDiagnostic SourceMgr::GetMessage(SMLoc Loc, const std::string &Msg, LineStr, ShowLine); } -void SourceMgr::PrintMessage(SMLoc Loc, const std::string &Msg, +void SourceMgr::PrintMessage(SMLoc Loc, const std::string &Msg, const char *Type, bool ShowLine) const { raw_ostream &OS = errs(); @@ -188,7 +188,7 @@ void SourceMgr::PrintMessage(SMLoc Loc, const std::string &Msg, // SMDiagnostic Implementation //===----------------------------------------------------------------------===// -void SMDiagnostic::Print(const char *ProgName, raw_ostream &S) { +void SMDiagnostic::Print(const char *ProgName, raw_ostream &S) const { if (ProgName && ProgName[0]) S << ProgName << ": "; @@ -197,7 +197,7 @@ void SMDiagnostic::Print(const char *ProgName, raw_ostream &S) { S << "<stdin>"; else S << Filename; - + if (LineNo != -1) { S << ':' << LineNo; if (ColumnNo != -1) @@ -205,12 +205,12 @@ void SMDiagnostic::Print(const char *ProgName, raw_ostream &S) { } S << ": "; } - + S << Message << '\n'; if (LineNo != -1 && ColumnNo != -1 && ShowLine) { S << LineContents << '\n'; - + // Print out spaces/tabs before the caret. for (unsigned i = 0; i != unsigned(ColumnNo); ++i) S << (LineContents[i] == '\t' ? 
'\t' : ' '); diff --git a/lib/Support/Triple.cpp b/lib/Support/Triple.cpp index 2fec094..5a76184 100644 --- a/lib/Support/Triple.cpp +++ b/lib/Support/Triple.cpp @@ -33,6 +33,7 @@ const char *Triple::getArchTypeName(ArchType Kind) { case ppc64: return "powerpc64"; case ppc: return "powerpc"; case sparc: return "sparc"; + case sparcv9: return "sparcv9"; case systemz: return "s390x"; case tce: return "tce"; case thumb: return "thumb"; @@ -61,6 +62,7 @@ const char *Triple::getArchTypePrefix(ArchType Kind) { case ppc64: case ppc: return "ppc"; + case sparcv9: case sparc: return "sparc"; case x86: @@ -127,6 +129,8 @@ Triple::ArchType Triple::getArchTypeForLLVMName(StringRef Name) { return ppc; if (Name == "sparc") return sparc; + if (Name == "sparcv9") + return sparcv9; if (Name == "systemz") return systemz; if (Name == "tce") @@ -250,6 +254,8 @@ void Triple::Parse() const { Arch = mipsel; else if (ArchName == "sparc") Arch = sparc; + else if (ArchName == "sparcv9") + Arch = sparcv9; else if (ArchName == "s390x") Arch = systemz; else if (ArchName == "tce") diff --git a/lib/Support/raw_ostream.cpp b/lib/Support/raw_ostream.cpp index 10d7ec0..25c3fbd 100644 --- a/lib/Support/raw_ostream.cpp +++ b/lib/Support/raw_ostream.cpp @@ -20,7 +20,7 @@ #include "llvm/Support/Compiler.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/StringExtras.h" +#include <cctype> #include <sys/stat.h> #include <sys/types.h> @@ -209,7 +209,7 @@ raw_ostream &raw_ostream::operator<<(const void *P) { } raw_ostream &raw_ostream::operator<<(double N) { - return this->operator<<(ftostr(N)); + return this->operator<<(format("%e", N)); } @@ -574,12 +574,18 @@ void raw_svector_ostream::resync() { } void raw_svector_ostream::write_impl(const char *Ptr, size_t Size) { - assert(Ptr == OS.end() && OS.size() + Size <= OS.capacity() && - "Invalid write_impl() call!"); - - // We don't need to copy the bytes, just commit the bytes to the - // SmallVector. - OS.set_size(OS.size() + Size); + // If we're writing bytes from the end of the buffer into the smallvector, we + // don't need to copy the bytes, just commit the bytes because they are + // already in the right place. + if (Ptr == OS.end()) { + assert(OS.size() + Size <= OS.capacity() && "Invalid write_impl() call!"); + OS.set_size(OS.size() + Size); + } else { + assert(GetNumBytesInBuffer() == 0 && + "Should be writing from buffer if some bytes in it"); + // Otherwise, do copy the bytes. + OS.append(Ptr, Ptr+Size); + } // Grow the vector if necessary. if (OS.capacity() - OS.size() < 64) diff --git a/lib/System/Makefile b/lib/System/Makefile index d4fd60e..bb013b9 100644 --- a/lib/System/Makefile +++ b/lib/System/Makefile @@ -10,7 +10,7 @@ LEVEL = ../.. LIBRARYNAME = LLVMSystem BUILD_ARCHIVE = 1 - +REQUIRES_RTTI = 1 include $(LEVEL)/Makefile.config ifeq ($(HOST_OS),MingW) diff --git a/lib/System/Unix/Program.inc b/lib/System/Unix/Program.inc index 43c3606..c10498a 100644 --- a/lib/System/Unix/Program.inc +++ b/lib/System/Unix/Program.inc @@ -126,7 +126,7 @@ static void TimeOutHandler(int Sig) { static void SetMemoryLimits (unsigned size) { -#if HAVE_SYS_RESOURCE_H +#if HAVE_SYS_RESOURCE_H && HAVE_GETRLIMIT && HAVE_SETRLIMIT struct rlimit r; __typeof__ (r.rlim_cur) limit = (__typeof__ (r.rlim_cur)) (size) * 1048576; @@ -323,4 +323,9 @@ bool Program::ChangeStdoutToBinary(){ return false; } +bool Program::ChangeStderrToBinary(){ + // Do nothing, as Unix doesn't differentiate between text and binary. 
+ return false; +} + } diff --git a/lib/System/Unix/Signals.inc b/lib/System/Unix/Signals.inc index 676e1e5..c8ec68a 100644 --- a/lib/System/Unix/Signals.inc +++ b/lib/System/Unix/Signals.inc @@ -52,7 +52,16 @@ static const int *const IntSigsEnd = // KillSigs - Signals that are synchronous with the program that will cause it // to die. static const int KillSigs[] = { - SIGILL, SIGTRAP, SIGABRT, SIGFPE, SIGBUS, SIGSEGV, SIGSYS, SIGXCPU, SIGXFSZ + SIGILL, SIGTRAP, SIGABRT, SIGFPE, SIGBUS, SIGSEGV +#ifdef SIGSYS + , SIGSYS +#endif +#ifdef SIGXCPU + , SIGXCPU +#endif +#ifdef SIGXFSZ + , SIGXFSZ +#endif #ifdef SIGEMT , SIGEMT #endif diff --git a/lib/System/Win32/Program.inc b/lib/System/Win32/Program.inc index a69826f..a3b40d0 100644 --- a/lib/System/Win32/Program.inc +++ b/lib/System/Win32/Program.inc @@ -379,4 +379,9 @@ bool Program::ChangeStdoutToBinary(){ return result == -1; } +bool Program::ChangeStderrToBinary(){ + int result = _setmode( _fileno(stderr), _O_BINARY ); + return result == -1; +} + } diff --git a/lib/Target/ARM/ARM.h b/lib/Target/ARM/ARM.h index 21445ad..b08f942 100644 --- a/lib/Target/ARM/ARM.h +++ b/lib/Target/ARM/ARM.h @@ -23,9 +23,7 @@ namespace llvm { class ARMBaseTargetMachine; class FunctionPass; -class MachineCodeEmitter; class JITCodeEmitter; -class ObjectCodeEmitter; class formatted_raw_ostream; // Enums corresponding to ARM condition codes @@ -95,12 +93,8 @@ inline static const char *ARMCondCodeToString(ARMCC::CondCodes CC) { FunctionPass *createARMISelDag(ARMBaseTargetMachine &TM, CodeGenOpt::Level OptLevel); -FunctionPass *createARMCodeEmitterPass(ARMBaseTargetMachine &TM, - MachineCodeEmitter &MCE); FunctionPass *createARMJITCodeEmitterPass(ARMBaseTargetMachine &TM, JITCodeEmitter &JCE); -FunctionPass *createARMObjectCodeEmitterPass(ARMBaseTargetMachine &TM, - ObjectCodeEmitter &OCE); FunctionPass *createARMLoadStoreOptimizationPass(bool PreAlloc = false); FunctionPass *createARMExpandPseudoPass(); diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp index fd46a4a..6fe7c2c 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -450,10 +450,10 @@ unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const { switch (Opc) { default: llvm_unreachable("Unknown or unset size field for instr!"); - case TargetInstrInfo::IMPLICIT_DEF: - case TargetInstrInfo::KILL: - case TargetInstrInfo::DBG_LABEL: - case TargetInstrInfo::EH_LABEL: + case TargetOpcode::IMPLICIT_DEF: + case TargetOpcode::KILL: + case TargetOpcode::DBG_LABEL: + case TargetOpcode::EH_LABEL: return 0; } break; @@ -470,9 +470,9 @@ unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const { case ARM::Int_eh_sjlj_setjmp: return 24; case ARM::tInt_eh_sjlj_setjmp: - return 22; + return 14; case ARM::t2Int_eh_sjlj_setjmp: - return 22; + return 14; case ARM::BR_JTr: case ARM::BR_JTm: case ARM::BR_JTadd: @@ -490,6 +490,7 @@ unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const { MI->getOperand(NumOps - (TID.isPredicable() ? 
3 : 2)); unsigned JTI = JTOP.getIndex(); const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo(); + assert(MJTI != 0); const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables(); assert(JTI < JT.size()); // Thumb instructions are 2 byte aligned, but JT entries are 4 byte diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp index ba9e044..91e3550 100644 --- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp +++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp @@ -478,7 +478,7 @@ ARMBaseRegisterInfo::UpdateRegAllocHint(unsigned Reg, unsigned NewReg, /// bool ARMBaseRegisterInfo::hasFP(const MachineFunction &MF) const { const MachineFrameInfo *MFI = MF.getFrameInfo(); - return (NoFramePointerElim || + return ((NoFramePointerElim && MFI->hasCalls())|| needsStackRealignment(MF) || MFI->hasVarSizedObjects() || MFI->isFrameAddressTaken()); @@ -583,14 +583,6 @@ ARMBaseRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, SmallVector<unsigned, 4> UnspilledCS2GPRs; ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); - - // Calculate and set max stack object alignment early, so we can decide - // whether we will need stack realignment (and thus FP). - if (RealignStack) { - MachineFrameInfo *MFI = MF.getFrameInfo(); - MFI->calculateMaxStackAlignment(); - } - // Spill R4 if Thumb2 function requires stack realignment - it will be used as // scratch register. // FIXME: It will be better just to find spare register here. @@ -803,10 +795,10 @@ ARMBaseRegisterInfo::getFrameRegister(const MachineFunction &MF) const { } int -ARMBaseRegisterInfo::getFrameIndexReference(MachineFunction &MF, int FI, +ARMBaseRegisterInfo::getFrameIndexReference(const MachineFunction &MF, int FI, unsigned &FrameReg) const { const MachineFrameInfo *MFI = MF.getFrameInfo(); - ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); + const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); int Offset = MFI->getObjectOffset(FI) + MFI->getStackSize(); bool isFixed = MFI->isFixedObjectIndex(FI); @@ -845,7 +837,8 @@ ARMBaseRegisterInfo::getFrameIndexReference(MachineFunction &MF, int FI, int -ARMBaseRegisterInfo::getFrameIndexOffset(MachineFunction &MF, int FI) const { +ARMBaseRegisterInfo::getFrameIndexOffset(const MachineFunction &MF, + int FI) const { unsigned FrameReg; return getFrameIndexReference(MF, FI, FrameReg); } diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.h b/lib/Target/ARM/ARMBaseRegisterInfo.h index f5ca25c..33ba21d 100644 --- a/lib/Target/ARM/ARMBaseRegisterInfo.h +++ b/lib/Target/ARM/ARMBaseRegisterInfo.h @@ -107,9 +107,9 @@ public: // Debug information queries. unsigned getRARegister() const; unsigned getFrameRegister(const MachineFunction &MF) const; - int getFrameIndexReference(MachineFunction &MF, int FI, + int getFrameIndexReference(const MachineFunction &MF, int FI, unsigned &FrameReg) const; - int getFrameIndexOffset(MachineFunction &MF, int FI) const; + int getFrameIndexOffset(const MachineFunction &MF, int FI) const; // Exception handling queries. 
unsigned getEHExceptionRegister() const; diff --git a/lib/Target/ARM/ARMCodeEmitter.cpp b/lib/Target/ARM/ARMCodeEmitter.cpp index 17e7d44..bd703f4 100644 --- a/lib/Target/ARM/ARMCodeEmitter.cpp +++ b/lib/Target/ARM/ARMCodeEmitter.cpp @@ -24,9 +24,7 @@ #include "llvm/DerivedTypes.h" #include "llvm/Function.h" #include "llvm/PassManager.h" -#include "llvm/CodeGen/MachineCodeEmitter.h" #include "llvm/CodeGen/JITCodeEmitter.h" -#include "llvm/CodeGen/ObjectCodeEmitter.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstr.h" @@ -46,42 +44,34 @@ STATISTIC(NumEmitted, "Number of machine instructions emitted"); namespace { - class ARMCodeEmitter { - public: - /// getBinaryCodeForInstr - This function, generated by the - /// CodeEmitterGenerator using TableGen, produces the binary encoding for - /// machine instructions. - unsigned getBinaryCodeForInstr(const MachineInstr &MI); - }; - - template<class CodeEmitter> - class Emitter : public MachineFunctionPass, public ARMCodeEmitter { + class ARMCodeEmitter : public MachineFunctionPass { ARMJITInfo *JTI; const ARMInstrInfo *II; const TargetData *TD; const ARMSubtarget *Subtarget; TargetMachine &TM; - CodeEmitter &MCE; + JITCodeEmitter &MCE; const std::vector<MachineConstantPoolEntry> *MCPEs; const std::vector<MachineJumpTableEntry> *MJTEs; bool IsPIC; - + void getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired<MachineModuleInfo>(); MachineFunctionPass::getAnalysisUsage(AU); } - - public: + static char ID; - explicit Emitter(TargetMachine &tm, CodeEmitter &mce) - : MachineFunctionPass(&ID), JTI(0), II(0), TD(0), TM(tm), - MCE(mce), MCPEs(0), MJTEs(0), - IsPIC(TM.getRelocationModel() == Reloc::PIC_) {} - Emitter(TargetMachine &tm, CodeEmitter &mce, - const ARMInstrInfo &ii, const TargetData &td) - : MachineFunctionPass(&ID), JTI(0), II(&ii), TD(&td), TM(tm), - MCE(mce), MCPEs(0), MJTEs(0), - IsPIC(TM.getRelocationModel() == Reloc::PIC_) {} + public: + ARMCodeEmitter(TargetMachine &tm, JITCodeEmitter &mce) + : MachineFunctionPass(&ID), JTI(0), II((ARMInstrInfo*)tm.getInstrInfo()), + TD(tm.getTargetData()), TM(tm), + MCE(mce), MCPEs(0), MJTEs(0), + IsPIC(TM.getRelocationModel() == Reloc::PIC_) {} + + /// getBinaryCodeForInstr - This function, generated by the + /// CodeEmitterGenerator using TableGen, produces the binary encoding for + /// machine instructions. + unsigned getBinaryCodeForInstr(const MachineInstr &MI); bool runOnMachineFunction(MachineFunction &MF); @@ -94,21 +84,13 @@ namespace { private: void emitWordLE(unsigned Binary); - void emitDWordLE(uint64_t Binary); - void emitConstPoolInstruction(const MachineInstr &MI); - void emitMOVi2piecesInstruction(const MachineInstr &MI); - void emitLEApcrelJTInstruction(const MachineInstr &MI); - void emitPseudoMoveInstruction(const MachineInstr &MI); - void addPCLabel(unsigned LabelID); - void emitPseudoInstruction(const MachineInstr &MI); - unsigned getMachineSoRegOpValue(const MachineInstr &MI, const TargetInstrDesc &TID, const MachineOperand &MO, @@ -176,28 +158,18 @@ namespace { void emitMachineBasicBlock(MachineBasicBlock *BB, unsigned Reloc, intptr_t JTBase = 0); }; - template <class CodeEmitter> - char Emitter<CodeEmitter>::ID = 0; } -/// createARMCodeEmitterPass - Return a pass that emits the collected ARM code -/// to the specified MCE object. 
+char ARMCodeEmitter::ID = 0; -FunctionPass *llvm::createARMCodeEmitterPass(ARMBaseTargetMachine &TM, - MachineCodeEmitter &MCE) { - return new Emitter<MachineCodeEmitter>(TM, MCE); -} +/// createARMJITCodeEmitterPass - Return a pass that emits the collected ARM +/// code to the specified MCE object. FunctionPass *llvm::createARMJITCodeEmitterPass(ARMBaseTargetMachine &TM, JITCodeEmitter &JCE) { - return new Emitter<JITCodeEmitter>(TM, JCE); -} -FunctionPass *llvm::createARMObjectCodeEmitterPass(ARMBaseTargetMachine &TM, - ObjectCodeEmitter &OCE) { - return new Emitter<ObjectCodeEmitter>(TM, OCE); + return new ARMCodeEmitter(TM, JCE); } -template<class CodeEmitter> -bool Emitter<CodeEmitter>::runOnMachineFunction(MachineFunction &MF) { +bool ARMCodeEmitter::runOnMachineFunction(MachineFunction &MF) { assert((MF.getTarget().getRelocationModel() != Reloc::Default || MF.getTarget().getRelocationModel() != Reloc::Static) && "JIT relocation model must be set to static or default!"); @@ -206,7 +178,8 @@ bool Emitter<CodeEmitter>::runOnMachineFunction(MachineFunction &MF) { TD = ((ARMTargetMachine&)MF.getTarget()).getTargetData(); Subtarget = &TM.getSubtarget<ARMSubtarget>(); MCPEs = &MF.getConstantPool()->getConstants(); - MJTEs = &MF.getJumpTableInfo()->getJumpTables(); + MJTEs = 0; + if (MF.getJumpTableInfo()) MJTEs = &MF.getJumpTableInfo()->getJumpTables(); IsPIC = TM.getRelocationModel() == Reloc::PIC_; JTI->Initialize(MF, IsPIC); MCE.setModuleInfo(&getAnalysis<MachineModuleInfo>()); @@ -229,8 +202,7 @@ bool Emitter<CodeEmitter>::runOnMachineFunction(MachineFunction &MF) { /// getShiftOp - Return the shift opcode (bit[6:5]) of the immediate value. /// -template<class CodeEmitter> -unsigned Emitter<CodeEmitter>::getShiftOp(unsigned Imm) const { +unsigned ARMCodeEmitter::getShiftOp(unsigned Imm) const { switch (ARM_AM::getAM2ShiftOpc(Imm)) { default: llvm_unreachable("Unknown shift opc!"); case ARM_AM::asr: return 2; @@ -244,9 +216,8 @@ unsigned Emitter<CodeEmitter>::getShiftOp(unsigned Imm) const { /// getMachineOpValue - Return binary encoding of operand. If the machine /// operand requires relocation, record the relocation and return zero. -template<class CodeEmitter> -unsigned Emitter<CodeEmitter>::getMachineOpValue(const MachineInstr &MI, - const MachineOperand &MO) { +unsigned ARMCodeEmitter::getMachineOpValue(const MachineInstr &MI, + const MachineOperand &MO) { if (MO.isReg()) return ARMRegisterInfo::getRegisterNumbering(MO.getReg()); else if (MO.isImm()) @@ -276,10 +247,9 @@ unsigned Emitter<CodeEmitter>::getMachineOpValue(const MachineInstr &MI, /// emitGlobalAddress - Emit the specified address to the code stream. /// -template<class CodeEmitter> -void Emitter<CodeEmitter>::emitGlobalAddress(GlobalValue *GV, unsigned Reloc, - bool MayNeedFarStub, bool Indirect, - intptr_t ACPV) { +void ARMCodeEmitter::emitGlobalAddress(GlobalValue *GV, unsigned Reloc, + bool MayNeedFarStub, bool Indirect, + intptr_t ACPV) { MachineRelocation MR = Indirect ? MachineRelocation::getIndirectSymbol(MCE.getCurrentPCOffset(), Reloc, GV, ACPV, MayNeedFarStub) @@ -291,9 +261,7 @@ void Emitter<CodeEmitter>::emitGlobalAddress(GlobalValue *GV, unsigned Reloc, /// emitExternalSymbolAddress - Arrange for the address of an external symbol to /// be emitted to the current location in the function, and allow it to be PC /// relative. 
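/// (The relocation is recorded against the current PC offset and resolved by
/// the JIT once the external symbol's address is known.)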
-template<class CodeEmitter> -void Emitter<CodeEmitter>::emitExternalSymbolAddress(const char *ES, - unsigned Reloc) { +void ARMCodeEmitter::emitExternalSymbolAddress(const char *ES, unsigned Reloc) { MCE.addRelocation(MachineRelocation::getExtSym(MCE.getCurrentPCOffset(), Reloc, ES)); } @@ -301,9 +269,7 @@ void Emitter<CodeEmitter>::emitExternalSymbolAddress(const char *ES, /// emitConstPoolAddress - Arrange for the address of an constant pool /// to be emitted to the current location in the function, and allow it to be PC /// relative. -template<class CodeEmitter> -void Emitter<CodeEmitter>::emitConstPoolAddress(unsigned CPI, - unsigned Reloc) { +void ARMCodeEmitter::emitConstPoolAddress(unsigned CPI, unsigned Reloc) { // Tell JIT emitter we'll resolve the address. MCE.addRelocation(MachineRelocation::getConstPool(MCE.getCurrentPCOffset(), Reloc, CPI, 0, true)); @@ -312,37 +278,31 @@ void Emitter<CodeEmitter>::emitConstPoolAddress(unsigned CPI, /// emitJumpTableAddress - Arrange for the address of a jump table to /// be emitted to the current location in the function, and allow it to be PC /// relative. -template<class CodeEmitter> -void Emitter<CodeEmitter>::emitJumpTableAddress(unsigned JTIndex, - unsigned Reloc) { +void ARMCodeEmitter::emitJumpTableAddress(unsigned JTIndex, unsigned Reloc) { MCE.addRelocation(MachineRelocation::getJumpTable(MCE.getCurrentPCOffset(), Reloc, JTIndex, 0, true)); } /// emitMachineBasicBlock - Emit the specified address basic block. -template<class CodeEmitter> -void Emitter<CodeEmitter>::emitMachineBasicBlock(MachineBasicBlock *BB, - unsigned Reloc, intptr_t JTBase) { +void ARMCodeEmitter::emitMachineBasicBlock(MachineBasicBlock *BB, + unsigned Reloc, intptr_t JTBase) { MCE.addRelocation(MachineRelocation::getBB(MCE.getCurrentPCOffset(), Reloc, BB, JTBase)); } -template<class CodeEmitter> -void Emitter<CodeEmitter>::emitWordLE(unsigned Binary) { +void ARMCodeEmitter::emitWordLE(unsigned Binary) { DEBUG(errs() << " 0x"; errs().write_hex(Binary) << "\n"); MCE.emitWordLE(Binary); } -template<class CodeEmitter> -void Emitter<CodeEmitter>::emitDWordLE(uint64_t Binary) { +void ARMCodeEmitter::emitDWordLE(uint64_t Binary) { DEBUG(errs() << " 0x"; errs().write_hex(Binary) << "\n"); MCE.emitDWordLE(Binary); } -template<class CodeEmitter> -void Emitter<CodeEmitter>::emitInstruction(const MachineInstr &MI) { +void ARMCodeEmitter::emitInstruction(const MachineInstr &MI) { DEBUG(errs() << "JIT: " << (void*)MCE.getCurrentPCValue() << ":\t" << MI); MCE.processDebugLoc(MI.getDebugLoc(), true); @@ -411,8 +371,7 @@ void Emitter<CodeEmitter>::emitInstruction(const MachineInstr &MI) { MCE.processDebugLoc(MI.getDebugLoc(), false); } -template<class CodeEmitter> -void Emitter<CodeEmitter>::emitConstPoolInstruction(const MachineInstr &MI) { +void ARMCodeEmitter::emitConstPoolInstruction(const MachineInstr &MI) { unsigned CPI = MI.getOperand(0).getImm(); // CP instruction index. unsigned CPIndex = MI.getOperand(1).getIndex(); // Actual cp entry index. 
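emitWordLE and emitDWordLE above stream finished encodings out least-significant byte first, matching the little-endian instruction stream the JIT produces. A self-contained illustration of the byte order:

    #include <cstdint>
    #include <cstdio>
    #include <vector>

    std::vector<uint8_t> Out;

    void emitWordLE(uint32_t W) {             // 4 bytes, LSB first
      for (int i = 0; i < 4; ++i)
        Out.push_back(uint8_t(W >> (8 * i)));
    }

    void emitDWordLE(uint64_t D) {            // low word first, then high word
      emitWordLE(uint32_t(D));
      emitWordLE(uint32_t(D >> 32));
    }

    int main() {
      emitWordLE(0xDEADBEEFu);
      for (uint8_t B : Out) std::printf("%02x ", B);  // prints: ef be ad de
    }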
const MachineConstantPoolEntry &MCPE = (*MCPEs)[CPIndex]; @@ -474,8 +433,7 @@ void Emitter<CodeEmitter>::emitConstPoolInstruction(const MachineInstr &MI) { } } -template<class CodeEmitter> -void Emitter<CodeEmitter>::emitMOVi2piecesInstruction(const MachineInstr &MI) { +void ARMCodeEmitter::emitMOVi2piecesInstruction(const MachineInstr &MI) { const MachineOperand &MO0 = MI.getOperand(0); const MachineOperand &MO1 = MI.getOperand(1); assert(MO1.isImm() && ARM_AM::getSOImmVal(MO1.isImm()) != -1 && @@ -517,8 +475,7 @@ void Emitter<CodeEmitter>::emitMOVi2piecesInstruction(const MachineInstr &MI) { emitWordLE(Binary); } -template<class CodeEmitter> -void Emitter<CodeEmitter>::emitLEApcrelJTInstruction(const MachineInstr &MI) { +void ARMCodeEmitter::emitLEApcrelJTInstruction(const MachineInstr &MI) { // It's basically add r, pc, (LJTI - $+8) const TargetInstrDesc &TID = MI.getDesc(); @@ -545,8 +502,7 @@ void Emitter<CodeEmitter>::emitLEApcrelJTInstruction(const MachineInstr &MI) { emitWordLE(Binary); } -template<class CodeEmitter> -void Emitter<CodeEmitter>::emitPseudoMoveInstruction(const MachineInstr &MI) { +void ARMCodeEmitter::emitPseudoMoveInstruction(const MachineInstr &MI) { unsigned Opcode = MI.getDesc().Opcode; // Part of binary is determined by TableGn. @@ -585,21 +541,19 @@ void Emitter<CodeEmitter>::emitPseudoMoveInstruction(const MachineInstr &MI) { emitWordLE(Binary); } -template<class CodeEmitter> -void Emitter<CodeEmitter>::addPCLabel(unsigned LabelID) { +void ARMCodeEmitter::addPCLabel(unsigned LabelID) { DEBUG(errs() << " ** LPC" << LabelID << " @ " << (void*)MCE.getCurrentPCValue() << '\n'); JTI->addPCLabelAddr(LabelID, MCE.getCurrentPCValue()); } -template<class CodeEmitter> -void Emitter<CodeEmitter>::emitPseudoInstruction(const MachineInstr &MI) { +void ARMCodeEmitter::emitPseudoInstruction(const MachineInstr &MI) { unsigned Opcode = MI.getDesc().Opcode; switch (Opcode) { default: llvm_unreachable("ARMCodeEmitter::emitPseudoInstruction"); // FIXME: Add support for MOVimm32. - case TargetInstrInfo::INLINEASM: { + case TargetOpcode::INLINEASM: { // We allow inline assembler nodes with empty bodies - they can // implicitly define registers, which is ok for JIT. if (MI.getOperand(0).getSymbolName()[0]) { @@ -607,12 +561,12 @@ void Emitter<CodeEmitter>::emitPseudoInstruction(const MachineInstr &MI) { } break; } - case TargetInstrInfo::DBG_LABEL: - case TargetInstrInfo::EH_LABEL: + case TargetOpcode::DBG_LABEL: + case TargetOpcode::EH_LABEL: MCE.emitLabel(MI.getOperand(0).getImm()); break; - case TargetInstrInfo::IMPLICIT_DEF: - case TargetInstrInfo::KILL: + case TargetOpcode::IMPLICIT_DEF: + case TargetOpcode::KILL: // Do nothing. 
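emitMOVi2piecesInstruction exists because an ARM data-processing immediate ("so_imm") must be an 8-bit value rotated right by an even amount; values that fail the test get split into a two-instruction sequence. A sketch equivalent in spirit to ARM_AM::getSOImmVal, returning the 12-bit encoding or -1:

    #include <cstdint>

    static uint32_t rotl32(uint32_t V, unsigned Amt) {
      Amt &= 31;
      return Amt ? (V << Amt) | (V >> (32 - Amt)) : V;
    }

    // imm32 = ROR(imm8, 2*rot) with rot in 0..15, so V is encodable iff some
    // even left-rotation of V fits in eight bits.
    int getSOImmValSketch(uint32_t V) {
      for (unsigned Rot = 0; Rot != 16; ++Rot) {
        uint32_t Imm8 = rotl32(V, 2 * Rot);   // undo the right-rotation
        if ((Imm8 & ~0xFFu) == 0)
          return int((Rot << 8) | Imm8);      // rot:4 | imm8 -> 12-bit field
      }
      return -1;
    }

For example, 0xFF000000 encodes as imm8 = 0xFF with rotate 4, while 0x102 fits no even rotation and is exactly the kind of value the two-piece expansion above handles.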
break; case ARM::CONSTPOOL_ENTRY: @@ -661,8 +615,7 @@ void Emitter<CodeEmitter>::emitPseudoInstruction(const MachineInstr &MI) { } } -template<class CodeEmitter> -unsigned Emitter<CodeEmitter>::getMachineSoRegOpValue( +unsigned ARMCodeEmitter::getMachineSoRegOpValue( const MachineInstr &MI, const TargetInstrDesc &TID, const MachineOperand &MO, @@ -721,8 +674,7 @@ unsigned Emitter<CodeEmitter>::getMachineSoRegOpValue( return Binary | ARM_AM::getSORegOffset(MO2.getImm()) << 7; } -template<class CodeEmitter> -unsigned Emitter<CodeEmitter>::getMachineSoImmOpValue(unsigned SoImm) { +unsigned ARMCodeEmitter::getMachineSoImmOpValue(unsigned SoImm) { int SoImmVal = ARM_AM::getSOImmVal(SoImm); assert(SoImmVal != -1 && "Not a valid so_imm value!"); @@ -735,8 +687,7 @@ unsigned Emitter<CodeEmitter>::getMachineSoImmOpValue(unsigned SoImm) { return Binary; } -template<class CodeEmitter> -unsigned Emitter<CodeEmitter>::getAddrModeSBit(const MachineInstr &MI, +unsigned ARMCodeEmitter::getAddrModeSBit(const MachineInstr &MI, const TargetInstrDesc &TID) const { for (unsigned i = MI.getNumOperands(), e = TID.getNumOperands(); i != e; --i){ const MachineOperand &MO = MI.getOperand(i-1); @@ -746,8 +697,7 @@ unsigned Emitter<CodeEmitter>::getAddrModeSBit(const MachineInstr &MI, return 0; } -template<class CodeEmitter> -void Emitter<CodeEmitter>::emitDataProcessingInstruction( +void ARMCodeEmitter::emitDataProcessingInstruction( const MachineInstr &MI, unsigned ImplicitRd, unsigned ImplicitRn) { @@ -813,8 +763,7 @@ void Emitter<CodeEmitter>::emitDataProcessingInstruction( emitWordLE(Binary); } -template<class CodeEmitter> -void Emitter<CodeEmitter>::emitLoadStoreInstruction( +void ARMCodeEmitter::emitLoadStoreInstruction( const MachineInstr &MI, unsigned ImplicitRd, unsigned ImplicitRn) { @@ -889,8 +838,7 @@ void Emitter<CodeEmitter>::emitLoadStoreInstruction( emitWordLE(Binary); } -template<class CodeEmitter> -void Emitter<CodeEmitter>::emitMiscLoadStoreInstruction(const MachineInstr &MI, +void ARMCodeEmitter::emitMiscLoadStoreInstruction(const MachineInstr &MI, unsigned ImplicitRn) { const TargetInstrDesc &TID = MI.getDesc(); unsigned Form = TID.TSFlags & ARMII::FormMask; @@ -977,8 +925,7 @@ static unsigned getAddrModeUPBits(unsigned Mode) { return Binary; } -template<class CodeEmitter> -void Emitter<CodeEmitter>::emitLoadStoreMultipleInstruction( +void ARMCodeEmitter::emitLoadStoreMultipleInstruction( const MachineInstr &MI) { // Part of binary is determined by TableGn. unsigned Binary = getBinaryCodeForInstr(MI); @@ -1011,8 +958,7 @@ void Emitter<CodeEmitter>::emitLoadStoreMultipleInstruction( emitWordLE(Binary); } -template<class CodeEmitter> -void Emitter<CodeEmitter>::emitMulFrmInstruction(const MachineInstr &MI) { +void ARMCodeEmitter::emitMulFrmInstruction(const MachineInstr &MI) { const TargetInstrDesc &TID = MI.getDesc(); // Part of binary is determined by TableGn. @@ -1049,8 +995,7 @@ void Emitter<CodeEmitter>::emitMulFrmInstruction(const MachineInstr &MI) { emitWordLE(Binary); } -template<class CodeEmitter> -void Emitter<CodeEmitter>::emitExtendInstruction(const MachineInstr &MI) { +void ARMCodeEmitter::emitExtendInstruction(const MachineInstr &MI) { const TargetInstrDesc &TID = MI.getDesc(); // Part of binary is determined by TableGn. 
@@ -1087,8 +1032,7 @@ void Emitter<CodeEmitter>::emitExtendInstruction(const MachineInstr &MI) { emitWordLE(Binary); } -template<class CodeEmitter> -void Emitter<CodeEmitter>::emitMiscArithInstruction(const MachineInstr &MI) { +void ARMCodeEmitter::emitMiscArithInstruction(const MachineInstr &MI) { const TargetInstrDesc &TID = MI.getDesc(); // Part of binary is determined by TableGn. @@ -1126,8 +1070,7 @@ void Emitter<CodeEmitter>::emitMiscArithInstruction(const MachineInstr &MI) { emitWordLE(Binary); } -template<class CodeEmitter> -void Emitter<CodeEmitter>::emitBranchInstruction(const MachineInstr &MI) { +void ARMCodeEmitter::emitBranchInstruction(const MachineInstr &MI) { const TargetInstrDesc &TID = MI.getDesc(); if (TID.Opcode == ARM::TPsoft) { @@ -1146,8 +1089,7 @@ void Emitter<CodeEmitter>::emitBranchInstruction(const MachineInstr &MI) { emitWordLE(Binary); } -template<class CodeEmitter> -void Emitter<CodeEmitter>::emitInlineJumpTable(unsigned JTIndex) { +void ARMCodeEmitter::emitInlineJumpTable(unsigned JTIndex) { // Remember the base address of the inline jump table. uintptr_t JTBase = MCE.getCurrentPCValue(); JTI->addJumpTableBaseAddr(JTIndex, JTBase); @@ -1167,8 +1109,7 @@ void Emitter<CodeEmitter>::emitInlineJumpTable(unsigned JTIndex) { } } -template<class CodeEmitter> -void Emitter<CodeEmitter>::emitMiscBranchInstruction(const MachineInstr &MI) { +void ARMCodeEmitter::emitMiscBranchInstruction(const MachineInstr &MI) { const TargetInstrDesc &TID = MI.getDesc(); // Handle jump tables. @@ -1249,8 +1190,7 @@ static unsigned encodeVFPRm(const MachineInstr &MI, unsigned OpIdx) { return Binary; } -template<class CodeEmitter> -void Emitter<CodeEmitter>::emitVFPArithInstruction(const MachineInstr &MI) { +void ARMCodeEmitter::emitVFPArithInstruction(const MachineInstr &MI) { const TargetInstrDesc &TID = MI.getDesc(); // Part of binary is determined by TableGn. @@ -1289,8 +1229,7 @@ void Emitter<CodeEmitter>::emitVFPArithInstruction(const MachineInstr &MI) { emitWordLE(Binary); } -template<class CodeEmitter> -void Emitter<CodeEmitter>::emitVFPConversionInstruction( +void ARMCodeEmitter::emitVFPConversionInstruction( const MachineInstr &MI) { const TargetInstrDesc &TID = MI.getDesc(); unsigned Form = TID.TSFlags & ARMII::FormMask; @@ -1347,8 +1286,7 @@ void Emitter<CodeEmitter>::emitVFPConversionInstruction( emitWordLE(Binary); } -template<class CodeEmitter> -void Emitter<CodeEmitter>::emitVFPLoadStoreInstruction(const MachineInstr &MI) { +void ARMCodeEmitter::emitVFPLoadStoreInstruction(const MachineInstr &MI) { // Part of binary is determined by TableGn. unsigned Binary = getBinaryCodeForInstr(MI); @@ -1382,8 +1320,7 @@ void Emitter<CodeEmitter>::emitVFPLoadStoreInstruction(const MachineInstr &MI) { emitWordLE(Binary); } -template<class CodeEmitter> -void Emitter<CodeEmitter>::emitVFPLoadStoreMultipleInstruction( +void ARMCodeEmitter::emitVFPLoadStoreMultipleInstruction( const MachineInstr &MI) { // Part of binary is determined by TableGn. unsigned Binary = getBinaryCodeForInstr(MI); @@ -1418,8 +1355,7 @@ void Emitter<CodeEmitter>::emitVFPLoadStoreMultipleInstruction( emitWordLE(Binary); } -template<class CodeEmitter> -void Emitter<CodeEmitter>::emitMiscInstruction(const MachineInstr &MI) { +void ARMCodeEmitter::emitMiscInstruction(const MachineInstr &MI) { // Part of binary is determined by TableGn. 
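The branch emission above leaves the 24-bit offset to relocation resolution: ARM B/BL store a signed word offset relative to the pc value the hardware reads, which is the branch's own address plus 8. A sketch of the fixup a resolver applies once the target address is known:

    #include <cassert>
    #include <cstdint>

    uint32_t encodeBranchOffset(uint32_t Insn, int64_t BranchAddr,
                                int64_t TargetAddr) {
      int64_t Delta = TargetAddr - (BranchAddr + 8);   // pc reads as addr + 8
      assert((Delta & 3) == 0 && "branch target must be word aligned");
      int64_t Imm = Delta >> 2;                        // word offset
      assert(Imm >= -(1 << 23) && Imm < (1 << 23) && "branch out of range");
      return (Insn & 0xFF000000u)                      // keep cond + opcode
           | (uint32_t(Imm) & 0x00FFFFFFu);            // signed 24-bit field
    }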
unsigned Binary = getBinaryCodeForInstr(MI); diff --git a/lib/Target/ARM/ARMConstantIslandPass.cpp b/lib/Target/ARM/ARMConstantIslandPass.cpp index acd30d2..8fa3c04 100644 --- a/lib/Target/ARM/ARMConstantIslandPass.cpp +++ b/lib/Target/ARM/ARMConstantIslandPass.cpp @@ -302,9 +302,9 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &MF) { // Thumb1 functions containing constant pools get 4-byte alignment. // This is so we can keep exact track of where the alignment padding goes. - // Set default. Thumb1 function is 2-byte aligned, ARM and Thumb2 are 4-byte - // aligned. - AFI->setAlign(isThumb1 ? 1U : 2U); + // ARM and Thumb2 functions need to be 4-byte aligned. + if (!isThumb1) + MF.EnsureAlignment(2); // 2 = log2(4) // Perform the initial placement of the constant pool entries. To start with, // we put them all at the end of the function. @@ -312,7 +312,7 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &MF) { if (!MCP.isEmpty()) { DoInitialPlacement(MF, CPEMIs); if (isThumb1) - AFI->setAlign(2U); + MF.EnsureAlignment(2); // 2 = log2(4) } /// The next UID to take is the first unused one. @@ -506,7 +506,7 @@ void ARMConstantIslands::InitialFunctionScan(MachineFunction &MF, case ARM::tBR_JTr: // A Thumb1 table jump may involve padding; for the offsets to // be right, functions containing these must be 4-byte aligned. - AFI->setAlign(2U); + MF.EnsureAlignment(2U); if ((Offset+MBBSize)%4 != 0 || HasInlineAsm) // FIXME: Add a pseudo ALIGN instruction instead. MBBSize += 2; // padding @@ -732,7 +732,7 @@ MachineBasicBlock *ARMConstantIslands::SplitBlockBeforeInstr(MachineInstr *MI) { // This pass should be run after register allocation, so there should be no // PHI nodes to update. - assert((Succ->empty() || Succ->begin()->getOpcode() != TargetInstrInfo::PHI) + assert((Succ->empty() || !Succ->begin()->isPHI()) && "PHI nodes should be eliminated by now!"); } @@ -1624,6 +1624,8 @@ bool ARMConstantIslands::OptimizeThumb2JumpTables(MachineFunction &MF) { // FIXME: After the tables are shrunk, can we get rid some of the // constantpool tables? MachineJumpTableInfo *MJTI = MF.getJumpTableInfo(); + if (MJTI == 0) return false; + const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables(); for (unsigned i = 0, e = T2JumpTables.size(); i != e; ++i) { MachineInstr *MI = T2JumpTables[i]; @@ -1730,6 +1732,8 @@ bool ARMConstantIslands::ReorderThumb2JumpTables(MachineFunction &MF) { bool MadeChange = false; MachineJumpTableInfo *MJTI = MF.getJumpTableInfo(); + if (MJTI == 0) return false; + const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables(); for (unsigned i = 0, e = T2JumpTables.size(); i != e; ++i) { MachineInstr *MI = T2JumpTables[i]; diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp index a260050..df4ae70 100644 --- a/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -58,6 +58,8 @@ public: return "ARM Instruction Selection"; } + virtual void InstructionSelect(); + /// getI32Imm - Return a target constant of type i32 with the specified /// value. 
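MachineFunction::EnsureAlignment takes a log2 value, so EnsureAlignment(2) in the constant-island hunks above requests 4-byte alignment, and the pass separately has to budget the padding bytes that alignment can introduce. Both calculations in miniature:

    #include <cstdint>
    #include <cstdio>

    // EnsureAlignment-style API: alignment is stored as log2(bytes) and is
    // only ever raised, never lowered.
    struct FunctionInfo {
      unsigned LogAlign = 1;                          // default: 2-byte (Thumb)
      void ensureAlignment(unsigned A) { if (A > LogAlign) LogAlign = A; }
    };

    // Padding needed to bring Offset up to the requested alignment.
    uint64_t paddingTo(uint64_t Offset, unsigned LogAlign) {
      uint64_t Align = uint64_t(1) << LogAlign;
      return (Align - (Offset & (Align - 1))) & (Align - 1);
    }

    int main() {
      FunctionInfo FI;
      FI.ensureAlignment(2);                          // 2 = log2(4), as above
      std::printf("align=%u bytes, pad at offset 6 -> %llu\n", 1u << FI.LogAlign,
                  (unsigned long long)paddingTo(6, FI.LogAlign));  // pads 2
    }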
inline SDValue getI32Imm(unsigned Imm) { @@ -65,7 +67,7 @@ public: } SDNode *Select(SDNode *N); - virtual void InstructionSelect(); + bool SelectShifterOperandReg(SDNode *Op, SDValue N, SDValue &A, SDValue &B, SDValue &C); bool SelectAddrMode2(SDNode *Op, SDValue N, SDValue &Base, @@ -1007,12 +1009,12 @@ SDNode *ARMDAGToDAGISel::SelectDYN_ALLOC(SDNode *N) { SDNode *ARMDAGToDAGISel::PairDRegs(EVT VT, SDValue V0, SDValue V1) { DebugLoc dl = V0.getNode()->getDebugLoc(); SDValue Undef = - SDValue(CurDAG->getMachineNode(TargetInstrInfo::IMPLICIT_DEF, dl, VT), 0); + SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0); SDValue SubReg0 = CurDAG->getTargetConstant(ARM::DSUBREG_0, MVT::i32); SDValue SubReg1 = CurDAG->getTargetConstant(ARM::DSUBREG_1, MVT::i32); - SDNode *Pair = CurDAG->getMachineNode(TargetInstrInfo::INSERT_SUBREG, dl, + SDNode *Pair = CurDAG->getMachineNode(TargetOpcode::INSERT_SUBREG, dl, VT, Undef, V0, SubReg0); - return CurDAG->getMachineNode(TargetInstrInfo::INSERT_SUBREG, dl, + return CurDAG->getMachineNode(TargetOpcode::INSERT_SUBREG, dl, VT, SDValue(Pair, 0), V1, SubReg1); } diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index 76c6a27..adf1644 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -863,7 +863,8 @@ ARMTargetLowering::LowerMemOpCallTo(SDValue Chain, return CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG, dl); } return DAG.getStore(Chain, dl, Arg, PtrOff, - PseudoSourceValue::getStack(), LocMemOffset); + PseudoSourceValue::getStack(), LocMemOffset, + false, false, 0); } void ARMTargetLowering::PassF64ArgInRegs(DebugLoc dl, SelectionDAG &DAG, @@ -897,11 +898,13 @@ void ARMTargetLowering::PassF64ArgInRegs(DebugLoc dl, SelectionDAG &DAG, SDValue ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg, - bool isTailCall, + bool &isTailCall, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) { + // ARM target does not yet support tail call optimization. + isTailCall = false; // Analyze operands of the call, assigning locations to each operand. 
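LowerCall now receives isTailCall by reference, so a target without tail-call support can veto the optimization in place, as the "isTailCall = false" line above does. The out-parameter pattern in isolation:

    // Sketch of the veto pattern: the caller proposes a tail call, the
    // target may decline by clearing the flag and lowering an ordinary call.
    bool lowerCallSketch(bool &isTailCall) {
      isTailCall = false;   // this target does not yet support tail calls
      return true;          // continue lowering as a normal call
    }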
SmallVector<CCValAssign, 16> ArgLocs; @@ -1029,7 +1032,8 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee, CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); Callee = DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), CPAddr, - PseudoSourceValue::getConstantPool(), 0); + PseudoSourceValue::getConstantPool(), 0, + false, false, 0); SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); Callee = DAG.getNode(ARMISD::PIC_ADD, dl, getPointerTy(), Callee, PICLabel); @@ -1050,7 +1054,8 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee, CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); Callee = DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), CPAddr, - PseudoSourceValue::getConstantPool(), 0); + PseudoSourceValue::getConstantPool(), 0, + false, false, 0); SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); Callee = DAG.getNode(ARMISD::PIC_ADD, dl, getPointerTy(), Callee, PICLabel); @@ -1236,7 +1241,8 @@ SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) { } CPAddr = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, CPAddr); SDValue Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), CPAddr, - PseudoSourceValue::getConstantPool(), 0); + PseudoSourceValue::getConstantPool(), 0, + false, false, 0); if (RelocM == Reloc::Static) return Result; SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); @@ -1259,7 +1265,8 @@ ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA, SDValue Argument = DAG.getTargetConstantPool(CPV, PtrVT, 4); Argument = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Argument); Argument = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Argument, - PseudoSourceValue::getConstantPool(), 0); + PseudoSourceValue::getConstantPool(), 0, + false, false, 0); SDValue Chain = Argument.getValue(1); SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); @@ -1306,21 +1313,24 @@ ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA, Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4); Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset); Offset = DAG.getLoad(PtrVT, dl, Chain, Offset, - PseudoSourceValue::getConstantPool(), 0); + PseudoSourceValue::getConstantPool(), 0, + false, false, 0); Chain = Offset.getValue(1); SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); Offset = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Offset, PICLabel); Offset = DAG.getLoad(PtrVT, dl, Chain, Offset, - PseudoSourceValue::getConstantPool(), 0); + PseudoSourceValue::getConstantPool(), 0, + false, false, 0); } else { // local exec model ARMConstantPoolValue *CPV = new ARMConstantPoolValue(GV, "tpoff"); Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4); Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset); Offset = DAG.getLoad(PtrVT, dl, Chain, Offset, - PseudoSourceValue::getConstantPool(), 0); + PseudoSourceValue::getConstantPool(), 0, + false, false, 0); } // The address of the thread local variable is the add of the thread @@ -1356,13 +1366,15 @@ SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op, CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr, - PseudoSourceValue::getConstantPool(), 0); + PseudoSourceValue::getConstantPool(), 0, + false, false, 0); SDValue Chain = Result.getValue(1); SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(PtrVT); Result = DAG.getNode(ISD::ADD, dl, PtrVT, Result, GOT); if (!UseGOTOFF) Result = DAG.getLoad(PtrVT, dl, Chain, Result, - PseudoSourceValue::getGOT(), 0); + 
PseudoSourceValue::getGOT(), 0, + false, false, 0); return Result; } else { // If we have T2 ops, we can materialize the address directly via movt/movw @@ -1374,7 +1386,8 @@ SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op, SDValue CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 4); CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr, - PseudoSourceValue::getConstantPool(), 0); + PseudoSourceValue::getConstantPool(), 0, + false, false, 0); } } } @@ -1401,7 +1414,8 @@ SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op, CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr, - PseudoSourceValue::getConstantPool(), 0); + PseudoSourceValue::getConstantPool(), 0, + false, false, 0); SDValue Chain = Result.getValue(1); if (RelocM == Reloc::PIC_) { @@ -1411,7 +1425,8 @@ SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op, if (Subtarget->GVIsIndirectSymbol(GV, RelocM)) Result = DAG.getLoad(PtrVT, dl, Chain, Result, - PseudoSourceValue::getGOT(), 0); + PseudoSourceValue::getGOT(), 0, + false, false, 0); return Result; } @@ -1432,13 +1447,15 @@ SDValue ARMTargetLowering::LowerGLOBAL_OFFSET_TABLE(SDValue Op, SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4); CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr, - PseudoSourceValue::getConstantPool(), 0); + PseudoSourceValue::getConstantPool(), 0, + false, false, 0); SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); return DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel); } SDValue -ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) { +ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG, + const ARMSubtarget *Subtarget) { unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); DebugLoc dl = Op.getDebugLoc(); switch (IntNo) { @@ -1464,7 +1481,8 @@ ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) { CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr, - PseudoSourceValue::getConstantPool(), 0); + PseudoSourceValue::getConstantPool(), 0, + false, false, 0); SDValue Chain = Result.getValue(1); if (RelocM == Reloc::PIC_) { @@ -1474,7 +1492,11 @@ ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) { return Result; } case Intrinsic::eh_sjlj_setjmp: - return DAG.getNode(ARMISD::EH_SJLJ_SETJMP, dl, MVT::i32, Op.getOperand(1)); + SDValue Val = Subtarget->isThumb() ? + DAG.getCopyFromReg(DAG.getEntryNode(), dl, ARM::SP, MVT::i32) : + DAG.getConstant(0, MVT::i32); + return DAG.getNode(ARMISD::EH_SJLJ_SETJMP, dl, MVT::i32, Op.getOperand(1), + Val); } } @@ -1508,7 +1530,8 @@ static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG, EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); SDValue FR = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT); const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); - return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1), SV, 0); + return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1), SV, 0, + false, false, 0); } SDValue @@ -1585,7 +1608,8 @@ ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA, // Create load node to retrieve arguments from the stack. 
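Every DAG.getLoad/DAG.getStore call in this file grows the same three trailing arguments, passed as "false, false, 0". A sketch of the apparent widened signature; the parameter names (isVolatile, isNonTemporal, Alignment) are inferred from the call sites, not quoted from the headers:

    // Hypothetical stand-ins illustrating the signature change only.
    struct SDValue {};

    SDValue getLoadOld(SDValue Chain, SDValue Ptr, int Offset);  // before

    SDValue getLoadNew(SDValue Chain, SDValue Ptr, int Offset,
                       bool isVolatile, bool isNonTemporal, unsigned Alignment) {
      (void)isVolatile; (void)isNonTemporal; (void)Alignment;
      return SDValue{};
    }

    // The mechanical update applied at every caller above, preserving the
    // old default behavior:
    SDValue example(SDValue Chain, SDValue Ptr) {
      return getLoadNew(Chain, Ptr, /*Offset=*/0,
                        /*isVolatile=*/false, /*isNonTemporal=*/false,
                        /*Alignment=*/0);  // 0 = let the backend infer it
    }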
SDValue FIN = DAG.getFrameIndex(FI, getPointerTy()); ArgValue2 = DAG.getLoad(MVT::i32, dl, Root, FIN, - PseudoSourceValue::getFixedStack(FI), 0); + PseudoSourceValue::getFixedStack(FI), 0, + false, false, 0); } else { Reg = MF.addLiveIn(NextVA.getLocReg(), RC); ArgValue2 = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32); @@ -1700,7 +1724,8 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain, // Create load nodes to retrieve arguments from the stack. SDValue FIN = DAG.getFrameIndex(FI, getPointerTy()); InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN, - PseudoSourceValue::getFixedStack(FI), 0)); + PseudoSourceValue::getFixedStack(FI), 0, + false, false, 0)); } } @@ -1738,7 +1763,8 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain, unsigned VReg = MF.addLiveIn(GPRArgRegs[NumGPRs], RC); SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32); SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, - PseudoSourceValue::getFixedStack(VarArgsFrameIndex), 0); + PseudoSourceValue::getFixedStack(VarArgsFrameIndex), 0, + false, false, 0); MemOps.push_back(Store); FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), FIN, DAG.getConstant(4, getPointerTy())); @@ -1932,13 +1958,14 @@ SDValue ARMTargetLowering::LowerBR_JT(SDValue Op, SelectionDAG &DAG) { } if (getTargetMachine().getRelocationModel() == Reloc::PIC_) { Addr = DAG.getLoad((EVT)MVT::i32, dl, Chain, Addr, - PseudoSourceValue::getJumpTable(), 0); + PseudoSourceValue::getJumpTable(), 0, + false, false, 0); Chain = Addr.getValue(1); Addr = DAG.getNode(ISD::ADD, dl, PTy, Addr, Table); return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI, UId); } else { Addr = DAG.getLoad(PTy, dl, Chain, Addr, - PseudoSourceValue::getJumpTable(), 0); + PseudoSourceValue::getJumpTable(), 0, false, false, 0); Chain = Addr.getValue(1); return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI, UId); } @@ -1986,7 +2013,8 @@ SDValue ARMTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) { ? 
ARM::R7 : ARM::R11; SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, VT); while (Depth--) - FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr, NULL, 0); + FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr, NULL, 0, + false, false, 0); return FrameAddr; } @@ -2031,7 +2059,7 @@ ARMTargetLowering::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl, Loads[i] = DAG.getLoad(VT, dl, Chain, DAG.getNode(ISD::ADD, dl, MVT::i32, Src, DAG.getConstant(SrcOff, MVT::i32)), - SrcSV, SrcSVOff + SrcOff); + SrcSV, SrcSVOff + SrcOff, false, false, 0); TFOps[i] = Loads[i].getValue(1); SrcOff += VTSize; } @@ -2040,9 +2068,9 @@ ARMTargetLowering::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl, for (i = 0; i < MAX_LOADS_IN_LDM && EmittedNumMemOps + i < NumMemOps; ++i) { TFOps[i] = DAG.getStore(Chain, dl, Loads[i], - DAG.getNode(ISD::ADD, dl, MVT::i32, Dst, - DAG.getConstant(DstOff, MVT::i32)), - DstSV, DstSVOff + DstOff); + DAG.getNode(ISD::ADD, dl, MVT::i32, Dst, + DAG.getConstant(DstOff, MVT::i32)), + DstSV, DstSVOff + DstOff, false, false, 0); DstOff += VTSize; } Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i); @@ -2068,7 +2096,7 @@ ARMTargetLowering::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl, Loads[i] = DAG.getLoad(VT, dl, Chain, DAG.getNode(ISD::ADD, dl, MVT::i32, Src, DAG.getConstant(SrcOff, MVT::i32)), - SrcSV, SrcSVOff + SrcOff); + SrcSV, SrcSVOff + SrcOff, false, false, 0); TFOps[i] = Loads[i].getValue(1); ++i; SrcOff += VTSize; @@ -2090,7 +2118,7 @@ ARMTargetLowering::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl, TFOps[i] = DAG.getStore(Chain, dl, Loads[i], DAG.getNode(ISD::ADD, dl, MVT::i32, Dst, DAG.getConstant(DstOff, MVT::i32)), - DstSV, DstSVOff + DstOff); + DstSV, DstSVOff + DstOff, false, false, 0); ++i; DstOff += VTSize; BytesLeft -= VTSize; @@ -3023,7 +3051,8 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) { case ISD::RETURNADDR: break; case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG); case ISD::GLOBAL_OFFSET_TABLE: return LowerGLOBAL_OFFSET_TABLE(Op, DAG); - case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG); + case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG, + Subtarget); case ISD::BIT_CONVERT: return ExpandBIT_CONVERT(Op.getNode(), DAG); case ISD::SHL: case ISD::SRL: @@ -3852,8 +3881,11 @@ bool ARMTargetLowering::allowsUnalignedMemoryAccesses(EVT VT) const { if (!Subtarget->hasV6Ops()) // Pre-v6 does not support unaligned mem access. return false; - else if (!Subtarget->hasV6Ops()) { - // v6 may or may not support unaligned mem access. + else { + // v6+ may or may not support unaligned mem access depending on the system + // configuration. + // FIXME: This is pretty conservative. Should we provide cmdline option to + // control the behaviour? 
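The rewritten allowsUnalignedMemoryAccesses (its final Darwin check continues just below) replaces an unreachable "else if (!Subtarget->hasV6Ops())" with a plain else: pre-v6 cores never support unaligned access, v6 and later may or may not depending on system configuration (the SCTLR alignment bits), and only Darwin targets are currently assumed to enable it. The same logic as a pure function:

    // Control flow of the predicate after this change, with the Subtarget
    // queries reduced to booleans.
    bool allowsUnalignedMemAccess(bool HasV6Ops, bool IsTargetDarwin) {
      if (!HasV6Ops)
        return false;        // pre-v6 has no unaligned loads/stores
      // v6+ depends on system configuration; the code above conservatively
      // answers yes only for Darwin targets.
      return IsTargetDarwin;
    }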
if (!Subtarget->isTargetDarwin()) return false; } diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h index cd9c027..3c5df45 100644 --- a/lib/Target/ARM/ARMISelLowering.h +++ b/lib/Target/ARM/ARMISelLowering.h @@ -278,7 +278,8 @@ namespace llvm { const CCValAssign &VA, ISD::ArgFlagsTy Flags); SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG); - SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG); + SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG, + const ARMSubtarget *Subtarget); SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG); SDValue LowerGlobalAddressDarwin(SDValue Op, SelectionDAG &DAG); SDValue LowerGlobalAddressELF(SDValue Op, SelectionDAG &DAG); @@ -319,7 +320,7 @@ namespace llvm { virtual SDValue LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg, - bool isTailCall, + bool &isTailCall, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, diff --git a/lib/Target/ARM/ARMInstrFormats.td b/lib/Target/ARM/ARMInstrFormats.td index 28b2821..db60458 100644 --- a/lib/Target/ARM/ARMInstrFormats.td +++ b/lib/Target/ARM/ARMInstrFormats.td @@ -56,6 +56,9 @@ def NEONGetLnFrm : Format<25>; def NEONSetLnFrm : Format<26>; def NEONDupFrm : Format<27>; +def MiscFrm : Format<29>; +def ThumbMiscFrm : Format<30>; + // Misc flags. // the instruction has a Rn register operand. @@ -1246,75 +1249,99 @@ class AXSI5<dag oops, dag iops, InstrItinClass itin, } // Double precision, unary -class ADuI<bits<8> opcod1, bits<4> opcod2, bits<4> opcod3, dag oops, dag iops, - InstrItinClass itin, string opc, string asm, list<dag> pattern> +class ADuI<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3, bits<2> opcod4, + bit opcod5, dag oops, dag iops, InstrItinClass itin, string opc, + string asm, list<dag> pattern> : VFPAI<oops, iops, VFPUnaryFrm, itin, opc, asm, pattern> { - let Inst{27-20} = opcod1; - let Inst{19-16} = opcod2; + let Inst{27-23} = opcod1; + let Inst{21-20} = opcod2; + let Inst{19-16} = opcod3; let Inst{11-8} = 0b1011; - let Inst{7-4} = opcod3; + let Inst{7-6} = opcod4; + let Inst{4} = opcod5; } // Double precision, binary -class ADbI<bits<8> opcod, dag oops, dag iops, InstrItinClass itin, - string opc, string asm, list<dag> pattern> +class ADbI<bits<5> opcod1, bits<2> opcod2, bit op6, bit op4, dag oops, + dag iops, InstrItinClass itin, string opc, string asm, list<dag> pattern> : VFPAI<oops, iops, VFPBinaryFrm, itin, opc, asm, pattern> { - let Inst{27-20} = opcod; + let Inst{27-23} = opcod1; + let Inst{21-20} = opcod2; let Inst{11-8} = 0b1011; + let Inst{6} = op6; + let Inst{4} = op4; } // Single precision, unary -class ASuI<bits<8> opcod1, bits<4> opcod2, bits<4> opcod3, dag oops, dag iops, - InstrItinClass itin, string opc, string asm, list<dag> pattern> +class ASuI<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3, bits<2> opcod4, + bit opcod5, dag oops, dag iops, InstrItinClass itin, string opc, + string asm, list<dag> pattern> : VFPAI<oops, iops, VFPUnaryFrm, itin, opc, asm, pattern> { - // Bits 22 (D bit) and 5 (M bit) will be changed during instruction encoding. 
- let Inst{27-20} = opcod1; - let Inst{19-16} = opcod2; + let Inst{27-23} = opcod1; + let Inst{21-20} = opcod2; + let Inst{19-16} = opcod3; let Inst{11-8} = 0b1010; - let Inst{7-4} = opcod3; + let Inst{7-6} = opcod4; + let Inst{4} = opcod5; } // Single precision unary, if no NEON // Same as ASuI except not available if NEON is enabled -class ASuIn<bits<8> opcod1, bits<4> opcod2, bits<4> opcod3, dag oops, dag iops, - InstrItinClass itin, string opc, string asm, list<dag> pattern> - : ASuI<opcod1, opcod2, opcod3, oops, iops, itin, opc, asm, pattern> { +class ASuIn<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3, bits<2> opcod4, + bit opcod5, dag oops, dag iops, InstrItinClass itin, string opc, + string asm, list<dag> pattern> + : ASuI<opcod1, opcod2, opcod3, opcod4, opcod5, oops, iops, itin, opc, asm, + pattern> { list<Predicate> Predicates = [HasVFP2,DontUseNEONForFP]; } // Single precision, binary -class ASbI<bits<8> opcod, dag oops, dag iops, InstrItinClass itin, - string opc, string asm, list<dag> pattern> +class ASbI<bits<5> opcod1, bits<2> opcod2, bit op6, bit op4, dag oops, dag iops, + InstrItinClass itin, string opc, string asm, list<dag> pattern> : VFPAI<oops, iops, VFPBinaryFrm, itin, opc, asm, pattern> { - // Bit 22 (D bit) can be changed during instruction encoding. - let Inst{27-20} = opcod; + let Inst{27-23} = opcod1; + let Inst{21-20} = opcod2; let Inst{11-8} = 0b1010; + let Inst{6} = op6; + let Inst{4} = op4; } // Single precision binary, if no NEON // Same as ASbI except not available if NEON is enabled -class ASbIn<bits<8> opcod, dag oops, dag iops, InstrItinClass itin, - string opc, string asm, list<dag> pattern> - : ASbI<opcod, oops, iops, itin, opc, asm, pattern> { +class ASbIn<bits<5> opcod1, bits<2> opcod2, bit op6, bit op4, dag oops, + dag iops, InstrItinClass itin, string opc, string asm, list<dag> pattern> + : ASbI<opcod1, opcod2, op6, op4, oops, iops, itin, opc, asm, pattern> { list<Predicate> Predicates = [HasVFP2,DontUseNEONForFP]; } // VFP conversion instructions -class AVConv1I<bits<8> opcod1, bits<4> opcod2, bits<4> opcod3, - dag oops, dag iops, InstrItinClass itin, - string opc, string asm, list<dag> pattern> +class AVConv1I<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3, bits<4> opcod4, + dag oops, dag iops, InstrItinClass itin, string opc, string asm, + list<dag> pattern> : VFPAI<oops, iops, VFPConv1Frm, itin, opc, asm, pattern> { - let Inst{27-20} = opcod1; - let Inst{19-16} = opcod2; - let Inst{11-8} = opcod3; + let Inst{27-23} = opcod1; + let Inst{21-20} = opcod2; + let Inst{19-16} = opcod3; + let Inst{11-8} = opcod4; let Inst{6} = 1; + let Inst{4} = 0; +} + +// VFP conversion between floating-point and fixed-point +class AVConv1XI<bits<5> op1, bits<2> op2, bits<4> op3, bits<4> op4, bit op5, + dag oops, dag iops, InstrItinClass itin, string opc, string asm, + list<dag> pattern> + : AVConv1I<op1, op2, op3, op4, oops, iops, itin, opc, asm, pattern> { + // size (fixed-point number): sx == 0 ? 
16 : 32 + let Inst{7} = op5; // sx } // VFP conversion instructions, if no NEON -class AVConv1In<bits<8> opcod1, bits<4> opcod2, bits<4> opcod3, +class AVConv1In<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3, bits<4> opcod4, dag oops, dag iops, InstrItinClass itin, string opc, string asm, list<dag> pattern> - : AVConv1I<opcod1, opcod2, opcod3, oops, iops, itin, opc, asm, pattern> { + : AVConv1I<opcod1, opcod2, opcod3, opcod4, oops, iops, itin, opc, asm, + pattern> { list<Predicate> Predicates = [HasVFP2,DontUseNEONForFP]; } diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td index af508ee..1c6f78a 100644 --- a/lib/Target/ARM/ARMInstrInfo.td +++ b/lib/Target/ARM/ARMInstrInfo.td @@ -44,7 +44,8 @@ def SDT_ARMPICAdd : SDTypeProfile<1, 2, [SDTCisSameAs<0, 1>, SDTCisPtrTy<1>, SDTCisVT<2, i32>]>; def SDT_ARMThreadPointer : SDTypeProfile<1, 0, [SDTCisPtrTy<0>]>; -def SDT_ARMEH_SJLJ_Setjmp : SDTypeProfile<1, 1, [SDTCisInt<0>, SDTCisPtrTy<1>]>; +def SDT_ARMEH_SJLJ_Setjmp : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisPtrTy<1>, + SDTCisInt<2>]>; def SDT_ARMMEMBARRIERV7 : SDTypeProfile<0, 0, []>; def SDT_ARMSYNCBARRIERV7 : SDTypeProfile<0, 0, []>; @@ -604,6 +605,102 @@ PseudoInst<(outs), (ins i32imm:$amt, pred:$p), NoItinerary, [(ARMcallseq_start timm:$amt)]>; } +def NOP : AI<(outs), (ins), MiscFrm, NoItinerary, "nop", "", + [/* For disassembly only; pattern left blank */]>, + Requires<[IsARM, HasV6T2]> { + let Inst{27-16} = 0b001100100000; + let Inst{7-0} = 0b00000000; +} + +def YIELD : AI<(outs), (ins), MiscFrm, NoItinerary, "yield", "", + [/* For disassembly only; pattern left blank */]>, + Requires<[IsARM, HasV6T2]> { + let Inst{27-16} = 0b001100100000; + let Inst{7-0} = 0b00000001; +} + +def WFE : AI<(outs), (ins), MiscFrm, NoItinerary, "wfe", "", + [/* For disassembly only; pattern left blank */]>, + Requires<[IsARM, HasV6T2]> { + let Inst{27-16} = 0b001100100000; + let Inst{7-0} = 0b00000010; +} + +def WFI : AI<(outs), (ins), MiscFrm, NoItinerary, "wfi", "", + [/* For disassembly only; pattern left blank */]>, + Requires<[IsARM, HasV6T2]> { + let Inst{27-16} = 0b001100100000; + let Inst{7-0} = 0b00000011; +} + +def SEV : AI<(outs), (ins), MiscFrm, NoItinerary, "sev", "", + [/* For disassembly only; pattern left blank */]>, + Requires<[IsARM, HasV6T2]> { + let Inst{27-16} = 0b001100100000; + let Inst{7-0} = 0b00000100; +} + +// The i32imm operand $val can be used by a debugger to store more information +// about the breakpoint. +def BKPT : AI<(outs), (ins i32imm:$val), MiscFrm, NoItinerary, "bkpt", "\t$val", + [/* For disassembly only; pattern left blank */]>, + Requires<[IsARM]> { + let Inst{27-20} = 0b00010010; + let Inst{7-4} = 0b0111; +} + +// Change Processor State is a system instruction -- for disassembly only. 
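The NOP/YIELD/WFE/WFI/SEV hints above share the fixed field Inst{27-16} = 0b001100100000 and differ only in Inst{7-0}. A check that these assemble to the expected words, assuming bits 15-8 are 0b11110000 as in the architected hint space (those bits are not spelled out in the .td fragment):

    #include <cassert>
    #include <cstdint>

    uint32_t encodeHint(uint8_t Hint, uint8_t Cond = 0xE /*AL*/) {
      return (uint32_t(Cond) << 28)      // Inst{31-28}: condition
           | (0b001100100000u << 16)     // Inst{27-16}: fixed hint opcode
           | (0xFu << 12)                // Inst{15-12} (assumed, see above)
           | Hint;                       // Inst{7-0}: 0=nop 1=yield 2=wfe ...
    }

    int main() {
      assert(encodeHint(0) == 0xE320F000u);  // nop
      assert(encodeHint(3) == 0xE320F003u);  // wfi
    }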
+// The singleton $opt operand contains the following information: +// opt{4-0} = mode from Inst{4-0} +// opt{5} = changemode from Inst{17} +// opt{8-6} = AIF from Inst{8-6} +// opt{10-9} = imod from Inst{19-18} with 0b10 as enable and 0b11 as disable +def CPS : AXI<(outs),(ins i32imm:$opt), MiscFrm, NoItinerary, "cps${opt:cps}", + [/* For disassembly only; pattern left blank */]>, + Requires<[IsARM]> { + let Inst{31-28} = 0b1111; + let Inst{27-20} = 0b00010000; + let Inst{16} = 0; + let Inst{5} = 0; +} + +def SETENDBE : AXI<(outs),(ins), MiscFrm, NoItinerary, "setend\tbe", + [/* For disassembly only; pattern left blank */]>, + Requires<[IsARM]> { + let Inst{31-28} = 0b1111; + let Inst{27-20} = 0b00010000; + let Inst{16} = 1; + let Inst{9} = 1; + let Inst{7-4} = 0b0000; +} + +def SETENDLE : AXI<(outs),(ins), MiscFrm, NoItinerary, "setend\tle", + [/* For disassembly only; pattern left blank */]>, + Requires<[IsARM]> { + let Inst{31-28} = 0b1111; + let Inst{27-20} = 0b00010000; + let Inst{16} = 1; + let Inst{9} = 0; + let Inst{7-4} = 0b0000; +} + +def DBG : AI<(outs), (ins i32imm:$opt), MiscFrm, NoItinerary, "dbg", "\t$opt", + [/* For disassembly only; pattern left blank */]>, + Requires<[IsARM, HasV7]> { + let Inst{27-16} = 0b001100100000; + let Inst{7-4} = 0b1111; +} + +// A5.4 Permanently UNDEFINED instructions. +def TRAP : AI<(outs), (ins), MiscFrm, NoItinerary, "trap", "", + [/* For disassembly only; pattern left blank */]>, + Requires<[IsARM]> { + let Inst{27-25} = 0b011; + let Inst{24-20} = 0b11111; + let Inst{7-5} = 0b111; + let Inst{4} = 0b1; +} + // Address computation and loads and stores in PIC mode. let isNotDuplicable = 1 in { def PICADD : AXI1<0b0100, (outs GPR:$dst), (ins GPR:$a, pclabel:$cp, pred:$p), @@ -826,6 +923,20 @@ let isBranch = 1, isTerminator = 1 in { [/*(ARMbrcond bb:$target, imm:$cc, CCR:$ccr)*/]>; } +// Branch and Exchange Jazelle -- for disassembly only +def BXJ : ABI<0b0001, (outs), (ins GPR:$func), NoItinerary, "bxj", "\t$func", + [/* For disassembly only; pattern left blank */]> { + let Inst{23-20} = 0b0010; + //let Inst{19-8} = 0xfff; + let Inst{7-4} = 0b0010; +} + +// Supervisor call (software interrupt) -- for disassembly only +let isCall = 1 in { +def SVC : ABI<0b1111, (outs), (ins i32imm:$svc), IIC_Br, "svc", "\t$svc", + [/* For disassembly only; pattern left blank */]>; +} + //===----------------------------------------------------------------------===// // Load / store Instructions. // @@ -908,6 +1019,20 @@ def LDRSB_POST: AI3ldsbpo<(outs GPR:$dst, GPR:$base_wb), "ldrsb", "\t$dst, [$base], $offset", "$base = $base_wb", []>; } +// LDRT and LDRBT are for disassembly only. + +def LDRT : AI2ldwpo<(outs GPR:$dst, GPR:$base_wb), + (ins GPR:$base, am2offset:$offset), LdFrm, IIC_iLoadru, + "ldrt", "\t$dst, [$base], $offset", "$base = $base_wb", []> { + let Inst{21} = 1; // overwrite +} + +def LDRBT : AI2ldbpo<(outs GPR:$dst, GPR:$base_wb), + (ins GPR:$base,am2offset:$offset), LdFrm, IIC_iLoadru, + "ldrb", "\t$dst, [$base], $offset", "$base = $base_wb", []> { + let Inst{21} = 1; // overwrite +} + // Store def STR : AI2stw<(outs), (ins GPR:$src, addrmode2:$addr), StFrm, IIC_iStorer, "str", "\t$src, $addr", @@ -971,6 +1096,24 @@ def STRB_POST: AI2stbpo<(outs GPR:$base_wb), [(set GPR:$base_wb, (post_truncsti8 GPR:$src, GPR:$base, am2offset:$offset))]>; +// STRT and STRBT are for disassembly only. 
+ +def STRT : AI2stwpo<(outs GPR:$base_wb), + (ins GPR:$src, GPR:$base,am2offset:$offset), + StFrm, IIC_iStoreru, + "strt", "\t$src, [$base], $offset", "$base = $base_wb", + [/* For disassembly only; pattern left blank */]> { + let Inst{21} = 1; // overwrite +} + +def STRBT : AI2stbpo<(outs GPR:$base_wb), + (ins GPR:$src, GPR:$base,am2offset:$offset), + StFrm, IIC_iStoreru, + "strbt", "\t$src, [$base], $offset", "$base = $base_wb", + [/* For disassembly only; pattern left blank */]> { + let Inst{21} = 1; // overwrite +} + //===----------------------------------------------------------------------===// // Load / store multiple Instructions. // @@ -1015,7 +1158,7 @@ def MOVi16 : AI1<0b1000, (outs GPR:$dst), (ins i32imm:$src), DPFrm, IIC_iMOVi, "movw", "\t$dst, $src", [(set GPR:$dst, imm0_65535:$src)]>, - Requires<[IsARM, HasV6T2]> { + Requires<[IsARM, HasV6T2]>, UnaryDP { let Inst{20} = 0; let Inst{25} = 1; } @@ -1215,6 +1358,33 @@ def : ARMPat<(add GPR:$src, so_imm_neg:$imm), // (mul X, 2^n+1) -> (add (X << n), X) // (mul X, 2^n-1) -> (rsb X, (X << n)) +// Saturating adds/subtracts -- for disassembly only + +// GPR:$dst = GPR:$a op GPR:$b +class AQI<bits<8> op27_20, bits<4> op7_4, string opc> + : AI<(outs GPR:$dst), (ins GPR:$a, GPR:$b), DPFrm, IIC_iALUr, + opc, "\t$dst, $a, $b", + [/* For disassembly only; pattern left blank */]> { + let Inst{27-20} = op27_20; + let Inst{7-4} = op7_4; +} + +def QADD : AQI<0b00010000, 0b0101, "qadd">; +def QADD16 : AQI<0b01100010, 0b0001, "qadd16">; +def QADD8 : AQI<0b01100010, 0b1001, "qadd8">; +def QASX : AQI<0b01100010, 0b0011, "qasx">; +def QDADD : AQI<0b00010100, 0b0101, "qdadd">; +def QDSUB : AQI<0b00010110, 0b0101, "qdsub">; +def QSAX : AQI<0b01100010, 0b0101, "qsax">; +def QSUB : AQI<0b00010010, 0b0101, "qsub">; +def QSUB16 : AQI<0b01100010, 0b0111, "qsub16">; +def QSUB8 : AQI<0b01100010, 0b1111, "qsub8">; +def UQADD16 : AQI<0b01100110, 0b0001, "uqadd16">; +def UQADD8 : AQI<0b01100110, 0b1001, "uqadd8">; +def UQASX : AQI<0b01100110, 0b0011, "uqasx">; +def UQSAX : AQI<0b01100110, 0b0101, "uqsax">; +def UQSUB16 : AQI<0b01100110, 0b0111, "uqsub16">; +def UQSUB8 : AQI<0b01100110, 0b1111, "uqsub8">; //===----------------------------------------------------------------------===// // Bitwise Instructions. 
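The saturating instructions defined just above are wired up for disassembly only, but their semantics are well defined; QADD, for instance, is a signed 32-bit add that clamps instead of wrapping. A reference model (the sticky Q flag the real instruction also sets on saturation is omitted):

    #include <cassert>
    #include <cstdint>
    #include <limits>

    int32_t qadd(int32_t A, int32_t B) {
      int64_t Sum = int64_t(A) + int64_t(B);   // widen so the sum cannot wrap
      if (Sum > std::numeric_limits<int32_t>::max())
        return std::numeric_limits<int32_t>::max();
      if (Sum < std::numeric_limits<int32_t>::min())
        return std::numeric_limits<int32_t>::min();
      return int32_t(Sum);
    }

    int main() {
      assert(qadd(0x7FFFFFFF, 1) == 0x7FFFFFFF);  // clamps, does not wrap
      assert(qadd(-2, -0x7FFFFFFF) == INT32_MIN);
    }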
@@ -1241,11 +1411,14 @@ def BFC : I<(outs GPR:$dst), (ins GPR:$src, bf_inv_mask_imm:$imm), def MVNr : AsI1<0b1111, (outs GPR:$dst), (ins GPR:$src), DPFrm, IIC_iMOVr, "mvn", "\t$dst, $src", [(set GPR:$dst, (not GPR:$src))]>, UnaryDP { + let Inst{25} = 0; let Inst{11-4} = 0b00000000; } def MVNs : AsI1<0b1111, (outs GPR:$dst), (ins so_reg:$src), DPSoRegFrm, IIC_iMOVsr, "mvn", "\t$dst, $src", - [(set GPR:$dst, (not so_reg:$src))]>, UnaryDP; + [(set GPR:$dst, (not so_reg:$src))]>, UnaryDP { + let Inst{25} = 0; +} let isReMaterializable = 1, isAsCheapAsAMove = 1 in def MVNi : AsI1<0b1111, (outs GPR:$dst), (ins so_imm:$imm), DPFrm, IIC_iMOVi, "mvn", "\t$dst, $imm", @@ -1442,7 +1615,39 @@ multiclass AI_smla<string opc, PatFrag opnode> { defm SMUL : AI_smul<"smul", BinOpFrag<(mul node:$LHS, node:$RHS)>>; defm SMLA : AI_smla<"smla", BinOpFrag<(mul node:$LHS, node:$RHS)>>; -// TODO: Halfword multiple accumulate long: SMLAL<x><y> +// Halfword multiply accumulate long: SMLAL<x><y> -- for disassembly only +def SMLALBB : AMulxyI<0b0001010,(outs GPR:$ldst,GPR:$hdst),(ins GPR:$a,GPR:$b), + IIC_iMAC64, "smlalbb", "\t$ldst, $hdst, $a, $b", + [/* For disassembly only; pattern left blank */]>, + Requires<[IsARM, HasV5TE]> { + let Inst{5} = 0; + let Inst{6} = 0; +} + +def SMLALBT : AMulxyI<0b0001010,(outs GPR:$ldst,GPR:$hdst),(ins GPR:$a,GPR:$b), + IIC_iMAC64, "smlalbt", "\t$ldst, $hdst, $a, $b", + [/* For disassembly only; pattern left blank */]>, + Requires<[IsARM, HasV5TE]> { + let Inst{5} = 0; + let Inst{6} = 1; +} + +def SMLALTB : AMulxyI<0b0001010,(outs GPR:$ldst,GPR:$hdst),(ins GPR:$a,GPR:$b), + IIC_iMAC64, "smlaltb", "\t$ldst, $hdst, $a, $b", + [/* For disassembly only; pattern left blank */]>, + Requires<[IsARM, HasV5TE]> { + let Inst{5} = 1; + let Inst{6} = 0; +} + +def SMLALTT : AMulxyI<0b0001010,(outs GPR:$ldst,GPR:$hdst),(ins GPR:$a,GPR:$b), + IIC_iMAC64, "smlaltt", "\t$ldst, $hdst, $a, $b", + [/* For disassembly only; pattern left blank */]>, + Requires<[IsARM, HasV5TE]> { + let Inst{5} = 1; + let Inst{6} = 1; +} + // TODO: Dual halfword multiple: SMUAD, SMUSD, SMLAD, SMLSD, SMLALD, SMLSLD //===----------------------------------------------------------------------===// @@ -1773,6 +1978,27 @@ def STREXD : AIstrex<0b01, (outs GPR:$success), []>; } +// SWP/SWPB are deprecated in V6/V7 and for disassembly only. +let mayLoad = 1 in { +def SWP : AI<(outs GPR:$dst), (ins GPR:$src, GPR:$ptr), LdStExFrm, NoItinerary, + "swp", "\t$dst, $src, [$ptr]", + [/* For disassembly only; pattern left blank */]> { + let Inst{27-23} = 0b00010; + let Inst{22} = 0; // B = 0 + let Inst{21-20} = 0b00; + let Inst{7-4} = 0b1001; +} + +def SWPB : AI<(outs GPR:$dst), (ins GPR:$src, GPR:$ptr), LdStExFrm, NoItinerary, + "swpb", "\t$dst, $src, [$ptr]", + [/* For disassembly only; pattern left blank */]> { + let Inst{27-23} = 0b00010; + let Inst{22} = 1; // B = 1 + let Inst{21-20} = 0b00; + let Inst{7-4} = 0b1001; +} +} + //===----------------------------------------------------------------------===// // TLS Instructions // @@ -1797,21 +2023,22 @@ let isCall = 1, // except for our own input by listing the relevant registers in Defs. By // doing so, we also cause the prologue/epilogue code to actively preserve // all of the callee-saved resgisters, which is exactly what we want. -let Defs = +// A constant value is passed in $val, and we use the location as a scratch. 
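The SMLAL<x><y> family defined above multiplies two selected 16-bit halves and accumulates the product into a 64-bit value spread across $ldst/$hdst. Reference semantics for the bottom-bottom variant:

    #include <cstdint>

    // SMLALBB: sign-extend the bottom halfwords of Rn and Rm, multiply, and
    // add the product into the 64-bit accumulator held in RdHi:RdLo.
    void smlalbb(uint32_t &RdLo, uint32_t &RdHi, uint32_t Rn, uint32_t Rm) {
      int64_t Acc = (int64_t(RdHi) << 32) | RdLo;
      Acc += int64_t(int16_t(Rn)) * int16_t(Rm);   // 16x16 -> 64-bit add
      RdLo = uint32_t(Acc);
      RdHi = uint32_t(uint64_t(Acc) >> 32);
    }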
+let Defs = [ R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, LR, D0, D1, D2, D3, D4, D5, D6, D7, D8, D9, D10, D11, D12, D13, D14, D15, D16, D17, D18, D19, D20, D21, D22, D23, D24, D25, D26, D27, D28, D29, D30, D31 ] in { - def Int_eh_sjlj_setjmp : XI<(outs), (ins GPR:$src), + def Int_eh_sjlj_setjmp : XI<(outs), (ins GPR:$src, GPR:$val), AddrModeNone, SizeSpecial, IndexModeNone, Pseudo, NoItinerary, "str\tsp, [$src, #+8] @ eh_setjmp begin\n\t" - "add\tr12, pc, #8\n\t" - "str\tr12, [$src, #+4]\n\t" + "add\t$val, pc, #8\n\t" + "str\t$val, [$src, #+4]\n\t" "mov\tr0, #0\n\t" "add\tpc, pc, #0\n\t" "mov\tr0, #1 @ eh_setjmp end", "", - [(set R0, (ARMeh_sjlj_setjmp GPR:$src))]>; + [(set R0, (ARMeh_sjlj_setjmp GPR:$src, GPR:$val))]>; } //===----------------------------------------------------------------------===// @@ -1954,3 +2181,116 @@ include "ARMInstrVFP.td" // include "ARMInstrNEON.td" + +//===----------------------------------------------------------------------===// +// Coprocessor Instructions. For disassembly only. +// + +def CDP : ABI<0b1110, (outs), (ins nohash_imm:$cop, i32imm:$opc1, + nohash_imm:$CRd, nohash_imm:$CRn, nohash_imm:$CRm, i32imm:$opc2), + NoItinerary, "cdp", "\tp$cop, $opc1, cr$CRd, cr$CRn, cr$CRm, $opc2", + [/* For disassembly only; pattern left blank */]> { + let Inst{4} = 0; +} + +def CDP2 : ABXI<0b1110, (outs), (ins nohash_imm:$cop, i32imm:$opc1, + nohash_imm:$CRd, nohash_imm:$CRn, nohash_imm:$CRm, i32imm:$opc2), + NoItinerary, "cdp2\tp$cop, $opc1, cr$CRd, cr$CRn, cr$CRm, $opc2", + [/* For disassembly only; pattern left blank */]> { + let Inst{31-28} = 0b1111; + let Inst{4} = 0; +} + +def MCR : ABI<0b1110, (outs), (ins nohash_imm:$cop, i32imm:$opc1, + GPR:$Rt, nohash_imm:$CRn, nohash_imm:$CRm, i32imm:$opc2), + NoItinerary, "mcr", "\tp$cop, $opc1, $Rt, cr$CRn, cr$CRm, $opc2", + [/* For disassembly only; pattern left blank */]> { + let Inst{20} = 0; + let Inst{4} = 1; +} + +def MCR2 : ABXI<0b1110, (outs), (ins nohash_imm:$cop, i32imm:$opc1, + GPR:$Rt, nohash_imm:$CRn, nohash_imm:$CRm, i32imm:$opc2), + NoItinerary, "mcr2\tp$cop, $opc1, $Rt, cr$CRn, cr$CRm, $opc2", + [/* For disassembly only; pattern left blank */]> { + let Inst{31-28} = 0b1111; + let Inst{20} = 0; + let Inst{4} = 1; +} + +def MRC : ABI<0b1110, (outs), (ins nohash_imm:$cop, i32imm:$opc1, + GPR:$Rt, nohash_imm:$CRn, nohash_imm:$CRm, i32imm:$opc2), + NoItinerary, "mrc", "\tp$cop, $opc1, $Rt, cr$CRn, cr$CRm, $opc2", + [/* For disassembly only; pattern left blank */]> { + let Inst{20} = 1; + let Inst{4} = 1; +} + +def MRC2 : ABXI<0b1110, (outs), (ins nohash_imm:$cop, i32imm:$opc1, + GPR:$Rt, nohash_imm:$CRn, nohash_imm:$CRm, i32imm:$opc2), + NoItinerary, "mrc2\tp$cop, $opc1, $Rt, cr$CRn, cr$CRm, $opc2", + [/* For disassembly only; pattern left blank */]> { + let Inst{31-28} = 0b1111; + let Inst{20} = 1; + let Inst{4} = 1; +} + +def MCRR : ABI<0b1100, (outs), (ins nohash_imm:$cop, i32imm:$opc, + GPR:$Rt, GPR:$Rt2, nohash_imm:$CRm), + NoItinerary, "mcrr", "\tp$cop, $opc, $Rt, $Rt2, cr$CRm", + [/* For disassembly only; pattern left blank */]> { + let Inst{23-20} = 0b0100; +} + +def MCRR2 : ABXI<0b1100, (outs), (ins nohash_imm:$cop, i32imm:$opc, + GPR:$Rt, GPR:$Rt2, nohash_imm:$CRm), + NoItinerary, "mcrr2\tp$cop, $opc, $Rt, $Rt2, cr$CRm", + [/* For disassembly only; pattern left blank */]> { + let Inst{31-28} = 0b1111; + let Inst{23-20} = 0b0100; +} + +def MRRC : ABI<0b1100, (outs), (ins nohash_imm:$cop, i32imm:$opc, + GPR:$Rt, GPR:$Rt2, nohash_imm:$CRm), + NoItinerary, "mrrc", "\tp$cop, $opc, $Rt, $Rt2, 
cr$CRm", + [/* For disassembly only; pattern left blank */]> { + let Inst{23-20} = 0b0101; +} + +def MRRC2 : ABXI<0b1100, (outs), (ins nohash_imm:$cop, i32imm:$opc, + GPR:$Rt, GPR:$Rt2, nohash_imm:$CRm), + NoItinerary, "mrrc2\tp$cop, $opc, $Rt, $Rt2, cr$CRm", + [/* For disassembly only; pattern left blank */]> { + let Inst{31-28} = 0b1111; + let Inst{23-20} = 0b0101; +} + +//===----------------------------------------------------------------------===// +// Move between special register and ARM core register -- for disassembly only +// + +def MRS : ABI<0b0001,(outs GPR:$dst),(ins), NoItinerary, "mrs", "\t$dst, cpsr", + [/* For disassembly only; pattern left blank */]> { + let Inst{23-20} = 0b0000; + let Inst{7-4} = 0b0000; +} + +def MRSsys : ABI<0b0001,(outs GPR:$dst),(ins), NoItinerary,"mrs","\t$dst, spsr", + [/* For disassembly only; pattern left blank */]> { + let Inst{23-20} = 0b0100; + let Inst{7-4} = 0b0000; +} + +// FIXME: mask is ignored for the time being. +def MSR : ABI<0b0001,(outs),(ins GPR:$src), NoItinerary, "mrs", "\tcpsr, $src", + [/* For disassembly only; pattern left blank */]> { + let Inst{23-20} = 0b0010; + let Inst{7-4} = 0b0000; +} + +// FIXME: mask is ignored for the time being. +def MSRsys : ABI<0b0001,(outs),(ins GPR:$src),NoItinerary,"mrs","\tspsr, $src", + [/* For disassembly only; pattern left blank */]> { + let Inst{23-20} = 0b0110; + let Inst{7-4} = 0b0000; +} diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td index cd063bf..e2be7ba 100644 --- a/lib/Target/ARM/ARMInstrNEON.td +++ b/lib/Target/ARM/ARMInstrNEON.td @@ -2192,9 +2192,27 @@ def VBSLq : N3VX<1, 0, 0b01, 0b0001, 1, 1, (outs QPR:$dst), // VBIF : Vector Bitwise Insert if False // like VBSL but with: "vbif $dst, $src3, $src1", "$src2 = $dst", +def VBIFd : N3VX<1, 0, 0b11, 0b0001, 0, 1, + (outs DPR:$dst), (ins DPR:$src1, DPR:$src2, DPR:$src3), + IIC_VBINiD, "vbif", "$dst, $src2, $src3", "$src1 = $dst", + [/* For disassembly only; pattern left blank */]>; +def VBIFq : N3VX<1, 0, 0b11, 0b0001, 1, 1, + (outs QPR:$dst), (ins QPR:$src1, QPR:$src2, QPR:$src3), + IIC_VBINiQ, "vbif", "$dst, $src2, $src3", "$src1 = $dst", + [/* For disassembly only; pattern left blank */]>; + // VBIT : Vector Bitwise Insert if True // like VBSL but with: "vbit $dst, $src2, $src1", "$src3 = $dst", -// These are not yet implemented. The TwoAddress pass will not go looking +def VBITd : N3VX<1, 0, 0b10, 0b0001, 0, 1, + (outs DPR:$dst), (ins DPR:$src1, DPR:$src2, DPR:$src3), + IIC_VBINiD, "vbit", "$dst, $src2, $src3", "$src1 = $dst", + [/* For disassembly only; pattern left blank */]>; +def VBITq : N3VX<1, 0, 0b10, 0b0001, 1, 1, + (outs QPR:$dst), (ins QPR:$src1, QPR:$src2, QPR:$src3), + IIC_VBINiQ, "vbit", "$dst, $src2, $src3", "$src1 = $dst", + [/* For disassembly only; pattern left blank */]>; + +// VBIT/VBIF are not yet implemented. The TwoAddress pass will not go looking // for equivalent operations with different register constraints; it just // inserts copies. diff --git a/lib/Target/ARM/ARMInstrThumb.td b/lib/Target/ARM/ARMInstrThumb.td index 746caff..64142ad 100644 --- a/lib/Target/ARM/ARMInstrThumb.td +++ b/lib/Target/ARM/ARMInstrThumb.td @@ -132,6 +132,14 @@ PseudoInst<(outs), (ins i32imm:$amt), NoItinerary, [(ARMcallseq_start imm:$amt)]>, Requires<[IsThumb1Only]>; } +// The i32imm operand $val can be used by a debugger to store more information +// about the breakpoint. 
+def tBKPT : T1I<(outs), (ins i32imm:$val), NoItinerary, "bkpt\t$val", + [/* For disassembly only; pattern left blank */]>, + T1Encoding<0b101111> { + let Inst{9-8} = 0b10; +} + // For both thumb1 and thumb2. let isNotDuplicable = 1 in def tPICADD : TIt<(outs GPR:$dst), (ins GPR:$lhs, pclabel:$cp), IIC_iALUr, @@ -775,7 +783,7 @@ def tMOVCCr : T1pIt<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs), IIC_iCMOVr, "mov", "\t$dst, $rhs", []>, T1Special<{1,0,?,?}>; -def tMOVCCi : T1pIt<(outs GPR:$dst), (ins GPR:$lhs, i32imm:$rhs), IIC_iCMOVi, +def tMOVCCi : T1pIt<(outs tGPR:$dst), (ins tGPR:$lhs, i32imm:$rhs), IIC_iCMOVi, "mov", "\t$dst, $rhs", []>, T1General<{1,0,0,?,?}>; @@ -813,23 +821,20 @@ let isCall = 1, // except for our own input by listing the relevant registers in Defs. By // doing so, we also cause the prologue/epilogue code to actively preserve // all of the callee-saved resgisters, which is exactly what we want. +// The current SP is passed in $val, and we reuse the reg as a scratch. let Defs = [ R0, R1, R2, R3, R4, R5, R6, R7, R12 ] in { - def tInt_eh_sjlj_setjmp : ThumbXI<(outs), (ins GPR:$src), + def tInt_eh_sjlj_setjmp : ThumbXI<(outs),(ins tGPR:$src, tGPR:$val), AddrModeNone, SizeSpecial, NoItinerary, - "mov\tr12, r1\t@ begin eh.setjmp\n" - "\tmov\tr1, sp\n" - "\tstr\tr1, [$src, #8]\n" - "\tadr\tr1, 0f\n" - "\tadds\tr1, #1\n" - "\tstr\tr1, [$src, #4]\n" - "\tmov\tr1, r12\n" + "str\t$val, [$src, #8]\t@ begin eh.setjmp\n" + "\tmov\t$val, pc\n" + "\tadds\t$val, #9\n" + "\tstr\t$val, [$src, #4]\n" "\tmovs\tr0, #0\n" "\tb\t1f\n" - ".align 2\n" - "0:\tmovs\tr0, #1\t@ end eh.setjmp\n" + "\tmovs\tr0, #1\t@ end eh.setjmp\n" "1:", "", - [(set R0, (ARMeh_sjlj_setjmp GPR:$src))]>; + [(set R0, (ARMeh_sjlj_setjmp tGPR:$src, tGPR:$val))]>; } //===----------------------------------------------------------------------===// // Non-Instruction Patterns diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td index c7591d2..55c7aa2 100644 --- a/lib/Target/ARM/ARMInstrThumb2.td +++ b/lib/Target/ARM/ARMInstrThumb2.td @@ -1232,7 +1232,16 @@ def t2UBFX : T2I<(outs GPR:$dst), (ins GPR:$src, imm0_31:$lsb, imm0_31:$width), let Inst{15} = 0; } -// FIXME: A8.6.18 BFI - Bitfield insert (Encoding T1) +// A8.6.18 BFI - Bitfield insert (Encoding T1) +// Added for disassembler with the pattern field purposely left blank. +// FIXME: Utilize this instruction in codgen. +def t2BFI : T2I<(outs GPR:$dst), (ins GPR:$src, imm0_31:$lsb, imm0_31:$width), + IIC_iALUi, "bfi", "\t$dst, $src, $lsb, $width", []> { + let Inst{31-27} = 0b11110; + let Inst{25} = 1; + let Inst{24-20} = 0b10110; + let Inst{15} = 0; +} defm t2ORN : T2I_bin_irs<0b0011, "orn", BinOpFrag<(or node:$LHS, (not node:$RHS))>>; @@ -1808,22 +1817,23 @@ let isCall = 1, // except for our own input by listing the relevant registers in Defs. By // doing so, we also cause the prologue/epilogue code to actively preserve // all of the callee-saved resgisters, which is exactly what we want. -let Defs = +// The current SP is passed in $val, and we reuse the reg as a scratch. 
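The eh_sjlj_setjmp variants (ARM and Thumb1 above, Thumb2 just below) fill the same two buffer slots: the resume address at offset 4 and the stack pointer at offset 8, with $val now serving as the scratch register instead of a hard-coded r12/r1. In the Thumb sequences the odd "#9" adjustment leaves the low bit of the stored address set, marking it as Thumb code. The layout implied by the store offsets, as a struct (slot 0's use is not shown in these hunks):

    #include <cstdint>

    struct SjLjSlots {
      uint32_t Slot0;      // not written by these pseudos
      uint32_t ResumePC;   // [$src, #4]: where longjmp resumes (the "1" path)
      uint32_t SavedSP;    // [$src, #8]: stack pointer at setjmp time
    };
    static_assert(sizeof(SjLjSlots) == 12, "three 32-bit slots");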
+let Defs = [ R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, LR, D0, D1, D2, D3, D4, D5, D6, D7, D8, D9, D10, D11, D12, D13, D14, D15, D16, D17, D18, D19, D20, D21, D22, D23, D24, D25, D26, D27, D28, D29, D30, D31 ] in { - def t2Int_eh_sjlj_setjmp : Thumb2XI<(outs), (ins GPR:$src), + def t2Int_eh_sjlj_setjmp : Thumb2XI<(outs), (ins GPR:$src, tGPR:$val), AddrModeNone, SizeSpecial, NoItinerary, - "str.w\tsp, [$src, #+8] @ eh_setjmp begin\n" - "\tadr\tr12, 0f\n" - "\torr.w\tr12, r12, #1\n" - "\tstr.w\tr12, [$src, #+4]\n" + "str\t$val, [$src, #8]\t@ begin eh.setjmp\n" + "\tmov\t$val, pc\n" + "\tadds\t$val, #9\n" + "\tstr\t$val, [$src, #4]\n" "\tmovs\tr0, #0\n" "\tb\t1f\n" - "0:\tmovs\tr0, #1 @ eh_setjmp end\n" + "\tmovs\tr0, #1\t@ end eh.setjmp\n" "1:", "", - [(set R0, (ARMeh_sjlj_setjmp GPR:$src))]>; + [(set R0, (ARMeh_sjlj_setjmp GPR:$src, tGPR:$val))]>; } diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td index 5bfe89d..e516593 100644 --- a/lib/Target/ARM/ARMInstrVFP.td +++ b/lib/Target/ARM/ARMInstrVFP.td @@ -114,52 +114,56 @@ def VSTMS : AXSI5<(outs), (ins addrmode5:$addr, pred:$p, reglist:$wb, // FP Binary Operations. // -def VADDD : ADbI<0b11100011, (outs DPR:$dst), (ins DPR:$a, DPR:$b), +def VADDD : ADbI<0b11100, 0b11, 0, 0, (outs DPR:$dst), (ins DPR:$a, DPR:$b), IIC_fpALU64, "vadd", ".f64\t$dst, $a, $b", [(set DPR:$dst, (fadd DPR:$a, DPR:$b))]>; -def VADDS : ASbIn<0b11100011, (outs SPR:$dst), (ins SPR:$a, SPR:$b), +def VADDS : ASbIn<0b11100, 0b11, 0, 0, (outs SPR:$dst), (ins SPR:$a, SPR:$b), IIC_fpALU32, "vadd", ".f32\t$dst, $a, $b", [(set SPR:$dst, (fadd SPR:$a, SPR:$b))]>; // These are encoded as unary instructions. let Defs = [FPSCR] in { -def VCMPED : ADuI<0b11101011, 0b0100, 0b1100, (outs), (ins DPR:$a, DPR:$b), +def VCMPED : ADuI<0b11101, 0b11, 0b0100, 0b11, 0, (outs), (ins DPR:$a, DPR:$b), IIC_fpCMP64, "vcmpe", ".f64\t$a, $b", [(arm_cmpfp DPR:$a, DPR:$b)]>; -def VCMPES : ASuI<0b11101011, 0b0100, 0b1100, (outs), (ins SPR:$a, SPR:$b), +def VCMPD : ADuI<0b11101, 0b11, 0b0100, 0b01, 0, (outs), (ins DPR:$a, DPR:$b), + IIC_fpCMP64, "vcmp", ".f64\t$a, $b", + [/* For disassembly only; pattern left blank */]>; + +def VCMPES : ASuI<0b11101, 0b11, 0b0100, 0b11, 0, (outs), (ins SPR:$a, SPR:$b), IIC_fpCMP32, "vcmpe", ".f32\t$a, $b", [(arm_cmpfp SPR:$a, SPR:$b)]>; + +def VCMPS : ASuI<0b11101, 0b11, 0b0100, 0b01, 0, (outs), (ins SPR:$a, SPR:$b), + IIC_fpCMP32, "vcmp", ".f32\t$a, $b", + [/* For disassembly only; pattern left blank */]>; } -def VDIVD : ADbI<0b11101000, (outs DPR:$dst), (ins DPR:$a, DPR:$b), +def VDIVD : ADbI<0b11101, 0b00, 0, 0, (outs DPR:$dst), (ins DPR:$a, DPR:$b), IIC_fpDIV64, "vdiv", ".f64\t$dst, $a, $b", [(set DPR:$dst, (fdiv DPR:$a, DPR:$b))]>; -def VDIVS : ASbI<0b11101000, (outs SPR:$dst), (ins SPR:$a, SPR:$b), +def VDIVS : ASbI<0b11101, 0b00, 0, 0, (outs SPR:$dst), (ins SPR:$a, SPR:$b), IIC_fpDIV32, "vdiv", ".f32\t$dst, $a, $b", [(set SPR:$dst, (fdiv SPR:$a, SPR:$b))]>; -def VMULD : ADbI<0b11100010, (outs DPR:$dst), (ins DPR:$a, DPR:$b), +def VMULD : ADbI<0b11100, 0b10, 0, 0, (outs DPR:$dst), (ins DPR:$a, DPR:$b), IIC_fpMUL64, "vmul", ".f64\t$dst, $a, $b", [(set DPR:$dst, (fmul DPR:$a, DPR:$b))]>; -def VMULS : ASbIn<0b11100010, (outs SPR:$dst), (ins SPR:$a, SPR:$b), +def VMULS : ASbIn<0b11100, 0b10, 0, 0, (outs SPR:$dst), (ins SPR:$a, SPR:$b), IIC_fpMUL32, "vmul", ".f32\t$dst, $a, $b", [(set SPR:$dst, (fmul SPR:$a, SPR:$b))]>; -def VNMULD : ADbI<0b11100010, (outs DPR:$dst), (ins DPR:$a, DPR:$b), +def VNMULD : ADbI<0b11100, 0b10, 1, 0, 
(outs DPR:$dst), (ins DPR:$a, DPR:$b), IIC_fpMUL64, "vnmul", ".f64\t$dst, $a, $b", - [(set DPR:$dst, (fneg (fmul DPR:$a, DPR:$b)))]> { - let Inst{6} = 1; -} + [(set DPR:$dst, (fneg (fmul DPR:$a, DPR:$b)))]>; -def VNMULS : ASbI<0b11100010, (outs SPR:$dst), (ins SPR:$a, SPR:$b), +def VNMULS : ASbI<0b11100, 0b10, 1, 0, (outs SPR:$dst), (ins SPR:$a, SPR:$b), IIC_fpMUL32, "vnmul", ".f32\t$dst, $a, $b", - [(set SPR:$dst, (fneg (fmul SPR:$a, SPR:$b)))]> { - let Inst{6} = 1; -} + [(set SPR:$dst, (fneg (fmul SPR:$a, SPR:$b)))]>; // Match reassociated forms only if not sign dependent rounding. def : Pat<(fmul (fneg DPR:$a), DPR:$b), @@ -168,41 +172,45 @@ def : Pat<(fmul (fneg SPR:$a), SPR:$b), (VNMULS SPR:$a, SPR:$b)>, Requires<[NoHonorSignDependentRounding]>; -def VSUBD : ADbI<0b11100011, (outs DPR:$dst), (ins DPR:$a, DPR:$b), +def VSUBD : ADbI<0b11100, 0b11, 1, 0, (outs DPR:$dst), (ins DPR:$a, DPR:$b), IIC_fpALU64, "vsub", ".f64\t$dst, $a, $b", - [(set DPR:$dst, (fsub DPR:$a, DPR:$b))]> { - let Inst{6} = 1; -} + [(set DPR:$dst, (fsub DPR:$a, DPR:$b))]>; -def VSUBS : ASbIn<0b11100011, (outs SPR:$dst), (ins SPR:$a, SPR:$b), +def VSUBS : ASbIn<0b11100, 0b11, 1, 0, (outs SPR:$dst), (ins SPR:$a, SPR:$b), IIC_fpALU32, "vsub", ".f32\t$dst, $a, $b", - [(set SPR:$dst, (fsub SPR:$a, SPR:$b))]> { - let Inst{6} = 1; -} + [(set SPR:$dst, (fsub SPR:$a, SPR:$b))]>; //===----------------------------------------------------------------------===// // FP Unary Operations. // -def VABSD : ADuI<0b11101011, 0b0000, 0b1100, (outs DPR:$dst), (ins DPR:$a), +def VABSD : ADuI<0b11101, 0b11, 0b0000, 0b11, 0, (outs DPR:$dst), (ins DPR:$a), IIC_fpUNA64, "vabs", ".f64\t$dst, $a", [(set DPR:$dst, (fabs DPR:$a))]>; -def VABSS : ASuIn<0b11101011, 0b0000, 0b1100, (outs SPR:$dst), (ins SPR:$a), +def VABSS : ASuIn<0b11101, 0b11, 0b0000, 0b11, 0,(outs SPR:$dst), (ins SPR:$a), IIC_fpUNA32, "vabs", ".f32\t$dst, $a", [(set SPR:$dst, (fabs SPR:$a))]>; let Defs = [FPSCR] in { -def VCMPEZD : ADuI<0b11101011, 0b0101, 0b1100, (outs), (ins DPR:$a), +def VCMPEZD : ADuI<0b11101, 0b11, 0b0101, 0b11, 0, (outs), (ins DPR:$a), IIC_fpCMP64, "vcmpe", ".f64\t$a, #0", [(arm_cmpfp0 DPR:$a)]>; -def VCMPEZS : ASuI<0b11101011, 0b0101, 0b1100, (outs), (ins SPR:$a), +def VCMPZD : ADuI<0b11101, 0b11, 0b0101, 0b01, 0, (outs), (ins DPR:$a), + IIC_fpCMP64, "vcmp", ".f64\t$a, #0", + [/* For disassembly only; pattern left blank */]>; + +def VCMPEZS : ASuI<0b11101, 0b11, 0b0101, 0b11, 0, (outs), (ins SPR:$a), IIC_fpCMP32, "vcmpe", ".f32\t$a, #0", [(arm_cmpfp0 SPR:$a)]>; + +def VCMPZS : ASuI<0b11101, 0b11, 0b0101, 0b01, 0, (outs), (ins SPR:$a), + IIC_fpCMP32, "vcmp", ".f32\t$a, #0", + [/* For disassembly only; pattern left blank */]>; } -def VCVTDS : ASuI<0b11101011, 0b0111, 0b1100, (outs DPR:$dst), (ins SPR:$a), +def VCVTDS : ASuI<0b11101, 0b11, 0b0111, 0b11, 0, (outs DPR:$dst), (ins SPR:$a), IIC_fpCVTDS, "vcvt", ".f64.f32\t$dst, $a", [(set DPR:$dst, (fextend SPR:$a))]>; @@ -213,30 +221,49 @@ def VCVTSD : VFPAI<(outs SPR:$dst), (ins DPR:$a), VFPUnaryFrm, let Inst{27-23} = 0b11101; let Inst{21-16} = 0b110111; let Inst{11-8} = 0b1011; - let Inst{7-4} = 0b1100; + let Inst{7-6} = 0b11; + let Inst{4} = 0; } +// Between half-precision and single-precision. For disassembly only. 
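+// Illustrative note (not part of this patch): VCVTB operates on the bottom
+// half-word of its SPR operand and VCVTT on the top half-word, e.g.
+//   vcvtb.f32.f16 s0, s1   @ widen the low f16 of s1 to a single in s0
+//   vcvtt.f16.f32 s0, s1   @ narrow s1 into the top f16 of s0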
+ +def VCVTBSH : ASuI<0b11101, 0b11, 0b0010, 0b01, 0, (outs SPR:$dst), (ins SPR:$a), + /* FIXME */ IIC_fpCVTDS, "vcvtb", ".f32.f16\t$dst, $a", + [/* For disassembly only; pattern left blank */]>; + +def VCVTBHS : ASuI<0b11101, 0b11, 0b0011, 0b01, 0, (outs SPR:$dst), (ins SPR:$a), + /* FIXME */ IIC_fpCVTDS, "vcvtb", ".f16.f32\t$dst, $a", + [/* For disassembly only; pattern left blank */]>; + +def VCVTTSH : ASuI<0b11101, 0b11, 0b0010, 0b11, 0, (outs SPR:$dst), (ins SPR:$a), + /* FIXME */ IIC_fpCVTDS, "vcvtt", ".f32.f16\t$dst, $a", + [/* For disassembly only; pattern left blank */]>; + +def VCVTTHS : ASuI<0b11101, 0b11, 0b0011, 0b11, 0, (outs SPR:$dst), (ins SPR:$a), + /* FIXME */ IIC_fpCVTDS, "vcvtt", ".f16.f32\t$dst, $a", + [/* For disassembly only; pattern left blank */]>; + let neverHasSideEffects = 1 in { -def VMOVD: ADuI<0b11101011, 0b0000, 0b0100, (outs DPR:$dst), (ins DPR:$a), +def VMOVD: ADuI<0b11101, 0b11, 0b0000, 0b01, 0, (outs DPR:$dst), (ins DPR:$a), IIC_fpUNA64, "vmov", ".f64\t$dst, $a", []>; -def VMOVS: ASuI<0b11101011, 0b0000, 0b0100, (outs SPR:$dst), (ins SPR:$a), +def VMOVS: ASuI<0b11101, 0b11, 0b0000, 0b01, 0, (outs SPR:$dst), (ins SPR:$a), IIC_fpUNA32, "vmov", ".f32\t$dst, $a", []>; } // neverHasSideEffects -def VNEGD : ADuI<0b11101011, 0b0001, 0b0100, (outs DPR:$dst), (ins DPR:$a), +def VNEGD : ADuI<0b11101, 0b11, 0b0001, 0b01, 0, (outs DPR:$dst), (ins DPR:$a), IIC_fpUNA64, "vneg", ".f64\t$dst, $a", [(set DPR:$dst, (fneg DPR:$a))]>; -def VNEGS : ASuIn<0b11101011, 0b0001, 0b0100, (outs SPR:$dst), (ins SPR:$a), +def VNEGS : ASuIn<0b11101, 0b11, 0b0001, 0b01, 0,(outs SPR:$dst), (ins SPR:$a), IIC_fpUNA32, "vneg", ".f32\t$dst, $a", [(set SPR:$dst, (fneg SPR:$a))]>; -def VSQRTD : ADuI<0b11101011, 0b0001, 0b1100, (outs DPR:$dst), (ins DPR:$a), +def VSQRTD : ADuI<0b11101, 0b11, 0b0001, 0b11, 0, (outs DPR:$dst), (ins DPR:$a), IIC_fpSQRT64, "vsqrt", ".f64\t$dst, $a", [(set DPR:$dst, (fsqrt DPR:$a))]>; -def VSQRTS : ASuI<0b11101011, 0b0001, 0b1100, (outs SPR:$dst), (ins SPR:$a), +def VSQRTS : ASuI<0b11101, 0b11, 0b0001, 0b11, 0, (outs SPR:$dst), (ins SPR:$a), IIC_fpSQRT32, "vsqrt", ".f32\t$dst, $a", [(set SPR:$dst, (fsqrt SPR:$a))]>; @@ -255,7 +282,16 @@ def VMOVSR : AVConv4I<0b11100000, 0b1010, (outs SPR:$dst), (ins GPR:$src), def VMOVRRD : AVConv3I<0b11000101, 0b1011, (outs GPR:$wb, GPR:$dst2), (ins DPR:$src), IIC_VMOVDI, "vmov", "\t$wb, $dst2, $src", - [/* FIXME: Can't write pattern for multiple result instr*/]>; + [/* FIXME: Can't write pattern for multiple result instr*/]> { + let Inst{7-6} = 0b00; +} + +def VMOVRRS : AVConv3I<0b11000101, 0b1010, + (outs GPR:$wb, GPR:$dst2), (ins SPR:$src1, SPR:$src2), + IIC_VMOVDI, "vmov", "\t$wb, $dst2, $src1, $src2", + [/* For disassembly only; pattern left blank */]> { + let Inst{7-6} = 0b00; +} // FMDHR: GPR -> SPR // FMDLR: GPR -> SPR @@ -263,7 +299,16 @@ def VMOVRRD : AVConv3I<0b11000101, 0b1011, def VMOVDRR : AVConv5I<0b11000100, 0b1011, (outs DPR:$dst), (ins GPR:$src1, GPR:$src2), IIC_VMOVID, "vmov", "\t$dst, $src1, $src2", - [(set DPR:$dst, (arm_fmdrr GPR:$src1, GPR:$src2))]>; + [(set DPR:$dst, (arm_fmdrr GPR:$src1, GPR:$src2))]> { + let Inst{7-6} = 0b00; +} + +def VMOVSRR : AVConv5I<0b11000100, 0b1010, + (outs SPR:$dst1, SPR:$dst2), (ins GPR:$src1, GPR:$src2), + IIC_VMOVID, "vmov", "\t$dst1, $dst2, $src1, $src2", + [/* For disassembly only; pattern left blank */]> { + let Inst{7-6} = 0b00; +} // FMRDH: SPR -> GPR // FMRDL: SPR -> GPR @@ -277,137 +322,271 @@ def VMOVDRR : AVConv5I<0b11000100, 0b1011, // Int to FP: -def VSITOD : 
AVConv1I<0b11101011, 0b1000, 0b1011, (outs DPR:$dst), (ins SPR:$a), +def VSITOD : AVConv1I<0b11101, 0b11, 0b1000, 0b1011, + (outs DPR:$dst), (ins SPR:$a), IIC_fpCVTID, "vcvt", ".f64.s32\t$dst, $a", [(set DPR:$dst, (arm_sitof SPR:$a))]> { - let Inst{7} = 1; + let Inst{7} = 1; // s32 } -def VSITOS : AVConv1In<0b11101011, 0b1000, 0b1010, (outs SPR:$dst),(ins SPR:$a), +def VSITOS : AVConv1In<0b11101, 0b11, 0b1000, 0b1010, + (outs SPR:$dst),(ins SPR:$a), IIC_fpCVTIS, "vcvt", ".f32.s32\t$dst, $a", [(set SPR:$dst, (arm_sitof SPR:$a))]> { - let Inst{7} = 1; + let Inst{7} = 1; // s32 } -def VUITOD : AVConv1I<0b11101011, 0b1000, 0b1011, (outs DPR:$dst), (ins SPR:$a), +def VUITOD : AVConv1I<0b11101, 0b11, 0b1000, 0b1011, + (outs DPR:$dst), (ins SPR:$a), IIC_fpCVTID, "vcvt", ".f64.u32\t$dst, $a", - [(set DPR:$dst, (arm_uitof SPR:$a))]>; + [(set DPR:$dst, (arm_uitof SPR:$a))]> { + let Inst{7} = 0; // u32 +} -def VUITOS : AVConv1In<0b11101011, 0b1000, 0b1010, (outs SPR:$dst),(ins SPR:$a), +def VUITOS : AVConv1In<0b11101, 0b11, 0b1000, 0b1010, + (outs SPR:$dst), (ins SPR:$a), IIC_fpCVTIS, "vcvt", ".f32.u32\t$dst, $a", - [(set SPR:$dst, (arm_uitof SPR:$a))]>; + [(set SPR:$dst, (arm_uitof SPR:$a))]> { + let Inst{7} = 0; // u32 +} // FP to Int: // Always set Z bit in the instruction, i.e. "round towards zero" variants. -def VTOSIZD : AVConv1I<0b11101011, 0b1101, 0b1011, +def VTOSIZD : AVConv1I<0b11101, 0b11, 0b1101, 0b1011, (outs SPR:$dst), (ins DPR:$a), IIC_fpCVTDI, "vcvt", ".s32.f64\t$dst, $a", [(set SPR:$dst, (arm_ftosi DPR:$a))]> { let Inst{7} = 1; // Z bit } -def VTOSIZS : AVConv1In<0b11101011, 0b1101, 0b1010, +def VTOSIZS : AVConv1In<0b11101, 0b11, 0b1101, 0b1010, (outs SPR:$dst), (ins SPR:$a), IIC_fpCVTSI, "vcvt", ".s32.f32\t$dst, $a", [(set SPR:$dst, (arm_ftosi SPR:$a))]> { let Inst{7} = 1; // Z bit } -def VTOUIZD : AVConv1I<0b11101011, 0b1100, 0b1011, +def VTOUIZD : AVConv1I<0b11101, 0b11, 0b1100, 0b1011, (outs SPR:$dst), (ins DPR:$a), IIC_fpCVTDI, "vcvt", ".u32.f64\t$dst, $a", [(set SPR:$dst, (arm_ftoui DPR:$a))]> { let Inst{7} = 1; // Z bit } -def VTOUIZS : AVConv1In<0b11101011, 0b1100, 0b1010, +def VTOUIZS : AVConv1In<0b11101, 0b11, 0b1100, 0b1010, (outs SPR:$dst), (ins SPR:$a), IIC_fpCVTSI, "vcvt", ".u32.f32\t$dst, $a", [(set SPR:$dst, (arm_ftoui SPR:$a))]> { let Inst{7} = 1; // Z bit } +// And the Z bit '0' variants, i.e. use the rounding mode specified by FPSCR. +// For disassembly only. 
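+// Illustrative note (not part of this patch): with the Z bit clear these
+// assemble as VCVTR, e.g. "vcvtr.s32.f64 s0, d1", which rounds according to
+// the RMode field of FPSCR instead of the round-towards-zero behavior of the
+// plain "vcvt" variants above.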
+ +def VTOSIRD : AVConv1I<0b11101, 0b11, 0b1101, 0b1011, + (outs SPR:$dst), (ins DPR:$a), + IIC_fpCVTDI, "vcvtr", ".s32.f64\t$dst, $a", + [/* For disassembly only; pattern left blank */]> { + let Inst{7} = 0; // Z bit +} + +def VTOSIRS : AVConv1In<0b11101, 0b11, 0b1101, 0b1010, + (outs SPR:$dst), (ins SPR:$a), + IIC_fpCVTSI, "vcvtr", ".s32.f32\t$dst, $a", + [/* For disassembly only; pattern left blank */]> { + let Inst{7} = 0; // Z bit +} + +def VTOUIRD : AVConv1I<0b11101, 0b11, 0b1100, 0b1011, + (outs SPR:$dst), (ins DPR:$a), + IIC_fpCVTDI, "vcvtr", ".u32.f64\t$dst, $a", + [/* For disassembly only; pattern left blank */]> { + let Inst{7} = 0; // Z bit +} + +def VTOUIRS : AVConv1In<0b11101, 0b11, 0b1100, 0b1010, + (outs SPR:$dst), (ins SPR:$a), + IIC_fpCVTSI, "vcvtr", ".u32.f32\t$dst, $a", + [/* For disassembly only; pattern left blank */]> { + let Inst{7} = 0; // Z bit +} + +// Convert between floating-point and fixed-point +// Data type for fixed-point naming convention: +// S16 (U=0, sx=0) -> SH +// U16 (U=1, sx=0) -> UH +// S32 (U=0, sx=1) -> SL +// U32 (U=1, sx=1) -> UL + +let Constraints = "$a = $dst" in { + +// FP to Fixed-Point: + +def VTOSHS : AVConv1XI<0b11101, 0b11, 0b1110, 0b1010, 0, + (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits), + IIC_fpCVTSI, "vcvt", ".s16.f32\t$dst, $a, $fbits", + [/* For disassembly only; pattern left blank */]>; + +def VTOUHS : AVConv1XI<0b11101, 0b11, 0b1111, 0b1010, 0, + (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits), + IIC_fpCVTSI, "vcvt", ".u16.f32\t$dst, $a, $fbits", + [/* For disassembly only; pattern left blank */]>; + +def VTOSLS : AVConv1XI<0b11101, 0b11, 0b1110, 0b1010, 1, + (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits), + IIC_fpCVTSI, "vcvt", ".s32.f32\t$dst, $a, $fbits", + [/* For disassembly only; pattern left blank */]>; + +def VTOULS : AVConv1XI<0b11101, 0b11, 0b1111, 0b1010, 1, + (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits), + IIC_fpCVTSI, "vcvt", ".u32.f32\t$dst, $a, $fbits", + [/* For disassembly only; pattern left blank */]>; + +def VTOSHD : AVConv1XI<0b11101, 0b11, 0b1110, 0b1011, 0, + (outs DPR:$dst), (ins DPR:$a, i32imm:$fbits), + IIC_fpCVTDI, "vcvt", ".s16.f64\t$dst, $a, $fbits", + [/* For disassembly only; pattern left blank */]>; + +def VTOUHD : AVConv1XI<0b11101, 0b11, 0b1111, 0b1011, 0, + (outs DPR:$dst), (ins DPR:$a, i32imm:$fbits), + IIC_fpCVTDI, "vcvt", ".u16.f64\t$dst, $a, $fbits", + [/* For disassembly only; pattern left blank */]>; + +def VTOSLD : AVConv1XI<0b11101, 0b11, 0b1110, 0b1011, 1, + (outs DPR:$dst), (ins DPR:$a, i32imm:$fbits), + IIC_fpCVTDI, "vcvt", ".s32.f64\t$dst, $a, $fbits", + [/* For disassembly only; pattern left blank */]>; + +def VTOULD : AVConv1XI<0b11101, 0b11, 0b1111, 0b1011, 1, + (outs DPR:$dst), (ins DPR:$a, i32imm:$fbits), + IIC_fpCVTDI, "vcvt", ".u32.f64\t$dst, $a, $fbits", + [/* For disassembly only; pattern left blank */]>; + +// Fixed-Point to FP: + +def VSHTOS : AVConv1XI<0b11101, 0b11, 0b1010, 0b1010, 0, + (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits), + IIC_fpCVTIS, "vcvt", ".f32.s16\t$dst, $a, $fbits", + [/* For disassembly only; pattern left blank */]>; + +def VUHTOS : AVConv1XI<0b11101, 0b11, 0b1011, 0b1010, 0, + (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits), + IIC_fpCVTIS, "vcvt", ".f32.u16\t$dst, $a, $fbits", + [/* For disassembly only; pattern left blank */]>; + +def VSLTOS : AVConv1XI<0b11101, 0b11, 0b1010, 0b1010, 1, + (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits), + IIC_fpCVTIS, "vcvt", ".f32.s32\t$dst, $a, $fbits", + [/* For disassembly only; pattern left blank */]>; + +def VULTOS : 
AVConv1XI<0b11101, 0b11, 0b1011, 0b1010, 1, + (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits), + IIC_fpCVTIS, "vcvt", ".f32.u32\t$dst, $a, $fbits", + [/* For disassembly only; pattern left blank */]>; + +def VSHTOD : AVConv1XI<0b11101, 0b11, 0b1010, 0b1011, 0, + (outs DPR:$dst), (ins DPR:$a, i32imm:$fbits), + IIC_fpCVTID, "vcvt", ".f64.s16\t$dst, $a, $fbits", + [/* For disassembly only; pattern left blank */]>; + +def VUHTOD : AVConv1XI<0b11101, 0b11, 0b1011, 0b1011, 0, + (outs DPR:$dst), (ins DPR:$a, i32imm:$fbits), + IIC_fpCVTID, "vcvt", ".f64.u16\t$dst, $a, $fbits", + [/* For disassembly only; pattern left blank */]>; + +def VSLTOD : AVConv1XI<0b11101, 0b11, 0b1010, 0b1011, 1, + (outs DPR:$dst), (ins DPR:$a, i32imm:$fbits), + IIC_fpCVTID, "vcvt", ".f64.s32\t$dst, $a, $fbits", + [/* For disassembly only; pattern left blank */]>; + +def VULTOD : AVConv1XI<0b11101, 0b11, 0b1011, 0b1011, 1, + (outs DPR:$dst), (ins DPR:$a, i32imm:$fbits), + IIC_fpCVTID, "vcvt", ".f64.u32\t$dst, $a, $fbits", + [/* For disassembly only; pattern left blank */]>; + +} // End of 'let Constraints = "$a = $dst" in' + //===----------------------------------------------------------------------===// // FP FMA Operations. // -def VMLAD : ADbI<0b11100000, (outs DPR:$dst), (ins DPR:$dstin, DPR:$a, DPR:$b), +def VMLAD : ADbI<0b11100, 0b00, 0, 0, + (outs DPR:$dst), (ins DPR:$dstin, DPR:$a, DPR:$b), IIC_fpMAC64, "vmla", ".f64\t$dst, $a, $b", [(set DPR:$dst, (fadd (fmul DPR:$a, DPR:$b), DPR:$dstin))]>, RegConstraint<"$dstin = $dst">; -def VMLAS : ASbIn<0b11100000, (outs SPR:$dst), (ins SPR:$dstin, SPR:$a, SPR:$b), +def VMLAS : ASbIn<0b11100, 0b00, 0, 0, + (outs SPR:$dst), (ins SPR:$dstin, SPR:$a, SPR:$b), IIC_fpMAC32, "vmla", ".f32\t$dst, $a, $b", [(set SPR:$dst, (fadd (fmul SPR:$a, SPR:$b), SPR:$dstin))]>, RegConstraint<"$dstin = $dst">; -def VNMLSD : ADbI<0b11100001, (outs DPR:$dst), (ins DPR:$dstin, DPR:$a, DPR:$b), +def VNMLSD : ADbI<0b11100, 0b01, 0, 0, + (outs DPR:$dst), (ins DPR:$dstin, DPR:$a, DPR:$b), IIC_fpMAC64, "vnmls", ".f64\t$dst, $a, $b", [(set DPR:$dst, (fsub (fmul DPR:$a, DPR:$b), DPR:$dstin))]>, RegConstraint<"$dstin = $dst">; -def VNMLSS : ASbI<0b11100001, (outs SPR:$dst), (ins SPR:$dstin, SPR:$a, SPR:$b), +def VNMLSS : ASbI<0b11100, 0b01, 0, 0, + (outs SPR:$dst), (ins SPR:$dstin, SPR:$a, SPR:$b), IIC_fpMAC32, "vnmls", ".f32\t$dst, $a, $b", [(set SPR:$dst, (fsub (fmul SPR:$a, SPR:$b), SPR:$dstin))]>, RegConstraint<"$dstin = $dst">; -def VMLSD : ADbI<0b11100000, (outs DPR:$dst), (ins DPR:$dstin, DPR:$a, DPR:$b), +def VMLSD : ADbI<0b11100, 0b00, 1, 0, + (outs DPR:$dst), (ins DPR:$dstin, DPR:$a, DPR:$b), IIC_fpMAC64, "vmls", ".f64\t$dst, $a, $b", [(set DPR:$dst, (fadd (fneg (fmul DPR:$a, DPR:$b)), DPR:$dstin))]>, - RegConstraint<"$dstin = $dst"> { - let Inst{6} = 1; -} + RegConstraint<"$dstin = $dst">; -def VMLSS : ASbIn<0b11100000, (outs SPR:$dst), (ins SPR:$dstin, SPR:$a, SPR:$b), +def VMLSS : ASbIn<0b11100, 0b00, 1, 0, + (outs SPR:$dst), (ins SPR:$dstin, SPR:$a, SPR:$b), IIC_fpMAC32, "vmls", ".f32\t$dst, $a, $b", [(set SPR:$dst, (fadd (fneg (fmul SPR:$a, SPR:$b)), SPR:$dstin))]>, - RegConstraint<"$dstin = $dst"> { - let Inst{6} = 1; -} + RegConstraint<"$dstin = $dst">; def : Pat<(fsub DPR:$dstin, (fmul DPR:$a, DPR:$b)), (VMLSD DPR:$dstin, DPR:$a, DPR:$b)>, Requires<[DontUseNEONForFP]>; def : Pat<(fsub SPR:$dstin, (fmul SPR:$a, SPR:$b)), (VMLSS SPR:$dstin, SPR:$a, SPR:$b)>, Requires<[DontUseNEONForFP]>; -def VNMLAD :
ADbI<0b11100, 0b01, 1, 0, + (outs DPR:$dst), (ins DPR:$dstin, DPR:$a, DPR:$b), IIC_fpMAC64, "vnmla", ".f64\t$dst, $a, $b", [(set DPR:$dst, (fsub (fneg (fmul DPR:$a, DPR:$b)), DPR:$dstin))]>, - RegConstraint<"$dstin = $dst"> { - let Inst{6} = 1; -} + RegConstraint<"$dstin = $dst">; -def VNMLAS : ASbI<0b11100001, (outs SPR:$dst), (ins SPR:$dstin, SPR:$a, SPR:$b), +def VNMLAS : ASbI<0b11100, 0b01, 1, 0, + (outs SPR:$dst), (ins SPR:$dstin, SPR:$a, SPR:$b), IIC_fpMAC32, "vnmla", ".f32\t$dst, $a, $b", [(set SPR:$dst, (fsub (fneg (fmul SPR:$a, SPR:$b)), SPR:$dstin))]>, - RegConstraint<"$dstin = $dst"> { - let Inst{6} = 1; -} + RegConstraint<"$dstin = $dst">; //===----------------------------------------------------------------------===// // FP Conditional moves. // -def VMOVDcc : ADuI<0b11101011, 0b0000, 0b0100, +def VMOVDcc : ADuI<0b11101, 0b11, 0b0000, 0b01, 0, (outs DPR:$dst), (ins DPR:$false, DPR:$true), IIC_fpUNA64, "vmov", ".f64\t$dst, $true", [/*(set DPR:$dst, (ARMcmov DPR:$false, DPR:$true, imm:$cc))*/]>, RegConstraint<"$false = $dst">; -def VMOVScc : ASuI<0b11101011, 0b0000, 0b0100, +def VMOVScc : ASuI<0b11101, 0b11, 0b0000, 0b01, 0, (outs SPR:$dst), (ins SPR:$false, SPR:$true), IIC_fpUNA32, "vmov", ".f32\t$dst, $true", [/*(set SPR:$dst, (ARMcmov SPR:$false, SPR:$true, imm:$cc))*/]>, RegConstraint<"$false = $dst">; -def VNEGDcc : ADuI<0b11101011, 0b0001, 0b0100, +def VNEGDcc : ADuI<0b11101, 0b11, 0b0001, 0b01, 0, (outs DPR:$dst), (ins DPR:$false, DPR:$true), IIC_fpUNA64, "vneg", ".f64\t$dst, $true", [/*(set DPR:$dst, (ARMcneg DPR:$false, DPR:$true, imm:$cc))*/]>, RegConstraint<"$false = $dst">; -def VNEGScc : ASuI<0b11101011, 0b0001, 0b0100, +def VNEGScc : ASuI<0b11101, 0b11, 0b0001, 0b01, 0, (outs SPR:$dst), (ins SPR:$false, SPR:$true), IIC_fpUNA32, "vneg", ".f32\t$dst, $true", [/*(set SPR:$dst, (ARMcneg SPR:$false, SPR:$true, imm:$cc))*/]>, @@ -432,6 +611,31 @@ def FMSTAT : VFPAI<(outs), (ins), VFPMiscFrm, IIC_fpSTAT, "vmrs", let Inst{4} = 1; } +// FPSCR <-> GPR (for disassembly only) + +let Uses = [FPSCR] in { +def VMRS : VFPAI<(outs GPR:$dst), (ins), VFPMiscFrm, IIC_fpSTAT, "vmrs", + "\t$dst, fpscr", + [/* For disassembly only; pattern left blank */]> { + let Inst{27-20} = 0b11101111; + let Inst{19-16} = 0b0001; + let Inst{11-8} = 0b1010; + let Inst{7} = 0; + let Inst{4} = 1; +} +} + +let Defs = [FPSCR] in { +def VMSR : VFPAI<(outs), (ins GPR:$src), VFPMiscFrm, IIC_fpSTAT, "vmsr", + "\tfpscr, $src", + [/* For disassembly only; pattern left blank */]> { + let Inst{27-20} = 0b11101110; + let Inst{19-16} = 0b0001; + let Inst{11-8} = 0b1010; + let Inst{7} = 0; + let Inst{4} = 1; +} +} // Materialize FP immediates. VFP3 only. let isReMaterializable = 1 in { diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp index b78b95b..4e2d181 100644 --- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp +++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp @@ -350,7 +350,8 @@ ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, : ARMRegisterInfo::getRegisterNumbering(Reg); // AM4 - register numbers in ascending order. // AM5 - consecutive register numbers in ascending order. 
- if (NewOffset == Offset + (int)Size && + if (Reg != ARM::SP && + NewOffset == Offset + (int)Size && ((isAM4 && RegNum > PRegNum) || RegNum == PRegNum+1)) { Offset += Size; PRegNum = RegNum; diff --git a/lib/Target/ARM/ARMMCAsmInfo.cpp b/lib/Target/ARM/ARMMCAsmInfo.cpp index 3dd87c0..ccd6add 100644 --- a/lib/Target/ARM/ARMMCAsmInfo.cpp +++ b/lib/Target/ARM/ARMMCAsmInfo.cpp @@ -44,7 +44,6 @@ ARMMCAsmInfoDarwin::ARMMCAsmInfoDarwin() { AsmTransCBE = arm_asm_table; Data64bitsDirective = 0; CommentString = "@"; - COMMDirectiveTakesAlignment = false; SupportsDebugInformation = true; // Exceptions handling @@ -53,17 +52,16 @@ ARMMCAsmInfoDarwin::ARMMCAsmInfoDarwin() { } ARMELFMCAsmInfo::ARMELFMCAsmInfo() { + // ".comm align is in bytes but .align is pow-2." AlignmentIsInBytes = false; + Data64bitsDirective = 0; CommentString = "@"; - COMMDirectiveTakesAlignment = false; - - NeedsSet = false; + HasLEB128 = true; AbsoluteDebugSectionOffsets = true; PrivateGlobalPrefix = ".L"; WeakRefDirective = "\t.weak\t"; - SetDirective = "\t.set\t"; HasLCOMMDirective = true; DwarfRequiresFrameSection = false; diff --git a/lib/Target/ARM/ARMMachineFunctionInfo.h b/lib/Target/ARM/ARMMachineFunctionInfo.h index 2176b27..c998ede 100644 --- a/lib/Target/ARM/ARMMachineFunctionInfo.h +++ b/lib/Target/ARM/ARMMachineFunctionInfo.h @@ -35,11 +35,6 @@ class ARMFunctionInfo : public MachineFunctionInfo { /// 'isThumb'. bool hasThumb2; - /// Align - required alignment. ARM functions and Thumb functions with - /// constant pools require 4-byte alignment; other Thumb functions - /// require only 2-byte alignment. - unsigned Align; - /// VarArgsRegSaveSize - Size of the register save area for vararg functions. /// unsigned VarArgsRegSaveSize; @@ -94,7 +89,6 @@ public: ARMFunctionInfo() : isThumb(false), hasThumb2(false), - Align(2U), VarArgsRegSaveSize(0), HasStackFrame(false), LRSpilledForFarJump(false), FramePtrSpillOffset(0), GPRCS1Offset(0), GPRCS2Offset(0), DPRCSOffset(0), @@ -105,7 +99,6 @@ public: explicit ARMFunctionInfo(MachineFunction &MF) : isThumb(MF.getTarget().getSubtarget<ARMSubtarget>().isThumb()), hasThumb2(MF.getTarget().getSubtarget<ARMSubtarget>().hasThumb2()), - Align(isThumb ? 
1U : 2U), VarArgsRegSaveSize(0), HasStackFrame(false), LRSpilledForFarJump(false), FramePtrSpillOffset(0), GPRCS1Offset(0), GPRCS2Offset(0), DPRCSOffset(0), @@ -118,9 +111,6 @@ public: bool isThumb1OnlyFunction() const { return isThumb && !hasThumb2; } bool isThumb2Function() const { return isThumb && hasThumb2; } - unsigned getAlign() const { return Align; } - void setAlign(unsigned a) { Align = a; } - unsigned getVarArgsRegSaveSize() const { return VarArgsRegSaveSize; } void setVarArgsRegSaveSize(unsigned s) { VarArgsRegSaveSize = s; } diff --git a/lib/Target/ARM/ARMRegisterInfo.td b/lib/Target/ARM/ARMRegisterInfo.td index d393e8d..0d4200c 100644 --- a/lib/Target/ARM/ARMRegisterInfo.td +++ b/lib/Target/ARM/ARMRegisterInfo.td @@ -123,8 +123,8 @@ def FPSCR : ARMReg<1, "fpscr">; // r10 == Stack Limit // def GPR : RegisterClass<"ARM", [i32], 32, [R0, R1, R2, R3, R4, R5, R6, - R7, R8, R9, R10, R12, R11, - LR, SP, PC]> { + R7, R8, R9, R10, R11, R12, + SP, LR, PC]> { let MethodProtos = [{ iterator allocation_order_begin(const MachineFunction &MF) const; iterator allocation_order_end(const MachineFunction &MF) const; diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp index 71f3883..426862c 100644 --- a/lib/Target/ARM/ARMSubtarget.cpp +++ b/lib/Target/ARM/ARMSubtarget.cpp @@ -122,9 +122,9 @@ ARMSubtarget::GVIsIndirectSymbol(GlobalValue *GV, Reloc::Model RelocM) const { if (RelocM == Reloc::Static) return false; - // GV with ghost linkage (in JIT lazy compilation mode) do not require an - // extra load from stub. - bool isDecl = GV->isDeclaration() && !GV->hasNotBeenReadFromBitcode(); + // Materializable GVs (in JIT lazy compilation mode) do not require an extra + // load from stub. + bool isDecl = GV->isDeclaration() && !GV->isMaterializable(); if (!isTargetDarwin()) { // Extra load is needed for all externally visible. diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp index 4d20a5c..7233f5c 100644 --- a/lib/Target/ARM/ARMTargetMachine.cpp +++ b/lib/Target/ARM/ARMTargetMachine.cpp @@ -133,18 +133,6 @@ bool ARMBaseTargetMachine::addPreEmitPass(PassManagerBase &PM, bool ARMBaseTargetMachine::addCodeEmitter(PassManagerBase &PM, CodeGenOpt::Level OptLevel, - MachineCodeEmitter &MCE) { - // FIXME: Move this to TargetJITInfo! - if (DefRelocModel == Reloc::Default) - setRelocationModel(Reloc::Static); - - // Machine code emitter pass for ARM. - PM.add(createARMCodeEmitterPass(*this, MCE)); - return false; -} - -bool ARMBaseTargetMachine::addCodeEmitter(PassManagerBase &PM, - CodeGenOpt::Level OptLevel, JITCodeEmitter &JCE) { // FIXME: Move this to TargetJITInfo! if (DefRelocModel == Reloc::Default) @@ -154,40 +142,3 @@ bool ARMBaseTargetMachine::addCodeEmitter(PassManagerBase &PM, PM.add(createARMJITCodeEmitterPass(*this, JCE)); return false; } - -bool ARMBaseTargetMachine::addCodeEmitter(PassManagerBase &PM, - CodeGenOpt::Level OptLevel, - ObjectCodeEmitter &OCE) { - // FIXME: Move this to TargetJITInfo! - if (DefRelocModel == Reloc::Default) - setRelocationModel(Reloc::Static); - - // Machine code emitter pass for ARM. - PM.add(createARMObjectCodeEmitterPass(*this, OCE)); - return false; -} - -bool ARMBaseTargetMachine::addSimpleCodeEmitter(PassManagerBase &PM, - CodeGenOpt::Level OptLevel, - MachineCodeEmitter &MCE) { - // Machine code emitter pass for ARM. 
- PM.add(createARMCodeEmitterPass(*this, MCE)); - return false; -} - -bool ARMBaseTargetMachine::addSimpleCodeEmitter(PassManagerBase &PM, - CodeGenOpt::Level OptLevel, - JITCodeEmitter &JCE) { - // Machine code emitter pass for ARM. - PM.add(createARMJITCodeEmitterPass(*this, JCE)); - return false; -} - -bool ARMBaseTargetMachine::addSimpleCodeEmitter(PassManagerBase &PM, - CodeGenOpt::Level OptLevel, - ObjectCodeEmitter &OCE) { - // Machine code emitter pass for ARM. - PM.add(createARMObjectCodeEmitterPass(*this, OCE)); - return false; -} - diff --git a/lib/Target/ARM/ARMTargetMachine.h b/lib/Target/ARM/ARMTargetMachine.h index dd9542e..88e67e3 100644 --- a/lib/Target/ARM/ARMTargetMachine.h +++ b/lib/Target/ARM/ARMTargetMachine.h @@ -53,20 +53,7 @@ public: virtual bool addPreSched2(PassManagerBase &PM, CodeGenOpt::Level OptLevel); virtual bool addPreEmitPass(PassManagerBase &PM, CodeGenOpt::Level OptLevel); virtual bool addCodeEmitter(PassManagerBase &PM, CodeGenOpt::Level OptLevel, - MachineCodeEmitter &MCE); - virtual bool addCodeEmitter(PassManagerBase &PM, CodeGenOpt::Level OptLevel, JITCodeEmitter &MCE); - virtual bool addCodeEmitter(PassManagerBase &PM, CodeGenOpt::Level OptLevel, - ObjectCodeEmitter &OCE); - virtual bool addSimpleCodeEmitter(PassManagerBase &PM, - CodeGenOpt::Level OptLevel, - MachineCodeEmitter &MCE); - virtual bool addSimpleCodeEmitter(PassManagerBase &PM, - CodeGenOpt::Level OptLevel, - JITCodeEmitter &MCE); - virtual bool addSimpleCodeEmitter(PassManagerBase &PM, - CodeGenOpt::Level OptLevel, - ObjectCodeEmitter &OCE); }; /// ARMTargetMachine - ARM target machine. diff --git a/lib/Target/ARM/ARMTargetObjectFile.h b/lib/Target/ARM/ARMTargetObjectFile.h index 9703403..a488c0a 100644 --- a/lib/Target/ARM/ARMTargetObjectFile.h +++ b/lib/Target/ARM/ARMTargetObjectFile.h @@ -10,7 +10,7 @@ #ifndef LLVM_TARGET_ARM_TARGETOBJECTFILE_H #define LLVM_TARGET_ARM_TARGETOBJECTFILE_H -#include "llvm/Target/TargetLoweringObjectFile.h" +#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" #include "llvm/MC/MCSectionELF.h" namespace llvm { @@ -24,7 +24,7 @@ namespace llvm { if (TM.getSubtarget<ARMSubtarget>().isAAPCS_ABI()) { StaticCtorSection = - getELFSection(".init_array", MCSectionELF::SHT_INIT_ARRAY, + getELFSection(".init_array", MCSectionELF::SHT_INIT_ARRAY, MCSectionELF::SHF_WRITE | MCSectionELF::SHF_ALLOC, SectionKind::getDataRel()); StaticDtorSection = diff --git a/lib/Target/ARM/AsmParser/Makefile b/lib/Target/ARM/AsmParser/Makefile index 4fb8564..97e5612 100644 --- a/lib/Target/ARM/AsmParser/Makefile +++ b/lib/Target/ARM/AsmParser/Makefile @@ -8,7 +8,6 @@ ##===----------------------------------------------------------------------===## LEVEL = ../../../.. LIBRARYNAME = LLVMARMAsmParser -CXXFLAGS = -fno-rtti # Hack: we need to include 'main' ARM target directory to grab private headers CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. 
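For readability, here is the one code-emitter hook that survives the ARMTargetMachine.cpp deletions above, reassembled from the kept hunk lines (a sketch of the post-patch state, not code added by this patch):

bool ARMBaseTargetMachine::addCodeEmitter(PassManagerBase &PM,
                                          CodeGenOpt::Level OptLevel,
                                          JITCodeEmitter &JCE) {
  // FIXME: Move this to TargetJITInfo!
  if (DefRelocModel == Reloc::Default)
    setRelocationModel(Reloc::Static);

  // Machine code emitter pass for ARM.
  PM.add(createARMJITCodeEmitterPass(*this, JCE));
  return false;
}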
diff --git a/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp b/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp index e1f386e..f60cc33 100644 --- a/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp +++ b/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp @@ -30,6 +30,7 @@ #include "llvm/CodeGen/MachineModuleInfoImpls.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" +#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCInst.h" @@ -37,13 +38,11 @@ #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Target/TargetData.h" -#include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" #include "llvm/Target/TargetRegistry.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallString.h" -#include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringSet.h" #include "llvm/Support/CommandLine.h" @@ -53,8 +52,6 @@ #include <cctype> using namespace llvm; -STATISTIC(EmittedInsts, "Number of machine instrs printed"); - static cl::opt<bool> EnableMCInst("enable-arm-mcinst-printer", cl::Hidden, cl::desc("enable experimental asmprinter gunk in the arm backend")); @@ -76,8 +73,9 @@ namespace { public: explicit ARMAsmPrinter(formatted_raw_ostream &O, TargetMachine &TM, - const MCAsmInfo *T, bool V) - : AsmPrinter(O, TM, T, V), AFI(NULL), MCP(NULL) { + MCContext &Ctx, MCStreamer &Streamer, + const MCAsmInfo *T) + : AsmPrinter(O, TM, Ctx, Streamer, T), AFI(NULL), MCP(NULL) { Subtarget = &TM.getSubtarget<ARMSubtarget>(); } @@ -85,10 +83,6 @@ namespace { return "ARM Assembly Printer"; } - void printMCInst(const MCInst *MI) { - ARMInstPrinter(O, *MAI, VerboseAsm).printInstruction(MI); - } - void printInstructionThroughMCStreamer(const MachineInstr *MI); @@ -162,11 +156,18 @@ namespace { void printInstruction(const MachineInstr *MI); // autogenerated. static const char *getRegisterName(unsigned RegNo); - void printMachineInstruction(const MachineInstr *MI); + virtual void EmitInstruction(const MachineInstr *MI); bool runOnMachineFunction(MachineFunction &F); + + virtual void EmitConstantPool() {} // we emit constant pools ourselves! + virtual void EmitFunctionEntryLabel(); void EmitStartOfAsmFile(Module &M); void EmitEndOfAsmFile(Module &M); + MCSymbol *GetARMSetPICJumpTableLabel2(unsigned uid, unsigned uid2, + const MachineBasicBlock *MBB) const; + MCSymbol *GetARMJTIPICJumpTableLabel2(unsigned uid, unsigned uid2) const; + /// EmitMachineConstantPoolValue - Print a machine constantpool value to /// the .s file. virtual void EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV) { @@ -199,7 +200,7 @@ namespace { MachineModuleInfoMachO &MMIMachO = MMI->getObjFileInfo<MachineModuleInfoMachO>(); - const MCSymbol *&StubSym = + MCSymbol *&StubSym = GV->hasHiddenVisibility() ? MMIMachO.getHiddenGVStubEntry(Sym) : MMIMachO.getGVStubEntry(Sym); if (StubSym == 0) @@ -219,7 +220,7 @@ namespace { O << "-."; O << ')'; } - O << '\n'; + OutStreamer.AddBlankLine(); } void getAnalysisUsage(AnalysisUsage &AU) const { @@ -233,97 +234,26 @@ namespace { #include "ARMGenAsmWriter.inc" -/// runOnMachineFunction - This uses the printInstruction() -/// method to print assembly for each instruction.
-/// -bool ARMAsmPrinter::runOnMachineFunction(MachineFunction &MF) { - this->MF = &MF; - - AFI = MF.getInfo<ARMFunctionInfo>(); - MCP = MF.getConstantPool(); - - SetupMachineFunction(MF); - O << "\n"; - - // NOTE: we don't print out constant pools here, they are handled as - // instructions. - - O << '\n'; - - // Print out labels for the function. - const Function *F = MF.getFunction(); - OutStreamer.SwitchSection(getObjFileLowering().SectionForGlobal(F, Mang, TM)); - - switch (F->getLinkage()) { - default: llvm_unreachable("Unknown linkage type!"); - case Function::PrivateLinkage: - case Function::InternalLinkage: - break; - case Function::ExternalLinkage: - O << "\t.globl\t" << *CurrentFnSym << "\n"; - break; - case Function::LinkerPrivateLinkage: - case Function::WeakAnyLinkage: - case Function::WeakODRLinkage: - case Function::LinkOnceAnyLinkage: - case Function::LinkOnceODRLinkage: - if (Subtarget->isTargetDarwin()) { - O << "\t.globl\t" << *CurrentFnSym << "\n"; - O << "\t.weak_definition\t" << *CurrentFnSym << "\n"; - } else { - O << MAI->getWeakRefDirective() << *CurrentFnSym << "\n"; - } - break; - } - - printVisibility(CurrentFnSym, F->getVisibility()); - - unsigned FnAlign = 1 << MF.getAlignment(); // MF alignment is log2. +void ARMAsmPrinter::EmitFunctionEntryLabel() { if (AFI->isThumbFunction()) { - EmitAlignment(FnAlign, F, AFI->getAlign()); O << "\t.code\t16\n"; O << "\t.thumb_func"; if (Subtarget->isTargetDarwin()) - O << "\t" << *CurrentFnSym; - O << "\n"; - } else { - EmitAlignment(FnAlign, F); - } - - O << *CurrentFnSym << ":\n"; - // Emit pre-function debug information. - DW->BeginFunction(&MF); - - if (Subtarget->isTargetDarwin()) { - // If the function is empty, then we need to emit *something*. Otherwise, - // the function's label might be associated with something that it wasn't - // meant to be associated with. We emit a noop in this situation. - MachineFunction::iterator I = MF.begin(); - - if (++I == MF.end() && MF.front().empty()) - O << "\tnop\n"; - } - - // Print out code for the function. - for (MachineFunction::const_iterator I = MF.begin(), E = MF.end(); - I != E; ++I) { - // Print a label for the basic block. - if (I != MF.begin()) - EmitBasicBlockStart(I); - - // Print the assembly for the instruction. - for (MachineBasicBlock::const_iterator II = I->begin(), E = I->end(); - II != E; ++II) - printMachineInstruction(II); + O << '\t' << *CurrentFnSym; + O << '\n'; } + + OutStreamer.EmitLabel(CurrentFnSym); +} - if (MAI->hasDotTypeDotSizeDirective()) - O << "\t.size " << *CurrentFnSym << ", .-" << *CurrentFnSym << "\n"; - - // Emit post-function debug information. - DW->EndFunction(&MF); +/// runOnMachineFunction - This uses the printInstruction() +/// method to print assembly for each instruction. +/// +bool ARMAsmPrinter::runOnMachineFunction(MachineFunction &MF) { + AFI = MF.getInfo<ARMFunctionInfo>(); + MCP = MF.getConstantPool(); - return false; + return AsmPrinter::runOnMachineFunction(MF); } void ARMAsmPrinter::printOperand(const MachineInstr *MI, int OpNum, @@ -367,7 +297,7 @@ void ARMAsmPrinter::printOperand(const MachineInstr *MI, int OpNum, break; } case MachineOperand::MO_MachineBasicBlock: - O << *GetMBBSymbol(MO.getMBB()->getNumber()); + O << *MO.getMBB()->getSymbol(OutContext); return; case MachineOperand::MO_GlobalAddress: { bool isCallOp = Modifier && !strcmp(Modifier, "call"); @@ -889,7 +819,7 @@ void ARMAsmPrinter::printCPInstOperand(const MachineInstr *MI, int OpNum, // data itself. 
if (!strcmp(Modifier, "label")) { unsigned ID = MI->getOperand(OpNum).getImm(); - O << *GetCPISymbol(ID) << ":\n"; + OutStreamer.EmitLabel(GetCPISymbol(ID)); } else { assert(!strcmp(Modifier, "cpentry") && "Unknown modifier for CPE"); unsigned CPI = MI->getOperand(OpNum).getIndex(); @@ -904,6 +834,24 @@ void ARMAsmPrinter::printCPInstOperand(const MachineInstr *MI, int OpNum, } } +MCSymbol *ARMAsmPrinter:: +GetARMSetPICJumpTableLabel2(unsigned uid, unsigned uid2, + const MachineBasicBlock *MBB) const { + SmallString<60> Name; + raw_svector_ostream(Name) << MAI->getPrivateGlobalPrefix() + << getFunctionNumber() << '_' << uid << '_' << uid2 + << "_set_" << MBB->getNumber(); + return OutContext.GetOrCreateSymbol(Name.str()); +} + +MCSymbol *ARMAsmPrinter:: +GetARMJTIPICJumpTableLabel2(unsigned uid, unsigned uid2) const { + SmallString<60> Name; + raw_svector_ostream(Name) << MAI->getPrivateGlobalPrefix() << "JTI" + << getFunctionNumber() << '_' << uid << '_' << uid2; + return OutContext.GetOrCreateSymbol(Name.str()); +} + void ARMAsmPrinter::printJTBlockOperand(const MachineInstr *MI, int OpNum) { assert(!Subtarget->isThumb2() && "Thumb2 should use double-jump jumptables!"); @@ -911,36 +859,34 @@ void ARMAsmPrinter::printJTBlockOperand(const MachineInstr *MI, int OpNum) { const MachineOperand &MO2 = MI->getOperand(OpNum+1); // Unique Id unsigned JTI = MO1.getIndex(); - O << MAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber() - << '_' << JTI << '_' << MO2.getImm() << ":\n"; + MCSymbol *JTISymbol = GetARMJTIPICJumpTableLabel2(JTI, MO2.getImm()); + OutStreamer.EmitLabel(JTISymbol); const char *JTEntryDirective = MAI->getData32bitsDirective(); - const MachineFunction *MF = MI->getParent()->getParent(); const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo(); const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables(); const std::vector<MachineBasicBlock*> &JTBBs = JT[JTI].MBBs; - bool UseSet= MAI->getSetDirective() && TM.getRelocationModel() == Reloc::PIC_; + bool UseSet= MAI->hasSetDirective() && TM.getRelocationModel() == Reloc::PIC_; SmallPtrSet<MachineBasicBlock*, 8> JTSets; for (unsigned i = 0, e = JTBBs.size(); i != e; ++i) { MachineBasicBlock *MBB = JTBBs[i]; bool isNew = JTSets.insert(MBB); - if (UseSet && isNew) - printPICJumpTableSetLabel(JTI, MO2.getImm(), MBB); + if (UseSet && isNew) { + O << "\t.set\t" + << *GetARMSetPICJumpTableLabel2(JTI, MO2.getImm(), MBB) << ',' + << *MBB->getSymbol(OutContext) << '-' << *JTISymbol << '\n'; + } O << JTEntryDirective << ' '; if (UseSet) - O << MAI->getPrivateGlobalPrefix() << getFunctionNumber() - << '_' << JTI << '_' << MO2.getImm() - << "_set_" << MBB->getNumber(); - else if (TM.getRelocationModel() == Reloc::PIC_) { - O << *GetMBBSymbol(MBB->getNumber()) - << '-' << MAI->getPrivateGlobalPrefix() << "JTI" - << getFunctionNumber() << '_' << JTI << '_' << MO2.getImm(); - } else { - O << *GetMBBSymbol(MBB->getNumber()); - } + O << *GetARMSetPICJumpTableLabel2(JTI, MO2.getImm(), MBB); + else if (TM.getRelocationModel() == Reloc::PIC_) + O << *MBB->getSymbol(OutContext) << '-' << *JTISymbol; + else + O << *MBB->getSymbol(OutContext); + if (i != e-1) O << '\n'; } @@ -950,10 +896,10 @@ void ARMAsmPrinter::printJT2BlockOperand(const MachineInstr *MI, int OpNum) { const MachineOperand &MO1 = MI->getOperand(OpNum); const MachineOperand &MO2 = MI->getOperand(OpNum+1); // Unique Id unsigned JTI = MO1.getIndex(); - O << MAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber() - << '_' << JTI << '_' << MO2.getImm() << ":\n"; + + 
MCSymbol *JTISymbol = GetARMJTIPICJumpTableLabel2(JTI, MO2.getImm()); + OutStreamer.EmitLabel(JTISymbol); - const MachineFunction *MF = MI->getParent()->getParent(); const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo(); const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables(); const std::vector<MachineBasicBlock*> &JTBBs = JT[JTI].MBBs; @@ -969,13 +915,12 @@ void ARMAsmPrinter::printJT2BlockOperand(const MachineInstr *MI, int OpNum) { O << MAI->getData8bitsDirective(); else if (HalfWordOffset) O << MAI->getData16bitsDirective(); - if (ByteOffset || HalfWordOffset) { - O << '(' << *GetMBBSymbol(MBB->getNumber()); - O << "-" << MAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber() - << '_' << JTI << '_' << MO2.getImm() << ")/2"; - } else { - O << "\tb.w " << *GetMBBSymbol(MBB->getNumber()); - } + + if (ByteOffset || HalfWordOffset) + O << '(' << *MBB->getSymbol(OutContext) << "-" << *JTISymbol << ")/2"; + else + O << "\tb.w " << *MBB->getSymbol(OutContext); + if (i != e-1) O << '\n'; } @@ -1076,12 +1021,7 @@ bool ARMAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, return false; } -void ARMAsmPrinter::printMachineInstruction(const MachineInstr *MI) { - ++EmittedInsts; - - // Call the autogenerated instruction printer routines. - processDebugLoc(MI, true); - +void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { if (EnableMCInst) { printInstructionThroughMCStreamer(MI); } else { @@ -1090,12 +1030,8 @@ void ARMAsmPrinter::printMachineInstruction(const MachineInstr *MI) { EmitAlignment(2); printInstruction(MI); + OutStreamer.AddBlankLine(); } - - if (VerboseAsm) - EmitComments(*MI); - O << '\n'; - processDebugLoc(MI, false); } void ARMAsmPrinter::EmitStartOfAsmFile(Module &M) { @@ -1215,20 +1151,6 @@ void ARMAsmPrinter::printInstructionThroughMCStreamer(const MachineInstr *MI) { case ARM::t2MOVi32imm: assert(0 && "Should be lowered by thumb2it pass"); default: break; - case TargetInstrInfo::DBG_LABEL: - case TargetInstrInfo::EH_LABEL: - case TargetInstrInfo::GC_LABEL: - printLabel(MI); - return; - case TargetInstrInfo::KILL: - printKill(MI); - return; - case TargetInstrInfo::INLINEASM: - printInlineAsm(MI); - return; - case TargetInstrInfo::IMPLICIT_DEF: - printImplicitDef(MI); - return; case ARM::PICADD: { // FIXME: Remove asm string from td file. // This is a pseudo op for a label + instruction sequence, which looks like: // LPC0: @@ -1250,7 +1172,7 @@ void ARMAsmPrinter::printInstructionThroughMCStreamer(const MachineInstr *MI) { AddInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg())); AddInst.addOperand(MCOperand::CreateReg(ARM::PC)); AddInst.addOperand(MCOperand::CreateReg(MI->getOperand(1).getReg())); - printMCInst(&AddInst); + OutStreamer.EmitInstruction(AddInst); return; } case ARM::CONSTPOOL_ENTRY: { // FIXME: Remove asm string from td file. 
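For concreteness, the two jump-table label helpers added to ARMAsmPrinter.cpp above produce names like the following (assuming the ELF ".L" private prefix set in ARMMCAsmInfo.cpp earlier; Darwin uses plain "L"; the function number, uids, and block number here are hypothetical):

  GetARMJTIPICJumpTableLabel2(1, 0)          -> .LJTI3_1_0     (in function #3)
  GetARMSetPICJumpTableLabel2(1, 0, MBB #7)  -> .L3_1_0_set_7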
@@ -1291,8 +1213,7 @@ void ARMAsmPrinter::printInstructionThroughMCStreamer(const MachineInstr *MI) { TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(3).getReg())); TmpInst.addOperand(MCOperand::CreateReg(0)); // cc_out - printMCInst(&TmpInst); - O << '\n'; + OutStreamer.EmitInstruction(TmpInst); } { @@ -1306,7 +1227,7 @@ void ARMAsmPrinter::printInstructionThroughMCStreamer(const MachineInstr *MI) { TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(3).getReg())); TmpInst.addOperand(MCOperand::CreateReg(0)); // cc_out - printMCInst(&TmpInst); + OutStreamer.EmitInstruction(TmpInst); } return; } @@ -1325,8 +1246,7 @@ void ARMAsmPrinter::printInstructionThroughMCStreamer(const MachineInstr *MI) { TmpInst.addOperand(MCOperand::CreateImm(MI->getOperand(2).getImm())); TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(3).getReg())); - printMCInst(&TmpInst); - O << '\n'; + OutStreamer.EmitInstruction(TmpInst); } { @@ -1340,7 +1260,7 @@ void ARMAsmPrinter::printInstructionThroughMCStreamer(const MachineInstr *MI) { TmpInst.addOperand(MCOperand::CreateImm(MI->getOperand(2).getImm())); TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(3).getReg())); - printMCInst(&TmpInst); + OutStreamer.EmitInstruction(TmpInst); } return; @@ -1349,8 +1269,7 @@ void ARMAsmPrinter::printInstructionThroughMCStreamer(const MachineInstr *MI) { MCInst TmpInst; MCInstLowering.Lower(MI, TmpInst); - - printMCInst(&TmpInst); + OutStreamer.EmitInstruction(TmpInst); } //===----------------------------------------------------------------------===// diff --git a/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp b/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp index 6885ecb..d7d8e09 100644 --- a/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp +++ b/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp @@ -24,7 +24,6 @@ using namespace llvm; // Include the auto-generated portion of the assembly writer. #define MachineInstr MCInst #define ARMAsmPrinter ARMInstPrinter // FIXME: REMOVE. -#define NO_ASM_WRITER_BOILERPLATE #include "ARMGenAsmWriter.inc" #undef MachineInstr #undef ARMAsmPrinter @@ -353,6 +352,5 @@ void ARMInstPrinter::printPCLabel(const MCInst *MI, unsigned OpNum) { } void ARMInstPrinter::printThumbS4ImmOperand(const MCInst *MI, unsigned OpNum) { - // FIXME: remove this. - abort(); + O << "#" << MI->getOperand(OpNum).getImm() * 4; } diff --git a/lib/Target/ARM/AsmPrinter/ARMMCInstLower.cpp b/lib/Target/ARM/AsmPrinter/ARMMCInstLower.cpp index f843ee2..1b2dd48 100644 --- a/lib/Target/ARM/AsmPrinter/ARMMCInstLower.cpp +++ b/lib/Target/ARM/AsmPrinter/ARMMCInstLower.cpp @@ -135,7 +135,7 @@ void ARMMCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const { break; case MachineOperand::MO_MachineBasicBlock: MCOp = MCOperand::CreateExpr(MCSymbolRefExpr::Create( - Printer.GetMBBSymbol(MO.getMBB()->getNumber()), Ctx)); + MO.getMBB()->getSymbol(Ctx), Ctx)); break; case MachineOperand::MO_GlobalAddress: MCOp = LowerSymbolOperand(MO, GetGlobalAddressSymbol(MO)); diff --git a/lib/Target/ARM/AsmPrinter/Makefile b/lib/Target/ARM/AsmPrinter/Makefile index 93b8fc9..208becc 100644 --- a/lib/Target/ARM/AsmPrinter/Makefile +++ b/lib/Target/ARM/AsmPrinter/Makefile @@ -8,7 +8,6 @@ ##===----------------------------------------------------------------------===## LEVEL = ../../../.. LIBRARYNAME = LLVMARMAsmPrinter -CXXFLAGS = -fno-rtti # Hack: we need to include 'main' arm target directory to grab private headers CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. 
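A quick sanity check on the printThumbS4ImmOperand change above (the operand value is hypothetical, not from the patch): these Thumb immediates are stored pre-divided by 4, so an MCOperand immediate of 2 now prints as "#8":

  // MI->getOperand(OpNum).getImm() == 2
  O << "#" << MI->getOperand(OpNum).getImm() * 4;   // emits "#8"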
diff --git a/lib/Target/ARM/Makefile b/lib/Target/ARM/Makefile index b766a86..a8dd38c 100644 --- a/lib/Target/ARM/Makefile +++ b/lib/Target/ARM/Makefile @@ -10,7 +10,6 @@ LEVEL = ../../.. LIBRARYNAME = LLVMARMCodeGen TARGET = ARM -CXXFLAGS = -fno-rtti # Make sure that tblgen is run, first thing. BUILT_SOURCES = ARMGenRegisterInfo.h.inc ARMGenRegisterNames.inc \ diff --git a/lib/Target/ARM/README.txt b/lib/Target/ARM/README.txt index a6f26a5..9efb5a1 100644 --- a/lib/Target/ARM/README.txt +++ b/lib/Target/ARM/README.txt @@ -71,26 +71,6 @@ were disabled due to badness with the ARM carry flag on subtracts. //===---------------------------------------------------------------------===// -We currently compile abs: -int foo(int p) { return p < 0 ? -p : p; } - -into: - -_foo: - rsb r1, r0, #0 - cmn r0, #1 - movgt r1, r0 - mov r0, r1 - bx lr - -This is very, uh, literal. This could be a 3 operation sequence: - t = (p sra 31); - res = (p xor t)-t - -Which would be better. This occurs in png decode. - -//===---------------------------------------------------------------------===// - More load / store optimizations: 1) Better representation for block transfer? This is from Olden/power: diff --git a/lib/Target/ARM/TargetInfo/Makefile b/lib/Target/ARM/TargetInfo/Makefile index 589dbe5..6292ab1 100644 --- a/lib/Target/ARM/TargetInfo/Makefile +++ b/lib/Target/ARM/TargetInfo/Makefile @@ -8,7 +8,6 @@ ##===----------------------------------------------------------------------===## LEVEL = ../../../.. LIBRARYNAME = LLVMARMInfo -CXXFLAGS = -fno-rtti # Hack: we need to include 'main' target directory to grab private headers CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. diff --git a/lib/Target/ARM/Thumb2InstrInfo.cpp b/lib/Target/ARM/Thumb2InstrInfo.cpp index 387edaf..20f13f1 100644 --- a/lib/Target/ARM/Thumb2InstrInfo.cpp +++ b/lib/Target/ARM/Thumb2InstrInfo.cpp @@ -382,8 +382,8 @@ bool llvm::rewriteT2FrameIndex(MachineInstr &MI, unsigned FrameRegIdx, MI.getOperand(FrameRegIdx+1).ChangeToImmediate(ThisImmVal); } else { - // AddrMode4 cannot handle any offset. - if (AddrMode == ARMII::AddrMode4) + // AddrMode4 and AddrMode6 cannot handle any offset. + if (AddrMode == ARMII::AddrMode4 || AddrMode == ARMII::AddrMode6) return false; // AddrModeT2_so cannot handle any offset. If there is no offset @@ -418,15 +418,12 @@ bool llvm::rewriteT2FrameIndex(MachineInstr &MI, unsigned FrameRegIdx, NewOpc = positiveOffsetOpcode(Opcode); NumBits = 12; } - } else { - // VFP and NEON address modes. - int InstrOffs = 0; - if (AddrMode == ARMII::AddrMode5) { - const MachineOperand &OffOp = MI.getOperand(FrameRegIdx+1); - InstrOffs = ARM_AM::getAM5Offset(OffOp.getImm()); - if (ARM_AM::getAM5Op(OffOp.getImm()) == ARM_AM::sub) - InstrOffs *= -1; - } + } else if (AddrMode == ARMII::AddrMode5) { + // VFP address mode. 
+ const MachineOperand &OffOp = MI.getOperand(FrameRegIdx+1); + int InstrOffs = ARM_AM::getAM5Offset(OffOp.getImm()); + if (ARM_AM::getAM5Op(OffOp.getImm()) == ARM_AM::sub) + InstrOffs *= -1; NumBits = 8; Scale = 4; Offset += InstrOffs * 4; @@ -435,6 +432,8 @@ bool llvm::rewriteT2FrameIndex(MachineInstr &MI, unsigned FrameRegIdx, Offset = -Offset; isSub = true; } + } else { + llvm_unreachable("Unsupported addressing mode!"); } if (NewOpc != Opcode) diff --git a/lib/Target/ARM/Thumb2SizeReduction.cpp b/lib/Target/ARM/Thumb2SizeReduction.cpp index 95288bf..5086eff 100644 --- a/lib/Target/ARM/Thumb2SizeReduction.cpp +++ b/lib/Target/ARM/Thumb2SizeReduction.cpp @@ -83,7 +83,7 @@ namespace { // FIXME: Do we need the 16-bit 'S' variant? { ARM::t2MOVr,ARM::tMOVgpr2gpr,0, 0, 0, 0, 0, 1,0, 0 }, { ARM::t2MOVCCr,0, ARM::tMOVCCr, 0, 0, 0, 0, 0,1, 0 }, - { ARM::t2MOVCCi,0, ARM::tMOVCCi, 0, 8, 0, 0, 0,1, 0 }, + { ARM::t2MOVCCi,0, ARM::tMOVCCi, 0, 8, 0, 1, 0,1, 0 }, { ARM::t2MUL, 0, ARM::tMUL, 0, 0, 0, 1, 0,0, 0 }, { ARM::t2MVNr, ARM::tMVN, 0, 0, 0, 1, 0, 0,0, 0 }, { ARM::t2ORRrr, 0, ARM::tORR, 0, 0, 0, 1, 0,0, 0 }, diff --git a/lib/Target/Alpha/Alpha.h b/lib/Target/Alpha/Alpha.h index b8a0645..5cf4866 100644 --- a/lib/Target/Alpha/Alpha.h +++ b/lib/Target/Alpha/Alpha.h @@ -21,18 +21,12 @@ namespace llvm { class AlphaTargetMachine; class FunctionPass; - class MachineCodeEmitter; - class ObjectCodeEmitter; class formatted_raw_ostream; FunctionPass *createAlphaISelDag(AlphaTargetMachine &TM); FunctionPass *createAlphaPatternInstructionSelector(TargetMachine &TM); - FunctionPass *createAlphaCodeEmitterPass(AlphaTargetMachine &TM, - MachineCodeEmitter &MCE); FunctionPass *createAlphaJITCodeEmitterPass(AlphaTargetMachine &TM, JITCodeEmitter &JCE); - FunctionPass *createAlphaObjectCodeEmitterPass(AlphaTargetMachine &TM, - ObjectCodeEmitter &OCE); FunctionPass *createAlphaLLRPPass(AlphaTargetMachine &tm); FunctionPass *createAlphaBranchSelectionPass(); diff --git a/lib/Target/Alpha/AlphaCodeEmitter.cpp b/lib/Target/Alpha/AlphaCodeEmitter.cpp index b090f0d..eb5e429 100644 --- a/lib/Target/Alpha/AlphaCodeEmitter.cpp +++ b/lib/Target/Alpha/AlphaCodeEmitter.cpp @@ -17,9 +17,7 @@ #include "AlphaRelocations.h" #include "Alpha.h" #include "llvm/PassManager.h" -#include "llvm/CodeGen/MachineCodeEmitter.h" #include "llvm/CodeGen/JITCodeEmitter.h" -#include "llvm/CodeGen/ObjectCodeEmitter.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/Passes.h" @@ -30,11 +28,14 @@ using namespace llvm; namespace { - - class AlphaCodeEmitter { - MachineCodeEmitter &MCE; + class AlphaCodeEmitter : public MachineFunctionPass { + JITCodeEmitter &MCE; + const AlphaInstrInfo *II; public: - AlphaCodeEmitter(MachineCodeEmitter &mce) : MCE(mce) {} + static char ID; + + AlphaCodeEmitter(JITCodeEmitter &mce) : MachineFunctionPass(&ID), + MCE(mce) {} /// getBinaryCodeForInstr - This function, generated by the /// CodeEmitterGenerator using TableGen, produces the binary encoding for @@ -46,57 +47,30 @@ namespace { unsigned getMachineOpValue(const MachineInstr &MI, const MachineOperand &MO); - }; - - template <class CodeEmitter> - class Emitter : public MachineFunctionPass, public AlphaCodeEmitter - { - const AlphaInstrInfo *II; - TargetMachine &TM; - CodeEmitter &MCE; - - public: - static char ID; - explicit Emitter(TargetMachine &tm, CodeEmitter &mce) - : MachineFunctionPass(&ID), AlphaCodeEmitter(mce), - II(0), TM(tm), MCE(mce) {} - Emitter(TargetMachine &tm, CodeEmitter &mce, const 
AlphaInstrInfo& ii) - : MachineFunctionPass(&ID), AlphaCodeEmitter(mce), - II(&ii), TM(tm), MCE(mce) {} - + bool runOnMachineFunction(MachineFunction &MF); - + virtual const char *getPassName() const { return "Alpha Machine Code Emitter"; } - + private: void emitBasicBlock(MachineBasicBlock &MBB); }; - - template <class CodeEmitter> - char Emitter<CodeEmitter>::ID = 0; } +char AlphaCodeEmitter::ID = 0; + + /// createAlphaCodeEmitterPass - Return a pass that emits the collected Alpha /// code to the specified MCE object. -FunctionPass *llvm::createAlphaCodeEmitterPass(AlphaTargetMachine &TM, - MachineCodeEmitter &MCE) { - return new Emitter<MachineCodeEmitter>(TM, MCE); -} - FunctionPass *llvm::createAlphaJITCodeEmitterPass(AlphaTargetMachine &TM, JITCodeEmitter &JCE) { - return new Emitter<JITCodeEmitter>(TM, JCE); -} -FunctionPass *llvm::createAlphaObjectCodeEmitterPass(AlphaTargetMachine &TM, - ObjectCodeEmitter &OCE) { - return new Emitter<ObjectCodeEmitter>(TM, OCE); + return new AlphaCodeEmitter(JCE); } -template <class CodeEmitter> -bool Emitter<CodeEmitter>::runOnMachineFunction(MachineFunction &MF) { +bool AlphaCodeEmitter::runOnMachineFunction(MachineFunction &MF) { II = ((AlphaTargetMachine&)MF.getTarget()).getInstrInfo(); do { @@ -108,8 +82,7 @@ bool Emitter<CodeEmitter>::runOnMachineFunction(MachineFunction &MF) { return false; } -template <class CodeEmitter> -void Emitter<CodeEmitter>::emitBasicBlock(MachineBasicBlock &MBB) { +void AlphaCodeEmitter::emitBasicBlock(MachineBasicBlock &MBB) { MCE.StartMachineBasicBlock(&MBB); for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); I != E; ++I) { @@ -122,8 +95,8 @@ void Emitter<CodeEmitter>::emitBasicBlock(MachineBasicBlock &MBB) { case Alpha::ALTENT: case Alpha::PCLABEL: case Alpha::MEMLABEL: - case TargetInstrInfo::IMPLICIT_DEF: - case TargetInstrInfo::KILL: + case TargetOpcode::IMPLICIT_DEF: + case TargetOpcode::KILL: break; //skip these } MCE.processDebugLoc(MI.getDebugLoc(), false); diff --git a/lib/Target/Alpha/AlphaISelDAGToDAG.cpp b/lib/Target/Alpha/AlphaISelDAGToDAG.cpp index eaefef9..d6b17c2 100644 --- a/lib/Target/Alpha/AlphaISelDAGToDAG.cpp +++ b/lib/Target/Alpha/AlphaISelDAGToDAG.cpp @@ -64,7 +64,7 @@ namespace { /// that the bits 1-7 of LHS are already zero. If LHS is non-null, we are /// in checking mode. If LHS is null, we assume that the mask has already /// been validated before. 
- uint64_t get_zapImm(SDValue LHS, uint64_t Constant) { + uint64_t get_zapImm(SDValue LHS, uint64_t Constant) const { uint64_t BitsToCheck = 0; unsigned Result = 0; for (unsigned i = 0; i != 8; ++i) { diff --git a/lib/Target/Alpha/AlphaISelLowering.cpp b/lib/Target/Alpha/AlphaISelLowering.cpp index 471de7f..5d8310e 100644 --- a/lib/Target/Alpha/AlphaISelLowering.cpp +++ b/lib/Target/Alpha/AlphaISelLowering.cpp @@ -21,7 +21,7 @@ #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/PseudoSourceValue.h" -#include "llvm/Target/TargetLoweringObjectFile.h" +#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" #include "llvm/Constants.h" #include "llvm/Function.h" #include "llvm/Module.h" @@ -49,8 +49,6 @@ AlphaTargetLowering::AlphaTargetLowering(TargetMachine &TM) setShiftAmountType(MVT::i64); setBooleanContents(ZeroOrOneBooleanContent); - setUsesGlobalOffsetTable(true); - addRegisterClass(MVT::i64, Alpha::GPRCRegisterClass); addRegisterClass(MVT::f64, Alpha::F8RCRegisterClass); addRegisterClass(MVT::f32, Alpha::F4RCRegisterClass); @@ -223,11 +221,13 @@ static SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) { SDValue AlphaTargetLowering::LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg, - bool isTailCall, + bool &isTailCall, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) { + // Alpha target does not yet support tail call optimization. + isTailCall = false; // Analyze operands of the call, assigning locations to each operand. SmallVector<CCValAssign, 16> ArgLocs; @@ -282,7 +282,8 @@ AlphaTargetLowering::LowerCall(SDValue Chain, SDValue Callee, DAG.getIntPtrConstant(VA.getLocMemOffset())); MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, - PseudoSourceValue::getStack(), 0)); + PseudoSourceValue::getStack(), 0, + false, false, 0)); } } @@ -426,7 +427,8 @@ AlphaTargetLowering::LowerFormalArguments(SDValue Chain, // Create the SelectionDAG nodes corresponding to a load //from this parameter SDValue FIN = DAG.getFrameIndex(FI, MVT::i64); - ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, NULL, 0); + ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, NULL, 0, + false, false, 0); } InVals.push_back(ArgVal); } @@ -442,14 +444,16 @@ AlphaTargetLowering::LowerFormalArguments(SDValue Chain, int FI = MFI->CreateFixedObject(8, -8 * (6 - i), true, false); if (i == 0) VarArgsBase = FI; SDValue SDFI = DAG.getFrameIndex(FI, MVT::i64); - LS.push_back(DAG.getStore(Chain, dl, argt, SDFI, NULL, 0)); + LS.push_back(DAG.getStore(Chain, dl, argt, SDFI, NULL, 0, + false, false, 0)); if (TargetRegisterInfo::isPhysicalRegister(args_float[i])) args_float[i] = AddLiveIn(MF, args_float[i], &Alpha::F8RCRegClass); argt = DAG.getCopyFromReg(Chain, dl, args_float[i], MVT::f64); FI = MFI->CreateFixedObject(8, - 8 * (12 - i), true, false); SDFI = DAG.getFrameIndex(FI, MVT::i64); - LS.push_back(DAG.getStore(Chain, dl, argt, SDFI, NULL, 0)); + LS.push_back(DAG.getStore(Chain, dl, argt, SDFI, NULL, 0, + false, false, 0)); } //Set up a token factor with all the stack traffic @@ -528,11 +532,12 @@ void AlphaTargetLowering::LowerVAARG(SDNode *N, SDValue &Chain, const Value *VAListS = cast<SrcValueSDNode>(N->getOperand(2))->getValue(); DebugLoc dl = N->getDebugLoc(); - SDValue Base = DAG.getLoad(MVT::i64, dl, Chain, VAListP, VAListS, 0); + SDValue Base = DAG.getLoad(MVT::i64, dl, Chain, VAListP, VAListS, 0, + false, false, 0); SDValue 
Tmp = DAG.getNode(ISD::ADD, dl, MVT::i64, VAListP, DAG.getConstant(8, MVT::i64)); SDValue Offset = DAG.getExtLoad(ISD::SEXTLOAD, dl, MVT::i64, Base.getValue(1), - Tmp, NULL, 0, MVT::i32); + Tmp, NULL, 0, MVT::i32, false, false, 0); DataPtr = DAG.getNode(ISD::ADD, dl, MVT::i64, Base, Offset); if (N->getValueType(0).isFloatingPoint()) { @@ -547,7 +552,7 @@ void AlphaTargetLowering::LowerVAARG(SDNode *N, SDValue &Chain, SDValue NewOffset = DAG.getNode(ISD::ADD, dl, MVT::i64, Offset, DAG.getConstant(8, MVT::i64)); Chain = DAG.getTruncStore(Offset.getValue(1), dl, NewOffset, Tmp, NULL, 0, - MVT::i32); + MVT::i32, false, false, 0); } /// LowerOperation - Provide custom lowering hooks for some operations. @@ -694,9 +699,10 @@ SDValue AlphaTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) { SDValue Result; if (Op.getValueType() == MVT::i32) Result = DAG.getExtLoad(ISD::SEXTLOAD, dl, MVT::i64, Chain, DataPtr, - NULL, 0, MVT::i32); + NULL, 0, MVT::i32, false, false, 0); else - Result = DAG.getLoad(Op.getValueType(), dl, Chain, DataPtr, NULL, 0); + Result = DAG.getLoad(Op.getValueType(), dl, Chain, DataPtr, NULL, 0, + false, false, 0); return Result; } case ISD::VACOPY: { @@ -706,15 +712,18 @@ SDValue AlphaTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) { const Value *DestS = cast<SrcValueSDNode>(Op.getOperand(3))->getValue(); const Value *SrcS = cast<SrcValueSDNode>(Op.getOperand(4))->getValue(); - SDValue Val = DAG.getLoad(getPointerTy(), dl, Chain, SrcP, SrcS, 0); - SDValue Result = DAG.getStore(Val.getValue(1), dl, Val, DestP, DestS, 0); + SDValue Val = DAG.getLoad(getPointerTy(), dl, Chain, SrcP, SrcS, 0, + false, false, 0); + SDValue Result = DAG.getStore(Val.getValue(1), dl, Val, DestP, DestS, 0, + false, false, 0); SDValue NP = DAG.getNode(ISD::ADD, dl, MVT::i64, SrcP, DAG.getConstant(8, MVT::i64)); Val = DAG.getExtLoad(ISD::SEXTLOAD, dl, MVT::i64, Result, - NP, NULL,0, MVT::i32); + NP, NULL,0, MVT::i32, false, false, 0); SDValue NPD = DAG.getNode(ISD::ADD, dl, MVT::i64, DestP, DAG.getConstant(8, MVT::i64)); - return DAG.getTruncStore(Val.getValue(1), dl, Val, NPD, NULL, 0, MVT::i32); + return DAG.getTruncStore(Val.getValue(1), dl, Val, NPD, NULL, 0, MVT::i32, + false, false, 0); } case ISD::VASTART: { SDValue Chain = Op.getOperand(0); @@ -723,11 +732,12 @@ SDValue AlphaTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) { // vastart stores the address of the VarArgsBase and VarArgsOffset SDValue FR = DAG.getFrameIndex(VarArgsBase, MVT::i64); - SDValue S1 = DAG.getStore(Chain, dl, FR, VAListP, VAListS, 0); + SDValue S1 = DAG.getStore(Chain, dl, FR, VAListP, VAListS, 0, + false, false, 0); SDValue SA2 = DAG.getNode(ISD::ADD, dl, MVT::i64, VAListP, DAG.getConstant(8, MVT::i64)); return DAG.getTruncStore(S1, dl, DAG.getConstant(VarArgsOffset, MVT::i64), - SA2, NULL, 0, MVT::i32); + SA2, NULL, 0, MVT::i32, false, false, 0); } case ISD::RETURNADDR: return DAG.getNode(AlphaISD::GlobalRetAddr, DebugLoc::getUnknownLoc(), @@ -749,7 +759,8 @@ void AlphaTargetLowering::ReplaceNodeResults(SDNode *N, SDValue Chain, DataPtr; LowerVAARG(N, Chain, DataPtr, DAG); - SDValue Res = DAG.getLoad(N->getValueType(0), dl, Chain, DataPtr, NULL, 0); + SDValue Res = DAG.getLoad(N->getValueType(0), dl, Chain, DataPtr, NULL, 0, + false, false, 0); Results.push_back(Res); Results.push_back(SDValue(Res.getNode(), 1)); } diff --git a/lib/Target/Alpha/AlphaISelLowering.h b/lib/Target/Alpha/AlphaISelLowering.h index b204faf..0f17025 100644 --- a/lib/Target/Alpha/AlphaISelLowering.h +++ 
b/lib/Target/Alpha/AlphaISelLowering.h @@ -121,7 +121,7 @@ namespace llvm { virtual SDValue LowerCall(SDValue Chain, SDValue Callee, - CallingConv::ID CallConv, bool isVarArg, bool isTailCall, + CallingConv::ID CallConv, bool isVarArg, bool &isTailCall, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, diff --git a/lib/Target/Alpha/AlphaMCAsmInfo.cpp b/lib/Target/Alpha/AlphaMCAsmInfo.cpp index b652a53..c67c6a2 100644 --- a/lib/Target/Alpha/AlphaMCAsmInfo.cpp +++ b/lib/Target/Alpha/AlphaMCAsmInfo.cpp @@ -17,6 +17,7 @@ using namespace llvm; AlphaMCAsmInfo::AlphaMCAsmInfo(const Target &T, const StringRef &TT) { AlignmentIsInBytes = false; PrivateGlobalPrefix = "$"; - PICJumpTableDirective = ".gprel32"; + GPRel32Directive = ".gprel32"; WeakRefDirective = "\t.weak\t"; + HasSetDirective = false; } diff --git a/lib/Target/Alpha/AlphaRegisterInfo.cpp b/lib/Target/Alpha/AlphaRegisterInfo.cpp index 64bdd62..ba662fb 100644 --- a/lib/Target/Alpha/AlphaRegisterInfo.cpp +++ b/lib/Target/Alpha/AlphaRegisterInfo.cpp @@ -251,7 +251,7 @@ void AlphaRegisterInfo::emitPrologue(MachineFunction &MF) const { } else { std::string msg; raw_string_ostream Msg(msg); - Msg << "Too big a stack frame at " + NumBytes; + Msg << "Too big a stack frame at " << NumBytes; llvm_report_error(Msg.str()); } @@ -303,15 +303,14 @@ void AlphaRegisterInfo::emitEpilogue(MachineFunction &MF, } else { std::string msg; raw_string_ostream Msg(msg); - Msg << "Too big a stack frame at " + NumBytes; + Msg << "Too big a stack frame at " << NumBytes; llvm_report_error(Msg.str()); } } } unsigned AlphaRegisterInfo::getRARegister() const { - llvm_unreachable("What is the return address register"); - return 0; + return Alpha::R26; } unsigned AlphaRegisterInfo::getFrameRegister(const MachineFunction &MF) const { diff --git a/lib/Target/Alpha/AlphaTargetMachine.cpp b/lib/Target/Alpha/AlphaTargetMachine.cpp index d0d5a43..5169a01 100644 --- a/lib/Target/Alpha/AlphaTargetMachine.cpp +++ b/lib/Target/Alpha/AlphaTargetMachine.cpp @@ -55,35 +55,7 @@ bool AlphaTargetMachine::addPreEmitPass(PassManagerBase &PM, } bool AlphaTargetMachine::addCodeEmitter(PassManagerBase &PM, CodeGenOpt::Level OptLevel, - MachineCodeEmitter &MCE) { - PM.add(createAlphaCodeEmitterPass(*this, MCE)); - return false; -} -bool AlphaTargetMachine::addCodeEmitter(PassManagerBase &PM, - CodeGenOpt::Level OptLevel, JITCodeEmitter &JCE) { PM.add(createAlphaJITCodeEmitterPass(*this, JCE)); return false; } -bool AlphaTargetMachine::addCodeEmitter(PassManagerBase &PM, - CodeGenOpt::Level OptLevel, - ObjectCodeEmitter &OCE) { - PM.add(createAlphaObjectCodeEmitterPass(*this, OCE)); - return false; -} -bool AlphaTargetMachine::addSimpleCodeEmitter(PassManagerBase &PM, - CodeGenOpt::Level OptLevel, - MachineCodeEmitter &MCE) { - return addCodeEmitter(PM, OptLevel, MCE); -} -bool AlphaTargetMachine::addSimpleCodeEmitter(PassManagerBase &PM, - CodeGenOpt::Level OptLevel, - JITCodeEmitter &JCE) { - return addCodeEmitter(PM, OptLevel, JCE); -} -bool AlphaTargetMachine::addSimpleCodeEmitter(PassManagerBase &PM, - CodeGenOpt::Level OptLevel, - ObjectCodeEmitter &OCE) { - return addCodeEmitter(PM, OptLevel, OCE); -} - diff --git a/lib/Target/Alpha/AlphaTargetMachine.h b/lib/Target/Alpha/AlphaTargetMachine.h index f03e938..6f3a774 100644 --- a/lib/Target/Alpha/AlphaTargetMachine.h +++ b/lib/Target/Alpha/AlphaTargetMachine.h @@ -56,20 +56,7 @@ public: virtual bool addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel); 
virtual bool addPreEmitPass(PassManagerBase &PM, CodeGenOpt::Level OptLevel); virtual bool addCodeEmitter(PassManagerBase &PM, CodeGenOpt::Level OptLevel, - MachineCodeEmitter &MCE); - virtual bool addCodeEmitter(PassManagerBase &PM, CodeGenOpt::Level OptLevel, JITCodeEmitter &JCE); - virtual bool addCodeEmitter(PassManagerBase &PM, CodeGenOpt::Level OptLevel, - ObjectCodeEmitter &JCE); - virtual bool addSimpleCodeEmitter(PassManagerBase &PM, - CodeGenOpt::Level OptLevel, - MachineCodeEmitter &MCE); - virtual bool addSimpleCodeEmitter(PassManagerBase &PM, - CodeGenOpt::Level OptLevel, - JITCodeEmitter &JCE); - virtual bool addSimpleCodeEmitter(PassManagerBase &PM, - CodeGenOpt::Level OptLevel, - ObjectCodeEmitter &OCE); }; } // end namespace llvm diff --git a/lib/Target/Alpha/AsmPrinter/AlphaAsmPrinter.cpp b/lib/Target/Alpha/AsmPrinter/AlphaAsmPrinter.cpp index b13f544..733a46c 100644 --- a/lib/Target/Alpha/AsmPrinter/AlphaAsmPrinter.cpp +++ b/lib/Target/Alpha/AsmPrinter/AlphaAsmPrinter.cpp @@ -29,30 +29,33 @@ #include "llvm/Target/TargetRegistry.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FormattedStream.h" -#include "llvm/ADT/Statistic.h" using namespace llvm; -STATISTIC(EmittedInsts, "Number of machine instrs printed"); - namespace { struct AlphaAsmPrinter : public AsmPrinter { /// Unique incrementer for label values for referencing Global values. /// explicit AlphaAsmPrinter(formatted_raw_ostream &o, TargetMachine &tm, - const MCAsmInfo *T, bool V) - : AsmPrinter(o, tm, T, V) {} + MCContext &Ctx, MCStreamer &Streamer, + const MCAsmInfo *T) + : AsmPrinter(o, tm, Ctx, Streamer, T) {} virtual const char *getPassName() const { return "Alpha Assembly Printer"; } void printInstruction(const MachineInstr *MI); + void EmitInstruction(const MachineInstr *MI) { + printInstruction(MI); + OutStreamer.AddBlankLine(); + } static const char *getRegisterName(unsigned RegNo); void printOp(const MachineOperand &MO, bool IsCallOp = false); void printOperand(const MachineInstr *MI, int opNum); void printBaseOffsetPair(const MachineInstr *MI, int i, bool brackets=true); - bool runOnMachineFunction(MachineFunction &F); + virtual void EmitFunctionBodyStart(); + virtual void EmitFunctionBodyEnd(); void EmitStartOfAsmFile(Module &M); bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, @@ -93,7 +96,7 @@ void AlphaAsmPrinter::printOp(const MachineOperand &MO, bool IsCallOp) { return; case MachineOperand::MO_MachineBasicBlock: - O << *GetMBBSymbol(MO.getMBB()->getNumber()); + O << *MO.getMBB()->getSymbol(OutContext); return; case MachineOperand::MO_ConstantPoolIndex: @@ -120,73 +123,16 @@ void AlphaAsmPrinter::printOp(const MachineOperand &MO, bool IsCallOp) { } } -/// runOnMachineFunction - This uses the printMachineInstruction() -/// method to print assembly for each instruction. -/// -bool AlphaAsmPrinter::runOnMachineFunction(MachineFunction &MF) { - this->MF = &MF; - - SetupMachineFunction(MF); - O << "\n\n"; - - // Print out constants referenced by the function - EmitConstantPool(MF.getConstantPool()); - - // Print out jump tables referenced by the function - EmitJumpTableInfo(MF.getJumpTableInfo(), MF); - - // Print out labels for the function. - const Function *F = MF.getFunction(); - OutStreamer.SwitchSection(getObjFileLowering().SectionForGlobal(F, Mang, TM)); - - EmitAlignment(MF.getAlignment(), F); - switch (F->getLinkage()) { - default: llvm_unreachable("Unknown linkage type!"); - case Function::InternalLinkage: // Symbols default to internal. 
- case Function::PrivateLinkage: - case Function::LinkerPrivateLinkage: - break; - case Function::ExternalLinkage: - O << "\t.globl " << *CurrentFnSym << '\n'; - break; - case Function::WeakAnyLinkage: - case Function::WeakODRLinkage: - case Function::LinkOnceAnyLinkage: - case Function::LinkOnceODRLinkage: - O << MAI->getWeakRefDirective() << *CurrentFnSym << '\n'; - break; - } - - printVisibility(CurrentFnSym, F->getVisibility()); - +/// EmitFunctionBodyStart - Targets can override this to emit stuff before +/// the first basic block in the function. +void AlphaAsmPrinter::EmitFunctionBodyStart() { O << "\t.ent " << *CurrentFnSym << "\n"; +} - O << *CurrentFnSym << ":\n"; - - // Print out code for the function. - for (MachineFunction::const_iterator I = MF.begin(), E = MF.end(); - I != E; ++I) { - if (I != MF.begin()) - EmitBasicBlockStart(I); - - for (MachineBasicBlock::const_iterator II = I->begin(), E = I->end(); - II != E; ++II) { - // Print the assembly for the instruction. - ++EmittedInsts; - processDebugLoc(II, true); - printInstruction(II); - - if (VerboseAsm) - EmitComments(*II); - O << '\n'; - processDebugLoc(II, false); - } - } - +/// EmitFunctionBodyEnd - Targets can override this to emit stuff after +/// the last basic block in the function. +void AlphaAsmPrinter::EmitFunctionBodyEnd() { O << "\t.end " << *CurrentFnSym << "\n"; - - // We didn't modify anything. - return false; } void AlphaAsmPrinter::EmitStartOfAsmFile(Module &M) { diff --git a/lib/Target/Alpha/AsmPrinter/Makefile b/lib/Target/Alpha/AsmPrinter/Makefile index 3f64aac..3c64a3c 100644 --- a/lib/Target/Alpha/AsmPrinter/Makefile +++ b/lib/Target/Alpha/AsmPrinter/Makefile @@ -8,7 +8,6 @@ ##===----------------------------------------------------------------------===## LEVEL = ../../../.. LIBRARYNAME = LLVMAlphaAsmPrinter -CXXFLAGS = -fno-rtti # Hack: we need to include 'main' alpha target directory to grab private headers CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. diff --git a/lib/Target/Alpha/Makefile b/lib/Target/Alpha/Makefile index 14cbc6c..54d53ab 100644 --- a/lib/Target/Alpha/Makefile +++ b/lib/Target/Alpha/Makefile @@ -10,7 +10,6 @@ LEVEL = ../../.. LIBRARYNAME = LLVMAlphaCodeGen TARGET = Alpha -CXXFLAGS = -fno-rtti # Make sure that tblgen is run, first thing. BUILT_SOURCES = AlphaGenRegisterInfo.h.inc AlphaGenRegisterNames.inc \ diff --git a/lib/Target/Alpha/TargetInfo/Makefile b/lib/Target/Alpha/TargetInfo/Makefile index 6f7b898..de01d7f 100644 --- a/lib/Target/Alpha/TargetInfo/Makefile +++ b/lib/Target/Alpha/TargetInfo/Makefile @@ -8,7 +8,6 @@ ##===----------------------------------------------------------------------===## LEVEL = ../../../.. LIBRARYNAME = LLVMAlphaInfo -CXXFLAGS = -fno-rtti # Hack: we need to include 'main' target directory to grab private headers CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. 
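Note on the Alpha asm-printer hunks above: the hand-rolled runOnMachineFunction() loop (linkage switch, basic-block walk, EmittedInsts statistic) is deleted because the shared AsmPrinter driver now performs that walk itself and calls back into the target. A minimal sketch of the resulting printer shape, built only from hooks that appear in the hunks above — the class name is illustrative, not LLVM API:

    #include "llvm/CodeGen/AsmPrinter.h"
    #include "llvm/MC/MCStreamer.h"
    #include "llvm/Support/FormattedStream.h"
    using namespace llvm;

    struct SketchAsmPrinter : public AsmPrinter {
      SketchAsmPrinter(formatted_raw_ostream &O, TargetMachine &TM,
                       MCContext &Ctx, MCStreamer &Streamer, const MCAsmInfo *T)
        : AsmPrinter(O, TM, Ctx, Streamer, T) {}

      void printInstruction(const MachineInstr *MI); // TableGen-generated

      // Called once per MachineInstr by the base-class function walker.
      virtual void EmitInstruction(const MachineInstr *MI) {
        printInstruction(MI);
        OutStreamer.AddBlankLine();
      }
      // Bracket the body with target directives; labels, linkage, constant
      // pools and jump tables are now emitted by the base class itself.
      virtual void EmitFunctionBodyStart() { O << "\t.ent " << *CurrentFnSym << "\n"; }
      virtual void EmitFunctionBodyEnd()   { O << "\t.end " << *CurrentFnSym << "\n"; }
    };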
diff --git a/lib/Target/Blackfin/AsmPrinter/BlackfinAsmPrinter.cpp b/lib/Target/Blackfin/AsmPrinter/BlackfinAsmPrinter.cpp index 749f735..fe13e14 100644 --- a/lib/Target/Blackfin/AsmPrinter/BlackfinAsmPrinter.cpp +++ b/lib/Target/Blackfin/AsmPrinter/BlackfinAsmPrinter.cpp @@ -30,20 +30,18 @@ #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetRegistry.h" -#include "llvm/ADT/Statistic.h" #include "llvm/ADT/SmallString.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FormattedStream.h" using namespace llvm; -STATISTIC(EmittedInsts, "Number of machine instrs printed"); - namespace { class BlackfinAsmPrinter : public AsmPrinter { public: BlackfinAsmPrinter(formatted_raw_ostream &O, TargetMachine &TM, - const MCAsmInfo *MAI, bool V) - : AsmPrinter(O, TM, MAI, V) {} + MCContext &Ctx, MCStreamer &Streamer, + const MCAsmInfo *MAI) + : AsmPrinter(O, TM, Ctx, Streamer, MAI) {} virtual const char *getPassName() const { return "Blackfin Assembly Printer"; @@ -54,8 +52,10 @@ namespace { void printInstruction(const MachineInstr *MI); // autogenerated. static const char *getRegisterName(unsigned RegNo); - void emitLinkage(const MCSymbol *GVSym, GlobalValue::LinkageTypes l); - bool runOnMachineFunction(MachineFunction &F); + void EmitInstruction(const MachineInstr *MI) { + printInstruction(MI); + OutStreamer.AddBlankLine(); + } bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, unsigned AsmVariant, const char *ExtraCode); bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo, @@ -69,76 +69,6 @@ extern "C" void LLVMInitializeBlackfinAsmPrinter() { RegisterAsmPrinter<BlackfinAsmPrinter> X(TheBlackfinTarget); } -void BlackfinAsmPrinter::emitLinkage(const MCSymbol *GVSym, - GlobalValue::LinkageTypes L) { - switch (L) { - default: llvm_unreachable("Unknown linkage type!"); - case GlobalValue::InternalLinkage: // Symbols default to internal. - case GlobalValue::PrivateLinkage: - case GlobalValue::LinkerPrivateLinkage: - break; - case GlobalValue::ExternalLinkage: - O << MAI->getGlobalDirective() << *GVSym << "\n"; - break; - case GlobalValue::LinkOnceAnyLinkage: - case GlobalValue::LinkOnceODRLinkage: - case GlobalValue::WeakAnyLinkage: - case GlobalValue::WeakODRLinkage: - O << MAI->getGlobalDirective() << *GVSym << "\n"; - O << MAI->getWeakDefDirective() << *GVSym << "\n"; - break; - } -} - -/// runOnMachineFunction - This uses the printInstruction() -/// method to print assembly for each instruction. -/// -bool BlackfinAsmPrinter::runOnMachineFunction(MachineFunction &MF) { - SetupMachineFunction(MF); - EmitConstantPool(MF.getConstantPool()); - EmitJumpTableInfo(MF.getJumpTableInfo(), MF); - - const Function *F = MF.getFunction(); - OutStreamer.SwitchSection(getObjFileLowering().SectionForGlobal(F, Mang, TM)); - EmitAlignment(2, F); - emitLinkage(CurrentFnSym, F->getLinkage()); - printVisibility(CurrentFnSym, F->getVisibility()); - - O << "\t.type\t" << *CurrentFnSym << ", STT_FUNC\n"; - O << *CurrentFnSym << ":\n"; - - if (DW) - DW->BeginFunction(&MF); - - // Print out code for the function. - for (MachineFunction::const_iterator I = MF.begin(), E = MF.end(); - I != E; ++I) { - // Print a label for the basic block. - EmitBasicBlockStart(I); - - for (MachineBasicBlock::const_iterator II = I->begin(), E = I->end(); - II != E; ++II) { - // Print the assembly for the instruction. 
- processDebugLoc(II, true); - - printInstruction(II); - if (VerboseAsm) - EmitComments(*II); - O << '\n'; - - processDebugLoc(II, false); - ++EmittedInsts; - } - } - - O << "\t.size " << *CurrentFnSym << ", .-" << *CurrentFnSym << "\n"; - - if (DW) - DW->EndFunction(&MF); - - return false; -} - void BlackfinAsmPrinter::printOperand(const MachineInstr *MI, int opNum) { const MachineOperand &MO = MI->getOperand (opNum); switch (MO.getType()) { @@ -152,7 +82,7 @@ void BlackfinAsmPrinter::printOperand(const MachineInstr *MI, int opNum) { O << MO.getImm(); break; case MachineOperand::MO_MachineBasicBlock: - O << *GetMBBSymbol(MO.getMBB()->getNumber()); + O << *MO.getMBB()->getSymbol(OutContext); return; case MachineOperand::MO_GlobalAddress: O << *GetGlobalValueSymbol(MO.getGlobal()); diff --git a/lib/Target/Blackfin/AsmPrinter/Makefile b/lib/Target/Blackfin/AsmPrinter/Makefile index 30e8285..091d4df 100644 --- a/lib/Target/Blackfin/AsmPrinter/Makefile +++ b/lib/Target/Blackfin/AsmPrinter/Makefile @@ -8,7 +8,6 @@ ##===----------------------------------------------------------------------===## LEVEL = ../../../.. LIBRARYNAME = LLVMBlackfinAsmPrinter -CXXFLAGS = -fno-rtti # Hack: we need to include 'main' Blackfin target directory to grab private # headers diff --git a/lib/Target/Blackfin/BlackfinISelDAGToDAG.cpp b/lib/Target/Blackfin/BlackfinISelDAGToDAG.cpp index e1b6008..2c9cc60 100644 --- a/lib/Target/Blackfin/BlackfinISelDAGToDAG.cpp +++ b/lib/Target/Blackfin/BlackfinISelDAGToDAG.cpp @@ -175,7 +175,7 @@ void BlackfinDAGToDAGISel::FixRegisterClasses(SelectionDAG &DAG) { // We cannot copy CC <-> !(CC/D) if ((isCC(DefRC) && !isDCC(UseRC)) || (isCC(UseRC) && !isDCC(DefRC))) { SDNode *Copy = - DAG.getMachineNode(TargetInstrInfo::COPY_TO_REGCLASS, + DAG.getMachineNode(TargetOpcode::COPY_TO_REGCLASS, NI->getDebugLoc(), MVT::i32, UI.getUse().get(), diff --git a/lib/Target/Blackfin/BlackfinISelLowering.cpp b/lib/Target/Blackfin/BlackfinISelLowering.cpp index ad2510a..5ce2013 100644 --- a/lib/Target/Blackfin/BlackfinISelLowering.cpp +++ b/lib/Target/Blackfin/BlackfinISelLowering.cpp @@ -22,7 +22,7 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/CodeGen/SelectionDAG.h" -#include "llvm/Target/TargetLoweringObjectFile.h" +#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" #include "llvm/ADT/VectorExtras.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" @@ -206,7 +206,8 @@ BlackfinTargetLowering::LowerFormalArguments(SDValue Chain, int FI = MFI->CreateFixedObject(ObjSize, VA.getLocMemOffset(), true, false); SDValue FIN = DAG.getFrameIndex(FI, MVT::i32); - InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN, NULL, 0)); + InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN, NULL, 0, + false, false, 0)); } } @@ -273,11 +274,13 @@ BlackfinTargetLowering::LowerReturn(SDValue Chain, SDValue BlackfinTargetLowering::LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg, - bool isTailCall, + bool &isTailCall, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) { + // Blackfin target does not yet support tail call optimization. + isTailCall = false; // Analyze operands of the call, assigning locations to each operand. 
SmallVector<CCValAssign, 16> ArgLocs; @@ -327,7 +330,7 @@ BlackfinTargetLowering::LowerCall(SDValue Chain, SDValue Callee, OffsetN = DAG.getNode(ISD::ADD, dl, MVT::i32, SPN, OffsetN); MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, OffsetN, PseudoSourceValue::getStack(), - Offset)); + Offset, false, false, 0)); } } diff --git a/lib/Target/Blackfin/BlackfinISelLowering.h b/lib/Target/Blackfin/BlackfinISelLowering.h index cdbc7d2..5f39910 100644 --- a/lib/Target/Blackfin/BlackfinISelLowering.h +++ b/lib/Target/Blackfin/BlackfinISelLowering.h @@ -64,7 +64,7 @@ namespace llvm { SmallVectorImpl<SDValue> &InVals); virtual SDValue LowerCall(SDValue Chain, SDValue Callee, - CallingConv::ID CallConv, bool isVarArg, bool isTailCall, + CallingConv::ID CallConv, bool isVarArg, bool &isTailCall, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, diff --git a/lib/Target/Blackfin/BlackfinMCAsmInfo.cpp b/lib/Target/Blackfin/BlackfinMCAsmInfo.cpp index 6d0f66c..31470fb 100644 --- a/lib/Target/Blackfin/BlackfinMCAsmInfo.cpp +++ b/lib/Target/Blackfin/BlackfinMCAsmInfo.cpp @@ -18,4 +18,5 @@ using namespace llvm; BlackfinMCAsmInfo::BlackfinMCAsmInfo(const Target &T, const StringRef &TT) { GlobalPrefix = "_"; CommentString = "//"; + HasSetDirective = false; } diff --git a/lib/Target/Blackfin/Makefile b/lib/Target/Blackfin/Makefile index 4fdaf27..339bef9 100644 --- a/lib/Target/Blackfin/Makefile +++ b/lib/Target/Blackfin/Makefile @@ -10,7 +10,6 @@ LEVEL = ../../.. LIBRARYNAME = LLVMBlackfinCodeGen TARGET = Blackfin -CXXFLAGS = -fno-rtti # Make sure that tblgen is run, first thing. BUILT_SOURCES = BlackfinGenRegisterInfo.h.inc BlackfinGenRegisterNames.inc \ diff --git a/lib/Target/Blackfin/TargetInfo/Makefile b/lib/Target/Blackfin/TargetInfo/Makefile index 5c770cf..c49cfbe 100644 --- a/lib/Target/Blackfin/TargetInfo/Makefile +++ b/lib/Target/Blackfin/TargetInfo/Makefile @@ -8,7 +8,6 @@ ##===----------------------------------------------------------------------===## LEVEL = ../../../.. LIBRARYNAME = LLVMBlackfinInfo -CXXFLAGS = -fno-rtti # Hack: we need to include 'main' target directory to grab private headers CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. 
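Note: the same LowerCall change recurs in every target in this patch (Alpha and Blackfin above, CellSPU and MSP430 below): isTailCall is now taken by reference so a backend can veto tail-call optimization before any lowering happens. A sketch of the idiom with the body elided; "SomeTargetLowering" is a placeholder, not a real class:

    SDValue
    SomeTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
                                  CallingConv::ID CallConv, bool isVarArg,
                                  bool &isTailCall,   // was: bool isTailCall
                                  const SmallVectorImpl<ISD::OutputArg> &Outs,
                                  const SmallVectorImpl<ISD::InputArg> &Ins,
                                  DebugLoc dl, SelectionDAG &DAG,
                                  SmallVectorImpl<SDValue> &InVals) {
      // This target does not yet support tail call optimization, so clear
      // the flag up front; the caller sees the update through the reference.
      isTailCall = false;
      // ... argument analysis and call emission continue unchanged ...
    }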
diff --git a/lib/Target/CBackend/CBackend.cpp b/lib/Target/CBackend/CBackend.cpp index e765655..c1c1d80 100644 --- a/lib/Target/CBackend/CBackend.cpp +++ b/lib/Target/CBackend/CBackend.cpp @@ -493,7 +493,7 @@ raw_ostream & CWriter::printSimpleType(formatted_raw_ostream &Out, const Type *Ty, bool isSigned, const std::string &NameSoFar) { - assert((Ty->isPrimitiveType() || Ty->isInteger() || isa<VectorType>(Ty)) && + assert((Ty->isPrimitiveType() || Ty->isIntegerTy() || isa<VectorType>(Ty)) && "Invalid type for printSimpleType"); switch (Ty->getTypeID()) { case Type::VoidTyID: return Out << "void " << NameSoFar; @@ -540,7 +540,7 @@ CWriter::printSimpleType(formatted_raw_ostream &Out, const Type *Ty, std::ostream & CWriter::printSimpleType(std::ostream &Out, const Type *Ty, bool isSigned, const std::string &NameSoFar) { - assert((Ty->isPrimitiveType() || Ty->isInteger() || isa<VectorType>(Ty)) && + assert((Ty->isPrimitiveType() || Ty->isIntegerTy() || isa<VectorType>(Ty)) && "Invalid type for printSimpleType"); switch (Ty->getTypeID()) { case Type::VoidTyID: return Out << "void " << NameSoFar; @@ -591,7 +591,7 @@ raw_ostream &CWriter::printType(formatted_raw_ostream &Out, const Type *Ty, bool isSigned, const std::string &NameSoFar, bool IgnoreName, const AttrListPtr &PAL) { - if (Ty->isPrimitiveType() || Ty->isInteger() || isa<VectorType>(Ty)) { + if (Ty->isPrimitiveType() || Ty->isIntegerTy() || isa<VectorType>(Ty)) { printSimpleType(Out, Ty, isSigned, NameSoFar); return Out; } @@ -694,7 +694,7 @@ raw_ostream &CWriter::printType(formatted_raw_ostream &Out, std::ostream &CWriter::printType(std::ostream &Out, const Type *Ty, bool isSigned, const std::string &NameSoFar, bool IgnoreName, const AttrListPtr &PAL) { - if (Ty->isPrimitiveType() || Ty->isInteger() || isa<VectorType>(Ty)) { + if (Ty->isPrimitiveType() || Ty->isIntegerTy() || isa<VectorType>(Ty)) { printSimpleType(Out, Ty, isSigned, NameSoFar); return Out; } @@ -1396,7 +1396,7 @@ bool CWriter::printConstExprCast(const ConstantExpr* CE, bool Static) { } if (NeedsExplicitCast) { Out << "(("; - if (Ty->isInteger() && Ty != Type::getInt1Ty(Ty->getContext())) + if (Ty->isIntegerTy() && Ty != Type::getInt1Ty(Ty->getContext())) printSimpleType(Out, Ty, TypeIsSigned); else printType(Out, Ty); // not integer, sign doesn't matter @@ -1497,7 +1497,7 @@ void CWriter::writeInstComputationInline(Instruction &I) { // We can't currently support integer types other than 1, 8, 16, 32, 64. // Validate this. const Type *Ty = I.getType(); - if (Ty->isInteger() && (Ty!=Type::getInt1Ty(I.getContext()) && + if (Ty->isIntegerTy() && (Ty!=Type::getInt1Ty(I.getContext()) && Ty!=Type::getInt8Ty(I.getContext()) && Ty!=Type::getInt16Ty(I.getContext()) && Ty!=Type::getInt32Ty(I.getContext()) && @@ -1841,7 +1841,7 @@ static SpecialGlobalClass getGlobalVariableClass(const GlobalVariable *GV) { return GlobalDtors; } - // Otherwise, it it is other metadata, don't print it. This catches things + // Otherwise, if it is other metadata, don't print it. This catches things // like debug information. if (GV->getSection() == "llvm.metadata") return NotPrinted; @@ -2287,7 +2287,8 @@ void CWriter::printModuleTypes(const TypeSymbolTable &TST) { void CWriter::printContainedStructs(const Type *Ty, std::set<const Type*> &StructPrinted) { // Don't walk through pointers. - if (isa<PointerType>(Ty) || Ty->isPrimitiveType() || Ty->isInteger()) return; + if (isa<PointerType>(Ty) || Ty->isPrimitiveType() || Ty->isIntegerTy()) + return; // Print all contained types first. 
for (Type::subtype_iterator I = Ty->subtype_begin(), @@ -2423,8 +2424,8 @@ static inline bool isFPIntBitCast(const Instruction &I) { return false; const Type *SrcTy = I.getOperand(0)->getType(); const Type *DstTy = I.getType(); - return (SrcTy->isFloatingPoint() && DstTy->isInteger()) || - (DstTy->isFloatingPoint() && SrcTy->isInteger()); + return (SrcTy->isFloatingPointTy() && DstTy->isIntegerTy()) || + (DstTy->isFloatingPointTy() && SrcTy->isIntegerTy()); } void CWriter::printFunction(Function &F) { @@ -3113,7 +3114,7 @@ void CWriter::visitCallInst(CallInst &I) { } /// visitBuiltinCall - Handle the call to the specified builtin. Returns true -/// if the entire call is handled, return false it it wasn't handled, and +/// if the entire call is handled, return false if it wasn't handled, and /// optionally set 'WroteCallee' if the callee has already been printed out. bool CWriter::visitBuiltinCall(CallInst &I, Intrinsic::ID ID, bool &WroteCallee) { @@ -3706,7 +3707,7 @@ bool CTargetMachine::addPassesToEmitWholeFile(PassManager &PM, formatted_raw_ostream &o, CodeGenFileType FileType, CodeGenOpt::Level OptLevel) { - if (FileType != TargetMachine::AssemblyFile) return true; + if (FileType != TargetMachine::CGFT_AssemblyFile) return true; PM.add(createGCLoweringPass()); PM.add(createLowerInvokePass()); diff --git a/lib/Target/CBackend/Makefile b/lib/Target/CBackend/Makefile index f82d277..621948a 100644 --- a/lib/Target/CBackend/Makefile +++ b/lib/Target/CBackend/Makefile @@ -9,8 +9,6 @@ LEVEL = ../../.. LIBRARYNAME = LLVMCBackend -CXXFLAGS = -fno-rtti - DIRS = TargetInfo include $(LEVEL)/Makefile.common diff --git a/lib/Target/CBackend/TargetInfo/Makefile b/lib/Target/CBackend/TargetInfo/Makefile index 6407904..d4d5e15 100644 --- a/lib/Target/CBackend/TargetInfo/Makefile +++ b/lib/Target/CBackend/TargetInfo/Makefile @@ -8,7 +8,6 @@ ##===----------------------------------------------------------------------===## LEVEL = ../../../.. LIBRARYNAME = LLVMCBackendInfo -CXXFLAGS = -fno-rtti # Hack: we need to include 'main' target directory to grab private headers CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. diff --git a/lib/Target/CMakeLists.txt b/lib/Target/CMakeLists.txt index 10478b4..43ebdac 100644 --- a/lib/Target/CMakeLists.txt +++ b/lib/Target/CMakeLists.txt @@ -9,7 +9,6 @@ add_llvm_library(LLVMTarget TargetInstrInfo.cpp TargetIntrinsicInfo.cpp TargetLoweringObjectFile.cpp - TargetMachOWriterInfo.cpp TargetMachine.cpp TargetRegisterInfo.cpp TargetSubtarget.cpp diff --git a/lib/Target/CellSPU/AsmPrinter/Makefile b/lib/Target/CellSPU/AsmPrinter/Makefile index aa0db52..69639ef 100644 --- a/lib/Target/CellSPU/AsmPrinter/Makefile +++ b/lib/Target/CellSPU/AsmPrinter/Makefile @@ -9,7 +9,6 @@ LEVEL = ../../../.. 
LIBRARYNAME = LLVMCellSPUAsmPrinter -CXXFLAGS = -fno-rtti # Hack: we need to include 'main' CellSPU target directory to grab # private headers diff --git a/lib/Target/CellSPU/AsmPrinter/SPUAsmPrinter.cpp b/lib/Target/CellSPU/AsmPrinter/SPUAsmPrinter.cpp index 59d6ddd..2ca05c2 100644 --- a/lib/Target/CellSPU/AsmPrinter/SPUAsmPrinter.cpp +++ b/lib/Target/CellSPU/AsmPrinter/SPUAsmPrinter.cpp @@ -19,12 +19,8 @@ #include "llvm/Constants.h" #include "llvm/DerivedTypes.h" #include "llvm/Module.h" -#include "llvm/Assembly/Writer.h" #include "llvm/CodeGen/AsmPrinter.h" -#include "llvm/CodeGen/DwarfWriter.h" #include "llvm/CodeGen/MachineModuleInfo.h" -#include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineInstr.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCSymbol.h" @@ -33,25 +29,18 @@ #include "llvm/Target/TargetOptions.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetRegistry.h" -#include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringExtras.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FormattedStream.h" -#include "llvm/Support/MathExtras.h" using namespace llvm; namespace { - STATISTIC(EmittedInsts, "Number of machine instrs printed"); - - const std::string bss_section(".bss"); - class SPUAsmPrinter : public AsmPrinter { public: explicit SPUAsmPrinter(formatted_raw_ostream &O, TargetMachine &TM, - const MCAsmInfo *T, bool V) : - AsmPrinter(O, TM, T, V) {} + MCContext &Ctx, MCStreamer &Streamer, + const MCAsmInfo *T) : + AsmPrinter(O, TM, Ctx, Streamer, T) {} virtual const char *getPassName() const { return "STI CBEA SPU Assembly Printer"; @@ -67,7 +56,10 @@ namespace { static const char *getRegisterName(unsigned RegNo); - void printMachineInstruction(const MachineInstr *MI); + void EmitInstruction(const MachineInstr *MI) { + printInstruction(MI); + OutStreamer.AddBlankLine(); + } void printOp(const MachineOperand &MO); /// printRegister - Print register according to target requirements. @@ -276,29 +268,6 @@ namespace { llvm_unreachable("Invalid/non-immediate rotate amount in printRotateNeg7Imm"); } } - - virtual bool runOnMachineFunction(MachineFunction &F) = 0; - }; - - /// LinuxAsmPrinter - SPU assembly printer, customized for Linux - class LinuxAsmPrinter : public SPUAsmPrinter { - public: - explicit LinuxAsmPrinter(formatted_raw_ostream &O, TargetMachine &TM, - const MCAsmInfo *T, bool V) - : SPUAsmPrinter(O, TM, T, V) {} - - virtual const char *getPassName() const { - return "STI CBEA SPU Assembly Printer"; - } - - bool runOnMachineFunction(MachineFunction &F); - - void getAnalysisUsage(AnalysisUsage &AU) const { - AU.setPreservesAll(); - AU.addRequired<MachineModuleInfo>(); - AU.addRequired<DwarfWriter>(); - SPUAsmPrinter::getAnalysisUsage(AU); - } }; } // end of anonymous namespace @@ -312,7 +281,7 @@ void SPUAsmPrinter::printOp(const MachineOperand &MO) { return; case MachineOperand::MO_MachineBasicBlock: - O << *GetMBBSymbol(MO.getMBB()->getNumber()); + O << *MO.getMBB()->getSymbol(OutContext); return; case MachineOperand::MO_JumpTableIndex: O << MAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber() @@ -386,88 +355,7 @@ bool SPUAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, return false; } -/// printMachineInstruction -- Print out a single PowerPC MI in Darwin syntax -/// to the current output stream. 
-/// -void SPUAsmPrinter::printMachineInstruction(const MachineInstr *MI) { - ++EmittedInsts; - processDebugLoc(MI, true); - printInstruction(MI); - if (VerboseAsm) - EmitComments(*MI); - processDebugLoc(MI, false); - O << '\n'; -} - -/// runOnMachineFunction - This uses the printMachineInstruction() -/// method to print assembly for each instruction. -/// -bool LinuxAsmPrinter::runOnMachineFunction(MachineFunction &MF) { - this->MF = &MF; - - SetupMachineFunction(MF); - O << "\n\n"; - - // Print out constants referenced by the function - EmitConstantPool(MF.getConstantPool()); - - // Print out labels for the function. - const Function *F = MF.getFunction(); - - OutStreamer.SwitchSection(getObjFileLowering().SectionForGlobal(F, Mang, TM)); - EmitAlignment(MF.getAlignment(), F); - - switch (F->getLinkage()) { - default: llvm_unreachable("Unknown linkage type!"); - case Function::PrivateLinkage: - case Function::LinkerPrivateLinkage: - case Function::InternalLinkage: // Symbols default to internal. - break; - case Function::ExternalLinkage: - O << "\t.global\t" << *CurrentFnSym << "\n" << "\t.type\t"; - O << *CurrentFnSym << ", @function\n"; - break; - case Function::WeakAnyLinkage: - case Function::WeakODRLinkage: - case Function::LinkOnceAnyLinkage: - case Function::LinkOnceODRLinkage: - O << "\t.global\t" << *CurrentFnSym << "\n"; - O << "\t.weak_definition\t" << *CurrentFnSym << "\n"; - break; - } - - O << *CurrentFnSym << ":\n"; - - // Emit pre-function debug information. - DW->BeginFunction(&MF); - - // Print out code for the function. - for (MachineFunction::const_iterator I = MF.begin(), E = MF.end(); - I != E; ++I) { - // Print a label for the basic block. - if (I != MF.begin()) { - EmitBasicBlockStart(I); - } - for (MachineBasicBlock::const_iterator II = I->begin(), E = I->end(); - II != E; ++II) { - // Print the assembly for the instruction. - printMachineInstruction(II); - } - } - - O << "\t.size\t" << *CurrentFnSym << ",.-" << *CurrentFnSym << "\n"; - - // Print out jump tables referenced by the function. - EmitJumpTableInfo(MF.getJumpTableInfo(), MF); - - // Emit post-function debug information. - DW->EndFunction(&MF); - - // We didn't modify anything. - return false; -} - // Force static initialization. extern "C" void LLVMInitializeCellSPUAsmPrinter() { - RegisterAsmPrinter<LinuxAsmPrinter> X(TheCellSPUTarget); + RegisterAsmPrinter<SPUAsmPrinter> X(TheCellSPUTarget); } diff --git a/lib/Target/CellSPU/Makefile b/lib/Target/CellSPU/Makefile index 9f3ff74..cbdbd3c 100644 --- a/lib/Target/CellSPU/Makefile +++ b/lib/Target/CellSPU/Makefile @@ -10,8 +10,6 @@ LEVEL = ../../.. 
LIBRARYNAME = LLVMCellSPUCodeGen TARGET = SPU -CXXFLAGS = -fno-rtti - BUILT_SOURCES = SPUGenInstrNames.inc SPUGenRegisterNames.inc \ SPUGenAsmWriter.inc SPUGenCodeEmitter.inc \ SPUGenRegisterInfo.h.inc SPUGenRegisterInfo.inc \ diff --git a/lib/Target/CellSPU/SPUISelDAGToDAG.cpp b/lib/Target/CellSPU/SPUISelDAGToDAG.cpp index 80693e1..1ed06e3 100644 --- a/lib/Target/CellSPU/SPUISelDAGToDAG.cpp +++ b/lib/Target/CellSPU/SPUISelDAGToDAG.cpp @@ -314,7 +314,7 @@ namespace { return SelectCode(CurDAG->getLoad(vecVT, dl, CurDAG->getEntryNode(), CGPoolOffset, PseudoSourceValue::getConstantPool(), 0, - false, Alignment).getNode()); + false, false, Alignment).getNode()); } /// Select - Convert the specified operand from a target-independent to a diff --git a/lib/Target/CellSPU/SPUISelLowering.cpp b/lib/Target/CellSPU/SPUISelLowering.cpp index aa7f910..b21eb37 100644 --- a/lib/Target/CellSPU/SPUISelLowering.cpp +++ b/lib/Target/CellSPU/SPUISelLowering.cpp @@ -25,7 +25,7 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/SelectionDAG.h" -#include "llvm/Target/TargetLoweringObjectFile.h" +#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" #include "llvm/Target/TargetOptions.h" #include "llvm/ADT/VectorExtras.h" #include "llvm/Support/Debug.h" @@ -669,7 +669,7 @@ LowerLOAD(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) { // Re-emit as a v16i8 vector load result = DAG.getLoad(MVT::v16i8, dl, the_chain, basePtr, LN->getSrcValue(), LN->getSrcValueOffset(), - LN->isVolatile(), 16); + LN->isVolatile(), LN->isNonTemporal(), 16); // Update the chain the_chain = result.getValue(1); @@ -820,7 +820,7 @@ LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) { // Re-emit as a v16i8 vector load alignLoadVec = DAG.getLoad(MVT::v16i8, dl, the_chain, basePtr, SN->getSrcValue(), SN->getSrcValueOffset(), - SN->isVolatile(), 16); + SN->isVolatile(), SN->isNonTemporal(), 16); // Update the chain the_chain = alignLoadVec.getValue(1); @@ -861,7 +861,8 @@ LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) { result = DAG.getStore(the_chain, dl, result, basePtr, LN->getSrcValue(), LN->getSrcValueOffset(), - LN->isVolatile(), LN->getAlignment()); + LN->isVolatile(), LN->isNonTemporal(), + LN->getAlignment()); #if 0 && !defined(NDEBUG) if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) { @@ -1086,7 +1087,7 @@ SPUTargetLowering::LowerFormalArguments(SDValue Chain, // or we're forced to do vararg int FI = MFI->CreateFixedObject(ObjSize, ArgOffset, true, false); SDValue FIN = DAG.getFrameIndex(FI, PtrVT); - ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, NULL, 0); + ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, NULL, 0, false, false, 0); ArgOffset += StackSlotSize; } @@ -1108,7 +1109,8 @@ SPUTargetLowering::LowerFormalArguments(SDValue Chain, true, false); SDValue FIN = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT); SDValue ArgVal = DAG.getRegister(ArgRegs[ArgRegIdx], MVT::v16i8); - SDValue Store = DAG.getStore(Chain, dl, ArgVal, FIN, NULL, 0); + SDValue Store = DAG.getStore(Chain, dl, ArgVal, FIN, NULL, 0, + false, false, 0); Chain = Store.getOperand(0); MemOps.push_back(Store); @@ -1140,11 +1142,13 @@ static SDNode *isLSAAddress(SDValue Op, SelectionDAG &DAG) { SDValue SPUTargetLowering::LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg, - bool isTailCall, + bool &isTailCall, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, 
SmallVectorImpl<SDValue> &InVals) { + // CellSPU target does not yet support tail call optimization. + isTailCall = false; const SPUSubtarget *ST = SPUTM.getSubtargetImpl(); unsigned NumOps = Outs.size(); @@ -1188,7 +1192,8 @@ SPUTargetLowering::LowerCall(SDValue Chain, SDValue Callee, if (ArgRegIdx != NumArgRegs) { RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg)); } else { - MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0)); + MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0, + false, false, 0)); ArgOffset += StackSlotSize; } break; @@ -1197,7 +1202,8 @@ SPUTargetLowering::LowerCall(SDValue Chain, SDValue Callee, if (ArgRegIdx != NumArgRegs) { RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg)); } else { - MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0)); + MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0, + false, false, 0)); ArgOffset += StackSlotSize; } break; @@ -1210,7 +1216,8 @@ SPUTargetLowering::LowerCall(SDValue Chain, SDValue Callee, if (ArgRegIdx != NumArgRegs) { RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg)); } else { - MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0)); + MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0, + false, false, 0)); ArgOffset += StackSlotSize; } break; diff --git a/lib/Target/CellSPU/SPUISelLowering.h b/lib/Target/CellSPU/SPUISelLowering.h index ab349bb..3c51177 100644 --- a/lib/Target/CellSPU/SPUISelLowering.h +++ b/lib/Target/CellSPU/SPUISelLowering.h @@ -158,7 +158,7 @@ namespace llvm { virtual SDValue LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg, - bool isTailCall, + bool &isTailCall, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, diff --git a/lib/Target/CellSPU/SPUMCAsmInfo.cpp b/lib/Target/CellSPU/SPUMCAsmInfo.cpp index 03cdb29..5ef3c6b 100644 --- a/lib/Target/CellSPU/SPUMCAsmInfo.cpp +++ b/lib/Target/CellSPU/SPUMCAsmInfo.cpp @@ -16,7 +16,6 @@ using namespace llvm; SPULinuxMCAsmInfo::SPULinuxMCAsmInfo(const Target &T, const StringRef &TT) { ZeroDirective = "\t.space\t"; - SetDirective = "\t.set"; Data64bitsDirective = "\t.quad\t"; AlignmentIsInBytes = false; HasLCOMMDirective = true; @@ -31,7 +30,6 @@ SPULinuxMCAsmInfo::SPULinuxMCAsmInfo(const Target &T, const StringRef &TT) { HasDotLocAndDotFile = true; SupportsDebugInformation = true; - NeedsSet = true; // Exception handling is not supported on CellSPU (think about it: you only // have 256K for code+data. Would you support exception handling?) diff --git a/lib/Target/CellSPU/TargetInfo/Makefile b/lib/Target/CellSPU/TargetInfo/Makefile index 30ca5cf..9cb6827 100644 --- a/lib/Target/CellSPU/TargetInfo/Makefile +++ b/lib/Target/CellSPU/TargetInfo/Makefile @@ -8,7 +8,6 @@ ##===----------------------------------------------------------------------===## LEVEL = ../../../.. LIBRARYNAME = LLVMCellSPUInfo -CXXFLAGS = -fno-rtti # Hack: we need to include 'main' target directory to grab private headers CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. 
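Note on the DAG.getLoad/DAG.getStore churn in the CellSPU hunks (and the Alpha, Blackfin and MSP430 ones): SelectionDAG's memory-node builders grew an isNonTemporal flag, so every call site now spells out (isVolatile, isNonTemporal, Alignment) explicitly. The two call shapes, with argument roles annotated — a usage sketch against the post-patch signatures shown in the hunks:

    // Load: Alignment of 0 means "use the natural alignment of the type".
    SDValue Val = DAG.getLoad(MVT::i64, dl, Chain, Ptr,
                              SrcValue, /*SVOffset=*/0,
                              /*isVolatile=*/false,
                              /*isNonTemporal=*/false,
                              /*Alignment=*/0);
    // Store: chained on the load's output chain (value #1).
    SDValue St = DAG.getStore(Val.getValue(1), dl, Val, Ptr,
                              SrcValue, /*SVOffset=*/0,
                              false, false, 0);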
diff --git a/lib/Target/CppBackend/CPPBackend.cpp b/lib/Target/CppBackend/CPPBackend.cpp index 73272bc..e3f2e9f 100644 --- a/lib/Target/CppBackend/CPPBackend.cpp +++ b/lib/Target/CppBackend/CPPBackend.cpp @@ -307,8 +307,6 @@ namespace { Out << "GlobalValue::DLLExportLinkage"; break; case GlobalValue::ExternalWeakLinkage: Out << "GlobalValue::ExternalWeakLinkage"; break; - case GlobalValue::GhostLinkage: - Out << "GlobalValue::GhostLinkage"; break; case GlobalValue::CommonLinkage: Out << "GlobalValue::CommonLinkage"; break; } @@ -346,7 +344,7 @@ namespace { std::string CppWriter::getCppName(const Type* Ty) { // First, handle the primitive types .. easy - if (Ty->isPrimitiveType() || Ty->isInteger()) { + if (Ty->isPrimitiveType() || Ty->isIntegerTy()) { switch (Ty->getTypeID()) { case Type::VoidTyID: return "Type::getVoidTy(getGlobalContext())"; case Type::IntegerTyID: { @@ -472,6 +470,7 @@ namespace { HANDLE_ATTR(Nest); HANDLE_ATTR(ReadNone); HANDLE_ATTR(ReadOnly); + HANDLE_ATTR(InlineHint); HANDLE_ATTR(NoInline); HANDLE_ATTR(AlwaysInline); HANDLE_ATTR(OptimizeForSize); @@ -494,7 +493,7 @@ namespace { bool CppWriter::printTypeInternal(const Type* Ty) { // We don't print definitions for primitive types - if (Ty->isPrimitiveType() || Ty->isInteger()) + if (Ty->isPrimitiveType() || Ty->isIntegerTy()) return false; // If we already defined this type, we don't need to define it again. @@ -687,7 +686,7 @@ namespace { // For primitive types and types already defined, just add a name TypeMap::const_iterator TNI = TypeNames.find(TI->second); - if (TI->second->isInteger() || TI->second->isPrimitiveType() || + if (TI->second->isIntegerTy() || TI->second->isPrimitiveType() || TNI != TypeNames.end()) { Out << "mod->addTypeName(\""; printEscapedString(TI->first); @@ -2011,7 +2010,7 @@ bool CPPTargetMachine::addPassesToEmitWholeFile(PassManager &PM, formatted_raw_ostream &o, CodeGenFileType FileType, CodeGenOpt::Level OptLevel) { - if (FileType != TargetMachine::AssemblyFile) return true; + if (FileType != TargetMachine::CGFT_AssemblyFile) return true; PM.add(new CppWriter(o)); return false; } diff --git a/lib/Target/CppBackend/Makefile b/lib/Target/CppBackend/Makefile index 52f2aad..d75f4e8 100644 --- a/lib/Target/CppBackend/Makefile +++ b/lib/Target/CppBackend/Makefile @@ -9,8 +9,6 @@ LEVEL = ../../.. LIBRARYNAME = LLVMCppBackend -CXXFLAGS = -fno-rtti - DIRS = TargetInfo include $(LEVEL)/Makefile.common diff --git a/lib/Target/CppBackend/TargetInfo/Makefile b/lib/Target/CppBackend/TargetInfo/Makefile index 7e44aab..6e68283 100644 --- a/lib/Target/CppBackend/TargetInfo/Makefile +++ b/lib/Target/CppBackend/TargetInfo/Makefile @@ -8,7 +8,6 @@ ##===----------------------------------------------------------------------===## LEVEL = ../../../.. LIBRARYNAME = LLVMCppBackendInfo -CXXFLAGS = -fno-rtti # Hack: we need to include 'main' target directory to grab private headers CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. 
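Note: the isInteger()/isFloatingPoint() to isIntegerTy()/isFloatingPointTy() edits in the CBackend and CppBackend hunks (and the MSIL ones below) are pure renames of the Type predicates; behavior is unchanged. For illustration, the predicate pair used by isFPIntBitCast() above, pulled out as a standalone helper (hypothetical name):

    #include "llvm/Type.h"
    using namespace llvm;

    // True iff a bitcast between these types crosses the FP/integer divide.
    static bool crossesFPIntBoundary(const Type *SrcTy, const Type *DstTy) {
      return (SrcTy->isFloatingPointTy() && DstTy->isIntegerTy()) ||
             (DstTy->isFloatingPointTy() && SrcTy->isIntegerTy());
    }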
diff --git a/lib/Target/MSIL/MSILWriter.cpp b/lib/Target/MSIL/MSILWriter.cpp index 1bc708e..3330d09 100644 --- a/lib/Target/MSIL/MSILWriter.cpp +++ b/lib/Target/MSIL/MSILWriter.cpp @@ -187,7 +187,7 @@ void MSILWriter::printModuleStartup() { break; case 1: Arg1 = F->arg_begin(); - if (Arg1->getType()->isInteger()) { + if (Arg1->getType()->isIntegerTy()) { Out << "\tldloc\targc\n"; Args = getTypeName(Arg1->getType()); BadSig = false; @@ -195,7 +195,7 @@ void MSILWriter::printModuleStartup() { break; case 2: Arg1 = Arg2 = F->arg_begin(); ++Arg2; - if (Arg1->getType()->isInteger() && + if (Arg1->getType()->isIntegerTy() && Arg2->getType()->getTypeID() == Type::PointerTyID) { Out << "\tldloc\targc\n\tldloc\targv\n"; Args = getTypeName(Arg1->getType())+","+getTypeName(Arg2->getType()); @@ -207,7 +207,7 @@ void MSILWriter::printModuleStartup() { } bool RetVoid = (F->getReturnType()->getTypeID() == Type::VoidTyID); - if (BadSig || (!F->getReturnType()->isInteger() && !RetVoid)) { + if (BadSig || (!F->getReturnType()->isIntegerTy() && !RetVoid)) { Out << "\tldc.i4.0\n"; } else { Out << "\tcall\t" << getTypeName(F->getReturnType()) << @@ -334,7 +334,7 @@ std::string MSILWriter::getPrimitiveTypeName(const Type* Ty, bool isSigned) { std::string MSILWriter::getTypeName(const Type* Ty, bool isSigned, bool isNested) { - if (Ty->isPrimitiveType() || Ty->isInteger()) + if (Ty->isPrimitiveType() || Ty->isIntegerTy()) return getPrimitiveTypeName(Ty,isSigned); // FIXME: "OpaqueType" support switch (Ty->getTypeID()) { @@ -1690,7 +1690,7 @@ bool MSILTarget::addPassesToEmitWholeFile(PassManager &PM, CodeGenFileType FileType, CodeGenOpt::Level OptLevel) { - if (FileType != TargetMachine::AssemblyFile) return true; + if (FileType != TargetMachine::CGFT_AssemblyFile) return true; MSILWriter* Writer = new MSILWriter(o); PM.add(createGCLoweringPass()); // FIXME: Handle switch through native IL instruction "switch" diff --git a/lib/Target/MSIL/Makefile b/lib/Target/MSIL/Makefile index 9fecba5..70eadb3 100644 --- a/lib/Target/MSIL/Makefile +++ b/lib/Target/MSIL/Makefile @@ -9,8 +9,6 @@ LEVEL = ../../.. LIBRARYNAME = LLVMMSIL -CXXFLAGS = -fno-rtti - DIRS = TargetInfo include $(LEVEL)/Makefile.common diff --git a/lib/Target/MSIL/TargetInfo/Makefile b/lib/Target/MSIL/TargetInfo/Makefile index 710f5a1..30b0950 100644 --- a/lib/Target/MSIL/TargetInfo/Makefile +++ b/lib/Target/MSIL/TargetInfo/Makefile @@ -8,7 +8,6 @@ ##===----------------------------------------------------------------------===## LEVEL = ../../../.. LIBRARYNAME = LLVMMSILInfo -CXXFLAGS = -fno-rtti # Hack: we need to include 'main' target directory to grab private headers CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. 
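Note: all three whole-file writers (CBackend, CppBackend, MSIL) pick up the same enum rename, AssemblyFile to CGFT_AssemblyFile. A sketch of the guard these backends share; the target and pass names are placeholders:

    bool SomeWriterTarget::addPassesToEmitWholeFile(PassManager &PM,
                                                    formatted_raw_ostream &o,
                                                    CodeGenFileType FileType,
                                                    CodeGenOpt::Level OptLevel) {
      // These writers only produce textual output; report failure (true)
      // for any other requested file type.
      if (FileType != TargetMachine::CGFT_AssemblyFile) return true;
      PM.add(new SomeWriterPass(o));  // placeholder pass
      return false;
    }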
diff --git a/lib/Target/MSP430/AsmPrinter/MSP430AsmPrinter.cpp b/lib/Target/MSP430/AsmPrinter/MSP430AsmPrinter.cpp index 6033197..def5fc6 100644 --- a/lib/Target/MSP430/AsmPrinter/MSP430AsmPrinter.cpp +++ b/lib/Target/MSP430/AsmPrinter/MSP430AsmPrinter.cpp @@ -35,24 +35,16 @@ #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetRegistry.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/Support/CommandLine.h" #include "llvm/Support/FormattedStream.h" -#include "llvm/Support/ErrorHandling.h" using namespace llvm; -STATISTIC(EmittedInsts, "Number of machine instrs printed"); - -static cl::opt<bool> -EnableMCInst("enable-msp430-mcinst-printer", cl::Hidden, - cl::desc("enable experimental mcinst gunk in the msp430 backend")); - namespace { class MSP430AsmPrinter : public AsmPrinter { public: MSP430AsmPrinter(formatted_raw_ostream &O, TargetMachine &TM, - const MCAsmInfo *MAI, bool V) - : AsmPrinter(O, TM, MAI, V) {} + MCContext &Ctx, MCStreamer &Streamer, + const MCAsmInfo *MAI) + : AsmPrinter(O, TM, Ctx, Streamer, MAI) {} virtual const char *getPassName() const { return "MSP430 Assembly Printer"; @@ -76,10 +68,7 @@ namespace { bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo, unsigned AsmVariant, const char *ExtraCode); - void printInstructionThroughMCStreamer(const MachineInstr *MI); - - void emitFunctionHeader(const MachineFunction &MF); - bool runOnMachineFunction(MachineFunction &F); + void EmitInstruction(const MachineInstr *MI); void getAnalysisUsage(AnalysisUsage &AU) const { AsmPrinter::getAnalysisUsage(AU); @@ -89,81 +78,11 @@ namespace { } // end of anonymous namespace -void MSP430AsmPrinter::emitFunctionHeader(const MachineFunction &MF) { - const Function *F = MF.getFunction(); - - OutStreamer.SwitchSection(getObjFileLowering().SectionForGlobal(F, Mang, TM)); - - unsigned FnAlign = MF.getAlignment(); - EmitAlignment(FnAlign, F); - - switch (F->getLinkage()) { - default: llvm_unreachable("Unknown linkage type!"); - case Function::InternalLinkage: // Symbols default to internal. - case Function::PrivateLinkage: - case Function::LinkerPrivateLinkage: - break; - case Function::ExternalLinkage: - O << "\t.globl\t" << *CurrentFnSym << '\n'; - break; - case Function::LinkOnceAnyLinkage: - case Function::LinkOnceODRLinkage: - case Function::WeakAnyLinkage: - case Function::WeakODRLinkage: - O << "\t.weak\t" << *CurrentFnSym << '\n'; - break; - } - - printVisibility(CurrentFnSym, F->getVisibility()); - - O << "\t.type\t" << *CurrentFnSym << ",@function\n"; - O << *CurrentFnSym << ":\n"; -} - -bool MSP430AsmPrinter::runOnMachineFunction(MachineFunction &MF) { - SetupMachineFunction(MF); - O << "\n\n"; - - // Print the 'header' of function - emitFunctionHeader(MF); - - // Print out code for the function. - for (MachineFunction::const_iterator I = MF.begin(), E = MF.end(); - I != E; ++I) { - // Print a label for the basic block. - EmitBasicBlockStart(I); - - for (MachineBasicBlock::const_iterator II = I->begin(), E = I->end(); - II != E; ++II) - // Print the assembly for the instruction. 
- printMachineInstruction(II); - } - - if (MAI->hasDotTypeDotSizeDirective()) - O << "\t.size\t" << *CurrentFnSym << ", .-" << *CurrentFnSym << '\n'; - - // We didn't modify anything - return false; -} - -void MSP430AsmPrinter::printMachineInstruction(const MachineInstr *MI) { - ++EmittedInsts; - - processDebugLoc(MI, true); - - printInstructionThroughMCStreamer(MI); - - if (VerboseAsm) - EmitComments(*MI); - O << '\n'; - - processDebugLoc(MI, false); -} - void MSP430AsmPrinter::printOperand(const MachineInstr *MI, int OpNum, const char* Modifier) { const MachineOperand &MO = MI->getOperand(OpNum); switch (MO.getType()) { + default: assert(0 && "Not implemented yet!"); case MachineOperand::MO_Register: O << MSP430InstPrinter::getRegisterName(MO.getReg()); return; @@ -173,7 +92,7 @@ void MSP430AsmPrinter::printOperand(const MachineInstr *MI, int OpNum, O << MO.getImm(); return; case MachineOperand::MO_MachineBasicBlock: - O << *GetMBBSymbol(MO.getMBB()->getNumber()); + O << *MO.getMBB()->getSymbol(OutContext); return; case MachineOperand::MO_GlobalAddress: { bool isMemOp = Modifier && !strcmp(Modifier, "mem"); @@ -196,8 +115,6 @@ void MSP430AsmPrinter::printOperand(const MachineInstr *MI, int OpNum, O << MAI->getGlobalPrefix() << MO.getSymbolName(); return; } - default: - llvm_unreachable("Not implemented yet!"); } } @@ -226,30 +143,14 @@ void MSP430AsmPrinter::printSrcMemOperand(const MachineInstr *MI, int OpNum, } void MSP430AsmPrinter::printCCOperand(const MachineInstr *MI, int OpNum) { - unsigned CC = MI->getOperand(OpNum).getImm(); - - switch (CC) { - default: - llvm_unreachable("Unsupported CC code"); - break; - case MSP430CC::COND_E: - O << "eq"; - break; - case MSP430CC::COND_NE: - O << "ne"; - break; - case MSP430CC::COND_HS: - O << "hs"; - break; - case MSP430CC::COND_LO: - O << "lo"; - break; - case MSP430CC::COND_GE: - O << "ge"; - break; - case MSP430CC::COND_L: - O << 'l'; - break; + switch (MI->getOperand(OpNum).getImm()) { + default: assert(0 && "Unknown cond"); + case MSP430CC::COND_E: O << "eq"; break; + case MSP430CC::COND_NE: O << "ne"; break; + case MSP430CC::COND_HS: O << "hs"; break; + case MSP430CC::COND_LO: O << "lo"; break; + case MSP430CC::COND_GE: O << "ge"; break; + case MSP430CC::COND_L: O << 'l'; break; } } @@ -277,32 +178,12 @@ bool MSP430AsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, } //===----------------------------------------------------------------------===// -void MSP430AsmPrinter::printInstructionThroughMCStreamer(const MachineInstr *MI){ - +void MSP430AsmPrinter::EmitInstruction(const MachineInstr *MI) { MSP430MCInstLower MCInstLowering(OutContext, *Mang, *this); - switch (MI->getOpcode()) { - case TargetInstrInfo::DBG_LABEL: - case TargetInstrInfo::EH_LABEL: - case TargetInstrInfo::GC_LABEL: - printLabel(MI); - return; - case TargetInstrInfo::KILL: - printKill(MI); - return; - case TargetInstrInfo::INLINEASM: - printInlineAsm(MI); - return; - case TargetInstrInfo::IMPLICIT_DEF: - printImplicitDef(MI); - return; - default: break; - } - MCInst TmpInst; MCInstLowering.Lower(MI, TmpInst); - - printMCInst(&TmpInst); + OutStreamer.EmitInstruction(TmpInst); } static MCInstPrinter *createMSP430MCInstPrinter(const Target &T, diff --git a/lib/Target/MSP430/AsmPrinter/MSP430InstPrinter.cpp b/lib/Target/MSP430/AsmPrinter/MSP430InstPrinter.cpp index a480307..f6565bd 100644 --- a/lib/Target/MSP430/AsmPrinter/MSP430InstPrinter.cpp +++ b/lib/Target/MSP430/AsmPrinter/MSP430InstPrinter.cpp @@ -25,7 +25,6 @@ using namespace llvm; // Include the 
auto-generated portion of the assembly writer. #define MachineInstr MCInst -#define NO_ASM_WRITER_BOILERPLATE #include "MSP430GenAsmWriter.inc" #undef MachineInstr diff --git a/lib/Target/MSP430/AsmPrinter/MSP430MCInstLower.cpp b/lib/Target/MSP430/AsmPrinter/MSP430MCInstLower.cpp index e1f80b7..4eb7f3d 100644 --- a/lib/Target/MSP430/AsmPrinter/MSP430MCInstLower.cpp +++ b/lib/Target/MSP430/AsmPrinter/MSP430MCInstLower.cpp @@ -116,7 +116,7 @@ void MSP430MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const { break; case MachineOperand::MO_MachineBasicBlock: MCOp = MCOperand::CreateExpr(MCSymbolRefExpr::Create( - Printer.GetMBBSymbol(MO.getMBB()->getNumber()), Ctx)); + MO.getMBB()->getSymbol(Printer.OutContext), Ctx)); break; case MachineOperand::MO_GlobalAddress: MCOp = LowerSymbolOperand(MO, GetGlobalAddressSymbol(MO)); diff --git a/lib/Target/MSP430/AsmPrinter/Makefile b/lib/Target/MSP430/AsmPrinter/Makefile index c8a44a1..4f340c6 100644 --- a/lib/Target/MSP430/AsmPrinter/Makefile +++ b/lib/Target/MSP430/AsmPrinter/Makefile @@ -8,7 +8,6 @@ ##===----------------------------------------------------------------------===## LEVEL = ../../../.. LIBRARYNAME = LLVMMSP430AsmPrinter -CXXFLAGS = -fno-rtti # Hack: we need to include 'main' MSP430 target directory to grab private headers CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. diff --git a/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp b/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp index 4eec757..a8c5e0a 100644 --- a/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp +++ b/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp @@ -133,8 +133,7 @@ namespace { bool MatchWrapper(SDValue N, MSP430ISelAddressMode &AM); bool MatchAddressBase(SDValue N, MSP430ISelAddressMode &AM); - bool IsLegalAndProfitableToFold(SDNode *N, SDNode *U, - SDNode *Root) const; + bool IsLegalToFold(SDValue N, SDNode *U, SDNode *Root) const; virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode, @@ -336,8 +335,8 @@ SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode, return false; } -bool MSP430DAGToDAGISel::IsLegalAndProfitableToFold(SDNode *N, SDNode *U, - SDNode *Root) const { +bool MSP430DAGToDAGISel::IsLegalToFold(SDValue N, SDNode *U, + SDNode *Root) const { if (OptLevel == CodeGenOpt::None) return false; /// RMW preprocessing creates the following code: @@ -364,11 +363,11 @@ bool MSP430DAGToDAGISel::IsLegalAndProfitableToFold(SDNode *N, SDNode *U, /// during preprocessing) to determine whether it's legal to introduce such /// "cycle" for a moment. 
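A note on the updated hook before the lookup below: IsLegalToFold now receives an SDValue rather than an SDNode*, i.e. a node together with a result number, so identity checks against a stored SDNode* must go through getNode(). A minimal sketch (hypothetical helper, not part of the patch):

// An SDValue pairs a node with a result index; any result of N
// satisfies this check, which is exactly what the RMW lookup wants.
static bool refersToNode(SDValue V, const SDNode *N) {
  return V.getNode() == N;
}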
DenseMap<SDNode*, SDNode*>::const_iterator I = RMWStores.find(Root); - if (I != RMWStores.end() && I->second == N) + if (I != RMWStores.end() && I->second == N.getNode()) return true; // Proceed to 'generic' cycle finder code - return SelectionDAGISel::IsLegalAndProfitableToFold(N, U, Root); + return SelectionDAGISel::IsLegalToFold(N, U, Root); } @@ -656,7 +655,7 @@ SDNode *MSP430DAGToDAGISel::SelectIndexedBinOp(SDNode *Op, unsigned Opc8, unsigned Opc16) { if (N1.getOpcode() == ISD::LOAD && N1.hasOneUse() && - IsLegalAndProfitableToFold(N1.getNode(), Op, Op)) { + IsLegalToFold(N1, Op, Op)) { LoadSDNode *LD = cast<LoadSDNode>(N1); if (!isValidIndexedLoad(LD)) return NULL; diff --git a/lib/Target/MSP430/MSP430ISelLowering.cpp b/lib/Target/MSP430/MSP430ISelLowering.cpp index b794911..7281b37 100644 --- a/lib/Target/MSP430/MSP430ISelLowering.cpp +++ b/lib/Target/MSP430/MSP430ISelLowering.cpp @@ -31,8 +31,8 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/CodeGen/SelectionDAGISel.h" +#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" #include "llvm/CodeGen/ValueTypes.h" -#include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" @@ -273,11 +273,13 @@ MSP430TargetLowering::LowerFormalArguments(SDValue Chain, SDValue MSP430TargetLowering::LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg, - bool isTailCall, + bool &isTailCall, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) { + // MSP430 target does not yet support tail call optimization. + isTailCall = false; switch (CallConv) { default: @@ -369,7 +371,8 @@ MSP430TargetLowering::LowerCCCArguments(SDValue Chain, //from this parameter SDValue FIN = DAG.getFrameIndex(FI, MVT::i16); InVals.push_back(DAG.getLoad(VA.getLocVT(), dl, Chain, FIN, - PseudoSourceValue::getFixedStack(FI), 0)); + PseudoSourceValue::getFixedStack(FI), 0, + false, false, 0)); } } @@ -498,7 +501,7 @@ MSP430TargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee, MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, PseudoSourceValue::getStack(), - VA.getLocMemOffset())); + VA.getLocMemOffset(), false, false, 0)); } } @@ -891,13 +894,13 @@ SDValue MSP430TargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) { return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), DAG.getNode(ISD::ADD, dl, getPointerTy(), FrameAddr, Offset), - NULL, 0); + NULL, 0, false, false, 0); } // Just load the return address. 
SDValue RetAddrFI = getReturnAddressFrameIndex(DAG); return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), - RetAddrFI, NULL, 0); + RetAddrFI, NULL, 0, false, false, 0); } SDValue MSP430TargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) { @@ -909,7 +912,8 @@ SDValue MSP430TargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) { SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, MSP430::FPW, VT); while (Depth--) - FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr, NULL, 0); + FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr, NULL, 0, + false, false, 0); return FrameAddr; } @@ -969,7 +973,7 @@ const char *MSP430TargetLowering::getTargetNodeName(unsigned Opcode) const { bool MSP430TargetLowering::isTruncateFree(const Type *Ty1, const Type *Ty2) const { - if (!Ty1->isInteger() || !Ty2->isInteger()) + if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy()) return false; return (Ty1->getPrimitiveSizeInBits() > Ty2->getPrimitiveSizeInBits()); @@ -984,7 +988,7 @@ bool MSP430TargetLowering::isTruncateFree(EVT VT1, EVT VT2) const { bool MSP430TargetLowering::isZExtFree(const Type *Ty1, const Type *Ty2) const { // MSP430 implicitly zero-extends 8-bit results in 16-bit registers. - return 0 && Ty1->isInteger(8) && Ty2->isInteger(16); + return 0 && Ty1->isIntegerTy(8) && Ty2->isIntegerTy(16); } bool MSP430TargetLowering::isZExtFree(EVT VT1, EVT VT2) const { diff --git a/lib/Target/MSP430/MSP430ISelLowering.h b/lib/Target/MSP430/MSP430ISelLowering.h index 6152a05..87a790b 100644 --- a/lib/Target/MSP430/MSP430ISelLowering.h +++ b/lib/Target/MSP430/MSP430ISelLowering.h @@ -154,7 +154,7 @@ namespace llvm { SmallVectorImpl<SDValue> &InVals); virtual SDValue LowerCall(SDValue Chain, SDValue Callee, - CallingConv::ID CallConv, bool isVarArg, bool isTailCall, + CallingConv::ID CallConv, bool isVarArg, bool &isTailCall, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, diff --git a/lib/Target/MSP430/MSP430InstrInfo.cpp b/lib/Target/MSP430/MSP430InstrInfo.cpp index 9dc69e0..6372482 100644 --- a/lib/Target/MSP430/MSP430InstrInfo.cpp +++ b/lib/Target/MSP430/MSP430InstrInfo.cpp @@ -356,12 +356,12 @@ unsigned MSP430InstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const { switch (Desc.getOpcode()) { default: assert(0 && "Unknown instruction size!"); - case TargetInstrInfo::DBG_LABEL: - case TargetInstrInfo::EH_LABEL: - case TargetInstrInfo::IMPLICIT_DEF: - case TargetInstrInfo::KILL: + case TargetOpcode::DBG_LABEL: + case TargetOpcode::EH_LABEL: + case TargetOpcode::IMPLICIT_DEF: + case TargetOpcode::KILL: return 0; - case TargetInstrInfo::INLINEASM: { + case TargetOpcode::INLINEASM: { const MachineFunction *MF = MI->getParent()->getParent(); const TargetInstrInfo &TII = *MF->getTarget().getInstrInfo(); return TII.getInlineAsmLength(MI->getOperand(0).getSymbolName(), diff --git a/lib/Target/MSP430/MSP430InstrInfo.td b/lib/Target/MSP430/MSP430InstrInfo.td index cd502cf..bb06f7b 100644 --- a/lib/Target/MSP430/MSP430InstrInfo.td +++ b/lib/Target/MSP430/MSP430InstrInfo.td @@ -285,7 +285,7 @@ def MOV16rm_POST : IForm16<0x0, DstReg, SrcPostInc, Size2Bytes, // up to 16 bits. 
def def8 : PatLeaf<(i8 GR8:$src), [{ return N->getOpcode() != ISD::TRUNCATE && - N->getOpcode() != TargetInstrInfo::EXTRACT_SUBREG && + N->getOpcode() != TargetOpcode::EXTRACT_SUBREG && N->getOpcode() != ISD::CopyFromReg; }]>; diff --git a/lib/Target/MSP430/MSP430MCAsmInfo.cpp b/lib/Target/MSP430/MSP430MCAsmInfo.cpp index 516eacb..cfb499d 100644 --- a/lib/Target/MSP430/MSP430MCAsmInfo.cpp +++ b/lib/Target/MSP430/MSP430MCAsmInfo.cpp @@ -17,7 +17,6 @@ using namespace llvm; MSP430MCAsmInfo::MSP430MCAsmInfo(const Target &T, const StringRef &TT) { PrivateGlobalPrefix = ".L"; WeakRefDirective ="\t.weak\t"; - SetDirective = "\t.set\t"; PCSymbol="."; CommentString = ";"; diff --git a/lib/Target/MSP430/Makefile b/lib/Target/MSP430/Makefile index 11195a4..b1f33d6 100644 --- a/lib/Target/MSP430/Makefile +++ b/lib/Target/MSP430/Makefile @@ -10,7 +10,6 @@ LEVEL = ../../.. LIBRARYNAME = LLVMMSP430CodeGen TARGET = MSP430 -CXXFLAGS = -fno-rtti # Make sure that tblgen is run, first thing. BUILT_SOURCES = MSP430GenRegisterInfo.h.inc MSP430GenRegisterNames.inc \ diff --git a/lib/Target/MSP430/TargetInfo/Makefile b/lib/Target/MSP430/TargetInfo/Makefile index d17fa7b..abb08f2 100644 --- a/lib/Target/MSP430/TargetInfo/Makefile +++ b/lib/Target/MSP430/TargetInfo/Makefile @@ -8,7 +8,6 @@ ##===----------------------------------------------------------------------===## LEVEL = ../../../.. LIBRARYNAME = LLVMMSP430Info -CXXFLAGS = -fno-rtti # Hack: we need to include 'main' target directory to grab private headers CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. diff --git a/lib/Target/Makefile b/lib/Target/Makefile index 281d58b..50a360f 100644 --- a/lib/Target/Makefile +++ b/lib/Target/Makefile @@ -10,7 +10,6 @@ LEVEL = ../.. LIBRARYNAME = LLVMTarget BUILD_ARCHIVE = 1 -CXXFLAGS = -fno-rtti # We include this early so we can access the value of TARGETS_TO_BUILD as the # value for PARALLEL_DIRS which must be set before Makefile.rules is included diff --git a/lib/Target/Mips/AsmPrinter/Makefile b/lib/Target/Mips/AsmPrinter/Makefile index aed801e..a2fecf4 100644 --- a/lib/Target/Mips/AsmPrinter/Makefile +++ b/lib/Target/Mips/AsmPrinter/Makefile @@ -9,7 +9,6 @@ LEVEL = ../../../.. 
LIBRARYNAME = LLVMMipsAsmPrinter -CXXFLAGS = -fno-rtti # Hack: we need to include 'main' Mips target directory to grab # private headers diff --git a/lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp b/lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp index 9af9bd8..b8641c3 100644 --- a/lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp +++ b/lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp @@ -37,7 +37,6 @@ #include "llvm/Target/TargetOptions.h" #include "llvm/Target/TargetRegistry.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringExtras.h" #include "llvm/Support/Debug.h" #include "llvm/Support/CommandLine.h" @@ -46,15 +45,14 @@ #include <cctype> using namespace llvm; -STATISTIC(EmittedInsts, "Number of machine instrs printed"); - namespace { class MipsAsmPrinter : public AsmPrinter { const MipsSubtarget *Subtarget; public: explicit MipsAsmPrinter(formatted_raw_ostream &O, TargetMachine &TM, - const MCAsmInfo *T, bool V) - : AsmPrinter(O, TM, T, V) { + MCContext &Ctx, MCStreamer &Streamer, + const MCAsmInfo *T) + : AsmPrinter(O, TM, Ctx, Streamer, T) { Subtarget = &TM.getSubtarget<MipsSubtarget>(); } @@ -70,18 +68,22 @@ namespace { const char *Modifier = 0); void printFCCOperand(const MachineInstr *MI, int opNum, const char *Modifier = 0); - void printSavedRegsBitmask(MachineFunction &MF); + void printSavedRegsBitmask(); void printHex32(unsigned int Value); const char *emitCurrentABIString(); - void emitFunctionStart(MachineFunction &MF); - void emitFunctionEnd(MachineFunction &MF); - void emitFrameDirective(MachineFunction &MF); + void emitFrameDirective(); void printInstruction(const MachineInstr *MI); // autogenerated. + void EmitInstruction(const MachineInstr *MI) { + printInstruction(MI); + OutStreamer.AddBlankLine(); + } + virtual void EmitFunctionBodyStart(); + virtual void EmitFunctionBodyEnd(); static const char *getRegisterName(unsigned RegNo); - bool runOnMachineFunction(MachineFunction &F); + virtual void EmitFunctionEntryLabel(); void EmitStartOfAsmFile(Module &M); }; } // end of anonymous namespace @@ -125,18 +127,16 @@ namespace { // Create a bitmask with all callee saved registers for CPU or Floating Point // registers. For CPU registers consider RA, GP and FP for saving if necessary. -void MipsAsmPrinter:: -printSavedRegsBitmask(MachineFunction &MF) -{ +void MipsAsmPrinter::printSavedRegsBitmask() { const TargetRegisterInfo &RI = *TM.getRegisterInfo(); - MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>(); + const MipsFunctionInfo *MipsFI = MF->getInfo<MipsFunctionInfo>(); // CPU and FPU Saved Registers Bitmasks unsigned int CPUBitmask = 0; unsigned int FPUBitmask = 0; // Set the CPU and FPU Bitmasks - MachineFrameInfo *MFI = MF.getFrameInfo(); + const MachineFrameInfo *MFI = MF->getFrameInfo(); const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo(); for (unsigned i = 0, e = CSI.size(); i != e; ++i) { unsigned RegNum = MipsRegisterInfo::getRegisterNumbering(CSI[i].getReg()); @@ -147,11 +147,11 @@ printSavedRegsBitmask(MachineFunction &MF) } // Return Address and Frame registers must also be set in CPUBitmask. 
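As a worked example of the bitmask built here (the saved set is hypothetical; register numbers are the usual MIPS O32 assignments, and the directive name is an assumption, since only printHex32 is visible in this hunk):

// A function saving $s0 and $ra would produce:
unsigned CPUBitmask = 0;
CPUBitmask |= 1u << 16;   // $s0 is register number 16
CPUBitmask |= 1u << 31;   // $ra is register number 31
// printHex32(CPUBitmask) then prints 0x80010000 after the mask directive.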
- if (RI.hasFP(MF)) + if (RI.hasFP(*MF)) CPUBitmask |= (1 << MipsRegisterInfo:: - getRegisterNumbering(RI.getFrameRegister(MF))); + getRegisterNumbering(RI.getFrameRegister(*MF))); - if (MF.getFrameInfo()->hasCalls()) + if (MFI->hasCalls()) CPUBitmask |= (1 << MipsRegisterInfo:: getRegisterNumbering(RI.getRARegister())); @@ -178,12 +178,12 @@ printHex32(unsigned int Value) //===----------------------------------------------------------------------===// /// Frame Directive -void MipsAsmPrinter::emitFrameDirective(MachineFunction &MF) { +void MipsAsmPrinter::emitFrameDirective() { const TargetRegisterInfo &RI = *TM.getRegisterInfo(); - unsigned stackReg = RI.getFrameRegister(MF); + unsigned stackReg = RI.getFrameRegister(*MF); unsigned returnReg = RI.getRARegister(); - unsigned stackSize = MF.getFrameInfo()->getStackSize(); + unsigned stackSize = MF->getFrameInfo()->getStackSize(); O << "\t.frame\t" << '$' << LowercaseString(getRegisterName(stackReg)) @@ -207,96 +207,30 @@ const char *MipsAsmPrinter::emitCurrentABIString() { return NULL; } -/// Emit the directives used by GAS on the start of functions -void MipsAsmPrinter::emitFunctionStart(MachineFunction &MF) { - // Print out the label for the function. - const Function *F = MF.getFunction(); - OutStreamer.SwitchSection(getObjFileLowering().SectionForGlobal(F, Mang, TM)); - - // 2 bits aligned - EmitAlignment(MF.getAlignment(), F); - - O << "\t.globl\t" << *CurrentFnSym << '\n'; +void MipsAsmPrinter::EmitFunctionEntryLabel() { O << "\t.ent\t" << *CurrentFnSym << '\n'; + OutStreamer.EmitLabel(CurrentFnSym); +} - printVisibility(CurrentFnSym, F->getVisibility()); - - if ((MAI->hasDotTypeDotSizeDirective()) && Subtarget->isLinux()) - O << "\t.type\t" << *CurrentFnSym << ", @function\n"; - - O << *CurrentFnSym << ":\n"; - - emitFrameDirective(MF); - printSavedRegsBitmask(MF); - - O << '\n'; +/// EmitFunctionBodyStart - Targets can override this to emit stuff before +/// the first basic block in the function. +void MipsAsmPrinter::EmitFunctionBodyStart() { + emitFrameDirective(); + printSavedRegsBitmask(); } -/// Emit the directives used by GAS on the end of functions -void MipsAsmPrinter::emitFunctionEnd(MachineFunction &MF) { +/// EmitFunctionBodyEnd - Targets can override this to emit stuff after +/// the last basic block in the function. +void MipsAsmPrinter::EmitFunctionBodyEnd() { // There are instruction for this macros, but they must // always be at the function end, and we can't emit and // break with BB logic. O << "\t.set\tmacro\n"; O << "\t.set\treorder\n"; - + O << "\t.end\t" << *CurrentFnSym << '\n'; - if (MAI->hasDotTypeDotSizeDirective() && !Subtarget->isLinux()) - O << "\t.size\t" << *CurrentFnSym << ", .-" << *CurrentFnSym << '\n'; } -/// runOnMachineFunction - This uses the printMachineInstruction() -/// method to print assembly for each instruction. -bool MipsAsmPrinter::runOnMachineFunction(MachineFunction &MF) { - this->MF = &MF; - - SetupMachineFunction(MF); - - // Print out constants referenced by the function - EmitConstantPool(MF.getConstantPool()); - - // Print out jump tables referenced by the function - EmitJumpTableInfo(MF.getJumpTableInfo(), MF); - - O << "\n\n"; - - // Emit the function start directives - emitFunctionStart(MF); - - // Print out code for the function. - for (MachineFunction::const_iterator I = MF.begin(), E = MF.end(); - I != E; ++I) { - - // Print a label for the basic block. 
- if (I != MF.begin()) { - EmitBasicBlockStart(I); - } - - for (MachineBasicBlock::const_iterator II = I->begin(), E = I->end(); - II != E; ++II) { - processDebugLoc(II, true); - - // Print the assembly for the instruction. - printInstruction(II); - - if (VerboseAsm) - EmitComments(*II); - O << '\n'; - - processDebugLoc(II, false); - ++EmittedInsts; - } - - // Each Basic Block is separated by a newline - O << '\n'; - } - - // Emit function end directives - emitFunctionEnd(MF); - - // We didn't modify anything. - return false; -} // Print out an operand for an inline asm expression. bool MipsAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, @@ -343,7 +277,7 @@ void MipsAsmPrinter::printOperand(const MachineInstr *MI, int opNum) { break; case MachineOperand::MO_MachineBasicBlock: - O << *GetMBBSymbol(MO.getMBB()->getNumber()); + O << *MO.getMBB()->getSymbol(OutContext); return; case MachineOperand::MO_GlobalAddress: diff --git a/lib/Target/Mips/Makefile b/lib/Target/Mips/Makefile index 4e4d874..2ed8d77 100644 --- a/lib/Target/Mips/Makefile +++ b/lib/Target/Mips/Makefile @@ -10,7 +10,6 @@ LEVEL = ../../.. LIBRARYNAME = LLVMMipsCodeGen TARGET = Mips -CXXFLAGS = -fno-rtti # Make sure that tblgen is run, first thing. BUILT_SOURCES = MipsGenRegisterInfo.h.inc MipsGenRegisterNames.inc \ diff --git a/lib/Target/Mips/MipsISelDAGToDAG.cpp b/lib/Target/Mips/MipsISelDAGToDAG.cpp index e3a45d2..f1d4a67 100644 --- a/lib/Target/Mips/MipsISelDAGToDAG.cpp +++ b/lib/Target/Mips/MipsISelDAGToDAG.cpp @@ -245,7 +245,7 @@ SDNode *MipsDAGToDAGISel::SelectLoadFp64(SDNode *N) { // lwc $f1, X+4($3) SDNode *LD0 = CurDAG->getMachineNode(Mips::LWC1, dl, MVT::f32, MVT::Other, Offset0, Base, Chain); - SDValue Undef = SDValue(CurDAG->getMachineNode(TargetInstrInfo::IMPLICIT_DEF, + SDValue Undef = SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, NVT), 0); SDValue I0 = CurDAG->getTargetInsertSubreg(Mips::SUBREG_FPEVEN, dl, MVT::f64, Undef, SDValue(LD0, 0)); @@ -426,7 +426,7 @@ SDNode* MipsDAGToDAGISel::Select(SDNode *Node) { SDValue InFlag = SDValue(MulNode, 0); - if (MulOp == ISD::MUL) + if (Opcode == ISD::MUL) return CurDAG->getMachineNode(Mips::MFLO, dl, MVT::i32, InFlag); else return CurDAG->getMachineNode(Mips::MFHI, dl, MVT::i32, InFlag); @@ -464,8 +464,7 @@ SDNode* MipsDAGToDAGISel::Select(SDNode *Node) { SDValue Zero = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, Mips::ZERO, MVT::i32); SDValue Undef = SDValue( - CurDAG->getMachineNode( - TargetInstrInfo::IMPLICIT_DEF, dl, MVT::f64), 0); + CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, MVT::f64), 0); SDNode *MTC = CurDAG->getMachineNode(Mips::MTC1, dl, MVT::f32, Zero); SDValue I0 = CurDAG->getTargetInsertSubreg(Mips::SUBREG_FPEVEN, dl, MVT::f64, Undef, SDValue(MTC, 0)); diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp index ced8b93..584b887 100644 --- a/lib/Target/Mips/MipsISelLowering.cpp +++ b/lib/Target/Mips/MipsISelLowering.cpp @@ -60,9 +60,6 @@ MipsTargetLowering(MipsTargetMachine &TM) // setcc operations results (slt, sgt, ...). 
setBooleanContents(ZeroOrOneBooleanContent); - // JumpTable targets must use GOT when using PIC_ - setUsesGlobalOffsetTable(true); - // Set up the register classes addRegisterClass(MVT::i32, Mips::CPURegsRegisterClass); addRegisterClass(MVT::f32, Mips::FGR32RegisterClass); @@ -100,6 +97,8 @@ MipsTargetLowering(MipsTargetMachine &TM) setOperationAction(ISD::BRCOND, MVT::Other, Custom); setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom); setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom); + setOperationAction(ISD::VASTART, MVT::Other, Custom); + // We custom lower AND/OR to handle the case where the DAG contains 'ands/ors' // with operands coming from setcc fp comparisons. This is necessary since @@ -182,6 +181,7 @@ LowerOperation(SDValue Op, SelectionDAG &DAG) case ISD::OR: return LowerANDOR(Op, DAG); case ISD::SELECT: return LowerSELECT(Op, DAG); case ISD::SETCC: return LowerSETCC(Op, DAG); + case ISD::VASTART: return LowerVASTART(Op, DAG); } return SDValue(); } @@ -510,7 +510,8 @@ SDValue MipsTargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) { SDValue GA = DAG.getTargetGlobalAddress(GV, MVT::i32, 0, MipsII::MO_GOT); SDValue ResNode = DAG.getLoad(MVT::i32, dl, - DAG.getEntryNode(), GA, NULL, 0); + DAG.getEntryNode(), GA, NULL, 0, + false, false, 0); // On functions and global targets not internally linked, only // a load from got/GP is necessary for PIC to work. if (!GV->hasLocalLinkage() || isa<Function>(GV)) @@ -549,7 +550,8 @@ LowerJumpTable(SDValue Op, SelectionDAG &DAG) SDValue Ops[] = { JTI }; HiPart = DAG.getNode(MipsISD::Hi, dl, DAG.getVTList(MVT::i32), Ops, 1); } else // Emit Load from Global Pointer - HiPart = DAG.getLoad(MVT::i32, dl, DAG.getEntryNode(), JTI, NULL, 0); + HiPart = DAG.getLoad(MVT::i32, dl, DAG.getEntryNode(), JTI, NULL, 0, + false, false, 0); SDValue Lo = DAG.getNode(MipsISD::Lo, dl, MVT::i32, JTI); ResNode = DAG.getNode(ISD::ADD, dl, MVT::i32, HiPart, Lo); @@ -586,7 +588,7 @@ LowerConstantPool(SDValue Op, SelectionDAG &DAG) SDValue CP = DAG.getTargetConstantPool(C, MVT::i32, N->getAlignment(), N->getOffset(), MipsII::MO_GOT); SDValue Load = DAG.getLoad(MVT::i32, dl, DAG.getEntryNode(), - CP, NULL, 0); + CP, NULL, 0, false, false, 0); SDValue Lo = DAG.getNode(MipsISD::Lo, dl, MVT::i32, CP); ResNode = DAG.getNode(ISD::ADD, dl, MVT::i32, Load, Lo); } @@ -594,6 +596,17 @@ LowerConstantPool(SDValue Op, SelectionDAG &DAG) return ResNode; } +SDValue MipsTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) { + DebugLoc dl = Op.getDebugLoc(); + SDValue FI = DAG.getFrameIndex(VarArgsFrameIndex, getPointerTy()); + + // vastart just stores the address of the VarArgsFrameIndex slot into the + // memory location argument.
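For orientation, a minimal source-level sketch (hypothetical, not from the tree) of the construct this lowers: va_start(ap, n) is what reaches LowerVASTART as an ISD::VASTART node, and the store in the code that follows writes the frame-index address into 'ap'.

#include <cstdarg>

// Hypothetical vararg function exercising this path.
int sum(int n, ...) {
  va_list ap;
  va_start(ap, n);          // becomes ISD::VASTART
  int s = 0;
  for (int i = 0; i < n; ++i)
    s += va_arg(ap, int);   // reads from the saved-argument area
  va_end(ap);
  return s;
}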
+ const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); + return DAG.getStore(Op.getOperand(0), dl, FI, Op.getOperand(1), SV, 0, + false, false, 0); +} + //===----------------------------------------------------------------------===// // Calling Convention Implementation //===----------------------------------------------------------------------===// @@ -679,21 +692,86 @@ static bool CC_MipsO32(unsigned ValNo, EVT ValVT, return false; // CC must always match } +static bool CC_MipsO32_VarArgs(unsigned ValNo, EVT ValVT, + EVT LocVT, CCValAssign::LocInfo LocInfo, + ISD::ArgFlagsTy ArgFlags, CCState &State) { + + static const unsigned IntRegsSize=4; + + static const unsigned IntRegs[] = { + Mips::A0, Mips::A1, Mips::A2, Mips::A3 + }; + + // Promote i8 and i16 + if (LocVT == MVT::i8 || LocVT == MVT::i16) { + LocVT = MVT::i32; + if (ArgFlags.isSExt()) + LocInfo = CCValAssign::SExt; + else if (ArgFlags.isZExt()) + LocInfo = CCValAssign::ZExt; + else + LocInfo = CCValAssign::AExt; + } + + if (ValVT == MVT::i32 || ValVT == MVT::f32) { + if (unsigned Reg = State.AllocateReg(IntRegs, IntRegsSize)) { + State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, MVT::i32, LocInfo)); + return false; + } + unsigned Off = State.AllocateStack(4, 4); + State.addLoc(CCValAssign::getMem(ValNo, ValVT, Off, LocVT, LocInfo)); + return false; + } + + unsigned UnallocIntReg = State.getFirstUnallocated(IntRegs, IntRegsSize); + if (ValVT == MVT::f64) { + if (IntRegs[UnallocIntReg] == (unsigned (Mips::A1))) { + // A1 can't be used anymore, because 64-bit arguments + // must be aligned when copied back to the caller stack + State.AllocateReg(IntRegs, IntRegsSize); + UnallocIntReg++; + } + + if (IntRegs[UnallocIntReg] == (unsigned (Mips::A0)) || + IntRegs[UnallocIntReg] == (unsigned (Mips::A2))) { + unsigned Reg = State.AllocateReg(IntRegs, IntRegsSize); + State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, MVT::i32, LocInfo)); + // Shadow the next register so it can be used + // later to get the other 32-bit part. + State.AllocateReg(IntRegs, IntRegsSize); + return false; + } + + // Register is shadowed to preserve alignment, and the + // argument goes to a stack location. + if (UnallocIntReg != IntRegsSize) + State.AllocateReg(IntRegs, IntRegsSize); + + unsigned Off = State.AllocateStack(8, 8); + State.addLoc(CCValAssign::getMem(ValNo, ValVT, Off, LocVT, LocInfo)); + return false; + } + + return true; // CC didn't match +} + //===----------------------------------------------------------------------===// // Call Calling Convention Implementation //===----------------------------------------------------------------------===// /// LowerCall - function arguments are copied from virtual regs to /// (physical regs)/(stack frame), CALLSEQ_START and CALLSEQ_END are emitted. -/// TODO: isVarArg, isTailCall. +/// TODO: isTailCall. SDValue MipsTargetLowering::LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg, - bool isTailCall, + bool &isTailCall, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) { + // MIPS target does not yet support tail call optimization.
+ isTailCall = false; MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); @@ -709,7 +787,8 @@ MipsTargetLowering::LowerCall(SDValue Chain, SDValue Callee, if (Subtarget->isABI_O32()) { int VTsize = EVT(MVT::i32).getSizeInBits()/8; MFI->CreateFixedObject(VTsize, (VTsize*3), true, false); - CCInfo.AnalyzeCallOperands(Outs, CC_MipsO32); + CCInfo.AnalyzeCallOperands(Outs, + isVarArg ? CC_MipsO32_VarArgs : CC_MipsO32); } else CCInfo.AnalyzeCallOperands(Outs, CC_Mips); @@ -783,7 +862,8 @@ MipsTargetLowering::LowerCall(SDValue Chain, SDValue Callee, // emit ISD::STORE which stores the // parameter value to a stack location - MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0)); + MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0, + false, false, 0)); } // Transform all store nodes into one single node because all store @@ -835,11 +915,6 @@ MipsTargetLowering::LowerCall(SDValue Chain, SDValue Callee, Chain = DAG.getNode(MipsISD::JmpLink, dl, NodeTys, &Ops[0], Ops.size()); InFlag = Chain.getValue(1); - // Create the CALLSEQ_END node. - Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true), - DAG.getIntPtrConstant(0, true), InFlag); - InFlag = Chain.getValue(1); - // Create a stack location to hold GP when PIC is used. This stack // location is used on function prologue to save GP and also after all // emitted CALLs to restore GP. @@ -862,13 +937,19 @@ MipsTargetLowering::LowerCall(SDValue Chain, SDValue Callee, // Reload GP value. FI = MipsFI->getGPFI(); SDValue FIN = DAG.getFrameIndex(FI,getPointerTy()); - SDValue GPLoad = DAG.getLoad(MVT::i32, dl, Chain, FIN, NULL, 0); + SDValue GPLoad = DAG.getLoad(MVT::i32, dl, Chain, FIN, NULL, 0, + false, false, 0); Chain = GPLoad.getValue(1); Chain = DAG.getCopyToReg(Chain, dl, DAG.getRegister(Mips::GP, MVT::i32), GPLoad, SDValue(0,0)); InFlag = Chain.getValue(1); } + // Create the CALLSEQ_END node. + Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true), + DAG.getIntPtrConstant(0, true), InFlag); + InFlag = Chain.getValue(1); + // Handle result values, copying them out of physregs into vregs that we // return. return LowerCallResult(Chain, InFlag, CallConv, isVarArg, @@ -906,23 +987,28 @@ MipsTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag, // Formal Arguments Calling Convention Implementation //===----------------------------------------------------------------------===// -/// LowerFormalArguments - transform physical registers into -/// virtual registers and generate load operations for -/// arguments places on the stack. -/// TODO: isVarArg +/// LowerFormalArguments - transform physical registers into virtual registers +/// and generate load operations for arguments placed on the stack. SDValue MipsTargetLowering::LowerFormalArguments(SDValue Chain, - CallingConv::ID CallConv, bool isVarArg, - const SmallVectorImpl<ISD::InputArg> - &Ins, - DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals) { + CallingConv::ID CallConv, bool isVarArg, + const SmallVectorImpl<ISD::InputArg> + &Ins, + DebugLoc dl, SelectionDAG &DAG, + SmallVectorImpl<SDValue> &InVals) { MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>(); unsigned StackReg = MF.getTarget().getRegisterInfo()->getFrameRegister(MF); + VarArgsFrameIndex = 0; + + // Used with varargs to accumulate store chains.
+ std::vector<SDValue> OutChains; + + // Keep track of the last register used for arguments + unsigned ArgRegEnd = 0; // Assign locations to all of the incoming arguments. SmallVector<CCValAssign, 16> ArgLocs; @@ -930,7 +1016,8 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain, ArgLocs, *DAG.getContext()); if (Subtarget->isABI_O32()) - CCInfo.AnalyzeFormalArguments(Ins, CC_MipsO32); + CCInfo.AnalyzeFormalArguments(Ins, + isVarArg ? CC_MipsO32_VarArgs : CC_MipsO32); else CCInfo.AnalyzeFormalArguments(Ins, CC_Mips); @@ -944,6 +1031,7 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain, // Arguments stored on registers if (VA.isRegLoc()) { EVT RegVT = VA.getLocVT(); + ArgRegEnd = VA.getLocReg(); TargetRegisterClass *RC = 0; if (RegVT == MVT::i32) @@ -954,11 +1042,11 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain, if (!Subtarget->isSingleFloat()) RC = Mips::AFGR64RegisterClass; } else - llvm_unreachable("RegVT not supported by LowerFormalArguments Lowering"); + llvm_unreachable("RegVT not supported by FormalArguments Lowering"); // Transform the arguments stored on // physical registers into virtual ones - unsigned Reg = AddLiveIn(DAG.getMachineFunction(), VA.getLocReg(), RC); + unsigned Reg = AddLiveIn(DAG.getMachineFunction(), ArgRegEnd, RC); SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT); // If this is an 8 or 16-bit value, it has been passed promoted @@ -991,34 +1079,13 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain, } InVals.push_back(ArgValue); - - // To meet ABI, when VARARGS are passed on registers, the registers - // must have their values written to the caller stack frame. - if ((isVarArg) && (Subtarget->isABI_O32())) { - if (StackPtr.getNode() == 0) - StackPtr = DAG.getRegister(StackReg, getPointerTy()); - - // The stack pointer offset is relative to the caller stack frame. - // Since the real stack size is unknown here, a negative SPOffset - // is used so there's a way to adjust these offsets when the stack - // size get known (on EliminateFrameIndex). A dummy SPOffset is - // used instead of a direct negative address (which is recorded to - // be used on emitPrologue) to avoid mis-calc of the first stack - // offset on PEI::calculateFrameObjectOffsets. - // Arguments are always 32-bit. - int FI = MFI->CreateFixedObject(4, 0, true, false); - MipsFI->recordStoreVarArgsFI(FI, -(4+(i*4))); - SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy()); - - // emit ISD::STORE whichs stores the - // parameter value to a stack Location - InVals.push_back(DAG.getStore(Chain, dl, ArgValue, PtrOff, NULL, 0)); - } - } else { // VA.isRegLoc() // sanity check assert(VA.isMemLoc()); + + // The last argument is not a register anymore + ArgRegEnd = 0; // The stack pointer offset is relative to the caller stack frame. // Since the real stack size is unknown here, a negative SPOffset @@ -1035,7 +1102,8 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain, // Create load nodes to retrieve arguments from the stack SDValue FIN = DAG.getFrameIndex(FI, getPointerTy()); - InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN, NULL, 0)); + InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN, NULL, 0, + false, false, 0)); } } @@ -1052,6 +1120,42 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain, Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Copy, Chain); } + // To meet ABI, when VARARGS are passed on registers, the registers + // must have their values written to the caller stack frame. 
If the last + // argument was placed on the stack, there's no need to save any register. + if ((isVarArg) && (Subtarget->isABI_O32() && ArgRegEnd)) { + if (StackPtr.getNode() == 0) + StackPtr = DAG.getRegister(StackReg, getPointerTy()); + + // The last register argument that must be saved is Mips::A3 + TargetRegisterClass *RC = Mips::CPURegsRegisterClass; + unsigned StackLoc = ArgLocs.size()-1; + + for (++ArgRegEnd; ArgRegEnd <= Mips::A3; ++ArgRegEnd, ++StackLoc) { + unsigned Reg = AddLiveIn(DAG.getMachineFunction(), ArgRegEnd, RC); + SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, MVT::i32); + + int FI = MFI->CreateFixedObject(4, 0, true, false); + MipsFI->recordStoreVarArgsFI(FI, -(4+(StackLoc*4))); + SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy()); + OutChains.push_back(DAG.getStore(Chain, dl, ArgValue, PtrOff, NULL, 0, + false, false, 0)); + + // Record the frame index of the first variable argument, + // which is the value VASTART needs. + if (!VarArgsFrameIndex) + VarArgsFrameIndex = FI; + } + } + + // All stores are grouped in one node to allow the matching between + // the size of Ins and InVals. This only happens for vararg functions. + if (!OutChains.empty()) { + OutChains.push_back(Chain); + Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, + &OutChains[0], OutChains.size()); + } + return Chain; } diff --git a/lib/Target/Mips/MipsISelLowering.h b/lib/Target/Mips/MipsISelLowering.h index cacf4b5..7256617 100644 --- a/lib/Target/Mips/MipsISelLowering.h +++ b/lib/Target/Mips/MipsISelLowering.h @@ -68,6 +68,8 @@ namespace llvm { //===--------------------------------------------------------------------===// class MipsTargetLowering : public TargetLowering { + int VarArgsFrameIndex; // FrameIndex for start of varargs area.
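For context on how a calling-convention callback such as CC_MipsO32_VarArgs is consumed, a hedged sketch assembled from the calls visible in the hunks above; the per-location handling is elided to comments:

SmallVector<CCValAssign, 16> ArgLocs;
CCState CCInfo(CallConv, /*isVarArg=*/true, getTargetMachine(),
               ArgLocs, *DAG.getContext());
CCInfo.AnalyzeCallOperands(Outs, CC_MipsO32_VarArgs);
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
  CCValAssign &VA = ArgLocs[i];
  if (VA.isRegLoc()) {
    // Argument travels in VA.getLocReg().
  } else {
    assert(VA.isMemLoc());
    // Argument lives at VA.getLocMemOffset() from the stack pointer.
  }
}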
+ public: explicit MipsTargetLowering(MipsTargetMachine &TM); @@ -107,6 +109,7 @@ namespace llvm { SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG); SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG); SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG); + SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG); virtual SDValue LowerFormalArguments(SDValue Chain, @@ -118,7 +121,7 @@ namespace llvm { virtual SDValue LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg, - bool isTailCall, + bool &isTailCall, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, diff --git a/lib/Target/Mips/MipsInstrFPU.td b/lib/Target/Mips/MipsInstrFPU.td index ce89cfd..fa4518d 100644 --- a/lib/Target/Mips/MipsInstrFPU.td +++ b/lib/Target/Mips/MipsInstrFPU.td @@ -113,7 +113,6 @@ let ft = 0 in { defm ROUND_W : FFR1_1<0b001100, "round.w">; defm TRUNC_W : FFR1_1<0b001101, "trunc.w">; defm CVTW : FFR1_1<0b100100, "cvt.w">; - defm FMOV : FFR1_1<0b000110, "mov">; defm FABS : FFR1_2<0b000101, "abs", fabs>; defm FNEG : FFR1_2<0b000111, "neg", fneg>; @@ -173,6 +172,11 @@ let fd = 0 in { "mtc1 $rt, $fs", []>; } +def FMOV_S32 : FFR<0x11, 0b000110, 0x0, (outs FGR32:$fd), (ins FGR32:$fs), + "mov.s $fd, $fs", []>; +def FMOV_D32 : FFR<0x11, 0b000110, 0x1, (outs AFGR64:$fd), (ins AFGR64:$fs), + "mov.d $fd, $fs", []>; + /// Floating Point Memory Instructions let Predicates = [IsNotSingleFloat, IsNotMipsI] in { def LDC1 : FFI<0b110101, (outs AFGR64:$ft), (ins mem:$addr), diff --git a/lib/Target/Mips/MipsInstrInfo.td b/lib/Target/Mips/MipsInstrInfo.td index e67bcbf..f16a805 100644 --- a/lib/Target/Mips/MipsInstrInfo.td +++ b/lib/Target/Mips/MipsInstrInfo.td @@ -120,7 +120,7 @@ def immZExt5 : PatLeaf<(imm), [{ // Mips Address Mode! SDNode frameindex could possibily be a match // since load and store instructions from stack used it. -def addr : ComplexPattern<i32, 2, "SelectAddr", [frameindex], []>; +def addr : ComplexPattern<iPTR, 2, "SelectAddr", [frameindex], []>; //===----------------------------------------------------------------------===// // Instructions specific format diff --git a/lib/Target/Mips/MipsMCAsmInfo.cpp b/lib/Target/Mips/MipsMCAsmInfo.cpp index 60ef1c9..89e3e11 100644 --- a/lib/Target/Mips/MipsMCAsmInfo.cpp +++ b/lib/Target/Mips/MipsMCAsmInfo.cpp @@ -16,12 +16,12 @@ using namespace llvm; MipsMCAsmInfo::MipsMCAsmInfo(const Target &T, const StringRef &TT) { AlignmentIsInBytes = false; - COMMDirectiveTakesAlignment = true; Data16bitsDirective = "\t.half\t"; Data32bitsDirective = "\t.word\t"; Data64bitsDirective = 0; PrivateGlobalPrefix = "$"; CommentString = "#"; ZeroDirective = "\t.space\t"; - PICJumpTableDirective = "\t.gpword\t"; + GPRel32Directive = "\t.gpword\t"; + HasSetDirective = false; } diff --git a/lib/Target/Mips/MipsRegisterInfo.cpp b/lib/Target/Mips/MipsRegisterInfo.cpp index 80fd917..f923bed 100644 --- a/lib/Target/Mips/MipsRegisterInfo.cpp +++ b/lib/Target/Mips/MipsRegisterInfo.cpp @@ -223,6 +223,8 @@ void MipsRegisterInfo::adjustMipsStackFrame(MachineFunction &MF) const MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>(); const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo(); unsigned StackAlign = MF.getTarget().getFrameInfo()->getStackAlignment(); + unsigned RegSize = Subtarget.isGP32bit() ? 4 : 8; + bool HasGP = MipsFI->needGPSaveRestore(); // Min and Max CSI FrameIndex. 
int MinCSFI = -1, MaxCSFI = -1; @@ -248,6 +250,9 @@ void MipsRegisterInfo::adjustMipsStackFrame(MachineFunction &MF) const for (unsigned i = 0, e = CSI.size(); i != e; ++i) CalleeSavedAreaSize += MFI->getObjectAlignment(CSI[i].getFrameIdx()); + unsigned StackOffset = HasGP ? (MipsFI->getGPStackOffset()+RegSize) + : (Subtarget.isABI_O32() ? 16 : 0); + // Adjust local variables. They should come on the stack right // after the arguments. int LastOffsetFI = -1; @@ -256,7 +261,8 @@ void MipsRegisterInfo::adjustMipsStackFrame(MachineFunction &MF) const continue; if (MFI->isDeadObjectIndex(i)) continue; - unsigned Offset = MFI->getObjectOffset(i) - CalleeSavedAreaSize; + unsigned Offset = + StackOffset + MFI->getObjectOffset(i) - CalleeSavedAreaSize; if (LastOffsetFI == -1) LastOffsetFI = i; if (Offset > MFI->getObjectOffset(LastOffsetFI)) @@ -265,11 +271,8 @@ void MipsRegisterInfo::adjustMipsStackFrame(MachineFunction &MF) const } // Adjust CPU Callee Saved Registers Area. Registers RA and FP must - // be saved in this CPU Area there is the need. This whole Area must - // be aligned to the default Stack Alignment requirements. - unsigned StackOffset = 0; - unsigned RegSize = Subtarget.isGP32bit() ? 4 : 8; - + // be saved in this CPU Area. This whole area must be aligned to the + // default Stack Alignment requirements. if (LastOffsetFI >= 0) StackOffset = MFI->getObjectOffset(LastOffsetFI)+ MFI->getObjectSize(LastOffsetFI); @@ -283,21 +286,26 @@ void MipsRegisterInfo::adjustMipsStackFrame(MachineFunction &MF) const StackOffset += MFI->getObjectAlignment(CSI[i].getFrameIdx()); } - if (hasFP(MF)) { + // Stack locations for FP and RA. If only one of them is used, + // the space must be allocated for both, otherwise no space at all. + if (hasFP(MF) || MFI->hasCalls()) { + // FP stack location MFI->setObjectOffset(MFI->CreateStackObject(RegSize, RegSize, true), StackOffset); MipsFI->setFPStackOffset(StackOffset); TopCPUSavedRegOff = StackOffset; StackOffset += RegSize; - } - if (MFI->hasCalls()) { + // SP stack location MFI->setObjectOffset(MFI->CreateStackObject(RegSize, RegSize, true), StackOffset); MipsFI->setRAStackOffset(StackOffset); - TopCPUSavedRegOff = StackOffset; StackOffset += RegSize; + + if (MFI->hasCalls()) + TopCPUSavedRegOff += RegSize; } + StackOffset = ((StackOffset+StackAlign-1)/StackAlign*StackAlign); // Adjust FPU Callee Saved Registers Area. This Area must be diff --git a/lib/Target/Mips/MipsTargetObjectFile.h b/lib/Target/Mips/MipsTargetObjectFile.h index 32e0436..237b160 100644 --- a/lib/Target/Mips/MipsTargetObjectFile.h +++ b/lib/Target/Mips/MipsTargetObjectFile.h @@ -10,7 +10,7 @@ #ifndef LLVM_TARGET_MIPS_TARGETOBJECTFILE_H #define LLVM_TARGET_MIPS_TARGETOBJECTFILE_H -#include "llvm/Target/TargetLoweringObjectFile.h" +#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" namespace llvm { diff --git a/lib/Target/Mips/TargetInfo/Makefile b/lib/Target/Mips/TargetInfo/Makefile index f27d49e..32f4e16 100644 --- a/lib/Target/Mips/TargetInfo/Makefile +++ b/lib/Target/Mips/TargetInfo/Makefile @@ -8,7 +8,6 @@ ##===----------------------------------------------------------------------===## LEVEL = ../../../.. LIBRARYNAME = LLVMMipsInfo -CXXFLAGS = -fno-rtti # Hack: we need to include 'main' target directory to grab private headers CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. 
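One detail of the frame-layout rework worth restating: the final StackOffset is rounded up to the stack alignment with an integer-division idiom. A self-contained restatement (hypothetical helper name, not part of the patch):

// Round Offset up to the next multiple of Align (Align must be nonzero).
static unsigned RoundUpToAlignment(unsigned Offset, unsigned Align) {
  return (Offset + Align - 1) / Align * Align;
}
// e.g. RoundUpToAlignment(20, 8) == 24 and RoundUpToAlignment(24, 8) == 24.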
diff --git a/lib/Target/PIC16/AsmPrinter/Makefile b/lib/Target/PIC16/AsmPrinter/Makefile index 27c4045..f4db57e 100644 --- a/lib/Target/PIC16/AsmPrinter/Makefile +++ b/lib/Target/PIC16/AsmPrinter/Makefile @@ -8,7 +8,6 @@ ##===----------------------------------------------------------------------===## LEVEL = ../../../.. LIBRARYNAME = LLVMPIC16AsmPrinter -CXXFLAGS = -fno-rtti # Hack: we need to include 'main' pic16 target directory to grab private headers CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. diff --git a/lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.cpp b/lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.cpp index 0463596..b015edd 100644 --- a/lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.cpp +++ b/lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.cpp @@ -35,21 +35,17 @@ using namespace llvm; #include "PIC16GenAsmWriter.inc" PIC16AsmPrinter::PIC16AsmPrinter(formatted_raw_ostream &O, TargetMachine &TM, - const MCAsmInfo *T, bool V) -: AsmPrinter(O, TM, T, V), DbgInfo(O, T) { + MCContext &Ctx, MCStreamer &Streamer, + const MCAsmInfo *T) +: AsmPrinter(O, TM, Ctx, Streamer, T), DbgInfo(O, T) { PTLI = static_cast<PIC16TargetLowering*>(TM.getTargetLowering()); PMAI = static_cast<const PIC16MCAsmInfo*>(T); PTOF = (PIC16TargetObjectFile *)&PTLI->getObjFileLowering(); } -bool PIC16AsmPrinter::printMachineInstruction(const MachineInstr *MI) { - processDebugLoc(MI, true); +void PIC16AsmPrinter::EmitInstruction(const MachineInstr *MI) { printInstruction(MI); - if (VerboseAsm) - EmitComments(*MI); - O << '\n'; - processDebugLoc(MI, false); - return true; + OutStreamer.AddBlankLine(); } static int getFunctionColor(const Function *F) { @@ -96,8 +92,6 @@ void PIC16AsmPrinter::ColorAutoSection(const Function *F) { /// directive and file begin debug directive (if required) for the function. /// bool PIC16AsmPrinter::runOnMachineFunction(MachineFunction &MF) { - this->MF = &MF; - // This calls the base class function required to be called at beginning // of runOnMachineFunction. SetupMachineFunction(MF); @@ -112,8 +106,9 @@ bool PIC16AsmPrinter::runOnMachineFunction(MachineFunction &MF) { DbgInfo.BeginFunction(MF); // Now emit the instructions of function in its code section. - const MCSection *fCodeSection - = getObjFileLowering().SectionForCode(CurrentFnSym->getName()); + const MCSection *fCodeSection = + getObjFileLowering().SectionForCode(CurrentFnSym->getName(), + PAN::isISR(F->getSection())); // Start the Code Section. O << "\n"; @@ -149,7 +144,7 @@ bool PIC16AsmPrinter::runOnMachineFunction(MachineFunction &MF) { } // Print the assembly for the instruction. 
- printMachineInstruction(II); + EmitInstruction(II); } } @@ -211,7 +206,7 @@ void PIC16AsmPrinter::printOperand(const MachineInstr *MI, int opNum) { break; } case MachineOperand::MO_MachineBasicBlock: - O << *GetMBBSymbol(MO.getMBB()->getNumber()); + O << *MO.getMBB()->getSymbol(OutContext); return; default: diff --git a/lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.h b/lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.h index 74ab72c..77b6e63 100644 --- a/lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.h +++ b/lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.h @@ -31,7 +31,8 @@ namespace llvm { class VISIBILITY_HIDDEN PIC16AsmPrinter : public AsmPrinter { public: explicit PIC16AsmPrinter(formatted_raw_ostream &O, TargetMachine &TM, - const MCAsmInfo *T, bool V); + MCContext &Ctx, MCStreamer &Streamer, + const MCAsmInfo *T); private: virtual const char *getPassName() const { return "PIC16 Assembly Printer"; @@ -47,7 +48,7 @@ namespace llvm { void printInstruction(const MachineInstr *MI); // definition autogenerated. static const char *getRegisterName(unsigned RegNo); - bool printMachineInstruction(const MachineInstr *MI); + void EmitInstruction(const MachineInstr *MI); void EmitFunctionDecls (Module &M); void EmitUndefinedVars (Module &M); void EmitDefinedVars (Module &M); diff --git a/lib/Target/PIC16/Makefile b/lib/Target/PIC16/Makefile index a1dbde5..9e784d1 100644 --- a/lib/Target/PIC16/Makefile +++ b/lib/Target/PIC16/Makefile @@ -10,7 +10,6 @@ LEVEL = ../../.. LIBRARYNAME = LLVMPIC16CodeGen TARGET = PIC16 -CXXFLAGS = -fno-rtti # Make sure that tblgen is run, first thing. BUILT_SOURCES = PIC16GenRegisterInfo.h.inc PIC16GenRegisterNames.inc \ diff --git a/lib/Target/PIC16/PIC16.h b/lib/Target/PIC16/PIC16.h index e46c9b2..8d067de 100644 --- a/lib/Target/PIC16/PIC16.h +++ b/lib/Target/PIC16/PIC16.h @@ -55,9 +55,10 @@ namespace PIC16CC { // External symbol names require memory to live till the program end. // So we have to allocate it and keep. + // FIXME: Don't leak the allocated strings. inline static const char *createESName (const std::string &name) { char *tmpName = new char[name.size() + 1]; - strcpy (tmpName, name.c_str()); + memcpy(tmpName, name.c_str(), name.size() + 1); return tmpName; } diff --git a/lib/Target/PIC16/PIC16ABINames.h b/lib/Target/PIC16/PIC16ABINames.h index e18ddf1..b0b9318 100644 --- a/lib/Target/PIC16/PIC16ABINames.h +++ b/lib/Target/PIC16/PIC16ABINames.h @@ -325,6 +325,19 @@ namespace llvm { return o.str(); } + + // Return true if the current function is an ISR + inline static bool isISR(const std::string SectName) { + if (SectName.find("interrupt") != std::string::npos) + return true; + + return false; + } + + // Return the address for ISR starts in rom. + inline static std::string getISRAddr(void) { + return "0x4"; + } }; // class PAN. } // end namespace llvm; diff --git a/lib/Target/PIC16/PIC16DebugInfo.cpp b/lib/Target/PIC16/PIC16DebugInfo.cpp index 8368a3c..c517b1b 100644 --- a/lib/Target/PIC16/PIC16DebugInfo.cpp +++ b/lib/Target/PIC16/PIC16DebugInfo.cpp @@ -68,7 +68,7 @@ void PIC16DbgInfo::PopulateDerivedTypeInfo (DIType Ty, unsigned short &TypeNo, TypeNo = TypeNo << PIC16Dbg::S_DERIVED; } - // We also need to encode the the information about the base type of + // We also need to encode the information about the base type of // pointer in TypeNo. 
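The recursion above packs a chain of type qualifiers into one integer; a hedged sketch of the scheme, with the shift amount taken from the code and the tag value purely hypothetical:

unsigned short TypeNo = 0;
TypeNo = TypeNo << PIC16Dbg::S_DERIVED; // make room for this level
TypeNo = TypeNo | DerivedTag;           // hypothetical tag, e.g. for a pointer
// The recursive PopulateDebugInfo call below then encodes the base type
// into the low bits freed by the shift.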
DIType BaseType = DIDerivedType(Ty.getNode()).getTypeDerivedFrom(); PopulateDebugInfo(BaseType, TypeNo, HasAux, Aux, TagName); diff --git a/lib/Target/PIC16/PIC16ISelLowering.cpp b/lib/Target/PIC16/PIC16ISelLowering.cpp index 9f093e8..d2fc8db 100644 --- a/lib/Target/PIC16/PIC16ISelLowering.cpp +++ b/lib/Target/PIC16/PIC16ISelLowering.cpp @@ -622,12 +622,12 @@ SDValue PIC16TargetLowering::ExpandStore(SDNode *N, SelectionDAG &DAG) { ChainHi = Chain.getOperand(1); } SDValue Store1 = DAG.getStore(ChainLo, dl, SrcLo, Ptr, NULL, - 0 + StoreOffset); + 0 + StoreOffset, false, false, 0); Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, DAG.getConstant(4, Ptr.getValueType())); SDValue Store2 = DAG.getStore(ChainHi, dl, SrcHi, Ptr, NULL, - 1 + StoreOffset); + 1 + StoreOffset, false, false, 0); return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Store1, Store2); @@ -1355,11 +1355,13 @@ GetDataAddress(DebugLoc dl, SDValue Callee, SDValue &Chain, SDValue PIC16TargetLowering::LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg, - bool isTailCall, + bool &isTailCall, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) { + // PIC16 target does not yet support tail call optimization. + isTailCall = false; assert(Callee.getValueType() == MVT::i16 && "Don't know how to legalize this call node!!!"); @@ -1511,8 +1513,7 @@ bool PIC16TargetLowering::NeedToConvertToMemOp(SDValue Op, unsigned &MemOp, // Direct load operands are folded in binary operations. But before folding // verify if this folding is legal. Fold only if it is legal; otherwise // convert this direct load to a separate memory operation. - if(ISel->IsLegalAndProfitableToFold(Op.getOperand(0).getNode(), - Op.getNode(), Op.getNode())) + if(ISel->IsLegalToFold(Op.getOperand(0), Op.getNode(), Op.getNode())) return false; else MemOp = 0; diff --git a/lib/Target/PIC16/PIC16ISelLowering.h b/lib/Target/PIC16/PIC16ISelLowering.h index afdd4b4..de14520 100644 --- a/lib/Target/PIC16/PIC16ISelLowering.h +++ b/lib/Target/PIC16/PIC16ISelLowering.h @@ -143,7 +143,7 @@ namespace llvm { virtual SDValue LowerCall(SDValue Chain, SDValue Callee, - CallingConv::ID CallConv, bool isVarArg, bool isTailCall, + CallingConv::ID CallConv, bool isVarArg, bool &isTailCall, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, diff --git a/lib/Target/PIC16/PIC16MCAsmInfo.cpp b/lib/Target/PIC16/PIC16MCAsmInfo.cpp index 827315e..b080542 100644 --- a/lib/Target/PIC16/PIC16MCAsmInfo.cpp +++ b/lib/Target/PIC16/PIC16MCAsmInfo.cpp @@ -37,7 +37,7 @@ PIC16MCAsmInfo::PIC16MCAsmInfo(const Target &T, const StringRef &TT) { RomData8bitsDirective = " dw "; RomData16bitsDirective = " rom_di "; RomData32bitsDirective = " rom_dl "; - + HasSetDirective = false; // Set it to false because we need to generate the c file name and not the bc file // name. diff --git a/lib/Target/PIC16/PIC16MemSelOpt.cpp b/lib/Target/PIC16/PIC16MemSelOpt.cpp index cc71b04..ab81ed1 100644 --- a/lib/Target/PIC16/PIC16MemSelOpt.cpp +++ b/lib/Target/PIC16/PIC16MemSelOpt.cpp @@ -59,6 +59,7 @@ namespace { const TargetInstrInfo *TII; // Machine instruction info.
MachineBasicBlock *MBB; // Current basic block std::string CurBank; + int PageChanged; }; char MemSelOpt::ID = 0; @@ -93,10 +94,56 @@ bool MemSelOpt::processBasicBlock(MachineFunction &MF, MachineBasicBlock &BB) { // Let us assume that no bank is selected when entering a basic block. // Ideally we should look at the predecessors for this information. CurBank=""; + PageChanged=0; - for (MachineBasicBlock::iterator I = BB.begin(); I != BB.end(); ++I) { + MachineBasicBlock::iterator I; + for (I = BB.begin(); I != BB.end(); ++I) { Changed |= processInstruction(I); + + // If the page has changed, insert a pagesel before + // any instruction that needs one + if (PageChanged == 1) + { + // Restore the page if it was changed, before leaving the basic block, + // because it may be required by the goto terminator or the fall thru + // basic block. + // If the terminator is return, we don't need to restore since there + // is no goto or fall thru basic block. + if ((I->getOpcode() == PIC16::sublw_3) || //macro has goto + (I->getOpcode() == PIC16::sublw_6) || //macro has goto + (I->getOpcode() == PIC16::addlwc) || //macro has goto + (TII->get(I->getOpcode()).isBranch())) + { + DebugLoc dl = I->getDebugLoc(); + BuildMI(*MBB, I, dl, TII->get(PIC16::pagesel)).addExternalSymbol("$"); + Changed = true; + PageChanged = 0; + } + } } + + // The basic block is over, but if we did not find any goto yet, + // we haven't restored the page. + // Restore the page if it was changed, before leaving the basic block, + // because it may be required by the fall thru basic block. + // If the terminator is return, we don't need to restore since there + // is no fall thru basic block. + if (PageChanged == 1) { + // Save the end pointer before we move back to the last insn. + MachineBasicBlock::iterator J = I; + I--; + const TargetInstrDesc &TID = TII->get(I->getOpcode()); + if (! TID.isReturn()) + { + DebugLoc dl = I->getDebugLoc(); + BuildMI(*MBB, J, dl, + TII->get(PIC16::pagesel)).addExternalSymbol("$"); + Changed = true; + PageChanged = 0; + } + } + + return Changed; } @@ -112,42 +159,74 @@ bool MemSelOpt::processInstruction(MachineInstr *MI) { if (!(TID.isBranch() || TID.isCall() || TID.mayLoad() || TID.mayStore())) return false; + // The first thing we should do is record whether banksel/pagesel are + // changed in an unknown way. This can happen via any type of call. + // We do it here first, before scanning for MemOp / BBOp, as the indirect + // call insns do not have any operands, but they still may change bank/page. + if (TID.isCall()) { + // Record that we have changed the page, so that we can restore it + // before the basic block ends. + // We need to signal that a page and bank change happened even for + // indirect calls. + PageChanged = 1; + + // When a call is made, there may be banksels for variables in the callee. + // Hence the banksel in the caller needs to be reset. + CurBank = ""; + } + // Scan for the memory address operand. // FIXME: Should we use standard interfaces like memoperands_iterator, // hasMemOperand() etc ? int MemOpPos = -1; + int BBOpPos = -1; for (unsigned i = 0; i < NumOperands; i++) { MachineOperand Op = MI->getOperand(i); if (Op.getType() == MachineOperand::MO_GlobalAddress || - Op.getType() == MachineOperand::MO_ExternalSymbol || - Op.getType() == MachineOperand::MO_MachineBasicBlock) { + Op.getType() == MachineOperand::MO_ExternalSymbol) { // We found one mem operand. Next one may be BS.
MemOpPos = i; - break; + } + if (Op.getType() == MachineOperand::MO_MachineBasicBlock) { + // We found one BB operand. Next one may be pagesel. + BBOpPos = i; } } // If we did not find an insn accessing memory, continue. - if (MemOpPos == -1) return Changed; + if ((MemOpPos == -1) && + (BBOpPos == -1)) + return false; + assert ((BBOpPos != MemOpPos) && "operand can only be of one type"); - // Get the MemOp. - MachineOperand &Op = MI->getOperand(MemOpPos); // If this is pagesel material, handle it first. - if (MI->getOpcode() == PIC16::CALL || - MI->getOpcode() == PIC16::br_uncond) { + // CALL and br_uncond insns use MemOp (GA or ES) and not BBOp. + // Pagesel is required only for a direct call. + if ((MI->getOpcode() == PIC16::CALL)) { + // Get the MemOp. + MachineOperand &MemOp = MI->getOperand(MemOpPos); DebugLoc dl = MI->getDebugLoc(); - BuildMI(*MBB, MI, dl, TII->get(PIC16::pagesel)). - addOperand(Op); - return true; + BuildMI(*MBB, MI, dl, TII->get(PIC16::pagesel)).addOperand(MemOp); + + // CALL and br_uncond need only pagesel, so we are done. + return true; } + // Pagesel is handled. Now, add a Banksel if needed. + if (MemOpPos == -1) return Changed; + // Get the MemOp. + MachineOperand &Op = MI->getOperand(MemOpPos); + // Get the section name (NewBank) for MemOp. // This assumes that the section names for globals are already set by // AsmPrinter->doInitialization. std::string NewBank = CurBank; + bool hasExternalLinkage = false; if (Op.getType() == MachineOperand::MO_GlobalAddress && Op.getGlobal()->getType()->getAddressSpace() == PIC16ISD::RAM_SPACE) { + if (Op.getGlobal()->hasExternalLinkage()) + hasExternalLinkage = true; NewBank = Op.getGlobal()->getSection(); } else if (Op.getType() == MachineOperand::MO_ExternalSymbol) { // External Symbol is generated for temp data and arguments. They are @@ -162,7 +241,7 @@ bool MemSelOpt::processInstruction(MachineInstr *MI) { // If the previous and new section names are the same, we don't need to // emit banksel. - if (NewBank.compare(CurBank) != 0 ) { + if (NewBank.compare(CurBank) != 0 || hasExternalLinkage) { DebugLoc dl = MI->getDebugLoc(); BuildMI(*MBB, MI, dl, TII->get(PIC16::banksel)). addOperand(Op); diff --git a/lib/Target/PIC16/PIC16Passes/Makefile b/lib/Target/PIC16/PIC16Passes/Makefile index fb45d71..9684b8d 100644 --- a/lib/Target/PIC16/PIC16Passes/Makefile +++ b/lib/Target/PIC16/PIC16Passes/Makefile @@ -10,7 +10,6 @@ LEVEL = ../../../.. TARGET = PIC16 LIBRARYNAME = LLVMpic16passes BUILD_ARCHIVE = 1 -CXXFLAGS = -fno-rtti include $(LEVEL)/Makefile.common diff --git a/lib/Target/PIC16/PIC16TargetObjectFile.cpp b/lib/Target/PIC16/PIC16TargetObjectFile.cpp index d7cfe02..b891c18 100644 --- a/lib/Target/PIC16/PIC16TargetObjectFile.cpp +++ b/lib/Target/PIC16/PIC16TargetObjectFile.cpp @@ -315,8 +315,12 @@ PIC16TargetObjectFile::allocateSHARED(const GlobalVariable *GV, // Interface used by AsmPrinter to get a code section for a function. const PIC16Section * -PIC16TargetObjectFile::SectionForCode(const std::string &FnName) const { +PIC16TargetObjectFile::SectionForCode(const std::string &FnName, + bool isISR) const { const std::string &sec_name = PAN::getCodeSectionName(FnName); + // If it is an ISR, its code section starts at a specific address.
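The PAN helpers driving this choice were added earlier in the patch; their observable behavior, per those definitions (the sample section strings are hypothetical):

bool ISR = PAN::isISR("foo.interrupt.sec"); // true: contains "interrupt"
bool Plain = PAN::isISR("foo.code.sec");    // false
std::string Addr = PAN::getISRAddr();       // "0x4", where the ISR starts in rom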
+ if (isISR) + return getPIC16Section(sec_name, CODE, PAN::getISRAddr()); return getPIC16Section(sec_name, CODE); } diff --git a/lib/Target/PIC16/PIC16TargetObjectFile.h b/lib/Target/PIC16/PIC16TargetObjectFile.h index 0b0ad43..cf8bf84 100644 --- a/lib/Target/PIC16/PIC16TargetObjectFile.h +++ b/lib/Target/PIC16/PIC16TargetObjectFile.h @@ -137,7 +137,8 @@ namespace llvm { /// Return a code section for a function. - const PIC16Section *SectionForCode (const std::string &FnName) const; + const PIC16Section *SectionForCode (const std::string &FnName, + bool isISR) const; /// Return a frame section for a function. const PIC16Section *SectionForFrame (const std::string &FnName) const; diff --git a/lib/Target/PIC16/TargetInfo/Makefile b/lib/Target/PIC16/TargetInfo/Makefile index 9004be8..76609f6 100644 --- a/lib/Target/PIC16/TargetInfo/Makefile +++ b/lib/Target/PIC16/TargetInfo/Makefile @@ -8,7 +8,6 @@ ##===----------------------------------------------------------------------===## LEVEL = ../../../.. LIBRARYNAME = LLVMPIC16Info -CXXFLAGS = -fno-rtti # Hack: we need to include 'main' target directory to grab private headers CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. diff --git a/lib/Target/PowerPC/AsmPrinter/Makefile b/lib/Target/PowerPC/AsmPrinter/Makefile index 4378151..269ef92 100644 --- a/lib/Target/PowerPC/AsmPrinter/Makefile +++ b/lib/Target/PowerPC/AsmPrinter/Makefile @@ -8,7 +8,6 @@ ##===----------------------------------------------------------------------===## LEVEL = ../../../.. LIBRARYNAME = LLVMPowerPCAsmPrinter -CXXFLAGS = -fno-rtti # Hack: we need to include 'main' PowerPC target directory to grab private headers CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. diff --git a/lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp b/lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp index b89c2b4..ac901d0 100644 --- a/lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp +++ b/lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp @@ -31,13 +31,13 @@ #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineModuleInfoImpls.h" +#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCSectionMachO.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Target/Mangler.h" -#include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetOptions.h" @@ -47,14 +47,11 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FormattedStream.h" -#include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringSet.h" #include "llvm/ADT/SmallString.h" using namespace llvm; -STATISTIC(EmittedInsts, "Number of machine instrs printed"); - namespace { class PPCAsmPrinter : public AsmPrinter { protected: @@ -63,8 +60,9 @@ namespace { uint64_t LabelID; public: explicit PPCAsmPrinter(formatted_raw_ostream &O, TargetMachine &TM, - const MCAsmInfo *T, bool V) - : AsmPrinter(O, TM, T, V), + MCContext &Ctx, MCStreamer &Streamer, + const MCAsmInfo *T) + : AsmPrinter(O, TM, Ctx, Streamer, T), Subtarget(TM.getSubtarget<PPCSubtarget>()), LabelID(0) {} virtual const char *getPassName() const { @@ -98,7 +96,7 @@ namespace { static const char *getRegisterName(unsigned RegNo); - void printMachineInstruction(const MachineInstr *MI); + virtual void EmitInstruction(const MachineInstr *MI); void 
printOp(const MachineOperand &MO); /// stripRegisterPrefix - This method strips the character prefix from a @@ -200,7 +198,7 @@ namespace { if (GV->isDeclaration() || GV->isWeakForLinker()) { // Dynamically-resolved functions need a stub for the function. MCSymbol *Sym = GetSymbolWithGlobalValueBase(GV, "$stub"); - const MCSymbol *&StubSym = + MCSymbol *&StubSym = MMI->getObjFileInfo<MachineModuleInfoMachO>().getFnStubEntry(Sym); if (StubSym == 0) StubSym = GetGlobalValueSymbol(GV); @@ -213,8 +211,8 @@ namespace { TempNameStr += StringRef(MO.getSymbolName()); TempNameStr += StringRef("$stub"); - const MCSymbol *Sym = GetExternalSymbolSymbol(TempNameStr.str()); - const MCSymbol *&StubSym = + MCSymbol *Sym = GetExternalSymbolSymbol(TempNameStr.str()); + MCSymbol *&StubSym = MMI->getObjFileInfo<MachineModuleInfoMachO>().getFnStubEntry(Sym); if (StubSym == 0) StubSym = GetExternalSymbolSymbol(MO.getSymbolName()); @@ -319,24 +317,24 @@ namespace { void printPredicateOperand(const MachineInstr *MI, unsigned OpNo, const char *Modifier); - - virtual bool runOnMachineFunction(MachineFunction &F) = 0; }; /// PPCLinuxAsmPrinter - PowerPC assembly printer, customized for Linux class PPCLinuxAsmPrinter : public PPCAsmPrinter { public: explicit PPCLinuxAsmPrinter(formatted_raw_ostream &O, TargetMachine &TM, - const MCAsmInfo *T, bool V) - : PPCAsmPrinter(O, TM, T, V){} + MCContext &Ctx, MCStreamer &Streamer, + const MCAsmInfo *T) + : PPCAsmPrinter(O, TM, Ctx, Streamer, T) {} virtual const char *getPassName() const { return "Linux PPC Assembly Printer"; } - bool runOnMachineFunction(MachineFunction &F); bool doFinalization(Module &M); + virtual void EmitFunctionEntryLabel(); + void getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesAll(); AU.addRequired<MachineModuleInfo>(); @@ -351,14 +349,14 @@ namespace { formatted_raw_ostream &OS; public: explicit PPCDarwinAsmPrinter(formatted_raw_ostream &O, TargetMachine &TM, - const MCAsmInfo *T, bool V) - : PPCAsmPrinter(O, TM, T, V), OS(O) {} + MCContext &Ctx, MCStreamer &Streamer, + const MCAsmInfo *T) + : PPCAsmPrinter(O, TM, Ctx, Streamer, T), OS(O) {} virtual const char *getPassName() const { return "Darwin PPC Assembly Printer"; } - bool runOnMachineFunction(MachineFunction &F); bool doFinalization(Module &M); void EmitStartOfAsmFile(Module &M); @@ -382,7 +380,7 @@ void PPCAsmPrinter::printOp(const MachineOperand &MO) { llvm_unreachable("printOp() does not handle immediate values"); case MachineOperand::MO_MachineBasicBlock: - O << *GetMBBSymbol(MO.getMBB()->getNumber()); + O << *MO.getMBB()->getSymbol(OutContext); return; case MachineOperand::MO_JumpTableIndex: O << MAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber() @@ -403,10 +401,10 @@ void PPCAsmPrinter::printOp(const MachineOperand &MO) { return; } - const MCSymbol *NLPSym = + MCSymbol *NLPSym = OutContext.GetOrCreateSymbol(StringRef(MAI->getGlobalPrefix())+ MO.getSymbolName()+"$non_lazy_ptr"); - const MCSymbol *&StubSym = + MCSymbol *&StubSym = MMI->getObjFileInfo<MachineModuleInfoMachO>().getGVStubEntry(NLPSym); if (StubSym == 0) StubSym = GetExternalSymbolSymbol(MO.getSymbolName()); @@ -424,7 +422,7 @@ void PPCAsmPrinter::printOp(const MachineOperand &MO) { (GV->isDeclaration() || GV->isWeakForLinker())) { if (!GV->hasHiddenVisibility()) { SymToPrint = GetSymbolWithGlobalValueBase(GV, "$non_lazy_ptr"); - const MCSymbol *&StubSym = + MCSymbol *&StubSym = MMI->getObjFileInfo<MachineModuleInfoMachO>().getGVStubEntry(SymToPrint); if (StubSym == 0) StubSym = GetGlobalValueSymbol(GV); 
@@ -432,7 +430,7 @@ void PPCAsmPrinter::printOp(const MachineOperand &MO) { GV->hasAvailableExternallyLinkage()) { SymToPrint = GetSymbolWithGlobalValueBase(GV, "$non_lazy_ptr"); - const MCSymbol *&StubSym = + MCSymbol *&StubSym = MMI->getObjFileInfo<MachineModuleInfoMachO>(). getHiddenGVStubEntry(SymToPrint); if (StubSym == 0) @@ -535,20 +533,16 @@ void PPCAsmPrinter::printPredicateOperand(const MachineInstr *MI, unsigned OpNo, } -/// printMachineInstruction -- Print out a single PowerPC MI in Darwin syntax to +/// EmitInstruction -- Print out a single PowerPC MI in Darwin syntax to /// the current output stream. /// -void PPCAsmPrinter::printMachineInstruction(const MachineInstr *MI) { - ++EmittedInsts; - - processDebugLoc(MI, true); - +void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { // Check for slwi/srwi mnemonics. - bool useSubstituteMnemonic = false; if (MI->getOpcode() == PPC::RLWINM) { unsigned char SH = MI->getOperand(2).getImm(); unsigned char MB = MI->getOperand(3).getImm(); unsigned char ME = MI->getOperand(4).getImm(); + bool useSubstituteMnemonic = false; if (SH <= 31 && MB == 0 && ME == (31-SH)) { O << "\tslwi "; useSubstituteMnemonic = true; } @@ -561,122 +555,55 @@ void PPCAsmPrinter::printMachineInstruction(const MachineInstr *MI) { O << ", "; printOperand(MI, 1); O << ", " << (unsigned int)SH; + OutStreamer.AddBlankLine(); + return; } - } else if (MI->getOpcode() == PPC::OR || MI->getOpcode() == PPC::OR8) { - if (MI->getOperand(1).getReg() == MI->getOperand(2).getReg()) { - useSubstituteMnemonic = true; - O << "\tmr "; - printOperand(MI, 0); - O << ", "; - printOperand(MI, 1); - } - } else if (MI->getOpcode() == PPC::RLDICR) { + } + + if ((MI->getOpcode() == PPC::OR || MI->getOpcode() == PPC::OR8) && + MI->getOperand(1).getReg() == MI->getOperand(2).getReg()) { + O << "\tmr "; + printOperand(MI, 0); + O << ", "; + printOperand(MI, 1); + OutStreamer.AddBlankLine(); + return; + } + + if (MI->getOpcode() == PPC::RLDICR) { unsigned char SH = MI->getOperand(2).getImm(); unsigned char ME = MI->getOperand(3).getImm(); // rldicr RA, RS, SH, 63-SH == sldi RA, RS, SH if (63-SH == ME) { - useSubstituteMnemonic = true; O << "\tsldi "; printOperand(MI, 0); O << ", "; printOperand(MI, 1); O << ", " << (unsigned int)SH; + OutStreamer.AddBlankLine(); + return; } } - if (!useSubstituteMnemonic) - printInstruction(MI); - - if (VerboseAsm) - EmitComments(*MI); - O << '\n'; - - processDebugLoc(MI, false); + printInstruction(MI); + OutStreamer.AddBlankLine(); } -/// runOnMachineFunction - This uses the printMachineInstruction() -/// method to print assembly for each instruction. -/// -bool PPCLinuxAsmPrinter::runOnMachineFunction(MachineFunction &MF) { - this->MF = &MF; - - SetupMachineFunction(MF); - O << "\n\n"; - - // Print out constants referenced by the function - EmitConstantPool(MF.getConstantPool()); - - // Print out labels for the function. - const Function *F = MF.getFunction(); - OutStreamer.SwitchSection(getObjFileLowering().SectionForGlobal(F, Mang, TM)); - - switch (F->getLinkage()) { - default: llvm_unreachable("Unknown linkage type!"); - case Function::PrivateLinkage: - case Function::InternalLinkage: // Symbols default to internal. 
- break; - case Function::ExternalLinkage: - O << "\t.global\t" << *CurrentFnSym << '\n' << "\t.type\t"; - O << *CurrentFnSym << ", @function\n"; - break; - case Function::LinkerPrivateLinkage: - case Function::WeakAnyLinkage: - case Function::WeakODRLinkage: - case Function::LinkOnceAnyLinkage: - case Function::LinkOnceODRLinkage: - O << "\t.global\t" << *CurrentFnSym << '\n'; - O << "\t.weak\t" << *CurrentFnSym << '\n'; - break; - } - - printVisibility(CurrentFnSym, F->getVisibility()); - - EmitAlignment(MF.getAlignment(), F); - - if (Subtarget.isPPC64()) { - // Emit an official procedure descriptor. - // FIXME 64-bit SVR4: Use MCSection here! - O << "\t.section\t\".opd\",\"aw\"\n"; - O << "\t.align 3\n"; - O << *CurrentFnSym << ":\n"; - O << "\t.quad .L." << *CurrentFnSym << ",.TOC.@tocbase\n"; - O << "\t.previous\n"; - O << ".L." << *CurrentFnSym << ":\n"; - } else { - O << *CurrentFnSym << ":\n"; - } - - // Emit pre-function debug information. - DW->BeginFunction(&MF); - - // Print out code for the function. - for (MachineFunction::const_iterator I = MF.begin(), E = MF.end(); - I != E; ++I) { - // Print a label for the basic block. - if (I != MF.begin()) { - EmitBasicBlockStart(I); - } - for (MachineBasicBlock::const_iterator II = I->begin(), E = I->end(); - II != E; ++II) { - // Print the assembly for the instruction. - printMachineInstruction(II); - } - } - - O << "\t.size\t" << *CurrentFnSym << ",.-" << *CurrentFnSym << '\n'; - - OutStreamer.SwitchSection(getObjFileLowering().SectionForGlobal(F, Mang, TM)); - - // Emit post-function debug information. - DW->EndFunction(&MF); - - // Print out jump tables referenced by the function. - EmitJumpTableInfo(MF.getJumpTableInfo(), MF); - - // We didn't modify anything. - return false; +void PPCLinuxAsmPrinter::EmitFunctionEntryLabel() { + if (!Subtarget.isPPC64()) // linux/ppc32 - Normal entry label. + return AsmPrinter::EmitFunctionEntryLabel(); + + // Emit an official procedure descriptor. + // FIXME 64-bit SVR4: Use MCSection here! + O << "\t.section\t\".opd\",\"aw\"\n"; + O << "\t.align 3\n"; + OutStreamer.EmitLabel(CurrentFnSym); + O << "\t.quad .L." << *CurrentFnSym << ",.TOC.@tocbase\n"; + O << "\t.previous\n"; + O << ".L." << *CurrentFnSym << ":\n"; } + bool PPCLinuxAsmPrinter::doFinalization(Module &M) { const TargetData *TD = TM.getTargetData(); @@ -697,81 +624,6 @@ bool PPCLinuxAsmPrinter::doFinalization(Module &M) { return AsmPrinter::doFinalization(M); } -/// runOnMachineFunction - This uses the printMachineInstruction() -/// method to print assembly for each instruction. -/// -bool PPCDarwinAsmPrinter::runOnMachineFunction(MachineFunction &MF) { - this->MF = &MF; - - SetupMachineFunction(MF); - O << "\n\n"; - - // Print out constants referenced by the function - EmitConstantPool(MF.getConstantPool()); - - // Print out labels for the function. - const Function *F = MF.getFunction(); - OutStreamer.SwitchSection(getObjFileLowering().SectionForGlobal(F, Mang, TM)); - - switch (F->getLinkage()) { - default: llvm_unreachable("Unknown linkage type!"); - case Function::PrivateLinkage: - case Function::InternalLinkage: // Symbols default to internal. 
- break; - case Function::ExternalLinkage: - O << "\t.globl\t" << *CurrentFnSym << '\n'; - break; - case Function::WeakAnyLinkage: - case Function::WeakODRLinkage: - case Function::LinkOnceAnyLinkage: - case Function::LinkOnceODRLinkage: - case Function::LinkerPrivateLinkage: - O << "\t.globl\t" << *CurrentFnSym << '\n'; - O << "\t.weak_definition\t" << *CurrentFnSym << '\n'; - break; - } - - printVisibility(CurrentFnSym, F->getVisibility()); - - EmitAlignment(MF.getAlignment(), F); - O << *CurrentFnSym << ":\n"; - - // Emit pre-function debug information. - DW->BeginFunction(&MF); - - // If the function is empty, then we need to emit *something*. Otherwise, the - // function's label might be associated with something that it wasn't meant to - // be associated with. We emit a noop in this situation. - MachineFunction::iterator I = MF.begin(); - - if (++I == MF.end() && MF.front().empty()) - O << "\tnop\n"; - - // Print out code for the function. - for (MachineFunction::const_iterator I = MF.begin(), E = MF.end(); - I != E; ++I) { - // Print a label for the basic block. - if (I != MF.begin()) { - EmitBasicBlockStart(I); - } - for (MachineBasicBlock::const_iterator II = I->begin(), IE = I->end(); - II != IE; ++II) { - // Print the assembly for the instruction. - printMachineInstruction(II); - } - } - - // Emit post-function debug information. - DW->EndFunction(&MF); - - // Print out jump tables referenced by the function. - EmitJumpTableInfo(MF.getJumpTableInfo(), MF); - - // We didn't modify anything. - return false; -} - - void PPCDarwinAsmPrinter::EmitStartOfAsmFile(Module &M) { static const char *const CPUDirectives[] = { "", @@ -928,9 +780,8 @@ bool PPCDarwinAsmPrinter::doFinalization(Module &M) { for (std::vector<Function *>::const_iterator I = Personalities.begin(), E = Personalities.end(); I != E; ++I) { if (*I) { - const MCSymbol *NLPSym = - GetSymbolWithGlobalValueBase(*I, "$non_lazy_ptr"); - const MCSymbol *&StubSym = MMIMacho.getGVStubEntry(NLPSym); + MCSymbol *NLPSym = GetSymbolWithGlobalValueBase(*I, "$non_lazy_ptr"); + MCSymbol *&StubSym = MMIMacho.getGVStubEntry(NLPSym); StubSym = GetGlobalValueSymbol(*I); } } @@ -981,13 +832,13 @@ bool PPCDarwinAsmPrinter::doFinalization(Module &M) { /// static AsmPrinter *createPPCAsmPrinterPass(formatted_raw_ostream &o, TargetMachine &tm, - const MCAsmInfo *tai, - bool verbose) { + MCContext &Ctx, MCStreamer &Streamer, + const MCAsmInfo *tai) { const PPCSubtarget *Subtarget = &tm.getSubtarget<PPCSubtarget>(); if (Subtarget->isDarwin()) - return new PPCDarwinAsmPrinter(o, tm, tai, verbose); - return new PPCLinuxAsmPrinter(o, tm, tai, verbose); + return new PPCDarwinAsmPrinter(o, tm, Ctx, Streamer, tai); + return new PPCLinuxAsmPrinter(o, tm, Ctx, Streamer, tai); } // Force static initialization. diff --git a/lib/Target/PowerPC/CMakeLists.txt b/lib/Target/PowerPC/CMakeLists.txt index bdd6d36..c997c5c 100644 --- a/lib/Target/PowerPC/CMakeLists.txt +++ b/lib/Target/PowerPC/CMakeLists.txt @@ -19,7 +19,6 @@ add_llvm_target(PowerPCCodeGen PPCISelDAGToDAG.cpp PPCISelLowering.cpp PPCJITInfo.cpp - PPCMachOWriterInfo.cpp PPCMCAsmInfo.cpp PPCPredicates.cpp PPCRegisterInfo.cpp diff --git a/lib/Target/PowerPC/Makefile b/lib/Target/PowerPC/Makefile index cd30011..1265f1d 100644 --- a/lib/Target/PowerPC/Makefile +++ b/lib/Target/PowerPC/Makefile @@ -10,7 +10,6 @@ LEVEL = ../../.. LIBRARYNAME = LLVMPowerPCCodeGen TARGET = PPC -CXXFLAGS = -fno-rtti # Make sure that tblgen is run, first thing. 
BUILT_SOURCES = PPCGenInstrNames.inc PPCGenRegisterNames.inc \ diff --git a/lib/Target/PowerPC/PPC.h b/lib/Target/PowerPC/PPC.h index 7b98268..67e3a4a 100644 --- a/lib/Target/PowerPC/PPC.h +++ b/lib/Target/PowerPC/PPC.h @@ -23,18 +23,12 @@ namespace llvm { class PPCTargetMachine; class FunctionPass; - class MachineCodeEmitter; - class ObjectCodeEmitter; class formatted_raw_ostream; FunctionPass *createPPCBranchSelectionPass(); FunctionPass *createPPCISelDag(PPCTargetMachine &TM); -FunctionPass *createPPCCodeEmitterPass(PPCTargetMachine &TM, - MachineCodeEmitter &MCE); FunctionPass *createPPCJITCodeEmitterPass(PPCTargetMachine &TM, JITCodeEmitter &MCE); -FunctionPass *createPPCObjectCodeEmitterPass(PPCTargetMachine &TM, - ObjectCodeEmitter &OCE); extern Target ThePPC32Target; extern Target ThePPC64Target; diff --git a/lib/Target/PowerPC/PPCCallingConv.td b/lib/Target/PowerPC/PPCCallingConv.td index c7ce171..155fba2 100644 --- a/lib/Target/PowerPC/PPCCallingConv.td +++ b/lib/Target/PowerPC/PPCCallingConv.td @@ -66,28 +66,13 @@ def CC_PPC : CallingConv<[ // PowerPC System V Release 4 ABI //===----------------------------------------------------------------------===// -// _Complex arguments are never split, thus their two scalars are either -// passed both in argument registers or both on the stack. Also _Complex -// arguments are always passed in general purpose registers, never in -// Floating-point registers or vector registers. Arguments which should go -// on the stack are marked with the inreg parameter attribute. -// Giving inreg this target-dependent (and counter-intuitive) meaning -// simplifies things, because functions calls are not always coming from the -// frontend but are also created implicitly e.g. for libcalls. If inreg would -// actually mean that the argument is passed in a register, then all places -// which create function calls/function definitions implicitly would need to -// be aware of this fact and would need to mark arguments accordingly. With -// inreg meaning that the argument is passed on the stack, this is not an -// issue, except for calls which involve _Complex types. - def CC_PPC_SVR4_Common : CallingConv<[ // The ABI requires i64 to be passed in two adjacent registers with the first // register having an odd register number. CCIfType<[i32], CCIfSplit<CCCustom<"CC_PPC_SVR4_Custom_AlignArgRegs">>>, // The first 8 integer arguments are passed in integer registers. - CCIfType<[i32], CCIf<"!ArgFlags.isInReg()", - CCAssignToReg<[R3, R4, R5, R6, R7, R8, R9, R10]>>>, + CCIfType<[i32], CCAssignToReg<[R3, R4, R5, R6, R7, R8, R9, R10]>>, // Make sure the i64 words from a long double are either both passed in // registers or both passed on the stack. 
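Aside on the CCIfSplit<CCCustom<"CC_PPC_SVR4_Custom_AlignArgRegs">> rule kept in the hunk above: the string names a C++ handler (living in PPCISelLowering.cpp) that enforces the ABI requirement that a split i64 start in an odd-numbered GPR. The following is a minimal sketch of such a handler, assuming the CCState/CCCustomFn interfaces of this vintage of LLVM; it is illustrative, not the verbatim tree code:

    static bool CC_PPC_SVR4_Custom_AlignArgRegs(unsigned &ValNo, EVT &ValVT,
                                                EVT &LocVT,
                                                CCValAssign::LocInfo &LocInfo,
                                                ISD::ArgFlagsTy &ArgFlags,
                                                CCState &State) {
      // 32-bit SVR4 argument GPRs in allocation order (R3 is odd-numbered).
      static const unsigned ArgRegs[] = {
        PPC::R3, PPC::R4, PPC::R5, PPC::R6,
        PPC::R7, PPC::R8, PPC::R9, PPC::R10
      };
      const unsigned NumArgRegs = sizeof(ArgRegs) / sizeof(ArgRegs[0]);

      unsigned RegNum = State.getFirstUnallocated(ArgRegs, NumArgRegs);
      // An odd index into ArgRegs is an even-numbered register (R4, R6, ...),
      // so burn one register to land the split i64 on an odd-numbered one.
      if (RegNum != NumArgRegs && RegNum % 2 == 1)
        State.AllocateReg(ArgRegs[RegNum]);

      // Return false so the normal CCAssignToReg rules still place both
      // halves; this hook only aligns the starting register.
      return false;
    }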
diff --git a/lib/Target/PowerPC/PPCCodeEmitter.cpp b/lib/Target/PowerPC/PPCCodeEmitter.cpp index da9ea36..327470d 100644 --- a/lib/Target/PowerPC/PPCCodeEmitter.cpp +++ b/lib/Target/PowerPC/PPCCodeEmitter.cpp @@ -17,26 +17,34 @@ #include "PPC.h" #include "llvm/Module.h" #include "llvm/PassManager.h" -#include "llvm/CodeGen/MachineCodeEmitter.h" #include "llvm/CodeGen/JITCodeEmitter.h" -#include "llvm/CodeGen/ObjectCodeEmitter.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineModuleInfo.h" -#include "llvm/CodeGen/Passes.h" -#include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetOptions.h" using namespace llvm; namespace { - class PPCCodeEmitter { + class PPCCodeEmitter : public MachineFunctionPass { TargetMachine &TM; - MachineCodeEmitter &MCE; + JITCodeEmitter &MCE; + + void getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<MachineModuleInfo>(); + MachineFunctionPass::getAnalysisUsage(AU); + } + + static char ID; + + /// MovePCtoLROffset - When/if we see a MovePCtoLR instruction, we record + /// its address in the function into this pointer. + void *MovePCtoLROffset; public: - PPCCodeEmitter(TargetMachine &tm, MachineCodeEmitter &mce): - TM(tm), MCE(mce) {} + + PPCCodeEmitter(TargetMachine &tm, JITCodeEmitter &mce) + : MachineFunctionPass(&ID), TM(tm), MCE(mce) {} /// getBinaryCodeForInstr - This function, generated by the /// CodeEmitterGenerator using TableGen, produces the binary encoding for @@ -49,27 +57,6 @@ namespace { unsigned getMachineOpValue(const MachineInstr &MI, const MachineOperand &MO); - /// MovePCtoLROffset - When/if we see a MovePCtoLR instruction, we record - /// its address in the function into this pointer. - - void *MovePCtoLROffset; - }; - - template <class CodeEmitter> - class Emitter : public MachineFunctionPass, public PPCCodeEmitter { - TargetMachine &TM; - CodeEmitter &MCE; - - void getAnalysisUsage(AnalysisUsage &AU) const { - AU.addRequired<MachineModuleInfo>(); - MachineFunctionPass::getAnalysisUsage(AU); - } - - public: - static char ID; - Emitter(TargetMachine &tm, CodeEmitter &mce) - : MachineFunctionPass(&ID), PPCCodeEmitter(tm, mce), TM(tm), MCE(mce) {} - const char *getPassName() const { return "PowerPC Machine Code Emitter"; } /// runOnMachineFunction - emits the given MachineFunction to memory @@ -84,31 +71,18 @@ namespace { /// unsigned getValueBit(int64_t Val, unsigned bit) { return (Val >> bit) & 1; } }; - - template <class CodeEmitter> - char Emitter<CodeEmitter>::ID = 0; } +char PPCCodeEmitter::ID = 0; + /// createPPCCodeEmitterPass - Return a pass that emits the collected PPC code /// to the specified MCE object. 
- -FunctionPass *llvm::createPPCCodeEmitterPass(PPCTargetMachine &TM, - MachineCodeEmitter &MCE) { - return new Emitter<MachineCodeEmitter>(TM, MCE); -} - FunctionPass *llvm::createPPCJITCodeEmitterPass(PPCTargetMachine &TM, JITCodeEmitter &JCE) { - return new Emitter<JITCodeEmitter>(TM, JCE); -} - -FunctionPass *llvm::createPPCObjectCodeEmitterPass(PPCTargetMachine &TM, - ObjectCodeEmitter &OCE) { - return new Emitter<ObjectCodeEmitter>(TM, OCE); + return new PPCCodeEmitter(TM, JCE); } -template <class CodeEmitter> -bool Emitter<CodeEmitter>::runOnMachineFunction(MachineFunction &MF) { +bool PPCCodeEmitter::runOnMachineFunction(MachineFunction &MF) { assert((MF.getTarget().getRelocationModel() != Reloc::Default || MF.getTarget().getRelocationModel() != Reloc::Static) && "JIT relocation model must be set to static or default!"); @@ -124,8 +98,7 @@ bool Emitter<CodeEmitter>::runOnMachineFunction(MachineFunction &MF) { return false; } -template <class CodeEmitter> -void Emitter<CodeEmitter>::emitBasicBlock(MachineBasicBlock &MBB) { +void PPCCodeEmitter::emitBasicBlock(MachineBasicBlock &MBB) { MCE.StartMachineBasicBlock(&MBB); for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); I != E; ++I){ @@ -135,12 +108,12 @@ void Emitter<CodeEmitter>::emitBasicBlock(MachineBasicBlock &MBB) { default: MCE.emitWordBE(getBinaryCodeForInstr(MI)); break; - case TargetInstrInfo::DBG_LABEL: - case TargetInstrInfo::EH_LABEL: + case TargetOpcode::DBG_LABEL: + case TargetOpcode::EH_LABEL: MCE.emitLabel(MI.getOperand(0).getImm()); break; - case TargetInstrInfo::IMPLICIT_DEF: - case TargetInstrInfo::KILL: + case TargetOpcode::IMPLICIT_DEF: + case TargetOpcode::KILL: break; // pseudo opcode, no side effects case PPC::MovePCtoLR: case PPC::MovePCtoLR8: diff --git a/lib/Target/PowerPC/PPCHazardRecognizers.cpp b/lib/Target/PowerPC/PPCHazardRecognizers.cpp index 6af7e0f..3a15f7e 100644 --- a/lib/Target/PowerPC/PPCHazardRecognizers.cpp +++ b/lib/Target/PowerPC/PPCHazardRecognizers.cpp @@ -118,7 +118,7 @@ isLoadOfStoredAddress(unsigned LoadSize, SDValue Ptr1, SDValue Ptr2) const { } /// getHazardType - We return hazard for any non-branch instruction that would -/// terminate terminate the dispatch group. We turn NoopHazard for any +/// terminate the dispatch group. We turn NoopHazard for any /// instructions that wouldn't terminate the dispatch group that would cause a /// pipeline flush. ScheduleHazardRecognizer::HazardType PPCHazardRecognizer970:: diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp index 32c1879..004997f 100644 --- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -199,7 +199,7 @@ void PPCDAGToDAGISel::InsertVRSaveCode(MachineFunction &Fn) { // Check to see if this function uses vector registers, which means we have to // save and restore the VRSAVE register and update it with the regs we use. // - // In this case, there will be virtual registers of vector type type created + // In this case, there will be virtual registers of vector type created // by the scheduler. Detect them now. 
bool HasVectorVReg = false; for (unsigned i = TargetRegisterInfo::FirstVirtualRegister, diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index 8248c94..e73af56 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -25,13 +25,13 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" #include "llvm/CallingConv.h" #include "llvm/Constants.h" #include "llvm/Function.h" #include "llvm/Intrinsics.h" #include "llvm/Support/MathExtras.h" #include "llvm/Target/TargetOptions.h" -#include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" @@ -1243,7 +1243,8 @@ SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op, // If the global is weak or external, we have to go through the lazy // resolution stub. - return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Lo, NULL, 0); + return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Lo, NULL, 0, + false, false, 0); } SDValue PPCTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) { @@ -1355,7 +1356,8 @@ SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG, EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); SDValue FR = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT); const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); - return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1), SV, 0); + return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1), SV, 0, + false, false, 0); } // For the 32-bit SVR4 ABI we follow the layout of the va_list struct. @@ -1405,25 +1407,29 @@ SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG, // Store first byte : number of int regs SDValue firstStore = DAG.getTruncStore(Op.getOperand(0), dl, ArgGPR, - Op.getOperand(1), SV, 0, MVT::i8); + Op.getOperand(1), SV, 0, MVT::i8, + false, false, 0); uint64_t nextOffset = FPROffset; SDValue nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, Op.getOperand(1), ConstFPROffset); // Store second byte : number of float regs SDValue secondStore = - DAG.getTruncStore(firstStore, dl, ArgFPR, nextPtr, SV, nextOffset, MVT::i8); + DAG.getTruncStore(firstStore, dl, ArgFPR, nextPtr, SV, nextOffset, MVT::i8, + false, false, 0); nextOffset += StackOffset; nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstStackOffset); // Store second word : arguments given on stack SDValue thirdStore = - DAG.getStore(secondStore, dl, StackOffsetFI, nextPtr, SV, nextOffset); + DAG.getStore(secondStore, dl, StackOffsetFI, nextPtr, SV, nextOffset, + false, false, 0); nextOffset += FrameOffset; nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstFrameOffset); // Store third word : arguments given in registers - return DAG.getStore(thirdStore, dl, FR, nextPtr, SV, nextOffset); + return DAG.getStore(thirdStore, dl, FR, nextPtr, SV, nextOffset, + false, false, 0); } @@ -1572,7 +1578,7 @@ PPCTargetLowering::LowerFormalArguments_SVR4( EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); // Potential tail calls could cause overwriting of argument stack slots. - bool isImmutable = !(PerformTailCallOpt && (CallConv==CallingConv::Fast)); + bool isImmutable = !(GuaranteedTailCallOpt && (CallConv==CallingConv::Fast)); unsigned PtrByteSize = 4; // Assign locations to all of the incoming arguments. 
@@ -1628,7 +1634,8 @@ PPCTargetLowering::LowerFormalArguments_SVR4( // Create load nodes to retrieve arguments from the stack. SDValue FIN = DAG.getFrameIndex(FI, PtrVT); - InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN, NULL, 0)); + InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN, NULL, 0, + false, false, 0)); } } @@ -1700,7 +1707,8 @@ PPCTargetLowering::LowerFormalArguments_SVR4( unsigned GPRIndex = 0; for (; GPRIndex != VarArgsNumGPR; ++GPRIndex) { SDValue Val = DAG.getRegister(GPArgRegs[GPRIndex], PtrVT); - SDValue Store = DAG.getStore(Chain, dl, Val, FIN, NULL, 0); + SDValue Store = DAG.getStore(Chain, dl, Val, FIN, NULL, 0, + false, false, 0); MemOps.push_back(Store); // Increment the address by four for the next argument to store SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, PtrVT); @@ -1714,7 +1722,8 @@ PPCTargetLowering::LowerFormalArguments_SVR4( unsigned VReg = MF.addLiveIn(GPArgRegs[GPRIndex], &PPC::GPRCRegClass); SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); - SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, NULL, 0); + SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, NULL, 0, + false, false, 0); MemOps.push_back(Store); // Increment the address by four for the next argument to store SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, PtrVT); @@ -1729,7 +1738,8 @@ PPCTargetLowering::LowerFormalArguments_SVR4( unsigned FPRIndex = 0; for (FPRIndex = 0; FPRIndex != VarArgsNumFPR; ++FPRIndex) { SDValue Val = DAG.getRegister(FPArgRegs[FPRIndex], MVT::f64); - SDValue Store = DAG.getStore(Chain, dl, Val, FIN, NULL, 0); + SDValue Store = DAG.getStore(Chain, dl, Val, FIN, NULL, 0, + false, false, 0); MemOps.push_back(Store); // Increment the address by eight for the next argument to store SDValue PtrOff = DAG.getConstant(EVT(MVT::f64).getSizeInBits()/8, @@ -1741,7 +1751,8 @@ PPCTargetLowering::LowerFormalArguments_SVR4( unsigned VReg = MF.addLiveIn(FPArgRegs[FPRIndex], &PPC::F8RCRegClass); SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::f64); - SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, NULL, 0); + SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, NULL, 0, + false, false, 0); MemOps.push_back(Store); // Increment the address by eight for the next argument to store SDValue PtrOff = DAG.getConstant(EVT(MVT::f64).getSizeInBits()/8, @@ -1773,7 +1784,7 @@ PPCTargetLowering::LowerFormalArguments_Darwin( EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); bool isPPC64 = PtrVT == MVT::i64; // Potential tail calls could cause overwriting of argument stack slots. - bool isImmutable = !(PerformTailCallOpt && (CallConv==CallingConv::Fast)); + bool isImmutable = !(GuaranteedTailCallOpt && (CallConv==CallingConv::Fast)); unsigned PtrByteSize = isPPC64 ? 8 : 4; unsigned ArgOffset = PPCFrameInfo::getLinkageSize(isPPC64, true); @@ -1903,7 +1914,9 @@ PPCTargetLowering::LowerFormalArguments_Darwin( unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass); SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); SDValue Store = DAG.getTruncStore(Val.getValue(1), dl, Val, FIN, - NULL, 0, ObjSize==1 ? MVT::i8 : MVT::i16 ); + NULL, 0, + ObjSize==1 ? 
MVT::i8 : MVT::i16, + false, false, 0); MemOps.push_back(Store); ++GPR_idx; } @@ -1921,7 +1934,8 @@ PPCTargetLowering::LowerFormalArguments_Darwin( int FI = MFI->CreateFixedObject(PtrByteSize, ArgOffset, true, false); SDValue FIN = DAG.getFrameIndex(FI, PtrVT); SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); - SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, NULL, 0); + SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, NULL, 0, + false, false, 0); MemOps.push_back(Store); ++GPR_idx; ArgOffset += PtrByteSize; @@ -2045,7 +2059,8 @@ PPCTargetLowering::LowerFormalArguments_Darwin( CurArgOffset + (ArgSize - ObjSize), isImmutable, false); SDValue FIN = DAG.getFrameIndex(FI, PtrVT); - ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, NULL, 0); + ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, NULL, 0, + false, false, 0); } InVals.push_back(ArgVal); @@ -2091,7 +2106,8 @@ PPCTargetLowering::LowerFormalArguments_Darwin( VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass); SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); - SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, NULL, 0); + SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, NULL, 0, + false, false, 0); MemOps.push_back(Store); // Increment the address by four for the next argument to store SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, PtrVT); @@ -2164,7 +2180,7 @@ CalculateParameterAndLinkageAreaSize(SelectionDAG &DAG, PPCFrameInfo::getMinCallFrameSize(isPPC64, true)); // Tail call needs the stack to be aligned. - if (CC==CallingConv::Fast && PerformTailCallOpt) { + if (CC==CallingConv::Fast && GuaranteedTailCallOpt) { unsigned TargetAlign = DAG.getMachineFunction().getTarget().getFrameInfo()-> getStackAlignment(); unsigned AlignMask = TargetAlign-1; @@ -2200,6 +2216,9 @@ PPCTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, bool isVarArg, const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG& DAG) const { + if (!GuaranteedTailCallOpt) + return false; + // Variable argument functions are not supported. if (isVarArg) return false; @@ -2268,7 +2287,7 @@ StoreTailCallArgumentsToStackSlot(SelectionDAG &DAG, // Store relative to framepointer. MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, FIN, PseudoSourceValue::getFixedStack(FI), - 0)); + 0, false, false, 0)); } } @@ -2294,7 +2313,8 @@ static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG, EVT VT = isPPC64 ? MVT::i64 : MVT::i32; SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewRetAddr, VT); Chain = DAG.getStore(Chain, dl, OldRetAddr, NewRetAddrFrIdx, - PseudoSourceValue::getFixedStack(NewRetAddr), 0); + PseudoSourceValue::getFixedStack(NewRetAddr), 0, + false, false, 0); // When using the 32/64-bit SVR4 ABI there is no need to move the FP stack // slot as the FP is never overwritten. @@ -2305,7 +2325,8 @@ static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG, true, false); SDValue NewFramePtrIdx = DAG.getFrameIndex(NewFPIdx, VT); Chain = DAG.getStore(Chain, dl, OldFP, NewFramePtrIdx, - PseudoSourceValue::getFixedStack(NewFPIdx), 0); + PseudoSourceValue::getFixedStack(NewFPIdx), 0, + false, false, 0); } } return Chain; @@ -2343,14 +2364,16 @@ SDValue PPCTargetLowering::EmitTailCallLoadFPAndRetAddr(SelectionDAG & DAG, // Load the LR and FP stack slot for later adjusting. EVT VT = PPCSubTarget.isPPC64() ? 
MVT::i64 : MVT::i32; LROpOut = getReturnAddrFrameIndex(DAG); - LROpOut = DAG.getLoad(VT, dl, Chain, LROpOut, NULL, 0); + LROpOut = DAG.getLoad(VT, dl, Chain, LROpOut, NULL, 0, + false, false, 0); Chain = SDValue(LROpOut.getNode(), 1); // When using the 32/64-bit SVR4 ABI there is no need to load the FP stack // slot as the FP is never overwritten. if (isDarwinABI) { FPOpOut = getFramePointerFrameIndex(DAG); - FPOpOut = DAG.getLoad(VT, dl, Chain, FPOpOut, NULL, 0); + FPOpOut = DAG.getLoad(VT, dl, Chain, FPOpOut, NULL, 0, + false, false, 0); Chain = SDValue(FPOpOut.getNode(), 1); } } @@ -2392,7 +2415,8 @@ LowerMemOpCallTo(SelectionDAG &DAG, MachineFunction &MF, SDValue Chain, PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, DAG.getConstant(ArgOffset, PtrVT)); } - MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0)); + MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0, + false, false, 0)); // Calculate and remember argument location. } else CalculateTailCallArgDest(DAG, MF, isPPC64, Arg, SPDiff, ArgOffset, TailCallArguments); @@ -2601,7 +2625,7 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, DebugLoc dl, // the stack. Account for this here so these bytes can be pushed back on in // PPCRegisterInfo::eliminateCallFramePseudoInstr. int BytesCalleePops = - (CallConv==CallingConv::Fast && PerformTailCallOpt) ? NumBytes : 0; + (CallConv==CallingConv::Fast && GuaranteedTailCallOpt) ? NumBytes : 0; if (InFlag.getNode()) Ops.push_back(InFlag); @@ -2673,11 +2697,15 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, DebugLoc dl, SDValue PPCTargetLowering::LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg, - bool isTailCall, + bool &isTailCall, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) { + if (isTailCall) + isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, isVarArg, + Ins, DAG); + if (PPCSubTarget.isSVR4ABI() && !PPCSubTarget.isPPC64()) { return LowerCall_SVR4(Chain, Callee, CallConv, isVarArg, isTailCall, Outs, Ins, @@ -2700,10 +2728,6 @@ PPCTargetLowering::LowerCall_SVR4(SDValue Chain, SDValue Callee, // See PPCTargetLowering::LowerFormalArguments_SVR4() for a description // of the 32-bit SVR4 ABI stack frame layout. - assert((!isTailCall || - (CallConv == CallingConv::Fast && PerformTailCallOpt)) && - "IsEligibleForTailCallOptimization missed a case!"); - assert((CallConv == CallingConv::C || CallConv == CallingConv::Fast) && "Unknown calling convention!"); @@ -2717,7 +2741,7 @@ PPCTargetLowering::LowerCall_SVR4(SDValue Chain, SDValue Callee, // and restoring the callers stack pointer in this functions epilog. This is // done because by tail calling the called function might overwrite the value // in this function's (MF) stack pointer stack slot 0(SP). - if (PerformTailCallOpt && CallConv==CallingConv::Fast) + if (GuaranteedTailCallOpt && CallConv==CallingConv::Fast) MF.getInfo<PPCFunctionInfo>()->setHasFastCall(); // Count how many bytes are to be pushed on the stack, including the linkage @@ -2859,7 +2883,8 @@ PPCTargetLowering::LowerCall_SVR4(SDValue Chain, SDValue Callee, PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff); MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, - PseudoSourceValue::getStack(), LocMemOffset)); + PseudoSourceValue::getStack(), LocMemOffset, + false, false, 0)); } else { // Calculate and remember argument location. 
CalculateTailCallArgDest(DAG, MF, false, Arg, SPDiff, LocMemOffset, @@ -2920,7 +2945,7 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee, // and restoring the callers stack pointer in this functions epilog. This is // done because by tail calling the called function might overwrite the value // in this function's (MF) stack pointer stack slot 0(SP). - if (PerformTailCallOpt && CallConv==CallingConv::Fast) + if (GuaranteedTailCallOpt && CallConv==CallingConv::Fast) MF.getInfo<PPCFunctionInfo>()->setHasFastCall(); unsigned nAltivecParamsAtEnd = 0; @@ -3021,7 +3046,7 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee, EVT VT = (Size==1) ? MVT::i8 : MVT::i16; if (GPR_idx != NumGPRs) { SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg, - NULL, 0, VT); + NULL, 0, VT, false, false, 0); MemOpChains.push_back(Load.getValue(1)); RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); @@ -3058,7 +3083,8 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee, SDValue Const = DAG.getConstant(j, PtrOff.getValueType()); SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const); if (GPR_idx != NumGPRs) { - SDValue Load = DAG.getLoad(PtrVT, dl, Chain, AddArg, NULL, 0); + SDValue Load = DAG.getLoad(PtrVT, dl, Chain, AddArg, NULL, 0, + false, false, 0); MemOpChains.push_back(Load.getValue(1)); RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); ArgOffset += PtrByteSize; @@ -3089,19 +3115,22 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee, RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg)); if (isVarArg) { - SDValue Store = DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0); + SDValue Store = DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0, + false, false, 0); MemOpChains.push_back(Store); // Float varargs are always shadowed in available integer registers if (GPR_idx != NumGPRs) { - SDValue Load = DAG.getLoad(PtrVT, dl, Store, PtrOff, NULL, 0); + SDValue Load = DAG.getLoad(PtrVT, dl, Store, PtrOff, NULL, 0, + false, false, 0); MemOpChains.push_back(Load.getValue(1)); RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); } if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 && !isPPC64){ SDValue ConstFour = DAG.getConstant(4, PtrOff.getValueType()); PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour); - SDValue Load = DAG.getLoad(PtrVT, dl, Store, PtrOff, NULL, 0); + SDValue Load = DAG.getLoad(PtrVT, dl, Store, PtrOff, NULL, 0, + false, false, 0); MemOpChains.push_back(Load.getValue(1)); RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); } @@ -3144,10 +3173,12 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee, // entirely in R registers. Maybe later. 
PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, DAG.getConstant(ArgOffset, PtrVT)); - SDValue Store = DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0); + SDValue Store = DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0, + false, false, 0); MemOpChains.push_back(Store); if (VR_idx != NumVRs) { - SDValue Load = DAG.getLoad(MVT::v4f32, dl, Store, PtrOff, NULL, 0); + SDValue Load = DAG.getLoad(MVT::v4f32, dl, Store, PtrOff, NULL, 0, + false, false, 0); MemOpChains.push_back(Load.getValue(1)); RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load)); } @@ -3157,7 +3188,8 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee, break; SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, DAG.getConstant(i, PtrVT)); - SDValue Load = DAG.getLoad(PtrVT, dl, Store, Ix, NULL, 0); + SDValue Load = DAG.getLoad(PtrVT, dl, Store, Ix, NULL, 0, + false, false, 0); MemOpChains.push_back(Load.getValue(1)); RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); } @@ -3222,7 +3254,8 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee, // TOC save area offset. SDValue PtrOff = DAG.getIntPtrConstant(40); SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff); - Chain = DAG.getStore(Val.getValue(1), dl, Val, AddPtr, NULL, 0); + Chain = DAG.getStore(Val.getValue(1), dl, Val, AddPtr, NULL, 0, + false, false, 0); } // Build a sequence of copy-to-reg nodes chained together with token chain @@ -3297,13 +3330,15 @@ SDValue PPCTargetLowering::LowerSTACKRESTORE(SDValue Op, SelectionDAG &DAG, SDValue SaveSP = Op.getOperand(1); // Load the old link SP. - SDValue LoadLinkSP = DAG.getLoad(PtrVT, dl, Chain, StackPtr, NULL, 0); + SDValue LoadLinkSP = DAG.getLoad(PtrVT, dl, Chain, StackPtr, NULL, 0, + false, false, 0); // Restore the stack pointer. Chain = DAG.getCopyToReg(LoadLinkSP.getValue(1), dl, SP, SaveSP); // Store the old link SP. - return DAG.getStore(Chain, dl, LoadLinkSP, StackPtr, NULL, 0); + return DAG.getStore(Chain, dl, LoadLinkSP, StackPtr, NULL, 0, + false, false, 0); } @@ -3480,14 +3515,16 @@ SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG, SDValue FIPtr = DAG.CreateStackTemporary(MVT::f64); // Emit a store to the stack slot. - SDValue Chain = DAG.getStore(DAG.getEntryNode(), dl, Tmp, FIPtr, NULL, 0); + SDValue Chain = DAG.getStore(DAG.getEntryNode(), dl, Tmp, FIPtr, NULL, 0, + false, false, 0); // Result is a load from the stack slot. If loading 4 bytes, make sure to // add in a bias. if (Op.getValueType() == MVT::i32) FIPtr = DAG.getNode(ISD::ADD, dl, FIPtr.getValueType(), FIPtr, DAG.getConstant(4, FIPtr.getValueType())); - return DAG.getLoad(Op.getValueType(), dl, Chain, FIPtr, NULL, 0); + return DAG.getLoad(Op.getValueType(), dl, Chain, FIPtr, NULL, 0, + false, false, 0); } SDValue PPCTargetLowering::LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) { @@ -3530,7 +3567,7 @@ SDValue PPCTargetLowering::LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) { DAG.getMemIntrinsicNode(PPCISD::STD_32, dl, DAG.getVTList(MVT::Other), Ops, 4, MVT::i64, MMO); // Load the value as a double. - SDValue Ld = DAG.getLoad(MVT::f64, dl, Store, FIdx, NULL, 0); + SDValue Ld = DAG.getLoad(MVT::f64, dl, Store, FIdx, NULL, 0, false, false, 0); // FCFID it and return it. 
SDValue FP = DAG.getNode(PPCISD::FCFID, dl, MVT::f64, Ld); @@ -3575,12 +3612,13 @@ SDValue PPCTargetLowering::LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) { int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8, false); SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT); SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Chain, - StackSlot, NULL, 0); + StackSlot, NULL, 0, false, false, 0); // Load FP Control Word from low 32 bits of stack slot. SDValue Four = DAG.getConstant(4, PtrVT); SDValue Addr = DAG.getNode(ISD::ADD, dl, PtrVT, StackSlot, Four); - SDValue CWD = DAG.getLoad(MVT::i32, dl, Store, Addr, NULL, 0); + SDValue CWD = DAG.getLoad(MVT::i32, dl, Store, Addr, NULL, 0, + false, false, 0); // Transform as necessary SDValue CWD1 = @@ -4246,9 +4284,11 @@ SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op, // Store the input value into Value#0 of the stack slot. SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, - Op.getOperand(0), FIdx, NULL, 0); + Op.getOperand(0), FIdx, NULL, 0, + false, false, 0); // Load it out. - return DAG.getLoad(Op.getValueType(), dl, Store, FIdx, NULL, 0); + return DAG.getLoad(Op.getValueType(), dl, Store, FIdx, NULL, 0, + false, false, 0); } SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) { @@ -5457,7 +5497,8 @@ SDValue PPCTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) { // to the stack. FuncInfo->setLRStoreRequired(); return DAG.getLoad(getPointerTy(), dl, - DAG.getEntryNode(), RetAddrFI, NULL, 0); + DAG.getEntryNode(), RetAddrFI, NULL, 0, + false, false, 0); } SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) { diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h index cf81395..9c390ac 100644 --- a/lib/Target/PowerPC/PPCISelLowering.h +++ b/lib/Target/PowerPC/PPCISelLowering.h @@ -345,13 +345,6 @@ namespace llvm { /// the offset of the target addressing mode. virtual bool isLegalAddressImmediate(GlobalValue *GV) const; - virtual bool - IsEligibleForTailCallOptimization(SDValue Callee, - CallingConv::ID CalleeCC, - bool isVarArg, - const SmallVectorImpl<ISD::InputArg> &Ins, - SelectionDAG& DAG) const; - virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const; virtual EVT getOptimalMemOpType(uint64_t Size, unsigned Align, @@ -365,6 +358,13 @@ namespace llvm { SDValue getFramePointerFrameIndex(SelectionDAG & DAG) const; SDValue getReturnAddrFrameIndex(SelectionDAG & DAG) const; + bool + IsEligibleForTailCallOptimization(SDValue Callee, + CallingConv::ID CalleeCC, + bool isVarArg, + const SmallVectorImpl<ISD::InputArg> &Ins, + SelectionDAG& DAG) const; + SDValue EmitTailCallLoadFPAndRetAddr(SelectionDAG & DAG, int SPDiff, SDValue Chain, @@ -431,7 +431,7 @@ namespace llvm { virtual SDValue LowerCall(SDValue Chain, SDValue Callee, - CallingConv::ID CallConv, bool isVarArg, bool isTailCall, + CallingConv::ID CallConv, bool isVarArg, bool &isTailCall, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp index af7d812..3db623a 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.cpp +++ b/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -421,22 +421,30 @@ PPCInstrInfo::StoreRegToStackSlot(MachineFunction &MF, FrameIdx)); return true; } else { - // FIXME: We use R0 here, because it isn't available for RA. We need to - // store the CR in the low 4-bits of the saved value. 
First, issue a MFCR - // to save all of the CRBits. - NewMIs.push_back(BuildMI(MF, DL, get(PPC::MFCR), PPC::R0)); + // FIXME: We need a scratch reg here. The trouble with using R0 is that + // it's possible for the stack frame to be so big the save location is + // out of range of immediate offsets, necessitating another register. + // We hack this on Darwin by reserving R2. It's probably broken on Linux + // at the moment. + + // We need to store the CR in the low 4-bits of the saved value. First, + // issue a MFCR to save all of the CRBits. + unsigned ScratchReg = TM.getSubtargetImpl()->isDarwinABI() ? + PPC::R2 : PPC::R0; + NewMIs.push_back(BuildMI(MF, DL, get(PPC::MFCR), ScratchReg)); // If the saved register wasn't CR0, shift the bits left so that they are // in CR0's slot. if (SrcReg != PPC::CR0) { unsigned ShiftBits = PPCRegisterInfo::getRegisterNumbering(SrcReg)*4; - // rlwinm r0, r0, ShiftBits, 0, 31. - NewMIs.push_back(BuildMI(MF, DL, get(PPC::RLWINM), PPC::R0) - .addReg(PPC::R0).addImm(ShiftBits).addImm(0).addImm(31)); + // rlwinm scratch, scratch, ShiftBits, 0, 31. + NewMIs.push_back(BuildMI(MF, DL, get(PPC::RLWINM), ScratchReg) + .addReg(ScratchReg).addImm(ShiftBits) + .addImm(0).addImm(31)); } NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STW)) - .addReg(PPC::R0, + .addReg(ScratchReg, getKillRegState(isKill)), FrameIdx)); } @@ -540,20 +548,28 @@ PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL, NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LFS), DestReg), FrameIdx)); } else if (RC == PPC::CRRCRegisterClass) { - // FIXME: We use R0 here, because it isn't available for RA. - NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LWZ), PPC::R0), - FrameIdx)); + // FIXME: We need a scratch reg here. The trouble with using R0 is that + // it's possible for the stack frame to be so big the save location is + // out of range of immediate offsets, necessitating another register. + // We hack this on Darwin by reserving R2. It's probably broken on Linux + // at the moment. + unsigned ScratchReg = TM.getSubtargetImpl()->isDarwinABI() ? + PPC::R2 : PPC::R0; + NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LWZ), + ScratchReg), FrameIdx)); // If the reloaded register isn't CR0, shift the bits right so that they are // in the right CR's slot. if (DestReg != PPC::CR0) { unsigned ShiftBits = PPCRegisterInfo::getRegisterNumbering(DestReg)*4; // rlwinm r11, r11, 32-ShiftBits, 0, 31. - NewMIs.push_back(BuildMI(MF, DL, get(PPC::RLWINM), PPC::R0) - .addReg(PPC::R0).addImm(32-ShiftBits).addImm(0).addImm(31)); + NewMIs.push_back(BuildMI(MF, DL, get(PPC::RLWINM), ScratchReg) + .addReg(ScratchReg).addImm(32-ShiftBits).addImm(0) + .addImm(31)); } - NewMIs.push_back(BuildMI(MF, DL, get(PPC::MTCRF), DestReg).addReg(PPC::R0)); + NewMIs.push_back(BuildMI(MF, DL, get(PPC::MTCRF), DestReg) + .addReg(ScratchReg)); } else if (RC == PPC::CRBITRCRegisterClass) { unsigned Reg = 0; diff --git a/lib/Target/PowerPC/PPCMCAsmInfo.cpp b/lib/Target/PowerPC/PPCMCAsmInfo.cpp index d2ff3b7..b37aee8 100644 --- a/lib/Target/PowerPC/PPCMCAsmInfo.cpp +++ b/lib/Target/PowerPC/PPCMCAsmInfo.cpp @@ -26,6 +26,9 @@ PPCMCAsmInfoDarwin::PPCMCAsmInfoDarwin(bool is64Bit) { } PPCLinuxMCAsmInfo::PPCLinuxMCAsmInfo(bool is64Bit) { + // ".comm align is in bytes but .align is pow-2."
+ AlignmentIsInBytes = false; + CommentString = "#"; GlobalPrefix = ""; PrivateGlobalPrefix = ".L"; @@ -49,9 +52,7 @@ PPCLinuxMCAsmInfo::PPCLinuxMCAsmInfo(bool is64Bit) { AbsoluteEHSectionOffsets = false; ZeroDirective = "\t.space\t"; - SetDirective = "\t.set"; Data64bitsDirective = is64Bit ? "\t.quad\t" : 0; - AlignmentIsInBytes = false; HasLCOMMDirective = true; AssemblerDialect = 0; // Old-Style mnemonics. } diff --git a/lib/Target/PowerPC/PPCMachOWriterInfo.cpp b/lib/Target/PowerPC/PPCMachOWriterInfo.cpp deleted file mode 100644 index 4c14454..0000000 --- a/lib/Target/PowerPC/PPCMachOWriterInfo.cpp +++ /dev/null @@ -1,152 +0,0 @@ -//===-- PPCMachOWriterInfo.cpp - Mach-O Writer Info for the PowerPC -------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements Mach-O writer information for the PowerPC backend. -// -//===----------------------------------------------------------------------===// - -#include "PPCMachOWriterInfo.h" -#include "PPCRelocations.h" -#include "PPCTargetMachine.h" -#include "llvm/CodeGen/MachORelocation.h" -#include "llvm/Support/OutputBuffer.h" -#include "llvm/Support/ErrorHandling.h" -#include <cstdio> -using namespace llvm; - -PPCMachOWriterInfo::PPCMachOWriterInfo(const PPCTargetMachine &TM) - : TargetMachOWriterInfo(TM.getTargetData()->getPointerSizeInBits() == 64 ? - HDR_CPU_TYPE_POWERPC64 : - HDR_CPU_TYPE_POWERPC, - HDR_CPU_SUBTYPE_POWERPC_ALL) {} -PPCMachOWriterInfo::~PPCMachOWriterInfo() {} - -/// GetTargetRelocation - For the MachineRelocation MR, convert it to one or -/// more PowerPC MachORelocation(s), add the new relocations to the -/// MachOSection, and rewrite the instruction at the section offset if required -/// by that relocation type. -unsigned PPCMachOWriterInfo::GetTargetRelocation(MachineRelocation &MR, - unsigned FromIdx, - unsigned ToAddr, - unsigned ToIdx, - OutputBuffer &RelocOut, - OutputBuffer &SecOut, - bool Scattered, - bool isExtern) const { - unsigned NumRelocs = 0; - uint64_t Addr = 0; - - // Get the address of whatever it is we're relocating, if possible. - if (!isExtern) - Addr = (uintptr_t)MR.getResultPointer() + ToAddr; - - switch ((PPC::RelocationType)MR.getRelocationType()) { - default: llvm_unreachable("Unknown PPC relocation type!"); - case PPC::reloc_absolute_low_ix: - llvm_unreachable("Unhandled PPC relocation type!"); - break; - case PPC::reloc_vanilla: - { - // FIXME: need to handle 64 bit vanilla relocs - MachORelocation VANILLA(MR.getMachineCodeOffset(), ToIdx, - false, 2, isExtern, - PPC_RELOC_VANILLA, - Scattered, (intptr_t)MR.getResultPointer()); - ++NumRelocs; - - if (Scattered) { - RelocOut.outword(VANILLA.getPackedFields()); - RelocOut.outword(VANILLA.getAddress()); - } else { - RelocOut.outword(VANILLA.getAddress()); - RelocOut.outword(VANILLA.getPackedFields()); - } - - intptr_t SymbolOffset; - - if (Scattered) - SymbolOffset = Addr + MR.getConstantVal(); - else - SymbolOffset = Addr; - - printf("vanilla fixup: sec_%x[%x] = %x\n", FromIdx, - unsigned(MR.getMachineCodeOffset()), - unsigned(SymbolOffset)); - SecOut.fixword(SymbolOffset, MR.getMachineCodeOffset()); - } - break; - case PPC::reloc_pcrel_bx: - { - // FIXME: Presumably someday we will need to branch to other, non-extern - // functions too. 
Need to figure out some way to distinguish between - // target is BB and target is function. - if (isExtern) { - MachORelocation BR24(MR.getMachineCodeOffset(), ToIdx, true, 2, - isExtern, PPC_RELOC_BR24, Scattered, - (intptr_t)MR.getMachineCodeOffset()); - RelocOut.outword(BR24.getAddress()); - RelocOut.outword(BR24.getPackedFields()); - ++NumRelocs; - } - - Addr -= MR.getMachineCodeOffset(); - Addr >>= 2; - Addr &= 0xFFFFFF; - Addr <<= 2; - Addr |= (SecOut[MR.getMachineCodeOffset()] << 24); - Addr |= (SecOut[MR.getMachineCodeOffset()+3] & 0x3); - SecOut.fixword(Addr, MR.getMachineCodeOffset()); - break; - } - case PPC::reloc_pcrel_bcx: - { - Addr -= MR.getMachineCodeOffset(); - Addr &= 0xFFFC; - - SecOut.fixhalf(Addr, MR.getMachineCodeOffset() + 2); - break; - } - case PPC::reloc_absolute_high: - { - MachORelocation HA16(MR.getMachineCodeOffset(), ToIdx, false, 2, - isExtern, PPC_RELOC_HA16); - MachORelocation PAIR(Addr & 0xFFFF, 0xFFFFFF, false, 2, isExtern, - PPC_RELOC_PAIR); - NumRelocs = 2; - - RelocOut.outword(HA16.getRawAddress()); - RelocOut.outword(HA16.getPackedFields()); - RelocOut.outword(PAIR.getRawAddress()); - RelocOut.outword(PAIR.getPackedFields()); - - Addr += 0x8000; - - SecOut.fixhalf(Addr >> 16, MR.getMachineCodeOffset() + 2); - break; - } - case PPC::reloc_absolute_low: - { - MachORelocation LO16(MR.getMachineCodeOffset(), ToIdx, false, 2, - isExtern, PPC_RELOC_LO16); - MachORelocation PAIR(Addr >> 16, 0xFFFFFF, false, 2, isExtern, - PPC_RELOC_PAIR); - NumRelocs = 2; - - RelocOut.outword(LO16.getRawAddress()); - RelocOut.outword(LO16.getPackedFields()); - RelocOut.outword(PAIR.getRawAddress()); - RelocOut.outword(PAIR.getPackedFields()); - - SecOut.fixhalf(Addr, MR.getMachineCodeOffset() + 2); - break; - } - } - - return NumRelocs; -} diff --git a/lib/Target/PowerPC/PPCMachOWriterInfo.h b/lib/Target/PowerPC/PPCMachOWriterInfo.h deleted file mode 100644 index d46334d..0000000 --- a/lib/Target/PowerPC/PPCMachOWriterInfo.h +++ /dev/null @@ -1,55 +0,0 @@ -//===-- PPCMachOWriterInfo.h - Mach-O Writer Info for PowerPC ---*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements Mach-O writer information for the PowerPC backend. -// -//===----------------------------------------------------------------------===// - -#ifndef PPC_MACHO_WRITER_INFO_H -#define PPC_MACHO_WRITER_INFO_H - -#include "llvm/Target/TargetMachOWriterInfo.h" - -namespace llvm { - - // Forward declarations - class MachineRelocation; - class OutputBuffer; - class PPCTargetMachine; - - class PPCMachOWriterInfo : public TargetMachOWriterInfo { - public: - PPCMachOWriterInfo(const PPCTargetMachine &TM); - virtual ~PPCMachOWriterInfo(); - - virtual unsigned GetTargetRelocation(MachineRelocation &MR, - unsigned FromIdx, - unsigned ToAddr, - unsigned ToIdx, - OutputBuffer &RelocOut, - OutputBuffer &SecOut, - bool Scattered, bool Extern) const; - - // Constants for the relocation r_type field. 
- // See <mach-o/ppc/reloc.h> - enum { - PPC_RELOC_VANILLA, // generic relocation - PPC_RELOC_PAIR, // the second relocation entry of a pair - PPC_RELOC_BR14, // 14 bit branch displacement to word address - PPC_RELOC_BR24, // 24 bit branch displacement to word address - PPC_RELOC_HI16, // a PAIR follows with the low 16 bits - PPC_RELOC_LO16, // a PAIR follows with the high 16 bits - PPC_RELOC_HA16, // a PAIR follows, which is sign extended to 32b - PPC_RELOC_LO14 // LO16 with low 2 bits implicitly zero - }; - }; - -} // end llvm namespace - -#endif // PPC_MACHO_WRITER_INFO_H diff --git a/lib/Target/PowerPC/PPCRegisterInfo.cpp b/lib/Target/PowerPC/PPCRegisterInfo.cpp index 0c3c8eb..0b509ac 100644 --- a/lib/Target/PowerPC/PPCRegisterInfo.cpp +++ b/lib/Target/PowerPC/PPCRegisterInfo.cpp @@ -406,7 +406,7 @@ PPCRegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const { static bool needsFP(const MachineFunction &MF) { const MachineFrameInfo *MFI = MF.getFrameInfo(); return NoFramePointerElim || MFI->hasVarSizedObjects() || - (PerformTailCallOpt && MF.getInfo<PPCFunctionInfo>()->hasFastCall()); + (GuaranteedTailCallOpt && MF.getInfo<PPCFunctionInfo>()->hasFastCall()); } static bool spillsCR(const MachineFunction &MF) { @@ -427,6 +427,12 @@ BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const { Reserved.set(PPC::R2); // System-reserved register Reserved.set(PPC::R13); // Small Data Area pointer register } + // Reserve R2 on Darwin to hack around the problem of save/restore of CR + // when the stack frame is too big to address directly; we need two regs. + // This is a hack. + if (Subtarget.isDarwinABI()) { + Reserved.set(PPC::R2); + } // On PPC64, r13 is the thread pointer. Never allocate this register. // Note that this is over conservative, as it also prevents allocation of R31 @@ -447,6 +453,12 @@ BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const { if (Subtarget.isSVR4ABI()) { Reserved.set(PPC::X2); } + // Reserve R2 on Darwin to hack around the problem of save/restore of CR + // when the stack frame is too big to address directly; we need two regs. + // This is a hack. + if (Subtarget.isDarwinABI()) { + Reserved.set(PPC::X2); + } } if (needsFP(MF)) @@ -486,7 +498,7 @@ static bool MustSaveLR(const MachineFunction &MF, unsigned LR) { void PPCRegisterInfo:: eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const { - if (PerformTailCallOpt && I->getOpcode() == PPC::ADJCALLSTACKUP) { + if (GuaranteedTailCallOpt && I->getOpcode() == PPC::ADJCALLSTACKUP) { // Add (actually subtract) back the amount the callee popped on return. if (int CalleeAmt = I->getOperand(1).getImm()) { bool is64Bit = Subtarget.isPPC64(); @@ -724,7 +736,7 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, } // Take into account whether it's an add or mem instruction unsigned OffsetOperandNo = (FIOperandNo == 2) ? 1 : 2; - if (MI.getOpcode() == TargetInstrInfo::INLINEASM) + if (MI.isInlineAsm()) OffsetOperandNo = FIOperandNo-1; // Get the frame index. 
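A minimal sketch of the pattern the two R2/X2 reservation hunks above implement (an illustration only, assuming the PPC register enums and the Subtarget member used in this file; it is not the actual patch and omits the other registers the real override reserves):

  BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
    BitVector Reserved(getNumRegs());
    // Keep R2 (and its 64-bit alias X2) away from the register allocator so
    // the prolog/epilog can use it as a scratch register when a CR save slot
    // is out of range of a single store instruction.
    if (Subtarget.isDarwinABI()) {
      Reserved.set(PPC::R2);
      if (Subtarget.isPPC64())
        Reserved.set(PPC::X2);
    }
    return Reserved;
  }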
@@ -817,7 +829,7 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, // addi 0:rA 1:rB, 2, imm ==> add 0:rA, 1:rB, 2:r0 unsigned OperandBase; - if (OpC != TargetInstrInfo::INLINEASM) { + if (OpC != TargetOpcode::INLINEASM) { assert(ImmToIdxMap.count(OpC) && "No indexed form of load or store available!"); unsigned NewOpcode = ImmToIdxMap.find(OpC)->second; @@ -1050,7 +1062,7 @@ PPCRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, // Reserve stack space to move the linkage area to in case of a tail call. int TCSPDelta = 0; - if (PerformTailCallOpt && (TCSPDelta = FI->getTailCallSPDelta()) < 0) { + if (GuaranteedTailCallOpt && (TCSPDelta = FI->getTailCallSPDelta()) < 0) { MF.getFrameInfo()->CreateFixedObject(-1 * TCSPDelta, TCSPDelta, true, false); } @@ -1160,7 +1172,7 @@ PPCRegisterInfo::processFunctionBeforeFrameFinalized(MachineFunction &MF) // Take into account stack space reserved for tail calls. int TCSPDelta = 0; - if (PerformTailCallOpt && (TCSPDelta = PFI->getTailCallSPDelta()) < 0) { + if (GuaranteedTailCallOpt && (TCSPDelta = PFI->getTailCallSPDelta()) < 0) { LowerBound = TCSPDelta; } @@ -1575,7 +1587,7 @@ void PPCRegisterInfo::emitEpilogue(MachineFunction &MF, // The loaded (or persistent) stack pointer value is offset by the 'stwu' // on entry to the function. Add this offset back now. if (!isPPC64) { - // If this function contained a fastcc call and PerformTailCallOpt is + // If this function contained a fastcc call and GuaranteedTailCallOpt is // enabled (=> hasFastCall()==true) the fastcc call might contain a tail // call which invalidates the stack pointer value in SP(0). So we use the // value of R31 in this case. @@ -1654,7 +1666,7 @@ void PPCRegisterInfo::emitEpilogue(MachineFunction &MF, // Callee pop calling convention. Pop parameter/linkage area. Used for tail // call optimization - if (PerformTailCallOpt && RetOpcode == PPC::BLR && + if (GuaranteedTailCallOpt && RetOpcode == PPC::BLR && MF.getFunction()->getCallingConv() == CallingConv::Fast) { PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>(); unsigned CallerAllocatedAmt = FI->getMinReservedArea(); diff --git a/lib/Target/PowerPC/PPCSubtarget.cpp b/lib/Target/PowerPC/PPCSubtarget.cpp index f75e781..40914ba 100644 --- a/lib/Target/PowerPC/PPCSubtarget.cpp +++ b/lib/Target/PowerPC/PPCSubtarget.cpp @@ -130,7 +130,7 @@ bool PPCSubtarget::hasLazyResolverStub(const GlobalValue *GV, return false; // If symbol visibility is hidden, the extra load is not needed if // the symbol is definitely defined in the current translation unit. 
- bool isDecl = GV->isDeclaration() && !GV->hasNotBeenReadFromBitcode(); + bool isDecl = GV->isDeclaration() && !GV->isMaterializable(); if (GV->hasHiddenVisibility() && !isDecl && !GV->hasCommonLinkage()) return false; return GV->hasWeakLinkage() || GV->hasLinkOnceLinkage() || diff --git a/lib/Target/PowerPC/PPCTargetMachine.cpp b/lib/Target/PowerPC/PPCTargetMachine.cpp index c7f7882..cac6962 100644 --- a/lib/Target/PowerPC/PPCTargetMachine.cpp +++ b/lib/Target/PowerPC/PPCTargetMachine.cpp @@ -45,7 +45,7 @@ PPCTargetMachine::PPCTargetMachine(const Target &T, const std::string &TT, Subtarget(TT, FS, is64Bit), DataLayout(Subtarget.getTargetDataString()), InstrInfo(*this), FrameInfo(*this, is64Bit), JITInfo(*this, is64Bit), TLInfo(*this), - InstrItins(Subtarget.getInstrItineraryData()), MachOWriterInfo(*this) { + InstrItins(Subtarget.getInstrItineraryData()) { if (getRelocationModel() == Reloc::Default) { if (Subtarget.isDarwin()) @@ -91,33 +91,6 @@ bool PPCTargetMachine::addPreEmitPass(PassManagerBase &PM, bool PPCTargetMachine::addCodeEmitter(PassManagerBase &PM, CodeGenOpt::Level OptLevel, - MachineCodeEmitter &MCE) { - // The JIT should use the static relocation model in ppc32 mode, PIC in ppc64. - // FIXME: This should be moved to TargetJITInfo!! - if (Subtarget.isPPC64()) { - // We use PIC codegen in ppc64 mode, because otherwise we'd have to use many - // instructions to materialize arbitrary global variable + function + - // constant pool addresses. - setRelocationModel(Reloc::PIC_); - // Temporary workaround for the inability of PPC64 JIT to handle jump - // tables. - DisableJumpTables = true; - } else { - setRelocationModel(Reloc::Static); - } - - // Inform the subtarget that we are in JIT mode. FIXME: does this break macho - // writing? - Subtarget.SetJITMode(); - - // Machine code emitter pass for PowerPC. - PM.add(createPPCCodeEmitterPass(*this, MCE)); - - return false; -} - -bool PPCTargetMachine::addCodeEmitter(PassManagerBase &PM, - CodeGenOpt::Level OptLevel, JITCodeEmitter &JCE) { // The JIT should use the static relocation model in ppc32 mode, PIC in ppc64. // FIXME: This should be moved to TargetJITInfo!! @@ -142,83 +115,3 @@ bool PPCTargetMachine::addCodeEmitter(PassManagerBase &PM, return false; } - -bool PPCTargetMachine::addCodeEmitter(PassManagerBase &PM, - CodeGenOpt::Level OptLevel, - ObjectCodeEmitter &OCE) { - // The JIT should use the static relocation model in ppc32 mode, PIC in ppc64. - // FIXME: This should be moved to TargetJITInfo!! - if (Subtarget.isPPC64()) { - // We use PIC codegen in ppc64 mode, because otherwise we'd have to use many - // instructions to materialize arbitrary global variable + function + - // constant pool addresses. - setRelocationModel(Reloc::PIC_); - // Temporary workaround for the inability of PPC64 JIT to handle jump - // tables. - DisableJumpTables = true; - } else { - setRelocationModel(Reloc::Static); - } - - // Inform the subtarget that we are in JIT mode. FIXME: does this break macho - // writing? - Subtarget.SetJITMode(); - - // Machine code emitter pass for PowerPC. - PM.add(createPPCObjectCodeEmitterPass(*this, OCE)); - - return false; -} - -bool PPCTargetMachine::addSimpleCodeEmitter(PassManagerBase &PM, - CodeGenOpt::Level OptLevel, - MachineCodeEmitter &MCE) { - // Machine code emitter pass for PowerPC. 
- PM.add(createPPCCodeEmitterPass(*this, MCE)); - return false; -} - -bool PPCTargetMachine::addSimpleCodeEmitter(PassManagerBase &PM, - CodeGenOpt::Level OptLevel, - JITCodeEmitter &JCE) { - // Machine code emitter pass for PowerPC. - PM.add(createPPCJITCodeEmitterPass(*this, JCE)); - return false; -} - -bool PPCTargetMachine::addSimpleCodeEmitter(PassManagerBase &PM, - CodeGenOpt::Level OptLevel, - ObjectCodeEmitter &OCE) { - // Machine code emitter pass for PowerPC. - PM.add(createPPCObjectCodeEmitterPass(*this, OCE)); - return false; -} - -/// getLSDAEncoding - Returns the LSDA pointer encoding. The choices are 4-byte, -/// 8-byte, and target default. The CIE is hard-coded to indicate that the LSDA -/// pointer in the FDE section is an "sdata4", and should be encoded as a 4-byte -/// pointer by default. However, some systems may require a different size due -/// to bugs or other conditions. We will default to a 4-byte encoding unless the -/// system tells us otherwise. -/// -/// The issue is when the CIE says their is an LSDA. That mandates that every -/// FDE have an LSDA slot. But if the function does not need an LSDA. There -/// needs to be some way to signify there is none. The LSDA is encoded as -/// pc-rel. But you don't look for some magic value after adding the pc. You -/// have to look for a zero before adding the pc. The problem is that the size -/// of the zero to look for depends on the encoding. The unwinder bug in SL is -/// that it always checks for a pointer-size zero. So on x86_64 it looks for 8 -/// bytes of zero. If you have an LSDA, it works fine since the 8-bytes are -/// non-zero so it goes ahead and then reads the value based on the encoding. -/// But if you use sdata4 and there is no LSDA, then the test for zero gives a -/// false negative and the unwinder thinks there is an LSDA. -/// -/// FIXME: This call-back isn't good! We should be using the correct encoding -/// regardless of the system. However, there are some systems which have bugs -/// that prevent this from occuring. -DwarfLSDAEncoding::Encoding PPCTargetMachine::getLSDAEncoding() const { - if (Subtarget.isDarwin() && Subtarget.getDarwinVers() != 10) - return DwarfLSDAEncoding::Default; - - return DwarfLSDAEncoding::EightByte; -} diff --git a/lib/Target/PowerPC/PPCTargetMachine.h b/lib/Target/PowerPC/PPCTargetMachine.h index 4afcb23..ac9ae2b 100644 --- a/lib/Target/PowerPC/PPCTargetMachine.h +++ b/lib/Target/PowerPC/PPCTargetMachine.h @@ -19,7 +19,6 @@ #include "PPCJITInfo.h" #include "PPCInstrInfo.h" #include "PPCISelLowering.h" -#include "PPCMachOWriterInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetData.h" @@ -37,7 +36,6 @@ class PPCTargetMachine : public LLVMTargetMachine { PPCJITInfo JITInfo; PPCTargetLowering TLInfo; InstrItineraryData InstrItins; - PPCMachOWriterInfo MachOWriterInfo; public: PPCTargetMachine(const Target &T, const std::string &TT, @@ -58,40 +56,12 @@ public: virtual const InstrItineraryData getInstrItineraryData() const { return InstrItins; } - virtual const PPCMachOWriterInfo *getMachOWriterInfo() const { - return &MachOWriterInfo; - } - - /// getLSDAEncoding - Returns the LSDA pointer encoding. The choices are - /// 4-byte, 8-byte, and target default. The CIE is hard-coded to indicate that - /// the LSDA pointer in the FDE section is an "sdata4", and should be encoded - /// as a 4-byte pointer by default. However, some systems may require a - /// different size due to bugs or other conditions. 
We will default to a - /// 4-byte encoding unless the system tells us otherwise. - /// - /// FIXME: This call-back isn't good! We should be using the correct encoding - /// regardless of the system. However, there are some systems which have bugs - /// that prevent this from occuring. - virtual DwarfLSDAEncoding::Encoding getLSDAEncoding() const; // Pass Pipeline Configuration virtual bool addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel); virtual bool addPreEmitPass(PassManagerBase &PM, CodeGenOpt::Level OptLevel); virtual bool addCodeEmitter(PassManagerBase &PM, CodeGenOpt::Level OptLevel, - MachineCodeEmitter &MCE); - virtual bool addCodeEmitter(PassManagerBase &PM, CodeGenOpt::Level OptLevel, JITCodeEmitter &JCE); - virtual bool addCodeEmitter(PassManagerBase &PM, CodeGenOpt::Level OptLevel, - ObjectCodeEmitter &OCE); - virtual bool addSimpleCodeEmitter(PassManagerBase &PM, - CodeGenOpt::Level OptLevel, - MachineCodeEmitter &MCE); - virtual bool addSimpleCodeEmitter(PassManagerBase &PM, - CodeGenOpt::Level OptLevel, - JITCodeEmitter &JCE); - virtual bool addSimpleCodeEmitter(PassManagerBase &PM, - CodeGenOpt::Level OptLevel, - ObjectCodeEmitter &OCE); virtual bool getEnableTailMergeDefault() const; }; diff --git a/lib/Target/PowerPC/README.txt b/lib/Target/PowerPC/README.txt index 060d6a5..e49bda0 100644 --- a/lib/Target/PowerPC/README.txt +++ b/lib/Target/PowerPC/README.txt @@ -430,6 +430,35 @@ This theoretically may help improve twolf slightly (used in dimbox.c:142?). ===-------------------------------------------------------------------------=== +PR5945: This: +define i32 @clamp0g(i32 %a) { +entry: + %cmp = icmp slt i32 %a, 0 + %sel = select i1 %cmp, i32 0, i32 %a + ret i32 %sel +} + +Is compiled to this with the PowerPC (32-bit) backend: + +_clamp0g: + cmpwi cr0, r3, 0 + li r2, 0 + blt cr0, LBB1_2 +; BB#1: ; %entry + mr r2, r3 +LBB1_2: ; %entry + mr r3, r2 + blr + +This could be reduced to the much simpler: + +_clamp0g: + srawi r2, r3, 31 + andc r3, r3, r2 + blr + +===-------------------------------------------------------------------------=== + int foo(int N, int ***W, int **TK, int X) { int t, i; @@ -635,6 +664,32 @@ This sort of thing occurs a lot due to globalopt. ===-------------------------------------------------------------------------=== +We compile: + +define i32 @bar(i32 %x) nounwind readnone ssp { +entry: + %0 = icmp eq i32 %x, 0 ; <i1> [#uses=1] + %neg = sext i1 %0 to i32 ; <i32> [#uses=1] + ret i32 %neg +} + +to: + +_bar: + cntlzw r2, r3 + slwi r2, r2, 26 + srawi r3, r2, 31 + blr + +it would be better to produce: + +_bar: + addic r3,r3,-1 + subfe r3,r3,r3 + blr + +===-------------------------------------------------------------------------=== + We currently compile 32-bit bswap: declare i32 @llvm.bswap.i32(i32 %A) @@ -840,3 +895,20 @@ define double @test_FNEG_sel(double %A, double %B, double %C) { ret double %E } +//===----------------------------------------------------------------------===// +The save/restore sequence for CR in prolog/epilog is terrible: +- Each CR subreg is saved individually, rather than doing one save as a unit. +- On Darwin, the save is done after the decrement of SP, which means the offset +from SP of the save slot can be too big for a store instruction, which means we +need an additional register (currently hacked in 96015+96020; the solution there +is correct, but poor). +- On SVR4 the same thing can happen, and I don't think saving before the SP +decrement is safe on that target, as there is no red zone.
This is currently +broken AFAIK, although it's not a target I can exercise. +The following demonstrates the problem: +extern void bar(char *p); +void foo() { + char x[100000]; + bar(x); + __asm__("" ::: "cr2"); +} diff --git a/lib/Target/PowerPC/TargetInfo/Makefile b/lib/Target/PowerPC/TargetInfo/Makefile index 16d0167..a101aa4 100644 --- a/lib/Target/PowerPC/TargetInfo/Makefile +++ b/lib/Target/PowerPC/TargetInfo/Makefile @@ -8,7 +8,6 @@ ##===----------------------------------------------------------------------===## LEVEL = ../../../.. LIBRARYNAME = LLVMPowerPCInfo -CXXFLAGS = -fno-rtti # Hack: we need to include 'main' target directory to grab private headers CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. diff --git a/lib/Target/README.txt b/lib/Target/README.txt index 080ea42..4fd46a8 100644 --- a/lib/Target/README.txt +++ b/lib/Target/README.txt @@ -156,6 +156,45 @@ void f () { /* this can be optimized to four additions... */ This requires reassociating to forms of expressions that are already available, something that reassoc doesn't think about yet. + +//===---------------------------------------------------------------------===// + +This function: (derived from GCC PR19988) +double foo(double x, double y) { + return ((x + 0.1234 * y) * (x + -0.1234 * y)); +} + +compiles to: +_foo: + movapd %xmm1, %xmm2 + mulsd LCPI1_1(%rip), %xmm1 + mulsd LCPI1_0(%rip), %xmm2 + addsd %xmm0, %xmm1 + addsd %xmm0, %xmm2 + movapd %xmm1, %xmm0 + mulsd %xmm2, %xmm0 + ret + +Reassociate should be able to turn it into: + +double foo(double x, double y) { + return ((x + 0.1234 * y) * (x - 0.1234 * y)); +} + +Which allows the multiply by constant to be CSE'd, producing: + +_foo: + mulsd LCPI1_0(%rip), %xmm1 + movapd %xmm1, %xmm2 + addsd %xmm0, %xmm2 + subsd %xmm1, %xmm0 + mulsd %xmm2, %xmm0 + ret + +This doesn't need -ffast-math support at all. This is particularly bad because +the llvm-gcc frontend is canonicalizing the later into the former, but clang +doesn't have this problem. + //===---------------------------------------------------------------------===// These two functions should generate the same code on big-endian systems: @@ -237,24 +276,6 @@ define void @test(i32* %P) { //===---------------------------------------------------------------------===// -dag/inst combine "clz(x)>>5 -> x==0" for 32-bit x. - -Compile: - -int bar(int x) -{ - int t = __builtin_clz(x); - return -(t>>5); -} - -to: - -_bar: addic r3,r3,-1 - subfe r3,r3,r3 - blr - -//===---------------------------------------------------------------------===// - quantum_sigma_x in 462.libquantum contains the following loop: for(i=0; i<reg->size; i++) @@ -294,6 +315,8 @@ unsigned long reverse(unsigned v) { //===---------------------------------------------------------------------===// +[LOOP RECOGNITION] + These idioms should be recognized as popcount (see PR1488): unsigned countbits_slow(unsigned v) { @@ -356,12 +379,36 @@ this construct. //===---------------------------------------------------------------------===// +[LOOP RECOGNITION] + viterbi speeds up *significantly* if the various "history" related copy loops are turned into memcpy calls at the source level. We need a "loops to memcpy" pass. 
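For illustration, the shape of loop such a pass would have to recognize (a made-up example, not code taken from viterbi):

  void copy_history(short *dst, const short *src, unsigned n) {
    // Element-by-element copy of non-overlapping buffers; a "loops to
    // memcpy" pass could rewrite the whole loop as
    // memcpy(dst, src, n * sizeof(short)).
    for (unsigned i = 0; i != n; ++i)
      dst[i] = src[i];
  }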
//===---------------------------------------------------------------------===// +[LOOP OPTIMIZATION] + +SingleSource/Benchmarks/Misc/dt.c shows several interesting optimization +opportunities in its double_array_divs_variable function: it needs loop +interchange, memory promotion (which LICM already does), vectorization and +variable trip count loop unrolling (since it has a constant trip count). ICC +apparently produces this very nice code with -ffast-math: + +..B1.70: # Preds ..B1.70 ..B1.69 + mulpd %xmm0, %xmm1 #108.2 + mulpd %xmm0, %xmm1 #108.2 + mulpd %xmm0, %xmm1 #108.2 + mulpd %xmm0, %xmm1 #108.2 + addl $8, %edx # + cmpl $131072, %edx #108.2 + jb ..B1.70 # Prob 99% #108.2 + +It would be better to count down to zero, but this is a lot better than what we +do. + +//===---------------------------------------------------------------------===// + Consider: typedef unsigned U32; @@ -1218,9 +1265,16 @@ store->load. //===---------------------------------------------------------------------===// +[ALIAS ANALYSIS] + Type based alias analysis: http://gcc.gnu.org/bugzilla/show_bug.cgi?id=14705 +We should do better analysis of posix_memalign. At the least it should +no-capture its pointer argument, at best, we should know that the out-value +result doesn't point to anything (like malloc). One example of this is in +SingleSource/Benchmarks/Misc/dt.c + //===---------------------------------------------------------------------===// A/B get pinned to the stack because we turn an if/then into a select instead @@ -1697,22 +1751,71 @@ from gcc. Missed instcombine transformation: define i32 @a(i32 %x) nounwind readnone { entry: - %shr = lshr i32 %x, 5 ; <i32> [#uses=1] - %xor = xor i32 %shr, 67108864 ; <i32> [#uses=1] - %sub = add i32 %xor, -67108864 ; <i32> [#uses=1] + %rem = srem i32 %x, 32 + %shl = shl i32 1, %rem + ret i32 %shl +} + +The srem can be transformed to an and because if x is negative, the shift is +undefined. Testcase derived from gcc. + +//===---------------------------------------------------------------------===// + +Missed instcombine/dagcombine transformation: +define i32 @a(i32 %x, i32 %y) nounwind readnone { +entry: + %mul = mul i32 %y, -8 + %sub = sub i32 %x, %mul ret i32 %sub } -This function is equivalent to "ashr i32 %x, 5". Testcase derived from gcc. +Should compile to something like x+y*8, but currently compiles to an +inefficient result. Testcase derived from gcc. //===---------------------------------------------------------------------===// -isSafeToLoadUnconditionally should allow a GEP of a global/alloca with constant -indicies within the bounds of the allocated object. Reduced example: +Missed instcombine/dagcombine transformation: +define void @lshift_lt(i8 zeroext %a) nounwind { +entry: + %conv = zext i8 %a to i32 + %shl = shl i32 %conv, 3 + %cmp = icmp ult i32 %shl, 33 + br i1 %cmp, label %if.then, label %if.end + +if.then: + tail call void @bar() nounwind + ret void + +if.end: + ret void +} +declare void @bar() nounwind -const int a[] = {3,6}; -int b(int y) { int* x = y ? &a[0] : &a[1]; return *x; } +The shift should be eliminated. Testcase derived from gcc. -All the loads should be eliminated. Testcase derived from gcc. 
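The reasoning: %conv is a zext from i8, so %conv is at most 255 and the shl by 3 cannot wrap i32; (%conv << 3) ult 33 is therefore equivalent to %conv ult 5. A quick exhaustive check of that equivalence (illustrative only, not part of the patch):

  #include <cassert>

  int main() {
    for (unsigned a = 0; a != 256; ++a)        // every possible i8 input
      assert(((a << 3) < 33u) == (a < 5u));    // shl+icmp vs. simplified icmp
    return 0;
  }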
+//===---------------------------------------------------------------------===// + +These compile into different code, one gets recognized as a switch and the +other doesn't due to phase ordering issues (PR6212): + +int test1(int mainType, int subType) { + if (mainType == 7) + subType = 4; + else if (mainType == 9) + subType = 6; + else if (mainType == 11) + subType = 9; + return subType; +} + +int test2(int mainType, int subType) { + if (mainType == 7) + subType = 4; + if (mainType == 9) + subType = 6; + if (mainType == 11) + subType = 9; + return subType; +} //===---------------------------------------------------------------------===// diff --git a/lib/Target/Sparc/AsmPrinter/Makefile b/lib/Target/Sparc/AsmPrinter/Makefile index 404fad1..a856828 100644 --- a/lib/Target/Sparc/AsmPrinter/Makefile +++ b/lib/Target/Sparc/AsmPrinter/Makefile @@ -8,8 +8,6 @@ ##===----------------------------------------------------------------------===## LEVEL = ../../../.. LIBRARYNAME = LLVMSparcAsmPrinter -CXXFLAGS = -fno-rtti - # Hack: we need to include 'main' Sparc target directory to grab private headers CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. diff --git a/lib/Target/Sparc/AsmPrinter/SparcAsmPrinter.cpp b/lib/Target/Sparc/AsmPrinter/SparcAsmPrinter.cpp index 8fc4e5a..9a2ce6b 100644 --- a/lib/Target/Sparc/AsmPrinter/SparcAsmPrinter.cpp +++ b/lib/Target/Sparc/AsmPrinter/SparcAsmPrinter.cpp @@ -16,45 +16,23 @@ #include "Sparc.h" #include "SparcInstrInfo.h" #include "SparcTargetMachine.h" -#include "llvm/Constants.h" -#include "llvm/DerivedTypes.h" -#include "llvm/Module.h" #include "llvm/CodeGen/AsmPrinter.h" -#include "llvm/CodeGen/DwarfWriter.h" -#include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" -#include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetRegistry.h" -#include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringExtras.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FormattedStream.h" -#include "llvm/Support/MathExtras.h" -#include <cctype> -#include <cstring> -#include <map> using namespace llvm; -STATISTIC(EmittedInsts, "Number of machine instrs printed"); - namespace { class SparcAsmPrinter : public AsmPrinter { - /// We name each basic block in a Function with a unique number, so - /// that we can consistently refer to them later. This is cleared - /// at the beginning of each call to runOnMachineFunction(). - /// - typedef std::map<const Value *, unsigned> ValueMapTy; - ValueMapTy NumberForBB; - unsigned BBNumber; public: explicit SparcAsmPrinter(formatted_raw_ostream &O, TargetMachine &TM, - const MCAsmInfo *T, bool V) - : AsmPrinter(O, TM, T, V), BBNumber(0) {} + MCContext &Ctx, MCStreamer &Streamer, + const MCAsmInfo *T) + : AsmPrinter(O, TM, Ctx, Streamer, T) {} virtual const char *getPassName() const { return "Sparc Assembly Printer"; @@ -65,114 +43,24 @@ namespace { const char *Modifier = 0); void printCCOperand(const MachineInstr *MI, int opNum); + virtual void EmitInstruction(const MachineInstr *MI) { + printInstruction(MI); + OutStreamer.AddBlankLine(); + } void printInstruction(const MachineInstr *MI); // autogenerated. 
static const char *getRegisterName(unsigned RegNo); - bool runOnMachineFunction(MachineFunction &F); bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, unsigned AsmVariant, const char *ExtraCode); bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo, unsigned AsmVariant, const char *ExtraCode); - void emitFunctionHeader(const MachineFunction &MF); bool printGetPCX(const MachineInstr *MI, unsigned OpNo); }; } // end of anonymous namespace #include "SparcGenAsmWriter.inc" -/// runOnMachineFunction - This uses the printInstruction() -/// method to print assembly for each instruction. -/// -bool SparcAsmPrinter::runOnMachineFunction(MachineFunction &MF) { - this->MF = &MF; - - SetupMachineFunction(MF); - - // Print out constants referenced by the function - EmitConstantPool(MF.getConstantPool()); - - // BBNumber is used here so that a given Printer will never give two - // BBs the same name. (If you have a better way, please let me know!) - - O << "\n\n"; - emitFunctionHeader(MF); - - - // Emit pre-function debug information. - DW->BeginFunction(&MF); - - // Number each basic block so that we can consistently refer to them - // in PC-relative references. - // FIXME: Why not use the MBB numbers? - NumberForBB.clear(); - for (MachineFunction::const_iterator I = MF.begin(), E = MF.end(); - I != E; ++I) { - NumberForBB[I->getBasicBlock()] = BBNumber++; - } - - // Print out code for the function. - for (MachineFunction::const_iterator I = MF.begin(), E = MF.end(); - I != E; ++I) { - // Print a label for the basic block. - if (I != MF.begin()) { - EmitBasicBlockStart(I); - } - for (MachineBasicBlock::const_iterator II = I->begin(), E = I->end(); - II != E; ++II) { - // Print the assembly for the instruction. - processDebugLoc(II, true); - printInstruction(II); - - if (VerboseAsm) - EmitComments(*II); - O << '\n'; - processDebugLoc(II, false); - ++EmittedInsts; - } - } - - // Emit post-function debug information. - DW->EndFunction(&MF); - - // We didn't modify anything. - O << "\t.size\t" << *CurrentFnSym << ", .-" << *CurrentFnSym << '\n'; - return false; -} - -void SparcAsmPrinter::emitFunctionHeader(const MachineFunction &MF) { - const Function *F = MF.getFunction(); - OutStreamer.SwitchSection(getObjFileLowering().SectionForGlobal(F, Mang, TM)); - EmitAlignment(MF.getAlignment(), F); - - switch (F->getLinkage()) { - default: llvm_unreachable("Unknown linkage type"); - case Function::PrivateLinkage: - case Function::InternalLinkage: - // Function is internal. 
- break; - case Function::DLLExportLinkage: - case Function::ExternalLinkage: - // Function is externally visible - O << "\t.global\t" << *CurrentFnSym << '\n'; - break; - case Function::LinkerPrivateLinkage: - case Function::LinkOnceAnyLinkage: - case Function::LinkOnceODRLinkage: - case Function::WeakAnyLinkage: - case Function::WeakODRLinkage: - // Function is weak - O << "\t.weak\t" << *CurrentFnSym << '\n'; - break; - } - - printVisibility(CurrentFnSym, F->getVisibility()); - - O << "\t.type\t" << *CurrentFnSym << ", #function\n"; - O << *CurrentFnSym << ":\n"; -} - - void SparcAsmPrinter::printOperand(const MachineInstr *MI, int opNum) { const MachineOperand &MO = MI->getOperand (opNum); bool CloseParen = false; @@ -193,7 +81,7 @@ void SparcAsmPrinter::printOperand(const MachineInstr *MI, int opNum) { O << (int)MO.getImm(); break; case MachineOperand::MO_MachineBasicBlock: - O << *GetMBBSymbol(MO.getMBB()->getNumber()); + O << *MO.getMBB()->getSymbol(OutContext); return; case MachineOperand::MO_GlobalAddress: O << *GetGlobalValueSymbol(MO.getGlobal()); @@ -252,7 +140,7 @@ bool SparcAsmPrinter::printGetPCX(const MachineInstr *MI, unsigned opNum) { break; } - unsigned bbNum = NumberForBB[MI->getParent()->getBasicBlock()]; + unsigned bbNum = MI->getParent()->getNumber(); O << '\n' << ".LLGETPCH" << bbNum << ":\n"; O << "\tcall\t.LLGETPC" << bbNum << '\n' ; @@ -312,4 +200,5 @@ bool SparcAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, // Force static initialization. extern "C" void LLVMInitializeSparcAsmPrinter() { RegisterAsmPrinter<SparcAsmPrinter> X(TheSparcTarget); + RegisterAsmPrinter<SparcAsmPrinter> Y(TheSparcV9Target); } diff --git a/lib/Target/Sparc/Makefile b/lib/Target/Sparc/Makefile index d3e2a89..e407848 100644 --- a/lib/Target/Sparc/Makefile +++ b/lib/Target/Sparc/Makefile @@ -10,7 +10,6 @@ LEVEL = ../../.. LIBRARYNAME = LLVMSparcCodeGen TARGET = Sparc -CXXFLAGS = -fno-rtti # Make sure that tblgen is run, first thing. 
BUILT_SOURCES = SparcGenRegisterInfo.h.inc SparcGenRegisterNames.inc \ diff --git a/lib/Target/Sparc/Sparc.h b/lib/Target/Sparc/Sparc.h index bb5155e1..a37920d 100644 --- a/lib/Target/Sparc/Sparc.h +++ b/lib/Target/Sparc/Sparc.h @@ -29,6 +29,7 @@ namespace llvm { FunctionPass *createSparcFPMoverPass(TargetMachine &TM); extern Target TheSparcTarget; + extern Target TheSparcV9Target; } // end namespace llvm; diff --git a/lib/Target/Sparc/SparcISelLowering.cpp b/lib/Target/Sparc/SparcISelLowering.cpp index 1b3ca3e..4e93ef0 100644 --- a/lib/Target/Sparc/SparcISelLowering.cpp +++ b/lib/Target/Sparc/SparcISelLowering.cpp @@ -21,7 +21,7 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/SelectionDAG.h" -#include "llvm/Target/TargetLoweringObjectFile.h" +#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" #include "llvm/ADT/VectorExtras.h" #include "llvm/Support/ErrorHandling.h" using namespace llvm; @@ -134,7 +134,8 @@ SparcTargetLowering::LowerFormalArguments(SDValue Chain, SDValue FIPtr = DAG.getFrameIndex(FrameIdx, MVT::i32); SDValue Load; if (ObjectVT == MVT::i32) { - Load = DAG.getLoad(MVT::i32, dl, Chain, FIPtr, NULL, 0); + Load = DAG.getLoad(MVT::i32, dl, Chain, FIPtr, NULL, 0, + false, false, 0); } else { ISD::LoadExtType LoadOp = ISD::SEXTLOAD; @@ -143,7 +144,7 @@ SparcTargetLowering::LowerFormalArguments(SDValue Chain, FIPtr = DAG.getNode(ISD::ADD, dl, MVT::i32, FIPtr, DAG.getConstant(Offset, MVT::i32)); Load = DAG.getExtLoad(LoadOp, dl, MVT::i32, Chain, FIPtr, - NULL, 0, ObjectVT); + NULL, 0, ObjectVT, false, false, 0); Load = DAG.getNode(ISD::TRUNCATE, dl, ObjectVT, Load); } InVals.push_back(Load); @@ -167,7 +168,8 @@ SparcTargetLowering::LowerFormalArguments(SDValue Chain, int FrameIdx = MF.getFrameInfo()->CreateFixedObject(4, ArgOffset, true, false); SDValue FIPtr = DAG.getFrameIndex(FrameIdx, MVT::i32); - SDValue Load = DAG.getLoad(MVT::f32, dl, Chain, FIPtr, NULL, 0); + SDValue Load = DAG.getLoad(MVT::f32, dl, Chain, FIPtr, NULL, 0, + false, false, 0); InVals.push_back(Load); } ArgOffset += 4; @@ -189,7 +191,8 @@ SparcTargetLowering::LowerFormalArguments(SDValue Chain, int FrameIdx = MF.getFrameInfo()->CreateFixedObject(4, ArgOffset, true, false); SDValue FIPtr = DAG.getFrameIndex(FrameIdx, MVT::i32); - HiVal = DAG.getLoad(MVT::i32, dl, Chain, FIPtr, NULL, 0); + HiVal = DAG.getLoad(MVT::i32, dl, Chain, FIPtr, NULL, 0, + false, false, 0); } SDValue LoVal; @@ -201,7 +204,8 @@ SparcTargetLowering::LowerFormalArguments(SDValue Chain, int FrameIdx = MF.getFrameInfo()->CreateFixedObject(4, ArgOffset+4, true, false); SDValue FIPtr = DAG.getFrameIndex(FrameIdx, MVT::i32); - LoVal = DAG.getLoad(MVT::i32, dl, Chain, FIPtr, NULL, 0); + LoVal = DAG.getLoad(MVT::i32, dl, Chain, FIPtr, NULL, 0, + false, false, 0); } // Compose the two halves together into an i64 unit. 
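In the source, the code just below this comment (outside the hunk) joins the two word loads; the usual SelectionDAG idiom for that, sketched here on the assumption that HiVal/LoVal are the i32 values loaded above, is:

  // Sparc is big-endian: HiVal holds the high word, LoVal the low word.
  // ISD::BUILD_PAIR takes the low half as its first operand.
  SDValue WholeValue =
      DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, LoVal, HiVal);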
@@ -235,7 +239,8 @@ SparcTargetLowering::LowerFormalArguments(SDValue Chain, true, false); SDValue FIPtr = DAG.getFrameIndex(FrameIdx, MVT::i32); - OutChains.push_back(DAG.getStore(DAG.getRoot(), dl, Arg, FIPtr, NULL, 0)); + OutChains.push_back(DAG.getStore(DAG.getRoot(), dl, Arg, FIPtr, NULL, 0, + false, false, 0)); ArgOffset += 4; } @@ -252,11 +257,13 @@ SparcTargetLowering::LowerFormalArguments(SDValue Chain, SDValue SparcTargetLowering::LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg, - bool isTailCall, + bool &isTailCall, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) { + // Sparc target does not yet support tail call optimization. + isTailCall = false; #if 0 // Analyze operands of the call, assigning locations to each operand. @@ -337,7 +344,8 @@ SparcTargetLowering::LowerCall(SDValue Chain, SDValue Callee, // FIXME: VERIFY THAT 68 IS RIGHT. SDValue PtrOff = DAG.getIntPtrConstant(VA.getLocMemOffset()+68); PtrOff = DAG.getNode(ISD::ADD, MVT::i32, StackPtr, PtrOff); - MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0)); + MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0, + false, false, 0)); } #else @@ -383,14 +391,17 @@ SparcTargetLowering::LowerCall(SDValue Chain, SDValue Callee, // out the parts as integers. Top part goes in a reg. SDValue StackPtr = DAG.CreateStackTemporary(MVT::f64, MVT::i32); SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, - Val, StackPtr, NULL, 0); + Val, StackPtr, NULL, 0, + false, false, 0); // Sparc is big-endian, so the high part comes first. - SDValue Hi = DAG.getLoad(MVT::i32, dl, Store, StackPtr, NULL, 0, 0); + SDValue Hi = DAG.getLoad(MVT::i32, dl, Store, StackPtr, NULL, 0, + false, false, 0); // Increment the pointer to the other half. StackPtr = DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr, DAG.getIntPtrConstant(4)); // Load the low part. 
- SDValue Lo = DAG.getLoad(MVT::i32, dl, Store, StackPtr, NULL, 0, 0); + SDValue Lo = DAG.getLoad(MVT::i32, dl, Store, StackPtr, NULL, 0, + false, false, 0); RegsToPass.push_back(std::make_pair(ArgRegs[RegsToPass.size()], Hi)); @@ -433,7 +444,8 @@ SparcTargetLowering::LowerCall(SDValue Chain, SDValue Callee, SDValue PtrOff = DAG.getConstant(ArgOffset, MVT::i32); PtrOff = DAG.getNode(ISD::ADD, dl, MVT::i32, StackPtr, PtrOff); MemOpChains.push_back(DAG.getStore(Chain, dl, ValToStore, - PtrOff, NULL, 0)); + PtrOff, NULL, 0, + false, false, 0)); } ArgOffset += ObjSize; } @@ -757,7 +769,7 @@ SDValue SparcTargetLowering::LowerGlobalAddress(SDValue Op, SDValue AbsAddr = DAG.getNode(ISD::ADD, dl, MVT::i32, GlobalBase, RelAddr); return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), - AbsAddr, NULL, 0); + AbsAddr, NULL, 0, false, false, 0); } SDValue SparcTargetLowering::LowerConstantPool(SDValue Op, @@ -778,7 +790,7 @@ SDValue SparcTargetLowering::LowerConstantPool(SDValue Op, SDValue AbsAddr = DAG.getNode(ISD::ADD, dl, MVT::i32, GlobalBase, RelAddr); return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), - AbsAddr, NULL, 0); + AbsAddr, NULL, 0, false, false, 0); } static SDValue LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) { @@ -870,7 +882,8 @@ static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG, DAG.getConstant(TLI.getVarArgsFrameOffset(), MVT::i32)); const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); - return DAG.getStore(Op.getOperand(0), dl, Offset, Op.getOperand(1), SV, 0); + return DAG.getStore(Op.getOperand(0), dl, Offset, Op.getOperand(1), SV, 0, + false, false, 0); } static SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) { @@ -880,21 +893,23 @@ static SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) { SDValue VAListPtr = Node->getOperand(1); const Value *SV = cast<SrcValueSDNode>(Node->getOperand(2))->getValue(); DebugLoc dl = Node->getDebugLoc(); - SDValue VAList = DAG.getLoad(MVT::i32, dl, InChain, VAListPtr, SV, 0); + SDValue VAList = DAG.getLoad(MVT::i32, dl, InChain, VAListPtr, SV, 0, + false, false, 0); // Increment the pointer, VAList, to the next vaarg SDValue NextPtr = DAG.getNode(ISD::ADD, dl, MVT::i32, VAList, DAG.getConstant(VT.getSizeInBits()/8, MVT::i32)); // Store the incremented VAList to the legalized pointer InChain = DAG.getStore(VAList.getValue(1), dl, NextPtr, - VAListPtr, SV, 0); + VAListPtr, SV, 0, false, false, 0); // Load the actual argument out of the pointer VAList, unless this is an // f64 load. if (VT != MVT::f64) - return DAG.getLoad(VT, dl, InChain, VAList, NULL, 0); + return DAG.getLoad(VT, dl, InChain, VAList, NULL, 0, false, false, 0); // Otherwise, load it as i64, then do a bitconvert. - SDValue V = DAG.getLoad(MVT::i64, dl, InChain, VAList, NULL, 0); + SDValue V = DAG.getLoad(MVT::i64, dl, InChain, VAList, NULL, 0, + false, false, 0); // Bit-Convert the value to f64. 
SDValue Ops[2] = { diff --git a/lib/Target/Sparc/SparcISelLowering.h b/lib/Target/Sparc/SparcISelLowering.h index 55781be..2ee73c1 100644 --- a/lib/Target/Sparc/SparcISelLowering.h +++ b/lib/Target/Sparc/SparcISelLowering.h @@ -87,7 +87,7 @@ namespace llvm { virtual SDValue LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg, - bool isTailCall, + bool &isTailCall, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, diff --git a/lib/Target/Sparc/SparcMCAsmInfo.cpp b/lib/Target/Sparc/SparcMCAsmInfo.cpp index b67537c..53a9bde 100644 --- a/lib/Target/Sparc/SparcMCAsmInfo.cpp +++ b/lib/Target/Sparc/SparcMCAsmInfo.cpp @@ -21,7 +21,6 @@ SparcELFMCAsmInfo::SparcELFMCAsmInfo(const Target &T, const StringRef &TT) { Data64bitsDirective = 0; // .xword is only supported by V9. ZeroDirective = "\t.skip\t"; CommentString = "!"; - COMMDirectiveTakesAlignment = true; HasLEB128 = true; AbsoluteDebugSectionOffsets = true; SupportsDebugInformation = true; @@ -30,7 +29,6 @@ SparcELFMCAsmInfo::SparcELFMCAsmInfo(const Target &T, const StringRef &TT) { UsesELFSectionDirectiveForBSS = true; WeakRefDirective = "\t.weak\t"; - SetDirective = "\t.set\t"; PrivateGlobalPrefix = ".L"; } diff --git a/lib/Target/Sparc/SparcSubtarget.cpp b/lib/Target/Sparc/SparcSubtarget.cpp index 8a88cc0..ce11af1 100644 --- a/lib/Target/Sparc/SparcSubtarget.cpp +++ b/lib/Target/Sparc/SparcSubtarget.cpp @@ -15,29 +15,20 @@ #include "SparcGenSubtarget.inc" using namespace llvm; -// FIXME: temporary. -#include "llvm/Support/CommandLine.h" -namespace { - cl::opt<bool> EnableV9("enable-sparc-v9-insts", cl::Hidden, - cl::desc("Enable V9 instructions in the V8 target")); -} - -SparcSubtarget::SparcSubtarget(const std::string &TT, const std::string &FS) { - // Set the default features. - IsV9 = false; - V8DeprecatedInsts = false; - IsVIS = false; +SparcSubtarget::SparcSubtarget(const std::string &TT, const std::string &FS, + bool is64Bit) : + IsV9(false), + V8DeprecatedInsts(false), + IsVIS(false), + Is64Bit(is64Bit) { // Determine default and user specified characteristics - std::string CPU = "generic"; + const char *CPU = "v8"; + if (is64Bit) { + CPU = "v9"; + IsV9 = true; + } - // FIXME: autodetect host here! - CPU = "v9"; // What is a good way to detect V9? - // Parse features string. ParseSubtargetFeatures(FS, CPU); - - // Unless explicitly enabled, disable the V9 instructions. - if (!EnableV9) - IsV9 = false; } diff --git a/lib/Target/Sparc/SparcSubtarget.h b/lib/Target/Sparc/SparcSubtarget.h index 4377034..cec0ab4 100644 --- a/lib/Target/Sparc/SparcSubtarget.h +++ b/lib/Target/Sparc/SparcSubtarget.h @@ -23,8 +23,10 @@ class SparcSubtarget : public TargetSubtarget { bool IsV9; bool V8DeprecatedInsts; bool IsVIS; + bool Is64Bit; + public: - SparcSubtarget(const std::string &TT, const std::string &FS); + SparcSubtarget(const std::string &TT, const std::string &FS, bool is64bit); bool isV9() const { return IsV9; } bool isVIS() const { return IsVIS; } @@ -34,7 +36,17 @@ public: /// subtarget options. Definition of function is auto generated by tblgen. 
std::string ParseSubtargetFeatures(const std::string &FS, const std::string &CPU); - + + bool is64Bit() const { return Is64Bit; } + std::string getDataLayout() const { + const char *p; + if (is64Bit()) { + p = "E-p:64:64:64-i64:64:64-f64:64:64-f128:128:128-n32:64"; + } else { + p = "E-p:32:32:32-i64:64:64-f64:64:64-f128:64:64-n32"; + } + return std::string(p); + } }; } // end namespace llvm diff --git a/lib/Target/Sparc/SparcTargetMachine.cpp b/lib/Target/Sparc/SparcTargetMachine.cpp index 1eec112..a676623 100644 --- a/lib/Target/Sparc/SparcTargetMachine.cpp +++ b/lib/Target/Sparc/SparcTargetMachine.cpp @@ -19,18 +19,22 @@ using namespace llvm; extern "C" void LLVMInitializeSparcTarget() { // Register the target. - RegisterTargetMachine<SparcTargetMachine> X(TheSparcTarget); - RegisterAsmInfo<SparcELFMCAsmInfo> Y(TheSparcTarget); + RegisterTargetMachine<SparcV8TargetMachine> X(TheSparcTarget); + RegisterTargetMachine<SparcV9TargetMachine> Y(TheSparcV9Target); + + RegisterAsmInfo<SparcELFMCAsmInfo> A(TheSparcTarget); + RegisterAsmInfo<SparcELFMCAsmInfo> B(TheSparcV9Target); } /// SparcTargetMachine ctor - Create an ILP32 architecture model /// SparcTargetMachine::SparcTargetMachine(const Target &T, const std::string &TT, - const std::string &FS) + const std::string &FS, bool is64bit) : LLVMTargetMachine(T, TT), - DataLayout("E-p:32:32-f128:128:128-n32"), - Subtarget(TT, FS), TLInfo(*this), InstrInfo(Subtarget), + Subtarget(TT, FS, is64bit), + DataLayout(Subtarget.getDataLayout()), + TLInfo(*this), InstrInfo(Subtarget), FrameInfo(TargetFrameInfo::StackGrowsDown, 8, 0) { } @@ -49,3 +53,15 @@ bool SparcTargetMachine::addPreEmitPass(PassManagerBase &PM, PM.add(createSparcDelaySlotFillerPass(*this)); return true; } + +SparcV8TargetMachine::SparcV8TargetMachine(const Target &T, + const std::string &TT, + const std::string &FS) + : SparcTargetMachine(T, TT, FS, false) { +} + +SparcV9TargetMachine::SparcV9TargetMachine(const Target &T, + const std::string &TT, + const std::string &FS) + : SparcTargetMachine(T, TT, FS, true) { +} diff --git a/lib/Target/Sparc/SparcTargetMachine.h b/lib/Target/Sparc/SparcTargetMachine.h index cce5510..5834d08 100644 --- a/lib/Target/Sparc/SparcTargetMachine.h +++ b/lib/Target/Sparc/SparcTargetMachine.h @@ -24,14 +24,14 @@ namespace llvm { class SparcTargetMachine : public LLVMTargetMachine { - const TargetData DataLayout; // Calculates type size & alignment SparcSubtarget Subtarget; + const TargetData DataLayout; // Calculates type size & alignment SparcTargetLowering TLInfo; SparcInstrInfo InstrInfo; TargetFrameInfo FrameInfo; public: SparcTargetMachine(const Target &T, const std::string &TT, - const std::string &FS); + const std::string &FS, bool is64bit); virtual const SparcInstrInfo *getInstrInfo() const { return &InstrInfo; } virtual const TargetFrameInfo *getFrameInfo() const { return &FrameInfo; } @@ -49,6 +49,22 @@ public: virtual bool addPreEmitPass(PassManagerBase &PM, CodeGenOpt::Level OptLevel); }; +/// SparcV8TargetMachine - Sparc 32-bit target machine +/// +class SparcV8TargetMachine : public SparcTargetMachine { +public: + SparcV8TargetMachine(const Target &T, const std::string &TT, + const std::string &FS); +}; + +/// SparcV9TargetMachine - Sparc 64-bit target machine +/// +class SparcV9TargetMachine : public SparcTargetMachine { +public: + SparcV9TargetMachine(const Target &T, const std::string &TT, + const std::string &FS); +}; + } // end namespace llvm #endif diff --git a/lib/Target/Sparc/TargetInfo/Makefile b/lib/Target/Sparc/TargetInfo/Makefile 
index 0827fdb..641ed87 100644 --- a/lib/Target/Sparc/TargetInfo/Makefile +++ b/lib/Target/Sparc/TargetInfo/Makefile @@ -8,7 +8,6 @@ ##===----------------------------------------------------------------------===## LEVEL = ../../../.. LIBRARYNAME = LLVMSparcInfo -CXXFLAGS = -fno-rtti # Hack: we need to include 'main' target directory to grab private headers CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. diff --git a/lib/Target/Sparc/TargetInfo/SparcTargetInfo.cpp b/lib/Target/Sparc/TargetInfo/SparcTargetInfo.cpp index 5d697bd..5c06f07 100644 --- a/lib/Target/Sparc/TargetInfo/SparcTargetInfo.cpp +++ b/lib/Target/Sparc/TargetInfo/SparcTargetInfo.cpp @@ -13,7 +13,9 @@ using namespace llvm; Target llvm::TheSparcTarget; +Target llvm::TheSparcV9Target; extern "C" void LLVMInitializeSparcTargetInfo() { RegisterTarget<Triple::sparc> X(TheSparcTarget, "sparc", "Sparc"); + RegisterTarget<Triple::sparcv9> Y(TheSparcV9Target, "sparcv9", "Sparc V9"); } diff --git a/lib/Target/SubtargetFeature.cpp b/lib/Target/SubtargetFeature.cpp index 7cc4fd1..2094cc9 100644 --- a/lib/Target/SubtargetFeature.cpp +++ b/lib/Target/SubtargetFeature.cpp @@ -67,7 +67,7 @@ static void Split(std::vector<std::string> &V, const std::string &S) { while (true) { // Find the next comma size_t Comma = S.find(',', Pos); - // If no comma found then the the rest of the string is used + // If no comma found then the rest of the string is used if (Comma == std::string::npos) { // Add string to vector V.push_back(S.substr(Pos)); diff --git a/lib/Target/SystemZ/AsmPrinter/Makefile b/lib/Target/SystemZ/AsmPrinter/Makefile index 36cd6f8..9a350df 100644 --- a/lib/Target/SystemZ/AsmPrinter/Makefile +++ b/lib/Target/SystemZ/AsmPrinter/Makefile @@ -8,7 +8,6 @@ ##===----------------------------------------------------------------------===## LEVEL = ../../../.. LIBRARYNAME = LLVMSystemZAsmPrinter -CXXFLAGS = -fno-rtti # Hack: we need to include 'main' SystemZ target directory to grab private headers CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. diff --git a/lib/Target/SystemZ/AsmPrinter/SystemZAsmPrinter.cpp b/lib/Target/SystemZ/AsmPrinter/SystemZAsmPrinter.cpp index 5c3fe37..7a9e8dd 100644 --- a/lib/Target/SystemZ/AsmPrinter/SystemZAsmPrinter.cpp +++ b/lib/Target/SystemZ/AsmPrinter/SystemZAsmPrinter.cpp @@ -32,19 +32,17 @@ #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetRegistry.h" -#include "llvm/ADT/Statistic.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FormattedStream.h" using namespace llvm; -STATISTIC(EmittedInsts, "Number of machine instrs printed"); - namespace { class SystemZAsmPrinter : public AsmPrinter { public: SystemZAsmPrinter(formatted_raw_ostream &O, TargetMachine &TM, - const MCAsmInfo *MAI, bool V) - : AsmPrinter(O, TM, MAI, V) {} + MCContext &Ctx, MCStreamer &Streamer, + const MCAsmInfo *MAI) + : AsmPrinter(O, TM, Ctx, Streamer, MAI) {} virtual const char *getPassName() const { return "SystemZ Assembly Printer"; @@ -67,10 +65,7 @@ namespace { void printInstruction(const MachineInstr *MI); // autogenerated. 
static const char *getRegisterName(unsigned RegNo); - void printMachineInstruction(const MachineInstr * MI); - - void emitFunctionHeader(const MachineFunction &MF); - bool runOnMachineFunction(MachineFunction &F); + void EmitInstruction(const MachineInstr *MI); void getAnalysisUsage(AnalysisUsage &AU) const { AsmPrinter::getAnalysisUsage(AU); @@ -81,82 +76,10 @@ namespace { #include "SystemZGenAsmWriter.inc" -void SystemZAsmPrinter::emitFunctionHeader(const MachineFunction &MF) { - unsigned FnAlign = MF.getAlignment(); - const Function *F = MF.getFunction(); - - OutStreamer.SwitchSection(getObjFileLowering().SectionForGlobal(F, Mang, TM)); - - EmitAlignment(FnAlign, F); - - switch (F->getLinkage()) { - default: assert(0 && "Unknown linkage type!"); - case Function::InternalLinkage: // Symbols default to internal. - case Function::PrivateLinkage: - case Function::LinkerPrivateLinkage: - break; - case Function::ExternalLinkage: - O << "\t.globl\t" << *CurrentFnSym << '\n'; - break; - case Function::LinkOnceAnyLinkage: - case Function::LinkOnceODRLinkage: - case Function::WeakAnyLinkage: - case Function::WeakODRLinkage: - O << "\t.weak\t" << *CurrentFnSym << '\n'; - break; - } - - printVisibility(CurrentFnSym, F->getVisibility()); - - O << "\t.type\t" << *CurrentFnSym << ",@function\n"; - O << *CurrentFnSym << ":\n"; -} - -bool SystemZAsmPrinter::runOnMachineFunction(MachineFunction &MF) { - SetupMachineFunction(MF); - O << "\n\n"; - - // Print out constants referenced by the function - EmitConstantPool(MF.getConstantPool()); - - // Print the 'header' of function - emitFunctionHeader(MF); - - // Print out code for the function. - for (MachineFunction::const_iterator I = MF.begin(), E = MF.end(); - I != E; ++I) { - // Print a label for the basic block. - EmitBasicBlockStart(I); - - for (MachineBasicBlock::const_iterator II = I->begin(), E = I->end(); - II != E; ++II) - // Print the assembly for the instruction. - printMachineInstruction(II); - } - - if (MAI->hasDotTypeDotSizeDirective()) - O << "\t.size\t" << *CurrentFnSym << ", .-" << *CurrentFnSym << '\n'; - - // Print out jump tables referenced by the function. - EmitJumpTableInfo(MF.getJumpTableInfo(), MF); - - // We didn't modify anything - return false; -} - -void SystemZAsmPrinter::printMachineInstruction(const MachineInstr *MI) { - ++EmittedInsts; - - processDebugLoc(MI, true); - +void SystemZAsmPrinter::EmitInstruction(const MachineInstr *MI) { // Call the autogenerated instruction printer routines. 
printInstruction(MI); - - if (VerboseAsm) - EmitComments(*MI); - O << '\n'; - - processDebugLoc(MI, false); + OutStreamer.AddBlankLine(); } void SystemZAsmPrinter::printPCRelImmOperand(const MachineInstr *MI, int OpNum){ @@ -166,7 +89,7 @@ void SystemZAsmPrinter::printPCRelImmOperand(const MachineInstr *MI, int OpNum){ O << MO.getImm(); return; case MachineOperand::MO_MachineBasicBlock: - O << *GetMBBSymbol(MO.getMBB()->getNumber()); + O << *MO.getMBB()->getSymbol(OutContext); return; case MachineOperand::MO_GlobalAddress: { const GlobalValue *GV = MO.getGlobal(); @@ -221,7 +144,7 @@ void SystemZAsmPrinter::printOperand(const MachineInstr *MI, int OpNum, O << MO.getImm(); return; case MachineOperand::MO_MachineBasicBlock: - O << *GetMBBSymbol(MO.getMBB()->getNumber()); + O << *MO.getMBB()->getSymbol(OutContext); return; case MachineOperand::MO_JumpTableIndex: O << MAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber() << '_' diff --git a/lib/Target/SystemZ/Makefile b/lib/Target/SystemZ/Makefile index 6d0cbbd..5b44090 100644 --- a/lib/Target/SystemZ/Makefile +++ b/lib/Target/SystemZ/Makefile @@ -10,7 +10,6 @@ LEVEL = ../../.. LIBRARYNAME = LLVMSystemZCodeGen TARGET = SystemZ -CXXFLAGS = -fno-rtti # Make sure that tblgen is run, first thing. BUILT_SOURCES = SystemZGenRegisterInfo.h.inc SystemZGenRegisterNames.inc \ diff --git a/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp b/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp index 7096c0e..7f0d9fb 100644 --- a/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp +++ b/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp @@ -594,8 +594,7 @@ bool SystemZDAGToDAGISel::SelectLAAddr(SDNode *Op, SDValue Addr, bool SystemZDAGToDAGISel::TryFoldLoad(SDNode *P, SDValue N, SDValue &Base, SDValue &Disp, SDValue &Index) { if (ISD::isNON_EXTLoad(N.getNode()) && - N.hasOneUse() && - IsLegalAndProfitableToFold(N.getNode(), P, P)) + IsLegalToFold(N, P, P)) return SelectAddrRRI20(P, N.getOperand(1), Base, Disp, Index); return false; } @@ -665,10 +664,10 @@ SDNode *SystemZDAGToDAGISel::Select(SDNode *Node) { Dividend = N0.getNode(); // Insert prepared dividend into suitable 'subreg' - SDNode *Tmp = CurDAG->getMachineNode(TargetInstrInfo::IMPLICIT_DEF, + SDNode *Tmp = CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, ResVT); Dividend = - CurDAG->getMachineNode(TargetInstrInfo::INSERT_SUBREG, dl, ResVT, + CurDAG->getMachineNode(TargetOpcode::INSERT_SUBREG, dl, ResVT, SDValue(Tmp, 0), SDValue(Dividend, 0), CurDAG->getTargetConstant(subreg_odd, MVT::i32)); @@ -687,7 +686,7 @@ SDNode *SystemZDAGToDAGISel::Select(SDNode *Node) { // Copy the division (odd subreg) result, if it is needed. if (!SDValue(Node, 0).use_empty()) { unsigned SubRegIdx = (is32Bit ? subreg_odd32 : subreg_odd); - SDNode *Div = CurDAG->getMachineNode(TargetInstrInfo::EXTRACT_SUBREG, + SDNode *Div = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, NVT, SDValue(Result, 0), CurDAG->getTargetConstant(SubRegIdx, @@ -702,7 +701,7 @@ SDNode *SystemZDAGToDAGISel::Select(SDNode *Node) { // Copy the remainder (even subreg) result, if it is needed. if (!SDValue(Node, 1).use_empty()) { unsigned SubRegIdx = (is32Bit ? 
subreg_even32 : subreg_even); - SDNode *Rem = CurDAG->getMachineNode(TargetInstrInfo::EXTRACT_SUBREG, + SDNode *Rem = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, NVT, SDValue(Result, 0), CurDAG->getTargetConstant(SubRegIdx, @@ -749,12 +748,12 @@ SDNode *SystemZDAGToDAGISel::Select(SDNode *Node) { SDNode *Dividend = N0.getNode(); // Insert prepared dividend into suitable 'subreg' - SDNode *Tmp = CurDAG->getMachineNode(TargetInstrInfo::IMPLICIT_DEF, + SDNode *Tmp = CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, ResVT); { unsigned SubRegIdx = (is32Bit ? subreg_odd32 : subreg_odd); Dividend = - CurDAG->getMachineNode(TargetInstrInfo::INSERT_SUBREG, dl, ResVT, + CurDAG->getMachineNode(TargetOpcode::INSERT_SUBREG, dl, ResVT, SDValue(Tmp, 0), SDValue(Dividend, 0), CurDAG->getTargetConstant(SubRegIdx, MVT::i32)); } @@ -777,7 +776,7 @@ SDNode *SystemZDAGToDAGISel::Select(SDNode *Node) { // Copy the division (odd subreg) result, if it is needed. if (!SDValue(Node, 0).use_empty()) { unsigned SubRegIdx = (is32Bit ? subreg_odd32 : subreg_odd); - SDNode *Div = CurDAG->getMachineNode(TargetInstrInfo::EXTRACT_SUBREG, + SDNode *Div = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, NVT, SDValue(Result, 0), CurDAG->getTargetConstant(SubRegIdx, @@ -791,7 +790,7 @@ SDNode *SystemZDAGToDAGISel::Select(SDNode *Node) { // Copy the remainder (even subreg) result, if it is needed. if (!SDValue(Node, 1).use_empty()) { unsigned SubRegIdx = (is32Bit ? subreg_even32 : subreg_even); - SDNode *Rem = CurDAG->getMachineNode(TargetInstrInfo::EXTRACT_SUBREG, + SDNode *Rem = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, NVT, SDValue(Result, 0), CurDAG->getTargetConstant(SubRegIdx, diff --git a/lib/Target/SystemZ/SystemZISelLowering.cpp b/lib/Target/SystemZ/SystemZISelLowering.cpp index d6b476e..6f4b30f 100644 --- a/lib/Target/SystemZ/SystemZISelLowering.cpp +++ b/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -30,9 +30,9 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/CodeGen/SelectionDAGISel.h" +#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" #include "llvm/CodeGen/ValueTypes.h" #include "llvm/Target/TargetOptions.h" -#include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" @@ -250,11 +250,13 @@ SystemZTargetLowering::LowerFormalArguments(SDValue Chain, SDValue SystemZTargetLowering::LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg, - bool isTailCall, + bool &isTailCall, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) { + // SystemZ target does not yet support tail call optimization. 
+ isTailCall = false; switch (CallConv) { default: @@ -335,7 +337,8 @@ SystemZTargetLowering::LowerCCCArguments(SDValue Chain, // from this parameter SDValue FIN = DAG.getFrameIndex(FI, getPointerTy()); ArgValue = DAG.getLoad(LocVT, dl, Chain, FIN, - PseudoSourceValue::getFixedStack(FI), 0); + PseudoSourceValue::getFixedStack(FI), 0, + false, false, 0); } // If this is an 8/16/32-bit value, it is really passed promoted to 64 @@ -433,7 +436,8 @@ SystemZTargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee, DAG.getIntPtrConstant(Offset)); MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, - PseudoSourceValue::getStack(), Offset)); + PseudoSourceValue::getStack(), Offset, + false, false, 0)); } } @@ -736,7 +740,7 @@ SDValue SystemZTargetLowering::LowerGlobalAddress(SDValue Op, if (ExtraLoadRequired) Result = DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), Result, - PseudoSourceValue::getGOT(), 0); + PseudoSourceValue::getGOT(), 0, false, false, 0); // If there was a non-zero offset that we didn't fold, create an explicit // addition for it. diff --git a/lib/Target/SystemZ/SystemZISelLowering.h b/lib/Target/SystemZ/SystemZISelLowering.h index 5bf1ed6..36ff994 100644 --- a/lib/Target/SystemZ/SystemZISelLowering.h +++ b/lib/Target/SystemZ/SystemZISelLowering.h @@ -125,7 +125,7 @@ namespace llvm { SmallVectorImpl<SDValue> &InVals); virtual SDValue LowerCall(SDValue Chain, SDValue Callee, - CallingConv::ID CallConv, bool isVarArg, bool isTailCall, + CallingConv::ID CallConv, bool isVarArg, bool &isTailCall, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, diff --git a/lib/Target/SystemZ/SystemZMCAsmInfo.cpp b/lib/Target/SystemZ/SystemZMCAsmInfo.cpp index ba392bb..1a09206 100644 --- a/lib/Target/SystemZ/SystemZMCAsmInfo.cpp +++ b/lib/Target/SystemZ/SystemZMCAsmInfo.cpp @@ -16,11 +16,8 @@ using namespace llvm; SystemZMCAsmInfo::SystemZMCAsmInfo(const Target &T, const StringRef &TT) { - AlignmentIsInBytes = true; - PrivateGlobalPrefix = ".L"; WeakRefDirective = "\t.weak\t"; - SetDirective = "\t.set\t"; PCSymbol = "."; } diff --git a/lib/Target/SystemZ/SystemZRegisterInfo.cpp b/lib/Target/SystemZ/SystemZRegisterInfo.cpp index 1318195..fe50c90 100644 --- a/lib/Target/SystemZ/SystemZRegisterInfo.cpp +++ b/lib/Target/SystemZ/SystemZRegisterInfo.cpp @@ -86,10 +86,11 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, MBB.erase(I); } -int SystemZRegisterInfo::getFrameIndexOffset(MachineFunction &MF, int FI) const { +int SystemZRegisterInfo::getFrameIndexOffset(const MachineFunction &MF, + int FI) const { const TargetFrameInfo &TFI = *MF.getTarget().getFrameInfo(); - MachineFrameInfo *MFI = MF.getFrameInfo(); - SystemZMachineFunctionInfo *SystemZMFI = + const MachineFrameInfo *MFI = MF.getFrameInfo(); + const SystemZMachineFunctionInfo *SystemZMFI = MF.getInfo<SystemZMachineFunctionInfo>(); int Offset = MFI->getObjectOffset(FI) + MFI->getOffsetAdjustment(); uint64_t StackSize = MFI->getStackSize(); diff --git a/lib/Target/SystemZ/SystemZRegisterInfo.h b/lib/Target/SystemZ/SystemZRegisterInfo.h index 93f6aee..fabd4e8 100644 --- a/lib/Target/SystemZ/SystemZRegisterInfo.h +++ b/lib/Target/SystemZ/SystemZRegisterInfo.h @@ -49,7 +49,7 @@ struct SystemZRegisterInfo : public SystemZGenRegisterInfo { bool hasReservedCallFrame(MachineFunction &MF) const { return true; } bool hasFP(const MachineFunction &MF) const; - int getFrameIndexOffset(MachineFunction &MF, int FI) const; + int 
getFrameIndexOffset(const MachineFunction &MF, int FI) const; void eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, diff --git a/lib/Target/SystemZ/TargetInfo/Makefile b/lib/Target/SystemZ/TargetInfo/Makefile index 9f36b2c..0be80eb 100644 --- a/lib/Target/SystemZ/TargetInfo/Makefile +++ b/lib/Target/SystemZ/TargetInfo/Makefile @@ -8,7 +8,6 @@ ##===----------------------------------------------------------------------===## LEVEL = ../../../.. LIBRARYNAME = LLVMSystemZInfo -CXXFLAGS = -fno-rtti # Hack: we need to include 'main' target directory to grab private headers CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. diff --git a/lib/Target/TargetAsmLexer.cpp b/lib/Target/TargetAsmLexer.cpp index 0ae6c14..d4893ff 100644 --- a/lib/Target/TargetAsmLexer.cpp +++ b/lib/Target/TargetAsmLexer.cpp @@ -10,5 +10,5 @@ #include "llvm/Target/TargetAsmLexer.h" using namespace llvm; -TargetAsmLexer::TargetAsmLexer(const Target &T) : TheTarget(T) {} +TargetAsmLexer::TargetAsmLexer(const Target &T) : TheTarget(T), Lexer(NULL) {} TargetAsmLexer::~TargetAsmLexer() {} diff --git a/lib/Target/TargetData.cpp b/lib/Target/TargetData.cpp index ba3cc9d..295b30f 100644 --- a/lib/Target/TargetData.cpp +++ b/lib/Target/TargetData.cpp @@ -545,6 +545,13 @@ unsigned char TargetData::getABITypeAlignment(const Type *Ty) const { return getAlignment(Ty, true); } +/// getABIIntegerTypeAlignment - Return the minimum ABI-required alignment for +/// an integer type of the specified bitwidth. +unsigned char TargetData::getABIIntegerTypeAlignment(unsigned BitWidth) const { + return getAlignmentInfo(INTEGER_ALIGN, BitWidth, true, 0); +} + + unsigned char TargetData::getCallFrameTypeAlignment(const Type *Ty) const { for (unsigned i = 0, e = Alignments.size(); i != e; ++i) if (Alignments[i].AlignType == STACK_ALIGN) diff --git a/lib/Target/TargetLoweringObjectFile.cpp b/lib/Target/TargetLoweringObjectFile.cpp index a231ebc..0c105e9 100644 --- a/lib/Target/TargetLoweringObjectFile.cpp +++ b/lib/Target/TargetLoweringObjectFile.cpp @@ -19,17 +19,15 @@ #include "llvm/GlobalVariable.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" -#include "llvm/MC/MCSectionMachO.h" -#include "llvm/MC/MCSectionELF.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Target/Mangler.h" #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" +#include "llvm/Support/Dwarf.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include "llvm/ADT/SmallString.h" -#include "llvm/ADT/StringExtras.h" using namespace llvm; //===----------------------------------------------------------------------===// @@ -289,832 +287,54 @@ TargetLoweringObjectFile::getSectionForConstant(SectionKind Kind) const { } /// getSymbolForDwarfGlobalReference - Return an MCExpr to use for a -/// pc-relative reference to the specified global variable from exception -/// handling information. In addition to the symbol, this returns -/// by-reference: -/// -/// IsIndirect - True if the returned symbol is actually a stub that contains -/// the address of the symbol, false if the symbol is the global itself. -/// -/// IsPCRel - True if the symbol reference is already pc-relative, false if -/// the caller needs to subtract off the address of the reference from the -/// symbol. -/// +/// reference to the specified global variable from exception +/// handling information. 
const MCExpr *TargetLoweringObjectFile:: getSymbolForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang, - MachineModuleInfo *MMI, - bool &IsIndirect, bool &IsPCRel) const { - // The generic implementation of this just returns a direct reference to the - // symbol. - IsIndirect = false; - IsPCRel = false; - + MachineModuleInfo *MMI, unsigned Encoding) const { // FIXME: Use GetGlobalValueSymbol. SmallString<128> Name; Mang->getNameWithPrefix(Name, GV, false); - return MCSymbolRefExpr::Create(Name.str(), getContext()); -} - - -//===----------------------------------------------------------------------===// -// ELF -//===----------------------------------------------------------------------===// -typedef StringMap<const MCSectionELF*> ELFUniqueMapTy; - -TargetLoweringObjectFileELF::~TargetLoweringObjectFileELF() { - // If we have the section uniquing map, free it. - delete (ELFUniqueMapTy*)UniquingMap; -} - -const MCSection *TargetLoweringObjectFileELF:: -getELFSection(StringRef Section, unsigned Type, unsigned Flags, - SectionKind Kind, bool IsExplicit) const { - if (UniquingMap == 0) - UniquingMap = new ELFUniqueMapTy(); - ELFUniqueMapTy &Map = *(ELFUniqueMapTy*)UniquingMap; - - // Do the lookup, if we have a hit, return it. - const MCSectionELF *&Entry = Map[Section]; - if (Entry) return Entry; - - return Entry = MCSectionELF::Create(Section, Type, Flags, Kind, IsExplicit, - getContext()); -} - -void TargetLoweringObjectFileELF::Initialize(MCContext &Ctx, - const TargetMachine &TM) { - if (UniquingMap != 0) - ((ELFUniqueMapTy*)UniquingMap)->clear(); - TargetLoweringObjectFile::Initialize(Ctx, TM); - - BSSSection = - getELFSection(".bss", MCSectionELF::SHT_NOBITS, - MCSectionELF::SHF_WRITE | MCSectionELF::SHF_ALLOC, - SectionKind::getBSS()); - - TextSection = - getELFSection(".text", MCSectionELF::SHT_PROGBITS, - MCSectionELF::SHF_EXECINSTR | MCSectionELF::SHF_ALLOC, - SectionKind::getText()); - - DataSection = - getELFSection(".data", MCSectionELF::SHT_PROGBITS, - MCSectionELF::SHF_WRITE | MCSectionELF::SHF_ALLOC, - SectionKind::getDataRel()); - - ReadOnlySection = - getELFSection(".rodata", MCSectionELF::SHT_PROGBITS, - MCSectionELF::SHF_ALLOC, - SectionKind::getReadOnly()); - - TLSDataSection = - getELFSection(".tdata", MCSectionELF::SHT_PROGBITS, - MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_TLS | - MCSectionELF::SHF_WRITE, SectionKind::getThreadData()); - - TLSBSSSection = - getELFSection(".tbss", MCSectionELF::SHT_NOBITS, - MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_TLS | - MCSectionELF::SHF_WRITE, SectionKind::getThreadBSS()); - - DataRelSection = - getELFSection(".data.rel", MCSectionELF::SHT_PROGBITS, - MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_WRITE, - SectionKind::getDataRel()); - - DataRelLocalSection = - getELFSection(".data.rel.local", MCSectionELF::SHT_PROGBITS, - MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_WRITE, - SectionKind::getDataRelLocal()); - - DataRelROSection = - getELFSection(".data.rel.ro", MCSectionELF::SHT_PROGBITS, - MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_WRITE, - SectionKind::getReadOnlyWithRel()); - - DataRelROLocalSection = - getELFSection(".data.rel.ro.local", MCSectionELF::SHT_PROGBITS, - MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_WRITE, - SectionKind::getReadOnlyWithRelLocal()); - - MergeableConst4Section = - getELFSection(".rodata.cst4", MCSectionELF::SHT_PROGBITS, - MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_MERGE, - SectionKind::getMergeableConst4()); - - MergeableConst8Section = - getELFSection(".rodata.cst8", 
MCSectionELF::SHT_PROGBITS, - MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_MERGE, - SectionKind::getMergeableConst8()); - - MergeableConst16Section = - getELFSection(".rodata.cst16", MCSectionELF::SHT_PROGBITS, - MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_MERGE, - SectionKind::getMergeableConst16()); - - StaticCtorSection = - getELFSection(".ctors", MCSectionELF::SHT_PROGBITS, - MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_WRITE, - SectionKind::getDataRel()); - - StaticDtorSection = - getELFSection(".dtors", MCSectionELF::SHT_PROGBITS, - MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_WRITE, - SectionKind::getDataRel()); - - // Exception Handling Sections. - - // FIXME: We're emitting LSDA info into a readonly section on ELF, even though - // it contains relocatable pointers. In PIC mode, this is probably a big - // runtime hit for C++ apps. Either the contents of the LSDA need to be - // adjusted or this should be a data section. - LSDASection = - getELFSection(".gcc_except_table", MCSectionELF::SHT_PROGBITS, - MCSectionELF::SHF_ALLOC, SectionKind::getReadOnly()); - EHFrameSection = - getELFSection(".eh_frame", MCSectionELF::SHT_PROGBITS, - MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_WRITE, - SectionKind::getDataRel()); - - // Debug Info Sections. - DwarfAbbrevSection = - getELFSection(".debug_abbrev", MCSectionELF::SHT_PROGBITS, 0, - SectionKind::getMetadata()); - DwarfInfoSection = - getELFSection(".debug_info", MCSectionELF::SHT_PROGBITS, 0, - SectionKind::getMetadata()); - DwarfLineSection = - getELFSection(".debug_line", MCSectionELF::SHT_PROGBITS, 0, - SectionKind::getMetadata()); - DwarfFrameSection = - getELFSection(".debug_frame", MCSectionELF::SHT_PROGBITS, 0, - SectionKind::getMetadata()); - DwarfPubNamesSection = - getELFSection(".debug_pubnames", MCSectionELF::SHT_PROGBITS, 0, - SectionKind::getMetadata()); - DwarfPubTypesSection = - getELFSection(".debug_pubtypes", MCSectionELF::SHT_PROGBITS, 0, - SectionKind::getMetadata()); - DwarfStrSection = - getELFSection(".debug_str", MCSectionELF::SHT_PROGBITS, 0, - SectionKind::getMetadata()); - DwarfLocSection = - getELFSection(".debug_loc", MCSectionELF::SHT_PROGBITS, 0, - SectionKind::getMetadata()); - DwarfARangesSection = - getELFSection(".debug_aranges", MCSectionELF::SHT_PROGBITS, 0, - SectionKind::getMetadata()); - DwarfRangesSection = - getELFSection(".debug_ranges", MCSectionELF::SHT_PROGBITS, 0, - SectionKind::getMetadata()); - DwarfMacroInfoSection = - getELFSection(".debug_macinfo", MCSectionELF::SHT_PROGBITS, 0, - SectionKind::getMetadata()); -} - - -static SectionKind -getELFKindForNamedSection(StringRef Name, SectionKind K) { - if (Name.empty() || Name[0] != '.') return K; - - // Some lame default implementation based on some magic section names. 
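// [Editorial note -- not part of the patch] The removed helper that follows
// classifies a section purely by well-known name prefixes (.bss*, .tdata*,
// .tbss*, plus their linkonce variants). A self-contained sketch of the same
// prefix-matching idea; the Kind enum and helper names are illustrative:

#include <iostream>
#include <string>

enum class Kind { Unchanged, BSS, ThreadData, ThreadBSS };

static bool startsWith(const std::string &S, const std::string &Prefix) {
  return S.compare(0, Prefix.size(), Prefix) == 0;
}

static Kind classifyByName(const std::string &Name) {
  if (Name == ".bss"   || startsWith(Name, ".bss."))   return Kind::BSS;
  if (Name == ".tdata" || startsWith(Name, ".tdata.")) return Kind::ThreadData;
  if (Name == ".tbss"  || startsWith(Name, ".tbss."))  return Kind::ThreadBSS;
  return Kind::Unchanged;  // Keep whatever kind the IR analysis produced.
}

int main() {
  std::cout << (classifyByName(".tdata.mytls") == Kind::ThreadData) << '\n';
  return 0;
}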
- if (Name == ".bss" || - Name.startswith(".bss.") || - Name.startswith(".gnu.linkonce.b.") || - Name.startswith(".llvm.linkonce.b.") || - Name == ".sbss" || - Name.startswith(".sbss.") || - Name.startswith(".gnu.linkonce.sb.") || - Name.startswith(".llvm.linkonce.sb.")) - return SectionKind::getBSS(); - - if (Name == ".tdata" || - Name.startswith(".tdata.") || - Name.startswith(".gnu.linkonce.td.") || - Name.startswith(".llvm.linkonce.td.")) - return SectionKind::getThreadData(); - - if (Name == ".tbss" || - Name.startswith(".tbss.") || - Name.startswith(".gnu.linkonce.tb.") || - Name.startswith(".llvm.linkonce.tb.")) - return SectionKind::getThreadBSS(); - - return K; -} - - -static unsigned getELFSectionType(StringRef Name, SectionKind K) { - - if (Name == ".init_array") - return MCSectionELF::SHT_INIT_ARRAY; - - if (Name == ".fini_array") - return MCSectionELF::SHT_FINI_ARRAY; + const MCSymbol *Sym = getContext().GetOrCreateSymbol(Name.str()); - if (Name == ".preinit_array") - return MCSectionELF::SHT_PREINIT_ARRAY; - - if (K.isBSS() || K.isThreadBSS()) - return MCSectionELF::SHT_NOBITS; - - return MCSectionELF::SHT_PROGBITS; -} - - -static unsigned -getELFSectionFlags(SectionKind K) { - unsigned Flags = 0; - - if (!K.isMetadata()) - Flags |= MCSectionELF::SHF_ALLOC; - - if (K.isText()) - Flags |= MCSectionELF::SHF_EXECINSTR; - - if (K.isWriteable()) - Flags |= MCSectionELF::SHF_WRITE; - - if (K.isThreadLocal()) - Flags |= MCSectionELF::SHF_TLS; - - // K.isMergeableConst() is left out to honour PR4650 - if (K.isMergeableCString() || K.isMergeableConst4() || - K.isMergeableConst8() || K.isMergeableConst16()) - Flags |= MCSectionELF::SHF_MERGE; - - if (K.isMergeableCString()) - Flags |= MCSectionELF::SHF_STRINGS; - - return Flags; -} - - -const MCSection *TargetLoweringObjectFileELF:: -getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind, - Mangler *Mang, const TargetMachine &TM) const { - StringRef SectionName = GV->getSection(); - - // Infer section flags from the section name if we can. - Kind = getELFKindForNamedSection(SectionName, Kind); - - return getELFSection(SectionName, - getELFSectionType(SectionName, Kind), - getELFSectionFlags(Kind), Kind, true); -} - -static const char *getSectionPrefixForUniqueGlobal(SectionKind Kind) { - if (Kind.isText()) return ".gnu.linkonce.t."; - if (Kind.isReadOnly()) return ".gnu.linkonce.r."; - - if (Kind.isThreadData()) return ".gnu.linkonce.td."; - if (Kind.isThreadBSS()) return ".gnu.linkonce.tb."; - - if (Kind.isDataNoRel()) return ".gnu.linkonce.d."; - if (Kind.isDataRelLocal()) return ".gnu.linkonce.d.rel.local."; - if (Kind.isDataRel()) return ".gnu.linkonce.d.rel."; - if (Kind.isReadOnlyWithRelLocal()) return ".gnu.linkonce.d.rel.ro.local."; - - assert(Kind.isReadOnlyWithRel() && "Unknown section kind"); - return ".gnu.linkonce.d.rel.ro."; + return getSymbolForDwarfReference(Sym, MMI, Encoding); } -const MCSection *TargetLoweringObjectFileELF:: -SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, - Mangler *Mang, const TargetMachine &TM) const { - - // If this global is linkonce/weak and the target handles this by emitting it - // into a 'uniqued' section name, create and return the section now. 
- if (GV->isWeakForLinker() && !Kind.isCommon() && !Kind.isBSS()) { - const char *Prefix = getSectionPrefixForUniqueGlobal(Kind); - SmallString<128> Name; - Name.append(Prefix, Prefix+strlen(Prefix)); - Mang->getNameWithPrefix(Name, GV, false); - return getELFSection(Name.str(), getELFSectionType(Name.str(), Kind), - getELFSectionFlags(Kind), Kind); - } - - if (Kind.isText()) return TextSection; - - if (Kind.isMergeable1ByteCString() || - Kind.isMergeable2ByteCString() || - Kind.isMergeable4ByteCString()) { - - // We also need alignment here. - // FIXME: this is getting the alignment of the character, not the - // alignment of the global! - unsigned Align = - TM.getTargetData()->getPreferredAlignment(cast<GlobalVariable>(GV)); - - const char *SizeSpec = ".rodata.str1."; - if (Kind.isMergeable2ByteCString()) - SizeSpec = ".rodata.str2."; - else if (Kind.isMergeable4ByteCString()) - SizeSpec = ".rodata.str4."; - else - assert(Kind.isMergeable1ByteCString() && "unknown string width"); - - - std::string Name = SizeSpec + utostr(Align); - return getELFSection(Name, MCSectionELF::SHT_PROGBITS, - MCSectionELF::SHF_ALLOC | - MCSectionELF::SHF_MERGE | - MCSectionELF::SHF_STRINGS, - Kind); - } - - if (Kind.isMergeableConst()) { - if (Kind.isMergeableConst4() && MergeableConst4Section) - return MergeableConst4Section; - if (Kind.isMergeableConst8() && MergeableConst8Section) - return MergeableConst8Section; - if (Kind.isMergeableConst16() && MergeableConst16Section) - return MergeableConst16Section; - return ReadOnlySection; // .const - } - - if (Kind.isReadOnly()) return ReadOnlySection; - - if (Kind.isThreadData()) return TLSDataSection; - if (Kind.isThreadBSS()) return TLSBSSSection; - - // Note: we claim that common symbols are put in BSSSection, but they are - // really emitted with the magic .comm directive, which creates a symbol table - // entry but not a section. - if (Kind.isBSS() || Kind.isCommon()) return BSSSection; - - if (Kind.isDataNoRel()) return DataSection; - if (Kind.isDataRelLocal()) return DataRelLocalSection; - if (Kind.isDataRel()) return DataRelSection; - if (Kind.isReadOnlyWithRelLocal()) return DataRelROLocalSection; - - assert(Kind.isReadOnlyWithRel() && "Unknown section kind"); - return DataRelROSection; -} - -/// getSectionForConstant - Given a mergeable constant with the -/// specified size and relocation information, return a section that it -/// should be placed in. -const MCSection *TargetLoweringObjectFileELF:: -getSectionForConstant(SectionKind Kind) const { - if (Kind.isMergeableConst4() && MergeableConst4Section) - return MergeableConst4Section; - if (Kind.isMergeableConst8() && MergeableConst8Section) - return MergeableConst8Section; - if (Kind.isMergeableConst16() && MergeableConst16Section) - return MergeableConst16Section; - if (Kind.isReadOnly()) - return ReadOnlySection; - - if (Kind.isReadOnlyWithRelLocal()) return DataRelROLocalSection; - assert(Kind.isReadOnlyWithRel() && "Unknown section kind"); - return DataRelROSection; -} - -//===----------------------------------------------------------------------===// -// MachO -//===----------------------------------------------------------------------===// - -typedef StringMap<const MCSectionMachO*> MachOUniqueMapTy; - -TargetLoweringObjectFileMachO::~TargetLoweringObjectFileMachO() { - // If we have the MachO uniquing map, free it. 
- delete (MachOUniqueMapTy*)UniquingMap; -} - - -const MCSectionMachO *TargetLoweringObjectFileMachO:: -getMachOSection(StringRef Segment, StringRef Section, - unsigned TypeAndAttributes, - unsigned Reserved2, SectionKind Kind) const { - // We unique sections by their segment/section pair. The returned section - // may not have the same flags as the requested section, if so this should be - // diagnosed by the client as an error. - - // Create the map if it doesn't already exist. - if (UniquingMap == 0) - UniquingMap = new MachOUniqueMapTy(); - MachOUniqueMapTy &Map = *(MachOUniqueMapTy*)UniquingMap; - - // Form the name to look up. - SmallString<64> Name; - Name += Segment; - Name.push_back(','); - Name += Section; - - // Do the lookup, if we have a hit, return it. - const MCSectionMachO *&Entry = Map[Name.str()]; - if (Entry) return Entry; - - // Otherwise, return a new section. - return Entry = MCSectionMachO::Create(Segment, Section, TypeAndAttributes, - Reserved2, Kind, getContext()); -} - - -void TargetLoweringObjectFileMachO::Initialize(MCContext &Ctx, - const TargetMachine &TM) { - if (UniquingMap != 0) - ((MachOUniqueMapTy*)UniquingMap)->clear(); - TargetLoweringObjectFile::Initialize(Ctx, TM); - - TextSection // .text - = getMachOSection("__TEXT", "__text", - MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS, - SectionKind::getText()); - DataSection // .data - = getMachOSection("__DATA", "__data", 0, SectionKind::getDataRel()); - - CStringSection // .cstring - = getMachOSection("__TEXT", "__cstring", MCSectionMachO::S_CSTRING_LITERALS, - SectionKind::getMergeable1ByteCString()); - UStringSection - = getMachOSection("__TEXT","__ustring", 0, - SectionKind::getMergeable2ByteCString()); - FourByteConstantSection // .literal4 - = getMachOSection("__TEXT", "__literal4", MCSectionMachO::S_4BYTE_LITERALS, - SectionKind::getMergeableConst4()); - EightByteConstantSection // .literal8 - = getMachOSection("__TEXT", "__literal8", MCSectionMachO::S_8BYTE_LITERALS, - SectionKind::getMergeableConst8()); - - // ld_classic doesn't support .literal16 in 32-bit mode, and ld64 falls back - // to using it in -static mode. 
- SixteenByteConstantSection = 0; - if (TM.getRelocationModel() != Reloc::Static && - TM.getTargetData()->getPointerSize() == 32) - SixteenByteConstantSection = // .literal16 - getMachOSection("__TEXT", "__literal16",MCSectionMachO::S_16BYTE_LITERALS, - SectionKind::getMergeableConst16()); - - ReadOnlySection // .const - = getMachOSection("__TEXT", "__const", 0, SectionKind::getReadOnly()); - - TextCoalSection - = getMachOSection("__TEXT", "__textcoal_nt", - MCSectionMachO::S_COALESCED | - MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS, - SectionKind::getText()); - ConstTextCoalSection - = getMachOSection("__TEXT", "__const_coal", MCSectionMachO::S_COALESCED, - SectionKind::getText()); - ConstDataCoalSection - = getMachOSection("__DATA","__const_coal", MCSectionMachO::S_COALESCED, - SectionKind::getText()); - ConstDataSection // .const_data - = getMachOSection("__DATA", "__const", 0, - SectionKind::getReadOnlyWithRel()); - DataCoalSection - = getMachOSection("__DATA","__datacoal_nt", MCSectionMachO::S_COALESCED, - SectionKind::getDataRel()); - DataCommonSection - = getMachOSection("__DATA","__common", MCSectionMachO::S_ZEROFILL, - SectionKind::getBSS()); - DataBSSSection - = getMachOSection("__DATA","__bss", MCSectionMachO::S_ZEROFILL, - SectionKind::getBSS()); - - - LazySymbolPointerSection - = getMachOSection("__DATA", "__la_symbol_ptr", - MCSectionMachO::S_LAZY_SYMBOL_POINTERS, - SectionKind::getMetadata()); - NonLazySymbolPointerSection - = getMachOSection("__DATA", "__nl_symbol_ptr", - MCSectionMachO::S_NON_LAZY_SYMBOL_POINTERS, - SectionKind::getMetadata()); - - if (TM.getRelocationModel() == Reloc::Static) { - StaticCtorSection - = getMachOSection("__TEXT", "__constructor", 0,SectionKind::getDataRel()); - StaticDtorSection - = getMachOSection("__TEXT", "__destructor", 0, SectionKind::getDataRel()); - } else { - StaticCtorSection - = getMachOSection("__DATA", "__mod_init_func", - MCSectionMachO::S_MOD_INIT_FUNC_POINTERS, - SectionKind::getDataRel()); - StaticDtorSection - = getMachOSection("__DATA", "__mod_term_func", - MCSectionMachO::S_MOD_TERM_FUNC_POINTERS, - SectionKind::getDataRel()); - } - - // Exception Handling. - LSDASection = getMachOSection("__DATA", "__gcc_except_tab", 0, - SectionKind::getDataRel()); - EHFrameSection = - getMachOSection("__TEXT", "__eh_frame", - MCSectionMachO::S_COALESCED | - MCSectionMachO::S_ATTR_NO_TOC | - MCSectionMachO::S_ATTR_STRIP_STATIC_SYMS | - MCSectionMachO::S_ATTR_LIVE_SUPPORT, - SectionKind::getReadOnly()); - - // Debug Information. 
- DwarfAbbrevSection = - getMachOSection("__DWARF", "__debug_abbrev", MCSectionMachO::S_ATTR_DEBUG, - SectionKind::getMetadata()); - DwarfInfoSection = - getMachOSection("__DWARF", "__debug_info", MCSectionMachO::S_ATTR_DEBUG, - SectionKind::getMetadata()); - DwarfLineSection = - getMachOSection("__DWARF", "__debug_line", MCSectionMachO::S_ATTR_DEBUG, - SectionKind::getMetadata()); - DwarfFrameSection = - getMachOSection("__DWARF", "__debug_frame", MCSectionMachO::S_ATTR_DEBUG, - SectionKind::getMetadata()); - DwarfPubNamesSection = - getMachOSection("__DWARF", "__debug_pubnames", MCSectionMachO::S_ATTR_DEBUG, - SectionKind::getMetadata()); - DwarfPubTypesSection = - getMachOSection("__DWARF", "__debug_pubtypes", MCSectionMachO::S_ATTR_DEBUG, - SectionKind::getMetadata()); - DwarfStrSection = - getMachOSection("__DWARF", "__debug_str", MCSectionMachO::S_ATTR_DEBUG, - SectionKind::getMetadata()); - DwarfLocSection = - getMachOSection("__DWARF", "__debug_loc", MCSectionMachO::S_ATTR_DEBUG, - SectionKind::getMetadata()); - DwarfARangesSection = - getMachOSection("__DWARF", "__debug_aranges", MCSectionMachO::S_ATTR_DEBUG, - SectionKind::getMetadata()); - DwarfRangesSection = - getMachOSection("__DWARF", "__debug_ranges", MCSectionMachO::S_ATTR_DEBUG, - SectionKind::getMetadata()); - DwarfMacroInfoSection = - getMachOSection("__DWARF", "__debug_macinfo", MCSectionMachO::S_ATTR_DEBUG, - SectionKind::getMetadata()); - DwarfDebugInlineSection = - getMachOSection("__DWARF", "__debug_inlined", MCSectionMachO::S_ATTR_DEBUG, - SectionKind::getMetadata()); -} - -const MCSection *TargetLoweringObjectFileMachO:: -getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind, - Mangler *Mang, const TargetMachine &TM) const { - // Parse the section specifier and create it if valid. - StringRef Segment, Section; - unsigned TAA, StubSize; - std::string ErrorCode = - MCSectionMachO::ParseSectionSpecifier(GV->getSection(), Segment, Section, - TAA, StubSize); - if (!ErrorCode.empty()) { - // If invalid, report the error with llvm_report_error. - llvm_report_error("Global variable '" + GV->getNameStr() + - "' has an invalid section specifier '" + GV->getSection()+ - "': " + ErrorCode + "."); - // Fall back to dropping it into the data section. - return DataSection; - } - - // Get the section. - const MCSectionMachO *S = - getMachOSection(Segment, Section, TAA, StubSize, Kind); - - // Okay, now that we got the section, verify that the TAA & StubSize agree. - // If the user declared multiple globals with different section flags, we need - // to reject it here. - if (S->getTypeAndAttributes() != TAA || S->getStubSize() != StubSize) { - // If invalid, report the error with llvm_report_error. - llvm_report_error("Global variable '" + GV->getNameStr() + - "' section type or attributes does not match previous" - " section specifier"); - } - - return S; -} - -const MCSection *TargetLoweringObjectFileMachO:: -SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, - Mangler *Mang, const TargetMachine &TM) const { - assert(!Kind.isThreadLocal() && "Darwin doesn't support TLS"); - - if (Kind.isText()) - return GV->isWeakForLinker() ? TextCoalSection : TextSection; - - // If this is weak/linkonce, put this in a coalescable section, either in text - // or data depending on if it is writable. - if (GV->isWeakForLinker()) { - if (Kind.isReadOnly()) - return ConstTextCoalSection; - return DataCoalSection; - } - - // FIXME: Alignment check should be handled by section classifier. 
- if (Kind.isMergeable1ByteCString() || - Kind.isMergeable2ByteCString()) { - if (TM.getTargetData()->getPreferredAlignment( - cast<GlobalVariable>(GV)) < 32) { - if (Kind.isMergeable1ByteCString()) - return CStringSection; - assert(Kind.isMergeable2ByteCString()); - return UStringSection; - } - } - - if (Kind.isMergeableConst()) { - if (Kind.isMergeableConst4()) - return FourByteConstantSection; - if (Kind.isMergeableConst8()) - return EightByteConstantSection; - if (Kind.isMergeableConst16() && SixteenByteConstantSection) - return SixteenByteConstantSection; - } - - // Otherwise, if it is readonly, but not something we can specially optimize, - // just drop it in .const. - if (Kind.isReadOnly()) - return ReadOnlySection; - - // If this is marked const, put it into a const section. But if the dynamic - // linker needs to write to it, put it in the data segment. - if (Kind.isReadOnlyWithRel()) - return ConstDataSection; - - // Put zero initialized globals with strong external linkage in the - // DATA, __common section with the .zerofill directive. - if (Kind.isBSSExtern()) - return DataCommonSection; - - // Put zero initialized globals with local linkage in __DATA,__bss directive - // with the .zerofill directive (aka .lcomm). - if (Kind.isBSSLocal()) - return DataBSSSection; - - // Otherwise, just drop the variable in the normal data section. - return DataSection; -} - -const MCSection * -TargetLoweringObjectFileMachO::getSectionForConstant(SectionKind Kind) const { - // If this constant requires a relocation, we have to put it in the data - // segment, not in the text segment. - if (Kind.isDataRel() || Kind.isReadOnlyWithRel()) - return ConstDataSection; - - if (Kind.isMergeableConst4()) - return FourByteConstantSection; - if (Kind.isMergeableConst8()) - return EightByteConstantSection; - if (Kind.isMergeableConst16() && SixteenByteConstantSection) - return SixteenByteConstantSection; - return ReadOnlySection; // .const -} - -/// shouldEmitUsedDirectiveFor - This hook allows targets to selectively decide -/// not to emit the UsedDirective for some symbols in llvm.used. -// FIXME: REMOVE this (rdar://7071300) -bool TargetLoweringObjectFileMachO:: -shouldEmitUsedDirectiveFor(const GlobalValue *GV, Mangler *Mang) const { - /// On Darwin, internally linked data beginning with "L" or "l" does not have - /// the directive emitted (this occurs in ObjC metadata). - if (!GV) return false; - - // Check whether the mangled name has the "Private" or "LinkerPrivate" prefix. - if (GV->hasLocalLinkage() && !isa<Function>(GV)) { - // FIXME: ObjC metadata is currently emitted as internal symbols that have - // \1L and \0l prefixes on them. Fix them to be Private/LinkerPrivate and - // this horrible hack can go away. 
- SmallString<64> Name; - Mang->getNameWithPrefix(Name, GV, false); - if (Name[0] == 'L' || Name[0] == 'l') - return false; +const MCExpr *TargetLoweringObjectFile:: +getSymbolForDwarfReference(const MCSymbol *Sym, MachineModuleInfo *MMI, + unsigned Encoding) const { + const MCExpr *Res = MCSymbolRefExpr::Create(Sym, getContext()); + + switch (Encoding & 0xF0) { + default: + llvm_report_error("Do not support this DWARF encoding yet!"); + break; + case dwarf::DW_EH_PE_absptr: + // Do nothing special + break; + case dwarf::DW_EH_PE_pcrel: + // FIXME: PCSymbol + const MCExpr *PC = MCSymbolRefExpr::Create(".", getContext()); + Res = MCBinaryExpr::CreateSub(Res, PC, getContext()); + break; } - return true; + return Res; } -const MCExpr *TargetLoweringObjectFileMachO:: -getSymbolForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang, - MachineModuleInfo *MMI, - bool &IsIndirect, bool &IsPCRel) const { - // The mach-o version of this method defaults to returning a stub reference. - IsIndirect = true; - IsPCRel = false; - - SmallString<128> Name; - Mang->getNameWithPrefix(Name, GV, true); - Name += "$non_lazy_ptr"; - return MCSymbolRefExpr::Create(Name.str(), getContext()); +unsigned TargetLoweringObjectFile::getPersonalityEncoding() const { + return dwarf::DW_EH_PE_absptr; } - -//===----------------------------------------------------------------------===// -// COFF -//===----------------------------------------------------------------------===// - -typedef StringMap<const MCSectionCOFF*> COFFUniqueMapTy; - -TargetLoweringObjectFileCOFF::~TargetLoweringObjectFileCOFF() { - delete (COFFUniqueMapTy*)UniquingMap; +unsigned TargetLoweringObjectFile::getLSDAEncoding() const { + return dwarf::DW_EH_PE_absptr; } - -const MCSection *TargetLoweringObjectFileCOFF:: -getCOFFSection(StringRef Name, bool isDirective, SectionKind Kind) const { - // Create the map if it doesn't already exist. - if (UniquingMap == 0) - UniquingMap = new MachOUniqueMapTy(); - COFFUniqueMapTy &Map = *(COFFUniqueMapTy*)UniquingMap; - - // Do the lookup, if we have a hit, return it. - const MCSectionCOFF *&Entry = Map[Name]; - if (Entry) return Entry; - - return Entry = MCSectionCOFF::Create(Name, isDirective, Kind, getContext()); +unsigned TargetLoweringObjectFile::getFDEEncoding() const { + return dwarf::DW_EH_PE_absptr; } -void TargetLoweringObjectFileCOFF::Initialize(MCContext &Ctx, - const TargetMachine &TM) { - if (UniquingMap != 0) - ((COFFUniqueMapTy*)UniquingMap)->clear(); - TargetLoweringObjectFile::Initialize(Ctx, TM); - TextSection = getCOFFSection("\t.text", true, SectionKind::getText()); - DataSection = getCOFFSection("\t.data", true, SectionKind::getDataRel()); - StaticCtorSection = - getCOFFSection(".ctors", false, SectionKind::getDataRel()); - StaticDtorSection = - getCOFFSection(".dtors", false, SectionKind::getDataRel()); - - // FIXME: We're emitting LSDA info into a readonly section on COFF, even - // though it contains relocatable pointers. In PIC mode, this is probably a - // big runtime hit for C++ apps. Either the contents of the LSDA need to be - // adjusted or this should be a data section. - LSDASection = - getCOFFSection(".gcc_except_table", false, SectionKind::getReadOnly()); - EHFrameSection = - getCOFFSection(".eh_frame", false, SectionKind::getDataRel()); - - // Debug info. - // FIXME: Don't use 'directive' mode here. 
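// [Editorial note -- not part of the patch] The getSymbolForDwarfReference
// hunk above dispatches on (Encoding & 0xF0): a DWARF exception-handling
// encoding byte packs the value format into the low nibble and the
// application mode (absolute, pc-relative, ...) into the high nibble. A
// self-contained sketch of that split; the constants follow the DWARF EH
// encoding values, everything else is illustrative:

#include <cstdio>

enum : unsigned {
  DW_EH_PE_absptr = 0x00,  // application: absolute pointer
  DW_EH_PE_udata4 = 0x03,  // format: 4-byte unsigned
  DW_EH_PE_pcrel  = 0x10   // application: pc-relative
};

int main() {
  unsigned Encoding = DW_EH_PE_pcrel | DW_EH_PE_udata4;  // 0x13
  unsigned Application = Encoding & 0xF0;  // how the value is applied
  unsigned Format      = Encoding & 0x0F;  // how the value is stored
  std::printf("application=0x%02X format=0x%02X pcrel=%s\n",
              Application, Format,
              Application == DW_EH_PE_pcrel ? "yes" : "no");
  return 0;
}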
- DwarfAbbrevSection = - getCOFFSection("\t.section\t.debug_abbrev,\"dr\"", - true, SectionKind::getMetadata()); - DwarfInfoSection = - getCOFFSection("\t.section\t.debug_info,\"dr\"", - true, SectionKind::getMetadata()); - DwarfLineSection = - getCOFFSection("\t.section\t.debug_line,\"dr\"", - true, SectionKind::getMetadata()); - DwarfFrameSection = - getCOFFSection("\t.section\t.debug_frame,\"dr\"", - true, SectionKind::getMetadata()); - DwarfPubNamesSection = - getCOFFSection("\t.section\t.debug_pubnames,\"dr\"", - true, SectionKind::getMetadata()); - DwarfPubTypesSection = - getCOFFSection("\t.section\t.debug_pubtypes,\"dr\"", - true, SectionKind::getMetadata()); - DwarfStrSection = - getCOFFSection("\t.section\t.debug_str,\"dr\"", - true, SectionKind::getMetadata()); - DwarfLocSection = - getCOFFSection("\t.section\t.debug_loc,\"dr\"", - true, SectionKind::getMetadata()); - DwarfARangesSection = - getCOFFSection("\t.section\t.debug_aranges,\"dr\"", - true, SectionKind::getMetadata()); - DwarfRangesSection = - getCOFFSection("\t.section\t.debug_ranges,\"dr\"", - true, SectionKind::getMetadata()); - DwarfMacroInfoSection = - getCOFFSection("\t.section\t.debug_macinfo,\"dr\"", - true, SectionKind::getMetadata()); -} - -const MCSection *TargetLoweringObjectFileCOFF:: -getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind, - Mangler *Mang, const TargetMachine &TM) const { - return getCOFFSection(GV->getSection(), false, Kind); -} - -static const char *getCOFFSectionPrefixForUniqueGlobal(SectionKind Kind) { - if (Kind.isText()) - return ".text$linkonce"; - if (Kind.isWriteable()) - return ".data$linkonce"; - return ".rdata$linkonce"; -} - - -const MCSection *TargetLoweringObjectFileCOFF:: -SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, - Mangler *Mang, const TargetMachine &TM) const { - assert(!Kind.isThreadLocal() && "Doesn't support TLS"); - - // If this global is linkonce/weak and the target handles this by emitting it - // into a 'uniqued' section name, create and return the section now. - if (GV->isWeakForLinker()) { - const char *Prefix = getCOFFSectionPrefixForUniqueGlobal(Kind); - SmallString<128> Name(Prefix, Prefix+strlen(Prefix)); - Mang->getNameWithPrefix(Name, GV, false); - return getCOFFSection(Name.str(), false, Kind); - } - - if (Kind.isText()) - return getTextSection(); - - return getDataSection(); +unsigned TargetLoweringObjectFile::getTTypeEncoding() const { + return dwarf::DW_EH_PE_absptr; } diff --git a/lib/Target/TargetMachOWriterInfo.cpp b/lib/Target/TargetMachOWriterInfo.cpp deleted file mode 100644 index d608119..0000000 --- a/lib/Target/TargetMachOWriterInfo.cpp +++ /dev/null @@ -1,25 +0,0 @@ -//===-- llvm/Target/TargetMachOWriterInfo.h - MachO Writer Info -*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file defines the TargetMachOWriterInfo class. 
-// -//===----------------------------------------------------------------------===// - -#include "llvm/Target/TargetMachOWriterInfo.h" -#include "llvm/CodeGen/MachineRelocation.h" -using namespace llvm; - -TargetMachOWriterInfo::~TargetMachOWriterInfo() {} - -MachineRelocation -TargetMachOWriterInfo::GetJTRelocation(unsigned Offset, - MachineBasicBlock *MBB) const { - // FIXME: do something about PIC - return MachineRelocation::getBB(Offset, MachineRelocation::VANILLA, MBB); -} diff --git a/lib/Target/TargetMachine.cpp b/lib/Target/TargetMachine.cpp index fec59b5..88871e3 100644 --- a/lib/Target/TargetMachine.cpp +++ b/lib/Target/TargetMachine.cpp @@ -40,7 +40,7 @@ namespace llvm { bool UnwindTablesMandatory; Reloc::Model RelocationModel; CodeModel::Model CMModel; - bool PerformTailCallOpt; + bool GuaranteedTailCallOpt; unsigned StackAlignment; bool RealignStack; bool DisableJumpTables; @@ -173,9 +173,9 @@ DefCodeModel("code-model", "Large code model"), clEnumValEnd)); static cl::opt<bool, true> -EnablePerformTailCallOpt("tailcallopt", - cl::desc("Turn on tail call optimization."), - cl::location(PerformTailCallOpt), +EnableGuaranteedTailCallOpt("tailcallopt", + cl::desc("Turn fastcc calls into tail calls by (potentially) changing ABI."), + cl::location(GuaranteedTailCallOpt), cl::init(false)); static cl::opt<unsigned, true> OverrideStackAlignment("stack-alignment", diff --git a/lib/Target/TargetRegisterInfo.cpp b/lib/Target/TargetRegisterInfo.cpp index fac67e2..52983ff 100644 --- a/lib/Target/TargetRegisterInfo.cpp +++ b/lib/Target/TargetRegisterInfo.cpp @@ -86,9 +86,10 @@ BitVector TargetRegisterInfo::getAllocatableSet(const MachineFunction &MF, /// getFrameIndexOffset - Returns the displacement from the frame register to /// the stack frame of the specified index. This is the default implementation /// which is overridden for some targets. -int TargetRegisterInfo::getFrameIndexOffset(MachineFunction &MF, int FI) const { +int TargetRegisterInfo::getFrameIndexOffset(const MachineFunction &MF, + int FI) const { const TargetFrameInfo &TFI = *MF.getTarget().getFrameInfo(); - MachineFrameInfo *MFI = MF.getFrameInfo(); + const MachineFrameInfo *MFI = MF.getFrameInfo(); return MFI->getObjectOffset(FI) + MFI->getStackSize() - TFI.getOffsetOfLocalArea() + MFI->getOffsetAdjustment(); } @@ -96,7 +97,7 @@ int TargetRegisterInfo::getFrameIndexOffset(MachineFunction &MF, int FI) const { /// getInitialFrameState - Returns a list of machine moves that are assumed /// on entry to a function. void -TargetRegisterInfo::getInitialFrameState(std::vector<MachineMove> &Moves) const { +TargetRegisterInfo::getInitialFrameState(std::vector<MachineMove> &Moves) const{ // Default is to do nothing. } diff --git a/lib/Target/X86/AsmParser/Makefile b/lib/Target/X86/AsmParser/Makefile index 288b985..25fb0a2 100644 --- a/lib/Target/X86/AsmParser/Makefile +++ b/lib/Target/X86/AsmParser/Makefile @@ -8,7 +8,6 @@ ##===----------------------------------------------------------------------===## LEVEL = ../../../.. LIBRARYNAME = LLVMX86AsmParser -CXXFLAGS = -fno-rtti # Hack: we need to include 'main' x86 target directory to grab private headers CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. 
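// [Editorial note -- not part of the patch] The TargetMachine.cpp hunk just
// above renames the externally stored flag behind -tailcallopt. It relies on
// the cl::opt<bool, true> idiom, where 'true' selects external storage and
// cl::location points the parser at a plain global that the rest of the
// compiler reads directly. A minimal sketch, assuming LLVM's CommandLine.h
// is available; only the flag wiring is shown:

#include "llvm/Support/CommandLine.h"

namespace llvm { bool GuaranteedTailCallOpt; }

static llvm::cl::opt<bool, true>  // 'true': storage lives elsewhere
EnableGuaranteedTailCallOpt("tailcallopt",
  llvm::cl::desc("Turn fastcc calls into tail calls by (potentially) "
                 "changing ABI."),
  llvm::cl::location(llvm::GuaranteedTailCallOpt),
  llvm::cl::init(false));

int main(int argc, char **argv) {
  llvm::cl::ParseCommandLineOptions(argc, argv);
  // Code elsewhere tests the plain global, never the cl::opt object itself.
  return llvm::GuaranteedTailCallOpt ? 0 : 1;
}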
diff --git a/lib/Target/X86/AsmParser/X86AsmLexer.cpp b/lib/Target/X86/AsmParser/X86AsmLexer.cpp index 1a62044..a58f58e 100644 --- a/lib/Target/X86/AsmParser/X86AsmLexer.cpp +++ b/lib/Target/X86/AsmParser/X86AsmLexer.cpp @@ -7,8 +7,11 @@ // //===----------------------------------------------------------------------===// +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringExtras.h" #include "llvm/Target/TargetAsmLexer.h" #include "llvm/Target/TargetRegistry.h" +#include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCParser/MCAsmLexer.h" #include "llvm/MC/MCParser/MCParsedAsmOperand.h" #include "X86.h" @@ -19,18 +22,119 @@ namespace { class X86AsmLexer : public TargetAsmLexer { const MCAsmInfo &AsmInfo; + + bool tentativeIsValid; + AsmToken tentativeToken; + + const AsmToken &lexTentative() { + tentativeToken = getLexer()->Lex(); + tentativeIsValid = true; + return tentativeToken; + } + + const AsmToken &lexDefinite() { + if(tentativeIsValid) { + tentativeIsValid = false; + return tentativeToken; + } + else { + return getLexer()->Lex(); + } + } + + AsmToken LexTokenATT(); + AsmToken LexTokenIntel(); protected: - AsmToken LexToken(); + AsmToken LexToken() { + if (!Lexer) { + SetError(SMLoc(), "No MCAsmLexer installed"); + return AsmToken(AsmToken::Error, "", 0); + } + + switch (AsmInfo.getAssemblerDialect()) { + default: + SetError(SMLoc(), "Unhandled dialect"); + return AsmToken(AsmToken::Error, "", 0); + case 0: + return LexTokenATT(); + case 1: + return LexTokenIntel(); + } + } public: X86AsmLexer(const Target &T, const MCAsmInfo &MAI) - : TargetAsmLexer(T), AsmInfo(MAI) { + : TargetAsmLexer(T), AsmInfo(MAI), tentativeIsValid(false) { } }; } -AsmToken X86AsmLexer::LexToken() { - return AsmToken(AsmToken::Error, "", 0); +static unsigned MatchRegisterName(StringRef Name); + +AsmToken X86AsmLexer::LexTokenATT() { + const AsmToken lexedToken = lexDefinite(); + + switch (lexedToken.getKind()) { + default: + return AsmToken(lexedToken); + case AsmToken::Error: + SetError(Lexer->getErrLoc(), Lexer->getErr()); + return AsmToken(lexedToken); + case AsmToken::Percent: + { + const AsmToken &nextToken = lexTentative(); + if (nextToken.getKind() == AsmToken::Identifier) { + unsigned regID = MatchRegisterName(nextToken.getString()); + + if (regID) { + lexDefinite(); + + StringRef regStr(lexedToken.getString().data(), + lexedToken.getString().size() + + nextToken.getString().size()); + + return AsmToken(AsmToken::Register, + regStr, + static_cast<int64_t>(regID)); + } + else { + return AsmToken(lexedToken); + } + } + else { + return AsmToken(lexedToken); + } + } + } +} + +AsmToken X86AsmLexer::LexTokenIntel() { + const AsmToken &lexedToken = lexDefinite(); + + switch(lexedToken.getKind()) { + default: + return AsmToken(lexedToken); + case AsmToken::Error: + SetError(Lexer->getErrLoc(), Lexer->getErr()); + return AsmToken(lexedToken); + case AsmToken::Identifier: + { + std::string upperCase = lexedToken.getString().str(); + std::string lowerCase = LowercaseString(upperCase); + StringRef lowerRef(lowerCase); + + unsigned regID = MatchRegisterName(lowerRef); + + if (regID) { + return AsmToken(AsmToken::Register, + lexedToken.getString(), + static_cast<int64_t>(regID)); + } + else { + return AsmToken(lexedToken); + } + } + } } extern "C" void LLVMInitializeX86AsmLexer() { @@ -38,6 +142,6 @@ extern "C" void LLVMInitializeX86AsmLexer() { RegisterAsmLexer<X86AsmLexer> Y(TheX86_64Target); } -//#define REGISTERS_ONLY -//#include "../X86GenAsmMatcher.inc" -//#undef REGISTERS_ONLY +#define REGISTERS_ONLY 
+#include "X86GenAsmMatcher.inc" +#undef REGISTERS_ONLY diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp index 7a9218e..84d7bb7 100644 --- a/lib/Target/X86/AsmParser/X86AsmParser.cpp +++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp @@ -10,6 +10,7 @@ #include "llvm/Target/TargetAsmParser.h" #include "X86.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringSwitch.h" #include "llvm/ADT/Twine.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCExpr.h" @@ -67,7 +68,7 @@ public: /// @name Auto-generated Match Functions /// { -static unsigned MatchRegisterName(const StringRef &Name); +static unsigned MatchRegisterName(StringRef Name); /// } @@ -172,8 +173,25 @@ struct X86Operand : public MCParsedAsmOperand { bool isMem() const { return Kind == Memory; } + bool isAbsMem() const { + return Kind == Memory && !getMemSegReg() && !getMemBaseReg() && + !getMemIndexReg() && getMemScale() == 1; + } + + bool isNoSegMem() const { + return Kind == Memory && !getMemSegReg(); + } + bool isReg() const { return Kind == Register; } + void addExpr(MCInst &Inst, const MCExpr *Expr) const { + // Add as immediates when possible. + if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Expr)) + Inst.addOperand(MCOperand::CreateImm(CE->getValue())); + else + Inst.addOperand(MCOperand::CreateExpr(Expr)); + } + void addRegOperands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); Inst.addOperand(MCOperand::CreateReg(getReg())); @@ -181,26 +199,35 @@ struct X86Operand : public MCParsedAsmOperand { void addImmOperands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); - Inst.addOperand(MCOperand::CreateExpr(getImm())); + addExpr(Inst, getImm()); } void addImmSExt8Operands(MCInst &Inst, unsigned N) const { // FIXME: Support user customization of the render method. assert(N == 1 && "Invalid number of operands!"); - Inst.addOperand(MCOperand::CreateExpr(getImm())); + addExpr(Inst, getImm()); } void addMemOperands(MCInst &Inst, unsigned N) const { - assert((N == 4 || N == 5) && "Invalid number of operands!"); - + assert((N == 5) && "Invalid number of operands!"); Inst.addOperand(MCOperand::CreateReg(getMemBaseReg())); Inst.addOperand(MCOperand::CreateImm(getMemScale())); Inst.addOperand(MCOperand::CreateReg(getMemIndexReg())); + addExpr(Inst, getMemDisp()); + Inst.addOperand(MCOperand::CreateReg(getMemSegReg())); + } + + void addAbsMemOperands(MCInst &Inst, unsigned N) const { + assert((N == 1) && "Invalid number of operands!"); Inst.addOperand(MCOperand::CreateExpr(getMemDisp())); + } - // FIXME: What a hack. - if (N == 5) - Inst.addOperand(MCOperand::CreateReg(getMemSegReg())); + void addNoSegMemOperands(MCInst &Inst, unsigned N) const { + assert((N == 4) && "Invalid number of operands!"); + Inst.addOperand(MCOperand::CreateReg(getMemBaseReg())); + Inst.addOperand(MCOperand::CreateImm(getMemScale())); + Inst.addOperand(MCOperand::CreateReg(getMemIndexReg())); + addExpr(Inst, getMemDisp()); } static X86Operand *CreateToken(StringRef Str, SMLoc Loc) { @@ -222,10 +249,24 @@ struct X86Operand : public MCParsedAsmOperand { return Res; } + /// Create an absolute memory operand. + static X86Operand *CreateMem(const MCExpr *Disp, SMLoc StartLoc, + SMLoc EndLoc) { + X86Operand *Res = new X86Operand(Memory, StartLoc, EndLoc); + Res->Mem.SegReg = 0; + Res->Mem.Disp = Disp; + Res->Mem.BaseReg = 0; + Res->Mem.IndexReg = 0; + Res->Mem.Scale = 1; + return Res; + } + + /// Create a generalized memory operand. 
static X86Operand *CreateMem(unsigned SegReg, const MCExpr *Disp, unsigned BaseReg, unsigned IndexReg, unsigned Scale, SMLoc StartLoc, SMLoc EndLoc) { - // We should never just have a displacement, that would be an immediate. + // We should never just have a displacement, that should be parsed as an + // absolute memory operand. assert((SegReg || BaseReg || IndexReg) && "Invalid memory operand!"); // The scale should always be one of {1,2,4,8}. @@ -259,6 +300,42 @@ bool X86ATTAsmParser::ParseRegister(unsigned &RegNo, // FIXME: Validate register for the current architecture; we have to do // validation later, so maybe there is no need for this here. RegNo = MatchRegisterName(Tok.getString()); + + // Parse %st(1) and "%st" as "%st(0)" + if (RegNo == 0 && Tok.getString() == "st") { + RegNo = X86::ST0; + EndLoc = Tok.getLoc(); + Parser.Lex(); // Eat 'st' + + // Check to see if we have '(4)' after %st. + if (getLexer().isNot(AsmToken::LParen)) + return false; + // Lex the paren. + getParser().Lex(); + + const AsmToken &IntTok = Parser.getTok(); + if (IntTok.isNot(AsmToken::Integer)) + return Error(IntTok.getLoc(), "expected stack index"); + switch (IntTok.getIntVal()) { + case 0: RegNo = X86::ST0; break; + case 1: RegNo = X86::ST1; break; + case 2: RegNo = X86::ST2; break; + case 3: RegNo = X86::ST3; break; + case 4: RegNo = X86::ST4; break; + case 5: RegNo = X86::ST5; break; + case 6: RegNo = X86::ST6; break; + case 7: RegNo = X86::ST7; break; + default: return Error(IntTok.getLoc(), "invalid stack index"); + } + + if (getParser().Lex().isNot(AsmToken::RParen)) + return Error(Parser.getTok().getLoc(), "expected ')'"); + + EndLoc = Tok.getLoc(); + Parser.Lex(); // Eat ')' + return false; + } + if (RegNo == 0) return Error(Tok.getLoc(), "invalid register name"); @@ -300,8 +377,8 @@ X86Operand *X86ATTAsmParser::ParseMemOperand() { // We have to disambiguate a parenthesized expression "(4+5)" from the start // of a memory operand with a missing displacement "(%ebx)" or "(,%eax)". The - // only way to do this without lookahead is to eat the ( and see what is after - // it. + // only way to do this without lookahead is to eat the '(' and see what is + // after it. const MCExpr *Disp = MCConstantExpr::Create(0, getParser().getContext()); if (getLexer().isNot(AsmToken::LParen)) { SMLoc ExprEnd; @@ -312,7 +389,7 @@ X86Operand *X86ATTAsmParser::ParseMemOperand() { if (getLexer().isNot(AsmToken::LParen)) { // Unless we have a segment register, treat this as an immediate. if (SegReg == 0) - return X86Operand::CreateImm(Disp, MemStart, ExprEnd); + return X86Operand::CreateMem(Disp, MemStart, ExprEnd); return X86Operand::CreateMem(SegReg, Disp, 0, 0, 1, MemStart, ExprEnd); } @@ -339,7 +416,7 @@ X86Operand *X86ATTAsmParser::ParseMemOperand() { if (getLexer().isNot(AsmToken::LParen)) { // Unless we have a segment register, treat this as an immediate. if (SegReg == 0) - return X86Operand::CreateImm(Disp, LParenLoc, ExprEnd); + return X86Operand::CreateMem(Disp, LParenLoc, ExprEnd); return X86Operand::CreateMem(SegReg, Disp, 0, 0, 1, MemStart, ExprEnd); } @@ -424,8 +501,20 @@ X86Operand *X86ATTAsmParser::ParseMemOperand() { bool X86ATTAsmParser:: ParseInstruction(const StringRef &Name, SMLoc NameLoc, SmallVectorImpl<MCParsedAsmOperand*> &Operands) { - - Operands.push_back(X86Operand::CreateToken(Name, NameLoc)); + // FIXME: Hack to recognize "sal..." and "rep..." for now. We need a way to + // represent alternative syntaxes in the .td file, without requiring + // instruction duplication. 
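// [Editorial note -- not part of the patch] The hunk that follows
// canonicalizes alternative mnemonics (sal* -> shl*, repe/repz -> rep,
// repnz -> repne) with llvm::StringSwitch before operand matching. A
// self-contained sketch of the same table-driven aliasing, using std::map
// in place of StringSwitch; names are illustrative:

#include <iostream>
#include <map>
#include <string>

static std::string patchMnemonic(const std::string &Name) {
  static const std::map<std::string, std::string> Aliases = {
    {"sal", "shl"},   {"salb", "shlb"}, {"sall", "shll"},
    {"salq", "shlq"}, {"salw", "shlw"},
    {"repe", "rep"},  {"repz", "rep"},  {"repnz", "repne"}};
  auto It = Aliases.find(Name);
  return It == Aliases.end() ? Name : It->second;  // default: unchanged
}

int main() {
  std::cout << patchMnemonic("salq") << '\n';  // shlq
  std::cout << patchMnemonic("movl") << '\n';  // movl (no alias)
  return 0;
}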
+ StringRef PatchedName = StringSwitch<StringRef>(Name) + .Case("sal", "shl") + .Case("salb", "shlb") + .Case("sall", "shll") + .Case("salq", "shlq") + .Case("salw", "shlw") + .Case("repe", "rep") + .Case("repz", "rep") + .Case("repnz", "repne") + .Default(Name); + Operands.push_back(X86Operand::CreateToken(PatchedName, NameLoc)); if (getLexer().isNot(AsmToken::EndOfStatement)) { diff --git a/lib/Target/X86/AsmPrinter/Makefile b/lib/Target/X86/AsmPrinter/Makefile index 326a22f..2368761 100644 --- a/lib/Target/X86/AsmPrinter/Makefile +++ b/lib/Target/X86/AsmPrinter/Makefile @@ -8,7 +8,6 @@ ##===----------------------------------------------------------------------===## LEVEL = ../../../.. LIBRARYNAME = LLVMX86AsmPrinter -CXXFLAGS = -fno-rtti # Hack: we need to include 'main' x86 target directory to grab private headers CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. diff --git a/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.cpp b/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.cpp index 804dbb9..1a35a49 100644 --- a/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.cpp +++ b/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.cpp @@ -18,17 +18,22 @@ #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCExpr.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/Format.h" #include "llvm/Support/FormattedStream.h" #include "X86GenInstrNames.inc" using namespace llvm; // Include the auto-generated portion of the assembly writer. #define MachineInstr MCInst -#define NO_ASM_WRITER_BOILERPLATE +#define GET_INSTRUCTION_NAME #include "X86GenAsmWriter.inc" #undef MachineInstr void X86ATTInstPrinter::printInst(const MCInst *MI) { printInstruction(MI); } +StringRef X86ATTInstPrinter::getOpcodeName(unsigned Opcode) const { + return getInstructionName(Opcode); +} + void X86ATTInstPrinter::printSSECC(const MCInst *MI, unsigned Op) { switch (MI->getOperand(Op).getImm()) { @@ -66,6 +71,10 @@ void X86ATTInstPrinter::printOperand(const MCInst *MI, unsigned OpNo) { O << '%' << getRegisterName(Op.getReg()); } else if (Op.isImm()) { O << '$' << Op.getImm(); + + if (CommentStream && (Op.getImm() > 255 || Op.getImm() < -256)) + *CommentStream << format("imm = 0x%X\n", Op.getImm()); + } else { assert(Op.isExpr() && "unknown operand kind in printOperand"); O << '$' << *Op.getExpr(); diff --git a/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.h b/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.h index 3180618..d109a07 100644 --- a/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.h +++ b/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.h @@ -26,11 +26,12 @@ public: virtual void printInst(const MCInst *MI); - + virtual StringRef getOpcodeName(unsigned Opcode) const; + // Autogenerated by tblgen. void printInstruction(const MCInst *MI); static const char *getRegisterName(unsigned RegNo); - + static const char *getInstructionName(unsigned Opcode); void printOperand(const MCInst *MI, unsigned OpNo); void printMemReference(const MCInst *MI, unsigned Op); diff --git a/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp b/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp index 2ffa18f..8cab24c 100644 --- a/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp +++ b/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp @@ -8,12 +8,10 @@ //===----------------------------------------------------------------------===// // // This file contains a printer that converts from our internal representation -// of machine-dependent LLVM code to AT&T format assembly -// language. This printer is the output mechanism used by `llc'. +// of machine-dependent LLVM code to X86 machine code. 
// //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "asm-printer" #include "X86AsmPrinter.h" #include "X86ATTInstPrinter.h" #include "X86IntelInstPrinter.h" @@ -29,169 +27,70 @@ #include "llvm/Type.h" #include "llvm/Assembly/Writer.h" #include "llvm/MC/MCContext.h" +#include "llvm/MC/MCExpr.h" #include "llvm/MC/MCSectionMachO.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/CodeGen/MachineModuleInfoImpls.h" +#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FormattedStream.h" #include "llvm/MC/MCAsmInfo.h" -#include "llvm/Target/TargetLoweringObjectFile.h" +#include "llvm/Target/Mangler.h" #include "llvm/Target/TargetOptions.h" #include "llvm/Target/TargetRegistry.h" #include "llvm/ADT/SmallString.h" -#include "llvm/ADT/Statistic.h" using namespace llvm; -STATISTIC(EmittedInsts, "Number of machine instrs printed"); - //===----------------------------------------------------------------------===// // Primitive Helper Functions. //===----------------------------------------------------------------------===// -void X86AsmPrinter::printMCInst(const MCInst *MI) { - if (MAI->getAssemblerDialect() == 0) - X86ATTInstPrinter(O, *MAI).printInstruction(MI); - else - X86IntelInstPrinter(O, *MAI).printInstruction(MI); -} - void X86AsmPrinter::PrintPICBaseSymbol() const { - // FIXME: Gross const cast hack. - X86AsmPrinter *AP = const_cast<X86AsmPrinter*>(this); - O << *X86MCInstLower(OutContext, 0, *AP).GetPICBaseSymbol(); + const TargetLowering *TLI = TM.getTargetLowering(); + O << *static_cast<const X86TargetLowering*>(TLI)->getPICBaseSymbol(MF, + OutContext); } -void X86AsmPrinter::emitFunctionHeader(const MachineFunction &MF) { - unsigned FnAlign = MF.getAlignment(); - const Function *F = MF.getFunction(); +MCSymbol *X86AsmPrinter::GetGlobalValueSymbol(const GlobalValue *GV) const { + SmallString<60> NameStr; + Mang->getNameWithPrefix(NameStr, GV, false); + MCSymbol *Symb = OutContext.GetOrCreateSymbol(NameStr.str()); if (Subtarget->isTargetCygMing()) { - X86COFFMachineModuleInfo &COFFMMI = + X86COFFMachineModuleInfo &COFFMMI = MMI->getObjFileInfo<X86COFFMachineModuleInfo>(); - COFFMMI.DecorateCygMingName(CurrentFnSym, OutContext, F, - *TM.getTargetData()); - } - - OutStreamer.SwitchSection(getObjFileLowering().SectionForGlobal(F, Mang, TM)); - EmitAlignment(FnAlign, F); - - switch (F->getLinkage()) { - default: llvm_unreachable("Unknown linkage type!"); - case Function::InternalLinkage: // Symbols default to internal. 
- case Function::PrivateLinkage: - break; - case Function::DLLExportLinkage: - case Function::ExternalLinkage: - OutStreamer.EmitSymbolAttribute(CurrentFnSym, MCSA_Global); - break; - case Function::LinkerPrivateLinkage: - case Function::LinkOnceAnyLinkage: - case Function::LinkOnceODRLinkage: - case Function::WeakAnyLinkage: - case Function::WeakODRLinkage: - if (Subtarget->isTargetDarwin()) { - OutStreamer.EmitSymbolAttribute(CurrentFnSym, MCSA_Global); - O << MAI->getWeakDefDirective() << *CurrentFnSym << '\n'; - } else if (Subtarget->isTargetCygMing()) { - OutStreamer.EmitSymbolAttribute(CurrentFnSym, MCSA_Global); - O << "\t.linkonce discard\n"; - } else { - O << "\t.weak\t" << *CurrentFnSym << '\n'; - } - break; - } + COFFMMI.DecorateCygMingName(Symb, OutContext, GV, *TM.getTargetData()); - printVisibility(CurrentFnSym, F->getVisibility()); + // Save function name for later type emission. + if (const Function *F = dyn_cast<Function>(GV)) + if (F->isDeclaration()) + COFFMMI.addExternalFunction(Symb->getName()); - if (Subtarget->isTargetELF()) { - O << "\t.type\t" << *CurrentFnSym << ",@function\n"; - } else if (Subtarget->isTargetCygMing()) { - O << "\t.def\t " << *CurrentFnSym; - O << ";\t.scl\t" << - (F->hasInternalLinkage() ? COFF::C_STAT : COFF::C_EXT) - << ";\t.type\t" << (COFF::DT_FCN << COFF::N_BTSHFT) - << ";\t.endef\n"; } - O << *CurrentFnSym << ':'; - if (VerboseAsm) { - O.PadToColumn(MAI->getCommentColumn()); - O << MAI->getCommentString() << ' '; - WriteAsOperand(O, F, /*PrintType=*/false, F->getParent()); - } - O << '\n'; - - // Add some workaround for linkonce linkage on Cygwin\MinGW - if (Subtarget->isTargetCygMing() && - (F->hasLinkOnceLinkage() || F->hasWeakLinkage())) - O << "Lllvm$workaround$fake$stub$" << *CurrentFnSym << ":\n"; + return Symb; } -/// runOnMachineFunction - This uses the printMachineInstruction() -/// method to print assembly for each instruction. +/// runOnMachineFunction - Emit the function body. /// bool X86AsmPrinter::runOnMachineFunction(MachineFunction &MF) { - const Function *F = MF.getFunction(); - this->MF = &MF; - CallingConv::ID CC = F->getCallingConv(); - SetupMachineFunction(MF); - O << "\n\n"; if (Subtarget->isTargetCOFF()) { - X86COFFMachineModuleInfo &COFFMMI = - MMI->getObjFileInfo<X86COFFMachineModuleInfo>(); - - // Populate function information map. Don't want to populate - // non-stdcall or non-fastcall functions' information right now. - if (CC == CallingConv::X86_StdCall || CC == CallingConv::X86_FastCall) - COFFMMI.AddFunctionInfo(F, *MF.getInfo<X86MachineFunctionInfo>()); - } - - // Print out constants referenced by the function - EmitConstantPool(MF.getConstantPool()); - - // Print the 'header' of function - emitFunctionHeader(MF); - - // Emit pre-function debug and/or EH information. - if (MAI->doesSupportDebugInformation() || MAI->doesSupportExceptionHandling()) - DW->BeginFunction(&MF); - - // Print out code for the function. - bool hasAnyRealCode = false; - for (MachineFunction::const_iterator I = MF.begin(), E = MF.end(); - I != E; ++I) { - // Print a label for the basic block. - EmitBasicBlockStart(I); - for (MachineBasicBlock::const_iterator II = I->begin(), IE = I->end(); - II != IE; ++II) { - // Print the assembly for the instruction. - if (!II->isLabel()) - hasAnyRealCode = true; - printMachineInstruction(II); - } - } - - if (Subtarget->isTargetDarwin() && !hasAnyRealCode) { - // If the function is empty, then we need to emit *something*. 
Otherwise, - // the function's label might be associated with something that it wasn't - // meant to be associated with. We emit a noop in this situation. - // We are assuming inline asms are code. - O << "\tnop\n"; + const Function *F = MF.getFunction(); + O << "\t.def\t " << *CurrentFnSym << ";\t.scl\t" << + (F->hasInternalLinkage() ? COFF::C_STAT : COFF::C_EXT) + << ";\t.type\t" << (COFF::DT_FCN << COFF::N_BTSHFT) + << ";\t.endef\n"; } - if (MAI->hasDotTypeDotSizeDirective()) - O << "\t.size\t" << *CurrentFnSym << ", .-" << *CurrentFnSym << '\n'; + // Have common code print out the function header with linkage info etc. + EmitFunctionHeader(); - // Emit post-function debug information. - if (MAI->doesSupportDebugInformation() || MAI->doesSupportExceptionHandling()) - DW->EndFunction(&MF); - - // Print out jump tables referenced by the function. - EmitJumpTableInfo(MF.getJumpTableInfo(), MF); + // Emit the rest of the function body. + EmitFunctionBody(); // We didn't modify anything. return false; @@ -223,12 +122,6 @@ void X86AsmPrinter::printSymbolOperand(const MachineOperand &MO) { else GVSym = GetGlobalValueSymbol(GV); - if (Subtarget->isTargetCygMing()) { - X86COFFMachineModuleInfo &COFFMMI = - MMI->getObjFileInfo<X86COFFMachineModuleInfo>(); - COFFMMI.DecorateCygMingName(GVSym, OutContext, GV, *TM.getTargetData()); - } - // Handle dllimport linkage. if (MO.getTargetFlags() == X86II::MO_DLLIMPORT) GVSym = OutContext.GetOrCreateSymbol(Twine("__imp_") + GVSym->getName()); @@ -237,20 +130,20 @@ void X86AsmPrinter::printSymbolOperand(const MachineOperand &MO) { MO.getTargetFlags() == X86II::MO_DARWIN_NONLAZY_PIC_BASE) { MCSymbol *Sym = GetSymbolWithGlobalValueBase(GV, "$non_lazy_ptr"); - const MCSymbol *&StubSym = + MCSymbol *&StubSym = MMI->getObjFileInfo<MachineModuleInfoMachO>().getGVStubEntry(Sym); if (StubSym == 0) StubSym = GetGlobalValueSymbol(GV); } else if (MO.getTargetFlags() == X86II::MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE){ MCSymbol *Sym = GetSymbolWithGlobalValueBase(GV, "$non_lazy_ptr"); - const MCSymbol *&StubSym = + MCSymbol *&StubSym = MMI->getObjFileInfo<MachineModuleInfoMachO>().getHiddenGVStubEntry(Sym); if (StubSym == 0) StubSym = GetGlobalValueSymbol(GV); } else if (MO.getTargetFlags() == X86II::MO_DARWIN_STUB) { MCSymbol *Sym = GetSymbolWithGlobalValueBase(GV, "$stub"); - const MCSymbol *&StubSym = + MCSymbol *&StubSym = MMI->getObjFileInfo<MachineModuleInfoMachO>().getFnStubEntry(Sym); if (StubSym == 0) StubSym = GetGlobalValueSymbol(GV); @@ -272,8 +165,8 @@ void X86AsmPrinter::printSymbolOperand(const MachineOperand &MO) { TempNameStr += StringRef(MO.getSymbolName()); TempNameStr += StringRef("$stub"); - const MCSymbol *Sym = GetExternalSymbolSymbol(TempNameStr.str()); - const MCSymbol *&StubSym = + MCSymbol *Sym = GetExternalSymbolSymbol(TempNameStr.str()); + MCSymbol *&StubSym = MMI->getObjFileInfo<MachineModuleInfoMachO>().getFnStubEntry(Sym); if (StubSym == 0) { TempNameStr.erase(TempNameStr.end()-5, TempNameStr.end()); @@ -338,7 +231,7 @@ void X86AsmPrinter::print_pcrel_imm(const MachineInstr *MI, unsigned OpNo) { O << MO.getImm(); return; case MachineOperand::MO_MachineBasicBlock: - O << *GetMBBSymbol(MO.getMBB()->getNumber()); + O << *MO.getMBB()->getSymbol(OutContext); return; case MachineOperand::MO_GlobalAddress: case MachineOperand::MO_ExternalSymbol: @@ -451,30 +344,6 @@ void X86AsmPrinter::printMemReference(const MachineInstr *MI, unsigned Op, printLeaMemReference(MI, Op, Modifier); } -void X86AsmPrinter::printPICJumpTableSetLabel(unsigned uid, - const 
MachineBasicBlock *MBB) const { - if (!MAI->getSetDirective()) - return; - - // We don't need .set machinery if we have GOT-style relocations - if (Subtarget->isPICStyleGOT()) - return; - - O << MAI->getSetDirective() << ' ' << MAI->getPrivateGlobalPrefix() - << getFunctionNumber() << '_' << uid << "_set_" << MBB->getNumber() << ','; - - O << *GetMBBSymbol(MBB->getNumber()); - - if (Subtarget->isPICStyleRIPRel()) - O << '-' << *GetJTISymbol(uid) << '\n'; - else { - O << '-'; - PrintPICBaseSymbol(); - O << '\n'; - } -} - - void X86AsmPrinter::printPICLabel(const MachineInstr *MI, unsigned Op) { PrintPICBaseSymbol(); O << '\n'; @@ -482,23 +351,6 @@ void X86AsmPrinter::printPICLabel(const MachineInstr *MI, unsigned Op) { O << ':'; } -void X86AsmPrinter::printPICJumpTableEntry(const MachineJumpTableInfo *MJTI, - const MachineBasicBlock *MBB, - unsigned uid) const { - const char *JTEntryDirective = MJTI->getEntrySize() == 4 ? - MAI->getData32bitsDirective() : MAI->getData64bitsDirective(); - - O << JTEntryDirective << ' '; - - if (Subtarget->isPICStyleRIPRel() || Subtarget->isPICStyleStubPIC()) { - O << MAI->getPrivateGlobalPrefix() << getFunctionNumber() - << '_' << uid << "_set_" << MBB->getNumber(); - } else if (Subtarget->isPICStyleGOT()) - O << *GetMBBSymbol(MBB->getNumber()) << "@GOTOFF"; - else - O << *GetMBBSymbol(MBB->getNumber()); -} - bool X86AsmPrinter::printAsmMRegister(const MachineOperand &MO, char Mode) { unsigned Reg = MO.getReg(); switch (Mode) { @@ -625,24 +477,6 @@ bool X86AsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, } - -/// printMachineInstruction -- Print out a single X86 LLVM instruction MI in -/// AT&T syntax to the current output stream. -/// -void X86AsmPrinter::printMachineInstruction(const MachineInstr *MI) { - ++EmittedInsts; - - processDebugLoc(MI, true); - - printInstructionThroughMCStreamer(MI); - - if (VerboseAsm) - EmitComments(*MI); - O << '\n'; - - processDebugLoc(MI, false); -} - void X86AsmPrinter::EmitEndOfAsmFile(Module &M) { if (Subtarget->isTargetDarwin()) { // All darwin targets use mach-o. @@ -666,14 +500,17 @@ void X86AsmPrinter::EmitEndOfAsmFile(Module &M) { OutStreamer.SwitchSection(TheSection); for (unsigned i = 0, e = Stubs.size(); i != e; ++i) { - O << *Stubs[i].first << ":\n"; - // Get the MCSymbol without the $stub suffix. - O << "\t.indirect_symbol " << *Stubs[i].second; - O << "\n\thlt ; hlt ; hlt ; hlt ; hlt\n"; + // L_foo$stub: + OutStreamer.EmitLabel(Stubs[i].first); + // .indirect_symbol _foo + OutStreamer.EmitSymbolAttribute(Stubs[i].second, MCSA_IndirectSymbol); + // hlt; hlt; hlt; hlt; hlt hlt = 0xf4 = -12. + const char HltInsts[] = { -12, -12, -12, -12, -12 }; + OutStreamer.EmitBytes(StringRef(HltInsts, 5), 0/*addrspace*/); } - O << '\n'; Stubs.clear(); + OutStreamer.AddBlankLine(); } // Output stubs for external and common global variables. 
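A note on the $stub hunk just above, since the inline comment is terse: 0xF4 is the one-byte encoding of the x86 'hlt' instruction, and -12 is that same byte value spelled as a signed char, which is why the initializer looks odd. A minimal standalone check (illustrative only, not part of the patch):

#include <cstdio>

int main() {
  // Same initializer as the patch: five 'hlt' opcodes, each written as the
  // signed-char value of the byte 0xF4 (-12 in two's complement).
  const char HltInsts[] = { -12, -12, -12, -12, -12 };
  for (unsigned i = 0; i != sizeof(HltInsts); ++i)
    printf("%02x ", (unsigned char)HltInsts[i]);  // prints: f4 f4 f4 f4 f4
  printf("\n");
  return 0;
}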
@@ -686,10 +523,15 @@ void X86AsmPrinter::EmitEndOfAsmFile(Module &M) { OutStreamer.SwitchSection(TheSection); for (unsigned i = 0, e = Stubs.size(); i != e; ++i) { - O << *Stubs[i].first << ":\n\t.indirect_symbol " << *Stubs[i].second; - O << "\n\t.long\t0\n"; + // L_foo$non_lazy_ptr: + OutStreamer.EmitLabel(Stubs[i].first); + // .indirect_symbol _foo + OutStreamer.EmitSymbolAttribute(Stubs[i].second, MCSA_IndirectSymbol); + // .long 0 + OutStreamer.EmitIntValue(0, 4/*size*/, 0/*addrspace*/); } Stubs.clear(); + OutStreamer.AddBlankLine(); } Stubs = MMIMacho.GetHiddenGVStubList(); @@ -698,10 +540,15 @@ void X86AsmPrinter::EmitEndOfAsmFile(Module &M) { EmitAlignment(2); for (unsigned i = 0, e = Stubs.size(); i != e; ++i) { - O << *Stubs[i].first << ":\n" << MAI->getData32bitsDirective(); - O << *Stubs[i].second << '\n'; + // L_foo$non_lazy_ptr: + OutStreamer.EmitLabel(Stubs[i].first); + // .long _foo + OutStreamer.EmitValue(MCSymbolRefExpr::Create(Stubs[i].second, + OutContext), + 4/*size*/, 0/*addrspace*/); } Stubs.clear(); + OutStreamer.AddBlankLine(); } // Funny Darwin hack: This flag tells the linker that no global symbols @@ -735,7 +582,6 @@ void X86AsmPrinter::EmitEndOfAsmFile(Module &M) { for (Module::const_iterator I = M.begin(), E = M.end(); I != E; ++I) if (I->hasDLLExportLinkage()) { MCSymbol *Sym = GetGlobalValueSymbol(I); - COFFMMI.DecorateCygMingName(Sym, OutContext, I, *TM.getTargetData()); DLLExportedFns.push_back(Sym); } @@ -757,6 +603,28 @@ void X86AsmPrinter::EmitEndOfAsmFile(Module &M) { } } } + + if (Subtarget->isTargetELF()) { + TargetLoweringObjectFileELF &TLOFELF = + static_cast<TargetLoweringObjectFileELF &>(getObjFileLowering()); + + MachineModuleInfoELF &MMIELF = MMI->getObjFileInfo<MachineModuleInfoELF>(); + + // Output stubs for external and common global variables. + MachineModuleInfoELF::SymbolListTy Stubs = MMIELF.GetGVStubList(); + if (!Stubs.empty()) { + OutStreamer.SwitchSection(TLOFELF.getDataRelSection()); + const TargetData *TD = TM.getTargetData(); + + for (unsigned i = 0, e = Stubs.size(); i != e; ++i) + O << *Stubs[i].first << ":\n" + << (TD->getPointerSize() == 8 ? + MAI->getData64bitsDirective() : MAI->getData32bitsDirective()) + << *Stubs[i].second << '\n'; + + Stubs.clear(); + } + } } diff --git a/lib/Target/X86/AsmPrinter/X86AsmPrinter.h b/lib/Target/X86/AsmPrinter/X86AsmPrinter.h index 6a9262d..039214a 100644 --- a/lib/Target/X86/AsmPrinter/X86AsmPrinter.h +++ b/lib/Target/X86/AsmPrinter/X86AsmPrinter.h @@ -36,8 +36,9 @@ class VISIBILITY_HIDDEN X86AsmPrinter : public AsmPrinter { const X86Subtarget *Subtarget; public: explicit X86AsmPrinter(formatted_raw_ostream &O, TargetMachine &TM, - const MCAsmInfo *T, bool V) - : AsmPrinter(O, TM, T, V) { + MCContext &Ctx, MCStreamer &Streamer, + const MCAsmInfo *T) + : AsmPrinter(O, TM, Ctx, Streamer, T) { Subtarget = &TM.getSubtarget<X86Subtarget>(); } @@ -57,14 +58,10 @@ class VISIBILITY_HIDDEN X86AsmPrinter : public AsmPrinter { virtual void EmitEndOfAsmFile(Module &M); - void printInstructionThroughMCStreamer(const MachineInstr *MI); - - - void printMCInst(const MCInst *MI); - - void printSymbolOperand(const MachineOperand &MO); - + virtual void EmitInstruction(const MachineInstr *MI); + void printSymbolOperand(const MachineOperand &MO); + virtual MCSymbol *GetGlobalValueSymbol(const GlobalValue *GV) const; // These methods are used by the tablegen'erated instruction printer. 
void printOperand(const MachineInstr *MI, unsigned OpNo, @@ -124,24 +121,12 @@ class VISIBILITY_HIDDEN X86AsmPrinter : public AsmPrinter { const char *Modifier=NULL); void printLeaMemReference(const MachineInstr *MI, unsigned Op, const char *Modifier=NULL); - void printPICJumpTableSetLabel(unsigned uid, - const MachineBasicBlock *MBB) const; - void printPICJumpTableSetLabel(unsigned uid, unsigned uid2, - const MachineBasicBlock *MBB) const { - AsmPrinter::printPICJumpTableSetLabel(uid, uid2, MBB); - } - void printPICJumpTableEntry(const MachineJumpTableInfo *MJTI, - const MachineBasicBlock *MBB, - unsigned uid) const; void printPICLabel(const MachineInstr *MI, unsigned Op); void PrintPICBaseSymbol() const; bool runOnMachineFunction(MachineFunction &F); - - void emitFunctionHeader(const MachineFunction &MF); - }; } // end namespace llvm diff --git a/lib/Target/X86/AsmPrinter/X86IntelInstPrinter.cpp b/lib/Target/X86/AsmPrinter/X86IntelInstPrinter.cpp index 4efb529..610beb5 100644 --- a/lib/Target/X86/AsmPrinter/X86IntelInstPrinter.cpp +++ b/lib/Target/X86/AsmPrinter/X86IntelInstPrinter.cpp @@ -24,11 +24,14 @@ using namespace llvm; // Include the auto-generated portion of the assembly writer. #define MachineInstr MCInst -#define NO_ASM_WRITER_BOILERPLATE +#define GET_INSTRUCTION_NAME #include "X86GenAsmWriter1.inc" #undef MachineInstr void X86IntelInstPrinter::printInst(const MCInst *MI) { printInstruction(MI); } +StringRef X86IntelInstPrinter::getOpcodeName(unsigned Opcode) const { + return getInstructionName(Opcode); +} void X86IntelInstPrinter::printSSECC(const MCInst *MI, unsigned Op) { switch (MI->getOperand(Op).getImm()) { diff --git a/lib/Target/X86/AsmPrinter/X86IntelInstPrinter.h b/lib/Target/X86/AsmPrinter/X86IntelInstPrinter.h index 1976177..545bf84 100644 --- a/lib/Target/X86/AsmPrinter/X86IntelInstPrinter.h +++ b/lib/Target/X86/AsmPrinter/X86IntelInstPrinter.h @@ -26,10 +26,12 @@ public: : MCInstPrinter(O, MAI) {} virtual void printInst(const MCInst *MI); + virtual StringRef getOpcodeName(unsigned Opcode) const; // Autogenerated by tblgen. void printInstruction(const MCInst *MI); static const char *getRegisterName(unsigned RegNo); + static const char *getInstructionName(unsigned Opcode); void printOperand(const MCInst *MI, unsigned OpNo, diff --git a/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp b/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp index b970d46..fa8d13d 100644 --- a/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp +++ b/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp @@ -14,8 +14,9 @@ #include "X86MCInstLower.h" #include "X86AsmPrinter.h" -#include "X86MCAsmInfo.h" #include "X86COFFMachineModuleInfo.h" +#include "X86MCAsmInfo.h" +#include "X86MCTargetExpr.h" #include "llvm/Analysis/DebugInfo.h" #include "llvm/CodeGen/MachineModuleInfoImpls.h" #include "llvm/MC/MCContext.h" @@ -25,6 +26,7 @@ #include "llvm/Target/Mangler.h" #include "llvm/Support/FormattedStream.h" #include "llvm/ADT/SmallString.h" +#include "llvm/Type.h" using namespace llvm; @@ -39,37 +41,45 @@ MachineModuleInfoMachO &X86MCInstLower::getMachOMMI() const { MCSymbol *X86MCInstLower::GetPICBaseSymbol() const { - return Ctx.GetOrCreateSymbol(Twine(AsmPrinter.MAI->getPrivateGlobalPrefix())+ - Twine(AsmPrinter.getFunctionNumber())+"$pb"); + const TargetLowering *TLI = AsmPrinter.TM.getTargetLowering(); + return static_cast<const X86TargetLowering*>(TLI)-> + getPICBaseSymbol(AsmPrinter.MF, Ctx); } -/// LowerGlobalAddressOperand - Lower an MO_GlobalAddress operand to an -/// MCOperand. 
+/// GetSymbolFromOperand - Lower an MO_GlobalAddress or MO_ExternalSymbol +/// operand to an MCSymbol. MCSymbol *X86MCInstLower:: -GetGlobalAddressSymbol(const MachineOperand &MO) const { - const GlobalValue *GV = MO.getGlobal(); - - bool isImplicitlyPrivate = false; - if (MO.getTargetFlags() == X86II::MO_DARWIN_STUB || - MO.getTargetFlags() == X86II::MO_DARWIN_NONLAZY || - MO.getTargetFlags() == X86II::MO_DARWIN_NONLAZY_PIC_BASE || - MO.getTargetFlags() == X86II::MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE) - isImplicitlyPrivate = true; - +GetSymbolFromOperand(const MachineOperand &MO) const { + assert((MO.isGlobal() || MO.isSymbol()) && "Isn't a symbol reference"); + SmallString<128> Name; - Mang->getNameWithPrefix(Name, GV, isImplicitlyPrivate); - if (getSubtarget().isTargetCygMing()) { - X86COFFMachineModuleInfo &COFFMMI = - AsmPrinter.MMI->getObjFileInfo<X86COFFMachineModuleInfo>(); - COFFMMI.DecorateCygMingName(Name, GV, *AsmPrinter.TM.getTargetData()); - } + if (MO.isGlobal()) { + bool isImplicitlyPrivate = false; + if (MO.getTargetFlags() == X86II::MO_DARWIN_STUB || + MO.getTargetFlags() == X86II::MO_DARWIN_NONLAZY || + MO.getTargetFlags() == X86II::MO_DARWIN_NONLAZY_PIC_BASE || + MO.getTargetFlags() == X86II::MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE) + isImplicitlyPrivate = true; + + const GlobalValue *GV = MO.getGlobal(); + Mang->getNameWithPrefix(Name, GV, isImplicitlyPrivate); + if (getSubtarget().isTargetCygMing()) { + X86COFFMachineModuleInfo &COFFMMI = + AsmPrinter.MMI->getObjFileInfo<X86COFFMachineModuleInfo>(); + COFFMMI.DecorateCygMingName(Name, GV, *AsmPrinter.TM.getTargetData()); + } + } else { + assert(MO.isSymbol()); + Name += AsmPrinter.MAI->getGlobalPrefix(); + Name += MO.getSymbolName(); + } + + // If the target flags on the operand changes the name of the symbol, do that + // before we return the symbol. switch (MO.getTargetFlags()) { - default: llvm_unreachable("Unknown target flag on GV operand"); - case X86II::MO_NO_FLAG: // No flag. - case X86II::MO_PIC_BASE_OFFSET: // Doesn't modify symbol name. - break; + default: break; case X86II::MO_DLLIMPORT: { // Handle dllimport linkage. const char *Prefix = "__imp_"; @@ -81,190 +91,72 @@ GetGlobalAddressSymbol(const MachineOperand &MO) const { Name += "$non_lazy_ptr"; MCSymbol *Sym = Ctx.GetOrCreateSymbol(Name.str()); - const MCSymbol *&StubSym = getMachOMMI().getGVStubEntry(Sym); - if (StubSym == 0) - StubSym = AsmPrinter.GetGlobalValueSymbol(GV); + MCSymbol *&StubSym = getMachOMMI().getGVStubEntry(Sym); + if (StubSym == 0) { + assert(MO.isGlobal() && "Extern symbol not handled yet"); + StubSym = AsmPrinter.GetGlobalValueSymbol(MO.getGlobal()); + } return Sym; } case X86II::MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE: { Name += "$non_lazy_ptr"; MCSymbol *Sym = Ctx.GetOrCreateSymbol(Name.str()); - const MCSymbol *&StubSym = getMachOMMI().getHiddenGVStubEntry(Sym); - if (StubSym == 0) - StubSym = AsmPrinter.GetGlobalValueSymbol(GV); + MCSymbol *&StubSym = getMachOMMI().getHiddenGVStubEntry(Sym); + if (StubSym == 0) { + assert(MO.isGlobal() && "Extern symbol not handled yet"); + StubSym = AsmPrinter.GetGlobalValueSymbol(MO.getGlobal()); + } return Sym; } case X86II::MO_DARWIN_STUB: { Name += "$stub"; MCSymbol *Sym = Ctx.GetOrCreateSymbol(Name.str()); - const MCSymbol *&StubSym = getMachOMMI().getFnStubEntry(Sym); - if (StubSym == 0) - StubSym = AsmPrinter.GetGlobalValueSymbol(GV); - return Sym; - } - // FIXME: These probably should be a modifier on the symbol or something?? 
- case X86II::MO_TLSGD: Name += "@TLSGD"; break; - case X86II::MO_GOTTPOFF: Name += "@GOTTPOFF"; break; - case X86II::MO_INDNTPOFF: Name += "@INDNTPOFF"; break; - case X86II::MO_TPOFF: Name += "@TPOFF"; break; - case X86II::MO_NTPOFF: Name += "@NTPOFF"; break; - case X86II::MO_GOTPCREL: Name += "@GOTPCREL"; break; - case X86II::MO_GOT: Name += "@GOT"; break; - case X86II::MO_GOTOFF: Name += "@GOTOFF"; break; - case X86II::MO_PLT: Name += "@PLT"; break; - } - - return Ctx.GetOrCreateSymbol(Name.str()); -} - -MCSymbol *X86MCInstLower:: -GetExternalSymbolSymbol(const MachineOperand &MO) const { - SmallString<128> Name; - Name += AsmPrinter.MAI->getGlobalPrefix(); - Name += MO.getSymbolName(); - - switch (MO.getTargetFlags()) { - default: llvm_unreachable("Unknown target flag on GV operand"); - case X86II::MO_NO_FLAG: // No flag. - case X86II::MO_GOT_ABSOLUTE_ADDRESS: // Doesn't modify symbol name. - case X86II::MO_PIC_BASE_OFFSET: // Doesn't modify symbol name. - break; - case X86II::MO_DLLIMPORT: { - // Handle dllimport linkage. - const char *Prefix = "__imp_"; - Name.insert(Name.begin(), Prefix, Prefix+strlen(Prefix)); - break; - } - case X86II::MO_DARWIN_STUB: { - Name += "$stub"; - MCSymbol *Sym = Ctx.GetOrCreateSymbol(Name.str()); - const MCSymbol *&StubSym = getMachOMMI().getFnStubEntry(Sym); - - if (StubSym == 0) { + MCSymbol *&StubSym = getMachOMMI().getFnStubEntry(Sym); + if (StubSym) + return Sym; + + if (MO.isGlobal()) { + StubSym = AsmPrinter.GetGlobalValueSymbol(MO.getGlobal()); + } else { Name.erase(Name.end()-5, Name.end()); StubSym = Ctx.GetOrCreateSymbol(Name.str()); } return Sym; } - // FIXME: These probably should be a modifier on the symbol or something?? - case X86II::MO_TLSGD: Name += "@TLSGD"; break; - case X86II::MO_GOTTPOFF: Name += "@GOTTPOFF"; break; - case X86II::MO_INDNTPOFF: Name += "@INDNTPOFF"; break; - case X86II::MO_TPOFF: Name += "@TPOFF"; break; - case X86II::MO_NTPOFF: Name += "@NTPOFF"; break; - case X86II::MO_GOTPCREL: Name += "@GOTPCREL"; break; - case X86II::MO_GOT: Name += "@GOT"; break; - case X86II::MO_GOTOFF: Name += "@GOTOFF"; break; - case X86II::MO_PLT: Name += "@PLT"; break; - } - - return Ctx.GetOrCreateSymbol(Name.str()); -} - -MCSymbol *X86MCInstLower::GetJumpTableSymbol(const MachineOperand &MO) const { - SmallString<256> Name; - // FIXME: Use AsmPrinter.GetJTISymbol. @TLSGD shouldn't be part of the symbol - // name! - raw_svector_ostream(Name) << AsmPrinter.MAI->getPrivateGlobalPrefix() << "JTI" - << AsmPrinter.getFunctionNumber() << '_' << MO.getIndex(); - - switch (MO.getTargetFlags()) { - default: - llvm_unreachable("Unknown target flag on GV operand"); - case X86II::MO_NO_FLAG: // No flag. - case X86II::MO_PIC_BASE_OFFSET: - case X86II::MO_DARWIN_NONLAZY_PIC_BASE: - case X86II::MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE: - break; - // FIXME: These probably should be a modifier on the symbol or something?? - case X86II::MO_TLSGD: Name += "@TLSGD"; break; - case X86II::MO_GOTTPOFF: Name += "@GOTTPOFF"; break; - case X86II::MO_INDNTPOFF: Name += "@INDNTPOFF"; break; - case X86II::MO_TPOFF: Name += "@TPOFF"; break; - case X86II::MO_NTPOFF: Name += "@NTPOFF"; break; - case X86II::MO_GOTPCREL: Name += "@GOTPCREL"; break; - case X86II::MO_GOT: Name += "@GOT"; break; - case X86II::MO_GOTOFF: Name += "@GOTOFF"; break; - case X86II::MO_PLT: Name += "@PLT"; break; } - - // Create a symbol for the name. 
- return Ctx.GetOrCreateSymbol(Name.str()); -} - -MCSymbol *X86MCInstLower:: -GetConstantPoolIndexSymbol(const MachineOperand &MO) const { - SmallString<256> Name; - // FIXME: USe AsmPrinter.GetCPISymbol. @TLSGD shouldn't be part of the symbol - // name! - raw_svector_ostream(Name) << AsmPrinter.MAI->getPrivateGlobalPrefix() << "CPI" - << AsmPrinter.getFunctionNumber() << '_' << MO.getIndex(); - - switch (MO.getTargetFlags()) { - default: - llvm_unreachable("Unknown target flag on GV operand"); - case X86II::MO_NO_FLAG: // No flag. - case X86II::MO_PIC_BASE_OFFSET: - case X86II::MO_DARWIN_NONLAZY_PIC_BASE: - case X86II::MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE: - break; - // FIXME: These probably should be a modifier on the symbol or something?? - case X86II::MO_TLSGD: Name += "@TLSGD"; break; - case X86II::MO_GOTTPOFF: Name += "@GOTTPOFF"; break; - case X86II::MO_INDNTPOFF: Name += "@INDNTPOFF"; break; - case X86II::MO_TPOFF: Name += "@TPOFF"; break; - case X86II::MO_NTPOFF: Name += "@NTPOFF"; break; - case X86II::MO_GOTPCREL: Name += "@GOTPCREL"; break; - case X86II::MO_GOT: Name += "@GOT"; break; - case X86II::MO_GOTOFF: Name += "@GOTOFF"; break; - case X86II::MO_PLT: Name += "@PLT"; break; - } - - // Create a symbol for the name. return Ctx.GetOrCreateSymbol(Name.str()); } -MCSymbol *X86MCInstLower:: -GetBlockAddressSymbol(const MachineOperand &MO) const { - const char *Suffix = ""; - switch (MO.getTargetFlags()) { - default: llvm_unreachable("Unknown target flag on BA operand"); - case X86II::MO_NO_FLAG: break; // No flag. - case X86II::MO_PIC_BASE_OFFSET: break; // Doesn't modify symbol name. - case X86II::MO_GOTOFF: Suffix = "@GOTOFF"; break; - } - - return AsmPrinter.GetBlockAddressSymbol(MO.getBlockAddress(), Suffix); -} - MCOperand X86MCInstLower::LowerSymbolOperand(const MachineOperand &MO, MCSymbol *Sym) const { // FIXME: We would like an efficient form for this, so we don't have to do a // lot of extra uniquing. - const MCExpr *Expr = MCSymbolRefExpr::Create(Sym, Ctx); + const MCExpr *Expr = 0; + X86MCTargetExpr::VariantKind RefKind = X86MCTargetExpr::Invalid; switch (MO.getTargetFlags()) { default: llvm_unreachable("Unknown target flag on GV operand"); case X86II::MO_NO_FLAG: // No flag. - // These affect the name of the symbol, not any suffix. case X86II::MO_DARWIN_NONLAZY: case X86II::MO_DLLIMPORT: case X86II::MO_DARWIN_STUB: - case X86II::MO_TLSGD: - case X86II::MO_GOTTPOFF: - case X86II::MO_INDNTPOFF: - case X86II::MO_TPOFF: - case X86II::MO_NTPOFF: - case X86II::MO_GOTPCREL: - case X86II::MO_GOT: - case X86II::MO_GOTOFF: - case X86II::MO_PLT: break; + + case X86II::MO_TLSGD: RefKind = X86MCTargetExpr::TLSGD; break; + case X86II::MO_GOTTPOFF: RefKind = X86MCTargetExpr::GOTTPOFF; break; + case X86II::MO_INDNTPOFF: RefKind = X86MCTargetExpr::INDNTPOFF; break; + case X86II::MO_TPOFF: RefKind = X86MCTargetExpr::TPOFF; break; + case X86II::MO_NTPOFF: RefKind = X86MCTargetExpr::NTPOFF; break; + case X86II::MO_GOTPCREL: RefKind = X86MCTargetExpr::GOTPCREL; break; + case X86II::MO_GOT: RefKind = X86MCTargetExpr::GOT; break; + case X86II::MO_GOTOFF: RefKind = X86MCTargetExpr::GOTOFF; break; + case X86II::MO_PLT: RefKind = X86MCTargetExpr::PLT; break; case X86II::MO_PIC_BASE_OFFSET: case X86II::MO_DARWIN_NONLAZY_PIC_BASE: case X86II::MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE: + Expr = MCSymbolRefExpr::Create(Sym, Ctx); // Subtract the pic base. 
Expr = MCBinaryExpr::CreateSub(Expr, MCSymbolRefExpr::Create(GetPICBaseSymbol(), Ctx), @@ -272,6 +164,13 @@ MCOperand X86MCInstLower::LowerSymbolOperand(const MachineOperand &MO, break; } + if (Expr == 0) { + if (RefKind == X86MCTargetExpr::Invalid) + Expr = MCSymbolRefExpr::Create(Sym, Ctx); + else + Expr = X86MCTargetExpr::Create(Sym, RefKind, Ctx); + } + if (!MO.isJTI() && MO.getOffset()) Expr = MCBinaryExpr::CreateAdd(Expr, MCConstantExpr::Create(MO.getOffset(), Ctx), @@ -300,6 +199,17 @@ static void lower_lea64_32mem(MCInst *MI, unsigned OpNo) { } } +/// LowerSubReg32_Op0 - Things like MOVZX16rr8 -> MOVZX32rr8. +static void LowerSubReg32_Op0(MCInst &OutMI, unsigned NewOpc) { + OutMI.setOpcode(NewOpc); + lower_subreg32(&OutMI, 0); +} +/// LowerUnaryToTwoAddr - R = setb -> R = sbb R, R +static void LowerUnaryToTwoAddr(MCInst &OutMI, unsigned NewOpc) { + OutMI.setOpcode(NewOpc); + OutMI.addOperand(OutMI.getOperand(0)); + OutMI.addOperand(OutMI.getOperand(0)); +} void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const { @@ -323,22 +233,23 @@ void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const { break; case MachineOperand::MO_MachineBasicBlock: MCOp = MCOperand::CreateExpr(MCSymbolRefExpr::Create( - AsmPrinter.GetMBBSymbol(MO.getMBB()->getNumber()), Ctx)); + MO.getMBB()->getSymbol(Ctx), Ctx)); break; case MachineOperand::MO_GlobalAddress: - MCOp = LowerSymbolOperand(MO, GetGlobalAddressSymbol(MO)); + MCOp = LowerSymbolOperand(MO, GetSymbolFromOperand(MO)); break; case MachineOperand::MO_ExternalSymbol: - MCOp = LowerSymbolOperand(MO, GetExternalSymbolSymbol(MO)); + MCOp = LowerSymbolOperand(MO, GetSymbolFromOperand(MO)); break; case MachineOperand::MO_JumpTableIndex: - MCOp = LowerSymbolOperand(MO, GetJumpTableSymbol(MO)); + MCOp = LowerSymbolOperand(MO, AsmPrinter.GetJTISymbol(MO.getIndex())); break; case MachineOperand::MO_ConstantPoolIndex: - MCOp = LowerSymbolOperand(MO, GetConstantPoolIndexSymbol(MO)); + MCOp = LowerSymbolOperand(MO, AsmPrinter.GetCPISymbol(MO.getIndex())); break; case MachineOperand::MO_BlockAddress: - MCOp = LowerSymbolOperand(MO, GetBlockAddressSymbol(MO)); + MCOp = LowerSymbolOperand(MO, + AsmPrinter.GetBlockAddressSymbol(MO.getBlockAddress())); break; } @@ -350,72 +261,48 @@ void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const { case X86::LEA64_32r: // Handle 'subreg rewriting' for the lea64_32mem operand. 
lower_lea64_32mem(&OutMI, 1); break; - case X86::MOVZX16rr8: - OutMI.setOpcode(X86::MOVZX32rr8); - lower_subreg32(&OutMI, 0); - break; - case X86::MOVZX16rm8: - OutMI.setOpcode(X86::MOVZX32rm8); - lower_subreg32(&OutMI, 0); - break; - case X86::MOVSX16rr8: - OutMI.setOpcode(X86::MOVSX32rr8); - lower_subreg32(&OutMI, 0); - break; - case X86::MOVSX16rm8: - OutMI.setOpcode(X86::MOVSX32rm8); - lower_subreg32(&OutMI, 0); - break; - case X86::MOVZX64rr32: - OutMI.setOpcode(X86::MOV32rr); - lower_subreg32(&OutMI, 0); - break; - case X86::MOVZX64rm32: - OutMI.setOpcode(X86::MOV32rm); - lower_subreg32(&OutMI, 0); - break; - case X86::MOV64ri64i32: - OutMI.setOpcode(X86::MOV32ri); - lower_subreg32(&OutMI, 0); - break; - case X86::MOVZX64rr8: - OutMI.setOpcode(X86::MOVZX32rr8); - lower_subreg32(&OutMI, 0); - break; - case X86::MOVZX64rm8: - OutMI.setOpcode(X86::MOVZX32rm8); - lower_subreg32(&OutMI, 0); - break; - case X86::MOVZX64rr16: - OutMI.setOpcode(X86::MOVZX32rr16); - lower_subreg32(&OutMI, 0); - break; - case X86::MOVZX64rm16: - OutMI.setOpcode(X86::MOVZX32rm16); - lower_subreg32(&OutMI, 0); - break; + case X86::MOVZX16rr8: LowerSubReg32_Op0(OutMI, X86::MOVZX32rr8); break; + case X86::MOVZX16rm8: LowerSubReg32_Op0(OutMI, X86::MOVZX32rm8); break; + case X86::MOVSX16rr8: LowerSubReg32_Op0(OutMI, X86::MOVSX32rr8); break; + case X86::MOVSX16rm8: LowerSubReg32_Op0(OutMI, X86::MOVSX32rm8); break; + case X86::MOVZX64rr32: LowerSubReg32_Op0(OutMI, X86::MOV32rr); break; + case X86::MOVZX64rm32: LowerSubReg32_Op0(OutMI, X86::MOV32rm); break; + case X86::MOV64ri64i32: LowerSubReg32_Op0(OutMI, X86::MOV32ri); break; + case X86::MOVZX64rr8: LowerSubReg32_Op0(OutMI, X86::MOVZX32rr8); break; + case X86::MOVZX64rm8: LowerSubReg32_Op0(OutMI, X86::MOVZX32rm8); break; + case X86::MOVZX64rr16: LowerSubReg32_Op0(OutMI, X86::MOVZX32rr16); break; + case X86::MOVZX64rm16: LowerSubReg32_Op0(OutMI, X86::MOVZX32rm16); break; + case X86::SETB_C8r: LowerUnaryToTwoAddr(OutMI, X86::SBB8rr); break; + case X86::SETB_C16r: LowerUnaryToTwoAddr(OutMI, X86::SBB16rr); break; + case X86::SETB_C32r: LowerUnaryToTwoAddr(OutMI, X86::SBB32rr); break; + case X86::SETB_C64r: LowerUnaryToTwoAddr(OutMI, X86::SBB64rr); break; + case X86::MOV8r0: LowerUnaryToTwoAddr(OutMI, X86::XOR8rr); break; + case X86::MOV32r0: LowerUnaryToTwoAddr(OutMI, X86::XOR32rr); break; + case X86::MMX_V_SET0: LowerUnaryToTwoAddr(OutMI, X86::MMX_PXORrr); break; + case X86::MMX_V_SETALLONES: + LowerUnaryToTwoAddr(OutMI, X86::MMX_PCMPEQDrr); break; + case X86::FsFLD0SS: LowerUnaryToTwoAddr(OutMI, X86::PXORrr); break; + case X86::FsFLD0SD: LowerUnaryToTwoAddr(OutMI, X86::PXORrr); break; + case X86::V_SET0: LowerUnaryToTwoAddr(OutMI, X86::XORPSrr); break; + case X86::V_SETALLONES: LowerUnaryToTwoAddr(OutMI, X86::PCMPEQDrr); break; + case X86::MOV16r0: - OutMI.setOpcode(X86::MOV32r0); - lower_subreg32(&OutMI, 0); + LowerSubReg32_Op0(OutMI, X86::MOV32r0); // MOV16r0 -> MOV32r0 + LowerUnaryToTwoAddr(OutMI, X86::XOR32rr); // MOV32r0 -> XOR32rr break; case X86::MOV64r0: - OutMI.setOpcode(X86::MOV32r0); - lower_subreg32(&OutMI, 0); + LowerSubReg32_Op0(OutMI, X86::MOV32r0); // MOV64r0 -> MOV32r0 + LowerUnaryToTwoAddr(OutMI, X86::XOR32rr); // MOV32r0 -> XOR32rr break; } } -void X86AsmPrinter::printInstructionThroughMCStreamer(const MachineInstr *MI) { +void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) { X86MCInstLower MCInstLowering(OutContext, Mang, *this); switch (MI->getOpcode()) { - case TargetInstrInfo::DBG_LABEL: - case TargetInstrInfo::EH_LABEL: - case 
TargetInstrInfo::GC_LABEL: - printLabel(MI); - return; - case TargetInstrInfo::DEBUG_VALUE: { + case TargetOpcode::DBG_VALUE: { // FIXME: if this is implemented for another target before it goes // away completely, the common part should be moved into AsmPrinter. if (!VerboseAsm) @@ -427,9 +314,35 @@ void X86AsmPrinter::printInstructionThroughMCStreamer(const MachineInstr *MI) { O << V.getName(); O << " <- "; if (NOps==3) { - // Variable is in register - assert(MI->getOperand(0).getType()==MachineOperand::MO_Register); - printOperand(MI, 0); + // Register or immediate value. Register 0 means undef. + assert(MI->getOperand(0).getType()==MachineOperand::MO_Register || + MI->getOperand(0).getType()==MachineOperand::MO_Immediate || + MI->getOperand(0).getType()==MachineOperand::MO_FPImmediate); + if (MI->getOperand(0).getType()==MachineOperand::MO_Register && + MI->getOperand(0).getReg()==0) { + // Suppress offset in this case, it is not meaningful. + O << "undef"; + OutStreamer.AddBlankLine(); + return; + } else if (MI->getOperand(0).getType()==MachineOperand::MO_FPImmediate) { + // This is more naturally done in printOperand, but since the only use + // of such an operand is in this comment and that is temporary (and it's + // ugly), we prefer to keep this localized. + // The include of Type.h may be removable when this code is. + if (MI->getOperand(0).getFPImm()->getType()->isFloatTy() || + MI->getOperand(0).getFPImm()->getType()->isDoubleTy()) + MI->getOperand(0).print(O, &TM); + else { + // There is no good way to print long double. Convert a copy to + // double. Ah well, it's only a comment. + bool ignored; + APFloat APF = APFloat(MI->getOperand(0).getFPImm()->getValueAPF()); + APF.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven, + &ignored); + O << "(long double) " << APF.convertToDouble(); + } + } else + printOperand(MI, 0); } else { // Frame address. Currently handles register +- offset only. assert(MI->getOperand(0).getType()==MachineOperand::MO_Register); @@ -438,17 +351,9 @@ void X86AsmPrinter::printInstructionThroughMCStreamer(const MachineInstr *MI) { } O << "+"; printOperand(MI, NOps-2); + OutStreamer.AddBlankLine(); return; } - case TargetInstrInfo::INLINEASM: - printInlineAsm(MI); - return; - case TargetInstrInfo::IMPLICIT_DEF: - printImplicitDef(MI); - return; - case TargetInstrInfo::KILL: - printKill(MI); - return; case X86::MOVPC32r: { MCInst TmpInst; // This is a pseudo op for a two instruction sequence with a label, which @@ -464,8 +369,7 @@ void X86AsmPrinter::printInstructionThroughMCStreamer(const MachineInstr *MI) { // lot of extra uniquing. TmpInst.addOperand(MCOperand::CreateExpr(MCSymbolRefExpr::Create(PICBase, OutContext))); - printMCInst(&TmpInst); - O << '\n'; + OutStreamer.EmitInstruction(TmpInst); // Emit the label. OutStreamer.EmitLabel(PICBase); @@ -473,7 +377,7 @@ void X86AsmPrinter::printInstructionThroughMCStreamer(const MachineInstr *MI) { // popl $reg TmpInst.setOpcode(X86::POP32r); TmpInst.getOperand(0) = MCOperand::CreateReg(MI->getOperand(0).getReg()); - printMCInst(&TmpInst); + OutStreamer.EmitInstruction(TmpInst); return; } @@ -495,7 +399,7 @@ void X86AsmPrinter::printInstructionThroughMCStreamer(const MachineInstr *MI) { OutStreamer.EmitLabel(DotSym); // Now that we have emitted the label, lower the complex operand expression. 
- MCSymbol *OpSym = MCInstLowering.GetExternalSymbolSymbol(MI->getOperand(2)); + MCSymbol *OpSym = MCInstLowering.GetSymbolFromOperand(MI->getOperand(2)); const MCExpr *DotExpr = MCSymbolRefExpr::Create(DotSym, OutContext); const MCExpr *PICBase = @@ -510,7 +414,7 @@ void X86AsmPrinter::printInstructionThroughMCStreamer(const MachineInstr *MI) { TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg())); TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(1).getReg())); TmpInst.addOperand(MCOperand::CreateExpr(DotExpr)); - printMCInst(&TmpInst); + OutStreamer.EmitInstruction(TmpInst); return; } } @@ -518,7 +422,6 @@ void X86AsmPrinter::printInstructionThroughMCStreamer(const MachineInstr *MI) { MCInst TmpInst; MCInstLowering.Lower(MI, TmpInst); - - printMCInst(&TmpInst); + OutStreamer.EmitInstruction(TmpInst); } diff --git a/lib/Target/X86/AsmPrinter/X86MCInstLower.h b/lib/Target/X86/AsmPrinter/X86MCInstLower.h index 94f8bfc..ebd23f6 100644 --- a/lib/Target/X86/AsmPrinter/X86MCInstLower.h +++ b/lib/Target/X86/AsmPrinter/X86MCInstLower.h @@ -39,11 +39,7 @@ public: MCSymbol *GetPICBaseSymbol() const; - MCSymbol *GetGlobalAddressSymbol(const MachineOperand &MO) const; - MCSymbol *GetExternalSymbolSymbol(const MachineOperand &MO) const; - MCSymbol *GetJumpTableSymbol(const MachineOperand &MO) const; - MCSymbol *GetConstantPoolIndexSymbol(const MachineOperand &MO) const; - MCSymbol *GetBlockAddressSymbol(const MachineOperand &MO) const; + MCSymbol *GetSymbolFromOperand(const MachineOperand &MO) const; MCOperand LowerSymbolOperand(const MachineOperand &MO, MCSymbol *Sym) const; private: diff --git a/lib/Target/X86/CMakeLists.txt b/lib/Target/X86/CMakeLists.txt index 4186fec..61f26a7 100644 --- a/lib/Target/X86/CMakeLists.txt +++ b/lib/Target/X86/CMakeLists.txt @@ -25,6 +25,8 @@ set(sources X86InstrInfo.cpp X86JITInfo.cpp X86MCAsmInfo.cpp + X86MCCodeEmitter.cpp + X86MCTargetExpr.cpp X86RegisterInfo.cpp X86Subtarget.cpp X86TargetMachine.cpp diff --git a/lib/Target/X86/Disassembler/Makefile b/lib/Target/X86/Disassembler/Makefile index 6c26853..b289647 100644 --- a/lib/Target/X86/Disassembler/Makefile +++ b/lib/Target/X86/Disassembler/Makefile @@ -9,7 +9,6 @@ LEVEL = ../../../.. LIBRARYNAME = LLVMX86Disassembler -CXXFLAGS = -fno-rtti # Hack: we need to include 'main' x86 target directory to grab private headers CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. diff --git a/lib/Target/X86/Makefile b/lib/Target/X86/Makefile index 5e625dc..f4ff894 100644 --- a/lib/Target/X86/Makefile +++ b/lib/Target/X86/Makefile @@ -10,7 +10,6 @@ LEVEL = ../../.. LIBRARYNAME = LLVMX86CodeGen TARGET = X86 -CXXFLAGS = -fno-rtti # Make sure that tblgen is run, first thing. BUILT_SOURCES = X86GenRegisterInfo.h.inc X86GenRegisterNames.inc \ @@ -19,6 +18,7 @@ BUILT_SOURCES = X86GenRegisterInfo.h.inc X86GenRegisterNames.inc \ X86GenAsmWriter1.inc X86GenDAGISel.inc \ X86GenDisassemblerTables.inc X86GenFastISel.inc \ X86GenCallingConv.inc X86GenSubtarget.inc \ + X86GenEDInfo.inc DIRS = AsmPrinter AsmParser Disassembler TargetInfo diff --git a/lib/Target/X86/README-SSE.txt b/lib/Target/X86/README-SSE.txt index 0f3e44b..19eb05e 100644 --- a/lib/Target/X86/README-SSE.txt +++ b/lib/Target/X86/README-SSE.txt @@ -376,7 +376,7 @@ ret ... saving two instructions. The basic idea is that a reload from a spill slot, can, if only one 4-byte -chunk is used, bring in 3 zeros the the one element instead of 4 elements. +chunk is used, bring in 3 zeros the one element instead of 4 elements. 
This can be used to simplify a variety of shuffle operations, where the elements are fixed zeros. @@ -936,3 +936,54 @@ Also, the 'ret's should be shared. This is PR6032. //===----------------------------------------------------------------------===// +These should compile into the same code (PR6214): Perhaps instcombine should +canonicalize the former into the latter? + +define float @foo(float %x) nounwind { + %t = bitcast float %x to i32 + %s = and i32 %t, 2147483647 + %d = bitcast i32 %s to float + ret float %d +} + +declare float @fabsf(float %n) +define float @bar(float %x) nounwind { + %d = call float @fabsf(float %x) + ret float %d +} + +//===----------------------------------------------------------------------===// + +This IR (from PR6194): + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-apple-darwin10.0.0" + +%0 = type { double, double } +%struct.float3 = type { float, float, float } + +define void @test(%0, %struct.float3* nocapture %res) nounwind noinline ssp { +entry: + %tmp18 = extractvalue %0 %0, 0 ; <double> [#uses=1] + %tmp19 = bitcast double %tmp18 to i64 ; <i64> [#uses=1] + %tmp20 = zext i64 %tmp19 to i128 ; <i128> [#uses=1] + %tmp10 = lshr i128 %tmp20, 32 ; <i128> [#uses=1] + %tmp11 = trunc i128 %tmp10 to i32 ; <i32> [#uses=1] + %tmp12 = bitcast i32 %tmp11 to float ; <float> [#uses=1] + %tmp5 = getelementptr inbounds %struct.float3* %res, i64 0, i32 1 ; <float*> [#uses=1] + store float %tmp12, float* %tmp5 + ret void +} + +Compiles to: + +_test: ## @test + movd %xmm0, %rax + shrq $32, %rax + movl %eax, 4(%rdi) + ret + +This would be better kept in the SSE unit by treating XMM0 as a 4xfloat and +doing a shuffle from v[1] to v[0] then a float store. + +//===----------------------------------------------------------------------===// diff --git a/lib/Target/X86/README-UNIMPLEMENTED.txt b/lib/Target/X86/README-UNIMPLEMENTED.txt index 69dc8ee..c26c75a 100644 --- a/lib/Target/X86/README-UNIMPLEMENTED.txt +++ b/lib/Target/X86/README-UNIMPLEMENTED.txt @@ -11,4 +11,4 @@ which would be great. 2) vector comparisons 3) vector fp<->int conversions: PR2683, PR2684, PR2685, PR2686, PR2688 4) bitcasts from vectors to scalars: PR2804 - +5) llvm.atomic.cmp.swap.i128.p0i128: PR3462 diff --git a/lib/Target/X86/README.txt b/lib/Target/X86/README.txt index aa7bb3d..3c6138b 100644 --- a/lib/Target/X86/README.txt +++ b/lib/Target/X86/README.txt @@ -1868,3 +1868,69 @@ carried over to machine instructions. Asm printer (or JIT) can use this information to add the "lock" prefix. //===----------------------------------------------------------------------===// + +_Bool bar(int *x) { return *x & 1; } + +define zeroext i1 @bar(i32* nocapture %x) nounwind readonly { +entry: + %tmp1 = load i32* %x ; <i32> [#uses=1] + %and = and i32 %tmp1, 1 ; <i32> [#uses=1] + %tobool = icmp ne i32 %and, 0 ; <i1> [#uses=1] + ret i1 %tobool +} + +bar: # @bar +# BB#0: # %entry + movl 4(%esp), %eax + movb (%eax), %al + andb $1, %al + movzbl %al, %eax + ret + +Missed optimization: should be movl+andl.
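For reference, the movl+andl sequence this entry is asking for would be something like the following (a hand-written sketch, not actual compiler output):

bar:                                    # @bar
# BB#0:                                 # %entry
        movl    4(%esp), %eax
        movl    (%eax), %eax
        andl    $1, %eax
        ret

Loading the full i32 and masking it in place leaves the zero-extended result directly in %eax, instead of narrowing to %al and re-widening with movzbl.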
+ +//===----------------------------------------------------------------------===// + +Consider the following two functions compiled with clang: +_Bool foo(int *x) { return !(*x & 4); } +unsigned bar(int *x) { return !(*x & 4); } + +foo: + movl 4(%esp), %eax + testb $4, (%eax) + sete %al + movzbl %al, %eax + ret + +bar: + movl 4(%esp), %eax + movl (%eax), %eax + shrl $2, %eax + andl $1, %eax + xorl $1, %eax + ret + +The second function generates more code even though the two functions are +functionally identical. + +//===----------------------------------------------------------------------===// + +Take the following C code: +int x(int y) { return (y & 63) << 14; } + +Code produced by gcc: + andl $63, %edi + sall $14, %edi + movl %edi, %eax + ret + +Code produced by clang: + shll $14, %edi + movl %edi, %eax + andl $1032192, %eax + ret + +The code produced by gcc is 3 bytes shorter. This sort of construct often +shows up with bitfields. + +//===----------------------------------------------------------------------===// diff --git a/lib/Target/X86/TargetInfo/Makefile b/lib/Target/X86/TargetInfo/Makefile index 211607f..9858e6a 100644 --- a/lib/Target/X86/TargetInfo/Makefile +++ b/lib/Target/X86/TargetInfo/Makefile @@ -9,7 +9,6 @@ LEVEL = ../../../.. LIBRARYNAME = LLVMX86Info -CXXFLAGS = -fno-rtti # Hack: we need to include 'main' target directory to grab private headers CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. diff --git a/lib/Target/X86/X86.h b/lib/Target/X86/X86.h index 684c61f..1a1e447 100644 --- a/lib/Target/X86/X86.h +++ b/lib/Target/X86/X86.h @@ -23,6 +23,7 @@ class X86TargetMachine; class FunctionPass; class MachineCodeEmitter; class MCCodeEmitter; +class MCContext; class JITCodeEmitter; class Target; class formatted_raw_ostream; @@ -46,15 +47,13 @@ FunctionPass *createX87FPRegKillInserterPass(); /// createX86CodeEmitterPass - Return a pass that emits the collected X86 code /// to the specified MCE object.
- -FunctionPass *createX86CodeEmitterPass(X86TargetMachine &TM, - MachineCodeEmitter &MCE); FunctionPass *createX86JITCodeEmitterPass(X86TargetMachine &TM, JITCodeEmitter &JCE); -FunctionPass *createX86ObjectCodeEmitterPass(X86TargetMachine &TM, - ObjectCodeEmitter &OCE); -MCCodeEmitter *createX86MCCodeEmitter(const Target &, TargetMachine &TM); +MCCodeEmitter *createX86_32MCCodeEmitter(const Target &, TargetMachine &TM, + MCContext &Ctx); +MCCodeEmitter *createX86_64MCCodeEmitter(const Target &, TargetMachine &TM, + MCContext &Ctx); /// createX86EmitCodeToMemory - Returns a pass that converts a register /// allocated function into raw machine code in a dynamically diff --git a/lib/Target/X86/X86COFFMachineModuleInfo.cpp b/lib/Target/X86/X86COFFMachineModuleInfo.cpp index ea52795..ab67acb 100644 --- a/lib/Target/X86/X86COFFMachineModuleInfo.cpp +++ b/lib/Target/X86/X86COFFMachineModuleInfo.cpp @@ -27,90 +27,55 @@ X86COFFMachineModuleInfo::X86COFFMachineModuleInfo(const MachineModuleInfo &) { X86COFFMachineModuleInfo::~X86COFFMachineModuleInfo() { } -void X86COFFMachineModuleInfo::AddFunctionInfo(const Function *F, - const X86MachineFunctionInfo &Val) { - FunctionInfoMap[F] = Val; +void X86COFFMachineModuleInfo::addExternalFunction(const StringRef& Name) { + CygMingStubs.insert(Name); } - - -static X86MachineFunctionInfo calculateFunctionInfo(const Function *F, - const TargetData &TD) { - X86MachineFunctionInfo Info; - uint64_t Size = 0; - - switch (F->getCallingConv()) { - case CallingConv::X86_StdCall: - Info.setDecorationStyle(StdCall); - break; - case CallingConv::X86_FastCall: - Info.setDecorationStyle(FastCall); - break; - default: - return Info; - } - - unsigned argNum = 1; - for (Function::const_arg_iterator AI = F->arg_begin(), AE = F->arg_end(); - AI != AE; ++AI, ++argNum) { - const Type* Ty = AI->getType(); - - // 'Dereference' type in case of byval parameter attribute - if (F->paramHasAttr(argNum, Attribute::ByVal)) - Ty = cast<PointerType>(Ty)->getElementType(); - - // Size should be aligned to DWORD boundary - Size += ((TD.getTypeAllocSize(Ty) + 3)/4)*4; - } - - // We're not supporting tooooo huge arguments :) - Info.setBytesToPopOnReturn((unsigned int)Size); - return Info; -} - - -/// DecorateCygMingName - Query FunctionInfoMap and use this information for -/// various name decorations for Cygwin and MingW. +/// DecorateCygMingName - Apply various name decorations if the function uses +/// stdcall or fastcall calling convention. void X86COFFMachineModuleInfo::DecorateCygMingName(SmallVectorImpl<char> &Name, const GlobalValue *GV, const TargetData &TD) { const Function *F = dyn_cast<Function>(GV); if (!F) return; - - // Save function name for later type emission. 
- if (F->isDeclaration()) - CygMingStubs.insert(StringRef(Name.data(), Name.size())); - + // We don't want to decorate non-stdcall or non-fastcall functions right now CallingConv::ID CC = F->getCallingConv(); if (CC != CallingConv::X86_StdCall && CC != CallingConv::X86_FastCall) return; - - const X86MachineFunctionInfo *Info; - - FMFInfoMap::const_iterator info_item = FunctionInfoMap.find(F); - if (info_item == FunctionInfoMap.end()) { - // Calculate apropriate function info and populate map - FunctionInfoMap[F] = calculateFunctionInfo(F, TD); - Info = &FunctionInfoMap[F]; - } else { - Info = &info_item->second; - } - - if (Info->getDecorationStyle() == None) return; + + unsigned ArgWords = 0; + DenseMap<const Function*, unsigned>::const_iterator item = FnArgWords.find(F); + if (item == FnArgWords.end()) { + // Calculate arguments sizes + for (Function::const_arg_iterator AI = F->arg_begin(), AE = F->arg_end(); + AI != AE; ++AI) { + const Type* Ty = AI->getType(); + + // 'Dereference' type in case of byval parameter attribute + if (AI->hasByValAttr()) + Ty = cast<PointerType>(Ty)->getElementType(); + + // Size should be aligned to DWORD boundary + ArgWords += ((TD.getTypeAllocSize(Ty) + 3)/4)*4; + } + + FnArgWords[F] = ArgWords; + } else + ArgWords = item->second; + const FunctionType *FT = F->getFunctionType(); - // "Pure" variadic functions do not receive @0 suffix. if (!FT->isVarArg() || FT->getNumParams() == 0 || (FT->getNumParams() == 1 && F->hasStructRetAttr())) - raw_svector_ostream(Name) << '@' << Info->getBytesToPopOnReturn(); - - if (Info->getDecorationStyle() == FastCall) { + raw_svector_ostream(Name) << '@' << ArgWords; + + if (CC == CallingConv::X86_FastCall) { if (Name[0] == '_') Name[0] = '@'; else Name.insert(Name.begin(), '@'); - } + } } /// DecorateCygMingName - Query FunctionInfoMap and use this information for @@ -121,6 +86,6 @@ void X86COFFMachineModuleInfo::DecorateCygMingName(MCSymbol *&Name, const TargetData &TD) { SmallString<128> NameStr(Name->getName().begin(), Name->getName().end()); DecorateCygMingName(NameStr, GV, TD); - + Name = Ctx.GetOrCreateSymbol(NameStr.str()); } diff --git a/lib/Target/X86/X86COFFMachineModuleInfo.h b/lib/Target/X86/X86COFFMachineModuleInfo.h index 0e2009e..9de3dcd 100644 --- a/lib/Target/X86/X86COFFMachineModuleInfo.h +++ b/lib/Target/X86/X86COFFMachineModuleInfo.h @@ -21,44 +21,25 @@ namespace llvm { class X86MachineFunctionInfo; class TargetData; - + /// X86COFFMachineModuleInfo - This is a MachineModuleInfoImpl implementation /// for X86 COFF targets. class X86COFFMachineModuleInfo : public MachineModuleInfoImpl { StringSet<> CygMingStubs; - - // We have to propagate some information about MachineFunction to - // AsmPrinter. It's ok, when we're printing the function, since we have - // access to MachineFunction and can get the appropriate MachineFunctionInfo. - // Unfortunately, this is not possible when we're printing reference to - // Function (e.g. calling it and so on). Even more, there is no way to get the - // corresponding MachineFunctions: it can even be not created at all. That's - // why we should use additional structure, when we're collecting all necessary - // information. - // - // This structure is using e.g. for name decoration for stdcall & fastcall'ed - // function, since we have to use arguments' size for decoration. 
- typedef std::map<const Function*, X86MachineFunctionInfo> FMFInfoMap; - FMFInfoMap FunctionInfoMap; - + DenseMap<const Function*, unsigned> FnArgWords; public: X86COFFMachineModuleInfo(const MachineModuleInfo &); ~X86COFFMachineModuleInfo(); - - + void DecorateCygMingName(MCSymbol* &Name, MCContext &Ctx, const GlobalValue *GV, const TargetData &TD); void DecorateCygMingName(SmallVectorImpl<char> &Name, const GlobalValue *GV, const TargetData &TD); - - void AddFunctionInfo(const Function *F, const X86MachineFunctionInfo &Val); - + void addExternalFunction(const StringRef& Name); typedef StringSet<>::const_iterator stub_iterator; stub_iterator stub_begin() const { return CygMingStubs.begin(); } stub_iterator stub_end() const { return CygMingStubs.end(); } - - }; diff --git a/lib/Target/X86/X86CodeEmitter.cpp b/lib/Target/X86/X86CodeEmitter.cpp index 828e872..8deadf6 100644 --- a/lib/Target/X86/X86CodeEmitter.cpp +++ b/lib/Target/X86/X86CodeEmitter.cpp @@ -21,9 +21,7 @@ #include "X86.h" #include "llvm/LLVMContext.h" #include "llvm/PassManager.h" -#include "llvm/CodeGen/MachineCodeEmitter.h" #include "llvm/CodeGen/JITCodeEmitter.h" -#include "llvm/CodeGen/ObjectCodeEmitter.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineModuleInfo.h" @@ -110,19 +108,10 @@ template<class CodeEmitter> /// createX86CodeEmitterPass - Return a pass that emits the collected X86 code /// to the specified templated MachineCodeEmitter object. - -FunctionPass *llvm::createX86CodeEmitterPass(X86TargetMachine &TM, - MachineCodeEmitter &MCE) { - return new Emitter<MachineCodeEmitter>(TM, MCE); -} FunctionPass *llvm::createX86JITCodeEmitterPass(X86TargetMachine &TM, JITCodeEmitter &JCE) { return new Emitter<JITCodeEmitter>(TM, JCE); } -FunctionPass *llvm::createX86ObjectCodeEmitterPass(X86TargetMachine &TM, - ObjectCodeEmitter &OCE) { - return new Emitter<ObjectCodeEmitter>(TM, OCE); -} template<class CodeEmitter> bool Emitter<CodeEmitter>::runOnMachineFunction(MachineFunction &MF) { @@ -202,8 +191,15 @@ template<class CodeEmitter> void Emitter<CodeEmitter>::emitExternalSymbolAddress(const char *ES, unsigned Reloc) { intptr_t RelocCST = (Reloc == X86::reloc_picrel_word) ? PICBaseOffset : 0; + + // X86 never needs stubs because instruction selection will always pick + // an instruction sequence that is large enough to hold any address + // to a symbol. + // (see X86ISelLowering.cpp, near 2039: X86TargetLowering::LowerCall) + bool NeedStub = false; MCE.addRelocation(MachineRelocation::getExtSym(MCE.getCurrentPCOffset(), - Reloc, ES, RelocCST)); + Reloc, ES, RelocCST, + 0, NeedStub)); if (Reloc == X86::reloc_absolute_dword) MCE.emitDWordLE(0); else @@ -253,7 +249,7 @@ void Emitter<CodeEmitter>::emitJumpTableAddress(unsigned JTI, unsigned Reloc, template<class CodeEmitter> unsigned Emitter<CodeEmitter>::getX86RegNum(unsigned RegNo) const { - return II->getRegisterInfo().getX86RegNum(RegNo); + return X86RegisterInfo::getX86RegNum(RegNo); } inline static unsigned char ModRMByte(unsigned Mod, unsigned RegOpcode, @@ -391,86 +387,103 @@ void Emitter<CodeEmitter>::emitMemModRMByte(const MachineInstr &MI, // If no BaseReg, issue a RIP relative instruction only if the MCE can // resolve addresses on-the-fly, otherwise use SIB (Intel Manual 2A, table // 2-7) and absolute references. 
- if ((!Is64BitMode || DispForReloc || BaseReg != 0) && + unsigned BaseRegNo = -1U; + if (BaseReg != 0 && BaseReg != X86::RIP) + BaseRegNo = getX86RegNum(BaseReg); + + if (// The SIB byte must be used if there is an index register. IndexReg.getReg() == 0 && - ((BaseReg == 0 && MCE.earlyResolveAddresses()) || BaseReg == X86::RIP || - (BaseReg != 0 && getX86RegNum(BaseReg) != N86::ESP))) { - if (BaseReg == 0 || BaseReg == X86::RIP) { // Just a displacement? - // Emit special case [disp32] encoding + // The SIB byte must be used if the base is ESP/RSP/R12, all of which + // encode to an R/M value of 4, which indicates that a SIB byte is + // present. + BaseRegNo != N86::ESP && + // If there is no base register and we're in 64-bit mode, we need a SIB + // byte to emit an addr that is just 'disp32' (the non-RIP relative form). + (!Is64BitMode || BaseReg != 0)) { + if (BaseReg == 0 || // [disp32] in X86-32 mode + BaseReg == X86::RIP) { // [disp32+RIP] in X86-64 mode MCE.emitByte(ModRMByte(0, RegOpcodeField, 5)); emitDisplacementField(DispForReloc, DispVal, PCAdj, true); - } else { - unsigned BaseRegNo = getX86RegNum(BaseReg); - if (!DispForReloc && DispVal == 0 && BaseRegNo != N86::EBP) { - // Emit simple indirect register encoding... [EAX] f.e. - MCE.emitByte(ModRMByte(0, RegOpcodeField, BaseRegNo)); - } else if (!DispForReloc && isDisp8(DispVal)) { - // Emit the disp8 encoding... [REG+disp8] - MCE.emitByte(ModRMByte(1, RegOpcodeField, BaseRegNo)); - emitConstant(DispVal, 1); - } else { - // Emit the most general non-SIB encoding: [REG+disp32] - MCE.emitByte(ModRMByte(2, RegOpcodeField, BaseRegNo)); - emitDisplacementField(DispForReloc, DispVal, PCAdj, IsPCRel); - } + return; } - - } else { // We need a SIB byte, so start by outputting the ModR/M byte first - assert(IndexReg.getReg() != X86::ESP && - IndexReg.getReg() != X86::RSP && "Cannot use ESP as index reg!"); - - bool ForceDisp32 = false; - bool ForceDisp8 = false; - if (BaseReg == 0) { - // If there is no base register, we emit the special case SIB byte with - // MOD=0, BASE=5, to JUST get the index, scale, and displacement. - MCE.emitByte(ModRMByte(0, RegOpcodeField, 4)); - ForceDisp32 = true; - } else if (DispForReloc) { - // Emit the normal disp32 encoding. - MCE.emitByte(ModRMByte(2, RegOpcodeField, 4)); - ForceDisp32 = true; - } else if (DispVal == 0 && getX86RegNum(BaseReg) != N86::EBP) { - // Emit no displacement ModR/M byte - MCE.emitByte(ModRMByte(0, RegOpcodeField, 4)); - } else if (isDisp8(DispVal)) { - // Emit the disp8 encoding... - MCE.emitByte(ModRMByte(1, RegOpcodeField, 4)); - ForceDisp8 = true; // Make sure to force 8 bit disp if Base=EBP - } else { - // Emit the normal disp32 encoding... - MCE.emitByte(ModRMByte(2, RegOpcodeField, 4)); - } - - // Calculate what the SS field value should be... - static const unsigned SSTable[] = { ~0, 0, 1, ~0, 2, ~0, ~0, ~0, 3 }; - unsigned SS = SSTable[Scale.getImm()]; - - if (BaseReg == 0) { - // Handle the SIB byte for the case where there is no base, see Intel - // Manual 2A, table 2-7. The displacement has already been output. 
- unsigned IndexRegNo; - if (IndexReg.getReg()) - IndexRegNo = getX86RegNum(IndexReg.getReg()); - else // Examples: [ESP+1*<noreg>+4] or [scaled idx]+disp32 (MOD=0,BASE=5) - IndexRegNo = 4; - emitSIBByte(SS, IndexRegNo, 5); - } else { - unsigned BaseRegNo = getX86RegNum(BaseReg); - unsigned IndexRegNo; - if (IndexReg.getReg()) - IndexRegNo = getX86RegNum(IndexReg.getReg()); - else - IndexRegNo = 4; // For example [ESP+1*<noreg>+4] - emitSIBByte(SS, IndexRegNo, BaseRegNo); + + // If the base is not EBP/ESP and there is no displacement, use simple + // indirect register encoding, this handles addresses like [EAX]. The + // encoding for [EBP] with no displacement means [disp32] so we handle it + // by emitting a displacement of 0 below. + if (!DispForReloc && DispVal == 0 && BaseRegNo != N86::EBP) { + MCE.emitByte(ModRMByte(0, RegOpcodeField, BaseRegNo)); + return; } - - // Do we need to output a displacement? - if (ForceDisp8) { + + // Otherwise, if the displacement fits in a byte, encode as [REG+disp8]. + if (!DispForReloc && isDisp8(DispVal)) { + MCE.emitByte(ModRMByte(1, RegOpcodeField, BaseRegNo)); emitConstant(DispVal, 1); - } else if (DispVal != 0 || ForceDisp32) { - emitDisplacementField(DispForReloc, DispVal, PCAdj, IsPCRel); + return; } + + // Otherwise, emit the most general non-SIB encoding: [REG+disp32] + MCE.emitByte(ModRMByte(2, RegOpcodeField, BaseRegNo)); + emitDisplacementField(DispForReloc, DispVal, PCAdj, IsPCRel); + return; + } + + // Otherwise we need a SIB byte, so start by outputting the ModR/M byte first. + assert(IndexReg.getReg() != X86::ESP && + IndexReg.getReg() != X86::RSP && "Cannot use ESP as index reg!"); + + bool ForceDisp32 = false; + bool ForceDisp8 = false; + if (BaseReg == 0) { + // If there is no base register, we emit the special case SIB byte with + // MOD=0, BASE=5, to JUST get the index, scale, and displacement. + MCE.emitByte(ModRMByte(0, RegOpcodeField, 4)); + ForceDisp32 = true; + } else if (DispForReloc) { + // Emit the normal disp32 encoding. + MCE.emitByte(ModRMByte(2, RegOpcodeField, 4)); + ForceDisp32 = true; + } else if (DispVal == 0 && getX86RegNum(BaseReg) != N86::EBP) { + // Emit no displacement ModR/M byte + MCE.emitByte(ModRMByte(0, RegOpcodeField, 4)); + } else if (isDisp8(DispVal)) { + // Emit the disp8 encoding... + MCE.emitByte(ModRMByte(1, RegOpcodeField, 4)); + ForceDisp8 = true; // Make sure to force 8 bit disp if Base=EBP + } else { + // Emit the normal disp32 encoding... + MCE.emitByte(ModRMByte(2, RegOpcodeField, 4)); + } + + // Calculate what the SS field value should be... + static const unsigned SSTable[] = { ~0, 0, 1, ~0, 2, ~0, ~0, ~0, 3 }; + unsigned SS = SSTable[Scale.getImm()]; + + if (BaseReg == 0) { + // Handle the SIB byte for the case where there is no base, see Intel + // Manual 2A, table 2-7. The displacement has already been output. + unsigned IndexRegNo; + if (IndexReg.getReg()) + IndexRegNo = getX86RegNum(IndexReg.getReg()); + else // Examples: [ESP+1*<noreg>+4] or [scaled idx]+disp32 (MOD=0,BASE=5) + IndexRegNo = 4; + emitSIBByte(SS, IndexRegNo, 5); + } else { + unsigned BaseRegNo = getX86RegNum(BaseReg); + unsigned IndexRegNo; + if (IndexReg.getReg()) + IndexRegNo = getX86RegNum(IndexReg.getReg()); + else + IndexRegNo = 4; // For example [ESP+1*<noreg>+4] + emitSIBByte(SS, IndexRegNo, BaseRegNo); + } + + // Do we need to output a displacement?
+ if (ForceDisp8) { + emitConstant(DispVal, 1); + } else if (DispVal != 0 || ForceDisp32) { + emitDisplacementField(DispForReloc, DispVal, PCAdj, IsPCRel); } } @@ -570,7 +583,7 @@ void Emitter<CodeEmitter>::emitInstruction(const MachineInstr &MI, // Skip the last source operand that is tied_to the dest reg. e.g. LXADD32 --NumOps; - unsigned char BaseOpcode = II->getBaseOpcodeFor(Desc); + unsigned char BaseOpcode = X86II::getBaseOpcodeFor(Desc->TSFlags); switch (Desc->TSFlags & X86II::FormMask) { default: llvm_unreachable("Unknown FormMask value in X86 MachineCodeEmitter!"); @@ -582,25 +595,25 @@ void Emitter<CodeEmitter>::emitInstruction(const MachineInstr &MI, llvm_unreachable("pseudo instructions should be removed before code" " emission"); break; - case TargetInstrInfo::INLINEASM: + case TargetOpcode::INLINEASM: // We allow inline assembler nodes with empty bodies - they can // implicitly define registers, which is ok for JIT. if (MI.getOperand(0).getSymbolName()[0]) llvm_report_error("JIT does not support inline asm!"); break; - case TargetInstrInfo::DBG_LABEL: - case TargetInstrInfo::EH_LABEL: - case TargetInstrInfo::GC_LABEL: + case TargetOpcode::DBG_LABEL: + case TargetOpcode::EH_LABEL: + case TargetOpcode::GC_LABEL: MCE.emitLabel(MI.getOperand(0).getImm()); break; - case TargetInstrInfo::IMPLICIT_DEF: - case TargetInstrInfo::KILL: + case TargetOpcode::IMPLICIT_DEF: + case TargetOpcode::KILL: case X86::FP_REG_KILL: break; case X86::MOVPC32r: { // This emits the "call" portion of this pseudo instruction. MCE.emitByte(BaseOpcode); - emitConstant(0, X86InstrInfo::sizeOfImm(Desc)); + emitConstant(0, X86II::getSizeOfImm(Desc->TSFlags)); // Remember PIC base. PICBaseOffset = (intptr_t) MCE.getCurrentPCOffset(); X86JITInfo *JTI = TM.getJITInfo(); @@ -639,15 +652,21 @@ void Emitter<CodeEmitter>::emitInstruction(const MachineInstr &MI, emitExternalSymbolAddress(MO.getSymbolName(), X86::reloc_pcrel_word); break; } + + // FIXME: Only used by hackish MCCodeEmitter, remove when dead. + if (MO.isJTI()) { + emitJumpTableAddress(MO.getIndex(), X86::reloc_pcrel_word); + break; + } assert(MO.isImm() && "Unknown RawFrm operand!"); if (Opcode == X86::CALLpcrel32 || Opcode == X86::CALL64pcrel32) { // Fix up immediate operand for pc relative calls.
intptr_t Imm = (intptr_t)MO.getImm(); Imm = Imm - MCE.getCurrentPCValue() - 4; - emitConstant(Imm, X86InstrInfo::sizeOfImm(Desc)); + emitConstant(Imm, X86II::getSizeOfImm(Desc->TSFlags)); } else - emitConstant(MO.getImm(), X86InstrInfo::sizeOfImm(Desc)); + emitConstant(MO.getImm(), X86II::getSizeOfImm(Desc->TSFlags)); break; } @@ -658,7 +677,7 @@ void Emitter<CodeEmitter>::emitInstruction(const MachineInstr &MI, break; const MachineOperand &MO1 = MI.getOperand(CurOp++); - unsigned Size = X86InstrInfo::sizeOfImm(Desc); + unsigned Size = X86II::getSizeOfImm(Desc->TSFlags); if (MO1.isImm()) { emitConstant(MO1.getImm(), Size); break; @@ -691,7 +710,7 @@ void Emitter<CodeEmitter>::emitInstruction(const MachineInstr &MI, CurOp += 2; if (CurOp != NumOps) emitConstant(MI.getOperand(CurOp++).getImm(), - X86InstrInfo::sizeOfImm(Desc)); + X86II::getSizeOfImm(Desc->TSFlags)); break; } case X86II::MRMDestMem: { @@ -702,7 +721,7 @@ void Emitter<CodeEmitter>::emitInstruction(const MachineInstr &MI, CurOp += X86AddrNumOperands + 1; if (CurOp != NumOps) emitConstant(MI.getOperand(CurOp++).getImm(), - X86InstrInfo::sizeOfImm(Desc)); + X86II::getSizeOfImm(Desc->TSFlags)); break; } @@ -713,7 +732,7 @@ void Emitter<CodeEmitter>::emitInstruction(const MachineInstr &MI, CurOp += 2; if (CurOp != NumOps) emitConstant(MI.getOperand(CurOp++).getImm(), - X86InstrInfo::sizeOfImm(Desc)); + X86II::getSizeOfImm(Desc->TSFlags)); break; case X86II::MRMSrcMem: { @@ -726,7 +745,7 @@ void Emitter<CodeEmitter>::emitInstruction(const MachineInstr &MI, AddrOperands = X86AddrNumOperands; intptr_t PCAdj = (CurOp + AddrOperands + 1 != NumOps) ? - X86InstrInfo::sizeOfImm(Desc) : 0; + X86II::getSizeOfImm(Desc->TSFlags) : 0; MCE.emitByte(BaseOpcode); emitMemModRMByte(MI, CurOp+1, getX86RegNum(MI.getOperand(CurOp).getReg()), @@ -734,7 +753,7 @@ void Emitter<CodeEmitter>::emitInstruction(const MachineInstr &MI, CurOp += AddrOperands + 1; if (CurOp != NumOps) emitConstant(MI.getOperand(CurOp++).getImm(), - X86InstrInfo::sizeOfImm(Desc)); + X86II::getSizeOfImm(Desc->TSFlags)); break; } @@ -743,33 +762,14 @@ void Emitter<CodeEmitter>::emitInstruction(const MachineInstr &MI, case X86II::MRM4r: case X86II::MRM5r: case X86II::MRM6r: case X86II::MRM7r: { MCE.emitByte(BaseOpcode); - - // Special handling of lfence, mfence, monitor, and mwait. - if (Desc->getOpcode() == X86::LFENCE || - Desc->getOpcode() == X86::MFENCE || - Desc->getOpcode() == X86::MONITOR || - Desc->getOpcode() == X86::MWAIT) { - emitRegModRMByte((Desc->TSFlags & X86II::FormMask)-X86II::MRM0r); - - switch (Desc->getOpcode()) { - default: break; - case X86::MONITOR: - MCE.emitByte(0xC8); - break; - case X86::MWAIT: - MCE.emitByte(0xC9); - break; - } - } else { - emitRegModRMByte(MI.getOperand(CurOp++).getReg(), - (Desc->TSFlags & X86II::FormMask)-X86II::MRM0r); - } + emitRegModRMByte(MI.getOperand(CurOp++).getReg(), + (Desc->TSFlags & X86II::FormMask)-X86II::MRM0r); if (CurOp == NumOps) break; const MachineOperand &MO1 = MI.getOperand(CurOp++); - unsigned Size = X86InstrInfo::sizeOfImm(Desc); + unsigned Size = X86II::getSizeOfImm(Desc->TSFlags); if (MO1.isImm()) { emitConstant(MO1.getImm(), Size); break; @@ -798,7 +798,7 @@ void Emitter<CodeEmitter>::emitInstruction(const MachineInstr &MI, case X86II::MRM6m: case X86II::MRM7m: { intptr_t PCAdj = (CurOp + X86AddrNumOperands != NumOps) ? (MI.getOperand(CurOp+X86AddrNumOperands).isImm() ? 
- X86InstrInfo::sizeOfImm(Desc) : 4) : 0; + X86II::getSizeOfImm(Desc->TSFlags) : 4) : 0; MCE.emitByte(BaseOpcode); emitMemModRMByte(MI, CurOp, (Desc->TSFlags & X86II::FormMask)-X86II::MRM0m, @@ -809,7 +809,7 @@ void Emitter<CodeEmitter>::emitInstruction(const MachineInstr &MI, break; const MachineOperand &MO = MI.getOperand(CurOp++); - unsigned Size = X86InstrInfo::sizeOfImm(Desc); + unsigned Size = X86II::getSizeOfImm(Desc->TSFlags); if (MO.isImm()) { emitConstant(MO.getImm(), Size); break; @@ -839,6 +839,27 @@ void Emitter<CodeEmitter>::emitInstruction(const MachineInstr &MI, getX86RegNum(MI.getOperand(CurOp).getReg())); ++CurOp; break; + + case X86II::MRM_C1: + MCE.emitByte(BaseOpcode); + MCE.emitByte(0xC1); + break; + case X86II::MRM_C8: + MCE.emitByte(BaseOpcode); + MCE.emitByte(0xC8); + break; + case X86II::MRM_C9: + MCE.emitByte(BaseOpcode); + MCE.emitByte(0xC9); + break; + case X86II::MRM_E8: + MCE.emitByte(BaseOpcode); + MCE.emitByte(0xE8); + break; + case X86II::MRM_F0: + MCE.emitByte(BaseOpcode); + MCE.emitByte(0xF0); + break; } if (!Desc->isVariadic() && CurOp != NumOps) { @@ -850,256 +871,3 @@ void Emitter<CodeEmitter>::emitInstruction(const MachineInstr &MI, MCE.processDebugLoc(MI.getDebugLoc(), false); } - -// Adapt the Emitter / CodeEmitter interfaces to MCCodeEmitter. -// -// FIXME: This is a total hack designed to allow work on llvm-mc to proceed -// without being blocked on various cleanups needed to support a clean interface -// to instruction encoding. -// -// Look away! - -#include "llvm/DerivedTypes.h" - -namespace { -class MCSingleInstructionCodeEmitter : public MachineCodeEmitter { - uint8_t Data[256]; - -public: - MCSingleInstructionCodeEmitter() { reset(); } - - void reset() { - BufferBegin = Data; - BufferEnd = array_endof(Data); - CurBufferPtr = Data; - } - - StringRef str() { - return StringRef(reinterpret_cast<char*>(BufferBegin), - CurBufferPtr - BufferBegin); - } - - virtual void startFunction(MachineFunction &F) {} - virtual bool finishFunction(MachineFunction &F) { return false; } - virtual void emitLabel(uint64_t LabelID) {} - virtual void StartMachineBasicBlock(MachineBasicBlock *MBB) {} - virtual bool earlyResolveAddresses() const { return false; } - virtual void addRelocation(const MachineRelocation &MR) { } - virtual uintptr_t getConstantPoolEntryAddress(unsigned Index) const { - return 0; - } - virtual uintptr_t getJumpTableEntryAddress(unsigned Index) const { - return 0; - } - virtual uintptr_t getMachineBasicBlockAddress(MachineBasicBlock *MBB) const { - return 0; - } - virtual uintptr_t getLabelAddress(uint64_t LabelID) const { - return 0; - } - virtual void setModuleInfo(MachineModuleInfo* Info) {} -}; - -class X86MCCodeEmitter : public MCCodeEmitter { - X86MCCodeEmitter(const X86MCCodeEmitter &); // DO NOT IMPLEMENT - void operator=(const X86MCCodeEmitter &); // DO NOT IMPLEMENT - -private: - X86TargetMachine &TM; - llvm::Function *DummyF; - TargetData *DummyTD; - mutable llvm::MachineFunction *DummyMF; - llvm::MachineBasicBlock *DummyMBB; - - MCSingleInstructionCodeEmitter *InstrEmitter; - Emitter<MachineCodeEmitter> *Emit; - -public: - X86MCCodeEmitter(X86TargetMachine &_TM) : TM(_TM) { - // Verily, thou shouldst avert thine eyes. 
- const llvm::FunctionType *FTy = - FunctionType::get(llvm::Type::getVoidTy(getGlobalContext()), false); - DummyF = Function::Create(FTy, GlobalValue::InternalLinkage); - DummyTD = new TargetData(""); - DummyMF = new MachineFunction(DummyF, TM); - DummyMBB = DummyMF->CreateMachineBasicBlock(); - - InstrEmitter = new MCSingleInstructionCodeEmitter(); - Emit = new Emitter<MachineCodeEmitter>(TM, *InstrEmitter, - *TM.getInstrInfo(), - *DummyTD, false); - } - ~X86MCCodeEmitter() { - delete Emit; - delete InstrEmitter; - delete DummyMF; - delete DummyF; - } - - bool AddRegToInstr(const MCInst &MI, MachineInstr *Instr, - unsigned Start) const { - if (Start + 1 > MI.getNumOperands()) - return false; - - const MCOperand &Op = MI.getOperand(Start); - if (!Op.isReg()) return false; - - Instr->addOperand(MachineOperand::CreateReg(Op.getReg(), false)); - return true; - } - - bool AddImmToInstr(const MCInst &MI, MachineInstr *Instr, - unsigned Start) const { - if (Start + 1 > MI.getNumOperands()) - return false; - - const MCOperand &Op = MI.getOperand(Start); - if (Op.isImm()) { - Instr->addOperand(MachineOperand::CreateImm(Op.getImm())); - return true; - } - if (!Op.isExpr()) - return false; - - const MCExpr *Expr = Op.getExpr(); - if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Expr)) { - Instr->addOperand(MachineOperand::CreateImm(CE->getValue())); - return true; - } - - // FIXME: Relocation / fixup. - Instr->addOperand(MachineOperand::CreateImm(0)); - return true; - } - - bool AddLMemToInstr(const MCInst &MI, MachineInstr *Instr, - unsigned Start) const { - return (AddRegToInstr(MI, Instr, Start + 0) && - AddImmToInstr(MI, Instr, Start + 1) && - AddRegToInstr(MI, Instr, Start + 2) && - AddImmToInstr(MI, Instr, Start + 3)); - } - - bool AddMemToInstr(const MCInst &MI, MachineInstr *Instr, - unsigned Start) const { - return (AddRegToInstr(MI, Instr, Start + 0) && - AddImmToInstr(MI, Instr, Start + 1) && - AddRegToInstr(MI, Instr, Start + 2) && - AddImmToInstr(MI, Instr, Start + 3) && - AddRegToInstr(MI, Instr, Start + 4)); - } - - void EncodeInstruction(const MCInst &MI, raw_ostream &OS) const { - // Don't look yet! - - // Convert the MCInst to a MachineInstr so we can (ab)use the regular - // emitter. - const X86InstrInfo &II = *TM.getInstrInfo(); - const TargetInstrDesc &Desc = II.get(MI.getOpcode()); - MachineInstr *Instr = DummyMF->CreateMachineInstr(Desc, DebugLoc()); - DummyMBB->push_back(Instr); - - unsigned Opcode = MI.getOpcode(); - unsigned NumOps = MI.getNumOperands(); - unsigned CurOp = 0; - if (NumOps > 1 && Desc.getOperandConstraint(1, TOI::TIED_TO) != -1) { - Instr->addOperand(MachineOperand::CreateReg(0, false)); - ++CurOp; - } else if (NumOps > 2 && - Desc.getOperandConstraint(NumOps-1, TOI::TIED_TO)== 0) - // Skip the last source operand that is tied_to the dest reg. e.g. LXADD32 - --NumOps; - - bool OK = true; - switch (Desc.TSFlags & X86II::FormMask) { - case X86II::MRMDestReg: - case X86II::MRMSrcReg: - // Matching doesn't fill this in completely, we have to choose operand 0 - // for a tied register. - OK &= AddRegToInstr(MI, Instr, 0); CurOp++; - OK &= AddRegToInstr(MI, Instr, CurOp++); - if (CurOp < NumOps) - OK &= AddImmToInstr(MI, Instr, CurOp); - break; - - case X86II::RawFrm: - if (CurOp < NumOps) { - // Hack to make branches work. 
- if (!(Desc.TSFlags & X86II::ImmMask) && - MI.getOperand(0).isExpr() && - isa<MCSymbolRefExpr>(MI.getOperand(0).getExpr())) - Instr->addOperand(MachineOperand::CreateMBB(DummyMBB)); - else - OK &= AddImmToInstr(MI, Instr, CurOp); - } - break; - - case X86II::AddRegFrm: - OK &= AddRegToInstr(MI, Instr, CurOp++); - if (CurOp < NumOps) - OK &= AddImmToInstr(MI, Instr, CurOp); - break; - - case X86II::MRM0r: case X86II::MRM1r: - case X86II::MRM2r: case X86II::MRM3r: - case X86II::MRM4r: case X86II::MRM5r: - case X86II::MRM6r: case X86II::MRM7r: - // Matching doesn't fill this in completely, we have to choose operand 0 - // for a tied register. - OK &= AddRegToInstr(MI, Instr, 0); CurOp++; - if (CurOp < NumOps) - OK &= AddImmToInstr(MI, Instr, CurOp); - break; - - case X86II::MRM0m: case X86II::MRM1m: - case X86II::MRM2m: case X86II::MRM3m: - case X86II::MRM4m: case X86II::MRM5m: - case X86II::MRM6m: case X86II::MRM7m: - OK &= AddMemToInstr(MI, Instr, CurOp); CurOp += 5; - if (CurOp < NumOps) - OK &= AddImmToInstr(MI, Instr, CurOp); - break; - - case X86II::MRMSrcMem: - OK &= AddRegToInstr(MI, Instr, CurOp++); - if (Opcode == X86::LEA64r || Opcode == X86::LEA64_32r || - Opcode == X86::LEA16r || Opcode == X86::LEA32r) - OK &= AddLMemToInstr(MI, Instr, CurOp); - else - OK &= AddMemToInstr(MI, Instr, CurOp); - break; - - case X86II::MRMDestMem: - OK &= AddMemToInstr(MI, Instr, CurOp); CurOp += 5; - OK &= AddRegToInstr(MI, Instr, CurOp); - break; - - default: - case X86II::MRMInitReg: - case X86II::Pseudo: - OK = false; - break; - } - - if (!OK) { - dbgs() << "couldn't convert inst '"; - MI.dump(); - dbgs() << "' to machine instr:\n"; - Instr->dump(); - } - - InstrEmitter->reset(); - if (OK) - Emit->emitInstruction(*Instr, &Desc); - OS << InstrEmitter->str(); - - Instr->eraseFromParent(); - } -}; -} - -// Ok, now you can look. -MCCodeEmitter *llvm::createX86MCCodeEmitter(const Target &, - TargetMachine &TM) { - return new X86MCCodeEmitter(static_cast<X86TargetMachine&>(TM)); -} diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp index d5ad61b..69a9d60 100644 --- a/lib/Target/X86/X86FastISel.cpp +++ b/lib/Target/X86/X86FastISel.cpp @@ -786,8 +786,8 @@ bool X86FastISel::X86SelectCmp(Instruction *I) { bool X86FastISel::X86SelectZExt(Instruction *I) { // Handle zero-extension from i1 to i8, which is common. - if (I->getType()->isInteger(8) && - I->getOperand(0)->getType()->isInteger(1)) { + if (I->getType()->isIntegerTy(8) && + I->getOperand(0)->getType()->isIntegerTy(1)) { unsigned ResultReg = getRegForValue(I->getOperand(0)); if (ResultReg == 0) return false; // Set the high bits to zero. 
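The emitMemModRMByte rewrite in X86CodeEmitter.cpp above is a straight-line restatement of the x86 addressing-mode rules: an R/M value of 4 always means a SIB byte follows, mod=0 with base EBP is reserved for [disp32], and otherwise mod picks between no displacement, disp8, and disp32. A minimal standalone sketch of that mod-field decision, with an illustrative helper name rather than the emitter's real interface:

#include <cstdint>

// Pick the ModRM 'mod' field (0, 1 or 2) for a non-SIB [base+disp] operand,
// mirroring the early-return chain in emitMemModRMByte. N86-style register
// numbers: 4 = ESP (always forces a SIB byte), 5 = EBP (mod=0 would mean
// [disp32], so [EBP] must carry an explicit displacement).
static unsigned pickModField(unsigned BaseRegNo, int32_t Disp, bool DispForReloc) {
  if (DispForReloc)
    return 2;                        // A relocated displacement is always 32 bits.
  if (Disp == 0 && BaseRegNo != 5)
    return 0;                        // [REG] with no displacement byte at all.
  if (Disp >= -128 && Disp <= 127)
    return 1;                        // [REG+disp8]
  return 2;                          // [REG+disp32]
}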
@@ -828,30 +828,30 @@ bool X86FastISel::X86SelectBranch(Instruction *I) { std::swap(TrueMBB, FalseMBB); Predicate = CmpInst::FCMP_UNE; // FALL THROUGH - case CmpInst::FCMP_UNE: SwapArgs = false; BranchOpc = X86::JNE; break; - case CmpInst::FCMP_OGT: SwapArgs = false; BranchOpc = X86::JA; break; - case CmpInst::FCMP_OGE: SwapArgs = false; BranchOpc = X86::JAE; break; - case CmpInst::FCMP_OLT: SwapArgs = true; BranchOpc = X86::JA; break; - case CmpInst::FCMP_OLE: SwapArgs = true; BranchOpc = X86::JAE; break; - case CmpInst::FCMP_ONE: SwapArgs = false; BranchOpc = X86::JNE; break; - case CmpInst::FCMP_ORD: SwapArgs = false; BranchOpc = X86::JNP; break; - case CmpInst::FCMP_UNO: SwapArgs = false; BranchOpc = X86::JP; break; - case CmpInst::FCMP_UEQ: SwapArgs = false; BranchOpc = X86::JE; break; - case CmpInst::FCMP_UGT: SwapArgs = true; BranchOpc = X86::JB; break; - case CmpInst::FCMP_UGE: SwapArgs = true; BranchOpc = X86::JBE; break; - case CmpInst::FCMP_ULT: SwapArgs = false; BranchOpc = X86::JB; break; - case CmpInst::FCMP_ULE: SwapArgs = false; BranchOpc = X86::JBE; break; + case CmpInst::FCMP_UNE: SwapArgs = false; BranchOpc = X86::JNE_4; break; + case CmpInst::FCMP_OGT: SwapArgs = false; BranchOpc = X86::JA_4; break; + case CmpInst::FCMP_OGE: SwapArgs = false; BranchOpc = X86::JAE_4; break; + case CmpInst::FCMP_OLT: SwapArgs = true; BranchOpc = X86::JA_4; break; + case CmpInst::FCMP_OLE: SwapArgs = true; BranchOpc = X86::JAE_4; break; + case CmpInst::FCMP_ONE: SwapArgs = false; BranchOpc = X86::JNE_4; break; + case CmpInst::FCMP_ORD: SwapArgs = false; BranchOpc = X86::JNP_4; break; + case CmpInst::FCMP_UNO: SwapArgs = false; BranchOpc = X86::JP_4; break; + case CmpInst::FCMP_UEQ: SwapArgs = false; BranchOpc = X86::JE_4; break; + case CmpInst::FCMP_UGT: SwapArgs = true; BranchOpc = X86::JB_4; break; + case CmpInst::FCMP_UGE: SwapArgs = true; BranchOpc = X86::JBE_4; break; + case CmpInst::FCMP_ULT: SwapArgs = false; BranchOpc = X86::JB_4; break; + case CmpInst::FCMP_ULE: SwapArgs = false; BranchOpc = X86::JBE_4; break; - case CmpInst::ICMP_EQ: SwapArgs = false; BranchOpc = X86::JE; break; - case CmpInst::ICMP_NE: SwapArgs = false; BranchOpc = X86::JNE; break; - case CmpInst::ICMP_UGT: SwapArgs = false; BranchOpc = X86::JA; break; - case CmpInst::ICMP_UGE: SwapArgs = false; BranchOpc = X86::JAE; break; - case CmpInst::ICMP_ULT: SwapArgs = false; BranchOpc = X86::JB; break; - case CmpInst::ICMP_ULE: SwapArgs = false; BranchOpc = X86::JBE; break; - case CmpInst::ICMP_SGT: SwapArgs = false; BranchOpc = X86::JG; break; - case CmpInst::ICMP_SGE: SwapArgs = false; BranchOpc = X86::JGE; break; - case CmpInst::ICMP_SLT: SwapArgs = false; BranchOpc = X86::JL; break; - case CmpInst::ICMP_SLE: SwapArgs = false; BranchOpc = X86::JLE; break; + case CmpInst::ICMP_EQ: SwapArgs = false; BranchOpc = X86::JE_4; break; + case CmpInst::ICMP_NE: SwapArgs = false; BranchOpc = X86::JNE_4; break; + case CmpInst::ICMP_UGT: SwapArgs = false; BranchOpc = X86::JA_4; break; + case CmpInst::ICMP_UGE: SwapArgs = false; BranchOpc = X86::JAE_4; break; + case CmpInst::ICMP_ULT: SwapArgs = false; BranchOpc = X86::JB_4; break; + case CmpInst::ICMP_ULE: SwapArgs = false; BranchOpc = X86::JBE_4; break; + case CmpInst::ICMP_SGT: SwapArgs = false; BranchOpc = X86::JG_4; break; + case CmpInst::ICMP_SGE: SwapArgs = false; BranchOpc = X86::JGE_4; break; + case CmpInst::ICMP_SLT: SwapArgs = false; BranchOpc = X86::JL_4; break; + case CmpInst::ICMP_SLE: SwapArgs = false; BranchOpc = X86::JLE_4; break; default: return false; } @@ 
-869,7 +869,7 @@ bool X86FastISel::X86SelectBranch(Instruction *I) { if (Predicate == CmpInst::FCMP_UNE) { // X86 requires a second branch to handle UNE (and OEQ, // which is mapped to UNE above). - BuildMI(MBB, DL, TII.get(X86::JP)).addMBB(TrueMBB); + BuildMI(MBB, DL, TII.get(X86::JP_4)).addMBB(TrueMBB); } FastEmitBranch(FalseMBB); @@ -923,7 +923,8 @@ bool X86FastISel::X86SelectBranch(Instruction *I) { unsigned OpCode = SetMI->getOpcode(); if (OpCode == X86::SETOr || OpCode == X86::SETBr) { - BuildMI(MBB, DL, TII.get(OpCode == X86::SETOr ? X86::JO : X86::JB)) + BuildMI(MBB, DL, TII.get(OpCode == X86::SETOr ? + X86::JO_4 : X86::JB_4)) .addMBB(TrueMBB); FastEmitBranch(FalseMBB); MBB->addSuccessor(TrueMBB); @@ -939,7 +940,7 @@ bool X86FastISel::X86SelectBranch(Instruction *I) { if (OpReg == 0) return false; BuildMI(MBB, DL, TII.get(X86::TEST8rr)).addReg(OpReg).addReg(OpReg); - BuildMI(MBB, DL, TII.get(X86::JNE)).addMBB(TrueMBB); + BuildMI(MBB, DL, TII.get(X86::JNE_4)).addMBB(TrueMBB); FastEmitBranch(FalseMBB); MBB->addSuccessor(TrueMBB); return true; @@ -948,7 +949,7 @@ bool X86FastISel::X86SelectBranch(Instruction *I) { bool X86FastISel::X86SelectShift(Instruction *I) { unsigned CReg = 0, OpReg = 0, OpImm = 0; const TargetRegisterClass *RC = NULL; - if (I->getType()->isInteger(8)) { + if (I->getType()->isIntegerTy(8)) { CReg = X86::CL; RC = &X86::GR8RegClass; switch (I->getOpcode()) { @@ -957,7 +958,7 @@ bool X86FastISel::X86SelectShift(Instruction *I) { case Instruction::Shl: OpReg = X86::SHL8rCL; OpImm = X86::SHL8ri; break; default: return false; } - } else if (I->getType()->isInteger(16)) { + } else if (I->getType()->isIntegerTy(16)) { CReg = X86::CX; RC = &X86::GR16RegClass; switch (I->getOpcode()) { @@ -966,7 +967,7 @@ bool X86FastISel::X86SelectShift(Instruction *I) { case Instruction::Shl: OpReg = X86::SHL16rCL; OpImm = X86::SHL16ri; break; default: return false; } - } else if (I->getType()->isInteger(32)) { + } else if (I->getType()->isIntegerTy(32)) { CReg = X86::ECX; RC = &X86::GR32RegClass; switch (I->getOpcode()) { @@ -975,7 +976,7 @@ bool X86FastISel::X86SelectShift(Instruction *I) { case Instruction::Shl: OpReg = X86::SHL32rCL; OpImm = X86::SHL32ri; break; default: return false; } - } else if (I->getType()->isInteger(64)) { + } else if (I->getType()->isIntegerTy(64)) { CReg = X86::RCX; RC = &X86::GR64RegClass; switch (I->getOpcode()) { @@ -1012,7 +1013,7 @@ bool X86FastISel::X86SelectShift(Instruction *I) { // of X86::CL, emit an EXTRACT_SUBREG to precisely describe what // we're doing here. if (CReg != X86::CL) - BuildMI(MBB, DL, TII.get(TargetInstrInfo::EXTRACT_SUBREG), X86::CL) + BuildMI(MBB, DL, TII.get(TargetOpcode::EXTRACT_SUBREG), X86::CL) .addReg(CReg).addImm(X86::SUBREG_8BIT); unsigned ResultReg = createResultReg(RC); @@ -1153,6 +1154,17 @@ bool X86FastISel::X86VisitIntrinsicCall(IntrinsicInst &I) { // FIXME: Handle more intrinsics. switch (I.getIntrinsicID()) { default: return false; + case Intrinsic::dbg_declare: { + DbgDeclareInst *DI = cast<DbgDeclareInst>(&I); + X86AddressMode AM; + assert(DI->getAddress() && "Null address should be checked earlier!"); + if (!X86SelectAddress(DI->getAddress(), AM)) + return false; + const TargetInstrDesc &II = TII.get(TargetOpcode::DBG_VALUE); + addFullAddress(BuildMI(MBB, DL, II), AM).addImm(0). 
+ addMetadata(DI->getVariable()); + return true; + } case Intrinsic::trap: { BuildMI(MBB, DL, TII.get(X86::TRAP)); return true; @@ -1236,7 +1248,7 @@ bool X86FastISel::X86SelectCall(Instruction *I) { // fastcc with -tailcallopt is intended to provide a guaranteed // tail call optimization. Fastisel doesn't know how to do that. - if (CC == CallingConv::Fast && PerformTailCallOpt) + if (CC == CallingConv::Fast && GuaranteedTailCallOpt) return false; // Let SDISel handle vararg functions. diff --git a/lib/Target/X86/X86FixupKinds.h b/lib/Target/X86/X86FixupKinds.h new file mode 100644 index 0000000..c8dac3c --- /dev/null +++ b/lib/Target/X86/X86FixupKinds.h @@ -0,0 +1,25 @@ +//===-- X86/X86FixupKinds.h - X86 Specific Fixup Entries --------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_X86_X86FIXUPKINDS_H +#define LLVM_X86_X86FIXUPKINDS_H + +#include "llvm/MC/MCFixup.h" + +namespace llvm { +namespace X86 { +enum Fixups { + reloc_pcrel_4byte = FirstTargetFixupKind, // 32-bit pcrel, e.g. a branch. + reloc_pcrel_1byte, // 8-bit pcrel, e.g. branch_1 + reloc_riprel_4byte // 32-bit rip-relative +}; +} +} + +#endif diff --git a/lib/Target/X86/X86FloatingPoint.cpp b/lib/Target/X86/X86FloatingPoint.cpp index 503ac14..6d6fe77 100644 --- a/lib/Target/X86/X86FloatingPoint.cpp +++ b/lib/Target/X86/X86FloatingPoint.cpp @@ -235,7 +235,7 @@ bool FPS::processBasicBlock(MachineFunction &MF, MachineBasicBlock &BB) { unsigned Flags = MI->getDesc().TSFlags; unsigned FPInstClass = Flags & X86II::FPTypeMask; - if (MI->getOpcode() == TargetInstrInfo::INLINEASM) + if (MI->isInlineAsm()) FPInstClass = X86II::SpecialFP; if (FPInstClass == X86II::NotFP) @@ -1083,7 +1083,7 @@ void FPS::handleSpecialFP(MachineBasicBlock::iterator &I) { } } break; - case TargetInstrInfo::INLINEASM: { + case TargetOpcode::INLINEASM: { // The inline asm MachineInstr currently only *uses* FP registers for the // 'f' constraint. These should be turned into the current ST(x) register // in the machine instr. 
Also, any kills should be explicitly popped after diff --git a/lib/Target/X86/X86FloatingPointRegKill.cpp b/lib/Target/X86/X86FloatingPointRegKill.cpp index 34a0045..6a117dd 100644 --- a/lib/Target/X86/X86FloatingPointRegKill.cpp +++ b/lib/Target/X86/X86FloatingPointRegKill.cpp @@ -118,7 +118,7 @@ bool FPRegKiller::runOnMachineFunction(MachineFunction &MF) { for (BasicBlock::const_iterator II = SI->begin(); (PN = dyn_cast<PHINode>(II)); ++II) { if (PN->getType()==Type::getX86_FP80Ty(LLVMBB->getContext()) || - (!Subtarget.hasSSE1() && PN->getType()->isFloatingPoint()) || + (!Subtarget.hasSSE1() && PN->getType()->isFloatingPointTy()) || (!Subtarget.hasSSE2() && PN->getType()==Type::getDoubleTy(LLVMBB->getContext()))) { ContainsFPCode = true; diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp index 91e0483..7b349f6 100644 --- a/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -183,8 +183,9 @@ namespace { virtual void EmitFunctionEntryCode(Function &Fn, MachineFunction &MF); - virtual - bool IsLegalAndProfitableToFold(SDNode *N, SDNode *U, SDNode *Root) const; + virtual bool IsProfitableToFold(SDValue N, SDNode *U, SDNode *Root) const; + + virtual bool IsLegalToFold(SDValue N, SDNode *U, SDNode *Root) const; // Include the pieces autogenerated from the target description. #include "X86GenDAGISel.inc" @@ -303,11 +304,18 @@ namespace { } -bool X86DAGToDAGISel::IsLegalAndProfitableToFold(SDNode *N, SDNode *U, - SDNode *Root) const { +bool +X86DAGToDAGISel::IsProfitableToFold(SDValue N, SDNode *U, SDNode *Root) const { if (OptLevel == CodeGenOpt::None) return false; - if (U == Root) + if (!N.hasOneUse()) + return false; + + if (N.getOpcode() != ISD::LOAD) + return true; + + // If N is a load, do additional profitability checks. + if (U == Root) { switch (U->getOpcode()) { default: break; case X86ISD::ADD: @@ -354,9 +362,17 @@ bool X86DAGToDAGISel::IsLegalAndProfitableToFold(SDNode *N, SDNode *U, } } } + } + + return true; +} + + +bool X86DAGToDAGISel::IsLegalToFold(SDValue N, SDNode *U, SDNode *Root) const { + if (OptLevel == CodeGenOpt::None) return false; // Proceed to 'generic' cycle finder code - return SelectionDAGISel::IsLegalAndProfitableToFold(N, U, Root); + return SelectionDAGISel::IsLegalToFold(N, U, Root); } /// MoveBelowTokenFactor - Replace TokenFactor operand with load's chain operand @@ -652,9 +668,10 @@ void X86DAGToDAGISel::PreprocessForFPConvert() { // FIXME: optimize the case where the src/dest is a load or store? SDValue Store = CurDAG->getTruncStore(CurDAG->getEntryNode(), dl, N->getOperand(0), - MemTmp, NULL, 0, MemVT); + MemTmp, NULL, 0, MemVT, + false, false, 0); SDValue Result = CurDAG->getExtLoad(ISD::EXTLOAD, dl, DstVT, Store, MemTmp, - NULL, 0, MemVT); + NULL, 0, MemVT, false, false, 0); // We're about to replace all uses of the FP_ROUND/FP_EXTEND with the // extload we created. 
This will cause general havoc on the dag because @@ -1310,8 +1327,8 @@ bool X86DAGToDAGISel::SelectScalarSSELoad(SDNode *Op, SDValue Pred, InChain = N.getOperand(0).getValue(1); if (ISD::isNON_EXTLoad(InChain.getNode()) && InChain.getValue(0).hasOneUse() && - N.hasOneUse() && - IsLegalAndProfitableToFold(N.getNode(), Pred.getNode(), Op)) { + IsProfitableToFold(N, Pred.getNode(), Op) && + IsLegalToFold(N, Pred.getNode(), Op)) { LoadSDNode *LD = cast<LoadSDNode>(InChain); if (!SelectAddr(Op, LD->getBasePtr(), Base, Scale, Index, Disp, Segment)) return false; @@ -1435,8 +1452,8 @@ bool X86DAGToDAGISel::TryFoldLoad(SDNode *P, SDValue N, SDValue &Index, SDValue &Disp, SDValue &Segment) { if (ISD::isNON_EXTLoad(N.getNode()) && - N.hasOneUse() && - IsLegalAndProfitableToFold(N.getNode(), P, P)) + IsProfitableToFold(N, P, P) && + IsLegalToFold(N, P, P)) return SelectAddr(P, N.getOperand(1), Base, Scale, Index, Disp, Segment); return false; } @@ -1606,7 +1623,7 @@ SDNode *X86DAGToDAGISel::SelectAtomicLoadAdd(SDNode *Node, EVT NVT) { } DebugLoc dl = Node->getDebugLoc(); - SDValue Undef = SDValue(CurDAG->getMachineNode(TargetInstrInfo::IMPLICIT_DEF, + SDValue Undef = SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, NVT), 0); MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); MemOp[0] = cast<MemSDNode>(Node)->getMemOperand(); @@ -1652,8 +1669,8 @@ static bool HasNoSignedComparisonUses(SDNode *N) { case X86::SETEr: case X86::SETNEr: case X86::SETPr: case X86::SETNPr: case X86::SETAm: case X86::SETAEm: case X86::SETBm: case X86::SETBEm: case X86::SETEm: case X86::SETNEm: case X86::SETPm: case X86::SETNPm: - case X86::JA: case X86::JAE: case X86::JB: case X86::JBE: - case X86::JE: case X86::JNE: case X86::JP: case X86::JNP: + case X86::JA_4: case X86::JAE_4: case X86::JB_4: case X86::JBE_4: + case X86::JE_4: case X86::JNE_4: case X86::JP_4: case X86::JNP_4: case X86::CMOVA16rr: case X86::CMOVA16rm: case X86::CMOVA32rr: case X86::CMOVA32rm: case X86::CMOVA64rr: case X86::CMOVA64rm: diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 11e07df..9974d8c 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -12,9 +12,11 @@ // //===----------------------------------------------------------------------===// +#define DEBUG_TYPE "x86-isel" #include "X86.h" #include "X86InstrBuilder.h" #include "X86ISelLowering.h" +#include "X86MCTargetExpr.h" #include "X86TargetMachine.h" #include "X86TargetObjectFile.h" #include "llvm/CallingConv.h" @@ -26,24 +28,30 @@ #include "llvm/Instructions.h" #include "llvm/Intrinsics.h" #include "llvm/LLVMContext.h" -#include "llvm/ADT/BitVector.h" -#include "llvm/ADT/VectorExtras.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/PseudoSourceValue.h" -#include "llvm/Support/MathExtras.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Target/TargetOptions.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/ADT/BitVector.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/VectorExtras.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include
"llvm/Support/ErrorHandling.h" +#include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; +STATISTIC(NumTailCalls, "Number of tail calls"); + static cl::opt<bool> DisableMMX("disable-mmx", cl::Hidden, cl::desc("Disable use of MMX")); @@ -67,13 +75,14 @@ static TargetLoweringObjectFile *createTLOF(X86TargetMachine &TM) { return new X8664_MachoTargetObjectFile(); return new X8632_MachoTargetObjectFile(); case X86Subtarget::isELF: - return new TargetLoweringObjectFileELF(); + if (TM.getSubtarget<X86Subtarget>().is64Bit()) + return new X8664_ELFTargetObjectFile(TM); + return new X8632_ELFTargetObjectFile(TM); case X86Subtarget::isMingw: case X86Subtarget::isCygwin: case X86Subtarget::isWindows: return new TargetLoweringObjectFileCOFF(); } - } X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) @@ -747,6 +756,12 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom); setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom); + setOperationAction(ISD::CONCAT_VECTORS, MVT::v2f64, Custom); + setOperationAction(ISD::CONCAT_VECTORS, MVT::v2i64, Custom); + setOperationAction(ISD::CONCAT_VECTORS, MVT::v16i8, Custom); + setOperationAction(ISD::CONCAT_VECTORS, MVT::v8i16, Custom); + setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i32, Custom); + // Custom lower build_vector, vector_shuffle, and extract_vector_elt. for (unsigned i = (unsigned)MVT::v16i8; i != (unsigned)MVT::v2i64; ++i) { EVT VT = (MVT::SimpleValueType)i; @@ -987,19 +1002,6 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) computeRegisterProperties(); - // Divide and reminder operations have no vector equivalent and can - // trap. Do a custom widening for these operations in which we never - // generate more divides/remainder than the original vector width. - for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE; - VT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++VT) { - if (!isTypeLegal((MVT::SimpleValueType)VT)) { - setOperationAction(ISD::SDIV, (MVT::SimpleValueType) VT, Custom); - setOperationAction(ISD::UDIV, (MVT::SimpleValueType) VT, Custom); - setOperationAction(ISD::SREM, (MVT::SimpleValueType) VT, Custom); - setOperationAction(ISD::UREM, (MVT::SimpleValueType) VT, Custom); - } - } - // FIXME: These should be based on subtarget info. Plus, the values should // be smaller when we are in optimizing for size mode. maxStoresPerMemset = 16; // For @llvm.memset -> sequence of stores @@ -1084,12 +1086,46 @@ X86TargetLowering::getOptimalMemOpType(uint64_t Size, unsigned Align, return MVT::i32; } +/// getJumpTableEncoding - Return the entry encoding for a jump table in the +/// current function. The returned value is a member of the +/// MachineJumpTableInfo::JTEntryKind enum. +unsigned X86TargetLowering::getJumpTableEncoding() const { + // In GOT pic mode, each entry in the jump table is emitted as a @GOTOFF + // symbol. + if (getTargetMachine().getRelocationModel() == Reloc::PIC_ && + Subtarget->isPICStyleGOT()) + return MachineJumpTableInfo::EK_Custom32; + + // Otherwise, use the normal jump table encoding heuristics. + return TargetLowering::getJumpTableEncoding(); +} + +/// getPICBaseSymbol - Return the X86-32 PIC base. 
+MCSymbol * +X86TargetLowering::getPICBaseSymbol(const MachineFunction *MF, + MCContext &Ctx) const { + const MCAsmInfo &MAI = *getTargetMachine().getMCAsmInfo(); + return Ctx.GetOrCreateSymbol(Twine(MAI.getPrivateGlobalPrefix())+ + Twine(MF->getFunctionNumber())+"$pb"); +} + + +const MCExpr * +X86TargetLowering::LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI, + const MachineBasicBlock *MBB, + unsigned uid,MCContext &Ctx) const{ + assert(getTargetMachine().getRelocationModel() == Reloc::PIC_ && + Subtarget->isPICStyleGOT()); + // In 32-bit ELF systems, our jump table entries are formed with @GOTOFF + // entries. + return X86MCTargetExpr::Create(MBB->getSymbol(Ctx), + X86MCTargetExpr::GOTOFF, Ctx); +} + /// getPICJumpTableRelocBase - Returns relocation base for the given PIC /// jumptable. SDValue X86TargetLowering::getPICJumpTableRelocBase(SDValue Table, - SelectionDAG &DAG) const { - if (usesGlobalOffsetTable()) - return DAG.getGLOBAL_OFFSET_TABLE(getPointerTy()); + SelectionDAG &DAG) const { if (!Subtarget->is64Bit()) // This doesn't have DebugLoc associated with it, but is not really the // same as a Register. @@ -1098,6 +1134,20 @@ SDValue X86TargetLowering::getPICJumpTableRelocBase(SDValue Table, return Table; } +/// getPICJumpTableRelocBaseExpr - This returns the relocation base for the +/// given PIC jumptable, the same as getPICJumpTableRelocBase, but as an +/// MCExpr. +const MCExpr *X86TargetLowering:: +getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI, + MCContext &Ctx) const { + // X86-64 uses RIP relative addressing based on the jump table label. + if (Subtarget->isPICStyleRIPRel()) + return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx); + + // Otherwise, the reference is relative to the PIC base. + return MCSymbolRefExpr::Create(getPICBaseSymbol(MF, Ctx), Ctx); +} + /// getFunctionAlignment - Return the Log2 alignment of this function. unsigned X86TargetLowering::getFunctionAlignment(const Function *F) const { return F->hasFnAttr(Attribute::OptimizeForSize) ? 0 : 4; @@ -1131,13 +1181,11 @@ X86TargetLowering::LowerReturn(SDValue Chain, RVLocs, *DAG.getContext()); CCInfo.AnalyzeReturn(Outs, RetCC_X86); - // If this is the first return lowered for this function, add the regs to the - // liveout set for the function. - if (DAG.getMachineFunction().getRegInfo().liveout_empty()) { - for (unsigned i = 0; i != RVLocs.size(); ++i) - if (RVLocs[i].isRegLoc()) - DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg()); - } + // Add the regs to the liveout set for the function. + MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo(); + for (unsigned i = 0; i != RVLocs.size(); ++i) + if (RVLocs[i].isRegLoc() && !MRI.isLiveOut(RVLocs[i].getLocReg())) + MRI.addLiveOut(RVLocs[i].getLocReg()); SDValue Flag; @@ -1190,7 +1238,7 @@ X86TargetLowering::LowerReturn(SDValue Chain, X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>(); unsigned Reg = FuncInfo->getSRetReturnReg(); if (!Reg) { - Reg = MF.getRegInfo().createVirtualRegister(getRegClassFor(MVT::i64)); + Reg = MRI.createVirtualRegister(getRegClassFor(MVT::i64)); FuncInfo->setSRetReturnReg(Reg); } SDValue Val = DAG.getCopyFromReg(Chain, dl, Reg, getPointerTy()); @@ -1199,7 +1247,7 @@ X86TargetLowering::LowerReturn(SDValue Chain, Flag = Chain.getValue(1); // RAX now acts like a return value. - MF.getRegInfo().addLiveOut(X86::RAX); + MRI.addLiveOut(X86::RAX); } RetOps[0] = Chain; // Update chain.
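The EK_Custom32/@GOTOFF machinery added above boils down to simple address arithmetic: each 32-bit jump-table entry stores a basic-block address minus the PIC base (the GOT), and the dispatch sequence adds the base back before jumping. A rough sketch of what the emitted code computes at run time; the names here are illustrative, not LLVM API:

#include <cstdint>

using BlockFn = void (*)();

// Dispatch through a 32-bit @GOTOFF jump table: Table[Index] holds
// (BlockAddress - PicBase), so the target is recovered by re-adding PicBase
// (held in EBX in the GOT PIC style the diff targets).
void dispatch(const int32_t *Table, unsigned Index, uintptr_t PicBase) {
  uintptr_t Target = PicBase + (intptr_t)Table[Index];
  reinterpret_cast<BlockFn>(Target)();  // effectively: jmp *Target
}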
@@ -1329,7 +1377,7 @@ bool X86TargetLowering::IsCalleePop(bool IsVarArg, CallingConv::ID CallingConv){ case CallingConv::X86_FastCall: return !Subtarget->is64Bit(); case CallingConv::Fast: - return PerformTailCallOpt; + return GuaranteedTailCallOpt; } } @@ -1351,18 +1399,6 @@ CCAssignFn *X86TargetLowering::CCAssignFnForNode(CallingConv::ID CC) const { return CC_X86_32_C; } -/// NameDecorationForCallConv - Selects the appropriate decoration to -/// apply to a MachineFunction containing a given calling convention. -NameDecorationStyle -X86TargetLowering::NameDecorationForCallConv(CallingConv::ID CallConv) { - if (CallConv == CallingConv::X86_FastCall) - return FastCall; - else if (CallConv == CallingConv::X86_StdCall) - return StdCall; - return None; -} - - /// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified /// by "Src" to address "Dst" with size and alignment information specified by /// the specific parameter attribute. The copy will be passed as a byval @@ -1376,6 +1412,12 @@ CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain, /*AlwaysInline=*/true, NULL, 0, NULL, 0); } +/// FuncIsMadeTailCallSafe - Return true if the function is being made into +/// a tailcall target by changing its ABI. +static bool FuncIsMadeTailCallSafe(CallingConv::ID CC) { + return GuaranteedTailCallOpt && CC == CallingConv::Fast; +} + SDValue X86TargetLowering::LowerMemArgument(SDValue Chain, CallingConv::ID CallConv, @@ -1384,10 +1426,9 @@ X86TargetLowering::LowerMemArgument(SDValue Chain, const CCValAssign &VA, MachineFrameInfo *MFI, unsigned i) { - // Create the nodes corresponding to a load from this parameter slot. ISD::ArgFlagsTy Flags = Ins[i].Flags; - bool AlwaysUseMutable = (CallConv==CallingConv::Fast) && PerformTailCallOpt; + bool AlwaysUseMutable = FuncIsMadeTailCallSafe(CallConv); bool isImmutable = !AlwaysUseMutable && !Flags.isByVal(); EVT ValVT; @@ -1402,13 +1443,18 @@ X86TargetLowering::LowerMemArgument(SDValue Chain, // changed with more analysis. // In case of tail call optimization mark all arguments mutable. Since they // could be overwritten by lowering of arguments in case of a tail call. - int FI = MFI->CreateFixedObject(ValVT.getSizeInBits()/8, - VA.getLocMemOffset(), isImmutable, false); - SDValue FIN = DAG.getFrameIndex(FI, getPointerTy()); - if (Flags.isByVal()) - return FIN; - return DAG.getLoad(ValVT, dl, Chain, FIN, - PseudoSourceValue::getFixedStack(FI), 0); + if (Flags.isByVal()) { + int FI = MFI->CreateFixedObject(Flags.getByValSize(), + VA.getLocMemOffset(), isImmutable, false); + return DAG.getFrameIndex(FI, getPointerTy()); + } else { + int FI = MFI->CreateFixedObject(ValVT.getSizeInBits()/8, + VA.getLocMemOffset(), isImmutable, false); + SDValue FIN = DAG.getFrameIndex(FI, getPointerTy()); + return DAG.getLoad(ValVT, dl, Chain, FIN, + PseudoSourceValue::getFixedStack(FI), 0, + false, false, 0); + } } SDValue @@ -1429,9 +1475,6 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, Fn->getName() == "main") FuncInfo->setForceFramePointer(true); - // Decorate the function name. - FuncInfo->setDecorationStyle(NameDecorationForCallConv(CallConv)); - MachineFrameInfo *MFI = MF.getFrameInfo(); bool Is64Bit = Subtarget->is64Bit(); bool IsWin64 = Subtarget->isTargetWin64(); @@ -1504,7 +1547,8 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, // If value is passed via pointer - do a load. 
if (VA.getLocInfo() == CCValAssign::Indirect) - ArgValue = DAG.getLoad(VA.getValVT(), dl, Chain, ArgValue, NULL, 0); + ArgValue = DAG.getLoad(VA.getValVT(), dl, Chain, ArgValue, NULL, 0, + false, false, 0); InVals.push_back(ArgValue); } @@ -1524,8 +1568,8 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, } unsigned StackSize = CCInfo.getNextStackOffset(); - // align stack specially for tail calls - if (PerformTailCallOpt && CallConv == CallingConv::Fast) + // Align stack specially for tail calls. + if (FuncIsMadeTailCallSafe(CallConv)) StackSize = GetAlignedArgumentStackSize(StackSize, DAG); // If the function takes variable number of arguments, make a frame index for @@ -1599,7 +1643,7 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, PseudoSourceValue::getFixedStack(RegSaveFrameIndex), - Offset); + Offset, false, false, 0); MemOps.push_back(Store); Offset += 8; } @@ -1636,13 +1680,11 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, // Some CCs need callee pop. if (IsCalleePop(isVarArg, CallConv)) { BytesToPopOnReturn = StackSize; // Callee pops everything. - BytesCallerReserves = 0; } else { BytesToPopOnReturn = 0; // Callee pops nothing. // If this is an sret function, the return should pop the hidden pointer. if (!Is64Bit && CallConv != CallingConv::Fast && ArgsAreStructReturn(Ins)) BytesToPopOnReturn = 4; - BytesCallerReserves = StackSize; } if (!Is64Bit) { @@ -1670,27 +1712,23 @@ X86TargetLowering::LowerMemOpCallTo(SDValue Chain, return CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG, dl); } return DAG.getStore(Chain, dl, Arg, PtrOff, - PseudoSourceValue::getStack(), LocMemOffset); + PseudoSourceValue::getStack(), LocMemOffset, + false, false, 0); } /// EmitTailCallLoadRetAddr - Emit a load of return address if tail call /// optimization is performed and it is required. SDValue X86TargetLowering::EmitTailCallLoadRetAddr(SelectionDAG &DAG, - SDValue &OutRetAddr, - SDValue Chain, - bool IsTailCall, - bool Is64Bit, - int FPDiff, - DebugLoc dl) { - if (!IsTailCall || FPDiff==0) return Chain; - + SDValue &OutRetAddr, SDValue Chain, + bool IsTailCall, bool Is64Bit, + int FPDiff, DebugLoc dl) { // Adjust the Return address stack slot. EVT VT = getPointerTy(); OutRetAddr = getReturnAddressFrameIndex(DAG); // Load the "old" Return address. - OutRetAddr = DAG.getLoad(VT, dl, Chain, OutRetAddr, NULL, 0); + OutRetAddr = DAG.getLoad(VT, dl, Chain, OutRetAddr, NULL, 0, false, false, 0); return SDValue(OutRetAddr.getNode(), 1); } @@ -1705,31 +1743,42 @@ EmitTailCallStoreRetAddr(SelectionDAG & DAG, MachineFunction &MF, // Calculate the new stack slot for the return address. int SlotSize = Is64Bit ? 8 : 4; int NewReturnAddrFI = - MF.getFrameInfo()->CreateFixedObject(SlotSize, FPDiff-SlotSize, - true, false); + MF.getFrameInfo()->CreateFixedObject(SlotSize, FPDiff-SlotSize, true,false); EVT VT = Is64Bit ? 
MVT::i64 : MVT::i32; SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewReturnAddrFI, VT); Chain = DAG.getStore(Chain, dl, RetAddrFrIdx, NewRetAddrFrIdx, - PseudoSourceValue::getFixedStack(NewReturnAddrFI), 0); + PseudoSourceValue::getFixedStack(NewReturnAddrFI), 0, + false, false, 0); return Chain; } SDValue X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg, - bool isTailCall, + bool &isTailCall, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) { - MachineFunction &MF = DAG.getMachineFunction(); bool Is64Bit = Subtarget->is64Bit(); bool IsStructRet = CallIsStructReturn(Outs); + bool IsSibcall = false; + + if (isTailCall) { + // Check if it's really possible to do a tail call. + isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, isVarArg, + Outs, Ins, DAG); + + // Sibcalls are automatically detected tailcalls which do not require + // ABI changes. + if (!GuaranteedTailCallOpt && isTailCall) + IsSibcall = true; + + if (isTailCall) + ++NumTailCalls; + } - assert((!isTailCall || - (CallConv == CallingConv::Fast && PerformTailCallOpt)) && - "IsEligibleForTailCallOptimization missed a case!"); assert(!(isVarArg && CallConv == CallingConv::Fast) && "Var args not supported with calling convention fastcc"); @@ -1741,11 +1790,15 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee, // Get a count of how many bytes are to be pushed on the stack. unsigned NumBytes = CCInfo.getNextStackOffset(); - if (PerformTailCallOpt && CallConv == CallingConv::Fast) + if (IsSibcall) + // This is a sibcall. The memory operands are available in caller's + // own caller's stack. + NumBytes = 0; + else if (GuaranteedTailCallOpt && CallConv == CallingConv::Fast) NumBytes = GetAlignedArgumentStackSize(NumBytes, DAG); int FPDiff = 0; - if (isTailCall) { + if (isTailCall && !IsSibcall) { // Lower arguments at fp - stackoffset + fpdiff. unsigned NumBytesCallerPushed = MF.getInfo<X86MachineFunctionInfo>()->getBytesToPopOnReturn(); @@ -1757,12 +1810,14 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee, MF.getInfo<X86MachineFunctionInfo>()->setTCReturnAddrDelta(FPDiff); } - Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true)); + if (!IsSibcall) + Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true)); SDValue RetAddrFrIdx; // Load return address for tail calls.
- Chain = EmitTailCallLoadRetAddr(DAG, RetAddrFrIdx, Chain, isTailCall, Is64Bit, - FPDiff, dl); + if (isTailCall && FPDiff) + Chain = EmitTailCallLoadRetAddr(DAG, RetAddrFrIdx, Chain, isTailCall, + Is64Bit, FPDiff, dl); SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass; SmallVector<SDValue, 8> MemOpChains; @@ -1804,7 +1859,8 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee, SDValue SpillSlot = DAG.CreateStackTemporary(VA.getValVT()); int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex(); Chain = DAG.getStore(Chain, dl, Arg, SpillSlot, - PseudoSourceValue::getFixedStack(FI), 0); + PseudoSourceValue::getFixedStack(FI), 0, + false, false, 0); Arg = SpillSlot; break; } @@ -1812,15 +1868,12 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee, if (VA.isRegLoc()) { RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg)); - } else { - if (!isTailCall || (isTailCall && isByVal)) { - assert(VA.isMemLoc()); - if (StackPtr.getNode() == 0) - StackPtr = DAG.getCopyFromReg(Chain, dl, X86StackPtr, getPointerTy()); - - MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg, - dl, DAG, VA, Flags)); - } + } else if (!IsSibcall && (!isTailCall || isByVal)) { + assert(VA.isMemLoc()); + if (StackPtr.getNode() == 0) + StackPtr = DAG.getCopyFromReg(Chain, dl, X86StackPtr, getPointerTy()); + MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg, + dl, DAG, VA, Flags)); } } @@ -1840,7 +1893,6 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee, InFlag = Chain.getValue(1); } - if (Subtarget->isPICStyleGOT()) { // ELF / PIC requires GOT in the EBX register before function calls via PLT // GOT pointer. @@ -1910,9 +1962,11 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee, int FI = 0; // Do not flag preceding copytoreg stuff together with the following stuff. InFlag = SDValue(); - for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { - CCValAssign &VA = ArgLocs[i]; - if (!VA.isRegLoc()) { + if (GuaranteedTailCallOpt) { + for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { + CCValAssign &VA = ArgLocs[i]; + if (VA.isRegLoc()) + continue; assert(VA.isMemLoc()); SDValue Arg = Outs[i].Val; ISD::ArgFlagsTy Flags = Outs[i].Flags; @@ -1937,7 +1991,8 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee, // Store relative to framepointer. MemOpChains2.push_back( DAG.getStore(ArgChain, dl, Arg, FIN, - PseudoSourceValue::getFixedStack(FI), 0)); + PseudoSourceValue::getFixedStack(FI), 0, + false, false, 0)); } } } @@ -2020,21 +2075,22 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee, } if (isTailCall && !WasGlobalOrExternal) { - unsigned Opc = Is64Bit ? X86::R11 : X86::EAX; - + // Force the address into a (call preserved) caller-saved register since + // tailcall must happen after callee-saved registers are popped. + // FIXME: Give it a special register class that contains caller-saved + // registers instead? + unsigned TCReg = Is64Bit ? X86::R11 : X86::EAX; Chain = DAG.getCopyToReg(Chain, dl, - DAG.getRegister(Opc, getPointerTy()), + DAG.getRegister(TCReg, getPointerTy()), Callee,InFlag); - Callee = DAG.getRegister(Opc, getPointerTy()); - // Add register as live out. - MF.getRegInfo().addLiveOut(Opc); + Callee = DAG.getRegister(TCReg, getPointerTy()); } // Returns a chain & a flag for retval copy to use.
SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag); SmallVector<SDValue, 8> Ops; - if (isTailCall) { + if (!IsSibcall && isTailCall) { Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true), DAG.getIntPtrConstant(0, true), InFlag); InFlag = Chain.getValue(1); @@ -2095,7 +2151,7 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee, if (IsCalleePop(isVarArg, CallConv)) NumBytesForCalleeToPush = NumBytes; // Callee pops everything else if (!Is64Bit && CallConv != CallingConv::Fast && IsStructRet) - // If this is is a call to a struct-return function, the callee + // If this is a call to a struct-return function, the callee // pops the hidden struct pointer, so we have to push it back. // This is common for Darwin/X86, Linux & Mingw32 targets. NumBytesForCalleeToPush = 4; @@ -2103,12 +2159,14 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee, NumBytesForCalleeToPush = 0; // Callee pops nothing. // Returns a flag for retval copy to use. - Chain = DAG.getCALLSEQ_END(Chain, - DAG.getIntPtrConstant(NumBytes, true), - DAG.getIntPtrConstant(NumBytesForCalleeToPush, - true), - InFlag); - InFlag = Chain.getValue(1); + if (!IsSibcall) { + Chain = DAG.getCALLSEQ_END(Chain, + DAG.getIntPtrConstant(NumBytes, true), + DAG.getIntPtrConstant(NumBytesForCalleeToPush, + true), + InFlag); + InFlag = Chain.getValue(1); + } // Handle result values, copying them out of physregs into vregs that we // return. @@ -2170,6 +2228,50 @@ unsigned X86TargetLowering::GetAlignedArgumentStackSize(unsigned StackSize, return Offset; } +/// MatchingStackOffset - Return true if the given stack call argument is +/// already available in the same position (relatively) of the caller's +/// incoming argument stack. +static +bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags, + MachineFrameInfo *MFI, const MachineRegisterInfo *MRI, + const X86InstrInfo *TII) { + int FI; + if (Arg.getOpcode() == ISD::CopyFromReg) { + unsigned VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg(); + if (!VR || TargetRegisterInfo::isPhysicalRegister(VR)) + return false; + MachineInstr *Def = MRI->getVRegDef(VR); + if (!Def) + return false; + if (!Flags.isByVal()) { + if (!TII->isLoadFromStackSlot(Def, FI)) + return false; + } else { + unsigned Opcode = Def->getOpcode(); + if ((Opcode == X86::LEA32r || Opcode == X86::LEA64r) && + Def->getOperand(1).isFI()) { + FI = Def->getOperand(1).getIndex(); + if (MFI->getObjectSize(FI) != Flags.getByValSize()) + return false; + } else + return false; + } + } else { + LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg); + if (!Ld) + return false; + SDValue Ptr = Ld->getBasePtr(); + FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr); + if (!FINode) + return false; + FI = FINode->getIndex(); + } + + if (!MFI->isFixedObjectIndex(FI)) + return false; + return Offset == MFI->getObjectOffset(FI); +} + /// IsEligibleForTailCallOptimization - Check whether the call is eligible /// for tail call optimization. Targets which want to do tail call /// optimization should implement this function. 
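MatchingStackOffset above is the load-bearing check for sibcalls: an outgoing stack argument may be left untouched only when it provably is the caller's own incoming argument, already sitting in a fixed frame object at exactly the offset the callee expects. Stripped of the MachineInstr plumbing, the predicate reduces to something like the following sketch (the struct is a simplified stand-in for the MachineFrameInfo queries in the diff):

#include <cstdint>

struct FrameObject {
  bool IsFixed;      // Fixed objects are the caller's incoming-argument slots.
  int64_t Offset;    // Frame offset of the slot.
};

// Recognized producers in the real code: a load from a stack slot, or for
// byval arguments an LEA32r/LEA64r of a frame index of matching size. Once
// the frame index is found, the remaining test is just this:
static bool matchingStackOffset(const FrameObject &ArgSlot, int64_t WantedOffset) {
  return ArgSlot.IsFixed && ArgSlot.Offset == WantedOffset;
}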
@@ -2177,23 +2279,79 @@ bool X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, CallingConv::ID CalleeCC, bool isVarArg, - const SmallVectorImpl<ISD::InputArg> &Ins, + const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG& DAG) const { - MachineFunction &MF = DAG.getMachineFunction(); - CallingConv::ID CallerCC = MF.getFunction()->getCallingConv(); - return CalleeCC == CallingConv::Fast && CallerCC == CalleeCC; + if (CalleeCC != CallingConv::Fast && + CalleeCC != CallingConv::C) + return false; + + // If -tailcallopt is specified, make fastcc functions tail-callable. + const Function *CallerF = DAG.getMachineFunction().getFunction(); + if (GuaranteedTailCallOpt) { + if (CalleeCC == CallingConv::Fast && + CallerF->getCallingConv() == CalleeCC) + return true; + return false; + } + + // Look for obvious safe cases to perform tail call optimization that do not + // require ABI changes. This is what gcc calls sibcall. + + // Do not tail call optimize vararg calls for now. + if (isVarArg) + return false; + + // If the callee takes no arguments then go on to check the results of the + // call. + if (!Outs.empty()) { + // Check if stack adjustment is needed. For now, do not do this if any + // argument is passed on the stack. + SmallVector<CCValAssign, 16> ArgLocs; + CCState CCInfo(CalleeCC, isVarArg, getTargetMachine(), + ArgLocs, *DAG.getContext()); + CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForNode(CalleeCC)); + if (CCInfo.getNextStackOffset()) { + MachineFunction &MF = DAG.getMachineFunction(); + if (MF.getInfo<X86MachineFunctionInfo>()->getBytesToPopOnReturn()) + return false; + if (Subtarget->isTargetWin64()) + // Win64 ABI has additional complications. + return false; + + // Check if the arguments are already laid out in the right way as + // the caller's fixed stack objects. + MachineFrameInfo *MFI = MF.getFrameInfo(); + const MachineRegisterInfo *MRI = &MF.getRegInfo(); + const X86InstrInfo *TII = + ((X86TargetMachine&)getTargetMachine()).getInstrInfo(); + for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { + CCValAssign &VA = ArgLocs[i]; + EVT RegVT = VA.getLocVT(); + SDValue Arg = Outs[i].Val; + ISD::ArgFlagsTy Flags = Outs[i].Flags; + if (VA.getLocInfo() == CCValAssign::Indirect) + return false; + if (!VA.isRegLoc()) { + if (!MatchingStackOffset(Arg, VA.getLocMemOffset(), Flags, + MFI, MRI, TII)) + return false; + } + } + } + } + + return true; } FastISel * -X86TargetLowering::createFastISel(MachineFunction &mf, - MachineModuleInfo *mmo, - DwarfWriter *dw, - DenseMap<const Value *, unsigned> &vm, - DenseMap<const BasicBlock *, - MachineBasicBlock *> &bm, - DenseMap<const AllocaInst *, int> &am +X86TargetLowering::createFastISel(MachineFunction &mf, MachineModuleInfo *mmo, + DwarfWriter *dw, + DenseMap<const Value *, unsigned> &vm, + DenseMap<const BasicBlock*, MachineBasicBlock*> &bm, + DenseMap<const AllocaInst *, int> &am #ifndef NDEBUG - , SmallSet<Instruction*, 8> &cil + , SmallSet<Instruction*, 8> &cil #endif ) { return X86::createFastISel(mf, mmo, dw, vm, bm, am @@ -3413,7 +3571,8 @@ X86TargetLowering::LowerAsSplatVectorLoad(SDValue SrcOp, EVT VT, DebugLoc dl, int EltNo = (Offset - StartOffset) >> 2; int Mask[4] = { EltNo, EltNo, EltNo, EltNo }; EVT VT = (PVT == MVT::i32) ? MVT::v4i32 : MVT::v4f32; - SDValue V1 = DAG.getLoad(VT, dl, Chain, Ptr,LD->getSrcValue(),0); + SDValue V1 = DAG.getLoad(VT, dl, Chain, Ptr,LD->getSrcValue(),0, + false, false, 0); // Canonicalize it to a v4i32 shuffle.
V1 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v4i32, V1); return DAG.getNode(ISD::BIT_CONVERT, dl, VT, @@ -3686,6 +3845,33 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) { return SDValue(); } +SDValue +X86TargetLowering::LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) { + // We support concatenating two MMX registers and placing them in an MMX + // register. This is better than doing a stack-based conversion. + DebugLoc dl = Op.getDebugLoc(); + EVT ResVT = Op.getValueType(); + assert(Op.getNumOperands() == 2); + assert(ResVT == MVT::v2i64 || ResVT == MVT::v4i32 || + ResVT == MVT::v8i16 || ResVT == MVT::v16i8); + int Mask[2]; + SDValue InVec = DAG.getNode(ISD::BIT_CONVERT,dl, MVT::v1i64, Op.getOperand(0)); + SDValue VecOp = DAG.getNode(X86ISD::MOVQ2DQ, dl, MVT::v2i64, InVec); + InVec = Op.getOperand(1); + if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR) { + unsigned NumElts = ResVT.getVectorNumElements(); + VecOp = DAG.getNode(ISD::BIT_CONVERT, dl, ResVT, VecOp); + VecOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, ResVT, VecOp, + InVec.getOperand(0), DAG.getIntPtrConstant(NumElts/2+1)); + } else { + InVec = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v1i64, InVec); + SDValue VecOp2 = DAG.getNode(X86ISD::MOVQ2DQ, dl, MVT::v2i64, InVec); + Mask[0] = 0; Mask[1] = 2; + VecOp = DAG.getVectorShuffle(MVT::v2i64, dl, VecOp, VecOp2, Mask); + } + return DAG.getNode(ISD::BIT_CONVERT, dl, ResVT, VecOp); +} + // v8i16 shuffles - Prefer shuffles in the following order: // 1. [all] pshuflw, pshufhw, optional move // 2. [ssse3] 1 x pshufb @@ -4885,7 +5071,7 @@ X86TargetLowering::LowerGlobalAddress(const GlobalValue *GV, DebugLoc dl, // load. if (isGlobalStubReference(OpFlags)) Result = DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), Result, - PseudoSourceValue::getGOT(), 0); + PseudoSourceValue::getGOT(), 0, false, false, 0); // If there was a non-zero offset that we didn't fold, create an explicit // addition for it. @@ -4965,7 +5151,7 @@ static SDValue LowerToTLSExecModel(GlobalAddressSDNode *GA, SelectionDAG &DAG, MVT::i32)); SDValue ThreadPointer = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Base, - NULL, 0); + NULL, 0, false, false, 0); unsigned char OperandFlags = 0; // Most TLS accesses are not RIP relative, even on x86-64. One exception is @@ -4990,7 +5176,7 @@ static SDValue LowerToTLSExecModel(GlobalAddressSDNode *GA, SelectionDAG &DAG, if (model == TLSModel::InitialExec) Offset = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Offset, - PseudoSourceValue::getGOT(), 0); + PseudoSourceValue::getGOT(), 0, false, false, 0); // The address of the thread local variable is the sum of the thread // pointer and the offset of the variable.
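The LowerCONCAT_VECTORS path above first widens each 64-bit MMX half into a 128-bit register (X86ISD::MOVQ2DQ zero-extends into the low quadword of an XMM register), then interleaves the two low quadwords with the v2i64 shuffle mask {0, 2}. In scalar terms the mask does no more than the following sketch shows (element semantics only, not the intrinsics):

#include <cstdint>

// A v2i64 shuffle of sources A and B reads lanes 0-1 from A and lanes 2-3
// from B. Mask {0, 2} therefore takes A[0] and B[0], gluing the two 64-bit
// halves into one 128-bit value.
static void concatHalves(const uint64_t A[2], const uint64_t B[2], uint64_t Out[2]) {
  const uint64_t Lanes[4] = { A[0], A[1], B[0], B[1] };
  const int Mask[2] = { 0, 2 };
  for (int i = 0; i != 2; ++i)
    Out[i] = Lanes[Mask[i]];
}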
@@ -5107,7 +5293,8 @@ SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) { SDValue StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); SDValue Chain = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0), StackSlot, - PseudoSourceValue::getFixedStack(SSFI), 0); + PseudoSourceValue::getFixedStack(SSFI), 0, + false, false, 0); return BuildFILD(Op, SrcVT, Chain, StackSlot, DAG); } @@ -5142,7 +5329,8 @@ SDValue X86TargetLowering::BuildFILD(SDValue Op, EVT SrcVT, SDValue Chain, }; Chain = DAG.getNode(X86ISD::FST, dl, Tys, Ops, array_lengthof(Ops)); Result = DAG.getLoad(Op.getValueType(), dl, Chain, StackSlot, - PseudoSourceValue::getFixedStack(SSFI), 0); + PseudoSourceValue::getFixedStack(SSFI), 0, + false, false, 0); } return Result; @@ -5215,12 +5403,12 @@ SDValue X86TargetLowering::LowerUINT_TO_FP_i64(SDValue Op, SelectionDAG &DAG) { SDValue Unpck1 = getUnpackl(DAG, dl, MVT::v4i32, XR1, XR2); SDValue CLod0 = DAG.getLoad(MVT::v4i32, dl, DAG.getEntryNode(), CPIdx0, PseudoSourceValue::getConstantPool(), 0, - false, 16); + false, false, 16); SDValue Unpck2 = getUnpackl(DAG, dl, MVT::v4i32, Unpck1, CLod0); SDValue XR2F = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2f64, Unpck2); SDValue CLod1 = DAG.getLoad(MVT::v2f64, dl, CLod0.getValue(1), CPIdx1, PseudoSourceValue::getConstantPool(), 0, - false, 16); + false, false, 16); SDValue Sub = DAG.getNode(ISD::FSUB, dl, MVT::v2f64, XR2F, CLod1); // Add the halves; easiest way is to swap them into another reg first. @@ -5307,9 +5495,9 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) { SDValue OffsetSlot = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackSlot, WordOff); SDValue Store1 = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0), - StackSlot, NULL, 0); + StackSlot, NULL, 0, false, false, 0); SDValue Store2 = DAG.getStore(Store1, dl, DAG.getConstant(0, MVT::i32), - OffsetSlot, NULL, 0); + OffsetSlot, NULL, 0, false, false, 0); return BuildFILD(Op, MVT::i64, Store2, StackSlot, DAG); } @@ -5357,7 +5545,8 @@ FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, bool IsSigned) { if (isScalarFPTypeInSSEReg(Op.getOperand(0).getValueType())) { assert(DstTy == MVT::i64 && "Invalid FP_TO_SINT to lower!"); Chain = DAG.getStore(Chain, dl, Value, StackSlot, - PseudoSourceValue::getFixedStack(SSFI), 0); + PseudoSourceValue::getFixedStack(SSFI), 0, + false, false, 0); SDVTList Tys = DAG.getVTList(Op.getOperand(0).getValueType(), MVT::Other); SDValue Ops[] = { Chain, StackSlot, DAG.getValueType(Op.getOperand(0).getValueType()) @@ -5391,7 +5580,7 @@ SDValue X86TargetLowering::LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) { // Load the result. return DAG.getLoad(Op.getValueType(), Op.getDebugLoc(), - FIST, StackSlot, NULL, 0); + FIST, StackSlot, NULL, 0, false, false, 0); } SDValue X86TargetLowering::LowerFP_TO_UINT(SDValue Op, SelectionDAG &DAG) { @@ -5401,7 +5590,7 @@ SDValue X86TargetLowering::LowerFP_TO_UINT(SDValue Op, SelectionDAG &DAG) { // Load the result. 
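BuildFILD above leans on the x87 unit because these conversions have no direct register-to-register form: the integer is spilled to a stack slot, FILD converts it, and when the result must live in an SSE register it is stored back (X86ISD::FST) and reloaded. Roughly what the lowering produces for a 64-bit source on 32-bit x86 (the assembly in the comments is an illustration, not emitted verbatim):

// double sint64_to_double(long long x):
//   movl  ..., -8(%esp)      ; spill the i64 to the stack slot
//   fildll -8(%esp)          ; x87 integer load + convert
//   fstpl -16(%esp)          ; X86ISD::FST back to memory...
//   movsd -16(%esp), %xmm0   ; ...and reload into an SSE register
double sint64_to_double(long long x) {
  return (double)x;           // the C form of the node being lowered
}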
return DAG.getLoad(Op.getValueType(), Op.getDebugLoc(), - FIST, StackSlot, NULL, 0); + FIST, StackSlot, NULL, 0, false, false, 0); } SDValue X86TargetLowering::LowerFABS(SDValue Op, SelectionDAG &DAG) { @@ -5426,8 +5615,8 @@ SDValue X86TargetLowering::LowerFABS(SDValue Op, SelectionDAG &DAG) { Constant *C = ConstantVector::get(CV); SDValue CPIdx = DAG.getConstantPool(C, getPointerTy(), 16); SDValue Mask = DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx, - PseudoSourceValue::getConstantPool(), 0, - false, 16); + PseudoSourceValue::getConstantPool(), 0, + false, false, 16); return DAG.getNode(X86ISD::FAND, dl, VT, Op.getOperand(0), Mask); } @@ -5453,8 +5642,8 @@ SDValue X86TargetLowering::LowerFNEG(SDValue Op, SelectionDAG &DAG) { Constant *C = ConstantVector::get(CV); SDValue CPIdx = DAG.getConstantPool(C, getPointerTy(), 16); SDValue Mask = DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx, - PseudoSourceValue::getConstantPool(), 0, - false, 16); + PseudoSourceValue::getConstantPool(), 0, + false, false, 16); if (VT.isVector()) { return DAG.getNode(ISD::BIT_CONVERT, dl, VT, DAG.getNode(ISD::XOR, dl, MVT::v2i64, @@ -5502,8 +5691,8 @@ SDValue X86TargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) { Constant *C = ConstantVector::get(CV); SDValue CPIdx = DAG.getConstantPool(C, getPointerTy(), 16); SDValue Mask1 = DAG.getLoad(SrcVT, dl, DAG.getEntryNode(), CPIdx, - PseudoSourceValue::getConstantPool(), 0, - false, 16); + PseudoSourceValue::getConstantPool(), 0, + false, false, 16); SDValue SignBit = DAG.getNode(X86ISD::FAND, dl, SrcVT, Op1, Mask1); // Shift sign bit right or left if the two operands have different types. @@ -5531,8 +5720,8 @@ SDValue X86TargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) { C = ConstantVector::get(CV); CPIdx = DAG.getConstantPool(C, getPointerTy(), 16); SDValue Mask2 = DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx, - PseudoSourceValue::getConstantPool(), 0, - false, 16); + PseudoSourceValue::getConstantPool(), 0, + false, false, 16); SDValue Val = DAG.getNode(X86ISD::FAND, dl, VT, Op0, Mask2); // Or the value with the sign bit. @@ -5919,6 +6108,29 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) { Cond = NewCond; } + // (select (x == 0), -1, 0) -> (sign_bit (x - 1)) + SDValue Op1 = Op.getOperand(1); + SDValue Op2 = Op.getOperand(2); + if (Cond.getOpcode() == X86ISD::SETCC && + cast<ConstantSDNode>(Cond.getOperand(0))->getZExtValue() == X86::COND_E) { + SDValue Cmp = Cond.getOperand(1); + if (Cmp.getOpcode() == X86ISD::CMP) { + ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(Op1); + ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(Op2); + ConstantSDNode *RHSC = + dyn_cast<ConstantSDNode>(Cmp.getOperand(1).getNode()); + if (N1C && N1C->isAllOnesValue() && + N2C && N2C->isNullValue() && + RHSC && RHSC->isNullValue()) { + SDValue CmpOp0 = Cmp.getOperand(0); + Cmp = DAG.getNode(X86ISD::CMP, dl, CmpOp0.getValueType(), + CmpOp0, DAG.getConstant(1, CmpOp0.getValueType())); + return DAG.getNode(X86ISD::SETCC_CARRY, dl, Op.getValueType(), + DAG.getConstant(X86::COND_B, MVT::i8), Cmp); + } + } + } + // Look past (and (setcc_carry (cmp ...)), 1). if (Cond.getOpcode() == ISD::AND && Cond.getOperand(0).getOpcode() == X86ISD::SETCC_CARRY) { @@ -5971,10 +6183,10 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) { Cond = EmitTest(Cond, X86::COND_NE, DAG); } - SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Flag); // X86ISD::CMOV means set the result (which is operand 1) to the RHS if // condition is true.
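The new (select (x == 0), -1, 0) combine is the classic carry trick: unsigned x - 1 borrows exactly when x == 0, and SETCC_CARRY materializes as sbb reg,reg, that is, 0 - CF. A small standalone self-check of the identity (plain C++, not LLVM code):

#include <cassert>
#include <cstdint>
static int32_t select_eq0(uint32_t x) {   // what "cmp $1, x ; sbb r, r" computes
  uint32_t carry = (x < 1u);              // CF produced by CMP x, 1
  return (int32_t)(0u - carry);           // sbb r, r  =>  -CF
}
int main() {
  for (uint32_t x : {0u, 1u, 2u, 0xFFFFFFFFu})
    assert(select_eq0(x) == ((x == 0) ? -1 : 0));
}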
- SDValue Ops[] = { Op.getOperand(2), Op.getOperand(1), CC, Cond }; + SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Flag); + SDValue Ops[] = { Op2, Op1, CC, Cond }; return DAG.getNode(X86ISD::CMOV, dl, VTs, Ops, array_lengthof(Ops)); } @@ -6417,7 +6629,8 @@ SDValue X86TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) { // vastart just stores the address of the VarArgsFrameIndex slot into the // memory location argument. SDValue FR = DAG.getFrameIndex(VarArgsFrameIndex, getPointerTy()); - return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1), SV, 0); + return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1), SV, 0, + false, false, 0); } // __va_list_tag: @@ -6429,8 +6642,8 @@ SDValue X86TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) { SDValue FIN = Op.getOperand(1); // Store gp_offset SDValue Store = DAG.getStore(Op.getOperand(0), dl, - DAG.getConstant(VarArgsGPOffset, MVT::i32), - FIN, SV, 0); + DAG.getConstant(VarArgsGPOffset, MVT::i32), + FIN, SV, 0, false, false, 0); MemOps.push_back(Store); // Store fp_offset @@ -6438,21 +6651,23 @@ SDValue X86TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) { FIN, DAG.getIntPtrConstant(4)); Store = DAG.getStore(Op.getOperand(0), dl, DAG.getConstant(VarArgsFPOffset, MVT::i32), - FIN, SV, 0); + FIN, SV, 0, false, false, 0); MemOps.push_back(Store); // Store ptr to overflow_arg_area FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), FIN, DAG.getIntPtrConstant(4)); SDValue OVFIN = DAG.getFrameIndex(VarArgsFrameIndex, getPointerTy()); - Store = DAG.getStore(Op.getOperand(0), dl, OVFIN, FIN, SV, 0); + Store = DAG.getStore(Op.getOperand(0), dl, OVFIN, FIN, SV, 0, + false, false, 0); MemOps.push_back(Store); // Store ptr to reg_save_area. FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), FIN, DAG.getIntPtrConstant(8)); SDValue RSFIN = DAG.getFrameIndex(RegSaveFrameIndex, getPointerTy()); - Store = DAG.getStore(Op.getOperand(0), dl, RSFIN, FIN, SV, 0); + Store = DAG.getStore(Op.getOperand(0), dl, RSFIN, FIN, SV, 0, + false, false, 0); MemOps.push_back(Store); return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &MemOps[0], MemOps.size()); @@ -6738,13 +6953,13 @@ SDValue X86TargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) { return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), DAG.getNode(ISD::ADD, dl, getPointerTy(), FrameAddr, Offset), - NULL, 0); + NULL, 0, false, false, 0); } // Just load the return address. SDValue RetAddrFI = getReturnAddressFrameIndex(DAG); return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), - RetAddrFI, NULL, 0); + RetAddrFI, NULL, 0, false, false, 0); } SDValue X86TargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) { @@ -6756,7 +6971,8 @@ SDValue X86TargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) { unsigned FrameReg = Subtarget->is64Bit() ? 
X86::RBP : X86::EBP; SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, VT); while (Depth--) - FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr, NULL, 0); + FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr, NULL, 0, + false, false, 0); return FrameAddr; } @@ -6780,7 +6996,7 @@ SDValue X86TargetLowering::LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) SDValue StoreAddr = DAG.getNode(ISD::SUB, dl, getPointerTy(), Frame, DAG.getIntPtrConstant(-TD->getPointerSize())); StoreAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(), StoreAddr, Offset); - Chain = DAG.getStore(Chain, dl, Handler, StoreAddr, NULL, 0); + Chain = DAG.getStore(Chain, dl, Handler, StoreAddr, NULL, 0, false, false, 0); Chain = DAG.getCopyToReg(Chain, dl, StoreAddrReg, StoreAddr); MF.getRegInfo().addLiveOut(StoreAddrReg); @@ -6799,16 +7015,12 @@ SDValue X86TargetLowering::LowerTRAMPOLINE(SDValue Op, const Value *TrmpAddr = cast<SrcValueSDNode>(Op.getOperand(4))->getValue(); - const X86InstrInfo *TII = - ((X86TargetMachine&)getTargetMachine()).getInstrInfo(); - if (Subtarget->is64Bit()) { SDValue OutChains[6]; // Large code-model. - - const unsigned char JMP64r = TII->getBaseOpcodeFor(X86::JMP64r); - const unsigned char MOV64ri = TII->getBaseOpcodeFor(X86::MOV64ri); + const unsigned char JMP64r = 0xFF; // 64-bit jmp through register opcode. + const unsigned char MOV64ri = 0xB8; // X86::MOV64ri opcode. const unsigned char N86R10 = RegInfo->getX86RegNum(X86::R10); const unsigned char N86R11 = RegInfo->getX86RegNum(X86::R11); @@ -6819,11 +7031,12 @@ SDValue X86TargetLowering::LowerTRAMPOLINE(SDValue Op, unsigned OpCode = ((MOV64ri | N86R11) << 8) | REX_WB; // movabsq r11 SDValue Addr = Trmp; OutChains[0] = DAG.getStore(Root, dl, DAG.getConstant(OpCode, MVT::i16), - Addr, TrmpAddr, 0); + Addr, TrmpAddr, 0, false, false, 0); Addr = DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp, DAG.getConstant(2, MVT::i64)); - OutChains[1] = DAG.getStore(Root, dl, FPtr, Addr, TrmpAddr, 2, false, 2); + OutChains[1] = DAG.getStore(Root, dl, FPtr, Addr, TrmpAddr, 2, + false, false, 2); // Load the 'nest' parameter value into R10. // R10 is specified in X86CallingConv.td @@ -6831,24 +7044,25 @@ SDValue X86TargetLowering::LowerTRAMPOLINE(SDValue Op, Addr = DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp, DAG.getConstant(10, MVT::i64)); OutChains[2] = DAG.getStore(Root, dl, DAG.getConstant(OpCode, MVT::i16), - Addr, TrmpAddr, 10); + Addr, TrmpAddr, 10, false, false, 0); Addr = DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp, DAG.getConstant(12, MVT::i64)); - OutChains[3] = DAG.getStore(Root, dl, Nest, Addr, TrmpAddr, 12, false, 2); + OutChains[3] = DAG.getStore(Root, dl, Nest, Addr, TrmpAddr, 12, + false, false, 2); // Jump to the nested function. OpCode = (JMP64r << 8) | REX_WB; // jmpq *... 
Addr = DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp, DAG.getConstant(20, MVT::i64)); OutChains[4] = DAG.getStore(Root, dl, DAG.getConstant(OpCode, MVT::i16), - Addr, TrmpAddr, 20); + Addr, TrmpAddr, 20, false, false, 0); unsigned char ModRM = N86R11 | (4 << 3) | (3 << 6); // ...r11 Addr = DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp, DAG.getConstant(22, MVT::i64)); OutChains[5] = DAG.getStore(Root, dl, DAG.getConstant(ModRM, MVT::i8), Addr, - TrmpAddr, 22); + TrmpAddr, 22, false, false, 0); SDValue Ops[] = { Trmp, DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains, 6) }; @@ -6903,25 +7117,28 @@ SDValue X86TargetLowering::LowerTRAMPOLINE(SDValue Op, DAG.getConstant(10, MVT::i32)); Disp = DAG.getNode(ISD::SUB, dl, MVT::i32, FPtr, Addr); - const unsigned char MOV32ri = TII->getBaseOpcodeFor(X86::MOV32ri); + // This is storing the opcode for MOV32ri. + const unsigned char MOV32ri = 0xB8; // X86::MOV32ri's opcode byte. const unsigned char N86Reg = RegInfo->getX86RegNum(NestReg); OutChains[0] = DAG.getStore(Root, dl, DAG.getConstant(MOV32ri|N86Reg, MVT::i8), - Trmp, TrmpAddr, 0); + Trmp, TrmpAddr, 0, false, false, 0); Addr = DAG.getNode(ISD::ADD, dl, MVT::i32, Trmp, DAG.getConstant(1, MVT::i32)); - OutChains[1] = DAG.getStore(Root, dl, Nest, Addr, TrmpAddr, 1, false, 1); + OutChains[1] = DAG.getStore(Root, dl, Nest, Addr, TrmpAddr, 1, + false, false, 1); - const unsigned char JMP = TII->getBaseOpcodeFor(X86::JMP); + const unsigned char JMP = 0xE9; // jmp <32bit dst> opcode. Addr = DAG.getNode(ISD::ADD, dl, MVT::i32, Trmp, DAG.getConstant(5, MVT::i32)); OutChains[2] = DAG.getStore(Root, dl, DAG.getConstant(JMP, MVT::i8), Addr, - TrmpAddr, 5, false, 1); + TrmpAddr, 5, false, false, 1); Addr = DAG.getNode(ISD::ADD, dl, MVT::i32, Trmp, DAG.getConstant(6, MVT::i32)); - OutChains[3] = DAG.getStore(Root, dl, Disp, Addr, TrmpAddr, 6, false, 1); + OutChains[3] = DAG.getStore(Root, dl, Disp, Addr, TrmpAddr, 6, + false, false, 1); SDValue Ops[] = { Trmp, DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains, 4) }; @@ -6964,7 +7181,8 @@ SDValue X86TargetLowering::LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) { DAG.getEntryNode(), StackSlot); // Load FP Control Word from stack slot - SDValue CWD = DAG.getLoad(MVT::i16, dl, Chain, StackSlot, NULL, 0); + SDValue CWD = DAG.getLoad(MVT::i16, dl, Chain, StackSlot, NULL, 0, + false, false, 0); // Transform as necessary SDValue CWD1 = @@ -7238,6 +7456,7 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) { case ISD::ATOMIC_CMP_SWAP: return LowerCMP_SWAP(Op,DAG); case ISD::ATOMIC_LOAD_SUB: return LowerLOAD_SUB(Op,DAG); case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG); + case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG); case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG); case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG); case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG); @@ -7327,7 +7546,8 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N, if (FIST.getNode() != 0) { EVT VT = N->getValueType(0); // Return a load from the stack slot. 
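The six stores in the 64-bit LowerTRAMPOLINE path above write a 23-byte stub; the offsets and bytes below follow the constants in the code (0x49 = REX.WB, 0xB8|reg = MOV64ri, FF /4 = jmp through register). The struct is only an illustration of the layout, not something the backend defines:

#include <cstdint>
struct __attribute__((packed)) Trampoline64 {
  uint8_t  rex0, movR11;   // +0:  49 BB   movabs $fptr, %r11
  uint64_t fptr;           // +2:  target function address
  uint8_t  rex1, movR10;   // +10: 49 BA   movabs $nest, %r10
  uint64_t nest;           // +12: the 'nest' parameter value
  uint8_t  rex2, jmp;      // +20: 41 FF   jmpq *...
  uint8_t  modrm;          // +22: E3 = mod 11 | /4 | rm = r11
};
static_assert(sizeof(Trampoline64) == 23, "matches the six stores");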
- Results.push_back(DAG.getLoad(VT, dl, FIST, StackSlot, NULL, 0)); + Results.push_back(DAG.getLoad(VT, dl, FIST, StackSlot, NULL, 0, + false, false, 0)); } return; } @@ -7345,14 +7565,6 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N, Results.push_back(edx.getValue(1)); return; } - case ISD::SDIV: - case ISD::UDIV: - case ISD::SREM: - case ISD::UREM: { - EVT WidenVT = getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - Results.push_back(DAG.UnrollVectorOp(N, WidenVT.getVectorNumElements())); - return; - } case ISD::ATOMIC_CMP_SWAP: { EVT T = N->getValueType(0); assert (T == MVT::i64 && "Only know how to expand i64 Cmp and Swap"); @@ -7551,7 +7763,7 @@ bool X86TargetLowering::isLegalAddressingMode(const AddrMode &AM, bool X86TargetLowering::isTruncateFree(const Type *Ty1, const Type *Ty2) const { - if (!Ty1->isInteger() || !Ty2->isInteger()) + if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy()) return false; unsigned NumBits1 = Ty1->getPrimitiveSizeInBits(); unsigned NumBits2 = Ty2->getPrimitiveSizeInBits(); @@ -7572,7 +7784,7 @@ bool X86TargetLowering::isTruncateFree(EVT VT1, EVT VT2) const { bool X86TargetLowering::isZExtFree(const Type *Ty1, const Type *Ty2) const { // x86-64 implicitly zero-extends 32-bit results in 64-bit registers. - return Ty1->isInteger(32) && Ty2->isInteger(64) && Subtarget->is64Bit(); + return Ty1->isIntegerTy(32) && Ty2->isIntegerTy(64) && Subtarget->is64Bit(); } bool X86TargetLowering::isZExtFree(EVT VT1, EVT VT2) const { @@ -7728,7 +7940,7 @@ X86TargetLowering::EmitAtomicBitwiseWithCustomInserter(MachineInstr *bInstr, MIB.addReg(EAXreg); // insert branch - BuildMI(newMBB, dl, TII->get(X86::JNE)).addMBB(newMBB); + BuildMI(newMBB, dl, TII->get(X86::JNE_4)).addMBB(newMBB); F->DeleteMachineInstr(bInstr); // The pseudo instruction is gone now. return nextMBB; @@ -7885,7 +8097,7 @@ X86TargetLowering::EmitAtomicBit6432WithCustomInserter(MachineInstr *bInstr, MIB.addReg(X86::EDX); // insert branch - BuildMI(newMBB, dl, TII->get(X86::JNE)).addMBB(newMBB); + BuildMI(newMBB, dl, TII->get(X86::JNE_4)).addMBB(newMBB); F->DeleteMachineInstr(bInstr); // The pseudo instruction is gone now. return nextMBB; @@ -7988,7 +8200,7 @@ X86TargetLowering::EmitAtomicMinMaxWithCustomInserter(MachineInstr *mInstr, MIB.addReg(X86::EAX); // insert branch - BuildMI(newMBB, dl, TII->get(X86::JNE)).addMBB(newMBB); + BuildMI(newMBB, dl, TII->get(X86::JNE_4)).addMBB(newMBB); F->DeleteMachineInstr(mInstr); // The pseudo instruction is gone now. return nextMBB; @@ -8070,7 +8282,7 @@ X86TargetLowering::EmitVAStartSaveXMMRegsWithCustomInserter( if (!Subtarget->isTargetWin64()) { // If %al is 0, branch around the XMM save block. 
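Each atomic pseudo expanded above becomes the same loop shape: load, apply the operation, lock cmpxchg, then the JNE_4 back to the top while the exchange fails. The C++ equivalent of what EmitAtomicBitwiseWithCustomInserter builds for an OR (illustration of the semantics only):

#include <atomic>
#include <cstdint>
uint32_t atomic_or(std::atomic<uint32_t> &mem, uint32_t val) {
  uint32_t old = mem.load();                       // initial load
  while (!mem.compare_exchange_weak(old, old | val))
    ;  // lock cmpxchg failed: 'old' was reloaded; branch back (the JNE_4)
  return old;                                      // the pseudo's EAX result
}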
BuildMI(MBB, DL, TII->get(X86::TEST8rr)).addReg(CountReg).addReg(CountReg); - BuildMI(MBB, DL, TII->get(X86::JE)).addMBB(EndMBB); + BuildMI(MBB, DL, TII->get(X86::JE_4)).addMBB(EndMBB); MBB->addSuccessor(EndMBB); } @@ -8556,10 +8768,11 @@ static SDValue PerformShuffleCombine(SDNode *N, SelectionDAG &DAG, if (DAG.InferPtrAlignment(LD->getBasePtr()) >= 16) return DAG.getLoad(VT, dl, LD->getChain(), LD->getBasePtr(), LD->getSrcValue(), LD->getSrcValueOffset(), - LD->isVolatile()); + LD->isVolatile(), LD->isNonTemporal(), 0); return DAG.getLoad(VT, dl, LD->getChain(), LD->getBasePtr(), LD->getSrcValue(), LD->getSrcValueOffset(), - LD->isVolatile(), LD->getAlignment()); + LD->isVolatile(), LD->isNonTemporal(), + LD->getAlignment()); } else if (NumElems == 4 && LastLoadedElt == 1) { SDVTList Tys = DAG.getVTList(MVT::v2i64, MVT::Other); SDValue Ops[] = { LD->getChain(), LD->getBasePtr() }; @@ -9278,7 +9491,7 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG, SDValue NewLd = DAG.getLoad(LdVT, LdDL, Ld->getChain(), Ld->getBasePtr(), Ld->getSrcValue(), Ld->getSrcValueOffset(), Ld->isVolatile(), - Ld->getAlignment()); + Ld->isNonTemporal(), Ld->getAlignment()); SDValue NewChain = NewLd.getValue(1); if (TokenFactorIndex != -1) { Ops.push_back(NewChain); @@ -9287,7 +9500,8 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG, } return DAG.getStore(NewChain, StDL, NewLd, St->getBasePtr(), St->getSrcValue(), St->getSrcValueOffset(), - St->isVolatile(), St->getAlignment()); + St->isVolatile(), St->isNonTemporal(), + St->getAlignment()); } // Otherwise, lower to two pairs of 32-bit loads / stores. @@ -9297,10 +9511,11 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG, SDValue LoLd = DAG.getLoad(MVT::i32, LdDL, Ld->getChain(), LoAddr, Ld->getSrcValue(), Ld->getSrcValueOffset(), - Ld->isVolatile(), Ld->getAlignment()); + Ld->isVolatile(), Ld->isNonTemporal(), + Ld->getAlignment()); SDValue HiLd = DAG.getLoad(MVT::i32, LdDL, Ld->getChain(), HiAddr, Ld->getSrcValue(), Ld->getSrcValueOffset()+4, - Ld->isVolatile(), + Ld->isVolatile(), Ld->isNonTemporal(), MinAlign(Ld->getAlignment(), 4)); SDValue NewChain = LoLd.getValue(1); @@ -9317,11 +9532,13 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG, SDValue LoSt = DAG.getStore(NewChain, StDL, LoLd, LoAddr, St->getSrcValue(), St->getSrcValueOffset(), - St->isVolatile(), St->getAlignment()); + St->isVolatile(), St->isNonTemporal(), + St->getAlignment()); SDValue HiSt = DAG.getStore(NewChain, StDL, HiLd, HiAddr, St->getSrcValue(), St->getSrcValueOffset() + 4, St->isVolatile(), + St->isNonTemporal(), MinAlign(St->getAlignment(), 4)); return DAG.getNode(ISD::TokenFactor, StDL, MVT::Other, LoSt, HiSt); } @@ -9504,7 +9721,7 @@ static bool LowerToBSwap(CallInst *CI) { // Verify this is a simple bswap. 
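The PerformSTORECombine fallback above splits an i64/f64 round trip into two 32-bit halves, and the alignment bookkeeping matters: the low half keeps the original alignment, while the half at +4 can only claim MinAlign(align, 4). In plain C++ terms:

#include <cstdint>
#include <cstring>
uint64_t load_i64_as_halves(const void *p) {
  uint32_t lo, hi;
  std::memcpy(&lo, p, 4);                       // alignment: Ld->getAlignment()
  std::memcpy(&hi, (const char *)p + 4, 4);     // alignment: MinAlign(align, 4)
  return ((uint64_t)hi << 32) | lo;
}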
if (CI->getNumOperands() != 2 || CI->getType() != CI->getOperand(1)->getType() || - !CI->getType()->isInteger()) + !CI->getType()->isIntegerTy()) return false; const IntegerType *Ty = dyn_cast<IntegerType>(CI->getType()); @@ -9553,7 +9770,7 @@ bool X86TargetLowering::ExpandInlineAsm(CallInst *CI) const { return LowerToBSwap(CI); } // rorw $$8, ${0:w} --> llvm.bswap.i16 - if (CI->getType()->isInteger(16) && + if (CI->getType()->isIntegerTy(16) && AsmPieces.size() == 3 && AsmPieces[0] == "rorw" && AsmPieces[1] == "$$8," && @@ -9563,7 +9780,7 @@ bool X86TargetLowering::ExpandInlineAsm(CallInst *CI) const { } break; case 3: - if (CI->getType()->isInteger(64) && + if (CI->getType()->isIntegerTy(64) && Constraints.size() >= 2 && Constraints[0].Codes.size() == 1 && Constraints[0].Codes[0] == "A" && Constraints[1].Codes.size() == 1 && Constraints[1].Codes[0] == "0") { diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index 64bc70c..cf0eb40 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -19,6 +19,7 @@ #include "X86RegisterInfo.h" #include "X86MachineFunctionInfo.h" #include "llvm/Target/TargetLowering.h" +#include "llvm/Target/TargetOptions.h" #include "llvm/CodeGen/FastISel.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/CallingConvLower.h" @@ -156,6 +157,11 @@ namespace llvm { /// relative displacements. WrapperRIP, + /// MOVQ2DQ - Copies a 64-bit value from a vector to another vector. + /// Can be used to move a vector value from a MMX register to a XMM + /// register. + MOVQ2DQ, + /// PEXTRB - Extract an 8-bit value from a vector and zero extend it to /// i32, corresponds to X86::PEXTRB. PEXTRB, @@ -366,25 +372,33 @@ namespace llvm { unsigned VarArgsGPOffset; // X86-64 vararg func int reg offset. unsigned VarArgsFPOffset; // X86-64 vararg func fp reg offset. int BytesToPopOnReturn; // Number of arg bytes ret should pop. - int BytesCallerReserves; // Number of arg bytes caller makes. public: explicit X86TargetLowering(X86TargetMachine &TM); + /// getPICBaseSymbol - Return the X86-32 PIC base. + MCSymbol *getPICBaseSymbol(const MachineFunction *MF, MCContext &Ctx) const; + + virtual unsigned getJumpTableEncoding() const; + + virtual const MCExpr * + LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI, + const MachineBasicBlock *MBB, unsigned uid, + MCContext &Ctx) const; + /// getPICJumpTableRelocaBase - Returns relocation base for the given PIC /// jumptable. - SDValue getPICJumpTableRelocBase(SDValue Table, - SelectionDAG &DAG) const; - + virtual SDValue getPICJumpTableRelocBase(SDValue Table, + SelectionDAG &DAG) const; + virtual const MCExpr * + getPICJumpTableRelocBaseExpr(const MachineFunction *MF, + unsigned JTI, MCContext &Ctx) const; + // Return the number of bytes that a function should pop when it returns (in // addition to the space used by the return address). // unsigned getBytesToPopOnReturn() const { return BytesToPopOnReturn; } - // Return the number of bytes that the caller reserves for arguments passed - // to this function. - unsigned getBytesCallerReserves() const { return BytesCallerReserves; } - /// getStackPtrReg - Return the stack pointer register we are using: either /// ESP or RSP. unsigned getStackPtrReg() const { return X86StackPtr; } @@ -532,16 +546,6 @@ namespace llvm { return !X86ScalarSSEf64 || VT == MVT::f80; } - /// IsEligibleForTailCallOptimization - Check whether the call is eligible - /// for tail call optimization. 
Targets which want to do tail call - /// optimization should implement this function. - virtual bool - IsEligibleForTailCallOptimization(SDValue Callee, - CallingConv::ID CalleeCC, - bool isVarArg, - const SmallVectorImpl<ISD::InputArg> &Ins, - SelectionDAG& DAG) const; - virtual const X86Subtarget* getSubtarget() { return Subtarget; } @@ -619,13 +623,22 @@ namespace llvm { ISD::ArgFlagsTy Flags); // Call lowering helpers. + + /// IsEligibleForTailCallOptimization - Check whether the call is eligible + /// for tail call optimization. Targets which want to do tail call + /// optimization should implement this function. + bool IsEligibleForTailCallOptimization(SDValue Callee, + CallingConv::ID CalleeCC, + bool isVarArg, + const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<ISD::InputArg> &Ins, + SelectionDAG& DAG) const; bool IsCalleePop(bool isVarArg, CallingConv::ID CallConv); SDValue EmitTailCallLoadRetAddr(SelectionDAG &DAG, SDValue &OutRetAddr, SDValue Chain, bool IsTailCall, bool Is64Bit, int FPDiff, DebugLoc dl); CCAssignFn *CCAssignFnForNode(CallingConv::ID CallConv) const; - NameDecorationStyle NameDecorationForCallConv(CallingConv::ID CallConv); unsigned GetAlignedArgumentStackSize(unsigned StackSize, SelectionDAG &DAG); std::pair<SDValue,SDValue> FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, @@ -634,6 +647,7 @@ namespace llvm { SDValue LowerAsSplatVectorLoad(SDValue SrcOp, EVT VT, DebugLoc dl, SelectionDAG &DAG); SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG); + SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG); SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG); SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG); SDValue LowerEXTRACT_VECTOR_ELT_SSE4(SDValue Op, SelectionDAG &DAG); @@ -693,7 +707,7 @@ namespace llvm { SmallVectorImpl<SDValue> &InVals); virtual SDValue LowerCall(SDValue Chain, SDValue Callee, - CallingConv::ID CallConv, bool isVarArg, bool isTailCall, + CallingConv::ID CallConv, bool isVarArg, bool &isTailCall, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, diff --git a/lib/Target/X86/X86Instr64bit.td b/lib/Target/X86/X86Instr64bit.td index 9037ba6..4ea3739 100644 --- a/lib/Target/X86/X86Instr64bit.td +++ b/lib/Target/X86/X86Instr64bit.td @@ -187,7 +187,7 @@ def TCRETURNri64 : I<0, Pseudo, (outs), (ins GR64:$dst, i32imm:$offset, let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in - def TAILJMPr64 : I<0xFF, MRM4r, (outs), (ins GR64:$dst), + def TAILJMPr64 : I<0xFF, MRM4r, (outs), (ins GR64:$dst, variable_ops), "jmp{q}\t{*}$dst # TAILCALL", []>; @@ -435,7 +435,7 @@ def MOVZX64rm32 : I<0x8B, MRMSrcMem, (outs GR64:$dst), (ins i32mem:$src), // up to 64 bits. 
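isZExtFree(i32, i64) and the def32 PatLeaf above both rest on the same architectural fact: on x86-64, any instruction that writes a 32-bit register zeroes bits 63..32, so the zero-extension costs no extra instruction. For example:

#include <cstdint>
uint64_t sum64(uint32_t a, uint32_t b) {
  uint32_t s = a + b;   // addl already cleared the upper 32 bits
  return s;             // no movzx/and is needed for the extension
}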
def def32 : PatLeaf<(i32 GR32:$src), [{ return N->getOpcode() != ISD::TRUNCATE && - N->getOpcode() != TargetInstrInfo::EXTRACT_SUBREG && + N->getOpcode() != TargetOpcode::EXTRACT_SUBREG && N->getOpcode() != ISD::CopyFromReg && N->getOpcode() != X86ISD::CMOV; }]>; @@ -893,35 +893,38 @@ def SAR64m1 : RI<0xD1, MRM7m, (outs), (ins i64mem:$dst), let isTwoAddress = 1 in { def RCL64r1 : RI<0xD1, MRM2r, (outs GR64:$dst), (ins GR64:$src), "rcl{q}\t{1, $dst|$dst, 1}", []>; -def RCL64m1 : RI<0xD1, MRM2m, (outs i64mem:$dst), (ins i64mem:$src), - "rcl{q}\t{1, $dst|$dst, 1}", []>; -let Uses = [CL] in { -def RCL64rCL : RI<0xD3, MRM2r, (outs GR64:$dst), (ins GR64:$src), - "rcl{q}\t{%cl, $dst|$dst, CL}", []>; -def RCL64mCL : RI<0xD3, MRM2m, (outs i64mem:$dst), (ins i64mem:$src), - "rcl{q}\t{%cl, $dst|$dst, CL}", []>; -} def RCL64ri : RIi8<0xC1, MRM2r, (outs GR64:$dst), (ins GR64:$src, i8imm:$cnt), "rcl{q}\t{$cnt, $dst|$dst, $cnt}", []>; -def RCL64mi : RIi8<0xC1, MRM2m, (outs i64mem:$dst), - (ins i64mem:$src, i8imm:$cnt), - "rcl{q}\t{$cnt, $dst|$dst, $cnt}", []>; def RCR64r1 : RI<0xD1, MRM3r, (outs GR64:$dst), (ins GR64:$src), "rcr{q}\t{1, $dst|$dst, 1}", []>; -def RCR64m1 : RI<0xD1, MRM3m, (outs i64mem:$dst), (ins i64mem:$src), - "rcr{q}\t{1, $dst|$dst, 1}", []>; +def RCR64ri : RIi8<0xC1, MRM3r, (outs GR64:$dst), (ins GR64:$src, i8imm:$cnt), + "rcr{q}\t{$cnt, $dst|$dst, $cnt}", []>; + let Uses = [CL] in { +def RCL64rCL : RI<0xD3, MRM2r, (outs GR64:$dst), (ins GR64:$src), + "rcl{q}\t{%cl, $dst|$dst, CL}", []>; def RCR64rCL : RI<0xD3, MRM3r, (outs GR64:$dst), (ins GR64:$src), "rcr{q}\t{%cl, $dst|$dst, CL}", []>; -def RCR64mCL : RI<0xD3, MRM3m, (outs i64mem:$dst), (ins i64mem:$src), - "rcr{q}\t{%cl, $dst|$dst, CL}", []>; } -def RCR64ri : RIi8<0xC1, MRM3r, (outs GR64:$dst), (ins GR64:$src, i8imm:$cnt), - "rcr{q}\t{$cnt, $dst|$dst, $cnt}", []>; -def RCR64mi : RIi8<0xC1, MRM3m, (outs i64mem:$dst), - (ins i64mem:$src, i8imm:$cnt), +} + +let isTwoAddress = 0 in { +def RCL64m1 : RI<0xD1, MRM2m, (outs), (ins i64mem:$dst), + "rcl{q}\t{1, $dst|$dst, 1}", []>; +def RCL64mi : RIi8<0xC1, MRM2m, (outs), (ins i64mem:$dst, i8imm:$cnt), + "rcl{q}\t{$cnt, $dst|$dst, $cnt}", []>; +def RCR64m1 : RI<0xD1, MRM3m, (outs), (ins i64mem:$dst), + "rcr{q}\t{1, $dst|$dst, 1}", []>; +def RCR64mi : RIi8<0xC1, MRM3m, (outs), (ins i64mem:$dst, i8imm:$cnt), "rcr{q}\t{$cnt, $dst|$dst, $cnt}", []>; + +let Uses = [CL] in { +def RCL64mCL : RI<0xD3, MRM2m, (outs), (ins i64mem:$dst), + "rcl{q}\t{%cl, $dst|$dst, CL}", []>; +def RCR64mCL : RI<0xD3, MRM3m, (outs), (ins i64mem:$dst), + "rcr{q}\t{%cl, $dst|$dst, CL}", []>; +} } let isTwoAddress = 1 in { @@ -1466,9 +1469,13 @@ def CMOVNO64rm : RI<0x41, MRMSrcMem, // if !overflow, GR64 = [mem64] } // isTwoAddress // Use sbb to materialize carry flag into a GPR. +// FIXME: These are pseudo ops that should be replaced with Pat<> patterns. +// However, Pat<> can't replicate the destination reg into the inputs of the +// result. +// FIXME: Change this to have encoding Pseudo when X86MCCodeEmitter replaces +// X86CodeEmitter. let Defs = [EFLAGS], Uses = [EFLAGS], isCodeGenOnly = 1 in -def SETB_C64r : RI<0x19, MRMInitReg, (outs GR64:$dst), (ins), - "sbb{q}\t$dst, $dst", +def SETB_C64r : RI<0x19, MRMInitReg, (outs GR64:$dst), (ins), "", [(set GR64:$dst, (X86setcc_c X86_COND_B, EFLAGS))]>; def : Pat<(i64 (anyext (i8 (X86setcc_c X86_COND_B, EFLAGS)))), @@ -1606,8 +1613,7 @@ def SLDT64m : RI<0x00, MRM0m, (outs i16mem:$dst), (ins), // when we have a better way to specify isel priority.
let Defs = [EFLAGS], AddedComplexity = 1, isReMaterializable = 1, isAsCheapAsAMove = 1 in -def MOV64r0 : I<0x31, MRMInitReg, (outs GR64:$dst), (ins), - "", +def MOV64r0 : I<0x31, MRMInitReg, (outs GR64:$dst), (ins), "", [(set GR64:$dst, 0)]>; // Materialize i64 constant where top 32-bits are zero. This could theoretically @@ -1768,7 +1774,7 @@ def LSL64rm : RI<0x03, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src), def LSL64rr : RI<0x03, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src), "lsl{q}\t{$src, $dst|$dst, $src}", []>, TB; -def SWPGS : I<0x01, RawFrm, (outs), (ins), "swpgs", []>, TB; +def SWAPGS : I<0x01, MRM_F8, (outs), (ins), "swapgs", []>, TB; def PUSHFS64 : I<0xa0, RawFrm, (outs), (ins), "push{q}\t%fs", []>, TB; diff --git a/lib/Target/X86/X86InstrFPStack.td b/lib/Target/X86/X86InstrFPStack.td index 71ec178..e22a903 100644 --- a/lib/Target/X86/X86InstrFPStack.td +++ b/lib/Target/X86/X86InstrFPStack.td @@ -339,7 +339,6 @@ def FICOMP32m: FPI<0xDA, MRM3m, (outs), (ins i32mem:$src), "ficomp{l}\t$src">; def FCOM64m : FPI<0xDC, MRM2m, (outs), (ins f64mem:$src), "fcom{ll}\t$src">; def FCOMP64m : FPI<0xDC, MRM3m, (outs), (ins f64mem:$src), "fcomp{ll}\t$src">; -def FISTTP32m: FPI<0xDD, MRM1m, (outs i32mem:$dst), (ins), "fisttp{l}\t$dst">; def FRSTORm : FPI<0xDD, MRM4m, (outs f32mem:$dst), (ins), "frstor\t$dst">; def FSAVEm : FPI<0xDD, MRM6m, (outs f32mem:$dst), (ins), "fnsave\t$dst">; def FNSTSWm : FPI<0xDD, MRM7m, (outs f32mem:$dst), (ins), "fnstsw\t$dst">; diff --git a/lib/Target/X86/X86InstrFormats.td b/lib/Target/X86/X86InstrFormats.td index a799f16..bb81cbf 100644 --- a/lib/Target/X86/X86InstrFormats.td +++ b/lib/Target/X86/X86InstrFormats.td @@ -29,7 +29,16 @@ def MRM0m : Format<24>; def MRM1m : Format<25>; def MRM2m : Format<26>; def MRM3m : Format<27>; def MRM4m : Format<28>; def MRM5m : Format<29>; def MRM6m : Format<30>; def MRM7m : Format<31>; def MRMInitReg : Format<32>; - +def MRM_C1 : Format<33>; +def MRM_C2 : Format<34>; +def MRM_C3 : Format<35>; +def MRM_C4 : Format<36>; +def MRM_C8 : Format<37>; +def MRM_C9 : Format<38>; +def MRM_E8 : Format<39>; +def MRM_F0 : Format<40>; +def MRM_F8 : Format<41>; +def MRM_F9 : Format<42>; // ImmType - This specifies the immediate type used by an instruction. This is // part of the ad-hoc solution used to emit machine instruction encodings by our @@ -37,11 +46,13 @@ def MRMInitReg : Format<32>; class ImmType<bits<3> val> { bits<3> Value = val; } -def NoImm : ImmType<0>; -def Imm8 : ImmType<1>; -def Imm16 : ImmType<2>; -def Imm32 : ImmType<3>; -def Imm64 : ImmType<4>; +def NoImm : ImmType<0>; +def Imm8 : ImmType<1>; +def Imm8PCRel : ImmType<2>; +def Imm16 : ImmType<3>; +def Imm32 : ImmType<4>; +def Imm32PCRel : ImmType<5>; +def Imm64 : ImmType<6>; // FPFormat - This specifies what form this FP instruction has. This is used by // the Floating-Point stackifier pass. 
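The new Imm8PCRel and Imm32PCRel immediate kinds tag operands whose stored value is relative to the end of the instruction. A sketch of the fixup arithmetic an emitter applies for them (the function name is illustrative):

#include <cstdint>
int32_t pcRelDisp(uint64_t Target, uint64_t InstAddr, unsigned InstLen) {
  // The rel8/rel32 payload: distance from the byte after the instruction.
  return (int32_t)(Target - (InstAddr + InstLen));
}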
@@ -121,6 +132,12 @@ class Ii8 <bits<8> o, Format f, dag outs, dag ins, string asm, let Pattern = pattern; let CodeSize = 3; } +class Ii8PCRel<bits<8> o, Format f, dag outs, dag ins, string asm, + list<dag> pattern> + : X86Inst<o, f, Imm8PCRel, outs, ins, asm> { + let Pattern = pattern; + let CodeSize = 3; +} class Ii16<bits<8> o, Format f, dag outs, dag ins, string asm, list<dag> pattern> : X86Inst<o, f, Imm16, outs, ins, asm> { @@ -134,6 +151,13 @@ class Ii32<bits<8> o, Format f, dag outs, dag ins, string asm, let CodeSize = 3; } +class Ii32PCRel<bits<8> o, Format f, dag outs, dag ins, string asm, + list<dag> pattern> + : X86Inst<o, f, Imm32PCRel, outs, ins, asm> { + let Pattern = pattern; + let CodeSize = 3; +} + // FPStack Instruction Templates: // FPI - Floating Point Instruction template. class FPI<bits<8> o, Format F, dag outs, dag ins, string asm> diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td new file mode 100644 index 0000000..6b9478d --- /dev/null +++ b/lib/Target/X86/X86InstrFragmentsSIMD.td @@ -0,0 +1,62 @@ +//======- X86InstrFragmentsSIMD.td - x86 ISA -------------*- tablegen -*-=====// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file provides pattern fragments useful for SIMD instructions. +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// MMX Pattern Fragments +//===----------------------------------------------------------------------===// + +def load_mmx : PatFrag<(ops node:$ptr), (v1i64 (load node:$ptr))>; + +def bc_v8i8 : PatFrag<(ops node:$in), (v8i8 (bitconvert node:$in))>; +def bc_v4i16 : PatFrag<(ops node:$in), (v4i16 (bitconvert node:$in))>; +def bc_v2i32 : PatFrag<(ops node:$in), (v2i32 (bitconvert node:$in))>; +def bc_v1i64 : PatFrag<(ops node:$in), (v1i64 (bitconvert node:$in))>; + +//===----------------------------------------------------------------------===// +// MMX Masks +//===----------------------------------------------------------------------===// + +// MMX_SHUFFLE_get_shuf_imm xform function: convert vector_shuffle mask to +// PSHUFW imm. 
+def MMX_SHUFFLE_get_shuf_imm : SDNodeXForm<vector_shuffle, [{ + return getI8Imm(X86::getShuffleSHUFImmediate(N)); +}]>; + +// Patterns for: vector_shuffle v1, v2, <2, 6, 3, 7, ...> +def mmx_unpckh : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return X86::isUNPCKHMask(cast<ShuffleVectorSDNode>(N)); +}]>; + +// Patterns for: vector_shuffle v1, v2, <0, 4, 2, 5, ...> +def mmx_unpckl : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return X86::isUNPCKLMask(cast<ShuffleVectorSDNode>(N)); +}]>; + +// Patterns for: vector_shuffle v1, <undef>, <0, 0, 1, 1, ...> +def mmx_unpckh_undef : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return X86::isUNPCKH_v_undef_Mask(cast<ShuffleVectorSDNode>(N)); +}]>; + +// Patterns for: vector_shuffle v1, <undef>, <2, 2, 3, 3, ...> +def mmx_unpckl_undef : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return X86::isUNPCKL_v_undef_Mask(cast<ShuffleVectorSDNode>(N)); +}]>; + +def mmx_pshufw : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return X86::isPSHUFDMask(cast<ShuffleVectorSDNode>(N)); +}], MMX_SHUFFLE_get_shuf_imm>; diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp index 3ae352c..a0d0312 100644 --- a/lib/Target/X86/X86InstrInfo.cpp +++ b/lib/Target/X86/X86InstrInfo.cpp @@ -1060,8 +1060,7 @@ void X86InstrInfo::reMaterialize(MachineBasicBlock &MBB, unsigned DestReg, unsigned SubIdx, const MachineInstr *Orig, const TargetRegisterInfo *TRI) const { - DebugLoc DL = DebugLoc::getUnknownLoc(); - if (I != MBB.end()) DL = I->getDebugLoc(); + DebugLoc DL = MBB.findDebugLoc(I); if (SubIdx && TargetRegisterInfo::isPhysicalRegister(DestReg)) { DestReg = TRI->getSubReg(DestReg, SubIdx); @@ -1588,44 +1587,44 @@ X86InstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const { static X86::CondCode GetCondFromBranchOpc(unsigned BrOpc) { switch (BrOpc) { default: return X86::COND_INVALID; - case X86::JE: return X86::COND_E; - case X86::JNE: return X86::COND_NE; - case X86::JL: return X86::COND_L; - case X86::JLE: return X86::COND_LE; - case X86::JG: return X86::COND_G; - case X86::JGE: return X86::COND_GE; - case X86::JB: return X86::COND_B; - case X86::JBE: return X86::COND_BE; - case X86::JA: return X86::COND_A; - case X86::JAE: return X86::COND_AE; - case X86::JS: return X86::COND_S; - case X86::JNS: return X86::COND_NS; - case X86::JP: return X86::COND_P; - case X86::JNP: return X86::COND_NP; - case X86::JO: return X86::COND_O; - case X86::JNO: return X86::COND_NO; + case X86::JE_4: return X86::COND_E; + case X86::JNE_4: return X86::COND_NE; + case X86::JL_4: return X86::COND_L; + case X86::JLE_4: return X86::COND_LE; + case X86::JG_4: return X86::COND_G; + case X86::JGE_4: return X86::COND_GE; + case X86::JB_4: return X86::COND_B; + case X86::JBE_4: return X86::COND_BE; + case X86::JA_4: return X86::COND_A; + case X86::JAE_4: return X86::COND_AE; + case X86::JS_4: return X86::COND_S; + case X86::JNS_4: return X86::COND_NS; + case X86::JP_4: return X86::COND_P; + case X86::JNP_4: return X86::COND_NP; + case X86::JO_4: return X86::COND_O; + case X86::JNO_4: return X86::COND_NO; } } unsigned X86::GetCondBranchFromCond(X86::CondCode CC) { switch (CC) { default: llvm_unreachable("Illegal condition code!"); - case X86::COND_E: return X86::JE; - case X86::COND_NE: return X86::JNE; - case X86::COND_L: return X86::JL; - case X86::COND_LE: return X86::JLE; - case 
X86::COND_G: return X86::JG; - case X86::COND_GE: return X86::JGE; - case X86::COND_B: return X86::JB; - case X86::COND_BE: return X86::JBE; - case X86::COND_A: return X86::JA; - case X86::COND_AE: return X86::JAE; - case X86::COND_S: return X86::JS; - case X86::COND_NS: return X86::JNS; - case X86::COND_P: return X86::JP; - case X86::COND_NP: return X86::JNP; - case X86::COND_O: return X86::JO; - case X86::COND_NO: return X86::JNO; + case X86::COND_E: return X86::JE_4; + case X86::COND_NE: return X86::JNE_4; + case X86::COND_L: return X86::JL_4; + case X86::COND_LE: return X86::JLE_4; + case X86::COND_G: return X86::JG_4; + case X86::COND_GE: return X86::JGE_4; + case X86::COND_B: return X86::JB_4; + case X86::COND_BE: return X86::JBE_4; + case X86::COND_A: return X86::JA_4; + case X86::COND_AE: return X86::JAE_4; + case X86::COND_S: return X86::JS_4; + case X86::COND_NS: return X86::JNS_4; + case X86::COND_P: return X86::JP_4; + case X86::COND_NP: return X86::JNP_4; + case X86::COND_O: return X86::JO_4; + case X86::COND_NO: return X86::JNO_4; } } @@ -1695,7 +1694,7 @@ bool X86InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, return true; // Handle unconditional branches. - if (I->getOpcode() == X86::JMP) { + if (I->getOpcode() == X86::JMP_4) { if (!AllowModify) { TBB = I->getOperand(0).getMBB(); continue; @@ -1779,7 +1778,7 @@ unsigned X86InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const { while (I != MBB.begin()) { --I; - if (I->getOpcode() != X86::JMP && + if (I->getOpcode() != X86::JMP_4 && GetCondFromBranchOpc(I->getOpcode()) == X86::COND_INVALID) break; // Remove the branch. @@ -1805,7 +1804,7 @@ X86InstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, if (Cond.empty()) { // Unconditional branch? assert(!FBB && "Unconditional branch with multiple successors!"); - BuildMI(&MBB, dl, get(X86::JMP)).addMBB(TBB); + BuildMI(&MBB, dl, get(X86::JMP_4)).addMBB(TBB); return 1; } @@ -1815,16 +1814,16 @@ X86InstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, switch (CC) { case X86::COND_NP_OR_E: // Synthesize NP_OR_E with two branches. - BuildMI(&MBB, dl, get(X86::JNP)).addMBB(TBB); + BuildMI(&MBB, dl, get(X86::JNP_4)).addMBB(TBB); ++Count; - BuildMI(&MBB, dl, get(X86::JE)).addMBB(TBB); + BuildMI(&MBB, dl, get(X86::JE_4)).addMBB(TBB); ++Count; break; case X86::COND_NE_OR_P: // Synthesize NE_OR_P with two branches. - BuildMI(&MBB, dl, get(X86::JNE)).addMBB(TBB); + BuildMI(&MBB, dl, get(X86::JNE_4)).addMBB(TBB); ++Count; - BuildMI(&MBB, dl, get(X86::JP)).addMBB(TBB); + BuildMI(&MBB, dl, get(X86::JP_4)).addMBB(TBB); ++Count; break; default: { @@ -1835,7 +1834,7 @@ X86InstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, } if (FBB) { // Two-way Conditional branch. Insert the second branch. - BuildMI(&MBB, dl, get(X86::JMP)).addMBB(FBB); + BuildMI(&MBB, dl, get(X86::JMP_4)).addMBB(FBB); ++Count; } return Count; @@ -1851,8 +1850,7 @@ bool X86InstrInfo::copyRegToReg(MachineBasicBlock &MBB, unsigned DestReg, unsigned SrcReg, const TargetRegisterClass *DestRC, const TargetRegisterClass *SrcRC) const { - DebugLoc DL = DebugLoc::getUnknownLoc(); - if (MI != MBB.end()) DL = MI->getDebugLoc(); + DebugLoc DL = MBB.findDebugLoc(MI); // Determine if DstRC and SrcRC have a common superclass in common. 
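The two-jump synthesis in InsertBranch above exists because x86 FP compares report "unordered" through the parity flag: after ucomiss, no single Jcc can test NE-or-P or NP-or-E, so the code plants two branches to the same target (JNE_4 then JP_4, or JNP_4 then JE_4). The C source pattern that needs it:

bool fp_ne(float a, float b) {
  return a != b;   // true when unequal *or* unordered; lowers to jne + jp
}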
const TargetRegisterClass *CommonRC = DestRC; @@ -2079,8 +2077,7 @@ void X86InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, const MachineFunction &MF = *MBB.getParent(); bool isAligned = (RI.getStackAlignment() >= 16) || RI.canRealignStack(MF); unsigned Opc = getStoreRegOpcode(SrcReg, RC, isAligned, TM); - DebugLoc DL = DebugLoc::getUnknownLoc(); - if (MI != MBB.end()) DL = MI->getDebugLoc(); + DebugLoc DL = MBB.findDebugLoc(MI); addFrameReference(BuildMI(MBB, MI, DL, get(Opc)), FrameIdx) .addReg(SrcReg, getKillRegState(isKill)); } @@ -2173,8 +2170,7 @@ void X86InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, const MachineFunction &MF = *MBB.getParent(); bool isAligned = (RI.getStackAlignment() >= 16) || RI.canRealignStack(MF); unsigned Opc = getLoadRegOpcode(DestReg, RC, isAligned, TM); - DebugLoc DL = DebugLoc::getUnknownLoc(); - if (MI != MBB.end()) DL = MI->getDebugLoc(); + DebugLoc DL = MBB.findDebugLoc(MI); addFrameReference(BuildMI(MBB, MI, DL, get(Opc), DestReg), FrameIdx); } @@ -3018,22 +3014,11 @@ isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const { RC == &X86::RFP64RegClass || RC == &X86::RFP80RegClass); } -unsigned X86InstrInfo::sizeOfImm(const TargetInstrDesc *Desc) { - switch (Desc->TSFlags & X86II::ImmMask) { - case X86II::Imm8: return 1; - case X86II::Imm16: return 2; - case X86II::Imm32: return 4; - case X86II::Imm64: return 8; - default: llvm_unreachable("Immediate size not set!"); - return 0; - } -} -/// isX86_64ExtendedReg - Is the MachineOperand a x86-64 extended register? -/// e.g. r8, xmm8, etc. -bool X86InstrInfo::isX86_64ExtendedReg(const MachineOperand &MO) { - if (!MO.isReg()) return false; - switch (MO.getReg()) { +/// isX86_64ExtendedReg - Is the MachineOperand a x86-64 extended (r8 or higher) +/// register? e.g. r8, xmm8, xmm13, etc. +bool X86InstrInfo::isX86_64ExtendedReg(unsigned RegNo) { + switch (RegNo) { default: break; case X86::R8: case X86::R9: case X86::R10: case X86::R11: case X86::R12: case X86::R13: case X86::R14: case X86::R15: @@ -3387,24 +3372,24 @@ static unsigned GetInstSizeWithDesc(const MachineInstr &MI, switch (Opcode) { default: break; - case TargetInstrInfo::INLINEASM: { + case TargetOpcode::INLINEASM: { const MachineFunction *MF = MI.getParent()->getParent(); const TargetInstrInfo &TII = *MF->getTarget().getInstrInfo(); FinalSize += TII.getInlineAsmLength(MI.getOperand(0).getSymbolName(), *MF->getTarget().getMCAsmInfo()); break; } - case TargetInstrInfo::DBG_LABEL: - case TargetInstrInfo::EH_LABEL: + case TargetOpcode::DBG_LABEL: + case TargetOpcode::EH_LABEL: break; - case TargetInstrInfo::IMPLICIT_DEF: - case TargetInstrInfo::KILL: + case TargetOpcode::IMPLICIT_DEF: + case TargetOpcode::KILL: case X86::FP_REG_KILL: break; case X86::MOVPC32r: { // This emits the "call" portion of this pseudo instruction. 
++FinalSize; - FinalSize += sizeConstant(X86InstrInfo::sizeOfImm(Desc)); + FinalSize += sizeConstant(X86II::getSizeOfImm(Desc->TSFlags)); break; } } @@ -3422,7 +3407,7 @@ static unsigned GetInstSizeWithDesc(const MachineInstr &MI, } else if (MO.isSymbol()) { FinalSize += sizeExternalSymbolAddress(false); } else if (MO.isImm()) { - FinalSize += sizeConstant(X86InstrInfo::sizeOfImm(Desc)); + FinalSize += sizeConstant(X86II::getSizeOfImm(Desc->TSFlags)); } else { llvm_unreachable("Unknown RawFrm operand!"); } @@ -3435,7 +3420,7 @@ static unsigned GetInstSizeWithDesc(const MachineInstr &MI, if (CurOp != NumOps) { const MachineOperand &MO1 = MI.getOperand(CurOp++); - unsigned Size = X86InstrInfo::sizeOfImm(Desc); + unsigned Size = X86II::getSizeOfImm(Desc->TSFlags); if (MO1.isImm()) FinalSize += sizeConstant(Size); else { @@ -3460,7 +3445,7 @@ static unsigned GetInstSizeWithDesc(const MachineInstr &MI, CurOp += 2; if (CurOp != NumOps) { ++CurOp; - FinalSize += sizeConstant(X86InstrInfo::sizeOfImm(Desc)); + FinalSize += sizeConstant(X86II::getSizeOfImm(Desc->TSFlags)); } break; } @@ -3470,7 +3455,7 @@ static unsigned GetInstSizeWithDesc(const MachineInstr &MI, CurOp += X86AddrNumOperands + 1; if (CurOp != NumOps) { ++CurOp; - FinalSize += sizeConstant(X86InstrInfo::sizeOfImm(Desc)); + FinalSize += sizeConstant(X86II::getSizeOfImm(Desc->TSFlags)); } break; } @@ -3481,7 +3466,7 @@ static unsigned GetInstSizeWithDesc(const MachineInstr &MI, CurOp += 2; if (CurOp != NumOps) { ++CurOp; - FinalSize += sizeConstant(X86InstrInfo::sizeOfImm(Desc)); + FinalSize += sizeConstant(X86II::getSizeOfImm(Desc->TSFlags)); } break; @@ -3498,7 +3483,7 @@ static unsigned GetInstSizeWithDesc(const MachineInstr &MI, CurOp += AddrOperands + 1; if (CurOp != NumOps) { ++CurOp; - FinalSize += sizeConstant(X86InstrInfo::sizeOfImm(Desc)); + FinalSize += sizeConstant(X86II::getSizeOfImm(Desc->TSFlags)); } break; } @@ -3523,7 +3508,7 @@ static unsigned GetInstSizeWithDesc(const MachineInstr &MI, if (CurOp != NumOps) { const MachineOperand &MO1 = MI.getOperand(CurOp++); - unsigned Size = X86InstrInfo::sizeOfImm(Desc); + unsigned Size = X86II::getSizeOfImm(Desc->TSFlags); if (MO1.isImm()) FinalSize += sizeConstant(Size); else { @@ -3553,7 +3538,7 @@ static unsigned GetInstSizeWithDesc(const MachineInstr &MI, if (CurOp != NumOps) { const MachineOperand &MO = MI.getOperand(CurOp++); - unsigned Size = X86InstrInfo::sizeOfImm(Desc); + unsigned Size = X86II::getSizeOfImm(Desc->TSFlags); if (MO.isImm()) FinalSize += sizeConstant(Size); else { @@ -3571,6 +3556,14 @@ static unsigned GetInstSizeWithDesc(const MachineInstr &MI, } } break; + + case X86II::MRM_C1: + case X86II::MRM_C8: + case X86II::MRM_C9: + case X86II::MRM_E8: + case X86II::MRM_F0: + FinalSize += 2; + break; } case X86II::MRMInitReg: @@ -3619,8 +3612,7 @@ unsigned X86InstrInfo::getGlobalBaseReg(MachineFunction *MF) const { // Insert the set of GlobalBaseReg into the first MBB of the function MachineBasicBlock &FirstMBB = MF->front(); MachineBasicBlock::iterator MBBI = FirstMBB.begin(); - DebugLoc DL = DebugLoc::getUnknownLoc(); - if (MBBI != FirstMBB.end()) DL = MBBI->getDebugLoc(); + DebugLoc DL = FirstMBB.findDebugLoc(MBBI); MachineRegisterInfo &RegInfo = MF->getRegInfo(); unsigned PC = RegInfo.createVirtualRegister(X86::GR32RegisterClass); diff --git a/lib/Target/X86/X86InstrInfo.h b/lib/Target/X86/X86InstrInfo.h index 4f35d0d..5111719 100644 --- a/lib/Target/X86/X86InstrInfo.h +++ b/lib/Target/X86/X86InstrInfo.h @@ -18,7 +18,6 @@ #include "X86.h" #include 
"X86RegisterInfo.h" #include "llvm/ADT/DenseMap.h" -#include "llvm/Target/TargetRegisterInfo.h" namespace llvm { class X86RegisterInfo; @@ -269,6 +268,18 @@ namespace X86II { // MRMInitReg - This form is used for instructions whose source and // destinations are the same register. MRMInitReg = 32, + + //// MRM_C1 - A mod/rm byte of exactly 0xC1. + MRM_C1 = 33, + MRM_C2 = 34, + MRM_C3 = 35, + MRM_C4 = 36, + MRM_C8 = 37, + MRM_C9 = 38, + MRM_E8 = 39, + MRM_F0 = 40, + MRM_F8 = 41, + MRM_F9 = 42, FormMask = 63, @@ -332,11 +343,13 @@ namespace X86II { // This three-bit field describes the size of an immediate operand. Zero is // unused so that we can tell if we forgot to set a value. ImmShift = 13, - ImmMask = 7 << ImmShift, - Imm8 = 1 << ImmShift, - Imm16 = 2 << ImmShift, - Imm32 = 3 << ImmShift, - Imm64 = 4 << ImmShift, + ImmMask = 7 << ImmShift, + Imm8 = 1 << ImmShift, + Imm8PCRel = 2 << ImmShift, + Imm16 = 3 << ImmShift, + Imm32 = 4 << ImmShift, + Imm32PCRel = 5 << ImmShift, + Imm64 = 6 << ImmShift, //===------------------------------------------------------------------===// // FP Instruction Classification... Zero is non-fp instruction. @@ -389,6 +402,47 @@ namespace X86II { OpcodeShift = 24, OpcodeMask = 0xFF << OpcodeShift }; + + // getBaseOpcodeFor - This function returns the "base" X86 opcode for the + // specified machine instruction. + // + static inline unsigned char getBaseOpcodeFor(unsigned TSFlags) { + return TSFlags >> X86II::OpcodeShift; + } + + static inline bool hasImm(unsigned TSFlags) { + return (TSFlags & X86II::ImmMask) != 0; + } + + /// getSizeOfImm - Decode the "size of immediate" field from the TSFlags field + /// of the specified instruction. + static inline unsigned getSizeOfImm(unsigned TSFlags) { + switch (TSFlags & X86II::ImmMask) { + default: assert(0 && "Unknown immediate size"); + case X86II::Imm8: + case X86II::Imm8PCRel: return 1; + case X86II::Imm16: return 2; + case X86II::Imm32: + case X86II::Imm32PCRel: return 4; + case X86II::Imm64: return 8; + } + } + + /// isImmPCRel - Return true if the immediate of the specified instruction's + /// TSFlags indicates that it is pc relative. + static inline unsigned isImmPCRel(unsigned TSFlags) { + switch (TSFlags & X86II::ImmMask) { + default: assert(0 && "Unknown immediate size"); + case X86II::Imm8PCRel: + case X86II::Imm32PCRel: + return true; + case X86II::Imm8: + case X86II::Imm16: + case X86II::Imm32: + case X86II::Imm64: + return false; + } + } } const int X86AddrNumOperands = 5; @@ -637,25 +691,21 @@ public: /// instruction that defines the specified register class. bool isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const; - // getBaseOpcodeFor - This function returns the "base" X86 opcode for the - // specified machine instruction. - // - unsigned char getBaseOpcodeFor(const TargetInstrDesc *TID) const { - return TID->TSFlags >> X86II::OpcodeShift; - } - unsigned char getBaseOpcodeFor(unsigned Opcode) const { - return getBaseOpcodeFor(&get(Opcode)); - } - static bool isX86_64NonExtLowByteReg(unsigned reg) { return (reg == X86::SPL || reg == X86::BPL || reg == X86::SIL || reg == X86::DIL); } - static unsigned sizeOfImm(const TargetInstrDesc *Desc); - static bool isX86_64ExtendedReg(const MachineOperand &MO); + static bool isX86_64ExtendedReg(const MachineOperand &MO) { + if (!MO.isReg()) return false; + return isX86_64ExtendedReg(MO.getReg()); + } static unsigned determineREX(const MachineInstr &MI); + /// isX86_64ExtendedReg - Is the MachineOperand a x86-64 extended (r8 or + /// higher) register? 
e.g. r8, xmm8, xmm13, etc. + static bool isX86_64ExtendedReg(unsigned RegNo); + /// GetInstSize - Returns the size of the specified MachineInstr. /// virtual unsigned GetInstSizeInBytes(const MachineInstr *MI) const; diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td index 396cb53..25cd297 100644 --- a/lib/Target/X86/X86InstrInfo.td +++ b/lib/Target/X86/X86InstrInfo.td @@ -182,10 +182,6 @@ def X86mul_imm : SDNode<"X86ISD::MUL_IMM", SDTIntBinOp>; // X86 Operand Definitions. // -def i32imm_pcrel : Operand<i32> { - let PrintMethod = "print_pcrel_imm"; -} - // A version of ptr_rc which excludes SP, ESP, and RSP. This is used for // the index operand of an address, to conform to x86 encoding restrictions. def ptr_rc_nosp : PointerLikeRegClass<1>; @@ -196,6 +192,14 @@ def X86MemAsmOperand : AsmOperandClass { let Name = "Mem"; let SuperClass = ?; } +def X86AbsMemAsmOperand : AsmOperandClass { + let Name = "AbsMem"; + let SuperClass = X86MemAsmOperand; +} +def X86NoSegMemAsmOperand : AsmOperandClass { + let Name = "NoSegMem"; + let SuperClass = X86MemAsmOperand; +} class X86MemOperand<string printMethod> : Operand<iPTR> { let PrintMethod = printMethod; let MIOperandInfo = (ops ptr_rc, i8imm, ptr_rc_nosp, i32imm, i8imm); @@ -207,11 +211,6 @@ def opaque48mem : X86MemOperand<"printopaquemem">; def opaque80mem : X86MemOperand<"printopaquemem">; def opaque512mem : X86MemOperand<"printopaquemem">; -def offset8 : Operand<i64> { let PrintMethod = "print_pcrel_imm"; } -def offset16 : Operand<i64> { let PrintMethod = "print_pcrel_imm"; } -def offset32 : Operand<i64> { let PrintMethod = "print_pcrel_imm"; } -def offset64 : Operand<i64> { let PrintMethod = "print_pcrel_imm"; } - def i8mem : X86MemOperand<"printi8mem">; def i16mem : X86MemOperand<"printi16mem">; def i32mem : X86MemOperand<"printi32mem">; @@ -235,7 +234,22 @@ def i8mem_NOREX : Operand<i64> { def lea32mem : Operand<i32> { let PrintMethod = "printlea32mem"; let MIOperandInfo = (ops GR32, i8imm, GR32_NOSP, i32imm); - let ParserMatchClass = X86MemAsmOperand; + let ParserMatchClass = X86NoSegMemAsmOperand; +} + +let ParserMatchClass = X86AbsMemAsmOperand, + PrintMethod = "print_pcrel_imm" in { +def i32imm_pcrel : Operand<i32>; + +def offset8 : Operand<i64>; +def offset16 : Operand<i64>; +def offset32 : Operand<i64>; +def offset64 : Operand<i64>; + +// Branch targets have OtherVT type and print as pc-relative values. +def brtarget : Operand<OtherVT>; +def brtarget8 : Operand<OtherVT>; + } def SSECC : Operand<i8> { @@ -257,15 +271,6 @@ def i32i8imm : Operand<i32> { let ParserMatchClass = ImmSExt8AsmOperand; } -// Branch targets have OtherVT type and print as pc-relative values. -def brtarget : Operand<OtherVT> { - let PrintMethod = "print_pcrel_imm"; -} - -def brtarget8 : Operand<OtherVT> { - let PrintMethod = "print_pcrel_imm"; -} - //===----------------------------------------------------------------------===// // X86 Complex Pattern Definitions. // @@ -591,7 +596,7 @@ let neverHasSideEffects = 1, isNotDuplicable = 1, Uses = [ESP] in "", []>; //===----------------------------------------------------------------------===// -// Control Flow Instructions... +// Control Flow Instructions. // // Return instructions. 
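isX86_64ExtendedReg feeds determineREX: r8-r15 and xmm8-xmm15 do not fit in the 3-bit ModRM fields, so each use contributes a bit to the REX prefix. A sketch of the prefix layout (0100WRXB; bit meanings per the x86-64 encoding, names illustrative):

#include <cstdint>
uint8_t rexByte(bool W, bool R, bool X, bool B) {
  // W = 64-bit operand size, R = ModRM.reg extension,
  // X = SIB.index extension,  B = ModRM.rm/SIB.base extension
  return (uint8_t)(0x40 | (W << 3) | (R << 2) | (X << 1) | (int)B);
}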
@@ -609,16 +614,46 @@ let isTerminator = 1, isReturn = 1, isBarrier = 1, "lret\t$amt", []>; } -// All branches are RawFrm, Void, Branch, and Terminators -let isBranch = 1, isTerminator = 1 in - class IBr<bits<8> opcode, dag ins, string asm, list<dag> pattern> : - I<opcode, RawFrm, (outs), ins, asm, pattern>; +// Unconditional branches. +let isBarrier = 1, isBranch = 1, isTerminator = 1 in { + def JMP_4 : Ii32PCRel<0xE9, RawFrm, (outs), (ins brtarget:$dst), + "jmp\t$dst", [(br bb:$dst)]>; + def JMP_1 : Ii8PCRel<0xEB, RawFrm, (outs), (ins brtarget8:$dst), + "jmp\t$dst", []>; +} -let isBranch = 1, isBarrier = 1 in { - def JMP : IBr<0xE9, (ins brtarget:$dst), "jmp\t$dst", [(br bb:$dst)]>; - def JMP8 : IBr<0xEB, (ins brtarget8:$dst), "jmp\t$dst", []>; +// Conditional Branches. +let isBranch = 1, isTerminator = 1, Uses = [EFLAGS] in { + multiclass ICBr<bits<8> opc1, bits<8> opc4, string asm, PatFrag Cond> { + def _1 : Ii8PCRel <opc1, RawFrm, (outs), (ins brtarget8:$dst), asm, []>; + def _4 : Ii32PCRel<opc4, RawFrm, (outs), (ins brtarget:$dst), asm, + [(X86brcond bb:$dst, Cond, EFLAGS)]>, TB; + } } +defm JO : ICBr<0x70, 0x80, "jo\t$dst" , X86_COND_O>; +defm JNO : ICBr<0x71, 0x81, "jno\t$dst" , X86_COND_NO>; +defm JB : ICBr<0x72, 0x82, "jb\t$dst" , X86_COND_B>; +defm JAE : ICBr<0x73, 0x83, "jae\t$dst", X86_COND_AE>; +defm JE : ICBr<0x74, 0x84, "je\t$dst" , X86_COND_E>; +defm JNE : ICBr<0x75, 0x85, "jne\t$dst", X86_COND_NE>; +defm JBE : ICBr<0x76, 0x86, "jbe\t$dst", X86_COND_BE>; +defm JA : ICBr<0x77, 0x87, "ja\t$dst" , X86_COND_A>; +defm JS : ICBr<0x78, 0x88, "js\t$dst" , X86_COND_S>; +defm JNS : ICBr<0x79, 0x89, "jns\t$dst", X86_COND_NS>; +defm JP : ICBr<0x7A, 0x8A, "jp\t$dst" , X86_COND_P>; +defm JNP : ICBr<0x7B, 0x8B, "jnp\t$dst", X86_COND_NP>; +defm JL : ICBr<0x7C, 0x8C, "jl\t$dst" , X86_COND_L>; +defm JGE : ICBr<0x7D, 0x8D, "jge\t$dst", X86_COND_GE>; +defm JLE : ICBr<0x7E, 0x8E, "jle\t$dst", X86_COND_LE>; +defm JG : ICBr<0x7F, 0x8F, "jg\t$dst" , X86_COND_G>; + +// FIXME: What about the CX/RCX versions of this instruction? 
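The ICBr multiclass pairs each condition's short and near encodings: opc1 is the one-byte 0x70+cc opcode with a rel8, opc4 the 0F-prefixed 0x80+cc opcode with a rel32 (hence the TB modifier on the _4 defs). A toy encoder showing how a later relaxation pass can pick between the two forms (a sketch, not the backend's emitter):

#include <cstdint>
#include <vector>
std::vector<uint8_t> encodeJcc(uint8_t cc, int32_t disp) {
  if (disp >= -128 && disp <= 127)               // fits rel8: the _1 form
    return {(uint8_t)(0x70 + cc), (uint8_t)disp};
  std::vector<uint8_t> enc{0x0F, (uint8_t)(0x80 + cc)};
  for (int i = 0; i < 4; ++i)                    // little-endian rel32
    enc.push_back((uint8_t)(disp >> (8 * i)));
  return enc;                                    // the _4 form
}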
+let Uses = [ECX], isBranch = 1, isTerminator = 1 in + def JCXZ8 : Ii8PCRel<0xE3, RawFrm, (outs), (ins brtarget8:$dst), + "jcxz\t$dst", []>; + + // Indirect branches let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in { def JMP32r : I<0xFF, MRM4r, (outs), (ins GR32:$dst), "jmp{l}\t{*}$dst", @@ -639,63 +674,6 @@ let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in { "ljmp{l}\t{*}$dst", []>; } -// Conditional branches -let Uses = [EFLAGS] in { -// Short conditional jumps -def JO8 : IBr<0x70, (ins brtarget8:$dst), "jo\t$dst", []>; -def JNO8 : IBr<0x71, (ins brtarget8:$dst), "jno\t$dst", []>; -def JB8 : IBr<0x72, (ins brtarget8:$dst), "jb\t$dst", []>; -def JAE8 : IBr<0x73, (ins brtarget8:$dst), "jae\t$dst", []>; -def JE8 : IBr<0x74, (ins brtarget8:$dst), "je\t$dst", []>; -def JNE8 : IBr<0x75, (ins brtarget8:$dst), "jne\t$dst", []>; -def JBE8 : IBr<0x76, (ins brtarget8:$dst), "jbe\t$dst", []>; -def JA8 : IBr<0x77, (ins brtarget8:$dst), "ja\t$dst", []>; -def JS8 : IBr<0x78, (ins brtarget8:$dst), "js\t$dst", []>; -def JNS8 : IBr<0x79, (ins brtarget8:$dst), "jns\t$dst", []>; -def JP8 : IBr<0x7A, (ins brtarget8:$dst), "jp\t$dst", []>; -def JNP8 : IBr<0x7B, (ins brtarget8:$dst), "jnp\t$dst", []>; -def JL8 : IBr<0x7C, (ins brtarget8:$dst), "jl\t$dst", []>; -def JGE8 : IBr<0x7D, (ins brtarget8:$dst), "jge\t$dst", []>; -def JLE8 : IBr<0x7E, (ins brtarget8:$dst), "jle\t$dst", []>; -def JG8 : IBr<0x7F, (ins brtarget8:$dst), "jg\t$dst", []>; - -def JCXZ8 : IBr<0xE3, (ins brtarget8:$dst), "jcxz\t$dst", []>; - -def JE : IBr<0x84, (ins brtarget:$dst), "je\t$dst", - [(X86brcond bb:$dst, X86_COND_E, EFLAGS)]>, TB; -def JNE : IBr<0x85, (ins brtarget:$dst), "jne\t$dst", - [(X86brcond bb:$dst, X86_COND_NE, EFLAGS)]>, TB; -def JL : IBr<0x8C, (ins brtarget:$dst), "jl\t$dst", - [(X86brcond bb:$dst, X86_COND_L, EFLAGS)]>, TB; -def JLE : IBr<0x8E, (ins brtarget:$dst), "jle\t$dst", - [(X86brcond bb:$dst, X86_COND_LE, EFLAGS)]>, TB; -def JG : IBr<0x8F, (ins brtarget:$dst), "jg\t$dst", - [(X86brcond bb:$dst, X86_COND_G, EFLAGS)]>, TB; -def JGE : IBr<0x8D, (ins brtarget:$dst), "jge\t$dst", - [(X86brcond bb:$dst, X86_COND_GE, EFLAGS)]>, TB; - -def JB : IBr<0x82, (ins brtarget:$dst), "jb\t$dst", - [(X86brcond bb:$dst, X86_COND_B, EFLAGS)]>, TB; -def JBE : IBr<0x86, (ins brtarget:$dst), "jbe\t$dst", - [(X86brcond bb:$dst, X86_COND_BE, EFLAGS)]>, TB; -def JA : IBr<0x87, (ins brtarget:$dst), "ja\t$dst", - [(X86brcond bb:$dst, X86_COND_A, EFLAGS)]>, TB; -def JAE : IBr<0x83, (ins brtarget:$dst), "jae\t$dst", - [(X86brcond bb:$dst, X86_COND_AE, EFLAGS)]>, TB; - -def JS : IBr<0x88, (ins brtarget:$dst), "js\t$dst", - [(X86brcond bb:$dst, X86_COND_S, EFLAGS)]>, TB; -def JNS : IBr<0x89, (ins brtarget:$dst), "jns\t$dst", - [(X86brcond bb:$dst, X86_COND_NS, EFLAGS)]>, TB; -def JP : IBr<0x8A, (ins brtarget:$dst), "jp\t$dst", - [(X86brcond bb:$dst, X86_COND_P, EFLAGS)]>, TB; -def JNP : IBr<0x8B, (ins brtarget:$dst), "jnp\t$dst", - [(X86brcond bb:$dst, X86_COND_NP, EFLAGS)]>, TB; -def JO : IBr<0x80, (ins brtarget:$dst), "jo\t$dst", - [(X86brcond bb:$dst, X86_COND_O, EFLAGS)]>, TB; -def JNO : IBr<0x81, (ins brtarget:$dst), "jno\t$dst", - [(X86brcond bb:$dst, X86_COND_NO, EFLAGS)]>, TB; -} // Uses = [EFLAGS] // Loop instructions @@ -716,7 +694,7 @@ let isCall = 1 in XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7, XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS], Uses = [ESP] in { - def CALLpcrel32 : Ii32<0xE8, RawFrm, + def CALLpcrel32 : Ii32PCRel<0xE8, RawFrm, (outs), (ins 
i32imm_pcrel:$dst,variable_ops), "call\t$dst", []>; def CALL32r : I<0xFF, MRM2r, (outs), (ins GR32:$dst, variable_ops), @@ -756,15 +734,18 @@ def TCRETURNri : I<0, Pseudo, (outs), "#TC_RETURN $dst $offset", []>; -let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in - def TAILJMPd : IBr<0xE9, (ins i32imm_pcrel:$dst), "jmp\t$dst # TAILCALL", +// FIXME: These should be pseudo instructions that are lowered when going to +// mcinst. +let isCall = 1, isBranch = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in + def TAILJMPd : Ii32<0xE9, RawFrm, (outs),(ins i32imm_pcrel:$dst,variable_ops), + "jmp\t$dst # TAILCALL", []>; let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in - def TAILJMPr : I<0xFF, MRM4r, (outs), (ins GR32:$dst), + def TAILJMPr : I<0xFF, MRM4r, (outs), (ins GR32:$dst, variable_ops), "jmp{l}\t{*}$dst # TAILCALL", []>; let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in - def TAILJMPm : I<0xFF, MRM4m, (outs), (ins i32mem:$dst), + def TAILJMPm : I<0xFF, MRM4m, (outs), (ins i32mem:$dst, variable_ops), "jmp\t{*}$dst # TAILCALL", []>; //===----------------------------------------------------------------------===// @@ -877,7 +858,7 @@ def LEA32r : I<0x8D, MRMSrcMem, "lea{l}\t{$src|$dst}, {$dst|$src}", [(set GR32:$dst, lea32addr:$src)]>, Requires<[In32BitMode]>; -let Defs = [ECX,EDI,ESI], Uses = [ECX,EDI,ESI] in { +let Defs = [ECX,EDI,ESI], Uses = [ECX,EDI,ESI], isCodeGenOnly = 1 in { def REP_MOVSB : I<0xA4, RawFrm, (outs), (ins), "{rep;movsb|rep movsb}", [(X86rep_movs i8)]>, REP; def REP_MOVSW : I<0xA5, RawFrm, (outs), (ins), "{rep;movsw|rep movsw}", @@ -886,16 +867,31 @@ def REP_MOVSD : I<0xA5, RawFrm, (outs), (ins), "{rep;movsl|rep movsd}", [(X86rep_movs i32)]>, REP; } -let Defs = [ECX,EDI], Uses = [AL,ECX,EDI] in +// These use the DF flag in the EFLAGS register to inc or dec EDI and ESI +let Defs = [EDI,ESI], Uses = [EDI,ESI,EFLAGS] in { +def MOVSB : I<0xA4, RawFrm, (outs), (ins), "{movsb}", []>; +def MOVSW : I<0xA5, RawFrm, (outs), (ins), "{movsw}", []>, OpSize; +def MOVSD : I<0xA5, RawFrm, (outs), (ins), "{movsl|movsd}", []>; +} + +let Defs = [ECX,EDI], Uses = [AL,ECX,EDI], isCodeGenOnly = 1 in def REP_STOSB : I<0xAA, RawFrm, (outs), (ins), "{rep;stosb|rep stosb}", [(X86rep_stos i8)]>, REP; -let Defs = [ECX,EDI], Uses = [AX,ECX,EDI] in +let Defs = [ECX,EDI], Uses = [AX,ECX,EDI], isCodeGenOnly = 1 in def REP_STOSW : I<0xAB, RawFrm, (outs), (ins), "{rep;stosw|rep stosw}", [(X86rep_stos i16)]>, REP, OpSize; -let Defs = [ECX,EDI], Uses = [EAX,ECX,EDI] in +let Defs = [ECX,EDI], Uses = [EAX,ECX,EDI], isCodeGenOnly = 1 in def REP_STOSD : I<0xAB, RawFrm, (outs), (ins), "{rep;stosl|rep stosd}", [(X86rep_stos i32)]>, REP; +// These use the DF flag in the EFLAGS register to inc or dec EDI and ESI +let Defs = [EDI], Uses = [AL,EDI,EFLAGS] in +def STOSB : I<0xAA, RawFrm, (outs), (ins), "{stosb}", []>; +let Defs = [EDI], Uses = [AX,EDI,EFLAGS] in +def STOSW : I<0xAB, RawFrm, (outs), (ins), "{stosw}", []>, OpSize; +let Defs = [EDI], Uses = [EAX,EDI,EFLAGS] in +def STOSD : I<0xAB, RawFrm, (outs), (ins), "{stosl|stosd}", []>; + def SCAS8 : I<0xAE, RawFrm, (outs), (ins), "scas{b}", []>; def SCAS16 : I<0xAF, RawFrm, (outs), (ins), "scas{w}", []>, OpSize; def SCAS32 : I<0xAF, RawFrm, (outs), (ins), "scas{l}", []>; @@ -908,6 +904,9 @@ let Defs = [RAX, RDX] in def RDTSC : I<0x31, RawFrm, (outs), (ins), "rdtsc", [(X86rdtsc)]>, TB; +let Defs = [RAX, RCX, RDX] in +def RDTSCP : I<0x01, MRM_F9, (outs), (ins), "rdtscp", []>, TB; + let isBarrier = 1, hasCtrlDep = 1 in { 
def TRAP : I<0x0B, RawFrm, (outs), (ins), "ud2", [(trap)]>, TB; } @@ -996,6 +995,7 @@ def MOV32ri : Ii32<0xB8, AddRegFrm, (outs GR32:$dst), (ins i32imm:$src), "mov{l}\t{$src, $dst|$dst, $src}", [(set GR32:$dst, imm:$src)]>; } + def MOV8mi : Ii8 <0xC6, MRM0m, (outs), (ins i8mem :$dst, i8imm :$src), "mov{b}\t{$src, $dst|$dst, $src}", [(store (i8 imm:$src), addr:$dst)]>; @@ -2306,98 +2306,100 @@ let isTwoAddress = 0 in { def RCL8r1 : I<0xD0, MRM2r, (outs GR8:$dst), (ins GR8:$src), "rcl{b}\t{1, $dst|$dst, 1}", []>; -def RCL8m1 : I<0xD0, MRM2m, (outs i8mem:$dst), (ins i8mem:$src), - "rcl{b}\t{1, $dst|$dst, 1}", []>; let Uses = [CL] in { def RCL8rCL : I<0xD2, MRM2r, (outs GR8:$dst), (ins GR8:$src), "rcl{b}\t{%cl, $dst|$dst, CL}", []>; -def RCL8mCL : I<0xD2, MRM2m, (outs i8mem:$dst), (ins i8mem:$src), - "rcl{b}\t{%cl, $dst|$dst, CL}", []>; } def RCL8ri : Ii8<0xC0, MRM2r, (outs GR8:$dst), (ins GR8:$src, i8imm:$cnt), "rcl{b}\t{$cnt, $dst|$dst, $cnt}", []>; -def RCL8mi : Ii8<0xC0, MRM2m, (outs i8mem:$dst), (ins i8mem:$src, i8imm:$cnt), - "rcl{b}\t{$cnt, $dst|$dst, $cnt}", []>; def RCL16r1 : I<0xD1, MRM2r, (outs GR16:$dst), (ins GR16:$src), "rcl{w}\t{1, $dst|$dst, 1}", []>, OpSize; -def RCL16m1 : I<0xD1, MRM2m, (outs i16mem:$dst), (ins i16mem:$src), - "rcl{w}\t{1, $dst|$dst, 1}", []>, OpSize; let Uses = [CL] in { def RCL16rCL : I<0xD3, MRM2r, (outs GR16:$dst), (ins GR16:$src), "rcl{w}\t{%cl, $dst|$dst, CL}", []>, OpSize; -def RCL16mCL : I<0xD3, MRM2m, (outs i16mem:$dst), (ins i16mem:$src), - "rcl{w}\t{%cl, $dst|$dst, CL}", []>, OpSize; } def RCL16ri : Ii8<0xC1, MRM2r, (outs GR16:$dst), (ins GR16:$src, i8imm:$cnt), "rcl{w}\t{$cnt, $dst|$dst, $cnt}", []>, OpSize; -def RCL16mi : Ii8<0xC1, MRM2m, (outs i16mem:$dst), - (ins i16mem:$src, i8imm:$cnt), - "rcl{w}\t{$cnt, $dst|$dst, $cnt}", []>, OpSize; def RCL32r1 : I<0xD1, MRM2r, (outs GR32:$dst), (ins GR32:$src), "rcl{l}\t{1, $dst|$dst, 1}", []>; -def RCL32m1 : I<0xD1, MRM2m, (outs i32mem:$dst), (ins i32mem:$src), - "rcl{l}\t{1, $dst|$dst, 1}", []>; let Uses = [CL] in { def RCL32rCL : I<0xD3, MRM2r, (outs GR32:$dst), (ins GR32:$src), "rcl{l}\t{%cl, $dst|$dst, CL}", []>; -def RCL32mCL : I<0xD3, MRM2m, (outs i32mem:$dst), (ins i32mem:$src), - "rcl{l}\t{%cl, $dst|$dst, CL}", []>; } def RCL32ri : Ii8<0xC1, MRM2r, (outs GR32:$dst), (ins GR32:$src, i8imm:$cnt), "rcl{l}\t{$cnt, $dst|$dst, $cnt}", []>; -def RCL32mi : Ii8<0xC1, MRM2m, (outs i32mem:$dst), - (ins i32mem:$src, i8imm:$cnt), - "rcl{l}\t{$cnt, $dst|$dst, $cnt}", []>; def RCR8r1 : I<0xD0, MRM3r, (outs GR8:$dst), (ins GR8:$src), "rcr{b}\t{1, $dst|$dst, 1}", []>; -def RCR8m1 : I<0xD0, MRM3m, (outs i8mem:$dst), (ins i8mem:$src), - "rcr{b}\t{1, $dst|$dst, 1}", []>; let Uses = [CL] in { def RCR8rCL : I<0xD2, MRM3r, (outs GR8:$dst), (ins GR8:$src), "rcr{b}\t{%cl, $dst|$dst, CL}", []>; -def RCR8mCL : I<0xD2, MRM3m, (outs i8mem:$dst), (ins i8mem:$src), - "rcr{b}\t{%cl, $dst|$dst, CL}", []>; } def RCR8ri : Ii8<0xC0, MRM3r, (outs GR8:$dst), (ins GR8:$src, i8imm:$cnt), "rcr{b}\t{$cnt, $dst|$dst, $cnt}", []>; -def RCR8mi : Ii8<0xC0, MRM3m, (outs i8mem:$dst), (ins i8mem:$src, i8imm:$cnt), - "rcr{b}\t{$cnt, $dst|$dst, $cnt}", []>; def RCR16r1 : I<0xD1, MRM3r, (outs GR16:$dst), (ins GR16:$src), "rcr{w}\t{1, $dst|$dst, 1}", []>, OpSize; -def RCR16m1 : I<0xD1, MRM3m, (outs i16mem:$dst), (ins i16mem:$src), - "rcr{w}\t{1, $dst|$dst, 1}", []>, OpSize; let Uses = [CL] in { def RCR16rCL : I<0xD3, MRM3r, (outs GR16:$dst), (ins GR16:$src), "rcr{w}\t{%cl, $dst|$dst, CL}", []>, OpSize; -def RCR16mCL : I<0xD3, MRM3m, (outs 
i16mem:$dst), (ins i16mem:$src), - "rcr{w}\t{%cl, $dst|$dst, CL}", []>, OpSize; } def RCR16ri : Ii8<0xC1, MRM3r, (outs GR16:$dst), (ins GR16:$src, i8imm:$cnt), "rcr{w}\t{$cnt, $dst|$dst, $cnt}", []>, OpSize; -def RCR16mi : Ii8<0xC1, MRM3m, (outs i16mem:$dst), - (ins i16mem:$src, i8imm:$cnt), - "rcr{w}\t{$cnt, $dst|$dst, $cnt}", []>, OpSize; def RCR32r1 : I<0xD1, MRM3r, (outs GR32:$dst), (ins GR32:$src), "rcr{l}\t{1, $dst|$dst, 1}", []>; -def RCR32m1 : I<0xD1, MRM3m, (outs i32mem:$dst), (ins i32mem:$src), - "rcr{l}\t{1, $dst|$dst, 1}", []>; let Uses = [CL] in { def RCR32rCL : I<0xD3, MRM3r, (outs GR32:$dst), (ins GR32:$src), "rcr{l}\t{%cl, $dst|$dst, CL}", []>; -def RCR32mCL : I<0xD3, MRM3m, (outs i32mem:$dst), (ins i32mem:$src), - "rcr{l}\t{%cl, $dst|$dst, CL}", []>; } def RCR32ri : Ii8<0xC1, MRM3r, (outs GR32:$dst), (ins GR32:$src, i8imm:$cnt), "rcr{l}\t{$cnt, $dst|$dst, $cnt}", []>; -def RCR32mi : Ii8<0xC1, MRM3m, (outs i32mem:$dst), - (ins i32mem:$src, i8imm:$cnt), + +let isTwoAddress = 0 in { +def RCL8m1 : I<0xD0, MRM2m, (outs), (ins i8mem:$dst), + "rcl{b}\t{1, $dst|$dst, 1}", []>; +def RCL8mi : Ii8<0xC0, MRM2m, (outs), (ins i8mem:$dst, i8imm:$cnt), + "rcl{b}\t{$cnt, $dst|$dst, $cnt}", []>; +def RCL16m1 : I<0xD1, MRM2m, (outs), (ins i16mem:$dst), + "rcl{w}\t{1, $dst|$dst, 1}", []>, OpSize; +def RCL16mi : Ii8<0xC1, MRM2m, (outs), (ins i16mem:$dst, i8imm:$cnt), + "rcl{w}\t{$cnt, $dst|$dst, $cnt}", []>, OpSize; +def RCL32m1 : I<0xD1, MRM2m, (outs), (ins i32mem:$dst), + "rcl{l}\t{1, $dst|$dst, 1}", []>; +def RCL32mi : Ii8<0xC1, MRM2m, (outs), (ins i32mem:$dst, i8imm:$cnt), + "rcl{l}\t{$cnt, $dst|$dst, $cnt}", []>; +def RCR8m1 : I<0xD0, MRM3m, (outs), (ins i8mem:$dst), + "rcr{b}\t{1, $dst|$dst, 1}", []>; +def RCR8mi : Ii8<0xC0, MRM3m, (outs), (ins i8mem:$dst, i8imm:$cnt), + "rcr{b}\t{$cnt, $dst|$dst, $cnt}", []>; +def RCR16m1 : I<0xD1, MRM3m, (outs), (ins i16mem:$dst), + "rcr{w}\t{1, $dst|$dst, 1}", []>, OpSize; +def RCR16mi : Ii8<0xC1, MRM3m, (outs), (ins i16mem:$dst, i8imm:$cnt), + "rcr{w}\t{$cnt, $dst|$dst, $cnt}", []>, OpSize; +def RCR32m1 : I<0xD1, MRM3m, (outs), (ins i32mem:$dst), + "rcr{l}\t{1, $dst|$dst, 1}", []>; +def RCR32mi : Ii8<0xC1, MRM3m, (outs), (ins i32mem:$dst, i8imm:$cnt), "rcr{l}\t{$cnt, $dst|$dst, $cnt}", []>; +let Uses = [CL] in { +def RCL8mCL : I<0xD2, MRM2m, (outs), (ins i8mem:$dst), + "rcl{b}\t{%cl, $dst|$dst, CL}", []>; +def RCL16mCL : I<0xD3, MRM2m, (outs), (ins i16mem:$dst), + "rcl{w}\t{%cl, $dst|$dst, CL}", []>, OpSize; +def RCL32mCL : I<0xD3, MRM2m, (outs), (ins i32mem:$dst), + "rcl{l}\t{%cl, $dst|$dst, CL}", []>; +def RCR8mCL : I<0xD2, MRM3m, (outs), (ins i8mem:$dst), + "rcr{b}\t{%cl, $dst|$dst, CL}", []>; +def RCR16mCL : I<0xD3, MRM3m, (outs), (ins i16mem:$dst), + "rcr{w}\t{%cl, $dst|$dst, CL}", []>, OpSize; +def RCR32mCL : I<0xD3, MRM3m, (outs), (ins i32mem:$dst), + "rcr{l}\t{%cl, $dst|$dst, CL}", []>; +} +} + // FIXME: provide shorter instructions when imm8 == 1 let Uses = [CL] in { def ROL8rCL : I<0xD2, MRM0r, (outs GR8 :$dst), (ins GR8 :$src), @@ -3006,8 +3008,8 @@ let isTwoAddress = 0 in { def SBB32mr : I<0x19, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2), "sbb{l}\t{$src2, $dst|$dst, $src2}", [(store (sube (load addr:$dst), GR32:$src2), addr:$dst)]>; - def SBB8mi : Ii32<0x80, MRM3m, (outs), (ins i8mem:$dst, i8imm:$src2), - "sbb{b}\t{$src2, $dst|$dst, $src2}", + def SBB8mi : Ii8<0x80, MRM3m, (outs), (ins i8mem:$dst, i8imm:$src2), + "sbb{b}\t{$src2, $dst|$dst, $src2}", [(store (sube (loadi8 addr:$dst), imm:$src2), addr:$dst)]>; def SBB16mi : 
Ii16<0x81, MRM3m, (outs), (ins i16mem:$dst, i16imm:$src2), "sbb{w}\t{$src2, $dst|$dst, $src2}", @@ -3234,17 +3236,18 @@ def LAHF : I<0x9F, RawFrm, (outs), (ins), "lahf", []>; // AH = flags let Uses = [EFLAGS] in { // Use sbb to materialize carry bit. - let Defs = [EFLAGS], isCodeGenOnly = 1 in { -def SETB_C8r : I<0x18, MRMInitReg, (outs GR8:$dst), (ins), - "sbb{b}\t$dst, $dst", +// FIXME: These are pseudo ops that should be replaced with Pat<> patterns. +// However, Pat<> can't replicate the destination reg into the inputs of the +// result. +// FIXME: Change these to have encoding Pseudo when X86MCCodeEmitter replaces +// X86CodeEmitter. +def SETB_C8r : I<0x18, MRMInitReg, (outs GR8:$dst), (ins), "", [(set GR8:$dst, (X86setcc_c X86_COND_B, EFLAGS))]>; -def SETB_C16r : I<0x19, MRMInitReg, (outs GR16:$dst), (ins), - "sbb{w}\t$dst, $dst", +def SETB_C16r : I<0x19, MRMInitReg, (outs GR16:$dst), (ins), "", [(set GR16:$dst, (X86setcc_c X86_COND_B, EFLAGS))]>, OpSize; -def SETB_C32r : I<0x19, MRMInitReg, (outs GR32:$dst), (ins), - "sbb{l}\t$dst, $dst", +def SETB_C32r : I<0x19, MRMInitReg, (outs GR32:$dst), (ins), "", [(set GR32:$dst, (X86setcc_c X86_COND_B, EFLAGS))]>; } // isCodeGenOnly @@ -3681,7 +3684,7 @@ def MOVZX32rm16: I<0xB7, MRMSrcMem, (outs GR32:$dst), (ins i16mem:$src), "movz{wl|x}\t{$src, $dst|$dst, $src}", [(set GR32:$dst, (zextloadi32i16 addr:$src))]>, TB; -// These are the same as the regular regular MOVZX32rr8 and MOVZX32rm8 +// These are the same as the regular MOVZX32rr8 and MOVZX32rm8 // except that they use GR32_NOREX for the output operand register class // instead of GR32. This allows them to operate on h registers on x86-64. def MOVZX32_NOREXrr8 : I<0xB6, MRMSrcReg, @@ -3716,10 +3719,10 @@ let neverHasSideEffects = 1 in { // Alias instructions that map movr0 to xor. // FIXME: remove when we can teach regalloc that xor reg, reg is ok. +// FIXME: Set encoding to pseudo. let Defs = [EFLAGS], isReMaterializable = 1, isAsCheapAsAMove = 1, isCodeGenOnly = 1 in { -def MOV8r0 : I<0x30, MRMInitReg, (outs GR8 :$dst), (ins), - "xor{b}\t$dst, $dst", +def MOV8r0 : I<0x30, MRMInitReg, (outs GR8 :$dst), (ins), "", [(set GR8:$dst, 0)]>; // We want to rewrite MOV16r0 in terms of MOV32r0, because it's a smaller @@ -3731,8 +3734,8 @@ def MOV16r0 : I<0x31, MRMInitReg, (outs GR16:$dst), (ins), "", [(set GR16:$dst, 0)]>, OpSize; -def MOV32r0 : I<0x31, MRMInitReg, (outs GR32:$dst), (ins), - "xor{l}\t$dst, $dst", +// FIXME: Set encoding to pseudo. 
+def MOV32r0 : I<0x31, MRMInitReg, (outs GR32:$dst), (ins), "", [(set GR32:$dst, 0)]>; } @@ -4077,7 +4080,7 @@ def LSL32rm : I<0x03, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src), def LSL32rr : I<0x03, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src), "lsl{l}\t{$src, $dst|$dst, $src}", []>, TB; -def INVLPG : I<0x01, RawFrm, (outs), (ins), "invlpg", []>, TB; +def INVLPG : I<0x01, MRM7m, (outs), (ins i8mem:$addr), "invlpg\t$addr", []>, TB; def STRr : I<0x00, MRM1r, (outs GR16:$dst), (ins), "str{w}\t{$dst}", []>, TB; @@ -4155,6 +4158,26 @@ def LLDT16r : I<0x00, MRM2r, (outs), (ins GR16:$src), def LLDT16m : I<0x00, MRM2m, (outs), (ins i16mem:$src), "lldt{w}\t$src", []>, TB; +// Lock instruction prefix +def LOCK_PREFIX : I<0xF0, RawFrm, (outs), (ins), "lock", []>; + +// Repeat string operation instruction prefixes +// These use the DF flag in the EFLAGS register to inc or dec ECX +let Defs = [ECX], Uses = [ECX,EFLAGS] in { +// Repeat (used with INS, OUTS, MOVS, LODS and STOS) +def REP_PREFIX : I<0xF3, RawFrm, (outs), (ins), "rep", []>; +// Repeat while not equal (used with CMPS and SCAS) +def REPNE_PREFIX : I<0xF2, RawFrm, (outs), (ins), "repne", []>; +} + +// Segment override instruction prefixes +def CS_PREFIX : I<0x2E, RawFrm, (outs), (ins), "cs", []>; +def SS_PREFIX : I<0x36, RawFrm, (outs), (ins), "ss", []>; +def DS_PREFIX : I<0x3E, RawFrm, (outs), (ins), "ds", []>; +def ES_PREFIX : I<0x26, RawFrm, (outs), (ins), "es", []>; +def FS_PREFIX : I<0x64, RawFrm, (outs), (ins), "fs", []>; +def GS_PREFIX : I<0x65, RawFrm, (outs), (ins), "gs", []>; + // String manipulation instructions def LODSB : I<0xAC, RawFrm, (outs), (ins), "lodsb", []>; @@ -4219,17 +4242,17 @@ def WBINVD : I<0x09, RawFrm, (outs), (ins), "wbinvd", []>, TB; // VMX instructions // 66 0F 38 80 -def INVEPT : I<0x38, RawFrm, (outs), (ins), "invept", []>, OpSize, TB; +def INVEPT : I<0x80, RawFrm, (outs), (ins), "invept", []>, OpSize, T8; // 66 0F 38 81 -def INVVPID : I<0x38, RawFrm, (outs), (ins), "invvpid", []>, OpSize, TB; +def INVVPID : I<0x81, RawFrm, (outs), (ins), "invvpid", []>, OpSize, T8; // 0F 01 C1 -def VMCALL : I<0x01, RawFrm, (outs), (ins), "vmcall", []>, TB; +def VMCALL : I<0x01, MRM_C1, (outs), (ins), "vmcall", []>, TB; def VMCLEARm : I<0xC7, MRM6m, (outs), (ins i64mem:$vmcs), "vmclear\t$vmcs", []>, OpSize, TB; // 0F 01 C2 -def VMLAUNCH : I<0x01, RawFrm, (outs), (ins), "vmlaunch", []>, TB; +def VMLAUNCH : I<0x01, MRM_C2, (outs), (ins), "vmlaunch", []>, TB; // 0F 01 C3 -def VMRESUME : I<0x01, RawFrm, (outs), (ins), "vmresume", []>, TB; +def VMRESUME : I<0x01, MRM_C3, (outs), (ins), "vmresume", []>, TB; def VMPTRLDm : I<0xC7, MRM6m, (outs), (ins i64mem:$vmcs), "vmptrld\t$vmcs", []>, TB; def VMPTRSTm : I<0xC7, MRM7m, (outs i64mem:$vmcs), (ins), @@ -4251,7 +4274,7 @@ def VMWRITE32rm : I<0x79, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src), def VMWRITE32rr : I<0x79, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src), "vmwrite{l}\t{$src, $dst|$dst, $src}", []>, TB; // 0F 01 C4 -def VMXOFF : I<0x01, RawFrm, (outs), (ins), "vmxoff", []>, OpSize; +def VMXOFF : I<0x01, MRM_C4, (outs), (ins), "vmxoff", []>, TB; def VMXON : I<0xC7, MRM6m, (outs), (ins i64mem:$vmxon), "vmxon\t{$vmxon}", []>, XD; @@ -5181,6 +5204,12 @@ include "X86InstrFPStack.td" include "X86Instr64bit.td" //===----------------------------------------------------------------------===// +// SIMD support (SSE, MMX and AVX) +//===----------------------------------------------------------------------===// + +include "X86InstrFragmentsSIMD.td" + 
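The VMX defs above (and RDTSCP earlier) move from RawFrm to the new MRM_C1/MRM_C2/MRM_C3/MRM_C4/MRM_F9 forms, that is, opcodes whose ModRM position holds a fixed literal byte rather than an encoded operand. A minimal sketch of the byte sequence such a form denotes, in plain C++ rather than the emitter's real interfaces (the helper name is made up for illustration):

#include <cstdint>
#include <vector>

// Hypothetical stand-in for the emitter: append an MRM_Cx-form
// instruction, i.e. the 0x0F two-byte-opcode escape (the TB prefix),
// the base opcode, then a fixed byte where ModRM would normally sit.
static void EmitFixedModRM(std::vector<uint8_t> &Out,
                           uint8_t BaseOpcode, uint8_t FixedByte) {
  Out.push_back(0x0F);
  Out.push_back(BaseOpcode);
  Out.push_back(FixedByte);
}

int main() {
  std::vector<uint8_t> Buf;
  EmitFixedModRM(Buf, 0x01, 0xC1); // vmcall = 0F 01 C1
  EmitFixedModRM(Buf, 0x01, 0xF9); // rdtscp = 0F 01 F9
  return Buf.size() == 6 ? 0 : 1;
}

The MRM_C1 through MRM_F9 cases of the new X86MCCodeEmitter further down emit exactly this trailing byte pair.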
+//===----------------------------------------------------------------------===// // XMM Floating point support (requires SSE / SSE2) //===----------------------------------------------------------------------===// diff --git a/lib/Target/X86/X86InstrMMX.td b/lib/Target/X86/X86InstrMMX.td index fc40c9a..89f020c 100644 --- a/lib/Target/X86/X86InstrMMX.td +++ b/lib/Target/X86/X86InstrMMX.td @@ -14,56 +14,6 @@ //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// -// MMX Pattern Fragments -//===----------------------------------------------------------------------===// - -def load_mmx : PatFrag<(ops node:$ptr), (v1i64 (load node:$ptr))>; - -def bc_v8i8 : PatFrag<(ops node:$in), (v8i8 (bitconvert node:$in))>; -def bc_v4i16 : PatFrag<(ops node:$in), (v4i16 (bitconvert node:$in))>; -def bc_v2i32 : PatFrag<(ops node:$in), (v2i32 (bitconvert node:$in))>; -def bc_v1i64 : PatFrag<(ops node:$in), (v1i64 (bitconvert node:$in))>; - -//===----------------------------------------------------------------------===// -// MMX Masks -//===----------------------------------------------------------------------===// - -// MMX_SHUFFLE_get_shuf_imm xform function: convert vector_shuffle mask to -// PSHUFW imm. -def MMX_SHUFFLE_get_shuf_imm : SDNodeXForm<vector_shuffle, [{ - return getI8Imm(X86::getShuffleSHUFImmediate(N)); -}]>; - -// Patterns for: vector_shuffle v1, v2, <2, 6, 3, 7, ...> -def mmx_unpckh : PatFrag<(ops node:$lhs, node:$rhs), - (vector_shuffle node:$lhs, node:$rhs), [{ - return X86::isUNPCKHMask(cast<ShuffleVectorSDNode>(N)); -}]>; - -// Patterns for: vector_shuffle v1, v2, <0, 4, 2, 5, ...> -def mmx_unpckl : PatFrag<(ops node:$lhs, node:$rhs), - (vector_shuffle node:$lhs, node:$rhs), [{ - return X86::isUNPCKLMask(cast<ShuffleVectorSDNode>(N)); -}]>; - -// Patterns for: vector_shuffle v1, <undef>, <0, 0, 1, 1, ...> -def mmx_unpckh_undef : PatFrag<(ops node:$lhs, node:$rhs), - (vector_shuffle node:$lhs, node:$rhs), [{ - return X86::isUNPCKH_v_undef_Mask(cast<ShuffleVectorSDNode>(N)); -}]>; - -// Patterns for: vector_shuffle v1, <undef>, <2, 2, 3, 3, ...> -def mmx_unpckl_undef : PatFrag<(ops node:$lhs, node:$rhs), - (vector_shuffle node:$lhs, node:$rhs), [{ - return X86::isUNPCKL_v_undef_Mask(cast<ShuffleVectorSDNode>(N)); -}]>; - -def mmx_pshufw : PatFrag<(ops node:$lhs, node:$rhs), - (vector_shuffle node:$lhs, node:$rhs), [{ - return X86::isPSHUFDMask(cast<ShuffleVectorSDNode>(N)); -}], MMX_SHUFFLE_get_shuf_imm>; - -//===----------------------------------------------------------------------===// // MMX Multiclasses //===----------------------------------------------------------------------===// @@ -501,6 +451,20 @@ let Constraints = "$src1 = $dst" in { (iPTR imm:$src3))))]>; } +// MMX to XMM for vector types +def MMX_X86movq2dq : SDNode<"X86ISD::MOVQ2DQ", SDTypeProfile<1, 1, + [SDTCisVT<0, v2i64>, SDTCisVT<1, v1i64>]>>; + +def : Pat<(v2i64 (MMX_X86movq2dq VR64:$src)), + (v2i64 (MMX_MOVQ2DQrr VR64:$src))>; + +def : Pat<(v2i64 (MMX_X86movq2dq (load_mmx addr:$src))), + (v2i64 (MOVQI2PQIrm addr:$src))>; + +def : Pat<(v2i64 (MMX_X86movq2dq (v1i64 (bitconvert + (v2i32 (scalar_to_vector (loadi32 addr:$src))))))), + (v2i64 (MOVDI2PDIrm addr:$src))>; + // Mask creation def MMX_PMOVMSKBrr : MMXI<0xD7, MRMSrcReg, (outs GR32:$dst), (ins VR64:$src), "pmovmskb\t{$src, $dst|$dst, $src}", @@ -522,11 +486,10 @@ def MMX_MASKMOVQ64: MMXI64<0xF7, MRMSrcReg, (outs), (ins VR64:$src, VR64:$mask), // Alias 
instructions that map zero vector to pxor. let isReMaterializable = 1, isCodeGenOnly = 1 in { - def MMX_V_SET0 : MMXI<0xEF, MRMInitReg, (outs VR64:$dst), (ins), - "pxor\t$dst, $dst", + // FIXME: Change encoding to pseudo. + def MMX_V_SET0 : MMXI<0xEF, MRMInitReg, (outs VR64:$dst), (ins), "", [(set VR64:$dst, (v2i32 immAllZerosV))]>; - def MMX_V_SETALLONES : MMXI<0x76, MRMInitReg, (outs VR64:$dst), (ins), - "pcmpeqd\t$dst, $dst", + def MMX_V_SETALLONES : MMXI<0x76, MRMInitReg, (outs VR64:$dst), (ins), "", [(set VR64:$dst, (v2i32 immAllOnesV))]>; } diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index 94b9b55..9b2140f 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -505,9 +505,10 @@ def Int_COMISSrm: PSI<0x2F, MRMSrcMem, (outs), (ins VR128:$src1, f128mem:$src2), // Alias instructions that map fld0 to pxor for sse. let isReMaterializable = 1, isAsCheapAsAMove = 1, isCodeGenOnly = 1, canFoldAsLoad = 1 in -def FsFLD0SS : I<0xEF, MRMInitReg, (outs FR32:$dst), (ins), - "pxor\t$dst, $dst", [(set FR32:$dst, fp32imm0)]>, - Requires<[HasSSE1]>, TB, OpSize; + // FIXME: Set encoding to pseudo! +def FsFLD0SS : I<0xEF, MRMInitReg, (outs FR32:$dst), (ins), "", + [(set FR32:$dst, fp32imm0)]>, + Requires<[HasSSE1]>, TB, OpSize; // Alias instruction to do FR32 reg-to-reg copy using movaps. Upper bits are // disregarded. @@ -761,6 +762,9 @@ let Constraints = "$src1 = $dst" in { } // Constraints = "$src1 = $dst" +def : Pat<(movlhps VR128:$src1, (bc_v4i32 (v2i64 (X86vzload addr:$src2)))), + (MOVHPSrm VR128:$src1, addr:$src2)>; + def MOVLPSmr : PSI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src), "movlps\t{$src, $dst|$dst, $src}", [(store (f64 (vector_extract (bc_v2f64 (v4f32 VR128:$src)), @@ -1025,10 +1029,10 @@ def STMXCSR : PSI<0xAE, MRM3m, (outs), (ins i32mem:$dst), // Alias instructions that map zero vector to pxor / xorp* for sse. // We set canFoldAsLoad because this can be converted to a constant-pool // load of an all-zeros value if folding it would be beneficial. +// FIXME: Change encoding to pseudo! let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1, isCodeGenOnly = 1 in -def V_SET0 : PSI<0x57, MRMInitReg, (outs VR128:$dst), (ins), - "xorps\t$dst, $dst", +def V_SET0 : PSI<0x57, MRMInitReg, (outs VR128:$dst), (ins), "", [(set VR128:$dst, (v4i32 immAllZerosV))]>; let Predicates = [HasSSE1] in { @@ -1269,8 +1273,8 @@ def Int_COMISDrm: PDI<0x2F, MRMSrcMem, (outs), (ins VR128:$src1, f128mem:$src2), // Alias instructions that map fld0 to pxor for sse. let isReMaterializable = 1, isAsCheapAsAMove = 1, isCodeGenOnly = 1, canFoldAsLoad = 1 in -def FsFLD0SD : I<0xEF, MRMInitReg, (outs FR64:$dst), (ins), - "pxor\t$dst, $dst", [(set FR64:$dst, fpimm0)]>, +def FsFLD0SD : I<0xEF, MRMInitReg, (outs FR64:$dst), (ins), "", + [(set FR64:$dst, fpimm0)]>, Requires<[HasSSE2]>, TB, OpSize; // Alias instruction to do FR64 reg-to-reg copy using movapd. 
Upper bits are @@ -2311,9 +2315,9 @@ def CLFLUSH : I<0xAE, MRM7m, (outs), (ins i8mem:$src), TB, Requires<[HasSSE2]>; // Load, store, and memory fence -def LFENCE : I<0xAE, MRM5r, (outs), (ins), +def LFENCE : I<0xAE, MRM_E8, (outs), (ins), "lfence", [(int_x86_sse2_lfence)]>, TB, Requires<[HasSSE2]>; -def MFENCE : I<0xAE, MRM6r, (outs), (ins), +def MFENCE : I<0xAE, MRM_F0, (outs), (ins), "mfence", [(int_x86_sse2_mfence)]>, TB, Requires<[HasSSE2]>; //TODO: custom lower this so as to never even generate the noop @@ -2329,8 +2333,8 @@ def : Pat<(membarrier (i8 imm:$ll), (i8 imm:$ls), (i8 imm:$sl), (i8 imm:$ss), // load of an all-ones value if folding it would be beneficial. let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1, isCodeGenOnly = 1 in - def V_SETALLONES : PDI<0x76, MRMInitReg, (outs VR128:$dst), (ins), - "pcmpeqd\t$dst, $dst", + // FIXME: Change encoding to pseudo. + def V_SETALLONES : PDI<0x76, MRMInitReg, (outs VR128:$dst), (ins), "", [(set VR128:$dst, (v4i32 immAllOnesV))]>; // FR64 to 128-bit vector conversion. @@ -2612,9 +2616,9 @@ let Constraints = "$src1 = $dst" in { } // Thread synchronization -def MONITOR : I<0x01, MRM1r, (outs), (ins), "monitor", +def MONITOR : I<0x01, MRM_C8, (outs), (ins), "monitor", [(int_x86_sse3_monitor EAX, ECX, EDX)]>,TB, Requires<[HasSSE3]>; -def MWAIT : I<0x01, MRM1r, (outs), (ins), "mwait", +def MWAIT : I<0x01, MRM_C9, (outs), (ins), "mwait", [(int_x86_sse3_mwait ECX, EAX)]>, TB, Requires<[HasSSE3]>; // vector_shuffle v1, <undef> <1, 1, 3, 3> @@ -3746,7 +3750,8 @@ def PTESTrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR128:$src1, i128mem:$src2), def MOVNTDQArm : SS48I<0x2A, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), "movntdqa\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (int_x86_sse41_movntdqa addr:$src))]>; + [(set VR128:$dst, (int_x86_sse41_movntdqa addr:$src))]>, + OpSize; //===----------------------------------------------------------------------===// diff --git a/lib/Target/X86/X86JITInfo.cpp b/lib/Target/X86/X86JITInfo.cpp index f363903..d297d24 100644 --- a/lib/Target/X86/X86JITInfo.cpp +++ b/lib/Target/X86/X86JITInfo.cpp @@ -297,6 +297,7 @@ extern "C" { push edx push ecx and esp, -16 + sub esp, 16 mov eax, dword ptr [ebp+4] mov dword ptr [esp+4], eax mov dword ptr [esp], ebp diff --git a/lib/Target/X86/X86MCAsmInfo.cpp b/lib/Target/X86/X86MCAsmInfo.cpp index 1738d49..91c0fbb 100644 --- a/lib/Target/X86/X86MCAsmInfo.cpp +++ b/lib/Target/X86/X86MCAsmInfo.cpp @@ -55,9 +55,6 @@ X86MCAsmInfoDarwin::X86MCAsmInfoDarwin(const Triple &Triple) { if (!is64Bit) Data64bitsDirective = 0; // we can't emit a 64-bit unit - // Leopard and above support aligned common symbols. 
- COMMDirectiveTakesAlignment = Triple.getDarwinMajorNumber() >= 9; - CommentString = "##"; PCSymbol = "."; @@ -75,7 +72,6 @@ X86ELFMCAsmInfo::X86ELFMCAsmInfo(const Triple &Triple) { PrivateGlobalPrefix = ".L"; WeakRefDirective = "\t.weak\t"; - SetDirective = "\t.set\t"; PCSymbol = "."; // Set up DWARF directives @@ -98,27 +94,4 @@ MCSection *X86ELFMCAsmInfo::getNonexecutableStackSection(MCContext &Ctx) const { X86MCAsmInfoCOFF::X86MCAsmInfoCOFF(const Triple &Triple) { AsmTransCBE = x86_asm_table; AssemblerDialect = AsmWriterFlavor; -} - - -X86WinMCAsmInfo::X86WinMCAsmInfo(const Triple &Triple) { - AsmTransCBE = x86_asm_table; - AssemblerDialect = AsmWriterFlavor; - - GlobalPrefix = "_"; - CommentString = ";"; - - PrivateGlobalPrefix = "$"; - AlignDirective = "\tALIGN\t"; - ZeroDirective = "\tdb\t"; - AsciiDirective = "\tdb\t"; - AscizDirective = 0; - Data8bitsDirective = "\tdb\t"; - Data16bitsDirective = "\tdw\t"; - Data32bitsDirective = "\tdd\t"; - Data64bitsDirective = "\tdq\t"; - HasDotTypeDotSizeDirective = false; - HasSingleParameterDotFile = false; - - AlignmentIsInBytes = true; -} +}
\ No newline at end of file diff --git a/lib/Target/X86/X86MCAsmInfo.h b/lib/Target/X86/X86MCAsmInfo.h index ca227b7..69716bf 100644 --- a/lib/Target/X86/X86MCAsmInfo.h +++ b/lib/Target/X86/X86MCAsmInfo.h @@ -33,11 +33,6 @@ namespace llvm { struct X86MCAsmInfoCOFF : public MCAsmInfoCOFF { explicit X86MCAsmInfoCOFF(const Triple &Triple); }; - - struct X86WinMCAsmInfo : public MCAsmInfo { - explicit X86WinMCAsmInfo(const Triple &Triple); - }; - } // namespace llvm #endif diff --git a/lib/Target/X86/X86MCCodeEmitter.cpp b/lib/Target/X86/X86MCCodeEmitter.cpp new file mode 100644 index 0000000..3f18696 --- /dev/null +++ b/lib/Target/X86/X86MCCodeEmitter.cpp @@ -0,0 +1,645 @@ +//===-- X86/X86MCCodeEmitter.cpp - Convert X86 code to machine code -------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the X86MCCodeEmitter class. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "x86-emitter" +#include "X86.h" +#include "X86InstrInfo.h" +#include "X86FixupKinds.h" +#include "llvm/MC/MCCodeEmitter.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCInst.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +namespace { +class X86MCCodeEmitter : public MCCodeEmitter { + X86MCCodeEmitter(const X86MCCodeEmitter &); // DO NOT IMPLEMENT + void operator=(const X86MCCodeEmitter &); // DO NOT IMPLEMENT + const TargetMachine &TM; + const TargetInstrInfo &TII; + MCContext &Ctx; + bool Is64BitMode; +public: + X86MCCodeEmitter(TargetMachine &tm, MCContext &ctx, bool is64Bit) + : TM(tm), TII(*TM.getInstrInfo()), Ctx(ctx) { + Is64BitMode = is64Bit; + } + + ~X86MCCodeEmitter() {} + + unsigned getNumFixupKinds() const { + return 3; + } + + const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const { + const static MCFixupKindInfo Infos[] = { + { "reloc_pcrel_4byte", 0, 4 * 8 }, + { "reloc_pcrel_1byte", 0, 1 * 8 }, + { "reloc_riprel_4byte", 0, 4 * 8 } + }; + + if (Kind < FirstTargetFixupKind) + return MCCodeEmitter::getFixupKindInfo(Kind); + + assert(unsigned(Kind - FirstTargetFixupKind) < getNumFixupKinds() && + "Invalid kind!"); + return Infos[Kind - FirstTargetFixupKind]; + } + + static unsigned GetX86RegNum(const MCOperand &MO) { + return X86RegisterInfo::getX86RegNum(MO.getReg()); + } + + void EmitByte(unsigned char C, unsigned &CurByte, raw_ostream &OS) const { + OS << (char)C; + ++CurByte; + } + + void EmitConstant(uint64_t Val, unsigned Size, unsigned &CurByte, + raw_ostream &OS) const { + // Output the constant in little endian byte order. 
+ for (unsigned i = 0; i != Size; ++i) { + EmitByte(Val & 255, CurByte, OS); + Val >>= 8; + } + } + + void EmitImmediate(const MCOperand &Disp, + unsigned ImmSize, MCFixupKind FixupKind, + unsigned &CurByte, raw_ostream &OS, + SmallVectorImpl<MCFixup> &Fixups, + int ImmOffset = 0) const; + + inline static unsigned char ModRMByte(unsigned Mod, unsigned RegOpcode, + unsigned RM) { + assert(Mod < 4 && RegOpcode < 8 && RM < 8 && "ModRM Fields out of range!"); + return RM | (RegOpcode << 3) | (Mod << 6); + } + + void EmitRegModRMByte(const MCOperand &ModRMReg, unsigned RegOpcodeFld, + unsigned &CurByte, raw_ostream &OS) const { + EmitByte(ModRMByte(3, RegOpcodeFld, GetX86RegNum(ModRMReg)), CurByte, OS); + } + + void EmitSIBByte(unsigned SS, unsigned Index, unsigned Base, + unsigned &CurByte, raw_ostream &OS) const { + // SIB byte is in the same format as the ModRMByte. + EmitByte(ModRMByte(SS, Index, Base), CurByte, OS); + } + + + void EmitMemModRMByte(const MCInst &MI, unsigned Op, + unsigned RegOpcodeField, + unsigned TSFlags, unsigned &CurByte, raw_ostream &OS, + SmallVectorImpl<MCFixup> &Fixups) const; + + void EncodeInstruction(const MCInst &MI, raw_ostream &OS, + SmallVectorImpl<MCFixup> &Fixups) const; + +}; + +} // end anonymous namespace + + +MCCodeEmitter *llvm::createX86_32MCCodeEmitter(const Target &, + TargetMachine &TM, + MCContext &Ctx) { + return new X86MCCodeEmitter(TM, Ctx, false); +} + +MCCodeEmitter *llvm::createX86_64MCCodeEmitter(const Target &, + TargetMachine &TM, + MCContext &Ctx) { + return new X86MCCodeEmitter(TM, Ctx, true); +} + + +/// isDisp8 - Return true if this signed displacement fits in an 8-bit +/// sign-extended field. +static bool isDisp8(int Value) { + return Value == (signed char)Value; +} + +/// getImmFixupKind - Return the appropriate fixup kind to use for an immediate +/// in an instruction with the specified TSFlags. +static MCFixupKind getImmFixupKind(unsigned TSFlags) { + unsigned Size = X86II::getSizeOfImm(TSFlags); + bool isPCRel = X86II::isImmPCRel(TSFlags); + + switch (Size) { + default: assert(0 && "Unknown immediate size"); + case 1: return isPCRel ? MCFixupKind(X86::reloc_pcrel_1byte) : FK_Data_1; + case 4: return isPCRel ? MCFixupKind(X86::reloc_pcrel_4byte) : FK_Data_4; + case 2: assert(!isPCRel); return FK_Data_2; + case 8: assert(!isPCRel); return FK_Data_8; + } +} + + +void X86MCCodeEmitter:: +EmitImmediate(const MCOperand &DispOp, unsigned Size, MCFixupKind FixupKind, + unsigned &CurByte, raw_ostream &OS, + SmallVectorImpl<MCFixup> &Fixups, int ImmOffset) const { + // If this is a simple integer displacement that doesn't require a relocation, + // emit it now. + if (DispOp.isImm()) { + // FIXME: is this right for pc-rel encoding?? Probably need to emit this as + // a fixup if so. + EmitConstant(DispOp.getImm()+ImmOffset, Size, CurByte, OS); + return; + } + + // If we have an immoffset, add it to the expression. + const MCExpr *Expr = DispOp.getExpr(); + + // If the fixup is pc-relative, we need to bias the value to be relative to + // the start of the field, not the end of the field. + if (FixupKind == MCFixupKind(X86::reloc_pcrel_4byte) || + FixupKind == MCFixupKind(X86::reloc_riprel_4byte)) + ImmOffset -= 4; + if (FixupKind == MCFixupKind(X86::reloc_pcrel_1byte)) + ImmOffset -= 1; + + if (ImmOffset) + Expr = MCBinaryExpr::CreateAdd(Expr, MCConstantExpr::Create(ImmOffset, Ctx), + Ctx); + + // Emit a symbolic constant as a fixup and Size zero bytes. 
+ Fixups.push_back(MCFixup::Create(CurByte, Expr, FixupKind)); + EmitConstant(0, Size, CurByte, OS); +} + + +void X86MCCodeEmitter::EmitMemModRMByte(const MCInst &MI, unsigned Op, + unsigned RegOpcodeField, + unsigned TSFlags, unsigned &CurByte, + raw_ostream &OS, + SmallVectorImpl<MCFixup> &Fixups) const{ + const MCOperand &Disp = MI.getOperand(Op+3); + const MCOperand &Base = MI.getOperand(Op); + const MCOperand &Scale = MI.getOperand(Op+1); + const MCOperand &IndexReg = MI.getOperand(Op+2); + unsigned BaseReg = Base.getReg(); + + // Handle %rip relative addressing. + if (BaseReg == X86::RIP) { // [disp32+RIP] in X86-64 mode + assert(IndexReg.getReg() == 0 && Is64BitMode && + "Invalid rip-relative address"); + EmitByte(ModRMByte(0, RegOpcodeField, 5), CurByte, OS); + + // rip-relative addressing is actually relative to the *next* instruction. + // Since an immediate can follow the mod/rm byte for an instruction, this + // means that we need to bias the immediate field of the instruction with + // the size of the immediate field. If we have this case, add it into the + // expression to emit. + int ImmSize = X86II::hasImm(TSFlags) ? X86II::getSizeOfImm(TSFlags) : 0; + + EmitImmediate(Disp, 4, MCFixupKind(X86::reloc_riprel_4byte), + CurByte, OS, Fixups, -ImmSize); + return; + } + + unsigned BaseRegNo = BaseReg ? GetX86RegNum(Base) : -1U; + + // Determine whether a SIB byte is needed. + // If no BaseReg, issue a RIP relative instruction only if the MCE can + // resolve addresses on-the-fly, otherwise use SIB (Intel Manual 2A, table + // 2-7) and absolute references. + + if (// The SIB byte must be used if there is an index register. + IndexReg.getReg() == 0 && + // The SIB byte must be used if the base is ESP/RSP/R12, all of which + // encode to an R/M value of 4, which indicates that a SIB byte is + // present. + BaseRegNo != N86::ESP && + // If there is no base register and we're in 64-bit mode, we need a SIB + // byte to emit an addr that is just 'disp32' (the non-RIP relative form). + (!Is64BitMode || BaseReg != 0)) { + + if (BaseReg == 0) { // [disp32] in X86-32 mode + EmitByte(ModRMByte(0, RegOpcodeField, 5), CurByte, OS); + EmitImmediate(Disp, 4, FK_Data_4, CurByte, OS, Fixups); + return; + } + + // If the base is not EBP/ESP and there is no displacement, use simple + // indirect register encoding, this handles addresses like [EAX]. The + // encoding for [EBP] with no displacement means [disp32] so we handle it + // by emitting a displacement of 0 below. + if (Disp.isImm() && Disp.getImm() == 0 && BaseRegNo != N86::EBP) { + EmitByte(ModRMByte(0, RegOpcodeField, BaseRegNo), CurByte, OS); + return; + } + + // Otherwise, if the displacement fits in a byte, encode as [REG+disp8]. + if (Disp.isImm() && isDisp8(Disp.getImm())) { + EmitByte(ModRMByte(1, RegOpcodeField, BaseRegNo), CurByte, OS); + EmitImmediate(Disp, 1, FK_Data_1, CurByte, OS, Fixups); + return; + } + + // Otherwise, emit the most general non-SIB encoding: [REG+disp32] + EmitByte(ModRMByte(2, RegOpcodeField, BaseRegNo), CurByte, OS); + EmitImmediate(Disp, 4, FK_Data_4, CurByte, OS, Fixups); + return; + } + + // We need a SIB byte, so start by outputting the ModR/M byte first + assert(IndexReg.getReg() != X86::ESP && + IndexReg.getReg() != X86::RSP && "Cannot use ESP as index reg!"); + + bool ForceDisp32 = false; + bool ForceDisp8 = false; + if (BaseReg == 0) { + // If there is no base register, we emit the special case SIB byte with + // MOD=0, BASE=5, to JUST get the index, scale, and displacement. 
+ EmitByte(ModRMByte(0, RegOpcodeField, 4), CurByte, OS); + ForceDisp32 = true; + } else if (!Disp.isImm()) { + // Emit the normal disp32 encoding. + EmitByte(ModRMByte(2, RegOpcodeField, 4), CurByte, OS); + ForceDisp32 = true; + } else if (Disp.getImm() == 0 && BaseReg != X86::EBP) { + // Emit no displacement ModR/M byte + EmitByte(ModRMByte(0, RegOpcodeField, 4), CurByte, OS); + } else if (isDisp8(Disp.getImm())) { + // Emit the disp8 encoding. + EmitByte(ModRMByte(1, RegOpcodeField, 4), CurByte, OS); + ForceDisp8 = true; // Make sure to force 8 bit disp if Base=EBP + } else { + // Emit the normal disp32 encoding. + EmitByte(ModRMByte(2, RegOpcodeField, 4), CurByte, OS); + } + + // Calculate what the SS field value should be... + static const unsigned SSTable[] = { ~0, 0, 1, ~0, 2, ~0, ~0, ~0, 3 }; + unsigned SS = SSTable[Scale.getImm()]; + + if (BaseReg == 0) { + // Handle the SIB byte for the case where there is no base, see Intel + // Manual 2A, table 2-7. The displacement has already been output. + unsigned IndexRegNo; + if (IndexReg.getReg()) + IndexRegNo = GetX86RegNum(IndexReg); + else // Examples: [ESP+1*<noreg>+4] or [scaled idx]+disp32 (MOD=0,BASE=5) + IndexRegNo = 4; + EmitSIBByte(SS, IndexRegNo, 5, CurByte, OS); + } else { + unsigned IndexRegNo; + if (IndexReg.getReg()) + IndexRegNo = GetX86RegNum(IndexReg); + else + IndexRegNo = 4; // For example [ESP+1*<noreg>+4] + EmitSIBByte(SS, IndexRegNo, GetX86RegNum(Base), CurByte, OS); + } + + // Do we need to output a displacement? + if (ForceDisp8) + EmitImmediate(Disp, 1, FK_Data_1, CurByte, OS, Fixups); + else if (ForceDisp32 || Disp.getImm() != 0) + EmitImmediate(Disp, 4, FK_Data_4, CurByte, OS, Fixups); +} + +/// DetermineREXPrefix - Determine if the MCInst has to be encoded with an X86-64 +/// REX prefix which specifies 1) 64-bit instructions, 2) non-default operand +/// size, and 3) use of X86-64 extended registers. +static unsigned DetermineREXPrefix(const MCInst &MI, unsigned TSFlags, + const TargetInstrDesc &Desc) { + // Pseudo instructions never have a rex byte. + if ((TSFlags & X86II::FormMask) == X86II::Pseudo) + return 0; + + unsigned REX = 0; + if (TSFlags & X86II::REX_W) + REX |= 1 << 3; + + if (MI.getNumOperands() == 0) return REX; + + unsigned NumOps = MI.getNumOperands(); + // FIXME: MCInst should explicitize the two-addrness. + bool isTwoAddr = NumOps > 1 && + Desc.getOperandConstraint(1, TOI::TIED_TO) != -1; + + // If it accesses SPL, BPL, SIL, or DIL, then it requires a 0x40 REX prefix. + unsigned i = isTwoAddr ? 1 : 0; + for (; i != NumOps; ++i) { + const MCOperand &MO = MI.getOperand(i); + if (!MO.isReg()) continue; + unsigned Reg = MO.getReg(); + if (!X86InstrInfo::isX86_64NonExtLowByteReg(Reg)) continue; + // FIXME: The caller of DetermineREXPrefix slaps this prefix onto anything + // that returns non-zero. + REX |= 0x40; + break; + } + + switch (TSFlags & X86II::FormMask) { + case X86II::MRMInitReg: assert(0 && "FIXME: Remove this!"); + case X86II::MRMSrcReg: + if (MI.getOperand(0).isReg() && + X86InstrInfo::isX86_64ExtendedReg(MI.getOperand(0).getReg())) + REX |= 1 << 2; + i = isTwoAddr ? 2 : 1; + for (; i != NumOps; ++i) { + const MCOperand &MO = MI.getOperand(i); + if (MO.isReg() && X86InstrInfo::isX86_64ExtendedReg(MO.getReg())) + REX |= 1 << 0; + } + break; + case X86II::MRMSrcMem: { + if (MI.getOperand(0).isReg() && + X86InstrInfo::isX86_64ExtendedReg(MI.getOperand(0).getReg())) + REX |= 1 << 2; + unsigned Bit = 0; + i = isTwoAddr ? 
2 : 1; + for (; i != NumOps; ++i) { + const MCOperand &MO = MI.getOperand(i); + if (MO.isReg()) { + if (X86InstrInfo::isX86_64ExtendedReg(MO.getReg())) + REX |= 1 << Bit; + Bit++; + } + } + break; + } + case X86II::MRM0m: case X86II::MRM1m: + case X86II::MRM2m: case X86II::MRM3m: + case X86II::MRM4m: case X86II::MRM5m: + case X86II::MRM6m: case X86II::MRM7m: + case X86II::MRMDestMem: { + unsigned e = (isTwoAddr ? X86AddrNumOperands+1 : X86AddrNumOperands); + i = isTwoAddr ? 1 : 0; + if (NumOps > e && MI.getOperand(e).isReg() && + X86InstrInfo::isX86_64ExtendedReg(MI.getOperand(e).getReg())) + REX |= 1 << 2; + unsigned Bit = 0; + for (; i != e; ++i) { + const MCOperand &MO = MI.getOperand(i); + if (MO.isReg()) { + if (X86InstrInfo::isX86_64ExtendedReg(MO.getReg())) + REX |= 1 << Bit; + Bit++; + } + } + break; + } + default: + if (MI.getOperand(0).isReg() && + X86InstrInfo::isX86_64ExtendedReg(MI.getOperand(0).getReg())) + REX |= 1 << 0; + i = isTwoAddr ? 2 : 1; + for (unsigned e = NumOps; i != e; ++i) { + const MCOperand &MO = MI.getOperand(i); + if (MO.isReg() && X86InstrInfo::isX86_64ExtendedReg(MO.getReg())) + REX |= 1 << 2; + } + break; + } + return REX; +} + +void X86MCCodeEmitter:: +EncodeInstruction(const MCInst &MI, raw_ostream &OS, + SmallVectorImpl<MCFixup> &Fixups) const { + unsigned Opcode = MI.getOpcode(); + const TargetInstrDesc &Desc = TII.get(Opcode); + unsigned TSFlags = Desc.TSFlags; + + // Keep track of the current byte being emitted. + unsigned CurByte = 0; + + // FIXME: We should emit the prefixes in exactly the same order as GAS does, + // in order to provide diffability. + + // Emit the lock opcode prefix as needed. + if (TSFlags & X86II::LOCK) + EmitByte(0xF0, CurByte, OS); + + // Emit segment override opcode prefix as needed. + switch (TSFlags & X86II::SegOvrMask) { + default: assert(0 && "Invalid segment!"); + case 0: break; // No segment override! + case X86II::FS: + EmitByte(0x64, CurByte, OS); + break; + case X86II::GS: + EmitByte(0x65, CurByte, OS); + break; + } + + // Emit the repeat opcode prefix as needed. + if ((TSFlags & X86II::Op0Mask) == X86II::REP) + EmitByte(0xF3, CurByte, OS); + + // Emit the operand size opcode prefix as needed. + if (TSFlags & X86II::OpSize) + EmitByte(0x66, CurByte, OS); + + // Emit the address size opcode prefix as needed. + if (TSFlags & X86II::AdSize) + EmitByte(0x67, CurByte, OS); + + bool Need0FPrefix = false; + switch (TSFlags & X86II::Op0Mask) { + default: assert(0 && "Invalid prefix!"); + case 0: break; // No prefix! + case X86II::REP: break; // already handled. + case X86II::TB: // Two-byte opcode prefix + case X86II::T8: // 0F 38 + case X86II::TA: // 0F 3A + Need0FPrefix = true; + break; + case X86II::TF: // F2 0F 38 + EmitByte(0xF2, CurByte, OS); + Need0FPrefix = true; + break; + case X86II::XS: // F3 0F + EmitByte(0xF3, CurByte, OS); + Need0FPrefix = true; + break; + case X86II::XD: // F2 0F + EmitByte(0xF2, CurByte, OS); + Need0FPrefix = true; + break; + case X86II::D8: EmitByte(0xD8, CurByte, OS); break; + case X86II::D9: EmitByte(0xD9, CurByte, OS); break; + case X86II::DA: EmitByte(0xDA, CurByte, OS); break; + case X86II::DB: EmitByte(0xDB, CurByte, OS); break; + case X86II::DC: EmitByte(0xDC, CurByte, OS); break; + case X86II::DD: EmitByte(0xDD, CurByte, OS); break; + case X86II::DE: EmitByte(0xDE, CurByte, OS); break; + case X86II::DF: EmitByte(0xDF, CurByte, OS); break; + } + + // Handle REX prefix. + // FIXME: Can this come before F2 etc to simplify emission? 
+ if (Is64BitMode) { + if (unsigned REX = DetermineREXPrefix(MI, TSFlags, Desc)) + EmitByte(0x40 | REX, CurByte, OS); + } + + // 0x0F escape code must be emitted just before the opcode. + if (Need0FPrefix) + EmitByte(0x0F, CurByte, OS); + + // FIXME: Pull this up into previous switch if REX can be moved earlier. + switch (TSFlags & X86II::Op0Mask) { + case X86II::TF: // F2 0F 38 + case X86II::T8: // 0F 38 + EmitByte(0x38, CurByte, OS); + break; + case X86II::TA: // 0F 3A + EmitByte(0x3A, CurByte, OS); + break; + } + + // If this is a two-address instruction, skip one of the register operands. + unsigned NumOps = Desc.getNumOperands(); + unsigned CurOp = 0; + if (NumOps > 1 && Desc.getOperandConstraint(1, TOI::TIED_TO) != -1) + ++CurOp; + else if (NumOps > 2 && Desc.getOperandConstraint(NumOps-1, TOI::TIED_TO)== 0) + // Skip the last source operand that is tied_to the dest reg. e.g. LXADD32 + --NumOps; + + unsigned char BaseOpcode = X86II::getBaseOpcodeFor(TSFlags); + switch (TSFlags & X86II::FormMask) { + case X86II::MRMInitReg: + assert(0 && "FIXME: Remove this form when the JIT moves to MCCodeEmitter!"); + default: errs() << "FORM: " << (TSFlags & X86II::FormMask) << "\n"; + assert(0 && "Unknown FormMask value in X86MCCodeEmitter!"); + case X86II::Pseudo: return; // Pseudo instructions encode to nothing. + case X86II::RawFrm: + EmitByte(BaseOpcode, CurByte, OS); + break; + + case X86II::AddRegFrm: + EmitByte(BaseOpcode + GetX86RegNum(MI.getOperand(CurOp++)), CurByte, OS); + break; + + case X86II::MRMDestReg: + EmitByte(BaseOpcode, CurByte, OS); + EmitRegModRMByte(MI.getOperand(CurOp), + GetX86RegNum(MI.getOperand(CurOp+1)), CurByte, OS); + CurOp += 2; + break; + + case X86II::MRMDestMem: + EmitByte(BaseOpcode, CurByte, OS); + EmitMemModRMByte(MI, CurOp, + GetX86RegNum(MI.getOperand(CurOp + X86AddrNumOperands)), + TSFlags, CurByte, OS, Fixups); + CurOp += X86AddrNumOperands + 1; + break; + + case X86II::MRMSrcReg: + EmitByte(BaseOpcode, CurByte, OS); + EmitRegModRMByte(MI.getOperand(CurOp+1), GetX86RegNum(MI.getOperand(CurOp)), + CurByte, OS); + CurOp += 2; + break; + + case X86II::MRMSrcMem: { + EmitByte(BaseOpcode, CurByte, OS); + + // FIXME: Maybe lea should have its own form? This is a horrible hack. 
+ int AddrOperands; + if (Opcode == X86::LEA64r || Opcode == X86::LEA64_32r || + Opcode == X86::LEA16r || Opcode == X86::LEA32r) + AddrOperands = X86AddrNumOperands - 1; // No segment register + else + AddrOperands = X86AddrNumOperands; + + EmitMemModRMByte(MI, CurOp+1, GetX86RegNum(MI.getOperand(CurOp)), + TSFlags, CurByte, OS, Fixups); + CurOp += AddrOperands + 1; + break; + } + + case X86II::MRM0r: case X86II::MRM1r: + case X86II::MRM2r: case X86II::MRM3r: + case X86II::MRM4r: case X86II::MRM5r: + case X86II::MRM6r: case X86II::MRM7r: + EmitByte(BaseOpcode, CurByte, OS); + EmitRegModRMByte(MI.getOperand(CurOp++), + (TSFlags & X86II::FormMask)-X86II::MRM0r, + CurByte, OS); + break; + case X86II::MRM0m: case X86II::MRM1m: + case X86II::MRM2m: case X86II::MRM3m: + case X86II::MRM4m: case X86II::MRM5m: + case X86II::MRM6m: case X86II::MRM7m: + EmitByte(BaseOpcode, CurByte, OS); + EmitMemModRMByte(MI, CurOp, (TSFlags & X86II::FormMask)-X86II::MRM0m, + TSFlags, CurByte, OS, Fixups); + CurOp += X86AddrNumOperands; + break; + case X86II::MRM_C1: + EmitByte(BaseOpcode, CurByte, OS); + EmitByte(0xC1, CurByte, OS); + break; + case X86II::MRM_C2: + EmitByte(BaseOpcode, CurByte, OS); + EmitByte(0xC2, CurByte, OS); + break; + case X86II::MRM_C3: + EmitByte(BaseOpcode, CurByte, OS); + EmitByte(0xC3, CurByte, OS); + break; + case X86II::MRM_C4: + EmitByte(BaseOpcode, CurByte, OS); + EmitByte(0xC4, CurByte, OS); + break; + case X86II::MRM_C8: + EmitByte(BaseOpcode, CurByte, OS); + EmitByte(0xC8, CurByte, OS); + break; + case X86II::MRM_C9: + EmitByte(BaseOpcode, CurByte, OS); + EmitByte(0xC9, CurByte, OS); + break; + case X86II::MRM_E8: + EmitByte(BaseOpcode, CurByte, OS); + EmitByte(0xE8, CurByte, OS); + break; + case X86II::MRM_F0: + EmitByte(BaseOpcode, CurByte, OS); + EmitByte(0xF0, CurByte, OS); + break; + case X86II::MRM_F8: + EmitByte(BaseOpcode, CurByte, OS); + EmitByte(0xF8, CurByte, OS); + break; + case X86II::MRM_F9: + EmitByte(BaseOpcode, CurByte, OS); + EmitByte(0xF9, CurByte, OS); + break; + } + + // If there is a remaining operand, it must be a trailing immediate. Emit it + // according to the right size for the instruction. + if (CurOp != NumOps) + EmitImmediate(MI.getOperand(CurOp++), + X86II::getSizeOfImm(TSFlags), getImmFixupKind(TSFlags), + CurByte, OS, Fixups); + +#ifndef NDEBUG + // FIXME: Verify. + if (/*!Desc.isVariadic() &&*/ CurOp != NumOps) { + errs() << "Cannot encode all operands of: "; + MI.dump(); + errs() << '\n'; + abort(); + } +#endif +} diff --git a/lib/Target/X86/X86MCTargetExpr.cpp b/lib/Target/X86/X86MCTargetExpr.cpp new file mode 100644 index 0000000..17b4fe8 --- /dev/null +++ b/lib/Target/X86/X86MCTargetExpr.cpp @@ -0,0 +1,48 @@ +//===- X86MCTargetExpr.cpp - X86 Target Specific MCExpr Implementation ----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "X86MCTargetExpr.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/MC/MCValue.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +X86MCTargetExpr *X86MCTargetExpr::Create(const MCSymbol *Sym, VariantKind K, + MCContext &Ctx) { + return new (Ctx) X86MCTargetExpr(Sym, K); +} + +void X86MCTargetExpr::PrintImpl(raw_ostream &OS) const { + OS << *Sym; + + switch (Kind) { + case Invalid: OS << "@<invalid>"; break; + case GOT: OS << "@GOT"; break; + case GOTOFF: OS << "@GOTOFF"; break; + case GOTPCREL: OS << "@GOTPCREL"; break; + case GOTTPOFF: OS << "@GOTTPOFF"; break; + case INDNTPOFF: OS << "@INDNTPOFF"; break; + case NTPOFF: OS << "@NTPOFF"; break; + case PLT: OS << "@PLT"; break; + case TLSGD: OS << "@TLSGD"; break; + case TPOFF: OS << "@TPOFF"; break; + } +} + +bool X86MCTargetExpr::EvaluateAsRelocatableImpl(MCValue &Res) const { + // FIXME: I don't know if this is right, it followed MCSymbolRefExpr. + + // Evaluate recursively if this is a variable. + if (Sym->isVariable()) + return Sym->getValue()->EvaluateAsRelocatable(Res); + + Res = MCValue::get(Sym, 0, 0); + return true; +} diff --git a/lib/Target/X86/X86MCTargetExpr.h b/lib/Target/X86/X86MCTargetExpr.h new file mode 100644 index 0000000..7de8a5c --- /dev/null +++ b/lib/Target/X86/X86MCTargetExpr.h @@ -0,0 +1,49 @@ +//===- X86MCTargetExpr.h - X86 Target Specific MCExpr -----------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef X86_MCTARGETEXPR_H +#define X86_MCTARGETEXPR_H + +#include "llvm/MC/MCExpr.h" + +namespace llvm { + +/// X86MCTargetExpr - This class represents symbol variants, like foo@GOT. +class X86MCTargetExpr : public MCTargetExpr { +public: + enum VariantKind { + Invalid, + GOT, + GOTOFF, + GOTPCREL, + GOTTPOFF, + INDNTPOFF, + NTPOFF, + PLT, + TLSGD, + TPOFF + }; +private: + /// Sym - The symbol being referenced. + const MCSymbol * const Sym; + /// Kind - The modifier. + const VariantKind Kind; + + X86MCTargetExpr(const MCSymbol *S, VariantKind K) : Sym(S), Kind(K) {} +public: + static X86MCTargetExpr *Create(const MCSymbol *Sym, VariantKind K, + MCContext &Ctx); + + void PrintImpl(raw_ostream &OS) const; + bool EvaluateAsRelocatableImpl(MCValue &Res) const; +}; + +} // end namespace llvm + +#endif diff --git a/lib/Target/X86/X86MachineFunctionInfo.h b/lib/Target/X86/X86MachineFunctionInfo.h index fafcf7e..4b2529b 100644 --- a/lib/Target/X86/X86MachineFunctionInfo.h +++ b/lib/Target/X86/X86MachineFunctionInfo.h @@ -18,12 +18,6 @@ namespace llvm { -enum NameDecorationStyle { - None, - StdCall, - FastCall -}; - /// X86MachineFunctionInfo - This class is derived from MachineFunction and /// contains private X86 target-specific information for each MachineFunction. class X86MachineFunctionInfo : public MachineFunctionInfo { @@ -41,16 +35,11 @@ class X86MachineFunctionInfo : public MachineFunctionInfo { /// Used on windows platform for stdcall & fastcall name decoration unsigned BytesToPopOnReturn; - /// DecorationStyle - If the function requires additional name decoration, - /// DecorationStyle holds the right way to do so. - NameDecorationStyle DecorationStyle; - /// ReturnAddrIndex - FrameIndex for return slot. 
int ReturnAddrIndex; - /// TailCallReturnAddrDelta - Delta the ReturnAddr stack slot is moved - /// Used for creating an area before the register spill area on the stack - /// the returnaddr can be savely move to this area + /// TailCallReturnAddrDelta - The number of bytes by which the return address + /// stack slot is moved as a result of tail call optimization. int TailCallReturnAddrDelta; /// SRetReturnReg - Some subtargets require that sret lowering includes @@ -67,7 +56,6 @@ public: X86MachineFunctionInfo() : ForceFramePointer(false), CalleeSavedFrameSize(0), BytesToPopOnReturn(0), - DecorationStyle(None), ReturnAddrIndex(0), TailCallReturnAddrDelta(0), SRetReturnReg(0), @@ -77,7+65,6 @@ public: : ForceFramePointer(false), CalleeSavedFrameSize(0), BytesToPopOnReturn(0), - DecorationStyle(None), ReturnAddrIndex(0), TailCallReturnAddrDelta(0), SRetReturnReg(0), @@ -92,9 +79,6 @@ public: unsigned getBytesToPopOnReturn() const { return BytesToPopOnReturn; } void setBytesToPopOnReturn (unsigned bytes) { BytesToPopOnReturn = bytes;} - NameDecorationStyle getDecorationStyle() const { return DecorationStyle; } - void setDecorationStyle(NameDecorationStyle style) { DecorationStyle = style;} - int getRAIndex() const { return ReturnAddrIndex; } void setRAIndex(int Index) { ReturnAddrIndex = Index; } diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp index f959a2d..8524236 100644 --- a/lib/Target/X86/X86RegisterInfo.cpp +++ b/lib/Target/X86/X86RegisterInfo.cpp @@ -473,9 +473,9 @@ bool X86RegisterInfo::hasReservedSpillSlot(MachineFunction &MF, unsigned Reg, } int -X86RegisterInfo::getFrameIndexOffset(MachineFunction &MF, int FI) const { +X86RegisterInfo::getFrameIndexOffset(const MachineFunction &MF, int FI) const { const TargetFrameInfo &TFI = *MF.getTarget().getFrameInfo(); - MachineFrameInfo *MFI = MF.getFrameInfo(); + const MachineFrameInfo *MFI = MF.getFrameInfo(); int Offset = MFI->getObjectOffset(FI) - TFI.getOffsetOfLocalArea(); uint64_t StackSize = MFI->getStackSize(); @@ -485,7 +485,7 @@ X86RegisterInfo::getFrameIndexOffset(MachineFunction &MF, int FI) const { Offset += SlotSize; } else { unsigned Align = MFI->getObjectAlignment(FI); - assert( (-(Offset + StackSize)) % Align == 0); + assert((-(Offset + StackSize)) % Align == 0); Align = 0; return Offset + StackSize; } @@ -498,7 +498,7 @@ X86RegisterInfo::getFrameIndexOffset(MachineFunction &MF, int FI) const { Offset += SlotSize; // Skip the RETADDR move area - X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>(); + const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>(); int TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta(); if (TailCallReturnAddrDelta < 0) Offset -= TailCallReturnAddrDelta; @@ -627,10 +627,6 @@ X86RegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, RegScavenger *RS) const { MachineFrameInfo *MFI = MF.getFrameInfo(); - // Calculate and set max stack object alignment early, so we can decide - // whether we will need stack realignment (and thus FP). - MFI->calculateMaxStackAlignment(); - X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>(); int32_t TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta(); @@ -1242,13 +1238,19 @@ void X86RegisterInfo::emitEpilogue(MachineFunction &MF, } // Jump to label or value in register. - if (RetOpcode == X86::TCRETURNdi|| RetOpcode == X86::TCRETURNdi64) + if (RetOpcode == X86::TCRETURNdi|| RetOpcode == X86::TCRETURNdi64) { BuildMI(MBB, MBBI, DL, TII.get(X86::TAILJMPd)). 
- addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset()); - else if (RetOpcode== X86::TCRETURNri64) + addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset(), + JumpTarget.getTargetFlags()); + } else if (RetOpcode == X86::TCRETURNri64) { BuildMI(MBB, MBBI, DL, TII.get(X86::TAILJMPr64), JumpTarget.getReg()); - else + } else { BuildMI(MBB, MBBI, DL, TII.get(X86::TAILJMPr), JumpTarget.getReg()); + } + + MachineInstr *NewMI = prior(MBBI); + for (unsigned i = 2, e = MBBI->getNumOperands(); i != e; ++i) + NewMI->addOperand(MBBI->getOperand(i)); // Delete the pseudo instruction TCRETURN. MBB.erase(MBBI); diff --git a/lib/Target/X86/X86RegisterInfo.h b/lib/Target/X86/X86RegisterInfo.h index dec3fba..8fb5e92 100644 --- a/lib/Target/X86/X86RegisterInfo.h +++ b/lib/Target/X86/X86RegisterInfo.h @@ -156,7 +156,7 @@ public: // Debug information queries. unsigned getRARegister() const; unsigned getFrameRegister(const MachineFunction &MF) const; - int getFrameIndexOffset(MachineFunction &MF, int FI) const; + int getFrameIndexOffset(const MachineFunction &MF, int FI) const; void getInitialFrameState(std::vector<MachineMove> &Moves) const; // Exception handling queries. diff --git a/lib/Target/X86/X86RegisterInfo.td b/lib/Target/X86/X86RegisterInfo.td index 6db0cc3..1559bf7 100644 --- a/lib/Target/X86/X86RegisterInfo.td +++ b/lib/Target/X86/X86RegisterInfo.td @@ -512,20 +512,17 @@ def GR64_ABCD : RegisterClass<"X86", [i64], 64, [RAX, RCX, RDX, RBX]> { let SubRegClassList = [GR8_ABCD_L, GR8_ABCD_H, GR16_ABCD, GR32_ABCD]; } -// GR8_NOREX, GR16_NOREX, GR32_NOREX, GR64_NOREX - Subclasses of -// GR8, GR16, GR32, and GR64 which contain only the first 8 GPRs. -// On x86-64, GR64_NOREX, GR32_NOREX and GR16_NOREX are the classes -// of registers which do not by themselves require a REX prefix. +// GR8_NOREX - GR8 registers which do not require a REX prefix. def GR8_NOREX : RegisterClass<"X86", [i8], 8, - [AL, CL, DL, AH, CH, DH, BL, BH, - SIL, DIL, BPL, SPL]> { + [AL, CL, DL, AH, CH, DH, BL, BH]> { let MethodProtos = [{ iterator allocation_order_begin(const MachineFunction &MF) const; iterator allocation_order_end(const MachineFunction &MF) const; }]; let MethodBodies = [{ + // In 64-bit mode, it's not safe to blindly allocate H registers. static const unsigned X86_GR8_NOREX_AO_64[] = { - X86::AL, X86::CL, X86::DL, X86::SIL, X86::DIL, X86::BL, X86::BPL + X86::AL, X86::CL, X86::DL, X86::BL }; GR8_NOREXClass::iterator @@ -541,21 +538,15 @@ def GR8_NOREX : RegisterClass<"X86", [i8], 8, GR8_NOREXClass::iterator GR8_NOREXClass::allocation_order_end(const MachineFunction &MF) const { const TargetMachine &TM = MF.getTarget(); - const TargetRegisterInfo *RI = TM.getRegisterInfo(); const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>(); - // Does the function dedicate RBP / EBP to being a frame ptr? - if (!Subtarget.is64Bit()) - // In 32-mode, none of the 8-bit registers aliases EBP or ESP. - return begin() + 8; - else if (RI->hasFP(MF)) - // If so, don't allocate SPL or BPL. - return array_endof(X86_GR8_NOREX_AO_64) - 1; - else - // If not, just don't allocate SPL. + if (Subtarget.is64Bit()) return array_endof(X86_GR8_NOREX_AO_64); + else + return end(); } }]; } +// GR16_NOREX - GR16 registers which do not require a REX prefix. 
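The GR8_NOREX change above narrows the 64-bit allocation order to AL, CL, DL, BL: once SIL, DIL, BPL and SPL are dropped from the class, the only other candidates are the H registers, and an H register cannot be encoded in an instruction that also carries a REX prefix. A standalone sketch of the mode-dependent order (register names are illustrative, not LLVM's real enum values):

  #include <cstddef>
  #include <utility>

  // Illustrative 8-bit register names.
  enum Reg8 { AL, CL, DL, BL, AH, CH, DH, BH };

  // Mirrors the allocation_order logic above: in 64-bit mode only the
  // low byte registers are handed out, since blindly allocating an H
  // register can collide with a REX prefix elsewhere in the encoding.
  static std::pair<const Reg8 *, std::size_t> gr8NoRexOrder(bool Is64Bit) {
    static const Reg8 Order64[] = { AL, CL, DL, BL };
    static const Reg8 Order32[] = { AL, CL, DL, AH, CH, DH, BL, BH };
    if (Is64Bit)
      return std::make_pair(Order64, sizeof(Order64) / sizeof(Order64[0]));
    return std::make_pair(Order32, sizeof(Order32) / sizeof(Order32[0]));
  }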
def GR16_NOREX : RegisterClass<"X86", [i16], 16, [AX, CX, DX, SI, DI, BX, BP, SP]> { let SubRegClassList = [GR8_NOREX, GR8_NOREX]; diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp index 2039be7..adef5bc 100644 --- a/lib/Target/X86/X86Subtarget.cpp +++ b/lib/Target/X86/X86Subtarget.cpp @@ -53,9 +53,9 @@ ClassifyGlobalReference(const GlobalValue *GV, const TargetMachine &TM) const { if (GV->hasDLLImportLinkage()) return X86II::MO_DLLIMPORT; - // GV with ghost linkage (in JIT lazy compilation mode) do not require an + // Materializable GVs (in JIT lazy compilation mode) do not require an // extra load from stub. - bool isDecl = GV->isDeclaration() && !GV->hasNotBeenReadFromBitcode(); + bool isDecl = GV->isDeclaration() && !GV->isMaterializable(); // X86-64 in PIC mode. if (isPICStyleRIPRel()) { diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h index 618dd10..5e05c2f 100644 --- a/lib/Target/X86/X86Subtarget.h +++ b/lib/Target/X86/X86Subtarget.h @@ -175,7 +175,7 @@ public: else if (isTargetDarwin()) p = "e-p:32:32-f64:32:64-i64:32:64-f80:128:128-n8:16:32"; else if (isTargetMingw() || isTargetWindows()) - p = "e-p:32:32-f64:64:64-i64:64:64-f80:128:128-n8:16:32"; + p = "e-p:32:32-f64:64:64-i64:64:64-f80:32:32-n8:16:32"; else p = "e-p:32:32-f64:32:64-i64:32:64-f80:32:32-n8:16:32"; diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp index 731c3ab..7802f98 100644 --- a/lib/Target/X86/X86TargetMachine.cpp +++ b/lib/Target/X86/X86TargetMachine.cpp @@ -30,9 +30,8 @@ static const MCAsmInfo *createMCAsmInfo(const Target &T, StringRef TT) { case Triple::MinGW32: case Triple::MinGW64: case Triple::Cygwin: - return new X86MCAsmInfoCOFF(TheTriple); case Triple::Win32: - return new X86WinMCAsmInfo(TheTriple); + return new X86MCAsmInfoCOFF(TheTriple); default: return new X86ELFMCAsmInfo(TheTriple); } @@ -48,8 +47,10 @@ extern "C" void LLVMInitializeX86Target() { RegisterAsmInfoFn B(TheX86_64Target, createMCAsmInfo); // Register the code emitter. - TargetRegistry::RegisterCodeEmitter(TheX86_32Target, createX86MCCodeEmitter); - TargetRegistry::RegisterCodeEmitter(TheX86_64Target, createX86MCCodeEmitter); + TargetRegistry::RegisterCodeEmitter(TheX86_32Target, + createX86_32MCCodeEmitter); + TargetRegistry::RegisterCodeEmitter(TheX86_64Target, + createX86_64MCCodeEmitter); } @@ -145,10 +146,6 @@ bool X86TargetMachine::addInstSelector(PassManagerBase &PM, // Install an instruction selector. PM.add(createX86ISelDag(*this, OptLevel)); - // If we're using Fast-ISel, clean up the mess. - if (EnableFastISel) - PM.add(createDeadMachineInstructionElimPass()); - // Install a pass to insert x87 FP_REG_KILL instructions, as needed. PM.add(createX87FPRegKillInserterPass()); @@ -168,22 +165,6 @@ bool X86TargetMachine::addPostRegAlloc(PassManagerBase &PM, bool X86TargetMachine::addCodeEmitter(PassManagerBase &PM, CodeGenOpt::Level OptLevel, - MachineCodeEmitter &MCE) { - // FIXME: Move this to TargetJITInfo! - // On Darwin, do not override 64-bit setting made in X86TargetMachine(). - if (DefRelocModel == Reloc::Default && - (!Subtarget.isTargetDarwin() || !Subtarget.is64Bit())) { - setRelocationModel(Reloc::Static); - Subtarget.setPICStyle(PICStyles::None); - } - - PM.add(createX86CodeEmitterPass(*this, MCE)); - - return false; -} - -bool X86TargetMachine::addCodeEmitter(PassManagerBase &PM, - CodeGenOpt::Level OptLevel, JITCodeEmitter &JCE) { // FIXME: Move this to TargetJITInfo! 
// On Darwin, do not override 64-bit setting made in X86TargetMachine(). @@ -199,34 +180,6 @@ bool X86TargetMachine::addCodeEmitter(PassManagerBase &PM, return false; } -bool X86TargetMachine::addCodeEmitter(PassManagerBase &PM, - CodeGenOpt::Level OptLevel, - ObjectCodeEmitter &OCE) { - PM.add(createX86ObjectCodeEmitterPass(*this, OCE)); - return false; -} - -bool X86TargetMachine::addSimpleCodeEmitter(PassManagerBase &PM, - CodeGenOpt::Level OptLevel, - MachineCodeEmitter &MCE) { - PM.add(createX86CodeEmitterPass(*this, MCE)); - return false; -} - -bool X86TargetMachine::addSimpleCodeEmitter(PassManagerBase &PM, - CodeGenOpt::Level OptLevel, - JITCodeEmitter &JCE) { - PM.add(createX86JITCodeEmitterPass(*this, JCE)); - return false; -} - -bool X86TargetMachine::addSimpleCodeEmitter(PassManagerBase &PM, - CodeGenOpt::Level OptLevel, - ObjectCodeEmitter &OCE) { - PM.add(createX86ObjectCodeEmitterPass(*this, OCE)); - return false; -} - void X86TargetMachine::setCodeModelForStatic() { if (getCodeModel() != CodeModel::Default) return; @@ -246,32 +199,3 @@ void X86TargetMachine::setCodeModelForJIT() { else setCodeModel(CodeModel::Small); } - -/// getLSDAEncoding - Returns the LSDA pointer encoding. The choices are 4-byte, -/// 8-byte, and target default. The CIE is hard-coded to indicate that the LSDA -/// pointer in the FDE section is an "sdata4", and should be encoded as a 4-byte -/// pointer by default. However, some systems may require a different size due -/// to bugs or other conditions. We will default to a 4-byte encoding unless the -/// system tells us otherwise. -/// -/// The issue is when the CIE says their is an LSDA. That mandates that every -/// FDE have an LSDA slot. But if the function does not need an LSDA. There -/// needs to be some way to signify there is none. The LSDA is encoded as -/// pc-rel. But you don't look for some magic value after adding the pc. You -/// have to look for a zero before adding the pc. The problem is that the size -/// of the zero to look for depends on the encoding. The unwinder bug in SL is -/// that it always checks for a pointer-size zero. So on x86_64 it looks for 8 -/// bytes of zero. If you have an LSDA, it works fine since the 8-bytes are -/// non-zero so it goes ahead and then reads the value based on the encoding. -/// But if you use sdata4 and there is no LSDA, then the test for zero gives a -/// false negative and the unwinder thinks there is an LSDA. -/// -/// FIXME: This call-back isn't good! We should be using the correct encoding -/// regardless of the system. However, there are some systems which have bugs -/// that prevent this from occuring. -DwarfLSDAEncoding::Encoding X86TargetMachine::getLSDAEncoding() const { - if (Subtarget.isTargetDarwin() && Subtarget.getDarwinVers() != 10) - return DwarfLSDAEncoding::Default; - - return DwarfLSDAEncoding::EightByte; -} diff --git a/lib/Target/X86/X86TargetMachine.h b/lib/Target/X86/X86TargetMachine.h index d05bebd..2bb5454 100644 --- a/lib/Target/X86/X86TargetMachine.h +++ b/lib/Target/X86/X86TargetMachine.h @@ -62,37 +62,12 @@ public: return Subtarget.isTargetELF() ? &ELFWriterInfo : 0; } - /// getLSDAEncoding - Returns the LSDA pointer encoding. The choices are - /// 4-byte, 8-byte, and target default. The CIE is hard-coded to indicate that - /// the LSDA pointer in the FDE section is an "sdata4", and should be encoded - /// as a 4-byte pointer by default. However, some systems may require a - /// different size due to bugs or other conditions. 
We will default to a - /// 4-byte encoding unless the system tells us otherwise. - /// - /// FIXME: This call-back isn't good! We should be using the correct encoding - /// regardless of the system. However, there are some systems which have bugs - /// that prevent this from occuring. - virtual DwarfLSDAEncoding::Encoding getLSDAEncoding() const; - // Set up the pass pipeline. virtual bool addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel); virtual bool addPreRegAlloc(PassManagerBase &PM, CodeGenOpt::Level OptLevel); virtual bool addPostRegAlloc(PassManagerBase &PM, CodeGenOpt::Level OptLevel); virtual bool addCodeEmitter(PassManagerBase &PM, CodeGenOpt::Level OptLevel, - MachineCodeEmitter &MCE); - virtual bool addCodeEmitter(PassManagerBase &PM, CodeGenOpt::Level OptLevel, JITCodeEmitter &JCE); - virtual bool addCodeEmitter(PassManagerBase &PM, CodeGenOpt::Level OptLevel, - ObjectCodeEmitter &OCE); - virtual bool addSimpleCodeEmitter(PassManagerBase &PM, - CodeGenOpt::Level OptLevel, - MachineCodeEmitter &MCE); - virtual bool addSimpleCodeEmitter(PassManagerBase &PM, - CodeGenOpt::Level OptLevel, - JITCodeEmitter &JCE); - virtual bool addSimpleCodeEmitter(PassManagerBase &PM, - CodeGenOpt::Level OptLevel, - ObjectCodeEmitter &OCE); }; /// X86_32TargetMachine - X86 32-bit target machine. diff --git a/lib/Target/X86/X86TargetObjectFile.cpp b/lib/Target/X86/X86TargetObjectFile.cpp index 41ad153..d1ee3fc 100644 --- a/lib/Target/X86/X86TargetObjectFile.cpp +++ b/lib/Target/X86/X86TargetObjectFile.cpp @@ -7,60 +7,176 @@ // //===----------------------------------------------------------------------===// +#include "X86MCTargetExpr.h" #include "X86TargetObjectFile.h" +#include "X86TargetMachine.h" #include "llvm/CodeGen/MachineModuleInfoImpls.h" #include "llvm/MC/MCContext.h" -#include "llvm/MC/MCExpr.h" #include "llvm/Target/Mangler.h" #include "llvm/ADT/SmallString.h" +#include "llvm/Support/Dwarf.h" using namespace llvm; +using namespace dwarf; const MCExpr *X8632_MachoTargetObjectFile:: getSymbolForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang, - MachineModuleInfo *MMI, - bool &IsIndirect, bool &IsPCRel) const { + MachineModuleInfo *MMI, unsigned Encoding) const { // The mach-o version of this method defaults to returning a stub reference. - IsIndirect = true; - IsPCRel = false; - - - MachineModuleInfoMachO &MachOMMI = - MMI->getObjFileInfo<MachineModuleInfoMachO>(); - - // FIXME: Use GetSymbolWithGlobalValueBase. - SmallString<128> Name; - Mang->getNameWithPrefix(Name, GV, true); - Name += "$non_lazy_ptr"; - - // Add information about the stub reference to MachOMMI so that the stub gets - // emitted by the asmprinter. - MCSymbol *Sym = getContext().GetOrCreateSymbol(Name.str()); - const MCSymbol *&StubSym = MachOMMI.getGVStubEntry(Sym); - if (StubSym == 0) { - Name.clear(); - Mang->getNameWithPrefix(Name, GV, false); - StubSym = getContext().GetOrCreateSymbol(Name.str()); + + if (Encoding & DW_EH_PE_indirect) { + MachineModuleInfoMachO &MachOMMI = + MMI->getObjFileInfo<MachineModuleInfoMachO>(); + + SmallString<128> Name; + Mang->getNameWithPrefix(Name, GV, true); + Name += "$non_lazy_ptr"; + + // Add information about the stub reference to MachOMMI so that the stub + // gets emitted by the asmprinter. 
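The Mach-O path above asks for a "$non_lazy_ptr" stub and registers it with MachineModuleInfo so the AsmPrinter later materializes the indirection; the hunk picks the actual MCSymbol up just below. A rough standalone model of that get-or-create bookkeeping (a std::map stands in for the MMI stub table; the function name is hypothetical):

  #include <map>
  #include <string>

  // Stand-in for MachineModuleInfoMachO's GV stub table: stub symbol
  // name -> the symbol the stub should ultimately resolve to.
  static std::map<std::string, std::string> GVStubs;

  // Get-or-create the "$non_lazy_ptr" stub for a mangled global name.
  // The first use registers the target, so the printer can later emit
  // one indirect-pointer entry per map slot.
  std::string getNonLazyPtrStub(const std::string &MangledName) {
    std::string StubName = MangledName + "$non_lazy_ptr";
    std::string &Target = GVStubs[StubName];   // empty if newly inserted
    if (Target.empty())
      Target = MangledName;
    return StubName;
  }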
+ MCSymbol *Sym = getContext().GetOrCreateSymbol(Name.str()); + MCSymbol *&StubSym = MachOMMI.getGVStubEntry(Sym); + if (StubSym == 0) { + Name.clear(); + Mang->getNameWithPrefix(Name, GV, false); + StubSym = getContext().GetOrCreateSymbol(Name.str()); + } + + return TargetLoweringObjectFile:: + getSymbolForDwarfReference(Sym, MMI, + Encoding & ~dwarf::DW_EH_PE_indirect); } - - return MCSymbolRefExpr::Create(Sym, getContext()); + + return TargetLoweringObjectFileMachO:: + getSymbolForDwarfGlobalReference(GV, Mang, MMI, Encoding); } const MCExpr *X8664_MachoTargetObjectFile:: getSymbolForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang, - MachineModuleInfo *MMI, - bool &IsIndirect, bool &IsPCRel) const { - + MachineModuleInfo *MMI, unsigned Encoding) const { + // On Darwin/X86-64, we can reference dwarf symbols with foo@GOTPCREL+4, which // is an indirect pc-relative reference. - IsIndirect = true; - IsPCRel = true; - - SmallString<128> Name; - Mang->getNameWithPrefix(Name, GV, false); - Name += "@GOTPCREL"; - const MCExpr *Res = - MCSymbolRefExpr::Create(Name.str(), getContext()); - const MCExpr *Four = MCConstantExpr::Create(4, getContext()); - return MCBinaryExpr::CreateAdd(Res, Four, getContext()); + if (Encoding & (DW_EH_PE_indirect | DW_EH_PE_pcrel)) { + SmallString<128> Name; + Mang->getNameWithPrefix(Name, GV, false); + const MCSymbol *Sym = getContext().GetOrCreateSymbol(Name); + const MCExpr *Res = + X86MCTargetExpr::Create(Sym, X86MCTargetExpr::GOTPCREL, getContext()); + const MCExpr *Four = MCConstantExpr::Create(4, getContext()); + return MCBinaryExpr::CreateAdd(Res, Four, getContext()); + } + + return TargetLoweringObjectFileMachO:: + getSymbolForDwarfGlobalReference(GV, Mang, MMI, Encoding); } +unsigned X8632_ELFTargetObjectFile::getPersonalityEncoding() const { + if (TM.getRelocationModel() == Reloc::PIC_) + return DW_EH_PE_indirect | DW_EH_PE_pcrel | DW_EH_PE_sdata4; + else + return DW_EH_PE_absptr; +} + +unsigned X8632_ELFTargetObjectFile::getLSDAEncoding() const { + if (TM.getRelocationModel() == Reloc::PIC_) + return DW_EH_PE_pcrel | DW_EH_PE_sdata4; + else + return DW_EH_PE_absptr; +} + +unsigned X8632_ELFTargetObjectFile::getFDEEncoding() const { + if (TM.getRelocationModel() == Reloc::PIC_) + return DW_EH_PE_pcrel | DW_EH_PE_sdata4; + else + return DW_EH_PE_absptr; +} + +unsigned X8632_ELFTargetObjectFile::getTTypeEncoding() const { + if (TM.getRelocationModel() == Reloc::PIC_) + return DW_EH_PE_indirect | DW_EH_PE_pcrel | DW_EH_PE_sdata4; + else + return DW_EH_PE_absptr; +} + +unsigned X8664_ELFTargetObjectFile::getPersonalityEncoding() const { + CodeModel::Model Model = TM.getCodeModel(); + if (TM.getRelocationModel() == Reloc::PIC_) + return DW_EH_PE_indirect | DW_EH_PE_pcrel | (Model == CodeModel::Small || + Model == CodeModel::Medium ? + DW_EH_PE_sdata4 : DW_EH_PE_sdata8); + + if (Model == CodeModel::Small || Model == CodeModel::Medium) + return DW_EH_PE_udata4; + + return DW_EH_PE_absptr; +} + +unsigned X8664_ELFTargetObjectFile::getLSDAEncoding() const { + CodeModel::Model Model = TM.getCodeModel(); + if (TM.getRelocationModel() == Reloc::PIC_) + return DW_EH_PE_pcrel | (Model == CodeModel::Small ? 
+ DW_EH_PE_sdata4 : DW_EH_PE_sdata8); + + if (Model == CodeModel::Small) + return DW_EH_PE_udata4; + + return DW_EH_PE_absptr; +} + +unsigned X8664_ELFTargetObjectFile::getFDEEncoding() const { + CodeModel::Model Model = TM.getCodeModel(); + if (TM.getRelocationModel() == Reloc::PIC_) + return DW_EH_PE_pcrel | (Model == CodeModel::Small || + Model == CodeModel::Medium ? + DW_EH_PE_sdata4 : DW_EH_PE_sdata8); + + if (Model == CodeModel::Small || Model == CodeModel::Medium) + return DW_EH_PE_udata4; + + return DW_EH_PE_absptr; +} + +unsigned X8664_ELFTargetObjectFile::getTTypeEncoding() const { + CodeModel::Model Model = TM.getCodeModel(); + if (TM.getRelocationModel() == Reloc::PIC_) + return DW_EH_PE_indirect | DW_EH_PE_pcrel | (Model == CodeModel::Small || + Model == CodeModel::Medium ? + DW_EH_PE_sdata4 : DW_EH_PE_sdata8); + + if (Model == CodeModel::Small) + return DW_EH_PE_udata4; + + return DW_EH_PE_absptr; +} + +unsigned X8632_MachoTargetObjectFile::getPersonalityEncoding() const { + return DW_EH_PE_indirect | DW_EH_PE_pcrel | DW_EH_PE_sdata4; +} + +unsigned X8632_MachoTargetObjectFile::getLSDAEncoding() const { + return DW_EH_PE_pcrel | DW_EH_PE_sdata4; +} + +unsigned X8632_MachoTargetObjectFile::getFDEEncoding() const { + return DW_EH_PE_pcrel | DW_EH_PE_sdata4; +} + +unsigned X8632_MachoTargetObjectFile::getTTypeEncoding() const { + return DW_EH_PE_indirect | DW_EH_PE_pcrel | DW_EH_PE_sdata4; +} + +unsigned X8664_MachoTargetObjectFile::getPersonalityEncoding() const { + return DW_EH_PE_indirect | DW_EH_PE_pcrel | DW_EH_PE_sdata4; +} + +unsigned X8664_MachoTargetObjectFile::getLSDAEncoding() const { + return DW_EH_PE_pcrel | DW_EH_PE_sdata4; +} + +unsigned X8664_MachoTargetObjectFile::getFDEEncoding() const { + return DW_EH_PE_pcrel | DW_EH_PE_sdata4; +} + +unsigned X8664_MachoTargetObjectFile::getTTypeEncoding() const { + return DW_EH_PE_indirect | DW_EH_PE_pcrel | DW_EH_PE_sdata4; +} diff --git a/lib/Target/X86/X86TargetObjectFile.h b/lib/Target/X86/X86TargetObjectFile.h index 377a93b..0fff194 100644 --- a/lib/Target/X86/X86TargetObjectFile.h +++ b/lib/Target/X86/X86TargetObjectFile.h @@ -10,21 +10,27 @@ #ifndef LLVM_TARGET_X86_TARGETOBJECTFILE_H #define LLVM_TARGET_X86_TARGETOBJECTFILE_H +#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" +#include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetLoweringObjectFile.h" namespace llvm { - + class X86TargetMachine; + /// X8632_MachoTargetObjectFile - This TLOF implementation is used for /// Darwin/x86-32. class X8632_MachoTargetObjectFile : public TargetLoweringObjectFileMachO { public: - + virtual const MCExpr * getSymbolForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang, - MachineModuleInfo *MMI, - bool &IsIndirect, bool &IsPCRel) const; + MachineModuleInfo *MMI, unsigned Encoding) const; + virtual unsigned getPersonalityEncoding() const; + virtual unsigned getLSDAEncoding() const; + virtual unsigned getFDEEncoding() const; + virtual unsigned getTTypeEncoding() const; }; - + /// X8664_MachoTargetObjectFile - This TLOF implementation is used for /// Darwin/x86-64. 
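The get*Encoding overrides above all follow one recipe: PIC code wants pc-relative signed data, with the width picked by whether the code model can exceed plus or minus 2GB, and non-PIC code falls back to absolute pointers or small unsigned offsets. A compilable mirror of the FDE case, with the DW_EH_PE_* values copied from llvm/Support/Dwarf.h (the free function itself is illustrative):

  // Values copied from llvm/Support/Dwarf.h.
  enum {
    DW_EH_PE_absptr = 0x00, DW_EH_PE_udata4 = 0x03,
    DW_EH_PE_sdata4 = 0x0b, DW_EH_PE_sdata8 = 0x0c,
    DW_EH_PE_pcrel  = 0x10
  };

  enum RelocModel { RM_Static, RM_PIC };
  enum CodeModel  { CM_Small, CM_Medium, CM_Large };

  // Mirrors X8664_ELFTargetObjectFile::getFDEEncoding above: PIC uses
  // pc-relative signed offsets, 4 bytes unless the code model allows
  // the image to span more than +/-2GB; static code uses absolute
  // pointers or small unsigned data.
  unsigned fdeEncoding(RelocModel RM, CodeModel CM) {
    if (RM == RM_PIC)
      return DW_EH_PE_pcrel | (CM == CM_Small || CM == CM_Medium
                                   ? DW_EH_PE_sdata4 : DW_EH_PE_sdata8);
    if (CM == CM_Small || CM == CM_Medium)
      return DW_EH_PE_udata4;
    return DW_EH_PE_absptr;
  }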
class X8664_MachoTargetObjectFile : public TargetLoweringObjectFileMachO { @@ -32,9 +38,35 @@ namespace llvm { virtual const MCExpr * getSymbolForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang, - MachineModuleInfo *MMI, - bool &IsIndirect, bool &IsPCRel) const; + MachineModuleInfo *MMI, unsigned Encoding) const; + virtual unsigned getPersonalityEncoding() const; + virtual unsigned getLSDAEncoding() const; + virtual unsigned getFDEEncoding() const; + virtual unsigned getTTypeEncoding() const; + }; + + class X8632_ELFTargetObjectFile : public TargetLoweringObjectFileELF { + const X86TargetMachine &TM; + public: + X8632_ELFTargetObjectFile(const X86TargetMachine &tm) + :TM(tm) { }; + virtual unsigned getPersonalityEncoding() const; + virtual unsigned getLSDAEncoding() const; + virtual unsigned getFDEEncoding() const; + virtual unsigned getTTypeEncoding() const; }; + + class X8664_ELFTargetObjectFile : public TargetLoweringObjectFileELF { + const X86TargetMachine &TM; + public: + X8664_ELFTargetObjectFile(const X86TargetMachine &tm) + :TM(tm) { }; + virtual unsigned getPersonalityEncoding() const; + virtual unsigned getLSDAEncoding() const; + virtual unsigned getFDEEncoding() const; + virtual unsigned getTTypeEncoding() const; + }; + } // end namespace llvm #endif diff --git a/lib/Target/XCore/AsmPrinter/Makefile b/lib/Target/XCore/AsmPrinter/Makefile index f0e883e..82dc1df 100644 --- a/lib/Target/XCore/AsmPrinter/Makefile +++ b/lib/Target/XCore/AsmPrinter/Makefile @@ -9,7 +9,6 @@ LEVEL = ../../../.. LIBRARYNAME = LLVMXCoreAsmPrinter -CXXFLAGS = -fno-rtti # Hack: we need to include 'main' XCore target directory to grab private headers CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. diff --git a/lib/Target/XCore/AsmPrinter/XCoreAsmPrinter.cpp b/lib/Target/XCore/AsmPrinter/XCoreAsmPrinter.cpp index 40d7160..d18f55d 100644 --- a/lib/Target/XCore/AsmPrinter/XCoreAsmPrinter.cpp +++ b/lib/Target/XCore/AsmPrinter/XCoreAsmPrinter.cpp @@ -32,7 +32,6 @@ #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetRegistry.h" -#include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringExtras.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" @@ -42,8 +41,6 @@ #include <cctype> using namespace llvm; -STATISTIC(EmittedInsts, "Number of machine instrs printed"); - static cl::opt<unsigned> MaxThreads("xcore-max-threads", cl::Optional, cl::desc("Maximum number of threads (for emulation thread-local storage)"), cl::Hidden, @@ -55,8 +52,9 @@ namespace { const XCoreSubtarget &Subtarget; public: explicit XCoreAsmPrinter(formatted_raw_ostream &O, TargetMachine &TM, - const MCAsmInfo *T, bool V) - : AsmPrinter(O, TM, T, V), + MCContext &Ctx, MCStreamer &Streamer, + const MCAsmInfo *T) + : AsmPrinter(O, TM, Ctx, Streamer, T), Subtarget(TM.getSubtarget<XCoreSubtarget>()) {} virtual const char *getPassName() const { @@ -74,13 +72,13 @@ namespace { virtual void EmitGlobalVariable(const GlobalVariable *GV); void emitFunctionStart(MachineFunction &MF); - void emitFunctionEnd(MachineFunction &MF); void printInstruction(const MachineInstr *MI); // autogenerated. 
static const char *getRegisterName(unsigned RegNo); - void printMachineInstruction(const MachineInstr *MI); - bool runOnMachineFunction(MachineFunction &F); + bool runOnMachineFunction(MachineFunction &MF); + void EmitInstruction(const MachineInstr *MI); + void EmitFunctionBodyEnd(); void getAnalysisUsage(AnalysisUsage &AU) const { AsmPrinter::getAnalysisUsage(AU); @@ -106,7 +104,7 @@ void XCoreAsmPrinter::emitArrayBound(const MCSymbol *Sym, cast<PointerType>(GV->getType())->getElementType())) { O << MAI->getGlobalDirective() << *Sym; O << ".globound" << "\n"; - O << MAI->getSetDirective() << *Sym; + O << "\t.set\t" << *Sym; O << ".globound" << "," << ATy->getNumElements() << "\n"; if (GV->hasWeakLinkage() || GV->hasLinkOnceLinkage()) { // TODO Use COMDAT groups for LinkOnceLinkage @@ -150,8 +148,6 @@ void XCoreAsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { case GlobalValue::PrivateLinkage: case GlobalValue::LinkerPrivateLinkage: break; - case GlobalValue::GhostLinkage: - llvm_unreachable("Should not have any unmaterialized functions!"); case GlobalValue::DLLImportLinkage: llvm_unreachable("DLLImport linkage is not supported by this target!"); case GlobalValue::DLLExportLinkage: @@ -222,26 +218,22 @@ void XCoreAsmPrinter::emitFunctionStart(MachineFunction &MF) { O << *CurrentFnSym << ":\n"; } -/// Emit the directives on the end of functions -void XCoreAsmPrinter::emitFunctionEnd(MachineFunction &MF) { - // Mark the end of the function + +/// EmitFunctionBodyEnd - Targets can override this to emit stuff after +/// the last basic block in the function. +void XCoreAsmPrinter::EmitFunctionBodyEnd() { + // Emit function end directives O << "\t.cc_bottom " << *CurrentFnSym << ".function\n"; } /// runOnMachineFunction - This uses the printMachineInstruction() /// method to print assembly for each instruction. /// -bool XCoreAsmPrinter::runOnMachineFunction(MachineFunction &MF) -{ - this->MF = &MF; - +bool XCoreAsmPrinter::runOnMachineFunction(MachineFunction &MF) { SetupMachineFunction(MF); // Print out constants referenced by the function - EmitConstantPool(MF.getConstantPool()); - - // Print out jump tables referenced by the function - EmitJumpTableInfo(MF.getJumpTableInfo(), MF); + EmitConstantPool(); // Emit the function start directives emitFunctionStart(MF); @@ -249,32 +241,7 @@ bool XCoreAsmPrinter::runOnMachineFunction(MachineFunction &MF) // Emit pre-function debug information. DW->BeginFunction(&MF); - // Print out code for the function. - for (MachineFunction::const_iterator I = MF.begin(), E = MF.end(); - I != E; ++I) { - - // Print a label for the basic block. - if (I != MF.begin()) { - EmitBasicBlockStart(I); - } - - for (MachineBasicBlock::const_iterator II = I->begin(), E = I->end(); - II != E; ++II) { - // Print the assembly for the instruction. - printMachineInstruction(II); - } - - // Each Basic Block is separated by a newline - O << '\n'; - } - - // Emit function end directives - emitFunctionEnd(MF); - - // Emit post-function debug information. - DW->EndFunction(&MF); - - // We didn't modify anything. 
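The XCore printer's hand-rolled block-and-instruction loop is deleted above, and the single EmitFunctionBody() call that replaces it appears just below; targets now supply only the EmitInstruction and EmitFunctionBodyEnd hooks. A toy model of that template-method shape, with invented stand-ins for the machine IR types:

  #include <cstddef>
  #include <iostream>
  #include <vector>

  struct Instr { const char *Text; };            // toy MachineInstr
  struct Block { std::vector<Instr> Instrs; };   // toy MachineBasicBlock

  // The base class owns the iteration; targets override only hooks.
  class Printer {
  public:
    virtual ~Printer() {}
    void EmitFunctionBody(const std::vector<Block> &Blocks) {
      for (std::size_t B = 0; B != Blocks.size(); ++B)
        for (std::size_t I = 0; I != Blocks[B].Instrs.size(); ++I)
          EmitInstruction(Blocks[B].Instrs[I]);
      EmitFunctionBodyEnd();
    }
    virtual void EmitInstruction(const Instr &I) = 0;
    virtual void EmitFunctionBodyEnd() {}
  };

  // An XCore-flavored target supplies just the two hooks.
  class XCoreLikePrinter : public Printer {
    virtual void EmitInstruction(const Instr &I) {
      std::cout << '\t' << I.Text << '\n';
    }
    virtual void EmitFunctionBodyEnd() {
      std::cout << "\t.cc_bottom fn.function\n";
    }
  };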
+ EmitFunctionBody(); return false; } @@ -300,7 +267,7 @@ void XCoreAsmPrinter::printOperand(const MachineInstr *MI, int opNum) { O << MO.getImm(); break; case MachineOperand::MO_MachineBasicBlock: - O << *GetMBBSymbol(MO.getMBB()->getNumber()); + O << *MO.getMBB()->getSymbol(OutContext); break; case MachineOperand::MO_GlobalAddress: O << *GetGlobalValueSymbol(MO.getGlobal()); @@ -333,24 +300,17 @@ bool XCoreAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, return false; } -void XCoreAsmPrinter::printMachineInstruction(const MachineInstr *MI) { - ++EmittedInsts; - - processDebugLoc(MI, true); - +void XCoreAsmPrinter::EmitInstruction(const MachineInstr *MI) { // Check for mov mnemonic unsigned src, dst, srcSR, dstSR; if (TM.getInstrInfo()->isMoveInstr(*MI, src, dst, srcSR, dstSR)) { O << "\tmov " << getRegisterName(dst) << ", "; - O << getRegisterName(src) << '\n'; + O << getRegisterName(src); + OutStreamer.AddBlankLine(); return; } printInstruction(MI); - if (VerboseAsm) - EmitComments(*MI); - O << '\n'; - - processDebugLoc(MI, false); + OutStreamer.AddBlankLine(); } // Force static initialization. diff --git a/lib/Target/XCore/Makefile b/lib/Target/XCore/Makefile index 3bb127f..1b70974 100644 --- a/lib/Target/XCore/Makefile +++ b/lib/Target/XCore/Makefile @@ -10,7 +10,6 @@ LEVEL = ../../.. LIBRARYNAME = LLVMXCoreCodeGen TARGET = XCore -CXXFLAGS = -fno-rtti # Make sure that tblgen is run, first thing. BUILT_SOURCES = XCoreGenRegisterInfo.h.inc XCoreGenRegisterNames.inc \ diff --git a/lib/Target/XCore/TargetInfo/Makefile b/lib/Target/XCore/TargetInfo/Makefile index 83bba13..f8a4095 100644 --- a/lib/Target/XCore/TargetInfo/Makefile +++ b/lib/Target/XCore/TargetInfo/Makefile @@ -9,7 +9,6 @@ LEVEL = ../../../.. LIBRARYNAME = LLVMXCoreInfo -CXXFLAGS = -fno-rtti # Hack: we need to include 'main' target directory to grab private headers CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. diff --git a/lib/Target/XCore/XCoreISelLowering.cpp b/lib/Target/XCore/XCoreISelLowering.cpp index 6849e0b..57fd43b 100644 --- a/lib/Target/XCore/XCoreISelLowering.cpp +++ b/lib/Target/XCore/XCoreISelLowering.cpp @@ -390,7 +390,12 @@ LowerLOAD(SDValue Op, SelectionDAG &DAG) if (Offset % 4 == 0) { // We've managed to infer better alignment information than the load // already has. Use an aligned load. - return DAG.getLoad(getPointerTy(), dl, Chain, BasePtr, NULL, 4); + // + // FIXME: No new alignment information is actually passed here. + // Should the offset really be 4? 
+ // + return DAG.getLoad(getPointerTy(), dl, Chain, BasePtr, NULL, 4, + false, false, 0); } // Lower to // ldw low, base[offset >> 2] @@ -407,9 +412,9 @@ LowerLOAD(SDValue Op, SelectionDAG &DAG) SDValue HighAddr = DAG.getNode(ISD::ADD, dl, MVT::i32, Base, HighOffset); SDValue Low = DAG.getLoad(getPointerTy(), dl, Chain, - LowAddr, NULL, 4); + LowAddr, NULL, 4, false, false, 0); SDValue High = DAG.getLoad(getPointerTy(), dl, Chain, - HighAddr, NULL, 4); + HighAddr, NULL, 4, false, false, 0); SDValue LowShifted = DAG.getNode(ISD::SRL, dl, MVT::i32, Low, LowShift); SDValue HighShifted = DAG.getNode(ISD::SHL, dl, MVT::i32, High, HighShift); SDValue Result = DAG.getNode(ISD::OR, dl, MVT::i32, LowShifted, HighShifted); @@ -423,12 +428,13 @@ LowerLOAD(SDValue Op, SelectionDAG &DAG) int SVOffset = LD->getSrcValueOffset(); SDValue Low = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, Chain, BasePtr, LD->getSrcValue(), SVOffset, MVT::i16, - LD->isVolatile(), 2); + LD->isVolatile(), LD->isNonTemporal(), 2); SDValue HighAddr = DAG.getNode(ISD::ADD, dl, MVT::i32, BasePtr, DAG.getConstant(2, MVT::i32)); SDValue High = DAG.getExtLoad(ISD::EXTLOAD, dl, MVT::i32, Chain, HighAddr, LD->getSrcValue(), SVOffset + 2, - MVT::i16, LD->isVolatile(), 2); + MVT::i16, LD->isVolatile(), + LD->isNonTemporal(), 2); SDValue HighShifted = DAG.getNode(ISD::SHL, dl, MVT::i32, High, DAG.getConstant(16, MVT::i32)); SDValue Result = DAG.getNode(ISD::OR, dl, MVT::i32, Low, HighShifted); @@ -487,12 +493,14 @@ LowerSTORE(SDValue Op, SelectionDAG &DAG) DAG.getConstant(16, MVT::i32)); SDValue StoreLow = DAG.getTruncStore(Chain, dl, Low, BasePtr, ST->getSrcValue(), SVOffset, MVT::i16, - ST->isVolatile(), 2); + ST->isVolatile(), ST->isNonTemporal(), + 2); SDValue HighAddr = DAG.getNode(ISD::ADD, dl, MVT::i32, BasePtr, DAG.getConstant(2, MVT::i32)); SDValue StoreHigh = DAG.getTruncStore(Chain, dl, High, HighAddr, ST->getSrcValue(), SVOffset + 2, - MVT::i16, ST->isVolatile(), 2); + MVT::i16, ST->isVolatile(), + ST->isNonTemporal(), 2); return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, StoreLow, StoreHigh); } @@ -561,15 +569,16 @@ LowerVAARG(SDValue Op, SelectionDAG &DAG) const Value *V = cast<SrcValueSDNode>(Node->getOperand(2))->getValue(); EVT VT = Node->getValueType(0); SDValue VAList = DAG.getLoad(getPointerTy(), dl, Node->getOperand(0), - Node->getOperand(1), V, 0); + Node->getOperand(1), V, 0, false, false, 0); // Increment the pointer, VAList, to the next vararg SDValue Tmp3 = DAG.getNode(ISD::ADD, dl, getPointerTy(), VAList, DAG.getConstant(VT.getSizeInBits(), getPointerTy())); // Store the incremented VAList to the legalized pointer - Tmp3 = DAG.getStore(VAList.getValue(1), dl, Tmp3, Node->getOperand(1), V, 0); + Tmp3 = DAG.getStore(VAList.getValue(1), dl, Tmp3, Node->getOperand(1), V, 0, + false, false, 0); // Load the actual argument out of the pointer VAList - return DAG.getLoad(VT, dl, Tmp3, VAList, NULL, 0); + return DAG.getLoad(VT, dl, Tmp3, VAList, NULL, 0, false, false, 0); } SDValue XCoreTargetLowering:: @@ -582,7 +591,8 @@ LowerVASTART(SDValue Op, SelectionDAG &DAG) XCoreFunctionInfo *XFI = MF.getInfo<XCoreFunctionInfo>(); SDValue Addr = DAG.getFrameIndex(XFI->getVarArgsFrameIndex(), MVT::i32); const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); - return DAG.getStore(Op.getOperand(0), dl, Addr, Op.getOperand(1), SV, 0); + return DAG.getStore(Op.getOperand(0), dl, Addr, Op.getOperand(1), SV, 0, + false, false, 0); } SDValue XCoreTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) { @@ 
-611,11 +621,13 @@ SDValue XCoreTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) { SDValue XCoreTargetLowering::LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg, - bool isTailCall, + bool &isTailCall, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) { + // XCore target does not yet support tail call optimization. + isTailCall = false; // For now, only CallingConv::C implemented switch (CallConv) @@ -875,7 +887,8 @@ XCoreTargetLowering::LowerCCCArguments(SDValue Chain, // Create the SelectionDAG nodes corresponding to a load //from this parameter SDValue FIN = DAG.getFrameIndex(FI, MVT::i32); - InVals.push_back(DAG.getLoad(VA.getLocVT(), dl, Chain, FIN, NULL, 0)); + InVals.push_back(DAG.getLoad(VA.getLocVT(), dl, Chain, FIN, NULL, 0, + false, false, 0)); } } @@ -906,7 +919,8 @@ XCoreTargetLowering::LowerCCCArguments(SDValue Chain, RegInfo.addLiveIn(ArgRegs[i], VReg); SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32); // Move argument from virt reg -> stack - SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, NULL, 0); + SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, NULL, 0, + false, false, 0); MemOps.push_back(Store); } if (!MemOps.empty()) diff --git a/lib/Target/XCore/XCoreISelLowering.h b/lib/Target/XCore/XCoreISelLowering.h index f86be5e..5095f36 100644 --- a/lib/Target/XCore/XCoreISelLowering.h +++ b/lib/Target/XCore/XCoreISelLowering.h @@ -28,7 +28,7 @@ namespace llvm { namespace XCoreISD { enum NodeType { // Start the numbering where the builtin ops and target ops leave off. - FIRST_NUMBER = ISD::BUILTIN_OP_END+XCore::INSTRUCTION_LIST_END, + FIRST_NUMBER = ISD::BUILTIN_OP_END, // Branch and link (call) BL, @@ -149,7 +149,7 @@ namespace llvm { virtual SDValue LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg, - bool isTailCall, + bool &isTailCall, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, diff --git a/lib/Target/XCore/XCoreInstrInfo.td b/lib/Target/XCore/XCoreInstrInfo.td index d4ae49e..10dc18c 100644 --- a/lib/Target/XCore/XCoreInstrInfo.td +++ b/lib/Target/XCore/XCoreInstrInfo.td @@ -686,7 +686,7 @@ def LDAP_lu10_ba : _FLU10<(outs), [(set R11, (pcrelwrapper tblockaddress:$addr))]>; let isCall=1, -// All calls clobber the the link register and the non-callee-saved registers: +// All calls clobber the link register and the non-callee-saved registers: Defs = [R0, R1, R2, R3, R11, LR] in { def BL_u10 : _FU10< (outs), @@ -779,7 +779,7 @@ def ECALLF_1r : _F1R<(outs), (ins GRRegs:$src), []>; let isCall=1, -// All calls clobber the the link register and the non-callee-saved registers: +// All calls clobber the link register and the non-callee-saved registers: Defs = [R0, R1, R2, R3, R11, LR] in { def BLA_1r : _F1R<(outs), (ins GRRegs:$addr, variable_ops), "bla $addr", diff --git a/lib/Target/XCore/XCoreMCAsmInfo.cpp b/lib/Target/XCore/XCoreMCAsmInfo.cpp index dffdda9..bf78575 100644 --- a/lib/Target/XCore/XCoreMCAsmInfo.cpp +++ b/lib/Target/XCore/XCoreMCAsmInfo.cpp @@ -22,7 +22,6 @@ XCoreMCAsmInfo::XCoreMCAsmInfo(const Target &T, const StringRef &TT) { AscizDirective = ".asciiz"; WeakDefDirective = "\t.weak\t"; WeakRefDirective = "\t.weak\t"; - SetDirective = "\t.set\t"; // Debug HasLEB128 = true; diff --git a/lib/Target/XCore/XCoreTargetObjectFile.h b/lib/Target/XCore/XCoreTargetObjectFile.h index 
7efb990..7424c78 100644 --- a/lib/Target/XCore/XCoreTargetObjectFile.h +++ b/lib/Target/XCore/XCoreTargetObjectFile.h @@ -10,13 +10,12 @@ #ifndef LLVM_TARGET_XCORE_TARGETOBJECTFILE_H #define LLVM_TARGET_XCORE_TARGETOBJECTFILE_H -#include "llvm/Target/TargetLoweringObjectFile.h" +#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" namespace llvm { class XCoreTargetObjectFile : public TargetLoweringObjectFileELF { public: - void Initialize(MCContext &Ctx, const TargetMachine &TM); // TODO: Classify globals as xcore wishes. diff --git a/lib/Transforms/Hello/Makefile b/lib/Transforms/Hello/Makefile index 46f8098..c5e75d4 100644 --- a/lib/Transforms/Hello/Makefile +++ b/lib/Transforms/Hello/Makefile @@ -11,7 +11,6 @@ LEVEL = ../../.. LIBRARYNAME = LLVMHello LOADABLE_MODULE = 1 USEDLIBS = -CXXFLAGS = -fno-rtti include $(LEVEL)/Makefile.common diff --git a/lib/Transforms/IPO/ArgumentPromotion.cpp b/lib/Transforms/IPO/ArgumentPromotion.cpp index d8190a4..325d353 100644 --- a/lib/Transforms/IPO/ArgumentPromotion.cpp +++ b/lib/Transforms/IPO/ArgumentPromotion.cpp @@ -247,7 +247,7 @@ static bool PrefixIn(const ArgPromotion::IndicesVector &Indices, return Low != Set.end() && IsPrefix(*Low, Indices); } -/// Mark the given indices (ToMark) as safe in the the given set of indices +/// Mark the given indices (ToMark) as safe in the given set of indices /// (Safe). Marking safe usually means adding ToMark to Safe. However, if there /// is already a prefix of Indices in Safe, Indices are implicitely marked safe /// already. Furthermore, any indices that Indices is itself a prefix of, are diff --git a/lib/Transforms/IPO/ConstantMerge.cpp b/lib/Transforms/IPO/ConstantMerge.cpp index 4972687..3c05f88 100644 --- a/lib/Transforms/IPO/ConstantMerge.cpp +++ b/lib/Transforms/IPO/ConstantMerge.cpp @@ -19,10 +19,11 @@ #define DEBUG_TYPE "constmerge" #include "llvm/Transforms/IPO.h" +#include "llvm/DerivedTypes.h" #include "llvm/Module.h" #include "llvm/Pass.h" +#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/Statistic.h" -#include <map> using namespace llvm; STATISTIC(NumMerged, "Number of global constants merged"); @@ -48,10 +49,10 @@ ModulePass *llvm::createConstantMergePass() { return new ConstantMerge(); } bool ConstantMerge::runOnModule(Module &M) { // Map unique constant/section pairs to globals. We don't want to merge // globals in different sections. - std::map<std::pair<Constant*, std::string>, GlobalVariable*> CMap; + DenseMap<Constant*, GlobalVariable*> CMap; // Replacements - This vector contains a list of replacements to perform. - std::vector<std::pair<GlobalVariable*, GlobalVariable*> > Replacements; + SmallVector<std::pair<GlobalVariable*, GlobalVariable*>, 32> Replacements; bool MadeChange = false; @@ -76,19 +77,21 @@ bool ConstantMerge::runOnModule(Module &M) { continue; } - // Only process constants with initializers. - if (GV->isConstant() && GV->hasDefinitiveInitializer()) { - Constant *Init = GV->getInitializer(); - - // Check to see if the initializer is already known. - GlobalVariable *&Slot = CMap[std::make_pair(Init, GV->getSection())]; - - if (Slot == 0) { // Nope, add it to the map. - Slot = GV; - } else if (GV->hasLocalLinkage()) { // Yup, this is a duplicate! - // Make all uses of the duplicate constant use the canonical version. - Replacements.push_back(std::make_pair(GV, Slot)); - } + // Only process constants with initializers in the default address space.
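The rewritten ConstantMerge loop, which the hunk continues with just below, filters out non-mergeable globals up front and then keys the duplicate map on the initializer alone, the section check having become an eligibility test rather than part of the key. A standalone sketch of that shape (a toy Global in place of llvm::GlobalVariable, std::map in place of DenseMap):

  #include <cstddef>
  #include <map>
  #include <string>
  #include <vector>

  // Toy global: only the fields the eligibility test and the map need.
  struct Global {
    int Initializer;        // stand-in for the Constant* initializer
    bool IsConstant, HasDefinitiveInit, HasLocalLinkage;
    unsigned AddressSpace;
    std::string Section;
    Global *ReplacedBy;     // canonical copy, once merged away
  };

  // Ineligible globals are skipped up front; the map is then keyed on
  // the initializer alone, since sectioned globals never get this far.
  void mergeDuplicates(std::vector<Global> &Globals) {
    std::map<int, Global *> CMap;
    for (std::size_t i = 0; i != Globals.size(); ++i) {
      Global &GV = Globals[i];
      if (!GV.IsConstant || !GV.HasDefinitiveInit ||
          GV.AddressSpace != 0 || !GV.Section.empty())
        continue;
      Global *&Slot = CMap[GV.Initializer];
      if (!Slot)
        Slot = &GV;               // first copy becomes the canonical one
      else if (GV.HasLocalLinkage)
        GV.ReplacedBy = Slot;     // duplicate: redirect its uses later
    }
  }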
+ if (!GV->isConstant() ||!GV->hasDefinitiveInitializer() || + GV->getType()->getAddressSpace() != 0 || !GV->getSection().empty()) + continue; + + Constant *Init = GV->getInitializer(); + + // Check to see if the initializer is already known. + GlobalVariable *&Slot = CMap[Init]; + + if (Slot == 0) { // Nope, add it to the map. + Slot = GV; + } else if (GV->hasLocalLinkage()) { // Yup, this is a duplicate! + // Make all uses of the duplicate constant use the canonical version. + Replacements.push_back(std::make_pair(GV, Slot)); } } @@ -100,11 +103,11 @@ bool ConstantMerge::runOnModule(Module &M) { // now. This avoid invalidating the pointers in CMap, which are unneeded // now. for (unsigned i = 0, e = Replacements.size(); i != e; ++i) { - // Eliminate any uses of the dead global... + // Eliminate any uses of the dead global. Replacements[i].first->replaceAllUsesWith(Replacements[i].second); - // Delete the global value from the module... - M.getGlobalList().erase(Replacements[i].first); + // Delete the global value from the module. + Replacements[i].first->eraseFromParent(); } NumMerged += Replacements.size(); diff --git a/lib/Transforms/IPO/DeadTypeElimination.cpp b/lib/Transforms/IPO/DeadTypeElimination.cpp index 025d77e..662fbb5 100644 --- a/lib/Transforms/IPO/DeadTypeElimination.cpp +++ b/lib/Transforms/IPO/DeadTypeElimination.cpp @@ -57,13 +57,13 @@ ModulePass *llvm::createDeadTypeEliminationPass() { // static inline bool ShouldNukeSymtabEntry(const Type *Ty){ // Nuke all names for primitive types! - if (Ty->isPrimitiveType() || Ty->isInteger()) + if (Ty->isPrimitiveType() || Ty->isIntegerTy()) return true; // Nuke all pointers to primitive types as well... if (const PointerType *PT = dyn_cast<PointerType>(Ty)) if (PT->getElementType()->isPrimitiveType() || - PT->getElementType()->isInteger()) + PT->getElementType()->isIntegerTy()) return true; return false; diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp index ee260e9..df060eb 100644 --- a/lib/Transforms/IPO/GlobalOpt.cpp +++ b/lib/Transforms/IPO/GlobalOpt.cpp @@ -638,8 +638,8 @@ static bool AllUsesOfValueWillTrapIfNull(Value *V, } else if (PHINode *PN = dyn_cast<PHINode>(*UI)) { // If we've already seen this phi node, ignore it, it has already been // checked. - if (PHIs.insert(PN)) - return AllUsesOfValueWillTrapIfNull(PN, PHIs); + if (PHIs.insert(PN) && !AllUsesOfValueWillTrapIfNull(PN, PHIs)) + return false; } else if (isa<ICmpInst>(*UI) && isa<ConstantPointerNull>(UI->getOperand(1))) { // Ignore setcc X, null @@ -1590,7 +1590,7 @@ static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) { // simplification. In these cases, we typically end up with "cond ? v1 : v2" // where v1 and v2 both require constant pool loads, a big loss. 
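The GlobalOpt hunk above fixes a subtle short-circuit in AllUsesOfValueWillTrapIfNull: returning the recursive result directly meant a passing PHI ended the scan before the remaining users were checked; only a failing recursion may return early. A minimal standalone rendering of the corrected traversal (toy use-graph types, not LLVM's):

  #include <cstddef>
  #include <set>
  #include <vector>

  struct User;
  struct Value { std::vector<User *> Users; };
  struct User : Value { bool TrapsIfNull; bool IsPhi; };

  // Corrected traversal: a passing PHI no longer ends the scan early;
  // only a *failing* recursive check may return before the loop is done.
  bool allUsesTrapIfNull(const Value *V, std::set<const User *> &Phis) {
    for (std::size_t i = 0; i != V->Users.size(); ++i) {
      const User *U = V->Users[i];
      if (U->IsPhi) {
        if (Phis.insert(U).second && !allUsesTrapIfNull(U, Phis))
          return false;   // was: return allUsesTrapIfNull(U, Phis);
      } else if (!U->TrapsIfNull) {
        return false;
      }
    }
    return true;
  }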
if (GVElType == Type::getInt1Ty(GV->getContext()) || - GVElType->isFloatingPoint() || + GVElType->isFloatingPointTy() || isa<PointerType>(GVElType) || isa<VectorType>(GVElType)) return false; @@ -1925,7 +1925,7 @@ GlobalVariable *GlobalOpt::FindGlobalCtors(Module &M) { if (!ATy) return 0; const StructType *STy = dyn_cast<StructType>(ATy->getElementType()); if (!STy || STy->getNumElements() != 2 || - !STy->getElementType(0)->isInteger(32)) return 0; + !STy->getElementType(0)->isIntegerTy(32)) return 0; const PointerType *PFTy = dyn_cast<PointerType>(STy->getElementType(1)); if (!PFTy) return 0; const FunctionType *FTy = dyn_cast<FunctionType>(PFTy->getElementType()); diff --git a/lib/Transforms/IPO/Inliner.cpp b/lib/Transforms/IPO/Inliner.cpp index 0990278..752a97c 100644 --- a/lib/Transforms/IPO/Inliner.cpp +++ b/lib/Transforms/IPO/Inliner.cpp @@ -38,8 +38,15 @@ STATISTIC(NumDeleted, "Number of functions deleted because all callers found"); STATISTIC(NumMergedAllocas, "Number of allocas merged together"); static cl::opt<int> -InlineLimit("inline-threshold", cl::Hidden, cl::init(200), cl::ZeroOrMore, - cl::desc("Control the amount of inlining to perform (default = 200)")); +InlineLimit("inline-threshold", cl::Hidden, cl::init(225), cl::ZeroOrMore, + cl::desc("Control the amount of inlining to perform (default = 225)")); + +static cl::opt<int> +HintThreshold("inlinehint-threshold", cl::Hidden, cl::init(325), + cl::desc("Threshold for inlining functions with inline hint")); + +// Threshold to use when optsize is specified (and there is no -inline-limit). +const int OptSizeThreshold = 75; Inliner::Inliner(void *ID) : CallGraphSCCPass(ID), InlineThreshold(InlineLimit) {} @@ -172,13 +179,23 @@ static bool InlineCallIfPossible(CallSite CS, CallGraph &CG, return true; } -unsigned Inliner::getInlineThreshold(Function* Caller) const { +unsigned Inliner::getInlineThreshold(CallSite CS) const { + int thres = InlineThreshold; + + // Listen to optsize when -inline-limit is not given. + Function *Caller = CS.getCaller(); if (Caller && !Caller->isDeclaration() && Caller->hasFnAttr(Attribute::OptimizeForSize) && InlineLimit.getNumOccurrences() == 0) - return 50; - else - return InlineThreshold; + thres = OptSizeThreshold; + + // Listen to inlinehint when it would increase the threshold. + Function *Callee = CS.getCalledFunction(); + if (HintThreshold > thres && Callee && !Callee->isDeclaration() && + Callee->hasFnAttr(Attribute::InlineHint)) + thres = HintThreshold; + + return thres; } /// shouldInline - Return true if the inliner should attempt to inline @@ -200,7 +217,7 @@ bool Inliner::shouldInline(CallSite CS) { int Cost = IC.getValue(); Function *Caller = CS.getCaller(); - int CurrentThreshold = getInlineThreshold(Caller); + int CurrentThreshold = getInlineThreshold(CS); float FudgeFactor = getInlineFudgeFactor(CS); if (Cost >= (int)(CurrentThreshold * FudgeFactor)) { DEBUG(dbgs() << " NOT Inlining: cost=" << Cost @@ -236,8 +253,7 @@ bool Inliner::shouldInline(CallSite CS) { outerCallsFound = true; int Cost2 = IC2.getValue(); - Function *Caller2 = CS2.getCaller(); - int CurrentThreshold2 = getInlineThreshold(Caller2); + int CurrentThreshold2 = getInlineThreshold(CS2); float FudgeFactor2 = getInlineFudgeFactor(CS2); if (Cost2 >= (int)(CurrentThreshold2 * FudgeFactor2)) diff --git a/lib/Transforms/IPO/Makefile b/lib/Transforms/IPO/Makefile index fd018c4..5c42374 100644 --- a/lib/Transforms/IPO/Makefile +++ b/lib/Transforms/IPO/Makefile @@ -10,7 +10,6 @@ LEVEL = ../../.. 
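The Inliner change earlier in this hunk replaces the hard-coded optsize value of 50 with named thresholds and routes the decision through the call site: optsize lowers the budget to 75 only when -inline-threshold was not given on the command line, and inlinehint raises it to 325 only when that is actually an increase. A standalone mirror of that selection (the struct fields stand in for the attribute queries):

  // Mirrors the new Inliner::getInlineThreshold, with the diff's
  // defaults folded in as plain parameters for illustration.
  struct Fn { bool IsDeclaration, OptForSize, InlineHint; };

  int inlineThreshold(const Fn *Caller, const Fn *Callee,
                      bool UserSetLimit,          // -inline-threshold given?
                      int Limit = 225, int HintThres = 325,
                      int OptSizeThres = 75) {
    int Thres = Limit;
    // Listen to optsize only when the user did not pin the limit.
    if (Caller && !Caller->IsDeclaration && Caller->OptForSize &&
        !UserSetLimit)
      Thres = OptSizeThres;
    // inlinehint may only raise the threshold, never lower it.
    if (HintThres > Thres && Callee && !Callee->IsDeclaration &&
        Callee->InlineHint)
      Thres = HintThres;
    return Thres;
  }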
LIBRARYNAME = LLVMipo BUILD_ARCHIVE = 1 -CXXFLAGS = -fno-rtti include $(LEVEL)/Makefile.common diff --git a/lib/Transforms/IPO/MergeFunctions.cpp b/lib/Transforms/IPO/MergeFunctions.cpp index fa8845b..b07e22c 100644 --- a/lib/Transforms/IPO/MergeFunctions.cpp +++ b/lib/Transforms/IPO/MergeFunctions.cpp @@ -467,7 +467,6 @@ static LinkageCategory categorize(const Function *F) { case GlobalValue::AppendingLinkage: case GlobalValue::DLLImportLinkage: case GlobalValue::DLLExportLinkage: - case GlobalValue::GhostLinkage: case GlobalValue::CommonLinkage: return ExternalStrong; } diff --git a/lib/Transforms/IPO/PartialInlining.cpp b/lib/Transforms/IPO/PartialInlining.cpp index f40902f..f8ec722 100644 --- a/lib/Transforms/IPO/PartialInlining.cpp +++ b/lib/Transforms/IPO/PartialInlining.cpp @@ -117,7 +117,7 @@ Function* PartialInliner::unswitchFunction(Function* F) { DominatorTree DT; DT.runOnFunction(*duplicateFunction); - // Extract the body of the the if. + // Extract the body of the if. Function* extractedFunction = ExtractCodeRegion(DT, toExtract); // Inline the top-level if test into all callers. diff --git a/lib/Transforms/IPO/StripSymbols.cpp b/lib/Transforms/IPO/StripSymbols.cpp index 0e0d83a..310e4a2 100644 --- a/lib/Transforms/IPO/StripSymbols.cpp +++ b/lib/Transforms/IPO/StripSymbols.cpp @@ -214,6 +214,15 @@ static bool StripDebugInfo(Module &M) { Changed = true; } + if (Function *DbgVal = M.getFunction("llvm.dbg.value")) { + while (!DbgVal->use_empty()) { + CallInst *CI = cast<CallInst>(DbgVal->use_back()); + CI->eraseFromParent(); + } + DbgVal->eraseFromParent(); + Changed = true; + } + NamedMDNode *NMD = M.getNamedMetadata("llvm.dbg.gv"); if (NMD) { Changed = true; diff --git a/lib/Transforms/InstCombine/InstCombine.h b/lib/Transforms/InstCombine/InstCombine.h index 5367900..09accb6 100644 --- a/lib/Transforms/InstCombine/InstCombine.h +++ b/lib/Transforms/InstCombine/InstCombine.h @@ -199,11 +199,12 @@ private: SmallVectorImpl<Value*> &NewIndices); Instruction *FoldOpIntoSelect(Instruction &Op, SelectInst *SI); - /// ValueRequiresCast - Return true if the cast from "V to Ty" actually - /// results in any code being generated. It does not require codegen if V is - /// simple enough or if the cast can be folded into other casts. - bool ValueRequiresCast(Instruction::CastOps opcode,const Value *V, - const Type *Ty); + /// ShouldOptimizeCast - Return true if the cast from "V to Ty" actually + /// results in any code being generated and is interesting to optimize out. If + /// the cast can be eliminated by some other simple transformation, we prefer + /// to do the simplification first. + bool ShouldOptimizeCast(Instruction::CastOps opcode,const Value *V, + const Type *Ty); Instruction *visitCallSite(CallSite CS); bool transformConstExprCastCall(CallSite CS); diff --git a/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/lib/Transforms/InstCombine/InstCombineAddSub.cpp index 4891ff0..2da17f1 100644 --- a/lib/Transforms/InstCombine/InstCombineAddSub.cpp +++ b/lib/Transforms/InstCombine/InstCombineAddSub.cpp @@ -35,7 +35,7 @@ static Constant *SubOne(ConstantInt *C) { // Otherwise, return null. 
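The StripSymbols addition above uses the standard drain-the-use-list idiom: each eraseFromParent() on a call removes one use of the llvm.dbg.value declaration, so looping until use_empty() terminates, after which the declaration itself can be erased. A toy rendering of the same two-phase teardown (invented types, not LLVM's):

  #include <vector>

  struct Call { bool Erased; };
  struct Func { std::vector<Call *> Uses; bool Erased; };

  // Two-phase teardown: erasing a call removes one use of the callee,
  // so the loop terminates; once no uses remain, the declaration goes.
  void eraseDeclAndUses(Func &F) {
    while (!F.Uses.empty()) {
      F.Uses.back()->Erased = true;  // models CI->eraseFromParent()
      F.Uses.pop_back();             // the erase drops the use edge
    }
    F.Erased = true;                 // models DbgVal->eraseFromParent()
  }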
// static inline Value *dyn_castFoldableMul(Value *V, ConstantInt *&CST) { - if (!V->hasOneUse() || !V->getType()->isInteger()) + if (!V->hasOneUse() || !V->getType()->isIntegerTy()) return 0; Instruction *I = dyn_cast<Instruction>(V); @@ -121,50 +121,34 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) { match(LHS, m_Xor(m_Value(XorLHS), m_ConstantInt(XorRHS)))) { uint32_t TySizeBits = I.getType()->getScalarSizeInBits(); const APInt& RHSVal = cast<ConstantInt>(RHSC)->getValue(); - - uint32_t Size = TySizeBits / 2; - APInt C0080Val(APInt(TySizeBits, 1ULL).shl(Size - 1)); - APInt CFF80Val(-C0080Val); - do { - if (TySizeBits > Size) { - // If we have ADD(XOR(AND(X, 0xFF), 0x80), 0xF..F80), it's a sext. - // If we have ADD(XOR(AND(X, 0xFF), 0xF..F80), 0x80), it's a sext. - if ((RHSVal == CFF80Val && XorRHS->getValue() == C0080Val) || - (RHSVal == C0080Val && XorRHS->getValue() == CFF80Val)) { - // This is a sign extend if the top bits are known zero. - if (!MaskedValueIsZero(XorLHS, - APInt::getHighBitsSet(TySizeBits, TySizeBits - Size))) - Size = 0; // Not a sign ext, but can't be any others either. - break; - } - } - Size >>= 1; - C0080Val = APIntOps::lshr(C0080Val, Size); - CFF80Val = APIntOps::ashr(CFF80Val, Size); - } while (Size >= 1); - - // FIXME: This shouldn't be necessary. When the backends can handle types - // with funny bit widths then this switch statement should be removed. It - // is just here to get the size of the "middle" type back up to something - // that the back ends can handle. - const Type *MiddleType = 0; - switch (Size) { - default: break; - case 32: - case 16: - case 8: MiddleType = IntegerType::get(I.getContext(), Size); break; + unsigned ExtendAmt = 0; + // If we have ADD(XOR(AND(X, 0xFF), 0x80), 0xF..F80), it's a sext. + // If we have ADD(XOR(AND(X, 0xFF), 0xF..F80), 0x80), it's a sext. 
+ if (XorRHS->getValue() == -RHSVal) { + if (RHSVal.isPowerOf2()) + ExtendAmt = TySizeBits - RHSVal.logBase2() - 1; + else if (XorRHS->getValue().isPowerOf2()) + ExtendAmt = TySizeBits - XorRHS->getValue().logBase2() - 1; + } + + if (ExtendAmt) { + APInt Mask = APInt::getHighBitsSet(TySizeBits, ExtendAmt); + if (!MaskedValueIsZero(XorLHS, Mask)) + ExtendAmt = 0; } - if (MiddleType) { - Value *NewTrunc = Builder->CreateTrunc(XorLHS, MiddleType, "sext"); - return new SExtInst(NewTrunc, I.getType(), I.getName()); + + if (ExtendAmt) { + Constant *ShAmt = ConstantInt::get(I.getType(), ExtendAmt); + Value *NewShl = Builder->CreateShl(XorLHS, ShAmt, "sext"); + return BinaryOperator::CreateAShr(NewShl, ShAmt); } } } - if (I.getType()->isInteger(1)) + if (I.getType()->isIntegerTy(1)) return BinaryOperator::CreateXor(LHS, RHS); - if (I.getType()->isInteger()) { + if (I.getType()->isIntegerTy()) { // X + X --> X << 1 if (LHS == RHS) return BinaryOperator::CreateShl(LHS, ConstantInt::get(I.getType(), 1)); @@ -184,7 +168,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) { // -A + B --> B - A // -A + -B --> -(A + B) if (Value *LHSV = dyn_castNegVal(LHS)) { - if (LHS->getType()->isIntOrIntVector()) { + if (LHS->getType()->isIntOrIntVectorTy()) { if (Value *RHSV = dyn_castNegVal(RHS)) { Value *NewAdd = Builder->CreateAdd(LHSV, RHSV, "sum"); return BinaryOperator::CreateNeg(NewAdd); @@ -238,7 +222,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) { } // W*X + Y*Z --> W * (X+Z) iff W == Y - if (I.getType()->isIntOrIntVector()) { + if (I.getType()->isIntOrIntVectorTy()) { Value *W, *X, *Y, *Z; if (match(LHS, m_Mul(m_Value(W), m_Value(X))) && match(RHS, m_Mul(m_Value(Y), m_Value(Z)))) { @@ -576,7 +560,7 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) { return ReplaceInstUsesWith(I, Op0); // undef - X -> undef if (isa<UndefValue>(Op1)) return ReplaceInstUsesWith(I, Op1); // X - undef -> undef - if (I.getType()->isInteger(1)) + if (I.getType()->isIntegerTy(1)) return BinaryOperator::CreateXor(Op0, Op1); if (ConstantInt *C = dyn_cast<ConstantInt>(Op0)) { @@ -676,6 +660,13 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) { return BinaryOperator::CreateSDiv(Op1I->getOperand(0), ConstantExpr::getNeg(DivRHS)); + // 0 - (C << X) -> (-C << X) + if (Op1I->getOpcode() == Instruction::Shl) + if (ConstantInt *CSI = dyn_cast<ConstantInt>(Op0)) + if (CSI->isZero()) + if (Value *ShlLHSNeg = dyn_castNegVal(Op1I->getOperand(0))) + return BinaryOperator::CreateShl(ShlLHSNeg, Op1I->getOperand(1)); + // X - X*C --> X * (1-C) ConstantInt *C2 = 0; if (dyn_castFoldableMul(Op1I, C2) == Op0) { diff --git a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp index fa7bb12..5e47953 100644 --- a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp +++ b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp @@ -546,7 +546,7 @@ Instruction *InstCombiner::FoldAndOfICmps(Instruction &I, std::swap(LHSCC, RHSCC); } - // At this point, we know we have have two icmp instructions + // At this point, we know we have two icmp instructions // comparing a value against two constants and and'ing the result // together. Because of the above check, we know that we only have // icmp eq, icmp ne, icmp [su]lt, and icmp [SU]gt here. 
We also know @@ -932,24 +932,49 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) { if (ICmpInst *LHS = dyn_cast<ICmpInst>(Op0)) if (Instruction *Res = FoldAndOfICmps(I, LHS, RHS)) return Res; - + + // If and'ing two fcmp, try combine them into one. + if (FCmpInst *LHS = dyn_cast<FCmpInst>(I.getOperand(0))) + if (FCmpInst *RHS = dyn_cast<FCmpInst>(I.getOperand(1))) + if (Instruction *Res = FoldAndOfFCmps(I, LHS, RHS)) + return Res; + + // fold (and (cast A), (cast B)) -> (cast (and A, B)) if (CastInst *Op0C = dyn_cast<CastInst>(Op0)) - if (CastInst *Op1C = dyn_cast<CastInst>(Op1)) - if (Op0C->getOpcode() == Op1C->getOpcode()) { // same cast kind ? - const Type *SrcTy = Op0C->getOperand(0)->getType(); - if (SrcTy == Op1C->getOperand(0)->getType() && - SrcTy->isIntOrIntVector() && - // Only do this if the casts both really cause code to be generated. - ValueRequiresCast(Op0C->getOpcode(), Op0C->getOperand(0), - I.getType()) && - ValueRequiresCast(Op1C->getOpcode(), Op1C->getOperand(0), - I.getType())) { - Value *NewOp = Builder->CreateAnd(Op0C->getOperand(0), - Op1C->getOperand(0), I.getName()); + if (CastInst *Op1C = dyn_cast<CastInst>(Op1)) { + const Type *SrcTy = Op0C->getOperand(0)->getType(); + if (Op0C->getOpcode() == Op1C->getOpcode() && // same cast kind ? + SrcTy == Op1C->getOperand(0)->getType() && + SrcTy->isIntOrIntVectorTy()) { + Value *Op0COp = Op0C->getOperand(0), *Op1COp = Op1C->getOperand(0); + + // Only do this if the casts both really cause code to be generated. + if (ShouldOptimizeCast(Op0C->getOpcode(), Op0COp, I.getType()) && + ShouldOptimizeCast(Op1C->getOpcode(), Op1COp, I.getType())) { + Value *NewOp = Builder->CreateAnd(Op0COp, Op1COp, I.getName()); return CastInst::Create(Op0C->getOpcode(), NewOp, I.getType()); } + + // If this is and(cast(icmp), cast(icmp)), try to fold this even if the + // cast is otherwise not optimizable. This happens for vector sexts. + if (ICmpInst *RHS = dyn_cast<ICmpInst>(Op1COp)) + if (ICmpInst *LHS = dyn_cast<ICmpInst>(Op0COp)) + if (Instruction *Res = FoldAndOfICmps(I, LHS, RHS)) { + InsertNewInstBefore(Res, I); + return CastInst::Create(Op0C->getOpcode(), Res, I.getType()); + } + + // If this is and(cast(fcmp), cast(fcmp)), try to fold this even if the + // cast is otherwise not optimizable. This happens for vector sexts. + if (FCmpInst *RHS = dyn_cast<FCmpInst>(Op1COp)) + if (FCmpInst *LHS = dyn_cast<FCmpInst>(Op0COp)) + if (Instruction *Res = FoldAndOfFCmps(I, LHS, RHS)) { + InsertNewInstBefore(Res, I); + return CastInst::Create(Op0C->getOpcode(), Res, I.getType()); + } } + } // (X >> Z) & (Y >> Z) -> (X&Y) >> Z for all shifts. if (BinaryOperator *SI1 = dyn_cast<BinaryOperator>(Op1)) { @@ -965,13 +990,6 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) { } } - // If and'ing two fcmp, try combine them into one. - if (FCmpInst *LHS = dyn_cast<FCmpInst>(I.getOperand(0))) { - if (FCmpInst *RHS = dyn_cast<FCmpInst>(I.getOperand(1))) - if (Instruction *Res = FoldAndOfFCmps(I, LHS, RHS)) - return Res; - } - return Changed ? &I : 0; } @@ -1142,18 +1160,20 @@ static Instruction *MatchSelectFromAndOr(Value *A, Value *B, Value *C, Value *D) { // If A is not a select of -1/0, this cannot match. Value *Cond = 0; - if (!match(A, m_SelectCst<-1, 0>(m_Value(Cond)))) + if (!match(A, m_SExt(m_Value(Cond))) || + !Cond->getType()->isIntegerTy(1)) return 0; // ((cond?-1:0)&C) | (B&(cond?0:-1)) -> cond ? C : B. 
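The updated matcher recognizes masks spelled as sign-extended i1 values rather than as select-of-constants: sext of a bool yields an all-ones or all-zero word, so (C & m) | (B & ~m) with m = sext(cond) is exactly cond ? C : B, which is why the diff also gates the fold off for vector types the code generator does not handle well yet. A compilable check of that identity:

  #include <cassert>
  #include <stdint.h>

  // The identity the matcher relies on: sign-extending an i1 yields an
  // all-ones or all-zero mask, so (C & m) | (B & ~m) is a select on m.
  int32_t selectViaMask(bool Cond, int32_t C, int32_t B) {
    int32_t Mask = Cond ? -1 : 0;      // models sext i1 -> i32
    return (C & Mask) | (B & ~Mask);   // == Cond ? C : B
  }

  int main() {
    assert(selectViaMask(true, 7, 9) == 7);
    assert(selectViaMask(false, 7, 9) == 9);
    return 0;
  }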
- if (match(D, m_SelectCst<0, -1>(m_Specific(Cond)))) + if (match(D, m_Not(m_SExt(m_Specific(Cond))))) return SelectInst::Create(Cond, C, B); - if (match(D, m_Not(m_SelectCst<-1, 0>(m_Specific(Cond))))) + if (match(D, m_SExt(m_Not(m_Specific(Cond))))) return SelectInst::Create(Cond, C, B); + // ((cond?-1:0)&C) | ((cond?0:-1)&D) -> cond ? C : D. - if (match(B, m_SelectCst<0, -1>(m_Specific(Cond)))) + if (match(B, m_Not(m_SExt(m_Specific(Cond))))) return SelectInst::Create(Cond, C, D); - if (match(B, m_Not(m_SelectCst<-1, 0>(m_Specific(Cond))))) + if (match(B, m_SExt(m_Not(m_Specific(Cond))))) return SelectInst::Create(Cond, C, D); return 0; } @@ -1224,7 +1244,7 @@ Instruction *InstCombiner::FoldOrOfICmps(Instruction &I, std::swap(LHSCC, RHSCC); } - // At this point, we know we have have two icmp instructions + // At this point, we know we have two icmp instructions // comparing a value against two constants and or'ing the result // together. Because of the above check, we know that we only have // ICMP_EQ, ICMP_NE, ICMP_LT, and ICMP_GT here. We also know (from the @@ -1595,15 +1615,19 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { } } - // (A & (C0?-1:0)) | (B & ~(C0?-1:0)) -> C0 ? A : B, and commuted variants - if (Instruction *Match = MatchSelectFromAndOr(A, B, C, D)) - return Match; - if (Instruction *Match = MatchSelectFromAndOr(B, A, D, C)) - return Match; - if (Instruction *Match = MatchSelectFromAndOr(C, B, A, D)) - return Match; - if (Instruction *Match = MatchSelectFromAndOr(D, A, B, C)) - return Match; + // (A & (C0?-1:0)) | (B & ~(C0?-1:0)) -> C0 ? A : B, and commuted variants. + // Don't do this for vector select idioms, the code generator doesn't handle + // them well yet. + if (!isa<VectorType>(I.getType())) { + if (Instruction *Match = MatchSelectFromAndOr(A, B, C, D)) + return Match; + if (Instruction *Match = MatchSelectFromAndOr(B, A, D, C)) + return Match; + if (Instruction *Match = MatchSelectFromAndOr(C, B, A, D)) + return Match; + if (Instruction *Match = MatchSelectFromAndOr(D, A, B, C)) + return Match; + } // ((A&~B)|(~A&B)) -> A^B if ((match(C, m_Not(m_Specific(D))) && @@ -1663,37 +1687,51 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { if (Instruction *Res = FoldOrOfICmps(I, LHS, RHS)) return Res; + // (fcmp uno x, c) | (fcmp uno y, c) -> (fcmp uno x, y) + if (FCmpInst *LHS = dyn_cast<FCmpInst>(I.getOperand(0))) + if (FCmpInst *RHS = dyn_cast<FCmpInst>(I.getOperand(1))) + if (Instruction *Res = FoldOrOfFCmps(I, LHS, RHS)) + return Res; + // fold (or (cast A), (cast B)) -> (cast (or A, B)) if (CastInst *Op0C = dyn_cast<CastInst>(Op0)) { if (CastInst *Op1C = dyn_cast<CastInst>(Op1)) if (Op0C->getOpcode() == Op1C->getOpcode()) {// same cast kind ? - if (!isa<ICmpInst>(Op0C->getOperand(0)) || - !isa<ICmpInst>(Op1C->getOperand(0))) { - const Type *SrcTy = Op0C->getOperand(0)->getType(); - if (SrcTy == Op1C->getOperand(0)->getType() && - SrcTy->isIntOrIntVector() && + const Type *SrcTy = Op0C->getOperand(0)->getType(); + if (SrcTy == Op1C->getOperand(0)->getType() && + SrcTy->isIntOrIntVectorTy()) { + Value *Op0COp = Op0C->getOperand(0), *Op1COp = Op1C->getOperand(0); + + if ((!isa<ICmpInst>(Op0COp) || !isa<ICmpInst>(Op1COp)) && // Only do this if the casts both really cause code to be // generated. 
- ValueRequiresCast(Op0C->getOpcode(), Op0C->getOperand(0), - I.getType()) && - ValueRequiresCast(Op1C->getOpcode(), Op1C->getOperand(0), - I.getType())) { - Value *NewOp = Builder->CreateOr(Op0C->getOperand(0), - Op1C->getOperand(0), I.getName()); + ShouldOptimizeCast(Op0C->getOpcode(), Op0COp, I.getType()) && + ShouldOptimizeCast(Op1C->getOpcode(), Op1COp, I.getType())) { + Value *NewOp = Builder->CreateOr(Op0COp, Op1COp, I.getName()); return CastInst::Create(Op0C->getOpcode(), NewOp, I.getType()); } + + // If this is or(cast(icmp), cast(icmp)), try to fold this even if the + // cast is otherwise not optimizable. This happens for vector sexts. + if (ICmpInst *RHS = dyn_cast<ICmpInst>(Op1COp)) + if (ICmpInst *LHS = dyn_cast<ICmpInst>(Op0COp)) + if (Instruction *Res = FoldOrOfICmps(I, LHS, RHS)) { + InsertNewInstBefore(Res, I); + return CastInst::Create(Op0C->getOpcode(), Res, I.getType()); + } + + // If this is or(cast(fcmp), cast(fcmp)), try to fold this even if the + // cast is otherwise not optimizable. This happens for vector sexts. + if (FCmpInst *RHS = dyn_cast<FCmpInst>(Op1COp)) + if (FCmpInst *LHS = dyn_cast<FCmpInst>(Op0COp)) + if (Instruction *Res = FoldOrOfFCmps(I, LHS, RHS)) { + InsertNewInstBefore(Res, I); + return CastInst::Create(Op0C->getOpcode(), Res, I.getType()); + } } } } - - // (fcmp uno x, c) | (fcmp uno y, c) -> (fcmp uno x, y) - if (FCmpInst *LHS = dyn_cast<FCmpInst>(I.getOperand(0))) { - if (FCmpInst *RHS = dyn_cast<FCmpInst>(I.getOperand(1))) - if (Instruction *Res = FoldOrOfFCmps(I, LHS, RHS)) - return Res; - } - return Changed ? &I : 0; } @@ -1978,12 +2016,12 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) { if (CastInst *Op1C = dyn_cast<CastInst>(Op1)) if (Op0C->getOpcode() == Op1C->getOpcode()) { // same cast kind? const Type *SrcTy = Op0C->getOperand(0)->getType(); - if (SrcTy == Op1C->getOperand(0)->getType() && SrcTy->isInteger() && + if (SrcTy == Op1C->getOperand(0)->getType() && SrcTy->isIntegerTy() && // Only do this if the casts both really cause code to be generated. - ValueRequiresCast(Op0C->getOpcode(), Op0C->getOperand(0), - I.getType()) && - ValueRequiresCast(Op1C->getOpcode(), Op1C->getOperand(0), - I.getType())) { + ShouldOptimizeCast(Op0C->getOpcode(), Op0C->getOperand(0), + I.getType()) && + ShouldOptimizeCast(Op1C->getOpcode(), Op1C->getOperand(0), + I.getType())) { Value *NewOp = Builder->CreateXor(Op0C->getOperand(0), Op1C->getOperand(0), I.getName()); return CastInst::Create(Op0C->getOpcode(), NewOp, I.getType()); diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp index 47c37c4..d7efdcf 100644 --- a/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -199,7 +199,7 @@ Instruction *InstCombiner::SimplifyMemSet(MemSetInst *MI) { // Extract the length and alignment and fill if they are constant. ConstantInt *LenC = dyn_cast<ConstantInt>(MI->getLength()); ConstantInt *FillC = dyn_cast<ConstantInt>(MI->getValue()); - if (!LenC || !FillC || !FillC->getType()->isInteger(8)) + if (!LenC || !FillC || !FillC->getType()->isIntegerTy(8)) return 0; uint64_t Len = LenC->getZExtValue(); Alignment = MI->getAlignment(); @@ -230,7 +230,6 @@ Instruction *InstCombiner::SimplifyMemSet(MemSetInst *MI) { return 0; } - /// visitCallInst - CallInst simplification. This mostly only handles folding /// of intrinsic instructions. For normal calls, it allows visitCallSite to do /// the heavy lifting. 
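
// --- Illustrative aside, not part of the patch: standalone checks of two
// identities the hunks above depend on. First, a bitwise op of two same-kind
// casts equals the cast of the op, e.g. zext(a)|zext(b) == zext(a|b), which is
// what "(or (cast A), (cast B)) -> (cast (or A, B))" exploits. Second, the
// visitOr fold ((A&~B)|(~A&B)) == A^B. Exhaustive over 8-bit values.
#include <cassert>
#include <cstdint>

int main() {
  for (unsigned a = 0; a < 256; ++a)
    for (unsigned b = 0; b < 256; ++b) {
      uint8_t A = static_cast<uint8_t>(a), B = static_cast<uint8_t>(b);
      uint32_t ZA = A, ZB = B;                       // zext i8 -> i32
      assert((ZA | ZB) == static_cast<uint32_t>(uint8_t(A | B)));
      assert((ZA & ZB) == static_cast<uint32_t>(uint8_t(A & B)));
      assert((ZA ^ ZB) == static_cast<uint32_t>(uint8_t(A ^ B)));
      assert(((A & ~B) | (~A & B)) == (A ^ B));      // (A&~B)|(~A&B) -> A^B
    }
  return 0;
}
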
@@ -304,6 +303,60 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { switch (II->getIntrinsicID()) { default: break; + case Intrinsic::objectsize: { + const Type *ReturnTy = CI.getType(); + Value *Op1 = II->getOperand(1); + bool Min = (cast<ConstantInt>(II->getOperand(2))->getZExtValue() == 1); + + // We need target data for just about everything so depend on it. + if (!TD) break; + + // Get to the real allocated thing and offset as fast as possible. + Op1 = Op1->stripPointerCasts(); + + // If we've stripped down to a single global variable that we + // can know the size of then just return that. + if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Op1)) { + if (GV->hasDefinitiveInitializer()) { + Constant *C = GV->getInitializer(); + size_t globalSize = TD->getTypeAllocSize(C->getType()); + return ReplaceInstUsesWith(CI, ConstantInt::get(ReturnTy, globalSize)); + } else { + Constant *RetVal = ConstantInt::get(ReturnTy, Min ? 0 : -1ULL); + return ReplaceInstUsesWith(CI, RetVal); + } + } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Op1)) { + + // Only handle constant GEPs here. + if (CE->getOpcode() != Instruction::GetElementPtr) break; + GEPOperator *GEP = cast<GEPOperator>(CE); + + // Make sure we're not a constant offset from an external + // global. + Value *Operand = GEP->getPointerOperand(); + Operand = Operand->stripPointerCasts(); + if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Operand)) + if (!GV->hasDefinitiveInitializer()) break; + + // Get what we're pointing to and its size. + const PointerType *BaseType = + cast<PointerType>(Operand->getType()); + size_t Size = TD->getTypeAllocSize(BaseType->getElementType()); + + // Get the current byte offset into the thing. Use the original + // operand in case we're looking through a bitcast. + SmallVector<Value*, 8> Ops(CE->op_begin()+1, CE->op_end()); + const PointerType *OffsetType = + cast<PointerType>(GEP->getPointerOperand()->getType()); + size_t Offset = TD->getIndexedOffset(OffsetType, &Ops[0], Ops.size()); + + assert(Size >= Offset); + + Constant *RetVal = ConstantInt::get(ReturnTy, Size-Offset); + return ReplaceInstUsesWith(CI, RetVal); + + } + } case Intrinsic::bswap: // bswap(bswap(x)) -> x if (IntrinsicInst *Operand = dyn_cast<IntrinsicInst>(II->getOperand(1))) @@ -632,18 +685,6 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { return EraseInstFromFunction(CI); break; } - case Intrinsic::objectsize: { - ConstantInt *Const = cast<ConstantInt>(II->getOperand(2)); - const Type *Ty = CI.getType(); - - // 0 is maximum number of bytes left, 1 is minimum number of bytes left. - // TODO: actually add these values, the current return values are "don't - // know". - if (Const->getZExtValue() == 0) - return ReplaceInstUsesWith(CI, Constant::getAllOnesValue(Ty)); - else - return ReplaceInstUsesWith(CI, ConstantInt::get(Ty, 0)); - } } return visitCallSite(II); @@ -692,10 +733,14 @@ Instruction *InstCombiner::visitCallSite(CallSite CS) { Value *Callee = CS.getCalledValue(); if (Function *CalleeF = dyn_cast<Function>(Callee)) - if (CalleeF->getCallingConv() != CS.getCallingConv()) { + // If the call and callee calling conventions don't match, this call must + // be unreachable, as the call is undefined. + if (CalleeF->getCallingConv() != CS.getCallingConv() && + // Only do this for calls to a function with a body. A prototype may + // not actually end up matching the implementation's calling conv for a + // variety of reasons (e.g. it may be written in assembly). 
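
// --- Illustrative aside, not part of the patch: what the new objectsize
// handling above computes. For a global whose allocated size is known and a
// constant GEP offset into it, the intrinsic folds to Size - Offset. A
// hypothetical host-side analogue (Buffer/Offset are made-up stand-ins):
#include <cassert>
#include <cstddef>

static char Buffer[100];   // stands in for a global with a known initializer

int main() {
  std::size_t Size = sizeof(Buffer);       // cf. TD->getTypeAllocSize(...)
  std::size_t Offset = 25;                 // cf. TD->getIndexedOffset(...)
  assert(Size >= Offset);
  std::size_t Remaining = Size - Offset;   // the folded llvm.objectsize result
  assert(Remaining == 75);
  return 0;
}
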
+ !CalleeF->isDeclaration()) { Instruction *OldCall = CS.getInstruction(); - // If the call and callee calling conventions don't match, this call must - // be unreachable, as the call is undefined. new StoreInst(ConstantInt::getTrue(Callee->getContext()), UndefValue::get(Type::getInt1PtrTy(Callee->getContext())), OldCall); @@ -703,8 +748,13 @@ Instruction *InstCombiner::visitCallSite(CallSite CS) { // This allows ValueHandlers and custom metadata to adjust itself. if (!OldCall->getType()->isVoidTy()) OldCall->replaceAllUsesWith(UndefValue::get(OldCall->getType())); - if (isa<CallInst>(OldCall)) // Not worth removing an invoke here. + if (isa<CallInst>(OldCall)) return EraseInstFromFunction(*OldCall); + + // We cannot remove an invoke, because it would change the CFG, just + // change the callee to a null pointer. + cast<InvokeInst>(OldCall)->setOperand(0, + Constant::getNullValue(CalleeF->getType())); return 0; } diff --git a/lib/Transforms/InstCombine/InstCombineCasts.cpp b/lib/Transforms/InstCombine/InstCombineCasts.cpp index f25dd35..bb4a0e9 100644 --- a/lib/Transforms/InstCombine/InstCombineCasts.cpp +++ b/lib/Transforms/InstCombine/InstCombineCasts.cpp @@ -23,7 +23,7 @@ using namespace PatternMatch; /// static Value *DecomposeSimpleLinearExpr(Value *Val, unsigned &Scale, int &Offset) { - assert(Val->getType()->isInteger(32) && "Unexpected allocation size type!"); + assert(Val->getType()->isIntegerTy(32) && "Unexpected allocation size type!"); if (ConstantInt *CI = dyn_cast<ConstantInt>(Val)) { Offset = CI->getZExtValue(); Scale = 0; @@ -255,17 +255,26 @@ isEliminableCastPair( return Instruction::CastOps(Res); } -/// ValueRequiresCast - Return true if the cast from "V to Ty" actually results -/// in any code being generated. It does not require codegen if V is simple -/// enough or if the cast can be folded into other casts. -bool InstCombiner::ValueRequiresCast(Instruction::CastOps opcode,const Value *V, - const Type *Ty) { +/// ShouldOptimizeCast - Return true if the cast from "V to Ty" actually +/// results in any code being generated and is interesting to optimize out. If +/// the cast can be eliminated by some other simple transformation, we prefer +/// to do the simplification first. +bool InstCombiner::ShouldOptimizeCast(Instruction::CastOps opc, const Value *V, + const Type *Ty) { + // Noop casts and casts of constants should be eliminated trivially. if (V->getType() == Ty || isa<Constant>(V)) return false; - // If this is another cast that can be eliminated, it isn't codegen either. + // If this is another cast that can be eliminated, we prefer to have it + // eliminated. if (const CastInst *CI = dyn_cast<CastInst>(V)) - if (isEliminableCastPair(CI, opcode, Ty, TD)) + if (isEliminableCastPair(CI, opc, Ty, TD)) return false; + + // If this is a vector sext from a compare, then we don't want to break the + // idiom where each element of the extended vector is either zero or all ones. 
+ if (opc == Instruction::SExt && isa<CmpInst>(V) && isa<VectorType>(Ty)) + return false; + return true; } @@ -828,7 +837,7 @@ Instruction *InstCombiner::visitZExt(ZExtInst &CI) { // zext (xor i1 X, true) to i32 --> xor (zext i1 X to i32), 1 Value *X; - if (SrcI && SrcI->hasOneUse() && SrcI->getType()->isInteger(1) && + if (SrcI && SrcI->hasOneUse() && SrcI->getType()->isIntegerTy(1) && match(SrcI, m_Not(m_Value(X))) && (!X->hasOneUse() || !isa<CmpInst>(X))) { Value *New = Builder->CreateZExt(X, CI.getType()); @@ -923,12 +932,6 @@ Instruction *InstCombiner::visitSExt(SExtInst &CI) { Value *Src = CI.getOperand(0); const Type *SrcTy = Src->getType(), *DestTy = CI.getType(); - // Canonicalize sign-extend from i1 to a select. - if (Src->getType()->isInteger(1)) - return SelectInst::Create(Src, - Constant::getAllOnesValue(CI.getType()), - Constant::getNullValue(CI.getType())); - // Attempt to extend the entire input expression tree to the destination // type. Only do this if the dest type is a simple type, don't convert the // expression tree to something weird like i93 unless the source is also @@ -968,6 +971,30 @@ Instruction *InstCombiner::visitSExt(SExtInst &CI) { return BinaryOperator::CreateAShr(Res, ShAmt); } + + // (x <s 0) ? -1 : 0 -> ashr x, 31 -> all ones if signed + // (x >s -1) ? -1 : 0 -> ashr x, 31 -> all ones if not signed + { + ICmpInst::Predicate Pred; Value *CmpLHS; ConstantInt *CmpRHS; + if (match(Src, m_ICmp(Pred, m_Value(CmpLHS), m_ConstantInt(CmpRHS)))) { + // sext (x <s 0) to i32 --> x>>s31 true if signbit set. + // sext (x >s -1) to i32 --> (x>>s31)^-1 true if signbit clear. + if ((Pred == ICmpInst::ICMP_SLT && CmpRHS->isZero()) || + (Pred == ICmpInst::ICMP_SGT && CmpRHS->isAllOnesValue())) { + Value *Sh = ConstantInt::get(CmpLHS->getType(), + CmpLHS->getType()->getScalarSizeInBits()-1); + Value *In = Builder->CreateAShr(CmpLHS, Sh, CmpLHS->getName()+".lobit"); + if (In->getType() != CI.getType()) + In = Builder->CreateIntCast(In, CI.getType(), true/*SExt*/, "tmp"); + + if (Pred == ICmpInst::ICMP_SGT) + In = Builder->CreateNot(In, In->getName()+".not"); + return ReplaceInstUsesWith(CI, In); + } + } + } + + // If the input is a shl/ashr pair of a same constant, then this is a sign // extension from a smaller value. If we could trust arbitrary bitwidth // integers, we could turn this into a truncate to the smaller bit and then @@ -1127,16 +1154,22 @@ Instruction *InstCombiner::visitSIToFP(CastInst &CI) { } Instruction *InstCombiner::visitIntToPtr(IntToPtrInst &CI) { - // If the source integer type is larger than the intptr_t type for - // this target, do a trunc to the intptr_t type, then inttoptr of it. This - // allows the trunc to be exposed to other transforms. Don't do this for - // extending inttoptr's, because we don't know if the target sign or zero - // extends to pointers. - if (TD && CI.getOperand(0)->getType()->getScalarSizeInBits() > - TD->getPointerSizeInBits()) { - Value *P = Builder->CreateTrunc(CI.getOperand(0), - TD->getIntPtrType(CI.getContext()), "tmp"); - return new IntToPtrInst(P, CI.getType()); + // If the source integer type is not the intptr_t type for this target, do a + // trunc or zext to the intptr_t type, then inttoptr of it. This allows the + // cast to be exposed to other transforms. 
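
// --- Illustrative aside, not part of the patch: standalone checks of two
// folds above. "sext (x <s 0)" is the sign mask x >>s 31 and "sext (x >s -1)"
// is its complement; also zext(!c) == zext(c) ^ 1 for the xor-of-i1 fold.
// Assumes arithmetic right shift for signed ints (guaranteed since C++20 and
// true on common compilers).
#include <cassert>
#include <cstdint>

int main() {
  for (int32_t x : {0, 1, -1, 42, -42, INT32_MIN, INT32_MAX}) {
    int32_t SignMask = x >> 31;                  // ashr x, 31
    assert(SignMask == (x < 0 ? -1 : 0));        // sext (x <s 0)
    assert(~SignMask == (x > -1 ? -1 : 0));      // sext (x >s -1)
  }
  for (bool c : {false, true})
    assert(static_cast<uint32_t>(!c) == (static_cast<uint32_t>(c) ^ 1u));
  return 0;
}
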
+ if (TD) { + if (CI.getOperand(0)->getType()->getScalarSizeInBits() > + TD->getPointerSizeInBits()) { + Value *P = Builder->CreateTrunc(CI.getOperand(0), + TD->getIntPtrType(CI.getContext()), "tmp"); + return new IntToPtrInst(P, CI.getType()); + } + if (CI.getOperand(0)->getType()->getScalarSizeInBits() < + TD->getPointerSizeInBits()) { + Value *P = Builder->CreateZExt(CI.getOperand(0), + TD->getIntPtrType(CI.getContext()), "tmp"); + return new IntToPtrInst(P, CI.getType()); + } } if (Instruction *I = commonCastTransforms(CI)) @@ -1198,17 +1231,22 @@ Instruction *InstCombiner::commonPointerCastTransforms(CastInst &CI) { } Instruction *InstCombiner::visitPtrToInt(PtrToIntInst &CI) { - // If the destination integer type is smaller than the intptr_t type for - // this target, do a ptrtoint to intptr_t then do a trunc. This allows the - // trunc to be exposed to other transforms. Don't do this for extending - // ptrtoint's, because we don't know if the target sign or zero extends its - // pointers. - if (TD && - CI.getType()->getScalarSizeInBits() < TD->getPointerSizeInBits()) { - Value *P = Builder->CreatePtrToInt(CI.getOperand(0), - TD->getIntPtrType(CI.getContext()), - "tmp"); - return new TruncInst(P, CI.getType()); + // If the destination integer type is not the intptr_t type for this target, + // do a ptrtoint to intptr_t then do a trunc or zext. This allows the cast + // to be exposed to other transforms. + if (TD) { + if (CI.getType()->getScalarSizeInBits() < TD->getPointerSizeInBits()) { + Value *P = Builder->CreatePtrToInt(CI.getOperand(0), + TD->getIntPtrType(CI.getContext()), + "tmp"); + return new TruncInst(P, CI.getType()); + } + if (CI.getType()->getScalarSizeInBits() > TD->getPointerSizeInBits()) { + Value *P = Builder->CreatePtrToInt(CI.getOperand(0), + TD->getIntPtrType(CI.getContext()), + "tmp"); + return new ZExtInst(P, CI.getType()); + } } return commonPointerCastTransforms(CI); diff --git a/lib/Transforms/InstCombine/InstCombineCompares.cpp b/lib/Transforms/InstCombine/InstCombineCompares.cpp index e59406c6..72af80f 100644 --- a/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -1589,24 +1589,24 @@ Instruction *InstCombiner::visitICmpInstWithCastAndCast(ICmpInst &ICI) { Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { bool Changed = false; + Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); /// Orders the operands of the compare so that they are listed from most /// complex to least complex. This puts constants before unary operators, /// before binary operators. 
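
// --- Illustrative aside, not part of the patch: the canonicalization above
// says a ptrtoint to a narrow type equals ptrtoint to intptr_t followed by a
// trunc (with zext playing the symmetric role for widening). A hypothetical
// host-side check, assuming a 64-bit pointer width:
#include <cassert>
#include <cstdint>

int main() {
  int Object = 0;
  uintptr_t P = reinterpret_cast<uintptr_t>(&Object);  // ptrtoint to intptr_t
  uint32_t Narrow = static_cast<uint32_t>(P);          // direct ptrtoint to i32
  uint32_t ViaIntPtr = static_cast<uint32_t>(static_cast<uint64_t>(P));
  assert(Narrow == ViaIntPtr);                         // == trunc(intptr_t)
  return 0;
}
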
- if (getComplexity(I.getOperand(0)) < getComplexity(I.getOperand(1))) { + if (getComplexity(Op0) < getComplexity(Op1)) { I.swapOperands(); + std::swap(Op0, Op1); Changed = true; } - Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); - if (Value *V = SimplifyICmpInst(I.getPredicate(), Op0, Op1, TD)) return ReplaceInstUsesWith(I, V); const Type *Ty = Op0->getType(); // icmp's with boolean values can always be turned into bitwise operations - if (Ty == Type::getInt1Ty(I.getContext())) { + if (Ty->isIntegerTy(1)) { switch (I.getPredicate()) { default: llvm_unreachable("Invalid icmp instruction!"); case ICmpInst::ICMP_EQ: { // icmp eq i1 A, B -> ~(A^B) @@ -1650,7 +1650,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { unsigned BitWidth = 0; if (TD) BitWidth = TD->getTypeSizeInBits(Ty->getScalarType()); - else if (Ty->isIntOrIntVector()) + else if (Ty->isIntOrIntVectorTy()) BitWidth = Ty->getScalarSizeInBits(); bool isSignBit = false; diff --git a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp index ae728dd..e6c59c7 100644 --- a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp +++ b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp @@ -87,7 +87,7 @@ static Instruction *InstCombineLoadCast(InstCombiner &IC, LoadInst &LI, const Type *SrcPTy = SrcTy->getElementType(); - if (DestPTy->isInteger() || isa<PointerType>(DestPTy) || + if (DestPTy->isIntegerTy() || isa<PointerType>(DestPTy) || isa<VectorType>(DestPTy)) { // If the source is an array, the code below will not succeed. Check to // see if a trivial 'gep P, 0, 0' will help matters. Only do this for @@ -104,7 +104,7 @@ static Instruction *InstCombineLoadCast(InstCombiner &IC, LoadInst &LI, } if (IC.getTargetData() && - (SrcPTy->isInteger() || isa<PointerType>(SrcPTy) || + (SrcPTy->isIntegerTy() || isa<PointerType>(SrcPTy) || isa<VectorType>(SrcPTy)) && // Do not allow turning this into a load of an integer, which is then // casted to a pointer, this pessimizes pointer analysis a lot. @@ -115,8 +115,9 @@ static Instruction *InstCombineLoadCast(InstCombiner &IC, LoadInst &LI, // Okay, we are casting from one integer or pointer type to another of // the same size. Instead of casting the pointer before the load, cast // the result of the loaded value. - Value *NewLoad = + LoadInst *NewLoad = IC.Builder->CreateLoad(CastOp, LI.isVolatile(), CI->getName()); + NewLoad->setAlignment(LI.getAlignment()); // Now cast the result of the load. return new BitCastInst(NewLoad, LI.getType()); } @@ -199,12 +200,15 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) { // if (SelectInst *SI = dyn_cast<SelectInst>(Op)) { // load (select (Cond, &V1, &V2)) --> select(Cond, load &V1, load &V2). 
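
// --- Illustrative aside, not part of the patch: the i1 icmp lowering above.
// For booleans, "icmp eq i1 A, B" is ~(A^B), i.e. !(A^B), and "icmp ne" is
// A^B itself. Exhaustive standalone check:
#include <cassert>

int main() {
  for (bool A : {false, true})
    for (bool B : {false, true}) {
      assert((A == B) == !(A ^ B));   // icmp eq i1 A, B -> ~(A^B)
      assert((A != B) == (A ^ B));    // icmp ne i1 A, B -> A^B
    }
  return 0;
}
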
- if (isSafeToLoadUnconditionally(SI->getOperand(1), SI) && - isSafeToLoadUnconditionally(SI->getOperand(2), SI)) { - Value *V1 = Builder->CreateLoad(SI->getOperand(1), - SI->getOperand(1)->getName()+".val"); - Value *V2 = Builder->CreateLoad(SI->getOperand(2), - SI->getOperand(2)->getName()+".val"); + unsigned Align = LI.getAlignment(); + if (isSafeToLoadUnconditionally(SI->getOperand(1), SI, Align, TD) && + isSafeToLoadUnconditionally(SI->getOperand(2), SI, Align, TD)) { + LoadInst *V1 = Builder->CreateLoad(SI->getOperand(1), + SI->getOperand(1)->getName()+".val"); + LoadInst *V2 = Builder->CreateLoad(SI->getOperand(2), + SI->getOperand(2)->getName()+".val"); + V1->setAlignment(Align); + V2->setAlignment(Align); return SelectInst::Create(SI->getCondition(), V1, V2); } @@ -239,7 +243,7 @@ static Instruction *InstCombineStoreToCast(InstCombiner &IC, StoreInst &SI) { const Type *SrcPTy = SrcTy->getElementType(); - if (!DestPTy->isInteger() && !isa<PointerType>(DestPTy)) + if (!DestPTy->isIntegerTy() && !isa<PointerType>(DestPTy)) return 0; /// NewGEPIndices - If SrcPTy is an aggregate type, we can emit a "noop gep" @@ -273,7 +277,7 @@ static Instruction *InstCombineStoreToCast(InstCombiner &IC, StoreInst &SI) { SrcTy = PointerType::get(SrcPTy, SrcTy->getAddressSpace()); } - if (!SrcPTy->isInteger() && !isa<PointerType>(SrcPTy)) + if (!SrcPTy->isIntegerTy() && !isa<PointerType>(SrcPTy)) return 0; // If the pointers point into different address spaces or if they point to @@ -294,7 +298,7 @@ static Instruction *InstCombineStoreToCast(InstCombiner &IC, StoreInst &SI) { const Type* CastSrcTy = SIOp0->getType(); const Type* CastDstTy = SrcPTy; if (isa<PointerType>(CastDstTy)) { - if (CastSrcTy->isInteger()) + if (CastSrcTy->isIntegerTy()) opcode = Instruction::IntToPtr; } else if (isa<IntegerType>(CastDstTy)) { if (isa<PointerType>(SIOp0->getType())) diff --git a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp index 2e26a75..668c34f 100644 --- a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp +++ b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp @@ -157,7 +157,7 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) { } /// i1 mul -> i1 and. - if (I.getType()->isInteger(1)) + if (I.getType()->isIntegerTy(1)) return BinaryOperator::CreateAnd(Op0, Op1); // X*(1 << Y) --> X << Y @@ -314,7 +314,7 @@ Instruction *InstCombiner::commonDivTransforms(BinaryOperator &I) { // undef / X -> 0 for integer. // undef / X -> undef for FP (the undef could be a snan). if (isa<UndefValue>(Op0)) { - if (Op0->getType()->isFPOrFPVector()) + if (Op0->getType()->isFPOrFPVectorTy()) return ReplaceInstUsesWith(I, Op0); return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); } @@ -386,7 +386,7 @@ Instruction *InstCombiner::commonIDivTransforms(BinaryOperator &I) { return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); // It can't be division by zero, hence it must be division by one. - if (I.getType()->isInteger(1)) + if (I.getType()->isIntegerTy(1)) return ReplaceInstUsesWith(I, Op0); if (ConstantVector *Op1V = dyn_cast<ConstantVector>(Op1)) { @@ -493,7 +493,7 @@ Instruction *InstCombiner::visitSDiv(BinaryOperator &I) { // If the sign bits of both operands are zero (i.e. we can prove they are // unsigned inputs), turn this into a udiv. 
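
// --- Illustrative aside, not part of the patch: two multiply folds above.
// For i1 values, mul is and; and X*(1 << Y) == X << Y for unsigned X (both
// sides wrap modulo 2^32 identically). Y is kept below the bit width to
// avoid undefined shifts in C++:
#include <cassert>
#include <cstdint>

int main() {
  for (unsigned A = 0; A <= 1; ++A)
    for (unsigned B = 0; B <= 1; ++B)
      assert(A * B == (A & B));                      // i1 mul -> i1 and

  for (uint32_t X : {0u, 1u, 0xDEADBEEFu})
    for (unsigned Y = 0; Y < 32; ++Y)
      assert(X * (uint32_t(1) << Y) == (X << Y));    // X*(1 << Y) --> X << Y
  return 0;
}
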
- if (I.getType()->isInteger()) { + if (I.getType()->isIntegerTy()) { APInt Mask(APInt::getSignBit(I.getType()->getPrimitiveSizeInBits())); if (MaskedValueIsZero(Op0, Mask)) { if (MaskedValueIsZero(Op1, Mask)) { @@ -527,7 +527,7 @@ Instruction *InstCombiner::commonRemTransforms(BinaryOperator &I) { Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); if (isa<UndefValue>(Op0)) { // undef % X -> 0 - if (I.getType()->isFPOrFPVector()) + if (I.getType()->isFPOrFPVectorTy()) return ReplaceInstUsesWith(I, Op0); // X % undef -> undef (could be SNaN) return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); } @@ -648,7 +648,7 @@ Instruction *InstCombiner::visitSRem(BinaryOperator &I) { // If the sign bits of both operands are zero (i.e. we can prove they are // unsigned inputs), turn this into a urem. - if (I.getType()->isInteger()) { + if (I.getType()->isIntegerTy()) { APInt Mask(APInt::getSignBit(I.getType()->getPrimitiveSizeInBits())); if (MaskedValueIsZero(Op1, Mask) && MaskedValueIsZero(Op0, Mask)) { // X srem Y -> X urem Y, iff X and Y don't have sign bit set diff --git a/lib/Transforms/InstCombine/InstCombineSelect.cpp b/lib/Transforms/InstCombine/InstCombineSelect.cpp index 18b2dff..7807d9a 100644 --- a/lib/Transforms/InstCombine/InstCombineSelect.cpp +++ b/lib/Transforms/InstCombine/InstCombineSelect.cpp @@ -326,44 +326,6 @@ Instruction *InstCombiner::visitSelectInstWithICmp(SelectInst &SI, break; } } - - // (x <s 0) ? -1 : 0 -> ashr x, 31 -> all ones if signed - // (x >s -1) ? -1 : 0 -> ashr x, 31 -> all ones if not signed - CmpInst::Predicate Pred = CmpInst::BAD_ICMP_PREDICATE; - if (match(TrueVal, m_ConstantInt<-1>()) && - match(FalseVal, m_ConstantInt<0>())) - Pred = ICI->getPredicate(); - else if (match(TrueVal, m_ConstantInt<0>()) && - match(FalseVal, m_ConstantInt<-1>())) - Pred = CmpInst::getInversePredicate(ICI->getPredicate()); - - if (Pred != CmpInst::BAD_ICMP_PREDICATE) { - // If we are just checking for a icmp eq of a single bit and zext'ing it - // to an integer, then shift the bit to the appropriate place and then - // cast to integer to avoid the comparison. - const APInt &Op1CV = CI->getValue(); - - // sext (x <s 0) to i32 --> x>>s31 true if signbit set. - // sext (x >s -1) to i32 --> (x>>s31)^-1 true if signbit clear. 
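
// --- Illustrative aside, not part of the patch: the sdiv->udiv and
// srem->urem folds above. When the sign bits of both operands are known to
// be zero, signed and unsigned division and remainder agree. Standalone
// check over a few non-negative samples:
#include <cassert>
#include <cstdint>

int main() {
  for (int32_t A : {0, 1, 7, 100, INT32_MAX})
    for (int32_t B : {1, 2, 3, 1000, INT32_MAX}) {
      uint32_t UA = static_cast<uint32_t>(A), UB = static_cast<uint32_t>(B);
      assert(static_cast<uint32_t>(A / B) == UA / UB);  // X sdiv Y == X udiv Y
      assert(static_cast<uint32_t>(A % B) == UA % UB);  // X srem Y == X urem Y
    }
  return 0;
}
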
- if ((Pred == ICmpInst::ICMP_SLT && Op1CV == 0) || - (Pred == ICmpInst::ICMP_SGT && Op1CV.isAllOnesValue())) { - Value *In = ICI->getOperand(0); - Value *Sh = ConstantInt::get(In->getType(), - In->getType()->getScalarSizeInBits()-1); - In = InsertNewInstBefore(BinaryOperator::CreateAShr(In, Sh, - In->getName()+".lobit"), - *ICI); - if (In->getType() != SI.getType()) - In = CastInst::CreateIntegerCast(In, SI.getType(), - true/*SExt*/, "tmp", ICI); - - if (Pred == ICmpInst::ICMP_SGT) - In = InsertNewInstBefore(BinaryOperator::CreateNot(In, - In->getName()+".not"), *ICI); - - return ReplaceInstUsesWith(SI, In); - } - } } if (CmpLHS == TrueVal && CmpRHS == FalseVal) { @@ -479,7 +441,7 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { return ReplaceInstUsesWith(SI, FalseVal); } - if (SI.getType()->isInteger(1)) { + if (SI.getType()->isIntegerTy(1)) { if (ConstantInt *C = dyn_cast<ConstantInt>(TrueVal)) { if (C->getZExtValue()) { // Change: A = select B, true, C --> A = or B, C @@ -516,16 +478,25 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { if (ConstantInt *TrueValC = dyn_cast<ConstantInt>(TrueVal)) if (ConstantInt *FalseValC = dyn_cast<ConstantInt>(FalseVal)) { // select C, 1, 0 -> zext C to int - if (FalseValC->isZero() && TrueValC->getValue() == 1) { - return CastInst::Create(Instruction::ZExt, CondVal, SI.getType()); - } else if (TrueValC->isZero() && FalseValC->getValue() == 1) { - // select C, 0, 1 -> zext !C to int - Value *NotCond = - InsertNewInstBefore(BinaryOperator::CreateNot(CondVal, - "not."+CondVal->getName()), SI); - return CastInst::Create(Instruction::ZExt, NotCond, SI.getType()); + if (FalseValC->isZero() && TrueValC->getValue() == 1) + return new ZExtInst(CondVal, SI.getType()); + + // select C, -1, 0 -> sext C to int + if (FalseValC->isZero() && TrueValC->isAllOnesValue()) + return new SExtInst(CondVal, SI.getType()); + + // select C, 0, 1 -> zext !C to int + if (TrueValC->isZero() && FalseValC->getValue() == 1) { + Value *NotCond = Builder->CreateNot(CondVal, "not."+CondVal->getName()); + return new ZExtInst(NotCond, SI.getType()); } + // select C, 0, -1 -> sext !C to int + if (TrueValC->isZero() && FalseValC->isAllOnesValue()) { + Value *NotCond = Builder->CreateNot(CondVal, "not."+CondVal->getName()); + return new SExtInst(NotCond, SI.getType()); + } + if (ICmpInst *IC = dyn_cast<ICmpInst>(SI.getCondition())) { // If one of the constants is zero (we know they can't both be) and we // have an icmp instruction with zero, and we have an 'and' with the @@ -547,8 +518,7 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { ShouldNotVal ^= IC->getPredicate() == ICmpInst::ICMP_NE; Value *V = ICA; if (ShouldNotVal) - V = InsertNewInstBefore(BinaryOperator::Create( - Instruction::Xor, V, ICA->getOperand(1)), SI); + V = Builder->CreateXor(V, ICA->getOperand(1)); return ReplaceInstUsesWith(SI, V); } } @@ -659,7 +629,7 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { } // See if we can fold the select into one of our operands. 
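
// --- Illustrative aside, not part of the patch: the constant-select folds
// above. With a boolean c, (c ? 1 : 0) is zext(c), (c ? -1 : 0) is sext(c),
// and the swapped forms are the same casts of !c. Standalone check:
#include <cassert>
#include <cstdint>

int main() {
  for (bool c : {false, true}) {
    assert((c ? 1 : 0) == static_cast<int32_t>(c));      // select C, 1, 0
    assert((c ? -1 : 0) == -static_cast<int32_t>(c));    // select C, -1, 0
    assert((c ? 0 : 1) == static_cast<int32_t>(!c));     // select C, 0, 1
    assert((c ? 0 : -1) == -static_cast<int32_t>(!c));   // select C, 0, -1
  }
  return 0;
}
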
- if (SI.getType()->isInteger()) { + if (SI.getType()->isIntegerTy()) { if (Instruction *FoldI = FoldSelectIntoOp(SI, TrueVal, FalseVal)) return FoldI; diff --git a/lib/Transforms/InstCombine/InstCombineShifts.cpp b/lib/Transforms/InstCombine/InstCombineShifts.cpp index 321c91d..836bda3 100644 --- a/lib/Transforms/InstCombine/InstCombineShifts.cpp +++ b/lib/Transforms/InstCombine/InstCombineShifts.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "InstCombine.h" +#include "llvm/IntrinsicInst.h" #include "llvm/Support/PatternMatch.h" using namespace llvm; using namespace PatternMatch; @@ -69,10 +70,9 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1, if (Op1->uge(TypeBits)) { if (I.getOpcode() != Instruction::AShr) return ReplaceInstUsesWith(I, Constant::getNullValue(Op0->getType())); - else { - I.setOperand(1, ConstantInt::get(I.getType(), TypeBits-1)); - return &I; - } + // ashr i32 X, 32 --> ashr i32 X, 31 + I.setOperand(1, ConstantInt::get(I.getType(), TypeBits-1)); + return &I; } // ((X*C1) << C2) == (X * (C1 << C2)) @@ -387,7 +387,29 @@ Instruction *InstCombiner::visitShl(BinaryOperator &I) { } Instruction *InstCombiner::visitLShr(BinaryOperator &I) { - return commonShiftTransforms(I); + if (Instruction *R = commonShiftTransforms(I)) + return R; + + Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); + + if (ConstantInt *Op1C = dyn_cast<ConstantInt>(Op1)) + if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Op0)) { + unsigned BitWidth = Op0->getType()->getScalarSizeInBits(); + // ctlz.i32(x)>>5 --> zext(x == 0) + // cttz.i32(x)>>5 --> zext(x == 0) + // ctpop.i32(x)>>5 --> zext(x == -1) + if ((II->getIntrinsicID() == Intrinsic::ctlz || + II->getIntrinsicID() == Intrinsic::cttz || + II->getIntrinsicID() == Intrinsic::ctpop) && + isPowerOf2_32(BitWidth) && Log2_32(BitWidth) == Op1C->getZExtValue()){ + bool isCtPop = II->getIntrinsicID() == Intrinsic::ctpop; + Constant *RHS = ConstantInt::getSigned(Op0->getType(), isCtPop ? -1:0); + Value *Cmp = Builder->CreateICmpEQ(II->getOperand(1), RHS); + return new ZExtInst(Cmp, II->getType()); + } + } + + return 0; } Instruction *InstCombiner::visitAShr(BinaryOperator &I) { diff --git a/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp index 74a1b68..5e9a52f 100644 --- a/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp +++ b/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp @@ -107,7 +107,7 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, assert((TD || !isa<PointerType>(VTy)) && "SimplifyDemandedBits needs to know bit widths!"); assert((!TD || TD->getTypeSizeInBits(VTy->getScalarType()) == BitWidth) && - (!VTy->isIntOrIntVector() || + (!VTy->isIntOrIntVectorTy() || VTy->getScalarSizeInBits() == BitWidth) && KnownZero.getBitWidth() == BitWidth && KnownOne.getBitWidth() == BitWidth && @@ -138,11 +138,11 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, return 0; APInt LHSKnownZero(BitWidth, 0), LHSKnownOne(BitWidth, 0); - APInt &RHSKnownZero = KnownZero, &RHSKnownOne = KnownOne; + APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0); Instruction *I = dyn_cast<Instruction>(V); if (!I) { - ComputeMaskedBits(V, DemandedMask, RHSKnownZero, RHSKnownOne, Depth); + ComputeMaskedBits(V, DemandedMask, KnownZero, KnownOne, Depth); return 0; // Only analyze instructions. 
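
// --- Illustrative aside, not part of the patch: the new lshr folds above.
// ctpop(x) on an i32 is at most 32 and equals 32 only when x == -1, so
// ctpop(x) >> 5 is zext(x == -1); likewise ctlz(x) >> 5 is zext(x == 0)
// (and symmetrically for cttz). Standalone check using portable bit loops
// in place of the intrinsics:
#include <cassert>
#include <cstdint>

static unsigned ctpop32(uint32_t x) {
  unsigned n = 0;
  for (; x; x &= x - 1) ++n;     // clear lowest set bit per iteration
  return n;
}
static unsigned ctlz32(uint32_t x) {   // defined as 32 for x == 0 here
  unsigned n = 0;
  for (uint32_t bit = 0x80000000u; bit && !(x & bit); bit >>= 1) ++n;
  return n;
}

int main() {
  for (uint32_t x : {0u, 1u, 2u, 0x80000000u, 0xFFFFFFFEu, 0xFFFFFFFFu}) {
    assert((ctpop32(x) >> 5) == (x == 0xFFFFFFFFu ? 1u : 0u));
    assert((ctlz32(x) >> 5) == (x == 0u ? 1u : 0u));
  }
  return 0;
}
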
} @@ -219,7 +219,7 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, switch (I->getOpcode()) { default: - ComputeMaskedBits(I, DemandedMask, RHSKnownZero, RHSKnownOne, Depth); + ComputeMaskedBits(I, DemandedMask, KnownZero, KnownOne, Depth); break; case Instruction::And: // If either the LHS or the RHS are Zero, the result is zero. @@ -249,9 +249,9 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, return I; // Output known-1 bits are only known if set in both the LHS & RHS. - RHSKnownOne &= LHSKnownOne; + KnownOne = RHSKnownOne & LHSKnownOne; // Output known-0 are known to be clear if zero in either the LHS | RHS. - RHSKnownZero |= LHSKnownZero; + KnownZero = RHSKnownZero | LHSKnownZero; break; case Instruction::Or: // If either the LHS or the RHS are One, the result is One. @@ -286,9 +286,9 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, return I; // Output known-0 bits are only known if clear in both the LHS & RHS. - RHSKnownZero &= LHSKnownZero; + KnownZero = RHSKnownZero & LHSKnownZero; // Output known-1 are known to be set if set in either the LHS | RHS. - RHSKnownOne |= LHSKnownOne; + KnownOne = RHSKnownOne | LHSKnownOne; break; case Instruction::Xor: { if (SimplifyDemandedBits(I->getOperandUse(1), DemandedMask, @@ -306,13 +306,6 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, if ((DemandedMask & LHSKnownZero) == DemandedMask) return I->getOperand(1); - // Output known-0 bits are known if clear or set in both the LHS & RHS. - APInt KnownZeroOut = (RHSKnownZero & LHSKnownZero) | - (RHSKnownOne & LHSKnownOne); - // Output known-1 are known to be set if set in only one of the LHS, RHS. - APInt KnownOneOut = (RHSKnownZero & LHSKnownOne) | - (RHSKnownOne & LHSKnownZero); - // If all of the demanded bits are known to be zero on one side or the // other, turn this into an *inclusive* or. // e.g. (A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0 @@ -368,10 +361,11 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, BinaryOperator::CreateXor(NewAnd, XorC, "tmp"); return InsertNewInstBefore(NewXor, *I); } - - - RHSKnownZero = KnownZeroOut; - RHSKnownOne = KnownOneOut; + + // Output known-0 bits are known if clear or set in both the LHS & RHS. + KnownZero= (RHSKnownZero & LHSKnownZero) | (RHSKnownOne & LHSKnownOne); + // Output known-1 are known to be set if set in only one of the LHS, RHS. + KnownOne = (RHSKnownZero & LHSKnownOne) | (RHSKnownOne & LHSKnownZero); break; } case Instruction::Select: @@ -389,61 +383,61 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, return I; // Only known if known in both the LHS and RHS. 
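
// --- Illustrative aside, not part of the patch: the known-bits rules being
// renamed above. For AND, a result bit is known one only if known one on both
// sides and known zero if known zero on either side; OR is the dual. A small
// property check over all 4-bit values consistent with some hypothetical
// known-bit masks:
#include <cassert>
#include <cstdint>

int main() {
  // Hypothetical facts: in x, bit0 is known one and bit3 known zero;
  // in y, bits 0 and 1 are known one.
  const uint8_t XOne = 0x1, XZero = 0x8, YOne = 0x3, YZero = 0x0;
  const uint8_t AndOne = XOne & YOne;        // KnownOne = RHSKnownOne & LHSKnownOne
  const uint8_t AndZero = XZero | YZero;     // KnownZero = RHSKnownZero | LHSKnownZero
  for (uint8_t x = 0; x < 16; ++x)
    for (uint8_t y = 0; y < 16; ++y) {
      if ((x & XOne) != XOne || (x & XZero) ||
          (y & YOne) != YOne || (y & YZero))
        continue;                            // skip values violating the facts
      uint8_t r = x & y;
      assert((r & AndOne) == AndOne);        // predicted ones are set
      assert((r & AndZero) == 0);            // predicted zeros are clear
    }
  return 0;
}
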
- RHSKnownOne &= LHSKnownOne; - RHSKnownZero &= LHSKnownZero; + KnownOne = RHSKnownOne & LHSKnownOne; + KnownZero = RHSKnownZero & LHSKnownZero; break; case Instruction::Trunc: { unsigned truncBf = I->getOperand(0)->getType()->getScalarSizeInBits(); DemandedMask.zext(truncBf); - RHSKnownZero.zext(truncBf); - RHSKnownOne.zext(truncBf); + KnownZero.zext(truncBf); + KnownOne.zext(truncBf); if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMask, - RHSKnownZero, RHSKnownOne, Depth+1)) + KnownZero, KnownOne, Depth+1)) return I; DemandedMask.trunc(BitWidth); - RHSKnownZero.trunc(BitWidth); - RHSKnownOne.trunc(BitWidth); - assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?"); + KnownZero.trunc(BitWidth); + KnownOne.trunc(BitWidth); + assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?"); break; } case Instruction::BitCast: - if (!I->getOperand(0)->getType()->isIntOrIntVector()) - return false; // vector->int or fp->int? + if (!I->getOperand(0)->getType()->isIntOrIntVectorTy()) + return 0; // vector->int or fp->int? if (const VectorType *DstVTy = dyn_cast<VectorType>(I->getType())) { if (const VectorType *SrcVTy = dyn_cast<VectorType>(I->getOperand(0)->getType())) { if (DstVTy->getNumElements() != SrcVTy->getNumElements()) // Don't touch a bitcast between vectors of different element counts. - return false; + return 0; } else // Don't touch a scalar-to-vector bitcast. - return false; + return 0; } else if (isa<VectorType>(I->getOperand(0)->getType())) // Don't touch a vector-to-scalar bitcast. - return false; + return 0; if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMask, - RHSKnownZero, RHSKnownOne, Depth+1)) + KnownZero, KnownOne, Depth+1)) return I; - assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?"); + assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?"); break; case Instruction::ZExt: { // Compute the bits in the result that are not present in the input. unsigned SrcBitWidth =I->getOperand(0)->getType()->getScalarSizeInBits(); DemandedMask.trunc(SrcBitWidth); - RHSKnownZero.trunc(SrcBitWidth); - RHSKnownOne.trunc(SrcBitWidth); + KnownZero.trunc(SrcBitWidth); + KnownOne.trunc(SrcBitWidth); if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMask, - RHSKnownZero, RHSKnownOne, Depth+1)) + KnownZero, KnownOne, Depth+1)) return I; DemandedMask.zext(BitWidth); - RHSKnownZero.zext(BitWidth); - RHSKnownOne.zext(BitWidth); - assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?"); + KnownZero.zext(BitWidth); + KnownOne.zext(BitWidth); + assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?"); // The top bits are known to be zero. 
- RHSKnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - SrcBitWidth); + KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - SrcBitWidth); break; } case Instruction::SExt: { @@ -460,27 +454,27 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, InputDemandedBits.set(SrcBitWidth-1); InputDemandedBits.trunc(SrcBitWidth); - RHSKnownZero.trunc(SrcBitWidth); - RHSKnownOne.trunc(SrcBitWidth); + KnownZero.trunc(SrcBitWidth); + KnownOne.trunc(SrcBitWidth); if (SimplifyDemandedBits(I->getOperandUse(0), InputDemandedBits, - RHSKnownZero, RHSKnownOne, Depth+1)) + KnownZero, KnownOne, Depth+1)) return I; InputDemandedBits.zext(BitWidth); - RHSKnownZero.zext(BitWidth); - RHSKnownOne.zext(BitWidth); - assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?"); + KnownZero.zext(BitWidth); + KnownOne.zext(BitWidth); + assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?"); // If the sign bit of the input is known set or clear, then we know the // top bits of the result. // If the input sign bit is known zero, or if the NewBits are not demanded // convert this into a zero extension. - if (RHSKnownZero[SrcBitWidth-1] || (NewBits & ~DemandedMask) == NewBits) { + if (KnownZero[SrcBitWidth-1] || (NewBits & ~DemandedMask) == NewBits) { // Convert to ZExt cast CastInst *NewCast = new ZExtInst(I->getOperand(0), VTy, I->getName()); return InsertNewInstBefore(NewCast, *I); - } else if (RHSKnownOne[SrcBitWidth-1]) { // Input sign bit known set - RHSKnownOne |= NewBits; + } else if (KnownOne[SrcBitWidth-1]) { // Input sign bit known set + KnownOne |= NewBits; } break; } @@ -540,12 +534,12 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, // Bits are known one if they are known zero in one operand and one in the // other, and there is no input carry. - RHSKnownOne = ((LHSKnownZero & RHSVal) | - (LHSKnownOne & ~RHSVal)) & ~CarryBits; + KnownOne = ((LHSKnownZero & RHSVal) | + (LHSKnownOne & ~RHSVal)) & ~CarryBits; // Bits are known zero if they are known zero in both operands and there // is no input carry. - RHSKnownZero = LHSKnownZero & ~RHSVal & ~CarryBits; + KnownZero = LHSKnownZero & ~RHSVal & ~CarryBits; } else { // If the high-bits of this ADD are not demanded, then it does not demand // the high bits of its LHS or RHS. @@ -578,21 +572,21 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, } // Otherwise just hand the sub off to ComputeMaskedBits to fill in // the known zeros and ones. - ComputeMaskedBits(V, DemandedMask, RHSKnownZero, RHSKnownOne, Depth); + ComputeMaskedBits(V, DemandedMask, KnownZero, KnownOne, Depth); break; case Instruction::Shl: if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) { uint64_t ShiftAmt = SA->getLimitedValue(BitWidth); APInt DemandedMaskIn(DemandedMask.lshr(ShiftAmt)); if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMaskIn, - RHSKnownZero, RHSKnownOne, Depth+1)) + KnownZero, KnownOne, Depth+1)) return I; - assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?"); - RHSKnownZero <<= ShiftAmt; - RHSKnownOne <<= ShiftAmt; + assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?"); + KnownZero <<= ShiftAmt; + KnownOne <<= ShiftAmt; // low bits known zero. 
if (ShiftAmt) - RHSKnownZero |= APInt::getLowBitsSet(BitWidth, ShiftAmt); + KnownZero |= APInt::getLowBitsSet(BitWidth, ShiftAmt); } break; case Instruction::LShr: @@ -603,15 +597,15 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, // Unsigned shift right. APInt DemandedMaskIn(DemandedMask.shl(ShiftAmt)); if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMaskIn, - RHSKnownZero, RHSKnownOne, Depth+1)) + KnownZero, KnownOne, Depth+1)) return I; - assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?"); - RHSKnownZero = APIntOps::lshr(RHSKnownZero, ShiftAmt); - RHSKnownOne = APIntOps::lshr(RHSKnownOne, ShiftAmt); + assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?"); + KnownZero = APIntOps::lshr(KnownZero, ShiftAmt); + KnownOne = APIntOps::lshr(KnownOne, ShiftAmt); if (ShiftAmt) { // Compute the new bits that are at the top now. APInt HighBits(APInt::getHighBitsSet(BitWidth, ShiftAmt)); - RHSKnownZero |= HighBits; // high bits known zero. + KnownZero |= HighBits; // high bits known zero. } } break; @@ -642,13 +636,13 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, if (DemandedMask.countLeadingZeros() <= ShiftAmt) DemandedMaskIn.set(BitWidth-1); if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMaskIn, - RHSKnownZero, RHSKnownOne, Depth+1)) + KnownZero, KnownOne, Depth+1)) return I; - assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?"); + assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?"); // Compute the new bits that are at the top now. APInt HighBits(APInt::getHighBitsSet(BitWidth, ShiftAmt)); - RHSKnownZero = APIntOps::lshr(RHSKnownZero, ShiftAmt); - RHSKnownOne = APIntOps::lshr(RHSKnownOne, ShiftAmt); + KnownZero = APIntOps::lshr(KnownZero, ShiftAmt); + KnownOne = APIntOps::lshr(KnownOne, ShiftAmt); // Handle the sign bits. APInt SignBit(APInt::getSignBit(BitWidth)); @@ -657,14 +651,14 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, // If the input sign bit is known to be zero, or if none of the top bits // are demanded, turn this into an unsigned shift right. - if (BitWidth <= ShiftAmt || RHSKnownZero[BitWidth-ShiftAmt-1] || + if (BitWidth <= ShiftAmt || KnownZero[BitWidth-ShiftAmt-1] || (HighBits & ~DemandedMask) == HighBits) { // Perform the logical shift right. Instruction *NewVal = BinaryOperator::CreateLShr( I->getOperand(0), SA, I->getName()); return InsertNewInstBefore(NewVal, *I); - } else if ((RHSKnownOne & SignBit) != 0) { // New bits are known one. - RHSKnownOne |= HighBits; + } else if ((KnownOne & SignBit) != 0) { // New bits are known one. + KnownOne |= HighBits; } } break; @@ -681,10 +675,19 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, LHSKnownZero, LHSKnownOne, Depth+1)) return I; + // The low bits of LHS are unchanged by the srem. + KnownZero = LHSKnownZero & LowBits; + KnownOne = LHSKnownOne & LowBits; + + // If LHS is non-negative or has all low bits zero, then the upper bits + // are all zero. if (LHSKnownZero[BitWidth-1] || ((LHSKnownZero & LowBits) == LowBits)) - LHSKnownZero |= ~LowBits; + KnownZero |= ~LowBits; - KnownZero |= LHSKnownZero & DemandedMask; + // If LHS is negative and not all low bits are zero, then the upper bits + // are all one. 
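
// --- Illustrative aside, not part of the patch: the srem known-bits facts
// above, specialized here to a power-of-two divisor. x % 8 differs from x by
// a multiple of 8, so the low three bits pass through; the remaining bits are
// all zero when x is non-negative (or the low part is zero) and all one when
// x is negative with a nonzero low part. Assumes two's-complement int
// (guaranteed since C++20); C++ '%' truncates toward zero like srem.
#include <cassert>
#include <cstdint>

int main() {
  const int32_t LowBits = 7;                       // divisor 8, mask 0b111
  for (int32_t x = -40; x <= 40; ++x) {
    int32_t r = x % 8;
    assert((r & LowBits) == (x & LowBits));        // low bits unchanged
    if (x >= 0 || (x & LowBits) == 0)
      assert((r & ~LowBits) == 0);                 // upper bits all zero
    else
      assert((r & ~LowBits) == ~LowBits);          // upper bits all one
  }
  return 0;
}
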
+ if (LHSKnownOne[BitWidth-1] && ((LHSKnownOne & LowBits) != 0)) + KnownOne |= ~LowBits; assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?"); } @@ -743,15 +746,15 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, } } } - ComputeMaskedBits(V, DemandedMask, RHSKnownZero, RHSKnownOne, Depth); + ComputeMaskedBits(V, DemandedMask, KnownZero, KnownOne, Depth); break; } // If the client is only demanding bits that we know, return the known // constant. - if ((DemandedMask & (RHSKnownZero|RHSKnownOne)) == DemandedMask) - return Constant::getIntegerValue(VTy, RHSKnownOne); - return false; + if ((DemandedMask & (KnownZero|KnownOne)) == DemandedMask) + return Constant::getIntegerValue(VTy, KnownOne); + return 0; } @@ -764,7 +767,7 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, /// operation, the operation is simplified, then the resultant value is /// returned. This returns null if no change was made. Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, - APInt& UndefElts, + APInt &UndefElts, unsigned Depth) { unsigned VWidth = cast<VectorType>(V->getType())->getNumElements(); APInt EltMask(APInt::getAllOnesValue(VWidth)); @@ -774,13 +777,15 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, // If the entire vector is undefined, just return this info. UndefElts = EltMask; return 0; - } else if (DemandedElts == 0) { // If nothing is demanded, provide undef. + } + + if (DemandedElts == 0) { // If nothing is demanded, provide undef. UndefElts = EltMask; return UndefValue::get(V->getType()); } UndefElts = 0; - if (ConstantVector *CP = dyn_cast<ConstantVector>(V)) { + if (ConstantVector *CV = dyn_cast<ConstantVector>(V)) { const Type *EltTy = cast<VectorType>(V->getType())->getElementType(); Constant *Undef = UndefValue::get(EltTy); @@ -789,23 +794,25 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, if (!DemandedElts[i]) { // If not demanded, set to undef. Elts.push_back(Undef); UndefElts.set(i); - } else if (isa<UndefValue>(CP->getOperand(i))) { // Already undef. + } else if (isa<UndefValue>(CV->getOperand(i))) { // Already undef. Elts.push_back(Undef); UndefElts.set(i); } else { // Otherwise, defined. - Elts.push_back(CP->getOperand(i)); + Elts.push_back(CV->getOperand(i)); } // If we changed the constant, return it. Constant *NewCP = ConstantVector::get(Elts); - return NewCP != CP ? NewCP : 0; - } else if (isa<ConstantAggregateZero>(V)) { + return NewCP != CV ? NewCP : 0; + } + + if (isa<ConstantAggregateZero>(V)) { // Simplify the CAZ to a ConstantVector where the non-demanded elements are // set to undef. // Check if this is identity. If so, return 0 since we are not simplifying // anything. - if (DemandedElts == ((1ULL << VWidth) -1)) + if (DemandedElts.isAllOnesValue()) return 0; const Type *EltTy = cast<VectorType>(V->getType())->getElementType(); diff --git a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp index f11f557..20fda1a 100644 --- a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp +++ b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp @@ -162,7 +162,8 @@ Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) { // property. 
if (EI.getOperand(0)->hasOneUse() && VectorWidth != 1) { APInt UndefElts(VectorWidth, 0); - APInt DemandedMask(VectorWidth, 1 << IndexVal); + APInt DemandedMask(VectorWidth, 0); + DemandedMask.set(IndexVal); if (Value *V = SimplifyDemandedVectorElts(EI.getOperand(0), DemandedMask, UndefElts)) { EI.setOperand(0, V); diff --git a/lib/Transforms/InstCombine/InstructionCombining.cpp b/lib/Transforms/InstCombine/InstructionCombining.cpp index 93b1961..96c0342 100644 --- a/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -158,7 +158,7 @@ Value *InstCombiner::dyn_castNegVal(Value *V) const { return ConstantExpr::getNeg(C); if (ConstantVector *C = dyn_cast<ConstantVector>(V)) - if (C->getType()->getElementType()->isInteger()) + if (C->getType()->getElementType()->isIntegerTy()) return ConstantExpr::getNeg(C); return 0; @@ -177,7 +177,7 @@ Value *InstCombiner::dyn_castFNegVal(Value *V) const { return ConstantExpr::getFNeg(C); if (ConstantVector *C = dyn_cast<ConstantVector>(V)) - if (C->getType()->getElementType()->isFloatingPoint()) + if (C->getType()->getElementType()->isFloatingPointTy()) return ConstantExpr::getFNeg(C); return 0; @@ -226,7 +226,7 @@ Instruction *InstCombiner::FoldOpIntoSelect(Instruction &Op, SelectInst *SI) { if (isa<Constant>(TV) || isa<Constant>(FV)) { // Bool selects with constant operands can be folded to logical ops. - if (SI->getType()->isInteger(1)) return 0; + if (SI->getType()->isIntegerTy(1)) return 0; Value *SelectTrueVal = FoldOperationIntoSelectOperand(Op, TV, this); Value *SelectFalseVal = FoldOperationIntoSelectOperand(Op, FV, this); @@ -596,7 +596,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { // (where tmp = 8*tmp2) into: // getelementptr [100 x double]* %arr, i32 0, i32 %tmp2; bitcast - if (TD && isa<ArrayType>(SrcElTy) && ResElTy->isInteger(8)) { + if (TD && isa<ArrayType>(SrcElTy) && ResElTy->isIntegerTy(8)) { uint64_t ArrayEltSize = TD->getTypeAllocSize(cast<ArrayType>(SrcElTy)->getElementType()); diff --git a/lib/Transforms/InstCombine/Makefile b/lib/Transforms/InstCombine/Makefile index f9de42a..0c488e78 100644 --- a/lib/Transforms/InstCombine/Makefile +++ b/lib/Transforms/InstCombine/Makefile @@ -10,7 +10,6 @@ LEVEL = ../../.. LIBRARYNAME = LLVMInstCombine BUILD_ARCHIVE = 1 -CXXFLAGS = -fno-rtti include $(LEVEL)/Makefile.common diff --git a/lib/Transforms/Instrumentation/Makefile b/lib/Transforms/Instrumentation/Makefile index 1238896..6cbc7a9 100644 --- a/lib/Transforms/Instrumentation/Makefile +++ b/lib/Transforms/Instrumentation/Makefile @@ -10,7 +10,6 @@ LEVEL = ../../.. LIBRARYNAME = LLVMInstrumentation BUILD_ARCHIVE = 1 -CXXFLAGS = -fno-rtti include $(LEVEL)/Makefile.common diff --git a/lib/Transforms/Instrumentation/ProfilingUtils.cpp b/lib/Transforms/Instrumentation/ProfilingUtils.cpp index 3214c8c..8662a82 100644 --- a/lib/Transforms/Instrumentation/ProfilingUtils.cpp +++ b/lib/Transforms/Instrumentation/ProfilingUtils.cpp @@ -84,7 +84,7 @@ void llvm::InsertProfilingInitCall(Function *MainFn, const char *FnName, AI = MainFn->arg_begin(); // If the program looked at argc, have it look at the return value of the // init call instead. 
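
// --- Illustrative aside, not part of the patch: why the extractelement code
// above switched from "1 << IndexVal" to DemandedMask.set(IndexVal). Shifting
// a 32-bit 1 by an element index of 32 or more is undefined and drops the
// bit, while setting the bit in a mask as wide as the vector is always fine.
// A hypothetical standalone analogue using a 64-bit mask for a 64-wide vector:
#include <cassert>
#include <cstdint>

int main() {
  const unsigned IndexVal = 35;              // element index >= 32
  uint64_t DemandedMask = 0;                 // cf. APInt(VectorWidth, 0)
  DemandedMask |= uint64_t{1} << IndexVal;   // cf. DemandedMask.set(IndexVal)
  assert(DemandedMask == 0x0000000800000000ULL);
  // By contrast, (1 << IndexVal) would shift a 32-bit int by 35 bits,
  // which is undefined behavior in C++ and cannot represent the bit anyway.
  return 0;
}
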
- if (!AI->getType()->isInteger(32)) { + if (!AI->getType()->isIntegerTy(32)) { Instruction::CastOps opcode; if (!AI->use_empty()) { opcode = CastInst::getCastOpcode(InitCall, true, AI->getType(), true); diff --git a/lib/Transforms/Scalar/CodeGenPrepare.cpp b/lib/Transforms/Scalar/CodeGenPrepare.cpp index c3139a5..21e6f89 100644 --- a/lib/Transforms/Scalar/CodeGenPrepare.cpp +++ b/lib/Transforms/Scalar/CodeGenPrepare.cpp @@ -32,7 +32,6 @@ #include "llvm/ADT/SmallSet.h" #include "llvm/Assembly/Writer.h" #include "llvm/Support/CallSite.h" -#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/GetElementPtrTypeIterator.h" #include "llvm/Support/PatternMatch.h" @@ -40,9 +39,6 @@ using namespace llvm; using namespace llvm::PatternMatch; -static cl::opt<bool> FactorCommonPreds("split-critical-paths-tweak", - cl::init(false), cl::Hidden); - namespace { class CodeGenPrepare : public FunctionPass { /// TLI - Keep a pointer of a TargetLowering to consult for determining @@ -63,6 +59,10 @@ namespace { AU.addPreserved<ProfileInfo>(); } + virtual void releaseMemory() { + BackEdges.clear(); + } + private: bool EliminateMostlyEmptyBlocks(Function &F); bool CanMergeBlocks(const BasicBlock *BB, const BasicBlock *DestBB) const; @@ -297,6 +297,70 @@ void CodeGenPrepare::EliminateMostlyEmptyBlock(BasicBlock *BB) { DEBUG(dbgs() << "AFTER:\n" << *DestBB << "\n\n\n"); } +/// FindReusablePredBB - Check all of the predecessors of the block DestPHI +/// lives in to see if there is a block that we can reuse as a critical edge +/// from TIBB. +static BasicBlock *FindReusablePredBB(PHINode *DestPHI, BasicBlock *TIBB) { + BasicBlock *Dest = DestPHI->getParent(); + + /// TIPHIValues - This array is lazily computed to determine the values of + /// PHIs in Dest that TI would provide. + SmallVector<Value*, 32> TIPHIValues; + + /// TIBBEntryNo - This is a cache to speed up pred queries for TIBB. + unsigned TIBBEntryNo = 0; + + // Check to see if Dest has any blocks that can be used as a split edge for + // this terminator. + for (unsigned pi = 0, e = DestPHI->getNumIncomingValues(); pi != e; ++pi) { + BasicBlock *Pred = DestPHI->getIncomingBlock(pi); + // To be usable, the pred has to end with an uncond branch to the dest. + BranchInst *PredBr = dyn_cast<BranchInst>(Pred->getTerminator()); + if (!PredBr || !PredBr->isUnconditional()) + continue; + // Must be empty other than the branch and debug info. + BasicBlock::iterator I = Pred->begin(); + while (isa<DbgInfoIntrinsic>(I)) + I++; + if (&*I != PredBr) + continue; + // Cannot be the entry block; its label does not get emitted. + if (Pred == &Dest->getParent()->getEntryBlock()) + continue; + + // Finally, since we know that Dest has phi nodes in it, we have to make + // sure that jumping to Pred will have the same effect as going to Dest in + // terms of PHI values. + PHINode *PN; + unsigned PHINo = 0; + unsigned PredEntryNo = pi; + + bool FoundMatch = true; + for (BasicBlock::iterator I = Dest->begin(); + (PN = dyn_cast<PHINode>(I)); ++I, ++PHINo) { + if (PHINo == TIPHIValues.size()) { + if (PN->getIncomingBlock(TIBBEntryNo) != TIBB) + TIBBEntryNo = PN->getBasicBlockIndex(TIBB); + TIPHIValues.push_back(PN->getIncomingValue(TIBBEntryNo)); + } + + // If the PHI entry doesn't work, we can't use this pred. 
+ if (PN->getIncomingBlock(PredEntryNo) != Pred) + PredEntryNo = PN->getBasicBlockIndex(Pred); + + if (TIPHIValues[PHINo] != PN->getIncomingValue(PredEntryNo)) { + FoundMatch = false; + break; + } + } + + // If we found a workable predecessor, change TI to branch to Succ. + if (FoundMatch) + return Pred; + } + return 0; +} + /// SplitEdgeNicely - Split the critical edge from TI to its specified /// successor if it will improve codegen. We only do this if the successor has @@ -311,13 +375,12 @@ static void SplitEdgeNicely(TerminatorInst *TI, unsigned SuccNum, BasicBlock *Dest = TI->getSuccessor(SuccNum); assert(isa<PHINode>(Dest->begin()) && "This should only be called if Dest has a PHI!"); + PHINode *DestPHI = cast<PHINode>(Dest->begin()); // Do not split edges to EH landing pads. - if (InvokeInst *Invoke = dyn_cast<InvokeInst>(TI)) { + if (InvokeInst *Invoke = dyn_cast<InvokeInst>(TI)) if (Invoke->getSuccessor(1) == Dest) return; - } - // As a hack, never split backedges of loops. Even though the copy for any // PHIs inserted on the backedge would be dead for exits from the loop, we @@ -325,92 +388,16 @@ static void SplitEdgeNicely(TerminatorInst *TI, unsigned SuccNum, if (BackEdges.count(std::make_pair(TIBB, Dest))) return; - if (!FactorCommonPreds) { - /// TIPHIValues - This array is lazily computed to determine the values of - /// PHIs in Dest that TI would provide. - SmallVector<Value*, 32> TIPHIValues; - - // Check to see if Dest has any blocks that can be used as a split edge for - // this terminator. - for (pred_iterator PI = pred_begin(Dest), E = pred_end(Dest); PI != E; ++PI) { - BasicBlock *Pred = *PI; - // To be usable, the pred has to end with an uncond branch to the dest. - BranchInst *PredBr = dyn_cast<BranchInst>(Pred->getTerminator()); - if (!PredBr || !PredBr->isUnconditional()) - continue; - // Must be empty other than the branch and debug info. - BasicBlock::iterator I = Pred->begin(); - while (isa<DbgInfoIntrinsic>(I)) - I++; - if (dyn_cast<Instruction>(I) != PredBr) - continue; - // Cannot be the entry block; its label does not get emitted. - if (Pred == &(Dest->getParent()->getEntryBlock())) - continue; - - // Finally, since we know that Dest has phi nodes in it, we have to make - // sure that jumping to Pred will have the same effect as going to Dest in - // terms of PHI values. - PHINode *PN; - unsigned PHINo = 0; - bool FoundMatch = true; - for (BasicBlock::iterator I = Dest->begin(); - (PN = dyn_cast<PHINode>(I)); ++I, ++PHINo) { - if (PHINo == TIPHIValues.size()) - TIPHIValues.push_back(PN->getIncomingValueForBlock(TIBB)); - - // If the PHI entry doesn't work, we can't use this pred. - if (TIPHIValues[PHINo] != PN->getIncomingValueForBlock(Pred)) { - FoundMatch = false; - break; - } - } - - // If we found a workable predecessor, change TI to branch to Succ. 
- if (FoundMatch) { - ProfileInfo *PFI = P->getAnalysisIfAvailable<ProfileInfo>(); - if (PFI) - PFI->splitEdge(TIBB, Dest, Pred); - Dest->removePredecessor(TIBB); - TI->setSuccessor(SuccNum, Pred); - return; - } - } - - SplitCriticalEdge(TI, SuccNum, P, true); + if (BasicBlock *ReuseBB = FindReusablePredBB(DestPHI, TIBB)) { + ProfileInfo *PFI = P->getAnalysisIfAvailable<ProfileInfo>(); + if (PFI) + PFI->splitEdge(TIBB, Dest, ReuseBB); + Dest->removePredecessor(TIBB); + TI->setSuccessor(SuccNum, ReuseBB); return; } - PHINode *PN; - SmallVector<Value*, 8> TIPHIValues; - for (BasicBlock::iterator I = Dest->begin(); - (PN = dyn_cast<PHINode>(I)); ++I) - TIPHIValues.push_back(PN->getIncomingValueForBlock(TIBB)); - - SmallVector<BasicBlock*, 8> IdenticalPreds; - for (pred_iterator PI = pred_begin(Dest), E = pred_end(Dest); PI != E; ++PI) { - BasicBlock *Pred = *PI; - if (BackEdges.count(std::make_pair(Pred, Dest))) - continue; - if (PI == TIBB) - IdenticalPreds.push_back(Pred); - else { - bool Identical = true; - unsigned PHINo = 0; - for (BasicBlock::iterator I = Dest->begin(); - (PN = dyn_cast<PHINode>(I)); ++I, ++PHINo) - if (TIPHIValues[PHINo] != PN->getIncomingValueForBlock(Pred)) { - Identical = false; - break; - } - if (Identical) - IdenticalPreds.push_back(Pred); - } - } - - assert(!IdenticalPreds.empty()); - SplitBlockPredecessors(Dest, &IdenticalPreds[0], IdenticalPreds.size(), - ".critedge", P); + SplitCriticalEdge(TI, SuccNum, P, true); } diff --git a/lib/Transforms/Scalar/DeadStoreElimination.cpp b/lib/Transforms/Scalar/DeadStoreElimination.cpp index 320afa1..09c01d3 100644 --- a/lib/Transforms/Scalar/DeadStoreElimination.cpp +++ b/lib/Transforms/Scalar/DeadStoreElimination.cpp @@ -44,8 +44,14 @@ namespace { virtual bool runOnFunction(Function &F) { bool Changed = false; + + DominatorTree &DT = getAnalysis<DominatorTree>(); + for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) - Changed |= runOnBasicBlock(*I); + // Only check non-dead blocks. Dead blocks may have strange pointer + // cycles that will confuse alias analysis. + if (DT.isReachableFromEntry(I)) + Changed |= runOnBasicBlock(*I); return Changed; } diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp index b29fe74..3ce7482 100644 --- a/lib/Transforms/Scalar/GVN.cpp +++ b/lib/Transforms/Scalar/GVN.cpp @@ -60,6 +60,7 @@ STATISTIC(NumPRELoad, "Number of loads PRE'd"); static cl::opt<bool> EnablePRE("enable-pre", cl::init(true), cl::Hidden); static cl::opt<bool> EnableLoadPRE("enable-load-pre", cl::init(true)); +static cl::opt<bool> EnableFullLoadPRE("enable-full-load-pre", cl::init(false)); //===----------------------------------------------------------------------===// // ValueTable Class @@ -1522,8 +1523,6 @@ bool GVN::processNonLocalLoad(LoadInst *LI, while (TmpBB->getSinglePredecessor()) { isSinglePred = true; TmpBB = TmpBB->getSinglePredecessor(); - if (!TmpBB) // If haven't found any, bail now. - return false; if (TmpBB == LoadBB) // Infinite (unreachable) loop. return false; if (Blockers.count(TmpBB)) @@ -1539,10 +1538,12 @@ bool GVN::processNonLocalLoad(LoadInst *LI, // at least one of the values is LI. Since this means that we won't be able // to eliminate LI even if we insert uses in the other predecessors, we will // end up increasing code size. Reject this by scanning for LI. 
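// For example, if predecessor P1 has a real available value but P2's
// "available value" is LI itself, inserting a load into P2 still leaves a
// load on the P2 path, so LI is not eliminated and code only grows.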
- for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i) - if (ValuesPerBlock[i].isSimpleValue() && - ValuesPerBlock[i].getSimpleValue() == LI) - return false; + if (!EnableFullLoadPRE) { + for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i) + if (ValuesPerBlock[i].isSimpleValue() && + ValuesPerBlock[i].getSimpleValue() == LI) + return false; + } // FIXME: It is extremely unclear what this loop is doing, other than // artificially restricting loadpre. @@ -1566,13 +1567,9 @@ bool GVN::processNonLocalLoad(LoadInst *LI, return false; } - // Okay, we have some hope :). Check to see if the loaded value is fully - // available in all but one predecessor. - // FIXME: If we could restructure the CFG, we could make a common pred with - // all the preds that don't have an available LI and insert a new load into - // that one block. - BasicBlock *UnavailablePred = 0; - + // Check to see how many predecessors have the loaded value fully + // available. + DenseMap<BasicBlock*, Value*> PredLoads; DenseMap<BasicBlock*, char> FullyAvailableBlocks; for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i) FullyAvailableBlocks[ValuesPerBlock[i].BB] = true; @@ -1581,79 +1578,93 @@ bool GVN::processNonLocalLoad(LoadInst *LI, for (pred_iterator PI = pred_begin(LoadBB), E = pred_end(LoadBB); PI != E; ++PI) { - if (IsValueFullyAvailableInBlock(*PI, FullyAvailableBlocks)) + BasicBlock *Pred = *PI; + if (IsValueFullyAvailableInBlock(Pred, FullyAvailableBlocks)) { continue; - - // If this load is not available in multiple predecessors, reject it. - if (UnavailablePred && UnavailablePred != *PI) + } + PredLoads[Pred] = 0; + // We don't currently handle critical edges :( + if (Pred->getTerminator()->getNumSuccessors() != 1) { + DEBUG(dbgs() << "COULD NOT PRE LOAD BECAUSE OF CRITICAL EDGE '" + << Pred->getName() << "': " << *LI << '\n'); return false; - UnavailablePred = *PI; + } } - assert(UnavailablePred != 0 && + // Decide whether PRE is profitable for this load. + unsigned NumUnavailablePreds = PredLoads.size(); + assert(NumUnavailablePreds != 0 && "Fully available value should be eliminated above!"); - - // We don't currently handle critical edges :( - if (UnavailablePred->getTerminator()->getNumSuccessors() != 1) { - DEBUG(dbgs() << "COULD NOT PRE LOAD BECAUSE OF CRITICAL EDGE '" - << UnavailablePred->getName() << "': " << *LI << '\n'); - return false; + if (!EnableFullLoadPRE) { + // If this load is unavailable in multiple predecessors, reject it. + // FIXME: If we could restructure the CFG, we could make a common pred with + // all the preds that don't have an available LI and insert a new load into + // that one block. + if (NumUnavailablePreds != 1) + return false; } - - // Do PHI translation to get its value in the predecessor if necessary. The - // returned pointer (if non-null) is guaranteed to dominate UnavailablePred. - // + + // Check if the load can safely be moved to all the unavailable predecessors. + bool CanDoPRE = true; SmallVector<Instruction*, 8> NewInsts; - - // If all preds have a single successor, then we know it is safe to insert the - // load on the pred (?!?), so we can insert code to materialize the pointer if - // it is not available. 
- PHITransAddr Address(LI->getOperand(0), TD); - Value *LoadPtr = 0; - if (allSingleSucc) { - LoadPtr = Address.PHITranslateWithInsertion(LoadBB, UnavailablePred, - *DT, NewInsts); - } else { - Address.PHITranslateValue(LoadBB, UnavailablePred); - LoadPtr = Address.getAddr(); + for (DenseMap<BasicBlock*, Value*>::iterator I = PredLoads.begin(), + E = PredLoads.end(); I != E; ++I) { + BasicBlock *UnavailablePred = I->first; + + // Do PHI translation to get its value in the predecessor if necessary. The + // returned pointer (if non-null) is guaranteed to dominate UnavailablePred. + + // If all preds have a single successor, then we know it is safe to insert + // the load on the pred (?!?), so we can insert code to materialize the + // pointer if it is not available. + PHITransAddr Address(LI->getOperand(0), TD); + Value *LoadPtr = 0; + if (allSingleSucc) { + LoadPtr = Address.PHITranslateWithInsertion(LoadBB, UnavailablePred, + *DT, NewInsts); + } else { + Address.PHITranslateValue(LoadBB, UnavailablePred); + LoadPtr = Address.getAddr(); - // Make sure the value is live in the predecessor. - if (Instruction *Inst = dyn_cast_or_null<Instruction>(LoadPtr)) - if (!DT->dominates(Inst->getParent(), UnavailablePred)) - LoadPtr = 0; - } + // Make sure the value is live in the predecessor. + if (Instruction *Inst = dyn_cast_or_null<Instruction>(LoadPtr)) + if (!DT->dominates(Inst->getParent(), UnavailablePred)) + LoadPtr = 0; + } - // If we couldn't find or insert a computation of this phi translated value, - // we fail PRE. - if (LoadPtr == 0) { - assert(NewInsts.empty() && "Shouldn't insert insts on failure"); - DEBUG(dbgs() << "COULDN'T INSERT PHI TRANSLATED VALUE OF: " - << *LI->getOperand(0) << "\n"); - return false; - } + // If we couldn't find or insert a computation of this phi translated value, + // we fail PRE. + if (LoadPtr == 0) { + DEBUG(dbgs() << "COULDN'T INSERT PHI TRANSLATED VALUE OF: " + << *LI->getOperand(0) << "\n"); + CanDoPRE = false; + break; + } - // Assign value numbers to these new instructions. - for (unsigned i = 0, e = NewInsts.size(); i != e; ++i) { - // FIXME: We really _ought_ to insert these value numbers into their - // parent's availability map. However, in doing so, we risk getting into - // ordering issues. If a block hasn't been processed yet, we would be - // marking a value as AVAIL-IN, which isn't what we intend. - VN.lookup_or_add(NewInsts[i]); + // Make sure it is valid to move this load here. We have to watch out for: + // @1 = getelementptr (i8* p, ... + // test p and branch if == 0 + // load @1 + // It is valid to have the getelementptr before the test, even if p can be 0, + // as getelementptr only does address arithmetic. + // If we are not pushing the value through any multiple-successor blocks + // we do not have this case. Otherwise, check that the load is safe to + // put anywhere; this can be improved, but should be conservatively safe. + if (!allSingleSucc && + // FIXME: REEVALUTE THIS. + !isSafeToLoadUnconditionally(LoadPtr, + UnavailablePred->getTerminator(), + LI->getAlignment(), TD)) { + CanDoPRE = false; + break; + } + + I->second = LoadPtr; } - - // Make sure it is valid to move this load here. We have to watch out for: - // @1 = getelementptr (i8* p, ... - // test p and branch if == 0 - // load @1 - // It is valid to have the getelementptr before the test, even if p can be 0, - // as getelementptr only does address arithmetic. - // If we are not pushing the value through any multiple-successor blocks - // we do not have this case. 
Otherwise, check that the load is safe to - // put anywhere; this can be improved, but should be conservatively safe. - if (!allSingleSucc && - // FIXME: REEVALUTE THIS. - !isSafeToLoadUnconditionally(LoadPtr, UnavailablePred->getTerminator())) { - assert(NewInsts.empty() && "Should not have inserted instructions"); + + if (!CanDoPRE) { + while (!NewInsts.empty()) + NewInsts.pop_back_val()->eraseFromParent(); return false; } @@ -1665,12 +1676,28 @@ bool GVN::processNonLocalLoad(LoadInst *LI, dbgs() << "INSERTED " << NewInsts.size() << " INSTS: " << *NewInsts.back() << '\n'); - Value *NewLoad = new LoadInst(LoadPtr, LI->getName()+".pre", false, - LI->getAlignment(), - UnavailablePred->getTerminator()); + // Assign value numbers to the new instructions. + for (unsigned i = 0, e = NewInsts.size(); i != e; ++i) { + // FIXME: We really _ought_ to insert these value numbers into their + // parent's availability map. However, in doing so, we risk getting into + // ordering issues. If a block hasn't been processed yet, we would be + // marking a value as AVAIL-IN, which isn't what we intend. + VN.lookup_or_add(NewInsts[i]); + } - // Add the newly created load. - ValuesPerBlock.push_back(AvailableValueInBlock::get(UnavailablePred,NewLoad)); + for (DenseMap<BasicBlock*, Value*>::iterator I = PredLoads.begin(), + E = PredLoads.end(); I != E; ++I) { + BasicBlock *UnavailablePred = I->first; + Value *LoadPtr = I->second; + + Value *NewLoad = new LoadInst(LoadPtr, LI->getName()+".pre", false, + LI->getAlignment(), + UnavailablePred->getTerminator()); + + // Add the newly created load. + ValuesPerBlock.push_back(AvailableValueInBlock::get(UnavailablePred, + NewLoad)); + } // Perform PHI construction. Value *V = ConstructSSAForLoadSet(LI, ValuesPerBlock, TD, *DT, @@ -1864,6 +1891,10 @@ Value *GVN::lookupNumber(BasicBlock *BB, uint32_t num) { /// by inserting it into the appropriate sets bool GVN::processInstruction(Instruction *I, SmallVectorImpl<Instruction*> &toErase) { + // Ignore dbg info intrinsics. + if (isa<DbgInfoIntrinsic>(I)) + return false; + if (LoadInst *LI = dyn_cast<LoadInst>(I)) { bool Changed = processLoad(LI, toErase); @@ -2075,7 +2106,7 @@ bool GVN::performPRE(Function &F) { for (pred_iterator PI = pred_begin(CurrentBlock), PE = pred_end(CurrentBlock); PI != PE; ++PI) { // We're not interested in PRE where the block is its - // own predecessor, on in blocks with predecessors + // own predecessor, or in blocks with predecessors // that are not reachable. if (*PI == CurrentBlock) { NumWithout = 2; @@ -2123,10 +2154,10 @@ bool GVN::performPRE(Function &F) { continue; } - // Instantiate the expression the in predecessor that lacked it. + // Instantiate the expression in the predecessor that lacked it. // Because we are going top-down through the block, all value numbers // will be available in the predecessor by the time we need them. Any - // that weren't original present will have been instantiated earlier + // that weren't originally present will have been instantiated earlier // in this loop. 
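    // (The expression is cloned into the predecessor rather than moved; once
    // it is available in every predecessor, the original instruction can be
    // replaced with a phi of the predecessor copies and erased.)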
Instruction *PREInstr = CurInst->clone(); bool success = true; diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp index 17f7d98..5302fdc 100644 --- a/lib/Transforms/Scalar/IndVarSimplify.cpp +++ b/lib/Transforms/Scalar/IndVarSimplify.cpp @@ -364,23 +364,17 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) { if (ExitingBlock) NeedCannIV = true; } - for (unsigned i = 0, e = IU->StrideOrder.size(); i != e; ++i) { - const SCEV *Stride = IU->StrideOrder[i]; - const Type *Ty = SE->getEffectiveSCEVType(Stride->getType()); + for (IVUsers::const_iterator I = IU->begin(), E = IU->end(); I != E; ++I) { + const Type *Ty = + SE->getEffectiveSCEVType(I->getOperandValToReplace()->getType()); if (!LargestType || SE->getTypeSizeInBits(Ty) > SE->getTypeSizeInBits(LargestType)) LargestType = Ty; - - std::map<const SCEV *, IVUsersOfOneStride *>::iterator SI = - IU->IVUsesByStride.find(IU->StrideOrder[i]); - assert(SI != IU->IVUsesByStride.end() && "Stride doesn't exist!"); - - if (!SI->second->Users.empty()) - NeedCannIV = true; + NeedCannIV = true; } - // Now that we know the largest of of the induction variable expressions + // Now that we know the largest of the induction variable expressions // in this loop, insert a canonical induction variable of the largest size. Value *IndVar = 0; if (NeedCannIV) { @@ -455,72 +449,64 @@ void IndVarSimplify::RewriteIVExpressions(Loop *L, const Type *LargestType, // add the offsets to the primary induction variable and cast, avoiding // the need for the code evaluation methods to insert induction variables // of different sizes. - for (unsigned i = 0, e = IU->StrideOrder.size(); i != e; ++i) { - const SCEV *Stride = IU->StrideOrder[i]; - - std::map<const SCEV *, IVUsersOfOneStride *>::iterator SI = - IU->IVUsesByStride.find(IU->StrideOrder[i]); - assert(SI != IU->IVUsesByStride.end() && "Stride doesn't exist!"); - ilist<IVStrideUse> &List = SI->second->Users; - for (ilist<IVStrideUse>::iterator UI = List.begin(), - E = List.end(); UI != E; ++UI) { - Value *Op = UI->getOperandValToReplace(); - const Type *UseTy = Op->getType(); - Instruction *User = UI->getUser(); - - // Compute the final addrec to expand into code. - const SCEV *AR = IU->getReplacementExpr(*UI); - - // Evaluate the expression out of the loop, if possible. - if (!L->contains(UI->getUser())) { - const SCEV *ExitVal = SE->getSCEVAtScope(AR, L->getParentLoop()); - if (ExitVal->isLoopInvariant(L)) - AR = ExitVal; - } + for (IVUsers::iterator UI = IU->begin(), E = IU->end(); UI != E; ++UI) { + const SCEV *Stride = UI->getStride(); + Value *Op = UI->getOperandValToReplace(); + const Type *UseTy = Op->getType(); + Instruction *User = UI->getUser(); + + // Compute the final addrec to expand into code. + const SCEV *AR = IU->getReplacementExpr(*UI); + + // Evaluate the expression out of the loop, if possible. + if (!L->contains(UI->getUser())) { + const SCEV *ExitVal = SE->getSCEVAtScope(AR, L->getParentLoop()); + if (ExitVal->isLoopInvariant(L)) + AR = ExitVal; + } - // FIXME: It is an extremely bad idea to indvar substitute anything more - // complex than affine induction variables. Doing so will put expensive - // polynomial evaluations inside of the loop, and the str reduction pass - // currently can only reduce affine polynomials. For now just disable - // indvar subst on anything more complex than an affine addrec, unless - // it can be expanded to a trivial value. 
- if (!AR->isLoopInvariant(L) && !Stride->isLoopInvariant(L)) - continue; + // FIXME: It is an extremely bad idea to indvar substitute anything more + // complex than affine induction variables. Doing so will put expensive + // polynomial evaluations inside of the loop, and the str reduction pass + // currently can only reduce affine polynomials. For now just disable + // indvar subst on anything more complex than an affine addrec, unless + // it can be expanded to a trivial value. + if (!AR->isLoopInvariant(L) && !Stride->isLoopInvariant(L)) + continue; - // Determine the insertion point for this user. By default, insert - // immediately before the user. The SCEVExpander class will automatically - // hoist loop invariants out of the loop. For PHI nodes, there may be - // multiple uses, so compute the nearest common dominator for the - // incoming blocks. - Instruction *InsertPt = User; - if (PHINode *PHI = dyn_cast<PHINode>(InsertPt)) - for (unsigned i = 0, e = PHI->getNumIncomingValues(); i != e; ++i) - if (PHI->getIncomingValue(i) == Op) { - if (InsertPt == User) - InsertPt = PHI->getIncomingBlock(i)->getTerminator(); - else - InsertPt = - DT->findNearestCommonDominator(InsertPt->getParent(), - PHI->getIncomingBlock(i)) - ->getTerminator(); - } - - // Now expand it into actual Instructions and patch it into place. - Value *NewVal = Rewriter.expandCodeFor(AR, UseTy, InsertPt); - - // Patch the new value into place. - if (Op->hasName()) - NewVal->takeName(Op); - User->replaceUsesOfWith(Op, NewVal); - UI->setOperandValToReplace(NewVal); - DEBUG(dbgs() << "INDVARS: Rewrote IV '" << *AR << "' " << *Op << '\n' - << " into = " << *NewVal << "\n"); - ++NumRemoved; - Changed = true; - - // The old value may be dead now. - DeadInsts.push_back(Op); - } + // Determine the insertion point for this user. By default, insert + // immediately before the user. The SCEVExpander class will automatically + // hoist loop invariants out of the loop. For PHI nodes, there may be + // multiple uses, so compute the nearest common dominator for the + // incoming blocks. + Instruction *InsertPt = User; + if (PHINode *PHI = dyn_cast<PHINode>(InsertPt)) + for (unsigned i = 0, e = PHI->getNumIncomingValues(); i != e; ++i) + if (PHI->getIncomingValue(i) == Op) { + if (InsertPt == User) + InsertPt = PHI->getIncomingBlock(i)->getTerminator(); + else + InsertPt = + DT->findNearestCommonDominator(InsertPt->getParent(), + PHI->getIncomingBlock(i)) + ->getTerminator(); + } + + // Now expand it into actual Instructions and patch it into place. + Value *NewVal = Rewriter.expandCodeFor(AR, UseTy, InsertPt); + + // Patch the new value into place. + if (Op->hasName()) + NewVal->takeName(Op); + User->replaceUsesOfWith(Op, NewVal); + UI->setOperandValToReplace(NewVal); + DEBUG(dbgs() << "INDVARS: Rewrote IV '" << *AR << "' " << *Op << '\n' + << " into = " << *NewVal << "\n"); + ++NumRemoved; + Changed = true; + + // The old value may be dead now. + DeadInsts.push_back(Op); } // Clear the rewriter cache, because values that are in the rewriter's cache diff --git a/lib/Transforms/Scalar/JumpThreading.cpp b/lib/Transforms/Scalar/JumpThreading.cpp index 9531311..8f21aac 100644 --- a/lib/Transforms/Scalar/JumpThreading.cpp +++ b/lib/Transforms/Scalar/JumpThreading.cpp @@ -336,13 +336,18 @@ ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB,PredValueInfo &Result){ else InterestingVal = ConstantInt::getFalse(I->getContext()); - // Scan for the sentinel. + // Scan for the sentinel. 
If we find an undef, force it to the + // interesting value: x|undef -> true and x&undef -> false. for (unsigned i = 0, e = LHSVals.size(); i != e; ++i) - if (LHSVals[i].first == InterestingVal || LHSVals[i].first == 0) + if (LHSVals[i].first == InterestingVal || LHSVals[i].first == 0) { Result.push_back(LHSVals[i]); + Result.back().first = InterestingVal; + } for (unsigned i = 0, e = RHSVals.size(); i != e; ++i) - if (RHSVals[i].first == InterestingVal || RHSVals[i].first == 0) + if (RHSVals[i].first == InterestingVal || RHSVals[i].first == 0) { Result.push_back(RHSVals[i]); + Result.back().first = InterestingVal; + } return !Result.empty(); } @@ -400,7 +405,7 @@ ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB,PredValueInfo &Result){ // If comparing a live-in value against a constant, see if we know the // live-in value on any predecessors. if (LVI && isa<Constant>(Cmp->getOperand(1)) && - Cmp->getType()->isInteger() && // Not vector compare. + Cmp->getType()->isIntegerTy() && // Not vector compare. (!isa<Instruction>(Cmp->getOperand(0)) || cast<Instruction>(Cmp->getOperand(0))->getParent() != BB)) { Constant *RHSCst = cast<Constant>(Cmp->getOperand(1)); @@ -451,6 +456,12 @@ static unsigned GetBestDestForJumpOnUndef(BasicBlock *BB) { /// ProcessBlock - If there are any predecessors whose control can be threaded /// through to a successor, transform them now. bool JumpThreading::ProcessBlock(BasicBlock *BB) { + // If the block is trivially dead, just return and let the caller nuke it. + // This simplifies other transformations. + if (pred_begin(BB) == pred_end(BB) && + BB != &BB->getParent()->getEntryBlock()) + return false; + // If this block has a single predecessor, and if that pred has a single // successor, merge the blocks. This encourages recursive jump threading // because now the condition in this block can be threaded through @@ -1117,6 +1128,11 @@ bool JumpThreading::ProcessBranchOnXOR(BinaryOperator *BO) { isa<ConstantInt>(BO->getOperand(1))) return false; + // If the first instruction in BB isn't a phi, we won't be able to infer + // anything special about any particular predecessor. + if (!isa<PHINode>(BB->front())) + return false; + // If we have a xor as the branch input to this block, and we know that the // LHS or RHS of the xor in any predecessor is true/false, then we can clone // the condition into the predecessor and fix that value to true, saving some @@ -1174,6 +1190,26 @@ bool JumpThreading::ProcessBranchOnXOR(BinaryOperator *BO) { BlocksToFoldInto.push_back(XorOpValues[i].second); } + // If we inferred a value for all of the predecessors, then duplication won't + // help us. However, we can just replace the LHS or RHS with the constant. + if (BlocksToFoldInto.size() == + cast<PHINode>(BB->front()).getNumIncomingValues()) { + if (SplitVal == 0) { + // If all preds provide undef, just nuke the xor, because it is undef too. + BO->replaceAllUsesWith(UndefValue::get(BO->getType())); + BO->eraseFromParent(); + } else if (SplitVal->isZero()) { + // If all preds provide 0, replace the xor with the other input. + BO->replaceAllUsesWith(BO->getOperand(isLHS)); + BO->eraseFromParent(); + } else { + // If all preds provide 1, set the computed value to 1. + BO->setOperand(!isLHS, SplitVal); + } + + return true; + } + // Try to duplicate BB into PredBB. 
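+  // Each block in BlocksToFoldInto gets its own copy of BB, in which the
+  // phi'd xor operand folds to that predecessor's known constant, so the
+  // conditional branch can be threaded in the copy.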
return DuplicateCondBranchOnPHIIntoPred(BB, BlocksToFoldInto);
}

@@ -1393,9 +1429,9 @@ bool JumpThreading::DuplicateCondBranchOnPHIIntoPred(BasicBlock *BB,

// Unless PredBB ends with an unconditional branch, split the edge so that we
// can just clone the bits from BB into the end of the new PredBB.
- BranchInst *OldPredBranch = cast<BranchInst>(PredBB->getTerminator());
+ BranchInst *OldPredBranch = dyn_cast<BranchInst>(PredBB->getTerminator());

- if (!OldPredBranch->isUnconditional()) {
+ if (OldPredBranch == 0 || !OldPredBranch->isUnconditional()) {
PredBB = SplitEdge(PredBB, BB, this);
OldPredBranch = cast<BranchInst>(PredBB->getTerminator());
}
diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index fa820ed..240b298 100644
--- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -17,6 +17,40 @@
// available on the target, and it performs a variety of other optimizations
// related to loop induction variables.
//
+// Terminology note: this code has a lot of handling for "post-increment" or
+// "post-inc" users. This is not talking about post-increment addressing modes;
+// it is instead talking about code like this:
+//
+// %i = phi [ 0, %entry ], [ %i.next, %latch ]
+// ...
+// %i.next = add %i, 1
+// %c = icmp eq %i.next, %n
+//
+// The SCEV for %i is {0,+,1}<%L>. The SCEV for %i.next is {1,+,1}<%L>, however
+// it's useful to think about these as the same register, with some uses using
+// the value of the register before the add and some using it after. In this
+// example, the icmp is a post-increment user, since it uses %i.next, which is
+// the value of the induction variable after the increment. The other common
+// case of post-increment users is users outside the loop.
+//
+// TODO: More sophistication in the way Formulae are generated and filtered.
+//
+// TODO: Handle multiple loops at a time.
+//
+// TODO: Should TargetLowering::AddrMode::BaseGV be changed to a ConstantExpr
+// instead of a GlobalValue?
+//
+// TODO: When truncation is free, truncate ICmp users' operands to make it a
+// smaller encoding (on x86 at least).
+//
+// TODO: When a negated register is used by an add (such as in a list of
+// multiple base registers, or as the increment expression in an addrec),
+// we may not actually need both reg and (-1 * reg) in registers; the
+// negation can be implemented by using a sub instead of an add. The
+// lack of support for taking this into consideration when making
+// register pressure decisions is partly worked around by the "Special"
+// use kind.
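+// (A post-increment user outside the loop, such as a use of %i.next in the
+// exit block, likewise sees the value after the final increment.)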
+// //===----------------------------------------------------------------------===// #define DEBUG_TYPE "loop-reduce" @@ -26,208 +60,401 @@ #include "llvm/IntrinsicInst.h" #include "llvm/DerivedTypes.h" #include "llvm/Analysis/IVUsers.h" +#include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/ScalarEvolutionExpander.h" -#include "llvm/Transforms/Utils/AddrModeMatcher.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" -#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/SmallBitVector.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/DenseSet.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/CommandLine.h" #include "llvm/Support/ValueHandle.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetLowering.h" #include <algorithm> using namespace llvm; -STATISTIC(NumReduced , "Number of IV uses strength reduced"); -STATISTIC(NumInserted, "Number of PHIs inserted"); -STATISTIC(NumVariable, "Number of PHIs with variable strides"); -STATISTIC(NumEliminated, "Number of strides eliminated"); -STATISTIC(NumShadow, "Number of Shadow IVs optimized"); -STATISTIC(NumImmSunk, "Number of common expr immediates sunk into uses"); -STATISTIC(NumLoopCond, "Number of loop terminating conds optimized"); -STATISTIC(NumCountZero, "Number of count iv optimized to count toward zero"); +namespace { + +/// RegSortData - This class holds data which is used to order reuse candidates. +class RegSortData { +public: + /// UsedByIndices - This represents the set of LSRUse indices which reference + /// a particular register. + SmallBitVector UsedByIndices; + + RegSortData() {} + + void print(raw_ostream &OS) const; + void dump() const; +}; -static cl::opt<bool> EnableFullLSRMode("enable-full-lsr", - cl::init(false), - cl::Hidden); +} + +void RegSortData::print(raw_ostream &OS) const { + OS << "[NumUses=" << UsedByIndices.count() << ']'; +} + +void RegSortData::dump() const { + print(errs()); errs() << '\n'; +} namespace { - struct BasedUser; +/// RegUseTracker - Map register candidates to information about how they are +/// used. +class RegUseTracker { + typedef DenseMap<const SCEV *, RegSortData> RegUsesTy; - /// IVInfo - This structure keeps track of one IV expression inserted during - /// StrengthReduceStridedIVUsers. It contains the stride, the common base, as - /// well as the PHI node and increment value created for rewrite. - struct IVExpr { - const SCEV *Stride; - const SCEV *Base; - PHINode *PHI; + RegUsesTy RegUses; + SmallVector<const SCEV *, 16> RegSequence; - IVExpr(const SCEV *const stride, const SCEV *const base, PHINode *phi) - : Stride(stride), Base(base), PHI(phi) {} - }; +public: + void CountRegister(const SCEV *Reg, size_t LUIdx); + + bool isRegUsedByUsesOtherThan(const SCEV *Reg, size_t LUIdx) const; + + const SmallBitVector &getUsedByIndices(const SCEV *Reg) const; + + void clear(); + + typedef SmallVectorImpl<const SCEV *>::iterator iterator; + typedef SmallVectorImpl<const SCEV *>::const_iterator const_iterator; + iterator begin() { return RegSequence.begin(); } + iterator end() { return RegSequence.end(); } + const_iterator begin() const { return RegSequence.begin(); } + const_iterator end() const { return RegSequence.end(); } +}; + +} - /// IVsOfOneStride - This structure keeps track of all IV expression inserted - /// during StrengthReduceStridedIVUsers for a particular stride of the IV. 
- struct IVsOfOneStride { - std::vector<IVExpr> IVs; +void +RegUseTracker::CountRegister(const SCEV *Reg, size_t LUIdx) { + std::pair<RegUsesTy::iterator, bool> Pair = + RegUses.insert(std::make_pair(Reg, RegSortData())); + RegSortData &RSD = Pair.first->second; + if (Pair.second) + RegSequence.push_back(Reg); + RSD.UsedByIndices.resize(std::max(RSD.UsedByIndices.size(), LUIdx + 1)); + RSD.UsedByIndices.set(LUIdx); +} + +bool +RegUseTracker::isRegUsedByUsesOtherThan(const SCEV *Reg, size_t LUIdx) const { + if (!RegUses.count(Reg)) return false; + const SmallBitVector &UsedByIndices = + RegUses.find(Reg)->second.UsedByIndices; + int i = UsedByIndices.find_first(); + if (i == -1) return false; + if ((size_t)i != LUIdx) return true; + return UsedByIndices.find_next(i) != -1; +} + +const SmallBitVector &RegUseTracker::getUsedByIndices(const SCEV *Reg) const { + RegUsesTy::const_iterator I = RegUses.find(Reg); + assert(I != RegUses.end() && "Unknown register!"); + return I->second.UsedByIndices; +} + +void RegUseTracker::clear() { + RegUses.clear(); + RegSequence.clear(); +} + +namespace { + +/// Formula - This class holds information that describes a formula for +/// computing satisfying a use. It may include broken-out immediates and scaled +/// registers. +struct Formula { + /// AM - This is used to represent complex addressing, as well as other kinds + /// of interesting uses. + TargetLowering::AddrMode AM; + + /// BaseRegs - The list of "base" registers for this use. When this is + /// non-empty, AM.HasBaseReg should be set to true. + SmallVector<const SCEV *, 2> BaseRegs; - void addIV(const SCEV *const Stride, const SCEV *const Base, PHINode *PHI) { - IVs.push_back(IVExpr(Stride, Base, PHI)); + /// ScaledReg - The 'scaled' register for this use. This should be non-null + /// when AM.Scale is not zero. + const SCEV *ScaledReg; + + Formula() : ScaledReg(0) {} + + void InitialMatch(const SCEV *S, Loop *L, + ScalarEvolution &SE, DominatorTree &DT); + + unsigned getNumRegs() const; + const Type *getType() const; + + bool referencesReg(const SCEV *S) const; + bool hasRegsUsedByUsesOtherThan(size_t LUIdx, + const RegUseTracker &RegUses) const; + + void print(raw_ostream &OS) const; + void dump() const; +}; + +} + +/// DoInitialMatch - Recurrsion helper for InitialMatch. +static void DoInitialMatch(const SCEV *S, Loop *L, + SmallVectorImpl<const SCEV *> &Good, + SmallVectorImpl<const SCEV *> &Bad, + ScalarEvolution &SE, DominatorTree &DT) { + // Collect expressions which properly dominate the loop header. + if (S->properlyDominates(L->getHeader(), &DT)) { + Good.push_back(S); + return; + } + + // Look at add operands. + if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) { + for (SCEVAddExpr::op_iterator I = Add->op_begin(), E = Add->op_end(); + I != E; ++I) + DoInitialMatch(*I, L, Good, Bad, SE, DT); + return; + } + + // Look at addrec operands. + if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) + if (!AR->getStart()->isZero()) { + DoInitialMatch(AR->getStart(), L, Good, Bad, SE, DT); + DoInitialMatch(SE.getAddRecExpr(SE.getIntegerSCEV(0, AR->getType()), + AR->getStepRecurrence(SE), + AR->getLoop()), + L, Good, Bad, SE, DT); + return; } - }; - class LoopStrengthReduce : public LoopPass { - IVUsers *IU; - ScalarEvolution *SE; - bool Changed; - - /// IVsByStride - Keep track of all IVs that have been inserted for a - /// particular stride. 
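As a quick sketch of how the new RegUseTracker above is meant to be driven
(the register value and use indices here are hypothetical):

    RegUseTracker RegUses;
    const SCEV *Reg = ...;          // some register-candidate SCEV
    RegUses.CountRegister(Reg, 0);  // LSRUse #0 references Reg
    RegUses.CountRegister(Reg, 2);  // LSRUse #2 references Reg too
    // True: the bit for use #2 is also set in Reg's UsedByIndices.
    bool Shared = RegUses.isRegUsedByUsesOtherThan(Reg, 0);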
- std::map<const SCEV *, IVsOfOneStride> IVsByStride; - - /// DeadInsts - Keep track of instructions we may have made dead, so that - /// we can remove them after we are done working. - SmallVector<WeakVH, 16> DeadInsts; - - /// TLI - Keep a pointer of a TargetLowering to consult for determining - /// transformation profitability. - const TargetLowering *TLI; - - public: - static char ID; // Pass ID, replacement for typeid - explicit LoopStrengthReduce(const TargetLowering *tli = NULL) : - LoopPass(&ID), TLI(tli) {} - - bool runOnLoop(Loop *L, LPPassManager &LPM); - - virtual void getAnalysisUsage(AnalysisUsage &AU) const { - // We split critical edges, so we change the CFG. However, we do update - // many analyses if they are around. - AU.addPreservedID(LoopSimplifyID); - AU.addPreserved("loops"); - AU.addPreserved("domfrontier"); - AU.addPreserved("domtree"); - - AU.addRequiredID(LoopSimplifyID); - AU.addRequired<ScalarEvolution>(); - AU.addPreserved<ScalarEvolution>(); - AU.addRequired<IVUsers>(); - AU.addPreserved<IVUsers>(); + // Handle a multiplication by -1 (negation) if it didn't fold. + if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(S)) + if (Mul->getOperand(0)->isAllOnesValue()) { + SmallVector<const SCEV *, 4> Ops(Mul->op_begin()+1, Mul->op_end()); + const SCEV *NewMul = SE.getMulExpr(Ops); + + SmallVector<const SCEV *, 4> MyGood; + SmallVector<const SCEV *, 4> MyBad; + DoInitialMatch(NewMul, L, MyGood, MyBad, SE, DT); + const SCEV *NegOne = SE.getSCEV(ConstantInt::getAllOnesValue( + SE.getEffectiveSCEVType(NewMul->getType()))); + for (SmallVectorImpl<const SCEV *>::const_iterator I = MyGood.begin(), + E = MyGood.end(); I != E; ++I) + Good.push_back(SE.getMulExpr(NegOne, *I)); + for (SmallVectorImpl<const SCEV *>::const_iterator I = MyBad.begin(), + E = MyBad.end(); I != E; ++I) + Bad.push_back(SE.getMulExpr(NegOne, *I)); + return; } - private: - void OptimizeIndvars(Loop *L); - - /// OptimizeLoopTermCond - Change loop terminating condition to use the - /// postinc iv when possible. - void OptimizeLoopTermCond(Loop *L); - - /// OptimizeShadowIV - If IV is used in a int-to-float cast - /// inside the loop then try to eliminate the cast opeation. - void OptimizeShadowIV(Loop *L); - - /// OptimizeMax - Rewrite the loop's terminating condition - /// if it uses a max computation. - ICmpInst *OptimizeMax(Loop *L, ICmpInst *Cond, - IVStrideUse* &CondUse); - - /// OptimizeLoopCountIV - If, after all sharing of IVs, the IV used for - /// deciding when to exit the loop is used only for that purpose, try to - /// rearrange things so it counts down to a test against zero. - bool OptimizeLoopCountIV(Loop *L); - bool OptimizeLoopCountIVOfStride(const SCEV* &Stride, - IVStrideUse* &CondUse, Loop *L); - - /// StrengthReduceIVUsersOfStride - Strength reduce all of the users of a - /// single stride of IV. All of the users may have different starting - /// values, and this may not be the only stride. 
- void StrengthReduceIVUsersOfStride(const SCEV *Stride, - IVUsersOfOneStride &Uses, - Loop *L); - void StrengthReduceIVUsers(Loop *L); - - ICmpInst *ChangeCompareStride(Loop *L, ICmpInst *Cond, - IVStrideUse* &CondUse, - const SCEV* &CondStride, - bool PostPass = false); - - bool FindIVUserForCond(ICmpInst *Cond, IVStrideUse *&CondUse, - const SCEV* &CondStride); - bool RequiresTypeConversion(const Type *Ty, const Type *NewTy); - const SCEV *CheckForIVReuse(bool, bool, bool, const SCEV *, - IVExpr&, const Type*, - const std::vector<BasedUser>& UsersToProcess); - bool ValidScale(bool, int64_t, - const std::vector<BasedUser>& UsersToProcess); - bool ValidOffset(bool, int64_t, int64_t, - const std::vector<BasedUser>& UsersToProcess); - const SCEV *CollectIVUsers(const SCEV *Stride, - IVUsersOfOneStride &Uses, - Loop *L, - bool &AllUsesAreAddresses, - bool &AllUsesAreOutsideLoop, - std::vector<BasedUser> &UsersToProcess); - bool StrideMightBeShared(const SCEV *Stride, Loop *L, bool CheckPreInc); - bool ShouldUseFullStrengthReductionMode( - const std::vector<BasedUser> &UsersToProcess, - const Loop *L, - bool AllUsesAreAddresses, - const SCEV *Stride); - void PrepareToStrengthReduceFully( - std::vector<BasedUser> &UsersToProcess, - const SCEV *Stride, - const SCEV *CommonExprs, - const Loop *L, - SCEVExpander &PreheaderRewriter); - void PrepareToStrengthReduceFromSmallerStride( - std::vector<BasedUser> &UsersToProcess, - Value *CommonBaseV, - const IVExpr &ReuseIV, - Instruction *PreInsertPt); - void PrepareToStrengthReduceWithNewPhi( - std::vector<BasedUser> &UsersToProcess, - const SCEV *Stride, - const SCEV *CommonExprs, - Value *CommonBaseV, - Instruction *IVIncInsertPt, - const Loop *L, - SCEVExpander &PreheaderRewriter); - - void DeleteTriviallyDeadInstructions(); - }; + // Ok, we can't do anything interesting. Just stuff the whole thing into a + // register and hope for the best. + Bad.push_back(S); } -char LoopStrengthReduce::ID = 0; -static RegisterPass<LoopStrengthReduce> -X("loop-reduce", "Loop Strength Reduction"); +/// InitialMatch - Incorporate loop-variant parts of S into this Formula, +/// attempting to keep all loop-invariant and loop-computable values in a +/// single base register. +void Formula::InitialMatch(const SCEV *S, Loop *L, + ScalarEvolution &SE, DominatorTree &DT) { + SmallVector<const SCEV *, 4> Good; + SmallVector<const SCEV *, 4> Bad; + DoInitialMatch(S, L, Good, Bad, SE, DT); + if (!Good.empty()) { + BaseRegs.push_back(SE.getAddExpr(Good)); + AM.HasBaseReg = true; + } + if (!Bad.empty()) { + BaseRegs.push_back(SE.getAddExpr(Bad)); + AM.HasBaseReg = true; + } +} -Pass *llvm::createLoopStrengthReducePass(const TargetLowering *TLI) { - return new LoopStrengthReduce(TLI); +/// getNumRegs - Return the total number of register operands used by this +/// formula. This does not include register uses implied by non-constant +/// addrec strides. +unsigned Formula::getNumRegs() const { + return !!ScaledReg + BaseRegs.size(); } -/// DeleteTriviallyDeadInstructions - If any of the instructions is the -/// specified set are trivially dead, delete them and see if this makes any of -/// their operands subsequently dead. -void LoopStrengthReduce::DeleteTriviallyDeadInstructions() { - while (!DeadInsts.empty()) { - Instruction *I = dyn_cast_or_null<Instruction>(DeadInsts.pop_back_val()); +/// getType - Return the type of this formula, if it has one, or null +/// otherwise. This type is meaningless except for the bit size. 
+const Type *Formula::getType() const { + return !BaseRegs.empty() ? BaseRegs.front()->getType() : + ScaledReg ? ScaledReg->getType() : + AM.BaseGV ? AM.BaseGV->getType() : + 0; +} - if (I == 0 || !isInstructionTriviallyDead(I)) - continue; +/// referencesReg - Test if this formula references the given register. +bool Formula::referencesReg(const SCEV *S) const { + return S == ScaledReg || + std::find(BaseRegs.begin(), BaseRegs.end(), S) != BaseRegs.end(); +} - for (User::op_iterator OI = I->op_begin(), E = I->op_end(); OI != E; ++OI) - if (Instruction *U = dyn_cast<Instruction>(*OI)) { - *OI = 0; - if (U->use_empty()) - DeadInsts.push_back(U); +/// hasRegsUsedByUsesOtherThan - Test whether this formula uses registers +/// which are used by uses other than the use with the given index. +bool Formula::hasRegsUsedByUsesOtherThan(size_t LUIdx, + const RegUseTracker &RegUses) const { + if (ScaledReg) + if (RegUses.isRegUsedByUsesOtherThan(ScaledReg, LUIdx)) + return true; + for (SmallVectorImpl<const SCEV *>::const_iterator I = BaseRegs.begin(), + E = BaseRegs.end(); I != E; ++I) + if (RegUses.isRegUsedByUsesOtherThan(*I, LUIdx)) + return true; + return false; +} + +void Formula::print(raw_ostream &OS) const { + bool First = true; + if (AM.BaseGV) { + if (!First) OS << " + "; else First = false; + WriteAsOperand(OS, AM.BaseGV, /*PrintType=*/false); + } + if (AM.BaseOffs != 0) { + if (!First) OS << " + "; else First = false; + OS << AM.BaseOffs; + } + for (SmallVectorImpl<const SCEV *>::const_iterator I = BaseRegs.begin(), + E = BaseRegs.end(); I != E; ++I) { + if (!First) OS << " + "; else First = false; + OS << "reg(" << **I << ')'; + } + if (AM.Scale != 0) { + if (!First) OS << " + "; else First = false; + OS << AM.Scale << "*reg("; + if (ScaledReg) + OS << *ScaledReg; + else + OS << "<unknown>"; + OS << ')'; + } +} + +void Formula::dump() const { + print(errs()); errs() << '\n'; +} + +/// getSDiv - Return an expression for LHS /s RHS, if it can be determined, +/// or null otherwise. If IgnoreSignificantBits is true, expressions like +/// (X * Y) /s Y are simplified to Y, ignoring that the multiplication may +/// overflow, which is useful when the result will be used in a context where +/// the most significant bits are ignored. +static const SCEV *getSDiv(const SCEV *LHS, const SCEV *RHS, + ScalarEvolution &SE, + bool IgnoreSignificantBits = false) { + // Handle the trivial case, which works for any SCEV type. + if (LHS == RHS) + return SE.getIntegerSCEV(1, LHS->getType()); + + // Handle x /s -1 as x * -1, to give ScalarEvolution a chance to do some + // folding. + if (RHS->isAllOnesValue()) + return SE.getMulExpr(LHS, RHS); + + // Check for a division of a constant by a constant. + if (const SCEVConstant *C = dyn_cast<SCEVConstant>(LHS)) { + const SCEVConstant *RC = dyn_cast<SCEVConstant>(RHS); + if (!RC) + return 0; + if (C->getValue()->getValue().srem(RC->getValue()->getValue()) != 0) + return 0; + return SE.getConstant(C->getValue()->getValue() + .sdiv(RC->getValue()->getValue())); + } + + // Distribute the sdiv over addrec operands. + if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(LHS)) { + const SCEV *Start = getSDiv(AR->getStart(), RHS, SE, + IgnoreSignificantBits); + if (!Start) return 0; + const SCEV *Step = getSDiv(AR->getStepRecurrence(SE), RHS, SE, + IgnoreSignificantBits); + if (!Step) return 0; + return SE.getAddRecExpr(Start, Step, AR->getLoop()); + } + + // Distribute the sdiv over add operands. 
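+  // For example, distributing over an add of an addrec and a constant:
+  //   ({0,+,4}<%L> + 8) /s 4  ==>  {0,+,1}<%L> + 2
+  // (every operand must divide exactly, or getSDiv gives up and returns 0).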
+ if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(LHS)) { + SmallVector<const SCEV *, 8> Ops; + for (SCEVAddExpr::op_iterator I = Add->op_begin(), E = Add->op_end(); + I != E; ++I) { + const SCEV *Op = getSDiv(*I, RHS, SE, + IgnoreSignificantBits); + if (!Op) return 0; + Ops.push_back(Op); + } + return SE.getAddExpr(Ops); + } + + // Check for a multiply operand that we can pull RHS out of. + if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(LHS)) + if (IgnoreSignificantBits || Mul->hasNoSignedWrap()) { + SmallVector<const SCEV *, 4> Ops; + bool Found = false; + for (SCEVMulExpr::op_iterator I = Mul->op_begin(), E = Mul->op_end(); + I != E; ++I) { + if (!Found) + if (const SCEV *Q = getSDiv(*I, RHS, SE, IgnoreSignificantBits)) { + Ops.push_back(Q); + Found = true; + continue; + } + Ops.push_back(*I); } + return Found ? SE.getMulExpr(Ops) : 0; + } - I->eraseFromParent(); - Changed = true; + // Otherwise we don't know. + return 0; +} + +/// ExtractImmediate - If S involves the addition of a constant integer value, +/// return that integer value, and mutate S to point to a new SCEV with that +/// value excluded. +static int64_t ExtractImmediate(const SCEV *&S, ScalarEvolution &SE) { + if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S)) { + if (C->getValue()->getValue().getMinSignedBits() <= 64) { + S = SE.getIntegerSCEV(0, C->getType()); + return C->getValue()->getSExtValue(); + } + } else if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) { + SmallVector<const SCEV *, 8> NewOps(Add->op_begin(), Add->op_end()); + int64_t Result = ExtractImmediate(NewOps.front(), SE); + S = SE.getAddExpr(NewOps); + return Result; + } else if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) { + SmallVector<const SCEV *, 8> NewOps(AR->op_begin(), AR->op_end()); + int64_t Result = ExtractImmediate(NewOps.front(), SE); + S = SE.getAddRecExpr(NewOps, AR->getLoop()); + return Result; + } + return 0; +} + +/// ExtractSymbol - If S involves the addition of a GlobalValue address, +/// return that symbol, and mutate S to point to a new SCEV with that +/// value excluded. +static GlobalValue *ExtractSymbol(const SCEV *&S, ScalarEvolution &SE) { + if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) { + if (GlobalValue *GV = dyn_cast<GlobalValue>(U->getValue())) { + S = SE.getIntegerSCEV(0, GV->getType()); + return GV; + } + } else if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) { + SmallVector<const SCEV *, 8> NewOps(Add->op_begin(), Add->op_end()); + GlobalValue *Result = ExtractSymbol(NewOps.back(), SE); + S = SE.getAddExpr(NewOps); + return Result; + } else if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) { + SmallVector<const SCEV *, 8> NewOps(AR->op_begin(), AR->op_end()); + GlobalValue *Result = ExtractSymbol(NewOps.front(), SE); + S = SE.getAddRecExpr(NewOps, AR->getLoop()); + return Result; } + return 0; } /// isAddressUse - Returns true if the specified instruction is using the @@ -276,1775 +503,832 @@ static const Type *getAccessType(const Instruction *Inst) { break; } } - return AccessTy; -} - -namespace { - /// BasedUser - For a particular base value, keep information about how we've - /// partitioned the expression so far. - struct BasedUser { - /// Base - The Base value for the PHI node that needs to be inserted for - /// this use. As the use is processed, information gets moved from this - /// field to the Imm field (below). BasedUser values are sorted by this - /// field. - const SCEV *Base; - - /// Inst - The instruction using the induction variable. 
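To illustrate the two Extract helpers above (a sketch; the SCEV is written in
ScalarEvolution's print notation, and this assumes SCEV's usual operand
ordering, constants first and unknowns last):

    const SCEV *S = ...;                     // say S is (4 + %gv + %x)
    int64_t Imm = ExtractImmediate(S, SE);   // Imm == 4;  S becomes (%gv + %x)
    GlobalValue *GV = ExtractSymbol(S, SE);  // GV == %gv; S becomes %x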
- Instruction *Inst; - - /// OperandValToReplace - The operand value of Inst to replace with the - /// EmittedBase. - Value *OperandValToReplace; - - /// Imm - The immediate value that should be added to the base immediately - /// before Inst, because it will be folded into the imm field of the - /// instruction. This is also sometimes used for loop-variant values that - /// must be added inside the loop. - const SCEV *Imm; - - /// Phi - The induction variable that performs the striding that - /// should be used for this user. - PHINode *Phi; - - // isUseOfPostIncrementedValue - True if this should use the - // post-incremented version of this IV, not the preincremented version. - // This can only be set in special cases, such as the terminating setcc - // instruction for a loop and uses outside the loop that are dominated by - // the loop. - bool isUseOfPostIncrementedValue; - - BasedUser(IVStrideUse &IVSU, ScalarEvolution *se) - : Base(IVSU.getOffset()), Inst(IVSU.getUser()), - OperandValToReplace(IVSU.getOperandValToReplace()), - Imm(se->getIntegerSCEV(0, Base->getType())), - isUseOfPostIncrementedValue(IVSU.isUseOfPostIncrementedValue()) {} - - // Once we rewrite the code to insert the new IVs we want, update the - // operands of Inst to use the new expression 'NewBase', with 'Imm' added - // to it. - void RewriteInstructionToUseNewBase(const SCEV *NewBase, - Instruction *InsertPt, - SCEVExpander &Rewriter, Loop *L, Pass *P, - SmallVectorImpl<WeakVH> &DeadInsts, - ScalarEvolution *SE); - - Value *InsertCodeForBaseAtPosition(const SCEV *NewBase, - const Type *Ty, - SCEVExpander &Rewriter, - Instruction *IP, - ScalarEvolution *SE); - void dump() const; - }; -} - -void BasedUser::dump() const { - dbgs() << " Base=" << *Base; - dbgs() << " Imm=" << *Imm; - dbgs() << " Inst: " << *Inst; -} - -Value *BasedUser::InsertCodeForBaseAtPosition(const SCEV *NewBase, - const Type *Ty, - SCEVExpander &Rewriter, - Instruction *IP, - ScalarEvolution *SE) { - Value *Base = Rewriter.expandCodeFor(NewBase, 0, IP); - // Wrap the base in a SCEVUnknown so that ScalarEvolution doesn't try to - // re-analyze it. - const SCEV *NewValSCEV = SE->getUnknown(Base); + // All pointers have the same requirements, so canonicalize them to an + // arbitrary pointer type to minimize variation. + if (const PointerType *PTy = dyn_cast<PointerType>(AccessTy)) + AccessTy = PointerType::get(IntegerType::get(PTy->getContext(), 1), + PTy->getAddressSpace()); - // Always emit the immediate into the same block as the user. - NewValSCEV = SE->getAddExpr(NewValSCEV, Imm); - - return Rewriter.expandCodeFor(NewValSCEV, Ty, IP); + return AccessTy; } +/// DeleteTriviallyDeadInstructions - If any of the instructions is the +/// specified set are trivially dead, delete them and see if this makes any of +/// their operands subsequently dead. +static bool +DeleteTriviallyDeadInstructions(SmallVectorImpl<WeakVH> &DeadInsts) { + bool Changed = false; -// Once we rewrite the code to insert the new IVs we want, update the -// operands of Inst to use the new expression 'NewBase', with 'Imm' added -// to it. NewBasePt is the last instruction which contributes to the -// value of NewBase in the case that it's a diffferent instruction from -// the PHI that NewBase is computed from, or null otherwise. 
-// -void BasedUser::RewriteInstructionToUseNewBase(const SCEV *NewBase, - Instruction *NewBasePt, - SCEVExpander &Rewriter, Loop *L, Pass *P, - SmallVectorImpl<WeakVH> &DeadInsts, - ScalarEvolution *SE) { - if (!isa<PHINode>(Inst)) { - // By default, insert code at the user instruction. - BasicBlock::iterator InsertPt = Inst; - - // However, if the Operand is itself an instruction, the (potentially - // complex) inserted code may be shared by many users. Because of this, we - // want to emit code for the computation of the operand right before its old - // computation. This is usually safe, because we obviously used to use the - // computation when it was computed in its current block. However, in some - // cases (e.g. use of a post-incremented induction variable) the NewBase - // value will be pinned to live somewhere after the original computation. - // In this case, we have to back off. - // - // If this is a use outside the loop (which means after, since it is based - // on a loop indvar) we use the post-incremented value, so that we don't - // artificially make the preinc value live out the bottom of the loop. - if (!isUseOfPostIncrementedValue && L->contains(Inst)) { - if (NewBasePt && isa<PHINode>(OperandValToReplace)) { - InsertPt = NewBasePt; - ++InsertPt; - } else if (Instruction *OpInst - = dyn_cast<Instruction>(OperandValToReplace)) { - InsertPt = OpInst; - while (isa<PHINode>(InsertPt)) ++InsertPt; - } - } - Value *NewVal = InsertCodeForBaseAtPosition(NewBase, - OperandValToReplace->getType(), - Rewriter, InsertPt, SE); - // Replace the use of the operand Value with the new Phi we just created. - Inst->replaceUsesOfWith(OperandValToReplace, NewVal); - - DEBUG(dbgs() << " Replacing with "); - DEBUG(WriteAsOperand(dbgs(), NewVal, /*PrintType=*/false)); - DEBUG(dbgs() << ", which has value " << *NewBase << " plus IMM " - << *Imm << "\n"); - return; - } + while (!DeadInsts.empty()) { + Instruction *I = dyn_cast_or_null<Instruction>(DeadInsts.pop_back_val()); - // PHI nodes are more complex. We have to insert one copy of the NewBase+Imm - // expression into each operand block that uses it. Note that PHI nodes can - // have multiple entries for the same predecessor. We use a map to make sure - // that a PHI node only has a single Value* for each predecessor (which also - // prevents us from inserting duplicate code in some blocks). - DenseMap<BasicBlock*, Value*> InsertedCode; - PHINode *PN = cast<PHINode>(Inst); - for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { - if (PN->getIncomingValue(i) == OperandValToReplace) { - // If the original expression is outside the loop, put the replacement - // code in the same place as the original expression, - // which need not be an immediate predecessor of this PHI. This way we - // need only one copy of it even if it is referenced multiple times in - // the PHI. We don't do this when the original expression is inside the - // loop because multiple copies sometimes do useful sinking of code in - // that case(?). - Instruction *OldLoc = dyn_cast<Instruction>(OperandValToReplace); - BasicBlock *PHIPred = PN->getIncomingBlock(i); - if (L->contains(OldLoc)) { - // If this is a critical edge, split the edge so that we do not insert - // the code on all predecessor/successor paths. We do this unless this - // is the canonical backedge for this loop, as this can make some - // inserted code be in an illegal position. 
- if (e != 1 && PHIPred->getTerminator()->getNumSuccessors() > 1 && - !isa<IndirectBrInst>(PHIPred->getTerminator()) && - (PN->getParent() != L->getHeader() || !L->contains(PHIPred))) { - - // First step, split the critical edge. - BasicBlock *NewBB = SplitCriticalEdge(PHIPred, PN->getParent(), - P, false); - - // Next step: move the basic block. In particular, if the PHI node - // is outside of the loop, and PredTI is in the loop, we want to - // move the block to be immediately before the PHI block, not - // immediately after PredTI. - if (L->contains(PHIPred) && !L->contains(PN)) - NewBB->moveBefore(PN->getParent()); + if (I == 0 || !isInstructionTriviallyDead(I)) + continue; - // Splitting the edge can reduce the number of PHI entries we have. - e = PN->getNumIncomingValues(); - PHIPred = NewBB; - i = PN->getBasicBlockIndex(PHIPred); - } - } - Value *&Code = InsertedCode[PHIPred]; - if (!Code) { - // Insert the code into the end of the predecessor block. - Instruction *InsertPt = (L->contains(OldLoc)) ? - PHIPred->getTerminator() : - OldLoc->getParent()->getTerminator(); - Code = InsertCodeForBaseAtPosition(NewBase, PN->getType(), - Rewriter, InsertPt, SE); - - DEBUG(dbgs() << " Changing PHI use to "); - DEBUG(WriteAsOperand(dbgs(), Code, /*PrintType=*/false)); - DEBUG(dbgs() << ", which has value " << *NewBase << " plus IMM " - << *Imm << "\n"); + for (User::op_iterator OI = I->op_begin(), E = I->op_end(); OI != E; ++OI) + if (Instruction *U = dyn_cast<Instruction>(*OI)) { + *OI = 0; + if (U->use_empty()) + DeadInsts.push_back(U); } - // Replace the use of the operand Value with the new Phi we just created. - PN->setIncomingValue(i, Code); - Rewriter.clear(); - } + I->eraseFromParent(); + Changed = true; } - // PHI node might have become a constant value after SplitCriticalEdge. - DeadInsts.push_back(Inst); + return Changed; } +namespace { -/// fitsInAddressMode - Return true if V can be subsumed within an addressing -/// mode, and does not need to be put in a register first. -static bool fitsInAddressMode(const SCEV *V, const Type *AccessTy, - const TargetLowering *TLI, bool HasBaseReg) { - if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(V)) { - int64_t VC = SC->getValue()->getSExtValue(); - if (TLI) { - TargetLowering::AddrMode AM; - AM.BaseOffs = VC; - AM.HasBaseReg = HasBaseReg; - return TLI->isLegalAddressingMode(AM, AccessTy); - } else { - // Defaults to PPC. PPC allows a sign-extended 16-bit immediate field. - return (VC > -(1 << 16) && VC < (1 << 16)-1); - } - } - - if (const SCEVUnknown *SU = dyn_cast<SCEVUnknown>(V)) - if (GlobalValue *GV = dyn_cast<GlobalValue>(SU->getValue())) { - if (TLI) { - TargetLowering::AddrMode AM; - AM.BaseGV = GV; - AM.HasBaseReg = HasBaseReg; - return TLI->isLegalAddressingMode(AM, AccessTy); - } else { - // Default: assume global addresses are not legal. - } - } +/// Cost - This class is used to measure and compare candidate formulae. +class Cost { + /// TODO: Some of these could be merged. Also, a lexical ordering + /// isn't always optimal. 
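+  /// (operator< below compares these fields in declaration order, so the
+  /// register count dominates, then AddRecCost, and so on down to SetupCost.)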
+ unsigned NumRegs; + unsigned AddRecCost; + unsigned NumIVMuls; + unsigned NumBaseAdds; + unsigned ImmCost; + unsigned SetupCost; + +public: + Cost() + : NumRegs(0), AddRecCost(0), NumIVMuls(0), NumBaseAdds(0), ImmCost(0), + SetupCost(0) {} + + unsigned getNumRegs() const { return NumRegs; } + + bool operator<(const Cost &Other) const; + + void Loose(); + + void RateFormula(const Formula &F, + SmallPtrSet<const SCEV *, 16> &Regs, + const DenseSet<const SCEV *> &VisitedRegs, + const Loop *L, + const SmallVectorImpl<int64_t> &Offsets, + ScalarEvolution &SE, DominatorTree &DT); + + void print(raw_ostream &OS) const; + void dump() const; + +private: + void RateRegister(const SCEV *Reg, + SmallPtrSet<const SCEV *, 16> &Regs, + const Loop *L, + ScalarEvolution &SE, DominatorTree &DT); + void RatePrimaryRegister(const SCEV *Reg, + SmallPtrSet<const SCEV *, 16> &Regs, + const Loop *L, + ScalarEvolution &SE, DominatorTree &DT); +}; - return false; } -/// MoveLoopVariantsToImmediateField - Move any subexpressions from Val that are -/// loop varying to the Imm operand. -static void MoveLoopVariantsToImmediateField(const SCEV *&Val, const SCEV *&Imm, - Loop *L, ScalarEvolution *SE) { - if (Val->isLoopInvariant(L)) return; // Nothing to do. - - if (const SCEVAddExpr *SAE = dyn_cast<SCEVAddExpr>(Val)) { - SmallVector<const SCEV *, 4> NewOps; - NewOps.reserve(SAE->getNumOperands()); +/// RateRegister - Tally up interesting quantities from the given register. +void Cost::RateRegister(const SCEV *Reg, + SmallPtrSet<const SCEV *, 16> &Regs, + const Loop *L, + ScalarEvolution &SE, DominatorTree &DT) { + if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Reg)) { + if (AR->getLoop() == L) + AddRecCost += 1; /// TODO: This should be a function of the stride. + + // If this is an addrec for a loop that's already been visited by LSR, + // don't second-guess its addrec phi nodes. LSR isn't currently smart + // enough to reason about more than one loop at a time. Consider these + // registers free and leave them alone. + else if (L->contains(AR->getLoop()) || + (!AR->getLoop()->contains(L) && + DT.dominates(L->getHeader(), AR->getLoop()->getHeader()))) { + for (BasicBlock::iterator I = AR->getLoop()->getHeader()->begin(); + PHINode *PN = dyn_cast<PHINode>(I); ++I) + if (SE.isSCEVable(PN->getType()) && + (SE.getEffectiveSCEVType(PN->getType()) == + SE.getEffectiveSCEVType(AR->getType())) && + SE.getSCEV(PN) == AR) + return; - for (unsigned i = 0; i != SAE->getNumOperands(); ++i) - if (!SAE->getOperand(i)->isLoopInvariant(L)) { - // If this is a loop-variant expression, it must stay in the immediate - // field of the expression. - Imm = SE->getAddExpr(Imm, SAE->getOperand(i)); - } else { - NewOps.push_back(SAE->getOperand(i)); - } + // If this isn't one of the addrecs that the loop already has, it + // would require a costly new phi and add. TODO: This isn't + // precisely modeled right now. + ++NumBaseAdds; + if (!Regs.count(AR->getStart())) + RateRegister(AR->getStart(), Regs, L, SE, DT); + } - if (NewOps.empty()) - Val = SE->getIntegerSCEV(0, Val->getType()); - else - Val = SE->getAddExpr(NewOps); - } else if (const SCEVAddRecExpr *SARE = dyn_cast<SCEVAddRecExpr>(Val)) { - // Try to pull immediates out of the start value of nested addrec's. 
- const SCEV *Start = SARE->getStart(); - MoveLoopVariantsToImmediateField(Start, Imm, L, SE); - - SmallVector<const SCEV *, 4> Ops(SARE->op_begin(), SARE->op_end()); - Ops[0] = Start; - Val = SE->getAddRecExpr(Ops, SARE->getLoop()); - } else { - // Otherwise, all of Val is variant, move the whole thing over. - Imm = SE->getAddExpr(Imm, Val); - Val = SE->getIntegerSCEV(0, Val->getType()); + // Add the step value register, if it needs one. + // TODO: The non-affine case isn't precisely modeled here. + if (!AR->isAffine() || !isa<SCEVConstant>(AR->getOperand(1))) + if (!Regs.count(AR->getStart())) + RateRegister(AR->getOperand(1), Regs, L, SE, DT); } + ++NumRegs; + + // Rough heuristic; favor registers which don't require extra setup + // instructions in the preheader. + if (!isa<SCEVUnknown>(Reg) && + !isa<SCEVConstant>(Reg) && + !(isa<SCEVAddRecExpr>(Reg) && + (isa<SCEVUnknown>(cast<SCEVAddRecExpr>(Reg)->getStart()) || + isa<SCEVConstant>(cast<SCEVAddRecExpr>(Reg)->getStart())))) + ++SetupCost; } +/// RatePrimaryRegister - Record this register in the set. If we haven't seen it +/// before, rate it. +void Cost::RatePrimaryRegister(const SCEV *Reg, + SmallPtrSet<const SCEV *, 16> &Regs, + const Loop *L, + ScalarEvolution &SE, DominatorTree &DT) { + if (Regs.insert(Reg)) + RateRegister(Reg, Regs, L, SE, DT); +} -/// MoveImmediateValues - Look at Val, and pull out any additions of constants -/// that can fit into the immediate field of instructions in the target. -/// Accumulate these immediate values into the Imm value. -static void MoveImmediateValues(const TargetLowering *TLI, - const Type *AccessTy, - const SCEV *&Val, const SCEV *&Imm, - bool isAddress, Loop *L, - ScalarEvolution *SE) { - if (const SCEVAddExpr *SAE = dyn_cast<SCEVAddExpr>(Val)) { - SmallVector<const SCEV *, 4> NewOps; - NewOps.reserve(SAE->getNumOperands()); - - for (unsigned i = 0; i != SAE->getNumOperands(); ++i) { - const SCEV *NewOp = SAE->getOperand(i); - MoveImmediateValues(TLI, AccessTy, NewOp, Imm, isAddress, L, SE); - - if (!NewOp->isLoopInvariant(L)) { - // If this is a loop-variant expression, it must stay in the immediate - // field of the expression. - Imm = SE->getAddExpr(Imm, NewOp); - } else { - NewOps.push_back(NewOp); - } - } - - if (NewOps.empty()) - Val = SE->getIntegerSCEV(0, Val->getType()); - else - Val = SE->getAddExpr(NewOps); - return; - } else if (const SCEVAddRecExpr *SARE = dyn_cast<SCEVAddRecExpr>(Val)) { - // Try to pull immediates out of the start value of nested addrec's. - const SCEV *Start = SARE->getStart(); - MoveImmediateValues(TLI, AccessTy, Start, Imm, isAddress, L, SE); - - if (Start != SARE->getStart()) { - SmallVector<const SCEV *, 4> Ops(SARE->op_begin(), SARE->op_end()); - Ops[0] = Start; - Val = SE->getAddRecExpr(Ops, SARE->getLoop()); +void Cost::RateFormula(const Formula &F, + SmallPtrSet<const SCEV *, 16> &Regs, + const DenseSet<const SCEV *> &VisitedRegs, + const Loop *L, + const SmallVectorImpl<int64_t> &Offsets, + ScalarEvolution &SE, DominatorTree &DT) { + // Tally up the registers. + if (const SCEV *ScaledReg = F.ScaledReg) { + if (VisitedRegs.count(ScaledReg)) { + Loose(); + return; } - return; - } else if (const SCEVMulExpr *SME = dyn_cast<SCEVMulExpr>(Val)) { - // Transform "8 * (4 + v)" -> "32 + 8*V" if "32" fits in the immed field. 
-    if (isAddress &&
-        fitsInAddressMode(SME->getOperand(0), AccessTy, TLI, false) &&
-        SME->getNumOperands() == 2 && SME->isLoopInvariant(L)) {
-
-      const SCEV *SubImm = SE->getIntegerSCEV(0, Val->getType());
-      const SCEV *NewOp = SME->getOperand(1);
-      MoveImmediateValues(TLI, AccessTy, NewOp, SubImm, isAddress, L, SE);
-
-      // If we extracted something out of the subexpressions, see if we can
-      // simplify this!
-      if (NewOp != SME->getOperand(1)) {
-        // Scale SubImm up by "8". If the result is a target constant, we are
-        // good.
-        SubImm = SE->getMulExpr(SubImm, SME->getOperand(0));
-        if (fitsInAddressMode(SubImm, AccessTy, TLI, false)) {
-          // Accumulate the immediate.
-          Imm = SE->getAddExpr(Imm, SubImm);
-
-          // Update what is left of 'Val'.
-          Val = SE->getMulExpr(SME->getOperand(0), NewOp);
-          return;
-        }
-      }
+    RatePrimaryRegister(ScaledReg, Regs, L, SE, DT);
+  }
+  for (SmallVectorImpl<const SCEV *>::const_iterator I = F.BaseRegs.begin(),
+       E = F.BaseRegs.end(); I != E; ++I) {
+    const SCEV *BaseReg = *I;
+    if (VisitedRegs.count(BaseReg)) {
+      Loose();
+      return;
     }
+    RatePrimaryRegister(BaseReg, Regs, L, SE, DT);
+
+    NumIVMuls += isa<SCEVMulExpr>(BaseReg) &&
+                 BaseReg->hasComputableLoopEvolution(L);
   }
-  // Loop-variant expressions must stay in the immediate field of the
-  // expression.
-  if ((isAddress && fitsInAddressMode(Val, AccessTy, TLI, false)) ||
-      !Val->isLoopInvariant(L)) {
-    Imm = SE->getAddExpr(Imm, Val);
-    Val = SE->getIntegerSCEV(0, Val->getType());
-    return;
+  if (F.BaseRegs.size() > 1)
+    NumBaseAdds += F.BaseRegs.size() - 1;
+
+  // Tally up the non-zero immediates.
+  for (SmallVectorImpl<int64_t>::const_iterator I = Offsets.begin(),
+       E = Offsets.end(); I != E; ++I) {
+    int64_t Offset = (uint64_t)*I + F.AM.BaseOffs;
+    if (F.AM.BaseGV)
+      ImmCost += 64; // Handle symbolic values conservatively.
+                     // TODO: This should probably be the pointer size.
+    else if (Offset != 0)
+      ImmCost += APInt(64, Offset, true).getMinSignedBits();
+  }
+}
-  // Otherwise, no immediates to move.
+/// Loose - Set this cost to a losing value.
+void Cost::Loose() {
+  NumRegs = ~0u;
+  AddRecCost = ~0u;
+  NumIVMuls = ~0u;
+  NumBaseAdds = ~0u;
+  ImmCost = ~0u;
+  SetupCost = ~0u;
 }
-static void MoveImmediateValues(const TargetLowering *TLI,
-                                Instruction *User,
-                                const SCEV *&Val, const SCEV *&Imm,
-                                bool isAddress, Loop *L,
-                                ScalarEvolution *SE) {
-  const Type *AccessTy = getAccessType(User);
-  MoveImmediateValues(TLI, AccessTy, Val, Imm, isAddress, L, SE);
+/// operator< - Choose the lower cost.
+bool Cost::operator<(const Cost &Other) const {
+  if (NumRegs != Other.NumRegs)
+    return NumRegs < Other.NumRegs;
+  if (AddRecCost != Other.AddRecCost)
+    return AddRecCost < Other.AddRecCost;
+  if (NumIVMuls != Other.NumIVMuls)
+    return NumIVMuls < Other.NumIVMuls;
+  if (NumBaseAdds != Other.NumBaseAdds)
+    return NumBaseAdds < Other.NumBaseAdds;
+  if (ImmCost != Other.ImmCost)
+    return ImmCost < Other.ImmCost;
+  if (SetupCost != Other.SetupCost)
+    return SetupCost < Other.SetupCost;
+  return false;
 }
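The Cost comparison above is strictly lexicographic: a formula needing fewer registers wins no matter how the later components compare, AddRecCost only breaks register-count ties, and so on down to SetupCost. A minimal sketch of the same ordering written with std::tie (C++11; the tree was C++03 at this point, which is why the patch hand-writes the chain):

#include <tuple>

// Hypothetical stand-in type, not part of the patch. std::tie builds tuples
// of references that compare element-wise, so the first differing component
// decides the ordering, exactly like Cost::operator< above.
struct ExampleCost {
  unsigned NumRegs, AddRecCost, NumIVMuls, NumBaseAdds, ImmCost, SetupCost;
  bool operator<(const ExampleCost &RHS) const {
    return std::tie(NumRegs, AddRecCost, NumIVMuls, NumBaseAdds, ImmCost,
                    SetupCost) <
           std::tie(RHS.NumRegs, RHS.AddRecCost, RHS.NumIVMuls,
                    RHS.NumBaseAdds, RHS.ImmCost, RHS.SetupCost);
  }
};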
-/// SeparateSubExprs - Decompose Expr into all of the subexpressions that are
-/// added together. This is used to reassociate common addition subexprs
-/// together for maximal sharing when rewriting bases.
-static void SeparateSubExprs(SmallVector<const SCEV *, 16> &SubExprs,
-                             const SCEV *Expr,
-                             ScalarEvolution *SE) {
-  if (const SCEVAddExpr *AE = dyn_cast<SCEVAddExpr>(Expr)) {
-    for (unsigned j = 0, e = AE->getNumOperands(); j != e; ++j)
-      SeparateSubExprs(SubExprs, AE->getOperand(j), SE);
-  } else if (const SCEVAddRecExpr *SARE = dyn_cast<SCEVAddRecExpr>(Expr)) {
-    const SCEV *Zero = SE->getIntegerSCEV(0, Expr->getType());
-    if (SARE->getOperand(0) == Zero) {
-      SubExprs.push_back(Expr);
-    } else {
-      // Compute the addrec with zero as its base.
-      SmallVector<const SCEV *, 4> Ops(SARE->op_begin(), SARE->op_end());
-      Ops[0] = Zero;   // Start with zero base.
-      SubExprs.push_back(SE->getAddRecExpr(Ops, SARE->getLoop()));
+void Cost::print(raw_ostream &OS) const {
+  OS << NumRegs << " reg" << (NumRegs == 1 ? "" : "s");
+  if (AddRecCost != 0)
+    OS << ", with addrec cost " << AddRecCost;
+  if (NumIVMuls != 0)
+    OS << ", plus " << NumIVMuls << " IV mul" << (NumIVMuls == 1 ? "" : "s");
+  if (NumBaseAdds != 0)
+    OS << ", plus " << NumBaseAdds << " base add"
+       << (NumBaseAdds == 1 ? "" : "s");
+  if (ImmCost != 0)
+    OS << ", plus " << ImmCost << " imm cost";
+  if (SetupCost != 0)
+    OS << ", plus " << SetupCost << " setup cost";
+}
+void Cost::dump() const {
+  print(errs()); errs() << '\n';
+}
-      SeparateSubExprs(SubExprs, SARE->getOperand(0), SE);
-    }
-  } else if (!Expr->isZero()) {
-    // Do not add zero.
-    SubExprs.push_back(Expr);
-  }
-}
-
-// This is logically local to the following function, but C++ says we have
-// to make it file scope.
-struct SubExprUseData { unsigned Count; bool notAllUsesAreFree; };
-
-/// RemoveCommonExpressionsFromUseBases - Look through all of the Bases of all
-/// the Uses, removing any common subexpressions, except that if all such
-/// subexpressions can be folded into an addressing mode for all uses inside
-/// the loop (this case is referred to as "free" in comments herein) we do
-/// not remove anything. This looks for things like (a+b+c) and
-/// (a+c+d) and computes the common (a+c) subexpression. The common expression
-/// is *removed* from the Bases and returned.
-static const SCEV *
-RemoveCommonExpressionsFromUseBases(std::vector<BasedUser> &Uses,
-                                    ScalarEvolution *SE, Loop *L,
-                                    const TargetLowering *TLI) {
-  unsigned NumUses = Uses.size();
-
-  // Only one use? This is a very common case, so we handle it specially and
-  // cheaply.
-  const SCEV *Zero = SE->getIntegerSCEV(0, Uses[0].Base->getType());
-  const SCEV *Result = Zero;
-  const SCEV *FreeResult = Zero;
-  if (NumUses == 1) {
-    // If the use is inside the loop, use its base, regardless of what it is:
-    // it is clearly shared across all the IV's. If the use is outside the loop
-    // (which means after it) we don't want to factor anything *into* the loop,
-    // so just use 0 as the base.
-    if (L->contains(Uses[0].Inst))
-      std::swap(Result, Uses[0].Base);
-    return Result;
-  }
+namespace {
-  // To find common subexpressions, count how many of Uses use each expression.
-  // If any subexpressions are used Uses.size() times, they are common.
-  // Also track whether all uses of each expression can be moved into
-  // an addressing mode "for free"; such expressions are left within the loop.
-  // struct SubExprUseData { unsigned Count; bool notAllUsesAreFree; };
-  std::map<const SCEV *, SubExprUseData> SubExpressionUseData;
-
-  // UniqueSubExprs - Keep track of all of the subexpressions we see in the
-  // order we see them.
- SmallVector<const SCEV *, 16> UniqueSubExprs; - - SmallVector<const SCEV *, 16> SubExprs; - unsigned NumUsesInsideLoop = 0; - for (unsigned i = 0; i != NumUses; ++i) { - // If the user is outside the loop, just ignore it for base computation. - // Since the user is outside the loop, it must be *after* the loop (if it - // were before, it could not be based on the loop IV). We don't want users - // after the loop to affect base computation of values *inside* the loop, - // because we can always add their offsets to the result IV after the loop - // is done, ensuring we get good code inside the loop. - if (!L->contains(Uses[i].Inst)) - continue; - NumUsesInsideLoop++; +/// LSRFixup - An operand value in an instruction which is to be replaced +/// with some equivalent, possibly strength-reduced, replacement. +struct LSRFixup { + /// UserInst - The instruction which will be updated. + Instruction *UserInst; - // If the base is zero (which is common), return zero now, there are no - // CSEs we can find. - if (Uses[i].Base == Zero) return Zero; + /// OperandValToReplace - The operand of the instruction which will + /// be replaced. The operand may be used more than once; every instance + /// will be replaced. + Value *OperandValToReplace; - // If this use is as an address we may be able to put CSEs in the addressing - // mode rather than hoisting them. - bool isAddrUse = isAddressUse(Uses[i].Inst, Uses[i].OperandValToReplace); - // We may need the AccessTy below, but only when isAddrUse, so compute it - // only in that case. - const Type *AccessTy = 0; - if (isAddrUse) - AccessTy = getAccessType(Uses[i].Inst); - - // Split the expression into subexprs. - SeparateSubExprs(SubExprs, Uses[i].Base, SE); - // Add one to SubExpressionUseData.Count for each subexpr present, and - // if the subexpr is not a valid immediate within an addressing mode use, - // set SubExpressionUseData.notAllUsesAreFree. We definitely want to - // hoist these out of the loop (if they are common to all uses). - for (unsigned j = 0, e = SubExprs.size(); j != e; ++j) { - if (++SubExpressionUseData[SubExprs[j]].Count == 1) - UniqueSubExprs.push_back(SubExprs[j]); - if (!isAddrUse || !fitsInAddressMode(SubExprs[j], AccessTy, TLI, false)) - SubExpressionUseData[SubExprs[j]].notAllUsesAreFree = true; - } - SubExprs.clear(); - } - - // Now that we know how many times each is used, build Result. Iterate over - // UniqueSubexprs so that we have a stable ordering. - for (unsigned i = 0, e = UniqueSubExprs.size(); i != e; ++i) { - std::map<const SCEV *, SubExprUseData>::iterator I = - SubExpressionUseData.find(UniqueSubExprs[i]); - assert(I != SubExpressionUseData.end() && "Entry not found?"); - if (I->second.Count == NumUsesInsideLoop) { // Found CSE! - if (I->second.notAllUsesAreFree) - Result = SE->getAddExpr(Result, I->first); - else - FreeResult = SE->getAddExpr(FreeResult, I->first); - } else - // Remove non-cse's from SubExpressionUseData. - SubExpressionUseData.erase(I); - } - - if (FreeResult != Zero) { - // We have some subexpressions that can be subsumed into addressing - // modes in every use inside the loop. However, it's possible that - // there are so many of them that the combined FreeResult cannot - // be subsumed, or that the target cannot handle both a FreeResult - // and a Result in the same instruction (for example because it would - // require too many registers). Check this. 
- for (unsigned i=0; i<NumUses; ++i) { - if (!L->contains(Uses[i].Inst)) - continue; - // We know this is an addressing mode use; if there are any uses that - // are not, FreeResult would be Zero. - const Type *AccessTy = getAccessType(Uses[i].Inst); - if (!fitsInAddressMode(FreeResult, AccessTy, TLI, Result!=Zero)) { - // FIXME: could split up FreeResult into pieces here, some hoisted - // and some not. There is no obvious advantage to this. - Result = SE->getAddExpr(Result, FreeResult); - FreeResult = Zero; - break; - } - } - } + /// PostIncLoop - If this user is to use the post-incremented value of an + /// induction variable, this variable is non-null and holds the loop + /// associated with the induction variable. + const Loop *PostIncLoop; - // If we found no CSE's, return now. - if (Result == Zero) return Result; + /// LUIdx - The index of the LSRUse describing the expression which + /// this fixup needs, minus an offset (below). + size_t LUIdx; - // If we still have a FreeResult, remove its subexpressions from - // SubExpressionUseData. This means they will remain in the use Bases. - if (FreeResult != Zero) { - SeparateSubExprs(SubExprs, FreeResult, SE); - for (unsigned j = 0, e = SubExprs.size(); j != e; ++j) { - std::map<const SCEV *, SubExprUseData>::iterator I = - SubExpressionUseData.find(SubExprs[j]); - SubExpressionUseData.erase(I); - } - SubExprs.clear(); - } + /// Offset - A constant offset to be added to the LSRUse expression. + /// This allows multiple fixups to share the same LSRUse with different + /// offsets, for example in an unrolled loop. + int64_t Offset; - // Otherwise, remove all of the CSE's we found from each of the base values. - for (unsigned i = 0; i != NumUses; ++i) { - // Uses outside the loop don't necessarily include the common base, but - // the final IV value coming into those uses does. Instead of trying to - // remove the pieces of the common base, which might not be there, - // subtract off the base to compensate for this. - if (!L->contains(Uses[i].Inst)) { - Uses[i].Base = SE->getMinusSCEV(Uses[i].Base, Result); - continue; - } + LSRFixup(); - // Split the expression into subexprs. - SeparateSubExprs(SubExprs, Uses[i].Base, SE); + void print(raw_ostream &OS) const; + void dump() const; +}; - // Remove any common subexpressions. - for (unsigned j = 0, e = SubExprs.size(); j != e; ++j) - if (SubExpressionUseData.count(SubExprs[j])) { - SubExprs.erase(SubExprs.begin()+j); - --j; --e; - } +} - // Finally, add the non-shared expressions together. - if (SubExprs.empty()) - Uses[i].Base = Zero; - else - Uses[i].Base = SE->getAddExpr(SubExprs); - SubExprs.clear(); +LSRFixup::LSRFixup() + : UserInst(0), OperandValToReplace(0), PostIncLoop(0), + LUIdx(~size_t(0)), Offset(0) {} + +void LSRFixup::print(raw_ostream &OS) const { + OS << "UserInst="; + // Store is common and interesting enough to be worth special-casing. 
+ if (StoreInst *Store = dyn_cast<StoreInst>(UserInst)) { + OS << "store "; + WriteAsOperand(OS, Store->getOperand(0), /*PrintType=*/false); + } else if (UserInst->getType()->isVoidTy()) + OS << UserInst->getOpcodeName(); + else + WriteAsOperand(OS, UserInst, /*PrintType=*/false); + + OS << ", OperandValToReplace="; + WriteAsOperand(OS, OperandValToReplace, /*PrintType=*/false); + + if (PostIncLoop) { + OS << ", PostIncLoop="; + WriteAsOperand(OS, PostIncLoop->getHeader(), /*PrintType=*/false); } - return Result; -} + if (LUIdx != ~size_t(0)) + OS << ", LUIdx=" << LUIdx; -/// ValidScale - Check whether the given Scale is valid for all loads and -/// stores in UsersToProcess. -/// -bool LoopStrengthReduce::ValidScale(bool HasBaseReg, int64_t Scale, - const std::vector<BasedUser>& UsersToProcess) { - if (!TLI) - return true; + if (Offset != 0) + OS << ", Offset=" << Offset; +} - for (unsigned i = 0, e = UsersToProcess.size(); i!=e; ++i) { - // If this is a load or other access, pass the type of the access in. - const Type *AccessTy = - Type::getVoidTy(UsersToProcess[i].Inst->getContext()); - if (isAddressUse(UsersToProcess[i].Inst, - UsersToProcess[i].OperandValToReplace)) - AccessTy = getAccessType(UsersToProcess[i].Inst); - else if (isa<PHINode>(UsersToProcess[i].Inst)) - continue; +void LSRFixup::dump() const { + print(errs()); errs() << '\n'; +} - TargetLowering::AddrMode AM; - if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(UsersToProcess[i].Imm)) - AM.BaseOffs = SC->getValue()->getSExtValue(); - AM.HasBaseReg = HasBaseReg || !UsersToProcess[i].Base->isZero(); - AM.Scale = Scale; +namespace { - // If load[imm+r*scale] is illegal, bail out. - if (!TLI->isLegalAddressingMode(AM, AccessTy)) - return false; +/// UniquifierDenseMapInfo - A DenseMapInfo implementation for holding +/// DenseMaps and DenseSets of sorted SmallVectors of const SCEV*. +struct UniquifierDenseMapInfo { + static SmallVector<const SCEV *, 2> getEmptyKey() { + SmallVector<const SCEV *, 2> V; + V.push_back(reinterpret_cast<const SCEV *>(-1)); + return V; } - return true; -} - -/// ValidOffset - Check whether the given Offset is valid for all loads and -/// stores in UsersToProcess. -/// -bool LoopStrengthReduce::ValidOffset(bool HasBaseReg, - int64_t Offset, - int64_t Scale, - const std::vector<BasedUser>& UsersToProcess) { - if (!TLI) - return true; - for (unsigned i=0, e = UsersToProcess.size(); i!=e; ++i) { - // If this is a load or other access, pass the type of the access in. 
-    const Type *AccessTy =
-      Type::getVoidTy(UsersToProcess[i].Inst->getContext());
-    if (isAddressUse(UsersToProcess[i].Inst,
-                     UsersToProcess[i].OperandValToReplace))
-      AccessTy = getAccessType(UsersToProcess[i].Inst);
-    else if (isa<PHINode>(UsersToProcess[i].Inst))
-      continue;
+  static SmallVector<const SCEV *, 2> getTombstoneKey() {
+    SmallVector<const SCEV *, 2> V;
+    V.push_back(reinterpret_cast<const SCEV *>(-2));
+    return V;
+  }
-    TargetLowering::AddrMode AM;
-    if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(UsersToProcess[i].Imm))
-      AM.BaseOffs = SC->getValue()->getSExtValue();
-    AM.BaseOffs = (uint64_t)AM.BaseOffs + (uint64_t)Offset;
-    AM.HasBaseReg = HasBaseReg || !UsersToProcess[i].Base->isZero();
-    AM.Scale = Scale;
+  static unsigned getHashValue(const SmallVector<const SCEV *, 2> &V) {
+    unsigned Result = 0;
+    for (SmallVectorImpl<const SCEV *>::const_iterator I = V.begin(),
+         E = V.end(); I != E; ++I)
+      Result ^= DenseMapInfo<const SCEV *>::getHashValue(*I);
+    return Result;
+  }
-    // If load[imm+r*scale] is illegal, bail out.
-    if (!TLI->isLegalAddressingMode(AM, AccessTy))
-      return false;
+  static bool isEqual(const SmallVector<const SCEV *, 2> &LHS,
+                      const SmallVector<const SCEV *, 2> &RHS) {
+    return LHS == RHS;
   }
-  return true;
-}
+};
+
+/// LSRUse - This class holds the state that LSR keeps for each use in
+/// IVUsers, as well as uses invented by LSR itself. It includes information
+/// about what kinds of things can be folded into the user, information about
+/// the user itself, and information about how the use may be satisfied.
+/// TODO: Represent multiple users of the same expression in common?
+class LSRUse {
+  DenseSet<SmallVector<const SCEV *, 2>, UniquifierDenseMapInfo> Uniquifier;
+
+public:
+  /// KindType - An enum for a kind of use, indicating what types of
+  /// scaled and immediate operands it might support.
+  enum KindType {
+    Basic,   ///< A normal use, with no folding.
+    Special, ///< A special case of basic, allowing -1 scales.
+    Address, ///< An address use; folding according to TargetLowering
+    ICmpZero ///< An equality icmp with both operands folded into one.
+    // TODO: Add a generic icmp too?
+  };
-/// RequiresTypeConversion - Returns true if converting Ty1 to Ty2 is not
-/// a nop.
-bool LoopStrengthReduce::RequiresTypeConversion(const Type *Ty1,
-                                                const Type *Ty2) {
-  if (Ty1 == Ty2)
-    return false;
-  Ty1 = SE->getEffectiveSCEVType(Ty1);
-  Ty2 = SE->getEffectiveSCEVType(Ty2);
-  if (Ty1 == Ty2)
-    return false;
-  if (Ty1->canLosslesslyBitCastTo(Ty2))
-    return false;
-  if (TLI && TLI->isTruncateFree(Ty1, Ty2))
-    return false;
-  return true;
-}
+  KindType Kind;
+  const Type *AccessTy;
-/// CheckForIVReuse - Returns the multiple if the stride is the multiple
-/// of a previous stride and it is a legal value for the target addressing
-/// mode scale component and optional base reg. This allows the users of
-/// this stride to be rewritten as prev iv * factor. It returns 0 if no
-/// reuse is possible. Factors can be negative on some targets, e.g. ARM.
-///
-/// If all uses are outside the loop, we don't require that all multiplies
-/// be folded into the addressing mode, nor even that the factor be constant;
-/// a multiply (executed once) outside the loop is better than another IV
-/// within. Well, usually.
-const SCEV *LoopStrengthReduce::CheckForIVReuse(bool HasBaseReg, - bool AllUsesAreAddresses, - bool AllUsesAreOutsideLoop, - const SCEV *Stride, - IVExpr &IV, const Type *Ty, - const std::vector<BasedUser>& UsersToProcess) { - if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(Stride)) { - int64_t SInt = SC->getValue()->getSExtValue(); - for (unsigned NewStride = 0, e = IU->StrideOrder.size(); - NewStride != e; ++NewStride) { - std::map<const SCEV *, IVsOfOneStride>::iterator SI = - IVsByStride.find(IU->StrideOrder[NewStride]); - if (SI == IVsByStride.end() || !isa<SCEVConstant>(SI->first)) - continue; - // The other stride has no uses, don't reuse it. - std::map<const SCEV *, IVUsersOfOneStride *>::iterator UI = - IU->IVUsesByStride.find(IU->StrideOrder[NewStride]); - if (UI->second->Users.empty()) - continue; - int64_t SSInt = cast<SCEVConstant>(SI->first)->getValue()->getSExtValue(); - if (SI->first != Stride && - (unsigned(abs64(SInt)) < SSInt || (SInt % SSInt) != 0)) - continue; - int64_t Scale = SInt / SSInt; - // Check that this stride is valid for all the types used for loads and - // stores; if it can be used for some and not others, we might as well use - // the original stride everywhere, since we have to create the IV for it - // anyway. If the scale is 1, then we don't need to worry about folding - // multiplications. - if (Scale == 1 || - (AllUsesAreAddresses && - ValidScale(HasBaseReg, Scale, UsersToProcess))) { - // Prefer to reuse an IV with a base of zero. - for (std::vector<IVExpr>::iterator II = SI->second.IVs.begin(), - IE = SI->second.IVs.end(); II != IE; ++II) - // Only reuse previous IV if it would not require a type conversion - // and if the base difference can be folded. - if (II->Base->isZero() && - !RequiresTypeConversion(II->Base->getType(), Ty)) { - IV = *II; - return SE->getIntegerSCEV(Scale, Stride->getType()); - } - // Otherwise, settle for an IV with a foldable base. - if (AllUsesAreAddresses) - for (std::vector<IVExpr>::iterator II = SI->second.IVs.begin(), - IE = SI->second.IVs.end(); II != IE; ++II) - // Only reuse previous IV if it would not require a type conversion - // and if the base difference can be folded. - if (SE->getEffectiveSCEVType(II->Base->getType()) == - SE->getEffectiveSCEVType(Ty) && - isa<SCEVConstant>(II->Base)) { - int64_t Base = - cast<SCEVConstant>(II->Base)->getValue()->getSExtValue(); - if (Base > INT32_MIN && Base <= INT32_MAX && - ValidOffset(HasBaseReg, -Base * Scale, - Scale, UsersToProcess)) { - IV = *II; - return SE->getIntegerSCEV(Scale, Stride->getType()); - } - } - } - } - } else if (AllUsesAreOutsideLoop) { - // Accept nonconstant strides here; it is really really right to substitute - // an existing IV if we can. - for (unsigned NewStride = 0, e = IU->StrideOrder.size(); - NewStride != e; ++NewStride) { - std::map<const SCEV *, IVsOfOneStride>::iterator SI = - IVsByStride.find(IU->StrideOrder[NewStride]); - if (SI == IVsByStride.end() || !isa<SCEVConstant>(SI->first)) - continue; - int64_t SSInt = cast<SCEVConstant>(SI->first)->getValue()->getSExtValue(); - if (SI->first != Stride && SSInt != 1) - continue; - for (std::vector<IVExpr>::iterator II = SI->second.IVs.begin(), - IE = SI->second.IVs.end(); II != IE; ++II) - // Accept nonzero base here. - // Only reuse previous IV if it would not require a type conversion. - if (!RequiresTypeConversion(II->Base->getType(), Ty)) { - IV = *II; - return Stride; - } - } - // Special case, old IV is -1*x and this one is x. Can treat this one as - // -1*old. 
- for (unsigned NewStride = 0, e = IU->StrideOrder.size(); - NewStride != e; ++NewStride) { - std::map<const SCEV *, IVsOfOneStride>::iterator SI = - IVsByStride.find(IU->StrideOrder[NewStride]); - if (SI == IVsByStride.end()) - continue; - if (const SCEVMulExpr *ME = dyn_cast<SCEVMulExpr>(SI->first)) - if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(ME->getOperand(0))) - if (Stride == ME->getOperand(1) && - SC->getValue()->getSExtValue() == -1LL) - for (std::vector<IVExpr>::iterator II = SI->second.IVs.begin(), - IE = SI->second.IVs.end(); II != IE; ++II) - // Accept nonzero base here. - // Only reuse previous IV if it would not require type conversion. - if (!RequiresTypeConversion(II->Base->getType(), Ty)) { - IV = *II; - return SE->getIntegerSCEV(-1LL, Stride->getType()); - } - } - } - return SE->getIntegerSCEV(0, Stride->getType()); -} - -/// PartitionByIsUseOfPostIncrementedValue - Simple boolean predicate that -/// returns true if Val's isUseOfPostIncrementedValue is true. -static bool PartitionByIsUseOfPostIncrementedValue(const BasedUser &Val) { - return Val.isUseOfPostIncrementedValue; -} - -/// isNonConstantNegative - Return true if the specified scev is negated, but -/// not a constant. -static bool isNonConstantNegative(const SCEV *Expr) { - const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(Expr); - if (!Mul) return false; - - // If there is a constant factor, it will be first. - const SCEVConstant *SC = dyn_cast<SCEVConstant>(Mul->getOperand(0)); - if (!SC) return false; - - // Return true if the value is negative, this matches things like (-42 * V). - return SC->getValue()->getValue().isNegative(); -} - -/// CollectIVUsers - Transform our list of users and offsets to a bit more -/// complex table. In this new vector, each 'BasedUser' contains 'Base', the -/// base of the strided accesses, as well as the old information from Uses. We -/// progressively move information from the Base field to the Imm field, until -/// we eventually have the full access expression to rewrite the use. -const SCEV *LoopStrengthReduce::CollectIVUsers(const SCEV *Stride, - IVUsersOfOneStride &Uses, - Loop *L, - bool &AllUsesAreAddresses, - bool &AllUsesAreOutsideLoop, - std::vector<BasedUser> &UsersToProcess) { - // FIXME: Generalize to non-affine IV's. - if (!Stride->isLoopInvariant(L)) - return SE->getIntegerSCEV(0, Stride->getType()); - - UsersToProcess.reserve(Uses.Users.size()); - for (ilist<IVStrideUse>::iterator I = Uses.Users.begin(), - E = Uses.Users.end(); I != E; ++I) { - UsersToProcess.push_back(BasedUser(*I, SE)); - - // Move any loop variant operands from the offset field to the immediate - // field of the use, so that we don't try to use something before it is - // computed. - MoveLoopVariantsToImmediateField(UsersToProcess.back().Base, - UsersToProcess.back().Imm, L, SE); - assert(UsersToProcess.back().Base->isLoopInvariant(L) && - "Base value is not loop invariant!"); - } - - // We now have a whole bunch of uses of like-strided induction variables, but - // they might all have different bases. We want to emit one PHI node for this - // stride which we fold as many common expressions (between the IVs) into as - // possible. Start by identifying the common expressions in the base values - // for the strides (e.g. if we have "A+C+B" and "A+B+D" as our bases, find - // "A+B"), emit it to the preheader, then remove the expression from the - // UsersToProcess base values. 
-  const SCEV *CommonExprs =
-    RemoveCommonExpressionsFromUseBases(UsersToProcess, SE, L, TLI);
-
-  // Next, figure out what we can represent in the immediate fields of
-  // instructions. If we can represent anything there, move it to the imm
-  // fields of the BasedUsers. We do this so that it increases the commonality
-  // of the remaining uses.
-  unsigned NumPHI = 0;
-  bool HasAddress = false;
-  for (unsigned i = 0, e = UsersToProcess.size(); i != e; ++i) {
-    // If the user is not in the current loop, this means it is using the exit
-    // value of the IV. Do not put anything in the base, make sure it's all in
-    // the immediate field to allow as much factoring as possible.
-    if (!L->contains(UsersToProcess[i].Inst)) {
-      UsersToProcess[i].Imm = SE->getAddExpr(UsersToProcess[i].Imm,
-                                             UsersToProcess[i].Base);
-      UsersToProcess[i].Base =
-        SE->getIntegerSCEV(0, UsersToProcess[i].Base->getType());
-    } else {
-      // Not all uses are outside the loop.
-      AllUsesAreOutsideLoop = false;
-
-      // Addressing modes can be folded into loads and stores. Be careful that
-      // the store is through the expression, not of the expression though.
-      bool isPHI = false;
-      bool isAddress = isAddressUse(UsersToProcess[i].Inst,
-                                    UsersToProcess[i].OperandValToReplace);
-      if (isa<PHINode>(UsersToProcess[i].Inst)) {
-        isPHI = true;
-        ++NumPHI;
-      }
+  SmallVector<int64_t, 8> Offsets;
+  int64_t MinOffset;
+  int64_t MaxOffset;
-      if (isAddress)
-        HasAddress = true;
+  /// AllFixupsOutsideLoop - This records whether all of the fixups using this
+  /// LSRUse are outside of the loop, in which case some special-case heuristics
+  /// may be used.
+  bool AllFixupsOutsideLoop;
-      // If this use isn't an address, then not all uses are addresses.
-      if (!isAddress && !isPHI)
-        AllUsesAreAddresses = false;
+  /// Formulae - A list of ways to build a value that can satisfy this user.
+  /// After the list is populated, one of these is selected heuristically and
+  /// used to formulate a replacement for OperandValToReplace in UserInst.
+  SmallVector<Formula, 12> Formulae;
-      MoveImmediateValues(TLI, UsersToProcess[i].Inst, UsersToProcess[i].Base,
-                          UsersToProcess[i].Imm, isAddress, L, SE);
-    }
-  }
+  /// Regs - The set of register candidates used by all formulae in this LSRUse.
+  SmallPtrSet<const SCEV *, 4> Regs;
-  // If one of the uses is a PHI node and all other uses are addresses, still
-  // allow iv reuse. Essentially we are trading one constant multiplication
-  // for one fewer iv.
-  if (NumPHI > 1)
-    AllUsesAreAddresses = false;
+  LSRUse(KindType K, const Type *T) : Kind(K), AccessTy(T),
+                                      MinOffset(INT64_MAX),
+                                      MaxOffset(INT64_MIN),
+                                      AllFixupsOutsideLoop(true) {}
-  // There are no in-loop address uses.
-  if (AllUsesAreAddresses && (!HasAddress && !AllUsesAreOutsideLoop))
-    AllUsesAreAddresses = false;
+  bool InsertFormula(size_t LUIdx, const Formula &F);
-  return CommonExprs;
-}
+  void check() const;
-/// ShouldUseFullStrengthReductionMode - Test whether full strength-reduction
-/// is valid and profitable for the given set of users of a stride. In
-/// full strength-reduction mode, all addresses at the current stride are
-/// strength-reduced all the way down to pointer arithmetic.
-///
-bool LoopStrengthReduce::ShouldUseFullStrengthReductionMode(
-                                const std::vector<BasedUser> &UsersToProcess,
-                                const Loop *L,
-                                bool AllUsesAreAddresses,
-                                const SCEV *Stride) {
-  if (!EnableFullLSRMode)
-    return false;
+  void print(raw_ostream &OS) const;
+  void dump() const;
+};
-  // The heuristics below aim to avoid increasing register pressure, but
-  // fully strength-reducing all the addresses increases the number of
-  // add instructions, so don't do this when optimizing for size.
-  // TODO: If the loop is large, the savings due to simpler addresses
-  // may outweigh the costs of the extra increment instructions.
-  if (L->getHeader()->getParent()->hasFnAttr(Attribute::OptimizeForSize))
-    return false;
+/// InsertFormula - If the given formula has not yet been inserted, add it to
+/// the list, and return true. Return false otherwise.
+bool LSRUse::InsertFormula(size_t LUIdx, const Formula &F) {
+  SmallVector<const SCEV *, 2> Key = F.BaseRegs;
+  if (F.ScaledReg) Key.push_back(F.ScaledReg);
+  // Unstable sort by host order ok, because this is only used for uniquifying.
+  std::sort(Key.begin(), Key.end());
-  // TODO: For now, don't do full strength reduction if there could
-  // potentially be greater-stride multiples of the current stride
-  // which could reuse the current stride IV.
-  if (IU->StrideOrder.back() != Stride)
+  if (!Uniquifier.insert(Key).second)
     return false;
-  // Iterate through the uses to find conditions that automatically rule out
-  // full-lsr mode.
-  for (unsigned i = 0, e = UsersToProcess.size(); i != e; ) {
-    const SCEV *Base = UsersToProcess[i].Base;
-    const SCEV *Imm = UsersToProcess[i].Imm;
-    // If any users have a loop-variant component, they can't be fully
-    // strength-reduced.
-    if (Imm && !Imm->isLoopInvariant(L))
-      return false;
-    // If there are two users with the same base and the difference between
-    // the two Imm values can't be folded into the address, full
-    // strength reduction would increase register pressure.
-    do {
-      const SCEV *CurImm = UsersToProcess[i].Imm;
-      if ((CurImm || Imm) && CurImm != Imm) {
-        if (!CurImm) CurImm = SE->getIntegerSCEV(0, Stride->getType());
-        if (!Imm) Imm = SE->getIntegerSCEV(0, Stride->getType());
-        const Instruction *Inst = UsersToProcess[i].Inst;
-        const Type *AccessTy = getAccessType(Inst);
-        const SCEV *Diff = SE->getMinusSCEV(UsersToProcess[i].Imm, Imm);
-        if (!Diff->isZero() &&
-            (!AllUsesAreAddresses ||
-             !fitsInAddressMode(Diff, AccessTy, TLI, /*HasBaseReg=*/true)))
-          return false;
-      }
-    } while (++i != e && Base == UsersToProcess[i].Base);
-  }
+  // Using a register to hold the value of 0 is not profitable.
+  assert((!F.ScaledReg || !F.ScaledReg->isZero()) &&
+         "Zero allocated in a scaled register!");
+#ifndef NDEBUG
+  for (SmallVectorImpl<const SCEV *>::const_iterator I =
+       F.BaseRegs.begin(), E = F.BaseRegs.end(); I != E; ++I)
+    assert(!(*I)->isZero() && "Zero allocated in a base register!");
+#endif
-  // If there's exactly one user in this stride, fully strength-reducing it
-  // won't increase register pressure. If it's starting from a non-zero base,
-  // it'll be simpler this way.
-  if (UsersToProcess.size() == 1 && !UsersToProcess[0].Base->isZero())
-    return true;
+  // Add the formula to the list.
+  Formulae.push_back(F);
-  // Otherwise, if there are any users in this stride that don't require
-  // a register for their base, full strength-reduction will increase
-  // register pressure.
- for (unsigned i = 0, e = UsersToProcess.size(); i != e; ++i) - if (UsersToProcess[i].Base->isZero()) - return false; + // Record registers now being used by this use. + if (F.ScaledReg) Regs.insert(F.ScaledReg); + Regs.insert(F.BaseRegs.begin(), F.BaseRegs.end()); - // Otherwise, go for it. return true; } -/// InsertAffinePhi Create and insert a PHI node for an induction variable -/// with the specified start and step values in the specified loop. -/// -/// If NegateStride is true, the stride should be negated by using a -/// subtract instead of an add. -/// -/// Return the created phi node. -/// -static PHINode *InsertAffinePhi(const SCEV *Start, const SCEV *Step, - Instruction *IVIncInsertPt, - const Loop *L, - SCEVExpander &Rewriter) { - assert(Start->isLoopInvariant(L) && "New PHI start is not loop invariant!"); - assert(Step->isLoopInvariant(L) && "New PHI stride is not loop invariant!"); - - BasicBlock *Header = L->getHeader(); - BasicBlock *Preheader = L->getLoopPreheader(); - BasicBlock *LatchBlock = L->getLoopLatch(); - const Type *Ty = Start->getType(); - Ty = Rewriter.SE.getEffectiveSCEVType(Ty); - - PHINode *PN = PHINode::Create(Ty, "lsr.iv", Header->begin()); - PN->addIncoming(Rewriter.expandCodeFor(Start, Ty, Preheader->getTerminator()), - Preheader); - - // If the stride is negative, insert a sub instead of an add for the - // increment. - bool isNegative = isNonConstantNegative(Step); - const SCEV *IncAmount = Step; - if (isNegative) - IncAmount = Rewriter.SE.getNegativeSCEV(Step); - - // Insert an add instruction right before the terminator corresponding - // to the back-edge or just before the only use. The location is determined - // by the caller and passed in as IVIncInsertPt. - Value *StepV = Rewriter.expandCodeFor(IncAmount, Ty, - Preheader->getTerminator()); - Instruction *IncV; - if (isNegative) { - IncV = BinaryOperator::CreateSub(PN, StepV, "lsr.iv.next", - IVIncInsertPt); - } else { - IncV = BinaryOperator::CreateAdd(PN, StepV, "lsr.iv.next", - IVIncInsertPt); - } - if (!isa<ConstantInt>(StepV)) ++NumVariable; - - PN->addIncoming(IncV, LatchBlock); - - ++NumInserted; - return PN; -} - -static void SortUsersToProcess(std::vector<BasedUser> &UsersToProcess) { - // We want to emit code for users inside the loop first. To do this, we - // rearrange BasedUser so that the entries at the end have - // isUseOfPostIncrementedValue = false, because we pop off the end of the - // vector (so we handle them first). - std::partition(UsersToProcess.begin(), UsersToProcess.end(), - PartitionByIsUseOfPostIncrementedValue); - - // Sort this by base, so that things with the same base are handled - // together. By partitioning first and stable-sorting later, we are - // guaranteed that within each base we will pop off users from within the - // loop before users outside of the loop with a particular base. - // - // We would like to use stable_sort here, but we can't. The problem is that - // const SCEV *'s don't have a deterministic ordering w.r.t to each other, so - // we don't have anything to do a '<' comparison on. Because we think the - // number of uses is small, do a horrible bubble sort which just relies on - // ==. - for (unsigned i = 0, e = UsersToProcess.size(); i != e; ++i) { - // Get a base value. - const SCEV *Base = UsersToProcess[i].Base; - - // Compact everything with this base to be consecutive with this one. 
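LSRUse::InsertFormula above canonicalizes each formula to a key (its base registers plus the scaled register, sorted by pointer value) and lets a set insertion reject duplicates; the sort order is host-dependent, which is harmless because the key is only ever compared for equality. A sketch of the same uniquifying idiom with standard containers (hypothetical names; std::set stands in for the DenseSet):

#include <algorithm>
#include <set>
#include <vector>

typedef std::vector<const void *> RegKey; // stand-in for the SCEV pointers

// Returns true only the first time an equivalent register set is seen.
bool insertUnique(std::set<RegKey> &Uniquifier, RegKey Key) {
  std::sort(Key.begin(), Key.end()); // canonicalize; any total order works
  return Uniquifier.insert(Key).second;
}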
-    for (unsigned j = i+1; j != e; ++j) {
-      if (UsersToProcess[j].Base == Base) {
-        std::swap(UsersToProcess[i+1], UsersToProcess[j]);
-        ++i;
-      }
-    }
+void LSRUse::print(raw_ostream &OS) const {
+  OS << "LSR Use: Kind=";
+  switch (Kind) {
+  case Basic:    OS << "Basic"; break;
+  case Special:  OS << "Special"; break;
+  case ICmpZero: OS << "ICmpZero"; break;
+  case Address:
+    OS << "Address of ";
+    if (isa<PointerType>(AccessTy))
+      OS << "pointer"; // the full pointer type could be really verbose
+    else
+      OS << *AccessTy;
   }
-}
-
-/// PrepareToStrengthReduceFully - Prepare to fully strength-reduce
-/// UsersToProcess, meaning lowering addresses all the way down to direct
-/// pointer arithmetic.
-///
-void
-LoopStrengthReduce::PrepareToStrengthReduceFully(
-                                std::vector<BasedUser> &UsersToProcess,
-                                const SCEV *Stride,
-                                const SCEV *CommonExprs,
-                                const Loop *L,
-                                SCEVExpander &PreheaderRewriter) {
-  DEBUG(dbgs() << "  Fully reducing all users\n");
-
-  // Rewrite the UsersToProcess records, creating a separate PHI for each
-  // unique Base value.
-  Instruction *IVIncInsertPt = L->getLoopLatch()->getTerminator();
-  for (unsigned i = 0, e = UsersToProcess.size(); i != e; ) {
-    // TODO: The uses are grouped by base, but not sorted. We arbitrarily
-    // pick the first Imm value here to start with, and adjust it for the
-    // other uses.
-    const SCEV *Imm = UsersToProcess[i].Imm;
-    const SCEV *Base = UsersToProcess[i].Base;
-    const SCEV *Start = SE->getAddExpr(CommonExprs, Base, Imm);
-    PHINode *Phi = InsertAffinePhi(Start, Stride, IVIncInsertPt, L,
-                                   PreheaderRewriter);
-    // Loop over all the users with the same base.
-    do {
-      UsersToProcess[i].Base = SE->getIntegerSCEV(0, Stride->getType());
-      UsersToProcess[i].Imm = SE->getMinusSCEV(UsersToProcess[i].Imm, Imm);
-      UsersToProcess[i].Phi = Phi;
-      assert(UsersToProcess[i].Imm->isLoopInvariant(L) &&
-             "ShouldUseFullStrengthReductionMode should reject this!");
-    } while (++i != e && Base == UsersToProcess[i].Base);
-  }
-}
-
-/// FindIVIncInsertPt - Return the location to insert the increment instruction.
-/// If the only use is a use of the postinc value (it must be the loop
-/// termination condition), then insert it just before the use.
-static Instruction *FindIVIncInsertPt(std::vector<BasedUser> &UsersToProcess,
-                                      const Loop *L) {
-  if (UsersToProcess.size() == 1 &&
-      UsersToProcess[0].isUseOfPostIncrementedValue &&
-      L->contains(UsersToProcess[0].Inst))
-    return UsersToProcess[0].Inst;
-  return L->getLoopLatch()->getTerminator();
-}
-
-/// PrepareToStrengthReduceWithNewPhi - Insert a new induction variable for the
-/// given users to share.
-///
-void
-LoopStrengthReduce::PrepareToStrengthReduceWithNewPhi(
-                                std::vector<BasedUser> &UsersToProcess,
-                                const SCEV *Stride,
-                                const SCEV *CommonExprs,
-                                Value *CommonBaseV,
-                                Instruction *IVIncInsertPt,
-                                const Loop *L,
-                                SCEVExpander &PreheaderRewriter) {
-  DEBUG(dbgs() << "  Inserting new PHI:\n");
-
-  PHINode *Phi = InsertAffinePhi(SE->getUnknown(CommonBaseV),
-                                 Stride, IVIncInsertPt, L,
-                                 PreheaderRewriter);
-
-  // Remember this in case a later stride is multiple of this.
-  IVsByStride[Stride].addIV(Stride, CommonExprs, Phi);
-
-  // All the users will share this new IV.
-  for (unsigned i = 0, e = UsersToProcess.size(); i != e; ++i)
-    UsersToProcess[i].Phi = Phi;
-
-  DEBUG(dbgs() << "    IV=");
-  DEBUG(WriteAsOperand(dbgs(), Phi, /*PrintType=*/false));
-  DEBUG(dbgs() << "\n");
-}
-
-/// PrepareToStrengthReduceFromSmallerStride - Prepare for the given users to
-/// reuse an induction variable with a stride that is a factor of the current
-/// induction variable.
-///
-void
-LoopStrengthReduce::PrepareToStrengthReduceFromSmallerStride(
-                                std::vector<BasedUser> &UsersToProcess,
-                                Value *CommonBaseV,
-                                const IVExpr &ReuseIV,
-                                Instruction *PreInsertPt) {
-  DEBUG(dbgs() << "  Rewriting in terms of existing IV of STRIDE "
-               << *ReuseIV.Stride << " and BASE " << *ReuseIV.Base << "\n");
-
-  // All the users will share the reused IV.
-  for (unsigned i = 0, e = UsersToProcess.size(); i != e; ++i)
-    UsersToProcess[i].Phi = ReuseIV.PHI;
-
-  Constant *C = dyn_cast<Constant>(CommonBaseV);
-  if (C &&
-      (!C->isNullValue() &&
-       !fitsInAddressMode(SE->getUnknown(CommonBaseV), CommonBaseV->getType(),
-                          TLI, false)))
-    // We want the common base emitted into the preheader! This is just
-    // using cast as a copy so BitCast (no-op cast) is appropriate
-    CommonBaseV = new BitCastInst(CommonBaseV, CommonBaseV->getType(),
-                                  "commonbase", PreInsertPt);
-}
-
-static bool IsImmFoldedIntoAddrMode(GlobalValue *GV, int64_t Offset,
-                                    const Type *AccessTy,
-                                    std::vector<BasedUser> &UsersToProcess,
-                                    const TargetLowering *TLI) {
-  SmallVector<Instruction*, 16> AddrModeInsts;
-  for (unsigned i = 0, e = UsersToProcess.size(); i != e; ++i) {
-    if (UsersToProcess[i].isUseOfPostIncrementedValue)
-      continue;
-    ExtAddrMode AddrMode =
-      AddressingModeMatcher::Match(UsersToProcess[i].OperandValToReplace,
-                                   AccessTy, UsersToProcess[i].Inst,
-                                   AddrModeInsts, *TLI);
-    if (GV && GV != AddrMode.BaseGV)
-      return false;
-    if (Offset && !AddrMode.BaseOffs)
-      // FIXME: How to accurately check whether its immediate offset is folded.
-      return false;
-    AddrModeInsts.clear();
+  OS << ", Offsets={";
+  for (SmallVectorImpl<int64_t>::const_iterator I = Offsets.begin(),
+       E = Offsets.end(); I != E; ++I) {
+    OS << *I;
+    if (next(I) != E)
+      OS << ',';
   }
-  return true;
-}
+  OS << '}';
-/// StrengthReduceIVUsersOfStride - Strength reduce all of the users of a single
-/// stride of IV. All of the users may have different starting values, and this
-/// may not be the only stride.
-void
-LoopStrengthReduce::StrengthReduceIVUsersOfStride(const SCEV *Stride,
-                                                  IVUsersOfOneStride &Uses,
-                                                  Loop *L) {
-  // If all the users are moved to another stride, then there is nothing to do.
-  if (Uses.Users.empty())
-    return;
+  if (AllFixupsOutsideLoop)
+    OS << ", all-fixups-outside-loop";
+}
-  // Keep track if every use in UsersToProcess is an address. If they all are,
-  // we may be able to rewrite the entire collection of them in terms of a
-  // smaller-stride IV.
-  bool AllUsesAreAddresses = true;
-
-  // Keep track if every use of a single stride is outside the loop. If so,
-  // we want to be more aggressive about reusing a smaller-stride IV; a
-  // multiply outside the loop is better than another IV inside. Well, usually.
-  bool AllUsesAreOutsideLoop = true;
-
-  // Transform our list of users and offsets to a bit more complex table. In
-  // this new vector, each 'BasedUser' contains 'Base', the base of the
-  // strided accesses, as well as the old information from Uses. We
-  // progressively move information from the Base field to the Imm field,
-  // until we eventually have the full access expression to rewrite the use.
- std::vector<BasedUser> UsersToProcess; - const SCEV *CommonExprs = CollectIVUsers(Stride, Uses, L, AllUsesAreAddresses, - AllUsesAreOutsideLoop, - UsersToProcess); - - // Sort the UsersToProcess array so that users with common bases are - // next to each other. - SortUsersToProcess(UsersToProcess); - - // If we managed to find some expressions in common, we'll need to carry - // their value in a register and add it in for each use. This will take up - // a register operand, which potentially restricts what stride values are - // valid. - bool HaveCommonExprs = !CommonExprs->isZero(); - const Type *ReplacedTy = CommonExprs->getType(); - - // If all uses are addresses, consider sinking the immediate part of the - // common expression back into uses if they can fit in the immediate fields. - if (TLI && HaveCommonExprs && AllUsesAreAddresses) { - const SCEV *NewCommon = CommonExprs; - const SCEV *Imm = SE->getIntegerSCEV(0, ReplacedTy); - MoveImmediateValues(TLI, Type::getVoidTy( - L->getLoopPreheader()->getContext()), - NewCommon, Imm, true, L, SE); - if (!Imm->isZero()) { - bool DoSink = true; - - // If the immediate part of the common expression is a GV, check if it's - // possible to fold it into the target addressing mode. - GlobalValue *GV = 0; - if (const SCEVUnknown *SU = dyn_cast<SCEVUnknown>(Imm)) - GV = dyn_cast<GlobalValue>(SU->getValue()); - int64_t Offset = 0; - if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(Imm)) - Offset = SC->getValue()->getSExtValue(); - if (GV || Offset) - // Pass VoidTy as the AccessTy to be conservative, because - // there could be multiple access types among all the uses. - DoSink = IsImmFoldedIntoAddrMode(GV, Offset, - Type::getVoidTy(L->getLoopPreheader()->getContext()), - UsersToProcess, TLI); - - if (DoSink) { - DEBUG(dbgs() << " Sinking " << *Imm << " back down into uses\n"); - for (unsigned i = 0, e = UsersToProcess.size(); i != e; ++i) - UsersToProcess[i].Imm = SE->getAddExpr(UsersToProcess[i].Imm, Imm); - CommonExprs = NewCommon; - HaveCommonExprs = !CommonExprs->isZero(); - ++NumImmSunk; - } - } - } +void LSRUse::dump() const { + print(errs()); errs() << '\n'; +} - // Now that we know what we need to do, insert the PHI node itself. - // - DEBUG(dbgs() << "LSR: Examining IVs of TYPE " << *ReplacedTy << " of STRIDE " - << *Stride << ":\n" - << " Common base: " << *CommonExprs << "\n"); +/// isLegalUse - Test whether the use described by AM is "legal", meaning it can +/// be completely folded into the user instruction at isel time. This includes +/// address-mode folding and special icmp tricks. +static bool isLegalUse(const TargetLowering::AddrMode &AM, + LSRUse::KindType Kind, const Type *AccessTy, + const TargetLowering *TLI) { + switch (Kind) { + case LSRUse::Address: + // If we have low-level target information, ask the target if it can + // completely fold this address. + if (TLI) return TLI->isLegalAddressingMode(AM, AccessTy); + + // Otherwise, just guess that reg+reg addressing is legal. + return !AM.BaseGV && AM.BaseOffs == 0 && AM.Scale <= 1; + + case LSRUse::ICmpZero: + // There's not even a target hook for querying whether it would be legal to + // fold a GV into an ICmp. + if (AM.BaseGV) + return false; - SCEVExpander Rewriter(*SE); - SCEVExpander PreheaderRewriter(*SE); + // ICmp only has two operands; don't allow more than two non-trivial parts. 
+ if (AM.Scale != 0 && AM.HasBaseReg && AM.BaseOffs != 0) + return false; - BasicBlock *Preheader = L->getLoopPreheader(); - Instruction *PreInsertPt = Preheader->getTerminator(); - BasicBlock *LatchBlock = L->getLoopLatch(); - Instruction *IVIncInsertPt = LatchBlock->getTerminator(); - - Value *CommonBaseV = Constant::getNullValue(ReplacedTy); - - const SCEV *RewriteFactor = SE->getIntegerSCEV(0, ReplacedTy); - IVExpr ReuseIV(SE->getIntegerSCEV(0, - Type::getInt32Ty(Preheader->getContext())), - SE->getIntegerSCEV(0, - Type::getInt32Ty(Preheader->getContext())), - 0); - - // Choose a strength-reduction strategy and prepare for it by creating - // the necessary PHIs and adjusting the bookkeeping. - if (ShouldUseFullStrengthReductionMode(UsersToProcess, L, - AllUsesAreAddresses, Stride)) { - PrepareToStrengthReduceFully(UsersToProcess, Stride, CommonExprs, L, - PreheaderRewriter); - } else { - // Emit the initial base value into the loop preheader. - CommonBaseV = PreheaderRewriter.expandCodeFor(CommonExprs, ReplacedTy, - PreInsertPt); - - // If all uses are addresses, check if it is possible to reuse an IV. The - // new IV must have a stride that is a multiple of the old stride; the - // multiple must be a number that can be encoded in the scale field of the - // target addressing mode; and we must have a valid instruction after this - // substitution, including the immediate field, if any. - RewriteFactor = CheckForIVReuse(HaveCommonExprs, AllUsesAreAddresses, - AllUsesAreOutsideLoop, - Stride, ReuseIV, ReplacedTy, - UsersToProcess); - if (!RewriteFactor->isZero()) - PrepareToStrengthReduceFromSmallerStride(UsersToProcess, CommonBaseV, - ReuseIV, PreInsertPt); - else { - IVIncInsertPt = FindIVIncInsertPt(UsersToProcess, L); - PrepareToStrengthReduceWithNewPhi(UsersToProcess, Stride, CommonExprs, - CommonBaseV, IVIncInsertPt, - L, PreheaderRewriter); - } - } + // ICmp only supports no scale or a -1 scale, as we can "fold" a -1 scale by + // putting the scaled register in the other operand of the icmp. + if (AM.Scale != 0 && AM.Scale != -1) + return false; - // Process all the users now, replacing their strided uses with - // strength-reduced forms. This outer loop handles all bases, the inner - // loop handles all users of a particular base. - while (!UsersToProcess.empty()) { - const SCEV *Base = UsersToProcess.back().Base; - Instruction *Inst = UsersToProcess.back().Inst; - - // Emit the code for Base into the preheader. - Value *BaseV = 0; - if (!Base->isZero()) { - BaseV = PreheaderRewriter.expandCodeFor(Base, 0, PreInsertPt); - - DEBUG(dbgs() << " INSERTING code for BASE = " << *Base << ":"); - if (BaseV->hasName()) - DEBUG(dbgs() << " Result value name = %" << BaseV->getName()); - DEBUG(dbgs() << "\n"); - - // If BaseV is a non-zero constant, make sure that it gets inserted into - // the preheader, instead of being forward substituted into the uses. We - // do this by forcing a BitCast (noop cast) to be inserted into the - // preheader in this case. - if (!fitsInAddressMode(Base, getAccessType(Inst), TLI, false) && - isa<Constant>(BaseV)) { - // We want this constant emitted into the preheader! This is just - // using cast as a copy so BitCast (no-op cast) is appropriate - BaseV = new BitCastInst(BaseV, BaseV->getType(), "preheaderinsert", - PreInsertPt); - } + // If we have low-level target information, ask the target if it can fold an + // integer immediate on an icmp. 
+ if (AM.BaseOffs != 0) { + if (TLI) return TLI->isLegalICmpImmediate(-AM.BaseOffs); + return false; } - // Emit the code to add the immediate offset to the Phi value, just before - // the instructions that we identified as using this stride and base. - do { - // FIXME: Use emitted users to emit other users. - BasedUser &User = UsersToProcess.back(); - - DEBUG(dbgs() << " Examining "); - if (User.isUseOfPostIncrementedValue) - DEBUG(dbgs() << "postinc"); - else - DEBUG(dbgs() << "preinc"); - DEBUG(dbgs() << " use "); - DEBUG(WriteAsOperand(dbgs(), UsersToProcess.back().OperandValToReplace, - /*PrintType=*/false)); - DEBUG(dbgs() << " in Inst: " << *User.Inst); - - // If this instruction wants to use the post-incremented value, move it - // after the post-inc and use its value instead of the PHI. - Value *RewriteOp = User.Phi; - if (User.isUseOfPostIncrementedValue) { - RewriteOp = User.Phi->getIncomingValueForBlock(LatchBlock); - // If this user is in the loop, make sure it is the last thing in the - // loop to ensure it is dominated by the increment. In case it's the - // only use of the iv, the increment instruction is already before the - // use. - if (L->contains(User.Inst) && User.Inst != IVIncInsertPt) - User.Inst->moveBefore(IVIncInsertPt); - } - - const SCEV *RewriteExpr = SE->getUnknown(RewriteOp); - - if (SE->getEffectiveSCEVType(RewriteOp->getType()) != - SE->getEffectiveSCEVType(ReplacedTy)) { - assert(SE->getTypeSizeInBits(RewriteOp->getType()) > - SE->getTypeSizeInBits(ReplacedTy) && - "Unexpected widening cast!"); - RewriteExpr = SE->getTruncateExpr(RewriteExpr, ReplacedTy); - } + return true; - // If we had to insert new instructions for RewriteOp, we have to - // consider that they may not have been able to end up immediately - // next to RewriteOp, because non-PHI instructions may never precede - // PHI instructions in a block. In this case, remember where the last - // instruction was inserted so that if we're replacing a different - // PHI node, we can use the later point to expand the final - // RewriteExpr. - Instruction *NewBasePt = dyn_cast<Instruction>(RewriteOp); - if (RewriteOp == User.Phi) NewBasePt = 0; - - // Clear the SCEVExpander's expression map so that we are guaranteed - // to have the code emitted where we expect it. - Rewriter.clear(); - - // If we are reusing the iv, then it must be multiplied by a constant - // factor to take advantage of the addressing mode scale component. - if (!RewriteFactor->isZero()) { - // If we're reusing an IV with a nonzero base (currently this happens - // only when all reuses are outside the loop) subtract that base here. - // The base has been used to initialize the PHI node but we don't want - // it here. - if (!ReuseIV.Base->isZero()) { - const SCEV *typedBase = ReuseIV.Base; - if (SE->getEffectiveSCEVType(RewriteExpr->getType()) != - SE->getEffectiveSCEVType(ReuseIV.Base->getType())) { - // It's possible the original IV is a larger type than the new IV, - // in which case we have to truncate the Base. We checked in - // RequiresTypeConversion that this is valid. - assert(SE->getTypeSizeInBits(RewriteExpr->getType()) < - SE->getTypeSizeInBits(ReuseIV.Base->getType()) && - "Unexpected lengthening conversion!"); - typedBase = SE->getTruncateExpr(ReuseIV.Base, - RewriteExpr->getType()); - } - RewriteExpr = SE->getMinusSCEV(RewriteExpr, typedBase); - } + case LSRUse::Basic: + // Only handle single-register values. 
+ return !AM.BaseGV && AM.Scale == 0 && AM.BaseOffs == 0; - // Multiply old variable, with base removed, by new scale factor. - RewriteExpr = SE->getMulExpr(RewriteFactor, - RewriteExpr); - - // The common base is emitted in the loop preheader. But since we - // are reusing an IV, it has not been used to initialize the PHI node. - // Add it to the expression used to rewrite the uses. - // When this use is outside the loop, we earlier subtracted the - // common base, and are adding it back here. Use the same expression - // as before, rather than CommonBaseV, so DAGCombiner will zap it. - if (!CommonExprs->isZero()) { - if (L->contains(User.Inst)) - RewriteExpr = SE->getAddExpr(RewriteExpr, - SE->getUnknown(CommonBaseV)); - else - RewriteExpr = SE->getAddExpr(RewriteExpr, CommonExprs); - } - } - - // Now that we know what we need to do, insert code before User for the - // immediate and any loop-variant expressions. - if (BaseV) - // Add BaseV to the PHI value if needed. - RewriteExpr = SE->getAddExpr(RewriteExpr, SE->getUnknown(BaseV)); - - User.RewriteInstructionToUseNewBase(RewriteExpr, NewBasePt, - Rewriter, L, this, - DeadInsts, SE); - - // Mark old value we replaced as possibly dead, so that it is eliminated - // if we just replaced the last use of that value. - DeadInsts.push_back(User.OperandValToReplace); - - UsersToProcess.pop_back(); - ++NumReduced; - - // If there are any more users to process with the same base, process them - // now. We sorted by base above, so we just have to check the last elt. - } while (!UsersToProcess.empty() && UsersToProcess.back().Base == Base); - // TODO: Next, find out which base index is the most common, pull it out. - } - - // IMPORTANT TODO: Figure out how to partition the IV's with this stride, but - // different starting values, into different PHIs. -} - -void LoopStrengthReduce::StrengthReduceIVUsers(Loop *L) { - // Note: this processes each stride/type pair individually. All users - // passed into StrengthReduceIVUsersOfStride have the same type AND stride. - // Also, note that we iterate over IVUsesByStride indirectly by using - // StrideOrder. This extra layer of indirection makes the ordering of - // strides deterministic - not dependent on map order. - for (unsigned Stride = 0, e = IU->StrideOrder.size(); Stride != e; ++Stride) { - std::map<const SCEV *, IVUsersOfOneStride *>::iterator SI = - IU->IVUsesByStride.find(IU->StrideOrder[Stride]); - assert(SI != IU->IVUsesByStride.end() && "Stride doesn't exist!"); - // FIXME: Generalize to non-affine IV's. - if (!SI->first->isLoopInvariant(L)) - continue; - StrengthReduceIVUsersOfStride(SI->first, *SI->second, L); + case LSRUse::Special: + // Only handle -1 scales, or no scale. + return AM.Scale == 0 || AM.Scale == -1; } + + return false; } -/// FindIVUserForCond - If Cond has an operand that is an expression of an IV, -/// set the IV user and stride information and return true, otherwise return -/// false. 
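The second isLegalUse overload, below, re-checks the addressing mode at both ends of the use's offset range, guarding each addition with a wrap test. The idiom ((int64_t)((uint64_t)a + b) > a) != (b > 0) flags signed overflow: the sum is formed in unsigned arithmetic (well defined on wrap), and adding a positive offset must strictly grow the value while a non-positive one must not. A standalone restatement of that guard (hypothetical helper name, not part of the patch):

#include <cstdint>

// True when Base + Offset wraps in int64_t arithmetic; mirrors the check
// performed before each AM.BaseOffs adjustment below.
static bool addWouldOverflow(int64_t Base, int64_t Offset) {
  int64_t Sum = (int64_t)((uint64_t)Base + (uint64_t)Offset);
  return (Sum > Base) != (Offset > 0);
}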
-bool LoopStrengthReduce::FindIVUserForCond(ICmpInst *Cond, - IVStrideUse *&CondUse, - const SCEV* &CondStride) { - for (unsigned Stride = 0, e = IU->StrideOrder.size(); - Stride != e && !CondUse; ++Stride) { - std::map<const SCEV *, IVUsersOfOneStride *>::iterator SI = - IU->IVUsesByStride.find(IU->StrideOrder[Stride]); - assert(SI != IU->IVUsesByStride.end() && "Stride doesn't exist!"); - - for (ilist<IVStrideUse>::iterator UI = SI->second->Users.begin(), - E = SI->second->Users.end(); UI != E; ++UI) - if (UI->getUser() == Cond) { - // NOTE: we could handle setcc instructions with multiple uses here, but - // InstCombine does it as well for simple uses, it's not clear that it - // occurs enough in real life to handle. - CondUse = UI; - CondStride = SI->first; - return true; - } +static bool isLegalUse(TargetLowering::AddrMode AM, + int64_t MinOffset, int64_t MaxOffset, + LSRUse::KindType Kind, const Type *AccessTy, + const TargetLowering *TLI) { + // Check for overflow. + if (((int64_t)((uint64_t)AM.BaseOffs + MinOffset) > AM.BaseOffs) != + (MinOffset > 0)) + return false; + AM.BaseOffs = (uint64_t)AM.BaseOffs + MinOffset; + if (isLegalUse(AM, Kind, AccessTy, TLI)) { + AM.BaseOffs = (uint64_t)AM.BaseOffs - MinOffset; + // Check for overflow. + if (((int64_t)((uint64_t)AM.BaseOffs + MaxOffset) > AM.BaseOffs) != + (MaxOffset > 0)) + return false; + AM.BaseOffs = (uint64_t)AM.BaseOffs + MaxOffset; + return isLegalUse(AM, Kind, AccessTy, TLI); } return false; } -namespace { - // Constant strides come first which in turns are sorted by their absolute - // values. If absolute values are the same, then positive strides comes first. - // e.g. - // 4, -1, X, 1, 2 ==> 1, -1, 2, 4, X - struct StrideCompare { - const ScalarEvolution *SE; - explicit StrideCompare(const ScalarEvolution *se) : SE(se) {} - - bool operator()(const SCEV *LHS, const SCEV *RHS) { - const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(LHS); - const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(RHS); - if (LHSC && RHSC) { - int64_t LV = LHSC->getValue()->getSExtValue(); - int64_t RV = RHSC->getValue()->getSExtValue(); - uint64_t ALV = (LV < 0) ? -LV : LV; - uint64_t ARV = (RV < 0) ? -RV : RV; - if (ALV == ARV) { - if (LV != RV) - return LV > RV; - } else { - return ALV < ARV; - } +static bool isAlwaysFoldable(int64_t BaseOffs, + GlobalValue *BaseGV, + bool HasBaseReg, + LSRUse::KindType Kind, const Type *AccessTy, + const TargetLowering *TLI, + ScalarEvolution &SE) { + // Fast-path: zero is always foldable. + if (BaseOffs == 0 && !BaseGV) return true; + + // Conservatively, create an address with an immediate and a + // base and a scale. + TargetLowering::AddrMode AM; + AM.BaseOffs = BaseOffs; + AM.BaseGV = BaseGV; + AM.HasBaseReg = HasBaseReg; + AM.Scale = Kind == LSRUse::ICmpZero ? -1 : 1; + + return isLegalUse(AM, Kind, AccessTy, TLI); +} - // If it's the same value but different type, sort by bit width so - // that we emit larger induction variables before smaller - // ones, letting the smaller be re-written in terms of larger ones. - return SE->getTypeSizeInBits(RHS->getType()) < - SE->getTypeSizeInBits(LHS->getType()); - } - return LHSC && !RHSC; - } - }; +static bool isAlwaysFoldable(const SCEV *S, + int64_t MinOffset, int64_t MaxOffset, + bool HasBaseReg, + LSRUse::KindType Kind, const Type *AccessTy, + const TargetLowering *TLI, + ScalarEvolution &SE) { + // Fast-path: zero is always foldable. + if (S->isZero()) return true; + + // Conservatively, create an address with an immediate and a + // base and a scale. 
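// A small sketch of the wraparound guard used by the range-checked
// isLegalUse overload above: after adding Offset, the sum must move in
// the direction of Offset's sign, otherwise the signed addition wrapped.
// The unsigned cast keeps the addition itself well-defined; converting
// back is what two's-complement targets (the ones LLVM supports) do.
// BaseOffs and Offset here are hypothetical example values.
#include <cstdint>

inline bool offsetAdditionWraps(int64_t BaseOffs, int64_t Offset) {
  int64_t Sum = (int64_t)((uint64_t)BaseOffs + (uint64_t)Offset);
  return (Sum > BaseOffs) != (Offset > 0);  // mismatch means wraparound
}
// e.g. offsetAdditionWraps(INT64_MAX, 1) is true,
//      offsetAdditionWraps(100, 8) and offsetAdditionWraps(100, 0) are false.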
+  int64_t BaseOffs = ExtractImmediate(S, SE);
+  GlobalValue *BaseGV = ExtractSymbol(S, SE);
+
+  // If there's anything else involved, it's not foldable.
+  if (!S->isZero()) return false;
+
+  // Fast-path: zero is always foldable.
+  if (BaseOffs == 0 && !BaseGV) return true;
+
+  // Conservatively, create an address with an immediate and a
+  // base and a scale.
+  TargetLowering::AddrMode AM;
+  AM.BaseOffs = BaseOffs;
+  AM.BaseGV = BaseGV;
+  AM.HasBaseReg = HasBaseReg;
+  AM.Scale = Kind == LSRUse::ICmpZero ? -1 : 1;
+
+  return isLegalUse(AM, MinOffset, MaxOffset, Kind, AccessTy, TLI);
+}
-/// ChangeCompareStride - If a loop termination compare instruction is the
-/// only use of its stride, and the compaison is against a constant value,
-/// try eliminate the stride by moving the compare instruction to another
-/// stride and change its constant operand accordingly. e.g.
-///
-/// loop:
-/// ...
-/// v1 = v1 + 3
-/// v2 = v2 + 1
-/// if (v2 < 10) goto loop
-/// =>
-/// loop:
-/// ...
-/// v1 = v1 + 3
-/// if (v1 < 30) goto loop
-ICmpInst *LoopStrengthReduce::ChangeCompareStride(Loop *L, ICmpInst *Cond,
-                                                  IVStrideUse* &CondUse,
-                                                  const SCEV* &CondStride,
-                                                  bool PostPass) {
-  // If there's only one stride in the loop, there's nothing to do here.
-  if (IU->StrideOrder.size() < 2)
-    return Cond;
-  // If there are other users of the condition's stride, don't bother
-  // trying to change the condition because the stride will still
-  // remain.
-  std::map<const SCEV *, IVUsersOfOneStride *>::iterator I =
-    IU->IVUsesByStride.find(CondStride);
-  if (I == IU->IVUsesByStride.end())
-    return Cond;
-  if (I->second->Users.size() > 1) {
-    for (ilist<IVStrideUse>::iterator II = I->second->Users.begin(),
-           EE = I->second->Users.end(); II != EE; ++II) {
-      if (II->getUser() == Cond)
-        continue;
-      if (!isInstructionTriviallyDead(II->getUser()))
-        return Cond;
-    }
+/// FormulaSorter - This class implements an ordering for formulae which sorts
+/// them by their standalone cost.
+class FormulaSorter {
+  /// These two sets are kept empty, so that we compute standalone costs.
+  DenseSet<const SCEV *> VisitedRegs;
+  SmallPtrSet<const SCEV *, 16> Regs;
+  Loop *L;
+  LSRUse *LU;
+  ScalarEvolution &SE;
+  DominatorTree &DT;
+
+public:
+  FormulaSorter(Loop *l, LSRUse &lu, ScalarEvolution &se, DominatorTree &dt)
+    : L(l), LU(&lu), SE(se), DT(dt) {}
+
+  bool operator()(const Formula &A, const Formula &B) {
+    Cost CostA;
+    CostA.RateFormula(A, Regs, VisitedRegs, L, LU->Offsets, SE, DT);
+    Regs.clear();
+    Cost CostB;
+    CostB.RateFormula(B, Regs, VisitedRegs, L, LU->Offsets, SE, DT);
+    Regs.clear();
+    return CostA < CostB;
+  }
+};
+
+/// LSRInstance - This class holds state for the main loop strength reduction
+/// logic.
+class LSRInstance {
+  IVUsers &IU;
+  ScalarEvolution &SE;
+  DominatorTree &DT;
+  const TargetLowering *const TLI;
+  Loop *const L;
+  bool Changed;
+
+  /// IVIncInsertPos - This is the insert position at which the current loop's
+  /// induction variable increment should be placed. In simple loops, this is
+  /// the latch block's terminator. But in more complicated cases, this is a
+  /// position which will dominate all the in-loop post-increment users.
+  Instruction *IVIncInsertPos;
+
+  /// Factors - Interesting factors between use strides.
+  SmallSetVector<int64_t, 8> Factors;
+
+  /// Types - Interesting use types, to facilitate truncation reuse.
+  SmallSetVector<const Type *, 4> Types;
+
+  /// Fixups - The list of operands which are to be replaced.
+ SmallVector<LSRFixup, 16> Fixups; + + /// Uses - The list of interesting uses. + SmallVector<LSRUse, 16> Uses; + + /// RegUses - Track which uses use which register candidates. + RegUseTracker RegUses; + + void OptimizeShadowIV(); + bool FindIVUserForCond(ICmpInst *Cond, IVStrideUse *&CondUse); + ICmpInst *OptimizeMax(ICmpInst *Cond, IVStrideUse* &CondUse); + bool OptimizeLoopTermCond(); + + void CollectInterestingTypesAndFactors(); + void CollectFixupsAndInitialFormulae(); + + LSRFixup &getNewFixup() { + Fixups.push_back(LSRFixup()); + return Fixups.back(); } - // Only handle constant strides for now. - const SCEVConstant *SC = dyn_cast<SCEVConstant>(CondStride); - if (!SC) return Cond; - - ICmpInst::Predicate Predicate = Cond->getPredicate(); - int64_t CmpSSInt = SC->getValue()->getSExtValue(); - unsigned BitWidth = SE->getTypeSizeInBits(CondStride->getType()); - uint64_t SignBit = 1ULL << (BitWidth-1); - const Type *CmpTy = Cond->getOperand(0)->getType(); - const Type *NewCmpTy = NULL; - unsigned TyBits = SE->getTypeSizeInBits(CmpTy); - unsigned NewTyBits = 0; - const SCEV *NewStride = NULL; - Value *NewCmpLHS = NULL; - Value *NewCmpRHS = NULL; - int64_t Scale = 1; - const SCEV *NewOffset = SE->getIntegerSCEV(0, CmpTy); - - if (ConstantInt *C = dyn_cast<ConstantInt>(Cond->getOperand(1))) { - int64_t CmpVal = C->getValue().getSExtValue(); - - // Check the relevant induction variable for conformance to - // the pattern. - const SCEV *IV = SE->getSCEV(Cond->getOperand(0)); - const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(IV); - if (!AR || !AR->isAffine()) - return Cond; - - const SCEVConstant *StartC = dyn_cast<SCEVConstant>(AR->getStart()); - // Check stride constant and the comparision constant signs to detect - // overflow. - if (StartC) { - if ((StartC->getValue()->getSExtValue() < CmpVal && CmpSSInt < 0) || - (StartC->getValue()->getSExtValue() > CmpVal && CmpSSInt > 0)) - return Cond; - } else { - // More restrictive check for the other cases. - if ((CmpVal & SignBit) != (CmpSSInt & SignBit)) - return Cond; - } - - // Look for a suitable stride / iv as replacement. - for (unsigned i = 0, e = IU->StrideOrder.size(); i != e; ++i) { - std::map<const SCEV *, IVUsersOfOneStride *>::iterator SI = - IU->IVUsesByStride.find(IU->StrideOrder[i]); - if (!isa<SCEVConstant>(SI->first) || SI->second->Users.empty()) - continue; - int64_t SSInt = cast<SCEVConstant>(SI->first)->getValue()->getSExtValue(); - if (SSInt == CmpSSInt || - abs64(SSInt) < abs64(CmpSSInt) || - (SSInt % CmpSSInt) != 0) - continue; - Scale = SSInt / CmpSSInt; - int64_t NewCmpVal = CmpVal * Scale; + // Support for sharing of LSRUses between LSRFixups. 
+  typedef DenseMap<const SCEV *, size_t> UseMapTy;
+  UseMapTy UseMap;
+
+  bool reconcileNewOffset(LSRUse &LU, int64_t NewOffset,
+                          LSRUse::KindType Kind, const Type *AccessTy);
+
+  std::pair<size_t, int64_t> getUse(const SCEV *&Expr,
+                                    LSRUse::KindType Kind,
+                                    const Type *AccessTy);
+
+public:
+  void InsertInitialFormula(const SCEV *S, Loop *L, LSRUse &LU, size_t LUIdx);
+  void InsertSupplementalFormula(const SCEV *S, LSRUse &LU, size_t LUIdx);
+  void CountRegisters(const Formula &F, size_t LUIdx);
+  bool InsertFormula(LSRUse &LU, unsigned LUIdx, const Formula &F);
+
+  void CollectLoopInvariantFixupsAndFormulae();
+
+  void GenerateReassociations(LSRUse &LU, unsigned LUIdx, Formula Base,
+                              unsigned Depth = 0);
+  void GenerateCombinations(LSRUse &LU, unsigned LUIdx, Formula Base);
+  void GenerateSymbolicOffsets(LSRUse &LU, unsigned LUIdx, Formula Base);
+  void GenerateConstantOffsets(LSRUse &LU, unsigned LUIdx, Formula Base);
+  void GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx, Formula Base);
+  void GenerateScales(LSRUse &LU, unsigned LUIdx, Formula Base);
+  void GenerateTruncates(LSRUse &LU, unsigned LUIdx, Formula Base);
+  void GenerateCrossUseConstantOffsets();
+  void GenerateAllReuseFormulae();
+
+  void FilterOutUndesirableDedicatedRegisters();
+  void NarrowSearchSpaceUsingHeuristics();
+
+  void SolveRecurse(SmallVectorImpl<const Formula *> &Solution,
+                    Cost &SolutionCost,
+                    SmallVectorImpl<const Formula *> &Workspace,
+                    const Cost &CurCost,
+                    const SmallPtrSet<const SCEV *, 16> &CurRegs,
+                    DenseSet<const SCEV *> &VisitedRegs) const;
+  void Solve(SmallVectorImpl<const Formula *> &Solution) const;
+
+  Value *Expand(const LSRFixup &LF,
+                const Formula &F,
+                BasicBlock::iterator IP, Loop *L, Instruction *IVIncInsertPos,
+                SCEVExpander &Rewriter,
+                SmallVectorImpl<WeakVH> &DeadInsts,
+                ScalarEvolution &SE, DominatorTree &DT) const;
+  void Rewrite(const LSRFixup &LF,
+               const Formula &F,
+               Loop *L, Instruction *IVIncInsertPos,
+               SCEVExpander &Rewriter,
+               SmallVectorImpl<WeakVH> &DeadInsts,
+               ScalarEvolution &SE, DominatorTree &DT,
+               Pass *P) const;
+  void ImplementSolution(const SmallVectorImpl<const Formula *> &Solution,
+                         Pass *P);
+
+  LSRInstance(const TargetLowering *tli, Loop *l, Pass *P);
+
+  bool getChanged() const { return Changed; }
+
+  void print_factors_and_types(raw_ostream &OS) const;
+  void print_fixups(raw_ostream &OS) const;
+  void print_uses(raw_ostream &OS) const;
+  void print(raw_ostream &OS) const;
+  void dump() const;
+};
-      // If old icmp value fits in icmp immediate field, but the new one doesn't
-      // try something else.
-      if (TLI &&
-          TLI->isLegalICmpImmediate(CmpVal) &&
-          !TLI->isLegalICmpImmediate(NewCmpVal))
-        continue;
+}
-      APInt Mul = APInt(BitWidth*2, CmpVal, true);
-      Mul = Mul * APInt(BitWidth*2, Scale, true);
-      // Check for overflow.
-      if (!Mul.isSignedIntN(BitWidth))
-        continue;
-      // Check for overflow in the stride's type too.
-      if (!Mul.isSignedIntN(SE->getTypeSizeInBits(SI->first->getType())))
-        continue;
+/// OptimizeShadowIV - If IV is used in an int-to-float cast
+/// inside the loop then try to eliminate the cast operation.
+void LSRInstance::OptimizeShadowIV() {
+  const SCEV *BackedgeTakenCount = SE.getBackedgeTakenCount(L);
+  if (isa<SCEVCouldNotCompute>(BackedgeTakenCount))
+    return;
-      // Watch out for overflow.
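// A rough sketch (hypothetical helper, not from the patch) of the
// legality condition behind the mantissa check that OptimizeShadowIV
// performs further below: a floating-point shadow IV is only exact while
// every counter value fits in the FP type's mantissa.
inline bool shadowIVIsExact(unsigned SrcBits, int MantissaBits) {
  if (MantissaBits == -1)               // type has no usable mantissa width
    return false;
  return (int)SrcBits <= MantissaBits;  // every count is representable
}
// e.g. a 32-bit IV shadowed by double (53-bit mantissa) is exact:
// shadowIVIsExact(32, 53); a 64-bit IV is not: shadowIVIsExact(64, 53).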
-      if (ICmpInst::isSigned(Predicate) &&
-          (CmpVal & SignBit) != (NewCmpVal & SignBit))
-        continue;
+  for (IVUsers::const_iterator UI = IU.begin(), E = IU.end();
+       UI != E; /* empty */) {
+    IVUsers::const_iterator CandidateUI = UI;
+    ++UI;
+    Instruction *ShadowUse = CandidateUI->getUser();
+    const Type *DestTy = NULL;
-      // Pick the best iv to use trying to avoid a cast.
-      NewCmpLHS = NULL;
-      for (ilist<IVStrideUse>::iterator UI = SI->second->Users.begin(),
-             E = SI->second->Users.end(); UI != E; ++UI) {
-        Value *Op = UI->getOperandValToReplace();
-
-        // If the IVStrideUse implies a cast, check for an actual cast which
-        // can be used to find the original IV expression.
-        if (SE->getEffectiveSCEVType(Op->getType()) !=
-            SE->getEffectiveSCEVType(SI->first->getType())) {
-          CastInst *CI = dyn_cast<CastInst>(Op);
-          // If it's not a simple cast, it's complicated.
-          if (!CI)
-            continue;
-          // If it's a cast from a type other than the stride type,
-          // it's complicated.
-          if (CI->getOperand(0)->getType() != SI->first->getType())
-            continue;
-          // Ok, we found the IV expression in the stride's type.
-          Op = CI->getOperand(0);
-        }
+    /* If shadow use is an int->float cast then insert a second IV
+       to eliminate this cast.
-        NewCmpLHS = Op;
-        if (NewCmpLHS->getType() == CmpTy)
-          break;
-      }
-      if (!NewCmpLHS)
-        continue;
+         for (unsigned i = 0; i < n; ++i)
+           foo((double)i);
-      NewCmpTy = NewCmpLHS->getType();
-      NewTyBits = SE->getTypeSizeInBits(NewCmpTy);
-      const Type *NewCmpIntTy = IntegerType::get(Cond->getContext(), NewTyBits);
-      if (RequiresTypeConversion(NewCmpTy, CmpTy)) {
-        // Check if it is possible to rewrite it using
-        // an iv / stride of a smaller integer type.
-        unsigned Bits = NewTyBits;
-        if (ICmpInst::isSigned(Predicate))
-          --Bits;
-        uint64_t Mask = (1ULL << Bits) - 1;
-        if (((uint64_t)NewCmpVal & Mask) != (uint64_t)NewCmpVal)
-          continue;
-      }
+       is transformed into
-      // Don't rewrite if use offset is non-constant and the new type is
-      // of a different type.
-      // FIXME: too conservative?
-      if (NewTyBits != TyBits && !isa<SCEVConstant>(CondUse->getOffset()))
-        continue;
+       double d = 0.0;
+       for (unsigned i = 0; i < n; ++i, ++d)
+         foo(d);
+    */
+    if (UIToFPInst *UCast = dyn_cast<UIToFPInst>(CandidateUI->getUser()))
+      DestTy = UCast->getDestTy();
+    else if (SIToFPInst *SCast = dyn_cast<SIToFPInst>(CandidateUI->getUser()))
+      DestTy = SCast->getDestTy();
+    if (!DestTy) continue;
-      if (!PostPass) {
-        bool AllUsesAreAddresses = true;
-        bool AllUsesAreOutsideLoop = true;
-        std::vector<BasedUser> UsersToProcess;
-        const SCEV *CommonExprs = CollectIVUsers(SI->first, *SI->second, L,
-                                                 AllUsesAreAddresses,
-                                                 AllUsesAreOutsideLoop,
-                                                 UsersToProcess);
-        // Avoid rewriting the compare instruction with an iv of new stride
-        // if it's likely the new stride uses will be rewritten using the
-        // stride of the compare instruction.
-        if (AllUsesAreAddresses &&
-            ValidScale(!CommonExprs->isZero(), Scale, UsersToProcess))
-          continue;
-      }
+    if (TLI) {
+      // If target does not support DestTy natively then do not apply
+      // this transformation.
+      EVT DVT = TLI->getValueType(DestTy);
+      if (!TLI->isTypeLegal(DVT)) continue;
+    }
-      // Avoid rewriting the compare instruction with an iv which has
-      // implicit extension or truncation built into it.
-      // TODO: This is over-conservative.
- if (SE->getTypeSizeInBits(CondUse->getOffset()->getType()) != TyBits) - continue; + PHINode *PH = dyn_cast<PHINode>(ShadowUse->getOperand(0)); + if (!PH) continue; + if (PH->getNumIncomingValues() != 2) continue; - // If scale is negative, use swapped predicate unless it's testing - // for equality. - if (Scale < 0 && !Cond->isEquality()) - Predicate = ICmpInst::getSwappedPredicate(Predicate); + const Type *SrcTy = PH->getType(); + int Mantissa = DestTy->getFPMantissaWidth(); + if (Mantissa == -1) continue; + if ((int)SE.getTypeSizeInBits(SrcTy) > Mantissa) + continue; - NewStride = IU->StrideOrder[i]; - if (!isa<PointerType>(NewCmpTy)) - NewCmpRHS = ConstantInt::get(NewCmpTy, NewCmpVal); - else { - Constant *CI = ConstantInt::get(NewCmpIntTy, NewCmpVal); - NewCmpRHS = ConstantExpr::getIntToPtr(CI, NewCmpTy); - } - NewOffset = TyBits == NewTyBits - ? SE->getMulExpr(CondUse->getOffset(), - SE->getConstant(CmpTy, Scale)) - : SE->getConstant(NewCmpIntTy, - cast<SCEVConstant>(CondUse->getOffset())->getValue() - ->getSExtValue()*Scale); - break; + unsigned Entry, Latch; + if (PH->getIncomingBlock(0) == L->getLoopPreheader()) { + Entry = 0; + Latch = 1; + } else { + Entry = 1; + Latch = 0; } - } - // Forgo this transformation if it the increment happens to be - // unfortunately positioned after the condition, and the condition - // has multiple uses which prevent it from being moved immediately - // before the branch. See - // test/Transforms/LoopStrengthReduce/change-compare-stride-trickiness-*.ll - // for an example of this situation. - if (!Cond->hasOneUse()) { - for (BasicBlock::iterator I = Cond, E = Cond->getParent()->end(); - I != E; ++I) - if (I == NewCmpLHS) - return Cond; - } + ConstantInt *Init = dyn_cast<ConstantInt>(PH->getIncomingValue(Entry)); + if (!Init) continue; + Constant *NewInit = ConstantFP::get(DestTy, Init->getZExtValue()); - if (NewCmpRHS) { - // Create a new compare instruction using new stride / iv. - ICmpInst *OldCond = Cond; - // Insert new compare instruction. - Cond = new ICmpInst(OldCond, Predicate, NewCmpLHS, NewCmpRHS, - L->getHeader()->getName() + ".termcond"); + BinaryOperator *Incr = + dyn_cast<BinaryOperator>(PH->getIncomingValue(Latch)); + if (!Incr) continue; + if (Incr->getOpcode() != Instruction::Add + && Incr->getOpcode() != Instruction::Sub) + continue; + + /* Initialize new IV, double d = 0.0 in above example. */ + ConstantInt *C = NULL; + if (Incr->getOperand(0) == PH) + C = dyn_cast<ConstantInt>(Incr->getOperand(1)); + else if (Incr->getOperand(1) == PH) + C = dyn_cast<ConstantInt>(Incr->getOperand(0)); + else + continue; - DEBUG(dbgs() << " Change compare stride in Inst " << *OldCond); - DEBUG(dbgs() << " to " << *Cond << '\n'); + if (!C) continue; - // Remove the old compare instruction. The old indvar is probably dead too. - DeadInsts.push_back(CondUse->getOperandValToReplace()); - OldCond->replaceAllUsesWith(Cond); - OldCond->eraseFromParent(); + // Ignore negative constants, as the code below doesn't handle them + // correctly. TODO: Remove this restriction. + if (!C->getValue().isStrictlyPositive()) continue; - IU->IVUsesByStride[NewStride]->addUser(NewOffset, Cond, NewCmpLHS); - CondUse = &IU->IVUsesByStride[NewStride]->Users.back(); - CondStride = NewStride; - ++NumEliminated; - Changed = true; + /* Add new PHINode. */ + PHINode *NewPH = PHINode::Create(DestTy, "IV.S.", PH); + + /* create new increment. '++d' in above example. 
*/ + Constant *CFP = ConstantFP::get(DestTy, C->getZExtValue()); + BinaryOperator *NewIncr = + BinaryOperator::Create(Incr->getOpcode() == Instruction::Add ? + Instruction::FAdd : Instruction::FSub, + NewPH, CFP, "IV.S.next.", Incr); + + NewPH->addIncoming(NewInit, PH->getIncomingBlock(Entry)); + NewPH->addIncoming(NewIncr, PH->getIncomingBlock(Latch)); + + /* Remove cast operation */ + ShadowUse->replaceAllUsesWith(NewPH); + ShadowUse->eraseFromParent(); + break; } +} - return Cond; +/// FindIVUserForCond - If Cond has an operand that is an expression of an IV, +/// set the IV user and stride information and return true, otherwise return +/// false. +bool LSRInstance::FindIVUserForCond(ICmpInst *Cond, + IVStrideUse *&CondUse) { + for (IVUsers::iterator UI = IU.begin(), E = IU.end(); UI != E; ++UI) + if (UI->getUser() == Cond) { + // NOTE: we could handle setcc instructions with multiple uses here, but + // InstCombine does it as well for simple uses, it's not clear that it + // occurs enough in real life to handle. + CondUse = UI; + return true; + } + return false; } /// OptimizeMax - Rewrite the loop's terminating condition if it uses @@ -2087,7 +1371,7 @@ ICmpInst *LoopStrengthReduce::ChangeCompareStride(Loop *L, ICmpInst *Cond, /// are designed around them. The most obvious example of this is the /// LoopInfo analysis, which doesn't remember trip count values. It /// expects to be able to rediscover the trip count each time it is -/// needed, and it does this using a simple analyis that only succeeds if +/// needed, and it does this using a simple analysis that only succeeds if /// the loop has a canonical induction variable. /// /// However, when it comes time to generate code, the maximum operation @@ -2097,8 +1381,7 @@ ICmpInst *LoopStrengthReduce::ChangeCompareStride(Loop *L, ICmpInst *Cond, /// rewriting their conditions from ICMP_NE back to ICMP_SLT, and deleting /// the instructions for the maximum computation. /// -ICmpInst *LoopStrengthReduce::OptimizeMax(Loop *L, ICmpInst *Cond, - IVStrideUse* &CondUse) { +ICmpInst *LSRInstance::OptimizeMax(ICmpInst *Cond, IVStrideUse* &CondUse) { // Check that the loop matches the pattern we're looking for. if (Cond->getPredicate() != CmpInst::ICMP_EQ && Cond->getPredicate() != CmpInst::ICMP_NE) @@ -2107,19 +1390,19 @@ ICmpInst *LoopStrengthReduce::OptimizeMax(Loop *L, ICmpInst *Cond, SelectInst *Sel = dyn_cast<SelectInst>(Cond->getOperand(1)); if (!Sel || !Sel->hasOneUse()) return Cond; - const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L); + const SCEV *BackedgeTakenCount = SE.getBackedgeTakenCount(L); if (isa<SCEVCouldNotCompute>(BackedgeTakenCount)) return Cond; - const SCEV *One = SE->getIntegerSCEV(1, BackedgeTakenCount->getType()); + const SCEV *One = SE.getIntegerSCEV(1, BackedgeTakenCount->getType()); // Add one to the backedge-taken count to get the trip count. - const SCEV *IterationCount = SE->getAddExpr(BackedgeTakenCount, One); + const SCEV *IterationCount = SE.getAddExpr(BackedgeTakenCount, One); // Check for a max calculation that matches the pattern. if (!isa<SCEVSMaxExpr>(IterationCount) && !isa<SCEVUMaxExpr>(IterationCount)) return Cond; const SCEVNAryExpr *Max = cast<SCEVNAryExpr>(IterationCount); - if (Max != SE->getSCEV(Sel)) return Cond; + if (Max != SE.getSCEV(Sel)) return Cond; // To handle a max with more than two operands, this optimization would // require additional checking and setup. 
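// A source-level picture of the pattern OptimizeMax undoes, using
// hypothetical loops: the guard canonicalization produces a trip count of
// umax(n, 1) (the select), turning the exit test into i != umax(n, 1).
// Both functions below count the same number of iterations for any n,
// which is what lets the comparison be rewritten and the max deleted.
unsigned withMaxGuard(unsigned n) {
  unsigned tc = n > 1 ? n : 1;                 // umax(n, 1), as a select
  unsigned count = 0;
  for (unsigned i = 1; i != tc; ++i) ++count;  // ICMP_NE exit test
  return count;
}
unsigned withoutMaxGuard(unsigned n) {
  unsigned count = 0;
  for (unsigned i = 1; i < n; ++i) ++count;    // ICMP_ULT, no max needed
  return count;
}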
@@ -2129,14 +1412,13 @@ ICmpInst *LoopStrengthReduce::OptimizeMax(Loop *L, ICmpInst *Cond, const SCEV *MaxLHS = Max->getOperand(0); const SCEV *MaxRHS = Max->getOperand(1); if (!MaxLHS || MaxLHS != One) return Cond; - // Check the relevant induction variable for conformance to // the pattern. - const SCEV *IV = SE->getSCEV(Cond->getOperand(0)); + const SCEV *IV = SE.getSCEV(Cond->getOperand(0)); const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(IV); if (!AR || !AR->isAffine() || AR->getStart() != One || - AR->getStepRecurrence(*SE) != One) + AR->getStepRecurrence(SE) != One) return Cond; assert(AR->getLoop() == L && @@ -2145,9 +1427,9 @@ ICmpInst *LoopStrengthReduce::OptimizeMax(Loop *L, ICmpInst *Cond, // Check the right operand of the select, and remember it, as it will // be used in the new comparison instruction. Value *NewRHS = 0; - if (SE->getSCEV(Sel->getOperand(1)) == MaxRHS) + if (SE.getSCEV(Sel->getOperand(1)) == MaxRHS) NewRHS = Sel->getOperand(1); - else if (SE->getSCEV(Sel->getOperand(2)) == MaxRHS) + else if (SE.getSCEV(Sel->getOperand(2)) == MaxRHS) NewRHS = Sel->getOperand(2); if (!NewRHS) return Cond; @@ -2174,552 +1456,1764 @@ ICmpInst *LoopStrengthReduce::OptimizeMax(Loop *L, ICmpInst *Cond, return NewCond; } -/// OptimizeShadowIV - If IV is used in a int-to-float cast -/// inside the loop then try to eliminate the cast opeation. -void LoopStrengthReduce::OptimizeShadowIV(Loop *L) { +/// OptimizeLoopTermCond - Change loop terminating condition to use the +/// postinc iv when possible. +bool +LSRInstance::OptimizeLoopTermCond() { + SmallPtrSet<Instruction *, 4> PostIncs; - const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L); - if (isa<SCEVCouldNotCompute>(BackedgeTakenCount)) - return; + BasicBlock *LatchBlock = L->getLoopLatch(); + SmallVector<BasicBlock*, 8> ExitingBlocks; + L->getExitingBlocks(ExitingBlocks); + + for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i) { + BasicBlock *ExitingBlock = ExitingBlocks[i]; + + // Get the terminating condition for the loop if possible. If we + // can, we want to change it to use a post-incremented version of its + // induction variable, to allow coalescing the live ranges for the IV into + // one register value. + + BranchInst *TermBr = dyn_cast<BranchInst>(ExitingBlock->getTerminator()); + if (!TermBr) + continue; + // FIXME: Overly conservative, termination condition could be an 'or' etc.. + if (TermBr->isUnconditional() || !isa<ICmpInst>(TermBr->getCondition())) + continue; - for (unsigned Stride = 0, e = IU->StrideOrder.size(); Stride != e; - ++Stride) { - std::map<const SCEV *, IVUsersOfOneStride *>::iterator SI = - IU->IVUsesByStride.find(IU->StrideOrder[Stride]); - assert(SI != IU->IVUsesByStride.end() && "Stride doesn't exist!"); - if (!isa<SCEVConstant>(SI->first)) + // Search IVUsesByStride to find Cond's IVUse if there is one. + IVStrideUse *CondUse = 0; + ICmpInst *Cond = cast<ICmpInst>(TermBr->getCondition()); + if (!FindIVUserForCond(Cond, CondUse)) continue; - for (ilist<IVStrideUse>::iterator UI = SI->second->Users.begin(), - E = SI->second->Users.end(); UI != E; /* empty */) { - ilist<IVStrideUse>::iterator CandidateUI = UI; - ++UI; - Instruction *ShadowUse = CandidateUI->getUser(); - const Type *DestTy = NULL; - - /* If shadow use is a int->float cast then insert a second IV - to eliminate this cast. 
- - for (unsigned i = 0; i < n; ++i) - foo((double)i); - - is transformed into - - double d = 0.0; - for (unsigned i = 0; i < n; ++i, ++d) - foo(d); - */ - if (UIToFPInst *UCast = dyn_cast<UIToFPInst>(CandidateUI->getUser())) - DestTy = UCast->getDestTy(); - else if (SIToFPInst *SCast = dyn_cast<SIToFPInst>(CandidateUI->getUser())) - DestTy = SCast->getDestTy(); - if (!DestTy) continue; - - if (TLI) { - // If target does not support DestTy natively then do not apply - // this transformation. - EVT DVT = TLI->getValueType(DestTy); - if (!TLI->isTypeLegal(DVT)) continue; - } + // If the trip count is computed in terms of a max (due to ScalarEvolution + // being unable to find a sufficient guard, for example), change the loop + // comparison to use SLT or ULT instead of NE. + // One consequence of doing this now is that it disrupts the count-down + // optimization. That's not always a bad thing though, because in such + // cases it may still be worthwhile to avoid a max. + Cond = OptimizeMax(Cond, CondUse); + + // If this exiting block dominates the latch block, it may also use + // the post-inc value if it won't be shared with other uses. + // Check for dominance. + if (!DT.dominates(ExitingBlock, LatchBlock)) + continue; - PHINode *PH = dyn_cast<PHINode>(ShadowUse->getOperand(0)); - if (!PH) continue; - if (PH->getNumIncomingValues() != 2) continue; + // Conservatively avoid trying to use the post-inc value in non-latch + // exits if there may be pre-inc users in intervening blocks. + if (LatchBlock != ExitingBlock) + for (IVUsers::const_iterator UI = IU.begin(), E = IU.end(); UI != E; ++UI) + // Test if the use is reachable from the exiting block. This dominator + // query is a conservative approximation of reachability. + if (&*UI != CondUse && + !DT.properlyDominates(UI->getUser()->getParent(), ExitingBlock)) { + // Conservatively assume there may be reuse if the quotient of their + // strides could be a legal scale. + const SCEV *A = CondUse->getStride(); + const SCEV *B = UI->getStride(); + if (SE.getTypeSizeInBits(A->getType()) != + SE.getTypeSizeInBits(B->getType())) { + if (SE.getTypeSizeInBits(A->getType()) > + SE.getTypeSizeInBits(B->getType())) + B = SE.getSignExtendExpr(B, A->getType()); + else + A = SE.getSignExtendExpr(A, B->getType()); + } + if (const SCEVConstant *D = + dyn_cast_or_null<SCEVConstant>(getSDiv(B, A, SE))) { + // Stride of one or negative one can have reuse with non-addresses. + if (D->getValue()->isOne() || + D->getValue()->isAllOnesValue()) + goto decline_post_inc; + // Avoid weird situations. + if (D->getValue()->getValue().getMinSignedBits() >= 64 || + D->getValue()->getValue().isMinSignedValue()) + goto decline_post_inc; + // Without TLI, assume that any stride might be valid, and so any + // use might be shared. + if (!TLI) + goto decline_post_inc; + // Check for possible scaled-address reuse. 
+ const Type *AccessTy = getAccessType(UI->getUser()); + TargetLowering::AddrMode AM; + AM.Scale = D->getValue()->getSExtValue(); + if (TLI->isLegalAddressingMode(AM, AccessTy)) + goto decline_post_inc; + AM.Scale = -AM.Scale; + if (TLI->isLegalAddressingMode(AM, AccessTy)) + goto decline_post_inc; + } + } - const Type *SrcTy = PH->getType(); - int Mantissa = DestTy->getFPMantissaWidth(); - if (Mantissa == -1) continue; - if ((int)SE->getTypeSizeInBits(SrcTy) > Mantissa) - continue; + DEBUG(dbgs() << " Change loop exiting icmp to use postinc iv: " + << *Cond << '\n'); - unsigned Entry, Latch; - if (PH->getIncomingBlock(0) == L->getLoopPreheader()) { - Entry = 0; - Latch = 1; + // It's possible for the setcc instruction to be anywhere in the loop, and + // possible for it to have multiple users. If it is not immediately before + // the exiting block branch, move it. + if (&*++BasicBlock::iterator(Cond) != TermBr) { + if (Cond->hasOneUse()) { + Cond->moveBefore(TermBr); } else { - Entry = 1; - Latch = 0; + // Clone the terminating condition and insert into the loopend. + ICmpInst *OldCond = Cond; + Cond = cast<ICmpInst>(Cond->clone()); + Cond->setName(L->getHeader()->getName() + ".termcond"); + ExitingBlock->getInstList().insert(TermBr, Cond); + + // Clone the IVUse, as the old use still exists! + CondUse = &IU.AddUser(CondUse->getStride(), CondUse->getOffset(), + Cond, CondUse->getOperandValToReplace()); + TermBr->replaceUsesOfWith(OldCond, Cond); } + } - ConstantInt *Init = dyn_cast<ConstantInt>(PH->getIncomingValue(Entry)); - if (!Init) continue; - Constant *NewInit = ConstantFP::get(DestTy, Init->getZExtValue()); + // If we get to here, we know that we can transform the setcc instruction to + // use the post-incremented version of the IV, allowing us to coalesce the + // live ranges for the IV correctly. + CondUse->setOffset(SE.getMinusSCEV(CondUse->getOffset(), + CondUse->getStride())); + CondUse->setIsUseOfPostIncrementedValue(true); + Changed = true; - BinaryOperator *Incr = - dyn_cast<BinaryOperator>(PH->getIncomingValue(Latch)); - if (!Incr) continue; - if (Incr->getOpcode() != Instruction::Add - && Incr->getOpcode() != Instruction::Sub) - continue; + PostIncs.insert(Cond); + decline_post_inc:; + } - /* Initialize new IV, double d = 0.0 in above example. */ - ConstantInt *C = NULL; - if (Incr->getOperand(0) == PH) - C = dyn_cast<ConstantInt>(Incr->getOperand(1)); - else if (Incr->getOperand(1) == PH) - C = dyn_cast<ConstantInt>(Incr->getOperand(0)); - else - continue; + // Determine an insertion point for the loop induction variable increment. It + // must dominate all the post-inc comparisons we just set up, and it must + // dominate the loop latch edge. + IVIncInsertPos = L->getLoopLatch()->getTerminator(); + for (SmallPtrSet<Instruction *, 4>::const_iterator I = PostIncs.begin(), + E = PostIncs.end(); I != E; ++I) { + BasicBlock *BB = + DT.findNearestCommonDominator(IVIncInsertPos->getParent(), + (*I)->getParent()); + if (BB == (*I)->getParent()) + IVIncInsertPos = *I; + else if (BB != IVIncInsertPos->getParent()) + IVIncInsertPos = BB->getTerminator(); + } - if (!C) continue; + return Changed; +} + +bool +LSRInstance::reconcileNewOffset(LSRUse &LU, int64_t NewOffset, + LSRUse::KindType Kind, const Type *AccessTy) { + int64_t NewMinOffset = LU.MinOffset; + int64_t NewMaxOffset = LU.MaxOffset; + const Type *NewAccessTy = AccessTy; + + // Check for a mismatched kind. 
It's tempting to collapse mismatched kinds to
+  // something conservative, however this can pessimize in the case that one of
+  // the uses will have all its uses outside the loop, for example.
+  if (LU.Kind != Kind)
+    return false;
+  // Conservatively assume HasBaseReg is true for now.
+  if (NewOffset < LU.MinOffset) {
+    if (!isAlwaysFoldable(LU.MaxOffset - NewOffset, 0, /*HasBaseReg=*/true,
+                          Kind, AccessTy, TLI, SE))
+      return false;
+    NewMinOffset = NewOffset;
+  } else if (NewOffset > LU.MaxOffset) {
+    if (!isAlwaysFoldable(NewOffset - LU.MinOffset, 0, /*HasBaseReg=*/true,
+                          Kind, AccessTy, TLI, SE))
+      return false;
+    NewMaxOffset = NewOffset;
+  }
+  // Check for a mismatched access type, and fall back conservatively as needed.
+  if (Kind == LSRUse::Address && AccessTy != LU.AccessTy)
+    NewAccessTy = Type::getVoidTy(AccessTy->getContext());
+
+  // Update the use.
+  LU.MinOffset = NewMinOffset;
+  LU.MaxOffset = NewMaxOffset;
+  LU.AccessTy = NewAccessTy;
+  if (NewOffset != LU.Offsets.back())
+    LU.Offsets.push_back(NewOffset);
+  return true;
+}
-      // Ignore negative constants, as the code below doesn't handle them
-      // correctly. TODO: Remove this restriction.
-      if (!C->getValue().isStrictlyPositive()) continue;
+/// getUse - Return an LSRUse index and an offset value for a fixup which
+/// needs the given expression, with the given kind and optional access type.
+/// Either reuse an existing use or create a new one, as needed.
+std::pair<size_t, int64_t>
+LSRInstance::getUse(const SCEV *&Expr,
+                    LSRUse::KindType Kind, const Type *AccessTy) {
+  const SCEV *Copy = Expr;
+  int64_t Offset = ExtractImmediate(Expr, SE);
+
+  // Basic uses can't accept any offset, for example.
+  if (!isAlwaysFoldable(Offset, 0, /*HasBaseReg=*/true,
+                        Kind, AccessTy, TLI, SE)) {
+    Expr = Copy;
+    Offset = 0;
+  }
-      /* Add new PHINode. */
-      PHINode *NewPH = PHINode::Create(DestTy, "IV.S.", PH);
+  std::pair<UseMapTy::iterator, bool> P =
+    UseMap.insert(std::make_pair(Expr, 0));
+  if (!P.second) {
+    // A use already existed with this base.
+    size_t LUIdx = P.first->second;
+    LSRUse &LU = Uses[LUIdx];
+    if (reconcileNewOffset(LU, Offset, Kind, AccessTy))
+      // Reuse this use.
+      return std::make_pair(LUIdx, Offset);
+  }
-      /* create new increment. '++d' in above example. */
-      Constant *CFP = ConstantFP::get(DestTy, C->getZExtValue());
-      BinaryOperator *NewIncr =
-        BinaryOperator::Create(Incr->getOpcode() == Instruction::Add ?
-                               Instruction::FAdd : Instruction::FSub,
-                               NewPH, CFP, "IV.S.next.", Incr);
+  // Create a new use.
+  size_t LUIdx = Uses.size();
+  P.first->second = LUIdx;
+  Uses.push_back(LSRUse(Kind, AccessTy));
+  LSRUse &LU = Uses[LUIdx];
-      NewPH->addIncoming(NewInit, PH->getIncomingBlock(Entry));
-      NewPH->addIncoming(NewIncr, PH->getIncomingBlock(Latch));
+  // We don't need to track redundant offsets, but we don't need to go out
+  // of our way here to avoid them.
+  if (LU.Offsets.empty() || Offset != LU.Offsets.back())
+    LU.Offsets.push_back(Offset);
-      /* Remove cast operation */
-      ShadowUse->replaceAllUsesWith(NewPH);
-      ShadowUse->eraseFromParent();
-      NumShadow++;
-      break;
+  LU.MinOffset = Offset;
+  LU.MaxOffset = Offset;
+  return std::make_pair(LUIdx, Offset);
+}
+
+void LSRInstance::CollectInterestingTypesAndFactors() {
+  SmallSetVector<const SCEV *, 4> Strides;
+
+  // Collect interesting types and factors.
+  for (IVUsers::const_iterator UI = IU.begin(), E = IU.end(); UI != E; ++UI) {
+    const SCEV *Stride = UI->getStride();
+
+    // Collect interesting types.
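// A simplified sketch of the use-sharing idea in getUse and
// reconcileNewOffset above: strip the constant offset off each expression
// and key on the remainder, so fixups that differ only by a constant share
// one use whose [MinOffset, MaxOffset] range covers them all. The types
// here are illustrative stand-ins, not the pass's real data structures.
#include <algorithm>
#include <cstdint>
#include <map>
#include <string>

struct UseRangeSketch { int64_t MinOffset, MaxOffset; };

inline void addFixupSketch(std::map<std::string, UseRangeSketch> &UseMap,
                           const std::string &BaseExpr, int64_t Offset) {
  auto It = UseMap.find(BaseExpr);
  if (It == UseMap.end()) {
    UseMap[BaseExpr] = UseRangeSketch{Offset, Offset};  // a new use
    return;
  }
  // Widen the existing range, as reconcileNewOffset does after verifying
  // that the widened offset would still be foldable for the use's kind.
  It->second.MinOffset = std::min(It->second.MinOffset, Offset);
  It->second.MaxOffset = std::max(It->second.MaxOffset, Offset);
}
// e.g. loads from p+0 and p+8 become one use with the range [0, 8].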
+ Types.insert(SE.getEffectiveSCEVType(Stride->getType())); + + // Collect interesting factors. + for (SmallSetVector<const SCEV *, 4>::const_iterator NewStrideIter = + Strides.begin(), SEnd = Strides.end(); NewStrideIter != SEnd; + ++NewStrideIter) { + const SCEV *OldStride = Stride; + const SCEV *NewStride = *NewStrideIter; + if (OldStride == NewStride) + continue; + + if (SE.getTypeSizeInBits(OldStride->getType()) != + SE.getTypeSizeInBits(NewStride->getType())) { + if (SE.getTypeSizeInBits(OldStride->getType()) > + SE.getTypeSizeInBits(NewStride->getType())) + NewStride = SE.getSignExtendExpr(NewStride, OldStride->getType()); + else + OldStride = SE.getSignExtendExpr(OldStride, NewStride->getType()); + } + if (const SCEVConstant *Factor = + dyn_cast_or_null<SCEVConstant>(getSDiv(NewStride, OldStride, + SE, true))) { + if (Factor->getValue()->getValue().getMinSignedBits() <= 64) + Factors.insert(Factor->getValue()->getValue().getSExtValue()); + } else if (const SCEVConstant *Factor = + dyn_cast_or_null<SCEVConstant>(getSDiv(OldStride, NewStride, + SE, true))) { + if (Factor->getValue()->getValue().getMinSignedBits() <= 64) + Factors.insert(Factor->getValue()->getValue().getSExtValue()); + } } + Strides.insert(Stride); } -} -/// OptimizeIndvars - Now that IVUsesByStride is set up with all of the indvar -/// uses in the loop, look to see if we can eliminate some, in favor of using -/// common indvars for the different uses. -void LoopStrengthReduce::OptimizeIndvars(Loop *L) { - // TODO: implement optzns here. + // If all uses use the same type, don't bother looking for truncation-based + // reuse. + if (Types.size() == 1) + Types.clear(); - OptimizeShadowIV(L); + DEBUG(print_factors_and_types(dbgs())); } -bool LoopStrengthReduce::StrideMightBeShared(const SCEV* Stride, Loop *L, - bool CheckPreInc) { - int64_t SInt = cast<SCEVConstant>(Stride)->getValue()->getSExtValue(); - for (unsigned i = 0, e = IU->StrideOrder.size(); i != e; ++i) { - std::map<const SCEV *, IVUsersOfOneStride *>::iterator SI = - IU->IVUsesByStride.find(IU->StrideOrder[i]); - const SCEV *Share = SI->first; - if (!isa<SCEVConstant>(SI->first) || Share == Stride) - continue; - int64_t SSInt = cast<SCEVConstant>(Share)->getValue()->getSExtValue(); - if (SSInt == SInt) - return true; // This can definitely be reused. - if (unsigned(abs64(SSInt)) < SInt || (SSInt % SInt) != 0) - continue; - int64_t Scale = SSInt / SInt; - bool AllUsesAreAddresses = true; - bool AllUsesAreOutsideLoop = true; - std::vector<BasedUser> UsersToProcess; - const SCEV *CommonExprs = CollectIVUsers(SI->first, *SI->second, L, - AllUsesAreAddresses, - AllUsesAreOutsideLoop, - UsersToProcess); - if (AllUsesAreAddresses && - ValidScale(!CommonExprs->isZero(), Scale, UsersToProcess)) { - if (!CheckPreInc) - return true; - // Any pre-inc iv use? - IVUsersOfOneStride &StrideUses = *IU->IVUsesByStride[Share]; - for (ilist<IVStrideUse>::iterator I = StrideUses.Users.begin(), - E = StrideUses.Users.end(); I != E; ++I) { - if (!I->isUseOfPostIncrementedValue()) - return true; +void LSRInstance::CollectFixupsAndInitialFormulae() { + for (IVUsers::const_iterator UI = IU.begin(), E = IU.end(); UI != E; ++UI) { + // Record the uses. 
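// A small sketch of the factor collection above, with hypothetical
// constant strides standing in for the SCEV-based getSDiv logic: whenever
// one stride evenly divides another, the quotient is an interesting scale
// for the formula-generation steps to try.
#include <cstdint>
#include <set>

inline void collectFactorsSketch(const std::set<int64_t> &Strides,
                                 std::set<int64_t> &Factors) {
  for (int64_t A : Strides)
    for (int64_t B : Strides)
      if (A != B && B != 0 && A % B == 0)
        Factors.insert(A / B);  // e.g. strides {4, 8, 16} yield {2, 4}
}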
+ LSRFixup &LF = getNewFixup(); + LF.UserInst = UI->getUser(); + LF.OperandValToReplace = UI->getOperandValToReplace(); + if (UI->isUseOfPostIncrementedValue()) + LF.PostIncLoop = L; + + LSRUse::KindType Kind = LSRUse::Basic; + const Type *AccessTy = 0; + if (isAddressUse(LF.UserInst, LF.OperandValToReplace)) { + Kind = LSRUse::Address; + AccessTy = getAccessType(LF.UserInst); + } + + const SCEV *S = IU.getCanonicalExpr(*UI); + + // Equality (== and !=) ICmps are special. We can rewrite (i == N) as + // (N - i == 0), and this allows (N - i) to be the expression that we work + // with rather than just N or i, so we can consider the register + // requirements for both N and i at the same time. Limiting this code to + // equality icmps is not a problem because all interesting loops use + // equality icmps, thanks to IndVarSimplify. + if (ICmpInst *CI = dyn_cast<ICmpInst>(LF.UserInst)) + if (CI->isEquality()) { + // Swap the operands if needed to put the OperandValToReplace on the + // left, for consistency. + Value *NV = CI->getOperand(1); + if (NV == LF.OperandValToReplace) { + CI->setOperand(1, CI->getOperand(0)); + CI->setOperand(0, NV); + } + + // x == y --> x - y == 0 + const SCEV *N = SE.getSCEV(NV); + if (N->isLoopInvariant(L)) { + Kind = LSRUse::ICmpZero; + S = SE.getMinusSCEV(N, S); + } + + // -1 and the negations of all interesting strides (except the negation + // of -1) are now also interesting. + for (size_t i = 0, e = Factors.size(); i != e; ++i) + if (Factors[i] != -1) + Factors.insert(-(uint64_t)Factors[i]); + Factors.insert(-1); } + + // Set up the initial formula for this use. + std::pair<size_t, int64_t> P = getUse(S, Kind, AccessTy); + LF.LUIdx = P.first; + LF.Offset = P.second; + LSRUse &LU = Uses[LF.LUIdx]; + LU.AllFixupsOutsideLoop &= !L->contains(LF.UserInst); + + // If this is the first use of this LSRUse, give it a formula. + if (LU.Formulae.empty()) { + InsertInitialFormula(S, L, LU, LF.LUIdx); + CountRegisters(LU.Formulae.back(), LF.LUIdx); } } - return false; + + DEBUG(print_fixups(dbgs())); } -/// isUsedByExitBranch - Return true if icmp is used by a loop terminating -/// conditional branch or it's and / or with other conditions before being used -/// as the condition. -static bool isUsedByExitBranch(ICmpInst *Cond, Loop *L) { - BasicBlock *CondBB = Cond->getParent(); - if (!L->isLoopExiting(CondBB)) - return false; - BranchInst *TermBr = dyn_cast<BranchInst>(CondBB->getTerminator()); - if (!TermBr || !TermBr->isConditional()) +void +LSRInstance::InsertInitialFormula(const SCEV *S, Loop *L, + LSRUse &LU, size_t LUIdx) { + Formula F; + F.InitialMatch(S, L, SE, DT); + bool Inserted = InsertFormula(LU, LUIdx, F); + assert(Inserted && "Initial formula already exists!"); (void)Inserted; +} + +void +LSRInstance::InsertSupplementalFormula(const SCEV *S, + LSRUse &LU, size_t LUIdx) { + Formula F; + F.BaseRegs.push_back(S); + F.AM.HasBaseReg = true; + bool Inserted = InsertFormula(LU, LUIdx, F); + assert(Inserted && "Supplemental formula already exists!"); (void)Inserted; +} + +/// CountRegisters - Note which registers are used by the given formula, +/// updating RegUses. +void LSRInstance::CountRegisters(const Formula &F, size_t LUIdx) { + if (F.ScaledReg) + RegUses.CountRegister(F.ScaledReg, LUIdx); + for (SmallVectorImpl<const SCEV *>::const_iterator I = F.BaseRegs.begin(), + E = F.BaseRegs.end(); I != E; ++I) + RegUses.CountRegister(*I, LUIdx); +} + +/// InsertFormula - If the given formula has not yet been inserted, add it to +/// the list, and return true. 
Return false otherwise. +bool LSRInstance::InsertFormula(LSRUse &LU, unsigned LUIdx, const Formula &F) { + if (!LU.InsertFormula(LUIdx, F)) return false; - Value *User = *Cond->use_begin(); - Instruction *UserInst = dyn_cast<Instruction>(User); - while (UserInst && - (UserInst->getOpcode() == Instruction::And || - UserInst->getOpcode() == Instruction::Or)) { - if (!UserInst->hasOneUse() || UserInst->getParent() != CondBB) - return false; - User = *User->use_begin(); - UserInst = dyn_cast<Instruction>(User); + CountRegisters(F, LUIdx); + return true; +} + +/// CollectLoopInvariantFixupsAndFormulae - Check for other uses of +/// loop-invariant values which we're tracking. These other uses will pin these +/// values in registers, making them less profitable for elimination. +/// TODO: This currently misses non-constant addrec step registers. +/// TODO: Should this give more weight to users inside the loop? +void +LSRInstance::CollectLoopInvariantFixupsAndFormulae() { + SmallVector<const SCEV *, 8> Worklist(RegUses.begin(), RegUses.end()); + SmallPtrSet<const SCEV *, 8> Inserted; + + while (!Worklist.empty()) { + const SCEV *S = Worklist.pop_back_val(); + + if (const SCEVNAryExpr *N = dyn_cast<SCEVNAryExpr>(S)) + Worklist.insert(Worklist.end(), N->op_begin(), N->op_end()); + else if (const SCEVCastExpr *C = dyn_cast<SCEVCastExpr>(S)) + Worklist.push_back(C->getOperand()); + else if (const SCEVUDivExpr *D = dyn_cast<SCEVUDivExpr>(S)) { + Worklist.push_back(D->getLHS()); + Worklist.push_back(D->getRHS()); + } else if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) { + if (!Inserted.insert(U)) continue; + const Value *V = U->getValue(); + if (const Instruction *Inst = dyn_cast<Instruction>(V)) + if (L->contains(Inst)) continue; + for (Value::use_const_iterator UI = V->use_begin(), UE = V->use_end(); + UI != UE; ++UI) { + const Instruction *UserInst = dyn_cast<Instruction>(*UI); + // Ignore non-instructions. + if (!UserInst) + continue; + // Ignore instructions in other functions (as can happen with + // Constants). + if (UserInst->getParent()->getParent() != L->getHeader()->getParent()) + continue; + // Ignore instructions not dominated by the loop. + const BasicBlock *UseBB = !isa<PHINode>(UserInst) ? + UserInst->getParent() : + cast<PHINode>(UserInst)->getIncomingBlock( + PHINode::getIncomingValueNumForOperand(UI.getOperandNo())); + if (!DT.dominates(L->getHeader(), UseBB)) + continue; + // Ignore uses which are part of other SCEV expressions, to avoid + // analyzing them multiple times. + if (SE.isSCEVable(UserInst->getType()) && + !isa<SCEVUnknown>(SE.getSCEV(const_cast<Instruction *>(UserInst)))) + continue; + // Ignore icmp instructions which are already being analyzed. 
+ if (const ICmpInst *ICI = dyn_cast<ICmpInst>(UserInst)) { + unsigned OtherIdx = !UI.getOperandNo(); + Value *OtherOp = const_cast<Value *>(ICI->getOperand(OtherIdx)); + if (SE.getSCEV(OtherOp)->hasComputableLoopEvolution(L)) + continue; + } + + LSRFixup &LF = getNewFixup(); + LF.UserInst = const_cast<Instruction *>(UserInst); + LF.OperandValToReplace = UI.getUse(); + std::pair<size_t, int64_t> P = getUse(S, LSRUse::Basic, 0); + LF.LUIdx = P.first; + LF.Offset = P.second; + LSRUse &LU = Uses[LF.LUIdx]; + LU.AllFixupsOutsideLoop &= L->contains(LF.UserInst); + InsertSupplementalFormula(U, LU, LF.LUIdx); + CountRegisters(LU.Formulae.back(), Uses.size() - 1); + break; + } + } } - return User == TermBr; } -static bool ShouldCountToZero(ICmpInst *Cond, IVStrideUse* &CondUse, - ScalarEvolution *SE, Loop *L, - const TargetLowering *TLI = 0) { - if (!L->contains(Cond)) - return false; +/// CollectSubexprs - Split S into subexpressions which can be pulled out into +/// separate registers. If C is non-null, multiply each subexpression by C. +static void CollectSubexprs(const SCEV *S, const SCEVConstant *C, + SmallVectorImpl<const SCEV *> &Ops, + ScalarEvolution &SE) { + if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) { + // Break out add operands. + for (SCEVAddExpr::op_iterator I = Add->op_begin(), E = Add->op_end(); + I != E; ++I) + CollectSubexprs(*I, C, Ops, SE); + return; + } else if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) { + // Split a non-zero base out of an addrec. + if (!AR->getStart()->isZero()) { + CollectSubexprs(SE.getAddRecExpr(SE.getIntegerSCEV(0, AR->getType()), + AR->getStepRecurrence(SE), + AR->getLoop()), C, Ops, SE); + CollectSubexprs(AR->getStart(), C, Ops, SE); + return; + } + } else if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(S)) { + // Break (C * (a + b + c)) into C*a + C*b + C*c. + if (Mul->getNumOperands() == 2) + if (const SCEVConstant *Op0 = + dyn_cast<SCEVConstant>(Mul->getOperand(0))) { + CollectSubexprs(Mul->getOperand(1), + C ? cast<SCEVConstant>(SE.getMulExpr(C, Op0)) : Op0, + Ops, SE); + return; + } + } - if (!isa<SCEVConstant>(CondUse->getOffset())) - return false; + // Otherwise use the value itself. + Ops.push_back(C ? SE.getMulExpr(C, S) : S); +} - // Handle only tests for equality for the moment. - if (!Cond->isEquality() || !Cond->hasOneUse()) - return false; - if (!isUsedByExitBranch(Cond, L)) - return false; +/// GenerateReassociations - Split out subexpressions from adds and the bases of +/// addrecs. +void LSRInstance::GenerateReassociations(LSRUse &LU, unsigned LUIdx, + Formula Base, + unsigned Depth) { + // Arbitrarily cap recursion to protect compile time. + if (Depth >= 3) return; + + for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i) { + const SCEV *BaseReg = Base.BaseRegs[i]; + + SmallVector<const SCEV *, 8> AddOps; + CollectSubexprs(BaseReg, 0, AddOps, SE); + if (AddOps.size() == 1) continue; + + for (SmallVectorImpl<const SCEV *>::const_iterator J = AddOps.begin(), + JE = AddOps.end(); J != JE; ++J) { + // Don't pull a constant into a register if the constant could be folded + // into an immediate field. + if (isAlwaysFoldable(*J, LU.MinOffset, LU.MaxOffset, + Base.getNumRegs() > 1, + LU.Kind, LU.AccessTy, TLI, SE)) + continue; - Value *CondOp0 = Cond->getOperand(0); - const SCEV *IV = SE->getSCEV(CondOp0); - const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(IV); - if (!AR || !AR->isAffine()) - return false; + // Collect all operands except *J. 
+      SmallVector<const SCEV *, 8> InnerAddOps;
+      for (SmallVectorImpl<const SCEV *>::const_iterator K = AddOps.begin(),
+           KE = AddOps.end(); K != KE; ++K)
+        if (K != J)
+          InnerAddOps.push_back(*K);
+
+      // Don't leave just a constant behind in a register if the constant could
+      // be folded into an immediate field.
+      if (InnerAddOps.size() == 1 &&
+          isAlwaysFoldable(InnerAddOps[0], LU.MinOffset, LU.MaxOffset,
+                           Base.getNumRegs() > 1,
+                           LU.Kind, LU.AccessTy, TLI, SE))
+        continue;
-  const SCEVConstant *SC = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*SE));
-  if (!SC || SC->getValue()->getSExtValue() < 0)
-    // If it's already counting down, don't do anything.
-    return false;
+      Formula F = Base;
+      F.BaseRegs[i] = SE.getAddExpr(InnerAddOps);
+      F.BaseRegs.push_back(*J);
+      if (InsertFormula(LU, LUIdx, F))
+        // If that formula hadn't been seen before, recurse to find more like
+        // it.
+        GenerateReassociations(LU, LUIdx, LU.Formulae.back(), Depth+1);
+    }
+  }
+}
-  // If the RHS of the comparison is not an loop invariant, the rewrite
-  // cannot be done. Also bail out if it's already comparing against a zero.
-  // If we are checking this before cmp stride optimization, check if it's
-  // comparing against a already legal immediate.
-  Value *RHS = Cond->getOperand(1);
-  ConstantInt *RHSC = dyn_cast<ConstantInt>(RHS);
-  if (!L->isLoopInvariant(RHS) ||
-      (RHSC && RHSC->isZero()) ||
-      (RHSC && TLI && TLI->isLegalICmpImmediate(RHSC->getSExtValue())))
-    return false;
+/// GenerateCombinations - Generate a formula consisting of all of the
+/// loop-dominating registers added into a single register.
+void LSRInstance::GenerateCombinations(LSRUse &LU, unsigned LUIdx,
+                                       Formula Base) {
+  // This method is only interesting on a plurality of registers.
+  if (Base.BaseRegs.size() <= 1) return;
+
+  Formula F = Base;
+  F.BaseRegs.clear();
+  SmallVector<const SCEV *, 4> Ops;
+  for (SmallVectorImpl<const SCEV *>::const_iterator
+       I = Base.BaseRegs.begin(), E = Base.BaseRegs.end(); I != E; ++I) {
+    const SCEV *BaseReg = *I;
+    if (BaseReg->properlyDominates(L->getHeader(), &DT) &&
+        !BaseReg->hasComputableLoopEvolution(L))
+      Ops.push_back(BaseReg);
+    else
+      F.BaseRegs.push_back(BaseReg);
+  }
+  if (Ops.size() > 1) {
+    const SCEV *Sum = SE.getAddExpr(Ops);
+    // TODO: If Sum is zero, it probably means ScalarEvolution missed an
+    // opportunity to fold something. For now, just ignore such cases
+    // rather than proceed with zero in a register.
+    if (!Sum->isZero()) {
+      F.BaseRegs.push_back(Sum);
+      (void)InsertFormula(LU, LUIdx, F);
+    }
+  }
+}
-  // Make sure the IV is only used for counting. Value may be preinc or
-  // postinc; 2 uses in either case.
-  if (!CondOp0->hasNUses(2))
-    return false;
-
-  return true;
+/// GenerateSymbolicOffsets - Generate reuse formulae using symbolic offsets.
+void LSRInstance::GenerateSymbolicOffsets(LSRUse &LU, unsigned LUIdx,
+                                          Formula Base) {
+  // We can't add a symbolic offset if the address already contains one.
+  if (Base.AM.BaseGV) return;
+
+  for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i) {
+    const SCEV *G = Base.BaseRegs[i];
+    GlobalValue *GV = ExtractSymbol(G, SE);
+    if (G->isZero() || !GV)
+      continue;
+    Formula F = Base;
+    F.AM.BaseGV = GV;
+    if (!isLegalUse(F.AM, LU.MinOffset, LU.MaxOffset,
+                    LU.Kind, LU.AccessTy, TLI))
+      continue;
+    F.BaseRegs[i] = G;
+    (void)InsertFormula(LU, LUIdx, F);
+  }
+}
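// A toy sketch of the CollectSubexprs/GenerateReassociations idea above:
// add expressions are flattened so each operand can become its own base
// register. The node type is a simplified stand-in for SCEV, with string
// leaves standing in for values and constants.
#include <string>
#include <vector>

struct ExprSketch {
  std::string Leaf;                        // set when this is a leaf
  std::vector<const ExprSketch *> AddOps;  // non-empty => an add node
};

inline void collectSubexprsSketch(const ExprSketch *S,
                                  std::vector<const ExprSketch *> &Ops) {
  if (!S->AddOps.empty()) {
    for (const ExprSketch *Op : S->AddOps)  // break out add operands
      collectSubexprsSketch(Op, Ops);
    return;
  }
  Ops.push_back(S);  // not an add: use the value itself
}
// (a + b + 16) yields the candidates a, b, and 16; the reassociation code
// then rejects splits where a lone constant could instead fold into the
// use's immediate field (the isAlwaysFoldable checks).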
-/// OptimizeLoopTermCond - Change loop terminating condition to use the
-/// postinc iv when possible.
-void LoopStrengthReduce::OptimizeLoopTermCond(Loop *L) {
-  BasicBlock *LatchBlock = L->getLoopLatch();
-  bool LatchExit = L->isLoopExiting(LatchBlock);
-  SmallVector<BasicBlock*, 8> ExitingBlocks;
-  L->getExitingBlocks(ExitingBlocks);
+/// GenerateConstantOffsets - Generate reuse formulae using constant offsets.
+void LSRInstance::GenerateConstantOffsets(LSRUse &LU, unsigned LUIdx,
+                                          Formula Base) {
+  // TODO: For now, just add the min and max offset, because it usually isn't
+  // worthwhile looking at everything in between.
+  SmallVector<int64_t, 4> Worklist;
+  Worklist.push_back(LU.MinOffset);
+  if (LU.MaxOffset != LU.MinOffset)
+    Worklist.push_back(LU.MaxOffset);
+
+  for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i) {
+    const SCEV *G = Base.BaseRegs[i];
+
+    for (SmallVectorImpl<int64_t>::const_iterator I = Worklist.begin(),
+         E = Worklist.end(); I != E; ++I) {
+      Formula F = Base;
+      F.AM.BaseOffs = (uint64_t)Base.AM.BaseOffs - *I;
+      if (isLegalUse(F.AM, LU.MinOffset - *I, LU.MaxOffset - *I,
+                     LU.Kind, LU.AccessTy, TLI)) {
+        F.BaseRegs[i] = SE.getAddExpr(G, SE.getIntegerSCEV(*I, G->getType()));
+
+        (void)InsertFormula(LU, LUIdx, F);
+      }
+    }
-  for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i) {
-    BasicBlock *ExitingBlock = ExitingBlocks[i];
+    int64_t Imm = ExtractImmediate(G, SE);
+    if (G->isZero() || Imm == 0)
+      continue;
+    Formula F = Base;
+    F.AM.BaseOffs = (uint64_t)F.AM.BaseOffs + Imm;
+    if (!isLegalUse(F.AM, LU.MinOffset, LU.MaxOffset,
+                    LU.Kind, LU.AccessTy, TLI))
+      continue;
+    F.BaseRegs[i] = G;
+    (void)InsertFormula(LU, LUIdx, F);
+  }
+}
-    // Finally, get the terminating condition for the loop if possible. If we
-    // can, we want to change it to use a post-incremented version of its
-    // induction variable, to allow coalescing the live ranges for the IV into
-    // one register value.
+/// GenerateICmpZeroScales - For ICmpZero, check to see if we can scale up
+/// the comparison. For example, x == y -> x*c == y*c.
+void LSRInstance::GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx,
+                                         Formula Base) {
+  if (LU.Kind != LSRUse::ICmpZero) return;
-    BranchInst *TermBr = dyn_cast<BranchInst>(ExitingBlock->getTerminator());
-    if (!TermBr)
+  // Determine the integer type for the base formula.
+  const Type *IntTy = Base.getType();
+  if (!IntTy) return;
+  if (SE.getTypeSizeInBits(IntTy) > 64) return;
+
+  // Don't do this if there is more than one offset.
+  if (LU.MinOffset != LU.MaxOffset) return;
+
+  assert(!Base.AM.BaseGV && "ICmpZero use is not legal!");
+
+  // Check each interesting stride.
+  for (SmallSetVector<int64_t, 8>::const_iterator
+       I = Factors.begin(), E = Factors.end(); I != E; ++I) {
+    int64_t Factor = *I;
+    Formula F = Base;
+
+    // Check that the multiplication doesn't overflow.
+    F.AM.BaseOffs = (uint64_t)Base.AM.BaseOffs * Factor;
+    if ((int64_t)F.AM.BaseOffs / Factor != Base.AM.BaseOffs)
       continue;
-    // FIXME: Overly conservative, termination condition could be an 'or' etc..
-    if (TermBr->isUnconditional() || !isa<ICmpInst>(TermBr->getCondition()))
+    // Check that multiplying with the use offset doesn't overflow.
+    int64_t Offset = LU.MinOffset;
+    Offset = (uint64_t)Offset * Factor;
+    if ((int64_t)Offset / Factor != LU.MinOffset)
       continue;
-    // Search IVUsesByStride to find Cond's IVUse if there is one.
-    IVStrideUse *CondUse = 0;
-    const SCEV *CondStride = 0;
-    ICmpInst *Cond = cast<ICmpInst>(TermBr->getCondition());
-    if (!FindIVUserForCond(Cond, CondUse, CondStride))
+    // Check that this scale is legal.
+ if (!isLegalUse(F.AM, Offset, Offset, LU.Kind, LU.AccessTy, TLI)) continue; - // If the latch block is exiting and it's not a single block loop, it's - // not safe to use postinc iv in other exiting blocks. FIXME: overly - // conservative? How about icmp stride optimization? - bool UsePostInc = !(e > 1 && LatchExit && ExitingBlock != LatchBlock); - if (UsePostInc && ExitingBlock != LatchBlock) { - if (!Cond->hasOneUse()) - // See below, we don't want the condition to be cloned. - UsePostInc = false; - else { - // If exiting block is the latch block, we know it's safe and profitable - // to transform the icmp to use post-inc iv. Otherwise do so only if it - // would not reuse another iv and its iv would be reused by other uses. - // We are optimizing for the case where the icmp is the only use of the - // iv. - IVUsersOfOneStride &StrideUses = *IU->IVUsesByStride[CondStride]; - for (ilist<IVStrideUse>::iterator I = StrideUses.Users.begin(), - E = StrideUses.Users.end(); I != E; ++I) { - if (I->getUser() == Cond) - continue; - if (!I->isUseOfPostIncrementedValue()) { - UsePostInc = false; - break; - } + // Compensate for the use having MinOffset built into it. + F.AM.BaseOffs = (uint64_t)F.AM.BaseOffs + Offset - LU.MinOffset; + + const SCEV *FactorS = SE.getIntegerSCEV(Factor, IntTy); + + // Check that multiplying with each base register doesn't overflow. + for (size_t i = 0, e = F.BaseRegs.size(); i != e; ++i) { + F.BaseRegs[i] = SE.getMulExpr(F.BaseRegs[i], FactorS); + if (getSDiv(F.BaseRegs[i], FactorS, SE) != Base.BaseRegs[i]) + goto next; + } + + // Check that multiplying with the scaled register doesn't overflow. + if (F.ScaledReg) { + F.ScaledReg = SE.getMulExpr(F.ScaledReg, FactorS); + if (getSDiv(F.ScaledReg, FactorS, SE) != Base.ScaledReg) + continue; + } + + // If we make it here and it's legal, add it. + (void)InsertFormula(LU, LUIdx, F); + next:; + } +} + +/// GenerateScales - Generate stride factor reuse formulae by making use of +/// scaled-offset address modes, for example. +void LSRInstance::GenerateScales(LSRUse &LU, unsigned LUIdx, + Formula Base) { + // Determine the integer type for the base formula. + const Type *IntTy = Base.getType(); + if (!IntTy) return; + + // If this Formula already has a scaled register, we can't add another one. + if (Base.AM.Scale != 0) return; + + // Check each interesting stride. + for (SmallSetVector<int64_t, 8>::const_iterator + I = Factors.begin(), E = Factors.end(); I != E; ++I) { + int64_t Factor = *I; + + Base.AM.Scale = Factor; + Base.AM.HasBaseReg = Base.BaseRegs.size() > 1; + // Check whether this scale is going to be legal. + if (!isLegalUse(Base.AM, LU.MinOffset, LU.MaxOffset, + LU.Kind, LU.AccessTy, TLI)) { + // As a special-case, handle special out-of-loop Basic users specially. + // TODO: Reconsider this special case. + if (LU.Kind == LSRUse::Basic && + isLegalUse(Base.AM, LU.MinOffset, LU.MaxOffset, + LSRUse::Special, LU.AccessTy, TLI) && + LU.AllFixupsOutsideLoop) + LU.Kind = LSRUse::Special; + else + continue; + } + // For an ICmpZero, negating a solitary base register won't lead to + // new solutions. + if (LU.Kind == LSRUse::ICmpZero && + !Base.AM.HasBaseReg && Base.AM.BaseOffs == 0 && !Base.AM.BaseGV) + continue; + // For each addrec base reg, apply the scale, if possible. 
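// A minimal sketch of why the ICmpZero rescaling above is sound: a
// compare against zero is invariant under multiplying both sides by a
// nonzero factor, so the whole formula can be rescaled to match a stride
// already in use. X and Factor are hypothetical values; the real code
// also re-checks legality and guards each multiplication for overflow.
#include <cstdint>

inline bool zeroTestUnchanged(int64_t X, int64_t Factor) {
  bool Before = (X == 0);
  // Multiply in unsigned arithmetic, mirroring the patch's own idiom for
  // avoiding signed-overflow UB; assumes Factor != 0 and no overflow.
  bool After = ((int64_t)((uint64_t)X * (uint64_t)Factor) == 0);
  return Before == After;  // always true under those assumptions
}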
+    for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i)
+      if (const SCEVAddRecExpr *AR =
+            dyn_cast<SCEVAddRecExpr>(Base.BaseRegs[i])) {
+        const SCEV *FactorS = SE.getIntegerSCEV(Factor, IntTy);
+        if (FactorS->isZero())
+          continue;
+        // Divide out the factor, ignoring high bits, since we'll be
+        // scaling the value back up in the end.
+        if (const SCEV *Quotient = getSDiv(AR, FactorS, SE, true)) {
+          // TODO: This could be optimized to avoid all the copying.
+          Formula F = Base;
+          F.ScaledReg = Quotient;
+          std::swap(F.BaseRegs[i], F.BaseRegs.back());
+          F.BaseRegs.pop_back();
+          (void)InsertFormula(LU, LUIdx, F);
         }
       }
+  }
+}
-      // If iv for the stride might be shared and any of the users use pre-inc
-      // iv might be used, then it's not safe to use post-inc iv.
-      if (UsePostInc &&
-          isa<SCEVConstant>(CondStride) &&
-          StrideMightBeShared(CondStride, L, true))
-        UsePostInc = false;
-    }
+/// GenerateTruncates - Generate reuse formulae from different IV types.
+void LSRInstance::GenerateTruncates(LSRUse &LU, unsigned LUIdx,
+                                    Formula Base) {
+  // This requires TargetLowering to tell us which truncates are free.
+  if (!TLI) return;
+
+  // Don't bother truncating symbolic values.
+  if (Base.AM.BaseGV) return;
+
+  // Determine the integer type for the base formula.
+  const Type *DstTy = Base.getType();
+  if (!DstTy) return;
+  DstTy = SE.getEffectiveSCEVType(DstTy);
+
+  for (SmallSetVector<const Type *, 4>::const_iterator
+       I = Types.begin(), E = Types.end(); I != E; ++I) {
+    const Type *SrcTy = *I;
+    if (SrcTy != DstTy && TLI->isTruncateFree(SrcTy, DstTy)) {
+      Formula F = Base;
+
+      if (F.ScaledReg) F.ScaledReg = SE.getAnyExtendExpr(F.ScaledReg, *I);
+      for (SmallVectorImpl<const SCEV *>::iterator J = F.BaseRegs.begin(),
+           JE = F.BaseRegs.end(); J != JE; ++J)
+        *J = SE.getAnyExtendExpr(*J, SrcTy);
+
+      // TODO: This assumes we've done basic processing on all uses and
+      // have an idea what the register usage is.
+      if (!F.hasRegsUsedByUsesOtherThan(LUIdx, RegUses))
+        continue;
-    // If the trip count is computed in terms of a max (due to ScalarEvolution
-    // being unable to find a sufficient guard, for example), change the loop
-    // comparison to use SLT or ULT instead of NE.
-    Cond = OptimizeMax(L, Cond, CondUse);
-
-    // If possible, change stride and operands of the compare instruction to
-    // eliminate one stride. However, avoid rewriting the compare instruction
-    // with an iv of new stride if it's likely the new stride uses will be
-    // rewritten using the stride of the compare instruction.
-    if (ExitingBlock == LatchBlock && isa<SCEVConstant>(CondStride)) {
-      // If the condition stride is a constant and it's the only use, we might
-      // want to optimize it first by turning it to count toward zero.
-      if (!StrideMightBeShared(CondStride, L, false) &&
-          !ShouldCountToZero(Cond, CondUse, SE, L, TLI))
-        Cond = ChangeCompareStride(L, Cond, CondUse, CondStride);
+      (void)InsertFormula(LU, LUIdx, F);
     }
+  }
+}
+
+namespace {
+
+/// WorkItem - Helper class for GenerateCrossUseConstantOffsets. It's used to
+/// defer modifications so that the search phase doesn't have to worry about
+/// the data structures moving underneath it.
+struct WorkItem {
+  size_t LUIdx;
+  int64_t Imm;
+  const SCEV *OrigReg;
+
+  WorkItem(size_t LI, int64_t I, const SCEV *R)
+    : LUIdx(LI), Imm(I), OrigReg(R) {}
+
+  void print(raw_ostream &OS) const;
+  void dump() const;
+};
+
+}
+
+void WorkItem::print(raw_ostream &OS) const {
+  OS << "in formulae referencing " << *OrigReg << " in use " << LUIdx
+     << " , add offset " << Imm;
+}
+
+void WorkItem::dump() const {
+  print(errs()); errs() << '\n';
+}
+
+/// GenerateCrossUseConstantOffsets - Look for registers which are a constant
+/// distance apart and try to form reuse opportunities between them.
+void LSRInstance::GenerateCrossUseConstantOffsets() {
+  // Group the registers by their value without any added constant offset.
+  typedef std::map<int64_t, const SCEV *> ImmMapTy;
+  typedef DenseMap<const SCEV *, ImmMapTy> RegMapTy;
+  RegMapTy Map;
+  DenseMap<const SCEV *, SmallBitVector> UsedByIndicesMap;
+  SmallVector<const SCEV *, 8> Sequence;
+  for (RegUseTracker::const_iterator I = RegUses.begin(), E = RegUses.end();
+       I != E; ++I) {
+    const SCEV *Reg = *I;
+    int64_t Imm = ExtractImmediate(Reg, SE);
+    std::pair<RegMapTy::iterator, bool> Pair =
+      Map.insert(std::make_pair(Reg, ImmMapTy()));
+    if (Pair.second)
+      Sequence.push_back(Reg);
+    Pair.first->second.insert(std::make_pair(Imm, *I));
+    UsedByIndicesMap[Reg] |= RegUses.getUsedByIndices(*I);
+  }
-    if (!UsePostInc)
+  // Now examine each set of registers with the same base value. Build up
+  // a list of work to do and do the work in a separate step so that we're
+  // not adding formulae and register counts while we're searching.
+  SmallVector<WorkItem, 32> WorkItems;
+  SmallSet<std::pair<size_t, int64_t>, 32> UniqueItems;
+  for (SmallVectorImpl<const SCEV *>::const_iterator I = Sequence.begin(),
+       E = Sequence.end(); I != E; ++I) {
+    const SCEV *Reg = *I;
+    const ImmMapTy &Imms = Map.find(Reg)->second;
+
+    // It's not worthwhile looking for reuse if there's only one offset.
+    if (Imms.size() == 1)
       continue;
-    DEBUG(dbgs() << "  Change loop exiting icmp to use postinc iv: "
-          << *Cond << '\n');
+    DEBUG(dbgs() << "Generating cross-use offsets for " << *Reg << ':';
+          for (ImmMapTy::const_iterator J = Imms.begin(), JE = Imms.end();
+               J != JE; ++J)
+            dbgs() << ' ' << J->first;
+          dbgs() << '\n');
-    // It's possible for the setcc instruction to be anywhere in the loop, and
-    // possible for it to have multiple users. If it is not immediately before
-    // the exiting block branch, move it.
-    if (&*++BasicBlock::iterator(Cond) != (Instruction*)TermBr) {
-      if (Cond->hasOneUse()) {   // Condition has a single use, just move it.
-        Cond->moveBefore(TermBr);
-      } else {
-        // Otherwise, clone the terminating condition and insert into the
-        // loopend.
-        Cond = cast<ICmpInst>(Cond->clone());
-        Cond->setName(L->getHeader()->getName() + ".termcond");
-        ExitingBlock->getInstList().insert(TermBr, Cond);
+    // Examine each offset.
+    for (ImmMapTy::const_iterator J = Imms.begin(), JE = Imms.end();
+         J != JE; ++J) {
+      const SCEV *OrigReg = J->second;
-        // Clone the IVUse, as the old use still exists!
-        IU->IVUsesByStride[CondStride]->addUser(CondUse->getOffset(), Cond,
-                                          CondUse->getOperandValToReplace());
-        CondUse = &IU->IVUsesByStride[CondStride]->Users.back();
+      int64_t JImm = J->first;
+      const SmallBitVector &UsedByIndices = RegUses.getUsedByIndices(OrigReg);
+
+      if (!isa<SCEVConstant>(OrigReg) &&
+          UsedByIndicesMap[Reg].count() == 1) {
+        DEBUG(dbgs() << "Skipping cross-use reuse for " << *OrigReg << '\n');
+        continue;
+      }
+
+      // Conservatively examine offsets between this orig reg and a few
+      // selected other orig regs.
+      ImmMapTy::const_iterator OtherImms[] = {
+        Imms.begin(), prior(Imms.end()),
+        Imms.upper_bound((Imms.begin()->first + prior(Imms.end())->first) / 2)
+      };
+      for (size_t i = 0, e = array_lengthof(OtherImms); i != e; ++i) {
+        ImmMapTy::const_iterator M = OtherImms[i];
+        if (M == J || M == JE) continue;
+
+        // Compute the difference between the two.
+        int64_t Imm = (uint64_t)JImm - M->first;
+        for (int LUIdx = UsedByIndices.find_first(); LUIdx != -1;
+             LUIdx = UsedByIndices.find_next(LUIdx))
+          // Make a memo of this use, offset, and register tuple.
+          if (UniqueItems.insert(std::make_pair(LUIdx, Imm)))
+            WorkItems.push_back(WorkItem(LUIdx, Imm, OrigReg));
       }
     }
+  }
-    // If we get to here, we know that we can transform the setcc instruction to
-    // use the post-incremented version of the IV, allowing us to coalesce the
-    // live ranges for the IV correctly.
-    CondUse->setOffset(SE->getMinusSCEV(CondUse->getOffset(), CondStride));
-    CondUse->setIsUseOfPostIncrementedValue(true);
-    Changed = true;
+  Map.clear();
+  Sequence.clear();
+  UsedByIndicesMap.clear();
+  UniqueItems.clear();
+
+  // Now iterate through the worklist and add new formulae.
+  for (SmallVectorImpl<WorkItem>::const_iterator I = WorkItems.begin(),
+       E = WorkItems.end(); I != E; ++I) {
+    const WorkItem &WI = *I;
+    size_t LUIdx = WI.LUIdx;
+    LSRUse &LU = Uses[LUIdx];
+    int64_t Imm = WI.Imm;
+    const SCEV *OrigReg = WI.OrigReg;
+
+    const Type *IntTy = SE.getEffectiveSCEVType(OrigReg->getType());
+    const SCEV *NegImmS = SE.getSCEV(ConstantInt::get(IntTy, -(uint64_t)Imm));
+    unsigned BitWidth = SE.getTypeSizeInBits(IntTy);
+
+    // TODO: Use a more targeted data structure.
+    for (size_t L = 0, LE = LU.Formulae.size(); L != LE; ++L) {
+      Formula F = LU.Formulae[L];
+      // Use the immediate in the scaled register.
+      if (F.ScaledReg == OrigReg) {
+        int64_t Offs = (uint64_t)F.AM.BaseOffs +
+                       Imm * (uint64_t)F.AM.Scale;
+        // Don't create 50 + reg(-50).
+        if (F.referencesReg(SE.getSCEV(
+                   ConstantInt::get(IntTy, -(uint64_t)Offs))))
+          continue;
+        Formula NewF = F;
+        NewF.AM.BaseOffs = Offs;
+        if (!isLegalUse(NewF.AM, LU.MinOffset, LU.MaxOffset,
+                        LU.Kind, LU.AccessTy, TLI))
+          continue;
+        NewF.ScaledReg = SE.getAddExpr(NegImmS, NewF.ScaledReg);
+
+        // If the new scale is a constant in a register, and adding the constant
+        // value to the immediate would produce a value closer to zero than the
+        // immediate itself, then the formula isn't worthwhile.
+        if (const SCEVConstant *C = dyn_cast<SCEVConstant>(NewF.ScaledReg))
+          if (C->getValue()->getValue().isNegative() !=
+                (NewF.AM.BaseOffs < 0) &&
+              (C->getValue()->getValue().abs() * APInt(BitWidth, F.AM.Scale))
+                .ule(APInt(BitWidth, NewF.AM.BaseOffs).abs()))
+            continue;
-    ++NumLoopCond;
+        // OK, looks good.
+        (void)InsertFormula(LU, LUIdx, NewF);
+      } else {
+        // Use the immediate in a base register.
+        for (size_t N = 0, NE = F.BaseRegs.size(); N != NE; ++N) {
+          const SCEV *BaseReg = F.BaseRegs[N];
+          if (BaseReg != OrigReg)
+            continue;
+          Formula NewF = F;
+          NewF.AM.BaseOffs = (uint64_t)NewF.AM.BaseOffs + Imm;
+          if (!isLegalUse(NewF.AM, LU.MinOffset, LU.MaxOffset,
+                          LU.Kind, LU.AccessTy, TLI))
+            continue;
+          NewF.BaseRegs[N] = SE.getAddExpr(NegImmS, BaseReg);
+
+          // If the new formula has a constant in a register, and adding the
+          // constant value to the immediate would produce a value closer to
+          // zero than the immediate itself, then the formula isn't worthwhile.
+          for (SmallVectorImpl<const SCEV *>::const_iterator
+               J = NewF.BaseRegs.begin(), JE = NewF.BaseRegs.end();
+               J != JE; ++J)
+            if (const SCEVConstant *C = dyn_cast<SCEVConstant>(*J))
+              if (C->getValue()->getValue().isNegative() !=
+                    (NewF.AM.BaseOffs < 0) &&
+                  C->getValue()->getValue().abs()
+                    .ule(APInt(BitWidth, NewF.AM.BaseOffs).abs()))
+                goto skip_formula;
+
+          // OK, looks good.
+          (void)InsertFormula(LU, LUIdx, NewF);
+          break;
+        skip_formula:;
+        }
+      }
+    }
   }
 }
-bool LoopStrengthReduce::OptimizeLoopCountIVOfStride(const SCEV* &Stride,
-                                                     IVStrideUse* &CondUse,
-                                                     Loop *L) {
-  // If the only use is an icmp of a loop exiting conditional branch, then
-  // attempt the optimization.
-  BasedUser User = BasedUser(*CondUse, SE);
-  assert(isa<ICmpInst>(User.Inst) && "Expecting an ICMPInst!");
-  ICmpInst *Cond = cast<ICmpInst>(User.Inst);
+/// GenerateAllReuseFormulae - Generate formulae for each use.
+void
+LSRInstance::GenerateAllReuseFormulae() {
+  // This is split into multiple loops so that hasRegsUsedByUsesOtherThan
+  // queries are more precise.
+  for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
+    LSRUse &LU = Uses[LUIdx];
+    for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
+      GenerateReassociations(LU, LUIdx, LU.Formulae[i]);
+    for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
+      GenerateCombinations(LU, LUIdx, LU.Formulae[i]);
+  }
+  for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
+    LSRUse &LU = Uses[LUIdx];
+    for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
+      GenerateSymbolicOffsets(LU, LUIdx, LU.Formulae[i]);
+    for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
+      GenerateConstantOffsets(LU, LUIdx, LU.Formulae[i]);
+    for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
+      GenerateICmpZeroScales(LU, LUIdx, LU.Formulae[i]);
+    for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
+      GenerateScales(LU, LUIdx, LU.Formulae[i]);
+  }
+  for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
+    LSRUse &LU = Uses[LUIdx];
+    for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
+      GenerateTruncates(LU, LUIdx, LU.Formulae[i]);
+  }
-  // Less strict check now that compare stride optimization is done.
-  if (!ShouldCountToZero(Cond, CondUse, SE, L))
-    return false;
+  GenerateCrossUseConstantOffsets();
+}
-  Value *CondOp0 = Cond->getOperand(0);
-  PHINode *PHIExpr = dyn_cast<PHINode>(CondOp0);
-  Instruction *Incr;
-  if (!PHIExpr) {
-    // Value tested is postinc. Find the phi node.
-    Incr = dyn_cast<BinaryOperator>(CondOp0);
-    // FIXME: Just use User.OperandValToReplace here?
-    if (!Incr || Incr->getOpcode() != Instruction::Add)
-      return false;
+/// If there are multiple formulae with the same set of registers used
+/// by other uses, pick the best one and delete the others.
+void LSRInstance::FilterOutUndesirableDedicatedRegisters() {
+#ifndef NDEBUG
+  bool Changed = false;
+#endif
+
+  // Collect the best formula for each unique set of shared registers. This
+  // is reset for each use.
+  typedef DenseMap<SmallVector<const SCEV *, 2>, size_t, UniquifierDenseMapInfo>
+    BestFormulaeTy;
+  BestFormulaeTy BestFormulae;
+
+  for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
+    LSRUse &LU = Uses[LUIdx];
+    FormulaSorter Sorter(L, LU, SE, DT);
+
+    // Clear out the set of used regs; it will be recomputed.
+    LU.Regs.clear();
+
+    for (size_t FIdx = 0, NumForms = LU.Formulae.size();
+         FIdx != NumForms; ++FIdx) {
+      Formula &F = LU.Formulae[FIdx];
+
+      SmallVector<const SCEV *, 2> Key;
+      for (SmallVectorImpl<const SCEV *>::const_iterator J = F.BaseRegs.begin(),
+           JE = F.BaseRegs.end(); J != JE; ++J) {
+        const SCEV *Reg = *J;
+        if (RegUses.isRegUsedByUsesOtherThan(Reg, LUIdx))
+          Key.push_back(Reg);
+      }
+      if (F.ScaledReg &&
+          RegUses.isRegUsedByUsesOtherThan(F.ScaledReg, LUIdx))
+        Key.push_back(F.ScaledReg);
+      // Unstable sort by host order ok, because this is only used for
+      // uniquifying.
+      std::sort(Key.begin(), Key.end());
+
+      std::pair<BestFormulaeTy::const_iterator, bool> P =
+        BestFormulae.insert(std::make_pair(Key, FIdx));
+      if (!P.second) {
+        Formula &Best = LU.Formulae[P.first->second];
+        if (Sorter.operator()(F, Best))
+          std::swap(F, Best);
+        DEBUG(dbgs() << "Filtering out "; F.print(dbgs());
+              dbgs() << "\n"
+                        "    in favor of "; Best.print(dbgs());
+              dbgs() << '\n');
+#ifndef NDEBUG
+        Changed = true;
+#endif
+        std::swap(F, LU.Formulae.back());
+        LU.Formulae.pop_back();
+        --FIdx;
+        --NumForms;
+        continue;
+      }
+      if (F.ScaledReg) LU.Regs.insert(F.ScaledReg);
+      LU.Regs.insert(F.BaseRegs.begin(), F.BaseRegs.end());
+    }
+    BestFormulae.clear();
+  }
-    PHIExpr = dyn_cast<PHINode>(Incr->getOperand(0));
-    if (!PHIExpr)
-      return false;
-    // 1 use for preinc value, the increment.
-    if (!PHIExpr->hasOneUse())
-      return false;
-  } else {
-    assert(isa<PHINode>(CondOp0) &&
-           "Unexpected loop exiting counting instruction sequence!");
-    PHIExpr = cast<PHINode>(CondOp0);
-    // Value tested is preinc. Find the increment.
-    // A CmpInst is not a BinaryOperator; we depend on this.
-    Instruction::use_iterator UI = PHIExpr->use_begin();
-    Incr = dyn_cast<BinaryOperator>(UI);
-    if (!Incr)
-      Incr = dyn_cast<BinaryOperator>(++UI);
-    // One use for postinc value, the phi.  Unnecessarily conservative?
-    if (!Incr || !Incr->hasOneUse() || Incr->getOpcode() != Instruction::Add)
-      return false;
+  DEBUG(if (Changed) {
+          dbgs() << "\n"
+                    "After filtering out undesirable candidates:\n";
+          print_uses(dbgs());
+        });
+}
+
+/// NarrowSearchSpaceUsingHeuristics - If there is an extraordinary number of
+/// formulae to choose from, use some rough heuristics to prune down the number
+/// of formulae. This keeps the main solver from taking an extraordinary amount
+/// of time in some worst-case scenarios.
+void LSRInstance::NarrowSearchSpaceUsingHeuristics() {
+  // This is a rough guess that seems to work fairly well.
+  const size_t Limit = UINT16_MAX;
+
+  SmallPtrSet<const SCEV *, 4> Taken;
+  for (;;) {
+    // Estimate the worst-case number of solutions we might consider. We almost
+    // never consider this many solutions because we prune the search space,
+    // but the pruning isn't always sufficient.
+    uint32_t Power = 1;
+    for (SmallVectorImpl<LSRUse>::const_iterator I = Uses.begin(),
+         E = Uses.end(); I != E; ++I) {
+      size_t FSize = I->Formulae.size();
+      if (FSize >= Limit) {
+        Power = Limit;
+        break;
+      }
+      Power *= FSize;
+      if (Power >= Limit)
+        break;
+    }
+    if (Power < Limit)
+      break;
+
+    // OK, we have too many formulae on our hands to conveniently handle.
+    // Use a rough heuristic to thin out the list.
+
+    // Pick the register which is used by the most LSRUses, which is likely
+    // to be a good reuse register candidate.
+    const SCEV *Best = 0;
+    unsigned BestNum = 0;
+    for (RegUseTracker::const_iterator I = RegUses.begin(), E = RegUses.end();
+         I != E; ++I) {
+      const SCEV *Reg = *I;
+      if (Taken.count(Reg))
+        continue;
+      if (!Best)
+        Best = Reg;
+      else {
+        unsigned Count = RegUses.getUsedByIndices(Reg).count();
+        if (Count > BestNum) {
+          Best = Reg;
+          BestNum = Count;
+        }
+      }
+    }
+
+    DEBUG(dbgs() << "Narrowing the search space by assuming " << *Best
+                 << " will yield profitable reuse.\n");
+    Taken.insert(Best);
+
+    // In any use with formulae which reference this register, delete formulae
+    // which don't reference it.
+    for (SmallVectorImpl<LSRUse>::iterator I = Uses.begin(),
+         E = Uses.end(); I != E; ++I) {
+      LSRUse &LU = *I;
+      if (!LU.Regs.count(Best)) continue;
+
+      // Clear out the set of used regs; it will be recomputed.
+      LU.Regs.clear();
+
+      for (size_t i = 0, e = LU.Formulae.size(); i != e; ++i) {
+        Formula &F = LU.Formulae[i];
+        if (!F.referencesReg(Best)) {
+          DEBUG(dbgs() << "  Deleting "; F.print(dbgs()); dbgs() << '\n');
+          std::swap(LU.Formulae.back(), F);
+          LU.Formulae.pop_back();
+          --e;
+          --i;
+          continue;
+        }
+
+        if (F.ScaledReg) LU.Regs.insert(F.ScaledReg);
+        LU.Regs.insert(F.BaseRegs.begin(), F.BaseRegs.end());
+      }
+    }
+
+    DEBUG(dbgs() << "After pre-selection:\n";
+          print_uses(dbgs()));
   }
+}
-  // Replace the increment with a decrement.
-  DEBUG(dbgs() << "LSR: Examining use ");
-  DEBUG(WriteAsOperand(dbgs(), CondOp0, /*PrintType=*/false));
-  DEBUG(dbgs() << " in Inst: " << *Cond << '\n');
-  BinaryOperator *Decr = BinaryOperator::Create(Instruction::Sub,
-                         Incr->getOperand(0), Incr->getOperand(1), "tmp", Incr);
-  Incr->replaceAllUsesWith(Decr);
-  Incr->eraseFromParent();
-
-  // Substitute endval-startval for the original startval, and 0 for the
-  // original endval. Since we're only testing for equality this is OK even
-  // if the computation wraps around.
-  BasicBlock *Preheader = L->getLoopPreheader();
-  Instruction *PreInsertPt = Preheader->getTerminator();
-  unsigned InBlock = L->contains(PHIExpr->getIncomingBlock(0)) ? 1 : 0;
-  Value *StartVal = PHIExpr->getIncomingValue(InBlock);
-  Value *EndVal = Cond->getOperand(1);
-  DEBUG(dbgs() << " Optimize loop counting iv to count down ["
-        << *EndVal << " .. " << *StartVal << "]\n");
-
-  // FIXME: check for case where both are constant.
-  Constant* Zero = ConstantInt::get(Cond->getOperand(1)->getType(), 0);
-  BinaryOperator *NewStartVal = BinaryOperator::Create(Instruction::Sub,
-                                EndVal, StartVal, "tmp", PreInsertPt);
-  PHIExpr->setIncomingValue(InBlock, NewStartVal);
-  Cond->setOperand(1, Zero);
-  DEBUG(dbgs() << "  New icmp: " << *Cond << "\n");
-
-  int64_t SInt = cast<SCEVConstant>(Stride)->getValue()->getSExtValue();
-  const SCEV *NewStride = 0;
-  bool Found = false;
-  for (unsigned i = 0, e = IU->StrideOrder.size(); i != e; ++i) {
-    const SCEV *OldStride = IU->StrideOrder[i];
-    if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(OldStride))
-      if (SC->getValue()->getSExtValue() == -SInt) {
-        Found = true;
-        NewStride = OldStride;
+/// SolveRecurse - This is the recursive solver.
+void LSRInstance::SolveRecurse(SmallVectorImpl<const Formula *> &Solution,
+                               Cost &SolutionCost,
+                               SmallVectorImpl<const Formula *> &Workspace,
+                               const Cost &CurCost,
+                               const SmallPtrSet<const SCEV *, 16> &CurRegs,
+                               DenseSet<const SCEV *> &VisitedRegs) const {
+  // Some ideas:
+  //  - prune more:
+  //    - use more aggressive filtering
+  //    - sort the formulae so that the most profitable solutions are found
+  //      first
+  //    - sort the uses too
+  //  - search faster:
+  //    - don't compute a cost and then compare; compare while computing the
+  //      cost and bail out early
+  //    - track register sets with SmallBitVector
+
+  const LSRUse &LU = Uses[Workspace.size()];
+
+  // If this use references any register that's already a part of the
+  // in-progress solution, consider it a requirement that a formula must
+  // reference that register in order to be considered. This prunes out
+  // unprofitable searching.
+  SmallSetVector<const SCEV *, 4> ReqRegs;
+  for (SmallPtrSet<const SCEV *, 16>::const_iterator I = CurRegs.begin(),
+       E = CurRegs.end(); I != E; ++I)
+    if (LU.Regs.count(*I))
+      ReqRegs.insert(*I);
+
+  bool AnySatisfiedReqRegs = false;
+  SmallPtrSet<const SCEV *, 16> NewRegs;
+  Cost NewCost;
+retry:
+  for (SmallVectorImpl<Formula>::const_iterator I = LU.Formulae.begin(),
+       E = LU.Formulae.end(); I != E; ++I) {
+    const Formula &F = *I;
+
+    // Ignore formulae which do not use any of the required registers.
+    for (SmallSetVector<const SCEV *, 4>::const_iterator J = ReqRegs.begin(),
+         JE = ReqRegs.end(); J != JE; ++J) {
+      const SCEV *Reg = *J;
+      if ((!F.ScaledReg || F.ScaledReg != Reg) &&
+          std::find(F.BaseRegs.begin(), F.BaseRegs.end(), Reg) ==
+          F.BaseRegs.end())
+        goto skip;
+    }
+    AnySatisfiedReqRegs = true;
+
+    // Evaluate the cost of the current formula. If it's already worse than
+    // the current best, prune the search at that point.
+    NewCost = CurCost;
+    NewRegs = CurRegs;
+    NewCost.RateFormula(F, NewRegs, VisitedRegs, L, LU.Offsets, SE, DT);
+    if (NewCost < SolutionCost) {
+      Workspace.push_back(&F);
+      if (Workspace.size() != Uses.size()) {
+        SolveRecurse(Solution, SolutionCost, Workspace, NewCost,
+                     NewRegs, VisitedRegs);
+        if (F.getNumRegs() == 1 && Workspace.size() == 1)
+          VisitedRegs.insert(F.ScaledReg ? F.ScaledReg : F.BaseRegs[0]);
+      } else {
+        DEBUG(dbgs() << "New best at "; NewCost.print(dbgs());
+              dbgs() << ". Regs:";
+              for (SmallPtrSet<const SCEV *, 16>::const_iterator
+                   I = NewRegs.begin(), E = NewRegs.end(); I != E; ++I)
+                dbgs() << ' ' << **I;
+              dbgs() << '\n');
+
+        SolutionCost = NewCost;
+        Solution = Workspace;
+      }
+      Workspace.pop_back();
+    }
+  skip:;
+  }
+
+  // If none of the formulae had all of the required registers, relax the
+  // constraint so that we don't exclude all formulae.
+  if (!AnySatisfiedReqRegs) {
+    ReqRegs.clear();
+    goto retry;
+  }
+}
+
+void LSRInstance::Solve(SmallVectorImpl<const Formula *> &Solution) const {
+  SmallVector<const Formula *, 8> Workspace;
+  Cost SolutionCost;
+  SolutionCost.Loose();
+  Cost CurCost;
+  SmallPtrSet<const SCEV *, 16> CurRegs;
+  DenseSet<const SCEV *> VisitedRegs;
+  Workspace.reserve(Uses.size());
+
+  SolveRecurse(Solution, SolutionCost, Workspace, CurCost,
+               CurRegs, VisitedRegs);
+
+  // Ok, we've now made all our decisions.
+  DEBUG(dbgs() << "\n"
+                  "The chosen solution requires "; SolutionCost.print(dbgs());
+        dbgs() << ":\n";
+        for (size_t i = 0, e = Uses.size(); i != e; ++i) {
+          dbgs() << "  ";
+          Uses[i].print(dbgs());
+          dbgs() << "\n"
+                    "    ";
+          Solution[i]->print(dbgs());
+          dbgs() << '\n';
+        });
+}
+
+/// getImmediateDominator - A handy utility for the specific DominatorTree
+/// query that we need here.
+///
+static BasicBlock *getImmediateDominator(BasicBlock *BB, DominatorTree &DT) {
+  DomTreeNode *Node = DT.getNode(BB);
+  if (!Node) return 0;
+  Node = Node->getIDom();
+  if (!Node) return 0;
+  return Node->getBlock();
+}
+
+Value *LSRInstance::Expand(const LSRFixup &LF,
+                           const Formula &F,
+                           BasicBlock::iterator IP,
+                           Loop *L, Instruction *IVIncInsertPos,
+                           SCEVExpander &Rewriter,
+                           SmallVectorImpl<WeakVH> &DeadInsts,
+                           ScalarEvolution &SE, DominatorTree &DT) const {
+  const LSRUse &LU = Uses[LF.LUIdx];
+
+  // Then, collect some instructions which we will remain dominated by when
+  // expanding the replacement. These must be dominated by any operands that
+  // will be required in the expansion.
+  SmallVector<Instruction *, 4> Inputs;
+  if (Instruction *I = dyn_cast<Instruction>(LF.OperandValToReplace))
+    Inputs.push_back(I);
+  if (LU.Kind == LSRUse::ICmpZero)
+    if (Instruction *I =
+          dyn_cast<Instruction>(cast<ICmpInst>(LF.UserInst)->getOperand(1)))
+      Inputs.push_back(I);
+  if (LF.PostIncLoop && !L->contains(LF.UserInst))
+    Inputs.push_back(L->getLoopLatch()->getTerminator());
+
+  // Then, climb up the immediate dominator tree as far as we can go while
+  // still being dominated by the input positions.
+  for (;;) {
+    bool AllDominate = true;
+    Instruction *BetterPos = 0;
+    BasicBlock *IDom = getImmediateDominator(IP->getParent(), DT);
+    if (!IDom) break;
+    Instruction *Tentative = IDom->getTerminator();
+    for (SmallVectorImpl<Instruction *>::const_iterator I = Inputs.begin(),
+         E = Inputs.end(); I != E; ++I) {
+      Instruction *Inst = *I;
+      if (Inst == Tentative || !DT.dominates(Inst, Tentative)) {
+        AllDominate = false;
         break;
       }
+      if (IDom == Inst->getParent() &&
+          (!BetterPos || DT.dominates(BetterPos, Inst)))
+        BetterPos = next(BasicBlock::iterator(Inst));
+    }
+    if (!AllDominate)
+      break;
+    if (BetterPos)
+      IP = BetterPos;
+    else
+      IP = Tentative;
   }
+  while (isa<PHINode>(IP)) ++IP;
+
+  // Inform the Rewriter if we have a post-increment use, so that it can
+  // perform an advantageous expansion.
+  Rewriter.setPostInc(LF.PostIncLoop);
+
+  // This is the type that the user actually needs.
+  const Type *OpTy = LF.OperandValToReplace->getType();
+  // This will be the type that we'll initially expand to.
+  const Type *Ty = F.getType();
+  if (!Ty)
+    // No type known; just expand directly to the ultimate type.
+    Ty = OpTy;
+  else if (SE.getEffectiveSCEVType(Ty) == SE.getEffectiveSCEVType(OpTy))
+    // Expand directly to the ultimate type if it's the right size.
+    Ty = OpTy;
+  // This is the type to do integer arithmetic in.
+  const Type *IntTy = SE.getEffectiveSCEVType(Ty);
+
+  // Build up a list of operands to add together to form the full base.
+  SmallVector<const SCEV *, 8> Ops;
+
+  // Expand the BaseRegs portion.
+  for (SmallVectorImpl<const SCEV *>::const_iterator I = F.BaseRegs.begin(),
+       E = F.BaseRegs.end(); I != E; ++I) {
+    const SCEV *Reg = *I;
+    assert(!Reg->isZero() && "Zero allocated in a base register!");
+
+    // If we're expanding for a post-inc user for the add-rec's loop, make the
+    // post-inc adjustment.
+    const SCEV *Start = Reg;
+    while (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Start)) {
+      if (AR->getLoop() == LF.PostIncLoop) {
+        Reg = SE.getAddExpr(Reg, AR->getStepRecurrence(SE));
+        // If the user is inside the loop, insert the code after the increment
+        // so that it is dominated by its operand.
+        if (L->contains(LF.UserInst))
+          IP = IVIncInsertPos;
+        break;
+      }
+      Start = AR->getStart();
+    }
-  if (!Found)
-    NewStride = SE->getIntegerSCEV(-SInt, Stride->getType());
-  IU->AddUser(NewStride, CondUse->getOffset(), Cond, Cond->getOperand(0));
-  IU->IVUsesByStride[Stride]->removeUser(CondUse);
+    Ops.push_back(SE.getUnknown(Rewriter.expandCodeFor(Reg, 0, IP)));
+  }
-  CondUse = &IU->IVUsesByStride[NewStride]->Users.back();
-  Stride = NewStride;
+  // Expand the ScaledReg portion.
+  Value *ICmpScaledV = 0;
+  if (F.AM.Scale != 0) {
+    const SCEV *ScaledS = F.ScaledReg;
+
+    // If we're expanding for a post-inc user for the add-rec's loop, make the
+    // post-inc adjustment.
+    if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(ScaledS))
+      if (AR->getLoop() == LF.PostIncLoop)
+        ScaledS = SE.getAddExpr(ScaledS, AR->getStepRecurrence(SE));
+
+    if (LU.Kind == LSRUse::ICmpZero) {
+      // An interesting way of "folding" with an icmp is to use a negated
+      // scale, which we'll implement by inserting it into the other operand
+      // of the icmp.
+      assert(F.AM.Scale == -1 &&
+             "The only scale supported by ICmpZero uses is -1!");
+      ICmpScaledV = Rewriter.expandCodeFor(ScaledS, 0, IP);
+    } else {
+      // Otherwise just expand the scaled register and an explicit scale,
+      // which is expected to be matched as part of the address.
+      ScaledS = SE.getUnknown(Rewriter.expandCodeFor(ScaledS, 0, IP));
+      ScaledS = SE.getMulExpr(ScaledS,
+                              SE.getIntegerSCEV(F.AM.Scale,
+                                                ScaledS->getType()));
+      Ops.push_back(ScaledS);
+    }
+  }
-  ++NumCountZero;
+  // Expand the immediate portions.
+  if (F.AM.BaseGV)
+    Ops.push_back(SE.getSCEV(F.AM.BaseGV));
+  int64_t Offset = (uint64_t)F.AM.BaseOffs + LF.Offset;
+  if (Offset != 0) {
+    if (LU.Kind == LSRUse::ICmpZero) {
+      // The other interesting way of "folding" with an ICmpZero is to use a
+      // negated immediate.
+      if (!ICmpScaledV)
+        ICmpScaledV = ConstantInt::get(IntTy, -Offset);
+      else {
+        Ops.push_back(SE.getUnknown(ICmpScaledV));
+        ICmpScaledV = ConstantInt::get(IntTy, Offset);
+      }
+    } else {
+      // Just add the immediate values. These again are expected to be matched
+      // as part of the address.
+      Ops.push_back(SE.getIntegerSCEV(Offset, IntTy));
+    }
+  }
-  return true;
+  // Emit instructions summing all the operands.
+  const SCEV *FullS = Ops.empty() ?
+                      SE.getIntegerSCEV(0, IntTy) :
+                      SE.getAddExpr(Ops);
+  Value *FullV = Rewriter.expandCodeFor(FullS, Ty, IP);
+
+  // We're done expanding now, so reset the rewriter.
+  Rewriter.setPostInc(0);
+
+  // An ICmpZero Formula represents an ICmp which we're handling as a
+  // comparison against zero. Now that we've expanded an expression for that
+  // form, update the ICmp's other operand.
+  if (LU.Kind == LSRUse::ICmpZero) {
+    ICmpInst *CI = cast<ICmpInst>(LF.UserInst);
+    DeadInsts.push_back(CI->getOperand(1));
+    assert(!F.AM.BaseGV && "ICmp does not support folding a global value and "
+                           "a scale at the same time!");
+    if (F.AM.Scale == -1) {
+      if (ICmpScaledV->getType() != OpTy) {
+        Instruction *Cast =
+          CastInst::Create(CastInst::getCastOpcode(ICmpScaledV, false,
+                                                   OpTy, false),
+                           ICmpScaledV, OpTy, "tmp", CI);
+        ICmpScaledV = Cast;
+      }
+      CI->setOperand(1, ICmpScaledV);
+    } else {
+      assert(F.AM.Scale == 0 &&
+             "ICmp does not support folding a global value and "
+             "a scale at the same time!");
+      Constant *C = ConstantInt::getSigned(SE.getEffectiveSCEVType(OpTy),
+                                           -(uint64_t)Offset);
+      if (C->getType() != OpTy)
+        C = ConstantExpr::getCast(CastInst::getCastOpcode(C, false,
+                                                          OpTy, false),
+                                  C, OpTy);
+
+      CI->setOperand(1, C);
+    }
+  }
+
+  return FullV;
 }
-/// OptimizeLoopCountIV - If, after all sharing of IVs, the IV used for deciding
-/// when to exit the loop is used only for that purpose, try to rearrange things
-/// so it counts down to a test against zero.
-bool LoopStrengthReduce::OptimizeLoopCountIV(Loop *L) {
-  bool ThisChanged = false;
-  for (unsigned i = 0, e = IU->StrideOrder.size(); i != e; ++i) {
-    const SCEV *Stride = IU->StrideOrder[i];
-    std::map<const SCEV *, IVUsersOfOneStride *>::iterator SI =
-      IU->IVUsesByStride.find(Stride);
-    assert(SI != IU->IVUsesByStride.end() && "Stride doesn't exist!");
-    // FIXME: Generalize to non-affine IV's.
-    if (!SI->first->isLoopInvariant(L))
-      continue;
-    // If stride is a constant and it has an icmpinst use, check if we can
-    // optimize the loop to count down.
-    if (isa<SCEVConstant>(Stride) && SI->second->Users.size() == 1) {
-      Instruction *User = SI->second->Users.begin()->getUser();
-      if (!isa<ICmpInst>(User))
-        continue;
-      const SCEV *CondStride = Stride;
-      IVStrideUse *Use = &*SI->second->Users.begin();
-      if (!OptimizeLoopCountIVOfStride(CondStride, Use, L))
-        continue;
-      ThisChanged = true;
+/// Rewrite - Emit instructions for the leading candidate expression for this
+/// LSRUse (this is called "expanding"), and update the UserInst to reference
+/// the newly expanded value.
+void LSRInstance::Rewrite(const LSRFixup &LF,
+                          const Formula &F,
+                          Loop *L, Instruction *IVIncInsertPos,
+                          SCEVExpander &Rewriter,
+                          SmallVectorImpl<WeakVH> &DeadInsts,
+                          ScalarEvolution &SE, DominatorTree &DT,
+                          Pass *P) const {
+  const Type *OpTy = LF.OperandValToReplace->getType();
+
+  // First, find an insertion point that dominates UserInst. For PHI nodes,
+  // find the nearest block which dominates all the relevant uses.
+  if (PHINode *PN = dyn_cast<PHINode>(LF.UserInst)) {
+    DenseMap<BasicBlock *, Value *> Inserted;
+    for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+      if (PN->getIncomingValue(i) == LF.OperandValToReplace) {
+        BasicBlock *BB = PN->getIncomingBlock(i);
-      // Now check if it's possible to reuse this iv for other stride uses.
-      for (unsigned j = 0, ee = IU->StrideOrder.size(); j != ee; ++j) {
-        const SCEV *SStride = IU->StrideOrder[j];
-        if (SStride == CondStride)
-          continue;
-        std::map<const SCEV *, IVUsersOfOneStride *>::iterator SII =
-          IU->IVUsesByStride.find(SStride);
-        assert(SII != IU->IVUsesByStride.end() && "Stride doesn't exist!");
-        // FIXME: Generalize to non-affine IV's.
-        if (!SII->first->isLoopInvariant(L))
-          continue;
-        // FIXME: Rewrite other stride using CondStride.
+        // If this is a critical edge, split the edge so that we do not insert
+        // the code on all predecessor/successor paths.  We do this unless this
+        // is the canonical backedge for this loop, which complicates post-inc
+        // users.
+        if (e != 1 && BB->getTerminator()->getNumSuccessors() > 1 &&
+            !isa<IndirectBrInst>(BB->getTerminator()) &&
+            (PN->getParent() != L->getHeader() || !L->contains(BB))) {
+          // Split the critical edge.
+          BasicBlock *NewBB = SplitCriticalEdge(BB, PN->getParent(), P);
+
+          // If PN is outside of the loop and BB is in the loop, we want to
+          // move the block to be immediately before the PHI block, not
+          // immediately after BB.
+          if (L->contains(BB) && !L->contains(PN))
+            NewBB->moveBefore(PN->getParent());
+
+          // Splitting the edge can reduce the number of PHI entries we have.
+          e = PN->getNumIncomingValues();
+          BB = NewBB;
+          i = PN->getBasicBlockIndex(BB);
+        }
+
+        std::pair<DenseMap<BasicBlock *, Value *>::iterator, bool> Pair =
+          Inserted.insert(std::make_pair(BB, static_cast<Value *>(0)));
+        if (!Pair.second)
+          PN->setIncomingValue(i, Pair.first->second);
+        else {
+          Value *FullV = Expand(LF, F, BB->getTerminator(), L, IVIncInsertPos,
+                                Rewriter, DeadInsts, SE, DT);
+
+          // If this is reuse-by-noop-cast, insert the noop cast.
+          if (FullV->getType() != OpTy)
+            FullV =
+              CastInst::Create(CastInst::getCastOpcode(FullV, false,
+                                                       OpTy, false),
+                               FullV, LF.OperandValToReplace->getType(),
+                               "tmp", BB->getTerminator());
+
+          PN->setIncomingValue(i, FullV);
+          Pair.first->second = FullV;
+        }
       }
+  } else {
+    Value *FullV = Expand(LF, F, LF.UserInst, L, IVIncInsertPos,
+                          Rewriter, DeadInsts, SE, DT);
+
+    // If this is reuse-by-noop-cast, insert the noop cast.
+    if (FullV->getType() != OpTy) {
+      Instruction *Cast =
+        CastInst::Create(CastInst::getCastOpcode(FullV, false, OpTy, false),
+                         FullV, OpTy, "tmp", LF.UserInst);
+      FullV = Cast;
     }
+
+    // Update the user. ICmpZero is handled specially here (for now) because
+    // Expand may have updated one of the operands of the icmp already, and
+    // its new value may happen to be equal to LF.OperandValToReplace, in
+    // which case doing replaceUsesOfWith leads to replacing both operands
+    // with the same value. TODO: Reorganize this.
+    if (Uses[LF.LUIdx].Kind == LSRUse::ICmpZero)
+      LF.UserInst->setOperand(0, FullV);
+    else
+      LF.UserInst->replaceUsesOfWith(LF.OperandValToReplace, FullV);
   }
-  Changed |= ThisChanged;
-  return ThisChanged;
+  DeadInsts.push_back(LF.OperandValToReplace);
 }
-bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager &LPM) {
-  IU = &getAnalysis<IVUsers>();
-  SE = &getAnalysis<ScalarEvolution>();
-  Changed = false;
+void
+LSRInstance::ImplementSolution(const SmallVectorImpl<const Formula *> &Solution,
+                               Pass *P) {
+  // Keep track of instructions we may have made dead, so that
+  // we can remove them after we are done working.
+  SmallVector<WeakVH, 16> DeadInsts;
+
+  SCEVExpander Rewriter(SE);
+  Rewriter.disableCanonicalMode();
+  Rewriter.setIVIncInsertPos(L, IVIncInsertPos);
-  // If LoopSimplify form is not available, stay out of trouble.
-  if (!L->getLoopPreheader() || !L->getLoopLatch())
-    return false;
+  // Expand the new value definitions and update the users.
+  for (size_t i = 0, e = Fixups.size(); i != e; ++i) {
+    size_t LUIdx = Fixups[i].LUIdx;
+
+    Rewrite(Fixups[i], *Solution[LUIdx], L, IVIncInsertPos, Rewriter,
+            DeadInsts, SE, DT, P);
+
+    Changed = true;
+  }
-  if (!IU->IVUsesByStride.empty()) {
-    DEBUG(dbgs() << "\nLSR on \"" << L->getHeader()->getParent()->getName()
-          << "\" ";
-          L->print(dbgs()));
+  // Clean up after ourselves. This must be done before deleting any
+  // instructions.
+  Rewriter.clear();
-    // Sort the StrideOrder so we process larger strides first.
-    std::stable_sort(IU->StrideOrder.begin(), IU->StrideOrder.end(),
-                     StrideCompare(SE));
+  Changed |= DeleteTriviallyDeadInstructions(DeadInsts);
+}
-    // Optimize induction variables. Some indvar uses can be transformed to use
-    // strides that will be needed for other purposes. A common example of this
-    // is the exit test for the loop, which can often be rewritten to use the
-    // computation of some other indvar to decide when to terminate the loop.
-    OptimizeIndvars(L);
+LSRInstance::LSRInstance(const TargetLowering *tli, Loop *l, Pass *P)
+  : IU(P->getAnalysis<IVUsers>()),
+    SE(P->getAnalysis<ScalarEvolution>()),
+    DT(P->getAnalysis<DominatorTree>()),
+    TLI(tli), L(l), Changed(false), IVIncInsertPos(0) {
-    // Change loop terminating condition to use the postinc iv when possible
-    // and optimize loop terminating compare. FIXME: Move this after
-    // StrengthReduceIVUsersOfStride?
-    OptimizeLoopTermCond(L);
+  // If LoopSimplify form is not available, stay out of trouble.
+  if (!L->isLoopSimplifyForm()) return;
+
+  // If there's no interesting work to be done, bail early.
+  if (IU.empty()) return;
+
+  DEBUG(dbgs() << "\nLSR on loop ";
+        WriteAsOperand(dbgs(), L->getHeader(), /*PrintType=*/false);
+        dbgs() << ":\n");
+
+  /// OptimizeShadowIV - If IV is used in an int-to-float cast
+  /// inside the loop then try to eliminate the cast operation.
+  OptimizeShadowIV();
+
+  // Change loop terminating condition to use the postinc iv when possible.
+  Changed |= OptimizeLoopTermCond();
+
+  CollectInterestingTypesAndFactors();
+  CollectFixupsAndInitialFormulae();
+  CollectLoopInvariantFixupsAndFormulae();
+
+  DEBUG(dbgs() << "LSR found " << Uses.size() << " uses:\n";
+        print_uses(dbgs()));
+
+  // Now use the reuse data to generate a bunch of interesting ways
+  // to formulate the values needed for the uses.
+  GenerateAllReuseFormulae();
+
+  DEBUG(dbgs() << "\n"
+                  "After generating reuse formulae:\n";
+        print_uses(dbgs()));
+
+  FilterOutUndesirableDedicatedRegisters();
+  NarrowSearchSpaceUsingHeuristics();
+
+  SmallVector<const Formula *, 8> Solution;
+  Solve(Solution);
+  assert(Solution.size() == Uses.size() && "Malformed solution!");
+
+  // Release memory that is no longer needed.
+  Factors.clear();
+  Types.clear();
+  RegUses.clear();
+
+#ifndef NDEBUG
+  // Formulae should be legal.
+  for (SmallVectorImpl<LSRUse>::const_iterator I = Uses.begin(),
+       E = Uses.end(); I != E; ++I) {
+    const LSRUse &LU = *I;
+    for (SmallVectorImpl<Formula>::const_iterator J = LU.Formulae.begin(),
+         JE = LU.Formulae.end(); J != JE; ++J)
+      assert(isLegalUse(J->AM, LU.MinOffset, LU.MaxOffset,
+                        LU.Kind, LU.AccessTy, TLI) &&
+             "Illegal formula generated!");
+  };
+#endif
-    // FIXME: We can shrink overlarge IV's here. e.g. if the code has
-    // computation in i64 values and the target doesn't support i64, demote
-    // the computation to 32-bit if safe.
+  // Now that we've decided what we want, make it so.
+  ImplementSolution(Solution, P);
+}
-    // FIXME: Attempt to reuse values across multiple IV's. In particular, we
-    // could have something like "for(i) { foo(i*8); bar(i*16) }", which should
-    // be codegened as "for (j = 0;; j+=8) { foo(j); bar(j+j); }" on X86/PPC.
-    // Need to be careful that IV's are all the same type. Only works for
-    // intptr_t indvars.
+void LSRInstance::print_factors_and_types(raw_ostream &OS) const {
+  if (Factors.empty() && Types.empty()) return;
-    // IVsByStride keeps IVs for one particular loop.
-    assert(IVsByStride.empty() && "Stale entries in IVsByStride?");
+  OS << "LSR has identified the following interesting factors and types: ";
+  bool First = true;
-    StrengthReduceIVUsers(L);
+  for (SmallSetVector<int64_t, 8>::const_iterator
+       I = Factors.begin(), E = Factors.end(); I != E; ++I) {
+    if (!First) OS << ", ";
+    First = false;
+    OS << '*' << *I;
+  }
-    // After all sharing is done, see if we can adjust the loop to test against
-    // zero instead of counting up to a maximum. This is usually faster.
-    OptimizeLoopCountIV(L);
+  for (SmallSetVector<const Type *, 4>::const_iterator
+       I = Types.begin(), E = Types.end(); I != E; ++I) {
+    if (!First) OS << ", ";
+    First = false;
+    OS << '(' << **I << ')';
+  }
+  OS << '\n';
+}
-    // We're done analyzing this loop; release all the state we built up for it.
-    IVsByStride.clear();
+void LSRInstance::print_fixups(raw_ostream &OS) const {
+  OS << "LSR is examining the following fixup sites:\n";
+  for (SmallVectorImpl<LSRFixup>::const_iterator I = Fixups.begin(),
+       E = Fixups.end(); I != E; ++I) {
+    const LSRFixup &LF = *I;
+    OS << "  ";
+    LF.print(OS);
+    OS << '\n';
+  }
+}
-    // Clean up after ourselves
-    DeleteTriviallyDeadInstructions();
+void LSRInstance::print_uses(raw_ostream &OS) const {
+  OS << "LSR is examining the following uses:\n";
+  for (SmallVectorImpl<LSRUse>::const_iterator I = Uses.begin(),
+       E = Uses.end(); I != E; ++I) {
+    const LSRUse &LU = *I;
+    OS << "  ";
+    LU.print(OS);
+    OS << '\n';
+    for (SmallVectorImpl<Formula>::const_iterator J = LU.Formulae.begin(),
+         JE = LU.Formulae.end(); J != JE; ++J) {
+      OS << "    ";
+      J->print(OS);
+      OS << '\n';
+    }
   }
+}
+
+void LSRInstance::print(raw_ostream &OS) const {
+  print_factors_and_types(OS);
+  print_fixups(OS);
+  print_uses(OS);
+}
+
+void LSRInstance::dump() const {
+  print(errs()); errs() << '\n';
+}
+
+namespace {
+
+class LoopStrengthReduce : public LoopPass {
+  /// TLI - Keep a pointer of a TargetLowering to consult for determining
+  /// transformation profitability.
+  const TargetLowering *const TLI;
+
+public:
+  static char ID; // Pass ID, replacement for typeid
+  explicit LoopStrengthReduce(const TargetLowering *tli = 0);
+
+private:
+  bool runOnLoop(Loop *L, LPPassManager &LPM);
+  void getAnalysisUsage(AnalysisUsage &AU) const;
+};
+
+}
+
+char LoopStrengthReduce::ID = 0;
+static RegisterPass<LoopStrengthReduce>
+X("loop-reduce", "Loop Strength Reduction");
+
+Pass *llvm::createLoopStrengthReducePass(const TargetLowering *TLI) {
+  return new LoopStrengthReduce(TLI);
+}
+
+LoopStrengthReduce::LoopStrengthReduce(const TargetLowering *tli)
+  : LoopPass(&ID), TLI(tli) {}
+
+void LoopStrengthReduce::getAnalysisUsage(AnalysisUsage &AU) const {
+  // We split critical edges, so we change the CFG. However, we do update
+  // many analyses if they are around.
+  AU.addPreservedID(LoopSimplifyID);
+  AU.addPreserved<LoopInfo>();
+  AU.addPreserved("domfrontier");
+
+  AU.addRequiredID(LoopSimplifyID);
+  AU.addRequired<DominatorTree>();
+  AU.addPreserved<DominatorTree>();
+  AU.addRequired<ScalarEvolution>();
+  AU.addPreserved<ScalarEvolution>();
+  AU.addRequired<IVUsers>();
+  AU.addPreserved<IVUsers>();
+}
+
+bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager & /*LPM*/) {
+  bool Changed = false;
+
+  // Run the main LSR transformation.
+  Changed |= LSRInstance(TLI, L, this).getChanged();
 
   // At this point, it is worth checking to see if any recurrence PHIs are also
   // dead, so that we can remove them as well.
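
[Editor's note] The SolveRecurse/Solve pair above implements a branch-and-bound search: pick one formula per use, carry the set of registers the partial solution already requires, and abandon any branch whose cost can no longer beat the best complete solution found so far. The following is a small, self-contained C++ sketch of that idea only. The type names (a Formula reduced to a bare register list) and the cost function (count of distinct registers) are simplified stand-ins assumed for illustration, not the LLVM API, and the sketch omits LSR's ReqRegs filtering and its multi-field Cost model.

// Hedged sketch only: a toy analogue of SolveRecurse/Solve, not LLVM code.
#include <iostream>
#include <set>
#include <vector>

typedef std::vector<int> Formula;        // registers referenced by a formula
typedef std::vector<Formula> Use;        // candidate formulae for one use

static void SolveRecurse(const std::vector<Use> &Uses, size_t Idx,
                         std::set<int> &CurRegs,
                         std::vector<size_t> &Workspace,
                         size_t &BestCost, std::vector<size_t> &Solution) {
  if (Idx == Uses.size()) {
    // Complete assignment; by construction it beats the previous best,
    // because every branch reaching here passed the pruning test below.
    BestCost = CurRegs.size();
    Solution = Workspace;
    return;
  }
  for (size_t F = 0; F != Uses[Idx].size(); ++F) {
    std::set<int> NewRegs(CurRegs);
    NewRegs.insert(Uses[Idx][F].begin(), Uses[Idx][F].end());
    // Prune: this partial solution already uses at least as many registers
    // as the best complete solution, so it cannot improve on it.
    if (NewRegs.size() >= BestCost)
      continue;
    Workspace.push_back(F);
    SolveRecurse(Uses, Idx + 1, NewRegs, Workspace, BestCost, Solution);
    Workspace.pop_back();
  }
}

int main() {
  // Two uses with two candidate formulae each; sharing register 1 is cheapest.
  std::vector<Use> Uses(2);
  Uses[0].push_back(Formula(1, 1));      // use 0, formula 0: {reg 1}
  Uses[0].push_back(Formula(1, 2));      // use 0, formula 1: {reg 2}
  Uses[1].push_back(Formula(1, 1));      // use 1, formula 0: {reg 1}
  Uses[1].push_back(Formula(1, 3));      // use 1, formula 1: {reg 3}

  std::set<int> CurRegs;
  std::vector<size_t> Workspace, Solution;
  size_t BestCost = Uses.size() * 8 + 1; // loose upper bound, like Cost::Loose()
  SolveRecurse(Uses, 0, CurRegs, Workspace, BestCost, Solution);

  std::cout << "register cost " << BestCost << ", formulae";
  for (size_t i = 0; i != Solution.size(); ++i)
    std::cout << ' ' << Solution[i];
  std::cout << '\n';                     // expect: register cost 1, formulae 0 0
  return 0;
}

In the real pass, the pruning comparison is NewCost < SolutionCost over a richer Cost, and formulae that reference none of the registers already required by the in-progress solution are skipped before costing; together with NarrowSearchSpaceUsingHeuristics, that is what keeps the worst case tractable.
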
diff --git a/lib/Transforms/Scalar/LoopUnrollPass.cpp b/lib/Transforms/Scalar/LoopUnrollPass.cpp index ee8cb4f..a355ec3 100644 --- a/lib/Transforms/Scalar/LoopUnrollPass.cpp +++ b/lib/Transforms/Scalar/LoopUnrollPass.cpp @@ -76,11 +76,12 @@ static RegisterPass<LoopUnroll> X("loop-unroll", "Unroll loops"); Pass *llvm::createLoopUnrollPass() { return new LoopUnroll(); } /// ApproximateLoopSize - Approximate the size of the loop. -static unsigned ApproximateLoopSize(const Loop *L) { +static unsigned ApproximateLoopSize(const Loop *L, unsigned &NumCalls) { CodeMetrics Metrics; for (Loop::block_iterator I = L->block_begin(), E = L->block_end(); I != E; ++I) Metrics.analyzeBasicBlock(*I); + NumCalls = Metrics.NumCalls; return Metrics.NumInsts; } @@ -110,8 +111,13 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) { // Enforce the threshold. if (UnrollThreshold != NoThreshold) { - unsigned LoopSize = ApproximateLoopSize(L); + unsigned NumCalls; + unsigned LoopSize = ApproximateLoopSize(L, NumCalls); DEBUG(dbgs() << " Loop Size = " << LoopSize << "\n"); + if (NumCalls != 0) { + DEBUG(dbgs() << " Not unrolling loop with function calls.\n"); + return false; + } uint64_t Size = (uint64_t)LoopSize*Count; if (TripCount != 1 && Size > UnrollThreshold) { DEBUG(dbgs() << " Too large to fully unroll with count: " << Count diff --git a/lib/Transforms/Scalar/LoopUnswitch.cpp b/lib/Transforms/Scalar/LoopUnswitch.cpp index 527a7b5..990e0c4 100644 --- a/lib/Transforms/Scalar/LoopUnswitch.cpp +++ b/lib/Transforms/Scalar/LoopUnswitch.cpp @@ -169,6 +169,10 @@ Pass *llvm::createLoopUnswitchPass(bool Os) { /// invariant in the loop, or has an invariant piece, return the invariant. /// Otherwise, return null. static Value *FindLIVLoopCondition(Value *Cond, Loop *L, bool &Changed) { + // We can never unswitch on vector conditions. + if (isa<VectorType>(Cond->getType())) + return 0; + // Constants should be folded, not unswitched on! if (isa<Constant>(Cond)) return 0; @@ -401,7 +405,7 @@ bool LoopUnswitch::IsTrivialUnswitchCondition(Value *Cond, Constant **Val, /// UnswitchIfProfitable - We have found that we can unswitch currentLoop when /// LoopCond == Val to simplify the loop. If we decide that this is profitable, /// unswitch the loop, reprocess the pieces, then return true. -bool LoopUnswitch::UnswitchIfProfitable(Value *LoopCond, Constant *Val){ +bool LoopUnswitch::UnswitchIfProfitable(Value *LoopCond, Constant *Val) { initLoopData(); @@ -867,7 +871,7 @@ void LoopUnswitch::RewriteLoopBodyWithConditionConstant(Loop *L, Value *LIC, // If we know that LIC == Val, or that LIC == NotVal, just replace uses of LIC // in the loop with the appropriate one directly. 
if (IsEqual || (isa<ConstantInt>(Val) && - Val->getType()->isInteger(1))) { + Val->getType()->isIntegerTy(1))) { Value *Replacement; if (IsEqual) Replacement = Val; @@ -993,10 +997,10 @@ void LoopUnswitch::SimplifyCode(std::vector<Instruction*> &Worklist, Loop *L) { case Instruction::And: if (isa<ConstantInt>(I->getOperand(0)) && // constant -> RHS - I->getOperand(0)->getType()->isInteger(1)) + I->getOperand(0)->getType()->isIntegerTy(1)) cast<BinaryOperator>(I)->swapOperands(); if (ConstantInt *CB = dyn_cast<ConstantInt>(I->getOperand(1))) - if (CB->getType()->isInteger(1)) { + if (CB->getType()->isIntegerTy(1)) { if (CB->isOne()) // X & 1 -> X ReplaceUsesOfWith(I, I->getOperand(0), Worklist, L, LPM); else // X & 0 -> 0 @@ -1007,10 +1011,10 @@ void LoopUnswitch::SimplifyCode(std::vector<Instruction*> &Worklist, Loop *L) { case Instruction::Or: if (isa<ConstantInt>(I->getOperand(0)) && // constant -> RHS - I->getOperand(0)->getType()->isInteger(1)) + I->getOperand(0)->getType()->isIntegerTy(1)) cast<BinaryOperator>(I)->swapOperands(); if (ConstantInt *CB = dyn_cast<ConstantInt>(I->getOperand(1))) - if (CB->getType()->isInteger(1)) { + if (CB->getType()->isIntegerTy(1)) { if (CB->isOne()) // X | 1 -> 1 ReplaceUsesOfWith(I, I->getOperand(1), Worklist, L, LPM); else // X | 0 -> X diff --git a/lib/Transforms/Scalar/Makefile b/lib/Transforms/Scalar/Makefile index e18f30f..cc42fd0 100644 --- a/lib/Transforms/Scalar/Makefile +++ b/lib/Transforms/Scalar/Makefile @@ -10,7 +10,6 @@ LEVEL = ../../.. LIBRARYNAME = LLVMScalarOpts BUILD_ARCHIVE = 1 -CXXFLAGS = -fno-rtti include $(LEVEL)/Makefile.common diff --git a/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/lib/Transforms/Scalar/MemCpyOptimizer.cpp index e0aa491..62e2977 100644 --- a/lib/Transforms/Scalar/MemCpyOptimizer.cpp +++ b/lib/Transforms/Scalar/MemCpyOptimizer.cpp @@ -42,7 +42,7 @@ static Value *isBytewiseValue(Value *V) { LLVMContext &Context = V->getContext(); // All byte-wide stores are splatable, even of arbitrary variables. - if (V->getType()->isInteger(8)) return V; + if (V->getType()->isIntegerTy(8)) return V; // Constant float and double values can be handled as integer values if the // corresponding integer value is "byteable". An important case is 0.0. diff --git a/lib/Transforms/Scalar/Reassociate.cpp b/lib/Transforms/Scalar/Reassociate.cpp index 4a99f4a..187216a 100644 --- a/lib/Transforms/Scalar/Reassociate.cpp +++ b/lib/Transforms/Scalar/Reassociate.cpp @@ -182,7 +182,7 @@ unsigned Reassociate::getRank(Value *V) { // If this is a not or neg instruction, do not count it for rank. This // assures us that X and ~X will have the same rank. - if (!I->getType()->isInteger() || + if (!I->getType()->isIntegerTy() || (!BinaryOperator::isNot(I) && !BinaryOperator::isNeg(I))) ++Rank; @@ -249,7 +249,7 @@ void Reassociate::LinearizeExpr(BinaryOperator *I) { /// LinearizeExprTree - Given an associative binary expression tree, traverse /// all of the uses putting it into canonical form. This forces a left-linear -/// form of the the expression (((a+b)+c)+d), and collects information about the +/// form of the expression (((a+b)+c)+d), and collects information about the /// rank of the non-tree operands. /// /// NOTE: These intentionally destroys the expression tree operands (turning @@ -299,7 +299,7 @@ void Reassociate::LinearizeExprTree(BinaryOperator *I, Success = false; MadeChange = true; } else if (RHSBO) { - // Turn (A+B)+(C+D) -> (((A+B)+C)+D). This guarantees the the RHS is not + // Turn (A+B)+(C+D) -> (((A+B)+C)+D). 
This guarantees the RHS is not // part of the expression tree. LinearizeExpr(I); LHS = LHSBO = cast<BinaryOperator>(I->getOperand(0)); @@ -929,10 +929,19 @@ void Reassociate::ReassociateBB(BasicBlock *BB) { } // Reject cases where it is pointless to do this. - if (!isa<BinaryOperator>(BI) || BI->getType()->isFloatingPoint() || + if (!isa<BinaryOperator>(BI) || BI->getType()->isFloatingPointTy() || isa<VectorType>(BI->getType())) continue; // Floating point ops are not associative. + // Do not reassociate boolean (i1) expressions. We want to preserve the + // original order of evaluation for short-circuited comparisons that + // SimplifyCFG has folded to AND/OR expressions. If the expression + // is not further optimized, it is likely to be transformed back to a + // short-circuited form for code gen, and the source order may have been + // optimized for the most likely conditions. + if (BI->getType()->isIntegerTy(1)) + continue; + // If this is a subtract instruction which is not already in negate form, // see if we can convert it to X+-Y. if (BI->getOpcode() == Instruction::Sub) { diff --git a/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/lib/Transforms/Scalar/ScalarReplAggregates.cpp index f473480..822712e 100644 --- a/lib/Transforms/Scalar/ScalarReplAggregates.cpp +++ b/lib/Transforms/Scalar/ScalarReplAggregates.cpp @@ -202,12 +202,18 @@ bool SROA::performPromotion(Function &F) { return Changed; } -/// getNumSAElements - Return the number of elements in the specific struct or -/// array. -static uint64_t getNumSAElements(const Type *T) { +/// ShouldAttemptScalarRepl - Decide if an alloca is a good candidate for +/// SROA. It must be a struct or array type with a small number of elements. +static bool ShouldAttemptScalarRepl(AllocaInst *AI) { + const Type *T = AI->getAllocatedType(); + // Do not promote any struct into more than 32 separate vars. if (const StructType *ST = dyn_cast<StructType>(T)) - return ST->getNumElements(); - return cast<ArrayType>(T)->getNumElements(); + return ST->getNumElements() <= 32; + // Arrays are much less likely to be safe for SROA; only consider + // them if they are very small. + if (const ArrayType *AT = dyn_cast<ArrayType>(T)) + return AT->getNumElements() <= 8; + return false; } // performScalarRepl - This algorithm is a simple worklist driven algorithm, @@ -266,22 +272,18 @@ bool SROA::performScalarRepl(Function &F) { // Do not promote [0 x %struct]. if (AllocaSize == 0) continue; + // If the alloca looks like a good candidate for scalar replacement, and if + // all its users can be transformed, then split up the aggregate into its + // separate elements. + if (ShouldAttemptScalarRepl(AI) && isSafeAllocaToScalarRepl(AI)) { + DoScalarReplacement(AI, WorkList); + Changed = true; + continue; + } + // Do not promote any struct whose size is too big. if (AllocaSize > SRThreshold) continue; - if ((isa<StructType>(AI->getAllocatedType()) || - isa<ArrayType>(AI->getAllocatedType())) && - // Do not promote any struct into more than "32" separate vars. - getNumSAElements(AI->getAllocatedType()) <= SRThreshold/4) { - // Check that all of the users of the allocation are capable of being - // transformed. - if (isSafeAllocaToScalarRepl(AI)) { - DoScalarReplacement(AI, WorkList); - Changed = true; - continue; - } - } - // If we can turn this aggregate value (potentially with casts) into a // simple scalar value that can be mem2reg'd into a register value. 
// IsNotTrivial tracks whether this is something that mem2reg could have @@ -681,7 +683,7 @@ void SROA::RewriteGEP(GetElementPtrInst *GEPI, AllocaInst *AI, uint64_t Offset, Val->takeName(GEPI); } if (Val->getType() != GEPI->getType()) - Val = new BitCastInst(Val, GEPI->getType(), Val->getNameStr(), GEPI); + Val = new BitCastInst(Val, GEPI->getType(), Val->getName(), GEPI); GEPI->replaceAllUsesWith(Val); DeadInsts.push_back(GEPI); } @@ -769,7 +771,7 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst, Value *Idx[2] = { Zero, ConstantInt::get(Type::getInt32Ty(MI->getContext()), i) }; OtherElt = GetElementPtrInst::CreateInBounds(OtherPtr, Idx, Idx + 2, - OtherPtr->getNameStr()+"."+Twine(i), + OtherPtr->getName()+"."+Twine(i), MI); uint64_t EltOffset; const PointerType *OtherPtrTy = cast<PointerType>(OtherPtr->getType()); @@ -833,7 +835,7 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst, StoreVal = ConstantInt::get(Context, TotalVal); if (isa<PointerType>(ValTy)) StoreVal = ConstantExpr::getIntToPtr(StoreVal, ValTy); - else if (ValTy->isFloatingPoint()) + else if (ValTy->isFloatingPointTy()) StoreVal = ConstantExpr::getBitCast(StoreVal, ValTy); assert(StoreVal->getType() == ValTy && "Type mismatch!"); @@ -853,12 +855,11 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst, // Cast the element pointer to BytePtrTy. if (EltPtr->getType() != BytePtrTy) - EltPtr = new BitCastInst(EltPtr, BytePtrTy, EltPtr->getNameStr(), MI); + EltPtr = new BitCastInst(EltPtr, BytePtrTy, EltPtr->getName(), MI); // Cast the other pointer (if we have one) to BytePtrTy. if (OtherElt && OtherElt->getType() != BytePtrTy) - OtherElt = new BitCastInst(OtherElt, BytePtrTy,OtherElt->getNameStr(), - MI); + OtherElt = new BitCastInst(OtherElt, BytePtrTy, OtherElt->getName(), MI); unsigned EltSize = TD->getTypeAllocSize(EltTy); @@ -938,7 +939,7 @@ void SROA::RewriteStoreUserOfWholeAlloca(StoreInst *SI, AllocaInst *AI, Value *DestField = NewElts[i]; if (EltVal->getType() == FieldTy) { // Storing to an integer field of this size, just do it. - } else if (FieldTy->isFloatingPoint() || isa<VectorType>(FieldTy)) { + } else if (FieldTy->isFloatingPointTy() || isa<VectorType>(FieldTy)) { // Bitcast to the right element type (for fp/vector values). EltVal = new BitCastInst(EltVal, FieldTy, "", SI); } else { @@ -982,7 +983,8 @@ void SROA::RewriteStoreUserOfWholeAlloca(StoreInst *SI, AllocaInst *AI, Value *DestField = NewElts[i]; if (EltVal->getType() == ArrayEltTy) { // Storing to an integer field of this size, just do it. - } else if (ArrayEltTy->isFloatingPoint() || isa<VectorType>(ArrayEltTy)) { + } else if (ArrayEltTy->isFloatingPointTy() || + isa<VectorType>(ArrayEltTy)) { // Bitcast to the right element type (for fp/vector values). EltVal = new BitCastInst(EltVal, ArrayEltTy, "", SI); } else { @@ -1042,7 +1044,7 @@ void SROA::RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocaInst *AI, const IntegerType *FieldIntTy = IntegerType::get(LI->getContext(), FieldSizeBits); - if (!isa<IntegerType>(FieldTy) && !FieldTy->isFloatingPoint() && + if (!isa<IntegerType>(FieldTy) && !FieldTy->isFloatingPointTy() && !isa<VectorType>(FieldTy)) SrcField = new BitCastInst(SrcField, PointerType::getUnqual(FieldIntTy), @@ -1384,9 +1386,9 @@ void SROA::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, uint64_t Offset) { // If the source and destination are both to the same alloca, then this is // a noop copy-to-self, just delete it. 
Otherwise, emit a load and store // as appropriate. - AllocaInst *OrigAI = cast<AllocaInst>(Ptr->getUnderlyingObject()); + AllocaInst *OrigAI = cast<AllocaInst>(Ptr->getUnderlyingObject(0)); - if (MTI->getSource()->getUnderlyingObject() != OrigAI) { + if (MTI->getSource()->getUnderlyingObject(0) != OrigAI) { // Dest must be OrigAI, change this to be a load from the original // pointer (bitcasted), then a store to our new alloca. assert(MTI->getRawDest() == Ptr && "Neither use is of pointer?"); @@ -1396,7 +1398,7 @@ void SROA::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, uint64_t Offset) { LoadInst *SrcVal = Builder.CreateLoad(SrcPtr, "srcval"); SrcVal->setAlignment(MTI->getAlignment()); Builder.CreateStore(SrcVal, NewAI); - } else if (MTI->getDest()->getUnderlyingObject() != OrigAI) { + } else if (MTI->getDest()->getUnderlyingObject(0) != OrigAI) { // Src must be OrigAI, change this to be a load from NewAI then a store // through the original dest pointer (bitcasted). assert(MTI->getRawSource() == Ptr && "Neither use is of pointer?"); @@ -1521,7 +1523,7 @@ Value *SROA::ConvertScalar_ExtractValue(Value *FromVal, const Type *ToType, // If the result is an integer, this is a trunc or bitcast. if (isa<IntegerType>(ToType)) { // Should be done. - } else if (ToType->isFloatingPoint() || isa<VectorType>(ToType)) { + } else if (ToType->isFloatingPointTy() || isa<VectorType>(ToType)) { // Just do a bitcast, we know the sizes match up. FromVal = Builder.CreateBitCast(FromVal, ToType, "tmp"); } else { @@ -1599,7 +1601,7 @@ Value *SROA::ConvertScalar_InsertValue(Value *SV, Value *Old, unsigned DestWidth = TD->getTypeSizeInBits(AllocaType); unsigned SrcStoreWidth = TD->getTypeStoreSizeInBits(SV->getType()); unsigned DestStoreWidth = TD->getTypeStoreSizeInBits(AllocaType); - if (SV->getType()->isFloatingPoint() || isa<VectorType>(SV->getType())) + if (SV->getType()->isFloatingPointTy() || isa<VectorType>(SV->getType())) SV = Builder.CreateBitCast(SV, IntegerType::get(SV->getContext(),SrcWidth), "tmp"); else if (isa<PointerType>(SV->getType())) diff --git a/lib/Transforms/Scalar/SimplifyCFGPass.cpp b/lib/Transforms/Scalar/SimplifyCFGPass.cpp index 43447de..62f34a2 100644 --- a/lib/Transforms/Scalar/SimplifyCFGPass.cpp +++ b/lib/Transforms/Scalar/SimplifyCFGPass.cpp @@ -30,6 +30,7 @@ #include "llvm/Attributes.h" #include "llvm/Support/CFG.h" #include "llvm/Pass.h" +#include "llvm/Target/TargetData.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" @@ -261,7 +262,7 @@ static bool MergeEmptyReturnBlocks(Function &F) { /// IterativeSimplifyCFG - Call SimplifyCFG on all the blocks in the function, /// iterating until no more changes are made. -static bool IterativeSimplifyCFG(Function &F) { +static bool IterativeSimplifyCFG(Function &F, const TargetData *TD) { bool Changed = false; bool LocalChange = true; while (LocalChange) { @@ -271,7 +272,7 @@ static bool IterativeSimplifyCFG(Function &F) { // if they are unneeded... // for (Function::iterator BBIt = ++F.begin(); BBIt != F.end(); ) { - if (SimplifyCFG(BBIt++)) { + if (SimplifyCFG(BBIt++, TD)) { LocalChange = true; ++NumSimpl; } @@ -285,10 +286,11 @@ static bool IterativeSimplifyCFG(Function &F) { // simplify the CFG. 
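The hunks above thread an optional TargetData through SimplifyCFG. A minimal sketch of that pattern, assuming a hypothetical FunctionPass subclass named MyCFGPass (the individual calls mirror the ones visible in the diff):

bool MyCFGPass::runOnFunction(Function &F) {
  // TargetData is optional: if no earlier pass registered it, TD is null
  // and SimplifyCFG simply skips the target-dependent folds.
  const TargetData *TD = getAnalysisIfAvailable<TargetData>();
  bool Changed = false;
  // Start at ++F.begin(): the entry block may not be simplified.
  for (Function::iterator BBIt = ++F.begin(); BBIt != F.end(); )
    Changed |= SimplifyCFG(BBIt++, TD);
  return Changed;
}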
// bool CFGSimplifyPass::runOnFunction(Function &F) { + const TargetData *TD = getAnalysisIfAvailable<TargetData>(); bool EverChanged = RemoveUnreachableBlocksFromFn(F); EverChanged |= MergeEmptyReturnBlocks(F); - EverChanged |= IterativeSimplifyCFG(F); - + EverChanged |= IterativeSimplifyCFG(F, TD); + // If neither pass changed anything, we're done. if (!EverChanged) return false; @@ -299,11 +301,11 @@ bool CFGSimplifyPass::runOnFunction(Function &F) { // RemoveUnreachableBlocksFromFn doesn't do anything. if (!RemoveUnreachableBlocksFromFn(F)) return true; - + do { - EverChanged = IterativeSimplifyCFG(F); + EverChanged = IterativeSimplifyCFG(F, TD); EverChanged |= RemoveUnreachableBlocksFromFn(F); } while (EverChanged); - + return true; } diff --git a/lib/Transforms/Scalar/SimplifyHalfPowrLibCalls.cpp b/lib/Transforms/Scalar/SimplifyHalfPowrLibCalls.cpp index 5acd6aa..4464961 100644 --- a/lib/Transforms/Scalar/SimplifyHalfPowrLibCalls.cpp +++ b/lib/Transforms/Scalar/SimplifyHalfPowrLibCalls.cpp @@ -68,7 +68,7 @@ InlineHalfPowrs(const std::vector<Instruction *> &HalfPowrs, Function *Callee = Call->getCalledFunction(); // Minimally sanity-check the CFG of half_powr to ensure that it contains - // the the kind of code we expect. If we're running this pass, we have + // the kind of code we expect. If we're running this pass, we have // reason to believe it will be what we expect. Function::iterator I = Callee->begin(); BasicBlock *Prologue = I++; diff --git a/lib/Transforms/Scalar/SimplifyLibCalls.cpp b/lib/Transforms/Scalar/SimplifyLibCalls.cpp index a49da9c..54b4380 100644 --- a/lib/Transforms/Scalar/SimplifyLibCalls.cpp +++ b/lib/Transforms/Scalar/SimplifyLibCalls.cpp @@ -152,7 +152,7 @@ Value *LibCallOptimization::EmitStrLen(Value *Ptr, IRBuilder<> &B) { Constant *StrLen =M->getOrInsertFunction("strlen", AttrListPtr::get(AWI, 2), TD->getIntPtrType(*Context), - Type::getInt8PtrTy(*Context), + Type::getInt8PtrTy(*Context), NULL); CallInst *CI = B.CreateCall(StrLen, CastToCStr(Ptr, B), "strlen"); if (const Function *F = dyn_cast<Function>(StrLen->stripPointerCasts())) @@ -232,10 +232,10 @@ Value *LibCallOptimization::EmitMemChr(Value *Ptr, Value *Val, AWI = AttributeWithIndex::get(~0u, Attribute::ReadOnly | Attribute::NoUnwind); Value *MemChr = M->getOrInsertFunction("memchr", AttrListPtr::get(&AWI, 1), - Type::getInt8PtrTy(*Context), - Type::getInt8PtrTy(*Context), + Type::getInt8PtrTy(*Context), + Type::getInt8PtrTy(*Context), Type::getInt32Ty(*Context), - TD->getIntPtrType(*Context), + TD->getIntPtrType(*Context), NULL); CallInst *CI = B.CreateCall3(MemChr, CastToCStr(Ptr, B), Val, Len, "memchr"); @@ -321,9 +321,9 @@ Value *LibCallOptimization::EmitPutChar(Value *Char, IRBuilder<> &B) { Type::getInt32Ty(*Context), NULL); CallInst *CI = B.CreateCall(PutChar, B.CreateIntCast(Char, - Type::getInt32Ty(*Context), - /*isSigned*/true, - "chari"), + Type::getInt32Ty(*Context), + /*isSigned*/true, + "chari"), "putchar"); if (const Function *F = dyn_cast<Function>(PutChar->stripPointerCasts())) @@ -341,7 +341,7 @@ void LibCallOptimization::EmitPutS(Value *Str, IRBuilder<> &B) { Value *PutS = M->getOrInsertFunction("puts", AttrListPtr::get(AWI, 2), Type::getInt32Ty(*Context), - Type::getInt8PtrTy(*Context), + Type::getInt8PtrTy(*Context), NULL); CallInst *CI = B.CreateCall(PutS, CastToCStr(Str, B), "puts"); if (const Function *F = dyn_cast<Function>(PutS->stripPointerCasts())) @@ -359,13 +359,13 @@ void LibCallOptimization::EmitFPutC(Value *Char, Value *File, IRBuilder<> &B) { Constant *F; if 
(isa<PointerType>(File->getType())) F = M->getOrInsertFunction("fputc", AttrListPtr::get(AWI, 2), - Type::getInt32Ty(*Context), + Type::getInt32Ty(*Context), Type::getInt32Ty(*Context), File->getType(), - NULL); + NULL); else F = M->getOrInsertFunction("fputc", - Type::getInt32Ty(*Context), - Type::getInt32Ty(*Context), + Type::getInt32Ty(*Context), + Type::getInt32Ty(*Context), File->getType(), NULL); Char = B.CreateIntCast(Char, Type::getInt32Ty(*Context), /*isSigned*/true, "chari"); @@ -386,7 +386,7 @@ void LibCallOptimization::EmitFPutS(Value *Str, Value *File, IRBuilder<> &B) { Constant *F; if (isa<PointerType>(File->getType())) F = M->getOrInsertFunction("fputs", AttrListPtr::get(AWI, 3), - Type::getInt32Ty(*Context), + Type::getInt32Ty(*Context), Type::getInt8PtrTy(*Context), File->getType(), NULL); else @@ -414,13 +414,13 @@ void LibCallOptimization::EmitFWrite(Value *Ptr, Value *Size, Value *File, TD->getIntPtrType(*Context), Type::getInt8PtrTy(*Context), TD->getIntPtrType(*Context), - TD->getIntPtrType(*Context), + TD->getIntPtrType(*Context), File->getType(), NULL); else F = M->getOrInsertFunction("fwrite", TD->getIntPtrType(*Context), Type::getInt8PtrTy(*Context), TD->getIntPtrType(*Context), - TD->getIntPtrType(*Context), + TD->getIntPtrType(*Context), File->getType(), NULL); CallInst *CI = B.CreateCall4(F, CastToCStr(Ptr, B), Size, ConstantInt::get(TD->getIntPtrType(*Context), 1), File); @@ -525,7 +525,7 @@ static uint64_t GetStringLengthH(Value *V, SmallPtrSet<PHINode*, 32> &PHIs) { // Must be a Constant Array ConstantArray *Array = dyn_cast<ConstantArray>(GlobalInit); - if (!Array || !Array->getType()->getElementType()->isInteger(8)) + if (!Array || !Array->getType()->getElementType()->isIntegerTy(8)) return false; // Get the number of elements in the array @@ -697,7 +697,7 @@ struct StrChrOpt : public LibCallOptimization { if (!TD) return 0; uint64_t Len = GetStringLength(SrcStr); - if (Len == 0 || !FT->getParamType(1)->isInteger(32)) // memchr needs i32. + if (Len == 0 || !FT->getParamType(1)->isIntegerTy(32))// memchr needs i32. return 0; return EmitMemChr(SrcStr, CI->getOperand(2), // include nul. @@ -739,7 +739,7 @@ struct StrCmpOpt : public LibCallOptimization { // Verify the "strcmp" function prototype. const FunctionType *FT = Callee->getFunctionType(); if (FT->getNumParams() != 2 || - !FT->getReturnType()->isInteger(32) || + !FT->getReturnType()->isIntegerTy(32) || FT->getParamType(0) != FT->getParamType(1) || FT->getParamType(0) != Type::getInt8PtrTy(*Context)) return 0; @@ -787,7 +787,7 @@ struct StrNCmpOpt : public LibCallOptimization { // Verify the "strncmp" function prototype. 
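For reference, the strchr-to-memchr rewrite done by StrChrOpt above can be stated in plain libc terms; the helper below is illustrative only. Scanning strlen(S)+1 bytes makes the NUL terminator visible, which is why the emitted length "includes nul":

#include <cstring>

// Equivalent of strchr(S, C) when the length of S is known to be N:
// searching N+1 bytes lets a search for '\0' succeed, and a missing
// character still yields a null result, exactly as strchr would.
const char *strchrViaMemchr(const char *S, int C, unsigned long N /* == strlen(S) */) {
  return static_cast<const char *>(memchr(S, C, N + 1));
}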
const FunctionType *FT = Callee->getFunctionType(); if (FT->getNumParams() != 3 || - !FT->getReturnType()->isInteger(32) || + !FT->getReturnType()->isIntegerTy(32) || FT->getParamType(0) != FT->getParamType(1) || FT->getParamType(0) != Type::getInt8PtrTy(*Context) || !isa<IntegerType>(FT->getParamType(2))) @@ -1008,7 +1008,7 @@ struct MemCmpOpt : public LibCallOptimization { const FunctionType *FT = Callee->getFunctionType(); if (FT->getNumParams() != 3 || !isa<PointerType>(FT->getParamType(0)) || !isa<PointerType>(FT->getParamType(1)) || - !FT->getReturnType()->isInteger(32)) + !FT->getReturnType()->isIntegerTy(32)) return 0; Value *LHS = CI->getOperand(1), *RHS = CI->getOperand(2); @@ -1203,22 +1203,23 @@ struct MemMoveChkOpt : public LibCallOptimization { struct StrCpyChkOpt : public LibCallOptimization { virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { - // These optimizations require TargetData. - if (!TD) return 0; - const FunctionType *FT = Callee->getFunctionType(); if (FT->getNumParams() != 3 || FT->getReturnType() != FT->getParamType(0) || !isa<PointerType>(FT->getParamType(0)) || - !isa<PointerType>(FT->getParamType(1)) || - !isa<IntegerType>(FT->getParamType(2))) + !isa<PointerType>(FT->getParamType(1))) return 0; ConstantInt *SizeCI = dyn_cast<ConstantInt>(CI->getOperand(3)); if (!SizeCI) return 0; - // We don't have any length information, just lower to a plain strcpy. - if (SizeCI->isAllOnesValue()) + // If a) we don't have any length information, or b) we know this will + // fit then just lower to a plain strcpy. Otherwise we'll keep our + // strcpy_chk call which may fail at runtime if the size is too long. + // TODO: It might be nice to get a maximum length out of the possible + // string lengths for varying strings. + if (SizeCI->isAllOnesValue() || + SizeCI->getZExtValue() >= GetStringLength(CI->getOperand(2))) return EmitStrCpy(CI->getOperand(1), CI->getOperand(2), B); return 0; @@ -1240,7 +1241,7 @@ struct PowOpt : public LibCallOptimization { // result type. if (FT->getNumParams() != 2 || FT->getReturnType() != FT->getParamType(0) || FT->getParamType(0) != FT->getParamType(1) || - !FT->getParamType(0)->isFloatingPoint()) + !FT->getParamType(0)->isFloatingPointTy()) return 0; Value *Op1 = CI->getOperand(1), *Op2 = CI->getOperand(2); @@ -1294,7 +1295,7 @@ struct Exp2Opt : public LibCallOptimization { // Just make sure this has 1 argument of FP type, which matches the // result type. if (FT->getNumParams() != 1 || FT->getReturnType() != FT->getParamType(0) || - !FT->getParamType(0)->isFloatingPoint()) + !FT->getParamType(0)->isFloatingPointTy()) return 0; Value *Op = CI->getOperand(1); @@ -1327,7 +1328,7 @@ struct Exp2Opt : public LibCallOptimization { Module *M = Caller->getParent(); Value *Callee = M->getOrInsertFunction(Name, Op->getType(), Op->getType(), - Type::getInt32Ty(*Context),NULL); + Type::getInt32Ty(*Context),NULL); CallInst *CI = B.CreateCall2(Callee, One, LdExpArg); if (const Function *F = dyn_cast<Function>(Callee->stripPointerCasts())) CI->setCallingConv(F->getCallingConv()); @@ -1374,7 +1375,7 @@ struct FFSOpt : public LibCallOptimization { // Just make sure this has 1 integer argument and an i32 // result type.
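The new StrCpyChkOpt logic above has a simple reading in libc terms. A sketch using glibc's fortified entry point for illustration; note that GetStringLength in the patch returns the string length including the terminating NUL, so the >= comparison is exactly a "does it fit" test:

#include <cstring>

char Buf[16];

void loweredCheckedCopy() {
  // __strcpy_chk(Buf, "short", sizeof Buf) copies 6 bytes (5 + NUL) into a
  // 16-byte object; the runtime check can never fire, so the optimizer may
  // lower the checked call to a plain strcpy:
  strcpy(Buf, "short");
  // When the object size is unknown the compiler passes (size_t)-1, the
  // isAllOnesValue() case above, which is likewise lowered to strcpy.
}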
if (FT->getNumParams() != 1 || - !FT->getReturnType()->isInteger(32) || + !FT->getReturnType()->isIntegerTy(32) || !isa<IntegerType>(FT->getParamType(0))) return 0; @@ -1410,7 +1411,7 @@ struct IsDigitOpt : public LibCallOptimization { const FunctionType *FT = Callee->getFunctionType(); // We require integer(i32) if (FT->getNumParams() != 1 || !isa<IntegerType>(FT->getReturnType()) || - !FT->getParamType(0)->isInteger(32)) + !FT->getParamType(0)->isIntegerTy(32)) return 0; // isdigit(c) -> (c-'0') <u 10 @@ -1431,7 +1432,7 @@ struct IsAsciiOpt : public LibCallOptimization { const FunctionType *FT = Callee->getFunctionType(); // We require integer(i32) if (FT->getNumParams() != 1 || !isa<IntegerType>(FT->getReturnType()) || - !FT->getParamType(0)->isInteger(32)) + !FT->getParamType(0)->isIntegerTy(32)) return 0; // isascii(c) -> c <u 128 @@ -1472,7 +1473,7 @@ struct ToAsciiOpt : public LibCallOptimization { const FunctionType *FT = Callee->getFunctionType(); // We require i32(i32) if (FT->getNumParams() != 1 || FT->getReturnType() != FT->getParamType(0) || - !FT->getParamType(0)->isInteger(32)) + !FT->getParamType(0)->isIntegerTy(32)) return 0; // toascii(c) -> c & 0x7f diff --git a/lib/Transforms/Scalar/TailRecursionElimination.cpp b/lib/Transforms/Scalar/TailRecursionElimination.cpp index 4119cb9..162d902 100644 --- a/lib/Transforms/Scalar/TailRecursionElimination.cpp +++ b/lib/Transforms/Scalar/TailRecursionElimination.cpp @@ -211,7 +211,8 @@ bool TailCallElim::CanMoveAboveCall(Instruction *I, CallInst *CI) { // FIXME: Writes to memory only matter if they may alias the pointer // being loaded from. if (CI->mayWriteToMemory() || - !isSafeToLoadUnconditionally(L->getPointerOperand(), L)) + !isSafeToLoadUnconditionally(L->getPointerOperand(), L, + L->getAlignment())) return false; } } diff --git a/lib/Transforms/Utils/BreakCriticalEdges.cpp b/lib/Transforms/Utils/BreakCriticalEdges.cpp index 19c7206..3657390 100644 --- a/lib/Transforms/Utils/BreakCriticalEdges.cpp +++ b/lib/Transforms/Utils/BreakCriticalEdges.cpp @@ -179,7 +179,7 @@ BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum, // Create a new basic block, linking it into the CFG. BasicBlock *NewBB = BasicBlock::Create(TI->getContext(), TIBB->getName() + "." + DestBB->getName() + "_crit_edge"); - // Create our unconditional branch... + // Create our unconditional branch. BranchInst::Create(DestBB, NewBB); // Branch to the new block, breaking the edge. @@ -192,16 +192,47 @@ BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum, // If there are any PHI nodes in DestBB, we need to update them so that they // merge incoming values from NewBB instead of from TIBB. - // - for (BasicBlock::iterator I = DestBB->begin(); isa<PHINode>(I); ++I) { - PHINode *PN = cast<PHINode>(I); - // We no longer enter through TIBB, now we come in through NewBB. Revector - // exactly one entry in the PHI node that used to come from TIBB to come - // from NewBB. - int BBIdx = PN->getBasicBlockIndex(TIBB); - PN->setIncomingBlock(BBIdx, NewBB); + if (PHINode *APHI = dyn_cast<PHINode>(DestBB->begin())) { + // This conceptually does: + // foreach (PHINode *PN in DestBB) + // PN->setIncomingBlock(PN->getIncomingBlock(TIBB), NewBB); + // but is optimized for two cases. + + if (APHI->getNumIncomingValues() <= 8) { // Small # preds case. + unsigned BBIdx = 0; + for (BasicBlock::iterator I = DestBB->begin(); isa<PHINode>(I); ++I) { + // We no longer enter through TIBB, now we come in through NewBB.
+ // Revector exactly one entry in the PHI node that used to come from + // TIBB to come from NewBB. + PHINode *PN = cast<PHINode>(I); + + // Reuse the previous value of BBIdx if it lines up. In cases where we + // have multiple phi nodes with *lots* of predecessors, this is a speed + // win because we don't have to scan the PHI looking for TIBB. This + // happens because the BB list of PHI nodes are usually in the same + // order. + if (PN->getIncomingBlock(BBIdx) != TIBB) + BBIdx = PN->getBasicBlockIndex(TIBB); + PN->setIncomingBlock(BBIdx, NewBB); + } + } else { + // However, the foreach loop is slow for blocks with lots of predecessors + // because PHINode::getIncomingBlock is O(n) in # preds. Instead, walk + // the user list of TIBB to find the PHI nodes. + SmallPtrSet<PHINode*, 16> UpdatedPHIs; + + for (Value::use_iterator UI = TIBB->use_begin(), E = TIBB->use_end(); + UI != E; ) { + Value::use_iterator Use = UI++; + if (PHINode *PN = dyn_cast<PHINode>(Use)) { + // Remove one entry from each PHI. + if (PN->getParent() == DestBB && UpdatedPHIs.insert(PN)) + PN->setOperand(Use.getOperandNo(), NewBB); + } + } + } } - + // If there are any other edges from TIBB to DestBB, update those to go // through the split block, making those edges non-critical as well (and // reducing the number of phi entries in the DestBB if relevant). @@ -221,6 +252,15 @@ BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum, // If we don't have a pass object, we can't update anything... if (P == 0) return NewBB; + + DominatorTree *DT = P->getAnalysisIfAvailable<DominatorTree>(); + DominanceFrontier *DF = P->getAnalysisIfAvailable<DominanceFrontier>(); + LoopInfo *LI = P->getAnalysisIfAvailable<LoopInfo>(); + ProfileInfo *PI = P->getAnalysisIfAvailable<ProfileInfo>(); + + // If we have nothing to update, just return. + if (DT == 0 && DF == 0 && LI == 0 && PI == 0) + return NewBB; // Now update analysis information. Since the only predecessor of NewBB is // the TIBB, TIBB clearly dominates NewBB. TIBB usually doesn't dominate @@ -229,14 +269,23 @@ BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum, // loop header) then NewBB dominates DestBB. SmallVector<BasicBlock*, 8> OtherPreds; - for (pred_iterator I = pred_begin(DestBB), E = pred_end(DestBB); I != E; ++I) - if (*I != NewBB) - OtherPreds.push_back(*I); + // If there is a PHI in the block, loop over predecessors with it, which is + // faster than iterating pred_begin/end. + if (PHINode *PN = dyn_cast<PHINode>(DestBB->begin())) { + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) + if (PN->getIncomingBlock(i) != NewBB) + OtherPreds.push_back(PN->getIncomingBlock(i)); + } else { + for (pred_iterator I = pred_begin(DestBB), E = pred_end(DestBB); + I != E; ++I) + if (*I != NewBB) + OtherPreds.push_back(*I); + } bool NewBBDominatesDestBB = true; // Should we update DominatorTree information? - if (DominatorTree *DT = P->getAnalysisIfAvailable<DominatorTree>()) { + if (DT) { DomTreeNode *TINode = DT->getNode(TIBB); // The new block is not the immediate dominator for any other nodes, but @@ -267,7 +316,7 @@ BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum, } // Should we update DominanceFrontier information? - if (DominanceFrontier *DF = P->getAnalysisIfAvailable<DominanceFrontier>()) { + if (DF) { // If NewBBDominatesDestBB hasn't been computed yet, do so with DF. if (!OtherPreds.empty()) { // FIXME: IMPLEMENT THIS! 
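The BBIdx-reuse trick above relies on consecutive PHI nodes usually listing their predecessors in the same order. A standalone sketch of the same idea over plain containers (the names and types are illustrative, not LLVM's):

#include <utility>
#include <vector>

typedef std::pair<int, int> Incoming; // (predecessor id, incoming value)

// Redirect the entry for OldPred to NewPred in every "PHI". The index found
// for one PHI is tried first on the next, so when predecessor orders line
// up the loop does O(1) work per PHI instead of a linear scan.
void retargetPreds(std::vector<std::vector<Incoming> > &Phis,
                   int OldPred, int NewPred) {
  unsigned Idx = 0;
  for (unsigned p = 0, e = Phis.size(); p != e; ++p) {
    std::vector<Incoming> &PN = Phis[p];
    if (Idx >= PN.size() || PN[Idx].first != OldPred) {
      Idx = 0; // slow path: scan for OldPred
      while (Idx != PN.size() && PN[Idx].first != OldPred) ++Idx;
      if (Idx == PN.size()) continue; // OldPred feeds no entry here
    }
    PN[Idx].first = NewPred;
  }
}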
@@ -301,7 +350,7 @@ BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum, } // Update LoopInfo if it is around. - if (LoopInfo *LI = P->getAnalysisIfAvailable<LoopInfo>()) { + if (LI) { if (Loop *TIL = LI->getLoopFor(TIBB)) { // If one or the other blocks were not in a loop, the new block is not // either, and thus LI doesn't need to be updated. @@ -382,9 +431,8 @@ BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum, } // Update ProfileInfo if it is around. - if (ProfileInfo *PI = P->getAnalysisIfAvailable<ProfileInfo>()) { - PI->splitEdge(TIBB,DestBB,NewBB,MergeIdenticalEdges); - } + if (PI) + PI->splitEdge(TIBB, DestBB, NewBB, MergeIdenticalEdges); return NewBB; } diff --git a/lib/Transforms/Utils/CloneFunction.cpp b/lib/Transforms/Utils/CloneFunction.cpp index bd750cc..c80827d 100644 --- a/lib/Transforms/Utils/CloneFunction.cpp +++ b/lib/Transforms/Utils/CloneFunction.cpp @@ -33,7 +33,7 @@ using namespace llvm; // CloneBasicBlock - See comments in Cloning.h BasicBlock *llvm::CloneBasicBlock(const BasicBlock *BB, DenseMap<const Value*, Value*> &ValueMap, - const char *NameSuffix, Function *F, + const Twine &NameSuffix, Function *F, ClonedCodeInfo *CodeInfo) { BasicBlock *NewBB = BasicBlock::Create(BB->getContext(), "", F); if (BB->hasName()) NewBB->setName(BB->getName()+NameSuffix); diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp index 92bdf2d..57ad459 100644 --- a/lib/Transforms/Utils/Local.cpp +++ b/lib/Transforms/Utils/Local.cpp @@ -38,20 +38,82 @@ using namespace llvm; // Local analysis. // +/// getUnderlyingObjectWithOffset - Strip off up to MaxLookup GEPs and +/// bitcasts to get back to the underlying object being addressed, keeping +/// track of the offset in bytes from the GEPs relative to the result. +/// This is closely related to Value::getUnderlyingObject but is located +/// here to avoid making VMCore depend on TargetData. +static Value *getUnderlyingObjectWithOffset(Value *V, const TargetData *TD, + uint64_t &ByteOffset, + unsigned MaxLookup = 6) { + if (!isa<PointerType>(V->getType())) + return V; + for (unsigned Count = 0; MaxLookup == 0 || Count < MaxLookup; ++Count) { + if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) { + if (!GEP->hasAllConstantIndices()) + return V; + SmallVector<Value*, 8> Indices(GEP->op_begin() + 1, GEP->op_end()); + ByteOffset += TD->getIndexedOffset(GEP->getPointerOperandType(), + &Indices[0], Indices.size()); + V = GEP->getPointerOperand(); + } else if (Operator::getOpcode(V) == Instruction::BitCast) { + V = cast<Operator>(V)->getOperand(0); + } else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) { + if (GA->mayBeOverridden()) + return V; + V = GA->getAliasee(); + } else { + return V; + } + assert(isa<PointerType>(V->getType()) && "Unexpected operand type!"); + } + return V; +} + /// isSafeToLoadUnconditionally - Return true if we know that executing a load /// from this value cannot trap. If it is not obviously safe to load from the /// specified pointer, we do a quick local scan of the basic block containing /// ScanFrom, to determine if the address is already accessed. -bool llvm::isSafeToLoadUnconditionally(Value *V, Instruction *ScanFrom) { - // If it is an alloca it is always safe to load from. 
- if (isa<AllocaInst>(V)) return true; +bool llvm::isSafeToLoadUnconditionally(Value *V, Instruction *ScanFrom, + unsigned Align, const TargetData *TD) { + uint64_t ByteOffset = 0; + Value *Base = V; + if (TD) + Base = getUnderlyingObjectWithOffset(V, TD, ByteOffset); + + const Type *BaseType = 0; + unsigned BaseAlign = 0; + if (const AllocaInst *AI = dyn_cast<AllocaInst>(Base)) { + // An alloca is safe to load from as long as it is suitably aligned. + BaseType = AI->getAllocatedType(); + BaseAlign = AI->getAlignment(); + } else if (const GlobalValue *GV = dyn_cast<GlobalValue>(Base)) { + // Global variables are safe to load from but their size cannot be + // guaranteed if they are overridden. + if (!isa<GlobalAlias>(GV) && !GV->mayBeOverridden()) { + BaseType = GV->getType()->getElementType(); + BaseAlign = GV->getAlignment(); + } + } - // If it is a global variable it is mostly safe to load from. - if (const GlobalValue *GV = dyn_cast<GlobalVariable>(V)) - // Don't try to evaluate aliases. External weak GV can be null. - return !isa<GlobalAlias>(GV) && !GV->hasExternalWeakLinkage(); + if (BaseType && BaseType->isSized()) { + if (TD && BaseAlign == 0) + BaseAlign = TD->getPrefTypeAlignment(BaseType); - // Otherwise, be a little bit agressive by scanning the local block where we + if (Align <= BaseAlign) { + if (!TD) + return true; // Loading directly from an alloca or global is OK. + + // Check if the load is within the bounds of the underlying object. + const PointerType *AddrTy = cast<PointerType>(V->getType()); + uint64_t LoadSize = TD->getTypeStoreSize(AddrTy->getElementType()); + if (ByteOffset + LoadSize <= TD->getTypeAllocSize(BaseType) && + (Align == 0 || (ByteOffset % Align) == 0)) + return true; + } + } + + // Otherwise, be a little bit aggressive by scanning the local block where we // want to check to see if the pointer is already being loaded or stored // from/to. If so, the previous load or store would have already trapped, // so there is no harm doing an extra load (also, CSE will later eliminate @@ -428,6 +490,17 @@ void llvm::MergeBasicBlockIntoOnlyPred(BasicBlock *DestBB, Pass *P) { // Splice all the instructions from PredBB to DestBB. PredBB->getTerminator()->eraseFromParent(); DestBB->getInstList().splice(DestBB->begin(), PredBB->getInstList()); + + // Zap anything that took the address of DestBB. Not doing this will give the + // address an invalid value. + if (DestBB->hasAddressTaken()) { + BlockAddress *BA = BlockAddress::get(DestBB); + Constant *Replacement = + ConstantInt::get(llvm::Type::getInt32Ty(BA->getContext()), 1); + BA->replaceAllUsesWith(ConstantExpr::getIntToPtr(Replacement, + BA->getType())); + BA->destroyConstant(); + } // Anything that branched to PredBB now branches to DestBB.
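The in-bounds test introduced above, isolated as plain arithmetic. This is only a fragment of the full check: the code first requires the requested alignment not to exceed the base object's alignment.

#include <stdint.h>

// A LoadSize-byte load at ByteOffset into an object of AllocSize bytes
// cannot trap if it ends inside the object and the offset honors the
// requested alignment.
bool loadIsInBounds(uint64_t ByteOffset, uint64_t LoadSize,
                    uint64_t AllocSize, uint64_t Align) {
  return ByteOffset + LoadSize <= AllocSize &&
         (Align == 0 || ByteOffset % Align == 0);
}
// e.g. a 4-byte load at offset 12 of a 16-byte alloca passes; at offset 14
// it would run off the end and the fast path above declines.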
PredBB->replaceAllUsesWith(DestBB); diff --git a/lib/Transforms/Utils/LoopSimplify.cpp b/lib/Transforms/Utils/LoopSimplify.cpp index e81b779..57bab60 100644 --- a/lib/Transforms/Utils/LoopSimplify.cpp +++ b/lib/Transforms/Utils/LoopSimplify.cpp @@ -176,8 +176,9 @@ ReprocessLoop: SmallVector<BasicBlock*, 8> ExitBlocks; L->getExitBlocks(ExitBlocks); - SetVector<BasicBlock*> ExitBlockSet(ExitBlocks.begin(), ExitBlocks.end()); - for (SetVector<BasicBlock*>::iterator I = ExitBlockSet.begin(), + SmallSetVector<BasicBlock *, 8> ExitBlockSet(ExitBlocks.begin(), + ExitBlocks.end()); + for (SmallSetVector<BasicBlock *, 8>::iterator I = ExitBlockSet.begin(), E = ExitBlockSet.end(); I != E; ++I) { BasicBlock *ExitBlock = *I; for (pred_iterator PI = pred_begin(ExitBlock), PE = pred_end(ExitBlock); diff --git a/lib/Transforms/Utils/LoopUnroll.cpp b/lib/Transforms/Utils/LoopUnroll.cpp index 53117a0..e47c86d 100644 --- a/lib/Transforms/Utils/LoopUnroll.cpp +++ b/lib/Transforms/Utils/LoopUnroll.cpp @@ -29,7 +29,6 @@ #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Cloning.h" #include "llvm/Transforms/Utils/Local.h" -#include <cstdio> using namespace llvm; @@ -204,15 +203,12 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, LoopInfo* LI, LPPassManager* LPM) Latches.push_back(LatchBlock); for (unsigned It = 1; It != Count; ++It) { - char SuffixBuffer[100]; - sprintf(SuffixBuffer, ".%d", It); - std::vector<BasicBlock*> NewBlocks; for (std::vector<BasicBlock*>::iterator BB = LoopBlocks.begin(), E = LoopBlocks.end(); BB != E; ++BB) { ValueMapTy ValueMap; - BasicBlock *New = CloneBasicBlock(*BB, ValueMap, SuffixBuffer); + BasicBlock *New = CloneBasicBlock(*BB, ValueMap, "." + Twine(It)); Header->getParent()->getBasicBlockList().push_back(New); // Loop over all of the PHI nodes in the block, changing them to use the diff --git a/lib/Transforms/Utils/Makefile b/lib/Transforms/Utils/Makefile index b9761df..d1e9336 100644 --- a/lib/Transforms/Utils/Makefile +++ b/lib/Transforms/Utils/Makefile @@ -10,7 +10,6 @@ LEVEL = ../../.. LIBRARYNAME = LLVMTransformUtils BUILD_ARCHIVE = 1 -CXXFLAGS = -fno-rtti include $(LEVEL)/Makefile.common diff --git a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp index d9261ac..544e20b 100644 --- a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp +++ b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp @@ -23,6 +23,7 @@ #include "llvm/Function.h" #include "llvm/Instructions.h" #include "llvm/IntrinsicInst.h" +#include "llvm/Metadata.h" #include "llvm/Analysis/DebugInfo.h" #include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/AliasSetTracker.h" @@ -84,6 +85,18 @@ bool llvm::isAllocaPromotable(const AllocaInst *AI) { return true; } +/// FindAllocaDbgDeclare - Finds the llvm.dbg.declare intrinsic describing the +/// alloca 'V', if any. +static DbgDeclareInst *FindAllocaDbgDeclare(Value *V) { + if (MDNode *DebugNode = MDNode::getIfExists(V->getContext(), &V, 1)) + for (Value::use_iterator UI = DebugNode->use_begin(), + E = DebugNode->use_end(); UI != E; ++UI) + if (DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(*UI)) + return DDI; + + return 0; +} + namespace { struct AllocaInfo; @@ -188,6 +201,11 @@ namespace { /// std::vector<Value*> PointerAllocaValues; + /// AllocaDbgDeclares - For each alloca, we keep track of the dbg.declare + /// intrinsic that describes it, if any, so that we can convert it to a + /// dbg.value intrinsic if the alloca gets promoted. 
+ SmallVector<DbgDeclareInst*, 8> AllocaDbgDeclares; + /// Visited - The set of basic blocks the renamer has already visited. /// SmallPtrSet<BasicBlock*, 16> Visited; @@ -202,6 +220,9 @@ namespace { PromoteMem2Reg(const std::vector<AllocaInst*> &A, DominatorTree &dt, DominanceFrontier &df, AliasSetTracker *ast) : Allocas(A), DT(dt), DF(df), DIF(0), AST(ast) {} + ~PromoteMem2Reg() { + delete DIF; + } void run(); @@ -243,9 +264,9 @@ namespace { LargeBlockInfo &LBI); void PromoteSingleBlockAlloca(AllocaInst *AI, AllocaInfo &Info, LargeBlockInfo &LBI); - void ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI, StoreInst* SI, - uint64_t Offset); - + void ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI, StoreInst *SI); + + void RenamePass(BasicBlock *BB, BasicBlock *Pred, RenamePassData::ValVector &IncVals, std::vector<RenamePassData> &Worklist); @@ -262,6 +283,7 @@ namespace { bool OnlyUsedInOneBlock; Value *AllocaPointerVal; + DbgDeclareInst *DbgDeclare; void clear() { DefiningBlocks.clear(); @@ -270,6 +292,7 @@ namespace { OnlyBlock = 0; OnlyUsedInOneBlock = true; AllocaPointerVal = 0; + DbgDeclare = 0; } /// AnalyzeAlloca - Scan the uses of the specified alloca, filling in our @@ -304,28 +327,18 @@ namespace { OnlyUsedInOneBlock = false; } } + + DbgDeclare = FindAllocaDbgDeclare(AI); } }; } // end of anonymous namespace -/// Finds the llvm.dbg.declare intrinsic corresponding to an alloca if any. -static DbgDeclareInst *findDbgDeclare(AllocaInst *AI) { - Function *F = AI->getParent()->getParent(); - for (Function::iterator FI = F->begin(), FE = F->end(); FI != FE; ++FI) - for (BasicBlock::iterator BI = (*FI).begin(), BE = (*FI).end(); - BI != BE; ++BI) - if (DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(BI)) - if (DDI->getAddress() == AI) - return DDI; - - return 0; -} - void PromoteMem2Reg::run() { Function &F = *DF.getRoot()->getParent(); if (AST) PointerAllocaValues.resize(Allocas.size()); + AllocaDbgDeclares.resize(Allocas.size()); AllocaInfo Info; LargeBlockInfo LBI; @@ -360,8 +373,11 @@ void PromoteMem2Reg::run() { // Finally, after the scan, check to see if the store is all that is left. if (Info.UsingBlocks.empty()) { - // Record debuginfo for the store before removing it. - ConvertDebugDeclareToDebugValue(findDbgDeclare(AI), Info.OnlyStore, 0); + // Record debuginfo for the store and remove the declaration's debuginfo. + if (DbgDeclareInst *DDI = Info.DbgDeclare) { + ConvertDebugDeclareToDebugValue(DDI, Info.OnlyStore); + DDI->eraseFromParent(); + } // Remove the (now dead) store and alloca. Info.OnlyStore->eraseFromParent(); LBI.deleteValue(Info.OnlyStore); @@ -388,11 +404,11 @@ void PromoteMem2Reg::run() { if (Info.UsingBlocks.empty()) { // Remove the (now dead) stores and alloca. - DbgDeclareInst *DDI = findDbgDeclare(AI); while (!AI->use_empty()) { StoreInst *SI = cast<StoreInst>(AI->use_back()); // Record debuginfo for the store before removing it. - ConvertDebugDeclareToDebugValue(DDI, SI, 0); + if (DbgDeclareInst *DDI = Info.DbgDeclare) + ConvertDebugDeclareToDebugValue(DDI, SI); SI->eraseFromParent(); LBI.deleteValue(SI); } @@ -404,6 +420,10 @@ void PromoteMem2Reg::run() { // The alloca has been processed, move on. RemoveFromAllocasList(AllocaNum); + // The alloca's debuginfo can be removed as well. + if (DbgDeclareInst *DDI = Info.DbgDeclare) + DDI->eraseFromParent(); + ++NumLocalPromoted; continue; } @@ -421,6 +441,9 @@ void PromoteMem2Reg::run() { // stored into the alloca. 
if (AST) PointerAllocaValues[AllocaNum] = Info.AllocaPointerVal; + + // Remember the dbg.declare intrinsic describing this alloca, if any. + if (Info.DbgDeclare) AllocaDbgDeclares[AllocaNum] = Info.DbgDeclare; // Keep the reverse mapping of the 'Allocas' array for the rename pass. AllocaLookup[Allocas[AllocaNum]] = AllocaNum; @@ -476,7 +499,11 @@ void PromoteMem2Reg::run() { A->eraseFromParent(); } - + // Remove alloca's dbg.declare intrinsics from the function. + for (unsigned i = 0, e = AllocaDbgDeclares.size(); i != e; ++i) + if (DbgDeclareInst *DDI = AllocaDbgDeclares[i]) + DDI->eraseFromParent(); + // Loop over all of the PHI nodes and see if there are any that we can get // rid of because they merge all of the same incoming values. This can // happen due to undef values coming into the PHI nodes. This process is @@ -857,14 +884,19 @@ void PromoteMem2Reg::PromoteSingleBlockAlloca(AllocaInst *AI, AllocaInfo &Info, // Inserts a llvm.dbg.value intrinsic before the stores to an alloca'd value // that has an associated llvm.dbg.declare intrinsic. void PromoteMem2Reg::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI, - StoreInst* SI, - uint64_t Offset) { - if (!DDI) return; + StoreInst *SI) { + DIVariable DIVar(DDI->getVariable()); + if (!DIVar.getNode()) + return; if (!DIF) DIF = new DIFactory(*SI->getParent()->getParent()->getParent()); - DIF->InsertDbgValueIntrinsic(SI->getOperand(0), Offset, - DIVariable(DDI->getVariable()), SI); + Instruction *DbgVal = DIF->InsertDbgValueIntrinsic(SI->getOperand(0), 0, + DIVar, SI); + + // Propagate any debug metadata from the store onto the dbg.value. + if (MDNode *SIMD = SI->getMetadata("dbg")) + DbgVal->setMetadata("dbg", SIMD); } // QueuePhiNode - queues a phi-node to be added to a basic-block for a specific @@ -980,7 +1012,8 @@ NextIteration: // what value were we writing? IncomingVals[ai->second] = SI->getOperand(0); // Record debuginfo for the store before removing it. - ConvertDebugDeclareToDebugValue(findDbgDeclare(Dest), SI, 0); + if (DbgDeclareInst *DDI = AllocaDbgDeclares[ai->second]) + ConvertDebugDeclareToDebugValue(DDI, SI); BB->getInstList().erase(SI); } } diff --git a/lib/Transforms/Utils/SSAUpdater.cpp b/lib/Transforms/Utils/SSAUpdater.cpp index 161bf21..a31235a 100644 --- a/lib/Transforms/Utils/SSAUpdater.cpp +++ b/lib/Transforms/Utils/SSAUpdater.cpp @@ -71,6 +71,50 @@ void SSAUpdater::AddAvailableValue(BasicBlock *BB, Value *V) { getAvailableVals(AV)[BB] = V; } +/// IsEquivalentPHI - Check if PHI has the same incoming value as specified +/// in ValueMapping for each predecessor block. +static bool IsEquivalentPHI(PHINode *PHI, + DenseMap<BasicBlock*, Value*> &ValueMapping) { + unsigned PHINumValues = PHI->getNumIncomingValues(); + if (PHINumValues != ValueMapping.size()) + return false; + + // Scan the phi to see if it matches. + for (unsigned i = 0, e = PHINumValues; i != e; ++i) + if (ValueMapping[PHI->getIncomingBlock(i)] != + PHI->getIncomingValue(i)) { + return false; + } + + return true; +} + +/// GetExistingPHI - Check if BB already contains a phi node that is equivalent +/// to the specified mapping from predecessor blocks to incoming values. +static Value *GetExistingPHI(BasicBlock *BB, + DenseMap<BasicBlock*, Value*> &ValueMapping) { + PHINode *SomePHI; + for (BasicBlock::iterator It = BB->begin(); + (SomePHI = dyn_cast<PHINode>(It)); ++It) { + if (IsEquivalentPHI(SomePHI, ValueMapping)) + return SomePHI; + } + return 0; +} + +/// GetExistingPHI - Check if BB already contains an equivalent phi node.
+/// The InputIt type must be an iterator over std::pair<BasicBlock*, Value*> +/// objects that specify the mapping from predecessor blocks to incoming values. +template<typename InputIt> +static Value *GetExistingPHI(BasicBlock *BB, const InputIt &I, + const InputIt &E) { + // Avoid creating the mapping if BB has no phi nodes at all. + if (!isa<PHINode>(BB->begin())) + return 0; + DenseMap<BasicBlock*, Value*> ValueMapping(I, E); + return GetExistingPHI(BB, ValueMapping); +} + /// GetValueAtEndOfBlock - Construct SSA form, materializing a value that is /// live at the end of the specified block. Value *SSAUpdater::GetValueAtEndOfBlock(BasicBlock *BB) { @@ -149,28 +193,11 @@ Value *SSAUpdater::GetValueInMiddleOfBlock(BasicBlock *BB) { if (SingularValue != 0) return SingularValue; - // Otherwise, we do need a PHI: check to see if we already have one available - // in this block that produces the right value. - if (isa<PHINode>(BB->begin())) { - DenseMap<BasicBlock*, Value*> ValueMapping(PredValues.begin(), - PredValues.end()); - PHINode *SomePHI; - for (BasicBlock::iterator It = BB->begin(); - (SomePHI = dyn_cast<PHINode>(It)); ++It) { - // Scan this phi to see if it is what we need. - bool Equal = true; - for (unsigned i = 0, e = SomePHI->getNumIncomingValues(); i != e; ++i) - if (ValueMapping[SomePHI->getIncomingBlock(i)] != - SomePHI->getIncomingValue(i)) { - Equal = false; - break; - } - - if (Equal) - return SomePHI; - } - } - + // Otherwise, we do need a PHI. + if (Value *ExistingPHI = GetExistingPHI(BB, PredValues.begin(), + PredValues.end())) + return ExistingPHI; + // Ok, we have no way out, insert a new one now. PHINode *InsertedPHI = PHINode::Create(PrototypeValue->getType(), PrototypeValue->getName(), @@ -255,7 +282,7 @@ Value *SSAUpdater::GetValueAtEndOfBlockInternal(BasicBlock *BB) { // producing the same value. If so, this value will capture it, if not, it // will get reset to null. We distinguish the no-predecessor case explicitly // below. - TrackingVH<Value> SingularValue; + TrackingVH<Value> ExistingValue; // We can get our predecessor info by walking the pred_iterator list, but it // is relatively slow. If we already have PHI nodes in this block, walk one @@ -266,11 +293,11 @@ Value *SSAUpdater::GetValueAtEndOfBlockInternal(BasicBlock *BB) { Value *PredVal = GetValueAtEndOfBlockInternal(PredBB); IncomingPredInfo.push_back(std::make_pair(PredBB, PredVal)); - // Compute SingularValue. + // Set ExistingValue to singular value from all predecessors so far. if (i == 0) - SingularValue = PredVal; - else if (PredVal != SingularValue) - SingularValue = 0; + ExistingValue = PredVal; + else if (PredVal != ExistingValue) + ExistingValue = 0; } } else { bool isFirstPred = true; @@ -279,12 +306,12 @@ Value *SSAUpdater::GetValueAtEndOfBlockInternal(BasicBlock *BB) { Value *PredVal = GetValueAtEndOfBlockInternal(PredBB); IncomingPredInfo.push_back(std::make_pair(PredBB, PredVal)); - // Compute SingularValue. + // Set ExistingValue to singular value from all predecessors so far. if (isFirstPred) { - SingularValue = PredVal; + ExistingValue = PredVal; isFirstPred = false; - } else if (PredVal != SingularValue) - SingularValue = 0; + } else if (PredVal != ExistingValue) + ExistingValue = 0; } } @@ -300,31 +327,38 @@ Value *SSAUpdater::GetValueAtEndOfBlockInternal(BasicBlock *BB) { /// above.
TrackingVH<Value> &InsertedVal = AvailableVals[BB]; - // If all the predecessor values are the same then we don't need to insert a + // If the predecessor values are not all the same, then check to see if there + // is an existing PHI that can be used. + if (!ExistingValue) + ExistingValue = GetExistingPHI(BB, + IncomingPredInfo.begin()+FirstPredInfoEntry, + IncomingPredInfo.end()); + + // If there is an existing value we can use, then we don't need to insert a // PHI. This is the simple and common case. - if (SingularValue) { - // If a PHI node got inserted, replace it with the singlar value and delete + if (ExistingValue) { + // If a PHI node got inserted, replace it with the existing value and delete // it. if (InsertedVal) { PHINode *OldVal = cast<PHINode>(InsertedVal); // Be careful about dead loops. These RAUW's also update InsertedVal. - if (InsertedVal != SingularValue) - OldVal->replaceAllUsesWith(SingularValue); + if (InsertedVal != ExistingValue) + OldVal->replaceAllUsesWith(ExistingValue); else OldVal->replaceAllUsesWith(UndefValue::get(InsertedVal->getType())); OldVal->eraseFromParent(); } else { - InsertedVal = SingularValue; + InsertedVal = ExistingValue; } - // Either path through the 'if' should have set insertedVal -> SingularVal. - assert((InsertedVal == SingularValue || isa<UndefValue>(InsertedVal)) && - "RAUW didn't change InsertedVal to be SingularVal"); + // Either path through the 'if' should have set InsertedVal -> ExistingVal. + assert((InsertedVal == ExistingValue || isa<UndefValue>(InsertedVal)) && + "RAUW didn't change InsertedVal to be ExistingValue"); // Drop the entries we added in IncomingPredInfo to restore the stack. IncomingPredInfo.erase(IncomingPredInfo.begin()+FirstPredInfoEntry, IncomingPredInfo.end()); - return SingularValue; + return ExistingValue; } // Otherwise, we do need a PHI: insert one now if we don't already have one. diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp index cb53296..2215059 100644 --- a/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/lib/Transforms/Utils/SimplifyCFG.cpp @@ -23,6 +23,7 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Analysis/ConstantFolding.h" +#include "llvm/Target/TargetData.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallVector.h" @@ -36,6 +37,28 @@ using namespace llvm; STATISTIC(NumSpeculations, "Number of speculative executed instructions"); +namespace { +class SimplifyCFGOpt { + const TargetData *const TD; + + ConstantInt *GetConstantInt(Value *V); + Value *GatherConstantSetEQs(Value *V, std::vector<ConstantInt*> &Values); + Value *GatherConstantSetNEs(Value *V, std::vector<ConstantInt*> &Values); + bool GatherValueComparisons(Instruction *Cond, Value *&CompVal, + std::vector<ConstantInt*> &Values); + Value *isValueEqualityComparison(TerminatorInst *TI); + BasicBlock *GetValueEqualityComparisonCases(TerminatorInst *TI, + std::vector<std::pair<ConstantInt*, BasicBlock*> > &Cases); + bool SimplifyEqualityComparisonWithOnlyPredecessor(TerminatorInst *TI, + BasicBlock *Pred); + bool FoldValueComparisonIntoPredecessors(TerminatorInst *TI); + +public: + explicit SimplifyCFGOpt(const TargetData *td) : TD(td) {} + bool run(BasicBlock *BB); +}; +} + /// SafeToMergeTerminators - Return true if it is safe to merge these two /// terminator instructions together. 
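The equivalence test factored out in the SSAUpdater hunks above has this shape in plain C++ (illustrative containers; LLVM uses a DenseMap keyed by predecessor block):

#include <map>
#include <utility>
#include <vector>

// A "PHI" (a list of predecessor -> value entries) matches a wanted mapping
// iff the entry counts agree and every entry agrees with the mapping.
template <class Block, class Val>
bool equivalentPhi(const std::vector<std::pair<Block, Val> > &PhiEntries,
                   const std::map<Block, Val> &Wanted) {
  if (PhiEntries.size() != Wanted.size())
    return false;
  for (unsigned i = 0, e = PhiEntries.size(); i != e; ++i) {
    typename std::map<Block, Val>::const_iterator It =
        Wanted.find(PhiEntries[i].first);
    if (It == Wanted.end() || It->second != PhiEntries[i].second)
      return false;
  }
  return true;
}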
/// @@ -243,17 +266,48 @@ static bool DominatesMergePoint(Value *V, BasicBlock *BB, return true; } +/// GetConstantInt - Extract ConstantInt from value, looking through IntToPtr +/// and PointerNullValue. Return NULL if value is not a constant int. +ConstantInt *SimplifyCFGOpt::GetConstantInt(Value *V) { + // Normal constant int. + ConstantInt *CI = dyn_cast<ConstantInt>(V); + if (CI || !TD || !isa<Constant>(V) || !isa<PointerType>(V->getType())) + return CI; + + // This is some kind of pointer constant. Turn it into a pointer-sized + // ConstantInt if possible. + const IntegerType *PtrTy = TD->getIntPtrType(V->getContext()); + + // Null pointer means 0, see SelectionDAGBuilder::getValue(const Value*). + if (isa<ConstantPointerNull>(V)) + return ConstantInt::get(PtrTy, 0); + + // IntToPtr const int. + if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) + if (CE->getOpcode() == Instruction::IntToPtr) + if (ConstantInt *CI = dyn_cast<ConstantInt>(CE->getOperand(0))) { + // The constant is very likely to have the right type already. + if (CI->getType() == PtrTy) + return CI; + else + return cast<ConstantInt> + (ConstantExpr::getIntegerCast(CI, PtrTy, /*isSigned=*/false)); + } + return 0; +} + /// GatherConstantSetEQs - Given a potentially 'or'd together collection of /// icmp_eq instructions that compare a value against a constant, return the /// value being compared, and stick the constant into the Values vector. -static Value *GatherConstantSetEQs(Value *V, std::vector<ConstantInt*> &Values){ +Value *SimplifyCFGOpt:: +GatherConstantSetEQs(Value *V, std::vector<ConstantInt*> &Values) { if (Instruction *Inst = dyn_cast<Instruction>(V)) { if (Inst->getOpcode() == Instruction::ICmp && cast<ICmpInst>(Inst)->getPredicate() == ICmpInst::ICMP_EQ) { - if (ConstantInt *C = dyn_cast<ConstantInt>(Inst->getOperand(1))) { + if (ConstantInt *C = GetConstantInt(Inst->getOperand(1))) { Values.push_back(C); return Inst->getOperand(0); - } else if (ConstantInt *C = dyn_cast<ConstantInt>(Inst->getOperand(0))) { + } else if (ConstantInt *C = GetConstantInt(Inst->getOperand(0))) { Values.push_back(C); return Inst->getOperand(1); } @@ -270,14 +324,15 @@ static Value *GatherConstantSetEQs(Value *V, std::vector<ConstantInt*> &Values){ /// GatherConstantSetNEs - Given a potentially 'and'd together collection of /// setne instructions that compare a value against a constant, return the value /// being compared, and stick the constant into the Values vector. -static Value *GatherConstantSetNEs(Value *V, std::vector<ConstantInt*> &Values){ +Value *SimplifyCFGOpt:: +GatherConstantSetNEs(Value *V, std::vector<ConstantInt*> &Values) { if (Instruction *Inst = dyn_cast<Instruction>(V)) { if (Inst->getOpcode() == Instruction::ICmp && cast<ICmpInst>(Inst)->getPredicate() == ICmpInst::ICMP_NE) { - if (ConstantInt *C = dyn_cast<ConstantInt>(Inst->getOperand(1))) { + if (ConstantInt *C = GetConstantInt(Inst->getOperand(1))) { Values.push_back(C); return Inst->getOperand(0); - } else if (ConstantInt *C = dyn_cast<ConstantInt>(Inst->getOperand(0))) { + } else if (ConstantInt *C = GetConstantInt(Inst->getOperand(0))) { Values.push_back(C); return Inst->getOperand(1); } @@ -294,8 +349,8 @@ static Value *GatherConstantSetNEs(Value *V, std::vector<ConstantInt*> &Values){ /// GatherValueComparisons - If the specified Cond is an 'and' or 'or' of a /// bunch of comparisons of one value against constants, return the value and /// the constants being compared. 
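GetConstantInt above widens the notion of "constant integer" for value-equality switches. A hypothetical standalone model of the pointer cases it accepts (the names, flags, and 64-bit cap are illustrative, not LLVM API):

#include <stdint.h>

// Null reads as 0; an inttoptr'd literal reads as its integer adjusted to
// pointer width (an unsigned cast, matching isSigned=false above); any
// other pointer constant is not usable as a switch case value.
bool pointerCaseValue(bool IsNull, bool IsIntToPtr, uint64_t Literal,
                      unsigned PtrBits, uint64_t &CaseValue) {
  if (IsNull) { CaseValue = 0; return true; }
  if (IsIntToPtr) {
    uint64_t Mask =
        PtrBits >= 64 ? ~uint64_t(0) : (uint64_t(1) << PtrBits) - 1;
    CaseValue = Literal & Mask; // zext/trunc to pointer width
    return true;
  }
  return false;
}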
-static bool GatherValueComparisons(Instruction *Cond, Value *&CompVal, - std::vector<ConstantInt*> &Values) { +bool SimplifyCFGOpt::GatherValueComparisons(Instruction *Cond, Value *&CompVal, + std::vector<ConstantInt*> &Values) { if (Cond->getOpcode() == Instruction::Or) { CompVal = GatherConstantSetEQs(Cond, Values); @@ -327,29 +382,32 @@ static void EraseTerminatorInstAndDCECond(TerminatorInst *TI) { /// isValueEqualityComparison - Return true if the specified terminator checks /// to see if a value is equal to constant integer value. -static Value *isValueEqualityComparison(TerminatorInst *TI) { +Value *SimplifyCFGOpt::isValueEqualityComparison(TerminatorInst *TI) { + Value *CV = 0; if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) { // Do not permit merging of large switch instructions into their // predecessors unless there is only one predecessor. - if (SI->getNumSuccessors() * std::distance(pred_begin(SI->getParent()), - pred_end(SI->getParent())) > 128) - return 0; - - return SI->getCondition(); - } - if (BranchInst *BI = dyn_cast<BranchInst>(TI)) + if (SI->getNumSuccessors()*std::distance(pred_begin(SI->getParent()), + pred_end(SI->getParent())) <= 128) + CV = SI->getCondition(); + } else if (BranchInst *BI = dyn_cast<BranchInst>(TI)) if (BI->isConditional() && BI->getCondition()->hasOneUse()) if (ICmpInst *ICI = dyn_cast<ICmpInst>(BI->getCondition())) if ((ICI->getPredicate() == ICmpInst::ICMP_EQ || ICI->getPredicate() == ICmpInst::ICMP_NE) && - isa<ConstantInt>(ICI->getOperand(1))) - return ICI->getOperand(0); - return 0; + GetConstantInt(ICI->getOperand(1))) + CV = ICI->getOperand(0); + + // Unwrap any lossless ptrtoint cast. + if (TD && CV && CV->getType() == TD->getIntPtrType(CV->getContext())) + if (PtrToIntInst *PTII = dyn_cast<PtrToIntInst>(CV)) + CV = PTII->getOperand(0); + return CV; } /// GetValueEqualityComparisonCases - Given a value comparison instruction, /// decode all of the 'cases' that it represents and return the 'default' block. -static BasicBlock * +BasicBlock *SimplifyCFGOpt:: GetValueEqualityComparisonCases(TerminatorInst *TI, std::vector<std::pair<ConstantInt*, BasicBlock*> > &Cases) { @@ -362,7 +420,7 @@ GetValueEqualityComparisonCases(TerminatorInst *TI, BranchInst *BI = cast<BranchInst>(TI); ICmpInst *ICI = cast<ICmpInst>(BI->getCondition()); - Cases.push_back(std::make_pair(cast<ConstantInt>(ICI->getOperand(1)), + Cases.push_back(std::make_pair(GetConstantInt(ICI->getOperand(1)), BI->getSuccessor(ICI->getPredicate() == ICmpInst::ICMP_NE))); return BI->getSuccessor(ICI->getPredicate() == ICmpInst::ICMP_EQ); @@ -421,8 +479,9 @@ ValuesOverlap(std::vector<std::pair<ConstantInt*, BasicBlock*> > &C1, /// comparison with the same value, and if that comparison determines the /// outcome of this comparison. If so, simplify TI. This does a very limited /// form of jump threading. -static bool SimplifyEqualityComparisonWithOnlyPredecessor(TerminatorInst *TI, - BasicBlock *Pred) { +bool SimplifyCFGOpt:: +SimplifyEqualityComparisonWithOnlyPredecessor(TerminatorInst *TI, + BasicBlock *Pred) { Value *PredVal = isValueEqualityComparison(Pred->getTerminator()); if (!PredVal) return false; // Not a value comparison in predecessor. @@ -548,7 +607,7 @@ namespace { /// equality comparison instruction (either a switch or a branch on "X == c"). /// See if any of the predecessors of the terminator block are value comparisons /// on the same value. If so, and if safe to do so, fold them together. 
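At the source level, the gathering done by these routines is what lets SimplifyCFG rebuild a chain of equality tests as a switch; a small illustrative example:

int classify(int X) {
  // An 'or' of icmp-eq tests against constants on the same value...
  if (X == 1 || X == 4 || X == 9)
    return 1;
  // ...can be rebuilt as: switch (X) { case 1: case 4: case 9: return 1; }
  return 0;
}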
-static bool FoldValueComparisonIntoPredecessors(TerminatorInst *TI) { +bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(TerminatorInst *TI) { BasicBlock *BB = TI->getParent(); Value *CV = isValueEqualityComparison(TI); // CondVal assert(CV && "Not a comparison?"); @@ -641,6 +700,13 @@ static bool FoldValueComparisonIntoPredecessors(TerminatorInst *TI) { for (unsigned i = 0, e = NewSuccessors.size(); i != e; ++i) AddPredecessorToBlock(NewSuccessors[i], Pred, BB); + // Convert pointer to int before we switch. + if (isa<PointerType>(CV->getType())) { + assert(TD && "Cannot switch on pointer without TargetData"); + CV = new PtrToIntInst(CV, TD->getIntPtrType(CV->getContext()), + "magicptr", PTI); + } + // Now that the successors are updated, create the new Switch instruction. SwitchInst *NewSI = SwitchInst::Create(CV, PredDefault, PredCases.size(), PTI); @@ -1011,7 +1077,7 @@ static bool FoldCondBranchOnPHI(BranchInst *BI) { for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { ConstantInt *CB; if ((CB = dyn_cast<ConstantInt>(PN->getIncomingValue(i))) && - CB->getType()->isInteger(1)) { + CB->getType()->isIntegerTy(1)) { // Okay, we now know that all edges from PredBB should be revectored to // branch to RealDest. BasicBlock *PredBB = PN->getIncomingBlock(i); @@ -1589,14 +1655,7 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) { return true; } -/// SimplifyCFG - This function is used to do simplification of a CFG. For -/// example, it adjusts branches to branches to eliminate the extra hop, it -/// eliminates unreachable basic blocks, and does other "peephole" optimization -/// of the CFG. It returns true if a modification was made. -/// -/// WARNING: The entry node of a function may not be simplified. -/// -bool llvm::SimplifyCFG(BasicBlock *BB) { +bool SimplifyCFGOpt::run(BasicBlock *BB) { bool Changed = false; Function *M = BB->getParent(); @@ -1997,7 +2056,7 @@ bool llvm::SimplifyCFG(BasicBlock *BB) { Value *CompVal = 0; std::vector<ConstantInt*> Values; bool TrueWhenEqual = GatherValueComparisons(Cond, CompVal, Values); - if (CompVal && CompVal->getType()->isInteger()) { + if (CompVal) { // There might be duplicate constants in the list, which the switch // instruction can't handle, remove them now. std::sort(Values.begin(), Values.end(), ConstantIntOrdering()); @@ -2008,6 +2067,14 @@ bool llvm::SimplifyCFG(BasicBlock *BB) { BasicBlock *EdgeBB = BI->getSuccessor(0); if (!TrueWhenEqual) std::swap(DefaultBB, EdgeBB); + // Convert pointer to int before we switch. + if (isa<PointerType>(CompVal->getType())) { + assert(TD && "Cannot switch on pointer without TargetData"); + CompVal = new PtrToIntInst(CompVal, + TD->getIntPtrType(CompVal->getContext()), + "magicptr", BI); + } + // Create the new switch instruction now. SwitchInst *New = SwitchInst::Create(CompVal, DefaultBB, Values.size(), BI); @@ -2035,3 +2102,14 @@ bool llvm::SimplifyCFG(BasicBlock *BB) { return Changed; } + +/// SimplifyCFG - This function is used to do simplification of a CFG. For +/// example, it adjusts branches to branches to eliminate the extra hop, it +/// eliminates unreachable basic blocks, and does other "peephole" optimization +/// of the CFG. It returns true if a modification was made. +/// +/// WARNING: The entry node of a function may not be simplified. 
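With GetConstantInt handling null and inttoptr constants, the same switch conversion now applies to pointers: the single ptrtoint inserted above ("magicptr") feeds the switch. Illustrative source, with a made-up sentinel address and a 64-bit pointer assumed:

bool isSentinel(const char *P) {
  // Equality tests of one pointer against null/inttoptr constants...
  return P == 0 || P == reinterpret_cast<const char *>(0xdead);
  // ...become, given TargetData: %magicptr = ptrtoint i8* %P to i64
  //                              switch i64 %magicptr ... case 0, case 0xdead
}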
+/// +bool llvm::SimplifyCFG(BasicBlock *BB, const TargetData *TD) { + return SimplifyCFGOpt(TD).run(BB); +} diff --git a/lib/Transforms/Utils/ValueMapper.cpp b/lib/Transforms/Utils/ValueMapper.cpp index a6e6701..6045048 100644 --- a/lib/Transforms/Utils/ValueMapper.cpp +++ b/lib/Transforms/Utils/ValueMapper.cpp @@ -35,7 +35,7 @@ Value *llvm::MapValue(const Value *V, ValueMapTy &VM) { if (const MDNode *MD = dyn_cast<MDNode>(V)) { SmallVector<Value*, 4> Elts; - for (unsigned i = 0; i != MD->getNumOperands(); i++) + for (unsigned i = 0, e = MD->getNumOperands(); i != e; ++i) Elts.push_back(MD->getOperand(i) ? MapValue(MD->getOperand(i), VM) : 0); return VM[V] = MDNode::get(V->getContext(), Elts.data(), Elts.size()); } diff --git a/lib/VMCore/AsmWriter.cpp b/lib/VMCore/AsmWriter.cpp index c9f3849..f5ba7e7 100644 --- a/lib/VMCore/AsmWriter.cpp +++ b/lib/VMCore/AsmWriter.cpp @@ -27,6 +27,7 @@ #include "llvm/ValueSymbolTable.h" #include "llvm/TypeSymbolTable.h" #include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/STLExtras.h" #include "llvm/Support/CFG.h" @@ -238,6 +239,19 @@ void TypePrinting::CalcTypeName(const Type *Ty, OS << '>'; break; } + case Type::UnionTyID: { + const UnionType *UTy = cast<UnionType>(Ty); + OS << "union { "; + for (StructType::element_iterator I = UTy->element_begin(), + E = UTy->element_end(); I != E; ++I) { + CalcTypeName(*I, TypeStack, OS); + if (next(I) != UTy->element_end()) + OS << ','; + OS << ' '; + } + OS << '}'; + break; + } case Type::PointerTyID: { const PointerType *PTy = cast<PointerType>(Ty); CalcTypeName(PTy->getElementType(), TypeStack, OS); @@ -417,13 +431,13 @@ static void AddModuleTypesToPrinter(TypePrinting &TP, // they are used too often to have a single useful name. if (const PointerType *PTy = dyn_cast<PointerType>(Ty)) { const Type *PETy = PTy->getElementType(); - if ((PETy->isPrimitiveType() || PETy->isInteger()) && + if ((PETy->isPrimitiveType() || PETy->isIntegerTy()) && !isa<OpaqueType>(PETy)) continue; } // Likewise don't insert primitives either. - if (Ty->isInteger() || Ty->isPrimitiveType()) + if (Ty->isIntegerTy() || Ty->isPrimitiveType()) continue; // Get the name as a string and insert it into TypeNames. @@ -835,7 +849,7 @@ static void WriteOptimizationInfo(raw_ostream &Out, const User *U) { static void WriteConstantInt(raw_ostream &Out, const Constant *CV, TypePrinting &TypePrinter, SlotTracker *Machine) { if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV)) { - if (CI->getType()->isInteger(1)) { + if (CI->getType()->isIntegerTy(1)) { Out << (CI->getZExtValue() ? "true" : "false"); return; } @@ -855,7 +869,8 @@ static void WriteConstantInt(raw_ostream &Out, const Constant *CV, bool isDouble = &CFP->getValueAPF().getSemantics()==&APFloat::IEEEdouble; double Val = isDouble ? CFP->getValueAPF().convertToDouble() : CFP->getValueAPF().convertToFloat(); - std::string StrVal = ftostr(CFP->getValueAPF()); + SmallString<128> StrVal; + raw_svector_ostream(StrVal) << Val; // Check to make sure that the stringized number is not some string like // "Inf" or NaN, that atof will accept, but the lexer will not. Check @@ -866,7 +881,7 @@ static void WriteConstantInt(raw_ostream &Out, const Constant *CV, (StrVal[1] >= '0' && StrVal[1] <= '9'))) { // Reparse stringized version! 
if (atof(StrVal.c_str()) == Val) { - Out << StrVal; + Out << StrVal.str(); return; } } @@ -1250,15 +1265,14 @@ public: void printArgument(const Argument *FA, Attributes Attrs); void printBasicBlock(const BasicBlock *BB); void printInstruction(const Instruction &I); -private: +private: // printInfoComment - Print a little comment after the instruction indicating // which slot it occupies. void printInfoComment(const Value &V); }; } // end of anonymous namespace - void AssemblyWriter::writeOperand(const Value *Operand, bool PrintType) { if (Operand == 0) { Out << "<null operand!>"; @@ -1402,8 +1416,6 @@ static void PrintLinkage(GlobalValue::LinkageTypes LT, case GlobalValue::AvailableExternallyLinkage: Out << "available_externally "; break; - // This is invalid syntax and just a debugging aid. - case GlobalValue::GhostLinkage: Out << "ghost "; break; } } @@ -1418,6 +1430,9 @@ static void PrintVisibility(GlobalValue::VisibilityTypes Vis, } void AssemblyWriter::printGlobal(const GlobalVariable *GV) { + if (GV->isMaterializable()) + Out << "; Materializable\n"; + WriteAsOperandInternal(Out, GV, &TypePrinter, &Machine); Out << " = "; @@ -1448,6 +1463,9 @@ void AssemblyWriter::printGlobal(const GlobalVariable *GV) { } void AssemblyWriter::printAlias(const GlobalAlias *GA) { + if (GA->isMaterializable()) + Out << "; Materializable\n"; + // Don't crash when dumping partially built GA if (!GA->hasName()) Out << "<<nameless>> = "; @@ -1521,6 +1539,9 @@ void AssemblyWriter::printFunction(const Function *F) { if (AnnotationWriter) AnnotationWriter->emitFunctionAnnot(F, Out); + if (F->isMaterializable()) + Out << "; Materializable\n"; + if (F->isDeclaration()) Out << "declare "; else @@ -1680,11 +1701,15 @@ void AssemblyWriter::printBasicBlock(const BasicBlock *BB) { if (AnnotationWriter) AnnotationWriter->emitBasicBlockEndAnnot(BB, Out); } - /// printInfoComment - Print a little comment after the instruction indicating /// which slot it occupies. /// void AssemblyWriter::printInfoComment(const Value &V) { + if (AnnotationWriter) { + AnnotationWriter->printInfoComment(V, Out); + return; + } + if (V.getType()->isVoidTy()) return; Out.PadToColumn(50); diff --git a/lib/VMCore/Attributes.cpp b/lib/VMCore/Attributes.cpp index 65155f1..ff0cc9b 100644 --- a/lib/VMCore/Attributes.cpp +++ b/lib/VMCore/Attributes.cpp @@ -56,6 +56,8 @@ std::string Attribute::getAsString(Attributes Attrs) { Result += "optsize "; if (Attrs & Attribute::NoInline) Result += "noinline "; + if (Attrs & Attribute::InlineHint) + Result += "inlinehint "; if (Attrs & Attribute::AlwaysInline) Result += "alwaysinline "; if (Attrs & Attribute::StackProtect) @@ -68,6 +70,11 @@ std::string Attribute::getAsString(Attributes Attrs) { Result += "noimplicitfloat "; if (Attrs & Attribute::Naked) Result += "naked "; + if (Attrs & Attribute::StackAlignment) { + Result += "alignstack("; + Result += utostr(Attribute::getStackAlignmentFromAttrs(Attrs)); + Result += ") "; + } if (Attrs & Attribute::Alignment) { Result += "align "; Result += utostr(Attribute::getAlignmentFromAttrs(Attrs)); @@ -82,7 +89,7 @@ std::string Attribute::getAsString(Attributes Attrs) { Attributes Attribute::typeIncompatible(const Type *Ty) { Attributes Incompatible = None; - if (!Ty->isInteger()) + if (!Ty->isIntegerTy()) // Attributes that only apply to integers. 
Incompatible |= SExt | ZExt; diff --git a/lib/VMCore/CMakeLists.txt b/lib/VMCore/CMakeLists.txt index 5ecedf1..4b80e36 100644 --- a/lib/VMCore/CMakeLists.txt +++ b/lib/VMCore/CMakeLists.txt @@ -8,17 +8,17 @@ add_llvm_library(LLVMCore Core.cpp Dominators.cpp Function.cpp + GVMaterializer.cpp Globals.cpp + IRBuilder.cpp InlineAsm.cpp Instruction.cpp Instructions.cpp IntrinsicInst.cpp - IRBuilder.cpp LLVMContext.cpp LeakDetector.cpp Metadata.cpp Module.cpp - ModuleProvider.cpp Pass.cpp PassManager.cpp PrintModulePass.cpp diff --git a/lib/VMCore/ConstantFold.cpp b/lib/VMCore/ConstantFold.cpp index ddd5587..78a45e8 100644 --- a/lib/VMCore/ConstantFold.cpp +++ b/lib/VMCore/ConstantFold.cpp @@ -24,7 +24,6 @@ #include "llvm/Function.h" #include "llvm/GlobalAlias.h" #include "llvm/GlobalVariable.h" -#include "llvm/LLVMContext.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/ErrorHandling.h" @@ -41,7 +40,7 @@ using namespace llvm; /// BitCastConstantVector - Convert the specified ConstantVector node to the /// specified vector type. At this point, we know that the elements of the /// input vector constant are all simple integer or FP values. -static Constant *BitCastConstantVector(LLVMContext &Context, ConstantVector *CV, +static Constant *BitCastConstantVector(ConstantVector *CV, const VectorType *DstTy) { // If this cast changes element count then we can't handle it here: // doing so requires endianness information. This should be handled by @@ -91,8 +90,7 @@ foldConstantCastPair( Type::getInt64Ty(DstTy->getContext())); } -static Constant *FoldBitCast(LLVMContext &Context, - Constant *V, const Type *DestTy) { +static Constant *FoldBitCast(Constant *V, const Type *DestTy) { const Type *SrcTy = V->getType(); if (SrcTy == DestTy) return V; // no-op cast @@ -103,7 +101,8 @@ static Constant *FoldBitCast(LLVMContext &Context, if (const PointerType *DPTy = dyn_cast<PointerType>(DestTy)) if (PTy->getAddressSpace() == DPTy->getAddressSpace()) { SmallVector<Value*, 8> IdxList; - Value *Zero = Constant::getNullValue(Type::getInt32Ty(Context)); + Value *Zero = + Constant::getNullValue(Type::getInt32Ty(DPTy->getContext())); IdxList.push_back(Zero); const Type *ElTy = PTy->getElementType(); while (ElTy != DPTy->getElementType()) { @@ -139,15 +138,14 @@ static Constant *FoldBitCast(LLVMContext &Context, return Constant::getNullValue(DestTy); if (ConstantVector *CV = dyn_cast<ConstantVector>(V)) - return BitCastConstantVector(Context, CV, DestPTy); + return BitCastConstantVector(CV, DestPTy); } // Canonicalize scalar-to-vector bitcasts into vector-to-vector bitcasts // This allows for other simplifications (although some of them // can only be handled by Analysis/ConstantFolding.cpp). if (isa<ConstantInt>(V) || isa<ConstantFP>(V)) - return ConstantExpr::getBitCast( - ConstantVector::get(&V, 1), DestPTy); + return ConstantExpr::getBitCast(ConstantVector::get(&V, 1), DestPTy); } // Finally, implement bitcast folding now. The code below doesn't handle @@ -157,23 +155,24 @@ static Constant *FoldBitCast(LLVMContext &Context, // Handle integral constant input. if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) { - if (DestTy->isInteger()) + if (DestTy->isIntegerTy()) // Integral -> Integral. This is a no-op because the bit widths must // be the same. Consequently, we just fold to V. 
return V; - if (DestTy->isFloatingPoint()) - return ConstantFP::get(Context, APFloat(CI->getValue(), - DestTy != Type::getPPC_FP128Ty(Context))); + if (DestTy->isFloatingPointTy()) + return ConstantFP::get(DestTy->getContext(), + APFloat(CI->getValue(), + !DestTy->isPPC_FP128Ty())); // Otherwise, can't fold this (vector?) return 0; } - // Handle ConstantFP input. + // Handle ConstantFP input: FP -> Integral. if (ConstantFP *FP = dyn_cast<ConstantFP>(V)) - // FP -> Integral. - return ConstantInt::get(Context, FP->getValueAPF().bitcastToAPInt()); + return ConstantInt::get(FP->getContext(), + FP->getValueAPF().bitcastToAPInt()); return 0; } @@ -323,9 +322,195 @@ static Constant *ExtractConstantBytes(Constant *C, unsigned ByteStart, } } +/// getFoldedSizeOf - Return a ConstantExpr with type DestTy for sizeof +/// on Ty, with any known factors factored out. If Folded is false, +/// return null if no factoring was possible, to avoid endlessly +/// bouncing an unfoldable expression back into the top-level folder. +/// +static Constant *getFoldedSizeOf(const Type *Ty, const Type *DestTy, + bool Folded) { + if (const ArrayType *ATy = dyn_cast<ArrayType>(Ty)) { + Constant *N = ConstantInt::get(DestTy, ATy->getNumElements()); + Constant *E = getFoldedSizeOf(ATy->getElementType(), DestTy, true); + return ConstantExpr::getNUWMul(E, N); + } + if (const VectorType *VTy = dyn_cast<VectorType>(Ty)) { + Constant *N = ConstantInt::get(DestTy, VTy->getNumElements()); + Constant *E = getFoldedSizeOf(VTy->getElementType(), DestTy, true); + return ConstantExpr::getNUWMul(E, N); + } + if (const StructType *STy = dyn_cast<StructType>(Ty)) + if (!STy->isPacked()) { + unsigned NumElems = STy->getNumElements(); + // An empty struct has size zero. + if (NumElems == 0) + return ConstantExpr::getNullValue(DestTy); + // Check for a struct with all members having the same size. + Constant *MemberSize = + getFoldedSizeOf(STy->getElementType(0), DestTy, true); + bool AllSame = true; + for (unsigned i = 1; i != NumElems; ++i) + if (MemberSize != + getFoldedSizeOf(STy->getElementType(i), DestTy, true)) { + AllSame = false; + break; + } + if (AllSame) { + Constant *N = ConstantInt::get(DestTy, NumElems); + return ConstantExpr::getNUWMul(MemberSize, N); + } + } + + // Pointer size doesn't depend on the pointee type, so canonicalize them + // to an arbitrary pointee. + if (const PointerType *PTy = dyn_cast<PointerType>(Ty)) + if (!PTy->getElementType()->isIntegerTy(1)) + return + getFoldedSizeOf(PointerType::get(IntegerType::get(PTy->getContext(), 1), + PTy->getAddressSpace()), + DestTy, true); + + // If there's no interesting folding happening, bail so that we don't create + // a constant that looks like it needs folding but really doesn't. + if (!Folded) + return 0; + + // Base case: Get a regular sizeof expression. + Constant *C = ConstantExpr::getSizeOf(Ty); + C = ConstantExpr::getCast(CastInst::getCastOpcode(C, false, + DestTy, false), + C, DestTy); + return C; +} + +/// getFoldedAlignOf - Return a ConstantExpr with type DestTy for alignof +/// on Ty, with any known factors factored out. If Folded is false, +/// return null if no factoring was possible, to avoid endlessly +/// bouncing an unfoldable expression back into the top-level folder. +/// +static Constant *getFoldedAlignOf(const Type *Ty, const Type *DestTy, + bool Folded) { + // The alignment of an array is equal to the alignment of the + // array element. Note that this is not always true for vectors. 
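// getFoldedSizeOf (above) rewrites sizeof([N x T]) as N * sizeof(T) using a
// no-unsigned-wrap multiply, keeping common factors visible to later folds.
// A worked example as it would read inside ConstantFold.cpp, where the static
// helper is in scope (the i32/i64 choice is just for illustration):
static Constant *sizeOfEightI32s(const Type *Int32Ty, const Type *I64Ty) {
  Constant *N = ConstantInt::get(I64Ty, 8);              // element count
  Constant *E = getFoldedSizeOf(Int32Ty, I64Ty, true);   // sizeof(i32) as i64
  return ConstantExpr::getNUWMul(E, N);                  // mul nuw E, 8
}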
+ if (const ArrayType *ATy = dyn_cast<ArrayType>(Ty)) { + Constant *C = ConstantExpr::getAlignOf(ATy->getElementType()); + C = ConstantExpr::getCast(CastInst::getCastOpcode(C, false, + DestTy, + false), + C, DestTy); + return C; + } + + if (const StructType *STy = dyn_cast<StructType>(Ty)) { + // Packed structs always have an alignment of 1. + if (STy->isPacked()) + return ConstantInt::get(DestTy, 1); + + // Otherwise, struct alignment is the maximum alignment of any member. + // Without target data, we can't compare much, but we can check to see + // if all the members have the same alignment. + unsigned NumElems = STy->getNumElements(); + // An empty struct has minimal alignment. + if (NumElems == 0) + return ConstantInt::get(DestTy, 1); + // Check for a struct with all members having the same alignment. + Constant *MemberAlign = + getFoldedAlignOf(STy->getElementType(0), DestTy, true); + bool AllSame = true; + for (unsigned i = 1; i != NumElems; ++i) + if (MemberAlign != getFoldedAlignOf(STy->getElementType(i), DestTy, true)) { + AllSame = false; + break; + } + if (AllSame) + return MemberAlign; + } + + // Pointer alignment doesn't depend on the pointee type, so canonicalize them + // to an arbitrary pointee. + if (const PointerType *PTy = dyn_cast<PointerType>(Ty)) + if (!PTy->getElementType()->isIntegerTy(1)) + return + getFoldedAlignOf(PointerType::get(IntegerType::get(PTy->getContext(), + 1), + PTy->getAddressSpace()), + DestTy, true); + + // If there's no interesting folding happening, bail so that we don't create + // a constant that looks like it needs folding but really doesn't. + if (!Folded) + return 0; + + // Base case: Get a regular alignof expression. + Constant *C = ConstantExpr::getAlignOf(Ty); + C = ConstantExpr::getCast(CastInst::getCastOpcode(C, false, + DestTy, false), + C, DestTy); + return C; +} + +/// getFoldedOffsetOf - Return a ConstantExpr with type DestTy for offsetof +/// on Ty and FieldNo, with any known factors factored out. If Folded is false, +/// return null if no factoring was possible, to avoid endlessly +/// bouncing an unfoldable expression back into the top-level folder. +/// +static Constant *getFoldedOffsetOf(const Type *Ty, Constant *FieldNo, + const Type *DestTy, + bool Folded) { + if (const ArrayType *ATy = dyn_cast<ArrayType>(Ty)) { + Constant *N = ConstantExpr::getCast(CastInst::getCastOpcode(FieldNo, false, + DestTy, false), + FieldNo, DestTy); + Constant *E = getFoldedSizeOf(ATy->getElementType(), DestTy, true); + return ConstantExpr::getNUWMul(E, N); + } + if (const VectorType *VTy = dyn_cast<VectorType>(Ty)) { + Constant *N = ConstantExpr::getCast(CastInst::getCastOpcode(FieldNo, false, + DestTy, false), + FieldNo, DestTy); + Constant *E = getFoldedSizeOf(VTy->getElementType(), DestTy, true); + return ConstantExpr::getNUWMul(E, N); + } + if (const StructType *STy = dyn_cast<StructType>(Ty)) + if (!STy->isPacked()) { + unsigned NumElems = STy->getNumElements(); + // An empty struct has no members. + if (NumElems == 0) + return 0; + // Check for a struct with all members having the same size. 
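// getFoldedOffsetOf (above) treats arrays and vectors alike: the offset of
// element FieldNo in [N x T] is FieldNo * sizeof(T), built with the same nuw
// multiply after widening FieldNo to the destination type. Condensed sketch
// of the array case, in the same in-file setting as the helpers above:
static Constant *offsetOfArrayElt(const ArrayType *ATy, Constant *FieldNo,
                                  const Type *DestTy) {
  Constant *N = ConstantExpr::getCast(
      CastInst::getCastOpcode(FieldNo, false, DestTy, false),
      FieldNo, DestTy);                                     // widened index
  Constant *E = getFoldedSizeOf(ATy->getElementType(), DestTy, true);
  return ConstantExpr::getNUWMul(E, N);                     // sizeof(T) * idx
}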
+ Constant *MemberSize = + getFoldedSizeOf(STy->getElementType(0), DestTy, true); + bool AllSame = true; + for (unsigned i = 1; i != NumElems; ++i) + if (MemberSize != + getFoldedSizeOf(STy->getElementType(i), DestTy, true)) { + AllSame = false; + break; + } + if (AllSame) { + Constant *N = ConstantExpr::getCast(CastInst::getCastOpcode(FieldNo, + false, + DestTy, + false), + FieldNo, DestTy); + return ConstantExpr::getNUWMul(MemberSize, N); + } + } + + // If there's no interesting folding happening, bail so that we don't create + // a constant that looks like it needs folding but really doesn't. + if (!Folded) + return 0; + + // Base case: Get a regular offsetof expression. + Constant *C = ConstantExpr::getOffsetOf(Ty, FieldNo); + C = ConstantExpr::getCast(CastInst::getCastOpcode(C, false, + DestTy, false), + C, DestTy); + return C; +} -Constant *llvm::ConstantFoldCastInstruction(LLVMContext &Context, - unsigned opc, Constant *V, +Constant *llvm::ConstantFoldCastInstruction(unsigned opc, Constant *V, const Type *DestTy) { if (isa<UndefValue>(V)) { // zext(undef) = 0, because the top bits will be zero. @@ -394,7 +579,7 @@ Constant *llvm::ConstantFoldCastInstruction(LLVMContext &Context, DestTy->isFP128Ty() ? APFloat::IEEEquad : APFloat::Bogus, APFloat::rmNearestTiesToEven, &ignored); - return ConstantFP::get(Context, Val); + return ConstantFP::get(V->getContext(), Val); } return 0; // Can't fold. case Instruction::FPToUI: @@ -407,7 +592,7 @@ Constant *llvm::ConstantFoldCastInstruction(LLVMContext &Context, (void) V.convertToInteger(x, DestBitWidth, opc==Instruction::FPToSI, APFloat::rmTowardZero, &ignored); APInt Val(DestBitWidth, 2, x); - return ConstantInt::get(Context, Val); + return ConstantInt::get(FPC->getContext(), Val); } return 0; // Can't fold. case Instruction::IntToPtr: //always treated as unsigned @@ -415,9 +600,49 @@ Constant *llvm::ConstantFoldCastInstruction(LLVMContext &Context, return ConstantPointerNull::get(cast<PointerType>(DestTy)); return 0; // Other pointer types cannot be casted case Instruction::PtrToInt: // always treated as unsigned - if (V->isNullValue()) // is it a null pointer value? + // Is it a null pointer value? + if (V->isNullValue()) return ConstantInt::get(DestTy, 0); - return 0; // Other pointer types cannot be casted + // If this is a sizeof-like expression, pull out multiplications by + // known factors to expose them to subsequent folding. If it's an + // alignof-like expression, factor out known factors. + if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) + if (CE->getOpcode() == Instruction::GetElementPtr && + CE->getOperand(0)->isNullValue()) { + const Type *Ty = + cast<PointerType>(CE->getOperand(0)->getType())->getElementType(); + if (CE->getNumOperands() == 2) { + // Handle a sizeof-like expression. + Constant *Idx = CE->getOperand(1); + bool isOne = isa<ConstantInt>(Idx) && cast<ConstantInt>(Idx)->isOne(); + if (Constant *C = getFoldedSizeOf(Ty, DestTy, !isOne)) { + Idx = ConstantExpr::getCast(CastInst::getCastOpcode(Idx, true, + DestTy, false), + Idx, DestTy); + return ConstantExpr::getMul(C, Idx); + } + } else if (CE->getNumOperands() == 3 && + CE->getOperand(1)->isNullValue()) { + // Handle an alignof-like expression. 
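// The PtrToInt case above pattern-matches the canonical sizeof idiom:
//   ptrtoint(getelementptr(T* null, i32 1)) == sizeof(T)
// i.e. the address of element 1 of a T array based at null. Sketch of
// building that idiom directly; this is essentially what
// ConstantExpr::getSizeOf produces (the i64 result type is an assumption):
static Constant *sizeOfIdiom(const Type *Ty) {
  LLVMContext &Ctx = Ty->getContext();
  Constant *Null = Constant::getNullValue(PointerType::getUnqual(Ty));
  Constant *One  = ConstantInt::get(Type::getInt32Ty(Ctx), 1);
  Constant *GEP  = ConstantExpr::getGetElementPtr(Null, &One, 1);
  return ConstantExpr::getPtrToInt(GEP, Type::getInt64Ty(Ctx));
}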
+ if (const StructType *STy = dyn_cast<StructType>(Ty)) + if (!STy->isPacked()) { + ConstantInt *CI = cast<ConstantInt>(CE->getOperand(2)); + if (CI->isOne() && + STy->getNumElements() == 2 && + STy->getElementType(0)->isIntegerTy(1)) { + return getFoldedAlignOf(STy->getElementType(1), DestTy, false); + } + } + // Handle an offsetof-like expression. + if (isa<StructType>(Ty) || isa<ArrayType>(Ty) || isa<VectorType>(Ty)){ + if (Constant *C = getFoldedOffsetOf(Ty, CE->getOperand(2), + DestTy, false)) + return C; + } + } + } + // Other pointer types cannot be casted + return 0; case Instruction::UIToFP: case Instruction::SIToFP: if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) { @@ -428,7 +653,7 @@ Constant *llvm::ConstantFoldCastInstruction(LLVMContext &Context, (void)apf.convertFromAPInt(api, opc==Instruction::SIToFP, APFloat::rmNearestTiesToEven); - return ConstantFP::get(Context, apf); + return ConstantFP::get(V->getContext(), apf); } return 0; case Instruction::ZExt: @@ -436,7 +661,7 @@ Constant *llvm::ConstantFoldCastInstruction(LLVMContext &Context, uint32_t BitWidth = cast<IntegerType>(DestTy)->getBitWidth(); APInt Result(CI->getValue()); Result.zext(BitWidth); - return ConstantInt::get(Context, Result); + return ConstantInt::get(V->getContext(), Result); } return 0; case Instruction::SExt: @@ -444,7 +669,7 @@ Constant *llvm::ConstantFoldCastInstruction(LLVMContext &Context, uint32_t BitWidth = cast<IntegerType>(DestTy)->getBitWidth(); APInt Result(CI->getValue()); Result.sext(BitWidth); - return ConstantInt::get(Context, Result); + return ConstantInt::get(V->getContext(), Result); } return 0; case Instruction::Trunc: { @@ -452,7 +677,7 @@ Constant *llvm::ConstantFoldCastInstruction(LLVMContext &Context, if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) { APInt Result(CI->getValue()); Result.trunc(DestBitWidth); - return ConstantInt::get(Context, Result); + return ConstantInt::get(V->getContext(), Result); } // The input must be a constantexpr. See if we can simplify this based on @@ -466,12 +691,11 @@ Constant *llvm::ConstantFoldCastInstruction(LLVMContext &Context, return 0; } case Instruction::BitCast: - return FoldBitCast(Context, V, DestTy); + return FoldBitCast(V, DestTy); } } -Constant *llvm::ConstantFoldSelectInstruction(LLVMContext&, - Constant *Cond, +Constant *llvm::ConstantFoldSelectInstruction(Constant *Cond, Constant *V1, Constant *V2) { if (ConstantInt *CB = dyn_cast<ConstantInt>(Cond)) return CB->getZExtValue() ? V1 : V2; @@ -483,8 +707,7 @@ Constant *llvm::ConstantFoldSelectInstruction(LLVMContext&, return 0; } -Constant *llvm::ConstantFoldExtractElementInstruction(LLVMContext &Context, - Constant *Val, +Constant *llvm::ConstantFoldExtractElementInstruction(Constant *Val, Constant *Idx) { if (isa<UndefValue>(Val)) // ee(undef, x) -> undef return UndefValue::get(cast<VectorType>(Val->getType())->getElementType()); @@ -503,8 +726,7 @@ Constant *llvm::ConstantFoldExtractElementInstruction(LLVMContext &Context, return 0; } -Constant *llvm::ConstantFoldInsertElementInstruction(LLVMContext &Context, - Constant *Val, +Constant *llvm::ConstantFoldInsertElementInstruction(Constant *Val, Constant *Elt, Constant *Idx) { ConstantInt *CIdx = dyn_cast<ConstantInt>(Idx); @@ -563,8 +785,7 @@ Constant *llvm::ConstantFoldInsertElementInstruction(LLVMContext &Context, /// GetVectorElement - If C is a ConstantVector, ConstantAggregateZero or Undef /// return the specified element value. Otherwise return null. 
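// The alignof idiom decoded just above encodes alignof(T) as the offset of
// field 1 in the unpacked struct {i1, T}: the padding inserted after the i1
// is exactly T's alignment. The structural test, restated as a predicate:
static bool looksLikeAlignOfStruct(const StructType *STy) {
  return !STy->isPacked() &&
         STy->getNumElements() == 2 &&
         STy->getElementType(0)->isIntegerTy(1);   // the {i1, T} shape
}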
-static Constant *GetVectorElement(LLVMContext &Context, Constant *C, - unsigned EltNo) { +static Constant *GetVectorElement(Constant *C, unsigned EltNo) { if (ConstantVector *CV = dyn_cast<ConstantVector>(C)) return CV->getOperand(EltNo); @@ -576,8 +797,7 @@ static Constant *GetVectorElement(LLVMContext &Context, Constant *C, return 0; } -Constant *llvm::ConstantFoldShuffleVectorInstruction(LLVMContext &Context, - Constant *V1, +Constant *llvm::ConstantFoldShuffleVectorInstruction(Constant *V1, Constant *V2, Constant *Mask) { // Undefined shuffle mask -> undefined value. @@ -590,7 +810,7 @@ Constant *llvm::ConstantFoldShuffleVectorInstruction(LLVMContext &Context, // Loop over the shuffle mask, evaluating each element. SmallVector<Constant*, 32> Result; for (unsigned i = 0; i != MaskNumElts; ++i) { - Constant *InElt = GetVectorElement(Context, Mask, i); + Constant *InElt = GetVectorElement(Mask, i); if (InElt == 0) return 0; if (isa<UndefValue>(InElt)) @@ -600,9 +820,9 @@ Constant *llvm::ConstantFoldShuffleVectorInstruction(LLVMContext &Context, if (Elt >= SrcNumElts*2) InElt = UndefValue::get(EltTy); else if (Elt >= SrcNumElts) - InElt = GetVectorElement(Context, V2, Elt - SrcNumElts); + InElt = GetVectorElement(V2, Elt - SrcNumElts); else - InElt = GetVectorElement(Context, V1, Elt); + InElt = GetVectorElement(V1, Elt); if (InElt == 0) return 0; } else { // Unknown value. @@ -614,8 +834,7 @@ Constant *llvm::ConstantFoldShuffleVectorInstruction(LLVMContext &Context, return ConstantVector::get(&Result[0], Result.size()); } -Constant *llvm::ConstantFoldExtractValueInstruction(LLVMContext &Context, - Constant *Agg, +Constant *llvm::ConstantFoldExtractValueInstruction(Constant *Agg, const unsigned *Idxs, unsigned NumIdx) { // Base case: no indices, so return the entire value. @@ -635,19 +854,18 @@ Constant *llvm::ConstantFoldExtractValueInstruction(LLVMContext &Context, // Otherwise recurse. if (ConstantStruct *CS = dyn_cast<ConstantStruct>(Agg)) - return ConstantFoldExtractValueInstruction(Context, CS->getOperand(*Idxs), + return ConstantFoldExtractValueInstruction(CS->getOperand(*Idxs), Idxs+1, NumIdx-1); if (ConstantArray *CA = dyn_cast<ConstantArray>(Agg)) - return ConstantFoldExtractValueInstruction(Context, CA->getOperand(*Idxs), + return ConstantFoldExtractValueInstruction(CA->getOperand(*Idxs), Idxs+1, NumIdx-1); ConstantVector *CV = cast<ConstantVector>(Agg); - return ConstantFoldExtractValueInstruction(Context, CV->getOperand(*Idxs), + return ConstantFoldExtractValueInstruction(CV->getOperand(*Idxs), Idxs+1, NumIdx-1); } -Constant *llvm::ConstantFoldInsertValueInstruction(LLVMContext &Context, - Constant *Agg, +Constant *llvm::ConstantFoldInsertValueInstruction(Constant *Agg, Constant *Val, const unsigned *Idxs, unsigned NumIdx) { @@ -667,6 +885,8 @@ Constant *llvm::ConstantFoldInsertValueInstruction(LLVMContext &Context, unsigned numOps; if (const ArrayType *AR = dyn_cast<ArrayType>(AggTy)) numOps = AR->getNumElements(); + else if (isa<UnionType>(AggTy)) + numOps = 1; else numOps = cast<StructType>(AggTy)->getNumElements(); @@ -675,14 +895,18 @@ Constant *llvm::ConstantFoldInsertValueInstruction(LLVMContext &Context, const Type *MemberTy = AggTy->getTypeAtIndex(i); Constant *Op = (*Idxs == i) ? 
- ConstantFoldInsertValueInstruction(Context, UndefValue::get(MemberTy), + ConstantFoldInsertValueInstruction(UndefValue::get(MemberTy), Val, Idxs+1, NumIdx-1) : UndefValue::get(MemberTy); Ops[i] = Op; } if (const StructType* ST = dyn_cast<StructType>(AggTy)) - return ConstantStruct::get(Context, Ops, ST->isPacked()); + return ConstantStruct::get(ST->getContext(), Ops, ST->isPacked()); + if (const UnionType* UT = dyn_cast<UnionType>(AggTy)) { + assert(Ops.size() == 1 && "Union can only contain a single value!"); + return ConstantUnion::get(UT, Ops[0]); + } return ConstantArray::get(cast<ArrayType>(AggTy), Ops); } @@ -706,15 +930,14 @@ Constant *llvm::ConstantFoldInsertValueInstruction(LLVMContext &Context, const Type *MemberTy = AggTy->getTypeAtIndex(i); Constant *Op = (*Idxs == i) ? - ConstantFoldInsertValueInstruction(Context, - Constant::getNullValue(MemberTy), + ConstantFoldInsertValueInstruction(Constant::getNullValue(MemberTy), Val, Idxs+1, NumIdx-1) : Constant::getNullValue(MemberTy); Ops[i] = Op; } - if (const StructType* ST = dyn_cast<StructType>(AggTy)) - return ConstantStruct::get(Context, Ops, ST->isPacked()); + if (const StructType *ST = dyn_cast<StructType>(AggTy)) + return ConstantStruct::get(ST->getContext(), Ops, ST->isPacked()); return ConstantArray::get(cast<ArrayType>(AggTy), Ops); } @@ -724,13 +947,12 @@ Constant *llvm::ConstantFoldInsertValueInstruction(LLVMContext &Context, for (unsigned i = 0; i < Agg->getNumOperands(); ++i) { Constant *Op = cast<Constant>(Agg->getOperand(i)); if (*Idxs == i) - Op = ConstantFoldInsertValueInstruction(Context, Op, - Val, Idxs+1, NumIdx-1); + Op = ConstantFoldInsertValueInstruction(Op, Val, Idxs+1, NumIdx-1); Ops[i] = Op; } if (const StructType* ST = dyn_cast<StructType>(Agg->getType())) - return ConstantStruct::get(Context, Ops, ST->isPacked()); + return ConstantStruct::get(ST->getContext(), Ops, ST->isPacked()); return ConstantArray::get(cast<ArrayType>(Agg->getType()), Ops); } @@ -738,8 +960,7 @@ Constant *llvm::ConstantFoldInsertValueInstruction(LLVMContext &Context, } -Constant *llvm::ConstantFoldBinaryInstruction(LLVMContext &Context, - unsigned Opcode, +Constant *llvm::ConstantFoldBinaryInstruction(unsigned Opcode, Constant *C1, Constant *C2) { // No compile-time operations on this type yet. 
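// insertvalue into a union (above) always yields a one-operand ConstantUnion;
// the ConstantUnion::get factory added later in this patch additionally
// canonicalizes an all-zero member back to ConstantAggregateZero. Minimal
// construction sketch mirroring those rules:
static Constant *makeUnionConstant(const UnionType *UT, Constant *Member) {
  assert(UT->getElementTypeIndex(Member->getType()) >= 0 &&
         "initializer is not a member type of this union");
  return ConstantUnion::get(UT, Member);  // one operand, or zeroinitializer
}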
if (C1->getType()->isPPC_FP128Ty()) @@ -896,51 +1117,51 @@ Constant *llvm::ConstantFoldBinaryInstruction(LLVMContext &Context, default: break; case Instruction::Add: - return ConstantInt::get(Context, C1V + C2V); + return ConstantInt::get(CI1->getContext(), C1V + C2V); case Instruction::Sub: - return ConstantInt::get(Context, C1V - C2V); + return ConstantInt::get(CI1->getContext(), C1V - C2V); case Instruction::Mul: - return ConstantInt::get(Context, C1V * C2V); + return ConstantInt::get(CI1->getContext(), C1V * C2V); case Instruction::UDiv: assert(!CI2->isNullValue() && "Div by zero handled above"); - return ConstantInt::get(Context, C1V.udiv(C2V)); + return ConstantInt::get(CI1->getContext(), C1V.udiv(C2V)); case Instruction::SDiv: assert(!CI2->isNullValue() && "Div by zero handled above"); if (C2V.isAllOnesValue() && C1V.isMinSignedValue()) return UndefValue::get(CI1->getType()); // MIN_INT / -1 -> undef - return ConstantInt::get(Context, C1V.sdiv(C2V)); + return ConstantInt::get(CI1->getContext(), C1V.sdiv(C2V)); case Instruction::URem: assert(!CI2->isNullValue() && "Div by zero handled above"); - return ConstantInt::get(Context, C1V.urem(C2V)); + return ConstantInt::get(CI1->getContext(), C1V.urem(C2V)); case Instruction::SRem: assert(!CI2->isNullValue() && "Div by zero handled above"); if (C2V.isAllOnesValue() && C1V.isMinSignedValue()) return UndefValue::get(CI1->getType()); // MIN_INT % -1 -> undef - return ConstantInt::get(Context, C1V.srem(C2V)); + return ConstantInt::get(CI1->getContext(), C1V.srem(C2V)); case Instruction::And: - return ConstantInt::get(Context, C1V & C2V); + return ConstantInt::get(CI1->getContext(), C1V & C2V); case Instruction::Or: - return ConstantInt::get(Context, C1V | C2V); + return ConstantInt::get(CI1->getContext(), C1V | C2V); case Instruction::Xor: - return ConstantInt::get(Context, C1V ^ C2V); + return ConstantInt::get(CI1->getContext(), C1V ^ C2V); case Instruction::Shl: { uint32_t shiftAmt = C2V.getZExtValue(); if (shiftAmt < C1V.getBitWidth()) - return ConstantInt::get(Context, C1V.shl(shiftAmt)); + return ConstantInt::get(CI1->getContext(), C1V.shl(shiftAmt)); else return UndefValue::get(C1->getType()); // too big shift is undef } case Instruction::LShr: { uint32_t shiftAmt = C2V.getZExtValue(); if (shiftAmt < C1V.getBitWidth()) - return ConstantInt::get(Context, C1V.lshr(shiftAmt)); + return ConstantInt::get(CI1->getContext(), C1V.lshr(shiftAmt)); else return UndefValue::get(C1->getType()); // too big shift is undef } case Instruction::AShr: { uint32_t shiftAmt = C2V.getZExtValue(); if (shiftAmt < C1V.getBitWidth()) - return ConstantInt::get(Context, C1V.ashr(shiftAmt)); + return ConstantInt::get(CI1->getContext(), C1V.ashr(shiftAmt)); else return UndefValue::get(C1->getType()); // too big shift is undef } @@ -970,19 +1191,19 @@ Constant *llvm::ConstantFoldBinaryInstruction(LLVMContext &Context, break; case Instruction::FAdd: (void)C3V.add(C2V, APFloat::rmNearestTiesToEven); - return ConstantFP::get(Context, C3V); + return ConstantFP::get(C1->getContext(), C3V); case Instruction::FSub: (void)C3V.subtract(C2V, APFloat::rmNearestTiesToEven); - return ConstantFP::get(Context, C3V); + return ConstantFP::get(C1->getContext(), C3V); case Instruction::FMul: (void)C3V.multiply(C2V, APFloat::rmNearestTiesToEven); - return ConstantFP::get(Context, C3V); + return ConstantFP::get(C1->getContext(), C3V); case Instruction::FDiv: (void)C3V.divide(C2V, APFloat::rmNearestTiesToEven); - return ConstantFP::get(Context, C3V); + return 
ConstantFP::get(C1->getContext(), C3V); case Instruction::FRem: (void)C3V.mod(C2V, APFloat::rmNearestTiesToEven); - return ConstantFP::get(Context, C3V); + return ConstantFP::get(C1->getContext(), C3V); } } } else if (const VectorType *VTy = dyn_cast<VectorType>(C1->getType())) { @@ -1127,10 +1348,19 @@ Constant *llvm::ConstantFoldBinaryInstruction(LLVMContext &Context, } } - if (isa<ConstantExpr>(C1)) { + if (ConstantExpr *CE1 = dyn_cast<ConstantExpr>(C1)) { // There are many possible foldings we could do here. We should probably // at least fold add of a pointer with an integer into the appropriate // getelementptr. This will improve alias analysis a bit. + + // Given ((a + b) + c), if (b + c) folds to something interesting, return + // (a + (b + c)). + if (Instruction::isAssociative(Opcode, C1->getType()) && + CE1->getOpcode() == Opcode) { + Constant *T = ConstantExpr::get(Opcode, CE1->getOperand(1), C2); + if (!isa<ConstantExpr>(T) || cast<ConstantExpr>(T)->getOpcode() != Opcode) + return ConstantExpr::get(Opcode, CE1->getOperand(0), T); + } } else if (isa<ConstantExpr>(C2)) { // If C2 is a constant expr and C1 isn't, flop them around and fold the // other way if possible. @@ -1143,7 +1373,7 @@ Constant *llvm::ConstantFoldBinaryInstruction(LLVMContext &Context, case Instruction::Or: case Instruction::Xor: // No change of opcode required. - return ConstantFoldBinaryInstruction(Context, Opcode, C2, C1); + return ConstantFoldBinaryInstruction(Opcode, C2, C1); case Instruction::Shl: case Instruction::LShr: @@ -1162,7 +1392,7 @@ Constant *llvm::ConstantFoldBinaryInstruction(LLVMContext &Context, } // i1 can be simplified in many cases. - if (C1->getType()->isInteger(1)) { + if (C1->getType()->isIntegerTy(1)) { switch (Opcode) { case Instruction::Add: case Instruction::Sub: @@ -1184,7 +1414,7 @@ Constant *llvm::ConstantFoldBinaryInstruction(LLVMContext &Context, case Instruction::SRem: // We can assume that C2 == 1. If it were zero the result would be // undefined through division by zero. - return ConstantInt::getFalse(Context); + return ConstantInt::getFalse(C1->getContext()); default: break; } @@ -1218,8 +1448,7 @@ static bool isMaybeZeroSizedType(const Type *Ty) { /// first is less than the second, return -1, if the second is less than the /// first, return 1. If the constants are not integral, return -2. /// -static int IdxCompare(LLVMContext &Context, Constant *C1, Constant *C2, - const Type *ElTy) { +static int IdxCompare(Constant *C1, Constant *C2, const Type *ElTy) { if (C1 == C2) return 0; // Ok, we found a different index. If they are not ConstantInt, we can't do @@ -1229,11 +1458,11 @@ static int IdxCompare(LLVMContext &Context, Constant *C1, Constant *C2, // Ok, we have two differing integer indices. Sign extend them to be the same // type. Long is always big enough, so we use it. - if (!C1->getType()->isInteger(64)) - C1 = ConstantExpr::getSExt(C1, Type::getInt64Ty(Context)); + if (!C1->getType()->isIntegerTy(64)) + C1 = ConstantExpr::getSExt(C1, Type::getInt64Ty(C1->getContext())); - if (!C2->getType()->isInteger(64)) - C2 = ConstantExpr::getSExt(C2, Type::getInt64Ty(Context)); + if (!C2->getType()->isIntegerTy(64)) + C2 = ConstantExpr::getSExt(C2, Type::getInt64Ty(C1->getContext())); if (C1 == C2) return 0; // They are equal @@ -1262,8 +1491,7 @@ static int IdxCompare(LLVMContext &Context, Constant *C1, Constant *C2, /// To simplify this code we canonicalize the relation so that the first /// operand is always the most "complex" of the two. 
We consider ConstantFP /// to be the simplest, and ConstantExprs to be the most complex. -static FCmpInst::Predicate evaluateFCmpRelation(LLVMContext &Context, - Constant *V1, Constant *V2) { +static FCmpInst::Predicate evaluateFCmpRelation(Constant *V1, Constant *V2) { assert(V1->getType() == V2->getType() && "Cannot compare values of different types!"); @@ -1296,7 +1524,7 @@ static FCmpInst::Predicate evaluateFCmpRelation(LLVMContext &Context, } // If the first operand is simple and second is ConstantExpr, swap operands. - FCmpInst::Predicate SwappedRelation = evaluateFCmpRelation(Context, V2, V1); + FCmpInst::Predicate SwappedRelation = evaluateFCmpRelation(V2, V1); if (SwappedRelation != FCmpInst::BAD_FCMP_PREDICATE) return FCmpInst::getSwappedPredicate(SwappedRelation); } else { @@ -1331,16 +1559,16 @@ static FCmpInst::Predicate evaluateFCmpRelation(LLVMContext &Context, /// constants (like ConstantInt) to be the simplest, followed by /// GlobalValues, followed by ConstantExpr's (the most complex). /// -static ICmpInst::Predicate evaluateICmpRelation(LLVMContext &Context, - Constant *V1, - Constant *V2, +static ICmpInst::Predicate evaluateICmpRelation(Constant *V1, Constant *V2, bool isSigned) { assert(V1->getType() == V2->getType() && "Cannot compare different types of values!"); if (V1 == V2) return ICmpInst::ICMP_EQ; - if (!isa<ConstantExpr>(V1) && !isa<GlobalValue>(V1)) { - if (!isa<GlobalValue>(V2) && !isa<ConstantExpr>(V2)) { + if (!isa<ConstantExpr>(V1) && !isa<GlobalValue>(V1) && + !isa<BlockAddress>(V1)) { + if (!isa<GlobalValue>(V2) && !isa<ConstantExpr>(V2) && + !isa<BlockAddress>(V2)) { // We distilled this down to a simple case, use the standard constant // folder. ConstantInt *R = 0; @@ -1363,36 +1591,63 @@ static ICmpInst::Predicate evaluateICmpRelation(LLVMContext &Context, // If the first operand is simple, swap operands. ICmpInst::Predicate SwappedRelation = - evaluateICmpRelation(Context, V2, V1, isSigned); + evaluateICmpRelation(V2, V1, isSigned); if (SwappedRelation != ICmpInst::BAD_ICMP_PREDICATE) return ICmpInst::getSwappedPredicate(SwappedRelation); - } else if (const GlobalValue *CPR1 = dyn_cast<GlobalValue>(V1)) { + } else if (const GlobalValue *GV = dyn_cast<GlobalValue>(V1)) { if (isa<ConstantExpr>(V2)) { // Swap as necessary. ICmpInst::Predicate SwappedRelation = - evaluateICmpRelation(Context, V2, V1, isSigned); + evaluateICmpRelation(V2, V1, isSigned); if (SwappedRelation != ICmpInst::BAD_ICMP_PREDICATE) return ICmpInst::getSwappedPredicate(SwappedRelation); - else - return ICmpInst::BAD_ICMP_PREDICATE; + return ICmpInst::BAD_ICMP_PREDICATE; } - // Now we know that the RHS is a GlobalValue or simple constant, - // which (since the types must match) means that it's a ConstantPointerNull. - if (const GlobalValue *CPR2 = dyn_cast<GlobalValue>(V2)) { + // Now we know that the RHS is a GlobalValue, BlockAddress or simple + // constant (which, since the types must match, means that it's a + // ConstantPointerNull). + if (const GlobalValue *GV2 = dyn_cast<GlobalValue>(V2)) { // Don't try to decide equality of aliases. - if (!isa<GlobalAlias>(CPR1) && !isa<GlobalAlias>(CPR2)) - if (!CPR1->hasExternalWeakLinkage() || !CPR2->hasExternalWeakLinkage()) + if (!isa<GlobalAlias>(GV) && !isa<GlobalAlias>(GV2)) + if (!GV->hasExternalWeakLinkage() || !GV2->hasExternalWeakLinkage()) return ICmpInst::ICMP_NE; + } else if (isa<BlockAddress>(V2)) { + return ICmpInst::ICMP_NE; // Globals never equal labels. 
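// The BlockAddress cases woven through evaluateICmpRelation here reduce to
// three facts: labels are never null, labels never equal globals, and block
// addresses from different functions are distinct (two labels in the same
// function may coincide if blocks get merged, so that case stays unknown).
// Restated compactly; the full case analysis continues below:
static ICmpInst::Predicate relateBlockAddr(const BlockAddress *BA,
                                           const Constant *RHS) {
  if (const BlockAddress *BA2 = dyn_cast<BlockAddress>(RHS))
    return BA2->getFunction() != BA->getFunction()
               ? ICmpInst::ICMP_NE                 // distinct functions
               : ICmpInst::BAD_ICMP_PREDICATE;     // possibly the same block
  return ICmpInst::ICMP_NE;                        // vs null or a global
}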
} else { assert(isa<ConstantPointerNull>(V2) && "Canonicalization guarantee!"); - // GlobalVals can never be null. Don't try to evaluate aliases. - if (!CPR1->hasExternalWeakLinkage() && !isa<GlobalAlias>(CPR1)) + // GlobalVals can never be null unless they have external weak linkage. + // We don't try to evaluate aliases here. + if (!GV->hasExternalWeakLinkage() && !isa<GlobalAlias>(GV)) return ICmpInst::ICMP_NE; } + } else if (const BlockAddress *BA = dyn_cast<BlockAddress>(V1)) { + if (isa<ConstantExpr>(V2)) { // Swap as necessary. + ICmpInst::Predicate SwappedRelation = + evaluateICmpRelation(V2, V1, isSigned); + if (SwappedRelation != ICmpInst::BAD_ICMP_PREDICATE) + return ICmpInst::getSwappedPredicate(SwappedRelation); + return ICmpInst::BAD_ICMP_PREDICATE; + } + + // Now we know that the RHS is a GlobalValue, BlockAddress or simple + // constant (which, since the types must match, means that it is a + // ConstantPointerNull). + if (const BlockAddress *BA2 = dyn_cast<BlockAddress>(V2)) { + // Block address in another function can't equal this one, but block + // addresses in the current function might be the same if blocks are + // empty. + if (BA2->getFunction() != BA->getFunction()) + return ICmpInst::ICMP_NE; + } else { + // Block addresses aren't null, don't equal the address of globals. + assert((isa<ConstantPointerNull>(V2) || isa<GlobalValue>(V2)) && + "Canonicalization guarantee!"); + return ICmpInst::ICMP_NE; + } } else { // Ok, the LHS is known to be a constantexpr. The RHS can be any of a - // constantexpr, a CPR, or a simple constant. + // constantexpr, a global, block address, or a simple constant. ConstantExpr *CE1 = cast<ConstantExpr>(V1); Constant *CE1Op0 = CE1->getOperand(0); @@ -1412,10 +1667,10 @@ static ICmpInst::Predicate evaluateICmpRelation(LLVMContext &Context, // If the cast is not actually changing bits, and the second operand is a // null pointer, do the comparison with the pre-casted value. if (V2->isNullValue() && - (isa<PointerType>(CE1->getType()) || CE1->getType()->isInteger())) { + (isa<PointerType>(CE1->getType()) || CE1->getType()->isIntegerTy())) { if (CE1->getOpcode() == Instruction::ZExt) isSigned = false; if (CE1->getOpcode() == Instruction::SExt) isSigned = true; - return evaluateICmpRelation(Context, CE1Op0, + return evaluateICmpRelation(CE1Op0, Constant::getNullValue(CE1Op0->getType()), isSigned); } @@ -1447,9 +1702,9 @@ static ICmpInst::Predicate evaluateICmpRelation(LLVMContext &Context, return ICmpInst::ICMP_EQ; } // Otherwise, we can't really say if the first operand is null or not. - } else if (const GlobalValue *CPR2 = dyn_cast<GlobalValue>(V2)) { + } else if (const GlobalValue *GV2 = dyn_cast<GlobalValue>(V2)) { if (isa<ConstantPointerNull>(CE1Op0)) { - if (CPR2->hasExternalWeakLinkage()) + if (GV2->hasExternalWeakLinkage()) // Weak linkage GVals could be zero or not. We're comparing it to // a null pointer, so its less-or-equal return isSigned ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; @@ -1457,8 +1712,8 @@ static ICmpInst::Predicate evaluateICmpRelation(LLVMContext &Context, // If its not weak linkage, the GVal must have a non-zero address // so the result is less-than return isSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; - } else if (const GlobalValue *CPR1 = dyn_cast<GlobalValue>(CE1Op0)) { - if (CPR1 == CPR2) { + } else if (const GlobalValue *GV = dyn_cast<GlobalValue>(CE1Op0)) { + if (GV == GV2) { // If this is a getelementptr of the same global, then it must be // different. 
Because the types must match, the getelementptr could // only have at most one index, and because we fold getelementptr's @@ -1504,7 +1759,7 @@ static ICmpInst::Predicate evaluateICmpRelation(LLVMContext &Context, gep_type_iterator GTI = gep_type_begin(CE1); for (;i != CE1->getNumOperands() && i != CE2->getNumOperands(); ++i, ++GTI) - switch (IdxCompare(Context, CE1->getOperand(i), + switch (IdxCompare(CE1->getOperand(i), CE2->getOperand(i), GTI.getIndexedType())) { case -1: return isSigned ? ICmpInst::ICMP_SLT:ICmpInst::ICMP_ULT; case 1: return isSigned ? ICmpInst::ICMP_SGT:ICmpInst::ICMP_UGT; @@ -1540,14 +1795,14 @@ static ICmpInst::Predicate evaluateICmpRelation(LLVMContext &Context, return ICmpInst::BAD_ICMP_PREDICATE; } -Constant *llvm::ConstantFoldCompareInstruction(LLVMContext &Context, - unsigned short pred, +Constant *llvm::ConstantFoldCompareInstruction(unsigned short pred, Constant *C1, Constant *C2) { const Type *ResultTy; if (const VectorType *VT = dyn_cast<VectorType>(C1->getType())) - ResultTy = VectorType::get(Type::getInt1Ty(Context), VT->getNumElements()); + ResultTy = VectorType::get(Type::getInt1Ty(C1->getContext()), + VT->getNumElements()); else - ResultTy = Type::getInt1Ty(Context); + ResultTy = Type::getInt1Ty(C1->getContext()); // Fold FCMP_FALSE/FCMP_TRUE unconditionally. if (pred == FCmpInst::FCMP_FALSE) @@ -1570,9 +1825,9 @@ Constant *llvm::ConstantFoldCompareInstruction(LLVMContext &Context, // Don't try to evaluate aliases. External weak GV can be null. if (!isa<GlobalAlias>(GV) && !GV->hasExternalWeakLinkage()) { if (pred == ICmpInst::ICMP_EQ) - return ConstantInt::getFalse(Context); + return ConstantInt::getFalse(C1->getContext()); else if (pred == ICmpInst::ICMP_NE) - return ConstantInt::getTrue(Context); + return ConstantInt::getTrue(C1->getContext()); } // icmp eq/ne(GV,null) -> false/true } else if (C2->isNullValue()) { @@ -1580,14 +1835,14 @@ Constant *llvm::ConstantFoldCompareInstruction(LLVMContext &Context, // Don't try to evaluate aliases. External weak GV can be null. if (!isa<GlobalAlias>(GV) && !GV->hasExternalWeakLinkage()) { if (pred == ICmpInst::ICMP_EQ) - return ConstantInt::getFalse(Context); + return ConstantInt::getFalse(C1->getContext()); else if (pred == ICmpInst::ICMP_NE) - return ConstantInt::getTrue(Context); + return ConstantInt::getTrue(C1->getContext()); } } // If the comparison is a comparison between two i1's, simplify it. 
- if (C1->getType()->isInteger(1)) { + if (C1->getType()->isIntegerTy(1)) { switch(pred) { case ICmpInst::ICMP_EQ: if (isa<ConstantInt>(C2)) @@ -1605,26 +1860,16 @@ Constant *llvm::ConstantFoldCompareInstruction(LLVMContext &Context, APInt V2 = cast<ConstantInt>(C2)->getValue(); switch (pred) { default: llvm_unreachable("Invalid ICmp Predicate"); return 0; - case ICmpInst::ICMP_EQ: - return ConstantInt::get(Type::getInt1Ty(Context), V1 == V2); - case ICmpInst::ICMP_NE: - return ConstantInt::get(Type::getInt1Ty(Context), V1 != V2); - case ICmpInst::ICMP_SLT: - return ConstantInt::get(Type::getInt1Ty(Context), V1.slt(V2)); - case ICmpInst::ICMP_SGT: - return ConstantInt::get(Type::getInt1Ty(Context), V1.sgt(V2)); - case ICmpInst::ICMP_SLE: - return ConstantInt::get(Type::getInt1Ty(Context), V1.sle(V2)); - case ICmpInst::ICMP_SGE: - return ConstantInt::get(Type::getInt1Ty(Context), V1.sge(V2)); - case ICmpInst::ICMP_ULT: - return ConstantInt::get(Type::getInt1Ty(Context), V1.ult(V2)); - case ICmpInst::ICMP_UGT: - return ConstantInt::get(Type::getInt1Ty(Context), V1.ugt(V2)); - case ICmpInst::ICMP_ULE: - return ConstantInt::get(Type::getInt1Ty(Context), V1.ule(V2)); - case ICmpInst::ICMP_UGE: - return ConstantInt::get(Type::getInt1Ty(Context), V1.uge(V2)); + case ICmpInst::ICMP_EQ: return ConstantInt::get(ResultTy, V1 == V2); + case ICmpInst::ICMP_NE: return ConstantInt::get(ResultTy, V1 != V2); + case ICmpInst::ICMP_SLT: return ConstantInt::get(ResultTy, V1.slt(V2)); + case ICmpInst::ICMP_SGT: return ConstantInt::get(ResultTy, V1.sgt(V2)); + case ICmpInst::ICMP_SLE: return ConstantInt::get(ResultTy, V1.sle(V2)); + case ICmpInst::ICMP_SGE: return ConstantInt::get(ResultTy, V1.sge(V2)); + case ICmpInst::ICMP_ULT: return ConstantInt::get(ResultTy, V1.ult(V2)); + case ICmpInst::ICMP_UGT: return ConstantInt::get(ResultTy, V1.ugt(V2)); + case ICmpInst::ICMP_ULE: return ConstantInt::get(ResultTy, V1.ule(V2)); + case ICmpInst::ICMP_UGE: return ConstantInt::get(ResultTy, V1.uge(V2)); } } else if (isa<ConstantFP>(C1) && isa<ConstantFP>(C2)) { APFloat C1V = cast<ConstantFP>(C1)->getValueAPF(); @@ -1632,47 +1877,47 @@ Constant *llvm::ConstantFoldCompareInstruction(LLVMContext &Context, APFloat::cmpResult R = C1V.compare(C2V); switch (pred) { default: llvm_unreachable("Invalid FCmp Predicate"); return 0; - case FCmpInst::FCMP_FALSE: return ConstantInt::getFalse(Context); - case FCmpInst::FCMP_TRUE: return ConstantInt::getTrue(Context); + case FCmpInst::FCMP_FALSE: return Constant::getNullValue(ResultTy); + case FCmpInst::FCMP_TRUE: return Constant::getAllOnesValue(ResultTy); case FCmpInst::FCMP_UNO: - return ConstantInt::get(Type::getInt1Ty(Context), R==APFloat::cmpUnordered); + return ConstantInt::get(ResultTy, R==APFloat::cmpUnordered); case FCmpInst::FCMP_ORD: - return ConstantInt::get(Type::getInt1Ty(Context), R!=APFloat::cmpUnordered); + return ConstantInt::get(ResultTy, R!=APFloat::cmpUnordered); case FCmpInst::FCMP_UEQ: - return ConstantInt::get(Type::getInt1Ty(Context), R==APFloat::cmpUnordered || - R==APFloat::cmpEqual); + return ConstantInt::get(ResultTy, R==APFloat::cmpUnordered || + R==APFloat::cmpEqual); case FCmpInst::FCMP_OEQ: - return ConstantInt::get(Type::getInt1Ty(Context), R==APFloat::cmpEqual); + return ConstantInt::get(ResultTy, R==APFloat::cmpEqual); case FCmpInst::FCMP_UNE: - return ConstantInt::get(Type::getInt1Ty(Context), R!=APFloat::cmpEqual); + return ConstantInt::get(ResultTy, R!=APFloat::cmpEqual); case FCmpInst::FCMP_ONE: - return ConstantInt::get(Type::getInt1Ty(Context), 
R==APFloat::cmpLessThan || - R==APFloat::cmpGreaterThan); + return ConstantInt::get(ResultTy, R==APFloat::cmpLessThan || + R==APFloat::cmpGreaterThan); case FCmpInst::FCMP_ULT: - return ConstantInt::get(Type::getInt1Ty(Context), R==APFloat::cmpUnordered || - R==APFloat::cmpLessThan); + return ConstantInt::get(ResultTy, R==APFloat::cmpUnordered || + R==APFloat::cmpLessThan); case FCmpInst::FCMP_OLT: - return ConstantInt::get(Type::getInt1Ty(Context), R==APFloat::cmpLessThan); + return ConstantInt::get(ResultTy, R==APFloat::cmpLessThan); case FCmpInst::FCMP_UGT: - return ConstantInt::get(Type::getInt1Ty(Context), R==APFloat::cmpUnordered || - R==APFloat::cmpGreaterThan); + return ConstantInt::get(ResultTy, R==APFloat::cmpUnordered || + R==APFloat::cmpGreaterThan); case FCmpInst::FCMP_OGT: - return ConstantInt::get(Type::getInt1Ty(Context), R==APFloat::cmpGreaterThan); + return ConstantInt::get(ResultTy, R==APFloat::cmpGreaterThan); case FCmpInst::FCMP_ULE: - return ConstantInt::get(Type::getInt1Ty(Context), R!=APFloat::cmpGreaterThan); + return ConstantInt::get(ResultTy, R!=APFloat::cmpGreaterThan); case FCmpInst::FCMP_OLE: - return ConstantInt::get(Type::getInt1Ty(Context), R==APFloat::cmpLessThan || - R==APFloat::cmpEqual); + return ConstantInt::get(ResultTy, R==APFloat::cmpLessThan || + R==APFloat::cmpEqual); case FCmpInst::FCMP_UGE: - return ConstantInt::get(Type::getInt1Ty(Context), R!=APFloat::cmpLessThan); + return ConstantInt::get(ResultTy, R!=APFloat::cmpLessThan); case FCmpInst::FCMP_OGE: - return ConstantInt::get(Type::getInt1Ty(Context), R==APFloat::cmpGreaterThan || - R==APFloat::cmpEqual); + return ConstantInt::get(ResultTy, R==APFloat::cmpGreaterThan || + R==APFloat::cmpEqual); } } else if (isa<VectorType>(C1->getType())) { SmallVector<Constant*, 16> C1Elts, C2Elts; - C1->getVectorElements(Context, C1Elts); - C2->getVectorElements(Context, C2Elts); + C1->getVectorElements(C1Elts); + C2->getVectorElements(C2Elts); if (C1Elts.empty() || C2Elts.empty()) return 0; @@ -1686,9 +1931,9 @@ Constant *llvm::ConstantFoldCompareInstruction(LLVMContext &Context, return ConstantVector::get(&ResElts[0], ResElts.size()); } - if (C1->getType()->isFloatingPoint()) { + if (C1->getType()->isFloatingPointTy()) { int Result = -1; // -1 = unknown, 0 = known false, 1 = known true. - switch (evaluateFCmpRelation(Context, C1, C2)) { + switch (evaluateFCmpRelation(C1, C2)) { default: llvm_unreachable("Unknown relation!"); case FCmpInst::FCMP_UNO: case FCmpInst::FCMP_ORD: @@ -1742,12 +1987,12 @@ Constant *llvm::ConstantFoldCompareInstruction(LLVMContext &Context, // If we evaluated the result, return it now. if (Result != -1) - return ConstantInt::get(Type::getInt1Ty(Context), Result); + return ConstantInt::get(ResultTy, Result); } else { // Evaluate the relation between the two constants, per the predicate. int Result = -1; // -1 = unknown, 0 = known false, 1 = known true. - switch (evaluateICmpRelation(Context, C1, C2, CmpInst::isSigned(pred))) { + switch (evaluateICmpRelation(C1, C2, CmpInst::isSigned(pred))) { default: llvm_unreachable("Unknown relational!"); case ICmpInst::BAD_ICMP_PREDICATE: break; // Couldn't determine anything about these constants. @@ -1812,13 +2057,15 @@ Constant *llvm::ConstantFoldCompareInstruction(LLVMContext &Context, // If we evaluated the result, return it now. 
if (Result != -1) - return ConstantInt::get(Type::getInt1Ty(Context), Result); + return ConstantInt::get(ResultTy, Result); // If the right hand side is a bitcast, try using its inverse to simplify - // it by moving it to the left hand side. + // it by moving it to the left hand side. We can't do this if it would turn + // a vector compare into a scalar compare or visa versa. if (ConstantExpr *CE2 = dyn_cast<ConstantExpr>(C2)) { - if (CE2->getOpcode() == Instruction::BitCast) { - Constant *CE2Op0 = CE2->getOperand(0); + Constant *CE2Op0 = CE2->getOperand(0); + if (CE2->getOpcode() == Instruction::BitCast && + isa<VectorType>(CE2->getType())==isa<VectorType>(CE2Op0->getType())) { Constant *Inverse = ConstantExpr::getBitCast(C1, CE2Op0->getType()); return ConstantExpr::getICmp(pred, Inverse, CE2Op0); } @@ -1890,8 +2137,7 @@ static bool isInBoundsIndices(Constant *const *Idxs, size_t NumIdx) { return true; } -Constant *llvm::ConstantFoldGetElementPtr(LLVMContext &Context, - Constant *C, +Constant *llvm::ConstantFoldGetElementPtr(Constant *C, bool inBounds, Constant* const *Idxs, unsigned NumIdx) { @@ -1951,10 +2197,9 @@ Constant *llvm::ConstantFoldGetElementPtr(LLVMContext &Context, if (!Idx0->isNullValue()) { const Type *IdxTy = Combined->getType(); if (IdxTy != Idx0->getType()) { - Constant *C1 = - ConstantExpr::getSExtOrBitCast(Idx0, Type::getInt64Ty(Context)); - Constant *C2 = ConstantExpr::getSExtOrBitCast(Combined, - Type::getInt64Ty(Context)); + const Type *Int64Ty = Type::getInt64Ty(IdxTy->getContext()); + Constant *C1 = ConstantExpr::getSExtOrBitCast(Idx0, Int64Ty); + Constant *C2 = ConstantExpr::getSExtOrBitCast(Combined, Int64Ty); Combined = ConstantExpr::get(Instruction::Add, C1, C2); } else { Combined = @@ -1975,7 +2220,7 @@ Constant *llvm::ConstantFoldGetElementPtr(LLVMContext &Context, } // Implement folding of: - // int* getelementptr ([2 x int]* cast ([3 x int]* %X to [2 x int]*), + // int* getelementptr ([2 x int]* bitcast ([3 x int]* %X to [2 x int]*), // long 0, long 0) // To: int* getelementptr ([3 x int]* %X, long 0, long 0) // @@ -1992,28 +2237,6 @@ Constant *llvm::ConstantFoldGetElementPtr(LLVMContext &Context, ConstantExpr::getGetElementPtr( (Constant*)CE->getOperand(0), Idxs, NumIdx); } - - // Fold: getelementptr (i8* inttoptr (i64 1 to i8*), i32 -1) - // Into: inttoptr (i64 0 to i8*) - // This happens with pointers to member functions in C++. - if (CE->getOpcode() == Instruction::IntToPtr && NumIdx == 1 && - isa<ConstantInt>(CE->getOperand(0)) && isa<ConstantInt>(Idxs[0]) && - cast<PointerType>(CE->getType())->getElementType() == - Type::getInt8Ty(Context)) { - Constant *Base = CE->getOperand(0); - Constant *Offset = Idxs[0]; - - // Convert the smaller integer to the larger type. - if (Offset->getType()->getPrimitiveSizeInBits() < - Base->getType()->getPrimitiveSizeInBits()) - Offset = ConstantExpr::getSExt(Offset, Base->getType()); - else if (Base->getType()->getPrimitiveSizeInBits() < - Offset->getType()->getPrimitiveSizeInBits()) - Base = ConstantExpr::getZExt(Base, Offset->getType()); - - Base = ConstantExpr::getAdd(Base, Offset); - return ConstantExpr::getIntToPtr(Base, CE->getType()); - } } // Check to see if any array indices are not within the corresponding @@ -2043,12 +2266,12 @@ Constant *llvm::ConstantFoldGetElementPtr(LLVMContext &Context, // Before adding, extend both operands to i64 to avoid // overflow trouble. 
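// Earlier in this hunk, moving a bitcast across the compare is allowed only
// when it preserves the compare's shape: both operands stay scalars or both
// stay vectors (otherwise the fold would change the result from i1 to
// <N x i1> or back). That guard as a reusable predicate:
static bool sameCompareShape(const Type *A, const Type *B) {
  return isa<VectorType>(A) == isa<VectorType>(B);
}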
- if (!PrevIdx->getType()->isInteger(64)) + if (!PrevIdx->getType()->isIntegerTy(64)) PrevIdx = ConstantExpr::getSExt(PrevIdx, - Type::getInt64Ty(Context)); - if (!Div->getType()->isInteger(64)) + Type::getInt64Ty(Div->getContext())); + if (!Div->getType()->isIntegerTy(64)) Div = ConstantExpr::getSExt(Div, - Type::getInt64Ty(Context)); + Type::getInt64Ty(Div->getContext())); NewIdxs[i-1] = ConstantExpr::getAdd(PrevIdx, Div); } else { diff --git a/lib/VMCore/ConstantFold.h b/lib/VMCore/ConstantFold.h index cc97001..d2dbbdd 100644 --- a/lib/VMCore/ConstantFold.h +++ b/lib/VMCore/ConstantFold.h @@ -23,46 +23,31 @@ namespace llvm { class Value; class Constant; class Type; - class LLVMContext; // Constant fold various types of instruction... Constant *ConstantFoldCastInstruction( - LLVMContext &Context, unsigned opcode, ///< The opcode of the cast Constant *V, ///< The source constant const Type *DestTy ///< The destination type ); - Constant *ConstantFoldSelectInstruction(LLVMContext &Context, - Constant *Cond, + Constant *ConstantFoldSelectInstruction(Constant *Cond, Constant *V1, Constant *V2); - Constant *ConstantFoldExtractElementInstruction(LLVMContext &Context, - Constant *Val, - Constant *Idx); - Constant *ConstantFoldInsertElementInstruction(LLVMContext &Context, - Constant *Val, - Constant *Elt, + Constant *ConstantFoldExtractElementInstruction(Constant *Val, Constant *Idx); + Constant *ConstantFoldInsertElementInstruction(Constant *Val, Constant *Elt, Constant *Idx); - Constant *ConstantFoldShuffleVectorInstruction(LLVMContext &Context, - Constant *V1, - Constant *V2, + Constant *ConstantFoldShuffleVectorInstruction(Constant *V1, Constant *V2, Constant *Mask); - Constant *ConstantFoldExtractValueInstruction(LLVMContext &Context, - Constant *Agg, + Constant *ConstantFoldExtractValueInstruction(Constant *Agg, const unsigned *Idxs, unsigned NumIdx); - Constant *ConstantFoldInsertValueInstruction(LLVMContext &Context, - Constant *Agg, - Constant *Val, + Constant *ConstantFoldInsertValueInstruction(Constant *Agg, Constant *Val, const unsigned *Idxs, unsigned NumIdx); - Constant *ConstantFoldBinaryInstruction(LLVMContext &Context, - unsigned Opcode, Constant *V1, + Constant *ConstantFoldBinaryInstruction(unsigned Opcode, Constant *V1, Constant *V2); - Constant *ConstantFoldCompareInstruction(LLVMContext &Context, - unsigned short predicate, + Constant *ConstantFoldCompareInstruction(unsigned short predicate, Constant *C1, Constant *C2); - Constant *ConstantFoldGetElementPtr(LLVMContext &Context, Constant *C, - bool inBounds, + Constant *ConstantFoldGetElementPtr(Constant *C, bool inBounds, Constant* const *Idxs, unsigned NumIdx); } // End llvm namespace diff --git a/lib/VMCore/Constants.cpp b/lib/VMCore/Constants.cpp index 916aac6..98040ea 100644 --- a/lib/VMCore/Constants.cpp +++ b/lib/VMCore/Constants.cpp @@ -228,8 +228,7 @@ Constant::PossibleRelocationsTy Constant::getRelocationInfo() const { /// type, returns the elements of the vector in the specified smallvector. /// This handles breaking down a vector undef into undef elements, etc. For /// constant exprs and other cases we can't handle, we return an empty vector. 
-void Constant::getVectorElements(LLVMContext &Context, - SmallVectorImpl<Constant*> &Elts) const { +void Constant::getVectorElements(SmallVectorImpl<Constant*> &Elts) const { assert(isa<VectorType>(getType()) && "Not a vector constant!"); if (const ConstantVector *CV = dyn_cast<ConstantVector>(this)) { @@ -405,13 +404,13 @@ ConstantFP* ConstantFP::getNegativeZero(const Type* Ty) { Constant* ConstantFP::getZeroValueForNegation(const Type* Ty) { if (const VectorType *PTy = dyn_cast<VectorType>(Ty)) - if (PTy->getElementType()->isFloatingPoint()) { + if (PTy->getElementType()->isFloatingPointTy()) { std::vector<Constant*> zeros(PTy->getNumElements(), getNegativeZero(PTy->getElementType())); return ConstantVector::get(PTy, zeros); } - if (Ty->isFloatingPoint()) + if (Ty->isFloatingPointTy()) return getNegativeZero(Ty); return Constant::getNullValue(Ty); @@ -586,6 +585,27 @@ Constant* ConstantStruct::get(LLVMContext &Context, return get(Context, std::vector<Constant*>(Vals, Vals+NumVals), Packed); } +ConstantUnion::ConstantUnion(const UnionType *T, Constant* V) + : Constant(T, ConstantUnionVal, + OperandTraits<ConstantUnion>::op_end(this) - 1, 1) { + Use *OL = OperandList; + assert(T->getElementTypeIndex(V->getType()) >= 0 && + "Initializer for union element isn't a member of union type!"); + *OL = V; +} + +// ConstantUnion accessors. +Constant* ConstantUnion::get(const UnionType* T, Constant* V) { + LLVMContextImpl* pImpl = T->getContext().pImpl; + + // Create a ConstantAggregateZero value if all elements are zeros... + if (!V->isNullValue()) + return pImpl->UnionConstants.getOrCreate(T, V); + + return ConstantAggregateZero::get(T); +} + + ConstantVector::ConstantVector(const VectorType *T, const std::vector<Constant*> &V) : Constant(T, ConstantVectorVal, @@ -641,26 +661,47 @@ Constant* ConstantVector::get(Constant* const* Vals, unsigned NumVals) { } Constant* ConstantExpr::getNSWNeg(Constant* C) { - assert(C->getType()->isIntOrIntVector() && + assert(C->getType()->isIntOrIntVectorTy() && "Cannot NEG a nonintegral value!"); return getNSWSub(ConstantFP::getZeroValueForNegation(C->getType()), C); } +Constant* ConstantExpr::getNUWNeg(Constant* C) { + assert(C->getType()->isIntOrIntVectorTy() && + "Cannot NEG a nonintegral value!"); + return getNUWSub(ConstantFP::getZeroValueForNegation(C->getType()), C); +} + Constant* ConstantExpr::getNSWAdd(Constant* C1, Constant* C2) { return getTy(C1->getType(), Instruction::Add, C1, C2, OverflowingBinaryOperator::NoSignedWrap); } +Constant* ConstantExpr::getNUWAdd(Constant* C1, Constant* C2) { + return getTy(C1->getType(), Instruction::Add, C1, C2, + OverflowingBinaryOperator::NoUnsignedWrap); +} + Constant* ConstantExpr::getNSWSub(Constant* C1, Constant* C2) { return getTy(C1->getType(), Instruction::Sub, C1, C2, OverflowingBinaryOperator::NoSignedWrap); } +Constant* ConstantExpr::getNUWSub(Constant* C1, Constant* C2) { + return getTy(C1->getType(), Instruction::Sub, C1, C2, + OverflowingBinaryOperator::NoUnsignedWrap); +} + Constant* ConstantExpr::getNSWMul(Constant* C1, Constant* C2) { return getTy(C1->getType(), Instruction::Mul, C1, C2, OverflowingBinaryOperator::NoSignedWrap); } +Constant* ConstantExpr::getNUWMul(Constant* C1, Constant* C2) { + return getTy(C1->getType(), Instruction::Mul, C1, C2, + OverflowingBinaryOperator::NoUnsignedWrap); +} + Constant* ConstantExpr::getExactSDiv(Constant* C1, Constant* C2) { return getTy(C1->getType(), Instruction::SDiv, C1, C2, SDivOperator::IsExact); @@ -928,7 +969,7 @@ void ConstantArray::destroyConstant() { 
/// if the elements of the array are all ConstantInt's. bool ConstantArray::isString() const { // Check the element type for i8... - if (!getType()->getElementType()->isInteger(8)) + if (!getType()->getElementType()->isIntegerTy(8)) return false; // Check the elements to make sure they are all integers, not constant // expressions. @@ -943,7 +984,7 @@ bool ConstantArray::isString() const { /// null bytes except its terminator. bool ConstantArray::isCString() const { // Check the element type for i8... - if (!getType()->getElementType()->isInteger(8)) + if (!getType()->getElementType()->isIntegerTy(8)) return false; // Last element must be a null. @@ -990,6 +1031,13 @@ void ConstantStruct::destroyConstant() { // destroyConstant - Remove the constant from the constant table... // +void ConstantUnion::destroyConstant() { + getType()->getContext().pImpl->UnionConstants.remove(this); + destroyConstantImpl(); +} + +// destroyConstant - Remove the constant from the constant table... +// void ConstantVector::destroyConstant() { getType()->getContext().pImpl->VectorConstants.remove(this); destroyConstantImpl(); @@ -1134,7 +1182,7 @@ static inline Constant *getFoldedCast( Instruction::CastOps opc, Constant *C, const Type *Ty) { assert(Ty->isFirstClassType() && "Cannot cast to an aggregate type!"); // Fold a few common cases - if (Constant *FC = ConstantFoldCastInstruction(Ty->getContext(), opc, C, Ty)) + if (Constant *FC = ConstantFoldCastInstruction(opc, C, Ty)) return FC; LLVMContextImpl *pImpl = Ty->getContext().pImpl; @@ -1150,24 +1198,24 @@ Constant *ConstantExpr::getCast(unsigned oc, Constant *C, const Type *Ty) { Instruction::CastOps opc = Instruction::CastOps(oc); assert(Instruction::isCast(opc) && "opcode out of range"); assert(C && Ty && "Null arguments to getCast"); - assert(Ty->isFirstClassType() && "Cannot cast to an aggregate type!"); + assert(CastInst::castIsValid(opc, C, Ty) && "Invalid constantexpr cast!"); switch (opc) { - default: - llvm_unreachable("Invalid cast opcode"); - break; - case Instruction::Trunc: return getTrunc(C, Ty); - case Instruction::ZExt: return getZExt(C, Ty); - case Instruction::SExt: return getSExt(C, Ty); - case Instruction::FPTrunc: return getFPTrunc(C, Ty); - case Instruction::FPExt: return getFPExtend(C, Ty); - case Instruction::UIToFP: return getUIToFP(C, Ty); - case Instruction::SIToFP: return getSIToFP(C, Ty); - case Instruction::FPToUI: return getFPToUI(C, Ty); - case Instruction::FPToSI: return getFPToSI(C, Ty); - case Instruction::PtrToInt: return getPtrToInt(C, Ty); - case Instruction::IntToPtr: return getIntToPtr(C, Ty); - case Instruction::BitCast: return getBitCast(C, Ty); + default: + llvm_unreachable("Invalid cast opcode"); + break; + case Instruction::Trunc: return getTrunc(C, Ty); + case Instruction::ZExt: return getZExt(C, Ty); + case Instruction::SExt: return getSExt(C, Ty); + case Instruction::FPTrunc: return getFPTrunc(C, Ty); + case Instruction::FPExt: return getFPExtend(C, Ty); + case Instruction::UIToFP: return getUIToFP(C, Ty); + case Instruction::SIToFP: return getSIToFP(C, Ty); + case Instruction::FPToUI: return getFPToUI(C, Ty); + case Instruction::FPToSI: return getFPToSI(C, Ty); + case Instruction::PtrToInt: return getPtrToInt(C, Ty); + case Instruction::IntToPtr: return getIntToPtr(C, Ty); + case Instruction::BitCast: return getBitCast(C, Ty); } return 0; } @@ -1192,17 +1240,17 @@ Constant *ConstantExpr::getTruncOrBitCast(Constant *C, const Type *Ty) { Constant *ConstantExpr::getPointerCast(Constant *S, const Type *Ty) { 
assert(isa<PointerType>(S->getType()) && "Invalid cast"); - assert((Ty->isInteger() || isa<PointerType>(Ty)) && "Invalid cast"); + assert((Ty->isIntegerTy() || isa<PointerType>(Ty)) && "Invalid cast"); - if (Ty->isInteger()) + if (Ty->isIntegerTy()) return getCast(Instruction::PtrToInt, S, Ty); return getCast(Instruction::BitCast, S, Ty); } Constant *ConstantExpr::getIntegerCast(Constant *C, const Type *Ty, bool isSigned) { - assert(C->getType()->isIntOrIntVector() && - Ty->isIntOrIntVector() && "Invalid cast"); + assert(C->getType()->isIntOrIntVectorTy() && + Ty->isIntOrIntVectorTy() && "Invalid cast"); unsigned SrcBits = C->getType()->getScalarSizeInBits(); unsigned DstBits = Ty->getScalarSizeInBits(); Instruction::CastOps opcode = @@ -1213,7 +1261,7 @@ } Constant *ConstantExpr::getFPCast(Constant *C, const Type *Ty) { - assert(C->getType()->isFPOrFPVector() && Ty->isFPOrFPVector() && + assert(C->getType()->isFPOrFPVectorTy() && Ty->isFPOrFPVectorTy() && "Invalid cast"); unsigned SrcBits = C->getType()->getScalarSizeInBits(); unsigned DstBits = Ty->getScalarSizeInBits(); @@ -1230,8 +1278,8 @@ Constant *ConstantExpr::getTrunc(Constant *C, const Type *Ty) { bool toVec = Ty->getTypeID() == Type::VectorTyID; #endif assert((fromVec == toVec) && "Cannot convert from scalar to/from vector"); - assert(C->getType()->isIntOrIntVector() && "Trunc operand must be integer"); - assert(Ty->isIntOrIntVector() && "Trunc produces only integral"); + assert(C->getType()->isIntOrIntVectorTy() && "Trunc operand must be integer"); + assert(Ty->isIntOrIntVectorTy() && "Trunc produces only integral"); assert(C->getType()->getScalarSizeInBits() > Ty->getScalarSizeInBits()&& "SrcTy must be larger than DestTy for Trunc!"); @@ -1244,8 +1292,8 @@ Constant *ConstantExpr::getSExt(Constant *C, const Type *Ty) { bool toVec = Ty->getTypeID() == Type::VectorTyID; #endif assert((fromVec == toVec) && "Cannot convert from scalar to/from vector"); - assert(C->getType()->isIntOrIntVector() && "SExt operand must be integral"); - assert(Ty->isIntOrIntVector() && "SExt produces only integer"); + assert(C->getType()->isIntOrIntVectorTy() && "SExt operand must be integral"); + assert(Ty->isIntOrIntVectorTy() && "SExt produces only integer"); assert(C->getType()->getScalarSizeInBits() < Ty->getScalarSizeInBits()&& "SrcTy must be smaller than DestTy for SExt!"); @@ -1258,8 +1306,8 @@ Constant *ConstantExpr::getZExt(Constant *C, const Type *Ty) { bool toVec = Ty->getTypeID() == Type::VectorTyID; #endif assert((fromVec == toVec) && "Cannot convert from scalar to/from vector"); - assert(C->getType()->isIntOrIntVector() && "ZEXt operand must be integral"); - assert(Ty->isIntOrIntVector() && "ZExt produces only integer"); + assert(C->getType()->isIntOrIntVectorTy() && "ZExt operand must be integral"); + assert(Ty->isIntOrIntVectorTy() && "ZExt produces only integer"); assert(C->getType()->getScalarSizeInBits() < Ty->getScalarSizeInBits()&& "SrcTy must be smaller than DestTy for ZExt!"); @@ -1272,7 +1320,7 @@ Constant *ConstantExpr::getFPTrunc(Constant *C, const Type *Ty) { bool toVec = Ty->getTypeID() == Type::VectorTyID; #endif assert((fromVec == toVec) && "Cannot convert from scalar to/from vector"); - assert(C->getType()->isFPOrFPVector() && Ty->isFPOrFPVector() && + assert(C->getType()->isFPOrFPVectorTy() && Ty->isFPOrFPVectorTy() && C->getType()->getScalarSizeInBits() > Ty->getScalarSizeInBits()&& "This is an illegal floating point truncation!"); return 
getFoldedCast(Instruction::FPTrunc, C, Ty); @@ -1284,7 +1332,7 @@ Constant *ConstantExpr::getFPExtend(Constant *C, const Type *Ty) { bool toVec = Ty->getTypeID() == Type::VectorTyID; #endif assert((fromVec == toVec) && "Cannot convert from scalar to/from vector"); - assert(C->getType()->isFPOrFPVector() && Ty->isFPOrFPVector() && + assert(C->getType()->isFPOrFPVectorTy() && Ty->isFPOrFPVectorTy() && C->getType()->getScalarSizeInBits() < Ty->getScalarSizeInBits()&& "This is an illegal floating point extension!"); return getFoldedCast(Instruction::FPExt, C, Ty); @@ -1296,7 +1344,7 @@ Constant *ConstantExpr::getUIToFP(Constant *C, const Type *Ty) { bool toVec = Ty->getTypeID() == Type::VectorTyID; #endif assert((fromVec == toVec) && "Cannot convert from scalar to/from vector"); - assert(C->getType()->isIntOrIntVector() && Ty->isFPOrFPVector() && + assert(C->getType()->isIntOrIntVectorTy() && Ty->isFPOrFPVectorTy() && "This is an illegal uint to floating point cast!"); return getFoldedCast(Instruction::UIToFP, C, Ty); } @@ -1307,7 +1355,7 @@ Constant *ConstantExpr::getSIToFP(Constant *C, const Type *Ty) { bool toVec = Ty->getTypeID() == Type::VectorTyID; #endif assert((fromVec == toVec) && "Cannot convert from scalar to/from vector"); - assert(C->getType()->isIntOrIntVector() && Ty->isFPOrFPVector() && + assert(C->getType()->isIntOrIntVectorTy() && Ty->isFPOrFPVectorTy() && "This is an illegal sint to floating point cast!"); return getFoldedCast(Instruction::SIToFP, C, Ty); } @@ -1318,7 +1366,7 @@ Constant *ConstantExpr::getFPToUI(Constant *C, const Type *Ty) { bool toVec = Ty->getTypeID() == Type::VectorTyID; #endif assert((fromVec == toVec) && "Cannot convert from scalar to/from vector"); - assert(C->getType()->isFPOrFPVector() && Ty->isIntOrIntVector() && + assert(C->getType()->isFPOrFPVectorTy() && Ty->isIntOrIntVectorTy() && "This is an illegal floating point to uint cast!"); return getFoldedCast(Instruction::FPToUI, C, Ty); } @@ -1329,38 +1377,26 @@ Constant *ConstantExpr::getFPToSI(Constant *C, const Type *Ty) { bool toVec = Ty->getTypeID() == Type::VectorTyID; #endif assert((fromVec == toVec) && "Cannot convert from scalar to/from vector"); - assert(C->getType()->isFPOrFPVector() && Ty->isIntOrIntVector() && + assert(C->getType()->isFPOrFPVectorTy() && Ty->isIntOrIntVectorTy() && "This is an illegal floating point to sint cast!"); return getFoldedCast(Instruction::FPToSI, C, Ty); } Constant *ConstantExpr::getPtrToInt(Constant *C, const Type *DstTy) { assert(isa<PointerType>(C->getType()) && "PtrToInt source must be pointer"); - assert(DstTy->isInteger() && "PtrToInt destination must be integral"); + assert(DstTy->isIntegerTy() && "PtrToInt destination must be integral"); return getFoldedCast(Instruction::PtrToInt, C, DstTy); } Constant *ConstantExpr::getIntToPtr(Constant *C, const Type *DstTy) { - assert(C->getType()->isInteger() && "IntToPtr source must be integral"); + assert(C->getType()->isIntegerTy() && "IntToPtr source must be integral"); assert(isa<PointerType>(DstTy) && "IntToPtr destination must be a pointer"); return getFoldedCast(Instruction::IntToPtr, C, DstTy); } Constant *ConstantExpr::getBitCast(Constant *C, const Type *DstTy) { - // BitCast implies a no-op cast of type only. No bits change. However, you - // can't cast pointers to anything but pointers. 
-#ifndef NDEBUG - const Type *SrcTy = C->getType(); - assert((isa<PointerType>(SrcTy) == isa<PointerType>(DstTy)) && - "BitCast cannot cast pointer to non-pointer and vice versa"); - - // Now we know we're not dealing with mismatched pointer casts (ptr->nonptr - // or nonptr->ptr). For all the other types, the cast is okay if source and - // destination bit widths are identical. - unsigned SrcBitSize = SrcTy->getPrimitiveSizeInBits(); - unsigned DstBitSize = DstTy->getPrimitiveSizeInBits(); -#endif - assert(SrcBitSize == DstBitSize && "BitCast requires types of same width"); + assert(CastInst::castIsValid(Instruction::BitCast, C, DstTy) && + "Invalid constantexpr bitcast!"); // It is common to ask for a bitcast of a value to its own type, handle this // speedily. @@ -1380,8 +1416,7 @@ Constant *ConstantExpr::getTy(const Type *ReqTy, unsigned Opcode, "Operand types in binary constant expression should match"); if (ReqTy == C1->getType() || ReqTy == Type::getInt1Ty(ReqTy->getContext())) - if (Constant *FC = ConstantFoldBinaryInstruction(ReqTy->getContext(), - Opcode, C1, C2)) + if (Constant *FC = ConstantFoldBinaryInstruction(Opcode, C1, C2)) return FC; // Fold a few common cases... std::vector<Constant*> argVec(1, C1); argVec.push_back(C2); @@ -1414,7 +1449,7 @@ Constant *ConstantExpr::getCompareTy(unsigned short predicate, Constant *ConstantExpr::get(unsigned Opcode, Constant *C1, Constant *C2, unsigned Flags) { // API compatibility: Adjust integer opcodes to floating-point opcodes. - if (C1->getType()->isFPOrFPVector()) { + if (C1->getType()->isFPOrFPVectorTy()) { if (Opcode == Instruction::Add) Opcode = Instruction::FAdd; else if (Opcode == Instruction::Sub) Opcode = Instruction::FSub; else if (Opcode == Instruction::Mul) Opcode = Instruction::FMul; @@ -1425,51 +1460,51 @@ Constant *ConstantExpr::get(unsigned Opcode, Constant *C1, Constant *C2, case Instruction::Sub: case Instruction::Mul: assert(C1->getType() == C2->getType() && "Op types should be identical!"); - assert(C1->getType()->isIntOrIntVector() && + assert(C1->getType()->isIntOrIntVectorTy() && "Tried to create an integer operation on a non-integer type!"); break; case Instruction::FAdd: case Instruction::FSub: case Instruction::FMul: assert(C1->getType() == C2->getType() && "Op types should be identical!"); - assert(C1->getType()->isFPOrFPVector() && + assert(C1->getType()->isFPOrFPVectorTy() && "Tried to create a floating-point operation on a " "non-floating-point type!"); break; case Instruction::UDiv: case Instruction::SDiv: assert(C1->getType() == C2->getType() && "Op types should be identical!"); - assert(C1->getType()->isIntOrIntVector() && + assert(C1->getType()->isIntOrIntVectorTy() && "Tried to create an arithmetic operation on a non-arithmetic type!"); break; case Instruction::FDiv: assert(C1->getType() == C2->getType() && "Op types should be identical!"); - assert(C1->getType()->isFPOrFPVector() && + assert(C1->getType()->isFPOrFPVectorTy() && "Tried to create an arithmetic operation on a non-arithmetic type!"); break; case Instruction::URem: case Instruction::SRem: assert(C1->getType() == C2->getType() && "Op types should be identical!"); - assert(C1->getType()->isIntOrIntVector() && + assert(C1->getType()->isIntOrIntVectorTy() && "Tried to create an arithmetic operation on a non-arithmetic type!"); break; case Instruction::FRem: assert(C1->getType() == C2->getType() && "Op types should be identical!"); - assert(C1->getType()->isFPOrFPVector() && + assert(C1->getType()->isFPOrFPVectorTy() && "Tried to create an 
arithmetic operation on a non-arithmetic type!"); break; case Instruction::And: case Instruction::Or: case Instruction::Xor: assert(C1->getType() == C2->getType() && "Op types should be identical!"); - assert(C1->getType()->isIntOrIntVector() && + assert(C1->getType()->isIntOrIntVectorTy() && "Tried to create a logical operation on a non-integral type!"); break; case Instruction::Shl: case Instruction::LShr: case Instruction::AShr: assert(C1->getType() == C2->getType() && "Op types should be identical!"); - assert(C1->getType()->isIntOrIntVector() && + assert(C1->getType()->isIntOrIntVectorTy() && "Tried to create a shift operation on a non-integer type!"); break; default: @@ -1491,30 +1526,35 @@ Constant* ConstantExpr::getSizeOf(const Type* Ty) { } Constant* ConstantExpr::getAlignOf(const Type* Ty) { - // alignof is implemented as: (i64) gep ({i8,Ty}*)null, 0, 1 + // alignof is implemented as: (i64) gep ({i1,Ty}*)null, 0, 1 // Note that a non-inbounds gep is used, as null isn't within any object. const Type *AligningTy = StructType::get(Ty->getContext(), - Type::getInt8Ty(Ty->getContext()), Ty, NULL); + Type::getInt1Ty(Ty->getContext()), Ty, NULL); Constant *NullPtr = Constant::getNullValue(AligningTy->getPointerTo()); - Constant *Zero = ConstantInt::get(Type::getInt32Ty(Ty->getContext()), 0); + Constant *Zero = ConstantInt::get(Type::getInt64Ty(Ty->getContext()), 0); Constant *One = ConstantInt::get(Type::getInt32Ty(Ty->getContext()), 1); Constant *Indices[2] = { Zero, One }; Constant *GEP = getGetElementPtr(NullPtr, Indices, 2); return getCast(Instruction::PtrToInt, GEP, - Type::getInt32Ty(Ty->getContext())); + Type::getInt64Ty(Ty->getContext())); } Constant* ConstantExpr::getOffsetOf(const StructType* STy, unsigned FieldNo) { + return getOffsetOf(STy, ConstantInt::get(Type::getInt32Ty(STy->getContext()), + FieldNo)); +} + +Constant* ConstantExpr::getOffsetOf(const Type* Ty, Constant *FieldNo) { // offsetof is implemented as: (i64) gep (Ty*)null, 0, FieldNo // Note that a non-inbounds gep is used, as null isn't within any object. Constant *GEPIdx[] = { - ConstantInt::get(Type::getInt64Ty(STy->getContext()), 0), - ConstantInt::get(Type::getInt32Ty(STy->getContext()), FieldNo) + ConstantInt::get(Type::getInt64Ty(Ty->getContext()), 0), + FieldNo }; Constant *GEP = getGetElementPtr( - Constant::getNullValue(PointerType::getUnqual(STy)), GEPIdx, 2); + Constant::getNullValue(PointerType::getUnqual(Ty)), GEPIdx, 2); return getCast(Instruction::PtrToInt, GEP, - Type::getInt64Ty(STy->getContext())); + Type::getInt64Ty(Ty->getContext())); } Constant *ConstantExpr::getCompare(unsigned short pred, @@ -1528,8 +1568,7 @@ Constant *ConstantExpr::getSelectTy(const Type *ReqTy, Constant *C, assert(!SelectInst::areInvalidOperands(C, V1, V2)&&"Invalid select operands"); if (ReqTy == V1->getType()) - if (Constant *SC = ConstantFoldSelectInstruction( - ReqTy->getContext(), C, V1, V2)) + if (Constant *SC = ConstantFoldSelectInstruction(C, V1, V2)) return SC; // Fold common cases std::vector<Constant*> argVec(3, C); @@ -1549,9 +1588,8 @@ Constant *ConstantExpr::getGetElementPtrTy(const Type *ReqTy, Constant *C, cast<PointerType>(ReqTy)->getElementType() && "GEP indices invalid!"); - if (Constant *FC = ConstantFoldGetElementPtr( - ReqTy->getContext(), C, /*inBounds=*/false, - (Constant**)Idxs, NumIdx)) + if (Constant *FC = ConstantFoldGetElementPtr(C, /*inBounds=*/false, + (Constant**)Idxs, NumIdx)) return FC; // Fold a few common cases... 
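The getAlignOf change above is worth unpacking: alignment is computed purely as a constant expression, with no TargetData involved. In a struct {i1, Ty}, the second field is laid out at the smallest offset past the one-byte i1 that satisfies Ty's alignment, which is exactly alignof(Ty); a gep to field 1 off a null pointer, fed through ptrtoint, therefore yields the alignment. (A non-inbounds gep is required since null lies inside no object, as the comment notes.) Restated as a free-standing sketch; the function name is illustrative:

    #include "llvm/Constants.h"
    #include "llvm/DerivedTypes.h"
    using namespace llvm;

    Constant *alignOfExpr(const Type *Ty) {
      LLVMContext &Ctx = Ty->getContext();
      // { i1, Ty }: field 1 lands at offset alignof(Ty).
      const StructType *Aligner =
          StructType::get(Ctx, Type::getInt1Ty(Ctx), Ty, NULL);
      Constant *NullPtr = Constant::getNullValue(Aligner->getPointerTo());
      Constant *Indices[2] = {
        ConstantInt::get(Type::getInt64Ty(Ctx), 0),  // step over the pointer
        ConstantInt::get(Type::getInt32Ty(Ctx), 1)   // select field 1
      };
      Constant *GEP = ConstantExpr::getGetElementPtr(NullPtr, Indices, 2);
      return ConstantExpr::getPtrToInt(GEP, Type::getInt64Ty(Ctx));
    }

The new Type-taking getOffsetOf overload generalizes the same null-gep trick to an arbitrary, even non-constant-folded, field number.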
assert(isa<PointerType>(C->getType()) && @@ -1577,9 +1615,8 @@ Constant *ConstantExpr::getInBoundsGetElementPtrTy(const Type *ReqTy, cast<PointerType>(ReqTy)->getElementType() && "GEP indices invalid!"); - if (Constant *FC = ConstantFoldGetElementPtr( - ReqTy->getContext(), C, /*inBounds=*/true, - (Constant**)Idxs, NumIdx)) + if (Constant *FC = ConstantFoldGetElementPtr(C, /*inBounds=*/true, + (Constant**)Idxs, NumIdx)) return FC; // Fold a few common cases... assert(isa<PointerType>(C->getType()) && @@ -1635,8 +1672,7 @@ ConstantExpr::getICmp(unsigned short pred, Constant *LHS, Constant *RHS) { assert(pred >= ICmpInst::FIRST_ICMP_PREDICATE && pred <= ICmpInst::LAST_ICMP_PREDICATE && "Invalid ICmp Predicate"); - if (Constant *FC = ConstantFoldCompareInstruction( - LHS->getContext(), pred, LHS, RHS)) + if (Constant *FC = ConstantFoldCompareInstruction(pred, LHS, RHS)) return FC; // Fold a few common cases... // Look up the constant in the table first to ensure uniqueness @@ -1659,8 +1695,7 @@ ConstantExpr::getFCmp(unsigned short pred, Constant *LHS, Constant *RHS) { assert(LHS->getType() == RHS->getType()); assert(pred <= FCmpInst::LAST_FCMP_PREDICATE && "Invalid FCmp Predicate"); - if (Constant *FC = ConstantFoldCompareInstruction( - LHS->getContext(), pred, LHS, RHS)) + if (Constant *FC = ConstantFoldCompareInstruction(pred, LHS, RHS)) return FC; // Fold a few common cases... // Look up the constant in the table first to ensure uniqueness @@ -1680,8 +1715,7 @@ ConstantExpr::getFCmp(unsigned short pred, Constant *LHS, Constant *RHS) { Constant *ConstantExpr::getExtractElementTy(const Type *ReqTy, Constant *Val, Constant *Idx) { - if (Constant *FC = ConstantFoldExtractElementInstruction( - ReqTy->getContext(), Val, Idx)) + if (Constant *FC = ConstantFoldExtractElementInstruction(Val, Idx)) return FC; // Fold a few common cases. // Look up the constant in the table first to ensure uniqueness std::vector<Constant*> ArgVec(1, Val); @@ -1695,7 +1729,7 @@ Constant *ConstantExpr::getExtractElementTy(const Type *ReqTy, Constant *Val, Constant *ConstantExpr::getExtractElement(Constant *Val, Constant *Idx) { assert(isa<VectorType>(Val->getType()) && "Tried to create extractelement operation on non-vector type!"); - assert(Idx->getType()->isInteger(32) && + assert(Idx->getType()->isIntegerTy(32) && "Extractelement index must be i32 type!"); return getExtractElementTy(cast<VectorType>(Val->getType())->getElementType(), Val, Idx); @@ -1703,8 +1737,7 @@ Constant *ConstantExpr::getExtractElement(Constant *Val, Constant *Idx) { Constant *ConstantExpr::getInsertElementTy(const Type *ReqTy, Constant *Val, Constant *Elt, Constant *Idx) { - if (Constant *FC = ConstantFoldInsertElementInstruction( - ReqTy->getContext(), Val, Elt, Idx)) + if (Constant *FC = ConstantFoldInsertElementInstruction(Val, Elt, Idx)) return FC; // Fold a few common cases. 
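A mechanical change threads through this whole region: the ConstantFold*Instruction helpers (cast, select, GEP, compare, extractelement, insertelement, shufflevector) no longer take an LLVMContext argument and recover the context from their operands instead. The public entry points keep their shape, so callers are unaffected; a small illustration of the compare path, with an illustrative helper name:

    #include "llvm/Constants.h"
    #include "llvm/Instructions.h"
    using namespace llvm;

    // getICmp first tries ConstantFoldCompareInstruction(pred, LHS, RHS) --
    // note: no context parameter anymore -- and only builds a uniqued
    // ConstantExpr if folding fails.
    Constant *constEq(Constant *LHS, Constant *RHS) {
      return ConstantExpr::getICmp(ICmpInst::ICMP_EQ, LHS, RHS);
    }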
// Look up the constant in the table first to ensure uniqueness std::vector<Constant*> ArgVec(1, Val); @@ -1722,15 +1755,14 @@ Constant *ConstantExpr::getInsertElement(Constant *Val, Constant *Elt, "Tried to create insertelement operation on non-vector type!"); assert(Elt->getType() == cast<VectorType>(Val->getType())->getElementType() && "Insertelement types must match!"); - assert(Idx->getType()->isInteger(32) && + assert(Idx->getType()->isIntegerTy(32) && "Insertelement index must be i32 type!"); return getInsertElementTy(Val->getType(), Val, Elt, Idx); } Constant *ConstantExpr::getShuffleVectorTy(const Type *ReqTy, Constant *V1, Constant *V2, Constant *Mask) { - if (Constant *FC = ConstantFoldShuffleVectorInstruction( - ReqTy->getContext(), V1, V2, Mask)) + if (Constant *FC = ConstantFoldShuffleVectorInstruction(V1, V2, Mask)) return FC; // Fold a few common cases... // Look up the constant in the table first to ensure uniqueness std::vector<Constant*> ArgVec(1, V1); @@ -1763,8 +1795,7 @@ Constant *ConstantExpr::getInsertValueTy(const Type *ReqTy, Constant *Agg, "insertvalue type invalid!"); assert(Agg->getType()->isFirstClassType() && "Non-first-class type for constant InsertValue expression"); - Constant *FC = ConstantFoldInsertValueInstruction( - ReqTy->getContext(), Agg, Val, Idxs, NumIdx); + Constant *FC = ConstantFoldInsertValueInstruction(Agg, Val, Idxs, NumIdx); assert(FC && "InsertValue constant expr couldn't be folded!"); return FC; } @@ -1790,8 +1821,7 @@ Constant *ConstantExpr::getExtractValueTy(const Type *ReqTy, Constant *Agg, "extractvalue indices invalid!"); assert(Agg->getType()->isFirstClassType() && "Non-first-class type for constant extractvalue expression"); - Constant *FC = ConstantFoldExtractValueInstruction( - ReqTy->getContext(), Agg, Idxs, NumIdx); + Constant *FC = ConstantFoldExtractValueInstruction(Agg, Idxs, NumIdx); assert(FC && "ExtractValue constant expr couldn't be folded!"); return FC; } @@ -1809,9 +1839,9 @@ Constant *ConstantExpr::getExtractValue(Constant *Agg, Constant* ConstantExpr::getNeg(Constant* C) { // API compatibility: Adjust integer opcodes to floating-point opcodes. 
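getNeg below keeps old callers working by rerouting floating-point operands to getFNeg. Both paths share the shape zero - C, but the zero differs: ConstantFP::getZeroValueForNegation hands back -0.0 for FP types (and vectors of them), the only value whose subtraction negates every float including +-0.0, and a plain null value otherwise. A sketch of that shared shape under the renamed predicates, mirroring what the two getters expand to (helper name illustrative):

    #include "llvm/Constants.h"
    #include "llvm/Instruction.h"
    using namespace llvm;

    Constant *negByHand(Constant *C) {
      Constant *Zero = ConstantFP::getZeroValueForNegation(C->getType());
      unsigned Opc = C->getType()->isFPOrFPVectorTy() ? Instruction::FSub
                                                      : Instruction::Sub;
      return ConstantExpr::get(Opc, Zero, C);  // what getNeg/getFNeg build
    }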
- if (C->getType()->isFPOrFPVector()) + if (C->getType()->isFPOrFPVectorTy()) return getFNeg(C); - assert(C->getType()->isIntOrIntVector() && + assert(C->getType()->isIntOrIntVectorTy() && "Cannot NEG a nonintegral value!"); return get(Instruction::Sub, ConstantFP::getZeroValueForNegation(C->getType()), @@ -1819,7 +1849,7 @@ Constant* ConstantExpr::getNeg(Constant* C) { } Constant* ConstantExpr::getFNeg(Constant* C) { - assert(C->getType()->isFPOrFPVector() && + assert(C->getType()->isFPOrFPVectorTy() && "Cannot FNEG a non-floating-point value!"); return get(Instruction::FSub, ConstantFP::getZeroValueForNegation(C->getType()), @@ -1827,7 +1857,7 @@ Constant* ConstantExpr::getFNeg(Constant* C) { } Constant* ConstantExpr::getNot(Constant* C) { - assert(C->getType()->isIntOrIntVector() && + assert(C->getType()->isIntOrIntVectorTy() && "Cannot NOT a nonintegral value!"); return get(Instruction::Xor, C, Constant::getAllOnesValue(C->getType())); } @@ -2081,6 +2111,11 @@ void ConstantStruct::replaceUsesOfWithOnConstant(Value *From, Value *To, destroyConstant(); } +void ConstantUnion::replaceUsesOfWithOnConstant(Value *From, Value *To, + Use *U) { + assert(false && "Implement replaceUsesOfWithOnConstant for unions"); +} + void ConstantVector::replaceUsesOfWithOnConstant(Value *From, Value *To, Use *U) { assert(isa<Constant>(To) && "Cannot make Constant refer to non-constant!"); diff --git a/lib/VMCore/ConstantsContext.h b/lib/VMCore/ConstantsContext.h index 08224e4..c798ba2 100644 --- a/lib/VMCore/ConstantsContext.h +++ b/lib/VMCore/ConstantsContext.h @@ -341,6 +341,13 @@ struct ConstantTraits< std::vector<T, Alloc> > { } }; +template<> +struct ConstantTraits<Constant *> { + static unsigned uses(Constant * const & v) { + return 1; + } +}; + template<class ConstantClass, class TypeClass, class ValType> struct ConstantCreator { static ConstantClass *create(const TypeClass *Ty, const ValType &V) { @@ -470,6 +477,14 @@ struct ConstantKeyData<ConstantStruct> { } }; +template<> +struct ConstantKeyData<ConstantUnion> { + typedef Constant* ValType; + static ValType getValType(ConstantUnion *CU) { + return cast<Constant>(CU->getOperand(0)); + } +}; + // ConstantPointerNull does not take extra "value" argument... template<class ValType> struct ConstantCreator<ConstantPointerNull, PointerType, ValType> { diff --git a/lib/VMCore/Core.cpp b/lib/VMCore/Core.cpp index 984d245..a044fc5 100644 --- a/lib/VMCore/Core.cpp +++ b/lib/VMCore/Core.cpp @@ -20,12 +20,13 @@ #include "llvm/GlobalAlias.h" #include "llvm/LLVMContext.h" #include "llvm/TypeSymbolTable.h" -#include "llvm/ModuleProvider.h" #include "llvm/InlineAsm.h" #include "llvm/IntrinsicInst.h" -#include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/CallSite.h" +#include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/raw_ostream.h" #include <cassert> #include <cstdlib> #include <cstring> @@ -140,6 +141,8 @@ LLVMTypeKind LLVMGetTypeKind(LLVMTypeRef Ty) { return LLVMFunctionTypeKind; case Type::StructTyID: return LLVMStructTypeKind; + case Type::UnionTyID: + return LLVMUnionTypeKind; case Type::ArrayTyID: return LLVMArrayTypeKind; case Type::PointerTyID: @@ -298,6 +301,35 @@ LLVMBool LLVMIsPackedStruct(LLVMTypeRef StructTy) { return unwrap<StructType>(StructTy)->isPacked(); } +/*--.. 
Operations on union types ..........................................--*/ + +LLVMTypeRef LLVMUnionTypeInContext(LLVMContextRef C, LLVMTypeRef *ElementTypes, + unsigned ElementCount) { + SmallVector<const Type*, 8> Tys; + for (LLVMTypeRef *I = ElementTypes, + *E = ElementTypes + ElementCount; I != E; ++I) + Tys.push_back(unwrap(*I)); + + return wrap(UnionType::get(&Tys[0], Tys.size())); +} + +LLVMTypeRef LLVMUnionType(LLVMTypeRef *ElementTypes, + unsigned ElementCount, int Packed) { + return LLVMUnionTypeInContext(LLVMGetGlobalContext(), ElementTypes, + ElementCount); +} + +unsigned LLVMCountUnionElementTypes(LLVMTypeRef UnionTy) { + return unwrap<UnionType>(UnionTy)->getNumElements(); +} + +void LLVMGetUnionElementTypes(LLVMTypeRef UnionTy, LLVMTypeRef *Dest) { + UnionType *Ty = unwrap<UnionType>(UnionTy); + for (FunctionType::param_iterator I = Ty->element_begin(), + E = Ty->element_end(); I != E; ++I) + *Dest++ = wrap(*I); +} + /*--.. Operations on array, pointer, and vector types (sequence types) .....--*/ LLVMTypeRef LLVMArrayType(LLVMTypeRef ElementType, unsigned ElementCount) { @@ -932,8 +964,6 @@ LLVMLinkage LLVMGetLinkage(LLVMValueRef Global) { return LLVMDLLExportLinkage; case GlobalValue::ExternalWeakLinkage: return LLVMExternalWeakLinkage; - case GlobalValue::GhostLinkage: - return LLVMGhostLinkage; case GlobalValue::CommonLinkage: return LLVMCommonLinkage; } @@ -988,7 +1018,8 @@ void LLVMSetLinkage(LLVMValueRef Global, LLVMLinkage Linkage) { GV->setLinkage(GlobalValue::ExternalWeakLinkage); break; case LLVMGhostLinkage: - GV->setLinkage(GlobalValue::GhostLinkage); + DEBUG(errs() + << "LLVMSetLinkage(): LLVMGhostLinkage is no longer supported."); break; case LLVMCommonLinkage: GV->setLinkage(GlobalValue::CommonLinkage); @@ -1965,7 +1996,7 @@ LLVMValueRef LLVMBuildPtrDiff(LLVMBuilderRef B, LLVMValueRef LHS, LLVMModuleProviderRef LLVMCreateModuleProviderForExistingModule(LLVMModuleRef M) { - return wrap(new ExistingModuleProvider(unwrap(M))); + return reinterpret_cast<LLVMModuleProviderRef>(M); } void LLVMDisposeModuleProvider(LLVMModuleProviderRef MP) { diff --git a/lib/VMCore/GVMaterializer.cpp b/lib/VMCore/GVMaterializer.cpp new file mode 100644 index 0000000..f77a9c9 --- /dev/null +++ b/lib/VMCore/GVMaterializer.cpp @@ -0,0 +1,18 @@ +//===-- GVMaterializer.cpp - Base implementation for GV materializers -----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Minimal implementation of the abstract interface for materializing +// GlobalValues. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/GVMaterializer.h" +using namespace llvm; + +GVMaterializer::~GVMaterializer() {} diff --git a/lib/VMCore/Globals.cpp b/lib/VMCore/Globals.cpp index 94bf3de..489ec65 100644 --- a/lib/VMCore/Globals.cpp +++ b/lib/VMCore/Globals.cpp @@ -43,6 +43,19 @@ static bool removeDeadUsersOfConstant(const Constant *C) { return true; } +bool GlobalValue::isMaterializable() const { + return getParent() && getParent()->isMaterializable(this); +} +bool GlobalValue::isDematerializable() const { + return getParent() && getParent()->isDematerializable(this); +} +bool GlobalValue::Materialize(std::string *ErrInfo) { + return getParent()->Materialize(this, ErrInfo); +} +void GlobalValue::Dematerialize() { + getParent()->Dematerialize(this); +} + /// removeDeadConstantUsers - If there are any dead constant users dangling /// off of this global value, remove them. This method is useful for clients /// that want to check to see if a global is unused, but don't want to deal diff --git a/lib/VMCore/IRBuilder.cpp b/lib/VMCore/IRBuilder.cpp index 699bf0f..9f2786e 100644 --- a/lib/VMCore/IRBuilder.cpp +++ b/lib/VMCore/IRBuilder.cpp @@ -19,7 +19,7 @@ using namespace llvm; /// CreateGlobalString - Make a new global variable with an initializer that -/// has array of i8 type filled in the the nul terminated string value +/// has array of i8 type filled in with the nul terminated string value /// specified. If Name is specified, it is the name of the global variable /// created. Value *IRBuilderBase::CreateGlobalString(const char *Str, const Twine &Name) { diff --git a/lib/VMCore/Instructions.cpp b/lib/VMCore/Instructions.cpp index e2b920e..9d5f7a5 100644 --- a/lib/VMCore/Instructions.cpp +++ b/lib/VMCore/Instructions.cpp @@ -787,7 +787,7 @@ BasicBlock *UnreachableInst::getSuccessorV(unsigned idx) const { void BranchInst::AssertOK() { if (isConditional()) - assert(getCondition()->getType()->isInteger(1) && + assert(getCondition()->getType()->isIntegerTy(1) && "May only branch on boolean predicates!"); } @@ -892,7 +892,7 @@ static Value *getAISize(LLVMContext &Context, Value *Amt) { else { assert(!isa<BasicBlock>(Amt) && "Passed basic block into allocation size parameter! Use other ctor"); - assert(Amt->getType()->isInteger(32) && + assert(Amt->getType()->isIntegerTy(32) && "Allocation array size is not a 32-bit integer!"); } return Amt; @@ -1391,7 +1391,7 @@ ExtractElementInst::ExtractElementInst(Value *Val, Value *Index, bool ExtractElementInst::isValidOperands(const Value *Val, const Value *Index) { - if (!isa<VectorType>(Val->getType()) || !Index->getType()->isInteger(32)) + if (!isa<VectorType>(Val->getType()) || !Index->getType()->isIntegerTy(32)) return false; return true; } @@ -1438,7 +1438,7 @@ bool InsertElementInst::isValidOperands(const Value *Vec, const Value *Elt, if (Elt->getType() != cast<VectorType>(Vec->getType())->getElementType()) return false;// Second operand of insertelement must be vector element type. - if (!Index->getType()->isInteger(32)) + if (!Index->getType()->isIntegerTy(32)) return false; // Third operand of insertelement must be i32. 
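The Globals.cpp hunk above is the GlobalValue-facing half of the ModuleProvider removal visible later in this diff: lazy deserialization now lives behind a GVMaterializer installed on the Module, and GlobalValue merely forwards to it. Callers that previously went through ModuleProvider::materializeFunction can be ported roughly like this (the helper name is illustrative; error handling trimmed):

    #include "llvm/Function.h"
    #include <string>
    using namespace llvm;

    // Ensure F's body has been read in before inspecting it.
    bool ensureBody(Function &F, std::string &Err) {
      if (!F.isMaterializable())
        return true;                // nothing pending: body already present
      return !F.Materialize(&Err);  // Materialize returns true on error
    }

This is exactly the pattern FunctionPassManager::run adopts further down in PassManager.cpp.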
return true; } @@ -1490,7 +1490,7 @@ bool ShuffleVectorInst::isValidOperands(const Value *V1, const Value *V2, const VectorType *MaskTy = dyn_cast<VectorType>(Mask->getType()); if (!isa<Constant>(Mask) || MaskTy == 0 || - !MaskTy->getElementType()->isInteger(32)) + !MaskTy->getElementType()->isIntegerTy(32)) return false; return true; } @@ -1632,7 +1632,7 @@ const Type* ExtractValueInst::getIndexedType(const Type *Agg, static BinaryOperator::BinaryOps AdjustIType(BinaryOperator::BinaryOps iType, const Type *Ty) { // API compatibility: Adjust integer opcodes to floating-point opcodes. - if (Ty->isFPOrFPVector()) { + if (Ty->isFPOrFPVectorTy()) { if (iType == BinaryOperator::Add) iType = BinaryOperator::FAdd; else if (iType == BinaryOperator::Sub) iType = BinaryOperator::FSub; else if (iType == BinaryOperator::Mul) iType = BinaryOperator::FMul; @@ -1678,14 +1678,14 @@ void BinaryOperator::init(BinaryOps iType) { case Mul: assert(getType() == LHS->getType() && "Arithmetic operation should return same type as operands!"); - assert(getType()->isIntOrIntVector() && + assert(getType()->isIntOrIntVectorTy() && "Tried to create an integer operation on a non-integer type!"); break; case FAdd: case FSub: case FMul: assert(getType() == LHS->getType() && "Arithmetic operation should return same type as operands!"); - assert(getType()->isFPOrFPVector() && + assert(getType()->isFPOrFPVectorTy() && "Tried to create a floating-point operation on a " "non-floating-point type!"); break; @@ -1693,28 +1693,28 @@ void BinaryOperator::init(BinaryOps iType) { case SDiv: assert(getType() == LHS->getType() && "Arithmetic operation should return same type as operands!"); - assert((getType()->isInteger() || (isa<VectorType>(getType()) && - cast<VectorType>(getType())->getElementType()->isInteger())) && + assert((getType()->isIntegerTy() || (isa<VectorType>(getType()) && + cast<VectorType>(getType())->getElementType()->isIntegerTy())) && "Incorrect operand type (not integer) for S/UDIV"); break; case FDiv: assert(getType() == LHS->getType() && "Arithmetic operation should return same type as operands!"); - assert(getType()->isFPOrFPVector() && + assert(getType()->isFPOrFPVectorTy() && "Incorrect operand type (not floating point) for FDIV"); break; case URem: case SRem: assert(getType() == LHS->getType() && "Arithmetic operation should return same type as operands!"); - assert((getType()->isInteger() || (isa<VectorType>(getType()) && - cast<VectorType>(getType())->getElementType()->isInteger())) && + assert((getType()->isIntegerTy() || (isa<VectorType>(getType()) && + cast<VectorType>(getType())->getElementType()->isIntegerTy())) && "Incorrect operand type (not integer) for S/UREM"); break; case FRem: assert(getType() == LHS->getType() && "Arithmetic operation should return same type as operands!"); - assert(getType()->isFPOrFPVector() && + assert(getType()->isFPOrFPVectorTy() && "Incorrect operand type (not floating point) for FREM"); break; case Shl: @@ -1722,18 +1722,18 @@ void BinaryOperator::init(BinaryOps iType) { case AShr: assert(getType() == LHS->getType() && "Shift operation should return same type as operands!"); - assert((getType()->isInteger() || + assert((getType()->isIntegerTy() || (isa<VectorType>(getType()) && - cast<VectorType>(getType())->getElementType()->isInteger())) && + cast<VectorType>(getType())->getElementType()->isIntegerTy())) && "Tried to create a shift operation on a non-integral type!"); break; case And: case Or: case Xor: assert(getType() == LHS->getType() && "Logical operation should 
return same type as operands!"); - assert((getType()->isInteger() || + assert((getType()->isIntegerTy() || (isa<VectorType>(getType()) && - cast<VectorType>(getType())->getElementType()->isInteger())) && + cast<VectorType>(getType())->getElementType()->isIntegerTy())) && "Tried to create a logical operation on a non-integral type!"); break; default: @@ -1786,6 +1786,18 @@ BinaryOperator *BinaryOperator::CreateNSWNeg(Value *Op, const Twine &Name, return BinaryOperator::CreateNSWSub(zero, Op, Name, InsertAtEnd); } +BinaryOperator *BinaryOperator::CreateNUWNeg(Value *Op, const Twine &Name, + Instruction *InsertBefore) { + Value *zero = ConstantFP::getZeroValueForNegation(Op->getType()); + return BinaryOperator::CreateNUWSub(zero, Op, Name, InsertBefore); +} + +BinaryOperator *BinaryOperator::CreateNUWNeg(Value *Op, const Twine &Name, + BasicBlock *InsertAtEnd) { + Value *zero = ConstantFP::getZeroValueForNegation(Op->getType()); + return BinaryOperator::CreateNUWSub(zero, Op, Name, InsertAtEnd); +} + BinaryOperator *BinaryOperator::CreateFNeg(Value *Op, const Twine &Name, Instruction *InsertBefore) { Value *zero = ConstantFP::getZeroValueForNegation(Op->getType()); @@ -1948,7 +1960,8 @@ bool CastInst::isIntegerCast() const { case Instruction::Trunc: return true; case Instruction::BitCast: - return getOperand(0)->getType()->isInteger() && getType()->isInteger(); + return getOperand(0)->getType()->isIntegerTy() && + getType()->isIntegerTy(); } } @@ -2081,25 +2094,25 @@ unsigned CastInst::isEliminableCastPair( // no-op cast in second op implies firstOp as long as the DestTy // is integer and we are not converting between a vector and a // non vector type. - if (!isa<VectorType>(SrcTy) && DstTy->isInteger()) + if (!isa<VectorType>(SrcTy) && DstTy->isIntegerTy()) return firstOp; return 0; case 4: // no-op cast in second op implies firstOp as long as the DestTy // is floating point. - if (DstTy->isFloatingPoint()) + if (DstTy->isFloatingPointTy()) return firstOp; return 0; case 5: // no-op cast in first op implies secondOp as long as the SrcTy // is an integer. - if (SrcTy->isInteger()) + if (SrcTy->isIntegerTy()) return secondOp; return 0; case 6: // no-op cast in first op implies secondOp as long as the SrcTy // is a floating point. 
- if (SrcTy->isFloatingPoint()) + if (SrcTy->isFloatingPointTy()) return secondOp; return 0; case 7: { @@ -2262,10 +2275,10 @@ CastInst *CastInst::CreatePointerCast(Value *S, const Type *Ty, const Twine &Name, BasicBlock *InsertAtEnd) { assert(isa<PointerType>(S->getType()) && "Invalid cast"); - assert((Ty->isInteger() || isa<PointerType>(Ty)) && + assert((Ty->isIntegerTy() || isa<PointerType>(Ty)) && "Invalid cast"); - if (Ty->isInteger()) + if (Ty->isIntegerTy()) return Create(Instruction::PtrToInt, S, Ty, Name, InsertAtEnd); return Create(Instruction::BitCast, S, Ty, Name, InsertAtEnd); } @@ -2275,10 +2288,10 @@ CastInst *CastInst::CreatePointerCast(Value *S, const Type *Ty, const Twine &Name, Instruction *InsertBefore) { assert(isa<PointerType>(S->getType()) && "Invalid cast"); - assert((Ty->isInteger() || isa<PointerType>(Ty)) && + assert((Ty->isIntegerTy() || isa<PointerType>(Ty)) && "Invalid cast"); - if (Ty->isInteger()) + if (Ty->isIntegerTy()) return Create(Instruction::PtrToInt, S, Ty, Name, InsertBefore); return Create(Instruction::BitCast, S, Ty, Name, InsertBefore); } @@ -2286,7 +2299,7 @@ CastInst *CastInst::CreatePointerCast(Value *S, const Type *Ty, CastInst *CastInst::CreateIntegerCast(Value *C, const Type *Ty, bool isSigned, const Twine &Name, Instruction *InsertBefore) { - assert(C->getType()->isIntOrIntVector() && Ty->isIntOrIntVector() && + assert(C->getType()->isIntOrIntVectorTy() && Ty->isIntOrIntVectorTy() && "Invalid integer cast"); unsigned SrcBits = C->getType()->getScalarSizeInBits(); unsigned DstBits = Ty->getScalarSizeInBits(); @@ -2300,7 +2313,7 @@ CastInst *CastInst::CreateIntegerCast(Value *C, const Type *Ty, CastInst *CastInst::CreateIntegerCast(Value *C, const Type *Ty, bool isSigned, const Twine &Name, BasicBlock *InsertAtEnd) { - assert(C->getType()->isIntOrIntVector() && Ty->isIntOrIntVector() && + assert(C->getType()->isIntOrIntVectorTy() && Ty->isIntOrIntVectorTy() && "Invalid cast"); unsigned SrcBits = C->getType()->getScalarSizeInBits(); unsigned DstBits = Ty->getScalarSizeInBits(); @@ -2314,7 +2327,7 @@ CastInst *CastInst::CreateIntegerCast(Value *C, const Type *Ty, CastInst *CastInst::CreateFPCast(Value *C, const Type *Ty, const Twine &Name, Instruction *InsertBefore) { - assert(C->getType()->isFPOrFPVector() && Ty->isFPOrFPVector() && + assert(C->getType()->isFPOrFPVectorTy() && Ty->isFPOrFPVectorTy() && "Invalid cast"); unsigned SrcBits = C->getType()->getScalarSizeInBits(); unsigned DstBits = Ty->getScalarSizeInBits(); @@ -2327,7 +2340,7 @@ CastInst *CastInst::CreateFPCast(Value *C, const Type *Ty, CastInst *CastInst::CreateFPCast(Value *C, const Type *Ty, const Twine &Name, BasicBlock *InsertAtEnd) { - assert(C->getType()->isFPOrFPVector() && Ty->isFPOrFPVector() && + assert(C->getType()->isFPOrFPVectorTy() && Ty->isFPOrFPVectorTy() && "Invalid cast"); unsigned SrcBits = C->getType()->getScalarSizeInBits(); unsigned DstBits = Ty->getScalarSizeInBits(); @@ -2351,10 +2364,10 @@ bool CastInst::isCastable(const Type *SrcTy, const Type *DestTy) { unsigned DestBits = DestTy->getScalarSizeInBits(); // 0 for ptr // Run through the possibilities ... 
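isCastable and getCastOpcode between them encode the full cast matrix (int, FP, vector, pointer) that the renamed predicates feed. The usual client pattern asks for the opcode, then materializes the cast; a sketch under those two calls (function name illustrative):

    #include "llvm/InstrTypes.h"
    using namespace llvm;

    Value *castTo(Value *V, const Type *DestTy, bool SrcSigned, bool DstSigned,
                  Instruction *InsertBefore) {
      assert(CastInst::isCastable(V->getType(), DestTy) && "no legal cast");
      Instruction::CastOps Opc =
          CastInst::getCastOpcode(V, SrcSigned, DestTy, DstSigned);
      return CastInst::Create(Opc, V, DestTy, "cast", InsertBefore);
    }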
- if (DestTy->isInteger()) { // Casting to integral - if (SrcTy->isInteger()) { // Casting from integral + if (DestTy->isIntegerTy()) { // Casting to integral + if (SrcTy->isIntegerTy()) { // Casting from integral return true; - } else if (SrcTy->isFloatingPoint()) { // Casting from floating pt + } else if (SrcTy->isFloatingPointTy()) { // Casting from floating pt return true; } else if (const VectorType *PTy = dyn_cast<VectorType>(SrcTy)) { // Casting from vector @@ -2362,10 +2375,10 @@ bool CastInst::isCastable(const Type *SrcTy, const Type *DestTy) { } else { // Casting from something else return isa<PointerType>(SrcTy); } - } else if (DestTy->isFloatingPoint()) { // Casting to floating pt - if (SrcTy->isInteger()) { // Casting from integral + } else if (DestTy->isFloatingPointTy()) { // Casting to floating pt + if (SrcTy->isIntegerTy()) { // Casting from integral return true; - } else if (SrcTy->isFloatingPoint()) { // Casting from floating pt + } else if (SrcTy->isFloatingPointTy()) { // Casting from floating pt return true; } else if (const VectorType *PTy = dyn_cast<VectorType>(SrcTy)) { // Casting from vector @@ -2384,7 +2397,7 @@ bool CastInst::isCastable(const Type *SrcTy, const Type *DestTy) { } else if (isa<PointerType>(DestTy)) { // Casting to pointer if (isa<PointerType>(SrcTy)) { // Casting from pointer return true; - } else if (SrcTy->isInteger()) { // Casting from integral + } else if (SrcTy->isIntegerTy()) { // Casting from integral return true; } else { // Casting from something else return false; @@ -2413,8 +2426,8 @@ CastInst::getCastOpcode( "Only first class types are castable!"); // Run through the possibilities ... - if (DestTy->isInteger()) { // Casting to integral - if (SrcTy->isInteger()) { // Casting from integral + if (DestTy->isIntegerTy()) { // Casting to integral + if (SrcTy->isIntegerTy()) { // Casting from integral if (DestBits < SrcBits) return Trunc; // int -> smaller int else if (DestBits > SrcBits) { // its an extension @@ -2425,7 +2438,7 @@ CastInst::getCastOpcode( } else { return BitCast; // Same size, No-op cast } - } else if (SrcTy->isFloatingPoint()) { // Casting from floating pt + } else if (SrcTy->isFloatingPointTy()) { // Casting from floating pt if (DestIsSigned) return FPToSI; // FP -> sint else @@ -2440,13 +2453,13 @@ CastInst::getCastOpcode( "Casting from a value that is not first-class type"); return PtrToInt; // ptr -> int } - } else if (DestTy->isFloatingPoint()) { // Casting to floating pt - if (SrcTy->isInteger()) { // Casting from integral + } else if (DestTy->isFloatingPointTy()) { // Casting to floating pt + if (SrcTy->isIntegerTy()) { // Casting from integral if (SrcIsSigned) return SIToFP; // sint -> FP else return UIToFP; // uint -> FP - } else if (SrcTy->isFloatingPoint()) { // Casting from floating pt + } else if (SrcTy->isFloatingPointTy()) { // Casting from floating pt if (DestBits < SrcBits) { return FPTrunc; // FP -> smaller FP } else if (DestBits > SrcBits) { @@ -2476,7 +2489,7 @@ CastInst::getCastOpcode( } else if (isa<PointerType>(DestTy)) { if (isa<PointerType>(SrcTy)) { return BitCast; // ptr -> ptr - } else if (SrcTy->isInteger()) { + } else if (SrcTy->isIntegerTy()) { return IntToPtr; // int -> ptr } else { assert(!"Casting pointer to other than pointer or int"); @@ -2504,7 +2517,8 @@ CastInst::castIsValid(Instruction::CastOps op, Value *S, const Type *DstTy) { // Check for type sanity on the arguments const Type *SrcTy = S->getType(); - if (!SrcTy->isFirstClassType() || !DstTy->isFirstClassType()) + if 
(!SrcTy->isFirstClassType() || !DstTy->isFirstClassType() || + SrcTy->isAggregateType() || DstTy->isAggregateType()) return false; // Get the size of the types in bits, we'll need this later @@ -2515,46 +2529,46 @@ CastInst::castIsValid(Instruction::CastOps op, Value *S, const Type *DstTy) { switch (op) { default: return false; // This is an input error case Instruction::Trunc: - return SrcTy->isIntOrIntVector() && - DstTy->isIntOrIntVector()&& SrcBitSize > DstBitSize; + return SrcTy->isIntOrIntVectorTy() && + DstTy->isIntOrIntVectorTy()&& SrcBitSize > DstBitSize; case Instruction::ZExt: - return SrcTy->isIntOrIntVector() && - DstTy->isIntOrIntVector()&& SrcBitSize < DstBitSize; + return SrcTy->isIntOrIntVectorTy() && + DstTy->isIntOrIntVectorTy()&& SrcBitSize < DstBitSize; case Instruction::SExt: - return SrcTy->isIntOrIntVector() && - DstTy->isIntOrIntVector()&& SrcBitSize < DstBitSize; + return SrcTy->isIntOrIntVectorTy() && + DstTy->isIntOrIntVectorTy()&& SrcBitSize < DstBitSize; case Instruction::FPTrunc: - return SrcTy->isFPOrFPVector() && - DstTy->isFPOrFPVector() && + return SrcTy->isFPOrFPVectorTy() && + DstTy->isFPOrFPVectorTy() && SrcBitSize > DstBitSize; case Instruction::FPExt: - return SrcTy->isFPOrFPVector() && - DstTy->isFPOrFPVector() && + return SrcTy->isFPOrFPVectorTy() && + DstTy->isFPOrFPVectorTy() && SrcBitSize < DstBitSize; case Instruction::UIToFP: case Instruction::SIToFP: if (const VectorType *SVTy = dyn_cast<VectorType>(SrcTy)) { if (const VectorType *DVTy = dyn_cast<VectorType>(DstTy)) { - return SVTy->getElementType()->isIntOrIntVector() && - DVTy->getElementType()->isFPOrFPVector() && + return SVTy->getElementType()->isIntOrIntVectorTy() && + DVTy->getElementType()->isFPOrFPVectorTy() && SVTy->getNumElements() == DVTy->getNumElements(); } } - return SrcTy->isIntOrIntVector() && DstTy->isFPOrFPVector(); + return SrcTy->isIntOrIntVectorTy() && DstTy->isFPOrFPVectorTy(); case Instruction::FPToUI: case Instruction::FPToSI: if (const VectorType *SVTy = dyn_cast<VectorType>(SrcTy)) { if (const VectorType *DVTy = dyn_cast<VectorType>(DstTy)) { - return SVTy->getElementType()->isFPOrFPVector() && - DVTy->getElementType()->isIntOrIntVector() && + return SVTy->getElementType()->isFPOrFPVectorTy() && + DVTy->getElementType()->isIntOrIntVectorTy() && SVTy->getNumElements() == DVTy->getNumElements(); } } - return SrcTy->isFPOrFPVector() && DstTy->isIntOrIntVector(); + return SrcTy->isFPOrFPVectorTy() && DstTy->isIntOrIntVectorTy(); case Instruction::PtrToInt: - return isa<PointerType>(SrcTy) && DstTy->isInteger(); + return isa<PointerType>(SrcTy) && DstTy->isIntegerTy(); case Instruction::IntToPtr: - return SrcTy->isInteger() && isa<PointerType>(DstTy); + return SrcTy->isIntegerTy() && isa<PointerType>(DstTy); case Instruction::BitCast: // BitCast implies a no-op cast of type only. No bits change. // However, you can't cast pointers to anything but pointers. @@ -2865,25 +2879,53 @@ ICmpInst::makeConstantRange(Predicate pred, const APInt &C) { default: llvm_unreachable("Invalid ICmp opcode to ConstantRange ctor!"); case ICmpInst::ICMP_EQ: Upper++; break; case ICmpInst::ICMP_NE: Lower++; break; - case ICmpInst::ICMP_ULT: Lower = APInt::getMinValue(BitWidth); break; - case ICmpInst::ICMP_SLT: Lower = APInt::getSignedMinValue(BitWidth); break; + case ICmpInst::ICMP_ULT: + Lower = APInt::getMinValue(BitWidth); + // Check for an empty-set condition. 
+ if (Lower == Upper) + return ConstantRange(BitWidth, /*isFullSet=*/false); + break; + case ICmpInst::ICMP_SLT: + Lower = APInt::getSignedMinValue(BitWidth); + // Check for an empty-set condition. + if (Lower == Upper) + return ConstantRange(BitWidth, /*isFullSet=*/false); + break; case ICmpInst::ICMP_UGT: Lower++; Upper = APInt::getMinValue(BitWidth); // Min = Next(Max) + // Check for an empty-set condition. + if (Lower == Upper) + return ConstantRange(BitWidth, /*isFullSet=*/false); break; case ICmpInst::ICMP_SGT: Lower++; Upper = APInt::getSignedMinValue(BitWidth); // Min = Next(Max) + // Check for an empty-set condition. + if (Lower == Upper) + return ConstantRange(BitWidth, /*isFullSet=*/false); break; case ICmpInst::ICMP_ULE: Lower = APInt::getMinValue(BitWidth); Upper++; + // Check for a full-set condition. + if (Lower == Upper) + return ConstantRange(BitWidth, /*isFullSet=*/true); break; case ICmpInst::ICMP_SLE: Lower = APInt::getSignedMinValue(BitWidth); Upper++; + // Check for a full-set condition. + if (Lower == Upper) + return ConstantRange(BitWidth, /*isFullSet=*/true); break; case ICmpInst::ICMP_UGE: Upper = APInt::getMinValue(BitWidth); // Min = Next(Max) + // Check for a full-set condition. + if (Lower == Upper) + return ConstantRange(BitWidth, /*isFullSet=*/true); break; case ICmpInst::ICMP_SGE: Upper = APInt::getSignedMinValue(BitWidth); // Min = Next(Max) + // Check for a full-set condition. + if (Lower == Upper) + return ConstantRange(BitWidth, /*isFullSet=*/true); break; } return ConstantRange(Lower, Upper); diff --git a/lib/VMCore/LLVMContextImpl.h b/lib/VMCore/LLVMContextImpl.h index ccca789..62491d8 100644 --- a/lib/VMCore/LLVMContextImpl.h +++ b/lib/VMCore/LLVMContextImpl.h @@ -116,6 +116,10 @@ public: ConstantStruct, true /*largekey*/> StructConstantsTy; StructConstantsTy StructConstants; + typedef ConstantUniqueMap<Constant*, UnionType, ConstantUnion> + UnionConstantsTy; + UnionConstantsTy UnionConstants; + typedef ConstantUniqueMap<std::vector<Constant*>, VectorType, ConstantVector> VectorConstantsTy; VectorConstantsTy VectorConstants; @@ -159,12 +163,16 @@ public: TypeMap<PointerValType, PointerType> PointerTypes; TypeMap<FunctionValType, FunctionType> FunctionTypes; TypeMap<StructValType, StructType> StructTypes; + TypeMap<UnionValType, UnionType> UnionTypes; TypeMap<IntegerValType, IntegerType> IntegerTypes; // Opaque types are not structurally uniqued, so don't use TypeMap. typedef SmallPtrSet<const OpaqueType*, 8> OpaqueTypesTy; OpaqueTypesTy OpaqueTypes; - + + /// Used as an abstract type that will never be resolved. + OpaqueType *const AlwaysOpaqueTy; + /// ValueHandles - This map keeps track of all of the value handles that are /// watching a Value*. The Value::HasValueHandle bit is used to know @@ -196,7 +204,12 @@ public: Int8Ty(C, 8), Int16Ty(C, 16), Int32Ty(C, 32), - Int64Ty(C, 64) { } + Int64Ty(C, 64), + AlwaysOpaqueTy(new OpaqueType(C)) { + // Make sure the AlwaysOpaqueTy stays alive as long as the Context. + AlwaysOpaqueTy->addRef(); + OpaqueTypes.insert(AlwaysOpaqueTy); + } ~LLVMContextImpl() { ExprConstants.freeConstants(); @@ -217,6 +230,7 @@ public: delete I->second; } MDNodeSet.clear(); + AlwaysOpaqueTy->dropRef(); for (OpaqueTypesTy::iterator I = OpaqueTypes.begin(), E = OpaqueTypes.end(); I != E; ++I) { (*I)->AbstractTypeUsers.clear(); diff --git a/lib/VMCore/Makefile b/lib/VMCore/Makefile index ecadaee..bc5e77d 100644 --- a/lib/VMCore/Makefile +++ b/lib/VMCore/Makefile @@ -9,7 +9,7 @@ LEVEL = ../.. 
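Back in the Instructions.cpp hunk above, the point of the new guards in makeConstantRange: a degenerate predicate such as x u< 0 (never true) or x u<= ~0 (always true) drives Lower and Upper to the same value, and the half-open [Lower, Upper) representation cannot tell an empty set apart from a full one, so the explicit empty- or full-set constructor is returned instead. A hedged illustration of both degenerate cases over i32:

    #include "llvm/Instructions.h"
    #include "llvm/Support/ConstantRange.h"
    #include "llvm/ADT/APInt.h"
    #include <cassert>
    using namespace llvm;

    void constantRangeCorners() {
      // x u< 0: no value satisfies it -> explicit empty set.
      ConstantRange Empty = ICmpInst::makeConstantRange(
          ICmpInst::ICMP_ULT, APInt::getMinValue(32));
      assert(Empty.isEmptySet());

      // x u<= 0xffffffff: every value satisfies it -> explicit full set.
      ConstantRange Full = ICmpInst::makeConstantRange(
          ICmpInst::ICMP_ULE, APInt::getMaxValue(32));
      assert(Full.isFullSet());
    }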
LIBRARYNAME = LLVMCore BUILD_ARCHIVE = 1 -#CXXFLAGS = -fno-rtti +REQUIRES_RTTI = 1 BUILT_SOURCES = $(PROJ_OBJ_ROOT)/include/llvm/Intrinsics.gen diff --git a/lib/VMCore/Metadata.cpp b/lib/VMCore/Metadata.cpp index ee8e713..07a5f3c 100644 --- a/lib/VMCore/Metadata.cpp +++ b/lib/VMCore/Metadata.cpp @@ -186,43 +186,50 @@ void MDNode::destroy() { } MDNode *MDNode::getMDNode(LLVMContext &Context, Value *const *Vals, - unsigned NumVals, FunctionLocalness FL) { + unsigned NumVals, FunctionLocalness FL, + bool Insert) { LLVMContextImpl *pImpl = Context.pImpl; FoldingSetNodeID ID; for (unsigned i = 0; i != NumVals; ++i) ID.AddPointer(Vals[i]); void *InsertPoint; - MDNode *N = pImpl->MDNodeSet.FindNodeOrInsertPos(ID, InsertPoint); - if (!N) { - bool isFunctionLocal = false; - switch (FL) { - case FL_Unknown: - for (unsigned i = 0; i != NumVals; ++i) { - Value *V = Vals[i]; - if (!V) continue; - if (isa<Instruction>(V) || isa<Argument>(V) || isa<BasicBlock>(V) || - (isa<MDNode>(V) && cast<MDNode>(V)->isFunctionLocal())) { - isFunctionLocal = true; - break; - } + MDNode *N = NULL; + + if ((N = pImpl->MDNodeSet.FindNodeOrInsertPos(ID, InsertPoint))) + return N; + + if (!Insert) + return NULL; + + bool isFunctionLocal = false; + switch (FL) { + case FL_Unknown: + for (unsigned i = 0; i != NumVals; ++i) { + Value *V = Vals[i]; + if (!V) continue; + if (isa<Instruction>(V) || isa<Argument>(V) || isa<BasicBlock>(V) || + (isa<MDNode>(V) && cast<MDNode>(V)->isFunctionLocal())) { + isFunctionLocal = true; + break; } - break; - case FL_No: - isFunctionLocal = false; - break; - case FL_Yes: - isFunctionLocal = true; - break; } + break; + case FL_No: + isFunctionLocal = false; + break; + case FL_Yes: + isFunctionLocal = true; + break; + } - // Coallocate space for the node and Operands together, then placement new. - void *Ptr = malloc(sizeof(MDNode)+NumVals*sizeof(MDNodeOperand)); - N = new (Ptr) MDNode(Context, Vals, NumVals, isFunctionLocal); + // Coallocate space for the node and Operands together, then placement new. + void *Ptr = malloc(sizeof(MDNode)+NumVals*sizeof(MDNodeOperand)); + N = new (Ptr) MDNode(Context, Vals, NumVals, isFunctionLocal); + + // InsertPoint will have been set by the FindNodeOrInsertPos call. + pImpl->MDNodeSet.InsertNode(N, InsertPoint); - // InsertPoint will have been set by the FindNodeOrInsertPos call. - pImpl->MDNodeSet.InsertNode(N, InsertPoint); - } return N; } @@ -230,11 +237,16 @@ MDNode *MDNode::get(LLVMContext &Context, Value*const* Vals, unsigned NumVals) { return getMDNode(Context, Vals, NumVals, FL_Unknown); } -MDNode *MDNode::getWhenValsUnresolved(LLVMContext &Context, Value*const* Vals, +MDNode *MDNode::getWhenValsUnresolved(LLVMContext &Context, Value *const *Vals, unsigned NumVals, bool isFunctionLocal) { return getMDNode(Context, Vals, NumVals, isFunctionLocal ? FL_Yes : FL_No); } +MDNode *MDNode::getIfExists(LLVMContext &Context, Value *const *Vals, + unsigned NumVals) { + return getMDNode(Context, Vals, NumVals, FL_Unknown, false); +} + /// getOperand - Return specified operand. 
Value *MDNode::getOperand(unsigned i) const { return *getOperandPtr(const_cast<MDNode*>(this), i); diff --git a/lib/VMCore/Module.cpp b/lib/VMCore/Module.cpp index 503e708..001bb00 100644 --- a/lib/VMCore/Module.cpp +++ b/lib/VMCore/Module.cpp @@ -15,6 +15,7 @@ #include "llvm/InstrTypes.h" #include "llvm/Constants.h" #include "llvm/DerivedTypes.h" +#include "llvm/GVMaterializer.h" #include "llvm/LLVMContext.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringExtras.h" @@ -56,7 +57,7 @@ template class llvm::SymbolTableListTraits<GlobalAlias, Module>; // Module::Module(StringRef MID, LLVMContext& C) - : Context(C), ModuleID(MID), DataLayout("") { + : Context(C), Materializer(NULL), ModuleID(MID), DataLayout("") { ValSymTab = new ValueSymbolTable(); TypeSymTab = new TypeSymbolTable(); NamedMDSymTab = new MDSymbolTable(); @@ -372,6 +373,52 @@ std::string Module::getTypeName(const Type *Ty) const { } //===----------------------------------------------------------------------===// +// Methods to control the materialization of GlobalValues in the Module. +// +void Module::setMaterializer(GVMaterializer *GVM) { + assert(!Materializer && + "Module already has a GVMaterializer. Call MaterializeAllPermanently" + " to clear it out before setting another one."); + Materializer.reset(GVM); +} + +bool Module::isMaterializable(const GlobalValue *GV) const { + if (Materializer) + return Materializer->isMaterializable(GV); + return false; +} + +bool Module::isDematerializable(const GlobalValue *GV) const { + if (Materializer) + return Materializer->isDematerializable(GV); + return false; +} + +bool Module::Materialize(GlobalValue *GV, std::string *ErrInfo) { + if (Materializer) + return Materializer->Materialize(GV, ErrInfo); + return false; +} + +void Module::Dematerialize(GlobalValue *GV) { + if (Materializer) + return Materializer->Dematerialize(GV); +} + +bool Module::MaterializeAll(std::string *ErrInfo) { + if (!Materializer) + return false; + return Materializer->MaterializeModule(this, ErrInfo); +} + +bool Module::MaterializeAllPermanently(std::string *ErrInfo) { + if (MaterializeAll(ErrInfo)) + return true; + Materializer.reset(); + return false; +} + +//===----------------------------------------------------------------------===// // Other module related stuff. // diff --git a/lib/VMCore/ModuleProvider.cpp b/lib/VMCore/ModuleProvider.cpp deleted file mode 100644 index cfff97c..0000000 --- a/lib/VMCore/ModuleProvider.cpp +++ /dev/null @@ -1,26 +0,0 @@ -//===-- ModuleProvider.cpp - Base implementation for module providers -----===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// Minimal implementation of the abstract interface for providing a module. 
-// -//===----------------------------------------------------------------------===// - -#include "llvm/ModuleProvider.h" -#include "llvm/Module.h" -using namespace llvm; - -/// ctor - always have a valid Module -/// -ModuleProvider::ModuleProvider() : TheModule(0) { } - -/// dtor - when we leave, we take our Module with us -/// -ModuleProvider::~ModuleProvider() { - delete TheModule; -} diff --git a/lib/VMCore/Pass.cpp b/lib/VMCore/Pass.cpp index 45000f2..a782e5a 100644 --- a/lib/VMCore/Pass.cpp +++ b/lib/VMCore/Pass.cpp @@ -16,7 +16,6 @@ #include "llvm/Pass.h" #include "llvm/PassManager.h" #include "llvm/Module.h" -#include "llvm/ModuleProvider.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringMap.h" #include "llvm/Support/Debug.h" @@ -195,6 +194,9 @@ PassManagerType BasicBlockPass::getPotentialPassManagerType() const { // namespace { class PassRegistrar { + /// Guards the contents of this class. + mutable sys::SmartMutex<true> Lock; + /// PassInfoMap - Keep track of the passinfo object for each registered llvm /// pass. typedef std::map<intptr_t, const PassInfo*> MapType; @@ -214,16 +216,19 @@ class PassRegistrar { public: const PassInfo *GetPassInfo(intptr_t TI) const { + sys::SmartScopedLock<true> Guard(Lock); MapType::const_iterator I = PassInfoMap.find(TI); return I != PassInfoMap.end() ? I->second : 0; } const PassInfo *GetPassInfo(StringRef Arg) const { + sys::SmartScopedLock<true> Guard(Lock); StringMapType::const_iterator I = PassInfoStringMap.find(Arg); return I != PassInfoStringMap.end() ? I->second : 0; } void RegisterPass(const PassInfo &PI) { + sys::SmartScopedLock<true> Guard(Lock); bool Inserted = PassInfoMap.insert(std::make_pair(PI.getTypeInfo(),&PI)).second; assert(Inserted && "Pass registered multiple times!"); Inserted=Inserted; @@ -231,6 +236,7 @@ public: } void UnregisterPass(const PassInfo &PI) { + sys::SmartScopedLock<true> Guard(Lock); MapType::iterator I = PassInfoMap.find(PI.getTypeInfo()); assert(I != PassInfoMap.end() && "Pass registered but not in map!"); @@ -240,6 +246,7 @@ public: } void EnumerateWith(PassRegistrationListener *L) { + sys::SmartScopedLock<true> Guard(Lock); for (MapType::const_iterator I = PassInfoMap.begin(), E = PassInfoMap.end(); I != E; ++I) L->passEnumerate(I->second); @@ -250,6 +257,7 @@ public: void RegisterAnalysisGroup(PassInfo *InterfaceInfo, const PassInfo *ImplementationInfo, bool isDefault) { + sys::SmartScopedLock<true> Guard(Lock); AnalysisGroupInfo &AGI = AnalysisGroupInfoMap[InterfaceInfo]; assert(AGI.Implementations.count(ImplementationInfo) == 0 && "Cannot add a pass to the same analysis group more than once!"); diff --git a/lib/VMCore/PassManager.cpp b/lib/VMCore/PassManager.cpp index 0c0d64e..8a3527e 100644 --- a/lib/VMCore/PassManager.cpp +++ b/lib/VMCore/PassManager.cpp @@ -18,7 +18,6 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/Timer.h" #include "llvm/Module.h" -#include "llvm/ModuleProvider.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/ManagedStatic.h" #include "llvm/Support/raw_ostream.h" @@ -1194,15 +1193,13 @@ bool BBPassManager::doFinalization(Function &F) { // FunctionPassManager implementation /// Create new Function pass manager -FunctionPassManager::FunctionPassManager(ModuleProvider *P) { +FunctionPassManager::FunctionPassManager(Module *m) : M(m) { FPM = new FunctionPassManagerImpl(0); // FPM is the top level manager. 
FPM->setTopLevelManager(FPM); AnalysisResolver *AR = new AnalysisResolver(*FPM); FPM->setResolver(AR); - - MP = P; } FunctionPassManager::~FunctionPassManager() { @@ -1223,9 +1220,11 @@ void FunctionPassManager::add(Pass *P) { /// so, return true. /// bool FunctionPassManager::run(Function &F) { - std::string errstr; - if (MP->materializeFunction(&F, &errstr)) { - llvm_report_error("Error reading bitcode file: " + errstr); + if (F.isMaterializable()) { + std::string errstr; + if (F.Materialize(&errstr)) { + llvm_report_error("Error reading bitcode file: " + errstr); + } } return FPM->run(F); } @@ -1234,13 +1233,13 @@ bool FunctionPassManager::run(Function &F) { /// doInitialization - Run all of the initializers for the function passes. /// bool FunctionPassManager::doInitialization() { - return FPM->doInitialization(*MP->getModule()); + return FPM->doInitialization(*M); } /// doFinalization - Run all of the finalizers for the function passes. /// bool FunctionPassManager::doFinalization() { - return FPM->doFinalization(*MP->getModule()); + return FPM->doFinalization(*M); } //===----------------------------------------------------------------------===// diff --git a/lib/VMCore/Type.cpp b/lib/VMCore/Type.cpp index 044de4f..f4cd366 100644 --- a/lib/VMCore/Type.cpp +++ b/lib/VMCore/Type.cpp @@ -50,8 +50,8 @@ void AbstractTypeUser::setType(Value *V, const Type *NewTy) { /// Because of the way Type subclasses are allocated, this function is necessary /// to use the correct kind of "delete" operator to deallocate the Type object. -/// Some type objects (FunctionTy, StructTy) allocate additional space after -/// the space for their derived type to hold the contained types array of +/// Some type objects (FunctionTy, StructTy, UnionTy) allocate additional space +/// after the space for their derived type to hold the contained types array of /// PATypeHandles. Using this allocation scheme means all the PATypeHandles are /// allocated with the type object, decreasing allocations and eliminating the /// need for a std::vector to be used in the Type class itself. @@ -61,7 +61,8 @@ void Type::destroy() const { // Structures and Functions allocate their contained types past the end of // the type object itself. These need to be destroyed differently than the // other types. - if (isa<FunctionType>(this) || isa<StructType>(this)) { + if (isa<FunctionType>(this) || isa<StructType>(this) || + isa<UnionType>(this)) { // First, make sure we destruct any PATypeHandles allocated by these // subclasses. They must be manually destructed. for (unsigned i = 0; i < NumContainedTys; ++i) @@ -71,8 +72,10 @@ void Type::destroy() const { // to delete this as an array of char. if (isa<FunctionType>(this)) static_cast<const FunctionType*>(this)->FunctionType::~FunctionType(); - else + else if (isa<StructType>(this)) static_cast<const StructType*>(this)->StructType::~StructType(); + else + static_cast<const UnionType*>(this)->UnionType::~UnionType(); // Finally, remove the memory as an array deallocation of the chars it was // constructed from. @@ -124,32 +127,32 @@ const Type *Type::getScalarType() const { return this; } -/// isInteger - Return true if this is an IntegerType of the specified width. -bool Type::isInteger(unsigned Bitwidth) const { - return isInteger() && cast<IntegerType>(this)->getBitWidth() == Bitwidth; +/// isIntegerTy - Return true if this is an IntegerType of the specified width. 
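With ModuleProvider gone, FunctionPassManager above is built straight from a Module, and run() asks the Function itself whether it is materializable before executing passes on it. A migration sketch (the driver runOnAllFunctions is hypothetical):

    #include "llvm/Module.h"
    #include "llvm/PassManager.h"

    void runOnAllFunctions(llvm::Module *M, llvm::Pass *P) {
      llvm::FunctionPassManager FPM(M);  // was: FunctionPassManager(new ExistingModuleProvider(M))
      FPM.add(P);
      FPM.doInitialization();
      for (llvm::Module::iterator F = M->begin(), E = M->end(); F != E; ++F)
        // Unread lazy bodies still look like declarations, so also accept
        // anything the materializer can supply; run() reads it on demand.
        if (!F->isDeclaration() || F->isMaterializable())
          FPM.run(*F);
      FPM.doFinalization();
    }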
+bool Type::isIntegerTy(unsigned Bitwidth) const { + return isIntegerTy() && cast<IntegerType>(this)->getBitWidth() == Bitwidth; } -/// isIntOrIntVector - Return true if this is an integer type or a vector of +/// isIntOrIntVectorTy - Return true if this is an integer type or a vector of /// integer types. /// -bool Type::isIntOrIntVector() const { - if (isInteger()) +bool Type::isIntOrIntVectorTy() const { + if (isIntegerTy()) return true; if (ID != Type::VectorTyID) return false; - return cast<VectorType>(this)->getElementType()->isInteger(); + return cast<VectorType>(this)->getElementType()->isIntegerTy(); } -/// isFPOrFPVector - Return true if this is a FP type or a vector of FP types. +/// isFPOrFPVectorTy - Return true if this is an FP type or a vector of FP types. /// -bool Type::isFPOrFPVector() const { +bool Type::isFPOrFPVectorTy() const { if (ID == Type::FloatTyID || ID == Type::DoubleTyID || ID == Type::FP128TyID || ID == Type::X86_FP80TyID || ID == Type::PPC_FP128TyID) return true; if (ID != Type::VectorTyID) return false; - return cast<VectorType>(this)->getElementType()->isFloatingPoint(); + return cast<VectorType>(this)->getElementType()->isFloatingPointTy(); } // canLosslesslyBitCastTo - Return true if this type can be converted to @@ -204,7 +207,7 @@ unsigned Type::getScalarSizeInBits() const { int Type::getFPMantissaWidth() const { if (const VectorType *VTy = dyn_cast<VectorType>(this)) return VTy->getElementType()->getFPMantissaWidth(); - assert(isFloatingPoint() && "Not a floating point type!"); + assert(isFloatingPointTy() && "Not a floating point type!"); if (ID == FloatTyID) return 24; if (ID == DoubleTyID) return 53; if (ID == X86_FP80TyID) return 64; @@ -226,7 +229,7 @@ bool Type::isSizedDerivedType() const { if (const VectorType *PTy = dyn_cast<VectorType>(this)) return PTy->getElementType()->isSized(); - if (!isa<StructType>(this)) + if (!isa<StructType>(this) && !isa<UnionType>(this)) return false; // Okay, our struct is sized if all of the elements are... @@ -285,7 +288,7 @@ std::string Type::getDescription() const { bool StructType::indexValid(const Value *V) const { // Structure indexes require 32-bit integer constants. - if (V->getType()->isInteger(32)) + if (V->getType()->isIntegerTy(32)) if (const ConstantInt *CU = dyn_cast<ConstantInt>(V)) return indexValid(CU->getZExtValue()); return false; @@ -308,6 +311,32 @@ const Type *StructType::getTypeAtIndex(unsigned Idx) const { return ContainedTys[Idx]; } + +bool UnionType::indexValid(const Value *V) const { + // Union indexes require 32-bit integer constants. + if (V->getType()->isIntegerTy(32)) + if (const ConstantInt *CU = dyn_cast<ConstantInt>(V)) + return indexValid(CU->getZExtValue()); + return false; +} + +bool UnionType::indexValid(unsigned V) const { + return V < NumContainedTys; +} + +// getTypeAtIndex - Given an index value into the type, return the type of the +// element. For a union type, this must be a constant value...
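The renames above are mechanical: isInteger, isIntOrIntVector, isFloatingPoint and friends gain a Ty suffix. A caller-side sketch of the new spellings (classify is a hypothetical helper):

    #include "llvm/Type.h"

    const char *classify(const llvm::Type *Ty) {
      if (Ty->isIntegerTy(32))        // was: Ty->isInteger(32)
        return "exactly i32";
      if (Ty->isIntOrIntVectorTy())   // was: Ty->isIntOrIntVector()
        return "integer or integer vector";
      if (Ty->isFPOrFPVectorTy())     // was: Ty->isFPOrFPVector()
        return "FP or FP vector";
      return "something else";
    }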
+// +const Type *UnionType::getTypeAtIndex(const Value *V) const { + unsigned Idx = (unsigned)cast<ConstantInt>(V)->getZExtValue(); + return getTypeAtIndex(Idx); +} + +const Type *UnionType::getTypeAtIndex(unsigned Idx) const { + assert(indexValid(Idx) && "Invalid union index!"); + return ContainedTys[Idx]; +} + //===----------------------------------------------------------------------===// // Primitive 'Type' data //===----------------------------------------------------------------------===// @@ -463,6 +492,23 @@ StructType::StructType(LLVMContext &C, setAbstract(isAbstract); } +UnionType::UnionType(LLVMContext &C, const Type* const* Types, unsigned NumTypes) + : CompositeType(C, UnionTyID) { + ContainedTys = reinterpret_cast<PATypeHandle*>(this + 1); + NumContainedTys = NumTypes; + bool isAbstract = false; + for (unsigned i = 0; i < NumTypes; ++i) { + assert(Types[i] && "<null> type for union field!"); + assert(isValidElementType(Types[i]) && + "Invalid type for union element!"); + new (&ContainedTys[i]) PATypeHandle(Types[i], this); + isAbstract |= Types[i]->isAbstract(); + } + + // Calculate whether or not this type is abstract + setAbstract(isAbstract); +} + ArrayType::ArrayType(const Type *ElType, uint64_t NumEl) : SequentialType(ArrayTyID, ElType) { NumElements = NumEl; @@ -507,30 +553,7 @@ void DerivedType::dropAllTypeUses() { if (NumContainedTys != 0) { // The type must stay abstract. To do this, we insert a pointer to a type // that will never get resolved, thus will always be abstract. - static Type *AlwaysOpaqueTy = 0; - static PATypeHolder* Holder = 0; - Type *tmp = AlwaysOpaqueTy; - if (llvm_is_multithreaded()) { - sys::MemoryFence(); - if (!tmp) { - llvm_acquire_global_lock(); - tmp = AlwaysOpaqueTy; - if (!tmp) { - tmp = OpaqueType::get(getContext()); - PATypeHolder* tmp2 = new PATypeHolder(tmp); - sys::MemoryFence(); - AlwaysOpaqueTy = tmp; - Holder = tmp2; - } - - llvm_release_global_lock(); - } - } else if (!AlwaysOpaqueTy) { - AlwaysOpaqueTy = OpaqueType::get(getContext()); - Holder = new PATypeHolder(AlwaysOpaqueTy); - } - - ContainedTys[0] = AlwaysOpaqueTy; + ContainedTys[0] = getContext().pImpl->AlwaysOpaqueTy; // Change the rest of the types to be Int32Ty's. It doesn't matter what we // pick so long as it doesn't point back to this type. We choose something @@ -667,6 +690,13 @@ static bool TypesEqual(const Type *Ty, const Type *Ty2, if (!TypesEqual(STy->getElementType(i), STy2->getElementType(i), EqTypes)) return false; return true; + } else if (const UnionType *UTy = dyn_cast<UnionType>(Ty)) { + const UnionType *UTy2 = cast<UnionType>(Ty2); + if (UTy->getNumElements() != UTy2->getNumElements()) return false; + for (unsigned i = 0, e = UTy2->getNumElements(); i != e; ++i) + if (!TypesEqual(UTy->getElementType(i), UTy2->getElementType(i), EqTypes)) + return false; + return true; } else if (const ArrayType *ATy = dyn_cast<ArrayType>(Ty)) { const ArrayType *ATy2 = cast<ArrayType>(Ty2); return ATy->getNumElements() == ATy2->getNumElements() && @@ -881,7 +911,7 @@ VectorType *VectorType::get(const Type *ElementType, unsigned NumElements) { } bool VectorType::isValidElementType(const Type *ElemTy) { - return ElemTy->isInteger() || ElemTy->isFloatingPoint() || + return ElemTy->isIntegerTy() || ElemTy->isFloatingPointTy() || isa<OpaqueType>(ElemTy); } @@ -924,10 +954,64 @@ StructType *StructType::get(LLVMContext &Context, const Type *type, ...)
{ } bool StructType::isValidElementType(const Type *ElemTy) { - return ElemTy->getTypeID() != VoidTyID && ElemTy->getTypeID() != LabelTyID && - ElemTy->getTypeID() != MetadataTyID && !isa<FunctionType>(ElemTy); + return !ElemTy->isVoidTy() && !ElemTy->isLabelTy() && + !ElemTy->isMetadataTy() && !isa<FunctionType>(ElemTy); +} + + +//===----------------------------------------------------------------------===// +// Union Type Factory... +// + +UnionType *UnionType::get(const Type* const* Types, unsigned NumTypes) { + assert(NumTypes > 0 && "union must have at least one member type!"); + UnionValType UTV(Types, NumTypes); + UnionType *UT = 0; + + LLVMContextImpl *pImpl = Types[0]->getContext().pImpl; + + UT = pImpl->UnionTypes.get(UTV); + + if (!UT) { + // Value not found. Derive a new type! + UT = (UnionType*) operator new(sizeof(UnionType) + + sizeof(PATypeHandle) * NumTypes); + new (UT) UnionType(Types[0]->getContext(), Types, NumTypes); + pImpl->UnionTypes.add(UTV, UT); + } +#ifdef DEBUG_MERGE_TYPES + DEBUG(dbgs() << "Derived new type: " << *UT << "\n"); +#endif + return UT; +} + +UnionType *UnionType::get(const Type *type, ...) { + va_list ap; + SmallVector<const llvm::Type*, 8> UnionFields; + va_start(ap, type); + while (type) { + UnionFields.push_back(type); + type = va_arg(ap, llvm::Type*); + } + unsigned NumTypes = UnionFields.size(); + assert(NumTypes > 0 && "union must have at least one member type!"); + return llvm::UnionType::get(&UnionFields[0], NumTypes); } +bool UnionType::isValidElementType(const Type *ElemTy) { + return !ElemTy->isVoidTy() && !ElemTy->isLabelTy() && + !ElemTy->isMetadataTy() && !ElemTy->isFunctionTy(); +} + +int UnionType::getElementTypeIndex(const Type *ElemTy) const { + int index = 0; + for (UnionType::element_iterator I = element_begin(), E = element_end(); + I != E; ++I, ++index) { + if (ElemTy == *I) return index; + } + + return -1; +} //===----------------------------------------------------------------------===// // Pointer Type Factory... @@ -1192,6 +1276,21 @@ void StructType::typeBecameConcrete(const DerivedType *AbsTy) { // concrete - this could potentially change us from an abstract type to a // concrete type. // +void UnionType::refineAbstractType(const DerivedType *OldType, + const Type *NewType) { + LLVMContextImpl *pImpl = OldType->getContext().pImpl; + pImpl->UnionTypes.RefineAbstractType(this, OldType, NewType); +} + +void UnionType::typeBecameConcrete(const DerivedType *AbsTy) { + LLVMContextImpl *pImpl = AbsTy->getContext().pImpl; + pImpl->UnionTypes.TypeBecameConcrete(this, AbsTy); +} + +// refineAbstractType - Called when a contained type is found to be more +// concrete - this could potentially change us from an abstract type to a +// concrete type. 
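The factory above interns unions in LLVMContextImpl's UnionTypes map the same way struct types are uniqued, and the variadic overload is NULL-terminated like StructType::get's. A construction sketch (makeIntFloatUnion is hypothetical, and union types were an experimental addition at this point):

    #include "llvm/DerivedTypes.h"
    #include "llvm/LLVMContext.h"
    #include <cassert>

    const llvm::UnionType *makeIntFloatUnion(llvm::LLVMContext &Ctx) {
      llvm::UnionType *UT =
        llvm::UnionType::get(llvm::Type::getInt32Ty(Ctx),
                             llvm::Type::getFloatTy(Ctx),
                             NULL);                       // NULL ends the list
      assert(UT->getNumElements() == 2 && "two members expected");
      // getElementTypeIndex does a linear scan; -1 means "not a member".
      assert(UT->getElementTypeIndex(llvm::Type::getFloatTy(Ctx)) == 1);
      return UT;
    }

Because the map is keyed on the element-type vector (UnionValType below), a second call with the same member types hands back the identical UnionType pointer.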
+// void PointerType::refineAbstractType(const DerivedType *OldType, const Type *NewType) { LLVMContextImpl *pImpl = OldType->getContext().pImpl; diff --git a/lib/VMCore/TypesContext.h b/lib/VMCore/TypesContext.h index 93a801b..02ab113 100644 --- a/lib/VMCore/TypesContext.h +++ b/lib/VMCore/TypesContext.h @@ -68,7 +68,7 @@ static unsigned getSubElementHash(const Type *Ty) { class IntegerValType { uint32_t bits; public: - IntegerValType(uint16_t numbits) : bits(numbits) {} + IntegerValType(uint32_t numbits) : bits(numbits) {} static IntegerValType get(const IntegerType *Ty) { return IntegerValType(Ty->getBitWidth()); @@ -180,6 +180,32 @@ public: } }; +// UnionValType - Define a class to hold the key that goes into the TypeMap +// +class UnionValType { + std::vector<const Type*> ElTypes; +public: + UnionValType(const Type* const* Types, unsigned NumTypes) + : ElTypes(&Types[0], &Types[NumTypes]) {} + + static UnionValType get(const UnionType *UT) { + std::vector<const Type *> ElTypes; + ElTypes.reserve(UT->getNumElements()); + for (unsigned i = 0, e = UT->getNumElements(); i != e; ++i) + ElTypes.push_back(UT->getElementType(i)); + + return UnionValType(&ElTypes[0], ElTypes.size()); + } + + static unsigned hashTypeStructure(const UnionType *UT) { + return UT->getNumElements(); + } + + inline bool operator<(const UnionValType &UTV) const { + return (ElTypes < UTV.ElTypes); + } +}; + // FunctionValType - Define a class to hold the key that goes into the TypeMap // class FunctionValType { @@ -216,7 +242,6 @@ protected: /// std::multimap<unsigned, PATypeHolder> TypesByHash; -public: ~TypeMapBase() { // PATypeHolder won't destroy non-abstract types. // We can't destroy them by simply iterating, because @@ -236,6 +261,7 @@ public: } } +public: void RemoveFromTypesByHash(unsigned Hash, const Type *Ty) { std::multimap<unsigned, PATypeHolder>::iterator I = TypesByHash.lower_bound(Hash); @@ -281,7 +307,6 @@ class TypeMap : public TypeMapBase { std::map<ValType, PATypeHolder> Map; public: typedef typename std::map<ValType, PATypeHolder>::iterator iterator; - ~TypeMap() { print("ON EXIT"); } inline TypeClass *get(const ValType &V) { iterator I = Map.find(V); diff --git a/lib/VMCore/Value.cpp b/lib/VMCore/Value.cpp index 40679bf..3759b8a 100644 --- a/lib/VMCore/Value.cpp +++ b/lib/VMCore/Value.cpp @@ -341,12 +341,11 @@ Value *Value::stripPointerCasts() { } while (1); } -Value *Value::getUnderlyingObject() { +Value *Value::getUnderlyingObject(unsigned MaxLookup) { if (!isa<PointerType>(getType())) return this; Value *V = this; - unsigned MaxLookup = 6; - do { + for (unsigned Count = 0; MaxLookup == 0 || Count < MaxLookup; ++Count) { if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) { V = GEP->getPointerOperand(); } else if (Operator::getOpcode(V) == Instruction::BitCast) { @@ -359,7 +358,7 @@ Value *Value::getUnderlyingObject() { return V; } assert(isa<PointerType>(V->getType()) && "Unexpected operand type!"); - } while (--MaxLookup); + } return V; } diff --git a/lib/VMCore/ValueTypes.cpp b/lib/VMCore/ValueTypes.cpp index 7f9a6cd..62b9034 100644 --- a/lib/VMCore/ValueTypes.cpp +++ b/lib/VMCore/ValueTypes.cpp @@ -36,12 +36,12 @@ EVT EVT::getExtendedVectorVT(LLVMContext &Context, EVT VT, bool EVT::isExtendedFloatingPoint() const { assert(isExtended() && "Type is not extended!"); - return LLVMTy->isFPOrFPVector(); + return LLVMTy->isFPOrFPVectorTy(); } bool EVT::isExtendedInteger() const { assert(isExtended() && "Type is not extended!"); - return LLVMTy->isIntOrIntVector(); + return 
LLVMTy->isIntOrIntVectorTy(); } bool EVT::isExtendedVector() const { diff --git a/lib/VMCore/Verifier.cpp b/lib/VMCore/Verifier.cpp index 76d9d43..2b4892b 100644 --- a/lib/VMCore/Verifier.cpp +++ b/lib/VMCore/Verifier.cpp @@ -47,7 +47,6 @@ #include "llvm/IntrinsicInst.h" #include "llvm/Metadata.h" #include "llvm/Module.h" -#include "llvm/ModuleProvider.h" #include "llvm/Pass.h" #include "llvm/PassManager.h" #include "llvm/TypeSymbolTable.h" @@ -162,7 +161,8 @@ namespace { VerifierFailureAction action; // What to do if verification fails. Module *Mod; // Module we are verifying right now - DominatorTree *DT; // Dominator Tree, caution can be null! + LLVMContext *Context; // Context within which we are verifying + DominatorTree *DT; // Dominator Tree, caution can be null! std::string Messages; raw_string_ostream MessagesStr; @@ -179,24 +179,25 @@ namespace { Verifier() : FunctionPass(&ID), Broken(false), RealPass(true), action(AbortProcessAction), - DT(0), MessagesStr(Messages) {} + Mod(0), Context(0), DT(0), MessagesStr(Messages) {} explicit Verifier(VerifierFailureAction ctn) : FunctionPass(&ID), - Broken(false), RealPass(true), action(ctn), DT(0), + Broken(false), RealPass(true), action(ctn), Mod(0), Context(0), DT(0), MessagesStr(Messages) {} explicit Verifier(bool AB) : FunctionPass(&ID), Broken(false), RealPass(true), - action( AB ? AbortProcessAction : PrintMessageAction), DT(0), - MessagesStr(Messages) {} + action( AB ? AbortProcessAction : PrintMessageAction), Mod(0), + Context(0), DT(0), MessagesStr(Messages) {} explicit Verifier(DominatorTree &dt) : FunctionPass(&ID), - Broken(false), RealPass(false), action(PrintMessageAction), - DT(&dt), MessagesStr(Messages) {} + Broken(false), RealPass(false), action(PrintMessageAction), Mod(0), + Context(0), DT(&dt), MessagesStr(Messages) {} bool doInitialization(Module &M) { Mod = &M; + Context = &M.getContext(); verifyTypeSymbolTable(M.getTypeSymbolTable()); // If this is a real pass, in a pass manager, we must abort before @@ -212,6 +213,7 @@ namespace { if (RealPass) DT = &getAnalysis<DominatorTree>(); Mod = F.getParent(); + if (!Context) Context = &F.getContext(); visit(F); InstsInThisBlock.clear(); @@ -315,6 +317,7 @@ namespace { void visitStoreInst(StoreInst &SI); void visitInstruction(Instruction &I); void visitTerminatorInst(TerminatorInst &I); + void visitBranchInst(BranchInst &BI); void visitReturnInst(ReturnInst &RI); void visitSwitchInst(SwitchInst &SI); void visitSelectInst(SelectInst &SI); @@ -413,10 +416,10 @@ void Verifier::visit(Instruction &I) { void Verifier::visitGlobalValue(GlobalValue &GV) { Assert1(!GV.isDeclaration() || + GV.isMaterializable() || GV.hasExternalLinkage() || GV.hasDLLImportLinkage() || GV.hasExternalWeakLinkage() || - GV.hasGhostLinkage() || (isa<GlobalAlias>(GV) && (GV.hasLocalLinkage() || GV.hasWeakLinkage())), "Global is external, but doesn't have external or dllimport or weak linkage!", @@ -597,6 +600,9 @@ void Verifier::visitFunction(Function &F) { const FunctionType *FT = F.getFunctionType(); unsigned NumArgs = F.arg_size(); + Assert1(Context == &F.getContext(), + "Function context does not match Module context!", &F); + Assert1(!F.hasCommonLinkage(), "Functions may not have common linkage", &F); Assert2(FT->getNumParams() == NumArgs, "# formal arguments must match # of arguments for function type!", @@ -648,9 +654,11 @@ void Verifier::visitFunction(Function &F) { "Function takes metadata but isn't an intrinsic", I, &F); } - if (F.isDeclaration()) { + if (F.isMaterializable()) { + // Function has 
a body somewhere we can't see. + } else if (F.isDeclaration()) { Assert1(F.hasExternalLinkage() || F.hasDLLImportLinkage() || - F.hasExternalWeakLinkage() || F.hasGhostLinkage(), + F.hasExternalWeakLinkage(), "invalid linkage type for function declaration", &F); } else { // Verify that this function (which has a body) is not named "llvm.*". It @@ -742,6 +750,14 @@ void Verifier::visitTerminatorInst(TerminatorInst &I) { visitInstruction(I); } +void Verifier::visitBranchInst(BranchInst &BI) { + if (BI.isConditional()) { + Assert2(BI.getCondition()->getType()->isIntegerTy(1), + "Branch condition is not 'i1' type!", &BI, BI.getCondition()); + } + visitTerminatorInst(BI); +} + void Verifier::visitReturnInst(ReturnInst &RI) { Function *F = RI.getParent()->getParent(); unsigned N = RI.getNumOperands(); @@ -820,8 +836,8 @@ void Verifier::visitTruncInst(TruncInst &I) { unsigned SrcBitSize = SrcTy->getScalarSizeInBits(); unsigned DestBitSize = DestTy->getScalarSizeInBits(); - Assert1(SrcTy->isIntOrIntVector(), "Trunc only operates on integer", &I); - Assert1(DestTy->isIntOrIntVector(), "Trunc only produces integer", &I); + Assert1(SrcTy->isIntOrIntVectorTy(), "Trunc only operates on integer", &I); + Assert1(DestTy->isIntOrIntVectorTy(), "Trunc only produces integer", &I); Assert1(isa<VectorType>(SrcTy) == isa<VectorType>(DestTy), "trunc source and destination must both be a vector or neither", &I); Assert1(SrcBitSize > DestBitSize,"DestTy too big for Trunc", &I); @@ -835,8 +851,8 @@ void Verifier::visitZExtInst(ZExtInst &I) { const Type *DestTy = I.getType(); // Get the size of the types in bits, we'll need this later - Assert1(SrcTy->isIntOrIntVector(), "ZExt only operates on integer", &I); - Assert1(DestTy->isIntOrIntVector(), "ZExt only produces an integer", &I); + Assert1(SrcTy->isIntOrIntVectorTy(), "ZExt only operates on integer", &I); + Assert1(DestTy->isIntOrIntVectorTy(), "ZExt only produces an integer", &I); Assert1(isa<VectorType>(SrcTy) == isa<VectorType>(DestTy), "zext source and destination must both be a vector or neither", &I); unsigned SrcBitSize = SrcTy->getScalarSizeInBits(); @@ -856,8 +872,8 @@ void Verifier::visitSExtInst(SExtInst &I) { unsigned SrcBitSize = SrcTy->getScalarSizeInBits(); unsigned DestBitSize = DestTy->getScalarSizeInBits(); - Assert1(SrcTy->isIntOrIntVector(), "SExt only operates on integer", &I); - Assert1(DestTy->isIntOrIntVector(), "SExt only produces an integer", &I); + Assert1(SrcTy->isIntOrIntVectorTy(), "SExt only operates on integer", &I); + Assert1(DestTy->isIntOrIntVectorTy(), "SExt only produces an integer", &I); Assert1(isa<VectorType>(SrcTy) == isa<VectorType>(DestTy), "sext source and destination must both be a vector or neither", &I); Assert1(SrcBitSize < DestBitSize,"Type too small for SExt", &I); @@ -873,8 +889,8 @@ void Verifier::visitFPTruncInst(FPTruncInst &I) { unsigned SrcBitSize = SrcTy->getScalarSizeInBits(); unsigned DestBitSize = DestTy->getScalarSizeInBits(); - Assert1(SrcTy->isFPOrFPVector(),"FPTrunc only operates on FP", &I); - Assert1(DestTy->isFPOrFPVector(),"FPTrunc only produces an FP", &I); + Assert1(SrcTy->isFPOrFPVectorTy(),"FPTrunc only operates on FP", &I); + Assert1(DestTy->isFPOrFPVectorTy(),"FPTrunc only produces an FP", &I); Assert1(isa<VectorType>(SrcTy) == isa<VectorType>(DestTy), "fptrunc source and destination must both be a vector or neither",&I); Assert1(SrcBitSize > DestBitSize,"DestTy too big for FPTrunc", &I); @@ -891,8 +907,8 @@ void Verifier::visitFPExtInst(FPExtInst &I) { unsigned SrcBitSize = 
SrcTy->getScalarSizeInBits(); unsigned DestBitSize = DestTy->getScalarSizeInBits(); - Assert1(SrcTy->isFPOrFPVector(),"FPExt only operates on FP", &I); - Assert1(DestTy->isFPOrFPVector(),"FPExt only produces an FP", &I); + Assert1(SrcTy->isFPOrFPVectorTy(),"FPExt only operates on FP", &I); + Assert1(DestTy->isFPOrFPVectorTy(),"FPExt only produces an FP", &I); Assert1(isa<VectorType>(SrcTy) == isa<VectorType>(DestTy), "fpext source and destination must both be a vector or neither", &I); Assert1(SrcBitSize < DestBitSize,"DestTy too small for FPExt", &I); @@ -910,9 +926,9 @@ void Verifier::visitUIToFPInst(UIToFPInst &I) { Assert1(SrcVec == DstVec, "UIToFP source and dest must both be vector or scalar", &I); - Assert1(SrcTy->isIntOrIntVector(), + Assert1(SrcTy->isIntOrIntVectorTy(), "UIToFP source must be integer or integer vector", &I); - Assert1(DestTy->isFPOrFPVector(), + Assert1(DestTy->isFPOrFPVectorTy(), "UIToFP result must be FP or FP vector", &I); if (SrcVec && DstVec) @@ -933,9 +949,9 @@ void Verifier::visitSIToFPInst(SIToFPInst &I) { Assert1(SrcVec == DstVec, "SIToFP source and dest must both be vector or scalar", &I); - Assert1(SrcTy->isIntOrIntVector(), + Assert1(SrcTy->isIntOrIntVectorTy(), "SIToFP source must be integer or integer vector", &I); - Assert1(DestTy->isFPOrFPVector(), + Assert1(DestTy->isFPOrFPVectorTy(), "SIToFP result must be FP or FP vector", &I); if (SrcVec && DstVec) @@ -956,8 +972,9 @@ void Verifier::visitFPToUIInst(FPToUIInst &I) { Assert1(SrcVec == DstVec, "FPToUI source and dest must both be vector or scalar", &I); - Assert1(SrcTy->isFPOrFPVector(), "FPToUI source must be FP or FP vector", &I); - Assert1(DestTy->isIntOrIntVector(), + Assert1(SrcTy->isFPOrFPVectorTy(), "FPToUI source must be FP or FP vector", + &I); + Assert1(DestTy->isIntOrIntVectorTy(), "FPToUI result must be integer or integer vector", &I); if (SrcVec && DstVec) @@ -978,9 +995,9 @@ void Verifier::visitFPToSIInst(FPToSIInst &I) { Assert1(SrcVec == DstVec, "FPToSI source and dest must both be vector or scalar", &I); - Assert1(SrcTy->isFPOrFPVector(), + Assert1(SrcTy->isFPOrFPVectorTy(), "FPToSI source must be FP or FP vector", &I); - Assert1(DestTy->isIntOrIntVector(), + Assert1(DestTy->isIntOrIntVectorTy(), "FPToSI result must be integer or integer vector", &I); if (SrcVec && DstVec) @@ -997,7 +1014,7 @@ void Verifier::visitPtrToIntInst(PtrToIntInst &I) { const Type *DestTy = I.getType(); Assert1(isa<PointerType>(SrcTy), "PtrToInt source must be pointer", &I); - Assert1(DestTy->isInteger(), "PtrToInt result must be integral", &I); + Assert1(DestTy->isIntegerTy(), "PtrToInt result must be integral", &I); visitInstruction(I); } @@ -1007,7 +1024,7 @@ void Verifier::visitIntToPtrInst(IntToPtrInst &I) { const Type *SrcTy = I.getOperand(0)->getType(); const Type *DestTy = I.getType(); - Assert1(SrcTy->isInteger(), "IntToPtr source must be an integral", &I); + Assert1(SrcTy->isIntegerTy(), "IntToPtr source must be an integral", &I); Assert1(isa<PointerType>(DestTy), "IntToPtr result must be a pointer",&I); visitInstruction(I); @@ -1150,7 +1167,7 @@ void Verifier::visitBinaryOperator(BinaryOperator &B) { case Instruction::UDiv: case Instruction::SRem: case Instruction::URem: - Assert1(B.getType()->isIntOrIntVector(), + Assert1(B.getType()->isIntOrIntVectorTy(), "Integer arithmetic operators only work with integral types!", &B); Assert1(B.getType() == B.getOperand(0)->getType(), "Integer arithmetic operators must have same type " @@ -1163,7 +1180,7 @@ void Verifier::visitBinaryOperator(BinaryOperator 
&B) { case Instruction::FMul: case Instruction::FDiv: case Instruction::FRem: - Assert1(B.getType()->isFPOrFPVector(), + Assert1(B.getType()->isFPOrFPVectorTy(), "Floating-point arithmetic operators only work with " "floating-point types!", &B); Assert1(B.getType() == B.getOperand(0)->getType(), @@ -1174,7 +1191,7 @@ void Verifier::visitBinaryOperator(BinaryOperator &B) { case Instruction::And: case Instruction::Or: case Instruction::Xor: - Assert1(B.getType()->isIntOrIntVector(), + Assert1(B.getType()->isIntOrIntVectorTy(), "Logical operators only work with integral types!", &B); Assert1(B.getType() == B.getOperand(0)->getType(), "Logical operators must have same type for operands and result!", @@ -1183,7 +1200,7 @@ void Verifier::visitBinaryOperator(BinaryOperator &B) { case Instruction::Shl: case Instruction::LShr: case Instruction::AShr: - Assert1(B.getType()->isIntOrIntVector(), + Assert1(B.getType()->isIntOrIntVectorTy(), "Shifts only work with integral types!", &B); Assert1(B.getType() == B.getOperand(0)->getType(), "Shift return type must be same as operands!", &B); @@ -1202,7 +1219,7 @@ void Verifier::visitICmpInst(ICmpInst& IC) { Assert1(Op0Ty == Op1Ty, "Both operands to ICmp instruction are not of the same type!", &IC); // Check that the operands are the right type - Assert1(Op0Ty->isIntOrIntVector() || isa<PointerType>(Op0Ty), + Assert1(Op0Ty->isIntOrIntVectorTy() || isa<PointerType>(Op0Ty), "Invalid operand types for ICmp instruction", &IC); visitInstruction(IC); @@ -1215,7 +1232,7 @@ void Verifier::visitFCmpInst(FCmpInst& FC) { Assert1(Op0Ty == Op1Ty, "Both operands to FCmp instruction are not of the same type!", &FC); // Check that the operands are the right type - Assert1(Op0Ty->isFPOrFPVector(), + Assert1(Op0Ty->isFPOrFPVectorTy(), "Invalid operand types for FCmp instruction", &FC); visitInstruction(FC); } @@ -1301,7 +1318,7 @@ void Verifier::visitAllocaInst(AllocaInst &AI) { &AI); Assert1(PTy->getElementType()->isSized(), "Cannot allocate unsized type", &AI); - Assert1(AI.getArraySize()->getType()->isInteger(32), + Assert1(AI.getArraySize()->getType()->isIntegerTy(32), "Alloca array size must be i32", &AI); visitInstruction(AI); } @@ -1480,7 +1497,7 @@ void Verifier::visitInstruction(Instruction &I) { void Verifier::VerifyType(const Type *Ty) { if (!Types.insert(Ty)) return; - Assert1(&Mod->getContext() == &Ty->getContext(), + Assert1(Context == &Ty->getContext(), "Type context does not match Module context!", Ty); switch (Ty->getTypeID()) { @@ -1733,7 +1750,7 @@ bool Verifier::PerformTypeCheck(Intrinsic::ID ID, Function *F, const Type *Ty, } } } else if (VT == MVT::iAny) { - if (!EltTy->isInteger()) { + if (!EltTy->isIntegerTy()) { CheckFailed(IntrinsicParam(ArgNo, NumRets) + " is not " "an integer type.", F); return false; @@ -1758,7 +1775,7 @@ bool Verifier::PerformTypeCheck(Intrinsic::ID ID, Function *F, const Type *Ty, break; } } else if (VT == MVT::fAny) { - if (!EltTy->isFloatingPoint()) { + if (!EltTy->isFloatingPointTy()) { CheckFailed(IntrinsicParam(ArgNo, NumRets) + " is not " "a floating-point type.", F); return false; @@ -1913,12 +1930,10 @@ bool llvm::verifyFunction(const Function &f, VerifierFailureAction action) { Function &F = const_cast<Function&>(f); assert(!F.isDeclaration() && "Cannot verify external functions"); - ExistingModuleProvider MP(F.getParent()); - FunctionPassManager FPM(&MP); + FunctionPassManager FPM(F.getParent()); Verifier *V = new Verifier(action); FPM.add(V); FPM.run(F); - MP.releaseModule(); return V->Broken; }
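End to end, the verifier shows the shape of the whole change: no ExistingModuleProvider wrapper, a FunctionPassManager built from F.getParent(), and materializable functions tolerated as declarations. Calling it stays trivial for clients (checkFunction is a hypothetical wrapper):

    #include "llvm/Analysis/Verifier.h"
    #include "llvm/Function.h"

    bool checkFunction(const llvm::Function &F) {
      // Returns true if the function is broken; PrintMessageAction reports
      // the problems instead of aborting the process.
      return llvm::verifyFunction(F, llvm::PrintMessageAction);
    }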