Diffstat (limited to 'lib')
297 files changed, 9904 insertions, 6184 deletions
diff --git a/lib/Analysis/BasicAliasAnalysis.cpp b/lib/Analysis/BasicAliasAnalysis.cpp index c81190b..b8d69f4 100644 --- a/lib/Analysis/BasicAliasAnalysis.cpp +++ b/lib/Analysis/BasicAliasAnalysis.cpp @@ -23,7 +23,6 @@ #include "llvm/GlobalVariable.h" #include "llvm/Instructions.h" #include "llvm/IntrinsicInst.h" -#include "llvm/LLVMContext.h" #include "llvm/Operator.h" #include "llvm/Pass.h" #include "llvm/Target/TargetData.h" @@ -99,7 +98,7 @@ static bool isNonEscapingLocalObject(const Value *V) { /// isObjectSmallerThan - Return true if we can prove that the object specified /// by V is smaller than Size. static bool isObjectSmallerThan(const Value *V, unsigned Size, - LLVMContext &Context, const TargetData &TD) { + const TargetData &TD) { const Type *AccessTy; if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(V)) { AccessTy = GV->getType()->getElementType(); @@ -109,7 +108,7 @@ static bool isObjectSmallerThan(const Value *V, unsigned Size, else return false; } else if (const CallInst* CI = extractMallocCall(V)) { - if (!isArrayMalloc(V, Context, &TD)) + if (!isArrayMalloc(V, &TD)) // The size is the argument to the malloc call. if (const ConstantInt* C = dyn_cast<ConstantInt>(CI->getOperand(1))) return (C->getZExtValue() < Size); @@ -647,11 +646,25 @@ BasicAliasAnalysis::aliasCheck(const Value *V1, unsigned V1Size, const Value *O1 = V1->getUnderlyingObject(); const Value *O2 = V2->getUnderlyingObject(); + // Null values in the default address space don't point to any object, so they + // don't alias any other pointer. + if (const ConstantPointerNull *CPN = dyn_cast<ConstantPointerNull>(O1)) + if (CPN->getType()->getAddressSpace() == 0) + return NoAlias; + if (const ConstantPointerNull *CPN = dyn_cast<ConstantPointerNull>(O2)) + if (CPN->getType()->getAddressSpace() == 0) + return NoAlias; + if (O1 != O2) { // If V1/V2 point to two different objects we know that we have no alias. if (isIdentifiedObject(O1) && isIdentifiedObject(O2)) return NoAlias; - + + // Constant pointers can't alias with non-const isIdentifiedObject objects. + if ((isa<Constant>(O1) && isIdentifiedObject(O2) && !isa<Constant>(O2)) || + (isa<Constant>(O2) && isIdentifiedObject(O1) && !isa<Constant>(O1))) + return NoAlias; + // Arguments can't alias with local allocations or noalias calls. if ((isa<Argument>(O1) && (isa<AllocaInst>(O2) || isNoAliasCall(O2))) || (isa<Argument>(O2) && (isa<AllocaInst>(O1) || isNoAliasCall(O1)))) @@ -665,10 +678,9 @@ BasicAliasAnalysis::aliasCheck(const Value *V1, unsigned V1Size, // If the size of one access is larger than the entire object on the other // side, then we know such behavior is undefined and can assume no alias. - LLVMContext &Context = V1->getContext(); if (TD) - if ((V1Size != ~0U && isObjectSmallerThan(O2, V1Size, Context, *TD)) || - (V2Size != ~0U && isObjectSmallerThan(O1, V2Size, Context, *TD))) + if ((V1Size != ~0U && isObjectSmallerThan(O2, V1Size, *TD)) || + (V2Size != ~0U && isObjectSmallerThan(O1, V2Size, *TD))) return NoAlias; // If one pointer is the result of a call/invoke and the other is a @@ -707,16 +719,16 @@ BasicAliasAnalysis::aliasCheck(const Value *V1, unsigned V1Size, // This function is used to determine if the indices of two GEP instructions are // equal. V1 and V2 are the indices. 
-static bool IndexOperandsEqual(Value *V1, Value *V2, LLVMContext &Context) { +static bool IndexOperandsEqual(Value *V1, Value *V2) { if (V1->getType() == V2->getType()) return V1 == V2; if (Constant *C1 = dyn_cast<Constant>(V1)) if (Constant *C2 = dyn_cast<Constant>(V2)) { // Sign extend the constants to long types, if necessary - if (C1->getType() != Type::getInt64Ty(Context)) - C1 = ConstantExpr::getSExt(C1, Type::getInt64Ty(Context)); - if (C2->getType() != Type::getInt64Ty(Context)) - C2 = ConstantExpr::getSExt(C2, Type::getInt64Ty(Context)); + if (C1->getType() != Type::getInt64Ty(C1->getContext())) + C1 = ConstantExpr::getSExt(C1, Type::getInt64Ty(C1->getContext())); + if (C2->getType() != Type::getInt64Ty(C1->getContext())) + C2 = ConstantExpr::getSExt(C2, Type::getInt64Ty(C1->getContext())); return C1 == C2; } return false; @@ -737,8 +749,6 @@ BasicAliasAnalysis::CheckGEPInstructions( const PointerType *GEPPointerTy = cast<PointerType>(BasePtr1Ty); - LLVMContext &Context = GEPPointerTy->getContext(); - // Find the (possibly empty) initial sequence of equal values... which are not // necessarily constants. unsigned NumGEP1Operands = NumGEP1Ops, NumGEP2Operands = NumGEP2Ops; @@ -746,8 +756,7 @@ BasicAliasAnalysis::CheckGEPInstructions( unsigned MaxOperands = std::max(NumGEP1Operands, NumGEP2Operands); unsigned UnequalOper = 0; while (UnequalOper != MinOperands && - IndexOperandsEqual(GEP1Ops[UnequalOper], GEP2Ops[UnequalOper], - Context)) { + IndexOperandsEqual(GEP1Ops[UnequalOper], GEP2Ops[UnequalOper])) { // Advance through the type as we go... ++UnequalOper; if (const CompositeType *CT = dyn_cast<CompositeType>(BasePtr1Ty)) @@ -811,10 +820,11 @@ BasicAliasAnalysis::CheckGEPInstructions( if (Constant *G2OC = dyn_cast<ConstantInt>(const_cast<Value*>(G2Oper))){ if (G1OC->getType() != G2OC->getType()) { // Sign extend both operands to long. 
- if (G1OC->getType() != Type::getInt64Ty(Context)) - G1OC = ConstantExpr::getSExt(G1OC, Type::getInt64Ty(Context)); - if (G2OC->getType() != Type::getInt64Ty(Context)) - G2OC = ConstantExpr::getSExt(G2OC, Type::getInt64Ty(Context)); + const Type *Int64Ty = Type::getInt64Ty(G1OC->getContext()); + if (G1OC->getType() != Int64Ty) + G1OC = ConstantExpr::getSExt(G1OC, Int64Ty); + if (G2OC->getType() != Int64Ty) + G2OC = ConstantExpr::getSExt(G2OC, Int64Ty); GEP1Ops[FirstConstantOper] = G1OC; GEP2Ops[FirstConstantOper] = G2OC; } @@ -950,7 +960,7 @@ BasicAliasAnalysis::CheckGEPInstructions( for (unsigned i = 0; i != FirstConstantOper; ++i) { if (!isa<StructType>(ZeroIdxTy)) GEP1Ops[i] = GEP2Ops[i] = - Constant::getNullValue(Type::getInt32Ty(Context)); + Constant::getNullValue(Type::getInt32Ty(ZeroIdxTy->getContext())); if (const CompositeType *CT = dyn_cast<CompositeType>(ZeroIdxTy)) ZeroIdxTy = CT->getTypeAtIndex(GEP1Ops[i]); @@ -992,11 +1002,11 @@ BasicAliasAnalysis::CheckGEPInstructions( // if (const ArrayType *AT = dyn_cast<ArrayType>(BasePtr1Ty)) GEP1Ops[i] = - ConstantInt::get(Type::getInt64Ty(Context), + ConstantInt::get(Type::getInt64Ty(AT->getContext()), AT->getNumElements()-1); else if (const VectorType *VT = dyn_cast<VectorType>(BasePtr1Ty)) GEP1Ops[i] = - ConstantInt::get(Type::getInt64Ty(Context), + ConstantInt::get(Type::getInt64Ty(VT->getContext()), VT->getNumElements()-1); } } diff --git a/lib/Analysis/CMakeLists.txt b/lib/Analysis/CMakeLists.txt index f21fd54..0a83c3d 100644 --- a/lib/Analysis/CMakeLists.txt +++ b/lib/Analysis/CMakeLists.txt @@ -15,8 +15,10 @@ add_llvm_library(LLVMAnalysis IVUsers.cpp InlineCost.cpp InstCount.cpp + InstructionSimplify.cpp Interval.cpp IntervalPartition.cpp + LazyValueInfo.cpp LibCallAliasAnalysis.cpp LibCallSemantics.cpp LiveValues.cpp diff --git a/lib/Analysis/ConstantFolding.cpp b/lib/Analysis/ConstantFolding.cpp index 33a5792..1cdadbf 100644 --- a/lib/Analysis/ConstantFolding.cpp +++ b/lib/Analysis/ConstantFolding.cpp @@ -23,7 +23,6 @@ #include "llvm/GlobalVariable.h" #include "llvm/Instructions.h" #include "llvm/Intrinsics.h" -#include "llvm/LLVMContext.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/Target/TargetData.h" #include "llvm/ADT/SmallVector.h" @@ -493,8 +492,7 @@ static Constant *ConstantFoldLoadInst(const LoadInst *LI, const TargetData *TD){ /// these together. If target data info is available, it is provided as TD, /// otherwise TD is null. static Constant *SymbolicallyEvaluateBinop(unsigned Opc, Constant *Op0, - Constant *Op1, const TargetData *TD, - LLVMContext &Context){ + Constant *Op1, const TargetData *TD){ // SROA // Fold (and 0xffffffff00000000, (shl x, 32)) -> shl. @@ -521,15 +519,15 @@ static Constant *SymbolicallyEvaluateBinop(unsigned Opc, Constant *Op0, /// SymbolicallyEvaluateGEP - If we can symbolically evaluate the specified GEP /// constant expression, do so. 
-static Constant *SymbolicallyEvaluateGEP(Constant* const* Ops, unsigned NumOps, +static Constant *SymbolicallyEvaluateGEP(Constant *const *Ops, unsigned NumOps, const Type *ResultTy, - LLVMContext &Context, const TargetData *TD) { Constant *Ptr = Ops[0]; if (!TD || !cast<PointerType>(Ptr->getType())->getElementType()->isSized()) return 0; - unsigned BitWidth = TD->getTypeSizeInBits(TD->getIntPtrType(Context)); + unsigned BitWidth = + TD->getTypeSizeInBits(TD->getIntPtrType(Ptr->getContext())); APInt BasePtr(BitWidth, 0); bool BaseIsInt = true; if (!Ptr->isNullValue()) { @@ -558,7 +556,7 @@ static Constant *SymbolicallyEvaluateGEP(Constant* const* Ops, unsigned NumOps, // If the base value for this address is a literal integer value, fold the // getelementptr to the resulting integer value casted to the pointer type. if (BaseIsInt) { - Constant *C = ConstantInt::get(Context, Offset+BasePtr); + Constant *C = ConstantInt::get(Ptr->getContext(), Offset+BasePtr); return ConstantExpr::getIntToPtr(C, ResultTy); } @@ -579,7 +577,8 @@ static Constant *SymbolicallyEvaluateGEP(Constant* const* Ops, unsigned NumOps, return 0; APInt NewIdx = Offset.udiv(ElemSize); Offset -= NewIdx * ElemSize; - NewIdxs.push_back(ConstantInt::get(TD->getIntPtrType(Context), NewIdx)); + NewIdxs.push_back(ConstantInt::get(TD->getIntPtrType(Ty->getContext()), + NewIdx)); Ty = ATy->getElementType(); } else if (const StructType *STy = dyn_cast<StructType>(Ty)) { // Determine which field of the struct the offset points into. The @@ -587,7 +586,8 @@ static Constant *SymbolicallyEvaluateGEP(Constant* const* Ops, unsigned NumOps, // know the offset is within the struct at this point. const StructLayout &SL = *TD->getStructLayout(STy); unsigned ElIdx = SL.getElementContainingOffset(Offset.getZExtValue()); - NewIdxs.push_back(ConstantInt::get(Type::getInt32Ty(Context), ElIdx)); + NewIdxs.push_back(ConstantInt::get(Type::getInt32Ty(Ty->getContext()), + ElIdx)); Offset -= APInt(BitWidth, SL.getElementOffset(ElIdx)); Ty = STy->getTypeAtIndex(ElIdx); } else { @@ -628,8 +628,7 @@ static Constant *SymbolicallyEvaluateGEP(Constant* const* Ops, unsigned NumOps, /// is returned. Note that this function can only fail when attempting to fold /// instructions like loads and stores, which have no constant expression form. /// -Constant *llvm::ConstantFoldInstruction(Instruction *I, LLVMContext &Context, - const TargetData *TD) { +Constant *llvm::ConstantFoldInstruction(Instruction *I, const TargetData *TD) { if (PHINode *PN = dyn_cast<PHINode>(I)) { if (PN->getNumIncomingValues() == 0) return UndefValue::get(PN->getType()); @@ -656,33 +655,30 @@ Constant *llvm::ConstantFoldInstruction(Instruction *I, LLVMContext &Context, return 0; // All operands not constant! if (const CmpInst *CI = dyn_cast<CmpInst>(I)) - return ConstantFoldCompareInstOperands(CI->getPredicate(), - Ops.data(), Ops.size(), - Context, TD); + return ConstantFoldCompareInstOperands(CI->getPredicate(), Ops[0], Ops[1], + TD); if (const LoadInst *LI = dyn_cast<LoadInst>(I)) return ConstantFoldLoadInst(LI, TD); return ConstantFoldInstOperands(I->getOpcode(), I->getType(), - Ops.data(), Ops.size(), Context, TD); + Ops.data(), Ops.size(), TD); } /// ConstantFoldConstantExpression - Attempt to fold the constant expression /// using the specified TargetData. If successful, the constant result is /// result is returned, if not, null is returned. 
Constant *llvm::ConstantFoldConstantExpression(ConstantExpr *CE, - LLVMContext &Context, const TargetData *TD) { SmallVector<Constant*, 8> Ops; for (User::op_iterator i = CE->op_begin(), e = CE->op_end(); i != e; ++i) Ops.push_back(cast<Constant>(*i)); if (CE->isCompare()) - return ConstantFoldCompareInstOperands(CE->getPredicate(), - Ops.data(), Ops.size(), - Context, TD); + return ConstantFoldCompareInstOperands(CE->getPredicate(), Ops[0], Ops[1], + TD); return ConstantFoldInstOperands(CE->getOpcode(), CE->getType(), - Ops.data(), Ops.size(), Context, TD); + Ops.data(), Ops.size(), TD); } /// ConstantFoldInstOperands - Attempt to constant fold an instruction with the @@ -693,13 +689,11 @@ Constant *llvm::ConstantFoldConstantExpression(ConstantExpr *CE, /// Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, const Type *DestTy, Constant* const* Ops, unsigned NumOps, - LLVMContext &Context, const TargetData *TD) { // Handle easy binops first. if (Instruction::isBinaryOp(Opcode)) { if (isa<ConstantExpr>(Ops[0]) || isa<ConstantExpr>(Ops[1])) - if (Constant *C = SymbolicallyEvaluateBinop(Opcode, Ops[0], Ops[1], TD, - Context)) + if (Constant *C = SymbolicallyEvaluateBinop(Opcode, Ops[0], Ops[1], TD)) return C; return ConstantExpr::get(Opcode, Ops[0], Ops[1]); @@ -724,7 +718,7 @@ Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, const Type *DestTy, unsigned InWidth = Input->getType()->getScalarSizeInBits(); if (TD->getPointerSizeInBits() < InWidth) { Constant *Mask = - ConstantInt::get(Context, APInt::getLowBitsSet(InWidth, + ConstantInt::get(CE->getContext(), APInt::getLowBitsSet(InWidth, TD->getPointerSizeInBits())); Input = ConstantExpr::getAnd(Input, Mask); } @@ -766,7 +760,7 @@ Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, const Type *DestTy, AT->getNumElements()))) { Constant *Index[] = { Constant::getNullValue(CE->getType()), - ConstantInt::get(Context, ElemIdx) + ConstantInt::get(ElTy->getContext(), ElemIdx) }; return ConstantExpr::getGetElementPtr(GV, &Index[0], 2); @@ -800,7 +794,7 @@ Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, const Type *DestTy, case Instruction::ShuffleVector: return ConstantExpr::getShuffleVector(Ops[0], Ops[1], Ops[2]); case Instruction::GetElementPtr: - if (Constant *C = SymbolicallyEvaluateGEP(Ops, NumOps, DestTy, Context, TD)) + if (Constant *C = SymbolicallyEvaluateGEP(Ops, NumOps, DestTy, TD)) return C; return ConstantExpr::getGetElementPtr(Ops[0], Ops+1, NumOps-1); @@ -812,9 +806,7 @@ Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, const Type *DestTy, /// returns a constant expression of the specified operands. /// Constant *llvm::ConstantFoldCompareInstOperands(unsigned Predicate, - Constant*const * Ops, - unsigned NumOps, - LLVMContext &Context, + Constant *Ops0, Constant *Ops1, const TargetData *TD) { // fold: icmp (inttoptr x), null -> icmp x, 0 // fold: icmp (ptrtoint x), 0 -> icmp x, null @@ -823,17 +815,16 @@ Constant *llvm::ConstantFoldCompareInstOperands(unsigned Predicate, // // ConstantExpr::getCompare cannot do this, because it doesn't have TD // around to know if bit truncation is happening. 
- if (ConstantExpr *CE0 = dyn_cast<ConstantExpr>(Ops[0])) { - if (TD && Ops[1]->isNullValue()) { - const Type *IntPtrTy = TD->getIntPtrType(Context); + if (ConstantExpr *CE0 = dyn_cast<ConstantExpr>(Ops0)) { + if (TD && Ops1->isNullValue()) { + const Type *IntPtrTy = TD->getIntPtrType(CE0->getContext()); if (CE0->getOpcode() == Instruction::IntToPtr) { // Convert the integer value to the right size to ensure we get the // proper extension or truncation. Constant *C = ConstantExpr::getIntegerCast(CE0->getOperand(0), IntPtrTy, false); - Constant *NewOps[] = { C, Constant::getNullValue(C->getType()) }; - return ConstantFoldCompareInstOperands(Predicate, NewOps, 2, - Context, TD); + Constant *Null = Constant::getNullValue(C->getType()); + return ConstantFoldCompareInstOperands(Predicate, C, Null, TD); } // Only do this transformation if the int is intptrty in size, otherwise @@ -841,16 +832,14 @@ Constant *llvm::ConstantFoldCompareInstOperands(unsigned Predicate, if (CE0->getOpcode() == Instruction::PtrToInt && CE0->getType() == IntPtrTy) { Constant *C = CE0->getOperand(0); - Constant *NewOps[] = { C, Constant::getNullValue(C->getType()) }; - // FIXME! - return ConstantFoldCompareInstOperands(Predicate, NewOps, 2, - Context, TD); + Constant *Null = Constant::getNullValue(C->getType()); + return ConstantFoldCompareInstOperands(Predicate, C, Null, TD); } } - if (ConstantExpr *CE1 = dyn_cast<ConstantExpr>(Ops[1])) { + if (ConstantExpr *CE1 = dyn_cast<ConstantExpr>(Ops1)) { if (TD && CE0->getOpcode() == CE1->getOpcode()) { - const Type *IntPtrTy = TD->getIntPtrType(Context); + const Type *IntPtrTy = TD->getIntPtrType(CE0->getContext()); if (CE0->getOpcode() == Instruction::IntToPtr) { // Convert the integer value to the right size to ensure we get the @@ -859,26 +848,21 @@ Constant *llvm::ConstantFoldCompareInstOperands(unsigned Predicate, IntPtrTy, false); Constant *C1 = ConstantExpr::getIntegerCast(CE1->getOperand(0), IntPtrTy, false); - Constant *NewOps[] = { C0, C1 }; - return ConstantFoldCompareInstOperands(Predicate, NewOps, 2, - Context, TD); + return ConstantFoldCompareInstOperands(Predicate, C0, C1, TD); } // Only do this transformation if the int is intptrty in size, otherwise // there is a truncation or extension that we aren't modeling. 
if ((CE0->getOpcode() == Instruction::PtrToInt && CE0->getType() == IntPtrTy && - CE0->getOperand(0)->getType() == CE1->getOperand(0)->getType())) { - Constant *NewOps[] = { - CE0->getOperand(0), CE1->getOperand(0) - }; - return ConstantFoldCompareInstOperands(Predicate, NewOps, 2, - Context, TD); - } + CE0->getOperand(0)->getType() == CE1->getOperand(0)->getType())) + return ConstantFoldCompareInstOperands(Predicate, CE0->getOperand(0), + CE1->getOperand(0), TD); } } } - return ConstantExpr::getCompare(Predicate, Ops[0], Ops[1]); + + return ConstantExpr::getCompare(Predicate, Ops0, Ops1); } @@ -996,7 +980,7 @@ llvm::canConstantFoldCallTo(const Function *F) { } static Constant *ConstantFoldFP(double (*NativeFP)(double), double V, - const Type *Ty, LLVMContext &Context) { + const Type *Ty) { errno = 0; V = NativeFP(V); if (errno != 0) { @@ -1005,17 +989,15 @@ static Constant *ConstantFoldFP(double (*NativeFP)(double), double V, } if (Ty->isFloatTy()) - return ConstantFP::get(Context, APFloat((float)V)); + return ConstantFP::get(Ty->getContext(), APFloat((float)V)); if (Ty->isDoubleTy()) - return ConstantFP::get(Context, APFloat(V)); + return ConstantFP::get(Ty->getContext(), APFloat(V)); llvm_unreachable("Can only constant fold float/double"); return 0; // dummy return to suppress warning } static Constant *ConstantFoldBinaryFP(double (*NativeFP)(double, double), - double V, double W, - const Type *Ty, - LLVMContext &Context) { + double V, double W, const Type *Ty) { errno = 0; V = NativeFP(V, W); if (errno != 0) { @@ -1024,9 +1006,9 @@ static Constant *ConstantFoldBinaryFP(double (*NativeFP)(double, double), } if (Ty->isFloatTy()) - return ConstantFP::get(Context, APFloat((float)V)); + return ConstantFP::get(Ty->getContext(), APFloat((float)V)); if (Ty->isDoubleTy()) - return ConstantFP::get(Context, APFloat(V)); + return ConstantFP::get(Ty->getContext(), APFloat(V)); llvm_unreachable("Can only constant fold float/double"); return 0; // dummy return to suppress warning } @@ -1037,7 +1019,6 @@ Constant * llvm::ConstantFoldCall(Function *F, Constant *const *Operands, unsigned NumOperands) { if (!F->hasName()) return 0; - LLVMContext &Context = F->getContext(); StringRef Name = F->getName(); const Type *Ty = F->getReturnType(); @@ -1054,62 +1035,62 @@ llvm::ConstantFoldCall(Function *F, switch (Name[0]) { case 'a': if (Name == "acos") - return ConstantFoldFP(acos, V, Ty, Context); + return ConstantFoldFP(acos, V, Ty); else if (Name == "asin") - return ConstantFoldFP(asin, V, Ty, Context); + return ConstantFoldFP(asin, V, Ty); else if (Name == "atan") - return ConstantFoldFP(atan, V, Ty, Context); + return ConstantFoldFP(atan, V, Ty); break; case 'c': if (Name == "ceil") - return ConstantFoldFP(ceil, V, Ty, Context); + return ConstantFoldFP(ceil, V, Ty); else if (Name == "cos") - return ConstantFoldFP(cos, V, Ty, Context); + return ConstantFoldFP(cos, V, Ty); else if (Name == "cosh") - return ConstantFoldFP(cosh, V, Ty, Context); + return ConstantFoldFP(cosh, V, Ty); else if (Name == "cosf") - return ConstantFoldFP(cos, V, Ty, Context); + return ConstantFoldFP(cos, V, Ty); break; case 'e': if (Name == "exp") - return ConstantFoldFP(exp, V, Ty, Context); + return ConstantFoldFP(exp, V, Ty); break; case 'f': if (Name == "fabs") - return ConstantFoldFP(fabs, V, Ty, Context); + return ConstantFoldFP(fabs, V, Ty); else if (Name == "floor") - return ConstantFoldFP(floor, V, Ty, Context); + return ConstantFoldFP(floor, V, Ty); break; case 'l': if (Name == "log" && V > 0) - return ConstantFoldFP(log, 
V, Ty, Context); + return ConstantFoldFP(log, V, Ty); else if (Name == "log10" && V > 0) - return ConstantFoldFP(log10, V, Ty, Context); + return ConstantFoldFP(log10, V, Ty); else if (Name == "llvm.sqrt.f32" || Name == "llvm.sqrt.f64") { if (V >= -0.0) - return ConstantFoldFP(sqrt, V, Ty, Context); + return ConstantFoldFP(sqrt, V, Ty); else // Undefined return Constant::getNullValue(Ty); } break; case 's': if (Name == "sin") - return ConstantFoldFP(sin, V, Ty, Context); + return ConstantFoldFP(sin, V, Ty); else if (Name == "sinh") - return ConstantFoldFP(sinh, V, Ty, Context); + return ConstantFoldFP(sinh, V, Ty); else if (Name == "sqrt" && V >= 0) - return ConstantFoldFP(sqrt, V, Ty, Context); + return ConstantFoldFP(sqrt, V, Ty); else if (Name == "sqrtf" && V >= 0) - return ConstantFoldFP(sqrt, V, Ty, Context); + return ConstantFoldFP(sqrt, V, Ty); else if (Name == "sinf") - return ConstantFoldFP(sin, V, Ty, Context); + return ConstantFoldFP(sin, V, Ty); break; case 't': if (Name == "tan") - return ConstantFoldFP(tan, V, Ty, Context); + return ConstantFoldFP(tan, V, Ty); else if (Name == "tanh") - return ConstantFoldFP(tanh, V, Ty, Context); + return ConstantFoldFP(tanh, V, Ty); break; default: break; @@ -1120,7 +1101,7 @@ llvm::ConstantFoldCall(Function *F, if (ConstantInt *Op = dyn_cast<ConstantInt>(Operands[0])) { if (Name.startswith("llvm.bswap")) - return ConstantInt::get(Context, Op->getValue().byteSwap()); + return ConstantInt::get(F->getContext(), Op->getValue().byteSwap()); else if (Name.startswith("llvm.ctpop")) return ConstantInt::get(Ty, Op->getValue().countPopulation()); else if (Name.startswith("llvm.cttz")) @@ -1149,18 +1130,20 @@ llvm::ConstantFoldCall(Function *F, Op2->getValueAPF().convertToDouble(); if (Name == "pow") - return ConstantFoldBinaryFP(pow, Op1V, Op2V, Ty, Context); + return ConstantFoldBinaryFP(pow, Op1V, Op2V, Ty); if (Name == "fmod") - return ConstantFoldBinaryFP(fmod, Op1V, Op2V, Ty, Context); + return ConstantFoldBinaryFP(fmod, Op1V, Op2V, Ty); if (Name == "atan2") - return ConstantFoldBinaryFP(atan2, Op1V, Op2V, Ty, Context); + return ConstantFoldBinaryFP(atan2, Op1V, Op2V, Ty); } else if (ConstantInt *Op2C = dyn_cast<ConstantInt>(Operands[1])) { if (Name == "llvm.powi.f32") - return ConstantFP::get(Context, APFloat((float)std::pow((float)Op1V, + return ConstantFP::get(F->getContext(), + APFloat((float)std::pow((float)Op1V, (int)Op2C->getZExtValue()))); if (Name == "llvm.powi.f64") - return ConstantFP::get(Context, APFloat((double)std::pow((double)Op1V, - (int)Op2C->getZExtValue()))); + return ConstantFP::get(F->getContext(), + APFloat((double)std::pow((double)Op1V, + (int)Op2C->getZExtValue()))); } return 0; } diff --git a/lib/Analysis/DebugInfo.cpp b/lib/Analysis/DebugInfo.cpp index b64dbf4..8f62245 100644 --- a/lib/Analysis/DebugInfo.cpp +++ b/lib/Analysis/DebugInfo.cpp @@ -366,6 +366,9 @@ bool DIGlobalVariable::Verify() const { if (isNull()) return false; + if (!getDisplayName()) + return false; + if (getContext().isNull()) return false; @@ -406,6 +409,10 @@ uint64_t DIDerivedType::getOriginalTypeSize() const { Tag == dwarf::DW_TAG_const_type || Tag == dwarf::DW_TAG_volatile_type || Tag == dwarf::DW_TAG_restrict_type) { DIType BaseType = getTypeDerivedFrom(); + // If this type is not derived from any type then take conservative + // approach. 
+ if (BaseType.isNull()) + return getSizeInBits(); if (BaseType.isDerivedType()) return DIDerivedType(BaseType.getNode()).getOriginalTypeSize(); else @@ -599,9 +606,7 @@ void DIVariable::dump() const { //===----------------------------------------------------------------------===// DIFactory::DIFactory(Module &m) - : M(m), VMContext(M.getContext()), StopPointFn(0), FuncStartFn(0), - RegionStartFn(0), RegionEndFn(0), - DeclareFn(0) { + : M(m), VMContext(M.getContext()), DeclareFn(0) { EmptyStructPtr = PointerType::getUnqual(StructType::get(VMContext)); } @@ -646,9 +651,9 @@ DISubrange DIFactory::GetOrCreateSubrange(int64_t Lo, int64_t Hi) { /// CreateCompileUnit - Create a new descriptor for the specified compile /// unit. Note that this does not unique compile units within the module. DICompileUnit DIFactory::CreateCompileUnit(unsigned LangID, - StringRef Filename, - StringRef Directory, - StringRef Producer, + const char * Filename, + const char * Directory, + const char * Producer, bool isMain, bool isOptimized, const char *Flags, @@ -670,7 +675,7 @@ DICompileUnit DIFactory::CreateCompileUnit(unsigned LangID, } /// CreateEnumerator - Create a single enumerator value. -DIEnumerator DIFactory::CreateEnumerator(StringRef Name, uint64_t Val){ +DIEnumerator DIFactory::CreateEnumerator(const char * Name, uint64_t Val){ Value *Elts[] = { GetTagConstant(dwarf::DW_TAG_enumerator), MDString::get(VMContext, Name), @@ -682,7 +687,7 @@ DIEnumerator DIFactory::CreateEnumerator(StringRef Name, uint64_t Val){ /// CreateBasicType - Create a basic type like int, float, etc. DIBasicType DIFactory::CreateBasicType(DIDescriptor Context, - StringRef Name, + const char * Name, DICompileUnit CompileUnit, unsigned LineNumber, uint64_t SizeInBits, @@ -707,7 +712,7 @@ DIBasicType DIFactory::CreateBasicType(DIDescriptor Context, /// CreateBasicType - Create a basic type like int, float, etc. DIBasicType DIFactory::CreateBasicTypeEx(DIDescriptor Context, - StringRef Name, + const char * Name, DICompileUnit CompileUnit, unsigned LineNumber, Constant *SizeInBits, @@ -734,7 +739,7 @@ DIBasicType DIFactory::CreateBasicTypeEx(DIDescriptor Context, /// pointer, typedef, etc. DIDerivedType DIFactory::CreateDerivedType(unsigned Tag, DIDescriptor Context, - StringRef Name, + const char * Name, DICompileUnit CompileUnit, unsigned LineNumber, uint64_t SizeInBits, @@ -762,7 +767,7 @@ DIDerivedType DIFactory::CreateDerivedType(unsigned Tag, /// pointer, typedef, etc. DIDerivedType DIFactory::CreateDerivedTypeEx(unsigned Tag, DIDescriptor Context, - StringRef Name, + const char * Name, DICompileUnit CompileUnit, unsigned LineNumber, Constant *SizeInBits, @@ -789,7 +794,7 @@ DIDerivedType DIFactory::CreateDerivedTypeEx(unsigned Tag, /// CreateCompositeType - Create a composite type like array, struct, etc. DICompositeType DIFactory::CreateCompositeType(unsigned Tag, DIDescriptor Context, - StringRef Name, + const char * Name, DICompileUnit CompileUnit, unsigned LineNumber, uint64_t SizeInBits, @@ -821,7 +826,7 @@ DICompositeType DIFactory::CreateCompositeType(unsigned Tag, /// CreateCompositeType - Create a composite type like array, struct, etc. DICompositeType DIFactory::CreateCompositeTypeEx(unsigned Tag, DIDescriptor Context, - StringRef Name, + const char * Name, DICompileUnit CompileUnit, unsigned LineNumber, Constant *SizeInBits, @@ -854,9 +859,9 @@ DICompositeType DIFactory::CreateCompositeTypeEx(unsigned Tag, /// See comments in DISubprogram for descriptions of these fields. 
This /// method does not unique the generated descriptors. DISubprogram DIFactory::CreateSubprogram(DIDescriptor Context, - StringRef Name, - StringRef DisplayName, - StringRef LinkageName, + const char * Name, + const char * DisplayName, + const char * LinkageName, DICompileUnit CompileUnit, unsigned LineNo, DIType Type, bool isLocalToUnit, @@ -880,9 +885,9 @@ DISubprogram DIFactory::CreateSubprogram(DIDescriptor Context, /// CreateGlobalVariable - Create a new descriptor for the specified global. DIGlobalVariable -DIFactory::CreateGlobalVariable(DIDescriptor Context, StringRef Name, - StringRef DisplayName, - StringRef LinkageName, +DIFactory::CreateGlobalVariable(DIDescriptor Context, const char * Name, + const char * DisplayName, + const char * LinkageName, DICompileUnit CompileUnit, unsigned LineNo, DIType Type,bool isLocalToUnit, bool isDefinition, llvm::GlobalVariable *Val) { @@ -914,7 +919,7 @@ DIFactory::CreateGlobalVariable(DIDescriptor Context, StringRef Name, /// CreateVariable - Create a new descriptor for the specified variable. DIVariable DIFactory::CreateVariable(unsigned Tag, DIDescriptor Context, - StringRef Name, + const char * Name, DICompileUnit CompileUnit, unsigned LineNo, DIType Type) { Value *Elts[] = { @@ -976,60 +981,8 @@ DILocation DIFactory::CreateLocation(unsigned LineNo, unsigned ColumnNo, // DIFactory: Routines for inserting code into a function //===----------------------------------------------------------------------===// -/// InsertStopPoint - Create a new llvm.dbg.stoppoint intrinsic invocation, -/// inserting it at the end of the specified basic block. -void DIFactory::InsertStopPoint(DICompileUnit CU, unsigned LineNo, - unsigned ColNo, BasicBlock *BB) { - - // Lazily construct llvm.dbg.stoppoint function. - if (!StopPointFn) - StopPointFn = llvm::Intrinsic::getDeclaration(&M, - llvm::Intrinsic::dbg_stoppoint); - - // Invoke llvm.dbg.stoppoint - Value *Args[] = { - ConstantInt::get(llvm::Type::getInt32Ty(VMContext), LineNo), - ConstantInt::get(llvm::Type::getInt32Ty(VMContext), ColNo), - CU.getNode() - }; - CallInst::Create(StopPointFn, Args, Args+3, "", BB); -} - -/// InsertSubprogramStart - Create a new llvm.dbg.func.start intrinsic to -/// mark the start of the specified subprogram. -void DIFactory::InsertSubprogramStart(DISubprogram SP, BasicBlock *BB) { - // Lazily construct llvm.dbg.func.start. - if (!FuncStartFn) - FuncStartFn = Intrinsic::getDeclaration(&M, Intrinsic::dbg_func_start); - - // Call llvm.dbg.func.start which also implicitly sets a stoppoint. - CallInst::Create(FuncStartFn, SP.getNode(), "", BB); -} - -/// InsertRegionStart - Insert a new llvm.dbg.region.start intrinsic call to -/// mark the start of a region for the specified scoping descriptor. -void DIFactory::InsertRegionStart(DIDescriptor D, BasicBlock *BB) { - // Lazily construct llvm.dbg.region.start function. - if (!RegionStartFn) - RegionStartFn = Intrinsic::getDeclaration(&M, Intrinsic::dbg_region_start); - - // Call llvm.dbg.func.start. - CallInst::Create(RegionStartFn, D.getNode(), "", BB); -} - -/// InsertRegionEnd - Insert a new llvm.dbg.region.end intrinsic call to -/// mark the end of a region for the specified scoping descriptor. -void DIFactory::InsertRegionEnd(DIDescriptor D, BasicBlock *BB) { - // Lazily construct llvm.dbg.region.end function. - if (!RegionEndFn) - RegionEndFn = Intrinsic::getDeclaration(&M, Intrinsic::dbg_region_end); - - // Call llvm.dbg.region.end. 
- CallInst::Create(RegionEndFn, D.getNode(), "", BB); -} - /// InsertDeclare - Insert a new llvm.dbg.declare intrinsic call. -void DIFactory::InsertDeclare(Value *Storage, DIVariable D, +Instruction *DIFactory::InsertDeclare(Value *Storage, DIVariable D, Instruction *InsertBefore) { // Cast the storage to a {}* for the call to llvm.dbg.declare. Storage = new BitCastInst(Storage, EmptyStructPtr, "", InsertBefore); @@ -1038,11 +991,11 @@ void DIFactory::InsertDeclare(Value *Storage, DIVariable D, DeclareFn = Intrinsic::getDeclaration(&M, Intrinsic::dbg_declare); Value *Args[] = { Storage, D.getNode() }; - CallInst::Create(DeclareFn, Args, Args+2, "", InsertBefore); + return CallInst::Create(DeclareFn, Args, Args+2, "", InsertBefore); } /// InsertDeclare - Insert a new llvm.dbg.declare intrinsic call. -void DIFactory::InsertDeclare(Value *Storage, DIVariable D, +Instruction *DIFactory::InsertDeclare(Value *Storage, DIVariable D, BasicBlock *InsertAtEnd) { // Cast the storage to a {}* for the call to llvm.dbg.declare. Storage = new BitCastInst(Storage, EmptyStructPtr, "", InsertAtEnd); @@ -1051,7 +1004,7 @@ void DIFactory::InsertDeclare(Value *Storage, DIVariable D, DeclareFn = Intrinsic::getDeclaration(&M, Intrinsic::dbg_declare); Value *Args[] = { Storage, D.getNode() }; - CallInst::Create(DeclareFn, Args, Args+2, "", InsertAtEnd); + return CallInst::Create(DeclareFn, Args, Args+2, "", InsertAtEnd); } @@ -1062,38 +1015,18 @@ void DIFactory::InsertDeclare(Value *Storage, DIVariable D, /// processModule - Process entire module and collect debug info. void DebugInfoFinder::processModule(Module &M) { -#ifdef ATTACH_DEBUG_INFO_TO_AN_INSN MetadataContext &TheMetadata = M.getContext().getMetadata(); unsigned MDDbgKind = TheMetadata.getMDKind("dbg"); -#endif + for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) for (Function::iterator FI = (*I).begin(), FE = (*I).end(); FI != FE; ++FI) for (BasicBlock::iterator BI = (*FI).begin(), BE = (*FI).end(); BI != BE; ++BI) { - if (DbgStopPointInst *SPI = dyn_cast<DbgStopPointInst>(BI)) - processStopPoint(SPI); - else if (DbgFuncStartInst *FSI = dyn_cast<DbgFuncStartInst>(BI)) - processFuncStart(FSI); - else if (DbgRegionStartInst *DRS = dyn_cast<DbgRegionStartInst>(BI)) - processRegionStart(DRS); - else if (DbgRegionEndInst *DRE = dyn_cast<DbgRegionEndInst>(BI)) - processRegionEnd(DRE); - else if (DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(BI)) + if (DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(BI)) processDeclare(DDI); -#ifdef ATTACH_DEBUG_INFO_TO_AN_INSN - else if (MDDbgKind) { - if (MDNode *L = TheMetadata.getMD(MDDbgKind, BI)) { - DILocation Loc(L); - DIScope S(Loc.getScope().getNode()); - if (S.isCompileUnit()) - addCompileUnit(DICompileUnit(S.getNode())); - else if (S.isSubprogram()) - processSubprogram(DISubprogram(S.getNode())); - else if (S.isLexicalBlock()) - processLexicalBlock(DILexicalBlock(S.getNode())); - } - } -#endif + else if (MDDbgKind) + if (MDNode *L = TheMetadata.getMD(MDDbgKind, BI)) + processLocation(DILocation(L)); } NamedMDNode *NMD = M.getNamedMetadata("llvm.dbg.gv"); @@ -1109,6 +1042,20 @@ void DebugInfoFinder::processModule(Module &M) { } } +/// processLocation - Process DILocation. 
+void DebugInfoFinder::processLocation(DILocation Loc) { + if (Loc.isNull()) return; + DIScope S(Loc.getScope().getNode()); + if (S.isNull()) return; + if (S.isCompileUnit()) + addCompileUnit(DICompileUnit(S.getNode())); + else if (S.isSubprogram()) + processSubprogram(DISubprogram(S.getNode())); + else if (S.isLexicalBlock()) + processLexicalBlock(DILexicalBlock(S.getNode())); + processLocation(Loc.getOrigLocation()); +} + /// processType - Process DIType. void DebugInfoFinder::processType(DIType DT) { if (!addType(DT)) @@ -1156,30 +1103,6 @@ void DebugInfoFinder::processSubprogram(DISubprogram SP) { processType(SP.getType()); } -/// processStopPoint - Process DbgStopPointInst. -void DebugInfoFinder::processStopPoint(DbgStopPointInst *SPI) { - MDNode *Context = dyn_cast<MDNode>(SPI->getContext()); - addCompileUnit(DICompileUnit(Context)); -} - -/// processFuncStart - Process DbgFuncStartInst. -void DebugInfoFinder::processFuncStart(DbgFuncStartInst *FSI) { - MDNode *SP = dyn_cast<MDNode>(FSI->getSubprogram()); - processSubprogram(DISubprogram(SP)); -} - -/// processRegionStart - Process DbgRegionStart. -void DebugInfoFinder::processRegionStart(DbgRegionStartInst *DRS) { - MDNode *SP = dyn_cast<MDNode>(DRS->getContext()); - processSubprogram(DISubprogram(SP)); -} - -/// processRegionEnd - Process DbgRegionEnd. -void DebugInfoFinder::processRegionEnd(DbgRegionEndInst *DRE) { - MDNode *SP = dyn_cast<MDNode>(DRE->getContext()); - processSubprogram(DISubprogram(SP)); -} - /// processDeclare - Process DbgDeclareInst. void DebugInfoFinder::processDeclare(DbgDeclareInst *DDI) { DIVariable DV(cast<MDNode>(DDI->getVariable())); @@ -1475,22 +1398,4 @@ bool getLocationInfo(const Value *V, std::string &DisplayName, return DebugLoc::get(Id); } - - /// isInlinedFnStart - Return true if FSI is starting an inlined function. - bool isInlinedFnStart(DbgFuncStartInst &FSI, const Function *CurrentFn) { - DISubprogram Subprogram(cast<MDNode>(FSI.getSubprogram())); - if (Subprogram.describes(CurrentFn)) - return false; - - return true; - } - - /// isInlinedFnEnd - Return true if REI is ending an inlined function. - bool isInlinedFnEnd(DbgRegionEndInst &REI, const Function *CurrentFn) { - DISubprogram Subprogram(cast<MDNode>(REI.getContext())); - if (Subprogram.isNull() || Subprogram.describes(CurrentFn)) - return false; - - return true; - } } diff --git a/lib/Analysis/IPA/Andersens.cpp b/lib/Analysis/IPA/Andersens.cpp index 17f304c..40a8cd5 100644 --- a/lib/Analysis/IPA/Andersens.cpp +++ b/lib/Analysis/IPA/Andersens.cpp @@ -518,7 +518,7 @@ namespace { /// getObject - Return the node corresponding to the memory object for the /// specified global or allocation instruction. unsigned getObject(Value *V) const { - DenseMap<Value*, unsigned>::iterator I = ObjectNodes.find(V); + DenseMap<Value*, unsigned>::const_iterator I = ObjectNodes.find(V); assert(I != ObjectNodes.end() && "Value does not have an object in the points-to graph!"); return I->second; @@ -527,7 +527,7 @@ namespace { /// getReturnNode - Return the node representing the return value for the /// specified function. unsigned getReturnNode(Function *F) const { - DenseMap<Function*, unsigned>::iterator I = ReturnNodes.find(F); + DenseMap<Function*, unsigned>::const_iterator I = ReturnNodes.find(F); assert(I != ReturnNodes.end() && "Function does not return a value!"); return I->second; } @@ -535,7 +535,7 @@ namespace { /// getVarargNode - Return the node representing the variable arguments /// formal for the specified function. 
unsigned getVarargNode(Function *F) const { - DenseMap<Function*, unsigned>::iterator I = VarargNodes.find(F); + DenseMap<Function*, unsigned>::const_iterator I = VarargNodes.find(F); assert(I != VarargNodes.end() && "Function does not take var args!"); return I->second; } diff --git a/lib/Analysis/IVUsers.cpp b/lib/Analysis/IVUsers.cpp index 543e017..cf52320 100644 --- a/lib/Analysis/IVUsers.cpp +++ b/lib/Analysis/IVUsers.cpp @@ -151,6 +151,8 @@ static bool IVUseShouldUsePostIncValue(Instruction *User, Instruction *IV, if (L->contains(User->getParent())) return false; BasicBlock *LatchBlock = L->getLoopLatch(); + if (!LatchBlock) + return false; // Ok, the user is outside of the loop. If it is dominated by the latch // block, use the post-inc value. @@ -265,6 +267,18 @@ bool IVUsers::AddUsersIfInteresting(Instruction *I) { return true; } +void IVUsers::AddUser(const SCEV *Stride, const SCEV *Offset, + Instruction *User, Value *Operand) { + IVUsersOfOneStride *StrideUses = IVUsesByStride[Stride]; + if (!StrideUses) { // First occurrence of this stride? + StrideOrder.push_back(Stride); + StrideUses = new IVUsersOfOneStride(Stride); + IVUses.push_back(StrideUses); + IVUsesByStride[Stride] = StrideUses; + } + IVUsesByStride[Stride]->addUser(Offset, User, Operand); +} + IVUsers::IVUsers() : LoopPass(&ID) { } diff --git a/lib/Analysis/InstructionSimplify.cpp b/lib/Analysis/InstructionSimplify.cpp new file mode 100644 index 0000000..f9953e3 --- /dev/null +++ b/lib/Analysis/InstructionSimplify.cpp @@ -0,0 +1,348 @@ +//===- InstructionSimplify.cpp - Fold instruction operands ----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements routines for folding instructions into simpler forms +// that do not require creating new instructions. For example, this does +// constant folding, and can handle identities like (X&0)->0. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Analysis/ConstantFolding.h" +#include "llvm/Support/ValueHandle.h" +#include "llvm/Instructions.h" +#include "llvm/Support/PatternMatch.h" +using namespace llvm; +using namespace llvm::PatternMatch; + +/// SimplifyAndInst - Given operands for an And, see if we can +/// fold the result. If not, this returns null. +Value *llvm::SimplifyAndInst(Value *Op0, Value *Op1, + const TargetData *TD) { + if (Constant *CLHS = dyn_cast<Constant>(Op0)) { + if (Constant *CRHS = dyn_cast<Constant>(Op1)) { + Constant *Ops[] = { CLHS, CRHS }; + return ConstantFoldInstOperands(Instruction::And, CLHS->getType(), + Ops, 2, TD); + } + + // Canonicalize the constant to the RHS. 
+ std::swap(Op0, Op1); + } + + // X & undef -> 0 + if (isa<UndefValue>(Op1)) + return Constant::getNullValue(Op0->getType()); + + // X & X = X + if (Op0 == Op1) + return Op0; + + // X & <0,0> = <0,0> + if (isa<ConstantAggregateZero>(Op1)) + return Op1; + + // X & <-1,-1> = X + if (ConstantVector *CP = dyn_cast<ConstantVector>(Op1)) + if (CP->isAllOnesValue()) + return Op0; + + if (ConstantInt *Op1CI = dyn_cast<ConstantInt>(Op1)) { + // X & 0 = 0 + if (Op1CI->isZero()) + return Op1CI; + // X & -1 = X + if (Op1CI->isAllOnesValue()) + return Op0; + } + + // A & ~A = ~A & A = 0 + Value *A, *B; + if ((match(Op0, m_Not(m_Value(A))) && A == Op1) || + (match(Op1, m_Not(m_Value(A))) && A == Op0)) + return Constant::getNullValue(Op0->getType()); + + // (A | ?) & A = A + if (match(Op0, m_Or(m_Value(A), m_Value(B))) && + (A == Op1 || B == Op1)) + return Op1; + + // A & (A | ?) = A + if (match(Op1, m_Or(m_Value(A), m_Value(B))) && + (A == Op0 || B == Op0)) + return Op0; + + return 0; +} + +/// SimplifyOrInst - Given operands for an Or, see if we can +/// fold the result. If not, this returns null. +Value *llvm::SimplifyOrInst(Value *Op0, Value *Op1, + const TargetData *TD) { + if (Constant *CLHS = dyn_cast<Constant>(Op0)) { + if (Constant *CRHS = dyn_cast<Constant>(Op1)) { + Constant *Ops[] = { CLHS, CRHS }; + return ConstantFoldInstOperands(Instruction::Or, CLHS->getType(), + Ops, 2, TD); + } + + // Canonicalize the constant to the RHS. + std::swap(Op0, Op1); + } + + // X | undef -> -1 + if (isa<UndefValue>(Op1)) + return Constant::getAllOnesValue(Op0->getType()); + + // X | X = X + if (Op0 == Op1) + return Op0; + + // X | <0,0> = X + if (isa<ConstantAggregateZero>(Op1)) + return Op0; + + // X | <-1,-1> = <-1,-1> + if (ConstantVector *CP = dyn_cast<ConstantVector>(Op1)) + if (CP->isAllOnesValue()) + return Op1; + + if (ConstantInt *Op1CI = dyn_cast<ConstantInt>(Op1)) { + // X | 0 = X + if (Op1CI->isZero()) + return Op0; + // X | -1 = -1 + if (Op1CI->isAllOnesValue()) + return Op1CI; + } + + // A | ~A = ~A | A = -1 + Value *A, *B; + if ((match(Op0, m_Not(m_Value(A))) && A == Op1) || + (match(Op1, m_Not(m_Value(A))) && A == Op0)) + return Constant::getAllOnesValue(Op0->getType()); + + // (A & ?) | A = A + if (match(Op0, m_And(m_Value(A), m_Value(B))) && + (A == Op1 || B == Op1)) + return Op1; + + // A | (A & ?) = A + if (match(Op1, m_And(m_Value(A), m_Value(B))) && + (A == Op0 || B == Op0)) + return Op0; + + return 0; +} + + + + +static const Type *GetCompareTy(Value *Op) { + return CmpInst::makeCmpResultType(Op->getType()); +} + + +/// SimplifyICmpInst - Given operands for an ICmpInst, see if we can +/// fold the result. If not, this returns null. +Value *llvm::SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, + const TargetData *TD) { + CmpInst::Predicate Pred = (CmpInst::Predicate)Predicate; + assert(CmpInst::isIntPredicate(Pred) && "Not an integer compare!"); + + if (Constant *CLHS = dyn_cast<Constant>(LHS)) { + if (Constant *CRHS = dyn_cast<Constant>(RHS)) + return ConstantFoldCompareInstOperands(Pred, CLHS, CRHS, TD); + + // If we have a constant, make sure it is on the RHS. + std::swap(LHS, RHS); + Pred = CmpInst::getSwappedPredicate(Pred); + } + + // ITy - This is the return type of the compare we're considering. 
+ const Type *ITy = GetCompareTy(LHS); + + // icmp X, X -> true/false + if (LHS == RHS) + return ConstantInt::get(ITy, CmpInst::isTrueWhenEqual(Pred)); + + if (isa<UndefValue>(RHS)) // X icmp undef -> undef + return UndefValue::get(ITy); + + // icmp <global/alloca*/null>, <global/alloca*/null> - Global/Stack value + // addresses never equal each other! We already know that Op0 != Op1. + if ((isa<GlobalValue>(LHS) || isa<AllocaInst>(LHS) || + isa<ConstantPointerNull>(LHS)) && + (isa<GlobalValue>(RHS) || isa<AllocaInst>(RHS) || + isa<ConstantPointerNull>(RHS))) + return ConstantInt::get(ITy, CmpInst::isFalseWhenEqual(Pred)); + + // See if we are doing a comparison with a constant. + if (ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) { + // If we have an icmp le or icmp ge instruction, turn it into the + // appropriate icmp lt or icmp gt instruction. This allows us to rely on + // them being folded in the code below. + switch (Pred) { + default: break; + case ICmpInst::ICMP_ULE: + if (CI->isMaxValue(false)) // A <=u MAX -> TRUE + return ConstantInt::getTrue(CI->getContext()); + break; + case ICmpInst::ICMP_SLE: + if (CI->isMaxValue(true)) // A <=s MAX -> TRUE + return ConstantInt::getTrue(CI->getContext()); + break; + case ICmpInst::ICMP_UGE: + if (CI->isMinValue(false)) // A >=u MIN -> TRUE + return ConstantInt::getTrue(CI->getContext()); + break; + case ICmpInst::ICMP_SGE: + if (CI->isMinValue(true)) // A >=s MIN -> TRUE + return ConstantInt::getTrue(CI->getContext()); + break; + } + } + + + return 0; +} + +/// SimplifyFCmpInst - Given operands for an FCmpInst, see if we can +/// fold the result. If not, this returns null. +Value *llvm::SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS, + const TargetData *TD) { + CmpInst::Predicate Pred = (CmpInst::Predicate)Predicate; + assert(CmpInst::isFPPredicate(Pred) && "Not an FP compare!"); + + if (Constant *CLHS = dyn_cast<Constant>(LHS)) { + if (Constant *CRHS = dyn_cast<Constant>(RHS)) + return ConstantFoldCompareInstOperands(Pred, CLHS, CRHS, TD); + + // If we have a constant, make sure it is on the RHS. + std::swap(LHS, RHS); + Pred = CmpInst::getSwappedPredicate(Pred); + } + + // Fold trivial predicates. + if (Pred == FCmpInst::FCMP_FALSE) + return ConstantInt::get(GetCompareTy(LHS), 0); + if (Pred == FCmpInst::FCMP_TRUE) + return ConstantInt::get(GetCompareTy(LHS), 1); + + if (isa<UndefValue>(RHS)) // fcmp pred X, undef -> undef + return UndefValue::get(GetCompareTy(LHS)); + + // fcmp x,x -> true/false. Not all compares are foldable. + if (LHS == RHS) { + if (CmpInst::isTrueWhenEqual(Pred)) + return ConstantInt::get(GetCompareTy(LHS), 1); + if (CmpInst::isFalseWhenEqual(Pred)) + return ConstantInt::get(GetCompareTy(LHS), 0); + } + + // Handle fcmp with constant RHS + if (Constant *RHSC = dyn_cast<Constant>(RHS)) { + // If the constant is a nan, see if we can fold the comparison based on it. + if (ConstantFP *CFP = dyn_cast<ConstantFP>(RHSC)) { + if (CFP->getValueAPF().isNaN()) { + if (FCmpInst::isOrdered(Pred)) // True "if ordered and foo" + return ConstantInt::getFalse(CFP->getContext()); + assert(FCmpInst::isUnordered(Pred) && + "Comparison must be either ordered or unordered!"); + // True if unordered. + return ConstantInt::getTrue(CFP->getContext()); + } + } + } + + return 0; +} + +//=== Helper functions for higher up the class hierarchy. + +/// SimplifyBinOp - Given operands for a BinaryOperator, see if we can +/// fold the result. If not, this returns null. 
+Value *llvm::SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS, + const TargetData *TD) { + switch (Opcode) { + case Instruction::And: return SimplifyAndInst(LHS, RHS, TD); + case Instruction::Or: return SimplifyOrInst(LHS, RHS, TD); + default: + if (Constant *CLHS = dyn_cast<Constant>(LHS)) + if (Constant *CRHS = dyn_cast<Constant>(RHS)) { + Constant *COps[] = {CLHS, CRHS}; + return ConstantFoldInstOperands(Opcode, LHS->getType(), COps, 2, TD); + } + return 0; + } +} + +/// SimplifyCmpInst - Given operands for a CmpInst, see if we can +/// fold the result. +Value *llvm::SimplifyCmpInst(unsigned Predicate, Value *LHS, Value *RHS, + const TargetData *TD) { + if (CmpInst::isIntPredicate((CmpInst::Predicate)Predicate)) + return SimplifyICmpInst(Predicate, LHS, RHS, TD); + return SimplifyFCmpInst(Predicate, LHS, RHS, TD); +} + + +/// SimplifyInstruction - See if we can compute a simplified version of this +/// instruction. If not, this returns null. +Value *llvm::SimplifyInstruction(Instruction *I, const TargetData *TD) { + switch (I->getOpcode()) { + default: + return ConstantFoldInstruction(I, TD); + case Instruction::And: + return SimplifyAndInst(I->getOperand(0), I->getOperand(1), TD); + case Instruction::Or: + return SimplifyOrInst(I->getOperand(0), I->getOperand(1), TD); + case Instruction::ICmp: + return SimplifyICmpInst(cast<ICmpInst>(I)->getPredicate(), + I->getOperand(0), I->getOperand(1), TD); + case Instruction::FCmp: + return SimplifyFCmpInst(cast<FCmpInst>(I)->getPredicate(), + I->getOperand(0), I->getOperand(1), TD); + } +} + +/// ReplaceAndSimplifyAllUses - Perform From->replaceAllUsesWith(To) and then +/// delete the From instruction. In addition to a basic RAUW, this does a +/// recursive simplification of the newly formed instructions. This catches +/// things where one simplification exposes other opportunities. This only +/// simplifies and deletes scalar operations, it does not change the CFG. +/// +void llvm::ReplaceAndSimplifyAllUses(Instruction *From, Value *To, + const TargetData *TD) { + assert(From != To && "ReplaceAndSimplifyAllUses(X,X) is not valid!"); + + // FromHandle - This keeps a weakvh on the from value so that we can know if + // it gets deleted out from under us in a recursive simplification. + WeakVH FromHandle(From); + + while (!From->use_empty()) { + // Update the instruction to use the new value. + Use &U = From->use_begin().getUse(); + Instruction *User = cast<Instruction>(U.getUser()); + U = To; + + // See if we can simplify it. + if (Value *V = SimplifyInstruction(User, TD)) { + // Recursively simplify this. + ReplaceAndSimplifyAllUses(User, V, TD); + + // If the recursive simplification ended up revisiting and deleting 'From' + // then we're done. + if (FromHandle == 0) + return; + } + } + From->eraseFromParent(); +} + diff --git a/lib/Analysis/LazyValueInfo.cpp b/lib/Analysis/LazyValueInfo.cpp new file mode 100644 index 0000000..5796c6f --- /dev/null +++ b/lib/Analysis/LazyValueInfo.cpp @@ -0,0 +1,582 @@ +//===- LazyValueInfo.cpp - Value constraint analysis ----------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the interface for lazy computation of value constraint +// information. 
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "lazy-value-info" +#include "llvm/Analysis/LazyValueInfo.h" +#include "llvm/Constants.h" +#include "llvm/Instructions.h" +#include "llvm/Analysis/ConstantFolding.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Support/CFG.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/PointerIntPair.h" +#include "llvm/ADT/STLExtras.h" +using namespace llvm; + +char LazyValueInfo::ID = 0; +static RegisterPass<LazyValueInfo> +X("lazy-value-info", "Lazy Value Information Analysis", false, true); + +namespace llvm { + FunctionPass *createLazyValueInfoPass() { return new LazyValueInfo(); } +} + + +//===----------------------------------------------------------------------===// +// LVILatticeVal +//===----------------------------------------------------------------------===// + +/// LVILatticeVal - This is the information tracked by LazyValueInfo for each +/// value. +/// +/// FIXME: This is basically just for bringup, this can be made a lot more rich +/// in the future. +/// +namespace { +class LVILatticeVal { + enum LatticeValueTy { + /// undefined - This LLVM Value has no known value yet. + undefined, + /// constant - This LLVM Value has a specific constant value. + constant, + + /// notconstant - This LLVM value is known to not have the specified value. + notconstant, + + /// overdefined - This instruction is not known to be constant, and we know + /// it has a value. + overdefined + }; + + /// Val: This stores the current lattice value along with the Constant* for + /// the constant if this is a 'constant' or 'notconstant' value. + PointerIntPair<Constant *, 2, LatticeValueTy> Val; + +public: + LVILatticeVal() : Val(0, undefined) {} + + static LVILatticeVal get(Constant *C) { + LVILatticeVal Res; + Res.markConstant(C); + return Res; + } + static LVILatticeVal getNot(Constant *C) { + LVILatticeVal Res; + Res.markNotConstant(C); + return Res; + } + + bool isUndefined() const { return Val.getInt() == undefined; } + bool isConstant() const { return Val.getInt() == constant; } + bool isNotConstant() const { return Val.getInt() == notconstant; } + bool isOverdefined() const { return Val.getInt() == overdefined; } + + Constant *getConstant() const { + assert(isConstant() && "Cannot get the constant of a non-constant!"); + return Val.getPointer(); + } + + Constant *getNotConstant() const { + assert(isNotConstant() && "Cannot get the constant of a non-notconstant!"); + return Val.getPointer(); + } + + /// markOverdefined - Return true if this is a change in status. + bool markOverdefined() { + if (isOverdefined()) + return false; + Val.setInt(overdefined); + return true; + } + + /// markConstant - Return true if this is a change in status. + bool markConstant(Constant *V) { + if (isConstant()) { + assert(getConstant() == V && "Marking constant with different value"); + return false; + } + + assert(isUndefined()); + Val.setInt(constant); + assert(V && "Marking constant with NULL"); + Val.setPointer(V); + return true; + } + + /// markNotConstant - Return true if this is a change in status. 
+ bool markNotConstant(Constant *V) { + if (isNotConstant()) { + assert(getNotConstant() == V && "Marking !constant with different value"); + return false; + } + + if (isConstant()) + assert(getConstant() != V && "Marking not constant with different value"); + else + assert(isUndefined()); + + Val.setInt(notconstant); + assert(V && "Marking constant with NULL"); + Val.setPointer(V); + return true; + } + + /// mergeIn - Merge the specified lattice value into this one, updating this + /// one and returning true if anything changed. + bool mergeIn(const LVILatticeVal &RHS) { + if (RHS.isUndefined() || isOverdefined()) return false; + if (RHS.isOverdefined()) return markOverdefined(); + + if (RHS.isNotConstant()) { + if (isNotConstant()) { + if (getNotConstant() != RHS.getNotConstant() || + isa<ConstantExpr>(getNotConstant()) || + isa<ConstantExpr>(RHS.getNotConstant())) + return markOverdefined(); + return false; + } + if (isConstant()) { + if (getConstant() == RHS.getNotConstant() || + isa<ConstantExpr>(RHS.getNotConstant()) || + isa<ConstantExpr>(getConstant())) + return markOverdefined(); + return markNotConstant(RHS.getNotConstant()); + } + + assert(isUndefined() && "Unexpected lattice"); + return markNotConstant(RHS.getNotConstant()); + } + + // RHS must be a constant, we must be undef, constant, or notconstant. + if (isUndefined()) + return markConstant(RHS.getConstant()); + + if (isConstant()) { + if (getConstant() != RHS.getConstant()) + return markOverdefined(); + return false; + } + + // If we are known "!=4" and RHS is "==5", stay at "!=4". + if (getNotConstant() == RHS.getConstant() || + isa<ConstantExpr>(getNotConstant()) || + isa<ConstantExpr>(RHS.getConstant())) + return markOverdefined(); + return false; + } + +}; + +} // end anonymous namespace. + +namespace llvm { +raw_ostream &operator<<(raw_ostream &OS, const LVILatticeVal &Val) { + if (Val.isUndefined()) + return OS << "undefined"; + if (Val.isOverdefined()) + return OS << "overdefined"; + + if (Val.isNotConstant()) + return OS << "notconstant<" << *Val.getNotConstant() << '>'; + return OS << "constant<" << *Val.getConstant() << '>'; +} +} + +//===----------------------------------------------------------------------===// +// LazyValueInfoCache Decl +//===----------------------------------------------------------------------===// + +namespace { + /// LazyValueInfoCache - This is the cache kept by LazyValueInfo which + /// maintains information about queries across the clients' queries. + class LazyValueInfoCache { + public: + /// BlockCacheEntryTy - This is a computed lattice value at the end of the + /// specified basic block for a Value* that depends on context. + typedef std::pair<BasicBlock*, LVILatticeVal> BlockCacheEntryTy; + + /// ValueCacheEntryTy - This is all of the cached block information for + /// exactly one Value*. The entries are sorted by the BasicBlock* of the + /// entries, allowing us to do a lookup with a binary search. + typedef std::vector<BlockCacheEntryTy> ValueCacheEntryTy; + + private: + /// ValueCache - This is all of the cached information for all values, + /// mapped from Value* to key information. + DenseMap<Value*, ValueCacheEntryTy> ValueCache; + public: + + /// getValueInBlock - This is the query interface to determine the lattice + /// value for the specified Value* at the end of the specified block. 
+ LVILatticeVal getValueInBlock(Value *V, BasicBlock *BB); + + /// getValueOnEdge - This is the query interface to determine the lattice + /// value for the specified Value* that is true on the specified edge. + LVILatticeVal getValueOnEdge(Value *V, BasicBlock *FromBB,BasicBlock *ToBB); + }; +} // end anonymous namespace + +namespace { + struct BlockCacheEntryComparator { + static int Compare(const void *LHSv, const void *RHSv) { + const LazyValueInfoCache::BlockCacheEntryTy *LHS = + static_cast<const LazyValueInfoCache::BlockCacheEntryTy *>(LHSv); + const LazyValueInfoCache::BlockCacheEntryTy *RHS = + static_cast<const LazyValueInfoCache::BlockCacheEntryTy *>(RHSv); + if (LHS->first < RHS->first) + return -1; + if (LHS->first > RHS->first) + return 1; + return 0; + } + + bool operator()(const LazyValueInfoCache::BlockCacheEntryTy &LHS, + const LazyValueInfoCache::BlockCacheEntryTy &RHS) const { + return LHS.first < RHS.first; + } + }; +} + +//===----------------------------------------------------------------------===// +// LVIQuery Impl +//===----------------------------------------------------------------------===// + +namespace { + /// LVIQuery - This is a transient object that exists while a query is + /// being performed. + /// + /// TODO: Reuse LVIQuery instead of recreating it for every query, this avoids + /// reallocation of the densemap on every query. + class LVIQuery { + typedef LazyValueInfoCache::BlockCacheEntryTy BlockCacheEntryTy; + typedef LazyValueInfoCache::ValueCacheEntryTy ValueCacheEntryTy; + + /// This is the current value being queried for. + Value *Val; + + /// This is all of the cached information about this value. + ValueCacheEntryTy &Cache; + + /// NewBlocks - This is a mapping of the new BasicBlocks which have been + /// added to cache but that are not in sorted order. + DenseMap<BasicBlock*, LVILatticeVal> NewBlockInfo; + public: + + LVIQuery(Value *V, ValueCacheEntryTy &VC) : Val(V), Cache(VC) { + } + + ~LVIQuery() { + // When the query is done, insert the newly discovered facts into the + // cache in sorted order. + if (NewBlockInfo.empty()) return; + + // Grow the cache to exactly fit the new data. + Cache.reserve(Cache.size() + NewBlockInfo.size()); + + // If we only have one new entry, insert it instead of doing a full-on + // sort. + if (NewBlockInfo.size() == 1) { + BlockCacheEntryTy Entry = *NewBlockInfo.begin(); + ValueCacheEntryTy::iterator I = + std::lower_bound(Cache.begin(), Cache.end(), Entry, + BlockCacheEntryComparator()); + assert((I == Cache.end() || I->first != Entry.first) && + "Entry already in map!"); + + Cache.insert(I, Entry); + return; + } + + // TODO: If we only have two new elements, INSERT them both. + + Cache.insert(Cache.end(), NewBlockInfo.begin(), NewBlockInfo.end()); + array_pod_sort(Cache.begin(), Cache.end(), + BlockCacheEntryComparator::Compare); + + } + + LVILatticeVal getBlockValue(BasicBlock *BB); + LVILatticeVal getEdgeValue(BasicBlock *FromBB, BasicBlock *ToBB); + + private: + LVILatticeVal &getCachedEntryForBlock(BasicBlock *BB); + }; +} // end anonymous namespace + +/// getCachedEntryForBlock - See if we already have a value for this block. If +/// so, return it, otherwise create a new entry in the NewBlockInfo map to use. +LVILatticeVal &LVIQuery::getCachedEntryForBlock(BasicBlock *BB) { + + // Do a binary search to see if we already have an entry for this block in + // the cache set. If so, find it. 
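The cache behind these queries is a vector of (BasicBlock*, lattice value) pairs kept sorted by block pointer, so a lookup is a binary search and the facts discovered during one query are merged in with a single sort when the LVIQuery object is destroyed. A standalone sketch of that sorted-vector pattern with plain int keys (hypothetical types, not the LLVM code):

  #include <algorithm>
  #include <vector>

  // Sketch: (key, value) pairs kept sorted by key; lookups are binary searches
  // and a batch of new entries is merged in with one sort.
  typedef std::pair<int, int> Entry;                  // (block id, lattice value)

  static bool KeyLess(const Entry &A, const Entry &B) { return A.first < B.first; }

  // Return a pointer to the cached value for Key, or 0 if it is not cached.
  const int *lookup(const std::vector<Entry> &Cache, int Key) {
    std::vector<Entry>::const_iterator I =
        std::lower_bound(Cache.begin(), Cache.end(), Entry(Key, 0), KeyLess);
    return (I != Cache.end() && I->first == Key) ? &I->second : 0;
  }

  // Merge the newly computed entries into the sorted cache.
  void flushNewEntries(std::vector<Entry> &Cache, std::vector<Entry> &New) {
    if (New.empty()) return;
    Cache.reserve(Cache.size() + New.size());
    if (New.size() == 1) {
      // One new fact: splice it into its sorted position directly.
      Cache.insert(std::lower_bound(Cache.begin(), Cache.end(), New[0], KeyLess),
                   New[0]);
    } else {
      // Several new facts: append them all, then re-sort once.
      Cache.insert(Cache.end(), New.begin(), New.end());
      std::sort(Cache.begin(), Cache.end(), KeyLess);
    }
    New.clear();
  }

The real code additionally asserts that a single new entry is not already present and uses array_pod_sort rather than std::sort, but the shape is the same.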
+ if (!Cache.empty()) { + ValueCacheEntryTy::iterator Entry = + std::lower_bound(Cache.begin(), Cache.end(), + BlockCacheEntryTy(BB, LVILatticeVal()), + BlockCacheEntryComparator()); + if (Entry != Cache.end() && Entry->first == BB) + return Entry->second; + } + + // Otherwise, check to see if it's in NewBlockInfo or create a new entry if + // not. + return NewBlockInfo[BB]; +} + +LVILatticeVal LVIQuery::getBlockValue(BasicBlock *BB) { + // See if we already have a value for this block. + LVILatticeVal &BBLV = getCachedEntryForBlock(BB); + + // If we've already computed this block's value, return it. + if (!BBLV.isUndefined()) { + DEBUG(errs() << " reuse BB '" << BB->getName() << "' val=" << BBLV <<'\n'); + return BBLV; + } + + // Otherwise, this is the first time we're seeing this block. Reset the + // lattice value to overdefined, so that cycles will terminate and be + // conservatively correct. + BBLV.markOverdefined(); + + // If V is live into BB, see if our predecessors know anything about it. + Instruction *BBI = dyn_cast<Instruction>(Val); + if (BBI == 0 || BBI->getParent() != BB) { + LVILatticeVal Result; // Start Undefined. + unsigned NumPreds = 0; + + // Loop over all of our predecessors, merging what we know from them into + // result. + for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) { + Result.mergeIn(getEdgeValue(*PI, BB)); + + // If we hit overdefined, exit early. The BlockVals entry is already set + // to overdefined. + if (Result.isOverdefined()) { + DEBUG(errs() << " compute BB '" << BB->getName() + << "' - overdefined because of pred.\n"); + return Result; + } + ++NumPreds; + } + + // If this is the entry block, we must be asking about an argument. The + // value is overdefined. + if (NumPreds == 0 && BB == &BB->getParent()->front()) { + assert(isa<Argument>(Val) && "Unknown live-in to the entry block"); + Result.markOverdefined(); + return Result; + } + + // Return the merged value, which is more precise than 'overdefined'. + assert(!Result.isOverdefined()); + return getCachedEntryForBlock(BB) = Result; + } + + // If this value is defined by an instruction in this block, we have to + // process it here somehow or return overdefined. + if (PHINode *PN = dyn_cast<PHINode>(BBI)) { + (void)PN; + // TODO: PHI Translation in preds. + } else { + + } + + DEBUG(errs() << " compute BB '" << BB->getName() + << "' - overdefined because inst def found.\n"); + + LVILatticeVal Result; + Result.markOverdefined(); + return getCachedEntryForBlock(BB) = Result; +} + + +/// getEdgeValue - This method attempts to infer more complex +LVILatticeVal LVIQuery::getEdgeValue(BasicBlock *BBFrom, BasicBlock *BBTo) { + // TODO: Handle more complex conditionals. If (v == 0 || v2 < 1) is false, we + // know that v != 0. + if (BranchInst *BI = dyn_cast<BranchInst>(BBFrom->getTerminator())) { + // If this is a conditional branch and only one successor goes to BBTo, then + // we maybe able to infer something from the condition. + if (BI->isConditional() && + BI->getSuccessor(0) != BI->getSuccessor(1)) { + bool isTrueDest = BI->getSuccessor(0) == BBTo; + assert(BI->getSuccessor(!isTrueDest) == BBTo && + "BBTo isn't a successor of BBFrom"); + + // If V is the condition of the branch itself, then we know exactly what + // it is. + if (BI->getCondition() == Val) + return LVILatticeVal::get(ConstantInt::get( + Type::getInt1Ty(Val->getContext()), isTrueDest)); + + // If the condition of the branch is an equality comparison, we may be + // able to infer the value. 
+ if (ICmpInst *ICI = dyn_cast<ICmpInst>(BI->getCondition())) + if (ICI->isEquality() && ICI->getOperand(0) == Val && + isa<Constant>(ICI->getOperand(1))) { + // We know that V has the RHS constant if this is a true SETEQ or + // false SETNE. + if (isTrueDest == (ICI->getPredicate() == ICmpInst::ICMP_EQ)) + return LVILatticeVal::get(cast<Constant>(ICI->getOperand(1))); + return LVILatticeVal::getNot(cast<Constant>(ICI->getOperand(1))); + } + } + } + + // If the edge was formed by a switch on the value, then we may know exactly + // what it is. + if (SwitchInst *SI = dyn_cast<SwitchInst>(BBFrom->getTerminator())) { + // If BBTo is the default destination of the switch, we don't know anything. + // Given a more powerful range analysis we could know stuff. + if (SI->getCondition() == Val && SI->getDefaultDest() != BBTo) { + // We only know something if there is exactly one value that goes from + // BBFrom to BBTo. + unsigned NumEdges = 0; + ConstantInt *EdgeVal = 0; + for (unsigned i = 1, e = SI->getNumSuccessors(); i != e; ++i) { + if (SI->getSuccessor(i) != BBTo) continue; + if (NumEdges++) break; + EdgeVal = SI->getCaseValue(i); + } + assert(EdgeVal && "Missing successor?"); + if (NumEdges == 1) + return LVILatticeVal::get(EdgeVal); + } + } + + // Otherwise see if the value is known in the block. + return getBlockValue(BBFrom); +} + + +//===----------------------------------------------------------------------===// +// LazyValueInfoCache Impl +//===----------------------------------------------------------------------===// + +LVILatticeVal LazyValueInfoCache::getValueInBlock(Value *V, BasicBlock *BB) { + // If already a constant, there is nothing to compute. + if (Constant *VC = dyn_cast<Constant>(V)) + return LVILatticeVal::get(VC); + + DEBUG(errs() << "LVI Getting block end value " << *V << " at '" + << BB->getName() << "'\n"); + + LVILatticeVal Result = LVIQuery(V, ValueCache[V]).getBlockValue(BB); + + DEBUG(errs() << " Result = " << Result << "\n"); + return Result; +} + +LVILatticeVal LazyValueInfoCache:: +getValueOnEdge(Value *V, BasicBlock *FromBB, BasicBlock *ToBB) { + // If already a constant, there is nothing to compute. + if (Constant *VC = dyn_cast<Constant>(V)) + return LVILatticeVal::get(VC); + + DEBUG(errs() << "LVI Getting edge value " << *V << " from '" + << FromBB->getName() << "' to '" << ToBB->getName() << "'\n"); + LVILatticeVal Result = + LVIQuery(V, ValueCache[V]).getEdgeValue(FromBB, ToBB); + + DEBUG(errs() << " Result = " << Result << "\n"); + + return Result; +} + +//===----------------------------------------------------------------------===// +// LazyValueInfo Impl +//===----------------------------------------------------------------------===// + +bool LazyValueInfo::runOnFunction(Function &F) { + TD = getAnalysisIfAvailable<TargetData>(); + // Fully lazy. + return false; +} + +/// getCache - This lazily constructs the LazyValueInfoCache. +static LazyValueInfoCache &getCache(void *&PImpl) { + if (!PImpl) + PImpl = new LazyValueInfoCache(); + return *static_cast<LazyValueInfoCache*>(PImpl); +} + +void LazyValueInfo::releaseMemory() { + // If the cache was allocated, free it. + if (PImpl) { + delete &getCache(PImpl); + PImpl = 0; + } +} + +Constant *LazyValueInfo::getConstant(Value *V, BasicBlock *BB) { + LVILatticeVal Result = getCache(PImpl).getValueInBlock(V, BB); + + if (Result.isConstant()) + return Result.getConstant(); + return 0; +} + +/// getConstantOnEdge - Determine whether the specified value is known to be a +/// constant on the specified edge. 
Return null if not. +Constant *LazyValueInfo::getConstantOnEdge(Value *V, BasicBlock *FromBB, + BasicBlock *ToBB) { + LVILatticeVal Result = getCache(PImpl).getValueOnEdge(V, FromBB, ToBB); + + if (Result.isConstant()) + return Result.getConstant(); + return 0; +} + +/// getPredicateOnEdge - Determine whether the specified value comparison +/// with a constant is known to be true or false on the specified CFG edge. +/// Pred is a CmpInst predicate. +LazyValueInfo::Tristate +LazyValueInfo::getPredicateOnEdge(unsigned Pred, Value *V, Constant *C, + BasicBlock *FromBB, BasicBlock *ToBB) { + LVILatticeVal Result = getCache(PImpl).getValueOnEdge(V, FromBB, ToBB); + + // If we know the value is a constant, evaluate the conditional. + Constant *Res = 0; + if (Result.isConstant()) { + Res = ConstantFoldCompareInstOperands(Pred, Result.getConstant(), C, TD); + if (ConstantInt *ResCI = dyn_cast_or_null<ConstantInt>(Res)) + return ResCI->isZero() ? False : True; + return Unknown; + } + + if (Result.isNotConstant()) { + // If this is an equality comparison, we can try to fold it knowing that + // "V != C1". + if (Pred == ICmpInst::ICMP_EQ) { + // !C1 == C -> false iff C1 == C. + Res = ConstantFoldCompareInstOperands(ICmpInst::ICMP_NE, + Result.getNotConstant(), C, TD); + if (Res->isNullValue()) + return False; + } else if (Pred == ICmpInst::ICMP_NE) { + // !C1 != C -> true iff C1 == C. + Res = ConstantFoldCompareInstOperands(ICmpInst::ICMP_NE, + Result.getNotConstant(), C, TD); + if (Res->isNullValue()) + return True; + } + return Unknown; + } + + return Unknown; +} + + diff --git a/lib/Analysis/LiveValues.cpp b/lib/Analysis/LiveValues.cpp index 2bbe98a..02ec7d3 100644 --- a/lib/Analysis/LiveValues.cpp +++ b/lib/Analysis/LiveValues.cpp @@ -17,7 +17,9 @@ #include "llvm/Analysis/LoopInfo.h" using namespace llvm; -FunctionPass *llvm::createLiveValuesPass() { return new LiveValues(); } +namespace llvm { + FunctionPass *createLiveValuesPass() { return new LiveValues(); } +} char LiveValues::ID = 0; static RegisterPass<LiveValues> diff --git a/lib/Analysis/LoopInfo.cpp b/lib/Analysis/LoopInfo.cpp index e9256b7..1c614b0 100644 --- a/lib/Analysis/LoopInfo.cpp +++ b/lib/Analysis/LoopInfo.cpp @@ -263,14 +263,13 @@ bool Loop::isLCSSAForm() const { SmallPtrSet<BasicBlock *, 16> LoopBBs(block_begin(), block_end()); for (block_iterator BI = block_begin(), E = block_end(); BI != E; ++BI) { - BasicBlock *BB = *BI; - for (BasicBlock ::iterator I = BB->begin(), E = BB->end(); I != E;++I) + BasicBlock *BB = *BI; + for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E;++I) for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI != E; ++UI) { BasicBlock *UserBB = cast<Instruction>(*UI)->getParent(); - if (PHINode *P = dyn_cast<PHINode>(*UI)) { + if (PHINode *P = dyn_cast<PHINode>(*UI)) UserBB = P->getIncomingBlock(UI); - } // Check the current block, as a fast-path. Most values are used in // the same block they are defined in. @@ -286,12 +285,14 @@ bool Loop::isLCSSAForm() const { /// the LoopSimplify form transforms loops to, which is sometimes called /// normal form. bool Loop::isLoopSimplifyForm() const { - // Normal-form loops have a preheader. - if (!getLoopPreheader()) - return false; - // Normal-form loops have a single backedge. - if (!getLoopLatch()) - return false; + // Normal-form loops have a preheader, a single backedge, and all of their + // exits have all their predecessors inside the loop. 
+ return getLoopPreheader() && getLoopLatch() && hasDedicatedExits(); +} + +/// hasDedicatedExits - Return true if no exit block for the loop +/// has a predecessor that is outside the loop. +bool Loop::hasDedicatedExits() const { // Sort the blocks vector so that we can use binary search to do quick // lookups. SmallPtrSet<BasicBlock *, 16> LoopBBs(block_begin(), block_end()); diff --git a/lib/Analysis/MemoryBuiltins.cpp b/lib/Analysis/MemoryBuiltins.cpp index f4eb793..b448628 100644 --- a/lib/Analysis/MemoryBuiltins.cpp +++ b/lib/Analysis/MemoryBuiltins.cpp @@ -16,7 +16,7 @@ #include "llvm/Constants.h" #include "llvm/Instructions.h" #include "llvm/Module.h" -#include "llvm/Analysis/ConstantFolding.h" +#include "llvm/Analysis/ValueTracking.h" #include "llvm/Target/TargetData.h" using namespace llvm; @@ -87,13 +87,8 @@ const CallInst *llvm::extractMallocCallFromBitCast(const Value *I) { : NULL; } -/// isConstantOne - Return true only if val is constant int 1. -static bool isConstantOne(Value *val) { - return isa<ConstantInt>(val) && cast<ConstantInt>(val)->isOne(); -} - -static Value *isArrayMallocHelper(const CallInst *CI, LLVMContext &Context, - const TargetData *TD) { +static Value *computeArraySize(const CallInst *CI, const TargetData *TD, + bool LookThroughSExt = false) { if (!CI) return NULL; @@ -102,99 +97,27 @@ static Value *isArrayMallocHelper(const CallInst *CI, LLVMContext &Context, if (!T || !T->isSized() || !TD) return NULL; - Value *MallocArg = CI->getOperand(1); - const Type *ArgType = MallocArg->getType(); - ConstantExpr *CO = dyn_cast<ConstantExpr>(MallocArg); - BinaryOperator *BO = dyn_cast<BinaryOperator>(MallocArg); - - unsigned ElementSizeInt = TD->getTypeAllocSize(T); + unsigned ElementSize = TD->getTypeAllocSize(T); if (const StructType *ST = dyn_cast<StructType>(T)) - ElementSizeInt = TD->getStructLayout(ST)->getSizeInBytes(); - Constant *ElementSize = ConstantInt::get(ArgType, ElementSizeInt); - - // First, check if CI is a non-array malloc. - if (CO && CO == ElementSize) - // Match CreateMalloc's use of constant 1 array-size for non-array mallocs. - return ConstantInt::get(ArgType, 1); - - // Second, check if CI is an array malloc whose array size can be determined. - if (isConstantOne(ElementSize)) - return MallocArg; - - if (ConstantInt *CInt = dyn_cast<ConstantInt>(MallocArg)) - if (CInt->getZExtValue() % ElementSizeInt == 0) - return ConstantInt::get(ArgType, CInt->getZExtValue() / ElementSizeInt); + ElementSize = TD->getStructLayout(ST)->getSizeInBytes(); - if (!CO && !BO) - return NULL; - - Value *Op0 = NULL; - Value *Op1 = NULL; - unsigned Opcode = 0; - if (CO && ((CO->getOpcode() == Instruction::Mul) || - (CO->getOpcode() == Instruction::Shl))) { - Op0 = CO->getOperand(0); - Op1 = CO->getOperand(1); - Opcode = CO->getOpcode(); - } - if (BO && ((BO->getOpcode() == Instruction::Mul) || - (BO->getOpcode() == Instruction::Shl))) { - Op0 = BO->getOperand(0); - Op1 = BO->getOperand(1); - Opcode = BO->getOpcode(); - } - - // Determine array size if malloc's argument is the product of a mul or shl. 
- if (Op0) { - if (Opcode == Instruction::Mul) { - if (Op1 == ElementSize) - // ArraySize * ElementSize - return Op0; - if (Op0 == ElementSize) - // ElementSize * ArraySize - return Op1; - } - if (Opcode == Instruction::Shl) { - ConstantInt *Op1CI = dyn_cast<ConstantInt>(Op1); - if (!Op1CI) return NULL; - - APInt Op1Int = Op1CI->getValue(); - uint64_t BitToSet = Op1Int.getLimitedValue(Op1Int.getBitWidth() - 1); - Value *Op1Pow = ConstantInt::get(Context, - APInt(Op1Int.getBitWidth(), 0).set(BitToSet)); - if (Op0 == ElementSize) - // ArraySize << log2(ElementSize) - return Op1Pow; - if (Op1Pow == ElementSize) - // ElementSize << log2(ArraySize) - return Op0; - } - } + // If malloc calls' arg can be determined to be a multiple of ElementSize, + // return the multiple. Otherwise, return NULL. + Value *MallocArg = CI->getOperand(1); + Value *Multiple = NULL; + if (ComputeMultiple(MallocArg, ElementSize, Multiple, + LookThroughSExt)) + return Multiple; - // We could not determine the malloc array size from MallocArg. return NULL; } /// isArrayMalloc - Returns the corresponding CallInst if the instruction /// is a call to malloc whose array size can be determined and the array size /// is not constant 1. Otherwise, return NULL. -CallInst *llvm::isArrayMalloc(Value *I, LLVMContext &Context, - const TargetData *TD) { - CallInst *CI = extractMallocCall(I); - Value *ArraySize = isArrayMallocHelper(CI, Context, TD); - - if (ArraySize && - ArraySize != ConstantInt::get(CI->getOperand(1)->getType(), 1)) - return CI; - - // CI is a non-array malloc or we can't figure out that it is an array malloc. - return NULL; -} - -const CallInst *llvm::isArrayMalloc(const Value *I, LLVMContext &Context, - const TargetData *TD) { +const CallInst *llvm::isArrayMalloc(const Value *I, const TargetData *TD) { const CallInst *CI = extractMallocCall(I); - Value *ArraySize = isArrayMallocHelper(CI, Context, TD); + Value *ArraySize = computeArraySize(CI, TD); if (ArraySize && ArraySize != ConstantInt::get(CI->getOperand(1)->getType(), 1)) @@ -210,7 +133,7 @@ const CallInst *llvm::isArrayMalloc(const Value *I, LLVMContext &Context, /// 1: PointerType is the bitcast's result type. /// >1: Unique PointerType cannot be determined, return NULL. const PointerType *llvm::getMallocType(const CallInst *CI) { - assert(isMalloc(CI) && "GetMallocType and not malloc call"); + assert(isMalloc(CI) && "getMallocType and not malloc call"); const PointerType *MallocType = NULL; unsigned NumOfBitCastUses = 0; @@ -250,9 +173,10 @@ const Type *llvm::getMallocAllocatedType(const CallInst *CI) { /// then return that multiple. For non-array mallocs, the multiple is /// constant 1. Otherwise, return NULL for mallocs whose array size cannot be /// determined. -Value *llvm::getMallocArraySize(CallInst *CI, LLVMContext &Context, - const TargetData *TD) { - return isArrayMallocHelper(CI, Context, TD); +Value *llvm::getMallocArraySize(CallInst *CI, const TargetData *TD, + bool LookThroughSExt) { + assert(isMalloc(CI) && "getMallocArraySize and not malloc call"); + return computeArraySize(CI, TD, LookThroughSExt); } //===----------------------------------------------------------------------===// diff --git a/lib/Analysis/PointerTracking.cpp b/lib/Analysis/PointerTracking.cpp index 2251b62..8da07e7 100644 --- a/lib/Analysis/PointerTracking.cpp +++ b/lib/Analysis/PointerTracking.cpp @@ -10,6 +10,7 @@ // This file implements tracking of pointer bounds. 
// //===----------------------------------------------------------------------===// + #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/LoopInfo.h" @@ -101,7 +102,7 @@ const SCEV *PointerTracking::computeAllocationCount(Value *P, } if (CallInst *CI = extractMallocCall(V)) { - Value *arraySize = getMallocArraySize(CI, P->getContext(), TD); + Value *arraySize = getMallocArraySize(CI, TD); const Type* AllocTy = getMallocAllocatedType(CI); if (!AllocTy || !arraySize) return SE->getCouldNotCompute(); Ty = AllocTy; diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp index 3e87ca2..ea4af40 100644 --- a/lib/Analysis/ScalarEvolution.cpp +++ b/lib/Analysis/ScalarEvolution.cpp @@ -3811,29 +3811,26 @@ static PHINode *getConstantEvolvingPHI(Value *V, const Loop *L) { /// getConstantEvolvingPHI predicate, evaluate its value assuming the PHI node /// in the loop has the value PHIVal. If we can't fold this expression for some /// reason, return null. -static Constant *EvaluateExpression(Value *V, Constant *PHIVal) { +static Constant *EvaluateExpression(Value *V, Constant *PHIVal, + const TargetData *TD) { if (isa<PHINode>(V)) return PHIVal; if (Constant *C = dyn_cast<Constant>(V)) return C; if (GlobalValue *GV = dyn_cast<GlobalValue>(V)) return GV; Instruction *I = cast<Instruction>(V); - LLVMContext &Context = I->getParent()->getContext(); std::vector<Constant*> Operands; Operands.resize(I->getNumOperands()); for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) { - Operands[i] = EvaluateExpression(I->getOperand(i), PHIVal); + Operands[i] = EvaluateExpression(I->getOperand(i), PHIVal, TD); if (Operands[i] == 0) return 0; } if (const CmpInst *CI = dyn_cast<CmpInst>(I)) - return ConstantFoldCompareInstOperands(CI->getPredicate(), - &Operands[0], Operands.size(), - Context); - else - return ConstantFoldInstOperands(I->getOpcode(), I->getType(), - &Operands[0], Operands.size(), - Context); + return ConstantFoldCompareInstOperands(CI->getPredicate(), Operands[0], + Operands[1], TD); + return ConstantFoldInstOperands(I->getOpcode(), I->getType(), + &Operands[0], Operands.size(), TD); } /// getConstantEvolutionLoopExitValue - If we know that the specified Phi is @@ -3879,7 +3876,7 @@ ScalarEvolution::getConstantEvolutionLoopExitValue(PHINode *PN, return RetVal = PHIVal; // Got exit value! // Compute the value of the PHI node for the next iteration. - Constant *NextPHI = EvaluateExpression(BEValue, PHIVal); + Constant *NextPHI = EvaluateExpression(BEValue, PHIVal, TD); if (NextPHI == PHIVal) return RetVal = NextPHI; // Stopped evolving! if (NextPHI == 0) @@ -3920,7 +3917,7 @@ ScalarEvolution::ComputeBackedgeTakenCountExhaustively(const Loop *L, for (Constant *PHIVal = StartCST; IterationNum != MaxIterations; ++IterationNum) { ConstantInt *CondVal = - dyn_cast_or_null<ConstantInt>(EvaluateExpression(Cond, PHIVal)); + dyn_cast_or_null<ConstantInt>(EvaluateExpression(Cond, PHIVal, TD)); // Couldn't symbolically evaluate. if (!CondVal) return getCouldNotCompute(); @@ -3931,7 +3928,7 @@ ScalarEvolution::ComputeBackedgeTakenCountExhaustively(const Loop *L, } // Compute the value of the PHI node for the next iteration. - Constant *NextPHI = EvaluateExpression(BEValue, PHIVal); + Constant *NextPHI = EvaluateExpression(BEValue, PHIVal, TD); if (NextPHI == 0 || NextPHI == PHIVal) return getCouldNotCompute();// Couldn't evaluate or not making progress... 
PHIVal = NextPHI; @@ -4040,12 +4037,10 @@ const SCEV *ScalarEvolution::computeSCEVAtScope(const SCEV *V, const Loop *L) { Constant *C; if (const CmpInst *CI = dyn_cast<CmpInst>(I)) C = ConstantFoldCompareInstOperands(CI->getPredicate(), - &Operands[0], Operands.size(), - getContext()); + Operands[0], Operands[1], TD); else C = ConstantFoldInstOperands(I->getOpcode(), I->getType(), - &Operands[0], Operands.size(), - getContext()); + &Operands[0], Operands.size(), TD); return getSCEV(C); } } diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp index 5672510..b0e6900 100644 --- a/lib/Analysis/ValueTracking.cpp +++ b/lib/Analysis/ValueTracking.cpp @@ -789,6 +789,118 @@ unsigned llvm::ComputeNumSignBits(Value *V, const TargetData *TD, return std::max(FirstAnswer, std::min(TyBits, Mask.countLeadingZeros())); } +/// ComputeMultiple - This function computes the integer multiple of Base that +/// equals V. If successful, it returns true and returns the multiple in +/// Multiple. If unsuccessful, it returns false. It looks +/// through SExt instructions only if LookThroughSExt is true. +bool llvm::ComputeMultiple(Value *V, unsigned Base, Value *&Multiple, + bool LookThroughSExt, unsigned Depth) { + const unsigned MaxDepth = 6; + + assert(V && "No Value?"); + assert(Depth <= MaxDepth && "Limit Search Depth"); + assert(V->getType()->isInteger() && "Not integer or pointer type!"); + + const Type *T = V->getType(); + + ConstantInt *CI = dyn_cast<ConstantInt>(V); + + if (Base == 0) + return false; + + if (Base == 1) { + Multiple = V; + return true; + } + + ConstantExpr *CO = dyn_cast<ConstantExpr>(V); + Constant *BaseVal = ConstantInt::get(T, Base); + if (CO && CO == BaseVal) { + // Multiple is 1. + Multiple = ConstantInt::get(T, 1); + return true; + } + + if (CI && CI->getZExtValue() % Base == 0) { + Multiple = ConstantInt::get(T, CI->getZExtValue() / Base); + return true; + } + + if (Depth == MaxDepth) return false; // Limit search depth. 
+ + Operator *I = dyn_cast<Operator>(V); + if (!I) return false; + + switch (I->getOpcode()) { + default: break; + case Instruction::SExt: { + if (!LookThroughSExt) return false; + // otherwise fall through to ZExt + } + case Instruction::ZExt: { + return ComputeMultiple(I->getOperand(0), Base, Multiple, + LookThroughSExt, Depth+1); + } + case Instruction::Shl: + case Instruction::Mul: { + Value *Op0 = I->getOperand(0); + Value *Op1 = I->getOperand(1); + + if (I->getOpcode() == Instruction::Shl) { + ConstantInt *Op1CI = dyn_cast<ConstantInt>(Op1); + if (!Op1CI) return false; + // Turn Op0 << Op1 into Op0 * 2^Op1 + APInt Op1Int = Op1CI->getValue(); + uint64_t BitToSet = Op1Int.getLimitedValue(Op1Int.getBitWidth() - 1); + Op1 = ConstantInt::get(V->getContext(), + APInt(Op1Int.getBitWidth(), 0).set(BitToSet)); + } + + Value *Mul0 = NULL; + Value *Mul1 = NULL; + bool M0 = ComputeMultiple(Op0, Base, Mul0, + LookThroughSExt, Depth+1); + bool M1 = ComputeMultiple(Op1, Base, Mul1, + LookThroughSExt, Depth+1); + + if (M0) { + if (isa<Constant>(Op1) && isa<Constant>(Mul0)) { + // V == Base * (Mul0 * Op1), so return (Mul0 * Op1) + Multiple = ConstantExpr::getMul(cast<Constant>(Mul0), + cast<Constant>(Op1)); + return true; + } + + if (ConstantInt *Mul0CI = dyn_cast<ConstantInt>(Mul0)) + if (Mul0CI->getValue() == 1) { + // V == Base * Op1, so return Op1 + Multiple = Op1; + return true; + } + } + + if (M1) { + if (isa<Constant>(Op0) && isa<Constant>(Mul1)) { + // V == Base * (Mul1 * Op0), so return (Mul1 * Op0) + Multiple = ConstantExpr::getMul(cast<Constant>(Mul1), + cast<Constant>(Op0)); + return true; + } + + if (ConstantInt *Mul1CI = dyn_cast<ConstantInt>(Mul1)) + if (Mul1CI->getValue() == 1) { + // V == Base * Op0, so return Op0 + Multiple = Op0; + return true; + } + } + } + } + + // We could not determine if V is a multiple of Base. + return false; +} + /// CannotBeNegativeZero - Return true if we can prove that the specified FP /// value is never equal to -0.0. 
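A short worked example of the recursion above, assuming the value V being inspected is a malloc argument of the form n * 12 and Base is the element size 12 (hypothetical operands, shown as a trace rather than runnable code):

  // ComputeMultiple(V = n * 12, Base = 12):
  //   Op0 = n, Op1 = 12
  //   ComputeMultiple(n, 12)  -> false            (nothing is known about n)
  //   ComputeMultiple(12, 12) -> true, Mul1 = 1   (12 % 12 == 0, 12 / 12 == 1)
  //   M1 is set and Mul1 == 1 -> Multiple = Op0 = n
  //
  // A shift behaves the same way: n << 2 with Base == 4 is first rewritten as
  // n * 4, and the same rules again yield Multiple = n.

This is what lets computeArraySize in MemoryBuiltins.cpp report an array size of n for a call like malloc(n * 12) when the allocated element type is 12 bytes.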
/// diff --git a/lib/AsmParser/LLParser.cpp b/lib/AsmParser/LLParser.cpp index 63af42d..26b6a09 100644 --- a/lib/AsmParser/LLParser.cpp +++ b/lib/AsmParser/LLParser.cpp @@ -2039,7 +2039,7 @@ bool LLParser::ParseValID(ValID &ID) { ParseToken(lltok::StringConstant, "expected constraint string")) return true; ID.StrVal2 = Lex.getStrVal(); - ID.UIntVal = HasSideEffect | ((unsigned)AlignStack<<1); + ID.UIntVal = unsigned(HasSideEffect) | (unsigned(AlignStack)<<1); ID.Kind = ValID::t_InlineAsm; return false; } diff --git a/lib/CodeGen/AggressiveAntiDepBreaker.cpp b/lib/CodeGen/AggressiveAntiDepBreaker.cpp index 86d051c..c37c793 100644 --- a/lib/CodeGen/AggressiveAntiDepBreaker.cpp +++ b/lib/CodeGen/AggressiveAntiDepBreaker.cpp @@ -54,10 +54,13 @@ unsigned AggressiveAntiDepState::GetGroup(unsigned Reg) return Node; } -void AggressiveAntiDepState::GetGroupRegs(unsigned Group, std::vector<unsigned> &Regs) +void AggressiveAntiDepState::GetGroupRegs( + unsigned Group, + std::vector<unsigned> &Regs, + std::multimap<unsigned, AggressiveAntiDepState::RegisterReference> *RegRefs) { for (unsigned Reg = 0; Reg != TargetRegisterInfo::FirstVirtualRegister; ++Reg) { - if (GetGroup(Reg) == Group) + if ((GetGroup(Reg) == Group) && (RegRefs->count(Reg) > 0)) Regs.push_back(Reg); } } @@ -99,12 +102,28 @@ bool AggressiveAntiDepState::IsLive(unsigned Reg) AggressiveAntiDepBreaker:: -AggressiveAntiDepBreaker(MachineFunction& MFi) : +AggressiveAntiDepBreaker(MachineFunction& MFi, + TargetSubtarget::RegClassVector& CriticalPathRCs) : AntiDepBreaker(), MF(MFi), MRI(MF.getRegInfo()), TRI(MF.getTarget().getRegisterInfo()), AllocatableSet(TRI->getAllocatableSet(MF)), State(NULL), SavedState(NULL) { + /* Collect a bitset of all registers that are only broken if they + are on the critical path. */ + for (unsigned i = 0, e = CriticalPathRCs.size(); i < e; ++i) { + BitVector CPSet = TRI->getAllocatableSet(MF, CriticalPathRCs[i]); + if (CriticalPathSet.none()) + CriticalPathSet = CPSet; + else + CriticalPathSet |= CPSet; + } + + DEBUG(errs() << "AntiDep Critical-Path Registers:"); + DEBUG(for (int r = CriticalPathSet.find_first(); r != -1; + r = CriticalPathSet.find_next(r)) + errs() << " " << TRI->getName(r)); + DEBUG(errs() << '\n'); } AggressiveAntiDepBreaker::~AggressiveAntiDepBreaker() { @@ -264,16 +283,18 @@ void AggressiveAntiDepBreaker::GetPassthruRegs(MachineInstr *MI, } } -/// AntiDepPathStep - Return SUnit that SU has an anti-dependence on. -static void AntiDepPathStep(SUnit *SU, AntiDepBreaker::AntiDepRegVector& Regs, - std::vector<SDep*>& Edges) { +/// AntiDepEdges - Return in Edges the anti- and output- +/// dependencies on Regs in SU that we want to consider for breaking. +static void AntiDepEdges(SUnit *SU, + const AntiDepBreaker::AntiDepRegVector& Regs, + std::vector<SDep*>& Edges) { AntiDepBreaker::AntiDepRegSet RegSet; for (unsigned i = 0, e = Regs.size(); i < e; ++i) RegSet.insert(Regs[i]); for (SUnit::pred_iterator P = SU->Preds.begin(), PE = SU->Preds.end(); P != PE; ++P) { - if (P->getKind() == SDep::Anti) { + if ((P->getKind() == SDep::Anti) || (P->getKind() == SDep::Output)) { unsigned Reg = P->getReg(); if (RegSet.count(Reg) != 0) { Edges.push_back(&*P); @@ -285,6 +306,31 @@ static void AntiDepPathStep(SUnit *SU, AntiDepBreaker::AntiDepRegVector& Regs, assert(RegSet.empty() && "Expected all antidep registers to be found"); } +/// CriticalPathStep - Return the next SUnit after SU on the bottom-up +/// critical path. 
+static SUnit *CriticalPathStep(SUnit *SU) { + SDep *Next = 0; + unsigned NextDepth = 0; + // Find the predecessor edge with the greatest depth. + if (SU != 0) { + for (SUnit::pred_iterator P = SU->Preds.begin(), PE = SU->Preds.end(); + P != PE; ++P) { + SUnit *PredSU = P->getSUnit(); + unsigned PredLatency = P->getLatency(); + unsigned PredTotalLatency = PredSU->getDepth() + PredLatency; + // In the case of a latency tie, prefer an anti-dependency edge over + // other types of edges. + if (NextDepth < PredTotalLatency || + (NextDepth == PredTotalLatency && P->getKind() == SDep::Anti)) { + NextDepth = PredTotalLatency; + Next = &*P; + } + } + } + + return (Next) ? Next->getSUnit() : 0; +} + void AggressiveAntiDepBreaker::HandleLastUse(unsigned Reg, unsigned KillIdx, const char *tag) { unsigned *KillIndices = State->GetKillIndices(); @@ -499,11 +545,11 @@ bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters( std::multimap<unsigned, AggressiveAntiDepState::RegisterReference>& RegRefs = State->GetRegRefs(); - // Collect all registers in the same group as AntiDepReg. These all - // need to be renamed together if we are to break the - // anti-dependence. + // Collect all referenced registers in the same group as + // AntiDepReg. These all need to be renamed together if we are to + // break the anti-dependence. std::vector<unsigned> Regs; - State->GetGroupRegs(AntiDepGroupIndex, Regs); + State->GetGroupRegs(AntiDepGroupIndex, Regs, &RegRefs); assert(Regs.size() > 0 && "Empty register group!"); if (Regs.size() == 0) return false; @@ -544,9 +590,10 @@ bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters( } // FIXME: for now just handle single register in group case... - // FIXME: check only regs that have references... - if (Regs.size() > 1) + if (Regs.size() > 1) { + DEBUG(errs() << "\tMultiple rename registers in group\n"); return false; + } // Check each possible rename register for SuperReg in round-robin // order. If that register is available, and the corresponding @@ -630,12 +677,6 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies( std::multimap<unsigned, AggressiveAntiDepState::RegisterReference>& RegRefs = State->GetRegRefs(); - // Nothing to do if no candidates. - if (Candidates.empty()) { - DEBUG(errs() << "\n===== No anti-dependency candidates\n"); - return 0; - } - // The code below assumes that there is at least one instruction, // so just duck out immediately if the block is empty. if (SUnits.empty()) return 0; @@ -655,16 +696,37 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies( // ...need a map from MI to SUnit. std::map<MachineInstr *, SUnit *> MISUnitMap; - - DEBUG(errs() << "\n===== Attempting to break " << Candidates.size() << - " anti-dependencies\n"); for (unsigned i = 0, e = SUnits.size(); i != e; ++i) { SUnit *SU = &SUnits[i]; MISUnitMap.insert(std::pair<MachineInstr *, SUnit *>(SU->getInstr(), SU)); } + // Track progress along the critical path through the SUnit graph as + // we walk the instructions. This is needed for regclasses that only + // break critical-path anti-dependencies. 
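The helper above takes one step along the bottom-up critical path by picking the predecessor edge with the greatest depth plus latency, preferring an anti-dependence edge when the latencies tie. A standalone sketch of that selection rule over a plain edge list (hypothetical types, not the LLVM code):

  #include <vector>

  // Sketch: choose the predecessor with the greatest depth + latency, breaking
  // ties in favour of anti-dependence edges.
  struct Edge {
    int PredDepth;    // depth of the predecessor node
    int Latency;      // latency of this edge
    bool IsAnti;      // true if this is an anti-dependence edge
    int PredId;       // which predecessor the edge comes from
  };

  int nextOnCriticalPath(const std::vector<Edge> &Preds) {
    int Next = -1;    // -1 means "no predecessor", i.e. the end of the path
    int NextDepth = 0;
    for (unsigned i = 0; i != Preds.size(); ++i) {
      int Total = Preds[i].PredDepth + Preds[i].Latency;
      if (NextDepth < Total || (NextDepth == Total && Preds[i].IsAnti)) {
        NextDepth = Total;
        Next = Preds[i].PredId;
      }
    }
    return Next;
  }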
+ SUnit *CriticalPathSU = 0; + MachineInstr *CriticalPathMI = 0; + if (CriticalPathSet.any()) { + for (unsigned i = 0, e = SUnits.size(); i != e; ++i) { + SUnit *SU = &SUnits[i]; + if (!CriticalPathSU || + ((SU->getDepth() + SU->Latency) > + (CriticalPathSU->getDepth() + CriticalPathSU->Latency))) { + CriticalPathSU = SU; + } + } + + CriticalPathMI = CriticalPathSU->getInstr(); + } + + // Even if there are no anti-dependencies we still need to go + // through the instructions to update Def, Kills, etc. #ifndef NDEBUG - { + if (Candidates.empty()) { + DEBUG(errs() << "\n===== No anti-dependency candidates\n"); + } else { + DEBUG(errs() << "\n===== Attempting to break " << Candidates.size() << + " anti-dependencies\n"); DEBUG(errs() << "Available regs:"); for (unsigned Reg = 0; Reg < TRI->getNumRegs(); ++Reg) { if (!State->IsLive(Reg)) @@ -691,14 +753,26 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies( // Process the defs in MI... PrescanInstruction(MI, Count, PassthruRegs); - + + // The the dependence edges that represent anti- and output- + // dependencies that are candidates for breaking. std::vector<SDep*> Edges; SUnit *PathSU = MISUnitMap[MI]; AntiDepBreaker::CandidateMap::iterator citer = Candidates.find(PathSU); if (citer != Candidates.end()) - AntiDepPathStep(PathSU, citer->second, Edges); - + AntiDepEdges(PathSU, citer->second, Edges); + + // If MI is not on the critical path, then we don't rename + // registers in the CriticalPathSet. + BitVector *ExcludeRegs = NULL; + if (MI == CriticalPathMI) { + CriticalPathSU = CriticalPathStep(CriticalPathSU); + CriticalPathMI = (CriticalPathSU) ? CriticalPathSU->getInstr() : 0; + } else { + ExcludeRegs = &CriticalPathSet; + } + // Ignore KILL instructions (they form a group in ScanInstruction // but don't cause any anti-dependence breaking themselves) if (MI->getOpcode() != TargetInstrInfo::KILL) { @@ -707,7 +781,8 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies( SDep *Edge = Edges[i]; SUnit *NextSU = Edge->getSUnit(); - if (Edge->getKind() != SDep::Anti) continue; + if ((Edge->getKind() != SDep::Anti) && + (Edge->getKind() != SDep::Output)) continue; unsigned AntiDepReg = Edge->getReg(); DEBUG(errs() << "\tAntidep reg: " << TRI->getName(AntiDepReg)); @@ -717,6 +792,11 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies( // Don't break anti-dependencies on non-allocatable registers. DEBUG(errs() << " (non-allocatable)\n"); continue; + } else if ((ExcludeRegs != NULL) && ExcludeRegs->test(AntiDepReg)) { + // Don't break anti-dependencies for critical path registers + // if not on the critical path + DEBUG(errs() << " (not critical-path)\n"); + continue; } else if (PassthruRegs.count(AntiDepReg) != 0) { // If the anti-dep register liveness "passes-thru", then // don't try to change it. It will be changed along with diff --git a/lib/CodeGen/AggressiveAntiDepBreaker.h b/lib/CodeGen/AggressiveAntiDepBreaker.h index c512168..e5c9a7b 100644 --- a/lib/CodeGen/AggressiveAntiDepBreaker.h +++ b/lib/CodeGen/AggressiveAntiDepBreaker.h @@ -23,6 +23,7 @@ #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/ScheduleDAG.h" +#include "llvm/Target/TargetSubtarget.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/SmallSet.h" @@ -85,8 +86,11 @@ namespace llvm { unsigned GetGroup(unsigned Reg); // GetGroupRegs - Return a vector of the registers belonging to a - // group. 
- void GetGroupRegs(unsigned Group, std::vector<unsigned> &Regs); + // group. If RegRefs is non-NULL then only included referenced registers. + void GetGroupRegs( + unsigned Group, + std::vector<unsigned> &Regs, + std::multimap<unsigned, AggressiveAntiDepState::RegisterReference> *RegRefs); // UnionGroups - Union Reg1's and Reg2's groups to form a new // group. Return the index of the GroupNode representing the @@ -114,6 +118,10 @@ namespace llvm { /// because they may not be safe to break. const BitVector AllocatableSet; + /// CriticalPathSet - The set of registers that should only be + /// renamed if they are on the critical path. + BitVector CriticalPathSet; + /// State - The state used to identify and rename anti-dependence /// registers. AggressiveAntiDepState *State; @@ -124,7 +132,8 @@ namespace llvm { AggressiveAntiDepState *SavedState; public: - AggressiveAntiDepBreaker(MachineFunction& MFi); + AggressiveAntiDepBreaker(MachineFunction& MFi, + TargetSubtarget::RegClassVector& CriticalPathRCs); ~AggressiveAntiDepBreaker(); /// GetMaxTrials - As anti-dependencies are broken, additional diff --git a/lib/CodeGen/AntiDepBreaker.h b/lib/CodeGen/AntiDepBreaker.h index 2775087..b614f68 100644 --- a/lib/CodeGen/AntiDepBreaker.h +++ b/lib/CodeGen/AntiDepBreaker.h @@ -23,6 +23,7 @@ #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" +#include <map> namespace llvm { diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index bb6bd95..08e0eae 100644 --- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -18,6 +18,7 @@ #include "llvm/Module.h" #include "llvm/CodeGen/GCMetadataPrinter.h" #include "llvm/CodeGen/MachineConstantPool.h" +#include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/CodeGen/MachineLoopInfo.h" @@ -35,6 +36,7 @@ #include "llvm/Support/Mangler.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetOptions.h" @@ -512,7 +514,7 @@ void AsmPrinter::EmitXXStructorList(Constant *List) { //===----------------------------------------------------------------------===// /// LEB 128 number encoding. -/// PrintULEB128 - Print a series of hexidecimal values (separated by commas) +/// PrintULEB128 - Print a series of hexadecimal values (separated by commas) /// representing an unsigned leb128 value. void AsmPrinter::PrintULEB128(unsigned Value) const { char Buffer[20]; @@ -525,7 +527,7 @@ void AsmPrinter::PrintULEB128(unsigned Value) const { } while (Value); } -/// PrintSLEB128 - Print a series of hexidecimal values (separated by commas) +/// PrintSLEB128 - Print a series of hexadecimal values (separated by commas) /// representing a signed leb128 value. void AsmPrinter::PrintSLEB128(int Value) const { int Sign = Value >> (8 * sizeof(Value) - 1); @@ -546,7 +548,7 @@ void AsmPrinter::PrintSLEB128(int Value) const { // Emission and print routines // -/// PrintHex - Print a value as a hexidecimal value. +/// PrintHex - Print a value as a hexadecimal value. /// void AsmPrinter::PrintHex(int Value) const { char Buffer[20]; @@ -727,7 +729,7 @@ static void printStringChar(formatted_raw_ostream &O, unsigned char C) { /// Special characters are emitted properly. /// \literal (Eg. 
'\t') \endliteral void AsmPrinter::EmitString(const std::string &String) const { - EmitString(String.c_str(), String.size()); + EmitString(String.data(), String.size()); } void AsmPrinter::EmitString(const char *String, unsigned Size) const { @@ -1357,32 +1359,31 @@ void AsmPrinter::PrintSpecial(const MachineInstr *MI, const char *Code) const { /// instruction's DebugLoc. void AsmPrinter::processDebugLoc(const MachineInstr *MI, bool BeforePrintingInsn) { - if (!MAI || !DW) + if (!MAI || !DW || !MAI->doesSupportDebugInformation() + || !DW->ShouldEmitDwarfDebug()) return; DebugLoc DL = MI->getDebugLoc(); - if (MAI->doesSupportDebugInformation() && DW->ShouldEmitDwarfDebug()) { - if (!DL.isUnknown()) { - DebugLocTuple CurDLT = MF->getDebugLocTuple(DL); - if (BeforePrintingInsn) { - if (CurDLT.Scope != 0 && PrevDLT != CurDLT) { - unsigned L = DW->RecordSourceLine(CurDLT.Line, CurDLT.Col, - CurDLT.Scope); - printLabel(L); - O << '\n'; -#ifdef ATTACH_DEBUG_INFO_TO_AN_INSN - DW->SetDbgScopeBeginLabels(MI, L); -#endif - } else { -#ifdef ATTACH_DEBUG_INFO_TO_AN_INSN - DW->SetDbgScopeEndLabels(MI, 0); -#endif - } - } + if (DL.isUnknown()) + return; + DebugLocTuple CurDLT = MF->getDebugLocTuple(DL); + if (CurDLT.Scope == 0) + return; + + if (BeforePrintingInsn) { + if (CurDLT != PrevDLT) { + unsigned L = DW->RecordSourceLine(CurDLT.Line, CurDLT.Col, + CurDLT.Scope); + printLabel(L); + DW->BeginScope(MI, L); PrevDLT = CurDLT; } + } else { + // After printing instruction + DW->EndScope(MI); } } + /// printInlineAsm - This method formats and prints the specified machine /// instruction that is an inline asm. void AsmPrinter::printInlineAsm(const MachineInstr *MI) const { @@ -1399,6 +1400,8 @@ void AsmPrinter::printInlineAsm(const MachineInstr *MI) const { // Disassemble the AsmStr, printing out the literal pieces, the operands, etc. const char *AsmStr = MI->getOperand(NumDefs).getSymbolName(); + O << '\t'; + // If this asmstr is empty, just print the #APP/#NOAPP markers. // These are useful to see where empty asm's wound up. if (AsmStr[0] == 0) { @@ -1636,13 +1639,17 @@ MCSymbol *AsmPrinter::GetBlockAddressSymbol(const Function *F, assert(BB->hasName() && "Address of anonymous basic block not supported yet!"); - // FIXME: This isn't guaranteed to produce a unique name even if the - // block and function have a name. - std::string Mangled = - Mang->getMangledName(F, Mang->makeNameProper(BB->getName()).c_str(), - /*ForcePrivate=*/true); + // This code must use the function name itself, and not the function number, + // since it must be possible to generate the label name from within other + // functions. + std::string FuncName = Mang->getMangledName(F); + + SmallString<60> Name; + raw_svector_ostream(Name) << MAI->getPrivateGlobalPrefix() << "BA" + << FuncName.size() << '_' << FuncName << '_' + << Mang->makeNameProper(BB->getName()); - return OutContext.GetOrCreateSymbol(StringRef(Mangled)); + return OutContext.GetOrCreateSymbol(Name.str()); } MCSymbol *AsmPrinter::GetMBBSymbol(unsigned MBBID) const { @@ -1817,21 +1824,80 @@ GCMetadataPrinter *AsmPrinter::GetOrCreateGCPrinter(GCStrategy *S) { /// EmitComments - Pretty-print comments for instructions void AsmPrinter::EmitComments(const MachineInstr &MI) const { - assert(VerboseAsm && !MI.getDebugLoc().isUnknown()); - - DebugLocTuple DLT = MF->getDebugLocTuple(MI.getDebugLoc()); + if (!VerboseAsm) + return; - // Print source line info. 
- O.PadToColumn(MAI->getCommentColumn()); - O << MAI->getCommentString() << " SrcLine "; - if (DLT.Scope) { - DICompileUnit CU(DLT.Scope); - if (!CU.isNull()) - O << CU.getFilename() << " "; + bool Newline = false; + + if (!MI.getDebugLoc().isUnknown()) { + DebugLocTuple DLT = MF->getDebugLocTuple(MI.getDebugLoc()); + + // Print source line info. + O.PadToColumn(MAI->getCommentColumn()); + O << MAI->getCommentString() << " SrcLine "; + if (DLT.Scope) { + DICompileUnit CU(DLT.Scope); + if (!CU.isNull()) + O << CU.getFilename() << " "; + } + O << DLT.Line; + if (DLT.Col != 0) + O << ":" << DLT.Col; + Newline = true; + } + + // Check for spills and reloads + int FI; + + const MachineFrameInfo *FrameInfo = + MI.getParent()->getParent()->getFrameInfo(); + + // We assume a single instruction only has a spill or reload, not + // both. + if (TM.getInstrInfo()->isLoadFromStackSlotPostFE(&MI, FI)) { + if (FrameInfo->isSpillSlotObjectIndex(FI)) { + if (Newline) O << '\n'; + O.PadToColumn(MAI->getCommentColumn()); + O << MAI->getCommentString() << " Reload"; + Newline = true; + } + } + else if (TM.getInstrInfo()->hasLoadFromStackSlot(&MI, FI)) { + if (FrameInfo->isSpillSlotObjectIndex(FI)) { + if (Newline) O << '\n'; + O.PadToColumn(MAI->getCommentColumn()); + O << MAI->getCommentString() << " Folded Reload"; + Newline = true; + } + } + else if (TM.getInstrInfo()->isStoreToStackSlotPostFE(&MI, FI)) { + if (FrameInfo->isSpillSlotObjectIndex(FI)) { + if (Newline) O << '\n'; + O.PadToColumn(MAI->getCommentColumn()); + O << MAI->getCommentString() << " Spill"; + Newline = true; + } + } + else if (TM.getInstrInfo()->hasStoreToStackSlot(&MI, FI)) { + if (FrameInfo->isSpillSlotObjectIndex(FI)) { + if (Newline) O << '\n'; + O.PadToColumn(MAI->getCommentColumn()); + O << MAI->getCommentString() << " Folded Spill"; + Newline = true; + } + } + + // Check for spill-induced copies + unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx; + if (TM.getInstrInfo()->isMoveInstr(MI, SrcReg, DstReg, + SrcSubIdx, DstSubIdx)) { + if (MI.getAsmPrinterFlag(ReloadReuse)) { + if (Newline) O << '\n'; + O.PadToColumn(MAI->getCommentColumn()); + O << MAI->getCommentString() << " Reload Reuse"; + Newline = true; + } } - O << DLT.Line; - if (DLT.Col != 0) - O << ":" << DLT.Col; } /// PrintChildLoopComment - Print comments about child loops within @@ -1862,8 +1928,7 @@ static void PrintChildLoopComment(formatted_raw_ostream &O, } /// EmitComments - Pretty-print comments for basic blocks -void AsmPrinter::EmitComments(const MachineBasicBlock &MBB) const -{ +void AsmPrinter::EmitComments(const MachineBasicBlock &MBB) const { if (VerboseAsm) { // Add loop depth information const MachineLoop *loop = LI->getLoopFor(&MBB); diff --git a/lib/CodeGen/AsmPrinter/DIE.h b/lib/CodeGen/AsmPrinter/DIE.h index 62b51ec..3e50a15 100644 --- a/lib/CodeGen/AsmPrinter/DIE.h +++ b/lib/CodeGen/AsmPrinter/DIE.h @@ -29,7 +29,7 @@ namespace llvm { //===--------------------------------------------------------------------===// /// DIEAbbrevData - Dwarf abbreviation data, describes the one attribute of a /// Dwarf abbreviation. - class VISIBILITY_HIDDEN DIEAbbrevData { + class DIEAbbrevData { /// Attribute - Dwarf attribute code. /// unsigned Attribute; @@ -52,7 +52,7 @@ namespace llvm { //===--------------------------------------------------------------------===// /// DIEAbbrev - Dwarf abbreviation, describes the organization of a debug /// information object. 
- class VISIBILITY_HIDDEN DIEAbbrev : public FoldingSetNode { + class DIEAbbrev : public FoldingSetNode { /// Tag - Dwarf tag code. /// unsigned Tag; @@ -113,7 +113,7 @@ namespace llvm { class CompileUnit; class DIEValue; - class VISIBILITY_HIDDEN DIE : public FoldingSetNode { + class DIE : public FoldingSetNode { protected: /// Abbrev - Buffer for constructing abbreviation. /// @@ -202,7 +202,7 @@ namespace llvm { //===--------------------------------------------------------------------===// /// DIEValue - A debug information entry value. /// - class VISIBILITY_HIDDEN DIEValue : public FoldingSetNode { + class DIEValue : public FoldingSetNode { public: enum { isInteger, @@ -249,7 +249,7 @@ namespace llvm { //===--------------------------------------------------------------------===// /// DIEInteger - An integer value DIE. /// - class VISIBILITY_HIDDEN DIEInteger : public DIEValue { + class DIEInteger : public DIEValue { uint64_t Integer; public: explicit DIEInteger(uint64_t I) : DIEValue(isInteger), Integer(I) {} @@ -294,7 +294,7 @@ namespace llvm { //===--------------------------------------------------------------------===// /// DIEString - A string value DIE. /// - class VISIBILITY_HIDDEN DIEString : public DIEValue { + class DIEString : public DIEValue { const std::string Str; public: explicit DIEString(const std::string &S) : DIEValue(isString), Str(S) {} @@ -326,7 +326,7 @@ namespace llvm { //===--------------------------------------------------------------------===// /// DIEDwarfLabel - A Dwarf internal label expression DIE. // - class VISIBILITY_HIDDEN DIEDwarfLabel : public DIEValue { + class DIEDwarfLabel : public DIEValue { const DWLabel Label; public: explicit DIEDwarfLabel(const DWLabel &L) : DIEValue(isLabel), Label(L) {} @@ -356,7 +356,7 @@ namespace llvm { //===--------------------------------------------------------------------===// /// DIEObjectLabel - A label to an object in code or data. // - class VISIBILITY_HIDDEN DIEObjectLabel : public DIEValue { + class DIEObjectLabel : public DIEValue { const std::string Label; public: explicit DIEObjectLabel(const std::string &L) @@ -389,7 +389,7 @@ namespace llvm { //===--------------------------------------------------------------------===// /// DIESectionOffset - A section offset DIE. /// - class VISIBILITY_HIDDEN DIESectionOffset : public DIEValue { + class DIESectionOffset : public DIEValue { const DWLabel Label; const DWLabel Section; bool IsEH : 1; @@ -428,7 +428,7 @@ namespace llvm { //===--------------------------------------------------------------------===// /// DIEDelta - A simple label difference DIE. /// - class VISIBILITY_HIDDEN DIEDelta : public DIEValue { + class DIEDelta : public DIEValue { const DWLabel LabelHi; const DWLabel LabelLo; public: @@ -462,7 +462,7 @@ namespace llvm { /// DIEntry - A pointer to another debug information entry. An instance of /// this class can also be used as a proxy for a debug information entry not /// yet defined (ie. types.) - class VISIBILITY_HIDDEN DIEEntry : public DIEValue { + class DIEEntry : public DIEValue { DIE *Entry; public: explicit DIEEntry(DIE *E) : DIEValue(isEntry), Entry(E) {} @@ -497,7 +497,7 @@ namespace llvm { //===--------------------------------------------------------------------===// /// DIEBlock - A block of values. Primarily used for location expressions. // - class VISIBILITY_HIDDEN DIEBlock : public DIEValue, public DIE { + class DIEBlock : public DIEValue, public DIE { unsigned Size; // Size in bytes excluding size header. 
public: DIEBlock() diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index 1372fc2..c62c435 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -48,7 +48,7 @@ namespace llvm { //===----------------------------------------------------------------------===// /// CompileUnit - This dwarf writer support class manages information associate /// with a source file. -class VISIBILITY_HIDDEN CompileUnit { +class CompileUnit { /// ID - File identifier for source. /// unsigned ID; @@ -127,61 +127,66 @@ public: class DbgVariable { DIVariable Var; // Variable Descriptor. unsigned FrameIndex; // Variable frame index. - bool InlinedFnVar; // Variable for an inlined function. + DbgVariable *AbstractVar; // Abstract variable for this variable. + DIE *TheDIE; public: - DbgVariable(DIVariable V, unsigned I, bool IFV) - : Var(V), FrameIndex(I), InlinedFnVar(IFV) {} + DbgVariable(DIVariable V, unsigned I) + : Var(V), FrameIndex(I), AbstractVar(0), TheDIE(0) {} // Accessors. - DIVariable getVariable() const { return Var; } - unsigned getFrameIndex() const { return FrameIndex; } - bool isInlinedFnVar() const { return InlinedFnVar; } + DIVariable getVariable() const { return Var; } + unsigned getFrameIndex() const { return FrameIndex; } + void setAbstractVariable(DbgVariable *V) { AbstractVar = V; } + DbgVariable *getAbstractVariable() const { return AbstractVar; } + void setDIE(DIE *D) { TheDIE = D; } + DIE *getDIE() const { return TheDIE; } }; //===----------------------------------------------------------------------===// /// DbgScope - This class is used to track scope information. /// -class DbgConcreteScope; class DbgScope { DbgScope *Parent; // Parent to this scope. - DIDescriptor Desc; // Debug info descriptor for scope. - // FIXME use WeakVH for Desc. - WeakVH InlinedAt; // If this scope represents inlined - // function body then this is the location - // where this body is inlined. + DIDescriptor Desc; // Debug info descriptor for scope. + WeakVH InlinedAtLocation; // Location at which scope is inlined. + bool AbstractScope; // Abstract Scope unsigned StartLabelID; // Label ID of the beginning of scope. unsigned EndLabelID; // Label ID of the end of scope. const MachineInstr *LastInsn; // Last instruction of this scope. const MachineInstr *FirstInsn; // First instruction of this scope. SmallVector<DbgScope *, 4> Scopes; // Scopes defined in scope. SmallVector<DbgVariable *, 8> Variables;// Variables declared in scope. - SmallVector<DbgConcreteScope *, 8> ConcreteInsts;// Concrete insts of funcs. // Private state for dump() mutable unsigned IndentLevel; public: DbgScope(DbgScope *P, DIDescriptor D, MDNode *I = 0) - : Parent(P), Desc(D), InlinedAt(I), StartLabelID(0), EndLabelID(0), + : Parent(P), Desc(D), InlinedAtLocation(I), AbstractScope(false), + StartLabelID(0), EndLabelID(0), LastInsn(0), FirstInsn(0), IndentLevel(0) {} virtual ~DbgScope(); // Accessors. 
DbgScope *getParent() const { return Parent; } + void setParent(DbgScope *P) { Parent = P; } DIDescriptor getDesc() const { return Desc; } - MDNode *getInlinedAt() const { - return dyn_cast_or_null<MDNode>(InlinedAt); + MDNode *getInlinedAt() const { + return dyn_cast_or_null<MDNode>(InlinedAtLocation); } + MDNode *getScopeNode() const { return Desc.getNode(); } unsigned getStartLabelID() const { return StartLabelID; } unsigned getEndLabelID() const { return EndLabelID; } SmallVector<DbgScope *, 4> &getScopes() { return Scopes; } SmallVector<DbgVariable *, 8> &getVariables() { return Variables; } - SmallVector<DbgConcreteScope*,8> &getConcreteInsts() { return ConcreteInsts; } void setStartLabelID(unsigned S) { StartLabelID = S; } void setEndLabelID(unsigned E) { EndLabelID = E; } void setLastInsn(const MachineInstr *MI) { LastInsn = MI; } const MachineInstr *getLastInsn() { return LastInsn; } void setFirstInsn(const MachineInstr *MI) { FirstInsn = MI; } + void setAbstractScope() { AbstractScope = true; } + bool isAbstractScope() const { return AbstractScope; } const MachineInstr *getFirstInsn() { return FirstInsn; } + /// AddScope - Add a scope to the scope. /// void AddScope(DbgScope *S) { Scopes.push_back(S); } @@ -190,10 +195,6 @@ public: /// void AddVariable(DbgVariable *V) { Variables.push_back(V); } - /// AddConcreteInst - Add a concrete instance to the scope. - /// - void AddConcreteInst(DbgConcreteScope *C) { ConcreteInsts.push_back(C); } - void FixInstructionMarkers() { assert (getFirstInsn() && "First instruction is missing!"); if (getLastInsn()) @@ -218,11 +219,15 @@ public: void DbgScope::dump() const { raw_ostream &err = errs(); err.indent(IndentLevel); - Desc.dump(); + MDNode *N = Desc.getNode(); + N->dump(); err << " [" << StartLabelID << ", " << EndLabelID << "]\n"; + if (AbstractScope) + err << "Abstract Scope\n"; IndentLevel += 2; - + if (!Scopes.empty()) + err << "Children ...\n"; for (unsigned i = 0, e = Scopes.size(); i != e; ++i) if (Scopes[i] != this) Scopes[i]->dump(); @@ -235,7 +240,7 @@ void DbgScope::dump() const { /// DbgConcreteScope - This class is used to track a scope that holds concrete /// instance information. /// -class VISIBILITY_HIDDEN DbgConcreteScope : public DbgScope { +class DbgConcreteScope : public DbgScope { CompileUnit *Unit; DIE *Die; // Debug info for this concrete scope. public: @@ -251,8 +256,6 @@ DbgScope::~DbgScope() { delete Scopes[i]; for (unsigned j = 0, M = Variables.size(); j < M; ++j) delete Variables[j]; - for (unsigned k = 0, O = ConcreteInsts.size(); k < O; ++k) - delete ConcreteInsts[k]; } } // end llvm namespace @@ -262,7 +265,7 @@ DwarfDebug::DwarfDebug(raw_ostream &OS, AsmPrinter *A, const MCAsmInfo *T) AbbreviationsSet(InitAbbreviationsSetSize), Abbreviations(), ValuesSet(InitValuesSetSize), Values(), StringPool(), SectionSourceLines(), didInitial(false), shouldEmit(false), - FunctionDbgScope(0), DebugTimer(0) { + CurrentFnDbgScope(0), DebugTimer(0) { if (TimePassesIsEnabled) DebugTimer = new Timer("Dwarf Debug Writer", getDwarfTimerGroup()); @@ -271,11 +274,6 @@ DwarfDebug::~DwarfDebug() { for (unsigned j = 0, M = Values.size(); j < M; ++j) delete Values[j]; - for (DenseMap<const MDNode *, DbgScope *>::iterator - I = AbstractInstanceRootMap.begin(), - E = AbstractInstanceRootMap.end(); I != E;++I) - delete I->second; - delete DebugTimer; } @@ -1097,6 +1095,10 @@ DIE *DwarfDebug::ConstructEnumTypeDIE(CompileUnit *DW_Unit, DIEnumerator *ETy) { /// CreateGlobalVariableDIE - Create new DIE using GV. 
DIE *DwarfDebug::CreateGlobalVariableDIE(CompileUnit *DW_Unit, const DIGlobalVariable &GV) { + // If the global variable was optmized out then no need to create debug info entry. + if (!GV.getGlobal()) return NULL; + if (!GV.getDisplayName()) return NULL; + DIE *GVDie = new DIE(dwarf::DW_TAG_variable); AddString(GVDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, GV.getDisplayName()); @@ -1233,9 +1235,6 @@ DIE *DwarfDebug::CreateSubprogramDIE(CompileUnit *DW_Unit, } } - if (!SP.isLocalToUnit() && !IsInlined) - AddUInt(SPDie, dwarf::DW_AT_external, dwarf::DW_FORM_flag, 1); - // DW_TAG_inlined_subroutine may refer to this DIE. DIE *&Slot = DW_Unit->getDieMapSlotFor(SP.getNode()); Slot = SPDie; @@ -1283,263 +1282,341 @@ DIE *DwarfDebug::CreateDbgScopeVariable(DbgVariable *DV, CompileUnit *Unit) { AddSourceLine(VariableDie, &VD); // Add variable type. - // FIXME: isBlockByrefVariable should be reformulated in terms of complex addresses instead. + // FIXME: isBlockByrefVariable should be reformulated in terms of complex + // addresses instead. if (VD.isBlockByrefVariable()) AddType(Unit, VariableDie, GetBlockByrefType(VD.getType(), Name)); else AddType(Unit, VariableDie, VD.getType()); // Add variable address. - if (!DV->isInlinedFnVar()) { - // Variables for abstract instances of inlined functions don't get a - // location. - MachineLocation Location; - Location.set(RI->getFrameRegister(*MF), - RI->getFrameIndexOffset(*MF, DV->getFrameIndex())); - - - if (VD.hasComplexAddress()) - AddComplexAddress(DV, VariableDie, dwarf::DW_AT_location, Location); - else if (VD.isBlockByrefVariable()) - AddBlockByrefAddress(DV, VariableDie, dwarf::DW_AT_location, Location); - else - AddAddress(VariableDie, dwarf::DW_AT_location, Location); - } + // Variables for abstract instances of inlined functions don't get a + // location. + MachineLocation Location; + Location.set(RI->getFrameRegister(*MF), + RI->getFrameIndexOffset(*MF, DV->getFrameIndex())); + + + if (VD.hasComplexAddress()) + AddComplexAddress(DV, VariableDie, dwarf::DW_AT_location, Location); + else if (VD.isBlockByrefVariable()) + AddBlockByrefAddress(DV, VariableDie, dwarf::DW_AT_location, Location); + else + AddAddress(VariableDie, dwarf::DW_AT_location, Location); return VariableDie; } -/// getOrCreateScope - Returns the scope associated with the given descriptor. -/// -DbgScope *DwarfDebug::getDbgScope(MDNode *N, const MachineInstr *MI, - MDNode *InlinedAt) { - ValueMap<MDNode *, DbgScope *>::iterator VI = DbgScopeMap.find(N); - if (VI != DbgScopeMap.end()) - return VI->second; +/// getUpdatedDbgScope - Find or create DbgScope assicated with the instruction. +/// Initialize scope and update scope hierarchy. 
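To make the scope bookkeeping in this hunk easier to follow, here is a minimal standalone sketch of the keying scheme getUpdatedDbgScope relies on: an ordinary lexical scope is looked up by its scope node, a concrete inlined instance is looked up by its inlined-at location, and the parent link is wired up only the first time an instruction for that scope is seen. The names and types below (NodeId, Scope, updateScope) are illustrative stand-ins, not the LLVM API.

#include <map>
#include <vector>

using NodeId = const void *;          // stand-in for an MDNode pointer

struct Scope {
  Scope *Parent = nullptr;            // enclosing scope, once known
  std::vector<Scope *> Children;      // nested scopes
  bool SeenFirstInsn = false;         // hierarchy is wired up only once
};

std::map<NodeId, Scope> ScopeMap;     // mirrors DbgScopeMap / ConcreteScopes

// Find the working scope for an instruction and, on first use, attach it to
// its parent. Concrete inlined instances are keyed by the inlined-at node so
// each inline site gets its own scope object.
Scope *updateScope(NodeId ScopeNode, NodeId InlinedAt, NodeId ParentNode) {
  Scope &S = ScopeMap[InlinedAt ? InlinedAt : ScopeNode];
  if (S.SeenFirstInsn)
    return &S;                        // already initialized
  if (ParentNode) {
    Scope &P = ScopeMap[ParentNode];
    S.Parent = &P;
    P.Children.push_back(&S);
  }
  S.SeenFirstInsn = true;
  return &S;
}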
+DbgScope *DwarfDebug::getUpdatedDbgScope(MDNode *N, const MachineInstr *MI, + MDNode *InlinedAt) { + assert (N && "Invalid Scope encoding!"); + assert (MI && "Missing machine instruction!"); + bool GetConcreteScope = (MI && InlinedAt); - DbgScope *Parent = NULL; + DbgScope *NScope = NULL; + + if (InlinedAt) + NScope = DbgScopeMap.lookup(InlinedAt); + else + NScope = DbgScopeMap.lookup(N); + assert (NScope && "Unable to find working scope!"); + + if (NScope->getFirstInsn()) + return NScope; - if (InlinedAt) { + DbgScope *Parent = NULL; + if (GetConcreteScope) { DILocation IL(InlinedAt); - assert (!IL.isNull() && "Invalid InlindAt location!"); - ValueMap<MDNode *, DbgScope *>::iterator DSI = - DbgScopeMap.find(IL.getScope().getNode()); - assert (DSI != DbgScopeMap.end() && "Unable to find InlineAt scope!"); - Parent = DSI->second; - } else { - DIDescriptor Scope(N); - if (Scope.isCompileUnit()) { - return NULL; - } else if (Scope.isSubprogram()) { - DISubprogram SP(N); - DIDescriptor ParentDesc = SP.getContext(); - if (!ParentDesc.isNull() && !ParentDesc.isCompileUnit()) - Parent = getDbgScope(ParentDesc.getNode(), MI, InlinedAt); - } else if (Scope.isLexicalBlock()) { - DILexicalBlock DB(N); - DIDescriptor ParentDesc = DB.getContext(); - if (!ParentDesc.isNull()) - Parent = getDbgScope(ParentDesc.getNode(), MI, InlinedAt); - } else - assert (0 && "Unexpected scope info"); - } - - DbgScope *NScope = new DbgScope(Parent, DIDescriptor(N), InlinedAt); + Parent = getUpdatedDbgScope(IL.getScope().getNode(), MI, + IL.getOrigLocation().getNode()); + assert (Parent && "Unable to find Parent scope!"); + NScope->setParent(Parent); + Parent->AddScope(NScope); + } else if (DIDescriptor(N).isLexicalBlock()) { + DILexicalBlock DB(N); + if (!DB.getContext().isNull()) { + Parent = getUpdatedDbgScope(DB.getContext().getNode(), MI, InlinedAt); + NScope->setParent(Parent); + Parent->AddScope(NScope); + } + } + NScope->setFirstInsn(MI); - if (Parent) - Parent->AddScope(NScope); - else - // First function is top level function. - if (!FunctionDbgScope) - FunctionDbgScope = NScope; + if (!Parent && !InlinedAt) { + StringRef SPName = DISubprogram(N).getLinkageName(); + if (SPName == MF->getFunction()->getName()) + CurrentFnDbgScope = NScope; + } + + if (GetConcreteScope) { + ConcreteScopes[InlinedAt] = NScope; + getOrCreateAbstractScope(N); + } - DbgScopeMap.insert(std::make_pair(N, NScope)); return NScope; } +DbgScope *DwarfDebug::getOrCreateAbstractScope(MDNode *N) { + assert (N && "Invalid Scope encoding!"); -/// getOrCreateScope - Returns the scope associated with the given descriptor. -/// FIXME - Remove this method. -DbgScope *DwarfDebug::getOrCreateScope(MDNode *N) { - DbgScope *&Slot = DbgScopeMap[N]; - if (Slot) return Slot; - + DbgScope *AScope = AbstractScopes.lookup(N); + if (AScope) + return AScope; + DbgScope *Parent = NULL; - DILexicalBlock Block(N); - // Don't create a new scope if we already created one for an inlined function. - DenseMap<const MDNode *, DbgScope *>::iterator - II = AbstractInstanceRootMap.find(N); - if (II != AbstractInstanceRootMap.end()) - return LexicalScopeStack.back(); - - if (!Block.isNull()) { - DIDescriptor ParentDesc = Block.getContext(); - Parent = - ParentDesc.isNull() ? 
NULL : getOrCreateScope(ParentDesc.getNode()); + DIDescriptor Scope(N); + if (Scope.isLexicalBlock()) { + DILexicalBlock DB(N); + DIDescriptor ParentDesc = DB.getContext(); + if (!ParentDesc.isNull()) + Parent = getOrCreateAbstractScope(ParentDesc.getNode()); } - Slot = new DbgScope(Parent, DIDescriptor(N)); + AScope = new DbgScope(Parent, DIDescriptor(N), NULL); if (Parent) - Parent->AddScope(Slot); - else - // First function is top level function. - FunctionDbgScope = Slot; + Parent->AddScope(AScope); + AScope->setAbstractScope(); + AbstractScopes[N] = AScope; + if (DIDescriptor(N).isSubprogram()) + AbstractScopesList.push_back(AScope); + return AScope; +} + +static DISubprogram getDISubprogram(MDNode *N) { - return Slot; + DIDescriptor D(N); + if (D.isNull()) + return DISubprogram(); + + if (D.isCompileUnit()) + return DISubprogram(); + + if (D.isSubprogram()) + return DISubprogram(N); + + if (D.isLexicalBlock()) + return getDISubprogram(DILexicalBlock(N).getContext().getNode()); + + llvm_unreachable("Unexpected Descriptor!"); } -/// ConstructDbgScope - Construct the components of a scope. -/// -void DwarfDebug::ConstructDbgScope(DbgScope *ParentScope, - unsigned ParentStartID, - unsigned ParentEndID, - DIE *ParentDie, CompileUnit *Unit) { - // Add variables to scope. - SmallVector<DbgVariable *, 8> &Variables = ParentScope->getVariables(); - for (unsigned i = 0, N = Variables.size(); i < N; ++i) { - DIE *VariableDie = CreateDbgScopeVariable(Variables[i], Unit); - if (VariableDie) ParentDie->AddChild(VariableDie); - } +DIE *DwarfDebug::UpdateSubprogramScopeDIE(MDNode *SPNode) { + + DIE *SPDie = ModuleCU->getDieMapSlotFor(SPNode); + assert (SPDie && "Unable to find subprogram DIE!"); + AddLabel(SPDie, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr, + DWLabel("func_begin", SubprogramCount)); + AddLabel(SPDie, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr, + DWLabel("func_end", SubprogramCount)); + MachineLocation Location(RI->getFrameRegister(*MF)); + AddAddress(SPDie, dwarf::DW_AT_frame_base, Location); + + if (!DISubprogram(SPNode).isLocalToUnit()) + AddUInt(SPDie, dwarf::DW_AT_external, dwarf::DW_FORM_flag, 1); + + // If there are global variables at this scope then add their dies. + for (SmallVector<WeakVH, 4>::iterator SGI = ScopedGVs.begin(), + SGE = ScopedGVs.end(); SGI != SGE; ++SGI) { + MDNode *N = dyn_cast_or_null<MDNode>(*SGI); + if (!N) continue; + DIGlobalVariable GV(N); + if (GV.getContext().getNode() == SPNode) { + DIE *ScopedGVDie = CreateGlobalVariableDIE(ModuleCU, GV); + if (ScopedGVDie) + SPDie->AddChild(ScopedGVDie); + } + } + return SPDie; +} + +DIE *DwarfDebug::ConstructLexicalScopeDIE(DbgScope *Scope) { + unsigned StartID = MMI->MappedLabel(Scope->getStartLabelID()); + unsigned EndID = MMI->MappedLabel(Scope->getEndLabelID()); + + // Ignore empty scopes. + if (StartID == EndID && StartID != 0) + return NULL; - // Add concrete instances to scope. - SmallVector<DbgConcreteScope *, 8> &ConcreteInsts = - ParentScope->getConcreteInsts(); - for (unsigned i = 0, N = ConcreteInsts.size(); i < N; ++i) { - DbgConcreteScope *ConcreteInst = ConcreteInsts[i]; - DIE *Die = ConcreteInst->getDie(); + DIE *ScopeDIE = new DIE(dwarf::DW_TAG_lexical_block); + if (Scope->isAbstractScope()) + return ScopeDIE; - unsigned StartID = ConcreteInst->getStartLabelID(); - unsigned EndID = ConcreteInst->getEndLabelID(); + AddLabel(ScopeDIE, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr, + StartID ? 
+ DWLabel("label", StartID) + : DWLabel("func_begin", SubprogramCount)); + AddLabel(ScopeDIE, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr, + EndID ? + DWLabel("label", EndID) + : DWLabel("func_end", SubprogramCount)); - // Add the scope bounds. - if (StartID) - AddLabel(Die, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr, - DWLabel("label", StartID)); - else - AddLabel(Die, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr, - DWLabel("func_begin", SubprogramCount)); - if (EndID) - AddLabel(Die, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr, - DWLabel("label", EndID)); - else - AddLabel(Die, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr, - DWLabel("func_end", SubprogramCount)); - ParentDie->AddChild(Die); - } + return ScopeDIE; +} - // Add nested scopes. - SmallVector<DbgScope *, 4> &Scopes = ParentScope->getScopes(); - for (unsigned j = 0, M = Scopes.size(); j < M; ++j) { - // Define the Scope debug information entry. - DbgScope *Scope = Scopes[j]; +DIE *DwarfDebug::ConstructInlinedScopeDIE(DbgScope *Scope) { + unsigned StartID = MMI->MappedLabel(Scope->getStartLabelID()); + unsigned EndID = MMI->MappedLabel(Scope->getEndLabelID()); + assert (StartID && "Invalid starting label for an inlined scope!"); + assert (EndID && "Invalid end label for an inlined scope!"); + // Ignore empty scopes. + if (StartID == EndID && StartID != 0) + return NULL; - unsigned StartID = MMI->MappedLabel(Scope->getStartLabelID()); - unsigned EndID = MMI->MappedLabel(Scope->getEndLabelID()); + DIScope DS(Scope->getScopeNode()); + if (DS.isNull()) + return NULL; + DIE *ScopeDIE = new DIE(dwarf::DW_TAG_inlined_subroutine); - // Ignore empty scopes. - if (StartID == EndID && StartID != 0) continue; + DISubprogram InlinedSP = getDISubprogram(DS.getNode()); + DIE *&OriginDIE = ModuleCU->getDieMapSlotFor(InlinedSP.getNode()); + assert (OriginDIE && "Unable to find Origin DIE!"); + AddDIEEntry(ScopeDIE, dwarf::DW_AT_abstract_origin, + dwarf::DW_FORM_ref4, OriginDIE); - // Do not ignore inlined scopes even if they don't have any variables or - // scopes. - if (Scope->getScopes().empty() && Scope->getVariables().empty() && - Scope->getConcreteInsts().empty()) - continue; + AddLabel(ScopeDIE, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr, + DWLabel("label", StartID)); + AddLabel(ScopeDIE, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr, + DWLabel("label", EndID)); - if (StartID == ParentStartID && EndID == ParentEndID) { - // Just add stuff to the parent scope. - ConstructDbgScope(Scope, ParentStartID, ParentEndID, ParentDie, Unit); - } else { - DIE *ScopeDie = new DIE(dwarf::DW_TAG_lexical_block); + InlinedSubprogramDIEs.insert(OriginDIE); - // Add the scope bounds. - if (StartID) - AddLabel(ScopeDie, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr, - DWLabel("label", StartID)); - else - AddLabel(ScopeDie, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr, - DWLabel("func_begin", SubprogramCount)); + // Track the start label for this inlined function. + ValueMap<MDNode *, SmallVector<InlineInfoLabels, 4> >::iterator + I = InlineInfo.find(InlinedSP.getNode()); - if (EndID) - AddLabel(ScopeDie, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr, - DWLabel("label", EndID)); - else - AddLabel(ScopeDie, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr, - DWLabel("func_end", SubprogramCount)); + if (I == InlineInfo.end()) { + InlineInfo[InlinedSP.getNode()].push_back(std::make_pair(StartID, ScopeDIE)); + InlinedSPNodes.push_back(InlinedSP.getNode()); + } else + I->second.push_back(std::make_pair(StartID, ScopeDIE)); - // Add the scope's contents. 
- ConstructDbgScope(Scope, StartID, EndID, ScopeDie, Unit); - ParentDie->AddChild(ScopeDie); - } - } + StringPool.insert(InlinedSP.getName()); + StringPool.insert(InlinedSP.getLinkageName()); + DILocation DL(Scope->getInlinedAt()); + AddUInt(ScopeDIE, dwarf::DW_AT_call_file, 0, ModuleCU->getID()); + AddUInt(ScopeDIE, dwarf::DW_AT_call_line, 0, DL.getLineNumber()); + + return ScopeDIE; } -/// ConstructFunctionDbgScope - Construct the scope for the subprogram. -/// -void DwarfDebug::ConstructFunctionDbgScope(DbgScope *RootScope, - bool AbstractScope) { - // Exit if there is no root scope. - if (!RootScope) return; - DIDescriptor Desc = RootScope->getDesc(); - if (Desc.isNull()) - return; +DIE *DwarfDebug::ConstructVariableDIE(DbgVariable *DV, + DbgScope *Scope, CompileUnit *Unit) { + // Get the descriptor. + const DIVariable &VD = DV->getVariable(); + const char *Name = VD.getName(); + if (!Name) + return NULL; - // Get the subprogram debug information entry. - DISubprogram SPD(Desc.getNode()); - - // Get the subprogram die. - DIE *SPDie = ModuleCU->getDieMapSlotFor(SPD.getNode()); - if (!SPDie) { - ConstructSubprogram(SPD.getNode()); - SPDie = ModuleCU->getDieMapSlotFor(SPD.getNode()); - } - assert(SPDie && "Missing subprogram descriptor"); - - if (!AbstractScope) { - // Add the function bounds. - AddLabel(SPDie, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr, - DWLabel("func_begin", SubprogramCount)); - AddLabel(SPDie, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr, - DWLabel("func_end", SubprogramCount)); - MachineLocation Location(RI->getFrameRegister(*MF)); - AddAddress(SPDie, dwarf::DW_AT_frame_base, Location); - } - - ConstructDbgScope(RootScope, 0, 0, SPDie, ModuleCU); - // If there are global variables at this scope then add their dies. - for (SmallVector<WeakVH, 4>::iterator SGI = ScopedGVs.begin(), - SGE = ScopedGVs.end(); SGI != SGE; ++SGI) { - MDNode *N = dyn_cast_or_null<MDNode>(*SGI); - if (!N) continue; - DIGlobalVariable GV(N); - if (GV.getContext().getNode() == RootScope->getDesc().getNode()) { - DIE *ScopedGVDie = CreateGlobalVariableDIE(ModuleCU, GV); - SPDie->AddChild(ScopedGVDie); - } + // Translate tag to proper Dwarf tag. The result variable is dropped for + // now. + unsigned Tag; + switch (VD.getTag()) { + case dwarf::DW_TAG_return_variable: + return NULL; + case dwarf::DW_TAG_arg_variable: + Tag = dwarf::DW_TAG_formal_parameter; + break; + case dwarf::DW_TAG_auto_variable: // fall thru + default: + Tag = dwarf::DW_TAG_variable; + break; } -} -/// ConstructDefaultDbgScope - Construct a default scope for the subprogram. -/// -void DwarfDebug::ConstructDefaultDbgScope(MachineFunction *MF) { - StringMap<DIE*> &Globals = ModuleCU->getGlobals(); - StringMap<DIE*>::iterator GI = Globals.find(MF->getFunction()->getName()); - if (GI != Globals.end()) { - DIE *SPDie = GI->second; + // Define variable debug information entry. + DIE *VariableDie = new DIE(Tag); - // Add the function bounds. 
- AddLabel(SPDie, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr, - DWLabel("func_begin", SubprogramCount)); - AddLabel(SPDie, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr, - DWLabel("func_end", SubprogramCount)); - MachineLocation Location(RI->getFrameRegister(*MF)); - AddAddress(SPDie, dwarf::DW_AT_frame_base, Location); + DIE *AbsDIE = NULL; + if (DbgVariable *AV = DV->getAbstractVariable()) + AbsDIE = AV->getDIE(); + + if (AbsDIE) { + DIScope DS(Scope->getScopeNode()); + DISubprogram InlinedSP = getDISubprogram(DS.getNode()); + DIE *&OriginSPDIE = ModuleCU->getDieMapSlotFor(InlinedSP.getNode()); + (void) OriginSPDIE; + assert (OriginSPDIE && "Unable to find Origin DIE for the SP!"); + DIE *AbsDIE = DV->getAbstractVariable()->getDIE(); + assert (AbsDIE && "Unable to find Origin DIE for the Variable!"); + AddDIEEntry(VariableDie, dwarf::DW_AT_abstract_origin, + dwarf::DW_FORM_ref4, AbsDIE); } + else { + AddString(VariableDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name); + AddSourceLine(VariableDie, &VD); + + // Add variable type. + // FIXME: isBlockByrefVariable should be reformulated in terms of complex + // addresses instead. + if (VD.isBlockByrefVariable()) + AddType(Unit, VariableDie, GetBlockByrefType(VD.getType(), Name)); + else + AddType(Unit, VariableDie, VD.getType()); + } + + // Add variable address. + if (!Scope->isAbstractScope()) { + MachineLocation Location; + Location.set(RI->getFrameRegister(*MF), + RI->getFrameIndexOffset(*MF, DV->getFrameIndex())); + + + if (VD.hasComplexAddress()) + AddComplexAddress(DV, VariableDie, dwarf::DW_AT_location, Location); + else if (VD.isBlockByrefVariable()) + AddBlockByrefAddress(DV, VariableDie, dwarf::DW_AT_location, Location); + else + AddAddress(VariableDie, dwarf::DW_AT_location, Location); + } + DV->setDIE(VariableDie); + return VariableDie; + +} +DIE *DwarfDebug::ConstructScopeDIE(DbgScope *Scope) { + if (!Scope) + return NULL; + DIScope DS(Scope->getScopeNode()); + if (DS.isNull()) + return NULL; + + DIE *ScopeDIE = NULL; + if (Scope->getInlinedAt()) + ScopeDIE = ConstructInlinedScopeDIE(Scope); + else if (DS.isSubprogram()) { + if (Scope->isAbstractScope()) + ScopeDIE = ModuleCU->getDieMapSlotFor(DS.getNode()); + else + ScopeDIE = UpdateSubprogramScopeDIE(DS.getNode()); + } + else { + ScopeDIE = ConstructLexicalScopeDIE(Scope); + if (!ScopeDIE) return NULL; + } + + // Add variables to scope. + SmallVector<DbgVariable *, 8> &Variables = Scope->getVariables(); + for (unsigned i = 0, N = Variables.size(); i < N; ++i) { + DIE *VariableDIE = ConstructVariableDIE(Variables[i], Scope, ModuleCU); + if (VariableDIE) + ScopeDIE->AddChild(VariableDIE); + } + + // Add nested scopes. + SmallVector<DbgScope *, 4> &Scopes = Scope->getScopes(); + for (unsigned j = 0, M = Scopes.size(); j < M; ++j) { + // Define the Scope debug information entry. + DIE *NestedDIE = ConstructScopeDIE(Scopes[j]); + if (NestedDIE) + ScopeDIE->AddChild(NestedDIE); + } + return ScopeDIE; } /// GetOrCreateSourceID - Look up the source id with the given directory and @@ -1680,6 +1757,9 @@ void DwarfDebug::BeginModule(Module *M, MachineModuleInfo *mmi) { if (TimePassesIsEnabled) DebugTimer->startTimer(); + if (!MAI->doesSupportDebugInformation()) + return; + DebugInfoFinder DbgFinder; DbgFinder.processModule(*M); @@ -1710,7 +1790,7 @@ void DwarfDebug::BeginModule(Module *M, MachineModuleInfo *mmi) { ConstructGlobalVariableDIE(*I); } - // Create DIEs for each of the externally visible subprograms. + // Create DIEs for each subprogram. 
for (DebugInfoFinder::iterator I = DbgFinder.subprogram_begin(), E = DbgFinder.subprogram_end(); I != E; ++I) ConstructSubprogram(*I); @@ -1754,6 +1834,13 @@ void DwarfDebug::EndModule() { if (TimePassesIsEnabled) DebugTimer->startTimer(); + // Attach DW_AT_inline attribute with inlined subprogram DIEs. + for (SmallPtrSet<DIE *, 4>::iterator AI = InlinedSubprogramDIEs.begin(), + AE = InlinedSubprogramDIEs.end(); AI != AE; ++AI) { + DIE *ISP = *AI; + AddUInt(ISP, dwarf::DW_AT_inline, 0, dwarf::DW_INL_inlined); + } + // Standard sections final addresses. Asm->OutStreamer.SwitchSection(Asm->getObjFileLowering().getTextSection()); EmitLabel("text_end", 0); @@ -1811,55 +1898,102 @@ void DwarfDebug::EndModule() { DebugTimer->stopTimer(); } +/// findAbstractVariable - Find abstract variable, if any, associated with Var. +DbgVariable *DwarfDebug::findAbstractVariable(DIVariable &Var, unsigned FrameIdx, + DILocation &ScopeLoc) { + + DbgVariable *AbsDbgVariable = AbstractVariables.lookup(Var.getNode()); + if (AbsDbgVariable) + return AbsDbgVariable; + + DbgScope *Scope = AbstractScopes.lookup(ScopeLoc.getScope().getNode()); + if (!Scope) + return NULL; + + AbsDbgVariable = new DbgVariable(Var, FrameIdx); + Scope->AddVariable(AbsDbgVariable); + AbstractVariables[Var.getNode()] = AbsDbgVariable; + return AbsDbgVariable; +} + /// CollectVariableInfo - Populate DbgScope entries with variables' info. void DwarfDebug::CollectVariableInfo() { if (!MMI) return; + MachineModuleInfo::VariableDbgInfoMapTy &VMap = MMI->getVariableDbgInfo(); for (MachineModuleInfo::VariableDbgInfoMapTy::iterator VI = VMap.begin(), VE = VMap.end(); VI != VE; ++VI) { MetadataBase *MB = VI->first; MDNode *Var = dyn_cast_or_null<MDNode>(MB); + if (!Var) continue; DIVariable DV (Var); - if (DV.isNull()) continue; - unsigned VSlot = VI->second; - DbgScope *Scope = NULL; - ValueMap<MDNode *, DbgScope *>::iterator DSI = - DbgScopeMap.find(DV.getContext().getNode()); - if (DSI != DbgScopeMap.end()) - Scope = DSI->second; - else - // There is not any instruction assocated with this scope, so get - // a new scope. - Scope = getDbgScope(DV.getContext().getNode(), - NULL /* Not an instruction */, - NULL /* Not inlined */); - assert (Scope && "Unable to find variable scope!"); - Scope->AddVariable(new DbgVariable(DV, VSlot, false)); - } -} - -/// SetDbgScopeBeginLabels - Update DbgScope begin labels for the scopes that -/// start with this machine instruction. -void DwarfDebug::SetDbgScopeBeginLabels(const MachineInstr *MI, unsigned Label) { + std::pair< unsigned, MDNode *> VP = VI->second; + DILocation ScopeLoc(VP.second); + + DbgScope *Scope = + ConcreteScopes.lookup(ScopeLoc.getOrigLocation().getNode()); + if (!Scope) + Scope = DbgScopeMap.lookup(ScopeLoc.getScope().getNode()); + // If variable scope is not found then skip this variable. + if (!Scope) + continue; + + DbgVariable *RegVar = new DbgVariable(DV, VP.first); + Scope->AddVariable(RegVar); + if (DbgVariable *AbsDbgVariable = findAbstractVariable(DV, VP.first, ScopeLoc)) + RegVar->setAbstractVariable(AbsDbgVariable); + } +} + +/// BeginScope - Process beginning of a scope starting at Label. 
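The abstract/concrete pairing that findAbstractVariable and CollectVariableInfo set up can be summarized with a small standalone sketch: one record per variable node acts as the abstract instance, and every concrete instance created for an inline site keeps a pointer to it, so DIE construction can later emit DW_AT_abstract_origin instead of repeating the name and type. The names here (VarRecord, makeConcreteVariable, getOrCreateAbstract) are illustrative, not the LLVM classes.

#include <map>
#include <memory>
#include <vector>

using NodeId = const void *;                  // stand-in for a DIVariable node

struct VarRecord {
  NodeId Var;                                 // variable descriptor
  int FrameIndex;                             // where it lives in this frame
  VarRecord *AbstractVar;                     // shared abstract instance, or null
};

std::map<NodeId, std::unique_ptr<VarRecord>> AbstractVariables;
std::vector<std::unique_ptr<VarRecord>> ConcreteVariables;

// Create (or reuse) the abstract instance for a variable node.
VarRecord *getOrCreateAbstract(NodeId Var, int FrameIdx) {
  auto &Slot = AbstractVariables[Var];
  if (!Slot)
    Slot.reset(new VarRecord{Var, FrameIdx, nullptr});
  return Slot.get();
}

// Create the per-inline-site (concrete) instance and point it at the abstract
// one; DIE emission can then use DW_AT_abstract_origin for the shared parts.
VarRecord *makeConcreteVariable(NodeId Var, int FrameIdx, bool Inlined) {
  ConcreteVariables.emplace_back(new VarRecord{Var, FrameIdx, nullptr});
  VarRecord *RV = ConcreteVariables.back().get();
  if (Inlined)
    RV->AbstractVar = getOrCreateAbstract(Var, FrameIdx);
  return RV;
}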
+void DwarfDebug::BeginScope(const MachineInstr *MI, unsigned Label) { InsnToDbgScopeMapTy::iterator I = DbgScopeBeginMap.find(MI); if (I == DbgScopeBeginMap.end()) return; - SmallVector<DbgScope *, 2> &SD = I->second; - for (SmallVector<DbgScope *, 2>::iterator SDI = SD.begin(), SDE = SD.end(); + ScopeVector &SD = DbgScopeBeginMap[MI]; + for (ScopeVector::iterator SDI = SD.begin(), SDE = SD.end(); SDI != SDE; ++SDI) (*SDI)->setStartLabelID(Label); } -/// SetDbgScopeEndLabels - Update DbgScope end labels for the scopes that -/// end with this machine instruction. -void DwarfDebug::SetDbgScopeEndLabels(const MachineInstr *MI, unsigned Label) { +/// EndScope - Process end of a scope. +void DwarfDebug::EndScope(const MachineInstr *MI) { InsnToDbgScopeMapTy::iterator I = DbgScopeEndMap.find(MI); if (I == DbgScopeEndMap.end()) return; + + unsigned Label = MMI->NextLabelID(); + Asm->printLabel(Label); + SmallVector<DbgScope *, 2> &SD = I->second; for (SmallVector<DbgScope *, 2>::iterator SDI = SD.begin(), SDE = SD.end(); SDI != SDE; ++SDI) (*SDI)->setEndLabelID(Label); + return; +} + +/// createDbgScope - Create DbgScope for the scope. +void DwarfDebug::createDbgScope(MDNode *Scope, MDNode *InlinedAt) { + + if (!InlinedAt) { + DbgScope *WScope = DbgScopeMap.lookup(Scope); + if (WScope) + return; + WScope = new DbgScope(NULL, DIDescriptor(Scope), NULL); + DbgScopeMap.insert(std::make_pair(Scope, WScope)); + if (DIDescriptor(Scope).isLexicalBlock()) + createDbgScope(DILexicalBlock(Scope).getContext().getNode(), NULL); + return; + } + + DbgScope *WScope = DbgScopeMap.lookup(InlinedAt); + if (WScope) + return; + + WScope = new DbgScope(NULL, DIDescriptor(Scope), InlinedAt); + DbgScopeMap.insert(std::make_pair(InlinedAt, WScope)); + DILocation DL(InlinedAt); + createDbgScope(DL.getScope().getNode(), DL.getOrigLocation().getNode()); } /// ExtractScopeInformation - Scan machine instructions in this function @@ -1870,26 +2004,41 @@ bool DwarfDebug::ExtractScopeInformation(MachineFunction *MF) { if (!DbgScopeMap.empty()) return false; - // Scan each instruction and create scopes. + // Scan each instruction and create scopes. First build working set of scopes. for (MachineFunction::const_iterator I = MF->begin(), E = MF->end(); I != E; ++I) { for (MachineBasicBlock::const_iterator II = I->begin(), IE = I->end(); II != IE; ++II) { const MachineInstr *MInsn = II; DebugLoc DL = MInsn->getDebugLoc(); - if (DL.isUnknown()) - continue; + if (DL.isUnknown()) continue; DebugLocTuple DLT = MF->getDebugLocTuple(DL); - if (!DLT.Scope) - continue; + if (!DLT.Scope) continue; // There is no need to create another DIE for compile unit. For all // other scopes, create one DbgScope now. This will be translated // into a scope DIE at the end. - DIDescriptor D(DLT.Scope); - if (!D.isCompileUnit()) { - DbgScope *Scope = getDbgScope(DLT.Scope, MInsn, DLT.InlinedAtLoc); - Scope->setLastInsn(MInsn); - } + if (DIDescriptor(DLT.Scope).isCompileUnit()) continue; + createDbgScope(DLT.Scope, DLT.InlinedAtLoc); + } + } + + + // Build scope hierarchy using working set of scopes. + for (MachineFunction::const_iterator I = MF->begin(), E = MF->end(); + I != E; ++I) { + for (MachineBasicBlock::const_iterator II = I->begin(), IE = I->end(); + II != IE; ++II) { + const MachineInstr *MInsn = II; + DebugLoc DL = MInsn->getDebugLoc(); + if (DL.isUnknown()) continue; + DebugLocTuple DLT = MF->getDebugLocTuple(DL); + if (!DLT.Scope) continue; + // There is no need to create another DIE for compile unit. 
For all + // other scopes, create one DbgScope now. This will be translated + // into a scope DIE at the end. + if (DIDescriptor(DLT.Scope).isCompileUnit()) continue; + DbgScope *Scope = getUpdatedDbgScope(DLT.Scope, MInsn, DLT.InlinedAtLoc); + Scope->setLastInsn(MInsn); } } @@ -1897,8 +2046,8 @@ bool DwarfDebug::ExtractScopeInformation(MachineFunction *MF) { // last instruction as this scope's last instrunction. for (ValueMap<MDNode *, DbgScope *>::iterator DI = DbgScopeMap.begin(), DE = DbgScopeMap.end(); DI != DE; ++DI) { - DbgScope *S = DI->second; - if (!S) continue; + if (DI->second->isAbstractScope()) + continue; assert (DI->second->getFirstInsn() && "Invalid first instruction!"); DI->second->FixInstructionMarkers(); assert (DI->second->getLastInsn() && "Invalid last instruction!"); @@ -1911,7 +2060,8 @@ bool DwarfDebug::ExtractScopeInformation(MachineFunction *MF) { for (ValueMap<MDNode *, DbgScope *>::iterator DI = DbgScopeMap.begin(), DE = DbgScopeMap.end(); DI != DE; ++DI) { DbgScope *S = DI->second; - if (!S) continue; + if (S->isAbstractScope()) + continue; const MachineInstr *MI = S->getFirstInsn(); assert (MI && "DbgScope does not have first instruction!"); @@ -1919,8 +2069,7 @@ bool DwarfDebug::ExtractScopeInformation(MachineFunction *MF) { if (IDI != DbgScopeBeginMap.end()) IDI->second.push_back(S); else - DbgScopeBeginMap.insert(std::make_pair(MI, - SmallVector<DbgScope *, 2>(2, S))); + DbgScopeBeginMap[MI].push_back(S); MI = S->getLastInsn(); assert (MI && "DbgScope does not have last instruction!"); @@ -1928,31 +2077,12 @@ bool DwarfDebug::ExtractScopeInformation(MachineFunction *MF) { if (IDI != DbgScopeEndMap.end()) IDI->second.push_back(S); else - DbgScopeEndMap.insert(std::make_pair(MI, - SmallVector<DbgScope *, 2>(2, S))); + DbgScopeEndMap[MI].push_back(S); } return !DbgScopeMap.empty(); } -static DISubprogram getDISubprogram(MDNode *N) { - - DIDescriptor D(N); - if (D.isNull()) - return DISubprogram(); - - if (D.isCompileUnit()) - return DISubprogram(); - - if (D.isSubprogram()) - return DISubprogram(N); - - if (D.isLexicalBlock()) - return getDISubprogram(DILexicalBlock(N).getContext().getNode()); - - llvm_unreachable("Unexpected Descriptor!"); -} - /// BeginFunction - Gather pre-function debug information. Assumes being /// emitted immediately after the function entry point. void DwarfDebug::BeginFunction(MachineFunction *MF) { @@ -1963,11 +2093,9 @@ void DwarfDebug::BeginFunction(MachineFunction *MF) { if (TimePassesIsEnabled) DebugTimer->startTimer(); -#ifdef ATTACH_DEBUG_INFO_TO_AN_INSN if (!ExtractScopeInformation(MF)) return; CollectVariableInfo(); -#endif // Begin accumulating function debug information. MMI->BeginFunction(MF); @@ -1977,7 +2105,6 @@ void DwarfDebug::BeginFunction(MachineFunction *MF) { // Emit label for the implicitly defined dbg.stoppoint at the start of the // function. 
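The two walks in ExtractScopeInformation can be illustrated with a compact standalone model: the first pass only registers which scope nodes occur at all, and the second pass records the first and last instruction seen for each scope, which is the information DbgScopeBeginMap and DbgScopeEndMap are later built from. Instruction and node types are simplified stand-ins, not the LLVM classes.

#include <map>
#include <vector>

using NodeId = int;                 // stand-in for a scope MDNode
struct Insn { NodeId Scope; };      // stand-in for a MachineInstr with a DebugLoc

struct ScopeRange {
  const Insn *First = nullptr;
  const Insn *Last = nullptr;
};

std::map<NodeId, ScopeRange> extractScopes(const std::vector<Insn> &Fn) {
  std::map<NodeId, ScopeRange> Scopes;

  // Pass 1: build the working set of scopes (here, just the map keys).
  for (const Insn &I : Fn)
    if (I.Scope != 0)               // 0 plays the role of an unknown location
      Scopes[I.Scope];

  // Pass 2: record the first and last instruction of each scope; the real
  // code keys DbgScopeBeginMap/DbgScopeEndMap by these instructions.
  for (const Insn &I : Fn) {
    if (I.Scope == 0)
      continue;
    ScopeRange &R = Scopes[I.Scope];
    if (!R.First)
      R.First = &I;
    R.Last = &I;
  }
  return Scopes;
}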
-#ifdef ATTACH_DEBUG_INFO_TO_AN_INSN DebugLoc FDL = MF->getDefaultDebugLoc(); if (!FDL.isUnknown()) { DebugLocTuple DLT = MF->getDebugLocTuple(FDL); @@ -1990,15 +2117,6 @@ void DwarfDebug::BeginFunction(MachineFunction *MF) { Asm->printLabel(LabelID); O << '\n'; } -#else - DebugLoc FDL = MF->getDefaultDebugLoc(); - if (!FDL.isUnknown()) { - DebugLocTuple DLT = MF->getDebugLocTuple(FDL); - unsigned LabelID = RecordSourceLine(DLT.Line, DLT.Col, DLT.Scope); - Asm->printLabel(LabelID); - O << '\n'; - } -#endif if (TimePassesIsEnabled) DebugTimer->stopTimer(); } @@ -2011,10 +2129,9 @@ void DwarfDebug::EndFunction(MachineFunction *MF) { if (TimePassesIsEnabled) DebugTimer->startTimer(); -#ifdef ATTACH_DEBUG_INFO_TO_AN_INSN if (DbgScopeMap.empty()) return; -#endif + // Define end label for subprogram. EmitLabel("func_end", SubprogramCount); @@ -2029,41 +2146,24 @@ void DwarfDebug::EndFunction(MachineFunction *MF) { Lines.begin(), Lines.end()); } - // Construct the DbgScope for abstract instances. - for (SmallVector<DbgScope *, 32>::iterator - I = AbstractInstanceRootList.begin(), - E = AbstractInstanceRootList.end(); I != E; ++I) - ConstructFunctionDbgScope(*I); + // Construct abstract scopes. + for (SmallVector<DbgScope *, 4>::iterator AI = AbstractScopesList.begin(), + AE = AbstractScopesList.end(); AI != AE; ++AI) + ConstructScopeDIE(*AI); - // Construct scopes for subprogram. - if (FunctionDbgScope) - ConstructFunctionDbgScope(FunctionDbgScope); - else - // FIXME: This is wrong. We are essentially getting past a problem with - // debug information not being able to handle unreachable blocks that have - // debug information in them. In particular, those unreachable blocks that - // have "region end" info in them. That situation results in the "root - // scope" not being created. If that's the case, then emit a "default" - // scope, i.e., one that encompasses the whole function. This isn't - // desirable. And a better way of handling this (and all of the debugging - // information) needs to be explored. - ConstructDefaultDbgScope(MF); + ConstructScopeDIE(CurrentFnDbgScope); DebugFrames.push_back(FunctionDebugFrameInfo(SubprogramCount, MMI->getFrameMoves())); // Clear debug info - if (FunctionDbgScope) { - delete FunctionDbgScope; + if (CurrentFnDbgScope) { + CurrentFnDbgScope = NULL; DbgScopeMap.clear(); DbgScopeBeginMap.clear(); DbgScopeEndMap.clear(); - DbgAbstractScopeMap.clear(); - DbgConcreteScopeMap.clear(); - FunctionDbgScope = NULL; - LexicalScopeStack.clear(); - AbstractInstanceRootList.clear(); - AbstractInstanceRootMap.clear(); + ConcreteScopes.clear(); + AbstractScopesList.clear(); } Lines.clear(); @@ -2130,201 +2230,6 @@ unsigned DwarfDebug::getOrCreateSourceID(const std::string &DirName, return SrcId; } -/// RecordRegionStart - Indicate the start of a region. -unsigned DwarfDebug::RecordRegionStart(MDNode *N) { - if (TimePassesIsEnabled) - DebugTimer->startTimer(); - - DbgScope *Scope = getOrCreateScope(N); - unsigned ID = MMI->NextLabelID(); - if (!Scope->getStartLabelID()) Scope->setStartLabelID(ID); - LexicalScopeStack.push_back(Scope); - - if (TimePassesIsEnabled) - DebugTimer->stopTimer(); - - return ID; -} - -/// RecordRegionEnd - Indicate the end of a region. -unsigned DwarfDebug::RecordRegionEnd(MDNode *N) { - if (TimePassesIsEnabled) - DebugTimer->startTimer(); - - DbgScope *Scope = getOrCreateScope(N); - unsigned ID = MMI->NextLabelID(); - Scope->setEndLabelID(ID); - // FIXME : region.end() may not be in the last basic block. 
- // For now, do not pop last lexical scope because next basic - // block may start new inlined function's body. - unsigned LSSize = LexicalScopeStack.size(); - if (LSSize != 0 && LSSize != 1) - LexicalScopeStack.pop_back(); - - if (TimePassesIsEnabled) - DebugTimer->stopTimer(); - - return ID; -} - -/// RecordVariable - Indicate the declaration of a local variable. -void DwarfDebug::RecordVariable(MDNode *N, unsigned FrameIndex) { - if (TimePassesIsEnabled) - DebugTimer->startTimer(); - - DIDescriptor Desc(N); - DbgScope *Scope = NULL; - bool InlinedFnVar = false; - - if (Desc.getTag() == dwarf::DW_TAG_variable) - Scope = getOrCreateScope(DIGlobalVariable(N).getContext().getNode()); - else { - bool InlinedVar = false; - MDNode *Context = DIVariable(N).getContext().getNode(); - DISubprogram SP(Context); - if (!SP.isNull()) { - // SP is inserted into DbgAbstractScopeMap when inlined function - // start was recorded by RecordInlineFnStart. - ValueMap<MDNode *, DbgScope *>::iterator - I = DbgAbstractScopeMap.find(SP.getNode()); - if (I != DbgAbstractScopeMap.end()) { - InlinedVar = true; - Scope = I->second; - } - } - if (!InlinedVar) - Scope = getOrCreateScope(Context); - } - - assert(Scope && "Unable to find the variable's scope"); - DbgVariable *DV = new DbgVariable(DIVariable(N), FrameIndex, InlinedFnVar); - Scope->AddVariable(DV); - - if (TimePassesIsEnabled) - DebugTimer->stopTimer(); -} - -//// RecordInlinedFnStart - Indicate the start of inlined subroutine. -unsigned DwarfDebug::RecordInlinedFnStart(DISubprogram &SP, DICompileUnit CU, - unsigned Line, unsigned Col) { - unsigned LabelID = MMI->NextLabelID(); - - if (!MAI->doesDwarfUsesInlineInfoSection()) - return LabelID; - - if (TimePassesIsEnabled) - DebugTimer->startTimer(); - - MDNode *Node = SP.getNode(); - DenseMap<const MDNode *, DbgScope *>::iterator - II = AbstractInstanceRootMap.find(Node); - - if (II == AbstractInstanceRootMap.end()) { - // Create an abstract instance entry for this inlined function if it doesn't - // already exist. - DbgScope *Scope = new DbgScope(NULL, DIDescriptor(Node)); - - // Get the compile unit context. - DIE *SPDie = ModuleCU->getDieMapSlotFor(Node); - if (!SPDie) - SPDie = CreateSubprogramDIE(ModuleCU, SP, false, true); - - // Mark as being inlined. This makes this subprogram entry an abstract - // instance root. - // FIXME: Our debugger doesn't care about the value of DW_AT_inline, only - // that it's defined. That probably won't change in the future. However, - // this could be more elegant. - AddUInt(SPDie, dwarf::DW_AT_inline, 0, dwarf::DW_INL_declared_not_inlined); - - // Keep track of the abstract scope for this function. - DbgAbstractScopeMap[Node] = Scope; - - AbstractInstanceRootMap[Node] = Scope; - AbstractInstanceRootList.push_back(Scope); - } - - // Create a concrete inlined instance for this inlined function. 
- DbgConcreteScope *ConcreteScope = new DbgConcreteScope(DIDescriptor(Node)); - DIE *ScopeDie = new DIE(dwarf::DW_TAG_inlined_subroutine); - ScopeDie->setAbstractCompileUnit(ModuleCU); - - DIE *Origin = ModuleCU->getDieMapSlotFor(Node); - AddDIEEntry(ScopeDie, dwarf::DW_AT_abstract_origin, - dwarf::DW_FORM_ref4, Origin); - AddUInt(ScopeDie, dwarf::DW_AT_call_file, 0, ModuleCU->getID()); - AddUInt(ScopeDie, dwarf::DW_AT_call_line, 0, Line); - AddUInt(ScopeDie, dwarf::DW_AT_call_column, 0, Col); - - ConcreteScope->setDie(ScopeDie); - ConcreteScope->setStartLabelID(LabelID); - MMI->RecordUsedDbgLabel(LabelID); - - LexicalScopeStack.back()->AddConcreteInst(ConcreteScope); - - // Keep track of the concrete scope that's inlined into this function. - ValueMap<MDNode *, SmallVector<DbgScope *, 8> >::iterator - SI = DbgConcreteScopeMap.find(Node); - - if (SI == DbgConcreteScopeMap.end()) - DbgConcreteScopeMap[Node].push_back(ConcreteScope); - else - SI->second.push_back(ConcreteScope); - - // Track the start label for this inlined function. - ValueMap<MDNode *, SmallVector<unsigned, 4> >::iterator - I = InlineInfo.find(Node); - - if (I == InlineInfo.end()) - InlineInfo[Node].push_back(LabelID); - else - I->second.push_back(LabelID); - - if (TimePassesIsEnabled) - DebugTimer->stopTimer(); - - return LabelID; -} - -/// RecordInlinedFnEnd - Indicate the end of inlined subroutine. -unsigned DwarfDebug::RecordInlinedFnEnd(DISubprogram &SP) { - if (!MAI->doesDwarfUsesInlineInfoSection()) - return 0; - - if (TimePassesIsEnabled) - DebugTimer->startTimer(); - - MDNode *Node = SP.getNode(); - ValueMap<MDNode *, SmallVector<DbgScope *, 8> >::iterator - I = DbgConcreteScopeMap.find(Node); - - if (I == DbgConcreteScopeMap.end()) { - // FIXME: Can this situation actually happen? And if so, should it? - if (TimePassesIsEnabled) - DebugTimer->stopTimer(); - - return 0; - } - - SmallVector<DbgScope *, 8> &Scopes = I->second; - if (Scopes.empty()) { - // Returned ID is 0 if this is unbalanced "end of inlined - // scope". This could happen if optimizer eats dbg intrinsics - // or "beginning of inlined scope" is not recoginized due to - // missing location info. In such cases, ignore this region.end. 
- return 0; - } - - DbgScope *Scope = Scopes.back(); Scopes.pop_back(); - unsigned ID = MMI->NextLabelID(); - MMI->RecordUsedDbgLabel(ID); - Scope->setEndLabelID(ID); - - if (TimePassesIsEnabled) - DebugTimer->stopTimer(); - - return ID; -} - //===----------------------------------------------------------------------===// // Emit Methods //===----------------------------------------------------------------------===// @@ -2470,10 +2375,7 @@ void DwarfDebug::EmitDIE(DIE *Die) { case dwarf::DW_AT_abstract_origin: { DIEEntry *E = cast<DIEEntry>(Values[i]); DIE *Origin = E->getEntry(); - unsigned Addr = - CompileUnitOffsets[Die->getAbstractCompileUnit()] + - Origin->getOffset(); - + unsigned Addr = Origin->getOffset(); Asm->EmitInt32(Addr); break; } @@ -3002,10 +2904,14 @@ void DwarfDebug::EmitDebugInlineInfo() { Asm->EmitInt16(dwarf::DWARF_VERSION); Asm->EOL("Dwarf Version"); Asm->EmitInt8(TD->getPointerSize()); Asm->EOL("Address Size (in bytes)"); - for (ValueMap<MDNode *, SmallVector<unsigned, 4> >::iterator - I = InlineInfo.begin(), E = InlineInfo.end(); I != E; ++I) { - MDNode *Node = I->first; - SmallVector<unsigned, 4> &Labels = I->second; + for (SmallVector<MDNode *, 4>::iterator I = InlinedSPNodes.begin(), + E = InlinedSPNodes.end(); I != E; ++I) { + +// for (ValueMap<MDNode *, SmallVector<InlineInfoLabels, 4> >::iterator + // I = InlineInfo.begin(), E = InlineInfo.end(); I != E; ++I) { + MDNode *Node = *I; + ValueMap<MDNode *, SmallVector<InlineInfoLabels, 4> >::iterator II = InlineInfo.find(Node); + SmallVector<InlineInfoLabels, 4> &Labels = II->second; DISubprogram SP(Node); const char *LName = SP.getLinkageName(); const char *Name = SP.getName(); @@ -3019,17 +2925,21 @@ void DwarfDebug::EmitDebugInlineInfo() { // __asm__ attribute. if (LName[0] == 1) LName = &LName[1]; - Asm->EmitString(LName); +// Asm->EmitString(LName); + EmitSectionOffset("string", "section_str", + StringPool.idFor(LName), false, true); + } Asm->EOL("MIPS linkage name"); - - Asm->EmitString(Name); Asm->EOL("Function name"); - +// Asm->EmitString(Name); + EmitSectionOffset("string", "section_str", + StringPool.idFor(Name), false, true); + Asm->EOL("Function name"); Asm->EmitULEB128Bytes(Labels.size()); Asm->EOL("Inline count"); - for (SmallVector<unsigned, 4>::iterator LI = Labels.begin(), + for (SmallVector<InlineInfoLabels, 4>::iterator LI = Labels.begin(), LE = Labels.end(); LI != LE; ++LI) { - DIE *SP = ModuleCU->getDieMapSlotFor(Node); + DIE *SP = LI->second; Asm->EmitInt32(SP->getOffset()); Asm->EOL("DIE offset"); if (TD->getPointerSize() == sizeof(int32_t)) @@ -3037,7 +2947,7 @@ void DwarfDebug::EmitDebugInlineInfo() { else O << MAI->getData64bitsDirective(); - PrintLabelName("label", *LI); Asm->EOL("low_pc"); + PrintLabelName("label", LI->first); Asm->EOL("low_pc"); } } diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.h b/lib/CodeGen/AsmPrinter/DwarfDebug.h index ddb0a15..646de8f 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.h +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.h @@ -30,9 +30,9 @@ namespace llvm { class CompileUnit; -class DbgVariable; -class DbgScope; class DbgConcreteScope; +class DbgScope; +class DbgVariable; class MachineFrameInfo; class MachineModuleInfo; class MCAsmInfo; @@ -41,7 +41,7 @@ class Timer; //===----------------------------------------------------------------------===// /// SrcLineInfo - This class is used to record source line correspondence. /// -class VISIBILITY_HIDDEN SrcLineInfo { +class SrcLineInfo { unsigned Line; // Source line number. unsigned Column; // Source column. 
unsigned SourceID; // Source ID number. @@ -57,7 +57,7 @@ public: unsigned getLabelID() const { return LabelID; } }; -class VISIBILITY_HIDDEN DwarfDebug : public Dwarf { +class DwarfDebug : public Dwarf { //===--------------------------------------------------------------------===// // Attributes used to construct specific Dwarf sections. // @@ -134,52 +134,52 @@ class VISIBILITY_HIDDEN DwarfDebug : public Dwarf { /// bool shouldEmit; - // FunctionDbgScope - Top level scope for the current function. + // CurrentFnDbgScope - Top level scope for the current function. // - DbgScope *FunctionDbgScope; + DbgScope *CurrentFnDbgScope; /// DbgScopeMap - Tracks the scopes in the current function. + /// ValueMap<MDNode *, DbgScope *> DbgScopeMap; + /// ConcreteScopes - Tracks the concrete scopees in the current function. + /// These scopes are also included in DbgScopeMap. + ValueMap<MDNode *, DbgScope *> ConcreteScopes; + + /// AbstractScopes - Tracks the abstract scopes a module. These scopes are + /// not included DbgScopeMap. + ValueMap<MDNode *, DbgScope *> AbstractScopes; + SmallVector<DbgScope *, 4>AbstractScopesList; + + /// AbstractVariables - Collection on abstract variables. + ValueMap<MDNode *, DbgVariable *> AbstractVariables; + + /// InliendSubprogramDIEs - Collection of subprgram DIEs that are marked + /// (at the end of the module) as DW_AT_inline. + SmallPtrSet<DIE *, 4> InlinedSubprogramDIEs; + + /// AbstractSubprogramDIEs - Collection of abstruct subprogram DIEs. + SmallPtrSet<DIE *, 4> AbstractSubprogramDIEs; + /// ScopedGVs - Tracks global variables that are not at file scope. /// For example void f() { static int b = 42; } SmallVector<WeakVH, 4> ScopedGVs; - typedef DenseMap<const MachineInstr *, SmallVector<DbgScope *, 2> > + typedef SmallVector<DbgScope *, 2> ScopeVector; + typedef DenseMap<const MachineInstr *, ScopeVector> InsnToDbgScopeMapTy; - /// DbgScopeBeginMap - Maps instruction with a list DbgScopes it starts. + /// DbgScopeBeginMap - Maps instruction with a list of DbgScopes it starts. InsnToDbgScopeMapTy DbgScopeBeginMap; /// DbgScopeEndMap - Maps instruction with a list DbgScopes it ends. InsnToDbgScopeMapTy DbgScopeEndMap; - /// DbgAbstractScopeMap - Tracks abstract instance scopes in the current - /// function. - ValueMap<MDNode *, DbgScope *> DbgAbstractScopeMap; - - /// DbgConcreteScopeMap - Tracks concrete instance scopes in the current - /// function. - ValueMap<MDNode *, - SmallVector<DbgScope *, 8> > DbgConcreteScopeMap; - /// InlineInfo - Keep track of inlined functions and their location. This /// information is used to populate debug_inlined section. - ValueMap<MDNode *, SmallVector<unsigned, 4> > InlineInfo; - - /// AbstractInstanceRootMap - Map of abstract instance roots of inlined - /// functions. These are subroutine entries that contain a DW_AT_inline - /// attribute. - DenseMap<const MDNode *, DbgScope *> AbstractInstanceRootMap; - - /// AbstractInstanceRootList - List of abstract instance roots of inlined - /// functions. These are subroutine entries that contain a DW_AT_inline - /// attribute. - SmallVector<DbgScope *, 32> AbstractInstanceRootList; - - /// LexicalScopeStack - A stack of lexical scopes. The top one is the current - /// scope. - SmallVector<DbgScope *, 16> LexicalScopeStack; + typedef std::pair<unsigned, DIE *> InlineInfoLabels; + ValueMap<MDNode *, SmallVector<InlineInfoLabels, 4> > InlineInfo; + SmallVector<MDNode *, 4> InlinedSPNodes; /// CompileUnitOffsets - A vector of the offsets of the compile units. 
This is /// used when calculating the "origin" of a concrete instance of an inlined @@ -361,10 +361,24 @@ class VISIBILITY_HIDDEN DwarfDebug : public Dwarf { /// DIE *CreateDbgScopeVariable(DbgVariable *DV, CompileUnit *Unit); - /// getDbgScope - Returns the scope associated with the given descriptor. - /// - DbgScope *getOrCreateScope(MDNode *N); - DbgScope *getDbgScope(MDNode *N, const MachineInstr *MI, MDNode *InlinedAt); + /// getUpdatedDbgScope - Find or create DbgScope assicated with + /// the instruction. Initialize scope and update scope hierarchy. + DbgScope *getUpdatedDbgScope(MDNode *N, const MachineInstr *MI, MDNode *InlinedAt); + + /// createDbgScope - Create DbgScope for the scope. + void createDbgScope(MDNode *Scope, MDNode *InlinedAt); + + DbgScope *getOrCreateAbstractScope(MDNode *N); + + /// findAbstractVariable - Find abstract variable associated with Var. + DbgVariable *findAbstractVariable(DIVariable &Var, unsigned FrameIdx, + DILocation &Loc); + + DIE *UpdateSubprogramScopeDIE(MDNode *SPNode); + DIE *ConstructLexicalScopeDIE(DbgScope *Scope); + DIE *ConstructScopeDIE(DbgScope *Scope); + DIE *ConstructInlinedScopeDIE(DbgScope *Scope); + DIE *ConstructVariableDIE(DbgVariable *DV, DbgScope *S, CompileUnit *Unit); /// ConstructDbgScope - Construct the components of a scope. /// @@ -372,15 +386,6 @@ class VISIBILITY_HIDDEN DwarfDebug : public Dwarf { unsigned ParentStartID, unsigned ParentEndID, DIE *ParentDie, CompileUnit *Unit); - /// ConstructFunctionDbgScope - Construct the scope for the subprogram. - /// - void ConstructFunctionDbgScope(DbgScope *RootScope, - bool AbstractScope = false); - - /// ConstructDefaultDbgScope - Construct a default scope for the subprogram. - /// - void ConstructDefaultDbgScope(MachineFunction *MF); - /// EmitInitial - Emit initial Dwarf declarations. This is necessary for cc /// tools to recognize the object file contains Dwarf information. void EmitInitial(); @@ -535,22 +540,6 @@ public: unsigned getOrCreateSourceID(const std::string &DirName, const std::string &FileName); - /// RecordRegionStart - Indicate the start of a region. - unsigned RecordRegionStart(MDNode *N); - - /// RecordRegionEnd - Indicate the end of a region. - unsigned RecordRegionEnd(MDNode *N); - - /// RecordVariable - Indicate the declaration of a local variable. - void RecordVariable(MDNode *N, unsigned FrameIndex); - - //// RecordInlinedFnStart - Indicate the start of inlined subroutine. - unsigned RecordInlinedFnStart(DISubprogram &SP, DICompileUnit CU, - unsigned Line, unsigned Col); - - /// RecordInlinedFnEnd - Indicate the end of inlined subroutine. - unsigned RecordInlinedFnEnd(DISubprogram &SP); - /// ExtractScopeInformation - Scan machine instructions in this function /// and collect DbgScopes. Return true, if atleast one scope was found. bool ExtractScopeInformation(MachineFunction *MF); @@ -558,15 +547,16 @@ public: /// CollectVariableInfo - Populate DbgScope entries with variables' info. void CollectVariableInfo(); - /// SetDbgScopeBeginLabels - Update DbgScope begin labels for the scopes that - /// start with this machine instruction. - void SetDbgScopeBeginLabels(const MachineInstr *MI, unsigned Label); - /// SetDbgScopeEndLabels - Update DbgScope end labels for the scopes that /// end with this machine instruction. void SetDbgScopeEndLabels(const MachineInstr *MI, unsigned Label); -}; + /// BeginScope - Process beginning of a scope starting at Label. + void BeginScope(const MachineInstr *MI, unsigned Label); + + /// EndScope - Prcess end of a scope. 
+ void EndScope(const MachineInstr *MI); +}; } // End of namespace llvm #endif diff --git a/lib/CodeGen/AsmPrinter/DwarfException.cpp b/lib/CodeGen/AsmPrinter/DwarfException.cpp index 6c03b55..1c8b8f4 100644 --- a/lib/CodeGen/AsmPrinter/DwarfException.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfException.cpp @@ -74,6 +74,25 @@ unsigned DwarfException::SizeOfEncodedValue(unsigned Encoding) { return 0; } +/// CreateLabelDiff - Emit a label and subtract it from the expression we +/// already have. This is equivalent to emitting "foo - .", but we have to emit +/// the label for "." directly. +const MCExpr *DwarfException::CreateLabelDiff(const MCExpr *ExprRef, + const char *LabelName, + unsigned Index) { + SmallString<64> Name; + raw_svector_ostream(Name) << MAI->getPrivateGlobalPrefix() + << LabelName << Asm->getFunctionNumber() + << "_" << Index; + MCSymbol *DotSym = Asm->OutContext.GetOrCreateSymbol(Name.str()); + Asm->OutStreamer.EmitLabel(DotSym); + + return MCBinaryExpr::CreateSub(ExprRef, + MCSymbolRefExpr::Create(DotSym, + Asm->OutContext), + Asm->OutContext); +} + /// EmitCIE - Emit a Common Information Entry (CIE). This holds information that /// is shared among many Frame Description Entries. There is at least one CIE /// in every non-empty .debug_frame section. @@ -176,24 +195,10 @@ void DwarfException::EmitCIE(const Function *PersonalityFn, unsigned Index) { // If there is a personality, we need to indicate the function's location. if (PersonalityRef) { - // If the reference to the personality function symbol is not already - // pc-relative, then we need to subtract our current address from it. Do - // this by emitting a label and subtracting it from the expression we - // already have. This is equivalent to emitting "foo - .", but we have to - // emit the label for "." directly. - if (!IsPersonalityPCRel) { - SmallString<64> Name; - raw_svector_ostream(Name) << MAI->getPrivateGlobalPrefix() - << "personalityref_addr" << Asm->getFunctionNumber() << "_" << Index; - MCSymbol *DotSym = Asm->OutContext.GetOrCreateSymbol(Name.str()); - Asm->OutStreamer.EmitLabel(DotSym); - - PersonalityRef = - MCBinaryExpr::CreateSub(PersonalityRef, - MCSymbolRefExpr::Create(DotSym,Asm->OutContext), - Asm->OutContext); - } - + if (!IsPersonalityPCRel) + PersonalityRef = CreateLabelDiff(PersonalityRef, "personalityref_addr", + Index); + O << MAI->getData32bitsDirective(); PersonalityRef->print(O, MAI); Asm->EOL("Personality"); @@ -232,11 +237,16 @@ void DwarfException::EmitFDE(const FunctionEHFrameInfo &EHFrameInfo) { // corresponding function is static, this should not be externally visible. if (!TheFunc->hasLocalLinkage()) if (const char *GlobalEHDirective = MAI->getGlobalEHDirective()) - O << GlobalEHDirective << EHFrameInfo.FnName << "\n"; + O << GlobalEHDirective << EHFrameInfo.FnName << '\n'; // If corresponding function is weak definition, this should be too. if (TheFunc->isWeakForLinker() && MAI->getWeakDefDirective()) - O << MAI->getWeakDefDirective() << EHFrameInfo.FnName << "\n"; + O << MAI->getWeakDefDirective() << EHFrameInfo.FnName << '\n'; + + // If corresponding function is hidden, this should be too. + if (TheFunc->hasHiddenVisibility()) + if (const char *HiddenDirective = MAI->getHiddenDirective()) + O << HiddenDirective << EHFrameInfo.FnName << '\n' ; // If there are no calls then you can't unwind. This may mean we can omit the // EH Frame, but some environments do not handle weak absolute symbols. 
If @@ -457,6 +467,39 @@ ComputeActionsTable(const SmallVectorImpl<const LandingPadInfo*> &LandingPads, return SizeActions; } +/// CallToNoUnwindFunction - Return `true' if this is a call to a function +/// marked `nounwind'. Return `false' otherwise. +bool DwarfException::CallToNoUnwindFunction(const MachineInstr *MI) { + assert(MI->getDesc().isCall() && "This should be a call instruction!"); + + bool MarkedNoUnwind = false; + bool SawFunc = false; + + for (unsigned I = 0, E = MI->getNumOperands(); I != E; ++I) { + const MachineOperand &MO = MI->getOperand(I); + + if (MO.isGlobal()) { + if (Function *F = dyn_cast<Function>(MO.getGlobal())) { + if (SawFunc) { + // Be conservative. If we have more than one function operand for this + // call, then we can't make the assumption that it's the callee and + // not a parameter to the call. + // + // FIXME: Determine if there's a way to say that `F' is the callee or + // parameter. + MarkedNoUnwind = false; + break; + } + + MarkedNoUnwind = F->doesNotThrow(); + SawFunc = true; + } + } + } + + return MarkedNoUnwind; +} + /// ComputeCallSiteTable - Compute the call-site table. The entry for an invoke /// has a try-range containing the call, a non-zero landing pad, and an /// appropriate action. The entry for an ordinary call has a try-range @@ -485,7 +528,9 @@ ComputeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites, for (MachineBasicBlock::const_iterator MI = I->begin(), E = I->end(); MI != E; ++MI) { if (!MI->isLabel()) { - SawPotentiallyThrowing |= MI->getDesc().isCall(); + if (MI->getDesc().isCall()) + SawPotentiallyThrowing |= !CallToNoUnwindFunction(MI); + continue; } @@ -497,7 +542,7 @@ ComputeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites, SawPotentiallyThrowing = false; // Beginning of a new try-range? - RangeMapType::iterator L = PadMap.find(BeginLabel); + RangeMapType::const_iterator L = PadMap.find(BeginLabel); if (L == PadMap.end()) // Nope, it was just some random label. continue; diff --git a/lib/CodeGen/AsmPrinter/DwarfException.h b/lib/CodeGen/AsmPrinter/DwarfException.h index f6f5025..aff1665 100644 --- a/lib/CodeGen/AsmPrinter/DwarfException.h +++ b/lib/CodeGen/AsmPrinter/DwarfException.h @@ -25,13 +25,14 @@ namespace llvm { struct LandingPadInfo; class MachineModuleInfo; class MCAsmInfo; +class MCExpr; class Timer; class raw_ostream; //===----------------------------------------------------------------------===// /// DwarfException - Emits Dwarf exception handling directives. /// -class VISIBILITY_HIDDEN DwarfException : public Dwarf { +class DwarfException : public Dwarf { struct FunctionEHFrameInfo { std::string FnName; unsigned Number; @@ -155,6 +156,10 @@ class VISIBILITY_HIDDEN DwarfException : public Dwarf { SmallVectorImpl<ActionEntry> &Actions, SmallVectorImpl<unsigned> &FirstActions); + /// CallToNoUnwindFunction - Return `true' if this is a call to a function + /// marked `nounwind'. Return `false' otherwise. + bool CallToNoUnwindFunction(const MachineInstr *MI); + /// ComputeCallSiteTable - Compute the call-site table. The entry for an /// invoke has a try-range containing the call, a non-zero landing pad and an /// appropriate action. The entry for an ordinary call has a try-range @@ -168,6 +173,11 @@ class VISIBILITY_HIDDEN DwarfException : public Dwarf { const SmallVectorImpl<unsigned> &FirstActions); void EmitExceptionTable(); + /// CreateLabelDiff - Emit a label and subtract it from the expression we + /// already have. 
This is equivalent to emitting "foo - .", but we have to + /// emit the label for "." directly. + const MCExpr *CreateLabelDiff(const MCExpr *ExprRef, const char *LabelName, + unsigned Index); public: //===--------------------------------------------------------------------===// // Main entry points. diff --git a/lib/CodeGen/AsmPrinter/DwarfPrinter.h b/lib/CodeGen/AsmPrinter/DwarfPrinter.h index 33ebb3b..dedd695 100644 --- a/lib/CodeGen/AsmPrinter/DwarfPrinter.h +++ b/lib/CodeGen/AsmPrinter/DwarfPrinter.h @@ -29,7 +29,7 @@ namespace llvm { class TargetData; class TargetRegisterInfo; - class VISIBILITY_HIDDEN Dwarf { + class Dwarf { protected: //===-------------------------------------------------------------==---===// // Core attributes used by the DWARF printer. diff --git a/lib/CodeGen/AsmPrinter/DwarfWriter.cpp b/lib/CodeGen/AsmPrinter/DwarfWriter.cpp index 0638d35..63ae653 100644 --- a/lib/CodeGen/AsmPrinter/DwarfWriter.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfWriter.cpp @@ -81,47 +81,20 @@ unsigned DwarfWriter::RecordSourceLine(unsigned Line, unsigned Col, return DD->RecordSourceLine(Line, Col, Scope); } -/// RecordRegionStart - Indicate the start of a region. -unsigned DwarfWriter::RecordRegionStart(MDNode *N) { - return DD->RecordRegionStart(N); -} - -/// RecordRegionEnd - Indicate the end of a region. -unsigned DwarfWriter::RecordRegionEnd(MDNode *N) { - return DD->RecordRegionEnd(N); -} - /// getRecordSourceLineCount - Count source lines. unsigned DwarfWriter::getRecordSourceLineCount() { return DD->getRecordSourceLineCount(); } -/// RecordVariable - Indicate the declaration of a local variable. -/// -void DwarfWriter::RecordVariable(MDNode *N, unsigned FrameIndex) { - DD->RecordVariable(N, FrameIndex); -} - /// ShouldEmitDwarfDebug - Returns true if Dwarf debugging declarations should /// be emitted. bool DwarfWriter::ShouldEmitDwarfDebug() const { return DD && DD->ShouldEmitDwarfDebug(); } -//// RecordInlinedFnStart -unsigned DwarfWriter::RecordInlinedFnStart(DISubprogram SP, DICompileUnit CU, - unsigned Line, unsigned Col) { - return DD->RecordInlinedFnStart(SP, CU, Line, Col); -} - -/// RecordInlinedFnEnd - Indicate the end of inlined subroutine. 
-unsigned DwarfWriter::RecordInlinedFnEnd(DISubprogram SP) { - return DD->RecordInlinedFnEnd(SP); -} - -void DwarfWriter::SetDbgScopeBeginLabels(const MachineInstr *MI, unsigned L) { - DD->SetDbgScopeEndLabels(MI, L); +void DwarfWriter::BeginScope(const MachineInstr *MI, unsigned L) { + DD->BeginScope(MI, L); } -void DwarfWriter::SetDbgScopeEndLabels(const MachineInstr *MI, unsigned L) { - DD->SetDbgScopeBeginLabels(MI, L); +void DwarfWriter::EndScope(const MachineInstr *MI) { + DD->EndScope(MI); } diff --git a/lib/CodeGen/BranchFolding.cpp b/lib/CodeGen/BranchFolding.cpp index baea964..94bfb72 100644 --- a/lib/CodeGen/BranchFolding.cpp +++ b/lib/CodeGen/BranchFolding.cpp @@ -32,6 +32,7 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SetVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/STLExtras.h" #include <algorithm> @@ -40,18 +41,38 @@ using namespace llvm; STATISTIC(NumDeadBlocks, "Number of dead blocks removed"); STATISTIC(NumBranchOpts, "Number of branches optimized"); STATISTIC(NumTailMerge , "Number of block tails merged"); -static cl::opt<cl::boolOrDefault> FlagEnableTailMerge("enable-tail-merge", +static cl::opt<cl::boolOrDefault> FlagEnableTailMerge("enable-tail-merge", cl::init(cl::BOU_UNSET), cl::Hidden); // Throttle for huge numbers of predecessors (compile speed problems) static cl::opt<unsigned> -TailMergeThreshold("tail-merge-threshold", +TailMergeThreshold("tail-merge-threshold", cl::desc("Max number of predecessors to consider tail merging"), cl::init(150), cl::Hidden); +// Heuristic for tail merging (and, inversely, tail duplication). +// TODO: This should be replaced with a target query. +static cl::opt<unsigned> +TailMergeSize("tail-merge-size", + cl::desc("Min number of instructions to consider tail merging"), + cl::init(3), cl::Hidden); + +namespace { + /// BranchFolderPass - Wrap branch folder in a machine function pass. + class BranchFolderPass : public MachineFunctionPass, + public BranchFolder { + public: + static char ID; + explicit BranchFolderPass(bool defaultEnableTailMerge) + : MachineFunctionPass(&ID), BranchFolder(defaultEnableTailMerge) {} + + virtual bool runOnMachineFunction(MachineFunction &MF); + virtual const char *getPassName() const { return "Control Flow Optimizer"; } + }; +} char BranchFolderPass::ID = 0; -FunctionPass *llvm::createBranchFoldingPass(bool DefaultEnableTailMerge) { +FunctionPass *llvm::createBranchFoldingPass(bool DefaultEnableTailMerge) { return new BranchFolderPass(DefaultEnableTailMerge); } @@ -63,7 +84,6 @@ bool BranchFolderPass::runOnMachineFunction(MachineFunction &MF) { } - BranchFolder::BranchFolder(bool defaultEnableTailMerge) { switch (FlagEnableTailMerge) { case cl::BOU_UNSET: EnableTailMerge = defaultEnableTailMerge; break; @@ -77,12 +97,12 @@ BranchFolder::BranchFolder(bool defaultEnableTailMerge) { void BranchFolder::RemoveDeadBlock(MachineBasicBlock *MBB) { assert(MBB->pred_empty() && "MBB must be dead!"); DEBUG(errs() << "\nRemoving MBB: " << *MBB); - + MachineFunction *MF = MBB->getParent(); // drop all successors. while (!MBB->succ_empty()) MBB->removeSuccessor(MBB->succ_end()-1); - + // If there are any labels in the basic block, unregister them from // MachineModuleInfo. if (MMI && !MBB->empty()) { @@ -93,7 +113,7 @@ void BranchFolder::RemoveDeadBlock(MachineBasicBlock *MBB) { MMI->InvalidateLabel(I->getOperand(0).getImm()); } } - + // Remove the block. 
MF->erase(MBB); } @@ -182,6 +202,11 @@ bool BranchFolder::OptimizeFunction(MachineFunction &MF, MadeChange |= MadeChangeThisIteration; } + // Do tail duplication once after tail merging is done. Otherwise it is + // tough to avoid situations where tail duplication and tail merging undo + // each other's transformations ad infinitum. + MadeChange |= TailDuplicateBlocks(MF); + // See if any jump tables have become mergable or dead as the code generator // did its thing. MachineJumpTableInfo *JTI = MF.getJumpTableInfo(); @@ -190,7 +215,7 @@ bool BranchFolder::OptimizeFunction(MachineFunction &MF, // Figure out how these jump tables should be merged. std::vector<unsigned> JTMapping; JTMapping.reserve(JTs.size()); - + // We always keep the 0th jump table. JTMapping.push_back(0); @@ -202,7 +227,7 @@ bool BranchFolder::OptimizeFunction(MachineFunction &MF, else JTMapping.push_back(JTI->getJumpTableIndex(JTs[i].MBBs)); } - + // If a jump table was merge with another one, walk the function rewriting // references to jump tables to reference the new JT ID's. Keep track of // whether we see a jump table idx, if not, we can delete the JT. @@ -221,7 +246,7 @@ bool BranchFolder::OptimizeFunction(MachineFunction &MF, JTIsLive.set(NewIdx); } } - + // Finally, remove dead jump tables. This happens either because the // indirect jump was unreachable (and thus deleted) or because the jump // table was merged with some other one. @@ -245,7 +270,7 @@ static unsigned HashMachineInstr(const MachineInstr *MI) { unsigned Hash = MI->getOpcode(); for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { const MachineOperand &Op = MI->getOperand(i); - + // Merge in bits from the operand if easy. unsigned OperandHash = 0; switch (Op.getType()) { @@ -267,31 +292,30 @@ static unsigned HashMachineInstr(const MachineInstr *MI) { break; default: break; } - + Hash += ((OperandHash << 3) | Op.getType()) << (i&31); } return Hash; } /// HashEndOfMBB - Hash the last few instructions in the MBB. For blocks -/// with no successors, we hash two instructions, because cross-jumping -/// only saves code when at least two instructions are removed (since a +/// with no successors, we hash two instructions, because cross-jumping +/// only saves code when at least two instructions are removed (since a /// branch must be inserted). For blocks with a successor, one of the /// two blocks to be tail-merged will end with a branch already, so /// it gains to cross-jump even for one instruction. - static unsigned HashEndOfMBB(const MachineBasicBlock *MBB, unsigned minCommonTailLength) { MachineBasicBlock::const_iterator I = MBB->end(); if (I == MBB->begin()) return 0; // Empty MBB. - + --I; unsigned Hash = HashMachineInstr(I); - + if (I == MBB->begin() || minCommonTailLength == 1) return Hash; // Single instr MBB. - + --I; // Hash in the second-to-last instruction. Hash ^= HashMachineInstr(I) << 2; @@ -307,11 +331,11 @@ static unsigned ComputeCommonTailLength(MachineBasicBlock *MBB1, MachineBasicBlock::iterator &I2) { I1 = MBB1->end(); I2 = MBB2->end(); - + unsigned TailLen = 0; while (I1 != MBB1->begin() && I2 != MBB2->begin()) { --I1; --I2; - if (!I1->isIdenticalTo(I2) || + if (!I1->isIdenticalTo(I2) || // FIXME: This check is dubious. It's used to get around a problem where // people incorrectly expect inline asm directives to remain in the same // relative order. 
This is untenable because normal compiler @@ -332,11 +356,11 @@ static unsigned ComputeCommonTailLength(MachineBasicBlock *MBB1, void BranchFolder::ReplaceTailWithBranchTo(MachineBasicBlock::iterator OldInst, MachineBasicBlock *NewDest) { MachineBasicBlock *OldBB = OldInst->getParent(); - + // Remove all the old successors of OldBB from the CFG. while (!OldBB->succ_empty()) OldBB->removeSuccessor(OldBB->succ_begin()); - + // Remove all the dead instructions from the end of OldBB. OldBB->erase(OldInst, OldBB->end()); @@ -361,10 +385,10 @@ MachineBasicBlock *BranchFolder::SplitMBBAt(MachineBasicBlock &CurMBB, // Move all the successors of this block to the specified block. NewMBB->transferSuccessors(&CurMBB); - + // Add an edge from CurMBB to NewMBB for the fall-through. CurMBB.addSuccessor(NewMBB); - + // Splice the code over. NewMBB->splice(NewMBB->end(), &CurMBB, BBI1, CurMBB.end()); @@ -375,7 +399,7 @@ MachineBasicBlock *BranchFolder::SplitMBBAt(MachineBasicBlock &CurMBB, RS->forward(prior(CurMBB.end())); BitVector RegsLiveAtExit(TRI->getNumRegs()); RS->getRegsUsed(RegsLiveAtExit, false); - for (unsigned int i=0, e=TRI->getNumRegs(); i!=e; i++) + for (unsigned int i = 0, e = TRI->getNumRegs(); i != e; i++) if (RegsLiveAtExit[i]) NewMBB->addLiveIn(i); } @@ -404,8 +428,7 @@ static unsigned EstimateRuntime(MachineBasicBlock::iterator I, // branches temporarily for tail merging). In the case where CurMBB ends // with a conditional branch to the next block, optimize by reversing the // test and conditionally branching to SuccMBB instead. - -static void FixTail(MachineBasicBlock* CurMBB, MachineBasicBlock *SuccBB, +static void FixTail(MachineBasicBlock *CurMBB, MachineBasicBlock *SuccBB, const TargetInstrInfo *TII) { MachineFunction *MF = CurMBB->getParent(); MachineFunction::iterator I = next(MachineFunction::iterator(CurMBB)); @@ -425,24 +448,43 @@ static void FixTail(MachineBasicBlock* CurMBB, MachineBasicBlock *SuccBB, TII->InsertBranch(*CurMBB, SuccBB, NULL, SmallVector<MachineOperand, 0>()); } -static bool MergeCompare(const std::pair<unsigned,MachineBasicBlock*> &p, - const std::pair<unsigned,MachineBasicBlock*> &q) { - if (p.first < q.first) - return true; - else if (p.first > q.first) - return false; - else if (p.second->getNumber() < q.second->getNumber()) - return true; - else if (p.second->getNumber() > q.second->getNumber()) - return false; - else { - // _GLIBCXX_DEBUG checks strict weak ordering, which involves comparing - // an object with itself. +bool +BranchFolder::MergePotentialsElt::operator<(const MergePotentialsElt &o) const { + if (getHash() < o.getHash()) + return true; + else if (getHash() > o.getHash()) + return false; + else if (getBlock()->getNumber() < o.getBlock()->getNumber()) + return true; + else if (getBlock()->getNumber() > o.getBlock()->getNumber()) + return false; + else { + // _GLIBCXX_DEBUG checks strict weak ordering, which involves comparing + // an object with itself. #ifndef _GLIBCXX_DEBUG - llvm_unreachable("Predecessor appears twice"); + llvm_unreachable("Predecessor appears twice"); #endif - return false; + return false; + } +} + +/// CountTerminators - Count the number of terminators in the given +/// block and set I to the position of the first non-terminator, if there +/// is one, or MBB->end() otherwise. 
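The operator< added above orders merge candidates first by tail hash and then by block number, which gives a deterministic strict weak ordering; the _GLIBCXX_DEBUG branch exists only because that mode compares an element with itself. A standalone sketch of the same ordering on a simplified element type (not the real MergePotentialsElt):

#include <algorithm>
#include <vector>

// Simplified merge candidate: a tail hash plus the block's number.
struct Elt {
  unsigned Hash;
  int BlockNumber;

  bool operator<(const Elt &O) const {
    if (Hash != O.Hash)
      return Hash < O.Hash;             // primary key: tail hash
    return BlockNumber < O.BlockNumber; // tie-break: deterministic block order
  }
};

// stable_sort with this ordering puts blocks with identical tail hashes
// next to each other, which is all the tail merger needs.
static void sortCandidates(std::vector<Elt> &V) {
  std::stable_sort(V.begin(), V.end());
}
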
+static unsigned CountTerminators(MachineBasicBlock *MBB, + MachineBasicBlock::iterator &I) { + I = MBB->end(); + unsigned NumTerms = 0; + for (;;) { + if (I == MBB->begin()) { + I = MBB->end(); + break; } + --I; + if (!I->getDesc().isTerminator()) break; + ++NumTerms; + } + return NumTerms; } /// ProfitableToMerge - Check if two machine basic blocks have a common tail @@ -454,21 +496,52 @@ static bool ProfitableToMerge(MachineBasicBlock *MBB1, unsigned minCommonTailLength, unsigned &CommonTailLen, MachineBasicBlock::iterator &I1, - MachineBasicBlock::iterator &I2) { + MachineBasicBlock::iterator &I2, + MachineBasicBlock *SuccBB, + MachineBasicBlock *PredBB) { CommonTailLen = ComputeCommonTailLength(MBB1, MBB2, I1, I2); MachineFunction *MF = MBB1->getParent(); - if (CommonTailLen >= minCommonTailLength) - return true; - if (CommonTailLen == 0) return false; - // If we are optimizing for code size, 1 instruction in common is enough if - // we don't have to split a block. At worst we will be replacing a - // fallthrough into the common tail with a branch, which at worst breaks - // even with falling through into the duplicated common tail. - if (MF->getFunction()->hasFnAttr(Attribute::OptimizeForSize) && + // It's almost always profitable to merge any number of non-terminator + // instructions with the block that falls through into the common successor. + if (MBB1 == PredBB || MBB2 == PredBB) { + MachineBasicBlock::iterator I; + unsigned NumTerms = CountTerminators(MBB1 == PredBB ? MBB2 : MBB1, I); + if (CommonTailLen > NumTerms) + return true; + } + + // If one of the blocks can be completely merged and happens to be in + // a position where the other could fall through into it, merge any number + // of instructions, because it can be done without a branch. + // TODO: If the blocks are not adjacent, move one of them so that they are? + if (MBB1->isLayoutSuccessor(MBB2) && I2 == MBB2->begin()) + return true; + if (MBB2->isLayoutSuccessor(MBB1) && I1 == MBB1->begin()) + return true; + + // If both blocks have an unconditional branch temporarily stripped out, + // count that as an additional common instruction for the following + // heuristics. + unsigned EffectiveTailLen = CommonTailLen; + if (SuccBB && MBB1 != PredBB && MBB2 != PredBB && + !MBB1->back().getDesc().isBarrier() && + !MBB2->back().getDesc().isBarrier()) + ++EffectiveTailLen; + + // Check if the common tail is long enough to be worthwhile. + if (EffectiveTailLen >= minCommonTailLength) + return true; + + // If we are optimizing for code size, 2 instructions in common is enough if + // we don't have to split a block. At worst we will be introducing 1 new + // branch instruction, which is likely to be smaller than the 2 + // instructions that would be deleted in the merge. + if (EffectiveTailLen >= 2 && + MF->getFunction()->hasFnAttr(Attribute::OptimizeForSize) && (I1 == MBB1->begin() || I2 == MBB2->begin())) return true; @@ -476,40 +549,44 @@ static bool ProfitableToMerge(MachineBasicBlock *MBB1, } /// ComputeSameTails - Look through all the blocks in MergePotentials that have -/// hash CurHash (guaranteed to match the last element). Build the vector +/// hash CurHash (guaranteed to match the last element). Build the vector /// SameTails of all those that have the (same) largest number of instructions /// in common of any pair of these blocks. 
SameTails entries contain an -/// iterator into MergePotentials (from which the MachineBasicBlock can be -/// found) and a MachineBasicBlock::iterator into that MBB indicating the +/// iterator into MergePotentials (from which the MachineBasicBlock can be +/// found) and a MachineBasicBlock::iterator into that MBB indicating the /// instruction where the matching code sequence begins. /// Order of elements in SameTails is the reverse of the order in which /// those blocks appear in MergePotentials (where they are not necessarily /// consecutive). -unsigned BranchFolder::ComputeSameTails(unsigned CurHash, - unsigned minCommonTailLength) { +unsigned BranchFolder::ComputeSameTails(unsigned CurHash, + unsigned minCommonTailLength, + MachineBasicBlock *SuccBB, + MachineBasicBlock *PredBB) { unsigned maxCommonTailLength = 0U; SameTails.clear(); MachineBasicBlock::iterator TrialBBI1, TrialBBI2; MPIterator HighestMPIter = prior(MergePotentials.end()); for (MPIterator CurMPIter = prior(MergePotentials.end()), - B = MergePotentials.begin(); - CurMPIter!=B && CurMPIter->first==CurHash; + B = MergePotentials.begin(); + CurMPIter != B && CurMPIter->getHash() == CurHash; --CurMPIter) { - for (MPIterator I = prior(CurMPIter); I->first==CurHash ; --I) { + for (MPIterator I = prior(CurMPIter); I->getHash() == CurHash ; --I) { unsigned CommonTailLen; - if (ProfitableToMerge(CurMPIter->second, I->second, minCommonTailLength, - CommonTailLen, TrialBBI1, TrialBBI2)) { + if (ProfitableToMerge(CurMPIter->getBlock(), I->getBlock(), + minCommonTailLength, + CommonTailLen, TrialBBI1, TrialBBI2, + SuccBB, PredBB)) { if (CommonTailLen > maxCommonTailLength) { SameTails.clear(); maxCommonTailLength = CommonTailLen; HighestMPIter = CurMPIter; - SameTails.push_back(std::make_pair(CurMPIter, TrialBBI1)); + SameTails.push_back(SameTailElt(CurMPIter, TrialBBI1)); } if (HighestMPIter == CurMPIter && CommonTailLen == maxCommonTailLength) - SameTails.push_back(std::make_pair(I, TrialBBI2)); + SameTails.push_back(SameTailElt(I, TrialBBI2)); } - if (I==B) + if (I == B) break; } } @@ -518,21 +595,21 @@ unsigned BranchFolder::ComputeSameTails(unsigned CurHash, /// RemoveBlocksWithHash - Remove all blocks with hash CurHash from /// MergePotentials, restoring branches at ends of blocks as appropriate. -void BranchFolder::RemoveBlocksWithHash(unsigned CurHash, - MachineBasicBlock* SuccBB, - MachineBasicBlock* PredBB) { +void BranchFolder::RemoveBlocksWithHash(unsigned CurHash, + MachineBasicBlock *SuccBB, + MachineBasicBlock *PredBB) { MPIterator CurMPIter, B; - for (CurMPIter = prior(MergePotentials.end()), B = MergePotentials.begin(); - CurMPIter->first==CurHash; + for (CurMPIter = prior(MergePotentials.end()), B = MergePotentials.begin(); + CurMPIter->getHash() == CurHash; --CurMPIter) { // Put the unconditional branch back, if we need one. - MachineBasicBlock *CurMBB = CurMPIter->second; + MachineBasicBlock *CurMBB = CurMPIter->getBlock(); if (SuccBB && CurMBB != PredBB) FixTail(CurMBB, SuccBB, TII); - if (CurMPIter==B) + if (CurMPIter == B) break; } - if (CurMPIter->first!=CurHash) + if (CurMPIter->getHash() != CurHash) CurMPIter++; MergePotentials.erase(CurMPIter, MergePotentials.end()); } @@ -541,35 +618,37 @@ void BranchFolder::RemoveBlocksWithHash(unsigned CurHash, /// only of the common tail. Create a block that does by splitting one. 
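ComputeSameTails scans the candidates that share the current hash and keeps the ones achieving the longest shared instruction suffix. The heart of that is a backwards walk over two sequences; a minimal sketch with strings standing in for instructions (simplified, not the real ComputeCommonTailLength):

#include <algorithm>
#include <string>
#include <vector>

// Length of the common suffix of two instruction sequences.
static unsigned commonTailLength(const std::vector<std::string> &A,
                                 const std::vector<std::string> &B) {
  unsigned Len = 0;
  while (Len < A.size() && Len < B.size() &&
         A[A.size() - 1 - Len] == B[B.size() - 1 - Len])
    ++Len;
  return Len;
}

// Among blocks believed to share a tail (same hash), find the maximum
// common tail length, mirroring the SameTails bookkeeping above.
static unsigned maxCommonTail(const std::vector<std::vector<std::string> > &Blocks) {
  unsigned Best = 0;
  for (unsigned I = 0; I < Blocks.size(); ++I)
    for (unsigned J = I + 1; J < Blocks.size(); ++J)
      Best = std::max(Best, commonTailLength(Blocks[I], Blocks[J]));
  return Best;
}
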
unsigned BranchFolder::CreateCommonTailOnlyBlock(MachineBasicBlock *&PredBB, unsigned maxCommonTailLength) { - unsigned i, commonTailIndex; + unsigned commonTailIndex = 0; unsigned TimeEstimate = ~0U; - for (i=0, commonTailIndex=0; i<SameTails.size(); i++) { + for (unsigned i = 0, e = SameTails.size(); i != e; ++i) { // Use PredBB if possible; that doesn't require a new branch. - if (SameTails[i].first->second==PredBB) { + if (SameTails[i].getBlock() == PredBB) { commonTailIndex = i; break; } // Otherwise, make a (fairly bogus) choice based on estimate of // how long it will take the various blocks to execute. - unsigned t = EstimateRuntime(SameTails[i].first->second->begin(), - SameTails[i].second); - if (t<=TimeEstimate) { + unsigned t = EstimateRuntime(SameTails[i].getBlock()->begin(), + SameTails[i].getTailStartPos()); + if (t <= TimeEstimate) { TimeEstimate = t; commonTailIndex = i; } } - MachineBasicBlock::iterator BBI = SameTails[commonTailIndex].second; - MachineBasicBlock *MBB = SameTails[commonTailIndex].first->second; + MachineBasicBlock::iterator BBI = + SameTails[commonTailIndex].getTailStartPos(); + MachineBasicBlock *MBB = SameTails[commonTailIndex].getBlock(); - DEBUG(errs() << "\nSplitting " << MBB->getNumber() << ", size " + DEBUG(errs() << "\nSplitting BB#" << MBB->getNumber() << ", size " << maxCommonTailLength); MachineBasicBlock *newMBB = SplitMBBAt(*MBB, BBI); - SameTails[commonTailIndex].first->second = newMBB; - SameTails[commonTailIndex].second = newMBB->begin(); + SameTails[commonTailIndex].setBlock(newMBB); + SameTails[commonTailIndex].setTailStartPos(newMBB->begin()); + // If we split PredBB, newMBB is the new predecessor. - if (PredBB==MBB) + if (PredBB == MBB) PredBB = newMBB; return commonTailIndex; @@ -579,35 +658,49 @@ unsigned BranchFolder::CreateCommonTailOnlyBlock(MachineBasicBlock *&PredBB, // successor, or all have no successor) can be tail-merged. If there is a // successor, any blocks in MergePotentials that are not tail-merged and // are not immediately before Succ must have an unconditional branch to -// Succ added (but the predecessor/successor lists need no adjustment). +// Succ added (but the predecessor/successor lists need no adjustment). // The lone predecessor of Succ that falls through into Succ, // if any, is given in PredBB. -bool BranchFolder::TryMergeBlocks(MachineBasicBlock *SuccBB, - MachineBasicBlock* PredBB) { +bool BranchFolder::TryTailMergeBlocks(MachineBasicBlock *SuccBB, + MachineBasicBlock *PredBB) { bool MadeChange = false; - // It doesn't make sense to save a single instruction since tail merging - // will add a jump. - // FIXME: Ask the target to provide the threshold? - unsigned minCommonTailLength = (SuccBB ? 1 : 2) + 1; - - DEBUG(errs() << "\nTryMergeBlocks " << MergePotentials.size() << '\n'); + // Except for the special cases below, tail-merge if there are at least + // this many instructions in common. + unsigned minCommonTailLength = TailMergeSize; + + DEBUG(errs() << "\nTryTailMergeBlocks: "; + for (unsigned i = 0, e = MergePotentials.size(); i != e; ++i) + errs() << "BB#" << MergePotentials[i].getBlock()->getNumber() + << (i == e-1 ? "" : ", "); + errs() << "\n"; + if (SuccBB) { + errs() << " with successor BB#" << SuccBB->getNumber() << '\n'; + if (PredBB) + errs() << " which has fall-through from BB#" + << PredBB->getNumber() << "\n"; + } + errs() << "Looking for common tails of at least " + << minCommonTailLength << " instruction" + << (minCommonTailLength == 1 ? 
"" : "s") << '\n'; + ); // Sort by hash value so that blocks with identical end sequences sort // together. - std::stable_sort(MergePotentials.begin(), MergePotentials.end(),MergeCompare); + std::stable_sort(MergePotentials.begin(), MergePotentials.end()); // Walk through equivalence sets looking for actual exact matches. while (MergePotentials.size() > 1) { - unsigned CurHash = prior(MergePotentials.end())->first; - + unsigned CurHash = MergePotentials.back().getHash(); + // Build SameTails, identifying the set of blocks with this hash code // and with the maximum number of instructions in common. - unsigned maxCommonTailLength = ComputeSameTails(CurHash, - minCommonTailLength); + unsigned maxCommonTailLength = ComputeSameTails(CurHash, + minCommonTailLength, + SuccBB, PredBB); - // If we didn't find any pair that has at least minCommonTailLength + // If we didn't find any pair that has at least minCommonTailLength // instructions in common, remove all blocks with this hash code and retry. if (SameTails.empty()) { RemoveBlocksWithHash(CurHash, SuccBB, PredBB); @@ -618,36 +711,58 @@ bool BranchFolder::TryMergeBlocks(MachineBasicBlock *SuccBB, // block, which we can't jump to), we can treat all blocks with this same // tail at once. Use PredBB if that is one of the possibilities, as that // will not introduce any extra branches. - MachineBasicBlock *EntryBB = MergePotentials.begin()->second-> - getParent()->begin(); - unsigned int commonTailIndex, i; - for (commonTailIndex=SameTails.size(), i=0; i<SameTails.size(); i++) { - MachineBasicBlock *MBB = SameTails[i].first->second; - if (MBB->begin() == SameTails[i].second && MBB != EntryBB) { - commonTailIndex = i; - if (MBB==PredBB) + MachineBasicBlock *EntryBB = MergePotentials.begin()->getBlock()-> + getParent()->begin(); + unsigned commonTailIndex = SameTails.size(); + // If there are two blocks, check to see if one can be made to fall through + // into the other. + if (SameTails.size() == 2 && + SameTails[0].getBlock()->isLayoutSuccessor(SameTails[1].getBlock()) && + SameTails[1].tailIsWholeBlock()) + commonTailIndex = 1; + else if (SameTails.size() == 2 && + SameTails[1].getBlock()->isLayoutSuccessor( + SameTails[0].getBlock()) && + SameTails[0].tailIsWholeBlock()) + commonTailIndex = 0; + else { + // Otherwise just pick one, favoring the fall-through predecessor if + // there is one. + for (unsigned i = 0, e = SameTails.size(); i != e; ++i) { + MachineBasicBlock *MBB = SameTails[i].getBlock(); + if (MBB == EntryBB && SameTails[i].tailIsWholeBlock()) + continue; + if (MBB == PredBB) { + commonTailIndex = i; break; + } + if (SameTails[i].tailIsWholeBlock()) + commonTailIndex = i; } } - if (commonTailIndex==SameTails.size()) { + if (commonTailIndex == SameTails.size() || + (SameTails[commonTailIndex].getBlock() == PredBB && + !SameTails[commonTailIndex].tailIsWholeBlock())) { // None of the blocks consist entirely of the common tail. // Split a block so that one does. - commonTailIndex = CreateCommonTailOnlyBlock(PredBB, maxCommonTailLength); + commonTailIndex = CreateCommonTailOnlyBlock(PredBB, maxCommonTailLength); } - MachineBasicBlock *MBB = SameTails[commonTailIndex].first->second; + MachineBasicBlock *MBB = SameTails[commonTailIndex].getBlock(); // MBB is common tail. Adjust all other BB's to jump to this one. // Traversal must be forwards so erases work. 
- DEBUG(errs() << "\nUsing common tail " << MBB->getNumber() << " for "); - for (unsigned int i=0; i<SameTails.size(); ++i) { - if (commonTailIndex==i) + DEBUG(errs() << "\nUsing common tail in BB#" << MBB->getNumber() + << " for "); + for (unsigned int i=0, e = SameTails.size(); i != e; ++i) { + if (commonTailIndex == i) continue; - DEBUG(errs() << SameTails[i].first->second->getNumber() << ","); + DEBUG(errs() << "BB#" << SameTails[i].getBlock()->getNumber() + << (i == e-1 ? "" : ", ")); // Hack the end off BB i, making it jump to BB commonTailIndex instead. - ReplaceTailWithBranchTo(SameTails[i].second, MBB); + ReplaceTailWithBranchTo(SameTails[i].getTailStartPos(), MBB); // BB i is no longer a predecessor of SuccBB; remove it from the worklist. - MergePotentials.erase(SameTails[i].first); + MergePotentials.erase(SameTails[i].getMPIter()); } DEBUG(errs() << "\n"); // We leave commonTailIndex in the worklist in case there are other blocks @@ -660,26 +775,27 @@ bool BranchFolder::TryMergeBlocks(MachineBasicBlock *SuccBB, bool BranchFolder::TailMergeBlocks(MachineFunction &MF) { if (!EnableTailMerge) return false; - + bool MadeChange = false; // First find blocks with no successors. MergePotentials.clear(); for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) { if (I->succ_empty()) - MergePotentials.push_back(std::make_pair(HashEndOfMBB(I, 2U), I)); + MergePotentials.push_back(MergePotentialsElt(HashEndOfMBB(I, 2U), I)); } + // See if we can do any tail merging on those. if (MergePotentials.size() < TailMergeThreshold && MergePotentials.size() >= 2) - MadeChange |= TryMergeBlocks(NULL, NULL); + MadeChange |= TryTailMergeBlocks(NULL, NULL); // Look at blocks (IBB) with multiple predecessors (PBB). // We change each predecessor to a canonical form, by // (1) temporarily removing any unconditional branch from the predecessor // to IBB, and // (2) alter conditional branches so they branch to the other block - // not IBB; this may require adding back an unconditional branch to IBB + // not IBB; this may require adding back an unconditional branch to IBB // later, where there wasn't one coming in. E.g. // Bcc IBB // fallthrough to QBB @@ -693,18 +809,19 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) { // a compile-time infinite loop repeatedly doing and undoing the same // transformations.) - for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) { + for (MachineFunction::iterator I = next(MF.begin()), E = MF.end(); + I != E; ++I) { if (I->pred_size() >= 2 && I->pred_size() < TailMergeThreshold) { SmallPtrSet<MachineBasicBlock *, 8> UniquePreds; MachineBasicBlock *IBB = I; MachineBasicBlock *PredBB = prior(I); MergePotentials.clear(); - for (MachineBasicBlock::pred_iterator P = I->pred_begin(), + for (MachineBasicBlock::pred_iterator P = I->pred_begin(), E2 = I->pred_end(); P != E2; ++P) { - MachineBasicBlock* PBB = *P; + MachineBasicBlock *PBB = *P; // Skip blocks that loop to themselves, can't tail merge these. - if (PBB==IBB) + if (PBB == IBB) continue; // Visit each predecessor only once. if (!UniquePreds.insert(PBB)) @@ -715,7 +832,7 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) { // Failing case: IBB is the target of a cbr, and // we cannot reverse the branch. 
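MergePotentials pairs each candidate block with HashEndOfMBB's hash of its last instructions, and a stable sort then brings blocks with identical tail hashes together so the expensive pairwise suffix comparison only runs within one hash group. A simplified sketch of that grouping step (toy hash and string "instructions", not the LLVM types):

#include <algorithm>
#include <string>
#include <utility>
#include <vector>

typedef std::vector<std::string> Block;       // instructions, oldest first
typedef std::pair<unsigned, unsigned> Cand;   // (tail hash, block index)

// Hash the last up to MinLen instructions; identical tails hash identically.
static unsigned hashEndOfBlock(const Block &B, unsigned MinLen) {
  unsigned Hash = 0;
  for (unsigned I = 0; I < MinLen && I < B.size(); ++I) {
    const std::string &Inst = B[B.size() - 1 - I];
    for (unsigned J = 0; J < Inst.size(); ++J)
      Hash = Hash * 33 + (unsigned char)Inst[J];
  }
  return Hash;
}

// Build and sort the candidate list; equal hashes end up adjacent, so the
// per-pair suffix comparison is confined to each hash group.
static void groupByTailHash(const std::vector<Block> &Blocks, unsigned MinLen,
                            std::vector<Cand> &Cands) {
  Cands.clear();
  for (unsigned I = 0; I < Blocks.size(); ++I)
    Cands.push_back(std::make_pair(hashEndOfBlock(Blocks[I], MinLen), I));
  std::stable_sort(Cands.begin(), Cands.end());
}

Hashing first is what keeps this from degenerating into a full O(n^2) instruction-by-instruction comparison over every pair of predecessors.
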
SmallVector<MachineOperand, 4> NewCond(Cond); - if (!Cond.empty() && TBB==IBB) { + if (!Cond.empty() && TBB == IBB) { if (TII->ReverseBranchCondition(NewCond)) continue; // This is the QBB case described above @@ -727,20 +844,20 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) { // to have a bit in the edge so we didn't have to do all this. if (IBB->isLandingPad()) { MachineFunction::iterator IP = PBB; IP++; - MachineBasicBlock* PredNextBB = NULL; - if (IP!=MF.end()) + MachineBasicBlock *PredNextBB = NULL; + if (IP != MF.end()) PredNextBB = IP; - if (TBB==NULL) { - if (IBB!=PredNextBB) // fallthrough + if (TBB == NULL) { + if (IBB != PredNextBB) // fallthrough continue; } else if (FBB) { - if (TBB!=IBB && FBB!=IBB) // cbr then ubr + if (TBB != IBB && FBB != IBB) // cbr then ubr continue; } else if (Cond.empty()) { - if (TBB!=IBB) // ubr + if (TBB != IBB) // ubr continue; } else { - if (TBB!=IBB && IBB!=PredNextBB) // cbr + if (TBB != IBB && IBB != PredNextBB) // cbr continue; } } @@ -749,19 +866,20 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) { TII->RemoveBranch(*PBB); if (!Cond.empty()) // reinsert conditional branch only, for now - TII->InsertBranch(*PBB, (TBB==IBB) ? FBB : TBB, 0, NewCond); + TII->InsertBranch(*PBB, (TBB == IBB) ? FBB : TBB, 0, NewCond); } - MergePotentials.push_back(std::make_pair(HashEndOfMBB(PBB, 1U), *P)); + MergePotentials.push_back(MergePotentialsElt(HashEndOfMBB(PBB, 1U), + *P)); } } - if (MergePotentials.size() >= 2) - MadeChange |= TryMergeBlocks(I, PredBB); - // Reinsert an unconditional branch if needed. - // The 1 below can occur as a result of removing blocks in TryMergeBlocks. - PredBB = prior(I); // this may have been changed in TryMergeBlocks - if (MergePotentials.size()==1 && - MergePotentials.begin()->second != PredBB) - FixTail(MergePotentials.begin()->second, I, TII); + if (MergePotentials.size() >= 2) + MadeChange |= TryTailMergeBlocks(IBB, PredBB); + // Reinsert an unconditional branch if needed. + // The 1 below can occur as a result of removing blocks in TryTailMergeBlocks. + PredBB = prior(I); // this may have been changed in TryTailMergeBlocks + if (MergePotentials.size() == 1 && + MergePotentials.begin()->getBlock() != PredBB) + FixTail(MergePotentials.begin()->getBlock(), IBB, TII); } } return MadeChange; @@ -773,14 +891,14 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) { bool BranchFolder::OptimizeBranches(MachineFunction &MF) { bool MadeChange = false; - + // Make sure blocks are numbered in order MF.RenumberBlocks(); for (MachineFunction::iterator I = ++MF.begin(), E = MF.end(); I != E; ) { MachineBasicBlock *MBB = I++; MadeChange |= OptimizeBlock(MBB); - + // If it is dead, remove it. if (MBB->pred_empty()) { RemoveDeadBlock(MBB); @@ -801,7 +919,7 @@ bool BranchFolder::OptimizeBranches(MachineFunction &MF) { /// bool BranchFolder::CanFallThrough(MachineBasicBlock *CurBB, bool BranchUnAnalyzable, - MachineBasicBlock *TBB, + MachineBasicBlock *TBB, MachineBasicBlock *FBB, const SmallVectorImpl<MachineOperand> &Cond) { MachineFunction::iterator Fallthrough = CurBB; @@ -809,14 +927,22 @@ bool BranchFolder::CanFallThrough(MachineBasicBlock *CurBB, // If FallthroughBlock is off the end of the function, it can't fall through. if (Fallthrough == CurBB->getParent()->end()) return false; - + // If FallthroughBlock isn't a successor of CurBB, no fallthrough is possible. if (!CurBB->isSuccessor(Fallthrough)) return false; - - // If we couldn't analyze the branch, assume it could fall through. 
- if (BranchUnAnalyzable) return true; - + + // If we couldn't analyze the branch, examine the last instruction. + // If the block doesn't end in a known control barrier, assume fallthrough + // is possible. The isPredicable check is needed because this code can be + // called during IfConversion, where an instruction which is normally a + // Barrier is predicated and thus no longer an actual control barrier. This + // is over-conservative though, because if an instruction isn't actually + // predicated we could still treat it like a barrier. + if (BranchUnAnalyzable) + return CurBB->empty() || !CurBB->back().getDesc().isBarrier() || + CurBB->back().getDesc().isPredicable(); + // If there is no branch, control always falls through. if (TBB == 0) return true; @@ -825,11 +951,11 @@ bool BranchFolder::CanFallThrough(MachineBasicBlock *CurBB, if (MachineFunction::iterator(TBB) == Fallthrough || MachineFunction::iterator(FBB) == Fallthrough) return true; - - // If it's an unconditional branch to some block not the fall through, it + + // If it's an unconditional branch to some block not the fall through, it // doesn't fall through. if (Cond.empty()) return false; - + // Otherwise, if it is conditional and has no explicit false block, it falls // through. return FBB == 0; @@ -853,14 +979,14 @@ bool BranchFolder::CanFallThrough(MachineBasicBlock *CurBB) { /// fall-through to MBB1 than to fall through into MBB2. This has to return /// a strict ordering, returning true for both (MBB1,MBB2) and (MBB2,MBB1) will /// result in infinite loops. -static bool IsBetterFallthrough(MachineBasicBlock *MBB1, +static bool IsBetterFallthrough(MachineBasicBlock *MBB1, MachineBasicBlock *MBB2) { // Right now, we use a simple heuristic. If MBB2 ends with a call, and // MBB1 doesn't, we prefer to fall through into MBB1. This allows us to // optimize branches that branch to either a return block or an assert block // into a fallthrough to the return. if (MBB1->empty() || MBB2->empty()) return false; - + // If there is a clear successor ordering we make sure that one block // will fall through to the next if (MBB1->isSuccessor(MBB2)) return true; @@ -871,14 +997,153 @@ static bool IsBetterFallthrough(MachineBasicBlock *MBB1, return MBB2I->getDesc().isCall() && !MBB1I->getDesc().isCall(); } +/// TailDuplicateBlocks - Look for small blocks that are unconditionally +/// branched to and do not fall through. Tail-duplicate their instructions +/// into their predecessors to eliminate (dynamic) branches. +bool BranchFolder::TailDuplicateBlocks(MachineFunction &MF) { + bool MadeChange = false; + + // Make sure blocks are numbered in order + MF.RenumberBlocks(); + + for (MachineFunction::iterator I = ++MF.begin(), E = MF.end(); I != E; ) { + MachineBasicBlock *MBB = I++; + + // Only duplicate blocks that end with unconditional branches. + if (CanFallThrough(MBB)) + continue; + + MadeChange |= TailDuplicate(MBB, MF); + + // If it is dead, remove it. + if (MBB->pred_empty()) { + RemoveDeadBlock(MBB); + MadeChange = true; + ++NumDeadBlocks; + } + } + return MadeChange; +} + +/// TailDuplicate - If it is profitable, duplicate TailBB's contents in each +/// of its predecessors. +bool BranchFolder::TailDuplicate(MachineBasicBlock *TailBB, + MachineFunction &MF) { + // Don't try to tail-duplicate single-block loops. + if (TailBB->isSuccessor(TailBB)) + return false; + + // Set the limit on the number of instructions to duplicate, with a default + // of one less than the tail-merge threshold. 
When optimizing for size, + // duplicate only one, because one branch instruction can be eliminated to + // compensate for the duplication. + unsigned MaxDuplicateCount = + MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize) ? + 1 : TII->TailDuplicationLimit(*TailBB, TailMergeSize - 1); + + // Check the instructions in the block to determine whether tail-duplication + // is invalid or unlikely to be profitable. + unsigned i = 0; + bool HasCall = false; + for (MachineBasicBlock::iterator I = TailBB->begin(); + I != TailBB->end(); ++I, ++i) { + // Non-duplicable things shouldn't be tail-duplicated. + if (I->getDesc().isNotDuplicable()) return false; + // Don't duplicate more than the threshold. + if (i == MaxDuplicateCount) return false; + // Remember if we saw a call. + if (I->getDesc().isCall()) HasCall = true; + } + // Heuristically, don't tail-duplicate calls if it would expand code size, + // as it's less likely to be worth the extra cost. + if (i > 1 && HasCall) + return false; + + // Iterate through all the unique predecessors and tail-duplicate this + // block into them, if possible. Copying the list ahead of time also + // avoids trouble with the predecessor list reallocating. + bool Changed = false; + SmallSetVector<MachineBasicBlock *, 8> Preds(TailBB->pred_begin(), + TailBB->pred_end()); + for (SmallSetVector<MachineBasicBlock *, 8>::iterator PI = Preds.begin(), + PE = Preds.end(); PI != PE; ++PI) { + MachineBasicBlock *PredBB = *PI; + + assert(TailBB != PredBB && + "Single-block loop should have been rejected earlier!"); + if (PredBB->succ_size() > 1) continue; + + MachineBasicBlock *PredTBB, *PredFBB; + SmallVector<MachineOperand, 4> PredCond; + if (TII->AnalyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true)) + continue; + if (!PredCond.empty()) + continue; + // EH edges are ignored by AnalyzeBranch. + if (PredBB->succ_size() != 1) + continue; + // Don't duplicate into a fall-through predecessor (at least for now). + if (PredBB->isLayoutSuccessor(TailBB) && CanFallThrough(PredBB)) + continue; + + DEBUG(errs() << "\nTail-duplicating into PredBB: " << *PredBB + << "From Succ: " << *TailBB); + + // Remove PredBB's unconditional branch. + TII->RemoveBranch(*PredBB); + // Clone the contents of TailBB into PredBB. + for (MachineBasicBlock::iterator I = TailBB->begin(), E = TailBB->end(); + I != E; ++I) { + MachineInstr *NewMI = MF.CloneMachineInstr(I); + PredBB->insert(PredBB->end(), NewMI); + } + + // Update the CFG. + PredBB->removeSuccessor(PredBB->succ_begin()); + assert(PredBB->succ_empty() && + "TailDuplicate called on block with multiple successors!"); + for (MachineBasicBlock::succ_iterator I = TailBB->succ_begin(), + E = TailBB->succ_end(); I != E; ++I) + PredBB->addSuccessor(*I); + + Changed = true; + } + + // If TailBB was duplicated into all its predecessors except for the prior + // block, which falls through unconditionally, move the contents of this + // block into the prior block. + MachineBasicBlock &PrevBB = *prior(MachineFunction::iterator(TailBB)); + MachineBasicBlock *PriorTBB = 0, *PriorFBB = 0; + SmallVector<MachineOperand, 4> PriorCond; + bool PriorUnAnalyzable = + TII->AnalyzeBranch(PrevBB, PriorTBB, PriorFBB, PriorCond, true); + // This has to check PrevBB->succ_size() because EH edges are ignored by + // AnalyzeBranch. 
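TailDuplicate clones the small block's body into each predecessor whose only successor is that block and then gives the predecessor the block's successors, so the dynamic branch disappears. A toy-CFG sketch of the same rewiring (simplified Node structure, not MachineBasicBlock; predecessor-list maintenance elided):

#include <string>
#include <vector>

// Toy CFG node (illustrative only).
struct Node {
  std::vector<std::string> Insts;   // block body, terminator omitted
  std::vector<Node *> Succs;
  std::vector<Node *> Preds;
};

// Duplicate Tail's body into every predecessor that branches only to Tail,
// and retarget that predecessor at Tail's successors.
static bool tailDuplicate(Node *Tail, unsigned MaxDup) {
  if (Tail->Insts.size() > MaxDup)
    return false;                            // too big to be worth duplicating
  bool Changed = false;
  std::vector<Node *> Preds = Tail->Preds;   // copy: the real list is edited below
  for (unsigned I = 0; I < Preds.size(); ++I) {
    Node *Pred = Preds[I];
    if (Pred->Succs.size() != 1 || Pred->Succs[0] != Tail)
      continue;                              // only unconditional branches to Tail
    // Clone the body into the predecessor.
    Pred->Insts.insert(Pred->Insts.end(), Tail->Insts.begin(), Tail->Insts.end());
    // Pred no longer goes through Tail; it inherits Tail's successors.
    Pred->Succs = Tail->Succs;
    // ... predecessor lists of Tail and its successors would be updated here.
    Changed = true;
  }
  return Changed;
}

The duplication limit plays the role of MaxDuplicateCount above: when optimizing for size only a single instruction is worth copying, since that is all the removed branch pays for.
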
+ if (!PriorUnAnalyzable && PriorCond.empty() && !PriorTBB && + TailBB->pred_size() == 1 && PrevBB.succ_size() == 1 && + !TailBB->hasAddressTaken()) { + DEBUG(errs() << "\nMerging into block: " << PrevBB + << "From MBB: " << *TailBB); + PrevBB.splice(PrevBB.end(), TailBB, TailBB->begin(), TailBB->end()); + PrevBB.removeSuccessor(PrevBB.succ_begin());; + assert(PrevBB.succ_empty()); + PrevBB.transferSuccessors(TailBB); + Changed = true; + } + + return Changed; +} + /// OptimizeBlock - Analyze and optimize control flow related to the specified /// block. This is never called on the entry block. bool BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) { bool MadeChange = false; + MachineFunction &MF = *MBB->getParent(); +ReoptimizeBlock: MachineFunction::iterator FallThrough = MBB; ++FallThrough; - + // If this block is empty, make everyone use its fall-through, not the block // explicitly. Landing pads should not do this since the landing-pad table // points to this block. Blocks with their addresses taken shouldn't be @@ -886,8 +1151,8 @@ bool BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) { if (MBB->empty() && !MBB->isLandingPad() && !MBB->hasAddressTaken()) { // Dead block? Leave for cleanup later. if (MBB->pred_empty()) return MadeChange; - - if (FallThrough == MBB->getParent()->end()) { + + if (FallThrough == MF.end()) { // TODO: Simplify preds to not branch here if possible! } else { // Rewrite all predecessors of the old block to go to the fallthrough @@ -898,8 +1163,7 @@ bool BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) { } // If MBB was the target of a jump table, update jump tables to go to the // fallthrough instead. - MBB->getParent()->getJumpTableInfo()-> - ReplaceMBBInJumpTables(MBB, FallThrough); + MF.getJumpTableInfo()->ReplaceMBBInJumpTables(MBB, FallThrough); MadeChange = true; } return MadeChange; @@ -917,29 +1181,49 @@ bool BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) { // If the CFG for the prior block has extra edges, remove them. MadeChange |= PrevBB.CorrectExtraCFGEdges(PriorTBB, PriorFBB, !PriorCond.empty()); - + // If the previous branch is conditional and both conditions go to the same // destination, remove the branch, replacing it with an unconditional one or // a fall-through. if (PriorTBB && PriorTBB == PriorFBB) { TII->RemoveBranch(PrevBB); - PriorCond.clear(); + PriorCond.clear(); if (PriorTBB != MBB) TII->InsertBranch(PrevBB, PriorTBB, 0, PriorCond); MadeChange = true; ++NumBranchOpts; - return OptimizeBlock(MBB); + goto ReoptimizeBlock; } - + + // If the previous block unconditionally falls through to this block and + // this block has no other predecessors, move the contents of this block + // into the prior block. This doesn't usually happen when SimplifyCFG + // has been used, but it can happen if tail merging splits a fall-through + // predecessor of a block. + // This has to check PrevBB->succ_size() because EH edges are ignored by + // AnalyzeBranch. + if (PriorCond.empty() && !PriorTBB && MBB->pred_size() == 1 && + PrevBB.succ_size() == 1 && + !MBB->hasAddressTaken()) { + DEBUG(errs() << "\nMerging into block: " << PrevBB + << "From MBB: " << *MBB); + PrevBB.splice(PrevBB.end(), MBB, MBB->begin(), MBB->end()); + PrevBB.removeSuccessor(PrevBB.succ_begin());; + assert(PrevBB.succ_empty()); + PrevBB.transferSuccessors(MBB); + MadeChange = true; + return MadeChange; + } + // If the previous branch *only* branches to *this* block (conditional or // not) remove the branch. 
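Several of the hunks above replace the tail-recursive return OptimizeBlock(MBB) calls with goto ReoptimizeBlock, re-running the rewrites on the same block iteratively instead of growing the call stack. The control pattern in isolation, with a hypothetical simplifyOnce standing in for one rewrite step:

// One rewrite step on a toy "block" (an int counter here); returns true if
// it changed anything. Stands in for the individual rewrites in OptimizeBlock.
static bool simplifyOnce(int &Block) {
  if (Block > 0) { --Block; return true; }
  return false;
}

// Goto-based driver: same effect as the old tail-recursive calls, but the
// stack depth no longer depends on how many rewrites apply to one block.
static bool optimizeBlock(int &Block) {
  bool MadeChange = false;
ReoptimizeBlock:
  if (simplifyOnce(Block)) {
    MadeChange = true;
    goto ReoptimizeBlock;
  }
  return MadeChange;
}
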
if (PriorTBB == MBB && PriorFBB == 0) { TII->RemoveBranch(PrevBB); MadeChange = true; ++NumBranchOpts; - return OptimizeBlock(MBB); + goto ReoptimizeBlock; } - + // If the prior block branches somewhere else on the condition and here if // the condition is false, remove the uncond second branch. if (PriorFBB == MBB) { @@ -947,9 +1231,9 @@ bool BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) { TII->InsertBranch(PrevBB, PriorTBB, 0, PriorCond); MadeChange = true; ++NumBranchOpts; - return OptimizeBlock(MBB); + goto ReoptimizeBlock; } - + // If the prior block branches here on true and somewhere else on false, and // if the branch condition is reversible, reverse the branch to create a // fall-through. @@ -960,10 +1244,10 @@ bool BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) { TII->InsertBranch(PrevBB, PriorFBB, 0, NewPriorCond); MadeChange = true; ++NumBranchOpts; - return OptimizeBlock(MBB); + goto ReoptimizeBlock; } } - + // If this block has no successors (e.g. it is a return block or ends with // a call to a no-return function like abort or __cxa_throw) and if the pred // falls through into this block, and if it would otherwise fall through @@ -976,13 +1260,13 @@ bool BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) { MachineFunction::iterator(PriorTBB) == FallThrough && !CanFallThrough(MBB)) { bool DoTransform = true; - + // We have to be careful that the succs of PredBB aren't both no-successor // blocks. If neither have successors and if PredBB is the second from // last block in the function, we'd just keep swapping the two blocks for // last. Only do the swap if one is clearly better to fall through than // the other. - if (FallThrough == --MBB->getParent()->end() && + if (FallThrough == --MF.end() && !IsBetterFallthrough(PriorTBB, MBB)) DoTransform = false; @@ -1000,20 +1284,20 @@ bool BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) { if (DoTransform && !MBB->succ_empty() && (!CanFallThrough(PriorTBB) || PriorTBB->empty())) DoTransform = false; - - + + if (DoTransform) { // Reverse the branch so we will fall through on the previous true cond. SmallVector<MachineOperand, 4> NewPriorCond(PriorCond); if (!TII->ReverseBranchCondition(NewPriorCond)) { DEBUG(errs() << "\nMoving MBB: " << *MBB << "To make fallthrough to: " << *PriorTBB << "\n"); - + TII->RemoveBranch(PrevBB); TII->InsertBranch(PrevBB, MBB, 0, NewPriorCond); // Move this block to the end of the function. - MBB->moveAfter(--MBB->getParent()->end()); + MBB->moveAfter(--MF.end()); MadeChange = true; ++NumBranchOpts; return MadeChange; @@ -1021,7 +1305,7 @@ bool BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) { } } } - + // Analyze the branch in the current block. MachineBasicBlock *CurTBB = 0, *CurFBB = 0; SmallVector<MachineOperand, 4> CurCond; @@ -1030,7 +1314,7 @@ bool BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) { // If the CFG for the prior block has extra edges, remove them. MadeChange |= MBB->CorrectExtraCFGEdges(CurTBB, CurFBB, !CurCond.empty()); - // If this is a two-way branch, and the FBB branches to this block, reverse + // If this is a two-way branch, and the FBB branches to this block, reverse // the condition so the single-basic-block loop is faster. 
Instead of: // Loop: xxx; jcc Out; jmp Loop // we want: @@ -1042,14 +1326,13 @@ bool BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) { TII->InsertBranch(*MBB, CurFBB, CurTBB, NewCond); MadeChange = true; ++NumBranchOpts; - return OptimizeBlock(MBB); + goto ReoptimizeBlock; } } - - + // If this branch is the only thing in its block, see if we can forward // other blocks across it. - if (CurTBB && CurCond.empty() && CurFBB == 0 && + if (CurTBB && CurCond.empty() && CurFBB == 0 && MBB->begin()->getDesc().isBranch() && CurTBB != MBB && !MBB->hasAddressTaken()) { // This block may contain just an unconditional branch. Because there can @@ -1068,7 +1351,7 @@ bool BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) { !PrevBB.isSuccessor(MBB)) { // If the prior block falls through into us, turn it into an // explicit branch to us to make updates simpler. - if (!PredHasNoFallThrough && PrevBB.isSuccessor(MBB) && + if (!PredHasNoFallThrough && PrevBB.isSuccessor(MBB) && PriorTBB != MBB && PriorFBB != MBB) { if (PriorTBB == 0) { assert(PriorCond.empty() && PriorFBB == 0 && @@ -1104,18 +1387,17 @@ bool BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) { NewCurFBB, NewCurCond, true); if (!NewCurUnAnalyzable && NewCurTBB && NewCurTBB == NewCurFBB) { TII->RemoveBranch(*PMBB); - NewCurCond.clear(); + NewCurCond.clear(); TII->InsertBranch(*PMBB, NewCurTBB, 0, NewCurCond); MadeChange = true; ++NumBranchOpts; - PMBB->CorrectExtraCFGEdges(NewCurTBB, NewCurFBB, false); + PMBB->CorrectExtraCFGEdges(NewCurTBB, 0, false); } } } // Change any jumptables to go to the new MBB. - MBB->getParent()->getJumpTableInfo()-> - ReplaceMBBInJumpTables(MBB, CurTBB); + MF.getJumpTableInfo()->ReplaceMBBInJumpTables(MBB, CurTBB); if (DidChange) { ++NumBranchOpts; MadeChange = true; @@ -1123,7 +1405,7 @@ bool BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) { } } } - + // Add the branch back if the block is more than just an uncond branch. TII->InsertBranch(*MBB, CurTBB, 0, CurCond); } @@ -1134,9 +1416,10 @@ bool BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) { // place to move this block where a fall-through will happen. if (!CanFallThrough(&PrevBB, PriorUnAnalyzable, PriorTBB, PriorFBB, PriorCond)) { + // Now we know that there was no fall-through into this block, check to // see if it has a fall-through into its successor. - bool CurFallsThru = CanFallThrough(MBB, CurUnAnalyzable, CurTBB, CurFBB, + bool CurFallsThru = CanFallThrough(MBB, CurUnAnalyzable, CurTBB, CurFBB, CurCond); if (!MBB->isLandingPad()) { @@ -1147,12 +1430,15 @@ bool BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) { // Analyze the branch at the end of the pred. MachineBasicBlock *PredBB = *PI; MachineFunction::iterator PredFallthrough = PredBB; ++PredFallthrough; - if (PredBB != MBB && !CanFallThrough(PredBB) + MachineBasicBlock *PredTBB, *PredFBB; + SmallVector<MachineOperand, 4> PredCond; + if (PredBB != MBB && !CanFallThrough(PredBB) && + !TII->AnalyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true) && (!CurFallsThru || !CurTBB || !CurFBB) && (!CurFallsThru || MBB->getNumber() >= PredBB->getNumber())) { // If the current block doesn't fall through, just move it. // If the current block can fall through and does not end with a - // conditional branch, we need to append an unconditional jump to + // conditional branch, we need to append an unconditional jump to // the (current) next block. To avoid a possible compile-time // infinite loop, move blocks only backward in this case. 
// Also, if there are already 2 branches here, we cannot add a third; @@ -1167,11 +1453,11 @@ bool BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) { } MBB->moveAfter(PredBB); MadeChange = true; - return OptimizeBlock(MBB); + goto ReoptimizeBlock; } } } - + if (!CurFallsThru) { // Check all successors to see if we can move this block before it. for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(), @@ -1179,26 +1465,29 @@ bool BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) { // Analyze the branch at the end of the block before the succ. MachineBasicBlock *SuccBB = *SI; MachineFunction::iterator SuccPrev = SuccBB; --SuccPrev; - std::vector<MachineOperand> SuccPrevCond; - + // If this block doesn't already fall-through to that successor, and if // the succ doesn't already have a block that can fall through into it, // and if the successor isn't an EH destination, we can arrange for the // fallthrough to happen. - if (SuccBB != MBB && !CanFallThrough(SuccPrev) && + if (SuccBB != MBB && &*SuccPrev != MBB && + !CanFallThrough(SuccPrev) && !CurUnAnalyzable && !SuccBB->isLandingPad()) { MBB->moveBefore(SuccBB); MadeChange = true; - return OptimizeBlock(MBB); + goto ReoptimizeBlock; } } - + // Okay, there is no really great place to put this block. If, however, // the block before this one would be a fall-through if this block were // removed, move this block to the end of the function. - if (FallThrough != MBB->getParent()->end() && + MachineBasicBlock *PrevTBB, *PrevFBB; + SmallVector<MachineOperand, 4> PrevCond; + if (FallThrough != MF.end() && + !TII->AnalyzeBranch(PrevBB, PrevTBB, PrevFBB, PrevCond, true) && PrevBB.isSuccessor(FallThrough)) { - MBB->moveAfter(--MBB->getParent()->end()); + MBB->moveAfter(--MF.end()); MadeChange = true; return MadeChange; } diff --git a/lib/CodeGen/BranchFolding.h b/lib/CodeGen/BranchFolding.h index 9763e33..4920755 100644 --- a/lib/CodeGen/BranchFolding.h +++ b/lib/CodeGen/BranchFolding.h @@ -11,7 +11,6 @@ #define LLVM_CODEGEN_BRANCHFOLDING_HPP #include "llvm/CodeGen/MachineBasicBlock.h" -#include "llvm/CodeGen/MachineFunctionPass.h" #include <vector> namespace llvm { @@ -20,6 +19,7 @@ namespace llvm { class RegScavenger; class TargetInstrInfo; class TargetRegisterInfo; + template<typename T> class SmallVectorImpl; class BranchFolder { public: @@ -30,11 +30,58 @@ namespace llvm { const TargetRegisterInfo *tri, MachineModuleInfo *mmi); private: - typedef std::pair<unsigned,MachineBasicBlock*> MergePotentialsElt; + class MergePotentialsElt { + unsigned Hash; + MachineBasicBlock *Block; + public: + MergePotentialsElt(unsigned h, MachineBasicBlock *b) + : Hash(h), Block(b) {} + + unsigned getHash() const { return Hash; } + MachineBasicBlock *getBlock() const { return Block; } + + void setBlock(MachineBasicBlock *MBB) { + Block = MBB; + } + + bool operator<(const MergePotentialsElt &) const; + }; typedef std::vector<MergePotentialsElt>::iterator MPIterator; std::vector<MergePotentialsElt> MergePotentials; - typedef std::pair<MPIterator, MachineBasicBlock::iterator> SameTailElt; + class SameTailElt { + MPIterator MPIter; + MachineBasicBlock::iterator TailStartPos; + public: + SameTailElt(MPIterator mp, MachineBasicBlock::iterator tsp) + : MPIter(mp), TailStartPos(tsp) {} + + MPIterator getMPIter() const { + return MPIter; + } + MergePotentialsElt &getMergePotentialsElt() const { + return *getMPIter(); + } + MachineBasicBlock::iterator getTailStartPos() const { + return TailStartPos; + } + unsigned getHash() const { + return 
getMergePotentialsElt().getHash(); + } + MachineBasicBlock *getBlock() const { + return getMergePotentialsElt().getBlock(); + } + bool tailIsWholeBlock() const { + return TailStartPos == getBlock()->begin(); + } + + void setBlock(MachineBasicBlock *MBB) { + getMergePotentialsElt().setBlock(MBB); + } + void setTailStartPos(MachineBasicBlock::iterator Pos) { + TailStartPos = Pos; + } + }; std::vector<SameTailElt> SameTails; bool EnableTailMerge; @@ -44,18 +91,23 @@ namespace llvm { RegScavenger *RS; bool TailMergeBlocks(MachineFunction &MF); - bool TryMergeBlocks(MachineBasicBlock* SuccBB, - MachineBasicBlock* PredBB); + bool TryTailMergeBlocks(MachineBasicBlock* SuccBB, + MachineBasicBlock* PredBB); void ReplaceTailWithBranchTo(MachineBasicBlock::iterator OldInst, MachineBasicBlock *NewDest); MachineBasicBlock *SplitMBBAt(MachineBasicBlock &CurMBB, MachineBasicBlock::iterator BBI1); - unsigned ComputeSameTails(unsigned CurHash, unsigned minCommonTailLength); + unsigned ComputeSameTails(unsigned CurHash, unsigned minCommonTailLength, + MachineBasicBlock *SuccBB, + MachineBasicBlock *PredBB); void RemoveBlocksWithHash(unsigned CurHash, MachineBasicBlock* SuccBB, MachineBasicBlock* PredBB); unsigned CreateCommonTailOnlyBlock(MachineBasicBlock *&PredBB, unsigned maxCommonTailLength); + bool TailDuplicateBlocks(MachineFunction &MF); + bool TailDuplicate(MachineBasicBlock *TailBB, MachineFunction &MF); + bool OptimizeBranches(MachineFunction &MF); bool OptimizeBlock(MachineBasicBlock *MBB); void RemoveDeadBlock(MachineBasicBlock *MBB); @@ -66,19 +118,6 @@ namespace llvm { MachineBasicBlock *TBB, MachineBasicBlock *FBB, const SmallVectorImpl<MachineOperand> &Cond); }; - - - /// BranchFolderPass - Wrap branch folder in a machine function pass. - class BranchFolderPass : public MachineFunctionPass, - public BranchFolder { - public: - static char ID; - explicit BranchFolderPass(bool defaultEnableTailMerge) - : MachineFunctionPass(&ID), BranchFolder(defaultEnableTailMerge) {} - - virtual bool runOnMachineFunction(MachineFunction &MF); - virtual const char *getPassName() const { return "Control Flow Optimizer"; } - }; } #endif /* LLVM_CODEGEN_BRANCHFOLDING_HPP */ diff --git a/lib/CodeGen/CodePlacementOpt.cpp b/lib/CodeGen/CodePlacementOpt.cpp index 6fff12c..e9844d8 100644 --- a/lib/CodeGen/CodePlacementOpt.cpp +++ b/lib/CodeGen/CodePlacementOpt.cpp @@ -56,7 +56,6 @@ namespace { MachineFunction::iterator InsertPt, MachineFunction::iterator Begin, MachineFunction::iterator End); - void UpdateTerminator(MachineBasicBlock *MBB); bool EliminateUnconditionalJumpsToTop(MachineFunction &MF, MachineLoop *L); bool MoveDiscontiguousLoopBlocks(MachineFunction &MF, @@ -141,66 +140,9 @@ void CodePlacementOpt::Splice(MachineFunction &MF, MF.splice(InsertPt, Begin, End); - UpdateTerminator(prior(Begin)); - UpdateTerminator(OldBeginPrior); - UpdateTerminator(OldEndPrior); -} - -/// UpdateTerminator - Update the terminator instructions in MBB to account -/// for changes to the layout. If the block previously used a fallthrough, -/// it may now need a branch, and if it previously used branching it may now -/// be able to use a fallthrough. -/// -void CodePlacementOpt::UpdateTerminator(MachineBasicBlock *MBB) { - // A block with no successors has no concerns with fall-through edges. 
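In BranchFolding.h the std::pair typedefs become small classes whose accessors (getHash, getBlock, getTailStartPos, tailIsWholeBlock) carry the meaning that the old .first/.second chains obscured. The shape of that refactor on a generic element, for illustration only:

#include <utility>

// Before: callers write P.first / P.second and must remember which is which.
typedef std::pair<unsigned, int> RawElt;

// After: same data, but intent is carried by the accessor names.
class Elt {
  unsigned Hash;
  int BlockNumber;
public:
  Elt(unsigned H, int B) : Hash(H), BlockNumber(B) {}
  unsigned getHash() const { return Hash; }
  int getBlockNumber() const { return BlockNumber; }
  void setBlockNumber(int B) { BlockNumber = B; }
};
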
- if (MBB->succ_empty()) return; - - MachineBasicBlock *TBB = 0, *FBB = 0; - SmallVector<MachineOperand, 4> Cond; - bool B = TII->AnalyzeBranch(*MBB, TBB, FBB, Cond); - (void) B; - assert(!B && "UpdateTerminators requires analyzable predecessors!"); - if (Cond.empty()) { - if (TBB) { - // The block has an unconditional branch. If its successor is now - // its layout successor, delete the branch. - if (MBB->isLayoutSuccessor(TBB)) - TII->RemoveBranch(*MBB); - } else { - // The block has an unconditional fallthrough. If its successor is not - // its layout successor, insert a branch. - TBB = *MBB->succ_begin(); - if (!MBB->isLayoutSuccessor(TBB)) - TII->InsertBranch(*MBB, TBB, 0, Cond); - } - } else { - if (FBB) { - // The block has a non-fallthrough conditional branch. If one of its - // successors is its layout successor, rewrite it to a fallthrough - // conditional branch. - if (MBB->isLayoutSuccessor(TBB)) { - TII->RemoveBranch(*MBB); - TII->ReverseBranchCondition(Cond); - TII->InsertBranch(*MBB, FBB, 0, Cond); - } else if (MBB->isLayoutSuccessor(FBB)) { - TII->RemoveBranch(*MBB); - TII->InsertBranch(*MBB, TBB, 0, Cond); - } - } else { - // The block has a fallthrough conditional branch. - MachineBasicBlock *MBBA = *MBB->succ_begin(); - MachineBasicBlock *MBBB = *next(MBB->succ_begin()); - if (MBBA == TBB) std::swap(MBBB, MBBA); - if (MBB->isLayoutSuccessor(TBB)) { - TII->RemoveBranch(*MBB); - TII->ReverseBranchCondition(Cond); - TII->InsertBranch(*MBB, MBBA, 0, Cond); - } else if (!MBB->isLayoutSuccessor(MBBA)) { - TII->RemoveBranch(*MBB); - TII->InsertBranch(*MBB, TBB, MBBA, Cond); - } - } - } + prior(Begin)->updateTerminator(); + OldBeginPrior->updateTerminator(); + OldEndPrior->updateTerminator(); } /// EliminateUnconditionalJumpsToTop - Move blocks which unconditionally jump diff --git a/lib/CodeGen/IntrinsicLowering.cpp b/lib/CodeGen/IntrinsicLowering.cpp index 3e3b28a..8a3bd0b 100644 --- a/lib/CodeGen/IntrinsicLowering.cpp +++ b/lib/CodeGen/IntrinsicLowering.cpp @@ -515,6 +515,15 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) { if (CI->getType() != Type::getVoidTy(Context)) CI->replaceAllUsesWith(ConstantInt::get(CI->getType(), 1)); break; + case Intrinsic::invariant_start: + case Intrinsic::lifetime_start: + // Discard region information. + CI->replaceAllUsesWith(UndefValue::get(CI->getType())); + break; + case Intrinsic::invariant_end: + case Intrinsic::lifetime_end: + // Discard region information. + break; } assert(CI->use_empty() && diff --git a/lib/CodeGen/LatencyPriorityQueue.cpp b/lib/CodeGen/LatencyPriorityQueue.cpp index 794ecf7..23dce4a 100644 --- a/lib/CodeGen/LatencyPriorityQueue.cpp +++ b/lib/CodeGen/LatencyPriorityQueue.cpp @@ -55,7 +55,10 @@ SUnit *LatencyPriorityQueue::getSingleUnscheduledPred(SUnit *SU) { SUnit *OnlyAvailablePred = 0; for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); I != E; ++I) { - if (IgnoreAntiDep && (I->getKind() == SDep::Anti)) continue; + if (IgnoreAntiDep && + ((I->getKind() == SDep::Anti) || (I->getKind() == SDep::Output))) + continue; + SUnit &Pred = *I->getSUnit(); if (!Pred.isScheduled) { // We found an available, but not scheduled, predecessor. 
If it's the @@ -75,7 +78,10 @@ void LatencyPriorityQueue::push_impl(SUnit *SU) { unsigned NumNodesBlocking = 0; for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); I != E; ++I) { - if (IgnoreAntiDep && (I->getKind() == SDep::Anti)) continue; + if (IgnoreAntiDep && + ((I->getKind() == SDep::Anti) || (I->getKind() == SDep::Output))) + continue; + if (getSingleUnscheduledPred(I->getSUnit()) == SU) ++NumNodesBlocking; } @@ -92,7 +98,10 @@ void LatencyPriorityQueue::push_impl(SUnit *SU) { void LatencyPriorityQueue::ScheduledNode(SUnit *SU) { for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); I != E; ++I) { - if (IgnoreAntiDep && (I->getKind() == SDep::Anti)) continue; + if (IgnoreAntiDep && + ((I->getKind() == SDep::Anti) || (I->getKind() == SDep::Output))) + continue; + AdjustPriorityOfUnscheduledPreds(I->getSUnit()); } } diff --git a/lib/CodeGen/LiveIntervalAnalysis.cpp b/lib/CodeGen/LiveIntervalAnalysis.cpp index 2a93a35..a60d34f 100644 --- a/lib/CodeGen/LiveIntervalAnalysis.cpp +++ b/lib/CodeGen/LiveIntervalAnalysis.cpp @@ -53,7 +53,8 @@ static cl::opt<bool> DisableReMat("disable-rematerialization", static cl::opt<bool> EnableFastSpilling("fast-spill", cl::init(false), cl::Hidden); -static cl::opt<bool> EarlyCoalescing("early-coalescing", cl::init(false)); +static cl::opt<bool> EarlyCoalescing("early-coalescing", + cl::init(false), cl::Hidden); static cl::opt<int> CoalescingLimit("early-coalescing-limit", cl::init(-1), cl::Hidden); @@ -646,17 +647,17 @@ void LiveIntervals::handleLiveInRegister(MachineBasicBlock *MBB, 0, false, VNInfoAllocator); vni->setIsPHIDef(true); LiveRange LR(start, end, vni); - + interval.addRange(LR); LR.valno->addKill(end); DEBUG(errs() << " +" << LR << '\n'); } -bool -LiveIntervals::isProfitableToCoalesce(LiveInterval &DstInt, LiveInterval &SrcInt, - SmallVector<MachineInstr*,16> &IdentCopies, - SmallVector<MachineInstr*,16> &OtherCopies) { - bool HaveConflict = false; +bool LiveIntervals:: +isSafeAndProfitableToCoalesce(LiveInterval &DstInt, + LiveInterval &SrcInt, + SmallVector<MachineInstr*,16> &IdentCopies, + SmallVector<MachineInstr*,16> &OtherCopies) { unsigned NumIdent = 0; for (MachineRegisterInfo::def_iterator ri = mri_->def_begin(SrcInt.reg), re = mri_->def_end(); ri != re; ++ri) { @@ -665,16 +666,16 @@ LiveIntervals::isProfitableToCoalesce(LiveInterval &DstInt, LiveInterval &SrcInt if (!tii_->isMoveInstr(*MI, SrcReg, DstReg, SrcSubReg, DstSubReg)) return false; if (SrcReg != DstInt.reg) { + // Non-identity copy - we cannot handle overlapping intervals + if (DstInt.liveAt(getInstructionIndex(MI))) + return false; OtherCopies.push_back(MI); - HaveConflict |= DstInt.liveAt(getInstructionIndex(MI)); } else { IdentCopies.push_back(MI); ++NumIdent; } } - if (!HaveConflict) - return false; // Let coalescer handle it return IdentCopies.size() > OtherCopies.size(); } @@ -701,19 +702,21 @@ void LiveIntervals::performEarlyCoalescing() { LiveInterval &SrcInt = getInterval(PHISrc); SmallVector<MachineInstr*, 16> IdentCopies; SmallVector<MachineInstr*, 16> OtherCopies; - if (!isProfitableToCoalesce(DstInt, SrcInt, IdentCopies, OtherCopies)) + if (!isSafeAndProfitableToCoalesce(DstInt, SrcInt, + IdentCopies, OtherCopies)) continue; DEBUG(errs() << "PHI Join: " << *Join); assert(DstInt.containsOneValue() && "PHI join should have just one val#!"); + assert(std::distance(mri_->use_begin(PHISrc), mri_->use_end()) == 1 && + "PHI join src should not be used elsewhere"); VNInfo *VNI = DstInt.getValNumInfo(0); // Change the 
non-identity copies to directly target the phi destination. for (unsigned i = 0, e = OtherCopies.size(); i != e; ++i) { MachineInstr *PHICopy = OtherCopies[i]; - DEBUG(errs() << "Moving: " << *PHICopy); - SlotIndex MIIndex = getInstructionIndex(PHICopy); + DEBUG(errs() << "Moving: " << MIIndex << ' ' << *PHICopy); SlotIndex DefIndex = MIIndex.getDefIndex(); LiveRange *SLR = SrcInt.getLiveRangeContaining(DefIndex); SlotIndex StartIndex = SLR->start; @@ -724,8 +727,7 @@ void LiveIntervals::performEarlyCoalescing() { SrcInt.removeValNo(SLR->valno); DEBUG(errs() << " added range [" << StartIndex << ',' << EndIndex << "] to reg" << DstInt.reg << '\n'); - if (DstInt.liveAt(StartIndex)) - DstInt.removeRange(StartIndex, EndIndex); + assert (!DstInt.liveAt(StartIndex) && "Cannot coalesce when dst live!"); VNInfo *NewVNI = DstInt.getNextValue(DefIndex, PHICopy, true, VNInfoAllocator); NewVNI->setHasPHIKill(true); diff --git a/lib/CodeGen/LiveVariables.cpp b/lib/CodeGen/LiveVariables.cpp index 96c655c..16a79bb 100644 --- a/lib/CodeGen/LiveVariables.cpp +++ b/lib/CodeGen/LiveVariables.cpp @@ -50,6 +50,14 @@ void LiveVariables::getAnalysisUsage(AnalysisUsage &AU) const { MachineFunctionPass::getAnalysisUsage(AU); } +MachineInstr * +LiveVariables::VarInfo::findKill(const MachineBasicBlock *MBB) const { + for (unsigned i = 0, e = Kills.size(); i != e; ++i) + if (Kills[i]->getParent() == MBB) + return Kills[i]; + return NULL; +} + void LiveVariables::VarInfo::dump() const { errs() << " Alive in blocks: "; for (SparseBitVector<>::iterator I = AliveBlocks.begin(), @@ -222,8 +230,9 @@ MachineInstr *LiveVariables::FindLastPartialDef(unsigned Reg, /// implicit defs to a machine instruction if there was an earlier def of its /// super-register. void LiveVariables::HandlePhysRegUse(unsigned Reg, MachineInstr *MI) { + MachineInstr *LastDef = PhysRegDef[Reg]; // If there was a previous use or a "full" def all is well. - if (!PhysRegDef[Reg] && !PhysRegUse[Reg]) { + if (!LastDef && !PhysRegUse[Reg]) { // Otherwise, the last sub-register def implicitly defines this register. // e.g. // AH = @@ -257,6 +266,11 @@ void LiveVariables::HandlePhysRegUse(unsigned Reg, MachineInstr *MI) { } } } + else if (LastDef && !PhysRegUse[Reg] && + !LastDef->findRegisterDefOperand(Reg)) + // Last def defines the super register, add an implicit def of reg. + LastDef->addOperand(MachineOperand::CreateReg(Reg, + true/*IsDef*/, true/*IsImp*/)); // Remember this use. PhysRegUse[Reg] = MI; @@ -641,3 +655,36 @@ void LiveVariables::analyzePHINodes(const MachineFunction& Fn) { PHIVarInfo[BBI->getOperand(i + 1).getMBB()->getNumber()] .push_back(BBI->getOperand(i).getReg()); } + +/// addNewBlock - Add a new basic block BB as an empty succcessor to DomBB. All +/// variables that are live out of DomBB will be marked as passing live through +/// BB. +void LiveVariables::addNewBlock(MachineBasicBlock *BB, + MachineBasicBlock *DomBB) { + const unsigned NumNew = BB->getNumber(); + const unsigned NumDom = DomBB->getNumber(); + + // Update info for all live variables + for (unsigned Reg = TargetRegisterInfo::FirstVirtualRegister, + E = MRI->getLastVirtReg()+1; Reg != E; ++Reg) { + VarInfo &VI = getVarInfo(Reg); + + // Anything live through DomBB is also live through BB. + if (VI.AliveBlocks.test(NumDom)) { + VI.AliveBlocks.set(NumNew); + continue; + } + + // Variables not defined in DomBB cannot be live out. + const MachineInstr *Def = MRI->getVRegDef(Reg); + if (!Def || Def->getParent() != DomBB) + continue; + + // Killed by DomBB? 
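// Taken together, the checks in this loop (alive-through DomBB, defined in
// DomBB, and the findKill test just below) implement a single live-out rule.
// A minimal standalone sketch of that rule, using invented names and plain
// STL containers rather than the LLVM types:
#include <set>

struct SketchVarInfo {
  std::set<int> AliveBlocks;   // numbers of blocks the value lives through
  std::set<int> KillBlocks;    // numbers of blocks containing a kill
  int DefBlock;                // number of the block containing the def
};

// The value is live out of DomBB (and therefore live through the new empty
// successor) iff it passes all the way through DomBB, or it is defined in
// DomBB and not killed there.
static bool liveThroughNewBlock(const SketchVarInfo &VI, int DomBB) {
  if (VI.AliveBlocks.count(DomBB))
    return true;
  return VI.DefBlock == DomBB && !VI.KillBlocks.count(DomBB);
}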
+ if (VI.findKill(DomBB)) + continue; + + // This register is defined in DomBB and live out + VI.AliveBlocks.set(NumNew); + } +} diff --git a/lib/CodeGen/MachineBasicBlock.cpp b/lib/CodeGen/MachineBasicBlock.cpp index 7fbdb12..cd52825 100644 --- a/lib/CodeGen/MachineBasicBlock.cpp +++ b/lib/CodeGen/MachineBasicBlock.cpp @@ -17,6 +17,7 @@ #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetInstrDesc.h" +#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Support/LeakDetector.h" #include "llvm/Support/raw_ostream.h" @@ -242,6 +243,58 @@ void MachineBasicBlock::moveAfter(MachineBasicBlock *NewBefore) { getParent()->splice(++BBI, this); } +void MachineBasicBlock::updateTerminator() { + const TargetInstrInfo *TII = getParent()->getTarget().getInstrInfo(); + // A block with no successors has no concerns with fall-through edges. + if (this->succ_empty()) return; + + MachineBasicBlock *TBB = 0, *FBB = 0; + SmallVector<MachineOperand, 4> Cond; + bool B = TII->AnalyzeBranch(*this, TBB, FBB, Cond); + (void) B; + assert(!B && "UpdateTerminators requires analyzable predecessors!"); + if (Cond.empty()) { + if (TBB) { + // The block has an unconditional branch. If its successor is now + // its layout successor, delete the branch. + if (isLayoutSuccessor(TBB)) + TII->RemoveBranch(*this); + } else { + // The block has an unconditional fallthrough. If its successor is not + // its layout successor, insert a branch. + TBB = *succ_begin(); + if (!isLayoutSuccessor(TBB)) + TII->InsertBranch(*this, TBB, 0, Cond); + } + } else { + if (FBB) { + // The block has a non-fallthrough conditional branch. If one of its + // successors is its layout successor, rewrite it to a fallthrough + // conditional branch. + if (isLayoutSuccessor(TBB)) { + TII->RemoveBranch(*this); + TII->ReverseBranchCondition(Cond); + TII->InsertBranch(*this, FBB, 0, Cond); + } else if (isLayoutSuccessor(FBB)) { + TII->RemoveBranch(*this); + TII->InsertBranch(*this, TBB, 0, Cond); + } + } else { + // The block has a fallthrough conditional branch. 
+ MachineBasicBlock *MBBA = *succ_begin(); + MachineBasicBlock *MBBB = *next(succ_begin()); + if (MBBA == TBB) std::swap(MBBB, MBBA); + if (isLayoutSuccessor(TBB)) { + TII->RemoveBranch(*this); + TII->ReverseBranchCondition(Cond); + TII->InsertBranch(*this, MBBA, 0, Cond); + } else if (!isLayoutSuccessor(MBBA)) { + TII->RemoveBranch(*this); + TII->InsertBranch(*this, TBB, MBBA, Cond); + } + } + } +} void MachineBasicBlock::addSuccessor(MachineBasicBlock *succ) { Successors.push_back(succ); @@ -371,10 +424,7 @@ bool MachineBasicBlock::CorrectExtraCFGEdges(MachineBasicBlock *DestA, MachineBasicBlock::succ_iterator SI = succ_begin(); MachineBasicBlock *OrigDestA = DestA, *OrigDestB = DestB; while (SI != succ_end()) { - if (*SI == DestA && DestA == DestB) { - DestA = DestB = 0; - ++SI; - } else if (*SI == DestA) { + if (*SI == DestA) { DestA = 0; ++SI; } else if (*SI == DestB) { @@ -397,3 +447,8 @@ bool MachineBasicBlock::CorrectExtraCFGEdges(MachineBasicBlock *DestA, } return MadeChange; } + +void llvm::WriteAsOperand(raw_ostream &OS, const MachineBasicBlock *MBB, + bool t) { + OS << "BB#" << MBB->getNumber(); +} diff --git a/lib/CodeGen/MachineFunction.cpp b/lib/CodeGen/MachineFunction.cpp index 5a1d9e6..81d1301 100644 --- a/lib/CodeGen/MachineFunction.cpp +++ b/lib/CodeGen/MachineFunction.cpp @@ -441,9 +441,10 @@ DebugLocTuple MachineFunction::getDebugLocTuple(DebugLoc DL) const { /// index with a negative value. /// int MachineFrameInfo::CreateFixedObject(uint64_t Size, int64_t SPOffset, - bool Immutable) { + bool Immutable, bool isSS) { assert(Size != 0 && "Cannot allocate zero size fixed stack objects!"); - Objects.insert(Objects.begin(), StackObject(Size, 1, SPOffset, Immutable)); + Objects.insert(Objects.begin(), StackObject(Size, 1, SPOffset, Immutable, + isSS)); return -++NumFixedObjects; } @@ -529,10 +530,6 @@ void MachineFrameInfo::dump(const MachineFunction &MF) const { unsigned MachineJumpTableInfo::getJumpTableIndex( const std::vector<MachineBasicBlock*> &DestBBs) { assert(!DestBBs.empty() && "Cannot create an empty jump table!"); - for (unsigned i = 0, e = JumpTables.size(); i != e; ++i) - if (JumpTables[i].MBBs == DestBBs) - return i; - JumpTables.push_back(MachineJumpTableEntry(DestBBs)); return JumpTables.size()-1; } @@ -544,14 +541,25 @@ MachineJumpTableInfo::ReplaceMBBInJumpTables(MachineBasicBlock *Old, MachineBasicBlock *New) { assert(Old != New && "Not making a change?"); bool MadeChange = false; - for (size_t i = 0, e = JumpTables.size(); i != e; ++i) { - MachineJumpTableEntry &JTE = JumpTables[i]; - for (size_t j = 0, e = JTE.MBBs.size(); j != e; ++j) - if (JTE.MBBs[j] == Old) { - JTE.MBBs[j] = New; - MadeChange = true; - } - } + for (size_t i = 0, e = JumpTables.size(); i != e; ++i) + ReplaceMBBInJumpTable(i, Old, New); + return MadeChange; +} + +/// ReplaceMBBInJumpTable - If Old is a target of the jump tables, update +/// the jump table to branch to New instead. 
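// The per-table replacement described above can be restated as a standalone
// analogue over plain vectors; ints stand in for MachineBasicBlock* and the
// names below are invented for illustration only.
#include <cstddef>
#include <vector>

static bool replaceInOneTable(std::vector<int> &Table, int Old, int New) {
  bool MadeChange = false;
  for (std::size_t j = 0, e = Table.size(); j != e; ++j)
    if (Table[j] == Old) {
      Table[j] = New;          // retarget this jump table entry
      MadeChange = true;
    }
  return MadeChange;
}

// A caller that wants a whole-function "did anything change" flag would OR
// together the per-table results.
static bool replaceInAllTables(std::vector<std::vector<int> > &Tables,
                               int Old, int New) {
  bool MadeChange = false;
  for (std::size_t i = 0, e = Tables.size(); i != e; ++i)
    MadeChange |= replaceInOneTable(Tables[i], Old, New);
  return MadeChange;
}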
+bool +MachineJumpTableInfo::ReplaceMBBInJumpTable(unsigned Idx, + MachineBasicBlock *Old, + MachineBasicBlock *New) { + assert(Old != New && "Not making a change?"); + bool MadeChange = false; + MachineJumpTableEntry &JTE = JumpTables[Idx]; + for (size_t j = 0, e = JTE.MBBs.size(); j != e; ++j) + if (JTE.MBBs[j] == Old) { + JTE.MBBs[j] = New; + MadeChange = true; + } return MadeChange; } diff --git a/lib/CodeGen/MachineFunctionAnalysis.cpp b/lib/CodeGen/MachineFunctionAnalysis.cpp index 56294d9..f5febc5 100644 --- a/lib/CodeGen/MachineFunctionAnalysis.cpp +++ b/lib/CodeGen/MachineFunctionAnalysis.cpp @@ -24,7 +24,7 @@ X("Machine Function Analysis", "machine-function-analysis", char MachineFunctionAnalysis::ID = 0; -MachineFunctionAnalysis::MachineFunctionAnalysis(TargetMachine &tm, +MachineFunctionAnalysis::MachineFunctionAnalysis(const TargetMachine &tm, CodeGenOpt::Level OL) : FunctionPass(&ID), TM(tm), OptLevel(OL), MF(0) { } diff --git a/lib/CodeGen/MachineInstr.cpp b/lib/CodeGen/MachineInstr.cpp index 5744c8a..b250faa 100644 --- a/lib/CodeGen/MachineInstr.cpp +++ b/lib/CodeGen/MachineInstr.cpp @@ -189,19 +189,19 @@ bool MachineOperand::isIdenticalTo(const MachineOperand &Other) const { /// print - Print the specified machine operand. /// void MachineOperand::print(raw_ostream &OS, const TargetMachine *TM) const { + // If the instruction is embedded into a basic block, we can find the + // target info for the instruction. + if (!TM) + if (const MachineInstr *MI = getParent()) + if (const MachineBasicBlock *MBB = MI->getParent()) + if (const MachineFunction *MF = MBB->getParent()) + TM = &MF->getTarget(); + switch (getType()) { case MachineOperand::MO_Register: if (getReg() == 0 || TargetRegisterInfo::isVirtualRegister(getReg())) { OS << "%reg" << getReg(); } else { - // If the instruction is embedded into a basic block, we can find the - // target info for the instruction. - if (TM == 0) - if (const MachineInstr *MI = getParent()) - if (const MachineBasicBlock *MBB = MI->getParent()) - if (const MachineFunction *MF = MBB->getParent()) - TM = &MF->getTarget(); - if (TM) OS << "%" << TM->getRegisterInfo()->get(getReg()).Name; else @@ -265,7 +265,8 @@ void MachineOperand::print(raw_ostream &OS, const TargetMachine *TM) const { OS << "<jt#" << getIndex() << '>'; break; case MachineOperand::MO_GlobalAddress: - OS << "<ga:" << ((Value*)getGlobal())->getName(); + OS << "<ga:"; + WriteAsOperand(OS, getGlobal(), /*PrintType=*/false); if (getOffset()) OS << "+" << getOffset(); OS << '>'; break; @@ -375,7 +376,7 @@ raw_ostream &llvm::operator<<(raw_ostream &OS, const MachineMemOperand &MMO) { /// MachineInstr ctor - This constructor creates a dummy MachineInstr with /// TID NULL and no operands. MachineInstr::MachineInstr() - : TID(0), NumImplicitOps(0), MemRefs(0), MemRefsEnd(0), + : TID(0), NumImplicitOps(0), AsmPrinterFlags(0), MemRefs(0), MemRefsEnd(0), Parent(0), debugLoc(DebugLoc::getUnknownLoc()) { // Make sure that we get added to a machine basicblock LeakDetector::addGarbageObject(this); @@ -395,7 +396,8 @@ void MachineInstr::addImplicitDefUseOperands() { /// TargetInstrDesc or the numOperands if it is not zero. (for /// instructions with variable number of operands). 
MachineInstr::MachineInstr(const TargetInstrDesc &tid, bool NoImp) - : TID(&tid), NumImplicitOps(0), MemRefs(0), MemRefsEnd(0), Parent(0), + : TID(&tid), NumImplicitOps(0), AsmPrinterFlags(0), + MemRefs(0), MemRefsEnd(0), Parent(0), debugLoc(DebugLoc::getUnknownLoc()) { if (!NoImp && TID->getImplicitDefs()) for (const unsigned *ImpDefs = TID->getImplicitDefs(); *ImpDefs; ++ImpDefs) @@ -413,7 +415,7 @@ MachineInstr::MachineInstr(const TargetInstrDesc &tid, bool NoImp) /// MachineInstr ctor - As above, but with a DebugLoc. MachineInstr::MachineInstr(const TargetInstrDesc &tid, const DebugLoc dl, bool NoImp) - : TID(&tid), NumImplicitOps(0), MemRefs(0), MemRefsEnd(0), + : TID(&tid), NumImplicitOps(0), AsmPrinterFlags(0), MemRefs(0), MemRefsEnd(0), Parent(0), debugLoc(dl) { if (!NoImp && TID->getImplicitDefs()) for (const unsigned *ImpDefs = TID->getImplicitDefs(); *ImpDefs; ++ImpDefs) @@ -433,7 +435,8 @@ MachineInstr::MachineInstr(const TargetInstrDesc &tid, const DebugLoc dl, /// basic block. /// MachineInstr::MachineInstr(MachineBasicBlock *MBB, const TargetInstrDesc &tid) - : TID(&tid), NumImplicitOps(0), MemRefs(0), MemRefsEnd(0), Parent(0), + : TID(&tid), NumImplicitOps(0), AsmPrinterFlags(0), + MemRefs(0), MemRefsEnd(0), Parent(0), debugLoc(DebugLoc::getUnknownLoc()) { assert(MBB && "Cannot use inserting ctor with null basic block!"); if (TID->ImplicitDefs) @@ -453,7 +456,7 @@ MachineInstr::MachineInstr(MachineBasicBlock *MBB, const TargetInstrDesc &tid) /// MachineInstr::MachineInstr(MachineBasicBlock *MBB, const DebugLoc dl, const TargetInstrDesc &tid) - : TID(&tid), NumImplicitOps(0), MemRefs(0), MemRefsEnd(0), + : TID(&tid), NumImplicitOps(0), AsmPrinterFlags(0), MemRefs(0), MemRefsEnd(0), Parent(0), debugLoc(dl) { assert(MBB && "Cannot use inserting ctor with null basic block!"); if (TID->ImplicitDefs) @@ -472,7 +475,7 @@ MachineInstr::MachineInstr(MachineBasicBlock *MBB, const DebugLoc dl, /// MachineInstr ctor - Copies MachineInstr arg exactly /// MachineInstr::MachineInstr(MachineFunction &MF, const MachineInstr &MI) - : TID(&MI.getDesc()), NumImplicitOps(0), + : TID(&MI.getDesc()), NumImplicitOps(0), AsmPrinterFlags(0), MemRefs(MI.MemRefs), MemRefsEnd(MI.MemRefsEnd), Parent(0), debugLoc(MI.getDebugLoc()) { Operands.reserve(MI.getNumOperands()); @@ -1060,9 +1063,16 @@ void MachineInstr::dump() const { } void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM) const { - unsigned StartOp = 0, e = getNumOperands(); + // We can be a bit tidier if we know the TargetMachine and/or MachineFunction. + const MachineFunction *MF = 0; + if (const MachineBasicBlock *MBB = getParent()) { + MF = MBB->getParent(); + if (!TM && MF) + TM = &MF->getTarget(); + } // Print explicitly defined operands on the left of an assignment syntax. + unsigned StartOp = 0, e = getNumOperands(); for (; StartOp < e && getOperand(StartOp).isReg() && getOperand(StartOp).isDef() && !getOperand(StartOp).isImplicit(); @@ -1078,11 +1088,45 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM) const { OS << getDesc().getName(); // Print the rest of the operands. + bool OmittedAnyCallClobbers = false; + bool FirstOp = true; for (unsigned i = StartOp, e = getNumOperands(); i != e; ++i) { - if (i != StartOp) - OS << ","; + const MachineOperand &MO = getOperand(i); + + // Omit call-clobbered registers which aren't used anywhere. This makes + // call instructions much less noisy on targets where calls clobber lots + // of registers. 
Don't rely on MO.isDead() because we may be called before + // LiveVariables is run, or we may be looking at a non-allocatable reg. + if (MF && getDesc().isCall() && + MO.isReg() && MO.isImplicit() && MO.isDef()) { + unsigned Reg = MO.getReg(); + if (Reg != 0 && TargetRegisterInfo::isPhysicalRegister(Reg)) { + const MachineRegisterInfo &MRI = MF->getRegInfo(); + if (MRI.use_empty(Reg) && !MRI.isLiveOut(Reg)) { + bool HasAliasLive = false; + for (const unsigned *Alias = TM->getRegisterInfo()->getAliasSet(Reg); + unsigned AliasReg = *Alias; ++Alias) + if (!MRI.use_empty(AliasReg) || MRI.isLiveOut(AliasReg)) { + HasAliasLive = true; + break; + } + if (!HasAliasLive) { + OmittedAnyCallClobbers = true; + continue; + } + } + } + } + + if (FirstOp) FirstOp = false; else OS << ","; OS << " "; - getOperand(i).print(OS, TM); + MO.print(OS, TM); + } + + // Briefly indicate whether any call clobbers were omitted. + if (OmittedAnyCallClobbers) { + if (FirstOp) FirstOp = false; else OS << ","; + OS << " ..."; } bool HaveSemi = false; @@ -1098,12 +1142,11 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM) const { } } - if (!debugLoc.isUnknown()) { + if (!debugLoc.isUnknown() && MF) { if (!HaveSemi) OS << ";"; HaveSemi = true; // TODO: print InlinedAtLoc information - const MachineFunction *MF = getParent()->getParent(); DebugLocTuple DLT = MF->getDebugLocTuple(debugLoc); DICompileUnit CU(DLT.Scope); if (!CU.isNull()) diff --git a/lib/CodeGen/MachineLICM.cpp b/lib/CodeGen/MachineLICM.cpp index de3ab27..33b6b82 100644 --- a/lib/CodeGen/MachineLICM.cpp +++ b/lib/CodeGen/MachineLICM.cpp @@ -22,6 +22,7 @@ #define DEBUG_TYPE "machine-licm" #include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineMemOperand.h" @@ -43,6 +44,7 @@ STATISTIC(NumCSEed, "Number of hoisted machine instructions CSEed"); namespace { class MachineLICM : public MachineFunctionPass { + MachineConstantPool *MCP; const TargetMachine *TM; const TargetInstrInfo *TII; const TargetRegisterInfo *TRI; @@ -111,6 +113,11 @@ namespace { /// be hoistable. MachineInstr *ExtractHoistableLoad(MachineInstr *MI); + /// LookForDuplicate - Find an instruction amount PrevMIs that is a + /// duplicate of MI. Return this instruction if it's found. + const MachineInstr *LookForDuplicate(const MachineInstr *MI, + std::vector<const MachineInstr*> &PrevMIs); + /// EliminateCSE - Given a LICM'ed instruction, look for an instruction on /// the preheader that compute the same value. If it's found, do a RAU on /// with the definition of the existing instruction rather than hoisting @@ -153,6 +160,7 @@ bool MachineLICM::runOnMachineFunction(MachineFunction &MF) { DEBUG(errs() << "******** Machine LICM ********\n"); Changed = FirstInLoop = false; + MCP = MF.getConstantPool(); TM = &MF.getTarget(); TII = TM->getInstrInfo(); TRI = TM->getRegisterInfo(); @@ -234,9 +242,9 @@ bool MachineLICM::IsLoopInvariantInst(MachineInstr &I) { // to decide whether the loaded value is actually a constant. If so, we can // actually use it as a load. if (!I.isInvariantLoad(AA)) - // FIXME: we should be able to sink loads with no other side effects if - // there is nothing that can change memory from here until the end of - // block. This is a trivial form of alias analysis. + // FIXME: we should be able to hoist loads with no other side effects if + // there are no other instructions which can change memory in this loop. 
+ // This is a trivial form of alias analysis. return false; } @@ -432,32 +440,12 @@ void MachineLICM::InitCSEMap(MachineBasicBlock *BB) { } } -static const MachineInstr *LookForDuplicate(const MachineInstr *MI, - std::vector<const MachineInstr*> &PrevMIs, - MachineRegisterInfo *RegInfo) { - unsigned NumOps = MI->getNumOperands(); +const MachineInstr* +MachineLICM::LookForDuplicate(const MachineInstr *MI, + std::vector<const MachineInstr*> &PrevMIs) { for (unsigned i = 0, e = PrevMIs.size(); i != e; ++i) { const MachineInstr *PrevMI = PrevMIs[i]; - unsigned NumOps2 = PrevMI->getNumOperands(); - if (NumOps != NumOps2) - continue; - bool IsSame = true; - for (unsigned j = 0; j != NumOps; ++j) { - const MachineOperand &MO = MI->getOperand(j); - if (MO.isReg() && MO.isDef()) { - if (RegInfo->getRegClass(MO.getReg()) != - RegInfo->getRegClass(PrevMI->getOperand(j).getReg())) { - IsSame = false; - break; - } - continue; - } - if (!MO.isIdenticalTo(PrevMI->getOperand(j))) { - IsSame = false; - break; - } - } - if (IsSame) + if (TII->isIdentical(MI, PrevMI, RegInfo)) return PrevMI; } return 0; @@ -465,18 +453,19 @@ static const MachineInstr *LookForDuplicate(const MachineInstr *MI, bool MachineLICM::EliminateCSE(MachineInstr *MI, DenseMap<unsigned, std::vector<const MachineInstr*> >::iterator &CI) { - if (CI != CSEMap.end()) { - if (const MachineInstr *Dup = LookForDuplicate(MI, CI->second, RegInfo)) { - DEBUG(errs() << "CSEing " << *MI << " with " << *Dup); - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - const MachineOperand &MO = MI->getOperand(i); - if (MO.isReg() && MO.isDef()) - RegInfo->replaceRegWith(MO.getReg(), Dup->getOperand(i).getReg()); - } - MI->eraseFromParent(); - ++NumCSEed; - return true; + if (CI == CSEMap.end()) + return false; + + if (const MachineInstr *Dup = LookForDuplicate(MI, CI->second)) { + DEBUG(errs() << "CSEing " << *MI << " with " << *Dup); + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI->getOperand(i); + if (MO.isReg() && MO.isDef()) + RegInfo->replaceRegWith(MO.getReg(), Dup->getOperand(i).getReg()); } + MI->eraseFromParent(); + ++NumCSEed; + return true; } return false; } diff --git a/lib/CodeGen/MachineModuleInfo.cpp b/lib/CodeGen/MachineModuleInfo.cpp index b62803f..4b067a0 100644 --- a/lib/CodeGen/MachineModuleInfo.cpp +++ b/lib/CodeGen/MachineModuleInfo.cpp @@ -76,9 +76,7 @@ void MachineModuleInfo::EndFunction() { FilterEnds.clear(); CallsEHReturn = 0; CallsUnwindInit = 0; -#ifdef ATTACH_DEBUG_INFO_TO_AN_INSN VariableDbgInfo.clear(); -#endif } /// AnalyzeModule - Scan the module for global debug information. diff --git a/lib/CodeGen/MachineVerifier.cpp b/lib/CodeGen/MachineVerifier.cpp index 99812e0..be9f68f 100644 --- a/lib/CodeGen/MachineVerifier.cpp +++ b/lib/CodeGen/MachineVerifier.cpp @@ -175,6 +175,10 @@ FunctionPass *llvm::createMachineVerifierPass(bool allowPhysDoubleDefs) { return new MachineVerifier(allowPhysDoubleDefs); } +void MachineFunction::verify() const { + MachineVerifier().runOnMachineFunction(const_cast<MachineFunction&>(*this)); +} + bool MachineVerifier::runOnMachineFunction(MachineFunction &MF) { raw_ostream *OutFile = 0; if (OutFileName) { @@ -287,7 +291,18 @@ void MachineVerifier::visitMachineFunctionBefore() { markReachable(&MF->front()); } -void MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) { +// Does iterator point to a and b as the first two elements? 
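// The order-insensitive check described above, shown as a standalone analogue
// over a plain array (the helper added below operates on a successor
// iterator). firstTwoAre and the sample blocks are invented for illustration.
#include <cassert>

template <typename T>
static bool firstTwoAre(const T *Seq, T A, T B) {
  if (Seq[0] == A) return Seq[1] == B;
  if (Seq[0] == B) return Seq[1] == A;
  return false;
}

int main() {
  int X = 0, Y = 0, Z = 0;
  int *Succ[] = { &X, &Y };
  assert( firstTwoAre<int*>(Succ, &X, &Y));   // exact order is accepted
  assert( firstTwoAre<int*>(Succ, &Y, &X));   // swapped order is accepted too
  assert(!firstTwoAre<int*>(Succ, &X, &Z));   // a wrong pair is rejected
  return 0;
}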
+bool matchPair(MachineBasicBlock::const_succ_iterator i, + const MachineBasicBlock *a, const MachineBasicBlock *b) { + if (*i == a) + return *++i == b; + if (*i == b) + return *++i == a; + return false; +} + +void +MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) { const TargetInstrInfo *TII = MF->getTarget().getInstrInfo(); // Start with minimal CFG sanity checks. @@ -379,8 +394,7 @@ void MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) } if (MBB->succ_size() != 2) { report("MBB exits via conditional branch/fall-through but doesn't have " "exactly two CFG successors!", MBB); - } else if ((MBB->succ_begin()[0] == TBB && MBB->succ_end()[1] == MBBI) || - (MBB->succ_begin()[1] == TBB && MBB->succ_end()[0] == MBBI)) { + } else if (!matchPair(MBB->succ_begin(), TBB, MBBI)) { report("MBB exits via conditional branch/fall-through but the CFG " "successors don't match the actual successors!", MBB); } @@ -400,8 +414,7 @@ void MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) if (MBB->succ_size() != 2) { report("MBB exits via conditional branch/branch but doesn't have " "exactly two CFG successors!", MBB); - } else if ((MBB->succ_begin()[0] == TBB && MBB->succ_end()[1] == FBB) || - (MBB->succ_begin()[1] == TBB && MBB->succ_end()[0] == FBB)) { + } else if (!matchPair(MBB->succ_begin(), TBB, FBB)) { report("MBB exits via conditional branch/branch but the CFG " "successors don't match the actual successors!", MBB); } diff --git a/lib/CodeGen/PHIElimination.cpp b/lib/CodeGen/PHIElimination.cpp index 8071b0a..cd38dd1 100644 --- a/lib/CodeGen/PHIElimination.cpp +++ b/lib/CodeGen/PHIElimination.cpp @@ -15,24 +15,32 @@ #define DEBUG_TYPE "phielim" #include "PHIElimination.h" -#include "llvm/BasicBlock.h" -#include "llvm/Instructions.h" #include "llvm/CodeGen/LiveVariables.h" #include "llvm/CodeGen/Passes.h" -#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/RegAllocRegistry.h" +#include "llvm/Function.h" #include "llvm/Target/TargetMachine.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/Statistic.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Compiler.h" +#include "llvm/Support/Debug.h" #include <algorithm> #include <map> using namespace llvm; STATISTIC(NumAtomic, "Number of atomic phis lowered"); +STATISTIC(NumSplits, "Number of critical edges split on demand"); + +static cl::opt<bool> +SplitEdges("split-phi-edges", + cl::desc("Split critical edges during phi elimination"), + cl::init(false), cl::Hidden); char PHIElimination::ID = 0; static RegisterPass<PHIElimination> @@ -40,11 +48,26 @@ X("phi-node-elimination", "Eliminate PHI nodes for register allocation"); const PassInfo *const llvm::PHIEliminationID = &X; +namespace llvm { FunctionPass *createLocalRegisterAllocator(); } + +// Should we run edge splitting? +static bool shouldSplitEdges() { + // Edge splitting breaks the local register allocator. It cannot tolerate + // LiveVariables being run. 
+ if (RegisterRegAlloc::getDefault() == createLocalRegisterAllocator) + return false; + return SplitEdges; +} + void llvm::PHIElimination::getAnalysisUsage(AnalysisUsage &AU) const { - AU.setPreservesCFG(); AU.addPreserved<LiveVariables>(); - AU.addPreservedID(MachineLoopInfoID); - AU.addPreservedID(MachineDominatorsID); + AU.addPreserved<MachineDominatorTree>(); + if (shouldSplitEdges()) { + AU.addRequired<LiveVariables>(); + } else { + AU.setPreservesCFG(); + AU.addPreservedID(MachineLoopInfoID); + } MachineFunctionPass::getAnalysisUsage(AU); } @@ -53,10 +76,16 @@ bool llvm::PHIElimination::runOnMachineFunction(MachineFunction &Fn) { PHIDefs.clear(); PHIKills.clear(); - analyzePHINodes(Fn); - bool Changed = false; + // Split critical edges to help the coalescer + if (shouldSplitEdges()) + for (MachineFunction::iterator I = Fn.begin(), E = Fn.end(); I != E; ++I) + Changed |= SplitPHIEdges(Fn, *I); + + // Populate VRegPHIUseCount + analyzePHINodes(Fn); + // Eliminate PHI instructions by inserting copies into predecessor blocks. for (MachineFunction::iterator I = Fn.begin(), E = Fn.end(); I != E; ++I) Changed |= EliminatePHINodes(Fn, *I); @@ -75,7 +104,6 @@ bool llvm::PHIElimination::runOnMachineFunction(MachineFunction &Fn) { return Changed; } - /// EliminatePHINodes - Eliminate phi nodes by inserting copy instructions in /// predecessor basic blocks. /// @@ -107,26 +135,28 @@ static bool isSourceDefinedByImplicitDef(const MachineInstr *MPhi, return true; } -// FindCopyInsertPoint - Find a safe place in MBB to insert a copy from SrcReg. -// This needs to be after any def or uses of SrcReg, but before any subsequent -// point where control flow might jump out of the basic block. +// FindCopyInsertPoint - Find a safe place in MBB to insert a copy from SrcReg +// when following the CFG edge to SuccMBB. This needs to be after any def of +// SrcReg, but before any subsequent point where control flow might jump out of +// the basic block. MachineBasicBlock::iterator llvm::PHIElimination::FindCopyInsertPoint(MachineBasicBlock &MBB, + MachineBasicBlock &SuccMBB, unsigned SrcReg) { // Handle the trivial case trivially. if (MBB.empty()) return MBB.begin(); - // If this basic block does not contain an invoke, then control flow always - // reaches the end of it, so place the copy there. The logic below works in - // this case too, but is more expensive. - if (!isa<InvokeInst>(MBB.getBasicBlock()->getTerminator())) + // Usually, we just want to insert the copy before the first terminator + // instruction. However, for the edge going to a landing pad, we must insert + // the copy before the call/invoke instruction. + if (!SuccMBB.isLandingPad()) return MBB.getFirstTerminator(); - // Discover any definition/uses in this basic block. + // Discover any defs/uses in this basic block. SmallPtrSet<MachineInstr*, 8> DefUsesInMBB; for (MachineRegisterInfo::reg_iterator RI = MRI->reg_begin(SrcReg), - RE = MRI->reg_end(); RI != RE; ++RI) { + RE = MRI->reg_end(); RI != RE; ++RI) { MachineInstr *DefUseMI = &*RI; if (DefUseMI->getParent() == &MBB) DefUsesInMBB.insert(DefUseMI); @@ -134,14 +164,14 @@ llvm::PHIElimination::FindCopyInsertPoint(MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertPoint; if (DefUsesInMBB.empty()) { - // No def/uses. Insert the copy at the start of the basic block. + // No defs. Insert the copy at the start of the basic block. InsertPoint = MBB.begin(); } else if (DefUsesInMBB.size() == 1) { - // Insert the copy immediately after the definition/use. 
+ // Insert the copy immediately after the def/use. InsertPoint = *DefUsesInMBB.begin(); ++InsertPoint; } else { - // Insert the copy immediately after the last definition/use. + // Insert the copy immediately after the last def/use. InsertPoint = MBB.end(); while (!DefUsesInMBB.count(&*--InsertPoint)) {} ++InsertPoint; @@ -155,7 +185,7 @@ llvm::PHIElimination::FindCopyInsertPoint(MachineBasicBlock &MBB, /// under the assuption that it needs to be lowered in a way that supports /// atomic execution of PHIs. This lowering method is always correct all of the /// time. -/// +/// void llvm::PHIElimination::LowerAtomicPHINode( MachineBasicBlock &MBB, MachineBasicBlock::iterator AfterPHIsIt) { @@ -186,7 +216,7 @@ void llvm::PHIElimination::LowerAtomicPHINode( } // Record PHI def. - assert(!hasPHIDef(DestReg) && "Vreg has multiple phi-defs?"); + assert(!hasPHIDef(DestReg) && "Vreg has multiple phi-defs?"); PHIDefs[DestReg] = &MBB; // Update live variable information if there is any. @@ -250,92 +280,35 @@ void llvm::PHIElimination::LowerAtomicPHINode( // basic block. if (!MBBsInsertedInto.insert(&opBlock)) continue; // If the copy has already been emitted, we're done. - + // Find a safe location to insert the copy, this may be the first terminator // in the block (or end()). - MachineBasicBlock::iterator InsertPos = FindCopyInsertPoint(opBlock, SrcReg); + MachineBasicBlock::iterator InsertPos = + FindCopyInsertPoint(opBlock, MBB, SrcReg); // Insert the copy. TII->copyRegToReg(opBlock, InsertPos, IncomingReg, SrcReg, RC, RC); // Now update live variable information if we have it. Otherwise we're done if (!LV) continue; - + // We want to be able to insert a kill of the register if this PHI (aka, the // copy we just inserted) is the last use of the source value. Live // variable analysis conservatively handles this by saying that the value is // live until the end of the block the PHI entry lives in. If the value // really is dead at the PHI copy, there will be no successor blocks which // have the value live-in. - // - // Check to see if the copy is the last use, and if so, update the live - // variables information so that it knows the copy source instruction kills - // the incoming value. - LiveVariables::VarInfo &InRegVI = LV->getVarInfo(SrcReg); - - // Loop over all of the successors of the basic block, checking to see if - // the value is either live in the block, or if it is killed in the block. + // Also check to see if this register is in use by another PHI node which // has not yet been eliminated. If so, it will be killed at an appropriate // point later. // Is it used by any PHI instructions in this block? - bool ValueIsLive = VRegPHIUseCount[BBVRegPair(&opBlock, SrcReg)] != 0; - - std::vector<MachineBasicBlock*> OpSuccBlocks; - - // Otherwise, scan successors, including the BB the PHI node lives in. - for (MachineBasicBlock::succ_iterator SI = opBlock.succ_begin(), - E = opBlock.succ_end(); SI != E && !ValueIsLive; ++SI) { - MachineBasicBlock *SuccMBB = *SI; - - // Is it alive in this successor? - unsigned SuccIdx = SuccMBB->getNumber(); - if (InRegVI.AliveBlocks.test(SuccIdx)) { - ValueIsLive = true; - break; - } - - OpSuccBlocks.push_back(SuccMBB); - } - - // Check to see if this value is live because there is a use in a successor - // that kills it. 
- if (!ValueIsLive) { - switch (OpSuccBlocks.size()) { - case 1: { - MachineBasicBlock *MBB = OpSuccBlocks[0]; - for (unsigned i = 0, e = InRegVI.Kills.size(); i != e; ++i) - if (InRegVI.Kills[i]->getParent() == MBB) { - ValueIsLive = true; - break; - } - break; - } - case 2: { - MachineBasicBlock *MBB1 = OpSuccBlocks[0], *MBB2 = OpSuccBlocks[1]; - for (unsigned i = 0, e = InRegVI.Kills.size(); i != e; ++i) - if (InRegVI.Kills[i]->getParent() == MBB1 || - InRegVI.Kills[i]->getParent() == MBB2) { - ValueIsLive = true; - break; - } - break; - } - default: - std::sort(OpSuccBlocks.begin(), OpSuccBlocks.end()); - for (unsigned i = 0, e = InRegVI.Kills.size(); i != e; ++i) - if (std::binary_search(OpSuccBlocks.begin(), OpSuccBlocks.end(), - InRegVI.Kills[i]->getParent())) { - ValueIsLive = true; - break; - } - } - } + bool ValueIsUsed = VRegPHIUseCount[BBVRegPair(&opBlock, SrcReg)] != 0; // Okay, if we now know that the value is not live out of the block, we can // add a kill marker in this block saying that it kills the incoming value! - if (!ValueIsLive) { + if (!ValueIsUsed && !isLiveOut(SrcReg, opBlock, *LV)) { // In our final twist, we have to decide which instruction kills the // register. In most cases this is the copy, however, the first // terminator instruction at the end of the block may also use the value. @@ -346,7 +319,7 @@ void llvm::PHIElimination::LowerAtomicPHINode( if (Term != opBlock.end()) { if (Term->readsRegister(SrcReg)) KillInst = Term; - + // Check that no other terminators use values. #ifndef NDEBUG for (MachineBasicBlock::iterator TI = next(Term); TI != opBlock.end(); @@ -357,16 +330,16 @@ void llvm::PHIElimination::LowerAtomicPHINode( } #endif } - + // Finally, mark it killed. LV->addVirtualRegisterKilled(SrcReg, KillInst); // This vreg no longer lives all of the way through opBlock. unsigned opBlockNum = opBlock.getNumber(); - InRegVI.AliveBlocks.reset(opBlockNum); + LV->getVarInfo(SrcReg).AliveBlocks.reset(opBlockNum); } } - + // Really delete the PHI instruction now! MF.DeleteMachineInstr(MPhi); ++NumAtomic; @@ -386,3 +359,134 @@ void llvm::PHIElimination::analyzePHINodes(const MachineFunction& Fn) { ++VRegPHIUseCount[BBVRegPair(BBI->getOperand(i + 1).getMBB(), BBI->getOperand(i).getReg())]; } + +bool llvm::PHIElimination::SplitPHIEdges(MachineFunction &MF, + MachineBasicBlock &MBB) { + if (MBB.empty() || MBB.front().getOpcode() != TargetInstrInfo::PHI) + return false; // Quick exit for basic blocks without PHIs. + LiveVariables &LV = getAnalysis<LiveVariables>(); + for (MachineBasicBlock::const_iterator BBI = MBB.begin(), BBE = MBB.end(); + BBI != BBE && BBI->getOpcode() == TargetInstrInfo::PHI; ++BBI) { + for (unsigned i = 1, e = BBI->getNumOperands(); i != e; i += 2) { + unsigned Reg = BBI->getOperand(i).getReg(); + MachineBasicBlock *PreMBB = BBI->getOperand(i+1).getMBB(); + // We break edges when registers are live out from the predecessor block + // (not considering PHI nodes). If the register is live in to this block + // anyway, we would gain nothing from splitting. + if (isLiveOut(Reg, *PreMBB, LV) && !isLiveIn(Reg, MBB, LV)) + SplitCriticalEdge(PreMBB, &MBB); + } + } + return true; +} + +bool llvm::PHIElimination::isLiveOut(unsigned Reg, const MachineBasicBlock &MBB, + LiveVariables &LV) { + LiveVariables::VarInfo &VI = LV.getVarInfo(Reg); + + // Loop over all of the successors of the basic block, checking to see if + // the value is either live in the block, or if it is killed in the block. 
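// The loop below, together with the size-1/size-2/sorted-search cases that
// follow it, computes a single rule: the value is live out iff some successor
// either has it marked alive-through or contains a kill of it. A compact
// standalone restatement with invented names and plain STL containers:
#include <cstddef>
#include <set>
#include <vector>

static bool liveOutOfBlock(const std::set<int> &AliveBlocks,
                           const std::set<int> &KillBlocks,
                           const std::vector<int> &Successors) {
  for (std::size_t i = 0, e = Successors.size(); i != e; ++i)
    if (AliveBlocks.count(Successors[i]) || KillBlocks.count(Successors[i]))
      return true;
  return false;
}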
+ std::vector<MachineBasicBlock*> OpSuccBlocks; + for (MachineBasicBlock::const_succ_iterator SI = MBB.succ_begin(), + E = MBB.succ_end(); SI != E; ++SI) { + MachineBasicBlock *SuccMBB = *SI; + + // Is it alive in this successor? + unsigned SuccIdx = SuccMBB->getNumber(); + if (VI.AliveBlocks.test(SuccIdx)) + return true; + OpSuccBlocks.push_back(SuccMBB); + } + + // Check to see if this value is live because there is a use in a successor + // that kills it. + switch (OpSuccBlocks.size()) { + case 1: { + MachineBasicBlock *SuccMBB = OpSuccBlocks[0]; + for (unsigned i = 0, e = VI.Kills.size(); i != e; ++i) + if (VI.Kills[i]->getParent() == SuccMBB) + return true; + break; + } + case 2: { + MachineBasicBlock *SuccMBB1 = OpSuccBlocks[0], *SuccMBB2 = OpSuccBlocks[1]; + for (unsigned i = 0, e = VI.Kills.size(); i != e; ++i) + if (VI.Kills[i]->getParent() == SuccMBB1 || + VI.Kills[i]->getParent() == SuccMBB2) + return true; + break; + } + default: + std::sort(OpSuccBlocks.begin(), OpSuccBlocks.end()); + for (unsigned i = 0, e = VI.Kills.size(); i != e; ++i) + if (std::binary_search(OpSuccBlocks.begin(), OpSuccBlocks.end(), + VI.Kills[i]->getParent())) + return true; + } + return false; +} + +bool llvm::PHIElimination::isLiveIn(unsigned Reg, const MachineBasicBlock &MBB, + LiveVariables &LV) { + LiveVariables::VarInfo &VI = LV.getVarInfo(Reg); + + if (VI.AliveBlocks.test(MBB.getNumber())) + return true; + + // defined in MBB? + const MachineInstr *Def = MRI->getVRegDef(Reg); + if (Def && Def->getParent() == &MBB) + return false; + + // killed in MBB? + return VI.findKill(&MBB); +} + +MachineBasicBlock *PHIElimination::SplitCriticalEdge(MachineBasicBlock *A, + MachineBasicBlock *B) { + assert(A && B && "Missing MBB end point"); + + MachineFunction *MF = A->getParent(); + + // We may need to update A's terminator, but we can't do that if AnalyzeBranch + // fails. If A uses a jump table, we won't touch it. + const TargetInstrInfo *TII = MF->getTarget().getInstrInfo(); + MachineBasicBlock *TBB = 0, *FBB = 0; + SmallVector<MachineOperand, 4> Cond; + if (TII->AnalyzeBranch(*A, TBB, FBB, Cond)) + return NULL; + + ++NumSplits; + + MachineBasicBlock *NMBB = MF->CreateMachineBasicBlock(); + MF->push_back(NMBB); + DEBUG(errs() << "PHIElimination splitting critical edge:" + " BB#" << A->getNumber() + << " -- BB#" << NMBB->getNumber() + << " -- BB#" << B->getNumber() << '\n'); + + A->ReplaceUsesOfBlockWith(B, NMBB); + // If A may fall through to B, we may have to insert a branch. + if (A->isLayoutSuccessor(B)) + A->updateTerminator(); + + // Insert unconditional "jump B" instruction in NMBB. 
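// The statements around this point add up to a short rewiring recipe for
// splitting the critical edge A -> B through a new block N. A standalone
// sketch over a toy CFG (ToyBlock and splitEdge are invented; the real pass
// must also fix up branch instructions and the register/dominator analyses):
#include <cstddef>
#include <utility>
#include <vector>

struct ToyBlock {
  std::vector<ToyBlock*> Succs;
  // PHI-like incoming records in this block: (value id, predecessor block).
  std::vector<std::pair<int, ToyBlock*> > Incoming;
};

static void splitEdge(ToyBlock &A, ToyBlock &B, ToyBlock &N) {
  for (std::size_t i = 0, e = A.Succs.size(); i != e; ++i)
    if (A.Succs[i] == &B)
      A.Succs[i] = &N;                  // ReplaceUsesOfBlockWith(B, NMBB)
  N.Succs.push_back(&B);                // NMBB->addSuccessor(B)
  for (std::size_t i = 0, e = B.Incoming.size(); i != e; ++i)
    if (B.Incoming[i].second == &A)
      B.Incoming[i].second = &N;        // retarget PHI operands in B
}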
+ NMBB->addSuccessor(B); + Cond.clear(); + MF->getTarget().getInstrInfo()->InsertBranch(*NMBB, B, NULL, Cond); + + // Fix PHI nodes in B so they refer to NMBB instead of A + for (MachineBasicBlock::iterator i = B->begin(), e = B->end(); + i != e && i->getOpcode() == TargetInstrInfo::PHI; ++i) + for (unsigned ni = 1, ne = i->getNumOperands(); ni != ne; ni += 2) + if (i->getOperand(ni+1).getMBB() == A) + i->getOperand(ni+1).setMBB(NMBB); + + if (LiveVariables *LV=getAnalysisIfAvailable<LiveVariables>()) + LV->addNewBlock(NMBB, A); + + if (MachineDominatorTree *MDT=getAnalysisIfAvailable<MachineDominatorTree>()) + MDT->addNewBlock(NMBB, A); + + return NMBB; +} diff --git a/lib/CodeGen/PHIElimination.h b/lib/CodeGen/PHIElimination.h index 3d02dfd..94716ee 100644 --- a/lib/CodeGen/PHIElimination.h +++ b/lib/CodeGen/PHIElimination.h @@ -89,11 +89,33 @@ namespace llvm { /// void analyzePHINodes(const MachineFunction& Fn); - // FindCopyInsertPoint - Find a safe place in MBB to insert a copy from - // SrcReg. This needs to be after any def or uses of SrcReg, but before - // any subsequent point where control flow might jump out of the basic - // block. + /// Split critical edges where necessary for good coalescer performance. + bool SplitPHIEdges(MachineFunction &MF, MachineBasicBlock &MBB); + + /// isLiveOut - Determine if Reg is live out from MBB, when not + /// considering PHI nodes. This means that Reg is either killed by + /// a successor block or passed through one. + bool isLiveOut(unsigned Reg, const MachineBasicBlock &MBB, + LiveVariables &LV); + + /// isLiveIn - Determine if Reg is live in to MBB, not considering PHI + /// source registers. This means that Reg is either killed by MBB or passes + /// through it. + bool isLiveIn(unsigned Reg, const MachineBasicBlock &MBB, + LiveVariables &LV); + + /// SplitCriticalEdge - Split a critical edge from A to B by + /// inserting a new MBB. Update branches in A and PHI instructions + /// in B. Return the new block. + MachineBasicBlock *SplitCriticalEdge(MachineBasicBlock *A, + MachineBasicBlock *B); + + /// FindCopyInsertPoint - Find a safe place in MBB to insert a copy from + /// SrcReg when following the CFG edge to SuccMBB. This needs to be after + /// any def of SrcReg, but before any subsequent point where control flow + /// might jump out of the basic block. MachineBasicBlock::iterator FindCopyInsertPoint(MachineBasicBlock &MBB, + MachineBasicBlock &SuccMBB, unsigned SrcReg); // SkipPHIsAndLabels - Copies need to be inserted after phi nodes and diff --git a/lib/CodeGen/PostRASchedulerList.cpp b/lib/CodeGen/PostRASchedulerList.cpp index 3ed61a2..5f1f1f3 100644 --- a/lib/CodeGen/PostRASchedulerList.cpp +++ b/lib/CodeGen/PostRASchedulerList.cpp @@ -216,13 +216,14 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) { // Check for explicit enable/disable of post-ra scheduling. TargetSubtarget::AntiDepBreakMode AntiDepMode = TargetSubtarget::ANTIDEP_NONE; + SmallVector<TargetRegisterClass*, 4> CriticalPathRCs; if (EnablePostRAScheduler.getPosition() > 0) { if (!EnablePostRAScheduler) return false; } else { // Check that post-RA scheduling is enabled for this target. 
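// Several hunks in this patch widen the scheduler's "ignore anti-dependences"
// tests so that output dependences are skipped as well. The rule itself is
// tiny; a standalone sketch with an invented DepKind enum:
enum DepKind { DataDep, AntiDep, OutputDep, OrderDep };

static bool ignorableWhenBreakingAntiDeps(DepKind K, bool IgnoreAntiDep) {
  // Anti (write-after-read) and output (write-after-write) dependences are
  // name dependences that register renaming can remove, so both are skipped
  // when the scheduler is told to ignore them.
  return IgnoreAntiDep && (K == AntiDep || K == OutputDep);
}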
const TargetSubtarget &ST = Fn.getTarget().getSubtarget<TargetSubtarget>(); - if (!ST.enablePostRAScheduler(OptLevel, AntiDepMode)) + if (!ST.enablePostRAScheduler(OptLevel, AntiDepMode, CriticalPathRCs)) return false; } @@ -243,7 +244,7 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) { (ScheduleHazardRecognizer *)new SimpleHazardRecognizer(); AntiDepBreaker *ADB = ((AntiDepMode == TargetSubtarget::ANTIDEP_ALL) ? - (AntiDepBreaker *)new AggressiveAntiDepBreaker(Fn) : + (AntiDepBreaker *)new AggressiveAntiDepBreaker(Fn, CriticalPathRCs) : ((AntiDepMode == TargetSubtarget::ANTIDEP_CRITICAL) ? (AntiDepBreaker *)new CriticalAntiDepBreaker(Fn) : NULL)); @@ -602,7 +603,9 @@ void SchedulePostRATDList::ReleaseSucc(SUnit *SU, SDep *SuccEdge, void SchedulePostRATDList::ReleaseSuccessors(SUnit *SU, bool IgnoreAntiDep) { for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); I != E; ++I) { - if (IgnoreAntiDep && (I->getKind() == SDep::Anti)) continue; + if (IgnoreAntiDep && + ((I->getKind() == SDep::Anti) || (I->getKind() == SDep::Output))) + continue; ReleaseSucc(SU, &*I, IgnoreAntiDep); } } @@ -657,7 +660,7 @@ void SchedulePostRATDList::ListScheduleTopDown( available = true; for (SUnit::const_pred_iterator I = SUnits[i].Preds.begin(), E = SUnits[i].Preds.end(); I != E; ++I) { - if (I->getKind() != SDep::Anti) { + if ((I->getKind() != SDep::Anti) && (I->getKind() != SDep::Output)) { available = false; } else { SUnits[i].NumPredsLeft -= 1; @@ -736,7 +739,9 @@ void SchedulePostRATDList::ListScheduleTopDown( AntiDepBreaker::AntiDepRegVector AntiDepRegs; for (SUnit::const_pred_iterator I = FoundSUnit->Preds.begin(), E = FoundSUnit->Preds.end(); I != E; ++I) { - if ((I->getKind() == SDep::Anti) && !I->getSUnit()->isScheduled) + if (((I->getKind() == SDep::Anti) || + (I->getKind() == SDep::Output)) && + !I->getSUnit()->isScheduled) AntiDepRegs.push_back(I->getReg()); } diff --git a/lib/CodeGen/PreAllocSplitting.cpp b/lib/CodeGen/PreAllocSplitting.cpp index cce5ae8..8f62345 100644 --- a/lib/CodeGen/PreAllocSplitting.cpp +++ b/lib/CodeGen/PreAllocSplitting.cpp @@ -39,8 +39,10 @@ using namespace llvm; static cl::opt<int> PreSplitLimit("pre-split-limit", cl::init(-1), cl::Hidden); -static cl::opt<int> DeadSplitLimit("dead-split-limit", cl::init(-1), cl::Hidden); -static cl::opt<int> RestoreFoldLimit("restore-fold-limit", cl::init(-1), cl::Hidden); +static cl::opt<int> DeadSplitLimit("dead-split-limit", cl::init(-1), + cl::Hidden); +static cl::opt<int> RestoreFoldLimit("restore-fold-limit", cl::init(-1), + cl::Hidden); STATISTIC(NumSplits, "Number of intervals split"); STATISTIC(NumRemats, "Number of intervals split by rematerialization"); @@ -131,17 +133,14 @@ namespace { private: - MachineBasicBlock::iterator - findNextEmptySlot(MachineBasicBlock*, MachineInstr*, - SlotIndex&); MachineBasicBlock::iterator findSpillPoint(MachineBasicBlock*, MachineInstr*, MachineInstr*, - SmallPtrSet<MachineInstr*, 4>&, SlotIndex&); + SmallPtrSet<MachineInstr*, 4>&); MachineBasicBlock::iterator findRestorePoint(MachineBasicBlock*, MachineInstr*, SlotIndex, - SmallPtrSet<MachineInstr*, 4>&, SlotIndex&); + SmallPtrSet<MachineInstr*, 4>&); int CreateSpillStackSlot(unsigned, const TargetRegisterClass *); @@ -161,7 +160,6 @@ namespace { bool Rematerialize(unsigned vreg, VNInfo* ValNo, MachineInstr* DefMI, MachineBasicBlock::iterator RestorePt, - SlotIndex RestoreIdx, SmallPtrSet<MachineInstr*, 4>& RefsInMBB); MachineInstr* FoldSpill(unsigned vreg, const TargetRegisterClass* RC, MachineInstr* DefMI, 
@@ -208,24 +206,6 @@ X("pre-alloc-splitting", "Pre-Register Allocation Live Interval Splitting"); const PassInfo *const llvm::PreAllocSplittingID = &X; - -/// findNextEmptySlot - Find a gap after the given machine instruction in the -/// instruction index map. If there isn't one, return end(). -MachineBasicBlock::iterator -PreAllocSplitting::findNextEmptySlot(MachineBasicBlock *MBB, MachineInstr *MI, - SlotIndex &SpotIndex) { - MachineBasicBlock::iterator MII = MI; - if (++MII != MBB->end()) { - SlotIndex Index = - LIs->findGapBeforeInstr(LIs->getInstructionIndex(MII)); - if (Index != SlotIndex()) { - SpotIndex = Index; - return MII; - } - } - return MBB->end(); -} - /// findSpillPoint - Find a gap as far away from the given MI that's suitable /// for spilling the current live interval. The index must be before any /// defs and uses of the live interval register in the mbb. Return begin() if @@ -233,8 +213,7 @@ PreAllocSplitting::findNextEmptySlot(MachineBasicBlock *MBB, MachineInstr *MI, MachineBasicBlock::iterator PreAllocSplitting::findSpillPoint(MachineBasicBlock *MBB, MachineInstr *MI, MachineInstr *DefMI, - SmallPtrSet<MachineInstr*, 4> &RefsInMBB, - SlotIndex &SpillIndex) { + SmallPtrSet<MachineInstr*, 4> &RefsInMBB) { MachineBasicBlock::iterator Pt = MBB->begin(); MachineBasicBlock::iterator MII = MI; @@ -247,8 +226,6 @@ PreAllocSplitting::findSpillPoint(MachineBasicBlock *MBB, MachineInstr *MI, if (MII == EndPt || RefsInMBB.count(MII)) return Pt; while (MII != EndPt && !RefsInMBB.count(MII)) { - SlotIndex Index = LIs->getInstructionIndex(MII); - // We can't insert the spill between the barrier (a call), and its // corresponding call frame setup. if (MII->getOpcode() == TRI->getCallFrameDestroyOpcode()) { @@ -259,9 +236,8 @@ PreAllocSplitting::findSpillPoint(MachineBasicBlock *MBB, MachineInstr *MI, } } continue; - } else if (LIs->hasGapBeforeInstr(Index)) { + } else { Pt = MII; - SpillIndex = LIs->findGapBeforeInstr(Index, true); } if (RefsInMBB.count(MII)) @@ -281,8 +257,7 @@ PreAllocSplitting::findSpillPoint(MachineBasicBlock *MBB, MachineInstr *MI, MachineBasicBlock::iterator PreAllocSplitting::findRestorePoint(MachineBasicBlock *MBB, MachineInstr *MI, SlotIndex LastIdx, - SmallPtrSet<MachineInstr*, 4> &RefsInMBB, - SlotIndex &RestoreIndex) { + SmallPtrSet<MachineInstr*, 4> &RefsInMBB) { // FIXME: Allow spill to be inserted to the beginning of the mbb. Update mbb // begin index accordingly. MachineBasicBlock::iterator Pt = MBB->end(); @@ -306,7 +281,6 @@ PreAllocSplitting::findRestorePoint(MachineBasicBlock *MBB, MachineInstr *MI, SlotIndex Index = LIs->getInstructionIndex(MII); if (Index > LastIdx) break; - SlotIndex Gap = LIs->findGapBeforeInstr(Index); // We can't insert a restore between the barrier (a call) and its // corresponding call frame teardown. 
@@ -315,9 +289,8 @@ PreAllocSplitting::findRestorePoint(MachineBasicBlock *MBB, MachineInstr *MI, if (MII == EndPt || RefsInMBB.count(MII)) return Pt; ++MII; } while (MII->getOpcode() != TRI->getCallFrameDestroyOpcode()); - } else if (Gap != SlotIndex()) { + } else { Pt = MII; - RestoreIndex = Gap; } if (RefsInMBB.count(MII)) @@ -339,7 +312,7 @@ int PreAllocSplitting::CreateSpillStackSlot(unsigned Reg, if (I != IntervalSSMap.end()) { SS = I->second; } else { - SS = MFI->CreateStackObject(RC->getSize(), RC->getAlignment()); + SS = MFI->CreateSpillStackObject(RC->getSize(), RC->getAlignment()); IntervalSSMap[Reg] = SS; } @@ -364,10 +337,10 @@ PreAllocSplitting::IsAvailableInStack(MachineBasicBlock *DefMBB, if (!DefMBB) return false; - DenseMap<unsigned, int>::iterator I = IntervalSSMap.find(Reg); + DenseMap<unsigned, int>::const_iterator I = IntervalSSMap.find(Reg); if (I == IntervalSSMap.end()) return false; - DenseMap<SlotIndex, SlotIndex>::iterator + DenseMap<SlotIndex, SlotIndex>::const_iterator II = Def2SpillMap.find(DefIndex); if (II == Def2SpillMap.end()) return false; @@ -740,7 +713,7 @@ void PreAllocSplitting::ReconstructLiveInterval(LiveInterval* LI) { DefIdx = DefIdx.getDefIndex(); assert(DI->getOpcode() != TargetInstrInfo::PHI && - "Following NewVN isPHIDef flag incorrect. Fix me!"); + "PHI instr in code during pre-alloc splitting."); VNInfo* NewVN = LI->getNextValue(DefIdx, 0, true, Alloc); // If the def is a move, set the copy field. @@ -896,25 +869,22 @@ void PreAllocSplitting::RenumberValno(VNInfo* VN) { bool PreAllocSplitting::Rematerialize(unsigned VReg, VNInfo* ValNo, MachineInstr* DefMI, MachineBasicBlock::iterator RestorePt, - SlotIndex RestoreIdx, SmallPtrSet<MachineInstr*, 4>& RefsInMBB) { MachineBasicBlock& MBB = *RestorePt->getParent(); MachineBasicBlock::iterator KillPt = BarrierMBB->end(); - SlotIndex KillIdx; if (!ValNo->isDefAccurate() || DefMI->getParent() == BarrierMBB) - KillPt = findSpillPoint(BarrierMBB, Barrier, NULL, RefsInMBB, KillIdx); + KillPt = findSpillPoint(BarrierMBB, Barrier, NULL, RefsInMBB); else - KillPt = findNextEmptySlot(DefMI->getParent(), DefMI, KillIdx); + KillPt = next(MachineBasicBlock::iterator(DefMI)); if (KillPt == DefMI->getParent()->end()) return false; - TII->reMaterialize(MBB, RestorePt, VReg, 0, DefMI); - LIs->InsertMachineInstrInMaps(prior(RestorePt), RestoreIdx); + TII->reMaterialize(MBB, RestorePt, VReg, 0, DefMI, TRI); + SlotIndex RematIdx = LIs->InsertMachineInstrInMaps(prior(RestorePt)); ReconstructLiveInterval(CurrLI); - SlotIndex RematIdx = LIs->getInstructionIndex(prior(RestorePt)); RematIdx = RematIdx.getDefIndex(); RenumberValno(CurrLI->findDefinedVNInfoForRegInt(RematIdx)); @@ -955,7 +925,7 @@ MachineInstr* PreAllocSplitting::FoldSpill(unsigned vreg, if (I != IntervalSSMap.end()) { SS = I->second; } else { - SS = MFI->CreateStackObject(RC->getSize(), RC->getAlignment()); + SS = MFI->CreateSpillStackObject(RC->getSize(), RC->getAlignment()); } MachineInstr* FMI = TII->foldMemoryOperand(*MBB->getParent(), @@ -1086,17 +1056,15 @@ bool PreAllocSplitting::SplitRegLiveInterval(LiveInterval *LI) { } // Find a point to restore the value after the barrier. 
- SlotIndex RestoreIndex; MachineBasicBlock::iterator RestorePt = - findRestorePoint(BarrierMBB, Barrier, LR->end, RefsInMBB, RestoreIndex); + findRestorePoint(BarrierMBB, Barrier, LR->end, RefsInMBB); if (RestorePt == BarrierMBB->end()) { DEBUG(errs() << "FAILED (could not find a suitable restore point).\n"); return false; } if (DefMI && LIs->isReMaterializable(*LI, ValNo, DefMI)) - if (Rematerialize(LI->reg, ValNo, DefMI, RestorePt, - RestoreIndex, RefsInMBB)) { + if (Rematerialize(LI->reg, ValNo, DefMI, RestorePt, RefsInMBB)) { DEBUG(errs() << "success (remat).\n"); return true; } @@ -1114,7 +1082,7 @@ bool PreAllocSplitting::SplitRegLiveInterval(LiveInterval *LI) { SpillIndex = LIs->getInstructionIndex(SpillMI); } else { MachineBasicBlock::iterator SpillPt = - findSpillPoint(BarrierMBB, Barrier, NULL, RefsInMBB, SpillIndex); + findSpillPoint(BarrierMBB, Barrier, NULL, RefsInMBB); if (SpillPt == BarrierMBB->begin()) { DEBUG(errs() << "FAILED (could not find a suitable spill point).\n"); return false; // No gap to insert spill. @@ -1124,10 +1092,10 @@ bool PreAllocSplitting::SplitRegLiveInterval(LiveInterval *LI) { SS = CreateSpillStackSlot(CurrLI->reg, RC); TII->storeRegToStackSlot(*BarrierMBB, SpillPt, CurrLI->reg, true, SS, RC); SpillMI = prior(SpillPt); - LIs->InsertMachineInstrInMaps(SpillMI, SpillIndex); + SpillIndex = LIs->InsertMachineInstrInMaps(SpillMI); } } else if (!IsAvailableInStack(DefMBB, CurrLI->reg, ValNo->def, - RestoreIndex, SpillIndex, SS)) { + LIs->getZeroIndex(), SpillIndex, SS)) { // If it's already split, just restore the value. There is no need to spill // the def again. if (!DefMI) { @@ -1144,13 +1112,13 @@ bool PreAllocSplitting::SplitRegLiveInterval(LiveInterval *LI) { if (DefMBB == BarrierMBB) { // Add spill after the def and the last use before the barrier. SpillPt = findSpillPoint(BarrierMBB, Barrier, DefMI, - RefsInMBB, SpillIndex); + RefsInMBB); if (SpillPt == DefMBB->begin()) { DEBUG(errs() << "FAILED (could not find a suitable spill point).\n"); return false; // No gap to insert spill. } } else { - SpillPt = findNextEmptySlot(DefMBB, DefMI, SpillIndex); + SpillPt = next(MachineBasicBlock::iterator(DefMI)); if (SpillPt == DefMBB->end()) { DEBUG(errs() << "FAILED (could not find a suitable spill point).\n"); return false; // No gap to insert spill. @@ -1160,7 +1128,7 @@ bool PreAllocSplitting::SplitRegLiveInterval(LiveInterval *LI) { SS = CreateSpillStackSlot(CurrLI->reg, RC); TII->storeRegToStackSlot(*DefMBB, SpillPt, CurrLI->reg, false, SS, RC); SpillMI = prior(SpillPt); - LIs->InsertMachineInstrInMaps(SpillMI, SpillIndex); + SpillIndex = LIs->InsertMachineInstrInMaps(SpillMI); } } @@ -1170,6 +1138,7 @@ bool PreAllocSplitting::SplitRegLiveInterval(LiveInterval *LI) { // Add restore. bool FoldedRestore = false; + SlotIndex RestoreIndex; if (MachineInstr* LMI = FoldRestore(CurrLI->reg, RC, Barrier, BarrierMBB, SS, RefsInMBB)) { RestorePt = LMI; @@ -1178,7 +1147,7 @@ bool PreAllocSplitting::SplitRegLiveInterval(LiveInterval *LI) { } else { TII->loadRegFromStackSlot(*BarrierMBB, RestorePt, CurrLI->reg, SS, RC); MachineInstr *LoadMI = prior(RestorePt); - LIs->InsertMachineInstrInMaps(LoadMI, RestoreIndex); + RestoreIndex = LIs->InsertMachineInstrInMaps(LoadMI); } // Update spill stack slot live interval. @@ -1398,7 +1367,7 @@ bool PreAllocSplitting::removeDeadSpills(SmallPtrSet<LiveInterval*, 8>& split) { // Otherwise, this is a load-store case, so DCE them. 
for (SmallPtrSet<MachineInstr*, 4>::iterator UI = VNUseCount[CurrVN].begin(), UE = VNUseCount[CurrVN].end(); - UI != UI; ++UI) { + UI != UE; ++UI) { LIs->RemoveMachineInstrFromMaps(*UI); (*UI)->eraseFromParent(); } diff --git a/lib/CodeGen/ProcessImplicitDefs.cpp b/lib/CodeGen/ProcessImplicitDefs.cpp index 48567a0..455964b 100644 --- a/lib/CodeGen/ProcessImplicitDefs.cpp +++ b/lib/CodeGen/ProcessImplicitDefs.cpp @@ -77,6 +77,7 @@ bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &fn) { SmallVector<MachineInstr*, 8> ImpDefMIs; MachineBasicBlock *Entry = fn.begin(); SmallPtrSet<MachineBasicBlock*,16> Visited; + SmallPtrSet<MachineInstr*, 8> ModInsts; for (df_ext_iterator<MachineBasicBlock*, SmallPtrSet<MachineBasicBlock*,16> > DFI = df_ext_begin(Entry, Visited), E = df_ext_end(Entry, Visited); @@ -201,6 +202,8 @@ bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &fn) { MachineOperand &RMO = UI.getOperand(); MachineInstr *RMI = &*UI; ++UI; + if (ModInsts.count(RMI)) + continue; MachineBasicBlock *RMBB = RMI->getParent(); if (RMBB == MBB) continue; @@ -209,9 +212,14 @@ bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &fn) { unsigned SrcReg, DstReg, SrcSubReg, DstSubReg; if (tii_->isMoveInstr(*RMI, SrcReg, DstReg, SrcSubReg, DstSubReg) && Reg == SrcReg) { + if (RMO.isKill()) { + LiveVariables::VarInfo& vi = lv_->getVarInfo(Reg); + vi.removeKill(RMI); + } RMI->setDesc(tii_->get(TargetInstrInfo::IMPLICIT_DEF)); for (int j = RMI->getNumOperands() - 1, ee = 0; j > ee; --j) RMI->RemoveOperand(j); + ModInsts.insert(RMI); continue; } @@ -222,6 +230,7 @@ bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &fn) { RMO.setIsKill(); } } + ModInsts.clear(); ImpDefRegs.clear(); ImpDefMIs.clear(); } diff --git a/lib/CodeGen/PrologEpilogInserter.cpp b/lib/CodeGen/PrologEpilogInserter.cpp index 230a20c..8905f75 100644 --- a/lib/CodeGen/PrologEpilogInserter.cpp +++ b/lib/CodeGen/PrologEpilogInserter.cpp @@ -264,7 +264,8 @@ void PEI::calculateCalleeSavedRegisters(MachineFunction &Fn) { if ((unsigned)FrameIdx > MaxCSFrameIndex) MaxCSFrameIndex = FrameIdx; } else { // Spill it to the stack where we must. - FrameIdx = FFI->CreateFixedObject(RC->getSize(), FixedSlot->Offset); + FrameIdx = FFI->CreateFixedObject(RC->getSize(), FixedSlot->Offset, + true, false); } I->setFrameIdx(FrameIdx); diff --git a/lib/CodeGen/PseudoSourceValue.cpp b/lib/CodeGen/PseudoSourceValue.cpp index 5507646..7fb3e6e 100644 --- a/lib/CodeGen/PseudoSourceValue.cpp +++ b/lib/CodeGen/PseudoSourceValue.cpp @@ -43,35 +43,14 @@ static const char *const PSVNames[] = { // Eventually these should be uniqued on LLVMContext rather than in a managed // static. For now, we can safely use the global context for the time being to // squeak by. -PseudoSourceValue::PseudoSourceValue() : +PseudoSourceValue::PseudoSourceValue(enum ValueTy Subclass) : Value(Type::getInt8PtrTy(getGlobalContext()), - PseudoSourceValueVal) {} + Subclass) {} void PseudoSourceValue::printCustom(raw_ostream &O) const { O << PSVNames[this - *PSVs]; } -namespace { - /// FixedStackPseudoSourceValue - A specialized PseudoSourceValue - /// for holding FixedStack values, which must include a frame - /// index. 
- class FixedStackPseudoSourceValue : public PseudoSourceValue { - const int FI; - public: - explicit FixedStackPseudoSourceValue(int fi) : FI(fi) {} - - virtual bool isConstant(const MachineFrameInfo *MFI) const; - - virtual bool isAliased(const MachineFrameInfo *MFI) const; - - virtual bool mayAlias(const MachineFrameInfo *) const; - - virtual void printCustom(raw_ostream &OS) const { - OS << "FixedStack" << FI; - } - }; -} - static ManagedStatic<std::map<int, const PseudoSourceValue *> > FSValues; const PseudoSourceValue *PseudoSourceValue::getFixedStack(int FI) { @@ -130,3 +109,7 @@ bool FixedStackPseudoSourceValue::mayAlias(const MachineFrameInfo *MFI) const { // Spill slots will not alias any LLVM IR value. return !MFI->isSpillSlotObjectIndex(FI); } + +void FixedStackPseudoSourceValue::printCustom(raw_ostream &OS) const { + OS << "FixedStack" << FI; +} diff --git a/lib/CodeGen/RegAllocLocal.cpp b/lib/CodeGen/RegAllocLocal.cpp index 1957c16..7bb020a 100644 --- a/lib/CodeGen/RegAllocLocal.cpp +++ b/lib/CodeGen/RegAllocLocal.cpp @@ -261,8 +261,8 @@ int RALocal::getStackSpaceFor(unsigned VirtReg, const TargetRegisterClass *RC) { return SS; // Already has space allocated? // Allocate a new stack object for this spill location... - int FrameIdx = MF->getFrameInfo()->CreateStackObject(RC->getSize(), - RC->getAlignment(),true); + int FrameIdx = MF->getFrameInfo()->CreateSpillStackObject(RC->getSize(), + RC->getAlignment()); // Assign the slot... StackSlotForVirtReg[VirtReg] = FrameIdx; diff --git a/lib/CodeGen/RegAllocPBQP.cpp b/lib/CodeGen/RegAllocPBQP.cpp index 5757e47..c677d34 100644 --- a/lib/CodeGen/RegAllocPBQP.cpp +++ b/lib/CodeGen/RegAllocPBQP.cpp @@ -693,6 +693,11 @@ void PBQPRegAlloc::addStackInterval(const LiveInterval *spilled, } bool PBQPRegAlloc::mapPBQPToRegAlloc(const PBQP::Solution &solution) { + + // Assert that this is a valid solution to the regalloc problem. 
+ assert(solution.getCost() != std::numeric_limits<PBQP::PBQPNum>::infinity() && + "Invalid (infinite cost) solution for PBQP problem."); + // Set to true if we have any spills bool anotherRoundNeeded = false; diff --git a/lib/CodeGen/RegisterScavenging.cpp b/lib/CodeGen/RegisterScavenging.cpp index cf90aba..94680ed 100644 --- a/lib/CodeGen/RegisterScavenging.cpp +++ b/lib/CodeGen/RegisterScavenging.cpp @@ -100,11 +100,8 @@ void RegScavenger::enterBasicBlock(MachineBasicBlock *mbb) { CalleeSavedRegs.set(CSRegs[i]); } - // RS used within emit{Pro,Epi}logue() - if (mbb != MBB) { - MBB = mbb; - initRegState(); - } + MBB = mbb; + initRegState(); Tracking = false; } diff --git a/lib/CodeGen/ScheduleDAG.cpp b/lib/CodeGen/ScheduleDAG.cpp index 1363a92..6b27db2 100644 --- a/lib/CodeGen/ScheduleDAG.cpp +++ b/lib/CodeGen/ScheduleDAG.cpp @@ -214,7 +214,10 @@ void SUnit::ComputeDepth(bool IgnoreAntiDep) { unsigned MaxPredDepth = 0; for (SUnit::const_pred_iterator I = Cur->Preds.begin(), E = Cur->Preds.end(); I != E; ++I) { - if (IgnoreAntiDep && (I->getKind() == SDep::Anti)) continue; + if (IgnoreAntiDep && + ((I->getKind() == SDep::Anti) || (I->getKind() == SDep::Output))) + continue; + SUnit *PredSU = I->getSUnit(); if (PredSU->isDepthCurrent) MaxPredDepth = std::max(MaxPredDepth, @@ -248,7 +251,10 @@ void SUnit::ComputeHeight(bool IgnoreAntiDep) { unsigned MaxSuccHeight = 0; for (SUnit::const_succ_iterator I = Cur->Succs.begin(), E = Cur->Succs.end(); I != E; ++I) { - if (IgnoreAntiDep && (I->getKind() == SDep::Anti)) continue; + if (IgnoreAntiDep && + ((I->getKind() == SDep::Anti) || (I->getKind() == SDep::Output))) + continue; + SUnit *SuccSU = I->getSUnit(); if (SuccSU->isHeightCurrent) MaxSuccHeight = std::max(MaxSuccHeight, diff --git a/lib/CodeGen/ScheduleDAGInstrs.cpp b/lib/CodeGen/ScheduleDAGInstrs.cpp index f8b219d..56dd533 100644 --- a/lib/CodeGen/ScheduleDAGInstrs.cpp +++ b/lib/CodeGen/ScheduleDAGInstrs.cpp @@ -112,12 +112,13 @@ static const Value *getUnderlyingObjectForInstr(const MachineInstr *MI, V = getUnderlyingObject(V); if (const PseudoSourceValue *PSV = dyn_cast<PseudoSourceValue>(V)) { - MayAlias = PSV->mayAlias(MFI); // For now, ignore PseudoSourceValues which may alias LLVM IR values // because the code that uses this function has no way to cope with // such aliases. if (PSV->isAliased(MFI)) return 0; + + MayAlias = PSV->mayAlias(MFI); return V; } @@ -127,23 +128,6 @@ static const Value *getUnderlyingObjectForInstr(const MachineInstr *MI, return 0; } -static bool mayUnderlyingObjectForInstrAlias(const MachineInstr *MI, - const MachineFrameInfo *MFI) { - if (!MI->hasOneMemOperand() || - !(*MI->memoperands_begin())->getValue() || - (*MI->memoperands_begin())->isVolatile()) - return true; - - const Value *V = (*MI->memoperands_begin())->getValue(); - if (!V) - return true; - - V = getUnderlyingObject(V); - if (const PseudoSourceValue *PSV = dyn_cast<PseudoSourceValue>(V)) - return PSV->mayAlias(MFI); - return true; -} - void ScheduleDAGInstrs::StartBlock(MachineBasicBlock *BB) { if (MachineLoop *ML = MLI.getLoopFor(BB)) if (BB == ML->getLoopLatch()) { @@ -163,16 +147,15 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) { // We build scheduling units by walking a block's instruction list from bottom // to top. - // Remember where a generic side-effecting instruction is as we procede. If - // ChainMMO is null, this is assumed to have arbitrary side-effects. If - // ChainMMO is non-null, then Chain makes only a single memory reference. 
- SUnit *Chain = 0; - MachineMemOperand *ChainMMO = 0; + // Remember where a generic side-effecting instruction is as we procede. + SUnit *BarrierChain = 0, *AliasChain = 0; - // Memory references to specific known memory locations are tracked so that - // they can be given more precise dependencies. - std::map<const Value *, SUnit *> MemDefs; - std::map<const Value *, std::vector<SUnit *> > MemUses; + // Memory references to specific known memory locations are tracked + // so that they can be given more precise dependencies. We track + // separately the known memory locations that may alias and those + // that are known not to alias + std::map<const Value *, SUnit *> AliasMemDefs, NonAliasMemDefs; + std::map<const Value *, std::vector<SUnit *> > AliasMemUses, NonAliasMemUses; // Check to see if the scheduler cares about latencies. bool UnitLatencies = ForceUnitLatencies(); @@ -347,114 +330,132 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) { // produce more precise dependence information. #define STORE_LOAD_LATENCY 1 unsigned TrueMemOrderLatency = 0; - if (TID.isCall() || TID.hasUnmodeledSideEffects()) { - new_chain: - // This is the conservative case. Add dependencies on all memory - // references. - if (Chain) - Chain->addPred(SDep(SU, SDep::Order, /*Latency=*/0)); - Chain = SU; + if (TID.isCall() || TID.hasUnmodeledSideEffects() || + (MI->hasVolatileMemoryRef() && + (!TID.mayLoad() || !MI->isInvariantLoad(AA)))) { + // Be conservative with these and add dependencies on all memory + // references, even those that are known to not alias. + for (std::map<const Value *, SUnit *>::iterator I = + NonAliasMemDefs.begin(), E = NonAliasMemDefs.end(); I != E; ++I) { + I->second->addPred(SDep(SU, SDep::Order, /*Latency=*/0)); + } + for (std::map<const Value *, std::vector<SUnit *> >::iterator I = + NonAliasMemUses.begin(), E = NonAliasMemUses.end(); I != E; ++I) { + for (unsigned i = 0, e = I->second.size(); i != e; ++i) + I->second[i]->addPred(SDep(SU, SDep::Order, TrueMemOrderLatency)); + } + NonAliasMemDefs.clear(); + NonAliasMemUses.clear(); + // Add SU to the barrier chain. + if (BarrierChain) + BarrierChain->addPred(SDep(SU, SDep::Order, /*Latency=*/0)); + BarrierChain = SU; + + // fall-through + new_alias_chain: + // Chain all possibly aliasing memory references though SU. + if (AliasChain) + AliasChain->addPred(SDep(SU, SDep::Order, /*Latency=*/0)); + AliasChain = SU; for (unsigned k = 0, m = PendingLoads.size(); k != m; ++k) PendingLoads[k]->addPred(SDep(SU, SDep::Order, TrueMemOrderLatency)); - PendingLoads.clear(); - for (std::map<const Value *, SUnit *>::iterator I = MemDefs.begin(), - E = MemDefs.end(); I != E; ++I) { + for (std::map<const Value *, SUnit *>::iterator I = AliasMemDefs.begin(), + E = AliasMemDefs.end(); I != E; ++I) { I->second->addPred(SDep(SU, SDep::Order, /*Latency=*/0)); - I->second = SU; } for (std::map<const Value *, std::vector<SUnit *> >::iterator I = - MemUses.begin(), E = MemUses.end(); I != E; ++I) { + AliasMemUses.begin(), E = AliasMemUses.end(); I != E; ++I) { for (unsigned i = 0, e = I->second.size(); i != e; ++i) I->second[i]->addPred(SDep(SU, SDep::Order, TrueMemOrderLatency)); - I->second.clear(); - I->second.push_back(SU); } - // See if it is known to just have a single memory reference. 
- MachineInstr *ChainMI = Chain->getInstr(); - const TargetInstrDesc &ChainTID = ChainMI->getDesc(); - if (!ChainTID.isCall() && - !ChainTID.hasUnmodeledSideEffects() && - ChainMI->hasOneMemOperand() && - !(*ChainMI->memoperands_begin())->isVolatile() && - (*ChainMI->memoperands_begin())->getValue()) - // We know that the Chain accesses one specific memory location. - ChainMMO = *ChainMI->memoperands_begin(); - else - // Unknown memory accesses. Assume the worst. - ChainMMO = 0; + PendingLoads.clear(); + AliasMemDefs.clear(); + AliasMemUses.clear(); } else if (TID.mayStore()) { bool MayAlias = true; TrueMemOrderLatency = STORE_LOAD_LATENCY; if (const Value *V = getUnderlyingObjectForInstr(MI, MFI, MayAlias)) { // A store to a specific PseudoSourceValue. Add precise dependencies. - // Handle the def in MemDefs, if there is one. - std::map<const Value *, SUnit *>::iterator I = MemDefs.find(V); - if (I != MemDefs.end()) { + // Record the def in MemDefs, first adding a dep if there is + // an existing def. + std::map<const Value *, SUnit *>::iterator I = + ((MayAlias) ? AliasMemDefs.find(V) : NonAliasMemDefs.find(V)); + std::map<const Value *, SUnit *>::iterator IE = + ((MayAlias) ? AliasMemDefs.end() : NonAliasMemDefs.end()); + if (I != IE) { I->second->addPred(SDep(SU, SDep::Order, /*Latency=*/0, /*Reg=*/0, /*isNormalMemory=*/true)); I->second = SU; } else { - MemDefs[V] = SU; + if (MayAlias) + AliasMemDefs[V] = SU; + else + NonAliasMemDefs[V] = SU; } // Handle the uses in MemUses, if there are any. std::map<const Value *, std::vector<SUnit *> >::iterator J = - MemUses.find(V); - if (J != MemUses.end()) { + ((MayAlias) ? AliasMemUses.find(V) : NonAliasMemUses.find(V)); + std::map<const Value *, std::vector<SUnit *> >::iterator JE = + ((MayAlias) ? AliasMemUses.end() : NonAliasMemUses.end()); + if (J != JE) { for (unsigned i = 0, e = J->second.size(); i != e; ++i) J->second[i]->addPred(SDep(SU, SDep::Order, TrueMemOrderLatency, /*Reg=*/0, /*isNormalMemory=*/true)); J->second.clear(); } if (MayAlias) { - // Add dependencies from all the PendingLoads, since without - // memoperands we must assume they alias anything. + // Add dependencies from all the PendingLoads, i.e. loads + // with no underlying object. for (unsigned k = 0, m = PendingLoads.size(); k != m; ++k) PendingLoads[k]->addPred(SDep(SU, SDep::Order, TrueMemOrderLatency)); - // Add a general dependence too, if needed. - if (Chain) - Chain->addPred(SDep(SU, SDep::Order, /*Latency=*/0)); + // Add dependence on alias chain, if needed. + if (AliasChain) + AliasChain->addPred(SDep(SU, SDep::Order, /*Latency=*/0)); } + // Add dependence on barrier chain, if needed. + if (BarrierChain) + BarrierChain->addPred(SDep(SU, SDep::Order, /*Latency=*/0)); } else { // Treat all other stores conservatively. - goto new_chain; + goto new_alias_chain; } } else if (TID.mayLoad()) { bool MayAlias = true; TrueMemOrderLatency = 0; if (MI->isInvariantLoad(AA)) { // Invariant load, no chain dependencies needed! - } else if (const Value *V = - getUnderlyingObjectForInstr(MI, MFI, MayAlias)) { - // A load from a specific PseudoSourceValue. Add precise dependencies. - std::map<const Value *, SUnit *>::iterator I = MemDefs.find(V); - if (I != MemDefs.end()) - I->second->addPred(SDep(SU, SDep::Order, /*Latency=*/0, /*Reg=*/0, - /*isNormalMemory=*/true)); - MemUses[V].push_back(SU); - - // Add a general dependence too, if needed. 
- if (Chain && (!ChainMMO || - (ChainMMO->isStore() || ChainMMO->isVolatile()))) - Chain->addPred(SDep(SU, SDep::Order, /*Latency=*/0)); - } else if (MI->hasVolatileMemoryRef()) { - // Treat volatile loads conservatively. Note that this includes - // cases where memoperand information is unavailable. - goto new_chain; } else { - // A "MayAlias" load. Depend on the general chain, as well as on - // all stores. In the absense of MachineMemOperand information, - // we can't even assume that the load doesn't alias well-behaved - // memory locations. - if (Chain) - Chain->addPred(SDep(SU, SDep::Order, /*Latency=*/0)); - for (std::map<const Value *, SUnit *>::iterator I = MemDefs.begin(), - E = MemDefs.end(); I != E; ++I) { - SUnit *DefSU = I->second; - if (mayUnderlyingObjectForInstrAlias(DefSU->getInstr(), MFI)) - DefSU->addPred(SDep(SU, SDep::Order, /*Latency=*/0)); + if (const Value *V = + getUnderlyingObjectForInstr(MI, MFI, MayAlias)) { + // A load from a specific PseudoSourceValue. Add precise dependencies. + std::map<const Value *, SUnit *>::iterator I = + ((MayAlias) ? AliasMemDefs.find(V) : NonAliasMemDefs.find(V)); + std::map<const Value *, SUnit *>::iterator IE = + ((MayAlias) ? AliasMemDefs.end() : NonAliasMemDefs.end()); + if (I != IE) + I->second->addPred(SDep(SU, SDep::Order, /*Latency=*/0, /*Reg=*/0, + /*isNormalMemory=*/true)); + if (MayAlias) + AliasMemUses[V].push_back(SU); + else + NonAliasMemUses[V].push_back(SU); + } else { + // A load with no underlying object. Depend on all + // potentially aliasing stores. + for (std::map<const Value *, SUnit *>::iterator I = + AliasMemDefs.begin(), E = AliasMemDefs.end(); I != E; ++I) + I->second->addPred(SDep(SU, SDep::Order, /*Latency=*/0)); + + PendingLoads.push_back(SU); + MayAlias = true; } - PendingLoads.push_back(SU); - } + + // Add dependencies on alias and barrier chains, if needed. + if (MayAlias && AliasChain) + AliasChain->addPred(SDep(SU, SDep::Order, /*Latency=*/0)); + if (BarrierChain) + BarrierChain->addPred(SDep(SU, SDep::Order, /*Latency=*/0)); + } } } diff --git a/lib/CodeGen/SelectionDAG/CallingConvLower.cpp b/lib/CodeGen/SelectionDAG/CallingConvLower.cpp index fbe40b6..38839c4 100644 --- a/lib/CodeGen/SelectionDAG/CallingConvLower.cpp +++ b/lib/CodeGen/SelectionDAG/CallingConvLower.cpp @@ -77,6 +77,21 @@ CCState::AnalyzeFormalArguments(const SmallVectorImpl<ISD::InputArg> &Ins, } } +/// CheckReturn - Analyze the return values of a function, returning true if +/// the return can be performed without sret-demotion, and false otherwise. +bool CCState::CheckReturn(const SmallVectorImpl<EVT> &OutTys, + const SmallVectorImpl<ISD::ArgFlagsTy> &ArgsFlags, + CCAssignFn Fn) { + // Determine which register each value should be copied into. + for (unsigned i = 0, e = OutTys.size(); i != e; ++i) { + EVT VT = OutTys[i]; + ISD::ArgFlagsTy ArgFlags = ArgsFlags[i]; + if (Fn(i, VT, VT, CCValAssign::Full, ArgFlags, *this)) + return false; + } + return true; +} + /// AnalyzeReturn - Analyze the returned values of a return, /// incorporating info about the result values into this state. 
void CCState::AnalyzeReturn(const SmallVectorImpl<ISD::OutputArg> &Outs, diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 5f70cb8..06ffdd6 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -37,7 +37,6 @@ #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" #include <algorithm> -#include <set> using namespace llvm; STATISTIC(NodesCombined , "Number of dag nodes combined"); @@ -4443,14 +4442,13 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) { SDValue Chain = N->getOperand(0); SDValue N1 = N->getOperand(1); SDValue N2 = N->getOperand(2); - ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); - // never taken branch, fold to chain - if (N1C && N1C->isNullValue()) - return Chain; - // unconditional branch - if (N1C && N1C->getAPIntValue() == 1) - return DAG.getNode(ISD::BR, N->getDebugLoc(), MVT::Other, Chain, N2); + // If N is a constant we could fold this into a fallthrough or unconditional + // branch. However that doesn't happen very often in normal code, because + // Instcombine/SimplifyCFG should have handled the available opportunities. + // If we did this folding here, it would be necessary to update the + // MachineBasicBlock CFG, which is awkward. + // fold a brcond with a setcc condition into a BR_CC node if BR_CC is legal // on the target. if (N1.getOpcode() == ISD::SETCC && @@ -4517,22 +4515,18 @@ SDValue DAGCombiner::visitBR_CC(SDNode *N) { CondCodeSDNode *CC = cast<CondCodeSDNode>(N->getOperand(1)); SDValue CondLHS = N->getOperand(2), CondRHS = N->getOperand(3); + // If N is a constant we could fold this into a fallthrough or unconditional + // branch. However that doesn't happen very often in normal code, because + // Instcombine/SimplifyCFG should have handled the available opportunities. + // If we did this folding here, it would be necessary to update the + // MachineBasicBlock CFG, which is awkward. + // Use SimplifySetCC to simplify SETCC's. 
SDValue Simp = SimplifySetCC(TLI.getSetCCResultType(CondLHS.getValueType()), CondLHS, CondRHS, CC->get(), N->getDebugLoc(), false); if (Simp.getNode()) AddToWorkList(Simp.getNode()); - ConstantSDNode *SCCC = dyn_cast_or_null<ConstantSDNode>(Simp.getNode()); - - // fold br_cc true, dest -> br dest (unconditional branch) - if (SCCC && !SCCC->isNullValue()) - return DAG.getNode(ISD::BR, N->getDebugLoc(), MVT::Other, - N->getOperand(0), N->getOperand(4)); - // fold br_cc false, dest -> unconditional fall through - if (SCCC && SCCC->isNullValue()) - return N->getOperand(0); - // fold to a simpler setcc if (Simp.getNode() && Simp.getOpcode() == ISD::SETCC) return DAG.getNode(ISD::BR_CC, N->getDebugLoc(), MVT::Other, diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp index 8e955af..7dbc136 100644 --- a/lib/CodeGen/SelectionDAG/FastISel.cpp +++ b/lib/CodeGen/SelectionDAG/FastISel.cpp @@ -43,6 +43,7 @@ #include "llvm/GlobalVariable.h" #include "llvm/Instructions.h" #include "llvm/IntrinsicInst.h" +#include "llvm/LLVMContext.h" #include "llvm/CodeGen/FastISel.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineModuleInfo.h" @@ -324,82 +325,12 @@ bool FastISel::SelectCall(User *I) { unsigned IID = F->getIntrinsicID(); switch (IID) { default: break; - case Intrinsic::dbg_stoppoint: { - DbgStopPointInst *SPI = cast<DbgStopPointInst>(I); - if (isValidDebugInfoIntrinsic(*SPI, CodeGenOpt::None)) - setCurDebugLoc(ExtractDebugLocation(*SPI, MF.getDebugLocInfo())); + case Intrinsic::dbg_stoppoint: + case Intrinsic::dbg_region_start: + case Intrinsic::dbg_region_end: + case Intrinsic::dbg_func_start: + // FIXME - Remove this instructions once the dust settles. return true; - } - case Intrinsic::dbg_region_start: { - DbgRegionStartInst *RSI = cast<DbgRegionStartInst>(I); - if (isValidDebugInfoIntrinsic(*RSI, CodeGenOpt::None) && DW - && DW->ShouldEmitDwarfDebug()) { - unsigned ID = - DW->RecordRegionStart(RSI->getContext()); - const TargetInstrDesc &II = TII.get(TargetInstrInfo::DBG_LABEL); - BuildMI(MBB, DL, II).addImm(ID); - } - return true; - } - case Intrinsic::dbg_region_end: { - DbgRegionEndInst *REI = cast<DbgRegionEndInst>(I); - if (isValidDebugInfoIntrinsic(*REI, CodeGenOpt::None) && DW - && DW->ShouldEmitDwarfDebug()) { - unsigned ID = 0; - DISubprogram Subprogram(REI->getContext()); - if (isInlinedFnEnd(*REI, MF.getFunction())) { - // This is end of an inlined function. - const TargetInstrDesc &II = TII.get(TargetInstrInfo::DBG_LABEL); - ID = DW->RecordInlinedFnEnd(Subprogram); - if (ID) - // Returned ID is 0 if this is unbalanced "end of inlined - // scope". This could happen if optimizer eats dbg intrinsics - // or "beginning of inlined scope" is not recoginized due to - // missing location info. In such cases, ignore this region.end. - BuildMI(MBB, DL, II).addImm(ID); - } else { - const TargetInstrDesc &II = TII.get(TargetInstrInfo::DBG_LABEL); - ID = DW->RecordRegionEnd(REI->getContext()); - BuildMI(MBB, DL, II).addImm(ID); - } - } - return true; - } - case Intrinsic::dbg_func_start: { - DbgFuncStartInst *FSI = cast<DbgFuncStartInst>(I); - if (!isValidDebugInfoIntrinsic(*FSI, CodeGenOpt::None) || !DW - || !DW->ShouldEmitDwarfDebug()) - return true; - - if (isInlinedFnStart(*FSI, MF.getFunction())) { - // This is a beginning of an inlined function. - - // If llvm.dbg.func.start is seen in a new block before any - // llvm.dbg.stoppoint intrinsic then the location info is unknown. 
- // FIXME : Why DebugLoc is reset at the beginning of each block ? - DebugLoc PrevLoc = DL; - if (PrevLoc.isUnknown()) - return true; - // Record the source line. - setCurDebugLoc(ExtractDebugLocation(*FSI, MF.getDebugLocInfo())); - - DebugLocTuple PrevLocTpl = MF.getDebugLocTuple(PrevLoc); - DISubprogram SP(FSI->getSubprogram()); - unsigned LabelID = - DW->RecordInlinedFnStart(SP,DICompileUnit(PrevLocTpl.Scope), - PrevLocTpl.Line, PrevLocTpl.Col); - const TargetInstrDesc &II = TII.get(TargetInstrInfo::DBG_LABEL); - BuildMI(MBB, DL, II).addImm(LabelID); - return true; - } - - // This is a beginning of a new function. - MF.setDefaultDebugLoc(ExtractDebugLocation(*FSI, MF.getDebugLocInfo())); - - // llvm.dbg.func_start also defines beginning of function scope. - DW->RecordRegionStart(FSI->getSubprogram()); - return true; - } case Intrinsic::dbg_declare: { DbgDeclareInst *DI = cast<DbgDeclareInst>(I); if (!isValidDebugInfoIntrinsic(*DI, CodeGenOpt::None) || !DW @@ -416,11 +347,13 @@ bool FastISel::SelectCall(User *I) { StaticAllocaMap.find(AI); if (SI == StaticAllocaMap.end()) break; // VLAs. int FI = SI->second; - if (MMI) - MMI->setVariableDbgInfo(DI->getVariable(), FI); -#ifndef ATTACH_DEBUG_INFO_TO_AN_INSN - DW->RecordVariable(DI->getVariable(), FI); -#endif + if (MMI) { + MetadataContext &TheMetadata = + DI->getParent()->getContext().getMetadata(); + unsigned MDDbgKind = TheMetadata.getMDKind("dbg"); + MDNode *Dbg = TheMetadata.getMD(MDDbgKind, DI); + MMI->setVariableDbgInfo(DI->getVariable(), FI, Dbg); + } return true; } case Intrinsic::eh_exception: { diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp index da311ed..52b0832 100644 --- a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp +++ b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp @@ -497,7 +497,7 @@ InstrEmitter::EmitCopyToRegClassNode(SDNode *Node, assert(isNew && "Node emitted out of order - early"); } -/// EmitNode - Generate machine code for an node and needed dependencies. +/// EmitNode - Generate machine code for a node and needed dependencies. /// void InstrEmitter::EmitNode(SDNode *Node, bool IsClone, bool IsCloned, DenseMap<SDValue, unsigned> &VRBaseMap, diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.h b/lib/CodeGen/SelectionDAG/InstrEmitter.h index bb4634d..91817e4 100644 --- a/lib/CodeGen/SelectionDAG/InstrEmitter.h +++ b/lib/CodeGen/SelectionDAG/InstrEmitter.h @@ -97,7 +97,7 @@ public: /// MachineInstr. static unsigned CountOperands(SDNode *Node); - /// EmitNode - Generate machine code for an node and needed dependencies. + /// EmitNode - Generate machine code for a node and needed dependencies. 
/// void EmitNode(SDNode *Node, bool IsClone, bool IsCloned, DenseMap<SDValue, unsigned> &VRBaseMap, diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index f389f7f..4f0a229 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -148,8 +148,11 @@ private: SDValue ExpandFPLibCall(SDNode *Node, RTLIB::Libcall Call_F32, RTLIB::Libcall Call_F64, RTLIB::Libcall Call_F80, RTLIB::Libcall Call_PPCF128); - SDValue ExpandIntLibCall(SDNode *Node, bool isSigned, RTLIB::Libcall Call_I16, - RTLIB::Libcall Call_I32, RTLIB::Libcall Call_I64, + SDValue ExpandIntLibCall(SDNode *Node, bool isSigned, + RTLIB::Libcall Call_I8, + RTLIB::Libcall Call_I16, + RTLIB::Libcall Call_I32, + RTLIB::Libcall Call_I64, RTLIB::Libcall Call_I128); SDValue EmitStackConvert(SDValue SrcOp, EVT SlotVT, EVT DestVT, DebugLoc dl); @@ -1810,10 +1813,19 @@ SDValue SelectionDAGLegalize::ExpandBUILD_VECTOR(SDNode *Node) { CV.push_back(const_cast<ConstantFP *>(V->getConstantFPValue())); } else if (ConstantSDNode *V = dyn_cast<ConstantSDNode>(Node->getOperand(i))) { - CV.push_back(const_cast<ConstantInt *>(V->getConstantIntValue())); + if (OpVT==EltVT) + CV.push_back(const_cast<ConstantInt *>(V->getConstantIntValue())); + else { + // If OpVT and EltVT don't match, EltVT is not legal and the + // element values have been promoted/truncated earlier. Undo this; + // we don't want a v16i8 to become a v16i32 for example. + const ConstantInt *CI = V->getConstantIntValue(); + CV.push_back(ConstantInt::get(EltVT.getTypeForEVT(*DAG.getContext()), + CI->getZExtValue())); + } } else { assert(Node->getOperand(i).getOpcode() == ISD::UNDEF); - const Type *OpNTy = OpVT.getTypeForEVT(*DAG.getContext()); + const Type *OpNTy = EltVT.getTypeForEVT(*DAG.getContext()); CV.push_back(UndefValue::get(OpNTy)); } } @@ -1909,6 +1921,7 @@ SDValue SelectionDAGLegalize::ExpandFPLibCall(SDNode* Node, } SDValue SelectionDAGLegalize::ExpandIntLibCall(SDNode* Node, bool isSigned, + RTLIB::Libcall Call_I8, RTLIB::Libcall Call_I16, RTLIB::Libcall Call_I32, RTLIB::Libcall Call_I64, @@ -1916,9 +1929,10 @@ SDValue SelectionDAGLegalize::ExpandIntLibCall(SDNode* Node, bool isSigned, RTLIB::Libcall LC; switch (Node->getValueType(0).getSimpleVT().SimpleTy) { default: llvm_unreachable("Unexpected request for libcall!"); - case MVT::i16: LC = Call_I16; break; - case MVT::i32: LC = Call_I32; break; - case MVT::i64: LC = Call_I64; break; + case MVT::i8: LC = Call_I8; break; + case MVT::i16: LC = Call_I16; break; + case MVT::i32: LC = Call_I32; break; + case MVT::i64: LC = Call_I64; break; case MVT::i128: LC = Call_I128; break; } return ExpandLibCall(LC, Node, isSigned); @@ -2624,10 +2638,14 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, Tmp1 = DAG.getNode(ISD::MUL, dl, VT, Tmp1, Tmp3); Tmp1 = DAG.getNode(ISD::SUB, dl, VT, Tmp2, Tmp1); } else if (isSigned) { - Tmp1 = ExpandIntLibCall(Node, true, RTLIB::SREM_I16, RTLIB::SREM_I32, + Tmp1 = ExpandIntLibCall(Node, true, + RTLIB::SREM_I8, + RTLIB::SREM_I16, RTLIB::SREM_I32, RTLIB::SREM_I64, RTLIB::SREM_I128); } else { - Tmp1 = ExpandIntLibCall(Node, false, RTLIB::UREM_I16, RTLIB::UREM_I32, + Tmp1 = ExpandIntLibCall(Node, false, + RTLIB::UREM_I8, + RTLIB::UREM_I16, RTLIB::UREM_I32, RTLIB::UREM_I64, RTLIB::UREM_I128); } Results.push_back(Tmp1); @@ -2643,10 +2661,14 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, Tmp1 = DAG.getNode(DivRemOpc, dl, VTs, Node->getOperand(0), Node->getOperand(1)); else if (isSigned) - Tmp1 = 
ExpandIntLibCall(Node, true, RTLIB::SDIV_I16, RTLIB::SDIV_I32, + Tmp1 = ExpandIntLibCall(Node, true, + RTLIB::SDIV_I8, + RTLIB::SDIV_I16, RTLIB::SDIV_I32, RTLIB::SDIV_I64, RTLIB::SDIV_I128); else - Tmp1 = ExpandIntLibCall(Node, false, RTLIB::UDIV_I16, RTLIB::UDIV_I32, + Tmp1 = ExpandIntLibCall(Node, false, + RTLIB::UDIV_I8, + RTLIB::UDIV_I16, RTLIB::UDIV_I32, RTLIB::UDIV_I64, RTLIB::UDIV_I128); Results.push_back(Tmp1); break; @@ -2691,7 +2713,9 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, Node->getOperand(1))); break; } - Tmp1 = ExpandIntLibCall(Node, false, RTLIB::MUL_I16, RTLIB::MUL_I32, + Tmp1 = ExpandIntLibCall(Node, false, + RTLIB::MUL_I8, + RTLIB::MUL_I16, RTLIB::MUL_I32, RTLIB::MUL_I64, RTLIB::MUL_I128); Results.push_back(Tmp1); break; diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 98e7317..4530ffc 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -1270,11 +1270,12 @@ SDValue SelectionDAG::getConvertRndSat(EVT VT, DebugLoc dl, return Val; FoldingSetNodeID ID; + SDValue Ops[] = { Val, DTy, STy, Rnd, Sat }; + AddNodeIDNode(ID, ISD::CONVERT_RNDSAT, getVTList(VT), &Ops[0], 5); void* IP = 0; if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); CvtRndSatSDNode *N = NodeAllocator.Allocate<CvtRndSatSDNode>(); - SDValue Ops[] = { Val, DTy, STy, Rnd, Sat }; new (N) CvtRndSatSDNode(VT, dl, Ops, 5, Code); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); @@ -1378,7 +1379,7 @@ SDValue SelectionDAG::CreateStackTemporary(EVT VT, unsigned minAlign) { unsigned StackAlign = std::max((unsigned)TLI.getTargetData()->getPrefTypeAlignment(Ty), minAlign); - int FrameIdx = FrameInfo->CreateStackObject(ByteSize, StackAlign); + int FrameIdx = FrameInfo->CreateStackObject(ByteSize, StackAlign, false); return getFrameIndex(FrameIdx, TLI.getPointerTy()); } @@ -1394,7 +1395,7 @@ SDValue SelectionDAG::CreateStackTemporary(EVT VT1, EVT VT2) { TD->getPrefTypeAlignment(Ty2)); MachineFrameInfo *FrameInfo = getMachineFunction().getFrameInfo(); - int FrameIdx = FrameInfo->CreateStackObject(Bytes, Align); + int FrameIdx = FrameInfo->CreateStackObject(Bytes, Align, false); return getFrameIndex(FrameIdx, TLI.getPointerTy()); } @@ -5814,9 +5815,8 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const { void SDNode::print(raw_ostream &OS, const SelectionDAG *G) const { print_types(OS, G); - OS << " "; for (unsigned i = 0, e = getNumOperands(); i != e; ++i) { - if (i) OS << ", "; + if (i) OS << ", "; else OS << " "; OS << (void*)getOperand(i).getNode(); if (unsigned RN = getOperand(i).getResNo()) OS << ":" << RN; @@ -5916,7 +5916,8 @@ bool BuildVectorSDNode::isConstantSplat(APInt &SplatValue, APInt &SplatUndef, unsigned &SplatBitSize, bool &HasAnyUndefs, - unsigned MinSplatBits) { + unsigned MinSplatBits, + bool isBigEndian) { EVT VT = getValueType(0); assert(VT.isVector() && "Expected a vector type"); unsigned sz = VT.getSizeInBits(); @@ -5933,12 +5934,14 @@ bool BuildVectorSDNode::isConstantSplat(APInt &SplatValue, unsigned int nOps = getNumOperands(); assert(nOps > 0 && "isConstantSplat has 0-size build vector"); unsigned EltBitSize = VT.getVectorElementType().getSizeInBits(); - for (unsigned i = 0; i < nOps; ++i) { + + for (unsigned j = 0; j < nOps; ++j) { + unsigned i = isBigEndian ? 
nOps-1-j : j; SDValue OpVal = getOperand(i); - unsigned BitPos = i * EltBitSize; + unsigned BitPos = j * EltBitSize; if (OpVal.getOpcode() == ISD::UNDEF) - SplatUndef |= APInt::getBitsSet(sz, BitPos, BitPos +EltBitSize); + SplatUndef |= APInt::getBitsSet(sz, BitPos, BitPos + EltBitSize); else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) SplatValue |= (APInt(CN->getAPIntValue()).zextOrTrunc(EltBitSize). zextOrTrunc(sz) << BitPos); diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuild.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuild.cpp index c0d2a4d..90fd95e 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuild.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuild.cpp @@ -26,6 +26,7 @@ #include "llvm/Instructions.h" #include "llvm/Intrinsics.h" #include "llvm/IntrinsicInst.h" +#include "llvm/LLVMContext.h" #include "llvm/Module.h" #include "llvm/CodeGen/FastISel.h" #include "llvm/CodeGen/GCStrategy.h" @@ -304,7 +305,7 @@ void FunctionLoweringInfo::set(Function &fn, MachineFunction &mf, TySize *= CUI->getZExtValue(); // Get total allocated size. if (TySize == 0) TySize = 1; // Don't create zero-sized stack objects. StaticAllocaMap[AI] = - MF->getFrameInfo()->CreateStackObject(TySize, Align); + MF->getFrameInfo()->CreateStackObject(TySize, Align, false); } for (; BB != EB; ++BB) @@ -334,25 +335,6 @@ void FunctionLoweringInfo::set(Function &fn, MachineFunction &mf, DebugLoc DL; for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) { - if (CallInst *CI = dyn_cast<CallInst>(I)) { - if (Function *F = CI->getCalledFunction()) { - switch (F->getIntrinsicID()) { - default: break; - case Intrinsic::dbg_stoppoint: { - DbgStopPointInst *SPI = cast<DbgStopPointInst>(I); - if (isValidDebugInfoIntrinsic(*SPI, CodeGenOpt::Default)) - DL = ExtractDebugLocation(*SPI, MF->getDebugLocInfo()); - break; - } - case Intrinsic::dbg_func_start: { - DbgFuncStartInst *FSI = cast<DbgFuncStartInst>(I); - if (isValidDebugInfoIntrinsic(*FSI, CodeGenOpt::Default)) - DL = ExtractDebugLocation(*FSI, MF->getDebugLocInfo()); - break; - } - } - } - } PN = dyn_cast<PHINode>(I); if (!PN || PN->use_empty()) continue; @@ -947,58 +929,143 @@ SDValue SelectionDAGLowering::getValue(const Value *V) { return RFV.getCopyFromRegs(DAG, getCurDebugLoc(), Chain, NULL); } +/// Get the EVTs and ArgFlags collections that represent the return type +/// of the given function. This does not require a DAG or a return value, and +/// is suitable for use before any DAGs for the function are constructed. +static void getReturnInfo(const Type* ReturnType, + Attributes attr, SmallVectorImpl<EVT> &OutVTs, + SmallVectorImpl<ISD::ArgFlagsTy> &OutFlags, + TargetLowering &TLI, + SmallVectorImpl<uint64_t> *Offsets = 0) { + SmallVector<EVT, 4> ValueVTs; + ComputeValueVTs(TLI, ReturnType, ValueVTs, Offsets); + unsigned NumValues = ValueVTs.size(); + if ( NumValues == 0 ) return; + + for (unsigned j = 0, f = NumValues; j != f; ++j) { + EVT VT = ValueVTs[j]; + ISD::NodeType ExtendKind = ISD::ANY_EXTEND; + + if (attr & Attribute::SExt) + ExtendKind = ISD::SIGN_EXTEND; + else if (attr & Attribute::ZExt) + ExtendKind = ISD::ZERO_EXTEND; + + // FIXME: C calling convention requires the return type to be promoted to + // at least 32-bit. But this is not necessary for non-C calling + // conventions. The frontend should mark functions whose return values + // require promoting with signext or zeroext attributes. 
+ if (ExtendKind != ISD::ANY_EXTEND && VT.isInteger()) { + EVT MinVT = TLI.getRegisterType(ReturnType->getContext(), MVT::i32); + if (VT.bitsLT(MinVT)) + VT = MinVT; + } + + unsigned NumParts = TLI.getNumRegisters(ReturnType->getContext(), VT); + EVT PartVT = TLI.getRegisterType(ReturnType->getContext(), VT); + // 'inreg' on function refers to return value + ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy(); + if (attr & Attribute::InReg) + Flags.setInReg(); + + // Propagate extension type if any + if (attr & Attribute::SExt) + Flags.setSExt(); + else if (attr & Attribute::ZExt) + Flags.setZExt(); + + for (unsigned i = 0; i < NumParts; ++i) { + OutVTs.push_back(PartVT); + OutFlags.push_back(Flags); + } + } +} void SelectionDAGLowering::visitRet(ReturnInst &I) { SDValue Chain = getControlRoot(); SmallVector<ISD::OutputArg, 8> Outs; - for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i) { + FunctionLoweringInfo &FLI = DAG.getFunctionLoweringInfo(); + + if (!FLI.CanLowerReturn) { + unsigned DemoteReg = FLI.DemoteRegister; + const Function *F = I.getParent()->getParent(); + + // Emit a store of the return value through the virtual register. + // Leave Outs empty so that LowerReturn won't try to load return + // registers the usual way. + SmallVector<EVT, 1> PtrValueVTs; + ComputeValueVTs(TLI, PointerType::getUnqual(F->getReturnType()), + PtrValueVTs); + + SDValue RetPtr = DAG.getRegister(DemoteReg, PtrValueVTs[0]); + SDValue RetOp = getValue(I.getOperand(0)); + SmallVector<EVT, 4> ValueVTs; - ComputeValueVTs(TLI, I.getOperand(i)->getType(), ValueVTs); + SmallVector<uint64_t, 4> Offsets; + ComputeValueVTs(TLI, I.getOperand(0)->getType(), ValueVTs, &Offsets); unsigned NumValues = ValueVTs.size(); - if (NumValues == 0) continue; - - SDValue RetOp = getValue(I.getOperand(i)); - for (unsigned j = 0, f = NumValues; j != f; ++j) { - EVT VT = ValueVTs[j]; - ISD::NodeType ExtendKind = ISD::ANY_EXTEND; - - const Function *F = I.getParent()->getParent(); - if (F->paramHasAttr(0, Attribute::SExt)) - ExtendKind = ISD::SIGN_EXTEND; - else if (F->paramHasAttr(0, Attribute::ZExt)) - ExtendKind = ISD::ZERO_EXTEND; + SmallVector<SDValue, 4> Chains(NumValues); + EVT PtrVT = PtrValueVTs[0]; + for (unsigned i = 0; i != NumValues; ++i) + Chains[i] = DAG.getStore(Chain, getCurDebugLoc(), + SDValue(RetOp.getNode(), RetOp.getResNo() + i), + DAG.getNode(ISD::ADD, getCurDebugLoc(), PtrVT, RetPtr, + DAG.getConstant(Offsets[i], PtrVT)), + NULL, Offsets[i], false, 0); + Chain = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(), + MVT::Other, &Chains[0], NumValues); + } + else { + for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i) { + SmallVector<EVT, 4> ValueVTs; + ComputeValueVTs(TLI, I.getOperand(i)->getType(), ValueVTs); + unsigned NumValues = ValueVTs.size(); + if (NumValues == 0) continue; + + SDValue RetOp = getValue(I.getOperand(i)); + for (unsigned j = 0, f = NumValues; j != f; ++j) { + EVT VT = ValueVTs[j]; + + ISD::NodeType ExtendKind = ISD::ANY_EXTEND; + + const Function *F = I.getParent()->getParent(); + if (F->paramHasAttr(0, Attribute::SExt)) + ExtendKind = ISD::SIGN_EXTEND; + else if (F->paramHasAttr(0, Attribute::ZExt)) + ExtendKind = ISD::ZERO_EXTEND; + + // FIXME: C calling convention requires the return type to be promoted to + // at least 32-bit. But this is not necessary for non-C calling + // conventions. The frontend should mark functions whose return values + // require promoting with signext or zeroext attributes. 
+ if (ExtendKind != ISD::ANY_EXTEND && VT.isInteger()) { + EVT MinVT = TLI.getRegisterType(*DAG.getContext(), MVT::i32); + if (VT.bitsLT(MinVT)) + VT = MinVT; + } - // FIXME: C calling convention requires the return type to be promoted to - // at least 32-bit. But this is not necessary for non-C calling - // conventions. The frontend should mark functions whose return values - // require promoting with signext or zeroext attributes. - if (ExtendKind != ISD::ANY_EXTEND && VT.isInteger()) { - EVT MinVT = TLI.getRegisterType(*DAG.getContext(), MVT::i32); - if (VT.bitsLT(MinVT)) - VT = MinVT; + unsigned NumParts = TLI.getNumRegisters(*DAG.getContext(), VT); + EVT PartVT = TLI.getRegisterType(*DAG.getContext(), VT); + SmallVector<SDValue, 4> Parts(NumParts); + getCopyToParts(DAG, getCurDebugLoc(), + SDValue(RetOp.getNode(), RetOp.getResNo() + j), + &Parts[0], NumParts, PartVT, ExtendKind); + + // 'inreg' on function refers to return value + ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy(); + if (F->paramHasAttr(0, Attribute::InReg)) + Flags.setInReg(); + + // Propagate extension type if any + if (F->paramHasAttr(0, Attribute::SExt)) + Flags.setSExt(); + else if (F->paramHasAttr(0, Attribute::ZExt)) + Flags.setZExt(); + + for (unsigned i = 0; i < NumParts; ++i) + Outs.push_back(ISD::OutputArg(Flags, Parts[i], /*isfixed=*/true)); } - - unsigned NumParts = TLI.getNumRegisters(*DAG.getContext(), VT); - EVT PartVT = TLI.getRegisterType(*DAG.getContext(), VT); - SmallVector<SDValue, 4> Parts(NumParts); - getCopyToParts(DAG, getCurDebugLoc(), - SDValue(RetOp.getNode(), RetOp.getResNo() + j), - &Parts[0], NumParts, PartVT, ExtendKind); - - // 'inreg' on function refers to return value - ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy(); - if (F->paramHasAttr(0, Attribute::InReg)) - Flags.setInReg(); - - // Propagate extension type if any - if (F->paramHasAttr(0, Attribute::SExt)) - Flags.setSExt(); - else if (F->paramHasAttr(0, Attribute::ZExt)) - Flags.setZExt(); - - for (unsigned i = 0; i < NumParts; ++i) - Outs.push_back(ISD::OutputArg(Flags, Parts[i], /*isfixed=*/true)); } } @@ -1691,19 +1758,19 @@ bool SelectionDAGLowering::handleJTSwitchCase(CaseRec& CR, Case& FrontCase = *CR.Range.first; Case& BackCase = *(CR.Range.second-1); - const APInt& First = cast<ConstantInt>(FrontCase.Low)->getValue(); - const APInt& Last = cast<ConstantInt>(BackCase.High)->getValue(); + const APInt &First = cast<ConstantInt>(FrontCase.Low)->getValue(); + const APInt &Last = cast<ConstantInt>(BackCase.High)->getValue(); - size_t TSize = 0; + APInt TSize(First.getBitWidth(), 0); for (CaseItr I = CR.Range.first, E = CR.Range.second; I!=E; ++I) TSize += I->size(); - if (!areJTsAllowed(TLI) || TSize <= 3) + if (!areJTsAllowed(TLI) || TSize.ult(APInt(First.getBitWidth(), 4))) return false; APInt Range = ComputeRange(First, Last); - double Density = (double)TSize / Range.roundToDouble(); + double Density = TSize.roundToDouble() / Range.roundToDouble(); if (Density < 0.4) return false; @@ -1797,32 +1864,34 @@ bool SelectionDAGLowering::handleBTSplitSwitchCase(CaseRec& CR, // Size is the number of Cases represented by this range. 
unsigned Size = CR.Range.second - CR.Range.first; - const APInt& First = cast<ConstantInt>(FrontCase.Low)->getValue(); - const APInt& Last = cast<ConstantInt>(BackCase.High)->getValue(); + const APInt &First = cast<ConstantInt>(FrontCase.Low)->getValue(); + const APInt &Last = cast<ConstantInt>(BackCase.High)->getValue(); double FMetric = 0; CaseItr Pivot = CR.Range.first + Size/2; // Select optimal pivot, maximizing sum density of LHS and RHS. This will // (heuristically) allow us to emit JumpTable's later. - size_t TSize = 0; + APInt TSize(First.getBitWidth(), 0); for (CaseItr I = CR.Range.first, E = CR.Range.second; I!=E; ++I) TSize += I->size(); - size_t LSize = FrontCase.size(); - size_t RSize = TSize-LSize; + APInt LSize = FrontCase.size(); + APInt RSize = TSize-LSize; DEBUG(errs() << "Selecting best pivot: \n" << "First: " << First << ", Last: " << Last <<'\n' << "LSize: " << LSize << ", RSize: " << RSize << '\n'); for (CaseItr I = CR.Range.first, J=I+1, E = CR.Range.second; J!=E; ++I, ++J) { - const APInt& LEnd = cast<ConstantInt>(I->High)->getValue(); - const APInt& RBegin = cast<ConstantInt>(J->Low)->getValue(); + const APInt &LEnd = cast<ConstantInt>(I->High)->getValue(); + const APInt &RBegin = cast<ConstantInt>(J->Low)->getValue(); APInt Range = ComputeRange(LEnd, RBegin); assert((Range - 2ULL).isNonNegative() && "Invalid case distance"); - double LDensity = (double)LSize / (LEnd - First + 1ULL).roundToDouble(); - double RDensity = (double)RSize / (Last - RBegin + 1ULL).roundToDouble(); + double LDensity = (double)LSize.roundToDouble() / + (LEnd - First + 1ULL).roundToDouble(); + double RDensity = (double)RSize.roundToDouble() / + (Last - RBegin + 1ULL).roundToDouble(); double Metric = Range.logBase2()*(LDensity+RDensity); // Should always split in some non-trivial place DEBUG(errs() <<"=>Step\n" @@ -3842,112 +3911,12 @@ SelectionDAGLowering::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) { I.getOperand(1), 0, I.getOperand(2), 0)); return 0; } - case Intrinsic::dbg_stoppoint: { - DbgStopPointInst &SPI = cast<DbgStopPointInst>(I); - if (isValidDebugInfoIntrinsic(SPI, CodeGenOpt::Default)) { - MachineFunction &MF = DAG.getMachineFunction(); - DebugLoc Loc = ExtractDebugLocation(SPI, MF.getDebugLocInfo()); - setCurDebugLoc(Loc); - - if (OptLevel == CodeGenOpt::None) - DAG.setRoot(DAG.getDbgStopPoint(Loc, getRoot(), - SPI.getLine(), - SPI.getColumn(), - SPI.getContext())); - } + case Intrinsic::dbg_stoppoint: + case Intrinsic::dbg_region_start: + case Intrinsic::dbg_region_end: + case Intrinsic::dbg_func_start: + // FIXME - Remove this instructions once the dust settles. return 0; - } - case Intrinsic::dbg_region_start: { - DwarfWriter *DW = DAG.getDwarfWriter(); - DbgRegionStartInst &RSI = cast<DbgRegionStartInst>(I); - if (isValidDebugInfoIntrinsic(RSI, OptLevel) && DW - && DW->ShouldEmitDwarfDebug()) { - unsigned LabelID = - DW->RecordRegionStart(RSI.getContext()); - DAG.setRoot(DAG.getLabel(ISD::DBG_LABEL, getCurDebugLoc(), - getRoot(), LabelID)); - } - return 0; - } - case Intrinsic::dbg_region_end: { - DwarfWriter *DW = DAG.getDwarfWriter(); - DbgRegionEndInst &REI = cast<DbgRegionEndInst>(I); - - if (!isValidDebugInfoIntrinsic(REI, OptLevel) || !DW - || !DW->ShouldEmitDwarfDebug()) - return 0; - - MachineFunction &MF = DAG.getMachineFunction(); - DISubprogram Subprogram(REI.getContext()); - - if (isInlinedFnEnd(REI, MF.getFunction())) { - // This is end of inlined function. Debugging information for inlined - // function is not handled yet (only supported by FastISel). 
- if (OptLevel == CodeGenOpt::None) { - unsigned ID = DW->RecordInlinedFnEnd(Subprogram); - if (ID != 0) - // Returned ID is 0 if this is unbalanced "end of inlined - // scope". This could happen if optimizer eats dbg intrinsics or - // "beginning of inlined scope" is not recoginized due to missing - // location info. In such cases, do ignore this region.end. - DAG.setRoot(DAG.getLabel(ISD::DBG_LABEL, getCurDebugLoc(), - getRoot(), ID)); - } - return 0; - } - - unsigned LabelID = - DW->RecordRegionEnd(REI.getContext()); - DAG.setRoot(DAG.getLabel(ISD::DBG_LABEL, getCurDebugLoc(), - getRoot(), LabelID)); - return 0; - } - case Intrinsic::dbg_func_start: { - DwarfWriter *DW = DAG.getDwarfWriter(); - DbgFuncStartInst &FSI = cast<DbgFuncStartInst>(I); - if (!isValidDebugInfoIntrinsic(FSI, CodeGenOpt::None)) - return 0; - - MachineFunction &MF = DAG.getMachineFunction(); - // This is a beginning of an inlined function. - if (isInlinedFnStart(FSI, MF.getFunction())) { - if (OptLevel != CodeGenOpt::None) - // FIXME: Debugging informaation for inlined function is only - // supported at CodeGenOpt::Node. - return 0; - - DebugLoc PrevLoc = CurDebugLoc; - // If llvm.dbg.func.start is seen in a new block before any - // llvm.dbg.stoppoint intrinsic then the location info is unknown. - // FIXME : Why DebugLoc is reset at the beginning of each block ? - if (PrevLoc.isUnknown()) - return 0; - - // Record the source line. - setCurDebugLoc(ExtractDebugLocation(FSI, MF.getDebugLocInfo())); - - if (!DW || !DW->ShouldEmitDwarfDebug()) - return 0; - DebugLocTuple PrevLocTpl = MF.getDebugLocTuple(PrevLoc); - DISubprogram SP(FSI.getSubprogram()); - DICompileUnit CU(PrevLocTpl.Scope); - unsigned LabelID = DW->RecordInlinedFnStart(SP, CU, - PrevLocTpl.Line, - PrevLocTpl.Col); - DAG.setRoot(DAG.getLabel(ISD::DBG_LABEL, getCurDebugLoc(), - getRoot(), LabelID)); - return 0; - } - - // This is a beginning of a new function. - MF.setDefaultDebugLoc(ExtractDebugLocation(FSI, MF.getDebugLocInfo())); - - if (!DW || !DW->ShouldEmitDwarfDebug()) - return 0; - // llvm.dbg.func_start also defines beginning of function scope. - DW->RecordRegionStart(FSI.getSubprogram()); - return 0; - } case Intrinsic::dbg_declare: { if (OptLevel != CodeGenOpt::None) // FIXME: Variable debug info is not supported here. @@ -3972,13 +3941,15 @@ SelectionDAGLowering::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) { if (SI == FuncInfo.StaticAllocaMap.end()) return 0; // VLAs. 
int FI = SI->second; -#ifdef ATTACH_DEBUG_INFO_TO_AN_INSN + MachineModuleInfo *MMI = DAG.getMachineModuleInfo(); - if (MMI) - MMI->setVariableDbgInfo(Variable, FI); -#else - DW->RecordVariable(Variable, FI); -#endif + if (MMI) { + MetadataContext &TheMetadata = + DI.getParent()->getContext().getMetadata(); + unsigned MDDbgKind = TheMetadata.getMDKind("dbg"); + MDNode *Dbg = TheMetadata.getMD(MDDbgKind, &DI); + MMI->setVariableDbgInfo(Variable, FI, Dbg); + } return 0; } case Intrinsic::eh_exception: { @@ -4233,7 +4204,7 @@ SelectionDAGLowering::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) { EVT Ty = Arg.getValueType(); if (CI->getZExtValue() < 2) - setValue(&I, DAG.getConstant(-1U, Ty)); + setValue(&I, DAG.getConstant(-1ULL, Ty)); else setValue(&I, DAG.getConstant(0, Ty)); return 0; @@ -4355,6 +4326,16 @@ SelectionDAGLowering::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) { return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_UMAX); case Intrinsic::atomic_swap: return implVisitBinaryAtomic(I, ISD::ATOMIC_SWAP); + + case Intrinsic::invariant_start: + case Intrinsic::lifetime_start: + // Discard region information. + setValue(&I, DAG.getUNDEF(TLI.getPointerTy())); + return 0; + case Intrinsic::invariant_end: + case Intrinsic::lifetime_end: + // Discard region information. + return 0; } } @@ -4368,7 +4349,7 @@ SelectionDAGLowering::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) { /// TargetLowering::IsEligibleForTailCallOptimization. /// static bool -isInTailCallPosition(const Instruction *I, Attributes RetAttr, +isInTailCallPosition(const Instruction *I, Attributes CalleeRetAttr, const TargetLowering &TLI) { const BasicBlock *ExitBB = I->getParent(); const TerminatorInst *Term = ExitBB->getTerminator(); @@ -4395,9 +4376,14 @@ isInTailCallPosition(const Instruction *I, Attributes RetAttr, // what the call's return type is. if (!Ret || Ret->getNumOperands() == 0) return true; + // If the return value is undef, it doesn't matter what the call's + // return type is. + if (isa<UndefValue>(Ret->getOperand(0))) return true; + // Conservatively require the attributes of the call to match those of - // the return. - if (F->getAttributes().getRetAttributes() != RetAttr) + // the return. Ignore noalias because it doesn't affect the call sequence. + unsigned CallerRetAttr = F->getAttributes().getRetAttributes(); + if ((CalleeRetAttr ^ CallerRetAttr) & ~Attribute::NoAlias) return false; // Otherwise, make sure the unmodified return value of I is the return value. @@ -4431,15 +4417,52 @@ void SelectionDAGLowering::LowerCallTo(CallSite CS, SDValue Callee, MachineBasicBlock *LandingPad) { const PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType()); const FunctionType *FTy = cast<FunctionType>(PT->getElementType()); + const Type *RetTy = FTy->getReturnType(); MachineModuleInfo *MMI = DAG.getMachineModuleInfo(); unsigned BeginLabel = 0, EndLabel = 0; TargetLowering::ArgListTy Args; TargetLowering::ArgListEntry Entry; Args.reserve(CS.arg_size()); - unsigned j = 1; + + // Check whether the function can return without sret-demotion. 
+ SmallVector<EVT, 4> OutVTs; + SmallVector<ISD::ArgFlagsTy, 4> OutsFlags; + SmallVector<uint64_t, 4> Offsets; + getReturnInfo(RetTy, CS.getAttributes().getRetAttributes(), + OutVTs, OutsFlags, TLI, &Offsets); + + + bool CanLowerReturn = TLI.CanLowerReturn(CS.getCallingConv(), + FTy->isVarArg(), OutVTs, OutsFlags, DAG); + + SDValue DemoteStackSlot; + + if (!CanLowerReturn) { + uint64_t TySize = TLI.getTargetData()->getTypeAllocSize( + FTy->getReturnType()); + unsigned Align = TLI.getTargetData()->getPrefTypeAlignment( + FTy->getReturnType()); + MachineFunction &MF = DAG.getMachineFunction(); + int SSFI = MF.getFrameInfo()->CreateStackObject(TySize, Align, false); + const Type *StackSlotPtrType = PointerType::getUnqual(FTy->getReturnType()); + + DemoteStackSlot = DAG.getFrameIndex(SSFI, TLI.getPointerTy()); + Entry.Node = DemoteStackSlot; + Entry.Ty = StackSlotPtrType; + Entry.isSExt = false; + Entry.isZExt = false; + Entry.isInReg = false; + Entry.isSRet = true; + Entry.isNest = false; + Entry.isByVal = false; + Entry.Alignment = Align; + Args.push_back(Entry); + RetTy = Type::getVoidTy(FTy->getContext()); + } + for (CallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end(); - i != e; ++i, ++j) { + i != e; ++i) { SDValue ArgNode = getValue(*i); Entry.Node = ArgNode; Entry.Ty = (*i)->getType(); @@ -4475,7 +4498,7 @@ void SelectionDAGLowering::LowerCallTo(CallSite CS, SDValue Callee, isTailCall = false; std::pair<SDValue,SDValue> Result = - TLI.LowerCallTo(getRoot(), CS.getType(), + TLI.LowerCallTo(getRoot(), RetTy, CS.paramHasAttr(0, Attribute::SExt), CS.paramHasAttr(0, Attribute::ZExt), FTy->isVarArg(), CS.paramHasAttr(0, Attribute::InReg), FTy->getNumParams(), @@ -4489,6 +4512,35 @@ void SelectionDAGLowering::LowerCallTo(CallSite CS, SDValue Callee, "Null value expected with tail call!"); if (Result.first.getNode()) setValue(CS.getInstruction(), Result.first); + else if (!CanLowerReturn && Result.second.getNode()) { + // The instruction result is the result of loading from the + // hidden sret parameter. + SmallVector<EVT, 1> PVTs; + const Type *PtrRetTy = PointerType::getUnqual(FTy->getReturnType()); + + ComputeValueVTs(TLI, PtrRetTy, PVTs); + assert(PVTs.size() == 1 && "Pointers should fit in one register"); + EVT PtrVT = PVTs[0]; + unsigned NumValues = OutVTs.size(); + SmallVector<SDValue, 4> Values(NumValues); + SmallVector<SDValue, 4> Chains(NumValues); + + for (unsigned i = 0; i < NumValues; ++i) { + SDValue L = DAG.getLoad(OutVTs[i], getCurDebugLoc(), Result.second, + DAG.getNode(ISD::ADD, getCurDebugLoc(), PtrVT, DemoteStackSlot, + DAG.getConstant(Offsets[i], PtrVT)), + NULL, Offsets[i], false, 1); + Values[i] = L; + Chains[i] = L.getValue(1); + } + SDValue Chain = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(), + MVT::Other, &Chains[0], NumValues); + PendingLoads.push_back(Chain); + + setValue(CS.getInstruction(), DAG.getNode(ISD::MERGE_VALUES, + getCurDebugLoc(), DAG.getVTList(&OutVTs[0], NumValues), + &Values[0], NumValues)); + } // As a special case, a null chain means that a tail call has // been emitted and the DAG root is already updated. 
if (Result.second.getNode()) @@ -5229,7 +5281,7 @@ void SelectionDAGLowering::visitInlineAsm(CallSite CS) { uint64_t TySize = TLI.getTargetData()->getTypeAllocSize(Ty); unsigned Align = TLI.getTargetData()->getPrefTypeAlignment(Ty); MachineFunction &MF = DAG.getMachineFunction(); - int SSFI = MF.getFrameInfo()->CreateStackObject(TySize, Align); + int SSFI = MF.getFrameInfo()->CreateStackObject(TySize, Align, false); SDValue StackSlot = DAG.getFrameIndex(SSFI, TLI.getPointerTy()); Chain = DAG.getStore(Chain, getCurDebugLoc(), OpInfo.CallOperand, StackSlot, NULL, 0); @@ -5757,9 +5809,32 @@ void SelectionDAGISel::LowerArguments(BasicBlock *LLVMBB) { SDValue OldRoot = DAG.getRoot(); DebugLoc dl = SDL->getCurDebugLoc(); const TargetData *TD = TLI.getTargetData(); + SmallVector<ISD::InputArg, 16> Ins; + + // Check whether the function can return without sret-demotion. + SmallVector<EVT, 4> OutVTs; + SmallVector<ISD::ArgFlagsTy, 4> OutsFlags; + getReturnInfo(F.getReturnType(), F.getAttributes().getRetAttributes(), + OutVTs, OutsFlags, TLI); + FunctionLoweringInfo &FLI = DAG.getFunctionLoweringInfo(); + + FLI.CanLowerReturn = TLI.CanLowerReturn(F.getCallingConv(), F.isVarArg(), + OutVTs, OutsFlags, DAG); + if (!FLI.CanLowerReturn) { + // Put in an sret pointer parameter before all the other parameters. + SmallVector<EVT, 1> ValueVTs; + ComputeValueVTs(TLI, PointerType::getUnqual(F.getReturnType()), ValueVTs); + + // NOTE: Assuming that a pointer will never break down to more than one VT + // or one register. + ISD::ArgFlagsTy Flags; + Flags.setSRet(); + EVT RegisterVT = TLI.getRegisterType(*CurDAG->getContext(), ValueVTs[0]); + ISD::InputArg RetArg(Flags, RegisterVT, true); + Ins.push_back(RetArg); + } // Set up the incoming argument description vector. - SmallVector<ISD::InputArg, 16> Ins; unsigned Idx = 1; for (Function::arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E; ++I, ++Idx) { @@ -5837,6 +5912,28 @@ void SelectionDAGISel::LowerArguments(BasicBlock *LLVMBB) { // Set up the argument values. unsigned i = 0; Idx = 1; + if (!FLI.CanLowerReturn) { + // Create a virtual register for the sret pointer, and put in a copy + // from the sret argument into it. + SmallVector<EVT, 1> ValueVTs; + ComputeValueVTs(TLI, PointerType::getUnqual(F.getReturnType()), ValueVTs); + EVT VT = ValueVTs[0]; + EVT RegVT = TLI.getRegisterType(*CurDAG->getContext(), VT); + ISD::NodeType AssertOp = ISD::DELETED_NODE; + SDValue ArgValue = getCopyFromParts(DAG, dl, &InVals[0], 1, RegVT, + VT, AssertOp); + + MachineFunction& MF = SDL->DAG.getMachineFunction(); + MachineRegisterInfo& RegInfo = MF.getRegInfo(); + unsigned SRetReg = RegInfo.createVirtualRegister(TLI.getRegClassFor(RegVT)); + FLI.DemoteRegister = SRetReg; + NewRoot = SDL->DAG.getCopyToReg(NewRoot, SDL->getCurDebugLoc(), SRetReg, ArgValue); + DAG.setRoot(NewRoot); + + // i indexes lowered arguments. Bump it past the hidden sret argument. + // Idx indexes LLVM arguments. Don't touch it. + ++i; + } for (Function::arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E; ++I, ++Idx) { SmallVector<SDValue, 4> ArgValues; diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuild.h b/lib/CodeGen/SelectionDAG/SelectionDAGBuild.h index a0ec7aa..10f256c 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuild.h +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuild.h @@ -90,6 +90,14 @@ public: MachineFunction *MF; MachineRegisterInfo *RegInfo; + /// CanLowerReturn - true iff the function's return value can be lowered to + /// registers. 
+ bool CanLowerReturn; + + /// DemoteRegister - if CanLowerReturn is false, DemoteRegister is a vreg + /// allocated to hold a pointer to the hidden sret parameter. + unsigned DemoteRegister; + explicit FunctionLoweringInfo(TargetLowering &TLI); /// set - Initialize this FunctionLoweringInfo with the given Function @@ -193,9 +201,9 @@ class SelectionDAGLowering { Case() : Low(0), High(0), BB(0) { } Case(Constant* low, Constant* high, MachineBasicBlock* bb) : Low(low), High(high), BB(bb) { } - uint64_t size() const { - uint64_t rHigh = cast<ConstantInt>(High)->getSExtValue(); - uint64_t rLow = cast<ConstantInt>(Low)->getSExtValue(); + APInt size() const { + const APInt &rHigh = cast<ConstantInt>(High)->getValue(); + const APInt &rLow = cast<ConstantInt>(Low)->getValue(); return (rHigh - rLow + 1ULL); } }; diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index b63d5bb..ab5f21e 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -68,7 +68,7 @@ static cl::opt<bool> EnableFastISelAbort("fast-isel-abort", cl::Hidden, cl::desc("Enable abort calls when \"fast\" instruction fails")); static cl::opt<bool> -SchedLiveInCopies("schedule-livein-copies", +SchedLiveInCopies("schedule-livein-copies", cl::Hidden, cl::desc("Schedule copies of livein registers"), cl::init(false)); @@ -387,13 +387,14 @@ void SelectionDAGISel::SelectBasicBlock(BasicBlock *LLVMBB, if (MDDbgKind) { // Update DebugLoc if debug information is attached with this // instruction. - if (MDNode *Dbg = TheMetadata.getMD(MDDbgKind, I)) { - DILocation DILoc(Dbg); - DebugLoc Loc = ExtractDebugLocation(DILoc, MF->getDebugLocInfo()); - SDL->setCurDebugLoc(Loc); - if (MF->getDefaultDebugLoc().isUnknown()) - MF->setDefaultDebugLoc(Loc); - } + if (!isa<DbgInfoIntrinsic>(I)) + if (MDNode *Dbg = TheMetadata.getMD(MDDbgKind, I)) { + DILocation DILoc(Dbg); + DebugLoc Loc = ExtractDebugLocation(DILoc, MF->getDebugLocInfo()); + SDL->setCurDebugLoc(Loc); + if (MF->getDefaultDebugLoc().isUnknown()) + MF->setDefaultDebugLoc(Loc); + } } if (!isa<TerminatorInst>(I)) SDL->visit(*I); @@ -750,14 +751,15 @@ void SelectionDAGISel::SelectAllBasicBlocks(Function &Fn, if (MDDbgKind) { // Update DebugLoc if debug information is attached with this // instruction. 
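The Case::size() change above switches the range-width computation from uint64_t to APInt: case bounds are arbitrary-precision ConstantInts, so for switch conditions wider than 64 bits the old getSExtValue()-based arithmetic could truncate. A minimal sketch of the exact-width computation, assuming only the llvm/ADT/APInt.h interface:

    #include "llvm/ADT/APInt.h"

    // Inclusive width of a case range [Low, High]. For, say, a 128-bit switch
    // condition the result itself may not fit in 64 bits, so keep it an APInt.
    static llvm::APInt caseRangeSize(const llvm::APInt &Low,
                                     const llvm::APInt &High) {
      return High - Low + 1;
    }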
- if (MDNode *Dbg = TheMetadata.getMD(MDDbgKind, BI)) { - DILocation DILoc(Dbg); - DebugLoc Loc = ExtractDebugLocation(DILoc, - MF.getDebugLocInfo()); - FastIS->setCurDebugLoc(Loc); - if (MF.getDefaultDebugLoc().isUnknown()) - MF.setDefaultDebugLoc(Loc); - } + if (!isa<DbgInfoIntrinsic>(BI)) + if (MDNode *Dbg = TheMetadata.getMD(MDDbgKind, BI)) { + DILocation DILoc(Dbg); + DebugLoc Loc = ExtractDebugLocation(DILoc, + MF.getDebugLocInfo()); + FastIS->setCurDebugLoc(Loc); + if (MF.getDefaultDebugLoc().isUnknown()) + MF.setDefaultDebugLoc(Loc); + } } // Just before the terminator instruction, insert instructions to diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 9f36b67..2ca52a4 100644 --- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -22,7 +22,6 @@ #include "llvm/DerivedTypes.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/SelectionDAG.h" -#include "llvm/ADT/StringExtras.h" #include "llvm/ADT/STLExtras.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" @@ -65,22 +64,27 @@ static void InitLibcallNames(const char **Names) { Names[RTLIB::SRA_I32] = "__ashrsi3"; Names[RTLIB::SRA_I64] = "__ashrdi3"; Names[RTLIB::SRA_I128] = "__ashrti3"; + Names[RTLIB::MUL_I8] = "__mulqi3"; Names[RTLIB::MUL_I16] = "__mulhi3"; Names[RTLIB::MUL_I32] = "__mulsi3"; Names[RTLIB::MUL_I64] = "__muldi3"; Names[RTLIB::MUL_I128] = "__multi3"; + Names[RTLIB::SDIV_I8] = "__divqi3"; Names[RTLIB::SDIV_I16] = "__divhi3"; Names[RTLIB::SDIV_I32] = "__divsi3"; Names[RTLIB::SDIV_I64] = "__divdi3"; Names[RTLIB::SDIV_I128] = "__divti3"; + Names[RTLIB::UDIV_I8] = "__udivqi3"; Names[RTLIB::UDIV_I16] = "__udivhi3"; Names[RTLIB::UDIV_I32] = "__udivsi3"; Names[RTLIB::UDIV_I64] = "__udivdi3"; Names[RTLIB::UDIV_I128] = "__udivti3"; + Names[RTLIB::SREM_I8] = "__modqi3"; Names[RTLIB::SREM_I16] = "__modhi3"; Names[RTLIB::SREM_I32] = "__modsi3"; Names[RTLIB::SREM_I64] = "__moddi3"; Names[RTLIB::SREM_I128] = "__modti3"; + Names[RTLIB::UREM_I8] = "__umodqi3"; Names[RTLIB::UREM_I16] = "__umodhi3"; Names[RTLIB::UREM_I32] = "__umodsi3"; Names[RTLIB::UREM_I64] = "__umoddi3"; @@ -2360,7 +2364,7 @@ getRegForInlineAsmConstraint(const std::string &Constraint, assert(*(Constraint.end()-1) == '}' && "Not a brace enclosed constraint?"); // Remove the braces from around the name. - std::string RegName(Constraint.begin()+1, Constraint.end()-1); + StringRef RegName(Constraint.data()+1, Constraint.size()-2); // Figure out which register class contains this reg. 
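The getRegForInlineAsmConstraint hunk above stops copying the register name out of a "{name}" constraint into a temporary std::string; a StringRef view over the original buffer is enough, and equals_lower() provides the case-insensitive comparison against the target's register names. A standalone sketch of the same brace-stripping (registerNameOf is a hypothetical helper, not part of the patch):

    #include "llvm/ADT/StringRef.h"
    #include <string>

    // View the register name inside a "{name}" constraint without copying it.
    static llvm::StringRef registerNameOf(const std::string &Constraint) {
      // The caller has already verified the surrounding '{' and '}'.
      return llvm::StringRef(Constraint.data() + 1, Constraint.size() - 2);
    }

    // e.g. registerNameOf("{EAX}").equals_lower("eax") is true.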
const TargetRegisterInfo *RI = TM.getRegisterInfo(); @@ -2383,7 +2387,7 @@ getRegForInlineAsmConstraint(const std::string &Constraint, for (TargetRegisterClass::iterator I = RC->begin(), E = RC->end(); I != E; ++I) { - if (StringsEqualNoCase(RegName, RI->getName(*I))) + if (RegName.equals_lower(RI->getName(*I))) return std::make_pair(*I, RC); } } diff --git a/lib/CodeGen/SimpleRegisterCoalescing.cpp b/lib/CodeGen/SimpleRegisterCoalescing.cpp index b5d6b47..3909c56 100644 --- a/lib/CodeGen/SimpleRegisterCoalescing.cpp +++ b/lib/CodeGen/SimpleRegisterCoalescing.cpp @@ -709,7 +709,7 @@ bool SimpleRegisterCoalescing::ReMaterializeTrivialDef(LiveInterval &SrcInt, } MachineBasicBlock::iterator MII = next(MachineBasicBlock::iterator(CopyMI)); - tii_->reMaterialize(*MBB, MII, DstReg, DstSubIdx, DefMI); + tii_->reMaterialize(*MBB, MII, DstReg, DstSubIdx, DefMI, tri_); MachineInstr *NewMI = prior(MII); if (checkForDeadDef) { diff --git a/lib/CodeGen/SlotIndexes.cpp b/lib/CodeGen/SlotIndexes.cpp index f3ad0d1..f85384b 100644 --- a/lib/CodeGen/SlotIndexes.cpp +++ b/lib/CodeGen/SlotIndexes.cpp @@ -13,15 +13,43 @@ #include "llvm/CodeGen/MachineFunction.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Support/ManagedStatic.h" using namespace llvm; -std::auto_ptr<IndexListEntry> IndexListEntry::emptyKeyEntry, - IndexListEntry::tombstoneKeyEntry; + +// Yep - these are thread safe. See the header for details. +namespace { + + + class EmptyIndexListEntry : public IndexListEntry { + public: + EmptyIndexListEntry() : IndexListEntry(EMPTY_KEY) {} + }; + + class TombstoneIndexListEntry : public IndexListEntry { + public: + TombstoneIndexListEntry() : IndexListEntry(TOMBSTONE_KEY) {} + }; + + // The following statics are thread safe. They're read only, and you + // can't step from them to any other list entries. + ManagedStatic<EmptyIndexListEntry> IndexListEntryEmptyKey; + ManagedStatic<TombstoneIndexListEntry> IndexListEntryTombstoneKey; +} char SlotIndexes::ID = 0; static RegisterPass<SlotIndexes> X("slotindexes", "Slot index numbering"); +IndexListEntry* IndexListEntry::getEmptyKeyEntry() { + return &*IndexListEntryEmptyKey; +} + +IndexListEntry* IndexListEntry::getTombstoneKeyEntry() { + return &*IndexListEntryTombstoneKey; +} + + void SlotIndexes::getAnalysisUsage(AnalysisUsage &au) const { au.setPreservesAll(); MachineFunctionPass::getAnalysisUsage(au); @@ -51,8 +79,6 @@ bool SlotIndexes::runOnMachineFunction(MachineFunction &fn) { mf = &fn; initList(); - const unsigned gap = 1; - // Check that the list contains only the sentinal. assert(indexListHead->getNext() == 0 && "Index list non-empty at initial numbering?"); @@ -64,14 +90,6 @@ bool SlotIndexes::runOnMachineFunction(MachineFunction &fn) { "MachineInstr -> Index mapping non-empty at initial numbering?"); functionSize = 0; - /* - for (unsigned s = 0; s < SlotIndex::NUM; ++s) { - indexList.push_back(createEntry(0, s)); - } - - unsigned index = gap * SlotIndex::NUM; - */ - unsigned index = 0; // Iterate over the the function. 
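The SlotIndexes.cpp hunk above replaces the auto_ptr-held empty/tombstone IndexListEntry sentinels (which DenseMap needs as reserved key values) with ManagedStatic objects exposed through getEmptyKeyEntry() and getTombstoneKeyEntry(), so they are constructed lazily and torn down by llvm_shutdown(). A minimal sketch of the ManagedStatic pattern itself, using a hypothetical Counter type rather than IndexListEntry:

    #include "llvm/Support/ManagedStatic.h"

    namespace {
      struct Counter {
        unsigned Value;
        Counter() : Value(0) {}
      };

      // Constructed on first use, destroyed by llvm::llvm_shutdown().
      llvm::ManagedStatic<Counter> TheCounter;
    }

    unsigned bumpCounter() {
      return ++TheCounter->Value;   // operator-> triggers the lazy construction
    }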
@@ -83,7 +101,7 @@ bool SlotIndexes::runOnMachineFunction(MachineFunction &fn) { push_back(createEntry(0, index)); SlotIndex blockStartIndex(back(), SlotIndex::LOAD); - index += gap * SlotIndex::NUM; + index += SlotIndex::NUM; for (MachineBasicBlock::iterator miItr = mbb->begin(), miEnd = mbb->end(); miItr != miEnd; ++miItr) { @@ -93,7 +111,7 @@ bool SlotIndexes::runOnMachineFunction(MachineFunction &fn) { push_back(createEntry(0, index)); terminatorGaps.insert( std::make_pair(mbb, SlotIndex(back(), SlotIndex::PHI_BIT))); - index += gap * SlotIndex::NUM; + index += SlotIndex::NUM; } // Insert a store index for the instr. @@ -109,14 +127,14 @@ bool SlotIndexes::runOnMachineFunction(MachineFunction &fn) { if (Slots == 0) Slots = 1; - index += (Slots + 1) * gap * SlotIndex::NUM; + index += (Slots + 1) * SlotIndex::NUM; } if (mbb->getFirstTerminator() == mbb->end()) { push_back(createEntry(0, index)); terminatorGaps.insert( std::make_pair(mbb, SlotIndex(back(), SlotIndex::PHI_BIT))); - index += gap * SlotIndex::NUM; + index += SlotIndex::NUM; } SlotIndex blockEndIndex(back(), SlotIndex::STORE); @@ -138,21 +156,36 @@ bool SlotIndexes::runOnMachineFunction(MachineFunction &fn) { return false; } -void SlotIndexes::renumber() { - assert(false && "SlotIndexes::runmuber is not fully implemented yet."); +void SlotIndexes::renumberIndexes() { - // Compute numbering as follows: - // Grab an iterator to the start of the index list. - // Iterate over all MBBs, and within each MBB all MIs, keeping the MI - // iterator in lock-step (though skipping it over indexes which have - // null pointers in the instruction field). - // At each iteration assert that the instruction pointed to in the index - // is the same one pointed to by the MI iterator. This + // Renumber updates the index of every element of the index list. + // If all instrs in the function have been allocated an index (which has been + // placed in the index list in the order of instruction iteration) then the + // resulting numbering will match what would have been generated by the + // pass during the initial numbering of the function if the new instructions + // had been present. - // FIXME: This can be simplified. The mi2iMap_, Idx2MBBMap, etc. should - // only need to be set up once - when the first numbering is computed. + functionSize = 0; + unsigned index = 0; + + for (IndexListEntry *curEntry = front(); curEntry != getTail(); + curEntry = curEntry->getNext()) { - assert(false && "Renumbering not supported yet."); + curEntry->setIndex(index); + + if (curEntry->getInstr() == 0) { + // MBB start entry or terminator gap. Just step index by 1. + index += SlotIndex::NUM; + } + else { + ++functionSize; + unsigned Slots = curEntry->getInstr()->getDesc().getNumDefs(); + if (Slots == 0) + Slots = 1; + + index += (Slots + 1) * SlotIndex::NUM; + } + } } void SlotIndexes::dump() const { @@ -167,7 +200,7 @@ void SlotIndexes::dump() const { } } - for (MBB2IdxMap::iterator itr = mbb2IdxMap.begin(); + for (MBB2IdxMap::const_iterator itr = mbb2IdxMap.begin(); itr != mbb2IdxMap.end(); ++itr) { errs() << "MBB " << itr->first->getNumber() << " (" << itr->first << ") - [" << itr->second.first << ", " << itr->second.second << "]\n"; diff --git a/lib/CodeGen/Spiller.cpp b/lib/CodeGen/Spiller.cpp index 95e85be..9107325 100644 --- a/lib/CodeGen/Spiller.cpp +++ b/lib/CodeGen/Spiller.cpp @@ -52,16 +52,16 @@ protected: /// Ensures there is space before the given machine instruction, returns the /// instruction's new number. 
SlotIndex makeSpaceBefore(MachineInstr *mi) { - if (!lis->hasGapBeforeInstr(lis->getInstructionIndex(mi))) { + //if (!lis->hasGapBeforeInstr(lis->getInstructionIndex(mi))) { // FIXME: Should be updated to use rewrite-in-place methods when they're // introduced. Currently broken. //lis->scaleNumbering(2); //ls->scaleNumbering(2); - } + //} SlotIndex miIdx = lis->getInstructionIndex(mi); - assert(lis->hasGapBeforeInstr(miIdx)); + //assert(lis->hasGapBeforeInstr(miIdx)); return miIdx; } @@ -69,16 +69,16 @@ protected: /// Ensure there is space after the given machine instruction, returns the /// instruction's new number. SlotIndex makeSpaceAfter(MachineInstr *mi) { - if (!lis->hasGapAfterInstr(lis->getInstructionIndex(mi))) { + //if (!lis->hasGapAfterInstr(lis->getInstructionIndex(mi))) { // FIXME: Should be updated to use rewrite-in-place methods when they're // introduced. Currently broken. // lis->scaleNumbering(2); // ls->scaleNumbering(2); - } + //} SlotIndex miIdx = lis->getInstructionIndex(mi); - assert(lis->hasGapAfterInstr(miIdx)); + //assert(lis->hasGapAfterInstr(miIdx)); return miIdx; } @@ -99,14 +99,8 @@ protected: true, ss, trc); MachineBasicBlock::iterator storeInstItr(next(mi)); MachineInstr *storeInst = &*storeInstItr; - SlotIndex storeInstIdx = miIdx.getNextIndex(); - - assert(lis->getInstructionFromIndex(storeInstIdx) == 0 && - "Store inst index already in use."); - lis->InsertMachineInstrInMaps(storeInst, storeInstIdx); - - return storeInstIdx; + return lis->InsertMachineInstrInMaps(storeInst); } /// Insert a store of the given vreg to the given stack slot immediately @@ -120,14 +114,8 @@ protected: tii->storeRegToStackSlot(*mi->getParent(), mi, vreg, true, ss, trc); MachineBasicBlock::iterator storeInstItr(prior(mi)); MachineInstr *storeInst = &*storeInstItr; - SlotIndex storeInstIdx = miIdx.getPrevIndex(); - - assert(lis->getInstructionFromIndex(storeInstIdx) == 0 && - "Store inst index already in use."); - lis->InsertMachineInstrInMaps(storeInst, storeInstIdx); - - return storeInstIdx; + return lis->InsertMachineInstrInMaps(storeInst); } void insertStoreAfterInstOnInterval(LiveInterval *li, @@ -164,14 +152,8 @@ protected: tii->loadRegFromStackSlot(*mi->getParent(), nextInstItr, vreg, ss, trc); MachineBasicBlock::iterator loadInstItr(next(mi)); MachineInstr *loadInst = &*loadInstItr; - SlotIndex loadInstIdx = miIdx.getNextIndex(); - - assert(lis->getInstructionFromIndex(loadInstIdx) == 0 && - "Store inst index already in use."); - lis->InsertMachineInstrInMaps(loadInst, loadInstIdx); - - return loadInstIdx; + return lis->InsertMachineInstrInMaps(loadInst); } /// Insert a load of the given vreg from the given stack slot immediately @@ -186,14 +168,8 @@ protected: tii->loadRegFromStackSlot(*mi->getParent(), mi, vreg, ss, trc); MachineBasicBlock::iterator loadInstItr(prior(mi)); MachineInstr *loadInst = &*loadInstItr; - SlotIndex loadInstIdx = miIdx.getPrevIndex(); - - assert(lis->getInstructionFromIndex(loadInstIdx) == 0 && - "Load inst index already in use."); - - lis->InsertMachineInstrInMaps(loadInst, loadInstIdx); - return loadInstIdx; + return lis->InsertMachineInstrInMaps(loadInst); } void insertLoadBeforeInstOnInterval(LiveInterval *li, diff --git a/lib/CodeGen/TargetInstrInfoImpl.cpp b/lib/CodeGen/TargetInstrInfoImpl.cpp index c646869..102e2a3 100644 --- a/lib/CodeGen/TargetInstrInfoImpl.cpp +++ b/lib/CodeGen/TargetInstrInfoImpl.cpp @@ -135,14 +135,52 @@ void TargetInstrInfoImpl::reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, unsigned DestReg, 
unsigned SubIdx, - const MachineInstr *Orig) const { + const MachineInstr *Orig, + const TargetRegisterInfo *TRI) const { MachineInstr *MI = MBB.getParent()->CloneMachineInstr(Orig); MachineOperand &MO = MI->getOperand(0); - MO.setReg(DestReg); - MO.setSubReg(SubIdx); + if (TargetRegisterInfo::isVirtualRegister(DestReg)) { + MO.setReg(DestReg); + MO.setSubReg(SubIdx); + } else if (SubIdx) { + MO.setReg(TRI->getSubReg(DestReg, SubIdx)); + } else { + MO.setReg(DestReg); + } MBB.insert(I, MI); } +bool +TargetInstrInfoImpl::isIdentical(const MachineInstr *MI, + const MachineInstr *Other, + const MachineRegisterInfo *MRI) const { + if (MI->getOpcode() != Other->getOpcode() || + MI->getNumOperands() != Other->getNumOperands()) + return false; + + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI->getOperand(i); + const MachineOperand &OMO = Other->getOperand(i); + if (MO.isReg() && MO.isDef()) { + assert(OMO.isReg() && OMO.isDef()); + unsigned Reg = MO.getReg(); + if (TargetRegisterInfo::isPhysicalRegister(Reg)) { + if (Reg != OMO.getReg()) + return false; + } else if (MRI->getRegClass(MO.getReg()) != + MRI->getRegClass(OMO.getReg())) + return false; + + continue; + } + + if (!MO.isIdenticalTo(OMO)) + return false; + } + + return true; +} + unsigned TargetInstrInfoImpl::GetFunctionSizeInBytes(const MachineFunction &MF) const { unsigned FnSize = 0; diff --git a/lib/CodeGen/TwoAddressInstructionPass.cpp b/lib/CodeGen/TwoAddressInstructionPass.cpp index 0a6a0d7..84467ed 100644 --- a/lib/CodeGen/TwoAddressInstructionPass.cpp +++ b/lib/CodeGen/TwoAddressInstructionPass.cpp @@ -1033,7 +1033,7 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) { isProfitableToReMat(regB, rc, mi, DefMI, mbbi, Dist)){ DEBUG(errs() << "2addr: REMATTING : " << *DefMI << "\n"); unsigned regASubIdx = mi->getOperand(DstIdx).getSubReg(); - TII->reMaterialize(*mbbi, mi, regA, regASubIdx, DefMI); + TII->reMaterialize(*mbbi, mi, regA, regASubIdx, DefMI, TRI); ReMatRegs.set(regB); ++NumReMats; } else { diff --git a/lib/CodeGen/VirtRegMap.cpp b/lib/CodeGen/VirtRegMap.cpp index ce3eed1..c8c5d86 100644 --- a/lib/CodeGen/VirtRegMap.cpp +++ b/lib/CodeGen/VirtRegMap.cpp @@ -117,8 +117,8 @@ int VirtRegMap::assignVirt2StackSlot(unsigned virtReg) { assert(Virt2StackSlotMap[virtReg] == NO_STACK_SLOT && "attempt to assign stack slot to already spilled register"); const TargetRegisterClass* RC = MF->getRegInfo().getRegClass(virtReg); - int SS = MF->getFrameInfo()->CreateStackObject(RC->getSize(), - RC->getAlignment(), /*isSS*/true); + int SS = MF->getFrameInfo()->CreateSpillStackObject(RC->getSize(), + RC->getAlignment()); if (LowSpillSlot == NO_STACK_SLOT) LowSpillSlot = SS; if (HighSpillSlot == NO_STACK_SLOT || SS > HighSpillSlot) @@ -161,8 +161,8 @@ int VirtRegMap::getEmergencySpillSlot(const TargetRegisterClass *RC) { EmergencySpillSlots.find(RC); if (I != EmergencySpillSlots.end()) return I->second; - int SS = MF->getFrameInfo()->CreateStackObject(RC->getSize(), - RC->getAlignment(), /*isSS*/true); + int SS = MF->getFrameInfo()->CreateSpillStackObject(RC->getSize(), + RC->getAlignment()); if (LowSpillSlot == NO_STACK_SLOT) LowSpillSlot = SS; if (HighSpillSlot == NO_STACK_SLOT || SS > HighSpillSlot) diff --git a/lib/CodeGen/VirtRegRewriter.cpp b/lib/CodeGen/VirtRegRewriter.cpp index fd80f46..ec0abd1 100644 --- a/lib/CodeGen/VirtRegRewriter.cpp +++ b/lib/CodeGen/VirtRegRewriter.cpp @@ -483,19 +483,20 @@ static void InvalidateKills(MachineInstr &MI, } /// InvalidateRegDef 
- If the def operand of the specified def MI is now dead -/// (since it's spill instruction is removed), mark it isDead. Also checks if +/// (since its spill instruction is removed), mark it isDead. Also checks if /// the def MI has other definition operands that are not dead. Returns it by /// reference. static bool InvalidateRegDef(MachineBasicBlock::iterator I, MachineInstr &NewDef, unsigned Reg, - bool &HasLiveDef) { + bool &HasLiveDef, + const TargetRegisterInfo *TRI) { // Due to remat, it's possible this reg isn't being reused. That is, // the def of this reg (by prev MI) is now dead. MachineInstr *DefMI = I; MachineOperand *DefOp = NULL; for (unsigned i = 0, e = DefMI->getNumOperands(); i != e; ++i) { MachineOperand &MO = DefMI->getOperand(i); - if (!MO.isReg() || !MO.isUse() || !MO.isKill() || MO.isUndef()) + if (!MO.isReg() || !MO.isDef() || !MO.isKill() || MO.isUndef()) continue; if (MO.getReg() == Reg) DefOp = &MO; @@ -512,7 +513,8 @@ static bool InvalidateRegDef(MachineBasicBlock::iterator I, MachineInstr *NMI = I; for (unsigned j = 0, ee = NMI->getNumOperands(); j != ee; ++j) { MachineOperand &MO = NMI->getOperand(j); - if (!MO.isReg() || MO.getReg() != Reg) + if (!MO.isReg() || MO.getReg() == 0 || + (MO.getReg() != Reg && !TRI->isSubRegister(Reg, MO.getReg()))) continue; if (MO.isUse()) FoundUse = true; @@ -556,11 +558,30 @@ static void UpdateKills(MachineInstr &MI, const TargetRegisterInfo* TRI, KillOps[*SR] = NULL; RegKills.reset(*SR); } - - if (!MI.isRegTiedToDefOperand(i)) - // Unless it's a two-address operand, this is the new kill. - MO.setIsKill(); + } else { + // Check for subreg kills as well. + // d4 = + // store d4, fi#0 + // ... + // = s8<kill> + // ... + // = d4 <avoiding reload> + for (const unsigned *SR = TRI->getSubRegisters(Reg); *SR; ++SR) { + unsigned SReg = *SR; + if (RegKills[SReg] && KillOps[SReg]->getParent() != &MI) { + KillOps[SReg]->setIsKill(false); + unsigned KReg = KillOps[SReg]->getReg(); + KillOps[KReg] = NULL; + RegKills.reset(KReg); + + for (const unsigned *SSR = TRI->getSubRegisters(KReg); *SSR; ++SSR) { + KillOps[*SSR] = NULL; + RegKills.reset(*SSR); + } + } + } } + if (MO.isKill()) { RegKills.set(Reg); KillOps[Reg] = &MO; @@ -573,7 +594,7 @@ static void UpdateKills(MachineInstr &MI, const TargetRegisterInfo* TRI, for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { const MachineOperand &MO = MI.getOperand(i); - if (!MO.isReg() || !MO.isDef()) + if (!MO.isReg() || !MO.getReg() || !MO.isDef()) continue; unsigned Reg = MO.getReg(); RegKills.reset(Reg); @@ -583,6 +604,10 @@ static void UpdateKills(MachineInstr &MI, const TargetRegisterInfo* TRI, RegKills.reset(*SR); KillOps[*SR] = NULL; } + for (const unsigned *SR = TRI->getSuperRegisters(Reg); *SR; ++SR) { + RegKills.reset(*SR); + KillOps[*SR] = NULL; + } } } @@ -601,7 +626,7 @@ static void ReMaterialize(MachineBasicBlock &MBB, "Don't know how to remat instructions that define > 1 values!"); #endif TII->reMaterialize(MBB, MII, DestReg, - ReMatDefMI->getOperand(0).getSubReg(), ReMatDefMI); + ReMatDefMI->getOperand(0).getSubReg(), ReMatDefMI, TRI); MachineInstr *NewMI = prior(MII); for (unsigned i = 0, e = NewMI->getNumOperands(); i != e; ++i) { MachineOperand &MO = NewMI->getOperand(i); @@ -816,11 +841,8 @@ unsigned ReuseInfo::GetRegForReload(const TargetRegisterClass *RC, "A reuse cannot be a virtual register"); if (PRRU != RealPhysRegUsed) { // What was the sub-register index? 
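The UpdateKills changes above make the kill bookkeeping alias-aware: a definition (or reuse) of a register must also invalidate any pending kill recorded for its sub- and super-registers, which is what the d4/s8 comment illustrates. A small sketch of that invalidation pattern, assuming the RegKills/KillOps representation used by the surrounding rewriter code:

    #include "llvm/ADT/BitVector.h"
    #include "llvm/Target/TargetRegisterInfo.h"
    #include <vector>

    namespace llvm { class MachineOperand; }

    // Forget the recorded kill for Reg and for every register overlapping it:
    // a def of d4 must also clear a pending kill of s8, and vice versa.
    static void forgetKills(unsigned Reg, const llvm::TargetRegisterInfo *TRI,
                            llvm::BitVector &RegKills,
                            std::vector<llvm::MachineOperand*> &KillOps) {
      RegKills.reset(Reg);
      KillOps[Reg] = 0;
      for (const unsigned *SR = TRI->getSubRegisters(Reg); *SR; ++SR) {
        RegKills.reset(*SR);
        KillOps[*SR] = 0;
      }
      for (const unsigned *SR = TRI->getSuperRegisters(Reg); *SR; ++SR) {
        RegKills.reset(*SR);
        KillOps[*SR] = 0;
      }
    }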
- unsigned SubReg; - for (SubIdx = 1; (SubReg = TRI->getSubReg(PRRU, SubIdx)); SubIdx++) - if (SubReg == RealPhysRegUsed) - break; - assert(SubReg == RealPhysRegUsed && + SubIdx = TRI->getSubRegIndex(PRRU, RealPhysRegUsed); + assert(SubIdx && "Operand physreg is not a sub-register of PhysRegUsed"); } @@ -1454,7 +1476,7 @@ private: // being reused. for (unsigned j = 0, ee = KillRegs.size(); j != ee; ++j) { bool HasOtherDef = false; - if (InvalidateRegDef(PrevMII, *MII, KillRegs[j], HasOtherDef)) { + if (InvalidateRegDef(PrevMII, *MII, KillRegs[j], HasOtherDef, TRI)) { MachineInstr *DeadDef = PrevMII; if (ReMatDefs.count(DeadDef) && !HasOtherDef) { // FIXME: This assumes a remat def does not have side effects. @@ -1704,6 +1726,7 @@ private: // Mark is killed. MachineInstr *CopyMI = prior(InsertLoc); + CopyMI->setAsmPrinterFlag(AsmPrinter::ReloadReuse); MachineOperand *KillOpnd = CopyMI->findRegisterUseOperand(InReg); KillOpnd->setIsKill(); UpdateKills(*CopyMI, TRI, RegKills, KillOps); @@ -1984,6 +2007,7 @@ private: TII->copyRegToReg(MBB, InsertLoc, DesignatedReg, PhysReg, RC, RC); MachineInstr *CopyMI = prior(InsertLoc); + CopyMI->setAsmPrinterFlag(AsmPrinter::ReloadReuse); UpdateKills(*CopyMI, TRI, RegKills, KillOps); // This invalidates DesignatedReg. @@ -2112,6 +2136,7 @@ private: // virtual or needing to clobber any values if it's physical). NextMII = &MI; --NextMII; // backtrack to the copy. + NextMII->setAsmPrinterFlag(AsmPrinter::ReloadReuse); // Propagate the sub-register index over. if (SubIdx) { DefMO = NextMII->findRegisterDefOperand(DestReg); diff --git a/lib/CompilerDriver/Action.cpp b/lib/CompilerDriver/Action.cpp index 5fd63ee..7bcd30a 100644 --- a/lib/CompilerDriver/Action.cpp +++ b/lib/CompilerDriver/Action.cpp @@ -13,9 +13,13 @@ #include "llvm/CompilerDriver/Action.h" #include "llvm/CompilerDriver/BuiltinOptions.h" + #include "llvm/Support/raw_ostream.h" #include "llvm/System/Program.h" +#include "llvm/System/TimeValue.h" + #include <stdexcept> +#include <string> using namespace llvm; using namespace llvmc; @@ -60,14 +64,31 @@ namespace { } } +namespace llvmc { + void AppendToGlobalTimeLog(const std::string& cmd, double time); +} + int llvmc::Action::Execute() const { if (DryRun || VerboseMode) { errs() << Command_ << " "; std::for_each(Args_.begin(), Args_.end(), print_string); errs() << '\n'; } - if (DryRun) - return 0; - else - return ExecuteProgram(Command_, Args_); + if (!DryRun) { + if (Time) { + sys::TimeValue now = sys::TimeValue::now(); + int ret = ExecuteProgram(Command_, Args_); + sys::TimeValue now2 = sys::TimeValue::now(); + now2 -= now; + double elapsed = now2.seconds() + now2.microseconds() / 1000000.0; + AppendToGlobalTimeLog(Command_, elapsed); + + return ret; + } + else { + return ExecuteProgram(Command_, Args_); + } + } + + return 0; } diff --git a/lib/CompilerDriver/BuiltinOptions.cpp b/lib/CompilerDriver/BuiltinOptions.cpp index d90c50d..d1ac8c9 100644 --- a/lib/CompilerDriver/BuiltinOptions.cpp +++ b/lib/CompilerDriver/BuiltinOptions.cpp @@ -30,8 +30,10 @@ cl::opt<std::string> TempDirname("temp-dir", cl::desc("Temp dir name"), cl::list<std::string> Languages("x", cl::desc("Specify the language of the following input files"), cl::ZeroOrMore); + cl::opt<bool> DryRun("dry-run", cl::desc("Only pretend to run commands")); +cl::opt<bool> Time("time", cl::desc("Time individual commands")); cl::opt<bool> VerboseMode("v", cl::desc("Enable verbose mode")); diff --git a/lib/CompilerDriver/Main.cpp b/lib/CompilerDriver/Main.cpp index c581809..3a3487a 100644 --- 
a/lib/CompilerDriver/Main.cpp +++ b/lib/CompilerDriver/Main.cpp @@ -19,6 +19,7 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/System/Path.h" +#include <sstream> #include <stdexcept> #include <string> @@ -28,6 +29,8 @@ using namespace llvmc; namespace { + std::stringstream* GlobalTimeLog; + sys::Path getTempDir() { sys::Path tempDir; @@ -81,6 +84,11 @@ namespace { namespace llvmc { +// Used to implement -time option. External linkage is intentional. +void AppendToGlobalTimeLog(const std::string& cmd, double time) { + *GlobalTimeLog << "# " << cmd << ' ' << time << '\n'; +} + // Sometimes plugins want to condition on the value in argv[0]. const char* ProgramName; @@ -122,7 +130,19 @@ int Main(int argc, char** argv) { throw std::runtime_error("no input files"); } - return BuildTargets(graph, langMap); + if (Time) { + GlobalTimeLog = new std::stringstream; + GlobalTimeLog->precision(2); + } + + int ret = BuildTargets(graph, langMap); + + if (Time) { + llvm::errs() << GlobalTimeLog->str(); + delete GlobalTimeLog; + } + + return ret; } catch(llvmc::error_code& ec) { return ec.code(); diff --git a/lib/CompilerDriver/Tool.cpp b/lib/CompilerDriver/Tool.cpp index 5a32fd3..9f4ab49 100644 --- a/lib/CompilerDriver/Tool.cpp +++ b/lib/CompilerDriver/Tool.cpp @@ -20,11 +20,6 @@ using namespace llvm; using namespace llvmc; -// SplitString is used by derived Tool classes. -typedef void (*SplitStringFunPtr)(const std::string&, - std::vector<std::string>&, const char*); -SplitStringFunPtr ForceLinkageSplitString = &llvm::SplitString; - namespace { sys::Path MakeTempFile(const sys::Path& TempDir, const std::string& BaseName, const std::string& Suffix) { diff --git a/lib/ExecutionEngine/ExecutionEngine.cpp b/lib/ExecutionEngine/ExecutionEngine.cpp index 21499e5..cb30748 100644 --- a/lib/ExecutionEngine/ExecutionEngine.cpp +++ b/lib/ExecutionEngine/ExecutionEngine.cpp @@ -40,7 +40,8 @@ ExecutionEngine *(*ExecutionEngine::JITCtor)(ModuleProvider *MP, std::string *ErrorStr, JITMemoryManager *JMM, CodeGenOpt::Level OptLevel, - bool GVsWithCode) = 0; + bool GVsWithCode, + CodeModel::Model CMM) = 0; ExecutionEngine *(*ExecutionEngine::InterpCtor)(ModuleProvider *MP, std::string *ErrorStr) = 0; ExecutionEngine::EERegisterFn ExecutionEngine::ExceptionTableRegister = 0; @@ -52,7 +53,6 @@ ExecutionEngine::ExecutionEngine(ModuleProvider *P) CompilingLazily = false; GVCompilationDisabled = false; SymbolSearchingDisabled = false; - DlsymStubsEnabled = false; Modules.push_back(P); assert(P && "ModuleProvider is null?"); } @@ -445,7 +445,7 @@ ExecutionEngine *EngineBuilder::create() { if (ExecutionEngine::JITCtor) { ExecutionEngine *EE = ExecutionEngine::JITCtor(MP, ErrorStr, JMM, OptLevel, - AllocateGVsWithCode); + AllocateGVsWithCode, CMModel); if (EE) return EE; } } diff --git a/lib/ExecutionEngine/Interpreter/Execution.cpp b/lib/ExecutionEngine/Interpreter/Execution.cpp index 01bd2c7..b59cfd1 100644 --- a/lib/ExecutionEngine/Interpreter/Execution.cpp +++ b/lib/ExecutionEngine/Interpreter/Execution.cpp @@ -882,16 +882,6 @@ void Interpreter::visitCallSite(CallSite CS) { e = SF.Caller.arg_end(); i != e; ++i, ++pNum) { Value *V = *i; ArgVals.push_back(getOperandValue(V, SF)); - // Promote all integral types whose size is < sizeof(i32) into i32. 
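The CompilerDriver hunks above add a -time option: Action::Execute() brackets each ExecuteProgram() call with sys::TimeValue stamps and reports the elapsed wall time through AppendToGlobalTimeLog(), and Main prints the accumulated log once the build finishes. A minimal standalone sketch of that timing pattern (runTimed is a hypothetical helper, not part of the patch):

    #include "llvm/System/TimeValue.h"
    #include "llvm/Support/raw_ostream.h"
    #include <string>

    // Run Cmd and report its wall-clock time in seconds on stderr.
    static int runTimed(const std::string &Name, int (*Cmd)()) {
      llvm::sys::TimeValue Start = llvm::sys::TimeValue::now();
      int Ret = Cmd();
      llvm::sys::TimeValue End = llvm::sys::TimeValue::now();
      End -= Start;
      double Elapsed = End.seconds() + End.microseconds() / 1000000.0;
      llvm::errs() << "# " << Name << ' ' << Elapsed << '\n';
      return Ret;
    }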
- // We do this by zero or sign extending the value as appropriate - // according to the parameter attributes - const Type *Ty = V->getType(); - if (Ty->isInteger() && (ArgVals.back().IntVal.getBitWidth() < 32)) { - if (CS.paramHasAttr(pNum, Attribute::ZExt)) - ArgVals.back().IntVal = ArgVals.back().IntVal.zext(32); - else if (CS.paramHasAttr(pNum, Attribute::SExt)) - ArgVals.back().IntVal = ArgVals.back().IntVal.sext(32); - } } // To handle indirect calls, we must get the pointer value from the argument diff --git a/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp b/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp index 8c45a36..c02d84f 100644 --- a/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp +++ b/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp @@ -158,7 +158,7 @@ static void *ffiValueFor(const Type *Ty, const GenericValue &AV, } case Type::FloatTyID: { float *FloatPtr = (float *) ArgDataPtr; - *FloatPtr = AV.DoubleVal; + *FloatPtr = AV.FloatVal; return ArgDataPtr; } case Type::DoubleTyID: { @@ -284,6 +284,9 @@ GenericValue Interpreter::callExternalFunction(Function *F, else llvm_report_error("Tried to execute an unknown external function: " + F->getType()->getDescription() + " " +F->getName()); +#ifndef USE_LIBFFI + errs() << "Recompiling LLVM with --enable-libffi might help.\n"; +#endif return GenericValue(); } @@ -419,83 +422,6 @@ GenericValue lle_X_printf(const FunctionType *FT, return GV; } -static void ByteswapSCANFResults(LLVMContext &C, - const char *Fmt, void *Arg0, void *Arg1, - void *Arg2, void *Arg3, void *Arg4, void *Arg5, - void *Arg6, void *Arg7, void *Arg8) { - void *Args[] = { Arg0, Arg1, Arg2, Arg3, Arg4, Arg5, Arg6, Arg7, Arg8, 0 }; - - // Loop over the format string, munging read values as appropriate (performs - // byteswaps as necessary). - unsigned ArgNo = 0; - while (*Fmt) { - if (*Fmt++ == '%') { - // Read any flag characters that may be present... - bool Suppress = false; - bool Half = false; - bool Long = false; - bool LongLong = false; // long long or long double - - while (1) { - switch (*Fmt++) { - case '*': Suppress = true; break; - case 'a': /*Allocate = true;*/ break; // We don't need to track this - case 'h': Half = true; break; - case 'l': Long = true; break; - case 'q': - case 'L': LongLong = true; break; - default: - if (Fmt[-1] > '9' || Fmt[-1] < '0') // Ignore field width specs - goto Out; - } - } - Out: - - // Read the conversion character - if (!Suppress && Fmt[-1] != '%') { // Nothing to do? 
- unsigned Size = 0; - const Type *Ty = 0; - - switch (Fmt[-1]) { - case 'i': case 'o': case 'u': case 'x': case 'X': case 'n': case 'p': - case 'd': - if (Long || LongLong) { - Size = 8; Ty = Type::getInt64Ty(C); - } else if (Half) { - Size = 4; Ty = Type::getInt16Ty(C); - } else { - Size = 4; Ty = Type::getInt32Ty(C); - } - break; - - case 'e': case 'g': case 'E': - case 'f': - if (Long || LongLong) { - Size = 8; Ty = Type::getDoubleTy(C); - } else { - Size = 4; Ty = Type::getFloatTy(C); - } - break; - - case 's': case 'c': case '[': // No byteswap needed - Size = 1; - Ty = Type::getInt8Ty(C); - break; - - default: break; - } - - if (Size) { - GenericValue GV; - void *Arg = Args[ArgNo++]; - memcpy(&GV, Arg, Size); - TheInterpreter->StoreValueToMemory(GV, (GenericValue*)Arg, Ty); - } - } - } - } -} - // int sscanf(const char *format, ...); GenericValue lle_X_sscanf(const FunctionType *FT, const std::vector<GenericValue> &args) { @@ -508,9 +434,6 @@ GenericValue lle_X_sscanf(const FunctionType *FT, GenericValue GV; GV.IntVal = APInt(32, sscanf(Args[0], Args[1], Args[2], Args[3], Args[4], Args[5], Args[6], Args[7], Args[8], Args[9])); - ByteswapSCANFResults(FT->getContext(), - Args[1], Args[2], Args[3], Args[4], - Args[5], Args[6], Args[7], Args[8], Args[9], 0); return GV; } @@ -526,9 +449,6 @@ GenericValue lle_X_scanf(const FunctionType *FT, GenericValue GV; GV.IntVal = APInt(32, scanf( Args[0], Args[1], Args[2], Args[3], Args[4], Args[5], Args[6], Args[7], Args[8], Args[9])); - ByteswapSCANFResults(FT->getContext(), - Args[0], Args[1], Args[2], Args[3], Args[4], - Args[5], Args[6], Args[7], Args[8], Args[9]); return GV; } diff --git a/lib/ExecutionEngine/JIT/JIT.cpp b/lib/ExecutionEngine/JIT/JIT.cpp index e21d760..6d781c7 100644 --- a/lib/ExecutionEngine/JIT/JIT.cpp +++ b/lib/ExecutionEngine/JIT/JIT.cpp @@ -198,15 +198,17 @@ ExecutionEngine *ExecutionEngine::createJIT(ModuleProvider *MP, std::string *ErrorStr, JITMemoryManager *JMM, CodeGenOpt::Level OptLevel, - bool GVsWithCode) { - return JIT::createJIT(MP, ErrorStr, JMM, OptLevel, GVsWithCode); + bool GVsWithCode, + CodeModel::Model CMM) { + return JIT::createJIT(MP, ErrorStr, JMM, OptLevel, GVsWithCode, CMM); } ExecutionEngine *JIT::createJIT(ModuleProvider *MP, std::string *ErrorStr, JITMemoryManager *JMM, CodeGenOpt::Level OptLevel, - bool GVsWithCode) { + bool GVsWithCode, + CodeModel::Model CMM) { // Make sure we can resolve symbols in the program as well. The zero arg // to the function tells DynamicLibrary to load the program, not a library. if (sys::DynamicLibrary::LoadLibraryPermanently(0, ErrorStr)) @@ -215,6 +217,7 @@ ExecutionEngine *JIT::createJIT(ModuleProvider *MP, // Pick a target either via -march or by guessing the native arch. TargetMachine *TM = JIT::selectTarget(MP, ErrorStr); if (!TM || (ErrorStr && ErrorStr->length() > 0)) return 0; + TM->setCodeModel(CMM); // If the target supports JIT code generation, create a the JIT. if (TargetJITInfo *TJ = TM->getJITInfo()) { @@ -613,11 +616,6 @@ void JIT::runJITOnFunctionUnlocked(Function *F, const MutexGuard &locked) { // the stub with real address of the function. updateFunctionStub(PF); } - - // If the JIT is configured to emit info so that dlsym can be used to - // rewrite stubs to external globals, do so now. 
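The ExecutionEngine/JIT changes above thread a CodeModel::Model parameter through createJIT(), which the JIT applies to the selected TargetMachine via setCodeModel() before code generation starts. A hedged usage sketch against the signature shown above (the helper name and the choice of CodeModel::Small are illustrative only):

    #include "llvm/ExecutionEngine/ExecutionEngine.h"
    #include "llvm/ModuleProvider.h"
    #include "llvm/Target/TargetMachine.h"
    #include <string>

    // Build a JIT for MP that uses the small code model.
    llvm::ExecutionEngine *createSmallCodeModelJIT(llvm::ModuleProvider *MP,
                                                   std::string &Err) {
      return llvm::ExecutionEngine::createJIT(MP, &Err, /*JMM=*/0,
                                              llvm::CodeGenOpt::Default,
                                              /*GVsWithCode=*/true,
                                              llvm::CodeModel::Small);
    }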
- if (areDlsymStubsEnabled() && !isCompilingLazily()) - updateDlsymStubTable(); } /// getPointerToFunction - This method is used to get the address of the @@ -660,8 +658,7 @@ void *JIT::getPointerToFunction(Function *F) { } if (F->isDeclaration() || F->hasAvailableExternallyLinkage()) { - bool AbortOnFailure = - !areDlsymStubsEnabled() && !F->hasExternalWeakLinkage(); + bool AbortOnFailure = !F->hasExternalWeakLinkage(); void *Addr = getPointerToNamedFunction(F->getName(), AbortOnFailure); addGlobalMapping(F, Addr); return Addr; @@ -690,7 +687,7 @@ void *JIT::getOrEmitGlobalVariable(const GlobalVariable *GV) { return (void*)&__dso_handle; #endif Ptr = sys::DynamicLibrary::SearchForAddressOfSymbol(GV->getName()); - if (Ptr == 0 && !areDlsymStubsEnabled()) { + if (Ptr == 0) { llvm_report_error("Could not resolve external global address: " +GV->getName()); } diff --git a/lib/ExecutionEngine/JIT/JIT.h b/lib/ExecutionEngine/JIT/JIT.h index fb3cb24..f165bd6 100644 --- a/lib/ExecutionEngine/JIT/JIT.h +++ b/lib/ExecutionEngine/JIT/JIT.h @@ -85,8 +85,10 @@ public: JITMemoryManager *JMM, CodeGenOpt::Level OptLevel = CodeGenOpt::Default, - bool GVsWithCode = true) { - return ExecutionEngine::createJIT(MP, Err, JMM, OptLevel, GVsWithCode); + bool GVsWithCode = true, + CodeModel::Model CMM = CodeModel::Default) { + return ExecutionEngine::createJIT(MP, Err, JMM, OptLevel, GVsWithCode, + CMM); } virtual void addModuleProvider(ModuleProvider *MP); @@ -175,7 +177,8 @@ public: std::string *ErrorStr, JITMemoryManager *JMM, CodeGenOpt::Level OptLevel, - bool GVsWithCode); + bool GVsWithCode, + CodeModel::Model CMM); // Run the JIT on F and return information about the generated code void runJITOnFunction(Function *F, MachineCodeInfo *MCI = 0); @@ -195,7 +198,6 @@ private: TargetMachine &tm); void runJITOnFunctionUnlocked(Function *F, const MutexGuard &locked); void updateFunctionStub(Function *F); - void updateDlsymStubTable(); protected: diff --git a/lib/ExecutionEngine/JIT/JITDebugRegisterer.cpp b/lib/ExecutionEngine/JIT/JITDebugRegisterer.cpp index 49faf64..565509c 100644 --- a/lib/ExecutionEngine/JIT/JITDebugRegisterer.cpp +++ b/lib/ExecutionEngine/JIT/JITDebugRegisterer.cpp @@ -35,7 +35,7 @@ namespace llvm { extern "C" { // Debuggers puts a breakpoint in this function. - void DISABLE_INLINE __jit_debug_register_code() { } + DISABLE_INLINE void __jit_debug_register_code() { } // We put information about the JITed function in this global, which the // debugger reads. Make sure to specify the version statically, because the diff --git a/lib/ExecutionEngine/JIT/JITEmitter.cpp b/lib/ExecutionEngine/JIT/JITEmitter.cpp index 79f1eb4..5f195ee 100644 --- a/lib/ExecutionEngine/JIT/JITEmitter.cpp +++ b/lib/ExecutionEngine/JIT/JITEmitter.cpp @@ -63,6 +63,7 @@ static JIT *TheJIT = 0; // JIT lazy compilation code. // namespace { + class JITEmitter; class JITResolverState; template<typename ValueTy> @@ -213,16 +214,18 @@ namespace { std::map<void*, unsigned> revGOTMap; unsigned nextGOTIndex; + JITEmitter &JE; + static JITResolver *TheJITResolver; public: - explicit JITResolver(JIT &jit) : nextGOTIndex(0) { + explicit JITResolver(JIT &jit, JITEmitter &je) : nextGOTIndex(0), JE(je) { TheJIT = &jit; LazyResolverFn = jit.getJITInfo().getLazyResolverFunction(JITCompilerFn); assert(TheJITResolver == 0 && "Multiple JIT resolvers?"); TheJITResolver = this; } - + ~JITResolver() { TheJITResolver = 0; } @@ -244,19 +247,9 @@ namespace { /// specified GV address. 
void *getGlobalValueIndirectSym(GlobalValue *V, void *GVAddress); - /// AddCallbackAtLocation - If the target is capable of rewriting an - /// instruction without the use of a stub, record the location of the use so - /// we know which function is being used at the location. - void *AddCallbackAtLocation(Function *F, void *Location) { - MutexGuard locked(TheJIT->lock); - /// Get the target-specific JIT resolver function. - state.AddCallSite(locked, Location, F); - return (void*)(intptr_t)LazyResolverFn; - } - void getRelocatableGVs(SmallVectorImpl<GlobalValue*> &GVs, SmallVectorImpl<void*> &Ptrs); - + GlobalValue *invalidateStub(void *Stub); /// getGOTIndexForAddress - Return a new or existing index in the GOT for @@ -269,6 +262,225 @@ namespace { /// been compiled, this function compiles it first. static void *JITCompilerFn(void *Stub); }; + + /// JITEmitter - The JIT implementation of the MachineCodeEmitter, which is + /// used to output functions to memory for execution. + class JITEmitter : public JITCodeEmitter { + JITMemoryManager *MemMgr; + + // When outputting a function stub in the context of some other function, we + // save BufferBegin/BufferEnd/CurBufferPtr here. + uint8_t *SavedBufferBegin, *SavedBufferEnd, *SavedCurBufferPtr; + + // When reattempting to JIT a function after running out of space, we store + // the estimated size of the function we're trying to JIT here, so we can + // ask the memory manager for at least this much space. When we + // successfully emit the function, we reset this back to zero. + uintptr_t SizeEstimate; + + /// Relocations - These are the relocations that the function needs, as + /// emitted. + std::vector<MachineRelocation> Relocations; + + /// MBBLocations - This vector is a mapping from MBB ID's to their address. + /// It is filled in by the StartMachineBasicBlock callback and queried by + /// the getMachineBasicBlockAddress callback. + std::vector<uintptr_t> MBBLocations; + + /// ConstantPool - The constant pool for the current function. + /// + MachineConstantPool *ConstantPool; + + /// ConstantPoolBase - A pointer to the first entry in the constant pool. + /// + void *ConstantPoolBase; + + /// ConstPoolAddresses - Addresses of individual constant pool entries. + /// + SmallVector<uintptr_t, 8> ConstPoolAddresses; + + /// JumpTable - The jump tables for the current function. + /// + MachineJumpTableInfo *JumpTable; + + /// JumpTableBase - A pointer to the first entry in the jump table. + /// + void *JumpTableBase; + + /// Resolver - This contains info about the currently resolved functions. + JITResolver Resolver; + + /// DE - The dwarf emitter for the jit. + OwningPtr<JITDwarfEmitter> DE; + + /// DR - The debug registerer for the jit. + OwningPtr<JITDebugRegisterer> DR; + + /// LabelLocations - This vector is a mapping from Label ID's to their + /// address. + std::vector<uintptr_t> LabelLocations; + + /// MMI - Machine module info for exception informations + MachineModuleInfo* MMI; + + // GVSet - a set to keep track of which globals have been seen + SmallPtrSet<const GlobalVariable*, 8> GVSet; + + // CurFn - The llvm function being emitted. Only valid during + // finishFunction(). + const Function *CurFn; + + /// Information about emitted code, which is passed to the + /// JITEventListeners. This is reset in startFunction and used in + /// finishFunction. + JITEvent_EmittedFunctionDetails EmissionDetails; + + struct EmittedCode { + void *FunctionBody; // Beginning of the function's allocation. 
+ void *Code; // The address the function's code actually starts at. + void *ExceptionTable; + EmittedCode() : FunctionBody(0), Code(0), ExceptionTable(0) {} + }; + struct EmittedFunctionConfig : public ValueMapConfig<const Function*> { + typedef JITEmitter *ExtraData; + static void onDelete(JITEmitter *, const Function*); + static void onRAUW(JITEmitter *, const Function*, const Function*); + }; + ValueMap<const Function *, EmittedCode, + EmittedFunctionConfig> EmittedFunctions; + + // CurFnStubUses - For a given Function, a vector of stubs that it + // references. This facilitates the JIT detecting that a stub is no + // longer used, so that it may be deallocated. + DenseMap<AssertingVH<const Function>, SmallVector<void*, 1> > CurFnStubUses; + + // StubFnRefs - For a given pointer to a stub, a set of Functions which + // reference the stub. When the count of a stub's references drops to zero, + // the stub is unused. + DenseMap<void *, SmallPtrSet<const Function*, 1> > StubFnRefs; + + DebugLocTuple PrevDLT; + + public: + JITEmitter(JIT &jit, JITMemoryManager *JMM, TargetMachine &TM) + : SizeEstimate(0), Resolver(jit, *this), MMI(0), CurFn(0), + EmittedFunctions(this) { + MemMgr = JMM ? JMM : JITMemoryManager::CreateDefaultMemManager(); + if (jit.getJITInfo().needsGOT()) { + MemMgr->AllocateGOT(); + DEBUG(errs() << "JIT is managing a GOT\n"); + } + + if (DwarfExceptionHandling || JITEmitDebugInfo) { + DE.reset(new JITDwarfEmitter(jit)); + } + if (JITEmitDebugInfo) { + DR.reset(new JITDebugRegisterer(TM)); + } + } + ~JITEmitter() { + delete MemMgr; + } + + /// classof - Methods for support type inquiry through isa, cast, and + /// dyn_cast: + /// + static inline bool classof(const JITEmitter*) { return true; } + static inline bool classof(const MachineCodeEmitter*) { return true; } + + JITResolver &getJITResolver() { return Resolver; } + + virtual void startFunction(MachineFunction &F); + virtual bool finishFunction(MachineFunction &F); + + void emitConstantPool(MachineConstantPool *MCP); + void initJumpTableInfo(MachineJumpTableInfo *MJTI); + void emitJumpTableInfo(MachineJumpTableInfo *MJTI); + + virtual void startGVStub(const GlobalValue* GV, unsigned StubSize, + unsigned Alignment = 1); + virtual void startGVStub(const GlobalValue* GV, void *Buffer, + unsigned StubSize); + virtual void* finishGVStub(const GlobalValue *GV); + + /// allocateSpace - Reserves space in the current block if any, or + /// allocate a new one of the given size. + virtual void *allocateSpace(uintptr_t Size, unsigned Alignment); + + /// allocateGlobal - Allocate memory for a global. Unlike allocateSpace, + /// this method does not allocate memory in the current output buffer, + /// because a global may live longer than the current function. 
+ virtual void *allocateGlobal(uintptr_t Size, unsigned Alignment); + + virtual void addRelocation(const MachineRelocation &MR) { + Relocations.push_back(MR); + } + + virtual void StartMachineBasicBlock(MachineBasicBlock *MBB) { + if (MBBLocations.size() <= (unsigned)MBB->getNumber()) + MBBLocations.resize((MBB->getNumber()+1)*2); + MBBLocations[MBB->getNumber()] = getCurrentPCValue(); + DEBUG(errs() << "JIT: Emitting BB" << MBB->getNumber() << " at [" + << (void*) getCurrentPCValue() << "]\n"); + } + + virtual uintptr_t getConstantPoolEntryAddress(unsigned Entry) const; + virtual uintptr_t getJumpTableEntryAddress(unsigned Entry) const; + + virtual uintptr_t getMachineBasicBlockAddress(MachineBasicBlock *MBB) const { + assert(MBBLocations.size() > (unsigned)MBB->getNumber() && + MBBLocations[MBB->getNumber()] && "MBB not emitted!"); + return MBBLocations[MBB->getNumber()]; + } + + /// retryWithMoreMemory - Log a retry and deallocate all memory for the + /// given function. Increase the minimum allocation size so that we get + /// more memory next time. + void retryWithMoreMemory(MachineFunction &F); + + /// deallocateMemForFunction - Deallocate all memory for the specified + /// function body. + void deallocateMemForFunction(const Function *F); + + /// AddStubToCurrentFunction - Mark the current function being JIT'd as + /// using the stub at the specified address. Allows + /// deallocateMemForFunction to also remove stubs no longer referenced. + void AddStubToCurrentFunction(void *Stub); + + virtual void processDebugLoc(DebugLoc DL, bool BeforePrintingInsn); + + virtual void emitLabel(uint64_t LabelID) { + if (LabelLocations.size() <= LabelID) + LabelLocations.resize((LabelID+1)*2); + LabelLocations[LabelID] = getCurrentPCValue(); + } + + virtual uintptr_t getLabelAddress(uint64_t LabelID) const { + assert(LabelLocations.size() > (unsigned)LabelID && + LabelLocations[LabelID] && "Label not emitted!"); + return LabelLocations[LabelID]; + } + + virtual void setModuleInfo(MachineModuleInfo* Info) { + MMI = Info; + if (DE.get()) DE->setModuleInfo(Info); + } + + void setMemoryExecutable() { + MemMgr->setMemoryExecutable(); + } + + JITMemoryManager *getMemMgr() const { return MemMgr; } + + private: + void *getPointerToGlobal(GlobalValue *GV, void *Reference, + bool MayNeedFarStub); + void *getPointerToGVIndirectSym(GlobalValue *V, void *Reference); + unsigned addSizeOfGlobal(const GlobalVariable *GV, unsigned Size); + unsigned addSizeOfGlobalsInConstantVal(const Constant *C, unsigned Size); + unsigned addSizeOfGlobalsInInitializer(const Constant *Init, unsigned Size); + unsigned GetSizeOfGlobalsInBytes(MachineFunction &MF); + }; } JITResolver *JITResolver::TheJITResolver = 0; @@ -306,16 +518,13 @@ void *JITResolver::getFunctionStub(Function *F) { Actual = TheJIT->getPointerToFunction(F); // If we resolved the symbol to a null address (eg. a weak external) - // don't emit a stub. Return a null pointer to the application. If dlsym - // stubs are enabled, not being able to resolve the address is not - // meaningful. - if (!Actual && !TheJIT->areDlsymStubsEnabled()) return 0; + // don't emit a stub. Return a null pointer to the application. + if (!Actual) return 0; } // Codegen a new stub, calling the lazy resolver or the actual address of the // external function, if it was resolved. 
- Stub = TheJIT->getJITInfo().emitFunctionStub(F, Actual, - *TheJIT->getCodeEmitter()); + Stub = TheJIT->getJITInfo().emitFunctionStub(F, Actual, JE); if (Actual != (void*)(intptr_t)LazyResolverFn) { // If we are getting the stub for an external function, we really want the @@ -352,9 +561,9 @@ void *JITResolver::getGlobalValueIndirectSym(GlobalValue *GV, void *GVAddress) { // Otherwise, codegen a new indirect symbol. IndirectSym = TheJIT->getJITInfo().emitGlobalValueIndirectSym(GV, GVAddress, - *TheJIT->getCodeEmitter()); + JE); - DEBUG(errs() << "JIT: Indirect symbol emitted at [" << IndirectSym + DEBUG(errs() << "JIT: Indirect symbol emitted at [" << IndirectSym << "] for GV '" << GV->getName() << "'\n"); return IndirectSym; @@ -367,8 +576,7 @@ void *JITResolver::getExternalFunctionStub(void *FnAddr) { void *&Stub = ExternalFnToStubMap[FnAddr]; if (Stub) return Stub; - Stub = TheJIT->getJITInfo().emitFunctionStub(0, FnAddr, - *TheJIT->getCodeEmitter()); + Stub = TheJIT->getJITInfo().emitFunctionStub(0, FnAddr, JE); DEBUG(errs() << "JIT: Stub emitted at [" << Stub << "] for external function at '" << FnAddr << "'\n"); @@ -389,10 +597,10 @@ unsigned JITResolver::getGOTIndexForAddr(void* addr) { void JITResolver::getRelocatableGVs(SmallVectorImpl<GlobalValue*> &GVs, SmallVectorImpl<void*> &Ptrs) { MutexGuard locked(TheJIT->lock); - + const FunctionToStubMapTy &FM = state.getFunctionToStubMap(locked); GlobalToIndirectSymMapTy &GM = state.getGlobalToIndirectSymMap(locked); - + for (FunctionToStubMapTy::const_iterator i = FM.begin(), e = FM.end(); i != e; ++i){ Function *F = i->first; @@ -428,7 +636,7 @@ GlobalValue *JITResolver::invalidateStub(void *Stub) { GM.erase(i); return GV; } - + // Lastly, check to see if it's in the ExternalFnToStubMap. for (std::map<void *, void *>::iterator i = ExternalFnToStubMap.begin(), e = ExternalFnToStubMap.end(); i != e; ++i) { @@ -437,7 +645,7 @@ GlobalValue *JITResolver::invalidateStub(void *Stub) { ExternalFnToStubMap.erase(i); break; } - + return 0; } @@ -446,7 +654,7 @@ GlobalValue *JITResolver::invalidateStub(void *Stub) { /// it if necessary, then returns the resultant function pointer. void *JITResolver::JITCompilerFn(void *Stub) { JITResolver &JR = *TheJITResolver; - + Function* F = 0; void* ActualPtr = 0; @@ -466,16 +674,16 @@ void *JITResolver::JITCompilerFn(void *Stub) { // If we have already code generated the function, just return the address. void *Result = TheJIT->getPointerToGlobalIfAvailable(F); - + if (!Result) { // Otherwise we don't have it, do lazy compilation now. - + // If lazy compilation is disabled, emit a useful error message and abort. if (!TheJIT->isCompilingLazily()) { llvm_report_error("LLVM JIT requested to do lazy compilation of function '" + F->getName() + "' when lazy compiles are disabled!"); } - + DEBUG(errs() << "JIT: Lazily resolving function '" << F->getName() << "' In stub ptr = " << Stub << " actual ptr = " << ActualPtr << "\n"); @@ -508,237 +716,8 @@ void *JITResolver::JITCompilerFn(void *Stub) { //===----------------------------------------------------------------------===// // JITEmitter code. // -namespace { - /// JITEmitter - The JIT implementation of the MachineCodeEmitter, which is - /// used to output functions to memory for execution. - class JITEmitter : public JITCodeEmitter { - JITMemoryManager *MemMgr; - - // When outputting a function stub in the context of some other function, we - // save BufferBegin/BufferEnd/CurBufferPtr here. 
- uint8_t *SavedBufferBegin, *SavedBufferEnd, *SavedCurBufferPtr; - - // When reattempting to JIT a function after running out of space, we store - // the estimated size of the function we're trying to JIT here, so we can - // ask the memory manager for at least this much space. When we - // successfully emit the function, we reset this back to zero. - uintptr_t SizeEstimate; - - /// Relocations - These are the relocations that the function needs, as - /// emitted. - std::vector<MachineRelocation> Relocations; - - /// MBBLocations - This vector is a mapping from MBB ID's to their address. - /// It is filled in by the StartMachineBasicBlock callback and queried by - /// the getMachineBasicBlockAddress callback. - std::vector<uintptr_t> MBBLocations; - - /// ConstantPool - The constant pool for the current function. - /// - MachineConstantPool *ConstantPool; - - /// ConstantPoolBase - A pointer to the first entry in the constant pool. - /// - void *ConstantPoolBase; - - /// ConstPoolAddresses - Addresses of individual constant pool entries. - /// - SmallVector<uintptr_t, 8> ConstPoolAddresses; - - /// JumpTable - The jump tables for the current function. - /// - MachineJumpTableInfo *JumpTable; - - /// JumpTableBase - A pointer to the first entry in the jump table. - /// - void *JumpTableBase; - - /// Resolver - This contains info about the currently resolved functions. - JITResolver Resolver; - - /// DE - The dwarf emitter for the jit. - OwningPtr<JITDwarfEmitter> DE; - - /// DR - The debug registerer for the jit. - OwningPtr<JITDebugRegisterer> DR; - - /// LabelLocations - This vector is a mapping from Label ID's to their - /// address. - std::vector<uintptr_t> LabelLocations; - - /// MMI - Machine module info for exception informations - MachineModuleInfo* MMI; - - // GVSet - a set to keep track of which globals have been seen - SmallPtrSet<const GlobalVariable*, 8> GVSet; - - // CurFn - The llvm function being emitted. Only valid during - // finishFunction(). - const Function *CurFn; - - /// Information about emitted code, which is passed to the - /// JITEventListeners. This is reset in startFunction and used in - /// finishFunction. - JITEvent_EmittedFunctionDetails EmissionDetails; - - struct EmittedCode { - void *FunctionBody; // Beginning of the function's allocation. - void *Code; // The address the function's code actually starts at. - void *ExceptionTable; - EmittedCode() : FunctionBody(0), Code(0), ExceptionTable(0) {} - }; - struct EmittedFunctionConfig : public ValueMapConfig<const Function*> { - typedef JITEmitter *ExtraData; - static void onDelete(JITEmitter *, const Function*); - static void onRAUW(JITEmitter *, const Function*, const Function*); - }; - ValueMap<const Function *, EmittedCode, - EmittedFunctionConfig> EmittedFunctions; - - // CurFnStubUses - For a given Function, a vector of stubs that it - // references. This facilitates the JIT detecting that a stub is no - // longer used, so that it may be deallocated. - DenseMap<AssertingVH<const Function>, SmallVector<void*, 1> > CurFnStubUses; - - // StubFnRefs - For a given pointer to a stub, a set of Functions which - // reference the stub. When the count of a stub's references drops to zero, - // the stub is unused. - DenseMap<void *, SmallPtrSet<const Function*, 1> > StubFnRefs; - - // ExtFnStubs - A map of external function names to stubs which have entries - // in the JITResolver's ExternalFnToStubMap. 
- StringMap<void *> ExtFnStubs; - - DebugLocTuple PrevDLT; - - public: - JITEmitter(JIT &jit, JITMemoryManager *JMM, TargetMachine &TM) - : SizeEstimate(0), Resolver(jit), MMI(0), CurFn(0), - EmittedFunctions(this) { - MemMgr = JMM ? JMM : JITMemoryManager::CreateDefaultMemManager(); - if (jit.getJITInfo().needsGOT()) { - MemMgr->AllocateGOT(); - DEBUG(errs() << "JIT is managing a GOT\n"); - } - - if (DwarfExceptionHandling || JITEmitDebugInfo) { - DE.reset(new JITDwarfEmitter(jit)); - } - if (JITEmitDebugInfo) { - DR.reset(new JITDebugRegisterer(TM)); - } - } - ~JITEmitter() { - delete MemMgr; - } - - /// classof - Methods for support type inquiry through isa, cast, and - /// dyn_cast: - /// - static inline bool classof(const JITEmitter*) { return true; } - static inline bool classof(const MachineCodeEmitter*) { return true; } - - JITResolver &getJITResolver() { return Resolver; } - - virtual void startFunction(MachineFunction &F); - virtual bool finishFunction(MachineFunction &F); - - void emitConstantPool(MachineConstantPool *MCP); - void initJumpTableInfo(MachineJumpTableInfo *MJTI); - void emitJumpTableInfo(MachineJumpTableInfo *MJTI); - - virtual void startGVStub(const GlobalValue* GV, unsigned StubSize, - unsigned Alignment = 1); - virtual void startGVStub(const GlobalValue* GV, void *Buffer, - unsigned StubSize); - virtual void* finishGVStub(const GlobalValue *GV); - - /// allocateSpace - Reserves space in the current block if any, or - /// allocate a new one of the given size. - virtual void *allocateSpace(uintptr_t Size, unsigned Alignment); - - /// allocateGlobal - Allocate memory for a global. Unlike allocateSpace, - /// this method does not allocate memory in the current output buffer, - /// because a global may live longer than the current function. - virtual void *allocateGlobal(uintptr_t Size, unsigned Alignment); - - virtual void addRelocation(const MachineRelocation &MR) { - Relocations.push_back(MR); - } - - virtual void StartMachineBasicBlock(MachineBasicBlock *MBB) { - if (MBBLocations.size() <= (unsigned)MBB->getNumber()) - MBBLocations.resize((MBB->getNumber()+1)*2); - MBBLocations[MBB->getNumber()] = getCurrentPCValue(); - DEBUG(errs() << "JIT: Emitting BB" << MBB->getNumber() << " at [" - << (void*) getCurrentPCValue() << "]\n"); - } - - virtual uintptr_t getConstantPoolEntryAddress(unsigned Entry) const; - virtual uintptr_t getJumpTableEntryAddress(unsigned Entry) const; - - virtual uintptr_t getMachineBasicBlockAddress(MachineBasicBlock *MBB) const { - assert(MBBLocations.size() > (unsigned)MBB->getNumber() && - MBBLocations[MBB->getNumber()] && "MBB not emitted!"); - return MBBLocations[MBB->getNumber()]; - } - - /// retryWithMoreMemory - Log a retry and deallocate all memory for the - /// given function. Increase the minimum allocation size so that we get - /// more memory next time. - void retryWithMoreMemory(MachineFunction &F); - - /// deallocateMemForFunction - Deallocate all memory for the specified - /// function body. - void deallocateMemForFunction(const Function *F); - - /// AddStubToCurrentFunction - Mark the current function being JIT'd as - /// using the stub at the specified address. Allows - /// deallocateMemForFunction to also remove stubs no longer referenced. - void AddStubToCurrentFunction(void *Stub); - - /// getExternalFnStubs - Accessor for the JIT to find stubs emitted for - /// MachineRelocations that reference external functions by name. 
- const StringMap<void*> &getExternalFnStubs() const { return ExtFnStubs; } - - virtual void processDebugLoc(DebugLoc DL, bool BeforePrintingInsn); - - virtual void emitLabel(uint64_t LabelID) { - if (LabelLocations.size() <= LabelID) - LabelLocations.resize((LabelID+1)*2); - LabelLocations[LabelID] = getCurrentPCValue(); - } - - virtual uintptr_t getLabelAddress(uint64_t LabelID) const { - assert(LabelLocations.size() > (unsigned)LabelID && - LabelLocations[LabelID] && "Label not emitted!"); - return LabelLocations[LabelID]; - } - - virtual void setModuleInfo(MachineModuleInfo* Info) { - MMI = Info; - if (DE.get()) DE->setModuleInfo(Info); - } - - void setMemoryExecutable() { - MemMgr->setMemoryExecutable(); - } - - JITMemoryManager *getMemMgr() const { return MemMgr; } - - private: - void *getPointerToGlobal(GlobalValue *GV, void *Reference, bool NoNeedStub); - void *getPointerToGVIndirectSym(GlobalValue *V, void *Reference, - bool NoNeedStub); - unsigned addSizeOfGlobal(const GlobalVariable *GV, unsigned Size); - unsigned addSizeOfGlobalsInConstantVal(const Constant *C, unsigned Size); - unsigned addSizeOfGlobalsInInitializer(const Constant *Init, unsigned Size); - unsigned GetSizeOfGlobalsInBytes(MachineFunction &MF); - }; -} - void *JITEmitter::getPointerToGlobal(GlobalValue *V, void *Reference, - bool DoesntNeedStub) { + bool MayNeedFarStub) { if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V)) return TheJIT->getOrEmitGlobalVariable(GV); @@ -747,31 +726,26 @@ void *JITEmitter::getPointerToGlobal(GlobalValue *V, void *Reference, // If we have already compiled the function, return a pointer to its body. Function *F = cast<Function>(V); - void *ResultPtr; - if (!DoesntNeedStub) { - // Return the function stub if it's already created. - ResultPtr = Resolver.getFunctionStubIfAvailable(F); - if (ResultPtr) - AddStubToCurrentFunction(ResultPtr); - } else { - ResultPtr = TheJIT->getPointerToGlobalIfAvailable(F); + + void *FnStub = Resolver.getFunctionStubIfAvailable(F); + if (FnStub) { + // Return the function stub if it's already created. We do this first + // so that we're returning the same address for the function as any + // previous call. + AddStubToCurrentFunction(FnStub); + return FnStub; } + + // Otherwise if we have code, go ahead and return that. + void *ResultPtr = TheJIT->getPointerToGlobalIfAvailable(F); if (ResultPtr) return ResultPtr; // If this is an external function pointer, we can force the JIT to - // 'compile' it, which really just adds it to the map. In dlsym mode, - // external functions are forced through a stub, regardless of reloc type. + // 'compile' it, which really just adds it to the map. if (F->isDeclaration() && !F->hasNotBeenReadFromBitcode() && - DoesntNeedStub && !TheJIT->areDlsymStubsEnabled()) + !MayNeedFarStub) return TheJIT->getPointerToFunction(F); - // Okay, the function has not been compiled yet, if the target callback - // mechanism is capable of rewriting the instruction directly, prefer to do - // that instead of emitting a stub. This uses the lazy resolver, so is not - // legal if lazy compilation is disabled. - if (DoesntNeedStub && TheJIT->isCompilingLazily()) - return Resolver.AddCallbackAtLocation(F, Reference); - // Otherwise, we have to emit a stub. 
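  // To summarize the lookup order above (a sketch of the new MayNeedFarStub
  // behaviour): an existing stub wins, so every caller keeps seeing a single
  // address for F; next comes already-emitted code; then a direct pointer for
  // external declarations that are known not to need a far stub; only when
  // all of those fail do we fall through and ask the resolver for a stub.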
void *StubAddr = Resolver.getFunctionStub(F); @@ -785,17 +759,16 @@ void *JITEmitter::getPointerToGlobal(GlobalValue *V, void *Reference, return StubAddr; } -void *JITEmitter::getPointerToGVIndirectSym(GlobalValue *V, void *Reference, - bool NoNeedStub) { +void *JITEmitter::getPointerToGVIndirectSym(GlobalValue *V, void *Reference) { // Make sure GV is emitted first, and create a stub containing the fully // resolved address. - void *GVAddress = getPointerToGlobal(V, Reference, true); + void *GVAddress = getPointerToGlobal(V, Reference, false); void *StubAddr = Resolver.getGlobalValueIndirectSym(V, GVAddress); - + // Add the stub to the current function's list of referenced stubs, so we can // deallocate them if the current function is ever freed. AddStubToCurrentFunction(StubAddr); - + return StubAddr; } @@ -820,7 +793,7 @@ void JITEmitter::processDebugLoc(DebugLoc DL, bool BeforePrintingInsn) { NextLine.Loc = DL; EmissionDetails.LineStarts.push_back(NextLine); } - + PrevDLT = CurDLT; } } @@ -845,7 +818,7 @@ static unsigned GetConstantPoolSizeInBytes(MachineConstantPool *MCP, static unsigned GetJumpTableSizeInBytes(MachineJumpTableInfo *MJTI) { const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables(); if (JT.empty()) return 0; - + unsigned NumEntries = 0; for (unsigned i = 0, e = JT.size(); i != e; ++i) NumEntries += JT[i].MBBs.size(); @@ -857,7 +830,7 @@ static unsigned GetJumpTableSizeInBytes(MachineJumpTableInfo *MJTI) { static uintptr_t RoundUpToAlign(uintptr_t Size, unsigned Alignment) { if (Alignment == 0) Alignment = 1; - // Since we do not know where the buffer will be allocated, be pessimistic. + // Since we do not know where the buffer will be allocated, be pessimistic. return Size + Alignment; } @@ -867,7 +840,7 @@ static uintptr_t RoundUpToAlign(uintptr_t Size, unsigned Alignment) { unsigned JITEmitter::addSizeOfGlobal(const GlobalVariable *GV, unsigned Size) { const Type *ElTy = GV->getType()->getElementType(); size_t GVSize = (size_t)TheJIT->getTargetData()->getTypeAllocSize(ElTy); - size_t GVAlign = + size_t GVAlign = (size_t)TheJIT->getTargetData()->getPreferredAlignment(GV); DEBUG(errs() << "JIT: Adding in size " << GVSize << " alignment " << GVAlign); DEBUG(GV->dump()); @@ -884,7 +857,7 @@ unsigned JITEmitter::addSizeOfGlobal(const GlobalVariable *GV, unsigned Size) { /// but are referenced from the constant; put them in GVSet and add their /// size into the running total Size. -unsigned JITEmitter::addSizeOfGlobalsInConstantVal(const Constant *C, +unsigned JITEmitter::addSizeOfGlobalsInConstantVal(const Constant *C, unsigned Size) { // If its undefined, return the garbage. if (isa<UndefValue>(C)) @@ -947,7 +920,7 @@ unsigned JITEmitter::addSizeOfGlobalsInConstantVal(const Constant *C, /// addSizeOfGLobalsInInitializer - handle any globals that we haven't seen yet /// but are referenced from the given initializer. 
-unsigned JITEmitter::addSizeOfGlobalsInInitializer(const Constant *Init, +unsigned JITEmitter::addSizeOfGlobalsInInitializer(const Constant *Init, unsigned Size) { if (!isa<UndefValue>(Init) && !isa<ConstantVector>(Init) && @@ -968,7 +941,7 @@ unsigned JITEmitter::GetSizeOfGlobalsInBytes(MachineFunction &MF) { unsigned Size = 0; GVSet.clear(); - for (MachineFunction::iterator MBB = MF.begin(), E = MF.end(); + for (MachineFunction::iterator MBB = MF.begin(), E = MF.end(); MBB != E; ++MBB) { for (MachineBasicBlock::const_iterator I = MBB->begin(), E = MBB->end(); I != E; ++I) { @@ -1000,7 +973,7 @@ unsigned JITEmitter::GetSizeOfGlobalsInBytes(MachineFunction &MF) { DEBUG(errs() << "JIT: About to look through initializers\n"); // Look for more globals that are referenced only from initializers. // GVSet.end is computed each time because the set can grow as we go. - for (SmallPtrSet<const GlobalVariable *, 8>::iterator I = GVSet.begin(); + for (SmallPtrSet<const GlobalVariable *, 8>::iterator I = GVSet.begin(); I != GVSet.end(); I++) { const GlobalVariable* GV = *I; if (GV->hasInitializer()) @@ -1022,10 +995,10 @@ void JITEmitter::startFunction(MachineFunction &F) { const TargetInstrInfo* TII = F.getTarget().getInstrInfo(); MachineJumpTableInfo *MJTI = F.getJumpTableInfo(); MachineConstantPool *MCP = F.getConstantPool(); - + // Ensure the constant pool/jump table info is at least 4-byte aligned. ActualSize = RoundUpToAlign(ActualSize, 16); - + // Add the alignment of the constant pool ActualSize = RoundUpToAlign(ActualSize, MCP->getConstantPoolAlignment()); @@ -1037,7 +1010,7 @@ void JITEmitter::startFunction(MachineFunction &F) { // Add the jump table size ActualSize += GetJumpTableSizeInBytes(MJTI); - + // Add the alignment for the function ActualSize = RoundUpToAlign(ActualSize, std::max(F.getFunction()->getAlignment(), 8U)); @@ -1110,29 +1083,19 @@ bool JITEmitter::finishFunction(MachineFunction &F) { ResultPtr = TheJIT->getPointerToNamedFunction(MR.getExternalSymbol(), false); DEBUG(errs() << "JIT: Map \'" << MR.getExternalSymbol() << "\' to [" - << ResultPtr << "]\n"); + << ResultPtr << "]\n"); // If the target REALLY wants a stub for this function, emit it now. - if (!MR.doesntNeedStub()) { - if (!TheJIT->areDlsymStubsEnabled()) { - ResultPtr = Resolver.getExternalFunctionStub(ResultPtr); - } else { - void *&Stub = ExtFnStubs[MR.getExternalSymbol()]; - if (!Stub) { - Stub = Resolver.getExternalFunctionStub((void *)&Stub); - AddStubToCurrentFunction(Stub); - } - ResultPtr = Stub; - } + if (MR.mayNeedFarStub()) { + ResultPtr = Resolver.getExternalFunctionStub(ResultPtr); } } else if (MR.isGlobalValue()) { ResultPtr = getPointerToGlobal(MR.getGlobalValue(), BufferBegin+MR.getMachineCodeOffset(), - MR.doesntNeedStub()); + MR.mayNeedFarStub()); } else if (MR.isIndirectSymbol()) { - ResultPtr = getPointerToGVIndirectSym(MR.getGlobalValue(), - BufferBegin+MR.getMachineCodeOffset(), - MR.doesntNeedStub()); + ResultPtr = getPointerToGVIndirectSym( + MR.getGlobalValue(), BufferBegin+MR.getMachineCodeOffset()); } else if (MR.isBasicBlock()) { ResultPtr = (void*)getMachineBasicBlockAddress(MR.getBasicBlock()); } else if (MR.isConstantPoolIndex()) { @@ -1278,7 +1241,7 @@ bool JITEmitter::finishFunction(MachineFunction &F) { if (MMI) MMI->EndFunction(); - + return false; } @@ -1316,20 +1279,20 @@ void JITEmitter::deallocateMemForFunction(const Function *F) { // If the function did not reference any stubs, return. 
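  // (Bookkeeping sketch: CurFnStubUses maps a Function to every stub it
  // references, and StubFnRefs maps each stub back to the Functions that use
  // it; once the last referencing Function is deallocated here, the stub
  // itself becomes eligible for invalidation below.)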
if (CurFnStubUses.find(F) == CurFnStubUses.end()) return; - + // For each referenced stub, erase the reference to this function, and then // erase the list of referenced stubs. SmallVectorImpl<void *> &StubList = CurFnStubUses[F]; for (unsigned i = 0, e = StubList.size(); i != e; ++i) { void *Stub = StubList[i]; - + // If we already invalidated this stub for this function, continue. if (StubFnRefs.count(Stub) == 0) continue; - + SmallPtrSet<const Function *, 1> &FnRefs = StubFnRefs[Stub]; FnRefs.erase(F); - + // If this function was the last reference to the stub, invalidate the stub // in the JITResolver. Were there a memory manager deallocateStub routine, // we could call that at this point too. @@ -1338,19 +1301,10 @@ void JITEmitter::deallocateMemForFunction(const Function *F) { StubFnRefs.erase(Stub); // Invalidate the stub. If it is a GV stub, update the JIT's global - // mapping for that GV to zero, otherwise, search the string map of - // external function names to stubs and remove the entry for this stub. + // mapping for that GV to zero. GlobalValue *GV = Resolver.invalidateStub(Stub); if (GV) { TheJIT->updateGlobalMapping(GV, 0); - } else { - for (StringMapIterator<void*> i = ExtFnStubs.begin(), - e = ExtFnStubs.end(); i != e; ++i) { - if (i->second == Stub) { - ExtFnStubs.erase(i); - break; - } - } } } } @@ -1421,7 +1375,7 @@ void JITEmitter::initJumpTableInfo(MachineJumpTableInfo *MJTI) { const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables(); if (JT.empty()) return; - + unsigned NumEntries = 0; for (unsigned i = 0, e = JT.size(); i != e; ++i) NumEntries += JT[i].MBBs.size(); @@ -1441,7 +1395,7 @@ void JITEmitter::emitJumpTableInfo(MachineJumpTableInfo *MJTI) { const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables(); if (JT.empty() || JumpTableBase == 0) return; - + if (TargetMachine::getRelocationModel() == Reloc::PIC_) { assert(MJTI->getEntrySize() == 4 && "Cross JIT'ing?"); // For each jump table, place the offset from the beginning of the table @@ -1460,8 +1414,8 @@ void JITEmitter::emitJumpTableInfo(MachineJumpTableInfo *MJTI) { } } else { assert(MJTI->getEntrySize() == sizeof(void*) && "Cross JIT'ing?"); - - // For each jump table, map each target in the jump table to the address of + + // For each jump table, map each target in the jump table to the address of // an emitted MachineBasicBlock. 
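      // Unlike the PIC_ case above, which (per the assert and comment there)
      // stores 4-byte offsets from the start of each table, this path writes
      // one pointer-sized absolute MBB address per entry. The tables are laid
      // out back to back starting at JumpTableBase, which is why
      // getJumpTableEntryAddress can locate an entry by summing the sizes of
      // the preceding tables.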
intptr_t *SlotPtr = (intptr_t*)JumpTableBase; @@ -1480,7 +1434,7 @@ void JITEmitter::startGVStub(const GlobalValue* GV, unsigned StubSize, SavedBufferBegin = BufferBegin; SavedBufferEnd = BufferEnd; SavedCurBufferPtr = CurBufferPtr; - + BufferBegin = CurBufferPtr = MemMgr->allocateStub(GV, StubSize, Alignment); BufferEnd = BufferBegin+StubSize+1; } @@ -1490,7 +1444,7 @@ void JITEmitter::startGVStub(const GlobalValue* GV, void *Buffer, SavedBufferBegin = BufferBegin; SavedBufferEnd = BufferEnd; SavedCurBufferPtr = CurBufferPtr; - + BufferBegin = CurBufferPtr = (uint8_t *)Buffer; BufferEnd = BufferBegin+StubSize+1; } @@ -1519,15 +1473,15 @@ uintptr_t JITEmitter::getConstantPoolEntryAddress(unsigned ConstantNum) const { uintptr_t JITEmitter::getJumpTableEntryAddress(unsigned Index) const { const std::vector<MachineJumpTableEntry> &JT = JumpTable->getJumpTables(); assert(Index < JT.size() && "Invalid jump table index!"); - + unsigned Offset = 0; unsigned EntrySize = JumpTable->getEntrySize(); - + for (unsigned i = 0; i < Index; ++i) Offset += JT[i].MBBs.size(); - + Offset *= EntrySize; - + return (uintptr_t)((char *)JumpTableBase + Offset); } @@ -1572,7 +1526,7 @@ void *JIT::getPointerToFunctionOrStub(Function *F) { // If we have already code generated the function, just return the address. if (void *Addr = getPointerToGlobalIfAvailable(F)) return Addr; - + // Get a stub if the target supports it. assert(isa<JITEmitter>(JCE) && "Unexpected MCE?"); JITEmitter *JE = cast<JITEmitter>(getCodeEmitter()); @@ -1591,92 +1545,6 @@ void JIT::updateFunctionStub(Function *F) { getJITInfo().emitFunctionStubAtAddr(F, Addr, Stub, *getCodeEmitter()); } -/// updateDlsymStubTable - Emit the data necessary to relocate the stubs -/// that were emitted during code generation. -/// -void JIT::updateDlsymStubTable() { - assert(isa<JITEmitter>(JCE) && "Unexpected MCE?"); - JITEmitter *JE = cast<JITEmitter>(getCodeEmitter()); - - SmallVector<GlobalValue*, 8> GVs; - SmallVector<void*, 8> Ptrs; - const StringMap<void *> &ExtFns = JE->getExternalFnStubs(); - - JE->getJITResolver().getRelocatableGVs(GVs, Ptrs); - - unsigned nStubs = GVs.size() + ExtFns.size(); - - // If there are no relocatable stubs, return. - if (nStubs == 0) - return; - - // If there are no new relocatable stubs, return. - void *CurTable = JE->getMemMgr()->getDlsymTable(); - if (CurTable && (*(unsigned *)CurTable == nStubs)) - return; - - // Calculate the size of the stub info - unsigned offset = 4 + 4 * nStubs + sizeof(intptr_t) * nStubs; - - SmallVector<unsigned, 8> Offsets; - for (unsigned i = 0; i != GVs.size(); ++i) { - Offsets.push_back(offset); - offset += GVs[i]->getName().size() + 1; - } - for (StringMapConstIterator<void*> i = ExtFns.begin(), e = ExtFns.end(); - i != e; ++i) { - Offsets.push_back(offset); - offset += strlen(i->first()) + 1; - } - - // Allocate space for the new "stub", which contains the dlsym table. - JE->startGVStub(0, offset, 4); - - // Emit the number of records - JE->emitInt32(nStubs); - - // Emit the string offsets - for (unsigned i = 0; i != nStubs; ++i) - JE->emitInt32(Offsets[i]); - - // Emit the pointers. Verify that they are at least 2-byte aligned, and set - // the low bit to 0 == GV, 1 == Function, so that the client code doing the - // relocation can write the relocated pointer at the appropriate place in - // the stub. 
- for (unsigned i = 0; i != GVs.size(); ++i) { - intptr_t Ptr = (intptr_t)Ptrs[i]; - assert((Ptr & 1) == 0 && "Stub pointers must be at least 2-byte aligned!"); - - if (isa<Function>(GVs[i])) - Ptr |= (intptr_t)1; - - if (sizeof(Ptr) == 8) - JE->emitInt64(Ptr); - else - JE->emitInt32(Ptr); - } - for (StringMapConstIterator<void*> i = ExtFns.begin(), e = ExtFns.end(); - i != e; ++i) { - intptr_t Ptr = (intptr_t)i->second | 1; - - if (sizeof(Ptr) == 8) - JE->emitInt64(Ptr); - else - JE->emitInt32(Ptr); - } - - // Emit the strings. - for (unsigned i = 0; i != GVs.size(); ++i) - JE->emitString(GVs[i]->getName()); - for (StringMapConstIterator<void*> i = ExtFns.begin(), e = ExtFns.end(); - i != e; ++i) - JE->emitString(i->first()); - - // Tell the JIT memory manager where it is. The JIT Memory Manager will - // deallocate space for the old one, if one existed. - JE->getMemMgr()->SetDlsymTable(JE->finishGVStub(0)); -} - /// freeMachineCodeForFunction - release machine code memory for given Function. /// void JIT::freeMachineCodeForFunction(Function *F) { diff --git a/lib/ExecutionEngine/JIT/JITMemoryManager.cpp b/lib/ExecutionEngine/JIT/JITMemoryManager.cpp index 3796624..80cb999 100644 --- a/lib/ExecutionEngine/JIT/JITMemoryManager.cpp +++ b/lib/ExecutionEngine/JIT/JITMemoryManager.cpp @@ -49,23 +49,23 @@ namespace { /// ThisAllocated - This is true if this block is currently allocated. If /// not, this can be converted to a FreeRangeHeader. unsigned ThisAllocated : 1; - + /// PrevAllocated - Keep track of whether the block immediately before us is /// allocated. If not, the word immediately before this header is the size /// of the previous block. unsigned PrevAllocated : 1; - + /// BlockSize - This is the size in bytes of this memory block, /// including this header. uintptr_t BlockSize : (sizeof(intptr_t)*CHAR_BIT - 2); - + /// getBlockAfter - Return the memory block immediately after this one. /// MemoryRangeHeader &getBlockAfter() const { return *(MemoryRangeHeader*)((char*)this+BlockSize); } - + /// getFreeBlockBefore - If the block before this one is free, return it, /// otherwise return null. FreeRangeHeader *getFreeBlockBefore() const { @@ -73,15 +73,15 @@ namespace { intptr_t PrevSize = ((intptr_t *)this)[-1]; return (FreeRangeHeader*)((char*)this-PrevSize); } - + /// FreeBlock - Turn an allocated block into a free block, adjusting /// bits in the object headers, and adding an end of region memory block. FreeRangeHeader *FreeBlock(FreeRangeHeader *FreeList); - + /// TrimAllocationToSize - If this allocated block is significantly larger /// than NewSize, split it into two pieces (where the former is NewSize /// bytes, including the header), and add the new block to the free list. - FreeRangeHeader *TrimAllocationToSize(FreeRangeHeader *FreeList, + FreeRangeHeader *TrimAllocationToSize(FreeRangeHeader *FreeList, uint64_t NewSize); }; @@ -91,13 +91,13 @@ namespace { struct FreeRangeHeader : public MemoryRangeHeader { FreeRangeHeader *Prev; FreeRangeHeader *Next; - + /// getMinBlockSize - Get the minimum size for a memory block. Blocks /// smaller than this size cannot be created. static unsigned getMinBlockSize() { return sizeof(FreeRangeHeader)+sizeof(intptr_t); } - + /// SetEndOfBlockSizeMarker - The word at the end of every free block is /// known to be the size of the free block. Set it for this block. 
void SetEndOfBlockSizeMarker() { @@ -110,7 +110,7 @@ namespace { Next->Prev = Prev; return Prev->Next = Next; } - + void AddToFreeList(FreeRangeHeader *FreeList) { Next = FreeList; Prev = FreeList->Prev; @@ -121,7 +121,7 @@ namespace { /// GrowBlock - The block after this block just got deallocated. Merge it /// into the current block. void GrowBlock(uintptr_t NewSize); - + /// AllocateBlock - Mark this entire block allocated, updating freelists /// etc. This returns a pointer to the circular free-list. FreeRangeHeader *AllocateBlock(); @@ -137,7 +137,7 @@ FreeRangeHeader *FreeRangeHeader::AllocateBlock() { // Mark this block allocated. ThisAllocated = 1; getBlockAfter().PrevAllocated = 1; - + // Remove it from the free list. return RemoveFromFreeList(); } @@ -150,9 +150,9 @@ FreeRangeHeader *MemoryRangeHeader::FreeBlock(FreeRangeHeader *FreeList) { MemoryRangeHeader *FollowingBlock = &getBlockAfter(); assert(ThisAllocated && "This block is already free!"); assert(FollowingBlock->PrevAllocated && "Flags out of sync!"); - + FreeRangeHeader *FreeListToReturn = FreeList; - + // If the block after this one is free, merge it into this block. if (!FollowingBlock->ThisAllocated) { FreeRangeHeader &FollowingFreeBlock = *(FreeRangeHeader *)FollowingBlock; @@ -164,18 +164,18 @@ FreeRangeHeader *MemoryRangeHeader::FreeBlock(FreeRangeHeader *FreeList) { assert(&FollowingFreeBlock != FreeList && "No tombstone block?"); } FollowingFreeBlock.RemoveFromFreeList(); - + // Include the following block into this one. BlockSize += FollowingFreeBlock.BlockSize; FollowingBlock = &FollowingFreeBlock.getBlockAfter(); - + // Tell the block after the block we are coalescing that this block is // allocated. FollowingBlock->PrevAllocated = 1; } - + assert(FollowingBlock->ThisAllocated && "Missed coalescing?"); - + if (FreeRangeHeader *PrevFreeBlock = getFreeBlockBefore()) { PrevFreeBlock->GrowBlock(PrevFreeBlock->BlockSize + BlockSize); return FreeListToReturn ? FreeListToReturn : PrevFreeBlock; @@ -218,24 +218,24 @@ TrimAllocationToSize(FreeRangeHeader *FreeList, uint64_t NewSize) { // Round up size for alignment of header. unsigned HeaderAlign = __alignof(FreeRangeHeader); NewSize = (NewSize+ (HeaderAlign-1)) & ~(HeaderAlign-1); - + // Size is now the size of the block we will remove from the start of the // current block. assert(NewSize <= BlockSize && "Allocating more space from this block than exists!"); - + // If splitting this block will cause the remainder to be too small, do not // split the block. if (BlockSize <= NewSize+FreeRangeHeader::getMinBlockSize()) return FreeList; - + // Otherwise, we splice the required number of bytes out of this block, form // a new block immediately after it, then mark this block allocated. MemoryRangeHeader &FormerNextBlock = getBlockAfter(); - + // Change the size of this block. BlockSize = NewSize; - + // Get the new block we just sliced out and turn it into a free block. FreeRangeHeader &NewNextBlock = (FreeRangeHeader &)getBlockAfter(); NewNextBlock.BlockSize = (char*)&FormerNextBlock - (char*)&NewNextBlock; @@ -283,7 +283,7 @@ namespace { sys::MemoryBlock LastSlab; // Memory slabs allocated by the JIT. We refer to them as slabs so we don't - // confuse them with the blocks of memory descibed above. + // confuse them with the blocks of memory described above. 
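    // (Block-level bookkeeping, for reference: every code block starts with a
    // MemoryRangeHeader whose ThisAllocated/PrevAllocated bits, together with
    // the size word written at the end of each free block, let FreeBlock()
    // coalesce with both neighbours and let TrimAllocationToSize() split off
    // the unused tail of an oversized allocation.)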
std::vector<sys::MemoryBlock> CodeSlabs; JITSlabAllocator BumpSlabAllocator; BumpPtrAllocator StubAllocator; @@ -296,7 +296,6 @@ namespace { MemoryRangeHeader *CurBlock; uint8_t *GOTBase; // Target Specific reserved memory - void *DlsymTable; // Stub external symbol information public: DefaultJITMemoryManager(); ~DefaultJITMemoryManager(); @@ -318,7 +317,6 @@ namespace { static const size_t DefaultSizeThreshold; void AllocateGOT(); - void SetDlsymTable(void *); // Testing methods. virtual bool CheckInvariants(std::string &ErrorStr); @@ -349,7 +347,7 @@ namespace { } largest = largest - sizeof(MemoryRangeHeader); - + // If this block isn't big enough for the allocation desired, allocate // another block of memory and add it to the free list. if (largest < ActualSize || @@ -445,34 +443,30 @@ namespace { return (uint8_t*)DataAllocator.Allocate(Size, Alignment); } - /// startExceptionTable - Use startFunctionBody to allocate memory for the + /// startExceptionTable - Use startFunctionBody to allocate memory for the /// function's exception table. uint8_t* startExceptionTable(const Function* F, uintptr_t &ActualSize) { return startFunctionBody(F, ActualSize); } - /// endExceptionTable - The exception table of F is now allocated, + /// endExceptionTable - The exception table of F is now allocated, /// and takes the memory in the range [TableStart,TableEnd). void endExceptionTable(const Function *F, uint8_t *TableStart, uint8_t *TableEnd, uint8_t* FrameRegister) { assert(TableEnd > TableStart); assert(TableStart == (uint8_t *)(CurBlock+1) && "Mismatched table start/end!"); - + uintptr_t BlockSize = TableEnd - (uint8_t *)CurBlock; // Release the memory at the end of this block that isn't needed. FreeMemoryList =CurBlock->TrimAllocationToSize(FreeMemoryList, BlockSize); } - + uint8_t *getGOTBase() const { return GOTBase; } - - void *getDlsymTable() const { - return DlsymTable; - } - + void deallocateBlock(void *Block) { // Find the block that is allocated for this function. MemoryRangeHeader *MemRange = static_cast<MemoryRangeHeader*>(Block) - 1; @@ -561,16 +555,16 @@ DefaultJITMemoryManager::DefaultJITMemoryManager() // END ] // // The last three blocks are never deallocated or touched. - + // Add MemoryRangeHeader to the end of the memory region, indicating that // the space after the block of memory is allocated. This is block #3. MemoryRangeHeader *Mem3 = (MemoryRangeHeader*)(MemBase+MemBlock.size())-1; Mem3->ThisAllocated = 1; Mem3->PrevAllocated = 0; Mem3->BlockSize = sizeof(MemoryRangeHeader); - + /// Add a tiny free region so that the free list always has one entry. - FreeRangeHeader *Mem2 = + FreeRangeHeader *Mem2 = (FreeRangeHeader *)(((char*)Mem3)-FreeRangeHeader::getMinBlockSize()); Mem2->ThisAllocated = 0; Mem2->PrevAllocated = 1; @@ -584,7 +578,7 @@ DefaultJITMemoryManager::DefaultJITMemoryManager() Mem1->ThisAllocated = 1; Mem1->PrevAllocated = 0; Mem1->BlockSize = sizeof(MemoryRangeHeader); - + // Add a FreeRangeHeader to the start of the function body region, indicating // that the space is free. Mark the previous block allocated so we never look // at it. @@ -594,12 +588,11 @@ DefaultJITMemoryManager::DefaultJITMemoryManager() Mem0->BlockSize = (char*)Mem1-(char*)Mem0; Mem0->SetEndOfBlockSizeMarker(); Mem0->AddToFreeList(Mem2); - + // Start out with the freelist pointing to Mem0. 
FreeMemoryList = Mem0; GOTBase = NULL; - DlsymTable = NULL; } void DefaultJITMemoryManager::AllocateGOT() { @@ -608,10 +601,6 @@ void DefaultJITMemoryManager::AllocateGOT() { HasGOT = true; } -void DefaultJITMemoryManager::SetDlsymTable(void *ptr) { - DlsymTable = ptr; -} - DefaultJITMemoryManager::~DefaultJITMemoryManager() { for (unsigned i = 0, e = CodeSlabs.size(); i != e; ++i) sys::Memory::ReleaseRWX(CodeSlabs[i]); diff --git a/lib/Linker/LinkArchives.cpp b/lib/Linker/LinkArchives.cpp index 76d81c2..365ec05 100644 --- a/lib/Linker/LinkArchives.cpp +++ b/lib/Linker/LinkArchives.cpp @@ -172,10 +172,9 @@ Linker::LinkInArchive(const sys::Path &Filename, bool &is_native) { verbose(" Linking in module: " + aModule->getModuleIdentifier()); // Link it in - if (LinkInModule(aModule, &moduleErrorMsg)) { + if (LinkInModule(aModule, &moduleErrorMsg)) return error("Cannot link in module '" + aModule->getModuleIdentifier() + "': " + moduleErrorMsg); - } } } diff --git a/lib/Linker/LinkItems.cpp b/lib/Linker/LinkItems.cpp index 61f3c26..2c22550 100644 --- a/lib/Linker/LinkItems.cpp +++ b/lib/Linker/LinkItems.cpp @@ -70,7 +70,7 @@ Linker::LinkInItems(const ItemList& Items, ItemList& NativeItems) { /// LinkInLibrary - links one library into the HeadModule. /// -bool Linker::LinkInLibrary(const StringRef &Lib, bool& is_native) { +bool Linker::LinkInLibrary(StringRef Lib, bool& is_native) { is_native = false; // Determine where this library lives. sys::Path Pathname = FindLib(Lib); @@ -160,14 +160,17 @@ bool Linker::LinkInFile(const sys::Path &File, bool &is_native) { // Check for a file of name "-", which means "read standard input" if (File.str() == "-") { std::auto_ptr<Module> M; - if (MemoryBuffer *Buffer = MemoryBuffer::getSTDIN()) { + MemoryBuffer *Buffer = MemoryBuffer::getSTDIN(); + if (!Buffer->getBufferSize()) { + delete Buffer; + Error = "standard input is empty"; + } else { M.reset(ParseBitcodeFile(Buffer, Context, &Error)); delete Buffer; if (M.get()) if (!LinkInModule(M.get(), &Error)) return false; - } else - Error = "standard input is empty"; + } return error("Cannot link stdin: " + Error); } @@ -187,7 +190,6 @@ bool Linker::LinkInFile(const sys::Path &File, bool &is_native) { case sys::Archive_FileType: // A user may specify an ar archive without -l, perhaps because it // is not installed as a library. Detect that and link the archive. 
- verbose("Linking archive file '" + File.str() + "'"); if (LinkInArchive(File, is_native)) return true; break; diff --git a/lib/Linker/Linker.cpp b/lib/Linker/Linker.cpp index aef79d0..32aa0f9 100644 --- a/lib/Linker/Linker.cpp +++ b/lib/Linker/Linker.cpp @@ -20,8 +20,8 @@ #include "llvm/Config/config.h" using namespace llvm; -Linker::Linker(const StringRef &progname, const StringRef &modname, - LLVMContext& C, unsigned flags): +Linker::Linker(StringRef progname, StringRef modname, + LLVMContext& C, unsigned flags): Context(C), Composite(new Module(modname, C)), LibPaths(), @@ -29,7 +29,7 @@ Linker::Linker(const StringRef &progname, const StringRef &modname, Error(), ProgramName(progname) { } -Linker::Linker(const StringRef &progname, Module* aModule, unsigned flags) : +Linker::Linker(StringRef progname, Module* aModule, unsigned flags) : Context(aModule->getContext()), Composite(aModule), LibPaths(), @@ -42,7 +42,7 @@ Linker::~Linker() { } bool -Linker::error(const StringRef &message) { +Linker::error(StringRef message) { Error = message; if (!(Flags&QuietErrors)) errs() << ProgramName << ": error: " << message << "\n"; @@ -50,7 +50,7 @@ Linker::error(const StringRef &message) { } bool -Linker::warning(const StringRef &message) { +Linker::warning(StringRef message) { Error = message; if (!(Flags&QuietWarnings)) errs() << ProgramName << ": warning: " << message << "\n"; @@ -58,7 +58,7 @@ Linker::warning(const StringRef &message) { } void -Linker::verbose(const StringRef &message) { +Linker::verbose(StringRef message) { if (Flags&Verbose) errs() << " " << message << "\n"; } @@ -114,7 +114,7 @@ Linker::LoadObject(const sys::Path &FN) { // IsLibrary - Determine if "Name" is a library in "Directory". Return // a non-empty sys::Path if its found, an empty one otherwise. -static inline sys::Path IsLibrary(const StringRef &Name, +static inline sys::Path IsLibrary(StringRef Name, const sys::Path &Directory) { sys::Path FullPath(Directory); @@ -153,7 +153,7 @@ static inline sys::Path IsLibrary(const StringRef &Name, /// Path if no matching file can be found. /// sys::Path -Linker::FindLib(const StringRef &Filename) { +Linker::FindLib(StringRef Filename) { // Determine if the pathname can be found as it stands. 
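  // (FindLib, like the other signatures touched in this patch, now takes its
  // StringRef by value: a StringRef is only a pointer plus a length, so
  // copying it is as cheap as passing a reference and avoids one level of
  // indirection.)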
sys::Path FilePath(Filename); if (FilePath.canRead() && diff --git a/lib/MC/MCAsmStreamer.cpp b/lib/MC/MCAsmStreamer.cpp index e939f37..b6ebb1a 100644 --- a/lib/MC/MCAsmStreamer.cpp +++ b/lib/MC/MCAsmStreamer.cpp @@ -58,7 +58,7 @@ public: virtual void EmitZerofill(const MCSection *Section, MCSymbol *Symbol = 0, unsigned Size = 0, unsigned ByteAlignment = 0); - virtual void EmitBytes(const StringRef &Data); + virtual void EmitBytes(StringRef Data); virtual void EmitValue(const MCExpr *Value, unsigned Size); @@ -186,7 +186,7 @@ void MCAsmStreamer::EmitZerofill(const MCSection *Section, MCSymbol *Symbol, OS << '\n'; } -void MCAsmStreamer::EmitBytes(const StringRef &Data) { +void MCAsmStreamer::EmitBytes(StringRef Data) { assert(CurSection && "Cannot emit contents before setting section!"); for (unsigned i = 0, e = Data.size(); i != e; ++i) OS << ".byte " << (unsigned) (unsigned char) Data[i] << '\n'; diff --git a/lib/MC/MCAssembler.cpp b/lib/MC/MCAssembler.cpp index 4f39f1e..1f5b6f1 100644 --- a/lib/MC/MCAssembler.cpp +++ b/lib/MC/MCAssembler.cpp @@ -180,7 +180,7 @@ public: OS << StringRef(Zeros, N % 16); } - void WriteString(const StringRef &Str, unsigned ZeroFillSize = 0) { + void WriteString(StringRef Str, unsigned ZeroFillSize = 0) { OS << Str; if (ZeroFillSize) WriteZeros(ZeroFillSize - Str.size()); diff --git a/lib/MC/MCContext.cpp b/lib/MC/MCContext.cpp index 09479c5..45d2c02 100644 --- a/lib/MC/MCContext.cpp +++ b/lib/MC/MCContext.cpp @@ -23,7 +23,7 @@ MCContext::~MCContext() { // we don't need to free them here. } -MCSymbol *MCContext::CreateSymbol(const StringRef &Name) { +MCSymbol *MCContext::CreateSymbol(StringRef Name) { assert(Name[0] != '\0' && "Normal symbols cannot be unnamed!"); // Create and bind the symbol, and ensure that names are unique. @@ -32,7 +32,7 @@ MCSymbol *MCContext::CreateSymbol(const StringRef &Name) { return Entry = new (*this) MCSymbol(Name, false); } -MCSymbol *MCContext::GetOrCreateSymbol(const StringRef &Name) { +MCSymbol *MCContext::GetOrCreateSymbol(StringRef Name) { MCSymbol *&Entry = Symbols[Name]; if (Entry) return Entry; @@ -46,7 +46,7 @@ MCSymbol *MCContext::GetOrCreateSymbol(const Twine &Name) { } -MCSymbol *MCContext::CreateTemporarySymbol(const StringRef &Name) { +MCSymbol *MCContext::CreateTemporarySymbol(StringRef Name) { // If unnamed, just create a symbol. 
if (Name.empty()) new (*this) MCSymbol("", true); @@ -57,6 +57,6 @@ MCSymbol *MCContext::CreateTemporarySymbol(const StringRef &Name) { return Entry = new (*this) MCSymbol(Name, true); } -MCSymbol *MCContext::LookupSymbol(const StringRef &Name) const { +MCSymbol *MCContext::LookupSymbol(StringRef Name) const { return Symbols.lookup(Name); } diff --git a/lib/MC/MCExpr.cpp b/lib/MC/MCExpr.cpp index c950ff2..a5a2256 100644 --- a/lib/MC/MCExpr.cpp +++ b/lib/MC/MCExpr.cpp @@ -133,8 +133,7 @@ const MCSymbolRefExpr *MCSymbolRefExpr::Create(const MCSymbol *Sym, return new (Ctx) MCSymbolRefExpr(Sym); } -const MCSymbolRefExpr *MCSymbolRefExpr::Create(const StringRef &Name, - MCContext &Ctx) { +const MCSymbolRefExpr *MCSymbolRefExpr::Create(StringRef Name, MCContext &Ctx) { return Create(Ctx.GetOrCreateSymbol(Name), Ctx); } diff --git a/lib/MC/MCMachOStreamer.cpp b/lib/MC/MCMachOStreamer.cpp index 189f072..828b92a 100644 --- a/lib/MC/MCMachOStreamer.cpp +++ b/lib/MC/MCMachOStreamer.cpp @@ -134,7 +134,7 @@ public: virtual void EmitZerofill(const MCSection *Section, MCSymbol *Symbol = 0, unsigned Size = 0, unsigned ByteAlignment = 0); - virtual void EmitBytes(const StringRef &Data); + virtual void EmitBytes(StringRef Data); virtual void EmitValue(const MCExpr *Value, unsigned Size); @@ -315,7 +315,7 @@ void MCMachOStreamer::EmitZerofill(const MCSection *Section, MCSymbol *Symbol, SectData.setAlignment(ByteAlignment); } -void MCMachOStreamer::EmitBytes(const StringRef &Data) { +void MCMachOStreamer::EmitBytes(StringRef Data) { MCDataFragment *DF = dyn_cast_or_null<MCDataFragment>(getCurrentFragment()); if (!DF) DF = new MCDataFragment(CurSectionData); diff --git a/lib/MC/MCNullStreamer.cpp b/lib/MC/MCNullStreamer.cpp index 3cd22ca..ddc4e69 100644 --- a/lib/MC/MCNullStreamer.cpp +++ b/lib/MC/MCNullStreamer.cpp @@ -45,7 +45,7 @@ namespace { virtual void EmitZerofill(const MCSection *Section, MCSymbol *Symbol = 0, unsigned Size = 0, unsigned ByteAlignment = 0) {} - virtual void EmitBytes(const StringRef &Data) {} + virtual void EmitBytes(StringRef Data) {} virtual void EmitValue(const MCExpr *Value, unsigned Size) {} diff --git a/lib/MC/MCSection.cpp b/lib/MC/MCSection.cpp index 333a471..24c89ef 100644 --- a/lib/MC/MCSection.cpp +++ b/lib/MC/MCSection.cpp @@ -25,7 +25,7 @@ MCSection::~MCSection() { //===----------------------------------------------------------------------===// MCSectionCOFF *MCSectionCOFF:: -Create(const StringRef &Name, bool IsDirective, SectionKind K, MCContext &Ctx) { +Create(StringRef Name, bool IsDirective, SectionKind K, MCContext &Ctx) { return new (Ctx) MCSectionCOFF(Name, IsDirective, K); } diff --git a/lib/MC/MCSectionELF.cpp b/lib/MC/MCSectionELF.cpp index 660a8c9..c6812ed 100644 --- a/lib/MC/MCSectionELF.cpp +++ b/lib/MC/MCSectionELF.cpp @@ -15,7 +15,7 @@ using namespace llvm; MCSectionELF *MCSectionELF:: -Create(const StringRef &Section, unsigned Type, unsigned Flags, +Create(StringRef Section, unsigned Type, unsigned Flags, SectionKind K, bool isExplicit, MCContext &Ctx) { return new (Ctx) MCSectionELF(Section, Type, Flags, K, isExplicit); } diff --git a/lib/MC/MCSectionMachO.cpp b/lib/MC/MCSectionMachO.cpp index b3aeb9c..6cc67a2 100644 --- a/lib/MC/MCSectionMachO.cpp +++ b/lib/MC/MCSectionMachO.cpp @@ -66,7 +66,7 @@ ENTRY(0 /*FIXME*/, S_ATTR_LOC_RELOC) MCSectionMachO *MCSectionMachO:: -Create(const StringRef &Segment, const StringRef &Section, +Create(StringRef Segment, StringRef Section, unsigned TypeAndAttributes, unsigned Reserved2, SectionKind K, MCContext &Ctx) { // 
S_SYMBOL_STUBS must be set for Reserved2 to be non-zero. diff --git a/lib/MC/MCSymbol.cpp b/lib/MC/MCSymbol.cpp index 86ff3f3..b145d07 100644 --- a/lib/MC/MCSymbol.cpp +++ b/lib/MC/MCSymbol.cpp @@ -35,7 +35,7 @@ static void MangleLetter(raw_ostream &OS, unsigned char C) { /// NameNeedsEscaping - Return true if the identifier \arg Str needs quotes /// for this assembler. -static bool NameNeedsEscaping(const StringRef &Str, const MCAsmInfo &MAI) { +static bool NameNeedsEscaping(StringRef Str, const MCAsmInfo &MAI) { assert(!Str.empty() && "Cannot create an empty MCSymbol"); // If the first character is a number and the target does not allow this, we diff --git a/lib/Support/CommandLine.cpp b/lib/Support/CommandLine.cpp index 626daa2..59340d4 100644 --- a/lib/Support/CommandLine.cpp +++ b/lib/Support/CommandLine.cpp @@ -17,6 +17,7 @@ //===----------------------------------------------------------------------===// #include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/ManagedStatic.h" @@ -765,6 +766,11 @@ void cl::ParseCommandLineOptions(int argc, char **argv, free(*i); } + DEBUG(errs() << "\nArgs: "; + for (int i = 0; i < argc; ++i) + errs() << argv[i] << ' '; + ); + // If we had an error processing our arguments, don't let the program execute if (ErrorParsing) exit(1); } @@ -1147,9 +1153,12 @@ public: #ifndef NDEBUG OS << " with assertions"; #endif + std::string CPU = sys::getHostCPUName(); + if (CPU == "generic") CPU = "(unknown)"; OS << ".\n" << " Built " << __DATE__ << " (" << __TIME__ << ").\n" << " Host: " << sys::getHostTriple() << '\n' + << " Host CPU: " << CPU << '\n' << '\n' << " Registered Targets:\n"; diff --git a/lib/Support/ConstantRange.cpp b/lib/Support/ConstantRange.cpp index 423e90d..e427f82 100644 --- a/lib/Support/ConstantRange.cpp +++ b/lib/Support/ConstantRange.cpp @@ -492,6 +492,30 @@ ConstantRange ConstantRange::truncate(uint32_t DstTySize) const { return ConstantRange(L, U); } +/// zextOrTrunc - make this range have the bit width given by \p DstTySize. The +/// value is zero extended, truncated, or left alone to make it that width. +ConstantRange ConstantRange::zextOrTrunc(uint32_t DstTySize) const { + unsigned SrcTySize = getBitWidth(); + if (SrcTySize > DstTySize) + return truncate(DstTySize); + else if (SrcTySize < DstTySize) + return zeroExtend(DstTySize); + else + return *this; +} + +/// sextOrTrunc - make this range have the bit width given by \p DstTySize. The +/// value is sign extended, truncated, or left alone to make it that width. +ConstantRange ConstantRange::sextOrTrunc(uint32_t DstTySize) const { + unsigned SrcTySize = getBitWidth(); + if (SrcTySize > DstTySize) + return truncate(DstTySize); + else if (SrcTySize < DstTySize) + return signExtend(DstTySize); + else + return *this; +} + ConstantRange ConstantRange::add(const ConstantRange &Other) const { if (isEmptySet() || Other.isEmptySet()) @@ -585,6 +609,43 @@ ConstantRange::udiv(const ConstantRange &RHS) const { return ConstantRange(Lower, Upper); } +ConstantRange +ConstantRange::shl(const ConstantRange &Amount) const { + if (isEmptySet()) + return *this; + + APInt min = getUnsignedMin() << Amount.getUnsignedMin(); + APInt max = getUnsignedMax() << Amount.getUnsignedMax(); + + // there's no overflow! 
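  // (That is: if the largest value in this range has at least as many leading
  // zeros as the largest shift amount, nothing can be shifted out, so the
  // shifted min/max computed above can be used directly.)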
+ APInt Zeros(getBitWidth(), getUnsignedMax().countLeadingZeros()); + if (Zeros.uge(Amount.getUnsignedMax())) + return ConstantRange(min, max); + + // FIXME: implement the other tricky cases + return ConstantRange(getBitWidth()); +} + +ConstantRange +ConstantRange::ashr(const ConstantRange &Amount) const { + if (isEmptySet()) + return *this; + + APInt min = getUnsignedMax().ashr(Amount.getUnsignedMin()); + APInt max = getUnsignedMin().ashr(Amount.getUnsignedMax()); + return ConstantRange(min, max); +} + +ConstantRange +ConstantRange::lshr(const ConstantRange &Amount) const { + if (isEmptySet()) + return *this; + + APInt min = getUnsignedMax().lshr(Amount.getUnsignedMin()); + APInt max = getUnsignedMin().lshr(Amount.getUnsignedMax()); + return ConstantRange(min, max); +} + /// print - Print out the bounds to a stream... /// void ConstantRange::print(raw_ostream &OS) const { diff --git a/lib/Support/Debug.cpp b/lib/Support/Debug.cpp index d4954b6..50abe01 100644 --- a/lib/Support/Debug.cpp +++ b/lib/Support/Debug.cpp @@ -62,7 +62,7 @@ bool llvm::isCurrentDebugType(const char *DebugType) { /// option were specified. Note that DebugFlag also needs to be set to true for /// debug output to be produced. /// -void SetCurrentDebugType(const char *Type) { +void llvm::SetCurrentDebugType(const char *Type) { CurrentDebugType = Type; } diff --git a/lib/Support/MemoryBuffer.cpp b/lib/Support/MemoryBuffer.cpp index 88e2050..b04864a 100644 --- a/lib/Support/MemoryBuffer.cpp +++ b/lib/Support/MemoryBuffer.cpp @@ -70,7 +70,7 @@ namespace { class MemoryBufferMem : public MemoryBuffer { std::string FileID; public: - MemoryBufferMem(const char *Start, const char *End, const char *FID, + MemoryBufferMem(const char *Start, const char *End, StringRef FID, bool Copy = false) : FileID(FID) { if (!Copy) @@ -107,7 +107,7 @@ MemoryBuffer *MemoryBuffer::getMemBufferCopy(const char *StartPtr, /// initialize the memory allocated by this method. The memory is owned by /// the MemoryBuffer object. MemoryBuffer *MemoryBuffer::getNewUninitMemBuffer(size_t Size, - const char *BufferName) { + StringRef BufferName) { char *Buf = (char *)malloc((Size+1) * sizeof(char)); if (!Buf) return 0; Buf[Size] = 0; @@ -134,17 +134,12 @@ MemoryBuffer *MemoryBuffer::getNewMemBuffer(size_t Size, /// if the Filename is "-". If an error occurs, this returns null and fills /// in *ErrStr with a reason. If stdin is empty, this API (unlike getSTDIN) /// returns an empty buffer. -MemoryBuffer *MemoryBuffer::getFileOrSTDIN(const char *Filename, +MemoryBuffer *MemoryBuffer::getFileOrSTDIN(StringRef Filename, std::string *ErrStr, int64_t FileSize) { - if (Filename[0] != '-' || Filename[1] != 0) - return getFile(Filename, ErrStr, FileSize); - MemoryBuffer *M = getSTDIN(); - if (M) return M; - - // If stdin was empty, M is null. Cons up an empty memory buffer now. 
- const char *EmptyStr = ""; - return MemoryBuffer::getMemBuffer(EmptyStr, EmptyStr, "<stdin>"); + if (Filename == "-") + return getSTDIN(); + return getFile(Filename, ErrStr, FileSize); } //===----------------------------------------------------------------------===// @@ -158,7 +153,7 @@ namespace { class MemoryBufferMMapFile : public MemoryBuffer { std::string Filename; public: - MemoryBufferMMapFile(const char *filename, const char *Pages, uint64_t Size) + MemoryBufferMMapFile(StringRef filename, const char *Pages, uint64_t Size) : Filename(filename) { init(Pages, Pages+Size); } @@ -173,13 +168,13 @@ public: }; } -MemoryBuffer *MemoryBuffer::getFile(const char *Filename, std::string *ErrStr, +MemoryBuffer *MemoryBuffer::getFile(StringRef Filename, std::string *ErrStr, int64_t FileSize) { int OpenFlags = 0; #ifdef O_BINARY OpenFlags |= O_BINARY; // Open input file in binary mode on win32. #endif - int FD = ::open(Filename, O_RDONLY|OpenFlags); + int FD = ::open(Filename.str().c_str(), O_RDONLY|OpenFlags); if (FD == -1) { if (ErrStr) *ErrStr = "could not open file"; return 0; @@ -203,6 +198,8 @@ MemoryBuffer *MemoryBuffer::getFile(const char *Filename, std::string *ErrStr, // for small files, because this can severely fragment our address space. Also // don't try to map files that are exactly a multiple of the system page size, // as the file would not have the required null terminator. + // + // FIXME: Can we just mmap an extra page in the latter case? if (FileSize >= 4096*4 && (FileSize & (sys::Process::GetPageSize()-1)) != 0) { if (const char *Pages = sys::Path::MapInFilePages(FD, FileSize)) { @@ -262,6 +259,9 @@ MemoryBuffer *MemoryBuffer::getSTDIN() { std::vector<char> FileData; // Read in all of the data from stdin, we cannot mmap stdin. + // + // FIXME: That isn't necessarily true, we should try to mmap stdin and + // fallback if it fails. sys::Program::ChangeStdinToBinary(); size_t ReadBytes; do { @@ -271,8 +271,6 @@ MemoryBuffer *MemoryBuffer::getSTDIN() { FileData.push_back(0); // &FileData[Size] is invalid. So is &*FileData.end(). size_t Size = FileData.size(); - if (Size <= 1) - return 0; MemoryBuffer *B = new STDINBufferFile(); B->initCopyOf(&FileData[0], &FileData[Size-1]); return B; diff --git a/lib/Support/StringExtras.cpp b/lib/Support/StringExtras.cpp index c72f121..1b233ab 100644 --- a/lib/Support/StringExtras.cpp +++ b/lib/Support/StringExtras.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/SmallVector.h" #include <cstring> using namespace llvm; @@ -56,3 +57,24 @@ void llvm::SplitString(const std::string &Source, S2 = getToken(S, Delimiters); } } + +void llvm::StringRef::split(SmallVectorImpl<StringRef> &A, + StringRef Separators, int MaxSplit, + bool KeepEmpty) const { + StringRef rest = *this; + + // rest.data() is used to distinguish cases like "a," that splits into + // "a" + "" and "a" that splits into "a" + 0. + for (int splits = 0; + rest.data() != NULL && (MaxSplit < 0 || splits < MaxSplit); + ++splits) { + std::pair<llvm::StringRef, llvm::StringRef> p = rest.split(Separators); + + if (p.first.size() != 0 || KeepEmpty) + A.push_back(p.first); + rest = p.second; + } + // If we have a tail left, add it. 
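  // (Expected behaviour, assuming the two-argument split() returns a null
  // second half when the separator is not found: "a,,b" split on "," gives
  // {"a", "", "b"} with KeepEmpty and {"a", "b"} without, while MaxSplit == 1
  // stops early and leaves {"a", ",b"}.)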
+ if (rest.data() != NULL && (rest.size() != 0 || KeepEmpty)) + A.push_back(rest); +} diff --git a/lib/Support/StringMap.cpp b/lib/Support/StringMap.cpp index a729d3d..6f28277 100644 --- a/lib/Support/StringMap.cpp +++ b/lib/Support/StringMap.cpp @@ -52,7 +52,7 @@ void StringMapImpl::init(unsigned InitSize) { /// specified bucket will be non-null. Otherwise, it will be null. In either /// case, the FullHashValue field of the bucket will be set to the hash value /// of the string. -unsigned StringMapImpl::LookupBucketFor(const StringRef &Name) { +unsigned StringMapImpl::LookupBucketFor(StringRef Name) { unsigned HTSize = NumBuckets; if (HTSize == 0) { // Hash table unallocated so far? init(16); @@ -110,7 +110,7 @@ unsigned StringMapImpl::LookupBucketFor(const StringRef &Name) { /// FindKey - Look up the bucket that contains the specified key. If it exists /// in the map, return the bucket number of the key. Otherwise return -1. /// This does not modify the map. -int StringMapImpl::FindKey(const StringRef &Key) const { +int StringMapImpl::FindKey(StringRef Key) const { unsigned HTSize = NumBuckets; if (HTSize == 0) return -1; // Really empty table? unsigned FullHashValue = HashString(Key); @@ -161,7 +161,7 @@ void StringMapImpl::RemoveKey(StringMapEntryBase *V) { /// RemoveKey - Remove the StringMapEntry for the specified key from the /// table, returning it. If the key is not in the table, this returns null. -StringMapEntryBase *StringMapImpl::RemoveKey(const StringRef &Key) { +StringMapEntryBase *StringMapImpl::RemoveKey(StringRef Key) { int Bucket = FindKey(Key); if (Bucket == -1) return 0; diff --git a/lib/Support/StringRef.cpp b/lib/Support/StringRef.cpp index deaa19e..51e1100 100644 --- a/lib/Support/StringRef.cpp +++ b/lib/Support/StringRef.cpp @@ -15,6 +15,26 @@ using namespace llvm; const size_t StringRef::npos; #endif +static char ascii_tolower(char x) { + if (x >= 'A' && x <= 'Z') + return x - 'A' + 'a'; + return x; +} + +/// compare_lower - Compare strings, ignoring case. +int StringRef::compare_lower(StringRef RHS) const { + for (size_t I = 0, E = std::min(Length, RHS.Length); I != E; ++I) { + char LHC = ascii_tolower(Data[I]); + char RHC = ascii_tolower(RHS.Data[I]); + if (LHC != RHC) + return LHC < RHC ? -1 : 1; + } + + if (Length == RHS.Length) + return 0; + return Length < RHS.Length ? -1 : 1; +} + //===----------------------------------------------------------------------===// // String Searching //===----------------------------------------------------------------------===// @@ -24,11 +44,11 @@ const size_t StringRef::npos; /// /// \return - The index of the first occurence of \arg Str, or npos if not /// found. -size_t StringRef::find(const StringRef &Str) const { +size_t StringRef::find(StringRef Str, size_t From) const { size_t N = Str.size(); if (N > Length) return npos; - for (size_t i = 0, e = Length - N + 1; i != e; ++i) + for (size_t e = Length - N + 1, i = std::min(From, e); i != e; ++i) if (substr(i, N).equals(Str)) return i; return npos; @@ -38,7 +58,7 @@ size_t StringRef::find(const StringRef &Str) const { /// /// \return - The index of the last occurence of \arg Str, or npos if not /// found. -size_t StringRef::rfind(const StringRef &Str) const { +size_t StringRef::rfind(StringRef Str) const { size_t N = Str.size(); if (N > Length) return npos; @@ -50,19 +70,34 @@ size_t StringRef::rfind(const StringRef &Str) const { return npos; } -/// find_first_of - Find the first character from the string 'Chars' in the -/// current string or return npos if not in string. 
-StringRef::size_type StringRef::find_first_of(StringRef Chars) const { - for (size_type i = 0, e = Length; i != e; ++i) +/// find_first_of - Find the first character in the string that is in \arg +/// Chars, or npos if not found. +/// +/// Note: O(size() * Chars.size()) +StringRef::size_type StringRef::find_first_of(StringRef Chars, + size_t From) const { + for (size_type i = std::min(From, Length), e = Length; i != e; ++i) if (Chars.find(Data[i]) != npos) return i; return npos; } /// find_first_not_of - Find the first character in the string that is not -/// in the string 'Chars' or return npos if all are in string. Same as find. -StringRef::size_type StringRef::find_first_not_of(StringRef Chars) const { - for (size_type i = 0, e = Length; i != e; ++i) +/// \arg C or npos if not found. +StringRef::size_type StringRef::find_first_not_of(char C, size_t From) const { + for (size_type i = std::min(From, Length), e = Length; i != e; ++i) + if (Data[i] != C) + return i; + return npos; +} + +/// find_first_not_of - Find the first character in the string that is not +/// in the string \arg Chars, or npos if not found. +/// +/// Note: O(size() * Chars.size()) +StringRef::size_type StringRef::find_first_not_of(StringRef Chars, + size_t From) const { + for (size_type i = std::min(From, Length), e = Length; i != e; ++i) if (Chars.find(Data[i]) == npos) return i; return npos; @@ -75,7 +110,7 @@ StringRef::size_type StringRef::find_first_not_of(StringRef Chars) const { /// count - Return the number of non-overlapped occurrences of \arg Str in /// the string. -size_t StringRef::count(const StringRef &Str) const { +size_t StringRef::count(StringRef Str) const { size_t Count = 0; size_t N = Str.size(); if (N > Length) diff --git a/lib/Support/Timer.cpp b/lib/Support/Timer.cpp index dd58d1f..7d32ee6 100644 --- a/lib/Support/Timer.cpp +++ b/lib/Support/Timer.cpp @@ -66,7 +66,7 @@ static TimerGroup *getDefaultTimerGroup() { } llvm_release_global_lock(); } - + return tmp; } @@ -145,7 +145,7 @@ static TimeRecord getTimeRecord(bool Start) { static ManagedStatic<std::vector<Timer*> > ActiveTimers; void Timer::startTimer() { - sys::SmartScopedLock<true> L(Lock); + sys::SmartScopedLock<true> L(*TimerLock); Started = true; ActiveTimers->push_back(this); TimeRecord TR = getTimeRecord(true); @@ -157,7 +157,7 @@ void Timer::startTimer() { } void Timer::stopTimer() { - sys::SmartScopedLock<true> L(Lock); + sys::SmartScopedLock<true> L(*TimerLock); TimeRecord TR = getTimeRecord(false); Elapsed += TR.Elapsed; UserTime += TR.UserTime; @@ -175,27 +175,11 @@ void Timer::stopTimer() { } void Timer::sum(const Timer &T) { - if (&T < this) { - T.Lock.acquire(); - Lock.acquire(); - } else { - Lock.acquire(); - T.Lock.acquire(); - } - Elapsed += T.Elapsed; UserTime += T.UserTime; SystemTime += T.SystemTime; MemUsed += T.MemUsed; PeakMem += T.PeakMem; - - if (&T < this) { - T.Lock.release(); - Lock.release(); - } else { - Lock.release(); - T.Lock.release(); - } } /// addPeakMemoryMeasurement - This method should be called whenever memory @@ -203,14 +187,12 @@ void Timer::sum(const Timer &T) { /// currently active timers, which will be printed when the timer group prints /// void Timer::addPeakMemoryMeasurement() { + sys::SmartScopedLock<true> L(*TimerLock); size_t MemUsed = getMemUsage(); for (std::vector<Timer*>::iterator I = ActiveTimers->begin(), - E = ActiveTimers->end(); I != E; ++I) { - (*I)->Lock.acquire(); + E = ActiveTimers->end(); I != E; ++I) (*I)->PeakMem = std::max((*I)->PeakMem, MemUsed-(*I)->PeakMemBase); - 
(*I)->Lock.release(); - } } //===----------------------------------------------------------------------===// @@ -280,14 +262,7 @@ static void printVal(double Val, double Total, raw_ostream &OS) { } void Timer::print(const Timer &Total, raw_ostream &OS) { - if (&Total < this) { - Total.Lock.acquire(); - Lock.acquire(); - } else { - Lock.acquire(); - Total.Lock.acquire(); - } - + sys::SmartScopedLock<true> L(*TimerLock); if (Total.UserTime) printVal(UserTime, Total.UserTime, OS); if (Total.SystemTime) @@ -310,14 +285,6 @@ void Timer::print(const Timer &Total, raw_ostream &OS) { OS << Name << "\n"; Started = false; // Once printed, don't print again - - if (&Total < this) { - Total.Lock.release(); - Lock.release(); - } else { - Lock.release(); - Total.Lock.release(); - } } // GetLibSupportInfoOutputFile - Return a file stream to print our output on... @@ -329,13 +296,13 @@ llvm::GetLibSupportInfoOutputFile() { if (LibSupportInfoOutputFilename == "-") return &outs(); - + std::string Error; raw_ostream *Result = new raw_fd_ostream(LibSupportInfoOutputFilename.c_str(), Error, raw_fd_ostream::F_Append); if (Error.empty()) return Result; - + errs() << "Error opening info-output-file '" << LibSupportInfoOutputFilename << " for appending!\n"; delete Result; diff --git a/lib/Support/Triple.cpp b/lib/Support/Triple.cpp index 26a1a4e..840fb98 100644 --- a/lib/Support/Triple.cpp +++ b/lib/Support/Triple.cpp @@ -94,6 +94,7 @@ const char *Triple::getOSTypeName(OSType Kind) { case MinGW64: return "mingw64"; case NetBSD: return "netbsd"; case OpenBSD: return "openbsd"; + case Psp: return "psp"; case Solaris: return "solaris"; case Win32: return "win32"; case Haiku: return "haiku"; @@ -102,7 +103,7 @@ const char *Triple::getOSTypeName(OSType Kind) { return "<invalid>"; } -Triple::ArchType Triple::getArchTypeForLLVMName(const StringRef &Name) { +Triple::ArchType Triple::getArchTypeForLLVMName(StringRef Name) { if (Name == "alpha") return alpha; if (Name == "arm") @@ -141,7 +142,7 @@ Triple::ArchType Triple::getArchTypeForLLVMName(const StringRef &Name) { return UnknownArch; } -Triple::ArchType Triple::getArchTypeForDarwinArchName(const StringRef &Str) { +Triple::ArchType Triple::getArchTypeForDarwinArchName(StringRef Str) { // See arch(3) and llvm-gcc's driver-driver.c. We don't implement support for // archs which Darwin doesn't use. @@ -178,6 +179,33 @@ Triple::ArchType Triple::getArchTypeForDarwinArchName(const StringRef &Str) { return Triple::UnknownArch; } +// Returns architecture name that is unsderstood by the target assembler. 
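// Only meaningful for Apple/Darwin triples (anything else gets NULL); note
// that the thumb variants below are folded onto the matching arm name, so
// e.g. "thumbv6" is reported as "armv6".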
+const char *Triple::getArchNameForAssembler() { + if (getOS() != Triple::Darwin && getVendor() != Triple::Apple) + return NULL; + + StringRef Str = getArchName(); + if (Str == "i386") + return "i386"; + if (Str == "x86_64") + return "x86_64"; + if (Str == "powerpc") + return "ppc"; + if (Str == "powerpc64") + return "ppc64"; + if (Str == "arm") + return "arm"; + if (Str == "armv4t" || Str == "thumbv4t") + return "armv4t"; + if (Str == "armv5" || Str == "armv5e" || Str == "thumbv5" || Str == "thumbv5e") + return "armv5"; + if (Str == "armv6" || Str == "thumbv6") + return "armv6"; + if (Str == "armv7" || Str == "thumbv7") + return "armv7"; + return NULL; +} + // void Triple::Parse() const { @@ -273,6 +301,8 @@ void Triple::Parse() const { OS = NetBSD; else if (OSName.startswith("openbsd")) OS = OpenBSD; + else if (OSName.startswith("psp")) + OS = Psp; else if (OSName.startswith("solaris")) OS = Solaris; else if (OSName.startswith("win32")) @@ -393,7 +423,7 @@ void Triple::setOS(OSType Kind) { setOSName(getOSTypeName(Kind)); } -void Triple::setArchName(const StringRef &Str) { +void Triple::setArchName(StringRef Str) { // Work around a miscompilation bug for Twines in gcc 4.0.3. SmallString<64> Triple; Triple += Str; @@ -404,11 +434,11 @@ void Triple::setArchName(const StringRef &Str) { setTriple(Triple.str()); } -void Triple::setVendorName(const StringRef &Str) { +void Triple::setVendorName(StringRef Str) { setTriple(getArchName() + "-" + Str + "-" + getOSAndEnvironmentName()); } -void Triple::setOSName(const StringRef &Str) { +void Triple::setOSName(StringRef Str) { if (hasEnvironment()) setTriple(getArchName() + "-" + getVendorName() + "-" + Str + "-" + getEnvironmentName()); @@ -416,11 +446,11 @@ void Triple::setOSName(const StringRef &Str) { setTriple(getArchName() + "-" + getVendorName() + "-" + Str); } -void Triple::setEnvironmentName(const StringRef &Str) { - setTriple(getArchName() + "-" + getVendorName() + "-" + getOSName() + +void Triple::setEnvironmentName(StringRef Str) { + setTriple(getArchName() + "-" + getVendorName() + "-" + getOSName() + "-" + Str); } -void Triple::setOSAndEnvironmentName(const StringRef &Str) { +void Triple::setOSAndEnvironmentName(StringRef Str) { setTriple(getArchName() + "-" + getVendorName() + "-" + Str); } diff --git a/lib/System/Host.cpp b/lib/System/Host.cpp index fd2d952..37591a5 100644 --- a/lib/System/Host.cpp +++ b/lib/System/Host.cpp @@ -13,6 +13,7 @@ #include "llvm/System/Host.h" #include "llvm/Config/config.h" +#include <string.h> // Include the platform-specific parts of this class. #ifdef LLVM_ON_UNIX @@ -22,3 +23,276 @@ #include "Win32/Host.inc" #endif +//===----------------------------------------------------------------------===// +// +// Implementations of the CPU detection routines +// +//===----------------------------------------------------------------------===// + +using namespace llvm; + +#if defined(i386) || defined(__i386__) || defined(__x86__) || defined(_M_IX86)\ + || defined(__x86_64__) || defined(_M_AMD64) || defined (_M_X64) + +/// GetX86CpuIDAndInfo - Execute the specified cpuid and return the 4 values in the +/// specified arguments. If we can't run cpuid on the host, return true. +static bool GetX86CpuIDAndInfo(unsigned value, unsigned *rEAX, + unsigned *rEBX, unsigned *rECX, unsigned *rEDX) { +#if defined(__x86_64__) || defined(_M_AMD64) || defined (_M_X64) + #if defined(__GNUC__) + // gcc doesn't know cpuid would clobber ebx/rbx. Preseve it manually. 
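  // The sequence below copies rbx into rsi, runs cpuid (which overwrites
  // rbx), then exchanges the two registers: rbx gets its original value back
  // and rsi ends up holding cpuid's EBX output, which is what the "=S"
  // output constraint hands back to the caller.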
+ asm ("movq\t%%rbx, %%rsi\n\t" + "cpuid\n\t" + "xchgq\t%%rbx, %%rsi\n\t" + : "=a" (*rEAX), + "=S" (*rEBX), + "=c" (*rECX), + "=d" (*rEDX) + : "a" (value)); + return false; + #elif defined(_MSC_VER) + int registers[4]; + __cpuid(registers, value); + *rEAX = registers[0]; + *rEBX = registers[1]; + *rECX = registers[2]; + *rEDX = registers[3]; + return false; + #endif +#elif defined(i386) || defined(__i386__) || defined(__x86__) || defined(_M_IX86) + #if defined(__GNUC__) + asm ("movl\t%%ebx, %%esi\n\t" + "cpuid\n\t" + "xchgl\t%%ebx, %%esi\n\t" + : "=a" (*rEAX), + "=S" (*rEBX), + "=c" (*rECX), + "=d" (*rEDX) + : "a" (value)); + return false; + #elif defined(_MSC_VER) + __asm { + mov eax,value + cpuid + mov esi,rEAX + mov dword ptr [esi],eax + mov esi,rEBX + mov dword ptr [esi],ebx + mov esi,rECX + mov dword ptr [esi],ecx + mov esi,rEDX + mov dword ptr [esi],edx + } + return false; + #endif +#endif + return true; +} + +static void DetectX86FamilyModel(unsigned EAX, unsigned &Family, unsigned &Model) { + Family = (EAX >> 8) & 0xf; // Bits 8 - 11 + Model = (EAX >> 4) & 0xf; // Bits 4 - 7 + if (Family == 6 || Family == 0xf) { + if (Family == 0xf) + // Examine extended family ID if family ID is F. + Family += (EAX >> 20) & 0xff; // Bits 20 - 27 + // Examine extended model ID if family ID is 6 or F. + Model += ((EAX >> 16) & 0xf) << 4; // Bits 16 - 19 + } +} +#endif + + +std::string sys::getHostCPUName() { +#if defined(__x86_64__) || defined(__i386__) + unsigned EAX = 0, EBX = 0, ECX = 0, EDX = 0; + if (GetX86CpuIDAndInfo(0x1, &EAX, &EBX, &ECX, &EDX)) + return "generic"; + unsigned Family = 0; + unsigned Model = 0; + DetectX86FamilyModel(EAX, Family, Model); + + GetX86CpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX); + bool Em64T = (EDX >> 29) & 0x1; + bool HasSSE3 = (ECX & 0x1); + + union { + unsigned u[3]; + char c[12]; + } text; + + GetX86CpuIDAndInfo(0, &EAX, text.u+0, text.u+2, text.u+1); + if (memcmp(text.c, "GenuineIntel", 12) == 0) { + switch (Family) { + case 3: + return "i386"; + case 4: + switch (Model) { + case 0: // Intel486TM DX processors + case 1: // Intel486TM DX processors + case 2: // Intel486 SX processors + case 3: // Intel487TM processors, IntelDX2 OverDrive® processors, + // IntelDX2TM processors + case 4: // Intel486 SL processor + case 5: // IntelSX2TM processors + case 7: // Write-Back Enhanced IntelDX2 processors + case 8: // IntelDX4 OverDrive processors, IntelDX4TM processors + default: return "i486"; + } + case 5: + switch (Model) { + case 1: // Pentium OverDrive processor for Pentium processor (60, 66), + // Pentium® processors (60, 66) + case 2: // Pentium OverDrive processor for Pentium processor (75, 90, + // 100, 120, 133), Pentium processors (75, 90, 100, 120, 133, + // 150, 166, 200) + case 3: // Pentium OverDrive processors for Intel486 processor-based + // systems + return "pentium"; + + case 4: // Pentium OverDrive processor with MMXTM technology for Pentium + // processor (75, 90, 100, 120, 133), Pentium processor with + // MMXTM technology (166, 200) + return "pentium-mmx"; + + default: return "pentium"; + } + case 6: + switch (Model) { + case 1: // Pentium Pro processor + return "pentiumpro"; + + case 3: // Intel Pentium II OverDrive processor, Pentium II processor, + // model 03 + case 5: // Pentium II processor, model 05, Pentium II Xeon processor, + // model 05, and Intel® Celeron® processor, model 05 + case 6: // Celeron processor, model 06 + return "pentium2"; + + case 7: // Pentium III processor, model 07, and Pentium III Xeon + // processor, model 
07 + case 8: // Pentium III processor, model 08, Pentium III Xeon processor, + // model 08, and Celeron processor, model 08 + case 10: // Pentium III Xeon processor, model 0Ah + case 11: // Pentium III processor, model 0Bh + return "pentium3"; + + case 9: // Intel Pentium M processor, Intel Celeron M processor model 09. + case 13: // Intel Pentium M processor, Intel Celeron M processor, model + // 0Dh. All processors are manufactured using the 90 nm process. + return "pentium-m"; + + case 14: // Intel CoreTM Duo processor, Intel CoreTM Solo processor, model + // 0Eh. All processors are manufactured using the 65 nm process. + return "yonah"; + + case 15: // Intel CoreTM2 Duo processor, Intel CoreTM2 Duo mobile + // processor, Intel CoreTM2 Quad processor, Intel CoreTM2 Quad + // mobile processor, Intel CoreTM2 Extreme processor, Intel + // Pentium Dual-Core processor, Intel Xeon processor, model + // 0Fh. All processors are manufactured using the 65 nm process. + case 22: // Intel Celeron processor model 16h. All processors are + // manufactured using the 65 nm process + return "core2"; + + case 21: // Intel EP80579 Integrated Processor and Intel EP80579 + // Integrated Processor with Intel QuickAssist Technology + return "i686"; // FIXME: ??? + + case 23: // Intel CoreTM2 Extreme processor, Intel Xeon processor, model + // 17h. All processors are manufactured using the 45 nm process. + // + // 45nm: Penryn , Wolfdale, Yorkfield (XE) + return "penryn"; + + case 26: // Intel Core i7 processor and Intel Xeon processor. All + // processors are manufactured using the 45 nm process. + case 29: // Intel Xeon processor MP. All processors are manufactured using + // the 45 nm process. + return "corei7"; + + case 28: // Intel Atom processor. All processors are manufactured using + // the 45 nm process + return "atom"; + + default: return "i686"; + } + case 15: { + switch (Model) { + case 0: // Pentium 4 processor, Intel Xeon processor. All processors are + // model 00h and manufactured using the 0.18 micron process. + case 1: // Pentium 4 processor, Intel Xeon processor, Intel Xeon + // processor MP, and Intel Celeron processor. All processors are + // model 01h and manufactured using the 0.18 micron process. + case 2: // Pentium 4 processor, Mobile Intel Pentium 4 processor – M, + // Intel Xeon processor, Intel Xeon processor MP, Intel Celeron + // processor, and Mobile Intel Celeron processor. All processors + // are model 02h and manufactured using the 0.13 micron process. + return (Em64T) ? "x86-64" : "pentium4"; + + case 3: // Pentium 4 processor, Intel Xeon processor, Intel Celeron D + // processor. All processors are model 03h and manufactured using + // the 90 nm process. + case 4: // Pentium 4 processor, Pentium 4 processor Extreme Edition, + // Pentium D processor, Intel Xeon processor, Intel Xeon + // processor MP, Intel Celeron D processor. All processors are + // model 04h and manufactured using the 90 nm process. + case 6: // Pentium 4 processor, Pentium D processor, Pentium processor + // Extreme Edition, Intel Xeon processor, Intel Xeon processor + // MP, Intel Celeron D processor. All processors are model 06h + // and manufactured using the 65 nm process. + return (Em64T) ? "nocona" : "prescott"; + + default: + return (Em64T) ? "x86-64" : "pentium4"; + } + } + + default: + return "generic"; + } + } else if (memcmp(text.c, "AuthenticAMD", 12) == 0) { + // FIXME: this poorly matches the generated SubtargetFeatureKV table. 
There + // appears to be no way to generate the wide variety of AMD-specific targets + // from the information returned from CPUID. + switch (Family) { + case 4: + return "i486"; + case 5: + switch (Model) { + case 6: + case 7: return "k6"; + case 8: return "k6-2"; + case 9: + case 13: return "k6-3"; + default: return "pentium"; + } + case 6: + switch (Model) { + case 4: return "athlon-tbird"; + case 6: + case 7: + case 8: return "athlon-mp"; + case 10: return "athlon-xp"; + default: return "athlon"; + } + case 15: + if (HasSSE3) { + return "k8-sse3"; + } else { + switch (Model) { + case 1: return "opteron"; + case 5: return "athlon-fx"; // also opteron + default: return "athlon64"; + } + } + case 16: + return "amdfam10"; + default: + return "generic"; + } + } +#endif + + return "generic"; +} diff --git a/lib/System/Unix/Program.inc b/lib/System/Unix/Program.inc index c52f3a8..43c3606 100644 --- a/lib/System/Unix/Program.inc +++ b/lib/System/Unix/Program.inc @@ -121,6 +121,9 @@ static bool RedirectIO(const Path *Path, int FD, std::string* ErrMsg) { return false; } +static void TimeOutHandler(int Sig) { +} + static void SetMemoryLimits (unsigned size) { #if HAVE_SYS_RESOURCE_H @@ -231,11 +234,14 @@ Program::Wait(unsigned secondsToWait, return -1; } - // Install a timeout handler. + // Install a timeout handler. The handler itself does nothing, but the simple + // fact of having a handler at all causes the wait below to return with EINTR, + // unlike if we used SIG_IGN. if (secondsToWait) { - memset(&Act, 0, sizeof(Act)); - Act.sa_handler = SIG_IGN; + Act.sa_sigaction = 0; + Act.sa_handler = TimeOutHandler; sigemptyset(&Act.sa_mask); + Act.sa_flags = 0; sigaction(SIGALRM, &Act, &Old); alarm(secondsToWait); } diff --git a/lib/Target/ARM/ARM.h b/lib/Target/ARM/ARM.h index 76cc06e..ff1980d 100644 --- a/lib/Target/ARM/ARM.h +++ b/lib/Target/ARM/ARM.h @@ -103,11 +103,13 @@ FunctionPass *createARMObjectCodeEmitterPass(ARMBaseTargetMachine &TM, ObjectCodeEmitter &OCE); FunctionPass *createARMLoadStoreOptimizationPass(bool PreAlloc = false); +FunctionPass *createARMExpandPseudoPass(); FunctionPass *createARMConstantIslandPass(); FunctionPass *createNEONPreAllocPass(); FunctionPass *createNEONMoveFixPass(); FunctionPass *createThumb2ITBlockPass(); FunctionPass *createThumb2SizeReductionPass(); +FunctionPass *createARMMaxStackAlignmentCalculatorPass(); extern Target TheARMTarget, TheThumbTarget; diff --git a/lib/Target/ARM/ARMAddressingModes.h b/lib/Target/ARM/ARMAddressingModes.h index c603708..ddeb1b9 100644 --- a/lib/Target/ARM/ARMAddressingModes.h +++ b/lib/Target/ARM/ARMAddressingModes.h @@ -520,8 +520,8 @@ namespace ARM_AM { return ((AM5Opc >> 8) & 1) ? sub : add; } - /// getAM5Opc - This function encodes the addrmode5 opc field for FLDM and - /// FSTM instructions. + /// getAM5Opc - This function encodes the addrmode5 opc field for VLDM and + /// VSTM instructions. static inline unsigned getAM5Opc(AMSubMode SubMode, bool WB, unsigned char Offset) { assert((SubMode == ia || SubMode == db) && @@ -541,13 +541,15 @@ namespace ARM_AM { // // This is used for NEON load / store instructions. // - // addrmode6 := reg with optional writeback + // addrmode6 := reg with optional writeback and alignment // - // This is stored in three operands [regaddr, regupdate, opc]. The first is - // the address register. The second register holds the value of a post-access - // increment for writeback or reg0 if no writeback or if the writeback - // increment is the size of the memory access. 
The third operand encodes - // whether there is writeback to the address register. + // This is stored in four operands [regaddr, regupdate, opc, align]. The + // first is the address register. The second register holds the value of + // a post-access increment for writeback or reg0 if no writeback or if the + // writeback increment is the size of the memory access. The third + // operand encodes whether there is writeback to the address register. The + // fourth operand is the value of the alignment specifier to use or zero if + // no explicit alignment. static inline unsigned getAM6Opc(bool WB = false) { return (int)WB; diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp index 7c5b0f0..b50b609 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -1,4 +1,4 @@ -//===- ARMBaseInstrInfo.cpp - ARM Instruction Information -----------*- C++ -*-===// +//===- ARMBaseInstrInfo.cpp - ARM Instruction Information -------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -14,11 +14,16 @@ #include "ARMBaseInstrInfo.h" #include "ARM.h" #include "ARMAddressingModes.h" +#include "ARMConstantPoolValue.h" #include "ARMGenInstrInfo.inc" #include "ARMMachineFunctionInfo.h" #include "ARMRegisterInfo.h" +#include "llvm/Constants.h" +#include "llvm/Function.h" +#include "llvm/GlobalValue.h" #include "llvm/ADT/STLExtras.h" #include "llvm/CodeGen/LiveVariables.h" +#include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" @@ -504,9 +509,9 @@ ARMBaseInstrInfo::isMoveInstr(const MachineInstr &MI, switch (MI.getOpcode()) { default: break; - case ARM::FCPYS: - case ARM::FCPYD: + case ARM::VMOVS: case ARM::VMOVD: + case ARM::VMOVDneon: case ARM::VMOVQ: { SrcReg = MI.getOperand(1).getReg(); DstReg = MI.getOperand(0).getReg(); @@ -556,8 +561,8 @@ ARMBaseInstrInfo::isLoadFromStackSlot(const MachineInstr *MI, return MI->getOperand(0).getReg(); } break; - case ARM::FLDD: - case ARM::FLDS: + case ARM::VLDRD: + case ARM::VLDRS: if (MI->getOperand(1).isFI() && MI->getOperand(2).isImm() && MI->getOperand(2).getImm() == 0) { @@ -595,8 +600,8 @@ ARMBaseInstrInfo::isStoreToStackSlot(const MachineInstr *MI, return MI->getOperand(0).getReg(); } break; - case ARM::FSTD: - case ARM::FSTS: + case ARM::VSTRD: + case ARM::VSTRS: if (MI->getOperand(1).isFI() && MI->getOperand(2).isImm() && MI->getOperand(2).getImm() == 0) { @@ -632,17 +637,17 @@ ARMBaseInstrInfo::copyRegToReg(MachineBasicBlock &MBB, AddDefaultCC(AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::MOVr), DestReg).addReg(SrcReg))); } else if (DestRC == ARM::SPRRegisterClass) { - AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::FCPYS), DestReg) + AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VMOVS), DestReg) .addReg(SrcReg)); } else if (DestRC == ARM::DPRRegisterClass) { - AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::FCPYD), DestReg) + AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VMOVD), DestReg) .addReg(SrcReg)); } else if (DestRC == ARM::DPR_VFP2RegisterClass || DestRC == ARM::DPR_8RegisterClass || SrcRC == ARM::DPR_VFP2RegisterClass || SrcRC == ARM::DPR_8RegisterClass) { // Always use neon reg-reg move if source or dest is NEON-only regclass. 
- BuildMI(MBB, I, DL, get(ARM::VMOVD), DestReg).addReg(SrcReg); + BuildMI(MBB, I, DL, get(ARM::VMOVDneon), DestReg).addReg(SrcReg); } else if (DestRC == ARM::QPRRegisterClass || DestRC == ARM::QPR_VFP2RegisterClass || DestRC == ARM::QPR_8RegisterClass) { @@ -662,12 +667,13 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, if (I != MBB.end()) DL = I->getDebugLoc(); MachineFunction &MF = *MBB.getParent(); MachineFrameInfo &MFI = *MF.getFrameInfo(); + unsigned Align = MFI.getObjectAlignment(FI); MachineMemOperand *MMO = MF.getMachineMemOperand(PseudoSourceValue::getFixedStack(FI), MachineMemOperand::MOStore, 0, MFI.getObjectSize(FI), - MFI.getObjectAlignment(FI)); + Align); if (RC == ARM::GPRRegisterClass) { AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::STR)) @@ -676,19 +682,27 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, } else if (RC == ARM::DPRRegisterClass || RC == ARM::DPR_VFP2RegisterClass || RC == ARM::DPR_8RegisterClass) { - AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::FSTD)) + AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTRD)) .addReg(SrcReg, getKillRegState(isKill)) .addFrameIndex(FI).addImm(0).addMemOperand(MMO)); } else if (RC == ARM::SPRRegisterClass) { - AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::FSTS)) + AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTRS)) .addReg(SrcReg, getKillRegState(isKill)) .addFrameIndex(FI).addImm(0).addMemOperand(MMO)); } else { assert((RC == ARM::QPRRegisterClass || RC == ARM::QPR_VFP2RegisterClass) && "Unknown regclass!"); // FIXME: Neon instructions should support predicates - BuildMI(MBB, I, DL, get(ARM::VSTRQ)).addReg(SrcReg, getKillRegState(isKill)) - .addFrameIndex(FI).addImm(0).addMemOperand(MMO); + if (Align >= 16 + && (getRegisterInfo().needsStackRealignment(MF))) { + BuildMI(MBB, I, DL, get(ARM::VST1q64)) + .addFrameIndex(FI).addImm(0).addImm(0).addImm(128).addMemOperand(MMO) + .addReg(SrcReg, getKillRegState(isKill)); + } else { + BuildMI(MBB, I, DL, get(ARM::VSTRQ)). + addReg(SrcReg, getKillRegState(isKill)) + .addFrameIndex(FI).addImm(0).addMemOperand(MMO); + } } } @@ -700,12 +714,13 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, if (I != MBB.end()) DL = I->getDebugLoc(); MachineFunction &MF = *MBB.getParent(); MachineFrameInfo &MFI = *MF.getFrameInfo(); + unsigned Align = MFI.getObjectAlignment(FI); MachineMemOperand *MMO = MF.getMachineMemOperand(PseudoSourceValue::getFixedStack(FI), MachineMemOperand::MOLoad, 0, MFI.getObjectSize(FI), - MFI.getObjectAlignment(FI)); + Align); if (RC == ARM::GPRRegisterClass) { AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::LDR), DestReg) @@ -713,18 +728,24 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, } else if (RC == ARM::DPRRegisterClass || RC == ARM::DPR_VFP2RegisterClass || RC == ARM::DPR_8RegisterClass) { - AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::FLDD), DestReg) + AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDRD), DestReg) .addFrameIndex(FI).addImm(0).addMemOperand(MMO)); } else if (RC == ARM::SPRRegisterClass) { - AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::FLDS), DestReg) + AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDRS), DestReg) .addFrameIndex(FI).addImm(0).addMemOperand(MMO)); } else { assert((RC == ARM::QPRRegisterClass || RC == ARM::QPR_VFP2RegisterClass || RC == ARM::QPR_8RegisterClass) && "Unknown regclass!"); // FIXME: Neon instructions should support predicates - BuildMI(MBB, I, DL, get(ARM::VLDRQ), DestReg).addFrameIndex(FI).addImm(0). 
- addMemOperand(MMO); + if (Align >= 16 + && (getRegisterInfo().needsStackRealignment(MF))) { + BuildMI(MBB, I, DL, get(ARM::VLD1q64), DestReg) + .addFrameIndex(FI).addImm(0).addImm(0).addImm(128).addMemOperand(MMO); + } else { + BuildMI(MBB, I, DL, get(ARM::VLDRQ), DestReg).addFrameIndex(FI).addImm(0). + addMemOperand(MMO); + } } } @@ -805,7 +826,7 @@ foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI, DstSubReg) .addFrameIndex(FI).addImm(0).addImm(ARMCC::AL).addReg(0); } - } else if (Opc == ARM::FCPYS) { + } else if (Opc == ARM::VMOVS) { unsigned Pred = MI->getOperand(2).getImm(); unsigned PredReg = MI->getOperand(3).getReg(); if (OpNum == 0) { // move -> store @@ -813,7 +834,7 @@ foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI, unsigned SrcSubReg = MI->getOperand(1).getSubReg(); bool isKill = MI->getOperand(1).isKill(); bool isUndef = MI->getOperand(1).isUndef(); - NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::FSTS)) + NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::VSTRS)) .addReg(SrcReg, getKillRegState(isKill) | getUndefRegState(isUndef), SrcSubReg) .addFrameIndex(FI) @@ -823,7 +844,7 @@ foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI, unsigned DstSubReg = MI->getOperand(0).getSubReg(); bool isDead = MI->getOperand(0).isDead(); bool isUndef = MI->getOperand(0).isUndef(); - NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::FLDS)) + NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::VLDRS)) .addReg(DstReg, RegState::Define | getDeadRegState(isDead) | @@ -832,7 +853,7 @@ foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI, .addFrameIndex(FI).addImm(0).addImm(Pred).addReg(PredReg); } } - else if (Opc == ARM::FCPYD) { + else if (Opc == ARM::VMOVD) { unsigned Pred = MI->getOperand(2).getImm(); unsigned PredReg = MI->getOperand(3).getReg(); if (OpNum == 0) { // move -> store @@ -840,7 +861,7 @@ foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI, unsigned SrcSubReg = MI->getOperand(1).getSubReg(); bool isKill = MI->getOperand(1).isKill(); bool isUndef = MI->getOperand(1).isUndef(); - NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::FSTD)) + NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::VSTRD)) .addReg(SrcReg, getKillRegState(isKill) | getUndefRegState(isUndef), SrcSubReg) @@ -850,7 +871,7 @@ foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI, unsigned DstSubReg = MI->getOperand(0).getSubReg(); bool isDead = MI->getOperand(0).isDead(); bool isUndef = MI->getOperand(0).isUndef(); - NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::FLDD)) + NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::VLDRD)) .addReg(DstReg, RegState::Define | getDeadRegState(isDead) | @@ -886,15 +907,114 @@ ARMBaseInstrInfo::canFoldMemoryOperand(const MachineInstr *MI, Opc == ARM::tMOVtgpr2gpr || Opc == ARM::tMOVgpr2tgpr) { return true; - } else if (Opc == ARM::FCPYS || Opc == ARM::FCPYD) { + } else if (Opc == ARM::VMOVS || Opc == ARM::VMOVD) { return true; - } else if (Opc == ARM::VMOVD || Opc == ARM::VMOVQ) { + } else if (Opc == ARM::VMOVDneon || Opc == ARM::VMOVQ) { return false; // FIXME } return false; } +void ARMBaseInstrInfo:: +reMaterialize(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, + unsigned DestReg, unsigned SubIdx, + const MachineInstr *Orig, + const TargetRegisterInfo *TRI) const { + DebugLoc dl = Orig->getDebugLoc(); + + if (SubIdx && TargetRegisterInfo::isPhysicalRegister(DestReg)) { + DestReg = TRI->getSubReg(DestReg, SubIdx); + SubIdx = 0; + } + + unsigned Opcode = Orig->getOpcode(); + switch (Opcode) { + default: { + MachineInstr *MI = 
MBB.getParent()->CloneMachineInstr(Orig); + MI->getOperand(0).setReg(DestReg); + MBB.insert(I, MI); + break; + } + case ARM::tLDRpci_pic: + case ARM::t2LDRpci_pic: { + MachineFunction &MF = *MBB.getParent(); + ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); + MachineConstantPool *MCP = MF.getConstantPool(); + unsigned CPI = Orig->getOperand(1).getIndex(); + const MachineConstantPoolEntry &MCPE = MCP->getConstants()[CPI]; + assert(MCPE.isMachineConstantPoolEntry() && + "Expecting a machine constantpool entry!"); + ARMConstantPoolValue *ACPV = + static_cast<ARMConstantPoolValue*>(MCPE.Val.MachineCPVal); + unsigned PCLabelId = AFI->createConstPoolEntryUId(); + ARMConstantPoolValue *NewCPV = 0; + if (ACPV->isGlobalValue()) + NewCPV = new ARMConstantPoolValue(ACPV->getGV(), PCLabelId, + ARMCP::CPValue, 4); + else if (ACPV->isExtSymbol()) + NewCPV = new ARMConstantPoolValue(MF.getFunction()->getContext(), + ACPV->getSymbol(), PCLabelId, 4); + else if (ACPV->isBlockAddress()) + NewCPV = new ARMConstantPoolValue(ACPV->getBlockAddress(), PCLabelId, + ARMCP::CPBlockAddress, 4); + else + llvm_unreachable("Unexpected ARM constantpool value type!!"); + CPI = MCP->getConstantPoolIndex(NewCPV, MCPE.getAlignment()); + MachineInstrBuilder MIB = BuildMI(MBB, I, Orig->getDebugLoc(), get(Opcode), + DestReg) + .addConstantPoolIndex(CPI).addImm(PCLabelId); + (*MIB).setMemRefs(Orig->memoperands_begin(), Orig->memoperands_end()); + break; + } + } + + MachineInstr *NewMI = prior(I); + NewMI->getOperand(0).setSubReg(SubIdx); +} + +bool ARMBaseInstrInfo::isIdentical(const MachineInstr *MI0, + const MachineInstr *MI1, + const MachineRegisterInfo *MRI) const { + int Opcode = MI0->getOpcode(); + if (Opcode == ARM::t2LDRpci_pic || Opcode == ARM::tLDRpci_pic) { + if (MI1->getOpcode() != Opcode) + return false; + if (MI0->getNumOperands() != MI1->getNumOperands()) + return false; + + const MachineOperand &MO0 = MI0->getOperand(1); + const MachineOperand &MO1 = MI1->getOperand(1); + if (MO0.getOffset() != MO1.getOffset()) + return false; + + const MachineFunction *MF = MI0->getParent()->getParent(); + const MachineConstantPool *MCP = MF->getConstantPool(); + int CPI0 = MO0.getIndex(); + int CPI1 = MO1.getIndex(); + const MachineConstantPoolEntry &MCPE0 = MCP->getConstants()[CPI0]; + const MachineConstantPoolEntry &MCPE1 = MCP->getConstants()[CPI1]; + ARMConstantPoolValue *ACPV0 = + static_cast<ARMConstantPoolValue*>(MCPE0.Val.MachineCPVal); + ARMConstantPoolValue *ACPV1 = + static_cast<ARMConstantPoolValue*>(MCPE1.Val.MachineCPVal); + return ACPV0->hasSameValue(ACPV1); + } + + return TargetInstrInfoImpl::isIdentical(MI0, MI1, MRI); +} + +unsigned ARMBaseInstrInfo::TailDuplicationLimit(const MachineBasicBlock &MBB, + unsigned DefaultLimit) const { + // If the target processor can predict indirect branches, it is highly + // desirable to duplicate them, since it can often make them predictable. + if (!MBB.empty() && isIndirectBranchOpcode(MBB.back().getOpcode()) && + getSubtarget().hasBranchTargetBuffer()) + return DefaultLimit + 2; + return DefaultLimit; +} + /// getInstrPredicate - If instruction is predicated, returns its predicate /// condition, otherwise returns AL. It also returns the condition code /// register by reference. @@ -1022,6 +1142,7 @@ bool llvm::rewriteARMFrameIndex(MachineInstr &MI, unsigned FrameRegIdx, break; } case ARMII::AddrMode4: + case ARMII::AddrMode6: // Can't fold any offset even if it's zero. 
return false; case ARMII::AddrMode5: { diff --git a/lib/Target/ARM/ARMBaseInstrInfo.h b/lib/Target/ARM/ARMBaseInstrInfo.h index 2ba3774..73e854f 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.h +++ b/lib/Target/ARM/ARMBaseInstrInfo.h @@ -1,4 +1,4 @@ -//===- ARMBaseInstrInfo.h - ARM Base Instruction Information -------------*- C++ -*-===// +//===- ARMBaseInstrInfo.h - ARM Base Instruction Information ----*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -261,9 +261,20 @@ public: virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF, MachineInstr* MI, - const SmallVectorImpl<unsigned> &Ops, + const SmallVectorImpl<unsigned> &Ops, MachineInstr* LoadMI) const; + virtual void reMaterialize(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + unsigned DestReg, unsigned SubIdx, + const MachineInstr *Orig, + const TargetRegisterInfo *TRI) const; + + virtual bool isIdentical(const MachineInstr *MI, const MachineInstr *Other, + const MachineRegisterInfo *MRI) const; + + virtual unsigned TailDuplicationLimit(const MachineBasicBlock &MBB, + unsigned DefaultLimit) const; }; static inline diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp index 70377f9e..19762ee 100644 --- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp +++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp @@ -44,10 +44,6 @@ static cl::opt<bool> ReuseFrameIndexVals("arm-reuse-frame-index-vals", cl::Hidden, cl::init(true), cl::desc("Reuse repeated frame index values")); -static cl::opt<bool> -ARMDynamicStackAlign("arm-dynamic-stack-alignment", cl::Hidden, cl::init(false), - cl::desc("Dynamically re-align the stack as needed")); - unsigned ARMBaseRegisterInfo::getRegisterNumbering(unsigned RegEnum, bool *isSPVFP) { if (isSPVFP) @@ -476,11 +472,7 @@ ARMBaseRegisterInfo::UpdateRegAllocHint(unsigned Reg, unsigned NewReg, } static unsigned calculateMaxStackAlignment(const MachineFrameInfo *FFI) { - // FIXME: For now, force at least 128-bit alignment. This will push the - // nightly tester harder for making sure things work correctly. When - // we're ready to enable this for real, this goes back to starting at zero. - unsigned MaxAlign = 16; -// unsigned MaxAlign = 0; + unsigned MaxAlign = 0; for (int i = FFI->getObjectIndexBegin(), e = FFI->getObjectIndexEnd(); i != e; ++i) { @@ -508,20 +500,12 @@ bool ARMBaseRegisterInfo::hasFP(const MachineFunction &MF) const { bool ARMBaseRegisterInfo:: needsStackRealignment(const MachineFunction &MF) const { - // Only do this for ARM if explicitly enabled - // FIXME: Once it's passing all the tests, enable by default - if (!ARMDynamicStackAlign) - return false; - - // FIXME: To force more brutal testing, realign whether we need to or not. - // Change this to be more selective when we turn it on for real, of course. 
const MachineFrameInfo *MFI = MF.getFrameInfo(); const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); -// unsigned StackAlign = MF.getTarget().getFrameInfo()->getStackAlignment(); + unsigned StackAlign = MF.getTarget().getFrameInfo()->getStackAlignment(); return (RealignStack && !AFI->isThumb1OnlyFunction() && - AFI->hasStackFrame() && -// (MFI->getMaxAlignment() > StackAlign) && + (MFI->getMaxAlignment() > StackAlign) && !MFI->hasVarSizedObjects()); } @@ -529,7 +513,8 @@ bool ARMBaseRegisterInfo::cannotEliminateFrame(const MachineFunction &MF) const const MachineFrameInfo *MFI = MF.getFrameInfo(); if (NoFramePointerElim && MFI->hasCalls()) return true; - return MFI->hasVarSizedObjects() || MFI->isFrameAddressTaken(); + return MFI->hasVarSizedObjects() || MFI->isFrameAddressTaken() + || needsStackRealignment(MF); } /// estimateStackSize - Estimate and return the size of the frame. @@ -604,7 +589,7 @@ ARMBaseRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, // Calculate and set max stack object alignment early, so we can decide // whether we will need stack realignment (and thus FP). - if (ARMDynamicStackAlign) { + if (RealignStack) { unsigned MaxAlign = std::max(MFI->getMaxAlignment(), calculateMaxStackAlignment(MFI)); MFI->setMaxAlignment(MaxAlign); @@ -789,7 +774,8 @@ ARMBaseRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, // Reserve a slot closest to SP or frame pointer. const TargetRegisterClass *RC = ARM::GPRRegisterClass; RS->setScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(), - RC->getAlignment())); + RC->getAlignment(), + false)); } } } @@ -806,7 +792,8 @@ unsigned ARMBaseRegisterInfo::getRARegister() const { return ARM::LR; } -unsigned ARMBaseRegisterInfo::getFrameRegister(MachineFunction &MF) const { +unsigned +ARMBaseRegisterInfo::getFrameRegister(const MachineFunction &MF) const { if (STI.isTargetDarwin() || hasFP(MF)) return FramePtr; return ARM::SP; @@ -1183,7 +1170,8 @@ ARMBaseRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, // as much as possible above, handle the rest, providing a register that is // SP+LargeImm. assert((Offset || - (MI.getDesc().TSFlags & ARMII::AddrModeMask) == ARMII::AddrMode4) && + (MI.getDesc().TSFlags & ARMII::AddrModeMask) == ARMII::AddrMode4 || + (MI.getDesc().TSFlags & ARMII::AddrModeMask) == ARMII::AddrMode6) && "This code isn't needed if offset already handled!"); unsigned ScratchReg = 0; @@ -1192,7 +1180,7 @@ ARMBaseRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, ? ARMCC::AL : (ARMCC::CondCodes)MI.getOperand(PIdx).getImm(); unsigned PredReg = (PIdx == -1) ? 0 : MI.getOperand(PIdx+1).getReg(); if (Offset == 0) - // Must be addrmode4. + // Must be addrmode4/6. MI.getOperand(i).ChangeToRegister(FrameReg, false, false, false); else { ScratchReg = MF.getRegInfo().createVirtualRegister(ARM::GPRRegisterClass); @@ -1346,7 +1334,7 @@ emitPrologue(MachineFunction &MF) const { AFI->setGPRCalleeSavedArea2Offset(GPRCS2Offset); AFI->setDPRCalleeSavedAreaOffset(DPRCSOffset); - movePastCSLoadStoreOps(MBB, MBBI, ARM::FSTD, 0, 3, STI); + movePastCSLoadStoreOps(MBB, MBBI, ARM::VSTRD, 0, 3, STI); NumBytes = DPRCSOffset; if (NumBytes) { // Adjust SP after all the callee-save spills. 
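The realignment logic above boils down to a small predicate: dynamically realign the stack only when realignment is enabled, the function is not Thumb1-only, some stack object needs more alignment than the ABI already guarantees for SP, and there are no variable-sized objects (which this scheme does not handle). A minimal standalone sketch of that decision follows; FrameSummary and its field names are hypothetical stand-ins for the MachineFrameInfo / ARMFunctionInfo queries, not LLVM API.

// Hypothetical per-function summary of the facts the predicate needs.
struct FrameSummary {
  unsigned MaxObjectAlign;   // largest alignment (in bytes) required by any stack object
  unsigned ABIStackAlign;    // stack alignment the ABI already guarantees (e.g. 8 on ARM)
  bool HasVarSizedObjects;   // dynamic allocas present?
  bool IsThumb1Only;         // Thumb1-only functions are never realigned
};

// Realign only when some object needs more than the ABI provides and
// dynamic realignment is actually possible for this function.
static bool needsRealignment(const FrameSummary &F, bool RealignStackEnabled) {
  return RealignStackEnabled &&
         !F.IsThumb1Only &&
         !F.HasVarSizedObjects &&
         F.MaxObjectAlign > F.ABIStackAlign;
}

int main() {
  // A 16-byte aligned NEON spill slot on an 8-byte aligned ABI stack triggers realignment.
  FrameSummary F = {16, 8, false, false};
  return needsRealignment(F, /*RealignStackEnabled=*/true) ? 0 : 1;
}

The MaximalStackAlignmentCalculator pass added in the hunk that follows feeds this predicate by raising the recorded maximum alignment whenever a register class with a large spill alignment might be spilled.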
@@ -1385,7 +1373,7 @@ static bool isCalleeSavedRegister(unsigned Reg, const unsigned *CSRegs) { static bool isCSRestore(MachineInstr *MI, const ARMBaseInstrInfo &TII, const unsigned *CSRegs) { - return ((MI->getOpcode() == (int)ARM::FLDD || + return ((MI->getOpcode() == (int)ARM::VLDRD || MI->getOpcode() == (int)ARM::LDR || MI->getOpcode() == (int)ARM::t2LDRi12) && MI->getOperand(1).isFI() && @@ -1411,7 +1399,7 @@ emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const { if (NumBytes != 0) emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes); } else { - // Unwind MBBI to point to first LDR / FLDD. + // Unwind MBBI to point to first LDR / VLDRD. const unsigned *CSRegs = getCalleeSavedRegs(); if (MBBI != MBB.begin()) { do @@ -1459,7 +1447,7 @@ emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const { emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes); // Move SP to start of integer callee save spill area 2. - movePastCSLoadStoreOps(MBB, MBBI, ARM::FLDD, 0, 3, STI); + movePastCSLoadStoreOps(MBB, MBBI, ARM::VLDRD, 0, 3, STI); emitSPUpdate(isARM, MBB, MBBI, dl, TII, AFI->getDPRCalleeSavedAreaSize()); // Move SP to start of integer callee save spill area 1. @@ -1475,4 +1463,48 @@ emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const { emitSPUpdate(isARM, MBB, MBBI, dl, TII, VARegSaveSize); } +namespace { + struct MaximalStackAlignmentCalculator : public MachineFunctionPass { + static char ID; + MaximalStackAlignmentCalculator() : MachineFunctionPass(&ID) {} + + virtual bool runOnMachineFunction(MachineFunction &MF) { + MachineFrameInfo *FFI = MF.getFrameInfo(); + MachineRegisterInfo &RI = MF.getRegInfo(); + + // Calculate max stack alignment of all already allocated stack objects. + unsigned MaxAlign = calculateMaxStackAlignment(FFI); + + // Be over-conservative: scan over all vreg defs and find whether vector + // registers are used. If so, there is a chance that a vector register + // will be spilled and thus the stack needs to be aligned properly. + for (unsigned RegNum = TargetRegisterInfo::FirstVirtualRegister; + RegNum < RI.getLastVirtReg(); ++RegNum) + MaxAlign = std::max(MaxAlign, RI.getRegClass(RegNum)->getAlignment()); + + if (FFI->getMaxAlignment() == MaxAlign) + return false; + + FFI->setMaxAlignment(MaxAlign); + return true; + } + + virtual const char *getPassName() const { + return "ARM Stack Required Alignment Auto-Detector"; + } + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); + MachineFunctionPass::getAnalysisUsage(AU); + } + }; + + char MaximalStackAlignmentCalculator::ID = 0; +} + +FunctionPass* +llvm::createARMMaxStackAlignmentCalculatorPass() { + return new MaximalStackAlignmentCalculator(); +} + #include "ARMGenRegisterInfo.inc" diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.h b/lib/Target/ARM/ARMBaseRegisterInfo.h index 029e468..4b267b0 100644 --- a/lib/Target/ARM/ARMBaseRegisterInfo.h +++ b/lib/Target/ARM/ARMBaseRegisterInfo.h @@ -105,7 +105,7 @@ public: // Debug information queries. unsigned getRARegister() const; - unsigned getFrameRegister(MachineFunction &MF) const; + unsigned getFrameRegister(const MachineFunction &MF) const; // Exception handling queries.
unsigned getEHExceptionRegister() const; diff --git a/lib/Target/ARM/ARMCodeEmitter.cpp b/lib/Target/ARM/ARMCodeEmitter.cpp index 13cf676..766acff 100644 --- a/lib/Target/ARM/ARMCodeEmitter.cpp +++ b/lib/Target/ARM/ARMCodeEmitter.cpp @@ -168,7 +168,8 @@ namespace { /// Routines that handle operands which add machine relocations which are /// fixed up by the relocation stage. void emitGlobalAddress(GlobalValue *GV, unsigned Reloc, - bool NeedStub, bool Indirect, intptr_t ACPV = 0); + bool MayNeedFarStub, bool Indirect, + intptr_t ACPV = 0); void emitExternalSymbolAddress(const char *ES, unsigned Reloc); void emitConstPoolAddress(unsigned CPI, unsigned Reloc); void emitJumpTableAddress(unsigned JTIndex, unsigned Reloc); @@ -277,13 +278,13 @@ unsigned Emitter<CodeEmitter>::getMachineOpValue(const MachineInstr &MI, /// template<class CodeEmitter> void Emitter<CodeEmitter>::emitGlobalAddress(GlobalValue *GV, unsigned Reloc, - bool NeedStub, bool Indirect, + bool MayNeedFarStub, bool Indirect, intptr_t ACPV) { MachineRelocation MR = Indirect ? MachineRelocation::getIndirectSymbol(MCE.getCurrentPCOffset(), Reloc, - GV, ACPV, NeedStub) + GV, ACPV, MayNeedFarStub) : MachineRelocation::getGV(MCE.getCurrentPCOffset(), Reloc, - GV, ACPV, NeedStub); + GV, ACPV, MayNeedFarStub); MCE.addRelocation(MR); } diff --git a/lib/Target/ARM/ARMConstantIslandPass.cpp b/lib/Target/ARM/ARMConstantIslandPass.cpp index 9819625..d22c43a 100644 --- a/lib/Target/ARM/ARMConstantIslandPass.cpp +++ b/lib/Target/ARM/ARMConstantIslandPass.cpp @@ -31,6 +31,7 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/Statistic.h" +#include "llvm/Support/CommandLine.h" #include <algorithm> using namespace llvm; @@ -42,6 +43,13 @@ STATISTIC(NumTBs, "Number of table branches generated"); STATISTIC(NumT2CPShrunk, "Number of Thumb2 constantpool instructions shrunk"); STATISTIC(NumT2BrShrunk, "Number of Thumb2 immediate branches shrunk"); STATISTIC(NumCBZ, "Number of CBZ / CBNZ formed"); +STATISTIC(NumJTMoved, "Number of jump table destination blocks moved"); +STATISTIC(NumJTInserted, "Number of jump table intermediate blocks inserted"); + + +static cl::opt<bool> +AdjustJumpTableBlocks("arm-adjust-jump-tables", cl::Hidden, cl::init(true), + cl::desc("Adjust basic block layout to better use TB[BH]")); namespace { /// ARMConstantIslands - Due to limited PC-relative displacements, ARM @@ -174,6 +182,7 @@ namespace { void DoInitialPlacement(MachineFunction &MF, std::vector<MachineInstr*> &CPEMIs); CPEntry *findConstPoolEntry(unsigned CPI, const MachineInstr *CPEMI); + void JumpTableFunctionScan(MachineFunction &MF); void InitialFunctionScan(MachineFunction &MF, const std::vector<MachineInstr*> &CPEMIs); MachineBasicBlock *SplitBlockBeforeInstr(MachineInstr *MI); @@ -201,7 +210,10 @@ namespace { bool UndoLRSpillRestore(); bool OptimizeThumb2Instructions(MachineFunction &MF); bool OptimizeThumb2Branches(MachineFunction &MF); + bool ReorderThumb2JumpTables(MachineFunction &MF); bool OptimizeThumb2JumpTables(MachineFunction &MF); + MachineBasicBlock *AdjustJTTargetBlockForward(MachineBasicBlock *BB, + MachineBasicBlock *JTBB); unsigned GetOffsetOf(MachineInstr *MI) const; void dumpBBs(); @@ -262,6 +274,18 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &MF) { // the numbers agree with the position of the block in the function. MF.RenumberBlocks(); + // Try to reorder and otherwise adjust the block layout to make good use + // of the TB[BH] instructions. 
+ bool MadeChange = false; + if (isThumb2 && AdjustJumpTableBlocks) { + JumpTableFunctionScan(MF); + MadeChange |= ReorderThumb2JumpTables(MF); + // Data is out of date, so clear it. It'll be re-computed later. + T2JumpTables.clear(); + // Blocks may have shifted around. Keep the numbering up to date. + MF.RenumberBlocks(); + } + // Thumb1 functions containing constant pools get 4-byte alignment. // This is so we can keep exact track of where the alignment padding goes. @@ -292,7 +316,6 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &MF) { // Iteratively place constant pool entries and fix up branches until there // is no change. - bool MadeChange = false; unsigned NoCPIters = 0, NoBRIters = 0; while (true) { bool CPChange = false; @@ -409,6 +432,21 @@ ARMConstantIslands::CPEntry return NULL; } +/// JumpTableFunctionScan - Do a scan of the function, building up +/// information about the sizes of each block and the locations of all +/// the jump tables. +void ARMConstantIslands::JumpTableFunctionScan(MachineFunction &MF) { + for (MachineFunction::iterator MBBI = MF.begin(), E = MF.end(); + MBBI != E; ++MBBI) { + MachineBasicBlock &MBB = *MBBI; + + for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); + I != E; ++I) + if (I->getDesc().isBranch() && I->getOpcode() == ARM::t2BR_JT) + T2JumpTables.push_back(I); + } +} + /// InitialFunctionScan - Do the initial scan of the function, building up /// information about the sizes of each block, the location of all the water, /// and finding all of the constant pool users. @@ -541,8 +579,8 @@ void ARMConstantIslands::InitialFunctionScan(MachineFunction &MF, Scale = 4; // +(offset_8*4) break; - case ARM::FLDD: - case ARM::FLDS: + case ARM::VLDRD: + case ARM::VLDRS: Bits = 8; Scale = 4; // +-(offset_8*4) NegOk = true; @@ -1552,7 +1590,6 @@ bool ARMConstantIslands::OptimizeThumb2Branches(MachineFunction &MF) { return MadeChange; } - /// OptimizeThumb2JumpTables - Use tbb / tbh instructions to generate smaller /// jumptables when it's possible. bool ARMConstantIslands::OptimizeThumb2JumpTables(MachineFunction &MF) { @@ -1560,7 +1597,7 @@ bool ARMConstantIslands::OptimizeThumb2JumpTables(MachineFunction &MF) { // FIXME: After the tables are shrunk, can we get rid some of the // constantpool tables? - const MachineJumpTableInfo *MJTI = MF.getJumpTableInfo(); + MachineJumpTableInfo *MJTI = MF.getJumpTableInfo(); const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables(); for (unsigned i = 0, e = T2JumpTables.size(); i != e; ++i) { MachineInstr *MI = T2JumpTables[i]; @@ -1660,3 +1697,99 @@ bool ARMConstantIslands::OptimizeThumb2JumpTables(MachineFunction &MF) { return MadeChange; } + +/// ReorderThumb2JumpTables - Adjust the function's block layout to ensure that +/// jump tables always branch forwards, since that's what tbb and tbh need. +bool ARMConstantIslands::ReorderThumb2JumpTables(MachineFunction &MF) { + bool MadeChange = false; + + MachineJumpTableInfo *MJTI = MF.getJumpTableInfo(); + const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables(); + for (unsigned i = 0, e = T2JumpTables.size(); i != e; ++i) { + MachineInstr *MI = T2JumpTables[i]; + const TargetInstrDesc &TID = MI->getDesc(); + unsigned NumOps = TID.getNumOperands(); + unsigned JTOpIdx = NumOps - (TID.isPredicable() ? 
3 : 2); + MachineOperand JTOP = MI->getOperand(JTOpIdx); + unsigned JTI = JTOP.getIndex(); + assert(JTI < JT.size()); + + // We prefer that target blocks for the jump table come after the jump + // instruction so we can use TB[BH]. Loop through the target blocks + // and try to adjust them such that that's true. + int JTNumber = MI->getParent()->getNumber(); + const std::vector<MachineBasicBlock*> &JTBBs = JT[JTI].MBBs; + for (unsigned j = 0, ee = JTBBs.size(); j != ee; ++j) { + MachineBasicBlock *MBB = JTBBs[j]; + int DTNumber = MBB->getNumber(); + + if (DTNumber < JTNumber) { + // The destination precedes the switch. Try to move the block forward + // so we have a positive offset. + MachineBasicBlock *NewBB = + AdjustJTTargetBlockForward(MBB, MI->getParent()); + if (NewBB) + MJTI->ReplaceMBBInJumpTable(JTI, JTBBs[j], NewBB); + MadeChange = true; + } + } + } + + return MadeChange; +} + +MachineBasicBlock *ARMConstantIslands:: +AdjustJTTargetBlockForward(MachineBasicBlock *BB, MachineBasicBlock *JTBB) +{ + MachineFunction &MF = *BB->getParent(); + + // If the destination block is terminated by an unconditional branch, + // try to move it; otherwise, create a new block following the jump + // table that branches back to the actual target. This is a very simple + // heuristic. FIXME: We can definitely improve it. + MachineBasicBlock *TBB = 0, *FBB = 0; + SmallVector<MachineOperand, 4> Cond; + SmallVector<MachineOperand, 4> CondPrior; + MachineFunction::iterator BBi = BB; + MachineFunction::iterator OldPrior = prior(BBi); + + // If the block terminator isn't analyzable, don't try to move the block + bool B = TII->AnalyzeBranch(*BB, TBB, FBB, Cond); + + // If the block ends in an unconditional branch, move it. The prior block + // has to have an analyzable terminator for us to move this one. Be paranoid + // and make sure we're not trying to move the entry block of the function. + if (!B && Cond.empty() && BB != MF.begin() && + !TII->AnalyzeBranch(*OldPrior, TBB, FBB, CondPrior)) { + BB->moveAfter(JTBB); + OldPrior->updateTerminator(); + BB->updateTerminator(); + // Update numbering to account for the block being moved. + MF.RenumberBlocks(); + ++NumJTMoved; + return NULL; + } + + // Create a new MBB for the code after the jump BB. + MachineBasicBlock *NewBB = + MF.CreateMachineBasicBlock(JTBB->getBasicBlock()); + MachineFunction::iterator MBBI = JTBB; ++MBBI; + MF.insert(MBBI, NewBB); + + // Add an unconditional branch from NewBB to BB. + // There doesn't seem to be meaningful DebugInfo available; this doesn't + // correspond directly to anything in the source. + assert (isThumb2 && "Adjusting for TB[BH] but not in Thumb2?"); + BuildMI(NewBB, DebugLoc::getUnknownLoc(), TII->get(ARM::t2B)).addMBB(BB); + + // Update internal data structures to account for the newly inserted MBB. + MF.RenumberBlocks(NewBB); + + // Update the CFG.
+ NewBB->addSuccessor(BB); + JTBB->removeSuccessor(BB); + JTBB->addSuccessor(NewBB); + + ++NumJTInserted; + return NewBB; +} diff --git a/lib/Target/ARM/ARMConstantPoolValue.cpp b/lib/Target/ARM/ARMConstantPoolValue.cpp index efa941a..90dd0c7 100644 --- a/lib/Target/ARM/ARMConstantPoolValue.cpp +++ b/lib/Target/ARM/ARMConstantPoolValue.cpp @@ -62,9 +62,10 @@ int ARMConstantPoolValue::getExistingMachineCPValue(MachineConstantPool *CP, ARMConstantPoolValue *CPV = (ARMConstantPoolValue *)Constants[i].Val.MachineCPVal; if (CPV->CVal == CVal && - CPV->S == S && CPV->LabelId == LabelId && - CPV->PCAdjust == PCAdjust) + CPV->PCAdjust == PCAdjust && + (CPV->S == S || strcmp(CPV->S, S) == 0) && + (CPV->Modifier == Modifier || strcmp(CPV->Modifier, Modifier) == 0)) return i; } } @@ -84,6 +85,23 @@ ARMConstantPoolValue::AddSelectionDAGCSEId(FoldingSetNodeID &ID) { ID.AddInteger(PCAdjust); } +bool +ARMConstantPoolValue::hasSameValue(ARMConstantPoolValue *ACPV) { + if (ACPV->Kind == Kind && + ACPV->CVal == CVal && + ACPV->PCAdjust == PCAdjust && + (ACPV->S == S || strcmp(ACPV->S, S) == 0) && + (ACPV->Modifier == Modifier || strcmp(ACPV->Modifier, Modifier) == 0)) { + if (ACPV->LabelId == LabelId) + return true; + // Two PC relative constpool entries containing the same GV address or + // external symbols. FIXME: What about blockaddress? + if (Kind == ARMCP::CPValue || Kind == ARMCP::CPExtSymbol) + return true; + } + return false; +} + void ARMConstantPoolValue::dump() const { errs() << " " << *this; } diff --git a/lib/Target/ARM/ARMConstantPoolValue.h b/lib/Target/ARM/ARMConstantPoolValue.h index 8fb3f92..741acde 100644 --- a/lib/Target/ARM/ARMConstantPoolValue.h +++ b/lib/Target/ARM/ARMConstantPoolValue.h @@ -81,6 +81,10 @@ public: virtual void AddSelectionDAGCSEId(FoldingSetNodeID &ID); + /// hasSameValue - Return true if this ARM constpool value + /// can share the same constantpool entry as another ARM constpool value. + bool hasSameValue(ARMConstantPoolValue *ACPV); + void print(raw_ostream *O) const { if (O) print(*O); } void print(raw_ostream &O) const; void dump() const; diff --git a/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/lib/Target/ARM/ARMExpandPseudoInsts.cpp new file mode 100644 index 0000000..4d0f899 --- /dev/null +++ b/lib/Target/ARM/ARMExpandPseudoInsts.cpp @@ -0,0 +1,115 @@ +//===-- ARMExpandPseudoInsts.cpp - Expand pseudo instructions -----*- C++ -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains a pass that expands pseudo instructions into target +// instructions to allow proper scheduling, if-conversion, and other late +// optimizations. This pass should be run after register allocation but before +// the post-regalloc scheduling pass.
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "arm-pseudo" +#include "ARM.h" +#include "ARMBaseInstrInfo.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" + +using namespace llvm; + +namespace { + class ARMExpandPseudo : public MachineFunctionPass { + public: + static char ID; + ARMExpandPseudo() : MachineFunctionPass(&ID) {} + + const TargetInstrInfo *TII; + + virtual bool runOnMachineFunction(MachineFunction &Fn); + + virtual const char *getPassName() const { + return "ARM pseudo instruction expansion pass"; + } + + private: + bool ExpandMBB(MachineBasicBlock &MBB); + }; + char ARMExpandPseudo::ID = 0; +} + +bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) { + bool Modified = false; + + MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end(); + while (MBBI != E) { + MachineInstr &MI = *MBBI; + MachineBasicBlock::iterator NMBBI = next(MBBI); + + unsigned Opcode = MI.getOpcode(); + switch (Opcode) { + default: break; + case ARM::tLDRpci_pic: + case ARM::t2LDRpci_pic: { + unsigned NewLdOpc = (Opcode == ARM::tLDRpci_pic) + ? ARM::tLDRpci : ARM::t2LDRpci; + unsigned DstReg = MI.getOperand(0).getReg(); + if (!MI.getOperand(0).isDead()) { + MachineInstr *NewMI = + AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(), + TII->get(NewLdOpc), DstReg) + .addOperand(MI.getOperand(1))); + NewMI->setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); + BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::tPICADD)) + .addReg(DstReg, getDefRegState(true)) + .addReg(DstReg) + .addOperand(MI.getOperand(2)); + } + MI.eraseFromParent(); + Modified = true; + break; + } + case ARM::t2MOVi32imm: { + unsigned DstReg = MI.getOperand(0).getReg(); + unsigned Imm = MI.getOperand(1).getImm(); + unsigned Lo16 = Imm & 0xffff; + unsigned Hi16 = (Imm >> 16) & 0xffff; + if (!MI.getOperand(0).isDead()) { + AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(), + TII->get(ARM::t2MOVi16), DstReg) + .addImm(Lo16)); + AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(), + TII->get(ARM::t2MOVTi16)) + .addReg(DstReg, getDefRegState(true)) + .addReg(DstReg).addImm(Hi16)); + } + MI.eraseFromParent(); + Modified = true; + } + // FIXME: expand t2MOVi32imm + } + MBBI = NMBBI; + } + + return Modified; +} + +bool ARMExpandPseudo::runOnMachineFunction(MachineFunction &MF) { + TII = MF.getTarget().getInstrInfo(); + + bool Modified = false; + for (MachineFunction::iterator MFI = MF.begin(), E = MF.end(); MFI != E; + ++MFI) + Modified |= ExpandMBB(*MFI); + return Modified; +} + +/// createARMExpandPseudoPass - returns an instance of the pseudo instruction +/// expansion pass. 
+FunctionPass *llvm::createARMExpandPseudoPass() { + return new ARMExpandPseudo(); +} diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp index 1489cab..9be7454 100644 --- a/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -81,7 +81,7 @@ public: bool SelectAddrMode5(SDValue Op, SDValue N, SDValue &Base, SDValue &Offset); bool SelectAddrMode6(SDValue Op, SDValue N, SDValue &Addr, SDValue &Update, - SDValue &Opc); + SDValue &Opc, SDValue &Align); bool SelectAddrModePC(SDValue Op, SDValue N, SDValue &Offset, SDValue &Label); @@ -187,8 +187,6 @@ static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned& Imm) { void ARMDAGToDAGISel::InstructionSelect() { - DEBUG(BB->dump()); - SelectRoot(*CurDAG); CurDAG->RemoveDeadNodes(); } @@ -491,11 +489,13 @@ bool ARMDAGToDAGISel::SelectAddrMode5(SDValue Op, SDValue N, bool ARMDAGToDAGISel::SelectAddrMode6(SDValue Op, SDValue N, SDValue &Addr, SDValue &Update, - SDValue &Opc) { + SDValue &Opc, SDValue &Align) { Addr = N; // Default to no writeback. Update = CurDAG->getRegister(0, MVT::i32); Opc = CurDAG->getTargetConstant(ARM_AM::getAM6Opc(false), MVT::i32); + // Default to no alignment. + Align = CurDAG->getTargetConstant(0, MVT::i32); return true; } @@ -1010,8 +1010,8 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDValue Op, unsigned NumVecs, SDNode *N = Op.getNode(); DebugLoc dl = N->getDebugLoc(); - SDValue MemAddr, MemUpdate, MemOpc; - if (!SelectAddrMode6(Op, N->getOperand(2), MemAddr, MemUpdate, MemOpc)) + SDValue MemAddr, MemUpdate, MemOpc, Align; + if (!SelectAddrMode6(Op, N->getOperand(2), MemAddr, MemUpdate, MemOpc, Align)) return NULL; SDValue Chain = N->getOperand(0); @@ -1036,10 +1036,10 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDValue Op, unsigned NumVecs, if (is64BitVector) { unsigned Opc = DOpcodes[OpcodeIndex]; - const SDValue Ops[] = { MemAddr, MemUpdate, MemOpc, Chain }; + const SDValue Ops[] = { MemAddr, MemUpdate, MemOpc, Align, Chain }; std::vector<EVT> ResTys(NumVecs, VT); ResTys.push_back(MVT::Other); - return CurDAG->getMachineNode(Opc, dl, ResTys, Ops, 4); + return CurDAG->getMachineNode(Opc, dl, ResTys, Ops, 5); } EVT RegVT = GetNEONSubregVT(VT); @@ -1047,10 +1047,10 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDValue Op, unsigned NumVecs, // Quad registers are directly supported for VLD2, // loading 2 pairs of D regs. unsigned Opc = QOpcodes0[OpcodeIndex]; - const SDValue Ops[] = { MemAddr, MemUpdate, MemOpc, Chain }; + const SDValue Ops[] = { MemAddr, MemUpdate, MemOpc, Align, Chain }; std::vector<EVT> ResTys(4, VT); ResTys.push_back(MVT::Other); - SDNode *VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops, 4); + SDNode *VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops, 5); Chain = SDValue(VLd, 4); // Combine the even and odd subregs to produce the result. @@ -1071,14 +1071,15 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDValue Op, unsigned NumVecs, // Load the even subregs. unsigned Opc = QOpcodes0[OpcodeIndex]; - const SDValue OpsA[] = { MemAddr, MemUpdate, MemOpc, Chain }; - SDNode *VLdA = CurDAG->getMachineNode(Opc, dl, ResTys, OpsA, 4); + const SDValue OpsA[] = { MemAddr, MemUpdate, MemOpc, Align, Chain }; + SDNode *VLdA = CurDAG->getMachineNode(Opc, dl, ResTys, OpsA, 5); Chain = SDValue(VLdA, NumVecs+1); // Load the odd subregs. 
Opc = QOpcodes1[OpcodeIndex]; - const SDValue OpsB[] = { SDValue(VLdA, NumVecs), MemUpdate, MemOpc, Chain }; - SDNode *VLdB = CurDAG->getMachineNode(Opc, dl, ResTys, OpsB, 4); + const SDValue OpsB[] = { SDValue(VLdA, NumVecs), MemUpdate, MemOpc, + Align, Chain }; + SDNode *VLdB = CurDAG->getMachineNode(Opc, dl, ResTys, OpsB, 5); Chain = SDValue(VLdB, NumVecs+1); // Combine the even and odd subregs to produce the result. @@ -1098,8 +1099,8 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDValue Op, unsigned NumVecs, SDNode *N = Op.getNode(); DebugLoc dl = N->getDebugLoc(); - SDValue MemAddr, MemUpdate, MemOpc; - if (!SelectAddrMode6(Op, N->getOperand(2), MemAddr, MemUpdate, MemOpc)) + SDValue MemAddr, MemUpdate, MemOpc, Align; + if (!SelectAddrMode6(Op, N->getOperand(2), MemAddr, MemUpdate, MemOpc, Align)) return NULL; SDValue Chain = N->getOperand(0); @@ -1126,13 +1127,14 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDValue Op, unsigned NumVecs, Ops.push_back(MemAddr); Ops.push_back(MemUpdate); Ops.push_back(MemOpc); + Ops.push_back(Align); if (is64BitVector) { unsigned Opc = DOpcodes[OpcodeIndex]; for (unsigned Vec = 0; Vec < NumVecs; ++Vec) Ops.push_back(N->getOperand(Vec+3)); Ops.push_back(Chain); - return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), NumVecs+4); + return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), NumVecs+5); } EVT RegVT = GetNEONSubregVT(VT); @@ -1147,7 +1149,7 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDValue Op, unsigned NumVecs, N->getOperand(Vec+3))); } Ops.push_back(Chain); - return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), 8); + return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), 9); } // Otherwise, quad registers are stored with two separate instructions, @@ -1163,18 +1165,18 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDValue Op, unsigned NumVecs, Ops.push_back(Chain); unsigned Opc = QOpcodes0[OpcodeIndex]; SDNode *VStA = CurDAG->getMachineNode(Opc, dl, MemAddr.getValueType(), - MVT::Other, Ops.data(), NumVecs+4); + MVT::Other, Ops.data(), NumVecs+5); Chain = SDValue(VStA, 1); // Store the odd subregs. 
Ops[0] = SDValue(VStA, 0); // MemAddr for (unsigned Vec = 0; Vec < NumVecs; ++Vec) - Ops[Vec+3] = CurDAG->getTargetExtractSubreg(ARM::DSUBREG_1, dl, RegVT, + Ops[Vec+4] = CurDAG->getTargetExtractSubreg(ARM::DSUBREG_1, dl, RegVT, N->getOperand(Vec+3)); - Ops[NumVecs+3] = Chain; + Ops[NumVecs+4] = Chain; Opc = QOpcodes1[OpcodeIndex]; SDNode *VStB = CurDAG->getMachineNode(Opc, dl, MemAddr.getValueType(), - MVT::Other, Ops.data(), NumVecs+4); + MVT::Other, Ops.data(), NumVecs+5); Chain = SDValue(VStB, 1); ReplaceUses(SDValue(N, 0), Chain); return NULL; @@ -1188,8 +1190,8 @@ SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDValue Op, bool IsLoad, SDNode *N = Op.getNode(); DebugLoc dl = N->getDebugLoc(); - SDValue MemAddr, MemUpdate, MemOpc; - if (!SelectAddrMode6(Op, N->getOperand(2), MemAddr, MemUpdate, MemOpc)) + SDValue MemAddr, MemUpdate, MemOpc, Align; + if (!SelectAddrMode6(Op, N->getOperand(2), MemAddr, MemUpdate, MemOpc, Align)) return NULL; SDValue Chain = N->getOperand(0); @@ -1226,6 +1228,7 @@ SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDValue Op, bool IsLoad, Ops.push_back(MemAddr); Ops.push_back(MemUpdate); Ops.push_back(MemOpc); + Ops.push_back(Align); unsigned Opc = 0; if (is64BitVector) { @@ -1463,8 +1466,8 @@ SDNode *ARMDAGToDAGISel::Select(SDValue Op) { } break; } - case ARMISD::FMRRD: - return CurDAG->getMachineNode(ARM::FMRRD, dl, MVT::i32, MVT::i32, + case ARMISD::VMOVRRD: + return CurDAG->getMachineNode(ARM::VMOVRRD, dl, MVT::i32, MVT::i32, Op.getOperand(0), getAL(CurDAG), CurDAG->getRegister(0, MVT::i32)); case ISD::UMUL_LOHI: { @@ -1653,10 +1656,10 @@ SDNode *ARMDAGToDAGISel::Select(SDValue Op) { : ARM::MOVCCr; break; case MVT::f32: - Opc = ARM::FCPYScc; + Opc = ARM::VMOVScc; break; case MVT::f64: - Opc = ARM::FCPYDcc; + Opc = ARM::VMOVDcc; break; } return CurDAG->SelectNodeTo(Op.getNode(), Opc, VT, Ops, 5); @@ -1680,10 +1683,10 @@ SDNode *ARMDAGToDAGISel::Select(SDValue Op) { default: assert(false && "Illegal conditional move type!"); break; case MVT::f32: - Opc = ARM::FNEGScc; + Opc = ARM::VNEGScc; break; case MVT::f64: - Opc = ARM::FNEGDcc; + Opc = ARM::VNEGDcc; break; } return CurDAG->SelectNodeTo(Op.getNode(), Opc, VT, Ops, 5); diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index b6ce5dd..c3af8e6 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -133,7 +133,7 @@ static TargetLoweringObjectFile *createTLOF(TargetMachine &TM) { } ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) - : TargetLowering(TM, createTLOF(TM)), ARMPCLabelIndex(0) { + : TargetLowering(TM, createTLOF(TM)) { Subtarget = &TM.getSubtarget<ARMSubtarget>(); if (Subtarget->isTargetDarwin()) { @@ -389,7 +389,7 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb1Only()) - // Turn f64->i64 into FMRRD, i64 -> f64 to FMDRR iff target supports vfp2. + // Turn f64->i64 into VMOVRRD, i64 -> f64 to VMOVDRR iff target supports vfp2. setOperationAction(ISD::BIT_CONVERT, MVT::i64, Custom); // We want to custom lower some of our intrinsics. 
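The FMRRD / FMDRR nodes renamed to VMOVRRD / VMOVDRR in these hunks transfer the raw 64-bit pattern of an f64 between a VFP register and a pair of 32-bit core registers; that is why an f64 <-> i64 BIT_CONVERT can be custom-lowered to them whenever VFP2 is available, and why LowerCallResult rebuilds an f64 return value from two i32 halves. As a host-side illustration only (ordinary C++, not LLVM code; it simply assumes the low half travels in the first register), the split and rejoin look like this:

#include <cstdint>
#include <cstring>

// Split a double into the two 32-bit halves of its bit pattern, analogous to
// what VMOVRRD exposes to a pair of core registers.
static void splitF64(double D, uint32_t &Lo, uint32_t &Hi) {
  uint64_t Bits;
  std::memcpy(&Bits, &D, sizeof(Bits));   // bit-exact copy, no type-punning UB
  Lo = static_cast<uint32_t>(Bits);
  Hi = static_cast<uint32_t>(Bits >> 32);
}

// Rebuild the double from the two halves, analogous to VMOVDRR.
static double joinF64(uint32_t Lo, uint32_t Hi) {
  uint64_t Bits = (static_cast<uint64_t>(Hi) << 32) | Lo;
  double D;
  std::memcpy(&D, &Bits, sizeof(D));
  return D;
}

int main() {
  uint32_t Lo, Hi;
  splitF64(3.141592653589793, Lo, Hi);
  return joinF64(Lo, Hi) == 3.141592653589793 ? 0 : 1;  // round-trips exactly
}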
@@ -434,7 +434,7 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) } // We have target-specific dag combine patterns for the following nodes: - // ARMISD::FMRRD - No need to call setTargetDAGCombine + // ARMISD::VMOVRRD - No need to call setTargetDAGCombine setTargetDAGCombine(ISD::ADD); setTargetDAGCombine(ISD::SUB); @@ -493,8 +493,8 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const { case ARMISD::SRA_FLAG: return "ARMISD::SRA_FLAG"; case ARMISD::RRX: return "ARMISD::RRX"; - case ARMISD::FMRRD: return "ARMISD::FMRRD"; - case ARMISD::FMDRR: return "ARMISD::FMDRR"; + case ARMISD::VMOVRRD: return "ARMISD::VMOVRRD"; + case ARMISD::VMOVDRR: return "ARMISD::VMOVDRR"; case ARMISD::EH_SJLJ_SETJMP: return "ARMISD::EH_SJLJ_SETJMP"; case ARMISD::EH_SJLJ_LONGJMP:return "ARMISD::EH_SJLJ_LONGJMP"; @@ -790,7 +790,7 @@ ARMTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag, InFlag); Chain = Hi.getValue(1); InFlag = Hi.getValue(2); - Val = DAG.getNode(ARMISD::FMDRR, dl, MVT::f64, Lo, Hi); + Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi); if (VA.getLocVT() == MVT::v2f64) { SDValue Vec = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64); @@ -805,7 +805,7 @@ ARMTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag, Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag); Chain = Hi.getValue(1); InFlag = Hi.getValue(2); - Val = DAG.getNode(ARMISD::FMDRR, dl, MVT::f64, Lo, Hi); + Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi); Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val, DAG.getConstant(1, MVT::i32)); } @@ -870,7 +870,7 @@ void ARMTargetLowering::PassF64ArgInRegs(DebugLoc dl, SelectionDAG &DAG, SmallVector<SDValue, 8> &MemOpChains, ISD::ArgFlagsTy Flags) { - SDValue fmrrd = DAG.getNode(ARMISD::FMRRD, dl, + SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32), Arg); RegsToPass.push_back(std::make_pair(VA.getLocReg(), fmrrd)); @@ -1004,6 +1004,8 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee, bool isDirect = false; bool isARMFunc = false; bool isLocalARMFunc = false; + MachineFunction &MF = DAG.getMachineFunction(); + ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) { GlobalValue *GV = G->getGlobal(); isDirect = true; @@ -1015,6 +1017,7 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee, isLocalARMFunc = !Subtarget->isThumb() && !isExt; // tBX takes a register source operand. if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) { + unsigned ARMPCLabelIndex = AFI->createConstPoolEntryUId(); ARMConstantPoolValue *CPV = new ARMConstantPoolValue(GV, ARMPCLabelIndex, ARMCP::CPValue, 4); @@ -1023,7 +1026,7 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee, Callee = DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), CPAddr, PseudoSourceValue::getConstantPool(), 0); - SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex++, MVT::i32); + SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); Callee = DAG.getNode(ARMISD::PIC_ADD, dl, getPointerTy(), Callee, PICLabel); } else @@ -1036,6 +1039,7 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee, // tBX takes a register source operand. 
const char *Sym = S->getSymbol(); if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) { + unsigned ARMPCLabelIndex = AFI->createConstPoolEntryUId(); ARMConstantPoolValue *CPV = new ARMConstantPoolValue(*DAG.getContext(), Sym, ARMPCLabelIndex, 4); SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4); @@ -1043,7 +1047,7 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee, Callee = DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), CPAddr, PseudoSourceValue::getConstantPool(), 0); - SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex++, MVT::i32); + SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); Callee = DAG.getNode(ARMISD::PIC_ADD, dl, getPointerTy(), Callee, PICLabel); } else @@ -1145,7 +1149,7 @@ ARMTargetLowering::LowerReturn(SDValue Chain, // Extract the first half and return it in two registers. SDValue Half = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg, DAG.getConstant(0, MVT::i32)); - SDValue HalfGPRs = DAG.getNode(ARMISD::FMRRD, dl, + SDValue HalfGPRs = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32), Half); Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), HalfGPRs, Flag); @@ -1162,7 +1166,7 @@ ARMTargetLowering::LowerReturn(SDValue Chain, } // Legalize ret f64 -> ret 2 x i32. We always have fmrrd if f64 is // available. - SDValue fmrrd = DAG.getNode(ARMISD::FMRRD, dl, + SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32), &Arg, 1); Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), fmrrd, Flag); Flag = Chain.getValue(1); @@ -1208,6 +1212,9 @@ static SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) { } SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) { + MachineFunction &MF = DAG.getMachineFunction(); + ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); + unsigned ARMPCLabelIndex = 0; DebugLoc DL = Op.getDebugLoc(); EVT PtrVT = getPointerTy(); BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress(); @@ -1217,6 +1224,7 @@ SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) { CPAddr = DAG.getTargetConstantPool(BA, PtrVT, 4); } else { unsigned PCAdj = Subtarget->isThumb() ? 4 : 8; + ARMPCLabelIndex = AFI->createConstPoolEntryUId(); ARMConstantPoolValue *CPV = new ARMConstantPoolValue(BA, ARMPCLabelIndex, ARMCP::CPBlockAddress, PCAdj); @@ -1227,7 +1235,7 @@ SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) { PseudoSourceValue::getConstantPool(), 0); if (RelocM == Reloc::Static) return Result; - SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex++, MVT::i32); + SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); return DAG.getNode(ARMISD::PIC_ADD, DL, PtrVT, Result, PICLabel); } @@ -1238,6 +1246,9 @@ ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA, DebugLoc dl = GA->getDebugLoc(); EVT PtrVT = getPointerTy(); unsigned char PCAdj = Subtarget->isThumb() ? 
4 : 8; + MachineFunction &MF = DAG.getMachineFunction(); + ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); + unsigned ARMPCLabelIndex = AFI->createConstPoolEntryUId(); ARMConstantPoolValue *CPV = new ARMConstantPoolValue(GA->getGlobal(), ARMPCLabelIndex, ARMCP::CPValue, PCAdj, "tlsgd", true); @@ -1247,7 +1258,7 @@ ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA, PseudoSourceValue::getConstantPool(), 0); SDValue Chain = Argument.getValue(1); - SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex++, MVT::i32); + SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); Argument = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Argument, PICLabel); // call __tls_get_addr. @@ -1279,7 +1290,10 @@ ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA, SDValue ThreadPointer = DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT); if (GV->isDeclaration()) { - // initial exec model + MachineFunction &MF = DAG.getMachineFunction(); + ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); + unsigned ARMPCLabelIndex = AFI->createConstPoolEntryUId(); + // Initial exec model. unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8; ARMConstantPoolValue *CPV = new ARMConstantPoolValue(GA->getGlobal(), ARMPCLabelIndex, @@ -1290,7 +1304,7 @@ ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA, PseudoSourceValue::getConstantPool(), 0); Chain = Offset.getValue(1); - SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex++, MVT::i32); + SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); Offset = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Offset, PICLabel); Offset = DAG.getLoad(PtrVT, dl, Chain, Offset, @@ -1355,6 +1369,9 @@ SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op, SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op, SelectionDAG &DAG) { + MachineFunction &MF = DAG.getMachineFunction(); + ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); + unsigned ARMPCLabelIndex = 0; EVT PtrVT = getPointerTy(); DebugLoc dl = Op.getDebugLoc(); GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); @@ -1363,6 +1380,7 @@ SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op, if (RelocM == Reloc::Static) CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 4); else { + ARMPCLabelIndex = AFI->createConstPoolEntryUId(); unsigned PCAdj = (RelocM != Reloc::PIC_) ? 0 : (Subtarget->isThumb()?4:8); ARMConstantPoolValue *CPV = new ARMConstantPoolValue(GV, ARMPCLabelIndex, ARMCP::CPValue, PCAdj); @@ -1375,7 +1393,7 @@ SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op, SDValue Chain = Result.getValue(1); if (RelocM == Reloc::PIC_) { - SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex++, MVT::i32); + SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel); } @@ -1390,6 +1408,9 @@ SDValue ARMTargetLowering::LowerGLOBAL_OFFSET_TABLE(SDValue Op, SelectionDAG &DAG){ assert(Subtarget->isTargetELF() && "GLOBAL OFFSET TABLE not implemented for non-ELF targets"); + MachineFunction &MF = DAG.getMachineFunction(); + ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); + unsigned ARMPCLabelIndex = AFI->createConstPoolEntryUId(); EVT PtrVT = getPointerTy(); DebugLoc dl = Op.getDebugLoc(); unsigned PCAdj = Subtarget->isThumb() ? 
4 : 8; @@ -1400,7 +1421,7 @@ SDValue ARMTargetLowering::LowerGLOBAL_OFFSET_TABLE(SDValue Op, CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr, PseudoSourceValue::getConstantPool(), 0); - SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex++, MVT::i32); + SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); return DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel); } @@ -1416,6 +1437,8 @@ ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) { } case Intrinsic::eh_sjlj_lsda: { MachineFunction &MF = DAG.getMachineFunction(); + ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); + unsigned ARMPCLabelIndex = AFI->createConstPoolEntryUId(); EVT PtrVT = getPointerTy(); DebugLoc dl = Op.getDebugLoc(); Reloc::Model RelocM = getTargetMachine().getRelocationModel(); @@ -1433,7 +1456,7 @@ ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) { SDValue Chain = Result.getValue(1); if (RelocM == Reloc::PIC_) { - SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex++, MVT::i32); + SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel); } return Result; @@ -1522,7 +1545,8 @@ ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA, if (NextVA.isMemLoc()) { unsigned ArgSize = NextVA.getLocVT().getSizeInBits()/8; MachineFrameInfo *MFI = MF.getFrameInfo(); - int FI = MFI->CreateFixedObject(ArgSize, NextVA.getLocMemOffset()); + int FI = MFI->CreateFixedObject(ArgSize, NextVA.getLocMemOffset(), + true, false); // Create load node to retrieve arguments from the stack. SDValue FIN = DAG.getFrameIndex(FI, getPointerTy()); @@ -1533,7 +1557,7 @@ ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA, ArgValue2 = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32); } - return DAG.getNode(ARMISD::FMDRR, dl, MVT::f64, ArgValue, ArgValue2); + return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, ArgValue, ArgValue2); } SDValue @@ -1636,7 +1660,8 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain, assert(VA.getValVT() != MVT::i64 && "i64 should already be lowered"); unsigned ArgSize = VA.getLocVT().getSizeInBits()/8; - int FI = MFI->CreateFixedObject(ArgSize, VA.getLocMemOffset()); + int FI = MFI->CreateFixedObject(ArgSize, VA.getLocMemOffset(), + true, false); // Create load nodes to retrieve arguments from the stack. SDValue FIN = DAG.getFrameIndex(FI, getPointerTy()); @@ -1664,7 +1689,8 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain, // the result of va_next. AFI->setVarArgsRegSaveSize(VARegSaveSize); VarArgsFrameIndex = MFI->CreateFixedObject(VARegSaveSize, ArgOffset + - VARegSaveSize - VARegSize); + VARegSaveSize - VARegSize, + true, false); SDValue FIN = DAG.getFrameIndex(VarArgsFrameIndex, getPointerTy()); SmallVector<SDValue, 4> MemOps; @@ -1688,7 +1714,7 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain, &MemOps[0], MemOps.size()); } else // This will point to the next argument passed via stack. 
- VarArgsFrameIndex = MFI->CreateFixedObject(4, ArgOffset); + VarArgsFrameIndex = MFI->CreateFixedObject(4, ArgOffset, true, false); } return Chain; @@ -1710,46 +1736,41 @@ static bool isFloatingPointZero(SDValue Op) { return false; } -static bool isLegalCmpImmediate(unsigned C, bool isThumb1Only) { - return ( isThumb1Only && (C & ~255U) == 0) || - (!isThumb1Only && ARM_AM::getSOImmVal(C) != -1); -} - /// Returns appropriate ARM CMP (cmp) and corresponding condition code for /// the given operands. -static SDValue getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC, - SDValue &ARMCC, SelectionDAG &DAG, bool isThumb1Only, - DebugLoc dl) { +SDValue +ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC, + SDValue &ARMCC, SelectionDAG &DAG, DebugLoc dl) { if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS.getNode())) { unsigned C = RHSC->getZExtValue(); - if (!isLegalCmpImmediate(C, isThumb1Only)) { + if (!isLegalICmpImmediate(C)) { // Constant does not fit, try adjusting it by one? switch (CC) { default: break; case ISD::SETLT: case ISD::SETGE: - if (isLegalCmpImmediate(C-1, isThumb1Only)) { + if (isLegalICmpImmediate(C-1)) { CC = (CC == ISD::SETLT) ? ISD::SETLE : ISD::SETGT; RHS = DAG.getConstant(C-1, MVT::i32); } break; case ISD::SETULT: case ISD::SETUGE: - if (C > 0 && isLegalCmpImmediate(C-1, isThumb1Only)) { + if (C > 0 && isLegalICmpImmediate(C-1)) { CC = (CC == ISD::SETULT) ? ISD::SETULE : ISD::SETUGT; RHS = DAG.getConstant(C-1, MVT::i32); } break; case ISD::SETLE: case ISD::SETGT: - if (isLegalCmpImmediate(C+1, isThumb1Only)) { + if (isLegalICmpImmediate(C+1)) { CC = (CC == ISD::SETLE) ? ISD::SETLT : ISD::SETGE; RHS = DAG.getConstant(C+1, MVT::i32); } break; case ISD::SETULE: case ISD::SETUGT: - if (C < 0xffffffff && isLegalCmpImmediate(C+1, isThumb1Only)) { + if (C < 0xffffffff && isLegalICmpImmediate(C+1)) { CC = (CC == ISD::SETULE) ? 
ISD::SETULT : ISD::SETUGE; RHS = DAG.getConstant(C+1, MVT::i32); } @@ -1785,8 +1806,7 @@ static SDValue getVFPCmp(SDValue LHS, SDValue RHS, SelectionDAG &DAG, return DAG.getNode(ARMISD::FMSTAT, dl, MVT::Flag, Cmp); } -static SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG, - const ARMSubtarget *ST) { +SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) { EVT VT = Op.getValueType(); SDValue LHS = Op.getOperand(0); SDValue RHS = Op.getOperand(1); @@ -1798,7 +1818,7 @@ static SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG, if (LHS.getValueType() == MVT::i32) { SDValue ARMCC; SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); - SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMCC, DAG, ST->isThumb1Only(), dl); + SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMCC, DAG, dl); return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMCC, CCR,Cmp); } @@ -1820,8 +1840,7 @@ static SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG, return Result; } -static SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG, - const ARMSubtarget *ST) { +SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) { SDValue Chain = Op.getOperand(0); ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get(); SDValue LHS = Op.getOperand(2); @@ -1832,7 +1851,7 @@ static SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG, if (LHS.getValueType() == MVT::i32) { SDValue ARMCC; SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); - SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMCC, DAG, ST->isThumb1Only(), dl); + SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMCC, DAG, dl); return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, Chain, Dest, ARMCC, CCR,Cmp); } @@ -2049,16 +2068,16 @@ static SDValue ExpandBIT_CONVERT(SDNode *N, SelectionDAG &DAG) { SDValue Op = N->getOperand(0); DebugLoc dl = N->getDebugLoc(); if (N->getValueType(0) == MVT::f64) { - // Turn i64->f64 into FMDRR. + // Turn i64->f64 into VMOVDRR. SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op, DAG.getConstant(0, MVT::i32)); SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op, DAG.getConstant(1, MVT::i32)); - return DAG.getNode(ARMISD::FMDRR, dl, MVT::f64, Lo, Hi); + return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi); } - // Turn f64->i64 into FMRRD. - SDValue Cvt = DAG.getNode(ARMISD::FMRRD, dl, + // Turn f64->i64 into VMOVRRD. + SDValue Cvt = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32), &Op, 1); // Merge the pieces into a single i64 value. @@ -2115,8 +2134,7 @@ static SDValue getOnesVector(EVT VT, SelectionDAG &DAG, DebugLoc dl) { /// LowerShiftRightParts - Lower SRA_PARTS, which returns two /// i32 values and take a 2 x i32 value to shift plus a shift amount. 
-static SDValue LowerShiftRightParts(SDValue Op, SelectionDAG &DAG, - const ARMSubtarget *ST) { +SDValue ARMTargetLowering::LowerShiftRightParts(SDValue Op, SelectionDAG &DAG) { assert(Op.getNumOperands() == 3 && "Not a double-shift!"); EVT VT = Op.getValueType(); unsigned VTBits = VT.getSizeInBits(); @@ -2140,7 +2158,7 @@ static SDValue LowerShiftRightParts(SDValue Op, SelectionDAG &DAG, SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); SDValue Cmp = getARMCmp(ExtraShAmt, DAG.getConstant(0, MVT::i32), ISD::SETGE, - ARMCC, DAG, ST->isThumb1Only(), dl); + ARMCC, DAG, dl); SDValue Hi = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt); SDValue Lo = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMCC, CCR, Cmp); @@ -2151,8 +2169,7 @@ static SDValue LowerShiftRightParts(SDValue Op, SelectionDAG &DAG, /// LowerShiftLeftParts - Lower SHL_PARTS, which returns two /// i32 values and take a 2 x i32 value to shift plus a shift amount. -static SDValue LowerShiftLeftParts(SDValue Op, SelectionDAG &DAG, - const ARMSubtarget *ST) { +SDValue ARMTargetLowering::LowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) { assert(Op.getNumOperands() == 3 && "Not a double-shift!"); EVT VT = Op.getValueType(); unsigned VTBits = VT.getSizeInBits(); @@ -2174,7 +2191,7 @@ static SDValue LowerShiftLeftParts(SDValue Op, SelectionDAG &DAG, SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2); SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); SDValue Cmp = getARMCmp(ExtraShAmt, DAG.getConstant(0, MVT::i32), ISD::SETGE, - ARMCC, DAG, ST->isThumb1Only(), dl); + ARMCC, DAG, dl); SDValue Lo = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt); SDValue Hi = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, Tmp3, ARMCC, CCR, Cmp); @@ -2860,8 +2877,8 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) { return Subtarget->isTargetDarwin() ? LowerGlobalAddressDarwin(Op, DAG) : LowerGlobalAddressELF(Op, DAG); case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG); - case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG, Subtarget); - case ISD::BR_CC: return LowerBR_CC(Op, DAG, Subtarget); + case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG); + case ISD::BR_CC: return LowerBR_CC(Op, DAG); case ISD::BR_JT: return LowerBR_JT(Op, DAG); case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG); case ISD::VASTART: return LowerVASTART(Op, DAG, VarArgsFrameIndex); @@ -2878,9 +2895,9 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) { case ISD::SHL: case ISD::SRL: case ISD::SRA: return LowerShift(Op.getNode(), DAG, Subtarget); - case ISD::SHL_PARTS: return LowerShiftLeftParts(Op, DAG, Subtarget); + case ISD::SHL_PARTS: return LowerShiftLeftParts(Op, DAG); case ISD::SRL_PARTS: - case ISD::SRA_PARTS: return LowerShiftRightParts(Op, DAG, Subtarget); + case ISD::SRA_PARTS: return LowerShiftRightParts(Op, DAG); case ISD::VSETCC: return LowerVSETCC(Op, DAG); case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG); case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG); @@ -3155,12 +3172,12 @@ static SDValue PerformSUBCombine(SDNode *N, return SDValue(); } -/// PerformFMRRDCombine - Target-specific dag combine xforms for ARMISD::FMRRD. -static SDValue PerformFMRRDCombine(SDNode *N, +/// PerformVMOVRRDCombine - Target-specific dag combine xforms for ARMISD::VMOVRRD. 
+static SDValue PerformVMOVRRDCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) { // fmrrd(fmdrr x, y) -> x,y SDValue InDouble = N->getOperand(0); - if (InDouble.getOpcode() == ARMISD::FMDRR) + if (InDouble.getOpcode() == ARMISD::VMOVDRR) return DCI.CombineTo(N, InDouble.getOperand(0), InDouble.getOperand(1)); return SDValue(); } @@ -3455,7 +3472,7 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N, default: break; case ISD::ADD: return PerformADDCombine(N, DCI); case ISD::SUB: return PerformSUBCombine(N, DCI); - case ARMISD::FMRRD: return PerformFMRRDCombine(N, DCI); + case ARMISD::VMOVRRD: return PerformVMOVRRDCombine(N, DCI); case ISD::INTRINSIC_WO_CHAIN: return PerformIntrinsicCombine(N, DCI.DAG); case ISD::SHL: @@ -3683,6 +3700,18 @@ bool ARMTargetLowering::isLegalAddressingMode(const AddrMode &AM, return true; } +/// isLegalICmpImmediate - Return true if the specified immediate is legal +/// icmp immediate, that is the target has icmp instructions which can compare +/// a register against the immediate without having to materialize the +/// immediate into a register. +bool ARMTargetLowering::isLegalICmpImmediate(int64_t Imm) const { + if (!Subtarget->isThumb()) + return ARM_AM::getSOImmVal(Imm) != -1; + if (Subtarget->isThumb2()) + return ARM_AM::getT2SOImmVal(Imm) != -1; + return Imm >= 0 && Imm <= 255; +} + static bool getARMIndexedAddressParts(SDNode *Ptr, EVT VT, bool isSEXTLoad, SDValue &Base, SDValue &Offset, bool &isInc, @@ -3737,7 +3766,7 @@ static bool getARMIndexedAddressParts(SDNode *Ptr, EVT VT, return true; } - // FIXME: Use FLDM / FSTM to emulate indexed FP load / store. + // FIXME: Use VLDM / VSTM to emulate indexed FP load / store. return false; } diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h index 9c7a91d..4f31f8a 100644 --- a/lib/Target/ARM/ARMISelLowering.h +++ b/lib/Target/ARM/ARMISelLowering.h @@ -62,8 +62,8 @@ namespace llvm { SRA_FLAG, // V,Flag = sra_flag X -> sra X, 1 + save carry out. RRX, // V = RRX X, Flag -> srl X, 1 + shift in carry flag. - FMRRD, // double to two gprs. - FMDRR, // Two gprs to double. + VMOVRRD, // double to two gprs. + VMOVDRR, // Two gprs to double. EH_SJLJ_SETJMP, // SjLj exception handling setjmp. EH_SJLJ_LONGJMP, // SjLj exception handling longjmp. @@ -180,6 +180,12 @@ namespace llvm { virtual bool isLegalAddressingMode(const AddrMode &AM, const Type *Ty)const; bool isLegalT2ScaledAddressingMode(const AddrMode &AM, EVT VT) const; + /// isLegalICmpImmediate - Return true if the specified immediate is legal + /// icmp immediate, that is the target has icmp instructions which can compare + /// a register against the immediate without having to materialize the + /// immediate into a register. + virtual bool isLegalICmpImmediate(int64_t Imm) const; + /// getPreIndexedAddressParts - returns true by value, base pointer and /// offset pointer and addressing mode by reference if the node's address /// can be legally represented as pre-indexed load / store address. 
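
The new isLegalICmpImmediate hook above answers whether cmp can take a constant directly, and getARMCmp falls back to nudging the constant by one and flipping the condition when it cannot. A minimal sketch of those two ideas (not the in-tree ARM_AM helpers; Thumb2's getT2SOImmVal encoding is omitted):

    #include <cstdint>
    #include <cstdio>

    // ARM mode: a compare immediate must be an 8-bit value rotated right by an
    // even amount (what getSOImmVal accepts). Thumb1 only allows 0..255.
    static bool isARMModifiedImm(uint32_t v) {
      for (unsigned rot = 0; rot < 32; rot += 2) {
        // Rotating left by 'rot' undoes a rotate-right-by-'rot' encoding.
        uint32_t r = rot ? ((v << rot) | (v >> (32 - rot))) : v;
        if ((r & ~0xFFu) == 0)
          return true;
      }
      return false;
    }
    static bool isThumb1CmpImm(uint32_t v) { return v <= 255; }

    int main() {
      // 257 is not encodable, but "x < 257" can be rewritten as "x <= 256",
      // and 256 (1 rotated into bit 8) is encodable -- the same SETLT -> SETLE
      // adjustment getARMCmp makes when the original constant does not fit.
      std::printf("257 encodable: %d, 256 encodable: %d, thumb1 ok(200): %d\n",
                  isARMModifiedImm(257), isARMModifiedImm(256), isThumb1CmpImm(200));
    }
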
@@ -278,8 +284,12 @@ namespace llvm { SelectionDAG &DAG); SDValue LowerGLOBAL_OFFSET_TABLE(SDValue Op, SelectionDAG &DAG); SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG); + SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG); + SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG); SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG); SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG); + SDValue LowerShiftRightParts(SDValue Op, SelectionDAG &DAG); + SDValue LowerShiftLeftParts(SDValue Op, SelectionDAG &DAG); SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl, SDValue Chain, @@ -315,6 +325,9 @@ namespace llvm { CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::OutputArg> &Outs, DebugLoc dl, SelectionDAG &DAG); + + SDValue getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC, + SDValue &ARMCC, SelectionDAG &DAG, DebugLoc dl); }; } diff --git a/lib/Target/ARM/ARMInstrInfo.cpp b/lib/Target/ARM/ARMInstrInfo.cpp index 86bbe2a..87bb12b 100644 --- a/lib/Target/ARM/ARMInstrInfo.cpp +++ b/lib/Target/ARM/ARMInstrInfo.cpp @@ -80,22 +80,26 @@ bool ARMInstrInfo::BlockHasNoFallThrough(const MachineBasicBlock &MBB) const { } void ARMInstrInfo:: -reMaterialize(MachineBasicBlock &MBB, - MachineBasicBlock::iterator I, - unsigned DestReg, unsigned SubIdx, - const MachineInstr *Orig) const { +reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, + unsigned DestReg, unsigned SubIdx, const MachineInstr *Orig, + const TargetRegisterInfo *TRI) const { DebugLoc dl = Orig->getDebugLoc(); - if (Orig->getOpcode() == ARM::MOVi2pieces) { + unsigned Opcode = Orig->getOpcode(); + switch (Opcode) { + default: + break; + case ARM::MOVi2pieces: { RI.emitLoadConstPool(MBB, I, dl, DestReg, SubIdx, Orig->getOperand(1).getImm(), (ARMCC::CondCodes)Orig->getOperand(2).getImm(), Orig->getOperand(3).getReg()); + MachineInstr *NewMI = prior(I); + NewMI->getOperand(0).setSubReg(SubIdx); return; } + } - MachineInstr *MI = MBB.getParent()->CloneMachineInstr(Orig); - MI->getOperand(0).setReg(DestReg); - MBB.insert(I, MI); + return ARMBaseInstrInfo::reMaterialize(MBB, I, DestReg, SubIdx, Orig, TRI); } diff --git a/lib/Target/ARM/ARMInstrInfo.h b/lib/Target/ARM/ARMInstrInfo.h index 5d1678d..4319577 100644 --- a/lib/Target/ARM/ARMInstrInfo.h +++ b/lib/Target/ARM/ARMInstrInfo.h @@ -35,15 +35,16 @@ public: // Return true if the block does not fall through. bool BlockHasNoFallThrough(const MachineBasicBlock &MBB) const; + void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, + unsigned DestReg, unsigned SubIdx, + const MachineInstr *Orig, + const TargetRegisterInfo *TRI) const; + /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As /// such, whenever a client has an instance of instruction info, it should /// always be able to get register info as well (through this method). 
/// const ARMRegisterInfo &getRegisterInfo() const { return RI; } - - void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, - unsigned DestReg, unsigned SubIdx, - const MachineInstr *Orig) const; }; } diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td index cbe80b4..3fe634e 100644 --- a/lib/Target/ARM/ARMInstrInfo.td +++ b/lib/Target/ARM/ARMInstrInfo.td @@ -340,9 +340,9 @@ def addrmode5 : Operand<i32>, // addrmode6 := reg with optional writeback // def addrmode6 : Operand<i32>, - ComplexPattern<i32, 3, "SelectAddrMode6", []> { + ComplexPattern<i32, 4, "SelectAddrMode6", []> { let PrintMethod = "printAddrMode6Operand"; - let MIOperandInfo = (ops GPR:$addr, GPR:$upd, i32imm); + let MIOperandInfo = (ops GPR:$addr, GPR:$upd, i32imm, i32imm); } // addrmodepc := pc + reg @@ -377,15 +377,13 @@ multiclass AsI1_bin_irs<bits<4> opcod, string opc, PatFrag opnode, def rr : AsI1<opcod, (outs GPR:$dst), (ins GPR:$a, GPR:$b), DPFrm, IIC_iALUr, opc, "\t$dst, $a, $b", [(set GPR:$dst, (opnode GPR:$a, GPR:$b))]> { - let Inst{4} = 0; + let Inst{11-4} = 0b00000000; let Inst{25} = 0; let isCommutable = Commutable; } def rs : AsI1<opcod, (outs GPR:$dst), (ins GPR:$a, so_reg:$b), DPSoRegFrm, IIC_iALUsr, opc, "\t$dst, $a, $b", [(set GPR:$dst, (opnode GPR:$a, so_reg:$b))]> { - let Inst{4} = 1; - let Inst{7} = 0; let Inst{25} = 0; } } @@ -396,24 +394,22 @@ let Defs = [CPSR] in { multiclass AI1_bin_s_irs<bits<4> opcod, string opc, PatFrag opnode, bit Commutable = 0> { def ri : AI1<opcod, (outs GPR:$dst), (ins GPR:$a, so_imm:$b), DPFrm, - IIC_iALUi, opc, "s\t$dst, $a, $b", + IIC_iALUi, opc, "\t$dst, $a, $b", [(set GPR:$dst, (opnode GPR:$a, so_imm:$b))]> { let Inst{20} = 1; let Inst{25} = 1; } def rr : AI1<opcod, (outs GPR:$dst), (ins GPR:$a, GPR:$b), DPFrm, - IIC_iALUr, opc, "s\t$dst, $a, $b", + IIC_iALUr, opc, "\t$dst, $a, $b", [(set GPR:$dst, (opnode GPR:$a, GPR:$b))]> { let isCommutable = Commutable; - let Inst{4} = 0; + let Inst{11-4} = 0b00000000; let Inst{20} = 1; let Inst{25} = 0; } def rs : AI1<opcod, (outs GPR:$dst), (ins GPR:$a, so_reg:$b), DPSoRegFrm, - IIC_iALUsr, opc, "s\t$dst, $a, $b", + IIC_iALUsr, opc, "\t$dst, $a, $b", [(set GPR:$dst, (opnode GPR:$a, so_reg:$b))]> { - let Inst{4} = 1; - let Inst{7} = 0; let Inst{20} = 1; let Inst{25} = 0; } @@ -435,7 +431,7 @@ multiclass AI1_cmp_irs<bits<4> opcod, string opc, PatFrag opnode, def rr : AI1<opcod, (outs), (ins GPR:$a, GPR:$b), DPFrm, IIC_iCMPr, opc, "\t$a, $b", [(opnode GPR:$a, GPR:$b)]> { - let Inst{4} = 0; + let Inst{11-4} = 0b00000000; let Inst{20} = 1; let Inst{25} = 0; let isCommutable = Commutable; @@ -443,8 +439,6 @@ multiclass AI1_cmp_irs<bits<4> opcod, string opc, PatFrag opnode, def rs : AI1<opcod, (outs), (ins GPR:$a, so_reg:$b), DPSoRegFrm, IIC_iCMPsr, opc, "\t$a, $b", [(opnode GPR:$a, so_reg:$b)]> { - let Inst{4} = 1; - let Inst{7} = 0; let Inst{20} = 1; let Inst{25} = 0; } @@ -501,20 +495,22 @@ multiclass AI1_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode, [(set GPR:$dst, (opnode GPR:$a, GPR:$b))]>, Requires<[IsARM, CarryDefIsUnused]> { let isCommutable = Commutable; - let Inst{4} = 0; + let Inst{11-4} = 0b00000000; let Inst{25} = 0; } def rs : AsI1<opcod, (outs GPR:$dst), (ins GPR:$a, so_reg:$b), DPSoRegFrm, IIC_iALUsr, opc, "\t$dst, $a, $b", [(set GPR:$dst, (opnode GPR:$a, so_reg:$b))]>, Requires<[IsARM, CarryDefIsUnused]> { - let Inst{4} = 1; - let Inst{7} = 0; let Inst{25} = 0; } - // Carry setting variants +} +// Carry setting variants +let Defs = [CPSR] in { +multiclass 
AI1_adde_sube_s_irs<bits<4> opcod, string opc, PatFrag opnode, + bit Commutable = 0> { def Sri : AXI1<opcod, (outs GPR:$dst), (ins GPR:$a, so_imm:$b), - DPFrm, IIC_iALUi, !strconcat(opc, "s\t$dst, $a, $b"), + DPFrm, IIC_iALUi, !strconcat(opc, "\t$dst, $a, $b"), [(set GPR:$dst, (opnode GPR:$a, so_imm:$b))]>, Requires<[IsARM, CarryDefIsUsed]> { let Defs = [CPSR]; @@ -522,26 +518,25 @@ multiclass AI1_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode, let Inst{25} = 1; } def Srr : AXI1<opcod, (outs GPR:$dst), (ins GPR:$a, GPR:$b), - DPFrm, IIC_iALUr, !strconcat(opc, "s\t$dst, $a, $b"), + DPFrm, IIC_iALUr, !strconcat(opc, "\t$dst, $a, $b"), [(set GPR:$dst, (opnode GPR:$a, GPR:$b))]>, Requires<[IsARM, CarryDefIsUsed]> { let Defs = [CPSR]; - let Inst{4} = 0; + let Inst{11-4} = 0b00000000; let Inst{20} = 1; let Inst{25} = 0; } def Srs : AXI1<opcod, (outs GPR:$dst), (ins GPR:$a, so_reg:$b), - DPSoRegFrm, IIC_iALUsr, !strconcat(opc, "s\t$dst, $a, $b"), + DPSoRegFrm, IIC_iALUsr, !strconcat(opc, "\t$dst, $a, $b"), [(set GPR:$dst, (opnode GPR:$a, so_reg:$b))]>, Requires<[IsARM, CarryDefIsUsed]> { let Defs = [CPSR]; - let Inst{4} = 1; - let Inst{7} = 0; let Inst{20} = 1; let Inst{25} = 0; } } } +} //===----------------------------------------------------------------------===// // Instructions @@ -652,6 +647,7 @@ def LEApcrelJT : AXI1<0x0, (outs GPR:$dst), let isReturn = 1, isTerminator = 1, isBarrier = 1 in def BX_RET : AI<(outs), (ins), BrMiscFrm, IIC_Br, "bx", "\tlr", [(ARMretflag)]> { + let Inst{3-0} = 0b1110; let Inst{7-4} = 0b0001; let Inst{19-8} = 0b111111111111; let Inst{27-20} = 0b00010010; @@ -664,6 +660,7 @@ let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in { let Inst{7-4} = 0b0001; let Inst{19-8} = 0b111111111111; let Inst{27-20} = 0b00010010; + let Inst{31-28} = 0b1110; } } @@ -673,7 +670,7 @@ let isReturn = 1, isTerminator = 1, isBarrier = 1, mayLoad = 1, hasExtraDefRegAllocReq = 1 in def LDM_RET : AXI4ld<(outs), (ins addrmode4:$addr, pred:$p, reglist:$wb, variable_ops), - LdStMulFrm, IIC_Br, "ldm${p}${addr:submode}\t$addr, $wb", + LdStMulFrm, IIC_Br, "ldm${addr:submode}${p}\t$addr, $wb", []>; // On non-Darwin platforms R9 is callee-saved. 
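
Several hunks above and below replace "let Inst{4} = 0" with "let Inst{11-4} = 0b00000000" on the register-register data-processing patterns: the whole shifter-operand field gets pinned, not just one bit. A small C++ sketch of that field layout (positions per the ARM ARM; an illustration, not generated encoder code):

    #include <cstdint>
    #include <cassert>
    #include <cstdio>

    // ARM data-processing shifter operand, register form:
    //   bits 11..7 = shift amount, bits 6..5 = shift type (LSL = 00),
    //   bit 4 = 0 (shift by immediate), bits 3..0 = Rm.
    // An unshifted register operand therefore has bits 11..4 all zero.
    static uint32_t unshiftedRegOperand(unsigned Rm) {
      assert(Rm < 16 && "Rm is a 4-bit register number");
      return Rm & 0xFu;                       // Inst{11-4} stays 0b00000000
    }

    static bool shifterBitsClear(uint32_t insn) {
      return ((insn >> 4) & 0xFFu) == 0;      // the constraint the .td now states
    }

    int main() {
      uint32_t op2 = unshiftedRegOperand(3);  // e.g. the "r3" operand of "add r0, r1, r3"
      std::printf("operand2=0x%03x, Inst{11-4} clear: %d\n", op2, shifterBitsClear(op2));
    }
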
@@ -762,6 +759,7 @@ let isBranch = 1, isTerminator = 1 in { def BR_JTr : JTI<(outs), (ins GPR:$target, jtblock_operand:$jt, i32imm:$id), IIC_Br, "mov\tpc, $target \n$jt", [(ARMbrjt GPR:$target, tjumptable:$jt, imm:$id)]> { + let Inst{15-12} = 0b1111; let Inst{20} = 0; // S Bit let Inst{24-21} = 0b1101; let Inst{27-25} = 0b000; @@ -771,6 +769,7 @@ let isBranch = 1, isTerminator = 1 in { IIC_Br, "ldr\tpc, $target \n$jt", [(ARMbrjt (i32 (load addrmode2:$target)), tjumptable:$jt, imm:$id)]> { + let Inst{15-12} = 0b1111; let Inst{20} = 1; // L bit let Inst{21} = 0; // W bit let Inst{22} = 0; // B bit @@ -782,6 +781,7 @@ let isBranch = 1, isTerminator = 1 in { IIC_Br, "add\tpc, $target, $idx \n$jt", [(ARMbrjt (add GPR:$target, GPR:$idx), tjumptable:$jt, imm:$id)]> { + let Inst{15-12} = 0b1111; let Inst{20} = 0; // S bit let Inst{24-21} = 0b0100; let Inst{27-25} = 0b000; @@ -813,26 +813,26 @@ def LDRcp : AI2ldw<(outs GPR:$dst), (ins addrmode2:$addr), LdFrm, IIC_iLoadr, // Loads with zero extension def LDRH : AI3ldh<(outs GPR:$dst), (ins addrmode3:$addr), LdMiscFrm, - IIC_iLoadr, "ldr", "h\t$dst, $addr", + IIC_iLoadr, "ldrh", "\t$dst, $addr", [(set GPR:$dst, (zextloadi16 addrmode3:$addr))]>; def LDRB : AI2ldb<(outs GPR:$dst), (ins addrmode2:$addr), LdFrm, - IIC_iLoadr, "ldr", "b\t$dst, $addr", + IIC_iLoadr, "ldrb", "\t$dst, $addr", [(set GPR:$dst, (zextloadi8 addrmode2:$addr))]>; // Loads with sign extension def LDRSH : AI3ldsh<(outs GPR:$dst), (ins addrmode3:$addr), LdMiscFrm, - IIC_iLoadr, "ldr", "sh\t$dst, $addr", + IIC_iLoadr, "ldrsh", "\t$dst, $addr", [(set GPR:$dst, (sextloadi16 addrmode3:$addr))]>; def LDRSB : AI3ldsb<(outs GPR:$dst), (ins addrmode3:$addr), LdMiscFrm, - IIC_iLoadr, "ldr", "sb\t$dst, $addr", + IIC_iLoadr, "ldrsb", "\t$dst, $addr", [(set GPR:$dst, (sextloadi8 addrmode3:$addr))]>; let mayLoad = 1, hasExtraDefRegAllocReq = 1 in { // Load doubleword def LDRD : AI3ldd<(outs GPR:$dst1, GPR:$dst2), (ins addrmode3:$addr), LdMiscFrm, - IIC_iLoadr, "ldr", "d\t$dst1, $addr", + IIC_iLoadr, "ldrd", "\t$dst1, $addr", []>, Requires<[IsARM, HasV5TE]>; // Indexed loads @@ -846,35 +846,35 @@ def LDR_POST : AI2ldwpo<(outs GPR:$dst, GPR:$base_wb), def LDRH_PRE : AI3ldhpr<(outs GPR:$dst, GPR:$base_wb), (ins addrmode3:$addr), LdMiscFrm, IIC_iLoadru, - "ldr", "h\t$dst, $addr!", "$addr.base = $base_wb", []>; + "ldrh", "\t$dst, $addr!", "$addr.base = $base_wb", []>; def LDRH_POST : AI3ldhpo<(outs GPR:$dst, GPR:$base_wb), (ins GPR:$base,am3offset:$offset), LdMiscFrm, IIC_iLoadru, - "ldr", "h\t$dst, [$base], $offset", "$base = $base_wb", []>; + "ldrh", "\t$dst, [$base], $offset", "$base = $base_wb", []>; def LDRB_PRE : AI2ldbpr<(outs GPR:$dst, GPR:$base_wb), (ins addrmode2:$addr), LdFrm, IIC_iLoadru, - "ldr", "b\t$dst, $addr!", "$addr.base = $base_wb", []>; + "ldrb", "\t$dst, $addr!", "$addr.base = $base_wb", []>; def LDRB_POST : AI2ldbpo<(outs GPR:$dst, GPR:$base_wb), (ins GPR:$base,am2offset:$offset), LdFrm, IIC_iLoadru, - "ldr", "b\t$dst, [$base], $offset", "$base = $base_wb", []>; + "ldrb", "\t$dst, [$base], $offset", "$base = $base_wb", []>; def LDRSH_PRE : AI3ldshpr<(outs GPR:$dst, GPR:$base_wb), (ins addrmode3:$addr), LdMiscFrm, IIC_iLoadru, - "ldr", "sh\t$dst, $addr!", "$addr.base = $base_wb", []>; + "ldrsh", "\t$dst, $addr!", "$addr.base = $base_wb", []>; def LDRSH_POST: AI3ldshpo<(outs GPR:$dst, GPR:$base_wb), (ins GPR:$base,am3offset:$offset), LdMiscFrm, IIC_iLoadru, - "ldr", "sh\t$dst, [$base], $offset", "$base = $base_wb", []>; + "ldrsh", "\t$dst, [$base], $offset", "$base = $base_wb", 
[]>; def LDRSB_PRE : AI3ldsbpr<(outs GPR:$dst, GPR:$base_wb), (ins addrmode3:$addr), LdMiscFrm, IIC_iLoadru, - "ldr", "sb\t$dst, $addr!", "$addr.base = $base_wb", []>; + "ldrsb", "\t$dst, $addr!", "$addr.base = $base_wb", []>; def LDRSB_POST: AI3ldsbpo<(outs GPR:$dst, GPR:$base_wb), (ins GPR:$base,am3offset:$offset), LdMiscFrm, IIC_iLoadru, - "ldr", "sb\t$dst, [$base], $offset", "$base = $base_wb", []>; + "ldrsb", "\t$dst, [$base], $offset", "$base = $base_wb", []>; } // Store @@ -884,18 +884,18 @@ def STR : AI2stw<(outs), (ins GPR:$src, addrmode2:$addr), StFrm, IIC_iStorer, // Stores with truncate def STRH : AI3sth<(outs), (ins GPR:$src, addrmode3:$addr), StMiscFrm, IIC_iStorer, - "str", "h\t$src, $addr", + "strh", "\t$src, $addr", [(truncstorei16 GPR:$src, addrmode3:$addr)]>; def STRB : AI2stb<(outs), (ins GPR:$src, addrmode2:$addr), StFrm, IIC_iStorer, - "str", "b\t$src, $addr", + "strb", "\t$src, $addr", [(truncstorei8 GPR:$src, addrmode2:$addr)]>; // Store doubleword let mayStore = 1, hasExtraSrcRegAllocReq = 1 in def STRD : AI3std<(outs), (ins GPR:$src1, GPR:$src2, addrmode3:$addr), StMiscFrm, IIC_iStorer, - "str", "d\t$src1, $addr", []>, Requires<[IsARM, HasV5TE]>; + "strd", "\t$src1, $addr", []>, Requires<[IsARM, HasV5TE]>; // Indexed stores def STR_PRE : AI2stwpr<(outs GPR:$base_wb), @@ -915,28 +915,28 @@ def STR_POST : AI2stwpo<(outs GPR:$base_wb), def STRH_PRE : AI3sthpr<(outs GPR:$base_wb), (ins GPR:$src, GPR:$base,am3offset:$offset), StMiscFrm, IIC_iStoreru, - "str", "h\t$src, [$base, $offset]!", "$base = $base_wb", + "strh", "\t$src, [$base, $offset]!", "$base = $base_wb", [(set GPR:$base_wb, (pre_truncsti16 GPR:$src, GPR:$base,am3offset:$offset))]>; def STRH_POST: AI3sthpo<(outs GPR:$base_wb), (ins GPR:$src, GPR:$base,am3offset:$offset), StMiscFrm, IIC_iStoreru, - "str", "h\t$src, [$base], $offset", "$base = $base_wb", + "strh", "\t$src, [$base], $offset", "$base = $base_wb", [(set GPR:$base_wb, (post_truncsti16 GPR:$src, GPR:$base, am3offset:$offset))]>; def STRB_PRE : AI2stbpr<(outs GPR:$base_wb), (ins GPR:$src, GPR:$base,am2offset:$offset), StFrm, IIC_iStoreru, - "str", "b\t$src, [$base, $offset]!", "$base = $base_wb", + "strb", "\t$src, [$base, $offset]!", "$base = $base_wb", [(set GPR:$base_wb, (pre_truncsti8 GPR:$src, GPR:$base, am2offset:$offset))]>; def STRB_POST: AI2stbpo<(outs GPR:$base_wb), (ins GPR:$src, GPR:$base,am2offset:$offset), StFrm, IIC_iStoreru, - "str", "b\t$src, [$base], $offset", "$base = $base_wb", + "strb", "\t$src, [$base], $offset", "$base = $base_wb", [(set GPR:$base_wb, (post_truncsti8 GPR:$src, GPR:$base, am2offset:$offset))]>; @@ -947,13 +947,13 @@ def STRB_POST: AI2stbpo<(outs GPR:$base_wb), let mayLoad = 1, hasExtraDefRegAllocReq = 1 in def LDM : AXI4ld<(outs), (ins addrmode4:$addr, pred:$p, reglist:$wb, variable_ops), - LdStMulFrm, IIC_iLoadm, "ldm${p}${addr:submode}\t$addr, $wb", + LdStMulFrm, IIC_iLoadm, "ldm${addr:submode}${p}\t$addr, $wb", []>; let mayStore = 1, hasExtraSrcRegAllocReq = 1 in def STM : AXI4st<(outs), (ins addrmode4:$addr, pred:$p, reglist:$wb, variable_ops), - LdStMulFrm, IIC_iStorem, "stm${p}${addr:submode}\t$addr, $wb", + LdStMulFrm, IIC_iStorem, "stm${addr:submode}${p}\t$addr, $wb", []>; //===----------------------------------------------------------------------===// @@ -963,15 +963,13 @@ def STM : AXI4st<(outs), let neverHasSideEffects = 1 in def MOVr : AsI1<0b1101, (outs GPR:$dst), (ins GPR:$src), DPFrm, IIC_iMOVr, "mov", "\t$dst, $src", []>, UnaryDP { - let Inst{4} = 0; + let Inst{11-4} = 0b00000000; let Inst{25} = 
0; } def MOVs : AsI1<0b1101, (outs GPR:$dst), (ins so_reg:$src), DPSoRegFrm, IIC_iMOVsr, "mov", "\t$dst, $src", [(set GPR:$dst, so_reg:$src)]>, UnaryDP { - let Inst{4} = 1; - let Inst{7} = 0; let Inst{25} = 0; } @@ -1016,10 +1014,10 @@ def MOVrx : AsI1<0b1101, (outs GPR:$dst), (ins GPR:$src), Pseudo, IIC_iMOVsi, let Defs = [CPSR] in { def MOVsrl_flag : AI1<0b1101, (outs GPR:$dst), (ins GPR:$src), Pseudo, - IIC_iMOVsi, "mov", "s\t$dst, $src, lsr #1", + IIC_iMOVsi, "movs", "\t$dst, $src, lsr #1", [(set GPR:$dst, (ARMsrl_flag GPR:$src))]>, UnaryDP; def MOVsra_flag : AI1<0b1101, (outs GPR:$dst), (ins GPR:$src), Pseudo, - IIC_iMOVsi, "mov", "s\t$dst, $src, asr #1", + IIC_iMOVsi, "movs", "\t$dst, $src, asr #1", [(set GPR:$dst, (ARMsra_flag GPR:$src))]>, UnaryDP; } @@ -1095,15 +1093,19 @@ defm SUB : AsI1_bin_irs<0b0010, "sub", BinOpFrag<(sub node:$LHS, node:$RHS)>>; // ADD and SUB with 's' bit set. -defm ADDS : AI1_bin_s_irs<0b0100, "add", - BinOpFrag<(addc node:$LHS, node:$RHS)>>; -defm SUBS : AI1_bin_s_irs<0b0010, "sub", +defm ADDS : AI1_bin_s_irs<0b0100, "adds", + BinOpFrag<(addc node:$LHS, node:$RHS)>, 1>; +defm SUBS : AI1_bin_s_irs<0b0010, "subs", BinOpFrag<(subc node:$LHS, node:$RHS)>>; defm ADC : AI1_adde_sube_irs<0b0101, "adc", BinOpFrag<(adde node:$LHS, node:$RHS)>, 1>; defm SBC : AI1_adde_sube_irs<0b0110, "sbc", BinOpFrag<(sube node:$LHS, node:$RHS)>>; +defm ADCS : AI1_adde_sube_s_irs<0b0101, "adcs", + BinOpFrag<(adde node:$LHS, node:$RHS)>, 1>; +defm SBCS : AI1_adde_sube_s_irs<0b0110, "sbcs", + BinOpFrag<(sube node:$LHS, node:$RHS)>>; // These don't define reg/reg forms, because they are handled above. def RSBri : AsI1<0b0011, (outs GPR:$dst), (ins GPR:$a, so_imm:$b), DPFrm, @@ -1115,24 +1117,20 @@ def RSBri : AsI1<0b0011, (outs GPR:$dst), (ins GPR:$a, so_imm:$b), DPFrm, def RSBrs : AsI1<0b0011, (outs GPR:$dst), (ins GPR:$a, so_reg:$b), DPSoRegFrm, IIC_iALUsr, "rsb", "\t$dst, $a, $b", [(set GPR:$dst, (sub so_reg:$b, GPR:$a))]> { - let Inst{4} = 1; - let Inst{7} = 0; let Inst{25} = 0; } // RSB with 's' bit set. 
let Defs = [CPSR] in { def RSBSri : AI1<0b0011, (outs GPR:$dst), (ins GPR:$a, so_imm:$b), DPFrm, - IIC_iALUi, "rsb", "s\t$dst, $a, $b", + IIC_iALUi, "rsbs", "\t$dst, $a, $b", [(set GPR:$dst, (subc so_imm:$b, GPR:$a))]> { let Inst{20} = 1; let Inst{25} = 1; } def RSBSrs : AI1<0b0011, (outs GPR:$dst), (ins GPR:$a, so_reg:$b), DPSoRegFrm, - IIC_iALUsr, "rsb", "s\t$dst, $a, $b", + IIC_iALUsr, "rsbs", "\t$dst, $a, $b", [(set GPR:$dst, (subc so_reg:$b, GPR:$a))]> { - let Inst{4} = 1; - let Inst{7} = 0; let Inst{20} = 1; let Inst{25} = 0; } @@ -1149,8 +1147,6 @@ def RSCrs : AsI1<0b0111, (outs GPR:$dst), (ins GPR:$a, so_reg:$b), DPSoRegFrm, IIC_iALUsr, "rsc", "\t$dst, $a, $b", [(set GPR:$dst, (sube so_reg:$b, GPR:$a))]>, Requires<[IsARM, CarryDefIsUnused]> { - let Inst{4} = 1; - let Inst{7} = 0; let Inst{25} = 0; } } @@ -1168,8 +1164,6 @@ def RSCSrs : AXI1<0b0111, (outs GPR:$dst), (ins GPR:$a, so_reg:$b), DPSoRegFrm, IIC_iALUsr, "rscs\t$dst, $a, $b", [(set GPR:$dst, (sube so_reg:$b, GPR:$a))]>, Requires<[IsARM, CarryDefIsUnused]> { - let Inst{4} = 1; - let Inst{7} = 0; let Inst{20} = 1; let Inst{25} = 0; } @@ -1216,14 +1210,11 @@ def BFC : I<(outs GPR:$dst), (ins GPR:$src, bf_inv_mask_imm:$imm), def MVNr : AsI1<0b1111, (outs GPR:$dst), (ins GPR:$src), DPFrm, IIC_iMOVr, "mvn", "\t$dst, $src", [(set GPR:$dst, (not GPR:$src))]>, UnaryDP { - let Inst{4} = 0; + let Inst{11-4} = 0b00000000; } def MVNs : AsI1<0b1111, (outs GPR:$dst), (ins so_reg:$src), DPSoRegFrm, IIC_iMOVsr, "mvn", "\t$dst, $src", - [(set GPR:$dst, (not so_reg:$src))]>, UnaryDP { - let Inst{4} = 1; - let Inst{7} = 0; -} + [(set GPR:$dst, (not so_reg:$src))]>, UnaryDP; let isReMaterializable = 1, isAsCheapAsAMove = 1 in def MVNi : AsI1<0b1111, (outs GPR:$dst), (ins so_imm:$imm), DPFrm, IIC_iMOVi, "mvn", "\t$dst, $imm", @@ -1536,7 +1527,7 @@ def MOVCCr : AI1<0b1101, (outs GPR:$dst), (ins GPR:$false, GPR:$true), DPFrm, IIC_iCMOVr, "mov", "\t$dst, $true", [/*(set GPR:$dst, (ARMcmov GPR:$false, GPR:$true, imm:$cc, CCR:$ccr))*/]>, RegConstraint<"$false = $dst">, UnaryDP { - let Inst{4} = 0; + let Inst{11-4} = 0b00000000; let Inst{25} = 0; } @@ -1545,8 +1536,6 @@ def MOVCCs : AI1<0b1101, (outs GPR:$dst), "mov", "\t$dst, $true", [/*(set GPR:$dst, (ARMcmov GPR:$false, so_reg:$true, imm:$cc, CCR:$ccr))*/]>, RegConstraint<"$false = $dst">, UnaryDP { - let Inst{4} = 1; - let Inst{7} = 0; let Inst{25} = 0; } diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td index 25c4acd..e1353b7 100644 --- a/lib/Target/ARM/ARMInstrNEON.td +++ b/lib/Target/ARM/ARMInstrNEON.td @@ -102,6 +102,19 @@ def addrmode_neonldstm : Operand<i32>, } */ +def h8imm : Operand<i8> { + let PrintMethod = "printHex8ImmOperand"; +} +def h16imm : Operand<i16> { + let PrintMethod = "printHex16ImmOperand"; +} +def h32imm : Operand<i32> { + let PrintMethod = "printHex32ImmOperand"; +} +def h64imm : Operand<i64> { + let PrintMethod = "printHex64ImmOperand"; +} + //===----------------------------------------------------------------------===// // NEON load / store instructions //===----------------------------------------------------------------------===// @@ -133,7 +146,7 @@ def VLDMS : NI<(outs), // Use vldmia to load a Q register as a D register pair. 
def VLDRQ : NI4<(outs QPR:$dst), (ins addrmode4:$addr), IIC_fpLoadm, - "vldmia $addr, ${dst:dregpair}", + "vldmia\t$addr, ${dst:dregpair}", [(set QPR:$dst, (v2f64 (load addrmode4:$addr)))]> { let Inst{27-25} = 0b110; let Inst{24} = 0; // P bit @@ -145,7 +158,7 @@ def VLDRQ : NI4<(outs QPR:$dst), (ins addrmode4:$addr), // Use vstmia to store a Q register as a D register pair. def VSTRQ : NI4<(outs), (ins QPR:$src, addrmode4:$addr), IIC_fpStorem, - "vstmia $addr, ${src:dregpair}", + "vstmia\t$addr, ${src:dregpair}", [(store (v2f64 QPR:$src), addrmode4:$addr)]> { let Inst{27-25} = 0b110; let Inst{24} = 0; // P bit @@ -2282,7 +2295,7 @@ def VCNTq : N2VQInt<0b11, 0b11, 0b00, 0b00, 0b01010, 0, // VMOV : Vector Move (Register) -def VMOVD : N3V<0, 0, 0b10, 0b0001, 0, 1, (outs DPR:$dst), (ins DPR:$src), +def VMOVDneon: N3V<0, 0, 0b10, 0b0001, 0, 1, (outs DPR:$dst), (ins DPR:$src), IIC_VMOVD, "vmov\t$dst, $src", "", []>; def VMOVQ : N3V<0, 0, 0b10, 0b0001, 1, 1, (outs QPR:$dst), (ins QPR:$src), IIC_VMOVD, "vmov\t$dst, $src", "", []>; @@ -2325,38 +2338,38 @@ def vmovImm64 : PatLeaf<(build_vector), [{ // be encoded based on the immed values. def VMOVv8i8 : N1ModImm<1, 0b000, 0b1110, 0, 0, 0, 1, (outs DPR:$dst), - (ins i8imm:$SIMM), IIC_VMOVImm, + (ins h8imm:$SIMM), IIC_VMOVImm, "vmov.i8\t$dst, $SIMM", "", [(set DPR:$dst, (v8i8 vmovImm8:$SIMM))]>; def VMOVv16i8 : N1ModImm<1, 0b000, 0b1110, 0, 1, 0, 1, (outs QPR:$dst), - (ins i8imm:$SIMM), IIC_VMOVImm, + (ins h8imm:$SIMM), IIC_VMOVImm, "vmov.i8\t$dst, $SIMM", "", [(set QPR:$dst, (v16i8 vmovImm8:$SIMM))]>; def VMOVv4i16 : N1ModImm<1, 0b000, 0b1000, 0, 0, 0, 1, (outs DPR:$dst), - (ins i16imm:$SIMM), IIC_VMOVImm, + (ins h16imm:$SIMM), IIC_VMOVImm, "vmov.i16\t$dst, $SIMM", "", [(set DPR:$dst, (v4i16 vmovImm16:$SIMM))]>; def VMOVv8i16 : N1ModImm<1, 0b000, 0b1000, 0, 1, 0, 1, (outs QPR:$dst), - (ins i16imm:$SIMM), IIC_VMOVImm, + (ins h16imm:$SIMM), IIC_VMOVImm, "vmov.i16\t$dst, $SIMM", "", [(set QPR:$dst, (v8i16 vmovImm16:$SIMM))]>; def VMOVv2i32 : N1ModImm<1, 0b000, 0b0000, 0, 0, 0, 1, (outs DPR:$dst), - (ins i32imm:$SIMM), IIC_VMOVImm, + (ins h32imm:$SIMM), IIC_VMOVImm, "vmov.i32\t$dst, $SIMM", "", [(set DPR:$dst, (v2i32 vmovImm32:$SIMM))]>; def VMOVv4i32 : N1ModImm<1, 0b000, 0b0000, 0, 1, 0, 1, (outs QPR:$dst), - (ins i32imm:$SIMM), IIC_VMOVImm, + (ins h32imm:$SIMM), IIC_VMOVImm, "vmov.i32\t$dst, $SIMM", "", [(set QPR:$dst, (v4i32 vmovImm32:$SIMM))]>; def VMOVv1i64 : N1ModImm<1, 0b000, 0b1110, 0, 0, 1, 1, (outs DPR:$dst), - (ins i64imm:$SIMM), IIC_VMOVImm, + (ins h64imm:$SIMM), IIC_VMOVImm, "vmov.i64\t$dst, $SIMM", "", [(set DPR:$dst, (v1i64 vmovImm64:$SIMM))]>; def VMOVv2i64 : N1ModImm<1, 0b000, 0b1110, 0, 1, 1, 1, (outs QPR:$dst), - (ins i64imm:$SIMM), IIC_VMOVImm, + (ins h64imm:$SIMM), IIC_VMOVImm, "vmov.i64\t$dst, $SIMM", "", [(set QPR:$dst, (v2i64 vmovImm64:$SIMM))]>; diff --git a/lib/Target/ARM/ARMInstrThumb.td b/lib/Target/ARM/ARMInstrThumb.td index 5d02925..2796364 100644 --- a/lib/Target/ARM/ARMInstrThumb.td +++ b/lib/Target/ARM/ARMInstrThumb.td @@ -740,3 +740,13 @@ def : T1Pat<(i32 thumb_immshifted:$src), def : T1Pat<(i32 imm0_255_comp:$src), (tMVN (tMOVi8 (imm_comp_XFORM imm:$src)))>; + +// Pseudo instruction that combines ldr from constpool and add pc. This should +// be expanded into two instructions late to allow if-conversion and +// scheduling. 
+let isReMaterializable = 1 in +def tLDRpci_pic : PseudoInst<(outs GPR:$dst), (ins i32imm:$addr, pclabel:$cp), + NoItinerary, "@ ldr.n\t$dst, $addr\n$cp:\n\tadd\t$dst, pc", + [(set GPR:$dst, (ARMpic_add (load (ARMWrapper tconstpool:$addr)), + imm:$cp))]>, + Requires<[IsThumb1Only]>; diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td index 5bfda37..1bb9bfd 100644 --- a/lib/Target/ARM/ARMInstrThumb2.td +++ b/lib/Target/ARM/ARMInstrThumb2.td @@ -1179,3 +1179,13 @@ let isReMaterializable = 1 in def t2MOVi32imm : T2Ix2<(outs GPR:$dst), (ins i32imm:$src), IIC_iMOVi, "movw", "\t$dst, ${src:lo16}\n\tmovt${p}\t$dst, ${src:hi16}", [(set GPR:$dst, (i32 imm:$src))]>; + +// Pseudo instruction that combines ldr from constpool and add pc. This should +// be expanded into two instructions late to allow if-conversion and +// scheduling. +let isReMaterializable = 1 in +def t2LDRpci_pic : PseudoInst<(outs GPR:$dst), (ins i32imm:$addr, pclabel:$cp), + NoItinerary, "@ ldr.w\t$dst, $addr\n$cp:\n\tadd\t$dst, pc", + [(set GPR:$dst, (ARMpic_add (load (ARMWrapper tconstpool:$addr)), + imm:$cp))]>, + Requires<[IsThumb2]>; diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td index 455c33b..ba341f4 100644 --- a/lib/Target/ARM/ARMInstrVFP.td +++ b/lib/Target/ARM/ARMInstrVFP.td @@ -17,7 +17,7 @@ def SDT_ITOF : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisVT<1, f32>]>; def SDT_CMPFP0 : SDTypeProfile<0, 1, [SDTCisFP<0>]>; -def SDT_FMDRR : +def SDT_VMOVDRR : SDTypeProfile<1, 2, [SDTCisVT<0, f64>, SDTCisVT<1, i32>, SDTCisSameAs<1, 2>]>; @@ -28,7 +28,7 @@ def arm_uitof : SDNode<"ARMISD::UITOF", SDT_ITOF>; def arm_fmstat : SDNode<"ARMISD::FMSTAT", SDTNone, [SDNPInFlag,SDNPOutFlag]>; def arm_cmpfp : SDNode<"ARMISD::CMPFP", SDT_ARMCmp, [SDNPOutFlag]>; def arm_cmpfp0 : SDNode<"ARMISD::CMPFPw0",SDT_CMPFP0, [SDNPOutFlag]>; -def arm_fmdrr : SDNode<"ARMISD::FMDRR", SDT_FMDRR>; +def arm_fmdrr : SDNode<"ARMISD::VMOVDRR", SDT_VMOVDRR>; //===----------------------------------------------------------------------===// // Operand Definitions. 
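
The new tLDRpci_pic/t2LDRpci_pic pseudos above fold a constant-pool load and the "add pc" of the PIC base into one rematerializable instruction; together with the per-function constant-pool ids (the AFI->createConstPoolEntryUId() calls in the lowering code earlier), each PIC label/pool pair is now self-contained. In plain integer arithmetic the address they compute looks roughly like this (a sketch under the usual ARM constant-pool PIC scheme, not generated code; names are illustrative):

    #include <cstdint>
    #include <cstdio>

    // The pool slot holds (Symbol - (PICLabel + PCAdj)), where PCAdj is 4 in
    // Thumb and 8 in ARM (how far the architectural PC reads ahead). Loading
    // that slot and adding the PC observed at the label yields the symbol.
    static uint32_t picAddress(uint32_t symbolAddr, uint32_t picLabelAddr, bool isThumb) {
      uint32_t pcAdj     = isThumb ? 4 : 8;
      uint32_t poolEntry = symbolAddr - (picLabelAddr + pcAdj); // emitted into the pool
      uint32_t pcAtLabel = picLabelAddr + pcAdj;                // what "pc" reads as
      return poolEntry + pcAtLabel;                             // == symbolAddr
    }

    int main() {
      std::printf("0x%08x\n", picAddress(0x2000, 0x1000, /*isThumb=*/true)); // 0x00002000
    }
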
@@ -55,21 +55,21 @@ def vfp_f64imm : Operand<f64>, // let canFoldAsLoad = 1 in { -def FLDD : ADI5<0b1101, 0b01, (outs DPR:$dst), (ins addrmode5:$addr), - IIC_fpLoad64, "fldd", "\t$dst, $addr", +def VLDRD : ADI5<0b1101, 0b01, (outs DPR:$dst), (ins addrmode5:$addr), + IIC_fpLoad64, "vldr", ".64\t$dst, $addr", [(set DPR:$dst, (load addrmode5:$addr))]>; -def FLDS : ASI5<0b1101, 0b01, (outs SPR:$dst), (ins addrmode5:$addr), - IIC_fpLoad32, "flds", "\t$dst, $addr", +def VLDRS : ASI5<0b1101, 0b01, (outs SPR:$dst), (ins addrmode5:$addr), + IIC_fpLoad32, "vldr", ".32\t$dst, $addr", [(set SPR:$dst, (load addrmode5:$addr))]>; } // canFoldAsLoad -def FSTD : ADI5<0b1101, 0b00, (outs), (ins DPR:$src, addrmode5:$addr), - IIC_fpStore64, "fstd", "\t$src, $addr", +def VSTRD : ADI5<0b1101, 0b00, (outs), (ins DPR:$src, addrmode5:$addr), + IIC_fpStore64, "vstr", ".64\t$src, $addr", [(store DPR:$src, addrmode5:$addr)]>; -def FSTS : ASI5<0b1101, 0b00, (outs), (ins SPR:$src, addrmode5:$addr), - IIC_fpStore32, "fsts", "\t$src, $addr", +def VSTRS : ASI5<0b1101, 0b00, (outs), (ins SPR:$src, addrmode5:$addr), + IIC_fpStore32, "vstr", ".32\t$src, $addr", [(store SPR:$src, addrmode5:$addr)]>; //===----------------------------------------------------------------------===// @@ -77,32 +77,32 @@ def FSTS : ASI5<0b1101, 0b00, (outs), (ins SPR:$src, addrmode5:$addr), // let mayLoad = 1, hasExtraDefRegAllocReq = 1 in { -def FLDMD : AXDI5<(outs), (ins addrmode5:$addr, pred:$p, reglist:$wb, +def VLDMD : AXDI5<(outs), (ins addrmode5:$addr, pred:$p, reglist:$wb, variable_ops), IIC_fpLoadm, - "fldm${addr:submode}d${p}\t${addr:base}, $wb", + "vldm${addr:submode}${p}\t${addr:base}, $wb", []> { let Inst{20} = 1; } -def FLDMS : AXSI5<(outs), (ins addrmode5:$addr, pred:$p, reglist:$wb, +def VLDMS : AXSI5<(outs), (ins addrmode5:$addr, pred:$p, reglist:$wb, variable_ops), IIC_fpLoadm, - "fldm${addr:submode}s${p}\t${addr:base}, $wb", + "vldm${addr:submode}${p}\t${addr:base}, $wb", []> { let Inst{20} = 1; } } // mayLoad, hasExtraDefRegAllocReq let mayStore = 1, hasExtraSrcRegAllocReq = 1 in { -def FSTMD : AXDI5<(outs), (ins addrmode5:$addr, pred:$p, reglist:$wb, +def VSTMD : AXDI5<(outs), (ins addrmode5:$addr, pred:$p, reglist:$wb, variable_ops), IIC_fpStorem, - "fstm${addr:submode}d${p}\t${addr:base}, $wb", + "vstm${addr:submode}${p}\t${addr:base}, $wb", []> { let Inst{20} = 0; } -def FSTMS : AXSI5<(outs), (ins addrmode5:$addr, pred:$p, reglist:$wb, +def VSTMS : AXSI5<(outs), (ins addrmode5:$addr, pred:$p, reglist:$wb, variable_ops), IIC_fpStorem, - "fstm${addr:submode}s${p}\t${addr:base}, $wb", + "vstm${addr:submode}${p}\t${addr:base}, $wb", []> { let Inst{20} = 0; } @@ -114,68 +114,68 @@ def FSTMS : AXSI5<(outs), (ins addrmode5:$addr, pred:$p, reglist:$wb, // FP Binary Operations. // -def FADDD : ADbI<0b11100011, (outs DPR:$dst), (ins DPR:$a, DPR:$b), - IIC_fpALU64, "faddd", "\t$dst, $a, $b", +def VADDD : ADbI<0b11100011, (outs DPR:$dst), (ins DPR:$a, DPR:$b), + IIC_fpALU64, "vadd", ".f64\t$dst, $a, $b", [(set DPR:$dst, (fadd DPR:$a, DPR:$b))]>; -def FADDS : ASbIn<0b11100011, (outs SPR:$dst), (ins SPR:$a, SPR:$b), - IIC_fpALU32, "fadds", "\t$dst, $a, $b", +def VADDS : ASbIn<0b11100011, (outs SPR:$dst), (ins SPR:$a, SPR:$b), + IIC_fpALU32, "vadd", ".f32\t$dst, $a, $b", [(set SPR:$dst, (fadd SPR:$a, SPR:$b))]>; // These are encoded as unary instructions. 
let Defs = [FPSCR] in { -def FCMPED : ADuI<0b11101011, 0b0100, 0b1100, (outs), (ins DPR:$a, DPR:$b), - IIC_fpCMP64, "fcmped", "\t$a, $b", +def VCMPED : ADuI<0b11101011, 0b0100, 0b1100, (outs), (ins DPR:$a, DPR:$b), + IIC_fpCMP64, "vcmpe", ".f64\t$a, $b", [(arm_cmpfp DPR:$a, DPR:$b)]>; -def FCMPES : ASuI<0b11101011, 0b0100, 0b1100, (outs), (ins SPR:$a, SPR:$b), - IIC_fpCMP32, "fcmpes", "\t$a, $b", +def VCMPES : ASuI<0b11101011, 0b0100, 0b1100, (outs), (ins SPR:$a, SPR:$b), + IIC_fpCMP32, "vcmpe", ".f32\t$a, $b", [(arm_cmpfp SPR:$a, SPR:$b)]>; } -def FDIVD : ADbI<0b11101000, (outs DPR:$dst), (ins DPR:$a, DPR:$b), - IIC_fpDIV64, "fdivd", "\t$dst, $a, $b", +def VDIVD : ADbI<0b11101000, (outs DPR:$dst), (ins DPR:$a, DPR:$b), + IIC_fpDIV64, "vdiv", ".f64\t$dst, $a, $b", [(set DPR:$dst, (fdiv DPR:$a, DPR:$b))]>; -def FDIVS : ASbI<0b11101000, (outs SPR:$dst), (ins SPR:$a, SPR:$b), - IIC_fpDIV32, "fdivs", "\t$dst, $a, $b", +def VDIVS : ASbI<0b11101000, (outs SPR:$dst), (ins SPR:$a, SPR:$b), + IIC_fpDIV32, "vdiv", ".f32\t$dst, $a, $b", [(set SPR:$dst, (fdiv SPR:$a, SPR:$b))]>; -def FMULD : ADbI<0b11100010, (outs DPR:$dst), (ins DPR:$a, DPR:$b), - IIC_fpMUL64, "fmuld", "\t$dst, $a, $b", +def VMULD : ADbI<0b11100010, (outs DPR:$dst), (ins DPR:$a, DPR:$b), + IIC_fpMUL64, "vmul", ".f64\t$dst, $a, $b", [(set DPR:$dst, (fmul DPR:$a, DPR:$b))]>; -def FMULS : ASbIn<0b11100010, (outs SPR:$dst), (ins SPR:$a, SPR:$b), - IIC_fpMUL32, "fmuls", "\t$dst, $a, $b", +def VMULS : ASbIn<0b11100010, (outs SPR:$dst), (ins SPR:$a, SPR:$b), + IIC_fpMUL32, "vmul", ".f32\t$dst, $a, $b", [(set SPR:$dst, (fmul SPR:$a, SPR:$b))]>; - -def FNMULD : ADbI<0b11100010, (outs DPR:$dst), (ins DPR:$a, DPR:$b), - IIC_fpMUL64, "fnmuld", "\t$dst, $a, $b", + +def VNMULD : ADbI<0b11100010, (outs DPR:$dst), (ins DPR:$a, DPR:$b), + IIC_fpMUL64, "vnmul", ".f64\t$dst, $a, $b", [(set DPR:$dst, (fneg (fmul DPR:$a, DPR:$b)))]> { let Inst{6} = 1; } -def FNMULS : ASbI<0b11100010, (outs SPR:$dst), (ins SPR:$a, SPR:$b), - IIC_fpMUL32, "fnmuls", "\t$dst, $a, $b", +def VNMULS : ASbI<0b11100010, (outs SPR:$dst), (ins SPR:$a, SPR:$b), + IIC_fpMUL32, "vnmul", ".f32\t$dst, $a, $b", [(set SPR:$dst, (fneg (fmul SPR:$a, SPR:$b)))]> { let Inst{6} = 1; } // Match reassociated forms only if not sign dependent rounding. def : Pat<(fmul (fneg DPR:$a), DPR:$b), - (FNMULD DPR:$a, DPR:$b)>, Requires<[NoHonorSignDependentRounding]>; + (VNMULD DPR:$a, DPR:$b)>, Requires<[NoHonorSignDependentRounding]>; def : Pat<(fmul (fneg SPR:$a), SPR:$b), - (FNMULS SPR:$a, SPR:$b)>, Requires<[NoHonorSignDependentRounding]>; + (VNMULS SPR:$a, SPR:$b)>, Requires<[NoHonorSignDependentRounding]>; -def FSUBD : ADbI<0b11100011, (outs DPR:$dst), (ins DPR:$a, DPR:$b), - IIC_fpALU64, "fsubd", "\t$dst, $a, $b", +def VSUBD : ADbI<0b11100011, (outs DPR:$dst), (ins DPR:$a, DPR:$b), + IIC_fpALU64, "vsub", ".f64\t$dst, $a, $b", [(set DPR:$dst, (fsub DPR:$a, DPR:$b))]> { let Inst{6} = 1; } -def FSUBS : ASbIn<0b11100011, (outs SPR:$dst), (ins SPR:$a, SPR:$b), - IIC_fpALU32, "fsubs", "\t$dst, $a, $b", +def VSUBS : ASbIn<0b11100011, (outs SPR:$dst), (ins SPR:$a, SPR:$b), + IIC_fpALU32, "vsub", ".f32\t$dst, $a, $b", [(set SPR:$dst, (fsub SPR:$a, SPR:$b))]> { let Inst{6} = 1; } @@ -184,31 +184,31 @@ def FSUBS : ASbIn<0b11100011, (outs SPR:$dst), (ins SPR:$a, SPR:$b), // FP Unary Operations. 
// -def FABSD : ADuI<0b11101011, 0b0000, 0b1100, (outs DPR:$dst), (ins DPR:$a), - IIC_fpUNA64, "fabsd", "\t$dst, $a", +def VABSD : ADuI<0b11101011, 0b0000, 0b1100, (outs DPR:$dst), (ins DPR:$a), + IIC_fpUNA64, "vabs", ".f64\t$dst, $a", [(set DPR:$dst, (fabs DPR:$a))]>; -def FABSS : ASuIn<0b11101011, 0b0000, 0b1100, (outs SPR:$dst), (ins SPR:$a), - IIC_fpUNA32, "fabss", "\t$dst, $a", +def VABSS : ASuIn<0b11101011, 0b0000, 0b1100, (outs SPR:$dst), (ins SPR:$a), + IIC_fpUNA32, "vabs", ".f32\t$dst, $a", [(set SPR:$dst, (fabs SPR:$a))]>; let Defs = [FPSCR] in { -def FCMPEZD : ADuI<0b11101011, 0b0101, 0b1100, (outs), (ins DPR:$a), - IIC_fpCMP64, "fcmpezd", "\t$a", +def VCMPEZD : ADuI<0b11101011, 0b0101, 0b1100, (outs), (ins DPR:$a), + IIC_fpCMP64, "vcmpe", ".f64\t$a, #0", [(arm_cmpfp0 DPR:$a)]>; -def FCMPEZS : ASuI<0b11101011, 0b0101, 0b1100, (outs), (ins SPR:$a), - IIC_fpCMP32, "fcmpezs", "\t$a", +def VCMPEZS : ASuI<0b11101011, 0b0101, 0b1100, (outs), (ins SPR:$a), + IIC_fpCMP32, "vcmpe", ".f32\t$a, #0", [(arm_cmpfp0 SPR:$a)]>; } -def FCVTDS : ASuI<0b11101011, 0b0111, 0b1100, (outs DPR:$dst), (ins SPR:$a), - IIC_fpCVTDS, "fcvtds", "\t$dst, $a", +def VCVTDS : ASuI<0b11101011, 0b0111, 0b1100, (outs DPR:$dst), (ins SPR:$a), + IIC_fpCVTDS, "vcvt", ".f64.f32\t$dst, $a", [(set DPR:$dst, (fextend SPR:$a))]>; // Special case encoding: bits 11-8 is 0b1011. -def FCVTSD : VFPAI<(outs SPR:$dst), (ins DPR:$a), VFPUnaryFrm, - IIC_fpCVTSD, "fcvtsd", "\t$dst, $a", +def VCVTSD : VFPAI<(outs SPR:$dst), (ins DPR:$a), VFPUnaryFrm, + IIC_fpCVTSD, "vcvt", ".f32.f64\t$dst, $a", [(set SPR:$dst, (fround DPR:$a))]> { let Inst{27-23} = 0b11101; let Inst{21-16} = 0b110111; @@ -217,52 +217,52 @@ def FCVTSD : VFPAI<(outs SPR:$dst), (ins DPR:$a), VFPUnaryFrm, } let neverHasSideEffects = 1 in { -def FCPYD : ADuI<0b11101011, 0b0000, 0b0100, (outs DPR:$dst), (ins DPR:$a), - IIC_fpUNA64, "fcpyd", "\t$dst, $a", []>; +def VMOVD: ADuI<0b11101011, 0b0000, 0b0100, (outs DPR:$dst), (ins DPR:$a), + IIC_fpUNA64, "vmov", ".f64\t$dst, $a", []>; -def FCPYS : ASuI<0b11101011, 0b0000, 0b0100, (outs SPR:$dst), (ins SPR:$a), - IIC_fpUNA32, "fcpys", "\t$dst, $a", []>; +def VMOVS: ASuI<0b11101011, 0b0000, 0b0100, (outs SPR:$dst), (ins SPR:$a), + IIC_fpUNA32, "vmov", ".f32\t$dst, $a", []>; } // neverHasSideEffects -def FNEGD : ADuI<0b11101011, 0b0001, 0b0100, (outs DPR:$dst), (ins DPR:$a), - IIC_fpUNA64, "fnegd", "\t$dst, $a", +def VNEGD : ADuI<0b11101011, 0b0001, 0b0100, (outs DPR:$dst), (ins DPR:$a), + IIC_fpUNA64, "vneg", ".f64\t$dst, $a", [(set DPR:$dst, (fneg DPR:$a))]>; -def FNEGS : ASuIn<0b11101011, 0b0001, 0b0100, (outs SPR:$dst), (ins SPR:$a), - IIC_fpUNA32, "fnegs", "\t$dst, $a", +def VNEGS : ASuIn<0b11101011, 0b0001, 0b0100, (outs SPR:$dst), (ins SPR:$a), + IIC_fpUNA32, "vneg", ".f32\t$dst, $a", [(set SPR:$dst, (fneg SPR:$a))]>; -def FSQRTD : ADuI<0b11101011, 0b0001, 0b1100, (outs DPR:$dst), (ins DPR:$a), - IIC_fpSQRT64, "fsqrtd", "\t$dst, $a", +def VSQRTD : ADuI<0b11101011, 0b0001, 0b1100, (outs DPR:$dst), (ins DPR:$a), + IIC_fpSQRT64, "vsqrt", ".f64\t$dst, $a", [(set DPR:$dst, (fsqrt DPR:$a))]>; -def FSQRTS : ASuI<0b11101011, 0b0001, 0b1100, (outs SPR:$dst), (ins SPR:$a), - IIC_fpSQRT32, "fsqrts", "\t$dst, $a", +def VSQRTS : ASuI<0b11101011, 0b0001, 0b1100, (outs SPR:$dst), (ins SPR:$a), + IIC_fpSQRT32, "vsqrt", ".f32\t$dst, $a", [(set SPR:$dst, (fsqrt SPR:$a))]>; //===----------------------------------------------------------------------===// // FP <-> GPR Copies. Int <-> FP Conversions. 
// -def FMRS : AVConv2I<0b11100001, 0b1010, (outs GPR:$dst), (ins SPR:$src), - IIC_VMOVSI, "fmrs", "\t$dst, $src", +def VMOVRS : AVConv2I<0b11100001, 0b1010, (outs GPR:$dst), (ins SPR:$src), + IIC_VMOVSI, "vmov", "\t$dst, $src", [(set GPR:$dst, (bitconvert SPR:$src))]>; -def FMSR : AVConv4I<0b11100000, 0b1010, (outs SPR:$dst), (ins GPR:$src), - IIC_VMOVIS, "fmsr", "\t$dst, $src", +def VMOVSR : AVConv4I<0b11100000, 0b1010, (outs SPR:$dst), (ins GPR:$src), + IIC_VMOVIS, "vmov", "\t$dst, $src", [(set SPR:$dst, (bitconvert GPR:$src))]>; -def FMRRD : AVConv3I<0b11000101, 0b1011, +def VMOVRRD : AVConv3I<0b11000101, 0b1011, (outs GPR:$wb, GPR:$dst2), (ins DPR:$src), - IIC_VMOVDI, "fmrrd", "\t$wb, $dst2, $src", + IIC_VMOVDI, "vmov", "\t$wb, $dst2, $src", [/* FIXME: Can't write pattern for multiple result instr*/]>; // FMDHR: GPR -> SPR // FMDLR: GPR -> SPR -def FMDRR : AVConv5I<0b11000100, 0b1011, +def VMOVDRR : AVConv5I<0b11000100, 0b1011, (outs DPR:$dst), (ins GPR:$src1, GPR:$src2), - IIC_VMOVID, "fmdrr", "\t$dst, $src1, $src2", + IIC_VMOVID, "vmov", "\t$dst, $src1, $src2", [(set DPR:$dst, (arm_fmdrr GPR:$src1, GPR:$src2))]>; // FMRDH: SPR -> GPR @@ -277,53 +277,53 @@ def FMDRR : AVConv5I<0b11000100, 0b1011, // Int to FP: -def FSITOD : AVConv1I<0b11101011, 0b1000, 0b1011, (outs DPR:$dst), (ins SPR:$a), - IIC_fpCVTID, "fsitod", "\t$dst, $a", +def VSITOD : AVConv1I<0b11101011, 0b1000, 0b1011, (outs DPR:$dst), (ins SPR:$a), + IIC_fpCVTID, "vcvt", ".f64.s32\t$dst, $a", [(set DPR:$dst, (arm_sitof SPR:$a))]> { let Inst{7} = 1; } -def FSITOS : AVConv1In<0b11101011, 0b1000, 0b1010, (outs SPR:$dst),(ins SPR:$a), - IIC_fpCVTIS, "fsitos", "\t$dst, $a", +def VSITOS : AVConv1In<0b11101011, 0b1000, 0b1010, (outs SPR:$dst),(ins SPR:$a), + IIC_fpCVTIS, "vcvt", ".f32.s32\t$dst, $a", [(set SPR:$dst, (arm_sitof SPR:$a))]> { let Inst{7} = 1; } -def FUITOD : AVConv1I<0b11101011, 0b1000, 0b1011, (outs DPR:$dst), (ins SPR:$a), - IIC_fpCVTID, "fuitod", "\t$dst, $a", +def VUITOD : AVConv1I<0b11101011, 0b1000, 0b1011, (outs DPR:$dst), (ins SPR:$a), + IIC_fpCVTID, "vcvt", ".f64.u32\t$dst, $a", [(set DPR:$dst, (arm_uitof SPR:$a))]>; -def FUITOS : AVConv1In<0b11101011, 0b1000, 0b1010, (outs SPR:$dst),(ins SPR:$a), - IIC_fpCVTIS, "fuitos", "\t$dst, $a", +def VUITOS : AVConv1In<0b11101011, 0b1000, 0b1010, (outs SPR:$dst),(ins SPR:$a), + IIC_fpCVTIS, "vcvt", ".f32.u32\t$dst, $a", [(set SPR:$dst, (arm_uitof SPR:$a))]>; // FP to Int: // Always set Z bit in the instruction, i.e. "round towards zero" variants. 
-def FTOSIZD : AVConv1I<0b11101011, 0b1101, 0b1011, +def VTOSIZD : AVConv1I<0b11101011, 0b1101, 0b1011, (outs SPR:$dst), (ins DPR:$a), - IIC_fpCVTDI, "ftosizd", "\t$dst, $a", + IIC_fpCVTDI, "vcvt", ".s32.f64\t$dst, $a", [(set SPR:$dst, (arm_ftosi DPR:$a))]> { let Inst{7} = 1; // Z bit } -def FTOSIZS : AVConv1In<0b11101011, 0b1101, 0b1010, +def VTOSIZS : AVConv1In<0b11101011, 0b1101, 0b1010, (outs SPR:$dst), (ins SPR:$a), - IIC_fpCVTSI, "ftosizs", "\t$dst, $a", + IIC_fpCVTSI, "vcvt", ".s32.f32\t$dst, $a", [(set SPR:$dst, (arm_ftosi SPR:$a))]> { let Inst{7} = 1; // Z bit } -def FTOUIZD : AVConv1I<0b11101011, 0b1100, 0b1011, +def VTOUIZD : AVConv1I<0b11101011, 0b1100, 0b1011, (outs SPR:$dst), (ins DPR:$a), - IIC_fpCVTDI, "ftouizd", "\t$dst, $a", + IIC_fpCVTDI, "vcvt", ".u32.f64\t$dst, $a", [(set SPR:$dst, (arm_ftoui DPR:$a))]> { let Inst{7} = 1; // Z bit } -def FTOUIZS : AVConv1In<0b11101011, 0b1100, 0b1010, +def VTOUIZS : AVConv1In<0b11101011, 0b1100, 0b1010, (outs SPR:$dst), (ins SPR:$a), - IIC_fpCVTSI, "ftouizs", "\t$dst, $a", + IIC_fpCVTSI, "vcvt", ".u32.f32\t$dst, $a", [(set SPR:$dst, (arm_ftoui SPR:$a))]> { let Inst{7} = 1; // Z bit } @@ -332,54 +332,54 @@ def FTOUIZS : AVConv1In<0b11101011, 0b1100, 0b1010, // FP FMA Operations. // -def FMACD : ADbI<0b11100000, (outs DPR:$dst), (ins DPR:$dstin, DPR:$a, DPR:$b), - IIC_fpMAC64, "fmacd", "\t$dst, $a, $b", +def VMLAD : ADbI<0b11100000, (outs DPR:$dst), (ins DPR:$dstin, DPR:$a, DPR:$b), + IIC_fpMAC64, "vmla", ".f64\t$dst, $a, $b", [(set DPR:$dst, (fadd (fmul DPR:$a, DPR:$b), DPR:$dstin))]>, RegConstraint<"$dstin = $dst">; -def FMACS : ASbIn<0b11100000, (outs SPR:$dst), (ins SPR:$dstin, SPR:$a, SPR:$b), - IIC_fpMAC32, "fmacs", "\t$dst, $a, $b", +def VMLAS : ASbIn<0b11100000, (outs SPR:$dst), (ins SPR:$dstin, SPR:$a, SPR:$b), + IIC_fpMAC32, "vmla", ".f32\t$dst, $a, $b", [(set SPR:$dst, (fadd (fmul SPR:$a, SPR:$b), SPR:$dstin))]>, RegConstraint<"$dstin = $dst">; -def FMSCD : ADbI<0b11100001, (outs DPR:$dst), (ins DPR:$dstin, DPR:$a, DPR:$b), - IIC_fpMAC64, "fmscd", "\t$dst, $a, $b", +def VNMLSD : ADbI<0b11100001, (outs DPR:$dst), (ins DPR:$dstin, DPR:$a, DPR:$b), + IIC_fpMAC64, "vnmls", ".f64\t$dst, $a, $b", [(set DPR:$dst, (fsub (fmul DPR:$a, DPR:$b), DPR:$dstin))]>, RegConstraint<"$dstin = $dst">; -def FMSCS : ASbI<0b11100001, (outs SPR:$dst), (ins SPR:$dstin, SPR:$a, SPR:$b), - IIC_fpMAC32, "fmscs", "\t$dst, $a, $b", +def VNMLSS : ASbI<0b11100001, (outs SPR:$dst), (ins SPR:$dstin, SPR:$a, SPR:$b), + IIC_fpMAC32, "vnmls", ".f32\t$dst, $a, $b", [(set SPR:$dst, (fsub (fmul SPR:$a, SPR:$b), SPR:$dstin))]>, RegConstraint<"$dstin = $dst">; -def FNMACD : ADbI<0b11100000, (outs DPR:$dst), (ins DPR:$dstin, DPR:$a, DPR:$b), - IIC_fpMAC64, "fnmacd", "\t$dst, $a, $b", +def VMLSD : ADbI<0b11100000, (outs DPR:$dst), (ins DPR:$dstin, DPR:$a, DPR:$b), + IIC_fpMAC64, "vmls", ".f64\t$dst, $a, $b", [(set DPR:$dst, (fadd (fneg (fmul DPR:$a, DPR:$b)), DPR:$dstin))]>, RegConstraint<"$dstin = $dst"> { let Inst{6} = 1; } -def FNMACS : ASbIn<0b11100000, (outs SPR:$dst), (ins SPR:$dstin, SPR:$a, SPR:$b), - IIC_fpMAC32, "fnmacs", "\t$dst, $a, $b", +def VMLSS : ASbIn<0b11100000, (outs SPR:$dst), (ins SPR:$dstin, SPR:$a, SPR:$b), + IIC_fpMAC32, "vmls", ".f32\t$dst, $a, $b", [(set SPR:$dst, (fadd (fneg (fmul SPR:$a, SPR:$b)), SPR:$dstin))]>, RegConstraint<"$dstin = $dst"> { let Inst{6} = 1; } def : Pat<(fsub DPR:$dstin, (fmul DPR:$a, DPR:$b)), - (FNMACD DPR:$dstin, DPR:$a, DPR:$b)>, Requires<[DontUseNEONForFP]>; + (VMLSD DPR:$dstin, DPR:$a, DPR:$b)>, 
Requires<[DontUseNEONForFP]>; def : Pat<(fsub SPR:$dstin, (fmul SPR:$a, SPR:$b)), - (FNMACS SPR:$dstin, SPR:$a, SPR:$b)>, Requires<[DontUseNEONForFP]>; + (VMLSS SPR:$dstin, SPR:$a, SPR:$b)>, Requires<[DontUseNEONForFP]>; -def FNMSCD : ADbI<0b11100001, (outs DPR:$dst), (ins DPR:$dstin, DPR:$a, DPR:$b), - IIC_fpMAC64, "fnmscd", "\t$dst, $a, $b", +def VNMLAD : ADbI<0b11100001, (outs DPR:$dst), (ins DPR:$dstin, DPR:$a, DPR:$b), + IIC_fpMAC64, "vnmla", ".f64\t$dst, $a, $b", [(set DPR:$dst, (fsub (fneg (fmul DPR:$a, DPR:$b)), DPR:$dstin))]>, RegConstraint<"$dstin = $dst"> { let Inst{6} = 1; } -def FNMSCS : ASbI<0b11100001, (outs SPR:$dst), (ins SPR:$dstin, SPR:$a, SPR:$b), - IIC_fpMAC32, "fnmscs", "\t$dst, $a, $b", +def VNMLAS : ASbI<0b11100001, (outs SPR:$dst), (ins SPR:$dstin, SPR:$a, SPR:$b), + IIC_fpMAC32, "vnmla", ".f32\t$dst, $a, $b", [(set SPR:$dst, (fsub (fneg (fmul SPR:$a, SPR:$b)), SPR:$dstin))]>, RegConstraint<"$dstin = $dst"> { let Inst{6} = 1; @@ -389,27 +389,27 @@ def FNMSCS : ASbI<0b11100001, (outs SPR:$dst), (ins SPR:$dstin, SPR:$a, SPR:$b), // FP Conditional moves. // -def FCPYDcc : ADuI<0b11101011, 0b0000, 0b0100, +def VMOVDcc : ADuI<0b11101011, 0b0000, 0b0100, (outs DPR:$dst), (ins DPR:$false, DPR:$true), - IIC_fpUNA64, "fcpyd", "\t$dst, $true", + IIC_fpUNA64, "vmov", ".f64\t$dst, $true", [/*(set DPR:$dst, (ARMcmov DPR:$false, DPR:$true, imm:$cc))*/]>, RegConstraint<"$false = $dst">; -def FCPYScc : ASuI<0b11101011, 0b0000, 0b0100, +def VMOVScc : ASuI<0b11101011, 0b0000, 0b0100, (outs SPR:$dst), (ins SPR:$false, SPR:$true), - IIC_fpUNA32, "fcpys", "\t$dst, $true", + IIC_fpUNA32, "vmov", ".f32\t$dst, $true", [/*(set SPR:$dst, (ARMcmov SPR:$false, SPR:$true, imm:$cc))*/]>, RegConstraint<"$false = $dst">; -def FNEGDcc : ADuI<0b11101011, 0b0001, 0b0100, +def VNEGDcc : ADuI<0b11101011, 0b0001, 0b0100, (outs DPR:$dst), (ins DPR:$false, DPR:$true), - IIC_fpUNA64, "fnegd", "\t$dst, $true", + IIC_fpUNA64, "vneg", ".f64\t$dst, $true", [/*(set DPR:$dst, (ARMcneg DPR:$false, DPR:$true, imm:$cc))*/]>, RegConstraint<"$false = $dst">; -def FNEGScc : ASuI<0b11101011, 0b0001, 0b0100, +def VNEGScc : ASuI<0b11101011, 0b0001, 0b0100, (outs SPR:$dst), (ins SPR:$false, SPR:$true), - IIC_fpUNA32, "fnegs", "\t$dst, $true", + IIC_fpUNA32, "vneg", ".f32\t$dst, $true", [/*(set SPR:$dst, (ARMcneg SPR:$false, SPR:$true, imm:$cc))*/]>, RegConstraint<"$false = $dst">; @@ -418,8 +418,11 @@ def FNEGScc : ASuI<0b11101011, 0b0001, 0b0100, // Misc. // +// APSR is the application level alias of CPSR. This FPSCR N, Z, C, V flags +// to APSR. let Defs = [CPSR], Uses = [FPSCR] in -def FMSTAT : VFPAI<(outs), (ins), VFPMiscFrm, IIC_fpSTAT, "fmstat", "", +def FMSTAT : VFPAI<(outs), (ins), VFPMiscFrm, IIC_fpSTAT, "vmrs", + "\tapsr_nzcv, fpscr", [(arm_fmstat)]> { let Inst{27-20} = 0b11101111; let Inst{19-16} = 0b0001; @@ -431,26 +434,26 @@ def FMSTAT : VFPAI<(outs), (ins), VFPMiscFrm, IIC_fpSTAT, "fmstat", "", // Materialize FP immediates. VFP3 only. 
-let isReMaterializable = 1 in -def FCONSTS : VFPAI<(outs SPR:$dst), (ins vfp_f32imm:$imm), +let isReMaterializable = 1 in { +def FCONSTD : VFPAI<(outs DPR:$dst), (ins vfp_f64imm:$imm), VFPMiscFrm, IIC_VMOVImm, - "fconsts", "\t$dst, $imm", - [(set SPR:$dst, vfp_f32imm:$imm)]>, Requires<[HasVFP3]> { + "fconstd", "\t$dst, $imm", + [(set DPR:$dst, vfp_f64imm:$imm)]>, Requires<[HasVFP3]> { let Inst{27-23} = 0b11101; let Inst{21-20} = 0b11; let Inst{11-9} = 0b101; - let Inst{8} = 0; + let Inst{8} = 1; let Inst{7-4} = 0b0000; } -let isReMaterializable = 1 in -def FCONSTD : VFPAI<(outs DPR:$dst), (ins vfp_f64imm:$imm), +def FCONSTS : VFPAI<(outs SPR:$dst), (ins vfp_f32imm:$imm), VFPMiscFrm, IIC_VMOVImm, - "fconstd", "\t$dst, $imm", - [(set DPR:$dst, vfp_f64imm:$imm)]>, Requires<[HasVFP3]> { + "fconsts", "\t$dst, $imm", + [(set SPR:$dst, vfp_f32imm:$imm)]>, Requires<[HasVFP3]> { let Inst{27-23} = 0b11101; let Inst{21-20} = 0b11; let Inst{11-9} = 0b101; - let Inst{8} = 1; + let Inst{8} = 0; let Inst{7-4} = 0b0000; } +} diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp index 7e1783b..304d0ef 100644 --- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp +++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp @@ -41,8 +41,8 @@ using namespace llvm; STATISTIC(NumLDMGened , "Number of ldm instructions generated"); STATISTIC(NumSTMGened , "Number of stm instructions generated"); -STATISTIC(NumFLDMGened, "Number of fldm instructions generated"); -STATISTIC(NumFSTMGened, "Number of fstm instructions generated"); +STATISTIC(NumVLDMGened, "Number of vldm instructions generated"); +STATISTIC(NumVSTMGened, "Number of vstm instructions generated"); STATISTIC(NumLdStMoved, "Number of load / store instructions moved"); STATISTIC(NumLDRDFormed,"Number of ldrd created before allocation"); STATISTIC(NumSTRDFormed,"Number of strd created before allocation"); @@ -127,18 +127,18 @@ static int getLoadStoreMultipleOpcode(int Opcode) { case ARM::t2STRi12: NumSTMGened++; return ARM::t2STM; - case ARM::FLDS: - NumFLDMGened++; - return ARM::FLDMS; - case ARM::FSTS: - NumFSTMGened++; - return ARM::FSTMS; - case ARM::FLDD: - NumFLDMGened++; - return ARM::FLDMD; - case ARM::FSTD: - NumFSTMGened++; - return ARM::FSTMD; + case ARM::VLDRS: + NumVLDMGened++; + return ARM::VLDMS; + case ARM::VSTRS: + NumVSTMGened++; + return ARM::VSTMS; + case ARM::VLDRD: + NumVLDMGened++; + return ARM::VLDMD; + case ARM::VSTRD: + NumVSTMGened++; + return ARM::VSTMD; default: llvm_unreachable("Unhandled opcode!"); } return 0; @@ -229,8 +229,8 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB, BaseKill = true; // New base is always killed right its use. } - bool isDPR = Opcode == ARM::FLDD || Opcode == ARM::FSTD; - bool isDef = isi32Load(Opcode) || Opcode == ARM::FLDS || Opcode == ARM::FLDD; + bool isDPR = Opcode == ARM::VLDRD || Opcode == ARM::VSTRD; + bool isDef = isi32Load(Opcode) || Opcode == ARM::VLDRS || Opcode == ARM::VLDRD; Opcode = getLoadStoreMultipleOpcode(Opcode); MachineInstrBuilder MIB = (isAM4) ? 
BuildMI(MBB, MBBI, dl, TII->get(Opcode)) @@ -373,27 +373,27 @@ static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) { case ARM::t2LDRi12: case ARM::t2STRi8: case ARM::t2STRi12: - case ARM::FLDS: - case ARM::FSTS: + case ARM::VLDRS: + case ARM::VSTRS: return 4; - case ARM::FLDD: - case ARM::FSTD: + case ARM::VLDRD: + case ARM::VSTRD: return 8; case ARM::LDM: case ARM::STM: case ARM::t2LDM: case ARM::t2STM: return (MI->getNumOperands() - 5) * 4; - case ARM::FLDMS: - case ARM::FSTMS: - case ARM::FLDMD: - case ARM::FSTMD: + case ARM::VLDMS: + case ARM::VSTMS: + case ARM::VLDMD: + case ARM::VSTMD: return ARM_AM::getAM5Offset(MI->getOperand(1).getImm()) * 4; } } /// MergeBaseUpdateLSMultiple - Fold proceeding/trailing inc/dec of base -/// register into the LDM/STM/FLDM{D|S}/FSTM{D|S} op when possible: +/// register into the LDM/STM/VLDM{D|S}/VSTM{D|S} op when possible: /// /// stmia rn, <ra, rb, rc> /// rn := rn + 4 * 3; @@ -475,7 +475,7 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB, } } } else { - // FLDM{D|S}, FSTM{D|S} addressing mode 5 ops. + // VLDM{D|S}, VSTM{D|S} addressing mode 5 ops. if (ARM_AM::getAM5WBFlag(MI->getOperand(1).getImm())) return false; @@ -517,10 +517,10 @@ static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc) { switch (Opc) { case ARM::LDR: return ARM::LDR_PRE; case ARM::STR: return ARM::STR_PRE; - case ARM::FLDS: return ARM::FLDMS; - case ARM::FLDD: return ARM::FLDMD; - case ARM::FSTS: return ARM::FSTMS; - case ARM::FSTD: return ARM::FSTMD; + case ARM::VLDRS: return ARM::VLDMS; + case ARM::VLDRD: return ARM::VLDMD; + case ARM::VSTRS: return ARM::VSTMS; + case ARM::VSTRD: return ARM::VSTMD; case ARM::t2LDRi8: case ARM::t2LDRi12: return ARM::t2LDR_PRE; @@ -536,10 +536,10 @@ static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc) { switch (Opc) { case ARM::LDR: return ARM::LDR_POST; case ARM::STR: return ARM::STR_POST; - case ARM::FLDS: return ARM::FLDMS; - case ARM::FLDD: return ARM::FLDMD; - case ARM::FSTS: return ARM::FSTMS; - case ARM::FSTD: return ARM::FSTMD; + case ARM::VLDRS: return ARM::VLDMS; + case ARM::VLDRD: return ARM::VLDMD; + case ARM::VSTRS: return ARM::VSTMS; + case ARM::VSTRD: return ARM::VSTMD; case ARM::t2LDRi8: case ARM::t2LDRi12: return ARM::t2LDR_POST; @@ -564,8 +564,8 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB, unsigned Bytes = getLSMultipleTransferSize(MI); int Opcode = MI->getOpcode(); DebugLoc dl = MI->getDebugLoc(); - bool isAM5 = Opcode == ARM::FLDD || Opcode == ARM::FLDS || - Opcode == ARM::FSTD || Opcode == ARM::FSTS; + bool isAM5 = Opcode == ARM::VLDRD || Opcode == ARM::VLDRS || + Opcode == ARM::VSTRD || Opcode == ARM::VSTRS; bool isAM2 = Opcode == ARM::LDR || Opcode == ARM::STR; if (isAM2 && ARM_AM::getAM2Offset(MI->getOperand(3).getImm()) != 0) return false; @@ -575,7 +575,7 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB, if (MI->getOperand(2).getImm() != 0) return false; - bool isLd = isi32Load(Opcode) || Opcode == ARM::FLDS || Opcode == ARM::FLDD; + bool isLd = isi32Load(Opcode) || Opcode == ARM::VLDRS || Opcode == ARM::VLDRD; // Can't do the merge if the destination register is the same as the would-be // writeback register. 
if (isLd && MI->getOperand(0).getReg() == Base) @@ -626,7 +626,7 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB, if (!DoMerge) return false; - bool isDPR = NewOpc == ARM::FLDMD || NewOpc == ARM::FSTMD; + bool isDPR = NewOpc == ARM::VLDMD || NewOpc == ARM::VSTMD; unsigned Offset = 0; if (isAM5) Offset = ARM_AM::getAM5Opc((AddSub == ARM_AM::sub) @@ -638,7 +638,7 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB, Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes; if (isLd) { if (isAM5) - // FLDMS, FLDMD + // VLDMS, VLDMD BuildMI(MBB, MBBI, dl, TII->get(NewOpc)) .addReg(Base, getKillRegState(BaseKill)) .addImm(Offset).addImm(Pred).addReg(PredReg) @@ -657,7 +657,7 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB, } else { MachineOperand &MO = MI->getOperand(0); if (isAM5) - // FSTMS, FSTMD + // VSTMS, VSTMD BuildMI(MBB, MBBI, dl, TII->get(NewOpc)).addReg(Base).addImm(Offset) .addImm(Pred).addReg(PredReg) .addReg(Base, getDefRegState(true)) // WB base register @@ -687,11 +687,11 @@ static bool isMemoryOp(const MachineInstr *MI) { case ARM::LDR: case ARM::STR: return MI->getOperand(1).isReg() && MI->getOperand(2).getReg() == 0; - case ARM::FLDS: - case ARM::FSTS: + case ARM::VLDRS: + case ARM::VSTRS: return MI->getOperand(1).isReg(); - case ARM::FLDD: - case ARM::FSTD: + case ARM::VLDRD: + case ARM::VSTRD: return MI->getOperand(1).isReg(); case ARM::t2LDRi8: case ARM::t2LDRi12: @@ -866,6 +866,13 @@ bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB, BaseReg, BaseKill, BaseUndef, OffReg, OffKill, OffUndef, Pred, PredReg, TII, isT2); } else { + if (OddReg == EvenReg && EvenDeadKill) { + // If the two source operands are the same, the kill marker is probably + // on the first one. e.g. + // t2STRDi8 %R5<kill>, %R5, %R9<kill>, 0, 14, %reg0 + EvenDeadKill = false; + OddDeadKill = true; + } InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc, EvenReg, EvenDeadKill, EvenUndef, BaseReg, false, BaseUndef, OffReg, false, OffUndef, @@ -1214,7 +1221,7 @@ ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, if (!STI->hasV5TEOps()) return false; - // FIXME: FLDS / FSTS -> FLDD / FSTD + // FIXME: VLDRS / VSTRS -> VLDRD / VSTRD unsigned Scale = 1; unsigned Opcode = Op0->getOpcode(); if (Opcode == ARM::LDR) @@ -1456,7 +1463,7 @@ ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) { continue; int Opc = MI->getOpcode(); - bool isLd = isi32Load(Opc) || Opc == ARM::FLDS || Opc == ARM::FLDD; + bool isLd = isi32Load(Opc) || Opc == ARM::VLDRS || Opc == ARM::VLDRD; unsigned Base = MI->getOperand(1).getReg(); int Offset = getMemoryOpOffset(MI); diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp index 5af95c3..432ed78 100644 --- a/lib/Target/ARM/ARMSubtarget.cpp +++ b/lib/Target/ARM/ARMSubtarget.cpp @@ -16,6 +16,7 @@ #include "llvm/GlobalValue.h" #include "llvm/Target/TargetOptions.h" #include "llvm/Support/CommandLine.h" +#include "llvm/ADT/SmallVector.h" using namespace llvm; static cl::opt<bool> @@ -108,6 +109,8 @@ ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &FS, if (UseNEONFP.getPosition() == 0) UseNEONForSinglePrecisionFP = true; } + HasBranchTargetBuffer = (CPUString == "cortex-a8" || + CPUString == "cortex-a9"); } /// GVIsIndirectSymbol - true if the GV will be accessed via an indirect symbol. 
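The ARMSubtarget hunk below moves the enablePostRAScheduler hook out of line and widens its signature so the subtarget can also report which register classes form the scheduling critical path (the GPR class, for ARM). As a rough, non-authoritative sketch of how a post-RA scheduling pass might consume the widened hook — the local names, the SmallVector capacity, and the surrounding scope are illustrative assumptions, not code from this patch:

    // Sketch only. Assumes a const TargetSubtarget &ST and a
    // CodeGenOpt::Level OptLevel are already in scope in the pass.
    TargetSubtarget::AntiDepBreakMode AntiDepMode = TargetSubtarget::ANTIDEP_NONE;
    SmallVector<TargetRegisterClass*, 4> CriticalPathRCs;
    if (ST.enablePostRAScheduler(OptLevel, AntiDepMode, CriticalPathRCs)) {
      // Run the post-RA scheduler with the requested anti-dependence
      // breaking mode, prioritizing the returned classes (GPR on ARM)
      // along the critical path.
    }
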
@@ -159,3 +162,13 @@ ARMSubtarget::GVIsIndirectSymbol(GlobalValue *GV, Reloc::Model RelocM) const { return false; } + +bool ARMSubtarget::enablePostRAScheduler( + CodeGenOpt::Level OptLevel, + TargetSubtarget::AntiDepBreakMode& Mode, + RegClassVector& CriticalPathRCs) const { + Mode = TargetSubtarget::ANTIDEP_CRITICAL; + CriticalPathRCs.clear(); + CriticalPathRCs.push_back(&ARM::GPRRegClass); + return PostRAScheduler && OptLevel >= CodeGenOpt::Default; +} diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h index e721a7f..3d0e01e 100644 --- a/lib/Target/ARM/ARMSubtarget.h +++ b/lib/Target/ARM/ARMSubtarget.h @@ -17,6 +17,7 @@ #include "llvm/Target/TargetInstrItineraries.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetSubtarget.h" +#include "ARMBaseRegisterInfo.h" #include <string> namespace llvm { @@ -49,6 +50,9 @@ protected: /// determine if NEON should actually be used. bool UseNEONForSinglePrecisionFP; + /// HasBranchTargetBuffer - True if processor can predict indirect branches. + bool HasBranchTargetBuffer; + /// IsThumb - True if we are in thumb mode, false if in ARM mode. bool IsThumb; @@ -122,17 +126,16 @@ protected: bool isThumb2() const { return IsThumb && (ThumbMode == Thumb2); } bool hasThumb2() const { return ThumbMode >= Thumb2; } + bool hasBranchTargetBuffer() const { return HasBranchTargetBuffer; } + bool isR9Reserved() const { return IsR9Reserved; } const std::string & getCPUString() const { return CPUString; } - /// enablePostRAScheduler - True at 'More' optimization except - /// for Thumb1. + /// enablePostRAScheduler - True at 'More' optimization. bool enablePostRAScheduler(CodeGenOpt::Level OptLevel, - TargetSubtarget::AntiDepBreakMode& mode) const { - mode = TargetSubtarget::ANTIDEP_CRITICAL; - return PostRAScheduler && OptLevel >= CodeGenOpt::Default; - } + TargetSubtarget::AntiDepBreakMode& Mode, + RegClassVector& CriticalPathRCs) const; /// getInstrItins - Return the instruction itineraies based on subtarget /// selection. diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp index b4ce1d7..2564ed9 100644 --- a/lib/Target/ARM/ARMTargetMachine.cpp +++ b/lib/Target/ARM/ARMTargetMachine.cpp @@ -21,8 +21,7 @@ #include "llvm/Target/TargetRegistry.h" using namespace llvm; -static const MCAsmInfo *createMCAsmInfo(const Target &T, - const StringRef &TT) { +static const MCAsmInfo *createMCAsmInfo(const Target &T, StringRef TT) { Triple TheTriple(TT); switch (TheTriple.getOS()) { case Triple::Darwin: @@ -61,8 +60,8 @@ ARMTargetMachine::ARMTargetMachine(const Target &T, const std::string &TT, const std::string &FS) : ARMBaseTargetMachine(T, TT, FS, false), InstrInfo(Subtarget), DataLayout(Subtarget.isAPCS_ABI() ? - std::string("e-p:32:32-f64:32:32-i64:32:32") : - std::string("e-p:32:32-f64:64:64-i64:64:64")), + std::string("e-p:32:32-f64:32:32-i64:32:32-n32") : + std::string("e-p:32:32-f64:64:64-i64:64:64-n32")), TLInfo(*this) { } @@ -74,9 +73,9 @@ ThumbTargetMachine::ThumbTargetMachine(const Target &T, const std::string &TT, : ((ARMBaseInstrInfo*)new Thumb1InstrInfo(Subtarget))), DataLayout(Subtarget.isAPCS_ABI() ? 
std::string("e-p:32:32-f64:32:32-i64:32:32-" - "i16:16:32-i8:8:32-i1:8:32-a:0:32") : + "i16:16:32-i8:8:32-i1:8:32-a:0:32-n32") : std::string("e-p:32:32-f64:64:64-i64:64:64-" - "i16:16:32-i8:8:32-i1:8:32-a:0:32")), + "i16:16:32-i8:8:32-i1:8:32-a:0:32-n32")), TLInfo(*this) { } @@ -94,6 +93,10 @@ bool ARMBaseTargetMachine::addPreRegAlloc(PassManagerBase &PM, if (Subtarget.hasNEON()) PM.add(createNEONPreAllocPass()); + // Calculate and set max stack object alignment early, so we can decide + // whether we will need stack realignment (and thus FP). + PM.add(createARMMaxStackAlignmentCalculatorPass()); + // FIXME: temporarily disabling load / store optimization pass for Thumb1. if (OptLevel != CodeGenOpt::None && !Subtarget.isThumb1Only()) PM.add(createARMLoadStoreOptimizationPass(true)); @@ -106,6 +109,10 @@ bool ARMBaseTargetMachine::addPreSched2(PassManagerBase &PM, if (OptLevel != CodeGenOpt::None && !Subtarget.isThumb1Only()) PM.add(createARMLoadStoreOptimizationPass()); + // Expand some pseudo instructions into multiple instructions to allow + // proper scheduling. + PM.add(createARMExpandPseudoPass()); + return true; } diff --git a/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp b/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp index 6cb3e9e4..0352503 100644 --- a/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp +++ b/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp @@ -43,6 +43,7 @@ #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/Statistic.h" +#include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringSet.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" @@ -138,6 +139,19 @@ namespace { void printVFPf32ImmOperand(const MachineInstr *MI, int OpNum); void printVFPf64ImmOperand(const MachineInstr *MI, int OpNum); + void printHex8ImmOperand(const MachineInstr *MI, int OpNum) { + O << "#0x" << utohexstr(MI->getOperand(OpNum).getImm() & 0xff); + } + void printHex16ImmOperand(const MachineInstr *MI, int OpNum) { + O << "#0x" << utohexstr(MI->getOperand(OpNum).getImm() & 0xffff); + } + void printHex32ImmOperand(const MachineInstr *MI, int OpNum) { + O << "#0x" << utohexstr(MI->getOperand(OpNum).getImm() & 0xffffffff); + } + void printHex64ImmOperand(const MachineInstr *MI, int OpNum) { + O << "#0x" << utohexstr(MI->getOperand(OpNum).getImm()); + } + virtual bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNum, unsigned AsmVariant, const char *ExtraCode); virtual bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNum, @@ -199,7 +213,7 @@ namespace { if (ACPV->hasModifier()) O << "(" << ACPV->getModifier() << ")"; if (ACPV->getPCAdjustment() != 0) { O << "-(" << MAI->getPrivateGlobalPrefix() << "PC" - << ACPV->getLabelId() + << getFunctionNumber() << "_" << ACPV->getLabelId() << "+" << (unsigned)ACPV->getPCAdjustment(); if (ACPV->mustAddCurrentAddress()) O << "-."; @@ -333,6 +347,7 @@ void ARMAsmPrinter::printOperand(const MachineInstr *MI, int OpNum, &ARM::DPR_VFP2RegClass); O << getRegisterName(DReg) << '[' << (RegNum & 1) << ']'; } else { + assert(!MO.getSubReg() && "Subregs should be eliminated!"); O << getRegisterName(Reg); } break; @@ -594,12 +609,7 @@ void ARMAsmPrinter::printAddrMode5Operand(const MachineInstr *MI, int Op, if (Modifier && strcmp(Modifier, "submode") == 0) { ARM_AM::AMSubMode Mode = ARM_AM::getAM5SubMode(MO2.getImm()); - if (MO1.getReg() == ARM::SP) { - bool isFLDM = (MI->getOpcode() == ARM::FLDMD || - MI->getOpcode() == ARM::FLDMS); - O << ARM_AM::getAMSubModeAltStr(Mode, isFLDM); - } else - O << 
ARM_AM::getAMSubModeStr(Mode); + O << ARM_AM::getAMSubModeStr(Mode); return; } else if (Modifier && strcmp(Modifier, "base") == 0) { // Used for FSTM{D|S} and LSTM{D|S} operations. @@ -623,9 +633,14 @@ void ARMAsmPrinter::printAddrMode6Operand(const MachineInstr *MI, int Op) { const MachineOperand &MO1 = MI->getOperand(Op); const MachineOperand &MO2 = MI->getOperand(Op+1); const MachineOperand &MO3 = MI->getOperand(Op+2); + const MachineOperand &MO4 = MI->getOperand(Op+3); - // FIXME: No support yet for specifying alignment. - O << "[" << getRegisterName(MO1.getReg()) << "]"; + O << "[" << getRegisterName(MO1.getReg()); + if (MO4.getImm()) { + // FIXME: Both darwin as and GNU as violate ARM docs here. + O << ", :" << MO4.getImm(); + } + O << "]"; if (ARM_AM::getAM6WBFlag(MO3.getImm())) { if (MO2.getReg() == 0) @@ -697,11 +712,8 @@ ARMAsmPrinter::printThumbAddrModeRI5Operand(const MachineInstr *MI, int Op, O << "[" << getRegisterName(MO1.getReg()); if (MO3.getReg()) O << ", " << getRegisterName(MO3.getReg()); - else if (unsigned ImmOffs = MO2.getImm()) { - O << ", #" << ImmOffs; - if (Scale > 1) - O << " * " << Scale; - } + else if (unsigned ImmOffs = MO2.getImm()) + O << ", #" << ImmOffs * Scale; O << "]"; } @@ -844,7 +856,8 @@ void ARMAsmPrinter::printSBitModifierOperand(const MachineInstr *MI, int OpNum){ void ARMAsmPrinter::printPCLabel(const MachineInstr *MI, int OpNum) { int Id = (int)MI->getOperand(OpNum).getImm(); - O << MAI->getPrivateGlobalPrefix() << "PC" << Id; + O << MAI->getPrivateGlobalPrefix() + << "PC" << getFunctionNumber() << "_" << Id; } void ARMAsmPrinter::printRegisterList(const MachineInstr *MI, int OpNum) { @@ -1070,7 +1083,7 @@ void ARMAsmPrinter::printMachineInstruction(const MachineInstr *MI) { printInstruction(MI); } - if (VerboseAsm && !MI->getDebugLoc().isUnknown()) + if (VerboseAsm) EmitComments(*MI); O << '\n'; processDebugLoc(MI, false); @@ -1107,9 +1120,8 @@ void ARMAsmPrinter::EmitStartOfAsmFile(Module &M) { } } - // Use unified assembler syntax mode for Thumb. - if (Subtarget->isThumb()) - O << "\t.syntax unified\n"; + // Use unified assembler syntax. + O << "\t.syntax unified\n"; // Emit ARM Build Attributes if (Subtarget->isTargetELF()) { @@ -1349,7 +1361,6 @@ void ARMAsmPrinter::printInstructionThroughMCStreamer(const MachineInstr *MI) { printKill(MI); return; case TargetInstrInfo::INLINEASM: - O << '\t'; printInlineAsm(MI); return; case TargetInstrInfo::IMPLICIT_DEF: @@ -1365,7 +1376,8 @@ void ARMAsmPrinter::printInstructionThroughMCStreamer(const MachineInstr *MI) { // FIXME: MOVE TO SHARED PLACE. 
unsigned Id = (unsigned)MI->getOperand(2).getImm(); const char *Prefix = MAI->getPrivateGlobalPrefix(); - MCSymbol *Label =OutContext.GetOrCreateSymbol(Twine(Prefix)+"PC"+Twine(Id)); + MCSymbol *Label =OutContext.GetOrCreateSymbol(Twine(Prefix) + + "PC" + Twine(getFunctionNumber()) + "_" + Twine(Id)); OutStreamer.EmitLabel(Label); diff --git a/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp b/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp index f422798..0047925 100644 --- a/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp +++ b/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp @@ -259,12 +259,7 @@ void ARMInstPrinter::printAddrMode5Operand(const MCInst *MI, unsigned OpNum, if (Modifier && strcmp(Modifier, "submode") == 0) { ARM_AM::AMSubMode Mode = ARM_AM::getAM5SubMode(MO2.getImm()); - if (MO1.getReg() == ARM::SP) { - bool isFLDM = (MI->getOpcode() == ARM::FLDMD || - MI->getOpcode() == ARM::FLDMS); - O << ARM_AM::getAMSubModeAltStr(Mode, isFLDM); - } else - O << ARM_AM::getAMSubModeStr(Mode); + O << ARM_AM::getAMSubModeStr(Mode); return; } else if (Modifier && strcmp(Modifier, "base") == 0) { // Used for FSTM{D|S} and LSTM{D|S} operations. diff --git a/lib/Target/ARM/AsmPrinter/ARMInstPrinter.h b/lib/Target/ARM/AsmPrinter/ARMInstPrinter.h index 5bf966b..9e7f8d5 100644 --- a/lib/Target/ARM/AsmPrinter/ARMInstPrinter.h +++ b/lib/Target/ARM/AsmPrinter/ARMInstPrinter.h @@ -80,6 +80,10 @@ public: void printNoHashImmediate(const MCInst *MI, unsigned OpNum); void printVFPf32ImmOperand(const MCInst *MI, int OpNum) {} void printVFPf64ImmOperand(const MCInst *MI, int OpNum) {} + void printHex8ImmOperand(const MCInst *MI, int OpNum) {} + void printHex16ImmOperand(const MCInst *MI, int OpNum) {} + void printHex32ImmOperand(const MCInst *MI, int OpNum) {} + void printHex64ImmOperand(const MCInst *MI, int OpNum) {} void printPCLabel(const MCInst *MI, unsigned OpNum); // FIXME: Implement. diff --git a/lib/Target/ARM/AsmPrinter/ARMMCInstLower.cpp b/lib/Target/ARM/AsmPrinter/ARMMCInstLower.cpp index 8686961..c49fee3 100644 --- a/lib/Target/ARM/AsmPrinter/ARMMCInstLower.cpp +++ b/lib/Target/ARM/AsmPrinter/ARMMCInstLower.cpp @@ -137,6 +137,7 @@ void ARMMCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const { case MachineOperand::MO_Register: // Ignore all implicit register operands. if (MO.isImplicit()) continue; + assert(!MO.getSubReg() && "Subregs should be eliminated!"); MCOp = MCOperand::CreateReg(MO.getReg()); break; case MachineOperand::MO_Immediate: diff --git a/lib/Target/ARM/CMakeLists.txt b/lib/Target/ARM/CMakeLists.txt index e071b61..964551f 100644 --- a/lib/Target/ARM/CMakeLists.txt +++ b/lib/Target/ARM/CMakeLists.txt @@ -17,6 +17,7 @@ add_llvm_target(ARMCodeGen ARMCodeEmitter.cpp ARMConstantIslandPass.cpp ARMConstantPoolValue.cpp + ARMExpandPseudoInsts.cpp ARMISelDAGToDAG.cpp ARMISelLowering.cpp ARMInstrInfo.cpp diff --git a/lib/Target/ARM/NEONMoveFix.cpp b/lib/Target/ARM/NEONMoveFix.cpp index f307e3b..7d767ec 100644 --- a/lib/Target/ARM/NEONMoveFix.cpp +++ b/lib/Target/ARM/NEONMoveFix.cpp @@ -54,10 +54,10 @@ bool NEONMoveFixPass::InsertMoves(MachineBasicBlock &MBB) { NextMII = next(MII); MachineInstr *MI = &*MII; - if (MI->getOpcode() == ARM::FCPYD && + if (MI->getOpcode() == ARM::VMOVD && !TII->isPredicated(MI)) { unsigned SrcReg = MI->getOperand(1).getReg(); - // If we do not found an instruction defining the reg, this means the + // If we do not find an instruction defining the reg, this means the // register should be live-in for this BB. It's always to better to use // NEON reg-reg moves. 
unsigned Domain = ARMII::DomainNEON; @@ -71,7 +71,7 @@ bool NEONMoveFixPass::InsertMoves(MachineBasicBlock &MBB) { } if (Domain & ARMII::DomainNEON) { - // Convert FCPYD to VMOVD. + // Convert VMOVD to VMOVDneon unsigned DestReg = MI->getOperand(0).getReg(); DEBUG({errs() << "vmov convert: "; MI->dump();}); @@ -82,7 +82,7 @@ bool NEONMoveFixPass::InsertMoves(MachineBasicBlock &MBB) { // - The imp-defs / imp-uses are superregs only, we don't care about // them. BuildMI(MBB, *MI, MI->getDebugLoc(), - TII->get(ARM::VMOVD), DestReg).addReg(SrcReg); + TII->get(ARM::VMOVDneon), DestReg).addReg(SrcReg); MBB.erase(MI); MachineBasicBlock::iterator I = prior(NextMII); MI = &*I; diff --git a/lib/Target/ARM/NEONPreAllocPass.cpp b/lib/Target/ARM/NEONPreAllocPass.cpp index 8b2bcd0..206677b 100644 --- a/lib/Target/ARM/NEONPreAllocPass.cpp +++ b/lib/Target/ARM/NEONPreAllocPass.cpp @@ -177,20 +177,20 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs, case ARM::VST2LNd8: case ARM::VST2LNd16: case ARM::VST2LNd32: - FirstOpnd = 3; + FirstOpnd = 4; NumRegs = 2; return true; case ARM::VST2q8: case ARM::VST2q16: case ARM::VST2q32: - FirstOpnd = 3; + FirstOpnd = 4; NumRegs = 4; return true; case ARM::VST2LNq16a: case ARM::VST2LNq32a: - FirstOpnd = 3; + FirstOpnd = 4; NumRegs = 2; Offset = 0; Stride = 2; @@ -198,7 +198,7 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs, case ARM::VST2LNq16b: case ARM::VST2LNq32b: - FirstOpnd = 3; + FirstOpnd = 4; NumRegs = 2; Offset = 1; Stride = 2; @@ -211,14 +211,14 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs, case ARM::VST3LNd8: case ARM::VST3LNd16: case ARM::VST3LNd32: - FirstOpnd = 3; + FirstOpnd = 4; NumRegs = 3; return true; case ARM::VST3q8a: case ARM::VST3q16a: case ARM::VST3q32a: - FirstOpnd = 4; + FirstOpnd = 5; NumRegs = 3; Offset = 0; Stride = 2; @@ -227,7 +227,7 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs, case ARM::VST3q8b: case ARM::VST3q16b: case ARM::VST3q32b: - FirstOpnd = 4; + FirstOpnd = 5; NumRegs = 3; Offset = 1; Stride = 2; @@ -235,7 +235,7 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs, case ARM::VST3LNq16a: case ARM::VST3LNq32a: - FirstOpnd = 3; + FirstOpnd = 4; NumRegs = 3; Offset = 0; Stride = 2; @@ -243,7 +243,7 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs, case ARM::VST3LNq16b: case ARM::VST3LNq32b: - FirstOpnd = 3; + FirstOpnd = 4; NumRegs = 3; Offset = 1; Stride = 2; @@ -256,14 +256,14 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs, case ARM::VST4LNd8: case ARM::VST4LNd16: case ARM::VST4LNd32: - FirstOpnd = 3; + FirstOpnd = 4; NumRegs = 4; return true; case ARM::VST4q8a: case ARM::VST4q16a: case ARM::VST4q32a: - FirstOpnd = 4; + FirstOpnd = 5; NumRegs = 4; Offset = 0; Stride = 2; @@ -272,7 +272,7 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs, case ARM::VST4q8b: case ARM::VST4q16b: case ARM::VST4q32b: - FirstOpnd = 4; + FirstOpnd = 5; NumRegs = 4; Offset = 1; Stride = 2; @@ -280,7 +280,7 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs, case ARM::VST4LNq16a: case ARM::VST4LNq32a: - FirstOpnd = 3; + FirstOpnd = 4; NumRegs = 4; Offset = 0; Stride = 2; @@ -288,7 +288,7 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs, case ARM::VST4LNq16b: case ARM::VST4LNq32b: - FirstOpnd = 3; + FirstOpnd = 4; 
NumRegs = 4; Offset = 1; Stride = 2; diff --git a/lib/Target/ARM/README-Thumb.txt b/lib/Target/ARM/README-Thumb.txt index e7770b2..6b605bb 100644 --- a/lib/Target/ARM/README-Thumb.txt +++ b/lib/Target/ARM/README-Thumb.txt @@ -37,7 +37,7 @@ LPCRELL0: mov r1, #PCRELV0 add r1, pc ldr r0, [r0, r1] - cpy pc, r0 + mov pc, r0 .align 2 LJTI1_0_0: .long LBB1_3 @@ -51,7 +51,7 @@ We should be able to generate: LPCRELL0: add r1, LJTI1_0_0 ldr r0, [r0, r1] - cpy pc, r0 + mov pc, r0 .align 2 LJTI1_0_0: .long LBB1_3 @@ -206,8 +206,8 @@ LPC0: add r5, pc ldr r6, LCPI1_1 ldr r2, LCPI1_2 - cpy r3, r6 - cpy lr, pc + mov r3, r6 + mov lr, pc bx r5 //===---------------------------------------------------------------------===// diff --git a/lib/Target/ARM/README.txt b/lib/Target/ARM/README.txt index fb64d9f..11c48ad 100644 --- a/lib/Target/ARM/README.txt +++ b/lib/Target/ARM/README.txt @@ -321,7 +321,7 @@ time. 4) Once we added support for multiple result patterns, write indexed loads patterns instead of C++ instruction selection code. -5) Use FLDM / FSTM to emulate indexed FP load / store. +5) Use VLDM / VSTM to emulate indexed FP load / store. //===---------------------------------------------------------------------===// @@ -591,3 +591,8 @@ http://lists.cs.uiuc.edu/pipermail/llvmdev/2009-June/022763.html //===---------------------------------------------------------------------===// Make use of the "rbit" instruction. + +//===---------------------------------------------------------------------===// + +Take a look at test/CodeGen/Thumb2/machine-licm.ll. ARM should be taught how +to licm and cse the unnecessary load from cp#1. diff --git a/lib/Target/ARM/Thumb1InstrInfo.cpp b/lib/Target/ARM/Thumb1InstrInfo.cpp index b6dd56c..7602b6d 100644 --- a/lib/Target/ARM/Thumb1InstrInfo.cpp +++ b/lib/Target/ARM/Thumb1InstrInfo.cpp @@ -1,4 +1,4 @@ -//===- Thumb1InstrInfo.cpp - Thumb-1 Instruction Information --------*- C++ -*-===// +//===- Thumb1InstrInfo.cpp - Thumb-1 Instruction Information ----*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -11,7 +11,7 @@ // //===----------------------------------------------------------------------===// -#include "ARMInstrInfo.h" +#include "Thumb1InstrInfo.h" #include "ARM.h" #include "ARMGenInstrInfo.inc" #include "ARMMachineFunctionInfo.h" diff --git a/lib/Target/ARM/Thumb1InstrInfo.h b/lib/Target/ARM/Thumb1InstrInfo.h index 13cc578..b28229d 100644 --- a/lib/Target/ARM/Thumb1InstrInfo.h +++ b/lib/Target/ARM/Thumb1InstrInfo.h @@ -1,4 +1,4 @@ -//===- Thumb1InstrInfo.h - Thumb-1 Instruction Information ----------*- C++ -*-===// +//===- Thumb1InstrInfo.h - Thumb-1 Instruction Information ------*- C++ -*-===// // // The LLVM Compiler Infrastructure // diff --git a/lib/Target/ARM/Thumb1RegisterInfo.cpp b/lib/Target/ARM/Thumb1RegisterInfo.cpp index 5aaaf9c..37adf37 100644 --- a/lib/Target/ARM/Thumb1RegisterInfo.cpp +++ b/lib/Target/ARM/Thumb1RegisterInfo.cpp @@ -1,4 +1,4 @@ -//===- Thumb1RegisterInfo.cpp - Thumb-1 Register Information -------*- C++ -*-===// +//===- Thumb1RegisterInfo.cpp - Thumb-1 Register Information ----*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -7,7 +7,8 @@ // //===----------------------------------------------------------------------===// // -// This file contains the Thumb-1 implementation of the TargetRegisterInfo class. +// This file contains the Thumb-1 implementation of the TargetRegisterInfo +// class. 
// //===----------------------------------------------------------------------===// @@ -794,7 +795,7 @@ void Thumb1RegisterInfo::emitEpilogue(MachineFunction &MF, if (NumBytes != 0) emitSPUpdate(MBB, MBBI, TII, dl, *this, NumBytes); } else { - // Unwind MBBI to point to first LDR / FLDD. + // Unwind MBBI to point to first LDR / VLDRD. const unsigned *CSRegs = getCalleeSavedRegs(); if (MBBI != MBB.begin()) { do diff --git a/lib/Target/ARM/Thumb1RegisterInfo.h b/lib/Target/ARM/Thumb1RegisterInfo.h index 241f1cc..37ad388 100644 --- a/lib/Target/ARM/Thumb1RegisterInfo.h +++ b/lib/Target/ARM/Thumb1RegisterInfo.h @@ -1,4 +1,4 @@ -//===- Thumb1RegisterInfo.h - Thumb-1 Register Information Impl ----*- C++ -*-===// +//===- Thumb1RegisterInfo.h - Thumb-1 Register Information Impl -*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -7,7 +7,8 @@ // //===----------------------------------------------------------------------===// // -// This file contains the Thumb-1 implementation of the TargetRegisterInfo class. +// This file contains the Thumb-1 implementation of the TargetRegisterInfo +// class. // //===----------------------------------------------------------------------===// diff --git a/lib/Target/ARM/Thumb2ITBlockPass.cpp b/lib/Target/ARM/Thumb2ITBlockPass.cpp index 462844b..f5ba155 100644 --- a/lib/Target/ARM/Thumb2ITBlockPass.cpp +++ b/lib/Target/ARM/Thumb2ITBlockPass.cpp @@ -1,4 +1,4 @@ -//===-- Thumb2ITBlockPass.cpp - Insert Thumb IT blocks -----------*- C++ -*-===// +//===-- Thumb2ITBlockPass.cpp - Insert Thumb IT blocks ----------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -34,10 +34,6 @@ namespace { } private: - MachineBasicBlock::iterator - SplitT2MOV32imm(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, - MachineInstr *MI, DebugLoc dl, - unsigned PredReg, ARMCC::CondCodes CC); bool InsertITBlocks(MachineBasicBlock &MBB); }; char Thumb2ITBlockPass::ID = 0; @@ -50,34 +46,6 @@ static ARMCC::CondCodes getPredicate(const MachineInstr *MI, unsigned &PredReg){ return llvm::getInstrPredicate(MI, PredReg); } -MachineBasicBlock::iterator -Thumb2ITBlockPass::SplitT2MOV32imm(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, - MachineInstr *MI, - DebugLoc dl, unsigned PredReg, - ARMCC::CondCodes CC) { - // Splitting t2MOVi32imm into a pair of t2MOVi16 + t2MOVTi16 here. - // The only reason it was a single instruction was so it could be - // re-materialized. We want to split it before this and the thumb2 - // size reduction pass to make sure the IT mask is correct and expose - // width reduction opportunities. It doesn't make sense to do this in a - // separate pass so here it is. - unsigned DstReg = MI->getOperand(0).getReg(); - bool DstDead = MI->getOperand(0).isDead(); // Is this possible? 
- unsigned Imm = MI->getOperand(1).getImm(); - unsigned Lo16 = Imm & 0xffff; - unsigned Hi16 = (Imm >> 16) & 0xffff; - BuildMI(MBB, MBBI, dl, TII->get(ARM::t2MOVi16), DstReg) - .addImm(Lo16).addImm(CC).addReg(PredReg); - BuildMI(MBB, MBBI, dl, TII->get(ARM::t2MOVTi16)) - .addReg(DstReg, getDefRegState(true) | getDeadRegState(DstDead)) - .addReg(DstReg).addImm(Hi16).addImm(CC).addReg(PredReg); - --MBBI; - --MBBI; - MI->eraseFromParent(); - return MBBI; -} - bool Thumb2ITBlockPass::InsertITBlocks(MachineBasicBlock &MBB) { bool Modified = false; @@ -88,11 +56,6 @@ bool Thumb2ITBlockPass::InsertITBlocks(MachineBasicBlock &MBB) { unsigned PredReg = 0; ARMCC::CondCodes CC = getPredicate(MI, PredReg); - if (MI->getOpcode() == ARM::t2MOVi32imm) { - MBBI = SplitT2MOV32imm(MBB, MBBI, MI, dl, PredReg, CC); - continue; - } - if (CC == ARMCC::AL) { ++MBBI; continue; @@ -115,11 +78,6 @@ bool Thumb2ITBlockPass::InsertITBlocks(MachineBasicBlock &MBB) { DebugLoc ndl = NMI->getDebugLoc(); unsigned NPredReg = 0; ARMCC::CondCodes NCC = getPredicate(NMI, NPredReg); - if (NMI->getOpcode() == ARM::t2MOVi32imm) { - MBBI = SplitT2MOV32imm(MBB, MBBI, NMI, ndl, NPredReg, NCC); - continue; - } - if (NCC == OCC) { Mask |= (1 << Pos); } else if (NCC != CC) diff --git a/lib/Target/ARM/Thumb2InstrInfo.cpp b/lib/Target/ARM/Thumb2InstrInfo.cpp index 21fff51..16c1e6f 100644 --- a/lib/Target/ARM/Thumb2InstrInfo.cpp +++ b/lib/Target/ARM/Thumb2InstrInfo.cpp @@ -1,4 +1,4 @@ -//===- Thumb2InstrInfo.cpp - Thumb-2 Instruction Information --------*- C++ -*-===// +//===- Thumb2InstrInfo.cpp - Thumb-2 Instruction Information ----*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -11,8 +11,9 @@ // //===----------------------------------------------------------------------===// -#include "ARMInstrInfo.h" +#include "Thumb2InstrInfo.h" #include "ARM.h" +#include "ARMConstantPoolValue.h" #include "ARMAddressingModes.h" #include "ARMGenInstrInfo.inc" #include "ARMMachineFunctionInfo.h" @@ -132,7 +133,6 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, ARMBaseInstrInfo::loadRegFromStackSlot(MBB, I, DestReg, FI, RC); } - void llvm::emitT2RegPlusImmediate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, DebugLoc dl, unsigned DestReg, unsigned BaseReg, int NumBytes, diff --git a/lib/Target/ARM/Thumb2InstrInfo.h b/lib/Target/ARM/Thumb2InstrInfo.h index f3688c0..663a60b 100644 --- a/lib/Target/ARM/Thumb2InstrInfo.h +++ b/lib/Target/ARM/Thumb2InstrInfo.h @@ -1,4 +1,4 @@ -//===- Thumb2InstrInfo.h - Thumb-2 Instruction Information ----------*- C++ -*-===// +//===- Thumb2InstrInfo.h - Thumb-2 Instruction Information ------*- C++ -*-===// // // The LLVM Compiler Infrastructure // diff --git a/lib/Target/ARM/Thumb2RegisterInfo.h b/lib/Target/ARM/Thumb2RegisterInfo.h index a295630..b3cf2e5 100644 --- a/lib/Target/ARM/Thumb2RegisterInfo.h +++ b/lib/Target/ARM/Thumb2RegisterInfo.h @@ -1,4 +1,4 @@ -//===- Thumb2RegisterInfo.h - Thumb-2 Register Information Impl ----*- C++ -*-===// +//===- Thumb2RegisterInfo.h - Thumb-2 Register Information Impl -*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -7,7 +7,8 @@ // //===----------------------------------------------------------------------===// // -// This file contains the Thumb-2 implementation of the TargetRegisterInfo class. +// This file contains the Thumb-2 implementation of the TargetRegisterInfo +// class. 
// //===----------------------------------------------------------------------===// diff --git a/lib/Target/Alpha/AlphaISelDAGToDAG.cpp b/lib/Target/Alpha/AlphaISelDAGToDAG.cpp index e3587fb..5b0a89d 100644 --- a/lib/Target/Alpha/AlphaISelDAGToDAG.cpp +++ b/lib/Target/Alpha/AlphaISelDAGToDAG.cpp @@ -225,8 +225,6 @@ SDNode *AlphaDAGToDAGISel::getGlobalRetAddr() { /// InstructionSelect - This callback is invoked by /// SelectionDAGISel when it has created a SelectionDAG for us to codegen. void AlphaDAGToDAGISel::InstructionSelect() { - DEBUG(BB->dump()); - // Select target instructions for the DAG. SelectRoot(*CurDAG); CurDAG->RemoveDeadNodes(); diff --git a/lib/Target/Alpha/AlphaISelLowering.cpp b/lib/Target/Alpha/AlphaISelLowering.cpp index cb03a6f..9217522 100644 --- a/lib/Target/Alpha/AlphaISelLowering.cpp +++ b/lib/Target/Alpha/AlphaISelLowering.cpp @@ -426,7 +426,7 @@ AlphaTargetLowering::LowerFormalArguments(SDValue Chain, } } else { //more args // Create the frame index object for this incoming parameter... - int FI = MFI->CreateFixedObject(8, 8 * (ArgNo - 6)); + int FI = MFI->CreateFixedObject(8, 8 * (ArgNo - 6), true, false); // Create the SelectionDAG nodes corresponding to a load //from this parameter @@ -444,7 +444,7 @@ AlphaTargetLowering::LowerFormalArguments(SDValue Chain, if (TargetRegisterInfo::isPhysicalRegister(args_int[i])) args_int[i] = AddLiveIn(MF, args_int[i], &Alpha::GPRCRegClass); SDValue argt = DAG.getCopyFromReg(Chain, dl, args_int[i], MVT::i64); - int FI = MFI->CreateFixedObject(8, -8 * (6 - i)); + int FI = MFI->CreateFixedObject(8, -8 * (6 - i), true, false); if (i == 0) VarArgsBase = FI; SDValue SDFI = DAG.getFrameIndex(FI, MVT::i64); LS.push_back(DAG.getStore(Chain, dl, argt, SDFI, NULL, 0)); @@ -452,7 +452,7 @@ AlphaTargetLowering::LowerFormalArguments(SDValue Chain, if (TargetRegisterInfo::isPhysicalRegister(args_float[i])) args_float[i] = AddLiveIn(MF, args_float[i], &Alpha::F8RCRegClass); argt = DAG.getCopyFromReg(Chain, dl, args_float[i], MVT::f64); - FI = MFI->CreateFixedObject(8, - 8 * (12 - i)); + FI = MFI->CreateFixedObject(8, - 8 * (12 - i), true, false); SDFI = DAG.getFrameIndex(FI, MVT::i64); LS.push_back(DAG.getStore(Chain, dl, argt, SDFI, NULL, 0)); } diff --git a/lib/Target/Alpha/AlphaInstrInfo.td b/lib/Target/Alpha/AlphaInstrInfo.td index 81e1fb7..8917e86 100644 --- a/lib/Target/Alpha/AlphaInstrInfo.td +++ b/lib/Target/Alpha/AlphaInstrInfo.td @@ -391,7 +391,7 @@ def : Pat<(setune GPRC:$X, GPRC:$Y), (CMPEQi (CMPEQ GPRC:$X, GPRC:$Y), 0)>; def : Pat<(setune GPRC:$X, immUExt8:$Y), (CMPEQi (CMPEQ GPRC:$X, immUExt8:$Y), 0)>; -let isReturn = 1, isTerminator = 1, Ra = 31, Rb = 26, disp = 1, Uses = [R26] in { +let isReturn = 1, isTerminator = 1, isBarrier = 1, Ra = 31, Rb = 26, disp = 1, Uses = [R26] in { def RETDAG : MbrForm< 0x1A, 0x02, (ops), "ret $$31,($$26),1", s_jsr>; //Return from subroutine def RETDAGp : MbrpForm< 0x1A, 0x02, (ops), "ret $$31,($$26),1", [(retflag)], s_jsr>; //Return from subroutine } diff --git a/lib/Target/Alpha/AlphaRegisterInfo.cpp b/lib/Target/Alpha/AlphaRegisterInfo.cpp index 98e9730..64bdd62 100644 --- a/lib/Target/Alpha/AlphaRegisterInfo.cpp +++ b/lib/Target/Alpha/AlphaRegisterInfo.cpp @@ -314,7 +314,7 @@ unsigned AlphaRegisterInfo::getRARegister() const { return 0; } -unsigned AlphaRegisterInfo::getFrameRegister(MachineFunction &MF) const { +unsigned AlphaRegisterInfo::getFrameRegister(const MachineFunction &MF) const { return hasFP(MF) ? 
Alpha::R15 : Alpha::R30; } diff --git a/lib/Target/Alpha/AlphaRegisterInfo.h b/lib/Target/Alpha/AlphaRegisterInfo.h index 66f0898..a971e21 100644 --- a/lib/Target/Alpha/AlphaRegisterInfo.h +++ b/lib/Target/Alpha/AlphaRegisterInfo.h @@ -52,7 +52,7 @@ struct AlphaRegisterInfo : public AlphaGenRegisterInfo { // Debug information queries. unsigned getRARegister() const; - unsigned getFrameRegister(MachineFunction &MF) const; + unsigned getFrameRegister(const MachineFunction &MF) const; // Exception handling queries. unsigned getEHExceptionRegister() const; diff --git a/lib/Target/Alpha/AlphaTargetMachine.cpp b/lib/Target/Alpha/AlphaTargetMachine.cpp index b8bc13b..d0d5a43 100644 --- a/lib/Target/Alpha/AlphaTargetMachine.cpp +++ b/lib/Target/Alpha/AlphaTargetMachine.cpp @@ -28,7 +28,7 @@ extern "C" void LLVMInitializeAlphaTarget() { AlphaTargetMachine::AlphaTargetMachine(const Target &T, const std::string &TT, const std::string &FS) : LLVMTargetMachine(T, TT), - DataLayout("e-f128:128:128"), + DataLayout("e-f128:128:128-n64"), FrameInfo(TargetFrameInfo::StackGrowsDown, 16, 0), JITInfo(*this), Subtarget(TT, FS), diff --git a/lib/Target/Alpha/AsmPrinter/AlphaAsmPrinter.cpp b/lib/Target/Alpha/AsmPrinter/AlphaAsmPrinter.cpp index 209a5bf..338057b 100644 --- a/lib/Target/Alpha/AsmPrinter/AlphaAsmPrinter.cpp +++ b/lib/Target/Alpha/AsmPrinter/AlphaAsmPrinter.cpp @@ -178,7 +178,7 @@ bool AlphaAsmPrinter::runOnMachineFunction(MachineFunction &MF) { processDebugLoc(II, true); printInstruction(II); - if (VerboseAsm && !II->getDebugLoc().isUnknown()) + if (VerboseAsm) EmitComments(*II); O << '\n'; processDebugLoc(II, false); diff --git a/lib/Target/Blackfin/AsmPrinter/BlackfinAsmPrinter.cpp b/lib/Target/Blackfin/AsmPrinter/BlackfinAsmPrinter.cpp index 1900d00..917f7f5 100644 --- a/lib/Target/Blackfin/AsmPrinter/BlackfinAsmPrinter.cpp +++ b/lib/Target/Blackfin/AsmPrinter/BlackfinAsmPrinter.cpp @@ -31,8 +31,8 @@ #include "llvm/Target/TargetRegistry.h" #include "llvm/Support/Mangler.h" #include "llvm/ADT/Statistic.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FormattedStream.h" - using namespace llvm; STATISTIC(EmittedInsts, "Number of machine instrs printed"); @@ -143,7 +143,7 @@ bool BlackfinAsmPrinter::runOnMachineFunction(MachineFunction &MF) { processDebugLoc(II, true); printInstruction(II); - if (VerboseAsm && !II->getDebugLoc().isUnknown()) + if (VerboseAsm) EmitComments(*II); O << '\n'; diff --git a/lib/Target/Blackfin/BlackfinISelLowering.cpp b/lib/Target/Blackfin/BlackfinISelLowering.cpp index 4b321ec..c5c96f8 100644 --- a/lib/Target/Blackfin/BlackfinISelLowering.cpp +++ b/lib/Target/Blackfin/BlackfinISelLowering.cpp @@ -25,7 +25,7 @@ #include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/ADT/VectorExtras.h" #include "llvm/Support/Debug.h" - +#include "llvm/Support/ErrorHandling.h" using namespace llvm; //===----------------------------------------------------------------------===// @@ -207,7 +207,8 @@ BlackfinTargetLowering::LowerFormalArguments(SDValue Chain, } else { assert(VA.isMemLoc() && "CCValAssign must be RegLoc or MemLoc"); unsigned ObjSize = VA.getLocVT().getStoreSize(); - int FI = MFI->CreateFixedObject(ObjSize, VA.getLocMemOffset()); + int FI = MFI->CreateFixedObject(ObjSize, VA.getLocMemOffset(), + true, false); SDValue FIN = DAG.getFrameIndex(FI, MVT::i32); InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN, NULL, 0)); } diff --git a/lib/Target/Blackfin/BlackfinInstrInfo.td b/lib/Target/Blackfin/BlackfinInstrInfo.td index c952af1..88ff85f 100644 
--- a/lib/Target/Blackfin/BlackfinInstrInfo.td +++ b/lib/Target/Blackfin/BlackfinInstrInfo.td @@ -174,6 +174,7 @@ def CALLp: F1<(outs), (ins P:$func, variable_ops), let isReturn = 1, isTerminator = 1, + isBarrier = 1, Uses = [RETS] in def RTS: F1<(outs), (ins), "rts;", [(BfinRet)]>; diff --git a/lib/Target/Blackfin/BlackfinIntrinsicInfo.cpp b/lib/Target/Blackfin/BlackfinIntrinsicInfo.cpp index c8c5925..ea9480d 100644 --- a/lib/Target/Blackfin/BlackfinIntrinsicInfo.cpp +++ b/lib/Target/Blackfin/BlackfinIntrinsicInfo.cpp @@ -62,7 +62,6 @@ BlackfinIntrinsicInfo::lookupName(const char *Name, unsigned Len) const { bool BlackfinIntrinsicInfo::isOverloaded(unsigned IntrID) const { // Overload Table const bool OTable[] = { - false, // illegal intrinsic #define GET_INTRINSIC_OVERLOAD_TABLE #include "BlackfinGenIntrinsics.inc" #undef GET_INTRINSIC_OVERLOAD_TABLE diff --git a/lib/Target/Blackfin/BlackfinRegisterInfo.cpp b/lib/Target/Blackfin/BlackfinRegisterInfo.cpp index 8c0a58a..224165b 100644 --- a/lib/Target/Blackfin/BlackfinRegisterInfo.cpp +++ b/lib/Target/Blackfin/BlackfinRegisterInfo.cpp @@ -368,7 +368,8 @@ processFunctionBeforeCalleeSavedScan(MachineFunction &MF, if (requiresRegisterScavenging(MF)) { // Reserve a slot close to SP or frame pointer. RS->setScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(), - RC->getAlignment())); + RC->getAlignment(), + false)); } } @@ -449,7 +450,8 @@ unsigned BlackfinRegisterInfo::getRARegister() const { return BF::RETS; } -unsigned BlackfinRegisterInfo::getFrameRegister(MachineFunction &MF) const { +unsigned +BlackfinRegisterInfo::getFrameRegister(const MachineFunction &MF) const { return hasFP(MF) ? BF::FP : BF::SP; } diff --git a/lib/Target/Blackfin/BlackfinRegisterInfo.h b/lib/Target/Blackfin/BlackfinRegisterInfo.h index 501f504..68ef08a 100644 --- a/lib/Target/Blackfin/BlackfinRegisterInfo.h +++ b/lib/Target/Blackfin/BlackfinRegisterInfo.h @@ -76,7 +76,7 @@ namespace llvm { void emitPrologue(MachineFunction &MF) const; void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const; - unsigned getFrameRegister(MachineFunction &MF) const; + unsigned getFrameRegister(const MachineFunction &MF) const; unsigned getRARegister() const; // Exception handling queries. 
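Several register-info classes in this patch (AlphaRegisterInfo and BlackfinRegisterInfo above, SPURegisterInfo further down) change getFrameRegister to take a const MachineFunction&, matching the constified TargetRegisterInfo hook. A minimal sketch of the post-change override, using a hypothetical target — "Foo", Foo::FP, and Foo::SP are placeholders, not names from the tree:

    // Sketch only: mirrors the Alpha/Blackfin pattern in this patch.
    // hasFP() reports whether the function needs a dedicated frame pointer.
    unsigned FooRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
      return hasFP(MF) ? Foo::FP : Foo::SP;
    }
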
diff --git a/lib/Target/Blackfin/BlackfinTargetMachine.cpp b/lib/Target/Blackfin/BlackfinTargetMachine.cpp index 47ba2fe..45d7c35 100644 --- a/lib/Target/Blackfin/BlackfinTargetMachine.cpp +++ b/lib/Target/Blackfin/BlackfinTargetMachine.cpp @@ -28,7 +28,7 @@ BlackfinTargetMachine::BlackfinTargetMachine(const Target &T, const std::string &TT, const std::string &FS) : LLVMTargetMachine(T, TT), - DataLayout("e-p:32:32-i64:32-f64:32"), + DataLayout("e-p:32:32-i64:32-f64:32-n32"), Subtarget(TT, FS), TLInfo(*this), InstrInfo(Subtarget), diff --git a/lib/Target/CellSPU/AsmPrinter/SPUAsmPrinter.cpp b/lib/Target/CellSPU/AsmPrinter/SPUAsmPrinter.cpp index 007fe8f..dc9f81c4 100644 --- a/lib/Target/CellSPU/AsmPrinter/SPUAsmPrinter.cpp +++ b/lib/Target/CellSPU/AsmPrinter/SPUAsmPrinter.cpp @@ -406,7 +406,7 @@ void SPUAsmPrinter::printMachineInstruction(const MachineInstr *MI) { ++EmittedInsts; processDebugLoc(MI, true); printInstruction(MI); - if (VerboseAsm && !MI->getDebugLoc().isUnknown()) + if (VerboseAsm) EmitComments(*MI); processDebugLoc(MI, false); O << '\n'; diff --git a/lib/Target/CellSPU/SPUISelDAGToDAG.cpp b/lib/Target/CellSPU/SPUISelDAGToDAG.cpp index 1f9e5fc..c69a751 100644 --- a/lib/Target/CellSPU/SPUISelDAGToDAG.cpp +++ b/lib/Target/CellSPU/SPUISelDAGToDAG.cpp @@ -417,8 +417,6 @@ namespace { void SPUDAGToDAGISel::InstructionSelect() { - DEBUG(BB->dump()); - // Select target instructions for the DAG. SelectRoot(*CurDAG); CurDAG->RemoveDeadNodes(); diff --git a/lib/Target/CellSPU/SPUISelLowering.cpp b/lib/Target/CellSPU/SPUISelLowering.cpp index aaf0783..4dd82a6 100644 --- a/lib/Target/CellSPU/SPUISelLowering.cpp +++ b/lib/Target/CellSPU/SPUISelLowering.cpp @@ -1090,7 +1090,7 @@ SPUTargetLowering::LowerFormalArguments(SDValue Chain, // We need to load the argument to a virtual register if we determined // above that we ran out of physical registers of the appropriate type // or we're forced to do vararg - int FI = MFI->CreateFixedObject(ObjSize, ArgOffset); + int FI = MFI->CreateFixedObject(ObjSize, ArgOffset, true, false); SDValue FIN = DAG.getFrameIndex(FI, PtrVT); ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, NULL, 0); ArgOffset += StackSlotSize; @@ -1110,7 +1110,8 @@ SPUTargetLowering::LowerFormalArguments(SDValue Chain, // Create the frame slot for (; ArgRegIdx != NumArgRegs; ++ArgRegIdx) { - VarArgsFrameIndex = MFI->CreateFixedObject(StackSlotSize, ArgOffset); + VarArgsFrameIndex = MFI->CreateFixedObject(StackSlotSize, ArgOffset, + true, false); SDValue FIN = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT); SDValue ArgVal = DAG.getRegister(ArgRegs[ArgRegIdx], MVT::v16i8); SDValue Store = DAG.getStore(Chain, dl, ArgVal, FIN, NULL, 0); diff --git a/lib/Target/CellSPU/SPUInstrInfo.td b/lib/Target/CellSPU/SPUInstrInfo.td index 09849da..d3b575a 100644 --- a/lib/Target/CellSPU/SPUInstrInfo.td +++ b/lib/Target/CellSPU/SPUInstrInfo.td @@ -3601,21 +3601,23 @@ def : Pat<(SPUcall (SPUaform texternalsym:$func, 0)), (BRASL texternalsym:$func)>; // Unconditional branches: -let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, isBarrier = 1 in { - def BR : - UncondBranch<0b001001100, (outs), (ins brtarget:$dest), - "br\t$dest", - [(br bb:$dest)]>; - - // Unconditional, absolute address branch - def BRA: - UncondBranch<0b001100000, (outs), (ins brtarget:$dest), - "bra\t$dest", - [/* no pattern */]>; +let isBranch = 1, isTerminator = 1, hasCtrlDep = 1 in { + let isBarrier = 1 in { + def BR : + UncondBranch<0b001001100, (outs), (ins brtarget:$dest), + "br\t$dest", + [(br bb:$dest)]>; + + // Unconditional, 
absolute address branch + def BRA: + UncondBranch<0b001100000, (outs), (ins brtarget:$dest), + "bra\t$dest", + [/* no pattern */]>; - // Indirect branch - def BI: - BIForm<0b00010101100, "bi\t$func", [(brind R32C:$func)]>; + // Indirect branch + def BI: + BIForm<0b00010101100, "bi\t$func", [(brind R32C:$func)]>; + } // Conditional branches: class BRNZInst<dag IOL, list<dag> pattern>: diff --git a/lib/Target/CellSPU/SPURegisterInfo.cpp b/lib/Target/CellSPU/SPURegisterInfo.cpp index 8412006..af94e67 100644 --- a/lib/Target/CellSPU/SPURegisterInfo.cpp +++ b/lib/Target/CellSPU/SPURegisterInfo.cpp @@ -596,7 +596,7 @@ SPURegisterInfo::getRARegister() const } unsigned -SPURegisterInfo::getFrameRegister(MachineFunction &MF) const +SPURegisterInfo::getFrameRegister(const MachineFunction &MF) const { return SPU::R1; } diff --git a/lib/Target/CellSPU/SPURegisterInfo.h b/lib/Target/CellSPU/SPURegisterInfo.h index 1d9d07e..9691cb6 100644 --- a/lib/Target/CellSPU/SPURegisterInfo.h +++ b/lib/Target/CellSPU/SPURegisterInfo.h @@ -78,7 +78,7 @@ namespace llvm { //! Get return address register (LR, aka R0) unsigned getRARegister() const; //! Get the stack frame register (SP, aka R1) - unsigned getFrameRegister(MachineFunction &MF) const; + unsigned getFrameRegister(const MachineFunction &MF) const; //! Perform target-specific stack frame setup. void getInitialFrameState(std::vector<MachineMove> &Moves) const; diff --git a/lib/Target/CellSPU/SPUSubtarget.h b/lib/Target/CellSPU/SPUSubtarget.h index 94ac73c..88201c6 100644 --- a/lib/Target/CellSPU/SPUSubtarget.h +++ b/lib/Target/CellSPU/SPUSubtarget.h @@ -82,7 +82,7 @@ namespace llvm { const char *getTargetDataString() const { return "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128" "-i16:16:128-i8:8:128-i1:8:128-a:0:128-v64:128:128-v128:128:128" - "-s:128:128"; + "-s:128:128-n32:64"; } }; } // End llvm namespace diff --git a/lib/Target/MSP430/AsmPrinter/MSP430AsmPrinter.cpp b/lib/Target/MSP430/AsmPrinter/MSP430AsmPrinter.cpp index 11ac931..145359f 100644 --- a/lib/Target/MSP430/AsmPrinter/MSP430AsmPrinter.cpp +++ b/lib/Target/MSP430/AsmPrinter/MSP430AsmPrinter.cpp @@ -22,6 +22,7 @@ #include "llvm/Constants.h" #include "llvm/DerivedTypes.h" #include "llvm/Module.h" +#include "llvm/Assembly/Writer.h" #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/CodeGen/DwarfWriter.h" #include "llvm/CodeGen/MachineModuleInfo.h" @@ -70,9 +71,6 @@ namespace { void printSrcMemOperand(const MachineInstr *MI, int OpNum, const char* Modifier = 0); void printCCOperand(const MachineInstr *MI, int OpNum); - void printInstruction(const MachineInstr *MI); // autogenerated. - static const char *getRegisterName(unsigned RegNo); - void printMachineInstruction(const MachineInstr * MI); bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, unsigned AsmVariant, @@ -82,13 +80,10 @@ namespace { const char *ExtraCode); void printInstructionThroughMCStreamer(const MachineInstr *MI); + void PrintGlobalVariable(const GlobalVariable* GVar); void emitFunctionHeader(const MachineFunction &MF); bool runOnMachineFunction(MachineFunction &F); - virtual void PrintGlobalVariable(const GlobalVariable *GV) { - // FIXME: No support for global variables? 
- } - void getAnalysisUsage(AnalysisUsage &AU) const { AsmPrinter::getAnalysisUsage(AU); AU.setPreservesAll(); @@ -96,8 +91,89 @@ namespace { }; } // end of anonymous namespace -#include "MSP430GenAsmWriter.inc" +void MSP430AsmPrinter::PrintGlobalVariable(const GlobalVariable* GVar) { + if (!GVar->hasInitializer()) + return; // External global require no code + + // Check to see if this is a special global used by LLVM, if so, emit it. + if (EmitSpecialLLVMGlobal(GVar)) + return; + + const TargetData *TD = TM.getTargetData(); + + std::string name = Mang->getMangledName(GVar); + Constant *C = GVar->getInitializer(); + unsigned Size = TD->getTypeAllocSize(C->getType()); + unsigned Align = TD->getPreferredAlignmentLog(GVar); + + printVisibility(name, GVar->getVisibility()); + + O << "\t.type\t" << name << ",@object\n"; + + OutStreamer.SwitchSection(getObjFileLowering().SectionForGlobal(GVar, Mang, + TM)); + + if (C->isNullValue() && !GVar->hasSection() && + !GVar->isThreadLocal() && + (GVar->hasLocalLinkage() || GVar->isWeakForLinker())) { + + if (Size == 0) Size = 1; // .comm Foo, 0 is undefined, avoid it. + + if (GVar->hasLocalLinkage()) + O << "\t.local\t" << name << '\n'; + + O << MAI->getCOMMDirective() << name << ',' << Size; + if (MAI->getCOMMDirectiveTakesAlignment()) + O << ',' << (MAI->getAlignmentIsInBytes() ? (1 << Align) : Align); + + if (VerboseAsm) { + O.PadToColumn(MAI->getCommentColumn()); + O << MAI->getCommentString() << ' '; + WriteAsOperand(O, GVar, /*PrintType=*/false, GVar->getParent()); + } + O << '\n'; + return; + } + + switch (GVar->getLinkage()) { + case GlobalValue::CommonLinkage: + case GlobalValue::LinkOnceAnyLinkage: + case GlobalValue::LinkOnceODRLinkage: + case GlobalValue::WeakAnyLinkage: + case GlobalValue::WeakODRLinkage: + O << "\t.weak\t" << name << '\n'; + break; + case GlobalValue::DLLExportLinkage: + case GlobalValue::AppendingLinkage: + // FIXME: appending linkage variables should go into a section of + // their name or something. For now, just emit them as external. + case GlobalValue::ExternalLinkage: + // If external or appending, declare as a global symbol + O << "\t.globl " << name << '\n'; + // FALL THROUGH + case GlobalValue::PrivateLinkage: + case GlobalValue::LinkerPrivateLinkage: + case GlobalValue::InternalLinkage: + break; + default: + assert(0 && "Unknown linkage type!"); + } + + // Use 16-bit alignment by default to simplify bunch of stuff + EmitAlignment(Align, GVar); + O << name << ":"; + if (VerboseAsm) { + O.PadToColumn(MAI->getCommentColumn()); + O << MAI->getCommentString() << ' '; + WriteAsOperand(O, GVar, /*PrintType=*/false, GVar->getParent()); + } + O << '\n'; + EmitGlobalConstant(C); + + if (MAI->hasDotTypeDotSizeDirective()) + O << "\t.size\t" << name << ", " << Size << '\n'; +} void MSP430AsmPrinter::emitFunctionHeader(const MachineFunction &MF) { const Function *F = MF.getFunction(); @@ -161,14 +237,9 @@ void MSP430AsmPrinter::printMachineInstruction(const MachineInstr *MI) { processDebugLoc(MI, true); - // Call the autogenerated instruction printer routines. 
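For context on the PrintGlobalVariable implementation added above: it follows the usual ELF-style split between zero-initialized globals with local or weak linkage (emitted via .local/.comm) and everything else (emitted into a section with .globl, a label, the data, and .size). A rough sketch with hypothetical C globals, not taken from the patch; the exact directive spelling depends on the MCAsmInfo settings:

/* Illustrative C globals and the directives the new code would roughly emit. */
static char buf[64];           /* zero-initialized, local linkage:
                                    .local buf
                                    .comm  buf,64,...                 */
int table[4] = {1, 2, 3, 4};   /* initialized, external linkage:
                                    .globl table
                                    .type  table,@object
                                  table:
                                    ...data words...
                                    .size  table, 8                   */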
- if (EnableMCInst) { - printInstructionThroughMCStreamer(MI); - } else { - printInstruction(MI); - } + printInstructionThroughMCStreamer(MI); - if (VerboseAsm && !MI->getDebugLoc().isUnknown()) + if (VerboseAsm) EmitComments(*MI); O << '\n'; @@ -180,7 +251,7 @@ void MSP430AsmPrinter::printOperand(const MachineInstr *MI, int OpNum, const MachineOperand &MO = MI->getOperand(OpNum); switch (MO.getType()) { case MachineOperand::MO_Register: - O << getRegisterName(MO.getReg()); + O << MSP430InstPrinter::getRegisterName(MO.getReg()); return; case MachineOperand::MO_Immediate: if (!Modifier || strcmp(Modifier, "nohash")) @@ -224,22 +295,23 @@ void MSP430AsmPrinter::printSrcMemOperand(const MachineInstr *MI, int OpNum, const MachineOperand &Base = MI->getOperand(OpNum); const MachineOperand &Disp = MI->getOperand(OpNum+1); - if (Base.isGlobal()) - printOperand(MI, OpNum, "mem"); - else if (Disp.isImm() && !Base.getReg()) + // Print displacement first + if (!Disp.isImm()) { + printOperand(MI, OpNum+1, "mem"); + } else { + if (!Base.getReg()) + O << '&'; + + printOperand(MI, OpNum+1, "nohash"); + } + + + // Print register base field + if (Base.getReg()) { + O << '('; printOperand(MI, OpNum); - else if (Base.getReg()) { - if (Disp.getImm()) { - printOperand(MI, OpNum + 1, "nohash"); - O << '('; - printOperand(MI, OpNum); - O << ')'; - } else { - O << '@'; - printOperand(MI, OpNum); - } - } else - llvm_unreachable("Unsupported memory operand"); + O << ')'; + } } void MSP430AsmPrinter::printCCOperand(const MachineInstr *MI, int OpNum) { @@ -294,8 +366,7 @@ bool MSP430AsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, } //===----------------------------------------------------------------------===// -void MSP430AsmPrinter::printInstructionThroughMCStreamer(const MachineInstr *MI) -{ +void MSP430AsmPrinter::printInstructionThroughMCStreamer(const MachineInstr *MI){ MSP430MCInstLower MCInstLowering(OutContext, *Mang, *this); @@ -309,7 +380,6 @@ void MSP430AsmPrinter::printInstructionThroughMCStreamer(const MachineInstr *MI) printKill(MI); return; case TargetInstrInfo::INLINEASM: - O << '\t'; printInlineAsm(MI); return; case TargetInstrInfo::IMPLICIT_DEF: @@ -324,7 +394,18 @@ void MSP430AsmPrinter::printInstructionThroughMCStreamer(const MachineInstr *MI) printMCInst(&TmpInst); } +static MCInstPrinter *createMSP430MCInstPrinter(const Target &T, + unsigned SyntaxVariant, + const MCAsmInfo &MAI, + raw_ostream &O) { + if (SyntaxVariant == 0) + return new MSP430InstPrinter(O, MAI); + return 0; +} + // Force static initialization. extern "C" void LLVMInitializeMSP430AsmPrinter() { RegisterAsmPrinter<MSP430AsmPrinter> X(TheMSP430Target); + TargetRegistry::RegisterMCInstPrinter(TheMSP430Target, + createMSP430MCInstPrinter); } diff --git a/lib/Target/MSP430/AsmPrinter/MSP430InstPrinter.cpp b/lib/Target/MSP430/AsmPrinter/MSP430InstPrinter.cpp index a3ecc67..0a403c4 100644 --- a/lib/Target/MSP430/AsmPrinter/MSP430InstPrinter.cpp +++ b/lib/Target/MSP430/AsmPrinter/MSP430InstPrinter.cpp @@ -25,11 +25,9 @@ using namespace llvm; // Include the auto-generated portion of the assembly writer. #define MachineInstr MCInst -#define MSP430AsmPrinter MSP430InstPrinter // FIXME: REMOVE. 
#define NO_ASM_WRITER_BOILERPLATE #include "MSP430GenAsmWriter.inc" #undef MachineInstr -#undef MSP430AsmPrinter void MSP430InstPrinter::printInst(const MCInst *MI) { printInstruction(MI); @@ -65,25 +63,22 @@ void MSP430InstPrinter::printSrcMemOperand(const MCInst *MI, unsigned OpNo, const MCOperand &Base = MI->getOperand(OpNo); const MCOperand &Disp = MI->getOperand(OpNo+1); - // FIXME: move global to displacement field! - if (Base.isExpr()) { + // Print displacement first + if (Disp.isExpr()) { O << '&'; - Base.getExpr()->print(O, &MAI); - } else if (Disp.isImm() && !Base.isReg()) - printOperand(MI, OpNo); - else if (Base.isReg()) { - if (Disp.getImm()) { - O << Disp.getImm() << '('; - printOperand(MI, OpNo); - O << ')'; - } else { - O << '@'; - printOperand(MI, OpNo); - } + Disp.getExpr()->print(O, &MAI); } else { - Base.dump(); - Disp.dump(); - llvm_unreachable("Unsupported memory operand"); + assert(Disp.isImm() && "Expected immediate in displacement field"); + if (!Base.getReg()) + O << '&'; + + O << Disp.getImm(); + } + + + // Print register base field + if (Base.getReg()) { + O << '(' << getRegisterName(Base.getReg()) << ')'; } } diff --git a/lib/Target/MSP430/MSP430.td b/lib/Target/MSP430/MSP430.td index 89313ab..870a3df 100644 --- a/lib/Target/MSP430/MSP430.td +++ b/lib/Target/MSP430/MSP430.td @@ -50,11 +50,17 @@ include "MSP430InstrInfo.td" def MSP430InstrInfo : InstrInfo {} + +def MSP430InstPrinter : AsmWriter { + string AsmWriterClassName = "InstPrinter"; +} + //===----------------------------------------------------------------------===// // Target Declaration //===----------------------------------------------------------------------===// def MSP430 : Target { let InstructionSet = MSP430InstrInfo; + let AssemblyWriters = [MSP430InstPrinter]; } diff --git a/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp b/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp index b7d9282..c0084be 100644 --- a/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp +++ b/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp @@ -45,6 +45,70 @@ static const bool ViewRMWDAGs = false; STATISTIC(NumLoadMoved, "Number of loads moved below TokenFactor"); + +namespace { + struct MSP430ISelAddressMode { + enum { + RegBase, + FrameIndexBase + } BaseType; + + struct { // This is really a union, discriminated by BaseType! + SDValue Reg; + int FrameIndex; + } Base; + + int16_t Disp; + GlobalValue *GV; + Constant *CP; + BlockAddress *BlockAddr; + const char *ES; + int JT; + unsigned Align; // CP alignment. + + MSP430ISelAddressMode() + : BaseType(RegBase), Disp(0), GV(0), CP(0), BlockAddr(0), + ES(0), JT(-1), Align(0) { + } + + bool hasSymbolicDisplacement() const { + return GV != 0 || CP != 0 || ES != 0 || JT != -1; + } + + bool hasBaseReg() const { + return Base.Reg.getNode() != 0; + } + + void setBaseReg(SDValue Reg) { + BaseType = RegBase; + Base.Reg = Reg; + } + + void dump() { + errs() << "MSP430ISelAddressMode " << this << '\n'; + if (Base.Reg.getNode() != 0) { + errs() << "Base.Reg "; + Base.Reg.getNode()->dump(); + } else { + errs() << " Base.FrameIndex " << Base.FrameIndex << '\n'; + } + errs() << " Disp " << Disp << '\n'; + if (GV) { + errs() << "GV "; + GV->dump(); + } else if (CP) { + errs() << " CP "; + CP->dump(); + errs() << " Align" << Align << '\n'; + } else if (ES) { + errs() << "ES "; + errs() << ES << '\n'; + } else if (JT != -1) + errs() << " JT" << JT << " Align" << Align << '\n'; + } + }; +} + /// MSP430DAGToDAGISel - MSP430 specific code to select MSP430 machine /// instructions for SelectionDAG operations. 
/// @@ -65,6 +129,10 @@ namespace { return "MSP430 DAG->DAG Pattern Instruction Selection"; } + bool MatchAddress(SDValue N, MSP430ISelAddressMode &AM); + bool MatchWrapper(SDValue N, MSP430ISelAddressMode &AM); + bool MatchAddressBase(SDValue N, MSP430ISelAddressMode &AM); + bool IsLegalAndProfitableToFold(SDNode *N, SDNode *U, SDNode *Root) const; @@ -79,6 +147,10 @@ namespace { DenseMap<SDNode*, SDNode*> RMWStores; void PreprocessForRMW(); SDNode *Select(SDValue Op); + SDNode *SelectIndexedLoad(SDValue Op); + SDNode *SelectIndexedBinOp(SDValue Op, SDValue N1, SDValue N2, + unsigned Opc8, unsigned Opc16); + bool SelectAddr(SDValue Op, SDValue Addr, SDValue &Base, SDValue &Disp); #ifndef NDEBUG @@ -95,50 +167,155 @@ FunctionPass *llvm::createMSP430ISelDag(MSP430TargetMachine &TM, return new MSP430DAGToDAGISel(TM, OptLevel); } -// FIXME: This is pretty dummy routine and needs to be rewritten in the future. -bool MSP430DAGToDAGISel::SelectAddr(SDValue Op, SDValue Addr, - SDValue &Base, SDValue &Disp) { - // Try to match frame address first. - if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) { - Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i16); - Disp = CurDAG->getTargetConstant(0, MVT::i16); + +/// MatchWrapper - Try to match MSP430ISD::Wrapper node into an addressing mode. +/// These wrap things that will resolve down into a symbol reference. If no +/// match is possible, this returns true, otherwise it returns false. +bool MSP430DAGToDAGISel::MatchWrapper(SDValue N, MSP430ISelAddressMode &AM) { + // If the addressing mode already has a symbol as the displacement, we can + // never match another symbol. + if (AM.hasSymbolicDisplacement()) return true; + + SDValue N0 = N.getOperand(0); + + if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(N0)) { + AM.GV = G->getGlobal(); + AM.Disp += G->getOffset(); + //AM.SymbolFlags = G->getTargetFlags(); + } else if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(N0)) { + AM.CP = CP->getConstVal(); + AM.Align = CP->getAlignment(); + AM.Disp += CP->getOffset(); + //AM.SymbolFlags = CP->getTargetFlags(); + } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(N0)) { + AM.ES = S->getSymbol(); + //AM.SymbolFlags = S->getTargetFlags(); + } else if (JumpTableSDNode *J = dyn_cast<JumpTableSDNode>(N0)) { + AM.JT = J->getIndex(); + //AM.SymbolFlags = J->getTargetFlags(); + } else { + AM.BlockAddr = cast<BlockAddressSDNode>(N0)->getBlockAddress(); + //AM.SymbolFlags = cast<BlockAddressSDNode>(N0)->getTargetFlags(); } + return false; +} - switch (Addr.getOpcode()) { - case ISD::ADD: - // Operand is a result from ADD with constant operand which fits into i16. - if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) { - uint64_t CVal = CN->getZExtValue(); - // Offset should fit into 16 bits. - if (((CVal << 48) >> 48) == CVal) { - SDValue N0 = Addr.getOperand(0); - if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(N0)) - Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i16); - else - Base = N0; - - Disp = CurDAG->getTargetConstant(CVal, MVT::i16); - return true; - } +/// MatchAddressBase - Helper for MatchAddress. Add the specified node to the +/// specified addressing mode without any further recursion. +bool MSP430DAGToDAGISel::MatchAddressBase(SDValue N, MSP430ISelAddressMode &AM) { + // Is the base register already occupied? + if (AM.BaseType != MSP430ISelAddressMode::RegBase || AM.Base.Reg.getNode()) { + // If so, we cannot select it. 
+ return true; + } + + // Default, generate it as a register. + AM.BaseType = MSP430ISelAddressMode::RegBase; + AM.Base.Reg = N; + return false; +} + +bool MSP430DAGToDAGISel::MatchAddress(SDValue N, MSP430ISelAddressMode &AM) { + DebugLoc dl = N.getDebugLoc(); + DEBUG({ + errs() << "MatchAddress: "; + AM.dump(); + }); + + switch (N.getOpcode()) { + default: break; + case ISD::Constant: { + uint64_t Val = cast<ConstantSDNode>(N)->getSExtValue(); + AM.Disp += Val; + return false; + } + + case MSP430ISD::Wrapper: + if (!MatchWrapper(N, AM)) + return false; + break; + + case ISD::FrameIndex: + if (AM.BaseType == MSP430ISelAddressMode::RegBase + && AM.Base.Reg.getNode() == 0) { + AM.BaseType = MSP430ISelAddressMode::FrameIndexBase; + AM.Base.FrameIndex = cast<FrameIndexSDNode>(N)->getIndex(); + return false; } break; - case MSP430ISD::Wrapper: - SDValue N0 = Addr.getOperand(0); - if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(N0)) { - Base = CurDAG->getTargetGlobalAddress(G->getGlobal(), - MVT::i16, G->getOffset()); - Disp = CurDAG->getTargetConstant(0, MVT::i16); - return true; - } else if (ExternalSymbolSDNode *E = dyn_cast<ExternalSymbolSDNode>(N0)) { - Base = CurDAG->getTargetExternalSymbol(E->getSymbol(), MVT::i16); - Disp = CurDAG->getTargetConstant(0, MVT::i16); + + case ISD::ADD: { + MSP430ISelAddressMode Backup = AM; + if (!MatchAddress(N.getNode()->getOperand(0), AM) && + !MatchAddress(N.getNode()->getOperand(1), AM)) + return false; + AM = Backup; + if (!MatchAddress(N.getNode()->getOperand(1), AM) && + !MatchAddress(N.getNode()->getOperand(0), AM)) + return false; + AM = Backup; + + break; + } + + case ISD::OR: + // Handle "X | C" as "X + C" iff X is known to have C bits clear. + if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N.getOperand(1))) { + MSP430ISelAddressMode Backup = AM; + uint64_t Offset = CN->getSExtValue(); + // Start with the LHS as an addr mode. + if (!MatchAddress(N.getOperand(0), AM) && + // Address could not have picked a GV address for the displacement. + AM.GV == NULL && + // Check to see if the LHS & C is zero. + CurDAG->MaskedValueIsZero(N.getOperand(0), CN->getAPIntValue())) { + AM.Disp += Offset; + return false; + } + AM = Backup; } break; - }; + } + + return MatchAddressBase(N, AM); +} + +/// SelectAddr - returns true if it is able pattern match an addressing mode. +/// It returns the operands which make up the maximal addressing mode it can +/// match by reference. +bool MSP430DAGToDAGISel::SelectAddr(SDValue Op, SDValue N, + SDValue &Base, SDValue &Disp) { + MSP430ISelAddressMode AM; + + if (MatchAddress(N, AM)) + return false; + + EVT VT = N.getValueType(); + if (AM.BaseType == MSP430ISelAddressMode::RegBase) { + if (!AM.Base.Reg.getNode()) + AM.Base.Reg = CurDAG->getRegister(0, VT); + } - Base = Addr; - Disp = CurDAG->getTargetConstant(0, MVT::i16); + Base = (AM.BaseType == MSP430ISelAddressMode::FrameIndexBase) ? 
+ CurDAG->getTargetFrameIndex(AM.Base.FrameIndex, TLI.getPointerTy()) : + AM.Base.Reg; + + if (AM.GV) + Disp = CurDAG->getTargetGlobalAddress(AM.GV, MVT::i16, AM.Disp, + 0/*AM.SymbolFlags*/); + else if (AM.CP) + Disp = CurDAG->getTargetConstantPool(AM.CP, MVT::i16, + AM.Align, AM.Disp, 0/*AM.SymbolFlags*/); + else if (AM.ES) + Disp = CurDAG->getTargetExternalSymbol(AM.ES, MVT::i16, 0/*AM.SymbolFlags*/); + else if (AM.JT != -1) + Disp = CurDAG->getTargetJumpTable(AM.JT, MVT::i16, 0/*AM.SymbolFlags*/); + else if (AM.BlockAddr) + Disp = CurDAG->getBlockAddress(AM.BlockAddr, DebugLoc()/*MVT::i32*/, + true /*AM.SymbolFlags*/); + else + Disp = CurDAG->getTargetConstant(AM.Disp, MVT::i16); return true; } @@ -187,7 +364,7 @@ bool MSP430DAGToDAGISel::IsLegalAndProfitableToFold(SDNode *N, SDNode *U, /// TokenFactor by PreprocessForRMW. Query the map Store => Load1 (created /// during preprocessing) to determine whether it's legal to introduce such /// "cycle" for a moment. - DenseMap<SDNode*, SDNode*>::iterator I = RMWStores.find(Root); + DenseMap<SDNode*, SDNode*>::const_iterator I = RMWStores.find(Root); if (I != RMWStores.end() && I->second == N) return true; @@ -423,6 +600,89 @@ void MSP430DAGToDAGISel::PreprocessForRMW() { } } + +static bool isValidIndexedLoad(const LoadSDNode *LD) { + ISD::MemIndexedMode AM = LD->getAddressingMode(); + if (AM != ISD::POST_INC || LD->getExtensionType() != ISD::NON_EXTLOAD) + return false; + + EVT VT = LD->getMemoryVT(); + + switch (VT.getSimpleVT().SimpleTy) { + case MVT::i8: + // Sanity check + if (cast<ConstantSDNode>(LD->getOffset())->getZExtValue() != 1) + return false; + + break; + case MVT::i16: + // Sanity check + if (cast<ConstantSDNode>(LD->getOffset())->getZExtValue() != 2) + return false; + + break; + default: + return false; + } + + return true; +} + +SDNode *MSP430DAGToDAGISel::SelectIndexedLoad(SDValue Op) { + LoadSDNode *LD = cast<LoadSDNode>(Op); + if (!isValidIndexedLoad(LD)) + return NULL; + + MVT VT = LD->getMemoryVT().getSimpleVT(); + + unsigned Opcode = 0; + switch (VT.SimpleTy) { + case MVT::i8: + Opcode = MSP430::MOV8rm_POST; + break; + case MVT::i16: + Opcode = MSP430::MOV16rm_POST; + break; + default: + return NULL; + } + + return CurDAG->getMachineNode(Opcode, Op.getDebugLoc(), + VT, MVT::i16, MVT::Other, + LD->getBasePtr(), LD->getChain()); +} + +SDNode *MSP430DAGToDAGISel::SelectIndexedBinOp(SDValue Op, + SDValue N1, SDValue N2, + unsigned Opc8, unsigned Opc16) { + if (N1.getOpcode() == ISD::LOAD && + N1.hasOneUse() && + IsLegalAndProfitableToFold(N1.getNode(), Op.getNode(), Op.getNode())) { + LoadSDNode *LD = cast<LoadSDNode>(N1); + if (!isValidIndexedLoad(LD)) + return NULL; + + MVT VT = LD->getMemoryVT().getSimpleVT(); + unsigned Opc = (VT == MVT::i16 ? Opc16 : Opc8); + MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1); + MemRefs0[0] = cast<MemSDNode>(N1)->getMemOperand(); + SDValue Ops0[] = { N2, LD->getBasePtr(), LD->getChain() }; + SDNode *ResNode = + CurDAG->SelectNodeTo(Op.getNode(), Opc, + VT, MVT::i16, MVT::Other, + Ops0, 3); + cast<MachineSDNode>(ResNode)->setMemRefs(MemRefs0, MemRefs0 + 1); + // Transfer chain. + ReplaceUses(SDValue(N1.getNode(), 2), SDValue(ResNode, 2)); + // Transfer writeback. + ReplaceUses(SDValue(N1.getNode(), 1), SDValue(ResNode, 1)); + return ResNode; + } + + return NULL; +} + + /// InstructionSelect - This callback is invoked by /// SelectionDAGISel when it has created a SelectionDAG for us to codegen. 
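One note on the ISD::OR case in MatchAddress above: the MaskedValueIsZero guard establishes that the constant's set bits are already zero in the other operand, so no carries are possible and the OR computes the same value as an ADD, letting the constant be folded into the displacement. A minimal stand-alone illustration with made-up values:

#include <cassert>
#include <cstdint>

int main() {
  uint16_t X = 0x1230;  // base with its low four bits known to be zero
  uint16_t C = 0x0004;  // offset that only uses those low bits
  // With no overlapping bits there is no carry, so OR and ADD agree.
  assert(static_cast<uint16_t>(X | C) == static_cast<uint16_t>(X + C));
  return 0;
}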
void MSP430DAGToDAGISel::InstructionSelect() { @@ -438,8 +698,6 @@ void MSP430DAGToDAGISel::InstructionSelect() { DEBUG(errs() << "Selection DAG after RMW preprocessing:\n"); DEBUG(CurDAG->dump()); - DEBUG(BB->dump()); - // Codegen the basic block. DEBUG(errs() << "===== Instruction selection begins:\n"); DEBUG(Indent = 0); @@ -482,6 +740,72 @@ SDNode *MSP430DAGToDAGISel::Select(SDValue Op) { return CurDAG->getMachineNode(MSP430::ADD16ri, dl, MVT::i16, TFI, CurDAG->getTargetConstant(0, MVT::i16)); } + case ISD::LOAD: + if (SDNode *ResNode = SelectIndexedLoad(Op)) + return ResNode; + // Other cases are autogenerated. + break; + case ISD::ADD: + if (SDNode *ResNode = + SelectIndexedBinOp(Op, + Op.getOperand(0), Op.getOperand(1), + MSP430::ADD8rm_POST, MSP430::ADD16rm_POST)) + return ResNode; + else if (SDNode *ResNode = + SelectIndexedBinOp(Op, Op.getOperand(1), Op.getOperand(0), + MSP430::ADD8rm_POST, MSP430::ADD16rm_POST)) + return ResNode; + + // Other cases are autogenerated. + break; + case ISD::SUB: + if (SDNode *ResNode = + SelectIndexedBinOp(Op, + Op.getOperand(0), Op.getOperand(1), + MSP430::SUB8rm_POST, MSP430::SUB16rm_POST)) + return ResNode; + + // Other cases are autogenerated. + break; + case ISD::AND: + if (SDNode *ResNode = + SelectIndexedBinOp(Op, + Op.getOperand(0), Op.getOperand(1), + MSP430::AND8rm_POST, MSP430::AND16rm_POST)) + return ResNode; + else if (SDNode *ResNode = + SelectIndexedBinOp(Op, Op.getOperand(1), Op.getOperand(0), + MSP430::AND8rm_POST, MSP430::AND16rm_POST)) + return ResNode; + + // Other cases are autogenerated. + break; + case ISD::OR: + if (SDNode *ResNode = + SelectIndexedBinOp(Op, + Op.getOperand(0), Op.getOperand(1), + MSP430::OR8rm_POST, MSP430::OR16rm_POST)) + return ResNode; + else if (SDNode *ResNode = + SelectIndexedBinOp(Op, Op.getOperand(1), Op.getOperand(0), + MSP430::OR8rm_POST, MSP430::OR16rm_POST)) + return ResNode; + + // Other cases are autogenerated. + break; + case ISD::XOR: + if (SDNode *ResNode = + SelectIndexedBinOp(Op, + Op.getOperand(0), Op.getOperand(1), + MSP430::XOR8rm_POST, MSP430::XOR16rm_POST)) + return ResNode; + else if (SDNode *ResNode = + SelectIndexedBinOp(Op, Op.getOperand(1), Op.getOperand(0), + MSP430::XOR8rm_POST, MSP430::XOR16rm_POST)) + return ResNode; + + // Other cases are autogenerated. + break; } // Select the default instruction diff --git a/lib/Target/MSP430/MSP430ISelLowering.cpp b/lib/Target/MSP430/MSP430ISelLowering.cpp index 34e6d2c..5a925f5 100644 --- a/lib/Target/MSP430/MSP430ISelLowering.cpp +++ b/lib/Target/MSP430/MSP430ISelLowering.cpp @@ -62,10 +62,14 @@ MSP430TargetLowering::MSP430TargetLowering(MSP430TargetMachine &tm) : setBooleanContents(ZeroOrOneBooleanContent); setSchedulingPreference(SchedulingForLatency); - setLoadExtAction(ISD::EXTLOAD, MVT::i1, Promote); - setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote); - setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote); - setLoadExtAction(ISD::SEXTLOAD, MVT::i8, Expand); + // We have post-incremented loads / stores. 
+ setIndexedLoadAction(ISD::POST_INC, MVT::i8, Legal); + setIndexedLoadAction(ISD::POST_INC, MVT::i16, Legal); + + setLoadExtAction(ISD::EXTLOAD, MVT::i1, Promote); + setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote); + setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote); + setLoadExtAction(ISD::SEXTLOAD, MVT::i8, Expand); setLoadExtAction(ISD::SEXTLOAD, MVT::i16, Expand); // We don't have any truncstores @@ -115,12 +119,23 @@ MSP430TargetLowering::MSP430TargetLowering(MSP430TargetMachine &tm) : setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); // FIXME: Implement efficiently multiplication by a constant + setOperationAction(ISD::MUL, MVT::i8, Expand); + setOperationAction(ISD::MULHS, MVT::i8, Expand); + setOperationAction(ISD::MULHU, MVT::i8, Expand); + setOperationAction(ISD::SMUL_LOHI, MVT::i8, Expand); + setOperationAction(ISD::UMUL_LOHI, MVT::i8, Expand); setOperationAction(ISD::MUL, MVT::i16, Expand); setOperationAction(ISD::MULHS, MVT::i16, Expand); setOperationAction(ISD::MULHU, MVT::i16, Expand); setOperationAction(ISD::SMUL_LOHI, MVT::i16, Expand); setOperationAction(ISD::UMUL_LOHI, MVT::i16, Expand); + setOperationAction(ISD::UDIV, MVT::i8, Expand); + setOperationAction(ISD::UDIVREM, MVT::i8, Expand); + setOperationAction(ISD::UREM, MVT::i8, Expand); + setOperationAction(ISD::SDIV, MVT::i8, Expand); + setOperationAction(ISD::SDIVREM, MVT::i8, Expand); + setOperationAction(ISD::SREM, MVT::i8, Expand); setOperationAction(ISD::UDIV, MVT::i16, Expand); setOperationAction(ISD::UDIVREM, MVT::i16, Expand); setOperationAction(ISD::UREM, MVT::i16, Expand); @@ -303,7 +318,7 @@ MSP430TargetLowering::LowerCCCArguments(SDValue Chain, << "\n"; } // Create the frame index object for this incoming parameter... - int FI = MFI->CreateFixedObject(ObjSize, VA.getLocMemOffset()); + int FI = MFI->CreateFixedObject(ObjSize, VA.getLocMemOffset(), true, false); // Create the SelectionDAG nodes corresponding to a load //from this parameter @@ -659,6 +674,42 @@ SDValue MSP430TargetLowering::LowerSIGN_EXTEND(SDValue Op, DAG.getValueType(Val.getValueType())); } +/// getPostIndexedAddressParts - returns true by value, base pointer and +/// offset pointer and addressing mode by reference if this node can be +/// combined with a load / store to form a post-indexed load / store. 
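For context on the post-increment support being introduced here (the POST_INC load actions above, the *_POST pseudo-instructions, and the getPostIndexedAddressParts hook later in this patch): a load whose address is then advanced by the access size can be folded into the MSP430 autoincrement form @Rn+. A hypothetical C loop of the shape this targets; the register names in the comment are purely illustrative:

#include <stdint.h>

/* Each iteration loads a byte and bumps the pointer by one, the
   load-plus-add-of-1 pattern getPostIndexedAddressParts accepts, so the
   body can use something like "add.b @r15+, r14" with no separate add. */
uint8_t sum_bytes(const uint8_t *p, unsigned n) {
  uint8_t s = 0;
  while (n--)
    s += *p++;
  return s;
}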
+bool MSP430TargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op, + SDValue &Base, + SDValue &Offset, + ISD::MemIndexedMode &AM, + SelectionDAG &DAG) const { + + LoadSDNode *LD = cast<LoadSDNode>(N); + if (LD->getExtensionType() != ISD::NON_EXTLOAD) + return false; + + EVT VT = LD->getMemoryVT(); + if (VT != MVT::i8 && VT != MVT::i16) + return false; + + if (Op->getOpcode() != ISD::ADD) + return false; + + if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Op->getOperand(1))) { + uint64_t RHSC = RHS->getZExtValue(); + if ((VT == MVT::i16 && RHSC != 2) || + (VT == MVT::i8 && RHSC != 1)) + return false; + + Base = Op->getOperand(0); + Offset = DAG.getConstant(RHSC, VT); + AM = ISD::POST_INC; + return true; + } + + return false; +} + + const char *MSP430TargetLowering::getTargetNodeName(unsigned Opcode) const { switch (Opcode) { default: return NULL; diff --git a/lib/Target/MSP430/MSP430ISelLowering.h b/lib/Target/MSP430/MSP430ISelLowering.h index fdbc384..d413ccb 100644 --- a/lib/Target/MSP430/MSP430ISelLowering.h +++ b/lib/Target/MSP430/MSP430ISelLowering.h @@ -136,6 +136,12 @@ namespace llvm { const SmallVectorImpl<ISD::OutputArg> &Outs, DebugLoc dl, SelectionDAG &DAG); + virtual bool getPostIndexedAddressParts(SDNode *N, SDNode *Op, + SDValue &Base, + SDValue &Offset, + ISD::MemIndexedMode &AM, + SelectionDAG &DAG) const; + const MSP430Subtarget &Subtarget; const MSP430TargetMachine &TM; }; diff --git a/lib/Target/MSP430/MSP430InstrInfo.cpp b/lib/Target/MSP430/MSP430InstrInfo.cpp index a6d9638..b2f09c7 100644 --- a/lib/Target/MSP430/MSP430InstrInfo.cpp +++ b/lib/Target/MSP430/MSP430InstrInfo.cpp @@ -35,15 +35,23 @@ void MSP430InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, const TargetRegisterClass *RC) const { DebugLoc DL = DebugLoc::getUnknownLoc(); if (MI != MBB.end()) DL = MI->getDebugLoc(); + MachineFunction &MF = *MBB.getParent(); + MachineFrameInfo &MFI = *MF.getFrameInfo(); + + MachineMemOperand *MMO = + MF.getMachineMemOperand(PseudoSourceValue::getFixedStack(FrameIdx), + MachineMemOperand::MOStore, 0, + MFI.getObjectSize(FrameIdx), + MFI.getObjectAlignment(FrameIdx)); if (RC == &MSP430::GR16RegClass) BuildMI(MBB, MI, DL, get(MSP430::MOV16mr)) .addFrameIndex(FrameIdx).addImm(0) - .addReg(SrcReg, getKillRegState(isKill)); + .addReg(SrcReg, getKillRegState(isKill)).addMemOperand(MMO); else if (RC == &MSP430::GR8RegClass) BuildMI(MBB, MI, DL, get(MSP430::MOV8mr)) .addFrameIndex(FrameIdx).addImm(0) - .addReg(SrcReg, getKillRegState(isKill)); + .addReg(SrcReg, getKillRegState(isKill)).addMemOperand(MMO); else llvm_unreachable("Cannot store this register to stack slot!"); } @@ -54,13 +62,21 @@ void MSP430InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, const TargetRegisterClass *RC) const{ DebugLoc DL = DebugLoc::getUnknownLoc(); if (MI != MBB.end()) DL = MI->getDebugLoc(); + MachineFunction &MF = *MBB.getParent(); + MachineFrameInfo &MFI = *MF.getFrameInfo(); + + MachineMemOperand *MMO = + MF.getMachineMemOperand(PseudoSourceValue::getFixedStack(FrameIdx), + MachineMemOperand::MOLoad, 0, + MFI.getObjectSize(FrameIdx), + MFI.getObjectAlignment(FrameIdx)); if (RC == &MSP430::GR16RegClass) BuildMI(MBB, MI, DL, get(MSP430::MOV16rm)) - .addReg(DestReg).addFrameIndex(FrameIdx).addImm(0); + .addReg(DestReg).addFrameIndex(FrameIdx).addImm(0).addMemOperand(MMO); else if (RC == &MSP430::GR8RegClass) BuildMI(MBB, MI, DL, get(MSP430::MOV8rm)) - .addReg(DestReg).addFrameIndex(FrameIdx).addImm(0); + .addReg(DestReg).addFrameIndex(FrameIdx).addImm(0).addMemOperand(MMO); 
else llvm_unreachable("Cannot store this register to stack slot!"); } diff --git a/lib/Target/MSP430/MSP430InstrInfo.td b/lib/Target/MSP430/MSP430InstrInfo.td index 2b50669..c3bbfe8 100644 --- a/lib/Target/MSP430/MSP430InstrInfo.td +++ b/lib/Target/MSP430/MSP430InstrInfo.td @@ -127,7 +127,7 @@ def NOP : Pseudo<(outs), (ins), "nop", []>; // // FIXME: Provide proper encoding! -let isReturn = 1, isTerminator = 1 in { +let isReturn = 1, isTerminator = 1, isBarrier = 1 in { def RET : Pseudo<(outs), (ins), "ret", [(MSP430retflag)]>; } @@ -142,7 +142,7 @@ let isBarrier = 1 in // Conditional branches let Uses = [SRW] in def JCC : Pseudo<(outs), (ins brtarget:$dst, cc:$cc), - "j$cc $dst", + "j$cc\t$dst", [(MSP430brcc bb:$dst, imm:$cc)]>; } // isBranch, isTerminator @@ -215,6 +215,13 @@ def MOVZX16rm8 : Pseudo<(outs GR16:$dst), (ins memsrc:$src), "mov.b\t{$src, $dst}", [(set GR16:$dst, (zextloadi16i8 addr:$src))]>; +let mayLoad = 1, hasExtraDefRegAllocReq = 1, Constraints = "$base = $base_wb" in { +def MOV8rm_POST : Pseudo<(outs GR8:$dst, GR16:$base_wb), (ins GR16:$base), + "mov.b\t{@$base+, $dst}", []>; +def MOV16rm_POST : Pseudo<(outs GR16:$dst, GR16:$base_wb), (ins GR16:$base), + "mov.w\t{@$base+, $dst}", []>; +} + // Any instruction that defines a 8-bit result leaves the high half of the // register. Truncate can be lowered to EXTRACT_SUBREG, and CopyFromReg may // be copying from a truncate, but any other 8-bit operation will zero-extend @@ -280,6 +287,15 @@ def ADD16rm : Pseudo<(outs GR16:$dst), (ins GR16:$src1, memsrc:$src2), [(set GR16:$dst, (add GR16:$src1, (load addr:$src2))), (implicit SRW)]>; +let mayLoad = 1, hasExtraDefRegAllocReq = 1, +Constraints = "$base = $base_wb, $src1 = $dst" in { +def ADD8rm_POST : Pseudo<(outs GR8:$dst, GR16:$base_wb), (ins GR8:$src1, GR16:$base), + "add.b\t{@$base+, $dst}", []>; +def ADD16rm_POST : Pseudo<(outs GR16:$dst, GR16:$base_wb), (ins GR16:$src1, GR16:$base), + "add.w\t{@$base+, $dst}", []>; +} + + def ADD8ri : Pseudo<(outs GR8:$dst), (ins GR8:$src1, i8imm:$src2), "add.b\t{$src2, $dst}", [(set GR8:$dst, (add GR8:$src1, imm:$src2)), @@ -409,6 +425,14 @@ def AND16rm : Pseudo<(outs GR16:$dst), (ins GR16:$src1, memsrc:$src2), [(set GR16:$dst, (and GR16:$src1, (load addr:$src2))), (implicit SRW)]>; +let mayLoad = 1, hasExtraDefRegAllocReq = 1, +Constraints = "$base = $base_wb, $src1 = $dst" in { +def AND8rm_POST : Pseudo<(outs GR8:$dst, GR16:$base_wb), (ins GR8:$src1, GR16:$base), + "and.b\t{@$base+, $dst}", []>; +def AND16rm_POST : Pseudo<(outs GR16:$dst, GR16:$base_wb), (ins GR16:$src1, GR16:$base), + "and.w\t{@$base+, $dst}", []>; +} + let isTwoAddress = 0 in { def AND8mr : Pseudo<(outs), (ins memdst:$dst, GR8:$src), "and.b\t{$src, $dst}", @@ -438,6 +462,92 @@ def AND16mm : Pseudo<(outs), (ins memdst:$dst, memsrc:$src), (implicit SRW)]>; } +let isCommutable = 1 in { // X = OR Y, Z == X = OR Z, Y +def OR8rr : Pseudo<(outs GR8:$dst), (ins GR8:$src1, GR8:$src2), + "bis.b\t{$src2, $dst}", + [(set GR8:$dst, (or GR8:$src1, GR8:$src2))]>; +def OR16rr : Pseudo<(outs GR16:$dst), (ins GR16:$src1, GR16:$src2), + "bis.w\t{$src2, $dst}", + [(set GR16:$dst, (or GR16:$src1, GR16:$src2))]>; +} + +def OR8ri : Pseudo<(outs GR8:$dst), (ins GR8:$src1, i8imm:$src2), + "bis.b\t{$src2, $dst}", + [(set GR8:$dst, (or GR8:$src1, imm:$src2))]>; +def OR16ri : Pseudo<(outs GR16:$dst), (ins GR16:$src1, i16imm:$src2), + "bis.w\t{$src2, $dst}", + [(set GR16:$dst, (or GR16:$src1, imm:$src2))]>; + +def OR8rm : Pseudo<(outs GR8:$dst), (ins GR8:$src1, memsrc:$src2), + "bis.b\t{$src2, 
$dst}", + [(set GR8:$dst, (or GR8:$src1, (load addr:$src2)))]>; +def OR16rm : Pseudo<(outs GR16:$dst), (ins GR16:$src1, memsrc:$src2), + "bis.w\t{$src2, $dst}", + [(set GR16:$dst, (or GR16:$src1, (load addr:$src2)))]>; + +let mayLoad = 1, hasExtraDefRegAllocReq = 1, +Constraints = "$base = $base_wb, $src1 = $dst" in { +def OR8rm_POST : Pseudo<(outs GR8:$dst, GR16:$base_wb), (ins GR8:$src1, GR16:$base), + "bis.b\t{@$base+, $dst}", []>; +def OR16rm_POST : Pseudo<(outs GR16:$dst, GR16:$base_wb), (ins GR16:$src1, GR16:$base), + "bis.w\t{@$base+, $dst}", []>; +} + +let isTwoAddress = 0 in { +def OR8mr : Pseudo<(outs), (ins memdst:$dst, GR8:$src), + "bis.b\t{$src, $dst}", + [(store (or (load addr:$dst), GR8:$src), addr:$dst)]>; +def OR16mr : Pseudo<(outs), (ins memdst:$dst, GR16:$src), + "bis.w\t{$src, $dst}", + [(store (or (load addr:$dst), GR16:$src), addr:$dst)]>; + +def OR8mi : Pseudo<(outs), (ins memdst:$dst, i8imm:$src), + "bis.b\t{$src, $dst}", + [(store (or (load addr:$dst), (i8 imm:$src)), addr:$dst)]>; +def OR16mi : Pseudo<(outs), (ins memdst:$dst, i16imm:$src), + "bis.w\t{$src, $dst}", + [(store (or (load addr:$dst), (i16 imm:$src)), addr:$dst)]>; + +def OR8mm : Pseudo<(outs), (ins memdst:$dst, memsrc:$src), + "bis.b\t{$src, $dst}", + [(store (or (i8 (load addr:$dst)), + (i8 (load addr:$src))), addr:$dst)]>; +def OR16mm : Pseudo<(outs), (ins memdst:$dst, memsrc:$src), + "bis.w\t{$src, $dst}", + [(store (or (i16 (load addr:$dst)), + (i16 (load addr:$src))), addr:$dst)]>; +} + +// bic does not modify condition codes +def BIC8rr : Pseudo<(outs GR8:$dst), (ins GR8:$src1, GR8:$src2), + "bic.b\t{$src2, $dst}", + [(set GR8:$dst, (and GR8:$src1, (not GR8:$src2)))]>; +def BIC16rr : Pseudo<(outs GR16:$dst), (ins GR16:$src1, GR16:$src2), + "bic.w\t{$src2, $dst}", + [(set GR16:$dst, (and GR16:$src1, (not GR16:$src2)))]>; + +def BIC8rm : Pseudo<(outs GR8:$dst), (ins GR8:$src1, memsrc:$src2), + "bic.b\t{$src2, $dst}", + [(set GR8:$dst, (and GR8:$src1, (not (i8 (load addr:$src2)))))]>; +def BIC16rm : Pseudo<(outs GR16:$dst), (ins GR16:$src1, memsrc:$src2), + "bic.w\t{$src2, $dst}", + [(set GR16:$dst, (and GR16:$src1, (not (i16 (load addr:$src2)))))]>; + +let isTwoAddress = 0 in { +def BIC8mr : Pseudo<(outs), (ins memdst:$dst, GR8:$src), + "bic.b\t{$src, $dst}", + [(store (and (load addr:$dst), (not GR8:$src)), addr:$dst)]>; +def BIC16mr : Pseudo<(outs), (ins memdst:$dst, GR16:$src), + "bic.w\t{$src, $dst}", + [(store (and (load addr:$dst), (not GR16:$src)), addr:$dst)]>; + +def BIC8mm : Pseudo<(outs), (ins memdst:$dst, memsrc:$src), + "bic.b\t{$src, $dst}", + [(store (and (load addr:$dst), (not (i8 (load addr:$src)))), addr:$dst)]>; +def BIC16mm : Pseudo<(outs), (ins memdst:$dst, memsrc:$src), + "bic.w\t{$src, $dst}", + [(store (and (load addr:$dst), (not (i16 (load addr:$src)))), addr:$dst)]>; +} let isCommutable = 1 in { // X = XOR Y, Z == X = XOR Z, Y def XOR8rr : Pseudo<(outs GR8:$dst), (ins GR8:$src1, GR8:$src2), @@ -468,6 +578,14 @@ def XOR16rm : Pseudo<(outs GR16:$dst), (ins GR16:$src1, memsrc:$src2), [(set GR16:$dst, (xor GR16:$src1, (load addr:$src2))), (implicit SRW)]>; +let mayLoad = 1, hasExtraDefRegAllocReq = 1, +Constraints = "$base = $base_wb, $src1 = $dst" in { +def XOR8rm_POST : Pseudo<(outs GR8:$dst, GR16:$base_wb), (ins GR8:$src1, GR16:$base), + "xor.b\t{@$base+, $dst}", []>; +def XOR16rm_POST : Pseudo<(outs GR16:$dst, GR16:$base_wb), (ins GR16:$src1, GR16:$base), + "xor.w\t{@$base+, $dst}", []>; +} + let isTwoAddress = 0 in { def XOR8mr : Pseudo<(outs), (ins memdst:$dst, 
GR8:$src), "xor.b\t{$src, $dst}", @@ -525,6 +643,14 @@ def SUB16rm : Pseudo<(outs GR16:$dst), (ins GR16:$src1, memsrc:$src2), [(set GR16:$dst, (sub GR16:$src1, (load addr:$src2))), (implicit SRW)]>; +let mayLoad = 1, hasExtraDefRegAllocReq = 1, +Constraints = "$base = $base_wb, $src1 = $dst" in { +def SUB8rm_POST : Pseudo<(outs GR8:$dst, GR16:$base_wb), (ins GR8:$src1, GR16:$base), + "sub.b\t{@$base+, $dst}", []>; +def SUB16rm_POST : Pseudo<(outs GR16:$dst, GR16:$base_wb), (ins GR16:$src1, GR16:$base), + "sub.w\t{@$base+, $dst}", []>; +} + let isTwoAddress = 0 in { def SUB8mr : Pseudo<(outs), (ins memdst:$dst, GR8:$src), "sub.b\t{$src, $dst}", @@ -650,58 +776,14 @@ def SEXT16r : Pseudo<(outs GR16:$dst), (ins GR16:$src), } // Defs = [SRW] +def ZEXT16r : Pseudo<(outs GR16:$dst), (ins GR16:$src), + "mov.b\t{$src, $dst}", + [(set GR16:$dst, (zext (trunc GR16:$src)))]>; + def SWPB16r : Pseudo<(outs GR16:$dst), (ins GR16:$src), "swpb\t$dst", [(set GR16:$dst, (bswap GR16:$src))]>; -let isCommutable = 1 in { // X = OR Y, Z == X = OR Z, Y -def OR8rr : Pseudo<(outs GR8:$dst), (ins GR8:$src1, GR8:$src2), - "bis.b\t{$src2, $dst}", - [(set GR8:$dst, (or GR8:$src1, GR8:$src2))]>; -def OR16rr : Pseudo<(outs GR16:$dst), (ins GR16:$src1, GR16:$src2), - "bis.w\t{$src2, $dst}", - [(set GR16:$dst, (or GR16:$src1, GR16:$src2))]>; -} - -def OR8ri : Pseudo<(outs GR8:$dst), (ins GR8:$src1, i8imm:$src2), - "bis.b\t{$src2, $dst}", - [(set GR8:$dst, (or GR8:$src1, imm:$src2))]>; -def OR16ri : Pseudo<(outs GR16:$dst), (ins GR16:$src1, i16imm:$src2), - "bis.w\t{$src2, $dst}", - [(set GR16:$dst, (or GR16:$src1, imm:$src2))]>; - -def OR8rm : Pseudo<(outs GR8:$dst), (ins GR8:$src1, memsrc:$src2), - "bis.b\t{$src2, $dst}", - [(set GR8:$dst, (or GR8:$src1, (load addr:$src2)))]>; -def OR16rm : Pseudo<(outs GR16:$dst), (ins GR16:$src1, memsrc:$src2), - "bis.w\t{$src2, $dst}", - [(set GR16:$dst, (or GR16:$src1, (load addr:$src2)))]>; - -let isTwoAddress = 0 in { -def OR8mr : Pseudo<(outs), (ins memdst:$dst, GR8:$src), - "bis.b\t{$src, $dst}", - [(store (or (load addr:$dst), GR8:$src), addr:$dst)]>; -def OR16mr : Pseudo<(outs), (ins memdst:$dst, GR16:$src), - "bis.w\t{$src, $dst}", - [(store (or (load addr:$dst), GR16:$src), addr:$dst)]>; - -def OR8mi : Pseudo<(outs), (ins memdst:$dst, i8imm:$src), - "bis.b\t{$src, $dst}", - [(store (or (load addr:$dst), (i8 imm:$src)), addr:$dst)]>; -def OR16mi : Pseudo<(outs), (ins memdst:$dst, i16imm:$src), - "bis.w\t{$src, $dst}", - [(store (or (load addr:$dst), (i16 imm:$src)), addr:$dst)]>; - -def OR8mm : Pseudo<(outs), (ins memdst:$dst, memsrc:$src), - "bis.b\t{$src, $dst}", - [(store (or (i8 (load addr:$dst)), - (i8 (load addr:$src))), addr:$dst)]>; -def OR16mm : Pseudo<(outs), (ins memdst:$dst, memsrc:$src), - "bis.w\t{$src, $dst}", - [(store (or (i16 (load addr:$dst)), - (i16 (load addr:$src))), addr:$dst)]>; -} - } // isTwoAddress = 1 // Integer comparisons @@ -851,3 +933,6 @@ def : Pat<(store (subc (load addr:$dst), GR8:$src), addr:$dst), (SUB8mr addr:$dst, GR8:$src)>; def : Pat<(store (subc (load addr:$dst), (i8 (load addr:$src))), addr:$dst), (SUB8mm addr:$dst, addr:$src)>; + +// peephole patterns +def : Pat<(and GR16:$src, 255), (ZEXT16r GR16:$src)>; diff --git a/lib/Target/MSP430/MSP430MCAsmInfo.cpp b/lib/Target/MSP430/MSP430MCAsmInfo.cpp index 069313e..4e3a8d0 100644 --- a/lib/Target/MSP430/MSP430MCAsmInfo.cpp +++ b/lib/Target/MSP430/MSP430MCAsmInfo.cpp @@ -15,6 +15,12 @@ using namespace llvm; MSP430MCAsmInfo::MSP430MCAsmInfo(const Target &T, const StringRef &TT) { + 
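Regarding the new ZEXT16r pseudo and the peephole pattern above that maps (and GR16:$src, 255) onto it: per the existing comment in this file about 8-bit results, MSP430 byte operations zero the high byte of a 16-bit destination, so a mov.b already performs the zero extension and the explicit AND can be dropped. A hypothetical C function the pattern should catch:

#include <stdint.h>

/* The mask matches (and GR16:$src, 255), so this can lower to a single
   byte move (which zero-extends) instead of an and.w with an immediate. */
uint16_t low_byte(uint16_t x) {
  return x & 0xffu;
}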
PrivateGlobalPrefix = ".L"; + WeakRefDirective ="\t.weak\t"; + SetDirective = "\t.set\t"; + PCSymbol="."; + AlignmentIsInBytes = false; AllowNameToStartWithDigit = true; + UsesELFSectionDirectiveForBSS = true; } diff --git a/lib/Target/MSP430/MSP430RegisterInfo.cpp b/lib/Target/MSP430/MSP430RegisterInfo.cpp index 1a5893e..92baad9 100644 --- a/lib/Target/MSP430/MSP430RegisterInfo.cpp +++ b/lib/Target/MSP430/MSP430RegisterInfo.cpp @@ -212,7 +212,7 @@ MSP430RegisterInfo::processFunctionBeforeFrameFinalized(MachineFunction &MF) const { // Create a frame entry for the FPW register that must be saved. if (hasFP(MF)) { - int FrameIdx = MF.getFrameInfo()->CreateFixedObject(2, -4); + int FrameIdx = MF.getFrameInfo()->CreateFixedObject(2, -4, true, false); assert(FrameIdx == MF.getFrameInfo()->getObjectIndexBegin() && "Slot for FPW register must be last in order to be found!"); FrameIdx = 0; @@ -355,7 +355,7 @@ unsigned MSP430RegisterInfo::getRARegister() const { return MSP430::PCW; } -unsigned MSP430RegisterInfo::getFrameRegister(MachineFunction &MF) const { +unsigned MSP430RegisterInfo::getFrameRegister(const MachineFunction &MF) const { return hasFP(MF) ? MSP430::FPW : MSP430::SPW; } diff --git a/lib/Target/MSP430/MSP430RegisterInfo.h b/lib/Target/MSP430/MSP430RegisterInfo.h index 5f3a216..aa08787 100644 --- a/lib/Target/MSP430/MSP430RegisterInfo.h +++ b/lib/Target/MSP430/MSP430RegisterInfo.h @@ -60,7 +60,7 @@ public: // Debug information queries. unsigned getRARegister() const; - unsigned getFrameRegister(MachineFunction &MF) const; + unsigned getFrameRegister(const MachineFunction &MF) const; //! Get DWARF debugging register number int getDwarfRegNum(unsigned RegNum, bool isEH) const; diff --git a/lib/Target/MSP430/MSP430TargetMachine.cpp b/lib/Target/MSP430/MSP430TargetMachine.cpp index da54507..14db406 100644 --- a/lib/Target/MSP430/MSP430TargetMachine.cpp +++ b/lib/Target/MSP430/MSP430TargetMachine.cpp @@ -32,7 +32,7 @@ MSP430TargetMachine::MSP430TargetMachine(const Target &T, LLVMTargetMachine(T, TT), Subtarget(TT, FS), // FIXME: Check TargetData string. - DataLayout("e-p:16:16:16-i8:8:8-i16:16:16-i32:16:32"), + DataLayout("e-p:16:16:16-i8:8:8-i16:16:16-i32:16:32-n8:16"), InstrInfo(*this), TLInfo(*this), FrameInfo(TargetFrameInfo::StackGrowsDown, 2, -2) { } diff --git a/lib/Target/MSP430/README.txt b/lib/Target/MSP430/README.txt index b14e93d..5b9634b 100644 --- a/lib/Target/MSP430/README.txt +++ b/lib/Target/MSP430/README.txt @@ -11,8 +11,6 @@ available pretty soon. Some things are incomplete / not implemented yet (this list surely is not complete as well): -0. Implement asmprinting for variables :) - 1. Verify, how stuff is handling implicit zext with 8 bit operands (this might be modelled currently in improper way - should we need to mark the superreg as def for every 8 bit instruction?). diff --git a/lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp b/lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp index 66ade89..4898fae 100644 --- a/lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp +++ b/lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp @@ -282,7 +282,7 @@ bool MipsAsmPrinter::runOnMachineFunction(MachineFunction &MF) { // Print the assembly for the instruction. 
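A note on the DataLayout string changes in this patch (the "-n8:16" added for MSP430 above, and the "-n32", "-n32:64" and "-n8" added for the other targets): as background not spelled out in the diff itself, the trailing n component lists the integer widths that are native to the target, which optimizers can consult when deciding whether to widen or narrow integer computations. An annotated example string, illustrative only:

// "e"           little endian
// "p:16:16:16"  16-bit pointers with 16-bit ABI and preferred alignment
// "n8:16"       8- and 16-bit integers are native register widths
static const char *ExampleLayout = "e-p:16:16:16-i8:8:8-i16:16:16-n8:16";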
printInstruction(II); - if (VerboseAsm && !II->getDebugLoc().isUnknown()) + if (VerboseAsm) EmitComments(*II); O << '\n'; diff --git a/lib/Target/Mips/MipsISelDAGToDAG.cpp b/lib/Target/Mips/MipsISelDAGToDAG.cpp index 810dce1..cbcedb8 100644 --- a/lib/Target/Mips/MipsISelDAGToDAG.cpp +++ b/lib/Target/Mips/MipsISelDAGToDAG.cpp @@ -108,7 +108,6 @@ private: /// InstructionSelect - This callback is invoked by /// SelectionDAGISel when it has created a SelectionDAG for us to codegen. void MipsDAGToDAGISel::InstructionSelect() { - DEBUG(BB->dump()); // Codegen the basic block. DEBUG(errs() << "===== Instruction selection begins:\n"); DEBUG(Indent = 0); @@ -171,6 +170,27 @@ SelectAddr(SDValue Op, SDValue Addr, SDValue &Offset, SDValue &Base) return true; } } + + // When loading from constant pools, load the lower address part in + // the instruction itself. Instead of: + // lui $2, %hi($CPI1_0) + // addiu $2, $2, %lo($CPI1_0) + // lwc1 $f0, 0($2) + // Generate: + // lui $2, %hi($CPI1_0) + // lwc1 $f0, %lo($CPI1_0)($2) + if (Addr.getOperand(0).getOpcode() == MipsISD::Hi && + Addr.getOperand(1).getOpcode() == MipsISD::Lo) { + SDValue LoVal = Addr.getOperand(1); + if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>( + LoVal.getOperand(0))) { + if (!CP->getOffset()) { + Base = Addr.getOperand(0); + Offset = LoVal.getOperand(0); + return true; + } + } + } } Base = Addr; @@ -315,6 +335,16 @@ SDNode* MipsDAGToDAGISel::Select(SDValue N) { case ISD::GLOBAL_OFFSET_TABLE: return getGlobalBaseReg(); + case ISD::ConstantFP: { + ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N); + if (N.getValueType() == MVT::f64 && CN->isExactlyValue(+0.0)) { + SDValue Zero = CurDAG->getRegister(Mips::ZERO, MVT::i32); + ReplaceUses(N, Zero); + return Zero.getNode(); + } + break; + } + /// Handle direct and indirect calls when using PIC. On PIC, when /// GOT is smaller than about 64k (small code) the GA target is /// loaded with only one instruction. Otherwise GA's target must diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp index 61da8f8..c9a43b4 100644 --- a/lib/Target/Mips/MipsISelLowering.cpp +++ b/lib/Target/Mips/MipsISelLowering.cpp @@ -568,7 +568,7 @@ LowerConstantPool(SDValue Op, SelectionDAG &DAG) ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Op); Constant *C = N->getConstVal(); SDValue CP = DAG.getTargetConstantPool(C, MVT::i32, N->getAlignment(), - MipsII::MO_ABS_HILO); + N->getOffset(), MipsII::MO_ABS_HILO); // FIXME there isn't actually debug info here DebugLoc dl = Op.getDebugLoc(); @@ -704,7 +704,7 @@ MipsTargetLowering::LowerCall(SDValue Chain, SDValue Callee, // the stack (even if less than 4 are used as arguments) if (Subtarget->isABI_O32()) { int VTsize = EVT(MVT::i32).getSizeInBits()/8; - MFI->CreateFixedObject(VTsize, (VTsize*3)); + MFI->CreateFixedObject(VTsize, (VTsize*3), true, false); CCInfo.AnalyzeCallOperands(Outs, CC_MipsO32); } else CCInfo.AnalyzeCallOperands(Outs, CC_Mips); @@ -773,7 +773,7 @@ MipsTargetLowering::LowerCall(SDValue Chain, SDValue Callee, // if O32 ABI is used. For EABI the first address is zero. LastArgStackLoc = (FirstStackArgLoc + VA.getLocMemOffset()); int FI = MFI->CreateFixedObject(VA.getValVT().getSizeInBits()/8, - LastArgStackLoc); + LastArgStackLoc, true, false); SDValue PtrOff = DAG.getFrameIndex(FI,getPointerTy()); @@ -849,7 +849,7 @@ MipsTargetLowering::LowerCall(SDValue Chain, SDValue Callee, // Create the frame index only once. 
SPOffset here can be anything // (this will be fixed on processFunctionBeforeFrameFinalized) if (MipsFI->getGPStackOffset() == -1) { - FI = MFI->CreateFixedObject(4, 0); + FI = MFI->CreateFixedObject(4, 0, true, false); MipsFI->setGPFI(FI); } MipsFI->setGPStackOffset(LastArgStackLoc); @@ -1002,7 +1002,7 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain, // be used on emitPrologue) to avoid mis-calc of the first stack // offset on PEI::calculateFrameObjectOffsets. // Arguments are always 32-bit. - int FI = MFI->CreateFixedObject(4, 0); + int FI = MFI->CreateFixedObject(4, 0, true, false); MipsFI->recordStoreVarArgsFI(FI, -(4+(i*4))); SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy()); @@ -1025,7 +1025,7 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain, // offset on PEI::calculateFrameObjectOffsets. // Arguments are always 32-bit. unsigned ArgSize = VA.getLocVT().getSizeInBits()/8; - int FI = MFI->CreateFixedObject(ArgSize, 0); + int FI = MFI->CreateFixedObject(ArgSize, 0, true, false); MipsFI->recordLoadArgsFI(FI, -(ArgSize+ (FirstStackArgLoc + VA.getLocMemOffset()))); diff --git a/lib/Target/Mips/MipsInstrFPU.td b/lib/Target/Mips/MipsInstrFPU.td index bd61738..ce89cfd 100644 --- a/lib/Target/Mips/MipsInstrFPU.td +++ b/lib/Target/Mips/MipsInstrFPU.td @@ -48,6 +48,7 @@ let PrintMethod = "printFCCOperand" in def In32BitMode : Predicate<"!Subtarget.isFP64bit()">; def IsSingleFloat : Predicate<"Subtarget.isSingleFloat()">; def IsNotSingleFloat : Predicate<"!Subtarget.isSingleFloat()">; +def IsNotMipsI : Predicate<"!Subtarget.isMips1()">; //===----------------------------------------------------------------------===// // Instruction Class Templates @@ -173,7 +174,7 @@ let fd = 0 in { } /// Floating Point Memory Instructions -let Predicates = [IsNotSingleFloat] in { +let Predicates = [IsNotSingleFloat, IsNotMipsI] in { def LDC1 : FFI<0b110101, (outs AFGR64:$ft), (ins mem:$addr), "ldc1 $ft, $addr", [(set AFGR64:$ft, (load addr:$addr))]>; @@ -284,7 +285,12 @@ def fpimm0 : PatLeaf<(fpimm), [{ return N->isExactlyValue(+0.0); }]>; +def fpimm0neg : PatLeaf<(fpimm), [{ + return N->isExactlyValue(-0.0); +}]>; + def : Pat<(f32 fpimm0), (MTC1 ZERO)>; +def : Pat<(f32 fpimm0neg), (FNEG_S32 (MTC1 ZERO))>; def : Pat<(f32 (sint_to_fp CPURegs:$src)), (CVTS_W32 (MTC1 CPURegs:$src))>; def : Pat<(f64 (sint_to_fp CPURegs:$src)), (CVTD_W32 (MTC1 CPURegs:$src))>; diff --git a/lib/Target/Mips/MipsInstrInfo.cpp b/lib/Target/Mips/MipsInstrInfo.cpp index 9159904..af64c9f 100644 --- a/lib/Target/Mips/MipsInstrInfo.cpp +++ b/lib/Target/Mips/MipsInstrInfo.cpp @@ -134,6 +134,9 @@ copyRegToReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const TargetRegisterClass *DestRC, const TargetRegisterClass *SrcRC) const { DebugLoc DL = DebugLoc::getUnknownLoc(); + const MachineFunction *MF = MBB.getParent(); + const TargetRegisterInfo *TRI = MF->getTarget().getRegisterInfo(); + if (I != MBB.end()) DL = I->getDebugLoc(); if (DestRC != SrcRC) { @@ -153,6 +156,13 @@ copyRegToReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, else if ((DestRC == Mips::FGR32RegisterClass) && (SrcRC == Mips::CPURegsRegisterClass)) BuildMI(MBB, I, DL, get(Mips::MTC1), DestReg).addReg(SrcReg); + else if ((DestRC == Mips::AFGR64RegisterClass) && + (SrcRC == Mips::CPURegsRegisterClass) && + (SrcReg == Mips::ZERO)) { + const unsigned *AliasSet = TRI->getAliasSet(DestReg); + BuildMI(MBB, I, DL, get(Mips::MTC1), AliasSet[0]).addReg(SrcReg); + BuildMI(MBB, I, DL, get(Mips::MTC1), AliasSet[1]).addReg(SrcReg); + } // Move from/to 
Hi/Lo registers else if ((DestRC == Mips::HILORegisterClass) && @@ -163,9 +173,8 @@ copyRegToReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, (DestRC == Mips::CPURegsRegisterClass)) { unsigned Opc = (SrcReg == Mips::HI) ? Mips::MFHI : Mips::MFLO; BuildMI(MBB, I, DL, get(Opc), DestReg); - - // Can't copy this register - } else + } else + // Can't copy this register return false; return true; diff --git a/lib/Target/Mips/MipsMachineFunction.h b/lib/Target/Mips/MipsMachineFunction.h index 949c78a..a300f49 100644 --- a/lib/Target/Mips/MipsMachineFunction.h +++ b/lib/Target/Mips/MipsMachineFunction.h @@ -103,6 +103,7 @@ public: int getGPFI() const { return GPHolder.FI; } void setGPStackOffset(int Off) { GPHolder.SPOffset = Off; } void setGPFI(int FI) { GPHolder.FI = FI; } + bool needGPSaveRestore() const { return GPHolder.SPOffset != -1; } bool hasLoadArgs() const { return HasLoadArgs; } bool hasStoreVarArgs() const { return HasStoreVarArgs; } diff --git a/lib/Target/Mips/MipsRegisterInfo.cpp b/lib/Target/Mips/MipsRegisterInfo.cpp index d2289e9..ad326db 100644 --- a/lib/Target/Mips/MipsRegisterInfo.cpp +++ b/lib/Target/Mips/MipsRegisterInfo.cpp @@ -287,7 +287,7 @@ void MipsRegisterInfo::adjustMipsStackFrame(MachineFunction &MF) const } if (hasFP(MF)) { - MFI->setObjectOffset(MFI->CreateStackObject(RegSize, RegSize), + MFI->setObjectOffset(MFI->CreateStackObject(RegSize, RegSize, true), StackOffset); MipsFI->setFPStackOffset(StackOffset); TopCPUSavedRegOff = StackOffset; @@ -295,7 +295,7 @@ void MipsRegisterInfo::adjustMipsStackFrame(MachineFunction &MF) const } if (MFI->hasCalls()) { - MFI->setObjectOffset(MFI->CreateStackObject(RegSize, RegSize), + MFI->setObjectOffset(MFI->CreateStackObject(RegSize, RegSize, true), StackOffset); MipsFI->setRAStackOffset(StackOffset); TopCPUSavedRegOff = StackOffset; @@ -438,11 +438,10 @@ emitPrologue(MachineFunction &MF) const .addReg(Mips::SP).addReg(Mips::ZERO); } - // PIC speficic function prologue - if ((isPIC) && (MFI->hasCalls())) { + // Restore GP from the saved stack location + if (MipsFI->needGPSaveRestore()) BuildMI(MBB, MBBI, dl, TII.get(Mips::CPRESTORE)) .addImm(MipsFI->getGPStackOffset()); - } } void MipsRegisterInfo:: @@ -489,13 +488,11 @@ emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const void MipsRegisterInfo:: processFunctionBeforeFrameFinalized(MachineFunction &MF) const { - // Set the SPOffset on the FI where GP must be saved/loaded. + // Set the stack offset where GP must be saved/loaded from. MachineFrameInfo *MFI = MF.getFrameInfo(); - bool isPIC = (MF.getTarget().getRelocationModel() == Reloc::PIC_); - if (MFI->hasCalls() && isPIC) { - MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>(); + MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>(); + if (MipsFI->needGPSaveRestore()) MFI->setObjectOffset(MipsFI->getGPFI(), MipsFI->getGPStackOffset()); - } } unsigned MipsRegisterInfo:: @@ -504,7 +501,7 @@ getRARegister() const { } unsigned MipsRegisterInfo:: -getFrameRegister(MachineFunction &MF) const { +getFrameRegister(const MachineFunction &MF) const { return hasFP(MF) ? Mips::FP : Mips::SP; } diff --git a/lib/Target/Mips/MipsRegisterInfo.h b/lib/Target/Mips/MipsRegisterInfo.h index 122f786..5b45921 100644 --- a/lib/Target/Mips/MipsRegisterInfo.h +++ b/lib/Target/Mips/MipsRegisterInfo.h @@ -65,7 +65,7 @@ struct MipsRegisterInfo : public MipsGenRegisterInfo { /// Debug information queries. 
unsigned getRARegister() const; - unsigned getFrameRegister(MachineFunction &MF) const; + unsigned getFrameRegister(const MachineFunction &MF) const; /// Exception handling queries. unsigned getEHExceptionRegister() const; diff --git a/lib/Target/Mips/MipsTargetMachine.cpp b/lib/Target/Mips/MipsTargetMachine.cpp index 4fa5450..b3c2313 100644 --- a/lib/Target/Mips/MipsTargetMachine.cpp +++ b/lib/Target/Mips/MipsTargetMachine.cpp @@ -38,8 +38,8 @@ MipsTargetMachine(const Target &T, const std::string &TT, const std::string &FS, bool isLittle=false): LLVMTargetMachine(T, TT), Subtarget(TT, FS, isLittle), - DataLayout(isLittle ? std::string("e-p:32:32:32-i8:8:32-i16:16:32") : - std::string("E-p:32:32:32-i8:8:32-i16:16:32")), + DataLayout(isLittle ? std::string("e-p:32:32:32-i8:8:32-i16:16:32-n32") : + std::string("E-p:32:32:32-i8:8:32-i16:16:32-n32")), InstrInfo(*this), FrameInfo(TargetFrameInfo::StackGrowsUp, 8, 0), TLInfo(*this) { diff --git a/lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.cpp b/lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.cpp index b2a4c11..e1f2587 100644 --- a/lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.cpp +++ b/lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.cpp @@ -46,7 +46,7 @@ PIC16AsmPrinter::PIC16AsmPrinter(formatted_raw_ostream &O, TargetMachine &TM, bool PIC16AsmPrinter::printMachineInstruction(const MachineInstr *MI) { processDebugLoc(MI, true); printInstruction(MI); - if (VerboseAsm && !MI->getDebugLoc().isUnknown()) + if (VerboseAsm) EmitComments(*MI); O << '\n'; processDebugLoc(MI, false); diff --git a/lib/Target/PIC16/PIC16ISelDAGToDAG.cpp b/lib/Target/PIC16/PIC16ISelDAGToDAG.cpp index cc57d12..e13e6cd 100644 --- a/lib/Target/PIC16/PIC16ISelDAGToDAG.cpp +++ b/lib/Target/PIC16/PIC16ISelDAGToDAG.cpp @@ -30,7 +30,6 @@ FunctionPass *llvm::createPIC16ISelDag(PIC16TargetMachine &TM) { /// InstructionSelect - This callback is invoked by /// SelectionDAGISel when it has created a SelectionDAG for us to codegen. void PIC16DAGToDAGISel::InstructionSelect() { - DEBUG(BB->dump()); SelectRoot(*CurDAG); CurDAG->RemoveDeadNodes(); } diff --git a/lib/Target/PIC16/PIC16ISelLowering.cpp b/lib/Target/PIC16/PIC16ISelLowering.cpp index 635befe..71c3d37 100644 --- a/lib/Target/PIC16/PIC16ISelLowering.cpp +++ b/lib/Target/PIC16/PIC16ISelLowering.cpp @@ -1070,7 +1070,7 @@ SDValue PIC16TargetLowering::ConvertToMemOperand(SDValue Op, // Put the value on stack. // Get a stack slot index and convert to es. 
- int FI = MF.getFrameInfo()->CreateStackObject(1, 1); + int FI = MF.getFrameInfo()->CreateStackObject(1, 1, false); const char *tmpName = createESName(PAN::getTempdataLabel(FuncName)); SDValue ES = DAG.getTargetExternalSymbol(tmpName, MVT::i8); diff --git a/lib/Target/PIC16/PIC16RegisterInfo.cpp b/lib/Target/PIC16/PIC16RegisterInfo.cpp index 47087ab..8ba9a1d 100644 --- a/lib/Target/PIC16/PIC16RegisterInfo.cpp +++ b/lib/Target/PIC16/PIC16RegisterInfo.cpp @@ -72,7 +72,7 @@ getDwarfRegNum(unsigned RegNum, bool isEH) const { return -1; } -unsigned PIC16RegisterInfo::getFrameRegister(MachineFunction &MF) const { +unsigned PIC16RegisterInfo::getFrameRegister(const MachineFunction &MF) const { llvm_unreachable("PIC16 Does not have any frame register"); return 0; } diff --git a/lib/Target/PIC16/PIC16RegisterInfo.h b/lib/Target/PIC16/PIC16RegisterInfo.h index 8aa5a10..1d5dbbf 100644 --- a/lib/Target/PIC16/PIC16RegisterInfo.h +++ b/lib/Target/PIC16/PIC16RegisterInfo.h @@ -59,7 +59,7 @@ class PIC16RegisterInfo : public PIC16GenRegisterInfo { virtual void emitPrologue(MachineFunction &MF) const; virtual void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const; virtual int getDwarfRegNum(unsigned RegNum, bool isEH) const; - virtual unsigned getFrameRegister(MachineFunction &MF) const; + virtual unsigned getFrameRegister(const MachineFunction &MF) const; virtual unsigned getRARegister() const; }; diff --git a/lib/Target/PIC16/PIC16TargetMachine.cpp b/lib/Target/PIC16/PIC16TargetMachine.cpp index 08307e7..e2acb85 100644 --- a/lib/Target/PIC16/PIC16TargetMachine.cpp +++ b/lib/Target/PIC16/PIC16TargetMachine.cpp @@ -34,7 +34,7 @@ PIC16TargetMachine::PIC16TargetMachine(const Target &T, const std::string &TT, const std::string &FS, bool Trad) : LLVMTargetMachine(T, TT), Subtarget(TT, FS, Trad), - DataLayout("e-p:16:8:8-i8:8:8-i16:8:8-i32:8:8"), + DataLayout("e-p:16:8:8-i8:8:8-i16:8:8-i32:8:8-n8"), InstrInfo(*this), TLInfo(*this), FrameInfo(TargetFrameInfo::StackGrowsUp, 8, 0) { } diff --git a/lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp b/lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp index 2dac18f..aae4607 100644 --- a/lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp +++ b/lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp @@ -594,7 +594,7 @@ void PPCAsmPrinter::printMachineInstruction(const MachineInstr *MI) { printInstruction(MI); - if (VerboseAsm && !MI->getDebugLoc().isUnknown()) + if (VerboseAsm) EmitComments(*MI); O << '\n'; @@ -672,14 +672,14 @@ bool PPCLinuxAsmPrinter::runOnMachineFunction(MachineFunction &MF) { O << "\t.size\t" << CurrentFnName << ",.-" << CurrentFnName << '\n'; - // Print out jump tables referenced by the function. - EmitJumpTableInfo(MF.getJumpTableInfo(), MF); - OutStreamer.SwitchSection(getObjFileLowering().SectionForGlobal(F, Mang, TM)); // Emit post-function debug information. DW->EndFunction(&MF); + // Print out jump tables referenced by the function. + EmitJumpTableInfo(MF.getJumpTableInfo(), MF); + // We didn't modify anything. return false; } @@ -853,12 +853,12 @@ bool PPCDarwinAsmPrinter::runOnMachineFunction(MachineFunction &MF) { } } - // Print out jump tables referenced by the function. - EmitJumpTableInfo(MF.getJumpTableInfo(), MF); - // Emit post-function debug information. DW->EndFunction(&MF); + // Print out jump tables referenced by the function. + EmitJumpTableInfo(MF.getJumpTableInfo(), MF); + // We didn't modify anything. 
return false; } diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp index b866240..fb9a240 100644 --- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -187,8 +187,6 @@ private: /// InstructionSelect - This callback is invoked by /// SelectionDAGISel when it has created a SelectionDAG for us to codegen. void PPCDAGToDAGISel::InstructionSelect() { - DEBUG(BB->dump()); - // Select target instructions for the DAG. SelectRoot(*CurDAG); CurDAG->RemoveDeadNodes(); diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index 7f48ef0..099fcb5 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -637,7 +637,7 @@ bool PPC::isAllNegativeZeroVector(SDNode *N) { unsigned BitSize; bool HasAnyUndefs; - if (BV->isConstantSplat(APVal, APUndef, BitSize, HasAnyUndefs, 32)) + if (BV->isConstantSplat(APVal, APUndef, BitSize, HasAnyUndefs, 32, true)) if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N->getOperand(0))) return CFP->getValueAPF().isNegZero(); @@ -1625,7 +1625,7 @@ PPCTargetLowering::LowerFormalArguments_SVR4( unsigned ArgSize = VA.getLocVT().getSizeInBits() / 8; int FI = MFI->CreateFixedObject(ArgSize, VA.getLocMemOffset(), - isImmutable); + isImmutable, false); // Create load nodes to retrieve arguments from the stack. SDValue FIN = DAG.getFrameIndex(FI, PtrVT); @@ -1690,9 +1690,10 @@ PPCTargetLowering::LowerFormalArguments_SVR4( NumFPArgRegs * EVT(MVT::f64).getSizeInBits()/8; VarArgsStackOffset = MFI->CreateFixedObject(PtrVT.getSizeInBits()/8, - CCInfo.getNextStackOffset()); + CCInfo.getNextStackOffset(), + true, false); - VarArgsFrameIndex = MFI->CreateStackObject(Depth, 8); + VarArgsFrameIndex = MFI->CreateStackObject(Depth, 8, false); SDValue FIN = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT); // The fixed integer arguments of a variadic function are @@ -1895,7 +1896,7 @@ PPCTargetLowering::LowerFormalArguments_Darwin( CurArgOffset = CurArgOffset + (4 - ObjSize); } // The value of the object is its address. - int FI = MFI->CreateFixedObject(ObjSize, CurArgOffset); + int FI = MFI->CreateFixedObject(ObjSize, CurArgOffset, true, false); SDValue FIN = DAG.getFrameIndex(FI, PtrVT); InVals.push_back(FIN); if (ObjSize==1 || ObjSize==2) { @@ -1918,7 +1919,7 @@ PPCTargetLowering::LowerFormalArguments_Darwin( // the object. 
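Many call sites in these PowerPC hunks (and the Sparc, SystemZ, and X86 ones further down) now pass extra boolean arguments to CreateFixedObject and CreateStackObject. The diff does not show the new declarations, so the parameter meanings used below, an immutability flag plus a spill-slot flag, are an assumption; the sketch only illustrates the call-site shape with stand-in types, not the real MachineFrameInfo API.

// Sketch only; parameter meanings are assumed, the real signatures live in
// MachineFrameInfo.h. The point is that call sites now state the flags
// explicitly instead of relying on defaults.
#include <cstdint>
#include <cstdio>
#include <vector>

struct FrameObject {
  uint64_t Size;
  int64_t Offset;     // fixed objects only
  unsigned Align;     // variable stack objects only
  bool Immutable;
  bool IsSpillSlot;   // assumed meaning of the new trailing bool
};

class FrameInfoSketch {
  std::vector<FrameObject> Objects;
public:
  int CreateFixedObject(uint64_t Size, int64_t SPOffset,
                        bool Immutable, bool IsSpillSlot) {
    Objects.push_back({Size, SPOffset, 0, Immutable, IsSpillSlot});
    return int(Objects.size()) - 1;
  }
  int CreateStackObject(uint64_t Size, unsigned Alignment, bool IsSpillSlot) {
    Objects.push_back({Size, 0, Alignment, false, IsSpillSlot});
    return int(Objects.size()) - 1;
  }
};

int main() {
  FrameInfoSketch MFI;
  // Incoming argument slot, mirroring "CreateFixedObject(ArgSize, Offset, true, false)".
  int ArgFI = MFI.CreateFixedObject(8, 48, true, false);
  // Scratch slot for a conversion, mirroring "CreateStackObject(8, 8, false)".
  int TmpFI = MFI.CreateStackObject(8, 8, false);
  std::printf("arg FI=%d tmp FI=%d\n", ArgFI, TmpFI);
  return 0;
}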
if (GPR_idx != Num_GPR_Regs) { unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass); - int FI = MFI->CreateFixedObject(PtrByteSize, ArgOffset); + int FI = MFI->CreateFixedObject(PtrByteSize, ArgOffset, true, false); SDValue FIN = DAG.getFrameIndex(FI, PtrVT); SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, NULL, 0); @@ -2043,7 +2044,7 @@ PPCTargetLowering::LowerFormalArguments_Darwin( if (needsLoad) { int FI = MFI->CreateFixedObject(ObjSize, CurArgOffset + (ArgSize - ObjSize), - isImmutable); + isImmutable, false); SDValue FIN = DAG.getFrameIndex(FI, PtrVT); ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, NULL, 0); } @@ -2076,7 +2077,7 @@ PPCTargetLowering::LowerFormalArguments_Darwin( int Depth = ArgOffset; VarArgsFrameIndex = MFI->CreateFixedObject(PtrVT.getSizeInBits()/8, - Depth); + Depth, true, false); SDValue FIN = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT); // If this function is vararg, store any remaining integer argument regs @@ -2289,7 +2290,8 @@ static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG, int NewRetAddrLoc = SPDiff + PPCFrameInfo::getReturnSaveOffset(isPPC64, isDarwinABI); int NewRetAddr = MF.getFrameInfo()->CreateFixedObject(SlotSize, - NewRetAddrLoc); + NewRetAddrLoc, + true, false); EVT VT = isPPC64 ? MVT::i64 : MVT::i32; SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewRetAddr, VT); Chain = DAG.getStore(Chain, dl, OldRetAddr, NewRetAddrFrIdx, @@ -2300,7 +2302,8 @@ static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG, if (isDarwinABI) { int NewFPLoc = SPDiff + PPCFrameInfo::getFramePointerSaveOffset(isPPC64, isDarwinABI); - int NewFPIdx = MF.getFrameInfo()->CreateFixedObject(SlotSize, NewFPLoc); + int NewFPIdx = MF.getFrameInfo()->CreateFixedObject(SlotSize, NewFPLoc, + true, false); SDValue NewFramePtrIdx = DAG.getFrameIndex(NewFPIdx, VT); Chain = DAG.getStore(Chain, dl, OldFP, NewFramePtrIdx, PseudoSourceValue::getFixedStack(NewFPIdx), 0); @@ -2317,7 +2320,7 @@ CalculateTailCallArgDest(SelectionDAG &DAG, MachineFunction &MF, bool isPPC64, SmallVector<TailCallArgumentInfo, 8>& TailCallArguments) { int Offset = ArgOffset + SPDiff; uint32_t OpSize = (Arg.getValueType().getSizeInBits()+7)/8; - int FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset); + int FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset, true,false); EVT VT = isPPC64 ? MVT::i64 : MVT::i32; SDValue FIN = DAG.getFrameIndex(FI, VT); TailCallArgumentInfo Info; @@ -3224,7 +3227,8 @@ PPCTargetLowering::getReturnAddrFrameIndex(SelectionDAG & DAG) const { // Find out what the fix offset of the frame pointer save area. int LROffset = PPCFrameInfo::getReturnSaveOffset(IsPPC64, isDarwinABI); // Allocate the frame index for frame pointer save area. - RASI = MF.getFrameInfo()->CreateFixedObject(IsPPC64? 8 : 4, LROffset); + RASI = MF.getFrameInfo()->CreateFixedObject(IsPPC64? 8 : 4, LROffset, + true, false); // Save the result. FI->setReturnAddrSaveIndex(RASI); } @@ -3250,7 +3254,8 @@ PPCTargetLowering::getFramePointerFrameIndex(SelectionDAG & DAG) const { isDarwinABI); // Allocate the frame index for frame pointer save area. - FPSI = MF.getFrameInfo()->CreateFixedObject(IsPPC64? 8 : 4, FPOffset); + FPSI = MF.getFrameInfo()->CreateFixedObject(IsPPC64? 8 : 4, FPOffset, + true, false); // Save the result. FI->setFramePointerSaveIndex(FPSI); } @@ -3411,7 +3416,7 @@ SDValue PPCTargetLowering::LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) { // then lfd it and fcfid it. 
MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *FrameInfo = MF.getFrameInfo(); - int FrameIdx = FrameInfo->CreateStackObject(8, 8); + int FrameIdx = FrameInfo->CreateStackObject(8, 8, false); EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT); @@ -3469,7 +3474,7 @@ SDValue PPCTargetLowering::LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) { SDValue Chain = DAG.getNode(PPCISD::MFFS, dl, NodeTys, &InFlag, 0); // Save FP register to stack slot - int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8); + int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8, false); SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT); SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Chain, StackSlot, NULL, 0); @@ -3667,7 +3672,7 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) { unsigned SplatBitSize; bool HasAnyUndefs; if (! BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize, - HasAnyUndefs) || SplatBitSize > 32) + HasAnyUndefs, 0, true) || SplatBitSize > 32) return SDValue(); unsigned SplatBits = APSplatBits.getZExtValue(); @@ -4137,7 +4142,7 @@ SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op, DebugLoc dl = Op.getDebugLoc(); // Create a stack slot that is 16-byte aligned. MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo(); - int FrameIdx = FrameInfo->CreateStackObject(16, 16); + int FrameIdx = FrameInfo->CreateStackObject(16, 16, false); EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT); diff --git a/lib/Target/PowerPC/PPCRegisterInfo.cpp b/lib/Target/PowerPC/PPCRegisterInfo.cpp index cf5c7c0..e65e644 100644 --- a/lib/Target/PowerPC/PPCRegisterInfo.cpp +++ b/lib/Target/PowerPC/PPCRegisterInfo.cpp @@ -1043,7 +1043,8 @@ PPCRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, int FPOffset = PPCFrameInfo::getFramePointerSaveOffset(IsPPC64, isDarwinABI); // Allocate the frame index for frame pointer save area. - FPSI = MF.getFrameInfo()->CreateFixedObject(IsPPC64? 8 : 4, FPOffset); + FPSI = MF.getFrameInfo()->CreateFixedObject(IsPPC64? 8 : 4, FPOffset, + true, false); // Save the result. FI->setFramePointerSaveIndex(FPSI); } @@ -1051,7 +1052,8 @@ PPCRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, // Reserve stack space to move the linkage area to in case of a tail call. int TCSPDelta = 0; if (PerformTailCallOpt && (TCSPDelta = FI->getTailCallSPDelta()) < 0) { - MF.getFrameInfo()->CreateFixedObject(-1 * TCSPDelta, TCSPDelta); + MF.getFrameInfo()->CreateFixedObject(-1 * TCSPDelta, TCSPDelta, + true, false); } // Reserve a slot closest to SP or frame pointer if we have a dynalloc or @@ -1067,7 +1069,8 @@ PPCRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, const TargetRegisterClass *G8RC = &PPC::G8RCRegClass; const TargetRegisterClass *RC = IsPPC64 ? G8RC : GPRC; RS->setScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(), - RC->getAlignment())); + RC->getAlignment(), + false)); } } @@ -1356,12 +1359,6 @@ PPCRegisterInfo::emitPrologue(MachineFunction &MF) const { unsigned TargetAlign = MF.getTarget().getFrameInfo()->getStackAlignment(); unsigned MaxAlign = MFI->getMaxAlignment(); - if (needsFrameMoves) { - // Mark effective beginning of when frame pointer becomes valid. - FrameLabelId = MMI->NextLabelID(); - BuildMI(MBB, MBBI, dl, TII.get(PPC::DBG_LABEL)).addImm(FrameLabelId); - } - // Adjust stack pointer: r1 += NegFrameSize. 
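The LowerSINT_TO_FP hunk above keeps the PowerPC idiom of bouncing the integer through an 8-byte stack slot (now created with the extra CreateStackObject flag): the sign-extended value is stored from a GPR, reloaded into an FP register with lfd, and converted with fcfid, because there is no direct GPR-to-FPR move. A host-side analogue of the through-memory leg, purely illustrative:

// Illustrative only: memcpy stands in for the store/reload through the slot.
#include <cassert>
#include <cstdint>
#include <cstring>

int main() {
  int64_t wide = -42;                   // the sign-extended integer operand
  unsigned char slot[8];                // stand-in for the 8-byte stack slot
  std::memcpy(slot, &wide, 8);          // store to the slot ("std")
  int64_t reloaded;
  std::memcpy(&reloaded, slot, 8);      // reload the same bytes ("lfd" view)
  double converted = double(reloaded);  // what fcfid computes from those bits
  assert(converted == -42.0);
  return 0;
}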
// If there is a preferred stack alignment, align R1 now if (!IsPPC64) { @@ -1431,12 +1428,18 @@ PPCRegisterInfo::emitPrologue(MachineFunction &MF) const { .addReg(PPC::X0); } } + + std::vector<MachineMove> &Moves = MMI->getFrameMoves(); + // Add the "machine moves" for the instructions we generated above, but in + // reverse order. if (needsFrameMoves) { - std::vector<MachineMove> &Moves = MMI->getFrameMoves(); - + // Mark effective beginning of when frame pointer becomes valid. + FrameLabelId = MMI->NextLabelID(); + BuildMI(MBB, MBBI, dl, TII.get(PPC::DBG_LABEL)).addImm(FrameLabelId); + + // Show update of SP. if (NegFrameSize) { - // Show update of SP. MachineLocation SPDst(MachineLocation::VirtualFP); MachineLocation SPSrc(MachineLocation::VirtualFP, NegFrameSize); Moves.push_back(MachineMove(FrameLabelId, SPDst, SPSrc)); @@ -1451,31 +1454,15 @@ PPCRegisterInfo::emitPrologue(MachineFunction &MF) const { Moves.push_back(MachineMove(FrameLabelId, FPDst, FPSrc)); } - // Add callee saved registers to move list. - const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo(); - for (unsigned I = 0, E = CSI.size(); I != E; ++I) { - int Offset = MFI->getObjectOffset(CSI[I].getFrameIdx()); - unsigned Reg = CSI[I].getReg(); - if (Reg == PPC::LR || Reg == PPC::LR8 || Reg == PPC::RM) continue; - MachineLocation CSDst(MachineLocation::VirtualFP, Offset); - MachineLocation CSSrc(Reg); - Moves.push_back(MachineMove(FrameLabelId, CSDst, CSSrc)); + if (MustSaveLR) { + MachineLocation LRDst(MachineLocation::VirtualFP, LROffset); + MachineLocation LRSrc(IsPPC64 ? PPC::LR8 : PPC::LR); + Moves.push_back(MachineMove(FrameLabelId, LRDst, LRSrc)); } - - MachineLocation LRDst(MachineLocation::VirtualFP, LROffset); - MachineLocation LRSrc(IsPPC64 ? PPC::LR8 : PPC::LR); - Moves.push_back(MachineMove(FrameLabelId, LRDst, LRSrc)); - - // Mark effective beginning of when frame pointer is ready. - unsigned ReadyLabelId = MMI->NextLabelID(); - BuildMI(MBB, MBBI, dl, TII.get(PPC::DBG_LABEL)).addImm(ReadyLabelId); - - MachineLocation FPDst(HasFP ? (IsPPC64 ? PPC::X31 : PPC::R31) : - (IsPPC64 ? PPC::X1 : PPC::R1)); - MachineLocation FPSrc(MachineLocation::VirtualFP); - Moves.push_back(MachineMove(ReadyLabelId, FPDst, FPSrc)); } + unsigned ReadyLabelId = 0; + // If there is a frame pointer, copy R1 into R31 if (HasFP) { if (!IsPPC64) { @@ -1487,6 +1474,33 @@ PPCRegisterInfo::emitPrologue(MachineFunction &MF) const { .addReg(PPC::X1) .addReg(PPC::X1); } + + if (needsFrameMoves) { + ReadyLabelId = MMI->NextLabelID(); + + // Mark effective beginning of when frame pointer is ready. + BuildMI(MBB, MBBI, dl, TII.get(PPC::DBG_LABEL)).addImm(ReadyLabelId); + + MachineLocation FPDst(HasFP ? (IsPPC64 ? PPC::X31 : PPC::R31) : + (IsPPC64 ? PPC::X1 : PPC::R1)); + MachineLocation FPSrc(MachineLocation::VirtualFP); + Moves.push_back(MachineMove(ReadyLabelId, FPDst, FPSrc)); + } + } + + if (needsFrameMoves) { + unsigned LabelId = HasFP ? ReadyLabelId : FrameLabelId; + + // Add callee saved registers to move list. 
+ const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo(); + for (unsigned I = 0, E = CSI.size(); I != E; ++I) { + int Offset = MFI->getObjectOffset(CSI[I].getFrameIdx()); + unsigned Reg = CSI[I].getReg(); + if (Reg == PPC::LR || Reg == PPC::LR8 || Reg == PPC::RM) continue; + MachineLocation CSDst(MachineLocation::VirtualFP, Offset); + MachineLocation CSSrc(Reg); + Moves.push_back(MachineMove(LabelId, CSDst, CSSrc)); + } } } @@ -1700,7 +1714,7 @@ unsigned PPCRegisterInfo::getRARegister() const { return !Subtarget.isPPC64() ? PPC::LR : PPC::LR8; } -unsigned PPCRegisterInfo::getFrameRegister(MachineFunction &MF) const { +unsigned PPCRegisterInfo::getFrameRegister(const MachineFunction &MF) const { if (!Subtarget.isPPC64()) return hasFP(MF) ? PPC::R31 : PPC::R1; else diff --git a/lib/Target/PowerPC/PPCRegisterInfo.h b/lib/Target/PowerPC/PPCRegisterInfo.h index 1689bc2..3aeed80 100644 --- a/lib/Target/PowerPC/PPCRegisterInfo.h +++ b/lib/Target/PowerPC/PPCRegisterInfo.h @@ -83,7 +83,7 @@ public: // Debug information queries. unsigned getRARegister() const; - unsigned getFrameRegister(MachineFunction &MF) const; + unsigned getFrameRegister(const MachineFunction &MF) const; void getInitialFrameState(std::vector<MachineMove> &Moves) const; // Exception handling queries. diff --git a/lib/Target/PowerPC/PPCSubtarget.h b/lib/Target/PowerPC/PPCSubtarget.h index 02c8ad7..75fcf62 100644 --- a/lib/Target/PowerPC/PPCSubtarget.h +++ b/lib/Target/PowerPC/PPCSubtarget.h @@ -101,8 +101,8 @@ public: const char *getTargetDataString() const { // Note, the alignment values for f64 and i64 on ppc64 in Darwin // documentation are wrong; these are correct (i.e. "what gcc does"). - return isPPC64() ? "E-p:64:64-f64:64:64-i64:64:64-f128:64:128" - : "E-p:32:32-f64:32:64-i64:32:64-f128:64:128"; + return isPPC64() ? "E-p:64:64-f64:64:64-i64:64:64-f128:64:128-n32:64" + : "E-p:32:32-f64:32:64-i64:32:64-f128:64:128-n32"; } /// isPPC64 - Return true if we are generating code for 64-bit pointer mode. diff --git a/lib/Target/PowerPC/PPCTargetMachine.cpp b/lib/Target/PowerPC/PPCTargetMachine.cpp index 3371954..8079c6e 100644 --- a/lib/Target/PowerPC/PPCTargetMachine.cpp +++ b/lib/Target/PowerPC/PPCTargetMachine.cpp @@ -20,8 +20,7 @@ #include "llvm/Support/FormattedStream.h" using namespace llvm; -static const MCAsmInfo *createMCAsmInfo(const Target &T, - const StringRef &TT) { +static const MCAsmInfo *createMCAsmInfo(const Target &T, StringRef TT) { Triple TheTriple(TT); bool isPPC64 = TheTriple.getArch() == Triple::ppc64; if (TheTriple.getOS() == Triple::Darwin) diff --git a/lib/Target/README.txt b/lib/Target/README.txt index a345d3d..aad621f 100644 --- a/lib/Target/README.txt +++ b/lib/Target/README.txt @@ -339,6 +339,8 @@ we don't have whole-function selection dags. On x86, this means we use one extra register for the function when effective_addr2 is declared as U64 than when it is declared U32. +PHI Slicing could be extended to do this. + //===---------------------------------------------------------------------===// LSR should know what GPR types a target has. This code: @@ -406,22 +408,6 @@ return: ; preds = %then.1, %else.0, %then.0 //===---------------------------------------------------------------------===// -Tail recursion elimination is not transforming this function, because it is -returning n, which fails the isDynamicConstant check in the accumulator -recursion checks. 
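Stepping back to the PPCRegisterInfo::emitPrologue rework above: the DWARF-style frame moves are now recorded after the instructions they describe, and the callee-saved-register moves attach to the frame-pointer "ready" label only when a frame pointer is actually established. A reduced sketch of that label bookkeeping, with stand-in types rather than the real MachineMove/MachineModuleInfo:

// Reduced sketch of the label selection in the prologue hunks above.
#include <cstdio>
#include <vector>

struct MoveSketch { unsigned Label; const char *Dst; const char *Src; };

static void describePrologue(bool HasFP, bool MustSaveLR,
                             std::vector<MoveSketch> &Moves) {
  unsigned FrameLabelId = 1;              // placed after the SP adjustment
  unsigned ReadyLabelId = HasFP ? 2 : 0;  // placed after the FP copy, if any

  Moves.push_back({FrameLabelId, "VirtualFP", "SP"});
  if (MustSaveLR)
    Moves.push_back({FrameLabelId, "VirtualFP+LROffset", "LR"});

  // Callee-saved registers are described at whichever label comes last.
  unsigned LabelId = HasFP ? ReadyLabelId : FrameLabelId;
  Moves.push_back({LabelId, "VirtualFP+CSROffset", "R31"});
}

int main() {
  std::vector<MoveSketch> Moves;
  describePrologue(/*HasFP=*/true, /*MustSaveLR=*/true, Moves);
  for (const MoveSketch &M : Moves)
    std::printf("label %u: %s <- %s\n", M.Label, M.Dst, M.Src);
  return 0;
}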
- -long long fib(const long long n) { - switch(n) { - case 0: - case 1: - return n; - default: - return fib(n-1) + fib(n-2); - } -} - -//===---------------------------------------------------------------------===// - Tail recursion elimination should handle: int pow2m1(int n) { @@ -1229,6 +1215,40 @@ GCC PR33344 is a similar case. //===---------------------------------------------------------------------===// +[PHI TRANSLATE INDEXED GEPs] PR5313 + +Load redundancy elimination for simple loop. This loop: + +void append_text(const char* text,unsigned char * const io) { + while(*text) + *io=*text++; +} + +Compiles to have a fully redundant load in the loop (%2): + +define void @append_text(i8* nocapture %text, i8* nocapture %io) nounwind { +entry: + %0 = load i8* %text, align 1 ; <i8> [#uses=1] + %1 = icmp eq i8 %0, 0 ; <i1> [#uses=1] + br i1 %1, label %return, label %bb + +bb: ; preds = %bb, %entry + %indvar = phi i32 [ 0, %entry ], [ %tmp, %bb ] ; <i32> [#uses=2] + %text_addr.04 = getelementptr i8* %text, i32 %indvar ; <i8*> [#uses=1] + %2 = load i8* %text_addr.04, align 1 ; <i8> [#uses=1] + store i8 %2, i8* %io, align 1 + %tmp = add i32 %indvar, 1 ; <i32> [#uses=2] + %scevgep = getelementptr i8* %text, i32 %tmp ; <i8*> [#uses=1] + %3 = load i8* %scevgep, align 1 ; <i8> [#uses=1] + %4 = icmp eq i8 %3, 0 ; <i1> [#uses=1] + br i1 %4, label %return, label %bb + +return: ; preds = %bb, %entry + ret void +} + +//===---------------------------------------------------------------------===// + There are many load PRE testcases in testsuite/gcc.dg/tree-ssa/loadpre* in the GCC testsuite. There are many pre testcases as ssa-pre-*.c @@ -1594,12 +1614,6 @@ int int_char(char m) {if(m>7) return 0; return m;} //===---------------------------------------------------------------------===// -IPSCCP is propagating elements of first class aggregates, but is not propagating -the entire aggregate itself. This leads it to miss opportunities, for example -in test/Transforms/SCCP/ipsccp-basic.ll:test5b. - -//===---------------------------------------------------------------------===// - int func(int a, int b) { if (a & 0x80) b |= 0x80; else b &= ~0x80; return b; } Generates this: @@ -1668,3 +1682,55 @@ entry: } //===---------------------------------------------------------------------===// + +IPSCCP does not currently propagate argument dependent constants through +functions where it does not not all of the callers. This includes functions +with normal external linkage as well as templates, C99 inline functions etc. +Specifically, it does nothing to: + +define i32 @test(i32 %x, i32 %y, i32 %z) nounwind { +entry: + %0 = add nsw i32 %y, %z + %1 = mul i32 %0, %x + %2 = mul i32 %y, %z + %3 = add nsw i32 %1, %2 + ret i32 %3 +} + +define i32 @test2() nounwind { +entry: + %0 = call i32 @test(i32 1, i32 2, i32 4) nounwind + ret i32 %0 +} + +It would be interesting extend IPSCCP to be able to handle simple cases like +this, where all of the arguments to a call are constant. Because IPSCCP runs +before inlining, trivial templates and inline functions are not yet inlined. +The results for a function + set of constant arguments should be memoized in a +map. + +//===---------------------------------------------------------------------===// + +The libcall constant folding stuff should be moved out of SimplifyLibcalls into +libanalysis' constantfolding logic. 
This would allow IPSCCP to be able to +handle simple things like this: + +static int foo(const char *X) { return strlen(X); } +int bar() { return foo("abcd"); } + +//===---------------------------------------------------------------------===// + +InstCombine should use SimplifyDemandedBits to remove the or instruction: + +define i1 @test(i8 %x, i8 %y) { + %A = or i8 %x, 1 + %B = icmp ugt i8 %A, 3 + ret i1 %B +} + +Currently instcombine calls SimplifyDemandedBits with either all bits or just +the sign bit, if the comparison is obviously a sign test. In this case, we only +need all but the bottom two bits from %A, and if we gave that mask to SDB it +would delete the or instruction for us. + +//===---------------------------------------------------------------------===// diff --git a/lib/Target/Sparc/AsmPrinter/SparcAsmPrinter.cpp b/lib/Target/Sparc/AsmPrinter/SparcAsmPrinter.cpp index 452b46f..cd85dd4 100644 --- a/lib/Target/Sparc/AsmPrinter/SparcAsmPrinter.cpp +++ b/lib/Target/Sparc/AsmPrinter/SparcAsmPrinter.cpp @@ -126,7 +126,7 @@ bool SparcAsmPrinter::runOnMachineFunction(MachineFunction &MF) { processDebugLoc(II, true); printInstruction(II); - if (VerboseAsm && !II->getDebugLoc().isUnknown()) + if (VerboseAsm) EmitComments(*II); O << '\n'; processDebugLoc(II, false); diff --git a/lib/Target/Sparc/SparcISelDAGToDAG.cpp b/lib/Target/Sparc/SparcISelDAGToDAG.cpp index a1a4a8e..b41917e 100644 --- a/lib/Target/Sparc/SparcISelDAGToDAG.cpp +++ b/lib/Target/Sparc/SparcISelDAGToDAG.cpp @@ -75,7 +75,6 @@ private: /// InstructionSelect - This callback is invoked by /// SelectionDAGISel when it has created a SelectionDAG for us to codegen. void SparcDAGToDAGISel::InstructionSelect() { - DEBUG(BB->dump()); CurBB = BB; // Select target instructions for the DAG. 
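The InstCombine note above can be checked exhaustively for 8-bit values: icmp ugt %A, 3 only depends on the bits above the bottom two, so or'ing in 1 can never change the outcome, which is exactly why handing that demanded-bits mask to SimplifyDemandedBits would let it delete the or. A small standalone check in plain C++ (not InstCombine itself):

// Verifies the claim for all 8-bit inputs: (x | 1) > 3 equals x > 3.
#include <cassert>
#include <cstdint>

int main() {
  for (unsigned v = 0; v < 256; ++v) {
    uint8_t x = uint8_t(v);
    bool withOr    = uint8_t(x | 1) > 3;
    bool withoutOr = x > 3;
    assert(withOr == withoutOr);
  }
  return 0;
}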
SelectRoot(*CurDAG); diff --git a/lib/Target/Sparc/SparcISelLowering.cpp b/lib/Target/Sparc/SparcISelLowering.cpp index 164770d..133f828 100644 --- a/lib/Target/Sparc/SparcISelLowering.cpp +++ b/lib/Target/Sparc/SparcISelLowering.cpp @@ -129,7 +129,8 @@ SparcTargetLowering::LowerFormalArguments(SDValue Chain, } InVals.push_back(Arg); } else { - int FrameIdx = MF.getFrameInfo()->CreateFixedObject(4, ArgOffset); + int FrameIdx = MF.getFrameInfo()->CreateFixedObject(4, ArgOffset, + true, false); SDValue FIPtr = DAG.getFrameIndex(FrameIdx, MVT::i32); SDValue Load; if (ObjectVT == MVT::i32) { @@ -163,7 +164,8 @@ SparcTargetLowering::LowerFormalArguments(SDValue Chain, Arg = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f32, Arg); InVals.push_back(Arg); } else { - int FrameIdx = MF.getFrameInfo()->CreateFixedObject(4, ArgOffset); + int FrameIdx = MF.getFrameInfo()->CreateFixedObject(4, ArgOffset, + true, false); SDValue FIPtr = DAG.getFrameIndex(FrameIdx, MVT::i32); SDValue Load = DAG.getLoad(MVT::f32, dl, Chain, FIPtr, NULL, 0); InVals.push_back(Load); @@ -184,7 +186,8 @@ SparcTargetLowering::LowerFormalArguments(SDValue Chain, MF.getRegInfo().addLiveIn(*CurArgReg++, VRegHi); HiVal = DAG.getCopyFromReg(Chain, dl, VRegHi, MVT::i32); } else { - int FrameIdx = MF.getFrameInfo()->CreateFixedObject(4, ArgOffset); + int FrameIdx = MF.getFrameInfo()->CreateFixedObject(4, ArgOffset, + true, false); SDValue FIPtr = DAG.getFrameIndex(FrameIdx, MVT::i32); HiVal = DAG.getLoad(MVT::i32, dl, Chain, FIPtr, NULL, 0); } @@ -195,7 +198,8 @@ SparcTargetLowering::LowerFormalArguments(SDValue Chain, MF.getRegInfo().addLiveIn(*CurArgReg++, VRegLo); LoVal = DAG.getCopyFromReg(Chain, dl, VRegLo, MVT::i32); } else { - int FrameIdx = MF.getFrameInfo()->CreateFixedObject(4, ArgOffset+4); + int FrameIdx = MF.getFrameInfo()->CreateFixedObject(4, ArgOffset+4, + true, false); SDValue FIPtr = DAG.getFrameIndex(FrameIdx, MVT::i32); LoVal = DAG.getLoad(MVT::i32, dl, Chain, FIPtr, NULL, 0); } @@ -227,7 +231,8 @@ SparcTargetLowering::LowerFormalArguments(SDValue Chain, MF.getRegInfo().addLiveIn(*CurArgReg, VReg); SDValue Arg = DAG.getCopyFromReg(DAG.getRoot(), dl, VReg, MVT::i32); - int FrameIdx = MF.getFrameInfo()->CreateFixedObject(4, ArgOffset); + int FrameIdx = MF.getFrameInfo()->CreateFixedObject(4, ArgOffset, + true, false); SDValue FIPtr = DAG.getFrameIndex(FrameIdx, MVT::i32); OutChains.push_back(DAG.getStore(DAG.getRoot(), dl, Arg, FIPtr, NULL, 0)); diff --git a/lib/Target/Sparc/SparcInstrInfo.td b/lib/Target/Sparc/SparcInstrInfo.td index f2f1b96..d88d508 100644 --- a/lib/Target/Sparc/SparcInstrInfo.td +++ b/lib/Target/Sparc/SparcInstrInfo.td @@ -277,7 +277,7 @@ let usesCustomInserter = 1 in { // Expanded after instruction selection. // Section A.3 - Synthetic Instructions, p. 
85 // special cases of JMPL: -let isReturn = 1, isTerminator = 1, hasDelaySlot = 1 in { +let isReturn = 1, isTerminator = 1, hasDelaySlot = 1, isBarrier = 1 in { let rd = O7.Num, rs1 = G0.Num, simm13 = 8 in def RETL: F3_2<2, 0b111000, (outs), (ins), "retl", [(retflag)]>; } diff --git a/lib/Target/Sparc/SparcRegisterInfo.cpp b/lib/Target/Sparc/SparcRegisterInfo.cpp index 7883260..6f6183e 100644 --- a/lib/Target/Sparc/SparcRegisterInfo.cpp +++ b/lib/Target/Sparc/SparcRegisterInfo.cpp @@ -175,7 +175,7 @@ unsigned SparcRegisterInfo::getRARegister() const { return SP::I7; } -unsigned SparcRegisterInfo::getFrameRegister(MachineFunction &MF) const { +unsigned SparcRegisterInfo::getFrameRegister(const MachineFunction &MF) const { return SP::I6; } diff --git a/lib/Target/Sparc/SparcRegisterInfo.h b/lib/Target/Sparc/SparcRegisterInfo.h index 753b1c0..8889ea6 100644 --- a/lib/Target/Sparc/SparcRegisterInfo.h +++ b/lib/Target/Sparc/SparcRegisterInfo.h @@ -54,7 +54,7 @@ struct SparcRegisterInfo : public SparcGenRegisterInfo { // Debug information queries. unsigned getRARegister() const; - unsigned getFrameRegister(MachineFunction &MF) const; + unsigned getFrameRegister(const MachineFunction &MF) const; // Exception handling queries. unsigned getEHExceptionRegister() const; diff --git a/lib/Target/Sparc/SparcTargetMachine.cpp b/lib/Target/Sparc/SparcTargetMachine.cpp index 3a38115..1eec112 100644 --- a/lib/Target/Sparc/SparcTargetMachine.cpp +++ b/lib/Target/Sparc/SparcTargetMachine.cpp @@ -29,7 +29,7 @@ extern "C" void LLVMInitializeSparcTarget() { SparcTargetMachine::SparcTargetMachine(const Target &T, const std::string &TT, const std::string &FS) : LLVMTargetMachine(T, TT), - DataLayout("E-p:32:32-f128:128:128"), + DataLayout("E-p:32:32-f128:128:128-n32"), Subtarget(TT, FS), TLInfo(*this), InstrInfo(Subtarget), FrameInfo(TargetFrameInfo::StackGrowsDown, 8, 0) { } diff --git a/lib/Target/SystemZ/AsmPrinter/SystemZAsmPrinter.cpp b/lib/Target/SystemZ/AsmPrinter/SystemZAsmPrinter.cpp index a4a8d6a..e97e7ca 100644 --- a/lib/Target/SystemZ/AsmPrinter/SystemZAsmPrinter.cpp +++ b/lib/Target/SystemZ/AsmPrinter/SystemZAsmPrinter.cpp @@ -33,6 +33,7 @@ #include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetRegistry.h" #include "llvm/ADT/Statistic.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FormattedStream.h" #include "llvm/Support/Mangler.h" @@ -154,7 +155,7 @@ void SystemZAsmPrinter::printMachineInstruction(const MachineInstr *MI) { // Call the autogenerated instruction printer routines. printInstruction(MI); - if (VerboseAsm && !MI->getDebugLoc().isUnknown()) + if (VerboseAsm) EmitComments(*MI); O << '\n'; diff --git a/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp b/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp index 028ee89..d64611d 100644 --- a/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp +++ b/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp @@ -603,8 +603,6 @@ bool SystemZDAGToDAGISel::TryFoldLoad(SDValue P, SDValue N, /// InstructionSelect - This callback is invoked by /// SelectionDAGISel when it has created a SelectionDAG for us to codegen. void SystemZDAGToDAGISel::InstructionSelect() { - DEBUG(BB->dump()); - // Codegen the basic block. 
DEBUG(errs() << "===== Instruction selection begins:\n"); DEBUG(Indent = 0); diff --git a/lib/Target/SystemZ/SystemZISelLowering.cpp b/lib/Target/SystemZ/SystemZISelLowering.cpp index 5c8cae0..d6b476e 100644 --- a/lib/Target/SystemZ/SystemZISelLowering.cpp +++ b/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -34,6 +34,7 @@ #include "llvm/Target/TargetOptions.h" #include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include "llvm/ADT/VectorExtras.h" using namespace llvm; @@ -328,7 +329,7 @@ SystemZTargetLowering::LowerCCCArguments(SDValue Chain, // Create the nodes corresponding to a load from this parameter slot. // Create the frame index object for this incoming parameter... int FI = MFI->CreateFixedObject(LocVT.getSizeInBits()/8, - VA.getLocMemOffset()); + VA.getLocMemOffset(), true, false); // Create the SelectionDAG nodes corresponding to a load // from this parameter diff --git a/lib/Target/SystemZ/SystemZInstrInfo.cpp b/lib/Target/SystemZ/SystemZInstrInfo.cpp index 236711c..d82d928 100644 --- a/lib/Target/SystemZ/SystemZInstrInfo.cpp +++ b/lib/Target/SystemZ/SystemZInstrInfo.cpp @@ -22,7 +22,7 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/PseudoSourceValue.h" - +#include "llvm/Support/ErrorHandling.h" using namespace llvm; SystemZInstrInfo::SystemZInstrInfo(SystemZTargetMachine &tm) diff --git a/lib/Target/SystemZ/SystemZRegisterInfo.cpp b/lib/Target/SystemZ/SystemZRegisterInfo.cpp index 38460a6..4d1c01f 100644 --- a/lib/Target/SystemZ/SystemZRegisterInfo.cpp +++ b/lib/Target/SystemZ/SystemZRegisterInfo.cpp @@ -320,7 +320,8 @@ unsigned SystemZRegisterInfo::getRARegister() const { return 0; } -unsigned SystemZRegisterInfo::getFrameRegister(MachineFunction &MF) const { +unsigned +SystemZRegisterInfo::getFrameRegister(const MachineFunction &MF) const { assert(0 && "What is the frame register"); return 0; } diff --git a/lib/Target/SystemZ/SystemZRegisterInfo.h b/lib/Target/SystemZ/SystemZRegisterInfo.h index b22b05d..93f6aee 100644 --- a/lib/Target/SystemZ/SystemZRegisterInfo.h +++ b/lib/Target/SystemZ/SystemZRegisterInfo.h @@ -68,7 +68,7 @@ struct SystemZRegisterInfo : public SystemZGenRegisterInfo { // Debug information queries. unsigned getRARegister() const; - unsigned getFrameRegister(MachineFunction &MF) const; + unsigned getFrameRegister(const MachineFunction &MF) const; // Exception handling queries. 
unsigned getEHExceptionRegister() const; diff --git a/lib/Target/SystemZ/SystemZTargetMachine.cpp b/lib/Target/SystemZ/SystemZTargetMachine.cpp index 990e003..dfa26a1 100644 --- a/lib/Target/SystemZ/SystemZTargetMachine.cpp +++ b/lib/Target/SystemZ/SystemZTargetMachine.cpp @@ -28,7 +28,7 @@ SystemZTargetMachine::SystemZTargetMachine(const Target &T, : LLVMTargetMachine(T, TT), Subtarget(TT, FS), DataLayout("E-p:64:64:64-i8:8:16-i16:16:16-i32:32:32-i64:64:64-f32:32:32" - "-f64:64:64-f128:128:128-a0:16:16"), + "-f64:64:64-f128:128:128-a0:16:16-n32:64"), InstrInfo(*this), TLInfo(*this), FrameInfo(TargetFrameInfo::StackGrowsDown, 8, -160) { diff --git a/lib/Target/TargetData.cpp b/lib/Target/TargetData.cpp index 5bcd658..fc71bc3 100644 --- a/lib/Target/TargetData.cpp +++ b/lib/Target/TargetData.cpp @@ -17,16 +17,16 @@ //===----------------------------------------------------------------------===// #include "llvm/Target/TargetData.h" -#include "llvm/Module.h" -#include "llvm/DerivedTypes.h" #include "llvm/Constants.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Module.h" #include "llvm/Support/GetElementPtrTypeIterator.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/ManagedStatic.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/System/Mutex.h" #include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/StringExtras.h" #include <algorithm> #include <cstdlib> using namespace llvm; @@ -132,50 +132,18 @@ const TargetAlignElem TargetData::InvalidAlignmentElem = // TargetData Class Implementation //===----------------------------------------------------------------------===// -/*! - A TargetDescription string consists of a sequence of hyphen-delimited - specifiers for target endianness, pointer size and alignments, and various - primitive type sizes and alignments. A typical string looks something like: - <br><br> - "E-p:32:32:32-i1:8:8-i8:8:8-i32:32:32-i64:32:64-f32:32:32-f64:32:64" - <br><br> - (note: this string is not fully specified and is only an example.) - \p - Alignments come in two flavors: ABI and preferred. ABI alignment (abi_align, - below) dictates how a type will be aligned within an aggregate and when used - as an argument. Preferred alignment (pref_align, below) determines a type's - alignment when emitted as a global. - \p - Specifier string details: - <br><br> - <i>[E|e]</i>: Endianness. "E" specifies a big-endian target data model, "e" - specifies a little-endian target data model. - <br><br> - <i>p:@verbatim<size>:<abi_align>:<pref_align>@endverbatim</i>: Pointer size, - ABI and preferred alignment. - <br><br> - <i>@verbatim<type><size>:<abi_align>:<pref_align>@endverbatim</i>: Numeric type - alignment. Type is - one of <i>i|f|v|a</i>, corresponding to integer, floating point, vector, or - aggregate. Size indicates the size, e.g., 32 or 64 bits. - \p - The default string, fully specified, is: - <br><br> - "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64" - "-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64" - "-v64:64:64-v128:128:128" - <br><br> - Note that in the case of aggregates, 0 is the default ABI and preferred - alignment. This is a special case, where the aggregate's computed worst-case - alignment will be used. - */ -void TargetData::init(const std::string &TargetDescription) { - std::string temp = TargetDescription; - +/// getInt - Get an integer ignoring errors. 
+static unsigned getInt(StringRef R) { + unsigned Result = 0; + R.getAsInteger(10, Result); + return Result; +} + +void TargetData::init(StringRef Desc) { LayoutMap = 0; LittleEndian = false; PointerMemSize = 8; - PointerABIAlign = 8; + PointerABIAlign = 8; PointerPrefAlign = PointerABIAlign; // Default alignments @@ -190,11 +158,21 @@ void TargetData::init(const std::string &TargetDescription) { setAlignment(VECTOR_ALIGN, 16, 16, 128); // v16i8, v8i16, v4i32, ... setAlignment(AGGREGATE_ALIGN, 0, 8, 0); // struct - while (!temp.empty()) { - std::string token = getToken(temp, "-"); - std::string arg0 = getToken(token, ":"); - const char *p = arg0.c_str(); - switch(*p) { + while (!Desc.empty()) { + std::pair<StringRef, StringRef> Split = Desc.split('-'); + StringRef Token = Split.first; + Desc = Split.second; + + if (Token.empty()) + continue; + + Split = Token.split(':'); + StringRef Specifier = Split.first; + Token = Split.second; + + assert(!Specifier.empty() && "Can't be empty here"); + + switch (Specifier[0]) { case 'E': LittleEndian = false; break; @@ -202,9 +180,12 @@ void TargetData::init(const std::string &TargetDescription) { LittleEndian = true; break; case 'p': - PointerMemSize = atoi(getToken(token,":").c_str()) / 8; - PointerABIAlign = atoi(getToken(token,":").c_str()) / 8; - PointerPrefAlign = atoi(getToken(token,":").c_str()) / 8; + Split = Token.split(':'); + PointerMemSize = getInt(Split.first) / 8; + Split = Split.second.split(':'); + PointerABIAlign = getInt(Split.first) / 8; + Split = Split.second.split(':'); + PointerPrefAlign = getInt(Split.first) / 8; if (PointerPrefAlign == 0) PointerPrefAlign = PointerABIAlign; break; @@ -213,28 +194,52 @@ void TargetData::init(const std::string &TargetDescription) { case 'f': case 'a': case 's': { - AlignTypeEnum align_type = STACK_ALIGN; // Dummy init, silence warning - switch(*p) { - case 'i': align_type = INTEGER_ALIGN; break; - case 'v': align_type = VECTOR_ALIGN; break; - case 'f': align_type = FLOAT_ALIGN; break; - case 'a': align_type = AGGREGATE_ALIGN; break; - case 's': align_type = STACK_ALIGN; break; + AlignTypeEnum AlignType; + switch (Specifier[0]) { + default: + case 'i': AlignType = INTEGER_ALIGN; break; + case 'v': AlignType = VECTOR_ALIGN; break; + case 'f': AlignType = FLOAT_ALIGN; break; + case 'a': AlignType = AGGREGATE_ALIGN; break; + case 's': AlignType = STACK_ALIGN; break; } - uint32_t size = (uint32_t) atoi(++p); - unsigned char abi_align = atoi(getToken(token, ":").c_str()) / 8; - unsigned char pref_align = atoi(getToken(token, ":").c_str()) / 8; - if (pref_align == 0) - pref_align = abi_align; - setAlignment(align_type, abi_align, pref_align, size); + unsigned Size = getInt(Specifier.substr(1)); + Split = Token.split(':'); + unsigned char ABIAlign = getInt(Split.first) / 8; + + Split = Split.second.split(':'); + unsigned char PrefAlign = getInt(Split.first) / 8; + if (PrefAlign == 0) + PrefAlign = ABIAlign; + setAlignment(AlignType, ABIAlign, PrefAlign, Size); break; } + case 'n': // Native integer types. + Specifier = Specifier.substr(1); + do { + if (unsigned Width = getInt(Specifier)) + LegalIntWidths.push_back(Width); + Split = Token.split(':'); + Specifier = Split.first; + Token = Split.second; + } while (!Specifier.empty() || !Token.empty()); + break; + default: break; } } } +/// Default ctor. +/// +/// @note This has to exist, because this is a pass, but it should never be +/// used. +TargetData::TargetData() : ImmutablePass(&ID) { + llvm_report_error("Bad TargetData ctor used. 
" + "Tool did not specify a TargetData to use?"); +} + TargetData::TargetData(const Module *M) : ImmutablePass(&ID) { init(M->getDataLayout()); @@ -318,37 +323,130 @@ unsigned TargetData::getAlignmentInfo(AlignTypeEnum AlignType, : Alignments[BestMatchIdx].PrefAlign; } -typedef DenseMap<const StructType*, StructLayout*>LayoutInfoTy; +typedef DenseMap<const StructType*, StructLayout*> LayoutInfoTy; -TargetData::~TargetData() { - if (!LayoutMap) - return; - - // Remove any layouts for this TD. - LayoutInfoTy &TheMap = *static_cast<LayoutInfoTy*>(LayoutMap); - for (LayoutInfoTy::iterator I = TheMap.begin(), E = TheMap.end(); I != E; ) { - I->second->~StructLayout(); - free(I->second); - TheMap.erase(I++); +namespace llvm { + +class StructLayoutMap : public AbstractTypeUser { + LayoutInfoTy LayoutInfo; + + /// refineAbstractType - The callback method invoked when an abstract type is + /// resolved to another type. An object must override this method to update + /// its internal state to reference NewType instead of OldType. + /// + virtual void refineAbstractType(const DerivedType *OldTy, + const Type *) { + const StructType *STy = dyn_cast<const StructType>(OldTy); + if (!STy) { + OldTy->removeAbstractTypeUser(this); + return; + } + + StructLayout *SL = LayoutInfo[STy]; + if (SL) { + SL->~StructLayout(); + free(SL); + LayoutInfo[STy] = NULL; + } + + OldTy->removeAbstractTypeUser(this); } - - delete static_cast<LayoutInfoTy*>(LayoutMap); + + /// typeBecameConcrete - The other case which AbstractTypeUsers must be aware + /// of is when a type makes the transition from being abstract (where it has + /// clients on its AbstractTypeUsers list) to concrete (where it does not). + /// This method notifies ATU's when this occurs for a type. + /// + virtual void typeBecameConcrete(const DerivedType *AbsTy) { + const StructType *STy = dyn_cast<const StructType>(AbsTy); + if (!STy) { + AbsTy->removeAbstractTypeUser(this); + return; + } + + StructLayout *SL = LayoutInfo[STy]; + if (SL) { + SL->~StructLayout(); + free(SL); + LayoutInfo[STy] = NULL; + } + + AbsTy->removeAbstractTypeUser(this); + } + + bool insert(const Type *Ty) { + if (Ty->isAbstract()) + Ty->addAbstractTypeUser(this); + return true; + } + +public: + virtual ~StructLayoutMap() { + // Remove any layouts. + for (LayoutInfoTy::iterator + I = LayoutInfo.begin(), E = LayoutInfo.end(); I != E; ++I) + if (StructLayout *SL = I->second) { + SL->~StructLayout(); + free(SL); + } + } + + inline LayoutInfoTy::iterator begin() { + return LayoutInfo.begin(); + } + inline LayoutInfoTy::iterator end() { + return LayoutInfo.end(); + } + inline LayoutInfoTy::const_iterator begin() const { + return LayoutInfo.begin(); + } + inline LayoutInfoTy::const_iterator end() const { + return LayoutInfo.end(); + } + + LayoutInfoTy::iterator find(const StructType *&Val) { + return LayoutInfo.find(Val); + } + LayoutInfoTy::const_iterator find(const StructType *&Val) const { + return LayoutInfo.find(Val); + } + + bool erase(const StructType *&Val) { + return LayoutInfo.erase(Val); + } + bool erase(LayoutInfoTy::iterator I) { + return LayoutInfo.erase(I); + } + + StructLayout *&operator[](const Type *Key) { + const StructType *STy = dyn_cast<const StructType>(Key); + assert(STy && "Trying to access the struct layout map with a non-struct!"); + insert(STy); + return LayoutInfo[STy]; + } + + // for debugging... 
+ virtual void dump() const {} +}; + +} // end namespace llvm + +TargetData::~TargetData() { + delete LayoutMap; } const StructLayout *TargetData::getStructLayout(const StructType *Ty) const { if (!LayoutMap) - LayoutMap = static_cast<void*>(new LayoutInfoTy()); - - LayoutInfoTy &TheMap = *static_cast<LayoutInfoTy*>(LayoutMap); + LayoutMap = new StructLayoutMap(); - StructLayout *&SL = TheMap[Ty]; + StructLayout *&SL = (*LayoutMap)[Ty]; if (SL) return SL; // Otherwise, create the struct layout. Because it is variable length, we // malloc it, then use placement new. int NumElts = Ty->getNumElements(); StructLayout *L = - (StructLayout *)malloc(sizeof(StructLayout)+(NumElts-1)*sizeof(uint64_t)); + (StructLayout *)malloc(sizeof(StructLayout)+(NumElts-1) * sizeof(uint64_t)); // Set SL before calling StructLayout's ctor. The ctor could cause other // entries to be added to TheMap, invalidating our reference. @@ -365,31 +463,35 @@ const StructLayout *TargetData::getStructLayout(const StructType *Ty) const { void TargetData::InvalidateStructLayoutInfo(const StructType *Ty) const { if (!LayoutMap) return; // No cache. - LayoutInfoTy* LayoutInfo = static_cast<LayoutInfoTy*>(LayoutMap); - LayoutInfoTy::iterator I = LayoutInfo->find(Ty); - if (I == LayoutInfo->end()) return; + DenseMap<const StructType*, StructLayout*>::iterator I = LayoutMap->find(Ty); + if (I == LayoutMap->end()) return; I->second->~StructLayout(); free(I->second); - LayoutInfo->erase(I); + LayoutMap->erase(I); } std::string TargetData::getStringRepresentation() const { - std::string repr; - repr.append(LittleEndian ? "e" : "E"); - repr.append("-p:").append(itostr((int64_t) (PointerMemSize * 8))). - append(":").append(itostr((int64_t) (PointerABIAlign * 8))). - append(":").append(itostr((int64_t) (PointerPrefAlign * 8))); - for (align_const_iterator I = Alignments.begin(); - I != Alignments.end(); - ++I) { - repr.append("-").append(1, (char) I->AlignType). - append(utostr((int64_t) I->TypeBitWidth)). - append(":").append(utostr((uint64_t) (I->ABIAlign * 8))). - append(":").append(utostr((uint64_t) (I->PrefAlign * 8))); + std::string Result; + raw_string_ostream OS(Result); + + OS << (LittleEndian ? "e" : "E") + << "-p:" << PointerMemSize*8 << ':' << PointerABIAlign*8 + << ':' << PointerPrefAlign*8; + for (unsigned i = 0, e = Alignments.size(); i != e; ++i) { + const TargetAlignElem &AI = Alignments[i]; + OS << '-' << (char)AI.AlignType << AI.TypeBitWidth << ':' + << AI.ABIAlign*8 << ':' << AI.PrefAlign*8; + } + + if (!LegalIntWidths.empty()) { + OS << "-n" << (unsigned)LegalIntWidths[0]; + + for (unsigned i = 1, e = LegalIntWidths.size(); i != e; ++i) + OS << ':' << (unsigned)LegalIntWidths[i]; } - return repr; + return OS.str(); } diff --git a/lib/Target/TargetLoweringObjectFile.cpp b/lib/Target/TargetLoweringObjectFile.cpp index c1aab99..f887523 100644 --- a/lib/Target/TargetLoweringObjectFile.cpp +++ b/lib/Target/TargetLoweringObjectFile.cpp @@ -24,6 +24,7 @@ #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Mangler.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringExtras.h" @@ -151,7 +152,7 @@ SectionKind TargetLoweringObjectFile::getKindForGlobal(const GlobalValue *GV, // relocation, then we may have to drop this into a wriable data section // even though it is marked const. 
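The StructLayoutMap introduced above memoizes struct layouts and registers itself as an abstract-type user so a cached entry is destroyed when its key type is refined or becomes concrete. A generic, self-contained sketch of that cache-plus-invalidation-callback shape, with hypothetical names rather than the LLVM classes:

// Sketch of an observer-invalidated cache: the cache subscribes to the keys
// it stores and evicts an entry when told that key has been replaced.
#include <cstdio>
#include <map>
#include <string>

struct CacheObserver {
  virtual ~CacheObserver() = default;
  virtual void keyInvalidated(const std::string &Key) = 0;
};

class LayoutCacheSketch : public CacheObserver {
  std::map<std::string, int> Layouts;   // key -> cached "layout"
public:
  int &lookup(const std::string &Key) { return Layouts[Key]; }
  void keyInvalidated(const std::string &Key) override {
    Layouts.erase(Key);                 // mirrors refineAbstractType above
  }
  bool contains(const std::string &Key) const { return Layouts.count(Key) != 0; }
};

int main() {
  LayoutCacheSketch Cache;
  Cache.lookup("struct.foo") = 16;      // compute and memoize a layout
  Cache.keyInvalidated("struct.foo");   // the type was refined elsewhere
  std::printf("cached after refine? %d\n", Cache.contains("struct.foo"));
  return 0;
}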
switch (C->getRelocationInfo()) { - default: llvm_unreachable("unknown relocation info kind"); + default: assert(0 && "unknown relocation info kind"); case Constant::NoRelocation: // If initializer is a null-terminated string, put it in a "cstring" // section of the right width. @@ -219,7 +220,7 @@ SectionKind TargetLoweringObjectFile::getKindForGlobal(const GlobalValue *GV, return SectionKind::getDataNoRel(); switch (C->getRelocationInfo()) { - default: llvm_unreachable("unknown relocation info kind"); + default: assert(0 && "unknown relocation info kind"); case Constant::NoRelocation: return SectionKind::getDataNoRel(); case Constant::LocalRelocation: @@ -671,7 +672,7 @@ TargetLoweringObjectFileMachO::~TargetLoweringObjectFileMachO() { const MCSectionMachO *TargetLoweringObjectFileMachO:: -getMachOSection(const StringRef &Segment, const StringRef &Section, +getMachOSection(StringRef Segment, StringRef Section, unsigned TypeAndAttributes, unsigned Reserved2, SectionKind Kind) const { // We unique sections by their segment/section pair. The returned section diff --git a/lib/Target/TargetSubtarget.cpp b/lib/Target/TargetSubtarget.cpp index 95c92ca..edb76f9 100644 --- a/lib/Target/TargetSubtarget.cpp +++ b/lib/Target/TargetSubtarget.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "llvm/Target/TargetSubtarget.h" +#include "llvm/ADT/SmallVector.h" using namespace llvm; //--------------------------------------------------------------------------- @@ -20,3 +21,13 @@ using namespace llvm; TargetSubtarget::TargetSubtarget() {} TargetSubtarget::~TargetSubtarget() {} + +bool TargetSubtarget::enablePostRAScheduler( + CodeGenOpt::Level OptLevel, + AntiDepBreakMode& Mode, + RegClassVector& CriticalPathRCs) const { + Mode = ANTIDEP_NONE; + CriticalPathRCs.clear(); + return false; +} + diff --git a/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp b/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp index ae8e6d3..b88063f 100644 --- a/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp +++ b/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp @@ -651,7 +651,7 @@ void X86AsmPrinter::printMachineInstruction(const MachineInstr *MI) { printInstructionThroughMCStreamer(MI); - if (VerboseAsm && !MI->getDebugLoc().isUnknown()) + if (VerboseAsm) EmitComments(*MI); O << '\n'; diff --git a/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp b/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp index 821cca4..be9f4b2 100644 --- a/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp +++ b/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp @@ -21,6 +21,7 @@ #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCStreamer.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FormattedStream.h" #include "llvm/Support/Mangler.h" #include "llvm/ADT/SmallString.h" @@ -405,7 +406,6 @@ void X86AsmPrinter::printInstructionThroughMCStreamer(const MachineInstr *MI) { printLabel(MI); return; case TargetInstrInfo::INLINEASM: - O << '\t'; printInlineAsm(MI); return; case TargetInstrInfo::IMPLICIT_DEF: diff --git a/lib/Target/X86/X86CodeEmitter.cpp b/lib/Target/X86/X86CodeEmitter.cpp index a0bded3..4497931 100644 --- a/lib/Target/X86/X86CodeEmitter.cpp +++ b/lib/Target/X86/X86CodeEmitter.cpp @@ -82,7 +82,7 @@ namespace { void emitPCRelativeBlockAddress(MachineBasicBlock *MBB); void emitGlobalAddress(GlobalValue *GV, unsigned Reloc, intptr_t Disp = 0, intptr_t PCAdj = 0, - bool NeedStub = false, bool Indirect = false); + bool Indirect = false); void emitExternalSymbolAddress(const char *ES, 
unsigned Reloc); void emitConstPoolAddress(unsigned CPI, unsigned Reloc, intptr_t Disp = 0, intptr_t PCAdj = 0); @@ -176,7 +176,6 @@ template<class CodeEmitter> void Emitter<CodeEmitter>::emitGlobalAddress(GlobalValue *GV, unsigned Reloc, intptr_t Disp /* = 0 */, intptr_t PCAdj /* = 0 */, - bool NeedStub /* = false */, bool Indirect /* = false */) { intptr_t RelocCST = Disp; if (Reloc == X86::reloc_picrel_word) @@ -185,9 +184,9 @@ void Emitter<CodeEmitter>::emitGlobalAddress(GlobalValue *GV, unsigned Reloc, RelocCST = PCAdj; MachineRelocation MR = Indirect ? MachineRelocation::getIndirectSymbol(MCE.getCurrentPCOffset(), Reloc, - GV, RelocCST, NeedStub) + GV, RelocCST, false) : MachineRelocation::getGV(MCE.getCurrentPCOffset(), Reloc, - GV, RelocCST, NeedStub); + GV, RelocCST, false); MCE.addRelocation(MR); // The relocated value will be added to the displacement if (Reloc == X86::reloc_absolute_dword) @@ -333,10 +332,9 @@ void Emitter<CodeEmitter>::emitDisplacementField(const MachineOperand *RelocOp, // do it, otherwise fallback to absolute (this is determined by IsPCRel). // 89 05 00 00 00 00 mov %eax,0(%rip) # PC-relative // 89 04 25 00 00 00 00 mov %eax,0x0 # Absolute - bool NeedStub = isa<Function>(RelocOp->getGlobal()); bool Indirect = gvNeedsNonLazyPtr(*RelocOp, TM); emitGlobalAddress(RelocOp->getGlobal(), RelocType, RelocOp->getOffset(), - Adj, NeedStub, Indirect); + Adj, Indirect); } else if (RelocOp->isSymbol()) { emitExternalSymbolAddress(RelocOp->getSymbolName(), RelocType); } else if (RelocOp->isCPI()) { @@ -633,14 +631,8 @@ void Emitter<CodeEmitter>::emitInstruction(const MachineInstr &MI, } if (MO.isGlobal()) { - // Assume undefined functions may be outside the Small codespace. - bool NeedStub = - (Is64BitMode && - (TM.getCodeModel() == CodeModel::Large || - TM.getSubtarget<X86Subtarget>().isTargetDarwin())) || - Opcode == X86::TAILJMPd; emitGlobalAddress(MO.getGlobal(), X86::reloc_pcrel_word, - MO.getOffset(), 0, NeedStub); + MO.getOffset(), 0); break; } @@ -681,10 +673,9 @@ void Emitter<CodeEmitter>::emitInstruction(const MachineInstr &MI, if (Opcode == X86::MOV64ri) rt = X86::reloc_absolute_dword; // FIXME: add X86II flag? if (MO1.isGlobal()) { - bool NeedStub = isa<Function>(MO1.getGlobal()); bool Indirect = gvNeedsNonLazyPtr(MO1, TM); emitGlobalAddress(MO1.getGlobal(), rt, MO1.getOffset(), 0, - NeedStub, Indirect); + Indirect); } else if (MO1.isSymbol()) emitExternalSymbolAddress(MO1.getSymbolName(), rt); else if (MO1.isCPI()) @@ -790,10 +781,9 @@ void Emitter<CodeEmitter>::emitInstruction(const MachineInstr &MI, if (Opcode == X86::MOV64ri32) rt = X86::reloc_absolute_word_sext; // FIXME: add X86II flag? if (MO1.isGlobal()) { - bool NeedStub = isa<Function>(MO1.getGlobal()); bool Indirect = gvNeedsNonLazyPtr(MO1, TM); emitGlobalAddress(MO1.getGlobal(), rt, MO1.getOffset(), 0, - NeedStub, Indirect); + Indirect); } else if (MO1.isSymbol()) emitExternalSymbolAddress(MO1.getSymbolName(), rt); else if (MO1.isCPI()) @@ -831,10 +821,9 @@ void Emitter<CodeEmitter>::emitInstruction(const MachineInstr &MI, if (Opcode == X86::MOV64mi32) rt = X86::reloc_absolute_word_sext; // FIXME: add X86II flag? 
if (MO.isGlobal()) { - bool NeedStub = isa<Function>(MO.getGlobal()); bool Indirect = gvNeedsNonLazyPtr(MO, TM); emitGlobalAddress(MO.getGlobal(), rt, MO.getOffset(), 0, - NeedStub, Indirect); + Indirect); } else if (MO.isSymbol()) emitExternalSymbolAddress(MO.getSymbolName(), rt); else if (MO.isCPI()) diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp index 3401df0..431c120 100644 --- a/lib/Target/X86/X86FastISel.cpp +++ b/lib/Target/X86/X86FastISel.cpp @@ -1493,7 +1493,7 @@ bool X86FastISel::X86SelectCall(Instruction *I) { EVT ResVT = RVLocs[0].getValVT(); unsigned Opc = ResVT == MVT::f32 ? X86::ST_Fp80m32 : X86::ST_Fp80m64; unsigned MemSize = ResVT.getSizeInBits()/8; - int FI = MFI.CreateStackObject(MemSize, MemSize); + int FI = MFI.CreateStackObject(MemSize, MemSize, false); addFrameReference(BuildMI(MBB, DL, TII.get(Opc)), FI).addReg(ResultReg); DstRC = ResVT == MVT::f32 ? X86::FR32RegisterClass : X86::FR64RegisterClass; diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp index 122f515..6a3577a 100644 --- a/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -12,6 +12,15 @@ // //===----------------------------------------------------------------------===// +// Force NDEBUG on in any optimized build on Darwin. +// +// FIXME: This is a huge hack, to work around ridiculously awful compile times +// on this file with gcc-4.2 on Darwin, in Release mode. +#if (!defined(__llvm__) && defined(__APPLE__) && \ + defined(__OPTIMIZE__) && !defined(NDEBUG)) +#define NDEBUG +#endif + #define DEBUG_TYPE "x86-isel" #include "X86.h" #include "X86InstrBuilder.h" @@ -661,7 +670,6 @@ void X86DAGToDAGISel::InstructionSelect() { const Function *F = MF->getFunction(); OptForSize = F->hasFnAttr(Attribute::OptimizeForSize); - DEBUG(BB->dump()); if (OptLevel != CodeGenOpt::None) PreprocessForRMW(); @@ -1950,14 +1958,12 @@ SDNode *X86DAGToDAGISel::Select(SDValue N) { 0); // We just did a 32-bit clear, insert it into a 64-bit register to // clear the whole 64-bit reg. - SDValue Undef = - SDValue(CurDAG->getMachineNode(TargetInstrInfo::IMPLICIT_DEF, - dl, MVT::i64), 0); + SDValue Zero = CurDAG->getTargetConstant(0, MVT::i64); SDValue SubRegNo = CurDAG->getTargetConstant(X86::SUBREG_32BIT, MVT::i32); ClrNode = - SDValue(CurDAG->getMachineNode(TargetInstrInfo::INSERT_SUBREG, dl, - MVT::i64, Undef, ClrNode, SubRegNo), + SDValue(CurDAG->getMachineNode(TargetInstrInfo::SUBREG_TO_REG, dl, + MVT::i64, Zero, ClrNode, SubRegNo), 0); } else { ClrNode = SDValue(CurDAG->getMachineNode(ClrOpcode, dl, NVT), 0); diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 86ec9f2..6018cf5 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -1087,6 +1087,17 @@ unsigned X86TargetLowering::getFunctionAlignment(const Function *F) const { #include "X86GenCallingConv.inc" +bool +X86TargetLowering::CanLowerReturn(CallingConv::ID CallConv, bool isVarArg, + const SmallVectorImpl<EVT> &OutTys, + const SmallVectorImpl<ISD::ArgFlagsTy> &ArgsFlags, + SelectionDAG &DAG) { + SmallVector<CCValAssign, 16> RVLocs; + CCState CCInfo(CallConv, isVarArg, getTargetMachine(), + RVLocs, *DAG.getContext()); + return CCInfo.CheckReturn(OutTys, ArgsFlags, RetCC_X86); +} + SDValue X86TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, @@ -1370,7 +1381,7 @@ X86TargetLowering::LowerMemArgument(SDValue Chain, // In case of tail call optimization mark all arguments mutable. 
Since they // could be overwritten by lowering of arguments in case of a tail call. int FI = MFI->CreateFixedObject(ValVT.getSizeInBits()/8, - VA.getLocMemOffset(), isImmutable); + VA.getLocMemOffset(), isImmutable, false); SDValue FIN = DAG.getFrameIndex(FI, getPointerTy()); if (Flags.isByVal()) return FIN; @@ -1499,7 +1510,7 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, // the start of the first vararg value... for expansion of llvm.va_start. if (isVarArg) { if (Is64Bit || CallConv != CallingConv::X86_FastCall) { - VarArgsFrameIndex = MFI->CreateFixedObject(1, StackSize); + VarArgsFrameIndex = MFI->CreateFixedObject(1, StackSize, true, false); } if (Is64Bit) { unsigned TotalNumIntRegs = 0, TotalNumXMMRegs = 0; @@ -1550,7 +1561,8 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, VarArgsGPOffset = NumIntRegs * 8; VarArgsFPOffset = TotalNumIntRegs * 8 + NumXMMRegs * 16; RegSaveFrameIndex = MFI->CreateStackObject(TotalNumIntRegs * 8 + - TotalNumXMMRegs * 16, 16); + TotalNumXMMRegs * 16, 16, + false); // Store the integer parameter registers. SmallVector<SDValue, 8> MemOps; @@ -1671,7 +1683,8 @@ EmitTailCallStoreRetAddr(SelectionDAG & DAG, MachineFunction &MF, // Calculate the new stack slot for the return address. int SlotSize = Is64Bit ? 8 : 4; int NewReturnAddrFI = - MF.getFrameInfo()->CreateFixedObject(SlotSize, FPDiff-SlotSize); + MF.getFrameInfo()->CreateFixedObject(SlotSize, FPDiff-SlotSize, + true, false); EVT VT = Is64Bit ? MVT::i64 : MVT::i32; SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewReturnAddrFI, VT); Chain = DAG.getStore(Chain, dl, RetAddrFrIdx, NewRetAddrFrIdx, @@ -1884,7 +1897,7 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee, // Create frame index. int32_t Offset = VA.getLocMemOffset()+FPDiff; uint32_t OpSize = (VA.getLocVT().getSizeInBits()+7)/8; - FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset); + FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset, true, false); FIN = DAG.getFrameIndex(FI, getPointerTy()); if (Flags.isByVal()) { @@ -1924,9 +1937,19 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee, FPDiff, dl); } - // If the callee is a GlobalAddress node (quite common, every direct call is) - // turn it into a TargetGlobalAddress node so that legalize doesn't hack it. - if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) { + bool WasGlobalOrExternal = false; + if (getTargetMachine().getCodeModel() == CodeModel::Large) { + assert(Is64Bit && "Large code model is only legal in 64-bit mode."); + // In the 64-bit large code model, we have to make all calls + // through a register, since the call instruction's 32-bit + // pc-relative offset may not be large enough to hold the whole + // address. + } else if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) { + WasGlobalOrExternal = true; + // If the callee is a GlobalAddress node (quite common, every direct call + // is) turn it into a TargetGlobalAddress node so that legalize doesn't hack + // it. + // We should use extra load for direct calls to dllimported functions in // non-JIT mode. 
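A quick aside on the CodeModel::Large branch just added: a direct x86-64 call carries only a signed 32-bit PC-relative displacement, so it cannot reach a callee placed at an arbitrary 64-bit address. By skipping the TargetGlobalAddress conversion here, the callee address is later materialized into a register and the call becomes register-indirect. A minimal sketch, not part of the patch (the register shown is illustrative only):

    // Large code model: leave the callee as a plain value so it is materialized
    // into a register and called indirectly, roughly
    //   movabsq $callee, %r11     # full 64-bit absolute address
    //   callq   *%r11             # register-indirect call
    // instead of the small-code-model form
    //   callq   callee            # rel32 displacement, +/- 2 GiB reach
    if (getTargetMachine().getCodeModel() == CodeModel::Large)
      assert(Is64Bit && "Large code model is only legal in 64-bit mode.");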
GlobalValue *GV = G->getGlobal(); @@ -1954,6 +1977,7 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee, G->getOffset(), OpFlags); } } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) { + WasGlobalOrExternal = true; unsigned char OpFlags = 0; // On ELF targets, in either X86-64 or X86-32 mode, direct calls to external @@ -1971,7 +1995,9 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee, Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy(), OpFlags); - } else if (isTailCall) { + } + + if (isTailCall && !WasGlobalOrExternal) { unsigned Opc = Is64Bit ? X86::R11 : X86::EAX; Chain = DAG.getCopyToReg(Chain, dl, @@ -2169,7 +2195,8 @@ SDValue X86TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) { if (ReturnAddrIndex == 0) { // Set up a frame object for the return address. uint64_t SlotSize = TD->getPointerSize(); - ReturnAddrIndex = MF.getFrameInfo()->CreateFixedObject(SlotSize, -SlotSize); + ReturnAddrIndex = MF.getFrameInfo()->CreateFixedObject(SlotSize, -SlotSize, + true, false); FuncInfo->setRAIndex(ReturnAddrIndex); } @@ -2517,6 +2544,21 @@ bool X86::isMOVHLPSMask(ShuffleVectorSDNode *N) { isUndefOrEqual(N->getMaskElt(3), 3); } +/// isMOVHLPS_v_undef_Mask - Special case of isMOVHLPSMask for canonical form +/// of vector_shuffle v, v, <2, 3, 2, 3>, i.e. vector_shuffle v, undef, +/// <2, 3, 2, 3> +bool X86::isMOVHLPS_v_undef_Mask(ShuffleVectorSDNode *N) { + unsigned NumElems = N->getValueType(0).getVectorNumElements(); + + if (NumElems != 4) + return false; + + return isUndefOrEqual(N->getMaskElt(0), 2) && + isUndefOrEqual(N->getMaskElt(1), 3) && + isUndefOrEqual(N->getMaskElt(2), 2) && + isUndefOrEqual(N->getMaskElt(3), 3); +} + /// isMOVLPMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a shuffle of elements that is suitable for input to MOVLP{S|D}. bool X86::isMOVLPMask(ShuffleVectorSDNode *N) { @@ -2536,10 +2578,9 @@ bool X86::isMOVLPMask(ShuffleVectorSDNode *N) { return true; } -/// isMOVHPMask - Return true if the specified VECTOR_SHUFFLE operand -/// specifies a shuffle of elements that is suitable for input to MOVHP{S|D} -/// and MOVLHPS. -bool X86::isMOVHPMask(ShuffleVectorSDNode *N) { +/// isMOVLHPSMask - Return true if the specified VECTOR_SHUFFLE operand +/// specifies a shuffle of elements that is suitable for input to MOVLHPS. +bool X86::isMOVLHPSMask(ShuffleVectorSDNode *N) { unsigned NumElems = N->getValueType(0).getVectorNumElements(); if (NumElems != 2 && NumElems != 4) @@ -2556,21 +2597,6 @@ bool X86::isMOVHPMask(ShuffleVectorSDNode *N) { return true; } -/// isMOVHLPS_v_undef_Mask - Special case of isMOVHLPSMask for canonical form -/// of vector_shuffle v, v, <2, 3, 2, 3>, i.e. vector_shuffle v, undef, -/// <2, 3, 2, 3> -bool X86::isMOVHLPS_v_undef_Mask(ShuffleVectorSDNode *N) { - unsigned NumElems = N->getValueType(0).getVectorNumElements(); - - if (NumElems != 4) - return false; - - return isUndefOrEqual(N->getMaskElt(0), 2) && - isUndefOrEqual(N->getMaskElt(1), 3) && - isUndefOrEqual(N->getMaskElt(2), 2) && - isUndefOrEqual(N->getMaskElt(3), 3); -} - /// isUNPCKLMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a shuffle of elements that is suitable for input to UNPCKL. 
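The shuffle-mask comments carried through these hunks pin down which element orders each predicate matches; the rename from isMOVHPMask to isMOVLHPSMask reflects that the MOVHPS/MOVHPD load-folding patterns are dropped later in this patch, leaving only the MOVLHPS use. For inputs A (elements 0-3) and B (elements 4-7) the masks are: MOVLHPS <0, 1, 4, 5> (low half of A, then low half of B), MOVHLPS <6, 7, 2, 3> (high half of B, then high half of A), and the v_undef form <2, 3, 2, 3> for the canonical single-input case. A simplified sketch of the MOVLHPS check for the 4-element case only, not part of the patch:

    static bool isMOVLHPSMask4(ShuffleVectorSDNode *N) {  // illustrative helper
      if (N->getValueType(0).getVectorNumElements() != 4)
        return false;
      return isUndefOrEqual(N->getMaskElt(0), 0) &&   // A[0]
             isUndefOrEqual(N->getMaskElt(1), 1) &&   // A[1]
             isUndefOrEqual(N->getMaskElt(2), 4) &&   // B[0]
             isUndefOrEqual(N->getMaskElt(3), 5);     // B[1]
    }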
static bool isUNPCKLMask(const SmallVectorImpl<int> &Mask, EVT VT, @@ -4264,7 +4290,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { if (!isMMX && (X86::isMOVSHDUPMask(SVOp) || X86::isMOVSLDUPMask(SVOp) || X86::isMOVHLPSMask(SVOp) || - X86::isMOVHPMask(SVOp) || + X86::isMOVLHPSMask(SVOp) || X86::isMOVLPMask(SVOp))) return Op; @@ -4961,7 +4987,7 @@ SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) { DebugLoc dl = Op.getDebugLoc(); unsigned Size = SrcVT.getSizeInBits()/8; MachineFunction &MF = DAG.getMachineFunction(); - int SSFI = MF.getFrameInfo()->CreateStackObject(Size, Size); + int SSFI = MF.getFrameInfo()->CreateStackObject(Size, Size, false); SDValue StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); SDValue Chain = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0), StackSlot, @@ -4995,7 +5021,7 @@ SDValue X86TargetLowering::BuildFILD(SDValue Op, EVT SrcVT, SDValue Chain, // shouldn't be necessary except that RFP cannot be live across // multiple blocks. When stackifier is fixed, they can be uncoupled. MachineFunction &MF = DAG.getMachineFunction(); - int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8); + int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8, false); SDValue StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); Tys = DAG.getVTList(MVT::Other); SmallVector<SDValue, 8> Ops; @@ -5205,7 +5231,7 @@ FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, bool IsSigned) { // stack slot. MachineFunction &MF = DAG.getMachineFunction(); unsigned MemSize = DstTy.getSizeInBits()/8; - int SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize); + int SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize, false); SDValue StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); unsigned Opc; @@ -5228,7 +5254,7 @@ FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, bool IsSigned) { }; Value = DAG.getNode(X86ISD::FLD, dl, Tys, Ops, 3); Chain = Value.getValue(1); - SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize); + SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize, false); StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); } @@ -6752,7 +6778,7 @@ SDValue X86TargetLowering::LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) { DebugLoc dl = Op.getDebugLoc(); // Save FP Control Word to stack slot - int SSFI = MF.getFrameInfo()->CreateStackObject(2, StackAlignment); + int SSFI = MF.getFrameInfo()->CreateStackObject(2, StackAlignment, false); SDValue StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); SDValue Chain = DAG.getNode(X86ISD::FNSTCW16m, dl, MVT::Other, @@ -7977,7 +8003,7 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, // Change the floating point control register to use "round towards zero" // mode when truncating to an integer value. MachineFunction *F = BB->getParent(); - int CWFrameIdx = F->getFrameInfo()->CreateStackObject(2, 2); + int CWFrameIdx = F->getFrameInfo()->CreateStackObject(2, 2, false); addFrameReference(BuildMI(BB, DL, TII->get(X86::FNSTCW16m)), CWFrameIdx); // Load the old value of the high byte of the control word... @@ -9585,14 +9611,14 @@ X86TargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, } // GCC allows "st(0)" to be called just plain "st". 
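The FNSTCW hunk above is the usual x87 truncation dance: FIST/FISTP rounds according to the current FP control word, while C-style float-to-int conversion must truncate, so the control word is saved to a tiny stack slot, rewritten to round-toward-zero around the store, and restored afterwards. A minimal sketch of the slot set-up, using the same calls as the hunk:

    // Two bytes, 2-byte aligned: just enough room for the x87 control word.
    int CWFrameIdx = F->getFrameInfo()->CreateStackObject(2, 2, false);
    // Save the current control word so it can be modified and later restored.
    addFrameReference(BuildMI(BB, DL, TII->get(X86::FNSTCW16m)), CWFrameIdx);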
- if (StringsEqualNoCase("{st}", Constraint)) { + if (StringRef("{st}").equals_lower(Constraint)) { Res.first = X86::ST0; Res.second = X86::RFP80RegisterClass; return Res; } // flags -> EFLAGS - if (StringsEqualNoCase("{flags}", Constraint)) { + if (StringRef("{flags}").equals_lower(Constraint)) { Res.first = X86::EFLAGS; Res.second = X86::CCRRegisterClass; return Res; diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index 7b59b81..7b4ab62 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -286,7 +286,7 @@ namespace llvm { /// isMOVHPMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a shuffle of elements that is suitable for MOVHP{S|D}. /// as well as MOVLHPS. - bool isMOVHPMask(ShuffleVectorSDNode *N); + bool isMOVLHPSMask(ShuffleVectorSDNode *N); /// isUNPCKLMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a shuffle of elements that is suitable for input to UNPCKL. @@ -699,6 +699,12 @@ namespace llvm { const SmallVectorImpl<ISD::OutputArg> &Outs, DebugLoc dl, SelectionDAG &DAG); + virtual bool + CanLowerReturn(CallingConv::ID CallConv, bool isVarArg, + const SmallVectorImpl<EVT> &OutTys, + const SmallVectorImpl<ISD::ArgFlagsTy> &ArgsFlags, + SelectionDAG &DAG); + void ReplaceATOMIC_BINARY_64(SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG, unsigned NewOp); diff --git a/lib/Target/X86/X86Instr64bit.td b/lib/Target/X86/X86Instr64bit.td index 3edced7..a01534b 100644 --- a/lib/Target/X86/X86Instr64bit.td +++ b/lib/Target/X86/X86Instr64bit.td @@ -309,7 +309,7 @@ def MOV64ri32 : RIi32<0xC7, MRM0r, (outs GR64:$dst), (ins i64i32imm:$src), [(set GR64:$dst, i64immSExt32:$src)]>; } -let canFoldAsLoad = 1 in +let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in def MOV64rm : RI<0x8B, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src), "mov{q}\t{$src, $dst|$dst, $src}", [(set GR64:$dst, (load addr:$src))]>; diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp index 87bc10d..1ddceb1 100644 --- a/lib/Target/X86/X86InstrInfo.cpp +++ b/lib/Target/X86/X86InstrInfo.cpp @@ -26,11 +26,15 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/LiveVariables.h" +#include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetOptions.h" #include "llvm/MC/MCAsmInfo.h" + +#include <limits> + using namespace llvm; static cl::opt<bool> @@ -707,9 +711,23 @@ bool X86InstrInfo::isMoveInstr(const MachineInstr& MI, } } -unsigned X86InstrInfo::isLoadFromStackSlot(const MachineInstr *MI, - int &FrameIndex) const { - switch (MI->getOpcode()) { +/// isFrameOperand - Return true and the FrameIndex if the specified +/// operand and follow operands form a reference to the stack frame. 
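The isFrameOperand helper documented here (implementation below) leans on the fixed layout of an x86 memory reference inside a MachineInstr: starting at operand Op the operands are base, scale, index register, and displacement, so a plain frame-slot access is "frame-index base, scale 1, no index, displacement 0". A minimal restatement of that check, assuming this operand order:

    bool IsSimpleFrameAccess =
        MI->getOperand(Op).isFI()          &&  // base is a frame index
        MI->getOperand(Op+1).getImm() == 1 &&  // scale == 1
        MI->getOperand(Op+2).getReg() == 0 &&  // no index register
        MI->getOperand(Op+3).getImm() == 0;    // displacement == 0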
+bool X86InstrInfo::isFrameOperand(const MachineInstr *MI, unsigned int Op, + int &FrameIndex) const { + if (MI->getOperand(Op).isFI() && MI->getOperand(Op+1).isImm() && + MI->getOperand(Op+2).isReg() && MI->getOperand(Op+3).isImm() && + MI->getOperand(Op+1).getImm() == 1 && + MI->getOperand(Op+2).getReg() == 0 && + MI->getOperand(Op+3).getImm() == 0) { + FrameIndex = MI->getOperand(Op).getIndex(); + return true; + } + return false; +} + +static bool isFrameLoadOpcode(int Opcode) { + switch (Opcode) { default: break; case X86::MOV8rm: case X86::MOV16rm: @@ -723,22 +741,14 @@ unsigned X86InstrInfo::isLoadFromStackSlot(const MachineInstr *MI, case X86::MOVDQArm: case X86::MMX_MOVD64rm: case X86::MMX_MOVQ64rm: - if (MI->getOperand(1).isFI() && MI->getOperand(2).isImm() && - MI->getOperand(3).isReg() && MI->getOperand(4).isImm() && - MI->getOperand(2).getImm() == 1 && - MI->getOperand(3).getReg() == 0 && - MI->getOperand(4).getImm() == 0) { - FrameIndex = MI->getOperand(1).getIndex(); - return MI->getOperand(0).getReg(); - } + return true; break; } - return 0; + return false; } -unsigned X86InstrInfo::isStoreToStackSlot(const MachineInstr *MI, - int &FrameIndex) const { - switch (MI->getOpcode()) { +static bool isFrameStoreOpcode(int Opcode) { + switch (Opcode) { default: break; case X86::MOV8mr: case X86::MOV16mr: @@ -753,19 +763,83 @@ unsigned X86InstrInfo::isStoreToStackSlot(const MachineInstr *MI, case X86::MMX_MOVD64mr: case X86::MMX_MOVQ64mr: case X86::MMX_MOVNTQmr: - if (MI->getOperand(0).isFI() && MI->getOperand(1).isImm() && - MI->getOperand(2).isReg() && MI->getOperand(3).isImm() && - MI->getOperand(1).getImm() == 1 && - MI->getOperand(2).getReg() == 0 && - MI->getOperand(3).getImm() == 0) { - FrameIndex = MI->getOperand(0).getIndex(); + return true; + } + return false; +} + +unsigned X86InstrInfo::isLoadFromStackSlot(const MachineInstr *MI, + int &FrameIndex) const { + if (isFrameLoadOpcode(MI->getOpcode())) + if (isFrameOperand(MI, 1, FrameIndex)) + return MI->getOperand(0).getReg(); + return 0; +} + +unsigned X86InstrInfo::isLoadFromStackSlotPostFE(const MachineInstr *MI, + int &FrameIndex) const { + if (isFrameLoadOpcode(MI->getOpcode())) { + unsigned Reg; + if ((Reg = isLoadFromStackSlot(MI, FrameIndex))) + return Reg; + // Check for post-frame index elimination operations + return hasLoadFromStackSlot(MI, FrameIndex); + } + return 0; +} + +bool X86InstrInfo::hasLoadFromStackSlot(const MachineInstr *MI, + int &FrameIndex) const { + for (MachineInstr::mmo_iterator o = MI->memoperands_begin(), + oe = MI->memoperands_end(); + o != oe; + ++o) { + if ((*o)->isLoad() && (*o)->getValue()) + if (const FixedStackPseudoSourceValue *Value = + dyn_cast<const FixedStackPseudoSourceValue>((*o)->getValue())) { + FrameIndex = Value->getFrameIndex(); + return true; + } + } + return false; +} + +unsigned X86InstrInfo::isStoreToStackSlot(const MachineInstr *MI, + int &FrameIndex) const { + if (isFrameStoreOpcode(MI->getOpcode())) + if (isFrameOperand(MI, 0, FrameIndex)) return MI->getOperand(X86AddrNumOperands).getReg(); - } - break; + return 0; +} + +unsigned X86InstrInfo::isStoreToStackSlotPostFE(const MachineInstr *MI, + int &FrameIndex) const { + if (isFrameStoreOpcode(MI->getOpcode())) { + unsigned Reg; + if ((Reg = isStoreToStackSlot(MI, FrameIndex))) + return Reg; + // Check for post-frame index elimination operations + return hasStoreToStackSlot(MI, FrameIndex); } return 0; } +bool X86InstrInfo::hasStoreToStackSlot(const MachineInstr *MI, + int &FrameIndex) const { + for 
(MachineInstr::mmo_iterator o = MI->memoperands_begin(), + oe = MI->memoperands_end(); + o != oe; + ++o) { + if ((*o)->isStore() && (*o)->getValue()) + if (const FixedStackPseudoSourceValue *Value = + dyn_cast<const FixedStackPseudoSourceValue>((*o)->getValue())) { + FrameIndex = Value->getFrameIndex(); + return true; + } + } + return false; +} + /// regIsPICBase - Return true if register is PIC base (i.e.g defined by /// X86::MOVPC32r. static bool regIsPICBase(unsigned BaseReg, const MachineRegisterInfo &MRI) { @@ -794,10 +868,14 @@ X86InstrInfo::isReallyTriviallyReMaterializable(const MachineInstr *MI, case X86::MOVSSrm: case X86::MOVSDrm: case X86::MOVAPSrm: + case X86::MOVUPSrm: + case X86::MOVUPSrm_Int: case X86::MOVAPDrm: case X86::MOVDQArm: case X86::MMX_MOVD64rm: - case X86::MMX_MOVQ64rm: { + case X86::MMX_MOVQ64rm: + case X86::FsMOVAPSrm: + case X86::FsMOVAPDrm: { // Loads from constant pools are trivially rematerializable. if (MI->getOperand(1).isReg() && MI->getOperand(2).isImm() && @@ -917,12 +995,13 @@ static bool isSafeToClobberEFLAGS(MachineBasicBlock &MBB, void X86InstrInfo::reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, unsigned DestReg, unsigned SubIdx, - const MachineInstr *Orig) const { + const MachineInstr *Orig, + const TargetRegisterInfo *TRI) const { DebugLoc DL = DebugLoc::getUnknownLoc(); if (I != MBB.end()) DL = I->getDebugLoc(); if (SubIdx && TargetRegisterInfo::isPhysicalRegister(DestReg)) { - DestReg = RI.getSubReg(DestReg, SubIdx); + DestReg = TRI->getSubReg(DestReg, SubIdx); SubIdx = 0; } @@ -1891,8 +1970,7 @@ void X86InstrInfo::storeRegToAddr(MachineFunction &MF, unsigned SrcReg, MachineInstr::mmo_iterator MMOBegin, MachineInstr::mmo_iterator MMOEnd, SmallVectorImpl<MachineInstr*> &NewMIs) const { - bool isAligned = (RI.getStackAlignment() >= 16) || - RI.needsStackRealignment(MF); + bool isAligned = (*MMOBegin)->getAlignment() >= 16; unsigned Opc = getStoreRegOpcode(SrcReg, RC, isAligned, TM); DebugLoc DL = DebugLoc::getUnknownLoc(); MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc)); @@ -1985,8 +2063,7 @@ void X86InstrInfo::loadRegFromAddr(MachineFunction &MF, unsigned DestReg, MachineInstr::mmo_iterator MMOBegin, MachineInstr::mmo_iterator MMOEnd, SmallVectorImpl<MachineInstr*> &NewMIs) const { - bool isAligned = (RI.getStackAlignment() >= 16) || - RI.needsStackRealignment(MF); + bool isAligned = (*MMOBegin)->getAlignment() >= 16; unsigned Opc = getLoadRegOpcode(DestReg, RC, isAligned, TM); DebugLoc DL = DebugLoc::getUnknownLoc(); MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc), DestReg); @@ -2170,7 +2247,7 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, // If table selected... 
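Taken together, these new entry points split stack-slot detection into an exact form and a heuristic form: isLoadFromStackSlot / isStoreToStackSlot still require the operands to literally name a frame index, while the PostFE variants fall back to the instruction's MachineMemOperands (FixedStackPseudoSourceValue) once frame-index elimination has rewritten the address. A hedged usage sketch from a hypothetical pass, variable names invented:

    int FI;
    if (unsigned Reg = TII->isLoadFromStackSlot(&MI, FI)) {
      // Exact match: MI reloads Reg from slot FI and the operands still refer
      // to the frame index directly.
    } else if (unsigned Reg = TII->isLoadFromStackSlotPostFE(&MI, FI)) {
      // Heuristic match via memory operands; per the new header comments this
      // is a hint only and may not catch every case.
    }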
if (OpcodeTablePtr) { // Find the Opcode to fuse - DenseMap<unsigned*, std::pair<unsigned,unsigned> >::iterator I = + DenseMap<unsigned*, std::pair<unsigned,unsigned> >::const_iterator I = OpcodeTablePtr->find((unsigned*)MI->getOpcode()); if (I != OpcodeTablePtr->end()) { unsigned Opcode = I->second.first; @@ -2402,7 +2479,7 @@ bool X86InstrInfo::canFoldMemoryOperand(const MachineInstr *MI, if (OpcodeTablePtr) { // Find the Opcode to fuse - DenseMap<unsigned*, std::pair<unsigned,unsigned> >::iterator I = + DenseMap<unsigned*, std::pair<unsigned,unsigned> >::const_iterator I = OpcodeTablePtr->find((unsigned*)Opc); if (I != OpcodeTablePtr->end()) return true; @@ -2413,7 +2490,7 @@ bool X86InstrInfo::canFoldMemoryOperand(const MachineInstr *MI, bool X86InstrInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI, unsigned Reg, bool UnfoldLoad, bool UnfoldStore, SmallVectorImpl<MachineInstr*> &NewMIs) const { - DenseMap<unsigned*, std::pair<unsigned,unsigned> >::iterator I = + DenseMap<unsigned*, std::pair<unsigned,unsigned> >::const_iterator I = MemOp2RegOpTable.find((unsigned*)MI->getOpcode()); if (I == MemOp2RegOpTable.end()) return false; @@ -2530,7 +2607,7 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N, if (!N->isMachineOpcode()) return false; - DenseMap<unsigned*, std::pair<unsigned,unsigned> >::iterator I = + DenseMap<unsigned*, std::pair<unsigned,unsigned> >::const_iterator I = MemOp2RegOpTable.find((unsigned*)N->getMachineOpcode()); if (I == MemOp2RegOpTable.end()) return false; @@ -2563,17 +2640,16 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N, MachineFunction &MF = DAG.getMachineFunction(); if (FoldedLoad) { EVT VT = *RC->vt_begin(); - bool isAligned = (RI.getStackAlignment() >= 16) || - RI.needsStackRealignment(MF); + std::pair<MachineInstr::mmo_iterator, + MachineInstr::mmo_iterator> MMOs = + MF.extractLoadMemRefs(cast<MachineSDNode>(N)->memoperands_begin(), + cast<MachineSDNode>(N)->memoperands_end()); + bool isAligned = (*MMOs.first)->getAlignment() >= 16; Load = DAG.getMachineNode(getLoadRegOpcode(0, RC, isAligned, TM), dl, VT, MVT::Other, &AddrOps[0], AddrOps.size()); NewNodes.push_back(Load); // Preserve memory reference information. - std::pair<MachineInstr::mmo_iterator, - MachineInstr::mmo_iterator> MMOs = - MF.extractLoadMemRefs(cast<MachineSDNode>(N)->memoperands_begin(), - cast<MachineSDNode>(N)->memoperands_end()); cast<MachineSDNode>(Load)->setMemRefs(MMOs.first, MMOs.second); } @@ -2601,8 +2677,11 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N, AddrOps.pop_back(); AddrOps.push_back(SDValue(NewNode, 0)); AddrOps.push_back(Chain); - bool isAligned = (RI.getStackAlignment() >= 16) || - RI.needsStackRealignment(MF); + std::pair<MachineInstr::mmo_iterator, + MachineInstr::mmo_iterator> MMOs = + MF.extractStoreMemRefs(cast<MachineSDNode>(N)->memoperands_begin(), + cast<MachineSDNode>(N)->memoperands_end()); + bool isAligned = (*MMOs.first)->getAlignment() >= 16; SDNode *Store = DAG.getMachineNode(getStoreRegOpcode(0, DstRC, isAligned, TM), dl, MVT::Other, @@ -2610,10 +2689,6 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N, NewNodes.push_back(Store); // Preserve memory reference information. 
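The unfolding hunks above also change how the aligned-versus-unaligned opcode is picked: instead of assuming every slot shares the stack's alignment, the decision now reads the alignment recorded on the node's own MachineMemOperand, which also holds for non-stack memory. A minimal sketch using the same calls as the hunk:

    std::pair<MachineInstr::mmo_iterator, MachineInstr::mmo_iterator> MMOs =
        MF.extractLoadMemRefs(cast<MachineSDNode>(N)->memoperands_begin(),
                              cast<MachineSDNode>(N)->memoperands_end());
    bool isAligned = (*MMOs.first)->getAlignment() >= 16;  // SSE wants 16 bytes
    unsigned Opc = getLoadRegOpcode(0, RC, isAligned, TM);  // e.g. aligned vs unaligned move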
- std::pair<MachineInstr::mmo_iterator, - MachineInstr::mmo_iterator> MMOs = - MF.extractStoreMemRefs(cast<MachineSDNode>(N)->memoperands_begin(), - cast<MachineSDNode>(N)->memoperands_end()); cast<MachineSDNode>(Load)->setMemRefs(MMOs.first, MMOs.second); } @@ -2623,7 +2698,7 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N, unsigned X86InstrInfo::getOpcodeAfterMemoryUnfold(unsigned Opc, bool UnfoldLoad, bool UnfoldStore, unsigned *LoadRegIndex) const { - DenseMap<unsigned*, std::pair<unsigned,unsigned> >::iterator I = + DenseMap<unsigned*, std::pair<unsigned,unsigned> >::const_iterator I = MemOp2RegOpTable.find((unsigned*)Opc); if (I == MemOp2RegOpTable.end()) return 0; diff --git a/lib/Target/X86/X86InstrInfo.h b/lib/Target/X86/X86InstrInfo.h index 6eb07d5..c6daa25 100644 --- a/lib/Target/X86/X86InstrInfo.h +++ b/lib/Target/X86/X86InstrInfo.h @@ -449,13 +449,41 @@ public: unsigned &SrcSubIdx, unsigned &DstSubIdx) const; unsigned isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const; + /// isLoadFromStackSlotPostFE - Check for post-frame ptr elimination + /// stack locations as well. This uses a heuristic so it isn't + /// reliable for correctness. + unsigned isLoadFromStackSlotPostFE(const MachineInstr *MI, + int &FrameIndex) const; + + /// hasLoadFromStackSlot - If the specified machine instruction has + /// a load from a stack slot, return true along with the FrameIndex + /// of the loaded stack slot. If not, return false. Unlike + /// isLoadFromStackSlot, this returns true for any instructions that + /// loads from the stack. This is a hint only and may not catch all + /// cases. + bool hasLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const; + unsigned isStoreToStackSlot(const MachineInstr *MI, int &FrameIndex) const; + /// isStoreToStackSlotPostFE - Check for post-frame ptr elimination + /// stack locations as well. This uses a heuristic so it isn't + /// reliable for correctness. + unsigned isStoreToStackSlotPostFE(const MachineInstr *MI, + int &FrameIndex) const; + + /// hasStoreToStackSlot - If the specified machine instruction has a + /// store to a stack slot, return true along with the FrameIndex of + /// the loaded stack slot. If not, return false. Unlike + /// isStoreToStackSlot, this returns true for any instructions that + /// loads from the stack. This is a hint only and may not catch all + /// cases. + bool hasStoreToStackSlot(const MachineInstr *MI, int &FrameIndex) const; bool isReallyTriviallyReMaterializable(const MachineInstr *MI, AliasAnalysis *AA) const; void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, unsigned DestReg, unsigned SubIdx, - const MachineInstr *Orig) const; + const MachineInstr *Orig, + const TargetRegisterInfo *TRI) const; /// convertToThreeAddress - This method must be implemented by targets that /// set the M_CONVERTIBLE_TO_3_ADDR flag. When this flag is set, the target @@ -610,6 +638,11 @@ private: unsigned OpNum, const SmallVectorImpl<MachineOperand> &MOs, unsigned Size, unsigned Alignment) const; + + /// isFrameOperand - Return true and the FrameIndex if the specified + /// operand and follow operands form a reference to the stack frame. 
+ bool isFrameOperand(const MachineInstr *MI, unsigned int Op, + int &FrameIndex) const; }; } // End llvm namespace diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td index 9b82e1e..a79f262 100644 --- a/lib/Target/X86/X86InstrInfo.td +++ b/lib/Target/X86/X86InstrInfo.td @@ -543,7 +543,7 @@ let neverHasSideEffects = 1 in { } // Trap -def INT3 : I<0xcc, RawFrm, (outs), (ins), "int 3", []>; +def INT3 : I<0xcc, RawFrm, (outs), (ins), "int\t3", []>; def INT : I<0xcd, RawFrm, (outs), (ins i8imm:$trap), "int\t$trap", []>; // PIC base construction. This expands to code that looks like this: diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index be242a0..ee63d56 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -225,9 +225,9 @@ def movhlps_undef : PatFrag<(ops node:$lhs, node:$rhs), return X86::isMOVHLPS_v_undef_Mask(cast<ShuffleVectorSDNode>(N)); }]>; -def movhp : PatFrag<(ops node:$lhs, node:$rhs), - (vector_shuffle node:$lhs, node:$rhs), [{ - return X86::isMOVHPMask(cast<ShuffleVectorSDNode>(N)); +def movlhps : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return X86::isMOVLHPSMask(cast<ShuffleVectorSDNode>(N)); }]>; def movlp : PatFrag<(ops node:$lhs, node:$rhs), @@ -497,7 +497,7 @@ def FsMOVAPSrr : PSI<0x28, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src), // Alias instruction to load FR32 from f128mem using movaps. Upper bits are // disregarded. -let canFoldAsLoad = 1 in +let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in def FsMOVAPSrm : PSI<0x28, MRMSrcMem, (outs FR32:$dst), (ins f128mem:$src), "movaps\t{$src, $dst|$dst, $src}", [(set FR32:$dst, (alignedloadfsf32 addr:$src))]>; @@ -706,7 +706,7 @@ def MOVAPSmr : PSI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), let neverHasSideEffects = 1 in def MOVUPSrr : PSI<0x10, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "movups\t{$src, $dst|$dst, $src}", []>; -let canFoldAsLoad = 1 in +let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in def MOVUPSrm : PSI<0x10, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), "movups\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (loadv4f32 addr:$src))]>; @@ -715,7 +715,7 @@ def MOVUPSmr : PSI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), [(store (v4f32 VR128:$src), addr:$dst)]>; // Intrinsic forms of MOVUPS load and store -let canFoldAsLoad = 1 in +let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in def MOVUPSrm_Int : PSI<0x10, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), "movups\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse_loadu_ps addr:$src))]>; @@ -735,7 +735,7 @@ let Constraints = "$src1 = $dst" in { (outs VR128:$dst), (ins VR128:$src1, f64mem:$src2), "movhps\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, - (movhp VR128:$src1, + (movlhps VR128:$src1, (bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2))))))]>; } // AddedComplexity } // Constraints = "$src1 = $dst" @@ -760,7 +760,7 @@ def MOVLHPSrr : PSI<0x16, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), "movlhps\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, - (v4f32 (movhp VR128:$src1, VR128:$src2)))]>; + (v4f32 (movlhps VR128:$src1, VR128:$src2)))]>; def MOVHLPSrr : PSI<0x12, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), @@ -1256,7 +1256,7 @@ def FsMOVAPDrr : PDI<0x28, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src), // Alias instruction to load FR64 from f128mem using movapd. Upper bits are // disregarded. 
-let canFoldAsLoad = 1 in +let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in def FsMOVAPDrm : PDI<0x28, MRMSrcMem, (outs FR64:$dst), (ins f128mem:$src), "movapd\t{$src, $dst|$dst, $src}", [(set FR64:$dst, (alignedloadfsf64 addr:$src))]>; @@ -1494,7 +1494,7 @@ let Constraints = "$src1 = $dst" in { (outs VR128:$dst), (ins VR128:$src1, f64mem:$src2), "movhpd\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, - (v2f64 (movhp VR128:$src1, + (v2f64 (movlhps VR128:$src1, (scalar_to_vector (loadf64 addr:$src2)))))]>; } // AddedComplexity } // Constraints = "$src1 = $dst" @@ -2085,7 +2085,7 @@ def PSHUFDmi : PDIi8<0x70, MRMSrcMem, [(set VR128:$dst, (v4i32 (pshufd:$src2 (bc_v4i32(memopv2i64 addr:$src1)), (undef))))]>; -} +} // SSE2 with ImmT == Imm8 and XS prefix. def PSHUFHWri : Ii8<0x70, MRMSrcReg, @@ -2874,7 +2874,7 @@ def : Pat<(v16i8 (palign:$src3 VR128:$src1, VR128:$src2)), (PALIGNR128rr VR128:$src2, VR128:$src1, (SHUFFLE_get_palign_imm VR128:$src3))>, Requires<[HasSSSE3]>; -} +} def : Pat<(X86pshufb VR128:$src, VR128:$mask), (PSHUFBrr128 VR128:$src, VR128:$mask)>, Requires<[HasSSSE3]>; @@ -3035,7 +3035,7 @@ def : Pat<(v4i32 (unpckh_undef VR128:$src, (undef))), let AddedComplexity = 20 in { // vector_shuffle v1, v2 <0, 1, 4, 5> using MOVLHPS -def : Pat<(v4i32 (movhp VR128:$src1, VR128:$src2)), +def : Pat<(v4i32 (movlhps VR128:$src1, VR128:$src2)), (MOVLHPSrr VR128:$src1, VR128:$src2)>; // vector_shuffle v1, v2 <6, 7, 2, 3> using MOVHLPS @@ -3051,48 +3051,26 @@ def : Pat<(v4i32 (movhlps_undef VR128:$src1, (undef))), let AddedComplexity = 20 in { // vector_shuffle v1, (load v2) <4, 5, 2, 3> using MOVLPS -// vector_shuffle v1, (load v2) <0, 1, 4, 5> using MOVHPS def : Pat<(v4f32 (movlp VR128:$src1, (load addr:$src2))), (MOVLPSrm VR128:$src1, addr:$src2)>, Requires<[HasSSE1]>; def : Pat<(v2f64 (movlp VR128:$src1, (load addr:$src2))), (MOVLPDrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>; -def : Pat<(v4f32 (movhp VR128:$src1, (load addr:$src2))), - (MOVHPSrm VR128:$src1, addr:$src2)>, Requires<[HasSSE1]>; -def : Pat<(v2f64 (movhp VR128:$src1, (load addr:$src2))), - (MOVHPDrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>; - def : Pat<(v4i32 (movlp VR128:$src1, (load addr:$src2))), (MOVLPSrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>; def : Pat<(v2i64 (movlp VR128:$src1, (load addr:$src2))), (MOVLPDrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>; -def : Pat<(v4i32 (movhp VR128:$src1, (load addr:$src2))), - (MOVHPSrm VR128:$src1, addr:$src2)>, Requires<[HasSSE1]>; -def : Pat<(v2i64 (movhp VR128:$src1, (load addr:$src2))), - (MOVHPDrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>; } // (store (vector_shuffle (load addr), v2, <4, 5, 2, 3>), addr) using MOVLPS -// (store (vector_shuffle (load addr), v2, <0, 1, 4, 5>), addr) using MOVHPS def : Pat<(store (v4f32 (movlp (load addr:$src1), VR128:$src2)), addr:$src1), (MOVLPSmr addr:$src1, VR128:$src2)>, Requires<[HasSSE1]>; def : Pat<(store (v2f64 (movlp (load addr:$src1), VR128:$src2)), addr:$src1), (MOVLPDmr addr:$src1, VR128:$src2)>, Requires<[HasSSE2]>; -def : Pat<(store (v4f32 (movhp (load addr:$src1), VR128:$src2)), addr:$src1), - (MOVHPSmr addr:$src1, VR128:$src2)>, Requires<[HasSSE1]>; -def : Pat<(store (v2f64 (movhp (load addr:$src1), VR128:$src2)), addr:$src1), - (MOVHPDmr addr:$src1, VR128:$src2)>, Requires<[HasSSE2]>; - def : Pat<(store (v4i32 (movlp (bc_v4i32 (loadv2i64 addr:$src1)), VR128:$src2)), addr:$src1), (MOVLPSmr addr:$src1, VR128:$src2)>, Requires<[HasSSE1]>; def : Pat<(store (v2i64 (movlp (load 
addr:$src1), VR128:$src2)), addr:$src1), (MOVLPDmr addr:$src1, VR128:$src2)>, Requires<[HasSSE2]>; -def : Pat<(store (v4i32 (movhp (bc_v4i32 (loadv2i64 addr:$src1)), VR128:$src2)), - addr:$src1), - (MOVHPSmr addr:$src1, VR128:$src2)>, Requires<[HasSSE1]>; -def : Pat<(store (v2i64 (movhp (load addr:$src1), VR128:$src2)), addr:$src1), - (MOVHPDmr addr:$src1, VR128:$src2)>, Requires<[HasSSE2]>; - let AddedComplexity = 15 in { // Setting the lowest element in the vector. diff --git a/lib/Target/X86/X86JITInfo.cpp b/lib/Target/X86/X86JITInfo.cpp index 62ca47f..0792bdd 100644 --- a/lib/Target/X86/X86JITInfo.cpp +++ b/lib/Target/X86/X86JITInfo.cpp @@ -367,8 +367,9 @@ X86CompilationCallback2(intptr_t *StackPtr, intptr_t RetAddr) { // Rewrite the call target... so that we don't end up here every time we // execute the call. #if defined (X86_64_JIT) - if (!isStub) - *(intptr_t *)(RetAddr - 0xa) = NewVal; + assert(isStub && + "X86-64 doesn't support rewriting non-stub lazy compilation calls:" + " the call instruction varies too much."); #else *(intptr_t *)RetAddr = (intptr_t)(NewVal-RetAddr-4); #endif diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp index c5ff525..f577fcf 100644 --- a/lib/Target/X86/X86RegisterInfo.cpp +++ b/lib/Target/X86/X86RegisterInfo.cpp @@ -392,6 +392,11 @@ BitVector X86RegisterInfo::getReservedRegs(const MachineFunction &MF) const { Reserved.set(X86::SP); Reserved.set(X86::SPL); + // Set the instruction pointer register and its aliases as reserved. + Reserved.set(X86::RIP); + Reserved.set(X86::EIP); + Reserved.set(X86::IP); + // Set the frame-pointer register and its aliases as reserved if needed. if (hasFP(MF)) { Reserved.set(X86::RBP); @@ -450,12 +455,17 @@ bool X86RegisterInfo::hasFP(const MachineFunction &MF) const { bool X86RegisterInfo::needsStackRealignment(const MachineFunction &MF) const { const MachineFrameInfo *MFI = MF.getFrameInfo(); + bool requiresRealignment = + RealignStack && (MFI->getMaxAlignment() > StackAlign); // FIXME: Currently we don't support stack realignment for functions with - // variable-sized allocas - return (RealignStack && - (MFI->getMaxAlignment() > StackAlign && - !MFI->hasVarSizedObjects())); + // variable-sized allocas. + // FIXME: Temporary disable the error - it seems to be too conservative. + if (0 && requiresRealignment && MFI->hasVarSizedObjects()) + llvm_report_error( + "Stack realignment in presense of dynamic allocas is not supported"); + + return (requiresRealignment && !MFI->hasVarSizedObjects()); } bool X86RegisterInfo::hasReservedCallFrame(MachineFunction &MF) const { @@ -610,8 +620,8 @@ X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, // Offset is a 32-bit integer. int Offset = getFrameIndexOffset(MF, FrameIndex) + (int)(MI.getOperand(i + 3).getImm()); - - MI.getOperand(i + 3).ChangeToImmediate(Offset); + + MI.getOperand(i + 3).ChangeToImmediate(Offset); } else { // Offset is symbolic. This is extremely rare. 
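The needsStackRealignment rewrite above keeps the same observable behaviour while making the two conditions explicit: realignment is wanted when RealignStack is set and the frame needs more than the ABI stack alignment, but it is still refused when the frame has variable-sized allocas (the disabled llvm_report_error suggests that case may later become a hard error). Restated as a short sketch:

    bool requiresRealignment =
        RealignStack && (MFI->getMaxAlignment() > StackAlign);
    // Realignment with dynamic allocas is not supported yet.
    return requiresRealignment && !MFI->hasVarSizedObjects();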
uint64_t Offset = getFrameIndexOffset(MF, FrameIndex) + @@ -647,7 +657,8 @@ X86RegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, // } // [EBP] MFI->CreateFixedObject(-TailCallReturnAddrDelta, - (-1U*SlotSize)+TailCallReturnAddrDelta); + (-1U*SlotSize)+TailCallReturnAddrDelta, + true, false); } if (hasFP(MF)) { @@ -659,7 +670,8 @@ X86RegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, int FrameIdx = MFI->CreateFixedObject(SlotSize, -(int)SlotSize + TFI.getOffsetOfLocalArea() + - TailCallReturnAddrDelta); + TailCallReturnAddrDelta, + true, false); assert(FrameIdx == MFI->getObjectIndexBegin() && "Slot for EBP register must be last in order to be found!"); FrameIdx = 0; @@ -1271,7 +1283,7 @@ unsigned X86RegisterInfo::getRARegister() const { : X86::EIP; // Should have dwarf #8. } -unsigned X86RegisterInfo::getFrameRegister(MachineFunction &MF) const { +unsigned X86RegisterInfo::getFrameRegister(const MachineFunction &MF) const { return hasFP(MF) ? FramePtr : StackPtr; } diff --git a/lib/Target/X86/X86RegisterInfo.h b/lib/Target/X86/X86RegisterInfo.h index f635707..f281a3c 100644 --- a/lib/Target/X86/X86RegisterInfo.h +++ b/lib/Target/X86/X86RegisterInfo.h @@ -153,7 +153,7 @@ public: // Debug information queries. unsigned getRARegister() const; - unsigned getFrameRegister(MachineFunction &MF) const; + unsigned getFrameRegister(const MachineFunction &MF) const; int getFrameIndexOffset(MachineFunction &MF, int FI) const; void getInitialFrameState(std::vector<MachineMove> &Moves) const; diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp index 9525f04..b901c14 100644 --- a/lib/Target/X86/X86Subtarget.cpp +++ b/lib/Target/X86/X86Subtarget.cpp @@ -18,8 +18,10 @@ #include "llvm/GlobalValue.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/System/Host.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" +#include "llvm/ADT/SmallVector.h" using namespace llvm; #if defined(_MSC_VER) @@ -257,118 +259,6 @@ void X86Subtarget::AutoDetectSubtargetFeatures() { } } -static const char *GetCurrentX86CPU() { - unsigned EAX = 0, EBX = 0, ECX = 0, EDX = 0; - if (GetCpuIDAndInfo(0x1, &EAX, &EBX, &ECX, &EDX)) - return "generic"; - unsigned Family = 0; - unsigned Model = 0; - DetectFamilyModel(EAX, Family, Model); - - GetCpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX); - bool Em64T = (EDX >> 29) & 0x1; - bool HasSSE3 = (ECX & 0x1); - - union { - unsigned u[3]; - char c[12]; - } text; - - GetCpuIDAndInfo(0, &EAX, text.u+0, text.u+2, text.u+1); - if (memcmp(text.c, "GenuineIntel", 12) == 0) { - switch (Family) { - case 3: - return "i386"; - case 4: - return "i486"; - case 5: - switch (Model) { - case 4: return "pentium-mmx"; - default: return "pentium"; - } - case 6: - switch (Model) { - case 1: return "pentiumpro"; - case 3: - case 5: - case 6: return "pentium2"; - case 7: - case 8: - case 10: - case 11: return "pentium3"; - case 9: - case 13: return "pentium-m"; - case 14: return "yonah"; - case 15: - case 22: // Celeron M 540 - return "core2"; - case 23: // 45nm: Penryn , Wolfdale, Yorkfield (XE) - return "penryn"; - default: return "i686"; - } - case 15: { - switch (Model) { - case 3: - case 4: - case 6: // same as 4, but 65nm - return (Em64T) ? "nocona" : "prescott"; - case 26: - return "corei7"; - case 28: - return "atom"; - default: - return (Em64T) ? 
"x86-64" : "pentium4"; - } - } - - default: - return "generic"; - } - } else if (memcmp(text.c, "AuthenticAMD", 12) == 0) { - // FIXME: this poorly matches the generated SubtargetFeatureKV table. There - // appears to be no way to generate the wide variety of AMD-specific targets - // from the information returned from CPUID. - switch (Family) { - case 4: - return "i486"; - case 5: - switch (Model) { - case 6: - case 7: return "k6"; - case 8: return "k6-2"; - case 9: - case 13: return "k6-3"; - default: return "pentium"; - } - case 6: - switch (Model) { - case 4: return "athlon-tbird"; - case 6: - case 7: - case 8: return "athlon-mp"; - case 10: return "athlon-xp"; - default: return "athlon"; - } - case 15: - if (HasSSE3) { - return "k8-sse3"; - } else { - switch (Model) { - case 1: return "opteron"; - case 5: return "athlon-fx"; // also opteron - default: return "athlon64"; - } - } - case 16: - return "amdfam10"; - default: - return "generic"; - } - } else { - return "generic"; - } -} - X86Subtarget::X86Subtarget(const std::string &TT, const std::string &FS, bool is64Bit) : PICStyle(PICStyles::None) @@ -395,7 +285,7 @@ X86Subtarget::X86Subtarget(const std::string &TT, const std::string &FS, // Determine default and user specified characteristics if (!FS.empty()) { // If feature string is not empty, parse features string. - std::string CPU = GetCurrentX86CPU(); + std::string CPU = sys::getHostCPUName(); ParseSubtargetFeatures(FS, CPU); // All X86-64 CPUs also have SSE2, however user might request no SSE via // -mattr, so don't force SSELevel here. @@ -455,3 +345,12 @@ X86Subtarget::X86Subtarget(const std::string &TT, const std::string &FS, if (StackAlignment) stackAlignment = StackAlignment; } + +bool X86Subtarget::enablePostRAScheduler( + CodeGenOpt::Level OptLevel, + TargetSubtarget::AntiDepBreakMode& Mode, + RegClassVector& CriticalPathRCs) const { + Mode = TargetSubtarget::ANTIDEP_CRITICAL; + CriticalPathRCs.clear(); + return OptLevel >= CodeGenOpt::Default; +} diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h index e64b854..23f2841 100644 --- a/lib/Target/X86/X86Subtarget.h +++ b/lib/Target/X86/X86Subtarget.h @@ -166,11 +166,11 @@ public: std::string getDataLayout() const { const char *p; if (is64Bit()) - p = "e-p:64:64-s:64-f64:64:64-i64:64:64-f80:128:128"; + p = "e-p:64:64-s:64-f64:64:64-i64:64:64-f80:128:128-n8:16:32:64"; else if (isTargetDarwin()) - p = "e-p:32:32-f64:32:64-i64:32:64-f80:128:128"; + p = "e-p:32:32-f64:32:64-i64:32:64-f80:128:128-n8:16:32"; else - p = "e-p:32:32-f64:32:64-i64:32:64-f80:32:32"; + p = "e-p:32:32-f64:32:64-i64:32:64-f80:32:32-n8:16:32"; return std::string(p); } @@ -219,10 +219,8 @@ public: /// enablePostRAScheduler - X86 target is enabling post-alloc scheduling /// at 'More' optimization level. 
bool enablePostRAScheduler(CodeGenOpt::Level OptLevel, - TargetSubtarget::AntiDepBreakMode& mode) const { - mode = TargetSubtarget::ANTIDEP_CRITICAL; - return OptLevel >= CodeGenOpt::Default; - } + TargetSubtarget::AntiDepBreakMode& Mode, + RegClassVector& CriticalPathRCs) const; }; } // End llvm namespace diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp index a61de1c..0cda8bc 100644 --- a/lib/Target/X86/X86TargetMachine.cpp +++ b/lib/Target/X86/X86TargetMachine.cpp @@ -22,8 +22,7 @@ #include "llvm/Target/TargetRegistry.h" using namespace llvm; -static const MCAsmInfo *createMCAsmInfo(const Target &T, - const StringRef &TT) { +static const MCAsmInfo *createMCAsmInfo(const Target &T, StringRef TT) { Triple TheTriple(TT); switch (TheTriple.getOS()) { case Triple::Darwin: @@ -186,14 +185,8 @@ bool X86TargetMachine::addCodeEmitter(PassManagerBase &PM, } // 64-bit JIT places everything in the same buffer except external functions. - // On Darwin, use small code model but hack the call instruction for - // externals. Elsewhere, do not assume globals are in the lower 4G. - if (Subtarget.is64Bit()) { - if (Subtarget.isTargetDarwin()) - setCodeModel(CodeModel::Small); - else + if (Subtarget.is64Bit()) setCodeModel(CodeModel::Large); - } PM.add(createX86CodeEmitterPass(*this, MCE)); @@ -212,14 +205,8 @@ bool X86TargetMachine::addCodeEmitter(PassManagerBase &PM, } // 64-bit JIT places everything in the same buffer except external functions. - // On Darwin, use small code model but hack the call instruction for - // externals. Elsewhere, do not assume globals are in the lower 4G. - if (Subtarget.is64Bit()) { - if (Subtarget.isTargetDarwin()) - setCodeModel(CodeModel::Small); - else + if (Subtarget.is64Bit()) setCodeModel(CodeModel::Large); - } PM.add(createX86JITCodeEmitterPass(*this, JCE)); diff --git a/lib/Target/XCore/AsmPrinter/XCoreAsmPrinter.cpp b/lib/Target/XCore/AsmPrinter/XCoreAsmPrinter.cpp index bc1bbc3..d7106a0 100644 --- a/lib/Target/XCore/AsmPrinter/XCoreAsmPrinter.cpp +++ b/lib/Target/XCore/AsmPrinter/XCoreAsmPrinter.cpp @@ -361,7 +361,7 @@ void XCoreAsmPrinter::printMachineInstruction(const MachineInstr *MI) { return; } printInstruction(MI); - if (VerboseAsm && !MI->getDebugLoc().isUnknown()) + if (VerboseAsm) EmitComments(*MI); O << '\n'; diff --git a/lib/Target/XCore/XCoreISelDAGToDAG.cpp b/lib/Target/XCore/XCoreISelDAGToDAG.cpp index 860b72f..da2fb04 100644 --- a/lib/Target/XCore/XCoreISelDAGToDAG.cpp +++ b/lib/Target/XCore/XCoreISelDAGToDAG.cpp @@ -149,10 +149,7 @@ bool XCoreDAGToDAGISel::SelectADDRcpii(SDValue Op, SDValue Addr, /// InstructionSelect - This callback is invoked by /// SelectionDAGISel when it has created a SelectionDAG for us to codegen. -void XCoreDAGToDAGISel:: -InstructionSelect() { - DEBUG(BB->dump()); - +void XCoreDAGToDAGISel::InstructionSelect() { // Select target instructions for the DAG. SelectRoot(*CurDAG); diff --git a/lib/Target/XCore/XCoreISelLowering.cpp b/lib/Target/XCore/XCoreISelLowering.cpp index 5ef56c9..16e68fe 100644 --- a/lib/Target/XCore/XCoreISelLowering.cpp +++ b/lib/Target/XCore/XCoreISelLowering.cpp @@ -860,7 +860,8 @@ XCoreTargetLowering::LowerCCCArguments(SDValue Chain, } // Create the frame index object for this incoming parameter... 
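The getDataLayout strings updated above all gain a trailing "-n..." component, which appears to be LLVM's native-integer-widths spec, telling optimizers which integer sizes the target handles natively (that reading is an interpretation; the patch itself does not say). For example:

    // 64-bit x86: 8-, 16-, 32- and 64-bit integers are native.
    const char *DL_X86_64 =
        "e-p:64:64-s:64-f64:64:64-i64:64:64-f80:128:128-n8:16:32:64";
    // 32-bit Darwin x86: native integer widths stop at 32 bits.
    const char *DL_X86_32_Darwin =
        "e-p:32:32-f64:32:64-i64:32:64-f80:128:128-n8:16:32";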
int FI = MFI->CreateFixedObject(ObjSize, - LRSaveSize + VA.getLocMemOffset()); + LRSaveSize + VA.getLocMemOffset(), + true, false); // Create the SelectionDAG nodes corresponding to a load //from this parameter @@ -884,7 +885,7 @@ XCoreTargetLowering::LowerCCCArguments(SDValue Chain, // address for (unsigned i = array_lengthof(ArgRegs) - 1; i >= FirstVAReg; --i) { // Create a stack slot - int FI = MFI->CreateFixedObject(4, offset); + int FI = MFI->CreateFixedObject(4, offset, true, false); if (i == FirstVAReg) { XFI->setVarArgsFrameIndex(FI); } @@ -905,7 +906,8 @@ XCoreTargetLowering::LowerCCCArguments(SDValue Chain, } else { // This will point to the next argument passed via stack. XFI->setVarArgsFrameIndex( - MFI->CreateFixedObject(4, LRSaveSize + CCInfo.getNextStackOffset())); + MFI->CreateFixedObject(4, LRSaveSize + CCInfo.getNextStackOffset(), + true, false)); } } @@ -916,6 +918,17 @@ XCoreTargetLowering::LowerCCCArguments(SDValue Chain, // Return Value Calling Convention Implementation //===----------------------------------------------------------------------===// +bool XCoreTargetLowering:: +CanLowerReturn(CallingConv::ID CallConv, bool isVarArg, + const SmallVectorImpl<EVT> &OutTys, + const SmallVectorImpl<ISD::ArgFlagsTy> &ArgsFlags, + SelectionDAG &DAG) { + SmallVector<CCValAssign, 16> RVLocs; + CCState CCInfo(CallConv, isVarArg, getTargetMachine(), + RVLocs, *DAG.getContext()); + return CCInfo.CheckReturn(OutTys, ArgsFlags, RetCC_XCore); +} + SDValue XCoreTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, diff --git a/lib/Target/XCore/XCoreISelLowering.h b/lib/Target/XCore/XCoreISelLowering.h index ef8555e..10631af 100644 --- a/lib/Target/XCore/XCoreISelLowering.h +++ b/lib/Target/XCore/XCoreISelLowering.h @@ -159,6 +159,12 @@ namespace llvm { CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::OutputArg> &Outs, DebugLoc dl, SelectionDAG &DAG); + + virtual bool + CanLowerReturn(CallingConv::ID CallConv, bool isVarArg, + const SmallVectorImpl<EVT> &OutTys, + const SmallVectorImpl<ISD::ArgFlagsTy> &ArgsFlags, + SelectionDAG &DAG); }; } diff --git a/lib/Target/XCore/XCoreInstrInfo.td b/lib/Target/XCore/XCoreInstrInfo.td index 68e69a2..4ed4ed4 100644 --- a/lib/Target/XCore/XCoreInstrInfo.td +++ b/lib/Target/XCore/XCoreInstrInfo.td @@ -617,7 +617,7 @@ defm EXTSP : FU6_LU6_np<"extsp">; let mayStore = 1 in defm ENTSP : FU6_LU6_np<"entsp">; -let isReturn = 1, isTerminator = 1, mayLoad = 1 in { +let isReturn = 1, isTerminator = 1, mayLoad = 1, isBarrier = 1 in { defm RETSP : FU6_LU6<"retsp", XCoreRetsp>; } } diff --git a/lib/Target/XCore/XCoreRegisterInfo.cpp b/lib/Target/XCore/XCoreRegisterInfo.cpp index 136a035..c7c8c7b 100644 --- a/lib/Target/XCore/XCoreRegisterInfo.cpp +++ b/lib/Target/XCore/XCoreRegisterInfo.cpp @@ -330,9 +330,10 @@ XCoreRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, int FrameIdx; if (! isVarArg) { // A fixed offset of 0 allows us to save / restore LR using entsp / retsp. - FrameIdx = MFI->CreateFixedObject(RC->getSize(), 0); + FrameIdx = MFI->CreateFixedObject(RC->getSize(), 0, true, false); } else { - FrameIdx = MFI->CreateStackObject(RC->getSize(), RC->getAlignment()); + FrameIdx = MFI->CreateStackObject(RC->getSize(), RC->getAlignment(), + false); } XFI->setUsesLR(FrameIdx); XFI->setLRSpillSlot(FrameIdx); @@ -340,13 +341,15 @@ XCoreRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, if (requiresRegisterScavenging(MF)) { // Reserve a slot close to SP or frame pointer. 
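A note on the frame-object calls touched throughout this patch, including the XCore hunks here: CreateFixedObject and CreateStackObject both grow extra boolean arguments. The diff only shows the values passed, not the parameter names; the call sites are consistent with the extra flags marking the object as immutable and/or a spill slot, but that reading is an assumption. A hedged sketch of the new call shapes:

    // Assumed meanings; the parameter names are not visible in this patch.
    int ArgFI =
        MFI->CreateFixedObject(ObjSize, Offset,
                               /*Immutable?*/ true, /*SpillSlot?*/ false);
    int ScratchFI =
        MFI->CreateStackObject(Size, Align, /*SpillSlot?*/ false);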
RS->setScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(), - RC->getAlignment())); + RC->getAlignment(), + false)); } if (hasFP(MF)) { // A callee save register is used to hold the FP. // This needs saving / restoring in the epilogue / prologue. XFI->setFPSpillSlot(MFI->CreateStackObject(RC->getSize(), - RC->getAlignment())); + RC->getAlignment(), + false)); } } @@ -593,7 +596,7 @@ int XCoreRegisterInfo::getDwarfRegNum(unsigned RegNum, bool isEH) const { return XCoreGenRegisterInfo::getDwarfRegNumFull(RegNum, 0); } -unsigned XCoreRegisterInfo::getFrameRegister(MachineFunction &MF) const { +unsigned XCoreRegisterInfo::getFrameRegister(const MachineFunction &MF) const { bool FP = hasFP(MF); return FP ? XCore::R10 : XCore::SP; diff --git a/lib/Target/XCore/XCoreRegisterInfo.h b/lib/Target/XCore/XCoreRegisterInfo.h index a7df510..8ab1750 100644 --- a/lib/Target/XCore/XCoreRegisterInfo.h +++ b/lib/Target/XCore/XCoreRegisterInfo.h @@ -60,7 +60,7 @@ public: unsigned eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, int *Value = NULL, RegScavenger *RS = NULL) const; - + void processFunctionBeforeCalleeSavedScan(MachineFunction &MF, RegScavenger *RS = NULL) const; @@ -71,7 +71,7 @@ public: // Debug information queries. unsigned getRARegister() const; - unsigned getFrameRegister(MachineFunction &MF) const; + unsigned getFrameRegister(const MachineFunction &MF) const; void getInitialFrameState(std::vector<MachineMove> &Moves) const; //! Return the array of argument passing registers diff --git a/lib/Target/XCore/XCoreTargetMachine.cpp b/lib/Target/XCore/XCoreTargetMachine.cpp index 75f2055..267f46a 100644 --- a/lib/Target/XCore/XCoreTargetMachine.cpp +++ b/lib/Target/XCore/XCoreTargetMachine.cpp @@ -25,7 +25,7 @@ XCoreTargetMachine::XCoreTargetMachine(const Target &T, const std::string &TT, : LLVMTargetMachine(T, TT), Subtarget(TT, FS), DataLayout("e-p:32:32:32-a0:0:32-f32:32:32-f64:32:32-i1:8:32-i8:8:32-" - "i16:16:32-i32:32:32-i64:32:32"), + "i16:16:32-i32:32:32-i64:32:32-n32"), InstrInfo(), FrameInfo(*this), TLInfo(*this) { diff --git a/lib/Transforms/Hello/CMakeLists.txt b/lib/Transforms/Hello/CMakeLists.txt index b80d15b..917b745 100644 --- a/lib/Transforms/Hello/CMakeLists.txt +++ b/lib/Transforms/Hello/CMakeLists.txt @@ -1,3 +1,3 @@ -add_llvm_library( LLVMHello +add_llvm_loadable_module( LLVMHello Hello.cpp ) diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp index 234d0ec..442f2fb 100644 --- a/lib/Transforms/IPO/GlobalOpt.cpp +++ b/lib/Transforms/IPO/GlobalOpt.cpp @@ -20,7 +20,6 @@ #include "llvm/DerivedTypes.h" #include "llvm/Instructions.h" #include "llvm/IntrinsicInst.h" -#include "llvm/LLVMContext.h" #include "llvm/Module.h" #include "llvm/Pass.h" #include "llvm/Analysis/ConstantFolding.h" @@ -245,8 +244,7 @@ static bool AnalyzeGlobal(Value *V, GlobalStatus &GS, return false; } -static Constant *getAggregateConstantElement(Constant *Agg, Constant *Idx, - LLVMContext &Context) { +static Constant *getAggregateConstantElement(Constant *Agg, Constant *Idx) { ConstantInt *CI = dyn_cast<ConstantInt>(Idx); if (!CI) return 0; unsigned IdxV = CI->getZExtValue(); @@ -282,8 +280,7 @@ static Constant *getAggregateConstantElement(Constant *Agg, Constant *Idx, /// users of the global, cleaning up the obvious ones. This is largely just a /// quick scan over the use list to clean up the easy and obvious cruft. This /// returns true if it made a change. 
-static bool CleanupConstantGlobalUsers(Value *V, Constant *Init, - LLVMContext &Context) { +static bool CleanupConstantGlobalUsers(Value *V, Constant *Init) { bool Changed = false; for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;) { User *U = *UI++; @@ -304,11 +301,11 @@ static bool CleanupConstantGlobalUsers(Value *V, Constant *Init, Constant *SubInit = 0; if (Init) SubInit = ConstantFoldLoadThroughGEPConstantExpr(Init, CE); - Changed |= CleanupConstantGlobalUsers(CE, SubInit, Context); + Changed |= CleanupConstantGlobalUsers(CE, SubInit); } else if (CE->getOpcode() == Instruction::BitCast && isa<PointerType>(CE->getType())) { // Pointer cast, delete any stores and memsets to the global. - Changed |= CleanupConstantGlobalUsers(CE, 0, Context); + Changed |= CleanupConstantGlobalUsers(CE, 0); } if (CE->use_empty()) { @@ -322,11 +319,11 @@ static bool CleanupConstantGlobalUsers(Value *V, Constant *Init, Constant *SubInit = 0; if (!isa<ConstantExpr>(GEP->getOperand(0))) { ConstantExpr *CE = - dyn_cast_or_null<ConstantExpr>(ConstantFoldInstruction(GEP, Context)); + dyn_cast_or_null<ConstantExpr>(ConstantFoldInstruction(GEP)); if (Init && CE && CE->getOpcode() == Instruction::GetElementPtr) SubInit = ConstantFoldLoadThroughGEPConstantExpr(Init, CE); } - Changed |= CleanupConstantGlobalUsers(GEP, SubInit, Context); + Changed |= CleanupConstantGlobalUsers(GEP, SubInit); if (GEP->use_empty()) { GEP->eraseFromParent(); @@ -344,7 +341,7 @@ static bool CleanupConstantGlobalUsers(Value *V, Constant *Init, if (SafeToDestroyConstant(C)) { C->destroyConstant(); // This could have invalidated UI, start over from scratch. - CleanupConstantGlobalUsers(V, Init, Context); + CleanupConstantGlobalUsers(V, Init); return true; } } @@ -469,8 +466,7 @@ static bool GlobalUsersSafeToSRA(GlobalValue *GV) { /// behavior of the program in a more fine-grained way. We have determined that /// this transformation is safe already. We return the first global variable we /// insert so that the caller can reprocess it. -static GlobalVariable *SRAGlobal(GlobalVariable *GV, const TargetData &TD, - LLVMContext &Context) { +static GlobalVariable *SRAGlobal(GlobalVariable *GV, const TargetData &TD) { // Make sure this global only has simple uses that we can SRA. 
if (!GlobalUsersSafeToSRA(GV)) return 0; @@ -492,11 +488,9 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const TargetData &TD, const StructLayout &Layout = *TD.getStructLayout(STy); for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { Constant *In = getAggregateConstantElement(Init, - ConstantInt::get(Type::getInt32Ty(Context), i), - Context); + ConstantInt::get(Type::getInt32Ty(STy->getContext()), i)); assert(In && "Couldn't get element of initializer?"); - GlobalVariable *NGV = new GlobalVariable(Context, - STy->getElementType(i), false, + GlobalVariable *NGV = new GlobalVariable(STy->getElementType(i), false, GlobalVariable::InternalLinkage, In, GV->getName()+"."+Twine(i), GV->isThreadLocal(), @@ -527,12 +521,10 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const TargetData &TD, unsigned EltAlign = TD.getABITypeAlignment(STy->getElementType()); for (unsigned i = 0, e = NumElements; i != e; ++i) { Constant *In = getAggregateConstantElement(Init, - ConstantInt::get(Type::getInt32Ty(Context), i), - Context); + ConstantInt::get(Type::getInt32Ty(Init->getContext()), i)); assert(In && "Couldn't get element of initializer?"); - GlobalVariable *NGV = new GlobalVariable(Context, - STy->getElementType(), false, + GlobalVariable *NGV = new GlobalVariable(STy->getElementType(), false, GlobalVariable::InternalLinkage, In, GV->getName()+"."+Twine(i), GV->isThreadLocal(), @@ -554,7 +546,7 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const TargetData &TD, DEBUG(errs() << "PERFORMING GLOBAL SRA ON: " << *GV); - Constant *NullInt = Constant::getNullValue(Type::getInt32Ty(Context)); + Constant *NullInt =Constant::getNullValue(Type::getInt32Ty(GV->getContext())); // Loop over all of the uses of the global, replacing the constantexpr geps, // with smaller constantexpr geps or direct references. @@ -678,8 +670,7 @@ static bool AllUsesOfLoadedValueWillTrapIfNull(GlobalVariable *GV) { return true; } -static bool OptimizeAwayTrappingUsesOfValue(Value *V, Constant *NewV, - LLVMContext &Context) { +static bool OptimizeAwayTrappingUsesOfValue(Value *V, Constant *NewV) { bool Changed = false; for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E; ) { Instruction *I = cast<Instruction>(*UI++); @@ -712,7 +703,7 @@ static bool OptimizeAwayTrappingUsesOfValue(Value *V, Constant *NewV, } else if (CastInst *CI = dyn_cast<CastInst>(I)) { Changed |= OptimizeAwayTrappingUsesOfValue(CI, ConstantExpr::getCast(CI->getOpcode(), - NewV, CI->getType()), Context); + NewV, CI->getType())); if (CI->use_empty()) { Changed = true; CI->eraseFromParent(); @@ -730,7 +721,7 @@ static bool OptimizeAwayTrappingUsesOfValue(Value *V, Constant *NewV, if (Idxs.size() == GEPI->getNumOperands()-1) Changed |= OptimizeAwayTrappingUsesOfValue(GEPI, ConstantExpr::getGetElementPtr(NewV, &Idxs[0], - Idxs.size()), Context); + Idxs.size())); if (GEPI->use_empty()) { Changed = true; GEPI->eraseFromParent(); @@ -746,8 +737,7 @@ static bool OptimizeAwayTrappingUsesOfValue(Value *V, Constant *NewV, /// value stored into it. If there are uses of the loaded value that would trap /// if the loaded value is dynamically null, then we know that they cannot be /// reachable with a null optimize away the load. 
-static bool OptimizeAwayTrappingUsesOfLoads(GlobalVariable *GV, Constant *LV, - LLVMContext &Context) { +static bool OptimizeAwayTrappingUsesOfLoads(GlobalVariable *GV, Constant *LV) { bool Changed = false; // Keep track of whether we are able to remove all the uses of the global @@ -758,7 +748,7 @@ static bool OptimizeAwayTrappingUsesOfLoads(GlobalVariable *GV, Constant *LV, for (Value::use_iterator GUI = GV->use_begin(), E = GV->use_end(); GUI != E;){ User *GlobalUser = *GUI++; if (LoadInst *LI = dyn_cast<LoadInst>(GlobalUser)) { - Changed |= OptimizeAwayTrappingUsesOfValue(LI, LV, Context); + Changed |= OptimizeAwayTrappingUsesOfValue(LI, LV); // If we were able to delete all uses of the loads if (LI->use_empty()) { LI->eraseFromParent(); @@ -789,7 +779,7 @@ static bool OptimizeAwayTrappingUsesOfLoads(GlobalVariable *GV, Constant *LV, // nor is the global. if (AllNonStoreUsesGone) { DEBUG(errs() << " *** GLOBAL NOW DEAD!\n"); - CleanupConstantGlobalUsers(GV, 0, Context); + CleanupConstantGlobalUsers(GV, 0); if (GV->use_empty()) { GV->eraseFromParent(); ++NumDeleted; @@ -801,10 +791,10 @@ static bool OptimizeAwayTrappingUsesOfLoads(GlobalVariable *GV, Constant *LV, /// ConstantPropUsersOf - Walk the use list of V, constant folding all of the /// instructions that are foldable. -static void ConstantPropUsersOf(Value *V, LLVMContext &Context) { +static void ConstantPropUsersOf(Value *V) { for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E; ) if (Instruction *I = dyn_cast<Instruction>(*UI++)) - if (Constant *NewC = ConstantFoldInstruction(I, Context)) { + if (Constant *NewC = ConstantFoldInstruction(I)) { I->replaceAllUsesWith(NewC); // Advance UI to the next non-I use to avoid invalidating it! @@ -824,11 +814,10 @@ static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV, CallInst *CI, const Type *AllocTy, Value* NElems, - LLVMContext &Context, TargetData* TD) { DEBUG(errs() << "PROMOTING GLOBAL: " << *GV << " CALL = " << *CI << '\n'); - const Type *IntPtrTy = TD->getIntPtrType(Context); + const Type *IntPtrTy = TD->getIntPtrType(GV->getContext()); // CI has either 0 or 1 bitcast uses (getMallocType() would otherwise have // returned NULL and we would not be here). @@ -883,10 +872,10 @@ static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV, // If there is a comparison against null, we will insert a global bool to // keep track of whether the global was initialized yet or not. GlobalVariable *InitBool = - new GlobalVariable(Context, Type::getInt1Ty(Context), false, + new GlobalVariable(Type::getInt1Ty(GV->getContext()), false, GlobalValue::InternalLinkage, - ConstantInt::getFalse(Context), GV->getName()+".init", - GV->isThreadLocal()); + ConstantInt::getFalse(GV->getContext()), + GV->getName()+".init", GV->isThreadLocal()); bool InitBoolUsed = false; // Loop over all uses of GV, processing them in turn. 
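The hunk above shows the pattern that runs through this whole GlobalOpt change: rather than threading an LLVMContext parameter into every helper, the context is recovered from a value that is already in hand. A minimal sketch of the new style, using the same constructor call as the patch (the helper name is hypothetical):

    // Create an internal i1 flag global next to GV without being handed a context.
    static GlobalVariable *CreateInitFlag(GlobalVariable *GV) {
      LLVMContext &Ctx = GV->getContext();      // every Value knows its own context
      return new GlobalVariable(Type::getInt1Ty(Ctx), false,
                                GlobalValue::InternalLinkage,
                                ConstantInt::getFalse(Ctx),
                                GV->getName() + ".init", GV->isThreadLocal());
    }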
@@ -905,8 +894,8 @@ static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV, switch (ICI->getPredicate()) { default: llvm_unreachable("Unknown ICmp Predicate!"); case ICmpInst::ICMP_ULT: - case ICmpInst::ICMP_SLT: - LV = ConstantInt::getFalse(Context); // X < null -> always false + case ICmpInst::ICMP_SLT: // X < null -> always false + LV = ConstantInt::getFalse(GV->getContext()); break; case ICmpInst::ICMP_ULE: case ICmpInst::ICMP_SLE: @@ -928,7 +917,7 @@ static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV, } else { StoreInst *SI = cast<StoreInst>(GV->use_back()); // The global is initialized when the store to it occurs. - new StoreInst(ConstantInt::getTrue(Context), InitBool, SI); + new StoreInst(ConstantInt::getTrue(GV->getContext()), InitBool, SI); SI->eraseFromParent(); } @@ -949,9 +938,9 @@ static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV, // To further other optimizations, loop over all users of NewGV and try to // constant prop them. This will promote GEP instructions with constant // indices into GEP constant-exprs, which will allow global-opt to hack on it. - ConstantPropUsersOf(NewGV, Context); + ConstantPropUsersOf(NewGV); if (RepValue != NewGV) - ConstantPropUsersOf(RepValue, Context); + ConstantPropUsersOf(RepValue); return NewGV; } @@ -1153,8 +1142,7 @@ static bool AllGlobalLoadUsesSimpleEnoughForHeapSRA(GlobalVariable *GV, static Value *GetHeapSROAValue(Value *V, unsigned FieldNo, DenseMap<Value*, std::vector<Value*> > &InsertedScalarizedValues, - std::vector<std::pair<PHINode*, unsigned> > &PHIsToRewrite, - LLVMContext &Context) { + std::vector<std::pair<PHINode*, unsigned> > &PHIsToRewrite) { std::vector<Value*> &FieldVals = InsertedScalarizedValues[V]; if (FieldNo >= FieldVals.size()) @@ -1172,7 +1160,7 @@ static Value *GetHeapSROAValue(Value *V, unsigned FieldNo, // a new Load of the scalarized global. Result = new LoadInst(GetHeapSROAValue(LI->getOperand(0), FieldNo, InsertedScalarizedValues, - PHIsToRewrite, Context), + PHIsToRewrite), LI->getName()+".f"+Twine(FieldNo), LI); } else if (PHINode *PN = dyn_cast<PHINode>(V)) { // PN's type is pointer to struct. Make a new PHI of pointer to struct @@ -1196,16 +1184,14 @@ static Value *GetHeapSROAValue(Value *V, unsigned FieldNo, /// the load, rewrite the derived value to use the HeapSRoA'd load. static void RewriteHeapSROALoadUser(Instruction *LoadUser, DenseMap<Value*, std::vector<Value*> > &InsertedScalarizedValues, - std::vector<std::pair<PHINode*, unsigned> > &PHIsToRewrite, - LLVMContext &Context) { + std::vector<std::pair<PHINode*, unsigned> > &PHIsToRewrite) { // If this is a comparison against null, handle it. if (ICmpInst *SCI = dyn_cast<ICmpInst>(LoadUser)) { assert(isa<ConstantPointerNull>(SCI->getOperand(1))); // If we have a setcc of the loaded pointer, we can use a setcc of any // field. Value *NPtr = GetHeapSROAValue(SCI->getOperand(0), 0, - InsertedScalarizedValues, PHIsToRewrite, - Context); + InsertedScalarizedValues, PHIsToRewrite); Value *New = new ICmpInst(SCI, SCI->getPredicate(), NPtr, Constant::getNullValue(NPtr->getType()), @@ -1223,8 +1209,7 @@ static void RewriteHeapSROALoadUser(Instruction *LoadUser, // Load the pointer for this field. unsigned FieldNo = cast<ConstantInt>(GEPI->getOperand(2))->getZExtValue(); Value *NewPtr = GetHeapSROAValue(GEPI->getOperand(0), FieldNo, - InsertedScalarizedValues, PHIsToRewrite, - Context); + InsertedScalarizedValues, PHIsToRewrite); // Create the new GEP idx vector. 
SmallVector<Value*, 8> GEPIdx; @@ -1256,8 +1241,7 @@ static void RewriteHeapSROALoadUser(Instruction *LoadUser, // users. for (Value::use_iterator UI = PN->use_begin(), E = PN->use_end(); UI != E; ) { Instruction *User = cast<Instruction>(*UI++); - RewriteHeapSROALoadUser(User, InsertedScalarizedValues, PHIsToRewrite, - Context); + RewriteHeapSROALoadUser(User, InsertedScalarizedValues, PHIsToRewrite); } } @@ -1267,13 +1251,11 @@ static void RewriteHeapSROALoadUser(Instruction *LoadUser, /// AllGlobalLoadUsesSimpleEnoughForHeapSRA. static void RewriteUsesOfLoadForHeapSRoA(LoadInst *Load, DenseMap<Value*, std::vector<Value*> > &InsertedScalarizedValues, - std::vector<std::pair<PHINode*, unsigned> > &PHIsToRewrite, - LLVMContext &Context) { + std::vector<std::pair<PHINode*, unsigned> > &PHIsToRewrite) { for (Value::use_iterator UI = Load->use_begin(), E = Load->use_end(); UI != E; ) { Instruction *User = cast<Instruction>(*UI++); - RewriteHeapSROALoadUser(User, InsertedScalarizedValues, PHIsToRewrite, - Context); + RewriteHeapSROALoadUser(User, InsertedScalarizedValues, PHIsToRewrite); } if (Load->use_empty()) { @@ -1285,8 +1267,7 @@ static void RewriteUsesOfLoadForHeapSRoA(LoadInst *Load, /// PerformHeapAllocSRoA - CI is an allocation of an array of structures. Break /// it up into multiple allocations of arrays of the fields. static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI, - Value* NElems, LLVMContext &Context, - TargetData *TD) { + Value* NElems, TargetData *TD) { DEBUG(errs() << "SROA HEAP ALLOC: " << *GV << " MALLOC = " << *CI << '\n'); const Type* MAT = getMallocAllocatedType(CI); const StructType *STy = cast<StructType>(MAT); @@ -1315,14 +1296,16 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI, FieldGlobals.push_back(NGV); unsigned TypeSize = TD->getTypeAllocSize(FieldTy); - if (const StructType* ST = dyn_cast<StructType>(FieldTy)) + if (const StructType *ST = dyn_cast<StructType>(FieldTy)) TypeSize = TD->getStructLayout(ST)->getSizeInBytes(); - const Type* IntPtrTy = TD->getIntPtrType(Context); + const Type *IntPtrTy = TD->getIntPtrType(CI->getContext()); Value *NMI = CallInst::CreateMalloc(CI, IntPtrTy, FieldTy, ConstantInt::get(IntPtrTy, TypeSize), NElems, CI->getName() + ".f" + Twine(FieldNo)); - FieldMallocs.push_back(NMI); + CallInst *NCI = dyn_cast<BitCastInst>(NMI) ? + extractMallocCallFromBitCast(NMI) : cast<CallInst>(NMI); + FieldMallocs.push_back(NCI); new StoreInst(NMI, NGV, CI); } @@ -1338,15 +1321,15 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI, // if (F1) { free(F1); F1 = 0; } // if (F2) { free(F2); F2 = 0; } // } - Value *RunningOr = 0; + // The malloc can also fail if its argument is too large. + Constant *ConstantZero = ConstantInt::get(CI->getOperand(1)->getType(), 0); + Value *RunningOr = new ICmpInst(CI, ICmpInst::ICMP_SLT, CI->getOperand(1), + ConstantZero, "isneg"); for (unsigned i = 0, e = FieldMallocs.size(); i != e; ++i) { Value *Cond = new ICmpInst(CI, ICmpInst::ICMP_EQ, FieldMallocs[i], Constant::getNullValue(FieldMallocs[i]->getType()), "isnull"); - if (!RunningOr) - RunningOr = Cond; // First seteq - else - RunningOr = BinaryOperator::CreateOr(RunningOr, Cond, "tmp", CI); + RunningOr = BinaryOperator::CreateOr(RunningOr, Cond, "tmp", CI); } // Split the basic block at the old malloc. @@ -1355,7 +1338,8 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI, // Create the block to check the first condition. 
Put all these blocks at the // end of the function as they are unlikely to be executed. - BasicBlock *NullPtrBlock = BasicBlock::Create(Context, "malloc_ret_null", + BasicBlock *NullPtrBlock = BasicBlock::Create(OrigBB->getContext(), + "malloc_ret_null", OrigBB->getParent()); // Remove the uncond branch from OrigBB to ContBB, turning it into a cond @@ -1370,9 +1354,9 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI, Value *Cmp = new ICmpInst(*NullPtrBlock, ICmpInst::ICMP_NE, GVVal, Constant::getNullValue(GVVal->getType()), "tmp"); - BasicBlock *FreeBlock = BasicBlock::Create(Context, "free_it", + BasicBlock *FreeBlock = BasicBlock::Create(Cmp->getContext(), "free_it", OrigBB->getParent()); - BasicBlock *NextBlock = BasicBlock::Create(Context, "next", + BasicBlock *NextBlock = BasicBlock::Create(Cmp->getContext(), "next", OrigBB->getParent()); Instruction *BI = BranchInst::Create(FreeBlock, NextBlock, Cmp, NullPtrBlock); @@ -1406,8 +1390,7 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI, Instruction *User = cast<Instruction>(*UI++); if (LoadInst *LI = dyn_cast<LoadInst>(User)) { - RewriteUsesOfLoadForHeapSRoA(LI, InsertedScalarizedValues, PHIsToRewrite, - Context); + RewriteUsesOfLoadForHeapSRoA(LI, InsertedScalarizedValues, PHIsToRewrite); continue; } @@ -1438,7 +1421,7 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI, for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { Value *InVal = PN->getIncomingValue(i); InVal = GetHeapSROAValue(InVal, FieldNo, InsertedScalarizedValues, - PHIsToRewrite, Context); + PHIsToRewrite); FieldPN->addIncoming(InVal, PN->getIncomingBlock(i)); } } @@ -1477,8 +1460,7 @@ static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV, CallInst *CI, const Type *AllocTy, Module::global_iterator &GVI, - TargetData *TD, - LLVMContext &Context) { + TargetData *TD) { // If this is a malloc of an abstract type, don't touch it. if (!AllocTy->isSized()) return false; @@ -1508,15 +1490,14 @@ static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV, // This eliminates dynamic allocation, avoids an indirection accessing the // data, and exposes the resultant global to further GlobalOpt. // We cannot optimize the malloc if we cannot determine malloc array size. - if (Value *NElems = getMallocArraySize(CI, Context, TD)) { + if (Value *NElems = getMallocArraySize(CI, TD, true)) { if (ConstantInt *NElements = dyn_cast<ConstantInt>(NElems)) // Restrict this transformation to only working on small allocations // (2048 bytes currently), as we don't want to introduce a 16M global or // something. if (TD && NElements->getZExtValue() * TD->getTypeAllocSize(AllocTy) < 2048) { - GVI = OptimizeGlobalAddressOfMalloc(GV, CI, AllocTy, NElems, - Context, TD); + GVI = OptimizeGlobalAddressOfMalloc(GV, CI, AllocTy, NElems, TD); return true; } @@ -1540,7 +1521,7 @@ static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV, // structs. 
malloc [100 x struct],1 -> malloc struct, 100 if (const ArrayType *AT = dyn_cast<ArrayType>(getMallocAllocatedType(CI))) { - const Type *IntPtrTy = TD->getIntPtrType(Context); + const Type *IntPtrTy = TD->getIntPtrType(CI->getContext()); unsigned TypeSize = TD->getStructLayout(AllocSTy)->getSizeInBytes(); Value *AllocSize = ConstantInt::get(IntPtrTy, TypeSize); Value *NumElements = ConstantInt::get(IntPtrTy, AT->getNumElements()); @@ -1551,12 +1532,10 @@ static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV, CI->replaceAllUsesWith(Cast); CI->eraseFromParent(); CI = dyn_cast<BitCastInst>(Malloc) ? - extractMallocCallFromBitCast(Malloc): - cast<CallInst>(Malloc); + extractMallocCallFromBitCast(Malloc) : cast<CallInst>(Malloc); } - GVI = PerformHeapAllocSRoA(GV, CI, getMallocArraySize(CI, Context, TD), - Context, TD); + GVI = PerformHeapAllocSRoA(GV, CI, getMallocArraySize(CI, TD, true),TD); return true; } } @@ -1569,7 +1548,7 @@ static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV, // that only one value (besides its initializer) is ever stored to the global. static bool OptimizeOnceStoredGlobal(GlobalVariable *GV, Value *StoredOnceVal, Module::global_iterator &GVI, - TargetData *TD, LLVMContext &Context) { + TargetData *TD) { // Ignore no-op GEPs and bitcasts. StoredOnceVal = StoredOnceVal->stripPointerCasts(); @@ -1585,12 +1564,12 @@ static bool OptimizeOnceStoredGlobal(GlobalVariable *GV, Value *StoredOnceVal, ConstantExpr::getBitCast(SOVC, GV->getInitializer()->getType()); // Optimize away any trapping uses of the loaded value. - if (OptimizeAwayTrappingUsesOfLoads(GV, SOVC, Context)) + if (OptimizeAwayTrappingUsesOfLoads(GV, SOVC)) return true; } else if (CallInst *CI = extractMallocCall(StoredOnceVal)) { const Type* MallocType = getMallocAllocatedType(CI); if (MallocType && TryToOptimizeStoreOfMallocToGlobal(GV, CI, MallocType, - GVI, TD, Context)) + GVI, TD)) return true; } } @@ -1602,8 +1581,7 @@ static bool OptimizeOnceStoredGlobal(GlobalVariable *GV, Value *StoredOnceVal, /// two values ever stored into GV are its initializer and OtherVal. See if we /// can shrink the global into a boolean and select between the two values /// whenever it is used. This exposes the values to other scalar optimizations. -static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal, - LLVMContext &Context) { +static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) { const Type *GVElType = GV->getType()->getElementType(); // If GVElType is already i1, it is already shrunk. If the type of the GV is @@ -1611,7 +1589,8 @@ static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal, // between them is very expensive and unlikely to lead to later // simplification. In these cases, we typically end up with "cond ? v1 : v2" // where v1 and v2 both require constant pool loads, a big loss. - if (GVElType == Type::getInt1Ty(Context) || GVElType->isFloatingPoint() || + if (GVElType == Type::getInt1Ty(GV->getContext()) || + GVElType->isFloatingPoint() || isa<PointerType>(GVElType) || isa<VectorType>(GVElType)) return false; @@ -1624,15 +1603,16 @@ static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal, DEBUG(errs() << " *** SHRINKING TO BOOL: " << *GV); // Create the new global, initializing it to false. 
- GlobalVariable *NewGV = new GlobalVariable(Context, - Type::getInt1Ty(Context), false, - GlobalValue::InternalLinkage, ConstantInt::getFalse(Context), + GlobalVariable *NewGV = new GlobalVariable(Type::getInt1Ty(GV->getContext()), + false, + GlobalValue::InternalLinkage, + ConstantInt::getFalse(GV->getContext()), GV->getName()+".b", GV->isThreadLocal()); GV->getParent()->getGlobalList().insert(GV, NewGV); Constant *InitVal = GV->getInitializer(); - assert(InitVal->getType() != Type::getInt1Ty(Context) && + assert(InitVal->getType() != Type::getInt1Ty(GV->getContext()) && "No reason to shrink to bool!"); // If initialized to zero and storing one into the global, we can use a cast @@ -1649,7 +1629,8 @@ static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal, // Only do this if we weren't storing a loaded value. Value *StoreVal; if (StoringOther || SI->getOperand(0) == InitVal) - StoreVal = ConstantInt::get(Type::getInt1Ty(Context), StoringOther); + StoreVal = ConstantInt::get(Type::getInt1Ty(GV->getContext()), + StoringOther); else { // Otherwise, we are storing a previously loaded copy. To do this, // change the copy from copying the original value to just copying the @@ -1708,24 +1689,26 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV, if (!AnalyzeGlobal(GV, GS, PHIUsers)) { #if 0 - cerr << "Global: " << *GV; - cerr << " isLoaded = " << GS.isLoaded << "\n"; - cerr << " StoredType = "; + DEBUG(errs() << "Global: " << *GV); + DEBUG(errs() << " isLoaded = " << GS.isLoaded << "\n"); + DEBUG(errs() << " StoredType = "); switch (GS.StoredType) { - case GlobalStatus::NotStored: cerr << "NEVER STORED\n"; break; - case GlobalStatus::isInitializerStored: cerr << "INIT STORED\n"; break; - case GlobalStatus::isStoredOnce: cerr << "STORED ONCE\n"; break; - case GlobalStatus::isStored: cerr << "stored\n"; break; + case GlobalStatus::NotStored: DEBUG(errs() << "NEVER STORED\n"); break; + case GlobalStatus::isInitializerStored: DEBUG(errs() << "INIT STORED\n"); + break; + case GlobalStatus::isStoredOnce: DEBUG(errs() << "STORED ONCE\n"); break; + case GlobalStatus::isStored: DEBUG(errs() << "stored\n"); break; } if (GS.StoredType == GlobalStatus::isStoredOnce && GS.StoredOnceValue) - cerr << " StoredOnceValue = " << *GS.StoredOnceValue << "\n"; + DEBUG(errs() << " StoredOnceValue = " << *GS.StoredOnceValue << "\n"); if (GS.AccessingFunction && !GS.HasMultipleAccessingFunctions) - cerr << " AccessingFunction = " << GS.AccessingFunction->getName() - << "\n"; - cerr << " HasMultipleAccessingFunctions = " - << GS.HasMultipleAccessingFunctions << "\n"; - cerr << " HasNonInstructionUser = " << GS.HasNonInstructionUser<<"\n"; - cerr << "\n"; + DEBUG(errs() << " AccessingFunction = " << GS.AccessingFunction->getName() + << "\n"); + DEBUG(errs() << " HasMultipleAccessingFunctions = " + << GS.HasMultipleAccessingFunctions << "\n"); + DEBUG(errs() << " HasNonInstructionUser = " + << GS.HasNonInstructionUser<<"\n"); + DEBUG(errs() << "\n"); #endif // If this is a first class global and has only one accessing function @@ -1764,8 +1747,7 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV, // Delete any stores we can find to the global. We may not be able to // make it completely dead though. - bool Changed = CleanupConstantGlobalUsers(GV, GV->getInitializer(), - GV->getContext()); + bool Changed = CleanupConstantGlobalUsers(GV, GV->getInitializer()); // If the global is dead now, delete it. 
if (GV->use_empty()) { @@ -1780,7 +1762,7 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV, GV->setConstant(true); // Clean up any obviously simplifiable users now. - CleanupConstantGlobalUsers(GV, GV->getInitializer(), GV->getContext()); + CleanupConstantGlobalUsers(GV, GV->getInitializer()); // If the global is dead now, just nuke it. if (GV->use_empty()) { @@ -1794,8 +1776,7 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV, return true; } else if (!GV->getInitializer()->getType()->isSingleValueType()) { if (TargetData *TD = getAnalysisIfAvailable<TargetData>()) - if (GlobalVariable *FirstNewGV = SRAGlobal(GV, *TD, - GV->getContext())) { + if (GlobalVariable *FirstNewGV = SRAGlobal(GV, *TD)) { GVI = FirstNewGV; // Don't skip the newly produced globals! return true; } @@ -1810,8 +1791,7 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV, GV->setInitializer(SOVConstant); // Clean up any obviously simplifiable users now. - CleanupConstantGlobalUsers(GV, GV->getInitializer(), - GV->getContext()); + CleanupConstantGlobalUsers(GV, GV->getInitializer()); if (GV->use_empty()) { DEBUG(errs() << " *** Substituting initializer allowed us to " @@ -1828,14 +1808,13 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV, // Try to optimize globals based on the knowledge that only one value // (besides its initializer) is ever stored to the global. if (OptimizeOnceStoredGlobal(GV, GS.StoredOnceValue, GVI, - getAnalysisIfAvailable<TargetData>(), - GV->getContext())) + getAnalysisIfAvailable<TargetData>())) return true; // Otherwise, if the global was not a boolean, we can shrink it to be a // boolean. if (Constant *SOVConstant = dyn_cast<Constant>(GS.StoredOnceValue)) - if (TryToShrinkGlobalToBoolean(GV, SOVConstant, GV->getContext())) { + if (TryToShrinkGlobalToBoolean(GV, SOVConstant)) { ++NumShrunkToBool; return true; } @@ -1987,11 +1966,10 @@ static std::vector<Function*> ParseGlobalCtors(GlobalVariable *GV) { /// InstallGlobalCtors - Given a specified llvm.global_ctors list, install the /// specified array, returning the new global to use. static GlobalVariable *InstallGlobalCtors(GlobalVariable *GCL, - const std::vector<Function*> &Ctors, - LLVMContext &Context) { + const std::vector<Function*> &Ctors) { // If we made a change, reassemble the initializer list. std::vector<Constant*> CSVals; - CSVals.push_back(ConstantInt::get(Type::getInt32Ty(Context), 65535)); + CSVals.push_back(ConstantInt::get(Type::getInt32Ty(GCL->getContext()),65535)); CSVals.push_back(0); // Create the new init list. @@ -2000,12 +1978,14 @@ static GlobalVariable *InstallGlobalCtors(GlobalVariable *GCL, if (Ctors[i]) { CSVals[1] = Ctors[i]; } else { - const Type *FTy = FunctionType::get(Type::getVoidTy(Context), false); + const Type *FTy = FunctionType::get(Type::getVoidTy(GCL->getContext()), + false); const PointerType *PFTy = PointerType::getUnqual(FTy); CSVals[1] = Constant::getNullValue(PFTy); - CSVals[0] = ConstantInt::get(Type::getInt32Ty(Context), 2147483647); + CSVals[0] = ConstantInt::get(Type::getInt32Ty(GCL->getContext()), + 2147483647); } - CAList.push_back(ConstantStruct::get(Context, CSVals, false)); + CAList.push_back(ConstantStruct::get(GCL->getContext(), CSVals, false)); } // Create the array initializer. @@ -2021,8 +2001,7 @@ static GlobalVariable *InstallGlobalCtors(GlobalVariable *GCL, } // Create the new global and insert it next to the existing list. 
- GlobalVariable *NGV = new GlobalVariable(Context, CA->getType(), - GCL->isConstant(), + GlobalVariable *NGV = new GlobalVariable(CA->getType(), GCL->isConstant(), GCL->getLinkage(), CA, "", GCL->isThreadLocal()); GCL->getParent()->getGlobalList().insert(GCL, NGV); @@ -2056,7 +2035,7 @@ static Constant *getVal(DenseMap<Value*, Constant*> &ComputedValues, /// enough for us to understand. In particular, if it is a cast of something, /// we punt. We basically just support direct accesses to globals and GEP's of /// globals. This should be kept up to date with CommitValueTo. -static bool isSimpleEnoughPointerToCommit(Constant *C, LLVMContext &Context) { +static bool isSimpleEnoughPointerToCommit(Constant *C) { // Conservatively, avoid aggregate types. This is because we don't // want to worry about them partially overlapping other stores. if (!cast<PointerType>(C->getType())->getElementType()->isSingleValueType()) @@ -2096,8 +2075,7 @@ static bool isSimpleEnoughPointerToCommit(Constant *C, LLVMContext &Context) { /// initializer. This returns 'Init' modified to reflect 'Val' stored into it. /// At this point, the GEP operands of Addr [0, OpNo) have been stepped into. static Constant *EvaluateStoreInto(Constant *Init, Constant *Val, - ConstantExpr *Addr, unsigned OpNo, - LLVMContext &Context) { + ConstantExpr *Addr, unsigned OpNo) { // Base case of the recursion. if (OpNo == Addr->getNumOperands()) { assert(Val->getType() == Init->getType() && "Type mismatch!"); @@ -2126,10 +2104,11 @@ static Constant *EvaluateStoreInto(Constant *Init, Constant *Val, ConstantInt *CU = cast<ConstantInt>(Addr->getOperand(OpNo)); unsigned Idx = CU->getZExtValue(); assert(Idx < STy->getNumElements() && "Struct index out of range!"); - Elts[Idx] = EvaluateStoreInto(Elts[Idx], Val, Addr, OpNo+1, Context); + Elts[Idx] = EvaluateStoreInto(Elts[Idx], Val, Addr, OpNo+1); // Return the modified struct. - return ConstantStruct::get(Context, &Elts[0], Elts.size(), STy->isPacked()); + return ConstantStruct::get(Init->getContext(), &Elts[0], Elts.size(), + STy->isPacked()); } else { ConstantInt *CI = cast<ConstantInt>(Addr->getOperand(OpNo)); const ArrayType *ATy = cast<ArrayType>(Init->getType()); @@ -2152,15 +2131,14 @@ static Constant *EvaluateStoreInto(Constant *Init, Constant *Val, assert(CI->getZExtValue() < ATy->getNumElements()); Elts[CI->getZExtValue()] = - EvaluateStoreInto(Elts[CI->getZExtValue()], Val, Addr, OpNo+1, Context); + EvaluateStoreInto(Elts[CI->getZExtValue()], Val, Addr, OpNo+1); return ConstantArray::get(ATy, Elts); } } /// CommitValueTo - We have decided that Addr (which satisfies the predicate /// isSimpleEnoughPointerToCommit) should get Val as its value. Make it happen. -static void CommitValueTo(Constant *Val, Constant *Addr, - LLVMContext &Context) { +static void CommitValueTo(Constant *Val, Constant *Addr) { if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Addr)) { assert(GV->hasInitializer()); GV->setInitializer(Val); @@ -2171,7 +2149,7 @@ static void CommitValueTo(Constant *Val, Constant *Addr, GlobalVariable *GV = cast<GlobalVariable>(CE->getOperand(0)); Constant *Init = GV->getInitializer(); - Init = EvaluateStoreInto(Init, Val, CE, 2, Context); + Init = EvaluateStoreInto(Init, Val, CE, 2); GV->setInitializer(Init); } @@ -2179,8 +2157,7 @@ static void CommitValueTo(Constant *Val, Constant *Addr, /// P after the stores reflected by 'memory' have been performed. If we can't /// decide, return null. 
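A short worked illustration of CommitValueTo and EvaluateStoreInto above (the global @S and the index values are hypothetical, not taken from the patch): to commit Val to the constant address getelementptr(@S, 0, 1), the code peels off the pointer operand and the leading index, then rebuilds each aggregate level of the initializer around the new element:

    // CE is the ConstantExpr getelementptr of @S; operand 0 is @S, operand 1 the
    // leading index, and operand 2 is the first index that selects into the initializer.
    GlobalVariable *GV = cast<GlobalVariable>(CE->getOperand(0));
    Constant *Init = GV->getInitializer();
    Init = EvaluateStoreInto(Init, Val, CE, /*OpNo=*/2);
    GV->setInitializer(Init);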
static Constant *ComputeLoadResult(Constant *P, - const DenseMap<Constant*, Constant*> &Memory, - LLVMContext &Context) { + const DenseMap<Constant*, Constant*> &Memory) { // If this memory location has been recently stored, use the stored value: it // is the most up-to-date. DenseMap<Constant*, Constant*>::const_iterator I = Memory.find(P); @@ -2218,8 +2195,6 @@ static bool EvaluateFunction(Function *F, Constant *&RetVal, if (std::find(CallStack.begin(), CallStack.end(), F) != CallStack.end()) return false; - LLVMContext &Context = F->getContext(); - CallStack.push_back(F); /// Values - As we compute SSA register values, we store their contents here. @@ -2246,7 +2221,7 @@ static bool EvaluateFunction(Function *F, Constant *&RetVal, if (StoreInst *SI = dyn_cast<StoreInst>(CurInst)) { if (SI->isVolatile()) return false; // no volatile accesses. Constant *Ptr = getVal(Values, SI->getOperand(1)); - if (!isSimpleEnoughPointerToCommit(Ptr, Context)) + if (!isSimpleEnoughPointerToCommit(Ptr)) // If this is too complex for us to commit, reject it. return false; Constant *Val = getVal(Values, SI->getOperand(0)); @@ -2280,12 +2255,12 @@ static bool EvaluateFunction(Function *F, Constant *&RetVal, } else if (LoadInst *LI = dyn_cast<LoadInst>(CurInst)) { if (LI->isVolatile()) return false; // no volatile accesses. InstResult = ComputeLoadResult(getVal(Values, LI->getOperand(0)), - MutatedMemory, Context); + MutatedMemory); if (InstResult == 0) return false; // Could not evaluate load. } else if (AllocaInst *AI = dyn_cast<AllocaInst>(CurInst)) { if (AI->isArrayAllocation()) return false; // Cannot handle array allocs. const Type *Ty = AI->getType()->getElementType(); - AllocaTmps.push_back(new GlobalVariable(Context, Ty, false, + AllocaTmps.push_back(new GlobalVariable(Ty, false, GlobalValue::InternalLinkage, UndefValue::get(Ty), AI->getName())); @@ -2423,7 +2398,7 @@ static bool EvaluateStaticConstructor(Function *F) { << " stores.\n"); for (DenseMap<Constant*, Constant*>::iterator I = MutatedMemory.begin(), E = MutatedMemory.end(); I != E; ++I) - CommitValueTo(I->second, I->first, F->getContext()); + CommitValueTo(I->second, I->first); } // At this point, we are done interpreting. 
If we created any 'alloca' @@ -2480,7 +2455,7 @@ bool GlobalOpt::OptimizeGlobalCtorsList(GlobalVariable *&GCL) { if (!MadeChange) return false; - GCL = InstallGlobalCtors(GCL, Ctors, GCL->getContext()); + GCL = InstallGlobalCtors(GCL, Ctors); return true; } diff --git a/lib/Transforms/IPO/Inliner.cpp b/lib/Transforms/IPO/Inliner.cpp index ea47366..6918fe8 100644 --- a/lib/Transforms/IPO/Inliner.cpp +++ b/lib/Transforms/IPO/Inliner.cpp @@ -19,10 +19,11 @@ #include "llvm/IntrinsicInst.h" #include "llvm/Analysis/CallGraph.h" #include "llvm/Analysis/InlineCost.h" -#include "llvm/Support/CallSite.h" #include "llvm/Target/TargetData.h" #include "llvm/Transforms/IPO/InlinerPass.h" #include "llvm/Transforms/Utils/Cloning.h" +#include "llvm/Transforms/Utils/Local.h" +#include "llvm/Support/CallSite.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" @@ -32,6 +33,7 @@ using namespace llvm; STATISTIC(NumInlined, "Number of functions inlined"); +STATISTIC(NumCallsDeleted, "Number of call sites deleted, not inlined"); STATISTIC(NumDeleted, "Number of functions deleted because all callers found"); STATISTIC(NumMergedAllocas, "Number of allocas merged together"); @@ -336,23 +338,38 @@ bool Inliner::runOnSCC(std::vector<CallGraphNode*> &SCC) { for (unsigned CSi = 0; CSi != CallSites.size(); ++CSi) { CallSite CS = CallSites[CSi]; + Function *Caller = CS.getCaller(); Function *Callee = CS.getCalledFunction(); - // We can only inline direct calls to non-declarations. - if (Callee == 0 || Callee->isDeclaration()) continue; + + // If this call site is dead and it is to a readonly function, we should + // just delete the call instead of trying to inline it, regardless of + // size. This happens because IPSCCP propagates the result out of the + // call and then we're left with the dead call. + if (isInstructionTriviallyDead(CS.getInstruction())) { + DEBUG(errs() << " -> Deleting dead call: " + << *CS.getInstruction() << "\n"); + // Update the call graph by deleting the edge from Callee to Caller. + CG[Caller]->removeCallEdgeFor(CS); + CS.getInstruction()->eraseFromParent(); + ++NumCallsDeleted; + } else { + // We can only inline direct calls to non-declarations. + if (Callee == 0 || Callee->isDeclaration()) continue; - // If the policy determines that we should inline this function, - // try to do so. - if (!shouldInline(CS)) - continue; + // If the policy determines that we should inline this function, + // try to do so. + if (!shouldInline(CS)) + continue; - Function *Caller = CS.getCaller(); - // Attempt to inline the function... - if (!InlineCallIfPossible(CS, CG, TD, InlinedArrayAllocas)) - continue; + // Attempt to inline the function... + if (!InlineCallIfPossible(CS, CG, TD, InlinedArrayAllocas)) + continue; + ++NumInlined; + } - // If we inlined the last possible call site to the function, delete the - // function body now. - if (Callee->use_empty() && Callee->hasLocalLinkage() && + // If we inlined or deleted the last possible call site to the function, + // delete the function body now. + if (Callee && Callee->use_empty() && Callee->hasLocalLinkage() && // TODO: Can remove if in SCC now. 
!SCCFunctions.count(Callee) && @@ -391,7 +408,6 @@ bool Inliner::runOnSCC(std::vector<CallGraphNode*> &SCC) { } --CSi; - ++NumInlined; Changed = true; LocalChange = true; } diff --git a/lib/Transforms/IPO/LoopExtractor.cpp b/lib/Transforms/IPO/LoopExtractor.cpp index fd69aeb..cb81330 100644 --- a/lib/Transforms/IPO/LoopExtractor.cpp +++ b/lib/Transforms/IPO/LoopExtractor.cpp @@ -75,6 +75,10 @@ bool LoopExtractor::runOnLoop(Loop *L, LPPassManager &LPM) { if (L->getParentLoop()) return false; + // If LoopSimplify form is not available, stay out of trouble. + if (!L->isLoopSimplifyForm()) + return false; + DominatorTree &DT = getAnalysis<DominatorTree>(); bool Changed = false; diff --git a/lib/Transforms/IPO/StripSymbols.cpp b/lib/Transforms/IPO/StripSymbols.cpp index 4f6369e..0b5e007 100644 --- a/lib/Transforms/IPO/StripSymbols.cpp +++ b/lib/Transforms/IPO/StripSymbols.cpp @@ -202,53 +202,35 @@ static bool StripSymbolNames(Module &M, bool PreserveDbgInfo) { // llvm.dbg.region.end calls, and any globals they point to if now dead. static bool StripDebugInfo(Module &M) { + bool Changed = false; + // Remove all of the calls to the debugger intrinsics, and remove them from // the module. - Function *FuncStart = M.getFunction("llvm.dbg.func.start"); - Function *StopPoint = M.getFunction("llvm.dbg.stoppoint"); - Function *RegionStart = M.getFunction("llvm.dbg.region.start"); - Function *RegionEnd = M.getFunction("llvm.dbg.region.end"); - Function *Declare = M.getFunction("llvm.dbg.declare"); - - if (FuncStart) { - while (!FuncStart->use_empty()) { - CallInst *CI = cast<CallInst>(FuncStart->use_back()); - CI->eraseFromParent(); - } - FuncStart->eraseFromParent(); - } - if (StopPoint) { - while (!StopPoint->use_empty()) { - CallInst *CI = cast<CallInst>(StopPoint->use_back()); - CI->eraseFromParent(); - } - StopPoint->eraseFromParent(); - } - if (RegionStart) { - while (!RegionStart->use_empty()) { - CallInst *CI = cast<CallInst>(RegionStart->use_back()); - CI->eraseFromParent(); - } - RegionStart->eraseFromParent(); - } - if (RegionEnd) { - while (!RegionEnd->use_empty()) { - CallInst *CI = cast<CallInst>(RegionEnd->use_back()); - CI->eraseFromParent(); - } - RegionEnd->eraseFromParent(); - } - if (Declare) { + if (Function *Declare = M.getFunction("llvm.dbg.declare")) { while (!Declare->use_empty()) { CallInst *CI = cast<CallInst>(Declare->use_back()); CI->eraseFromParent(); } Declare->eraseFromParent(); + Changed = true; } NamedMDNode *NMD = M.getNamedMetadata("llvm.dbg.gv"); - if (NMD) + if (NMD) { + Changed = true; NMD->eraseFromParent(); + } + MetadataContext &TheMetadata = M.getContext().getMetadata(); + unsigned MDDbgKind = TheMetadata.getMDKind("dbg"); + if (!MDDbgKind) + return Changed; + + for (Module::iterator MI = M.begin(), ME = M.end(); MI != ME; ++MI) + for (Function::iterator FI = MI->begin(), FE = MI->end(); FI != FE; + ++FI) + for (BasicBlock::iterator BI = FI->begin(), BE = FI->end(); BI != BE; + ++BI) + TheMetadata.removeMD(MDDbgKind, BI); return true; } diff --git a/lib/Transforms/Scalar/ABCD.cpp b/lib/Transforms/Scalar/ABCD.cpp index c8541d7..e58fa63 100644 --- a/lib/Transforms/Scalar/ABCD.cpp +++ b/lib/Transforms/Scalar/ABCD.cpp @@ -412,7 +412,9 @@ class ABCD : public FunctionPass { /// If PN_op1 and PN_o2 are different from NULL, create a constraint /// PN_op2 -> PN_op1 with value. In case any of them is NULL, replace /// with the respective V_op#, if V_op# is a ConstantInt. 
- void createConstraintSigSig(PHINode *SIG_op1, PHINode *SIG_op2, APInt value); + void createConstraintSigSig(PHINode *SIG_op1, PHINode *SIG_op2, + ConstantInt *V_op1, ConstantInt *V_op2, + APInt value); /// Returns the sigma representing the Instruction I in BasicBlock BB. /// Returns NULL in case there is no sigma for this Instruction in this @@ -735,25 +737,27 @@ void ABCD::createConstraintCmpInst(ICmpInst *ICI, TerminatorInst *TI) { APInt Zero = APInt::getNullValue(width); CmpInst::Predicate Pred = ICI->getPredicate(); + ConstantInt *CI1 = dyn_cast<ConstantInt>(V_op1); + ConstantInt *CI2 = dyn_cast<ConstantInt>(V_op2); switch (Pred) { case CmpInst::ICMP_SGT: // signed greater than - createConstraintSigSig(SIG_op2_t, SIG_op1_t, MinusOne); - createConstraintSigSig(SIG_op1_f, SIG_op2_f, Zero); + createConstraintSigSig(SIG_op2_t, SIG_op1_t, CI2, CI1, MinusOne); + createConstraintSigSig(SIG_op1_f, SIG_op2_f, CI1, CI2, Zero); break; case CmpInst::ICMP_SGE: // signed greater or equal - createConstraintSigSig(SIG_op2_t, SIG_op1_t, Zero); - createConstraintSigSig(SIG_op1_f, SIG_op2_f, MinusOne); + createConstraintSigSig(SIG_op2_t, SIG_op1_t, CI2, CI1, Zero); + createConstraintSigSig(SIG_op1_f, SIG_op2_f, CI1, CI2, MinusOne); break; case CmpInst::ICMP_SLT: // signed less than - createConstraintSigSig(SIG_op1_t, SIG_op2_t, MinusOne); - createConstraintSigSig(SIG_op2_f, SIG_op1_f, Zero); + createConstraintSigSig(SIG_op1_t, SIG_op2_t, CI1, CI2, MinusOne); + createConstraintSigSig(SIG_op2_f, SIG_op1_f, CI2, CI1, Zero); break; case CmpInst::ICMP_SLE: // signed less or equal - createConstraintSigSig(SIG_op1_t, SIG_op2_t, Zero); - createConstraintSigSig(SIG_op2_f, SIG_op1_f, MinusOne); + createConstraintSigSig(SIG_op1_t, SIG_op2_t, CI1, CI2, Zero); + createConstraintSigSig(SIG_op2_f, SIG_op1_f, CI2, CI1, MinusOne); break; default: @@ -772,6 +776,10 @@ void ABCD::createConstraintCmpInst(ICmpInst *ICI, TerminatorInst *TI) { /// b->a and c->a with weight 0 in the lower bound graph, and the edges /// a->b and a->c with weight 0 in the upper bound graph. void ABCD::createConstraintPHINode(PHINode *PN) { + // FIXME: We really want to disallow sigma nodes, but I don't know the best + // way to detect the other than this. + if (PN->getNumOperands() == 2) return; + int32_t width = cast<IntegerType>(PN->getType())->getBitWidth(); for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { Value *V = PN->getIncomingValue(i); @@ -796,13 +804,11 @@ void ABCD::createConstraintSigInst(Instruction *I_op, BasicBlock *BB_succ_t, int32_t width = cast<IntegerType>((*SIG_op_t)->getType())->getBitWidth(); inequality_graph.addEdge(I_op, *SIG_op_t, APInt(width, 0), true); inequality_graph.addEdge(*SIG_op_t, I_op, APInt(width, 0), false); - created.insert(*SIG_op_t); } if (*SIG_op_f) { int32_t width = cast<IntegerType>((*SIG_op_f)->getType())->getBitWidth(); inequality_graph.addEdge(I_op, *SIG_op_f, APInt(width, 0), true); inequality_graph.addEdge(*SIG_op_f, I_op, APInt(width, 0), false); - created.insert(*SIG_op_f); } } @@ -810,10 +816,17 @@ void ABCD::createConstraintSigInst(Instruction *I_op, BasicBlock *BB_succ_t, /// PN_op2 -> PN_op1 with value. In case any of them is NULL, replace /// with the respective V_op#, if V_op# is a ConstantInt. 
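Concretely, a hedged illustration using the names above: for a branch on a comparison such as x SLE 42, the constant side has no sigma node, so the caller now also forwards the operands as ConstantInts and the constraint can still be recorded against the constant:

    ConstantInt *CI1 = dyn_cast<ConstantInt>(V_op1);  // null when that operand is not a constant
    ConstantInt *CI2 = dyn_cast<ConstantInt>(V_op2);  // the 42 in the example above
    // signed less or equal, true edge:
    createConstraintSigSig(SIG_op1_t, SIG_op2_t, CI1, CI2, Zero);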
void ABCD::createConstraintSigSig(PHINode *SIG_op1, PHINode *SIG_op2, + ConstantInt *V_op1, ConstantInt *V_op2, APInt value) { if (SIG_op1 && SIG_op2) { inequality_graph.addEdge(SIG_op2, SIG_op1, value, true); inequality_graph.addEdge(SIG_op1, SIG_op2, -value, false); + } else if (SIG_op1 && V_op2) { + inequality_graph.addEdge(V_op2, SIG_op1, value, true); + inequality_graph.addEdge(SIG_op1, V_op2, -value, false); + } else if (SIG_op2 && V_op1) { + inequality_graph.addEdge(SIG_op2, V_op1, value, true); + inequality_graph.addEdge(V_op1, SIG_op2, -value, false); } } @@ -1036,7 +1049,7 @@ void ABCD::InequalityGraph::printHeader(raw_ostream &OS, Function &F) const { /// Prints the body of the dot file void ABCD::InequalityGraph::printBody(raw_ostream &OS) const { - DenseMap<Value *, SmallPtrSet<Edge *, 16> >::iterator begin = + DenseMap<Value *, SmallPtrSet<Edge *, 16> >::const_iterator begin = graph.begin(), end = graph.end(); for (; begin != end ; ++begin) { diff --git a/lib/Transforms/Scalar/CMakeLists.txt b/lib/Transforms/Scalar/CMakeLists.txt index e048518..5a92399 100644 --- a/lib/Transforms/Scalar/CMakeLists.txt +++ b/lib/Transforms/Scalar/CMakeLists.txt @@ -3,7 +3,6 @@ add_llvm_library(LLVMScalarOpts ADCE.cpp BasicBlockPlacement.cpp CodeGenPrepare.cpp - CondPropagate.cpp ConstantProp.cpp DCE.cpp DeadStoreElimination.cpp diff --git a/lib/Transforms/Scalar/CondPropagate.cpp b/lib/Transforms/Scalar/CondPropagate.cpp deleted file mode 100644 index 8a6c556..0000000 --- a/lib/Transforms/Scalar/CondPropagate.cpp +++ /dev/null @@ -1,289 +0,0 @@ -//===-- CondPropagate.cpp - Propagate Conditional Expressions -------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This pass propagates information about conditional expressions through the -// program, allowing it to eliminate conditional branches in some cases. 
-// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "condprop" -#include "llvm/Transforms/Scalar.h" -#include "llvm/Instructions.h" -#include "llvm/IntrinsicInst.h" -#include "llvm/Pass.h" -#include "llvm/Type.h" -#include "llvm/Transforms/Utils/BasicBlockUtils.h" -#include "llvm/Transforms/Utils/Local.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/ADT/SmallVector.h" -using namespace llvm; - -STATISTIC(NumBrThread, "Number of CFG edges threaded through branches"); -STATISTIC(NumSwThread, "Number of CFG edges threaded through switches"); - -namespace { - struct CondProp : public FunctionPass { - static char ID; // Pass identification, replacement for typeid - CondProp() : FunctionPass(&ID) {} - - virtual bool runOnFunction(Function &F); - - virtual void getAnalysisUsage(AnalysisUsage &AU) const { - AU.addRequiredID(BreakCriticalEdgesID); - //AU.addRequired<DominanceFrontier>(); - } - - private: - bool MadeChange; - SmallVector<BasicBlock *, 4> DeadBlocks; - void SimplifyBlock(BasicBlock *BB); - void SimplifyPredecessors(BranchInst *BI); - void SimplifyPredecessors(SwitchInst *SI); - void RevectorBlockTo(BasicBlock *FromBB, BasicBlock *ToBB); - bool RevectorBlockTo(BasicBlock *FromBB, Value *Cond, BranchInst *BI); - }; -} - -char CondProp::ID = 0; -static RegisterPass<CondProp> X("condprop", "Conditional Propagation"); - -FunctionPass *llvm::createCondPropagationPass() { - return new CondProp(); -} - -bool CondProp::runOnFunction(Function &F) { - bool EverMadeChange = false; - DeadBlocks.clear(); - - // While we are simplifying blocks, keep iterating. - do { - MadeChange = false; - for (Function::iterator BB = F.begin(), E = F.end(); BB != E;) - SimplifyBlock(BB++); - EverMadeChange = EverMadeChange || MadeChange; - } while (MadeChange); - - if (EverMadeChange) { - while (!DeadBlocks.empty()) { - BasicBlock *BB = DeadBlocks.back(); DeadBlocks.pop_back(); - DeleteDeadBlock(BB); - } - } - return EverMadeChange; -} - -void CondProp::SimplifyBlock(BasicBlock *BB) { - if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator())) { - // If this is a conditional branch based on a phi node that is defined in - // this block, see if we can simplify predecessors of this block. - if (BI->isConditional() && isa<PHINode>(BI->getCondition()) && - cast<PHINode>(BI->getCondition())->getParent() == BB) - SimplifyPredecessors(BI); - - } else if (SwitchInst *SI = dyn_cast<SwitchInst>(BB->getTerminator())) { - if (isa<PHINode>(SI->getCondition()) && - cast<PHINode>(SI->getCondition())->getParent() == BB) - SimplifyPredecessors(SI); - } - - // If possible, simplify the terminator of this block. - if (ConstantFoldTerminator(BB)) - MadeChange = true; - - // If this block ends with an unconditional branch and the only successor has - // only this block as a predecessor, merge the two blocks together. - if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator())) - if (BI->isUnconditional() && BI->getSuccessor(0)->getSinglePredecessor() && - BB != BI->getSuccessor(0)) { - BasicBlock *Succ = BI->getSuccessor(0); - - // If Succ has any PHI nodes, they are all single-entry PHI's. Eliminate - // them. - FoldSingleEntryPHINodes(Succ); - - // Remove BI. - BI->eraseFromParent(); - - // Move over all of the instructions. - BB->getInstList().splice(BB->end(), Succ->getInstList()); - - // Any phi nodes that had entries for Succ now have entries from BB. 
- Succ->replaceAllUsesWith(BB); - - // Succ is now dead, but we cannot delete it without potentially - // invalidating iterators elsewhere. Just insert an unreachable - // instruction in it and delete this block later on. - new UnreachableInst(BB->getContext(), Succ); - DeadBlocks.push_back(Succ); - MadeChange = true; - } -} - -// SimplifyPredecessors(branches) - We know that BI is a conditional branch -// based on a PHI node defined in this block. If the phi node contains constant -// operands, then the blocks corresponding to those operands can be modified to -// jump directly to the destination instead of going through this block. -void CondProp::SimplifyPredecessors(BranchInst *BI) { - // TODO: We currently only handle the most trival case, where the PHI node has - // one use (the branch), and is the only instruction besides the branch and dbg - // intrinsics in the block. - PHINode *PN = cast<PHINode>(BI->getCondition()); - - if (PN->getNumIncomingValues() == 1) { - // Eliminate single-entry PHI nodes. - FoldSingleEntryPHINodes(PN->getParent()); - return; - } - - - if (!PN->hasOneUse()) return; - - BasicBlock *BB = BI->getParent(); - if (&*BB->begin() != PN) - return; - BasicBlock::iterator BBI = BB->begin(); - BasicBlock::iterator BBE = BB->end(); - while (BBI != BBE && isa<DbgInfoIntrinsic>(++BBI)) /* empty */; - if (&*BBI != BI) - return; - - // Ok, we have this really simple case, walk the PHI operands, looking for - // constants. Walk from the end to remove operands from the end when - // possible, and to avoid invalidating "i". - for (unsigned i = PN->getNumIncomingValues(); i != 0; --i) { - Value *InVal = PN->getIncomingValue(i-1); - if (!RevectorBlockTo(PN->getIncomingBlock(i-1), InVal, BI)) - continue; - - ++NumBrThread; - - // If there were two predecessors before this simplification, or if the - // PHI node contained all the same value except for the one we just - // substituted, the PHI node may be deleted. Don't iterate through it the - // last time. - if (BI->getCondition() != PN) return; - } -} - -// SimplifyPredecessors(switch) - We know that SI is switch based on a PHI node -// defined in this block. If the phi node contains constant operands, then the -// blocks corresponding to those operands can be modified to jump directly to -// the destination instead of going through this block. -void CondProp::SimplifyPredecessors(SwitchInst *SI) { - // TODO: We currently only handle the most trival case, where the PHI node has - // one use (the branch), and is the only instruction besides the branch and - // dbg intrinsics in the block. - PHINode *PN = cast<PHINode>(SI->getCondition()); - if (!PN->hasOneUse()) return; - - BasicBlock *BB = SI->getParent(); - if (&*BB->begin() != PN) - return; - BasicBlock::iterator BBI = BB->begin(); - BasicBlock::iterator BBE = BB->end(); - while (BBI != BBE && isa<DbgInfoIntrinsic>(++BBI)) /* empty */; - if (&*BBI != SI) - return; - - // Ok, we have this really simple case, walk the PHI operands, looking for - // constants. Walk from the end to remove operands from the end when - // possible, and to avoid invalidating "i". - for (unsigned i = PN->getNumIncomingValues(); i != 0; --i) - if (ConstantInt *CI = dyn_cast<ConstantInt>(PN->getIncomingValue(i-1))) { - BasicBlock *PredBB = PN->getIncomingBlock(i-1); - if (isa<BranchInst>(PredBB->getTerminator())) { - // If we have a constant, forward the edge from its current to its - // ultimate destination. 
- unsigned DestCase = SI->findCaseValue(CI); - RevectorBlockTo(PredBB, SI->getSuccessor(DestCase)); - ++NumSwThread; - - // If there were two predecessors before this simplification, or if the - // PHI node contained all the same value except for the one we just - // substituted, the PHI node may be deleted. Don't iterate through it the - // last time. - if (SI->getCondition() != PN) return; - } - } -} - - -// RevectorBlockTo - Revector the unconditional branch at the end of FromBB to -// the ToBB block, which is one of the successors of its current successor. -void CondProp::RevectorBlockTo(BasicBlock *FromBB, BasicBlock *ToBB) { - BranchInst *FromBr = cast<BranchInst>(FromBB->getTerminator()); - assert(FromBr->isUnconditional() && "FromBB should end with uncond br!"); - - // Get the old block we are threading through. - BasicBlock *OldSucc = FromBr->getSuccessor(0); - - // OldSucc had multiple successors. If ToBB has multiple predecessors, then - // the edge between them would be critical, which we already took care of. - // If ToBB has single operand PHI node then take care of it here. - FoldSingleEntryPHINodes(ToBB); - - // Update PHI nodes in OldSucc to know that FromBB no longer branches to it. - OldSucc->removePredecessor(FromBB); - - // Change FromBr to branch to the new destination. - FromBr->setSuccessor(0, ToBB); - - MadeChange = true; -} - -bool CondProp::RevectorBlockTo(BasicBlock *FromBB, Value *Cond, BranchInst *BI){ - BranchInst *FromBr = cast<BranchInst>(FromBB->getTerminator()); - if (!FromBr->isUnconditional()) - return false; - - // Get the old block we are threading through. - BasicBlock *OldSucc = FromBr->getSuccessor(0); - - // If the condition is a constant, simply revector the unconditional branch at - // the end of FromBB to one of the successors of its current successor. - if (ConstantInt *CB = dyn_cast<ConstantInt>(Cond)) { - BasicBlock *ToBB = BI->getSuccessor(CB->isZero()); - - // OldSucc had multiple successors. If ToBB has multiple predecessors, then - // the edge between them would be critical, which we already took care of. - // If ToBB has single operand PHI node then take care of it here. - FoldSingleEntryPHINodes(ToBB); - - // Update PHI nodes in OldSucc to know that FromBB no longer branches to it. - OldSucc->removePredecessor(FromBB); - - // Change FromBr to branch to the new destination. - FromBr->setSuccessor(0, ToBB); - } else { - BasicBlock *Succ0 = BI->getSuccessor(0); - // Do not perform transform if the new destination has PHI nodes. The - // transform will add new preds to the PHI's. - if (isa<PHINode>(Succ0->begin())) - return false; - - BasicBlock *Succ1 = BI->getSuccessor(1); - if (isa<PHINode>(Succ1->begin())) - return false; - - // Insert the new conditional branch. - BranchInst::Create(Succ0, Succ1, Cond, FromBr); - - FoldSingleEntryPHINodes(Succ0); - FoldSingleEntryPHINodes(Succ1); - - // Update PHI nodes in OldSucc to know that FromBB no longer branches to it. - OldSucc->removePredecessor(FromBB); - - // Delete the old branch. - FromBr->eraseFromParent(); - } - - MadeChange = true; - return true; -} diff --git a/lib/Transforms/Scalar/ConstantProp.cpp b/lib/Transforms/Scalar/ConstantProp.cpp index 4fee327..ea20813 100644 --- a/lib/Transforms/Scalar/ConstantProp.cpp +++ b/lib/Transforms/Scalar/ConstantProp.cpp @@ -66,7 +66,7 @@ bool ConstantPropagation::runOnFunction(Function &F) { WorkList.erase(WorkList.begin()); // Get an element from the worklist... if (!I->use_empty()) // Don't muck with dead instructions... 
- if (Constant *C = ConstantFoldInstruction(I, F.getContext())) { + if (Constant *C = ConstantFoldInstruction(I)) { // Add all of the users of this instruction to the worklist, they might // be constant propagatable now... for (Value::use_iterator UI = I->use_begin(), UE = I->use_end(); diff --git a/lib/Transforms/Scalar/DeadStoreElimination.cpp b/lib/Transforms/Scalar/DeadStoreElimination.cpp index 90436f4..b0988b5 100644 --- a/lib/Transforms/Scalar/DeadStoreElimination.cpp +++ b/lib/Transforms/Scalar/DeadStoreElimination.cpp @@ -78,19 +78,96 @@ static RegisterPass<DSE> X("dse", "Dead Store Elimination"); FunctionPass *llvm::createDeadStoreEliminationPass() { return new DSE(); } -/// isValueAtLeastAsBigAs - Return true if V1 is greater than or equal to the -/// stored size of V2. This returns false if we don't know. +/// doesClobberMemory - Does this instruction clobber (write without reading) +/// some memory? +static bool doesClobberMemory(Instruction *I) { + if (isa<StoreInst>(I)) + return true; + if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) { + switch (II->getIntrinsicID()) { + default: return false; + case Intrinsic::memset: case Intrinsic::memmove: case Intrinsic::memcpy: + case Intrinsic::init_trampoline: case Intrinsic::lifetime_end: return true; + } + } + return false; +} + +/// isElidable - If the value of this instruction and the memory it writes to is +/// unused, may we delete this instrtction? +static bool isElidable(Instruction *I) { + assert(doesClobberMemory(I)); + if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) + return II->getIntrinsicID() != Intrinsic::lifetime_end; + if (StoreInst *SI = dyn_cast<StoreInst>(I)) + return !SI->isVolatile(); + return true; +} + +/// getPointerOperand - Return the pointer that is being clobbered. +static Value *getPointerOperand(Instruction *I) { + assert(doesClobberMemory(I)); + if (StoreInst *SI = dyn_cast<StoreInst>(I)) + return SI->getPointerOperand(); + if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I)) + return MI->getOperand(1); + IntrinsicInst *II = cast<IntrinsicInst>(I); + switch (II->getIntrinsicID()) { + default: + assert(false && "Unexpected intrinsic!"); + case Intrinsic::init_trampoline: + return II->getOperand(1); + case Intrinsic::lifetime_end: + return II->getOperand(2); + } +} + +/// getStoreSize - Return the length in bytes of the write by the clobbering +/// instruction. If variable or unknown, returns -1. +static unsigned getStoreSize(Instruction *I, const TargetData *TD) { + assert(doesClobberMemory(I)); + if (StoreInst *SI = dyn_cast<StoreInst>(I)) { + if (!TD) return -1u; + return TD->getTypeStoreSize(SI->getOperand(0)->getType()); + } + + Value *Len; + if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I)) { + Len = MI->getLength(); + } else { + IntrinsicInst *II = cast<IntrinsicInst>(I); + switch (II->getIntrinsicID()) { + default: + assert(false && "Unexpected intrinsic!"); + case Intrinsic::init_trampoline: + return -1u; + case Intrinsic::lifetime_end: + Len = II->getOperand(1); + break; + } + } + if (ConstantInt *LenCI = dyn_cast<ConstantInt>(Len)) + if (!LenCI->isAllOnesValue()) + return LenCI->getZExtValue(); + return -1u; +} + +/// isStoreAtLeastAsWideAs - Return true if the size of the store in I1 is +/// greater than or equal to the store in I2. This returns false if we don't +/// know. 
/// -static bool isValueAtLeastAsBigAs(Value *V1, Value *V2, const TargetData *TD) { - const Type *V1Ty = V1->getType(), *V2Ty = V2->getType(); +static bool isStoreAtLeastAsWideAs(Instruction *I1, Instruction *I2, + const TargetData *TD) { + const Type *I1Ty = getPointerOperand(I1)->getType(); + const Type *I2Ty = getPointerOperand(I2)->getType(); // Exactly the same type, must have exactly the same size. - if (V1Ty == V2Ty) return true; + if (I1Ty == I2Ty) return true; - // If we don't have target data, we don't know. - if (TD == 0) return false; + int I1Size = getStoreSize(I1, TD); + int I2Size = getStoreSize(I2, TD); - return TD->getTypeStoreSize(V1Ty) >= TD->getTypeStoreSize(V2Ty); + return I1Size != -1 && I2Size != -1 && I1Size >= I2Size; } bool DSE::runOnBasicBlock(BasicBlock &BB) { @@ -104,14 +181,9 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) { Instruction *Inst = BBI++; // If we find a store or a free, get its memory dependence. - if (!isa<StoreInst>(Inst) && !isFreeCall(Inst)) + if (!doesClobberMemory(Inst) && !isFreeCall(Inst)) continue; - // Don't molest volatile stores or do queries that will return "clobber". - if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) - if (SI->isVolatile()) - continue; - MemDepResult InstDep = MD.getDependency(Inst); // Ignore non-local stores. @@ -124,16 +196,16 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) { continue; } - StoreInst *SI = cast<StoreInst>(Inst); - // If not a definite must-alias dependency, ignore it. if (!InstDep.isDef()) continue; // If this is a store-store dependence, then the previous store is dead so // long as this store is at least as big as it. - if (StoreInst *DepStore = dyn_cast<StoreInst>(InstDep.getInst())) - if (isValueAtLeastAsBigAs(SI->getOperand(0), DepStore->getOperand(0),TD)){ + if (doesClobberMemory(InstDep.getInst())) { + Instruction *DepStore = InstDep.getInst(); + if (isStoreAtLeastAsWideAs(Inst, DepStore, TD) && + isElidable(DepStore)) { // Delete the store and now-dead instructions that feed it. DeleteDeadInstruction(DepStore); NumFastStores++; @@ -146,37 +218,43 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) { --BBI; continue; } + } + + if (!isElidable(Inst)) + continue; // If we're storing the same value back to a pointer that we just // loaded from, then the store can be removed. - if (LoadInst *DepLoad = dyn_cast<LoadInst>(InstDep.getInst())) { - if (SI->getPointerOperand() == DepLoad->getPointerOperand() && - SI->getOperand(0) == DepLoad) { - // DeleteDeadInstruction can delete the current instruction. Save BBI - // in case we need it. - WeakVH NextInst(BBI); - - DeleteDeadInstruction(SI); - - if (NextInst == 0) // Next instruction deleted. - BBI = BB.begin(); - else if (BBI != BB.begin()) // Revisit this instruction if possible. - --BBI; - NumFastStores++; - MadeChange = true; - continue; + if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) { + if (LoadInst *DepLoad = dyn_cast<LoadInst>(InstDep.getInst())) { + if (SI->getPointerOperand() == DepLoad->getPointerOperand() && + SI->getOperand(0) == DepLoad) { + // DeleteDeadInstruction can delete the current instruction. Save BBI + // in case we need it. + WeakVH NextInst(BBI); + + DeleteDeadInstruction(SI); + + if (NextInst == 0) // Next instruction deleted. + BBI = BB.begin(); + else if (BBI != BB.begin()) // Revisit this instruction if possible. + --BBI; + NumFastStores++; + MadeChange = true; + continue; + } } } // If this is a lifetime end marker, we can throw away the store. 
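More generally, the helpers introduced at the top of this file give every kind of clobbering write (plain stores, memset/memcpy/memmove, init_trampoline, lifetime_end) a single classification path, so the checks here no longer special-case StoreInst. A minimal sketch of that classification, assuming those helper definitions and the pass's TargetData pointer TD:

    if (doesClobberMemory(Inst) && isElidable(Inst)) {
      Value *Ptr = getPointerOperand(Inst)->getUnderlyingObject();
      unsigned Bytes = getStoreSize(Inst, TD);   // -1u when the length is variable or unknown
      // Ptr and Bytes feed the same dead-store tests that previously accepted
      // only non-volatile StoreInsts.
    }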
- if (IntrinsicInst* II = dyn_cast<IntrinsicInst>(InstDep.getInst())) { + if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(InstDep.getInst())) { if (II->getIntrinsicID() == Intrinsic::lifetime_end) { // Delete the store and now-dead instructions that feed it. // DeleteDeadInstruction can delete the current instruction. Save BBI // in case we need it. WeakVH NextInst(BBI); - DeleteDeadInstruction(SI); + DeleteDeadInstruction(Inst); if (NextInst == 0) // Next instruction deleted. BBI = BB.begin(); @@ -202,11 +280,11 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) { bool DSE::handleFreeWithNonTrivialDependency(Instruction *F, MemDepResult Dep) { AliasAnalysis &AA = getAnalysis<AliasAnalysis>(); - StoreInst *Dependency = dyn_cast_or_null<StoreInst>(Dep.getInst()); - if (!Dependency || Dependency->isVolatile()) + Instruction *Dependency = Dep.getInst(); + if (!Dependency || !doesClobberMemory(Dependency) || !isElidable(Dependency)) return false; - Value *DepPointer = Dependency->getPointerOperand()->getUnderlyingObject(); + Value *DepPointer = getPointerOperand(Dependency)->getUnderlyingObject(); // Check for aliasing. if (AA.alias(F->getOperand(1), 1, DepPointer, 1) != @@ -251,39 +329,28 @@ bool DSE::handleEndBlock(BasicBlock &BB) { --BBI; // If we find a store whose pointer is dead. - if (StoreInst* S = dyn_cast<StoreInst>(BBI)) { - if (!S->isVolatile()) { + if (doesClobberMemory(BBI)) { + if (isElidable(BBI)) { // See through pointer-to-pointer bitcasts - Value* pointerOperand = S->getPointerOperand()->getUnderlyingObject(); + Value *pointerOperand = getPointerOperand(BBI)->getUnderlyingObject(); // Alloca'd pointers or byval arguments (which are functionally like // alloca's) are valid candidates for removal. if (deadPointers.count(pointerOperand)) { // DCE instructions only used to calculate that store. + Instruction *Dead = BBI; BBI++; - DeleteDeadInstruction(S, &deadPointers); + DeleteDeadInstruction(Dead, &deadPointers); NumFastStores++; MadeChange = true; + continue; } } - continue; - } - - // We can also remove memcpy's to local variables at the end of a function. - if (MemCpyInst *M = dyn_cast<MemCpyInst>(BBI)) { - Value *dest = M->getDest()->getUnderlyingObject(); - - if (deadPointers.count(dest)) { - BBI++; - DeleteDeadInstruction(M, &deadPointers); - NumFastOther++; - MadeChange = true; + // Because a memcpy or memmove is also a load, we can't skip it if we + // didn't remove it. + if (!isa<MemTransferInst>(BBI)) continue; - } - - // Because a memcpy is also a load, we can't skip it if we didn't remove - // it. 
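To make the end-of-block sweep concrete, a hypothetical C-level sketch (illustrative only): a clobber of a local that is never read again can be dropped outright, while a mem-transfer must still be treated as a read of its source even when its own write is uninteresting.

    #include <string.h>
    void sweep(char *dst, const char *src) {
      char scratch[32];
      memset(scratch, 0, sizeof scratch);  /* removable: scratch is dead at the return      */
      memcpy(dst, src, 32);                /* kept: dst may be live, and the copy reads src */
    }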
} Value* killPointer = 0; @@ -304,11 +371,11 @@ bool DSE::handleEndBlock(BasicBlock &BB) { killPointer = L->getPointerOperand(); } else if (VAArgInst* V = dyn_cast<VAArgInst>(BBI)) { killPointer = V->getOperand(0); - } else if (isa<MemCpyInst>(BBI) && - isa<ConstantInt>(cast<MemCpyInst>(BBI)->getLength())) { - killPointer = cast<MemCpyInst>(BBI)->getSource(); + } else if (isa<MemTransferInst>(BBI) && + isa<ConstantInt>(cast<MemTransferInst>(BBI)->getLength())) { + killPointer = cast<MemTransferInst>(BBI)->getSource(); killPointerSize = cast<ConstantInt>( - cast<MemCpyInst>(BBI)->getLength())->getZExtValue(); + cast<MemTransferInst>(BBI)->getLength())->getZExtValue(); } else if (AllocaInst* A = dyn_cast<AllocaInst>(BBI)) { deadPointers.erase(A); diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp index 0e3f750..a8f39c1 100644 --- a/lib/Transforms/Scalar/GVN.cpp +++ b/lib/Transforms/Scalar/GVN.cpp @@ -443,6 +443,11 @@ uint32_t ValueTable::lookup_or_add_call(CallInst* C) { valueNumbering[C] = e; return e; } + if (!MD) { + e = nextValueNumber++; + valueNumbering[C] = e; + return e; + } MemDepResult local_dep = MD->getDependency(C); @@ -624,7 +629,7 @@ uint32_t ValueTable::lookup_or_add(Value *V) { /// lookup - Returns the value number of the specified value. Fails if /// the value has not yet been numbered. uint32_t ValueTable::lookup(Value *V) const { - DenseMap<Value*, uint32_t>::iterator VI = valueNumbering.find(V); + DenseMap<Value*, uint32_t>::const_iterator VI = valueNumbering.find(V); assert(VI != valueNumbering.end() && "Value not numbered?"); return VI->second; } @@ -644,7 +649,7 @@ void ValueTable::erase(Value *V) { /// verifyRemoved - Verify that the value is removed from all internal data /// structures. void ValueTable::verifyRemoved(const Value *V) const { - for (DenseMap<Value*, uint32_t>::iterator + for (DenseMap<Value*, uint32_t>::const_iterator I = valueNumbering.begin(), E = valueNumbering.end(); I != E; ++I) { assert(I->first != V && "Inst still occurs in value numbering map!"); } @@ -669,10 +674,12 @@ namespace { bool runOnFunction(Function &F); public: static char ID; // Pass identification, replacement for typeid - GVN(bool nopre = false) : FunctionPass(&ID), NoPRE(nopre) { } + explicit GVN(bool nopre = false, bool noloads = false) + : FunctionPass(&ID), NoPRE(nopre), NoLoads(noloads), MD(0) { } private: bool NoPRE; + bool NoLoads; MemoryDependenceAnalysis *MD; DominatorTree *DT; @@ -682,7 +689,8 @@ namespace { // This transformation requires dominator postdominator info virtual void getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired<DominatorTree>(); - AU.addRequired<MemoryDependenceAnalysis>(); + if (!NoLoads) + AU.addRequired<MemoryDependenceAnalysis>(); AU.addRequired<AliasAnalysis>(); AU.addPreserved<DominatorTree>(); @@ -711,7 +719,9 @@ namespace { } // createGVNPass - The public interface to this file... -FunctionPass *llvm::createGVNPass(bool NoPRE) { return new GVN(NoPRE); } +FunctionPass *llvm::createGVNPass(bool NoPRE, bool NoLoads) { + return new GVN(NoPRE, NoLoads); +} static RegisterPass<GVN> X("gvn", "Global Value Numbering"); @@ -1476,6 +1486,9 @@ bool GVN::processNonLocalLoad(LoadInst *LI, /// processLoad - Attempt to eliminate a load, first by eliminating it /// locally, and then attempting non-local elimination if that fails. 
bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) { + if (!MD) + return false; + if (L->isVolatile()) return false; @@ -1686,7 +1699,7 @@ bool GVN::processInstruction(Instruction *I, if (constVal) { p->replaceAllUsesWith(constVal); - if (isa<PointerType>(constVal->getType())) + if (MD && isa<PointerType>(constVal->getType())) MD->invalidateCachedPointerInfo(constVal); VN.erase(p); @@ -1707,7 +1720,7 @@ bool GVN::processInstruction(Instruction *I, // Remove it! VN.erase(I); I->replaceAllUsesWith(repl); - if (isa<PointerType>(repl->getType())) + if (MD && isa<PointerType>(repl->getType())) MD->invalidateCachedPointerInfo(repl); toErase.push_back(I); return true; @@ -1721,7 +1734,8 @@ bool GVN::processInstruction(Instruction *I, /// runOnFunction - This is the main transformation entry point for a function. bool GVN::runOnFunction(Function& F) { - MD = &getAnalysis<MemoryDependenceAnalysis>(); + if (!NoLoads) + MD = &getAnalysis<MemoryDependenceAnalysis>(); DT = &getAnalysis<DominatorTree>(); VN.setAliasAnalysis(&getAnalysis<AliasAnalysis>()); VN.setMemDep(MD); @@ -1793,7 +1807,7 @@ bool GVN::processBlock(BasicBlock *BB) { for (SmallVector<Instruction*, 4>::iterator I = toErase.begin(), E = toErase.end(); I != E; ++I) { DEBUG(errs() << "GVN removed: " << **I << '\n'); - MD->removeInstruction(*I); + if (MD) MD->removeInstruction(*I); (*I)->eraseFromParent(); DEBUG(verifyRemoved(*I)); } @@ -1946,12 +1960,12 @@ bool GVN::performPRE(Function &F) { localAvail[CurrentBlock]->table[ValNo] = Phi; CurInst->replaceAllUsesWith(Phi); - if (isa<PointerType>(Phi->getType())) + if (MD && isa<PointerType>(Phi->getType())) MD->invalidateCachedPointerInfo(Phi); VN.erase(CurInst); DEBUG(errs() << "GVN PRE removed: " << *CurInst << '\n'); - MD->removeInstruction(CurInst); + if (MD) MD->removeInstruction(CurInst); CurInst->eraseFromParent(); DEBUG(verifyRemoved(CurInst)); Changed = true; @@ -2011,12 +2025,12 @@ void GVN::verifyRemoved(const Instruction *Inst) const { // Walk through the value number scope to make sure the instruction isn't // ferreted away in it. 
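The practical effect of the new flag is a mode where GVN does scalar value numbering only and never touches MemoryDependenceAnalysis; a hypothetical way a client could request it (only the createGVNPass signature above comes from this patch, the pass manager setup is illustrative):

    PassManager PM;
    PM.add(createGVNPass(/*NoPRE=*/false, /*NoLoads=*/true));  // skip load elimination and load PRE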
- for (DenseMap<BasicBlock*, ValueNumberScope*>::iterator + for (DenseMap<BasicBlock*, ValueNumberScope*>::const_iterator I = localAvail.begin(), E = localAvail.end(); I != E; ++I) { const ValueNumberScope *VNS = I->second; while (VNS) { - for (DenseMap<uint32_t, Value*>::iterator + for (DenseMap<uint32_t, Value*>::const_iterator II = VNS->table.begin(), IE = VNS->table.end(); II != IE; ++II) { assert(II->second != Inst && "Inst still in value numbering scope!"); } diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp index b0bc70c..2912421 100644 --- a/lib/Transforms/Scalar/IndVarSimplify.cpp +++ b/lib/Transforms/Scalar/IndVarSimplify.cpp @@ -536,8 +536,10 @@ void IndVarSimplify::SinkUnusedInvariants(Loop *L) { BasicBlock *ExitBlock = L->getExitBlock(); if (!ExitBlock) return; - Instruction *InsertPt = ExitBlock->getFirstNonPHI(); BasicBlock *Preheader = L->getLoopPreheader(); + if (!Preheader) return; + + Instruction *InsertPt = ExitBlock->getFirstNonPHI(); BasicBlock::iterator I = Preheader->getTerminator(); while (I != Preheader->begin()) { --I; diff --git a/lib/Transforms/Scalar/InstructionCombining.cpp b/lib/Transforms/Scalar/InstructionCombining.cpp index 7e75cfb..1c48366 100644 --- a/lib/Transforms/Scalar/InstructionCombining.cpp +++ b/lib/Transforms/Scalar/InstructionCombining.cpp @@ -42,6 +42,7 @@ #include "llvm/GlobalVariable.h" #include "llvm/Operator.h" #include "llvm/Analysis/ConstantFolding.h" +#include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/Target/TargetData.h" @@ -283,6 +284,8 @@ namespace { Instruction *visitSelectInstWithICmp(SelectInst &SI, ICmpInst *ICI); Instruction *visitCallInst(CallInst &CI); Instruction *visitInvokeInst(InvokeInst &II); + + Instruction *SliceUpIllegalIntegerPHI(PHINode &PN); Instruction *visitPHINode(PHINode &PN); Instruction *visitGetElementPtrInst(GetElementPtrInst &GEP); Instruction *visitAllocaInst(AllocaInst &AI); @@ -380,10 +383,6 @@ namespace { /// commutative operators. bool SimplifyCommutative(BinaryOperator &I); - /// SimplifyCompare - This reorders the operands of a CmpInst to get them in - /// most-complex to least-complex order. - bool SimplifyCompare(CmpInst &I); - /// SimplifyDemandedUseBits - Attempts to replace V with a simpler value /// based on the demanded bits. Value *SimplifyDemandedUseBits(Value *V, APInt DemandedMask, @@ -478,6 +477,34 @@ static const Type *getPromotedType(const Type *Ty) { return Ty; } +/// ShouldChangeType - Return true if it is desirable to convert a computation +/// from 'From' to 'To'. We don't want to convert from a legal to an illegal +/// type for example, or from a smaller to a larger illegal type. +static bool ShouldChangeType(const Type *From, const Type *To, + const TargetData *TD) { + assert(isa<IntegerType>(From) && isa<IntegerType>(To)); + + // If we don't have TD, we don't know if the source/dest are legal. + if (!TD) return false; + + unsigned FromWidth = From->getPrimitiveSizeInBits(); + unsigned ToWidth = To->getPrimitiveSizeInBits(); + bool FromLegal = TD->isLegalInteger(FromWidth); + bool ToLegal = TD->isLegalInteger(ToWidth); + + // If this is a legal integer from type, and the result would be an illegal + // type, don't do the transformation. + if (FromLegal && !ToLegal) + return false; + + // Otherwise, if both are illegal, do not increase the size of the result. We + // do allow things like i160 -> i64, but not i64 -> i160. 
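Spelled out for a hypothetical target whose TargetData reports 32- and 64-bit integers as legal, the rule above gives:

    // ShouldChangeType(From, To) on such a target (illustrative):
    //   i32  -> i64    true    legal to legal
    //   i64  -> i13    false   legal to illegal
    //   i160 -> i64    true    illegal to legal
    //   i160 -> i256   false   both illegal, result would grow
    //   i256 -> i160   true    both illegal, result shrinks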
+ if (!FromLegal && !ToLegal && ToWidth > FromWidth) + return false; + + return true; +} + /// getBitCastOperand - If the specified operand is a CastInst, a constant /// expression bitcast, or a GetElementPtrInst with all zero indices, return the /// operand value, otherwise return null. @@ -584,17 +611,6 @@ bool InstCombiner::SimplifyCommutative(BinaryOperator &I) { return Changed; } -/// SimplifyCompare - For a CmpInst this function just orders the operands -/// so that theyare listed from right (least complex) to left (most complex). -/// This puts constants before unary operators before binary operators. -bool InstCombiner::SimplifyCompare(CmpInst &I) { - if (getComplexity(I.getOperand(0)) >= getComplexity(I.getOperand(1))) - return false; - I.swapOperands(); - // Compare instructions are not associative so there's nothing else we can do. - return true; -} - // dyn_castNegVal - Given a 'sub' instruction, return the RHS of the instruction // if the LHS is a constant zero (which is the 'negate' form). // @@ -4304,25 +4320,15 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) { bool Changed = SimplifyCommutative(I); Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); - if (isa<UndefValue>(Op1)) // X & undef -> 0 - return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); - - // and X, X = X - if (Op0 == Op1) - return ReplaceInstUsesWith(I, Op1); + if (Value *V = SimplifyAndInst(Op0, Op1, TD)) + return ReplaceInstUsesWith(I, V); + // See if we can simplify any instructions used by the instruction whose sole // purpose is to compute bits we don't care about. if (SimplifyDemandedInstructionBits(I)) return &I; - if (isa<VectorType>(I.getType())) { - if (ConstantVector *CP = dyn_cast<ConstantVector>(Op1)) { - if (CP->isAllOnesValue()) // X & <-1,-1> -> X - return ReplaceInstUsesWith(I, I.getOperand(0)); - } else if (isa<ConstantAggregateZero>(Op1)) { - return ReplaceInstUsesWith(I, Op1); // X & <0,0> -> <0,0> - } - } + if (ConstantInt *AndRHS = dyn_cast<ConstantInt>(Op1)) { const APInt &AndRHSMask = AndRHS->getValue(); @@ -4443,42 +4449,29 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) { return NV; } - Value *Op0NotVal = dyn_castNotVal(Op0); - Value *Op1NotVal = dyn_castNotVal(Op1); - - if (Op0NotVal == Op1 || Op1NotVal == Op0) // A & ~A == ~A & A == 0 - return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); // (~A & ~B) == (~(A | B)) - De Morgan's Law - if (Op0NotVal && Op1NotVal && isOnlyUse(Op0) && isOnlyUse(Op1)) { - Value *Or = Builder->CreateOr(Op0NotVal, Op1NotVal, - I.getName()+".demorgan"); - return BinaryOperator::CreateNot(Or); - } - + if (Value *Op0NotVal = dyn_castNotVal(Op0)) + if (Value *Op1NotVal = dyn_castNotVal(Op1)) + if (Op0->hasOneUse() && Op1->hasOneUse()) { + Value *Or = Builder->CreateOr(Op0NotVal, Op1NotVal, + I.getName()+".demorgan"); + return BinaryOperator::CreateNot(Or); + } + { Value *A = 0, *B = 0, *C = 0, *D = 0; - if (match(Op0, m_Or(m_Value(A), m_Value(B)))) { - if (A == Op1 || B == Op1) // (A | ?) 
& A --> A - return ReplaceInstUsesWith(I, Op1); - - // (A|B) & ~(A&B) -> A^B - if (match(Op1, m_Not(m_And(m_Value(C), m_Value(D))))) { - if ((A == C && B == D) || (A == D && B == C)) - return BinaryOperator::CreateXor(A, B); - } - } + // (A|B) & ~(A&B) -> A^B + if (match(Op0, m_Or(m_Value(A), m_Value(B))) && + match(Op1, m_Not(m_And(m_Value(C), m_Value(D)))) && + ((A == C && B == D) || (A == D && B == C))) + return BinaryOperator::CreateXor(A, B); - if (match(Op1, m_Or(m_Value(A), m_Value(B)))) { - if (A == Op0 || B == Op0) // A & (A | ?) --> A - return ReplaceInstUsesWith(I, Op0); - - // ~(A&B) & (A|B) -> A^B - if (match(Op0, m_Not(m_And(m_Value(C), m_Value(D))))) { - if ((A == C && B == D) || (A == D && B == C)) - return BinaryOperator::CreateXor(A, B); - } - } + // ~(A&B) & (A|B) -> A^B + if (match(Op1, m_Or(m_Value(A), m_Value(B))) && + match(Op0, m_Not(m_And(m_Value(C), m_Value(D)))) && + ((A == C && B == D) || (A == D && B == C))) + return BinaryOperator::CreateXor(A, B); if (Op0->hasOneUse() && match(Op0, m_Xor(m_Value(A), m_Value(B)))) { @@ -5010,27 +5003,15 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { bool Changed = SimplifyCommutative(I); Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); - if (isa<UndefValue>(Op1)) // X | undef -> -1 - return ReplaceInstUsesWith(I, Constant::getAllOnesValue(I.getType())); - - // or X, X = X - if (Op0 == Op1) - return ReplaceInstUsesWith(I, Op0); - + if (Value *V = SimplifyOrInst(Op0, Op1, TD)) + return ReplaceInstUsesWith(I, V); + + // See if we can simplify any instructions used by the instruction whose sole // purpose is to compute bits we don't care about. if (SimplifyDemandedInstructionBits(I)) return &I; - if (isa<VectorType>(I.getType())) { - if (isa<ConstantAggregateZero>(Op1)) { - return ReplaceInstUsesWith(I, Op0); // X | <0,0> -> X - } else if (ConstantVector *CP = dyn_cast<ConstantVector>(Op1)) { - if (CP->isAllOnesValue()) // X | <-1,-1> -> <-1,-1> - return ReplaceInstUsesWith(I, I.getOperand(1)); - } - } - // or X, -1 == -1 if (ConstantInt *RHS = dyn_cast<ConstantInt>(Op1)) { ConstantInt *C1 = 0; Value *X = 0; // (X & C1) | C2 --> (X | C2) & (C1|C2) @@ -5063,13 +5044,6 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { Value *A = 0, *B = 0; ConstantInt *C1 = 0, *C2 = 0; - if (match(Op0, m_And(m_Value(A), m_Value(B)))) - if (A == Op1 || B == Op1) // (A & ?) | A --> A - return ReplaceInstUsesWith(I, Op1); - if (match(Op1, m_And(m_Value(A), m_Value(B)))) - if (A == Op0 || B == Op0) // A | (A & ?) --> A - return ReplaceInstUsesWith(I, Op0); - // (A | B) | C and A | (B | C) -> bswap if possible. // (A >> B) | (C << D) and (A << B) | (B >> C) -> bswap if possible. if (match(Op0, m_Or(m_Value(), m_Value())) || @@ -5203,23 +5177,14 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { if (Ret) return Ret; } - if ((A = dyn_castNotVal(Op0))) { // ~A | Op1 - if (A == Op1) // ~A | A == -1 - return ReplaceInstUsesWith(I, Constant::getAllOnesValue(I.getType())); - } else { - A = 0; - } - // Note, A is still live here! 
- if ((B = dyn_castNotVal(Op1))) { // Op0 | ~B - if (Op0 == B) - return ReplaceInstUsesWith(I, Constant::getAllOnesValue(I.getType())); - - // (~A | ~B) == (~(A & B)) - De Morgan's Law - if (A && isOnlyUse(Op0) && isOnlyUse(Op1)) { - Value *And = Builder->CreateAnd(A, B, I.getName()+".demorgan"); - return BinaryOperator::CreateNot(And); - } - } + // (~A | ~B) == (~(A & B)) - De Morgan's Law + if (Value *Op0NotVal = dyn_castNotVal(Op0)) + if (Value *Op1NotVal = dyn_castNotVal(Op1)) + if (Op0->hasOneUse() && Op1->hasOneUse()) { + Value *And = Builder->CreateAnd(Op0NotVal, Op1NotVal, + I.getName()+".demorgan"); + return BinaryOperator::CreateNot(And); + } // (icmp1 A, B) | (icmp2 A, B) --> (icmp3 A, B) if (ICmpInst *RHS = dyn_cast<ICmpInst>(I.getOperand(1))) { @@ -5942,28 +5907,25 @@ Instruction *InstCombiner::FoldFCmp_IntToFP_Cst(FCmpInst &I, } Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) { - bool Changed = SimplifyCompare(I); - Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); + bool Changed = false; + + /// Orders the operands of the compare so that they are listed from most + /// complex to least complex. This puts constants before unary operators, + /// before binary operators. + if (getComplexity(I.getOperand(0)) < getComplexity(I.getOperand(1))) { + I.swapOperands(); + Changed = true; + } - // Fold trivial predicates. - if (I.getPredicate() == FCmpInst::FCMP_FALSE) - return ReplaceInstUsesWith(I, ConstantInt::get(I.getType(), 0)); - if (I.getPredicate() == FCmpInst::FCMP_TRUE) - return ReplaceInstUsesWith(I, ConstantInt::get(I.getType(), 1)); + Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); + if (Value *V = SimplifyFCmpInst(I.getPredicate(), Op0, Op1, TD)) + return ReplaceInstUsesWith(I, V); + // Simplify 'fcmp pred X, X' if (Op0 == Op1) { switch (I.getPredicate()) { default: llvm_unreachable("Unknown predicate!"); - case FCmpInst::FCMP_UEQ: // True if unordered or equal - case FCmpInst::FCMP_UGE: // True if unordered, greater than, or equal - case FCmpInst::FCMP_ULE: // True if unordered, less than, or equal - return ReplaceInstUsesWith(I, ConstantInt::get(I.getType(), 1)); - case FCmpInst::FCMP_OGT: // True if ordered and greater than - case FCmpInst::FCMP_OLT: // True if ordered and less than - case FCmpInst::FCMP_ONE: // True if ordered and operands are unequal - return ReplaceInstUsesWith(I, ConstantInt::get(I.getType(), 0)); - case FCmpInst::FCMP_UNO: // True if unordered: isnan(X) | isnan(Y) case FCmpInst::FCMP_ULT: // True if unordered or less than case FCmpInst::FCMP_UGT: // True if unordered or greater than @@ -5984,23 +5946,8 @@ Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) { } } - if (isa<UndefValue>(Op1)) // fcmp pred X, undef -> undef - return ReplaceInstUsesWith(I, UndefValue::get(I.getType())); - // Handle fcmp with constant RHS if (Constant *RHSC = dyn_cast<Constant>(Op1)) { - // If the constant is a nan, see if we can fold the comparison based on it. - if (ConstantFP *CFP = dyn_cast<ConstantFP>(RHSC)) { - if (CFP->getValueAPF().isNaN()) { - if (FCmpInst::isOrdered(I.getPredicate())) // True if ordered and... - return ReplaceInstUsesWith(I, ConstantInt::getFalse(*Context)); - assert(FCmpInst::isUnordered(I.getPredicate()) && - "Comparison must be either ordered or unordered!"); - // True if unordered. 
- return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context)); - } - } - if (Instruction *LHSI = dyn_cast<Instruction>(Op0)) switch (LHSI->getOpcode()) { case Instruction::PHI: @@ -6047,26 +5994,22 @@ Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) { } Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { - bool Changed = SimplifyCompare(I); + bool Changed = false; + + /// Orders the operands of the compare so that they are listed from most + /// complex to least complex. This puts constants before unary operators, + /// before binary operators. + if (getComplexity(I.getOperand(0)) < getComplexity(I.getOperand(1))) { + I.swapOperands(); + Changed = true; + } + Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); - const Type *Ty = Op0->getType(); - - // icmp X, X - if (Op0 == Op1) - return ReplaceInstUsesWith(I, ConstantInt::get(I.getType(), - I.isTrueWhenEqual())); - - if (isa<UndefValue>(Op1)) // X icmp undef -> undef - return ReplaceInstUsesWith(I, UndefValue::get(I.getType())); - // icmp <global/alloca*/null>, <global/alloca*/null> - Global/Stack value - // addresses never equal each other! We already know that Op0 != Op1. - if ((isa<GlobalValue>(Op0) || isa<AllocaInst>(Op0) || - isa<ConstantPointerNull>(Op0)) && - (isa<GlobalValue>(Op1) || isa<AllocaInst>(Op1) || - isa<ConstantPointerNull>(Op1))) - return ReplaceInstUsesWith(I, ConstantInt::get(Type::getInt1Ty(*Context), - !I.isTrueWhenEqual())); + if (Value *V = SimplifyICmpInst(I.getPredicate(), Op0, Op1, TD)) + return ReplaceInstUsesWith(I, V); + + const Type *Ty = Op0->getType(); // icmp's with boolean values can always be turned into bitwise operations if (Ty == Type::getInt1Ty(*Context)) { @@ -6131,27 +6074,24 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { // If we have an icmp le or icmp ge instruction, turn it into the // appropriate icmp lt or icmp gt instruction. This allows us to rely on - // them being folded in the code below. + // them being folded in the code below. The SimplifyICmpInst code has + // already handled the edge cases for us, so we just assert on them. 
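The trivial cases that used to be folded inline now come back pre-folded from SimplifyICmpInst, so the code below only has to handle the interesting predicates; for instance (a sketch; X, ALL_ONES_CI and TD are placeholders for an arbitrary value, the all-ones constant of its type, and the current target data):

    // SimplifyICmpInst(ICmpInst::ICMP_ULT, X, X, TD)           -> i1 false
    // SimplifyICmpInst(ICmpInst::ICMP_ULE, X, ALL_ONES_CI, TD) -> i1 true   (A <=u MAX)
    // hence the asserts rather than explicit checks in the switch that follows.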
switch (I.getPredicate()) { default: break; case ICmpInst::ICMP_ULE: - if (CI->isMaxValue(false)) // A <=u MAX -> TRUE - return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context)); + assert(!CI->isMaxValue(false)); // A <=u MAX -> TRUE return new ICmpInst(ICmpInst::ICMP_ULT, Op0, AddOne(CI)); case ICmpInst::ICMP_SLE: - if (CI->isMaxValue(true)) // A <=s MAX -> TRUE - return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context)); + assert(!CI->isMaxValue(true)); // A <=s MAX -> TRUE return new ICmpInst(ICmpInst::ICMP_SLT, Op0, AddOne(CI)); case ICmpInst::ICMP_UGE: - if (CI->isMinValue(false)) // A >=u MIN -> TRUE - return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context)); + assert(!CI->isMinValue(false)); // A >=u MIN -> TRUE return new ICmpInst(ICmpInst::ICMP_UGT, Op0, SubOne(CI)); case ICmpInst::ICMP_SGE: - if (CI->isMinValue(true)) // A >=s MIN -> TRUE - return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context)); + assert(!CI->isMinValue(true)); // A >=s MIN -> TRUE return new ICmpInst(ICmpInst::ICMP_SGT, Op0, SubOne(CI)); } @@ -8083,8 +8023,7 @@ bool InstCombiner::CanEvaluateInDifferentType(Value *V, const Type *Ty, Value *InstCombiner::EvaluateInDifferentType(Value *V, const Type *Ty, bool isSigned) { if (Constant *C = dyn_cast<Constant>(V)) - return ConstantExpr::getIntegerCast(C, Ty, - isSigned /*Sext or ZExt*/); + return ConstantExpr::getIntegerCast(C, Ty, isSigned /*Sext or ZExt*/); // Otherwise, it must be an instruction. Instruction *I = cast<Instruction>(V); @@ -8117,8 +8056,7 @@ Value *InstCombiner::EvaluateInDifferentType(Value *V, const Type *Ty, return I->getOperand(0); // Otherwise, must be the same type of cast, so just reinsert a new one. - Res = CastInst::Create(cast<CastInst>(I)->getOpcode(), I->getOperand(0), - Ty); + Res = CastInst::Create(cast<CastInst>(I)->getOpcode(), I->getOperand(0),Ty); break; case Instruction::Select: { Value *True = EvaluateInDifferentType(I->getOperand(1), Ty, isSigned); @@ -8167,9 +8105,15 @@ Instruction *InstCombiner::commonCastTransforms(CastInst &CI) { return NV; // If we are casting a PHI then fold the cast into the PHI - if (isa<PHINode>(Src)) - if (Instruction *NV = FoldOpIntoPhi(CI)) - return NV; + if (isa<PHINode>(Src)) { + // We don't do this if this would create a PHI node with an illegal type if + // it is currently legal. + if (!isa<IntegerType>(Src->getType()) || + !isa<IntegerType>(CI.getType()) || + ShouldChangeType(CI.getType(), Src->getType(), TD)) + if (Instruction *NV = FoldOpIntoPhi(CI)) + return NV; + } return 0; } @@ -8289,23 +8233,6 @@ Instruction *InstCombiner::commonPointerCastTransforms(CastInst &CI) { return commonCastTransforms(CI); } -/// isSafeIntegerType - Return true if this is a basic integer type, not a crazy -/// type like i42. We don't want to introduce operations on random non-legal -/// integer types where they don't already exist in the code. In the future, -/// we should consider making this based off target-data, so that 32-bit targets -/// won't get i64 operations etc. -static bool isSafeIntegerType(const Type *Ty) { - switch (Ty->getPrimitiveSizeInBits()) { - case 8: - case 16: - case 32: - case 64: - return true; - default: - return false; - } -} - /// commonIntCastTransforms - This function implements the common transforms /// for trunc, zext, and sext. 
Instruction *InstCombiner::commonIntCastTransforms(CastInst &CI) { @@ -8334,8 +8261,8 @@ Instruction *InstCombiner::commonIntCastTransforms(CastInst &CI) { // Only do this if the dest type is a simple type, don't convert the // expression tree to something weird like i93 unless the source is also // strange. - if ((isSafeIntegerType(DestTy->getScalarType()) || - !isSafeIntegerType(SrcI->getType()->getScalarType())) && + if ((isa<VectorType>(DestTy) || + ShouldChangeType(SrcI->getType(), DestTy, TD)) && CanEvaluateInDifferentType(SrcI, DestTy, CI.getOpcode(), NumCastsRemoved)) { // If this cast is a truncate, evaluting in a different type always @@ -8356,6 +8283,7 @@ Instruction *InstCombiner::commonIntCastTransforms(CastInst &CI) { break; case Instruction::ZExt: { DoXForm = NumCastsRemoved >= 1; + if (!DoXForm && 0) { // If it's unnecessary to issue an AND to clear the high bits, it's // always profitable to do this xform. @@ -8522,7 +8450,7 @@ Instruction *InstCombiner::visitTrunc(TruncInst &CI) { return BinaryOperator::CreateLShr(V1, V2); } } - + return 0; } @@ -10880,9 +10808,10 @@ Instruction *InstCombiner::FoldPHIArgLoadIntoPHI(PHINode &PN) { } -// FoldPHIArgOpIntoPHI - If all operands to a PHI node are the same "unary" -// operator and they all are only used by the PHI, PHI together their -// inputs, and do the operation once, to the result of the PHI. + +/// FoldPHIArgOpIntoPHI - If all operands to a PHI node are the same "unary" +/// operator and they all are only used by the PHI, PHI together their +/// inputs, and do the operation once, to the result of the PHI. Instruction *InstCombiner::FoldPHIArgOpIntoPHI(PHINode &PN) { Instruction *FirstInst = cast<Instruction>(PN.getIncomingValue(0)); @@ -10900,6 +10829,13 @@ Instruction *InstCombiner::FoldPHIArgOpIntoPHI(PHINode &PN) { if (isa<CastInst>(FirstInst)) { CastSrcTy = FirstInst->getOperand(0)->getType(); + + // Be careful about transforming integer PHIs. We don't want to pessimize + // the code by turning an i32 into an i1293. + if (isa<IntegerType>(PN.getType()) && isa<IntegerType>(CastSrcTy)) { + if (!ShouldChangeType(PN.getType(), CastSrcTy, TD)) + return 0; + } } else if (isa<BinaryOperator>(FirstInst) || isa<CmpInst>(FirstInst)) { // Can fold binop, compare or shift here if the RHS is a constant, // otherwise call FoldPHIArgBinOpIntoPHI. @@ -11012,6 +10948,222 @@ static bool PHIsEqualValue(PHINode *PN, Value *NonPhiInVal, } +namespace { +struct PHIUsageRecord { + unsigned PHIId; // The ID # of the PHI (something determinstic to sort on) + unsigned Shift; // The amount shifted. + Instruction *Inst; // The trunc instruction. + + PHIUsageRecord(unsigned pn, unsigned Sh, Instruction *User) + : PHIId(pn), Shift(Sh), Inst(User) {} + + bool operator<(const PHIUsageRecord &RHS) const { + if (PHIId < RHS.PHIId) return true; + if (PHIId > RHS.PHIId) return false; + if (Shift < RHS.Shift) return true; + if (Shift > RHS.Shift) return false; + return Inst->getType()->getPrimitiveSizeInBits() < + RHS.Inst->getType()->getPrimitiveSizeInBits(); + } +}; + +struct LoweredPHIRecord { + PHINode *PN; // The PHI that was lowered. + unsigned Shift; // The amount shifted. + unsigned Width; // The width extracted. + + LoweredPHIRecord(PHINode *pn, unsigned Sh, const Type *Ty) + : PN(pn), Shift(Sh), Width(Ty->getPrimitiveSizeInBits()) {} + + // Ctor form used by DenseMap. 
+ LoweredPHIRecord(PHINode *pn, unsigned Sh) + : PN(pn), Shift(Sh), Width(0) {} +}; +} + +namespace llvm { + template<> + struct DenseMapInfo<LoweredPHIRecord> { + static inline LoweredPHIRecord getEmptyKey() { + return LoweredPHIRecord(0, 0); + } + static inline LoweredPHIRecord getTombstoneKey() { + return LoweredPHIRecord(0, 1); + } + static unsigned getHashValue(const LoweredPHIRecord &Val) { + return DenseMapInfo<PHINode*>::getHashValue(Val.PN) ^ (Val.Shift>>3) ^ + (Val.Width>>3); + } + static bool isEqual(const LoweredPHIRecord &LHS, + const LoweredPHIRecord &RHS) { + return LHS.PN == RHS.PN && LHS.Shift == RHS.Shift && + LHS.Width == RHS.Width; + } + static bool isPod() { return true; } + }; +} + + +/// SliceUpIllegalIntegerPHI - This is an integer PHI and we know that it has an +/// illegal type: see if it is only used by trunc or trunc(lshr) operations. If +/// so, we split the PHI into the various pieces being extracted. This sort of +/// thing is introduced when SROA promotes an aggregate to large integer values. +/// +/// TODO: The user of the trunc may be an bitcast to float/double/vector or an +/// inttoptr. We should produce new PHIs in the right type. +/// +Instruction *InstCombiner::SliceUpIllegalIntegerPHI(PHINode &FirstPhi) { + // PHIUsers - Keep track of all of the truncated values extracted from a set + // of PHIs, along with their offset. These are the things we want to rewrite. + SmallVector<PHIUsageRecord, 16> PHIUsers; + + // PHIs are often mutually cyclic, so we keep track of a whole set of PHI + // nodes which are extracted from. PHIsToSlice is a set we use to avoid + // revisiting PHIs, PHIsInspected is a ordered list of PHIs that we need to + // check the uses of (to ensure they are all extracts). + SmallVector<PHINode*, 8> PHIsToSlice; + SmallPtrSet<PHINode*, 8> PHIsInspected; + + PHIsToSlice.push_back(&FirstPhi); + PHIsInspected.insert(&FirstPhi); + + for (unsigned PHIId = 0; PHIId != PHIsToSlice.size(); ++PHIId) { + PHINode *PN = PHIsToSlice[PHIId]; + + for (Value::use_iterator UI = PN->use_begin(), E = PN->use_end(); + UI != E; ++UI) { + Instruction *User = cast<Instruction>(*UI); + + // If the user is a PHI, inspect its uses recursively. + if (PHINode *UserPN = dyn_cast<PHINode>(User)) { + if (PHIsInspected.insert(UserPN)) + PHIsToSlice.push_back(UserPN); + continue; + } + + // Truncates are always ok. + if (isa<TruncInst>(User)) { + PHIUsers.push_back(PHIUsageRecord(PHIId, 0, User)); + continue; + } + + // Otherwise it must be a lshr which can only be used by one trunc. + if (User->getOpcode() != Instruction::LShr || + !User->hasOneUse() || !isa<TruncInst>(User->use_back()) || + !isa<ConstantInt>(User->getOperand(1))) + return 0; + + unsigned Shift = cast<ConstantInt>(User->getOperand(1))->getZExtValue(); + PHIUsers.push_back(PHIUsageRecord(PHIId, Shift, User->use_back())); + } + } + + // If we have no users, they must be all self uses, just nuke the PHI. + if (PHIUsers.empty()) + return ReplaceInstUsesWith(FirstPhi, UndefValue::get(FirstPhi.getType())); + + // If this phi node is transformable, create new PHIs for all the pieces + // extracted out of it. First, sort the users by their offset and size. + array_pod_sort(PHIUsers.begin(), PHIUsers.end()); + + DEBUG(errs() << "SLICING UP PHI: " << FirstPhi << '\n'; + for (unsigned i = 1, e = PHIsToSlice.size(); i != e; ++i) + errs() << "AND USER PHI #" << i << ": " << *PHIsToSlice[i] <<'\n'; + ); + + // PredValues - This is a temporary used when rewriting PHI nodes. 
It is + // hoisted out here to avoid construction/destruction thrashing. + DenseMap<BasicBlock*, Value*> PredValues; + + // ExtractedVals - Each new PHI we introduce is saved here so we don't + // introduce redundant PHIs. + DenseMap<LoweredPHIRecord, PHINode*> ExtractedVals; + + for (unsigned UserI = 0, UserE = PHIUsers.size(); UserI != UserE; ++UserI) { + unsigned PHIId = PHIUsers[UserI].PHIId; + PHINode *PN = PHIsToSlice[PHIId]; + unsigned Offset = PHIUsers[UserI].Shift; + const Type *Ty = PHIUsers[UserI].Inst->getType(); + + PHINode *EltPHI; + + // If we've already lowered a user like this, reuse the previously lowered + // value. + if ((EltPHI = ExtractedVals[LoweredPHIRecord(PN, Offset, Ty)]) == 0) { + + // Otherwise, Create the new PHI node for this user. + EltPHI = PHINode::Create(Ty, PN->getName()+".off"+Twine(Offset), PN); + assert(EltPHI->getType() != PN->getType() && + "Truncate didn't shrink phi?"); + + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { + BasicBlock *Pred = PN->getIncomingBlock(i); + Value *&PredVal = PredValues[Pred]; + + // If we already have a value for this predecessor, reuse it. + if (PredVal) { + EltPHI->addIncoming(PredVal, Pred); + continue; + } + + // Handle the PHI self-reuse case. + Value *InVal = PN->getIncomingValue(i); + if (InVal == PN) { + PredVal = EltPHI; + EltPHI->addIncoming(PredVal, Pred); + continue; + } else if (PHINode *InPHI = dyn_cast<PHINode>(PN)) { + // If the incoming value was a PHI, and if it was one of the PHIs we + // already rewrote it, just use the lowered value. + if (Value *Res = ExtractedVals[LoweredPHIRecord(InPHI, Offset, Ty)]) { + PredVal = Res; + EltPHI->addIncoming(PredVal, Pred); + continue; + } + } + + // Otherwise, do an extract in the predecessor. + Builder->SetInsertPoint(Pred, Pred->getTerminator()); + Value *Res = InVal; + if (Offset) + Res = Builder->CreateLShr(Res, ConstantInt::get(InVal->getType(), + Offset), "extract"); + Res = Builder->CreateTrunc(Res, Ty, "extract.t"); + PredVal = Res; + EltPHI->addIncoming(Res, Pred); + + // If the incoming value was a PHI, and if it was one of the PHIs we are + // rewriting, we will ultimately delete the code we inserted. This + // means we need to revisit that PHI to make sure we extract out the + // needed piece. + if (PHINode *OldInVal = dyn_cast<PHINode>(PN->getIncomingValue(i))) + if (PHIsInspected.count(OldInVal)) { + unsigned RefPHIId = std::find(PHIsToSlice.begin(),PHIsToSlice.end(), + OldInVal)-PHIsToSlice.begin(); + PHIUsers.push_back(PHIUsageRecord(RefPHIId, Offset, + cast<Instruction>(Res))); + ++UserE; + } + } + PredValues.clear(); + + DEBUG(errs() << " Made element PHI for offset " << Offset << ": " + << *EltPHI << '\n'); + ExtractedVals[LoweredPHIRecord(PN, Offset, Ty)] = EltPHI; + } + + // Replace the use of this piece with the PHI node. + ReplaceInstUsesWith(*PHIUsers[UserI].Inst, EltPHI); + } + + // Replace all the remaining uses of the PHI nodes (self uses and the lshrs) + // with undefs. + Value *Undef = UndefValue::get(FirstPhi.getType()); + for (unsigned i = 1, e = PHIsToSlice.size(); i != e; ++i) + ReplaceInstUsesWith(*PHIsToSlice[i], Undef); + return ReplaceInstUsesWith(FirstPhi, Undef); +} + // PHINode simplification // Instruction *InstCombiner::visitPHINode(PHINode &PN) { @@ -11117,6 +11269,15 @@ Instruction *InstCombiner::visitPHINode(PHINode &PN) { } } + // If this is an integer PHI and we know that it has an illegal type, see if + // it is only used by trunc or trunc(lshr) operations. 
If so, we split the + // PHI into the various pieces being extracted. This sort of thing is + // introduced when SROA promotes an aggregate to a single large integer type. + if (isa<IntegerType>(PN.getType()) && TD && + !TD->isLegalInteger(PN.getType()->getPrimitiveSizeInBits())) + if (Instruction *Res = SliceUpIllegalIntegerPHI(PN)) + return Res; + return 0; } @@ -12210,6 +12371,47 @@ Instruction *InstCombiner::visitExtractValueInst(ExtractValueInst &EV) { return ExtractValueInst::Create(IV->getInsertedValueOperand(), exti, exte); } + if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Agg)) { + // We're extracting from an intrinsic, see if we're the only user, which + // allows us to simplify multiple result intrinsics to simpler things that + // just get one value.. + if (II->hasOneUse()) { + // Check if we're grabbing the overflow bit or the result of a 'with + // overflow' intrinsic. If it's the latter we can remove the intrinsic + // and replace it with a traditional binary instruction. + switch (II->getIntrinsicID()) { + case Intrinsic::uadd_with_overflow: + case Intrinsic::sadd_with_overflow: + if (*EV.idx_begin() == 0) { // Normal result. + Value *LHS = II->getOperand(1), *RHS = II->getOperand(2); + II->replaceAllUsesWith(UndefValue::get(II->getType())); + EraseInstFromFunction(*II); + return BinaryOperator::CreateAdd(LHS, RHS); + } + break; + case Intrinsic::usub_with_overflow: + case Intrinsic::ssub_with_overflow: + if (*EV.idx_begin() == 0) { // Normal result. + Value *LHS = II->getOperand(1), *RHS = II->getOperand(2); + II->replaceAllUsesWith(UndefValue::get(II->getType())); + EraseInstFromFunction(*II); + return BinaryOperator::CreateSub(LHS, RHS); + } + break; + case Intrinsic::umul_with_overflow: + case Intrinsic::smul_with_overflow: + if (*EV.idx_begin() == 0) { // Normal result. + Value *LHS = II->getOperand(1), *RHS = II->getOperand(2); + II->replaceAllUsesWith(UndefValue::get(II->getType())); + EraseInstFromFunction(*II); + return BinaryOperator::CreateMul(LHS, RHS); + } + break; + default: + break; + } + } + } // Can't simplify extracts from other values. Note that nested extracts are // already simplified implicitely by the above (extract ( extract (insert) ) // will be translated into extract ( insert ( extract ) ) first and then just @@ -12715,29 +12917,33 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) { if (isa<UndefValue>(RHS)) { std::vector<unsigned> LHSMask = getShuffleMask(LHSSVI); - std::vector<unsigned> NewMask; - for (unsigned i = 0, e = Mask.size(); i != e; ++i) - if (Mask[i] >= 2*e) - NewMask.push_back(2*e); - else - NewMask.push_back(LHSMask[Mask[i]]); + if (LHSMask.size() == Mask.size()) { + std::vector<unsigned> NewMask; + for (unsigned i = 0, e = Mask.size(); i != e; ++i) + if (Mask[i] >= 2*e) + NewMask.push_back(2*e); + else + NewMask.push_back(LHSMask[Mask[i]]); - // If the result mask is equal to the src shuffle or this shuffle mask, do - // the replacement. - if (NewMask == LHSMask || NewMask == Mask) { - unsigned LHSInNElts = - cast<VectorType>(LHSSVI->getOperand(0)->getType())->getNumElements(); - std::vector<Constant*> Elts; - for (unsigned i = 0, e = NewMask.size(); i != e; ++i) { - if (NewMask[i] >= LHSInNElts*2) { - Elts.push_back(UndefValue::get(Type::getInt32Ty(*Context))); - } else { - Elts.push_back(ConstantInt::get(Type::getInt32Ty(*Context), NewMask[i])); + // If the result mask is equal to the src shuffle or this + // shuffle mask, do the replacement. 
+ if (NewMask == LHSMask || NewMask == Mask) { + unsigned LHSInNElts = + cast<VectorType>(LHSSVI->getOperand(0)->getType())-> + getNumElements(); + std::vector<Constant*> Elts; + for (unsigned i = 0, e = NewMask.size(); i != e; ++i) { + if (NewMask[i] >= LHSInNElts*2) { + Elts.push_back(UndefValue::get(Type::getInt32Ty(*Context))); + } else { + Elts.push_back(ConstantInt::get(Type::getInt32Ty(*Context), + NewMask[i])); + } } + return new ShuffleVectorInst(LHSSVI->getOperand(0), + LHSSVI->getOperand(1), + ConstantVector::get(Elts)); } - return new ShuffleVectorInst(LHSSVI->getOperand(0), - LHSSVI->getOperand(1), - ConstantVector::get(Elts)); } } } @@ -12824,7 +13030,7 @@ static bool AddReachableCodeToWorklist(BasicBlock *BB, // ConstantProp instruction if trivially constant. if (!Inst->use_empty() && isa<Constant>(Inst->getOperand(0))) - if (Constant *C = ConstantFoldInstruction(Inst, BB->getContext(), TD)) { + if (Constant *C = ConstantFoldInstruction(Inst, TD)) { DEBUG(errs() << "IC: ConstFold to: " << *C << " from: " << *Inst << '\n'); Inst->replaceAllUsesWith(C); @@ -12846,8 +13052,7 @@ static bool AddReachableCodeToWorklist(BasicBlock *BB, if (!FoldedConstants.insert(CE)) continue; - Constant *NewC = - ConstantFoldConstantExpression(CE, BB->getContext(), TD); + Constant *NewC = ConstantFoldConstantExpression(CE, TD); if (NewC && NewC != CE) { *i = NewC; MadeIRChange = true; @@ -12954,7 +13159,7 @@ bool InstCombiner::DoOneIteration(Function &F, unsigned Iteration) { // Instruction isn't dead, see if we can constant propagate it. if (!I->use_empty() && isa<Constant>(I->getOperand(0))) - if (Constant *C = ConstantFoldInstruction(I, F.getContext(), TD)) { + if (Constant *C = ConstantFoldInstruction(I, TD)) { DEBUG(errs() << "IC: ConstFold to: " << *C << " from: " << *I << '\n'); // Add operands to the worklist. @@ -13065,7 +13270,7 @@ bool InstCombiner::runOnFunction(Function &F) { /// Builder - This is an IRBuilder that automatically inserts new /// instructions into the worklist when they are created. IRBuilder<true, TargetFolder, InstCombineIRInserter> - TheBuilder(F.getContext(), TargetFolder(TD, F.getContext()), + TheBuilder(F.getContext(), TargetFolder(TD), InstCombineIRInserter(Worklist)); Builder = &TheBuilder; diff --git a/lib/Transforms/Scalar/JumpThreading.cpp b/lib/Transforms/Scalar/JumpThreading.cpp index 10c9ec6..5864113 100644 --- a/lib/Transforms/Scalar/JumpThreading.cpp +++ b/lib/Transforms/Scalar/JumpThreading.cpp @@ -16,7 +16,8 @@ #include "llvm/IntrinsicInst.h" #include "llvm/LLVMContext.h" #include "llvm/Pass.h" -#include "llvm/Analysis/ConstantFolding.h" +#include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Analysis/LazyValueInfo.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/SSAUpdater.h" @@ -40,6 +41,12 @@ Threshold("jump-threading-threshold", cl::desc("Max block size to duplicate for jump threading"), cl::init(6), cl::Hidden); +// Turn on use of LazyValueInfo. +static cl::opt<bool> +EnableLVI("enable-jump-threading-lvi", cl::ReallyHidden); + + + namespace { /// This pass performs 'jump threading', which looks at blocks that have /// multiple predecessors and multiple successors. 
If one or more of the @@ -59,6 +66,7 @@ namespace { /// class JumpThreading : public FunctionPass { TargetData *TD; + LazyValueInfo *LVI; #ifdef NDEBUG SmallPtrSet<BasicBlock*, 16> LoopHeaders; #else @@ -69,20 +77,31 @@ namespace { JumpThreading() : FunctionPass(&ID) {} bool runOnFunction(Function &F); - void FindLoopHeaders(Function &F); + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + if (EnableLVI) + AU.addRequired<LazyValueInfo>(); + } + + void FindLoopHeaders(Function &F); bool ProcessBlock(BasicBlock *BB); - bool ThreadEdge(BasicBlock *BB, BasicBlock *PredBB, BasicBlock *SuccBB); + bool ThreadEdge(BasicBlock *BB, const SmallVectorImpl<BasicBlock*> &PredBBs, + BasicBlock *SuccBB); bool DuplicateCondBranchOnPHIIntoPred(BasicBlock *BB, BasicBlock *PredBB); - - BasicBlock *FactorCommonPHIPreds(PHINode *PN, Value *Val); + + typedef SmallVectorImpl<std::pair<ConstantInt*, + BasicBlock*> > PredValueInfo; + + bool ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB, + PredValueInfo &Result); + bool ProcessThreadableEdges(Value *Cond, BasicBlock *BB); + + bool ProcessBranchOnDuplicateCond(BasicBlock *PredBB, BasicBlock *DestBB); bool ProcessSwitchOnDuplicateCond(BasicBlock *PredBB, BasicBlock *DestBB); bool ProcessJumpOnPHI(PHINode *PN); - bool ProcessBranchOnLogical(Value *V, BasicBlock *BB, bool isAnd); - bool ProcessBranchOnCompare(CmpInst *Cmp, BasicBlock *BB); bool SimplifyPartiallyRedundantLoad(LoadInst *LI); }; @@ -100,6 +119,7 @@ FunctionPass *llvm::createJumpThreadingPass() { return new JumpThreading(); } bool JumpThreading::runOnFunction(Function &F) { DEBUG(errs() << "Jump threading on function '" << F.getName() << "'\n"); TD = getAnalysisIfAvailable<TargetData>(); + LVI = EnableLVI ? &getAnalysis<LazyValueInfo>() : 0; FindLoopHeaders(F); @@ -109,6 +129,7 @@ bool JumpThreading::runOnFunction(Function &F) { bool Changed = false; for (Function::iterator I = F.begin(), E = F.end(); I != E;) { BasicBlock *BB = I; + // Thread all of the branches we can over this block. while (ProcessBlock(BB)) Changed = true; @@ -123,6 +144,29 @@ bool JumpThreading::runOnFunction(Function &F) { LoopHeaders.erase(BB); DeleteDeadBlock(BB); Changed = true; + } else if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator())) { + // Can't thread an unconditional jump, but if the block is "almost + // empty", we can replace uses of it with uses of the successor and make + // this dead. + if (BI->isUnconditional() && + BB != &BB->getParent()->getEntryBlock()) { + BasicBlock::iterator BBI = BB->getFirstNonPHI(); + // Ignore dbg intrinsics. + while (isa<DbgInfoIntrinsic>(BBI)) + ++BBI; + // If the terminator is the only non-phi instruction, try to nuke it. + if (BBI->isTerminator()) { + // Since TryToSimplifyUncondBranchFromEmptyBlock may delete the + // block, we have to make sure it isn't in the LoopHeaders set. We + // reinsert afterward in the rare case when the block isn't deleted. + bool ErasedFromLoopHeaders = LoopHeaders.erase(BB); + + if (TryToSimplifyUncondBranchFromEmptyBlock(BB)) + Changed = true; + else if (ErasedFromLoopHeaders) + LoopHeaders.insert(BB); + } + } } } AnotherIteration = Changed; @@ -139,6 +183,10 @@ static unsigned getJumpThreadDuplicationCost(const BasicBlock *BB) { /// Ignore PHI nodes, these will be flattened when duplication happens. BasicBlock::const_iterator I = BB->getFirstNonPHI(); + // FIXME: THREADING will delete values that are just used to compute the + // branch, so they shouldn't count against the duplication cost. 
+ + // Sum up the cost of each instruction until we get to the terminator. Don't // include the terminator because the copy won't include it. unsigned Size = 0; @@ -173,8 +221,6 @@ static unsigned getJumpThreadDuplicationCost(const BasicBlock *BB) { return Size; } - - /// FindLoopHeaders - We do not want jump threading to turn proper loop /// structures into irreducible loops. Doing this breaks up the loop nesting /// hierarchy and pessimizes later transformations. To prevent this from @@ -198,29 +244,181 @@ void JumpThreading::FindLoopHeaders(Function &F) { LoopHeaders.insert(const_cast<BasicBlock*>(Edges[i].second)); } - -/// FactorCommonPHIPreds - If there are multiple preds with the same incoming -/// value for the PHI, factor them together so we get one block to thread for -/// the whole group. -/// This is important for things like "phi i1 [true, true, false, true, x]" -/// where we only need to clone the block for the true blocks once. +/// ComputeValueKnownInPredecessors - Given a basic block BB and a value V, see +/// if we can infer that the value is a known ConstantInt in any of our +/// predecessors. If so, return the known list of value and pred BB in the +/// result vector. If a value is known to be undef, it is returned as null. +/// +/// This returns true if there were any known values. /// -BasicBlock *JumpThreading::FactorCommonPHIPreds(PHINode *PN, Value *Val) { - SmallVector<BasicBlock*, 16> CommonPreds; - for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) - if (PN->getIncomingValue(i) == Val) - CommonPreds.push_back(PN->getIncomingBlock(i)); - - if (CommonPreds.size() == 1) - return CommonPreds[0]; +bool JumpThreading:: +ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB,PredValueInfo &Result){ + // If V is a constantint, then it is known in all predecessors. + if (isa<ConstantInt>(V) || isa<UndefValue>(V)) { + ConstantInt *CI = dyn_cast<ConstantInt>(V); - DEBUG(errs() << " Factoring out " << CommonPreds.size() - << " common predecessors.\n"); - return SplitBlockPredecessors(PN->getParent(), - &CommonPreds[0], CommonPreds.size(), - ".thr_comm", this); -} + for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) + Result.push_back(std::make_pair(CI, *PI)); + return true; + } + + // If V is a non-instruction value, or an instruction in a different block, + // then it can't be derived from a PHI. + Instruction *I = dyn_cast<Instruction>(V); + if (I == 0 || I->getParent() != BB) { + + // Okay, if this is a live-in value, see if it has a known value at the end + // of any of our predecessors. + // + // FIXME: This should be an edge property, not a block end property. + /// TODO: Per PR2563, we could infer value range information about a + /// predecessor based on its terminator. + // + if (LVI) { + // FIXME: change this to use the more-rich 'getPredicateOnEdge' method if + // "I" is a non-local compare-with-a-constant instruction. This would be + // able to handle value inequalities better, for example if the compare is + // "X < 4" and "X < 3" is known true but "X < 4" itself is not available. + // Perhaps getConstantOnEdge should be smart enough to do this? + + for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) { + // If the value is known by LazyValueInfo to be a constant in a + // predecessor, use that information to try to thread this block. 
+ Constant *PredCst = LVI->getConstantOnEdge(V, *PI, BB); + if (PredCst == 0 || + (!isa<ConstantInt>(PredCst) && !isa<UndefValue>(PredCst))) + continue; + + Result.push_back(std::make_pair(dyn_cast<ConstantInt>(PredCst), *PI)); + } + + return !Result.empty(); + } + + return false; + } + + /// If I is a PHI node, then we know the incoming values for any constants. + if (PHINode *PN = dyn_cast<PHINode>(I)) { + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { + Value *InVal = PN->getIncomingValue(i); + if (isa<ConstantInt>(InVal) || isa<UndefValue>(InVal)) { + ConstantInt *CI = dyn_cast<ConstantInt>(InVal); + Result.push_back(std::make_pair(CI, PN->getIncomingBlock(i))); + } + } + return !Result.empty(); + } + + SmallVector<std::pair<ConstantInt*, BasicBlock*>, 8> LHSVals, RHSVals; + + // Handle some boolean conditions. + if (I->getType()->getPrimitiveSizeInBits() == 1) { + // X | true -> true + // X & false -> false + if (I->getOpcode() == Instruction::Or || + I->getOpcode() == Instruction::And) { + ComputeValueKnownInPredecessors(I->getOperand(0), BB, LHSVals); + ComputeValueKnownInPredecessors(I->getOperand(1), BB, RHSVals); + + if (LHSVals.empty() && RHSVals.empty()) + return false; + + ConstantInt *InterestingVal; + if (I->getOpcode() == Instruction::Or) + InterestingVal = ConstantInt::getTrue(I->getContext()); + else + InterestingVal = ConstantInt::getFalse(I->getContext()); + + // Scan for the sentinel. + for (unsigned i = 0, e = LHSVals.size(); i != e; ++i) + if (LHSVals[i].first == InterestingVal || LHSVals[i].first == 0) + Result.push_back(LHSVals[i]); + for (unsigned i = 0, e = RHSVals.size(); i != e; ++i) + if (RHSVals[i].first == InterestingVal || RHSVals[i].first == 0) + Result.push_back(RHSVals[i]); + return !Result.empty(); + } + + // Handle the NOT form of XOR. + if (I->getOpcode() == Instruction::Xor && + isa<ConstantInt>(I->getOperand(1)) && + cast<ConstantInt>(I->getOperand(1))->isOne()) { + ComputeValueKnownInPredecessors(I->getOperand(0), BB, Result); + if (Result.empty()) + return false; + + // Invert the known values. + for (unsigned i = 0, e = Result.size(); i != e; ++i) + if (Result[i].first) + Result[i].first = + cast<ConstantInt>(ConstantExpr::getNot(Result[i].first)); + return true; + } + } + // Handle compare with phi operand, where the PHI is defined in this block. + if (CmpInst *Cmp = dyn_cast<CmpInst>(I)) { + PHINode *PN = dyn_cast<PHINode>(Cmp->getOperand(0)); + if (PN && PN->getParent() == BB) { + // We can do this simplification if any comparisons fold to true or false. + // See if any do. + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { + BasicBlock *PredBB = PN->getIncomingBlock(i); + Value *LHS = PN->getIncomingValue(i); + Value *RHS = Cmp->getOperand(1)->DoPHITranslation(BB, PredBB); + + Value *Res = SimplifyCmpInst(Cmp->getPredicate(), LHS, RHS, TD); + if (Res == 0) { + if (!LVI || !isa<Constant>(RHS)) + continue; + + LazyValueInfo::Tristate + ResT = LVI->getPredicateOnEdge(Cmp->getPredicate(), LHS, + cast<Constant>(RHS), PredBB, BB); + if (ResT == LazyValueInfo::Unknown) + continue; + Res = ConstantInt::get(Type::getInt1Ty(LHS->getContext()), ResT); + } + + if (isa<UndefValue>(Res)) + Result.push_back(std::make_pair((ConstantInt*)0, PredBB)); + else if (ConstantInt *CI = dyn_cast<ConstantInt>(Res)) + Result.push_back(std::make_pair(CI, PredBB)); + } + + return !Result.empty(); + } + + + // If comparing a live-in value against a constant, see if we know the + // live-in value on any predecessors. 
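A C-level sketch of the pattern this handles (hypothetical code): along the edge that executed f(), LazyValueInfo still knows the value of x, so the second branch is decidable for that predecessor and the edge can be threaded straight to its target.

    extern void f(void), g(void);
    void example(int x) {
      if (x == 4)
        f();
      /* both paths join here */
      if (x > 3)   /* known true on the edge coming from f(), unknown on the other */
        g();
    }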
+    if (LVI && isa<Constant>(Cmp->getOperand(1)) &&
+        Cmp->getType()->isInteger() && // Not vector compare.
+        (!isa<Instruction>(Cmp->getOperand(0)) ||
+         cast<Instruction>(Cmp->getOperand(0))->getParent() != BB)) {
+      Constant *RHSCst = cast<Constant>(Cmp->getOperand(1));
+
+      for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
+        // If the value is known by LazyValueInfo to be a constant in a
+        // predecessor, use that information to try to thread this block.
+        LazyValueInfo::Tristate
+          Res = LVI->getPredicateOnEdge(Cmp->getPredicate(), Cmp->getOperand(0),
+                                        RHSCst, *PI, BB);
+        if (Res == LazyValueInfo::Unknown)
+          continue;
+
+        Constant *ResC = ConstantInt::get(Cmp->getType(), Res);
+        Result.push_back(std::make_pair(cast<ConstantInt>(ResC), *PI));
+      }
+
+      return !Result.empty();
+    }
+  }
+  return false;
+}
+
+
 /// GetBestDestForBranchOnUndef - If we determine that the specified block ends
 /// in an undefined jump, decide which block is best to revector to.
@@ -251,7 +449,7 @@ bool JumpThreading::ProcessBlock(BasicBlock *BB) {
   // successor, merge the blocks. This encourages recursive jump threading
   // because now the condition in this block can be threaded through
   // predecessors of our predecessor block.
-  if (BasicBlock *SinglePred = BB->getSinglePredecessor())
+  if (BasicBlock *SinglePred = BB->getSinglePredecessor()) {
     if (SinglePred->getTerminator()->getNumSuccessors() == 1 &&
         SinglePred != BB) {
       // If SinglePred was a loop header, BB becomes one.
@@ -267,10 +465,10 @@ bool JumpThreading::ProcessBlock(BasicBlock *BB) {
      BB->moveBefore(&BB->getParent()->getEntryBlock());
      return true;
    }
-
-  // See if this block ends with a branch or switch. If so, see if the
-  // condition is a phi node. If so, and if an entry of the phi node is a
-  // constant, we can thread the block.
+  }
+
+  // Look to see if the terminator is a branch or switch, if not we can't thread
+  // it.
   Value *Condition;
   if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator())) {
     // Can't thread an unconditional jump.
@@ -301,7 +499,7 @@ bool JumpThreading::ProcessBlock(BasicBlock *BB) {
     TerminatorInst *BBTerm = BB->getTerminator();
     for (unsigned i = 0, e = BBTerm->getNumSuccessors(); i != e; ++i) {
       if (i == BestSucc) continue;
-      BBTerm->getSuccessor(i)->removePredecessor(BB);
+      RemovePredecessorAndSimplify(BBTerm->getSuccessor(i), BB, TD);
     }
     DEBUG(errs() << " In block '" << BB->getName()
@@ -318,7 +516,8 @@ bool JumpThreading::ProcessBlock(BasicBlock *BB) {
   //     br COND, BBX, BBY
   //  BBX:
   //     br COND, BBZ, BBW
-  if (!Condition->hasOneUse() && // Multiple uses.
+  if (!LVI &&
+      !Condition->hasOneUse() && // Multiple uses.
      (CondInst == 0 || CondInst->getParent() != BB)) { // Non-local definition.
    pred_iterator PI = pred_begin(BB), E = pred_end(BB);
    if (isa<BranchInst>(BB->getTerminator())) {
@@ -338,52 +537,40 @@ bool JumpThreading::ProcessBlock(BasicBlock *BB) {
  }
   // All the rest of our checks depend on the condition being an instruction.
-  if (CondInst == 0)
+  if (CondInst == 0) {
+    // FIXME: Unify this with code below.
+    if (LVI && ProcessThreadableEdges(Condition, BB))
+      return true;
     return false;
+  }
+
   // See if this is a phi node in the current block.
   if (PHINode *PN = dyn_cast<PHINode>(CondInst))
     if (PN->getParent() == BB)
       return ProcessJumpOnPHI(PN);
-  // If this is a conditional branch whose condition is and/or of a phi, try to
-  // simplify it.
- if ((CondInst->getOpcode() == Instruction::And || - CondInst->getOpcode() == Instruction::Or) && - isa<BranchInst>(BB->getTerminator()) && - ProcessBranchOnLogical(CondInst, BB, - CondInst->getOpcode() == Instruction::And)) - return true; - if (CmpInst *CondCmp = dyn_cast<CmpInst>(CondInst)) { - if (isa<PHINode>(CondCmp->getOperand(0))) { - // If we have "br (phi != 42)" and the phi node has any constant values - // as operands, we can thread through this block. - // - // If we have "br (cmp phi, x)" and the phi node contains x such that the - // comparison uniquely identifies the branch target, we can thread - // through this block. - - if (ProcessBranchOnCompare(CondCmp, BB)) - return true; - } - - // If we have a comparison, loop over the predecessors to see if there is - // a condition with the same value. - pred_iterator PI = pred_begin(BB), E = pred_end(BB); - for (; PI != E; ++PI) - if (BranchInst *PBI = dyn_cast<BranchInst>((*PI)->getTerminator())) - if (PBI->isConditional() && *PI != BB) { - if (CmpInst *CI = dyn_cast<CmpInst>(PBI->getCondition())) { - if (CI->getOperand(0) == CondCmp->getOperand(0) && - CI->getOperand(1) == CondCmp->getOperand(1) && - CI->getPredicate() == CondCmp->getPredicate()) { - // TODO: Could handle things like (x != 4) --> (x == 17) - if (ProcessBranchOnDuplicateCond(*PI, BB)) - return true; + if (!LVI && + (!isa<PHINode>(CondCmp->getOperand(0)) || + cast<PHINode>(CondCmp->getOperand(0))->getParent() != BB)) { + // If we have a comparison, loop over the predecessors to see if there is + // a condition with a lexically identical value. + pred_iterator PI = pred_begin(BB), E = pred_end(BB); + for (; PI != E; ++PI) + if (BranchInst *PBI = dyn_cast<BranchInst>((*PI)->getTerminator())) + if (PBI->isConditional() && *PI != BB) { + if (CmpInst *CI = dyn_cast<CmpInst>(PBI->getCondition())) { + if (CI->getOperand(0) == CondCmp->getOperand(0) && + CI->getOperand(1) == CondCmp->getOperand(1) && + CI->getPredicate() == CondCmp->getPredicate()) { + // TODO: Could handle things like (x != 4) --> (x == 17) + if (ProcessBranchOnDuplicateCond(*PI, BB)) + return true; + } } } - } + } } // Check for some cases that are worth simplifying. Right now we want to look @@ -398,10 +585,21 @@ bool JumpThreading::ProcessBlock(BasicBlock *BB) { if (isa<Constant>(CondCmp->getOperand(1))) SimplifyValue = CondCmp->getOperand(0); + // TODO: There are other places where load PRE would be profitable, such as + // more complex comparisons. if (LoadInst *LI = dyn_cast<LoadInst>(SimplifyValue)) if (SimplifyPartiallyRedundantLoad(LI)) return true; + + // Handle a variety of cases where we are branching on something derived from + // a PHI node in the current block. If we can prove that any predecessors + // compute a predictable value based on a PHI node, thread those predecessors. + // + if (ProcessThreadableEdges(CondInst, BB)) + return true; + + // TODO: If we have: "br (X > 0)" and we have a predecessor where we know // "(X == 4)" thread through this block. @@ -459,8 +657,11 @@ bool JumpThreading::ProcessBranchOnDuplicateCond(BasicBlock *PredBB, // Next, figure out which successor we are threading to. BasicBlock *SuccBB = DestBI->getSuccessor(!BranchDir); + SmallVector<BasicBlock*, 2> Preds; + Preds.push_back(PredBB); + // Ok, try to thread it! 
- return ThreadEdge(BB, PredBB, SuccBB); + return ThreadEdge(BB, Preds, SuccBB); } /// ProcessSwitchOnDuplicateCond - We found a block and a predecessor of that @@ -553,7 +754,7 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) { Value *LoadedPtr = LI->getOperand(0); // If the loaded operand is defined in the LoadBB, it can't be available. - // FIXME: Could do PHI translation, that would be fun :) + // TODO: Could do simple PHI translation, that would be fun :) if (Instruction *PtrOp = dyn_cast<Instruction>(LoadedPtr)) if (PtrOp->getParent() == LoadBB) return false; @@ -562,8 +763,8 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) { // the entry to its block. BasicBlock::iterator BBIt = LI; - if (Value *AvailableVal = FindAvailableLoadedValue(LoadedPtr, LoadBB, - BBIt, 6)) { + if (Value *AvailableVal = + FindAvailableLoadedValue(LoadedPtr, LoadBB, BBIt, 6)) { // If the value if the load is locally available within the block, just use // it. This frequently occurs for reg2mem'd allocas. //cerr << "LOAD ELIMINATED:\n" << *BBIt << *LI << "\n"; @@ -646,7 +847,7 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) { // Split them out to their own block. UnavailablePred = SplitBlockPredecessors(LoadBB, &PredsToSplit[0], PredsToSplit.size(), - "thread-split", this); + "thread-pre-split", this); } // If the value isn't available in all predecessors, then there will be @@ -655,7 +856,8 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) { if (UnavailablePred) { assert(UnavailablePred->getTerminator()->getNumSuccessors() == 1 && "Can't handle critical edge here!"); - Value *NewVal = new LoadInst(LoadedPtr, LI->getName()+".pr", + Value *NewVal = new LoadInst(LoadedPtr, LI->getName()+".pr", false, + LI->getAlignment(), UnavailablePred->getTerminator()); AvailablePreds.push_back(std::make_pair(UnavailablePred, NewVal)); } @@ -690,55 +892,183 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) { return true; } - -/// ProcessJumpOnPHI - We have a conditional branch or switch on a PHI node in -/// the current block. See if there are any simplifications we can do based on -/// inputs to the phi node. -/// -bool JumpThreading::ProcessJumpOnPHI(PHINode *PN) { - BasicBlock *BB = PN->getParent(); +/// FindMostPopularDest - The specified list contains multiple possible +/// threadable destinations. Pick the one that occurs the most frequently in +/// the list. +static BasicBlock * +FindMostPopularDest(BasicBlock *BB, + const SmallVectorImpl<std::pair<BasicBlock*, + BasicBlock*> > &PredToDestList) { + assert(!PredToDestList.empty()); + + // Determine popularity. If there are multiple possible destinations, we + // explicitly choose to ignore 'undef' destinations. We prefer to thread + // blocks with known and real destinations to threading undef. We'll handle + // them later if interesting. + DenseMap<BasicBlock*, unsigned> DestPopularity; + for (unsigned i = 0, e = PredToDestList.size(); i != e; ++i) + if (PredToDestList[i].second) + DestPopularity[PredToDestList[i].second]++; + + // Find the most popular dest. + DenseMap<BasicBlock*, unsigned>::iterator DPI = DestPopularity.begin(); + BasicBlock *MostPopularDest = DPI->first; + unsigned Popularity = DPI->second; + SmallVector<BasicBlock*, 4> SamePopularity; + + for (++DPI; DPI != DestPopularity.end(); ++DPI) { + // If the popularity of this entry isn't higher than the popularity we've + // seen so far, ignore it. + if (DPI->second < Popularity) + ; // ignore. 
+ else if (DPI->second == Popularity) { + // If it is the same as what we've seen so far, keep track of it. + SamePopularity.push_back(DPI->first); + } else { + // If it is more popular, remember it. + SamePopularity.clear(); + MostPopularDest = DPI->first; + Popularity = DPI->second; + } + } - // See if the phi node has any constant integer or undef values. If so, we - // can determine where the corresponding predecessor will branch. - for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { - Value *PredVal = PN->getIncomingValue(i); - - // Check to see if this input is a constant integer. If so, the direction - // of the branch is predictable. - if (ConstantInt *CI = dyn_cast<ConstantInt>(PredVal)) { - // Merge any common predecessors that will act the same. - BasicBlock *PredBB = FactorCommonPHIPreds(PN, CI); + // Okay, now we know the most popular destination. If there is more than + // destination, we need to determine one. This is arbitrary, but we need + // to make a deterministic decision. Pick the first one that appears in the + // successor list. + if (!SamePopularity.empty()) { + SamePopularity.push_back(MostPopularDest); + TerminatorInst *TI = BB->getTerminator(); + for (unsigned i = 0; ; ++i) { + assert(i != TI->getNumSuccessors() && "Didn't find any successor!"); - BasicBlock *SuccBB; - if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator())) - SuccBB = BI->getSuccessor(CI->isZero()); - else { - SwitchInst *SI = cast<SwitchInst>(BB->getTerminator()); - SuccBB = SI->getSuccessor(SI->findCaseValue(CI)); - } + if (std::find(SamePopularity.begin(), SamePopularity.end(), + TI->getSuccessor(i)) == SamePopularity.end()) + continue; - // Ok, try to thread it! - return ThreadEdge(BB, PredBB, SuccBB); + MostPopularDest = TI->getSuccessor(i); + break; } + } + + // Okay, we have finally picked the most popular destination. + return MostPopularDest; +} + +bool JumpThreading::ProcessThreadableEdges(Value *Cond, BasicBlock *BB) { + // If threading this would thread across a loop header, don't even try to + // thread the edge. + if (LoopHeaders.count(BB)) + return false; + + SmallVector<std::pair<ConstantInt*, BasicBlock*>, 8> PredValues; + if (!ComputeValueKnownInPredecessors(Cond, BB, PredValues)) + return false; + assert(!PredValues.empty() && + "ComputeValueKnownInPredecessors returned true with no values"); + + DEBUG(errs() << "IN BB: " << *BB; + for (unsigned i = 0, e = PredValues.size(); i != e; ++i) { + errs() << " BB '" << BB->getName() << "': FOUND condition = "; + if (PredValues[i].first) + errs() << *PredValues[i].first; + else + errs() << "UNDEF"; + errs() << " for pred '" << PredValues[i].second->getName() + << "'.\n"; + }); + + // Decide what we want to thread through. Convert our list of known values to + // a list of known destinations for each pred. This also discards duplicate + // predecessors and keeps track of the undefined inputs (which are represented + // as a null dest in the PredToDestList). + SmallPtrSet<BasicBlock*, 16> SeenPreds; + SmallVector<std::pair<BasicBlock*, BasicBlock*>, 16> PredToDestList; + + BasicBlock *OnlyDest = 0; + BasicBlock *MultipleDestSentinel = (BasicBlock*)(intptr_t)~0ULL; + + for (unsigned i = 0, e = PredValues.size(); i != e; ++i) { + BasicBlock *Pred = PredValues[i].second; + if (!SeenPreds.insert(Pred)) + continue; // Duplicate predecessor entry. - // If the input is an undef, then it doesn't matter which way it will go. - // Pick an arbitrary dest and thread the edge. 
- if (UndefValue *UV = dyn_cast<UndefValue>(PredVal)) { - // Merge any common predecessors that will act the same. - BasicBlock *PredBB = FactorCommonPHIPreds(PN, UV); - BasicBlock *SuccBB = - BB->getTerminator()->getSuccessor(GetBestDestForJumpOnUndef(BB)); - - // Ok, try to thread it! - return ThreadEdge(BB, PredBB, SuccBB); + // If the predecessor ends with an indirect goto, we can't change its + // destination. + if (isa<IndirectBrInst>(Pred->getTerminator())) + continue; + + ConstantInt *Val = PredValues[i].first; + + BasicBlock *DestBB; + if (Val == 0) // Undef. + DestBB = 0; + else if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator())) + DestBB = BI->getSuccessor(Val->isZero()); + else { + SwitchInst *SI = cast<SwitchInst>(BB->getTerminator()); + DestBB = SI->getSuccessor(SI->findCaseValue(Val)); } + + // If we have exactly one destination, remember it for efficiency below. + if (i == 0) + OnlyDest = DestBB; + else if (OnlyDest != DestBB) + OnlyDest = MultipleDestSentinel; + + PredToDestList.push_back(std::make_pair(Pred, DestBB)); } - // If the incoming values are all variables, we don't know the destination of - // any predecessors. However, if any of the predecessor blocks end in an - // unconditional branch, we can *duplicate* the jump into that block in order - // to further encourage jump threading and to eliminate cases where we have - // branch on a phi of an icmp (branch on icmp is much better). + // If all edges were unthreadable, we fail. + if (PredToDestList.empty()) + return false; + + // Determine which is the most common successor. If we have many inputs and + // this block is a switch, we want to start by threading the batch that goes + // to the most popular destination first. If we only know about one + // threadable destination (the common case) we can avoid this. + BasicBlock *MostPopularDest = OnlyDest; + + if (MostPopularDest == MultipleDestSentinel) + MostPopularDest = FindMostPopularDest(BB, PredToDestList); + + // Now that we know what the most popular destination is, factor all + // predecessors that will jump to it into a single predecessor. + SmallVector<BasicBlock*, 16> PredsToFactor; + for (unsigned i = 0, e = PredToDestList.size(); i != e; ++i) + if (PredToDestList[i].second == MostPopularDest) { + BasicBlock *Pred = PredToDestList[i].first; + + // This predecessor may be a switch or something else that has multiple + // edges to the block. Factor each of these edges by listing them + // according to # occurrences in PredsToFactor. + TerminatorInst *PredTI = Pred->getTerminator(); + for (unsigned i = 0, e = PredTI->getNumSuccessors(); i != e; ++i) + if (PredTI->getSuccessor(i) == BB) + PredsToFactor.push_back(Pred); + } + + // If the threadable edges are branching on an undefined value, we get to pick + // the destination that these predecessors should get to. + if (MostPopularDest == 0) + MostPopularDest = BB->getTerminator()-> + getSuccessor(GetBestDestForJumpOnUndef(BB)); + + // Ok, try to thread it! + return ThreadEdge(BB, PredsToFactor, MostPopularDest); +} + +/// ProcessJumpOnPHI - We have a conditional branch or switch on a PHI node in +/// the current block. See if there are any simplifications we can do based on +/// inputs to the phi node. 
+/// +bool JumpThreading::ProcessJumpOnPHI(PHINode *PN) { + BasicBlock *BB = PN->getParent(); + + // If any of the predecessor blocks end in an unconditional branch, we can + // *duplicate* the jump into that block in order to further encourage jump + // threading and to eliminate cases where we have branch on a phi of an icmp + // (branch on icmp is much better). // We don't want to do this tranformation for switches, because we don't // really want to duplicate a switch. @@ -759,137 +1089,6 @@ bool JumpThreading::ProcessJumpOnPHI(PHINode *PN) { } -/// ProcessJumpOnLogicalPHI - PN's basic block contains a conditional branch -/// whose condition is an AND/OR where one side is PN. If PN has constant -/// operands that permit us to evaluate the condition for some operand, thread -/// through the block. For example with: -/// br (and X, phi(Y, Z, false)) -/// the predecessor corresponding to the 'false' will always jump to the false -/// destination of the branch. -/// -bool JumpThreading::ProcessBranchOnLogical(Value *V, BasicBlock *BB, - bool isAnd) { - // If this is a binary operator tree of the same AND/OR opcode, check the - // LHS/RHS. - if (BinaryOperator *BO = dyn_cast<BinaryOperator>(V)) - if ((isAnd && BO->getOpcode() == Instruction::And) || - (!isAnd && BO->getOpcode() == Instruction::Or)) { - if (ProcessBranchOnLogical(BO->getOperand(0), BB, isAnd)) - return true; - if (ProcessBranchOnLogical(BO->getOperand(1), BB, isAnd)) - return true; - } - - // If this isn't a PHI node, we can't handle it. - PHINode *PN = dyn_cast<PHINode>(V); - if (!PN || PN->getParent() != BB) return false; - - // We can only do the simplification for phi nodes of 'false' with AND or - // 'true' with OR. See if we have any entries in the phi for this. - unsigned PredNo = ~0U; - ConstantInt *PredCst = ConstantInt::get(Type::getInt1Ty(BB->getContext()), - !isAnd); - for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { - if (PN->getIncomingValue(i) == PredCst) { - PredNo = i; - break; - } - } - - // If no match, bail out. - if (PredNo == ~0U) - return false; - - // If so, we can actually do this threading. Merge any common predecessors - // that will act the same. - BasicBlock *PredBB = FactorCommonPHIPreds(PN, PredCst); - - // Next, figure out which successor we are threading to. If this was an AND, - // the constant must be FALSE, and we must be targeting the 'false' block. - // If this is an OR, the constant must be TRUE, and we must be targeting the - // 'true' block. - BasicBlock *SuccBB = BB->getTerminator()->getSuccessor(isAnd); - - // Ok, try to thread it! - return ThreadEdge(BB, PredBB, SuccBB); -} - -/// GetResultOfComparison - Given an icmp/fcmp predicate and the left and right -/// hand sides of the compare instruction, try to determine the result. If the -/// result can not be determined, a null pointer is returned. -static Constant *GetResultOfComparison(CmpInst::Predicate pred, - Value *LHS, Value *RHS, - LLVMContext &Context) { - if (Constant *CLHS = dyn_cast<Constant>(LHS)) - if (Constant *CRHS = dyn_cast<Constant>(RHS)) - return ConstantExpr::getCompare(pred, CLHS, CRHS); - - if (LHS == RHS) - if (isa<IntegerType>(LHS->getType()) || isa<PointerType>(LHS->getType())) - return ICmpInst::isTrueWhenEqual(pred) ? - ConstantInt::getTrue(Context) : ConstantInt::getFalse(Context); - - return 0; -} - -/// ProcessBranchOnCompare - We found a branch on a comparison between a phi -/// node and a value. 
If we can identify when the comparison is true between -/// the phi inputs and the value, we can fold the compare for that edge and -/// thread through it. -bool JumpThreading::ProcessBranchOnCompare(CmpInst *Cmp, BasicBlock *BB) { - PHINode *PN = cast<PHINode>(Cmp->getOperand(0)); - Value *RHS = Cmp->getOperand(1); - - // If the phi isn't in the current block, an incoming edge to this block - // doesn't control the destination. - if (PN->getParent() != BB) - return false; - - // We can do this simplification if any comparisons fold to true or false. - // See if any do. - Value *PredVal = 0; - bool TrueDirection = false; - for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { - PredVal = PN->getIncomingValue(i); - - Constant *Res = GetResultOfComparison(Cmp->getPredicate(), PredVal, - RHS, Cmp->getContext()); - if (!Res) { - PredVal = 0; - continue; - } - - // If this folded to a constant expr, we can't do anything. - if (ConstantInt *ResC = dyn_cast<ConstantInt>(Res)) { - TrueDirection = ResC->getZExtValue(); - break; - } - // If this folded to undef, just go the false way. - if (isa<UndefValue>(Res)) { - TrueDirection = false; - break; - } - - // Otherwise, we can't fold this input. - PredVal = 0; - } - - // If no match, bail out. - if (PredVal == 0) - return false; - - // If so, we can actually do this threading. Merge any common predecessors - // that will act the same. - BasicBlock *PredBB = FactorCommonPHIPreds(PN, PredVal); - - // Next, get our successor. - BasicBlock *SuccBB = BB->getTerminator()->getSuccessor(!TrueDirection); - - // Ok, try to thread it! - return ThreadEdge(BB, PredBB, SuccBB); -} - - /// AddPHINodeEntriesForMappedBlock - We're adding 'NewPred' as a new /// predecessor to the PHIBB block. If it has PHI nodes, add entries for /// NewPred using the entries from OldPred (suitably mapped). @@ -914,10 +1113,11 @@ static void AddPHINodeEntriesForMappedBlock(BasicBlock *PHIBB, } } -/// ThreadEdge - We have decided that it is safe and profitable to thread an -/// edge from PredBB to SuccBB across BB. Transform the IR to reflect this -/// change. -bool JumpThreading::ThreadEdge(BasicBlock *BB, BasicBlock *PredBB, +/// ThreadEdge - We have decided that it is safe and profitable to factor the +/// blocks in PredBBs to one predecessor, then thread an edge from it to SuccBB +/// across BB. Transform the IR to reflect this change. +bool JumpThreading::ThreadEdge(BasicBlock *BB, + const SmallVectorImpl<BasicBlock*> &PredBBs, BasicBlock *SuccBB) { // If threading to the same block as we come from, we would infinite loop. if (SuccBB == BB) { @@ -929,8 +1129,7 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB, BasicBlock *PredBB, // If threading this would thread across a loop header, don't thread the edge. // See the comments above FindLoopHeaders for justifications and caveats. if (LoopHeaders.count(BB)) { - DEBUG(errs() << " Not threading from '" << PredBB->getName() - << "' across loop header BB '" << BB->getName() + DEBUG(errs() << " Not threading across loop header BB '" << BB->getName() << "' to dest BB '" << SuccBB->getName() << "' - it might create an irreducible loop!\n"); return false; @@ -943,6 +1142,17 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB, BasicBlock *PredBB, return false; } + // And finally, do it! Start by factoring the predecessors is needed. 
+ BasicBlock *PredBB; + if (PredBBs.size() == 1) + PredBB = PredBBs[0]; + else { + DEBUG(errs() << " Factoring out " << PredBBs.size() + << " common predecessors.\n"); + PredBB = SplitBlockPredecessors(BB, &PredBBs[0], PredBBs.size(), + ".thr_comm", this); + } + // And finally, do it! DEBUG(errs() << " Threading edge from '" << PredBB->getName() << "' to '" << SuccBB->getName() << "' with cost: " << JumpThreadCost @@ -1034,7 +1244,7 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB, BasicBlock *PredBB, TerminatorInst *PredTerm = PredBB->getTerminator(); for (unsigned i = 0, e = PredTerm->getNumSuccessors(); i != e; ++i) if (PredTerm->getSuccessor(i) == BB) { - BB->removePredecessor(PredBB); + RemovePredecessorAndSimplify(BB, PredBB, TD); PredTerm->setSuccessor(i, NewBB); } @@ -1044,9 +1254,12 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB, BasicBlock *PredBB, BI = NewBB->begin(); for (BasicBlock::iterator E = NewBB->end(); BI != E; ) { Instruction *Inst = BI++; - if (Constant *C = ConstantFoldInstruction(Inst, BB->getContext(), TD)) { - Inst->replaceAllUsesWith(C); - Inst->eraseFromParent(); + + if (Value *V = SimplifyInstruction(Inst, TD)) { + WeakVH BIHandle(BI); + ReplaceAndSimplifyAllUses(Inst, V, TD); + if (BIHandle == 0) + BI = NewBB->begin(); continue; } @@ -1164,7 +1377,7 @@ bool JumpThreading::DuplicateCondBranchOnPHIIntoPred(BasicBlock *BB, // PredBB no longer jumps to BB, remove entries in the PHI node for the edge // that we nuked. - BB->removePredecessor(PredBB); + RemovePredecessorAndSimplify(BB, PredBB, TD); // Remove the unconditional branch at the end of the PredBB block. OldPredBranch->eraseFromParent(); diff --git a/lib/Transforms/Scalar/LICM.cpp b/lib/Transforms/Scalar/LICM.cpp index 756fbf3..104c873 100644 --- a/lib/Transforms/Scalar/LICM.cpp +++ b/lib/Transforms/Scalar/LICM.cpp @@ -263,7 +263,6 @@ bool LICM::runOnLoop(Loop *L, LPPassManager &LPM) { // Get the preheader block to move instructions into... Preheader = L->getLoopPreheader(); - assert(Preheader&&"Preheader insertion pass guarantees we have a preheader!"); // Loop over the body of this loop, looking for calls, invokes, and stores. // Because subloops have already been incorporated into AST, we skip blocks in @@ -286,12 +285,14 @@ bool LICM::runOnLoop(Loop *L, LPPassManager &LPM) { // us to sink instructions in one pass, without iteration. After sinking // instructions, we perform another pass to hoist them out of the loop. // - SinkRegion(DT->getNode(L->getHeader())); - HoistRegion(DT->getNode(L->getHeader())); + if (L->hasDedicatedExits()) + SinkRegion(DT->getNode(L->getHeader())); + if (Preheader) + HoistRegion(DT->getNode(L->getHeader())); // Now that all loop invariants have been removed from the loop, promote any // memory references to scalars that we can... - if (!DisablePromotion) + if (!DisablePromotion && Preheader && L->hasDedicatedExits()) PromoteValuesInLoop(); // Clear out loops state information for the next iteration diff --git a/lib/Transforms/Scalar/LoopDeletion.cpp b/lib/Transforms/Scalar/LoopDeletion.cpp index 866d8b4..48817ab 100644 --- a/lib/Transforms/Scalar/LoopDeletion.cpp +++ b/lib/Transforms/Scalar/LoopDeletion.cpp @@ -115,6 +115,10 @@ bool LoopDeletion::runOnLoop(Loop* L, LPPassManager& LPM) { if (!preheader) return false; + // If LoopSimplify form is not available, stay out of trouble. + if (!L->hasDedicatedExits()) + return false; + // We can't remove loops that contain subloops. 
If the subloops were dead, // they would already have been removed in earlier executions of this pass. if (L->begin() != L->end()) diff --git a/lib/Transforms/Scalar/LoopIndexSplit.cpp b/lib/Transforms/Scalar/LoopIndexSplit.cpp index 920d85c..8b6a233 100644 --- a/lib/Transforms/Scalar/LoopIndexSplit.cpp +++ b/lib/Transforms/Scalar/LoopIndexSplit.cpp @@ -209,6 +209,10 @@ bool LoopIndexSplit::runOnLoop(Loop *IncomingLoop, LPPassManager &LPM_Ref) { L = IncomingLoop; LPM = &LPM_Ref; + // If LoopSimplify form is not available, stay out of trouble. + if (!L->isLoopSimplifyForm()) + return false; + // FIXME - Nested loops make dominator info updates tricky. if (!L->getSubLoops().empty()) return false; diff --git a/lib/Transforms/Scalar/LoopRotation.cpp b/lib/Transforms/Scalar/LoopRotation.cpp index 7a4bb35..5004483 100644 --- a/lib/Transforms/Scalar/LoopRotation.cpp +++ b/lib/Transforms/Scalar/LoopRotation.cpp @@ -15,7 +15,6 @@ #include "llvm/Transforms/Scalar.h" #include "llvm/Function.h" #include "llvm/IntrinsicInst.h" -#include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/ScalarEvolution.h" @@ -49,6 +48,7 @@ namespace { AU.addRequiredID(LCSSAID); AU.addPreservedID(LCSSAID); AU.addPreserved<ScalarEvolution>(); + AU.addRequired<LoopInfo>(); AU.addPreserved<LoopInfo>(); AU.addPreserved<DominatorTree>(); AU.addPreserved<DominanceFrontier>(); @@ -104,17 +104,18 @@ bool LoopRotate::runOnLoop(Loop *Lp, LPPassManager &LPM) { bool LoopRotate::rotateLoop(Loop *Lp, LPPassManager &LPM) { L = Lp; - OrigHeader = L->getHeader(); OrigPreHeader = L->getLoopPreheader(); + if (!OrigPreHeader) return false; + OrigLatch = L->getLoopLatch(); + if (!OrigLatch) return false; + + OrigHeader = L->getHeader(); // If the loop has only one block then there is not much to rotate. if (L->getBlocks().size() == 1) return false; - assert(OrigHeader && OrigLatch && OrigPreHeader && - "Loop is not in canonical form"); - // If the loop header is not one of the loop exiting blocks then // either this loop is already rotated or it is not // suitable for loop rotation transformations. 
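A minimal sketch (not part of the patch) of the guard that the LICM, LoopDeletion, LoopIndexSplit, and LoopRotation hunks above converge on: rather than asserting that LoopSimplify's canonical shape is present, a loop pass now checks for it and declines to transform. It uses the same Loop queries the patch itself calls (getLoopPreheader, getLoopLatch, hasDedicatedExits, isLoopSimplifyForm); GuardedLoopPass is a hypothetical name used only for illustration.

#include "llvm/Analysis/LoopPass.h"
using namespace llvm;

namespace {
  struct GuardedLoopPass : public LoopPass {
    static char ID;
    GuardedLoopPass() : LoopPass(&ID) {}

    virtual bool runOnLoop(Loop *L, LPPassManager &LPM) {
      // No preheader, no latch, or exit blocks shared with other loops means
      // the canonical form this transform assumes is missing (for example when
      // an indirectbr terminator keeps LoopSimplify from splitting the needed
      // edges), so stay out of trouble and leave the loop alone.
      // The LoopIndexSplit hunk expresses the same test as
      // L->isLoopSimplifyForm().
      if (!L->getLoopPreheader() || !L->getLoopLatch() ||
          !L->hasDedicatedExits())
        return false;

      // ... the actual transformation would run here ...
      return false;
    }
  };
  char GuardedLoopPass::ID = 0;
}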
@@ -287,7 +288,7 @@ void LoopRotate::preserveCanonicalLoopForm(LPPassManager &LPM) { "bb.nph", OrigHeader->getParent(), NewHeader); - LoopInfo &LI = LPM.getAnalysis<LoopInfo>(); + LoopInfo &LI = getAnalysis<LoopInfo>(); if (Loop *PL = LI.getLoopFor(OrigPreHeader)) PL->addBasicBlockToLoop(NewPreHeader, LI.getBase()); BranchInst::Create(NewHeader, NewPreHeader); diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp index e20fb16..564c7ac 100644 --- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -51,6 +51,7 @@ STATISTIC(NumEliminated, "Number of strides eliminated"); STATISTIC(NumShadow, "Number of Shadow IVs optimized"); STATISTIC(NumImmSunk, "Number of common expr immediates sunk into uses"); STATISTIC(NumLoopCond, "Number of loop terminating conds optimized"); +STATISTIC(NumCountZero, "Number of count iv optimized to count toward zero"); static cl::opt<bool> EnableFullLSRMode("enable-full-lsr", cl::init(false), @@ -107,7 +108,7 @@ namespace { public: static char ID; // Pass ID, replacement for typeid - explicit LoopStrengthReduce(const TargetLowering *tli = NULL) : + explicit LoopStrengthReduce(const TargetLowering *tli = NULL) : LoopPass(&ID), TLI(tli) { } @@ -131,12 +132,10 @@ namespace { } private: - ICmpInst *ChangeCompareStride(Loop *L, ICmpInst *Cond, - IVStrideUse* &CondUse, - const SCEV *const * &CondStride); - void OptimizeIndvars(Loop *L); - void OptimizeLoopCountIV(Loop *L); + + /// OptimizeLoopTermCond - Change loop terminating condition to use the + /// postinc iv when possible. void OptimizeLoopTermCond(Loop *L); /// OptimizeShadowIV - If IV is used in a int-to-float cast @@ -148,8 +147,28 @@ namespace { ICmpInst *OptimizeMax(Loop *L, ICmpInst *Cond, IVStrideUse* &CondUse); + /// OptimizeLoopCountIV - If, after all sharing of IVs, the IV used for + /// deciding when to exit the loop is used only for that purpose, try to + /// rearrange things so it counts down to a test against zero. + bool OptimizeLoopCountIV(Loop *L); + bool OptimizeLoopCountIVOfStride(const SCEV* &Stride, + IVStrideUse* &CondUse, Loop *L); + + /// StrengthReduceIVUsersOfStride - Strength reduce all of the users of a + /// single stride of IV. All of the users may have different starting + /// values, and this may not be the only stride. 
+ void StrengthReduceIVUsersOfStride(const SCEV *const &Stride, + IVUsersOfOneStride &Uses, + Loop *L); + void StrengthReduceIVUsers(Loop *L); + + ICmpInst *ChangeCompareStride(Loop *L, ICmpInst *Cond, + IVStrideUse* &CondUse, + const SCEV* &CondStride, + bool PostPass = false); + bool FindIVUserForCond(ICmpInst *Cond, IVStrideUse *&CondUse, - const SCEV *const * &CondStride); + const SCEV* &CondStride); bool RequiresTypeConversion(const Type *Ty, const Type *NewTy); const SCEV *CheckForIVReuse(bool, bool, bool, const SCEV *const&, IVExpr&, const Type*, @@ -164,6 +183,7 @@ namespace { bool &AllUsesAreAddresses, bool &AllUsesAreOutsideLoop, std::vector<BasedUser> &UsersToProcess); + bool StrideMightBeShared(const SCEV *Stride, Loop *L, bool CheckPreInc); bool ShouldUseFullStrengthReductionMode( const std::vector<BasedUser> &UsersToProcess, const Loop *L, @@ -188,9 +208,7 @@ namespace { Instruction *IVIncInsertPt, const Loop *L, SCEVExpander &PreheaderRewriter); - void StrengthReduceStridedIVUsers(const SCEV *const &Stride, - IVUsersOfOneStride &Uses, - Loop *L); + void DeleteTriviallyDeadInstructions(); }; } @@ -208,11 +226,11 @@ Pass *llvm::createLoopStrengthReducePass(const TargetLowering *TLI) { /// their operands subsequently dead. void LoopStrengthReduce::DeleteTriviallyDeadInstructions() { if (DeadInsts.empty()) return; - + while (!DeadInsts.empty()) { Instruction *I = dyn_cast_or_null<Instruction>(DeadInsts.back()); DeadInsts.pop_back(); - + if (I == 0 || !isInstructionTriviallyDead(I)) continue; @@ -223,14 +241,14 @@ void LoopStrengthReduce::DeleteTriviallyDeadInstructions() { DeadInsts.push_back(U); } } - + I->eraseFromParent(); Changed = true; } } -/// containsAddRecFromDifferentLoop - Determine whether expression S involves a -/// subexpression that is an AddRec from a loop other than L. An outer loop +/// containsAddRecFromDifferentLoop - Determine whether expression S involves a +/// subexpression that is an AddRec from a loop other than L. An outer loop /// of L is OK, but not an inner loop nor a disjoint loop. static bool containsAddRecFromDifferentLoop(const SCEV *S, Loop *L) { // This is very common, put it first. @@ -256,7 +274,7 @@ static bool containsAddRecFromDifferentLoop(const SCEV *S, Loop *L) { return containsAddRecFromDifferentLoop(DE->getLHS(), L) || containsAddRecFromDifferentLoop(DE->getRHS(), L); #if 0 - // SCEVSDivExpr has been backed out temporarily, but will be back; we'll + // SCEVSDivExpr has been backed out temporarily, but will be back; we'll // need this when it is. if (const SCEVSDivExpr *DE = dyn_cast<SCEVSDivExpr>(S)) return containsAddRecFromDifferentLoop(DE->getLHS(), L) || @@ -328,7 +346,7 @@ namespace { /// field to the Imm field (below). BasedUser values are sorted by this /// field. const SCEV *Base; - + /// Inst - The instruction using the induction variable. Instruction *Inst; @@ -352,11 +370,11 @@ namespace { // instruction for a loop and uses outside the loop that are dominated by // the loop. 
bool isUseOfPostIncrementedValue; - + BasedUser(IVStrideUse &IVSU, ScalarEvolution *se) : SE(se), Base(IVSU.getOffset()), Inst(IVSU.getUser()), OperandValToReplace(IVSU.getOperandValToReplace()), - Imm(SE->getIntegerSCEV(0, Base->getType())), + Imm(SE->getIntegerSCEV(0, Base->getType())), isUseOfPostIncrementedValue(IVSU.isUseOfPostIncrementedValue()) {} // Once we rewrite the code to insert the new IVs we want, update the @@ -367,8 +385,8 @@ namespace { SCEVExpander &Rewriter, Loop *L, Pass *P, LoopInfo &LI, SmallVectorImpl<WeakVH> &DeadInsts); - - Value *InsertCodeForBaseAtPosition(const SCEV *const &NewBase, + + Value *InsertCodeForBaseAtPosition(const SCEV *const &NewBase, const Type *Ty, SCEVExpander &Rewriter, Instruction *IP, Loop *L, @@ -383,7 +401,7 @@ void BasedUser::dump() const { errs() << " Inst: " << *Inst; } -Value *BasedUser::InsertCodeForBaseAtPosition(const SCEV *const &NewBase, +Value *BasedUser::InsertCodeForBaseAtPosition(const SCEV *const &NewBase, const Type *Ty, SCEVExpander &Rewriter, Instruction *IP, Loop *L, @@ -393,10 +411,10 @@ Value *BasedUser::InsertCodeForBaseAtPosition(const SCEV *const &NewBase, // want to insert this expression before the user, we'd rather pull it out as // many loops as possible. Instruction *BaseInsertPt = IP; - + // Figure out the most-nested loop that IP is in. Loop *InsertLoop = LI.getLoopFor(IP->getParent()); - + // If InsertLoop is not L, and InsertLoop is nested inside of L, figure out // the preheader of the outer-most loop where NewBase is not loop invariant. if (L->contains(IP->getParent())) @@ -404,7 +422,7 @@ Value *BasedUser::InsertCodeForBaseAtPosition(const SCEV *const &NewBase, BaseInsertPt = InsertLoop->getLoopPreheader()->getTerminator(); InsertLoop = InsertLoop->getParentLoop(); } - + Value *Base = Rewriter.expandCodeFor(NewBase, 0, BaseInsertPt); const SCEV *NewValSCEV = SE->getUnknown(Base); @@ -430,7 +448,7 @@ void BasedUser::RewriteInstructionToUseNewBase(const SCEV *const &NewBase, if (!isa<PHINode>(Inst)) { // By default, insert code at the user instruction. BasicBlock::iterator InsertPt = Inst; - + // However, if the Operand is itself an instruction, the (potentially // complex) inserted code may be shared by many users. Because of this, we // want to emit code for the computation of the operand right before its old @@ -442,7 +460,7 @@ void BasedUser::RewriteInstructionToUseNewBase(const SCEV *const &NewBase, // // If this is a use outside the loop (which means after, since it is based // on a loop indvar) we use the post-incremented value, so that we don't - // artificially make the preinc value live out the bottom of the loop. + // artificially make the preinc value live out the bottom of the loop. if (!isUseOfPostIncrementedValue && L->contains(Inst->getParent())) { if (NewBasePt && isa<PHINode>(OperandValToReplace)) { InsertPt = NewBasePt; @@ -477,7 +495,7 @@ void BasedUser::RewriteInstructionToUseNewBase(const SCEV *const &NewBase, if (PN->getIncomingValue(i) == OperandValToReplace) { // If the original expression is outside the loop, put the replacement // code in the same place as the original expression, - // which need not be an immediate predecessor of this PHI. This way we + // which need not be an immediate predecessor of this PHI. This way we // need only one copy of it even if it is referenced multiple times in // the PHI. 
We don't do this when the original expression is inside the // loop because multiple copies sometimes do useful sinking of code in @@ -490,6 +508,7 @@ void BasedUser::RewriteInstructionToUseNewBase(const SCEV *const &NewBase, // is the canonical backedge for this loop, as this can make some // inserted code be in an illegal position. if (e != 1 && PHIPred->getTerminator()->getNumSuccessors() > 1 && + !isa<IndirectBrInst>(PHIPred->getTerminator()) && (PN->getParent() != L->getHeader() || !L->contains(PHIPred))) { // First step, split the critical edge. @@ -572,11 +591,11 @@ static bool fitsInAddressMode(const SCEV *const &V, const Type *AccessTy, static void MoveLoopVariantsToImmediateField(const SCEV *&Val, const SCEV *&Imm, Loop *L, ScalarEvolution *SE) { if (Val->isLoopInvariant(L)) return; // Nothing to do. - + if (const SCEVAddExpr *SAE = dyn_cast<SCEVAddExpr>(Val)) { SmallVector<const SCEV *, 4> NewOps; NewOps.reserve(SAE->getNumOperands()); - + for (unsigned i = 0; i != SAE->getNumOperands(); ++i) if (!SAE->getOperand(i)->isLoopInvariant(L)) { // If this is a loop-variant expression, it must stay in the immediate @@ -594,7 +613,7 @@ static void MoveLoopVariantsToImmediateField(const SCEV *&Val, const SCEV *&Imm, // Try to pull immediates out of the start value of nested addrec's. const SCEV *Start = SARE->getStart(); MoveLoopVariantsToImmediateField(Start, Imm, L, SE); - + SmallVector<const SCEV *, 4> Ops(SARE->op_begin(), SARE->op_end()); Ops[0] = Start; Val = SE->getAddRecExpr(Ops, SARE->getLoop()); @@ -617,11 +636,11 @@ static void MoveImmediateValues(const TargetLowering *TLI, if (const SCEVAddExpr *SAE = dyn_cast<SCEVAddExpr>(Val)) { SmallVector<const SCEV *, 4> NewOps; NewOps.reserve(SAE->getNumOperands()); - + for (unsigned i = 0; i != SAE->getNumOperands(); ++i) { const SCEV *NewOp = SAE->getOperand(i); MoveImmediateValues(TLI, AccessTy, NewOp, Imm, isAddress, L, SE); - + if (!NewOp->isLoopInvariant(L)) { // If this is a loop-variant expression, it must stay in the immediate // field of the expression. @@ -640,7 +659,7 @@ static void MoveImmediateValues(const TargetLowering *TLI, // Try to pull immediates out of the start value of nested addrec's. const SCEV *Start = SARE->getStart(); MoveImmediateValues(TLI, AccessTy, Start, Imm, isAddress, L, SE); - + if (Start != SARE->getStart()) { SmallVector<const SCEV *, 4> Ops(SARE->op_begin(), SARE->op_end()); Ops[0] = Start; @@ -656,8 +675,8 @@ static void MoveImmediateValues(const TargetLowering *TLI, const SCEV *SubImm = SE->getIntegerSCEV(0, Val->getType()); const SCEV *NewOp = SME->getOperand(1); MoveImmediateValues(TLI, AccessTy, NewOp, SubImm, isAddress, L, SE); - - // If we extracted something out of the subexpressions, see if we can + + // If we extracted something out of the subexpressions, see if we can // simplify this! if (NewOp != SME->getOperand(1)) { // Scale SubImm up by "8". If the result is a target constant, we are @@ -666,7 +685,7 @@ static void MoveImmediateValues(const TargetLowering *TLI, if (fitsInAddressMode(SubImm, AccessTy, TLI, false)) { // Accumulate the immediate. Imm = SE->getAddExpr(Imm, SubImm); - + // Update what is left of 'Val'. Val = SE->getMulExpr(SME->getOperand(0), NewOp); return; @@ -714,7 +733,7 @@ static void SeparateSubExprs(SmallVector<const SCEV *, 16> &SubExprs, SmallVector<const SCEV *, 4> Ops(SARE->op_begin(), SARE->op_end()); Ops[0] = Zero; // Start with zero base. 
SubExprs.push_back(SE->getAddRecExpr(Ops, SARE->getLoop())); - + SeparateSubExprs(SubExprs, SARE->getOperand(0), SE); } @@ -724,7 +743,7 @@ static void SeparateSubExprs(SmallVector<const SCEV *, 16> &SubExprs, } } -// This is logically local to the following function, but C++ says we have +// This is logically local to the following function, but C++ says we have // to make it file scope. struct SubExprUseData { unsigned Count; bool notAllUsesAreFree; }; @@ -762,7 +781,7 @@ RemoveCommonExpressionsFromUseBases(std::vector<BasedUser> &Uses, // an addressing mode "for free"; such expressions are left within the loop. // struct SubExprUseData { unsigned Count; bool notAllUsesAreFree; }; std::map<const SCEV *, SubExprUseData> SubExpressionUseData; - + // UniqueSubExprs - Keep track of all of the subexpressions we see in the // order we see them. SmallVector<const SCEV *, 16> UniqueSubExprs; @@ -779,7 +798,7 @@ RemoveCommonExpressionsFromUseBases(std::vector<BasedUser> &Uses, if (!L->contains(Uses[i].Inst->getParent())) continue; NumUsesInsideLoop++; - + // If the base is zero (which is common), return zero now, there are no // CSEs we can find. if (Uses[i].Base == Zero) return Zero; @@ -811,13 +830,13 @@ RemoveCommonExpressionsFromUseBases(std::vector<BasedUser> &Uses, // Now that we know how many times each is used, build Result. Iterate over // UniqueSubexprs so that we have a stable ordering. for (unsigned i = 0, e = UniqueSubExprs.size(); i != e; ++i) { - std::map<const SCEV *, SubExprUseData>::iterator I = + std::map<const SCEV *, SubExprUseData>::iterator I = SubExpressionUseData.find(UniqueSubExprs[i]); assert(I != SubExpressionUseData.end() && "Entry not found?"); - if (I->second.Count == NumUsesInsideLoop) { // Found CSE! + if (I->second.Count == NumUsesInsideLoop) { // Found CSE! if (I->second.notAllUsesAreFree) Result = SE->getAddExpr(Result, I->first); - else + else FreeResult = SE->getAddExpr(FreeResult, I->first); } else // Remove non-cse's from SubExpressionUseData. @@ -849,13 +868,13 @@ RemoveCommonExpressionsFromUseBases(std::vector<BasedUser> &Uses, // If we found no CSE's, return now. if (Result == Zero) return Result; - + // If we still have a FreeResult, remove its subexpressions from // SubExpressionUseData. This means they will remain in the use Bases. if (FreeResult != Zero) { SeparateSubExprs(SubExprs, FreeResult, SE); for (unsigned j = 0, e = SubExprs.size(); j != e; ++j) { - std::map<const SCEV *, SubExprUseData>::iterator I = + std::map<const SCEV *, SubExprUseData>::iterator I = SubExpressionUseData.find(SubExprs[j]); SubExpressionUseData.erase(I); } @@ -882,7 +901,7 @@ RemoveCommonExpressionsFromUseBases(std::vector<BasedUser> &Uses, SubExprs.erase(SubExprs.begin()+j); --j; --e; } - + // Finally, add the non-shared expressions together. if (SubExprs.empty()) Uses[i].Base = Zero; @@ -890,11 +909,11 @@ RemoveCommonExpressionsFromUseBases(std::vector<BasedUser> &Uses, Uses[i].Base = SE->getAddExpr(SubExprs); SubExprs.clear(); } - + return Result; } -/// ValidScale - Check whether the given Scale is valid for all loads and +/// ValidScale - Check whether the given Scale is valid for all loads and /// stores in UsersToProcess. 
/// bool LoopStrengthReduce::ValidScale(bool HasBaseReg, int64_t Scale, @@ -911,7 +930,7 @@ bool LoopStrengthReduce::ValidScale(bool HasBaseReg, int64_t Scale, AccessTy = getAccessType(UsersToProcess[i].Inst); else if (isa<PHINode>(UsersToProcess[i].Inst)) continue; - + TargetLowering::AddrMode AM; if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(UsersToProcess[i].Imm)) AM.BaseOffs = SC->getValue()->getSExtValue(); @@ -983,13 +1002,13 @@ bool LoopStrengthReduce::RequiresTypeConversion(const Type *Ty1, /// reuse is possible. Factors can be negative on same targets, e.g. ARM. /// /// If all uses are outside the loop, we don't require that all multiplies -/// be folded into the addressing mode, nor even that the factor be constant; -/// a multiply (executed once) outside the loop is better than another IV +/// be folded into the addressing mode, nor even that the factor be constant; +/// a multiply (executed once) outside the loop is better than another IV /// within. Well, usually. const SCEV *LoopStrengthReduce::CheckForIVReuse(bool HasBaseReg, bool AllUsesAreAddresses, bool AllUsesAreOutsideLoop, - const SCEV *const &Stride, + const SCEV *const &Stride, IVExpr &IV, const Type *Ty, const std::vector<BasedUser>& UsersToProcess) { if (StrideNoReuse.count(Stride)) @@ -999,11 +1018,16 @@ const SCEV *LoopStrengthReduce::CheckForIVReuse(bool HasBaseReg, int64_t SInt = SC->getValue()->getSExtValue(); for (unsigned NewStride = 0, e = IU->StrideOrder.size(); NewStride != e; ++NewStride) { - std::map<const SCEV *, IVsOfOneStride>::iterator SI = + std::map<const SCEV *, IVsOfOneStride>::iterator SI = IVsByStride.find(IU->StrideOrder[NewStride]); if (SI == IVsByStride.end() || !isa<SCEVConstant>(SI->first) || StrideNoReuse.count(SI->first)) continue; + // The other stride has no uses, don't reuse it. + std::map<const SCEV *, IVUsersOfOneStride *>::iterator UI = + IU->IVUsesByStride.find(IU->StrideOrder[NewStride]); + if (UI->second->Users.empty()) + continue; int64_t SSInt = cast<SCEVConstant>(SI->first)->getValue()->getSExtValue(); if (SI->first != Stride && (unsigned(abs64(SInt)) < SSInt || (SInt % SSInt) != 0)) @@ -1052,7 +1076,7 @@ const SCEV *LoopStrengthReduce::CheckForIVReuse(bool HasBaseReg, // an existing IV if we can. for (unsigned NewStride = 0, e = IU->StrideOrder.size(); NewStride != e; ++NewStride) { - std::map<const SCEV *, IVsOfOneStride>::iterator SI = + std::map<const SCEV *, IVsOfOneStride>::iterator SI = IVsByStride.find(IU->StrideOrder[NewStride]); if (SI == IVsByStride.end() || !isa<SCEVConstant>(SI->first)) continue; @@ -1072,9 +1096,9 @@ const SCEV *LoopStrengthReduce::CheckForIVReuse(bool HasBaseReg, // -1*old. for (unsigned NewStride = 0, e = IU->StrideOrder.size(); NewStride != e; ++NewStride) { - std::map<const SCEV *, IVsOfOneStride>::iterator SI = + std::map<const SCEV *, IVsOfOneStride>::iterator SI = IVsByStride.find(IU->StrideOrder[NewStride]); - if (SI == IVsByStride.end()) + if (SI == IVsByStride.end()) continue; if (const SCEVMulExpr *ME = dyn_cast<SCEVMulExpr>(SI->first)) if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(ME->getOperand(0))) @@ -1104,18 +1128,18 @@ static bool PartitionByIsUseOfPostIncrementedValue(const BasedUser &Val) { static bool isNonConstantNegative(const SCEV *const &Expr) { const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(Expr); if (!Mul) return false; - + // If there is a constant factor, it will be first. 
const SCEVConstant *SC = dyn_cast<SCEVConstant>(Mul->getOperand(0)); if (!SC) return false; - + // Return true if the value is negative, this matches things like (-42 * V). return SC->getValue()->getValue().isNegative(); } /// CollectIVUsers - Transform our list of users and offsets to a bit more -/// complex table. In this new vector, each 'BasedUser' contains 'Base', the base -/// of the strided accesses, as well as the old information from Uses. We +/// complex table. In this new vector, each 'BasedUser' contains 'Base', the +/// base of the strided accesses, as well as the old information from Uses. We /// progressively move information from the Base field to the Imm field, until /// we eventually have the full access expression to rewrite the use. const SCEV *LoopStrengthReduce::CollectIVUsers(const SCEV *const &Stride, @@ -1145,7 +1169,7 @@ const SCEV *LoopStrengthReduce::CollectIVUsers(const SCEV *const &Stride, // We now have a whole bunch of uses of like-strided induction variables, but // they might all have different bases. We want to emit one PHI node for this // stride which we fold as many common expressions (between the IVs) into as - // possible. Start by identifying the common expressions in the base values + // possible. Start by identifying the common expressions in the base values // for the strides (e.g. if we have "A+C+B" and "A+B+D" as our bases, find // "A+B"), emit it to the preheader, then remove the expression from the // UsersToProcess base values. @@ -1165,11 +1189,11 @@ const SCEV *LoopStrengthReduce::CollectIVUsers(const SCEV *const &Stride, if (!L->contains(UsersToProcess[i].Inst->getParent())) { UsersToProcess[i].Imm = SE->getAddExpr(UsersToProcess[i].Imm, UsersToProcess[i].Base); - UsersToProcess[i].Base = + UsersToProcess[i].Base = SE->getIntegerSCEV(0, UsersToProcess[i].Base->getType()); } else { // Not all uses are outside the loop. - AllUsesAreOutsideLoop = false; + AllUsesAreOutsideLoop = false; // Addressing modes can be folded into loads and stores. Be careful that // the store is through the expression, not of the expression though. @@ -1183,11 +1207,11 @@ const SCEV *LoopStrengthReduce::CollectIVUsers(const SCEV *const &Stride, if (isAddress) HasAddress = true; - + // If this use isn't an address, then not all uses are addresses. if (!isAddress && !isPHI) AllUsesAreAddresses = false; - + MoveImmediateValues(TLI, UsersToProcess[i].Inst, UsersToProcess[i].Base, UsersToProcess[i].Imm, isAddress, L, SE); } @@ -1198,7 +1222,7 @@ const SCEV *LoopStrengthReduce::CollectIVUsers(const SCEV *const &Stride, // for one fewer iv. if (NumPHI > 1) AllUsesAreAddresses = false; - + // There are no in-loop address uses. if (AllUsesAreAddresses && (!HasAddress && !AllUsesAreOutsideLoop)) AllUsesAreAddresses = false; @@ -1491,12 +1515,13 @@ static bool IsImmFoldedIntoAddrMode(GlobalValue *GV, int64_t Offset, return true; } -/// StrengthReduceStridedIVUsers - Strength reduce all of the users of a single +/// StrengthReduceIVUsersOfStride - Strength reduce all of the users of a single /// stride of IV. All of the users may have different starting values, and this /// may not be the only stride. -void LoopStrengthReduce::StrengthReduceStridedIVUsers(const SCEV *const &Stride, - IVUsersOfOneStride &Uses, - Loop *L) { +void +LoopStrengthReduce::StrengthReduceIVUsersOfStride(const SCEV *const &Stride, + IVUsersOfOneStride &Uses, + Loop *L) { // If all the users are moved to another stride, then there is nothing to do. 
if (Uses.Users.empty()) return; @@ -1518,8 +1543,8 @@ void LoopStrengthReduce::StrengthReduceStridedIVUsers(const SCEV *const &Stride, // have the full access expression to rewrite the use. std::vector<BasedUser> UsersToProcess; const SCEV *CommonExprs = CollectIVUsers(Stride, Uses, L, AllUsesAreAddresses, - AllUsesAreOutsideLoop, - UsersToProcess); + AllUsesAreOutsideLoop, + UsersToProcess); // Sort the UsersToProcess array so that users with common bases are // next to each other. @@ -1588,12 +1613,12 @@ void LoopStrengthReduce::StrengthReduceStridedIVUsers(const SCEV *const &Stride, const SCEV *RewriteFactor = SE->getIntegerSCEV(0, ReplacedTy); IVExpr ReuseIV(SE->getIntegerSCEV(0, Type::getInt32Ty(Preheader->getContext())), - SE->getIntegerSCEV(0, + SE->getIntegerSCEV(0, Type::getInt32Ty(Preheader->getContext())), 0); - /// Choose a strength-reduction strategy and prepare for it by creating - /// the necessary PHIs and adjusting the bookkeeping. + // Choose a strength-reduction strategy and prepare for it by creating + // the necessary PHIs and adjusting the bookkeeping. if (ShouldUseFullStrengthReductionMode(UsersToProcess, L, AllUsesAreAddresses, Stride)) { PrepareToStrengthReduceFully(UsersToProcess, Stride, CommonExprs, L, @@ -1606,7 +1631,7 @@ void LoopStrengthReduce::StrengthReduceStridedIVUsers(const SCEV *const &Stride, // If all uses are addresses, check if it is possible to reuse an IV. The // new IV must have a stride that is a multiple of the old stride; the // multiple must be a number that can be encoded in the scale field of the - // target addressing mode; and we must have a valid instruction after this + // target addressing mode; and we must have a valid instruction after this // substitution, including the immediate field, if any. RewriteFactor = CheckForIVReuse(HaveCommonExprs, AllUsesAreAddresses, AllUsesAreOutsideLoop, @@ -1649,7 +1674,7 @@ void LoopStrengthReduce::StrengthReduceStridedIVUsers(const SCEV *const &Stride, // We want this constant emitted into the preheader! This is just // using cast as a copy so BitCast (no-op cast) is appropriate BaseV = new BitCastInst(BaseV, BaseV->getType(), "preheaderinsert", - PreInsertPt); + PreInsertPt); } } @@ -1723,7 +1748,7 @@ void LoopStrengthReduce::StrengthReduceStridedIVUsers(const SCEV *const &Stride, assert(SE->getTypeSizeInBits(RewriteExpr->getType()) < SE->getTypeSizeInBits(ReuseIV.Base->getType()) && "Unexpected lengthening conversion!"); - typedBase = SE->getTruncateExpr(ReuseIV.Base, + typedBase = SE->getTruncateExpr(ReuseIV.Base, RewriteExpr->getType()); } RewriteExpr = SE->getMinusSCEV(RewriteExpr, typedBase); @@ -1775,11 +1800,29 @@ void LoopStrengthReduce::StrengthReduceStridedIVUsers(const SCEV *const &Stride, // different starting values, into different PHIs. } +void LoopStrengthReduce::StrengthReduceIVUsers(Loop *L) { + // Note: this processes each stride/type pair individually. All users + // passed into StrengthReduceIVUsersOfStride have the same type AND stride. + // Also, note that we iterate over IVUsesByStride indirectly by using + // StrideOrder. This extra layer of indirection makes the ordering of + // strides deterministic - not dependent on map order. + for (unsigned Stride = 0, e = IU->StrideOrder.size(); Stride != e; ++Stride) { + std::map<const SCEV *, IVUsersOfOneStride *>::iterator SI = + IU->IVUsesByStride.find(IU->StrideOrder[Stride]); + assert(SI != IU->IVUsesByStride.end() && "Stride doesn't exist!"); + // FIXME: Generalize to non-affine IV's. 
+ if (!SI->first->isLoopInvariant(L)) + continue; + StrengthReduceIVUsersOfStride(SI->first, *SI->second, L); + } +} + /// FindIVUserForCond - If Cond has an operand that is an expression of an IV, /// set the IV user and stride information and return true, otherwise return /// false. -bool LoopStrengthReduce::FindIVUserForCond(ICmpInst *Cond, IVStrideUse *&CondUse, - const SCEV *const * &CondStride) { +bool LoopStrengthReduce::FindIVUserForCond(ICmpInst *Cond, + IVStrideUse *&CondUse, + const SCEV* &CondStride) { for (unsigned Stride = 0, e = IU->StrideOrder.size(); Stride != e && !CondUse; ++Stride) { std::map<const SCEV *, IVUsersOfOneStride *>::iterator SI = @@ -1793,12 +1836,12 @@ bool LoopStrengthReduce::FindIVUserForCond(ICmpInst *Cond, IVStrideUse *&CondUse // InstCombine does it as well for simple uses, it's not clear that it // occurs enough in real life to handle. CondUse = UI; - CondStride = &SI->first; + CondStride = SI->first; return true; } } return false; -} +} namespace { // Constant strides come first which in turns are sorted by their absolute @@ -1851,8 +1894,9 @@ namespace { /// v1 = v1 + 3 /// if (v1 < 30) goto loop ICmpInst *LoopStrengthReduce::ChangeCompareStride(Loop *L, ICmpInst *Cond, - IVStrideUse* &CondUse, - const SCEV *const* &CondStride) { + IVStrideUse* &CondUse, + const SCEV* &CondStride, + bool PostPass) { // If there's only one stride in the loop, there's nothing to do here. if (IU->StrideOrder.size() < 2) return Cond; @@ -1860,23 +1904,31 @@ ICmpInst *LoopStrengthReduce::ChangeCompareStride(Loop *L, ICmpInst *Cond, // trying to change the condition because the stride will still // remain. std::map<const SCEV *, IVUsersOfOneStride *>::iterator I = - IU->IVUsesByStride.find(*CondStride); - if (I == IU->IVUsesByStride.end() || - I->second->Users.size() != 1) + IU->IVUsesByStride.find(CondStride); + if (I == IU->IVUsesByStride.end()) return Cond; + if (I->second->Users.size() > 1) { + for (ilist<IVStrideUse>::iterator II = I->second->Users.begin(), + EE = I->second->Users.end(); II != EE; ++II) { + if (II->getUser() == Cond) + continue; + if (!isInstructionTriviallyDead(II->getUser())) + return Cond; + } + } // Only handle constant strides for now. - const SCEVConstant *SC = dyn_cast<SCEVConstant>(*CondStride); + const SCEVConstant *SC = dyn_cast<SCEVConstant>(CondStride); if (!SC) return Cond; ICmpInst::Predicate Predicate = Cond->getPredicate(); int64_t CmpSSInt = SC->getValue()->getSExtValue(); - unsigned BitWidth = SE->getTypeSizeInBits((*CondStride)->getType()); + unsigned BitWidth = SE->getTypeSizeInBits(CondStride->getType()); uint64_t SignBit = 1ULL << (BitWidth-1); const Type *CmpTy = Cond->getOperand(0)->getType(); const Type *NewCmpTy = NULL; unsigned TyBits = SE->getTypeSizeInBits(CmpTy); unsigned NewTyBits = 0; - const SCEV **NewStride = NULL; + const SCEV *NewStride = NULL; Value *NewCmpLHS = NULL; Value *NewCmpRHS = NULL; int64_t Scale = 1; @@ -1885,16 +1937,31 @@ ICmpInst *LoopStrengthReduce::ChangeCompareStride(Loop *L, ICmpInst *Cond, if (ConstantInt *C = dyn_cast<ConstantInt>(Cond->getOperand(1))) { int64_t CmpVal = C->getValue().getSExtValue(); + // Check the relevant induction variable for conformance to + // the pattern. + const SCEV *IV = SE->getSCEV(Cond->getOperand(0)); + const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(IV); + if (!AR || !AR->isAffine()) + return Cond; + + const SCEVConstant *StartC = dyn_cast<SCEVConstant>(AR->getStart()); // Check stride constant and the comparision constant signs to detect // overflow. 
- if ((CmpVal & SignBit) != (CmpSSInt & SignBit)) - return Cond; + if (StartC) { + if ((StartC->getValue()->getSExtValue() < CmpVal && CmpSSInt < 0) || + (StartC->getValue()->getSExtValue() > CmpVal && CmpSSInt > 0)) + return Cond; + } else { + // More restrictive check for the other cases. + if ((CmpVal & SignBit) != (CmpSSInt & SignBit)) + return Cond; + } // Look for a suitable stride / iv as replacement. for (unsigned i = 0, e = IU->StrideOrder.size(); i != e; ++i) { std::map<const SCEV *, IVUsersOfOneStride *>::iterator SI = IU->IVUsesByStride.find(IU->StrideOrder[i]); - if (!isa<SCEVConstant>(SI->first)) + if (!isa<SCEVConstant>(SI->first) || SI->second->Users.empty()) continue; int64_t SSInt = cast<SCEVConstant>(SI->first)->getValue()->getSExtValue(); if (SSInt == CmpSSInt || @@ -1904,6 +1971,14 @@ ICmpInst *LoopStrengthReduce::ChangeCompareStride(Loop *L, ICmpInst *Cond, Scale = SSInt / CmpSSInt; int64_t NewCmpVal = CmpVal * Scale; + + // If old icmp value fits in icmp immediate field, but the new one doesn't + // try something else. + if (TLI && + TLI->isLegalICmpImmediate(CmpVal) && + !TLI->isLegalICmpImmediate(NewCmpVal)) + continue; + APInt Mul = APInt(BitWidth*2, CmpVal, true); Mul = Mul * APInt(BitWidth*2, Scale, true); // Check for overflow. @@ -1918,8 +1993,6 @@ ICmpInst *LoopStrengthReduce::ChangeCompareStride(Loop *L, ICmpInst *Cond, (CmpVal & SignBit) != (NewCmpVal & SignBit)) continue; - if (NewCmpVal == CmpVal) - continue; // Pick the best iv to use trying to avoid a cast. NewCmpLHS = NULL; for (ilist<IVStrideUse>::iterator UI = SI->second->Users.begin(), @@ -1969,19 +2042,21 @@ ICmpInst *LoopStrengthReduce::ChangeCompareStride(Loop *L, ICmpInst *Cond, if (NewTyBits != TyBits && !isa<SCEVConstant>(CondUse->getOffset())) continue; - bool AllUsesAreAddresses = true; - bool AllUsesAreOutsideLoop = true; - std::vector<BasedUser> UsersToProcess; - const SCEV *CommonExprs = CollectIVUsers(SI->first, *SI->second, L, - AllUsesAreAddresses, - AllUsesAreOutsideLoop, - UsersToProcess); - // Avoid rewriting the compare instruction with an iv of new stride - // if it's likely the new stride uses will be rewritten using the - // stride of the compare instruction. - if (AllUsesAreAddresses && - ValidScale(!CommonExprs->isZero(), Scale, UsersToProcess)) - continue; + if (!PostPass) { + bool AllUsesAreAddresses = true; + bool AllUsesAreOutsideLoop = true; + std::vector<BasedUser> UsersToProcess; + const SCEV *CommonExprs = CollectIVUsers(SI->first, *SI->second, L, + AllUsesAreAddresses, + AllUsesAreOutsideLoop, + UsersToProcess); + // Avoid rewriting the compare instruction with an iv of new stride + // if it's likely the new stride uses will be rewritten using the + // stride of the compare instruction. + if (AllUsesAreAddresses && + ValidScale(!CommonExprs->isZero(), Scale, UsersToProcess)) + continue; + } // Avoid rewriting the compare instruction with an iv which has // implicit extension or truncation built into it. 
@@ -1994,7 +2069,7 @@ ICmpInst *LoopStrengthReduce::ChangeCompareStride(Loop *L, ICmpInst *Cond, if (Scale < 0 && !Cond->isEquality()) Predicate = ICmpInst::getSwappedPredicate(Predicate); - NewStride = &IU->StrideOrder[i]; + NewStride = IU->StrideOrder[i]; if (!isa<PointerType>(NewCmpTy)) NewCmpRHS = ConstantInt::get(NewCmpTy, NewCmpVal); else { @@ -2031,13 +2106,16 @@ ICmpInst *LoopStrengthReduce::ChangeCompareStride(Loop *L, ICmpInst *Cond, Cond = new ICmpInst(OldCond, Predicate, NewCmpLHS, NewCmpRHS, L->getHeader()->getName() + ".termcond"); + DEBUG(errs() << " Change compare stride in Inst " << *OldCond); + DEBUG(errs() << " to " << *Cond << '\n'); + // Remove the old compare instruction. The old indvar is probably dead too. DeadInsts.push_back(CondUse->getOperandValToReplace()); OldCond->replaceAllUsesWith(Cond); OldCond->eraseFromParent(); - IU->IVUsesByStride[*NewStride]->addUser(NewOffset, Cond, NewCmpLHS); - CondUse = &IU->IVUsesByStride[*NewStride]->Users.back(); + IU->IVUsesByStride[NewStride]->addUser(NewOffset, Cond, NewCmpLHS); + CondUse = &IU->IVUsesByStride[NewStride]->Users.back(); CondStride = NewStride; ++NumEliminated; Changed = true; @@ -2180,7 +2258,7 @@ void LoopStrengthReduce::OptimizeShadowIV(Loop *L) { const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L); if (isa<SCEVCouldNotCompute>(BackedgeTakenCount)) return; - + for (unsigned Stride = 0, e = IU->StrideOrder.size(); Stride != e; ++Stride) { std::map<const SCEV *, IVUsersOfOneStride *>::iterator SI = @@ -2199,13 +2277,13 @@ void LoopStrengthReduce::OptimizeShadowIV(Loop *L) { /* If shadow use is a int->float cast then insert a second IV to eliminate this cast. - for (unsigned i = 0; i < n; ++i) + for (unsigned i = 0; i < n; ++i) foo((double)i); is transformed into double d = 0.0; - for (unsigned i = 0; i < n; ++i, ++d) + for (unsigned i = 0; i < n; ++i, ++d) foo(d); */ if (UIToFPInst *UCast = dyn_cast<UIToFPInst>(CandidateUI->getUser())) @@ -2227,7 +2305,7 @@ void LoopStrengthReduce::OptimizeShadowIV(Loop *L) { const Type *SrcTy = PH->getType(); int Mantissa = DestTy->getFPMantissaWidth(); - if (Mantissa == -1) continue; + if (Mantissa == -1) continue; if ((int)SE->getTypeSizeInBits(SrcTy) > Mantissa) continue; @@ -2239,12 +2317,12 @@ void LoopStrengthReduce::OptimizeShadowIV(Loop *L) { Entry = 1; Latch = 0; } - + ConstantInt *Init = dyn_cast<ConstantInt>(PH->getIncomingValue(Entry)); if (!Init) continue; Constant *NewInit = ConstantFP::get(DestTy, Init->getZExtValue()); - BinaryOperator *Incr = + BinaryOperator *Incr = dyn_cast<BinaryOperator>(PH->getIncomingValue(Latch)); if (!Incr) continue; if (Incr->getOpcode() != Instruction::Add @@ -2271,7 +2349,7 @@ void LoopStrengthReduce::OptimizeShadowIV(Loop *L) { /* create new increment. '++d' in above example. */ Constant *CFP = ConstantFP::get(DestTy, C->getZExtValue()); - BinaryOperator *NewIncr = + BinaryOperator *NewIncr = BinaryOperator::Create(Incr->getOpcode() == Instruction::Add ? 
Instruction::FAdd : Instruction::FSub, NewPH, CFP, "IV.S.next.", Incr); @@ -2297,237 +2375,385 @@ void LoopStrengthReduce::OptimizeIndvars(Loop *L) { OptimizeShadowIV(L); } -/// OptimizeLoopTermCond - Change loop terminating condition to use the +bool LoopStrengthReduce::StrideMightBeShared(const SCEV* Stride, Loop *L, + bool CheckPreInc) { + int64_t SInt = cast<SCEVConstant>(Stride)->getValue()->getSExtValue(); + for (unsigned i = 0, e = IU->StrideOrder.size(); i != e; ++i) { + std::map<const SCEV *, IVUsersOfOneStride *>::iterator SI = + IU->IVUsesByStride.find(IU->StrideOrder[i]); + const SCEV *Share = SI->first; + if (!isa<SCEVConstant>(SI->first) || Share == Stride) + continue; + int64_t SSInt = cast<SCEVConstant>(Share)->getValue()->getSExtValue(); + if (SSInt == SInt) + return true; // This can definitely be reused. + if (unsigned(abs64(SSInt)) < SInt || (SSInt % SInt) != 0) + continue; + int64_t Scale = SSInt / SInt; + bool AllUsesAreAddresses = true; + bool AllUsesAreOutsideLoop = true; + std::vector<BasedUser> UsersToProcess; + const SCEV *CommonExprs = CollectIVUsers(SI->first, *SI->second, L, + AllUsesAreAddresses, + AllUsesAreOutsideLoop, + UsersToProcess); + if (AllUsesAreAddresses && + ValidScale(!CommonExprs->isZero(), Scale, UsersToProcess)) { + if (!CheckPreInc) + return true; + // Any pre-inc iv use? + IVUsersOfOneStride &StrideUses = *IU->IVUsesByStride[Share]; + for (ilist<IVStrideUse>::iterator I = StrideUses.Users.begin(), + E = StrideUses.Users.end(); I != E; ++I) { + if (!I->isUseOfPostIncrementedValue()) + return true; + } + } + } + return false; +} + +/// isUsedByExitBranch - Return true if icmp is used by a loop terminating +/// conditional branch or it's and / or with other conditions before being used +/// as the condition. +static bool isUsedByExitBranch(ICmpInst *Cond, Loop *L) { + BasicBlock *CondBB = Cond->getParent(); + if (!L->isLoopExiting(CondBB)) + return false; + BranchInst *TermBr = dyn_cast<BranchInst>(CondBB->getTerminator()); + if (!TermBr || !TermBr->isConditional()) + return false; + + Value *User = *Cond->use_begin(); + Instruction *UserInst = dyn_cast<Instruction>(User); + while (UserInst && + (UserInst->getOpcode() == Instruction::And || + UserInst->getOpcode() == Instruction::Or)) { + if (!UserInst->hasOneUse() || UserInst->getParent() != CondBB) + return false; + User = *User->use_begin(); + UserInst = dyn_cast<Instruction>(User); + } + return User == TermBr; +} + +static bool ShouldCountToZero(ICmpInst *Cond, IVStrideUse* &CondUse, + ScalarEvolution *SE, Loop *L, + const TargetLowering *TLI = 0) { + if (!L->contains(Cond->getParent())) + return false; + + if (!isa<SCEVConstant>(CondUse->getOffset())) + return false; + + // Handle only tests for equality for the moment. + if (!Cond->isEquality() || !Cond->hasOneUse()) + return false; + if (!isUsedByExitBranch(Cond, L)) + return false; + + Value *CondOp0 = Cond->getOperand(0); + const SCEV *IV = SE->getSCEV(CondOp0); + const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(IV); + if (!AR || !AR->isAffine()) + return false; + + const SCEVConstant *SC = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*SE)); + if (!SC || SC->getValue()->getSExtValue() < 0) + // If it's already counting down, don't do anything. + return false; + + // If the RHS of the comparison is not an loop invariant, the rewrite + // cannot be done. Also bail out if it's already comparing against a zero. + // If we are checking this before cmp stride optimization, check if it's + // comparing against a already legal immediate. 
+ Value *RHS = Cond->getOperand(1); + ConstantInt *RHSC = dyn_cast<ConstantInt>(RHS); + if (!L->isLoopInvariant(RHS) || + (RHSC && RHSC->isZero()) || + (RHSC && TLI && TLI->isLegalICmpImmediate(RHSC->getSExtValue()))) + return false; + + // Make sure the IV is only used for counting. Value may be preinc or + // postinc; 2 uses in either case. + if (!CondOp0->hasNUses(2)) + return false; + + return true; +} + +/// OptimizeLoopTermCond - Change loop terminating condition to use the /// postinc iv when possible. void LoopStrengthReduce::OptimizeLoopTermCond(Loop *L) { - // Finally, get the terminating condition for the loop if possible. If we - // can, we want to change it to use a post-incremented version of its - // induction variable, to allow coalescing the live ranges for the IV into - // one register value. BasicBlock *LatchBlock = L->getLoopLatch(); - BasicBlock *ExitingBlock = L->getExitingBlock(); - - if (!ExitingBlock) - // Multiple exits, just look at the exit in the latch block if there is one. - ExitingBlock = LatchBlock; - BranchInst *TermBr = dyn_cast<BranchInst>(ExitingBlock->getTerminator()); - if (!TermBr) - return; - if (TermBr->isUnconditional() || !isa<ICmpInst>(TermBr->getCondition())) - return; + bool LatchExit = L->isLoopExiting(LatchBlock); + SmallVector<BasicBlock*, 8> ExitingBlocks; + L->getExitingBlocks(ExitingBlocks); - // Search IVUsesByStride to find Cond's IVUse if there is one. - IVStrideUse *CondUse = 0; - const SCEV *const *CondStride = 0; - ICmpInst *Cond = cast<ICmpInst>(TermBr->getCondition()); - if (!FindIVUserForCond(Cond, CondUse, CondStride)) - return; // setcc doesn't use the IV. - - if (ExitingBlock != LatchBlock) { - if (!Cond->hasOneUse()) - // See below, we don't want the condition to be cloned. - return; - - // If exiting block is the latch block, we know it's safe and profitable to - // transform the icmp to use post-inc iv. Otherwise do so only if it would - // not reuse another iv and its iv would be reused by other uses. We are - // optimizing for the case where the icmp is the only use of the iv. - IVUsersOfOneStride &StrideUses = *IU->IVUsesByStride[*CondStride]; - for (ilist<IVStrideUse>::iterator I = StrideUses.Users.begin(), - E = StrideUses.Users.end(); I != E; ++I) { - if (I->getUser() == Cond) - continue; - if (!I->isUseOfPostIncrementedValue()) - return; - } + for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i) { + BasicBlock *ExitingBlock = ExitingBlocks[i]; - // FIXME: This is expensive, and worse still ChangeCompareStride does a - // similar check. Can we perform all the icmp related transformations after - // StrengthReduceStridedIVUsers? - if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(*CondStride)) { - int64_t SInt = SC->getValue()->getSExtValue(); - for (unsigned NewStride = 0, ee = IU->StrideOrder.size(); NewStride != ee; - ++NewStride) { - std::map<const SCEV *, IVUsersOfOneStride *>::iterator SI = - IU->IVUsesByStride.find(IU->StrideOrder[NewStride]); - if (!isa<SCEVConstant>(SI->first) || SI->first == *CondStride) - continue; - int64_t SSInt = - cast<SCEVConstant>(SI->first)->getValue()->getSExtValue(); - if (SSInt == SInt) - return; // This can definitely be reused. 
- if (unsigned(abs64(SSInt)) < SInt || (SSInt % SInt) != 0) - continue; - int64_t Scale = SSInt / SInt; - bool AllUsesAreAddresses = true; - bool AllUsesAreOutsideLoop = true; - std::vector<BasedUser> UsersToProcess; - const SCEV *CommonExprs = CollectIVUsers(SI->first, *SI->second, L, - AllUsesAreAddresses, - AllUsesAreOutsideLoop, - UsersToProcess); - // Avoid rewriting the compare instruction with an iv of new stride - // if it's likely the new stride uses will be rewritten using the - // stride of the compare instruction. - if (AllUsesAreAddresses && - ValidScale(!CommonExprs->isZero(), Scale, UsersToProcess)) - return; - } - } + // Finally, get the terminating condition for the loop if possible. If we + // can, we want to change it to use a post-incremented version of its + // induction variable, to allow coalescing the live ranges for the IV into + // one register value. - StrideNoReuse.insert(*CondStride); - } + BranchInst *TermBr = dyn_cast<BranchInst>(ExitingBlock->getTerminator()); + if (!TermBr) + continue; + // FIXME: Overly conservative, termination condition could be an 'or' etc.. + if (TermBr->isUnconditional() || !isa<ICmpInst>(TermBr->getCondition())) + continue; - // If the trip count is computed in terms of a max (due to ScalarEvolution - // being unable to find a sufficient guard, for example), change the loop - // comparison to use SLT or ULT instead of NE. - Cond = OptimizeMax(L, Cond, CondUse); - - // If possible, change stride and operands of the compare instruction to - // eliminate one stride. - if (ExitingBlock == LatchBlock) - Cond = ChangeCompareStride(L, Cond, CondUse, CondStride); - - // It's possible for the setcc instruction to be anywhere in the loop, and - // possible for it to have multiple users. If it is not immediately before - // the latch block branch, move it. - if (&*++BasicBlock::iterator(Cond) != (Instruction*)TermBr) { - if (Cond->hasOneUse()) { // Condition has a single use, just move it. - Cond->moveBefore(TermBr); - } else { - // Otherwise, clone the terminating condition and insert into the loopend. - Cond = cast<ICmpInst>(Cond->clone()); - Cond->setName(L->getHeader()->getName() + ".termcond"); - LatchBlock->getInstList().insert(TermBr, Cond); - - // Clone the IVUse, as the old use still exists! - IU->IVUsesByStride[*CondStride]->addUser(CondUse->getOffset(), Cond, - CondUse->getOperandValToReplace()); - CondUse = &IU->IVUsesByStride[*CondStride]->Users.back(); + // Search IVUsesByStride to find Cond's IVUse if there is one. + IVStrideUse *CondUse = 0; + const SCEV *CondStride = 0; + ICmpInst *Cond = cast<ICmpInst>(TermBr->getCondition()); + if (!FindIVUserForCond(Cond, CondUse, CondStride)) + continue; + + // If the latch block is exiting and it's not a single block loop, it's + // not safe to use postinc iv in other exiting blocks. FIXME: overly + // conservative? How about icmp stride optimization? + bool UsePostInc = !(e > 1 && LatchExit && ExitingBlock != LatchBlock); + if (UsePostInc && ExitingBlock != LatchBlock) { + if (!Cond->hasOneUse()) + // See below, we don't want the condition to be cloned. + UsePostInc = false; + else { + // If exiting block is the latch block, we know it's safe and profitable + // to transform the icmp to use post-inc iv. Otherwise do so only if it + // would not reuse another iv and its iv would be reused by other uses. + // We are optimizing for the case where the icmp is the only use of the + // iv. 
+ IVUsersOfOneStride &StrideUses = *IU->IVUsesByStride[CondStride]; + for (ilist<IVStrideUse>::iterator I = StrideUses.Users.begin(), + E = StrideUses.Users.end(); I != E; ++I) { + if (I->getUser() == Cond) + continue; + if (!I->isUseOfPostIncrementedValue()) { + UsePostInc = false; + break; + } + } + } + + // If iv for the stride might be shared and any of the users use pre-inc + // iv might be used, then it's not safe to use post-inc iv. + if (UsePostInc && + isa<SCEVConstant>(CondStride) && + StrideMightBeShared(CondStride, L, true)) + UsePostInc = false; } - } - // If we get to here, we know that we can transform the setcc instruction to - // use the post-incremented version of the IV, allowing us to coalesce the - // live ranges for the IV correctly. - CondUse->setOffset(SE->getMinusSCEV(CondUse->getOffset(), *CondStride)); - CondUse->setIsUseOfPostIncrementedValue(true); - Changed = true; + // If the trip count is computed in terms of a max (due to ScalarEvolution + // being unable to find a sufficient guard, for example), change the loop + // comparison to use SLT or ULT instead of NE. + Cond = OptimizeMax(L, Cond, CondUse); + + // If possible, change stride and operands of the compare instruction to + // eliminate one stride. However, avoid rewriting the compare instruction + // with an iv of new stride if it's likely the new stride uses will be + // rewritten using the stride of the compare instruction. + if (ExitingBlock == LatchBlock && isa<SCEVConstant>(CondStride)) { + // If the condition stride is a constant and it's the only use, we might + // want to optimize it first by turning it to count toward zero. + if (!StrideMightBeShared(CondStride, L, false) && + !ShouldCountToZero(Cond, CondUse, SE, L, TLI)) + Cond = ChangeCompareStride(L, Cond, CondUse, CondStride); + } - ++NumLoopCond; -} + if (!UsePostInc) + continue; -/// OptimizeLoopCountIV - If, after all sharing of IVs, the IV used for deciding -/// when to exit the loop is used only for that purpose, try to rearrange things -/// so it counts down to a test against zero. -void LoopStrengthReduce::OptimizeLoopCountIV(Loop *L) { + DEBUG(errs() << " Change loop exiting icmp to use postinc iv: " + << *Cond << '\n'); - // If the number of times the loop is executed isn't computable, give up. - const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L); - if (isa<SCEVCouldNotCompute>(BackedgeTakenCount)) - return; + // It's possible for the setcc instruction to be anywhere in the loop, and + // possible for it to have multiple users. If it is not immediately before + // the exiting block branch, move it. + if (&*++BasicBlock::iterator(Cond) != (Instruction*)TermBr) { + if (Cond->hasOneUse()) { // Condition has a single use, just move it. + Cond->moveBefore(TermBr); + } else { + // Otherwise, clone the terminating condition and insert into the + // loopend. + Cond = cast<ICmpInst>(Cond->clone()); + Cond->setName(L->getHeader()->getName() + ".termcond"); + ExitingBlock->getInstList().insert(TermBr, Cond); + + // Clone the IVUse, as the old use still exists! + IU->IVUsesByStride[CondStride]->addUser(CondUse->getOffset(), Cond, + CondUse->getOperandValToReplace()); + CondUse = &IU->IVUsesByStride[CondStride]->Users.back(); + } + } - // Get the terminating condition for the loop if possible (this isn't - // necessarily in the latch, or a block that's a predecessor of the header). - if (!L->getExitBlock()) - return; // More than one loop exit blocks. 
+ // If we get to here, we know that we can transform the setcc instruction to + // use the post-incremented version of the IV, allowing us to coalesce the + // live ranges for the IV correctly. + CondUse->setOffset(SE->getMinusSCEV(CondUse->getOffset(), CondStride)); + CondUse->setIsUseOfPostIncrementedValue(true); + Changed = true; - // Okay, there is one exit block. Try to find the condition that causes the - // loop to be exited. - BasicBlock *ExitingBlock = L->getExitingBlock(); - if (!ExitingBlock) - return; // More than one block exiting! + ++NumLoopCond; + } +} - // Okay, we've computed the exiting block. See what condition causes us to - // exit. - // - // FIXME: we should be able to handle switch instructions (with a single exit) - BranchInst *TermBr = dyn_cast<BranchInst>(ExitingBlock->getTerminator()); - if (TermBr == 0) return; - assert(TermBr->isConditional() && "If unconditional, it can't be in loop!"); - if (!isa<ICmpInst>(TermBr->getCondition())) - return; - ICmpInst *Cond = cast<ICmpInst>(TermBr->getCondition()); +bool LoopStrengthReduce::OptimizeLoopCountIVOfStride(const SCEV* &Stride, + IVStrideUse* &CondUse, + Loop *L) { + // If the only use is an icmp of a loop exiting conditional branch, then + // attempt the optimization. + BasedUser User = BasedUser(*CondUse, SE); + assert(isa<ICmpInst>(User.Inst) && "Expecting an ICMPInst!"); + ICmpInst *Cond = cast<ICmpInst>(User.Inst); + + // Less strict check now that compare stride optimization is done. + if (!ShouldCountToZero(Cond, CondUse, SE, L)) + return false; - // Handle only tests for equality for the moment, and only stride 1. - if (Cond->getPredicate() != CmpInst::ICMP_EQ) - return; - const SCEV *IV = SE->getSCEV(Cond->getOperand(0)); - const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(IV); - const SCEV *One = SE->getIntegerSCEV(1, BackedgeTakenCount->getType()); - if (!AR || !AR->isAffine() || AR->getStepRecurrence(*SE) != One) - return; - // If the RHS of the comparison is defined inside the loop, the rewrite - // cannot be done. - if (Instruction *CR = dyn_cast<Instruction>(Cond->getOperand(1))) - if (L->contains(CR->getParent())) - return; + Value *CondOp0 = Cond->getOperand(0); + PHINode *PHIExpr = dyn_cast<PHINode>(CondOp0); + Instruction *Incr; + if (!PHIExpr) { + // Value tested is postinc. Find the phi node. + Incr = dyn_cast<BinaryOperator>(CondOp0); + // FIXME: Just use User.OperandValToReplace here? + if (!Incr || Incr->getOpcode() != Instruction::Add) + return false; - // Make sure the IV is only used for counting. Value may be preinc or - // postinc; 2 uses in either case. - if (!Cond->getOperand(0)->hasNUses(2)) - return; - PHINode *phi = dyn_cast<PHINode>(Cond->getOperand(0)); - Instruction *incr; - if (phi && phi->getParent()==L->getHeader()) { - // value tested is preinc. Find the increment. - // A CmpInst is not a BinaryOperator; we depend on this. - Instruction::use_iterator UI = phi->use_begin(); - incr = dyn_cast<BinaryOperator>(UI); - if (!incr) - incr = dyn_cast<BinaryOperator>(++UI); - // 1 use for postinc value, the phi. Unnecessarily conservative? - if (!incr || !incr->hasOneUse() || incr->getOpcode()!=Instruction::Add) - return; - } else { - // Value tested is postinc. Find the phi node. 
- incr = dyn_cast<BinaryOperator>(Cond->getOperand(0)); - if (!incr || incr->getOpcode()!=Instruction::Add) - return; - - Instruction::use_iterator UI = Cond->getOperand(0)->use_begin(); - phi = dyn_cast<PHINode>(UI); - if (!phi) - phi = dyn_cast<PHINode>(++UI); + PHIExpr = dyn_cast<PHINode>(Incr->getOperand(0)); + if (!PHIExpr) + return false; // 1 use for preinc value, the increment. - if (!phi || phi->getParent()!=L->getHeader() || !phi->hasOneUse()) - return; + if (!PHIExpr->hasOneUse()) + return false; + } else { + assert(isa<PHINode>(CondOp0) && + "Unexpected loop exiting counting instruction sequence!"); + PHIExpr = cast<PHINode>(CondOp0); + // Value tested is preinc. Find the increment. + // A CmpInst is not a BinaryOperator; we depend on this. + Instruction::use_iterator UI = PHIExpr->use_begin(); + Incr = dyn_cast<BinaryOperator>(UI); + if (!Incr) + Incr = dyn_cast<BinaryOperator>(++UI); + // One use for postinc value, the phi. Unnecessarily conservative? + if (!Incr || !Incr->hasOneUse() || Incr->getOpcode() != Instruction::Add) + return false; } // Replace the increment with a decrement. - BinaryOperator *decr = - BinaryOperator::Create(Instruction::Sub, incr->getOperand(0), - incr->getOperand(1), "tmp", incr); - incr->replaceAllUsesWith(decr); - incr->eraseFromParent(); + DEBUG(errs() << "LSR: Examining use "); + DEBUG(WriteAsOperand(errs(), CondOp0, /*PrintType=*/false)); + DEBUG(errs() << " in Inst: " << *Cond << '\n'); + BinaryOperator *Decr = BinaryOperator::Create(Instruction::Sub, + Incr->getOperand(0), Incr->getOperand(1), "tmp", Incr); + Incr->replaceAllUsesWith(Decr); + Incr->eraseFromParent(); // Substitute endval-startval for the original startval, and 0 for the - // original endval. Since we're only testing for equality this is OK even + // original endval. Since we're only testing for equality this is OK even // if the computation wraps around. BasicBlock *Preheader = L->getLoopPreheader(); Instruction *PreInsertPt = Preheader->getTerminator(); - int inBlock = L->contains(phi->getIncomingBlock(0)) ? 1 : 0; - Value *startVal = phi->getIncomingValue(inBlock); - Value *endVal = Cond->getOperand(1); - // FIXME check for case where both are constant + unsigned InBlock = L->contains(PHIExpr->getIncomingBlock(0)) ? 1 : 0; + Value *StartVal = PHIExpr->getIncomingValue(InBlock); + Value *EndVal = Cond->getOperand(1); + DEBUG(errs() << " Optimize loop counting iv to count down [" + << *EndVal << " .. " << *StartVal << "]\n"); + + // FIXME: check for case where both are constant. 
Constant* Zero = ConstantInt::get(Cond->getOperand(1)->getType(), 0); - BinaryOperator *NewStartVal = - BinaryOperator::Create(Instruction::Sub, endVal, startVal, - "tmp", PreInsertPt); - phi->setIncomingValue(inBlock, NewStartVal); + BinaryOperator *NewStartVal = BinaryOperator::Create(Instruction::Sub, + EndVal, StartVal, "tmp", PreInsertPt); + PHIExpr->setIncomingValue(InBlock, NewStartVal); Cond->setOperand(1, Zero); + DEBUG(errs() << " New icmp: " << *Cond << "\n"); + + int64_t SInt = cast<SCEVConstant>(Stride)->getValue()->getSExtValue(); + const SCEV *NewStride = 0; + bool Found = false; + for (unsigned i = 0, e = IU->StrideOrder.size(); i != e; ++i) { + const SCEV *OldStride = IU->StrideOrder[i]; + if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(OldStride)) + if (SC->getValue()->getSExtValue() == -SInt) { + Found = true; + NewStride = OldStride; + break; + } + } + + if (!Found) + NewStride = SE->getIntegerSCEV(-SInt, Stride->getType()); + IU->AddUser(NewStride, CondUse->getOffset(), Cond, Cond->getOperand(0)); + IU->IVUsesByStride[Stride]->removeUser(CondUse); + + CondUse = &IU->IVUsesByStride[NewStride]->Users.back(); + Stride = NewStride; - Changed = true; + ++NumCountZero; + + return true; } -bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager &LPM) { +/// OptimizeLoopCountIV - If, after all sharing of IVs, the IV used for deciding +/// when to exit the loop is used only for that purpose, try to rearrange things +/// so it counts down to a test against zero. +bool LoopStrengthReduce::OptimizeLoopCountIV(Loop *L) { + bool ThisChanged = false; + for (unsigned i = 0, e = IU->StrideOrder.size(); i != e; ++i) { + const SCEV *Stride = IU->StrideOrder[i]; + std::map<const SCEV *, IVUsersOfOneStride *>::iterator SI = + IU->IVUsesByStride.find(Stride); + assert(SI != IU->IVUsesByStride.end() && "Stride doesn't exist!"); + // FIXME: Generalize to non-affine IV's. + if (!SI->first->isLoopInvariant(L)) + continue; + // If stride is a constant and it has an icmpinst use, check if we can + // optimize the loop to count down. + if (isa<SCEVConstant>(Stride) && SI->second->Users.size() == 1) { + Instruction *User = SI->second->Users.begin()->getUser(); + if (!isa<ICmpInst>(User)) + continue; + const SCEV *CondStride = Stride; + IVStrideUse *Use = &*SI->second->Users.begin(); + if (!OptimizeLoopCountIVOfStride(CondStride, Use, L)) + continue; + ThisChanged = true; + // Now check if it's possible to reuse this iv for other stride uses. + for (unsigned j = 0, ee = IU->StrideOrder.size(); j != ee; ++j) { + const SCEV *SStride = IU->StrideOrder[j]; + if (SStride == CondStride) + continue; + std::map<const SCEV *, IVUsersOfOneStride *>::iterator SII = + IU->IVUsesByStride.find(SStride); + assert(SII != IU->IVUsesByStride.end() && "Stride doesn't exist!"); + // FIXME: Generalize to non-affine IV's. + if (!SII->first->isLoopInvariant(L)) + continue; + // FIXME: Rewrite other stride using CondStride. + } + } + } + + Changed |= ThisChanged; + return ThisChanged; +} + +bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager &LPM) { IU = &getAnalysis<IVUsers>(); LI = &getAnalysis<LoopInfo>(); DT = &getAnalysis<DominatorTree>(); SE = &getAnalysis<ScalarEvolution>(); Changed = false; + // If LoopSimplify form is not available, stay out of trouble. 
+ if (!L->getLoopPreheader() || !L->getLoopLatch()) + return false; + if (!IU->IVUsesByStride.empty()) { DEBUG(errs() << "\nLSR on \"" << L->getHeader()->getParent()->getName() << "\" "; @@ -2545,7 +2771,7 @@ bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager &LPM) { // Change loop terminating condition to use the postinc iv when possible // and optimize loop terminating compare. FIXME: Move this after - // StrengthReduceStridedIVUsers? + // StrengthReduceIVUsersOfStride? OptimizeLoopTermCond(L); // FIXME: We can shrink overlarge IV's here. e.g. if the code has @@ -2561,26 +2787,12 @@ bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager &LPM) { // IVsByStride keeps IVs for one particular loop. assert(IVsByStride.empty() && "Stale entries in IVsByStride?"); - // Note: this processes each stride/type pair individually. All users - // passed into StrengthReduceStridedIVUsers have the same type AND stride. - // Also, note that we iterate over IVUsesByStride indirectly by using - // StrideOrder. This extra layer of indirection makes the ordering of - // strides deterministic - not dependent on map order. - for (unsigned Stride = 0, e = IU->StrideOrder.size(); - Stride != e; ++Stride) { - std::map<const SCEV *, IVUsersOfOneStride *>::iterator SI = - IU->IVUsesByStride.find(IU->StrideOrder[Stride]); - assert(SI != IU->IVUsesByStride.end() && "Stride doesn't exist!"); - // FIXME: Generalize to non-affine IV's. - if (!SI->first->isLoopInvariant(L)) - continue; - StrengthReduceStridedIVUsers(SI->first, *SI->second, L); - } - } + StrengthReduceIVUsers(L); - // After all sharing is done, see if we can adjust the loop to test against - // zero instead of counting up to a maximum. This is usually faster. - OptimizeLoopCountIV(L); + // After all sharing is done, see if we can adjust the loop to test against + // zero instead of counting up to a maximum. This is usually faster. + OptimizeLoopCountIV(L); + } // We're done analyzing this loop; release all the state we built up for it. IVsByStride.clear(); diff --git a/lib/Transforms/Scalar/LoopUnroll.cpp b/lib/Transforms/Scalar/LoopUnroll.cpp deleted file mode 100644 index 837ec59..0000000 --- a/lib/Transforms/Scalar/LoopUnroll.cpp +++ /dev/null @@ -1,177 +0,0 @@ -//===-- LoopUnroll.cpp - Loop unroller pass -------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This pass implements a simple loop unroller. It works best when loops have -// been canonicalized by the -indvars pass, allowing it to determine the trip -// counts of loops easily. 
-//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "loop-unroll" -#include "llvm/IntrinsicInst.h" -#include "llvm/Transforms/Scalar.h" -#include "llvm/Analysis/LoopInfo.h" -#include "llvm/Analysis/LoopPass.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/Transforms/Utils/UnrollLoop.h" -#include <climits> - -using namespace llvm; - -static cl::opt<unsigned> -UnrollThreshold("unroll-threshold", cl::init(100), cl::Hidden, - cl::desc("The cut-off point for automatic loop unrolling")); - -static cl::opt<unsigned> -UnrollCount("unroll-count", cl::init(0), cl::Hidden, - cl::desc("Use this unroll count for all loops, for testing purposes")); - -static cl::opt<bool> -UnrollAllowPartial("unroll-allow-partial", cl::init(false), cl::Hidden, - cl::desc("Allows loops to be partially unrolled until " - "-unroll-threshold loop size is reached.")); - -namespace { - class LoopUnroll : public LoopPass { - public: - static char ID; // Pass ID, replacement for typeid - LoopUnroll() : LoopPass(&ID) {} - - /// A magic value for use with the Threshold parameter to indicate - /// that the loop unroll should be performed regardless of how much - /// code expansion would result. - static const unsigned NoThreshold = UINT_MAX; - - bool runOnLoop(Loop *L, LPPassManager &LPM); - - /// This transformation requires natural loop information & requires that - /// loop preheaders be inserted into the CFG... - /// - virtual void getAnalysisUsage(AnalysisUsage &AU) const { - AU.addRequiredID(LoopSimplifyID); - AU.addRequiredID(LCSSAID); - AU.addRequired<LoopInfo>(); - AU.addPreservedID(LCSSAID); - AU.addPreserved<LoopInfo>(); - // FIXME: Loop unroll requires LCSSA. And LCSSA requires dom info. - // If loop unroll does not preserve dom info then LCSSA pass on next - // loop will receive invalid dom info. - // For now, recreate dom info, if loop is unrolled. - AU.addPreserved<DominatorTree>(); - AU.addPreserved<DominanceFrontier>(); - } - }; -} - -char LoopUnroll::ID = 0; -static RegisterPass<LoopUnroll> X("loop-unroll", "Unroll loops"); - -Pass *llvm::createLoopUnrollPass() { return new LoopUnroll(); } - -/// ApproximateLoopSize - Approximate the size of the loop. -static unsigned ApproximateLoopSize(const Loop *L) { - unsigned Size = 0; - for (Loop::block_iterator I = L->block_begin(), E = L->block_end(); - I != E; ++I) { - BasicBlock *BB = *I; - Instruction *Term = BB->getTerminator(); - for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) { - if (isa<PHINode>(I) && BB == L->getHeader()) { - // Ignore PHI nodes in the header. - } else if (I->hasOneUse() && I->use_back() == Term) { - // Ignore instructions only used by the loop terminator. - } else if (isa<DbgInfoIntrinsic>(I)) { - // Ignore debug instructions - } else if (isa<GetElementPtrInst>(I) && I->hasOneUse()) { - // Ignore GEP as they generally are subsumed into a load or store. - } else if (isa<CallInst>(I)) { - // Estimate size overhead introduced by call instructions which - // is higher than other instructions. Here 3 and 10 are magic - // numbers that help one isolated test case from PR2067 without - // negatively impacting measured benchmarks. - Size += isa<IntrinsicInst>(I) ? 3 : 10; - } else { - ++Size; - } - - // TODO: Ignore expressions derived from PHI and constants if inval of phi - // is a constant, or if operation is associative. This will get induction - // variables. 
- } - } - - return Size; -} - -bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) { - assert(L->isLCSSAForm()); - LoopInfo *LI = &getAnalysis<LoopInfo>(); - - BasicBlock *Header = L->getHeader(); - DEBUG(errs() << "Loop Unroll: F[" << Header->getParent()->getName() - << "] Loop %" << Header->getName() << "\n"); - (void)Header; - - // Find trip count - unsigned TripCount = L->getSmallConstantTripCount(); - unsigned Count = UnrollCount; - - // Automatically select an unroll count. - if (Count == 0) { - // Conservative heuristic: if we know the trip count, see if we can - // completely unroll (subject to the threshold, checked below); otherwise - // try to find greatest modulo of the trip count which is still under - // threshold value. - if (TripCount == 0) - return false; - Count = TripCount; - } - - // Enforce the threshold. - if (UnrollThreshold != NoThreshold) { - unsigned LoopSize = ApproximateLoopSize(L); - DEBUG(errs() << " Loop Size = " << LoopSize << "\n"); - uint64_t Size = (uint64_t)LoopSize*Count; - if (TripCount != 1 && Size > UnrollThreshold) { - DEBUG(errs() << " Too large to fully unroll with count: " << Count - << " because size: " << Size << ">" << UnrollThreshold << "\n"); - if (!UnrollAllowPartial) { - DEBUG(errs() << " will not try to unroll partially because " - << "-unroll-allow-partial not given\n"); - return false; - } - // Reduce unroll count to be modulo of TripCount for partial unrolling - Count = UnrollThreshold / LoopSize; - while (Count != 0 && TripCount%Count != 0) { - Count--; - } - if (Count < 2) { - DEBUG(errs() << " could not unroll partially\n"); - return false; - } - DEBUG(errs() << " partially unrolling with count: " << Count << "\n"); - } - } - - // Unroll the loop. - Function *F = L->getHeader()->getParent(); - if (!UnrollLoop(L, Count, LI, &LPM)) - return false; - - // FIXME: Reconstruct dom info, because it is not preserved properly. - DominatorTree *DT = getAnalysisIfAvailable<DominatorTree>(); - if (DT) { - DT->runOnFunction(*F); - DominanceFrontier *DF = getAnalysisIfAvailable<DominanceFrontier>(); - if (DF) - DF->runOnFunction(*F); - } - return true; -} diff --git a/lib/Transforms/Scalar/LoopUnswitch.cpp b/lib/Transforms/Scalar/LoopUnswitch.cpp index c7b00da..38d267a 100644 --- a/lib/Transforms/Scalar/LoopUnswitch.cpp +++ b/lib/Transforms/Scalar/LoopUnswitch.cpp @@ -32,7 +32,6 @@ #include "llvm/DerivedTypes.h" #include "llvm/Function.h" #include "llvm/Instructions.h" -#include "llvm/LLVMContext.h" #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/InlineCost.h" #include "llvm/Analysis/LoopInfo.h" @@ -407,6 +406,10 @@ bool LoopUnswitch::UnswitchIfProfitable(Value *LoopCond, Constant *Val){ initLoopData(); Function *F = loopHeader->getParent(); + // If LoopSimplify was unable to form a preheader, don't do any unswitching. + if (!loopPreheader) + return false; + // If the condition is trivial, always unswitch. There is no code growth for // this case. if (!IsTrivialUnswitchCondition(LoopCond)) { @@ -957,7 +960,7 @@ void LoopUnswitch::SimplifyCode(std::vector<Instruction*> &Worklist, Loop *L) { Worklist.pop_back(); // Simple constant folding. 
- if (Constant *C = ConstantFoldInstruction(I, I->getContext())) { + if (Constant *C = ConstantFoldInstruction(I)) { ReplaceUsesOfWith(I, C, Worklist, L, LPM); continue; } diff --git a/lib/Transforms/Scalar/Reassociate.cpp b/lib/Transforms/Scalar/Reassociate.cpp index af29f97..8466918 100644 --- a/lib/Transforms/Scalar/Reassociate.cpp +++ b/lib/Transforms/Scalar/Reassociate.cpp @@ -27,7 +27,6 @@ #include "llvm/Function.h" #include "llvm/Instructions.h" #include "llvm/IntrinsicInst.h" -#include "llvm/LLVMContext.h" #include "llvm/Pass.h" #include "llvm/Assembly/Writer.h" #include "llvm/Support/CFG.h" @@ -198,8 +197,7 @@ static BinaryOperator *isReassociableOp(Value *V, unsigned Opcode) { /// LowerNegateToMultiply - Replace 0-X with X*-1. /// static Instruction *LowerNegateToMultiply(Instruction *Neg, - std::map<AssertingVH<>, unsigned> &ValueRankMap, - LLVMContext &Context) { + std::map<AssertingVH<>, unsigned> &ValueRankMap) { Constant *Cst = Constant::getAllOnesValue(Neg->getType()); Instruction *Res = BinaryOperator::CreateMul(Neg->getOperand(1), Cst, "",Neg); @@ -255,7 +253,6 @@ void Reassociate::LinearizeExprTree(BinaryOperator *I, std::vector<ValueEntry> &Ops) { Value *LHS = I->getOperand(0), *RHS = I->getOperand(1); unsigned Opcode = I->getOpcode(); - LLVMContext &Context = I->getContext(); // First step, linearize the expression if it is in ((A+B)+(C+D)) form. BinaryOperator *LHSBO = isReassociableOp(LHS, Opcode); @@ -265,13 +262,11 @@ void Reassociate::LinearizeExprTree(BinaryOperator *I, // transform them into multiplies by -1 so they can be reassociated. if (I->getOpcode() == Instruction::Mul) { if (!LHSBO && LHS->hasOneUse() && BinaryOperator::isNeg(LHS)) { - LHS = LowerNegateToMultiply(cast<Instruction>(LHS), - ValueRankMap, Context); + LHS = LowerNegateToMultiply(cast<Instruction>(LHS), ValueRankMap); LHSBO = isReassociableOp(LHS, Opcode); } if (!RHSBO && RHS->hasOneUse() && BinaryOperator::isNeg(RHS)) { - RHS = LowerNegateToMultiply(cast<Instruction>(RHS), - ValueRankMap, Context); + RHS = LowerNegateToMultiply(cast<Instruction>(RHS), ValueRankMap); RHSBO = isReassociableOp(RHS, Opcode); } } @@ -373,7 +368,7 @@ void Reassociate::RewriteExprTree(BinaryOperator *I, // version of the value is returned, and BI is left pointing at the instruction // that should be processed next by the reassociation pass. // -static Value *NegateValue(LLVMContext &Context, Value *V, Instruction *BI) { +static Value *NegateValue(Value *V, Instruction *BI) { // We are trying to expose opportunity for reassociation. One of the things // that we want to do to achieve this is to push a negation as deep into an // expression chain as possible, to expose the add instructions. In practice, @@ -386,8 +381,8 @@ static Value *NegateValue(LLVMContext &Context, Value *V, Instruction *BI) { if (Instruction *I = dyn_cast<Instruction>(V)) if (I->getOpcode() == Instruction::Add && I->hasOneUse()) { // Push the negates through the add. - I->setOperand(0, NegateValue(Context, I->getOperand(0), BI)); - I->setOperand(1, NegateValue(Context, I->getOperand(1), BI)); + I->setOperand(0, NegateValue(I->getOperand(0), BI)); + I->setOperand(1, NegateValue(I->getOperand(1), BI)); // We must move the add instruction here, because the neg instructions do // not dominate the old add instruction in general. By moving it, we are @@ -407,7 +402,7 @@ static Value *NegateValue(LLVMContext &Context, Value *V, Instruction *BI) { /// ShouldBreakUpSubtract - Return true if we should break up this subtract of /// X-Y into (X + -Y). 
-static bool ShouldBreakUpSubtract(LLVMContext &Context, Instruction *Sub) { +static bool ShouldBreakUpSubtract(Instruction *Sub) { // If this is a negation, we can't split it up! if (BinaryOperator::isNeg(Sub)) return false; @@ -431,7 +426,7 @@ static bool ShouldBreakUpSubtract(LLVMContext &Context, Instruction *Sub) { /// BreakUpSubtract - If we have (X-Y), and if either X is an add, or if this is /// only used by an add, transform this into (X+(0-Y)) to promote better /// reassociation. -static Instruction *BreakUpSubtract(LLVMContext &Context, Instruction *Sub, +static Instruction *BreakUpSubtract(Instruction *Sub, std::map<AssertingVH<>, unsigned> &ValueRankMap) { // Convert a subtract into an add and a neg instruction... so that sub // instructions can be commuted with other add instructions... @@ -439,7 +434,7 @@ static Instruction *BreakUpSubtract(LLVMContext &Context, Instruction *Sub, // Calculate the negative value of Operand 1 of the sub instruction... // and set it as the RHS of the add instruction we just made... // - Value *NegVal = NegateValue(Context, Sub->getOperand(1), Sub); + Value *NegVal = NegateValue(Sub->getOperand(1), Sub); Instruction *New = BinaryOperator::CreateAdd(Sub->getOperand(0), NegVal, "", Sub); New->takeName(Sub); @@ -457,8 +452,7 @@ static Instruction *BreakUpSubtract(LLVMContext &Context, Instruction *Sub, /// by one, change this into a multiply by a constant to assist with further /// reassociation. static Instruction *ConvertShiftToMul(Instruction *Shl, - std::map<AssertingVH<>, unsigned> &ValueRankMap, - LLVMContext &Context) { + std::map<AssertingVH<>, unsigned> &ValueRankMap) { // If an operand of this shift is a reassociable multiply, or if the shift // is used by a reassociable multiply or add, turn into a multiply. if (isReassociableOp(Shl->getOperand(0), Instruction::Mul) || @@ -781,13 +775,11 @@ Value *Reassociate::OptimizeExpression(BinaryOperator *I, /// ReassociateBB - Inspect all of the instructions in this basic block, /// reassociating them as we go. void Reassociate::ReassociateBB(BasicBlock *BB) { - LLVMContext &Context = BB->getContext(); - for (BasicBlock::iterator BBI = BB->begin(); BBI != BB->end(); ) { Instruction *BI = BBI++; if (BI->getOpcode() == Instruction::Shl && isa<ConstantInt>(BI->getOperand(1))) - if (Instruction *NI = ConvertShiftToMul(BI, ValueRankMap, Context)) { + if (Instruction *NI = ConvertShiftToMul(BI, ValueRankMap)) { MadeChange = true; BI = NI; } @@ -800,8 +792,8 @@ void Reassociate::ReassociateBB(BasicBlock *BB) { // If this is a subtract instruction which is not already in negate form, // see if we can convert it to X+-Y. if (BI->getOpcode() == Instruction::Sub) { - if (ShouldBreakUpSubtract(Context, BI)) { - BI = BreakUpSubtract(Context, BI, ValueRankMap); + if (ShouldBreakUpSubtract(BI)) { + BI = BreakUpSubtract(BI, ValueRankMap); MadeChange = true; } else if (BinaryOperator::isNeg(BI)) { // Otherwise, this is a negation. 
See if the operand is a multiply tree @@ -809,7 +801,7 @@ void Reassociate::ReassociateBB(BasicBlock *BB) { if (isReassociableOp(BI->getOperand(1), Instruction::Mul) && (!BI->hasOneUse() || !isReassociableOp(BI->use_back(), Instruction::Mul))) { - BI = LowerNegateToMultiply(BI, ValueRankMap, Context); + BI = LowerNegateToMultiply(BI, ValueRankMap); MadeChange = true; } } diff --git a/lib/Transforms/Scalar/SCCP.cpp b/lib/Transforms/Scalar/SCCP.cpp index 509a6db..c202a2c 100644 --- a/lib/Transforms/Scalar/SCCP.cpp +++ b/lib/Transforms/Scalar/SCCP.cpp @@ -795,9 +795,14 @@ void SCCPSolver::visitExtractValueInst(ExtractValueInst &EVI) { return markOverdefined(&EVI); Value *AggVal = EVI.getAggregateOperand(); - unsigned i = *EVI.idx_begin(); - LatticeVal EltVal = getStructValueState(AggVal, i); - mergeInValue(getValueState(&EVI), &EVI, EltVal); + if (isa<StructType>(AggVal->getType())) { + unsigned i = *EVI.idx_begin(); + LatticeVal EltVal = getStructValueState(AggVal, i); + mergeInValue(getValueState(&EVI), &EVI, EltVal); + } else { + // Otherwise, must be extracting from an array. + return markOverdefined(&EVI); + } } void SCCPSolver::visitInsertValueInst(InsertValueInst &IVI) { diff --git a/lib/Transforms/Scalar/SCCVN.cpp b/lib/Transforms/Scalar/SCCVN.cpp index c047fca..001267a 100644 --- a/lib/Transforms/Scalar/SCCVN.cpp +++ b/lib/Transforms/Scalar/SCCVN.cpp @@ -507,7 +507,7 @@ void ValueTable::erase(Value *V) { /// verifyRemoved - Verify that the value is removed from all internal data /// structures. void ValueTable::verifyRemoved(const Value *V) const { - for (DenseMap<Value*, uint32_t>::iterator + for (DenseMap<Value*, uint32_t>::const_iterator I = valueNumbering.begin(), E = valueNumbering.end(); I != E; ++I) { assert(I->first != V && "Inst still occurs in value numbering map!"); } @@ -629,9 +629,6 @@ bool SCCVN::runOnFunction(Function& F) { } } - // FIXME: This code is commented out for now, because it can lead to the - // insertion of a lot of redundant PHIs being inserted by SSAUpdater. -#if 0 // Perform a forward data-flow to compute availability at all points on // the CFG. do { @@ -709,7 +706,6 @@ bool SCCVN::runOnFunction(Function& F) { CurInst->eraseFromParent(); } } -#endif VT.clear(); for (DenseMap<BasicBlock*, ValueNumberScope*>::iterator diff --git a/lib/Transforms/Scalar/Scalar.cpp b/lib/Transforms/Scalar/Scalar.cpp index 5669da0..b54565c 100644 --- a/lib/Transforms/Scalar/Scalar.cpp +++ b/lib/Transforms/Scalar/Scalar.cpp @@ -26,10 +26,6 @@ void LLVMAddCFGSimplificationPass(LLVMPassManagerRef PM) { unwrap(PM)->add(createCFGSimplificationPass()); } -void LLVMAddCondPropagationPass(LLVMPassManagerRef PM) { - unwrap(PM)->add(createCondPropagationPass()); -} - void LLVMAddDeadStoreEliminationPass(LLVMPassManagerRef PM) { unwrap(PM)->add(createDeadStoreEliminationPass()); } diff --git a/lib/Transforms/Scalar/SimplifyLibCalls.cpp b/lib/Transforms/Scalar/SimplifyLibCalls.cpp index 575c93b..611505e 100644 --- a/lib/Transforms/Scalar/SimplifyLibCalls.cpp +++ b/lib/Transforms/Scalar/SimplifyLibCalls.cpp @@ -100,7 +100,7 @@ public: /// EmitPutChar - Emit a call to the putchar function. This assumes that Char /// is an integer. - void EmitPutChar(Value *Char, IRBuilder<> &B); + Value *EmitPutChar(Value *Char, IRBuilder<> &B); /// EmitPutS - Emit a call to the puts function. This assumes that Str is /// some pointer. @@ -252,18 +252,20 @@ Value *LibCallOptimization::EmitUnaryFloatFnCall(Value *Op, const char *Name, /// EmitPutChar - Emit a call to the putchar function. 
This assumes that Char /// is an integer. -void LibCallOptimization::EmitPutChar(Value *Char, IRBuilder<> &B) { +Value *LibCallOptimization::EmitPutChar(Value *Char, IRBuilder<> &B) { Module *M = Caller->getParent(); Value *PutChar = M->getOrInsertFunction("putchar", Type::getInt32Ty(*Context), Type::getInt32Ty(*Context), NULL); CallInst *CI = B.CreateCall(PutChar, B.CreateIntCast(Char, Type::getInt32Ty(*Context), + /*isSigned*/true, "chari"), "putchar"); if (const Function *F = dyn_cast<Function>(PutChar->stripPointerCasts())) CI->setCallingConv(F->getCallingConv()); + return CI; } /// EmitPutS - Emit a call to the puts function. This assumes that Str is @@ -302,7 +304,8 @@ void LibCallOptimization::EmitFPutC(Value *Char, Value *File, IRBuilder<> &B) { Type::getInt32Ty(*Context), Type::getInt32Ty(*Context), File->getType(), NULL); - Char = B.CreateIntCast(Char, Type::getInt32Ty(*Context), "chari"); + Char = B.CreateIntCast(Char, Type::getInt32Ty(*Context), /*isSigned*/true, + "chari"); CallInst *CI = B.CreateCall2(F, Char, File, "fputc"); if (const Function *Fn = dyn_cast<Function>(F->stripPointerCasts())) @@ -955,6 +958,17 @@ struct MemCmpOpt : public LibCallOptimization { return B.CreateZExt(B.CreateXor(LHSV, RHSV, "shortdiff"), CI->getType()); } + // Constant folding: memcmp(x, y, l) -> cnst (all arguments are constant) + std::string LHSStr, RHSStr; + if (GetConstantStringInfo(LHS, LHSStr) && + GetConstantStringInfo(RHS, RHSStr)) { + // Make sure we're not reading out-of-bounds memory. + if (Len > LHSStr.length() || Len > RHSStr.length()) + return 0; + uint64_t Ret = memcmp(LHSStr.data(), RHSStr.data(), Len); + return ConstantInt::get(CI->getType(), Ret); + } + return 0; } }; @@ -1314,11 +1328,13 @@ struct PrintFOpt : public LibCallOptimization { return CI->use_empty() ? (Value*)CI : ConstantInt::get(CI->getType(), 0); - // printf("x") -> putchar('x'), even for '%'. + // printf("x") -> putchar('x'), even for '%'. Return the result of putchar + // in case there is an error writing to stdout. if (FormatStr.size() == 1) { - EmitPutChar(ConstantInt::get(Type::getInt32Ty(*Context), FormatStr[0]), B); - return CI->use_empty() ? (Value*)CI : - ConstantInt::get(CI->getType(), 1); + Value *Res = EmitPutChar(ConstantInt::get(Type::getInt32Ty(*Context), + FormatStr[0]), B); + if (CI->use_empty()) return CI; + return B.CreateIntCast(Res, CI->getType(), true); } // printf("foo\n") --> puts("foo") @@ -1339,9 +1355,10 @@ struct PrintFOpt : public LibCallOptimization { // printf("%c", chr) --> putchar(*(i8*)dst) if (FormatStr == "%c" && CI->getNumOperands() > 2 && isa<IntegerType>(CI->getOperand(2)->getType())) { - EmitPutChar(CI->getOperand(2), B); - return CI->use_empty() ? 
(Value*)CI : - ConstantInt::get(CI->getType(), 1); + Value *Res = EmitPutChar(CI->getOperand(2), B); + + if (CI->use_empty()) return CI; + return B.CreateIntCast(Res, CI->getType(), true); } // printf("%s\n", str) --> puts(str) @@ -2479,10 +2496,6 @@ bool SimplifyLibCalls::doInitialization(Module &M) { // lround, lroundf, lroundl: // * lround(cnst) -> cnst' // -// memcmp: -// * memcmp(x,y,l) -> cnst -// (if all arguments are constant and strlen(x) <= l and strlen(y) <= l) -// // pow, powf, powl: // * pow(exp(x),y) -> exp(x*y) // * pow(sqrt(x),y) -> pow(x,y*0.5) diff --git a/lib/Transforms/Scalar/TailDuplication.cpp b/lib/Transforms/Scalar/TailDuplication.cpp index 4864e23..b06ae3d 100644 --- a/lib/Transforms/Scalar/TailDuplication.cpp +++ b/lib/Transforms/Scalar/TailDuplication.cpp @@ -359,8 +359,7 @@ void TailDup::eliminateUnconditionalBranch(BranchInst *Branch) { Instruction *Inst = BI++; if (isInstructionTriviallyDead(Inst)) Inst->eraseFromParent(); - else if (Constant *C = ConstantFoldInstruction(Inst, - Inst->getContext())) { + else if (Constant *C = ConstantFoldInstruction(Inst)) { Inst->replaceAllUsesWith(C); Inst->eraseFromParent(); } diff --git a/lib/Transforms/Scalar/TailRecursionElimination.cpp b/lib/Transforms/Scalar/TailRecursionElimination.cpp index b56e170..4119cb9 100644 --- a/lib/Transforms/Scalar/TailRecursionElimination.cpp +++ b/lib/Transforms/Scalar/TailRecursionElimination.cpp @@ -25,7 +25,7 @@ // unlikely, that the return returns something else (like constant 0), and // can still be TRE'd. It can be TRE'd if ALL OTHER return instructions in // the function return the exact same value. -// 4. If it can prove that callees do not access theier caller stack frame, +// 4. If it can prove that callees do not access their caller stack frame, // they are marked as eligible for tail call elimination (by the code // generator). // @@ -58,6 +58,7 @@ #include "llvm/Function.h" #include "llvm/Instructions.h" #include "llvm/Pass.h" +#include "llvm/Analysis/CaptureTracking.h" #include "llvm/Support/CFG.h" #include "llvm/ADT/Statistic.h" using namespace llvm; @@ -75,7 +76,7 @@ namespace { private: bool ProcessReturningBlock(ReturnInst *RI, BasicBlock *&OldEntry, bool &TailCallsAreMarkedTail, - std::vector<PHINode*> &ArgumentPHIs, + SmallVector<PHINode*, 8> &ArgumentPHIs, bool CannotTailCallElimCallsMarkedTail); bool CanMoveAboveCall(Instruction *I, CallInst *CI); Value *CanTransformAccumulatorRecursion(Instruction *I, CallInst *CI); @@ -90,7 +91,6 @@ FunctionPass *llvm::createTailCallEliminationPass() { return new TailCallElim(); } - /// AllocaMightEscapeToCalls - Return true if this alloca may be accessed by /// callees of this function. We only do very simple analysis right now, this /// could be expanded in the future to use mod/ref information for particular @@ -100,7 +100,7 @@ static bool AllocaMightEscapeToCalls(AllocaInst *AI) { return true; } -/// FunctionContainsAllocas - Scan the specified basic block for alloca +/// CheckForEscapingAllocas - Scan the specified basic block for alloca /// instructions. If it contains any that might be accessed by calls, return /// true. 
static bool CheckForEscapingAllocas(BasicBlock *BB, @@ -127,7 +127,7 @@ bool TailCallElim::runOnFunction(Function &F) { BasicBlock *OldEntry = 0; bool TailCallsAreMarkedTail = false; - std::vector<PHINode*> ArgumentPHIs; + SmallVector<PHINode*, 8> ArgumentPHIs; bool MadeChange = false; bool FunctionContainsEscapingAllocas = false; @@ -154,7 +154,6 @@ bool TailCallElim::runOnFunction(Function &F) { /// happen. This bug is PR962. if (FunctionContainsEscapingAllocas) return false; - // Second pass, change any tail calls to loops. for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) @@ -204,7 +203,7 @@ bool TailCallElim::CanMoveAboveCall(Instruction *I, CallInst *CI) { if (I->mayHaveSideEffects()) // This also handles volatile loads. return false; - if (LoadInst* L = dyn_cast<LoadInst>(I)) { + if (LoadInst *L = dyn_cast<LoadInst>(I)) { // Loads may always be moved above calls without side effects. if (CI->mayHaveSideEffects()) { // Non-volatile loads may be moved above a call with side effects if it @@ -235,7 +234,7 @@ bool TailCallElim::CanMoveAboveCall(Instruction *I, CallInst *CI) { // We currently handle static constants and arguments that are not modified as // part of the recursion. // -static bool isDynamicConstant(Value *V, CallInst *CI) { +static bool isDynamicConstant(Value *V, CallInst *CI, ReturnInst *RI) { if (isa<Constant>(V)) return true; // Static constants are always dyn consts // Check to see if this is an immutable argument, if so, the value @@ -253,6 +252,15 @@ static bool isDynamicConstant(Value *V, CallInst *CI) { if (CI->getOperand(ArgNo+1) == Arg) return true; } + + // Switch cases are always constant integers. If the value is being switched + // on and the return is only reachable from one of its cases, it's + // effectively constant. + if (BasicBlock *UniquePred = RI->getParent()->getUniquePredecessor()) + if (SwitchInst *SI = dyn_cast<SwitchInst>(UniquePred->getTerminator())) + if (SI->getCondition() == V) + return SI->getDefaultDest() != RI->getParent(); + // Not a constant or immutable argument, we can't safely transform. return false; } @@ -265,10 +273,6 @@ static Value *getCommonReturnValue(ReturnInst *TheRI, CallInst *CI) { Function *F = TheRI->getParent()->getParent(); Value *ReturnedValue = 0; - // TODO: Handle multiple value ret instructions; - if (isa<StructType>(F->getReturnType())) - return 0; - for (Function::iterator BBI = F->begin(), E = F->end(); BBI != E; ++BBI) if (ReturnInst *RI = dyn_cast<ReturnInst>(BBI->getTerminator())) if (RI != TheRI) { @@ -278,7 +282,7 @@ static Value *getCommonReturnValue(ReturnInst *TheRI, CallInst *CI) { // evaluatable at the start of the initial invocation of the function, // instead of at the end of the evaluation. 
// - if (!isDynamicConstant(RetOp, CI)) + if (!isDynamicConstant(RetOp, CI, RI)) return 0; if (ReturnedValue && RetOp != ReturnedValue) @@ -315,7 +319,7 @@ Value *TailCallElim::CanTransformAccumulatorRecursion(Instruction *I, bool TailCallElim::ProcessReturningBlock(ReturnInst *Ret, BasicBlock *&OldEntry, bool &TailCallsAreMarkedTail, - std::vector<PHINode*> &ArgumentPHIs, + SmallVector<PHINode*, 8> &ArgumentPHIs, bool CannotTailCallElimCallsMarkedTail) { BasicBlock *BB = Ret->getParent(); Function *F = BB->getParent(); diff --git a/lib/Transforms/Utils/BasicBlockUtils.cpp b/lib/Transforms/Utils/BasicBlockUtils.cpp index c728c0b..2974592 100644 --- a/lib/Transforms/Utils/BasicBlockUtils.cpp +++ b/lib/Transforms/Utils/BasicBlockUtils.cpp @@ -275,8 +275,6 @@ void llvm::RemoveSuccessor(TerminatorInst *TI, unsigned SuccNum) { /// SplitEdge - Split the edge connecting specified block. Pass P must /// not be NULL. BasicBlock *llvm::SplitEdge(BasicBlock *BB, BasicBlock *Succ, Pass *P) { - assert(!isa<IndirectBrInst>(BB->getTerminator()) && - "Cannot split an edge from an IndirectBrInst"); TerminatorInst *LatchTerm = BB->getTerminator(); unsigned SuccNum = 0; #ifndef NDEBUG @@ -386,6 +384,12 @@ BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB, bool IsLoopEntry = !!L; bool SplitMakesNewLoopHeader = false; for (unsigned i = 0; i != NumPreds; ++i) { + // This is slightly more strict than necessary; the minimum requirement + // is that there be no more than one indirectbr branching to BB. And + // all BlockAddress uses would need to be updated. + assert(!isa<IndirectBrInst>(Preds[i]->getTerminator()) && + "Cannot split an edge from an IndirectBrInst"); + Preds[i]->getTerminator()->replaceUsesOfWith(BB, NewBB); if (LI) { diff --git a/lib/Transforms/Utils/CloneFunction.cpp b/lib/Transforms/Utils/CloneFunction.cpp index fd8862c..162d7b3 100644 --- a/lib/Transforms/Utils/CloneFunction.cpp +++ b/lib/Transforms/Utils/CloneFunction.cpp @@ -20,6 +20,7 @@ #include "llvm/IntrinsicInst.h" #include "llvm/GlobalVariable.h" #include "llvm/Function.h" +#include "llvm/LLVMContext.h" #include "llvm/Support/CFG.h" #include "llvm/Transforms/Utils/ValueMapper.h" #include "llvm/Analysis/ConstantFolding.h" @@ -322,8 +323,6 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB, /// mapping its operands through ValueMap if they are available. Constant *PruningFunctionCloner:: ConstantFoldMappedInstruction(const Instruction *I) { - LLVMContext &Context = I->getContext(); - SmallVector<Constant*, 8> Ops; for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) if (Constant *Op = dyn_cast_or_null<Constant>(MapValue(I->getOperand(i), @@ -333,9 +332,8 @@ ConstantFoldMappedInstruction(const Instruction *I) { return 0; // All operands not constant! 
if (const CmpInst *CI = dyn_cast<CmpInst>(I)) - return ConstantFoldCompareInstOperands(CI->getPredicate(), - &Ops[0], Ops.size(), - Context, TD); + return ConstantFoldCompareInstOperands(CI->getPredicate(), Ops[0], Ops[1], + TD); if (const LoadInst *LI = dyn_cast<LoadInst>(I)) if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ops[0])) @@ -346,7 +344,28 @@ ConstantFoldMappedInstruction(const Instruction *I) { CE); return ConstantFoldInstOperands(I->getOpcode(), I->getType(), &Ops[0], - Ops.size(), Context, TD); + Ops.size(), TD); +} + +static MDNode *UpdateInlinedAtInfo(MDNode *InsnMD, MDNode *TheCallMD, + LLVMContext &Context) { + DILocation ILoc(InsnMD); + if (ILoc.isNull()) return InsnMD; + + DILocation CallLoc(TheCallMD); + if (CallLoc.isNull()) return InsnMD; + + DILocation OrigLocation = ILoc.getOrigLocation(); + MDNode *NewLoc = TheCallMD; + if (!OrigLocation.isNull()) + NewLoc = UpdateInlinedAtInfo(OrigLocation.getNode(), TheCallMD, Context); + + SmallVector<Value *, 4> MDVs; + MDVs.push_back(InsnMD->getElement(0)); // Line + MDVs.push_back(InsnMD->getElement(1)); // Col + MDVs.push_back(InsnMD->getElement(2)); // Scope + MDVs.push_back(NewLoc); + return MDNode::get(Context, MDVs.data(), MDVs.size()); } /// CloneAndPruneFunctionInto - This works exactly like CloneFunctionInto, @@ -361,7 +380,8 @@ void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc, SmallVectorImpl<ReturnInst*> &Returns, const char *NameSuffix, ClonedCodeInfo *CodeInfo, - const TargetData *TD) { + const TargetData *TD, + Instruction *TheCall) { assert(NameSuffix && "NameSuffix cannot be null!"); #ifndef NDEBUG @@ -400,19 +420,52 @@ void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc, // references as we go. This uses ValueMap to do all the hard work. // BasicBlock::iterator I = NewBB->begin(); + + LLVMContext &Context = OldFunc->getContext(); + unsigned DbgKind = Context.getMetadata().getMDKind("dbg"); + MDNode *TheCallMD = NULL; + SmallVector<Value *, 4> MDVs; + if (TheCall && TheCall->hasMetadata()) + TheCallMD = Context.getMetadata().getMD(DbgKind, TheCall); // Handle PHI nodes specially, as we have to remove references to dead // blocks. if (PHINode *PN = dyn_cast<PHINode>(I)) { // Skip over all PHI nodes, remembering them for later. BasicBlock::const_iterator OldI = BI->begin(); - for (; (PN = dyn_cast<PHINode>(I)); ++I, ++OldI) + for (; (PN = dyn_cast<PHINode>(I)); ++I, ++OldI) { + if (I->hasMetadata()) { + if (TheCallMD) { + if (MDNode *IMD = Context.getMetadata().getMD(DbgKind, I)) { + MDNode *NewMD = UpdateInlinedAtInfo(IMD, TheCallMD, Context); + Context.getMetadata().addMD(DbgKind, NewMD, I); + } + } else { + // The cloned instruction has dbg info but the call instruction + // does not have dbg info. Remove dbg info from cloned instruction. + Context.getMetadata().removeMD(DbgKind, I); + } + } PHIToResolve.push_back(cast<PHINode>(OldI)); + } } // Otherwise, remap the rest of the instructions normally. - for (; I != NewBB->end(); ++I) + for (; I != NewBB->end(); ++I) { + if (I->hasMetadata()) { + if (TheCallMD) { + if (MDNode *IMD = Context.getMetadata().getMD(DbgKind, I)) { + MDNode *NewMD = UpdateInlinedAtInfo(IMD, TheCallMD, Context); + Context.getMetadata().addMD(DbgKind, NewMD, I); + } + } else { + // The cloned instruction has dbg info but the call instruction + // does not have dbg info. Remove dbg info from cloned instruction. 
+ Context.getMetadata().removeMD(DbgKind, I); + } + } RemapInstruction(I, ValueMap); + } } // Defer PHI resolution until rest of function is resolved, PHI resolution diff --git a/lib/Transforms/Utils/InlineFunction.cpp b/lib/Transforms/Utils/InlineFunction.cpp index 20f5a4a..043046c 100644 --- a/lib/Transforms/Utils/InlineFunction.cpp +++ b/lib/Transforms/Utils/InlineFunction.cpp @@ -386,7 +386,7 @@ bool llvm::InlineFunction(CallSite CS, CallGraph *CG, const TargetData *TD, // (which can happen, e.g., because an argument was constant), but we'll be // happy with whatever the cloner can do. CloneAndPruneFunctionInto(Caller, CalledFunc, ValueMap, Returns, ".i", - &InlinedFunctionInfo, TD); + &InlinedFunctionInfo, TD, TheCall); // Remember the first block that is newly cloned over. FirstNewBlock = LastBlock; ++FirstNewBlock; diff --git a/lib/Transforms/Utils/LCSSA.cpp b/lib/Transforms/Utils/LCSSA.cpp index 56e662e..590d667 100644 --- a/lib/Transforms/Utils/LCSSA.cpp +++ b/lib/Transforms/Utils/LCSSA.cpp @@ -50,7 +50,6 @@ namespace { LCSSA() : LoopPass(&ID) {} // Cached analysis information for the current function. - LoopInfo *LI; DominatorTree *DT; std::vector<BasicBlock*> LoopBlocks; PredIteratorCache PredCache; @@ -64,6 +63,9 @@ namespace { /// virtual void getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesCFG(); + + // LCSSA doesn't actually require LoopSimplify, but the PassManager + // doesn't know how to schedule LoopSimplify by itself. AU.addRequiredID(LoopSimplifyID); AU.addPreservedID(LoopSimplifyID); AU.addRequiredTransitive<LoopInfo>(); @@ -121,7 +123,6 @@ static bool BlockDominatesAnExit(BasicBlock *BB, bool LCSSA::runOnLoop(Loop *TheLoop, LPPassManager &LPM) { L = TheLoop; - LI = &LPM.getAnalysis<LoopInfo>(); DT = &getAnalysis<DominatorTree>(); // Get the set of exiting blocks. @@ -216,7 +217,7 @@ bool LCSSA::ProcessInstruction(Instruction *Inst, SSAUpdate.Initialize(Inst); // Insert the LCSSA phi's into all of the exit blocks dominated by the - // value., and add them to the Phi's map. + // value, and add them to the Phi's map. for (SmallVectorImpl<BasicBlock*>::const_iterator BBI = ExitBlocks.begin(), BBE = ExitBlocks.end(); BBI != BBE; ++BBI) { BasicBlock *ExitBB = *BBI; @@ -230,8 +231,17 @@ bool LCSSA::ProcessInstruction(Instruction *Inst, PN->reserveOperandSpace(PredCache.GetNumPreds(ExitBB)); // Add inputs from inside the loop for this PHI. - for (BasicBlock **PI = PredCache.GetPreds(ExitBB); *PI; ++PI) + for (BasicBlock **PI = PredCache.GetPreds(ExitBB); *PI; ++PI) { PN->addIncoming(Inst, *PI); + + // If the exit block has a predecessor not within the loop, arrange for + // the incoming value use corresponding to that predecessor to be + // rewritten in terms of a different LCSSA PHI. + if (!inLoop(*PI)) + UsesToRewrite.push_back( + &PN->getOperandUse( + PN->getOperandNumForIncomingValue(PN->getNumIncomingValues()-1))); + } // Remember that this phi makes the value alive in this block. 
SSAUpdate.AddAvailableValue(ExitBB, PN); diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp index 543ddf1..aef0f5f 100644 --- a/lib/Transforms/Utils/Local.cpp +++ b/lib/Transforms/Utils/Local.cpp @@ -24,10 +24,14 @@ #include "llvm/ADT/SmallPtrSet.h" #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/DebugInfo.h" +#include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/ProfileInfo.h" #include "llvm/Target/TargetData.h" +#include "llvm/Support/CFG.h" +#include "llvm/Support/Debug.h" #include "llvm/Support/GetElementPtrTypeIterator.h" #include "llvm/Support/MathExtras.h" +#include "llvm/Support/raw_ostream.h" using namespace llvm; //===----------------------------------------------------------------------===// @@ -236,7 +240,7 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB) { //===----------------------------------------------------------------------===// -// Local dead code elimination... +// Local dead code elimination. // /// isInstructionTriviallyDead - Return true if the result produced by the @@ -248,6 +252,9 @@ bool llvm::isInstructionTriviallyDead(Instruction *I) { // We don't want debug info removed by anything this general. if (isa<DbgInfoIntrinsic>(I)) return false; + // Likewise for memory use markers. + if (isa<MemoryUseIntrinsic>(I)) return false; + if (!I->mayHaveSideEffects()) return true; // Special case intrinsics that "may have side effects" but can be deleted @@ -323,9 +330,53 @@ llvm::RecursivelyDeleteDeadPHINode(PHINode *PN) { } //===----------------------------------------------------------------------===// -// Control Flow Graph Restructuring... +// Control Flow Graph Restructuring. // + +/// RemovePredecessorAndSimplify - Like BasicBlock::removePredecessor, this +/// method is called when we're about to delete Pred as a predecessor of BB. If +/// BB contains any PHI nodes, this drops the entries in the PHI nodes for Pred. +/// +/// Unlike the removePredecessor method, this attempts to simplify uses of PHI +/// nodes that collapse into identity values. For example, if we have: +/// x = phi(1, 0, 0, 0) +/// y = and x, z +/// +/// .. and delete the predecessor corresponding to the '1', this will attempt to +/// recursively fold the and to 0. +void llvm::RemovePredecessorAndSimplify(BasicBlock *BB, BasicBlock *Pred, + TargetData *TD) { + // This only adjusts blocks with PHI nodes. + if (!isa<PHINode>(BB->begin())) + return; + + // Remove the entries for Pred from the PHI nodes in BB, but do not simplify + // them down. This will leave us with single entry phi nodes and other phis + // that can be removed. + BB->removePredecessor(Pred, true); + + WeakVH PhiIt = &BB->front(); + while (PHINode *PN = dyn_cast<PHINode>(PhiIt)) { + PhiIt = &*++BasicBlock::iterator(cast<Instruction>(PhiIt)); + + Value *PNV = PN->hasConstantValue(); + if (PNV == 0) continue; + + // If we're able to simplify the phi to a single value, substitute the new + // value into all of its uses. + assert(PNV != PN && "hasConstantValue broken"); + + ReplaceAndSimplifyAllUses(PN, PNV, TD); + + // If recursive simplification ended up deleting the next PHI node we would + // iterate to, then our iterator is invalid, restart scanning from the top + // of the block. + if (PhiIt == 0) PhiIt = &BB->front(); + } +} + + /// MergeBasicBlockIntoOnlyPred - DestBB is a block with one predecessor and its /// predecessor is known to have one successor (DestBB!). 
Eliminate the edge /// between them, moving the instructions in the predecessor into DestBB and @@ -362,6 +413,174 @@ void llvm::MergeBasicBlockIntoOnlyPred(BasicBlock *DestBB, Pass *P) { PredBB->eraseFromParent(); } +/// CanPropagatePredecessorsForPHIs - Return true if we can fold BB, an +/// almost-empty BB ending in an unconditional branch to Succ, into succ. +/// +/// Assumption: Succ is the single successor for BB. +/// +static bool CanPropagatePredecessorsForPHIs(BasicBlock *BB, BasicBlock *Succ) { + assert(*succ_begin(BB) == Succ && "Succ is not successor of BB!"); + + DEBUG(errs() << "Looking to fold " << BB->getName() << " into " + << Succ->getName() << "\n"); + // Shortcut, if there is only a single predecessor it must be BB and merging + // is always safe + if (Succ->getSinglePredecessor()) return true; + + // Make a list of the predecessors of BB + typedef SmallPtrSet<BasicBlock*, 16> BlockSet; + BlockSet BBPreds(pred_begin(BB), pred_end(BB)); + + // Use that list to make another list of common predecessors of BB and Succ + BlockSet CommonPreds; + for (pred_iterator PI = pred_begin(Succ), PE = pred_end(Succ); + PI != PE; ++PI) + if (BBPreds.count(*PI)) + CommonPreds.insert(*PI); + + // Shortcut, if there are no common predecessors, merging is always safe + if (CommonPreds.empty()) + return true; + + // Look at all the phi nodes in Succ, to see if they present a conflict when + // merging these blocks + for (BasicBlock::iterator I = Succ->begin(); isa<PHINode>(I); ++I) { + PHINode *PN = cast<PHINode>(I); + + // If the incoming value from BB is again a PHINode in + // BB which has the same incoming value for *PI as PN does, we can + // merge the phi nodes and then the blocks can still be merged + PHINode *BBPN = dyn_cast<PHINode>(PN->getIncomingValueForBlock(BB)); + if (BBPN && BBPN->getParent() == BB) { + for (BlockSet::iterator PI = CommonPreds.begin(), PE = CommonPreds.end(); + PI != PE; PI++) { + if (BBPN->getIncomingValueForBlock(*PI) + != PN->getIncomingValueForBlock(*PI)) { + DEBUG(errs() << "Can't fold, phi node " << PN->getName() << " in " + << Succ->getName() << " is conflicting with " + << BBPN->getName() << " with regard to common predecessor " + << (*PI)->getName() << "\n"); + return false; + } + } + } else { + Value* Val = PN->getIncomingValueForBlock(BB); + for (BlockSet::iterator PI = CommonPreds.begin(), PE = CommonPreds.end(); + PI != PE; PI++) { + // See if the incoming value for the common predecessor is equal to the + // one for BB, in which case this phi node will not prevent the merging + // of the block. + if (Val != PN->getIncomingValueForBlock(*PI)) { + DEBUG(errs() << "Can't fold, phi node " << PN->getName() << " in " + << Succ->getName() << " is conflicting with regard to common " + << "predecessor " << (*PI)->getName() << "\n"); + return false; + } + } + } + } + + return true; +} + +/// TryToSimplifyUncondBranchFromEmptyBlock - BB is known to contain an +/// unconditional branch, and contains no instructions other than PHI nodes, +/// potential debug intrinsics and the branch. If possible, eliminate BB by +/// rewriting all the predecessors to branch to the successor block and return +/// true. If we can't transform, return false. +bool llvm::TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB) { + // We can't eliminate infinite loops. 
+ BasicBlock *Succ = cast<BranchInst>(BB->getTerminator())->getSuccessor(0); + if (BB == Succ) return false; + + // Check to see if merging these blocks would cause conflicts for any of the + // phi nodes in BB or Succ. If not, we can safely merge. + if (!CanPropagatePredecessorsForPHIs(BB, Succ)) return false; + + // Check for cases where Succ has multiple predecessors and a PHI node in BB + // has uses which will not disappear when the PHI nodes are merged. It is + // possible to handle such cases, but difficult: it requires checking whether + // BB dominates Succ, which is non-trivial to calculate in the case where + // Succ has multiple predecessors. Also, it requires checking whether + // constructing the necessary self-referential PHI node doesn't intoduce any + // conflicts; this isn't too difficult, but the previous code for doing this + // was incorrect. + // + // Note that if this check finds a live use, BB dominates Succ, so BB is + // something like a loop pre-header (or rarely, a part of an irreducible CFG); + // folding the branch isn't profitable in that case anyway. + if (!Succ->getSinglePredecessor()) { + BasicBlock::iterator BBI = BB->begin(); + while (isa<PHINode>(*BBI)) { + for (Value::use_iterator UI = BBI->use_begin(), E = BBI->use_end(); + UI != E; ++UI) { + if (PHINode* PN = dyn_cast<PHINode>(*UI)) { + if (PN->getIncomingBlock(UI) != BB) + return false; + } else { + return false; + } + } + ++BBI; + } + } + + DEBUG(errs() << "Killing Trivial BB: \n" << *BB); + + if (isa<PHINode>(Succ->begin())) { + // If there is more than one pred of succ, and there are PHI nodes in + // the successor, then we need to add incoming edges for the PHI nodes + // + const SmallVector<BasicBlock*, 16> BBPreds(pred_begin(BB), pred_end(BB)); + + // Loop over all of the PHI nodes in the successor of BB. + for (BasicBlock::iterator I = Succ->begin(); isa<PHINode>(I); ++I) { + PHINode *PN = cast<PHINode>(I); + Value *OldVal = PN->removeIncomingValue(BB, false); + assert(OldVal && "No entry in PHI for Pred BB!"); + + // If this incoming value is one of the PHI nodes in BB, the new entries + // in the PHI node are the entries from the old PHI. + if (isa<PHINode>(OldVal) && cast<PHINode>(OldVal)->getParent() == BB) { + PHINode *OldValPN = cast<PHINode>(OldVal); + for (unsigned i = 0, e = OldValPN->getNumIncomingValues(); i != e; ++i) + // Note that, since we are merging phi nodes and BB and Succ might + // have common predecessors, we could end up with a phi node with + // identical incoming branches. This will be cleaned up later (and + // will trigger asserts if we try to clean it up now, without also + // simplifying the corresponding conditional branch). + PN->addIncoming(OldValPN->getIncomingValue(i), + OldValPN->getIncomingBlock(i)); + } else { + // Add an incoming value for each of the new incoming values. + for (unsigned i = 0, e = BBPreds.size(); i != e; ++i) + PN->addIncoming(OldVal, BBPreds[i]); + } + } + } + + while (PHINode *PN = dyn_cast<PHINode>(&BB->front())) { + if (Succ->getSinglePredecessor()) { + // BB is the only predecessor of Succ, so Succ will end up with exactly + // the same predecessors BB had. + Succ->getInstList().splice(Succ->begin(), + BB->getInstList(), BB->begin()); + } else { + // We explicitly check for such uses in CanPropagatePredecessorsForPHIs. + assert(PN->use_empty() && "There shouldn't be any uses here!"); + PN->eraseFromParent(); + } + } + + // Everything that jumped to BB now goes to Succ. 
+ BB->replaceAllUsesWith(Succ); + if (!Succ->hasName()) Succ->takeName(BB); + BB->eraseFromParent(); // Delete the old basic block. + return true; +} + + + /// OnlyUsedByDbgIntrinsics - Return true if the instruction I is only used /// by DbgIntrinsics. If DbgInUses is specified then the vector is filled /// with the DbgInfoIntrinsic that use the instruction I. diff --git a/lib/Transforms/Utils/LoopSimplify.cpp b/lib/Transforms/Utils/LoopSimplify.cpp index cd8d952..2ab0972 100644 --- a/lib/Transforms/Utils/LoopSimplify.cpp +++ b/lib/Transforms/Utils/LoopSimplify.cpp @@ -23,6 +23,11 @@ // // This pass also guarantees that loops will have exactly one backedge. // +// Indirectbr instructions introduce several complications. If the loop +// contains or is entered by an indirectbr instruction, it may not be possible +// to transform the loop and make these guarantees. Client code should check +// that these conditions are true before relying on them. +// // Note that the simplifycfg pass will clean up blocks which are split out but // end up being unnecessary, so usage of this pass should not pessimize // generated code. @@ -81,17 +86,15 @@ namespace { AU.addPreservedID(BreakCriticalEdgesID); // No critical edges added. } - /// verifyAnalysis() - Verify loop nest. - void verifyAnalysis() const { - assert(L->isLoopSimplifyForm() && "LoopSimplify form not preserved!"); - } + /// verifyAnalysis() - Verify LoopSimplifyForm's guarantees. + void verifyAnalysis() const; private: bool ProcessLoop(Loop *L, LPPassManager &LPM); BasicBlock *RewriteLoopExitBlock(Loop *L, BasicBlock *Exit); BasicBlock *InsertPreheaderForLoop(Loop *L); Loop *SeparateNestedLoop(Loop *L, LPPassManager &LPM); - void InsertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader); + BasicBlock *InsertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader); void PlaceSplitBlockCarefully(BasicBlock *NewBB, SmallVectorImpl<BasicBlock*> &SplitPreds, Loop *L); @@ -160,8 +163,10 @@ ReprocessLoop: BasicBlock *Preheader = L->getLoopPreheader(); if (!Preheader) { Preheader = InsertPreheaderForLoop(L); - NumInserted++; - Changed = true; + if (Preheader) { + NumInserted++; + Changed = true; + } } // Next, check to make sure that all exit nodes of the loop only have @@ -180,21 +185,22 @@ ReprocessLoop: // Must be exactly this loop: no subloops, parent loops, or non-loop preds // allowed. if (!L->contains(*PI)) { - RewriteLoopExitBlock(L, ExitBlock); - NumInserted++; - Changed = true; + if (RewriteLoopExitBlock(L, ExitBlock)) { + NumInserted++; + Changed = true; + } break; } } // If the header has more than two predecessors at this point (from the // preheader and from multiple backedges), we must adjust the loop. - unsigned NumBackedges = L->getNumBackEdges(); - if (NumBackedges != 1) { + BasicBlock *LoopLatch = L->getLoopLatch(); + if (!LoopLatch) { // If this is really a nested loop, rip it out into a child loop. Don't do // this for loops with a giant number of backedges, just factor them into a // common backedge instead. - if (NumBackedges < 8) { + if (L->getNumBackEdges() < 8) { if (SeparateNestedLoop(L, LPM)) { ++NumNested; // This is a big restructuring change, reprocess the whole loop. @@ -207,9 +213,11 @@ ReprocessLoop: // If we either couldn't, or didn't want to, identify nesting of the loops, // insert a new block that all backedges target, then make it jump to the // loop header. 
- InsertUniqueBackedgeBlock(L, Preheader); - NumInserted++; - Changed = true; + LoopLatch = InsertUniqueBackedgeBlock(L, Preheader); + if (LoopLatch) { + NumInserted++; + Changed = true; + } } // Scan over the PHI nodes in the loop header. Since they now have only two @@ -233,7 +241,14 @@ ReprocessLoop: // loop-invariant instructions out of the way to open up more // opportunities, and the disadvantage of having the responsibility // to preserve dominator information. - if (ExitBlocks.size() > 1 && L->getUniqueExitBlock()) { + bool UniqueExit = true; + if (!ExitBlocks.empty()) + for (unsigned i = 1, e = ExitBlocks.size(); i != e; ++i) + if (ExitBlocks[i] != ExitBlocks[0]) { + UniqueExit = false; + break; + } + if (UniqueExit) { SmallVector<BasicBlock*, 8> ExitingBlocks; L->getExitingBlocks(ExitingBlocks); for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i) { @@ -251,7 +266,8 @@ ReprocessLoop: Instruction *Inst = I++; if (Inst == CI) continue; - if (!L->makeLoopInvariant(Inst, Changed, Preheader->getTerminator())) { + if (!L->makeLoopInvariant(Inst, Changed, + Preheader ? Preheader->getTerminator() : 0)) { AllInvariant = false; break; } @@ -303,8 +319,15 @@ BasicBlock *LoopSimplify::InsertPreheaderForLoop(Loop *L) { SmallVector<BasicBlock*, 8> OutsideBlocks; for (pred_iterator PI = pred_begin(Header), PE = pred_end(Header); PI != PE; ++PI) - if (!L->contains(*PI)) // Coming in from outside the loop? - OutsideBlocks.push_back(*PI); // Keep track of it... + if (!L->contains(*PI)) { // Coming in from outside the loop? + // If the loop is branched to from an indirect branch, we won't + // be able to fully transform the loop, because it prohibits + // edge splitting. + if (isa<IndirectBrInst>((*PI)->getTerminator())) return 0; + + // Keep track of it. + OutsideBlocks.push_back(*PI); + } // Split out the loop pre-header. BasicBlock *NewBB = @@ -324,8 +347,12 @@ BasicBlock *LoopSimplify::InsertPreheaderForLoop(Loop *L) { BasicBlock *LoopSimplify::RewriteLoopExitBlock(Loop *L, BasicBlock *Exit) { SmallVector<BasicBlock*, 8> LoopBlocks; for (pred_iterator I = pred_begin(Exit), E = pred_end(Exit); I != E; ++I) - if (L->contains(*I)) + if (L->contains(*I)) { + // Don't do this if the loop is exited via an indirect branch. + if (isa<IndirectBrInst>((*I)->getTerminator())) return 0; + LoopBlocks.push_back(*I); + } assert(!LoopBlocks.empty() && "No edges coming in from outside the loop?"); BasicBlock *NewBB = SplitBlockPredecessors(Exit, &LoopBlocks[0], @@ -519,13 +546,18 @@ Loop *LoopSimplify::SeparateNestedLoop(Loop *L, LPPassManager &LPM) { /// backedges to target a new basic block and have that block branch to the loop /// header. This ensures that loops have exactly one backedge. /// -void LoopSimplify::InsertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader) { +BasicBlock * +LoopSimplify::InsertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader) { assert(L->getNumBackEdges() > 1 && "Must have > 1 backedge!"); // Get information about the loop BasicBlock *Header = L->getHeader(); Function *F = Header->getParent(); + // Unique backedge insertion currently depends on having a preheader. + if (!Preheader) + return 0; + // Figure out which basic blocks contain back-edges to the loop header. 
std::vector<BasicBlock*> BackedgeBlocks; for (pred_iterator I = pred_begin(Header), E = pred_end(Header); I != E; ++I) @@ -612,4 +644,40 @@ void LoopSimplify::InsertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader) { DT->splitBlock(BEBlock); if (DominanceFrontier *DF = getAnalysisIfAvailable<DominanceFrontier>()) DF->splitBlock(BEBlock); + + return BEBlock; +} + +void LoopSimplify::verifyAnalysis() const { + // It used to be possible to just assert L->isLoopSimplifyForm(), however + // with the introduction of indirectbr, there are now cases where it's + // not possible to transform a loop as necessary. We can at least check + // that there is an indirectbr near any time there's trouble. + + // Indirectbr can interfere with preheader and unique backedge insertion. + if (!L->getLoopPreheader() || !L->getLoopLatch()) { + bool HasIndBrPred = false; + for (pred_iterator PI = pred_begin(L->getHeader()), + PE = pred_end(L->getHeader()); PI != PE; ++PI) + if (isa<IndirectBrInst>((*PI)->getTerminator())) { + HasIndBrPred = true; + break; + } + assert(HasIndBrPred && + "LoopSimplify has no excuse for missing loop header info!"); + } + + // Indirectbr can interfere with exit block canonicalization. + if (!L->hasDedicatedExits()) { + bool HasIndBrExiting = false; + SmallVector<BasicBlock*, 8> ExitingBlocks; + L->getExitingBlocks(ExitingBlocks); + for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i) + if (isa<IndirectBrInst>((ExitingBlocks[i])->getTerminator())) { + HasIndBrExiting = true; + break; + } + assert(HasIndBrExiting && + "LoopSimplify has no excuse for missing exit block info!"); + } } diff --git a/lib/Transforms/Utils/LoopUnroll.cpp b/lib/Transforms/Utils/LoopUnroll.cpp index d68427a..6232f32 100644 --- a/lib/Transforms/Utils/LoopUnroll.cpp +++ b/lib/Transforms/Utils/LoopUnroll.cpp @@ -108,8 +108,19 @@ static BasicBlock *FoldBlockIntoPredecessor(BasicBlock *BB, LoopInfo* LI) { bool llvm::UnrollLoop(Loop *L, unsigned Count, LoopInfo* LI, LPPassManager* LPM) { assert(L->isLCSSAForm()); - BasicBlock *Header = L->getHeader(); + BasicBlock *Preheader = L->getLoopPreheader(); + if (!Preheader) { + DEBUG(errs() << " Can't unroll; loop preheader-insertion failed.\n"); + return false; + } + BasicBlock *LatchBlock = L->getLoopLatch(); + if (!LatchBlock) { + DEBUG(errs() << " Can't unroll; loop exit-block-insertion failed.\n"); + return false; + } + + BasicBlock *Header = L->getHeader(); BranchInst *BI = dyn_cast<BranchInst>(LatchBlock->getTerminator()); if (!BI || BI->isUnconditional()) { @@ -351,8 +362,7 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, LoopInfo* LI, LPPassManager* LPM) if (isInstructionTriviallyDead(Inst)) (*BB)->getInstList().erase(Inst); - else if (Constant *C = ConstantFoldInstruction(Inst, - Header->getContext())) { + else if (Constant *C = ConstantFoldInstruction(Inst)) { Inst->replaceAllUsesWith(C); (*BB)->getInstList().erase(Inst); } diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp index 8e1fb98..8dbc808 100644 --- a/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/lib/Transforms/Utils/SimplifyCFG.cpp @@ -78,166 +78,6 @@ static void AddPredecessorToBlock(BasicBlock *Succ, BasicBlock *NewPred, PN->addIncoming(PN->getIncomingValueForBlock(ExistPred), NewPred); } -/// CanPropagatePredecessorsForPHIs - Return true if we can fold BB, an -/// almost-empty BB ending in an unconditional branch to Succ, into succ. -/// -/// Assumption: Succ is the single successor for BB. 
-/// -static bool CanPropagatePredecessorsForPHIs(BasicBlock *BB, BasicBlock *Succ) { - assert(*succ_begin(BB) == Succ && "Succ is not successor of BB!"); - - DEBUG(errs() << "Looking to fold " << BB->getName() << " into " - << Succ->getName() << "\n"); - // Shortcut, if there is only a single predecessor it must be BB and merging - // is always safe - if (Succ->getSinglePredecessor()) return true; - - // Make a list of the predecessors of BB - typedef SmallPtrSet<BasicBlock*, 16> BlockSet; - BlockSet BBPreds(pred_begin(BB), pred_end(BB)); - - // Use that list to make another list of common predecessors of BB and Succ - BlockSet CommonPreds; - for (pred_iterator PI = pred_begin(Succ), PE = pred_end(Succ); - PI != PE; ++PI) - if (BBPreds.count(*PI)) - CommonPreds.insert(*PI); - - // Shortcut, if there are no common predecessors, merging is always safe - if (CommonPreds.empty()) - return true; - - // Look at all the phi nodes in Succ, to see if they present a conflict when - // merging these blocks - for (BasicBlock::iterator I = Succ->begin(); isa<PHINode>(I); ++I) { - PHINode *PN = cast<PHINode>(I); - - // If the incoming value from BB is again a PHINode in - // BB which has the same incoming value for *PI as PN does, we can - // merge the phi nodes and then the blocks can still be merged - PHINode *BBPN = dyn_cast<PHINode>(PN->getIncomingValueForBlock(BB)); - if (BBPN && BBPN->getParent() == BB) { - for (BlockSet::iterator PI = CommonPreds.begin(), PE = CommonPreds.end(); - PI != PE; PI++) { - if (BBPN->getIncomingValueForBlock(*PI) - != PN->getIncomingValueForBlock(*PI)) { - DEBUG(errs() << "Can't fold, phi node " << PN->getName() << " in " - << Succ->getName() << " is conflicting with " - << BBPN->getName() << " with regard to common predecessor " - << (*PI)->getName() << "\n"); - return false; - } - } - } else { - Value* Val = PN->getIncomingValueForBlock(BB); - for (BlockSet::iterator PI = CommonPreds.begin(), PE = CommonPreds.end(); - PI != PE; PI++) { - // See if the incoming value for the common predecessor is equal to the - // one for BB, in which case this phi node will not prevent the merging - // of the block. - if (Val != PN->getIncomingValueForBlock(*PI)) { - DEBUG(errs() << "Can't fold, phi node " << PN->getName() << " in " - << Succ->getName() << " is conflicting with regard to common " - << "predecessor " << (*PI)->getName() << "\n"); - return false; - } - } - } - } - - return true; -} - -/// TryToSimplifyUncondBranchFromEmptyBlock - BB contains an unconditional -/// branch to Succ, and contains no instructions other than PHI nodes and the -/// branch. If possible, eliminate BB. -static bool TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB, - BasicBlock *Succ) { - // Check to see if merging these blocks would cause conflicts for any of the - // phi nodes in BB or Succ. If not, we can safely merge. - if (!CanPropagatePredecessorsForPHIs(BB, Succ)) return false; - - // Check for cases where Succ has multiple predecessors and a PHI node in BB - // has uses which will not disappear when the PHI nodes are merged. It is - // possible to handle such cases, but difficult: it requires checking whether - // BB dominates Succ, which is non-trivial to calculate in the case where - // Succ has multiple predecessors. Also, it requires checking whether - // constructing the necessary self-referential PHI node doesn't intoduce any - // conflicts; this isn't too difficult, but the previous code for doing this - // was incorrect. 
- // - // Note that if this check finds a live use, BB dominates Succ, so BB is - // something like a loop pre-header (or rarely, a part of an irreducible CFG); - // folding the branch isn't profitable in that case anyway. - if (!Succ->getSinglePredecessor()) { - BasicBlock::iterator BBI = BB->begin(); - while (isa<PHINode>(*BBI)) { - for (Value::use_iterator UI = BBI->use_begin(), E = BBI->use_end(); - UI != E; ++UI) { - if (PHINode* PN = dyn_cast<PHINode>(*UI)) { - if (PN->getIncomingBlock(UI) != BB) - return false; - } else { - return false; - } - } - ++BBI; - } - } - - DEBUG(errs() << "Killing Trivial BB: \n" << *BB); - - if (isa<PHINode>(Succ->begin())) { - // If there is more than one pred of succ, and there are PHI nodes in - // the successor, then we need to add incoming edges for the PHI nodes - // - const SmallVector<BasicBlock*, 16> BBPreds(pred_begin(BB), pred_end(BB)); - - // Loop over all of the PHI nodes in the successor of BB. - for (BasicBlock::iterator I = Succ->begin(); isa<PHINode>(I); ++I) { - PHINode *PN = cast<PHINode>(I); - Value *OldVal = PN->removeIncomingValue(BB, false); - assert(OldVal && "No entry in PHI for Pred BB!"); - - // If this incoming value is one of the PHI nodes in BB, the new entries - // in the PHI node are the entries from the old PHI. - if (isa<PHINode>(OldVal) && cast<PHINode>(OldVal)->getParent() == BB) { - PHINode *OldValPN = cast<PHINode>(OldVal); - for (unsigned i = 0, e = OldValPN->getNumIncomingValues(); i != e; ++i) - // Note that, since we are merging phi nodes and BB and Succ might - // have common predecessors, we could end up with a phi node with - // identical incoming branches. This will be cleaned up later (and - // will trigger asserts if we try to clean it up now, without also - // simplifying the corresponding conditional branch). - PN->addIncoming(OldValPN->getIncomingValue(i), - OldValPN->getIncomingBlock(i)); - } else { - // Add an incoming value for each of the new incoming values. - for (unsigned i = 0, e = BBPreds.size(); i != e; ++i) - PN->addIncoming(OldVal, BBPreds[i]); - } - } - } - - while (PHINode *PN = dyn_cast<PHINode>(&BB->front())) { - if (Succ->getSinglePredecessor()) { - // BB is the only predecessor of Succ, so Succ will end up with exactly - // the same predecessors BB had. - Succ->getInstList().splice(Succ->begin(), - BB->getInstList(), BB->begin()); - } else { - // We explicitly check for such uses in CanPropagatePredecessorsForPHIs. - assert(PN->use_empty() && "There shouldn't be any uses here!"); - PN->eraseFromParent(); - } - } - - // Everything that jumped to BB now goes to Succ. - BB->replaceAllUsesWith(Succ); - if (!Succ->hasName()) Succ->takeName(BB); - BB->eraseFromParent(); // Delete the old basic block. - return true; -} /// GetIfCondition - Given a basic block (BB) with two predecessors (and /// presumably PHI nodes in it), check to see if the merge at this block is due @@ -1217,7 +1057,7 @@ static bool FoldCondBranchOnPHI(BranchInst *BI) { } // Check for trivial simplification. - if (Constant *C = ConstantFoldInstruction(N, BB->getContext())) { + if (Constant *C = ConstantFoldInstruction(N)) { TranslateMap[BBI] = C; delete N; // Constant folded away, don't need actual inst } else { @@ -1983,13 +1823,11 @@ bool llvm::SimplifyCFG(BasicBlock *BB) { if (BI->isUnconditional()) { BasicBlock::iterator BBI = BB->getFirstNonPHI(); - BasicBlock *Succ = BI->getSuccessor(0); // Ignore dbg intrinsics. 
while (isa<DbgInfoIntrinsic>(BBI)) ++BBI; - if (BBI->isTerminator() && // Terminator is the only non-phi instruction! - Succ != BB) // Don't hurt infinite loops! - if (TryToSimplifyUncondBranchFromEmptyBlock(BB, Succ)) + if (BBI->isTerminator()) // Terminator is the only non-phi instruction! + if (TryToSimplifyUncondBranchFromEmptyBlock(BB)) return true; } else { // Conditional branch diff --git a/lib/VMCore/AsmWriter.cpp b/lib/VMCore/AsmWriter.cpp index 9a803a1..82d7914 100644 --- a/lib/VMCore/AsmWriter.cpp +++ b/lib/VMCore/AsmWriter.cpp @@ -1238,7 +1238,8 @@ static void WriteAsOperandInternal(raw_ostream &Out, const Value *V, return; } - if (V->getValueID() == Value::PseudoSourceValueVal) { + if (V->getValueID() == Value::PseudoSourceValueVal || + V->getValueID() == Value::FixedStackPseudoSourceValueVal) { V->print(Out); return; } @@ -1497,8 +1498,8 @@ static void PrintLinkage(GlobalValue::LinkageTypes LT, case GlobalValue::AvailableExternallyLinkage: Out << "available_externally "; break; - case GlobalValue::GhostLinkage: - llvm_unreachable("GhostLinkage not allowed in AsmWriter!"); + // This is invalid syntax and just a debugging aid. + case GlobalValue::GhostLinkage: Out << "ghost "; break; } } diff --git a/lib/VMCore/Constants.cpp b/lib/VMCore/Constants.cpp index 000a063..c622558 100644 --- a/lib/VMCore/Constants.cpp +++ b/lib/VMCore/Constants.cpp @@ -318,7 +318,7 @@ Constant* ConstantInt::get(const Type* Ty, const APInt& V) { return C; } -ConstantInt* ConstantInt::get(const IntegerType* Ty, const StringRef& Str, +ConstantInt* ConstantInt::get(const IntegerType* Ty, StringRef Str, uint8_t radix) { return get(Ty->getContext(), APInt(Ty->getBitWidth(), Str, radix)); } @@ -362,7 +362,7 @@ Constant* ConstantFP::get(const Type* Ty, double V) { } -Constant* ConstantFP::get(const Type* Ty, const StringRef& Str) { +Constant* ConstantFP::get(const Type* Ty, StringRef Str) { LLVMContext &Context = Ty->getContext(); APFloat FV(*TypeToFloatSemantics(Ty->getScalarType()), Str); @@ -508,7 +508,7 @@ Constant* ConstantArray::get(const ArrayType* T, Constant* const* Vals, /// Otherwise, the length parameter specifies how much of the string to use /// and it won't be null terminated. 
/// -Constant* ConstantArray::get(LLVMContext &Context, const StringRef &Str, +Constant* ConstantArray::get(LLVMContext &Context, StringRef Str, bool AddNull) { std::vector<Constant*> ElementVals; for (unsigned i = 0; i < Str.size(); ++i) diff --git a/lib/VMCore/Core.cpp b/lib/VMCore/Core.cpp index 1a34180..78cd4dc 100644 --- a/lib/VMCore/Core.cpp +++ b/lib/VMCore/Core.cpp @@ -1860,8 +1860,9 @@ LLVMValueRef LLVMBuildPointerCast(LLVMBuilderRef B, LLVMValueRef Val, } LLVMValueRef LLVMBuildIntCast(LLVMBuilderRef B, LLVMValueRef Val, - LLVMTypeRef DestTy, const char *Name) { - return wrap(unwrap(B)->CreateIntCast(unwrap(Val), unwrap(DestTy), Name)); + LLVMTypeRef DestTy, int isSigned, + const char *Name) { + return wrap(unwrap(B)->CreateIntCast(unwrap(Val), unwrap(DestTy), isSigned, Name)); } LLVMValueRef LLVMBuildFPCast(LLVMBuilderRef B, LLVMValueRef Val, @@ -1987,13 +1988,15 @@ int LLVMCreateMemoryBufferWithContentsOfFile(const char *Path, int LLVMCreateMemoryBufferWithSTDIN(LLVMMemoryBufferRef *OutMemBuf, char **OutMessage) { - if (MemoryBuffer *MB = MemoryBuffer::getSTDIN()) { - *OutMemBuf = wrap(MB); - return 0; + MemoryBuffer *MB = MemoryBuffer::getSTDIN(); + if (!MB->getBufferSize()) { + delete MB; + *OutMessage = strdup("stdin is empty."); + return 1; } - - *OutMessage = strdup("stdin is empty."); - return 1; + + *OutMemBuf = wrap(MB); + return 0; } void LLVMDisposeMemoryBuffer(LLVMMemoryBufferRef MemBuf) { diff --git a/lib/VMCore/Globals.cpp b/lib/VMCore/Globals.cpp index 03ceecb..94bf3de 100644 --- a/lib/VMCore/Globals.cpp +++ b/lib/VMCore/Globals.cpp @@ -16,7 +16,6 @@ #include "llvm/GlobalVariable.h" #include "llvm/GlobalAlias.h" #include "llvm/DerivedTypes.h" -#include "llvm/LLVMContext.h" #include "llvm/Module.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/Support/ErrorHandling.h" @@ -95,8 +94,7 @@ void GlobalValue::copyAttributesFrom(const GlobalValue *Src) { // GlobalVariable Implementation //===----------------------------------------------------------------------===// -GlobalVariable::GlobalVariable(LLVMContext &Context, const Type *Ty, - bool constant, LinkageTypes Link, +GlobalVariable::GlobalVariable(const Type *Ty, bool constant, LinkageTypes Link, Constant *InitVal, const Twine &Name, bool ThreadLocal, unsigned AddressSpace) : GlobalValue(PointerType::get(Ty, AddressSpace), @@ -173,6 +171,21 @@ void GlobalVariable::replaceUsesOfWithOnConstant(Value *From, Value *To, this->setOperand(0, cast<Constant>(To)); } +void GlobalVariable::setInitializer(Constant *InitVal) { + if (InitVal == 0) { + if (hasInitializer()) { + Op<0>().set(0); + NumOperands = 0; + } + } else { + assert(InitVal->getType() == getType()->getElementType() && + "Initializer type must match GlobalVariable type"); + if (!hasInitializer()) + NumOperands = 1; + Op<0>().set(InitVal); + } +} + /// copyAttributesFrom - copy all additional attributes (those not needed to /// create a GlobalVariable) from the GlobalVariable Src to this one. void GlobalVariable::copyAttributesFrom(const GlobalValue *Src) { diff --git a/lib/VMCore/InlineAsm.cpp b/lib/VMCore/InlineAsm.cpp index 3a36a1b..16de1af 100644 --- a/lib/VMCore/InlineAsm.cpp +++ b/lib/VMCore/InlineAsm.cpp @@ -26,16 +26,16 @@ InlineAsm::~InlineAsm() { // NOTE: when memoizing the function type, we have to be careful to handle the // case when the type gets refined. 
-InlineAsm *InlineAsm::get(const FunctionType *Ty, const StringRef &AsmString, - const StringRef &Constraints, bool hasSideEffects, +InlineAsm *InlineAsm::get(const FunctionType *Ty, StringRef AsmString, + StringRef Constraints, bool hasSideEffects, bool isAlignStack) { // FIXME: memoize! return new InlineAsm(Ty, AsmString, Constraints, hasSideEffects, isAlignStack); } -InlineAsm::InlineAsm(const FunctionType *Ty, const StringRef &asmString, - const StringRef &constraints, bool hasSideEffects, +InlineAsm::InlineAsm(const FunctionType *Ty, StringRef asmString, + StringRef constraints, bool hasSideEffects, bool isAlignStack) : Value(PointerType::getUnqual(Ty), Value::InlineAsmVal), @@ -54,7 +54,7 @@ const FunctionType *InlineAsm::getFunctionType() const { /// Parse - Analyze the specified string (e.g. "==&{eax}") and fill in the /// fields in this structure. If the constraint string is not understood, /// return true, otherwise return false. -bool InlineAsm::ConstraintInfo::Parse(const StringRef &Str, +bool InlineAsm::ConstraintInfo::Parse(StringRef Str, std::vector<InlineAsm::ConstraintInfo> &ConstraintsSoFar) { StringRef::iterator I = Str.begin(), E = Str.end(); @@ -149,7 +149,7 @@ bool InlineAsm::ConstraintInfo::Parse(const StringRef &Str, } std::vector<InlineAsm::ConstraintInfo> -InlineAsm::ParseConstraints(const StringRef &Constraints) { +InlineAsm::ParseConstraints(StringRef Constraints) { std::vector<ConstraintInfo> Result; // Scan the constraints string. @@ -183,7 +183,7 @@ InlineAsm::ParseConstraints(const StringRef &Constraints) { /// Verify - Verify that the specified constraint string is reasonable for the /// specified function type, and otherwise validate the constraint string. -bool InlineAsm::Verify(const FunctionType *Ty, const StringRef &ConstStr) { +bool InlineAsm::Verify(const FunctionType *Ty, StringRef ConstStr) { if (Ty->isVarArg()) return false; std::vector<ConstraintInfo> Constraints = ParseConstraints(ConstStr); diff --git a/lib/VMCore/Instructions.cpp b/lib/VMCore/Instructions.cpp index 279bc73..b03ee93 100644 --- a/lib/VMCore/Instructions.cpp +++ b/lib/VMCore/Instructions.cpp @@ -24,8 +24,6 @@ #include "llvm/Support/CallSite.h" #include "llvm/Support/ConstantRange.h" #include "llvm/Support/MathExtras.h" -#include "llvm/Target/TargetData.h" - using namespace llvm; //===----------------------------------------------------------------------===// @@ -465,9 +463,11 @@ static Instruction *createMalloc(Instruction *InsertBefore, ArraySize = ConstantInt::get(IntPtrTy, 1); else if (ArraySize->getType() != IntPtrTy) { if (InsertBefore) - ArraySize = CastInst::CreateIntegerCast(ArraySize, IntPtrTy, false, "", InsertBefore); + ArraySize = CastInst::CreateIntegerCast(ArraySize, IntPtrTy, false, + "", InsertBefore); else - ArraySize = CastInst::CreateIntegerCast(ArraySize, IntPtrTy, false, "", InsertAtEnd); + ArraySize = CastInst::CreateIntegerCast(ArraySize, IntPtrTy, false, + "", InsertAtEnd); } if (!IsConstantOne(ArraySize)) { @@ -494,22 +494,21 @@ static Instruction *createMalloc(Instruction *InsertBefore, BasicBlock* BB = InsertBefore ? 
InsertBefore->getParent() : InsertAtEnd; Module* M = BB->getParent()->getParent(); const Type *BPTy = Type::getInt8PtrTy(BB->getContext()); - if (!MallocF) + Value *MallocFunc = MallocF; + if (!MallocFunc) // prototype malloc as "void *malloc(size_t)" - MallocF = cast<Function>(M->getOrInsertFunction("malloc", BPTy, - IntPtrTy, NULL)); - if (!MallocF->doesNotAlias(0)) MallocF->setDoesNotAlias(0); + MallocFunc = M->getOrInsertFunction("malloc", BPTy, IntPtrTy, NULL); const PointerType *AllocPtrType = PointerType::getUnqual(AllocTy); CallInst *MCall = NULL; Instruction *Result = NULL; if (InsertBefore) { - MCall = CallInst::Create(MallocF, AllocSize, "malloccall", InsertBefore); + MCall = CallInst::Create(MallocFunc, AllocSize, "malloccall", InsertBefore); Result = MCall; if (Result->getType() != AllocPtrType) // Create a cast instruction to convert to the right type... Result = new BitCastInst(MCall, AllocPtrType, Name, InsertBefore); } else { - MCall = CallInst::Create(MallocF, AllocSize, "malloccall"); + MCall = CallInst::Create(MallocFunc, AllocSize, "malloccall"); Result = MCall; if (Result->getType() != AllocPtrType) { InsertAtEnd->getInstList().push_back(MCall); @@ -518,6 +517,10 @@ static Instruction *createMalloc(Instruction *InsertBefore, } } MCall->setTailCall(); + if (Function *F = dyn_cast<Function>(MallocFunc)) { + MCall->setCallingConv(F->getCallingConv()); + if (!F->doesNotAlias(0)) F->setDoesNotAlias(0); + } assert(MCall->getType() != Type::getVoidTy(BB->getContext()) && "Malloc has void return type"); @@ -567,8 +570,7 @@ static Instruction* createFree(Value* Source, Instruction *InsertBefore, const Type *VoidTy = Type::getVoidTy(M->getContext()); const Type *IntPtrTy = Type::getInt8PtrTy(M->getContext()); // prototype free as "void free(void*)" - Constant *FreeFunc = M->getOrInsertFunction("free", VoidTy, IntPtrTy, NULL); - + Value *FreeFunc = M->getOrInsertFunction("free", VoidTy, IntPtrTy, NULL); CallInst* Result = NULL; Value *PtrCast = Source; if (InsertBefore) { @@ -581,6 +583,8 @@ static Instruction* createFree(Value* Source, Instruction *InsertBefore, Result = CallInst::Create(FreeFunc, PtrCast, ""); } Result->setTailCall(); + if (Function *F = dyn_cast<Function>(FreeFunc)) + Result->setCallingConv(F->getCallingConv()); return Result; } diff --git a/lib/VMCore/Metadata.cpp b/lib/VMCore/Metadata.cpp index 4fadfed..24e715b 100644 --- a/lib/VMCore/Metadata.cpp +++ b/lib/VMCore/Metadata.cpp @@ -39,6 +39,17 @@ MDString *MDString::get(LLVMContext &Context, StringRef Str) { new MDString(Context, Entry.getKey()); } +MDString *MDString::get(LLVMContext &Context, const char *Str) { + LLVMContextImpl *pImpl = Context.pImpl; + StringMapEntry<MDString *> &Entry = + pImpl->MDStringCache.GetOrCreateValue(Str ? StringRef(Str) : StringRef()); + MDString *&S = Entry.getValue(); + if (S) return S; + + return S = + new MDString(Context, Entry.getKey()); +} + //===----------------------------------------------------------------------===// // MDNode implementation. // @@ -341,11 +352,11 @@ MDNode *MetadataContextImpl::getMD(unsigned MDKind, const Instruction *Inst) { /// getMDs - Get the metadata attached to an Instruction. 
void MetadataContextImpl:: getMDs(const Instruction *Inst, SmallVectorImpl<MDPairTy> &MDs) const { - MDStoreTy::iterator I = MetadataStore.find(Inst); + MDStoreTy::const_iterator I = MetadataStore.find(Inst); if (I == MetadataStore.end()) return; MDs.resize(I->second.size()); - for (MDMapTy::iterator MI = I->second.begin(), ME = I->second.end(); + for (MDMapTy::const_iterator MI = I->second.begin(), ME = I->second.end(); MI != ME; ++MI) // MD kinds are numbered from 1. MDs[MI->first - 1] = std::make_pair(MI->first, MI->second); diff --git a/lib/VMCore/Module.cpp b/lib/VMCore/Module.cpp index add2449..3efd3e3 100644 --- a/lib/VMCore/Module.cpp +++ b/lib/VMCore/Module.cpp @@ -31,8 +31,7 @@ using namespace llvm; // GlobalVariable *ilist_traits<GlobalVariable>::createSentinel() { - GlobalVariable *Ret = new GlobalVariable(getGlobalContext(), - Type::getInt32Ty(getGlobalContext()), + GlobalVariable *Ret = new GlobalVariable(Type::getInt32Ty(getGlobalContext()), false, GlobalValue::ExternalLinkage); // This should not be garbage monitored. LeakDetector::removeGarbageObject(Ret); @@ -56,7 +55,7 @@ template class SymbolTableListTraits<GlobalAlias, Module>; // Primitive Module methods. // -Module::Module(const StringRef &MID, LLVMContext& C) +Module::Module(StringRef MID, LLVMContext& C) : Context(C), ModuleID(MID), DataLayout("") { ValSymTab = new ValueSymbolTable(); TypeSymTab = new TypeSymbolTable(); @@ -115,7 +114,7 @@ Module::PointerSize Module::getPointerSize() const { /// getNamedValue - Return the first global value in the module with /// the specified name, of arbitrary type. This method returns null /// if a global with the specified name is not found. -GlobalValue *Module::getNamedValue(const StringRef &Name) const { +GlobalValue *Module::getNamedValue(StringRef Name) const { return cast_or_null<GlobalValue>(getValueSymbolTable().lookup(Name)); } @@ -128,7 +127,7 @@ GlobalValue *Module::getNamedValue(const StringRef &Name) const { // it. This is nice because it allows most passes to get away with not handling // the symbol table directly for this common task. // -Constant *Module::getOrInsertFunction(const StringRef &Name, +Constant *Module::getOrInsertFunction(StringRef Name, const FunctionType *Ty, AttrListPtr AttributeList) { // See if we have a definition for the specified function already. @@ -161,7 +160,7 @@ Constant *Module::getOrInsertFunction(const StringRef &Name, return F; } -Constant *Module::getOrInsertTargetIntrinsic(const StringRef &Name, +Constant *Module::getOrInsertTargetIntrinsic(StringRef Name, const FunctionType *Ty, AttrListPtr AttributeList) { // See if we have a definition for the specified function already. @@ -178,7 +177,7 @@ Constant *Module::getOrInsertTargetIntrinsic(const StringRef &Name, return F; } -Constant *Module::getOrInsertFunction(const StringRef &Name, +Constant *Module::getOrInsertFunction(StringRef Name, const FunctionType *Ty) { AttrListPtr AttributeList = AttrListPtr::get((AttributeWithIndex *)0, 0); return getOrInsertFunction(Name, Ty, AttributeList); @@ -189,7 +188,7 @@ Constant *Module::getOrInsertFunction(const StringRef &Name, // This version of the method takes a null terminated list of function // arguments, which makes it easier for clients to use. // -Constant *Module::getOrInsertFunction(const StringRef &Name, +Constant *Module::getOrInsertFunction(StringRef Name, AttrListPtr AttributeList, const Type *RetTy, ...) 
{ va_list Args; @@ -208,7 +207,7 @@ Constant *Module::getOrInsertFunction(const StringRef &Name, AttributeList); } -Constant *Module::getOrInsertFunction(const StringRef &Name, +Constant *Module::getOrInsertFunction(StringRef Name, const Type *RetTy, ...) { va_list Args; va_start(Args, RetTy); @@ -229,7 +228,7 @@ Constant *Module::getOrInsertFunction(const StringRef &Name, // getFunction - Look up the specified function in the module symbol table. // If it does not exist, return null. // -Function *Module::getFunction(const StringRef &Name) const { +Function *Module::getFunction(StringRef Name) const { return dyn_cast_or_null<Function>(getNamedValue(Name)); } @@ -244,7 +243,7 @@ Function *Module::getFunction(const StringRef &Name) const { /// If AllowLocal is set to true, this function will return types that /// have an local. By default, these types are not returned. /// -GlobalVariable *Module::getGlobalVariable(const StringRef &Name, +GlobalVariable *Module::getGlobalVariable(StringRef Name, bool AllowLocal) const { if (GlobalVariable *Result = dyn_cast_or_null<GlobalVariable>(getNamedValue(Name))) @@ -259,7 +258,7 @@ GlobalVariable *Module::getGlobalVariable(const StringRef &Name, /// with a constantexpr cast to the right type. /// 3. Finally, if the existing global is the correct delclaration, return the /// existing global. -Constant *Module::getOrInsertGlobal(const StringRef &Name, const Type *Ty) { +Constant *Module::getOrInsertGlobal(StringRef Name, const Type *Ty) { // See if we have a definition for the specified global already. GlobalVariable *GV = dyn_cast_or_null<GlobalVariable>(getNamedValue(Name)); if (GV == 0) { @@ -286,21 +285,21 @@ Constant *Module::getOrInsertGlobal(const StringRef &Name, const Type *Ty) { // getNamedAlias - Look up the specified global in the module symbol table. // If it does not exist, return null. // -GlobalAlias *Module::getNamedAlias(const StringRef &Name) const { +GlobalAlias *Module::getNamedAlias(StringRef Name) const { return dyn_cast_or_null<GlobalAlias>(getNamedValue(Name)); } /// getNamedMetadata - Return the first NamedMDNode in the module with the /// specified name. This method returns null if a NamedMDNode with the //// specified name is not found. -NamedMDNode *Module::getNamedMetadata(const StringRef &Name) const { +NamedMDNode *Module::getNamedMetadata(StringRef Name) const { return dyn_cast_or_null<NamedMDNode>(getValueSymbolTable().lookup(Name)); } /// getOrInsertNamedMetadata - Return the first named MDNode in the module /// with the specified name. This method returns a new NamedMDNode if a /// NamedMDNode with the specified name is not found. -NamedMDNode *Module::getOrInsertNamedMetadata(const StringRef &Name) { +NamedMDNode *Module::getOrInsertNamedMetadata(StringRef Name) { NamedMDNode *NMD = dyn_cast_or_null<NamedMDNode>(getValueSymbolTable().lookup(Name)); if (!NMD) @@ -317,7 +316,7 @@ NamedMDNode *Module::getOrInsertNamedMetadata(const StringRef &Name) { // there is already an entry for this name, true is returned and the symbol // table is not modified. // -bool Module::addTypeName(const StringRef &Name, const Type *Ty) { +bool Module::addTypeName(StringRef Name, const Type *Ty) { TypeSymbolTable &ST = getTypeSymbolTable(); if (ST.lookup(Name)) return true; // Already in symtab... @@ -331,7 +330,7 @@ bool Module::addTypeName(const StringRef &Name, const Type *Ty) { /// getTypeByName - Return the type with the specified name in this module, or /// null if there is none by that name. 
-const Type *Module::getTypeByName(const StringRef &Name) const { +const Type *Module::getTypeByName(StringRef Name) const { const TypeSymbolTable &ST = getTypeSymbolTable(); return cast_or_null<Type>(ST.lookup(Name)); } @@ -377,14 +376,14 @@ void Module::dropAllReferences() { I->dropAllReferences(); } -void Module::addLibrary(const StringRef& Lib) { +void Module::addLibrary(StringRef Lib) { for (Module::lib_iterator I = lib_begin(), E = lib_end(); I != E; ++I) if (*I == Lib) return; LibraryList.push_back(Lib); } -void Module::removeLibrary(const StringRef& Lib) { +void Module::removeLibrary(StringRef Lib) { LibraryListType::iterator I = LibraryList.begin(); LibraryListType::iterator E = LibraryList.end(); for (;I != E; ++I) diff --git a/lib/VMCore/Pass.cpp b/lib/VMCore/Pass.cpp index a17eed8..1232fe2 100644 --- a/lib/VMCore/Pass.cpp +++ b/lib/VMCore/Pass.cpp @@ -149,7 +149,7 @@ public: return I != PassInfoMap.end() ? I->second : 0; } - const PassInfo *GetPassInfo(const StringRef &Arg) const { + const PassInfo *GetPassInfo(StringRef Arg) const { StringMapType::const_iterator I = PassInfoStringMap.find(Arg); return I != PassInfoStringMap.end() ? I->second : 0; } @@ -238,7 +238,7 @@ const PassInfo *Pass::lookupPassInfo(intptr_t TI) { return getPassRegistrar()->GetPassInfo(TI); } -const PassInfo *Pass::lookupPassInfo(const StringRef &Arg) { +const PassInfo *Pass::lookupPassInfo(StringRef Arg) { return getPassRegistrar()->GetPassInfo(Arg); } diff --git a/lib/VMCore/PassManager.cpp b/lib/VMCore/PassManager.cpp index eb097ed..d3d61f5 100644 --- a/lib/VMCore/PassManager.cpp +++ b/lib/VMCore/PassManager.cpp @@ -746,7 +746,7 @@ void PMDataManager::removeNotPreservedAnalysis(Pass *P) { } /// Remove analysis passes that are not used any longer -void PMDataManager::removeDeadPasses(Pass *P, const StringRef &Msg, +void PMDataManager::removeDeadPasses(Pass *P, StringRef Msg, enum PassDebuggingString DBG_STR) { SmallVector<Pass *, 12> DeadPasses; @@ -768,7 +768,7 @@ void PMDataManager::removeDeadPasses(Pass *P, const StringRef &Msg, freePass(*I, Msg, DBG_STR); } -void PMDataManager::freePass(Pass *P, const StringRef &Msg, +void PMDataManager::freePass(Pass *P, StringRef Msg, enum PassDebuggingString DBG_STR) { dumpPassInfo(P, FREEING_MSG, DBG_STR, Msg); @@ -972,7 +972,7 @@ void PMDataManager::dumpPassArguments() const { void PMDataManager::dumpPassInfo(Pass *P, enum PassDebuggingString S1, enum PassDebuggingString S2, - const StringRef &Msg) { + StringRef Msg) { if (PassDebugging < Executions) return; errs() << (void*)this << std::string(getDepth()*2+1, ' '); @@ -1028,7 +1028,7 @@ void PMDataManager::dumpPreservedSet(const Pass *P) const { dumpAnalysisUsage("Preserved", P, analysisUsage.getPreservedSet()); } -void PMDataManager::dumpAnalysisUsage(const StringRef &Msg, const Pass *P, +void PMDataManager::dumpAnalysisUsage(StringRef Msg, const Pass *P, const AnalysisUsage::VectorType &Set) const { assert(PassDebugging >= Details); if (Set.empty()) diff --git a/lib/VMCore/TypeSymbolTable.cpp b/lib/VMCore/TypeSymbolTable.cpp index 3440a77..0d0cdf5 100644 --- a/lib/VMCore/TypeSymbolTable.cpp +++ b/lib/VMCore/TypeSymbolTable.cpp @@ -31,7 +31,7 @@ TypeSymbolTable::~TypeSymbolTable() { } } -std::string TypeSymbolTable::getUniqueName(const StringRef &BaseName) const { +std::string TypeSymbolTable::getUniqueName(StringRef BaseName) const { std::string TryName = BaseName; const_iterator End = tmap.end(); @@ -43,7 +43,7 @@ std::string TypeSymbolTable::getUniqueName(const StringRef &BaseName) const { } // lookup a 
type by name - returns null on failure -Type* TypeSymbolTable::lookup(const StringRef &Name) const { +Type* TypeSymbolTable::lookup(StringRef Name) const { const_iterator TI = tmap.find(Name); Type* result = 0; if (TI != tmap.end()) @@ -51,7 +51,6 @@ Type* TypeSymbolTable::lookup(const StringRef &Name) const { return result; } - // remove - Remove a type from the symbol table... Type* TypeSymbolTable::remove(iterator Entry) { assert(Entry != tmap.end() && "Invalid entry to remove!"); @@ -80,7 +79,7 @@ Type* TypeSymbolTable::remove(iterator Entry) { // insert - Insert a type into the symbol table with the specified name... -void TypeSymbolTable::insert(const StringRef &Name, const Type* T) { +void TypeSymbolTable::insert(StringRef Name, const Type* T) { assert(T && "Can't insert null type into symbol table!"); if (tmap.insert(std::make_pair(Name, T)).second) { diff --git a/lib/VMCore/ValueSymbolTable.cpp b/lib/VMCore/ValueSymbolTable.cpp index 7765a98..9d39a50 100644 --- a/lib/VMCore/ValueSymbolTable.cpp +++ b/lib/VMCore/ValueSymbolTable.cpp @@ -77,7 +77,7 @@ void ValueSymbolTable::removeValueName(ValueName *V) { /// createValueName - This method attempts to create a value name and insert /// it into the symbol table with the specified name. If it conflicts, it /// auto-renames the name and returns that instead. -ValueName *ValueSymbolTable::createValueName(const StringRef &Name, Value *V) { +ValueName *ValueSymbolTable::createValueName(StringRef Name, Value *V) { // In the common case, the name is not already in the symbol table. ValueName &Entry = vmap.GetOrCreateValue(Name); if (Entry.getValue() == 0) { diff --git a/lib/VMCore/Verifier.cpp b/lib/VMCore/Verifier.cpp index 5990e48..7ab7b15 100644 --- a/lib/VMCore/Verifier.cpp +++ b/lib/VMCore/Verifier.cpp @@ -780,9 +780,13 @@ void Verifier::visitSwitchInst(SwitchInst &SI) { // Check to make sure that all of the constants in the switch instruction // have the same type as the switched-on value. const Type *SwitchTy = SI.getCondition()->getType(); - for (unsigned i = 1, e = SI.getNumCases(); i != e; ++i) + SmallPtrSet<ConstantInt*, 32> Constants; + for (unsigned i = 1, e = SI.getNumCases(); i != e; ++i) { Assert1(SI.getCaseValue(i)->getType() == SwitchTy, "Switch constants must all be same type as switch value!", &SI); + Assert2(Constants.insert(SI.getCaseValue(i)), + "Duplicate integer as switch case", &SI, SI.getCaseValue(i)); + } visitTerminatorInst(SI); } |
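The Verifier.cpp change above additionally rejects switch instructions that list the same case value twice, tracking the cases seen so far in a SmallPtrSet. A hypothetical snippet, not from the patch, showing the shape that now fails verification with "Duplicate integer as switch case":

    #include "llvm/BasicBlock.h"
    #include "llvm/Constants.h"
    #include "llvm/Instructions.h"
    #include "llvm/LLVMContext.h"
    using namespace llvm;

    // Build "switch i32 %Cond [ 1 -> A, 1 -> B ]": the repeated case value 1
    // is what the new verifier check reports.
    static void buildDuplicateCaseSwitch(Value *Cond, BasicBlock *Default,
                                         BasicBlock *A, BasicBlock *B,
                                         BasicBlock *InsertAtEnd) {
      LLVMContext &Ctx = InsertAtEnd->getContext();
      SwitchInst *SI = SwitchInst::Create(Cond, Default, 2, InsertAtEnd);
      SI->addCase(ConstantInt::get(Type::getInt32Ty(Ctx), 1), A);
      SI->addCase(ConstantInt::get(Type::getInt32Ty(Ctx), 1), B); // duplicate
    }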