author | ed <ed@FreeBSD.org> | 2009-06-22 08:08:12 +0000
---|---|---
committer | ed <ed@FreeBSD.org> | 2009-06-22 08:08:12 +0000
commit | a4c19d68f13cf0a83bc0da53bd6d547fcaf635fe (patch) |
tree | 86c1bc482baa6c81fc70b8d715153bfa93377186 /lib |
parent | db89e312d968c258aba3c79c1c398f5fb19267a3 (diff) |
Update LLVM sources to r73879.
Diffstat (limited to 'lib')
161 files changed, 5683 insertions, 1930 deletions
diff --git a/lib/Analysis/ConstantFolding.cpp b/lib/Analysis/ConstantFolding.cpp index 261c635..5aa4d56 100644 --- a/lib/Analysis/ConstantFolding.cpp +++ b/lib/Analysis/ConstantFolding.cpp @@ -365,7 +365,7 @@ Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, const Type *DestTy, if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ops[0])) { if (TD && CE->getOpcode() == Instruction::IntToPtr) { Constant *Input = CE->getOperand(0); - unsigned InWidth = Input->getType()->getPrimitiveSizeInBits(); + unsigned InWidth = Input->getType()->getScalarSizeInBits(); if (TD->getPointerSizeInBits() < InWidth) { Constant *Mask = ConstantInt::get(APInt::getLowBitsSet(InWidth, @@ -384,7 +384,7 @@ Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, const Type *DestTy, if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ops[0])) { if (TD && TD->getPointerSizeInBits() <= - CE->getType()->getPrimitiveSizeInBits()) { + CE->getType()->getScalarSizeInBits()) { if (CE->getOpcode() == Instruction::PtrToInt) { Constant *Input = CE->getOperand(0); Constant *C = FoldBitCast(Input, DestTy, *TD); diff --git a/lib/Analysis/DebugInfo.cpp b/lib/Analysis/DebugInfo.cpp index 6bdb64c..adda5ee 100644 --- a/lib/Analysis/DebugInfo.cpp +++ b/lib/Analysis/DebugInfo.cpp @@ -352,7 +352,7 @@ Constant *DIFactory::GetStringConstant(const std::string &String) { const PointerType *DestTy = PointerType::getUnqual(Type::Int8Ty); - // If empty string then use a sbyte* null instead. + // If empty string then use a i8* null instead. if (String.empty()) return Slot = ConstantPointerNull::get(DestTy); diff --git a/lib/Analysis/IVUsers.cpp b/lib/Analysis/IVUsers.cpp index 7af9130..6a53a83 100644 --- a/lib/Analysis/IVUsers.cpp +++ b/lib/Analysis/IVUsers.cpp @@ -82,11 +82,8 @@ static bool containsAddRecFromDifferentLoop(SCEVHandle S, Loop *L) { /// outer loop of the current loop. static bool getSCEVStartAndStride(const SCEVHandle &SH, Loop *L, Loop *UseLoop, SCEVHandle &Start, SCEVHandle &Stride, - bool &isSigned, ScalarEvolution *SE, DominatorTree *DT) { SCEVHandle TheAddRec = Start; // Initialize to zero. - bool isSExt = false; - bool isZExt = false; // If the outer level is an AddExpr, the operands are all start values except // for a nested AddRecExpr. @@ -101,13 +98,6 @@ static bool getSCEVStartAndStride(const SCEVHandle &SH, Loop *L, Loop *UseLoop, } else { Start = SE->getAddExpr(Start, AE->getOperand(i)); } - - } else if (const SCEVZeroExtendExpr *Z = dyn_cast<SCEVZeroExtendExpr>(SH)) { - TheAddRec = Z->getOperand(); - isZExt = true; - } else if (const SCEVSignExtendExpr *S = dyn_cast<SCEVSignExtendExpr>(SH)) { - TheAddRec = S->getOperand(); - isSExt = true; } else if (isa<SCEVAddRecExpr>(SH)) { TheAddRec = SH; } else { @@ -120,9 +110,8 @@ static bool getSCEVStartAndStride(const SCEVHandle &SH, Loop *L, Loop *UseLoop, // Use getSCEVAtScope to attempt to simplify other loops out of // the picture. SCEVHandle AddRecStart = AddRec->getStart(); - SCEVHandle BetterAddRecStart = SE->getSCEVAtScope(AddRecStart, UseLoop); - if (!isa<SCEVCouldNotCompute>(BetterAddRecStart)) - AddRecStart = BetterAddRecStart; + AddRecStart = SE->getSCEVAtScope(AddRecStart, UseLoop); + SCEVHandle AddRecStride = AddRec->getStepRecurrence(*SE); // FIXME: If Start contains an SCEVAddRecExpr from a different loop, other // than an outer loop of the current loop, reject it. 
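The ConstantFolding.cpp hunks above switch getPrimitiveSizeInBits() to getScalarSizeInBits(), which reports the element width for vector types, before comparing against the pointer width. A minimal standalone sketch (plain C++, not the LLVM API; names here are illustrative) of the masking arithmetic that check guards:

```cpp
#include <cassert>
#include <cstdint>

// Standalone model of the fold guarded above: pushing an integer wider
// than a pointer through inttoptr keeps only the low PointerSizeInBits
// bits, so the folder masks them off explicitly (APInt::getLowBitsSet
// in the real code).
uint64_t maskToPointerWidth(uint64_t Input, unsigned PtrBits) {
  assert(PtrBits > 0 && PtrBits <= 64);
  uint64_t Mask = PtrBits == 64 ? ~0ULL : ((1ULL << PtrBits) - 1);
  return Input & Mask;
}

int main() {
  // An i64 constant squeezed through a 32-bit pointer keeps bits 0..31.
  assert(maskToPointerWidth(0x100000003ULL, 32) == 3);
  return 0;
}
```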
LSR has no concept of @@ -131,24 +120,20 @@ static bool getSCEVStartAndStride(const SCEVHandle &SH, Loop *L, Loop *UseLoop, if (containsAddRecFromDifferentLoop(AddRecStart, L)) return false; - if (isSExt || isZExt) - Start = SE->getTruncateExpr(Start, AddRec->getType()); - Start = SE->getAddExpr(Start, AddRecStart); - if (!isa<SCEVConstant>(AddRec->getStepRecurrence(*SE))) { - // If stride is an instruction, make sure it dominates the loop preheader. - // Otherwise we could end up with a use before def situation. + // If stride is an instruction, make sure it dominates the loop preheader. + // Otherwise we could end up with a use before def situation. + if (!isa<SCEVConstant>(AddRecStride)) { BasicBlock *Preheader = L->getLoopPreheader(); - if (!AddRec->getStepRecurrence(*SE)->dominates(Preheader, DT)) + if (!AddRecStride->dominates(Preheader, DT)) return false; DOUT << "[" << L->getHeader()->getName() << "] Variable stride: " << *AddRec << "\n"; } - Stride = AddRec->getStepRecurrence(*SE); - isSigned = isSExt; + Stride = AddRecStride; return true; } @@ -218,9 +203,8 @@ bool IVUsers::AddUsersIfInteresting(Instruction *I) { Loop *UseLoop = LI->getLoopFor(I->getParent()); SCEVHandle Start = SE->getIntegerSCEV(0, ISE->getType()); SCEVHandle Stride = Start; - bool isSigned = false; // Arbitrary initial value - pacifies compiler. - if (!getSCEVStartAndStride(ISE, L, UseLoop, Start, Stride, isSigned, SE, DT)) + if (!getSCEVStartAndStride(ISE, L, UseLoop, Start, Stride, SE, DT)) return false; // Non-reducible symbolic expression, bail out. SmallPtrSet<Instruction *, 4> UniqueUsers; @@ -271,11 +255,11 @@ bool IVUsers::AddUsersIfInteresting(Instruction *I) { // The value used will be incremented by the stride more than we are // expecting, so subtract this off. SCEVHandle NewStart = SE->getMinusSCEV(Start, Stride); - StrideUses->addUser(NewStart, User, I, isSigned); + StrideUses->addUser(NewStart, User, I); StrideUses->Users.back().setIsUseOfPostIncrementedValue(true); DOUT << " USING POSTINC SCEV, START=" << *NewStart<< "\n"; } else { - StrideUses->addUser(Start, User, I, isSigned); + StrideUses->addUser(Start, User, I); } } } @@ -312,7 +296,6 @@ bool IVUsers::runOnLoop(Loop *l, LPPassManager &LPM) { /// getReplacementExpr - Return a SCEV expression which computes the /// value of the OperandValToReplace of the given IVStrideUse. SCEVHandle IVUsers::getReplacementExpr(const IVStrideUse &U) const { - const Type *UseTy = U.getOperandValToReplace()->getType(); // Start with zero. SCEVHandle RetVal = SE->getIntegerSCEV(0, U.getParent()->Stride->getType()); // Create the basic add recurrence. @@ -326,17 +309,9 @@ SCEVHandle IVUsers::getReplacementExpr(const IVStrideUse &U) const { // Evaluate the expression out of the loop, if possible. if (!L->contains(U.getUser()->getParent())) { SCEVHandle ExitVal = SE->getSCEVAtScope(RetVal, L->getParentLoop()); - if (!isa<SCEVCouldNotCompute>(ExitVal) && ExitVal->isLoopInvariant(L)) + if (ExitVal->isLoopInvariant(L)) RetVal = ExitVal; } - // Promote the result to the type of the use. 
- if (SE->getTypeSizeInBits(RetVal->getType()) != - SE->getTypeSizeInBits(UseTy)) { - if (U.isSigned()) - RetVal = SE->getSignExtendExpr(RetVal, UseTy); - else - RetVal = SE->getZeroExtendExpr(RetVal, UseTy); - } return RetVal; } diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp index 98ab6f4..68aa595 100644 --- a/lib/Analysis/ScalarEvolution.cpp +++ b/lib/Analysis/ScalarEvolution.cpp @@ -68,6 +68,7 @@ #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/ValueTracking.h" #include "llvm/Assembly/Writer.h" #include "llvm/Target/TargetData.h" #include "llvm/Support/CommandLine.h" @@ -132,7 +133,8 @@ bool SCEV::isOne() const { return false; } -SCEVCouldNotCompute::SCEVCouldNotCompute() : SCEV(scCouldNotCompute) {} +SCEVCouldNotCompute::SCEVCouldNotCompute(const ScalarEvolution* p) : + SCEV(scCouldNotCompute, p) {} SCEVCouldNotCompute::~SCEVCouldNotCompute() {} bool SCEVCouldNotCompute::isLoopInvariant(const Loop *L) const { @@ -178,7 +180,7 @@ SCEVConstant::~SCEVConstant() { SCEVHandle ScalarEvolution::getConstant(ConstantInt *V) { SCEVConstant *&R = (*SCEVConstants)[V]; - if (R == 0) R = new SCEVConstant(V); + if (R == 0) R = new SCEVConstant(V, this); return R; } @@ -186,6 +188,11 @@ SCEVHandle ScalarEvolution::getConstant(const APInt& Val) { return getConstant(ConstantInt::get(Val)); } +SCEVHandle +ScalarEvolution::getConstant(const Type *Ty, uint64_t V, bool isSigned) { + return getConstant(ConstantInt::get(cast<IntegerType>(Ty), V, isSigned)); +} + const Type *SCEVConstant::getType() const { return V->getType(); } void SCEVConstant::print(raw_ostream &OS) const { @@ -193,8 +200,9 @@ void SCEVConstant::print(raw_ostream &OS) const { } SCEVCastExpr::SCEVCastExpr(unsigned SCEVTy, - const SCEVHandle &op, const Type *ty) - : SCEV(SCEVTy), Op(op), Ty(ty) {} + const SCEVHandle &op, const Type *ty, + const ScalarEvolution* p) + : SCEV(SCEVTy, p), Op(op), Ty(ty) {} SCEVCastExpr::~SCEVCastExpr() {} @@ -208,8 +216,9 @@ bool SCEVCastExpr::dominates(BasicBlock *BB, DominatorTree *DT) const { static ManagedStatic<std::map<std::pair<const SCEV*, const Type*>, SCEVTruncateExpr*> > SCEVTruncates; -SCEVTruncateExpr::SCEVTruncateExpr(const SCEVHandle &op, const Type *ty) - : SCEVCastExpr(scTruncate, op, ty) { +SCEVTruncateExpr::SCEVTruncateExpr(const SCEVHandle &op, const Type *ty, + const ScalarEvolution* p) + : SCEVCastExpr(scTruncate, op, ty, p) { assert((Op->getType()->isInteger() || isa<PointerType>(Op->getType())) && (Ty->isInteger() || isa<PointerType>(Ty)) && "Cannot truncate non-integer value!"); @@ -229,8 +238,9 @@ void SCEVTruncateExpr::print(raw_ostream &OS) const { static ManagedStatic<std::map<std::pair<const SCEV*, const Type*>, SCEVZeroExtendExpr*> > SCEVZeroExtends; -SCEVZeroExtendExpr::SCEVZeroExtendExpr(const SCEVHandle &op, const Type *ty) - : SCEVCastExpr(scZeroExtend, op, ty) { +SCEVZeroExtendExpr::SCEVZeroExtendExpr(const SCEVHandle &op, const Type *ty, + const ScalarEvolution* p) + : SCEVCastExpr(scZeroExtend, op, ty, p) { assert((Op->getType()->isInteger() || isa<PointerType>(Op->getType())) && (Ty->isInteger() || isa<PointerType>(Ty)) && "Cannot zero extend non-integer value!"); @@ -250,8 +260,9 @@ void SCEVZeroExtendExpr::print(raw_ostream &OS) const { static ManagedStatic<std::map<std::pair<const SCEV*, const Type*>, SCEVSignExtendExpr*> > SCEVSignExtends; -SCEVSignExtendExpr::SCEVSignExtendExpr(const SCEVHandle &op, const Type *ty) - : 
SCEVCastExpr(scSignExtend, op, ty) { +SCEVSignExtendExpr::SCEVSignExtendExpr(const SCEVHandle &op, const Type *ty, + const ScalarEvolution* p) + : SCEVCastExpr(scSignExtend, op, ty, p) { assert((Op->getType()->isInteger() || isa<PointerType>(Op->getType())) && (Ty->isInteger() || isa<PointerType>(Ty)) && "Cannot sign extend non-integer value!"); @@ -293,7 +304,7 @@ replaceSymbolicValuesWithConcrete(const SCEVHandle &Sym, SCEVHandle H = getOperand(i)->replaceSymbolicValuesWithConcrete(Sym, Conc, SE); if (H != getOperand(i)) { - std::vector<SCEVHandle> NewOps; + SmallVector<SCEVHandle, 8> NewOps; NewOps.reserve(getNumOperands()); for (unsigned j = 0; j != i; ++j) NewOps.push_back(getOperand(j)); @@ -373,7 +384,7 @@ replaceSymbolicValuesWithConcrete(const SCEVHandle &Sym, SCEVHandle H = getOperand(i)->replaceSymbolicValuesWithConcrete(Sym, Conc, SE); if (H != getOperand(i)) { - std::vector<SCEVHandle> NewOps; + SmallVector<SCEVHandle, 8> NewOps; NewOps.reserve(getNumOperands()); for (unsigned j = 0; j != i; ++j) NewOps.push_back(getOperand(j)); @@ -504,9 +515,18 @@ namespace { return false; } - // Constant sorting doesn't matter since they'll be folded. - if (isa<SCEVConstant>(LHS)) - return false; + // Compare constant values. + if (const SCEVConstant *LC = dyn_cast<SCEVConstant>(LHS)) { + const SCEVConstant *RC = cast<SCEVConstant>(RHS); + return LC->getValue()->getValue().ult(RC->getValue()->getValue()); + } + + // Compare addrec loop depths. + if (const SCEVAddRecExpr *LA = dyn_cast<SCEVAddRecExpr>(LHS)) { + const SCEVAddRecExpr *RA = cast<SCEVAddRecExpr>(RHS); + if (LA->getLoop()->getLoopDepth() != RA->getLoop()->getLoopDepth()) + return LA->getLoop()->getLoopDepth() < RA->getLoop()->getLoopDepth(); + } // Lexicographically compare n-ary expressions. if (const SCEVNAryExpr *LC = dyn_cast<SCEVNAryExpr>(LHS)) { @@ -558,7 +578,7 @@ namespace { /// this to depend on where the addresses of various SCEV objects happened to /// land in memory. /// -static void GroupByComplexity(std::vector<SCEVHandle> &Ops, +static void GroupByComplexity(SmallVectorImpl<SCEVHandle> &Ops, LoopInfo *LI) { if (Ops.size() < 2) return; // Noop if (Ops.size() == 2) { @@ -763,17 +783,16 @@ SCEVHandle ScalarEvolution::getTruncateExpr(const SCEVHandle &Op, if (const SCEVZeroExtendExpr *SZ = dyn_cast<SCEVZeroExtendExpr>(Op)) return getTruncateOrZeroExtend(SZ->getOperand(), Ty); - // If the input value is a chrec scev made out of constants, truncate - // all of the constants. + // If the input value is a chrec scev, truncate the chrec's operands. 
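The SCEVComplexityCompare hunk above stops treating all constants as interchangeable: constants are now ordered by unsigned value and addrecs by loop depth, which gives GroupByComplexity a deterministic order. A toy model of that ordering (illustrative types, not LLVM's classes):

```cpp
#include <algorithm>
#include <cstdint>
#include <vector>

// Sort by expression kind first, then by unsigned constant value or by
// addrec loop depth within a kind, mirroring the comparator change.
struct Node {
  enum Kind { Constant, AddRec } kind;
  uint64_t key; // the constant's value, or the addrec's loop depth
};

static bool complexityLess(const Node &L, const Node &R) {
  if (L.kind != R.kind)
    return L.kind < R.kind;
  return L.key < R.key; // ult for constants; depth compare for addrecs
}

int main() {
  std::vector<Node> Ops = {{Node::AddRec, 2}, {Node::Constant, 7},
                           {Node::Constant, 3}, {Node::AddRec, 1}};
  std::stable_sort(Ops.begin(), Ops.end(), complexityLess);
  // Now: constant 3, constant 7, depth-1 addrec, depth-2 addrec.
  return 0;
}
```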
if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Op)) { - std::vector<SCEVHandle> Operands; + SmallVector<SCEVHandle, 4> Operands; for (unsigned i = 0, e = AddRec->getNumOperands(); i != e; ++i) Operands.push_back(getTruncateExpr(AddRec->getOperand(i), Ty)); return getAddRecExpr(Operands, AddRec->getLoop()); } SCEVTruncateExpr *&Result = (*SCEVTruncates)[std::make_pair(Op, Ty)]; - if (Result == 0) Result = new SCEVTruncateExpr(Op, Ty); + if (Result == 0) Result = new SCEVTruncateExpr(Op, Ty, this); return Result; } @@ -861,7 +880,7 @@ SCEVHandle ScalarEvolution::getZeroExtendExpr(const SCEVHandle &Op, } SCEVZeroExtendExpr *&Result = (*SCEVZeroExtends)[std::make_pair(Op, Ty)]; - if (Result == 0) Result = new SCEVZeroExtendExpr(Op, Ty); + if (Result == 0) Result = new SCEVZeroExtendExpr(Op, Ty, this); return Result; } @@ -933,7 +952,7 @@ SCEVHandle ScalarEvolution::getSignExtendExpr(const SCEVHandle &Op, } SCEVSignExtendExpr *&Result = (*SCEVSignExtends)[std::make_pair(Op, Ty)]; - if (Result == 0) Result = new SCEVSignExtendExpr(Op, Ty); + if (Result == 0) Result = new SCEVSignExtendExpr(Op, Ty, this); return Result; } @@ -979,9 +998,105 @@ SCEVHandle ScalarEvolution::getAnyExtendExpr(const SCEVHandle &Op, return ZExt; } +/// CollectAddOperandsWithScales - Process the given Ops list, which is +/// a list of operands to be added under the given scale, update the given +/// map. This is a helper function for getAddRecExpr. As an example of +/// what it does, given a sequence of operands that would form an add +/// expression like this: +/// +/// m + n + 13 + (A * (o + p + (B * q + m + 29))) + r + (-1 * r) +/// +/// where A and B are constants, update the map with these values: +/// +/// (m, 1+A*B), (n, 1), (o, A), (p, A), (q, A*B), (r, 0) +/// +/// and add 13 + A*B*29 to AccumulatedConstant. +/// This will allow getAddRecExpr to produce this: +/// +/// 13+A*B*29 + n + (m * (1+A*B)) + ((o + p) * A) + (q * A*B) +/// +/// This form often exposes folding opportunities that are hidden in +/// the original operand list. +/// +/// Return true iff it appears that any interesting folding opportunities +/// may be exposed. This helps getAddRecExpr short-circuit extra work in +/// the common case where no interesting opportunities are present, and +/// is also used as a check to avoid infinite recursion. +/// +static bool +CollectAddOperandsWithScales(DenseMap<SCEVHandle, APInt> &M, + SmallVector<SCEVHandle, 8> &NewOps, + APInt &AccumulatedConstant, + const SmallVectorImpl<SCEVHandle> &Ops, + const APInt &Scale, + ScalarEvolution &SE) { + bool Interesting = false; + + // Iterate over the add operands. + for (unsigned i = 0, e = Ops.size(); i != e; ++i) { + const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(Ops[i]); + if (Mul && isa<SCEVConstant>(Mul->getOperand(0))) { + APInt NewScale = + Scale * cast<SCEVConstant>(Mul->getOperand(0))->getValue()->getValue(); + if (Mul->getNumOperands() == 2 && isa<SCEVAddExpr>(Mul->getOperand(1))) { + // A multiplication of a constant with another add; recurse. + Interesting |= + CollectAddOperandsWithScales(M, NewOps, AccumulatedConstant, + cast<SCEVAddExpr>(Mul->getOperand(1)) + ->getOperands(), + NewScale, SE); + } else { + // A multiplication of a constant with some other value. Update + // the map. 
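The getTruncateExpr hunk above now pushes a truncate into any chrec, not just all-constant ones. A quick numeric check, assuming ordinary two's-complement wraparound, of the identity it relies on:

```cpp
#include <cassert>
#include <cstdint>

// trunc({Start,+,Step}) == {trunc(Start),+,trunc(Step)}: truncation to
// N bits is reduction mod 2^N, which commutes with add and mul.
int main() {
  const uint32_t Start = 300, Step = 700;
  for (uint32_t i = 0; i < 100; ++i) {
    uint8_t Wide = static_cast<uint8_t>(Start + i * Step);
    uint8_t Narrow = static_cast<uint8_t>(
        static_cast<uint8_t>(Start) + i * static_cast<uint8_t>(Step));
    assert(Wide == Narrow);
  }
  return 0;
}
```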
+ SmallVector<SCEVHandle, 4> MulOps(Mul->op_begin()+1, Mul->op_end()); + SCEVHandle Key = SE.getMulExpr(MulOps); + std::pair<DenseMap<SCEVHandle, APInt>::iterator, bool> Pair = + M.insert(std::make_pair(Key, APInt())); + if (Pair.second) { + Pair.first->second = NewScale; + NewOps.push_back(Pair.first->first); + } else { + Pair.first->second += NewScale; + // The map already had an entry for this value, which may indicate + // a folding opportunity. + Interesting = true; + } + } + } else if (const SCEVConstant *C = dyn_cast<SCEVConstant>(Ops[i])) { + // Pull a buried constant out to the outside. + if (Scale != 1 || AccumulatedConstant != 0 || C->isZero()) + Interesting = true; + AccumulatedConstant += Scale * C->getValue()->getValue(); + } else { + // An ordinary operand. Update the map. + std::pair<DenseMap<SCEVHandle, APInt>::iterator, bool> Pair = + M.insert(std::make_pair(Ops[i], APInt())); + if (Pair.second) { + Pair.first->second = Scale; + NewOps.push_back(Pair.first->first); + } else { + Pair.first->second += Scale; + // The map already had an entry for this value, which may indicate + // a folding opportunity. + Interesting = true; + } + } + } + + return Interesting; +} + +namespace { + struct APIntCompare { + bool operator()(const APInt &LHS, const APInt &RHS) const { + return LHS.ult(RHS); + } + }; +} + /// getAddExpr - Get a canonical add expression, or something simpler if /// possible. -SCEVHandle ScalarEvolution::getAddExpr(std::vector<SCEVHandle> &Ops) { +SCEVHandle ScalarEvolution::getAddExpr(SmallVectorImpl<SCEVHandle> &Ops) { assert(!Ops.empty() && "Cannot get empty add!"); if (Ops.size() == 1) return Ops[0]; #ifndef NDEBUG @@ -1001,11 +1116,10 @@ SCEVHandle ScalarEvolution::getAddExpr(std::vector<SCEVHandle> &Ops) { assert(Idx < Ops.size()); while (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(Ops[Idx])) { // We found two constants, fold them together! - ConstantInt *Fold = ConstantInt::get(LHSC->getValue()->getValue() + - RHSC->getValue()->getValue()); - Ops[0] = getConstant(Fold); + Ops[0] = getConstant(LHSC->getValue()->getValue() + + RHSC->getValue()->getValue()); + if (Ops.size() == 2) return Ops[0]; Ops.erase(Ops.begin()+1); // Erase the folded element - if (Ops.size() == 1) return Ops[0]; LHSC = cast<SCEVConstant>(Ops[0]); } @@ -1043,7 +1157,7 @@ SCEVHandle ScalarEvolution::getAddExpr(std::vector<SCEVHandle> &Ops) { const SCEVTruncateExpr *Trunc = cast<SCEVTruncateExpr>(Ops[Idx]); const Type *DstType = Trunc->getType(); const Type *SrcType = Trunc->getOperand()->getType(); - std::vector<SCEVHandle> LargeOps; + SmallVector<SCEVHandle, 8> LargeOps; bool Ok = true; // Check all the operands to see if they can be represented in the // source type of the truncate. @@ -1059,7 +1173,7 @@ SCEVHandle ScalarEvolution::getAddExpr(std::vector<SCEVHandle> &Ops) { // is much more likely to be foldable here. LargeOps.push_back(getSignExtendExpr(C, SrcType)); } else if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(Ops[i])) { - std::vector<SCEVHandle> LargeMulOps; + SmallVector<SCEVHandle, 8> LargeMulOps; for (unsigned j = 0, f = M->getNumOperands(); j != f && Ok; ++j) { if (const SCEVTruncateExpr *T = dyn_cast<SCEVTruncateExpr>(M->getOperand(j))) { @@ -1120,6 +1234,38 @@ SCEVHandle ScalarEvolution::getAddExpr(std::vector<SCEVHandle> &Ops) { while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scMulExpr) ++Idx; + // Check to see if there are any folding opportunities present with + // operands multiplied by constant values. 
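CollectAddOperandsWithScales, introduced above, folds an add's operands into a value-to-scale map so that duplicated values (for instance r and -1*r) collapse. A toy model of that bookkeeping with plain containers (illustrative types; the real code walks SCEV nodes and accumulates APInt scales):

```cpp
#include <cassert>
#include <map>
#include <string>
#include <utility>
#include <vector>

int main() {
  // Each operand is scale * symbol; an empty symbol marks a constant.
  // Models m + n + 13 + r + (-1 * r) from the function comment above.
  using Term = std::pair<long, std::string>;
  std::vector<Term> Ops = {{1, "m"}, {1, "n"}, {13, ""}, {1, "r"}, {-1, "r"}};

  long AccumulatedConstant = 0;
  std::map<std::string, long> Scales;
  bool Interesting = false;
  for (const Term &T : Ops) {
    if (T.second.empty()) {
      AccumulatedConstant += T.first; // pull buried constants outside
      Interesting = true;
    } else {
      auto Ins = Scales.insert({T.second, T.first});
      if (!Ins.second) { // same value seen twice: fold the scales
        Ins.first->second += T.first;
        Interesting = true;
      }
    }
  }
  assert(Interesting && Scales["r"] == 0 && AccumulatedConstant == 13);
  return 0;
}
```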
+ if (Idx < Ops.size() && isa<SCEVMulExpr>(Ops[Idx])) { + uint64_t BitWidth = getTypeSizeInBits(Ty); + DenseMap<SCEVHandle, APInt> M; + SmallVector<SCEVHandle, 8> NewOps; + APInt AccumulatedConstant(BitWidth, 0); + if (CollectAddOperandsWithScales(M, NewOps, AccumulatedConstant, + Ops, APInt(BitWidth, 1), *this)) { + // Some interesting folding opportunity is present, so its worthwhile to + // re-generate the operands list. Group the operands by constant scale, + // to avoid multiplying by the same constant scale multiple times. + std::map<APInt, SmallVector<SCEVHandle, 4>, APIntCompare> MulOpLists; + for (SmallVector<SCEVHandle, 8>::iterator I = NewOps.begin(), + E = NewOps.end(); I != E; ++I) + MulOpLists[M.find(*I)->second].push_back(*I); + // Re-generate the operands list. + Ops.clear(); + if (AccumulatedConstant != 0) + Ops.push_back(getConstant(AccumulatedConstant)); + for (std::map<APInt, SmallVector<SCEVHandle, 4>, APIntCompare>::iterator I = + MulOpLists.begin(), E = MulOpLists.end(); I != E; ++I) + if (I->first != 0) + Ops.push_back(getMulExpr(getConstant(I->first), getAddExpr(I->second))); + if (Ops.empty()) + return getIntegerSCEV(0, Ty); + if (Ops.size() == 1) + return Ops[0]; + return getAddExpr(Ops); + } + } + // If we are adding something to a multiply expression, make sure the // something is not already an operand of the multiply. If so, merge it into // the multiply. @@ -1128,13 +1274,13 @@ SCEVHandle ScalarEvolution::getAddExpr(std::vector<SCEVHandle> &Ops) { for (unsigned MulOp = 0, e = Mul->getNumOperands(); MulOp != e; ++MulOp) { const SCEV *MulOpSCEV = Mul->getOperand(MulOp); for (unsigned AddOp = 0, e = Ops.size(); AddOp != e; ++AddOp) - if (MulOpSCEV == Ops[AddOp] && !isa<SCEVConstant>(MulOpSCEV)) { + if (MulOpSCEV == Ops[AddOp] && !isa<SCEVConstant>(Ops[AddOp])) { // Fold W + X + (X * Y * Z) --> W + (X * ((Y*Z)+1)) SCEVHandle InnerMul = Mul->getOperand(MulOp == 0); if (Mul->getNumOperands() != 2) { // If the multiply has more than two operands, we must get the // Y*Z term. - std::vector<SCEVHandle> MulOps(Mul->op_begin(), Mul->op_end()); + SmallVector<SCEVHandle, 4> MulOps(Mul->op_begin(), Mul->op_end()); MulOps.erase(MulOps.begin()+MulOp); InnerMul = getMulExpr(MulOps); } @@ -1166,13 +1312,13 @@ SCEVHandle ScalarEvolution::getAddExpr(std::vector<SCEVHandle> &Ops) { // Fold X + (A*B*C) + (A*D*E) --> X + (A*(B*C+D*E)) SCEVHandle InnerMul1 = Mul->getOperand(MulOp == 0); if (Mul->getNumOperands() != 2) { - std::vector<SCEVHandle> MulOps(Mul->op_begin(), Mul->op_end()); + SmallVector<SCEVHandle, 4> MulOps(Mul->op_begin(), Mul->op_end()); MulOps.erase(MulOps.begin()+MulOp); InnerMul1 = getMulExpr(MulOps); } SCEVHandle InnerMul2 = OtherMul->getOperand(OMulOp == 0); if (OtherMul->getNumOperands() != 2) { - std::vector<SCEVHandle> MulOps(OtherMul->op_begin(), + SmallVector<SCEVHandle, 4> MulOps(OtherMul->op_begin(), OtherMul->op_end()); MulOps.erase(MulOps.begin()+OMulOp); InnerMul2 = getMulExpr(MulOps); @@ -1199,7 +1345,7 @@ SCEVHandle ScalarEvolution::getAddExpr(std::vector<SCEVHandle> &Ops) { for (; Idx < Ops.size() && isa<SCEVAddRecExpr>(Ops[Idx]); ++Idx) { // Scan all of the other operands to this add and add them to the vector if // they are loop invariant w.r.t. the recurrence. 
- std::vector<SCEVHandle> LIOps; + SmallVector<SCEVHandle, 8> LIOps; const SCEVAddRecExpr *AddRec = cast<SCEVAddRecExpr>(Ops[Idx]); for (unsigned i = 0, e = Ops.size(); i != e; ++i) if (Ops[i]->isLoopInvariant(AddRec->getLoop())) { @@ -1213,7 +1359,8 @@ SCEVHandle ScalarEvolution::getAddExpr(std::vector<SCEVHandle> &Ops) { // NLI + LI + {Start,+,Step} --> NLI + {LI+Start,+,Step} LIOps.push_back(AddRec->getStart()); - std::vector<SCEVHandle> AddRecOps(AddRec->op_begin(), AddRec->op_end()); + SmallVector<SCEVHandle, 4> AddRecOps(AddRec->op_begin(), + AddRec->op_end()); AddRecOps[0] = getAddExpr(LIOps); SCEVHandle NewRec = getAddRecExpr(AddRecOps, AddRec->getLoop()); @@ -1238,7 +1385,7 @@ SCEVHandle ScalarEvolution::getAddExpr(std::vector<SCEVHandle> &Ops) { const SCEVAddRecExpr *OtherAddRec = cast<SCEVAddRecExpr>(Ops[OtherIdx]); if (AddRec->getLoop() == OtherAddRec->getLoop()) { // Other + {A,+,B} + {C,+,D} --> Other + {A+C,+,B+D} - std::vector<SCEVHandle> NewOps(AddRec->op_begin(), AddRec->op_end()); + SmallVector<SCEVHandle, 4> NewOps(AddRec->op_begin(), AddRec->op_end()); for (unsigned i = 0, e = OtherAddRec->getNumOperands(); i != e; ++i) { if (i >= NewOps.size()) { NewOps.insert(NewOps.end(), OtherAddRec->op_begin()+i, @@ -1267,14 +1414,14 @@ SCEVHandle ScalarEvolution::getAddExpr(std::vector<SCEVHandle> &Ops) { std::vector<const SCEV*> SCEVOps(Ops.begin(), Ops.end()); SCEVCommutativeExpr *&Result = (*SCEVCommExprs)[std::make_pair(scAddExpr, SCEVOps)]; - if (Result == 0) Result = new SCEVAddExpr(Ops); + if (Result == 0) Result = new SCEVAddExpr(Ops, this); return Result; } /// getMulExpr - Get a canonical multiply expression, or something simpler if /// possible. -SCEVHandle ScalarEvolution::getMulExpr(std::vector<SCEVHandle> &Ops) { +SCEVHandle ScalarEvolution::getMulExpr(SmallVectorImpl<SCEVHandle> &Ops) { assert(!Ops.empty() && "Cannot get empty mul!"); #ifndef NDEBUG for (unsigned i = 1, e = Ops.size(); i != e; ++i) @@ -1355,7 +1502,7 @@ SCEVHandle ScalarEvolution::getMulExpr(std::vector<SCEVHandle> &Ops) { for (; Idx < Ops.size() && isa<SCEVAddRecExpr>(Ops[Idx]); ++Idx) { // Scan all of the other operands to this mul and add them to the vector if // they are loop invariant w.r.t. the recurrence. - std::vector<SCEVHandle> LIOps; + SmallVector<SCEVHandle, 8> LIOps; const SCEVAddRecExpr *AddRec = cast<SCEVAddRecExpr>(Ops[Idx]); for (unsigned i = 0, e = Ops.size(); i != e; ++i) if (Ops[i]->isLoopInvariant(AddRec->getLoop())) { @@ -1367,7 +1514,7 @@ SCEVHandle ScalarEvolution::getMulExpr(std::vector<SCEVHandle> &Ops) { // If we found some loop invariants, fold them into the recurrence. 
if (!LIOps.empty()) { // NLI * LI * {Start,+,Step} --> NLI * {LI*Start,+,LI*Step} - std::vector<SCEVHandle> NewOps; + SmallVector<SCEVHandle, 4> NewOps; NewOps.reserve(AddRec->getNumOperands()); if (LIOps.size() == 1) { const SCEV *Scale = LIOps[0]; @@ -1375,7 +1522,7 @@ SCEVHandle ScalarEvolution::getMulExpr(std::vector<SCEVHandle> &Ops) { NewOps.push_back(getMulExpr(Scale, AddRec->getOperand(i))); } else { for (unsigned i = 0, e = AddRec->getNumOperands(); i != e; ++i) { - std::vector<SCEVHandle> MulOps(LIOps); + SmallVector<SCEVHandle, 4> MulOps(LIOps.begin(), LIOps.end()); MulOps.push_back(AddRec->getOperand(i)); NewOps.push_back(getMulExpr(MulOps)); } @@ -1433,7 +1580,7 @@ SCEVHandle ScalarEvolution::getMulExpr(std::vector<SCEVHandle> &Ops) { SCEVCommutativeExpr *&Result = (*SCEVCommExprs)[std::make_pair(scMulExpr, SCEVOps)]; if (Result == 0) - Result = new SCEVMulExpr(Ops); + Result = new SCEVMulExpr(Ops, this); return Result; } @@ -1473,14 +1620,14 @@ SCEVHandle ScalarEvolution::getUDivExpr(const SCEVHandle &LHS, getAddRecExpr(getZeroExtendExpr(AR->getStart(), ExtTy), getZeroExtendExpr(Step, ExtTy), AR->getLoop())) { - std::vector<SCEVHandle> Operands; + SmallVector<SCEVHandle, 4> Operands; for (unsigned i = 0, e = AR->getNumOperands(); i != e; ++i) Operands.push_back(getUDivExpr(AR->getOperand(i), RHS)); return getAddRecExpr(Operands, AR->getLoop()); } // (A*B)/C --> A*(B/C) if safe and B/C can be folded. if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(LHS)) { - std::vector<SCEVHandle> Operands; + SmallVector<SCEVHandle, 4> Operands; for (unsigned i = 0, e = M->getNumOperands(); i != e; ++i) Operands.push_back(getZeroExtendExpr(M->getOperand(i), ExtTy)); if (getZeroExtendExpr(M, ExtTy) == getMulExpr(Operands)) @@ -1489,7 +1636,9 @@ SCEVHandle ScalarEvolution::getUDivExpr(const SCEVHandle &LHS, SCEVHandle Op = M->getOperand(i); SCEVHandle Div = getUDivExpr(Op, RHSC); if (!isa<SCEVUDivExpr>(Div) && getMulExpr(Div, RHSC) == Op) { - Operands = M->getOperands(); + const SmallVectorImpl<SCEVHandle> &MOperands = M->getOperands(); + Operands = SmallVector<SCEVHandle, 4>(MOperands.begin(), + MOperands.end()); Operands[i] = Div; return getMulExpr(Operands); } @@ -1497,7 +1646,7 @@ SCEVHandle ScalarEvolution::getUDivExpr(const SCEVHandle &LHS, } // (A+B)/C --> (A/C + B/C) if safe and A/C and B/C can be folded. if (const SCEVAddRecExpr *A = dyn_cast<SCEVAddRecExpr>(LHS)) { - std::vector<SCEVHandle> Operands; + SmallVector<SCEVHandle, 4> Operands; for (unsigned i = 0, e = A->getNumOperands(); i != e; ++i) Operands.push_back(getZeroExtendExpr(A->getOperand(i), ExtTy)); if (getZeroExtendExpr(A, ExtTy) == getAddExpr(Operands)) { @@ -1522,7 +1671,7 @@ SCEVHandle ScalarEvolution::getUDivExpr(const SCEVHandle &LHS, } SCEVUDivExpr *&Result = (*SCEVUDivs)[std::make_pair(LHS, RHS)]; - if (Result == 0) Result = new SCEVUDivExpr(LHS, RHS); + if (Result == 0) Result = new SCEVUDivExpr(LHS, RHS, this); return Result; } @@ -1531,7 +1680,7 @@ SCEVHandle ScalarEvolution::getUDivExpr(const SCEVHandle &LHS, /// Simplify the expression as much as possible. 
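Both folds above rewrite loop-invariant operands into the start (and, for multiplication, the step) of an add recurrence. A numeric spot-check of the two identities, assuming unsigned wraparound semantics:

```cpp
#include <cassert>
#include <cstdint>

// LI + {Start,+,Step} == {LI+Start,+,Step}
// LI * {Start,+,Step} == {LI*Start,+,LI*Step}
int main() {
  const uint32_t LI = 5, Start = 7, Step = 3;
  for (uint32_t i = 0; i < 50; ++i) {
    uint32_t Rec = Start + i * Step; // value of {Start,+,Step} at iteration i
    assert(LI + Rec == (LI + Start) + i * Step);
    assert(LI * Rec == (LI * Start) + i * (LI * Step));
  }
  return 0;
}
```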
SCEVHandle ScalarEvolution::getAddRecExpr(const SCEVHandle &Start, const SCEVHandle &Step, const Loop *L) { - std::vector<SCEVHandle> Operands; + SmallVector<SCEVHandle, 4> Operands; Operands.push_back(Start); if (const SCEVAddRecExpr *StepChrec = dyn_cast<SCEVAddRecExpr>(Step)) if (StepChrec->getLoop() == L) { @@ -1546,7 +1695,7 @@ SCEVHandle ScalarEvolution::getAddRecExpr(const SCEVHandle &Start, /// getAddRecExpr - Get an add recurrence expression for the specified loop. /// Simplify the expression as much as possible. -SCEVHandle ScalarEvolution::getAddRecExpr(std::vector<SCEVHandle> &Operands, +SCEVHandle ScalarEvolution::getAddRecExpr(SmallVectorImpl<SCEVHandle> &Operands, const Loop *L) { if (Operands.size() == 1) return Operands[0]; #ifndef NDEBUG @@ -1565,8 +1714,8 @@ SCEVHandle ScalarEvolution::getAddRecExpr(std::vector<SCEVHandle> &Operands, if (const SCEVAddRecExpr *NestedAR = dyn_cast<SCEVAddRecExpr>(Operands[0])) { const Loop* NestedLoop = NestedAR->getLoop(); if (L->getLoopDepth() < NestedLoop->getLoopDepth()) { - std::vector<SCEVHandle> NestedOperands(NestedAR->op_begin(), - NestedAR->op_end()); + SmallVector<SCEVHandle, 4> NestedOperands(NestedAR->op_begin(), + NestedAR->op_end()); SCEVHandle NestedARHandle(NestedAR); Operands[0] = NestedAR->getStart(); NestedOperands[0] = getAddRecExpr(Operands, L); @@ -1576,19 +1725,20 @@ SCEVHandle ScalarEvolution::getAddRecExpr(std::vector<SCEVHandle> &Operands, std::vector<const SCEV*> SCEVOps(Operands.begin(), Operands.end()); SCEVAddRecExpr *&Result = (*SCEVAddRecExprs)[std::make_pair(L, SCEVOps)]; - if (Result == 0) Result = new SCEVAddRecExpr(Operands, L); + if (Result == 0) Result = new SCEVAddRecExpr(Operands, L, this); return Result; } SCEVHandle ScalarEvolution::getSMaxExpr(const SCEVHandle &LHS, const SCEVHandle &RHS) { - std::vector<SCEVHandle> Ops; + SmallVector<SCEVHandle, 2> Ops; Ops.push_back(LHS); Ops.push_back(RHS); return getSMaxExpr(Ops); } -SCEVHandle ScalarEvolution::getSMaxExpr(std::vector<SCEVHandle> Ops) { +SCEVHandle +ScalarEvolution::getSMaxExpr(SmallVectorImpl<SCEVHandle> &Ops) { assert(!Ops.empty() && "Cannot get empty smax!"); if (Ops.size() == 1) return Ops[0]; #ifndef NDEBUG @@ -1662,19 +1812,20 @@ SCEVHandle ScalarEvolution::getSMaxExpr(std::vector<SCEVHandle> Ops) { std::vector<const SCEV*> SCEVOps(Ops.begin(), Ops.end()); SCEVCommutativeExpr *&Result = (*SCEVCommExprs)[std::make_pair(scSMaxExpr, SCEVOps)]; - if (Result == 0) Result = new SCEVSMaxExpr(Ops); + if (Result == 0) Result = new SCEVSMaxExpr(Ops, this); return Result; } SCEVHandle ScalarEvolution::getUMaxExpr(const SCEVHandle &LHS, const SCEVHandle &RHS) { - std::vector<SCEVHandle> Ops; + SmallVector<SCEVHandle, 2> Ops; Ops.push_back(LHS); Ops.push_back(RHS); return getUMaxExpr(Ops); } -SCEVHandle ScalarEvolution::getUMaxExpr(std::vector<SCEVHandle> Ops) { +SCEVHandle +ScalarEvolution::getUMaxExpr(SmallVectorImpl<SCEVHandle> &Ops) { assert(!Ops.empty() && "Cannot get empty umax!"); if (Ops.size() == 1) return Ops[0]; #ifndef NDEBUG @@ -1748,17 +1899,29 @@ SCEVHandle ScalarEvolution::getUMaxExpr(std::vector<SCEVHandle> Ops) { std::vector<const SCEV*> SCEVOps(Ops.begin(), Ops.end()); SCEVCommutativeExpr *&Result = (*SCEVCommExprs)[std::make_pair(scUMaxExpr, SCEVOps)]; - if (Result == 0) Result = new SCEVUMaxExpr(Ops); + if (Result == 0) Result = new SCEVUMaxExpr(Ops, this); return Result; } +SCEVHandle ScalarEvolution::getSMinExpr(const SCEVHandle &LHS, + const SCEVHandle &RHS) { + // ~smax(~x, ~y) == smin(x, y). 
+ return getNotSCEV(getSMaxExpr(getNotSCEV(LHS), getNotSCEV(RHS))); +} + +SCEVHandle ScalarEvolution::getUMinExpr(const SCEVHandle &LHS, + const SCEVHandle &RHS) { + // ~umax(~x, ~y) == umin(x, y) + return getNotSCEV(getUMaxExpr(getNotSCEV(LHS), getNotSCEV(RHS))); +} + SCEVHandle ScalarEvolution::getUnknown(Value *V) { if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) return getConstant(CI); if (isa<ConstantPointerNull>(V)) return getIntegerSCEV(0, V->getType()); SCEVUnknown *&Result = (*SCEVUnknowns)[V]; - if (Result == 0) Result = new SCEVUnknown(V); + if (Result == 0) Result = new SCEVUnknown(V, this); return Result; } @@ -1977,6 +2140,22 @@ ScalarEvolution::getTruncateOrNoop(const SCEVHandle &V, const Type *Ty) { return getTruncateExpr(V, Ty); } +/// getUMaxFromMismatchedTypes - Promote the operands to the wider of +/// the types using zero-extension, and then perform a umax operation +/// with them. +SCEVHandle ScalarEvolution::getUMaxFromMismatchedTypes(const SCEVHandle &LHS, + const SCEVHandle &RHS) { + SCEVHandle PromotedLHS = LHS; + SCEVHandle PromotedRHS = RHS; + + if (getTypeSizeInBits(LHS->getType()) > getTypeSizeInBits(RHS->getType())) + PromotedRHS = getZeroExtendExpr(RHS, LHS->getType()); + else + PromotedLHS = getNoopOrZeroExtend(LHS, RHS->getType()); + + return getUMaxExpr(PromotedLHS, PromotedRHS); +} + /// ReplaceSymbolicValueWithConcrete - This looks up the computed SCEV value for /// the specified instruction and replaces any references to the symbolic value /// SymName with the specified value. This is used during PHI resolution. @@ -2040,7 +2219,7 @@ SCEVHandle ScalarEvolution::createNodeForPHI(PHINode *PN) { if (FoundIndex != Add->getNumOperands()) { // Create an add with everything but the specified operand. - std::vector<SCEVHandle> Ops; + SmallVector<SCEVHandle, 8> Ops; for (unsigned i = 0, e = Add->getNumOperands(); i != e; ++i) if (i != FoundIndex) Ops.push_back(Add->getOperand(i)); @@ -2143,73 +2322,134 @@ SCEVHandle ScalarEvolution::createNodeForGEP(User *GEP) { /// guaranteed to end in (at every loop iteration). It is, at the same time, /// the minimum number of times S is divisible by 2. For example, given {4,+,8} /// it returns 2. If S is guaranteed to be 0, it returns the bitwidth of S. -static uint32_t GetMinTrailingZeros(SCEVHandle S, const ScalarEvolution &SE) { +uint32_t +ScalarEvolution::GetMinTrailingZeros(const SCEVHandle &S) { if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S)) return C->getValue()->getValue().countTrailingZeros(); if (const SCEVTruncateExpr *T = dyn_cast<SCEVTruncateExpr>(S)) - return std::min(GetMinTrailingZeros(T->getOperand(), SE), - (uint32_t)SE.getTypeSizeInBits(T->getType())); + return std::min(GetMinTrailingZeros(T->getOperand()), + (uint32_t)getTypeSizeInBits(T->getType())); if (const SCEVZeroExtendExpr *E = dyn_cast<SCEVZeroExtendExpr>(S)) { - uint32_t OpRes = GetMinTrailingZeros(E->getOperand(), SE); - return OpRes == SE.getTypeSizeInBits(E->getOperand()->getType()) ? - SE.getTypeSizeInBits(E->getType()) : OpRes; + uint32_t OpRes = GetMinTrailingZeros(E->getOperand()); + return OpRes == getTypeSizeInBits(E->getOperand()->getType()) ? + getTypeSizeInBits(E->getType()) : OpRes; } if (const SCEVSignExtendExpr *E = dyn_cast<SCEVSignExtendExpr>(S)) { - uint32_t OpRes = GetMinTrailingZeros(E->getOperand(), SE); - return OpRes == SE.getTypeSizeInBits(E->getOperand()->getType()) ? 
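The new getSMinExpr/getUMinExpr lean on the complement trick noted in their comments. A standalone check that bitwise complement reverses both the signed and the unsigned order, so ~max(~x, ~y) == min(x, y) in both senses:

```cpp
#include <algorithm>
#include <cassert>
#include <cstdint>

// In two's complement, ~x == -x - 1 is strictly order-reversing, for
// the signed order and for the unsigned order alike.
int main() {
  const int32_t Vals[] = {-7, -1, 0, 1, 42, INT32_MIN, INT32_MAX};
  for (int32_t X : Vals)
    for (int32_t Y : Vals) {
      assert(~std::max(~X, ~Y) == std::min(X, Y)); // smin via smax
      uint32_t UX = static_cast<uint32_t>(X), UY = static_cast<uint32_t>(Y);
      assert(~std::max(~UX, ~UY) == std::min(UX, UY)); // umin via umax
    }
  return 0;
}
```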
- SE.getTypeSizeInBits(E->getType()) : OpRes; + uint32_t OpRes = GetMinTrailingZeros(E->getOperand()); + return OpRes == getTypeSizeInBits(E->getOperand()->getType()) ? + getTypeSizeInBits(E->getType()) : OpRes; } if (const SCEVAddExpr *A = dyn_cast<SCEVAddExpr>(S)) { // The result is the min of all operands results. - uint32_t MinOpRes = GetMinTrailingZeros(A->getOperand(0), SE); + uint32_t MinOpRes = GetMinTrailingZeros(A->getOperand(0)); for (unsigned i = 1, e = A->getNumOperands(); MinOpRes && i != e; ++i) - MinOpRes = std::min(MinOpRes, GetMinTrailingZeros(A->getOperand(i), SE)); + MinOpRes = std::min(MinOpRes, GetMinTrailingZeros(A->getOperand(i))); return MinOpRes; } if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(S)) { // The result is the sum of all operands results. - uint32_t SumOpRes = GetMinTrailingZeros(M->getOperand(0), SE); - uint32_t BitWidth = SE.getTypeSizeInBits(M->getType()); + uint32_t SumOpRes = GetMinTrailingZeros(M->getOperand(0)); + uint32_t BitWidth = getTypeSizeInBits(M->getType()); for (unsigned i = 1, e = M->getNumOperands(); SumOpRes != BitWidth && i != e; ++i) - SumOpRes = std::min(SumOpRes + GetMinTrailingZeros(M->getOperand(i), SE), + SumOpRes = std::min(SumOpRes + GetMinTrailingZeros(M->getOperand(i)), BitWidth); return SumOpRes; } if (const SCEVAddRecExpr *A = dyn_cast<SCEVAddRecExpr>(S)) { // The result is the min of all operands results. - uint32_t MinOpRes = GetMinTrailingZeros(A->getOperand(0), SE); + uint32_t MinOpRes = GetMinTrailingZeros(A->getOperand(0)); for (unsigned i = 1, e = A->getNumOperands(); MinOpRes && i != e; ++i) - MinOpRes = std::min(MinOpRes, GetMinTrailingZeros(A->getOperand(i), SE)); + MinOpRes = std::min(MinOpRes, GetMinTrailingZeros(A->getOperand(i))); return MinOpRes; } if (const SCEVSMaxExpr *M = dyn_cast<SCEVSMaxExpr>(S)) { // The result is the min of all operands results. - uint32_t MinOpRes = GetMinTrailingZeros(M->getOperand(0), SE); + uint32_t MinOpRes = GetMinTrailingZeros(M->getOperand(0)); for (unsigned i = 1, e = M->getNumOperands(); MinOpRes && i != e; ++i) - MinOpRes = std::min(MinOpRes, GetMinTrailingZeros(M->getOperand(i), SE)); + MinOpRes = std::min(MinOpRes, GetMinTrailingZeros(M->getOperand(i))); return MinOpRes; } if (const SCEVUMaxExpr *M = dyn_cast<SCEVUMaxExpr>(S)) { // The result is the min of all operands results. - uint32_t MinOpRes = GetMinTrailingZeros(M->getOperand(0), SE); + uint32_t MinOpRes = GetMinTrailingZeros(M->getOperand(0)); for (unsigned i = 1, e = M->getNumOperands(); MinOpRes && i != e; ++i) - MinOpRes = std::min(MinOpRes, GetMinTrailingZeros(M->getOperand(i), SE)); + MinOpRes = std::min(MinOpRes, GetMinTrailingZeros(M->getOperand(i))); return MinOpRes; } - // SCEVUDivExpr, SCEVUnknown + if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) { + // For a SCEVUnknown, ask ValueTracking. + unsigned BitWidth = getTypeSizeInBits(U->getType()); + APInt Mask = APInt::getAllOnesValue(BitWidth); + APInt Zeros(BitWidth, 0), Ones(BitWidth, 0); + ComputeMaskedBits(U->getValue(), Mask, Zeros, Ones); + return Zeros.countTrailingOnes(); + } + + // SCEVUDivExpr return 0; } +uint32_t +ScalarEvolution::GetMinLeadingZeros(const SCEVHandle &S) { + // TODO: Handle other SCEV expression types here. + + if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S)) + return C->getValue()->getValue().countLeadingZeros(); + + if (const SCEVZeroExtendExpr *C = dyn_cast<SCEVZeroExtendExpr>(S)) { + // A zero-extension cast adds zero bits. 
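GetMinTrailingZeros, now a member so the SCEVUnknown case can consult ValueTracking's ComputeMaskedBits, uses min-over-operands for adds and sum-of-operands (capped at the bit width) for muls. A numeric spot-check of those rules, including the {4,+,8} example from the function comment:

```cpp
#include <algorithm>
#include <cassert>
#include <cstdint>

// Count trailing zero bits; 32 for zero, matching APInt's convention.
static unsigned ctz32(uint32_t V) {
  if (V == 0) return 32;
  unsigned N = 0;
  while (!(V & 1)) { V >>= 1; ++N; }
  return N;
}

int main() {
  const uint32_t A = 48, B = 20; // ctz 4 and ctz 2
  assert(ctz32(A + B) >= std::min(ctz32(A), ctz32(B))); // add: take the min
  assert(ctz32(A * B) >= std::min(ctz32(A) + ctz32(B), 32u)); // mul: sum
  for (uint32_t i = 0; i < 64; ++i) // {4,+,8} is always divisible by 4
    assert(ctz32(4 + 8 * i) >= 2);
  return 0;
}
```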
+ return GetMinLeadingZeros(C->getOperand()) + + (getTypeSizeInBits(C->getType()) - + getTypeSizeInBits(C->getOperand()->getType())); + } + + if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) { + // For a SCEVUnknown, ask ValueTracking. + unsigned BitWidth = getTypeSizeInBits(U->getType()); + APInt Mask = APInt::getAllOnesValue(BitWidth); + APInt Zeros(BitWidth, 0), Ones(BitWidth, 0); + ComputeMaskedBits(U->getValue(), Mask, Zeros, Ones, TD); + return Zeros.countLeadingOnes(); + } + + return 1; +} + +uint32_t +ScalarEvolution::GetMinSignBits(const SCEVHandle &S) { + // TODO: Handle other SCEV expression types here. + + if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S)) { + const APInt &A = C->getValue()->getValue(); + return A.isNegative() ? A.countLeadingOnes() : + A.countLeadingZeros(); + } + + if (const SCEVSignExtendExpr *C = dyn_cast<SCEVSignExtendExpr>(S)) { + // A sign-extension cast adds sign bits. + return GetMinSignBits(C->getOperand()) + + (getTypeSizeInBits(C->getType()) - + getTypeSizeInBits(C->getOperand()->getType())); + } + + if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) { + // For a SCEVUnknown, ask ValueTracking. + return ComputeNumSignBits(U->getValue(), TD); + } + + return 1; +} + /// createSCEV - We know that there is no SCEV for the specified value. /// Analyze the expression. /// @@ -2248,14 +2488,27 @@ SCEVHandle ScalarEvolution::createSCEV(Value *V) { if (CI->isAllOnesValue()) return getSCEV(U->getOperand(0)); const APInt &A = CI->getValue(); - unsigned Ones = A.countTrailingOnes(); - if (APIntOps::isMask(Ones, A)) + + // Instcombine's ShrinkDemandedConstant may strip bits out of + // constants, obscuring what would otherwise be a low-bits mask. + // Use ComputeMaskedBits to compute what ShrinkDemandedConstant + // knew about to reconstruct a low-bits mask value. + unsigned LZ = A.countLeadingZeros(); + unsigned BitWidth = A.getBitWidth(); + APInt AllOnes = APInt::getAllOnesValue(BitWidth); + APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0); + ComputeMaskedBits(U->getOperand(0), AllOnes, KnownZero, KnownOne, TD); + + APInt EffectiveMask = APInt::getLowBitsSet(BitWidth, BitWidth - LZ); + + if (LZ != 0 && !((~A & ~KnownZero) & EffectiveMask)) return getZeroExtendExpr(getTruncateExpr(getSCEV(U->getOperand(0)), - IntegerType::get(Ones)), + IntegerType::get(BitWidth - LZ)), U->getType()); } break; + case Instruction::Or: // If the RHS of the Or is a constant, we may have something like: // X*4+1 which got turned into X*4|1. Handle this as an Add so loop @@ -2266,7 +2519,7 @@ SCEVHandle ScalarEvolution::createSCEV(Value *V) { if (ConstantInt *CI = dyn_cast<ConstantInt>(U->getOperand(1))) { SCEVHandle LHS = getSCEV(U->getOperand(0)); const APInt &CIVal = CI->getValue(); - if (GetMinTrailingZeros(LHS, *this) >= + if (GetMinTrailingZeros(LHS) >= (CIVal.getBitWidth() - CIVal.countLeadingZeros())) return getAddExpr(LHS, getSCEV(U->getOperand(1))); } @@ -2292,9 +2545,27 @@ SCEVHandle ScalarEvolution::createSCEV(Value *V) { if (BO->getOpcode() == Instruction::And && LCI->getValue() == CI->getValue()) if (const SCEVZeroExtendExpr *Z = - dyn_cast<SCEVZeroExtendExpr>(getSCEV(U->getOperand(0)))) - return getZeroExtendExpr(getNotSCEV(Z->getOperand()), - U->getType()); + dyn_cast<SCEVZeroExtendExpr>(getSCEV(U->getOperand(0)))) { + const Type *UTy = U->getType(); + SCEVHandle Z0 = Z->getOperand(); + const Type *Z0Ty = Z0->getType(); + unsigned Z0TySize = getTypeSizeInBits(Z0Ty); + + // If C is a low-bits mask, the zero extend is zerving to + // mask off the high bits. 
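The Instruction::And case above uses ComputeMaskedBits to recover a low-bits mask that instcombine may have shrunk, because an And with such a mask can be modeled as zext(trunc(X)), a form SCEV can reason about where a raw And cannot. A standalone check of that equivalence:

```cpp
#include <cassert>
#include <cstdint>

// X & ((1 << K) - 1) == zext(trunc(X) to iK), here with K = 8.
int main() {
  for (uint32_t X = 0; X < 100000; X += 37) {
    uint32_t Masked = X & 0xFF;
    uint32_t ZextTrunc = static_cast<uint32_t>(static_cast<uint8_t>(X));
    assert(Masked == ZextTrunc);
  }
  return 0;
}
```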
Complement the operand and + // re-apply the zext. + if (APIntOps::isMask(Z0TySize, CI->getValue())) + return getZeroExtendExpr(getNotSCEV(Z0), UTy); + + // If C is a single bit, it may be in the sign-bit position + // before the zero-extend. In this case, represent the xor + // using an add, which is equivalent, and re-apply the zext. + APInt Trunc = APInt(CI->getValue()).trunc(Z0TySize); + if (APInt(Trunc).zext(getTypeSizeInBits(UTy)) == CI->getValue() && + Trunc.isSignBit()) + return getZeroExtendExpr(getAddExpr(Z0, getConstant(Trunc)), + UTy); + } } break; @@ -2385,10 +2656,7 @@ SCEVHandle ScalarEvolution::createSCEV(Value *V) { if (LHS == U->getOperand(1) && RHS == U->getOperand(2)) return getSMaxExpr(getSCEV(LHS), getSCEV(RHS)); else if (LHS == U->getOperand(2) && RHS == U->getOperand(1)) - // ~smax(~x, ~y) == smin(x, y). - return getNotSCEV(getSMaxExpr( - getNotSCEV(getSCEV(LHS)), - getNotSCEV(getSCEV(RHS)))); + return getSMinExpr(getSCEV(LHS), getSCEV(RHS)); break; case ICmpInst::ICMP_ULT: case ICmpInst::ICMP_ULE: @@ -2399,9 +2667,25 @@ SCEVHandle ScalarEvolution::createSCEV(Value *V) { if (LHS == U->getOperand(1) && RHS == U->getOperand(2)) return getUMaxExpr(getSCEV(LHS), getSCEV(RHS)); else if (LHS == U->getOperand(2) && RHS == U->getOperand(1)) - // ~umax(~x, ~y) == umin(x, y) - return getNotSCEV(getUMaxExpr(getNotSCEV(getSCEV(LHS)), - getNotSCEV(getSCEV(RHS)))); + return getUMinExpr(getSCEV(LHS), getSCEV(RHS)); + break; + case ICmpInst::ICMP_NE: + // n != 0 ? n : 1 -> umax(n, 1) + if (LHS == U->getOperand(1) && + isa<ConstantInt>(U->getOperand(2)) && + cast<ConstantInt>(U->getOperand(2))->isOne() && + isa<ConstantInt>(RHS) && + cast<ConstantInt>(RHS)->isZero()) + return getUMaxExpr(getSCEV(LHS), getSCEV(U->getOperand(2))); + break; + case ICmpInst::ICMP_EQ: + // n == 0 ? 1 : n -> umax(n, 1) + if (LHS == U->getOperand(2) && + isa<ConstantInt>(U->getOperand(1)) && + cast<ConstantInt>(U->getOperand(1))->isOne() && + isa<ConstantInt>(RHS) && + cast<ConstantInt>(RHS)->isZero()) + return getUMaxExpr(getSCEV(LHS), getSCEV(U->getOperand(1))); break; default: break; @@ -2462,9 +2746,13 @@ ScalarEvolution::getBackedgeTakenInfo(const Loop *L) { // Update the value in the map. Pair.first->second = ItCount; - } else if (isa<PHINode>(L->getHeader()->begin())) { - // Only count loops that have phi nodes as not being computable. - ++NumTripCountsNotComputed; + } else { + if (ItCount.Max != CouldNotCompute) + // Update the value in the map. + Pair.first->second = ItCount; + if (isa<PHINode>(L->getHeader()->begin())) + // Only count loops that have phi nodes as not being computable. + ++NumTripCountsNotComputed; } // Now that we know more about the trip count for this loop, forget any @@ -2520,19 +2808,58 @@ void ScalarEvolution::forgetLoopPHIs(const Loop *L) { /// of the specified loop will execute. ScalarEvolution::BackedgeTakenInfo ScalarEvolution::ComputeBackedgeTakenCount(const Loop *L) { - // If the loop has a non-one exit block count, we can't analyze it. - BasicBlock *ExitBlock = L->getExitBlock(); - if (!ExitBlock) - return CouldNotCompute; + SmallVector<BasicBlock*, 8> ExitingBlocks; + L->getExitingBlocks(ExitingBlocks); + + // Examine all exits and pick the most conservative values. 
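The Xor rewrite above turns x ^ C into x + C when C is the sign bit of the narrow type, so the zero-extend can be reapplied. A brute-force check of that identity over all i8 values:

```cpp
#include <cassert>
#include <cstdint>

// Adding the top bit either sets it or carries out of the type, and in
// modular arithmetic the carry out is discarded, so xor and add agree.
int main() {
  const uint8_t SignBit = 0x80;
  for (unsigned X = 0; X < 256; ++X) {
    uint8_t V = static_cast<uint8_t>(X);
    assert(static_cast<uint8_t>(V ^ SignBit) ==
           static_cast<uint8_t>(V + SignBit));
  }
  return 0;
}
```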
+ SCEVHandle BECount = CouldNotCompute; + SCEVHandle MaxBECount = CouldNotCompute; + bool CouldNotComputeBECount = false; + bool CouldNotComputeMaxBECount = false; + for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i) { + BackedgeTakenInfo NewBTI = + ComputeBackedgeTakenCountFromExit(L, ExitingBlocks[i]); + + if (NewBTI.Exact == CouldNotCompute) { + // We couldn't compute an exact value for this exit, so + // we don't be able to compute an exact value for the loop. + CouldNotComputeBECount = true; + BECount = CouldNotCompute; + } else if (!CouldNotComputeBECount) { + if (BECount == CouldNotCompute) + BECount = NewBTI.Exact; + else { + // TODO: More analysis could be done here. For example, a + // loop with a short-circuiting && operator has an exact count + // of the min of both sides. + CouldNotComputeBECount = true; + BECount = CouldNotCompute; + } + } + if (NewBTI.Max == CouldNotCompute) { + // We couldn't compute an maximum value for this exit, so + // we don't be able to compute an maximum value for the loop. + CouldNotComputeMaxBECount = true; + MaxBECount = CouldNotCompute; + } else if (!CouldNotComputeMaxBECount) { + if (MaxBECount == CouldNotCompute) + MaxBECount = NewBTI.Max; + else + MaxBECount = getUMaxFromMismatchedTypes(MaxBECount, NewBTI.Max); + } + } + + return BackedgeTakenInfo(BECount, MaxBECount); +} - // Okay, there is one exit block. Try to find the condition that causes the - // loop to be exited. - BasicBlock *ExitingBlock = L->getExitingBlock(); - if (!ExitingBlock) - return CouldNotCompute; // More than one block exiting! +/// ComputeBackedgeTakenCountFromExit - Compute the number of times the backedge +/// of the specified loop will execute if it exits via the specified block. +ScalarEvolution::BackedgeTakenInfo +ScalarEvolution::ComputeBackedgeTakenCountFromExit(const Loop *L, + BasicBlock *ExitingBlock) { - // Okay, we've computed the exiting block. See what condition causes us to - // exit. + // Okay, we've chosen an exiting block. See what condition causes us to + // exit at this block. // // FIXME: we should be able to handle switch instructions (with a single exit) BranchInst *ExitBr = dyn_cast<BranchInst>(ExitingBlock->getTerminator()); @@ -2547,23 +2874,154 @@ ScalarEvolution::ComputeBackedgeTakenCount(const Loop *L) { // Currently we check for this by checking to see if the Exit branch goes to // the loop header. If so, we know it will always execute the same number of // times as the loop. We also handle the case where the exit block *is* the - // loop header. This is common for un-rotated loops. More extensive analysis - // could be done to handle more cases here. + // loop header. This is common for un-rotated loops. + // + // If both of those tests fail, walk up the unique predecessor chain to the + // header, stopping if there is an edge that doesn't exit the loop. If the + // header is reached, the execution count of the branch will be equal to the + // trip count of the loop. + // + // More extensive analysis could be done to handle more cases here. + // if (ExitBr->getSuccessor(0) != L->getHeader() && ExitBr->getSuccessor(1) != L->getHeader() && - ExitBr->getParent() != L->getHeader()) - return CouldNotCompute; - - ICmpInst *ExitCond = dyn_cast<ICmpInst>(ExitBr->getCondition()); + ExitBr->getParent() != L->getHeader()) { + // The simple checks failed, try climbing the unique predecessor chain + // up to the header. 
+ bool Ok = false; + for (BasicBlock *BB = ExitBr->getParent(); BB; ) { + BasicBlock *Pred = BB->getUniquePredecessor(); + if (!Pred) + return CouldNotCompute; + TerminatorInst *PredTerm = Pred->getTerminator(); + for (unsigned i = 0, e = PredTerm->getNumSuccessors(); i != e; ++i) { + BasicBlock *PredSucc = PredTerm->getSuccessor(i); + if (PredSucc == BB) + continue; + // If the predecessor has a successor that isn't BB and isn't + // outside the loop, assume the worst. + if (L->contains(PredSucc)) + return CouldNotCompute; + } + if (Pred == L->getHeader()) { + Ok = true; + break; + } + BB = Pred; + } + if (!Ok) + return CouldNotCompute; + } + + // Procede to the next level to examine the exit condition expression. + return ComputeBackedgeTakenCountFromExitCond(L, ExitBr->getCondition(), + ExitBr->getSuccessor(0), + ExitBr->getSuccessor(1)); +} + +/// ComputeBackedgeTakenCountFromExitCond - Compute the number of times the +/// backedge of the specified loop will execute if its exit condition +/// were a conditional branch of ExitCond, TBB, and FBB. +ScalarEvolution::BackedgeTakenInfo +ScalarEvolution::ComputeBackedgeTakenCountFromExitCond(const Loop *L, + Value *ExitCond, + BasicBlock *TBB, + BasicBlock *FBB) { + // Check if the controlling expression for this loop is an and or or. In + // such cases, an exact backedge-taken count may be infeasible, but a + // maximum count may still be feasible. + if (BinaryOperator *BO = dyn_cast<BinaryOperator>(ExitCond)) { + if (BO->getOpcode() == Instruction::And) { + // Recurse on the operands of the and. + BackedgeTakenInfo BTI0 = + ComputeBackedgeTakenCountFromExitCond(L, BO->getOperand(0), TBB, FBB); + BackedgeTakenInfo BTI1 = + ComputeBackedgeTakenCountFromExitCond(L, BO->getOperand(1), TBB, FBB); + SCEVHandle BECount = CouldNotCompute; + SCEVHandle MaxBECount = CouldNotCompute; + if (L->contains(TBB)) { + // Both conditions must be true for the loop to continue executing. + // Choose the less conservative count. + // TODO: Take the minimum of the exact counts. + if (BTI0.Exact == BTI1.Exact) + BECount = BTI0.Exact; + // TODO: Take the minimum of the maximum counts. + if (BTI0.Max == CouldNotCompute) + MaxBECount = BTI1.Max; + else if (BTI1.Max == CouldNotCompute) + MaxBECount = BTI0.Max; + else if (const SCEVConstant *C0 = dyn_cast<SCEVConstant>(BTI0.Max)) + if (const SCEVConstant *C1 = dyn_cast<SCEVConstant>(BTI1.Max)) + MaxBECount = getConstant(APIntOps::umin(C0->getValue()->getValue(), + C1->getValue()->getValue())); + } else { + // Both conditions must be true for the loop to exit. + assert(L->contains(FBB) && "Loop block has no successor in loop!"); + if (BTI0.Exact != CouldNotCompute && BTI1.Exact != CouldNotCompute) + BECount = getUMaxFromMismatchedTypes(BTI0.Exact, BTI1.Exact); + if (BTI0.Max != CouldNotCompute && BTI1.Max != CouldNotCompute) + MaxBECount = getUMaxFromMismatchedTypes(BTI0.Max, BTI1.Max); + } + + return BackedgeTakenInfo(BECount, MaxBECount); + } + if (BO->getOpcode() == Instruction::Or) { + // Recurse on the operands of the or. + BackedgeTakenInfo BTI0 = + ComputeBackedgeTakenCountFromExitCond(L, BO->getOperand(0), TBB, FBB); + BackedgeTakenInfo BTI1 = + ComputeBackedgeTakenCountFromExitCond(L, BO->getOperand(1), TBB, FBB); + SCEVHandle BECount = CouldNotCompute; + SCEVHandle MaxBECount = CouldNotCompute; + if (L->contains(FBB)) { + // Both conditions must be false for the loop to continue executing. + // Choose the less conservative count. + // TODO: Take the minimum of the exact counts. 
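The And/Or handling above takes an exact count only when both subconditions agree; its TODOs note that the exact answer for a loop continuing while (a && b) is the minimum of the two counts. A small simulation of that case (hypothetical helper, not LLVM code):

```cpp
#include <algorithm>
#include <cassert>
#include <cstdint>

// Count backedges of: for (I = 0; I < N && I < M; ++I). With N, M >= 1
// the body runs min(N, M) times, so min(N, M) - 1 backedges are taken,
// which is the min-of-exact-counts the TODO above refers to.
static uint64_t backedgesTaken(uint64_t N, uint64_t M) {
  uint64_t Trips = 0;
  for (uint64_t I = 0; I < N && I < M; ++I)
    ++Trips;          // number of times the loop body executes
  return Trips - 1;   // backedges = trips - 1 (Trips >= 1 here)
}

int main() {
  for (uint64_t N = 1; N < 20; ++N)
    for (uint64_t M = 1; M < 20; ++M)
      assert(backedgesTaken(N, M) == std::min(N, M) - 1);
  return 0;
}
```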
+ if (BTI0.Exact == BTI1.Exact) + BECount = BTI0.Exact; + // TODO: Take the minimum of the maximum counts. + if (BTI0.Max == CouldNotCompute) + MaxBECount = BTI1.Max; + else if (BTI1.Max == CouldNotCompute) + MaxBECount = BTI0.Max; + else if (const SCEVConstant *C0 = dyn_cast<SCEVConstant>(BTI0.Max)) + if (const SCEVConstant *C1 = dyn_cast<SCEVConstant>(BTI1.Max)) + MaxBECount = getConstant(APIntOps::umin(C0->getValue()->getValue(), + C1->getValue()->getValue())); + } else { + // Both conditions must be false for the loop to exit. + assert(L->contains(TBB) && "Loop block has no successor in loop!"); + if (BTI0.Exact != CouldNotCompute && BTI1.Exact != CouldNotCompute) + BECount = getUMaxFromMismatchedTypes(BTI0.Exact, BTI1.Exact); + if (BTI0.Max != CouldNotCompute && BTI1.Max != CouldNotCompute) + MaxBECount = getUMaxFromMismatchedTypes(BTI0.Max, BTI1.Max); + } + + return BackedgeTakenInfo(BECount, MaxBECount); + } + } + + // With an icmp, it may be feasible to compute an exact backedge-taken count. + // Procede to the next level to examine the icmp. + if (ICmpInst *ExitCondICmp = dyn_cast<ICmpInst>(ExitCond)) + return ComputeBackedgeTakenCountFromExitCondICmp(L, ExitCondICmp, TBB, FBB); // If it's not an integer or pointer comparison then compute it the hard way. - if (ExitCond == 0) - return ComputeBackedgeTakenCountExhaustively(L, ExitBr->getCondition(), - ExitBr->getSuccessor(0) == ExitBlock); + return ComputeBackedgeTakenCountExhaustively(L, ExitCond, !L->contains(TBB)); +} + +/// ComputeBackedgeTakenCountFromExitCondICmp - Compute the number of times the +/// backedge of the specified loop will execute if its exit condition +/// were a conditional branch of the ICmpInst ExitCond, TBB, and FBB. +ScalarEvolution::BackedgeTakenInfo +ScalarEvolution::ComputeBackedgeTakenCountFromExitCondICmp(const Loop *L, + ICmpInst *ExitCond, + BasicBlock *TBB, + BasicBlock *FBB) { // If the condition was exit on true, convert the condition to exit on false ICmpInst::Predicate Cond; - if (ExitBr->getSuccessor(1) == ExitBlock) + if (!L->contains(FBB)) Cond = ExitCond->getPredicate(); else Cond = ExitCond->getInversePredicate(); @@ -2573,7 +3031,12 @@ ScalarEvolution::ComputeBackedgeTakenCount(const Loop *L) { if (Constant *RHS = dyn_cast<Constant>(ExitCond->getOperand(1))) { SCEVHandle ItCnt = ComputeLoadConstantCompareBackedgeTakenCount(LI, RHS, L, Cond); - if (!isa<SCEVCouldNotCompute>(ItCnt)) return ItCnt; + if (!isa<SCEVCouldNotCompute>(ItCnt)) { + unsigned BitWidth = getTypeSizeInBits(ItCnt->getType()); + return BackedgeTakenInfo(ItCnt, + isa<SCEVConstant>(ItCnt) ? ItCnt : + getConstant(APInt::getMaxValue(BitWidth)-1)); + } } SCEVHandle LHS = getSCEV(ExitCond->getOperand(0)); @@ -2651,8 +3114,7 @@ ScalarEvolution::ComputeBackedgeTakenCount(const Loop *L) { break; } return - ComputeBackedgeTakenCountExhaustively(L, ExitCond, - ExitBr->getSuccessor(0) == ExitBlock); + ComputeBackedgeTakenCountExhaustively(L, ExitCond, !L->contains(TBB)); } static ConstantInt * @@ -2750,7 +3212,7 @@ ComputeLoadConstantCompareBackedgeTakenCount(LoadInst *LI, Constant *RHS, unsigned MaxSteps = MaxBruteForceIterations; for (unsigned IterationNum = 0; IterationNum != MaxSteps; ++IterationNum) { ConstantInt *ItCst = - ConstantInt::get(IdxExpr->getType(), IterationNum); + ConstantInt::get(cast<IntegerType>(IdxExpr->getType()), IterationNum); ConstantInt *Val = EvaluateConstantChrecAtConstant(IdxExpr, ItCst, *this); // Form the GEP offset. 
@@ -2945,7 +3407,7 @@ ComputeBackedgeTakenCountExhaustively(const Loop *L, Value *Cond, bool ExitWhen) if (CondVal->getValue() == uint64_t(ExitWhen)) { ConstantEvolutionLoopExitValue[PN] = PHIVal; ++NumBruteForceTripCountsComputed; - return getConstant(ConstantInt::get(Type::Int32Ty, IterationNum)); + return getConstant(Type::Int32Ty, IterationNum); } // Compute the value of the PHI node for the next iteration. @@ -3074,7 +3536,7 @@ SCEVHandle ScalarEvolution::getSCEVAtScope(const SCEV *V, const Loop *L) { if (OpAtScope != Comm->getOperand(i)) { // Okay, at least one of these operands is loop variant but might be // foldable. Build a new instance of the folded commutative expression. - std::vector<SCEVHandle> NewOps(Comm->op_begin(), Comm->op_begin()+i); + SmallVector<SCEVHandle, 8> NewOps(Comm->op_begin(), Comm->op_begin()+i); NewOps.push_back(OpAtScope); for (++i; i != e; ++i) { @@ -3394,6 +3856,29 @@ ScalarEvolution::getPredecessorWithUniqueSuccessorForBB(BasicBlock *BB) { return 0; } +/// HasSameValue - SCEV structural equivalence is usually sufficient for +/// testing whether two expressions are equal, however for the purposes of +/// looking for a condition guarding a loop, it can be useful to be a little +/// more general, since a front-end may have replicated the controlling +/// expression. +/// +static bool HasSameValue(const SCEVHandle &A, const SCEVHandle &B) { + // Quick check to see if they are the same SCEV. + if (A == B) return true; + + // Otherwise, if they're both SCEVUnknown, it's possible that they hold + // two different instructions with the same value. Check for this case. + if (const SCEVUnknown *AU = dyn_cast<SCEVUnknown>(A)) + if (const SCEVUnknown *BU = dyn_cast<SCEVUnknown>(B)) + if (const Instruction *AI = dyn_cast<Instruction>(AU->getValue())) + if (const Instruction *BI = dyn_cast<Instruction>(BU->getValue())) + if (AI->isIdenticalTo(BI)) + return true; + + // Otherwise assume they may have a different value. + return false; +} + /// isLoopGuardedByCond - Test whether entry to the loop is protected by /// a conditional between LHS and RHS. This is used to help avoid max /// expressions in loop trip counts. @@ -3494,15 +3979,43 @@ bool ScalarEvolution::isLoopGuardedByCond(const Loop *L, SCEVHandle PreCondLHSSCEV = getSCEV(PreCondLHS); SCEVHandle PreCondRHSSCEV = getSCEV(PreCondRHS); - if ((LHS == PreCondLHSSCEV && RHS == PreCondRHSSCEV) || - (LHS == getNotSCEV(PreCondRHSSCEV) && - RHS == getNotSCEV(PreCondLHSSCEV))) + if ((HasSameValue(LHS, PreCondLHSSCEV) && + HasSameValue(RHS, PreCondRHSSCEV)) || + (HasSameValue(LHS, getNotSCEV(PreCondRHSSCEV)) && + HasSameValue(RHS, getNotSCEV(PreCondLHSSCEV)))) return true; } return false; } +/// getBECount - Subtract the end and start values and divide by the step, +/// rounding up, to get the number of times the backedge is executed. Return +/// CouldNotCompute if an intermediate computation overflows. +SCEVHandle ScalarEvolution::getBECount(const SCEVHandle &Start, + const SCEVHandle &End, + const SCEVHandle &Step) { + const Type *Ty = Start->getType(); + SCEVHandle NegOne = getIntegerSCEV(-1, Ty); + SCEVHandle Diff = getMinusSCEV(End, Start); + SCEVHandle RoundUp = getAddExpr(Step, NegOne); + + // Add an adjustment to the difference between End and Start so that + // the division will effectively round up. + SCEVHandle Add = getAddExpr(Diff, RoundUp); + + // Check Add for unsigned overflow. + // TODO: More sophisticated things could be done here. 
+ const Type *WideTy = IntegerType::get(getTypeSizeInBits(Ty) + 1);
+ SCEVHandle OperandExtendedAdd =
+ getAddExpr(getZeroExtendExpr(Diff, WideTy),
+ getZeroExtendExpr(RoundUp, WideTy));
+ if (getZeroExtendExpr(Add, WideTy) != OperandExtendedAdd)
+ return CouldNotCompute;
+
+ return getUDivExpr(Add, Step);
+}
+
 /// HowManyLessThans - Return the number of times a backedge containing the
 /// specified less-than comparison will execute. If not computable, return
 /// CouldNotCompute.
@@ -3520,7 +4033,6 @@ HowManyLessThans(const SCEV *LHS, const SCEV *RHS,
 // FORNOW: We only support unit strides.
 unsigned BitWidth = getTypeSizeInBits(AddRec->getType());
 SCEVHandle Step = AddRec->getStepRecurrence(*this);
- SCEVHandle NegOne = getIntegerSCEV(-1, AddRec->getType());

 // TODO: handle non-constant strides.
 const SCEVConstant *CStep = dyn_cast<SCEVConstant>(Step);
@@ -3575,22 +4087,20 @@ HowManyLessThans(const SCEV *LHS, const SCEV *RHS,
 : getUMaxExpr(RHS, Start);

 // Determine the maximum constant end value.
- SCEVHandle MaxEnd = isa<SCEVConstant>(End) ? End :
- getConstant(isSigned ? APInt::getSignedMaxValue(BitWidth) :
- APInt::getMaxValue(BitWidth));
+ SCEVHandle MaxEnd =
+ isa<SCEVConstant>(End) ? End :
+ getConstant(isSigned ? APInt::getSignedMaxValue(BitWidth)
+ .ashr(GetMinSignBits(End) - 1) :
+ APInt::getMaxValue(BitWidth)
+ .lshr(GetMinLeadingZeros(End)));

 // Finally, we subtract these two values and divide, rounding up, to get
 // the number of times the backedge is executed.
- SCEVHandle BECount = getUDivExpr(getAddExpr(getMinusSCEV(End, Start),
- getAddExpr(Step, NegOne)),
- Step);
+ SCEVHandle BECount = getBECount(Start, End, Step);

 // The maximum backedge count is similar, except using the minimum start
 // value and the maximum end value.
- SCEVHandle MaxBECount = getUDivExpr(getAddExpr(getMinusSCEV(MaxEnd,
- MinStart),
- getAddExpr(Step, NegOne)),
- Step);
+ SCEVHandle MaxBECount = getBECount(MinStart, MaxEnd, Step);

 return BackedgeTakenInfo(BECount, MaxBECount);
 }
@@ -3611,7 +4121,7 @@ SCEVHandle SCEVAddRecExpr::getNumIterationsInRange(ConstantRange Range,
 // If the start is a non-zero constant, shift the range to simplify things.
 if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(getStart()))
 if (!SC->getValue()->isZero()) {
- std::vector<SCEVHandle> Operands(op_begin(), op_end());
+ SmallVector<SCEVHandle, 4> Operands(op_begin(), op_end());
 Operands[0] = SE.getIntegerSCEV(0, SC->getType());
 SCEVHandle Shifted = SE.getAddRecExpr(Operands, getLoop());
 if (const SCEVAddRecExpr *ShiftedAddRec =
@@ -3636,7 +4146,7 @@ SCEVHandle SCEVAddRecExpr::getNumIterationsInRange(ConstantRange Range,
 // iteration exits.
 unsigned BitWidth = SE.getTypeSizeInBits(getType());
 if (!Range.contains(APInt(BitWidth, 0)))
- return SE.getConstant(ConstantInt::get(getType(),0));
+ return SE.getIntegerSCEV(0, getType());

 if (isAffine()) {
 // If this is an affine expression then we have this situation:
@@ -3672,7 +4182,7 @@ SCEVHandle SCEVAddRecExpr::getNumIterationsInRange(ConstantRange Range,
 // quadratic equation to solve it. To do this, we must frame our problem in
 // terms of figuring out when zero is crossed, instead of when
 // Range.getUpper() is crossed.
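For reference, the overflow guard introduced in getBECount above is easiest to see in isolation: the backedge-taken count is the round-up division (End - Start + (Step - 1)) / Step, and the patch validates the intermediate addition by redoing it in a type one bit wider and comparing the two results. A minimal standalone C++ sketch of the same check with fixed-width integers follows; the function and its name are illustrative only, not part of the patch (the patch widens by exactly one bit, but any type wide enough to hold the exact sum works).

#include <cstdint>

// Round-up division (End - Start + (Step - 1)) / Step, mirroring the
// getBECount hunk above: redo the addition in a wider type and compare,
// so a wrapped intermediate sum is reported instead of silently used.
// Returns false where the real code returns CouldNotCompute.
static bool backedgeCount(uint32_t Start, uint32_t End, uint32_t Step,
                          uint32_t &Count) {
  uint32_t Diff = End - Start;
  uint32_t RoundUp = Step - 1;
  uint32_t Add = Diff + RoundUp;                      // may wrap
  uint64_t Wide = uint64_t(Diff) + uint64_t(RoundUp); // cannot wrap
  if (uint64_t(Add) != Wide)
    return false;                                     // overflow detected
  Count = Add / Step;
  return true;
}

For Start = 0, End = 10, Step = 3 this yields (10 + 2) / 3 = 4, while an End near UINT32_MAX combined with a large Step trips the widened comparison.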
- std::vector<SCEVHandle> NewOps(op_begin(), op_end());
+ SmallVector<SCEVHandle, 4> NewOps(op_begin(), op_end());
 NewOps[0] = SE.getNegativeSCEV(SE.getConstant(Range.getUpper()));
 SCEVHandle NewAddRec = SE.getAddRecExpr(NewOps, getLoop());
@@ -3783,7 +4293,7 @@ ScalarEvolution::SCEVCallbackVH::SCEVCallbackVH(Value *V, ScalarEvolution *se)
//===----------------------------------------------------------------------===//

ScalarEvolution::ScalarEvolution()
- : FunctionPass(&ID), CouldNotCompute(new SCEVCouldNotCompute()) {
+ : FunctionPass(&ID), CouldNotCompute(new SCEVCouldNotCompute(0)) {
}

bool ScalarEvolution::runOnFunction(Function &F) {
@@ -3847,11 +4357,18 @@ void ScalarEvolution::print(raw_ostream &OS, const Module* ) const {
 OS << " --> ";
 SCEVHandle SV = SE.getSCEV(&*I);
 SV->print(OS);
- OS << "\t\t";
- if (const Loop *L = LI->getLoopFor((*I).getParent())) {
- OS << "Exits: ";
- SCEVHandle ExitValue = SE.getSCEVAtScope(&*I, L->getParentLoop());
+ const Loop *L = LI->getLoopFor((*I).getParent());
+
+ SCEVHandle AtUse = SE.getSCEVAtScope(SV, L);
+ if (AtUse != SV) {
+ OS << " --> ";
+ AtUse->print(OS);
+ }
+
+ if (L) {
+ OS << "\t\t" "Exits: ";
+ SCEVHandle ExitValue = SE.getSCEVAtScope(SV, L->getParentLoop());
 if (!ExitValue->isLoopInvariant(L)) {
 OS << "<<Unknown>>";
 } else {
diff --git a/lib/Analysis/ScalarEvolutionExpander.cpp b/lib/Analysis/ScalarEvolutionExpander.cpp
index e1f8fa4..2a73c27 100644
--- a/lib/Analysis/ScalarEvolutionExpander.cpp
+++ b/lib/Analysis/ScalarEvolutionExpander.cpp
@@ -182,7 +182,8 @@ static bool FactorOutConstant(SCEVHandle &S,
 if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(S))
 if (const SCEVConstant *C = dyn_cast<SCEVConstant>(M->getOperand(0)))
 if (!C->getValue()->getValue().srem(Factor)) {
- std::vector<SCEVHandle> NewMulOps(M->getOperands());
+ const SmallVectorImpl<SCEVHandle> &MOperands = M->getOperands();
+ SmallVector<SCEVHandle, 4> NewMulOps(MOperands.begin(), MOperands.end());
 NewMulOps[0] =
 SE.getConstant(C->getValue()->getValue().sdiv(Factor));
 S = SE.getMulExpr(NewMulOps);
@@ -239,7 +240,7 @@ Value *SCEVExpander::expandAddToGEP(const SCEVHandle *op_begin,
 Value *V) {
 const Type *ElTy = PTy->getElementType();
 SmallVector<Value *, 4> GepIndices;
- std::vector<SCEVHandle> Ops(op_begin, op_end);
+ SmallVector<SCEVHandle, 8> Ops(op_begin, op_end);
 bool AnyNonZeroIndices = false;

 // Descend down the pointer's type and attempt to convert the other
@@ -250,8 +251,8 @@
 for (;;) {
 APInt ElSize = APInt(SE.getTypeSizeInBits(Ty),
 ElTy->isSized() ? SE.TD->getTypeAllocSize(ElTy) : 0);
- std::vector<SCEVHandle> NewOps;
- std::vector<SCEVHandle> ScaledOps;
+ SmallVector<SCEVHandle, 8> NewOps;
+ SmallVector<SCEVHandle, 8> ScaledOps;
 for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
 // Split AddRecs up into parts as either of the parts may be usable
 // without the other.
@@ -297,9 +298,7 @@ Value *SCEVExpander::expandAddToGEP(const SCEVHandle *op_begin,
 GepIndices.push_back(ConstantInt::get(Type::Int32Ty, ElIdx));
 ElTy = STy->getTypeAtIndex(ElIdx);
 Ops[0] =
- SE.getConstant(ConstantInt::get(Ty,
- FullOffset -
- SL.getElementOffset(ElIdx)));
+ SE.getConstant(Ty, FullOffset - SL.getElementOffset(ElIdx));
 AnyNonZeroIndices = true;
 continue;
 }
@@ -365,7 +364,7 @@ Value *SCEVExpander::visitAddExpr(const SCEVAddExpr *S) {
 // comments on expandAddToGEP for details.
if (SE.TD) if (const PointerType *PTy = dyn_cast<PointerType>(V->getType())) { - const std::vector<SCEVHandle> &Ops = S->getOperands(); + const SmallVectorImpl<SCEVHandle> &Ops = S->getOperands(); return expandAddToGEP(&Ops[0], &Ops[Ops.size() - 1], PTy, Ty, V); } @@ -432,7 +431,7 @@ static void ExposePointerBase(SCEVHandle &Base, SCEVHandle &Rest, } if (const SCEVAddExpr *A = dyn_cast<SCEVAddExpr>(Base)) { Base = A->getOperand(A->getNumOperands()-1); - std::vector<SCEVHandle> NewAddOps(A->op_begin(), A->op_end()); + SmallVector<SCEVHandle, 8> NewAddOps(A->op_begin(), A->op_end()); NewAddOps.back() = Rest; Rest = SE.getAddExpr(NewAddOps); ExposePointerBase(Base, Rest, SE); @@ -473,7 +472,8 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) { // {X,+,F} --> X + {0,+,F} if (!S->getStart()->isZero()) { - std::vector<SCEVHandle> NewOps(S->getOperands()); + const SmallVectorImpl<SCEVHandle> &SOperands = S->getOperands(); + SmallVector<SCEVHandle, 4> NewOps(SOperands.begin(), SOperands.end()); NewOps[0] = SE.getIntegerSCEV(0, Ty); SCEVHandle Rest = SE.getAddRecExpr(NewOps, L); diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp index 45f97b8..17ffa2d 100644 --- a/lib/Analysis/ValueTracking.cpp +++ b/lib/Analysis/ValueTracking.cpp @@ -52,11 +52,12 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask, assert(V && "No Value?"); assert(Depth <= MaxDepth && "Limit Search Depth"); unsigned BitWidth = Mask.getBitWidth(); - assert((V->getType()->isInteger() || isa<PointerType>(V->getType())) && + assert((V->getType()->isIntOrIntVector() || isa<PointerType>(V->getType())) && "Not integer or pointer type!"); - assert((!TD || TD->getTypeSizeInBits(V->getType()) == BitWidth) && - (!isa<IntegerType>(V->getType()) || - V->getType()->getPrimitiveSizeInBits() == BitWidth) && + assert((!TD || + TD->getTypeSizeInBits(V->getType()->getScalarType()) == BitWidth) && + (!V->getType()->isIntOrIntVector() || + V->getType()->getScalarSizeInBits() == BitWidth) && KnownZero.getBitWidth() == BitWidth && KnownOne.getBitWidth() == BitWidth && "V, Mask, KnownOne and KnownZero should have same BitWidth"); @@ -67,12 +68,26 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask, KnownZero = ~KnownOne & Mask; return; } - // Null is all-zeros. - if (isa<ConstantPointerNull>(V)) { + // Null and aggregate-zero are all-zeros. + if (isa<ConstantPointerNull>(V) || + isa<ConstantAggregateZero>(V)) { KnownOne.clear(); KnownZero = Mask; return; } + // Handle a constant vector by taking the intersection of the known bits of + // each element. + if (ConstantVector *CV = dyn_cast<ConstantVector>(V)) { + KnownZero.set(); KnownOne.set(); + for (unsigned i = 0, e = CV->getNumOperands(); i != e; ++i) { + APInt KnownZero2(BitWidth, 0), KnownOne2(BitWidth, 0); + ComputeMaskedBits(CV->getOperand(i), Mask, KnownZero2, KnownOne2, + TD, Depth); + KnownZero &= KnownZero2; + KnownOne &= KnownOne2; + } + return; + } // The address of an aligned GlobalValue has trailing zeros. if (GlobalValue *GV = dyn_cast<GlobalValue>(V)) { unsigned Align = GV->getAlignment(); @@ -218,7 +233,7 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask, const Type *SrcTy = I->getOperand(0)->getType(); unsigned SrcBitWidth = TD ? 
TD->getTypeSizeInBits(SrcTy) : - SrcTy->getPrimitiveSizeInBits(); + SrcTy->getScalarSizeInBits(); APInt MaskIn(Mask); MaskIn.zextOrTrunc(SrcBitWidth); KnownZero.zextOrTrunc(SrcBitWidth); @@ -480,7 +495,7 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask, // Handle array index arithmetic. const Type *IndexedTy = GTI.getIndexedType(); if (!IndexedTy->isSized()) return; - unsigned GEPOpiBits = Index->getType()->getPrimitiveSizeInBits(); + unsigned GEPOpiBits = Index->getType()->getScalarSizeInBits(); uint64_t TypeSize = TD ? TD->getTypeAllocSize(IndexedTy) : 1; LocalMask = APInt::getAllOnesValue(GEPOpiBits); LocalKnownZero = LocalKnownOne = APInt(GEPOpiBits, 0); @@ -609,8 +624,8 @@ bool llvm::MaskedValueIsZero(Value *V, const APInt &Mask, /// 'Op' must have a scalar integer type. /// unsigned llvm::ComputeNumSignBits(Value *V, TargetData *TD, unsigned Depth) { - const IntegerType *Ty = cast<IntegerType>(V->getType()); - unsigned TyBits = Ty->getBitWidth(); + const Type *Ty = V->getType(); + unsigned TyBits = Ty->getScalarSizeInBits(); unsigned Tmp, Tmp2; unsigned FirstAnswer = 1; diff --git a/lib/AsmParser/LLLexer.cpp b/lib/AsmParser/LLLexer.cpp index c5190ef..b3f7cdb 100644 --- a/lib/AsmParser/LLLexer.cpp +++ b/lib/AsmParser/LLLexer.cpp @@ -526,6 +526,10 @@ lltok::Kind LLLexer::LexIdentifier() { KEYWORD(coldcc); KEYWORD(x86_stdcallcc); KEYWORD(x86_fastcallcc); + KEYWORD(arm_apcscc); + KEYWORD(arm_aapcscc); + KEYWORD(arm_aapcs_vfpcc); + KEYWORD(cc); KEYWORD(c); diff --git a/lib/AsmParser/LLParser.cpp b/lib/AsmParser/LLParser.cpp index 4863f3c..909370c 100644 --- a/lib/AsmParser/LLParser.cpp +++ b/lib/AsmParser/LLParser.cpp @@ -808,8 +808,11 @@ bool LLParser::ParseOptionalVisibility(unsigned &Res) { /// ::= 'coldcc' /// ::= 'x86_stdcallcc' /// ::= 'x86_fastcallcc' +/// ::= 'arm_apcscc' +/// ::= 'arm_aapcscc' +/// ::= 'arm_aapcs_vfpcc' /// ::= 'cc' UINT -/// +/// bool LLParser::ParseOptionalCallingConv(unsigned &CC) { switch (Lex.getKind()) { default: CC = CallingConv::C; return false; @@ -818,6 +821,9 @@ bool LLParser::ParseOptionalCallingConv(unsigned &CC) { case lltok::kw_coldcc: CC = CallingConv::Cold; break; case lltok::kw_x86_stdcallcc: CC = CallingConv::X86_StdCall; break; case lltok::kw_x86_fastcallcc: CC = CallingConv::X86_FastCall; break; + case lltok::kw_arm_apcscc: CC = CallingConv::ARM_APCS; break; + case lltok::kw_arm_aapcscc: CC = CallingConv::ARM_AAPCS; break; + case lltok::kw_arm_aapcs_vfpcc:CC = CallingConv::ARM_AAPCS_VFP; break; case lltok::kw_cc: Lex.Lex(); return ParseUInt32(CC); } Lex.Lex(); @@ -1743,7 +1749,7 @@ bool LLParser::ParseValID(ValID &ID) { Lex.Lex(); if (ParseToken(lltok::lparen, "expected '(' after constantexpr cast") || ParseGlobalTypeAndValue(SrcVal) || - ParseToken(lltok::kw_to, "expected 'to' int constantexpr cast") || + ParseToken(lltok::kw_to, "expected 'to' in constantexpr cast") || ParseType(DestTy) || ParseToken(lltok::rparen, "expected ')' at end of constantexpr cast")) return true; @@ -3145,7 +3151,7 @@ bool LLParser::ParseFree(Instruction *&Inst, PerFunctionState &PFS) { } /// ParseLoad -/// ::= 'volatile'? 'load' TypeAndValue (',' 'align' uint)? +/// ::= 'volatile'? 'load' TypeAndValue (',' 'align' i32)? bool LLParser::ParseLoad(Instruction *&Inst, PerFunctionState &PFS, bool isVolatile) { Value *Val; LocTy Loc; @@ -3163,7 +3169,7 @@ bool LLParser::ParseLoad(Instruction *&Inst, PerFunctionState &PFS, } /// ParseStore -/// ::= 'volatile'? 'store' TypeAndValue ',' TypeAndValue (',' 'align' uint)? +/// ::= 'volatile'? 
'store' TypeAndValue ',' TypeAndValue (',' 'align' i32)? bool LLParser::ParseStore(Instruction *&Inst, PerFunctionState &PFS, bool isVolatile) { Value *Val, *Ptr; LocTy Loc, PtrLoc; @@ -3186,7 +3192,7 @@ bool LLParser::ParseStore(Instruction *&Inst, PerFunctionState &PFS, } /// ParseGetResult -/// ::= 'getresult' TypeAndValue ',' uint +/// ::= 'getresult' TypeAndValue ',' i32 /// FIXME: Remove support for getresult in LLVM 3.0 bool LLParser::ParseGetResult(Instruction *&Inst, PerFunctionState &PFS) { Value *Val; LocTy ValLoc, EltLoc; diff --git a/lib/AsmParser/LLToken.h b/lib/AsmParser/LLToken.h index 9335d19..cff89f8 100644 --- a/lib/AsmParser/LLToken.h +++ b/lib/AsmParser/LLToken.h @@ -60,7 +60,9 @@ namespace lltok { kw_gc, kw_c, - kw_cc, kw_ccc, kw_fastcc, kw_coldcc, kw_x86_stdcallcc, kw_x86_fastcallcc, + kw_cc, kw_ccc, kw_fastcc, kw_coldcc, + kw_x86_stdcallcc, kw_x86_fastcallcc, + kw_arm_apcscc, kw_arm_aapcscc, kw_arm_aapcs_vfpcc, kw_signext, kw_zeroext, diff --git a/lib/Bitcode/Reader/BitcodeReader.cpp b/lib/Bitcode/Reader/BitcodeReader.cpp index 3b44f564..6b9606c 100644 --- a/lib/Bitcode/Reader/BitcodeReader.cpp +++ b/lib/Bitcode/Reader/BitcodeReader.cpp @@ -2040,14 +2040,13 @@ void BitcodeReader::dematerializeFunction(Function *F) { Module *BitcodeReader::materializeModule(std::string *ErrInfo) { - for (DenseMap<Function*, std::pair<uint64_t, unsigned> >::iterator I = - DeferredFunctionInfo.begin(), E = DeferredFunctionInfo.end(); I != E; - ++I) { - Function *F = I->first; + // Iterate over the module, deserializing any functions that are still on + // disk. + for (Module::iterator F = TheModule->begin(), E = TheModule->end(); + F != E; ++F) if (F->hasNotBeenReadFromBitcode() && materializeFunction(F, ErrInfo)) return 0; - } // Upgrade any intrinsic calls that slipped through (should not happen!) and // delete the old functions to clean up. We can't do this unless the entire @@ -2123,7 +2122,7 @@ Module *llvm::ParseBitcodeFile(MemoryBuffer *Buffer, std::string *ErrMsg){ // is run. if (M) M = R->releaseModule(ErrMsg); - + delete R; return M; } diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index 45462da..e931904 100644 --- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -152,6 +152,9 @@ void AsmPrinter::getAnalysisUsage(AnalysisUsage &AU) const { bool AsmPrinter::doInitialization(Module &M) { Mang = new Mangler(M, TAI->getGlobalPrefix(), TAI->getPrivateGlobalPrefix()); + if (TAI->doesAllowQuotesInName()) + Mang->setUseQuotes(true); + GCModuleInfo *MI = getAnalysisIfAvailable<GCModuleInfo>(); assert(MI && "AsmPrinter didn't require GCModuleInfo?"); @@ -174,9 +177,17 @@ bool AsmPrinter::doInitialization(Module &M) { SwitchToDataSection(""); // Reset back to no section. 
- MachineModuleInfo *MMI = getAnalysisIfAvailable<MachineModuleInfo>(); - if (MMI) MMI->AnalyzeModule(M); - DW = getAnalysisIfAvailable<DwarfWriter>(); + if (TAI->doesSupportDebugInformation() + || TAI->doesSupportExceptionHandling()) { + MachineModuleInfo *MMI = getAnalysisIfAvailable<MachineModuleInfo>(); + if (MMI) { + MMI->AnalyzeModule(M); + DW = getAnalysisIfAvailable<DwarfWriter>(); + if (DW) + DW->BeginModule(&M, MMI, O, this, TAI); + } + } + return false; } @@ -347,8 +358,9 @@ void AsmPrinter::EmitJumpTableInfo(MachineJumpTableInfo *MJTI, const char* JumpTableDataSection = TAI->getJumpTableDataSection(); const Function *F = MF.getFunction(); unsigned SectionFlags = TAI->SectionFlagsForGlobal(F); + bool JTInDiffSection = false; if ((IsPic && !(LoweringInfo && LoweringInfo->usesGlobalOffsetTable())) || - !JumpTableDataSection || + !JumpTableDataSection || SectionFlags & SectionFlags::Linkonce) { // In PIC mode, we need to emit the jump table to the same section as the // function body itself, otherwise the label differences won't make sense. @@ -357,6 +369,7 @@ void AsmPrinter::EmitJumpTableInfo(MachineJumpTableInfo *MJTI, SwitchToSection(TAI->SectionForGlobal(F)); } else { SwitchToDataSection(JumpTableDataSection); + JTInDiffSection = true; } EmitAlignment(Log2_32(MJTI->getAlignment())); @@ -380,8 +393,10 @@ void AsmPrinter::EmitJumpTableInfo(MachineJumpTableInfo *MJTI, // before each jump table. The first label is never referenced, but tells // the assembler and linker the extents of the jump table object. The // second label is actually referenced by the code. - if (const char *JTLabelPrefix = TAI->getJumpTableSpecialLabelPrefix()) - O << JTLabelPrefix << "JTI" << getFunctionNumber() << '_' << i << ":\n"; + if (JTInDiffSection) { + if (const char *JTLabelPrefix = TAI->getJumpTableSpecialLabelPrefix()) + O << JTLabelPrefix << "JTI" << getFunctionNumber() << '_' << i << ":\n"; + } O << TAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber() << '_' << i << ":\n"; @@ -502,7 +517,7 @@ const GlobalValue * AsmPrinter::findGlobalValue(const Constant *CV) { void AsmPrinter::EmitLLVMUsedList(Constant *List) { const char *Directive = TAI->getUsedDirective(); - // Should be an array of 'sbyte*'. + // Should be an array of 'i8*'. ConstantArray *InitList = dyn_cast<ConstantArray>(List); if (InitList == 0) return; diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index c773378..9d340e3 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -1757,6 +1757,9 @@ unsigned DwarfDebug::RecordInlinedFnStart(DISubprogram &SP, DICompileUnit CU, if (TimePassesIsEnabled) DebugTimer->startTimer(); + CompileUnit *Unit = MainCU; + if (!Unit) + Unit = &FindCompileUnit(SP.getCompileUnit()); GlobalVariable *GV = SP.getGV(); DenseMap<const GlobalVariable *, DbgScope *>::iterator II = AbstractInstanceRootMap.find(GV); @@ -1767,7 +1770,6 @@ unsigned DwarfDebug::RecordInlinedFnStart(DISubprogram &SP, DICompileUnit CU, DbgScope *Scope = new DbgScope(NULL, DIDescriptor(GV)); // Get the compile unit context. - CompileUnit *Unit = &FindCompileUnit(SP.getCompileUnit()); DIE *SPDie = Unit->getDieMapSlotFor(GV); if (!SPDie) SPDie = CreateSubprogramDIE(Unit, SP, false, true); @@ -1789,7 +1791,6 @@ unsigned DwarfDebug::RecordInlinedFnStart(DISubprogram &SP, DICompileUnit CU, // Create a concrete inlined instance for this inlined function. 
 DbgConcreteScope *ConcreteScope = new DbgConcreteScope(DIDescriptor(GV));
 DIE *ScopeDie = new DIE(dwarf::DW_TAG_inlined_subroutine);
- CompileUnit *Unit = &FindCompileUnit(SP.getCompileUnit());
 ScopeDie->setAbstractCompileUnit(Unit);

 DIE *Origin = Unit->getDieMapSlotFor(GV);
@@ -1850,7 +1851,14 @@ unsigned DwarfDebug::RecordInlinedFnEnd(DISubprogram &SP) {
 }

 SmallVector<DbgScope *, 8> &Scopes = I->second;
- assert(!Scopes.empty() && "We should have at least one debug scope!");
+ if (Scopes.empty()) {
+ // Returned ID is 0 if this is unbalanced "end of inlined
+ // scope". This could happen if the optimizer eats dbg intrinsics
+ // or "beginning of inlined scope" is not recognized due to
+ // missing location info. In such cases, ignore this region.end.
+ return 0;
+ }
+
 DbgScope *Scope = Scopes.back(); Scopes.pop_back();
 unsigned ID = MMI->NextLabelID();
 MMI->RecordUsedDbgLabel(ID);
@@ -1987,8 +1995,8 @@ void DwarfDebug::EmitInitial() {
 Asm->SwitchToDataSection(TAI->getDwarfARangesSection());
 EmitLabel("section_aranges", 0);

- if (TAI->doesSupportMacInfoSection()) {
- Asm->SwitchToDataSection(TAI->getDwarfMacInfoSection());
+ if (const char *LineInfoDirective = TAI->getDwarfMacroInfoSection()) {
+ Asm->SwitchToDataSection(LineInfoDirective);
 EmitLabel("section_macinfo", 0);
 }

@@ -2534,9 +2542,9 @@ void DwarfDebug::EmitDebugRanges() {
/// EmitDebugMacInfo - Emit visible names into a debug macinfo section.
///
void DwarfDebug::EmitDebugMacInfo() {
- if (TAI->doesSupportMacInfoSection()) {
+ if (const char *LineInfoDirective = TAI->getDwarfMacroInfoSection()) {
 // Start the dwarf macinfo section.
- Asm->SwitchToDataSection(TAI->getDwarfMacInfoSection());
+ Asm->SwitchToDataSection(LineInfoDirective);
 Asm->EOL();
 }
}
diff --git a/lib/CodeGen/AsmPrinter/DwarfPrinter.cpp b/lib/CodeGen/AsmPrinter/DwarfPrinter.cpp
index f7ca4f4..a1b97df 100644
--- a/lib/CodeGen/AsmPrinter/DwarfPrinter.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfPrinter.cpp
@@ -190,7 +190,7 @@ void Dwarf::EmitFrameMoves(const char *BaseLabel, unsigned BaseLabelID,
 Asm->EmitULEB128Bytes(Offset);
 Asm->EOL("Offset");
 } else {
- assert(0 && "Machine move no supported yet.");
+ assert(0 && "Machine move not supported yet.");
 }
 } else if (Src.isReg() &&
 Src.getReg() == MachineLocation::VirtualFP) {
@@ -200,7 +200,7 @@ void Dwarf::EmitFrameMoves(const char *BaseLabel, unsigned BaseLabelID,
 Asm->EmitULEB128Bytes(RI->getDwarfRegNum(Dst.getReg(), isEH));
 Asm->EOL("Register");
 } else {
- assert(0 && "Machine move no supported yet.");
+ assert(0 && "Machine move not supported yet.");
 }
 } else {
 unsigned Reg = RI->getDwarfRegNum(Src.getReg(), isEH);
diff --git a/lib/CodeGen/IfConversion.cpp b/lib/CodeGen/IfConversion.cpp
index 1d0887f..4d5c3c2 100644
--- a/lib/CodeGen/IfConversion.cpp
+++ b/lib/CodeGen/IfConversion.cpp
@@ -547,7 +547,11 @@ void IfConverter::ScanInstructions(BBInfo &BBI) {
 // fallthrough.
 if (!BBI.FalseBB)
 BBI.FalseBB = findFalseBlock(BBI.BB, BBI.TrueBB);
- assert(BBI.FalseBB && "Expected to find the fallthrough block!");
+ if (!BBI.FalseBB) {
+ // Malformed bcc? True and false blocks are the same?
+ BBI.IsUnpredicable = true;
+ return;
+ }
 }

 // Then scan all the instructions.
@@ -663,6 +667,13 @@ IfConverter::BBInfo &IfConverter::AnalyzeBlock(MachineBasicBlock *BB,
 return BBI;
 }

+ // Do not ifcvt if true and false fallthrough blocks are the same.
+ if (!BBI.FalseBB) { + BBI.IsBeingAnalyzed = false; + BBI.IsAnalyzed = true; + return BBI; + } + BBInfo &TrueBBI = AnalyzeBlock(BBI.TrueBB, Tokens); BBInfo &FalseBBI = AnalyzeBlock(BBI.FalseBB, Tokens); diff --git a/lib/CodeGen/LazyLiveness.cpp b/lib/CodeGen/LazyLiveness.cpp index 6fb35d2..a951c99 100644 --- a/lib/CodeGen/LazyLiveness.cpp +++ b/lib/CodeGen/LazyLiveness.cpp @@ -32,10 +32,12 @@ void LazyLiveness::computeBackedgeChain(MachineFunction& mf, calculated.set(preorder[MBB]); for (SparseBitVector<128>::iterator I = tmp.begin(); I != tmp.end(); ++I) { + assert(rev_preorder.size() > *I && "Unknown block!"); + MachineBasicBlock* SrcMBB = rev_preorder[*I]; - for (MachineBasicBlock::succ_iterator SI = SrcMBB->succ_begin(); - SI != SrcMBB->succ_end(); ++SI) { + for (MachineBasicBlock::succ_iterator SI = SrcMBB->succ_begin(), + SE = SrcMBB->succ_end(); SI != SE; ++SI) { MachineBasicBlock* TgtMBB = *SI; if (backedges.count(std::make_pair(SrcMBB, TgtMBB)) && @@ -44,7 +46,8 @@ void LazyLiveness::computeBackedgeChain(MachineFunction& mf, computeBackedgeChain(mf, TgtMBB); tv[MBB].set(preorder[TgtMBB]); - tv[MBB] |= tv[TgtMBB]; + SparseBitVector<128> right = tv[TgtMBB]; + tv[MBB] |= right; } } @@ -60,6 +63,12 @@ bool LazyLiveness::runOnMachineFunction(MachineFunction &mf) { backedge_target.clear(); calculated.clear(); preorder.clear(); + rev_preorder.clear(); + + rv.resize(mf.size()); + tv.resize(mf.size()); + preorder.resize(mf.size()); + rev_preorder.reserve(mf.size()); MRI = &mf.getRegInfo(); MachineDominatorTree& MDT = getAnalysis<MachineDominatorTree>(); @@ -106,8 +115,8 @@ bool LazyLiveness::runOnMachineFunction(MachineFunction &mf) { for (MachineBasicBlock::succ_iterator SI = (*POI)->succ_begin(), SE = (*POI)->succ_end(); SI != SE; ++SI) if (!backedges.count(std::make_pair(*POI, *SI)) && tv.count(*SI)) { - SparseBitVector<128>& PBV = tv[*POI]; - PBV = tv[*SI]; + SparseBitVector<128> right = tv[*SI]; + tv[*POI] |= right; } for (po_iterator<MachineBasicBlock*> POI = po_begin(&*mf.begin()), diff --git a/lib/CodeGen/LiveInterval.cpp b/lib/CodeGen/LiveInterval.cpp index 67120b8..cac9253 100644 --- a/lib/CodeGen/LiveInterval.cpp +++ b/lib/CodeGen/LiveInterval.cpp @@ -19,6 +19,7 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/LiveInterval.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/STLExtras.h" @@ -305,9 +306,9 @@ void LiveInterval::removeRange(unsigned Start, unsigned End, VNInfo *VNI = valnos.back(); valnos.pop_back(); VNI->~VNInfo(); - } while (!valnos.empty() && valnos.back()->def == ~1U); + } while (!valnos.empty() && valnos.back()->isUnused()); } else { - ValNo->def = ~1U; + ValNo->setIsUnused(true); } } } @@ -353,9 +354,9 @@ void LiveInterval::removeValNo(VNInfo *ValNo) { VNInfo *VNI = valnos.back(); valnos.pop_back(); VNI->~VNInfo(); - } while (!valnos.empty() && valnos.back()->def == ~1U); + } while (!valnos.empty() && valnos.back()->isUnused()); } else { - ValNo->def = ~1U; + ValNo->setIsUnused(true); } } @@ -371,9 +372,8 @@ void LiveInterval::scaleNumbering(unsigned factor) { // Scale VNI info. 
for (vni_iterator VNI = vni_begin(), VNIE = vni_end(); VNI != VNIE; ++VNI) { VNInfo *vni = *VNI; - if (vni->def != ~0U && vni->def != ~1U) { - vni->def = InstrSlots::scale(vni->def, factor); - } + + vni->def = InstrSlots::scale(vni->def, factor); for (unsigned i = 0; i < vni->kills.size(); ++i) { if (vni->kills[i] != 0) @@ -421,13 +421,13 @@ VNInfo *LiveInterval::findDefinedVNInfo(unsigned DefIdxOrReg) const { return VNI; } - /// join - Join two live intervals (this, and other) together. This applies /// mappings to the value numbers in the LHS/RHS intervals as specified. If /// the intervals are not joinable, this aborts. void LiveInterval::join(LiveInterval &Other, const int *LHSValNoAssignments, const int *RHSValNoAssignments, - SmallVector<VNInfo*, 16> &NewVNInfo) { + SmallVector<VNInfo*, 16> &NewVNInfo, + MachineRegisterInfo *MRI) { // Determine if any of our live range values are mapped. This is uncommon, so // we want to avoid the interval scan if not. bool MustMapCurValNos = false; @@ -502,8 +502,18 @@ void LiveInterval::join(LiveInterval &Other, const int *LHSValNoAssignments, } weight += Other.weight; - if (Other.preference && !preference) - preference = Other.preference; + + // Update regalloc hint if currently there isn't one. + if (TargetRegisterInfo::isVirtualRegister(reg) && + TargetRegisterInfo::isVirtualRegister(Other.reg)) { + std::pair<unsigned, unsigned> Hint = MRI->getRegAllocationHint(reg); + if (Hint.first == 0 && Hint.second == 0) { + std::pair<unsigned, unsigned> OtherHint = + MRI->getRegAllocationHint(Other.reg); + if (OtherHint.first || OtherHint.second) + MRI->setRegAllocationHint(reg, OtherHint.first, OtherHint.second); + } + } } /// MergeRangesInAsValue - Merge all of the intervals in RHS into this live @@ -582,9 +592,9 @@ void LiveInterval::MergeValueInAsValue(const LiveInterval &RHS, VNInfo *VNI = valnos.back(); valnos.pop_back(); VNI->~VNInfo(); - } while (!valnos.empty() && valnos.back()->def == ~1U); + } while (!valnos.empty() && valnos.back()->isUnused()); } else { - V1->def = ~1U; + V1->setIsUnused(true); } } } @@ -611,7 +621,7 @@ void LiveInterval::MergeInClobberRanges(const LiveInterval &Clobbers, else if (UnusedValNo) ClobberValNo = UnusedValNo; else { - UnusedValNo = ClobberValNo = getNextValue(~0U, 0, VNInfoAllocator); + UnusedValNo = ClobberValNo = getNextValue(0, 0, false, VNInfoAllocator); ValNoMaps.insert(std::make_pair(I->valno, ClobberValNo)); } @@ -664,7 +674,7 @@ void LiveInterval::MergeInClobberRange(unsigned Start, unsigned End, BumpPtrAllocator &VNInfoAllocator) { // Find a value # to use for the clobber ranges. If there is already a value# // for unknown values, use it. 
- VNInfo *ClobberValNo = getNextValue(~0U, 0, VNInfoAllocator); + VNInfo *ClobberValNo = getNextValue(0, 0, false, VNInfoAllocator); iterator IP = begin(); IP = std::upper_bound(IP, end(), Start); @@ -747,24 +757,26 @@ VNInfo* LiveInterval::MergeValueNumberInto(VNInfo *V1, VNInfo *V2) { VNInfo *VNI = valnos.back(); valnos.pop_back(); VNI->~VNInfo(); - } while (valnos.back()->def == ~1U); + } while (valnos.back()->isUnused()); } else { - V1->def = ~1U; + V1->setIsUnused(true); } return V2; } void LiveInterval::Copy(const LiveInterval &RHS, + MachineRegisterInfo *MRI, BumpPtrAllocator &VNInfoAllocator) { ranges.clear(); valnos.clear(); - preference = RHS.preference; + std::pair<unsigned, unsigned> Hint = MRI->getRegAllocationHint(RHS.reg); + MRI->setRegAllocationHint(reg, Hint.first, Hint.second); + weight = RHS.weight; for (unsigned i = 0, e = RHS.getNumValNums(); i != e; ++i) { const VNInfo *VNI = RHS.getValNumInfo(i); - VNInfo *NewVNI = getNextValue(~0U, 0, VNInfoAllocator); - copyValNumInfo(NewVNI, VNI); + createValueCopy(VNI, VNInfoAllocator); } for (unsigned i = 0, e = RHS.ranges.size(); i != e; ++i) { const LiveRange &LR = RHS.ranges[i]; @@ -816,22 +828,22 @@ void LiveInterval::print(std::ostream &OS, const VNInfo *vni = *i; if (vnum) OS << " "; OS << vnum << "@"; - if (vni->def == ~1U) { + if (vni->isUnused()) { OS << "x"; } else { - if (vni->def == ~0U) + if (!vni->isDefAccurate()) OS << "?"; else OS << vni->def; unsigned ee = vni->kills.size(); - if (ee || vni->hasPHIKill) { + if (ee || vni->hasPHIKill()) { OS << "-("; for (unsigned j = 0; j != ee; ++j) { OS << vni->kills[j]; if (j != ee-1) OS << " "; } - if (vni->hasPHIKill) { + if (vni->hasPHIKill()) { if (ee) OS << " "; OS << "phi"; diff --git a/lib/CodeGen/LiveIntervalAnalysis.cpp b/lib/CodeGen/LiveIntervalAnalysis.cpp index cf0a648..d6931df 100644 --- a/lib/CodeGen/LiveIntervalAnalysis.cpp +++ b/lib/CodeGen/LiveIntervalAnalysis.cpp @@ -199,7 +199,7 @@ void LiveIntervals::computeNumbering() { // Remap the VNInfo def index, which works the same as the // start indices above. VN's with special sentinel defs // don't need to be remapped. - if (vni->def != ~0U && vni->def != ~1U) { + if (vni->isDefAccurate() && !vni->isUnused()) { unsigned index = vni->def / InstrSlots::NUM; unsigned offset = vni->def % InstrSlots::NUM; if (offset == InstrSlots::LOAD) { @@ -447,7 +447,7 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb, tii_->isMoveInstr(*mi, SrcReg, DstReg, SrcSubReg, DstSubReg)) CopyMI = mi; // Earlyclobbers move back one. - ValNo = interval.getNextValue(defIndex, CopyMI, VNInfoAllocator); + ValNo = interval.getNextValue(defIndex, CopyMI, true, VNInfoAllocator); assert(ValNo->id == 0 && "First value in interval is not 0?"); @@ -539,13 +539,15 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb, // The new value number (#1) is defined by the instruction we claimed // defined value #0. VNInfo *ValNo = interval.getNextValue(OldValNo->def, OldValNo->copy, + false, // update at * VNInfoAllocator); - + ValNo->setFlags(OldValNo->getFlags()); // * <- updating here + // Value#0 is now defined by the 2-addr instruction. OldValNo->def = RedefIndex; OldValNo->copy = 0; if (MO.isEarlyClobber()) - OldValNo->redefByEC = true; + OldValNo->setHasRedefByEC(true); // Add the new live interval which replaces the range for the input copy. 
LiveRange LR(DefIndex, RedefIndex, ValNo); @@ -577,12 +579,14 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb, DOUT << " Removing [" << Start << "," << End << "] from: "; interval.print(DOUT, tri_); DOUT << "\n"; interval.removeRange(Start, End); - VNI->hasPHIKill = true; + VNI->setHasPHIKill(true); DOUT << " RESULT: "; interval.print(DOUT, tri_); // Replace the interval with one of a NEW value number. Note that this // value number isn't actually defined by an instruction, weird huh? :) - LiveRange LR(Start, End, interval.getNextValue(~0, 0, VNInfoAllocator)); + LiveRange LR(Start, End, + interval.getNextValue(mbb->getNumber(), 0, false, VNInfoAllocator)); + LR.valno->setIsPHIDef(true); DOUT << " replace range with " << LR; interval.addRange(LR); interval.addKill(LR.valno, End); @@ -604,13 +608,13 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb, mi->getOpcode() == TargetInstrInfo::SUBREG_TO_REG || tii_->isMoveInstr(*mi, SrcReg, DstReg, SrcSubReg, DstSubReg)) CopyMI = mi; - ValNo = interval.getNextValue(defIndex, CopyMI, VNInfoAllocator); + ValNo = interval.getNextValue(defIndex, CopyMI, true, VNInfoAllocator); unsigned killIndex = getMBBEndIdx(mbb) + 1; LiveRange LR(defIndex, killIndex, ValNo); interval.addRange(LR); interval.addKill(ValNo, killIndex); - ValNo->hasPHIKill = true; + ValNo->setHasPHIKill(true); DOUT << " +" << LR; } } @@ -692,9 +696,9 @@ exit: LiveInterval::iterator OldLR = interval.FindLiveRangeContaining(start); bool Extend = OldLR != interval.end(); VNInfo *ValNo = Extend - ? OldLR->valno : interval.getNextValue(start, CopyMI, VNInfoAllocator); + ? OldLR->valno : interval.getNextValue(start, CopyMI, true, VNInfoAllocator); if (MO.isEarlyClobber() && Extend) - ValNo->redefByEC = true; + ValNo->setHasRedefByEC(true); LiveRange LR(start, end, ValNo); interval.addRange(LR); interval.addKill(LR.valno, end); @@ -750,7 +754,7 @@ void LiveIntervals::handleLiveInRegister(MachineBasicBlock *MBB, DOUT << " killed"; end = getUseIndex(baseIndex) + 1; SeenDefUse = true; - goto exit; + break; } else if (mi->modifiesRegister(interval.reg, tri_)) { // Another instruction redefines the register before it is ever read. // Then the register is essentially dead at the instruction that defines @@ -759,7 +763,7 @@ void LiveIntervals::handleLiveInRegister(MachineBasicBlock *MBB, DOUT << " dead"; end = getDefIndex(start) + 1; SeenDefUse = true; - goto exit; + break; } baseIndex += InstrSlots::NUM; @@ -771,7 +775,6 @@ void LiveIntervals::handleLiveInRegister(MachineBasicBlock *MBB, } } -exit: // Live-in register might not be used at all. if (!SeenDefUse) { if (isAlias) { @@ -783,7 +786,11 @@ exit: } } - LiveRange LR(start, end, interval.getNextValue(~0U, 0, VNInfoAllocator)); + VNInfo *vni = + interval.getNextValue(MBB->getNumber(), 0, false, VNInfoAllocator); + vni->setIsPHIDef(true); + LiveRange LR(start, end, vni); + interval.addRange(LR); interval.addKill(LR.valno, end); DOUT << " +" << LR << '\n'; @@ -896,7 +903,7 @@ LiveInterval* LiveIntervals::createInterval(unsigned reg) { /// managing the allocated memory. 
LiveInterval* LiveIntervals::dupInterval(LiveInterval *li) { LiveInterval *NewLI = createInterval(li->reg); - NewLI->Copy(*li, getVNInfoAllocator()); + NewLI->Copy(*li, mri_, getVNInfoAllocator()); return NewLI; } @@ -1099,13 +1106,12 @@ bool LiveIntervals::isReMaterializable(const LiveInterval &li, for (LiveInterval::const_vni_iterator i = li.vni_begin(), e = li.vni_end(); i != e; ++i) { const VNInfo *VNI = *i; - unsigned DefIdx = VNI->def; - if (DefIdx == ~1U) + if (VNI->isUnused()) continue; // Dead val#. // Is the def for the val# rematerializable? - if (DefIdx == ~0u) + if (!VNI->isDefAccurate()) return false; - MachineInstr *ReMatDefMI = getInstructionFromIndex(DefIdx); + MachineInstr *ReMatDefMI = getInstructionFromIndex(VNI->def); bool DefIsLoad = false; if (!ReMatDefMI || !isReMaterializable(li, VNI, ReMatDefMI, SpillIs, DefIsLoad)) @@ -1450,7 +1456,7 @@ rewriteInstructionForSpills(const LiveInterval &li, const VNInfo *VNI, if (HasUse) { if (CreatedNewVReg) { LiveRange LR(getLoadIndex(index), getUseIndex(index)+1, - nI.getNextValue(~0U, 0, VNInfoAllocator)); + nI.getNextValue(0, 0, false, VNInfoAllocator)); DOUT << " +" << LR; nI.addRange(LR); } else { @@ -1464,7 +1470,7 @@ rewriteInstructionForSpills(const LiveInterval &li, const VNInfo *VNI, } if (HasDef) { LiveRange LR(getDefIndex(index), getStoreIndex(index), - nI.getNextValue(~0U, 0, VNInfoAllocator)); + nI.getNextValue(0, 0, false, VNInfoAllocator)); DOUT << " +" << LR; nI.addRange(LR); } @@ -1840,14 +1846,14 @@ addIntervalsForSpillsFast(const LiveInterval &li, unsigned index = getInstructionIndex(MI); if (HasUse) { LiveRange LR(getLoadIndex(index), getUseIndex(index), - nI.getNextValue(~0U, 0, getVNInfoAllocator())); + nI.getNextValue(0, 0, false, getVNInfoAllocator())); DOUT << " +" << LR; nI.addRange(LR); vrm.addRestorePoint(NewVReg, MI); } if (HasDef) { LiveRange LR(getDefIndex(index), getStoreIndex(index), - nI.getNextValue(~0U, 0, getVNInfoAllocator())); + nI.getNextValue(0, 0, false, getVNInfoAllocator())); DOUT << " +" << LR; nI.addRange(LR); vrm.addSpillPoint(NewVReg, true, MI); @@ -1961,12 +1967,11 @@ addIntervalsForSpills(const LiveInterval &li, i != e; ++i) { const VNInfo *VNI = *i; unsigned VN = VNI->id; - unsigned DefIdx = VNI->def; - if (DefIdx == ~1U) + if (VNI->isUnused()) continue; // Dead val#. // Is the def for the val# rematerializable? - MachineInstr *ReMatDefMI = (DefIdx == ~0u) - ? 0 : getInstructionFromIndex(DefIdx); + MachineInstr *ReMatDefMI = VNI->isDefAccurate() + ? getInstructionFromIndex(VNI->def) : 0; bool dummy; if (ReMatDefMI && isReMaterializable(li, VNI, ReMatDefMI, SpillIs, dummy)) { // Remember how to remat the def of this val#. @@ -1977,7 +1982,7 @@ addIntervalsForSpills(const LiveInterval &li, ReMatDefs[VN] = Clone; bool CanDelete = true; - if (VNI->hasPHIKill) { + if (VNI->hasPHIKill()) { // A kill is a phi node, not all of its uses can be rematerialized. // It must not be deleted. 
 CanDelete = false;
@@ -2287,8 +2292,8 @@ LiveRange LiveIntervals::addLiveRangeToEndOfBlock(unsigned reg,
 LiveInterval& Interval = getOrCreateInterval(reg);
 VNInfo* VN = Interval.getNextValue(
 getInstructionIndex(startInst) + InstrSlots::DEF,
- startInst, getVNInfoAllocator());
- VN->hasPHIKill = true;
+ startInst, true, getVNInfoAllocator());
+ VN->setHasPHIKill(true);
 VN->kills.push_back(getMBBEndIdx(startInst->getParent()));
 LiveRange LR(getInstructionIndex(startInst) + InstrSlots::DEF,
 getMBBEndIdx(startInst->getParent()) + 1, VN);
diff --git a/lib/CodeGen/LiveVariables.cpp b/lib/CodeGen/LiveVariables.cpp
index 6228821..bd84508 100644
--- a/lib/CodeGen/LiveVariables.cpp
+++ b/lib/CodeGen/LiveVariables.cpp
@@ -359,12 +359,11 @@ bool LiveVariables::HandlePhysRegKill(unsigned Reg, MachineInstr *MI) {
 // That is, unless we are currently processing the last reference itself.
 LastRefOrPartRef->addRegisterDead(Reg, TRI, true);

- /* Partial uses. Mark register def dead and add implicit def of sub-registers which are used. FIXME: LiveIntervalAnalysis can't handle this yet! EAX<dead> = op AL<imp-def> That is, EAX def is dead but AL def extends pass it. Enable this after live interval analysis is fixed to improve codegen!
+ // Partial uses. Mark register def dead and add implicit def of
+ // sub-registers which are used.
+ // EAX<dead> = op AL<imp-def>
+ // That is, EAX def is dead but AL def extends past it.
+ // Enable this after live interval analysis is fixed to improve codegen!
 else if (!PhysRegUse[Reg]) {
 PhysRegDef[Reg]->addRegisterDead(Reg, TRI, true);
 for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
@@ -377,7 +376,7 @@ bool LiveVariables::HandlePhysRegKill(unsigned Reg, MachineInstr *MI) {
 PartUses.erase(*SS);
 }
 }
- } */
+ }
 else
 LastRefOrPartRef->addRegisterKilled(Reg, TRI, true);
 return true;
diff --git a/lib/CodeGen/MachineRegisterInfo.cpp b/lib/CodeGen/MachineRegisterInfo.cpp
index 4f5ab1f..544d83a 100644
--- a/lib/CodeGen/MachineRegisterInfo.cpp
+++ b/lib/CodeGen/MachineRegisterInfo.cpp
@@ -16,6 +16,7 @@ using namespace llvm;

MachineRegisterInfo::MachineRegisterInfo(const TargetRegisterInfo &TRI) {
 VRegInfo.reserve(256);
+ RegAllocHints.reserve(256);
 RegClass2VRegMap.resize(TRI.getNumRegClasses()+1); // RC ID starts at 1.
 UsedPhysRegs.resize(TRI.getNumRegs());
@@ -64,6 +65,7 @@ MachineRegisterInfo::createVirtualRegister(const TargetRegisterClass *RegClass){
 // Add a reg, but keep track of whether the vector reallocated or not.
 void *ArrayBase = VRegInfo.empty() ? 0 : &VRegInfo[0];
 VRegInfo.push_back(std::make_pair(RegClass, (MachineOperand*)0));
+ RegAllocHints.push_back(std::make_pair(0, 0));

 if (!((&VRegInfo[0] == ArrayBase || VRegInfo.size() == 1)))
 // The vector reallocated, handle this now.
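The MachineRegisterInfo hunk just above anchors the preference-to-hint migration that runs through the register-allocator files later in this diff: the old bare LiveInterval::preference register becomes a per-vreg (hint type, register) pair kept in RegAllocHints parallel to VRegInfo, and the register half of the pair may itself still be virtual. The sketch below shows how a consumer resolves such a hint, assembled from the calls visible in RALinScan::getFreePhysReg further down; the free function and its name are invented for illustration and assume this revision's in-tree LLVM headers.

// Illustrative helper, not part of the patch: resolve a register
// allocation hint to a physical register the way the linear-scan
// allocator does below. Hint.first is a target-defined hint type
// (0 = plain preference); Hint.second is the preferred register,
// which may still be virtual and must be mapped through VirtRegMap.
static unsigned resolveHintedPhysReg(MachineRegisterInfo *MRI,
                                     VirtRegMap *VRM, unsigned VirtReg) {
  std::pair<unsigned, unsigned> Hint = MRI->getRegAllocationHint(VirtReg);
  unsigned PhysReg = Hint.second;
  if (PhysReg && TargetRegisterInfo::isVirtualRegister(PhysReg) &&
      VRM->hasPhys(PhysReg))
    PhysReg = VRM->getPhys(PhysReg); // hinted vreg already has a phys reg
  return PhysReg;                    // 0 when no usable hint is recorded
}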
diff --git a/lib/CodeGen/PreAllocSplitting.cpp b/lib/CodeGen/PreAllocSplitting.cpp index 97d4728..ae60c86 100644 --- a/lib/CodeGen/PreAllocSplitting.cpp +++ b/lib/CodeGen/PreAllocSplitting.cpp @@ -343,7 +343,7 @@ int PreAllocSplitting::CreateSpillStackSlot(unsigned Reg, if (CurrSLI->hasAtLeastOneValue()) CurrSValNo = CurrSLI->getValNumInfo(0); else - CurrSValNo = CurrSLI->getNextValue(~0U, 0, LSs->getVNInfoAllocator()); + CurrSValNo = CurrSLI->getNextValue(0, 0, false, LSs->getVNInfoAllocator()); return SS; } @@ -637,8 +637,9 @@ PreAllocSplitting::PerformPHIConstructionFallBack(MachineBasicBlock::iterator Us if (Phis.count(MBB)) return Phis[MBB]; unsigned StartIndex = LIs->getMBBStartIdx(MBB); - VNInfo *RetVNI = Phis[MBB] = LI->getNextValue(~0U, /*FIXME*/ 0, - LIs->getVNInfoAllocator()); + VNInfo *RetVNI = Phis[MBB] = + LI->getNextValue(0, /*FIXME*/ 0, false, LIs->getVNInfoAllocator()); + if (!IsIntraBlock) LiveOut[MBB] = RetVNI; // If there are no uses or defs between our starting point and the @@ -654,7 +655,7 @@ PreAllocSplitting::PerformPHIConstructionFallBack(MachineBasicBlock::iterator Us IncomingVNs[*PI] = Incoming; } - if (MBB->pred_size() == 1 && !RetVNI->hasPHIKill) { + if (MBB->pred_size() == 1 && !RetVNI->hasPHIKill()) { VNInfo* OldVN = RetVNI; VNInfo* NewVN = IncomingVNs.begin()->second; VNInfo* MergedVN = LI->MergeValueNumberInto(OldVN, NewVN); @@ -678,7 +679,7 @@ PreAllocSplitting::PerformPHIConstructionFallBack(MachineBasicBlock::iterator Us // VNInfo to represent the joined value. for (DenseMap<MachineBasicBlock*, VNInfo*>::iterator I = IncomingVNs.begin(), E = IncomingVNs.end(); I != E; ++I) { - I->second->hasPHIKill = true; + I->second->setHasPHIKill(true); unsigned KillIndex = LIs->getMBBEndIdx(I->first); if (!LiveInterval::isKill(I->second, KillIndex)) LI->addKill(I->second, KillIndex); @@ -730,7 +731,9 @@ void PreAllocSplitting::ReconstructLiveInterval(LiveInterval* LI) { unsigned DefIdx = LIs->getInstructionIndex(&*DI); DefIdx = LiveIntervals::getDefIndex(DefIdx); - VNInfo* NewVN = LI->getNextValue(DefIdx, 0, Alloc); + assert(DI->getOpcode() != TargetInstrInfo::PHI && + "Following NewVN isPHIDef flag incorrect. Fix me!"); + VNInfo* NewVN = LI->getNextValue(DefIdx, 0, true, Alloc); // If the def is a move, set the copy field. unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx; @@ -793,7 +796,7 @@ void PreAllocSplitting::RenumberValno(VNInfo* VN) { // Bail out if we ever encounter a valno that has a PHI kill. We can't // renumber these. 
- if (OldVN->hasPHIKill) return;
+ if (OldVN->hasPHIKill()) return;

 VNsToCopy.push_back(OldVN);
@@ -823,9 +826,7 @@ void PreAllocSplitting::RenumberValno(VNInfo* VN) {
 VNInfo* OldVN = *OI;

 // Copy the valno over
- VNInfo* NewVN = NewLI.getNextValue(OldVN->def, OldVN->copy,
- LIs->getVNInfoAllocator());
- NewLI.copyValNumInfo(NewVN, OldVN);
+ VNInfo* NewVN = NewLI.createValueCopy(OldVN, LIs->getVNInfoAllocator());
 NewLI.MergeValueInAsValue(*CurrLI, OldVN, NewVN);

 // Remove the valno from the old interval
@@ -873,7 +874,7 @@ bool PreAllocSplitting::Rematerialize(unsigned vreg, VNInfo* ValNo,
 MachineBasicBlock::iterator KillPt = BarrierMBB->end();
 unsigned KillIdx = 0;
- if (ValNo->def == ~0U || DefMI->getParent() == BarrierMBB)
+ if (!ValNo->isDefAccurate() || DefMI->getParent() == BarrierMBB)
 KillPt = findSpillPoint(BarrierMBB, Barrier, NULL, RefsInMBB, KillIdx);
 else
 KillPt = findNextEmptySlot(DefMI->getParent(), DefMI, KillIdx);
@@ -942,7 +943,7 @@ MachineInstr* PreAllocSplitting::FoldSpill(unsigned vreg,
 if (CurrSLI->hasAtLeastOneValue())
 CurrSValNo = CurrSLI->getValNumInfo(0);
 else
- CurrSValNo = CurrSLI->getNextValue(~0U, 0, LSs->getVNInfoAllocator());
+ CurrSValNo = CurrSLI->getNextValue(0, 0, false, LSs->getVNInfoAllocator());
 }

 return FMI;
@@ -1032,13 +1033,13 @@ bool PreAllocSplitting::SplitRegLiveInterval(LiveInterval *LI) {
 CurrLI->FindLiveRangeContaining(LIs->getUseIndex(BarrierIdx));
 VNInfo *ValNo = LR->valno;

- if (ValNo->def == ~1U) {
+ if (ValNo->isUnused()) {
 // Defined by a dead def? How can this be?
 assert(0 && "Val# is defined by a dead def?");
 abort();
 }

- MachineInstr *DefMI = (ValNo->def != ~0U)
+ MachineInstr *DefMI = ValNo->isDefAccurate()
 ? LIs->getInstructionFromIndex(ValNo->def) : NULL;

 // If this would create a new join point, do not split.
@@ -1072,8 +1073,8 @@ bool PreAllocSplitting::SplitRegLiveInterval(LiveInterval *LI) {
 unsigned SpillIndex = 0;
 MachineInstr *SpillMI = NULL;
 int SS = -1;
- if (ValNo->def == ~0U) {
- // If it's defined by a phi, we must split just before the barrier.
+ if (!ValNo->isDefAccurate()) {
+ // If we don't know where the def is, we must split just before the barrier.
 if ((SpillMI = FoldSpill(LI->reg, RC, 0, Barrier,
 BarrierMBB, SS, RefsInMBB))) {
 SpillIndex = LIs->getInstructionIndex(SpillMI);
@@ -1254,17 +1255,16 @@ bool PreAllocSplitting::removeDeadSpills(SmallPtrSet<LiveInterval*, 8>& split) {
 // We don't currently try to handle definitions with PHI kills, because
 // it would involve processing more than one VNInfo at once.
- if (CurrVN->hasPHIKill) continue;
+ if (CurrVN->hasPHIKill()) continue;

 // We also don't try to handle the results of PHI joins, since there's
 // no defining instruction to analyze.
- unsigned DefIdx = CurrVN->def;
- if (DefIdx == ~0U || DefIdx == ~1U) continue;
+ if (!CurrVN->isDefAccurate() || CurrVN->isUnused()) continue;

 // We're only interested in eliminating cruft introduced by the splitter,
 // which is of the form load-use or load-use-store. First, check that the
 // definition is a load, and remember what stack slot we loaded it from.
- MachineInstr* DefMI = LIs->getInstructionFromIndex(DefIdx); + MachineInstr* DefMI = LIs->getInstructionFromIndex(CurrVN->def); int FrameIndex; if (!TII->isLoadFromStackSlot(DefMI, FrameIndex)) continue; @@ -1383,7 +1383,7 @@ bool PreAllocSplitting::createsNewJoin(LiveRange* LR, if (DefMBB == BarrierMBB) return false; - if (LR->valno->hasPHIKill) + if (LR->valno->hasPHIKill()) return false; unsigned MBBEnd = LIs->getMBBEndIdx(BarrierMBB); diff --git a/lib/CodeGen/RegAllocLinearScan.cpp b/lib/CodeGen/RegAllocLinearScan.cpp index 804fae5..41a42fd 100644 --- a/lib/CodeGen/RegAllocLinearScan.cpp +++ b/lib/CodeGen/RegAllocLinearScan.cpp @@ -281,7 +281,8 @@ namespace { /// getFreePhysReg - return a free physical register for this virtual /// register interval if we have one, otherwise return 0. unsigned getFreePhysReg(LiveInterval* cur); - unsigned getFreePhysReg(const TargetRegisterClass *RC, + unsigned getFreePhysReg(LiveInterval* cur, + const TargetRegisterClass *RC, unsigned MaxInactiveCount, SmallVector<unsigned, 256> &inactiveCounts, bool SkipDGRegs); @@ -352,11 +353,12 @@ void RALinScan::ComputeRelatedRegClasses() { /// different register classes or because the coalescer was overly /// conservative. unsigned RALinScan::attemptTrivialCoalescing(LiveInterval &cur, unsigned Reg) { - if ((cur.preference && cur.preference == Reg) || !cur.containsOneValue()) + unsigned Preference = vrm_->getRegAllocPref(cur.reg); + if ((Preference && Preference == Reg) || !cur.containsOneValue()) return Reg; VNInfo *vni = cur.begin()->valno; - if (!vni->def || vni->def == ~1U || vni->def == ~0U) + if (!vni->def || vni->isUnused() || !vni->isDefAccurate()) return Reg; MachineInstr *CopyMI = li_->getInstructionFromIndex(vni->def); unsigned SrcReg, DstReg, SrcSubReg, DstSubReg, PhysReg; @@ -584,7 +586,7 @@ void RALinScan::linearScan() // register allocator had to spill other registers in its register class. if (ls_->getNumIntervals() == 0) return; - if (!vrm_->FindUnusedRegisters(tri_, li_)) + if (!vrm_->FindUnusedRegisters(li_)) return; } @@ -743,7 +745,7 @@ static void addStackInterval(LiveInterval *cur, LiveStacks *ls_, if (SI.hasAtLeastOneValue()) VNI = SI.getValNumInfo(0); else - VNI = SI.getNextValue(~0U, 0, ls_->getVNInfoAllocator()); + VNI = SI.getNextValue(0, 0, false, ls_->getVNInfoAllocator()); LiveInterval &RI = li_->getInterval(cur->reg); // FIXME: This may be overly conservative. @@ -897,7 +899,7 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) // This is an implicitly defined live interval, just assign any register. const TargetRegisterClass *RC = mri_->getRegClass(cur->reg); if (cur->empty()) { - unsigned physReg = cur->preference; + unsigned physReg = vrm_->getRegAllocPref(cur->reg); if (!physReg) physReg = *RC->allocation_order_begin(*mf_); DOUT << tri_->getName(physReg) << '\n'; @@ -917,9 +919,9 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) // register class, then we should try to assign it the same register. // This can happen when the move is from a larger register class to a smaller // one, e.g. X86::mov32to32_. These move instructions are not coalescable. 
- if (!cur->preference && cur->hasAtLeastOneValue()) {
+ if (!vrm_->getRegAllocPref(cur->reg) && cur->hasAtLeastOneValue()) {
 VNInfo *vni = cur->begin()->valno;
- if (vni->def && vni->def != ~1U && vni->def != ~0U) {
+ if (vni->def && !vni->isUnused() && vni->isDefAccurate()) {
 MachineInstr *CopyMI = li_->getInstructionFromIndex(vni->def);
 unsigned SrcReg, DstReg, SrcSubReg, DstSubReg;
 if (CopyMI &&
@@ -935,7 +937,7 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur)
 if (DstSubReg)
 Reg = tri_->getMatchingSuperReg(Reg, DstSubReg, RC);
 if (Reg && allocatableRegs_[Reg] && RC->contains(Reg))
- cur->preference = Reg;
+ mri_->setRegAllocationHint(cur->reg, 0, Reg);
 }
 }
 }
@@ -1044,7 +1046,7 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur)
 if (LiveInterval *NextReloadLI = hasNextReloadInterval(cur)) {
 // "Downgrade" physReg to try to keep physReg from being allocated until
 // the next reload from the same SS is allocated.
- NextReloadLI->preference = physReg;
+ mri_->setRegAllocationHint(NextReloadLI->reg, 0, physReg);
 DowngradeRegister(cur, physReg);
 }
 return;
@@ -1071,7 +1073,7 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur)

 // Find a register to spill.
 float minWeight = HUGE_VALF;
- unsigned minReg = 0; /*cur->preference*/; // Try the pref register first.
+ unsigned minReg = 0;

 bool Found = false;
 std::vector<std::pair<unsigned,float> > RegsWeights;
@@ -1290,7 +1292,7 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur)
 // If the interval has a preference, it must be defined by a copy. Clear the
 // preference now since the source interval allocation may have been
 // undone as well.
- i->preference = 0;
+ mri_->setRegAllocationHint(i->reg, 0, 0);
 else {
 UpgradeRegister(ii->second);
 }
@@ -1346,15 +1348,23 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur)
 }
}

-unsigned RALinScan::getFreePhysReg(const TargetRegisterClass *RC,
+unsigned RALinScan::getFreePhysReg(LiveInterval* cur,
+ const TargetRegisterClass *RC,
 unsigned MaxInactiveCount,
 SmallVector<unsigned, 256> &inactiveCounts,
 bool SkipDGRegs) {
 unsigned FreeReg = 0;
 unsigned FreeRegInactiveCount = 0;

- TargetRegisterClass::iterator I = RC->allocation_order_begin(*mf_);
- TargetRegisterClass::iterator E = RC->allocation_order_end(*mf_);
+ std::pair<unsigned, unsigned> Hint = mri_->getRegAllocationHint(cur->reg);
+ // Resolve second part of the hint (if possible) given the current allocation.
+ unsigned physReg = Hint.second;
+ if (physReg &&
+ TargetRegisterInfo::isVirtualRegister(physReg) && vrm_->hasPhys(physReg))
+ physReg = vrm_->getPhys(physReg);
+
+ TargetRegisterClass::iterator I, E;
+ tie(I, E) = tri_->getAllocationOrder(RC, Hint.first, physReg, *mf_);
 assert(I != E && "No allocatable register in this register class!");

 // Scan for the first available register.
@@ -1377,7 +1387,7 @@ unsigned RALinScan::getFreePhysReg(const TargetRegisterClass *RC,
 // return this register.
 if (FreeReg == 0 || FreeRegInactiveCount == MaxInactiveCount)
 return FreeReg;
-
 // Continue scanning the registers, looking for the one with the highest
 // inactive count. Alkis found that this reduced register pressure very
 // slightly on X86 (in rev 1.94 of this file), though this should probably be
@@ -1428,20 +1438,21 @@ unsigned RALinScan::getFreePhysReg(LiveInterval *cur) {

 // If copy coalescer has assigned a "preferred" register, check if it's
 // available first.
- if (cur->preference) {
- DOUT << "(preferred: " << tri_->getName(cur->preference) << ") ";
- if (isRegAvail(cur->preference) &&
- RC->contains(cur->preference))
- return cur->preference;
+ unsigned Preference = vrm_->getRegAllocPref(cur->reg);
+ if (Preference) {
+ DOUT << "(preferred: " << tri_->getName(Preference) << ") ";
+ if (isRegAvail(Preference) &&
+ RC->contains(Preference))
+ return Preference;
 }

 if (!DowngradedRegs.empty()) {
- unsigned FreeReg = getFreePhysReg(RC, MaxInactiveCount, inactiveCounts,
+ unsigned FreeReg = getFreePhysReg(cur, RC, MaxInactiveCount, inactiveCounts,
 true);
 if (FreeReg)
 return FreeReg;
 }
- return getFreePhysReg(RC, MaxInactiveCount, inactiveCounts, false);
+ return getFreePhysReg(cur, RC, MaxInactiveCount, inactiveCounts, false);
}

FunctionPass* llvm::createLinearScanRegisterAllocator() {
diff --git a/lib/CodeGen/RegAllocPBQP.cpp b/lib/CodeGen/RegAllocPBQP.cpp
index 61450a7..89e2c59 100644
--- a/lib/CodeGen/RegAllocPBQP.cpp
+++ b/lib/CodeGen/RegAllocPBQP.cpp
@@ -651,7 +651,7 @@ void PBQPRegAlloc::addStackInterval(const LiveInterval *spilled,
 if (stackInterval.getNumValNums() != 0)
 vni = stackInterval.getValNumInfo(0);
 else
- vni = stackInterval.getNextValue(-0U, 0, lss->getVNInfoAllocator());
+ vni = stackInterval.getNextValue(0, 0, false, lss->getVNInfoAllocator());

 LiveInterval &rhsInterval = lis->getInterval(spilled->reg);
 stackInterval.MergeRangesInAsValue(rhsInterval, vni);
@@ -733,8 +733,7 @@ void PBQPRegAlloc::finalizeAlloc() const {
 itr != end; ++itr) {

 LiveInterval *li = *itr;
- unsigned physReg = li->preference;
-
+ unsigned physReg = vrm->getRegAllocPref(li->reg);
 if (physReg == 0) {
 const TargetRegisterClass *liRC = mri->getRegClass(li->reg);
 physReg = *liRC->allocation_order_begin(*mf);
diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp
index 4a7dbeb..24fccf0 100644
--- a/lib/CodeGen/SelectionDAG/FastISel.cpp
+++ b/lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -47,7 +47,6 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/DebugLoc.h"
#include "llvm/CodeGen/DwarfWriter.h"
#include "llvm/Analysis/DebugInfo.h"
#include "llvm/Target/TargetData.h"
@@ -361,7 +360,7 @@ bool FastISel::SelectCall(User *I) {
 // Returned ID is 0 if this is unbalanced "end of inlined
 // scope". This could happen if the optimizer eats dbg intrinsics
 // or "beginning of inlined scope" is not recognized due to
- // missing location info. In such cases, do ignore this region.end.
+ // missing location info. In such cases, ignore this region.end.
 BuildMI(MBB, DL, II).addImm(ID);
 } else {
 const TargetInstrDesc &II = TII.get(TargetInstrInfo::DBG_LABEL);
diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index f3c2833..1bb8090 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -2768,6 +2768,53 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
 ISD::SETULT : ISD::SETUGT));
 break;
 }
+ case ISD::UMULO:
+ case ISD::SMULO: {
+ MVT VT = Node->getValueType(0);
+ SDValue LHS = Node->getOperand(0);
+ SDValue RHS = Node->getOperand(1);
+ SDValue BottomHalf;
+ SDValue TopHalf;
+ static unsigned Ops[2][3] =
+ { { ISD::MULHU, ISD::UMUL_LOHI, ISD::ZERO_EXTEND },
+ { ISD::MULHS, ISD::SMUL_LOHI, ISD::SIGN_EXTEND }};
+ bool isSigned = Node->getOpcode() == ISD::SMULO;
+ if (TLI.isOperationLegalOrCustom(Ops[isSigned][0], VT)) {
+ BottomHalf = DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
+ TopHalf = DAG.getNode(Ops[isSigned][0], dl, VT, LHS, RHS);
+ } else if (TLI.isOperationLegalOrCustom(Ops[isSigned][1], VT)) {
+ BottomHalf = DAG.getNode(Ops[isSigned][1], dl, DAG.getVTList(VT, VT), LHS,
+ RHS);
+ TopHalf = BottomHalf.getValue(1);
+ } else if (TLI.isTypeLegal(MVT::getIntegerVT(VT.getSizeInBits() * 2))) {
+ MVT WideVT = MVT::getIntegerVT(VT.getSizeInBits() * 2);
+ LHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, LHS);
+ RHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, RHS);
+ Tmp1 = DAG.getNode(ISD::MUL, dl, WideVT, LHS, RHS);
+ BottomHalf = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, VT, Tmp1,
+ DAG.getIntPtrConstant(0));
+ TopHalf = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, VT, Tmp1,
+ DAG.getIntPtrConstant(1));
+ } else {
+ // FIXME: We should be able to fall back to a libcall with an illegal
+ // type in some cases.
+ // Also, we can fall back to a division in some cases, but that's a big
+ // performance hit in the general case.
+ assert(0 && "Don't know how to expand this operation yet!");
+ }
+ if (isSigned) {
+ Tmp1 = DAG.getConstant(VT.getSizeInBits() - 1, TLI.getShiftAmountTy());
+ Tmp1 = DAG.getNode(ISD::SRA, dl, VT, BottomHalf, Tmp1);
+ TopHalf = DAG.getSetCC(dl, TLI.getSetCCResultType(VT), TopHalf, Tmp1,
+ ISD::SETNE);
+ } else {
+ TopHalf = DAG.getSetCC(dl, TLI.getSetCCResultType(VT), TopHalf,
+ DAG.getConstant(0, VT), ISD::SETNE);
+ }
+ Results.push_back(BottomHalf);
+ Results.push_back(TopHalf);
+ break;
+ }
 case ISD::BUILD_PAIR: {
 MVT PairTy = Node->getValueType(0);
 Tmp1 = DAG.getNode(ISD::ZERO_EXTEND, dl, PairTy, Node->getOperand(0));
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
index b7d7818..6e5adee 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
@@ -95,14 +95,13 @@ void DAGTypeLegalizer::ExpandRes_BIT_CONVERT(SDNode *N, SDValue &Lo,
 if (InVT.isVector() && OutVT.isInteger()) {
 // Handle cases like i64 = BIT_CONVERT v1i64 on x86, where the operand
 // is legal but the result is not.
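The new ISD::UMULO/ISD::SMULO expansion above tries three strategies in order (a high-half multiply, a MUL_LOHI pair, or a multiply in a double-width type) and in each case flags overflow by inspecting the high half of the product. The last, widening strategy can be sketched in plain C++ as follows; the helper functions and their names are illustrative only, shown here on a 32-bit type widened to 64 bits, with the usual caveat that right-shifting a negative value is implementation-defined (though arithmetic on mainstream compilers).

#include <cstdint>

// Unsigned multiply-with-overflow: the product overflows exactly when
// the high half of the double-width result is nonzero (the SETNE
// against zero in the expansion above).
static bool umulo32(uint32_t a, uint32_t b, uint32_t &lo) {
  uint64_t p = uint64_t(a) * uint64_t(b);
  lo = uint32_t(p);
  return (p >> 32) != 0;
}

// Signed multiply-with-overflow: the product fits exactly when the high
// half equals the sign-extension of the low half, i.e. the low half
// shifted right arithmetically by 31 (the SRA + SETNE in the expansion).
static bool smulo32(int32_t a, int32_t b, int32_t &lo) {
  int64_t p = int64_t(a) * int64_t(b);
  lo = int32_t(p);
  return (p >> 32) != int64_t(lo >> 31);
}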
- MVT NVT = MVT::getVectorVT(TLI.getTypeToTransformTo(OutVT), 2); + MVT NVT = MVT::getVectorVT(NOutVT, 2); if (isTypeLegal(NVT)) { SDValue CastInOp = DAG.getNode(ISD::BIT_CONVERT, dl, NVT, InOp); - MVT EltNVT = NVT.getVectorElementType(); - Lo = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltNVT, CastInOp, + Lo = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NOutVT, CastInOp, DAG.getIntPtrConstant(0)); - Hi = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltNVT, CastInOp, + Hi = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NOutVT, CastInOp, DAG.getIntPtrConstant(1)); if (TLI.isBigEndian()) diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuild.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuild.cpp index 93750d6..48ebd0f 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuild.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuild.cpp @@ -5317,8 +5317,12 @@ void SelectionDAGLowering::visitInlineAsm(CallSite CS) { if ((OpFlag & 7) == 2 /*REGDEF*/ || (OpFlag & 7) == 6 /* EARLYCLOBBER REGDEF */) { // Add (OpFlag&0xffff)>>3 registers to MatchedRegs. - assert(!OpInfo.isIndirect && - "Don't know how to handle tied indirect register inputs yet!"); + if (OpInfo.isIndirect) { + cerr << "llvm: error: " + "Don't know how to handle tied indirect " + "register inputs yet!\n"; + exit(1); + } RegsForValue MatchedRegs; MatchedRegs.TLI = &TLI; MatchedRegs.ValueVTs.push_back(InOperandVal.getValueType()); diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp index ab4cd51..a771d46 100644 --- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -171,6 +171,8 @@ static void InitLibcallNames(const char **Names) { Names[RTLIB::FPROUND_PPCF128_F32] = "__trunctfsf2"; Names[RTLIB::FPROUND_F80_F64] = "__truncxfdf2"; Names[RTLIB::FPROUND_PPCF128_F64] = "__trunctfdf2"; + Names[RTLIB::FPTOSINT_F32_I8] = "__fixsfi8"; + Names[RTLIB::FPTOSINT_F32_I16] = "__fixsfi16"; Names[RTLIB::FPTOSINT_F32_I32] = "__fixsfsi"; Names[RTLIB::FPTOSINT_F32_I64] = "__fixsfdi"; Names[RTLIB::FPTOSINT_F32_I128] = "__fixsfti"; @@ -183,6 +185,8 @@ static void InitLibcallNames(const char **Names) { Names[RTLIB::FPTOSINT_PPCF128_I32] = "__fixtfsi"; Names[RTLIB::FPTOSINT_PPCF128_I64] = "__fixtfdi"; Names[RTLIB::FPTOSINT_PPCF128_I128] = "__fixtfti"; + Names[RTLIB::FPTOUINT_F32_I8] = "__fixunssfi8"; + Names[RTLIB::FPTOUINT_F32_I16] = "__fixunssfi16"; Names[RTLIB::FPTOUINT_F32_I32] = "__fixunssfsi"; Names[RTLIB::FPTOUINT_F32_I64] = "__fixunssfdi"; Names[RTLIB::FPTOUINT_F32_I128] = "__fixunssfti"; @@ -271,6 +275,10 @@ RTLIB::Libcall RTLIB::getFPROUND(MVT OpVT, MVT RetVT) { /// UNKNOWN_LIBCALL if there is none. RTLIB::Libcall RTLIB::getFPTOSINT(MVT OpVT, MVT RetVT) { if (OpVT == MVT::f32) { + if (RetVT == MVT::i8) + return FPTOSINT_F32_I8; + if (RetVT == MVT::i16) + return FPTOSINT_F32_I16; if (RetVT == MVT::i32) return FPTOSINT_F32_I32; if (RetVT == MVT::i64) @@ -306,6 +314,10 @@ RTLIB::Libcall RTLIB::getFPTOSINT(MVT OpVT, MVT RetVT) { /// UNKNOWN_LIBCALL if there is none. RTLIB::Libcall RTLIB::getFPTOUINT(MVT OpVT, MVT RetVT) { if (OpVT == MVT::f32) { + if (RetVT == MVT::i8) + return FPTOUINT_F32_I8; + if (RetVT == MVT::i16) + return FPTOUINT_F32_I16; if (RetVT == MVT::i32) return FPTOUINT_F32_I32; if (RetVT == MVT::i64) @@ -2584,8 +2596,12 @@ bool TargetLowering::CheckTailCallReturnConstraints(CallSDNode *TheCall, // Check that operand of the RET node sources from the CALL node. The RET node // has at least two operands. Operand 0 holds the chain. Operand 1 holds the // value. 
+ // Also we need to check that there is no code in between the call and the + // return. Hence we also check that the incoming chain to the return sources + // from the outgoing chain of the call. if (NumOps > 1 && - IgnoreHarmlessInstructions(Ret.getOperand(1)) == SDValue(TheCall,0) + IgnoreHarmlessInstructions(Ret.getOperand(1)) == SDValue(TheCall,0) && + Ret.getOperand(0) == SDValue(TheCall, TheCall->getNumValues()-1)) return true; // void return: The RET node has the chain result value of the CALL node as // input. diff --git a/lib/CodeGen/SimpleRegisterCoalescing.cpp b/lib/CodeGen/SimpleRegisterCoalescing.cpp index 2bc234f..2034805 100644 --- a/lib/CodeGen/SimpleRegisterCoalescing.cpp +++ b/lib/CodeGen/SimpleRegisterCoalescing.cpp @@ -141,7 +141,7 @@ bool SimpleRegisterCoalescing::AdjustCopiesBackFrom(LiveInterval &IntA, // The live interval of ECX is represented as this: // %reg20,inf = [46,47:1)[174,230:0) 0@174-(230) 1@46-(47) // The coalescer has no idea there was a def in the middle of [174,230]. - if (AValNo->redefByEC) + if (AValNo->hasRedefByEC()) return false; // If AValNo is defined as a copy from IntB, we can potentially process this. @@ -203,7 +203,8 @@ bool SimpleRegisterCoalescing::AdjustCopiesBackFrom(LiveInterval &IntA, for (const unsigned *SR = tri_->getSubRegisters(IntB.reg); *SR; ++SR) { LiveInterval &SRLI = li_->getInterval(*SR); SRLI.addRange(LiveRange(FillerStart, FillerEnd, - SRLI.getNextValue(FillerStart, 0, li_->getVNInfoAllocator()))); + SRLI.getNextValue(FillerStart, 0, true, + li_->getVNInfoAllocator()))); } } @@ -304,8 +305,10 @@ bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(LiveInterval &IntA, assert(ALR != IntA.end() && "Live range not found!"); VNInfo *AValNo = ALR->valno; // If other defs can reach uses of this def, then it's not safe to perform - // the optimization. - if (AValNo->def == ~0U || AValNo->def == ~1U || AValNo->hasPHIKill) + // the optimization. FIXME: Do isPHIDef and isDefAccurate both need to be + // tested? + if (AValNo->isPHIDef() || !AValNo->isDefAccurate() || + AValNo->isUnused() || AValNo->hasPHIKill()) return false; MachineInstr *DefMI = li_->getInstructionFromIndex(AValNo->def); const TargetInstrDesc &TID = DefMI->getDesc(); @@ -351,7 +354,7 @@ bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(LiveInterval &IntA, unsigned OpIdx = NewMI->findRegisterUseOperandIdx(IntA.reg, false); NewMI->getOperand(OpIdx).setIsKill(); - bool BHasPHIKill = BValNo->hasPHIKill; + bool BHasPHIKill = BValNo->hasPHIKill(); SmallVector<VNInfo*, 4> BDeadValNos; SmallVector<unsigned, 4> BKills; std::map<unsigned, unsigned> BExtend; @@ -403,7 +406,7 @@ bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(LiveInterval &IntA, // extended to the end of the existing live range defined by the copy.
unsigned DefIdx = li_->getDefIndex(UseIdx); const LiveRange *DLR = IntB.getLiveRangeContaining(DefIdx); - BHasPHIKill |= DLR->valno->hasPHIKill; + BHasPHIKill |= DLR->valno->hasPHIKill(); assert(DLR->valno->def == DefIdx); BDeadValNos.push_back(DLR->valno); BExtend[DLR->start] = DLR->end; @@ -462,7 +465,7 @@ bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(LiveInterval &IntA, } } IntB.addKills(ValNo, BKills); - ValNo->hasPHIKill = BHasPHIKill; + ValNo->setHasPHIKill(BHasPHIKill); DOUT << " result = "; IntB.print(DOUT, tri_); DOUT << "\n"; @@ -578,8 +581,10 @@ bool SimpleRegisterCoalescing::ReMaterializeTrivialDef(LiveInterval &SrcInt, assert(SrcLR != SrcInt.end() && "Live range not found!"); VNInfo *ValNo = SrcLR->valno; // If other defs can reach uses of this def, then it's not safe to perform - // the optimization. - if (ValNo->def == ~0U || ValNo->def == ~1U || ValNo->hasPHIKill) + // the optimization. FIXME: Do isPHIDef and isDefAccurate both need to be + // tested? + if (ValNo->isPHIDef() || !ValNo->isDefAccurate() || + ValNo->isUnused() || ValNo->hasPHIKill()) return false; MachineInstr *DefMI = li_->getInstructionFromIndex(ValNo->def); const TargetInstrDesc &TID = DefMI->getDesc(); @@ -616,19 +621,17 @@ bool SimpleRegisterCoalescing::ReMaterializeTrivialDef(LiveInterval &SrcInt, } MachineBasicBlock::iterator MII = next(MachineBasicBlock::iterator(CopyMI)); - CopyMI->removeFromParent(); tii_->reMaterialize(*MBB, MII, DstReg, DefMI); MachineInstr *NewMI = prior(MII); if (checkForDeadDef) { - // PR4090 fix: Trim interval failed because there was no use of the - // source interval in this MBB. If the def is in this MBB too then we - // should mark it dead: - if (DefMI->getParent() == MBB) { - DefMI->addRegisterDead(SrcInt.reg, tri_); - SrcLR->end = SrcLR->start + 1; - } - + // PR4090 fix: Trim interval failed because there was no use of the + // source interval in this MBB. If the def is in this MBB too then we + // should mark it dead: + if (DefMI->getParent() == MBB) { + DefMI->addRegisterDead(SrcInt.reg, tri_); + SrcLR->end = SrcLR->start + 1; + } } // CopyMI may have implicit operands, transfer them over to the newly @@ -647,7 +650,7 @@ bool SimpleRegisterCoalescing::ReMaterializeTrivialDef(LiveInterval &SrcInt, } li_->ReplaceMachineInstrInMaps(CopyMI, NewMI); - MBB->getParent()->DeleteMachineInstr(CopyMI); + CopyMI->eraseFromParent(); ReMatCopies.insert(CopyMI); ReMatDefs.insert(DefMI); ++NumReMats; @@ -673,7 +676,7 @@ bool SimpleRegisterCoalescing::isBackEdgeCopy(MachineInstr *CopyMI, return false; unsigned KillIdx = li_->getMBBEndIdx(MBB) + 1; if (DstLR->valno->kills.size() == 1 && - DstLR->valno->kills[0] == KillIdx && DstLR->valno->hasPHIKill) + DstLR->valno->kills[0] == KillIdx && DstLR->valno->hasPHIKill()) return true; return false; } @@ -937,7 +940,7 @@ bool SimpleRegisterCoalescing::CanCoalesceWithImpDef(MachineInstr *CopyMI, LiveInterval::iterator LR = li.FindLiveRangeContaining(CopyIdx); if (LR == li.end()) return false; - if (LR->valno->hasPHIKill) + if (LR->valno->hasPHIKill()) return false; if (LR->valno->def != CopyIdx) return false; @@ -965,11 +968,11 @@ bool SimpleRegisterCoalescing::CanCoalesceWithImpDef(MachineInstr *CopyMI, } -/// RemoveCopiesFromValNo - The specified value# is defined by an implicit -/// def and it is being removed. Turn all copies from this value# into -/// identity copies so they will be removed. 
-void SimpleRegisterCoalescing::RemoveCopiesFromValNo(LiveInterval &li, - VNInfo *VNI) { +/// TurnCopiesFromValNoToImpDefs - The specified value# is defined by an +/// implicit_def and it is being removed. Turn all copies from this value# +/// into implicit_defs. +void SimpleRegisterCoalescing::TurnCopiesFromValNoToImpDefs(LiveInterval &li, + VNInfo *VNI) { SmallVector<MachineInstr*, 4> ImpDefs; MachineOperand *LastUse = NULL; unsigned LastUseIdx = li_->getUseIndex(VNI->def); @@ -979,9 +982,8 @@ void SimpleRegisterCoalescing::RemoveCopiesFromValNo(LiveInterval &li, MachineInstr *MI = &*RI; ++RI; if (MO->isDef()) { - if (MI->getOpcode() == TargetInstrInfo::IMPLICIT_DEF) { + if (MI->getOpcode() == TargetInstrInfo::IMPLICIT_DEF) ImpDefs.push_back(MI); - } continue; } if (JoinedCopies.count(MI)) @@ -994,13 +996,18 @@ void SimpleRegisterCoalescing::RemoveCopiesFromValNo(LiveInterval &li, unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx; if (tii_->isMoveInstr(*MI, SrcReg, DstReg, SrcSubIdx, DstSubIdx) && SrcReg == li.reg) { - // Each use MI may have multiple uses of this register. Change them all. - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - MachineOperand &MO = MI->getOperand(i); - if (MO.isReg() && MO.getReg() == li.reg) - MO.setReg(DstReg); - } - JoinedCopies.insert(MI); + // Change it to an implicit_def. + MI->setDesc(tii_->get(TargetInstrInfo::IMPLICIT_DEF)); + for (int i = MI->getNumOperands() - 1, e = 0; i > e; --i) + MI->RemoveOperand(i); + // It's no longer a copy, update the valno it defines. + unsigned DefIdx = li_->getDefIndex(UseIdx); + LiveInterval &DstInt = li_->getInterval(DstReg); + LiveInterval::iterator DLR = DstInt.FindLiveRangeContaining(DefIdx); + assert(DLR != DstInt.end() && "Live range not found!"); + assert(DLR->valno->copy == MI); + DLR->valno->copy = NULL; + ReMatCopies.insert(MI); } else if (UseIdx > LastUseIdx) { LastUseIdx = UseIdx; LastUse = MO; @@ -1265,6 +1272,17 @@ SimpleRegisterCoalescing::CanJoinInsertSubRegToPhysReg(unsigned DstReg, return true; } +/// getRegAllocPreference - Return register allocation preference register. +/// +static unsigned getRegAllocPreference(unsigned Reg, MachineFunction &MF, + MachineRegisterInfo *MRI, + const TargetRegisterInfo *TRI) { + if (TargetRegisterInfo::isPhysicalRegister(Reg)) + return 0; + std::pair<unsigned, unsigned> Hint = MRI->getRegAllocationHint(Reg); + return TRI->ResolveRegAllocHint(Hint.first, Hint.second, MF); +} + /// JoinCopy - Attempt to join intervals corresponding to SrcReg/DstReg, /// which are the src/dst of the copy instruction CopyMI. This returns true /// if the copy was successfully coalesced away. If it is not currently @@ -1566,7 +1584,7 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { if (PhysJoinTweak) { if (SrcIsPhys) { if (!isWinToJoinVRWithSrcPhysReg(CopyMI, CopyMBB, DstInt, SrcInt)) { - DstInt.preference = SrcReg; + mri_->setRegAllocationHint(DstInt.reg, 0, SrcReg); ++numAborts; DOUT << "\tMay tie down a physical register, abort!\n"; Again = true; // May be possible to coalesce later. @@ -1574,7 +1592,7 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { } } else { if (!isWinToJoinVRWithDstPhysReg(CopyMI, CopyMBB, DstInt, SrcInt)) { - SrcInt.preference = DstReg; + mri_->setRegAllocationHint(SrcInt.reg, 0, DstReg); ++numAborts; DOUT << "\tMay tie down a physical register, abort!\n"; Again = true; // May be possible to coalesce later. 
@@ -1598,7 +1616,7 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { if (Length > Threshold && (((float)std::distance(mri_->use_begin(JoinVReg), mri_->use_end()) / Length) < Ratio)) { - JoinVInt.preference = JoinPReg; + mri_->setRegAllocationHint(JoinVInt.reg, 0, JoinPReg); ++numAborts; DOUT << "\tMay tie down a physical register, abort!\n"; Again = true; // May be possible to coalesce later. @@ -1669,9 +1687,9 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { E = SavedLI->vni_end(); I != E; ++I) { const VNInfo *ValNo = *I; VNInfo *NewValNo = RealInt.getNextValue(ValNo->def, ValNo->copy, + false, // updated at * li_->getVNInfoAllocator()); - NewValNo->hasPHIKill = ValNo->hasPHIKill; - NewValNo->redefByEC = ValNo->redefByEC; + NewValNo->setFlags(ValNo->getFlags()); // * updated here. RealInt.addKills(NewValNo, ValNo->kills); RealInt.MergeValueInAsValue(*SavedLI, ValNo, NewValNo); } @@ -1691,7 +1709,7 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { !SrcIsPhys && !DstIsPhys) { if ((isExtSubReg && !Swapped) || ((isInsSubReg || isSubRegToReg) && Swapped)) { - ResSrcInt->Copy(*ResDstInt, li_->getVNInfoAllocator()); + ResSrcInt->Copy(*ResDstInt, mri_, li_->getVNInfoAllocator()); std::swap(SrcReg, DstReg); std::swap(ResSrcInt, ResDstInt); } @@ -1710,7 +1728,8 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { for (LiveInterval::const_vni_iterator i = ResSrcInt->vni_begin(), e = ResSrcInt->vni_end(); i != e; ++i) { const VNInfo *vni = *i; - if (!vni->def || vni->def == ~1U || vni->def == ~0U) + // FIXME: Do isPHIDef and isDefAccurate both need to be tested? + if (!vni->def || vni->isUnused() || vni->isPHIDef() || !vni->isDefAccurate()) continue; MachineInstr *CopyMI = li_->getInstructionFromIndex(vni->def); unsigned NewSrcReg, NewDstReg, NewSrcSubIdx, NewDstSubIdx; @@ -1747,6 +1766,9 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { // being merged. li_->removeInterval(SrcReg); + // Update regalloc hint. + tri_->UpdateRegAllocHint(SrcReg, DstReg, *mf_); + // Manually deleted the live interval copy. if (SavedLI) { SavedLI->clear(); @@ -1762,7 +1784,7 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { VNInfo *ImpVal = LR->valno; assert(ImpVal->def == CopyIdx); unsigned NextDef = LR->end; - RemoveCopiesFromValNo(*ResDstInt, ImpVal); + TurnCopiesFromValNoToImpDefs(*ResDstInt, ImpVal); ResDstInt->removeValNo(ImpVal); LR = ResDstInt->FindLiveRangeContaining(NextDef); if (LR != ResDstInt->end() && LR->valno->def == NextDef) { @@ -1778,11 +1800,12 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { // If resulting interval has a preference that no longer fits because of subreg // coalescing, just clear the preference. - if (ResDstInt->preference && (isExtSubReg || isInsSubReg || isSubRegToReg) && + unsigned Preference = getRegAllocPreference(ResDstInt->reg, *mf_, mri_, tri_); + if (Preference && (isExtSubReg || isInsSubReg || isSubRegToReg) && TargetRegisterInfo::isVirtualRegister(ResDstInt->reg)) { const TargetRegisterClass *RC = mri_->getRegClass(ResDstInt->reg); - if (!RC->contains(ResDstInt->preference)) - ResDstInt->preference = 0; + if (!RC->contains(Preference)) + mri_->setRegAllocationHint(ResDstInt->reg, 0, 0); } DOUT << "\n\t\tJoined. 
Result = "; ResDstInt->print(DOUT, tri_); @@ -1856,7 +1879,8 @@ bool SimpleRegisterCoalescing::RangeIsDefinedByCopyFromReg(LiveInterval &li, unsigned SrcReg = li_->getVNInfoSourceReg(LR->valno); if (SrcReg == Reg) return true; - if (LR->valno->def == ~0U && + // FIXME: Do isPHIDef and isDefAccurate both need to be tested? + if ((LR->valno->isPHIDef() || !LR->valno->isDefAccurate()) && TargetRegisterInfo::isPhysicalRegister(li.reg) && *tri_->getSuperRegisters(li.reg)) { // It's a sub-register live interval, we may not have precise information. @@ -2025,12 +2049,20 @@ bool SimpleRegisterCoalescing::SimpleJoin(LiveInterval &LHS, LiveInterval &RHS){ // Okay, the final step is to loop over the RHS live intervals, adding them to // the LHS. - LHSValNo->hasPHIKill |= VNI->hasPHIKill; + if (VNI->hasPHIKill()) + LHSValNo->setHasPHIKill(true); LHS.addKills(LHSValNo, VNI->kills); LHS.MergeRangesInAsValue(RHS, LHSValNo); LHS.weight += RHS.weight; - if (RHS.preference && !LHS.preference) - LHS.preference = RHS.preference; + + // Update regalloc hint if both are virtual registers. + if (TargetRegisterInfo::isVirtualRegister(LHS.reg) && + TargetRegisterInfo::isVirtualRegister(RHS.reg)) { + std::pair<unsigned, unsigned> RHSPref = mri_->getRegAllocationHint(RHS.reg); + std::pair<unsigned, unsigned> LHSPref = mri_->getRegAllocationHint(LHS.reg); + if (RHSPref != LHSPref) + mri_->setRegAllocationHint(LHS.reg, RHSPref.first, RHSPref.second); + } // Update the liveintervals of sub-registers. if (TargetRegisterInfo::isPhysicalRegister(LHS.reg)) @@ -2185,7 +2217,7 @@ SimpleRegisterCoalescing::JoinIntervals(LiveInterval &LHS, LiveInterval &RHS, for (LiveInterval::vni_iterator i = LHS.vni_begin(), e = LHS.vni_end(); i != e; ++i) { VNInfo *VNI = *i; - if (VNI->def == ~1U || VNI->copy == 0) // Src not defined by a copy? + if (VNI->isUnused() || VNI->copy == 0) // Src not defined by a copy? continue; // DstReg is known to be a register in the LHS interval. If the src is @@ -2202,7 +2234,7 @@ SimpleRegisterCoalescing::JoinIntervals(LiveInterval &LHS, LiveInterval &RHS, for (LiveInterval::vni_iterator i = RHS.vni_begin(), e = RHS.vni_end(); i != e; ++i) { VNInfo *VNI = *i; - if (VNI->def == ~1U || VNI->copy == 0) // Src not defined by a copy? + if (VNI->isUnused() || VNI->copy == 0) // Src not defined by a copy? continue; // DstReg is known to be a register in the RHS interval. If the src is @@ -2222,7 +2254,7 @@ SimpleRegisterCoalescing::JoinIntervals(LiveInterval &LHS, LiveInterval &RHS, i != e; ++i) { VNInfo *VNI = *i; unsigned VN = VNI->id; - if (LHSValNoAssignments[VN] >= 0 || VNI->def == ~1U) + if (LHSValNoAssignments[VN] >= 0 || VNI->isUnused()) continue; ComputeUltimateVN(VNI, NewVNInfo, LHSValsDefinedFromRHS, RHSValsDefinedFromLHS, @@ -2232,7 +2264,7 @@ SimpleRegisterCoalescing::JoinIntervals(LiveInterval &LHS, LiveInterval &RHS, i != e; ++i) { VNInfo *VNI = *i; unsigned VN = VNI->id; - if (RHSValNoAssignments[VN] >= 0 || VNI->def == ~1U) + if (RHSValNoAssignments[VN] >= 0 || VNI->isUnused()) continue; // If this value number isn't a copy from the LHS, it's a new number. 
if (RHSValsDefinedFromLHS.find(VNI) == RHSValsDefinedFromLHS.end()) { @@ -2296,7 +2328,8 @@ SimpleRegisterCoalescing::JoinIntervals(LiveInterval &LHS, LiveInterval &RHS, VNInfo *VNI = I->first; unsigned LHSValID = LHSValNoAssignments[VNI->id]; LiveInterval::removeKill(NewVNInfo[LHSValID], VNI->def); - NewVNInfo[LHSValID]->hasPHIKill |= VNI->hasPHIKill; + if (VNI->hasPHIKill()) + NewVNInfo[LHSValID]->setHasPHIKill(true); RHS.addKills(NewVNInfo[LHSValID], VNI->kills); } @@ -2306,7 +2339,8 @@ SimpleRegisterCoalescing::JoinIntervals(LiveInterval &LHS, LiveInterval &RHS, VNInfo *VNI = I->first; unsigned RHSValID = RHSValNoAssignments[VNI->id]; LiveInterval::removeKill(NewVNInfo[RHSValID], VNI->def); - NewVNInfo[RHSValID]->hasPHIKill |= VNI->hasPHIKill; + if (VNI->hasPHIKill()) + NewVNInfo[RHSValID]->setHasPHIKill(true); LHS.addKills(NewVNInfo[RHSValID], VNI->kills); } @@ -2315,10 +2349,12 @@ SimpleRegisterCoalescing::JoinIntervals(LiveInterval &LHS, LiveInterval &RHS, if ((RHS.ranges.size() > LHS.ranges.size() && TargetRegisterInfo::isVirtualRegister(LHS.reg)) || TargetRegisterInfo::isPhysicalRegister(RHS.reg)) { - RHS.join(LHS, &RHSValNoAssignments[0], &LHSValNoAssignments[0], NewVNInfo); + RHS.join(LHS, &RHSValNoAssignments[0], &LHSValNoAssignments[0], NewVNInfo, + mri_); Swapped = true; } else { - LHS.join(RHS, &LHSValNoAssignments[0], &RHSValNoAssignments[0], NewVNInfo); + LHS.join(RHS, &LHSValNoAssignments[0], &RHSValNoAssignments[0], NewVNInfo, + mri_); Swapped = false; } return true; @@ -2620,6 +2656,11 @@ SimpleRegisterCoalescing::TurnCopyIntoImpDef(MachineBasicBlock::iterator &I, return false; LiveInterval &DstInt = li_->getInterval(DstReg); const LiveRange *DstLR = DstInt.getLiveRangeContaining(CopyIdx); + // If the valno extends beyond this basic block, then it's not safe to delete + // the val# or else livein information won't be correct. + MachineBasicBlock *EndMBB = li_->getMBBFromIndex(DstLR->end); + if (EndMBB != MBB) + return false; DstInt.removeValNo(DstLR->valno); CopyMI->setDesc(tii_->get(TargetInstrInfo::IMPLICIT_DEF)); for (int i = CopyMI->getNumOperands() - 1, e = 0; i > e; --i) @@ -2800,7 +2841,8 @@ bool SimpleRegisterCoalescing::runOnMachineFunction(MachineFunction &fn) { } // Slightly prefer live interval that has been assigned a preferred reg. - if (LI.preference) + std::pair<unsigned, unsigned> Hint = mri_->getRegAllocationHint(LI.reg); + if (Hint.first || Hint.second) LI.weight *= 1.01F; // Divide the weight of the interval by its size. This encourages diff --git a/lib/CodeGen/SimpleRegisterCoalescing.h b/lib/CodeGen/SimpleRegisterCoalescing.h index a495bfd..d2c5581 100644 --- a/lib/CodeGen/SimpleRegisterCoalescing.h +++ b/lib/CodeGen/SimpleRegisterCoalescing.h @@ -219,10 +219,10 @@ namespace llvm { bool CanCoalesceWithImpDef(MachineInstr *CopyMI, LiveInterval &li, LiveInterval &ImpLi) const; - /// RemoveCopiesFromValNo - The specified value# is defined by an implicit - /// def and it is being removed. Turn all copies from this value# into - /// identity copies so they will be removed. - void RemoveCopiesFromValNo(LiveInterval &li, VNInfo *VNI); + /// TurnCopiesFromValNoToImpDefs - The specified value# is defined by an + /// implicit_def and it is being removed. Turn all copies from this value# + /// into implicit_defs. + void TurnCopiesFromValNoToImpDefs(LiveInterval &li, VNInfo *VNI); /// isWinToJoinVRWithSrcPhysReg - Return true if it's worthwhile to join /// a virtual destination register with a physical source register.
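Most of the coalescer churn above is mechanical: VNInfo's public bools (hasPHIKill, redefByEC) and the ~0U/~1U def sentinels give way to flag accessors (isPHIDef(), isUnused(), isDefAccurate(), hasPHIKill(), setHasPHIKill()). A minimal sketch of the accessor shape this implies; the real definition lives in LiveInterval.h, so treat the layout here as an assumption:

// Sketch: per-value-number predicates packed into one flags word.
class VNInfoSketch {
  enum Flags {
    IS_PHI_DEF      = 1,
    IS_UNUSED       = 2,
    IS_DEF_ACCURATE = 4,
    HAS_PHI_KILL    = 8
  };
  unsigned flags;
public:
  VNInfoSketch() : flags(0) {}
  bool isPHIDef() const      { return (flags & IS_PHI_DEF) != 0; }
  bool isUnused() const      { return (flags & IS_UNUSED) != 0; }
  bool isDefAccurate() const { return (flags & IS_DEF_ACCURATE) != 0; }
  bool hasPHIKill() const    { return (flags & HAS_PHI_KILL) != 0; }
  void setHasPHIKill(bool b) {
    if (b) flags |= HAS_PHI_KILL;
    else   flags &= ~(unsigned)HAS_PHI_KILL;
  }
  // Bulk copy, as the JoinCopy hunk does with setFlags(getFlags()).
  unsigned getFlags() const  { return flags; }
  void setFlags(unsigned f)  { flags = f; }
};

One payoff visible above: NewValNo->setFlags(ValNo->getFlags()) replaces two field-by-field copies and will not silently miss flags added later.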
diff --git a/lib/CodeGen/Spiller.cpp b/lib/CodeGen/Spiller.cpp index ce63121..919a0ce 100644 --- a/lib/CodeGen/Spiller.cpp +++ b/lib/CodeGen/Spiller.cpp @@ -39,7 +39,8 @@ protected: VirtRegMap *vrm; /// Construct a spiller base. - SpillerBase(MachineFunction *mf, LiveIntervals *lis, LiveStacks *ls, VirtRegMap *vrm) : + SpillerBase(MachineFunction *mf, LiveIntervals *lis, LiveStacks *ls, + VirtRegMap *vrm) : mf(mf), lis(lis), ls(ls), vrm(vrm) { mfi = mf->getFrameInfo(); @@ -47,16 +48,24 @@ protected: tii = mf->getTarget().getInstrInfo(); } - /// Insert a store of the given vreg to the given stack slot immediately - /// after the given instruction. Returns the base index of the inserted - /// instruction. The caller is responsible for adding an appropriate - /// LiveInterval to the LiveIntervals analysis. - unsigned insertStoreFor(MachineInstr *mi, unsigned ss, - unsigned newVReg, - const TargetRegisterClass *trc) { - MachineBasicBlock::iterator nextInstItr(mi); - ++nextInstItr; + /// Ensures there is space before the given machine instruction, returns the + /// instruction's new number. + unsigned makeSpaceBefore(MachineInstr *mi) { + if (!lis->hasGapBeforeInstr(lis->getInstructionIndex(mi))) { + lis->scaleNumbering(2); + ls->scaleNumbering(2); + } + + unsigned miIdx = lis->getInstructionIndex(mi); + assert(lis->hasGapBeforeInstr(miIdx)); + + return miIdx; + } + + /// Ensure there is space after the given machine instruction, returns the + /// instruction's new number. + unsigned makeSpaceAfter(MachineInstr *mi) { if (!lis->hasGapAfterInstr(lis->getInstructionIndex(mi))) { lis->scaleNumbering(2); ls->scaleNumbering(2); @@ -66,7 +75,24 @@ protected: assert(lis->hasGapAfterInstr(miIdx)); - tii->storeRegToStackSlot(*mi->getParent(), nextInstItr, newVReg, + return miIdx; + } + + + /// Insert a store of the given vreg to the given stack slot immediately + /// after the given instruction. Returns the base index of the inserted + /// instruction. The caller is responsible for adding an appropriate + /// LiveInterval to the LiveIntervals analysis. + unsigned insertStoreFor(MachineInstr *mi, unsigned ss, + unsigned vreg, + const TargetRegisterClass *trc) { + + MachineBasicBlock::iterator nextInstItr(mi); + ++nextInstItr; + + unsigned miIdx = makeSpaceAfter(mi); + + tii->storeRegToStackSlot(*mi->getParent(), nextInstItr, vreg, true, ss, trc); MachineBasicBlock::iterator storeInstItr(mi); ++storeInstItr; @@ -81,25 +107,35 @@ protected: return storeInstIdx; } + void insertStoreOnInterval(LiveInterval *li, + MachineInstr *mi, unsigned ss, + unsigned vreg, + const TargetRegisterClass *trc) { + + unsigned storeInstIdx = insertStoreFor(mi, ss, vreg, trc); + unsigned start = lis->getDefIndex(lis->getInstructionIndex(mi)), + end = lis->getUseIndex(storeInstIdx); + + VNInfo *vni = + li->getNextValue(storeInstIdx, 0, true, lis->getVNInfoAllocator()); + vni->kills.push_back(storeInstIdx); + LiveRange lr(start, end, vni); + + li->addRange(lr); + } + /// Insert a load of the given vreg from the given stack slot immediately /// before the given instruction. Returns the base index of the inserted /// instruction. The caller is responsible for adding an appropriate /// LiveInterval to the LiveIntervals analysis.
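The makeSpaceBefore/makeSpaceAfter helpers above rest on a numbering trick: if no free index exists next to an instruction, every index in the function is doubled, which opens a gap between any two previously adjacent numbers. A toy illustration with plain integers (not the LiveIntervals API):

#include <cstdio>

int main() {
  // Instructions numbered consecutively: no slot free between 4 and 5.
  unsigned idx[3] = { 3, 4, 5 };
  // scaleNumbering(2) doubles everything: { 6, 8, 10 }. Indices 7 and 9 are
  // now free, so a load fits before the instruction at 8, a store after it.
  for (int i = 0; i != 3; ++i)
    idx[i] *= 2;
  std::printf("%u %u %u\n", idx[0], idx[1], idx[2]);
  return 0;
}

Doubling preserves relative order and creates a gap everywhere at once, which is why the LiveIntervals and LiveStacks numberings are scaled together.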
unsigned insertLoadFor(MachineInstr *mi, unsigned ss, - unsigned newVReg, + unsigned vreg, const TargetRegisterClass *trc) { MachineBasicBlock::iterator useInstItr(mi); - - if (!lis->hasGapBeforeInstr(lis->getInstructionIndex(mi))) { - lis->scaleNumbering(2); - ls->scaleNumbering(2); - } - - unsigned miIdx = lis->getInstructionIndex(mi); - - assert(lis->hasGapBeforeInstr(miIdx)); - - tii->loadRegFromStackSlot(*mi->getParent(), useInstItr, newVReg, ss, trc); + + unsigned miIdx = makeSpaceBefore(mi); + + tii->loadRegFromStackSlot(*mi->getParent(), useInstItr, vreg, ss, trc); MachineBasicBlock::iterator loadInstItr(mi); --loadInstItr; MachineInstr *loadInst = &*loadInstItr; @@ -113,6 +149,24 @@ protected: return loadInstIdx; } + void insertLoadOnInterval(LiveInterval *li, + MachineInstr *mi, unsigned ss, + unsigned vreg, + const TargetRegisterClass *trc) { + + unsigned loadInstIdx = insertLoadFor(mi, ss, vreg, trc); + unsigned start = lis->getDefIndex(loadInstIdx), + end = lis->getUseIndex(lis->getInstructionIndex(mi)); + + VNInfo *vni = + li->getNextValue(loadInstIdx, 0, true, lis->getVNInfoAllocator()); + vni->kills.push_back(lis->getInstructionIndex(mi)); + LiveRange lr(start, end, vni); + + li->addRange(lr); + } + + /// Add spill ranges for every use/def of the live interval, inserting loads /// immediately before each use, and stores after each def. No folding is @@ -173,35 +227,16 @@ protected: assert(hasUse || hasDef); if (hasUse) { - unsigned loadInstIdx = insertLoadFor(mi, ss, newVReg, trc); - unsigned start = lis->getDefIndex(loadInstIdx), - end = lis->getUseIndex(lis->getInstructionIndex(mi)); - - VNInfo *vni = - newLI->getNextValue(loadInstIdx, 0, lis->getVNInfoAllocator()); - vni->kills.push_back(lis->getInstructionIndex(mi)); - LiveRange lr(start, end, vni); - - newLI->addRange(lr); + insertLoadOnInterval(newLI, mi, ss, newVReg, trc); } if (hasDef) { - unsigned storeInstIdx = insertStoreFor(mi, ss, newVReg, trc); - unsigned start = lis->getDefIndex(lis->getInstructionIndex(mi)), - end = lis->getUseIndex(storeInstIdx); - - VNInfo *vni = - newLI->getNextValue(storeInstIdx, 0, lis->getVNInfoAllocator()); - vni->kills.push_back(storeInstIdx); - LiveRange lr(start, end, vni); - - newLI->addRange(lr); + insertStoreOnInterval(newLI, mi, ss, newVReg, trc); } added.push_back(newLI); } - return added; } @@ -212,13 +247,44 @@ protected: /// folding. class TrivialSpiller : public SpillerBase { public: - TrivialSpiller(MachineFunction *mf, LiveIntervals *lis, LiveStacks *ls, VirtRegMap *vrm) : + + TrivialSpiller(MachineFunction *mf, LiveIntervals *lis, LiveStacks *ls, + VirtRegMap *vrm) : SpillerBase(mf, lis, ls, vrm) {} std::vector<LiveInterval*> spill(LiveInterval *li) { return trivialSpillEverywhere(li); } + std::vector<LiveInterval*> intraBlockSplit(LiveInterval *li, VNInfo *valno) { + std::vector<LiveInterval*> spillIntervals; + MachineBasicBlock::iterator storeInsertPoint; + + if (valno->isDefAccurate()) { + // If we have an accurate def we can just grab an iterator to the instr + // after the def. + storeInsertPoint = + next(MachineBasicBlock::iterator(lis->getInstructionFromIndex(valno->def))); + } else { + // If the def info isn't accurate we check if this is a PHI def. + // If it is then def holds the index of the defining Basic Block, and we + // can use that to get an insertion point. + if (valno->isPHIDef()) { + + } else { + // We have no usable def info. We can't split this value sensibly. 
+ // FIXME: Need sensible feedback for "failure to split", an empty + // set of spill intervals could be reasonably returned from a + // split where both the store and load are folded. + return spillIntervals; + } + } + + + + return spillIntervals; + } + }; } diff --git a/lib/CodeGen/Spiller.h b/lib/CodeGen/Spiller.h index cad054d..9c3900d 100644 --- a/lib/CodeGen/Spiller.h +++ b/lib/CodeGen/Spiller.h @@ -13,11 +13,14 @@ #include <vector> namespace llvm { + class LiveInterval; class LiveIntervals; class LiveStacks; class MachineFunction; + class MachineInstr; class VirtRegMap; + class VNInfo; /// Spiller interface. /// @@ -26,7 +29,15 @@ namespace llvm { class Spiller { public: virtual ~Spiller() = 0; + + /// Spill the given live range. The method used will depend on the Spiller + /// implementation selected. virtual std::vector<LiveInterval*> spill(LiveInterval *li) = 0; + + /// Intra-block split. + virtual std::vector<LiveInterval*> intraBlockSplit(LiveInterval *li, + VNInfo *valno) = 0; + }; /// Create and return a spiller object, as specified on the command line. diff --git a/lib/CodeGen/StrongPHIElimination.cpp b/lib/CodeGen/StrongPHIElimination.cpp index a2c1255..ca99528 100644 --- a/lib/CodeGen/StrongPHIElimination.cpp +++ b/lib/CodeGen/StrongPHIElimination.cpp @@ -827,7 +827,7 @@ void StrongPHIElimination::InsertCopies(MachineDomTreeNode* MDTN, // Add a live range for the new vreg LiveInterval& Int = LI.getInterval(I->getOperand(i).getReg()); VNInfo* FirstVN = *Int.vni_begin(); - FirstVN->hasPHIKill = false; + FirstVN->setHasPHIKill(false); if (I->getOperand(i).isKill()) FirstVN->kills.push_back( LiveIntervals::getUseIndex(LI.getInstructionIndex(I))); @@ -886,10 +886,7 @@ bool StrongPHIElimination::mergeLiveIntervals(unsigned primary, VNInfo* OldVN = R.valno; VNInfo*& NewVN = VNMap[OldVN]; if (!NewVN) { - NewVN = LHS.getNextValue(OldVN->def, - OldVN->copy, - LI.getVNInfoAllocator()); - NewVN->kills = OldVN->kills; + NewVN = LHS.createValueCopy(OldVN, LI.getVNInfoAllocator()); } LiveRange LR (R.start, R.end, NewVN); @@ -987,7 +984,7 @@ bool StrongPHIElimination::runOnMachineFunction(MachineFunction &Fn) { LiveInterval& Int = LI.getOrCreateInterval(I->first); const LiveRange* LR = Int.getLiveRangeContaining(LI.getMBBEndIdx(SI->second)); - LR->valno->hasPHIKill = true; + LR->valno->setHasPHIKill(true); I->second.erase(SI->first); } @@ -1037,7 +1034,7 @@ bool StrongPHIElimination::runOnMachineFunction(MachineFunction &Fn) { // now has an unknown def. 
unsigned idx = LI.getDefIndex(LI.getInstructionIndex(PInstr)); const LiveRange* PLR = PI.getLiveRangeContaining(idx); - PLR->valno->def = ~0U; + PLR->valno->setIsPHIDef(true); LiveRange R (LI.getMBBStartIdx(PInstr->getParent()), PLR->start, PLR->valno); PI.addRange(R); diff --git a/lib/CodeGen/VirtRegMap.cpp b/lib/CodeGen/VirtRegMap.cpp index 29637b9..4d3417f 100644 --- a/lib/CodeGen/VirtRegMap.cpp +++ b/lib/CodeGen/VirtRegMap.cpp @@ -51,6 +51,7 @@ static RegisterPass<VirtRegMap> X("virtregmap", "Virtual Register Map"); bool VirtRegMap::runOnMachineFunction(MachineFunction &mf) { + MRI = &mf.getRegInfo(); TII = mf.getTarget().getInstrInfo(); TRI = mf.getTarget().getRegisterInfo(); MF = &mf; @@ -98,6 +99,18 @@ void VirtRegMap::grow() { ImplicitDefed.resize(LastVirtReg-TargetRegisterInfo::FirstVirtualRegister+1); } +unsigned VirtRegMap::getRegAllocPref(unsigned virtReg) { + std::pair<unsigned, unsigned> Hint = MRI->getRegAllocationHint(virtReg); + unsigned physReg = Hint.second; + if (physReg && + TargetRegisterInfo::isVirtualRegister(physReg) && hasPhys(physReg)) + physReg = getPhys(physReg); + if (Hint.first == 0) + return (physReg && TargetRegisterInfo::isPhysicalRegister(physReg)) + ? physReg : 0; + return TRI->ResolveRegAllocHint(Hint.first, physReg, *MF); +} + int VirtRegMap::assignVirt2StackSlot(unsigned virtReg) { assert(TargetRegisterInfo::isVirtualRegister(virtReg)); assert(Virt2StackSlotMap[virtReg] == NO_STACK_SLOT && @@ -213,8 +226,7 @@ void VirtRegMap::RemoveMachineInstrFromMaps(MachineInstr *MI) { /// FindUnusedRegisters - Gather a list of allocatable registers that /// have not been allocated to any virtual register. -bool VirtRegMap::FindUnusedRegisters(const TargetRegisterInfo *TRI, - LiveIntervals* LIs) { +bool VirtRegMap::FindUnusedRegisters(LiveIntervals* LIs) { unsigned NumRegs = TRI->getNumRegs(); UnusedRegs.reset(); UnusedRegs.resize(NumRegs); diff --git a/lib/CodeGen/VirtRegMap.h b/lib/CodeGen/VirtRegMap.h index 507557d..fe767b7 100644 --- a/lib/CodeGen/VirtRegMap.h +++ b/lib/CodeGen/VirtRegMap.h @@ -31,6 +31,7 @@ namespace llvm { class LiveIntervals; class MachineInstr; class MachineFunction; + class MachineRegisterInfo; class TargetInstrInfo; class TargetRegisterInfo; @@ -47,6 +48,7 @@ namespace llvm { std::pair<unsigned, ModRef> > MI2VirtMapTy; private: + MachineRegisterInfo *MRI; const TargetInstrInfo *TII; const TargetRegisterInfo *TRI; MachineFunction *MF; @@ -190,6 +192,9 @@ namespace llvm { grow(); } + /// @brief returns the register allocation preference. + unsigned getRegAllocPref(unsigned virtReg); + /// @brief records virtReg is a split live interval from SReg. void setIsSplitFromReg(unsigned virtReg, unsigned SReg) { Virt2SplitMap[virtReg] = SReg; @@ -445,8 +450,7 @@ namespace llvm { /// FindUnusedRegisters - Gather a list of allocatable registers that /// have not been allocated to any virtual register. - bool FindUnusedRegisters(const TargetRegisterInfo *TRI, - LiveIntervals* LIs); + bool FindUnusedRegisters(LiveIntervals* LIs); /// HasUnusedRegisters - Return true if there are any allocatable registers /// that have not been allocated to any virtual register. 
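VirtRegMap::getRegAllocPref above decodes the (type, register) hint pair kept by MachineRegisterInfo: a virtual-register hint is first followed to its assigned physical register, hint type 0 means a plain preference, and any other type is delegated to the target. A condensed sketch of that decision logic, with function-pointer parameters standing in for the real queries (an illustration, not the LLVM interface):

// Sketch of regalloc-hint resolution, mirroring getRegAllocPref above.
// hintType 0 == plain preference; nonzero types are target-specific and
// would go through TargetRegisterInfo::ResolveRegAllocHint instead.
unsigned resolvePref(unsigned hintType, unsigned hintReg,
                     bool (*isVirtual)(unsigned),    // isVirtualRegister
                     bool (*hasPhys)(unsigned),      // vreg already assigned?
                     unsigned (*getPhys)(unsigned))  // its physical register
{
  if (hintReg && isVirtual(hintReg) && hasPhys(hintReg))
    hintReg = getPhys(hintReg); // follow the virtual hint to its phys reg
  if (hintType == 0)
    return (hintReg && !isVirtual(hintReg)) ? hintReg : 0;
  return 0; // target-specific resolution omitted in this sketch
}

The same pair shows up throughout the coalescing changes above via setRegAllocationHint(reg, 0, preferredReg), i.e. a plain type-0 preference.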
diff --git a/lib/ExecutionEngine/ExecutionEngineBindings.cpp b/lib/ExecutionEngine/ExecutionEngineBindings.cpp index 83397a58..401a226 100644 --- a/lib/ExecutionEngine/ExecutionEngineBindings.cpp +++ b/lib/ExecutionEngine/ExecutionEngineBindings.cpp @@ -118,7 +118,7 @@ int LLVMCreateJITCompiler(LLVMExecutionEngineRef *OutJIT, char **OutError) { std::string Error; if (ExecutionEngine *JIT = - ExecutionEngine::createJIT(unwrap(MP), &Error, 0, + ExecutionEngine::create(unwrap(MP), false, &Error, (CodeGenOpt::Level)OptLevel)) { *OutJIT = wrap(JIT); return 0; diff --git a/lib/Support/CMakeLists.txt b/lib/Support/CMakeLists.txt index 7c8ce70..e7a76cc 100644 --- a/lib/Support/CMakeLists.txt +++ b/lib/Support/CMakeLists.txt @@ -19,6 +19,7 @@ add_llvm_library(LLVMSupport PrettyStackTrace.cpp SlowOperationInformer.cpp SmallPtrSet.cpp + SourceMgr.cpp Statistic.cpp Streams.cpp StringExtras.cpp diff --git a/lib/Support/ManagedStatic.cpp b/lib/Support/ManagedStatic.cpp index 6de6575..4e655a0 100644 --- a/lib/Support/ManagedStatic.cpp +++ b/lib/Support/ManagedStatic.cpp @@ -14,18 +14,15 @@ #include "llvm/Support/ManagedStatic.h" #include "llvm/Config/config.h" #include "llvm/System/Atomic.h" -#include "llvm/System/Mutex.h" #include <cassert> using namespace llvm; static const ManagedStaticBase *StaticList = 0; -static sys::Mutex* ManagedStaticMutex = 0; - void ManagedStaticBase::RegisterManagedStatic(void *(*Creator)(), void (*Deleter)(void*)) const { - if (ManagedStaticMutex) { - ManagedStaticMutex->acquire(); + if (llvm_is_multithreaded()) { + llvm_acquire_global_lock(); if (Ptr == 0) { void* tmp = Creator ? Creator() : 0; @@ -39,7 +36,7 @@ void ManagedStaticBase::RegisterManagedStatic(void *(*Creator)(), StaticList = this; } - ManagedStaticMutex->release(); + llvm_release_global_lock(); } else { assert(Ptr == 0 && DeleterFn == 0 && Next == 0 && "Partially initialized ManagedStatic!?"); @@ -68,24 +65,11 @@ void ManagedStaticBase::destroy() const { DeleterFn = 0; } -bool llvm::llvm_start_multithreaded() { -#if LLVM_MULTITHREADED - assert(ManagedStaticMutex == 0 && "Multithreaded LLVM already initialized!"); - ManagedStaticMutex = new sys::Mutex(true); - return true; -#else - return false; -#endif -} - /// llvm_shutdown - Deallocate and destroy all ManagedStatic variables. void llvm::llvm_shutdown() { while (StaticList) StaticList->destroy(); - if (ManagedStaticMutex) { - delete ManagedStaticMutex; - ManagedStaticMutex = 0; - } + if (llvm_is_multithreaded()) llvm_stop_multithreaded(); } diff --git a/lib/Support/SourceMgr.cpp b/lib/Support/SourceMgr.cpp new file mode 100644 index 0000000..d789f10 --- /dev/null +++ b/lib/Support/SourceMgr.cpp @@ -0,0 +1,127 @@ +//===- SourceMgr.cpp - Manager for Simple Source Buffers & Diagnostics ----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the SourceMgr class. This class is used as a simple +// substrate for diagnostics, #include handling, and other low level things for +// simple parsers. 
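The ManagedStatic.cpp hunk above retires the file-local mutex in favor of the new global lock, but the guarded pattern is unchanged: check-under-lock lazy initialization. Stripped to its skeleton, with hypothetical acquire/release parameters standing in for llvm_acquire_global_lock/llvm_release_global_lock:

#include <cassert>
#include <cstddef>

static void *Ptr = NULL;

// Lazily create a singleton. Under threading, the check happens while the
// lock is held, so only one thread ever runs the creator.
void *lazyGet(void *(*creator)(), bool multithreaded,
              void (*acquire)(), void (*release)()) {
  if (multithreaded) {
    acquire();                        // llvm_acquire_global_lock() in LLVM
    if (Ptr == NULL)
      Ptr = creator ? creator() : NULL;
    release();                        // llvm_release_global_lock()
  } else {
    assert(Ptr == NULL && "Partially initialized static!?");
    Ptr = creator ? creator() : NULL;
  }
  return Ptr;
}

The real RegisterManagedStatic additionally links each instance into StaticList so that llvm_shutdown() can destroy everything in reverse registration order.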
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/SourceMgr.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +SourceMgr::~SourceMgr() { + while (!Buffers.empty()) { + delete Buffers.back().Buffer; + Buffers.pop_back(); + } +} + +/// AddIncludeFile - Search for a file with the specified name in the current +/// directory or in one of the IncludeDirs. If no file is found, this returns +/// ~0, otherwise it returns the buffer ID of the stacked file. +unsigned SourceMgr::AddIncludeFile(const std::string &Filename, + SMLoc IncludeLoc) { + + MemoryBuffer *NewBuf = MemoryBuffer::getFile(Filename.c_str()); + + // If the file didn't exist directly, see if it's in an include path. + for (unsigned i = 0, e = IncludeDirectories.size(); i != e && !NewBuf; ++i) { + std::string IncFile = IncludeDirectories[i] + "/" + Filename; + NewBuf = MemoryBuffer::getFile(IncFile.c_str()); + } + + if (NewBuf == 0) return ~0U; + + return AddNewSourceBuffer(NewBuf, IncludeLoc); +} + + +/// FindBufferContainingLoc - Return the ID of the buffer containing the +/// specified location, returning -1 if not found. +int SourceMgr::FindBufferContainingLoc(SMLoc Loc) const { + for (unsigned i = 0, e = Buffers.size(); i != e; ++i) + if (Loc.getPointer() >= Buffers[i].Buffer->getBufferStart() && + // Use <= here so that a pointer to the null at the end of the buffer + // is included as part of the buffer. + Loc.getPointer() <= Buffers[i].Buffer->getBufferEnd()) + return i; + return -1; +} + +/// FindLineNumber - Find the line number for the specified location in the +/// specified file. This is not a fast method. +unsigned SourceMgr::FindLineNumber(SMLoc Loc, int BufferID) const { + if (BufferID == -1) BufferID = FindBufferContainingLoc(Loc); + assert(BufferID != -1 && "Invalid Location!"); + + MemoryBuffer *Buff = getBufferInfo(BufferID).Buffer; + + // Count the number of \n's between the start of the file and the specified + // location. + unsigned LineNo = 1; + + const char *Ptr = Buff->getBufferStart(); + + for (; SMLoc::getFromPointer(Ptr) != Loc; ++Ptr) + if (*Ptr == '\n') ++LineNo; + return LineNo; +} + +void SourceMgr::PrintIncludeStack(SMLoc IncludeLoc) const { + if (IncludeLoc == SMLoc()) return; // Top of stack. + + int CurBuf = FindBufferContainingLoc(IncludeLoc); + assert(CurBuf != -1 && "Invalid or unspecified location!"); + + PrintIncludeStack(getBufferInfo(CurBuf).IncludeLoc); + + errs() << "Included from " + << getBufferInfo(CurBuf).Buffer->getBufferIdentifier() + << ":" << FindLineNumber(IncludeLoc, CurBuf) << ":\n"; +} + + +void SourceMgr::PrintMessage(SMLoc Loc, const std::string &Msg) const { + raw_ostream &OS = errs(); + + // First thing to do: find the current buffer containing the specified + // location. + int CurBuf = FindBufferContainingLoc(Loc); + assert(CurBuf != -1 && "Invalid or unspecified location!"); + + PrintIncludeStack(getBufferInfo(CurBuf).IncludeLoc); + + MemoryBuffer *CurMB = getBufferInfo(CurBuf).Buffer; + + + OS << "Parsing " << CurMB->getBufferIdentifier() << ":" + << FindLineNumber(Loc, CurBuf) << ": "; + + OS << Msg << "\n"; + + // Scan backward to find the start of the line. + const char *LineStart = Loc.getPointer(); + while (LineStart != CurMB->getBufferStart() && + LineStart[-1] != '\n' && LineStart[-1] != '\r') + --LineStart; + // Get the end of the line. 
+ const char *LineEnd = Loc.getPointer(); + while (LineEnd != CurMB->getBufferEnd() && + LineEnd[0] != '\n' && LineEnd[0] != '\r') + ++LineEnd; + // Print out the line. + OS << std::string(LineStart, LineEnd) << "\n"; + // Print out spaces before the caret. + for (const char *Pos = LineStart; Pos != Loc.getPointer(); ++Pos) + OS << (*Pos == '\t' ? '\t' : ' '); + OS << "^\n"; +} diff --git a/lib/Support/Triple.cpp b/lib/Support/Triple.cpp index e8cf69d..dd5c3d6 100644 --- a/lib/Support/Triple.cpp +++ b/lib/Support/Triple.cpp @@ -43,6 +43,7 @@ const char *Triple::getOSTypeName(OSType Kind) { switch (Kind) { case UnknownOS: return "unknown"; + case AuroraUX: return "auroraux"; case Darwin: return "darwin"; case DragonFly: return "dragonfly"; case FreeBSD: return "freebsd"; @@ -79,7 +80,9 @@ void Triple::Parse() const { Vendor = UnknownVendor; std::string OSName = getOSName(); - if (memcmp(&OSName[0], "darwin", 6) == 0) + if (memcmp(&OSName[0], "auroraux", 8) == 0) + OS = AuroraUX; + else if (memcmp(&OSName[0], "darwin", 6) == 0) OS = Darwin; else if (memcmp(&OSName[0], "dragonfly", 9) == 0) OS = DragonFly; diff --git a/lib/System/Atomic.cpp b/lib/System/Atomic.cpp index 2827d88..416f981 100644 --- a/lib/System/Atomic.cpp +++ b/lib/System/Atomic.cpp @@ -51,3 +51,31 @@ sys::cas_flag sys::CompareAndSwap(volatile sys::cas_flag* ptr, # error No compare-and-swap implementation for your platform! #endif } + +sys::cas_flag sys::AtomicIncrement(volatile sys::cas_flag* ptr) { +#if LLVM_MULTITHREADED==0 + ++(*ptr); + return *ptr; +#elif defined(__GNUC__) + return __sync_add_and_fetch(ptr, 1); +#elif defined(_MSC_VER) + return InterlockedIncrement(ptr); +#else +# error No atomic increment implementation for your platform! +#endif +} + +sys::cas_flag sys::AtomicDecrement(volatile sys::cas_flag* ptr) { +#if LLVM_MULTITHREADED==0 + --(*ptr); + return *ptr; +#elif defined(__GNUC__) + return __sync_sub_and_fetch(ptr, 1); +#elif defined(_MSC_VER) + return InterlockedDecrement(ptr); +#else +# error No atomic decrement implementation for your platform! 
+#endif +} + + diff --git a/lib/System/CMakeLists.txt b/lib/System/CMakeLists.txt index 5415dd6..a5a56e8 100644 --- a/lib/System/CMakeLists.txt +++ b/lib/System/CMakeLists.txt @@ -10,7 +10,9 @@ add_llvm_library(LLVMSystem Path.cpp Process.cpp Program.cpp + RWMutex.cpp Signals.cpp + Threading.cpp TimeValue.cpp ) diff --git a/lib/System/Mutex.cpp b/lib/System/Mutex.cpp index d95c25b..a5e9920 100644 --- a/lib/System/Mutex.cpp +++ b/lib/System/Mutex.cpp @@ -23,11 +23,11 @@ // Define all methods as no-ops if threading is explicitly disabled namespace llvm { using namespace sys; -Mutex::Mutex( bool recursive) { } -Mutex::~Mutex() { } -bool Mutex::acquire() { return true; } -bool Mutex::release() { return true; } -bool Mutex::tryacquire() { return true; } +MutexImpl::MutexImpl( bool recursive) { } +MutexImpl::~MutexImpl() { } +bool MutexImpl::acquire() { return true; } +bool MutexImpl::release() { return true; } +bool MutexImpl::tryacquire() { return true; } } #else @@ -55,7 +55,7 @@ using namespace sys; static const bool pthread_enabled = true; // Construct a Mutex using pthread calls -Mutex::Mutex( bool recursive) +MutexImpl::MutexImpl( bool recursive) : data_(0) { if (pthread_enabled) @@ -94,7 +94,7 @@ Mutex::Mutex( bool recursive) } // Destruct a Mutex -Mutex::~Mutex() +MutexImpl::~MutexImpl() { if (pthread_enabled) { @@ -106,7 +106,7 @@ Mutex::~Mutex() } bool -Mutex::acquire() +MutexImpl::acquire() { if (pthread_enabled) { @@ -120,7 +120,7 @@ Mutex::acquire() } bool -Mutex::release() +MutexImpl::release() { if (pthread_enabled) { @@ -134,7 +134,7 @@ Mutex::release() } bool -Mutex::tryacquire() +MutexImpl::tryacquire() { if (pthread_enabled) { diff --git a/lib/System/RWMutex.cpp b/lib/System/RWMutex.cpp new file mode 100644 index 0000000..15d98cb --- /dev/null +++ b/lib/System/RWMutex.cpp @@ -0,0 +1,175 @@ +//===- RWMutex.cpp - Reader/Writer Mutual Exclusion Lock --------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the llvm::sys::RWMutex class. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Config/config.h" +#include "llvm/System/RWMutex.h" +#include <cstring> + +//===----------------------------------------------------------------------===// +//=== WARNING: Implementation here must contain only TRULY operating system +//=== independent code. +//===----------------------------------------------------------------------===// + +#if !defined(ENABLE_THREADS) || ENABLE_THREADS == 0 +// Define all methods as no-ops if threading is explicitly disabled +namespace llvm { +using namespace sys; +RWMutexImpl::RWMutexImpl() { } +RWMutexImpl::~RWMutexImpl() { } +bool RWMutexImpl::reader_acquire() { return true; } +bool RWMutexImpl::reader_release() { return true; } +bool RWMutexImpl::writer_acquire() { return true; } +bool RWMutexImpl::writer_release() { return true; } +} +#else + +#if defined(HAVE_PTHREAD_H) && defined(HAVE_PTHREAD_RWLOCK_INIT) + +#include <cassert> +#include <pthread.h> +#include <stdlib.h> + +namespace llvm { +using namespace sys; + + +// This variable is useful for situations where the pthread library has been +// compiled with weak linkage for its interface symbols. This allows the +// threading support to be turned off by simply not linking against -lpthread. 
+// In that situation, the value of pthread_mutex_init will be 0 and +// consequently pthread_enabled will be false. In such situations, all the +// pthread operations become no-ops and the functions all return false. If +// pthread_rwlock_init does have an address, then rwlock support is enabled. +// Note: all LLVM tools will link against -lpthread if it's available since it +// is configured into the LIBS variable. +// Note: this line of code generates a warning if pthread_rwlock_init is not +// declared with weak linkage. It's safe to ignore the warning. +static const bool pthread_enabled = true; + +// Construct a RWMutex using pthread calls +RWMutexImpl::RWMutexImpl() + : data_(0) +{ + if (pthread_enabled) + { + // Declare the pthread_rwlock data structures + pthread_rwlock_t* rwlock = + static_cast<pthread_rwlock_t*>(malloc(sizeof(pthread_rwlock_t))); + +#ifdef __APPLE__ + // Workaround a bug/mis-feature in Darwin's pthread_rwlock_init. + bzero(rwlock, sizeof(pthread_rwlock_t)); +#endif + + pthread_rwlockattr_t attr; + + // Initialize the rwlock attributes + int errorcode = pthread_rwlockattr_init(&attr); + assert(errorcode == 0); + +#if !defined(__FreeBSD__) && !defined(__OpenBSD__) && !defined(__NetBSD__) && !defined(__DragonFly__) + // Make it a process local rwlock + errorcode = pthread_rwlockattr_setpshared(&attr, PTHREAD_PROCESS_PRIVATE); +#endif + + // Initialize the rwlock + errorcode = pthread_rwlock_init(rwlock, &attr); + assert(errorcode == 0); + + // Destroy the attributes + errorcode = pthread_rwlockattr_destroy(&attr); + assert(errorcode == 0); + + // Assign the data member + data_ = rwlock; + } +} + +// Destruct a RWMutex +RWMutexImpl::~RWMutexImpl() +{ + if (pthread_enabled) + { + pthread_rwlock_t* rwlock = static_cast<pthread_rwlock_t*>(data_); + assert(rwlock != 0); + pthread_rwlock_destroy(rwlock); + free(rwlock); + } +} + +bool +RWMutexImpl::reader_acquire() +{ + if (pthread_enabled) + { + pthread_rwlock_t* rwlock = static_cast<pthread_rwlock_t*>(data_); + assert(rwlock != 0); + + int errorcode = pthread_rwlock_rdlock(rwlock); + return errorcode == 0; + } + return false; +} + +bool +RWMutexImpl::reader_release() +{ + if (pthread_enabled) + { + pthread_rwlock_t* rwlock = static_cast<pthread_rwlock_t*>(data_); + assert(rwlock != 0); + + int errorcode = pthread_rwlock_unlock(rwlock); + return errorcode == 0; + } + return false; +} + +bool +RWMutexImpl::writer_acquire() +{ + if (pthread_enabled) + { + pthread_rwlock_t* rwlock = static_cast<pthread_rwlock_t*>(data_); + assert(rwlock != 0); + + int errorcode = pthread_rwlock_wrlock(rwlock); + return errorcode == 0; + } + return false; +} + +bool +RWMutexImpl::writer_release() +{ + if (pthread_enabled) + { + pthread_rwlock_t* rwlock = static_cast<pthread_rwlock_t*>(data_); + assert(rwlock != 0); + + int errorcode = pthread_rwlock_unlock(rwlock); + return errorcode == 0; + } + return false; +} + +} + +#elif defined(LLVM_ON_UNIX) +#include "Unix/RWMutex.inc" +#elif defined( LLVM_ON_WIN32) +#include "Win32/RWMutex.inc" +#else +#warning Neither LLVM_ON_UNIX nor LLVM_ON_WIN32 was set in System/RWMutex.cpp +#endif +#endif diff --git a/lib/System/Threading.cpp b/lib/System/Threading.cpp new file mode 100644 index 0000000..a2d7f82 --- /dev/null +++ b/lib/System/Threading.cpp @@ -0,0 +1,63 @@ +//===-- llvm/System/Threading.cpp- Control multithreading mode --*- C++ -*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details.
+// +//===----------------------------------------------------------------------===// +// +// This file implements llvm_start_multithreaded() and friends. +// +//===----------------------------------------------------------------------===// + +#include "llvm/System/Threading.h" +#include "llvm/System/Atomic.h" +#include "llvm/System/Mutex.h" +#include <cassert> + +using namespace llvm; + +static bool multithreaded_mode = false; + +static sys::Mutex* global_lock = 0; + +bool llvm::llvm_start_multithreaded() { +#ifdef LLVM_MULTITHREADED + assert(!multithreaded_mode && "Already multithreaded!"); + multithreaded_mode = true; + global_lock = new sys::Mutex(true); + + // We fence here to ensure that all initialization is complete BEFORE we + // return from llvm_start_multithreaded(). + sys::MemoryFence(); + return true; +#else + return false; +#endif +} + +void llvm::llvm_stop_multithreaded() { +#ifdef LLVM_MULTITHREADED + assert(multithreaded_mode && "Not currently multithreaded!"); + + // We fence here to ensure that all threaded operations are complete BEFORE we + // return from llvm_stop_multithreaded(). + sys::MemoryFence(); + + multithreaded_mode = false; + delete global_lock; +#endif +} + +bool llvm::llvm_is_multithreaded() { + return multithreaded_mode; +} + +void llvm::llvm_acquire_global_lock() { + if (multithreaded_mode) global_lock->acquire(); +} + +void llvm::llvm_release_global_lock() { + if (multithreaded_mode) global_lock->release(); +} diff --git a/lib/System/Unix/Mutex.inc b/lib/System/Unix/Mutex.inc index 4a015a6..10e7ecb 100644 --- a/lib/System/Unix/Mutex.inc +++ b/lib/System/Unix/Mutex.inc @@ -20,28 +20,28 @@ namespace llvm { using namespace sys; -Mutex::Mutex( bool recursive) +MutexImpl::MutexImpl( bool recursive) { } -Mutex::~Mutex() +MutexImpl::~MutexImpl() { } bool -Mutex::acquire() +MutexImpl::acquire() { return true; } bool -Mutex::release() +MutexImpl::release() { return true; } bool -Mutex::tryacquire( void ) +MutexImpl::tryacquire( void ) { return true; } diff --git a/lib/System/Unix/Path.inc b/lib/System/Unix/Path.inc index d5edee1..1f73571 100644 --- a/lib/System/Unix/Path.inc +++ b/lib/System/Unix/Path.inc @@ -104,6 +104,14 @@ Path::isValid() const { } bool +Path::isAbsolute(const char *NameStart, unsigned NameLen) { + assert(NameStart); + if (NameLen == 0) + return false; + return NameStart[0] == '/'; +} + +bool Path::isAbsolute() const { if (path.empty()) return false; diff --git a/lib/System/Unix/RWMutex.inc b/lib/System/Unix/RWMutex.inc new file mode 100644 index 0000000..e83d41e --- /dev/null +++ b/lib/System/Unix/RWMutex.inc @@ -0,0 +1,43 @@ +//= llvm/System/Unix/RWMutex.inc - Unix Reader/Writer Mutual Exclusion Lock =// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the Unix specific (non-pthread) RWMutex class. +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +//=== WARNING: Implementation here must contain only generic UNIX code that +//=== is guaranteed to work on *all* UNIX variants.
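Threading.cpp above defines the lifecycle that ManagedStatic and the global lock now depend on. As the assertions imply, the intended pattern is a single start/stop pair driven from one thread; a usage sketch against the API introduced in this commit:

#include "llvm/System/Threading.h"

int main() {
  // Returns false when LLVM was built without LLVM_MULTITHREADED; the global
  // lock is then never created and the locking entry points are no-ops.
  if (llvm::llvm_start_multithreaded()) {
    // Safe to spawn threads here: ManagedStatic initialization is now
    // serialized through llvm_acquire_global_lock()/llvm_release_global_lock().

    // Tear down only after all other threads are done; this deletes the
    // global lock (and would assert if multithreading was never started).
    llvm::llvm_stop_multithreaded();
  }
  return 0;
}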
+//===----------------------------------------------------------------------===// + +namespace llvm { + +using namespace sys; + +RWMutexImpl::RWMutexImpl() { } + +RWMutexImpl::~RWMutexImpl() { } + +bool RWMutexImpl::reader_acquire() { + return true; +} + +bool RWMutexImpl::reader_release() { + return true; +} + +bool RWMutexImpl::writer_acquire() { + return true; +} + +bool RWMutexImpl::writer_release() { + return true; +} + +} diff --git a/lib/System/Unix/Unix.h b/lib/System/Unix/Unix.h index 452226f..c2c06dd 100644 --- a/lib/System/Unix/Unix.h +++ b/lib/System/Unix/Unix.h @@ -79,12 +79,19 @@ static inline bool MakeErrMsg( return true; char buffer[MAXPATHLEN]; buffer[0] = 0; + char* str = buffer; if (errnum == -1) errnum = errno; #ifdef HAVE_STRERROR_R // strerror_r is thread-safe. if (errnum) +# if defined(__GLIBC__) && defined(_GNU_SOURCE) + // glibc defines its own incompatible version of strerror_r + // which may not use the buffer supplied. + str = strerror_r(errnum,buffer,MAXPATHLEN-1); +# else strerror_r(errnum,buffer,MAXPATHLEN-1); +# endif #elif HAVE_STRERROR // Copy the thread un-safe result of strerror into // the buffer as fast as possible to minimize impact @@ -97,7 +104,7 @@ static inline bool MakeErrMsg( // but, oh well, just use a generic message sprintf(buffer, "Error #%d", errnum); #endif - *ErrMsg = prefix + ": " + buffer; + *ErrMsg = prefix + ": " + str; return true; } diff --git a/lib/System/Win32/Mutex.inc b/lib/System/Win32/Mutex.inc index 7c1723b..75f01fe 100644 --- a/lib/System/Win32/Mutex.inc +++ b/lib/System/Win32/Mutex.inc @@ -22,13 +22,13 @@ namespace llvm { using namespace sys; -Mutex::Mutex(bool /*recursive*/) +MutexImpl::MutexImpl(bool /*recursive*/) { data_ = new CRITICAL_SECTION; InitializeCriticalSection((LPCRITICAL_SECTION)data_); } -Mutex::~Mutex() +MutexImpl::~MutexImpl() { DeleteCriticalSection((LPCRITICAL_SECTION)data_); delete (LPCRITICAL_SECTION)data_; @@ -36,21 +36,21 @@ Mutex::~Mutex() } bool -Mutex::acquire() +MutexImpl::acquire() { EnterCriticalSection((LPCRITICAL_SECTION)data_); return true; } bool -Mutex::release() +MutexImpl::release() { LeaveCriticalSection((LPCRITICAL_SECTION)data_); return true; } bool -Mutex::tryacquire() +MutexImpl::tryacquire() { return TryEnterCriticalSection((LPCRITICAL_SECTION)data_); } diff --git a/lib/System/Win32/Path.inc b/lib/System/Win32/Path.inc index fbf8f66..683c94b 100644 --- a/lib/System/Win32/Path.inc +++ b/lib/System/Win32/Path.inc @@ -125,6 +125,20 @@ Path::isValid() const { return true; } +bool +Path::isAbsolute(const char *NameStart, unsigned NameLen) { + assert(NameStart); + switch (NameLen) { + case 0: + return false; + case 1: + case 2: + return NameStart[0] == '/'; + default: + return NameStart[0] == '/' || (NameStart[1] == ':' && NameStart[2] == '/'); + } +} + bool Path::isAbsolute() const { switch (path.length()) { @@ -234,7 +248,9 @@ Path::GetCurrentDirectory() { /// GetMainExecutable - Return the path to the main executable, given the /// value of argv[0] from program startup. Path Path::GetMainExecutable(const char *argv0, void *MainAddr) { - return Path(); + char pathname[MAX_PATH]; + DWORD ret = ::GetModuleFileNameA(NULL, pathname, MAX_PATH); + return ret != MAX_PATH ? 
Path(pathname) : Path(); } diff --git a/lib/System/Win32/RWMutex.inc b/lib/System/Win32/RWMutex.inc new file mode 100644 index 0000000..e269226 --- /dev/null +++ b/lib/System/Win32/RWMutex.inc @@ -0,0 +1,58 @@ +//= llvm/System/Win32/RWMutex.inc - Win32 Reader/Writer Mutual Exclusion Lock =// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the Win32 specific (non-pthread) RWMutex class. +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +//=== WARNING: Implementation here must contain only generic Win32 code that +//=== is guaranteed to work on *all* Win32 variants. +//===----------------------------------------------------------------------===// + +#include "Win32.h" + +// FIXME: Windows does not have reader-writer locks pre-Vista. If you want +// real reader-writer locks, you need a pthreads implementation for Windows. + +namespace llvm { +using namespace sys; + +RWMutexImpl::RWMutexImpl() { + data_ = calloc(1, sizeof(CRITICAL_SECTION)); + InitializeCriticalSection(static_cast<LPCRITICAL_SECTION>(data_)); +} + +RWMutexImpl::~RWMutexImpl() { + DeleteCriticalSection(static_cast<LPCRITICAL_SECTION>(data_)); + free(data_); +} + +bool RWMutexImpl::reader_acquire() { + EnterCriticalSection(static_cast<LPCRITICAL_SECTION>(data_)); + return true; +} + +bool RWMutexImpl::reader_release() { + LeaveCriticalSection(static_cast<LPCRITICAL_SECTION>(data_)); + return true; +} + +bool RWMutexImpl::writer_acquire() { + EnterCriticalSection(static_cast<LPCRITICAL_SECTION>(data_)); + return true; +} + +bool RWMutexImpl::writer_release() { + LeaveCriticalSection(static_cast<LPCRITICAL_SECTION>(data_)); + return true; +} + + +} diff --git a/lib/Target/ARM/ARM.td b/lib/Target/ARM/ARM.td index 594811d..9001e50 100644 --- a/lib/Target/ARM/ARM.td +++ b/lib/Target/ARM/ARM.td @@ -45,62 +45,72 @@ def FeatureThumb2 : SubtargetFeature<"thumb2", "ThumbMode", "Thumb2", // ARM Processors supported. // -class Proc<string Name, list<SubtargetFeature> Features> - : Processor<Name, NoItineraries, Features>; +include "ARMSchedule.td" + +class ProcNoItin<string Name, list<SubtargetFeature> Features> + : Processor<Name, GenericItineraries, Features>; // V4 Processors. -def : Proc<"generic", []>; -def : Proc<"arm8", []>; -def : Proc<"arm810", []>; -def : Proc<"strongarm", []>; -def : Proc<"strongarm110", []>; -def : Proc<"strongarm1100", []>; -def : Proc<"strongarm1110", []>; +def : ProcNoItin<"generic", []>; +def : ProcNoItin<"arm8", []>; +def : ProcNoItin<"arm810", []>; +def : ProcNoItin<"strongarm", []>; +def : ProcNoItin<"strongarm110", []>; +def : ProcNoItin<"strongarm1100", []>; +def : ProcNoItin<"strongarm1110", []>; // V4T Processors.
-def : Proc<"arm7tdmi", [ArchV4T]>; -def : Proc<"arm7tdmi-s", [ArchV4T]>; -def : Proc<"arm710t", [ArchV4T]>; -def : Proc<"arm720t", [ArchV4T]>; -def : Proc<"arm9", [ArchV4T]>; -def : Proc<"arm9tdmi", [ArchV4T]>; -def : Proc<"arm920", [ArchV4T]>; -def : Proc<"arm920t", [ArchV4T]>; -def : Proc<"arm922t", [ArchV4T]>; -def : Proc<"arm940t", [ArchV4T]>; -def : Proc<"ep9312", [ArchV4T]>; +def : ProcNoItin<"arm7tdmi", [ArchV4T]>; +def : ProcNoItin<"arm7tdmi-s", [ArchV4T]>; +def : ProcNoItin<"arm710t", [ArchV4T]>; +def : ProcNoItin<"arm720t", [ArchV4T]>; +def : ProcNoItin<"arm9", [ArchV4T]>; +def : ProcNoItin<"arm9tdmi", [ArchV4T]>; +def : ProcNoItin<"arm920", [ArchV4T]>; +def : ProcNoItin<"arm920t", [ArchV4T]>; +def : ProcNoItin<"arm922t", [ArchV4T]>; +def : ProcNoItin<"arm940t", [ArchV4T]>; +def : ProcNoItin<"ep9312", [ArchV4T]>; // V5T Processors. -def : Proc<"arm10tdmi", [ArchV5T]>; -def : Proc<"arm1020t", [ArchV5T]>; +def : ProcNoItin<"arm10tdmi", [ArchV5T]>; +def : ProcNoItin<"arm1020t", [ArchV5T]>; // V5TE Processors. -def : Proc<"arm9e", [ArchV5TE]>; -def : Proc<"arm926ej-s", [ArchV5TE]>; -def : Proc<"arm946e-s", [ArchV5TE]>; -def : Proc<"arm966e-s", [ArchV5TE]>; -def : Proc<"arm968e-s", [ArchV5TE]>; -def : Proc<"arm10e", [ArchV5TE]>; -def : Proc<"arm1020e", [ArchV5TE]>; -def : Proc<"arm1022e", [ArchV5TE]>; -def : Proc<"xscale", [ArchV5TE]>; -def : Proc<"iwmmxt", [ArchV5TE]>; +def : ProcNoItin<"arm9e", [ArchV5TE]>; +def : ProcNoItin<"arm926ej-s", [ArchV5TE]>; +def : ProcNoItin<"arm946e-s", [ArchV5TE]>; +def : ProcNoItin<"arm966e-s", [ArchV5TE]>; +def : ProcNoItin<"arm968e-s", [ArchV5TE]>; +def : ProcNoItin<"arm10e", [ArchV5TE]>; +def : ProcNoItin<"arm1020e", [ArchV5TE]>; +def : ProcNoItin<"arm1022e", [ArchV5TE]>; +def : ProcNoItin<"xscale", [ArchV5TE]>; +def : ProcNoItin<"iwmmxt", [ArchV5TE]>; // V6 Processors. -def : Proc<"arm1136j-s", [ArchV6]>; -def : Proc<"arm1136jf-s", [ArchV6, FeatureVFP2]>; -def : Proc<"arm1176jz-s", [ArchV6]>; -def : Proc<"arm1176jzf-s", [ArchV6, FeatureVFP2]>; -def : Proc<"mpcorenovfp", [ArchV6]>; -def : Proc<"mpcore", [ArchV6, FeatureVFP2]>; +def : Processor<"arm1136j-s", V6Itineraries, + [ArchV6]>; +def : Processor<"arm1136jf-s", V6Itineraries, + [ArchV6, FeatureVFP2]>; +def : Processor<"arm1176jz-s", V6Itineraries, + [ArchV6]>; +def : Processor<"arm1176jzf-s", V6Itineraries, + [ArchV6, FeatureVFP2]>; +def : Processor<"mpcorenovfp", V6Itineraries, + [ArchV6]>; +def : Processor<"mpcore", V6Itineraries, + [ArchV6, FeatureVFP2]>; // V6T2 Processors. -def : Proc<"arm1156t2-s", [ArchV6T2, FeatureThumb2]>; -def : Proc<"arm1156t2f-s", [ArchV6T2, FeatureThumb2, FeatureVFP2]>; +def : Processor<"arm1156t2-s", V6Itineraries, + [ArchV6T2, FeatureThumb2]>; +def : Processor<"arm1156t2f-s", V6Itineraries, + [ArchV6T2, FeatureThumb2, FeatureVFP2]>; // V7 Processors. 
-def : Proc<"cortex-a8", [ArchV7A, FeatureThumb2, FeatureNEON]>; -def : Proc<"cortex-a9", [ArchV7A, FeatureThumb2, FeatureNEON]>; +def : ProcNoItin<"cortex-a8", [ArchV7A, FeatureThumb2, FeatureNEON]>; +def : ProcNoItin<"cortex-a9", [ArchV7A, FeatureThumb2, FeatureNEON]>; //===----------------------------------------------------------------------===// // Register File Description diff --git a/lib/Target/ARM/ARMCallingConv.td b/lib/Target/ARM/ARMCallingConv.td index f126760..47151e6 100644 --- a/lib/Target/ARM/ARMCallingConv.td +++ b/lib/Target/ARM/ARMCallingConv.td @@ -17,11 +17,6 @@ class CCIfSubtarget<string F, CCAction A>: class CCIfAlign<string Align, CCAction A>: CCIf<!strconcat("ArgFlags.getOrigAlign() == ", Align), A>; -/// CCIfFloatABI - Match of the float ABI and the arg. ABIType may be "Hard" or -/// "Soft". -class CCIfFloatABI<string ABIType, CCAction A>: - CCIf<!strconcat("llvm::FloatABIType == llvm::FloatABI::", ABIType), A>; - //===----------------------------------------------------------------------===// // ARM APCS Calling Convention //===----------------------------------------------------------------------===// @@ -105,25 +100,3 @@ def RetCC_ARM_AAPCS_VFP : CallingConv<[ S9, S10, S11, S12, S13, S14, S15]>>, CCDelegateTo<RetCC_ARM_AAPCS_Common> ]>; - -//===----------------------------------------------------------------------===// -// ARM Calling Convention Dispatch -//===----------------------------------------------------------------------===// - -def CC_ARM : CallingConv<[ - CCIfSubtarget<"isAAPCS_ABI()", - CCIfSubtarget<"hasVFP2()", - CCIfFloatABI<"Hard", - CCDelegateTo<CC_ARM_AAPCS_VFP>>>>, - CCIfSubtarget<"isAAPCS_ABI()", CCDelegateTo<CC_ARM_AAPCS>>, - CCDelegateTo<CC_ARM_APCS> -]>; - -def RetCC_ARM : CallingConv<[ - CCIfSubtarget<"isAAPCS_ABI()", - CCIfSubtarget<"hasVFP2()", - CCIfFloatABI<"Hard", - CCDelegateTo<RetCC_ARM_AAPCS_VFP>>>>, - CCIfSubtarget<"isAAPCS_ABI()", CCDelegateTo<RetCC_ARM_AAPCS>>, - CCDelegateTo<RetCC_ARM_APCS> -]>; diff --git a/lib/Target/ARM/ARMCodeEmitter.cpp b/lib/Target/ARM/ARMCodeEmitter.cpp index 44fac12..f6629fe 100644 --- a/lib/Target/ARM/ARMCodeEmitter.cpp +++ b/lib/Target/ARM/ARMCodeEmitter.cpp @@ -900,6 +900,10 @@ void Emitter<CodeEmitter>::emitMiscLoadStoreInstruction(const MachineInstr &MI, // Set first operand Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRdShift; + // Skip LDRD and STRD's second operand. + if (TID.Opcode == ARM::LDRD || TID.Opcode == ARM::STRD) + ++OpIdx; + // Set second operand if (ImplicitRn) // Special handling for implicit use (e.g. PC). diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp index ca3a9cb..1ed9e80 100644 --- a/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -52,8 +52,13 @@ public: virtual const char *getPassName() const { return "ARM Instruction Selection"; - } - + } + + /// getI32Imm - Return a target constant with the specified value, of type i32. 
+ inline SDValue getI32Imm(unsigned Imm) { + return CurDAG->getTargetConstant(Imm, MVT::i32); + } + SDNode *Select(SDValue Op); virtual void InstructionSelect(); bool SelectAddrMode2(SDValue Op, SDValue N, SDValue &Base, @@ -84,6 +89,9 @@ public: bool SelectThumbAddrModeSP(SDValue Op, SDValue N, SDValue &Base, SDValue &OffImm); + bool SelectShifterOperand(SDValue Op, SDValue N, + SDValue &BaseReg, SDValue &Opc); + bool SelectShifterOperandReg(SDValue Op, SDValue N, SDValue &A, SDValue &B, SDValue &C); @@ -509,8 +517,30 @@ bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDValue Op, SDValue N, return false; } +bool ARMDAGToDAGISel::SelectShifterOperand(SDValue Op, + SDValue N, + SDValue &BaseReg, + SDValue &Opc) { + ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N); + + // Don't match base register only case. That is matched to a separate + // lower complexity pattern with explicit register operand. + if (ShOpcVal == ARM_AM::no_shift) return false; + + BaseReg = N.getOperand(0); + unsigned ShImmVal = 0; + if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) + ShImmVal = RHS->getZExtValue() & 31; + else + return false; + + Opc = getI32Imm(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal)); + + return true; +} + bool ARMDAGToDAGISel::SelectShifterOperandReg(SDValue Op, - SDValue N, + SDValue N, SDValue &BaseReg, SDValue &ShReg, SDValue &Opc) { @@ -549,6 +579,10 @@ SDNode *ARMDAGToDAGISel::Select(SDValue Op) { switch (N->getOpcode()) { default: break; case ISD::Constant: { + // ARMv6T2 and later should materialize imms via MOV / MOVT pair. + if (Subtarget->hasV6T2Ops() || Subtarget->hasThumb2()) + break; + unsigned Val = cast<ConstantSDNode>(N)->getZExtValue(); bool UseCP = true; if (Subtarget->isThumb()) diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index ec8bd1f..2443625 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -292,6 +292,25 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setIfCvtBlockSizeLimit(Subtarget->isThumb() ? 0 : 10); setIfCvtDupBlockSizeLimit(Subtarget->isThumb() ? 0 : 2); + if (!Subtarget->isThumb()) { + // Use branch latency information to determine if-conversion limits. + // FIXME: If-converter should use instruction latency of the branch being + // eliminated to compute the threshold. For ARMv6, the branch "latency" + // varies depending on whether it's dynamically or statically predicted + // and on whether the destination is in the prefetch buffer. 
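The hunk that follows converts that latency estimate into concrete if-conversion thresholds. In isolation, the arithmetic looks like this (a standalone sketch, not the LLVM API; `latency` stands in for the itinerary query in the code below):

    // Map an estimated branch latency to if-conversion size limits.
    void ifcvtLimits(unsigned latency, unsigned &blockLimit, unsigned &dupLimit) {
      if (latency > 1) {
        blockLimit = latency - 1;   // blocks worth converting instead of branching
        if (latency > 2)
          dupLimit = latency - 2;   // blocks worth duplicating
      } else {
        blockLimit = 10;            // fall back to the static defaults
        dupLimit = 2;
      }
    }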
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + const InstrItineraryData &InstrItins = Subtarget->getInstrItineraryData(); + unsigned Latency = InstrItins.getLatency(TII->get(ARM::Bcc).getSchedClass()); + if (Latency > 1) { + setIfCvtBlockSizeLimit(Latency-1); + if (Latency > 2) + setIfCvtDupBlockSizeLimit(Latency-2); + } else { + setIfCvtBlockSizeLimit(10); + setIfCvtDupBlockSizeLimit(2); + } + } + maxStoresPerMemcpy = 1; //// temporary - rewrite interface to use type // Do not enable CodePlacementOpt for now: it currently runs after the // ARMConstantIslandPass and messes up branch relaxation and placement @@ -415,7 +434,7 @@ static bool CC_ARM_APCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT, ARM::NoRegister }; unsigned Reg = State.AllocateReg(HiRegList, LoRegList, 4); - if (Reg == 0) + if (Reg == 0) return false; // we didn't handle it unsigned i; @@ -487,6 +506,33 @@ static bool RetCC_ARM_AAPCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT, State); } +/// CCAssignFnForNode - Selects the correct CCAssignFn for the +/// given CallingConvention value. +CCAssignFn *ARMTargetLowering::CCAssignFnForNode(unsigned CC, + bool Return) const { + switch (CC) { + default: + assert(0 && "Unsupported calling convention"); + case CallingConv::C: + case CallingConv::Fast: + // Use target triple & subtarget features to do actual dispatch. + if (Subtarget->isAAPCS_ABI()) { + if (Subtarget->hasVFP2() && + FloatABIType == FloatABI::Hard) + return (Return ? RetCC_ARM_AAPCS_VFP: CC_ARM_AAPCS_VFP); + else + return (Return ? RetCC_ARM_AAPCS: CC_ARM_AAPCS); + } else + return (Return ? RetCC_ARM_APCS: CC_ARM_APCS); + case CallingConv::ARM_AAPCS_VFP: + return (Return ? RetCC_ARM_AAPCS_VFP: CC_ARM_AAPCS_VFP); + case CallingConv::ARM_AAPCS: + return (Return ? RetCC_ARM_AAPCS: CC_ARM_AAPCS); + case CallingConv::ARM_APCS: + return (Return ? RetCC_ARM_APCS: CC_ARM_APCS); + } +} + /// LowerCallResult - Lower the result values of an ISD::CALL into the /// appropriate copies out of appropriate physical registers. This assumes that /// Chain/InFlag are the input chain/flag to use, and that TheCall is the call @@ -501,7 +547,8 @@ LowerCallResult(SDValue Chain, SDValue InFlag, CallSDNode *TheCall, SmallVector<CCValAssign, 16> RVLocs; bool isVarArg = TheCall->isVarArg(); CCState CCInfo(CallingConv, isVarArg, getTargetMachine(), RVLocs); - CCInfo.AnalyzeCallResult(TheCall, RetCC_ARM); + CCInfo.AnalyzeCallResult(TheCall, + CCAssignFnForNode(CallingConv, /* Return*/ true)); SmallVector<SDValue, 8> ResultVals; @@ -586,8 +633,6 @@ SDValue ARMTargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG) { MVT RetVT = TheCall->getRetValType(0); SDValue Chain = TheCall->getChain(); unsigned CC = TheCall->getCallingConv(); - assert((CC == CallingConv::C || - CC == CallingConv::Fast) && "unknown calling convention"); bool isVarArg = TheCall->isVarArg(); SDValue Callee = TheCall->getCallee(); DebugLoc dl = TheCall->getDebugLoc(); @@ -595,7 +640,7 @@ SDValue ARMTargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG) { // Analyze operands of the call, assigning locations to each operand. SmallVector<CCValAssign, 16> ArgLocs; CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs); - CCInfo.AnalyzeCallOperands(TheCall, CC_ARM); + CCInfo.AnalyzeCallOperands(TheCall, CCAssignFnForNode(CC, /* Return*/ false)); // Get a count of how many bytes are to be pushed on the stack.
unsigned NumBytes = CCInfo.getNextStackOffset(); @@ -788,7 +833,7 @@ SDValue ARMTargetLowering::LowerRET(SDValue Op, SelectionDAG &DAG) { CCState CCInfo(CC, isVarArg, getTargetMachine(), RVLocs); // Analyze return values of ISD::RET. - CCInfo.AnalyzeReturn(Op.getNode(), RetCC_ARM); + CCInfo.AnalyzeReturn(Op.getNode(), CCAssignFnForNode(CC, /* Return */ true)); // If this is the first return lowered for this function, add // the regs to the liveout set for the function. @@ -1085,7 +1130,8 @@ ARMTargetLowering::LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG) { // Assign locations to all of the incoming arguments. SmallVector<CCValAssign, 16> ArgLocs; CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs); - CCInfo.AnalyzeFormalArguments(Op.getNode(), CC_ARM); + CCInfo.AnalyzeFormalArguments(Op.getNode(), + CCAssignFnForNode(CC, /* Return*/ false)); SmallVector<SDValue, 16> ArgValues; @@ -1456,7 +1502,7 @@ SDValue ARMTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) { MVT VT = Op.getValueType(); DebugLoc dl = Op.getDebugLoc(); // FIXME probably not meaningful unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); - unsigned FrameReg = (Subtarget->isThumb() || Subtarget->useThumbBacktraces()) + unsigned FrameReg = (Subtarget->isThumb() || Subtarget->isTargetDarwin()) ? ARM::R7 : ARM::R11; SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, VT); while (Depth--) diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h index 2dab2db..8f53e39 100644 --- a/lib/Target/ARM/ARMISelLowering.h +++ b/lib/Target/ARM/ARMISelLowering.h @@ -151,6 +151,7 @@ namespace llvm { /// unsigned ARMPCLabelIndex; + CCAssignFn *CCAssignFnForNode(unsigned CC, bool Return) const; SDValue LowerMemOpCallTo(CallSDNode *TheCall, SelectionDAG &DAG, const SDValue &StackPtr, const CCValAssign &VA, SDValue Chain, SDValue Arg, ISD::ArgFlagsTy Flags); diff --git a/lib/Target/ARM/ARMInstrInfo.cpp b/lib/Target/ARM/ARMInstrInfo.cpp index 4b0dbb5..d19fb8e 100644 --- a/lib/Target/ARM/ARMInstrInfo.cpp +++ b/lib/Target/ARM/ARMInstrInfo.cpp @@ -697,7 +697,6 @@ restoreCalleeSavedRegisters(MachineBasicBlock &MBB, bool isVarArg = AFI->getVarArgsRegSaveSize() > 0; MachineInstr *PopMI = MF.CreateMachineInstr(get(ARM::tPOP),MI->getDebugLoc()); - MBB.insert(MI, PopMI); for (unsigned i = CSI.size(); i != 0; --i) { unsigned Reg = CSI[i-1].getReg(); if (Reg == ARM::LR) { @@ -706,10 +705,15 @@ restoreCalleeSavedRegisters(MachineBasicBlock &MBB, continue; Reg = ARM::PC; PopMI->setDesc(get(ARM::tPOP_RET)); - MBB.erase(MI); + MI = MBB.erase(MI); } PopMI->addOperand(MachineOperand::CreateReg(Reg, true)); } + + // It's illegal to emit a pop instruction without operands. + if (PopMI->getNumOperands() > 0) + MBB.insert(MI, PopMI); + return true; } diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td index cc9f1a5..4707e3b 100644 --- a/lib/Target/ARM/ARMInstrInfo.td +++ b/lib/Target/ARM/ARMInstrInfo.td @@ -90,12 +90,12 @@ def ARMeh_sjlj_setjmp: SDNode<"ARMISD::EH_SJLJ_SETJMP", SDT_ARMEH_SJLJ_Setjmp>; //===----------------------------------------------------------------------===// // ARM Instruction Predicate Definitions.
// -def HasV5T : Predicate<"Subtarget->hasV5TOps()">; -def HasV5TE : Predicate<"Subtarget->hasV5TEOps()">; -def HasV6 : Predicate<"Subtarget->hasV6Ops()">; -def IsThumb : Predicate<"Subtarget->isThumb()">; -def IsThumb2 : Predicate<"Subtarget->isThumb2()">; -def IsARM : Predicate<"!Subtarget->isThumb()">; +def HasV5T : Predicate<"Subtarget->hasV5TOps()">; +def HasV5TE : Predicate<"Subtarget->hasV5TEOps()">; +def HasV6 : Predicate<"Subtarget->hasV6Ops()">; +def IsThumb : Predicate<"Subtarget->isThumb()">; +def HasThumb2 : Predicate<"Subtarget->hasThumb2()">; +def IsARM : Predicate<"!Subtarget->isThumb()">; //===----------------------------------------------------------------------===// // ARM Flag Definitions. @@ -539,7 +539,7 @@ let isReturn = 1, isTerminator = 1 in LdStMulFrm, "ldm${p}${addr:submode} $addr, $dst1", []>; -let isCall = 1, +let isCall = 1, Itinerary = IIC_Br, Defs = [R0, R1, R2, R3, R12, LR, D0, D1, D2, D3, D4, D5, D6, D7, CPSR] in { def BL : ABXI<0b1011, (outs), (ins i32imm:$func, variable_ops), @@ -567,7 +567,7 @@ let isCall = 1, } } -let isBranch = 1, isTerminator = 1 in { +let isBranch = 1, isTerminator = 1, Itinerary = IIC_Br in { // B is "predicable" since it can be xformed into a Bcc. let isBarrier = 1 in { let isPredicable = 1 in @@ -647,9 +647,8 @@ def LDRSB : AI3ldsb<(outs GPR:$dst), (ins addrmode3:$addr), LdMiscFrm, let mayLoad = 1 in { // Load doubleword -def LDRD : AI3ldd<(outs GPR:$dst), (ins addrmode3:$addr), LdMiscFrm, - "ldr", "d $dst, $addr", - []>, Requires<[IsARM, HasV5T]>; +def LDRD : AI3ldd<(outs GPR:$dst1, GPR:$dst2), (ins addrmode3:$addr), LdMiscFrm, + "ldr", "d $dst1, $addr", []>, Requires<[IsARM, HasV5T]>; // Indexed loads def LDR_PRE : AI2ldwpr<(outs GPR:$dst, GPR:$base_wb), @@ -709,9 +708,8 @@ def STRB : AI2stb<(outs), (ins GPR:$src, addrmode2:$addr), StFrm, // Store doubleword let mayStore = 1 in -def STRD : AI3std<(outs), (ins GPR:$src, addrmode3:$addr), StMiscFrm, - "str", "d $src, $addr", - []>, Requires<[IsARM, HasV5T]>; +def STRD : AI3std<(outs), (ins GPR:$src1, GPR:$src2, addrmode3:$addr),StMiscFrm, + "str", "d $src1, $addr", []>, Requires<[IsARM, HasV5T]>; // Indexed stores def STR_PRE : AI2stwpr<(outs GPR:$base_wb), @@ -1387,6 +1385,12 @@ def : ARMV5TEPat<(add GPR:$acc, include "ARMInstrThumb.td" //===----------------------------------------------------------------------===// +// Thumb2 Support +// + +include "ARMInstrThumb2.td" + +//===----------------------------------------------------------------------===// // Floating Point Support // diff --git a/lib/Target/ARM/ARMInstrThumb.td b/lib/Target/ARM/ARMInstrThumb.td index 54232f6..9297f08 100644 --- a/lib/Target/ARM/ARMInstrThumb.td +++ b/lib/Target/ARM/ARMInstrThumb.td @@ -277,6 +277,7 @@ def tPUSH : TI<(outs), (ins reglist:$src1, variable_ops), // // Add with carry +let isCommutable = 1 in def tADC : TIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), "adc $dst, $rhs", [(set tGPR:$dst, (adde tGPR:$lhs, tGPR:$rhs))]>; @@ -311,6 +312,7 @@ def tADDrSPi : TI<(outs tGPR:$dst), (ins GPR:$sp, i32imm:$rhs), def tADDspi : TIt<(outs GPR:$dst), (ins GPR:$lhs, i32imm:$rhs), "add $dst, $rhs * 4", []>; +let isCommutable = 1 in def tAND : TIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), "and $dst, $rhs", [(set tGPR:$dst, (and tGPR:$lhs, tGPR:$rhs))]>; @@ -358,6 +360,7 @@ def tCMPNZr : TI<(outs), (ins tGPR:$lhs, tGPR:$rhs), // TODO: A7-37: CMP(3) - cmp hi regs +let isCommutable = 1 in def tEOR : TIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), "eor $dst, $rhs", [(set tGPR:$dst, (xor tGPR:$lhs, 
tGPR:$rhs))]>; @@ -399,6 +402,7 @@ def tMOVhir2hir : TI<(outs GPR:$dst), (ins GPR:$src), "cpy $dst, $src\t@ hir2hir", []>; } // neverHasSideEffects +let isCommutable = 1 in def tMUL : TIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), "mul $dst, $rhs", [(set tGPR:$dst, (mul tGPR:$lhs, tGPR:$rhs))]>; @@ -411,6 +415,7 @@ def tNEG : TI<(outs tGPR:$dst), (ins tGPR:$src), "neg $dst, $src", [(set tGPR:$dst, (ineg tGPR:$src))]>; +let isCommutable = 1 in def tORR : TIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), "orr $dst, $rhs", [(set tGPR:$dst, (or tGPR:$lhs, tGPR:$rhs))]>; diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td index 168fb45..07c71da 100644 --- a/lib/Target/ARM/ARMInstrThumb2.td +++ b/lib/Target/ARM/ARMInstrThumb2.td @@ -10,3 +10,199 @@ // This file describes the Thumb2 instruction set. // //===----------------------------------------------------------------------===// + +// Shifted operands. No register controlled shifts for Thumb2. +// Note: We do not support rrx shifted operands yet. +def t2_so_reg : Operand<i32>, // reg imm + ComplexPattern<i32, 2, "SelectShifterOperand", + [shl,srl,sra,rotr]> { + let PrintMethod = "printSOOperand"; + let MIOperandInfo = (ops GPR, i32imm); +} + +def LO16 : SDNodeXForm<imm, [{ + // Transformation function: get the low 16 bits of the immediate value. + return getI32Imm((unsigned short)N->getZExtValue()); +}]>; + +def HI16 : SDNodeXForm<imm, [{ + // Transformation function: shift the immediate value down into the low bits. + return getI32Imm((unsigned)N->getZExtValue() >> 16); +}]>; + +def imm16high : PatLeaf<(i32 imm), [{ + // Returns true if all bits out of the [31..16] range are 0. + return ((N->getZExtValue() & 0xFFFF0000ULL) == N->getZExtValue()); +}], HI16>; + +def imm16high0xffff : PatLeaf<(i32 imm), [{ + // Returns true if lo 16 bits are set and this is a 32-bit value. + return ((N->getZExtValue() & 0x0000FFFFULL) == 0xFFFFULL); +}], HI16>; + +def imm0_4095 : PatLeaf<(i32 imm), [{ + return (uint32_t)N->getZExtValue() < 4096; +}]>; + +def imm0_4095_neg : PatLeaf<(i32 imm), [{ + return (uint32_t)-N->getZExtValue() < 4096; +}], imm_neg_XFORM>; + +def imm0_65535 : PatLeaf<(i32 imm), [{ + return N->getZExtValue() < 65536; +}]>; + +// A6.3.2 Modified immediate constants in Thumb instructions (#<const>) +// FIXME: Move it to the addrmode matcher code. +def t2_so_imm : PatLeaf<(i32 imm), [{ + uint64_t v = N->getZExtValue(); + if (v == 0 || v > 0xffffffffUL) return false; + // variant1 - 0b0000x - 8-bit which could be zero (not supported for now) + + // variant2 - 0b00nnx - 8-bit repeated inside the 32-bit room + unsigned hi16 = (unsigned)(v >> 16); + unsigned lo16 = (unsigned)(v & 0xffffUL); + bool valid = (hi16 == lo16) && ( + (v & 0x00ff00ffUL) == 0 || // type 0001x + (v & 0xff00ff00UL) == 0 || // type 0010x + ((lo16 >> 8) == (lo16 & 0xff))); // type 0011x + if (valid) return true; + + // variant3 - 0b01000..0b11111 - 8-bit shifted inside the 32-bit room + unsigned shift = CountLeadingZeros_32(v); + uint64_t mask = (0xff000000ULL >> shift); + // If valid, it is type 01000 + shift + return ((shift < 24) && (v & mask) > 0) && ((v & (~mask)) == 0); +}]>; + + +//===----------------------------------------------------------------------===// +// Thumb-2 instructions to cover the functionality of the ARM instruction set. +// + +/// T2I_bin_irs - Defines a set of (op reg, {so_imm|reg|so_reg}) patterns for a /// binary operation that produces a value.
+multiclass T2I_bin_irs<string opc, PatFrag opnode> { + // shifted imm + def ri : PseudoInst<(outs GPR:$dst), (ins GPR:$lhs, i32imm:$rhs), + !strconcat(opc, " $dst, $lhs, $rhs"), + [(set GPR:$dst, (opnode GPR:$lhs, t2_so_imm:$rhs))]>, + Requires<[HasThumb2]>; + // register + def rr : PseudoInst<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs), + !strconcat(opc, " $dst, $lhs, $rhs"), + [(set GPR:$dst, (opnode GPR:$lhs, GPR:$rhs))]>, + Requires<[HasThumb2]>; + // shifted register + def rs : PseudoInst<(outs GPR:$dst), (ins GPR:$lhs, t2_so_reg:$rhs), + !strconcat(opc, " $dst, $lhs, $rhs"), + [(set GPR:$dst, (opnode GPR:$lhs, t2_so_reg:$rhs))]>, + Requires<[HasThumb2]>; +} + +/// T2I_bin_s_irs - Similar to T2I_bin_irs except it sets the 's' bit so the +/// instruction modifies the CPSR register. +let Defs = [CPSR] in { +multiclass T2I_bin_s_irs<string opc, PatFrag opnode> { + // shifted imm + def ri : PseudoInst<(outs GPR:$dst), (ins GPR:$lhs, i32imm:$rhs), + !strconcat(opc, "s $dst, $lhs, $rhs"), + [(set GPR:$dst, (opnode GPR:$lhs, t2_so_imm:$rhs))]>, + Requires<[HasThumb2]>; + + // register + def rr : PseudoInst<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs), + !strconcat(opc, "s $dst, $lhs, $rhs"), + [(set GPR:$dst, (opnode GPR:$lhs, GPR:$rhs))]>, + Requires<[HasThumb2]>; + + // shifted register + def rs : PseudoInst<(outs GPR:$dst), (ins GPR:$lhs, t2_so_reg:$rhs), + !strconcat(opc, "s $dst, $lhs, $rhs"), + [(set GPR:$dst, (opnode GPR:$lhs, t2_so_reg:$rhs))]>, + Requires<[HasThumb2]>; +} +} + +/// T2I_bin_c_irs - Similar to T2I_bin_irs except it uses the 's' bit. Also the +/// instruction can optionally set the CPSR register. +let Uses = [CPSR] in { +multiclass T2I_bin_c_irs<string opc, PatFrag opnode> { + // shifted imm + def ri : PseudoInst<(outs GPR:$dst), (ins GPR:$lhs, i32imm:$rhs, cc_out:$s), + !strconcat(opc, "${s} $dst, $lhs, $rhs"), + [(set GPR:$dst, (opnode GPR:$lhs, t2_so_imm:$rhs))]>, + Requires<[HasThumb2]>; + + // register + def rr : PseudoInst<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs, cc_out:$s), + !strconcat(opc, "${s} $dst, $lhs, $rhs"), + [(set GPR:$dst, (opnode GPR:$lhs, GPR:$rhs))]>, + Requires<[HasThumb2]>; + + // shifted register + def rs : PseudoInst<(outs GPR:$dst), (ins GPR:$lhs, t2_so_reg:$rhs, cc_out:$s), + !strconcat(opc, "${s} $dst, $lhs, $rhs"), + [(set GPR:$dst, (opnode GPR:$lhs, t2_so_reg:$rhs))]>, + Requires<[HasThumb2]>; +} +} + +//===----------------------------------------------------------------------===// +// Arithmetic Instructions. +// + +//===----------------------------------------------------------------------===// +// Move Instructions. +// +def tMOVi16 : PseudoInst<(outs GPR:$dst), (ins i32imm:$src), + "movw $dst, $src", + [(set GPR:$dst, imm0_65535:$src)]>, + Requires<[HasThumb2]>; + +let isTwoAddress = 1 in +def tMOVTi16 : PseudoInst<(outs GPR:$dst), (ins GPR:$src, i32imm:$imm), + "movt $dst, $imm", + [(set GPR:$dst, (or (and GPR:$src, 0xffff), + imm16high:$imm))]>, + Requires<[HasThumb2]>; + +def : Pat<(and (or GPR:$src, imm16high:$imm1), imm16high0xffff:$imm2), + (tMOVTi16 GPR:$src, (HI16 imm16high:$imm1))>, + Requires<[HasThumb2]>; + +def : Pat<(i32 imm:$imm), + (tMOVTi16 (tMOVi16 (LO16 imm:$imm)),(HI16 imm:$imm))>, + Requires<[HasThumb2]>; + +//===----------------------------------------------------------------------===// +// Arithmetic Instructions. 
+// +defm t2ADD : T2I_bin_irs <"add", BinOpFrag<(add node:$LHS, node:$RHS)>>; +defm t2SUB : T2I_bin_irs <"sub", BinOpFrag<(sub node:$LHS, node:$RHS)>>; + +def tADDri12 : PseudoInst<(outs GPR:$dst), (ins GPR:$lhs, i32imm:$rhs), + "add $dst, $lhs, $rhs", + [(set GPR:$dst, (add GPR:$lhs, imm0_4095:$rhs))]>, + Requires<[HasThumb2]>; +def tSUBri12 : PseudoInst<(outs GPR:$dst), (ins GPR:$lhs, i32imm:$rhs), + "sub $dst, $lhs, $rhs", + [(set GPR:$dst, (add GPR:$lhs, imm0_4095_neg:$rhs))]>, + Requires<[HasThumb2]>; + +defm t2ADDS : T2I_bin_s_irs<"add", BinOpFrag<(addc node:$LHS, node:$RHS)>>; +defm t2SUBS : T2I_bin_s_irs<"sub", BinOpFrag<(subc node:$LHS, node:$RHS)>>; + +defm t2ADC : T2I_bin_c_irs<"adc", BinOpFrag<(adde node:$LHS, node:$RHS)>>; +defm t2SBC : T2I_bin_c_irs<"sbc", BinOpFrag<(sube node:$LHS, node:$RHS)>>; + + +def tMLS : PseudoInst<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), + "mls $dst, $a, $b, $c", + [(set GPR:$dst, (sub GPR:$c, (mul GPR:$a, GPR:$b)))]>, + Requires<[HasThumb2]>; + +def tORNrs : PseudoInst<(outs GPR:$dst), (ins GPR:$src1, t2_so_reg:$src2), + "orn $dst, $src1, $src2", + [(set GPR:$dst, (or GPR:$src1, (not t2_so_reg: $src2)))]>, + Requires<[HasThumb2]>; diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp index 684ecb4..59cf125 100644 --- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp +++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp @@ -17,19 +17,22 @@ #include "ARMAddressingModes.h" #include "ARMMachineFunctionInfo.h" #include "ARMRegisterInfo.h" +#include "llvm/DerivedTypes.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/RegisterScavenging.h" -#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetData.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Support/Compiler.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" using namespace llvm; @@ -39,6 +42,12 @@ STATISTIC(NumSTMGened , "Number of stm instructions generated"); STATISTIC(NumFLDMGened, "Number of fldm instructions generated"); STATISTIC(NumFSTMGened, "Number of fstm instructions generated"); STATISTIC(NumLdStMoved, "Number of load / store instructions moved"); +STATISTIC(NumLDRDFormed,"Number of ldrd created before allocation"); +STATISTIC(NumSTRDFormed,"Number of strd created before allocation"); +STATISTIC(NumLDRD2LDM, "Number of ldrd instructions turned back into ldm"); +STATISTIC(NumSTRD2STM, "Number of strd instructions turned back into stm"); +STATISTIC(NumLDRD2LDR, "Number of ldrd instructions turned back into ldr's"); +STATISTIC(NumSTRD2STR, "Number of strd instructions turned back into str's"); /// ARMAllocLoadStoreOpt - Post-register-allocation pass to combine /// load / store instructions to form ldm / stm instructions.
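The pass added below forms and splits LDRD/STRD pairs; to make the legality test concrete, here is a simplified standalone sketch of the check it applies (the names are illustrative stand-ins for the MachineInstr queries used in the code that follows, not LLVM API):

    // Returns true if two word loads at base+off0 and base+off1 that target
    // consecutive registers can be folded into a single LDRD.
    bool canFoldToLDRD(unsigned evenRegNum, unsigned oddRegNum,
                       int off0, int off1) {
      if ((evenRegNum & 1) != 0 || oddRegNum != evenRegNum + 1)
        return false;                      // LDRD needs an even/odd register pair
      if (off1 != off0 + 4)
        return false;                      // the two words must be adjacent
      return off0 > -256 && off0 < 256;    // addrmode3 immediates are 8 bits
    }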
@@ -82,6 +91,8 @@ namespace { SmallVector<MachineBasicBlock::iterator, 4> &Merges); void AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps); + bool FixInvalidRegPairOp(MachineBasicBlock &MBB, + MachineBasicBlock::iterator &MBBI); bool LoadStoreMultipleOpti(MachineBasicBlock &MBB); bool MergeReturnIntoLDM(MachineBasicBlock &MBB); }; @@ -586,13 +597,19 @@ void ARMLoadStoreOpt::AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps) { static int getMemoryOpOffset(const MachineInstr *MI) { int Opcode = MI->getOpcode(); bool isAM2 = Opcode == ARM::LDR || Opcode == ARM::STR; + bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD; unsigned NumOperands = MI->getDesc().getNumOperands(); unsigned OffField = MI->getOperand(NumOperands-3).getImm(); int Offset = isAM2 - ? ARM_AM::getAM2Offset(OffField) : ARM_AM::getAM5Offset(OffField) * 4; + ? ARM_AM::getAM2Offset(OffField) + : (isAM3 ? ARM_AM::getAM3Offset(OffField) + : ARM_AM::getAM5Offset(OffField) * 4); if (isAM2) { if (ARM_AM::getAM2Op(OffField) == ARM_AM::sub) Offset = -Offset; + } else if (isAM3) { + if (ARM_AM::getAM3Op(OffField) == ARM_AM::sub) + Offset = -Offset; } else { if (ARM_AM::getAM5Op(OffField) == ARM_AM::sub) Offset = -Offset; @@ -600,6 +617,120 @@ static int getMemoryOpOffset(const MachineInstr *MI) { return Offset; } +static void InsertLDR_STR(MachineBasicBlock &MBB, + MachineBasicBlock::iterator &MBBI, + int OffImm, bool isDef, + DebugLoc dl, unsigned NewOpc, + unsigned Reg, bool RegDeadKill, + unsigned BaseReg, bool BaseKill, + unsigned OffReg, bool OffKill, + ARMCC::CondCodes Pred, unsigned PredReg, + const TargetInstrInfo *TII) { + unsigned Offset; + if (OffImm < 0) + Offset = ARM_AM::getAM2Opc(ARM_AM::sub, -OffImm, ARM_AM::no_shift); + else + Offset = ARM_AM::getAM2Opc(ARM_AM::add, OffImm, ARM_AM::no_shift); + if (isDef) + BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc)) + .addReg(Reg, getDefRegState(true) | getDeadRegState(RegDeadKill)) + .addReg(BaseReg, getKillRegState(BaseKill)) + .addReg(OffReg, getKillRegState(OffKill)) + .addImm(Offset) + .addImm(Pred).addReg(PredReg); + else + BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc)) + .addReg(Reg, getKillRegState(RegDeadKill)) + .addReg(BaseReg, getKillRegState(BaseKill)) + .addReg(OffReg, getKillRegState(OffKill)) + .addImm(Offset) + .addImm(Pred).addReg(PredReg); +} + +bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB, + MachineBasicBlock::iterator &MBBI) { + MachineInstr *MI = &*MBBI; + unsigned Opcode = MI->getOpcode(); + if (Opcode == ARM::LDRD || Opcode == ARM::STRD) { + unsigned EvenReg = MI->getOperand(0).getReg(); + unsigned OddReg = MI->getOperand(1).getReg(); + unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false); + unsigned OddRegNum = TRI->getDwarfRegNum(OddReg, false); + if ((EvenRegNum & 1) == 0 && (EvenRegNum + 1) == OddRegNum) + return false; + + bool isLd = Opcode == ARM::LDRD; + bool EvenDeadKill = isLd ? + MI->getOperand(0).isDead() : MI->getOperand(0).isKill(); + bool OddDeadKill = isLd ? + MI->getOperand(1).isDead() : MI->getOperand(1).isKill(); + const MachineOperand &BaseOp = MI->getOperand(2); + unsigned BaseReg = BaseOp.getReg(); + bool BaseKill = BaseOp.isKill(); + const MachineOperand &OffOp = MI->getOperand(3); + unsigned OffReg = OffOp.getReg(); + bool OffKill = OffOp.isKill(); + int OffImm = getMemoryOpOffset(MI); + unsigned PredReg = 0; + ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg); + + if (OddRegNum > EvenRegNum && OffReg == 0 && OffImm == 0) { + // Ascending register numbers and no offset. 
It's safe to change it to a + // ldm or stm. + unsigned NewOpc = (Opcode == ARM::LDRD) ? ARM::LDM : ARM::STM; + if (isLd) { + BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc)) + .addReg(BaseReg, getKillRegState(BaseKill)) + .addImm(ARM_AM::getAM4ModeImm(ARM_AM::ia)) + .addImm(Pred).addReg(PredReg) + .addReg(EvenReg, getDefRegState(isLd) | getDeadRegState(EvenDeadKill)) + .addReg(OddReg, getDefRegState(isLd) | getDeadRegState(OddDeadKill)); + ++NumLDRD2LDM; + } else { + BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc)) + .addReg(BaseReg, getKillRegState(BaseKill)) + .addImm(ARM_AM::getAM4ModeImm(ARM_AM::ia)) + .addImm(Pred).addReg(PredReg) + .addReg(EvenReg, getKillRegState(EvenDeadKill)) + .addReg(OddReg, getKillRegState(OddDeadKill)); + ++NumSTRD2STM; + } + } else { + // Split into two instructions. + unsigned NewOpc = (Opcode == ARM::LDRD) ? ARM::LDR : ARM::STR; + DebugLoc dl = MBBI->getDebugLoc(); + // If this is a load and base register is killed, it may have been + // re-defed by the load, make sure the first load does not clobber it. + if (isLd && + (BaseKill || OffKill) && + (TRI->regsOverlap(EvenReg, BaseReg) || + (OffReg && TRI->regsOverlap(EvenReg, OffReg)))) { + assert(!TRI->regsOverlap(OddReg, BaseReg) && + (!OffReg || !TRI->regsOverlap(OddReg, OffReg))); + InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc, OddReg, OddDeadKill, + BaseReg, false, OffReg, false, Pred, PredReg, TII); + InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc, EvenReg, EvenDeadKill, + BaseReg, BaseKill, OffReg, OffKill, Pred, PredReg, TII); + } else { + InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc, + EvenReg, EvenDeadKill, BaseReg, false, OffReg, false, + Pred, PredReg, TII); + InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc, + OddReg, OddDeadKill, BaseReg, BaseKill, OffReg, OffKill, + Pred, PredReg, TII); + } + if (isLd) + ++NumLDRD2LDR; + else + ++NumSTRD2STR; + } + + MBBI = prior(MBBI); + MBB.erase(MI); + } + return false; +} + /// LoadStoreMultipleOpti - An optimization pass to turn multiple LDR / STR /// ops of the same base and incrementing offset into LDM / STM ops. 
bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) { @@ -617,6 +748,9 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) { RS->enterBasicBlock(&MBB); MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end(); while (MBBI != E) { + if (FixInvalidRegPairOp(MBB, MBBI)) + continue; + bool Advance = false; bool TryMerge = false; bool Clobber = false; @@ -817,8 +951,10 @@ namespace { static char ID; ARMPreAllocLoadStoreOpt() : MachineFunctionPass(&ID) {} + const TargetData *TD; const TargetInstrInfo *TII; const TargetRegisterInfo *TRI; + const ARMSubtarget *STI; MachineRegisterInfo *MRI; virtual bool runOnMachineFunction(MachineFunction &Fn); @@ -828,6 +964,11 @@ namespace { } private: + bool CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl, + unsigned &NewOpc, unsigned &EvenReg, + unsigned &OddReg, unsigned &BaseReg, + unsigned &OffReg, unsigned &Offset, + unsigned &PredReg, ARMCC::CondCodes &Pred); bool RescheduleOps(MachineBasicBlock *MBB, SmallVector<MachineInstr*, 4> &Ops, unsigned Base, bool isLd, @@ -838,8 +979,10 @@ namespace { } bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) { + TD = Fn.getTarget().getTargetData(); TII = Fn.getTarget().getInstrInfo(); TRI = Fn.getTarget().getRegisterInfo(); + STI = &Fn.getTarget().getSubtarget<ARMSubtarget>(); MRI = &Fn.getRegInfo(); bool Modified = false; @@ -850,15 +993,19 @@ bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) { return Modified; } -static bool IsSafeToMove(bool isLd, unsigned Base, - MachineBasicBlock::iterator I, - MachineBasicBlock::iterator E, - SmallPtrSet<MachineInstr*, 4> MoveOps, - const TargetRegisterInfo *TRI) { +static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base, + MachineBasicBlock::iterator I, + MachineBasicBlock::iterator E, + SmallPtrSet<MachineInstr*, 4> &MemOps, + SmallSet<unsigned, 4> &MemRegs, + const TargetRegisterInfo *TRI) { // Are there stores / loads / calls between them? // FIXME: This is overly conservative. We should make use of alias information // some day. + SmallSet<unsigned, 4> AddedRegPressure; while (++I != E) { + if (MemOps.count(&*I)) + continue; const TargetInstrDesc &TID = I->getDesc(); if (TID.isCall() || TID.isTerminator() || TID.hasUnmodeledSideEffects()) return false; @@ -871,15 +1018,76 @@ static bool IsSafeToMove(bool isLd, unsigned Base, // str r1, [r0] // strh r5, [r0] // str r4, [r0, #+4] - if (TID.mayStore() && !MoveOps.count(&*I)) + if (TID.mayStore()) return false; } for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) { MachineOperand &MO = I->getOperand(j); - if (MO.isReg() && MO.isDef() && TRI->regsOverlap(MO.getReg(), Base)) + if (!MO.isReg()) + continue; + unsigned Reg = MO.getReg(); + if (MO.isDef() && TRI->regsOverlap(Reg, Base)) return false; + if (Reg != Base && !MemRegs.count(Reg)) + AddedRegPressure.insert(Reg); } } + + // Estimate register pressure increase due to the transformation. + if (MemRegs.size() <= 4) + // OK if we are moving a small number of instructions.
+ return true; + return AddedRegPressure.size() <= MemRegs.size() * 2; +} + +bool +ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, + DebugLoc &dl, + unsigned &NewOpc, unsigned &EvenReg, + unsigned &OddReg, unsigned &BaseReg, + unsigned &OffReg, unsigned &Offset, + unsigned &PredReg, + ARMCC::CondCodes &Pred) { + // FIXME: FLDS / FSTS -> FLDD / FSTD + unsigned Opcode = Op0->getOpcode(); + if (Opcode == ARM::LDR) + NewOpc = ARM::LDRD; + else if (Opcode == ARM::STR) + NewOpc = ARM::STRD; + else + return false; + + // Make sure the base address satisfies i64 ld / st alignment requirement. + if (!Op0->hasOneMemOperand() || + !Op0->memoperands_begin()->getValue() || + Op0->memoperands_begin()->isVolatile()) + return false; + + unsigned Align = Op0->memoperands_begin()->getAlignment(); + unsigned ReqAlign = STI->hasV6Ops() + ? TD->getPrefTypeAlignment(Type::Int64Ty) : 8; // Pre-v6 needs 8-byte align + if (Align < ReqAlign) + return false; + + // Then make sure the immediate offset fits. + int OffImm = getMemoryOpOffset(Op0); + ARM_AM::AddrOpc AddSub = ARM_AM::add; + if (OffImm < 0) { + AddSub = ARM_AM::sub; + OffImm = -OffImm; + } + if (OffImm >= 256) // 8 bits + return false; + Offset = ARM_AM::getAM3Opc(AddSub, OffImm); + + EvenReg = Op0->getOperand(0).getReg(); + OddReg = Op1->getOperand(0).getReg(); + if (EvenReg == OddReg) + return false; + BaseReg = Op0->getOperand(1).getReg(); + OffReg = Op0->getOperand(2).getReg(); + Pred = getInstrPredicate(Op0, PredReg); + dl = Op0->getDebugLoc(); return true; } @@ -902,6 +1110,7 @@ bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB, MachineInstr *FirstOp = 0; MachineInstr *LastOp = 0; int LastOffset = 0; + unsigned LastOpcode = 0; unsigned LastBytes = 0; unsigned NumMove = 0; for (int i = Ops.size() - 1; i >= 0; --i) { @@ -916,6 +1125,10 @@ bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB, LastOp = Op; } + unsigned Opcode = Op->getOpcode(); + if (LastOpcode && Opcode != LastOpcode) + break; + int Offset = getMemoryOpOffset(Op); unsigned Bytes = getLSMultipleTransferSize(Op); if (LastBytes) { @@ -924,34 +1137,80 @@ bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB, } LastOffset = Offset; LastBytes = Bytes; - if (++NumMove == 4) + LastOpcode = Opcode; + if (++NumMove == 8) // FIXME: Tune break; } if (NumMove <= 1) Ops.pop_back(); else { - SmallPtrSet<MachineInstr*, 4> MoveOps; - for (int i = NumMove-1; i >= 0; --i) - MoveOps.insert(Ops[i]); + SmallPtrSet<MachineInstr*, 4> MemOps; + SmallSet<unsigned, 4> MemRegs; + for (int i = NumMove-1; i >= 0; --i) { + MemOps.insert(Ops[i]); + MemRegs.insert(Ops[i]->getOperand(0).getReg()); + } // Be conservative, if the instructions are too far apart, don't // move them. We want to limit the increase of register pressure. - bool DoMove = (LastLoc - FirstLoc) < NumMove*4; + bool DoMove = (LastLoc - FirstLoc) <= NumMove*4; // FIXME: Tune this. if (DoMove) - DoMove = IsSafeToMove(isLd, Base, FirstOp, LastOp, MoveOps, TRI); + DoMove = IsSafeAndProfitableToMove(isLd, Base, FirstOp, LastOp, + MemOps, MemRegs, TRI); if (!DoMove) { for (unsigned i = 0; i != NumMove; ++i) Ops.pop_back(); } else { // This is the new location for the loads / stores. MachineBasicBlock::iterator InsertPos = isLd ? 
FirstOp : LastOp; - while (InsertPos != MBB->end() && MoveOps.count(InsertPos)) + while (InsertPos != MBB->end() && MemOps.count(InsertPos)) ++InsertPos; - for (unsigned i = 0; i != NumMove; ++i) { - MachineInstr *Op = Ops.back(); + + // If we are moving a pair of loads / stores, see if it makes sense + // to try to allocate a pair of registers that can form register pairs. + MachineInstr *Op0 = Ops.back(); + MachineInstr *Op1 = Ops[Ops.size()-2]; + unsigned EvenReg = 0, OddReg = 0; + unsigned BaseReg = 0, OffReg = 0, PredReg = 0; + ARMCC::CondCodes Pred = ARMCC::AL; + unsigned NewOpc = 0; + unsigned Offset = 0; + DebugLoc dl; + if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc, + EvenReg, OddReg, BaseReg, OffReg, + Offset, PredReg, Pred)) { + Ops.pop_back(); Ops.pop_back(); - MBB->splice(InsertPos, MBB, Op); + + // Form the pair instruction. + if (isLd) { + BuildMI(*MBB, InsertPos, dl, TII->get(NewOpc)) + .addReg(EvenReg, RegState::Define) + .addReg(OddReg, RegState::Define) + .addReg(BaseReg).addReg(0).addImm(Offset) + .addImm(Pred).addReg(PredReg); + ++NumLDRDFormed; + } else { + BuildMI(*MBB, InsertPos, dl, TII->get(NewOpc)) + .addReg(EvenReg) + .addReg(OddReg) + .addReg(BaseReg).addReg(0).addImm(Offset) + .addImm(Pred).addReg(PredReg); + ++NumSTRDFormed; + } + MBB->erase(Op0); + MBB->erase(Op1); + + // Add register allocation hints to form register pairs. + MRI->setRegAllocationHint(EvenReg, ARMRI::RegPairEven, OddReg); + MRI->setRegAllocationHint(OddReg, ARMRI::RegPairOdd, EvenReg); + } else { + for (unsigned i = 0; i != NumMove; ++i) { + MachineInstr *Op = Ops.back(); + Ops.pop_back(); + MBB->splice(InsertPos, MBB, Op); + } } NumLdStMoved += NumMove; @@ -1039,7 +1298,8 @@ ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) { } if (StopHere) { - // Found a duplicate (a base+offset combination that's seen earlier). Backtrack. + // Found a duplicate (a base+offset combination that's seen earlier). + // Backtrack. --Loc; break; } diff --git a/lib/Target/ARM/ARMRegisterInfo.cpp b/lib/Target/ARM/ARMRegisterInfo.cpp index 199858f..bbc1300 100644 --- a/lib/Target/ARM/ARMRegisterInfo.cpp +++ b/lib/Target/ARM/ARMRegisterInfo.cpp @@ -159,7 +159,7 @@ ARMRegisterInfo::ARMRegisterInfo(const TargetInstrInfo &tii, const ARMSubtarget &sti) : ARMGenRegisterInfo(ARM::ADJCALLSTACKDOWN, ARM::ADJCALLSTACKUP), TII(tii), STI(sti), - FramePtr((STI.useThumbBacktraces() || STI.isThumb()) ? ARM::R7 : ARM::R11) { + FramePtr((STI.isTargetDarwin() || STI.isThumb()) ? ARM::R7 : ARM::R11) { } static inline @@ -194,10 +194,6 @@ void ARMRegisterInfo::emitLoadConstPool(MachineBasicBlock &MBB, .addReg(0).addImm(0).addImm(Pred).addReg(PredReg); } -const TargetRegisterClass *ARMRegisterInfo::getPointerRegClass() const { - return &ARM::GPRRegClass; -} - /// isLowRegister - Returns true if the register is low register r0-r7. /// bool ARMRegisterInfo::isLowRegister(unsigned Reg) const { @@ -304,6 +300,191 @@ ARMRegisterInfo::isReservedReg(const MachineFunction &MF, unsigned Reg) const { return false; } +const TargetRegisterClass *ARMRegisterInfo::getPointerRegClass() const { + return &ARM::GPRRegClass; +} + +/// getAllocationOrder - Returns the register allocation order for a specified +/// register class in the form of a pair of TargetRegisterClass iterators. 
+std::pair<TargetRegisterClass::iterator,TargetRegisterClass::iterator> +ARMRegisterInfo::getAllocationOrder(const TargetRegisterClass *RC, + unsigned HintType, unsigned HintReg, + const MachineFunction &MF) const { + // Alternative register allocation orders when favoring even / odd registers + // of register pairs. + + // No FP, R9 is available. + static const unsigned GPREven1[] = { + ARM::R0, ARM::R2, ARM::R4, ARM::R6, ARM::R8, ARM::R10, + ARM::R1, ARM::R3, ARM::R12,ARM::LR, ARM::R5, ARM::R7, + ARM::R9, ARM::R11 + }; + static const unsigned GPROdd1[] = { + ARM::R1, ARM::R3, ARM::R5, ARM::R7, ARM::R9, ARM::R11, + ARM::R0, ARM::R2, ARM::R12,ARM::LR, ARM::R4, ARM::R6, + ARM::R8, ARM::R10 + }; + + // FP is R7, R9 is available. + static const unsigned GPREven2[] = { + ARM::R0, ARM::R2, ARM::R4, ARM::R8, ARM::R10, + ARM::R1, ARM::R3, ARM::R12,ARM::LR, ARM::R5, ARM::R6, + ARM::R9, ARM::R11 + }; + static const unsigned GPROdd2[] = { + ARM::R1, ARM::R3, ARM::R5, ARM::R9, ARM::R11, + ARM::R0, ARM::R2, ARM::R12,ARM::LR, ARM::R4, ARM::R6, + ARM::R8, ARM::R10 + }; + + // FP is R11, R9 is available. + static const unsigned GPREven3[] = { + ARM::R0, ARM::R2, ARM::R4, ARM::R6, ARM::R8, + ARM::R1, ARM::R3, ARM::R10,ARM::R12,ARM::LR, ARM::R5, ARM::R7, + ARM::R9 + }; + static const unsigned GPROdd3[] = { + ARM::R1, ARM::R3, ARM::R5, ARM::R6, ARM::R9, + ARM::R0, ARM::R2, ARM::R10,ARM::R12,ARM::LR, ARM::R4, ARM::R7, + ARM::R8 + }; + + // No FP, R9 is not available. + static const unsigned GPREven4[] = { + ARM::R0, ARM::R2, ARM::R4, ARM::R6, ARM::R10, + ARM::R1, ARM::R3, ARM::R12,ARM::LR, ARM::R5, ARM::R7, ARM::R8, + ARM::R11 + }; + static const unsigned GPROdd4[] = { + ARM::R1, ARM::R3, ARM::R5, ARM::R7, ARM::R11, + ARM::R0, ARM::R2, ARM::R12,ARM::LR, ARM::R4, ARM::R6, ARM::R8, + ARM::R10 + }; + + // FP is R7, R9 is not available. + static const unsigned GPREven5[] = { + ARM::R0, ARM::R2, ARM::R4, ARM::R10, + ARM::R1, ARM::R3, ARM::R12,ARM::LR, ARM::R5, ARM::R6, ARM::R8, + ARM::R11 + }; + static const unsigned GPROdd5[] = { + ARM::R1, ARM::R3, ARM::R5, ARM::R11, + ARM::R0, ARM::R2, ARM::R12,ARM::LR, ARM::R4, ARM::R6, ARM::R8, + ARM::R10 + }; + + // FP is R11, R9 is not available. + static const unsigned GPREven6[] = { + ARM::R0, ARM::R2, ARM::R4, ARM::R6, + ARM::R1, ARM::R3, ARM::R10,ARM::R12,ARM::LR, ARM::R5, ARM::R7, ARM::R8 + }; + static const unsigned GPROdd6[] = { + ARM::R1, ARM::R3, ARM::R5, ARM::R7, + ARM::R0, ARM::R2, ARM::R10,ARM::R12,ARM::LR, ARM::R4, ARM::R6, ARM::R8 + }; + + + if (HintType == ARMRI::RegPairEven) { + if (isPhysicalRegister(HintReg) && getRegisterPairEven(HintReg, MF) == 0) + // It's no longer possible to fulfill this hint. Return the default + // allocation order. 
+ return std::make_pair(RC->allocation_order_begin(MF), + RC->allocation_order_end(MF)); + + if (!STI.isTargetDarwin() && !hasFP(MF)) { + if (!STI.isR9Reserved()) + return std::make_pair(GPREven1, + GPREven1 + (sizeof(GPREven1)/sizeof(unsigned))); + else + return std::make_pair(GPREven4, + GPREven4 + (sizeof(GPREven4)/sizeof(unsigned))); + } else if (FramePtr == ARM::R7) { + if (!STI.isR9Reserved()) + return std::make_pair(GPREven2, + GPREven2 + (sizeof(GPREven2)/sizeof(unsigned))); + else + return std::make_pair(GPREven5, + GPREven5 + (sizeof(GPREven5)/sizeof(unsigned))); + } else { // FramePtr == ARM::R11 + if (!STI.isR9Reserved()) + return std::make_pair(GPREven3, + GPREven3 + (sizeof(GPREven3)/sizeof(unsigned))); + else + return std::make_pair(GPREven6, + GPREven6 + (sizeof(GPREven6)/sizeof(unsigned))); + } + } else if (HintType == ARMRI::RegPairOdd) { + if (isPhysicalRegister(HintReg) && getRegisterPairOdd(HintReg, MF) == 0) + // It's no longer possible to fulfill this hint. Return the default + // allocation order. + return std::make_pair(RC->allocation_order_begin(MF), + RC->allocation_order_end(MF)); + + if (!STI.isTargetDarwin() && !hasFP(MF)) { + if (!STI.isR9Reserved()) + return std::make_pair(GPROdd1, + GPROdd1 + (sizeof(GPROdd1)/sizeof(unsigned))); + else + return std::make_pair(GPROdd4, + GPROdd4 + (sizeof(GPROdd4)/sizeof(unsigned))); + } else if (FramePtr == ARM::R7) { + if (!STI.isR9Reserved()) + return std::make_pair(GPROdd2, + GPROdd2 + (sizeof(GPROdd2)/sizeof(unsigned))); + else + return std::make_pair(GPROdd5, + GPROdd5 + (sizeof(GPROdd5)/sizeof(unsigned))); + } else { // FramePtr == ARM::R11 + if (!STI.isR9Reserved()) + return std::make_pair(GPROdd3, + GPROdd3 + (sizeof(GPROdd3)/sizeof(unsigned))); + else + return std::make_pair(GPROdd6, + GPROdd6 + (sizeof(GPROdd6)/sizeof(unsigned))); + } + } + return std::make_pair(RC->allocation_order_begin(MF), + RC->allocation_order_end(MF)); +} + +/// ResolveRegAllocHint - Resolves the specified register allocation hint +/// to a physical register. Returns the physical register if it is successful. +unsigned +ARMRegisterInfo::ResolveRegAllocHint(unsigned Type, unsigned Reg, + const MachineFunction &MF) const { + if (Reg == 0 || !isPhysicalRegister(Reg)) + return 0; + if (Type == 0) + return Reg; + else if (Type == (unsigned)ARMRI::RegPairOdd) + // Odd register. + return getRegisterPairOdd(Reg, MF); + else if (Type == (unsigned)ARMRI::RegPairEven) + // Even register. + return getRegisterPairEven(Reg, MF); + return 0; +} + +void +ARMRegisterInfo::UpdateRegAllocHint(unsigned Reg, unsigned NewReg, + MachineFunction &MF) const { + MachineRegisterInfo *MRI = &MF.getRegInfo(); + std::pair<unsigned, unsigned> Hint = MRI->getRegAllocationHint(Reg); + if ((Hint.first == (unsigned)ARMRI::RegPairOdd || + Hint.first == (unsigned)ARMRI::RegPairEven) && + Hint.second && TargetRegisterInfo::isVirtualRegister(Hint.second)) { + // If 'Reg' is one of the even / odd register pair and it's now changed + // (e.g. coalesced) into a different register. The other register of the + // pair allocation hint must be updated to reflect the relationship + // change. + unsigned OtherReg = Hint.second; + Hint = MRI->getRegAllocationHint(OtherReg); + if (Hint.second == Reg) + // Make sure the pair has not already divorced. 
+ MRI->setRegAllocationHint(OtherReg, Hint.first, NewReg); + } +} + bool ARMRegisterInfo::requiresRegisterScavenging(const MachineFunction &MF) const { const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); @@ -1506,9 +1687,8 @@ unsigned ARMRegisterInfo::getRARegister() const { unsigned ARMRegisterInfo::getFrameRegister(MachineFunction &MF) const { if (STI.isTargetDarwin() || hasFP(MF)) - return (STI.useThumbBacktraces() || STI.isThumb()) ? ARM::R7 : ARM::R11; - else - return ARM::SP; + return FramePtr; + return ARM::SP; } unsigned ARMRegisterInfo::getEHExceptionRegister() const { @@ -1525,4 +1705,152 @@ int ARMRegisterInfo::getDwarfRegNum(unsigned RegNum, bool isEH) const { return ARMGenRegisterInfo::getDwarfRegNumFull(RegNum, 0); } +unsigned ARMRegisterInfo::getRegisterPairEven(unsigned Reg, + const MachineFunction &MF) const { + switch (Reg) { + default: break; + // Return 0 if either register of the pair is a special register. + // So no R12, etc. + case ARM::R1: + return ARM::R0; + case ARM::R3: + // FIXME! + return STI.isThumb() ? 0 : ARM::R2; + case ARM::R5: + return ARM::R4; + case ARM::R7: + return isReservedReg(MF, ARM::R7) ? 0 : ARM::R6; + case ARM::R9: + return isReservedReg(MF, ARM::R9) ? 0 :ARM::R8; + case ARM::R11: + return isReservedReg(MF, ARM::R11) ? 0 : ARM::R10; + + case ARM::S1: + return ARM::S0; + case ARM::S3: + return ARM::S2; + case ARM::S5: + return ARM::S4; + case ARM::S7: + return ARM::S6; + case ARM::S9: + return ARM::S8; + case ARM::S11: + return ARM::S10; + case ARM::S13: + return ARM::S12; + case ARM::S15: + return ARM::S14; + case ARM::S17: + return ARM::S16; + case ARM::S19: + return ARM::S18; + case ARM::S21: + return ARM::S20; + case ARM::S23: + return ARM::S22; + case ARM::S25: + return ARM::S24; + case ARM::S27: + return ARM::S26; + case ARM::S29: + return ARM::S28; + case ARM::S31: + return ARM::S30; + + case ARM::D1: + return ARM::D0; + case ARM::D3: + return ARM::D2; + case ARM::D5: + return ARM::D4; + case ARM::D7: + return ARM::D6; + case ARM::D9: + return ARM::D8; + case ARM::D11: + return ARM::D10; + case ARM::D13: + return ARM::D12; + case ARM::D15: + return ARM::D14; + } + + return 0; +} + +unsigned ARMRegisterInfo::getRegisterPairOdd(unsigned Reg, + const MachineFunction &MF) const { + switch (Reg) { + default: break; + // Return 0 if either register of the pair is a special register. + // So no R12, etc. + case ARM::R0: + return ARM::R1; + case ARM::R2: + // FIXME! + return STI.isThumb() ? 0 : ARM::R3; + case ARM::R4: + return ARM::R5; + case ARM::R6: + return isReservedReg(MF, ARM::R7) ? 0 : ARM::R7; + case ARM::R8: + return isReservedReg(MF, ARM::R9) ? 0 :ARM::R9; + case ARM::R10: + return isReservedReg(MF, ARM::R11) ? 
0 : ARM::R11; + + case ARM::S0: + return ARM::S1; + case ARM::S2: + return ARM::S3; + case ARM::S4: + return ARM::S5; + case ARM::S6: + return ARM::S7; + case ARM::S8: + return ARM::S9; + case ARM::S10: + return ARM::S11; + case ARM::S12: + return ARM::S13; + case ARM::S14: + return ARM::S15; + case ARM::S16: + return ARM::S17; + case ARM::S18: + return ARM::S19; + case ARM::S20: + return ARM::S21; + case ARM::S22: + return ARM::S23; + case ARM::S24: + return ARM::S25; + case ARM::S26: + return ARM::S27; + case ARM::S28: + return ARM::S29; + case ARM::S30: + return ARM::S31; + + case ARM::D0: + return ARM::D1; + case ARM::D2: + return ARM::D3; + case ARM::D4: + return ARM::D5; + case ARM::D6: + return ARM::D7; + case ARM::D8: + return ARM::D9; + case ARM::D10: + return ARM::D11; + case ARM::D12: + return ARM::D13; + case ARM::D14: + return ARM::D15; + } + + return 0; +} + #include "ARMGenRegisterInfo.inc" diff --git a/lib/Target/ARM/ARMRegisterInfo.h b/lib/Target/ARM/ARMRegisterInfo.h index e1d9efb..e8f4fd8 100644 --- a/lib/Target/ARM/ARMRegisterInfo.h +++ b/lib/Target/ARM/ARMRegisterInfo.h @@ -22,12 +22,17 @@ namespace llvm { class TargetInstrInfo; class Type; +/// Register allocation hints. +namespace ARMRI { + enum { + RegPairOdd = 1, + RegPairEven = 2 + }; +} + struct ARMRegisterInfo : public ARMGenRegisterInfo { const TargetInstrInfo &TII; const ARMSubtarget &STI; -private: - /// FramePtr - ARM physical register used as frame ptr. - unsigned FramePtr; public: ARMRegisterInfo(const TargetInstrInfo &tii, const ARMSubtarget &STI); @@ -49,10 +54,6 @@ public: /// if the register is a single precision VFP register. static unsigned getRegisterNumbering(unsigned RegEnum, bool &isSPVFP); - /// getPointerRegClass - Return the register class to use to hold pointers. - /// This is used for addressing modes. - const TargetRegisterClass *getPointerRegClass() const; - /// Code Generation virtual methods... const TargetRegisterClass * getPhysicalRegisterRegClass(unsigned Reg, MVT VT = MVT::Other) const; @@ -65,6 +66,19 @@ public: bool isReservedReg(const MachineFunction &MF, unsigned Reg) const; + const TargetRegisterClass *getPointerRegClass() const; + + std::pair<TargetRegisterClass::iterator,TargetRegisterClass::iterator> + getAllocationOrder(const TargetRegisterClass *RC, + unsigned HintType, unsigned HintReg, + const MachineFunction &MF) const; + + unsigned ResolveRegAllocHint(unsigned Type, unsigned Reg, + const MachineFunction &MF) const; + + void UpdateRegAllocHint(unsigned Reg, unsigned NewReg, + MachineFunction &MF) const; + bool requiresRegisterScavenging(const MachineFunction &MF) const; bool hasFP(const MachineFunction &MF) const; @@ -95,6 +109,15 @@ public: int getDwarfRegNum(unsigned RegNum, bool isEH) const; bool isLowRegister(unsigned Reg) const; + +private: + /// FramePtr - ARM physical register used as frame ptr. 
+ unsigned FramePtr; + + unsigned getRegisterPairEven(unsigned Reg, const MachineFunction &MF) const; + + unsigned getRegisterPairOdd(unsigned Reg, const MachineFunction &MF) const; + }; } // end namespace llvm diff --git a/lib/Target/ARM/ARMRegisterInfo.td b/lib/Target/ARM/ARMRegisterInfo.td index ebe7d58..d864079 100644 --- a/lib/Target/ARM/ARMRegisterInfo.td +++ b/lib/Target/ARM/ARMRegisterInfo.td @@ -134,7 +134,7 @@ def GPR : RegisterClass<"ARM", [i32], 32, [R0, R1, R2, R3, R4, R5, R6, GPRClass::allocation_order_begin(const MachineFunction &MF) const { const TargetMachine &TM = MF.getTarget(); const ARMSubtarget &Subtarget = TM.getSubtarget<ARMSubtarget>(); - if (Subtarget.useThumbBacktraces()) { + if (Subtarget.isTargetDarwin()) { if (Subtarget.isR9Reserved()) return ARM_GPR_AO_4; else @@ -154,7 +154,7 @@ def GPR : RegisterClass<"ARM", [i32], 32, [R0, R1, R2, R3, R4, R5, R6, const ARMSubtarget &Subtarget = TM.getSubtarget<ARMSubtarget>(); GPRClass::iterator I; - if (Subtarget.useThumbBacktraces()) { + if (Subtarget.isTargetDarwin()) { if (Subtarget.isR9Reserved()) { I = ARM_GPR_AO_4 + (sizeof(ARM_GPR_AO_4)/sizeof(unsigned)); } else { diff --git a/lib/Target/ARM/ARMSchedule.td b/lib/Target/ARM/ARMSchedule.td new file mode 100644 index 0000000..75fa707 --- /dev/null +++ b/lib/Target/ARM/ARMSchedule.td @@ -0,0 +1,35 @@ +//===- ARMSchedule.td - ARM Scheduling Definitions ---------*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Functional units across ARM processors +// +def FU_iALU : FuncUnit; // Integer alu unit +def FU_iLdSt : FuncUnit; // Integer load / store unit +def FU_FpALU : FuncUnit; // FP alu unit +def FU_FpLdSt : FuncUnit; // FP load / store unit +def FU_Br : FuncUnit; // Branch unit + +//===----------------------------------------------------------------------===// +// Instruction Itinerary classes used for ARM +// +def IIC_iALU : InstrItinClass; +def IIC_iLoad : InstrItinClass; +def IIC_iStore : InstrItinClass; +def IIC_fpALU : InstrItinClass; +def IIC_fpLoad : InstrItinClass; +def IIC_fpStore : InstrItinClass; +def IIC_Br : InstrItinClass; + +//===----------------------------------------------------------------------===// +// Processor instruction itineraries. + +def GenericItineraries : ProcessorItineraries<[]>; + +include "ARMScheduleV6.td" diff --git a/lib/Target/ARM/ARMScheduleV6.td b/lib/Target/ARM/ARMScheduleV6.td new file mode 100644 index 0000000..596a57f --- /dev/null +++ b/lib/Target/ARM/ARMScheduleV6.td @@ -0,0 +1,22 @@ +//===- ARMSchedule.td - ARM v6 Scheduling Definitions ------*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the itinerary class data for the ARM v6 processors. 
+// +//===----------------------------------------------------------------------===// + +def V6Itineraries : ProcessorItineraries<[ + InstrItinData<IIC_iALU , [InstrStage<1, [FU_iALU]>]>, + InstrItinData<IIC_iLoad , [InstrStage<2, [FU_iLdSt]>]>, + InstrItinData<IIC_iStore , [InstrStage<1, [FU_iLdSt]>]>, + InstrItinData<IIC_fpALU , [InstrStage<6, [FU_FpALU]>]>, + InstrItinData<IIC_fpLoad , [InstrStage<2, [FU_FpLdSt]>]>, + InstrItinData<IIC_fpStore , [InstrStage<1, [FU_FpLdSt]>]>, + InstrItinData<IIC_Br , [InstrStage<3, [FU_Br]>]> +]>; diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp index a978380..7ac7b49 100644 --- a/lib/Target/ARM/ARMSubtarget.cpp +++ b/lib/Target/ARM/ARMSubtarget.cpp @@ -24,7 +24,6 @@ ARMSubtarget::ARMSubtarget(const Module &M, const std::string &FS, , ARMFPUType(None) , IsThumb(isThumb) , ThumbMode(Thumb1) - , UseThumbBacktraces(false) , IsR9Reserved(false) , stackAlignment(4) , CPUString("generic") @@ -83,8 +82,6 @@ ARMSubtarget::ARMSubtarget(const Module &M, const std::string &FS, if (isAAPCS_ABI()) stackAlignment = 8; - if (isTargetDarwin()) { - UseThumbBacktraces = true; + if (isTargetDarwin()) IsR9Reserved = true; - } } diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h index 0704055..c3cc7ff 100644 --- a/lib/Target/ARM/ARMSubtarget.h +++ b/lib/Target/ARM/ARMSubtarget.h @@ -14,6 +14,7 @@ #ifndef ARMSUBTARGET_H #define ARMSUBTARGET_H +#include "llvm/Target/TargetInstrItineraries.h" #include "llvm/Target/TargetSubtarget.h" #include <string> @@ -48,9 +49,6 @@ protected: /// ThumbMode - Indicates supported Thumb version. ThumbTypeEnum ThumbMode; - /// UseThumbBacktraces - True if we use thumb style backtraces. - bool UseThumbBacktraces; - /// IsR9Reserved - True if R9 is a not available as general purpose register. bool IsR9Reserved; @@ -61,6 +59,9 @@ protected: /// CPUString - String name of used CPU. std::string CPUString; + /// Selected instruction itineraries (one entry per itinerary class.) + InstrItineraryData InstrItins; + public: enum { isELF, isDarwin @@ -106,14 +107,17 @@ protected: bool isAAPCS_ABI() const { return TargetABI == ARM_ABI_AAPCS; } bool isThumb() const { return IsThumb; } - bool isThumb1() const { return IsThumb && (ThumbMode == Thumb1); } - bool isThumb2() const { return IsThumb && (ThumbMode >= Thumb2); } + bool isThumb1Only() const { return IsThumb && (ThumbMode == Thumb1); } + bool hasThumb2() const { return IsThumb && (ThumbMode >= Thumb2); } - bool useThumbBacktraces() const { return UseThumbBacktraces; } bool isR9Reserved() const { return IsR9Reserved; } const std::string & getCPUString() const { return CPUString; } + /// getInstrItins - Return the instruction itineraies based on subtarget + /// selection. + const InstrItineraryData &getInstrItineraryData() const { return InstrItins; } + /// getStackAlignment - Returns the minimum alignment known to hold of the /// stack frame on entry to the function and which must be maintained by every /// function for this subtarget. 
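The ARMRegisterInfo hunks above introduce target register-allocation hints: a virtual register that should land in one half of an even/odd pair is tagged ARMRI::RegPairEven or ARMRI::RegPairOdd, and the allocator calls back through ResolveRegAllocHint / getRegisterPairEven / getRegisterPairOdd to turn the hint, plus the partner's assignment, into a concrete register (reserved registers such as R7, R9 and R11 yield 0, meaning "no usable partner"). Below is a minimal standalone sketch of that resolution step, with plain integers 0..11 standing in for ARM::R0..R11 and none of the real LLVM interfaces; presumably the point of the pairing is that paired load/store forms want adjacent registers.

#include <cstdio>

// Hint kinds, mirroring ARMRI::RegPairOdd / ARMRI::RegPairEven above.
enum HintType { NoHint = 0, RegPairOdd = 1, RegPairEven = 2 };

// Registers modeled as plain numbers 0..11 standing in for ARM::R0..R11.
// Return 0xFF where the diff returns 0 ("no usable partner").
static unsigned pairEven(unsigned Reg) {  // R1 -> R0, R3 -> R2, ...
  return (Reg <= 11 && Reg % 2 == 1) ? Reg - 1 : 0xFF;
}
static unsigned pairOdd(unsigned Reg) {   // R0 -> R1, R2 -> R3, ...
  return (Reg <= 10 && Reg % 2 == 0) ? Reg + 1 : 0xFF;
}

// Shape of ResolveRegAllocHint: combine the hint type carried by one vreg
// with the physical register already handed to its partner.
static unsigned resolveHint(HintType Type, unsigned PartnerPhysReg) {
  switch (Type) {
  case RegPairEven: return pairEven(PartnerPhysReg);
  case RegPairOdd:  return pairOdd(PartnerPhysReg);
  default:          return 0xFF;
  }
}

int main() {
  // The partner vreg landed in R4; our RegPairOdd hint resolves to R5.
  unsigned R = resolveHint(RegPairOdd, 4);
  if (R != 0xFF) std::printf("hint resolved to R%u\n", R);
  return 0;
}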
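The new ARMSchedule.td / ARMScheduleV6.td files, plus the InstrItineraryData plumbing through ARMSubtarget and ARMTargetMachine, give the scheduler per-class timing: each IIC_* itinerary class maps to a stage on one of the FU_* functional units. A rough standalone model of that lookup follows, with plain C++ data in place of the TableGen-generated tables; the cycle counts are the ones from V6Itineraries above.

#include <cstdio>

// Functional units, as in ARMSchedule.td (FU_iALU, FU_iLdSt, ...).
enum FuncUnit { IntALU, IntLdSt, FpALU, FpLdSt, Branch };

// One pipeline stage: an instruction occupies Unit for Cycles cycles.
struct Stage { unsigned Cycles; FuncUnit Unit; };

// Itinerary classes, as in the IIC_* definitions.
enum ItinClass { iALU, iLoad, iStore, fpALU, fpLoad, fpStore, Br, NumClasses };

// The V6Itineraries table as data: one single-stage entry per class.
static const Stage V6Itins[NumClasses] = {
  {1, IntALU},   // IIC_iALU
  {2, IntLdSt},  // IIC_iLoad
  {1, IntLdSt},  // IIC_iStore
  {6, FpALU},    // IIC_fpALU
  {2, FpLdSt},   // IIC_fpLoad
  {1, FpLdSt},   // IIC_fpStore
  {3, Branch},   // IIC_Br
};

// A scheduler costs an instruction by summing its stage cycles; with one
// stage per class this collapses to a table lookup.
static unsigned latency(ItinClass IC) { return V6Itins[IC].Cycles; }

int main() {
  std::printf("i-load: %u cycles, fp-alu: %u cycles\n",
              latency(iLoad), latency(fpALU));
  return 0;
}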
diff --git a/lib/Target/ARM/ARMTargetAsmInfo.cpp b/lib/Target/ARM/ARMTargetAsmInfo.cpp index 4107dcc..42b8eae 100644 --- a/lib/Target/ARM/ARMTargetAsmInfo.cpp +++ b/lib/Target/ARM/ARMTargetAsmInfo.cpp @@ -17,80 +17,42 @@ #include <cctype> using namespace llvm; - const char *const llvm::arm_asm_table[] = { - "{r0}", "r0", - "{r1}", "r1", - "{r2}", "r2", - "{r3}", "r3", - "{r4}", "r4", - "{r5}", "r5", - "{r6}", "r6", - "{r7}", "r7", - "{r8}", "r8", - "{r9}", "r9", - "{r10}", "r10", - "{r11}", "r11", - "{r12}", "r12", - "{r13}", "r13", - "{r14}", "r14", - "{lr}", "lr", - "{sp}", "sp", - "{ip}", "ip", - "{fp}", "fp", - "{sl}", "sl", - "{memory}", "memory", - "{cc}", "cc", - 0,0}; + "{r0}", "r0", + "{r1}", "r1", + "{r2}", "r2", + "{r3}", "r3", + "{r4}", "r4", + "{r5}", "r5", + "{r6}", "r6", + "{r7}", "r7", + "{r8}", "r8", + "{r9}", "r9", + "{r10}", "r10", + "{r11}", "r11", + "{r12}", "r12", + "{r13}", "r13", + "{r14}", "r14", + "{lr}", "lr", + "{sp}", "sp", + "{ip}", "ip", + "{fp}", "fp", + "{sl}", "sl", + "{memory}", "memory", + "{cc}", "cc", + 0,0 +}; ARMDarwinTargetAsmInfo::ARMDarwinTargetAsmInfo(const ARMTargetMachine &TM): ARMTargetAsmInfo<DarwinTargetAsmInfo>(TM) { Subtarget = &TM.getSubtarget<ARMSubtarget>(); - GlobalPrefix = "_"; - PrivateGlobalPrefix = "L"; - LessPrivateGlobalPrefix = "l"; - StringConstantPrefix = "\1LC"; - BSSSection = 0; // no BSS section ZeroDirective = "\t.space\t"; ZeroFillDirective = "\t.zerofill\t"; // Uses .zerofill SetDirective = "\t.set\t"; - WeakRefDirective = "\t.weak_reference\t"; - WeakDefDirective = "\t.weak_definition "; - HiddenDirective = "\t.private_extern\t"; ProtectedDirective = NULL; - JumpTableDataSection = ".const"; - CStringSection = "\t.cstring"; HasDotTypeDotSizeDirective = false; - HasSingleParameterDotFile = false; - NeedsIndirectEncoding = true; - if (TM.getRelocationModel() == Reloc::Static) { - StaticCtorsSection = ".constructor"; - StaticDtorsSection = ".destructor"; - } else { - StaticCtorsSection = ".mod_init_func"; - StaticDtorsSection = ".mod_term_func"; - } - - // In non-PIC modes, emit a special label before jump tables so that the - // linker can perform more accurate dead code stripping. - if (TM.getRelocationModel() != Reloc::PIC_) { - // Emit a local label that is preserved until the linker runs. 
- JumpTableSpecialLabelPrefix = "l"; - } - - NeedsSet = true; - DwarfAbbrevSection = ".section __DWARF,__debug_abbrev,regular,debug"; - DwarfInfoSection = ".section __DWARF,__debug_info,regular,debug"; - DwarfLineSection = ".section __DWARF,__debug_line,regular,debug"; - DwarfFrameSection = ".section __DWARF,__debug_frame,regular,debug"; - DwarfPubNamesSection = ".section __DWARF,__debug_pubnames,regular,debug"; - DwarfPubTypesSection = ".section __DWARF,__debug_pubtypes,regular,debug"; - DwarfStrSection = ".section __DWARF,__debug_str,regular,debug"; - DwarfLocSection = ".section __DWARF,__debug_loc,regular,debug"; - DwarfARangesSection = ".section __DWARF,__debug_aranges,regular,debug"; - DwarfRangesSection = ".section __DWARF,__debug_ranges,regular,debug"; - DwarfMacInfoSection = ".section __DWARF,__debug_macinfo,regular,debug"; + SupportsDebugInformation = true; } ARMELFTargetAsmInfo::ARMELFTargetAsmInfo(const ARMTargetMachine &TM): @@ -115,7 +77,7 @@ ARMELFTargetAsmInfo::ARMELFTargetAsmInfo(const ARMTargetMachine &TM): DwarfLocSection = "\t.section\t.debug_loc,\"\",%progbits"; DwarfARangesSection = "\t.section\t.debug_aranges,\"\",%progbits"; DwarfRangesSection = "\t.section\t.debug_ranges,\"\",%progbits"; - DwarfMacInfoSection = "\t.section\t.debug_macinfo,\"\",%progbits"; + DwarfMacroInfoSection = "\t.section\t.debug_macinfo,\"\",%progbits"; if (Subtarget->isAAPCS_ABI()) { StaticCtorsSection = "\t.section .init_array,\"aw\",%init_array"; @@ -124,6 +86,7 @@ ARMELFTargetAsmInfo::ARMELFTargetAsmInfo(const ARMTargetMachine &TM): StaticCtorsSection = "\t.section .ctors,\"aw\",%progbits"; StaticDtorsSection = "\t.section .dtors,\"aw\",%progbits"; } + SupportsDebugInformation = true; } /// Count the number of comma-separated arguments. diff --git a/lib/Target/ARM/ARMTargetAsmInfo.h b/lib/Target/ARM/ARMTargetAsmInfo.h index 9e6f856..683692f 100644 --- a/lib/Target/ARM/ARMTargetAsmInfo.h +++ b/lib/Target/ARM/ARMTargetAsmInfo.h @@ -26,8 +26,7 @@ namespace llvm { template <class BaseTAI> struct ARMTargetAsmInfo : public BaseTAI { - explicit ARMTargetAsmInfo(const ARMTargetMachine &TM): - BaseTAI(TM) { + explicit ARMTargetAsmInfo(const ARMTargetMachine &TM) : BaseTAI(TM) { BaseTAI::AsmTransCBE = arm_asm_table; BaseTAI::AlignmentIsInBytes = false; diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp index 7033907..8006b9b 100644 --- a/lib/Target/ARM/ARMTargetMachine.cpp +++ b/lib/Target/ARM/ARMTargetMachine.cpp @@ -23,9 +23,6 @@ #include "llvm/Target/TargetOptions.h" using namespace llvm; -static cl::opt<bool> -EnablePreLdStOpti("arm-pre-alloc-loadstore-opti", cl::Hidden, - cl::desc("Enable pre-regalloc load store optimization pass")); static cl::opt<bool> DisableLdStOpti("disable-arm-loadstore-opti", cl::Hidden, cl::desc("Disable load store optimization pass")); static cl::opt<bool> DisableIfConversion("disable-arm-if-conversion",cl::Hidden, @@ -42,6 +39,11 @@ int ARMTargetMachineModule = 0; static RegisterTarget<ARMTargetMachine> X("arm", "ARM"); static RegisterTarget<ThumbTargetMachine> Y("thumb", "Thumb"); +// Force static initialization when called from llvm/InitializeAllTargets.h +namespace llvm { + void InitializeARMTarget() { } +} + // No assembler printer by default ARMTargetMachine::AsmPrinterCtorFn ARMTargetMachine::AsmPrinterCtor = 0; @@ -97,7 +99,8 @@ ARMTargetMachine::ARMTargetMachine(const Module &M, const std::string &FS, InstrInfo(Subtarget), FrameInfo(Subtarget), JITInfo(), - TLInfo(*this) { + TLInfo(*this), + 
InstrItins(Subtarget.getInstrItineraryData()) { DefRelocModel = getRelocationModel(); } @@ -149,8 +152,6 @@ bool ARMTargetMachine::addInstSelector(PassManagerBase &PM, bool ARMTargetMachine::addPreRegAlloc(PassManagerBase &PM, CodeGenOpt::Level OptLevel) { - if (!EnablePreLdStOpti) - return false; // FIXME: temporarily disabling load / store optimization pass for Thumb mode. if (OptLevel != CodeGenOpt::None && !DisableLdStOpti && !Subtarget.isThumb()) PM.add(createARMLoadStoreOptimizationPass(true)); diff --git a/lib/Target/ARM/ARMTargetMachine.h b/lib/Target/ARM/ARMTargetMachine.h index 7192c1b..c4c8e6c 100644 --- a/lib/Target/ARM/ARMTargetMachine.h +++ b/lib/Target/ARM/ARMTargetMachine.h @@ -28,13 +28,14 @@ namespace llvm { class Module; class ARMTargetMachine : public LLVMTargetMachine { - ARMSubtarget Subtarget; - const TargetData DataLayout; // Calculates type size & alignment - ARMInstrInfo InstrInfo; - ARMFrameInfo FrameInfo; - ARMJITInfo JITInfo; - ARMTargetLowering TLInfo; - Reloc::Model DefRelocModel; // Reloc model before it's overridden. + ARMSubtarget Subtarget; + const TargetData DataLayout; // Calculates type size & alignment + ARMInstrInfo InstrInfo; + ARMFrameInfo FrameInfo; + ARMJITInfo JITInfo; + ARMTargetLowering TLInfo; + InstrItineraryData InstrItins; + Reloc::Model DefRelocModel; // Reloc model before it's overridden. protected: // To avoid having target depend on the asmprinter stuff libraries, asmprinter @@ -59,6 +60,9 @@ public: virtual ARMTargetLowering *getTargetLowering() const { return const_cast<ARMTargetLowering*>(&TLInfo); } + virtual const InstrItineraryData getInstrItineraryData() const { + return InstrItins; + } static void registerAsmPrinter(AsmPrinterCtorFn F) { AsmPrinterCtor = F; diff --git a/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp b/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp index d908cf4..948a100 100644 --- a/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp +++ b/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp @@ -45,7 +45,6 @@ STATISTIC(EmittedInsts, "Number of machine instrs printed"); namespace { class VISIBILITY_HIDDEN ARMAsmPrinter : public AsmPrinter { DwarfWriter *DW; - MachineModuleInfo *MMI; /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can /// make the right decision when printing asm code for different targets. @@ -84,7 +83,7 @@ namespace { explicit ARMAsmPrinter(raw_ostream &O, TargetMachine &TM, const TargetAsmInfo *T, CodeGenOpt::Level OL, bool V) - : AsmPrinter(O, TM, T, OL, V), DW(0), MMI(NULL), AFI(NULL), MCP(NULL), + : AsmPrinter(O, TM, T, OL, V), DW(0), AFI(NULL), MCP(NULL), InCPMode(false) { Subtarget = &TM.getSubtarget<ARMSubtarget>(); } @@ -97,6 +96,7 @@ namespace { const char *Modifier = 0); void printSOImmOperand(const MachineInstr *MI, int opNum); void printSOImm2PartOperand(const MachineInstr *MI, int opNum); + void printSOOperand(const MachineInstr *MI, int OpNum); void printSORegOperand(const MachineInstr *MI, int opNum); void printAddrMode2Operand(const MachineInstr *MI, int OpNo); void printAddrMode2OffsetOperand(const MachineInstr *MI, int OpNo); @@ -396,6 +396,28 @@ void ARMAsmPrinter::printSOImm2PartOperand(const MachineInstr *MI, int OpNum) { printSOImm(O, ARM_AM::getSOImmVal(V2), VerboseAsm, TAI); } +// Constant shifts so_reg is a 3-operand unit corresponding to register forms of +// the A5.1 "Addressing Mode 1 - Data-processing operands" forms. This +// includes: +// REG 0 - e.g. R5 +// REG IMM, SH_OPC - e.g. 
R5, LSL #3 +void ARMAsmPrinter::printSOOperand(const MachineInstr *MI, int OpNum) { + const MachineOperand &MO1 = MI->getOperand(OpNum); + const MachineOperand &MO2 = MI->getOperand(OpNum+1); + + unsigned Reg = MO1.getReg(); + assert(TargetRegisterInfo::isPhysicalRegister(Reg)); + O << TM.getRegisterInfo()->getAsmName(Reg); + + // Print the shift opc. + O << ", " + << ARM_AM::getShiftOpcStr(ARM_AM::getSORegShOp(MO2.getImm())) + << " "; + + assert(MO2.isImm() && "Not a valid t2_so_reg value!"); + O << "#" << ARM_AM::getSORegOffset(MO2.getImm()); +} + // so_reg is a 4-operand unit corresponding to register forms of the A5.1 // "Addressing Mode 1 - Data-processing operands" forms. This includes: // REG 0 0 - e.g. R5 @@ -805,17 +827,11 @@ void ARMAsmPrinter::printMachineInstruction(const MachineInstr *MI) { bool ARMAsmPrinter::doInitialization(Module &M) { bool Result = AsmPrinter::doInitialization(M); - - // Emit initial debug information. - MMI = getAnalysisIfAvailable<MachineModuleInfo>(); - assert(MMI); DW = getAnalysisIfAvailable<DwarfWriter>(); - assert(DW && "Dwarf Writer is not available"); - DW->BeginModule(&M, MMI, O, this, TAI); - // Darwin wants symbols to be quoted if they have complex names. - if (Subtarget->isTargetDarwin()) - Mang->setUseQuotes(true); + // Thumb-2 instructions are supported only in unified assembler syntax mode. + if (Subtarget->hasThumb2()) + O << "\t.syntax unified\n"; // Emit ARM Build Attributes if (Subtarget->isTargetELF()) { @@ -1115,3 +1131,9 @@ namespace { } } Registrator; } + +// Force static initialization when called from +// llvm/InitializeAllAsmPrinters.h +namespace llvm { + void InitializeARMAsmPrinter() { } +} diff --git a/lib/Target/Alpha/AlphaISelLowering.cpp b/lib/Target/Alpha/AlphaISelLowering.cpp index 7ed8ef6..1be1713 100644 --- a/lib/Target/Alpha/AlphaISelLowering.cpp +++ b/lib/Target/Alpha/AlphaISelLowering.cpp @@ -39,7 +39,7 @@ static unsigned AddLiveIn(MachineFunction &MF, unsigned PReg, AlphaTargetLowering::AlphaTargetLowering(TargetMachine &TM) : TargetLowering(TM) { // Set up the TargetLowering object. - //I am having problems with shr n ubyte 1 + //I am having problems with shr n i8 1 setShiftAmountType(MVT::i64); setBooleanContents(ZeroOrOneBooleanContent); diff --git a/lib/Target/Alpha/AlphaTargetMachine.cpp b/lib/Target/Alpha/AlphaTargetMachine.cpp index 4c83054..cdd4fa4 100644 --- a/lib/Target/Alpha/AlphaTargetMachine.cpp +++ b/lib/Target/Alpha/AlphaTargetMachine.cpp @@ -21,16 +21,17 @@ using namespace llvm; -/// AlphaTargetMachineModule - Note that this is used on hosts that cannot link -/// in a library unless there are references into the library. In particular, -/// it seems that it is not possible to get things to work on Win32 without -/// this. Though it is unused, do not remove it. 
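The comment deleted here (and again for IA64 and CellSPU below) documents the old linkage hack: an unused extern int that other files referenced purely to keep the target's object file from being dropped by the linker on Win32. The hunks that follow replace it with two cooperating pieces: empty InitializeXTarget() / InitializeXAsmPrinter() anchor functions that llvm/InitializeAllTargets.h and InitializeAllAsmPrinters.h can call, and a file-local static Registrator whose constructor hands the asm-printer factory to the target machine, so the target library no longer links against its printer. A self-contained sketch of the pattern under placeholder names; the real factory signature takes raw_ostream, TargetMachine, CodeGenOpt::Level and a verbose flag.

#include <cassert>
#include <cstdio>

// Placeholder for FunctionPass; the factory type mirrors AsmPrinterCtorFn.
struct Pass { const char *Name; };
typedef Pass *(*AsmPrinterCtorFn)();

struct DemoTargetMachine {
  // Stays null unless an asm-printer object file is linked in.
  static AsmPrinterCtorFn AsmPrinterCtor;
  static void registerAsmPrinter(AsmPrinterCtorFn F) { AsmPrinterCtor = F; }

  bool addAssemblyEmitter() {
    assert(AsmPrinterCtor && "AsmPrinter was not linked in");
    if (AsmPrinterCtor)
      std::printf("added pass: %s\n", AsmPrinterCtor()->Name);
    return false;
  }
};
AsmPrinterCtorFn DemoTargetMachine::AsmPrinterCtor = 0;

// ---- conceptually in the separate asm-printer library ----
static Pass *createDemoPrinterPass() {
  static Pass P = { "demo-printer" };
  return &P;
}

namespace {
  // Static initializer runs before main(), like the Registrator structs.
  struct Register {
    Register() { DemoTargetMachine::registerAsmPrinter(createDemoPrinterPass); }
  } Registrator;
}

// Empty link anchor: any file that calls this pulls in this object file,
// and with it the Registrator above.
void InitializeDemoAsmPrinter() {}

int main() {
  InitializeDemoAsmPrinter();
  DemoTargetMachine TM;
  TM.addAssemblyEmitter();
  return 0;
}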
-extern "C" int AlphaTargetMachineModule; -int AlphaTargetMachineModule = 0; - // Register the targets static RegisterTarget<AlphaTargetMachine> X("alpha", "Alpha [experimental]"); +// No assembler printer by default +AlphaTargetMachine::AsmPrinterCtorFn AlphaTargetMachine::AsmPrinterCtor = 0; + +// Force static initialization when called from llvm/InitializeAllTargets.h +namespace llvm { + void InitializeAlphaTarget() { } +} + const TargetAsmInfo *AlphaTargetMachine::createTargetAsmInfo() const { return new AlphaTargetAsmInfo(*this); } @@ -92,23 +93,32 @@ bool AlphaTargetMachine::addAssemblyEmitter(PassManagerBase &PM, bool Verbose, raw_ostream &Out) { PM.add(createAlphaLLRPPass(*this)); - PM.add(createAlphaCodePrinterPass(Out, *this, OptLevel, Verbose)); + // Output assembly language. + assert(AsmPrinterCtor && "AsmPrinter was not linked in"); + if (AsmPrinterCtor) + PM.add(AsmPrinterCtor(Out, *this, OptLevel, Verbose)); return false; } bool AlphaTargetMachine::addCodeEmitter(PassManagerBase &PM, CodeGenOpt::Level OptLevel, bool DumpAsm, MachineCodeEmitter &MCE) { PM.add(createAlphaCodeEmitterPass(*this, MCE)); - if (DumpAsm) - PM.add(createAlphaCodePrinterPass(errs(), *this, OptLevel, true)); + if (DumpAsm) { + assert(AsmPrinterCtor && "AsmPrinter was not linked in"); + if (AsmPrinterCtor) + PM.add(AsmPrinterCtor(errs(), *this, OptLevel, true)); + } return false; } bool AlphaTargetMachine::addCodeEmitter(PassManagerBase &PM, CodeGenOpt::Level OptLevel, bool DumpAsm, JITCodeEmitter &JCE) { PM.add(createAlphaJITCodeEmitterPass(*this, JCE)); - if (DumpAsm) - PM.add(createAlphaCodePrinterPass(errs(), *this, OptLevel, true)); + if (DumpAsm) { + assert(AsmPrinterCtor && "AsmPrinter was not linked in"); + if (AsmPrinterCtor) + PM.add(AsmPrinterCtor(errs(), *this, OptLevel, true)); + } return false; } bool AlphaTargetMachine::addSimpleCodeEmitter(PassManagerBase &PM, diff --git a/lib/Target/Alpha/AlphaTargetMachine.h b/lib/Target/Alpha/AlphaTargetMachine.h index 51224e8..946ca55 100644 --- a/lib/Target/Alpha/AlphaTargetMachine.h +++ b/lib/Target/Alpha/AlphaTargetMachine.h @@ -33,10 +33,18 @@ class AlphaTargetMachine : public LLVMTargetMachine { AlphaJITInfo JITInfo; AlphaSubtarget Subtarget; AlphaTargetLowering TLInfo; - + protected: virtual const TargetAsmInfo *createTargetAsmInfo() const; - + + // To avoid having target depend on the asmprinter stuff libraries, asmprinter + // set this functions to ctor pointer at startup time if they are linked in. 
+ typedef FunctionPass *(*AsmPrinterCtorFn)(raw_ostream &o, + TargetMachine &tm, + CodeGenOpt::Level OptLevel, + bool verbose); + static AsmPrinterCtorFn AsmPrinterCtor; + public: AlphaTargetMachine(const Module &M, const std::string &FS); @@ -46,7 +54,7 @@ public: virtual const AlphaRegisterInfo *getRegisterInfo() const { return &InstrInfo.getRegisterInfo(); } - virtual AlphaTargetLowering* getTargetLowering() const { + virtual AlphaTargetLowering* getTargetLowering() const { return const_cast<AlphaTargetLowering*>(&TLInfo); } virtual const TargetData *getTargetData() const { return &DataLayout; } @@ -56,7 +64,7 @@ public: static unsigned getJITMatchQuality(); static unsigned getModuleMatchQuality(const Module &M); - + // Pass Pipeline Configuration virtual bool addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel); virtual bool addPreEmitPass(PassManagerBase &PM, CodeGenOpt::Level OptLevel); @@ -75,6 +83,10 @@ public: CodeGenOpt::Level OptLevel, bool DumpAsm, JITCodeEmitter &JCE); + + static void registerAsmPrinter(AsmPrinterCtorFn F) { + AsmPrinterCtor = F; + } }; } // end namespace llvm diff --git a/lib/Target/Alpha/AsmPrinter/AlphaAsmPrinter.cpp b/lib/Target/Alpha/AsmPrinter/AlphaAsmPrinter.cpp index 74b48ee6..7b73bb3 100644 --- a/lib/Target/Alpha/AsmPrinter/AlphaAsmPrinter.cpp +++ b/lib/Target/Alpha/AsmPrinter/AlphaAsmPrinter.cpp @@ -303,3 +303,17 @@ bool AlphaAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, O << ")"; return false; } + +// Force static initialization when called from +// llvm/InitializeAllAsmPrinters.h +namespace llvm { + void InitializeAlphaAsmPrinter() { } +} + +namespace { + static struct Register { + Register() { + AlphaTargetMachine::registerAsmPrinter(createAlphaCodePrinterPass); + } + } Registrator; +} diff --git a/lib/Target/CBackend/CBackend.cpp b/lib/Target/CBackend/CBackend.cpp index 5814d27..c3554f6 100644 --- a/lib/Target/CBackend/CBackend.cpp +++ b/lib/Target/CBackend/CBackend.cpp @@ -59,6 +59,11 @@ int CBackendTargetMachineModule = 0; // Register the target. static RegisterTarget<CTargetMachine> X("c", "C backend"); +// Force static initialization when called from llvm/InitializeAllTargets.h +namespace llvm { + void InitializeCBackendTarget() { } +} + namespace { /// CBackendNameAllUsedStructsAndMergeFunctions - This pass inserts names for /// any unnamed structure types that are used by the program, and merges @@ -1449,6 +1454,17 @@ std::string CWriter::GetValueName(const Value *Operand) { /// writeInstComputationInline - Emit the computation for the specified /// instruction inline, with no destination provided. void CWriter::writeInstComputationInline(Instruction &I) { + // We can't currently support integer types other than 1, 8, 16, 32, 64. + // Validate this. + const Type *Ty = I.getType(); + if (Ty->isInteger() && (Ty!=Type::Int1Ty && Ty!=Type::Int8Ty && + Ty!=Type::Int16Ty && Ty!=Type::Int32Ty && Ty!=Type::Int64Ty)) { + cerr << "The C backend does not currently support integer " + << "types of widths other than 1, 8, 16, 32, 64.\n"; + cerr << "This is being tracked as PR 4158.\n"; + abort(); + } + // If this is a non-trivial bool computation, make sure to truncate down to // a 1 bit value. This is important because we want "add i1 x, y" to return // "0" when x and y are true, not "2" for example. 
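The CBackend hunk just above chooses to fail loudly rather than miscompile: C has no native type for, say, i37, so writeInstComputationInline now rejects any integer width outside 1/8/16/32/64 and points the user at PR 4158. Here is the same guard shape reduced to bare integers, with the width check standing in for the llvm::Type inspection.

#include <cstdio>
#include <cstdlib>

// Mirrors the writeInstComputationInline check: only widths that map onto
// native C integer types are accepted.
static void checkIntWidth(unsigned Bits) {
  if (Bits != 1 && Bits != 8 && Bits != 16 && Bits != 32 && Bits != 64) {
    std::fprintf(stderr, "unsupported integer width i%u "
                         "(the C backend handles 1/8/16/32/64 only)\n", Bits);
    std::abort();
  }
}

int main() {
  checkIntWidth(32);  // accepted
  checkIntWidth(37);  // rejected: aborts, like the real pass
  return 0;
}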
diff --git a/lib/Target/CellSPU/AsmPrinter/SPUAsmPrinter.cpp b/lib/Target/CellSPU/AsmPrinter/SPUAsmPrinter.cpp index da1bf07..26a8ece 100644 --- a/lib/Target/CellSPU/AsmPrinter/SPUAsmPrinter.cpp +++ b/lib/Target/CellSPU/AsmPrinter/SPUAsmPrinter.cpp @@ -287,12 +287,11 @@ namespace { /// LinuxAsmPrinter - SPU assembly printer, customized for Linux class VISIBILITY_HIDDEN LinuxAsmPrinter : public SPUAsmPrinter { DwarfWriter *DW; - MachineModuleInfo *MMI; public: explicit LinuxAsmPrinter(raw_ostream &O, SPUTargetMachine &TM, const TargetAsmInfo *T, CodeGenOpt::Level F, bool V) - : SPUAsmPrinter(O, TM, T, F, V), DW(0), MMI(0) {} + : SPUAsmPrinter(O, TM, T, F, V), DW(0) {} virtual const char *getPassName() const { return "STI CBEA SPU Assembly Printer"; @@ -490,12 +489,8 @@ LinuxAsmPrinter::runOnMachineFunction(MachineFunction &MF) bool LinuxAsmPrinter::doInitialization(Module &M) { bool Result = AsmPrinter::doInitialization(M); - SwitchToTextSection("\t.text"); - // Emit initial debug information. DW = getAnalysisIfAvailable<DwarfWriter>(); - assert(DW && "Dwarf Writer is not available"); - MMI = getAnalysisIfAvailable<MachineModuleInfo>(); - DW->BeginModule(&M, MMI, O, this, TAI); + SwitchToTextSection("\t.text"); return Result; } @@ -621,3 +616,17 @@ FunctionPass *llvm::createSPUAsmPrinterPass(raw_ostream &o, bool verbose) { return new LinuxAsmPrinter(o, tm, tm.getTargetAsmInfo(), OptLevel, verbose); } + +// Force static initialization when called from +// llvm/InitializeAllAsmPrinters.h +namespace llvm { + void InitializeCellSPUAsmPrinter() { } +} + +namespace { + static struct Register { + Register() { + SPUTargetMachine::registerAsmPrinter(createSPUAsmPrinterPass); + } + } Registrator; +} diff --git a/lib/Target/CellSPU/SPUISelLowering.cpp b/lib/Target/CellSPU/SPUISelLowering.cpp index 864a914..b286443 100644 --- a/lib/Target/CellSPU/SPUISelLowering.cpp +++ b/lib/Target/CellSPU/SPUISelLowering.cpp @@ -249,6 +249,25 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM) setOperationAction(ISD::MUL, MVT::i32, Legal); setOperationAction(ISD::MUL, MVT::i64, Legal); + // Expand double-width multiplication + // FIXME: It would probably be reasonable to support some of these operations + setOperationAction(ISD::UMUL_LOHI, MVT::i8, Expand); + setOperationAction(ISD::SMUL_LOHI, MVT::i8, Expand); + setOperationAction(ISD::MULHU, MVT::i8, Expand); + setOperationAction(ISD::MULHS, MVT::i8, Expand); + setOperationAction(ISD::UMUL_LOHI, MVT::i16, Expand); + setOperationAction(ISD::SMUL_LOHI, MVT::i16, Expand); + setOperationAction(ISD::MULHU, MVT::i16, Expand); + setOperationAction(ISD::MULHS, MVT::i16, Expand); + setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand); + setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand); + setOperationAction(ISD::MULHU, MVT::i32, Expand); + setOperationAction(ISD::MULHS, MVT::i32, Expand); + setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand); + setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand); + setOperationAction(ISD::MULHU, MVT::i64, Expand); + setOperationAction(ISD::MULHS, MVT::i64, Expand); + // Need to custom handle (some) common i8, i64 math ops setOperationAction(ISD::ADD, MVT::i8, Custom); setOperationAction(ISD::ADD, MVT::i64, Legal); diff --git a/lib/Target/CellSPU/SPUTargetAsmInfo.cpp b/lib/Target/CellSPU/SPUTargetAsmInfo.cpp index ff88ed8..2868ff7 100644 --- a/lib/Target/CellSPU/SPUTargetAsmInfo.cpp +++ b/lib/Target/CellSPU/SPUTargetAsmInfo.cpp @@ -41,7 +41,6 @@ SPULinuxTargetAsmInfo::SPULinuxTargetAsmInfo(const SPUTargetMachine &TM) : 
SupportsDebugInformation = true; NeedsSet = true; - SupportsMacInfoSection = false; DwarfAbbrevSection = "\t.section .debug_abbrev,\"\",@progbits"; DwarfInfoSection = "\t.section .debug_info,\"\",@progbits"; DwarfLineSection = "\t.section .debug_line,\"\",@progbits"; @@ -52,7 +51,7 @@ SPULinuxTargetAsmInfo::SPULinuxTargetAsmInfo(const SPUTargetMachine &TM) : DwarfLocSection = "\t.section .debug_loc,\"\",@progbits"; DwarfARangesSection = "\t.section .debug_aranges,\"\",@progbits"; DwarfRangesSection = "\t.section .debug_ranges,\"\",@progbits"; - DwarfMacInfoSection = "\t.section .debug_macinfo,\"\",progbits"; + DwarfMacroInfoSection = 0; // macro info not supported. // Exception handling is not supported on CellSPU (think about it: you only // have 256K for code+data. Would you support exception handling?) diff --git a/lib/Target/CellSPU/SPUTargetMachine.cpp b/lib/Target/CellSPU/SPUTargetMachine.cpp index 7fa9022..c675ebb 100644 --- a/lib/Target/CellSPU/SPUTargetMachine.cpp +++ b/lib/Target/CellSPU/SPUTargetMachine.cpp @@ -23,20 +23,20 @@ using namespace llvm; -/// CellSPUTargetMachineModule - Note that this is used on hosts that -/// cannot link in a library unless there are references into the -/// library. In particular, it seems that it is not possible to get -/// things to work on Win32 without this. Though it is unused, do not -/// remove it. -extern "C" int CellSPUTargetMachineModule; -int CellSPUTargetMachineModule = 0; - namespace { // Register the targets RegisterTarget<SPUTargetMachine> CELLSPU("cellspu", "STI CBEA Cell SPU [experimental]"); } +// No assembler printer by default +SPUTargetMachine::AsmPrinterCtorFn SPUTargetMachine::AsmPrinterCtor = 0; + +// Force static initialization when called from llvm/InitializeAllTargets.h +namespace llvm { + void InitializeCellSPUTarget() { } +} + const std::pair<unsigned, int> * SPUFrameInfo::getCalleeSaveSpillSlots(unsigned &NumEntries) const { NumEntries = 1; @@ -93,6 +93,9 @@ bool SPUTargetMachine::addAssemblyEmitter(PassManagerBase &PM, CodeGenOpt::Level OptLevel, bool Verbose, raw_ostream &Out) { - PM.add(createSPUAsmPrinterPass(Out, *this, OptLevel, Verbose)); + // Output assembly language. + assert(AsmPrinterCtor && "AsmPrinter was not linked in"); + if (AsmPrinterCtor) + PM.add(AsmPrinterCtor(Out, *this, OptLevel, Verbose)); return false; } diff --git a/lib/Target/CellSPU/SPUTargetMachine.h b/lib/Target/CellSPU/SPUTargetMachine.h index cd39203..d8fe300 100644 --- a/lib/Target/CellSPU/SPUTargetMachine.h +++ b/lib/Target/CellSPU/SPUTargetMachine.h @@ -35,10 +35,18 @@ class SPUTargetMachine : public LLVMTargetMachine { SPUFrameInfo FrameInfo; SPUTargetLowering TLInfo; InstrItineraryData InstrItins; - + protected: virtual const TargetAsmInfo *createTargetAsmInfo() const; - + + // To avoid having target depend on the asmprinter stuff libraries, asmprinter + // set this functions to ctor pointer at startup time if they are linked in. 
+ typedef FunctionPass *(*AsmPrinterCtorFn)(raw_ostream &o, + SPUTargetMachine &tm, + CodeGenOpt::Level OptLevel, + bool verbose); + static AsmPrinterCtorFn AsmPrinterCtor; + public: SPUTargetMachine(const Module &M, const std::string &FS); @@ -78,7 +86,7 @@ public: return &DataLayout; } - virtual const InstrItineraryData getInstrItineraryData() const { + virtual const InstrItineraryData getInstrItineraryData() const { return InstrItins; } @@ -88,6 +96,10 @@ public: virtual bool addAssemblyEmitter(PassManagerBase &PM, CodeGenOpt::Level OptLevel, bool Verbose, raw_ostream &Out); + + static void registerAsmPrinter(AsmPrinterCtorFn F) { + AsmPrinterCtor = F; + } }; } // end namespace llvm diff --git a/lib/Target/CppBackend/CPPBackend.cpp b/lib/Target/CppBackend/CPPBackend.cpp index 04a6829..1feea96 100644 --- a/lib/Target/CppBackend/CPPBackend.cpp +++ b/lib/Target/CppBackend/CPPBackend.cpp @@ -82,6 +82,11 @@ int CppBackendTargetMachineModule = 0; // Register the target. static RegisterTarget<CPPTargetMachine> X("cpp", "C++ backend"); +// Force static initialization when called from llvm/InitializeAllTargets.h +namespace llvm { + void InitializeCppBackendTarget() { } +} + namespace { typedef std::vector<const Type*> TypeList; typedef std::map<const Type*,std::string> TypeMap; diff --git a/lib/Target/DarwinTargetAsmInfo.cpp b/lib/Target/DarwinTargetAsmInfo.cpp index 05d2351..d7d675a 100644 --- a/lib/Target/DarwinTargetAsmInfo.cpp +++ b/lib/Target/DarwinTargetAsmInfo.cpp @@ -50,6 +50,53 @@ DarwinTargetAsmInfo::DarwinTargetAsmInfo(const TargetMachine &TM) ConstDataSection = getUnnamedSection(".const_data", SectionFlags::None); DataCoalSection = getNamedSection("\t__DATA,__datacoal_nt,coalesced", SectionFlags::Writeable); + + + // Common settings for all Darwin targets. + // Syntax: + GlobalPrefix = "_"; + PrivateGlobalPrefix = "L"; + LessPrivateGlobalPrefix = "l"; // Marker for some ObjC metadata + StringConstantPrefix = "\1LC"; + NeedsSet = true; + NeedsIndirectEncoding = true; + AllowQuotesInName = true; + HasSingleParameterDotFile = false; + + // In non-PIC modes, emit a special label before jump tables so that the + // linker can perform more accurate dead code stripping. We do not check the + // relocation model here since it can be overridden later. 
+ JumpTableSpecialLabelPrefix = "l"; + + // Directives: + WeakDefDirective = "\t.weak_definition "; + WeakRefDirective = "\t.weak_reference "; + HiddenDirective = "\t.private_extern "; + + // Sections: + CStringSection = "\t.cstring"; + JumpTableDataSection = "\t.const\n"; + BSSSection = 0; + + if (TM.getRelocationModel() == Reloc::Static) { + StaticCtorsSection = ".constructor"; + StaticDtorsSection = ".destructor"; + } else { + StaticCtorsSection = ".mod_init_func"; + StaticDtorsSection = ".mod_term_func"; + } + + DwarfAbbrevSection = ".section __DWARF,__debug_abbrev,regular,debug"; + DwarfInfoSection = ".section __DWARF,__debug_info,regular,debug"; + DwarfLineSection = ".section __DWARF,__debug_line,regular,debug"; + DwarfFrameSection = ".section __DWARF,__debug_frame,regular,debug"; + DwarfPubNamesSection = ".section __DWARF,__debug_pubnames,regular,debug"; + DwarfPubTypesSection = ".section __DWARF,__debug_pubtypes,regular,debug"; + DwarfStrSection = ".section __DWARF,__debug_str,regular,debug"; + DwarfLocSection = ".section __DWARF,__debug_loc,regular,debug"; + DwarfARangesSection = ".section __DWARF,__debug_aranges,regular,debug"; + DwarfRangesSection = ".section __DWARF,__debug_ranges,regular,debug"; + DwarfMacroInfoSection = ".section __DWARF,__debug_macinfo,regular,debug"; } /// emitUsedDirectiveFor - On Darwin, internally linked data beginning with diff --git a/lib/Target/IA64/AsmPrinter/IA64AsmPrinter.cpp b/lib/Target/IA64/AsmPrinter/IA64AsmPrinter.cpp index fc54e23..662c667 100644 --- a/lib/Target/IA64/AsmPrinter/IA64AsmPrinter.cpp +++ b/lib/Target/IA64/AsmPrinter/IA64AsmPrinter.cpp @@ -374,3 +374,18 @@ FunctionPass *llvm::createIA64CodePrinterPass(raw_ostream &o, bool verbose) { return new IA64AsmPrinter(o, tm, tm.getTargetAsmInfo(), OptLevel, verbose); } + +namespace { + static struct Register { + Register() { + IA64TargetMachine::registerAsmPrinter(createIA64CodePrinterPass); + } + } Registrator; +} + + +// Force static initialization when called from +// llvm/InitializeAllAsmPrinters.h +namespace llvm { + void InitializeIA64AsmPrinter() { } +} diff --git a/lib/Target/IA64/IA64ISelLowering.cpp b/lib/Target/IA64/IA64ISelLowering.cpp index 34a0686..c545b9c 100644 --- a/lib/Target/IA64/IA64ISelLowering.cpp +++ b/lib/Target/IA64/IA64ISelLowering.cpp @@ -107,6 +107,10 @@ IA64TargetLowering::IA64TargetLowering(TargetMachine &TM) // VASTART needs to be custom lowered to use the VarArgsFrameIndex setOperationAction(ISD::VAARG , MVT::Other, Custom); setOperationAction(ISD::VASTART , MVT::Other, Custom); + + // FIXME: These should be legal + setOperationAction(ISD::BIT_CONVERT, MVT::i64, Expand); + setOperationAction(ISD::BIT_CONVERT, MVT::f64, Expand); // Use the default implementation. setOperationAction(ISD::VACOPY , MVT::Other, Expand); diff --git a/lib/Target/IA64/IA64TargetMachine.cpp b/lib/Target/IA64/IA64TargetMachine.cpp index 878a00a..0b93ee5 100644 --- a/lib/Target/IA64/IA64TargetMachine.cpp +++ b/lib/Target/IA64/IA64TargetMachine.cpp @@ -19,16 +19,18 @@ #include "llvm/Target/TargetMachineRegistry.h" using namespace llvm; -/// IA64TargetMachineModule - Note that this is used on hosts that cannot link -/// in a library unless there are references into the library. In particular, -/// it seems that it is not possible to get things to work on Win32 without -/// this. Though it is unused, do not remove it. 
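This DarwinTargetAsmInfo hunk is the other half of the ARMTargetAsmInfo deletions earlier in the patch: the prefixes, directives, sections and DWARF section names that every Darwin target shares move into the common base constructor, leaving each target's AsmInfo with only its deltas (for ARM: .space, .zerofill, and the lack of .type/.size). A small model of that base-plus-delta shape, with an abbreviated, illustrative field list rather than the real TargetAsmInfo members:

#include <cstdio>
#include <string>

// Settings shared by every Darwin target live in the base constructor.
struct DarwinTargetAsmInfoModel {
  std::string GlobalPrefix;
  std::string PrivateGlobalPrefix;
  std::string WeakRefDirective;
  std::string JumpTableDataSection;

  DarwinTargetAsmInfoModel()
      : GlobalPrefix("_"), PrivateGlobalPrefix("L"),
        WeakRefDirective("\t.weak_reference "),
        JumpTableDataSection("\t.const\n") {}
};

// A target subclass keeps only what differs, as the ARM ctor now does.
struct ARMDarwinAsmInfoModel : DarwinTargetAsmInfoModel {
  std::string ZeroDirective;
  ARMDarwinAsmInfoModel() : ZeroDirective("\t.space\t") {}
};

int main() {
  ARMDarwinAsmInfoModel AI;
  std::printf("global prefix: %s\n", AI.GlobalPrefix.c_str());
  return 0;
}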
-extern "C" int IA64TargetMachineModule; -int IA64TargetMachineModule = 0; - -static RegisterTarget<IA64TargetMachine> X("ia64", +// Register the target +static RegisterTarget<IA64TargetMachine> X("ia64", "IA-64 (Itanium) [experimental]"); +// No assembler printer by default +IA64TargetMachine::AsmPrinterCtorFn IA64TargetMachine::AsmPrinterCtor = 0; + +// Force static initialization when called from llvm/InitializeAllTargets.h +namespace llvm { + void InitializeIA64Target() { } +} + const TargetAsmInfo *IA64TargetMachine::createTargetAsmInfo() const { return new IA64TargetAsmInfo(*this); } @@ -88,7 +90,10 @@ bool IA64TargetMachine::addAssemblyEmitter(PassManagerBase &PM, CodeGenOpt::Level OptLevel, bool Verbose, raw_ostream &Out) { - PM.add(createIA64CodePrinterPass(Out, *this, OptLevel, Verbose)); + // Output assembly language. + assert(AsmPrinterCtor && "AsmPrinter was not linked in"); + if (AsmPrinterCtor) + PM.add(AsmPrinterCtor(Out, *this, OptLevel, Verbose)); return false; } diff --git a/lib/Target/IA64/IA64TargetMachine.h b/lib/Target/IA64/IA64TargetMachine.h index 29d625c..a64da9f 100644 --- a/lib/Target/IA64/IA64TargetMachine.h +++ b/lib/Target/IA64/IA64TargetMachine.h @@ -30,24 +30,32 @@ class IA64TargetMachine : public LLVMTargetMachine { TargetFrameInfo FrameInfo; //IA64JITInfo JITInfo; IA64TargetLowering TLInfo; - + protected: virtual const TargetAsmInfo *createTargetAsmInfo() const; + // To avoid having target depend on the asmprinter stuff libraries, asmprinter + // set this functions to ctor pointer at startup time if they are linked in. + typedef FunctionPass *(*AsmPrinterCtorFn)(raw_ostream &o, + IA64TargetMachine &tm, + CodeGenOpt::Level OptLevel, + bool verbose); + static AsmPrinterCtorFn AsmPrinterCtor; + public: IA64TargetMachine(const Module &M, const std::string &FS); virtual const IA64InstrInfo *getInstrInfo() const { return &InstrInfo; } virtual const TargetFrameInfo *getFrameInfo() const { return &FrameInfo; } virtual const IA64Subtarget *getSubtargetImpl() const { return &Subtarget; } - virtual IA64TargetLowering *getTargetLowering() const { + virtual IA64TargetLowering *getTargetLowering() const { return const_cast<IA64TargetLowering*>(&TLInfo); } virtual const IA64RegisterInfo *getRegisterInfo() const { return &InstrInfo.getRegisterInfo(); } virtual const TargetData *getTargetData() const { return &DataLayout; } - + static unsigned getModuleMatchQuality(const Module &M); // Pass Pipeline Configuration @@ -56,6 +64,10 @@ public: virtual bool addAssemblyEmitter(PassManagerBase &PM, CodeGenOpt::Level OptLevel, bool Verbose, raw_ostream &Out); + + static void registerAsmPrinter(AsmPrinterCtorFn F) { + AsmPrinterCtor = F; + } }; } // End llvm namespace diff --git a/lib/Target/MSIL/MSILWriter.cpp b/lib/Target/MSIL/MSILWriter.cpp index 37e5b1e..0aff14f 100644 --- a/lib/Target/MSIL/MSILWriter.cpp +++ b/lib/Target/MSIL/MSILWriter.cpp @@ -55,6 +55,11 @@ int MSILTargetMachineModule = 0; static RegisterTarget<MSILTarget> X("msil", "MSIL backend"); +// Force static initialization when called from llvm/InitializeAllTargets.h +namespace llvm { + void InitializeMSILTarget() { } +} + bool MSILModule::runOnModule(Module &M) { ModulePtr = &M; TD = &getAnalysis<TargetData>(); diff --git a/lib/Target/MSP430/MSP430TargetMachine.cpp b/lib/Target/MSP430/MSP430TargetMachine.cpp index 7886946..0f5244d 100644 --- a/lib/Target/MSP430/MSP430TargetMachine.cpp +++ b/lib/Target/MSP430/MSP430TargetMachine.cpp @@ -35,6 +35,11 @@ int MSP430TargetMachineModule = 0; static 
RegisterTarget<MSP430TargetMachine> X("msp430", "MSP430 [experimental]"); +// Force static initialization when called from llvm/InitializeAllTargets.h +namespace llvm { + void InitializeMSP430Target() { } +} + MSP430TargetMachine::MSP430TargetMachine(const Module &M, const std::string &FS) : Subtarget(*this, M, FS), diff --git a/lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp b/lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp index dfb6238..077ec96 100644 --- a/lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp +++ b/lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp @@ -578,3 +578,17 @@ doFinalization(Module &M) return AsmPrinter::doFinalization(M); } + +namespace { + static struct Register { + Register() { + MipsTargetMachine::registerAsmPrinter(createMipsCodePrinterPass); + } + } Registrator; +} + +// Force static initialization when called from +// llvm/InitializeAllAsmPrinters.h +namespace llvm { + void InitializeMipsAsmPrinter() { } +} diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp index 4517cfc..42afceb 100644 --- a/lib/Target/Mips/MipsISelLowering.cpp +++ b/lib/Target/Mips/MipsISelLowering.cpp @@ -95,6 +95,7 @@ MipsTargetLowering(MipsTargetMachine &TM): TargetLowering(TM) setOperationAction(ISD::JumpTable, MVT::i32, Custom); setOperationAction(ISD::ConstantPool, MVT::i32, Custom); setOperationAction(ISD::SELECT, MVT::f32, Custom); + setOperationAction(ISD::SELECT, MVT::f64, Custom); setOperationAction(ISD::SELECT, MVT::i32, Custom); setOperationAction(ISD::SETCC, MVT::f32, Custom); setOperationAction(ISD::SETCC, MVT::f64, Custom); @@ -122,6 +123,7 @@ MipsTargetLowering(MipsTargetMachine &TM): TargetLowering(TM) setOperationAction(ISD::SRA_PARTS, MVT::i32, Expand); setOperationAction(ISD::SRL_PARTS, MVT::i32, Expand); setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand); + setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand); // We don't have line number support yet. setOperationAction(ISD::DBG_STOPPOINT, MVT::Other, Expand); diff --git a/lib/Target/Mips/MipsTargetMachine.cpp b/lib/Target/Mips/MipsTargetMachine.cpp index ef524e3..83b9b62 100644 --- a/lib/Target/Mips/MipsTargetMachine.cpp +++ b/lib/Target/Mips/MipsTargetMachine.cpp @@ -31,6 +31,14 @@ int MipsTargetMachineModule = 0; static RegisterTarget<MipsTargetMachine> X("mips", "Mips"); static RegisterTarget<MipselTargetMachine> Y("mipsel", "Mipsel"); +MipsTargetMachine::AsmPrinterCtorFn MipsTargetMachine::AsmPrinterCtor = 0; + + +// Force static initialization when called from llvm/InitializeAllTargets.h +namespace llvm { + void InitializeMipsTarget() { } +} + const TargetAsmInfo *MipsTargetMachine:: createTargetAsmInfo() const { @@ -125,9 +133,9 @@ addPreEmitPass(PassManagerBase &PM, CodeGenOpt::Level OptLevel) // true if AssemblyEmitter is supported bool MipsTargetMachine:: addAssemblyEmitter(PassManagerBase &PM, CodeGenOpt::Level OptLevel, - bool Verbose, raw_ostream &Out) -{ + bool Verbose, raw_ostream &Out) { // Output assembly language. 
- PM.add(createMipsCodePrinterPass(Out, *this, OptLevel, Verbose)); + assert(AsmPrinterCtor && "AsmPrinter was not linked in"); + PM.add(AsmPrinterCtor(Out, *this, OptLevel, Verbose)); return false; } diff --git a/lib/Target/Mips/MipsTargetMachine.h b/lib/Target/Mips/MipsTargetMachine.h index a9e1df2..85fafad 100644 --- a/lib/Target/Mips/MipsTargetMachine.h +++ b/lib/Target/Mips/MipsTargetMachine.h @@ -33,10 +33,23 @@ namespace llvm { protected: virtual const TargetAsmInfo *createTargetAsmInfo() const; - + protected: + // To avoid having target depend on the asmprinter stuff libraries, + // asmprinter set this functions to ctor pointer at startup time if they are + // linked in. + typedef FunctionPass *(*AsmPrinterCtorFn)(raw_ostream &o, + MipsTargetMachine &tm, + CodeGenOpt::Level OptLevel, + bool verbose); + static AsmPrinterCtorFn AsmPrinterCtor; + public: MipsTargetMachine(const Module &M, const std::string &FS, bool isLittle); + static void registerAsmPrinter(AsmPrinterCtorFn F) { + AsmPrinterCtor = F; + } + virtual const MipsInstrInfo *getInstrInfo() const { return &InstrInfo; } virtual const TargetFrameInfo *getFrameInfo() const diff --git a/lib/Target/PIC16/PIC16AsmPrinter.cpp b/lib/Target/PIC16/PIC16AsmPrinter.cpp index f9a8801..ca1089b 100644 --- a/lib/Target/PIC16/PIC16AsmPrinter.cpp +++ b/lib/Target/PIC16/PIC16AsmPrinter.cpp @@ -48,27 +48,10 @@ bool PIC16AsmPrinter::runOnMachineFunction(MachineFunction &MF) { const Function *F = MF.getFunction(); CurrentFnName = Mang->getValueName(F); - // Iterate over the first basic block instructions to find if it has a - // DebugLoc. If so emit .file directive. Instructions such as movlw do not - // have valid DebugLoc, so need to iterate over instructions. - MachineFunction::const_iterator I = MF.begin(); - for (MachineBasicBlock::const_iterator MBBI = I->begin(), E = I->end(); - MBBI != E; MBBI++) { - const DebugLoc DLoc = MBBI->getDebugLoc(); - if (!DLoc.isUnknown()) { - GlobalVariable *CU = MF.getDebugLocTuple(DLoc).CompileUnit; - unsigned line = MF.getDebugLocTuple(DLoc).Line; - DbgInfo.EmitFileDirective(CU); - DbgInfo.SetFunctBeginLine(line); - break; - } - } - // Emit the function frame (args and temps). EmitFunctionFrame(MF); - // Emit function begin debug directive. - DbgInfo.EmitFunctBeginDI(F); + DbgInfo.BeginFunction(MF); // Emit the autos section of function. EmitAutos(CurrentFnName); @@ -89,9 +72,7 @@ bool PIC16AsmPrinter::runOnMachineFunction(MachineFunction &MF) { // Emit function start label. O << CurrentFnName << ":\n"; - // For emitting line directives, we need to keep track of the current - // source line. When it changes then only emit the line directive. - unsigned CurLine = 0; + DebugLoc CurDL; O << "\n"; // Print out code for the function. for (MachineFunction::const_iterator I = MF.begin(), E = MF.end(); @@ -109,12 +90,9 @@ bool PIC16AsmPrinter::runOnMachineFunction(MachineFunction &MF) { // Emit the line directive if source line changed. const DebugLoc DL = II->getDebugLoc(); - if (!DL.isUnknown()) { - unsigned line = MF.getDebugLocTuple(DL).Line; - if (line != CurLine) { - O << "\t.line " << line << "\n"; - CurLine = line; - } + if (!DL.isUnknown() && DL != CurDL) { + DbgInfo.ChangeDebugLoc(MF, DL); + CurDL = DL; } // Print the assembly for the instruction. @@ -123,7 +101,7 @@ bool PIC16AsmPrinter::runOnMachineFunction(MachineFunction &MF) { } // Emit function end debug directives. - DbgInfo.EmitFunctEndDI(F, CurLine); + DbgInfo.EndFunction(MF); return false; // we didn't modify anything. 
} @@ -226,7 +204,7 @@ bool PIC16AsmPrinter::doInitialization (Module &M) { I->setSection(TAI->SectionForGlobal(I)->getName()); } - DbgInfo.Init(M); + DbgInfo.BeginModule(M); EmitFunctionDecls(M); EmitUndefinedVars(M); EmitDefinedVars(M); @@ -313,8 +291,7 @@ void PIC16AsmPrinter::EmitRomData (Module &M) bool PIC16AsmPrinter::doFinalization(Module &M) { printLibcallDecls(); EmitRemainingAutos(); - DbgInfo.EmitVarDebugInfo(M); - DbgInfo.EmitEOF(); + DbgInfo.EndModule(M); O << "\n\t" << "END\n"; bool Result = AsmPrinter::doFinalization(M); return Result; diff --git a/lib/Target/PIC16/PIC16AsmPrinter.h b/lib/Target/PIC16/PIC16AsmPrinter.h index 8bdcf72..3ec5659 100644 --- a/lib/Target/PIC16/PIC16AsmPrinter.h +++ b/lib/Target/PIC16/PIC16AsmPrinter.h @@ -32,7 +32,7 @@ namespace llvm { explicit PIC16AsmPrinter(raw_ostream &O, PIC16TargetMachine &TM, const TargetAsmInfo *T, CodeGenOpt::Level OL, bool V) - : AsmPrinter(O, TM, T, OL, V), DbgInfo(O,T) { + : AsmPrinter(O, TM, T, OL, V), DbgInfo(O, T) { PTLI = TM.getTargetLowering(); PTAI = static_cast<const PIC16TargetAsmInfo *> (T); } diff --git a/lib/Target/PIC16/PIC16DebugInfo.cpp b/lib/Target/PIC16/PIC16DebugInfo.cpp index d7ebea7..27551cd 100644 --- a/lib/Target/PIC16/PIC16DebugInfo.cpp +++ b/lib/Target/PIC16/PIC16DebugInfo.cpp @@ -14,91 +14,23 @@ #include "PIC16.h" #include "PIC16DebugInfo.h" #include "llvm/GlobalVariable.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/Support/DebugLoc.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; -void PIC16DbgInfo::PopulateDebugInfo(DIType Ty, unsigned short &TypeNo, - bool &HasAux, int Aux[], - std::string &TypeName) { - if (Ty.isBasicType(Ty.getTag())) { - std::string Name = ""; - Ty.getName(Name); - unsigned short BaseTy = GetTypeDebugNumber(Name); - TypeNo = TypeNo << PIC16Dbg::S_BASIC; - TypeNo = TypeNo | (0xffff & BaseTy); - } - else if (Ty.isDerivedType(Ty.getTag())) { - switch(Ty.getTag()) - { - case dwarf::DW_TAG_pointer_type: - TypeNo = TypeNo << PIC16Dbg::S_DERIVED; - TypeNo = TypeNo | PIC16Dbg::DT_PTR; - break; - default: - TypeNo = TypeNo << PIC16Dbg::S_DERIVED; - } - DIType BaseType = DIDerivedType(Ty.getGV()).getTypeDerivedFrom(); - PopulateDebugInfo(BaseType, TypeNo, HasAux, Aux, TypeName); - } - else if (Ty.isCompositeType(Ty.getTag())) { - switch (Ty.getTag()) { - case dwarf::DW_TAG_array_type: { - DICompositeType CTy = DICompositeType(Ty.getGV()); - DIArray Elements = CTy.getTypeArray(); - unsigned short size = 1; - unsigned short Dimension[4]={0,0,0,0}; - for (unsigned i = 0, N = Elements.getNumElements(); i < N; ++i) { - DIDescriptor Element = Elements.getElement(i); - if (Element.getTag() == dwarf::DW_TAG_subrange_type) { - TypeNo = TypeNo << PIC16Dbg::S_DERIVED; - TypeNo = TypeNo | PIC16Dbg::DT_ARY; - DISubrange SubRange = DISubrange(Element.getGV()); - Dimension[i] = SubRange.getHi() - SubRange.getLo() + 1; - // Each dimension is represented by 2 bytes starting at byte 9. - Aux[8+i*2+0] = Dimension[i]; - Aux[8+i*2+1] = Dimension[i] >> 8; - size = size * Dimension[i]; - } - } - HasAux = true; - // In auxillary entry for array, 7th and 8th byte represent array size. 
- Aux[6] = size & 0xff; - Aux[7] = size >> 8; - DIType BaseType = CTy.getTypeDerivedFrom(); - PopulateDebugInfo(BaseType, TypeNo, HasAux, Aux, TypeName); - - break; - } - case dwarf:: DW_TAG_union_type: - case dwarf::DW_TAG_structure_type: { - DICompositeType CTy = DICompositeType(Ty.getGV()); - TypeNo = TypeNo << PIC16Dbg::S_BASIC; - if (Ty.getTag() == dwarf::DW_TAG_structure_type) - TypeNo = TypeNo | PIC16Dbg::T_STRUCT; - else - TypeNo = TypeNo | PIC16Dbg::T_UNION; - CTy.getName(TypeName); - // UniqueSuffix is .number where number is obtained from - // llvm.dbg.composite<number>. - std::string UniqueSuffix = "." + Ty.getGV()->getName().substr(18); - TypeName += UniqueSuffix; - unsigned short size = CTy.getSizeInBits()/8; - // 7th and 8th byte represent size. - HasAux = true; - Aux[6] = size & 0xff; - Aux[7] = size >> 8; - break; - } - case dwarf::DW_TAG_enumeration_type: { - TypeNo = TypeNo << PIC16Dbg::S_BASIC; - TypeNo = TypeNo | PIC16Dbg::T_ENUM; - break; - } - default: - TypeNo = TypeNo << PIC16Dbg::S_DERIVED; - } - } +/// PopulateDebugInfo - Populate the TypeNo, Aux[] and TagName from Ty. +/// +void PIC16DbgInfo::PopulateDebugInfo (DIType Ty, unsigned short &TypeNo, + bool &HasAux, int Aux[], + std::string &TagName) { + if (Ty.isBasicType(Ty.getTag())) + PopulateBasicTypeInfo (Ty, TypeNo); + else if (Ty.isDerivedType(Ty.getTag())) + PopulateDerivedTypeInfo (Ty, TypeNo, HasAux, Aux, TagName); + else if (Ty.isCompositeType(Ty.getTag())) + PopulateCompositeTypeInfo (Ty, TypeNo, HasAux, Aux, TagName); else { TypeNo = PIC16Dbg::T_NULL; HasAux = false; @@ -106,7 +38,127 @@ void PIC16DbgInfo::PopulateDebugInfo(DIType Ty, unsigned short &TypeNo, return; } +/// PopulateBasicTypeInfo- Populate TypeNo for basic type from Ty. +/// +void PIC16DbgInfo::PopulateBasicTypeInfo (DIType Ty, unsigned short &TypeNo) { + std::string Name = ""; + Ty.getName(Name); + unsigned short BaseTy = GetTypeDebugNumber(Name); + TypeNo = TypeNo << PIC16Dbg::S_BASIC; + TypeNo = TypeNo | (0xffff & BaseTy); +} + +/// PopulateDerivedTypeInfo - Populate TypeNo, Aux[], TagName for derived type +/// from Ty. Derived types are mostly pointers. +/// +void PIC16DbgInfo::PopulateDerivedTypeInfo (DIType Ty, unsigned short &TypeNo, + bool &HasAux, int Aux[], + std::string &TagName) { + + switch(Ty.getTag()) + { + case dwarf::DW_TAG_pointer_type: + TypeNo = TypeNo << PIC16Dbg::S_DERIVED; + TypeNo = TypeNo | PIC16Dbg::DT_PTR; + break; + default: + TypeNo = TypeNo << PIC16Dbg::S_DERIVED; + } + + // We also need to encode the the information about the base type of + // pointer in TypeNo. + DIType BaseType = DIDerivedType(Ty.getGV()).getTypeDerivedFrom(); + PopulateDebugInfo(BaseType, TypeNo, HasAux, Aux, TagName); +} + +/// PopulateArrayTypeInfo - Populate TypeNo, Aux[] for array from Ty. +void PIC16DbgInfo::PopulateArrayTypeInfo (DIType Ty, unsigned short &TypeNo, + bool &HasAux, int Aux[], + std::string &TagName) { + DICompositeType CTy = DICompositeType(Ty.getGV()); + DIArray Elements = CTy.getTypeArray(); + unsigned short size = 1; + unsigned short Dimension[4]={0,0,0,0}; + for (unsigned i = 0, N = Elements.getNumElements(); i < N; ++i) { + DIDescriptor Element = Elements.getElement(i); + if (Element.getTag() == dwarf::DW_TAG_subrange_type) { + TypeNo = TypeNo << PIC16Dbg::S_DERIVED; + TypeNo = TypeNo | PIC16Dbg::DT_ARY; + DISubrange SubRange = DISubrange(Element.getGV()); + Dimension[i] = SubRange.getHi() - SubRange.getLo() + 1; + // Each dimension is represented by 2 bytes starting at byte 9. 
+ Aux[8+i*2+0] = Dimension[i]; + Aux[8+i*2+1] = Dimension[i] >> 8; + size = size * Dimension[i]; + } + } + HasAux = true; + // In auxillary entry for array, 7th and 8th byte represent array size. + Aux[6] = size & 0xff; + Aux[7] = size >> 8; + DIType BaseType = CTy.getTypeDerivedFrom(); + PopulateDebugInfo(BaseType, TypeNo, HasAux, Aux, TagName); +} + +/// PopulateStructOrUnionTypeInfo - Populate TypeNo, Aux[] , TagName for +/// structure or union. +/// +void PIC16DbgInfo::PopulateStructOrUnionTypeInfo (DIType Ty, + unsigned short &TypeNo, + bool &HasAux, int Aux[], + std::string &TagName) { + DICompositeType CTy = DICompositeType(Ty.getGV()); + TypeNo = TypeNo << PIC16Dbg::S_BASIC; + if (Ty.getTag() == dwarf::DW_TAG_structure_type) + TypeNo = TypeNo | PIC16Dbg::T_STRUCT; + else + TypeNo = TypeNo | PIC16Dbg::T_UNION; + CTy.getName(TagName); + // UniqueSuffix is .number where number is obtained from + // llvm.dbg.composite<number>. + std::string UniqueSuffix = "." + Ty.getGV()->getName().substr(18); + TagName += UniqueSuffix; + unsigned short size = CTy.getSizeInBits()/8; + // 7th and 8th byte represent size. + HasAux = true; + Aux[6] = size & 0xff; + Aux[7] = size >> 8; +} + +/// PopulateEnumTypeInfo - Populate TypeNo for enum from Ty. +void PIC16DbgInfo::PopulateEnumTypeInfo (DIType Ty, unsigned short &TypeNo) { + TypeNo = TypeNo << PIC16Dbg::S_BASIC; + TypeNo = TypeNo | PIC16Dbg::T_ENUM; +} + +/// PopulateCompositeTypeInfo - Populate TypeNo, Aux[] and TagName for +/// composite types from Ty. +/// +void PIC16DbgInfo::PopulateCompositeTypeInfo (DIType Ty, unsigned short &TypeNo, + bool &HasAux, int Aux[], + std::string &TagName) { + switch (Ty.getTag()) { + case dwarf::DW_TAG_array_type: { + PopulateArrayTypeInfo (Ty, TypeNo, HasAux, Aux, TagName); + break; + } + case dwarf:: DW_TAG_union_type: + case dwarf::DW_TAG_structure_type: { + PopulateStructOrUnionTypeInfo (Ty, TypeNo, HasAux, Aux, TagName); + break; + } + case dwarf::DW_TAG_enumeration_type: { + PopulateEnumTypeInfo (Ty, TypeNo); + break; + } + default: + TypeNo = TypeNo << PIC16Dbg::S_DERIVED; + } +} + +/// GetTypeDebugNumber - Get debug type number for given type. +/// unsigned PIC16DbgInfo::GetTypeDebugNumber(std::string &type) { if (type == "char") return PIC16Dbg::T_CHAR; @@ -127,8 +179,10 @@ unsigned PIC16DbgInfo::GetTypeDebugNumber(std::string &type) { else return 0; } - -short PIC16DbgInfo::getClass(DIGlobalVariable DIGV) { + +/// GetStorageClass - Get storage class for give debug variable. +/// +short PIC16DbgInfo::getStorageClass(DIGlobalVariable DIGV) { short ClassNo; if (PAN::isLocalName(DIGV.getGlobal()->getName())) { // Generating C_AUTO here fails due to error in linker. Change it once @@ -142,12 +196,126 @@ short PIC16DbgInfo::getClass(DIGlobalVariable DIGV) { return ClassNo; } -void PIC16DbgInfo::Init(Module &M) { - // Do all debug related initializations here. - EmitFileDirective(M); +/// BeginModule - Emit necessary debug info to start a Module and do other +/// required initializations. +void PIC16DbgInfo::BeginModule(Module &M) { + // Emit file directive for module. + GlobalVariable *CU = M.getNamedGlobal("llvm.dbg.compile_unit"); + if (CU) { + EmitDebugDirectives = true; + SwitchToCU(CU); + } + + // Emit debug info for decls of composite types. EmitCompositeTypeDecls(M); } +/// Helper to find first valid debug loc for a function. 
+/// +static const DebugLoc GetDebugLocForFunction(const MachineFunction &MF) { + DebugLoc DL; + for (MachineFunction::const_iterator I = MF.begin(), E = MF.end(); + I != E; ++I) { + for (MachineBasicBlock::const_iterator II = I->begin(), E = I->end(); + II != E; ++II) { + DL = II->getDebugLoc(); + if (!DL.isUnknown()) + return DL; + } + } + return DL; +} + +/// BeginFunction - Emit necessary debug info to start a function. +/// +void PIC16DbgInfo::BeginFunction(const MachineFunction &MF) { + if (! EmitDebugDirectives) return; + + // Retrieve the first valid debug Loc and process it. + const DebugLoc &DL = GetDebugLocForFunction(MF); + ChangeDebugLoc(MF, DL, true); + + EmitFunctBeginDI(MF.getFunction()); + + // Set current line to 0 so that the .line directive is generated after .bf. + CurLine = 0; +} + +/// ChangeDebugLoc - Take necessary steps when DebugLoc changes. +/// CurFile and CurLine may change as a result of this. +/// +void PIC16DbgInfo::ChangeDebugLoc(const MachineFunction &MF, + const DebugLoc &DL, bool IsInBeginFunction) { + if (! EmitDebugDirectives) return; + assert (! DL.isUnknown() && "can't change to invalid debug loc"); + + GlobalVariable *CU = MF.getDebugLocTuple(DL).CompileUnit; + unsigned line = MF.getDebugLocTuple(DL).Line; + + SwitchToCU(CU); + SwitchToLine(line, IsInBeginFunction); +} + +/// SwitchToLine - Emit line directive for a new line. +/// +void PIC16DbgInfo::SwitchToLine(unsigned Line, bool IsInBeginFunction) { + if (CurLine == Line) return; + if (!IsInBeginFunction) O << "\n\t.line " << Line << "\n"; + CurLine = Line; +} + +/// EndFunction - Emit .ef for end of function. +/// +void PIC16DbgInfo::EndFunction(const MachineFunction &MF) { + if (! EmitDebugDirectives) return; + EmitFunctEndDI(MF.getFunction(), CurLine); +} + +/// EndModule - Emit .eof for end of module. +/// +void PIC16DbgInfo::EndModule(Module &M) { + if (! EmitDebugDirectives) return; + EmitVarDebugInfo(M); + if (CurFile != "") O << "\n\t.eof"; +} + +/// EmitCompositeTypeElements - Emit debug information for members of a +/// composite type. +/// +void PIC16DbgInfo::EmitCompositeTypeElements (DICompositeType CTy, + std::string UniqueSuffix) { + unsigned long Value = 0; + DIArray Elements = CTy.getTypeArray(); + for (unsigned i = 0, N = Elements.getNumElements(); i < N; i++) { + DIDescriptor Element = Elements.getElement(i); + unsigned short TypeNo = 0; + bool HasAux = false; + int ElementAux[PIC16Dbg::AuxSize] = { 0 }; + std::string TagName = ""; + std::string ElementName; + GlobalVariable *GV = Element.getGV(); + DIDerivedType DITy(GV); + DITy.getName(ElementName); + unsigned short ElementSize = DITy.getSizeInBits()/8; + // Get mangled name for this structure/union element. + std::string MangMemName = ElementName + UniqueSuffix; + PopulateDebugInfo(DITy, TypeNo, HasAux, ElementAux, TagName); + short Class; + if( CTy.getTag() == dwarf::DW_TAG_union_type) + Class = PIC16Dbg::C_MOU; + else if (CTy.getTag() == dwarf::DW_TAG_structure_type) + Class = PIC16Dbg::C_MOS; + EmitSymbol(MangMemName, Class, TypeNo, Value); + if (CTy.getTag() == dwarf::DW_TAG_structure_type) + Value += ElementSize; + if (HasAux) + EmitAuxEntry(MangMemName, ElementAux, PIC16Dbg::AuxSize, TagName); + } +} + +/// EmitCompositeTypeDecls - Emit composite type declarations like structure +/// and union declarations.
+/// void PIC16DbgInfo::EmitCompositeTypeDecls(Module &M) { for(iplist<GlobalVariable>::iterator I = M.getGlobalList().begin(), E = M.getGlobalList().end(); I != E; I++) { @@ -178,33 +346,10 @@ void PIC16DbgInfo::EmitCompositeTypeDecls(Module &M) { // Emit auxiliary debug information for structure/union tag. EmitAuxEntry(MangledCTyName, Aux, PIC16Dbg::AuxSize); - unsigned long Value = 0; - DIArray Elements = CTy.getTypeArray(); - for (unsigned i = 0, N = Elements.getNumElements(); i < N; i++) { - DIDescriptor Element = Elements.getElement(i); - unsigned short TypeNo = 0; - bool HasAux = false; - int ElementAux[PIC16Dbg::AuxSize] = { 0 }; - std::string TypeName = ""; - std::string ElementName; - GlobalVariable *GV = Element.getGV(); - DIDerivedType DITy(GV); - DITy.getName(ElementName); - unsigned short ElementSize = DITy.getSizeInBits()/8; - // Get mangleddd name for this structure/union element. - std::string MangMemName = ElementName + UniqueSuffix; - PopulateDebugInfo(DITy, TypeNo, HasAux, ElementAux, TypeName); - short Class; - if( CTy.getTag() == dwarf::DW_TAG_union_type) - Class = PIC16Dbg::C_MOU; - else if (CTy.getTag() == dwarf::DW_TAG_structure_type) - Class = PIC16Dbg::C_MOS; - EmitSymbol(MangMemName, Class, TypeNo, Value); - if (CTy.getTag() == dwarf::DW_TAG_structure_type) - Value += ElementSize; - if (HasAux) - EmitAuxEntry(MangMemName, ElementAux, PIC16Dbg::AuxSize, TypeName); - } + + // Emit members. + EmitCompositeTypeElements (CTy, UniqueSuffix); + // Emit mangled Symbol for end of structure/union. std::string EOSSymbol = ".eos" + UniqueSuffix; EmitSymbol(EOSSymbol, PIC16Dbg::C_EOS); @@ -214,6 +359,8 @@ void PIC16DbgInfo::EmitCompositeTypeDecls(Module &M) { } } +/// EmitFunctBeginDI - Emit .bf for function. +/// void PIC16DbgInfo::EmitFunctBeginDI(const Function *F) { std::string FunctName = F->getName(); if (EmitDebugDirectives) { @@ -221,16 +368,20 @@ void PIC16DbgInfo::EmitFunctBeginDI(const Function *F) { std::string BlockBeginSym = ".bb." + FunctName; int BFAux[PIC16Dbg::AuxSize] = {0}; - BFAux[4] = FunctBeginLine; - BFAux[5] = FunctBeginLine >> 8; + BFAux[4] = CurLine; + BFAux[5] = CurLine >> 8; + // Emit debug directives for beginning of function. EmitSymbol(FunctBeginSym, PIC16Dbg::C_FCN); EmitAuxEntry(FunctBeginSym, BFAux, PIC16Dbg::AuxSize); + EmitSymbol(BlockBeginSym, PIC16Dbg::C_BLOCK); EmitAuxEntry(BlockBeginSym, BFAux, PIC16Dbg::AuxSize); } } +/// EmitFunctEndDI - Emit .ef for function end. +/// void PIC16DbgInfo::EmitFunctEndDI(const Function *F, unsigned Line) { std::string FunctName = F->getName(); if (EmitDebugDirectives) { @@ -241,8 +392,8 @@ void PIC16DbgInfo::EmitFunctEndDI(const Function *F, unsigned Line) { EmitSymbol(BlockEndSym, PIC16Dbg::C_BLOCK); int EFAux[PIC16Dbg::AuxSize] = {0}; // 5th and 6th byte stand for line number. - EFAux[4] = Line; - EFAux[5] = Line >> 8; + EFAux[4] = CurLine; + EFAux[5] = CurLine >> 8; EmitAuxEntry(BlockEndSym, EFAux, PIC16Dbg::AuxSize); EmitSymbol(FunctEndSym, PIC16Dbg::C_FCN); EmitAuxEntry(FunctEndSym, EFAux, PIC16Dbg::AuxSize); @@ -251,15 +402,18 @@ void PIC16DbgInfo::EmitFunctEndDI(const Function *F, unsigned Line) { /// EmitAuxEntry - Emit Auxiliary debug information. 
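// (Editor's note) Aux entries are emitted as a flat comma-separated byte
// list, so every multi-byte field is split little-endian by hand: the .bf
// and .ef entries above put the line number's low byte at index 4 and its
// high byte at index 5, just as the type entries put the size at indices 6
// and 7. The recurring idiom, with masking shown for clarity:
//
//   Aux[4] = Line & 0xff;         // low byte
//   Aux[5] = (Line >> 8) & 0xff;  // high byte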
///
-void PIC16DbgInfo::EmitAuxEntry(const std::string VarName, int Aux[], int num,
-                                std::string tag) {
+void PIC16DbgInfo::EmitAuxEntry(const std::string VarName, int Aux[], int Num,
+                                std::string TagName) {
   O << "\n\t.dim " << VarName << ", 1" ;
-  if (tag != "")
-    O << ", " << tag;
-  for (int i = 0; i<num; i++)
+  // TagName is emitted in case of structure/union objects.
+  if (TagName != "")
+    O << ", " << TagName;
+  for (int i = 0; i<Num; i++)
     O << "," << Aux[i];
 }
 
+/// EmitSymbol - Emit .def for a symbol. Value is the offset for the member.
+///
 void PIC16DbgInfo::EmitSymbol(std::string Name, short Class, unsigned short
                               Type, unsigned long Value) {
   O << "\n\t" << ".def "<< Name << ", type = " << Type << ", class = "
@@ -268,6 +422,8 @@ void PIC16DbgInfo::EmitSymbol(std::string Name, short Class, unsigned short
     O << ", value = " << Value;
 }
 
+/// EmitVarDebugInfo - Emit debug information for all variables.
+///
 void PIC16DbgInfo::EmitVarDebugInfo(Module &M) {
   GlobalVariable *Root = M.getGlobalVariable("llvm.dbg.global_variables");
   if (!Root)
     return;
@@ -283,47 +439,45 @@ void PIC16DbgInfo::EmitVarDebugInfo(Module &M) {
       unsigned short TypeNo = 0;
       bool HasAux = false;
       int Aux[PIC16Dbg::AuxSize] = { 0 };
-      std::string TypeName = "";
+      std::string TagName = "";
       std::string VarName = TAI->getGlobalPrefix()+DIGV.getGlobal()->getName();
-      PopulateDebugInfo(Ty, TypeNo, HasAux, Aux, TypeName);
+      PopulateDebugInfo(Ty, TypeNo, HasAux, Aux, TagName);
       // Emit debug info only if type information is available.
       if (TypeNo != PIC16Dbg::T_NULL) {
         O << "\n\t.type " << VarName << ", " << TypeNo;
-        short ClassNo = getClass(DIGV);
+        short ClassNo = getStorageClass(DIGV);
         O << "\n\t.class " << VarName << ", " << ClassNo;
         if (HasAux)
-          EmitAuxEntry(VarName, Aux, PIC16Dbg::AuxSize, TypeName);
+          EmitAuxEntry(VarName, Aux, PIC16Dbg::AuxSize, TagName);
       }
     }
   }
   O << "\n";
 }
 
-void PIC16DbgInfo::EmitFileDirective(Module &M) {
-  GlobalVariable *CU = M.getNamedGlobal("llvm.dbg.compile_unit");
-  if (CU) {
-    EmitDebugDirectives = true;
-    EmitFileDirective(CU, false);
-  }
-}
+/// SwitchToCU - Switch to a new compilation unit.
+///
+void PIC16DbgInfo::SwitchToCU(GlobalVariable *CU) {
+  // Get the file path from CU.
+  DICompileUnit cu(CU);
+  std::string DirName, FileName;
+  std::string FilePath = cu.getDirectory(DirName) + "/" +
+                         cu.getFilename(FileName);
 
-void PIC16DbgInfo::EmitFileDirective(GlobalVariable *CU, bool EmitEof) {
-  std::string Dir, FN;
-  DICompileUnit DIUnit(CU);
-  std::string File = DIUnit.getDirectory(Dir) + "/" + DIUnit.getFilename(FN);
-  if ( File != CurFile ) {
-    if (EmitEof)
-      EmitEOF();
-    O << "\n\t.file\t\"" << File << "\"\n" ;
-    CurFile = File;
-  }
+  // Nothing to do if the source file is still the same.
+  if ( FilePath == CurFile ) return;
+
+  // Else, close the current one and start a new one.
+  if (CurFile != "") O << "\n\t.eof";
+  O << "\n\t.file\t\"" << FilePath << "\"\n" ;
+  CurFile = FilePath;
+  CurLine = 0;
 }
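// (Editor's note) SwitchToCU above brackets each source file with
// .file/.eof, so a module drawing on two compile units produces a directive
// sequence like the following (file names illustrative):
//
//   .file  "a.c"
//   ...debug directives for a.c...
//   .eof
//   .file  "b.c"
//   ...
//
// CurLine is also reset to 0 so the first .line in the new file always fires.
+/// EmitEOF - Emit .eof for end of file.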
+/// void PIC16DbgInfo::EmitEOF() { if (CurFile != "") O << "\n\t.EOF"; } -void PIC16DbgInfo::SetFunctBeginLine(unsigned line) { - FunctBeginLine = line; -} diff --git a/lib/Target/PIC16/PIC16DebugInfo.h b/lib/Target/PIC16/PIC16DebugInfo.h index 9d50380..d126d85 100644 --- a/lib/Target/PIC16/PIC16DebugInfo.h +++ b/lib/Target/PIC16/PIC16DebugInfo.h @@ -20,6 +20,8 @@ #include <map> namespace llvm { + class MachineFunction; + class DebugLoc; namespace PIC16Dbg { enum VarType { T_NULL, @@ -94,33 +96,64 @@ namespace llvm { raw_ostream &O; const TargetAsmInfo *TAI; std::string CurFile; + unsigned CurLine; + // EmitDebugDirectives is set if debug information is available. Default // value for it is false. bool EmitDebugDirectives; - unsigned FunctBeginLine; + public: PIC16DbgInfo(raw_ostream &o, const TargetAsmInfo *T) : O(o), TAI(T) { - CurFile = ""; + CurFile = ""; + CurLine = 0; EmitDebugDirectives = false; } - void PopulateDebugInfo(DIType Ty, unsigned short &TypeNo, bool &HasAux, + + void BeginModule (Module &M); + void BeginFunction (const MachineFunction &MF); + void ChangeDebugLoc (const MachineFunction &MF, const DebugLoc &DL, + bool IsInBeginFunction = false); + void EndFunction (const MachineFunction &MF); + void EndModule (Module &M); + + + private: + void SwitchToCU (GlobalVariable *CU); + void SwitchToLine (unsigned Line, bool IsInBeginFunction = false); + + void PopulateDebugInfo (DIType Ty, unsigned short &TypeNo, bool &HasAux, int Aux[], std::string &TypeName); - unsigned GetTypeDebugNumber(std::string &type); - short getClass(DIGlobalVariable DIGV); + void PopulateBasicTypeInfo (DIType Ty, unsigned short &TypeNo); + void PopulateDerivedTypeInfo (DIType Ty, unsigned short &TypeNo, + bool &HasAux, int Aux[], + std::string &TypeName); + + void PopulateCompositeTypeInfo (DIType Ty, unsigned short &TypeNo, + bool &HasAux, int Aux[], + std::string &TypeName); + void PopulateArrayTypeInfo (DIType Ty, unsigned short &TypeNo, + bool &HasAux, int Aux[], + std::string &TypeName); + + void PopulateStructOrUnionTypeInfo (DIType Ty, unsigned short &TypeNo, + bool &HasAux, int Aux[], + std::string &TypeName); + void PopulateEnumTypeInfo (DIType Ty, unsigned short &TypeNo); + + unsigned GetTypeDebugNumber(std::string &Type); + short getStorageClass(DIGlobalVariable DIGV); void EmitFunctBeginDI(const Function *F); - void Init(Module &M); void EmitCompositeTypeDecls(Module &M); + void EmitCompositeTypeElements (DICompositeType CTy, + std::string UniqueSuffix); void EmitFunctEndDI(const Function *F, unsigned Line); void EmitAuxEntry(const std::string VarName, int Aux[], - int num = PIC16Dbg::AuxSize, std::string tag = ""); + int num = PIC16Dbg::AuxSize, std::string TagName = ""); inline void EmitSymbol(std::string Name, short Class, unsigned short Type = PIC16Dbg::T_NULL, unsigned long Value = 0); void EmitVarDebugInfo(Module &M); - void EmitFileDirective(Module &M); - void EmitFileDirective(GlobalVariable *CU, bool EmitEof = true); void EmitEOF(); - void SetFunctBeginLine(unsigned line); }; } // end namespace llvm; #endif diff --git a/lib/Target/PIC16/PIC16ISelLowering.cpp b/lib/Target/PIC16/PIC16ISelLowering.cpp index ba465f3..f113a48 100644 --- a/lib/Target/PIC16/PIC16ISelLowering.cpp +++ b/lib/Target/PIC16/PIC16ISelLowering.cpp @@ -31,42 +31,72 @@ static const char *getIntrinsicName(unsigned opcode) { std::string Basename; switch(opcode) { default: assert (0 && "do not know intrinsic name"); + // Arithmetic Right shift for integer types. 
   case PIC16ISD::SRA_I8: Basename = "sra.i8"; break;
   case RTLIB::SRA_I16: Basename = "sra.i16"; break;
   case RTLIB::SRA_I32: Basename = "sra.i32"; break;
 
+  // Left shift for integer types.
   case PIC16ISD::SLL_I8: Basename = "sll.i8"; break;
   case RTLIB::SHL_I16: Basename = "sll.i16"; break;
   case RTLIB::SHL_I32: Basename = "sll.i32"; break;
 
+  // Logical right shift for integer types.
   case PIC16ISD::SRL_I8: Basename = "srl.i8"; break;
   case RTLIB::SRL_I16: Basename = "srl.i16"; break;
   case RTLIB::SRL_I32: Basename = "srl.i32"; break;
 
+  // Multiply for integer types.
   case PIC16ISD::MUL_I8: Basename = "mul.i8"; break;
   case RTLIB::MUL_I16: Basename = "mul.i16"; break;
   case RTLIB::MUL_I32: Basename = "mul.i32"; break;
 
+  // Signed division for integers.
   case RTLIB::SDIV_I16: Basename = "sdiv.i16"; break;
   case RTLIB::SDIV_I32: Basename = "sdiv.i32"; break;
+
+  // Unsigned division for integers.
   case RTLIB::UDIV_I16: Basename = "udiv.i16"; break;
   case RTLIB::UDIV_I32: Basename = "udiv.i32"; break;
 
+  // Signed remainder for integers.
   case RTLIB::SREM_I16: Basename = "srem.i16"; break;
   case RTLIB::SREM_I32: Basename = "srem.i32"; break;
+
+  // Unsigned remainder for integers.
   case RTLIB::UREM_I16: Basename = "urem.i16"; break;
   case RTLIB::UREM_I32: Basename = "urem.i32"; break;
 
-  case RTLIB::FPTOSINT_F32_I32:
-    Basename = "f32_to_si32"; break;
-  case RTLIB::SINTTOFP_I32_F32:
-    Basename = "si32_to_f32"; break;
+  //////////////////////
+  // LIBCALLS FOR FLOATS
+  //////////////////////
+
+  // Float to signed integers.
+  case RTLIB::FPTOSINT_F32_I8: Basename = "f32_to_si32"; break;
+  case RTLIB::FPTOSINT_F32_I16: Basename = "f32_to_si32"; break;
+  case RTLIB::FPTOSINT_F32_I32: Basename = "f32_to_si32"; break;
+
+  // Signed integers to float. char and int are first sign extended to i32
+  // before being converted to float, so an I8_F32 or I16_F32 isn't required.
+  case RTLIB::SINTTOFP_I32_F32: Basename = "si32_to_f32"; break;
+
+  // Float to unsigned conversions.
+  // Signed conversion can be used for unsigned conversion as well.
+  // In signed and unsigned versions only the interpretation of the
+  // MSB is different. Bit representation remains the same.
+  case RTLIB::FPTOUINT_F32_I8: Basename = "f32_to_si32"; break;
+  case RTLIB::FPTOUINT_F32_I16: Basename = "f32_to_si32"; break;
+  case RTLIB::FPTOUINT_F32_I32: Basename = "f32_to_si32"; break;
+
+  // Unsigned to float conversions. char and int are first zero extended
+  // before being converted to float.
+  case RTLIB::UINTTOFP_I32_F32: Basename = "ui32_to_f32"; break;
 
+  // Floating point add, sub, mul, div.
   case RTLIB::ADD_F32: Basename = "add.f32"; break;
   case RTLIB::SUB_F32: Basename = "sub.f32"; break;
   case RTLIB::MUL_F32: Basename = "mul.f32"; break;
   case RTLIB::DIV_F32: Basename = "div.f32"; break;
-
   }
 
   std::string prefix = PAN::getTagName(PAN::PREFIX_SYMBOL);
@@ -83,7 +113,7 @@ static const char *getIntrinsicName(unsigned opcode) {
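// (Editor's note) All six float-to-int entries above deliberately resolve to
// the single "f32_to_si32" routine: the value is produced as an i32 and, per
// the comment on the MSB, only the caller's interpretation of the bits
// differs between the signed and unsigned cases. A hypothetical helper (not
// in the tree) that captures the mapping:
//
//   static bool usesSharedF32ToI32Routine(unsigned LC) {
//     switch (LC) {
//     case RTLIB::FPTOSINT_F32_I8:  case RTLIB::FPTOUINT_F32_I8:
//     case RTLIB::FPTOSINT_F32_I16: case RTLIB::FPTOUINT_F32_I16:
//     case RTLIB::FPTOSINT_F32_I32: case RTLIB::FPTOUINT_F32_I32:
//       return true;  // all resolve to "f32_to_si32"
//     default:
//       return false;
//     }
//   }

// PIC16TargetLowering Constructor.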
PIC16TargetLowering::PIC16TargetLowering(PIC16TargetMachine &TM) : TargetLowering(TM), TmpSize(0) { - + Subtarget = &TM.getSubtarget<PIC16Subtarget>(); addRegisterClass(MVT::i8, PIC16::GPRRegisterClass); @@ -114,6 +144,7 @@ PIC16TargetLowering::PIC16TargetLowering(PIC16TargetMachine &TM) // Signed division lib call names setLibcallName(RTLIB::SDIV_I16, getIntrinsicName(RTLIB::SDIV_I16)); setLibcallName(RTLIB::SDIV_I32, getIntrinsicName(RTLIB::SDIV_I32)); + // Unsigned division lib call names setLibcallName(RTLIB::UDIV_I16, getIntrinsicName(RTLIB::UDIV_I16)); setLibcallName(RTLIB::UDIV_I32, getIntrinsicName(RTLIB::UDIV_I32)); @@ -121,15 +152,36 @@ PIC16TargetLowering::PIC16TargetLowering(PIC16TargetMachine &TM) // Signed remainder lib call names setLibcallName(RTLIB::SREM_I16, getIntrinsicName(RTLIB::SREM_I16)); setLibcallName(RTLIB::SREM_I32, getIntrinsicName(RTLIB::SREM_I32)); + // Unsigned remainder lib call names setLibcallName(RTLIB::UREM_I16, getIntrinsicName(RTLIB::UREM_I16)); setLibcallName(RTLIB::UREM_I32, getIntrinsicName(RTLIB::UREM_I32)); - // Floating point operations + // Floating point to signed int conversions. + setLibcallName(RTLIB::FPTOSINT_F32_I8, + getIntrinsicName(RTLIB::FPTOSINT_F32_I8)); + setLibcallName(RTLIB::FPTOSINT_F32_I16, + getIntrinsicName(RTLIB::FPTOSINT_F32_I16)); setLibcallName(RTLIB::FPTOSINT_F32_I32, getIntrinsicName(RTLIB::FPTOSINT_F32_I32)); + + // Signed int to floats. setLibcallName(RTLIB::SINTTOFP_I32_F32, getIntrinsicName(RTLIB::SINTTOFP_I32_F32)); + + // Floating points to unsigned ints. + setLibcallName(RTLIB::FPTOUINT_F32_I8, + getIntrinsicName(RTLIB::FPTOUINT_F32_I8)); + setLibcallName(RTLIB::FPTOUINT_F32_I16, + getIntrinsicName(RTLIB::FPTOUINT_F32_I16)); + setLibcallName(RTLIB::FPTOUINT_F32_I32, + getIntrinsicName(RTLIB::FPTOUINT_F32_I32)); + + // Unsigned int to floats. + setLibcallName(RTLIB::UINTTOFP_I32_F32, + getIntrinsicName(RTLIB::UINTTOFP_I32_F32)); + + // Floating point add, sub, mul ,div. setLibcallName(RTLIB::ADD_F32, getIntrinsicName(RTLIB::ADD_F32)); setLibcallName(RTLIB::SUB_F32, getIntrinsicName(RTLIB::SUB_F32)); setLibcallName(RTLIB::MUL_F32, getIntrinsicName(RTLIB::MUL_F32)); diff --git a/lib/Target/PIC16/PIC16TargetMachine.cpp b/lib/Target/PIC16/PIC16TargetMachine.cpp index bda6326..d4f46a4 100644 --- a/lib/Target/PIC16/PIC16TargetMachine.cpp +++ b/lib/Target/PIC16/PIC16TargetMachine.cpp @@ -37,6 +37,11 @@ X("pic16", "PIC16 14-bit [experimental]."); static RegisterTarget<CooperTargetMachine> Y("cooper", "PIC16 Cooper [experimental]."); +// Force static initialization when called from llvm/InitializeAllTargets.h +namespace llvm { + void InitializePIC16Target() { } +} + // PIC16TargetMachine - Traditional PIC16 Machine. PIC16TargetMachine::PIC16TargetMachine(const Module &M, const std::string &FS, bool Cooper) diff --git a/lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp b/lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp index 7723982..c7bfb6d 100644 --- a/lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp +++ b/lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp @@ -646,19 +646,10 @@ bool PPCLinuxAsmPrinter::runOnMachineFunction(MachineFunction &MF) { bool PPCLinuxAsmPrinter::doInitialization(Module &M) { bool Result = AsmPrinter::doInitialization(M); - - // Emit initial debug information. 
+ DW = getAnalysisIfAvailable<DwarfWriter>(); MMI = getAnalysisIfAvailable<MachineModuleInfo>(); assert(MMI); - DW = getAnalysisIfAvailable<DwarfWriter>(); - assert(DW && "DwarfWriter is not available"); - DW->BeginModule(&M, MMI, O, this, TAI); - - // GNU as handles section names wrapped in quotes - Mang->setUseQuotes(true); - SwitchToSection(TAI->getTextSection()); - return Result; } @@ -875,18 +866,9 @@ bool PPCDarwinAsmPrinter::doInitialization(Module &M) { O << "\t.machine " << CPUDirectives[Directive] << '\n'; bool Result = AsmPrinter::doInitialization(M); - - // Emit initial debug information. - // We need this for Personality functions. - // AsmPrinter::doInitialization should have done this analysis. + DW = getAnalysisIfAvailable<DwarfWriter>(); MMI = getAnalysisIfAvailable<MachineModuleInfo>(); assert(MMI); - DW = getAnalysisIfAvailable<DwarfWriter>(); - assert(DW && "DwarfWriter is not available"); - DW->BeginModule(&M, MMI, O, this, TAI); - - // Darwin wants symbols to be quoted if they have complex names. - Mang->setUseQuotes(true); // Prime text sections so they are adjacent. This reduces the likelihood a // large data or debug section causes a branch to exceed 16M limit. @@ -1202,3 +1184,9 @@ namespace { extern "C" int PowerPCAsmPrinterForceLink; int PowerPCAsmPrinterForceLink = 0; + +// Force static initialization when called from +// llvm/InitializeAllAsmPrinters.h +namespace llvm { + void InitializePowerPCAsmPrinter() { } +} diff --git a/lib/Target/PowerPC/PPCTargetAsmInfo.cpp b/lib/Target/PowerPC/PPCTargetAsmInfo.cpp index c69e591..ebffd69 100644 --- a/lib/Target/PowerPC/PPCTargetAsmInfo.cpp +++ b/lib/Target/PowerPC/PPCTargetAsmInfo.cpp @@ -19,59 +19,19 @@ using namespace llvm; using namespace llvm::dwarf; -PPCDarwinTargetAsmInfo::PPCDarwinTargetAsmInfo(const PPCTargetMachine &TM): +PPCDarwinTargetAsmInfo::PPCDarwinTargetAsmInfo(const PPCTargetMachine &TM) : PPCTargetAsmInfo<DarwinTargetAsmInfo>(TM) { PCSymbol = "."; CommentString = ";"; - GlobalPrefix = "_"; - PrivateGlobalPrefix = "L"; - LessPrivateGlobalPrefix = "l"; - StringConstantPrefix = "\1LC"; ConstantPoolSection = "\t.const\t"; - JumpTableDataSection = ".const"; - CStringSection = "\t.cstring"; - if (TM.getRelocationModel() == Reloc::Static) { - StaticCtorsSection = ".constructor"; - StaticDtorsSection = ".destructor"; - } else { - StaticCtorsSection = ".mod_init_func"; - StaticDtorsSection = ".mod_term_func"; - } - HasSingleParameterDotFile = false; - SwitchToSectionDirective = "\t.section "; UsedDirective = "\t.no_dead_strip\t"; - WeakDefDirective = "\t.weak_definition "; - WeakRefDirective = "\t.weak_reference "; - HiddenDirective = "\t.private_extern "; SupportsExceptionHandling = true; - NeedsIndirectEncoding = true; - NeedsSet = true; - BSSSection = 0; DwarfEHFrameSection = - ".section __TEXT,__eh_frame,coalesced,no_toc+strip_static_syms+live_support"; + ".section __TEXT,__eh_frame,coalesced,no_toc+strip_static_syms+live_support"; DwarfExceptionSection = ".section __DATA,__gcc_except_tab"; GlobalEHDirective = "\t.globl\t"; SupportsWeakOmittedEHFrame = false; - - DwarfAbbrevSection = ".section __DWARF,__debug_abbrev,regular,debug"; - DwarfInfoSection = ".section __DWARF,__debug_info,regular,debug"; - DwarfLineSection = ".section __DWARF,__debug_line,regular,debug"; - DwarfFrameSection = ".section __DWARF,__debug_frame,regular,debug"; - DwarfPubNamesSection = ".section __DWARF,__debug_pubnames,regular,debug"; - DwarfPubTypesSection = ".section __DWARF,__debug_pubtypes,regular,debug"; - DwarfStrSection = 
".section __DWARF,__debug_str,regular,debug"; - DwarfLocSection = ".section __DWARF,__debug_loc,regular,debug"; - DwarfARangesSection = ".section __DWARF,__debug_aranges,regular,debug"; - DwarfRangesSection = ".section __DWARF,__debug_ranges,regular,debug"; - DwarfMacInfoSection = ".section __DWARF,__debug_macinfo,regular,debug"; - - // In non-PIC modes, emit a special label before jump tables so that the - // linker can perform more accurate dead code stripping. - if (TM.getRelocationModel() != Reloc::PIC_) { - // Emit a local label that is preserved until the linker runs. - JumpTableSpecialLabelPrefix = "l"; - } } /// PreferredEHDataFormat - This hook allows the target to select data @@ -131,7 +91,7 @@ PPCLinuxTargetAsmInfo::PPCLinuxTargetAsmInfo(const PPCTargetMachine &TM) : DwarfLocSection = "\t.section\t.debug_loc,\"\",@progbits"; DwarfARangesSection = "\t.section\t.debug_aranges,\"\",@progbits"; DwarfRangesSection = "\t.section\t.debug_ranges,\"\",@progbits"; - DwarfMacInfoSection = "\t.section\t.debug_macinfo,\"\",@progbits"; + DwarfMacroInfoSection = "\t.section\t.debug_macinfo,\"\",@progbits"; PCSymbol = "."; diff --git a/lib/Target/PowerPC/PPCTargetMachine.cpp b/lib/Target/PowerPC/PPCTargetMachine.cpp index ef3f0fc..3e89885 100644 --- a/lib/Target/PowerPC/PPCTargetMachine.cpp +++ b/lib/Target/PowerPC/PPCTargetMachine.cpp @@ -35,6 +35,11 @@ X("ppc32", "PowerPC 32"); static RegisterTarget<PPC64TargetMachine> Y("ppc64", "PowerPC 64"); +// Force static initialization when called from llvm/InitializeAllTargets.h +namespace llvm { + void InitializePowerPCTarget() { } +} + // No assembler printer by default PPCTargetMachine::AsmPrinterCtorFn PPCTargetMachine::AsmPrinterCtor = 0; diff --git a/lib/Target/Sparc/AsmPrinter/SparcAsmPrinter.cpp b/lib/Target/Sparc/AsmPrinter/SparcAsmPrinter.cpp index 61707f5..6a2fdca 100644 --- a/lib/Target/Sparc/AsmPrinter/SparcAsmPrinter.cpp +++ b/lib/Target/Sparc/AsmPrinter/SparcAsmPrinter.cpp @@ -15,6 +15,7 @@ #define DEBUG_TYPE "asm-printer" #include "Sparc.h" #include "SparcInstrInfo.h" +#include "SparcTargetMachine.h" #include "llvm/Constants.h" #include "llvm/DerivedTypes.h" #include "llvm/Module.h" @@ -24,8 +25,6 @@ #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/Target/TargetAsmInfo.h" -#include "llvm/Target/TargetData.h" -#include "llvm/Target/TargetMachine.h" #include "llvm/Support/Mangler.h" #include "llvm/Support/raw_ostream.h" #include "llvm/ADT/Statistic.h" @@ -88,6 +87,7 @@ FunctionPass *llvm::createSparcCodePrinterPass(raw_ostream &o, return new SparcAsmPrinter(o, tm, tm.getTargetAsmInfo(), OptLevel, verbose); } + /// runOnMachineFunction - This uses the printInstruction() /// method to print assembly for each instruction. 
///
@@ -353,3 +353,17 @@ bool SparcAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
   return false;
 }
+
+namespace {
+  static struct Register {
+    Register() {
+      SparcTargetMachine::registerAsmPrinter(createSparcCodePrinterPass);
+    }
+  } Registrator;
+}
+
+// Force static initialization when called from
+// llvm/InitializeAllAsmPrinters.h
+namespace llvm {
+  void InitializeSparcAsmPrinter() { }
+}
diff --git a/lib/Target/Sparc/SparcTargetMachine.cpp b/lib/Target/Sparc/SparcTargetMachine.cpp
index eda0309..fd0f124 100644
--- a/lib/Target/Sparc/SparcTargetMachine.cpp
+++ b/lib/Target/Sparc/SparcTargetMachine.cpp
@@ -18,17 +18,18 @@
 #include "llvm/Target/TargetMachineRegistry.h"
 using namespace llvm;
 
-/// SparcTargetMachineModule - Note that this is used on hosts that
-/// cannot link in a library unless there are references into the
-/// library. In particular, it seems that it is not possible to get
-/// things to work on Win32 without this. Though it is unused, do not
-/// remove it.
-extern "C" int SparcTargetMachineModule;
-int SparcTargetMachineModule = 0;
-
 // Register the target.
 static RegisterTarget<SparcTargetMachine> X("sparc", "SPARC");
 
+// No assembler printer by default
+SparcTargetMachine::AsmPrinterCtorFn SparcTargetMachine::AsmPrinterCtor = 0;
+
+
+// Force static initialization when called from llvm/InitializeAllTargets.h
+namespace llvm {
+  void InitializeSparcTarget() { }
+}
+
 const TargetAsmInfo *SparcTargetMachine::createTargetAsmInfo() const {
   // FIXME: Handle Solaris subtarget someday :)
   return new SparcELFTargetAsmInfo(*this);
@@ -89,6 +90,8 @@ bool SparcTargetMachine::addAssemblyEmitter(PassManagerBase &PM,
                                             bool Verbose, raw_ostream &Out) {
   // Output assembly language.
-  PM.add(createSparcCodePrinterPass(Out, *this, OptLevel, Verbose));
+  assert(AsmPrinterCtor && "AsmPrinter was not linked in");
+  if (AsmPrinterCtor)
+    PM.add(AsmPrinterCtor(Out, *this, OptLevel, Verbose));
 
   return false;
 }
diff --git a/lib/Target/Sparc/SparcTargetMachine.h b/lib/Target/Sparc/SparcTargetMachine.h
index 40b44f2..8afcc73 100644
--- a/lib/Target/Sparc/SparcTargetMachine.h
+++ b/lib/Target/Sparc/SparcTargetMachine.h
@@ -35,6 +35,14 @@ class SparcTargetMachine : public LLVMTargetMachine {
 protected:
   virtual const TargetAsmInfo *createTargetAsmInfo() const;
 
+  // To avoid having the target depend on the asmprinter libraries, the
+  // asmprinters set this function pointer to their ctor at startup time
+  // if they are linked in.
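  // (Editor's note) This pointer is one half of a two-part registration
  // handshake. The file-static constructor in SparcAsmPrinter.cpp, shown
  // earlier in this patch, runs at load time and does
  //
  //   SparcTargetMachine::registerAsmPrinter(createSparcCodePrinterPass);
  //
  // while the empty InitializeSparcTarget()/InitializeSparcAsmPrinter()
  // anchors give llvm/InitializeAllTargets.h and
  // llvm/InitializeAllAsmPrinters.h something to call, forcing the linker to
  // keep these object files, and with them the static constructors, in the
  // final image.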
+ typedef FunctionPass *(*AsmPrinterCtorFn)(raw_ostream &o, + TargetMachine &tm, + CodeGenOpt::Level OptLevel, + bool verbose); + static AsmPrinterCtorFn AsmPrinterCtor; + public: SparcTargetMachine(const Module &M, const std::string &FS); @@ -56,6 +64,10 @@ public: virtual bool addAssemblyEmitter(PassManagerBase &PM, CodeGenOpt::Level OptLevel, bool Verbose, raw_ostream &Out); + + static void registerAsmPrinter(AsmPrinterCtorFn F) { + AsmPrinterCtor = F; + } }; } // end namespace llvm diff --git a/lib/Target/TargetAsmInfo.cpp b/lib/Target/TargetAsmInfo.cpp index 6a2de6f..3f5f1bd 100644 --- a/lib/Target/TargetAsmInfo.cpp +++ b/lib/Target/TargetAsmInfo.cpp @@ -24,10 +24,10 @@ #include "llvm/Support/Dwarf.h" #include <cctype> #include <cstring> - using namespace llvm; -void TargetAsmInfo::fillDefaultValues() { +TargetAsmInfo::TargetAsmInfo(const TargetMachine &tm) +: TM(tm) { BSSSection = "\t.bss"; BSSSection_ = 0; ReadOnlySection = 0; @@ -58,6 +58,7 @@ void TargetAsmInfo::fillDefaultValues() { InlineAsmEnd = "#NO_APP"; AssemblerDialect = 0; StringConstantPrefix = ".str"; + AllowQuotesInName = false; ZeroDirective = "\t.zero\t"; ZeroDirectiveSuffix = 0; AsciiDirective = "\t.ascii\t"; @@ -102,7 +103,6 @@ void TargetAsmInfo::fillDefaultValues() { SupportsExceptionHandling = false; DwarfRequiresFrameSection = true; DwarfUsesInlineInfoSection = false; - SupportsMacInfoSection = true; NonLocalEHFrameLabel = false; GlobalEHDirective = 0; SupportsWeakOmittedEHFrame = true; @@ -118,7 +118,7 @@ void TargetAsmInfo::fillDefaultValues() { DwarfLocSection = ".debug_loc"; DwarfARangesSection = ".debug_aranges"; DwarfRangesSection = ".debug_ranges"; - DwarfMacInfoSection = ".debug_macinfo"; + DwarfMacroInfoSection = ".debug_macinfo"; DwarfEHFrameSection = ".eh_frame"; DwarfExceptionSection = ".gcc_except_table"; AsmTransCBE = 0; @@ -126,11 +126,6 @@ void TargetAsmInfo::fillDefaultValues() { DataSection = getUnnamedSection("\t.data", SectionFlags::Writeable); } -TargetAsmInfo::TargetAsmInfo(const TargetMachine &tm) - : TM(tm) { - fillDefaultValues(); -} - TargetAsmInfo::~TargetAsmInfo() { } diff --git a/lib/Target/X86/AsmPrinter/CMakeLists.txt b/lib/Target/X86/AsmPrinter/CMakeLists.txt index dbd03d8..368bcaa 100644 --- a/lib/Target/X86/AsmPrinter/CMakeLists.txt +++ b/lib/Target/X86/AsmPrinter/CMakeLists.txt @@ -2,6 +2,7 @@ include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. 
${CMAKE_CURRENT_SOURCE_DIR}/ add_partially_linked_object(LLVMX86AsmPrinter X86ATTAsmPrinter.cpp + X86ATTInstPrinter.cpp X86AsmPrinter.cpp X86IntelAsmPrinter.cpp ) diff --git a/lib/Target/X86/AsmPrinter/X86ATTAsmPrinter.cpp b/lib/Target/X86/AsmPrinter/X86ATTAsmPrinter.cpp index 8afe2ea..60ed4f0 100644 --- a/lib/Target/X86/AsmPrinter/X86ATTAsmPrinter.cpp +++ b/lib/Target/X86/AsmPrinter/X86ATTAsmPrinter.cpp @@ -26,8 +26,10 @@ #include "llvm/Type.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringExtras.h" +#include "llvm/MC/MCInst.h" #include "llvm/CodeGen/DwarfWriter.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Mangler.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetAsmInfo.h" @@ -36,6 +38,9 @@ using namespace llvm; STATISTIC(EmittedInsts, "Number of machine instrs printed"); +static cl::opt<bool> NewAsmPrinter("experimental-asm-printer", + cl::Hidden); + static std::string getPICLabelString(unsigned FnNum, const TargetAsmInfo *TAI, const X86Subtarget* Subtarget) { @@ -266,7 +271,7 @@ bool X86ATTAsmPrinter::runOnMachineFunction(MachineFunction &MF) { O << "\t.size\t" << CurrentFnName << ", .-" << CurrentFnName << '\n'; // Emit post-function debug information. - if (TAI->doesSupportDebugInformation()) + if (TAI->doesSupportDebugInformation() || TAI->doesSupportExceptionHandling()) DW->EndFunction(&MF); // Print out jump tables referenced by the function. @@ -291,6 +296,136 @@ static inline bool shouldPrintStub(TargetMachine &TM, const X86Subtarget* ST) { return ST->isPICStyleStub() && TM.getRelocationModel() != Reloc::Static; } +/// print_pcrel_imm - This is used to print an immediate value that ends up +/// being encoded as a pc-relative value. These print slightly differently, for +/// example, a $ is not emitted. +void X86ATTAsmPrinter::print_pcrel_imm(const MachineInstr *MI, unsigned OpNo) { + const MachineOperand &MO = MI->getOperand(OpNo); + switch (MO.getType()) { + default: assert(0 && "Unknown pcrel immediate operand"); + case MachineOperand::MO_Immediate: + O << MO.getImm(); + return; + case MachineOperand::MO_MachineBasicBlock: + printBasicBlockLabel(MO.getMBB(), false, false, VerboseAsm); + return; + + case MachineOperand::MO_GlobalAddress: { + const GlobalValue *GV = MO.getGlobal(); + std::string Name = Mang->getValueName(GV); + decorateName(Name, GV); + + bool needCloseParen = false; + if (Name[0] == '$') { + // The name begins with a dollar-sign. In order to avoid having it look + // like an integer immediate to the assembler, enclose it in parens. + O << '('; + needCloseParen = true; + } + + if (shouldPrintStub(TM, Subtarget)) { + // Link-once, declaration, or Weakly-linked global variables need + // non-lazily-resolved stubs + if (GV->isDeclaration() || GV->isWeakForLinker()) { + // Dynamically-resolved functions need a stub for the function. + if (isa<Function>(GV)) { + // Function stubs are no longer needed for Mac OS X 10.5 and up. + if (Subtarget->isTargetDarwin() && Subtarget->getDarwinVers() >= 9) { + O << Name; + } else { + FnStubs.insert(Name); + printSuffixedName(Name, "$stub"); + } + } else if (GV->hasHiddenVisibility()) { + if (!GV->isDeclaration() && !GV->hasCommonLinkage()) + // Definition is not definitely in the current translation unit. 
+ O << Name; + else { + HiddenGVStubs.insert(Name); + printSuffixedName(Name, "$non_lazy_ptr"); + } + } else { + GVStubs.insert(Name); + printSuffixedName(Name, "$non_lazy_ptr"); + } + } else { + if (GV->hasDLLImportLinkage()) + O << "__imp_"; + O << Name; + } + } else { + if (GV->hasDLLImportLinkage()) { + O << "__imp_"; + } + O << Name; + + if (shouldPrintPLT(TM, Subtarget)) { + // Assemble call via PLT for externally visible symbols + if (!GV->hasHiddenVisibility() && !GV->hasProtectedVisibility() && + !GV->hasLocalLinkage()) + O << "@PLT"; + } + if (Subtarget->isTargetCygMing() && GV->isDeclaration()) + // Save function name for later type emission + FnStubs.insert(Name); + } + + if (GV->hasExternalWeakLinkage()) + ExtWeakSymbols.insert(GV); + + printOffset(MO.getOffset()); + + if (needCloseParen) + O << ')'; + return; + } + + case MachineOperand::MO_ExternalSymbol: { + bool needCloseParen = false; + std::string Name(TAI->getGlobalPrefix()); + Name += MO.getSymbolName(); + // Print function stub suffix unless it's Mac OS X 10.5 and up. + if (shouldPrintStub(TM, Subtarget) && + !(Subtarget->isTargetDarwin() && Subtarget->getDarwinVers() >= 9)) { + FnStubs.insert(Name); + printSuffixedName(Name, "$stub"); + return; + } + + if (Name[0] == '$') { + // The name begins with a dollar-sign. In order to avoid having it look + // like an integer immediate to the assembler, enclose it in parens. + O << '('; + needCloseParen = true; + } + + O << Name; + + if (shouldPrintPLT(TM, Subtarget)) { + std::string GOTName(TAI->getGlobalPrefix()); + GOTName+="_GLOBAL_OFFSET_TABLE_"; + if (Name == GOTName) + // HACK! Emit extra offset to PC during printing GOT offset to + // compensate for the size of popl instruction. The resulting code + // should look like: + // call .piclabel + // piclabel: + // popl %some_register + // addl $_GLOBAL_ADDRESS_TABLE_ + [.-piclabel], %some_register + O << " + [.-" + << getPICLabelString(getFunctionNumber(), TAI, Subtarget) << ']'; + + O << "@PLT"; + } + + if (needCloseParen) + O << ')'; + + return; + } + } +} + void X86ATTAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo, const char *Modifier, bool NotRIPRel) { const MachineOperand &MO = MI->getOperand(OpNo); @@ -312,14 +447,10 @@ void X86ATTAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo, case MachineOperand::MO_Immediate: if (!Modifier || (strcmp(Modifier, "debug") && - strcmp(Modifier, "mem") && - strcmp(Modifier, "call"))) + strcmp(Modifier, "mem"))) O << '$'; O << MO.getImm(); return; - case MachineOperand::MO_MachineBasicBlock: - printBasicBlockLabel(MO.getMBB(), false, false, VerboseAsm); - return; case MachineOperand::MO_JumpTableIndex: { bool isMemOp = Modifier && !strcmp(Modifier, "mem"); if (!isMemOp) O << '$'; @@ -359,8 +490,7 @@ void X86ATTAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo, return; } case MachineOperand::MO_GlobalAddress: { - bool isCallOp = Modifier && !strcmp(Modifier, "call"); - bool isMemOp = Modifier && !strcmp(Modifier, "mem"); + bool isMemOp = Modifier && !strcmp(Modifier, "mem"); bool needCloseParen = false; const GlobalValue *GV = MO.getGlobal(); @@ -369,7 +499,7 @@ void X86ATTAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo, // If GV is an alias then use the aliasee for determining // thread-localness. 
if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV)) - GVar = dyn_cast_or_null<GlobalVariable>(GA->resolveAliasedGlobal(false)); + GVar =dyn_cast_or_null<GlobalVariable>(GA->resolveAliasedGlobal(false)); } bool isThreadLocal = GVar && GVar->isThreadLocal(); @@ -377,7 +507,7 @@ void X86ATTAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo, std::string Name = Mang->getValueName(GV); decorateName(Name, GV); - if (!isMemOp && !isCallOp) + if (!isMemOp) O << '$'; else if (Name[0] == '$') { // The name begins with a dollar-sign. In order to avoid having it look @@ -391,15 +521,7 @@ void X86ATTAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo, // non-lazily-resolved stubs if (GV->isDeclaration() || GV->isWeakForLinker()) { // Dynamically-resolved functions need a stub for the function. - if (isCallOp && isa<Function>(GV)) { - // Function stubs are no longer needed for Mac OS X 10.5 and up. - if (Subtarget->isTargetDarwin() && Subtarget->getDarwinVers() >= 9) { - O << Name; - } else { - FnStubs.insert(Name); - printSuffixedName(Name, "$stub"); - } - } else if (GV->hasHiddenVisibility()) { + if (GV->hasHiddenVisibility()) { if (!GV->isDeclaration() && !GV->hasCommonLinkage()) // Definition is not definitely in the current translation unit. O << Name; @@ -417,25 +539,12 @@ void X86ATTAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo, O << Name; } - if (!isCallOp && TM.getRelocationModel() == Reloc::PIC_) + if (TM.getRelocationModel() == Reloc::PIC_) O << '-' << getPICLabelString(getFunctionNumber(), TAI, Subtarget); } else { - if (GV->hasDLLImportLinkage()) { + if (GV->hasDLLImportLinkage()) O << "__imp_"; - } O << Name; - - if (isCallOp) { - if (shouldPrintPLT(TM, Subtarget)) { - // Assemble call via PLT for externally visible symbols - if (!GV->hasHiddenVisibility() && !GV->hasProtectedVisibility() && - !GV->hasLocalLinkage()) - O << "@PLT"; - } - if (Subtarget->isTargetCygMing() && GV->isDeclaration()) - // Save function name for later type emission - FnStubs.insert(Name); - } } if (GV->hasExternalWeakLinkage()) @@ -443,6 +552,10 @@ void X86ATTAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo, printOffset(MO.getOffset()); + if (needCloseParen) + O << ')'; + + bool isRIPRelative = false; if (isThreadLocal) { TLSModel::Model model = getTLSModel(GVar, TM.getRelocationModel()); switch (model) { @@ -456,7 +569,8 @@ void X86ATTAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo, case TLSModel::InitialExec: if (Subtarget->is64Bit()) { assert (!NotRIPRel); - O << "@GOTTPOFF(%rip)"; + O << "@GOTTPOFF"; + isRIPRelative = true; } else { O << "@INDNTPOFF"; } @@ -476,43 +590,33 @@ void X86ATTAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo, O << "@GOT"; else O << "@GOTOFF"; - } else if (Subtarget->isPICStyleRIPRel() && !NotRIPRel) { + } else if (Subtarget->isPICStyleRIPRel() && + !NotRIPRel) { if (TM.getRelocationModel() != Reloc::Static) { if (Subtarget->GVRequiresExtraLoad(GV, TM, false)) O << "@GOTPCREL"; - - if (needCloseParen) { - needCloseParen = false; - O << ')'; - } } - - // Use rip when possible to reduce code size, except when - // index or base register are also part of the address. e.g. - // foo(%rip)(%rcx,%rax,4) is not legal - O << "(%rip)"; + + isRIPRelative = true; } } - if (needCloseParen) - O << ')'; - + // Use rip when possible to reduce code size, except when + // index or base register are also part of the address. e.g. + // foo(%rip)(%rcx,%rax,4) is not legal. 
+ if (isRIPRelative) + O << "(%rip)"; + return; } case MachineOperand::MO_ExternalSymbol: { - bool isCallOp = Modifier && !strcmp(Modifier, "call"); bool isMemOp = Modifier && !strcmp(Modifier, "mem"); bool needCloseParen = false; std::string Name(TAI->getGlobalPrefix()); Name += MO.getSymbolName(); + // Print function stub suffix unless it's Mac OS X 10.5 and up. - if (isCallOp && shouldPrintStub(TM, Subtarget) && - !(Subtarget->isTargetDarwin() && Subtarget->getDarwinVers() >= 9)) { - FnStubs.insert(Name); - printSuffixedName(Name, "$stub"); - return; - } - if (!isMemOp && !isCallOp) + if (!isMemOp) O << '$'; else if (Name[0] == '$') { // The name begins with a dollar-sign. In order to avoid having it look @@ -536,17 +640,13 @@ void X86ATTAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo, // addl $_GLOBAL_ADDRESS_TABLE_ + [.-piclabel], %some_register O << " + [.-" << getPICLabelString(getFunctionNumber(), TAI, Subtarget) << ']'; - - if (isCallOp) - O << "@PLT"; } if (needCloseParen) O << ')'; - if (!isCallOp && Subtarget->isPICStyleRIPRel()) + if (Subtarget->isPICStyleRIPRel()) O << "(%rip)"; - return; } default: @@ -673,8 +773,7 @@ void X86ATTAsmPrinter::printPICJumpTableEntry(const MachineJumpTableInfo *MJTI, printBasicBlockLabel(MBB, false, false, false); } -bool X86ATTAsmPrinter::printAsmMRegister(const MachineOperand &MO, - const char Mode) { +bool X86ATTAsmPrinter::printAsmMRegister(const MachineOperand &MO, char Mode) { unsigned Reg = MO.getReg(); switch (Mode) { default: return true; // Unknown mode. @@ -758,38 +857,85 @@ bool X86ATTAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, return false; } +static void lower_lea64_32mem(MCInst *MI, unsigned OpNo) { + // Convert registers in the addr mode according to subreg64. + for (unsigned i = 0; i != 4; ++i) { + if (!MI->getOperand(i).isReg()) continue; + + unsigned Reg = MI->getOperand(i).getReg(); + if (Reg == 0) continue; + + MI->getOperand(i).setReg(getX86SubSuperRegister(Reg, MVT::i64)); + } +} + /// printMachineInstruction -- Print out a single X86 LLVM instruction MI in /// AT&T syntax to the current output stream. /// void X86ATTAsmPrinter::printMachineInstruction(const MachineInstr *MI) { ++EmittedInsts; + if (NewAsmPrinter) { + if (MI->getOpcode() == TargetInstrInfo::INLINEASM) { + O << "\t"; + printInlineAsm(MI); + return; + } else if (MI->isLabel()) { + printLabel(MI); + return; + } else if (MI->getOpcode() == TargetInstrInfo::DECLARE) { + printDeclare(MI); + return; + } else if (MI->getOpcode() == TargetInstrInfo::IMPLICIT_DEF) { + printImplicitDef(MI); + return; + } + + O << "NEW: "; + MCInst TmpInst; + + TmpInst.setOpcode(MI->getOpcode()); + + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI->getOperand(i); + + MCOperand MCOp; + if (MO.isReg()) { + MCOp.MakeReg(MO.getReg()); + } else if (MO.isImm()) { + MCOp.MakeImm(MO.getImm()); + } else if (MO.isMBB()) { + MCOp.MakeMBBLabel(getFunctionNumber(), MO.getMBB()->getNumber()); + } else { + assert(0 && "Unimp"); + } + + TmpInst.addOperand(MCOp); + } + + switch (TmpInst.getOpcode()) { + case X86::LEA64_32r: + // Handle the 'subreg rewriting' for the lea64_32mem operand. + lower_lea64_32mem(&TmpInst, 1); + break; + } + + // FIXME: Convert TmpInst. + printInstruction(&TmpInst); + O << "OLD: "; + } + // Call the autogenerated instruction printer routines. 
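  // (Editor's note) The LEA64_32r fixup above rewrites each address-mode
  // register to its 64-bit super-register via getX86SubSuperRegister, so an
  // address like the following (illustrative, not from a real run)
  //
  //   8(%eax,%ecx,2)   becomes   8(%rax,%rcx,2)
  //
  // while the scale and displacement operands pass through untouched.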
printInstruction(MI); } /// doInitialization bool X86ATTAsmPrinter::doInitialization(Module &M) { - - bool Result = AsmPrinter::doInitialization(M); - - if (TAI->doesSupportDebugInformation()) { - // Let PassManager know we need debug information and relay - // the MachineModuleInfo address on to DwarfWriter. - // AsmPrinter::doInitialization did this analysis. + if (TAI->doesSupportDebugInformation() || TAI->doesSupportExceptionHandling()) MMI = getAnalysisIfAvailable<MachineModuleInfo>(); - DW = getAnalysisIfAvailable<DwarfWriter>(); - DW->BeginModule(&M, MMI, O, this, TAI); - } - - // Darwin wants symbols to be quoted if they have complex names. - if (Subtarget->isTargetDarwin()) - Mang->setUseQuotes(true); - - return Result; + return AsmPrinter::doInitialization(M); } - void X86ATTAsmPrinter::printModuleLevelGV(const GlobalVariable* GVar) { const TargetData *TD = TM.getTargetData(); @@ -1040,8 +1186,8 @@ bool X86ATTAsmPrinter::doFinalization(Module &M) { } // Emit final debug information. - DwarfWriter *DW = getAnalysisIfAvailable<DwarfWriter>(); - DW->EndModule(); + if (TAI->doesSupportDebugInformation() || TAI->doesSupportExceptionHandling()) + DW->EndModule(); // Funny Darwin hack: This flag tells the linker that no global symbols // contain code that falls through to other global symbols (e.g. the obvious @@ -1060,12 +1206,12 @@ bool X86ATTAsmPrinter::doFinalization(Module &M) { } // Emit final debug information. - DwarfWriter *DW = getAnalysisIfAvailable<DwarfWriter>(); - DW->EndModule(); + if (TAI->doesSupportDebugInformation() || TAI->doesSupportExceptionHandling()) + DW->EndModule(); } else if (Subtarget->isTargetELF()) { // Emit final debug information. - DwarfWriter *DW = getAnalysisIfAvailable<DwarfWriter>(); - DW->EndModule(); + if (TAI->doesSupportDebugInformation() || TAI->doesSupportExceptionHandling()) + DW->EndModule(); } return AsmPrinter::doFinalization(M); diff --git a/lib/Target/X86/AsmPrinter/X86ATTAsmPrinter.h b/lib/Target/X86/AsmPrinter/X86ATTAsmPrinter.h index 5b40e73..68a6bc8 100644 --- a/lib/Target/X86/AsmPrinter/X86ATTAsmPrinter.h +++ b/lib/Target/X86/AsmPrinter/X86ATTAsmPrinter.h @@ -27,16 +27,16 @@ namespace llvm { class MachineJumpTableInfo; +class MCInst; class VISIBILITY_HIDDEN X86ATTAsmPrinter : public AsmPrinter { - DwarfWriter *DW; MachineModuleInfo *MMI; const X86Subtarget *Subtarget; public: explicit X86ATTAsmPrinter(raw_ostream &O, X86TargetMachine &TM, const TargetAsmInfo *T, CodeGenOpt::Level OL, bool V) - : AsmPrinter(O, TM, T, OL, V), DW(0), MMI(0) { + : AsmPrinter(O, TM, T, OL, V), MMI(0) { Subtarget = &TM.getSubtarget<X86Subtarget>(); } @@ -63,10 +63,62 @@ class VISIBILITY_HIDDEN X86ATTAsmPrinter : public AsmPrinter { /// machine instruction was sufficiently described to print it, otherwise it /// returns false. bool printInstruction(const MachineInstr *MI); + + + // New MCInst printing stuff. 
+ bool printInstruction(const MCInst *MI); + + void printOperand(const MCInst *MI, unsigned OpNo, + const char *Modifier = 0, bool NotRIPRel = false); + void printMemReference(const MCInst *MI, unsigned Op); + void printLeaMemReference(const MCInst *MI, unsigned Op); + void printSSECC(const MCInst *MI, unsigned Op); + void printPICLabel(const MCInst *MI, unsigned Op); + void print_pcrel_imm(const MCInst *MI, unsigned OpNo); + + void printi8mem(const MCInst *MI, unsigned OpNo) { + printMemReference(MI, OpNo); + } + void printi16mem(const MCInst *MI, unsigned OpNo) { + printMemReference(MI, OpNo); + } + void printi32mem(const MCInst *MI, unsigned OpNo) { + printMemReference(MI, OpNo); + } + void printi64mem(const MCInst *MI, unsigned OpNo) { + printMemReference(MI, OpNo); + } + void printi128mem(const MCInst *MI, unsigned OpNo) { + printMemReference(MI, OpNo); + } + void printf32mem(const MCInst *MI, unsigned OpNo) { + printMemReference(MI, OpNo); + } + void printf64mem(const MCInst *MI, unsigned OpNo) { + printMemReference(MI, OpNo); + } + void printf80mem(const MCInst *MI, unsigned OpNo) { + printMemReference(MI, OpNo); + } + void printf128mem(const MCInst *MI, unsigned OpNo) { + printMemReference(MI, OpNo); + } + void printlea32mem(const MCInst *MI, unsigned OpNo) { + printLeaMemReference(MI, OpNo); + } + void printlea64mem(const MCInst *MI, unsigned OpNo) { + printLeaMemReference(MI, OpNo); + } + void printlea64_32mem(const MCInst *MI, unsigned OpNo) { + printLeaMemReference(MI, OpNo); + } + + // These methods are used by the tablegen'erated instruction printer. void printOperand(const MachineInstr *MI, unsigned OpNo, const char *Modifier = 0, bool NotRIPRel = false); + void print_pcrel_imm(const MachineInstr *MI, unsigned OpNo); void printi8mem(const MachineInstr *MI, unsigned OpNo) { printMemReference(MI, OpNo); } @@ -104,7 +156,7 @@ class VISIBILITY_HIDDEN X86ATTAsmPrinter : public AsmPrinter { printLeaMemReference(MI, OpNo, "subreg64"); } - bool printAsmMRegister(const MachineOperand &MO, const char Mode); + bool printAsmMRegister(const MachineOperand &MO, char Mode); bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, unsigned AsmVariant, const char *ExtraCode); bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo, diff --git a/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.cpp b/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.cpp new file mode 100644 index 0000000..9d50edc --- /dev/null +++ b/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.cpp @@ -0,0 +1,143 @@ +//===-- X86ATTInstPrinter.cpp - AT&T assembly instruction printing --------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file includes code for rendering MCInst instances as AT&T-style +// assembly. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "asm-printer" +#include "llvm/MC/MCInst.h" +#include "X86ATTAsmPrinter.h" +#include "llvm/Target/TargetAsmInfo.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +// Include the auto-generated portion of the assembly writer. 
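// (Editor's note) The #define below is a deliberate trick: X86GenAsmWriter.inc
// is generated in terms of the name "MachineInstr", so temporarily rebinding
// that name to MCInst compiles a second printInstruction(const MCInst *)
// overload from the very same tablegen output. Roughly, the generated
//
//   bool X86ATTAsmPrinter::printInstruction(const MachineInstr *MI) { ... }
//
// preprocesses into
//
//   bool X86ATTAsmPrinter::printInstruction(const MCInst *MI) { ... }
//
// which is why the name is #undef'd again immediately after the include.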
+#define MachineInstr MCInst +#define NO_ASM_WRITER_BOILERPLATE +#include "X86GenAsmWriter.inc" +#undef MachineInstr + +void X86ATTAsmPrinter::printSSECC(const MCInst *MI, unsigned Op) { + switch (MI->getOperand(Op).getImm()) { + default: assert(0 && "Invalid ssecc argument!"); + case 0: O << "eq"; break; + case 1: O << "lt"; break; + case 2: O << "le"; break; + case 3: O << "unord"; break; + case 4: O << "neq"; break; + case 5: O << "nlt"; break; + case 6: O << "nle"; break; + case 7: O << "ord"; break; + } +} + + +void X86ATTAsmPrinter::printPICLabel(const MCInst *MI, unsigned Op) { + assert(0 && + "This is only used for MOVPC32r, should lower before asm printing!"); +} + + +/// print_pcrel_imm - This is used to print an immediate value that ends up +/// being encoded as a pc-relative value. These print slightly differently, for +/// example, a $ is not emitted. +void X86ATTAsmPrinter::print_pcrel_imm(const MCInst *MI, unsigned OpNo) { + const MCOperand &Op = MI->getOperand(OpNo); + + if (Op.isImm()) + O << Op.getImm(); + else if (Op.isMBBLabel()) + // FIXME: Keep in sync with printBasicBlockLabel. printBasicBlockLabel + // should eventually call into this code, not the other way around. + O << TAI->getPrivateGlobalPrefix() << "BB" << Op.getMBBLabelFunction() + << '_' << Op.getMBBLabelBlock(); + else + assert(0 && "Unknown pcrel immediate operand"); +} + + +void X86ATTAsmPrinter::printOperand(const MCInst *MI, unsigned OpNo, + const char *Modifier, bool NotRIPRel) { + assert(Modifier == 0 && "Modifiers should not be used"); + + const MCOperand &Op = MI->getOperand(OpNo); + if (Op.isReg()) { + O << '%'; + unsigned Reg = Op.getReg(); +#if 0 + if (Modifier && strncmp(Modifier, "subreg", strlen("subreg")) == 0) { + MVT VT = (strcmp(Modifier+6,"64") == 0) ? + MVT::i64 : ((strcmp(Modifier+6, "32") == 0) ? MVT::i32 : + ((strcmp(Modifier+6,"16") == 0) ? MVT::i16 : MVT::i8)); + Reg = getX86SubSuperRegister(Reg, VT); + } +#endif + O << TRI->getAsmName(Reg); + return; + } else if (Op.isImm()) { + //if (!Modifier || (strcmp(Modifier, "debug") && strcmp(Modifier, "mem"))) + O << '$'; + O << Op.getImm(); + return; + } + + O << "<<UNKNOWN OPERAND KIND>>"; +} + +void X86ATTAsmPrinter::printLeaMemReference(const MCInst *MI, unsigned Op) { + bool NotRIPRel = false; + + const MCOperand &BaseReg = MI->getOperand(Op); + const MCOperand &IndexReg = MI->getOperand(Op+2); + const MCOperand &DispSpec = MI->getOperand(Op+3); + + NotRIPRel |= IndexReg.getReg() || BaseReg.getReg(); + if (DispSpec.isImm()) { + int64_t DispVal = DispSpec.getImm(); + if (DispVal || (!IndexReg.getReg() && !BaseReg.getReg())) + O << DispVal; + } else { + abort(); + //assert(DispSpec.isGlobal() || DispSpec.isCPI() || + // DispSpec.isJTI() || DispSpec.isSymbol()); + //printOperand(MI, Op+3, "mem", NotRIPRel); + } + + if (IndexReg.getReg() || BaseReg.getReg()) { + // There are cases where we can end up with ESP/RSP in the indexreg slot. + // If this happens, swap the base/index register to support assemblers that + // don't work when the index is *SP. + // FIXME: REMOVE THIS. 
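  // (Editor's note) The assert just below encodes a real ISA limit: in the
  // x86 SIB byte, the index field value 100 (the ESP/RSP encoding) means
  // "no index", so *SP can never be scaled or used as an index register.
  // The base/index swap the preceding comment describes, e.g.
  //
  //   (%rsp,%rax,1)  -->  (%rax,%rsp,1)
  //
  // is only valid when the scale is 1, which is presumably why the FIXME
  // wants this workaround removed.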
+ assert(IndexReg.getReg() != X86::ESP && IndexReg.getReg() != X86::RSP); + + O << '('; + if (BaseReg.getReg()) + printOperand(MI, Op); + + if (IndexReg.getReg()) { + O << ','; + printOperand(MI, Op+2); + unsigned ScaleVal = MI->getOperand(Op+1).getImm(); + if (ScaleVal != 1) + O << ',' << ScaleVal; + } + O << ')'; + } +} + +void X86ATTAsmPrinter::printMemReference(const MCInst *MI, unsigned Op) { + const MCOperand &Segment = MI->getOperand(Op+4); + if (Segment.getReg()) { + printOperand(MI, Op+4); + O << ':'; + } + printLeaMemReference(MI, Op); +} diff --git a/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp b/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp index c874849..a39203b 100644 --- a/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp +++ b/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp @@ -29,13 +29,11 @@ FunctionPass *llvm::createX86CodePrinterPass(raw_ostream &o, bool verbose) { const X86Subtarget *Subtarget = &tm.getSubtarget<X86Subtarget>(); - if (Subtarget->isFlavorIntel()) { + if (Subtarget->isFlavorIntel()) return new X86IntelAsmPrinter(o, tm, tm.getTargetAsmInfo(), OptLevel, verbose); - } else { - return new X86ATTAsmPrinter(o, tm, tm.getTargetAsmInfo(), - OptLevel, verbose); - } + return new X86ATTAsmPrinter(o, tm, tm.getTargetAsmInfo(), + OptLevel, verbose); } namespace { @@ -48,3 +46,9 @@ namespace { extern "C" int X86AsmPrinterForceLink; int X86AsmPrinterForceLink = 0; + +// Force static initialization when called from +// llvm/InitializeAllAsmPrinters.h +namespace llvm { + void InitializeX86AsmPrinter() { } +} diff --git a/lib/Target/X86/AsmPrinter/X86IntelAsmPrinter.cpp b/lib/Target/X86/AsmPrinter/X86IntelAsmPrinter.cpp index 6599349..ceae7be 100644 --- a/lib/Target/X86/AsmPrinter/X86IntelAsmPrinter.cpp +++ b/lib/Target/X86/AsmPrinter/X86IntelAsmPrinter.cpp @@ -223,9 +223,6 @@ void X86IntelAsmPrinter::printOp(const MachineOperand &MO, case MachineOperand::MO_Immediate: O << MO.getImm(); return; - case MachineOperand::MO_MachineBasicBlock: - printBasicBlockLabel(MO.getMBB()); - return; case MachineOperand::MO_JumpTableIndex: { bool isMemOp = Modifier && !strcmp(Modifier, "mem"); if (!isMemOp) O << "OFFSET "; @@ -243,14 +240,13 @@ void X86IntelAsmPrinter::printOp(const MachineOperand &MO, return; } case MachineOperand::MO_GlobalAddress: { - bool isCallOp = Modifier && !strcmp(Modifier, "call"); bool isMemOp = Modifier && !strcmp(Modifier, "mem"); GlobalValue *GV = MO.getGlobal(); std::string Name = Mang->getValueName(GV); decorateName(Name, GV); - if (!isMemOp && !isCallOp) O << "OFFSET "; + if (!isMemOp) O << "OFFSET "; if (GV->hasDLLImportLinkage()) { // FIXME: This should be fixed with full support of stdcall & fastcall // CC's @@ -261,8 +257,6 @@ void X86IntelAsmPrinter::printOp(const MachineOperand &MO, return; } case MachineOperand::MO_ExternalSymbol: { - bool isCallOp = Modifier && !strcmp(Modifier, "call"); - if (!isCallOp) O << "OFFSET "; O << TAI->getGlobalPrefix() << MO.getSymbolName(); return; } @@ -271,6 +265,39 @@ void X86IntelAsmPrinter::printOp(const MachineOperand &MO, } } +void X86IntelAsmPrinter::print_pcrel_imm(const MachineInstr *MI, unsigned OpNo){ + const MachineOperand &MO = MI->getOperand(OpNo); + switch (MO.getType()) { + default: assert(0 && "Unknown pcrel immediate operand"); + case MachineOperand::MO_Immediate: + O << MO.getImm(); + return; + case MachineOperand::MO_MachineBasicBlock: + printBasicBlockLabel(MO.getMBB()); + return; + + case MachineOperand::MO_GlobalAddress: { + GlobalValue *GV = MO.getGlobal(); + std::string Name = Mang->getValueName(GV); + 
      decorateName(Name, GV);
+
+      if (GV->hasDLLImportLinkage()) {
+        // FIXME: This should be fixed with full support of stdcall & fastcall
+        // CC's.
+        O << "__imp_";
+      }
+      O << Name;
+      printOffset(MO.getOffset());
+      return;
+    }
+
+  case MachineOperand::MO_ExternalSymbol:
+    O << TAI->getGlobalPrefix() << MO.getSymbolName();
+    return;
+  }
+}
+
+
 void X86IntelAsmPrinter::printLeaMemReference(const MachineInstr *MI,
                                               unsigned Op,
                                               const char *Modifier) {
@@ -339,8 +366,8 @@ void X86IntelAsmPrinter::printPICJumpTableSetLabel(unsigned uid,
 }
 
 void X86IntelAsmPrinter::printPICLabel(const MachineInstr *MI, unsigned Op) {
-  O << "\"L" << getFunctionNumber() << "$pb\"\n";
-  O << "\"L" << getFunctionNumber() << "$pb\":";
+  O << "L" << getFunctionNumber() << "$pb\n";
+  O << "L" << getFunctionNumber() << "$pb:";
 }
 
 bool X86IntelAsmPrinter::printAsmMRegister(const MachineOperand &MO,
@@ -362,7 +389,7 @@ bool X86IntelAsmPrinter::printAsmMRegister(const MachineOperand &MO,
     break;
   }
 
-  O << '%' << TRI->getName(Reg);
+  O << TRI->getName(Reg);
   return false;
 }
 
@@ -414,7 +441,7 @@ bool X86IntelAsmPrinter::doInitialization(Module &M) {
 
   Mang->markCharUnacceptable('.');
 
-  O << "\t.686\n\t.model flat\n\n";
+  O << "\t.686\n\t.MMX\n\t.XMM\n\t.model flat\n\n";
 
   // Emit declarations for external functions.
   for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I)
@@ -422,7 +449,7 @@ bool X86IntelAsmPrinter::doInitialization(Module &M) {
       std::string Name = Mang->getValueName(I);
       decorateName(Name, I);
 
-      O << "\textern " ;
+      O << "\tEXTERN " ;
       if (I->hasDLLImportLinkage()) {
         O << "__imp_";
       }
@@ -436,7 +463,7 @@ bool X86IntelAsmPrinter::doInitialization(Module &M) {
     if (I->isDeclaration()) {
       std::string Name = Mang->getValueName(I);
 
-      O << "\textern " ;
+      O << "\tEXTERN " ;
       if (I->hasDLLImportLinkage()) {
         O << "__imp_";
       }
@@ -471,14 +498,14 @@ bool X86IntelAsmPrinter::doFinalization(Module &M) {
     case GlobalValue::WeakAnyLinkage:
     case GlobalValue::WeakODRLinkage:
       SwitchToDataSection("");
-      O << name << "?\tsegment common 'COMMON'\n";
+      O << name << "?\tSEGMENT PARA common 'COMMON'\n";
       bCustomSegment = true;
       // FIXME: the default alignment is 16 bytes, but 1, 2, 4, and 256
       // are also available.
       break;
     case GlobalValue::AppendingLinkage:
       SwitchToDataSection("");
-      O << name << "?\tsegment public 'DATA'\n";
+      O << name << "?\tSEGMENT PARA public 'DATA'\n";
       bCustomSegment = true;
       // FIXME: the default alignment is 16 bytes, but 1, 2, 4, and 256
       // are also available.
diff --git a/lib/Target/X86/AsmPrinter/X86IntelAsmPrinter.h b/lib/Target/X86/AsmPrinter/X86IntelAsmPrinter.h
index 9520d98..04f2595 100644
--- a/lib/Target/X86/AsmPrinter/X86IntelAsmPrinter.h
+++ b/lib/Target/X86/AsmPrinter/X86IntelAsmPrinter.h
@@ -52,6 +52,9 @@ struct VISIBILITY_HIDDEN X86IntelAsmPrinter : public AsmPrinter {
       printOp(MO, Modifier);
     }
   }
+
+  void print_pcrel_imm(const MachineInstr *MI, unsigned OpNo);
+
   void printi8mem(const MachineInstr *MI, unsigned OpNo) {
     O << "BYTE PTR ";
diff --git a/lib/Target/X86/README.txt b/lib/Target/X86/README.txt
index 3796aac..4464878 100644
--- a/lib/Target/X86/README.txt
+++ b/lib/Target/X86/README.txt
@@ -1858,8 +1858,23 @@ Ideal output:
 	setne	%al
 	ret
 
-We could do this transformation in instcombine, but it's only clearly
-beneficial on platforms with a test instruction.
+This should definitely be done in instcombine, canonicalizing the range
+condition into a != condition.
We get this IR: + +define i32 @a(i32 %x) nounwind readnone { +entry: + %0 = and i32 %x, 127 ; <i32> [#uses=1] + %1 = icmp ugt i32 %0, 31 ; <i1> [#uses=1] + %2 = zext i1 %1 to i32 ; <i32> [#uses=1] + ret i32 %2 +} + +Instcombine prefers to strength reduce relational comparisons to equality +comparisons when possible; this should be another case of that. This could +be handled pretty easily in InstCombiner::visitICmpInstWithInstAndIntCst, but it +looks like InstCombiner::visitICmpInstWithInstAndIntCst really should be +redesigned to use ComputeMaskedBits and friends. + //===---------------------------------------------------------------------===// Testcase: @@ -1880,20 +1895,40 @@ Ideal output: Testcase: int x(int a) { return (a & 0x80) ? 0x100 : 0; } +int y(int a) { return (a & 0x80) *2; } -Current output: +Current: testl $128, 4(%esp) setne %al movzbl %al, %eax shll $8, %eax ret -Ideal output: +Better: movl 4(%esp), %eax addl %eax, %eax andl $256, %eax ret -We generally want to fold shifted tests of a single bit into a shift+and on x86. +This is another general instcombine transformation that is profitable on all +targets. In LLVM IR, these functions look like this: + +define i32 @x(i32 %a) nounwind readnone { +entry: + %0 = and i32 %a, 128 + %1 = icmp eq i32 %0, 0 + %iftmp.0.0 = select i1 %1, i32 0, i32 256 + ret i32 %iftmp.0.0 +} + +define i32 @y(i32 %a) nounwind readnone { +entry: + %0 = shl i32 %a, 1 + %1 = and i32 %0, 256 + ret i32 %1 +} + +Replacing an icmp+select with a shift should always be considered profitable in +instcombine. //===---------------------------------------------------------------------===// diff --git a/lib/Target/X86/X86FloatingPoint.cpp b/lib/Target/X86/X86FloatingPoint.cpp index 0f2fbcc..ed4eb44 100644 --- a/lib/Target/X86/X86FloatingPoint.cpp +++ b/lib/Target/X86/X86FloatingPoint.cpp @@ -991,8 +991,13 @@ void FPS::handleSpecialFP(MachineBasicBlock::iterator &I) { case X86::FpSET_ST0_32: case X86::FpSET_ST0_64: case X86::FpSET_ST0_80: - assert((StackTop == 1 || StackTop == 2) - && "Stack should have one or two element on it to return!"); + // FpSET_ST0_80 is generated by copyRegToReg for both function return + // and inline assembly with the "st" constraint. In the latter case, + // it is possible for FP0 to be alive after this instruction. + if (!MI->killsRegister(X86::FP0)) { + // Duplicate ST0 + duplicateToTop(0, 0, I); + } --StackTop; // "Forget" we have something on the top of stack! break; case X86::FpSET_ST1_32: diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp index b003efd..9cedafc 100644 --- a/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -167,6 +167,8 @@ namespace { SDValue &Segment); bool SelectLEAAddr(SDValue Op, SDValue N, SDValue &Base, SDValue &Scale, SDValue &Index, SDValue &Disp); + bool SelectTLSADDRAddr(SDValue Op, SDValue N, SDValue &Base, + SDValue &Scale, SDValue &Index, SDValue &Disp); bool SelectScalarSSELoad(SDValue Op, SDValue Pred, SDValue N, SDValue &Base, SDValue &Scale, SDValue &Index, SDValue &Disp, @@ -1293,6 +1295,32 @@ bool X86DAGToDAGISel::SelectLEAAddr(SDValue Op, SDValue N, return false; } +/// SelectTLSADDRAddr - This is only run on TargetGlobalTLSAddress nodes.
+bool X86DAGToDAGISel::SelectTLSADDRAddr(SDValue Op, SDValue N, SDValue &Base, + SDValue &Scale, SDValue &Index, + SDValue &Disp) { + assert(Op.getOpcode() == X86ISD::TLSADDR); + assert(N.getOpcode() == ISD::TargetGlobalTLSAddress); + const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(N); + + X86ISelAddressMode AM; + AM.GV = GA->getGlobal(); + AM.Disp += GA->getOffset(); + AM.Base.Reg = CurDAG->getRegister(0, N.getValueType()); + + if (N.getValueType() == MVT::i32) { + AM.Scale = 1; + AM.IndexReg = CurDAG->getRegister(X86::EBX, MVT::i32); + } else { + AM.IndexReg = CurDAG->getRegister(0, MVT::i64); + } + + SDValue Segment; + getAddressOperands(AM, Base, Scale, Index, Disp, Segment); + return true; +} + + bool X86DAGToDAGISel::TryFoldLoad(SDValue P, SDValue N, SDValue &Base, SDValue &Scale, SDValue &Index, SDValue &Disp, diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 36e3ab2..8d0ea66 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -788,8 +788,6 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::USUBO, MVT::i64, Custom); setOperationAction(ISD::SMULO, MVT::i32, Custom); setOperationAction(ISD::SMULO, MVT::i64, Custom); - setOperationAction(ISD::UMULO, MVT::i32, Custom); - setOperationAction(ISD::UMULO, MVT::i64, Custom); if (!Subtarget->is64Bit()) { // These libcalls are not available in 32-bit. @@ -4439,9 +4437,8 @@ static SDValue LowerToTLSExecModel(GlobalAddressSDNode *GA, SelectionDAG &DAG, // emit "addl x@ntpoff,%eax" (local exec) or "addl x@indntpoff,%eax" (initial // exec) - SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), - GA->getValueType(0), - GA->getOffset()); + SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), GA->getValueType(0), + GA->getOffset()); SDValue Offset = DAG.getNode(X86ISD::Wrapper, dl, PtrVT, TGA); if (model == TLSModel::InitialExec) @@ -8474,6 +8471,14 @@ void X86TargetLowering::LowerAsmOperandForConstraint(SDValue Op, } } return; + case 'K': + if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) { + if ((int8_t)C->getSExtValue() == C->getSExtValue()) { + Result = DAG.getTargetConstant(C->getZExtValue(), Op.getValueType()); + break; + } + } + return; case 'N': if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) { if (C->getZExtValue() <= 255) { diff --git a/lib/Target/X86/X86Instr64bit.td b/lib/Target/X86/X86Instr64bit.td index dc15e4a..063913f 100644 --- a/lib/Target/X86/X86Instr64bit.td +++ b/lib/Target/X86/X86Instr64bit.td @@ -19,6 +19,14 @@ // 64-bits but only 32 bits are significant. def i64i32imm : Operand<i64>; + +// 64-bits but only 32 bits are significant, and those bits are treated as being +// pc relative. +def i64i32imm_pcrel : Operand<i64> { + let PrintMethod = "print_pcrel_imm"; +} + + // 64-bits but only 8 bits are significant. def i64i8imm : Operand<i64>; @@ -29,6 +37,7 @@ def lea64mem : Operand<i64> { def lea64_32mem : Operand<i32> { let PrintMethod = "printlea64_32mem"; + let AsmOperandLowerMethod = "lower_lea64_32mem"; let MIOperandInfo = (ops GR32, i8imm, GR32, i32imm); } @@ -39,6 +48,9 @@ def lea64addr : ComplexPattern<i64, 4, "SelectLEAAddr", [add, mul, X86mul_imm, shl, or, frameindex, X86Wrapper], []>; +def tls64addr : ComplexPattern<i64, 4, "SelectTLSADDRAddr", + [tglobaltlsaddr], []>; + //===----------------------------------------------------------------------===// // Pattern fragments. 
// @@ -113,9 +125,9 @@ let isCall = 1 in // NOTE: this pattern doesn't match "X86call imm", because we do not know // that the offset between an arbitrary immediate and the call will fit in // the 32-bit pcrel field that we have. - def CALL64pcrel32 : I<0xE8, RawFrm, - (outs), (ins i64i32imm:$dst, variable_ops), - "call\t${dst:call}", []>, + def CALL64pcrel32 : Ii32<0xE8, RawFrm, + (outs), (ins i64i32imm_pcrel:$dst, variable_ops), + "call\t$dst", []>, Requires<[In64BitMode]>; def CALL64r : I<0xFF, MRM2r, (outs), (ins GR64:$dst, variable_ops), "call\t{*}$dst", [(X86call GR64:$dst)]>; @@ -177,6 +189,15 @@ def PUSH64r : I<0x50, AddRegFrm, (outs), (ins GR64:$reg), "push{q}\t$reg", []>; } +let Defs = [RSP], Uses = [RSP], neverHasSideEffects = 1, mayStore = 1 in { +def PUSH64i8 : Ii8<0x6a, RawFrm, (outs), (ins i8imm:$imm), + "push{q}\t$imm", []>; +def PUSH64i16 : Ii16<0x68, RawFrm, (outs), (ins i16imm:$imm), + "push{q}\t$imm", []>; +def PUSH64i32 : Ii32<0x68, RawFrm, (outs), (ins i32imm:$imm), + "push{q}\t$imm", []>; +} + let Defs = [RSP, EFLAGS], Uses = [RSP], mayLoad = 1 in def POPFQ : I<0x9D, RawFrm, (outs), (ins), "popf", []>, REX_W; let Defs = [RSP], Uses = [RSP, EFLAGS], mayStore = 1 in @@ -1312,13 +1333,13 @@ let Defs = [RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11, XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7, XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS], Uses = [RSP] in -def TLS_addr64 : I<0, Pseudo, (outs), (ins i64imm:$sym), +def TLS_addr64 : I<0, Pseudo, (outs), (ins lea64mem:$sym), ".byte\t0x66; " - "leaq\t${sym:mem}(%rip), %rdi; " + "leaq\t$sym(%rip), %rdi; " ".word\t0x6666; " "rex64; " "call\t__tls_get_addr@PLT", - [(X86tlsaddr tglobaltlsaddr:$sym)]>, + [(X86tlsaddr tls64addr:$sym)]>, Requires<[In64BitMode]>; let AddedComplexity = 5 in diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td index 50ae417..2d8f55f 100644 --- a/lib/Target/X86/X86InstrInfo.td +++ b/lib/Target/X86/X86InstrInfo.td @@ -163,6 +163,11 @@ def X86mul_imm : SDNode<"X86ISD::MUL_IMM", SDTIntBinOp>; // X86 Operand Definitions. // +def i32imm_pcrel : Operand<i32> { + let PrintMethod = "print_pcrel_imm"; +} + + // *mem - Operand definitions for the funky X86 addressing mode operands. // class X86MemOperand<string printMethod> : Operand<iPTR> { @@ -206,8 +211,10 @@ def i16i8imm : Operand<i16>; // 32-bits but only 8 bits are significant. def i32i8imm : Operand<i32>; -// Branch targets have OtherVT type. -def brtarget : Operand<OtherVT>; +// Branch targets have OtherVT type and print as pc-relative values. +def brtarget : Operand<OtherVT> { + let PrintMethod = "print_pcrel_imm"; +} //===----------------------------------------------------------------------===// // X86 Complex Pattern Definitions. @@ -217,6 +224,8 @@ def brtarget : Operand<OtherVT>; def addr : ComplexPattern<iPTR, 5, "SelectAddr", [], []>; def lea32addr : ComplexPattern<i32, 4, "SelectLEAAddr", [add, sub, mul, shl, or, frameindex], []>; +def tls32addr : ComplexPattern<i32, 4, "SelectTLSADDRAddr", + [tglobaltlsaddr], []>; //===----------------------------------------------------------------------===// // X86 Instruction Predicate Definitions. 
@@ -561,8 +570,9 @@ let isCall = 1 in XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7, XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS], Uses = [ESP] in { - def CALLpcrel32 : Ii32<0xE8, RawFrm, (outs), (ins i32imm:$dst,variable_ops), - "call\t${dst:call}", []>; + def CALLpcrel32 : Ii32<0xE8, RawFrm, + (outs), (ins i32imm_pcrel:$dst,variable_ops), + "call\t$dst", []>; def CALL32r : I<0xFF, MRM2r, (outs), (ins GR32:$dst, variable_ops), "call\t{*}$dst", [(X86call GR32:$dst)]>; def CALL32m : I<0xFF, MRM2m, (outs), (ins i32mem:$dst, variable_ops), @@ -587,7 +597,7 @@ def TCRETURNri : I<0, Pseudo, (outs), (ins GR32:$dst, i32imm:$offset, variable_o let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in - def TAILJMPd : IBr<0xE9, (ins i32imm:$dst), "jmp\t${dst:call} # TAILCALL", + def TAILJMPd : IBr<0xE9, (ins i32imm_pcrel:$dst), "jmp\t$dst # TAILCALL", []>; let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in def TAILJMPr : I<0xFF, MRM4r, (outs), (ins GR32:$dst), "jmp{l}\t{*}$dst # TAILCALL", @@ -611,6 +621,15 @@ let mayStore = 1 in def PUSH32r : I<0x50, AddRegFrm, (outs), (ins GR32:$reg), "push{l}\t$reg",[]>; } +let Defs = [ESP], Uses = [ESP], neverHasSideEffects = 1, mayStore = 1 in { +def PUSH32i8 : Ii8<0x6a, RawFrm, (outs), (ins i8imm:$imm), + "push{l}\t$imm", []>; +def PUSH32i16 : Ii16<0x68, RawFrm, (outs), (ins i16imm:$imm), + "push{l}\t$imm", []>; +def PUSH32i32 : Ii32<0x68, RawFrm, (outs), (ins i32imm:$imm), + "push{l}\t$imm", []>; +} + let Defs = [ESP, EFLAGS], Uses = [ESP], mayLoad = 1, neverHasSideEffects=1 in def POPFD : I<0x9D, RawFrm, (outs), (ins), "popf", []>; let Defs = [ESP], Uses = [ESP, EFLAGS], mayStore = 1, neverHasSideEffects=1 in @@ -1726,13 +1745,13 @@ let isTwoAddress = 0 in { let Defs = [EFLAGS] in { let Uses = [CL] in { def SHL8rCL : I<0xD2, MRM4r, (outs GR8 :$dst), (ins GR8 :$src), - "shl{b}\t{%cl, $dst|$dst, %CL}", + "shl{b}\t{%cl, $dst|$dst, CL}", [(set GR8:$dst, (shl GR8:$src, CL))]>; def SHL16rCL : I<0xD3, MRM4r, (outs GR16:$dst), (ins GR16:$src), - "shl{w}\t{%cl, $dst|$dst, %CL}", + "shl{w}\t{%cl, $dst|$dst, CL}", [(set GR16:$dst, (shl GR16:$src, CL))]>, OpSize; def SHL32rCL : I<0xD3, MRM4r, (outs GR32:$dst), (ins GR32:$src), - "shl{l}\t{%cl, $dst|$dst, %CL}", + "shl{l}\t{%cl, $dst|$dst, CL}", [(set GR32:$dst, (shl GR32:$src, CL))]>; } // Uses = [CL] @@ -1753,13 +1772,13 @@ def SHL32ri : Ii8<0xC1, MRM4r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$src2), let isTwoAddress = 0 in { let Uses = [CL] in { def SHL8mCL : I<0xD2, MRM4m, (outs), (ins i8mem :$dst), - "shl{b}\t{%cl, $dst|$dst, %CL}", + "shl{b}\t{%cl, $dst|$dst, CL}", [(store (shl (loadi8 addr:$dst), CL), addr:$dst)]>; def SHL16mCL : I<0xD3, MRM4m, (outs), (ins i16mem:$dst), - "shl{w}\t{%cl, $dst|$dst, %CL}", + "shl{w}\t{%cl, $dst|$dst, CL}", [(store (shl (loadi16 addr:$dst), CL), addr:$dst)]>, OpSize; def SHL32mCL : I<0xD3, MRM4m, (outs), (ins i32mem:$dst), - "shl{l}\t{%cl, $dst|$dst, %CL}", + "shl{l}\t{%cl, $dst|$dst, CL}", [(store (shl (loadi32 addr:$dst), CL), addr:$dst)]>; } def SHL8mi : Ii8<0xC0, MRM4m, (outs), (ins i8mem :$dst, i8imm:$src), @@ -1788,13 +1807,13 @@ let isTwoAddress = 0 in { let Uses = [CL] in { def SHR8rCL : I<0xD2, MRM5r, (outs GR8 :$dst), (ins GR8 :$src), - "shr{b}\t{%cl, $dst|$dst, %CL}", + "shr{b}\t{%cl, $dst|$dst, CL}", [(set GR8:$dst, (srl GR8:$src, CL))]>; def SHR16rCL : I<0xD3, MRM5r, (outs GR16:$dst), (ins GR16:$src), - "shr{w}\t{%cl, $dst|$dst, %CL}", + "shr{w}\t{%cl, $dst|$dst, CL}", [(set GR16:$dst, (srl GR16:$src, CL))]>, OpSize; def 
SHR32rCL : I<0xD3, MRM5r, (outs GR32:$dst), (ins GR32:$src), - "shr{l}\t{%cl, $dst|$dst, %CL}", + "shr{l}\t{%cl, $dst|$dst, CL}", [(set GR32:$dst, (srl GR32:$src, CL))]>; } @@ -1822,14 +1841,14 @@ def SHR32r1 : I<0xD1, MRM5r, (outs GR32:$dst), (ins GR32:$src1), let isTwoAddress = 0 in { let Uses = [CL] in { def SHR8mCL : I<0xD2, MRM5m, (outs), (ins i8mem :$dst), - "shr{b}\t{%cl, $dst|$dst, %CL}", + "shr{b}\t{%cl, $dst|$dst, CL}", [(store (srl (loadi8 addr:$dst), CL), addr:$dst)]>; def SHR16mCL : I<0xD3, MRM5m, (outs), (ins i16mem:$dst), - "shr{w}\t{%cl, $dst|$dst, %CL}", + "shr{w}\t{%cl, $dst|$dst, CL}", [(store (srl (loadi16 addr:$dst), CL), addr:$dst)]>, OpSize; def SHR32mCL : I<0xD3, MRM5m, (outs), (ins i32mem:$dst), - "shr{l}\t{%cl, $dst|$dst, %CL}", + "shr{l}\t{%cl, $dst|$dst, CL}", [(store (srl (loadi32 addr:$dst), CL), addr:$dst)]>; } def SHR8mi : Ii8<0xC0, MRM5m, (outs), (ins i8mem :$dst, i8imm:$src), @@ -1857,13 +1876,13 @@ let isTwoAddress = 0 in { let Uses = [CL] in { def SAR8rCL : I<0xD2, MRM7r, (outs GR8 :$dst), (ins GR8 :$src), - "sar{b}\t{%cl, $dst|$dst, %CL}", + "sar{b}\t{%cl, $dst|$dst, CL}", [(set GR8:$dst, (sra GR8:$src, CL))]>; def SAR16rCL : I<0xD3, MRM7r, (outs GR16:$dst), (ins GR16:$src), - "sar{w}\t{%cl, $dst|$dst, %CL}", + "sar{w}\t{%cl, $dst|$dst, CL}", [(set GR16:$dst, (sra GR16:$src, CL))]>, OpSize; def SAR32rCL : I<0xD3, MRM7r, (outs GR32:$dst), (ins GR32:$src), - "sar{l}\t{%cl, $dst|$dst, %CL}", + "sar{l}\t{%cl, $dst|$dst, CL}", [(set GR32:$dst, (sra GR32:$src, CL))]>; } @@ -1892,13 +1911,13 @@ def SAR32r1 : I<0xD1, MRM7r, (outs GR32:$dst), (ins GR32:$src1), let isTwoAddress = 0 in { let Uses = [CL] in { def SAR8mCL : I<0xD2, MRM7m, (outs), (ins i8mem :$dst), - "sar{b}\t{%cl, $dst|$dst, %CL}", + "sar{b}\t{%cl, $dst|$dst, CL}", [(store (sra (loadi8 addr:$dst), CL), addr:$dst)]>; def SAR16mCL : I<0xD3, MRM7m, (outs), (ins i16mem:$dst), - "sar{w}\t{%cl, $dst|$dst, %CL}", + "sar{w}\t{%cl, $dst|$dst, CL}", [(store (sra (loadi16 addr:$dst), CL), addr:$dst)]>, OpSize; def SAR32mCL : I<0xD3, MRM7m, (outs), (ins i32mem:$dst), - "sar{l}\t{%cl, $dst|$dst, %CL}", + "sar{l}\t{%cl, $dst|$dst, CL}", [(store (sra (loadi32 addr:$dst), CL), addr:$dst)]>; } def SAR8mi : Ii8<0xC0, MRM7m, (outs), (ins i8mem :$dst, i8imm:$src), @@ -1929,13 +1948,13 @@ let isTwoAddress = 0 in { // FIXME: provide shorter instructions when imm8 == 1 let Uses = [CL] in { def ROL8rCL : I<0xD2, MRM0r, (outs GR8 :$dst), (ins GR8 :$src), - "rol{b}\t{%cl, $dst|$dst, %CL}", + "rol{b}\t{%cl, $dst|$dst, CL}", [(set GR8:$dst, (rotl GR8:$src, CL))]>; def ROL16rCL : I<0xD3, MRM0r, (outs GR16:$dst), (ins GR16:$src), - "rol{w}\t{%cl, $dst|$dst, %CL}", + "rol{w}\t{%cl, $dst|$dst, CL}", [(set GR16:$dst, (rotl GR16:$src, CL))]>, OpSize; def ROL32rCL : I<0xD3, MRM0r, (outs GR32:$dst), (ins GR32:$src), - "rol{l}\t{%cl, $dst|$dst, %CL}", + "rol{l}\t{%cl, $dst|$dst, CL}", [(set GR32:$dst, (rotl GR32:$src, CL))]>; } @@ -1963,13 +1982,13 @@ def ROL32r1 : I<0xD1, MRM0r, (outs GR32:$dst), (ins GR32:$src1), let isTwoAddress = 0 in { let Uses = [CL] in { def ROL8mCL : I<0xD2, MRM0m, (outs), (ins i8mem :$dst), - "rol{b}\t{%cl, $dst|$dst, %CL}", + "rol{b}\t{%cl, $dst|$dst, CL}", [(store (rotl (loadi8 addr:$dst), CL), addr:$dst)]>; def ROL16mCL : I<0xD3, MRM0m, (outs), (ins i16mem:$dst), - "rol{w}\t{%cl, $dst|$dst, %CL}", + "rol{w}\t{%cl, $dst|$dst, CL}", [(store (rotl (loadi16 addr:$dst), CL), addr:$dst)]>, OpSize; def ROL32mCL : I<0xD3, MRM0m, (outs), (ins i32mem:$dst), - "rol{l}\t{%cl, $dst|$dst, %CL}", + "rol{l}\t{%cl, 
$dst|$dst, CL}", [(store (rotl (loadi32 addr:$dst), CL), addr:$dst)]>; } def ROL8mi : Ii8<0xC0, MRM0m, (outs), (ins i8mem :$dst, i8imm:$src), @@ -1998,13 +2017,13 @@ let isTwoAddress = 0 in { let Uses = [CL] in { def ROR8rCL : I<0xD2, MRM1r, (outs GR8 :$dst), (ins GR8 :$src), - "ror{b}\t{%cl, $dst|$dst, %CL}", + "ror{b}\t{%cl, $dst|$dst, CL}", [(set GR8:$dst, (rotr GR8:$src, CL))]>; def ROR16rCL : I<0xD3, MRM1r, (outs GR16:$dst), (ins GR16:$src), - "ror{w}\t{%cl, $dst|$dst, %CL}", + "ror{w}\t{%cl, $dst|$dst, CL}", [(set GR16:$dst, (rotr GR16:$src, CL))]>, OpSize; def ROR32rCL : I<0xD3, MRM1r, (outs GR32:$dst), (ins GR32:$src), - "ror{l}\t{%cl, $dst|$dst, %CL}", + "ror{l}\t{%cl, $dst|$dst, CL}", [(set GR32:$dst, (rotr GR32:$src, CL))]>; } @@ -2032,13 +2051,13 @@ def ROR32r1 : I<0xD1, MRM1r, (outs GR32:$dst), (ins GR32:$src1), let isTwoAddress = 0 in { let Uses = [CL] in { def ROR8mCL : I<0xD2, MRM1m, (outs), (ins i8mem :$dst), - "ror{b}\t{%cl, $dst|$dst, %CL}", + "ror{b}\t{%cl, $dst|$dst, CL}", [(store (rotr (loadi8 addr:$dst), CL), addr:$dst)]>; def ROR16mCL : I<0xD3, MRM1m, (outs), (ins i16mem:$dst), - "ror{w}\t{%cl, $dst|$dst, %CL}", + "ror{w}\t{%cl, $dst|$dst, CL}", [(store (rotr (loadi16 addr:$dst), CL), addr:$dst)]>, OpSize; def ROR32mCL : I<0xD3, MRM1m, (outs), (ins i32mem:$dst), - "ror{l}\t{%cl, $dst|$dst, %CL}", + "ror{l}\t{%cl, $dst|$dst, CL}", [(store (rotr (loadi32 addr:$dst), CL), addr:$dst)]>; } def ROR8mi : Ii8<0xC0, MRM1m, (outs), (ins i8mem :$dst, i8imm:$src), @@ -2070,17 +2089,17 @@ let isTwoAddress = 0 in { // Double shift instructions (generalizations of rotate) let Uses = [CL] in { def SHLD32rrCL : I<0xA5, MRMDestReg, (outs GR32:$dst), (ins GR32:$src1, GR32:$src2), - "shld{l}\t{%cl, $src2, $dst|$dst, $src2, %CL}", + "shld{l}\t{%cl, $src2, $dst|$dst, $src2, CL}", [(set GR32:$dst, (X86shld GR32:$src1, GR32:$src2, CL))]>, TB; def SHRD32rrCL : I<0xAD, MRMDestReg, (outs GR32:$dst), (ins GR32:$src1, GR32:$src2), - "shrd{l}\t{%cl, $src2, $dst|$dst, $src2, %CL}", + "shrd{l}\t{%cl, $src2, $dst|$dst, $src2, CL}", [(set GR32:$dst, (X86shrd GR32:$src1, GR32:$src2, CL))]>, TB; def SHLD16rrCL : I<0xA5, MRMDestReg, (outs GR16:$dst), (ins GR16:$src1, GR16:$src2), - "shld{w}\t{%cl, $src2, $dst|$dst, $src2, %CL}", + "shld{w}\t{%cl, $src2, $dst|$dst, $src2, CL}", [(set GR16:$dst, (X86shld GR16:$src1, GR16:$src2, CL))]>, TB, OpSize; def SHRD16rrCL : I<0xAD, MRMDestReg, (outs GR16:$dst), (ins GR16:$src1, GR16:$src2), - "shrd{w}\t{%cl, $src2, $dst|$dst, $src2, %CL}", + "shrd{w}\t{%cl, $src2, $dst|$dst, $src2, CL}", [(set GR16:$dst, (X86shrd GR16:$src1, GR16:$src2, CL))]>, TB, OpSize; } @@ -2115,11 +2134,11 @@ def SHRD16rri8 : Ii8<0xAC, MRMDestReg, let isTwoAddress = 0 in { let Uses = [CL] in { def SHLD32mrCL : I<0xA5, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2), - "shld{l}\t{%cl, $src2, $dst|$dst, $src2, %CL}", + "shld{l}\t{%cl, $src2, $dst|$dst, $src2, CL}", [(store (X86shld (loadi32 addr:$dst), GR32:$src2, CL), addr:$dst)]>, TB; def SHRD32mrCL : I<0xAD, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2), - "shrd{l}\t{%cl, $src2, $dst|$dst, $src2, %CL}", + "shrd{l}\t{%cl, $src2, $dst|$dst, $src2, CL}", [(store (X86shrd (loadi32 addr:$dst), GR32:$src2, CL), addr:$dst)]>, TB; } @@ -2138,11 +2157,11 @@ let isTwoAddress = 0 in { let Uses = [CL] in { def SHLD16mrCL : I<0xA5, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2), - "shld{w}\t{%cl, $src2, $dst|$dst, $src2, %CL}", + "shld{w}\t{%cl, $src2, $dst|$dst, $src2, CL}", [(store (X86shld (loadi16 addr:$dst), GR16:$src2, CL), addr:$dst)]>, 
TB, OpSize; def SHRD16mrCL : I<0xAD, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2), - "shrd{w}\t{%cl, $src2, $dst|$dst, $src2, %CL}", + "shrd{w}\t{%cl, $src2, $dst|$dst, $src2, CL}", [(store (X86shrd (loadi16 addr:$dst), GR16:$src2, CL), addr:$dst)]>, TB, OpSize; } @@ -3095,11 +3114,11 @@ let Defs = [EAX, ECX, EDX, FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0, MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7, XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7, XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS], - Uses = [ESP, EBX] in -def TLS_addr32 : I<0, Pseudo, (outs), (ins i32imm:$sym), - "leal\t${sym:mem}(,%ebx,1), %eax; " + Uses = [ESP] in +def TLS_addr32 : I<0, Pseudo, (outs), (ins lea32mem:$sym), + "leal\t$sym, %eax; " "call\t___tls_get_addr@PLT", - [(X86tlsaddr tglobaltlsaddr:$sym)]>, + [(X86tlsaddr tls32addr:$sym)]>, Requires<[In32BitMode]>; let AddedComplexity = 5 in diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index b44c7a6..5d6ef36 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -3027,6 +3027,12 @@ def : Pat<(v4i32 (movlp VR128:$src1, VR128:$src2)), (MOVLPDrr VR128:$src1, VR128:$src2)>, Requires<[HasSSE2]>; } +// vector_shuffle v1, v2 <4, 5, 2, 3> using SHUFPSrri (we prefer movsd, but +// fall back to this for SSE1) +def : Pat<(v4f32 (movlp:$src3 VR128:$src1, (v4f32 VR128:$src2))), + (SHUFPSrri VR128:$src2, VR128:$src1, + (SHUFFLE_get_shuf_imm VR128:$src3))>, Requires<[HasSSE1]>; + // Set lowest element and zero upper elements. let AddedComplexity = 15 in def : Pat<(v2f64 (movl immAllZerosV_bc, VR128:$src)), diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp index 6c0074e..a2f319f 100644 --- a/lib/Target/X86/X86RegisterInfo.cpp +++ b/lib/Target/X86/X86RegisterInfo.cpp @@ -662,6 +662,10 @@ void X86RegisterInfo::emitFrameMoves(MachineFunction &MF, TargetFrameInfo::StackGrowsUp ? TD->getPointerSize() : -TD->getPointerSize()); + MachineLocation FPDst(hasFP(MF) ? FramePtr : StackPtr); + MachineLocation FPSrc(MachineLocation::VirtualFP); + Moves.push_back(MachineMove(ReadyLabelId, FPDst, FPSrc)); + if (StackSize) { // Show update of SP. if (hasFP(MF)) { @@ -676,7 +680,7 @@ void X86RegisterInfo::emitFrameMoves(MachineFunction &MF, Moves.push_back(MachineMove(FrameLabelId, SPDst, SPSrc)); } } else { - //FIXME: Verify & implement for FP + // FIXME: Verify & implement for FP MachineLocation SPDst(StackPtr); MachineLocation SPSrc(StackPtr, stackGrowth); Moves.push_back(MachineMove(FrameLabelId, SPDst, SPSrc)); @@ -711,10 +715,6 @@ void X86RegisterInfo::emitFrameMoves(MachineFunction &MF, MachineLocation FPSrc(FramePtr); Moves.push_back(MachineMove(ReadyLabelId, FPDst, FPSrc)); } - - MachineLocation FPDst(hasFP(MF) ? FramePtr : StackPtr); - MachineLocation FPSrc(MachineLocation::VirtualFP); - Moves.push_back(MachineMove(ReadyLabelId, FPDst, FPSrc)); } @@ -729,8 +729,7 @@ void X86RegisterInfo::emitPrologue(MachineFunction &MF) const { bool needsFrameMoves = (MMI && MMI->hasDebugInfo()) || !Fn->doesNotThrow() || UnwindTablesMandatory; - DebugLoc DL = (MBBI != MBB.end() ? MBBI->getDebugLoc() : - DebugLoc::getUnknownLoc()); + DebugLoc DL; // Prepare for frame info. unsigned FrameLabelId = 0; @@ -822,13 +821,6 @@ void X86RegisterInfo::emitPrologue(MachineFunction &MF) const { NumBytes = StackSize - X86FI->getCalleeSavedFrameSize(); } - unsigned ReadyLabelId = 0; - if (needsFrameMoves) { - // Mark effective beginning of when frame pointer is ready. 
- ReadyLabelId = MMI->NextLabelID(); - BuildMI(MBB, MBBI, DL, TII.get(X86::DBG_LABEL)).addImm(ReadyLabelId); - } - // Skip the callee-saved push instructions. while (MBBI != MBB.end() && (MBBI->getOpcode() == X86::PUSH32r || @@ -891,8 +883,13 @@ void X86RegisterInfo::emitPrologue(MachineFunction &MF) const { emitSPUpdate(MBB, MBBI, StackPtr, -(int64_t)NumBytes, Is64Bit, TII); } - if (needsFrameMoves) + if (needsFrameMoves) { + unsigned ReadyLabelId = 0; + // Mark effective beginning of when frame pointer is ready. + ReadyLabelId = MMI->NextLabelID(); + BuildMI(MBB, MBBI, DL, TII.get(X86::DBG_LABEL)).addImm(ReadyLabelId); emitFrameMoves(MF, FrameLabelId, ReadyLabelId); + } } void X86RegisterInfo::emitEpilogue(MachineFunction &MF, diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h index 46476f2..694b0eb 100644 --- a/lib/Target/X86/X86Subtarget.h +++ b/lib/Target/X86/X86Subtarget.h @@ -173,7 +173,7 @@ public: bool isPICStyleGOT() const { return PICStyle == PICStyles::GOT; } bool isPICStyleStub() const { return PICStyle == PICStyles::Stub; } bool isPICStyleRIPRel() const { return PICStyle == PICStyles::RIPRel; } - bool isPICStyleWinPIC() const { return PICStyle == PICStyles:: WinPIC; } + bool isPICStyleWinPIC() const { return PICStyle == PICStyles::WinPIC; } /// getDarwinVers - Return the darwin version number, 8 = tiger, 9 = leopard. unsigned getDarwinVers() const { return DarwinVers; } diff --git a/lib/Target/X86/X86TargetAsmInfo.cpp b/lib/Target/X86/X86TargetAsmInfo.cpp index 5dda5f4..f49ca15 100644 --- a/lib/Target/X86/X86TargetAsmInfo.cpp +++ b/lib/Target/X86/X86TargetAsmInfo.cpp @@ -44,40 +44,25 @@ X86DarwinTargetAsmInfo::X86DarwinTargetAsmInfo(const X86TargetMachine &TM): AlignmentIsInBytes = false; TextAlignFillValue = 0x90; - GlobalPrefix = "_"; + + if (!is64Bit) Data64bitsDirective = 0; // we can't emit a 64-bit unit ZeroDirective = "\t.space\t"; // ".space N" emits N zeros. - PrivateGlobalPrefix = "L"; // Marker for constant pool idxs - LessPrivateGlobalPrefix = "l"; // Marker for some ObjC metadata - BSSSection = 0; // no BSS section. ZeroFillDirective = "\t.zerofill\t"; // Uses .zerofill if (TM.getRelocationModel() != Reloc::Static) ConstantPoolSection = "\t.const_data"; else ConstantPoolSection = "\t.const\n"; - JumpTableDataSection = "\t.const\n"; - CStringSection = "\t.cstring"; - // FIXME: Why don't always use this section? - if (is64Bit) { + // FIXME: Why don't we always use this section? + if (is64Bit) SixteenByteConstantSection = getUnnamedSection("\t.literal16\n", SectionFlags::Mergeable); - } LCOMMDirective = "\t.lcomm\t"; - SwitchToSectionDirective = "\t.section "; - StringConstantPrefix = "\1LC"; // Leopard and above support aligned common symbols. 
COMMDirectiveTakesAlignment = (Subtarget->getDarwinVers() >= 9); HasDotTypeDotSizeDirective = false; - HasSingleParameterDotFile = false; NonLocalEHFrameLabel = true; - if (TM.getRelocationModel() == Reloc::Static) { - StaticCtorsSection = ".constructor"; - StaticDtorsSection = ".destructor"; - } else { - StaticCtorsSection = ".mod_init_func"; - StaticDtorsSection = ".mod_term_func"; - } if (is64Bit) { PersonalityPrefix = ""; PersonalitySuffix = "+4@GOTPCREL"; @@ -85,40 +70,18 @@ X86DarwinTargetAsmInfo::X86DarwinTargetAsmInfo(const X86TargetMachine &TM): PersonalityPrefix = "L"; PersonalitySuffix = "$non_lazy_ptr"; } - NeedsIndirectEncoding = true; InlineAsmStart = "## InlineAsm Start"; InlineAsmEnd = "## InlineAsm End"; CommentString = "##"; SetDirective = "\t.set"; PCSymbol = "."; UsedDirective = "\t.no_dead_strip\t"; - WeakDefDirective = "\t.weak_definition "; - WeakRefDirective = "\t.weak_reference "; - HiddenDirective = "\t.private_extern "; ProtectedDirective = "\t.globl\t"; - // In non-PIC modes, emit a special label before jump tables so that the - // linker can perform more accurate dead code stripping. - if (TM.getRelocationModel() != Reloc::PIC_) { - // Emit a local label that is preserved until the linker runs. - JumpTableSpecialLabelPrefix = "l"; - } - SupportsDebugInformation = true; - NeedsSet = true; - DwarfAbbrevSection = ".section __DWARF,__debug_abbrev,regular,debug"; - DwarfInfoSection = ".section __DWARF,__debug_info,regular,debug"; - DwarfLineSection = ".section __DWARF,__debug_line,regular,debug"; - DwarfFrameSection = ".section __DWARF,__debug_frame,regular,debug"; - DwarfPubNamesSection = ".section __DWARF,__debug_pubnames,regular,debug"; - DwarfPubTypesSection = ".section __DWARF,__debug_pubtypes,regular,debug"; + DwarfDebugInlineSection = ".section __DWARF,__debug_inlined,regular,debug"; DwarfUsesInlineInfoSection = true; - DwarfStrSection = ".section __DWARF,__debug_str,regular,debug"; - DwarfLocSection = ".section __DWARF,__debug_loc,regular,debug"; - DwarfARangesSection = ".section __DWARF,__debug_aranges,regular,debug"; - DwarfRangesSection = ".section __DWARF,__debug_ranges,regular,debug"; - DwarfMacInfoSection = ".section __DWARF,__debug_macinfo,regular,debug"; // Exceptions handling SupportsExceptionHandling = true; @@ -176,7 +139,7 @@ X86ELFTargetAsmInfo::X86ELFTargetAsmInfo(const X86TargetMachine &TM): DwarfLocSection = "\t.section\t.debug_loc,\"\",@progbits"; DwarfARangesSection = "\t.section\t.debug_aranges,\"\",@progbits"; DwarfRangesSection = "\t.section\t.debug_ranges,\"\",@progbits"; - DwarfMacInfoSection = "\t.section\t.debug_macinfo,\"\",@progbits"; + DwarfMacroInfoSection = "\t.section\t.debug_macinfo,\"\",@progbits"; // Exceptions handling SupportsExceptionHandling = true; @@ -259,7 +222,7 @@ X86COFFTargetAsmInfo::X86COFFTargetAsmInfo(const X86TargetMachine &TM): DwarfLocSection = "\t.section\t.debug_loc,\"dr\""; DwarfARangesSection = "\t.section\t.debug_aranges,\"dr\""; DwarfRangesSection = "\t.section\t.debug_ranges,\"dr\""; - DwarfMacInfoSection = "\t.section\t.debug_macinfo,\"dr\""; + DwarfMacroInfoSection = "\t.section\t.debug_macinfo,\"dr\""; } unsigned @@ -340,8 +303,11 @@ X86WinTargetAsmInfo::X86WinTargetAsmInfo(const X86TargetMachine &TM): GlobalPrefix = "_"; CommentString = ";"; + InlineAsmStart = "; InlineAsm Start"; + InlineAsmEnd = "; InlineAsm End"; + PrivateGlobalPrefix = "$"; - AlignDirective = "\talign\t"; + AlignDirective = "\tALIGN\t"; ZeroDirective = "\tdb\t"; ZeroDirectiveSuffix = " dup(0)"; AsciiDirective = "\tdb\t"; @@ 
-353,13 +319,15 @@ X86WinTargetAsmInfo::X86WinTargetAsmInfo(const X86TargetMachine &TM): HasDotTypeDotSizeDirective = false; HasSingleParameterDotFile = false; + AlignmentIsInBytes = true; + TextSection = getUnnamedSection("_text", SectionFlags::Code); DataSection = getUnnamedSection("_data", SectionFlags::Writeable); JumpTableDataSection = NULL; SwitchToSectionDirective = ""; - TextSectionStartSuffix = "\tsegment 'CODE'"; - DataSectionStartSuffix = "\tsegment 'DATA'"; + TextSectionStartSuffix = "\tSEGMENT PARA 'CODE'"; + DataSectionStartSuffix = "\tSEGMENT PARA 'DATA'"; SectionEndDirectiveSuffix = "\tends\n"; } diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp index dfb055f..53c46c3 100644 --- a/lib/Target/X86/X86TargetMachine.cpp +++ b/lib/Target/X86/X86TargetMachine.cpp @@ -36,6 +36,11 @@ X("x86", "32-bit X86: Pentium-Pro and above"); static RegisterTarget<X86_64TargetMachine> Y("x86-64", "64-bit X86: EM64T and AMD64"); +// Force static initialization when called from llvm/InitializeAllTargets.h +namespace llvm { + void InitializeX86Target() { } +} + // No assembler printer by default X86TargetMachine::AsmPrinterCtorFn X86TargetMachine::AsmPrinterCtor = 0; diff --git a/lib/Target/XCore/XCoreAsmPrinter.cpp b/lib/Target/XCore/XCoreAsmPrinter.cpp index c9a6d8a..ed4c101 100644 --- a/lib/Target/XCore/XCoreAsmPrinter.cpp +++ b/lib/Target/XCore/XCoreAsmPrinter.cpp @@ -428,6 +428,7 @@ void XCoreAsmPrinter::printMachineInstruction(const MachineInstr *MI) { bool XCoreAsmPrinter::doInitialization(Module &M) { bool Result = AsmPrinter::doInitialization(M); + DW = getAnalysisIfAvailable<DwarfWriter>(); if (!FileDirective.empty()) { emitFileDirective(FileDirective); @@ -449,11 +450,6 @@ bool XCoreAsmPrinter::doInitialization(Module &M) { } } - // Emit initial debug information. 
- DW = getAnalysisIfAvailable<DwarfWriter>(); - assert(DW && "Dwarf Writer is not available"); - DW->BeginModule(&M, getAnalysisIfAvailable<MachineModuleInfo>(), - O, this, TAI); return Result; } diff --git a/lib/Target/XCore/XCoreTargetAsmInfo.cpp b/lib/Target/XCore/XCoreTargetAsmInfo.cpp index 5513762..59ad624 100644 --- a/lib/Target/XCore/XCoreTargetAsmInfo.cpp +++ b/lib/Target/XCore/XCoreTargetAsmInfo.cpp @@ -24,6 +24,7 @@ using namespace llvm; XCoreTargetAsmInfo::XCoreTargetAsmInfo(const XCoreTargetMachine &TM) : ELFTargetAsmInfo(TM), Subtarget(TM.getSubtargetImpl()) { + SupportsDebugInformation = true; TextSection = getUnnamedSection("\t.text", SectionFlags::Code); DataSection = getNamedSection("\t.dp.data", SectionFlags::Writeable | SectionFlags::Small); @@ -64,7 +65,7 @@ XCoreTargetAsmInfo::XCoreTargetAsmInfo(const XCoreTargetMachine &TM) DwarfLocSection = "\t.section\t.debug_loc,\"\",@progbits"; DwarfARangesSection = "\t.section\t.debug_aranges,\"\",@progbits"; DwarfRangesSection = "\t.section\t.debug_ranges,\"\",@progbits"; - DwarfMacInfoSection = "\t.section\t.debug_macinfo,\"\",@progbits"; + DwarfMacroInfoSection = "\t.section\t.debug_macinfo,\"\",@progbits"; } const Section* diff --git a/lib/Target/XCore/XCoreTargetMachine.cpp b/lib/Target/XCore/XCoreTargetMachine.cpp index 5437c57..cfd3cd3 100644 --- a/lib/Target/XCore/XCoreTargetMachine.cpp +++ b/lib/Target/XCore/XCoreTargetMachine.cpp @@ -31,6 +31,11 @@ namespace { RegisterTarget<XCoreTargetMachine> X("xcore", "XCore"); } +// Force static initialization when called from llvm/InitializeAllTargets.h +namespace llvm { + void InitializeXCoreTarget() { } +} + const TargetAsmInfo *XCoreTargetMachine::createTargetAsmInfo() const { return new XCoreTargetAsmInfo(*this); } diff --git a/lib/Transforms/IPO/CMakeLists.txt b/lib/Transforms/IPO/CMakeLists.txt index 4b85e13..1438b48 100644 --- a/lib/Transforms/IPO/CMakeLists.txt +++ b/lib/Transforms/IPO/CMakeLists.txt @@ -16,6 +16,7 @@ add_llvm_library(LLVMipo LoopExtractor.cpp LowerSetJmp.cpp MergeFunctions.cpp + PartialInlining.cpp PartialSpecialization.cpp PruneEH.cpp RaiseAllocations.cpp diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp index 9a1b294..cbf3a1d 100644 --- a/lib/Transforms/IPO/GlobalOpt.cpp +++ b/lib/Transforms/IPO/GlobalOpt.cpp @@ -1667,11 +1667,14 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV, // // NOTE: It doesn't make sense to promote non single-value types since we // are just replacing static memory to stack memory. + // + // If the global is in a different address space, don't bring it to the stack.
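// Editor's illustration (hypothetical, not from this commit): the guarded
// localization rewrites something like
//     static int G;  int main() { G = 1; return G; }
// into, roughly,
//     int main() { int G_local = 0; G_local = 1; return G_local; }
// An addrspace(N) global has no stack counterpart, which is why the
// getAddressSpace() == 0 test is added below.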
if (!GS.HasMultipleAccessingFunctions && GS.AccessingFunction && !GS.HasNonInstructionUser && GV->getType()->getElementType()->isSingleValueType() && GS.AccessingFunction->getName() == "main" && - GS.AccessingFunction->hasExternalLinkage()) { + GS.AccessingFunction->hasExternalLinkage() && + GV->getType()->getAddressSpace() == 0) { DOUT << "LOCALIZING GLOBAL: " << *GV; Instruction* FirstI = GS.AccessingFunction->getEntryBlock().begin(); const Type* ElemTy = GV->getType()->getElementType(); diff --git a/lib/Transforms/IPO/PartialInlining.cpp b/lib/Transforms/IPO/PartialInlining.cpp index b3a25540..0b975ae 100644 --- a/lib/Transforms/IPO/PartialInlining.cpp +++ b/lib/Transforms/IPO/PartialInlining.cpp @@ -20,10 +20,13 @@ #include "llvm/Analysis/Dominators.h" #include "llvm/Transforms/Utils/Cloning.h" #include "llvm/Transforms/Utils/FunctionUtils.h" +#include "llvm/ADT/Statistic.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/CFG.h" using namespace llvm; +STATISTIC(NumPartialInlined, "Number of functions partially inlined"); + namespace { struct VISIBILITY_HIDDEN PartialInliner : public ModulePass { virtual void getAnalysisUsage(AnalysisUsage &AU) const { } @@ -132,6 +135,8 @@ Function* PartialInliner::unswitchFunction(Function* F) { duplicateFunction->replaceAllUsesWith(F); duplicateFunction->eraseFromParent(); + ++NumPartialInlined; + return extractedFunction; } diff --git a/lib/Transforms/IPO/RaiseAllocations.cpp b/lib/Transforms/IPO/RaiseAllocations.cpp index a81bbdb..8c97b5d 100644 --- a/lib/Transforms/IPO/RaiseAllocations.cpp +++ b/lib/Transforms/IPO/RaiseAllocations.cpp @@ -82,14 +82,14 @@ void RaiseAllocations::doInitialization(Module &M) { // Check to see if we got the expected malloc if (TyWeHave != Malloc1Type) { - // Check to see if the prototype is wrong, giving us sbyte*(uint) * malloc + // Check to see if the prototype is wrong, giving us i8*(i32) * malloc // This handles the common declaration of: 'void *malloc(unsigned);' const FunctionType *Malloc2Type = FunctionType::get(PointerType::getUnqual(Type::Int8Ty), std::vector<const Type*>(1, Type::Int32Ty), false); if (TyWeHave != Malloc2Type) { // Check to see if the prototype is missing, giving us - // sbyte*(...) 
* malloc // This handles the common declaration of: 'void *malloc();' const FunctionType *Malloc3Type = FunctionType::get(PointerType::getUnqual(Type::Int8Ty), diff --git a/lib/Transforms/Instrumentation/RSProfiling.cpp b/lib/Transforms/Instrumentation/RSProfiling.cpp index c6cf4df..b110f4e 100644 --- a/lib/Transforms/Instrumentation/RSProfiling.cpp +++ b/lib/Transforms/Instrumentation/RSProfiling.cpp @@ -108,9 +108,9 @@ namespace { class VISIBILITY_HIDDEN GlobalRandomCounter : public Chooser { GlobalVariable* Counter; Value* ResetValue; - const Type* T; + const IntegerType* T; public: - GlobalRandomCounter(Module& M, const Type* t, uint64_t resetval); + GlobalRandomCounter(Module& M, const IntegerType* t, uint64_t resetval); virtual ~GlobalRandomCounter(); virtual void PrepFunction(Function* F); virtual void ProcessChoicePoint(BasicBlock* bb); @@ -121,9 +121,9 @@ namespace { GlobalVariable* Counter; Value* ResetValue; AllocaInst* AI; - const Type* T; + const IntegerType* T; public: - GlobalRandomCounterOpt(Module& M, const Type* t, uint64_t resetval); + GlobalRandomCounterOpt(Module& M, const IntegerType* t, uint64_t resetval); virtual ~GlobalRandomCounterOpt(); virtual void PrepFunction(Function* F); virtual void ProcessChoicePoint(BasicBlock* bb); @@ -193,7 +193,7 @@ static void getBackEdges(Function& F, T& BackEdges); // Methods of choosing when to profile /////////////////////////////////////// -GlobalRandomCounter::GlobalRandomCounter(Module& M, const Type* t, +GlobalRandomCounter::GlobalRandomCounter(Module& M, const IntegerType* t, uint64_t resetval) : T(t) { ConstantInt* Init = ConstantInt::get(T, resetval); ResetValue = Init; @@ -229,7 +229,7 @@ void GlobalRandomCounter::ProcessChoicePoint(BasicBlock* bb) { ReplacePhiPred(oldnext, bb, resetblock); } -GlobalRandomCounterOpt::GlobalRandomCounterOpt(Module& M, const Type* t, +GlobalRandomCounterOpt::GlobalRandomCounterOpt(Module& M, const IntegerType* t, uint64_t resetval) : AI(0), T(t) { ConstantInt* Init = ConstantInt::get(T, resetval); diff --git a/lib/Transforms/Scalar/CodeGenPrepare.cpp b/lib/Transforms/Scalar/CodeGenPrepare.cpp index 42978e7..e9bee64 100644 --- a/lib/Transforms/Scalar/CodeGenPrepare.cpp +++ b/lib/Transforms/Scalar/CodeGenPrepare.cpp @@ -401,8 +401,8 @@ static void SplitEdgeNicely(TerminatorInst *TI, unsigned SuccNum, /// OptimizeNoopCopyExpression - If the specified cast instruction is a noop -/// copy (e.g. it's casting from one pointer type to another, int->uint, or -/// int->sbyte on PPC), sink it into user blocks to reduce the number of virtual +/// copy (e.g. it's casting from one pointer type to another, i32->i8 on PPC), +/// sink it into user blocks to reduce the number of virtual /// registers that must be created and coalesced. /// /// Return true if any changes are made. 
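The GVN change below guards load PRE with isSafeToLoadUnconditionally. A hedged C++ rendering of the hazard, mirroring the comment added in the patch (function and variable names are ours):

    int read4(int *p) {
      int *q = p + 4;        // address arithmetic alone is safe even if p is null
      if (p == 0) return 0;
      return *q;             // hoisting this load above the null test could trap
    }

When the loaded value is pushed through a block with multiple successors, the load may become reachable on paths where the pointer was never tested, so the pass now conservatively requires the load to be safe to execute unconditionally.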
diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp index 673d38b..f4a9898 100644 --- a/lib/Transforms/Scalar/GVN.cpp +++ b/lib/Transforms/Scalar/GVN.cpp @@ -37,6 +37,7 @@ #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/Local.h" #include <cstdio> using namespace llvm; @@ -48,7 +49,7 @@ STATISTIC(NumPRELoad, "Number of loads PRE'd"); static cl::opt<bool> EnablePRE("enable-pre", cl::init(true), cl::Hidden); -cl::opt<bool> EnableLoadPRE("enable-load-pre", cl::init(true)); +static cl::opt<bool> EnableLoadPRE("enable-load-pre", cl::init(true)); //===----------------------------------------------------------------------===// // ValueTable Class @@ -952,8 +953,14 @@ bool GVN::processNonLocalLoad(LoadInst *LI, // If we had a phi translation failure, we'll have a single entry which is a // clobber in the current block. Reject this early. - if (Deps.size() == 1 && Deps[0].second.isClobber()) + if (Deps.size() == 1 && Deps[0].second.isClobber()) { + DEBUG( + DOUT << "GVN: non-local load "; + WriteAsOperand(*DOUT.stream(), LI); + DOUT << " is clobbered by " << *Deps[0].second.getInst(); + ); return false; + } // Filter out useless results (non-locals, etc). Keep track of the blocks // where we have a value available in repl, also keep track of whether we see @@ -1069,6 +1076,7 @@ bool GVN::processNonLocalLoad(LoadInst *LI, BasicBlock *TmpBB = LoadBB; bool isSinglePred = false; + bool allSingleSucc = true; while (TmpBB->getSinglePredecessor()) { isSinglePred = true; TmpBB = TmpBB->getSinglePredecessor(); @@ -1078,6 +1086,8 @@ bool GVN::processNonLocalLoad(LoadInst *LI, return false; if (Blockers.count(TmpBB)) return false; + if (TmpBB->getTerminator()->getNumSuccessors() != 1) + allSingleSucc = false; } assert(TmpBB); @@ -1154,7 +1164,20 @@ bool GVN::processNonLocalLoad(LoadInst *LI, << UnavailablePred->getName() << "': " << *LI); return false; } - + + // Make sure it is valid to move this load here. We have to watch out for: + // @1 = getelementptr (i8* p, ... + // test p and branch if == 0 + // load @1 + // It is valid to have the getelementptr before the test, even if p can be 0, + // as getelementptr only does address arithmetic. + // If we are not pushing the value through any multiple-successor blocks + // we do not have this case. Otherwise, check that the load is safe to + // put anywhere; this can be improved, but should be conservatively safe. + if (!allSingleSucc && + !isSafeToLoadUnconditionally(LoadPtr, UnavailablePred->getTerminator())) + return false; + // Okay, we can eliminate this load by inserting a reload in the predecessor // and using PHI construction to get the value in the other predecessors, do // it. diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp index 38b1198..326fb38 100644 --- a/lib/Transforms/Scalar/IndVarSimplify.cpp +++ b/lib/Transforms/Scalar/IndVarSimplify.cpp @@ -17,7 +17,10 @@ // which starts at zero and steps by one. // 2. The canonical induction variable is guaranteed to be the first PHI node // in the loop header block. -// 3. Any pointer arithmetic recurrences are raised to use array subscripts. +// 3. The canonical induction variable is guaranteed to be in a wide enough +// type so that IV expressions need not be (directly) zero-extended or +// sign-extended. +// 4. Any pointer arithmetic recurrences are raised to use array subscripts. 
// // If the trip count of a loop is computable, this pass also makes the following // changes: @@ -296,11 +299,11 @@ void IndVarSimplify::RewriteLoopExitValues(Loop *L, // If this instruction is dead now, delete it. RecursivelyDeleteTriviallyDeadInstructions(Inst); - // See if this is a single-entry LCSSA PHI node. If so, we can (and - // have to) remove - // the PHI entirely. This is safe, because the NewVal won't be variant + // If we're inserting code into the exit block rather than the + // preheader, we can (and have to) remove the PHI entirely. + // This is safe, because the NewVal won't be variant // in the loop, so we don't need an LCSSA phi node anymore. - if (NumPreds == 1) { + if (ExitBlocks.size() == 1) { PN->replaceAllUsesWith(ExitVal); RecursivelyDeleteTriviallyDeadInstructions(PN); break; diff --git a/lib/Transforms/Scalar/InstructionCombining.cpp b/lib/Transforms/Scalar/InstructionCombining.cpp index 5465e4a..5bd17e0 100644 --- a/lib/Transforms/Scalar/InstructionCombining.cpp +++ b/lib/Transforms/Scalar/InstructionCombining.cpp @@ -390,7 +390,7 @@ namespace { Value *EvaluateInDifferentType(Value *V, const Type *Ty, bool isSigned); - bool CanEvaluateInDifferentType(Value *V, const IntegerType *Ty, + bool CanEvaluateInDifferentType(Value *V, const Type *Ty, unsigned CastOpc, int &NumCastsRemoved); unsigned GetOrEnforceKnownAlignment(Value *V, unsigned PrefAlign = 0); @@ -654,30 +654,12 @@ static unsigned getOpcode(const Value *V) { } /// AddOne - Add one to a ConstantInt -static ConstantInt *AddOne(ConstantInt *C) { - APInt Val(C->getValue()); - return ConstantInt::get(++Val); +static Constant *AddOne(Constant *C) { + return ConstantExpr::getAdd(C, ConstantInt::get(C->getType(), 1)); } /// SubOne - Subtract one from a ConstantInt -static ConstantInt *SubOne(ConstantInt *C) { - APInt Val(C->getValue()); - return ConstantInt::get(--Val); -} -/// Add - Add two ConstantInts together -static ConstantInt *Add(ConstantInt *C1, ConstantInt *C2) { - return ConstantInt::get(C1->getValue() + C2->getValue()); -} -/// And - Bitwise AND two ConstantInts together -static ConstantInt *And(ConstantInt *C1, ConstantInt *C2) { - return ConstantInt::get(C1->getValue() & C2->getValue()); -} -/// Subtract - Subtract one ConstantInt from another -static ConstantInt *Subtract(ConstantInt *C1, ConstantInt *C2) { - return ConstantInt::get(C1->getValue() - C2->getValue()); -} -/// Multiply - Multiply two ConstantInts together -static ConstantInt *Multiply(ConstantInt *C1, ConstantInt *C2) { - return ConstantInt::get(C1->getValue() * C2->getValue()); +static Constant *SubOne(ConstantInt *C) { + return ConstantExpr::getSub(C, ConstantInt::get(C->getType(), 1)); } /// MultiplyOverflows - True if the multiply can not be expressed in an int /// this size. @@ -774,7 +756,7 @@ static void ComputeUnsignedMinMaxValuesFromKnownBits(const APInt &KnownZero, /// SimplifyDemandedBits knows about. See if the instruction has any /// properties that allow us to simplify its operands. 
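/// (Editor's note, illustrative and not from the commit: with the switch to
/// getScalarSizeInBits() below, BitWidth is 8 for an i8 instruction and also
/// 8 for a <4 x i8> one, so the demanded-bits machinery applies to vectors
/// as well as scalars.)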
bool InstCombiner::SimplifyDemandedInstructionBits(Instruction &Inst) { - unsigned BitWidth = cast<IntegerType>(Inst.getType())->getBitWidth(); + unsigned BitWidth = Inst.getType()->getScalarSizeInBits(); APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0); APInt DemandedMask(APInt::getAllOnesValue(BitWidth)); @@ -830,13 +812,13 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, const Type *VTy = V->getType(); assert((TD || !isa<PointerType>(VTy)) && "SimplifyDemandedBits needs to know bit widths!"); - assert((!TD || TD->getTypeSizeInBits(VTy) == BitWidth) && - (!isa<IntegerType>(VTy) || - VTy->getPrimitiveSizeInBits() == BitWidth) && + assert((!TD || TD->getTypeSizeInBits(VTy->getScalarType()) == BitWidth) && + (!VTy->isIntOrIntVector() || + VTy->getScalarSizeInBits() == BitWidth) && KnownZero.getBitWidth() == BitWidth && KnownOne.getBitWidth() == BitWidth && - "Value *V, DemandedMask, KnownZero and KnownOne \ - must have same BitWidth"); + "Value *V, DemandedMask, KnownZero and KnownOne " + "must have same BitWidth"); if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) { // We know all of the bits for a constant! KnownOne = CI->getValue() & DemandedMask; @@ -1089,7 +1071,7 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, RHSKnownZero &= LHSKnownZero; break; case Instruction::Trunc: { - unsigned truncBf = I->getOperand(0)->getType()->getPrimitiveSizeInBits(); + unsigned truncBf = I->getOperand(0)->getType()->getScalarSizeInBits(); DemandedMask.zext(truncBf); RHSKnownZero.zext(truncBf); RHSKnownOne.zext(truncBf); @@ -1112,7 +1094,7 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, break; case Instruction::ZExt: { // Compute the bits in the result that are not present in the input. - unsigned SrcBitWidth =I->getOperand(0)->getType()->getPrimitiveSizeInBits(); + unsigned SrcBitWidth =I->getOperand(0)->getType()->getScalarSizeInBits(); DemandedMask.trunc(SrcBitWidth); RHSKnownZero.trunc(SrcBitWidth); @@ -1130,7 +1112,7 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, } case Instruction::SExt: { // Compute the bits in the result that are not present in the input. - unsigned SrcBitWidth =I->getOperand(0)->getType()->getPrimitiveSizeInBits(); + unsigned SrcBitWidth =I->getOperand(0)->getType()->getScalarSizeInBits(); APInt InputDemandedBits = DemandedMask & APInt::getLowBitsSet(BitWidth, SrcBitWidth); @@ -1354,7 +1336,7 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, if (ConstantInt *Rem = dyn_cast<ConstantInt>(I->getOperand(1))) { APInt RA = Rem->getValue().abs(); if (RA.isPowerOf2()) { - if (DemandedMask.ule(RA)) // srem won't affect demanded bits + if (DemandedMask.ult(RA)) // srem won't affect demanded bits return I->getOperand(0); APInt LowBits = RA - 1; @@ -2087,7 +2069,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) { // See if SimplifyDemandedBits can simplify this. 
This handles stuff like // (X & 254)+1 -> (X&254)|1 - if (!isa<VectorType>(I.getType()) && SimplifyDemandedInstructionBits(I)) + if (SimplifyDemandedInstructionBits(I)) return &I; // zext(i1) - 1 -> select i1, 0, -1 @@ -2107,7 +2089,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) { Value *XorLHS = 0; if (isa<ConstantInt>(RHSC) && match(LHS, m_Xor(m_Value(XorLHS), m_ConstantInt(XorRHS)))) { - uint32_t TySizeBits = I.getType()->getPrimitiveSizeInBits(); + uint32_t TySizeBits = I.getType()->getScalarSizeInBits(); const APInt& RHSVal = cast<ConstantInt>(RHSC)->getValue(); uint32_t Size = TySizeBits / 2; @@ -2197,7 +2179,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) { // X*C1 + X*C2 --> X * (C1+C2) ConstantInt *C1; if (X == dyn_castFoldableMul(RHS, C1)) - return BinaryOperator::CreateMul(X, Add(C1, C2)); + return BinaryOperator::CreateMul(X, ConstantExpr::getAdd(C1, C2)); } // X + X*C --> X * (C+1) @@ -2262,7 +2244,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) { // (X & FF00) + xx00 -> (X+xx00) & FF00 if (LHS->hasOneUse() && match(LHS, m_And(m_Value(X), m_ConstantInt(C2)))) { - Constant *Anded = And(CRHS, C2); + Constant *Anded = ConstantExpr::getAnd(CRHS, C2); if (Anded == CRHS) { // See if all bits from the first bit set in the Add RHS up are included // in the mask. First, get the rightmost bit. @@ -2290,7 +2272,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) { } // add (cast *A to intptrtype) B -> - // cast (GEP (cast *A to sbyte*) B) --> intptrtype + // cast (GEP (cast *A to i8*) B) --> intptrtype { CastInst *CI = dyn_cast<CastInst>(LHS); Value *Other = RHS; @@ -2299,7 +2281,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) { Other = LHS; } if (CI && CI->getType()->isSized() && - (CI->getType()->getPrimitiveSizeInBits() == + (CI->getType()->getScalarSizeInBits() == TD->getIntPtrType()->getPrimitiveSizeInBits()) && isa<PointerType>(CI->getOperand(0)->getType())) { unsigned AS = @@ -2523,7 +2505,7 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) { else if (ConstantInt *CI1 = dyn_cast<ConstantInt>(I.getOperand(0))) { if (ConstantInt *CI2 = dyn_cast<ConstantInt>(Op1I->getOperand(1))) // C1-(X+C2) --> (C1-C2)-X - return BinaryOperator::CreateSub(Subtract(CI1, CI2), + return BinaryOperator::CreateSub(ConstantExpr::getSub(CI1, CI2), Op1I->getOperand(0)); } } @@ -2564,7 +2546,8 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) { // X - X*C --> X * (1-C) ConstantInt *C2 = 0; if (dyn_castFoldableMul(Op1I, C2) == Op0) { - Constant *CP1 = Subtract(ConstantInt::get(I.getType(), 1), C2); + Constant *CP1 = ConstantExpr::getSub(ConstantInt::get(I.getType(), 1), + C2); return BinaryOperator::CreateMul(Op0, CP1); } } @@ -2589,7 +2572,7 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) { ConstantInt *C2; // X*C1 - X*C2 -> X * (C1-C2) if (X == dyn_castFoldableMul(Op1, C2)) - return BinaryOperator::CreateMul(X, Subtract(C1, C2)); + return BinaryOperator::CreateMul(X, ConstantExpr::getSub(C1, C2)); } return 0; } @@ -2950,12 +2933,12 @@ Instruction *InstCombiner::commonIDivTransforms(BinaryOperator &I) { // (sdiv X, X) --> 1 (udiv X, X) --> 1 if (Op0 == Op1) { if (const VectorType *Ty = dyn_cast<VectorType>(I.getType())) { - ConstantInt *CI = ConstantInt::get(Ty->getElementType(), 1); + Constant *CI = ConstantInt::get(Ty->getElementType(), 1); std::vector<Constant*> Elts(Ty->getNumElements(), CI); return ReplaceInstUsesWith(I, ConstantVector::get(Elts)); } - ConstantInt *CI = ConstantInt::get(I.getType(), 1); + Constant *CI = 
ConstantInt::get(I.getType(), 1); return ReplaceInstUsesWith(I, CI); } @@ -2980,7 +2963,7 @@ Instruction *InstCombiner::commonIDivTransforms(BinaryOperator &I) { return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); else return BinaryOperator::Create(I.getOpcode(), LHS->getOperand(0), - Multiply(RHS, LHSRHS)); + ConstantExpr::getMul(RHS, LHSRHS)); } if (!RHS->isZero()) { // avoid X udiv 0 @@ -3513,7 +3496,7 @@ Instruction *InstCombiner::OptAndOp(Instruction *Op, Value *X = Op->getOperand(0); Constant *Together = 0; if (!Op->isShift()) - Together = And(AndRHS, OpRHS); + Together = ConstantExpr::getAnd(AndRHS, OpRHS); switch (Op->getOpcode()) { case Instruction::Xor: @@ -3724,7 +3707,7 @@ Value *InstCombiner::FoldLogicalPlusAnd(Value *LHS, Value *RHS, switch (LHSI->getOpcode()) { default: return 0; case Instruction::And: - if (And(N, Mask) == Mask) { + if (ConstantExpr::getAnd(N, Mask) == Mask) { // If the AndRHS is a power of two minus one (0+1+), this is simple. if ((Mask->getValue().countLeadingZeros() + Mask->getValue().countPopulation()) == @@ -3748,7 +3731,7 @@ Value *InstCombiner::FoldLogicalPlusAnd(Value *LHS, Value *RHS, // If the AndRHS is a power of two minus one (0+1+), and N&Mask == 0 if ((Mask->getValue().countLeadingZeros() + Mask->getValue().countPopulation()) == Mask->getValue().getBitWidth() - && And(N, Mask)->isZero()) + && ConstantExpr::getAnd(N, Mask)->isNullValue()) break; return 0; } @@ -3946,10 +3929,9 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) { // See if we can simplify any instructions used by the instruction whose sole // purpose is to compute bits we don't care about. - if (!isa<VectorType>(I.getType())) { - if (SimplifyDemandedInstructionBits(I)) - return &I; - } else { + if (SimplifyDemandedInstructionBits(I)) + return &I; + if (isa<VectorType>(I.getType())) { if (ConstantVector *CP = dyn_cast<ConstantVector>(Op1)) { if (CP->isAllOnesValue()) // X & <-1,-1> -> X return ReplaceInstUsesWith(I, I.getOperand(0)); @@ -3957,7 +3939,7 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) { return ReplaceInstUsesWith(I, Op1); // X & <0,0> -> <0,0> } } - + if (ConstantInt *AndRHS = dyn_cast<ConstantInt>(Op1)) { const APInt& AndRHSMask = AndRHS->getValue(); APInt NotAndRHS(~AndRHSMask); @@ -4510,7 +4492,7 @@ Instruction *InstCombiner::FoldOrOfICmps(Instruction &I, Instruction *Add = BinaryOperator::CreateAdd(Val, AddCST, Val->getName()+".off"); InsertNewInstBefore(Add, I); - AddCST = Subtract(AddOne(RHSCst), LHSCst); + AddCST = ConstantExpr::getSub(AddOne(RHSCst), LHSCst); return new ICmpInst(ICmpInst::ICMP_ULT, Add, AddCST); } break; // (X == 13 | X == 15) -> no change @@ -4653,18 +4635,17 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { // See if we can simplify any instructions used by the instruction whose sole // purpose is to compute bits we don't care about. 
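// Editor's note (illustrative, not from the commit): demanded-bits
// simplification now runs on vector-typed instructions too, using the
// element width, while constant-vector identities such as X | <0,0> -> X
// are still matched explicitly below.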
- if (!isa<VectorType>(I.getType())) { - if (SimplifyDemandedInstructionBits(I)) - return &I; - } else if (isa<ConstantAggregateZero>(Op1)) { - return ReplaceInstUsesWith(I, Op0); // X | <0,0> -> X - } else if (ConstantVector *CP = dyn_cast<ConstantVector>(Op1)) { - if (CP->isAllOnesValue()) // X | <-1,-1> -> <-1,-1> - return ReplaceInstUsesWith(I, I.getOperand(1)); + if (SimplifyDemandedInstructionBits(I)) + return &I; + if (isa<VectorType>(I.getType())) { + if (isa<ConstantAggregateZero>(Op1)) { + return ReplaceInstUsesWith(I, Op0); // X | <0,0> -> X + } else if (ConstantVector *CP = dyn_cast<ConstantVector>(Op1)) { + if (CP->isAllOnesValue()) // X | <-1,-1> -> <-1,-1> + return ReplaceInstUsesWith(I, I.getOperand(1)); + } } - - // or X, -1 == -1 if (ConstantInt *RHS = dyn_cast<ConstantInt>(Op1)) { ConstantInt *C1 = 0; Value *X = 0; @@ -4991,12 +4972,11 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) { // See if we can simplify any instructions used by the instruction whose sole // purpose is to compute bits we don't care about. - if (!isa<VectorType>(I.getType())) { - if (SimplifyDemandedInstructionBits(I)) - return &I; - } else if (isa<ConstantAggregateZero>(Op1)) { - return ReplaceInstUsesWith(I, Op0); // X ^ <0,0> -> X - } + if (SimplifyDemandedInstructionBits(I)) + return &I; + if (isa<VectorType>(I.getType())) + if (isa<ConstantAggregateZero>(Op1)) + return ReplaceInstUsesWith(I, Op0); // X ^ <0,0> -> X // Is this a ~ operation? if (Value *NotOp = dyn_castNotVal(&I)) { @@ -5083,7 +5063,7 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) { Constant *NewRHS = ConstantExpr::getOr(Op0CI, RHS); // Anything in both C1 and C2 is known to be zero, remove it from // NewRHS. - Constant *CommonBits = And(Op0CI, RHS); + Constant *CommonBits = ConstantExpr::getAnd(Op0CI, RHS); NewRHS = ConstantExpr::getAnd(NewRHS, ConstantExpr::getNot(CommonBits)); AddToWorkList(Op0I); @@ -5247,12 +5227,13 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) { return Changed ? &I : 0; } -/// AddWithOverflow - Compute Result = In1+In2, returning true if the result -/// overflowed for this type. -static bool AddWithOverflow(ConstantInt *&Result, ConstantInt *In1, - ConstantInt *In2, bool IsSigned = false) { - Result = cast<ConstantInt>(Add(In1, In2)); +static ConstantInt *ExtractElement(Constant *V, Constant *Idx) { + return cast<ConstantInt>(ConstantExpr::getExtractElement(V, Idx)); +} +static bool HasAddOverflow(ConstantInt *Result, + ConstantInt *In1, ConstantInt *In2, + bool IsSigned) { if (IsSigned) if (In2->getValue().isNegative()) return Result->getValue().sgt(In1->getValue()); @@ -5262,12 +5243,32 @@ static bool AddWithOverflow(ConstantInt *&Result, ConstantInt *In1, return Result->getValue().ult(In1->getValue()); } -/// SubWithOverflow - Compute Result = In1-In2, returning true if the result +/// AddWithOverflow - Compute Result = In1+In2, returning true if the result /// overflowed for this type. 
-static bool SubWithOverflow(ConstantInt *&Result, ConstantInt *In1, - ConstantInt *In2, bool IsSigned = false) { - Result = cast<ConstantInt>(Subtract(In1, In2)); +static bool AddWithOverflow(Constant *&Result, Constant *In1, + Constant *In2, bool IsSigned = false) { + Result = ConstantExpr::getAdd(In1, In2); + + if (const VectorType *VTy = dyn_cast<VectorType>(In1->getType())) { + for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) { + Constant *Idx = ConstantInt::get(Type::Int32Ty, i); + if (HasAddOverflow(ExtractElement(Result, Idx), + ExtractElement(In1, Idx), + ExtractElement(In2, Idx), + IsSigned)) + return true; + } + return false; + } + + return HasAddOverflow(cast<ConstantInt>(Result), + cast<ConstantInt>(In1), cast<ConstantInt>(In2), + IsSigned); +} +static bool HasSubOverflow(ConstantInt *Result, + ConstantInt *In1, ConstantInt *In2, + bool IsSigned) { if (IsSigned) if (In2->getValue().isNegative()) return Result->getValue().slt(In1->getValue()); @@ -5277,6 +5278,29 @@ static bool SubWithOverflow(ConstantInt *&Result, ConstantInt *In1, return Result->getValue().ugt(In1->getValue()); } +/// SubWithOverflow - Compute Result = In1-In2, returning true if the result +/// overflowed for this type. +static bool SubWithOverflow(Constant *&Result, Constant *In1, + Constant *In2, bool IsSigned = false) { + Result = ConstantExpr::getSub(In1, In2); + + if (const VectorType *VTy = dyn_cast<VectorType>(In1->getType())) { + for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) { + Constant *Idx = ConstantInt::get(Type::Int32Ty, i); + if (HasSubOverflow(ExtractElement(Result, Idx), + ExtractElement(In1, Idx), + ExtractElement(In2, Idx), + IsSigned)) + return true; + } + return false; + } + + return HasSubOverflow(cast<ConstantInt>(Result), + cast<ConstantInt>(In1), cast<ConstantInt>(In2), + IsSigned); +} + /// EmitGEPOffset - Given a getelementptr instruction/constantexpr, emit the /// code necessary to compute the offset from the base pointer (without adding /// in the base pointer). Return the result as a signed integer of intptr size. @@ -5589,7 +5613,7 @@ Instruction *InstCombiner::FoldFCmp_IntToFP_Cst(FCmpInst &I, // Check to see that the input is converted from an integer type that is small // enough that preserves all bits. TODO: check here for "known" sign bits. // This would allow us to handle (fptosi (x >>s 62) to float) if x is i64 f.e. - unsigned InputSize = LHSI->getOperand(0)->getType()->getPrimitiveSizeInBits(); + unsigned InputSize = LHSI->getOperand(0)->getType()->getScalarSizeInBits(); // If this is a uitofp instruction, we need an extra bit to hold the sign. bool LHSUnsigned = isa<UIToFPInst>(LHSI); @@ -5644,7 +5668,7 @@ Instruction *InstCombiner::FoldFCmp_IntToFP_Cst(FCmpInst &I, // See if the FP constant is too large for the integer. For example, // comparing an i8 to 300.0. - unsigned IntWidth = IntTy->getPrimitiveSizeInBits(); + unsigned IntWidth = IntTy->getScalarSizeInBits(); if (!LHSUnsigned) { // If the RHS value is > SignedMax, fold the comparison. 
This handles +INF @@ -5943,9 +5967,9 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { unsigned BitWidth = 0; if (TD) - BitWidth = TD->getTypeSizeInBits(Ty); - else if (isa<IntegerType>(Ty)) - BitWidth = Ty->getPrimitiveSizeInBits(); + BitWidth = TD->getTypeSizeInBits(Ty->getScalarType()); + else if (Ty->isIntOrIntVector()) + BitWidth = Ty->getScalarSizeInBits(); bool isSignBit = false; @@ -6459,7 +6483,7 @@ Instruction *InstCombiner::FoldICmpDivCst(ICmpInst &ICI, BinaryOperator *DivI, // of form X/C1=C2. We solve for X by multiplying C1 (DivRHS) and // C2 (CI). By solving for X we can turn this into a range check // instead of computing a divide. - ConstantInt *Prod = Multiply(CmpRHS, DivRHS); + Constant *Prod = ConstantExpr::getMul(CmpRHS, DivRHS); // Determine if the product overflows by seeing if the product is // not equal to the divide. Make sure we do the same kind of divide @@ -6478,7 +6502,7 @@ Instruction *InstCombiner::FoldICmpDivCst(ICmpInst &ICI, BinaryOperator *DivI, // overflow variable is set to 0 if it's corresponding bound variable is valid // -1 if overflowed off the bottom end, or +1 if overflowed off the top end. int LoOverflow = 0, HiOverflow = 0; - ConstantInt *LoBound = 0, *HiBound = 0; + Constant *LoBound = 0, *HiBound = 0; if (!DivIsSigned) { // udiv // e.g. X/5 op 3 --> [15, 20) @@ -6966,7 +6990,7 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI, if (ConstantInt *BOp1C = dyn_cast<ConstantInt>(BO->getOperand(1))) { if (BO->hasOneUse()) return new ICmpInst(ICI.getPredicate(), BO->getOperand(0), - Subtract(RHS, BOp1C)); + ConstantExpr::getSub(RHS, BOp1C)); } else if (RHSV == 0) { // Replace ((add A, B) != 0) with (A != -B) if A or B is // efficiently invertible, or if the add has just this one use. @@ -7133,10 +7157,10 @@ Instruction *InstCombiner::visitICmpInstWithCastAndCast(ICmpInst &ICI) { if (Res2 == CI) { // Make sure that sign of the Cmp and the sign of the Cast are the same. // For example, we might have: - // %A = sext short %X to uint - // %B = icmp ugt uint %A, 1330 + // %A = sext i16 %X to i32 + // %B = icmp ugt i32 %A, 1330 // It is incorrect to transform this into - // %B = icmp ugt short %X, 1330 + // %B = icmp ugt i16 %X, 1330 // because %A may have negative value. // // However, we allow this when the compare is EQ/NE, because they are @@ -7210,18 +7234,16 @@ Instruction *InstCombiner::visitAShr(BinaryOperator &I) { if (ConstantInt *CSI = dyn_cast<ConstantInt>(Op0)) if (CSI->isAllOnesValue()) return ReplaceInstUsesWith(I, CSI); - + // See if we can turn a signed shr into an unsigned shr. - if (!isa<VectorType>(I.getType())) { - if (MaskedValueIsZero(Op0, - APInt::getSignBit(I.getType()->getPrimitiveSizeInBits()))) - return BinaryOperator::CreateLShr(Op0, I.getOperand(1)); - - // Arithmetic shifting an all-sign-bit value is a no-op. - unsigned NumSignBits = ComputeNumSignBits(Op0); - if (NumSignBits == Op0->getType()->getPrimitiveSizeInBits()) - return ReplaceInstUsesWith(I, Op0); - } + if (MaskedValueIsZero(Op0, + APInt::getSignBit(I.getType()->getScalarSizeInBits()))) + return BinaryOperator::CreateLShr(Op0, I.getOperand(1)); + + // Arithmetic shifting an all-sign-bit value is a no-op. + unsigned NumSignBits = ComputeNumSignBits(Op0); + if (NumSignBits == Op0->getType()->getScalarSizeInBits()) + return ReplaceInstUsesWith(I, Op0); return 0; } @@ -7250,7 +7272,7 @@ Instruction *InstCombiner::commonShiftTransforms(BinaryOperator &I) { } // See if we can fold away this shift. 
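The FoldICmpDivCst change above keeps the same range-check strategy while letting the bounds be vector constants. The underlying arithmetic for the unsigned scalar case: X /u 5 == 3 holds exactly for X in [15, 20), so the divide can be answered with one subtraction and one unsigned compare. A quick exhaustive check over small inputs (plain C++ stand-in):

#include <cstdint>
#include <cstdio>

int main() {
  const uint32_t C1 = 5, C2 = 3;
  const uint32_t Lo = C1 * C2, Hi = C1 * (C2 + 1);   // [15, 20)
  for (uint32_t X = 0; X != 200; ++X) {
    bool byDivide = (X / C1 == C2);
    bool byRange  = (X - Lo) < (Hi - Lo);            // single unsigned compare
    if (byDivide != byRange) { printf("mismatch at %u\n", X); return 1; }
  }
  printf("range check matches the divide\n");
}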
- if (!isa<VectorType>(I.getType()) && SimplifyDemandedInstructionBits(I)) + if (SimplifyDemandedInstructionBits(I)) return &I; // Try to fold constant and into select arguments. @@ -7271,10 +7293,10 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1, // See if we can simplify any instructions used by the instruction whose sole // purpose is to compute bits we don't care about. - uint32_t TypeBits = Op0->getType()->getPrimitiveSizeInBits(); + uint32_t TypeBits = Op0->getType()->getScalarSizeInBits(); - // shl uint X, 32 = 0 and shr ubyte Y, 9 = 0, ... just don't eliminate shr - // of a signed value. + // shl i32 X, 32 = 0 and srl i8 Y, 9 = 0, ... just don't eliminate + // a signed shift. // if (Op1->uge(TypeBits)) { if (I.getOpcode() != Instruction::AShr) @@ -7320,8 +7342,8 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1, // part of the register be zeros. Emulate this by inserting an AND to // clear the top bits as needed. This 'and' will usually be zapped by // other xforms later if dead. - unsigned SrcSize = TrOp->getType()->getPrimitiveSizeInBits(); - unsigned DstSize = TI->getType()->getPrimitiveSizeInBits(); + unsigned SrcSize = TrOp->getType()->getScalarSizeInBits(); + unsigned DstSize = TI->getType()->getScalarSizeInBits(); APInt MaskV(APInt::getLowBitsSet(SrcSize, DstSize)); // The mask we constructed says what the trunc would do if occurring @@ -7729,7 +7751,8 @@ Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI, // If the allocation size is constant, form a constant mul expression Amt = ConstantInt::get(Type::Int32Ty, Scale); if (isa<ConstantInt>(NumElements)) - Amt = Multiply(cast<ConstantInt>(NumElements), cast<ConstantInt>(Amt)); + Amt = ConstantExpr::getMul(cast<ConstantInt>(NumElements), + cast<ConstantInt>(Amt)); // otherwise multiply the amount and the number of elements else { Instruction *Tmp = BinaryOperator::CreateMul(Amt, NumElements, "tmp"); @@ -7788,17 +7811,17 @@ Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI, /// If CastOpc is a sext or zext, we are asking if the low bits of the value can /// bit computed in a larger type, which is then and'd or sext_in_reg'd to get /// the final result. -bool InstCombiner::CanEvaluateInDifferentType(Value *V, const IntegerType *Ty, +bool InstCombiner::CanEvaluateInDifferentType(Value *V, const Type *Ty, unsigned CastOpc, int &NumCastsRemoved){ // We can always evaluate constants in another type. - if (isa<ConstantInt>(V)) + if (isa<Constant>(V)) return true; Instruction *I = dyn_cast<Instruction>(V); if (!I) return false; - const IntegerType *OrigTy = cast<IntegerType>(V->getType()); + const Type *OrigTy = V->getType(); // If this is an extension or truncate, we can often eliminate it. if (isa<TruncInst>(I) || isa<ZExtInst>(I) || isa<SExtInst>(I)) { @@ -7836,8 +7859,8 @@ bool InstCombiner::CanEvaluateInDifferentType(Value *V, const IntegerType *Ty, // If we are truncating the result of this SHL, and if it's a shift of a // constant amount, we can always perform a SHL in a smaller type. 
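The reworded comment above is worth spelling out: a shift amount equal to or exceeding the bit width lets shl and lshr fold to zero, but ashr must be left alone, because a negative value keeps producing sign bits no matter how far it is shifted. A C++ model (plain C++ cannot express the oversized shift directly, since it is undefined behavior at the source level, so the model checks the amount first):

#include <cstdint>
#include <cstdio>

static uint32_t lshrModel(uint32_t x, unsigned amt) {
  return amt >= 32 ? 0u : x >> amt;   // lshr folds to 0 once amt >= width
}
static int32_t ashrModel(int32_t x, unsigned amt) {
  if (amt >= 32) amt = 31;            // ashr saturates to all sign bits
  return x >> amt;  // assumes arithmetic >> on signed values (common ABIs)
}

int main() {
  printf("%u %d %d\n", lshrModel(0xdeadbeefu, 33),
         ashrModel(-1, 40), ashrModel(5, 40));   // 0 -1 0
}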
if (ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1))) { - uint32_t BitWidth = Ty->getBitWidth(); - if (BitWidth < OrigTy->getBitWidth() && + uint32_t BitWidth = Ty->getScalarSizeInBits(); + if (BitWidth < OrigTy->getScalarSizeInBits() && CI->getLimitedValue(BitWidth) < BitWidth) return CanEvaluateInDifferentType(I->getOperand(0), Ty, CastOpc, NumCastsRemoved); @@ -7848,8 +7871,8 @@ bool InstCombiner::CanEvaluateInDifferentType(Value *V, const IntegerType *Ty, // lshr iff we know that the bits we would otherwise be shifting in are // already zeros. if (ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1))) { - uint32_t OrigBitWidth = OrigTy->getBitWidth(); - uint32_t BitWidth = Ty->getBitWidth(); + uint32_t OrigBitWidth = OrigTy->getScalarSizeInBits(); + uint32_t BitWidth = Ty->getScalarSizeInBits(); if (BitWidth < OrigBitWidth && MaskedValueIsZero(I->getOperand(0), APInt::getHighBitsSet(OrigBitWidth, OrigBitWidth-BitWidth)) && @@ -8131,8 +8154,8 @@ Instruction *InstCombiner::commonIntCastTransforms(CastInst &CI) { Value *Src = CI.getOperand(0); const Type *SrcTy = Src->getType(); const Type *DestTy = CI.getType(); - uint32_t SrcBitSize = SrcTy->getPrimitiveSizeInBits(); - uint32_t DestBitSize = DestTy->getPrimitiveSizeInBits(); + uint32_t SrcBitSize = SrcTy->getScalarSizeInBits(); + uint32_t DestBitSize = DestTy->getScalarSizeInBits(); // See if we can simplify any instructions used by the LHS whose sole // purpose is to compute bits we don't care about. @@ -8151,8 +8174,9 @@ Instruction *InstCombiner::commonIntCastTransforms(CastInst &CI) { // Only do this if the dest type is a simple type, don't convert the // expression tree to something weird like i93 unless the source is also // strange. - (isSafeIntegerType(DestTy) || !isSafeIntegerType(SrcI->getType())) && - CanEvaluateInDifferentType(SrcI, cast<IntegerType>(DestTy), + (isSafeIntegerType(DestTy->getScalarType()) || + !isSafeIntegerType(SrcI->getType()->getScalarType())) && + CanEvaluateInDifferentType(SrcI, DestTy, CI.getOpcode(), NumCastsRemoved)) { // If this cast is a truncate, evaluting in a different type always // eliminates the cast, so it is always a win. If this is a zero-extension, @@ -8350,17 +8374,18 @@ Instruction *InstCombiner::visitTrunc(TruncInst &CI) { Value *Src = CI.getOperand(0); const Type *Ty = CI.getType(); - uint32_t DestBitWidth = Ty->getPrimitiveSizeInBits(); - uint32_t SrcBitWidth = cast<IntegerType>(Src->getType())->getBitWidth(); + uint32_t DestBitWidth = Ty->getScalarSizeInBits(); + uint32_t SrcBitWidth = Src->getType()->getScalarSizeInBits(); // Canonicalize trunc x to i1 -> (icmp ne (and x, 1), 0) - if (DestBitWidth == 1) { + if (DestBitWidth == 1 && + isa<VectorType>(Ty) == isa<VectorType>(Src->getType())) { Constant *One = ConstantInt::get(Src->getType(), 1); Src = InsertNewInstBefore(BinaryOperator::CreateAnd(Src, One, "tmp"), CI); Value *Zero = Constant::getNullValue(Src->getType()); return new ICmpInst(ICmpInst::ICMP_NE, Src, Zero); } - + // Optimize trunc(lshr(), c) to pull the shift through the truncate. 
ConstantInt *ShAmtV = 0; Value *ShiftOp = 0; @@ -8403,7 +8428,7 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, Instruction &CI, Value *In = ICI->getOperand(0); Value *Sh = ConstantInt::get(In->getType(), - In->getType()->getPrimitiveSizeInBits()-1); + In->getType()->getScalarSizeInBits()-1); In = InsertNewInstBefore(BinaryOperator::CreateLShr(In, Sh, In->getName()+".lobit"), CI); @@ -8494,28 +8519,30 @@ Instruction *InstCombiner::visitZExt(ZExtInst &CI) { // Get the sizes of the types involved. We know that the intermediate type // will be smaller than A or C, but don't know the relation between A and C. Value *A = CSrc->getOperand(0); - unsigned SrcSize = A->getType()->getPrimitiveSizeInBits(); - unsigned MidSize = CSrc->getType()->getPrimitiveSizeInBits(); - unsigned DstSize = CI.getType()->getPrimitiveSizeInBits(); + unsigned SrcSize = A->getType()->getScalarSizeInBits(); + unsigned MidSize = CSrc->getType()->getScalarSizeInBits(); + unsigned DstSize = CI.getType()->getScalarSizeInBits(); // If we're actually extending zero bits, then if // SrcSize < DstSize: zext(a & mask) // SrcSize == DstSize: a & mask // SrcSize > DstSize: trunc(a) & mask if (SrcSize < DstSize) { APInt AndValue(APInt::getLowBitsSet(SrcSize, MidSize)); - Constant *AndConst = ConstantInt::get(AndValue); + Constant *AndConst = ConstantInt::get(A->getType(), AndValue); Instruction *And = BinaryOperator::CreateAnd(A, AndConst, CSrc->getName()+".mask"); InsertNewInstBefore(And, CI); return new ZExtInst(And, CI.getType()); } else if (SrcSize == DstSize) { APInt AndValue(APInt::getLowBitsSet(SrcSize, MidSize)); - return BinaryOperator::CreateAnd(A, ConstantInt::get(AndValue)); + return BinaryOperator::CreateAnd(A, ConstantInt::get(A->getType(), + AndValue)); } else if (SrcSize > DstSize) { Instruction *Trunc = new TruncInst(A, CI.getType(), "tmp"); InsertNewInstBefore(Trunc, CI); APInt AndValue(APInt::getLowBitsSet(DstSize, MidSize)); - return BinaryOperator::CreateAnd(Trunc, ConstantInt::get(AndValue)); + return BinaryOperator::CreateAnd(Trunc, ConstantInt::get(Trunc->getType(), + AndValue)); } } @@ -8537,6 +8564,33 @@ Instruction *InstCombiner::visitZExt(ZExtInst &CI) { } } + // zext(trunc(t) & C) -> (t & zext(C)). + if (SrcI && SrcI->getOpcode() == Instruction::And && SrcI->hasOneUse()) + if (ConstantInt *C = dyn_cast<ConstantInt>(SrcI->getOperand(1))) + if (TruncInst *TI = dyn_cast<TruncInst>(SrcI->getOperand(0))) { + Value *TI0 = TI->getOperand(0); + if (TI0->getType() == CI.getType()) + return + BinaryOperator::CreateAnd(TI0, + ConstantExpr::getZExt(C, CI.getType())); + } + + // zext((trunc(t) & C) ^ C) -> ((t & zext(C)) ^ zext(C)). + if (SrcI && SrcI->getOpcode() == Instruction::Xor && SrcI->hasOneUse()) + if (ConstantInt *C = dyn_cast<ConstantInt>(SrcI->getOperand(1))) + if (BinaryOperator *And = dyn_cast<BinaryOperator>(SrcI->getOperand(0))) + if (And->getOpcode() == Instruction::And && And->hasOneUse() && + And->getOperand(1) == C) + if (TruncInst *TI = dyn_cast<TruncInst>(And->getOperand(0))) { + Value *TI0 = TI->getOperand(0); + if (TI0->getType() == CI.getType()) { + Constant *ZC = ConstantExpr::getZExt(C, CI.getType()); + Instruction *NewAnd = BinaryOperator::CreateAnd(TI0, ZC, "tmp"); + InsertNewInstBefore(NewAnd, *And); + return BinaryOperator::CreateXor(NewAnd, ZC); + } + } + return 0; } @@ -8556,9 +8610,9 @@ Instruction *InstCombiner::visitSExt(SExtInst &CI) { // eliminate the trunc/sext pair. 
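The new zext(trunc(t) & C) --> t & zext(C) fold above rests on a simple identity: masking after a truncate keeps only bits that the widened mask would have kept anyway. A concrete check at the i64/i32 widths (values are illustrative):

#include <cstdint>
#include <cstdio>

int main() {
  uint64_t t = 0x123456789abcdef0ull;
  uint32_t C = 0x00ff00ffu;
  uint64_t a = (uint64_t)((uint32_t)t & C);   // zext(trunc(t) & C)
  uint64_t b = t & (uint64_t)C;               // t & zext(C)
  printf("%llx %llx %s\n", (unsigned long long)a, (unsigned long long)b,
         a == b ? "equal" : "BUG");
}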
if (getOpcode(Src) == Instruction::Trunc) { Value *Op = cast<User>(Src)->getOperand(0); - unsigned OpBits = cast<IntegerType>(Op->getType())->getBitWidth(); - unsigned MidBits = cast<IntegerType>(Src->getType())->getBitWidth(); - unsigned DestBits = cast<IntegerType>(CI.getType())->getBitWidth(); + unsigned OpBits = Op->getType()->getScalarSizeInBits(); + unsigned MidBits = Src->getType()->getScalarSizeInBits(); + unsigned DestBits = CI.getType()->getScalarSizeInBits(); unsigned NumSignBits = ComputeNumSignBits(Op); if (OpBits == DestBits) { @@ -8599,8 +8653,8 @@ Instruction *InstCombiner::visitSExt(SExtInst &CI) { BA == CA && isa<TruncInst>(A)) { Value *I = cast<TruncInst>(A)->getOperand(0); if (I->getType() == CI.getType()) { - unsigned MidSize = Src->getType()->getPrimitiveSizeInBits(); - unsigned SrcDstSize = CI.getType()->getPrimitiveSizeInBits(); + unsigned MidSize = Src->getType()->getScalarSizeInBits(); + unsigned SrcDstSize = CI.getType()->getScalarSizeInBits(); unsigned ShAmt = CA->getZExtValue()+SrcDstSize-MidSize; Constant *ShAmtV = ConstantInt::get(CI.getType(), ShAmt); I = InsertNewInstBefore(BinaryOperator::CreateShl(I, ShAmtV, @@ -8671,11 +8725,11 @@ Instruction *InstCombiner::visitFPTrunc(FPTruncInst &CI) { Value *RHSTrunc = LookThroughFPExtensions(OpI->getOperand(1)); if (LHSTrunc->getType() != SrcTy && RHSTrunc->getType() != SrcTy) { - unsigned DstSize = CI.getType()->getPrimitiveSizeInBits(); + unsigned DstSize = CI.getType()->getScalarSizeInBits(); // If the source types were both smaller than the destination type of // the cast, do this xform. - if (LHSTrunc->getType()->getPrimitiveSizeInBits() <= DstSize && - RHSTrunc->getType()->getPrimitiveSizeInBits() <= DstSize) { + if (LHSTrunc->getType()->getScalarSizeInBits() <= DstSize && + RHSTrunc->getType()->getScalarSizeInBits() <= DstSize) { LHSTrunc = InsertCastBefore(Instruction::FPExt, LHSTrunc, CI.getType(), CI); RHSTrunc = InsertCastBefore(Instruction::FPExt, RHSTrunc, @@ -8706,7 +8760,7 @@ Instruction *InstCombiner::visitFPToUI(FPToUIInst &FI) { // 'X' value would cause an undefined result for the fptoui. if ((isa<UIToFPInst>(OpI) || isa<SIToFPInst>(OpI)) && OpI->getOperand(0)->getType() == FI.getType() && - (int)FI.getType()->getPrimitiveSizeInBits() < /*extra bit for sign */ + (int)FI.getType()->getScalarSizeInBits() < /*extra bit for sign */ OpI->getType()->getFPMantissaWidth()) return ReplaceInstUsesWith(FI, OpI->getOperand(0)); @@ -8726,7 +8780,7 @@ Instruction *InstCombiner::visitFPToSI(FPToSIInst &FI) { // 'X' value would cause an undefined result for the fptoui. if ((isa<UIToFPInst>(OpI) || isa<SIToFPInst>(OpI)) && OpI->getOperand(0)->getType() == FI.getType() && - (int)FI.getType()->getPrimitiveSizeInBits() <= + (int)FI.getType()->getScalarSizeInBits() <= OpI->getType()->getFPMantissaWidth()) return ReplaceInstUsesWith(FI, OpI->getOperand(0)); @@ -8747,7 +8801,7 @@ Instruction *InstCombiner::visitPtrToInt(PtrToIntInst &CI) { // trunc to be exposed to other transforms. Don't do this for extending // ptrtoint's, because we don't know if the target sign or zero extends its // pointers. - if (CI.getType()->getPrimitiveSizeInBits() < TD->getPointerSizeInBits()) { + if (CI.getType()->getScalarSizeInBits() < TD->getPointerSizeInBits()) { Value *P = InsertNewInstBefore(new PtrToIntInst(CI.getOperand(0), TD->getIntPtrType(), "tmp"), CI); @@ -8763,7 +8817,7 @@ Instruction *InstCombiner::visitIntToPtr(IntToPtrInst &CI) { // allows the trunc to be exposed to other transforms. 
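The width test rewritten above (now via getScalarSizeInBits) guards the fptoui(uitofp X) and fptosi(sitofp X) eliminations: the round trip is only an identity when the integer is no wider than what the FP mantissa represents exactly, about 24 effective bits for float and 53 for double. A demonstration of the failure just past the float limit:

#include <cstdint>
#include <cstdio>

int main() {
  int32_t fits = (1 << 24) - 1;   // representable exactly in float
  int32_t over = (1 << 24) + 1;   // 16777217 is not
  printf("%d -> %d\n", fits, (int32_t)(float)fits);   // unchanged
  printf("%d -> %d\n", over, (int32_t)(float)over);   // rounds to 16777216
}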
Don't do this for // extending inttoptr's, because we don't know if the target sign or zero // extends to pointers. - if (CI.getOperand(0)->getType()->getPrimitiveSizeInBits() > + if (CI.getOperand(0)->getType()->getScalarSizeInBits() > TD->getPointerSizeInBits()) { Value *P = InsertNewInstBefore(new TruncInst(CI.getOperand(0), TD->getIntPtrType(), @@ -9194,7 +9248,7 @@ Instruction *InstCombiner::visitSelectInstWithICmp(SelectInst &SI, (Pred == ICmpInst::ICMP_SGT && Op1CV.isAllOnesValue())) { Value *In = ICI->getOperand(0); Value *Sh = ConstantInt::get(In->getType(), - In->getType()->getPrimitiveSizeInBits()-1); + In->getType()->getScalarSizeInBits()-1); In = InsertNewInstBefore(BinaryOperator::CreateAShr(In, Sh, In->getName()+".lobit"), *ICI); @@ -9316,7 +9370,7 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { // The comparison constant and the result are not neccessarily the // same width. Make an all-ones value by inserting a AShr. Value *X = IC->getOperand(0); - uint32_t Bits = X->getType()->getPrimitiveSizeInBits(); + uint32_t Bits = X->getType()->getScalarSizeInBits(); Constant *ShAmt = ConstantInt::get(X->getType(), Bits-1); Instruction *SRA = BinaryOperator::Create(Instruction::AShr, X, ShAmt, "ones"); @@ -10850,8 +10904,8 @@ Instruction *InstCombiner::visitPHINode(PHINode &PN) { static Value *InsertCastToIntPtrTy(Value *V, const Type *DTy, Instruction *InsertPoint, InstCombiner *IC) { - unsigned PtrSize = DTy->getPrimitiveSizeInBits(); - unsigned VTySize = V->getType()->getPrimitiveSizeInBits(); + unsigned PtrSize = DTy->getScalarSizeInBits(); + unsigned VTySize = V->getType()->getScalarSizeInBits(); // We must cast correctly to the pointer type. Ensure that we // sign extend the integer value if it is smaller as this is // used for address computation. @@ -10892,7 +10946,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { const Type *SrcTy = CI->getOperand(0)->getType(); // We can eliminate a cast from i32 to i64 iff the target // is a 32-bit pointer target. - if (SrcTy->getPrimitiveSizeInBits() >= TD->getPointerSizeInBits()) { + if (SrcTy->getScalarSizeInBits() >= TD->getPointerSizeInBits()) { MadeChange = true; *i = CI->getOperand(0); } @@ -11105,7 +11159,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { ConstantInt *Scale = 0; if (ArrayEltSize == 1) { NewIdx = GEP.getOperand(1); - Scale = ConstantInt::get(NewIdx->getType(), 1); + Scale = ConstantInt::get(cast<IntegerType>(NewIdx->getType()), 1); } else if (ConstantInt *CI = dyn_cast<ConstantInt>(GEP.getOperand(1))) { NewIdx = ConstantInt::get(CI->getType(), 1); Scale = CI; @@ -11114,7 +11168,8 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { isa<ConstantInt>(Inst->getOperand(1))) { ConstantInt *ShAmt = cast<ConstantInt>(Inst->getOperand(1)); uint32_t ShAmtVal = ShAmt->getLimitedValue(64); - Scale = ConstantInt::get(Inst->getType(), 1ULL << ShAmtVal); + Scale = ConstantInt::get(cast<IntegerType>(Inst->getType()), + 1ULL << ShAmtVal); NewIdx = Inst->getOperand(0); } else if (Inst->getOpcode() == Instruction::Mul && isa<ConstantInt>(Inst->getOperand(1))) { @@ -11390,45 +11445,6 @@ static Instruction *InstCombineLoadCast(InstCombiner &IC, LoadInst &LI, return 0; } -/// isSafeToLoadUnconditionally - Return true if we know that executing a load -/// from this value cannot trap. 
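The ".lobit" construction above turns a sign test into data flow: shifting right by width-1 isolates the sign bit, and with an arithmetic shift it yields an all-ones/all-zeros mask, which is what visitSelectInstWithICmp wants in place of a select on (X < 0). C++ illustration (assumes arithmetic >> for signed values, as on common ABIs):

#include <cstdint>
#include <cstdio>

int main() {
  int32_t xs[5] = {-7, -1, 0, 1, 42};
  for (int i = 0; i != 5; ++i) {
    int32_t x = xs[i];
    int32_t mask = x >> 31;            // -1 if negative, else 0 (ashr)
    uint32_t bit = (uint32_t)x >> 31;  //  1 if negative, else 0 (lshr)
    printf("%d -> mask %d, lobit %u\n", x, mask, bit);
  }
}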
If it is not obviously safe to load from the -/// specified pointer, we do a quick local scan of the basic block containing -/// ScanFrom, to determine if the address is already accessed. -static bool isSafeToLoadUnconditionally(Value *V, Instruction *ScanFrom) { - // If it is an alloca it is always safe to load from. - if (isa<AllocaInst>(V)) return true; - - // If it is a global variable it is mostly safe to load from. - if (const GlobalValue *GV = dyn_cast<GlobalVariable>(V)) - // Don't try to evaluate aliases. External weak GV can be null. - return !isa<GlobalAlias>(GV) && !GV->hasExternalWeakLinkage(); - - // Otherwise, be a little bit agressive by scanning the local block where we - // want to check to see if the pointer is already being loaded or stored - // from/to. If so, the previous load or store would have already trapped, - // so there is no harm doing an extra load (also, CSE will later eliminate - // the load entirely). - BasicBlock::iterator BBI = ScanFrom, E = ScanFrom->getParent()->begin(); - - while (BBI != E) { - --BBI; - - // If we see a free or a call (which might do a free) the pointer could be - // marked invalid. - if (isa<FreeInst>(BBI) || - (isa<CallInst>(BBI) && !isa<DbgInfoIntrinsic>(BBI))) - return false; - - if (LoadInst *LI = dyn_cast<LoadInst>(BBI)) { - if (LI->getOperand(0) == V) return true; - } else if (StoreInst *SI = dyn_cast<StoreInst>(BBI)) { - if (SI->getOperand(1) == V) return true; - } - - } - return false; -} - Instruction *InstCombiner::visitLoadInst(LoadInst &LI) { Value *Op = LI.getOperand(0); diff --git a/lib/Transforms/Scalar/JumpThreading.cpp b/lib/Transforms/Scalar/JumpThreading.cpp index c0ca2df..5a70fc3 100644 --- a/lib/Transforms/Scalar/JumpThreading.cpp +++ b/lib/Transforms/Scalar/JumpThreading.cpp @@ -76,7 +76,7 @@ namespace { bool ProcessBlock(BasicBlock *BB); bool ThreadEdge(BasicBlock *BB, BasicBlock *PredBB, BasicBlock *SuccBB, unsigned JumpThreadCost); - BasicBlock *FactorCommonPHIPreds(PHINode *PN, Constant *CstVal); + BasicBlock *FactorCommonPHIPreds(PHINode *PN, Value *Val); bool ProcessBranchOnDuplicateCond(BasicBlock *PredBB, BasicBlock *DestBB); bool ProcessSwitchOnDuplicateCond(BasicBlock *PredBB, BasicBlock *DestBB); @@ -163,10 +163,10 @@ void JumpThreading::FindLoopHeaders(Function &F) { /// This is important for things like "phi i1 [true, true, false, true, x]" /// where we only need to clone the block for the true blocks once. /// -BasicBlock *JumpThreading::FactorCommonPHIPreds(PHINode *PN, Constant *CstVal) { +BasicBlock *JumpThreading::FactorCommonPHIPreds(PHINode *PN, Value *Val) { SmallVector<BasicBlock*, 16> CommonPreds; for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) - if (PN->getIncomingValue(i) == CstVal) + if (PN->getIncomingValue(i) == Val) CommonPreds.push_back(PN->getIncomingBlock(i)); if (CommonPreds.size() == 1) @@ -324,10 +324,6 @@ bool JumpThreading::ProcessBlock(BasicBlock *BB) { } } - // If there is only a single predecessor of this block, nothing to fold. - if (BB->getSinglePredecessor()) - return false; - // All the rest of our checks depend on the condition being an instruction. if (CondInst == 0) return false; @@ -346,13 +342,36 @@ bool JumpThreading::ProcessBlock(BasicBlock *BB) { CondInst->getOpcode() == Instruction::And)) return true; - // If we have "br (phi != 42)" and the phi node has any constant values as - // operands, we can thread through this block. 
- if (CmpInst *CondCmp = dyn_cast<CmpInst>(CondInst)) - if (isa<PHINode>(CondCmp->getOperand(0)) && - isa<Constant>(CondCmp->getOperand(1)) && - ProcessBranchOnCompare(CondCmp, BB)) - return true; + if (CmpInst *CondCmp = dyn_cast<CmpInst>(CondInst)) { + if (isa<PHINode>(CondCmp->getOperand(0))) { + // If we have "br (phi != 42)" and the phi node has any constant values + // as operands, we can thread through this block. + // + // If we have "br (cmp phi, x)" and the phi node contains x such that the + // comparison uniquely identifies the branch target, we can thread + // through this block. + + if (ProcessBranchOnCompare(CondCmp, BB)) + return true; + } + + // If we have a comparison, loop over the predecessors to see if there is + // a condition with the same value. + pred_iterator PI = pred_begin(BB), E = pred_end(BB); + for (; PI != E; ++PI) + if (BranchInst *PBI = dyn_cast<BranchInst>((*PI)->getTerminator())) + if (PBI->isConditional() && *PI != BB) { + if (CmpInst *CI = dyn_cast<CmpInst>(PBI->getCondition())) { + if (CI->getOperand(0) == CondCmp->getOperand(0) && + CI->getOperand(1) == CondCmp->getOperand(1) && + CI->getPredicate() == CondCmp->getPredicate()) { + // TODO: Could handle things like (x != 4) --> (x == 17) + if (ProcessBranchOnDuplicateCond(*PI, BB)) + return true; + } + } + } + } // Check for some cases that are worth simplifying. Right now we want to look // for loads that are used by a switch or by the condition for the branch. If @@ -770,12 +789,30 @@ bool JumpThreading::ProcessBranchOnLogical(Value *V, BasicBlock *BB, return ThreadEdge(BB, PredBB, SuccBB, JumpThreadCost); } +/// GetResultOfComparison - Given an icmp/fcmp predicate and the left and right +/// hand sides of the compare instruction, try to determine the result. If the +/// result can not be determined, a null pointer is returned. +static Constant *GetResultOfComparison(CmpInst::Predicate pred, + Value *LHS, Value *RHS) { + if (Constant *CLHS = dyn_cast<Constant>(LHS)) + if (Constant *CRHS = dyn_cast<Constant>(RHS)) + return ConstantExpr::getCompare(pred, CLHS, CRHS); + + if (LHS == RHS) + if (isa<IntegerType>(LHS->getType()) || isa<PointerType>(LHS->getType())) + return ICmpInst::isTrueWhenEqual(pred) ? + ConstantInt::getTrue() : ConstantInt::getFalse(); + + return 0; +} + /// ProcessBranchOnCompare - We found a branch on a comparison between a phi -/// node and a constant. If the PHI node contains any constants as inputs, we -/// can fold the compare for that edge and thread through it. +/// node and a value. If we can identify when the comparison is true between +/// the phi inputs and the value, we can fold the compare for that edge and +/// thread through it. bool JumpThreading::ProcessBranchOnCompare(CmpInst *Cmp, BasicBlock *BB) { PHINode *PN = cast<PHINode>(Cmp->getOperand(0)); - Constant *RHS = cast<Constant>(Cmp->getOperand(1)); + Value *RHS = Cmp->getOperand(1); // If the phi isn't in the current block, an incoming edge to this block // doesn't control the destination. @@ -784,18 +821,17 @@ bool JumpThreading::ProcessBranchOnCompare(CmpInst *Cmp, BasicBlock *BB) { // We can do this simplification if any comparisons fold to true or false. // See if any do. 
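GetResultOfComparison above only takes the LHS == RHS shortcut for integer and pointer types, and that restriction is load-bearing: for floating point, x == x is false when x is NaN, so the answer cannot be read off the predicate alone. A quick demonstration:

#include <cmath>
#include <cstdio>

int main() {
  double n = std::nan("");
  printf("nan == nan: %d\n", n == n);   // 0 -- folding to 'true' would be wrong
  int i = 5;
  printf("i == i:     %d\n", i == i);   // 1 -- always, safe to fold
}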
- Constant *PredCst = 0; + Value *PredVal = 0; bool TrueDirection = false; for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { - PredCst = dyn_cast<Constant>(PN->getIncomingValue(i)); - if (PredCst == 0) continue; + PredVal = PN->getIncomingValue(i); + + Constant *Res = GetResultOfComparison(Cmp->getPredicate(), PredVal, RHS); + if (!Res) { + PredVal = 0; + continue; + } - Constant *Res; - if (ICmpInst *ICI = dyn_cast<ICmpInst>(Cmp)) - Res = ConstantExpr::getICmp(ICI->getPredicate(), PredCst, RHS); - else - Res = ConstantExpr::getFCmp(cast<FCmpInst>(Cmp)->getPredicate(), - PredCst, RHS); // If this folded to a constant expr, we can't do anything. if (ConstantInt *ResC = dyn_cast<ConstantInt>(Res)) { TrueDirection = ResC->getZExtValue(); @@ -808,11 +844,11 @@ bool JumpThreading::ProcessBranchOnCompare(CmpInst *Cmp, BasicBlock *BB) { } // Otherwise, we can't fold this input. - PredCst = 0; + PredVal = 0; } // If no match, bail out. - if (PredCst == 0) + if (PredVal == 0) return false; // See if the cost of duplicating this block is low enough. @@ -825,7 +861,7 @@ bool JumpThreading::ProcessBranchOnCompare(CmpInst *Cmp, BasicBlock *BB) { // If so, we can actually do this threading. Merge any common predecessors // that will act the same. - BasicBlock *PredBB = FactorCommonPHIPreds(PN, PredCst); + BasicBlock *PredBB = FactorCommonPHIPreds(PN, PredVal); // Next, get our successor. BasicBlock *SuccBB = BB->getTerminator()->getSuccessor(!TrueDirection); diff --git a/lib/Transforms/Scalar/LoopIndexSplit.cpp b/lib/Transforms/Scalar/LoopIndexSplit.cpp index 9c78596..6f7a7f8 100644 --- a/lib/Transforms/Scalar/LoopIndexSplit.cpp +++ b/lib/Transforms/Scalar/LoopIndexSplit.cpp @@ -290,13 +290,13 @@ static bool isUsedOutsideLoop(Value *V, Loop *L) { // Return V+1 static Value *getPlusOne(Value *V, bool Sign, Instruction *InsertPt) { - ConstantInt *One = ConstantInt::get(V->getType(), 1, Sign); + Constant *One = ConstantInt::get(V->getType(), 1, Sign); return BinaryOperator::CreateAdd(V, One, "lsp", InsertPt); } // Return V-1 static Value *getMinusOne(Value *V, bool Sign, Instruction *InsertPt) { - ConstantInt *One = ConstantInt::get(V->getType(), 1, Sign); + Constant *One = ConstantInt::get(V->getType(), 1, Sign); return BinaryOperator::CreateSub(V, One, "lsp", InsertPt); } diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp index 944f409..7579748 100644 --- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -143,10 +143,10 @@ namespace { /// inside the loop then try to eliminate the cast opeation. void OptimizeShadowIV(Loop *L); - /// OptimizeSMax - Rewrite the loop's terminating condition - /// if it uses an smax computation. - ICmpInst *OptimizeSMax(Loop *L, ICmpInst *Cond, - IVStrideUse* &CondUse); + /// OptimizeMax - Rewrite the loop's terminating condition + /// if it uses a max computation. + ICmpInst *OptimizeMax(Loop *L, ICmpInst *Cond, + IVStrideUse* &CondUse); bool FindIVUserForCond(ICmpInst *Cond, IVStrideUse *&CondUse, const SCEVHandle *&CondStride); @@ -336,13 +336,6 @@ namespace { /// EmittedBase. Value *OperandValToReplace; - /// isSigned - The stride (and thus also the Base) of this use may be in - /// a narrower type than the use itself (OperandValToReplace->getType()). - /// When this is the case, the isSigned field indicates whether the - /// IV expression should be signed-extended instead of zero-extended to - /// fit the type of the use. 
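The generalized ProcessBranchOnCompare above threads branches whose compare pits a phi against a value that also appears among the phi's inputs. A source-level shape of such a block (hypothetical function, for illustration only):

#include <cstdio>

static int f(int a, bool p) {
  int x = p ? a : 42;   // becomes phi [a, %then], [42, %else]
  if (x != a)           // on the %then edge this is a != a: always false,
    return 1;           // so that edge can be threaded past the branch
  return 0;
}

int main() { printf("%d %d\n", f(3, true), f(3, false)); }   // 0 1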
- bool isSigned; - /// Imm - The immediate value that should be added to the base immediately /// before Inst, because it will be folded into the imm field of the /// instruction. This is also sometimes used for loop-variant values that @@ -363,7 +356,6 @@ namespace { BasedUser(IVStrideUse &IVSU, ScalarEvolution *se) : SE(se), Base(IVSU.getOffset()), Inst(IVSU.getUser()), OperandValToReplace(IVSU.getOperandValToReplace()), - isSigned(IVSU.isSigned()), Imm(SE->getIntegerSCEV(0, Base->getType())), isUseOfPostIncrementedValue(IVSU.isUseOfPostIncrementedValue()) {} @@ -428,11 +420,6 @@ Value *BasedUser::InsertCodeForBaseAtPosition(const SCEVHandle &NewBase, NewValSCEV = SE->getAddExpr(NewValSCEV, Imm); } - if (isSigned) - NewValSCEV = SE->getTruncateOrSignExtend(NewValSCEV, Ty); - else - NewValSCEV = SE->getTruncateOrZeroExtend(NewValSCEV, Ty); - return Rewriter.expandCodeFor(NewValSCEV, Ty, IP); } @@ -592,7 +579,7 @@ static void MoveLoopVariantsToImmediateField(SCEVHandle &Val, SCEVHandle &Imm, if (Val->isLoopInvariant(L)) return; // Nothing to do. if (const SCEVAddExpr *SAE = dyn_cast<SCEVAddExpr>(Val)) { - std::vector<SCEVHandle> NewOps; + SmallVector<SCEVHandle, 4> NewOps; NewOps.reserve(SAE->getNumOperands()); for (unsigned i = 0; i != SAE->getNumOperands(); ++i) @@ -613,7 +600,7 @@ static void MoveLoopVariantsToImmediateField(SCEVHandle &Val, SCEVHandle &Imm, SCEVHandle Start = SARE->getStart(); MoveLoopVariantsToImmediateField(Start, Imm, L, SE); - std::vector<SCEVHandle> Ops(SARE->op_begin(), SARE->op_end()); + SmallVector<SCEVHandle, 4> Ops(SARE->op_begin(), SARE->op_end()); Ops[0] = Start; Val = SE->getAddRecExpr(Ops, SARE->getLoop()); } else { @@ -633,7 +620,7 @@ static void MoveImmediateValues(const TargetLowering *TLI, bool isAddress, Loop *L, ScalarEvolution *SE) { if (const SCEVAddExpr *SAE = dyn_cast<SCEVAddExpr>(Val)) { - std::vector<SCEVHandle> NewOps; + SmallVector<SCEVHandle, 4> NewOps; NewOps.reserve(SAE->getNumOperands()); for (unsigned i = 0; i != SAE->getNumOperands(); ++i) { @@ -660,7 +647,7 @@ static void MoveImmediateValues(const TargetLowering *TLI, MoveImmediateValues(TLI, AccessTy, Start, Imm, isAddress, L, SE); if (Start != SARE->getStart()) { - std::vector<SCEVHandle> Ops(SARE->op_begin(), SARE->op_end()); + SmallVector<SCEVHandle, 4> Ops(SARE->op_begin(), SARE->op_end()); Ops[0] = Start; Val = SE->getAddRecExpr(Ops, SARE->getLoop()); } @@ -717,7 +704,7 @@ static void MoveImmediateValues(const TargetLowering *TLI, /// SeparateSubExprs - Decompose Expr into all of the subexpressions that are /// added together. This is used to reassociate common addition subexprs /// together for maximal sharing when rewriting bases. -static void SeparateSubExprs(std::vector<SCEVHandle> &SubExprs, +static void SeparateSubExprs(SmallVector<SCEVHandle, 16> &SubExprs, SCEVHandle Expr, ScalarEvolution *SE) { if (const SCEVAddExpr *AE = dyn_cast<SCEVAddExpr>(Expr)) { @@ -729,7 +716,7 @@ static void SeparateSubExprs(std::vector<SCEVHandle> &SubExprs, SubExprs.push_back(Expr); } else { // Compute the addrec with zero as its base. - std::vector<SCEVHandle> Ops(SARE->op_begin(), SARE->op_end()); + SmallVector<SCEVHandle, 4> Ops(SARE->op_begin(), SARE->op_end()); Ops[0] = Zero; // Start with zero base. SubExprs.push_back(SE->getAddRecExpr(Ops, SARE->getLoop())); @@ -783,9 +770,9 @@ RemoveCommonExpressionsFromUseBases(std::vector<BasedUser> &Uses, // UniqueSubExprs - Keep track of all of the subexpressions we see in the // order we see them. 
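The container swaps in these LoopStrengthReduce hunks (std::vector<SCEVHandle> to SmallVector) trade heap allocation for inline storage: SmallVector keeps its first N elements inside the object itself, which pays off for the short operand lists that dominate SCEV expressions. A minimal usage sketch, assuming LLVM's ADT headers are on the include path:

#include "llvm/ADT/SmallVector.h"
#include <cstdio>

int main() {
  llvm::SmallVector<int, 4> Ops;   // first 4 elements live inline, no malloc
  for (int i = 0; i != 3; ++i)
    Ops.push_back(i);              // stays within the inline buffer
  printf("%u elements, no heap allocation yet\n", (unsigned)Ops.size());
}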
- std::vector<SCEVHandle> UniqueSubExprs; + SmallVector<SCEVHandle, 16> UniqueSubExprs; - std::vector<SCEVHandle> SubExprs; + SmallVector<SCEVHandle, 16> SubExprs; unsigned NumUsesInsideLoop = 0; for (unsigned i = 0; i != NumUses; ++i) { // If the user is outside the loop, just ignore it for base computation. @@ -1129,11 +1116,11 @@ static bool isNonConstantNegative(const SCEVHandle &Expr) { return SC->getValue()->getValue().isNegative(); } -// CollectIVUsers - Transform our list of users and offsets to a bit more -// complex table. In this new vector, each 'BasedUser' contains 'Base', the base -// of the strided accesses, as well as the old information from Uses. We -// progressively move information from the Base field to the Imm field, until -// we eventually have the full access expression to rewrite the use. +/// CollectIVUsers - Transform our list of users and offsets to a bit more +/// complex table. In this new vector, each 'BasedUser' contains 'Base', the base +/// of the strided accesses, as well as the old information from Uses. We +/// progressively move information from the Base field to the Imm field, until +/// we eventually have the full access expression to rewrite the use. SCEVHandle LoopStrengthReduce::CollectIVUsers(const SCEVHandle &Stride, IVUsersOfOneStride &Uses, Loop *L, @@ -2008,15 +1995,15 @@ ICmpInst *LoopStrengthReduce::ChangeCompareStride(Loop *L, ICmpInst *Cond, if (!isa<PointerType>(NewCmpTy)) NewCmpRHS = ConstantInt::get(NewCmpTy, NewCmpVal); else { - ConstantInt *CI = ConstantInt::get(NewCmpIntTy, NewCmpVal); + Constant *CI = ConstantInt::get(NewCmpIntTy, NewCmpVal); NewCmpRHS = ConstantExpr::getIntToPtr(CI, NewCmpTy); } NewOffset = TyBits == NewTyBits ? SE->getMulExpr(CondUse->getOffset(), - SE->getConstant(ConstantInt::get(CmpTy, Scale))) - : SE->getConstant(ConstantInt::get(NewCmpIntTy, + SE->getConstant(CmpTy, Scale)) + : SE->getConstant(NewCmpIntTy, cast<SCEVConstant>(CondUse->getOffset())->getValue() - ->getSExtValue()*Scale)); + ->getSExtValue()*Scale); break; } } @@ -2047,7 +2034,7 @@ ICmpInst *LoopStrengthReduce::ChangeCompareStride(Loop *L, ICmpInst *Cond, OldCond->replaceAllUsesWith(Cond); OldCond->eraseFromParent(); - IU->IVUsesByStride[*NewStride]->addUser(NewOffset, Cond, NewCmpLHS, false); + IU->IVUsesByStride[*NewStride]->addUser(NewOffset, Cond, NewCmpLHS); CondUse = &IU->IVUsesByStride[*NewStride]->Users.back(); CondStride = NewStride; ++NumEliminated; @@ -2057,8 +2044,8 @@ ICmpInst *LoopStrengthReduce::ChangeCompareStride(Loop *L, ICmpInst *Cond, return Cond; } -/// OptimizeSMax - Rewrite the loop's terminating condition if it uses -/// an smax computation. +/// OptimizeMax - Rewrite the loop's terminating condition if it uses +/// a max computation. /// /// This is a narrow solution to a specific, but acute, problem. For loops /// like this: @@ -2068,10 +2055,10 @@ ICmpInst *LoopStrengthReduce::ChangeCompareStride(Loop *L, ICmpInst *Cond, /// p[i] = 0.0; /// } while (++i < n); /// -/// where the comparison is signed, the trip count isn't just 'n', because -/// 'n' could be negative. And unfortunately this can come up even for loops -/// where the user didn't use a C do-while loop. For example, seemingly -/// well-behaved top-test loops will commonly be lowered like this: +/// the trip count isn't just 'n', because 'n' might not be positive. And +/// unfortunately this can come up even for loops where the user didn't use +/// a C do-while loop. 
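The rewritten comment block above describes this loop shape; here it is in plain C++. Because the body is guaranteed to run once, the do-while's trip count is max(n, 1) rather than n, which is exactly where ScalarEvolution's smax (or umax, for unsigned compares) expression comes from:

#include <cstdio>

static int trips(int n) {
  int i = 0, count = 0;
  do { ++count; } while (++i < n);   // body runs even when n <= 0
  return count;
}

int main() {
  printf("%d %d %d\n", trips(-3), trips(0), trips(5));   // 1 1 5
}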
For example, seemingly well-behaved top-test loops +/// will commonly be lowered like this: // /// if (n > 0) { /// i = 0; @@ -2084,14 +2071,14 @@ ICmpInst *LoopStrengthReduce::ChangeCompareStride(Loop *L, ICmpInst *Cond, /// test in such a way that indvars can't find it. /// /// When indvars can't find the if test in loops like this, it creates a -/// signed-max expression, which allows it to give the loop a canonical +/// max expression, which allows it to give the loop a canonical /// induction variable: /// /// i = 0; -/// smax = n < 1 ? 1 : n; +/// max = n < 1 ? 1 : n; /// do { /// p[i] = 0.0; -/// } while (++i != smax); +/// } while (++i != max); /// /// Canonical induction variables are necessary because the loop passes /// are designed around them. The most obvious example of this is the @@ -2107,8 +2094,8 @@ ICmpInst *LoopStrengthReduce::ChangeCompareStride(Loop *L, ICmpInst *Cond, /// rewriting their conditions from ICMP_NE back to ICMP_SLT, and deleting /// the instructions for the maximum computation. /// -ICmpInst *LoopStrengthReduce::OptimizeSMax(Loop *L, ICmpInst *Cond, - IVStrideUse* &CondUse) { +ICmpInst *LoopStrengthReduce::OptimizeMax(Loop *L, ICmpInst *Cond, + IVStrideUse* &CondUse) { // Check that the loop matches the pattern we're looking for. if (Cond->getPredicate() != CmpInst::ICMP_EQ && Cond->getPredicate() != CmpInst::ICMP_NE) @@ -2126,12 +2113,19 @@ ICmpInst *LoopStrengthReduce::OptimizeSMax(Loop *L, ICmpInst *Cond, SCEVHandle IterationCount = SE->getAddExpr(BackedgeTakenCount, One); // Check for a max calculation that matches the pattern. - const SCEVSMaxExpr *SMax = dyn_cast<SCEVSMaxExpr>(IterationCount); - if (!SMax || SMax != SE->getSCEV(Sel)) return Cond; + if (!isa<SCEVSMaxExpr>(IterationCount) && !isa<SCEVUMaxExpr>(IterationCount)) + return Cond; + const SCEVNAryExpr *Max = cast<SCEVNAryExpr>(IterationCount); + if (Max != SE->getSCEV(Sel)) return Cond; + + // To handle a max with more than two operands, this optimization would + // require additional checking and setup. + if (Max->getNumOperands() != 2) + return Cond; - SCEVHandle SMaxLHS = SMax->getOperand(0); - SCEVHandle SMaxRHS = SMax->getOperand(1); - if (!SMaxLHS || SMaxLHS != One) return Cond; + SCEVHandle MaxLHS = Max->getOperand(0); + SCEVHandle MaxRHS = Max->getOperand(1); + if (!MaxLHS || MaxLHS != One) return Cond; // Check the relevant induction variable for conformance to // the pattern. @@ -2148,19 +2142,23 @@ ICmpInst *LoopStrengthReduce::OptimizeSMax(Loop *L, ICmpInst *Cond, // Check the right operand of the select, and remember it, as it will // be used in the new comparison instruction. Value *NewRHS = 0; - if (SE->getSCEV(Sel->getOperand(1)) == SMaxRHS) + if (SE->getSCEV(Sel->getOperand(1)) == MaxRHS) NewRHS = Sel->getOperand(1); - else if (SE->getSCEV(Sel->getOperand(2)) == SMaxRHS) + else if (SE->getSCEV(Sel->getOperand(2)) == MaxRHS) NewRHS = Sel->getOperand(2); if (!NewRHS) return Cond; + // Determine the new comparison opcode. It may be signed or unsigned, + // and the original comparison may be either equality or inequality. + CmpInst::Predicate Pred = + isa<SCEVSMaxExpr>(Max) ? CmpInst::ICMP_SLT : CmpInst::ICMP_ULT; + if (Cond->getPredicate() == CmpInst::ICMP_EQ) + Pred = CmpInst::getInversePredicate(Pred); + // Ok, everything looks ok to change the condition into an SLT or SGE and // delete the max calculation. ICmpInst *NewCond = - new ICmpInst(Cond->getPredicate() == CmpInst::ICMP_NE ? 
- CmpInst::ICMP_SLT : - CmpInst::ICMP_SGE, - Cond->getOperand(0), NewRHS, "scmp", Cond); + new ICmpInst(Pred, Cond->getOperand(0), NewRHS, "scmp", Cond); // Delete the max calculation instructions. Cond->replaceAllUsesWith(NewCond); @@ -2242,7 +2240,7 @@ void LoopStrengthReduce::OptimizeShadowIV(Loop *L) { ConstantInt *Init = dyn_cast<ConstantInt>(PH->getIncomingValue(Entry)); if (!Init) continue; - ConstantFP *NewInit = ConstantFP::get(DestTy, Init->getZExtValue()); + Constant *NewInit = ConstantFP::get(DestTy, Init->getZExtValue()); BinaryOperator *Incr = dyn_cast<BinaryOperator>(PH->getIncomingValue(Latch)); @@ -2266,7 +2264,7 @@ void LoopStrengthReduce::OptimizeShadowIV(Loop *L) { PHINode *NewPH = PHINode::Create(DestTy, "IV.S.", PH); /* create new increment. '++d' in above example. */ - ConstantFP *CFP = ConstantFP::get(DestTy, C->getZExtValue()); + Constant *CFP = ConstantFP::get(DestTy, C->getZExtValue()); BinaryOperator *NewIncr = BinaryOperator::Create(Incr->getOpcode() == Instruction::Add ? Instruction::FAdd : Instruction::FSub, @@ -2284,9 +2282,9 @@ void LoopStrengthReduce::OptimizeShadowIV(Loop *L) { } } -// OptimizeIndvars - Now that IVUsesByStride is set up with all of the indvar -// uses in the loop, look to see if we can eliminate some, in favor of using -// common indvars for the different uses. +/// OptimizeIndvars - Now that IVUsesByStride is set up with all of the indvar +/// uses in the loop, look to see if we can eliminate some, in favor of using +/// common indvars for the different uses. void LoopStrengthReduce::OptimizeIndvars(Loop *L) { // TODO: implement optzns here. @@ -2301,11 +2299,11 @@ void LoopStrengthReduce::OptimizeLoopTermCond(Loop *L) { // induction variable, to allow coalescing the live ranges for the IV into // one register value. BasicBlock *LatchBlock = L->getLoopLatch(); - BasicBlock *ExitBlock = L->getExitingBlock(); - if (!ExitBlock) + BasicBlock *ExitingBlock = L->getExitingBlock(); + if (!ExitingBlock) // Multiple exits, just look at the exit in the latch block if there is one. - ExitBlock = LatchBlock; - BranchInst *TermBr = dyn_cast<BranchInst>(ExitBlock->getTerminator()); + ExitingBlock = LatchBlock; + BranchInst *TermBr = dyn_cast<BranchInst>(ExitingBlock->getTerminator()); if (!TermBr) return; if (TermBr->isUnconditional() || !isa<ICmpInst>(TermBr->getCondition())) @@ -2318,7 +2316,7 @@ void LoopStrengthReduce::OptimizeLoopTermCond(Loop *L) { if (!FindIVUserForCond(Cond, CondUse, CondStride)) return; // setcc doesn't use the IV. - if (ExitBlock != LatchBlock) { + if (ExitingBlock != LatchBlock) { if (!Cond->hasOneUse()) // See below, we don't want the condition to be cloned. return; @@ -2373,14 +2371,14 @@ void LoopStrengthReduce::OptimizeLoopTermCond(Loop *L) { StrideNoReuse.insert(*CondStride); } - // If the trip count is computed in terms of an smax (due to ScalarEvolution + // If the trip count is computed in terms of a max (due to ScalarEvolution // being unable to find a sufficient guard, for example), change the loop - // comparison to use SLT instead of NE. - Cond = OptimizeSMax(L, Cond, CondUse); + // comparison to use SLT or ULT instead of NE. + Cond = OptimizeMax(L, Cond, CondUse); // If possible, change stride and operands of the compare instruction to // eliminate one stride. 
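The predicate logic the rewritten exit condition above ends up with: a signed max pairs with ICMP_SLT, an unsigned max with ICMP_ULT, and an original ICMP_EQ exit test flips the choice to its inverse. A compact stand-in table (plain enums in place of CmpInst predicates):

#include <cstdio>

enum Pred { SLT, ULT, SGE, UGE };

static Pred pick(bool isSignedMax, bool condWasEQ) {
  Pred P = isSignedMax ? SLT : ULT;
  if (condWasEQ)                   // getInversePredicate: SLT->SGE, ULT->UGE
    P = (P == SLT) ? SGE : UGE;
  return P;
}

int main() {
  printf("%d %d %d %d\n", pick(true, false), pick(false, false),
         pick(true, true), pick(false, true));   // 0 1 2 3
}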
- if (ExitBlock == LatchBlock) + if (ExitingBlock == LatchBlock) Cond = ChangeCompareStride(L, Cond, CondUse, CondStride); // It's possible for the setcc instruction to be anywhere in the loop, and @@ -2397,8 +2395,7 @@ void LoopStrengthReduce::OptimizeLoopTermCond(Loop *L) { // Clone the IVUse, as the old use still exists! IU->IVUsesByStride[*CondStride]->addUser(CondUse->getOffset(), Cond, - CondUse->getOperandValToReplace(), - false); + CondUse->getOperandValToReplace()); CondUse = &IU->IVUsesByStride[*CondStride]->Users.back(); } } @@ -2413,9 +2410,9 @@ void LoopStrengthReduce::OptimizeLoopTermCond(Loop *L) { ++NumLoopCond; } -// OptimizeLoopCountIV - If, after all sharing of IVs, the IV used for deciding -// when to exit the loop is used only for that purpose, try to rearrange things -// so it counts down to a test against zero. +/// OptimizeLoopCountIV - If, after all sharing of IVs, the IV used for deciding +/// when to exit the loop is used only for that purpose, try to rearrange things +/// so it counts down to a test against zero. void LoopStrengthReduce::OptimizeLoopCountIV(Loop *L) { // If the number of times the loop is executed isn't computable, give up. @@ -2506,7 +2503,7 @@ void LoopStrengthReduce::OptimizeLoopCountIV(Loop *L) { Value *startVal = phi->getIncomingValue(inBlock); Value *endVal = Cond->getOperand(1); // FIXME check for case where both are constant - ConstantInt* Zero = ConstantInt::get(Cond->getOperand(1)->getType(), 0); + Constant* Zero = ConstantInt::get(Cond->getOperand(1)->getType(), 0); BinaryOperator *NewStartVal = BinaryOperator::Create(Instruction::Sub, endVal, startVal, "tmp", PreInsertPt); diff --git a/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/lib/Transforms/Scalar/ScalarReplAggregates.cpp index 7143c7b..d89790c 100644 --- a/lib/Transforms/Scalar/ScalarReplAggregates.cpp +++ b/lib/Transforms/Scalar/ScalarReplAggregates.cpp @@ -820,10 +820,8 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *BCInst, StoreVal = Constant::getNullValue(EltTy); // 0.0, null, 0, <0,0> } else { // If EltTy is a vector type, get the element type. - const Type *ValTy = EltTy; - if (const VectorType *VTy = dyn_cast<VectorType>(ValTy)) - ValTy = VTy->getElementType(); - + const Type *ValTy = EltTy->getScalarType(); + // Construct an integer with the right value. unsigned EltSize = TD->getTypeSizeInBits(ValTy); APInt OneVal(EltSize, CI->getZExtValue()); diff --git a/lib/Transforms/Scalar/SimplifyLibCalls.cpp b/lib/Transforms/Scalar/SimplifyLibCalls.cpp index 59989c9..bbcb792 100644 --- a/lib/Transforms/Scalar/SimplifyLibCalls.cpp +++ b/lib/Transforms/Scalar/SimplifyLibCalls.cpp @@ -135,7 +135,11 @@ Value *LibCallOptimization::EmitStrLen(Value *Ptr, IRBuilder<> &B) { TD->getIntPtrType(), PointerType::getUnqual(Type::Int8Ty), NULL); - return B.CreateCall(StrLen, CastToCStr(Ptr, B), "strlen"); + CallInst *CI = B.CreateCall(StrLen, CastToCStr(Ptr, B), "strlen"); + if (const Function *F = dyn_cast<Function>(StrLen->stripPointerCasts())) + CI->setCallingConv(F->getCallingConv()); + + return CI; } /// EmitMemCpy - Emit a call to the memcpy function to the builder. 
This always @@ -164,7 +168,12 @@ Value *LibCallOptimization::EmitMemChr(Value *Ptr, Value *Val, PointerType::getUnqual(Type::Int8Ty), Type::Int32Ty, TD->getIntPtrType(), NULL); - return B.CreateCall3(MemChr, CastToCStr(Ptr, B), Val, Len, "memchr"); + CallInst *CI = B.CreateCall3(MemChr, CastToCStr(Ptr, B), Val, Len, "memchr"); + + if (const Function *F = dyn_cast<Function>(MemChr->stripPointerCasts())) + CI->setCallingConv(F->getCallingConv()); + + return CI; } /// EmitMemCmp - Emit a call to the memcmp function. @@ -182,8 +191,13 @@ Value *LibCallOptimization::EmitMemCmp(Value *Ptr1, Value *Ptr2, PointerType::getUnqual(Type::Int8Ty), PointerType::getUnqual(Type::Int8Ty), TD->getIntPtrType(), NULL); - return B.CreateCall3(MemCmp, CastToCStr(Ptr1, B), CastToCStr(Ptr2, B), - Len, "memcmp"); + CallInst *CI = B.CreateCall3(MemCmp, CastToCStr(Ptr1, B), CastToCStr(Ptr2, B), + Len, "memcmp"); + + if (const Function *F = dyn_cast<Function>(MemCmp->stripPointerCasts())) + CI->setCallingConv(F->getCallingConv()); + + return CI; } /// EmitMemSet - Emit a call to the memset function @@ -217,20 +231,30 @@ Value *LibCallOptimization::EmitUnaryFloatFnCall(Value *Op, const char *Name, NameBuffer[NameLen+1] = 0; Name = NameBuffer; } - + Module *M = Caller->getParent(); - Value *Callee = M->getOrInsertFunction(Name, Op->getType(), + Value *Callee = M->getOrInsertFunction(Name, Op->getType(), Op->getType(), NULL); - return B.CreateCall(Callee, Op, Name); + CallInst *CI = B.CreateCall(Callee, Op, Name); + + if (const Function *F = dyn_cast<Function>(Callee->stripPointerCasts())) + CI->setCallingConv(F->getCallingConv()); + + return CI; } /// EmitPutChar - Emit a call to the putchar function. This assumes that Char /// is an integer. void LibCallOptimization::EmitPutChar(Value *Char, IRBuilder<> &B) { Module *M = Caller->getParent(); - Value *F = M->getOrInsertFunction("putchar", Type::Int32Ty, - Type::Int32Ty, NULL); - B.CreateCall(F, B.CreateIntCast(Char, Type::Int32Ty, "chari"), "putchar"); + Value *PutChar = M->getOrInsertFunction("putchar", Type::Int32Ty, + Type::Int32Ty, NULL); + CallInst *CI = B.CreateCall(PutChar, + B.CreateIntCast(Char, Type::Int32Ty, "chari"), + "putchar"); + + if (const Function *F = dyn_cast<Function>(PutChar->stripPointerCasts())) + CI->setCallingConv(F->getCallingConv()); } /// EmitPutS - Emit a call to the puts function. This assumes that Str is @@ -241,10 +265,14 @@ void LibCallOptimization::EmitPutS(Value *Str, IRBuilder<> &B) { AWI[0] = AttributeWithIndex::get(1, Attribute::NoCapture); AWI[1] = AttributeWithIndex::get(~0u, Attribute::NoUnwind); - Value *F = M->getOrInsertFunction("puts", AttrListPtr::get(AWI, 2), - Type::Int32Ty, - PointerType::getUnqual(Type::Int8Ty), NULL); - B.CreateCall(F, CastToCStr(Str, B), "puts"); + Value *PutS = M->getOrInsertFunction("puts", AttrListPtr::get(AWI, 2), + Type::Int32Ty, + PointerType::getUnqual(Type::Int8Ty), + NULL); + CallInst *CI = B.CreateCall(PutS, CastToCStr(Str, B), "puts"); + if (const Function *F = dyn_cast<Function>(PutS->stripPointerCasts())) + CI->setCallingConv(F->getCallingConv()); + } /// EmitFPutC - Emit a call to the fputc function. 
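Every emitter in these SimplifyLibCalls hunks repeats one pattern: getOrInsertFunction can hand back a bitcast constant rather than a Function when a declaration with a conflicting prototype already exists, so the code looks through pointer casts before copying the callee's calling convention onto the new call site. The pattern isolated, using only API calls visible in the diff (a sketch, assuming this tree's header locations):

#include "llvm/Function.h"
#include "llvm/Instructions.h"
#include "llvm/Support/IRBuilder.h"

static llvm::CallInst *EmitCallMatchingCC(llvm::Value *Callee, llvm::Value *Arg,
                                          llvm::IRBuilder<> &B) {
  llvm::CallInst *CI = B.CreateCall(Callee, Arg, "tmp");
  if (const llvm::Function *F =
          llvm::dyn_cast<llvm::Function>(Callee->stripPointerCasts()))
    CI->setCallingConv(F->getCallingConv());  // keep call site and callee in sync
  return CI;
}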
This assumes that Char is @@ -258,12 +286,14 @@ void LibCallOptimization::EmitFPutC(Value *Char, Value *File, IRBuilder<> &B) { if (isa<PointerType>(File->getType())) F = M->getOrInsertFunction("fputc", AttrListPtr::get(AWI, 2), Type::Int32Ty, Type::Int32Ty, File->getType(), NULL); - else F = M->getOrInsertFunction("fputc", Type::Int32Ty, Type::Int32Ty, File->getType(), NULL); Char = B.CreateIntCast(Char, Type::Int32Ty, "chari"); - B.CreateCall2(F, Char, File, "fputc"); + CallInst *CI = B.CreateCall2(F, Char, File, "fputc"); + + if (const Function *Fn = dyn_cast<Function>(F->stripPointerCasts())) + CI->setCallingConv(Fn->getCallingConv()); } /// EmitFPutS - Emit a call to the puts function. Str is required to be a @@ -283,7 +313,10 @@ void LibCallOptimization::EmitFPutS(Value *Str, Value *File, IRBuilder<> &B) { F = M->getOrInsertFunction("fputs", Type::Int32Ty, PointerType::getUnqual(Type::Int8Ty), File->getType(), NULL); - B.CreateCall2(F, CastToCStr(Str, B), File, "fputs"); + CallInst *CI = B.CreateCall2(F, CastToCStr(Str, B), File, "fputs"); + + if (const Function *Fn = dyn_cast<Function>(F->stripPointerCasts())) + CI->setCallingConv(Fn->getCallingConv()); } /// EmitFWrite - Emit a call to the fwrite function. This assumes that Ptr is @@ -307,8 +340,11 @@ void LibCallOptimization::EmitFWrite(Value *Ptr, Value *Size, Value *File, PointerType::getUnqual(Type::Int8Ty), TD->getIntPtrType(), TD->getIntPtrType(), File->getType(), NULL); - B.CreateCall4(F, CastToCStr(Ptr, B), Size, - ConstantInt::get(TD->getIntPtrType(), 1), File); + CallInst *CI = B.CreateCall4(F, CastToCStr(Ptr, B), Size, + ConstantInt::get(TD->getIntPtrType(), 1), File); + + if (const Function *Fn = dyn_cast<Function>(F->stripPointerCasts())) + CI->setCallingConv(Fn->getCallingConv()); } //===----------------------------------------------------------------------===// @@ -673,12 +709,10 @@ struct VISIBILITY_HIDDEN StrCmpOpt : public LibCallOptimization { // strcmp(P, "x") -> memcmp(P, "x", 2) uint64_t Len1 = GetStringLength(Str1P); uint64_t Len2 = GetStringLength(Str2P); - if (Len1 || Len2) { - // Choose the smallest Len excluding 0 which means 'unknown'. 
- if (!Len1 || (Len2 && Len2 < Len1)) - Len1 = Len2; + if (Len1 && Len2) { return EmitMemCmp(Str1P, Str2P, - ConstantInt::get(TD->getIntPtrType(), Len1), B); + ConstantInt::get(TD->getIntPtrType(), + std::min(Len1, Len2)), B); } return 0; @@ -1039,7 +1073,7 @@ struct VISIBILITY_HIDDEN Exp2Opt : public LibCallOptimization { if (OpC->getOperand(0)->getType()->getPrimitiveSizeInBits() < 32) LdExpArg = B.CreateZExt(OpC->getOperand(0), Type::Int32Ty, "tmp"); } - + if (LdExpArg) { const char *Name; if (Op->getType() == Type::FloatTy) @@ -1056,12 +1090,15 @@ struct VISIBILITY_HIDDEN Exp2Opt : public LibCallOptimization { Module *M = Caller->getParent(); Value *Callee = M->getOrInsertFunction(Name, Op->getType(), Op->getType(), Type::Int32Ty,NULL); - return B.CreateCall2(Callee, One, LdExpArg); + CallInst *CI = B.CreateCall2(Callee, One, LdExpArg); + if (const Function *F = dyn_cast<Function>(Callee->stripPointerCasts())) + CI->setCallingConv(F->getCallingConv()); + + return CI; } return 0; } }; - //===---------------------------------------===// // Double -> Float Shrinking Optimizations for Unary Functions like 'floor' @@ -1072,7 +1109,7 @@ struct VISIBILITY_HIDDEN UnaryDoubleFPOpt : public LibCallOptimization { if (FT->getNumParams() != 1 || FT->getReturnType() != Type::DoubleTy || FT->getParamType(0) != Type::DoubleTy) return 0; - + // If this is something like 'floor((double)floatval)', convert to floorf. FPExtInst *Cast = dyn_cast<FPExtInst>(CI->getOperand(1)); if (Cast == 0 || Cast->getOperand(0)->getType() != Type::FloatTy) diff --git a/lib/Transforms/Scalar/TailRecursionElimination.cpp b/lib/Transforms/Scalar/TailRecursionElimination.cpp index 682d069..34ee57c 100644 --- a/lib/Transforms/Scalar/TailRecursionElimination.cpp +++ b/lib/Transforms/Scalar/TailRecursionElimination.cpp @@ -52,6 +52,7 @@ #define DEBUG_TYPE "tailcallelim" #include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Utils/Local.h" #include "llvm/Constants.h" #include "llvm/DerivedTypes.h" #include "llvm/Function.h" @@ -201,8 +202,21 @@ bool TailCallElim::runOnFunction(Function &F) { bool TailCallElim::CanMoveAboveCall(Instruction *I, CallInst *CI) { // FIXME: We can move load/store/call/free instructions above the call if the // call does not mod/ref the memory location being processed. - if (I->mayHaveSideEffects() || isa<LoadInst>(I)) + if (I->mayHaveSideEffects()) // This also handles volatile loads. return false; + + if (LoadInst* L = dyn_cast<LoadInst>(I)) { + // Loads may always be moved above calls without side effects. + if (CI->mayHaveSideEffects()) { + // Non-volatile loads may be moved above a call with side effects if it + // does not write to memory and the load provably won't trap. + // FIXME: Writes to memory only matter if they may alias the pointer + // being loaded from. + if (CI->mayWriteToMemory() || + !isSafeToLoadUnconditionally(L->getPointerOperand(), L)) + return false; + } + } // Otherwise, if this is a side-effect free instruction, check to make sure // that it does not use the return value of the call. 
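The StrCmpOpt tightening above now requires both string lengths to be known before rewriting strcmp as memcmp. The reason: memcmp compares exactly n bytes, so n must stay inside both arguments; with both lengths known (NUL included), min(Len1, Len2) bytes always covers the shorter string's terminator, and the two functions agree. A spot check of the safe case:

#include <algorithm>
#include <cstring>
#include <cstdio>

int main() {
  const char a[] = "ab", b[] = "abcd";          // lengths 3 and 5, NULs included
  size_t n = std::min(sizeof(a), sizeof(b));    // min(Len1, Len2) == 3
  int s = strcmp(a, b), m = memcmp(a, b, n);
  printf("%d %d -> %s\n", s < 0, m < 0,
         (s < 0) == (m < 0) ? "agree" : "BUG"); // both negative: agree
}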
If it doesn't use the diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp index 94483b8..c7fff54 100644 --- a/lib/Transforms/Utils/Local.cpp +++ b/lib/Transforms/Utils/Local.cpp @@ -14,6 +14,7 @@ #include "llvm/Transforms/Utils/Local.h" #include "llvm/Constants.h" +#include "llvm/GlobalAlias.h" #include "llvm/GlobalVariable.h" #include "llvm/DerivedTypes.h" #include "llvm/Instructions.h" @@ -28,6 +29,50 @@ using namespace llvm; //===----------------------------------------------------------------------===// +// Local analysis. +// + +/// isSafeToLoadUnconditionally - Return true if we know that executing a load +/// from this value cannot trap. If it is not obviously safe to load from the +/// specified pointer, we do a quick local scan of the basic block containing +/// ScanFrom, to determine if the address is already accessed. +bool llvm::isSafeToLoadUnconditionally(Value *V, Instruction *ScanFrom) { + // If it is an alloca it is always safe to load from. + if (isa<AllocaInst>(V)) return true; + + // If it is a global variable it is mostly safe to load from. + if (const GlobalValue *GV = dyn_cast<GlobalVariable>(V)) + // Don't try to evaluate aliases. External weak GV can be null. + return !isa<GlobalAlias>(GV) && !GV->hasExternalWeakLinkage(); + + // Otherwise, be a little bit agressive by scanning the local block where we + // want to check to see if the pointer is already being loaded or stored + // from/to. If so, the previous load or store would have already trapped, + // so there is no harm doing an extra load (also, CSE will later eliminate + // the load entirely). + BasicBlock::iterator BBI = ScanFrom, E = ScanFrom->getParent()->begin(); + + while (BBI != E) { + --BBI; + + // If we see a free or a call which may write to memory (i.e. which might do + // a free) the pointer could be marked invalid. + if (isa<FreeInst>(BBI) || + (isa<CallInst>(BBI) && BBI->mayWriteToMemory() && + !isa<DbgInfoIntrinsic>(BBI))) + return false; + + if (LoadInst *LI = dyn_cast<LoadInst>(BBI)) { + if (LI->getOperand(0) == V) return true; + } else if (StoreInst *SI = dyn_cast<StoreInst>(BBI)) { + if (SI->getOperand(1) == V) return true; + } + } + return false; +} + + +//===----------------------------------------------------------------------===// // Local constant propagation. // diff --git a/lib/Transforms/Utils/LowerAllocations.cpp b/lib/Transforms/Utils/LowerAllocations.cpp index 3249895..9af47f5 100644 --- a/lib/Transforms/Utils/LowerAllocations.cpp +++ b/lib/Transforms/Utils/LowerAllocations.cpp @@ -112,7 +112,7 @@ bool LowerAllocations::runOnBasicBlock(BasicBlock &BB) { if (MallocInst *MI = dyn_cast<MallocInst>(I)) { const Type *AllocTy = MI->getType()->getElementType(); - // malloc(type) becomes sbyte *malloc(size) + // malloc(type) becomes i8 *malloc(size) Value *MallocArg; if (LowerMallocArgToInteger) MallocArg = ConstantInt::get(Type::Int64Ty, diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp index bcc6b81..ee0f6a6 100644 --- a/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/lib/Transforms/Utils/SimplifyCFG.cpp @@ -859,6 +859,26 @@ static bool FoldValueComparisonIntoPredecessors(TerminatorInst *TI) { return Changed; } +// isSafeToHoistInvoke - If we would need to insert a select that uses the +// value of this invoke (comments in HoistThenElseCodeToIf explain why we +// would need to do this), we can't hoist the invoke, as there is nowhere +// to put the select in this case. 
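CanMoveAboveCall above now delegates the trap-safety question to isSafeToLoadUnconditionally, whose Local.cpp definition also appears above: scan backwards through the block for an earlier access to the same pointer, bailing out on anything that may have freed it. A toy rendering of that scan over a flat instruction list:

    #include <iostream>
    #include <string>
    #include <vector>

    // Toy instruction stream: each entry loads/stores through a named
    // pointer, or is a call that may write to memory (and so may free it).
    struct Inst {
        enum Kind { Load, Store, MayWriteCall, Other } K;
        std::string Ptr;   // pointer operand, for Load/Store only
    };

    // Scan backwards from ScanFrom within one block: if the same pointer was
    // already loaded or stored, a trapping address would have trapped there,
    // so an extra unconditional load is safe.  A call that may write (or
    // free) in between invalidates that conclusion.
    bool isSafeToLoadUnconditionally(const std::vector<Inst> &BB,
                                     std::size_t ScanFrom,
                                     const std::string &Ptr) {
        for (std::size_t i = ScanFrom; i-- > 0; ) {
            const Inst &I = BB[i];
            if (I.K == Inst::MayWriteCall)
                return false;                  // pointer may have been freed
            if ((I.K == Inst::Load || I.K == Inst::Store) && I.Ptr == Ptr)
                return true;                   // already dereferenced earlier
        }
        return false;                          // nothing proves safety
    }

    int main() {
        std::vector<Inst> BB = { { Inst::Store, "p" }, { Inst::Other, "" } };
        std::cout << isSafeToLoadUnconditionally(BB, 2, "p") << '\n';  // 1
    }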
+static bool isSafeToHoistInvoke(BasicBlock *BB1, BasicBlock *BB2, + Instruction *I1, Instruction *I2) { + for (succ_iterator SI = succ_begin(BB1), E = succ_end(BB1); SI != E; ++SI) { + PHINode *PN; + for (BasicBlock::iterator BBI = SI->begin(); + (PN = dyn_cast<PHINode>(BBI)); ++BBI) { + Value *BB1V = PN->getIncomingValueForBlock(BB1); + Value *BB2V = PN->getIncomingValueForBlock(BB2); + if (BB1V != BB2V && (BB1V==I1 || BB2V==I2)) { + return false; + } + } + } + return true; +} + /// HoistThenElseCodeToIf - Given a conditional branch that goes to BB1 and /// BB2, hoist any common code in the two blocks up into the branch block. The /// caller of this function guarantees that BI's block dominates BB1 and BB2. @@ -879,8 +899,9 @@ static bool HoistThenElseCodeToIf(BranchInst *BI) { I1 = BB1_Itr++; while (isa<DbgInfoIntrinsic>(I2)) I2 = BB2_Itr++; - if (I1->getOpcode() != I2->getOpcode() || isa<PHINode>(I1) || - isa<InvokeInst>(I1) || !I1->isIdenticalTo(I2)) + if (I1->getOpcode() != I2->getOpcode() || isa<PHINode>(I1) || + !I1->isIdenticalTo(I2) || + (isa<InvokeInst>(I1) && !isSafeToHoistInvoke(BB1, BB2, I1, I2))) return false; // If we get here, we can hoist at least one instruction. @@ -911,6 +932,10 @@ static bool HoistThenElseCodeToIf(BranchInst *BI) { return true; HoistTerminator: + // It may not be possible to hoist an invoke. + if (isa<InvokeInst>(I1) && !isSafeToHoistInvoke(BB1, BB2, I1, I2)) + return true; + // Okay, it is safe to hoist the terminator. Instruction *NT = I1->clone(); BIParent->getInstList().insert(BI, NT); diff --git a/lib/VMCore/AsmWriter.cpp b/lib/VMCore/AsmWriter.cpp index 6b369b6..73b1ed6 100644 --- a/lib/VMCore/AsmWriter.cpp +++ b/lib/VMCore/AsmWriter.cpp @@ -1384,7 +1384,10 @@ void AssemblyWriter::printFunction(const Function *F) { case CallingConv::Fast: Out << "fastcc "; break; case CallingConv::Cold: Out << "coldcc "; break; case CallingConv::X86_StdCall: Out << "x86_stdcallcc "; break; - case CallingConv::X86_FastCall: Out << "x86_fastcallcc "; break; + case CallingConv::X86_FastCall: Out << "x86_fastcallcc "; break; + case CallingConv::ARM_APCS: Out << "arm_apcscc "; break; + case CallingConv::ARM_AAPCS: Out << "arm_aapcscc "; break; + case CallingConv::ARM_AAPCS_VFP:Out << "arm_aapcs_vfpcc "; break; default: Out << "cc" << F->getCallingConv() << " "; break; } @@ -1640,7 +1643,10 @@ void AssemblyWriter::printInstruction(const Instruction &I) { case CallingConv::Fast: Out << " fastcc"; break; case CallingConv::Cold: Out << " coldcc"; break; case CallingConv::X86_StdCall: Out << " x86_stdcallcc"; break; - case CallingConv::X86_FastCall: Out << " x86_fastcallcc"; break; + case CallingConv::X86_FastCall: Out << " x86_fastcallcc"; break; + case CallingConv::ARM_APCS: Out << " arm_apcscc "; break; + case CallingConv::ARM_AAPCS: Out << " arm_aapcscc "; break; + case CallingConv::ARM_AAPCS_VFP:Out << " arm_aapcs_vfpcc "; break; default: Out << " cc" << CI->getCallingConv(); break; } @@ -1688,6 +1694,9 @@ void AssemblyWriter::printInstruction(const Instruction &I) { case CallingConv::Cold: Out << " coldcc"; break; case CallingConv::X86_StdCall: Out << " x86_stdcallcc"; break; case CallingConv::X86_FastCall: Out << " x86_fastcallcc"; break; + case CallingConv::ARM_APCS: Out << " arm_apcscc "; break; + case CallingConv::ARM_AAPCS: Out << " arm_aapcscc "; break; + case CallingConv::ARM_AAPCS_VFP:Out << " arm_aapcs_vfpcc "; break; default: Out << " cc" << II->getCallingConv(); break; } diff --git a/lib/VMCore/ConstantFold.cpp b/lib/VMCore/ConstantFold.cpp index 
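The logic of isSafeToHoistInvoke, shown above and rechecked at the HoistTerminator label: merging two differing PHI inputs normally costs a select at the join, and once an invoke is hoisted there is no block left in which to place that select. A toy version of the check:

    #include <iostream>

    // One successor PHI, modelled as the pair of values it receives from the
    // two arms being merged.
    struct Phi { const void *FromBB1, *FromBB2; };

    // Bail out if any PHI with differing incoming values actually consumes
    // the result of either invoke being hoisted.
    bool isSafeToHoistInvoke(const Phi *Phis, unsigned NumPhis,
                             const void *I1, const void *I2) {
        for (unsigned i = 0; i != NumPhis; ++i)
            if (Phis[i].FromBB1 != Phis[i].FromBB2 &&
                (Phis[i].FromBB1 == I1 || Phis[i].FromBB2 == I2))
                return false;
        return true;
    }

    int main() {
        int I1, I2, Other;
        Phi P[] = { { &I1, &Other } };  // PHI consumes the first invoke's value
        std::cout << isSafeToHoistInvoke(P, 1, &I1, &I2) << '\n';  // 0
    }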
1d293cc..3aab0cc 100644 --- a/lib/VMCore/ConstantFold.cpp +++ b/lib/VMCore/ConstantFold.cpp @@ -208,6 +208,22 @@ Constant *llvm::ConstantFoldCastInstruction(unsigned opc, const Constant *V, } } + // If the cast operand is a constant vector, perform the cast by + // operating on each element. In the cast of bitcasts, the element + // count may be mismatched; don't attempt to handle that here. + if (const ConstantVector *CV = dyn_cast<ConstantVector>(V)) + if (isa<VectorType>(DestTy) && + cast<VectorType>(DestTy)->getNumElements() == + CV->getType()->getNumElements()) { + std::vector<Constant*> res; + const VectorType *DestVecTy = cast<VectorType>(DestTy); + const Type *DstEltTy = DestVecTy->getElementType(); + for (unsigned i = 0, e = CV->getType()->getNumElements(); i != e; ++i) + res.push_back(ConstantExpr::getCast(opc, + CV->getOperand(i), DstEltTy)); + return ConstantVector::get(DestVecTy, res); + } + // We actually have to do a cast now. Perform the cast according to the // opcode specified. switch (opc) { @@ -237,14 +253,6 @@ Constant *llvm::ConstantFoldCastInstruction(unsigned opc, const Constant *V, APInt Val(DestBitWidth, 2, x); return ConstantInt::get(Val); } - if (const ConstantVector *CV = dyn_cast<ConstantVector>(V)) { - std::vector<Constant*> res; - const VectorType *DestVecTy = cast<VectorType>(DestTy); - const Type *DstEltTy = DestVecTy->getElementType(); - for (unsigned i = 0, e = CV->getType()->getNumElements(); i != e; ++i) - res.push_back(ConstantExpr::getCast(opc, CV->getOperand(i), DstEltTy)); - return ConstantVector::get(DestVecTy, res); - } return 0; // Can't fold. case Instruction::IntToPtr: //always treated as unsigned if (V->isNullValue()) // Is it an integral null value? @@ -266,14 +274,6 @@ Constant *llvm::ConstantFoldCastInstruction(unsigned opc, const Constant *V, APFloat::rmNearestTiesToEven); return ConstantFP::get(apf); } - if (const ConstantVector *CV = dyn_cast<ConstantVector>(V)) { - std::vector<Constant*> res; - const VectorType *DestVecTy = cast<VectorType>(DestTy); - const Type *DstEltTy = DestVecTy->getElementType(); - for (unsigned i = 0, e = CV->getType()->getNumElements(); i != e; ++i) - res.push_back(ConstantExpr::getCast(opc, CV->getOperand(i), DstEltTy)); - return ConstantVector::get(DestVecTy, res); - } return 0; case Instruction::ZExt: if (const ConstantInt *CI = dyn_cast<ConstantInt>(V)) { @@ -629,7 +629,7 @@ Constant *llvm::ConstantFoldBinaryInstruction(unsigned Opcode, } } - // Handle simplifications of the RHS when a constant int. + // Handle simplifications when the RHS is a constant int. 
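The ConstantFold.cpp hunk above hoists per-element folding of constant-vector casts ahead of the opcode switch, with one guard: lane counts must match, since a bitcast such as <2 x i32> to <4 x i16> cannot be folded lane by lane. The shape of the loop, as a small generic sketch:

    #include <iostream>
    #include <vector>

    // Fold a cast of a constant vector by casting each element, mirroring
    // the hoisted block above (ConstantExpr::getCast per element, then
    // rebuild the vector).
    template <typename From, typename To, typename CastFn>
    std::vector<To> foldVectorCast(const std::vector<From> &CV, CastFn Cast) {
        std::vector<To> Res;
        Res.reserve(CV.size());
        for (const From &Elt : CV)
            Res.push_back(Cast(Elt));
        return Res;
    }

    int main() {
        std::vector<double> V = { 1.9, -2.5 };
        auto R = foldVectorCast<double, long long>(
            V, [](double d) { return (long long)d; });  // fptosi per lane
        std::cout << R[0] << ' ' << R[1] << '\n';        // 1 -2
    }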
if (const ConstantInt *CI2 = dyn_cast<ConstantInt>(C2)) { switch (Opcode) { case Instruction::Add: @@ -773,6 +773,20 @@ Constant *llvm::ConstantFoldBinaryInstruction(unsigned Opcode, } } } + + switch (Opcode) { + case Instruction::SDiv: + case Instruction::UDiv: + case Instruction::URem: + case Instruction::SRem: + case Instruction::LShr: + case Instruction::AShr: + case Instruction::Shl: + if (CI1->equalsInt(0)) return const_cast<Constant*>(C1); + break; + default: + break; + } } else if (const ConstantFP *CFP1 = dyn_cast<ConstantFP>(C1)) { if (const ConstantFP *CFP2 = dyn_cast<ConstantFP>(C2)) { APFloat C1V = CFP1->getValueAPF(); diff --git a/lib/VMCore/Constants.cpp b/lib/VMCore/Constants.cpp index 69c503d..c164a3b 100644 --- a/lib/VMCore/Constants.cpp +++ b/lib/VMCore/Constants.cpp @@ -25,6 +25,9 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/ManagedStatic.h" #include "llvm/Support/MathExtras.h" +#include "llvm/System/Mutex.h" +#include "llvm/System/RWMutex.h" +#include "llvm/System/Threading.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallVector.h" #include <algorithm> @@ -35,6 +38,9 @@ using namespace llvm; // Constant Class //===----------------------------------------------------------------------===// +// Becomes a no-op when multithreading is disabled. +ManagedStatic<sys::SmartRWMutex<true> > ConstantsLock; + void Constant::destroyConstantImpl() { // When a Constant is destroyed, there may be lingering // references to the constant by other constants in the constant pool. These @@ -269,9 +275,20 @@ typedef DenseMap<DenseMapAPIntKeyInfo::KeyTy, ConstantInt*, DenseMapAPIntKeyInfo> IntMapTy; static ManagedStatic<IntMapTy> IntConstants; -ConstantInt *ConstantInt::get(const Type *Ty, uint64_t V, bool isSigned) { - const IntegerType *ITy = cast<IntegerType>(Ty); - return get(APInt(ITy->getBitWidth(), V, isSigned)); +ConstantInt *ConstantInt::get(const IntegerType *Ty, + uint64_t V, bool isSigned) { + return get(APInt(Ty->getBitWidth(), V, isSigned)); +} + +Constant *ConstantInt::get(const Type *Ty, uint64_t V, bool isSigned) { + Constant *C = get(cast<IntegerType>(Ty->getScalarType()), V, isSigned); + + // For vectors, broadcast the value. + if (const VectorType *VTy = dyn_cast<VectorType>(Ty)) + return + ConstantVector::get(std::vector<Constant *>(VTy->getNumElements(), C)); + + return C; } // Get a ConstantInt from an APInt. Note that the value stored in the DenseMap @@ -284,12 +301,35 @@ ConstantInt *ConstantInt::get(const APInt& V) { const IntegerType *ITy = IntegerType::get(V.getBitWidth()); // get an existing value or the insertion position DenseMapAPIntKeyInfo::KeyTy Key(V, ITy); + + ConstantsLock->reader_acquire(); ConstantInt *&Slot = (*IntConstants)[Key]; - // if it exists, return it. - if (Slot) + ConstantsLock->reader_release(); + + if (!Slot) { + sys::SmartScopedWriter<true> Writer(&*ConstantsLock); + ConstantInt *&NewSlot = (*IntConstants)[Key]; + if (!Slot) { + NewSlot = new ConstantInt(ITy, V); + } + + return NewSlot; + } else { return Slot; - // otherwise create a new one, insert it, and return it. - return Slot = new ConstantInt(ITy, V); + } +} + +Constant *ConstantInt::get(const Type *Ty, const APInt &V) { + ConstantInt *C = ConstantInt::get(V); + assert(C->getType() == Ty->getScalarType() && + "ConstantInt type doesn't match the type implied by its value!"); + + // For vectors, broadcast the value. 
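Several of the Constants.cpp changes above and below share one new behavior: the get() entry points now accept a vector type and splat the scalar constant across its lanes. A sketch with a toy type standing in for llvm::Type:

    #include <iostream>
    #include <vector>

    // Toy type: VectorElts == 0 means scalar, otherwise a vector with that
    // many lanes.  The constant is modelled as the list of its lanes.
    struct Type { unsigned VectorElts; };

    // get(Ty, V): build the scalar constant for Ty's scalar type, then
    // broadcast it when Ty is a vector type, as the new ConstantInt::get and
    // ConstantFP::get overloads do.
    std::vector<long> getConstant(const Type &Ty, long V) {
        if (Ty.VectorElts == 0)
            return std::vector<long>(1, V);              // plain scalar
        return std::vector<long>(Ty.VectorElts, V);      // splat across lanes
    }

    int main() {
        const Type V4 = { 4 };
        for (long L : getConstant(V4, 7))
            std::cout << L << ' ';                       // 7 7 7 7
        std::cout << '\n';
    }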
+ if (const VectorType *VTy = dyn_cast<VectorType>(Ty)) + return + ConstantVector::get(std::vector<Constant *>(VTy->getNumElements(), C)); + + return C; } //===----------------------------------------------------------------------===// @@ -368,34 +408,54 @@ static ManagedStatic<FPMapTy> FPConstants; ConstantFP *ConstantFP::get(const APFloat &V) { DenseMapAPFloatKeyInfo::KeyTy Key(V); - ConstantFP *&Slot = (*FPConstants)[Key]; - if (Slot) return Slot; - const Type *Ty; - if (&V.getSemantics() == &APFloat::IEEEsingle) - Ty = Type::FloatTy; - else if (&V.getSemantics() == &APFloat::IEEEdouble) - Ty = Type::DoubleTy; - else if (&V.getSemantics() == &APFloat::x87DoubleExtended) - Ty = Type::X86_FP80Ty; - else if (&V.getSemantics() == &APFloat::IEEEquad) - Ty = Type::FP128Ty; - else { - assert(&V.getSemantics() == &APFloat::PPCDoubleDouble&&"Unknown FP format"); - Ty = Type::PPC_FP128Ty; + ConstantsLock->reader_acquire(); + ConstantFP *&Slot = (*FPConstants)[Key]; + ConstantsLock->reader_release(); + + if (!Slot) { + sys::SmartScopedWriter<true> Writer(&*ConstantsLock); + ConstantFP *&NewSlot = (*FPConstants)[Key]; + if (!NewSlot) { + const Type *Ty; + if (&V.getSemantics() == &APFloat::IEEEsingle) + Ty = Type::FloatTy; + else if (&V.getSemantics() == &APFloat::IEEEdouble) + Ty = Type::DoubleTy; + else if (&V.getSemantics() == &APFloat::x87DoubleExtended) + Ty = Type::X86_FP80Ty; + else if (&V.getSemantics() == &APFloat::IEEEquad) + Ty = Type::FP128Ty; + else { + assert(&V.getSemantics() == &APFloat::PPCDoubleDouble && + "Unknown FP format"); + Ty = Type::PPC_FP128Ty; + } + NewSlot = new ConstantFP(Ty, V); + } + + return NewSlot; } - return Slot = new ConstantFP(Ty, V); + return Slot; } /// get() - This returns a constant fp for the specified value in the /// specified type. This should only be used for simple constant values like /// 2.0/1.0 etc, that are known-valid both as double and as the target format. -ConstantFP *ConstantFP::get(const Type *Ty, double V) { +Constant *ConstantFP::get(const Type *Ty, double V) { APFloat FV(V); bool ignored; - FV.convert(*TypeToFloatSemantics(Ty), APFloat::rmNearestTiesToEven, &ignored); - return get(FV); + FV.convert(*TypeToFloatSemantics(Ty->getScalarType()), + APFloat::rmNearestTiesToEven, &ignored); + Constant *C = get(FV); + + // For vectors, broadcast the value. + if (const VectorType *VTy = dyn_cast<VectorType>(Ty)) + return + ConstantVector::get(std::vector<Constant *>(VTy->getNumElements(), C)); + + return C; } //===----------------------------------------------------------------------===// @@ -1093,8 +1153,13 @@ namespace llvm { /// AbstractTypeMap - Map for abstract type constants. /// AbstractTypeMapTy AbstractTypeMap; + + /// ValueMapLock - Mutex for this map. + sys::SmartMutex<true> ValueMapLock; public: + // NOTE: This function is not locked. It is the caller's responsibility + // to enforce proper synchronization. typename MapTy::iterator map_end() { return Map.end(); } /// InsertOrGetItem - Return an iterator for the specified element. @@ -1102,6 +1167,8 @@ namespace llvm { /// entry and Exists=true. If not, the iterator points to the newly /// inserted entry and returns Exists=false. Newly inserted entries have /// I->second == 0, and should be filled in. + /// NOTE: This function is not locked. It is the caller's responsibility + // to enforce proper synchronization. 
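The locking discipline being introduced here, and reused below for MDString, MDNode, and the Type.cpp maps, is a read-mostly double check: probe under a reader lock, and on a miss take the writer lock and probe again before creating, since another thread may have filled the entry between the two acquisitions. A sketch with C++17's std::shared_mutex standing in for sys::SmartRWMutex<true>. One wrinkle worth noting: the recheck must test the slot looked up afresh under the writer lock; the ConstantInt::get hunk above tests the stale Slot where NewSlot looks intended.

    #include <iostream>
    #include <map>
    #include <mutex>
    #include <shared_mutex>

    // Double-checked uniquing table.  Readers are cheap on the hit path;
    // entries live for the process lifetime, as in a uniquing table.
    class UniqueInts {
        std::map<int, int*> Map;
        std::shared_mutex Lock;
    public:
        int *get(int Key) {
            {
                std::shared_lock<std::shared_mutex> Reader(Lock);
                auto It = Map.find(Key);
                if (It != Map.end()) return It->second;   // fast path
            }
            std::unique_lock<std::shared_mutex> Writer(Lock);
            int *&Slot = Map[Key];        // re-lookup under the writer lock
            if (!Slot)                    // recheck the fresh slot, not a stale one
                Slot = new int(Key);
            return Slot;
        }
    };

    int main() {
        UniqueInts U;
        std::cout << (U.get(42) == U.get(42)) << '\n';    // 1: uniqued
    }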
typename MapTy::iterator InsertOrGetItem(std::pair<MapKey, Constant *> &InsertVal, bool &Exists) { @@ -1131,19 +1198,10 @@ private: } return I; } -public: - /// getOrCreate - Return the specified constant from the map, creating it if - /// necessary. - ConstantClass *getOrCreate(const TypeClass *Ty, const ValType &V) { - MapKey Lookup(Ty, V); - typename MapTy::iterator I = Map.find(Lookup); - // Is it in the map? - if (I != Map.end()) - return static_cast<ConstantClass *>(I->second); - - // If no preexisting value, create one now... - ConstantClass *Result = + ConstantClass* Create(const TypeClass *Ty, const ValType &V, + typename MapTy::iterator I) { + ConstantClass* Result = ConstantCreator<ConstantClass,TypeClass,ValType>::create(Ty, V); assert(Result->getType() == Ty && "Type specified is not correct!"); @@ -1151,11 +1209,12 @@ public: if (HasLargeKey) // Remember the reverse mapping if needed. InverseMap.insert(std::make_pair(Result, I)); - - // If the type of the constant is abstract, make sure that an entry exists - // for it in the AbstractTypeMap. + + // If the type of the constant is abstract, make sure that an entry + // exists for it in the AbstractTypeMap. if (Ty->isAbstract()) { - typename AbstractTypeMapTy::iterator TI = AbstractTypeMap.find(Ty); + typename AbstractTypeMapTy::iterator TI = + AbstractTypeMap.find(Ty); if (TI == AbstractTypeMap.end()) { // Add ourselves to the ATU list of the type. @@ -1164,10 +1223,33 @@ public: AbstractTypeMap.insert(TI, std::make_pair(Ty, I)); } } + + return Result; + } +public: + + /// getOrCreate - Return the specified constant from the map, creating it if + /// necessary. + ConstantClass *getOrCreate(const TypeClass *Ty, const ValType &V) { + sys::SmartScopedLock<true> Lock(&ValueMapLock); + MapKey Lookup(Ty, V); + ConstantClass* Result = 0; + + typename MapTy::iterator I = Map.find(Lookup); + // Is it in the map? + if (I != Map.end()) + Result = static_cast<ConstantClass *>(I->second); + + if (!Result) { + // If no preexisting value, create one now... + Result = Create(Ty, V, I); + } + return Result; } void remove(ConstantClass *CP) { + sys::SmartScopedLock<true> Lock(&ValueMapLock); typename MapTy::iterator I = FindExistingElement(CP); assert(I != Map.end() && "Constant not found in constant table!"); assert(I->second == CP && "Didn't find correct element?"); @@ -1221,6 +1303,8 @@ public: /// MoveConstantToNewSlot - If we are about to change C to be the element /// specified by I, update our internal data structures to reflect this /// fact. + /// NOTE: This function is not locked. It is the responsibility of the + /// caller to enforce proper synchronization if using this method. void MoveConstantToNewSlot(ConstantClass *C, typename MapTy::iterator I) { // First, remove the old location of the specified constant in the map. typename MapTy::iterator OldI = FindExistingElement(C); @@ -1250,6 +1334,7 @@ public: } void refineAbstractType(const DerivedType *OldTy, const Type *NewTy) { + sys::SmartScopedLock<true> Lock(&ValueMapLock); typename AbstractTypeMapTy::iterator I = AbstractTypeMap.find(cast<Type>(OldTy)); @@ -1314,12 +1399,15 @@ static char getValType(ConstantAggregateZero *CPZ) { return 0; } ConstantAggregateZero *ConstantAggregateZero::get(const Type *Ty) { assert((isa<StructType>(Ty) || isa<ArrayType>(Ty) || isa<VectorType>(Ty)) && "Cannot create an aggregate zero of non-aggregate type!"); + + // Implicitly locked. 
return AggZeroConstants->getOrCreate(Ty, 0); } /// destroyConstant - Remove the constant from the constant table... /// void ConstantAggregateZero::destroyConstant() { + // Implicitly locked. AggZeroConstants->remove(this); destroyConstantImpl(); } @@ -1359,18 +1447,24 @@ Constant *ConstantArray::get(const ArrayType *Ty, // If this is an all-zero array, return a ConstantAggregateZero object if (!V.empty()) { Constant *C = V[0]; - if (!C->isNullValue()) + if (!C->isNullValue()) { + // Implicitly locked. return ArrayConstants->getOrCreate(Ty, V); + } for (unsigned i = 1, e = V.size(); i != e; ++i) - if (V[i] != C) + if (V[i] != C) { + // Implicitly locked. return ArrayConstants->getOrCreate(Ty, V); + } } + return ConstantAggregateZero::get(Ty); } /// destroyConstant - Remove the constant from the constant table... /// void ConstantArray::destroyConstant() { + // Implicitly locked. ArrayConstants->remove(this); destroyConstantImpl(); } @@ -1482,6 +1576,7 @@ Constant *ConstantStruct::get(const StructType *Ty, // Create a ConstantAggregateZero value if all elements are zeros... for (unsigned i = 0, e = V.size(); i != e; ++i) if (!V[i]->isNullValue()) + // Implicitly locked. return StructConstants->getOrCreate(Ty, V); return ConstantAggregateZero::get(Ty); @@ -1498,6 +1593,7 @@ Constant *ConstantStruct::get(const std::vector<Constant*> &V, bool packed) { // destroyConstant - Remove the constant from the constant table... // void ConstantStruct::destroyConstant() { + // Implicitly locked. StructConstants->remove(this); destroyConstantImpl(); } @@ -1552,6 +1648,8 @@ Constant *ConstantVector::get(const VectorType *Ty, return ConstantAggregateZero::get(Ty); if (isUndef) return UndefValue::get(Ty); + + // Implicitly locked. return VectorConstants->getOrCreate(Ty, V); } @@ -1563,6 +1661,7 @@ Constant *ConstantVector::get(const std::vector<Constant*> &V) { // destroyConstant - Remove the constant from the constant table... // void ConstantVector::destroyConstant() { + // Implicitly locked. VectorConstants->remove(this); destroyConstantImpl(); } @@ -1627,12 +1726,14 @@ static char getValType(ConstantPointerNull *) { ConstantPointerNull *ConstantPointerNull::get(const PointerType *Ty) { + // Implicitly locked. return NullPtrConstants->getOrCreate(Ty, 0); } // destroyConstant - Remove the constant from the constant table... // void ConstantPointerNull::destroyConstant() { + // Implicitly locked. NullPtrConstants->remove(this); destroyConstantImpl(); } @@ -1670,12 +1771,14 @@ static char getValType(UndefValue *) { UndefValue *UndefValue::get(const Type *Ty) { + // Implicitly locked. return UndefValueConstants->getOrCreate(Ty, 0); } // destroyConstant - Remove the constant from the constant table. // void UndefValue::destroyConstant() { + // Implicitly locked. 
UndefValueConstants->remove(this); destroyConstantImpl(); } @@ -1690,15 +1793,18 @@ MDString::MDString(const char *begin, const char *end) static ManagedStatic<StringMap<MDString*> > MDStringCache; MDString *MDString::get(const char *StrBegin, const char *StrEnd) { - StringMapEntry<MDString *> &Entry = MDStringCache->GetOrCreateValue(StrBegin, - StrEnd); + sys::SmartScopedWriter<true> Writer(&*ConstantsLock); + StringMapEntry<MDString *> &Entry = MDStringCache->GetOrCreateValue( + StrBegin, StrEnd); MDString *&S = Entry.getValue(); if (!S) S = new MDString(Entry.getKeyData(), Entry.getKeyData() + Entry.getKeyLength()); + return S; } void MDString::destroyConstant() { + sys::SmartScopedWriter<true> Writer(&*ConstantsLock); MDStringCache->erase(MDStringCache->find(StrBegin, StrEnd)); destroyConstantImpl(); } @@ -1724,18 +1830,27 @@ MDNode *MDNode::get(Value*const* Vals, unsigned NumVals) { for (unsigned i = 0; i != NumVals; ++i) ID.AddPointer(Vals[i]); + ConstantsLock->reader_acquire(); void *InsertPoint; - if (MDNode *N = MDNodeSet->FindNodeOrInsertPos(ID, InsertPoint)) - return N; - - // InsertPoint will have been set by the FindNodeOrInsertPos call. - MDNode *N = new(0) MDNode(Vals, NumVals); - MDNodeSet->InsertNode(N, InsertPoint); + MDNode *N = MDNodeSet->FindNodeOrInsertPos(ID, InsertPoint); + ConstantsLock->reader_release(); + + if (!N) { + sys::SmartScopedWriter<true> Writer(&*ConstantsLock); + N = MDNodeSet->FindNodeOrInsertPos(ID, InsertPoint); + if (!N) { + // InsertPoint will have been set by the FindNodeOrInsertPos call. + N = new(0) MDNode(Vals, NumVals); + MDNodeSet->InsertNode(N, InsertPoint); + } + } return N; } void MDNode::destroyConstant() { + sys::SmartScopedWriter<true> Writer(&*ConstantsLock); MDNodeSet->RemoveNode(this); + destroyConstantImpl(); } @@ -1902,6 +2017,8 @@ static inline Constant *getFoldedCast( // Look up the constant in the table first to ensure uniqueness std::vector<Constant*> argVec(1, C); ExprMapKeyType Key(opc, argVec); + + // Implicitly locked. 
return ExprConstants->getOrCreate(Ty, Key); } @@ -1932,19 +2049,19 @@ Constant *ConstantExpr::getCast(unsigned oc, Constant *C, const Type *Ty) { } Constant *ConstantExpr::getZExtOrBitCast(Constant *C, const Type *Ty) { - if (C->getType()->getPrimitiveSizeInBits() == Ty->getPrimitiveSizeInBits()) + if (C->getType()->getScalarSizeInBits() == Ty->getScalarSizeInBits()) return getCast(Instruction::BitCast, C, Ty); return getCast(Instruction::ZExt, C, Ty); } Constant *ConstantExpr::getSExtOrBitCast(Constant *C, const Type *Ty) { - if (C->getType()->getPrimitiveSizeInBits() == Ty->getPrimitiveSizeInBits()) + if (C->getType()->getScalarSizeInBits() == Ty->getScalarSizeInBits()) return getCast(Instruction::BitCast, C, Ty); return getCast(Instruction::SExt, C, Ty); } Constant *ConstantExpr::getTruncOrBitCast(Constant *C, const Type *Ty) { - if (C->getType()->getPrimitiveSizeInBits() == Ty->getPrimitiveSizeInBits()) + if (C->getType()->getScalarSizeInBits() == Ty->getScalarSizeInBits()) return getCast(Instruction::BitCast, C, Ty); return getCast(Instruction::Trunc, C, Ty); } @@ -1960,9 +2077,10 @@ Constant *ConstantExpr::getPointerCast(Constant *S, const Type *Ty) { Constant *ConstantExpr::getIntegerCast(Constant *C, const Type *Ty, bool isSigned) { - assert(C->getType()->isInteger() && Ty->isInteger() && "Invalid cast"); - unsigned SrcBits = C->getType()->getPrimitiveSizeInBits(); - unsigned DstBits = Ty->getPrimitiveSizeInBits(); + assert(C->getType()->isIntOrIntVector() && + Ty->isIntOrIntVector() && "Invalid cast"); + unsigned SrcBits = C->getType()->getScalarSizeInBits(); + unsigned DstBits = Ty->getScalarSizeInBits(); Instruction::CastOps opcode = (SrcBits == DstBits ? Instruction::BitCast : (SrcBits > DstBits ? Instruction::Trunc : @@ -1971,10 +2089,10 @@ Constant *ConstantExpr::getIntegerCast(Constant *C, const Type *Ty, } Constant *ConstantExpr::getFPCast(Constant *C, const Type *Ty) { - assert(C->getType()->isFloatingPoint() && Ty->isFloatingPoint() && + assert(C->getType()->isFPOrFPVector() && Ty->isFPOrFPVector() && "Invalid cast"); - unsigned SrcBits = C->getType()->getPrimitiveSizeInBits(); - unsigned DstBits = Ty->getPrimitiveSizeInBits(); + unsigned SrcBits = C->getType()->getScalarSizeInBits(); + unsigned DstBits = Ty->getScalarSizeInBits(); if (SrcBits == DstBits) return C; // Avoid a useless cast Instruction::CastOps opcode = @@ -1983,42 +2101,67 @@ Constant *ConstantExpr::getFPCast(Constant *C, const Type *Ty) { } Constant *ConstantExpr::getTrunc(Constant *C, const Type *Ty) { - assert(C->getType()->isInteger() && "Trunc operand must be integer"); - assert(Ty->isInteger() && "Trunc produces only integral"); - assert(C->getType()->getPrimitiveSizeInBits() > Ty->getPrimitiveSizeInBits()&& +#ifndef NDEBUG + bool fromVec = C->getType()->getTypeID() == Type::VectorTyID; + bool toVec = Ty->getTypeID() == Type::VectorTyID; +#endif + assert((fromVec == toVec) && "Cannot convert from scalar to/from vector"); + assert(C->getType()->isIntOrIntVector() && "Trunc operand must be integer"); + assert(Ty->isIntOrIntVector() && "Trunc produces only integral"); + assert(C->getType()->getScalarSizeInBits() > Ty->getScalarSizeInBits()&& "SrcTy must be larger than DestTy for Trunc!"); return getFoldedCast(Instruction::Trunc, C, Ty); } Constant *ConstantExpr::getSExt(Constant *C, const Type *Ty) { - assert(C->getType()->isInteger() && "SEXt operand must be integral"); - assert(Ty->isInteger() && "SExt produces only integer"); - assert(C->getType()->getPrimitiveSizeInBits() < 
Ty->getPrimitiveSizeInBits()&& +#ifndef NDEBUG + bool fromVec = C->getType()->getTypeID() == Type::VectorTyID; + bool toVec = Ty->getTypeID() == Type::VectorTyID; +#endif + assert((fromVec == toVec) && "Cannot convert from scalar to/from vector"); + assert(C->getType()->isIntOrIntVector() && "SExt operand must be integral"); + assert(Ty->isIntOrIntVector() && "SExt produces only integer"); + assert(C->getType()->getScalarSizeInBits() < Ty->getScalarSizeInBits()&& "SrcTy must be smaller than DestTy for SExt!"); return getFoldedCast(Instruction::SExt, C, Ty); } Constant *ConstantExpr::getZExt(Constant *C, const Type *Ty) { - assert(C->getType()->isInteger() && "ZEXt operand must be integral"); - assert(Ty->isInteger() && "ZExt produces only integer"); - assert(C->getType()->getPrimitiveSizeInBits() < Ty->getPrimitiveSizeInBits()&& +#ifndef NDEBUG + bool fromVec = C->getType()->getTypeID() == Type::VectorTyID; + bool toVec = Ty->getTypeID() == Type::VectorTyID; +#endif + assert((fromVec == toVec) && "Cannot convert from scalar to/from vector"); + assert(C->getType()->isIntOrIntVector() && "ZEXt operand must be integral"); + assert(Ty->isIntOrIntVector() && "ZExt produces only integer"); + assert(C->getType()->getScalarSizeInBits() < Ty->getScalarSizeInBits()&& "SrcTy must be smaller than DestTy for ZExt!"); return getFoldedCast(Instruction::ZExt, C, Ty); } Constant *ConstantExpr::getFPTrunc(Constant *C, const Type *Ty) { - assert(C->getType()->isFloatingPoint() && Ty->isFloatingPoint() && - C->getType()->getPrimitiveSizeInBits() > Ty->getPrimitiveSizeInBits()&& +#ifndef NDEBUG + bool fromVec = C->getType()->getTypeID() == Type::VectorTyID; + bool toVec = Ty->getTypeID() == Type::VectorTyID; +#endif + assert((fromVec == toVec) && "Cannot convert from scalar to/from vector"); + assert(C->getType()->isFPOrFPVector() && Ty->isFPOrFPVector() && + C->getType()->getScalarSizeInBits() > Ty->getScalarSizeInBits()&& "This is an illegal floating point truncation!"); return getFoldedCast(Instruction::FPTrunc, C, Ty); } Constant *ConstantExpr::getFPExtend(Constant *C, const Type *Ty) { - assert(C->getType()->isFloatingPoint() && Ty->isFloatingPoint() && - C->getType()->getPrimitiveSizeInBits() < Ty->getPrimitiveSizeInBits()&& +#ifndef NDEBUG + bool fromVec = C->getType()->getTypeID() == Type::VectorTyID; + bool toVec = Ty->getTypeID() == Type::VectorTyID; +#endif + assert((fromVec == toVec) && "Cannot convert from scalar to/from vector"); + assert(C->getType()->isFPOrFPVector() && Ty->isFPOrFPVector() && + C->getType()->getScalarSizeInBits() < Ty->getScalarSizeInBits()&& "This is an illegal floating point extension!"); return getFoldedCast(Instruction::FPExt, C, Ty); } @@ -2136,6 +2279,8 @@ Constant *ConstantExpr::getTy(const Type *ReqTy, unsigned Opcode, std::vector<Constant*> argVec(1, C1); argVec.push_back(C2); ExprMapKeyType Key(Opcode, argVec); + + // Implicitly locked. 
return ExprConstants->getOrCreate(ReqTy, Key); } @@ -2188,34 +2333,30 @@ Constant *ConstantExpr::get(unsigned Opcode, Constant *C1, Constant *C2) { case Instruction::UDiv: case Instruction::SDiv: assert(C1->getType() == C2->getType() && "Op types should be identical!"); - assert((C1->getType()->isInteger() || (isa<VectorType>(C1->getType()) && - cast<VectorType>(C1->getType())->getElementType()->isInteger())) && + assert(C1->getType()->isIntOrIntVector() && "Tried to create an arithmetic operation on a non-arithmetic type!"); break; case Instruction::FDiv: assert(C1->getType() == C2->getType() && "Op types should be identical!"); - assert((C1->getType()->isFloatingPoint() || (isa<VectorType>(C1->getType()) - && cast<VectorType>(C1->getType())->getElementType()->isFloatingPoint())) - && "Tried to create an arithmetic operation on a non-arithmetic type!"); + assert(C1->getType()->isFPOrFPVector() && + "Tried to create an arithmetic operation on a non-arithmetic type!"); break; case Instruction::URem: case Instruction::SRem: assert(C1->getType() == C2->getType() && "Op types should be identical!"); - assert((C1->getType()->isInteger() || (isa<VectorType>(C1->getType()) && - cast<VectorType>(C1->getType())->getElementType()->isInteger())) && + assert(C1->getType()->isIntOrIntVector() && "Tried to create an arithmetic operation on a non-arithmetic type!"); break; case Instruction::FRem: assert(C1->getType() == C2->getType() && "Op types should be identical!"); - assert((C1->getType()->isFloatingPoint() || (isa<VectorType>(C1->getType()) - && cast<VectorType>(C1->getType())->getElementType()->isFloatingPoint())) - && "Tried to create an arithmetic operation on a non-arithmetic type!"); + assert(C1->getType()->isFPOrFPVector() && + "Tried to create an arithmetic operation on a non-arithmetic type!"); break; case Instruction::And: case Instruction::Or: case Instruction::Xor: assert(C1->getType() == C2->getType() && "Op types should be identical!"); - assert((C1->getType()->isInteger() || isa<VectorType>(C1->getType())) && + assert(C1->getType()->isIntOrIntVector() && "Tried to create a logical operation on a non-integral type!"); break; case Instruction::Shl: @@ -2251,6 +2392,8 @@ Constant *ConstantExpr::getSelectTy(const Type *ReqTy, Constant *C, argVec[1] = V1; argVec[2] = V2; ExprMapKeyType Key(Instruction::Select, argVec); + + // Implicitly locked. return ExprConstants->getOrCreate(ReqTy, Key); } @@ -2274,6 +2417,8 @@ Constant *ConstantExpr::getGetElementPtrTy(const Type *ReqTy, Constant *C, for (unsigned i = 0; i != NumIdx; ++i) ArgVec.push_back(cast<Constant>(Idxs[i])); const ExprMapKeyType Key(Instruction::GetElementPtr, ArgVec); + + // Implicitly locked. return ExprConstants->getOrCreate(ReqTy, Key); } @@ -2308,6 +2453,8 @@ ConstantExpr::getICmp(unsigned short pred, Constant* LHS, Constant* RHS) { ArgVec.push_back(RHS); // Get the key type with both the opcode and predicate const ExprMapKeyType Key(Instruction::ICmp, ArgVec, pred); + + // Implicitly locked. return ExprConstants->getOrCreate(Type::Int1Ty, Key); } @@ -2325,6 +2472,8 @@ ConstantExpr::getFCmp(unsigned short pred, Constant* LHS, Constant* RHS) { ArgVec.push_back(RHS); // Get the key type with both the opcode and predicate const ExprMapKeyType Key(Instruction::FCmp, ArgVec, pred); + + // Implicitly locked. 
return ExprConstants->getOrCreate(Type::Int1Ty, Key); } @@ -2370,6 +2519,8 @@ ConstantExpr::getVICmp(unsigned short pred, Constant* LHS, Constant* RHS) { ArgVec.push_back(RHS); // Get the key type with both the opcode and predicate const ExprMapKeyType Key(Instruction::VICmp, ArgVec, pred); + + // Implicitly locked. return ExprConstants->getOrCreate(LHS->getType(), Key); } @@ -2417,6 +2568,8 @@ ConstantExpr::getVFCmp(unsigned short pred, Constant* LHS, Constant* RHS) { ArgVec.push_back(RHS); // Get the key type with both the opcode and predicate const ExprMapKeyType Key(Instruction::VFCmp, ArgVec, pred); + + // Implicitly locked. return ExprConstants->getOrCreate(ResultTy, Key); } @@ -2428,6 +2581,8 @@ Constant *ConstantExpr::getExtractElementTy(const Type *ReqTy, Constant *Val, std::vector<Constant*> ArgVec(1, Val); ArgVec.push_back(Idx); const ExprMapKeyType Key(Instruction::ExtractElement,ArgVec); + + // Implicitly locked. return ExprConstants->getOrCreate(ReqTy, Key); } @@ -2449,6 +2604,8 @@ Constant *ConstantExpr::getInsertElementTy(const Type *ReqTy, Constant *Val, ArgVec.push_back(Elt); ArgVec.push_back(Idx); const ExprMapKeyType Key(Instruction::InsertElement,ArgVec); + + // Implicitly locked. return ExprConstants->getOrCreate(ReqTy, Key); } @@ -2472,6 +2629,8 @@ Constant *ConstantExpr::getShuffleVectorTy(const Type *ReqTy, Constant *V1, ArgVec.push_back(V2); ArgVec.push_back(Mask); const ExprMapKeyType Key(Instruction::ShuffleVector,ArgVec); + + // Implicitly locked. return ExprConstants->getOrCreate(ReqTy, Key); } @@ -2555,6 +2714,7 @@ Constant *ConstantExpr::getZeroValueForNegationExpr(const Type *Ty) { // destroyConstant - Remove the constant from the constant table... // void ConstantExpr::destroyConstant() { + // Implicitly locked. ExprConstants->remove(this); destroyConstantImpl(); } @@ -2619,6 +2779,7 @@ void ConstantArray::replaceUsesOfWithOnConstant(Value *From, Value *To, Replacement = ConstantAggregateZero::get(getType()); } else { // Check to see if we have this array type already. + sys::SmartScopedWriter<true> Writer(&*ConstantsLock); bool Exists; ArrayConstantsTy::MapTy::iterator I = ArrayConstants->InsertOrGetItem(Lookup, Exists); @@ -2694,6 +2855,7 @@ void ConstantStruct::replaceUsesOfWithOnConstant(Value *From, Value *To, Replacement = ConstantAggregateZero::get(getType()); } else { // Check to see if we have this array type already. + sys::SmartScopedWriter<true> Writer(&*ConstantsLock); bool Exists; StructConstantsTy::MapTy::iterator I = StructConstants->InsertOrGetItem(Lookup, Exists); diff --git a/lib/VMCore/Function.cpp b/lib/VMCore/Function.cpp index 54bd895..0450566 100644 --- a/lib/VMCore/Function.cpp +++ b/lib/VMCore/Function.cpp @@ -16,7 +16,10 @@ #include "llvm/IntrinsicInst.h" #include "llvm/CodeGen/ValueTypes.h" #include "llvm/Support/LeakDetector.h" +#include "llvm/Support/ManagedStatic.h" #include "llvm/Support/StringPool.h" +#include "llvm/System/RWMutex.h" +#include "llvm/System/Threading.h" #include "SymbolTableListTraitsImpl.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/StringExtras.h" @@ -230,17 +233,21 @@ void Function::removeAttribute(unsigned i, Attributes attr) { // use GC. 
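The Function.cpp hunks just below guard the lazily-created GC-name side table with the same reader/writer split: queries take the shared lock, mutations the exclusive one. A self-contained sketch, with std::shared_mutex and a plain std::map standing in for sys::SmartRWMutex and the DenseMap/StringPool pair:

    #include <cassert>
    #include <iostream>
    #include <map>
    #include <mutex>
    #include <shared_mutex>
    #include <string>

    class GCTable {
        std::map<const void*, std::string> Names;
        mutable std::shared_mutex Lock;
    public:
        bool hasGC(const void *F) const {
            std::shared_lock<std::shared_mutex> Reader(Lock);
            return Names.count(F) != 0;
        }
        std::string getGC(const void *F) const {
            std::shared_lock<std::shared_mutex> Reader(Lock);
            auto It = Names.find(F);
            assert(It != Names.end() && "Function has no collector");
            return It->second;
        }
        void setGC(const void *F, std::string S) {
            std::unique_lock<std::shared_mutex> Writer(Lock);
            Names[F] = std::move(S);
        }
        void clearGC(const void *F) {
            std::unique_lock<std::shared_mutex> Writer(Lock);
            Names.erase(F);
        }
    };

    int main() {
        GCTable T; int F;
        T.setGC(&F, "shadow-stack");
        std::cout << T.hasGC(&F) << ' ' << T.getGC(&F) << '\n';
    }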
static DenseMap<const Function*,PooledStringPtr> *GCNames; static StringPool *GCNamePool; +static ManagedStatic<sys::SmartRWMutex<true> > GCLock; bool Function::hasGC() const { + sys::SmartScopedReader<true> Reader(&*GCLock); return GCNames && GCNames->count(this); } const char *Function::getGC() const { assert(hasGC() && "Function has no collector"); + sys::SmartScopedReader<true> Reader(&*GCLock); return *(*GCNames)[this]; } void Function::setGC(const char *Str) { + sys::SmartScopedWriter<true> Writer(&*GCLock); if (!GCNamePool) GCNamePool = new StringPool(); if (!GCNames) @@ -249,6 +256,7 @@ void Function::setGC(const char *Str) { } void Function::clearGC() { + sys::SmartScopedWriter<true> Writer(&*GCLock); if (GCNames) { GCNames->erase(this); if (GCNames->empty()) { diff --git a/lib/VMCore/Instructions.cpp b/lib/VMCore/Instructions.cpp index 4c228fe..6a6424d 100644 --- a/lib/VMCore/Instructions.cpp +++ b/lib/VMCore/Instructions.cpp @@ -1310,7 +1310,7 @@ bool InsertElementInst::isValidOperands(const Value *Vec, const Value *Elt, return false;// Second operand of insertelement must be vector element type. if (Index->getType() != Type::Int32Ty) - return false; // Third operand of insertelement must be uint. + return false; // Third operand of insertelement must be i32. return true; } @@ -1576,9 +1576,8 @@ void BinaryOperator::init(BinaryOps iType) { case FDiv: assert(getType() == LHS->getType() && "Arithmetic operation should return same type as operands!"); - assert((getType()->isFloatingPoint() || (isa<VectorType>(getType()) && - cast<VectorType>(getType())->getElementType()->isFloatingPoint())) - && "Incorrect operand type (not floating point) for FDIV"); + assert(getType()->isFPOrFPVector() && + "Incorrect operand type (not floating point) for FDIV"); break; case URem: case SRem: @@ -1591,9 +1590,8 @@ void BinaryOperator::init(BinaryOps iType) { case FRem: assert(getType() == LHS->getType() && "Arithmetic operation should return same type as operands!"); - assert((getType()->isFloatingPoint() || (isa<VectorType>(getType()) && - cast<VectorType>(getType())->getElementType()->isFloatingPoint())) - && "Incorrect operand type (not floating point) for FREM"); + assert(getType()->isFPOrFPVector() && + "Incorrect operand type (not floating point) for FREM"); break; case Shl: case LShr: @@ -1837,11 +1835,11 @@ bool CastInst::isNoopCast(const Type *IntPtrTy) const { case Instruction::BitCast: return true; // BitCast never modifies bits. case Instruction::PtrToInt: - return IntPtrTy->getPrimitiveSizeInBits() == - getType()->getPrimitiveSizeInBits(); + return IntPtrTy->getScalarSizeInBits() == + getType()->getScalarSizeInBits(); case Instruction::IntToPtr: - return IntPtrTy->getPrimitiveSizeInBits() == - getOperand(0)->getType()->getPrimitiveSizeInBits(); + return IntPtrTy->getScalarSizeInBits() == + getOperand(0)->getType()->getScalarSizeInBits(); } } @@ -1880,8 +1878,8 @@ unsigned CastInst::isEliminableCastPair( // BITCONVERT = FirstClass n/a FirstClass n/a // // NOTE: some transforms are safe, but we consider them to be non-profitable. - // For example, we could merge "fptoui double to uint" + "zext uint to ulong", - // into "fptoui double to ulong", but this loses information about the range + // For example, we could merge "fptoui double to i32" + "zext i32 to i64", + // into "fptoui double to i64", but this loses information about the range // of the produced value (we no longer know the top-part is all zeros). 
// Further this conversion is often much more expensive for typical hardware, // and causes issues when building libgcc. We disallow fptosi+sext for the @@ -1946,8 +1944,8 @@ unsigned CastInst::isEliminableCastPair( return 0; case 7: { // ptrtoint, inttoptr -> bitcast (ptr -> ptr) if int size is >= ptr size - unsigned PtrSize = IntPtrTy->getPrimitiveSizeInBits(); - unsigned MidSize = MidTy->getPrimitiveSizeInBits(); + unsigned PtrSize = IntPtrTy->getScalarSizeInBits(); + unsigned MidSize = MidTy->getScalarSizeInBits(); if (MidSize >= PtrSize) return Instruction::BitCast; return 0; @@ -1956,8 +1954,8 @@ unsigned CastInst::isEliminableCastPair( // ext, trunc -> bitcast, if the SrcTy and DstTy are same size // ext, trunc -> ext, if sizeof(SrcTy) < sizeof(DstTy) // ext, trunc -> trunc, if sizeof(SrcTy) > sizeof(DstTy) - unsigned SrcSize = SrcTy->getPrimitiveSizeInBits(); - unsigned DstSize = DstTy->getPrimitiveSizeInBits(); + unsigned SrcSize = SrcTy->getScalarSizeInBits(); + unsigned DstSize = DstTy->getScalarSizeInBits(); if (SrcSize == DstSize) return Instruction::BitCast; else if (SrcSize < DstSize) @@ -1985,9 +1983,9 @@ unsigned CastInst::isEliminableCastPair( return 0; case 13: { // inttoptr, ptrtoint -> bitcast if SrcSize<=PtrSize and SrcSize==DstSize - unsigned PtrSize = IntPtrTy->getPrimitiveSizeInBits(); - unsigned SrcSize = SrcTy->getPrimitiveSizeInBits(); - unsigned DstSize = DstTy->getPrimitiveSizeInBits(); + unsigned PtrSize = IntPtrTy->getScalarSizeInBits(); + unsigned SrcSize = SrcTy->getScalarSizeInBits(); + unsigned DstSize = DstTy->getScalarSizeInBits(); if (SrcSize <= PtrSize && SrcSize == DstSize) return Instruction::BitCast; return 0; @@ -2051,7 +2049,7 @@ CastInst *CastInst::Create(Instruction::CastOps op, Value *S, const Type *Ty, CastInst *CastInst::CreateZExtOrBitCast(Value *S, const Type *Ty, const std::string &Name, Instruction *InsertBefore) { - if (S->getType()->getPrimitiveSizeInBits() == Ty->getPrimitiveSizeInBits()) + if (S->getType()->getScalarSizeInBits() == Ty->getScalarSizeInBits()) return Create(Instruction::BitCast, S, Ty, Name, InsertBefore); return Create(Instruction::ZExt, S, Ty, Name, InsertBefore); } @@ -2059,7 +2057,7 @@ CastInst *CastInst::CreateZExtOrBitCast(Value *S, const Type *Ty, CastInst *CastInst::CreateZExtOrBitCast(Value *S, const Type *Ty, const std::string &Name, BasicBlock *InsertAtEnd) { - if (S->getType()->getPrimitiveSizeInBits() == Ty->getPrimitiveSizeInBits()) + if (S->getType()->getScalarSizeInBits() == Ty->getScalarSizeInBits()) return Create(Instruction::BitCast, S, Ty, Name, InsertAtEnd); return Create(Instruction::ZExt, S, Ty, Name, InsertAtEnd); } @@ -2067,7 +2065,7 @@ CastInst *CastInst::CreateZExtOrBitCast(Value *S, const Type *Ty, CastInst *CastInst::CreateSExtOrBitCast(Value *S, const Type *Ty, const std::string &Name, Instruction *InsertBefore) { - if (S->getType()->getPrimitiveSizeInBits() == Ty->getPrimitiveSizeInBits()) + if (S->getType()->getScalarSizeInBits() == Ty->getScalarSizeInBits()) return Create(Instruction::BitCast, S, Ty, Name, InsertBefore); return Create(Instruction::SExt, S, Ty, Name, InsertBefore); } @@ -2075,7 +2073,7 @@ CastInst *CastInst::CreateSExtOrBitCast(Value *S, const Type *Ty, CastInst *CastInst::CreateSExtOrBitCast(Value *S, const Type *Ty, const std::string &Name, BasicBlock *InsertAtEnd) { - if (S->getType()->getPrimitiveSizeInBits() == Ty->getPrimitiveSizeInBits()) + if (S->getType()->getScalarSizeInBits() == Ty->getScalarSizeInBits()) return Create(Instruction::BitCast, S, Ty, Name, 
InsertAtEnd); return Create(Instruction::SExt, S, Ty, Name, InsertAtEnd); } @@ -2083,7 +2081,7 @@ CastInst *CastInst::CreateSExtOrBitCast(Value *S, const Type *Ty, CastInst *CastInst::CreateTruncOrBitCast(Value *S, const Type *Ty, const std::string &Name, Instruction *InsertBefore) { - if (S->getType()->getPrimitiveSizeInBits() == Ty->getPrimitiveSizeInBits()) + if (S->getType()->getScalarSizeInBits() == Ty->getScalarSizeInBits()) return Create(Instruction::BitCast, S, Ty, Name, InsertBefore); return Create(Instruction::Trunc, S, Ty, Name, InsertBefore); } @@ -2091,7 +2089,7 @@ CastInst *CastInst::CreateTruncOrBitCast(Value *S, const Type *Ty, CastInst *CastInst::CreateTruncOrBitCast(Value *S, const Type *Ty, const std::string &Name, BasicBlock *InsertAtEnd) { - if (S->getType()->getPrimitiveSizeInBits() == Ty->getPrimitiveSizeInBits()) + if (S->getType()->getScalarSizeInBits() == Ty->getScalarSizeInBits()) return Create(Instruction::BitCast, S, Ty, Name, InsertAtEnd); return Create(Instruction::Trunc, S, Ty, Name, InsertAtEnd); } @@ -2125,8 +2123,8 @@ CastInst *CastInst::CreateIntegerCast(Value *C, const Type *Ty, bool isSigned, const std::string &Name, Instruction *InsertBefore) { assert(C->getType()->isInteger() && Ty->isInteger() && "Invalid cast"); - unsigned SrcBits = C->getType()->getPrimitiveSizeInBits(); - unsigned DstBits = Ty->getPrimitiveSizeInBits(); + unsigned SrcBits = C->getType()->getScalarSizeInBits(); + unsigned DstBits = Ty->getScalarSizeInBits(); Instruction::CastOps opcode = (SrcBits == DstBits ? Instruction::BitCast : (SrcBits > DstBits ? Instruction::Trunc : @@ -2137,9 +2135,10 @@ CastInst *CastInst::CreateIntegerCast(Value *C, const Type *Ty, CastInst *CastInst::CreateIntegerCast(Value *C, const Type *Ty, bool isSigned, const std::string &Name, BasicBlock *InsertAtEnd) { - assert(C->getType()->isInteger() && Ty->isInteger() && "Invalid cast"); - unsigned SrcBits = C->getType()->getPrimitiveSizeInBits(); - unsigned DstBits = Ty->getPrimitiveSizeInBits(); + assert(C->getType()->isIntOrIntVector() && Ty->isIntOrIntVector() && + "Invalid cast"); + unsigned SrcBits = C->getType()->getScalarSizeInBits(); + unsigned DstBits = Ty->getScalarSizeInBits(); Instruction::CastOps opcode = (SrcBits == DstBits ? Instruction::BitCast : (SrcBits > DstBits ? Instruction::Trunc : @@ -2150,10 +2149,10 @@ CastInst *CastInst::CreateIntegerCast(Value *C, const Type *Ty, CastInst *CastInst::CreateFPCast(Value *C, const Type *Ty, const std::string &Name, Instruction *InsertBefore) { - assert(C->getType()->isFloatingPoint() && Ty->isFloatingPoint() && + assert(C->getType()->isFPOrFPVector() && Ty->isFPOrFPVector() && "Invalid cast"); - unsigned SrcBits = C->getType()->getPrimitiveSizeInBits(); - unsigned DstBits = Ty->getPrimitiveSizeInBits(); + unsigned SrcBits = C->getType()->getScalarSizeInBits(); + unsigned DstBits = Ty->getScalarSizeInBits(); Instruction::CastOps opcode = (SrcBits == DstBits ? Instruction::BitCast : (SrcBits > DstBits ? 
Instruction::FPTrunc : Instruction::FPExt)); @@ -2163,10 +2162,10 @@ CastInst *CastInst::CreateFPCast(Value *C, const Type *Ty, CastInst *CastInst::CreateFPCast(Value *C, const Type *Ty, const std::string &Name, BasicBlock *InsertAtEnd) { - assert(C->getType()->isFloatingPoint() && Ty->isFloatingPoint() && + assert(C->getType()->isFPOrFPVector() && Ty->isFPOrFPVector() && "Invalid cast"); - unsigned SrcBits = C->getType()->getPrimitiveSizeInBits(); - unsigned DstBits = Ty->getPrimitiveSizeInBits(); + unsigned SrcBits = C->getType()->getScalarSizeInBits(); + unsigned DstBits = Ty->getScalarSizeInBits(); Instruction::CastOps opcode = (SrcBits == DstBits ? Instruction::BitCast : (SrcBits > DstBits ? Instruction::FPTrunc : Instruction::FPExt)); @@ -2183,8 +2182,8 @@ bool CastInst::isCastable(const Type *SrcTy, const Type *DestTy) { return true; // Get the bit sizes, we'll need these - unsigned SrcBits = SrcTy->getPrimitiveSizeInBits(); // 0 for ptr/vector - unsigned DestBits = DestTy->getPrimitiveSizeInBits(); // 0 for ptr/vector + unsigned SrcBits = SrcTy->getScalarSizeInBits(); // 0 for ptr + unsigned DestBits = DestTy->getScalarSizeInBits(); // 0 for ptr // Run through the possibilities ... if (DestTy->isInteger()) { // Casting to integral @@ -2242,8 +2241,8 @@ CastInst::getCastOpcode( const Value *Src, bool SrcIsSigned, const Type *DestTy, bool DestIsSigned) { // Get the bit sizes, we'll need these const Type *SrcTy = Src->getType(); - unsigned SrcBits = SrcTy->getPrimitiveSizeInBits(); // 0 for ptr/vector - unsigned DestBits = DestTy->getPrimitiveSizeInBits(); // 0 for ptr/vector + unsigned SrcBits = SrcTy->getScalarSizeInBits(); // 0 for ptr + unsigned DestBits = DestTy->getScalarSizeInBits(); // 0 for ptr assert(SrcTy->isFirstClassType() && DestTy->isFirstClassType() && "Only first class types are castable!"); @@ -2344,8 +2343,8 @@ CastInst::castIsValid(Instruction::CastOps op, Value *S, const Type *DstTy) { return false; // Get the size of the types in bits, we'll need this later - unsigned SrcBitSize = SrcTy->getPrimitiveSizeInBits(); - unsigned DstBitSize = DstTy->getPrimitiveSizeInBits(); + unsigned SrcBitSize = SrcTy->getScalarSizeInBits(); + unsigned DstBitSize = DstTy->getScalarSizeInBits(); // Switch on the opcode provided switch (op) { @@ -2400,7 +2399,7 @@ CastInst::castIsValid(Instruction::CastOps op, Value *S, const Type *DstTy) { // Now we know we're not dealing with a pointer/non-pointer mismatch. In all // these cases, the cast is okay if the source and destination bit widths // are identical. 
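All of the Create*OrBitCast and CreateIntegerCast/CreateFPCast rewrites above reduce to one width rule, now phrased in scalar bits so the same comparison works for i32 and for <4 x i32>: equal widths degenerate to a bitcast, narrowing truncates, widening extends with the requested signedness. The integer case, distilled:

    #include <iostream>

    enum CastOp { BitCast, Trunc, ZExt, SExt };

    // Pick the integer cast opcode purely from the scalar bit widths.
    CastOp getIntegerCastOp(unsigned SrcBits, unsigned DstBits, bool IsSigned) {
        if (SrcBits == DstBits) return BitCast;
        if (SrcBits > DstBits)  return Trunc;
        return IsSigned ? SExt : ZExt;
    }

    int main() {
        std::cout << (getIntegerCastOp(32, 64, false) == ZExt)  << '\n'   // 1
                  << (getIntegerCastOp(64, 32, true)  == Trunc) << '\n';  // 1
    }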
- return SrcBitSize == DstBitSize; + return SrcTy->getPrimitiveSizeInBits() == DstTy->getPrimitiveSizeInBits(); } } diff --git a/lib/VMCore/LeakDetector.cpp b/lib/VMCore/LeakDetector.cpp index 1bf9171..b5926bc 100644 --- a/lib/VMCore/LeakDetector.cpp +++ b/lib/VMCore/LeakDetector.cpp @@ -14,7 +14,10 @@ #include "llvm/Support/LeakDetector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/Support/Compiler.h" +#include "llvm/Support/ManagedStatic.h" #include "llvm/Support/Streams.h" +#include "llvm/System/RWMutex.h" +#include "llvm/System/Threading.h" #include "llvm/Value.h" using namespace llvm; @@ -29,16 +32,29 @@ namespace { static void print(const Value* P) { cerr << *P; } }; + ManagedStatic<sys::SmartRWMutex<true> > LeakDetectorLock; + template <typename T> struct VISIBILITY_HIDDEN LeakDetectorImpl { - explicit LeakDetectorImpl(const char* const name) : Cache(0), Name(name) { } + explicit LeakDetectorImpl(const char* const name = "") : + Cache(0), Name(name) { } + void clear() { + Cache = 0; + Ts.clear(); + } + + void setName(const char* n) { + Name = n; + } + // Because the most common usage pattern, by far, is to add a // garbage object, then remove it immediately, we optimize this // case. When an object is added, it is not added to the set // immediately, it is added to the CachedValue Value. If it is // immediately removed, no set search need be performed. void addGarbage(const T* o) { + sys::SmartScopedWriter<true> Writer(&*LeakDetectorLock); if (Cache) { assert(Ts.count(Cache) == 0 && "Object already in set!"); Ts.insert(Cache); @@ -47,6 +63,7 @@ namespace { } void removeGarbage(const T* o) { + sys::SmartScopedWriter<true> Writer(&*LeakDetectorLock); if (o == Cache) Cache = 0; // Cache hit else @@ -56,6 +73,7 @@ namespace { bool hasGarbage(const std::string& Message) { addGarbage(0); // Flush the Cache + sys::SmartScopedReader<true> Reader(&*LeakDetectorLock); assert(Cache == 0 && "No value should be cached anymore!"); if (!Ts.empty()) { @@ -70,58 +88,48 @@ namespace { return true; } + return false; } private: SmallPtrSet<const T*, 8> Ts; const T* Cache; - const char* const Name; + const char* Name; }; - static LeakDetectorImpl<void> *Objects; - static LeakDetectorImpl<Value> *LLVMObjects; - - static LeakDetectorImpl<void> &getObjects() { - if (Objects == 0) - Objects = new LeakDetectorImpl<void>("GENERIC"); - return *Objects; - } - - static LeakDetectorImpl<Value> &getLLVMObjects() { - if (LLVMObjects == 0) - LLVMObjects = new LeakDetectorImpl<Value>("LLVM"); - return *LLVMObjects; - } + static ManagedStatic<LeakDetectorImpl<void> > Objects; + static ManagedStatic<LeakDetectorImpl<Value> > LLVMObjects; static void clearGarbage() { - delete Objects; - delete LLVMObjects; - Objects = 0; - LLVMObjects = 0; + Objects->clear(); + LLVMObjects->clear(); } } void LeakDetector::addGarbageObjectImpl(void *Object) { - getObjects().addGarbage(Object); + Objects->addGarbage(Object); } void LeakDetector::addGarbageObjectImpl(const Value *Object) { - getLLVMObjects().addGarbage(Object); + LLVMObjects->addGarbage(Object); } void LeakDetector::removeGarbageObjectImpl(void *Object) { - getObjects().removeGarbage(Object); + Objects->removeGarbage(Object); } void LeakDetector::removeGarbageObjectImpl(const Value *Object) { - getLLVMObjects().removeGarbage(Object); + LLVMObjects->removeGarbage(Object); } void LeakDetector::checkForGarbageImpl(const std::string &Message) { + Objects->setName("GENERIC"); + LLVMObjects->setName("LLVM"); + // use non-short-circuit version so that both checks are 
performed - if (getObjects().hasGarbage(Message) | - getLLVMObjects().hasGarbage(Message)) + if (Objects->hasGarbage(Message) | + LLVMObjects->hasGarbage(Message)) cerr << "\nThis is probably because you removed an object, but didn't " << "delete it. Please check your code for memory leaks.\n"; diff --git a/lib/VMCore/Mangler.cpp b/lib/VMCore/Mangler.cpp index 0bd190a..1a68b89 100644 --- a/lib/VMCore/Mangler.cpp +++ b/lib/VMCore/Mangler.cpp @@ -14,6 +14,7 @@ #include "llvm/Support/Mangler.h" #include "llvm/DerivedTypes.h" #include "llvm/Module.h" +#include "llvm/System/Atomic.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringMap.h" @@ -164,8 +165,12 @@ std::string Mangler::getValueName(const GlobalValue *GV, const char * Suffix) { } else if (!GV->hasName()) { // Must mangle the global into a unique ID. unsigned TypeUniqueID = getTypeID(GV->getType()); - static unsigned GlobalID = 0; - Name = "__unnamed_" + utostr(TypeUniqueID) + "_" + utostr(GlobalID++); + static uint32_t GlobalID = 0; + + unsigned OldID = GlobalID; + sys::AtomicIncrement(&GlobalID); + + Name = "__unnamed_" + utostr(TypeUniqueID) + "_" + utostr(OldID); } else { if (GV->hasPrivateLinkage()) Name = makeNameProper(GV->getName() + Suffix, Prefix, PrivatePrefix); diff --git a/lib/VMCore/Pass.cpp b/lib/VMCore/Pass.cpp index 6db5d7e..e943e31 100644 --- a/lib/VMCore/Pass.cpp +++ b/lib/VMCore/Pass.cpp @@ -19,6 +19,8 @@ #include "llvm/ModuleProvider.h" #include "llvm/ADT/STLExtras.h" #include "llvm/Support/ManagedStatic.h" +#include "llvm/System/Atomic.h" +#include "llvm/System/Threading.h" #include <algorithm> #include <map> #include <set> @@ -192,8 +194,26 @@ static std::vector<PassRegistrationListener*> *Listeners = 0; // ressurection after llvm_shutdown is run. static PassRegistrar *getPassRegistrar() { static PassRegistrar *PassRegistrarObj = 0; - if (!PassRegistrarObj) + + // Use double-checked locking to safely initialize the registrar when + // we're running in multithreaded mode. + PassRegistrar* tmp = PassRegistrarObj; + if (llvm_is_multithreaded()) { + sys::MemoryFence(); + if (!tmp) { + llvm_acquire_global_lock(); + tmp = PassRegistrarObj; + if (!tmp) { + tmp = new PassRegistrar(); + sys::MemoryFence(); + PassRegistrarObj = tmp; + } + llvm_release_global_lock(); + } + } else if (!tmp) { PassRegistrarObj = new PassRegistrar(); + } + return PassRegistrarObj; } diff --git a/lib/VMCore/PassManager.cpp b/lib/VMCore/PassManager.cpp index 4799915..86cf10e 100644 --- a/lib/VMCore/PassManager.cpp +++ b/lib/VMCore/PassManager.cpp @@ -20,6 +20,8 @@ #include "llvm/Support/Streams.h" #include "llvm/Support/ManagedStatic.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/System/Mutex.h" +#include "llvm/System/Threading.h" #include "llvm/Analysis/Dominators.h" #include "llvm-c/Core.h" #include <algorithm> @@ -355,6 +357,9 @@ namespace { /// amount of time each pass takes to execute. This only happens when /// -time-passes is enabled on the command line. 
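The Mangler hunk above swaps a bare GlobalID++ for sys::AtomicIncrement. A sketch with std::atomic: a single fetch_add both returns the old value and bumps the counter in one atomic step, whereas reading OldID and then incrementing separately, as the hunk appears to do, still leaves a window where two threads read the same OldID.

    #include <atomic>
    #include <cstdint>
    #include <iostream>
    #include <string>

    static std::atomic<std::uint32_t> GlobalID(0);

    // Unique names for unnamed globals; no two threads can mint the same ID.
    std::string nextUnnamedName(unsigned TypeUniqueID) {
        std::uint32_t OldID = GlobalID.fetch_add(1);
        return "__unnamed_" + std::to_string(TypeUniqueID) + "_" +
               std::to_string(OldID);
    }

    int main() {
        std::cout << nextUnnamedName(3) << '\n'   // __unnamed_3_0
                  << nextUnnamedName(3) << '\n';  // __unnamed_3_1
    }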
/// + +static ManagedStatic<sys::SmartMutex<true> > TimingInfoMutex; + class VISIBILITY_HIDDEN TimingInfo { std::map<Pass*, Timer> TimingData; TimerGroup TG; @@ -379,15 +384,18 @@ public: if (dynamic_cast<PMDataManager *>(P)) return; + sys::SmartScopedLock<true> Lock(&*TimingInfoMutex); std::map<Pass*, Timer>::iterator I = TimingData.find(P); if (I == TimingData.end()) I=TimingData.insert(std::make_pair(P, Timer(P->getPassName(), TG))).first; I->second.startTimer(); } + void passEnded(Pass *P) { if (dynamic_cast<PMDataManager *>(P)) return; + sys::SmartScopedLock<true> Lock(&*TimingInfoMutex); std::map<Pass*, Timer>::iterator I = TimingData.find(P); assert(I != TimingData.end() && "passStarted/passEnded not nested right!"); I->second.stopTimer(); diff --git a/lib/VMCore/Type.cpp b/lib/VMCore/Type.cpp index a1e6c42..5df7f12 100644 --- a/lib/VMCore/Type.cpp +++ b/lib/VMCore/Type.cpp @@ -23,6 +23,9 @@ #include "llvm/Support/ManagedStatic.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/System/Mutex.h" +#include "llvm/System/RWMutex.h" +#include "llvm/System/Threading.h" #include <algorithm> #include <cstdarg> using namespace llvm; @@ -40,6 +43,14 @@ AbstractTypeUser::~AbstractTypeUser() {} // Type Class Implementation //===----------------------------------------------------------------------===// +// Reader/writer lock used for guarding access to the type maps. +static ManagedStatic<sys::SmartRWMutex<true> > TypeMapLock; + +// Recursive lock used for guarding access to AbstractTypeUsers. +// NOTE: The true template parameter means this will no-op when we're not in +// multithreaded mode. +static ManagedStatic<sys::SmartMutex<true> > AbstractTypeUsersLock; + // Concrete/Abstract TypeDescriptions - We lazily calculate type descriptions // for types as they are needed. Because resolution of types must invalidate // all of the abstract type descriptions, we keep them in a seperate map to make @@ -112,6 +123,14 @@ const Type *Type::getVAArgsPromotedType() const { return this; } +/// getScalarType - If this is a vector type, return the element type, +/// otherwise return this. +const Type *Type::getScalarType() const { + if (const VectorType *VTy = dyn_cast<VectorType>(this)) + return VTy->getElementType(); + return this; +} + /// isIntOrIntVector - Return true if this is an integer type or a vector of /// integer types. /// @@ -174,6 +193,28 @@ unsigned Type::getPrimitiveSizeInBits() const { } } +/// getScalarSizeInBits - If this is a vector type, return the +/// getPrimitiveSizeInBits value for the element type. Otherwise return the +/// getPrimitiveSizeInBits value for this type. +unsigned Type::getScalarSizeInBits() const { + return getScalarType()->getPrimitiveSizeInBits(); +} + +/// getFPMantissaWidth - Return the width of the mantissa of this type. This +/// is only valid on floating point types. If the FP type does not +/// have a stable mantissa (e.g. ppc long double), this method returns -1. +int Type::getFPMantissaWidth() const { + if (const VectorType *VTy = dyn_cast<VectorType>(this)) + return VTy->getElementType()->getFPMantissaWidth(); + assert(isFloatingPoint() && "Not a floating point type!"); + if (ID == FloatTyID) return 24; + if (ID == DoubleTyID) return 53; + if (ID == X86_FP80TyID) return 64; + if (ID == FP128TyID) return 113; + assert(ID == PPC_FP128TyID && "unknown fp type"); + return -1; +} + /// isSizedDerivedType - Derived types like structures and arrays are sized /// iff all of the members of the type are sized as well. 
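getScalarType and getScalarSizeInBits, added in the Type.cpp hunk above, are the helpers behind the commit-wide getPrimitiveSizeInBits replacements: a vector type defers to its element type, a scalar answers for itself. In miniature:

    #include <iostream>

    // Toy model; Bits is the primitive size of a scalar, Elt the element
    // type when this is a vector (lane count omitted, it doesn't matter here).
    struct Ty {
        unsigned Bits;
        const Ty *Elt;

        const Ty *getScalarType() const { return Elt ? Elt : this; }
        unsigned getScalarSizeInBits() const { return getScalarType()->Bits; }
    };

    int main() {
        const Ty I32   = { 32, nullptr };
        const Ty V4I32 = { 0, &I32 };       // stands in for <4 x i32>
        std::cout << I32.getScalarSizeInBits() << ' '
                  << V4I32.getScalarSizeInBits() << '\n';   // 32 32
    }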
/// their size is relatively uncommon, move this operation out of line.
@@ -414,8 +455,29 @@ void DerivedType::dropAllTypeUses() {
   if (NumContainedTys != 0) {
     // The type must stay abstract.  To do this, we insert a pointer to a type
     // that will never get resolved, thus will always be abstract.
-    static Type *AlwaysOpaqueTy = OpaqueType::get();
-    static PATypeHolder Holder(AlwaysOpaqueTy);
+    static Type *AlwaysOpaqueTy = 0;
+    static PATypeHolder* Holder = 0;
+    Type *tmp = AlwaysOpaqueTy;
+    if (llvm_is_multithreaded()) {
+      sys::MemoryFence();
+      if (!tmp) {
+        llvm_acquire_global_lock();
+        tmp = AlwaysOpaqueTy;
+        if (!tmp) {
+          tmp = OpaqueType::get();
+          PATypeHolder* tmp2 = new PATypeHolder(tmp);
+          sys::MemoryFence();
+          AlwaysOpaqueTy = tmp;
+          Holder = tmp2;
+        }
+
+        llvm_release_global_lock();
+      }
+    } else {
+      AlwaysOpaqueTy = OpaqueType::get();
+      Holder = new PATypeHolder(AlwaysOpaqueTy);
+    }
+
     ContainedTys[0] = AlwaysOpaqueTy;
 
     // Change the rest of the types to be Int32Ty's. It doesn't matter what we
@@ -818,7 +880,7 @@ public:
       // We already have this type in the table.  Get rid of the newly refined
       // type.
       TypeClass *NewTy = cast<TypeClass>((Type*)I->second.get());
-      Ty->refineAbstractTypeTo(NewTy);
+      Ty->unlockedRefineAbstractTypeTo(NewTy);
       return;
     }
   } else {
@@ -854,7 +916,7 @@ public:
       }
       TypesByHash.erase(Entry);
     }
-    Ty->refineAbstractTypeTo(NewTy);
+    Ty->unlockedRefineAbstractTypeTo(NewTy);
     return;
   }
 }
@@ -938,15 +1000,30 @@ const IntegerType *IntegerType::get(unsigned NumBits) {
   default:
     break;
   }
-
+
   IntegerValType IVT(NumBits);
-  IntegerType *ITy = IntegerTypes->get(IVT);
-  if (ITy) return ITy;                 // Found a match, return it!
-
-  // Value not found.  Derive a new type!
-  ITy = new IntegerType(NumBits);
-  IntegerTypes->add(IVT, ITy);
-
+  IntegerType *ITy = 0;
+
+  // First, see if the type is already in the table, for which
+  // a reader lock suffices.
+  TypeMapLock->reader_acquire();
+  ITy = IntegerTypes->get(IVT);
+  TypeMapLock->reader_release();
+
+  if (!ITy) {
+    // OK, not in the table, get a writer lock.
+    sys::SmartScopedWriter<true> Writer(&*TypeMapLock);
+    ITy = IntegerTypes->get(IVT);
+
+    // We need to _recheck_ the table in case someone
+    // put it in between when we released the reader lock
+    // and when we gained the writer lock!
+    if (!ITy) {
+      // Value not found.  Derive a new type!
+      ITy = new IntegerType(NumBits);
+      IntegerTypes->add(IVT, ITy);
+    }
+  }
 #ifdef DEBUG_MERGE_TYPES
   DOUT << "Derived new type: " << *ITy << "\n";
 #endif
@@ -1010,14 +1087,26 @@ FunctionType *FunctionType::get(const Type *ReturnType,
                                 const std::vector<const Type*> &Params,
                                 bool isVarArg) {
   FunctionValType VT(ReturnType, Params, isVarArg);
-  FunctionType *FT = FunctionTypes->get(VT);
-  if (FT)
-    return FT;
-
-  FT = (FunctionType*) operator new(sizeof(FunctionType) +
-                                sizeof(PATypeHandle)*(Params.size()+1));
-  new (FT) FunctionType(ReturnType, Params, isVarArg);
-  FunctionTypes->add(VT, FT);
+  FunctionType *FT = 0;
+
+  TypeMapLock->reader_acquire();
+  FT = FunctionTypes->get(VT);
+  TypeMapLock->reader_release();
+
+  if (!FT) {
+    sys::SmartScopedWriter<true> Writer(&*TypeMapLock);
+
+    // Have to check again here, because it might have
+    // been inserted between when we released the reader
+    // lock and when we acquired the writer lock.
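getPassRegistrar above and dropAllTypeUses here share one double-checked locking shape: an unlocked read, then, only if that read saw null, a second read under the global lock before constructing and publishing the object, with memory fences ordering the publication. In C++11 terms the fences map onto acquire/release atomics; a sketch under that assumption (Registry is a placeholder type, not an LLVM class):

    #include <atomic>
    #include <mutex>

    struct Registry { /* ... */ };

    static std::atomic<Registry*> GlobalRegistry{nullptr};
    static std::mutex InitLock;

    Registry *getRegistry() {
      Registry *Tmp = GlobalRegistry.load(std::memory_order_acquire);
      if (!Tmp) {                           // first check: no lock taken
        std::lock_guard<std::mutex> Guard(InitLock);
        Tmp = GlobalRegistry.load(std::memory_order_relaxed);
        if (!Tmp) {                         // second check: under the lock
          Tmp = new Registry();
          // Release-store publishes the fully constructed object, the
          // role the second sys::MemoryFence plays in the patch.
          GlobalRegistry.store(Tmp, std::memory_order_release);
        }
      }
      return Tmp;
    }

The second check is the essential part: two threads can both fail the unlocked test, but only the first to take the lock constructs the object; the other sees the published pointer and backs off.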
+    FT = FunctionTypes->get(VT);
+    if (!FT) {
+      FT = (FunctionType*) operator new(sizeof(FunctionType) +
+                                sizeof(PATypeHandle)*(Params.size()+1));
+      new (FT) FunctionType(ReturnType, Params, isVarArg);
+      FunctionTypes->add(VT, FT);
+    }
+  }
 
 #ifdef DEBUG_MERGE_TYPES
   DOUT << "Derived new type: " << FT << "\n";
 #endif
@@ -1049,20 +1138,30 @@ public:
   }
 };
 }
-static ManagedStatic<TypeMap<ArrayValType, ArrayType> > ArrayTypes;
+static ManagedStatic<TypeMap<ArrayValType, ArrayType> > ArrayTypes;
 
 ArrayType *ArrayType::get(const Type *ElementType, uint64_t NumElements) {
   assert(ElementType && "Can't get array of <null> types!");
   assert(isValidElementType(ElementType) && "Invalid type for array element!");
 
   ArrayValType AVT(ElementType, NumElements);
-  ArrayType *AT = ArrayTypes->get(AVT);
-  if (AT) return AT;           // Found a match, return it!
-
-  // Value not found.  Derive a new type!
-  ArrayTypes->add(AVT, AT = new ArrayType(ElementType, NumElements));
-
+  ArrayType *AT = 0;
+
+  TypeMapLock->reader_acquire();
+  AT = ArrayTypes->get(AVT);
+  TypeMapLock->reader_release();
+
+  if (!AT) {
+    sys::SmartScopedWriter<true> Writer(&*TypeMapLock);
+
+    // Recheck.  Might have changed between release and acquire.
+    AT = ArrayTypes->get(AVT);
+    if (!AT) {
+      // Value not found.  Derive a new type!
+      ArrayTypes->add(AVT, AT = new ArrayType(ElementType, NumElements));
+    }
+  }
 #ifdef DEBUG_MERGE_TYPES
   DOUT << "Derived new type: " << *AT << "\n";
 #endif
@@ -1106,19 +1205,27 @@ public:
   }
 };
 }
-static ManagedStatic<TypeMap<VectorValType, VectorType> > VectorTypes;
+static ManagedStatic<TypeMap<VectorValType, VectorType> > VectorTypes;
 
 VectorType *VectorType::get(const Type *ElementType, unsigned NumElements) {
   assert(ElementType && "Can't get vector of <null> types!");
 
   VectorValType PVT(ElementType, NumElements);
-  VectorType *PT = VectorTypes->get(PVT);
-  if (PT) return PT;  // Found a match, return it!
-
-  // Value not found.  Derive a new type!
-  VectorTypes->add(PVT, PT = new VectorType(ElementType, NumElements));
-
+  VectorType *PT = 0;
+
+  TypeMapLock->reader_acquire();
+  PT = VectorTypes->get(PVT);
+  TypeMapLock->reader_release();
+
+  if (!PT) {
+    sys::SmartScopedWriter<true> Writer(&*TypeMapLock);
+    PT = VectorTypes->get(PVT);
+    // Recheck.  Might have changed between release and acquire.
+    if (!PT) {
+      VectorTypes->add(PVT, PT = new VectorType(ElementType, NumElements));
+    }
+  }
 #ifdef DEBUG_MERGE_TYPES
   DOUT << "Derived new type: " << *PT << "\n";
 #endif
@@ -1173,15 +1280,24 @@ static ManagedStatic<TypeMap<StructValType, StructType> > StructTypes;
 
 StructType *StructType::get(const std::vector<const Type*> &ETypes,
                             bool isPacked) {
   StructValType STV(ETypes, isPacked);
-  StructType *ST = StructTypes->get(STV);
-  if (ST) return ST;
-
-  // Value not found.  Derive a new type!
-  ST = (StructType*) operator new(sizeof(StructType) +
-                                  sizeof(PATypeHandle) * ETypes.size());
-  new (ST) StructType(ETypes, isPacked);
-  StructTypes->add(STV, ST);
-
+  StructType *ST = 0;
+
+  TypeMapLock->reader_acquire();
+  ST = StructTypes->get(STV);
+  TypeMapLock->reader_release();
+
+  if (!ST) {
+    sys::SmartScopedWriter<true> Writer(&*TypeMapLock);
+    ST = StructTypes->get(STV);
+    // Recheck.  Might have changed between release and acquire.
+    if (!ST) {
+      // Value not found.  Derive a new type!
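IntegerType, FunctionType, ArrayType, and VectorType above (and StructType and PointerType below) all follow one uniquing-table pattern: an optimistic lookup under the shared reader lock and, on a miss, a second lookup under the exclusive writer lock before inserting, because another thread may have created the entry in the unlocked window between the two locks. A generic sketch of the pattern, assuming C++17's std::shared_mutex in place of sys::SmartRWMutex:

    #include <map>
    #include <shared_mutex>

    template <typename Key, typename T>
    class UniquingTable {
      std::map<Key, T*> Entries;
      std::shared_mutex Lock; // stands in for TypeMapLock
    public:
      T *getOrCreate(const Key &K) {
        {
          // Fast path: many readers can hold the shared lock at once.
          std::shared_lock<std::shared_mutex> Reader(Lock);
          auto I = Entries.find(K);
          if (I != Entries.end())
            return I->second;
        }
        // Slow path: exclusive lock, then recheck, since another thread
        // may have inserted K after we dropped the reader lock.
        std::unique_lock<std::shared_mutex> Writer(Lock);
        auto I = Entries.find(K);
        if (I != Entries.end())
          return I->second;
        T *New = new T(K); // assumes T is constructible from Key
        Entries.emplace(K, New);
        return New;
      }
    };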
+      ST = (StructType*) operator new(sizeof(StructType) +
+                                      sizeof(PATypeHandle) * ETypes.size());
+      new (ST) StructType(ETypes, isPacked);
+      StructTypes->add(STV, ST);
+    }
+  }
 #ifdef DEBUG_MERGE_TYPES
   DOUT << "Derived new type: " << *ST << "\n";
 #endif
@@ -1249,12 +1365,21 @@ PointerType *PointerType::get(const Type *ValueType, unsigned AddressSpace) {
   assert(isValidElementType(ValueType) && "Invalid type for pointer element!");
 
   PointerValType PVT(ValueType, AddressSpace);
-  PointerType *PT = PointerTypes->get(PVT);
-  if (PT) return PT;
-
-  // Value not found.  Derive a new type!
-  PointerTypes->add(PVT, PT = new PointerType(ValueType, AddressSpace));
-
+  PointerType *PT = 0;
+
+  TypeMapLock->reader_acquire();
+  PT = PointerTypes->get(PVT);
+  TypeMapLock->reader_release();
+
+  if (!PT) {
+    sys::SmartScopedWriter<true> Writer(&*TypeMapLock);
+    PT = PointerTypes->get(PVT);
+    // Recheck.  Might have changed between release and acquire.
+    if (!PT) {
+      // Value not found.  Derive a new type!
+      PointerTypes->add(PVT, PT = new PointerType(ValueType, AddressSpace));
+    }
+  }
 #ifdef DEBUG_MERGE_TYPES
   DOUT << "Derived new type: " << *PT << "\n";
 #endif
@@ -1281,12 +1406,24 @@ bool PointerType::isValidElementType(const Type *ElemTy) {
 // Derived Type Refinement Functions
 //===----------------------------------------------------------------------===//
 
+// addAbstractTypeUser - Notify an abstract type that there is a new user of
+// it.  This function is called primarily by the PATypeHandle class.
+void Type::addAbstractTypeUser(AbstractTypeUser *U) const {
+  assert(isAbstract() && "addAbstractTypeUser: Current type not abstract!");
+  AbstractTypeUsersLock->acquire();
+  AbstractTypeUsers.push_back(U);
+  AbstractTypeUsersLock->release();
+}
+
+
 // removeAbstractTypeUser - Notify an abstract type that a user of the class
 // no longer has a handle to the type.  This function is called primarily by
 // the PATypeHandle class.  When there are no users of the abstract type, it
 // is annihilated, because there is no way to get a reference to it ever again.
 //
 void Type::removeAbstractTypeUser(AbstractTypeUser *U) const {
+  AbstractTypeUsersLock->acquire();
+
   // Search from back to front because we will notify users from back to
   // front.  Also, it is likely that there will be a stack like behavior to
   // users that register and unregister users.
@@ -1310,16 +1447,20 @@ void Type::removeAbstractTypeUser(AbstractTypeUser *U) const {
   DOUT << "DELETEing unused abstract type: <" << *this
        << ">[" << (void*)this << "]" << "\n";
 #endif
-      this->destroy();
+
+      this->destroy();
     }
+
+  AbstractTypeUsersLock->release();
 }
 
-// refineAbstractTypeTo - This function is used when it is discovered that
-// the 'this' abstract type is actually equivalent to the NewType specified.
-// This causes all users of 'this' to switch to reference the more concrete type
-// NewType and for 'this' to be deleted.
+// unlockedRefineAbstractTypeTo - This function is used when it is discovered
+// that the 'this' abstract type is actually equivalent to the NewType
+// specified.  This causes all users of 'this' to switch to reference the more
+// concrete type NewType and for 'this' to be deleted.  Only used for internal
+// callers.
 //
-void DerivedType::refineAbstractTypeTo(const Type *NewType) {
+void DerivedType::unlockedRefineAbstractTypeTo(const Type *NewType) {
   assert(isAbstract() && "refineAbstractTypeTo: Current type is not abstract!");
   assert(this != NewType && "Can't refine to myself!");
   assert(ForwardType == 0 && "This type has already been refined!");
@@ -1338,8 +1479,7 @@ void DerivedType::refineAbstractTypeTo(const Type *NewType) {
   // refined, that we will not continue using a dead reference...
   //
   PATypeHolder NewTy(NewType);
-
   // Any PATypeHolders referring to this type will now automatically forward to
   // the type we are resolved to.
   ForwardType = NewType;
   if (NewType->isAbstract())
@@ -1362,6 +1502,7 @@ void DerivedType::refineAbstractTypeTo(const Type *NewType) {
   // will not cause users to drop off of the use list.  If we resolve to ourself
   // we succeed!
   //
+  AbstractTypeUsersLock->acquire();
   while (!AbstractTypeUsers.empty() && NewTy != this) {
     AbstractTypeUser *User = AbstractTypeUsers.back();
 
@@ -1377,6 +1518,7 @@ void DerivedType::refineAbstractTypeTo(const Type *NewType) {
     assert(AbstractTypeUsers.size() != OldSize &&
            "AbsTyUser did not remove self from user list!");
   }
+  AbstractTypeUsersLock->release();
 
   // If we were successful removing all users from the type, 'this' will be
   // deleted when the last PATypeHolder is destroyed or updated from this type.
@@ -1384,6 +1526,16 @@ void DerivedType::refineAbstractTypeTo(const Type *NewType) {
   // destroyed.
 }
 
+// refineAbstractTypeTo - This function is used by external callers to notify
+// us that this abstract type is equivalent to another type.
+//
+void DerivedType::refineAbstractTypeTo(const Type *NewType) {
+  // All recursive calls will go through unlockedRefineAbstractTypeTo,
+  // to avoid deadlock problems.
+  sys::SmartScopedWriter<true> Writer(&*TypeMapLock);
+  unlockedRefineAbstractTypeTo(NewType);
+}
+
 // notifyUsesThatTypeBecameConcrete - Notify AbstractTypeUsers of this type that
 // the current type has transitioned from being abstract to being concrete.
 //
@@ -1392,6 +1544,7 @@ void DerivedType::notifyUsesThatTypeBecameConcrete() {
   DOUT << "typeIsREFINED type: " << (void*)this << " " << *this << "\n";
 #endif
 
+  AbstractTypeUsersLock->acquire();
   unsigned OldSize = AbstractTypeUsers.size(); OldSize=OldSize;
   while (!AbstractTypeUsers.empty()) {
     AbstractTypeUser *ATU = AbstractTypeUsers.back();
@@ -1400,6 +1553,7 @@ void DerivedType::notifyUsesThatTypeBecameConcrete() {
     assert(AbstractTypeUsers.size() < OldSize-- &&
            "AbstractTypeUser did not remove itself from the use list!");
   }
+  AbstractTypeUsersLock->release();
 }
 
 // refineAbstractType - Called when a contained type is found to be more
diff --git a/lib/VMCore/TypeSymbolTable.cpp b/lib/VMCore/TypeSymbolTable.cpp
index 475d719..5ae60e2 100644
--- a/lib/VMCore/TypeSymbolTable.cpp
+++ b/lib/VMCore/TypeSymbolTable.cpp
@@ -14,13 +14,18 @@
 #include "llvm/TypeSymbolTable.h"
 #include "llvm/DerivedTypes.h"
 #include "llvm/ADT/StringExtras.h"
+#include "llvm/Support/ManagedStatic.h"
 #include "llvm/Support/Streams.h"
+#include "llvm/System/RWMutex.h"
+#include "llvm/System/Threading.h"
 #include <algorithm>
 using namespace llvm;
 
 #define DEBUG_SYMBOL_TABLE 0
 #define DEBUG_ABSTYPE 0
 
+static ManagedStatic<sys::SmartRWMutex<true> > TypeSymbolTableLock;
+
 TypeSymbolTable::~TypeSymbolTable() {
   // Drop all abstract type references in the type plane...
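The refineAbstractTypeTo split is the standard cure for self-deadlock on a non-recursive lock: the public entry point takes the lock exactly once and delegates to an unlocked worker, while internal callers that already hold the lock (the TypeMap code above) call the worker directly. Schematically (types and bodies here are placeholders):

    #include <mutex>

    class TypeResolver {
      std::mutex Lock; // plays the role of TypeMapLock
      // Invariant: Lock is already held by the caller.  May recurse freely.
      void refineUnlocked(int NewTy) {
        /* ...mutate shared tables; may call refineUnlocked again... */
      }
    public:
      // Entry point for external callers: acquire once, then do the work.
      void refine(int NewTy) {
        std::lock_guard<std::mutex> Guard(Lock);
        refineUnlocked(NewTy);
      }
    };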
   for (iterator TI = tmap.begin(), TE = tmap.end(); TI != TE; ++TI) {
@@ -31,6 +36,9 @@ TypeSymbolTable::~TypeSymbolTable() {
 
 std::string TypeSymbolTable::getUniqueName(const std::string &BaseName) const {
   std::string TryName = BaseName;
+
+  sys::SmartScopedReader<true> Reader(&*TypeSymbolTableLock);
+
   const_iterator End = tmap.end();
 
   // See if the name exists
@@ -41,16 +49,20 @@ std::string TypeSymbolTable::getUniqueName(const std::string &BaseName) const {
 
 // lookup a type by name - returns null on failure
 Type* TypeSymbolTable::lookup(const std::string& Name) const {
+  sys::SmartScopedReader<true> Reader(&*TypeSymbolTableLock);
+
   const_iterator TI = tmap.find(Name);
+  Type* result = 0;
   if (TI != tmap.end())
-    return const_cast<Type*>(TI->second);
-  return 0;
+    result = const_cast<Type*>(TI->second);
+  return result;
 }
 
 // remove - Remove a type from the symbol table...
 Type* TypeSymbolTable::remove(iterator Entry) {
+  TypeSymbolTableLock->writer_acquire();
+
   assert(Entry != tmap.end() && "Invalid entry to remove!");
-
   const Type* Result = Entry->second;
 
 #if DEBUG_SYMBOL_TABLE
@@ -59,6 +71,8 @@ Type* TypeSymbolTable::remove(iterator Entry) {
 #endif
 
   tmap.erase(Entry);
+
+  TypeSymbolTableLock->writer_release();
 
   // If we are removing an abstract type, remove the symbol table from its use
   // list...
@@ -79,6 +93,8 @@ void TypeSymbolTable::insert(const std::string& Name, const Type* T) {
   assert(T && "Can't insert null type into symbol table!");
 
+  TypeSymbolTableLock->writer_acquire();
+
   if (tmap.insert(make_pair(Name, T)).second) {
     // Type inserted fine with no conflict.
 
@@ -103,6 +119,8 @@ void TypeSymbolTable::insert(const std::string& Name, const Type* T) {
     // Insert the tmap entry
     tmap.insert(make_pair(UniqueName, T));
   }
+
+  TypeSymbolTableLock->writer_release();
 
   // If we are adding an abstract type, add the symbol table to its use list.
   if (T->isAbstract()) {
@@ -116,7 +134,8 @@ void TypeSymbolTable::insert(const std::string& Name, const Type* T) {
 // This function is called when one of the types in the type plane is refined
 void TypeSymbolTable::refineAbstractType(const DerivedType *OldType,
                                          const Type *NewType) {
-
+  sys::SmartScopedReader<true> Reader(&*TypeSymbolTableLock);
+
   // Loop over all of the types in the symbol table, replacing any references
   // to OldType with references to NewType.  Note that there may be multiple
   // occurrences, and although we only need to remove one at a time, it's
@@ -146,6 +165,7 @@ void TypeSymbolTable::typeBecameConcrete(const DerivedType *AbsTy) {
   // Loop over all of the types in the symbol table, dropping any abstract
   // type user entries for AbsTy which occur because there are names for the
   // type.
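getUniqueName holds the shared lock across its entire probing loop, so the set of taken names cannot change mid-search and the returned name is consistent with one snapshot of the table. A sketch of that shape; the suffix scheme is illustrative rather than TypeSymbolTable's exact algorithm:

    #include <map>
    #include <shared_mutex>
    #include <string>

    class SymbolTable {
      std::map<std::string, int> Map;
      mutable std::shared_mutex Lock; // stands in for TypeSymbolTableLock
    public:
      std::string getUniqueName(const std::string &Base) const {
        // One reader lock for the whole loop keeps the snapshot stable.
        std::shared_lock<std::shared_mutex> Reader(Lock);
        std::string Try = Base;
        unsigned Counter = 2;
        while (Map.count(Try)) // probe until an unused name is found
          Try = Base + std::to_string(Counter++);
        return Try;
      }
    };

Note that the guarantee is only as strong as the snapshot: unless the caller also synchronizes the subsequent insert, another thread can claim the returned name first.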
+  sys::SmartScopedReader<true> Reader(&*TypeSymbolTableLock);
   for (iterator TI = begin(), TE = end(); TI != TE; ++TI)
     if (TI->second == const_cast<Type*>(static_cast<const Type*>(AbsTy)))
       AbsTy->removeAbstractTypeUser(this);
@@ -159,6 +179,7 @@ static void DumpTypes(const std::pair<const std::string, const Type*>& T ) {
 
 void TypeSymbolTable::dump() const {
   cerr << "TypeSymbolPlane: ";
+  sys::SmartScopedReader<true> Reader(&*TypeSymbolTableLock);
   for_each(tmap.begin(), tmap.end(), DumpTypes);
 }
 
diff --git a/lib/VMCore/Value.cpp b/lib/VMCore/Value.cpp
index 3af161f..c952b78 100644
--- a/lib/VMCore/Value.cpp
+++ b/lib/VMCore/Value.cpp
@@ -22,6 +22,8 @@
 #include "llvm/Support/LeakDetector.h"
 #include "llvm/Support/ManagedStatic.h"
 #include "llvm/Support/ValueHandle.h"
+#include "llvm/System/RWMutex.h"
+#include "llvm/System/Threading.h"
 #include "llvm/ADT/DenseMap.h"
 #include <algorithm>
 using namespace llvm;
@@ -405,6 +407,7 @@ Value *Value::DoPHITranslation(const BasicBlock *CurBB,
 /// not a value has an entry in this map.
 typedef DenseMap<Value*, ValueHandleBase*> ValueHandlesTy;
 static ManagedStatic<ValueHandlesTy> ValueHandles;
+static ManagedStatic<sys::SmartRWMutex<true> > ValueHandlesLock;
 
 /// AddToExistingUseList - Add this ValueHandle to the use list for VP, where
 /// List is known to point into the existing use list.
@@ -427,9 +430,11 @@ void ValueHandleBase::AddToUseList() {
   if (VP->HasValueHandle) {
     // If this value already has a ValueHandle, then it must be in the
    // ValueHandles map already.
+    sys::SmartScopedReader<true> Reader(&*ValueHandlesLock);
     ValueHandleBase *&Entry = (*ValueHandles)[VP];
     assert(Entry != 0 && "Value doesn't have any handles?");
-    return AddToExistingUseList(&Entry);
+    AddToExistingUseList(&Entry);
+    return;
   }
 
   // Ok, it doesn't have any handles yet, so we must insert it into the
   // DenseMap.  However, doing this insertion could cause the DenseMap to
   // reallocate itself, which would invalidate all of the PrevP pointers that
   // point into the old table.  Handle this by checking for reallocation and
   // updating the stale pointers only if needed.
+  sys::SmartScopedWriter<true> Writer(&*ValueHandlesLock);
   ValueHandlesTy &Handles = *ValueHandles;
   const void *OldBucketPtr = Handles.getPointerIntoBucketsArray();
 
@@ -448,8 +454,9 @@ void ValueHandleBase::AddToUseList() {
   // If reallocation didn't happen or if this was the first insertion, don't
   // walk the table.
   if (Handles.isPointerIntoBucketsArray(OldBucketPtr) ||
-      Handles.size() == 1)
+      Handles.size() == 1) {
     return;
+  }
 
   // Okay, reallocation did happen.  Fix the Prev Pointers.
   for (ValueHandlesTy::iterator I = Handles.begin(), E = Handles.end();
@@ -477,6 +484,7 @@ void ValueHandleBase::RemoveFromUseList() {
   // If the Next pointer was null, then it is possible that this was the last
   // ValueHandle watching VP.  If so, delete its entry from the ValueHandles
   // map.
+  sys::SmartScopedWriter<true> Writer(&*ValueHandlesLock);
   ValueHandlesTy &Handles = *ValueHandles;
   if (Handles.isPointerIntoBucketsArray(PrevPtr)) {
     Handles.erase(VP);
@@ -490,7 +498,9 @@ void ValueHandleBase::ValueIsDeleted(Value *V) {
 
   // Get the linked list base, which is guaranteed to exist since the
   // HasValueHandle flag is set.
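Beyond racing threads, AddToUseList guards against a second hazard: inserting into the DenseMap can reallocate its buckets, invalidating every stored pointer into the old table, so the code saves a pointer into the bucket array and rewalks the map only when that pointer no longer points into it. The same detect-then-fix-up idea, sketched with std::vector, whose growth likewise invalidates raw pointers into its storage (names are illustrative):

    #include <cstddef>
    #include <vector>

    // Precondition: StalePtr points at an element of Buf.
    void insertAndFixup(std::vector<int> &Buf, int Value, int *&StalePtr) {
      const int *OldData = Buf.data();        // pointer into current storage
      std::size_t Index = StalePtr - OldData; // remember position, not address
      Buf.push_back(Value);
      if (Buf.data() != OldData)              // growth reallocated: fix up
        StalePtr = Buf.data() + Index;
    }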
+  ValueHandlesLock->reader_acquire();
   ValueHandleBase *Entry = (*ValueHandles)[V];
+  ValueHandlesLock->reader_release();
   assert(Entry && "Value bit set but no entries exist");
 
   while (Entry) {
@@ -528,7 +538,9 @@ void ValueHandleBase::ValueIsRAUWd(Value *Old, Value *New) {
 
   // Get the linked list base, which is guaranteed to exist since the
   // HasValueHandle flag is set.
+  ValueHandlesLock->reader_acquire();
   ValueHandleBase *Entry = (*ValueHandles)[Old];
+  ValueHandlesLock->reader_release();
   assert(Entry && "Value bit set but no entries exist");
 
   while (Entry) {
diff --git a/lib/VMCore/Verifier.cpp b/lib/VMCore/Verifier.cpp
index e9f2acd..10816e6 100644
--- a/lib/VMCore/Verifier.cpp
+++ b/lib/VMCore/Verifier.cpp
@@ -745,8 +745,8 @@ void Verifier::visitTruncInst(TruncInst &I) {
   const Type *DestTy = I.getType();
 
   // Get the size of the types in bits, we'll need this later
-  unsigned SrcBitSize = SrcTy->getPrimitiveSizeInBits();
-  unsigned DestBitSize = DestTy->getPrimitiveSizeInBits();
+  unsigned SrcBitSize = SrcTy->getScalarSizeInBits();
+  unsigned DestBitSize = DestTy->getScalarSizeInBits();
 
   Assert1(SrcTy->isIntOrIntVector(), "Trunc only operates on integer", &I);
   Assert1(DestTy->isIntOrIntVector(), "Trunc only produces integer", &I);
@@ -767,8 +767,8 @@ void Verifier::visitZExtInst(ZExtInst &I) {
   Assert1(DestTy->isIntOrIntVector(), "ZExt only produces an integer", &I);
   Assert1(isa<VectorType>(SrcTy) == isa<VectorType>(DestTy),
           "zext source and destination must both be a vector or neither", &I);
-  unsigned SrcBitSize = SrcTy->getPrimitiveSizeInBits();
-  unsigned DestBitSize = DestTy->getPrimitiveSizeInBits();
+  unsigned SrcBitSize = SrcTy->getScalarSizeInBits();
+  unsigned DestBitSize = DestTy->getScalarSizeInBits();
 
   Assert1(SrcBitSize < DestBitSize,"Type too small for ZExt", &I);
 
@@ -781,8 +781,8 @@ void Verifier::visitSExtInst(SExtInst &I) {
   const Type *DestTy = I.getType();
 
   // Get the size of the types in bits, we'll need this later
-  unsigned SrcBitSize = SrcTy->getPrimitiveSizeInBits();
-  unsigned DestBitSize = DestTy->getPrimitiveSizeInBits();
+  unsigned SrcBitSize = SrcTy->getScalarSizeInBits();
+  unsigned DestBitSize = DestTy->getScalarSizeInBits();
 
   Assert1(SrcTy->isIntOrIntVector(), "SExt only operates on integer", &I);
   Assert1(DestTy->isIntOrIntVector(), "SExt only produces an integer", &I);
@@ -798,8 +798,8 @@ void Verifier::visitFPTruncInst(FPTruncInst &I) {
   const Type *SrcTy = I.getOperand(0)->getType();
   const Type *DestTy = I.getType();
   // Get the size of the types in bits, we'll need this later
-  unsigned SrcBitSize = SrcTy->getPrimitiveSizeInBits();
-  unsigned DestBitSize = DestTy->getPrimitiveSizeInBits();
+  unsigned SrcBitSize = SrcTy->getScalarSizeInBits();
+  unsigned DestBitSize = DestTy->getScalarSizeInBits();
 
   Assert1(SrcTy->isFPOrFPVector(),"FPTrunc only operates on FP", &I);
   Assert1(DestTy->isFPOrFPVector(),"FPTrunc only produces an FP", &I);
@@ -816,8 +816,8 @@ void Verifier::visitFPExtInst(FPExtInst &I) {
   const Type *DestTy = I.getType();
 
   // Get the size of the types in bits, we'll need this later
-  unsigned SrcBitSize = SrcTy->getPrimitiveSizeInBits();
-  unsigned DestBitSize = DestTy->getPrimitiveSizeInBits();
+  unsigned SrcBitSize = SrcTy->getScalarSizeInBits();
+  unsigned DestBitSize = DestTy->getScalarSizeInBits();
 
   Assert1(SrcTy->isFPOrFPVector(),"FPExt only operates on FP", &I);
   Assert1(DestTy->isFPOrFPVector(),"FPExt only produces an FP", &I);
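The Verifier's switch from getPrimitiveSizeInBits to getScalarSizeInBits is what makes these cast checks correct for vectors: a vector's primitive size is the width of the whole vector, while the trunc/ext rules constrain element widths. For example, <4 x i32> to <4 x i16> is a legal trunc because 32 > 16 per element, independent of the 128-bit and 64-bit whole-vector widths. A toy model of the distinction, with plain structs standing in for LLVM's Type hierarchy:

    #include <cassert>

    struct Ty {
      unsigned ElemBits; // width of one element (the scalar width)
      unsigned NumElems; // 1 for scalar types
      unsigned primitiveSizeInBits() const { return ElemBits * NumElems; }
      unsigned scalarSizeInBits() const { return ElemBits; }
    };

    bool isValidTrunc(const Ty &Src, const Ty &Dst) {
      // Element counts must match; element width must strictly shrink.
      return Src.NumElems == Dst.NumElems &&
             Src.scalarSizeInBits() > Dst.scalarSizeInBits();
    }

    int main() {
      Ty V4i32{32, 4}, V4i16{16, 4};
      assert(isValidTrunc(V4i32, V4i16));         // 32 > 16 per element
      assert(V4i32.primitiveSizeInBits() == 128); // whole-vector width
      return 0;
    }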