Diffstat (limited to 'lib'): 40 files changed, 4329 insertions, 954 deletions
diff --git a/lib/Analysis/BasicAliasAnalysis.cpp b/lib/Analysis/BasicAliasAnalysis.cpp index d062045..f689dca 100644 --- a/lib/Analysis/BasicAliasAnalysis.cpp +++ b/lib/Analysis/BasicAliasAnalysis.cpp @@ -28,7 +28,6 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/GetElementPtrTypeIterator.h" -#include "llvm/Support/ManagedStatic.h" #include <algorithm> using namespace llvm; diff --git a/lib/Analysis/IVUsers.cpp b/lib/Analysis/IVUsers.cpp index 6a53a83..caeb14b 100644 --- a/lib/Analysis/IVUsers.cpp +++ b/lib/Analysis/IVUsers.cpp @@ -39,7 +39,7 @@ Pass *llvm::createIVUsersPass() { /// containsAddRecFromDifferentLoop - Determine whether expression S involves a /// subexpression that is an AddRec from a loop other than L. An outer loop /// of L is OK, but not an inner loop nor a disjoint loop. -static bool containsAddRecFromDifferentLoop(SCEVHandle S, Loop *L) { +static bool containsAddRecFromDifferentLoop(const SCEV* S, Loop *L) { // This is very common, put it first. if (isa<SCEVConstant>(S)) return false; @@ -80,10 +80,10 @@ static bool containsAddRecFromDifferentLoop(SCEVHandle S, Loop *L) { /// a mix of loop invariant and loop variant expressions. The start cannot, /// however, contain an AddRec from a different loop, unless that loop is an /// outer loop of the current loop. -static bool getSCEVStartAndStride(const SCEVHandle &SH, Loop *L, Loop *UseLoop, - SCEVHandle &Start, SCEVHandle &Stride, +static bool getSCEVStartAndStride(const SCEV* &SH, Loop *L, Loop *UseLoop, + const SCEV* &Start, const SCEV* &Stride, ScalarEvolution *SE, DominatorTree *DT) { - SCEVHandle TheAddRec = Start; // Initialize to zero. + const SCEV* TheAddRec = Start; // Initialize to zero. // If the outer level is an AddExpr, the operands are all start values except // for a nested AddRecExpr. @@ -109,9 +109,9 @@ static bool getSCEVStartAndStride(const SCEVHandle &SH, Loop *L, Loop *UseLoop, // Use getSCEVAtScope to attempt to simplify other loops out of // the picture. - SCEVHandle AddRecStart = AddRec->getStart(); + const SCEV* AddRecStart = AddRec->getStart(); AddRecStart = SE->getSCEVAtScope(AddRecStart, UseLoop); - SCEVHandle AddRecStride = AddRec->getStepRecurrence(*SE); + const SCEV* AddRecStride = AddRec->getStepRecurrence(*SE); // FIXME: If Start contains an SCEVAddRecExpr from a different loop, other // than an outer loop of the current loop, reject it. LSR has no concept of @@ -196,13 +196,13 @@ bool IVUsers::AddUsersIfInteresting(Instruction *I) { return true; // Instruction already handled. // Get the symbolic expression for this instruction. - SCEVHandle ISE = SE->getSCEV(I); + const SCEV* ISE = SE->getSCEV(I); if (isa<SCEVCouldNotCompute>(ISE)) return false; // Get the start and stride for this expression. Loop *UseLoop = LI->getLoopFor(I->getParent()); - SCEVHandle Start = SE->getIntegerSCEV(0, ISE->getType()); - SCEVHandle Stride = Start; + const SCEV* Start = SE->getIntegerSCEV(0, ISE->getType()); + const SCEV* Stride = Start; if (!getSCEVStartAndStride(ISE, L, UseLoop, Start, Stride, SE, DT)) return false; // Non-reducible symbolic expression, bail out. @@ -254,7 +254,7 @@ bool IVUsers::AddUsersIfInteresting(Instruction *I) { if (IVUseShouldUsePostIncValue(User, I, L, LI, DT, this)) { // The value used will be incremented by the stride more than we are // expecting, so subtract this off. 
- SCEVHandle NewStart = SE->getMinusSCEV(Start, Stride); + const SCEV* NewStart = SE->getMinusSCEV(Start, Stride); StrideUses->addUser(NewStart, User, I); StrideUses->Users.back().setIsUseOfPostIncrementedValue(true); DOUT << " USING POSTINC SCEV, START=" << *NewStart<< "\n"; @@ -295,9 +295,9 @@ bool IVUsers::runOnLoop(Loop *l, LPPassManager &LPM) { /// getReplacementExpr - Return a SCEV expression which computes the /// value of the OperandValToReplace of the given IVStrideUse. -SCEVHandle IVUsers::getReplacementExpr(const IVStrideUse &U) const { +const SCEV* IVUsers::getReplacementExpr(const IVStrideUse &U) const { // Start with zero. - SCEVHandle RetVal = SE->getIntegerSCEV(0, U.getParent()->Stride->getType()); + const SCEV* RetVal = SE->getIntegerSCEV(0, U.getParent()->Stride->getType()); // Create the basic add recurrence. RetVal = SE->getAddRecExpr(RetVal, U.getParent()->Stride, L); // Add the offset in a separate step, because it may be loop-variant. @@ -308,7 +308,7 @@ SCEVHandle IVUsers::getReplacementExpr(const IVStrideUse &U) const { RetVal = SE->getAddExpr(RetVal, U.getParent()->Stride); // Evaluate the expression out of the loop, if possible. if (!L->contains(U.getUser()->getParent())) { - SCEVHandle ExitVal = SE->getSCEVAtScope(RetVal, L->getParentLoop()); + const SCEV* ExitVal = SE->getSCEVAtScope(RetVal, L->getParentLoop()); if (ExitVal->isLoopInvariant(L)) RetVal = ExitVal; } @@ -325,7 +325,7 @@ void IVUsers::print(raw_ostream &OS, const Module *M) const { OS << ":\n"; for (unsigned Stride = 0, e = StrideOrder.size(); Stride != e; ++Stride) { - std::map<SCEVHandle, IVUsersOfOneStride*>::const_iterator SI = + std::map<const SCEV*, IVUsersOfOneStride*>::const_iterator SI = IVUsesByStride.find(StrideOrder[Stride]); assert(SI != IVUsesByStride.end() && "Stride doesn't exist!"); OS << " Stride " << *SI->first->getType() << " " << *SI->first << ":\n"; diff --git a/lib/Analysis/LoopVR.cpp b/lib/Analysis/LoopVR.cpp index 0a3d06b..ae715ac 100644 --- a/lib/Analysis/LoopVR.cpp +++ b/lib/Analysis/LoopVR.cpp @@ -26,8 +26,8 @@ char LoopVR::ID = 0; static RegisterPass<LoopVR> X("loopvr", "Loop Value Ranges", false, true); /// getRange - determine the range for a particular SCEV within a given Loop -ConstantRange LoopVR::getRange(SCEVHandle S, Loop *L, ScalarEvolution &SE) { - SCEVHandle T = SE.getBackedgeTakenCount(L); +ConstantRange LoopVR::getRange(const SCEV* S, Loop *L, ScalarEvolution &SE) { + const SCEV* T = SE.getBackedgeTakenCount(L); if (isa<SCEVCouldNotCompute>(T)) return ConstantRange(cast<IntegerType>(S->getType())->getBitWidth(), true); @@ -36,7 +36,7 @@ ConstantRange LoopVR::getRange(SCEVHandle S, Loop *L, ScalarEvolution &SE) { } /// getRange - determine the range for a particular SCEV with a given trip count -ConstantRange LoopVR::getRange(SCEVHandle S, SCEVHandle T, ScalarEvolution &SE){ +ConstantRange LoopVR::getRange(const SCEV* S, const SCEV* T, ScalarEvolution &SE){ if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S)) return ConstantRange(C->getValue()->getValue()); @@ -182,8 +182,8 @@ ConstantRange LoopVR::getRange(SCEVHandle S, SCEVHandle T, ScalarEvolution &SE){ if (!Trip) return FullSet; if (AddRec->isAffine()) { - SCEVHandle StartHandle = AddRec->getStart(); - SCEVHandle StepHandle = AddRec->getOperand(1); + const SCEV* StartHandle = AddRec->getStart(); + const SCEV* StepHandle = AddRec->getOperand(1); const SCEVConstant *Step = dyn_cast<SCEVConstant>(StepHandle); if (!Step) return FullSet; @@ -194,7 +194,7 @@ ConstantRange LoopVR::getRange(SCEVHandle S, 
SCEVHandle T, ScalarEvolution &SE){ if ((TripExt * StepExt).ugt(APInt::getLowBitsSet(ExWidth, ExWidth >> 1))) return FullSet; - SCEVHandle EndHandle = SE.getAddExpr(StartHandle, + const SCEV* EndHandle = SE.getAddExpr(StartHandle, SE.getMulExpr(T, StepHandle)); const SCEVConstant *Start = dyn_cast<SCEVConstant>(StartHandle); const SCEVConstant *End = dyn_cast<SCEVConstant>(EndHandle); @@ -254,7 +254,7 @@ ConstantRange LoopVR::compute(Value *V) { ScalarEvolution &SE = getAnalysis<ScalarEvolution>(); - SCEVHandle S = SE.getSCEV(I); + const SCEV* S = SE.getSCEV(I); if (isa<SCEVUnknown>(S) || isa<SCEVCouldNotCompute>(S)) return ConstantRange(cast<IntegerType>(V->getType())->getBitWidth(), false); diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp index 68aa595..5cbb5fa 100644 --- a/lib/Analysis/ScalarEvolution.cpp +++ b/lib/Analysis/ScalarEvolution.cpp @@ -14,7 +14,7 @@ // There are several aspects to this library. First is the representation of // scalar expressions, which are represented as subclasses of the SCEV class. // These classes are used to represent certain types of subexpressions that we -// can handle. These classes are reference counted, managed by the SCEVHandle +// can handle. These classes are reference counted, managed by the const SCEV* // class. We only create one SCEV of a particular shape, so pointer-comparisons // for equality are legal. // @@ -76,7 +76,6 @@ #include "llvm/Support/ConstantRange.h" #include "llvm/Support/GetElementPtrTypeIterator.h" #include "llvm/Support/InstIterator.h" -#include "llvm/Support/ManagedStatic.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" #include "llvm/ADT/Statistic.h" @@ -133,9 +132,8 @@ bool SCEV::isOne() const { return false; } -SCEVCouldNotCompute::SCEVCouldNotCompute(const ScalarEvolution* p) : - SCEV(scCouldNotCompute, p) {} -SCEVCouldNotCompute::~SCEVCouldNotCompute() {} +SCEVCouldNotCompute::SCEVCouldNotCompute() : + SCEV(scCouldNotCompute) {} bool SCEVCouldNotCompute::isLoopInvariant(const Loop *L) const { assert(0 && "Attempt to use a SCEVCouldNotCompute object!"); @@ -152,9 +150,9 @@ bool SCEVCouldNotCompute::hasComputableLoopEvolution(const Loop *L) const { return false; } -SCEVHandle SCEVCouldNotCompute:: -replaceSymbolicValuesWithConcrete(const SCEVHandle &Sym, - const SCEVHandle &Conc, +const SCEV* SCEVCouldNotCompute:: +replaceSymbolicValuesWithConcrete(const SCEV* Sym, + const SCEV* Conc, ScalarEvolution &SE) const { return this; } @@ -169,26 +167,20 @@ bool SCEVCouldNotCompute::classof(const SCEV *S) { // SCEVConstants - Only allow the creation of one SCEVConstant for any -// particular value. Don't use a SCEVHandle here, or else the object will +// particular value. Don't use a const SCEV* here, or else the object will // never be deleted! 
-static ManagedStatic<std::map<ConstantInt*, SCEVConstant*> > SCEVConstants; - -SCEVConstant::~SCEVConstant() { - SCEVConstants->erase(V); -} - -SCEVHandle ScalarEvolution::getConstant(ConstantInt *V) { - SCEVConstant *&R = (*SCEVConstants)[V]; - if (R == 0) R = new SCEVConstant(V, this); +const SCEV* ScalarEvolution::getConstant(ConstantInt *V) { + SCEVConstant *&R = SCEVConstants[V]; + if (R == 0) R = new SCEVConstant(V); return R; } -SCEVHandle ScalarEvolution::getConstant(const APInt& Val) { +const SCEV* ScalarEvolution::getConstant(const APInt& Val) { return getConstant(ConstantInt::get(Val)); } -SCEVHandle +const SCEV* ScalarEvolution::getConstant(const Type *Ty, uint64_t V, bool isSigned) { return getConstant(ConstantInt::get(cast<IntegerType>(Ty), V, isSigned)); } @@ -200,92 +192,62 @@ void SCEVConstant::print(raw_ostream &OS) const { } SCEVCastExpr::SCEVCastExpr(unsigned SCEVTy, - const SCEVHandle &op, const Type *ty, - const ScalarEvolution* p) - : SCEV(SCEVTy, p), Op(op), Ty(ty) {} - -SCEVCastExpr::~SCEVCastExpr() {} + const SCEV* op, const Type *ty) + : SCEV(SCEVTy), Op(op), Ty(ty) {} bool SCEVCastExpr::dominates(BasicBlock *BB, DominatorTree *DT) const { return Op->dominates(BB, DT); } // SCEVTruncates - Only allow the creation of one SCEVTruncateExpr for any -// particular input. Don't use a SCEVHandle here, or else the object will +// particular input. Don't use a const SCEV* here, or else the object will // never be deleted! -static ManagedStatic<std::map<std::pair<const SCEV*, const Type*>, - SCEVTruncateExpr*> > SCEVTruncates; -SCEVTruncateExpr::SCEVTruncateExpr(const SCEVHandle &op, const Type *ty, - const ScalarEvolution* p) - : SCEVCastExpr(scTruncate, op, ty, p) { +SCEVTruncateExpr::SCEVTruncateExpr(const SCEV* op, const Type *ty) + : SCEVCastExpr(scTruncate, op, ty) { assert((Op->getType()->isInteger() || isa<PointerType>(Op->getType())) && (Ty->isInteger() || isa<PointerType>(Ty)) && "Cannot truncate non-integer value!"); } -SCEVTruncateExpr::~SCEVTruncateExpr() { - SCEVTruncates->erase(std::make_pair(Op, Ty)); -} void SCEVTruncateExpr::print(raw_ostream &OS) const { OS << "(trunc " << *Op->getType() << " " << *Op << " to " << *Ty << ")"; } // SCEVZeroExtends - Only allow the creation of one SCEVZeroExtendExpr for any -// particular input. Don't use a SCEVHandle here, or else the object will never +// particular input. Don't use a const SCEV* here, or else the object will never // be deleted! -static ManagedStatic<std::map<std::pair<const SCEV*, const Type*>, - SCEVZeroExtendExpr*> > SCEVZeroExtends; -SCEVZeroExtendExpr::SCEVZeroExtendExpr(const SCEVHandle &op, const Type *ty, - const ScalarEvolution* p) - : SCEVCastExpr(scZeroExtend, op, ty, p) { +SCEVZeroExtendExpr::SCEVZeroExtendExpr(const SCEV* op, const Type *ty) + : SCEVCastExpr(scZeroExtend, op, ty) { assert((Op->getType()->isInteger() || isa<PointerType>(Op->getType())) && (Ty->isInteger() || isa<PointerType>(Ty)) && "Cannot zero extend non-integer value!"); } -SCEVZeroExtendExpr::~SCEVZeroExtendExpr() { - SCEVZeroExtends->erase(std::make_pair(Op, Ty)); -} - void SCEVZeroExtendExpr::print(raw_ostream &OS) const { OS << "(zext " << *Op->getType() << " " << *Op << " to " << *Ty << ")"; } // SCEVSignExtends - Only allow the creation of one SCEVSignExtendExpr for any -// particular input. Don't use a SCEVHandle here, or else the object will never +// particular input. Don't use a const SCEV* here, or else the object will never // be deleted! 
-static ManagedStatic<std::map<std::pair<const SCEV*, const Type*>, - SCEVSignExtendExpr*> > SCEVSignExtends; -SCEVSignExtendExpr::SCEVSignExtendExpr(const SCEVHandle &op, const Type *ty, - const ScalarEvolution* p) - : SCEVCastExpr(scSignExtend, op, ty, p) { +SCEVSignExtendExpr::SCEVSignExtendExpr(const SCEV* op, const Type *ty) + : SCEVCastExpr(scSignExtend, op, ty) { assert((Op->getType()->isInteger() || isa<PointerType>(Op->getType())) && (Ty->isInteger() || isa<PointerType>(Ty)) && "Cannot sign extend non-integer value!"); } -SCEVSignExtendExpr::~SCEVSignExtendExpr() { - SCEVSignExtends->erase(std::make_pair(Op, Ty)); -} - void SCEVSignExtendExpr::print(raw_ostream &OS) const { OS << "(sext " << *Op->getType() << " " << *Op << " to " << *Ty << ")"; } // SCEVCommExprs - Only allow the creation of one SCEVCommutativeExpr for any -// particular input. Don't use a SCEVHandle here, or else the object will never +// particular input. Don't use a const SCEV* here, or else the object will never // be deleted! -static ManagedStatic<std::map<std::pair<unsigned, std::vector<const SCEV*> >, - SCEVCommutativeExpr*> > SCEVCommExprs; - -SCEVCommutativeExpr::~SCEVCommutativeExpr() { - std::vector<const SCEV*> SCEVOps(Operands.begin(), Operands.end()); - SCEVCommExprs->erase(std::make_pair(getSCEVType(), SCEVOps)); -} void SCEVCommutativeExpr::print(raw_ostream &OS) const { assert(Operands.size() > 1 && "This plus expr shouldn't exist!"); @@ -296,15 +258,15 @@ void SCEVCommutativeExpr::print(raw_ostream &OS) const { OS << ")"; } -SCEVHandle SCEVCommutativeExpr:: -replaceSymbolicValuesWithConcrete(const SCEVHandle &Sym, - const SCEVHandle &Conc, +const SCEV* SCEVCommutativeExpr:: +replaceSymbolicValuesWithConcrete(const SCEV* Sym, + const SCEV* Conc, ScalarEvolution &SE) const { for (unsigned i = 0, e = getNumOperands(); i != e; ++i) { - SCEVHandle H = + const SCEV* H = getOperand(i)->replaceSymbolicValuesWithConcrete(Sym, Conc, SE); if (H != getOperand(i)) { - SmallVector<SCEVHandle, 8> NewOps; + SmallVector<const SCEV*, 8> NewOps; NewOps.reserve(getNumOperands()); for (unsigned j = 0; j != i; ++j) NewOps.push_back(getOperand(j)); @@ -338,14 +300,8 @@ bool SCEVNAryExpr::dominates(BasicBlock *BB, DominatorTree *DT) const { // SCEVUDivs - Only allow the creation of one SCEVUDivExpr for any particular -// input. Don't use a SCEVHandle here, or else the object will never be +// input. Don't use a const SCEV* here, or else the object will never be // deleted! -static ManagedStatic<std::map<std::pair<const SCEV*, const SCEV*>, - SCEVUDivExpr*> > SCEVUDivs; - -SCEVUDivExpr::~SCEVUDivExpr() { - SCEVUDivs->erase(std::make_pair(LHS, RHS)); -} bool SCEVUDivExpr::dominates(BasicBlock *BB, DominatorTree *DT) const { return LHS->dominates(BB, DT) && RHS->dominates(BB, DT); @@ -365,26 +321,18 @@ const Type *SCEVUDivExpr::getType() const { } // SCEVAddRecExprs - Only allow the creation of one SCEVAddRecExpr for any -// particular input. Don't use a SCEVHandle here, or else the object will never +// particular input. Don't use a const SCEV* here, or else the object will never // be deleted! 
-static ManagedStatic<std::map<std::pair<const Loop *, - std::vector<const SCEV*> >, - SCEVAddRecExpr*> > SCEVAddRecExprs; - -SCEVAddRecExpr::~SCEVAddRecExpr() { - std::vector<const SCEV*> SCEVOps(Operands.begin(), Operands.end()); - SCEVAddRecExprs->erase(std::make_pair(L, SCEVOps)); -} -SCEVHandle SCEVAddRecExpr:: -replaceSymbolicValuesWithConcrete(const SCEVHandle &Sym, - const SCEVHandle &Conc, +const SCEV* SCEVAddRecExpr:: +replaceSymbolicValuesWithConcrete(const SCEV* Sym, + const SCEV* Conc, ScalarEvolution &SE) const { for (unsigned i = 0, e = getNumOperands(); i != e; ++i) { - SCEVHandle H = + const SCEV* H = getOperand(i)->replaceSymbolicValuesWithConcrete(Sym, Conc, SE); if (H != getOperand(i)) { - SmallVector<SCEVHandle, 8> NewOps; + SmallVector<const SCEV*, 8> NewOps; NewOps.reserve(getNumOperands()); for (unsigned j = 0; j != i; ++j) NewOps.push_back(getOperand(j)); @@ -418,11 +366,8 @@ void SCEVAddRecExpr::print(raw_ostream &OS) const { } // SCEVUnknowns - Only allow the creation of one SCEVUnknown for any particular -// value. Don't use a SCEVHandle here, or else the object will never be +// value. Don't use a const SCEV* here, or else the object will never be // deleted! -static ManagedStatic<std::map<Value*, SCEVUnknown*> > SCEVUnknowns; - -SCEVUnknown::~SCEVUnknown() { SCEVUnknowns->erase(V); } bool SCEVUnknown::isLoopInvariant(const Loop *L) const { // All non-instruction values are loop invariant. All instructions are loop @@ -578,7 +523,7 @@ namespace { /// this to depend on where the addresses of various SCEV objects happened to /// land in memory. /// -static void GroupByComplexity(SmallVectorImpl<SCEVHandle> &Ops, +static void GroupByComplexity(SmallVectorImpl<const SCEV*> &Ops, LoopInfo *LI) { if (Ops.size() < 2) return; // Noop if (Ops.size() == 2) { @@ -621,7 +566,7 @@ static void GroupByComplexity(SmallVectorImpl<SCEVHandle> &Ops, /// BinomialCoefficient - Compute BC(It, K). The result has width W. /// Assume, K > 0. -static SCEVHandle BinomialCoefficient(SCEVHandle It, unsigned K, +static const SCEV* BinomialCoefficient(const SCEV* It, unsigned K, ScalarEvolution &SE, const Type* ResultTy) { // Handle the simplest case efficiently. @@ -714,15 +659,15 @@ static SCEVHandle BinomialCoefficient(SCEVHandle It, unsigned K, // Calculate the product, at width T+W const IntegerType *CalculationTy = IntegerType::get(CalculationBits); - SCEVHandle Dividend = SE.getTruncateOrZeroExtend(It, CalculationTy); + const SCEV* Dividend = SE.getTruncateOrZeroExtend(It, CalculationTy); for (unsigned i = 1; i != K; ++i) { - SCEVHandle S = SE.getMinusSCEV(It, SE.getIntegerSCEV(i, It->getType())); + const SCEV* S = SE.getMinusSCEV(It, SE.getIntegerSCEV(i, It->getType())); Dividend = SE.getMulExpr(Dividend, SE.getTruncateOrZeroExtend(S, CalculationTy)); } // Divide by 2^T - SCEVHandle DivResult = SE.getUDivExpr(Dividend, SE.getConstant(DivFactor)); + const SCEV* DivResult = SE.getUDivExpr(Dividend, SE.getConstant(DivFactor)); // Truncate the result, and divide by K! / 2^T. @@ -739,14 +684,14 @@ static SCEVHandle BinomialCoefficient(SCEVHandle It, unsigned K, /// /// where BC(It, k) stands for binomial coefficient. 
/// -SCEVHandle SCEVAddRecExpr::evaluateAtIteration(SCEVHandle It, +const SCEV* SCEVAddRecExpr::evaluateAtIteration(const SCEV* It, ScalarEvolution &SE) const { - SCEVHandle Result = getStart(); + const SCEV* Result = getStart(); for (unsigned i = 1, e = getNumOperands(); i != e; ++i) { // The computation is correct in the face of overflow provided that the // multiplication is performed _after_ the evaluation of the binomial // coefficient. - SCEVHandle Coeff = BinomialCoefficient(It, i, SE, getType()); + const SCEV* Coeff = BinomialCoefficient(It, i, SE, getType()); if (isa<SCEVCouldNotCompute>(Coeff)) return Coeff; @@ -759,7 +704,7 @@ SCEVHandle SCEVAddRecExpr::evaluateAtIteration(SCEVHandle It, // SCEV Expression folder implementations //===----------------------------------------------------------------------===// -SCEVHandle ScalarEvolution::getTruncateExpr(const SCEVHandle &Op, +const SCEV* ScalarEvolution::getTruncateExpr(const SCEV* Op, const Type *Ty) { assert(getTypeSizeInBits(Op->getType()) > getTypeSizeInBits(Ty) && "This is not a truncating conversion!"); @@ -785,18 +730,18 @@ SCEVHandle ScalarEvolution::getTruncateExpr(const SCEVHandle &Op, // If the input value is a chrec scev, truncate the chrec's operands. if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Op)) { - SmallVector<SCEVHandle, 4> Operands; + SmallVector<const SCEV*, 4> Operands; for (unsigned i = 0, e = AddRec->getNumOperands(); i != e; ++i) Operands.push_back(getTruncateExpr(AddRec->getOperand(i), Ty)); return getAddRecExpr(Operands, AddRec->getLoop()); } - SCEVTruncateExpr *&Result = (*SCEVTruncates)[std::make_pair(Op, Ty)]; - if (Result == 0) Result = new SCEVTruncateExpr(Op, Ty, this); + SCEVTruncateExpr *&Result = SCEVTruncates[std::make_pair(Op, Ty)]; + if (Result == 0) Result = new SCEVTruncateExpr(Op, Ty); return Result; } -SCEVHandle ScalarEvolution::getZeroExtendExpr(const SCEVHandle &Op, +const SCEV* ScalarEvolution::getZeroExtendExpr(const SCEV* Op, const Type *Ty) { assert(getTypeSizeInBits(Op->getType()) < getTypeSizeInBits(Ty) && "This is not an extending conversion!"); @@ -829,28 +774,28 @@ SCEVHandle ScalarEvolution::getZeroExtendExpr(const SCEVHandle &Op, // in infinite recursion. In the later case, the analysis code will // cope with a conservative value, and it will take care to purge // that value once it has finished. - SCEVHandle MaxBECount = getMaxBackedgeTakenCount(AR->getLoop()); + const SCEV* MaxBECount = getMaxBackedgeTakenCount(AR->getLoop()); if (!isa<SCEVCouldNotCompute>(MaxBECount)) { // Manually compute the final value for AR, checking for // overflow. - SCEVHandle Start = AR->getStart(); - SCEVHandle Step = AR->getStepRecurrence(*this); + const SCEV* Start = AR->getStart(); + const SCEV* Step = AR->getStepRecurrence(*this); // Check whether the backedge-taken count can be losslessly casted to // the addrec's type. The count is always unsigned. - SCEVHandle CastedMaxBECount = + const SCEV* CastedMaxBECount = getTruncateOrZeroExtend(MaxBECount, Start->getType()); - SCEVHandle RecastedMaxBECount = + const SCEV* RecastedMaxBECount = getTruncateOrZeroExtend(CastedMaxBECount, MaxBECount->getType()); if (MaxBECount == RecastedMaxBECount) { const Type *WideTy = IntegerType::get(getTypeSizeInBits(Start->getType()) * 2); // Check whether Start+Step*MaxBECount has no unsigned overflow. 
- SCEVHandle ZMul = + const SCEV* ZMul = getMulExpr(CastedMaxBECount, getTruncateOrZeroExtend(Step, Start->getType())); - SCEVHandle Add = getAddExpr(Start, ZMul); - SCEVHandle OperandExtendedAdd = + const SCEV* Add = getAddExpr(Start, ZMul); + const SCEV* OperandExtendedAdd = getAddExpr(getZeroExtendExpr(Start, WideTy), getMulExpr(getZeroExtendExpr(CastedMaxBECount, WideTy), getZeroExtendExpr(Step, WideTy))); @@ -862,7 +807,7 @@ SCEVHandle ScalarEvolution::getZeroExtendExpr(const SCEVHandle &Op, // Similar to above, only this time treat the step value as signed. // This covers loops that count down. - SCEVHandle SMul = + const SCEV* SMul = getMulExpr(CastedMaxBECount, getTruncateOrSignExtend(Step, Start->getType())); Add = getAddExpr(Start, SMul); @@ -879,12 +824,12 @@ SCEVHandle ScalarEvolution::getZeroExtendExpr(const SCEVHandle &Op, } } - SCEVZeroExtendExpr *&Result = (*SCEVZeroExtends)[std::make_pair(Op, Ty)]; - if (Result == 0) Result = new SCEVZeroExtendExpr(Op, Ty, this); + SCEVZeroExtendExpr *&Result = SCEVZeroExtends[std::make_pair(Op, Ty)]; + if (Result == 0) Result = new SCEVZeroExtendExpr(Op, Ty); return Result; } -SCEVHandle ScalarEvolution::getSignExtendExpr(const SCEVHandle &Op, +const SCEV* ScalarEvolution::getSignExtendExpr(const SCEV* Op, const Type *Ty) { assert(getTypeSizeInBits(Op->getType()) < getTypeSizeInBits(Ty) && "This is not an extending conversion!"); @@ -917,28 +862,28 @@ SCEVHandle ScalarEvolution::getSignExtendExpr(const SCEVHandle &Op, // in infinite recursion. In the later case, the analysis code will // cope with a conservative value, and it will take care to purge // that value once it has finished. - SCEVHandle MaxBECount = getMaxBackedgeTakenCount(AR->getLoop()); + const SCEV* MaxBECount = getMaxBackedgeTakenCount(AR->getLoop()); if (!isa<SCEVCouldNotCompute>(MaxBECount)) { // Manually compute the final value for AR, checking for // overflow. - SCEVHandle Start = AR->getStart(); - SCEVHandle Step = AR->getStepRecurrence(*this); + const SCEV* Start = AR->getStart(); + const SCEV* Step = AR->getStepRecurrence(*this); // Check whether the backedge-taken count can be losslessly casted to // the addrec's type. The count is always unsigned. - SCEVHandle CastedMaxBECount = + const SCEV* CastedMaxBECount = getTruncateOrZeroExtend(MaxBECount, Start->getType()); - SCEVHandle RecastedMaxBECount = + const SCEV* RecastedMaxBECount = getTruncateOrZeroExtend(CastedMaxBECount, MaxBECount->getType()); if (MaxBECount == RecastedMaxBECount) { const Type *WideTy = IntegerType::get(getTypeSizeInBits(Start->getType()) * 2); // Check whether Start+Step*MaxBECount has no signed overflow. 
- SCEVHandle SMul = + const SCEV* SMul = getMulExpr(CastedMaxBECount, getTruncateOrSignExtend(Step, Start->getType())); - SCEVHandle Add = getAddExpr(Start, SMul); - SCEVHandle OperandExtendedAdd = + const SCEV* Add = getAddExpr(Start, SMul); + const SCEV* OperandExtendedAdd = getAddExpr(getSignExtendExpr(Start, WideTy), getMulExpr(getZeroExtendExpr(CastedMaxBECount, WideTy), getSignExtendExpr(Step, WideTy))); @@ -951,15 +896,15 @@ SCEVHandle ScalarEvolution::getSignExtendExpr(const SCEVHandle &Op, } } - SCEVSignExtendExpr *&Result = (*SCEVSignExtends)[std::make_pair(Op, Ty)]; - if (Result == 0) Result = new SCEVSignExtendExpr(Op, Ty, this); + SCEVSignExtendExpr *&Result = SCEVSignExtends[std::make_pair(Op, Ty)]; + if (Result == 0) Result = new SCEVSignExtendExpr(Op, Ty); return Result; } /// getAnyExtendExpr - Return a SCEV for the given operand extended with /// unspecified bits out to the given type. /// -SCEVHandle ScalarEvolution::getAnyExtendExpr(const SCEVHandle &Op, +const SCEV* ScalarEvolution::getAnyExtendExpr(const SCEV* Op, const Type *Ty) { assert(getTypeSizeInBits(Op->getType()) < getTypeSizeInBits(Ty) && "This is not an extending conversion!"); @@ -974,19 +919,19 @@ SCEVHandle ScalarEvolution::getAnyExtendExpr(const SCEVHandle &Op, // Peel off a truncate cast. if (const SCEVTruncateExpr *T = dyn_cast<SCEVTruncateExpr>(Op)) { - SCEVHandle NewOp = T->getOperand(); + const SCEV* NewOp = T->getOperand(); if (getTypeSizeInBits(NewOp->getType()) < getTypeSizeInBits(Ty)) return getAnyExtendExpr(NewOp, Ty); return getTruncateOrNoop(NewOp, Ty); } // Next try a zext cast. If the cast is folded, use it. - SCEVHandle ZExt = getZeroExtendExpr(Op, Ty); + const SCEV* ZExt = getZeroExtendExpr(Op, Ty); if (!isa<SCEVZeroExtendExpr>(ZExt)) return ZExt; // Next try a sext cast. If the cast is folded, use it. - SCEVHandle SExt = getSignExtendExpr(Op, Ty); + const SCEV* SExt = getSignExtendExpr(Op, Ty); if (!isa<SCEVSignExtendExpr>(SExt)) return SExt; @@ -1024,10 +969,10 @@ SCEVHandle ScalarEvolution::getAnyExtendExpr(const SCEVHandle &Op, /// is also used as a check to avoid infinite recursion. /// static bool -CollectAddOperandsWithScales(DenseMap<SCEVHandle, APInt> &M, - SmallVector<SCEVHandle, 8> &NewOps, +CollectAddOperandsWithScales(DenseMap<const SCEV*, APInt> &M, + SmallVector<const SCEV*, 8> &NewOps, APInt &AccumulatedConstant, - const SmallVectorImpl<SCEVHandle> &Ops, + const SmallVectorImpl<const SCEV*> &Ops, const APInt &Scale, ScalarEvolution &SE) { bool Interesting = false; @@ -1048,9 +993,9 @@ CollectAddOperandsWithScales(DenseMap<SCEVHandle, APInt> &M, } else { // A multiplication of a constant with some other value. Update // the map. - SmallVector<SCEVHandle, 4> MulOps(Mul->op_begin()+1, Mul->op_end()); - SCEVHandle Key = SE.getMulExpr(MulOps); - std::pair<DenseMap<SCEVHandle, APInt>::iterator, bool> Pair = + SmallVector<const SCEV*, 4> MulOps(Mul->op_begin()+1, Mul->op_end()); + const SCEV* Key = SE.getMulExpr(MulOps); + std::pair<DenseMap<const SCEV*, APInt>::iterator, bool> Pair = M.insert(std::make_pair(Key, APInt())); if (Pair.second) { Pair.first->second = NewScale; @@ -1069,7 +1014,7 @@ CollectAddOperandsWithScales(DenseMap<SCEVHandle, APInt> &M, AccumulatedConstant += Scale * C->getValue()->getValue(); } else { // An ordinary operand. Update the map. 
- std::pair<DenseMap<SCEVHandle, APInt>::iterator, bool> Pair = + std::pair<DenseMap<const SCEV*, APInt>::iterator, bool> Pair = M.insert(std::make_pair(Ops[i], APInt())); if (Pair.second) { Pair.first->second = Scale; @@ -1096,7 +1041,7 @@ namespace { /// getAddExpr - Get a canonical add expression, or something simpler if /// possible. -SCEVHandle ScalarEvolution::getAddExpr(SmallVectorImpl<SCEVHandle> &Ops) { +const SCEV* ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV*> &Ops) { assert(!Ops.empty() && "Cannot get empty add!"); if (Ops.size() == 1) return Ops[0]; #ifndef NDEBUG @@ -1140,8 +1085,8 @@ SCEVHandle ScalarEvolution::getAddExpr(SmallVectorImpl<SCEVHandle> &Ops) { if (Ops[i] == Ops[i+1]) { // X + Y + Y --> X + Y*2 // Found a match, merge the two values into a multiply, and add any // remaining values to the result. - SCEVHandle Two = getIntegerSCEV(2, Ty); - SCEVHandle Mul = getMulExpr(Ops[i], Two); + const SCEV* Two = getIntegerSCEV(2, Ty); + const SCEV* Mul = getMulExpr(Ops[i], Two); if (Ops.size() == 2) return Mul; Ops.erase(Ops.begin()+i, Ops.begin()+i+2); @@ -1157,7 +1102,7 @@ SCEVHandle ScalarEvolution::getAddExpr(SmallVectorImpl<SCEVHandle> &Ops) { const SCEVTruncateExpr *Trunc = cast<SCEVTruncateExpr>(Ops[Idx]); const Type *DstType = Trunc->getType(); const Type *SrcType = Trunc->getOperand()->getType(); - SmallVector<SCEVHandle, 8> LargeOps; + SmallVector<const SCEV*, 8> LargeOps; bool Ok = true; // Check all the operands to see if they can be represented in the // source type of the truncate. @@ -1173,7 +1118,7 @@ SCEVHandle ScalarEvolution::getAddExpr(SmallVectorImpl<SCEVHandle> &Ops) { // is much more likely to be foldable here. LargeOps.push_back(getSignExtendExpr(C, SrcType)); } else if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(Ops[i])) { - SmallVector<SCEVHandle, 8> LargeMulOps; + SmallVector<const SCEV*, 8> LargeMulOps; for (unsigned j = 0, f = M->getNumOperands(); j != f && Ok; ++j) { if (const SCEVTruncateExpr *T = dyn_cast<SCEVTruncateExpr>(M->getOperand(j))) { @@ -1201,7 +1146,7 @@ SCEVHandle ScalarEvolution::getAddExpr(SmallVectorImpl<SCEVHandle> &Ops) { } if (Ok) { // Evaluate the expression in the larger type. - SCEVHandle Fold = getAddExpr(LargeOps); + const SCEV* Fold = getAddExpr(LargeOps); // If it folds to something simple, use it. Otherwise, don't. if (isa<SCEVConstant>(Fold) || isa<SCEVUnknown>(Fold)) return getTruncateExpr(Fold, DstType); @@ -1238,23 +1183,23 @@ SCEVHandle ScalarEvolution::getAddExpr(SmallVectorImpl<SCEVHandle> &Ops) { // operands multiplied by constant values. if (Idx < Ops.size() && isa<SCEVMulExpr>(Ops[Idx])) { uint64_t BitWidth = getTypeSizeInBits(Ty); - DenseMap<SCEVHandle, APInt> M; - SmallVector<SCEVHandle, 8> NewOps; + DenseMap<const SCEV*, APInt> M; + SmallVector<const SCEV*, 8> NewOps; APInt AccumulatedConstant(BitWidth, 0); if (CollectAddOperandsWithScales(M, NewOps, AccumulatedConstant, Ops, APInt(BitWidth, 1), *this)) { // Some interesting folding opportunity is present, so its worthwhile to // re-generate the operands list. Group the operands by constant scale, // to avoid multiplying by the same constant scale multiple times. - std::map<APInt, SmallVector<SCEVHandle, 4>, APIntCompare> MulOpLists; - for (SmallVector<SCEVHandle, 8>::iterator I = NewOps.begin(), + std::map<APInt, SmallVector<const SCEV*, 4>, APIntCompare> MulOpLists; + for (SmallVector<const SCEV*, 8>::iterator I = NewOps.begin(), E = NewOps.end(); I != E; ++I) MulOpLists[M.find(*I)->second].push_back(*I); // Re-generate the operands list. 
Ops.clear(); if (AccumulatedConstant != 0) Ops.push_back(getConstant(AccumulatedConstant)); - for (std::map<APInt, SmallVector<SCEVHandle, 4>, APIntCompare>::iterator I = + for (std::map<APInt, SmallVector<const SCEV*, 4>, APIntCompare>::iterator I = MulOpLists.begin(), E = MulOpLists.end(); I != E; ++I) if (I->first != 0) Ops.push_back(getMulExpr(getConstant(I->first), getAddExpr(I->second))); @@ -1276,17 +1221,17 @@ SCEVHandle ScalarEvolution::getAddExpr(SmallVectorImpl<SCEVHandle> &Ops) { for (unsigned AddOp = 0, e = Ops.size(); AddOp != e; ++AddOp) if (MulOpSCEV == Ops[AddOp] && !isa<SCEVConstant>(Ops[AddOp])) { // Fold W + X + (X * Y * Z) --> W + (X * ((Y*Z)+1)) - SCEVHandle InnerMul = Mul->getOperand(MulOp == 0); + const SCEV* InnerMul = Mul->getOperand(MulOp == 0); if (Mul->getNumOperands() != 2) { // If the multiply has more than two operands, we must get the // Y*Z term. - SmallVector<SCEVHandle, 4> MulOps(Mul->op_begin(), Mul->op_end()); + SmallVector<const SCEV*, 4> MulOps(Mul->op_begin(), Mul->op_end()); MulOps.erase(MulOps.begin()+MulOp); InnerMul = getMulExpr(MulOps); } - SCEVHandle One = getIntegerSCEV(1, Ty); - SCEVHandle AddOne = getAddExpr(InnerMul, One); - SCEVHandle OuterMul = getMulExpr(AddOne, Ops[AddOp]); + const SCEV* One = getIntegerSCEV(1, Ty); + const SCEV* AddOne = getAddExpr(InnerMul, One); + const SCEV* OuterMul = getMulExpr(AddOne, Ops[AddOp]); if (Ops.size() == 2) return OuterMul; if (AddOp < Idx) { Ops.erase(Ops.begin()+AddOp); @@ -1310,21 +1255,21 @@ SCEVHandle ScalarEvolution::getAddExpr(SmallVectorImpl<SCEVHandle> &Ops) { OMulOp != e; ++OMulOp) if (OtherMul->getOperand(OMulOp) == MulOpSCEV) { // Fold X + (A*B*C) + (A*D*E) --> X + (A*(B*C+D*E)) - SCEVHandle InnerMul1 = Mul->getOperand(MulOp == 0); + const SCEV* InnerMul1 = Mul->getOperand(MulOp == 0); if (Mul->getNumOperands() != 2) { - SmallVector<SCEVHandle, 4> MulOps(Mul->op_begin(), Mul->op_end()); + SmallVector<const SCEV*, 4> MulOps(Mul->op_begin(), Mul->op_end()); MulOps.erase(MulOps.begin()+MulOp); InnerMul1 = getMulExpr(MulOps); } - SCEVHandle InnerMul2 = OtherMul->getOperand(OMulOp == 0); + const SCEV* InnerMul2 = OtherMul->getOperand(OMulOp == 0); if (OtherMul->getNumOperands() != 2) { - SmallVector<SCEVHandle, 4> MulOps(OtherMul->op_begin(), + SmallVector<const SCEV*, 4> MulOps(OtherMul->op_begin(), OtherMul->op_end()); MulOps.erase(MulOps.begin()+OMulOp); InnerMul2 = getMulExpr(MulOps); } - SCEVHandle InnerMulSum = getAddExpr(InnerMul1,InnerMul2); - SCEVHandle OuterMul = getMulExpr(MulOpSCEV, InnerMulSum); + const SCEV* InnerMulSum = getAddExpr(InnerMul1,InnerMul2); + const SCEV* OuterMul = getMulExpr(MulOpSCEV, InnerMulSum); if (Ops.size() == 2) return OuterMul; Ops.erase(Ops.begin()+Idx); Ops.erase(Ops.begin()+OtherMulIdx-1); @@ -1345,7 +1290,7 @@ SCEVHandle ScalarEvolution::getAddExpr(SmallVectorImpl<SCEVHandle> &Ops) { for (; Idx < Ops.size() && isa<SCEVAddRecExpr>(Ops[Idx]); ++Idx) { // Scan all of the other operands to this add and add them to the vector if // they are loop invariant w.r.t. the recurrence. 
- SmallVector<SCEVHandle, 8> LIOps; + SmallVector<const SCEV*, 8> LIOps; const SCEVAddRecExpr *AddRec = cast<SCEVAddRecExpr>(Ops[Idx]); for (unsigned i = 0, e = Ops.size(); i != e; ++i) if (Ops[i]->isLoopInvariant(AddRec->getLoop())) { @@ -1359,11 +1304,11 @@ SCEVHandle ScalarEvolution::getAddExpr(SmallVectorImpl<SCEVHandle> &Ops) { // NLI + LI + {Start,+,Step} --> NLI + {LI+Start,+,Step} LIOps.push_back(AddRec->getStart()); - SmallVector<SCEVHandle, 4> AddRecOps(AddRec->op_begin(), + SmallVector<const SCEV*, 4> AddRecOps(AddRec->op_begin(), AddRec->op_end()); AddRecOps[0] = getAddExpr(LIOps); - SCEVHandle NewRec = getAddRecExpr(AddRecOps, AddRec->getLoop()); + const SCEV* NewRec = getAddRecExpr(AddRecOps, AddRec->getLoop()); // If all of the other operands were loop invariant, we are done. if (Ops.size() == 1) return NewRec; @@ -1385,7 +1330,7 @@ SCEVHandle ScalarEvolution::getAddExpr(SmallVectorImpl<SCEVHandle> &Ops) { const SCEVAddRecExpr *OtherAddRec = cast<SCEVAddRecExpr>(Ops[OtherIdx]); if (AddRec->getLoop() == OtherAddRec->getLoop()) { // Other + {A,+,B} + {C,+,D} --> Other + {A+C,+,B+D} - SmallVector<SCEVHandle, 4> NewOps(AddRec->op_begin(), AddRec->op_end()); + SmallVector<const SCEV*, 4> NewOps(AddRec->op_begin(), AddRec->op_end()); for (unsigned i = 0, e = OtherAddRec->getNumOperands(); i != e; ++i) { if (i >= NewOps.size()) { NewOps.insert(NewOps.end(), OtherAddRec->op_begin()+i, @@ -1394,7 +1339,7 @@ SCEVHandle ScalarEvolution::getAddExpr(SmallVectorImpl<SCEVHandle> &Ops) { } NewOps[i] = getAddExpr(NewOps[i], OtherAddRec->getOperand(i)); } - SCEVHandle NewAddRec = getAddRecExpr(NewOps, AddRec->getLoop()); + const SCEV* NewAddRec = getAddRecExpr(NewOps, AddRec->getLoop()); if (Ops.size() == 2) return NewAddRec; @@ -1412,16 +1357,16 @@ SCEVHandle ScalarEvolution::getAddExpr(SmallVectorImpl<SCEVHandle> &Ops) { // Okay, it looks like we really DO need an add expr. Check to see if we // already have one, otherwise create a new one. std::vector<const SCEV*> SCEVOps(Ops.begin(), Ops.end()); - SCEVCommutativeExpr *&Result = (*SCEVCommExprs)[std::make_pair(scAddExpr, + SCEVCommutativeExpr *&Result = SCEVCommExprs[std::make_pair(scAddExpr, SCEVOps)]; - if (Result == 0) Result = new SCEVAddExpr(Ops, this); + if (Result == 0) Result = new SCEVAddExpr(Ops); return Result; } /// getMulExpr - Get a canonical multiply expression, or something simpler if /// possible. -SCEVHandle ScalarEvolution::getMulExpr(SmallVectorImpl<SCEVHandle> &Ops) { +const SCEV* ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV*> &Ops) { assert(!Ops.empty() && "Cannot get empty mul!"); #ifndef NDEBUG for (unsigned i = 1, e = Ops.size(); i != e; ++i) @@ -1502,7 +1447,7 @@ SCEVHandle ScalarEvolution::getMulExpr(SmallVectorImpl<SCEVHandle> &Ops) { for (; Idx < Ops.size() && isa<SCEVAddRecExpr>(Ops[Idx]); ++Idx) { // Scan all of the other operands to this mul and add them to the vector if // they are loop invariant w.r.t. the recurrence. - SmallVector<SCEVHandle, 8> LIOps; + SmallVector<const SCEV*, 8> LIOps; const SCEVAddRecExpr *AddRec = cast<SCEVAddRecExpr>(Ops[Idx]); for (unsigned i = 0, e = Ops.size(); i != e; ++i) if (Ops[i]->isLoopInvariant(AddRec->getLoop())) { @@ -1514,7 +1459,7 @@ SCEVHandle ScalarEvolution::getMulExpr(SmallVectorImpl<SCEVHandle> &Ops) { // If we found some loop invariants, fold them into the recurrence. 
if (!LIOps.empty()) { // NLI * LI * {Start,+,Step} --> NLI * {LI*Start,+,LI*Step} - SmallVector<SCEVHandle, 4> NewOps; + SmallVector<const SCEV*, 4> NewOps; NewOps.reserve(AddRec->getNumOperands()); if (LIOps.size() == 1) { const SCEV *Scale = LIOps[0]; @@ -1522,13 +1467,13 @@ SCEVHandle ScalarEvolution::getMulExpr(SmallVectorImpl<SCEVHandle> &Ops) { NewOps.push_back(getMulExpr(Scale, AddRec->getOperand(i))); } else { for (unsigned i = 0, e = AddRec->getNumOperands(); i != e; ++i) { - SmallVector<SCEVHandle, 4> MulOps(LIOps.begin(), LIOps.end()); + SmallVector<const SCEV*, 4> MulOps(LIOps.begin(), LIOps.end()); MulOps.push_back(AddRec->getOperand(i)); NewOps.push_back(getMulExpr(MulOps)); } } - SCEVHandle NewRec = getAddRecExpr(NewOps, AddRec->getLoop()); + const SCEV* NewRec = getAddRecExpr(NewOps, AddRec->getLoop()); // If all of the other operands were loop invariant, we are done. if (Ops.size() == 1) return NewRec; @@ -1552,14 +1497,14 @@ SCEVHandle ScalarEvolution::getMulExpr(SmallVectorImpl<SCEVHandle> &Ops) { if (AddRec->getLoop() == OtherAddRec->getLoop()) { // F * G --> {A,+,B} * {C,+,D} --> {A*C,+,F*D + G*B + B*D} const SCEVAddRecExpr *F = AddRec, *G = OtherAddRec; - SCEVHandle NewStart = getMulExpr(F->getStart(), + const SCEV* NewStart = getMulExpr(F->getStart(), G->getStart()); - SCEVHandle B = F->getStepRecurrence(*this); - SCEVHandle D = G->getStepRecurrence(*this); - SCEVHandle NewStep = getAddExpr(getMulExpr(F, D), + const SCEV* B = F->getStepRecurrence(*this); + const SCEV* D = G->getStepRecurrence(*this); + const SCEV* NewStep = getAddExpr(getMulExpr(F, D), getMulExpr(G, B), getMulExpr(B, D)); - SCEVHandle NewAddRec = getAddRecExpr(NewStart, NewStep, + const SCEV* NewAddRec = getAddRecExpr(NewStart, NewStep, F->getLoop()); if (Ops.size() == 2) return NewAddRec; @@ -1577,17 +1522,17 @@ SCEVHandle ScalarEvolution::getMulExpr(SmallVectorImpl<SCEVHandle> &Ops) { // Okay, it looks like we really DO need an mul expr. Check to see if we // already have one, otherwise create a new one. std::vector<const SCEV*> SCEVOps(Ops.begin(), Ops.end()); - SCEVCommutativeExpr *&Result = (*SCEVCommExprs)[std::make_pair(scMulExpr, + SCEVCommutativeExpr *&Result = SCEVCommExprs[std::make_pair(scMulExpr, SCEVOps)]; if (Result == 0) - Result = new SCEVMulExpr(Ops, this); + Result = new SCEVMulExpr(Ops); return Result; } /// getUDivExpr - Get a canonical multiply expression, or something simpler if /// possible. -SCEVHandle ScalarEvolution::getUDivExpr(const SCEVHandle &LHS, - const SCEVHandle &RHS) { +const SCEV* ScalarEvolution::getUDivExpr(const SCEV* LHS, + const SCEV* RHS) { assert(getEffectiveSCEVType(LHS->getType()) == getEffectiveSCEVType(RHS->getType()) && "SCEVUDivExpr operand types don't match!"); @@ -1620,24 +1565,24 @@ SCEVHandle ScalarEvolution::getUDivExpr(const SCEVHandle &LHS, getAddRecExpr(getZeroExtendExpr(AR->getStart(), ExtTy), getZeroExtendExpr(Step, ExtTy), AR->getLoop())) { - SmallVector<SCEVHandle, 4> Operands; + SmallVector<const SCEV*, 4> Operands; for (unsigned i = 0, e = AR->getNumOperands(); i != e; ++i) Operands.push_back(getUDivExpr(AR->getOperand(i), RHS)); return getAddRecExpr(Operands, AR->getLoop()); } // (A*B)/C --> A*(B/C) if safe and B/C can be folded. 
if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(LHS)) { - SmallVector<SCEVHandle, 4> Operands; + SmallVector<const SCEV*, 4> Operands; for (unsigned i = 0, e = M->getNumOperands(); i != e; ++i) Operands.push_back(getZeroExtendExpr(M->getOperand(i), ExtTy)); if (getZeroExtendExpr(M, ExtTy) == getMulExpr(Operands)) // Find an operand that's safely divisible. for (unsigned i = 0, e = M->getNumOperands(); i != e; ++i) { - SCEVHandle Op = M->getOperand(i); - SCEVHandle Div = getUDivExpr(Op, RHSC); + const SCEV* Op = M->getOperand(i); + const SCEV* Div = getUDivExpr(Op, RHSC); if (!isa<SCEVUDivExpr>(Div) && getMulExpr(Div, RHSC) == Op) { - const SmallVectorImpl<SCEVHandle> &MOperands = M->getOperands(); - Operands = SmallVector<SCEVHandle, 4>(MOperands.begin(), + const SmallVectorImpl<const SCEV*> &MOperands = M->getOperands(); + Operands = SmallVector<const SCEV*, 4>(MOperands.begin(), MOperands.end()); Operands[i] = Div; return getMulExpr(Operands); @@ -1646,13 +1591,13 @@ SCEVHandle ScalarEvolution::getUDivExpr(const SCEVHandle &LHS, } // (A+B)/C --> (A/C + B/C) if safe and A/C and B/C can be folded. if (const SCEVAddRecExpr *A = dyn_cast<SCEVAddRecExpr>(LHS)) { - SmallVector<SCEVHandle, 4> Operands; + SmallVector<const SCEV*, 4> Operands; for (unsigned i = 0, e = A->getNumOperands(); i != e; ++i) Operands.push_back(getZeroExtendExpr(A->getOperand(i), ExtTy)); if (getZeroExtendExpr(A, ExtTy) == getAddExpr(Operands)) { Operands.clear(); for (unsigned i = 0, e = A->getNumOperands(); i != e; ++i) { - SCEVHandle Op = getUDivExpr(A->getOperand(i), RHS); + const SCEV* Op = getUDivExpr(A->getOperand(i), RHS); if (isa<SCEVUDivExpr>(Op) || getMulExpr(Op, RHS) != A->getOperand(i)) break; Operands.push_back(Op); @@ -1670,17 +1615,17 @@ SCEVHandle ScalarEvolution::getUDivExpr(const SCEVHandle &LHS, } } - SCEVUDivExpr *&Result = (*SCEVUDivs)[std::make_pair(LHS, RHS)]; - if (Result == 0) Result = new SCEVUDivExpr(LHS, RHS, this); + SCEVUDivExpr *&Result = SCEVUDivs[std::make_pair(LHS, RHS)]; + if (Result == 0) Result = new SCEVUDivExpr(LHS, RHS); return Result; } /// getAddRecExpr - Get an add recurrence expression for the specified loop. /// Simplify the expression as much as possible. -SCEVHandle ScalarEvolution::getAddRecExpr(const SCEVHandle &Start, - const SCEVHandle &Step, const Loop *L) { - SmallVector<SCEVHandle, 4> Operands; +const SCEV* ScalarEvolution::getAddRecExpr(const SCEV* Start, + const SCEV* Step, const Loop *L) { + SmallVector<const SCEV*, 4> Operands; Operands.push_back(Start); if (const SCEVAddRecExpr *StepChrec = dyn_cast<SCEVAddRecExpr>(Step)) if (StepChrec->getLoop() == L) { @@ -1695,7 +1640,7 @@ SCEVHandle ScalarEvolution::getAddRecExpr(const SCEVHandle &Start, /// getAddRecExpr - Get an add recurrence expression for the specified loop. /// Simplify the expression as much as possible. 
-SCEVHandle ScalarEvolution::getAddRecExpr(SmallVectorImpl<SCEVHandle> &Operands, +const SCEV* ScalarEvolution::getAddRecExpr(SmallVectorImpl<const SCEV*> &Operands, const Loop *L) { if (Operands.size() == 1) return Operands[0]; #ifndef NDEBUG @@ -1714,9 +1659,8 @@ SCEVHandle ScalarEvolution::getAddRecExpr(SmallVectorImpl<SCEVHandle> &Operands, if (const SCEVAddRecExpr *NestedAR = dyn_cast<SCEVAddRecExpr>(Operands[0])) { const Loop* NestedLoop = NestedAR->getLoop(); if (L->getLoopDepth() < NestedLoop->getLoopDepth()) { - SmallVector<SCEVHandle, 4> NestedOperands(NestedAR->op_begin(), + SmallVector<const SCEV*, 4> NestedOperands(NestedAR->op_begin(), NestedAR->op_end()); - SCEVHandle NestedARHandle(NestedAR); Operands[0] = NestedAR->getStart(); NestedOperands[0] = getAddRecExpr(Operands, L); return getAddRecExpr(NestedOperands, NestedLoop); @@ -1724,21 +1668,21 @@ SCEVHandle ScalarEvolution::getAddRecExpr(SmallVectorImpl<SCEVHandle> &Operands, } std::vector<const SCEV*> SCEVOps(Operands.begin(), Operands.end()); - SCEVAddRecExpr *&Result = (*SCEVAddRecExprs)[std::make_pair(L, SCEVOps)]; - if (Result == 0) Result = new SCEVAddRecExpr(Operands, L, this); + SCEVAddRecExpr *&Result = SCEVAddRecExprs[std::make_pair(L, SCEVOps)]; + if (Result == 0) Result = new SCEVAddRecExpr(Operands, L); return Result; } -SCEVHandle ScalarEvolution::getSMaxExpr(const SCEVHandle &LHS, - const SCEVHandle &RHS) { - SmallVector<SCEVHandle, 2> Ops; +const SCEV* ScalarEvolution::getSMaxExpr(const SCEV* LHS, + const SCEV* RHS) { + SmallVector<const SCEV*, 2> Ops; Ops.push_back(LHS); Ops.push_back(RHS); return getSMaxExpr(Ops); } -SCEVHandle -ScalarEvolution::getSMaxExpr(SmallVectorImpl<SCEVHandle> &Ops) { +const SCEV* +ScalarEvolution::getSMaxExpr(SmallVectorImpl<const SCEV*> &Ops) { assert(!Ops.empty() && "Cannot get empty smax!"); if (Ops.size() == 1) return Ops[0]; #ifndef NDEBUG @@ -1810,22 +1754,22 @@ ScalarEvolution::getSMaxExpr(SmallVectorImpl<SCEVHandle> &Ops) { // Okay, it looks like we really DO need an smax expr. Check to see if we // already have one, otherwise create a new one. std::vector<const SCEV*> SCEVOps(Ops.begin(), Ops.end()); - SCEVCommutativeExpr *&Result = (*SCEVCommExprs)[std::make_pair(scSMaxExpr, + SCEVCommutativeExpr *&Result = SCEVCommExprs[std::make_pair(scSMaxExpr, SCEVOps)]; - if (Result == 0) Result = new SCEVSMaxExpr(Ops, this); + if (Result == 0) Result = new SCEVSMaxExpr(Ops); return Result; } -SCEVHandle ScalarEvolution::getUMaxExpr(const SCEVHandle &LHS, - const SCEVHandle &RHS) { - SmallVector<SCEVHandle, 2> Ops; +const SCEV* ScalarEvolution::getUMaxExpr(const SCEV* LHS, + const SCEV* RHS) { + SmallVector<const SCEV*, 2> Ops; Ops.push_back(LHS); Ops.push_back(RHS); return getUMaxExpr(Ops); } -SCEVHandle -ScalarEvolution::getUMaxExpr(SmallVectorImpl<SCEVHandle> &Ops) { +const SCEV* +ScalarEvolution::getUMaxExpr(SmallVectorImpl<const SCEV*> &Ops) { assert(!Ops.empty() && "Cannot get empty umax!"); if (Ops.size() == 1) return Ops[0]; #ifndef NDEBUG @@ -1897,31 +1841,31 @@ ScalarEvolution::getUMaxExpr(SmallVectorImpl<SCEVHandle> &Ops) { // Okay, it looks like we really DO need a umax expr. Check to see if we // already have one, otherwise create a new one. 
std::vector<const SCEV*> SCEVOps(Ops.begin(), Ops.end()); - SCEVCommutativeExpr *&Result = (*SCEVCommExprs)[std::make_pair(scUMaxExpr, + SCEVCommutativeExpr *&Result = SCEVCommExprs[std::make_pair(scUMaxExpr, SCEVOps)]; - if (Result == 0) Result = new SCEVUMaxExpr(Ops, this); + if (Result == 0) Result = new SCEVUMaxExpr(Ops); return Result; } -SCEVHandle ScalarEvolution::getSMinExpr(const SCEVHandle &LHS, - const SCEVHandle &RHS) { +const SCEV* ScalarEvolution::getSMinExpr(const SCEV* LHS, + const SCEV* RHS) { // ~smax(~x, ~y) == smin(x, y). return getNotSCEV(getSMaxExpr(getNotSCEV(LHS), getNotSCEV(RHS))); } -SCEVHandle ScalarEvolution::getUMinExpr(const SCEVHandle &LHS, - const SCEVHandle &RHS) { +const SCEV* ScalarEvolution::getUMinExpr(const SCEV* LHS, + const SCEV* RHS) { // ~umax(~x, ~y) == umin(x, y) return getNotSCEV(getUMaxExpr(getNotSCEV(LHS), getNotSCEV(RHS))); } -SCEVHandle ScalarEvolution::getUnknown(Value *V) { +const SCEV* ScalarEvolution::getUnknown(Value *V) { if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) return getConstant(CI); if (isa<ConstantPointerNull>(V)) return getIntegerSCEV(0, V->getType()); - SCEVUnknown *&Result = (*SCEVUnknowns)[V]; - if (Result == 0) Result = new SCEVUnknown(V, this); + SCEVUnknown *&Result = SCEVUnknowns[V]; + if (Result == 0) Result = new SCEVUnknown(V); return Result; } @@ -1975,7 +1919,7 @@ const Type *ScalarEvolution::getEffectiveSCEVType(const Type *Ty) const { return TD->getIntPtrType(); } -SCEVHandle ScalarEvolution::getCouldNotCompute() { +const SCEV* ScalarEvolution::getCouldNotCompute() { return CouldNotCompute; } @@ -1987,19 +1931,19 @@ bool ScalarEvolution::hasSCEV(Value *V) const { /// getSCEV - Return an existing SCEV if it exists, otherwise analyze the /// expression and create a new one. -SCEVHandle ScalarEvolution::getSCEV(Value *V) { +const SCEV* ScalarEvolution::getSCEV(Value *V) { assert(isSCEVable(V->getType()) && "Value is not SCEVable!"); - std::map<SCEVCallbackVH, SCEVHandle>::iterator I = Scalars.find(V); + std::map<SCEVCallbackVH, const SCEV*>::iterator I = Scalars.find(V); if (I != Scalars.end()) return I->second; - SCEVHandle S = createSCEV(V); + const SCEV* S = createSCEV(V); Scalars.insert(std::make_pair(SCEVCallbackVH(V, this), S)); return S; } /// getIntegerSCEV - Given an integer or FP type, create a constant for the /// specified signed integer value and return a SCEV for the constant. 
-SCEVHandle ScalarEvolution::getIntegerSCEV(int Val, const Type *Ty) { +const SCEV* ScalarEvolution::getIntegerSCEV(int Val, const Type *Ty) { Ty = getEffectiveSCEVType(Ty); Constant *C; if (Val == 0) @@ -2014,7 +1958,7 @@ SCEVHandle ScalarEvolution::getIntegerSCEV(int Val, const Type *Ty) { /// getNegativeSCEV - Return a SCEV corresponding to -V = -1*V /// -SCEVHandle ScalarEvolution::getNegativeSCEV(const SCEVHandle &V) { +const SCEV* ScalarEvolution::getNegativeSCEV(const SCEV* V) { if (const SCEVConstant *VC = dyn_cast<SCEVConstant>(V)) return getUnknown(ConstantExpr::getNeg(VC->getValue())); @@ -2024,20 +1968,20 @@ SCEVHandle ScalarEvolution::getNegativeSCEV(const SCEVHandle &V) { } /// getNotSCEV - Return a SCEV corresponding to ~V = -1-V -SCEVHandle ScalarEvolution::getNotSCEV(const SCEVHandle &V) { +const SCEV* ScalarEvolution::getNotSCEV(const SCEV* V) { if (const SCEVConstant *VC = dyn_cast<SCEVConstant>(V)) return getUnknown(ConstantExpr::getNot(VC->getValue())); const Type *Ty = V->getType(); Ty = getEffectiveSCEVType(Ty); - SCEVHandle AllOnes = getConstant(ConstantInt::getAllOnesValue(Ty)); + const SCEV* AllOnes = getConstant(ConstantInt::getAllOnesValue(Ty)); return getMinusSCEV(AllOnes, V); } /// getMinusSCEV - Return a SCEV corresponding to LHS - RHS. /// -SCEVHandle ScalarEvolution::getMinusSCEV(const SCEVHandle &LHS, - const SCEVHandle &RHS) { +const SCEV* ScalarEvolution::getMinusSCEV(const SCEV* LHS, + const SCEV* RHS) { // X - Y --> X + -Y return getAddExpr(LHS, getNegativeSCEV(RHS)); } @@ -2045,8 +1989,8 @@ SCEVHandle ScalarEvolution::getMinusSCEV(const SCEVHandle &LHS, /// getTruncateOrZeroExtend - Return a SCEV corresponding to a conversion of the /// input value to the specified type. If the type must be extended, it is zero /// extended. -SCEVHandle -ScalarEvolution::getTruncateOrZeroExtend(const SCEVHandle &V, +const SCEV* +ScalarEvolution::getTruncateOrZeroExtend(const SCEV* V, const Type *Ty) { const Type *SrcTy = V->getType(); assert((SrcTy->isInteger() || (TD && isa<PointerType>(SrcTy))) && @@ -2062,8 +2006,8 @@ ScalarEvolution::getTruncateOrZeroExtend(const SCEVHandle &V, /// getTruncateOrSignExtend - Return a SCEV corresponding to a conversion of the /// input value to the specified type. If the type must be extended, it is sign /// extended. -SCEVHandle -ScalarEvolution::getTruncateOrSignExtend(const SCEVHandle &V, +const SCEV* +ScalarEvolution::getTruncateOrSignExtend(const SCEV* V, const Type *Ty) { const Type *SrcTy = V->getType(); assert((SrcTy->isInteger() || (TD && isa<PointerType>(SrcTy))) && @@ -2079,8 +2023,8 @@ ScalarEvolution::getTruncateOrSignExtend(const SCEVHandle &V, /// getNoopOrZeroExtend - Return a SCEV corresponding to a conversion of the /// input value to the specified type. If the type must be extended, it is zero /// extended. The conversion must not be narrowing. -SCEVHandle -ScalarEvolution::getNoopOrZeroExtend(const SCEVHandle &V, const Type *Ty) { +const SCEV* +ScalarEvolution::getNoopOrZeroExtend(const SCEV* V, const Type *Ty) { const Type *SrcTy = V->getType(); assert((SrcTy->isInteger() || (TD && isa<PointerType>(SrcTy))) && (Ty->isInteger() || (TD && isa<PointerType>(Ty))) && @@ -2095,8 +2039,8 @@ ScalarEvolution::getNoopOrZeroExtend(const SCEVHandle &V, const Type *Ty) { /// getNoopOrSignExtend - Return a SCEV corresponding to a conversion of the /// input value to the specified type. If the type must be extended, it is sign /// extended. The conversion must not be narrowing. 
-SCEVHandle -ScalarEvolution::getNoopOrSignExtend(const SCEVHandle &V, const Type *Ty) { +const SCEV* +ScalarEvolution::getNoopOrSignExtend(const SCEV* V, const Type *Ty) { const Type *SrcTy = V->getType(); assert((SrcTy->isInteger() || (TD && isa<PointerType>(SrcTy))) && (Ty->isInteger() || (TD && isa<PointerType>(Ty))) && @@ -2112,8 +2056,8 @@ ScalarEvolution::getNoopOrSignExtend(const SCEVHandle &V, const Type *Ty) { /// the input value to the specified type. If the type must be extended, /// it is extended with unspecified bits. The conversion must not be /// narrowing. -SCEVHandle -ScalarEvolution::getNoopOrAnyExtend(const SCEVHandle &V, const Type *Ty) { +const SCEV* +ScalarEvolution::getNoopOrAnyExtend(const SCEV* V, const Type *Ty) { const Type *SrcTy = V->getType(); assert((SrcTy->isInteger() || (TD && isa<PointerType>(SrcTy))) && (Ty->isInteger() || (TD && isa<PointerType>(Ty))) && @@ -2127,8 +2071,8 @@ ScalarEvolution::getNoopOrAnyExtend(const SCEVHandle &V, const Type *Ty) { /// getTruncateOrNoop - Return a SCEV corresponding to a conversion of the /// input value to the specified type. The conversion must not be widening. -SCEVHandle -ScalarEvolution::getTruncateOrNoop(const SCEVHandle &V, const Type *Ty) { +const SCEV* +ScalarEvolution::getTruncateOrNoop(const SCEV* V, const Type *Ty) { const Type *SrcTy = V->getType(); assert((SrcTy->isInteger() || (TD && isa<PointerType>(SrcTy))) && (Ty->isInteger() || (TD && isa<PointerType>(Ty))) && @@ -2143,10 +2087,10 @@ ScalarEvolution::getTruncateOrNoop(const SCEVHandle &V, const Type *Ty) { /// getUMaxFromMismatchedTypes - Promote the operands to the wider of /// the types using zero-extension, and then perform a umax operation /// with them. -SCEVHandle ScalarEvolution::getUMaxFromMismatchedTypes(const SCEVHandle &LHS, - const SCEVHandle &RHS) { - SCEVHandle PromotedLHS = LHS; - SCEVHandle PromotedRHS = RHS; +const SCEV* ScalarEvolution::getUMaxFromMismatchedTypes(const SCEV* LHS, + const SCEV* RHS) { + const SCEV* PromotedLHS = LHS; + const SCEV* PromotedRHS = RHS; if (getTypeSizeInBits(LHS->getType()) > getTypeSizeInBits(RHS->getType())) PromotedRHS = getZeroExtendExpr(RHS, LHS->getType()); @@ -2156,17 +2100,33 @@ SCEVHandle ScalarEvolution::getUMaxFromMismatchedTypes(const SCEVHandle &LHS, return getUMaxExpr(PromotedLHS, PromotedRHS); } +/// getUMinFromMismatchedTypes - Promote the operands to the wider of +/// the types using zero-extension, and then perform a umin operation +/// with them. +const SCEV* ScalarEvolution::getUMinFromMismatchedTypes(const SCEV* LHS, + const SCEV* RHS) { + const SCEV* PromotedLHS = LHS; + const SCEV* PromotedRHS = RHS; + + if (getTypeSizeInBits(LHS->getType()) > getTypeSizeInBits(RHS->getType())) + PromotedRHS = getZeroExtendExpr(RHS, LHS->getType()); + else + PromotedLHS = getNoopOrZeroExtend(LHS, RHS->getType()); + + return getUMinExpr(PromotedLHS, PromotedRHS); +} + /// ReplaceSymbolicValueWithConcrete - This looks up the computed SCEV value for /// the specified instruction and replaces any references to the symbolic value /// SymName with the specified value. This is used during PHI resolution. 
void ScalarEvolution:: -ReplaceSymbolicValueWithConcrete(Instruction *I, const SCEVHandle &SymName, - const SCEVHandle &NewVal) { - std::map<SCEVCallbackVH, SCEVHandle>::iterator SI = +ReplaceSymbolicValueWithConcrete(Instruction *I, const SCEV* SymName, + const SCEV* NewVal) { + std::map<SCEVCallbackVH, const SCEV*>::iterator SI = Scalars.find(SCEVCallbackVH(I, this)); if (SI == Scalars.end()) return; - SCEVHandle NV = + const SCEV* NV = SI->second->replaceSymbolicValuesWithConcrete(SymName, NewVal, *this); if (NV == SI->second) return; // No change. @@ -2182,7 +2142,7 @@ ReplaceSymbolicValueWithConcrete(Instruction *I, const SCEVHandle &SymName, /// createNodeForPHI - PHI nodes have two cases. Either the PHI node exists in /// a loop header, making it a potential recurrence, or it doesn't. /// -SCEVHandle ScalarEvolution::createNodeForPHI(PHINode *PN) { +const SCEV* ScalarEvolution::createNodeForPHI(PHINode *PN) { if (PN->getNumIncomingValues() == 2) // The loops have been canonicalized. if (const Loop *L = LI->getLoopFor(PN->getParent())) if (L->getHeader() == PN->getParent()) { @@ -2192,14 +2152,14 @@ SCEVHandle ScalarEvolution::createNodeForPHI(PHINode *PN) { unsigned BackEdge = IncomingEdge^1; // While we are analyzing this PHI node, handle its value symbolically. - SCEVHandle SymbolicName = getUnknown(PN); + const SCEV* SymbolicName = getUnknown(PN); assert(Scalars.find(PN) == Scalars.end() && "PHI node already processed?"); Scalars.insert(std::make_pair(SCEVCallbackVH(PN, this), SymbolicName)); // Using this symbolic name for the PHI, analyze the value coming around // the back-edge. - SCEVHandle BEValue = getSCEV(PN->getIncomingValue(BackEdge)); + const SCEV* BEValue = getSCEV(PN->getIncomingValue(BackEdge)); // NOTE: If BEValue is loop invariant, we know that the PHI node just // has a special value for the first iteration of the loop. @@ -2219,19 +2179,19 @@ SCEVHandle ScalarEvolution::createNodeForPHI(PHINode *PN) { if (FoundIndex != Add->getNumOperands()) { // Create an add with everything but the specified operand. - SmallVector<SCEVHandle, 8> Ops; + SmallVector<const SCEV*, 8> Ops; for (unsigned i = 0, e = Add->getNumOperands(); i != e; ++i) if (i != FoundIndex) Ops.push_back(Add->getOperand(i)); - SCEVHandle Accum = getAddExpr(Ops); + const SCEV* Accum = getAddExpr(Ops); // This is not a valid addrec if the step amount is varying each // loop iteration, but is not itself an addrec in this loop. if (Accum->isLoopInvariant(L) || (isa<SCEVAddRecExpr>(Accum) && cast<SCEVAddRecExpr>(Accum)->getLoop() == L)) { - SCEVHandle StartVal = getSCEV(PN->getIncomingValue(IncomingEdge)); - SCEVHandle PHISCEV = getAddRecExpr(StartVal, Accum, L); + const SCEV* StartVal = getSCEV(PN->getIncomingValue(IncomingEdge)); + const SCEV* PHISCEV = getAddRecExpr(StartVal, Accum, L); // Okay, for the entire analysis of this edge we assumed the PHI // to be symbolic. We now need to go back and update all of the @@ -2250,13 +2210,13 @@ SCEVHandle ScalarEvolution::createNodeForPHI(PHINode *PN) { // Because the other in-value of i (0) fits the evolution of BEValue // i really is an addrec evolution. if (AddRec->getLoop() == L && AddRec->isAffine()) { - SCEVHandle StartVal = getSCEV(PN->getIncomingValue(IncomingEdge)); + const SCEV* StartVal = getSCEV(PN->getIncomingValue(IncomingEdge)); // If StartVal = j.start - j.stride, we can use StartVal as the // initial step of the addrec evolution. 
if (StartVal == getMinusSCEV(AddRec->getOperand(0), AddRec->getOperand(1))) { - SCEVHandle PHISCEV = + const SCEV* PHISCEV = getAddRecExpr(StartVal, AddRec->getOperand(1), L); // Okay, for the entire analysis of this edge we assumed the PHI @@ -2280,14 +2240,14 @@ SCEVHandle ScalarEvolution::createNodeForPHI(PHINode *PN) { /// createNodeForGEP - Expand GEP instructions into add and multiply /// operations. This allows them to be analyzed by regular SCEV code. /// -SCEVHandle ScalarEvolution::createNodeForGEP(User *GEP) { +const SCEV* ScalarEvolution::createNodeForGEP(User *GEP) { const Type *IntPtrTy = TD->getIntPtrType(); Value *Base = GEP->getOperand(0); // Don't attempt to analyze GEPs over unsized objects. if (!cast<PointerType>(Base->getType())->getElementType()->isSized()) return getUnknown(GEP); - SCEVHandle TotalOffset = getIntegerSCEV(0, IntPtrTy); + const SCEV* TotalOffset = getIntegerSCEV(0, IntPtrTy); gep_type_iterator GTI = gep_type_begin(GEP); for (GetElementPtrInst::op_iterator I = next(GEP->op_begin()), E = GEP->op_end(); @@ -2303,7 +2263,7 @@ SCEVHandle ScalarEvolution::createNodeForGEP(User *GEP) { getIntegerSCEV(Offset, IntPtrTy)); } else { // For an array, add the element offset, explicitly scaled. - SCEVHandle LocalOffset = getSCEV(Index); + const SCEV* LocalOffset = getSCEV(Index); if (!isa<PointerType>(LocalOffset->getType())) // Getelementptr indicies are signed. LocalOffset = getTruncateOrSignExtend(LocalOffset, @@ -2323,7 +2283,7 @@ SCEVHandle ScalarEvolution::createNodeForGEP(User *GEP) { /// the minimum number of times S is divisible by 2. For example, given {4,+,8} /// it returns 2. If S is guaranteed to be 0, it returns the bitwidth of S. uint32_t -ScalarEvolution::GetMinTrailingZeros(const SCEVHandle &S) { +ScalarEvolution::GetMinTrailingZeros(const SCEV* S) { if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S)) return C->getValue()->getValue().countTrailingZeros(); @@ -2400,7 +2360,7 @@ ScalarEvolution::GetMinTrailingZeros(const SCEVHandle &S) { } uint32_t -ScalarEvolution::GetMinLeadingZeros(const SCEVHandle &S) { +ScalarEvolution::GetMinLeadingZeros(const SCEV* S) { // TODO: Handle other SCEV expression types here. if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S)) @@ -2426,7 +2386,7 @@ ScalarEvolution::GetMinLeadingZeros(const SCEVHandle &S) { } uint32_t -ScalarEvolution::GetMinSignBits(const SCEVHandle &S) { +ScalarEvolution::GetMinSignBits(const SCEV* S) { // TODO: Handle other SCEV expression types here. if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S)) { @@ -2453,7 +2413,7 @@ ScalarEvolution::GetMinSignBits(const SCEVHandle &S) { /// createSCEV - We know that there is no SCEV for the specified value. /// Analyze the expression. /// -SCEVHandle ScalarEvolution::createSCEV(Value *V) { +const SCEV* ScalarEvolution::createSCEV(Value *V) { if (!isSCEVable(V->getType())) return getUnknown(V); @@ -2517,7 +2477,7 @@ SCEVHandle ScalarEvolution::createSCEV(Value *V) { // In order for this transformation to be safe, the LHS must be of the // form X*(2^n) and the Or constant must be less than 2^n. 
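The or-as-add transformation that the comment above describes only fires when the set bits of the constant cannot overlap the low bits of the left operand. A hypothetical i32 instance (value names are illustrative):

    //   %t = mul i32 %x, 4     ; SCEV (4 * %x), at least 2 known trailing zero bits
    //   %r = or  i32 %t, 1     ; the constant needs only 1 significant bit
    // GetMinTrailingZeros(%t) >= bits(1), so no bits collide and the 'or'
    // is modelled as an add:  SCEV(%r) == (4 * %x) + 1,
    // which the addrec and range machinery can then reason about.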
if (ConstantInt *CI = dyn_cast<ConstantInt>(U->getOperand(1))) { - SCEVHandle LHS = getSCEV(U->getOperand(0)); + const SCEV* LHS = getSCEV(U->getOperand(0)); const APInt &CIVal = CI->getValue(); if (GetMinTrailingZeros(LHS) >= (CIVal.getBitWidth() - CIVal.countLeadingZeros())) @@ -2547,7 +2507,7 @@ SCEVHandle ScalarEvolution::createSCEV(Value *V) { if (const SCEVZeroExtendExpr *Z = dyn_cast<SCEVZeroExtendExpr>(getSCEV(U->getOperand(0)))) { const Type *UTy = U->getType(); - SCEVHandle Z0 = Z->getOperand(); + const SCEV* Z0 = Z->getOperand(); const Type *Z0Ty = Z0->getType(); unsigned Z0TySize = getTypeSizeInBits(Z0Ty); @@ -2716,14 +2676,14 @@ SCEVHandle ScalarEvolution::createSCEV(Value *V) { /// loop-invariant backedge-taken count (see /// hasLoopInvariantBackedgeTakenCount). /// -SCEVHandle ScalarEvolution::getBackedgeTakenCount(const Loop *L) { +const SCEV* ScalarEvolution::getBackedgeTakenCount(const Loop *L) { return getBackedgeTakenInfo(L).Exact; } /// getMaxBackedgeTakenCount - Similar to getBackedgeTakenCount, except /// return the least SCEV value that is known never to be less than the /// actual backedge taken count. -SCEVHandle ScalarEvolution::getMaxBackedgeTakenCount(const Loop *L) { +const SCEV* ScalarEvolution::getMaxBackedgeTakenCount(const Loop *L) { return getBackedgeTakenInfo(L).Max; } @@ -2790,7 +2750,7 @@ void ScalarEvolution::forgetLoopPHIs(const Loop *L) { SmallVector<Instruction *, 16> Worklist; for (BasicBlock::iterator I = Header->begin(); PHINode *PN = dyn_cast<PHINode>(I); ++I) { - std::map<SCEVCallbackVH, SCEVHandle>::iterator It = Scalars.find((Value*)I); + std::map<SCEVCallbackVH, const SCEV*>::iterator It = Scalars.find((Value*)I); if (It != Scalars.end() && !isa<SCEVUnknown>(It->second)) Worklist.push_back(PN); } @@ -2812,8 +2772,8 @@ ScalarEvolution::ComputeBackedgeTakenCount(const Loop *L) { L->getExitingBlocks(ExitingBlocks); // Examine all exits and pick the most conservative values. - SCEVHandle BECount = CouldNotCompute; - SCEVHandle MaxBECount = CouldNotCompute; + const SCEV* BECount = CouldNotCompute; + const SCEV* MaxBECount = CouldNotCompute; bool CouldNotComputeBECount = false; bool CouldNotComputeMaxBECount = false; for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i) { @@ -2822,7 +2782,7 @@ ScalarEvolution::ComputeBackedgeTakenCount(const Loop *L) { if (NewBTI.Exact == CouldNotCompute) { // We couldn't compute an exact value for this exit, so - // we don't be able to compute an exact value for the loop. + // we won't be able to compute an exact value for the loop. CouldNotComputeBECount = true; BECount = CouldNotCompute; } else if (!CouldNotComputeBECount) { @@ -2838,7 +2798,7 @@ ScalarEvolution::ComputeBackedgeTakenCount(const Loop *L) { } if (NewBTI.Max == CouldNotCompute) { // We couldn't compute an maximum value for this exit, so - // we don't be able to compute an maximum value for the loop. + // we won't be able to compute an maximum value for the loop. 
CouldNotComputeMaxBECount = true; MaxBECount = CouldNotCompute; } else if (!CouldNotComputeMaxBECount) { @@ -2937,23 +2897,21 @@ ScalarEvolution::ComputeBackedgeTakenCountFromExitCond(const Loop *L, ComputeBackedgeTakenCountFromExitCond(L, BO->getOperand(0), TBB, FBB); BackedgeTakenInfo BTI1 = ComputeBackedgeTakenCountFromExitCond(L, BO->getOperand(1), TBB, FBB); - SCEVHandle BECount = CouldNotCompute; - SCEVHandle MaxBECount = CouldNotCompute; + const SCEV* BECount = CouldNotCompute; + const SCEV* MaxBECount = CouldNotCompute; if (L->contains(TBB)) { // Both conditions must be true for the loop to continue executing. // Choose the less conservative count. - // TODO: Take the minimum of the exact counts. - if (BTI0.Exact == BTI1.Exact) - BECount = BTI0.Exact; - // TODO: Take the minimum of the maximum counts. + if (BTI0.Exact == CouldNotCompute || BTI1.Exact == CouldNotCompute) + BECount = CouldNotCompute; + else + BECount = getUMinFromMismatchedTypes(BTI0.Exact, BTI1.Exact); if (BTI0.Max == CouldNotCompute) MaxBECount = BTI1.Max; else if (BTI1.Max == CouldNotCompute) MaxBECount = BTI0.Max; - else if (const SCEVConstant *C0 = dyn_cast<SCEVConstant>(BTI0.Max)) - if (const SCEVConstant *C1 = dyn_cast<SCEVConstant>(BTI1.Max)) - MaxBECount = getConstant(APIntOps::umin(C0->getValue()->getValue(), - C1->getValue()->getValue())); + else + MaxBECount = getUMinFromMismatchedTypes(BTI0.Max, BTI1.Max); } else { // Both conditions must be true for the loop to exit. assert(L->contains(FBB) && "Loop block has no successor in loop!"); @@ -2971,23 +2929,21 @@ ScalarEvolution::ComputeBackedgeTakenCountFromExitCond(const Loop *L, ComputeBackedgeTakenCountFromExitCond(L, BO->getOperand(0), TBB, FBB); BackedgeTakenInfo BTI1 = ComputeBackedgeTakenCountFromExitCond(L, BO->getOperand(1), TBB, FBB); - SCEVHandle BECount = CouldNotCompute; - SCEVHandle MaxBECount = CouldNotCompute; + const SCEV* BECount = CouldNotCompute; + const SCEV* MaxBECount = CouldNotCompute; if (L->contains(FBB)) { // Both conditions must be false for the loop to continue executing. // Choose the less conservative count. - // TODO: Take the minimum of the exact counts. - if (BTI0.Exact == BTI1.Exact) - BECount = BTI0.Exact; - // TODO: Take the minimum of the maximum counts. + if (BTI0.Exact == CouldNotCompute || BTI1.Exact == CouldNotCompute) + BECount = CouldNotCompute; + else + BECount = getUMinFromMismatchedTypes(BTI0.Exact, BTI1.Exact); if (BTI0.Max == CouldNotCompute) MaxBECount = BTI1.Max; else if (BTI1.Max == CouldNotCompute) MaxBECount = BTI0.Max; - else if (const SCEVConstant *C0 = dyn_cast<SCEVConstant>(BTI0.Max)) - if (const SCEVConstant *C1 = dyn_cast<SCEVConstant>(BTI1.Max)) - MaxBECount = getConstant(APIntOps::umin(C0->getValue()->getValue(), - C1->getValue()->getValue())); + else + MaxBECount = getUMinFromMismatchedTypes(BTI0.Max, BTI1.Max); } else { // Both conditions must be false for the loop to exit. 
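Both hunks above drop the old "only when the two exact counts happen to be equal" special case in favor of a genuine umin via the new helper. A worked illustration, assuming both sub-counts are computable (the loop is hypothetical):

    // for (unsigned i = 0; i != n && i != m; ++i)
    //   body();
    // The sub-condition i != n contributes a count of n, i != m contributes m,
    // and the exact backedge-taken count is now umin(n, m); e.g. n = 5, m = 3
    // gives 3.  The Max count takes the same umin instead of folding only
    // constant operands.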
assert(L->contains(TBB) && "Loop block has no successor in loop!"); @@ -3029,7 +2985,7 @@ ScalarEvolution::ComputeBackedgeTakenCountFromExitCondICmp(const Loop *L, // Handle common loops like: for (X = "string"; *X; ++X) if (LoadInst *LI = dyn_cast<LoadInst>(ExitCond->getOperand(0))) if (Constant *RHS = dyn_cast<Constant>(ExitCond->getOperand(1))) { - SCEVHandle ItCnt = + const SCEV* ItCnt = ComputeLoadConstantCompareBackedgeTakenCount(LI, RHS, L, Cond); if (!isa<SCEVCouldNotCompute>(ItCnt)) { unsigned BitWidth = getTypeSizeInBits(ItCnt->getType()); @@ -3039,8 +2995,8 @@ ScalarEvolution::ComputeBackedgeTakenCountFromExitCondICmp(const Loop *L, } } - SCEVHandle LHS = getSCEV(ExitCond->getOperand(0)); - SCEVHandle RHS = getSCEV(ExitCond->getOperand(1)); + const SCEV* LHS = getSCEV(ExitCond->getOperand(0)); + const SCEV* RHS = getSCEV(ExitCond->getOperand(1)); // Try to evaluate any dependencies out of the loop. LHS = getSCEVAtScope(LHS, L); @@ -3063,20 +3019,20 @@ ScalarEvolution::ComputeBackedgeTakenCountFromExitCondICmp(const Loop *L, ConstantRange CompRange( ICmpInst::makeConstantRange(Cond, RHSC->getValue()->getValue())); - SCEVHandle Ret = AddRec->getNumIterationsInRange(CompRange, *this); + const SCEV* Ret = AddRec->getNumIterationsInRange(CompRange, *this); if (!isa<SCEVCouldNotCompute>(Ret)) return Ret; } switch (Cond) { case ICmpInst::ICMP_NE: { // while (X != Y) // Convert to: while (X-Y != 0) - SCEVHandle TC = HowFarToZero(getMinusSCEV(LHS, RHS), L); + const SCEV* TC = HowFarToZero(getMinusSCEV(LHS, RHS), L); if (!isa<SCEVCouldNotCompute>(TC)) return TC; break; } case ICmpInst::ICMP_EQ: { // Convert to: while (X-Y == 0) // while (X == Y) - SCEVHandle TC = HowFarToNonZero(getMinusSCEV(LHS, RHS), L); + const SCEV* TC = HowFarToNonZero(getMinusSCEV(LHS, RHS), L); if (!isa<SCEVCouldNotCompute>(TC)) return TC; break; } @@ -3120,8 +3076,8 @@ ScalarEvolution::ComputeBackedgeTakenCountFromExitCondICmp(const Loop *L, static ConstantInt * EvaluateConstantChrecAtConstant(const SCEVAddRecExpr *AddRec, ConstantInt *C, ScalarEvolution &SE) { - SCEVHandle InVal = SE.getConstant(C); - SCEVHandle Val = AddRec->evaluateAtIteration(InVal, SE); + const SCEV* InVal = SE.getConstant(C); + const SCEV* Val = AddRec->evaluateAtIteration(InVal, SE); assert(isa<SCEVConstant>(Val) && "Evaluation of SCEV at constant didn't fold correctly?"); return cast<SCEVConstant>(Val)->getValue(); @@ -3164,7 +3120,7 @@ GetAddressedElementFromGlobal(GlobalVariable *GV, /// ComputeLoadConstantCompareBackedgeTakenCount - Given an exit condition of /// 'icmp op load X, cst', try to see if we can compute the backedge /// execution count. -SCEVHandle ScalarEvolution:: +const SCEV* ScalarEvolution:: ComputeLoadConstantCompareBackedgeTakenCount(LoadInst *LI, Constant *RHS, const Loop *L, ICmpInst::Predicate predicate) { @@ -3198,7 +3154,7 @@ ComputeLoadConstantCompareBackedgeTakenCount(LoadInst *LI, Constant *RHS, // Okay, we know we have a (load (gep GV, 0, X)) comparison with a constant. // Check to see if X is a loop variant variable value now. - SCEVHandle Idx = getSCEV(VarIdx); + const SCEV* Idx = getSCEV(VarIdx); Idx = getSCEVAtScope(Idx, L); // We can only recognize very limited forms of loop index expressions, in @@ -3374,7 +3330,7 @@ getConstantEvolutionLoopExitValue(PHINode *PN, const APInt& BEs, const Loop *L){ /// try to evaluate a few iterations of the loop until we get the exit /// condition gets a value of ExitWhen (true or false). If we cannot /// evaluate the trip count of the loop, return CouldNotCompute. 
-SCEVHandle ScalarEvolution:: +const SCEV* ScalarEvolution:: ComputeBackedgeTakenCountExhaustively(const Loop *L, Value *Cond, bool ExitWhen) { PHINode *PN = getConstantEvolvingPHI(Cond, L); if (PN == 0) return CouldNotCompute; @@ -3431,7 +3387,7 @@ ComputeBackedgeTakenCountExhaustively(const Loop *L, Value *Cond, bool ExitWhen) /// /// In the case that a relevant loop exit value cannot be computed, the /// original value V is returned. -SCEVHandle ScalarEvolution::getSCEVAtScope(const SCEV *V, const Loop *L) { +const SCEV* ScalarEvolution::getSCEVAtScope(const SCEV *V, const Loop *L) { // FIXME: this should be turned into a virtual method on SCEV! if (isa<SCEVConstant>(V)) return V; @@ -3448,7 +3404,7 @@ SCEVHandle ScalarEvolution::getSCEVAtScope(const SCEV *V, const Loop *L) { // to see if the loop that contains it has a known backedge-taken // count. If so, we may be able to force computation of the exit // value. - SCEVHandle BackedgeTakenCount = getBackedgeTakenCount(LI); + const SCEV* BackedgeTakenCount = getBackedgeTakenCount(LI); if (const SCEVConstant *BTCC = dyn_cast<SCEVConstant>(BackedgeTakenCount)) { // Okay, we know how many times the containing loop executes. If @@ -3486,7 +3442,7 @@ SCEVHandle ScalarEvolution::getSCEVAtScope(const SCEV *V, const Loop *L) { if (!isSCEVable(Op->getType())) return V; - SCEVHandle OpV = getSCEVAtScope(getSCEV(Op), L); + const SCEV* OpV = getSCEVAtScope(getSCEV(Op), L); if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(OpV)) { Constant *C = SC->getValue(); if (C->getType() != Op->getType()) @@ -3532,11 +3488,11 @@ SCEVHandle ScalarEvolution::getSCEVAtScope(const SCEV *V, const Loop *L) { // Avoid performing the look-up in the common case where the specified // expression has no loop-variant portions. for (unsigned i = 0, e = Comm->getNumOperands(); i != e; ++i) { - SCEVHandle OpAtScope = getSCEVAtScope(Comm->getOperand(i), L); + const SCEV* OpAtScope = getSCEVAtScope(Comm->getOperand(i), L); if (OpAtScope != Comm->getOperand(i)) { // Okay, at least one of these operands is loop variant but might be // foldable. Build a new instance of the folded commutative expression. - SmallVector<SCEVHandle, 8> NewOps(Comm->op_begin(), Comm->op_begin()+i); + SmallVector<const SCEV*, 8> NewOps(Comm->op_begin(), Comm->op_begin()+i); NewOps.push_back(OpAtScope); for (++i; i != e; ++i) { @@ -3559,8 +3515,8 @@ SCEVHandle ScalarEvolution::getSCEVAtScope(const SCEV *V, const Loop *L) { } if (const SCEVUDivExpr *Div = dyn_cast<SCEVUDivExpr>(V)) { - SCEVHandle LHS = getSCEVAtScope(Div->getLHS(), L); - SCEVHandle RHS = getSCEVAtScope(Div->getRHS(), L); + const SCEV* LHS = getSCEVAtScope(Div->getLHS(), L); + const SCEV* RHS = getSCEVAtScope(Div->getRHS(), L); if (LHS == Div->getLHS() && RHS == Div->getRHS()) return Div; // must be loop invariant return getUDivExpr(LHS, RHS); @@ -3572,7 +3528,7 @@ SCEVHandle ScalarEvolution::getSCEVAtScope(const SCEV *V, const Loop *L) { if (!L || !AddRec->getLoop()->contains(L->getHeader())) { // To evaluate this recurrence, we need to know how many times the AddRec // loop iterates. Compute this now. - SCEVHandle BackedgeTakenCount = getBackedgeTakenCount(AddRec->getLoop()); + const SCEV* BackedgeTakenCount = getBackedgeTakenCount(AddRec->getLoop()); if (BackedgeTakenCount == CouldNotCompute) return AddRec; // Then, evaluate the AddRec. 
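getSCEVAtScope folds a loop-varying expression down to its value at an outer scope whenever the backedge-taken count of the defining loop is known. A minimal sketch, assuming V is an i32 value defined inside loop L and L's backedge-taken count is the constant 9:

    const SCEV *S    = SE.getSCEV(V);                            // e.g. {0,+,4}<%L>
    const SCEV *Exit = SE.getSCEVAtScope(S, L->getParentLoop());
    // Exit folds to the constant 36 (the addrec evaluated at iteration 9);
    // if nothing can be folded, the original expression is returned unchanged.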
@@ -3582,21 +3538,21 @@ SCEVHandle ScalarEvolution::getSCEVAtScope(const SCEV *V, const Loop *L) { } if (const SCEVZeroExtendExpr *Cast = dyn_cast<SCEVZeroExtendExpr>(V)) { - SCEVHandle Op = getSCEVAtScope(Cast->getOperand(), L); + const SCEV* Op = getSCEVAtScope(Cast->getOperand(), L); if (Op == Cast->getOperand()) return Cast; // must be loop invariant return getZeroExtendExpr(Op, Cast->getType()); } if (const SCEVSignExtendExpr *Cast = dyn_cast<SCEVSignExtendExpr>(V)) { - SCEVHandle Op = getSCEVAtScope(Cast->getOperand(), L); + const SCEV* Op = getSCEVAtScope(Cast->getOperand(), L); if (Op == Cast->getOperand()) return Cast; // must be loop invariant return getSignExtendExpr(Op, Cast->getType()); } if (const SCEVTruncateExpr *Cast = dyn_cast<SCEVTruncateExpr>(V)) { - SCEVHandle Op = getSCEVAtScope(Cast->getOperand(), L); + const SCEV* Op = getSCEVAtScope(Cast->getOperand(), L); if (Op == Cast->getOperand()) return Cast; // must be loop invariant return getTruncateExpr(Op, Cast->getType()); @@ -3608,7 +3564,7 @@ SCEVHandle ScalarEvolution::getSCEVAtScope(const SCEV *V, const Loop *L) { /// getSCEVAtScope - This is a convenience function which does /// getSCEVAtScope(getSCEV(V), L). -SCEVHandle ScalarEvolution::getSCEVAtScope(Value *V, const Loop *L) { +const SCEV* ScalarEvolution::getSCEVAtScope(Value *V, const Loop *L) { return getSCEVAtScope(getSCEV(V), L); } @@ -3621,7 +3577,7 @@ SCEVHandle ScalarEvolution::getSCEVAtScope(Value *V, const Loop *L) { /// A and B isn't important. /// /// If the equation does not have a solution, SCEVCouldNotCompute is returned. -static SCEVHandle SolveLinEquationWithOverflow(const APInt &A, const APInt &B, +static const SCEV* SolveLinEquationWithOverflow(const APInt &A, const APInt &B, ScalarEvolution &SE) { uint32_t BW = A.getBitWidth(); assert(BW == B.getBitWidth() && "Bit widths must be the same."); @@ -3664,7 +3620,7 @@ static SCEVHandle SolveLinEquationWithOverflow(const APInt &A, const APInt &B, /// given quadratic chrec {L,+,M,+,N}. This returns either the two roots (which /// might be the same) or two SCEVCouldNotCompute objects. /// -static std::pair<SCEVHandle,SCEVHandle> +static std::pair<const SCEV*,const SCEV*> SolveQuadraticEquation(const SCEVAddRecExpr *AddRec, ScalarEvolution &SE) { assert(AddRec->getNumOperands() == 3 && "This is not a quadratic chrec!"); const SCEVConstant *LC = dyn_cast<SCEVConstant>(AddRec->getOperand(0)); @@ -3723,7 +3679,7 @@ SolveQuadraticEquation(const SCEVAddRecExpr *AddRec, ScalarEvolution &SE) { /// HowFarToZero - Return the number of times a backedge comparing the specified /// value to zero will execute. If not computable, return CouldNotCompute. -SCEVHandle ScalarEvolution::HowFarToZero(const SCEV *V, const Loop *L) { +const SCEV* ScalarEvolution::HowFarToZero(const SCEV *V, const Loop *L) { // If the value is a constant if (const SCEVConstant *C = dyn_cast<SCEVConstant>(V)) { // If the value is already zero, the branch will execute zero times. @@ -3748,8 +3704,8 @@ SCEVHandle ScalarEvolution::HowFarToZero(const SCEV *V, const Loop *L) { // where BW is the common bit width of Start and Step. // Get the initial value for the loop. 
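For the affine case, the start and step extracted just below feed SolveLinEquationWithOverflow, which solves Step * X == -Start in the 2^BW modular ring. A small worked instance with constant i32 operands:

    // The chrec {8,+,-2}<%L> takes the values 8, 6, 4, 2, 0, ...
    // Solve (-2) * X == -8 (mod 2^32)  =>  X == 4,
    // so a backedge guarded by 'value != 0' executes exactly 4 times.
    // If the required division by the power of two in Step is not exact,
    // CouldNotCompute is returned instead.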
- SCEVHandle Start = getSCEVAtScope(AddRec->getStart(), L->getParentLoop()); - SCEVHandle Step = getSCEVAtScope(AddRec->getOperand(1), L->getParentLoop()); + const SCEV* Start = getSCEVAtScope(AddRec->getStart(), L->getParentLoop()); + const SCEV* Step = getSCEVAtScope(AddRec->getOperand(1), L->getParentLoop()); if (const SCEVConstant *StepC = dyn_cast<SCEVConstant>(Step)) { // For now we handle only constant steps. @@ -3769,7 +3725,7 @@ SCEVHandle ScalarEvolution::HowFarToZero(const SCEV *V, const Loop *L) { } else if (AddRec->isQuadratic() && AddRec->getType()->isInteger()) { // If this is a quadratic (3-term) AddRec {L,+,M,+,N}, find the roots of // the quadratic equation to solve it. - std::pair<SCEVHandle,SCEVHandle> Roots = SolveQuadraticEquation(AddRec, + std::pair<const SCEV*,const SCEV*> Roots = SolveQuadraticEquation(AddRec, *this); const SCEVConstant *R1 = dyn_cast<SCEVConstant>(Roots.first); const SCEVConstant *R2 = dyn_cast<SCEVConstant>(Roots.second); @@ -3788,7 +3744,7 @@ SCEVHandle ScalarEvolution::HowFarToZero(const SCEV *V, const Loop *L) { // We can only use this value if the chrec ends up with an exact zero // value at this index. When solving for "X*X != 5", for example, we // should not accept a root of 2. - SCEVHandle Val = AddRec->evaluateAtIteration(R1, *this); + const SCEV* Val = AddRec->evaluateAtIteration(R1, *this); if (Val->isZero()) return R1; // We found a quadratic root! } @@ -3801,7 +3757,7 @@ SCEVHandle ScalarEvolution::HowFarToZero(const SCEV *V, const Loop *L) { /// HowFarToNonZero - Return the number of times a backedge checking the /// specified value for nonzero will execute. If not computable, return /// CouldNotCompute -SCEVHandle ScalarEvolution::HowFarToNonZero(const SCEV *V, const Loop *L) { +const SCEV* ScalarEvolution::HowFarToNonZero(const SCEV *V, const Loop *L) { // Loops that look like: while (X == 0) are very strange indeed. We don't // handle them yet except for the trivial case. This could be expanded in the // future as needed. @@ -3862,7 +3818,7 @@ ScalarEvolution::getPredecessorWithUniqueSuccessorForBB(BasicBlock *BB) { /// more general, since a front-end may have replicated the controlling /// expression. /// -static bool HasSameValue(const SCEVHandle &A, const SCEVHandle &B) { +static bool HasSameValue(const SCEV* A, const SCEV* B) { // Quick check to see if they are the same SCEV. if (A == B) return true; @@ -3977,8 +3933,8 @@ bool ScalarEvolution::isLoopGuardedByCond(const Loop *L, if (!PreCondLHS->getType()->isInteger()) continue; - SCEVHandle PreCondLHSSCEV = getSCEV(PreCondLHS); - SCEVHandle PreCondRHSSCEV = getSCEV(PreCondRHS); + const SCEV* PreCondLHSSCEV = getSCEV(PreCondLHS); + const SCEV* PreCondRHSSCEV = getSCEV(PreCondRHS); if ((HasSameValue(LHS, PreCondLHSSCEV) && HasSameValue(RHS, PreCondRHSSCEV)) || (HasSameValue(LHS, getNotSCEV(PreCondRHSSCEV)) && @@ -3992,22 +3948,22 @@ bool ScalarEvolution::isLoopGuardedByCond(const Loop *L, /// getBECount - Subtract the end and start values and divide by the step, /// rounding up, to get the number of times the backedge is executed. Return /// CouldNotCompute if an intermediate computation overflows. 
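getBECount, whose body follows, computes ceil((End - Start) / Step) as (End - Start + (Step - 1)) udiv Step and gives up if the intermediate sum wraps, which it detects by redoing the addition one bit wider and comparing. A worked instance with constant operands:

    // Start = 0, End = 10, Step = 3 (all i32):
    //   Diff    = 10 - 0    = 10
    //   RoundUp = 3 - 1     = 2
    //   Add     = 10 + 2    = 12    (also 12 when recomputed in i33, so no overflow)
    //   BECount = 12 udiv 3 = 4
    // i.e. ceil(10 / 3) = 4.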
-SCEVHandle ScalarEvolution::getBECount(const SCEVHandle &Start, - const SCEVHandle &End, - const SCEVHandle &Step) { +const SCEV* ScalarEvolution::getBECount(const SCEV* Start, + const SCEV* End, + const SCEV* Step) { const Type *Ty = Start->getType(); - SCEVHandle NegOne = getIntegerSCEV(-1, Ty); - SCEVHandle Diff = getMinusSCEV(End, Start); - SCEVHandle RoundUp = getAddExpr(Step, NegOne); + const SCEV* NegOne = getIntegerSCEV(-1, Ty); + const SCEV* Diff = getMinusSCEV(End, Start); + const SCEV* RoundUp = getAddExpr(Step, NegOne); // Add an adjustment to the difference between End and Start so that // the division will effectively round up. - SCEVHandle Add = getAddExpr(Diff, RoundUp); + const SCEV* Add = getAddExpr(Diff, RoundUp); // Check Add for unsigned overflow. // TODO: More sophisticated things could be done here. const Type *WideTy = IntegerType::get(getTypeSizeInBits(Ty) + 1); - SCEVHandle OperandExtendedAdd = + const SCEV* OperandExtendedAdd = getAddExpr(getZeroExtendExpr(Diff, WideTy), getZeroExtendExpr(RoundUp, WideTy)); if (getZeroExtendExpr(Add, WideTy) != OperandExtendedAdd) @@ -4032,7 +3988,7 @@ HowManyLessThans(const SCEV *LHS, const SCEV *RHS, if (AddRec->isAffine()) { // FORNOW: We only support unit strides. unsigned BitWidth = getTypeSizeInBits(AddRec->getType()); - SCEVHandle Step = AddRec->getStepRecurrence(*this); + const SCEV* Step = AddRec->getStepRecurrence(*this); // TODO: handle non-constant strides. const SCEVConstant *CStep = dyn_cast<SCEVConstant>(Step); @@ -4068,10 +4024,10 @@ HowManyLessThans(const SCEV *LHS, const SCEV *RHS, // treat m-n as signed nor unsigned due to overflow possibility. // First, we get the value of the LHS in the first iteration: n - SCEVHandle Start = AddRec->getOperand(0); + const SCEV* Start = AddRec->getOperand(0); // Determine the minimum constant start value. - SCEVHandle MinStart = isa<SCEVConstant>(Start) ? Start : + const SCEV* MinStart = isa<SCEVConstant>(Start) ? Start : getConstant(isSigned ? APInt::getSignedMinValue(BitWidth) : APInt::getMinValue(BitWidth)); @@ -4079,7 +4035,7 @@ HowManyLessThans(const SCEV *LHS, const SCEV *RHS, // then we know that it will run exactly (m-n)/s times. Otherwise, we // only know that it will execute (max(m,n)-n)/s times. In both cases, // the division must round up. - SCEVHandle End = RHS; + const SCEV* End = RHS; if (!isLoopGuardedByCond(L, isSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT, getMinusSCEV(Start, Step), RHS)) @@ -4087,7 +4043,7 @@ HowManyLessThans(const SCEV *LHS, const SCEV *RHS, : getUMaxExpr(RHS, Start); // Determine the maximum constant end value. - SCEVHandle MaxEnd = + const SCEV* MaxEnd = isa<SCEVConstant>(End) ? End : getConstant(isSigned ? APInt::getSignedMaxValue(BitWidth) .ashr(GetMinSignBits(End) - 1) : @@ -4096,11 +4052,11 @@ HowManyLessThans(const SCEV *LHS, const SCEV *RHS, // Finally, we subtract these two values and divide, rounding up, to get // the number of times the backedge is executed. - SCEVHandle BECount = getBECount(Start, End, Step); + const SCEV* BECount = getBECount(Start, End, Step); // The maximum backedge count is similar, except using the minimum start // value and the maximum end value. 
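The maximum count computed just below substitutes the smallest start and largest end that the operands could take, so it stays a correct upper bound even when the exact count is symbolic. A sketch under the assumptions that the comparison is unsigned, the stride is one, and the loop is known to be entered:

    // i runs from %n up to the constant 100 (i < 100), %n unknown:
    //   BECount    = getBECount(%n, 100, 1) = 100 - %n     (symbolic)
    //   MaxBECount = getBECount( 0, 100, 1) = 100          (MinStart = 0, MaxEnd = 100)
    // A client that only needs a bound can use the constant 100 even though
    // the exact count is unknown.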
- SCEVHandle MaxBECount = getBECount(MinStart, MaxEnd, Step);; + const SCEV* MaxBECount = getBECount(MinStart, MaxEnd, Step);; return BackedgeTakenInfo(BECount, MaxBECount); } @@ -4113,7 +4069,7 @@ HowManyLessThans(const SCEV *LHS, const SCEV *RHS, /// this is that it returns the first iteration number where the value is not in /// the condition, thus computing the exit count. If the iteration count can't /// be computed, an instance of SCEVCouldNotCompute is returned. -SCEVHandle SCEVAddRecExpr::getNumIterationsInRange(ConstantRange Range, +const SCEV* SCEVAddRecExpr::getNumIterationsInRange(ConstantRange Range, ScalarEvolution &SE) const { if (Range.isFullSet()) // Infinite loop. return SE.getCouldNotCompute(); @@ -4121,9 +4077,9 @@ SCEVHandle SCEVAddRecExpr::getNumIterationsInRange(ConstantRange Range, // If the start is a non-zero constant, shift the range to simplify things. if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(getStart())) if (!SC->getValue()->isZero()) { - SmallVector<SCEVHandle, 4> Operands(op_begin(), op_end()); + SmallVector<const SCEV*, 4> Operands(op_begin(), op_end()); Operands[0] = SE.getIntegerSCEV(0, SC->getType()); - SCEVHandle Shifted = SE.getAddRecExpr(Operands, getLoop()); + const SCEV* Shifted = SE.getAddRecExpr(Operands, getLoop()); if (const SCEVAddRecExpr *ShiftedAddRec = dyn_cast<SCEVAddRecExpr>(Shifted)) return ShiftedAddRec->getNumIterationsInRange( @@ -4182,12 +4138,12 @@ SCEVHandle SCEVAddRecExpr::getNumIterationsInRange(ConstantRange Range, // quadratic equation to solve it. To do this, we must frame our problem in // terms of figuring out when zero is crossed, instead of when // Range.getUpper() is crossed. - SmallVector<SCEVHandle, 4> NewOps(op_begin(), op_end()); + SmallVector<const SCEV*, 4> NewOps(op_begin(), op_end()); NewOps[0] = SE.getNegativeSCEV(SE.getConstant(Range.getUpper())); - SCEVHandle NewAddRec = SE.getAddRecExpr(NewOps, getLoop()); + const SCEV* NewAddRec = SE.getAddRecExpr(NewOps, getLoop()); // Next, solve the constructed addrec - std::pair<SCEVHandle,SCEVHandle> Roots = + std::pair<const SCEV*,const SCEV*> Roots = SolveQuadraticEquation(cast<SCEVAddRecExpr>(NewAddRec), SE); const SCEVConstant *R1 = dyn_cast<SCEVConstant>(Roots.first); const SCEVConstant *R2 = dyn_cast<SCEVConstant>(Roots.second); @@ -4293,7 +4249,7 @@ ScalarEvolution::SCEVCallbackVH::SCEVCallbackVH(Value *V, ScalarEvolution *se) //===----------------------------------------------------------------------===// ScalarEvolution::ScalarEvolution() - : FunctionPass(&ID), CouldNotCompute(new SCEVCouldNotCompute(0)) { + : FunctionPass(&ID), CouldNotCompute(new SCEVCouldNotCompute()) { } bool ScalarEvolution::runOnFunction(Function &F) { @@ -4308,6 +4264,45 @@ void ScalarEvolution::releaseMemory() { BackedgeTakenCounts.clear(); ConstantEvolutionLoopExitValue.clear(); ValuesAtScopes.clear(); + + for (std::map<ConstantInt*, SCEVConstant*>::iterator + I = SCEVConstants.begin(), E = SCEVConstants.end(); I != E; ++I) + delete I->second; + for (std::map<std::pair<const SCEV*, const Type*>, + SCEVTruncateExpr*>::iterator I = SCEVTruncates.begin(), + E = SCEVTruncates.end(); I != E; ++I) + delete I->second; + for (std::map<std::pair<const SCEV*, const Type*>, + SCEVZeroExtendExpr*>::iterator I = SCEVZeroExtends.begin(), + E = SCEVZeroExtends.end(); I != E; ++I) + delete I->second; + for (std::map<std::pair<unsigned, std::vector<const SCEV*> >, + SCEVCommutativeExpr*>::iterator I = SCEVCommExprs.begin(), + E = SCEVCommExprs.end(); I != E; ++I) + delete I->second; + for 
(std::map<std::pair<const SCEV*, const SCEV*>, SCEVUDivExpr*>::iterator + I = SCEVUDivs.begin(), E = SCEVUDivs.end(); I != E; ++I) + delete I->second; + for (std::map<std::pair<const SCEV*, const Type*>, + SCEVSignExtendExpr*>::iterator I = SCEVSignExtends.begin(), + E = SCEVSignExtends.end(); I != E; ++I) + delete I->second; + for (std::map<std::pair<const Loop *, std::vector<const SCEV*> >, + SCEVAddRecExpr*>::iterator I = SCEVAddRecExprs.begin(), + E = SCEVAddRecExprs.end(); I != E; ++I) + delete I->second; + for (std::map<Value*, SCEVUnknown*>::iterator I = SCEVUnknowns.begin(), + E = SCEVUnknowns.end(); I != E; ++I) + delete I->second; + + SCEVConstants.clear(); + SCEVTruncates.clear(); + SCEVZeroExtends.clear(); + SCEVCommExprs.clear(); + SCEVUDivs.clear(); + SCEVSignExtends.clear(); + SCEVAddRecExprs.clear(); + SCEVUnknowns.clear(); } void ScalarEvolution::getAnalysisUsage(AnalysisUsage &AU) const { @@ -4355,12 +4350,12 @@ void ScalarEvolution::print(raw_ostream &OS, const Module* ) const { if (isSCEVable(I->getType())) { OS << *I; OS << " --> "; - SCEVHandle SV = SE.getSCEV(&*I); + const SCEV* SV = SE.getSCEV(&*I); SV->print(OS); const Loop *L = LI->getLoopFor((*I).getParent()); - SCEVHandle AtUse = SE.getSCEVAtScope(SV, L); + const SCEV* AtUse = SE.getSCEVAtScope(SV, L); if (AtUse != SV) { OS << " --> "; AtUse->print(OS); @@ -4368,7 +4363,7 @@ void ScalarEvolution::print(raw_ostream &OS, const Module* ) const { if (L) { OS << "\t\t" "Exits: "; - SCEVHandle ExitValue = SE.getSCEVAtScope(SV, L->getParentLoop()); + const SCEV* ExitValue = SE.getSCEVAtScope(SV, L->getParentLoop()); if (!ExitValue->isLoopInvariant(L)) { OS << "<<Unknown>>"; } else { diff --git a/lib/Analysis/ScalarEvolutionExpander.cpp b/lib/Analysis/ScalarEvolutionExpander.cpp index 2a73c27..c5591d7 100644 --- a/lib/Analysis/ScalarEvolutionExpander.cpp +++ b/lib/Analysis/ScalarEvolutionExpander.cpp @@ -152,8 +152,8 @@ Value *SCEVExpander::InsertBinop(Instruction::BinaryOps Opcode, Value *LHS, /// TODO: When ScalarEvolution gets a SCEVSDivExpr, this can be made /// unnecessary; in its place, just signed-divide Ops[i] by the scale and /// check to see if the divide was folded. -static bool FactorOutConstant(SCEVHandle &S, - SCEVHandle &Remainder, +static bool FactorOutConstant(const SCEV* &S, + const SCEV* &Remainder, const APInt &Factor, ScalarEvolution &SE) { // Everything is divisible by one. @@ -168,7 +168,7 @@ static bool FactorOutConstant(SCEVHandle &S, // the value at this scale. It will be considered for subsequent // smaller scales. if (C->isZero() || !CI->isZero()) { - SCEVHandle Div = SE.getConstant(CI); + const SCEV* Div = SE.getConstant(CI); S = Div; Remainder = SE.getAddExpr(Remainder, @@ -182,8 +182,8 @@ static bool FactorOutConstant(SCEVHandle &S, if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(S)) if (const SCEVConstant *C = dyn_cast<SCEVConstant>(M->getOperand(0))) if (!C->getValue()->getValue().srem(Factor)) { - const SmallVectorImpl<SCEVHandle> &MOperands = M->getOperands(); - SmallVector<SCEVHandle, 4> NewMulOps(MOperands.begin(), MOperands.end()); + const SmallVectorImpl<const SCEV*> &MOperands = M->getOperands(); + SmallVector<const SCEV*, 4> NewMulOps(MOperands.begin(), MOperands.end()); NewMulOps[0] = SE.getConstant(C->getValue()->getValue().sdiv(Factor)); S = SE.getMulExpr(NewMulOps); @@ -192,13 +192,13 @@ static bool FactorOutConstant(SCEVHandle &S, // In an AddRec, check if both start and step are divisible. 
if (const SCEVAddRecExpr *A = dyn_cast<SCEVAddRecExpr>(S)) { - SCEVHandle Step = A->getStepRecurrence(SE); - SCEVHandle StepRem = SE.getIntegerSCEV(0, Step->getType()); + const SCEV* Step = A->getStepRecurrence(SE); + const SCEV* StepRem = SE.getIntegerSCEV(0, Step->getType()); if (!FactorOutConstant(Step, StepRem, Factor, SE)) return false; if (!StepRem->isZero()) return false; - SCEVHandle Start = A->getStart(); + const SCEV* Start = A->getStart(); if (!FactorOutConstant(Start, Remainder, Factor, SE)) return false; S = SE.getAddRecExpr(Start, Step, A->getLoop()); @@ -233,14 +233,14 @@ static bool FactorOutConstant(SCEVHandle &S, /// loop-invariant portions of expressions, after considering what /// can be folded using target addressing modes. /// -Value *SCEVExpander::expandAddToGEP(const SCEVHandle *op_begin, - const SCEVHandle *op_end, +Value *SCEVExpander::expandAddToGEP(const SCEV* const *op_begin, + const SCEV* const *op_end, const PointerType *PTy, const Type *Ty, Value *V) { const Type *ElTy = PTy->getElementType(); SmallVector<Value *, 4> GepIndices; - SmallVector<SCEVHandle, 8> Ops(op_begin, op_end); + SmallVector<const SCEV*, 8> Ops(op_begin, op_end); bool AnyNonZeroIndices = false; // Decend down the pointer's type and attempt to convert the other @@ -251,14 +251,14 @@ Value *SCEVExpander::expandAddToGEP(const SCEVHandle *op_begin, for (;;) { APInt ElSize = APInt(SE.getTypeSizeInBits(Ty), ElTy->isSized() ? SE.TD->getTypeAllocSize(ElTy) : 0); - SmallVector<SCEVHandle, 8> NewOps; - SmallVector<SCEVHandle, 8> ScaledOps; + SmallVector<const SCEV*, 8> NewOps; + SmallVector<const SCEV*, 8> ScaledOps; for (unsigned i = 0, e = Ops.size(); i != e; ++i) { // Split AddRecs up into parts as either of the parts may be usable // without the other. if (const SCEVAddRecExpr *A = dyn_cast<SCEVAddRecExpr>(Ops[i])) if (!A->getStart()->isZero()) { - SCEVHandle Start = A->getStart(); + const SCEV* Start = A->getStart(); Ops.push_back(SE.getAddRecExpr(SE.getIntegerSCEV(0, A->getType()), A->getStepRecurrence(SE), A->getLoop())); @@ -267,8 +267,8 @@ Value *SCEVExpander::expandAddToGEP(const SCEVHandle *op_begin, } // If the scale size is not 0, attempt to factor out a scale. if (ElSize != 0) { - SCEVHandle Op = Ops[i]; - SCEVHandle Remainder = SE.getIntegerSCEV(0, Op->getType()); + const SCEV* Op = Ops[i]; + const SCEV* Remainder = SE.getIntegerSCEV(0, Op->getType()); if (FactorOutConstant(Op, Remainder, ElSize, SE)) { ScaledOps.push_back(Op); // Op now has ElSize factored out. NewOps.push_back(Remainder); @@ -364,7 +364,7 @@ Value *SCEVExpander::visitAddExpr(const SCEVAddExpr *S) { // comments on expandAddToGEP for details. if (SE.TD) if (const PointerType *PTy = dyn_cast<PointerType>(V->getType())) { - const SmallVectorImpl<SCEVHandle> &Ops = S->getOperands(); + const SmallVectorImpl<const SCEV*> &Ops = S->getOperands(); return expandAddToGEP(&Ops[0], &Ops[Ops.size() - 1], PTy, Ty, V); } @@ -420,7 +420,7 @@ Value *SCEVExpander::visitUDivExpr(const SCEVUDivExpr *S) { /// Move parts of Base into Rest to leave Base with the minimal /// expression that provides a pointer operand suitable for a /// GEP expansion. 
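expandAddToGEP tries to turn byte-level pointer arithmetic back into a GEP by factoring the element size out of every addend with FactorOutConstant; for an addrec both the start and the step have to divide evenly. An illustration assuming i32 elements (element size 4, values hypothetical):

    //   raw expression:  %p + {8,+,4}<%L>              (offsets in bytes)
    //   FactorOutConstant({8,+,4}, 4)  ->  {2,+,1}<%L>, remainder 0
    //   emitted form:    getelementptr i32* %p, {2,+,1}<%L>   (indices in elements)
    // If the start or step leaves a remainder at this scale, the addend stays
    // in the plain add and is retried at a smaller element size instead.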
-static void ExposePointerBase(SCEVHandle &Base, SCEVHandle &Rest, +static void ExposePointerBase(const SCEV* &Base, const SCEV* &Rest, ScalarEvolution &SE) { while (const SCEVAddRecExpr *A = dyn_cast<SCEVAddRecExpr>(Base)) { Base = A->getStart(); @@ -431,7 +431,7 @@ static void ExposePointerBase(SCEVHandle &Base, SCEVHandle &Rest, } if (const SCEVAddExpr *A = dyn_cast<SCEVAddExpr>(Base)) { Base = A->getOperand(A->getNumOperands()-1); - SmallVector<SCEVHandle, 8> NewAddOps(A->op_begin(), A->op_end()); + SmallVector<const SCEV*, 8> NewAddOps(A->op_begin(), A->op_end()); NewAddOps.back() = Rest; Rest = SE.getAddExpr(NewAddOps); ExposePointerBase(Base, Rest, SE); @@ -455,9 +455,9 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) { if (CanonicalIV && SE.getTypeSizeInBits(CanonicalIV->getType()) > SE.getTypeSizeInBits(Ty)) { - SCEVHandle Start = SE.getAnyExtendExpr(S->getStart(), + const SCEV* Start = SE.getAnyExtendExpr(S->getStart(), CanonicalIV->getType()); - SCEVHandle Step = SE.getAnyExtendExpr(S->getStepRecurrence(SE), + const SCEV* Step = SE.getAnyExtendExpr(S->getStepRecurrence(SE), CanonicalIV->getType()); Value *V = expand(SE.getAddRecExpr(Start, Step, S->getLoop())); BasicBlock::iterator SaveInsertPt = getInsertionPoint(); @@ -472,16 +472,16 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) { // {X,+,F} --> X + {0,+,F} if (!S->getStart()->isZero()) { - const SmallVectorImpl<SCEVHandle> &SOperands = S->getOperands(); - SmallVector<SCEVHandle, 4> NewOps(SOperands.begin(), SOperands.end()); + const SmallVectorImpl<const SCEV*> &SOperands = S->getOperands(); + SmallVector<const SCEV*, 4> NewOps(SOperands.begin(), SOperands.end()); NewOps[0] = SE.getIntegerSCEV(0, Ty); - SCEVHandle Rest = SE.getAddRecExpr(NewOps, L); + const SCEV* Rest = SE.getAddRecExpr(NewOps, L); // Turn things like ptrtoint+arithmetic+inttoptr into GEP. See the // comments on expandAddToGEP for details. if (SE.TD) { - SCEVHandle Base = S->getStart(); - SCEVHandle RestArray[1] = { Rest }; + const SCEV* Base = S->getStart(); + const SCEV* RestArray[1] = { Rest }; // Dig into the expression to find the pointer base for a GEP. ExposePointerBase(Base, RestArray[0], SE); // If we found a pointer, expand the AddRec with a GEP. @@ -581,20 +581,20 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) { // folders, then expandCodeFor the closed form. This allows the folders to // simplify the expression without having to build a bunch of special code // into this folder. - SCEVHandle IH = SE.getUnknown(I); // Get I as a "symbolic" SCEV. + const SCEV* IH = SE.getUnknown(I); // Get I as a "symbolic" SCEV. // Promote S up to the canonical IV type, if the cast is foldable. - SCEVHandle NewS = S; - SCEVHandle Ext = SE.getNoopOrAnyExtend(S, I->getType()); + const SCEV* NewS = S; + const SCEV* Ext = SE.getNoopOrAnyExtend(S, I->getType()); if (isa<SCEVAddRecExpr>(Ext)) NewS = Ext; - SCEVHandle V = cast<SCEVAddRecExpr>(NewS)->evaluateAtIteration(IH, SE); + const SCEV* V = cast<SCEVAddRecExpr>(NewS)->evaluateAtIteration(IH, SE); //cerr << "Evaluated: " << *this << "\n to: " << *V << "\n"; // Truncate the result down to the original type, if needed. 
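The change in the lines that follow is a functional fix, not just a type rename: the addrec is evaluated at the canonical IV, which may be wider than the requested type, and the truncation back down was being built but then discarded. In effect:

    // before: T = SE.getTruncateOrNoop(V, Ty);  return expand(V);  // T unused, wide value returned
    // after : T = SE.getTruncateOrNoop(V, Ty);  return expand(T);  // result narrowed back to Ty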
- SCEVHandle T = SE.getTruncateOrNoop(V, Ty); - return expand(V); + const SCEV* T = SE.getTruncateOrNoop(V, Ty); + return expand(T); } Value *SCEVExpander::visitTruncateExpr(const SCEVTruncateExpr *S) { @@ -654,7 +654,7 @@ Value *SCEVExpander::visitUMaxExpr(const SCEVUMaxExpr *S) { return LHS; } -Value *SCEVExpander::expandCodeFor(SCEVHandle SH, const Type *Ty) { +Value *SCEVExpander::expandCodeFor(const SCEV* SH, const Type *Ty) { // Expand the code for this SCEV. Value *V = expand(SH); if (Ty) { @@ -667,7 +667,7 @@ Value *SCEVExpander::expandCodeFor(SCEVHandle SH, const Type *Ty) { Value *SCEVExpander::expand(const SCEV *S) { // Check to see if we already expanded this. - std::map<SCEVHandle, AssertingVH<Value> >::iterator I = + std::map<const SCEV*, AssertingVH<Value> >::iterator I = InsertedExpressions.find(S); if (I != InsertedExpressions.end()) return I->second; @@ -685,7 +685,7 @@ Value * SCEVExpander::getOrInsertCanonicalInductionVariable(const Loop *L, const Type *Ty) { assert(Ty->isInteger() && "Can only insert integer induction variables!"); - SCEVHandle H = SE.getAddRecExpr(SE.getIntegerSCEV(0, Ty), + const SCEV* H = SE.getAddRecExpr(SE.getIntegerSCEV(0, Ty), SE.getIntegerSCEV(1, Ty), L); return expand(H); } diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp index 17ffa2d..7509e91 100644 --- a/lib/Analysis/ValueTracking.cpp +++ b/lib/Analysis/ValueTracking.cpp @@ -624,8 +624,12 @@ bool llvm::MaskedValueIsZero(Value *V, const APInt &Mask, /// 'Op' must have a scalar integer type. /// unsigned llvm::ComputeNumSignBits(Value *V, TargetData *TD, unsigned Depth) { + assert((TD || V->getType()->isIntOrIntVector()) && + "ComputeNumSignBits requires a TargetData object to operate " + "on non-integer values!"); const Type *Ty = V->getType(); - unsigned TyBits = Ty->getScalarSizeInBits(); + unsigned TyBits = TD ? TD->getTypeSizeInBits(V->getType()->getScalarType()) : + Ty->getScalarSizeInBits(); unsigned Tmp, Tmp2; unsigned FirstAnswer = 1; diff --git a/lib/CodeGen/ELF.h b/lib/CodeGen/ELF.h index 796bc2c..28b6be8 100644 --- a/lib/CodeGen/ELF.h +++ b/lib/CodeGen/ELF.h @@ -128,7 +128,13 @@ namespace llvm { /// added to logical symbol table for the module. This is eventually /// turned into a real symbol table in the file. struct ELFSym { - const GlobalValue *GV; // The global value this corresponds to. + // The global value this corresponds to. Global symbols can be on of the + // 3 types : if this symbol has a zero initializer, it is common or should + // be placed in bss section otherwise it's a constant. + const GlobalValue *GV; + bool IsCommon; + bool IsBss; + bool IsConstant; // ELF specific fields unsigned NameIdx; // Index in .strtab of name, once emitted. 
@@ -159,8 +165,9 @@ namespace llvm { STV_PROTECTED = 3 // Visible in other components but not preemptable }; - ELFSym(const GlobalValue *gv) : GV(gv), NameIdx(0), Value(0), - Size(0), Info(0), Other(0), + ELFSym(const GlobalValue *gv) : GV(gv), IsCommon(false), IsBss(false), + IsConstant(false), NameIdx(0), Value(0), + Size(0), Info(0), Other(STV_DEFAULT), SectionIdx(ELFSection::SHN_UNDEF) { if (!GV) return; @@ -180,16 +187,47 @@ namespace llvm { } } - void SetBind(unsigned X) { + unsigned getBind() { + return (Info >> 4) & 0xf; + } + + void setBind(unsigned X) { assert(X == (X & 0xF) && "Bind value out of range!"); Info = (Info & 0x0F) | (X << 4); } - void SetType(unsigned X) { + void setType(unsigned X) { assert(X == (X & 0xF) && "Type value out of range!"); Info = (Info & 0xF0) | X; } }; + /// ELFRelocation - This class contains all the information necessary to + /// to generate any 32-bit or 64-bit ELF relocation entry. + class ELFRelocation { + uint64_t r_offset; // offset in the section of the object this applies to + uint32_t r_symidx; // symbol table index of the symbol to use + uint32_t r_type; // machine specific relocation type + int64_t r_add; // explicit relocation addend + bool r_rela; // if true then the addend is part of the entry + // otherwise the addend is at the location specified + // by r_offset + public: + uint64_t getInfo(bool is64Bit) const { + if (is64Bit) + return ((uint64_t)r_symidx << 32) + ((uint64_t)r_type & 0xFFFFFFFFL); + else + return (r_symidx << 8) + (r_type & 0xFFL); + } + + uint64_t getOffset() const { return r_offset; } + int64_t getAddend() const { return r_add; } + + ELFRelocation(uint64_t off, uint32_t sym, uint32_t type, + bool rela = true, int64_t addend = 0) : + r_offset(off), r_symidx(sym), r_type(type), + r_add(addend), r_rela(rela) {} + }; + } // end namespace llvm #endif diff --git a/lib/CodeGen/ELFCodeEmitter.cpp b/lib/CodeGen/ELFCodeEmitter.cpp index ca68396..8cb7c94 100644 --- a/lib/CodeGen/ELFCodeEmitter.cpp +++ b/lib/CodeGen/ELFCodeEmitter.cpp @@ -71,39 +71,38 @@ bool ELFCodeEmitter::finishFunction(MachineFunction &MF) { // Update Section Size ES->Size = CurBufferPtr - BufferBegin; + // Set the symbol type as a function + FnSym.setType(ELFSym::STT_FUNC); + FnSym.SectionIdx = ES->SectionIdx; + FnSym.Size = CurBufferPtr-FnStartPtr; + + // Offset from start of Section + FnSym.Value = FnStartPtr-BufferBegin; + // Figure out the binding (linkage) of the symbol. switch (MF.getFunction()->getLinkage()) { default: // appending linkage is illegal for functions. assert(0 && "Unknown linkage type!"); case GlobalValue::ExternalLinkage: - FnSym.SetBind(ELFSym::STB_GLOBAL); + FnSym.setBind(ELFSym::STB_GLOBAL); + EW.SymbolList.push_back(FnSym); break; case GlobalValue::LinkOnceAnyLinkage: case GlobalValue::LinkOnceODRLinkage: case GlobalValue::WeakAnyLinkage: case GlobalValue::WeakODRLinkage: - FnSym.SetBind(ELFSym::STB_WEAK); + FnSym.setBind(ELFSym::STB_WEAK); + EW.SymbolList.push_back(FnSym); break; case GlobalValue::PrivateLinkage: assert (0 && "PrivateLinkage should not be in the symbol table."); case GlobalValue::InternalLinkage: - FnSym.SetBind(ELFSym::STB_LOCAL); + FnSym.setBind(ELFSym::STB_LOCAL); + EW.SymbolList.push_front(FnSym); break; } - // Set the symbol type as a function - FnSym.SetType(ELFSym::STT_FUNC); - - FnSym.SectionIdx = ES->SectionIdx; - FnSym.Size = CurBufferPtr-FnStartPtr; - - // Offset from start of Section - FnSym.Value = FnStartPtr-BufferBegin; - - // Finally, add it to the symtab. 
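Both the ELFSym accessors and the new ELFRelocation record above pack two fields into one ELF "info" word: st_info keeps the binding in the high nibble and the type in the low nibble, while r_info combines the symbol index with the relocation type (shifted by 32 on 64-bit ELF, by 8 on 32-bit ELF). A quick arithmetic check, assuming the usual spec values STB_GLOBAL = 1 and STT_FUNC = 2:

    ELFSym S(0);
    S.setBind(ELFSym::STB_GLOBAL);          // 1 -> high nibble
    S.setType(ELFSym::STT_FUNC);            // 2 -> low nibble
    // S.Info == 0x12, and S.getBind() returns 1 again.

    ELFRelocation R(/*offset*/0x40, /*symidx*/5, /*type*/10);
    // R.getInfo(true)  == (5ULL << 32) | 10     // 64-bit r_info
    // R.getInfo(false) == (5    <<  8) | 10     // 32-bit r_info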
- EW.SymbolList.push_back(FnSym); - // Relocations // ----------- // If we have emitted any relocations to function-specific objects such as @@ -113,7 +112,6 @@ bool ELFCodeEmitter::finishFunction(MachineFunction &MF) { for (unsigned i = 0, e = Relocations.size(); i != e; ++i) { MachineRelocation &MR = Relocations[i]; intptr_t Addr; - if (MR.isBasicBlock()) { Addr = getMachineBasicBlockAddress(MR.getBasicBlock()); MR.setConstantVal(ES->SectionIdx); diff --git a/lib/CodeGen/ELFWriter.cpp b/lib/CodeGen/ELFWriter.cpp index aeccefb..03db656 100644 --- a/lib/CodeGen/ELFWriter.cpp +++ b/lib/CodeGen/ELFWriter.cpp @@ -136,104 +136,40 @@ bool ELFWriter::doInitialization(Module &M) { ElfHdr.emitWord16(0); // Placeholder // Add the null section, which is required to be first in the file. - getSection("", ELFSection::SHT_NULL, 0); - - // Start up the symbol table. The first entry in the symtab is the null - // entry. - SymbolList.push_back(ELFSym(0)); + getNullSection(); return false; } -void ELFWriter::EmitGlobal(GlobalVariable *GV) { +unsigned ELFWriter::getGlobalELFLinkage(const GlobalVariable *GV) { + if (GV->hasInternalLinkage()) + return ELFSym::STB_LOCAL; + + if (GV->hasWeakLinkage()) + return ELFSym::STB_WEAK; - // XXX: put local symbols *before* global ones! + return ELFSym::STB_GLOBAL; +} + +// For global symbols without a section, return the Null section as a +// placeholder +ELFSection &ELFWriter::getGlobalSymELFSection(const GlobalVariable *GV, + ELFSym &Sym) { const Section *S = TAI->SectionForGlobal(GV); + unsigned Flags = S->getFlags(); + unsigned SectionType = ELFSection::SHT_PROGBITS; + unsigned SHdrFlags = ELFSection::SHF_ALLOC; DOUT << "Section " << S->getName() << " for global " << GV->getName() << "\n"; - // If this is an external global, emit it now. TODO: Note that it would be - // better to ignore the symbol here and only add it to the symbol table if - // referenced. + // If this is an external global, the symbol does not have a section. if (!GV->hasInitializer()) { - ELFSym ExternalSym(GV); - ExternalSym.SetBind(ELFSym::STB_GLOBAL); - ExternalSym.SetType(ELFSym::STT_NOTYPE); - ExternalSym.SectionIdx = ELFSection::SHN_UNDEF; - SymbolList.push_back(ExternalSym); - return; + Sym.SectionIdx = ELFSection::SHN_UNDEF; + return getNullSection(); } const TargetData *TD = TM.getTargetData(); unsigned Align = TD->getPreferredAlignment(GV); Constant *CV = GV->getInitializer(); - unsigned Size = TD->getTypeAllocSize(CV->getType()); - - // If this global has a zero initializer, go to .bss or common section. - if (CV->isNullValue() || isa<UndefValue>(CV)) { - // If this global is part of the common block, add it now. Variables are - // part of the common block if they are zero initialized and allowed to be - // merged with other symbols. - if (GV->hasLinkOnceLinkage() || GV->hasWeakLinkage() || - GV->hasCommonLinkage()) { - ELFSym CommonSym(GV); - // Value for common symbols is the alignment required. - CommonSym.Value = Align; - CommonSym.Size = Size; - CommonSym.SetBind(ELFSym::STB_GLOBAL); - CommonSym.SetType(ELFSym::STT_OBJECT); - CommonSym.SectionIdx = ELFSection::SHN_COMMON; - SymbolList.push_back(CommonSym); - getSection(S->getName(), ELFSection::SHT_NOBITS, - ELFSection::SHF_WRITE | ELFSection::SHF_ALLOC, 1); - return; - } - - // Otherwise, this symbol is part of the .bss section. Emit it now. - // Handle alignment. Ensure section is aligned at least as much as required - // by this symbol. 
- ELFSection &BSSSection = getBSSSection(); - BSSSection.Align = std::max(BSSSection.Align, Align); - - // Within the section, emit enough virtual padding to get us to an alignment - // boundary. - if (Align) - BSSSection.Size = (BSSSection.Size + Align - 1) & ~(Align-1); - - ELFSym BSSSym(GV); - BSSSym.Value = BSSSection.Size; - BSSSym.Size = Size; - BSSSym.SetType(ELFSym::STT_OBJECT); - - switch (GV->getLinkage()) { - default: // weak/linkonce/common handled above - assert(0 && "Unexpected linkage type!"); - case GlobalValue::AppendingLinkage: // FIXME: This should be improved! - case GlobalValue::ExternalLinkage: - BSSSym.SetBind(ELFSym::STB_GLOBAL); - break; - case GlobalValue::InternalLinkage: - BSSSym.SetBind(ELFSym::STB_LOCAL); - break; - } - - // Set the idx of the .bss section - BSSSym.SectionIdx = BSSSection.SectionIdx; - if (!GV->hasPrivateLinkage()) - SymbolList.push_back(BSSSym); - - // Reserve space in the .bss section for this symbol. - BSSSection.Size += Size; - return; - } - - /// Emit the Global symbol to the right ELF section - ELFSym GblSym(GV); - GblSym.Size = Size; - GblSym.SetType(ELFSym::STT_OBJECT); - GblSym.SetBind(ELFSym::STB_GLOBAL); - unsigned Flags = S->getFlags(); - unsigned SectType = ELFSection::SHT_PROGBITS; - unsigned SHdrFlags = ELFSection::SHF_ALLOC; if (Flags & SectionFlags::Code) SHdrFlags |= ELFSection::SHF_EXECINSTR; @@ -246,29 +182,81 @@ void ELFWriter::EmitGlobal(GlobalVariable *GV) { if (Flags & SectionFlags::Strings) SHdrFlags |= ELFSection::SHF_STRINGS; - // Remove tab from section name prefix - std::string SectionName(S->getName()); - size_t Pos = SectionName.find("\t"); - if (Pos != std::string::npos) - SectionName.erase(Pos, 1); - - // The section alignment should be bound to the element with - // the largest alignment - ELFSection &ElfS = getSection(SectionName, SectType, SHdrFlags); - GblSym.SectionIdx = ElfS.SectionIdx; - if (Align > ElfS.Align) - ElfS.Align = Align; - - // S.Value should contain the symbol index inside the section, - // and all symbols should start on their required alignment boundary - GblSym.Value = (ElfS.size() + (Align-1)) & (-Align); - ElfS.emitAlignment(Align); - - // Emit the constant symbol to its section - EmitGlobalConstant(CV, ElfS); + // If this global has a zero initializer, go to .bss or common section. + // Variables are part of the common block if they are zero initialized + // and allowed to be merged with other symbols. 
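The rewritten logic below classifies zero-initialized globals in one place: mergeable ones (linkonce/weak/common linkage) become SHN_COMMON symbols whose value carries the required alignment, while the rest are reserved space in a SHT_NOBITS (.bss-style) section with the value recording their offset in it. Roughly, for hypothetical 4-byte globals:

    // int shared __attribute__((weak));   // zero init, mergeable
    //   -> SectionIdx = SHN_COMMON, Value = 4 (alignment), Size = 4
    //
    // static int local_counter;           // zero init, not mergeable
    //   -> SectionIdx = index of the SHT_NOBITS section,
    //      Value = current section size (its offset); the section grows by 4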
+ if (CV->isNullValue() || isa<UndefValue>(CV)) { + SectionType = ELFSection::SHT_NOBITS; + ELFSection &ElfS = getSection(S->getName(), SectionType, SHdrFlags); + if (GV->hasLinkOnceLinkage() || GV->hasWeakLinkage() || + GV->hasCommonLinkage()) { + Sym.SectionIdx = ELFSection::SHN_COMMON; + Sym.IsCommon = true; + ElfS.Align = 1; + return ElfS; + } + Sym.IsBss = true; + Sym.SectionIdx = ElfS.SectionIdx; + if (Align) ElfS.Size = (ElfS.Size + Align-1) & ~(Align-1); + ElfS.Align = std::max(ElfS.Align, Align); + return ElfS; + } + + Sym.IsConstant = true; + ELFSection &ElfS = getSection(S->getName(), SectionType, SHdrFlags); + Sym.SectionIdx = ElfS.SectionIdx; + ElfS.Align = std::max(ElfS.Align, Align); + return ElfS; +} + +void ELFWriter::EmitFunctionDeclaration(const Function *F) { + ELFSym GblSym(F); + GblSym.setBind(ELFSym::STB_GLOBAL); + GblSym.setType(ELFSym::STT_NOTYPE); + GblSym.SectionIdx = ELFSection::SHN_UNDEF; SymbolList.push_back(GblSym); } +void ELFWriter::EmitGlobalVar(const GlobalVariable *GV) { + unsigned SymBind = getGlobalELFLinkage(GV); + unsigned Align=0, Size=0; + ELFSym GblSym(GV); + GblSym.setBind(SymBind); + + if (GV->hasInitializer()) { + GblSym.setType(ELFSym::STT_OBJECT); + const TargetData *TD = TM.getTargetData(); + Align = TD->getPreferredAlignment(GV); + Size = TD->getTypeAllocSize(GV->getInitializer()->getType()); + GblSym.Size = Size; + } else { + GblSym.setType(ELFSym::STT_NOTYPE); + } + + ELFSection &GblSection = getGlobalSymELFSection(GV, GblSym); + + if (GblSym.IsCommon) { + GblSym.Value = Align; + } else if (GblSym.IsBss) { + GblSym.Value = GblSection.Size; + GblSection.Size += Size; + } else if (GblSym.IsConstant){ + // GblSym.Value should contain the symbol index inside the section, + // and all symbols should start on their required alignment boundary + GblSym.Value = (GblSection.size() + (Align-1)) & (-Align); + GblSection.emitAlignment(Align); + EmitGlobalConstant(GV->getInitializer(), GblSection); + } + + // Local symbols should come first on the symbol table. + if (!GV->hasPrivateLinkage()) { + if (SymBind == ELFSym::STB_LOCAL) + SymbolList.push_front(GblSym); + else + SymbolList.push_back(GblSym); + } +} + void ELFWriter::EmitGlobalConstantStruct(const ConstantStruct *CVS, ELFSection &GblS) { @@ -306,6 +294,7 @@ void ELFWriter::EmitGlobalConstant(const Constant *CV, ELFSection &GblS) { if (const ConstantArray *CVA = dyn_cast<ConstantArray>(CV)) { if (CVA->isString()) { std::string GblStr = CVA->getAsString(); + GblStr.resize(GblStr.size()-1); GblS.emitString(GblStr); } else { // Not a string. Print the values in successive locations for (unsigned i = 0, e = CVA->getNumOperands(); i != e; ++i) @@ -370,13 +359,39 @@ bool ELFWriter::doFinalization(Module &M) { // Build and emit data, bss and "common" sections. 
for (Module::global_iterator I = M.global_begin(), E = M.global_end(); - I != E; ++I) - EmitGlobal(I); + I != E; ++I) { + EmitGlobalVar(I); + GblSymLookup[I] = 0; + } + + // Emit all pending globals + // TODO: this should be done only for referenced symbols + for (SetVector<GlobalValue*>::const_iterator I = PendingGlobals.begin(), + E = PendingGlobals.end(); I != E; ++I) { + + // No need to emit the symbol again + if (GblSymLookup.find(*I) != GblSymLookup.end()) + continue; + + if (GlobalVariable *GV = dyn_cast<GlobalVariable>(*I)) { + EmitGlobalVar(GV); + } else if (Function *F = dyn_cast<Function>(*I)) { + // If function is not in GblSymLookup, it doesn't have a body, + // so emit the symbol as a function declaration (no section associated) + EmitFunctionDeclaration(F); + } else { + assert("unknown howto handle pending global"); + } + GblSymLookup[*I] = 0; + } // Emit non-executable stack note if (TAI->getNonexecutableStackDirective()) getNonExecStackSection(); + // Emit string table + EmitStringTable(); + // Emit the symbol table now, if non-empty. EmitSymbolTable(); @@ -400,6 +415,67 @@ bool ELFWriter::doFinalization(Module &M) { /// EmitRelocations - Emit relocations void ELFWriter::EmitRelocations() { + + // Create Relocation sections for each section which needs it. + for (std::list<ELFSection>::iterator I = SectionList.begin(), + E = SectionList.end(); I != E; ++I) { + + // This section does not have relocations + if (!I->hasRelocations()) continue; + + // Get the relocation section for section 'I' + bool HasRelA = TEW->hasRelocationAddend(); + ELFSection &RelSec = getRelocSection(I->getName(), HasRelA); + + // 'Link' - Section hdr idx of the associated symbol table + // 'Info' - Section hdr idx of the section to which the relocation applies + ELFSection &SymTab = getSymbolTableSection(); + RelSec.Link = SymTab.SectionIdx; + RelSec.Info = I->SectionIdx; + RelSec.EntSize = TEW->getRelocationEntrySize(); + + // Get the relocations from Section + std::vector<MachineRelocation> Relos = I->getRelocations(); + for (std::vector<MachineRelocation>::iterator MRI = Relos.begin(), + MRE = Relos.end(); MRI != MRE; ++MRI) { + MachineRelocation &MR = *MRI; + + // Offset from the start of the section containing the symbol + unsigned Offset = MR.getMachineCodeOffset(); + + // Symbol index in the symbol table + unsigned SymIdx = 0; + + // Target specific ELF relocation type + unsigned RelType = TEW->getRelocationType(MR.getRelocationType()); + + // Constant addend used to compute the value to be stored + // into the relocatable field + int64_t Addend = TEW->getAddendForRelTy(RelType); + + // There are several machine relocations types, and each one of + // them needs a different approach to retrieve the symbol table index. 
+ if (MR.isGlobalValue()) { + const GlobalValue *G = MR.getGlobalValue(); + SymIdx = GblSymLookup[G]; + } else { + assert(0 && "dunno how to handle other relocation types"); + } + + // Get the relocation entry and emit to the relocation section + ELFRelocation Rel(Offset, SymIdx, RelType, HasRelA, Addend); + EmitRelocation(RelSec, Rel, HasRelA); + } + } +} + +/// EmitRelocation - Write relocation 'Rel' to the relocation section 'Rel' +void ELFWriter::EmitRelocation(BinaryObject &RelSec, ELFRelocation &Rel, + bool HasRelA) { + RelSec.emitWord(Rel.getOffset()); + RelSec.emitWord(Rel.getInfo(is64Bit)); + if (HasRelA) + RelSec.emitWord(Rel.getAddend()); } /// EmitSymbol - Write symbol 'Sym' to the symbol table 'SymbolTable' @@ -448,28 +524,28 @@ void ELFWriter::EmitSectionHeader(BinaryObject &SHdrTab, } } -/// EmitSymbolTable - If the current symbol table is non-empty, emit the string -/// table for it and then the symbol table itself. -void ELFWriter::EmitSymbolTable() { - if (SymbolList.size() == 1) return; // Only the null entry. - - // FIXME: compact all local symbols to the start of the symtab. - unsigned FirstNonLocalSymbol = 1; - +/// EmitStringTable - If the current symbol table is non-empty, emit the string +/// table for it +void ELFWriter::EmitStringTable() { + if (!SymbolList.size()) return; // Empty symbol table. ELFSection &StrTab = getStringTableSection(); // Set the zero'th symbol to a null byte, as required. StrTab.emitByte(0); + // Walk on the symbol list and write symbol names into the + // string table. unsigned Index = 1; - for (unsigned i = 1, e = SymbolList.size(); i != e; ++i) { + for (std::list<ELFSym>::iterator I = SymbolList.begin(), + E = SymbolList.end(); I != E; ++I) { + // Use the name mangler to uniquify the LLVM symbol. - std::string Name = Mang->getValueName(SymbolList[i].GV); + std::string Name = Mang->getValueName(I->GV); if (Name.empty()) { - SymbolList[i].NameIdx = 0; + I->NameIdx = 0; } else { - SymbolList[i].NameIdx = Index; + I->NameIdx = Index; StrTab.emitString(Name); // Keep track of the number of bytes emitted to this section. @@ -478,20 +554,45 @@ void ELFWriter::EmitSymbolTable() { } assert(Index == StrTab.size()); StrTab.Size = Index; +} + +/// EmitSymbolTable - Emit the symbol table itself. +void ELFWriter::EmitSymbolTable() { + if (!SymbolList.size()) return; // Empty symbol table. + unsigned FirstNonLocalSymbol = 1; // Now that we have emitted the string table and know the offset into the // string table of each symbol, emit the symbol table itself. ELFSection &SymTab = getSymbolTableSection(); - SymTab.Align = TEW->getSymTabAlignment(); - SymTab.Link = StrTab.SectionIdx; // Section Index of .strtab. - SymTab.Info = FirstNonLocalSymbol; // First non-STB_LOCAL symbol. + SymTab.Align = TEW->getPrefELFAlignment(); + + // Section Index of .strtab. + SymTab.Link = getStringTableSection().SectionIdx; // Size of each symtab entry. SymTab.EntSize = TEW->getSymTabEntrySize(); - for (unsigned i = 0, e = SymbolList.size(); i != e; ++i) - EmitSymbol(SymTab, SymbolList[i]); + // The first entry in the symtab is the null symbol + ELFSym NullSym = ELFSym(0); + EmitSymbol(SymTab, NullSym); + + // Emit all the symbols to the symbol table. 
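The words written by EmitRelocation above are the fields of the standard ELF REL/RELA records; the r_info word packs the symbol-table index together with the target-specific relocation type. A sketch of the 32-bit layout and of the usual r_info packing (the writer itself goes through ELFRelocation::getInfo rather than these helpers):

#include <cstdint>

// Standard ELF relocation records corresponding to the words emitted above.
struct Elf32_Rel  { uint32_t r_offset; uint32_t r_info; };                    // SHT_REL,  8 bytes
struct Elf32_Rela { uint32_t r_offset; uint32_t r_info; int32_t r_addend; };  // SHT_RELA, 12 bytes

// r_info packs the symbol-table index and the relocation type.
static uint32_t elf32RInfo(uint32_t SymIdx, uint8_t Type) {
  return (SymIdx << 8) | Type;                  // ELF32_R_INFO(sym, type)
}
static uint64_t elf64RInfo(uint32_t SymIdx, uint32_t Type) {
  return ((uint64_t)SymIdx << 32) | Type;       // ELF64_R_INFO(sym, type)
}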
Skip the null + // symbol, cause it's emitted already + unsigned Index = 1; + for (std::list<ELFSym>::iterator I = SymbolList.begin(), + E = SymbolList.end(); I != E; ++I, ++Index) { + // Keep track of the first non-local symbol + if (I->getBind() == ELFSym::STB_LOCAL) + FirstNonLocalSymbol++; + + // Emit symbol to the symbol table + EmitSymbol(SymTab, *I); + // Record the symbol table index for each global value + GblSymLookup[I->GV] = Index; + } + + SymTab.Info = FirstNonLocalSymbol; SymTab.Size = SymTab.size(); } @@ -500,7 +601,7 @@ void ELFWriter::EmitSymbolTable() { /// section names. void ELFWriter::EmitSectionTableStringTable() { // First step: add the section for the string table to the list of sections: - ELFSection &SHStrTab = getSection(".shstrtab", ELFSection::SHT_STRTAB, 0); + ELFSection &SHStrTab = getSectionHeaderStringTableSection(); // Now that we know which section number is the .shstrtab section, update the // e_shstrndx entry in the ELF header. @@ -559,7 +660,7 @@ void ELFWriter::OutputSectionsAndSectionTable() { } // Align Section Header. - unsigned TableAlign = is64Bit ? 8 : 4; + unsigned TableAlign = TEW->getPrefELFAlignment(); FileOff = (FileOff+TableAlign-1) & ~(TableAlign-1); // Now that we know where all of the sections will be emitted, set the e_shnum @@ -586,13 +687,12 @@ void ELFWriter::OutputSectionsAndSectionTable() { << ", SectionData Size: " << S.size() << "\n"; // Align FileOff to whatever the alignment restrictions of the section are. - if (S.Align) { - for (size_t NewFileOff = (FileOff+S.Align-1) & ~(S.Align-1); - FileOff != NewFileOff; ++FileOff) - O << (char)0xAB; - } - if (S.size()) { + if (S.Align) { + for (size_t NewFileOff = (FileOff+S.Align-1) & ~(S.Align-1); + FileOff != NewFileOff; ++FileOff) + O << (char)0xAB; + } O.write((char *)&S.getData()[0], S.Size); FileOff += S.Size; } diff --git a/lib/CodeGen/ELFWriter.h b/lib/CodeGen/ELFWriter.h index 8a380f0..39577d9 100644 --- a/lib/CodeGen/ELFWriter.h +++ b/lib/CodeGen/ELFWriter.h @@ -16,7 +16,7 @@ #include "llvm/ADT/SetVector.h" #include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/Support/OutputBuffer.h" +#include "llvm/Support/Debug.h" #include "llvm/Target/TargetAsmInfo.h" #include "llvm/Target/TargetELFWriterInfo.h" #include "ELF.h" @@ -89,7 +89,7 @@ namespace llvm { bool doFinalization(Module &M); private: - // Blob containing the Elf header + /// Blob containing the Elf header BinaryObject ElfHdr; /// SectionList - This is the list of sections that we have emitted to the @@ -102,14 +102,35 @@ namespace llvm { /// the SectionList. std::map<std::string, ELFSection*> SectionLookup; + /// GblSymLookup - This is a mapping from global value to a symbol index + /// in the symbol table. This is useful since relocations symbol references + /// must be quickly mapped to a symbol table index + std::map<const GlobalValue*, uint32_t> GblSymLookup; + + /// SymbolList - This is the list of symbols emitted to the symbol table + /// Local symbols go to the front and Globals to the back. + std::list<ELFSym> SymbolList; + + /// PendingGlobals - List of externally defined symbols that we have been + /// asked to emit, but have not seen a reference to. When a reference + /// is seen, the symbol will move from this list to the SymbolList. + SetVector<GlobalValue*> PendingGlobals; + /// getSection - Return the section with the specified name, creating a new /// section if one does not already exist. 
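EmitSymbolTable above relies on local symbols sitting at the front of SymbolList (EmitGlobalVar uses push_front for STB_LOCAL) so that .symtab's sh_info can be set to the index of the first non-local symbol, with index 0 reserved for the null symbol. A small model of that invariant, assuming the locals-first ordering:

#include <cassert>
#include <list>

enum Binding { STB_LOCAL, STB_GLOBAL };

// sh_info ends up as 1 + number of leading locals (entry 0 is the null symbol).
static unsigned firstNonLocalIndex(const std::list<Binding> &Symbols) {
  unsigned Index = 1;
  for (Binding B : Symbols) {
    if (B != STB_LOCAL)
      break;                            // locals are contiguous at the front
    ++Index;
  }
  return Index;
}

int main() {
  std::list<Binding> Syms;
  Syms.push_front(STB_LOCAL);           // how EmitGlobalVar inserts locals
  Syms.push_back(STB_GLOBAL);
  Syms.push_front(STB_LOCAL);
  assert(firstNonLocalIndex(Syms) == 3);  // two locals -> sh_info == 3
  return 0;
}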
- ELFSection &getSection(const std::string &Name, unsigned Type, + ELFSection &getSection(const std::string &Name, unsigned Type, unsigned Flags = 0, unsigned Align = 0) { ELFSection *&SN = SectionLookup[Name]; if (SN) return *SN; - SectionList.push_back(ELFSection(Name, isLittleEndian, is64Bit)); + // Remove tab from section name prefix. This is necessary becase TAI + // sometimes return a section name prefixed with a "\t" char. + std::string SectionName(Name); + size_t Pos = SectionName.find("\t"); + if (Pos != std::string::npos) + SectionName.erase(Pos, 1); + + SectionList.push_back(ELFSection(SectionName, isLittleEndian, is64Bit)); SN = &SectionList.back(); SN->SectionIdx = NumSections++; SN->Type = Type; @@ -119,11 +140,25 @@ namespace llvm { return *SN; } + /// TODO: support mangled names here to emit the right .text section + /// for c++ object files. ELFSection &getTextSection() { return getSection(".text", ELFSection::SHT_PROGBITS, ELFSection::SHF_EXECINSTR | ELFSection::SHF_ALLOC); } + /// Return the relocation section of section 'S'. 'RelA' is true + /// if the relocation section contains entries with addends. + ELFSection &getRelocSection(std::string SName, bool RelA) { + std::string RelSName(".rel"); + unsigned SHdrTy = RelA ? ELFSection::SHT_RELA : ELFSection::SHT_REL; + + if (RelA) RelSName.append("a"); + RelSName.append(SName); + + return getSection(RelSName, SHdrTy, 0, TEW->getPrefELFAlignment()); + } + ELFSection &getNonExecStackSection() { return getSection(".note.GNU-stack", ELFSection::SHT_PROGBITS, 0, 1); } @@ -136,25 +171,23 @@ namespace llvm { return getSection(".strtab", ELFSection::SHT_STRTAB, 0, 1); } + ELFSection &getSectionHeaderStringTableSection() { + return getSection(".shstrtab", ELFSection::SHT_STRTAB, 0, 1); + } + ELFSection &getDataSection() { return getSection(".data", ELFSection::SHT_PROGBITS, - ELFSection::SHF_WRITE | ELFSection::SHF_ALLOC); + ELFSection::SHF_WRITE | ELFSection::SHF_ALLOC, 4); } ELFSection &getBSSSection() { return getSection(".bss", ELFSection::SHT_NOBITS, - ELFSection::SHF_WRITE | ELFSection::SHF_ALLOC); + ELFSection::SHF_WRITE | ELFSection::SHF_ALLOC, 4); } - /// SymbolList - This is the list of symbols we have emitted to the file. - /// This actually gets rearranged before emission to the file (to put the - /// local symbols first in the list). - std::vector<ELFSym> SymbolList; - - /// PendingGlobals - List of externally defined symbols that we have been - /// asked to emit, but have not seen a reference to. When a reference - /// is seen, the symbol will move from this list to the SymbolList. - SetVector<GlobalValue*> PendingGlobals; + ELFSection &getNullSection() { + return getSection("", ELFSection::SHT_NULL, 0); + } // As we complete the ELF file, we need to update fields in the ELF header // (e.g. the location of the section table). These members keep track of @@ -165,15 +198,20 @@ namespace llvm { unsigned ELFHdr_e_shnum_Offset; // e_shnum in ELF header. 
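getRelocSection above derives the relocation section's name from the section it applies to by prefixing ".rel" or ".rela", so relocations for ".text" end up in ".rel.text" or ".rela.text" depending on whether the target uses addends. A one-function sketch of that naming rule:

#include <string>

// ".rel" or ".rela" prefixed to the target section's name.
static std::string relocSectionName(const std::string &SName, bool HasRelA) {
  std::string Name(".rel");
  if (HasRelA)
    Name += "a";
  return Name + SName;    // e.g. ".text" -> ".rel.text" or ".rela.text"
}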
private: - void EmitGlobal(GlobalVariable *GV); + void EmitFunctionDeclaration(const Function *F); + void EmitGlobalVar(const GlobalVariable *GV); void EmitGlobalConstant(const Constant *C, ELFSection &GblS); void EmitGlobalConstantStruct(const ConstantStruct *CVS, ELFSection &GblS); + unsigned getGlobalELFLinkage(const GlobalVariable *GV); + ELFSection &getGlobalSymELFSection(const GlobalVariable *GV, ELFSym &Sym); void EmitRelocations(); + void EmitRelocation(BinaryObject &RelSec, ELFRelocation &Rel, bool HasRelA); void EmitSectionHeader(BinaryObject &SHdrTab, const ELFSection &SHdr); void EmitSectionTableStringTable(); void EmitSymbol(BinaryObject &SymbolTable, ELFSym &Sym); void EmitSymbolTable(); + void EmitStringTable(); void OutputSectionsAndSectionTable(); }; } diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index a9adce8..ce01d53 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -3121,6 +3121,8 @@ bool MeetsMaxMemopRequirement(std::vector<MVT> &MemOps, VT = (MVT::SimpleValueType)(VT.getSimpleVT() - 1); VTSize = VT.getSizeInBits() / 8; } else { + // This can result in a type that is not legal on the target, e.g. + // 1 or 2 bytes on PPC. VT = (MVT::SimpleValueType)(VT.getSimpleVT() - 1); VTSize >>= 1; } @@ -3177,12 +3179,29 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl, getMemBasePlusOffset(Dst, DstOff, DAG), DstSV, DstSVOff + DstOff, false, DstAlign); } else { - Value = DAG.getLoad(VT, dl, Chain, - getMemBasePlusOffset(Src, SrcOff, DAG), - SrcSV, SrcSVOff + SrcOff, false, Align); - Store = DAG.getStore(Chain, dl, Value, - getMemBasePlusOffset(Dst, DstOff, DAG), - DstSV, DstSVOff + DstOff, false, DstAlign); + // The type might not be legal for the target. This should only happen + // if the type is smaller than a legal type, as on PPC, so the right + // thing to do is generate a LoadExt/StoreTrunc pair. + // FIXME does the case above also need this? + if (TLI.isTypeLegal(VT)) { + Value = DAG.getLoad(VT, dl, Chain, + getMemBasePlusOffset(Src, SrcOff, DAG), + SrcSV, SrcSVOff + SrcOff, false, Align); + Store = DAG.getStore(Chain, dl, Value, + getMemBasePlusOffset(Dst, DstOff, DAG), + DstSV, DstSVOff + DstOff, false, DstAlign); + } else { + MVT NVT = VT; + while (!TLI.isTypeLegal(NVT)) { + NVT = (MVT::SimpleValueType(NVT.getSimpleVT() + 1)); + } + Value = DAG.getExtLoad(ISD::EXTLOAD, dl, NVT, Chain, + getMemBasePlusOffset(Src, SrcOff, DAG), + SrcSV, SrcSVOff + SrcOff, VT, false, Align); + Store = DAG.getTruncStore(Chain, dl, Value, + getMemBasePlusOffset(Dst, DstOff, DAG), + DstSV, DstSVOff + DstOff, VT, false, DstAlign); + } } OutChains.push_back(Store); SrcOff += VTSize; diff --git a/lib/CodeGen/SimpleRegisterCoalescing.cpp b/lib/CodeGen/SimpleRegisterCoalescing.cpp index 2034805..c2105e6 100644 --- a/lib/CodeGen/SimpleRegisterCoalescing.cpp +++ b/lib/CodeGen/SimpleRegisterCoalescing.cpp @@ -739,6 +739,9 @@ SimpleRegisterCoalescing::UpdateRegDefsUses(unsigned SrcReg, unsigned DstReg, // After updating the operand, check if the machine instruction has // become a copy. If so, update its val# information. 
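The getMemcpyLoadsAndStores change above deals with chunk types that are not legal on the target by widening again and pairing an extending load with a truncating store. The width-selection problem itself can be pictured with a much simpler scalar model, a sketch only, not the DAG code path:

#include <cstddef>
#include <vector>

// Pick store widths for an inline memcpy of Size bytes when the widest
// legal store is LargestLegal bytes: keep halving until a chunk fits.
// The DAG version instead widens the type back up and uses an extending
// load plus a truncating store when the narrow type itself is not legal.
static std::vector<size_t> pickCopyWidths(size_t Size, size_t LargestLegal) {
  std::vector<size_t> Widths;
  size_t W = LargestLegal;             // e.g. 4 on a 32-bit target
  while (Size > 0) {
    while (W > Size)
      W >>= 1;                         // shrink until the chunk fits
    Widths.push_back(W);
    Size -= W;
  }
  return Widths;
}
// pickCopyWidths(7, 4) -> {4, 2, 1}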
+ if (JoinedCopies.count(UseMI)) + continue; + const TargetInstrDesc &TID = UseMI->getDesc(); unsigned CopySrcReg, CopyDstReg, CopySrcSubIdx, CopyDstSubIdx; if (TID.getNumDefs() == 1 && TID.getNumOperands() > 2 && @@ -749,9 +752,10 @@ SimpleRegisterCoalescing::UpdateRegDefsUses(unsigned SrcReg, unsigned DstReg, allocatableRegs_[CopyDstReg])) { LiveInterval &LI = li_->getInterval(CopyDstReg); unsigned DefIdx = li_->getDefIndex(li_->getInstructionIndex(UseMI)); - const LiveRange *DLR = LI.getLiveRangeContaining(DefIdx); - if (DLR->valno->def == DefIdx) - DLR->valno->copy = UseMI; + if (const LiveRange *DLR = LI.getLiveRangeContaining(DefIdx)) { + if (DLR->valno->def == DefIdx) + DLR->valno->copy = UseMI; + } } } } diff --git a/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp b/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp index 160f1ba..b8525a3 100644 --- a/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp +++ b/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp @@ -27,6 +27,7 @@ #include "llvm/System/DynamicLibrary.h" #include "llvm/Target/TargetData.h" #include "llvm/Support/ManagedStatic.h" +#include "llvm/System/Mutex.h" #include <csignal> #include <cstdio> #include <map> @@ -45,6 +46,8 @@ using namespace llvm; +static ManagedStatic<sys::Mutex> FunctionsLock; + typedef GenericValue (*ExFunc)(const FunctionType *, const std::vector<GenericValue> &); static ManagedStatic<std::map<const Function *, ExFunc> > ExportedFunctions; @@ -94,6 +97,7 @@ static ExFunc lookupFunction(const Function *F) { ExtName += getTypeID(FT->getContainedType(i)); ExtName += "_" + F->getName(); + sys::ScopedLock Writer(&*FunctionsLock); ExFunc FnPtr = FuncNames[ExtName]; if (FnPtr == 0) FnPtr = FuncNames["lle_X_"+F->getName()]; @@ -246,12 +250,16 @@ GenericValue Interpreter::callExternalFunction(Function *F, const std::vector<GenericValue> &ArgVals) { TheInterpreter = this; + FunctionsLock->acquire(); + // Do a lookup to see if the function is in our cache... this should just be a // deferred annotation! std::map<const Function *, ExFunc>::iterator FI = ExportedFunctions->find(F); if (ExFunc Fn = (FI == ExportedFunctions->end()) ? lookupFunction(F) - : FI->second) + : FI->second) { + FunctionsLock->release(); return Fn(F->getFunctionType(), ArgVals); + } #ifdef USE_LIBFFI std::map<const Function *, RawFunc>::iterator RF = RawFunctions->find(F); @@ -264,6 +272,8 @@ GenericValue Interpreter::callExternalFunction(Function *F, } else { RawFn = RF->second; } + + FunctionsLock->release(); GenericValue Result; if (RawFn != 0 && ffiInvoke(RawFn, F, ArgVals, getTargetData(), Result)) @@ -529,6 +539,7 @@ GenericValue lle_X_fprintf(const FunctionType *FT, void Interpreter::initializeExternalFunctions() { + sys::ScopedLock Writer(&*FunctionsLock); FuncNames["lle_X_atexit"] = lle_X_atexit; FuncNames["lle_X_exit"] = lle_X_exit; FuncNames["lle_X_abort"] = lle_X_abort; diff --git a/lib/Support/Annotation.cpp b/lib/Support/Annotation.cpp index 9764b5e..9c3efa3 100644 --- a/lib/Support/Annotation.cpp +++ b/lib/Support/Annotation.cpp @@ -13,6 +13,7 @@ #include "llvm/Support/Annotation.h" #include "llvm/Support/ManagedStatic.h" +#include "llvm/System/RWMutex.h" #include <map> #include <cstring> using namespace llvm; @@ -42,31 +43,33 @@ static unsigned IDCounter = 0; // Unique ID counter // Static member to ensure initialiation on demand. static ManagedStatic<IDMapType> IDMap; +static ManagedStatic<sys::SmartRWMutex<true> > AnnotationsLock; // On demand annotation creation support... 
typedef Annotation *(*AnnFactory)(AnnotationID, const Annotable *, void *); typedef std::map<unsigned, std::pair<AnnFactory,void*> > FactMapType; -static FactMapType *TheFactMap = 0; +static ManagedStatic<FactMapType> TheFactMap; static FactMapType &getFactMap() { - if (TheFactMap == 0) - TheFactMap = new FactMapType(); return *TheFactMap; } static void eraseFromFactMap(unsigned ID) { - assert(TheFactMap && "No entries found!"); + sys::SmartScopedWriter<true> Writer(&*AnnotationsLock); TheFactMap->erase(ID); - if (TheFactMap->empty()) { // Delete when empty - delete TheFactMap; - TheFactMap = 0; - } } AnnotationID AnnotationManager::getID(const char *Name) { // Name -> ID + AnnotationsLock->reader_acquire(); IDMapType::iterator I = IDMap->find(Name); - if (I == IDMap->end()) { - (*IDMap)[Name] = IDCounter++; // Add a new element + IDMapType::iterator E = IDMap->end(); + AnnotationsLock->reader_release(); + + if (I == E) { + sys::SmartScopedWriter<true> Writer(&*AnnotationsLock); + I = IDMap->find(Name); + if (I == IDMap->end()) + (*IDMap)[Name] = IDCounter++; // Add a new element return AnnotationID(IDCounter-1); } return AnnotationID(I->second); @@ -85,6 +88,7 @@ AnnotationID AnnotationManager::getID(const char *Name, Factory Fact, // only be used for debugging. // const char *AnnotationManager::getName(AnnotationID ID) { // ID -> Name + sys::SmartScopedReader<true> Reader(&*AnnotationsLock); IDMapType &TheMap = *IDMap; for (IDMapType::iterator I = TheMap.begin(); ; ++I) { assert(I != TheMap.end() && "Annotation ID is unknown!"); @@ -98,10 +102,12 @@ const char *AnnotationManager::getName(AnnotationID ID) { // ID -> Name // void AnnotationManager::registerAnnotationFactory(AnnotationID ID, AnnFactory F, void *ExtraData) { - if (F) + if (F) { + sys::SmartScopedWriter<true> Writer(&*AnnotationsLock); getFactMap()[ID.ID] = std::make_pair(F, ExtraData); - else + } else { eraseFromFactMap(ID.ID); + } } // createAnnotation - Create an annotation of the specified ID for the @@ -109,7 +115,13 @@ void AnnotationManager::registerAnnotationFactory(AnnotationID ID, AnnFactory F, // Annotation *AnnotationManager::createAnnotation(AnnotationID ID, const Annotable *Obj) { + AnnotationsLock->reader_acquire(); FactMapType::iterator I = getFactMap().find(ID.ID); - if (I == getFactMap().end()) return 0; + if (I == getFactMap().end()) { + AnnotationsLock->reader_release(); + return 0; + } + + AnnotationsLock->reader_release(); return I->second.first(ID, Obj, I->second.second); } diff --git a/lib/Support/PluginLoader.cpp b/lib/Support/PluginLoader.cpp index 5acf1d1..ef32af4 100644 --- a/lib/Support/PluginLoader.cpp +++ b/lib/Support/PluginLoader.cpp @@ -16,13 +16,16 @@ #include "llvm/Support/PluginLoader.h" #include "llvm/Support/Streams.h" #include "llvm/System/DynamicLibrary.h" +#include "llvm/System/Mutex.h" #include <ostream> #include <vector> using namespace llvm; static ManagedStatic<std::vector<std::string> > Plugins; +static ManagedStatic<sys::SmartMutex<true> > PluginsLock; void PluginLoader::operator=(const std::string &Filename) { + sys::SmartScopedLock<true> Lock(&*PluginsLock); std::string Error; if (sys::DynamicLibrary::LoadLibraryPermanently(Filename.c_str(), &Error)) { cerr << "Error opening '" << Filename << "': " << Error @@ -33,10 +36,12 @@ void PluginLoader::operator=(const std::string &Filename) { } unsigned PluginLoader::getNumPlugins() { + sys::SmartScopedLock<true> Lock(&*PluginsLock); return Plugins.isConstructed() ? 
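AnnotationManager::getID above uses the classic double-checked pattern: probe the map under the reader lock, and only on a miss take the writer lock and re-check before inserting. A standalone sketch of the same pattern with std::shared_mutex standing in for sys::SmartRWMutex; returning the ID found on the re-check is what keeps the answer right when another writer inserted the name first:

#include <map>
#include <mutex>
#include <shared_mutex>
#include <string>

class IDRegistry {
  std::map<std::string, unsigned> IDs;
  unsigned NextID = 0;
  mutable std::shared_mutex Lock;

public:
  unsigned getID(const std::string &Name) {
    {
      std::shared_lock<std::shared_mutex> Reader(Lock);  // cheap read path
      auto I = IDs.find(Name);
      if (I != IDs.end())
        return I->second;
    }
    std::unique_lock<std::shared_mutex> Writer(Lock);
    auto I = IDs.find(Name);           // re-check: another writer may have won
    if (I != IDs.end())
      return I->second;
    unsigned ID = NextID++;
    IDs[Name] = ID;
    return ID;
  }
};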
Plugins->size() : 0; } std::string &PluginLoader::getPlugin(unsigned num) { + sys::SmartScopedLock<true> Lock(&*PluginsLock); assert(Plugins.isConstructed() && num < Plugins->size() && "Asking for an out of bounds plugin"); return (*Plugins)[num]; diff --git a/lib/Support/Statistic.cpp b/lib/Support/Statistic.cpp index 13acc1b..6c652f8 100644 --- a/lib/Support/Statistic.cpp +++ b/lib/Support/Statistic.cpp @@ -25,6 +25,7 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/ManagedStatic.h" #include "llvm/Support/Streams.h" +#include "llvm/System/Mutex.h" #include "llvm/ADT/StringExtras.h" #include <algorithm> #include <ostream> @@ -57,13 +58,14 @@ public: } static ManagedStatic<StatisticInfo> StatInfo; - +static ManagedStatic<sys::Mutex> StatLock; /// RegisterStatistic - The first time a statistic is bumped, this method is /// called. void Statistic::RegisterStatistic() { // If stats are enabled, inform StatInfo that this statistic should be // printed. + sys::ScopedLock Writer(&*StatLock); if (Enabled) StatInfo->addStatistic(this); // Remember we have been registered. diff --git a/lib/Support/Timer.cpp b/lib/Support/Timer.cpp index 3c8879b..c4920f0 100644 --- a/lib/Support/Timer.cpp +++ b/lib/Support/Timer.cpp @@ -15,6 +15,7 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/ManagedStatic.h" #include "llvm/Support/Streams.h" +#include "llvm/System/Mutex.h" #include "llvm/System/Process.h" #include <algorithm> #include <fstream> @@ -50,25 +51,28 @@ namespace { cl::Hidden, cl::location(getLibSupportInfoOutputFilename())); } -static TimerGroup *DefaultTimerGroup = 0; +static ManagedStatic<sys::SmartMutex<true> > TimerLock; +static ManagedStatic<TimerGroup> DefaultTimerGroup; static TimerGroup *getDefaultTimerGroup() { - if (DefaultTimerGroup) return DefaultTimerGroup; - return DefaultTimerGroup = new TimerGroup("Miscellaneous Ungrouped Timers"); + return &*DefaultTimerGroup; } Timer::Timer(const std::string &N) : Elapsed(0), UserTime(0), SystemTime(0), MemUsed(0), PeakMem(0), Name(N), Started(false), TG(getDefaultTimerGroup()) { + sys::SmartScopedLock<true> Lock(&*TimerLock); TG->addTimer(); } Timer::Timer(const std::string &N, TimerGroup &tg) : Elapsed(0), UserTime(0), SystemTime(0), MemUsed(0), PeakMem(0), Name(N), Started(false), TG(&tg) { + sys::SmartScopedLock<true> Lock(&*TimerLock); TG->addTimer(); } Timer::Timer(const Timer &T) { + sys::SmartScopedLock<true> Lock(&*TimerLock); TG = T.TG; if (TG) TG->addTimer(); operator=(T); @@ -77,6 +81,7 @@ Timer::Timer(const Timer &T) { // Copy ctor, initialize with no TG member. 
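PluginLoader and Statistic above pair a lazily constructed ManagedStatic container with a lazily constructed ManagedStatic mutex, and every accessor takes the lock before touching the container. A rough standard-C++ analogue of that shape, using function-local statics instead of ManagedStatic:

#include <mutex>
#include <string>
#include <vector>

namespace {
std::vector<std::string> &plugins() {
  static std::vector<std::string> List;   // constructed on first use
  return List;
}
std::mutex &pluginsLock() {
  static std::mutex M;                    // likewise lazily constructed
  return M;
}
} // namespace

void addPlugin(const std::string &Filename) {
  std::lock_guard<std::mutex> Lock(pluginsLock());
  plugins().push_back(Filename);
}

unsigned numPlugins() {
  std::lock_guard<std::mutex> Lock(pluginsLock());
  return static_cast<unsigned>(plugins().size());
}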
Timer::Timer(bool, const Timer &T) { + sys::SmartScopedLock<true> Lock(&*TimerLock); TG = T.TG; // Avoid assertion in operator= operator=(T); // Copy contents TG = 0; @@ -84,6 +89,7 @@ Timer::Timer(bool, const Timer &T) { Timer::~Timer() { + sys::SmartScopedLock<true> Lock(&*TimerLock); if (TG) { if (Started) { Started = false; @@ -129,8 +135,10 @@ static TimeRecord getTimeRecord(bool Start) { } static ManagedStatic<std::vector<Timer*> > ActiveTimers; +static ManagedStatic<sys::SmartMutex<true> > ActiveTimerLock; void Timer::startTimer() { + sys::SmartScopedLock<true> Lock(&*ActiveTimerLock); Started = true; ActiveTimers->push_back(this); TimeRecord TR = getTimeRecord(true); @@ -142,6 +150,7 @@ void Timer::startTimer() { } void Timer::stopTimer() { + sys::SmartScopedLock<true> Lock(&*ActiveTimerLock); TimeRecord TR = getTimeRecord(false); Elapsed += TR.Elapsed; UserTime += TR.UserTime; @@ -171,6 +180,7 @@ void Timer::sum(const Timer &T) { /// currently active timers, which will be printed when the timer group prints /// void Timer::addPeakMemoryMeasurement() { + sys::SmartScopedLock<true> Lock(&*ActiveTimerLock); size_t MemUsed = getMemUsage(); for (std::vector<Timer*>::iterator I = ActiveTimers->begin(), @@ -193,7 +203,10 @@ static ManagedStatic<Name2Timer> NamedTimers; static ManagedStatic<Name2Pair> NamedGroupedTimers; +static ManagedStatic<sys::SmartMutex<true> > NamedTimerLock; + static Timer &getNamedRegionTimer(const std::string &Name) { + sys::SmartScopedLock<true> Lock(&*NamedTimerLock); Name2Timer::iterator I = NamedTimers->find(Name); if (I != NamedTimers->end()) return I->second; @@ -203,6 +216,7 @@ static Timer &getNamedRegionTimer(const std::string &Name) { static Timer &getNamedRegionTimer(const std::string &Name, const std::string &GroupName) { + sys::SmartScopedLock<true> Lock(&*NamedTimerLock); Name2Pair::iterator I = NamedGroupedTimers->find(GroupName); if (I == NamedGroupedTimers->end()) { @@ -340,7 +354,7 @@ void TimerGroup::removeTimer() { // If this is not an collection of ungrouped times, print the total time. // Ungrouped timers don't really make sense to add up. We still print the // TOTAL line to make the percentages make sense. - if (this != DefaultTimerGroup) { + if (this != &*DefaultTimerGroup) { *OutStream << " Total Execution Time: "; printAlignedFP(Total.getProcessTime(), 4, 5, *OutStream); @@ -377,11 +391,5 @@ void TimerGroup::removeTimer() { if (OutStream != cerr.stream() && OutStream != cout.stream()) delete OutStream; // Close the file... } - - // Delete default timer group! - if (NumTimers == 0 && this == DefaultTimerGroup) { - delete DefaultTimerGroup; - DefaultTimerGroup = 0; - } } diff --git a/lib/Target/ARM/ARMCallingConv.td b/lib/Target/ARM/ARMCallingConv.td index 47151e6..8a4c741 100644 --- a/lib/Target/ARM/ARMCallingConv.td +++ b/lib/Target/ARM/ARMCallingConv.td @@ -24,19 +24,29 @@ def CC_ARM_APCS : CallingConv<[ CCIfType<[i8, i16], CCPromoteToType<i32>>, - // f64 is passed in pairs of GPRs, possibly split onto the stack - CCIfType<[f64], CCCustom<"CC_ARM_APCS_Custom_f64">>, + // Handle all vector types as either f64 or v2f64. 
+ CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType<f64>>, + CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32], CCBitConvertToType<v2f64>>, + + // f64 and v2f64 are passed in adjacent GPRs, possibly split onto the stack + CCIfType<[f64, v2f64], CCCustom<"CC_ARM_APCS_Custom_f64">>, CCIfType<[f32], CCBitConvertToType<i32>>, CCIfType<[i32], CCAssignToReg<[R0, R1, R2, R3]>>, CCIfType<[i32], CCAssignToStack<4, 4>>, - CCIfType<[f64], CCAssignToStack<8, 4>> + CCIfType<[f64], CCAssignToStack<8, 4>>, + CCIfType<[v2f64], CCAssignToStack<16, 4>> ]>; def RetCC_ARM_APCS : CallingConv<[ CCIfType<[f32], CCBitConvertToType<i32>>, - CCIfType<[f64], CCCustom<"RetCC_ARM_APCS_Custom_f64">>, + + // Handle all vector types as either f64 or v2f64. + CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType<f64>>, + CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32], CCBitConvertToType<v2f64>>, + + CCIfType<[f64, v2f64], CCCustom<"RetCC_ARM_APCS_Custom_f64">>, CCIfType<[i32], CCAssignToReg<[R0, R1, R2, R3]>>, CCIfType<[i64], CCAssignToRegWithShadow<[R0, R2], [R1, R3]>> @@ -59,7 +69,8 @@ def CC_ARM_AAPCS_Common : CallingConv<[ CCAssignToReg<[R0, R1, R2, R3]>>>, CCIfType<[i32, f32], CCAssignToStack<4, 4>>, - CCIfType<[f64], CCAssignToStack<8, 8>> + CCIfType<[f64], CCAssignToStack<8, 8>>, + CCIfType<[v2f64], CCAssignToStack<16, 8>> ]>; def RetCC_ARM_AAPCS_Common : CallingConv<[ @@ -72,13 +83,21 @@ def RetCC_ARM_AAPCS_Common : CallingConv<[ //===----------------------------------------------------------------------===// def CC_ARM_AAPCS : CallingConv<[ - CCIfType<[f64], CCCustom<"CC_ARM_AAPCS_Custom_f64">>, + // Handle all vector types as either f64 or v2f64. + CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType<f64>>, + CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32], CCBitConvertToType<v2f64>>, + + CCIfType<[f64, v2f64], CCCustom<"CC_ARM_AAPCS_Custom_f64">>, CCIfType<[f32], CCBitConvertToType<i32>>, CCDelegateTo<CC_ARM_AAPCS_Common> ]>; def RetCC_ARM_AAPCS : CallingConv<[ - CCIfType<[f64], CCCustom<"RetCC_ARM_AAPCS_Custom_f64">>, + // Handle all vector types as either f64 or v2f64. + CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType<f64>>, + CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32], CCBitConvertToType<v2f64>>, + + CCIfType<[f64, v2f64], CCCustom<"RetCC_ARM_AAPCS_Custom_f64">>, CCIfType<[f32], CCBitConvertToType<i32>>, CCDelegateTo<RetCC_ARM_AAPCS_Common> ]>; @@ -88,6 +107,10 @@ def RetCC_ARM_AAPCS : CallingConv<[ //===----------------------------------------------------------------------===// def CC_ARM_AAPCS_VFP : CallingConv<[ + // Handle all vector types as either f64 or v2f64. + CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType<f64>>, + CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32], CCBitConvertToType<v2f64>>, + CCIfType<[f64], CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7]>>, CCIfType<[f32], CCAssignToReg<[S0, S1, S2, S3, S4, S5, S6, S7, S8, S9, S10, S11, S12, S13, S14, S15]>>, @@ -95,6 +118,10 @@ def CC_ARM_AAPCS_VFP : CallingConv<[ ]>; def RetCC_ARM_AAPCS_VFP : CallingConv<[ + // Handle all vector types as either f64 or v2f64. 
+ CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType<f64>>, + CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32], CCBitConvertToType<v2f64>>, + CCIfType<[f64], CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7]>>, CCIfType<[f32], CCAssignToReg<[S0, S1, S2, S3, S4, S5, S6, S7, S8, S9, S10, S11, S12, S13, S14, S15]>>, diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp index 1ed9e80..ee9dadf 100644 --- a/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -32,6 +32,9 @@ #include "llvm/Support/Debug.h" using namespace llvm; +static const unsigned arm_dsubreg_0 = 5; +static const unsigned arm_dsubreg_1 = 6; + //===--------------------------------------------------------------------===// /// ARMDAGToDAGISel - ARM specific code to select ARM machine /// instructions for SelectionDAG operations. @@ -579,17 +582,18 @@ SDNode *ARMDAGToDAGISel::Select(SDValue Op) { switch (N->getOpcode()) { default: break; case ISD::Constant: { - // ARMv6T2 and later should materialize imms via MOV / MOVT pair. - if (Subtarget->hasV6T2Ops() || Subtarget->hasThumb2()) - break; - unsigned Val = cast<ConstantSDNode>(N)->getZExtValue(); bool UseCP = true; - if (Subtarget->isThumb()) - UseCP = (Val > 255 && // MOV - ~Val > 255 && // MOV + MVN - !ARM_AM::isThumbImmShiftedVal(Val)); // MOV + LSL - else + if (Subtarget->isThumb()) { + if (Subtarget->hasThumb2()) + // Thumb2 has the MOVT instruction, so all immediates can + // be done with MOV + MOVT, at worst. + UseCP = 0; + else + UseCP = (Val > 255 && // MOV + ~Val > 255 && // MOV + MVN + !ARM_AM::isThumbImmShiftedVal(Val)); // MOV + LSL + } else UseCP = (ARM_AM::getSOImmVal(Val) == -1 && // MOV ARM_AM::getSOImmVal(~Val) == -1 && // MVN !ARM_AM::isSOImmTwoPartVal(Val)); // two instrs. @@ -917,6 +921,65 @@ SDNode *ARMDAGToDAGISel::Select(SDValue Op) { return CurDAG->getTargetNode(TargetInstrInfo::DECLARE, dl, MVT::Other, Ops, 3); } + + case ISD::CONCAT_VECTORS: { + MVT VT = Op.getValueType(); + assert(VT.is128BitVector() && Op.getNumOperands() == 2 && + "unexpected CONCAT_VECTORS"); + SDValue N0 = Op.getOperand(0); + SDValue N1 = Op.getOperand(1); + SDNode *Result = + CurDAG->getTargetNode(TargetInstrInfo::IMPLICIT_DEF, dl, VT); + if (N0.getOpcode() != ISD::UNDEF) + Result = CurDAG->getTargetNode(TargetInstrInfo::INSERT_SUBREG, dl, VT, + SDValue(Result, 0), N0, + CurDAG->getTargetConstant(arm_dsubreg_0, + MVT::i32)); + if (N1.getOpcode() != ISD::UNDEF) + Result = CurDAG->getTargetNode(TargetInstrInfo::INSERT_SUBREG, dl, VT, + SDValue(Result, 0), N1, + CurDAG->getTargetConstant(arm_dsubreg_1, + MVT::i32)); + return Result; + } + + case ISD::VECTOR_SHUFFLE: { + MVT VT = Op.getValueType(); + + // Match 128-bit splat to VDUPLANEQ. (This could be done with a Pat in + // ARMInstrNEON.td but it is awkward because the shuffle mask needs to be + // transformed first into a lane number and then to both a subregister + // index and an adjusted lane number.) If the source operand is a + // SCALAR_TO_VECTOR, leave it so it will be matched later as a VDUP. 
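The ISD::Constant selection above keeps a constant out of the constant pool when it (or its complement) fits ARM's rotated 8-bit immediate encoding, or when Thumb2's MOVW/MOVT can build it directly. A sketch of the encodability test that ARM_AM::getSOImmVal is answering, under the usual so_imm definition of an 8-bit value rotated right by an even amount:

#include <cstdint>

static uint32_t rotl32(uint32_t V, unsigned N) {
  N &= 31;
  return N == 0 ? V : (V << N) | (V >> (32 - N));
}

// V is a single-MOV immediate iff V == rotr(imm8, R) for some imm8 <= 0xFF
// and even R, i.e. some even left-rotation of V fits in 8 bits. MVN covers
// the complemented form; everything else needs two instructions or a load.
static bool isSOImmEncodable(uint32_t V) {
  for (unsigned R = 0; R != 32; R += 2)
    if (rotl32(V, R) <= 0xFF)
      return true;
  return false;
}

// isSOImmEncodable(0x000000FF) == true   (imm8 = 0xFF, rot = 0)
// isSOImmEncodable(0xFF000000) == true   (0xFF rotated right by 8)
// isSOImmEncodable(0x00FF00FF) == false  (not a rotated 8-bit pattern)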
+ ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N); + if (VT.is128BitVector() && SVOp->isSplat() && + Op.getOperand(0).getOpcode() != ISD::SCALAR_TO_VECTOR && + Op.getOperand(1).getOpcode() == ISD::UNDEF) { + unsigned LaneVal = SVOp->getSplatIndex(); + + MVT HalfVT; + unsigned Opc = 0; + switch (VT.getVectorElementType().getSimpleVT()) { + default: assert(false && "unhandled VDUP splat type"); + case MVT::i8: Opc = ARM::VDUPLN8q; HalfVT = MVT::v8i8; break; + case MVT::i16: Opc = ARM::VDUPLN16q; HalfVT = MVT::v4i16; break; + case MVT::i32: Opc = ARM::VDUPLN32q; HalfVT = MVT::v2i32; break; + case MVT::f32: Opc = ARM::VDUPLNfq; HalfVT = MVT::v2f32; break; + } + + // The source operand needs to be changed to a subreg of the original + // 128-bit operand, and the lane number needs to be adjusted accordingly. + unsigned NumElts = VT.getVectorNumElements() / 2; + unsigned SRVal = (LaneVal < NumElts ? arm_dsubreg_0 : arm_dsubreg_1); + SDValue SR = CurDAG->getTargetConstant(SRVal, MVT::i32); + SDValue NewLane = CurDAG->getTargetConstant(LaneVal % NumElts, MVT::i32); + SDNode *SubReg = CurDAG->getTargetNode(TargetInstrInfo::EXTRACT_SUBREG, + dl, HalfVT, N->getOperand(0), SR); + return CurDAG->SelectNodeTo(N, Opc, VT, SDValue(SubReg, 0), NewLane); + } + + break; + } } return SelectCode(Op); diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index 2443625..29d3da2 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -56,6 +56,52 @@ static bool RetCC_ARM_AAPCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT, ISD::ArgFlagsTy &ArgFlags, CCState &State); +void ARMTargetLowering::addTypeForNEON(MVT VT, MVT PromotedLdStVT, + MVT PromotedBitwiseVT) { + if (VT != PromotedLdStVT) { + setOperationAction(ISD::LOAD, VT, Promote); + AddPromotedToType (ISD::LOAD, VT, PromotedLdStVT); + + setOperationAction(ISD::STORE, VT, Promote); + AddPromotedToType (ISD::STORE, VT, PromotedLdStVT); + } + + MVT ElemTy = VT.getVectorElementType(); + if (ElemTy != MVT::i64 && ElemTy != MVT::f64) + setOperationAction(ISD::VSETCC, VT, Custom); + if (ElemTy == MVT::i8 || ElemTy == MVT::i16) + setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom); + setOperationAction(ISD::BUILD_VECTOR, VT, Custom); + setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); + setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom); + setOperationAction(ISD::CONCAT_VECTORS, VT, Custom); + if (VT.isInteger()) { + setOperationAction(ISD::SHL, VT, Custom); + setOperationAction(ISD::SRA, VT, Custom); + setOperationAction(ISD::SRL, VT, Custom); + } + + // Promote all bit-wise operations. 
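The VECTOR_SHUFFLE splat case above re-addresses a lane of a 128-bit register as a lane of one of its two 64-bit halves: the D subregister is chosen by which half the lane falls in, and the lane number is reduced modulo the number of elements per half. A tiny model of that renumbering (arm_dsubreg_0/1 above are the two subregister indices):

#include <cassert>
#include <utility>

// Map a Q-register lane to (D-subreg index, lane within that D half).
static std::pair<unsigned, unsigned> qLaneToDLane(unsigned Lane,
                                                  unsigned HalfElts) {
  unsigned SubReg = Lane < HalfElts ? 0 : 1;   // which 64-bit half
  unsigned NewLane = Lane % HalfElts;          // lane inside that half
  return {SubReg, NewLane};
}

int main() {
  // v8i16 splat from lane 5: 4 lanes per D register -> upper half, lane 1.
  assert(qLaneToDLane(5, 4) == std::make_pair(1u, 1u));
  return 0;
}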
+ if (VT.isInteger() && VT != PromotedBitwiseVT) { + setOperationAction(ISD::AND, VT, Promote); + AddPromotedToType (ISD::AND, VT, PromotedBitwiseVT); + setOperationAction(ISD::OR, VT, Promote); + AddPromotedToType (ISD::OR, VT, PromotedBitwiseVT); + setOperationAction(ISD::XOR, VT, Promote); + AddPromotedToType (ISD::XOR, VT, PromotedBitwiseVT); + } +} + +void ARMTargetLowering::addDRTypeForNEON(MVT VT) { + addRegisterClass(VT, ARM::DPRRegisterClass); + addTypeForNEON(VT, MVT::f64, MVT::v2i32); +} + +void ARMTargetLowering::addQRTypeForNEON(MVT VT) { + addRegisterClass(VT, ARM::QPRRegisterClass); + addTypeForNEON(VT, MVT::v2f64, MVT::v4i32); +} + ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) : TargetLowering(TM), ARMPCLabelIndex(0) { Subtarget = &TM.getSubtarget<ARMSubtarget>(); @@ -152,6 +198,30 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setTruncStoreAction(MVT::f64, MVT::f32, Expand); } + + if (Subtarget->hasNEON()) { + addDRTypeForNEON(MVT::v2f32); + addDRTypeForNEON(MVT::v8i8); + addDRTypeForNEON(MVT::v4i16); + addDRTypeForNEON(MVT::v2i32); + addDRTypeForNEON(MVT::v1i64); + + addQRTypeForNEON(MVT::v4f32); + addQRTypeForNEON(MVT::v2f64); + addQRTypeForNEON(MVT::v16i8); + addQRTypeForNEON(MVT::v8i16); + addQRTypeForNEON(MVT::v4i32); + addQRTypeForNEON(MVT::v2i64); + + setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN); + setTargetDAGCombine(ISD::SHL); + setTargetDAGCombine(ISD::SRL); + setTargetDAGCombine(ISD::SRA); + setTargetDAGCombine(ISD::SIGN_EXTEND); + setTargetDAGCombine(ISD::ZERO_EXTEND); + setTargetDAGCombine(ISD::ANY_EXTEND); + } + computeRegisterProperties(); // ARM does not have f32 extending load. @@ -352,6 +422,36 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const { case ARMISD::FMDRR: return "ARMISD::FMDRR"; case ARMISD::THREAD_POINTER:return "ARMISD::THREAD_POINTER"; + + case ARMISD::VCEQ: return "ARMISD::VCEQ"; + case ARMISD::VCGE: return "ARMISD::VCGE"; + case ARMISD::VCGEU: return "ARMISD::VCGEU"; + case ARMISD::VCGT: return "ARMISD::VCGT"; + case ARMISD::VCGTU: return "ARMISD::VCGTU"; + case ARMISD::VTST: return "ARMISD::VTST"; + + case ARMISD::VSHL: return "ARMISD::VSHL"; + case ARMISD::VSHRs: return "ARMISD::VSHRs"; + case ARMISD::VSHRu: return "ARMISD::VSHRu"; + case ARMISD::VSHLLs: return "ARMISD::VSHLLs"; + case ARMISD::VSHLLu: return "ARMISD::VSHLLu"; + case ARMISD::VSHLLi: return "ARMISD::VSHLLi"; + case ARMISD::VSHRN: return "ARMISD::VSHRN"; + case ARMISD::VRSHRs: return "ARMISD::VRSHRs"; + case ARMISD::VRSHRu: return "ARMISD::VRSHRu"; + case ARMISD::VRSHRN: return "ARMISD::VRSHRN"; + case ARMISD::VQSHLs: return "ARMISD::VQSHLs"; + case ARMISD::VQSHLu: return "ARMISD::VQSHLu"; + case ARMISD::VQSHLsu: return "ARMISD::VQSHLsu"; + case ARMISD::VQSHRNs: return "ARMISD::VQSHRNs"; + case ARMISD::VQSHRNu: return "ARMISD::VQSHRNu"; + case ARMISD::VQSHRNsu: return "ARMISD::VQSHRNsu"; + case ARMISD::VQRSHRNs: return "ARMISD::VQRSHRNs"; + case ARMISD::VQRSHRNu: return "ARMISD::VQRSHRNu"; + case ARMISD::VQRSHRNsu: return "ARMISD::VQRSHRNsu"; + case ARMISD::VGETLANEu: return "ARMISD::VGETLANEu"; + case ARMISD::VGETLANEs: return "ARMISD::VGETLANEs"; + case ARMISD::VDUPLANEQ: return "ARMISD::VDUPLANEQ"; } } @@ -423,63 +523,93 @@ static bool FPCCToARMCC(ISD::CondCode CC, ARMCC::CondCodes &CondCode, #include "ARMGenCallingConv.inc" // APCS f64 is in register pairs, possibly split to stack -static bool CC_ARM_APCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT, - CCValAssign::LocInfo &LocInfo, - ISD::ArgFlagsTy &ArgFlags, - 
CCState &State) { - static const unsigned HiRegList[] = { ARM::R0, ARM::R1, ARM::R2, ARM::R3 }; - static const unsigned LoRegList[] = { ARM::R1, - ARM::R2, - ARM::R3, - ARM::NoRegister }; - - unsigned Reg = State.AllocateReg(HiRegList, LoRegList, 4); - if (Reg == 0) - return false; // we didn't handle it +static bool f64AssignAPCS(unsigned &ValNo, MVT &ValVT, MVT &LocVT, + CCValAssign::LocInfo &LocInfo, + CCState &State, bool CanFail) { + static const unsigned RegList[] = { ARM::R0, ARM::R1, ARM::R2, ARM::R3 }; + + // Try to get the first register. + if (unsigned Reg = State.AllocateReg(RegList, 4)) + State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo)); + else { + // For the 2nd half of a v2f64, do not fail. + if (CanFail) + return false; - unsigned i; - for (i = 0; i < 4; ++i) - if (HiRegList[i] == Reg) - break; + // Put the whole thing on the stack. + State.addLoc(CCValAssign::getCustomMem(ValNo, ValVT, + State.AllocateStack(8, 4), + LocVT, LocInfo)); + return true; + } - State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, MVT::i32, LocInfo)); - if (LoRegList[i] != ARM::NoRegister) - State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, LoRegList[i], - MVT::i32, LocInfo)); + // Try to get the second register. + if (unsigned Reg = State.AllocateReg(RegList, 4)) + State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo)); else State.addLoc(CCValAssign::getCustomMem(ValNo, ValVT, State.AllocateStack(4, 4), - MVT::i32, LocInfo)); + LocVT, LocInfo)); + return true; +} + +static bool CC_ARM_APCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT, + CCValAssign::LocInfo &LocInfo, + ISD::ArgFlagsTy &ArgFlags, + CCState &State) { + if (!f64AssignAPCS(ValNo, ValVT, LocVT, LocInfo, State, true)) + return false; + if (LocVT == MVT::v2f64 && + !f64AssignAPCS(ValNo, ValVT, LocVT, LocInfo, State, false)) + return false; return true; // we handled it } // AAPCS f64 is in aligned register pairs -static bool CC_ARM_AAPCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT, - CCValAssign::LocInfo &LocInfo, - ISD::ArgFlagsTy &ArgFlags, - CCState &State) { +static bool f64AssignAAPCS(unsigned &ValNo, MVT &ValVT, MVT &LocVT, + CCValAssign::LocInfo &LocInfo, + CCState &State, bool CanFail) { static const unsigned HiRegList[] = { ARM::R0, ARM::R2 }; static const unsigned LoRegList[] = { ARM::R1, ARM::R3 }; unsigned Reg = State.AllocateReg(HiRegList, LoRegList, 2); - if (Reg == 0) - return false; // we didn't handle it + if (Reg == 0) { + // For the 2nd half of a v2f64, do not just fail. + if (CanFail) + return false; + + // Put the whole thing on the stack. 
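f64AssignAPCS above hands each 32-bit half of an f64 to the next free register out of R0-R3 and falls back to 4-byte stack slots once they run out (with CanFail deciding whether the first half may still bail out entirely). A toy model of that greedy assignment, much simplified relative to CCState:

#include <string>

struct APCSState {
  unsigned NextReg = 0;        // index into R0..R3
  unsigned StackOffset = 0;    // grows in 4-byte slots
};

// Assign one 32-bit half of an f64 (or of half a v2f64) to a location.
static std::string assignHalf(APCSState &S) {
  if (S.NextReg < 4)
    return "R" + std::to_string(S.NextReg++);
  std::string Loc = "sp+" + std::to_string(S.StackOffset);
  S.StackOffset += 4;
  return Loc;
}

// Passing (f64, f64) under this model uses R0,R1,R2,R3 for the four halves;
// a third f64 would be split to sp+0 and sp+4.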
+ State.addLoc(CCValAssign::getCustomMem(ValNo, ValVT, + State.AllocateStack(8, 8), + LocVT, LocInfo)); + return true; + } unsigned i; for (i = 0; i < 2; ++i) if (HiRegList[i] == Reg) break; - State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, MVT::i32, LocInfo)); + State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo)); State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, LoRegList[i], - MVT::i32, LocInfo)); + LocVT, LocInfo)); + return true; +} + +static bool CC_ARM_AAPCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT, + CCValAssign::LocInfo &LocInfo, + ISD::ArgFlagsTy &ArgFlags, + CCState &State) { + if (!f64AssignAAPCS(ValNo, ValVT, LocVT, LocInfo, State, true)) + return false; + if (LocVT == MVT::v2f64 && + !f64AssignAAPCS(ValNo, ValVT, LocVT, LocInfo, State, false)) + return false; return true; // we handled it } -static bool RetCC_ARM_APCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT, - CCValAssign::LocInfo &LocInfo, - ISD::ArgFlagsTy &ArgFlags, - CCState &State) { +static bool f64RetAssign(unsigned &ValNo, MVT &ValVT, MVT &LocVT, + CCValAssign::LocInfo &LocInfo, CCState &State) { static const unsigned HiRegList[] = { ARM::R0, ARM::R2 }; static const unsigned LoRegList[] = { ARM::R1, ARM::R3 }; @@ -492,9 +622,20 @@ static bool RetCC_ARM_APCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT, if (HiRegList[i] == Reg) break; - State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, MVT::i32, LocInfo)); + State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo)); State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, LoRegList[i], - MVT::i32, LocInfo)); + LocVT, LocInfo)); + return true; +} + +static bool RetCC_ARM_APCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT, + CCValAssign::LocInfo &LocInfo, + ISD::ArgFlagsTy &ArgFlags, + CCState &State) { + if (!f64RetAssign(ValNo, ValVT, LocVT, LocInfo, State)) + return false; + if (LocVT == MVT::v2f64 && !f64RetAssign(ValNo, ValVT, LocVT, LocInfo, State)) + return false; return true; // we handled it } @@ -558,7 +699,7 @@ LowerCallResult(SDValue Chain, SDValue InFlag, CallSDNode *TheCall, SDValue Val; if (VA.needsCustom()) { - // Handle f64 as custom. + // Handle f64 or half of a v2f64. 
SDValue Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag); Chain = Lo.getValue(1); @@ -569,6 +710,24 @@ LowerCallResult(SDValue Chain, SDValue InFlag, CallSDNode *TheCall, Chain = Hi.getValue(1); InFlag = Hi.getValue(2); Val = DAG.getNode(ARMISD::FMDRR, dl, MVT::f64, Lo, Hi); + + if (VA.getLocVT() == MVT::v2f64) { + SDValue Vec = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64); + Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val, + DAG.getConstant(0, MVT::i32)); + + VA = RVLocs[++i]; // skip ahead to next loc + Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag); + Chain = Lo.getValue(1); + InFlag = Lo.getValue(2); + VA = RVLocs[++i]; // skip ahead to next loc + Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag); + Chain = Hi.getValue(1); + InFlag = Hi.getValue(2); + Val = DAG.getNode(ARMISD::FMDRR, dl, MVT::f64, Lo, Hi); + Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val, + DAG.getConstant(1, MVT::i32)); + } } else { Val = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), VA.getLocVT(), InFlag); @@ -625,6 +784,31 @@ ARMTargetLowering::LowerMemOpCallTo(CallSDNode *TheCall, SelectionDAG &DAG, PseudoSourceValue::getStack(), LocMemOffset); } +void ARMTargetLowering::PassF64ArgInRegs(CallSDNode *TheCall, SelectionDAG &DAG, + SDValue Chain, SDValue &Arg, + RegsToPassVector &RegsToPass, + CCValAssign &VA, CCValAssign &NextVA, + SDValue &StackPtr, + SmallVector<SDValue, 8> &MemOpChains, + ISD::ArgFlagsTy Flags) { + DebugLoc dl = TheCall->getDebugLoc(); + + SDValue fmrrd = DAG.getNode(ARMISD::FMRRD, dl, + DAG.getVTList(MVT::i32, MVT::i32), Arg); + RegsToPass.push_back(std::make_pair(VA.getLocReg(), fmrrd)); + + if (NextVA.isRegLoc()) + RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), fmrrd.getValue(1))); + else { + assert(NextVA.isMemLoc()); + if (StackPtr.getNode() == 0) + StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy()); + + MemOpChains.push_back(LowerMemOpCallTo(TheCall, DAG, StackPtr, NextVA, + Chain, fmrrd.getValue(1), Flags)); + } +} + /// LowerCALL - Lowering a ISD::CALL node into a callseq_start <- /// ARMISD:CALL <- callseq_end chain. Also add input and output parameter /// nodes. @@ -651,7 +835,7 @@ SDValue ARMTargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG) { SDValue StackPtr = DAG.getRegister(ARM::SP, MVT::i32); - SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass; + RegsToPassVector RegsToPass; SmallVector<SDValue, 8> MemOpChains; // Walk the register/memloc assignments, inserting copies/loads. 
In the case @@ -681,22 +865,32 @@ SDValue ARMTargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG) { break; } - // f64 is passed in i32 pairs and must be combined + // f64 and v2f64 are passed in i32 pairs and must be split into pieces if (VA.needsCustom()) { - SDValue fmrrd = DAG.getNode(ARMISD::FMRRD, dl, - DAG.getVTList(MVT::i32, MVT::i32), &Arg, 1); - RegsToPass.push_back(std::make_pair(VA.getLocReg(), fmrrd)); - VA = ArgLocs[++i]; // skip ahead to next loc - if (VA.isRegLoc()) - RegsToPass.push_back(std::make_pair(VA.getLocReg(), fmrrd.getValue(1))); - else { - assert(VA.isMemLoc()); - if (StackPtr.getNode() == 0) - StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy()); - - MemOpChains.push_back(LowerMemOpCallTo(TheCall, DAG, StackPtr, VA, - Chain, fmrrd.getValue(1), - Flags)); + if (VA.getLocVT() == MVT::v2f64) { + SDValue Op0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg, + DAG.getConstant(0, MVT::i32)); + SDValue Op1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg, + DAG.getConstant(1, MVT::i32)); + + PassF64ArgInRegs(TheCall, DAG, Chain, Op0, RegsToPass, + VA, ArgLocs[++i], StackPtr, MemOpChains, Flags); + + VA = ArgLocs[++i]; // skip ahead to next loc + if (VA.isRegLoc()) { + PassF64ArgInRegs(TheCall, DAG, Chain, Op1, RegsToPass, + VA, ArgLocs[++i], StackPtr, MemOpChains, Flags); + } else { + assert(VA.isMemLoc()); + if (StackPtr.getNode() == 0) + StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy()); + + MemOpChains.push_back(LowerMemOpCallTo(TheCall, DAG, StackPtr, VA, + Chain, Op1, Flags)); + } + } else { + PassF64ArgInRegs(TheCall, DAG, Chain, Arg, RegsToPass, VA, ArgLocs[++i], + StackPtr, MemOpChains, Flags); } } else if (VA.isRegLoc()) { RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg)); @@ -864,9 +1058,28 @@ SDValue ARMTargetLowering::LowerRET(SDValue Op, SelectionDAG &DAG) { break; } - // Legalize ret f64 -> ret 2 x i32. We always have fmrrd if f64 is - // available. if (VA.needsCustom()) { + if (VA.getLocVT() == MVT::v2f64) { + // Extract the first half and return it in two registers. + SDValue Half = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg, + DAG.getConstant(0, MVT::i32)); + SDValue HalfGPRs = DAG.getNode(ARMISD::FMRRD, dl, + DAG.getVTList(MVT::i32, MVT::i32), Half); + + Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), HalfGPRs, Flag); + Flag = Chain.getValue(1); + VA = RVLocs[++i]; // skip ahead to next loc + Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), + HalfGPRs.getValue(1), Flag); + Flag = Chain.getValue(1); + VA = RVLocs[++i]; // skip ahead to next loc + + // Extract the 2nd half and fall through to handle it as an f64 value. + Arg = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg, + DAG.getConstant(1, MVT::i32)); + } + // Legalize ret f64 -> ret 2 x i32. We always have fmrrd if f64 is + // available. 
SDValue fmrrd = DAG.getNode(ARMISD::FMRRD, dl, DAG.getVTList(MVT::i32, MVT::i32), &Arg, 1); Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), fmrrd, Flag); @@ -1117,6 +1330,40 @@ static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG, } SDValue +ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA, + SDValue &Root, SelectionDAG &DAG, + DebugLoc dl) { + MachineFunction &MF = DAG.getMachineFunction(); + ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); + + TargetRegisterClass *RC; + if (AFI->isThumbFunction()) + RC = ARM::tGPRRegisterClass; + else + RC = ARM::GPRRegisterClass; + + // Transform the arguments stored in physical registers into virtual ones. + unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC); + SDValue ArgValue = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32); + + SDValue ArgValue2; + if (NextVA.isMemLoc()) { + unsigned ArgSize = NextVA.getLocVT().getSizeInBits()/8; + MachineFrameInfo *MFI = MF.getFrameInfo(); + int FI = MFI->CreateFixedObject(ArgSize, NextVA.getLocMemOffset()); + + // Create load node to retrieve arguments from the stack. + SDValue FIN = DAG.getFrameIndex(FI, getPointerTy()); + ArgValue2 = DAG.getLoad(MVT::i32, dl, Root, FIN, NULL, 0); + } else { + Reg = MF.addLiveIn(NextVA.getLocReg(), RC); + ArgValue2 = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32); + } + + return DAG.getNode(ARMISD::FMDRR, dl, MVT::f64, ArgValue, ArgValue2); +} + +SDValue ARMTargetLowering::LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG) { MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); @@ -1141,47 +1388,45 @@ ARMTargetLowering::LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG) { // Arguments stored in registers. if (VA.isRegLoc()) { MVT RegVT = VA.getLocVT(); - TargetRegisterClass *RC; - if (AFI->isThumbFunction()) - RC = ARM::tGPRRegisterClass; - else - RC = ARM::GPRRegisterClass; - if (FloatABIType == FloatABI::Hard) { - if (RegVT == MVT::f32) - RC = ARM::SPRRegisterClass; - else if (RegVT == MVT::f64) - RC = ARM::DPRRegisterClass; - } else if (RegVT == MVT::f64) { - // f64 is passed in pairs of GPRs and must be combined. + SDValue ArgValue; + if (VA.needsCustom()) { + // f64 and vector types are split up into multiple registers or + // combinations of registers and stack slots. RegVT = MVT::i32; - } else if (!((RegVT == MVT::i32) || (RegVT == MVT::f32))) - assert(0 && "RegVT not supported by FORMAL_ARGUMENTS Lowering"); - // Transform the arguments stored in physical registers into virtual ones. - unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC); - SDValue ArgValue = DAG.getCopyFromReg(Root, dl, Reg, RegVT); + if (VA.getLocVT() == MVT::v2f64) { + SDValue ArgValue1 = GetF64FormalArgument(VA, ArgLocs[++i], + Root, DAG, dl); + VA = ArgLocs[++i]; // skip ahead to next loc + SDValue ArgValue2 = GetF64FormalArgument(VA, ArgLocs[++i], + Root, DAG, dl); + ArgValue = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64); + ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, + ArgValue, ArgValue1, DAG.getIntPtrConstant(0)); + ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, + ArgValue, ArgValue2, DAG.getIntPtrConstant(1)); + } else + ArgValue = GetF64FormalArgument(VA, ArgLocs[++i], Root, DAG, dl); - // f64 is passed in i32 pairs and must be combined. 
- if (VA.needsCustom()) { - SDValue ArgValue2; + } else { + TargetRegisterClass *RC; + if (FloatABIType == FloatABI::Hard && RegVT == MVT::f32) + RC = ARM::SPRRegisterClass; + else if (FloatABIType == FloatABI::Hard && RegVT == MVT::f64) + RC = ARM::DPRRegisterClass; + else if (AFI->isThumbFunction()) + RC = ARM::tGPRRegisterClass; + else + RC = ARM::GPRRegisterClass; - VA = ArgLocs[++i]; // skip ahead to next loc - if (VA.isMemLoc()) { - // must be APCS to split like this - unsigned ArgSize = VA.getLocVT().getSizeInBits()/8; - int FI = MFI->CreateFixedObject(ArgSize, VA.getLocMemOffset()); - - // Create load node to retrieve arguments from the stack. - SDValue FIN = DAG.getFrameIndex(FI, getPointerTy()); - ArgValue2 = DAG.getLoad(MVT::i32, dl, Root, FIN, NULL, 0); - } else { - Reg = MF.addLiveIn(VA.getLocReg(), RC); - ArgValue2 = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32); - } + assert((RegVT == MVT::i32 || RegVT == MVT::f32 || + (FloatABIType == FloatABI::Hard && RegVT == MVT::f64)) && + "RegVT not supported by FORMAL_ARGUMENTS Lowering"); - ArgValue = DAG.getNode(ARMISD::FMDRR, dl, MVT::f64, - ArgValue, ArgValue2); + // Transform the arguments in physical registers into virtual ones. + unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC); + ArgValue = DAG.getCopyFromReg(Root, dl, Reg, RegVT); } // If this is an 8 or 16-bit value, it is really passed promoted @@ -1638,8 +1883,78 @@ static SDValue ExpandBIT_CONVERT(SDNode *N, SelectionDAG &DAG) { return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Cvt, Cvt.getValue(1)); } -static SDValue ExpandSRx(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *ST) { - assert(N->getValueType(0) == MVT::i64 && +/// getZeroVector - Returns a vector of specified type with all zero elements. +/// +static SDValue getZeroVector(MVT VT, SelectionDAG &DAG, DebugLoc dl) { + assert(VT.isVector() && "Expected a vector type"); + + // Zero vectors are used to represent vector negation and in those cases + // will be implemented with the NEON VNEG instruction. However, VNEG does + // not support i64 elements, so sometimes the zero vectors will need to be + // explicitly constructed. For those cases, and potentially other uses in + // the future, always build zero vectors as <4 x i32> or <2 x i32> bitcasted + // to their dest type. This ensures they get CSE'd. + SDValue Vec; + SDValue Cst = DAG.getTargetConstant(0, MVT::i32); + if (VT.getSizeInBits() == 64) + Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i32, Cst, Cst); + else + Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Cst, Cst, Cst, Cst); + + return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Vec); +} + +/// getOnesVector - Returns a vector of specified type with all bits set. +/// +static SDValue getOnesVector(MVT VT, SelectionDAG &DAG, DebugLoc dl) { + assert(VT.isVector() && "Expected a vector type"); + + // Always build ones vectors as <4 x i32> or <2 x i32> bitcasted to their dest + // type. This ensures they get CSE'd. + SDValue Vec; + SDValue Cst = DAG.getTargetConstant(~0U, MVT::i32); + if (VT.getSizeInBits() == 64) + Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i32, Cst, Cst); + else + Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Cst, Cst, Cst, Cst); + + return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Vec); +} + +static SDValue LowerShift(SDNode *N, SelectionDAG &DAG, + const ARMSubtarget *ST) { + MVT VT = N->getValueType(0); + DebugLoc dl = N->getDebugLoc(); + + // Lower vector shifts on NEON to use VSHL. 
+ if (VT.isVector()) { + assert(ST->hasNEON() && "unexpected vector shift"); + + // Left shifts translate directly to the vshiftu intrinsic. + if (N->getOpcode() == ISD::SHL) + return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, + DAG.getConstant(Intrinsic::arm_neon_vshiftu, MVT::i32), + N->getOperand(0), N->getOperand(1)); + + assert((N->getOpcode() == ISD::SRA || + N->getOpcode() == ISD::SRL) && "unexpected vector shift opcode"); + + // NEON uses the same intrinsics for both left and right shifts. For + // right shifts, the shift amounts are negative, so negate the vector of + // shift amounts. + MVT ShiftVT = N->getOperand(1).getValueType(); + SDValue NegatedCount = DAG.getNode(ISD::SUB, dl, ShiftVT, + getZeroVector(ShiftVT, DAG, dl), + N->getOperand(1)); + Intrinsic::ID vshiftInt = (N->getOpcode() == ISD::SRA ? + Intrinsic::arm_neon_vshifts : + Intrinsic::arm_neon_vshiftu); + return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, + DAG.getConstant(vshiftInt, MVT::i32), + N->getOperand(0), NegatedCount); + } + + assert(VT == MVT::i64 && (N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) && "Unknown shift to lower!"); @@ -1652,7 +1967,6 @@ static SDValue ExpandSRx(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *ST) { if (ST->isThumb()) return SDValue(); // Okay, we have a 64-bit SRA or SRL of 1. Lower this to an RRX expr. - DebugLoc dl = N->getDebugLoc(); SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(0), DAG.getConstant(0, MVT::i32)); SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(0), @@ -1670,6 +1984,273 @@ static SDValue ExpandSRx(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *ST) { return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi); } +static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) { + SDValue TmpOp0, TmpOp1; + bool Invert = false; + bool Swap = false; + unsigned Opc = 0; + + SDValue Op0 = Op.getOperand(0); + SDValue Op1 = Op.getOperand(1); + SDValue CC = Op.getOperand(2); + MVT VT = Op.getValueType(); + ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get(); + DebugLoc dl = Op.getDebugLoc(); + + if (Op.getOperand(1).getValueType().isFloatingPoint()) { + switch (SetCCOpcode) { + default: assert(0 && "Illegal FP comparison"); break; + case ISD::SETUNE: + case ISD::SETNE: Invert = true; // Fallthrough + case ISD::SETOEQ: + case ISD::SETEQ: Opc = ARMISD::VCEQ; break; + case ISD::SETOLT: + case ISD::SETLT: Swap = true; // Fallthrough + case ISD::SETOGT: + case ISD::SETGT: Opc = ARMISD::VCGT; break; + case ISD::SETOLE: + case ISD::SETLE: Swap = true; // Fallthrough + case ISD::SETOGE: + case ISD::SETGE: Opc = ARMISD::VCGE; break; + case ISD::SETUGE: Swap = true; // Fallthrough + case ISD::SETULE: Invert = true; Opc = ARMISD::VCGT; break; + case ISD::SETUGT: Swap = true; // Fallthrough + case ISD::SETULT: Invert = true; Opc = ARMISD::VCGE; break; + case ISD::SETUEQ: Invert = true; // Fallthrough + case ISD::SETONE: + // Expand this to (OLT | OGT). + TmpOp0 = Op0; + TmpOp1 = Op1; + Opc = ISD::OR; + Op0 = DAG.getNode(ARMISD::VCGT, dl, VT, TmpOp1, TmpOp0); + Op1 = DAG.getNode(ARMISD::VCGT, dl, VT, TmpOp0, TmpOp1); + break; + case ISD::SETUO: Invert = true; // Fallthrough + case ISD::SETO: + // Expand this to (OLT | OGE). + TmpOp0 = Op0; + TmpOp1 = Op1; + Opc = ISD::OR; + Op0 = DAG.getNode(ARMISD::VCGT, dl, VT, TmpOp1, TmpOp0); + Op1 = DAG.getNode(ARMISD::VCGE, dl, VT, TmpOp0, TmpOp1); + break; + } + } else { + // Integer comparisons. 
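In the floating-point VSETCC mapping above, NEON only supplies the ordered compares (VCEQ, VCGE, VCGT), so unordered conditions are produced by inverting or swapping those, and SETONE/SETO become an OR of two compares. A scalar model of two representative rows of that table, mainly to make the NaN behavior explicit; this is an illustration, not the DAG lowering itself:

    #include <cassert>
    #include <cmath>

    // NEON-style ordered compare: false whenever either input is NaN.
    static bool vcgt(float a, float b) { return a > b; }

    // SETULE (unordered or less-or-equal) is lowered as NOT(VCGT a, b):
    // if either operand is NaN, VCGT is false, so the inverted result is true.
    static bool setule(float a, float b) { return !vcgt(a, b); }

    // SETONE (ordered and not equal) is expanded to (VCGT b, a) OR (VCGT a, b).
    static bool setone(float a, float b) { return vcgt(b, a) || vcgt(a, b); }

    int main() {
      float nan = std::nanf("");
      assert(setule(1.0f, 2.0f) && setule(2.0f, 2.0f) && !setule(3.0f, 2.0f));
      assert(setule(nan, 2.0f));   // unordered counts as true for ULE
      assert(setone(1.0f, 2.0f) && !setone(2.0f, 2.0f));
      assert(!setone(nan, 2.0f));  // unordered is excluded from ONE
      return 0;
    }

The same reasoning applies to SETNE mapping onto an inverted VCEQ: VCEQ is false for NaN inputs, and the inversion then yields true, which is exactly what an unordered-or-not-equal compare requires.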
+ switch (SetCCOpcode) { + default: assert(0 && "Illegal integer comparison"); break; + case ISD::SETNE: Invert = true; + case ISD::SETEQ: Opc = ARMISD::VCEQ; break; + case ISD::SETLT: Swap = true; + case ISD::SETGT: Opc = ARMISD::VCGT; break; + case ISD::SETLE: Swap = true; + case ISD::SETGE: Opc = ARMISD::VCGE; break; + case ISD::SETULT: Swap = true; + case ISD::SETUGT: Opc = ARMISD::VCGTU; break; + case ISD::SETULE: Swap = true; + case ISD::SETUGE: Opc = ARMISD::VCGEU; break; + } + + // Detect VTST (Vector Test Bits) = vicmp ne (and (op0, op1), zero). + if (Opc == ARMISD::VCEQ) { + + SDValue AndOp; + if (ISD::isBuildVectorAllZeros(Op1.getNode())) + AndOp = Op0; + else if (ISD::isBuildVectorAllZeros(Op0.getNode())) + AndOp = Op1; + + // Ignore bitconvert. + if (AndOp.getNode() && AndOp.getOpcode() == ISD::BIT_CONVERT) + AndOp = AndOp.getOperand(0); + + if (AndOp.getNode() && AndOp.getOpcode() == ISD::AND) { + Opc = ARMISD::VTST; + Op0 = DAG.getNode(ISD::BIT_CONVERT, dl, VT, AndOp.getOperand(0)); + Op1 = DAG.getNode(ISD::BIT_CONVERT, dl, VT, AndOp.getOperand(1)); + Invert = !Invert; + } + } + } + + if (Swap) + std::swap(Op0, Op1); + + SDValue Result = DAG.getNode(Opc, dl, VT, Op0, Op1); + + if (Invert) + Result = DAG.getNOT(dl, Result, VT); + + return Result; +} + +/// isVMOVSplat - Check if the specified splat value corresponds to an immediate +/// VMOV instruction, and if so, return the constant being splatted. +static SDValue isVMOVSplat(uint64_t SplatBits, uint64_t SplatUndef, + unsigned SplatBitSize, SelectionDAG &DAG) { + switch (SplatBitSize) { + case 8: + // Any 1-byte value is OK. + assert((SplatBits & ~0xff) == 0 && "one byte splat value is too big"); + return DAG.getTargetConstant(SplatBits, MVT::i8); + + case 16: + // NEON's 16-bit VMOV supports splat values where only one byte is nonzero. + if ((SplatBits & ~0xff) == 0 || + (SplatBits & ~0xff00) == 0) + return DAG.getTargetConstant(SplatBits, MVT::i16); + break; + + case 32: + // NEON's 32-bit VMOV supports splat values where: + // * only one byte is nonzero, or + // * the least significant byte is 0xff and the second byte is nonzero, or + // * the least significant 2 bytes are 0xff and the third is nonzero. + if ((SplatBits & ~0xff) == 0 || + (SplatBits & ~0xff00) == 0 || + (SplatBits & ~0xff0000) == 0 || + (SplatBits & ~0xff000000) == 0) + return DAG.getTargetConstant(SplatBits, MVT::i32); + + if ((SplatBits & ~0xffff) == 0 && + ((SplatBits | SplatUndef) & 0xff) == 0xff) + return DAG.getTargetConstant(SplatBits | 0xff, MVT::i32); + + if ((SplatBits & ~0xffffff) == 0 && + ((SplatBits | SplatUndef) & 0xffff) == 0xffff) + return DAG.getTargetConstant(SplatBits | 0xffff, MVT::i32); + + // Note: there are a few 32-bit splat values (specifically: 00ffff00, + // ff000000, ff0000ff, and ffff00ff) that are valid for VMOV.I64 but not + // VMOV.I32. A (very) minor optimization would be to replicate the value + // and fall through here to test for a valid 64-bit splat. But, then the + // caller would also need to check and handle the change in size. + break; + + case 64: { + // NEON has a 64-bit VMOV splat where each byte is either 0 or 0xff. 
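The VTST detection in the integer path above rewrites an equality test of (and op0, op1) against zero into a single vector test-bits operation. The per-lane identity it relies on, as a one-line model:

    #include <cassert>
    #include <cstdint>

    // Per-lane identity behind the VTST rewrite:
    //   setcc ne (and a, b), 0   ==>   vtst a, b
    static bool vtst_lane(uint32_t a, uint32_t b) { return (a & b) != 0; }

    int main() {
      assert(vtst_lane(0x8, 0xC));    // shares bit 3
      assert(!vtst_lane(0x8, 0x4));   // disjoint bits
      // The eq form, setcc eq (and a, b), 0, becomes NOT(VTST), which is why
      // the lowering flips the Invert flag when this pattern fires.
      return 0;
    }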
+ uint64_t BitMask = 0xff; + uint64_t Val = 0; + for (int ByteNum = 0; ByteNum < 8; ++ByteNum) { + if (((SplatBits | SplatUndef) & BitMask) == BitMask) + Val |= BitMask; + else if ((SplatBits & BitMask) != 0) + return SDValue(); + BitMask <<= 8; + } + return DAG.getTargetConstant(Val, MVT::i64); + } + + default: + assert(0 && "unexpected size for isVMOVSplat"); + break; + } + + return SDValue(); +} + +/// getVMOVImm - If this is a build_vector of constants which can be +/// formed by using a VMOV instruction of the specified element size, +/// return the constant being splatted. The ByteSize field indicates the +/// number of bytes of each element [1248]. +SDValue ARM::getVMOVImm(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) { + BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N); + APInt SplatBits, SplatUndef; + unsigned SplatBitSize; + bool HasAnyUndefs; + if (! BVN || ! BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, + HasAnyUndefs, ByteSize * 8)) + return SDValue(); + + if (SplatBitSize > ByteSize * 8) + return SDValue(); + + return isVMOVSplat(SplatBits.getZExtValue(), SplatUndef.getZExtValue(), + SplatBitSize, DAG); +} + +static SDValue BuildSplat(SDValue Val, MVT VT, SelectionDAG &DAG, DebugLoc dl) { + // Canonicalize all-zeros and all-ones vectors. + ConstantSDNode *ConstVal = dyn_cast<ConstantSDNode>(Val.getNode()); + if (ConstVal->isNullValue()) + return getZeroVector(VT, DAG, dl); + if (ConstVal->isAllOnesValue()) + return getOnesVector(VT, DAG, dl); + + MVT CanonicalVT; + if (VT.is64BitVector()) { + switch (Val.getValueType().getSizeInBits()) { + case 8: CanonicalVT = MVT::v8i8; break; + case 16: CanonicalVT = MVT::v4i16; break; + case 32: CanonicalVT = MVT::v2i32; break; + case 64: CanonicalVT = MVT::v1i64; break; + default: assert(0 && "unexpected splat element type"); break; + } + } else { + assert(VT.is128BitVector() && "unknown splat vector size"); + switch (Val.getValueType().getSizeInBits()) { + case 8: CanonicalVT = MVT::v16i8; break; + case 16: CanonicalVT = MVT::v8i16; break; + case 32: CanonicalVT = MVT::v4i32; break; + case 64: CanonicalVT = MVT::v2i64; break; + default: assert(0 && "unexpected splat element type"); break; + } + } + + // Build a canonical splat for this value. + SmallVector<SDValue, 8> Ops; + Ops.assign(CanonicalVT.getVectorNumElements(), Val); + SDValue Res = DAG.getNode(ISD::BUILD_VECTOR, dl, CanonicalVT, &Ops[0], + Ops.size()); + return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Res); +} + +// If this is a case we can't handle, return null and let the default +// expansion code take care of it. 
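The VMOV immediate rules checked by isVMOVSplat can be restated compactly. The helpers below are hypothetical names used only for illustration, and they ignore the undef-bit handling the real code uses to complete the 0x..ff forms:

    #include <cassert>
    #include <cstdint>

    // VMOV.I32 accepts a single nonzero byte, or the 0x0000XXff / 0x00XXffff forms.
    static bool isVMOVI32Imm(uint32_t v) {
      if ((v & ~0xffu) == 0 || (v & ~0xff00u) == 0 ||
          (v & ~0xff0000u) == 0 || (v & ~0xff000000u) == 0)
        return true;
      if ((v & ~0xffffu) == 0 && (v & 0xffu) == 0xffu)
        return true;
      return (v & ~0xffffffu) == 0 && (v & 0xffffu) == 0xffffu;
    }

    // VMOV.I64 accepts any value where each byte is either 0x00 or 0xff.
    static bool isVMOVI64Imm(uint64_t v) {
      for (int Byte = 0; Byte < 8; ++Byte) {
        uint8_t b = (v >> (8 * Byte)) & 0xff;
        if (b != 0x00 && b != 0xff)
          return false;
      }
      return true;
    }

    int main() {
      assert(isVMOVI32Imm(0x00005600) && isVMOVI32Imm(0x000034ff));
      assert(!isVMOVI32Imm(0x12340056));            // two independent nonzero bytes
      assert(isVMOVI64Imm(0x00ff00ff00ff00ffULL));
      assert(!isVMOVI64Imm(0x00ff00ff00ff00feULL)); // 0xfe byte is not allowed
      return 0;
    }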
+static SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) { + BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode()); + assert(BVN != 0 && "Expected a BuildVectorSDNode in LowerBUILD_VECTOR"); + DebugLoc dl = Op.getDebugLoc(); + + APInt SplatBits, SplatUndef; + unsigned SplatBitSize; + bool HasAnyUndefs; + if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) { + SDValue Val = isVMOVSplat(SplatBits.getZExtValue(), + SplatUndef.getZExtValue(), SplatBitSize, DAG); + if (Val.getNode()) + return BuildSplat(Val, Op.getValueType(), DAG, dl); + } + + return SDValue(); +} + +static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { + return Op; +} + +static SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) { + return Op; +} + +static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) { + MVT VT = Op.getValueType(); + DebugLoc dl = Op.getDebugLoc(); + assert((VT == MVT::i8 || VT == MVT::i16) && + "unexpected type for custom-lowering vector extract"); + SDValue Vec = Op.getOperand(0); + SDValue Lane = Op.getOperand(1); + Op = DAG.getNode(ARMISD::VGETLANEu, dl, MVT::i32, Vec, Lane); + Op = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Op, DAG.getValueType(VT)); + return DAG.getNode(ISD::TRUNCATE, dl, VT, Op); +} + +static SDValue LowerCONCAT_VECTORS(SDValue Op) { + if (Op.getValueType().is128BitVector() && Op.getNumOperands() == 2) + return Op; + return SDValue(); +} + SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) { switch (Op.getOpcode()) { default: assert(0 && "Don't know how to custom lower this!"); abort(); @@ -1695,8 +2276,15 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) { case ISD::GLOBAL_OFFSET_TABLE: return LowerGLOBAL_OFFSET_TABLE(Op, DAG); case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG); case ISD::BIT_CONVERT: return ExpandBIT_CONVERT(Op.getNode(), DAG); + case ISD::SHL: case ISD::SRL: - case ISD::SRA: return ExpandSRx(Op.getNode(), DAG,Subtarget); + case ISD::SRA: return LowerShift(Op.getNode(), DAG, Subtarget); + case ISD::VSETCC: return LowerVSETCC(Op, DAG); + case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG); + case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG); + case ISD::SCALAR_TO_VECTOR: return LowerSCALAR_TO_VECTOR(Op, DAG); + case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG); + case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op); } return SDValue(); } @@ -1715,7 +2303,7 @@ void ARMTargetLowering::ReplaceNodeResults(SDNode *N, return; case ISD::SRL: case ISD::SRA: { - SDValue Res = ExpandSRx(N, DAG, Subtarget); + SDValue Res = LowerShift(N, DAG, Subtarget); if (Res.getNode()) Results.push_back(Res); return; @@ -1900,6 +2488,294 @@ static SDValue PerformFMRRDCombine(SDNode *N, return SDValue(); } +/// getVShiftImm - Check if this is a valid build_vector for the immediate +/// operand of a vector shift operation, where all the elements of the +/// build_vector must have the same constant integer value. +static bool getVShiftImm(SDValue Op, unsigned ElementBits, int64_t &Cnt) { + // Ignore bit_converts. + while (Op.getOpcode() == ISD::BIT_CONVERT) + Op = Op.getOperand(0); + BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode()); + APInt SplatBits, SplatUndef; + unsigned SplatBitSize; + bool HasAnyUndefs; + if (! BVN || ! 
BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, + HasAnyUndefs, ElementBits) || + SplatBitSize > ElementBits) + return false; + Cnt = SplatBits.getSExtValue(); + return true; +} + +/// isVShiftLImm - Check if this is a valid build_vector for the immediate +/// operand of a vector shift left operation. That value must be in the range: +/// 0 <= Value < ElementBits for a left shift; or +/// 0 <= Value <= ElementBits for a long left shift. +static bool isVShiftLImm(SDValue Op, MVT VT, bool isLong, int64_t &Cnt) { + assert(VT.isVector() && "vector shift count is not a vector type"); + unsigned ElementBits = VT.getVectorElementType().getSizeInBits(); + if (! getVShiftImm(Op, ElementBits, Cnt)) + return false; + return (Cnt >= 0 && (isLong ? Cnt-1 : Cnt) < ElementBits); +} + +/// isVShiftRImm - Check if this is a valid build_vector for the immediate +/// operand of a vector shift right operation. For a shift opcode, the value +/// is positive, but for an intrinsic the value count must be negative. The +/// absolute value must be in the range: +/// 1 <= |Value| <= ElementBits for a right shift; or +/// 1 <= |Value| <= ElementBits/2 for a narrow right shift. +static bool isVShiftRImm(SDValue Op, MVT VT, bool isNarrow, bool isIntrinsic, + int64_t &Cnt) { + assert(VT.isVector() && "vector shift count is not a vector type"); + unsigned ElementBits = VT.getVectorElementType().getSizeInBits(); + if (! getVShiftImm(Op, ElementBits, Cnt)) + return false; + if (isIntrinsic) + Cnt = -Cnt; + return (Cnt >= 1 && Cnt <= (isNarrow ? ElementBits/2 : ElementBits)); +} + +/// PerformIntrinsicCombine - ARM-specific DAG combining for intrinsics. +static SDValue PerformIntrinsicCombine(SDNode *N, SelectionDAG &DAG) { + unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue(); + switch (IntNo) { + default: + // Don't do anything for most intrinsics. + break; + + // Vector shifts: check for immediate versions and lower them. + // Note: This is done during DAG combining instead of DAG legalizing because + // the build_vectors for 64-bit vector element shift counts are generally + // not legal, and it is hard to see their values after they get legalized to + // loads from a constant pool. + case Intrinsic::arm_neon_vshifts: + case Intrinsic::arm_neon_vshiftu: + case Intrinsic::arm_neon_vshiftls: + case Intrinsic::arm_neon_vshiftlu: + case Intrinsic::arm_neon_vshiftn: + case Intrinsic::arm_neon_vrshifts: + case Intrinsic::arm_neon_vrshiftu: + case Intrinsic::arm_neon_vrshiftn: + case Intrinsic::arm_neon_vqshifts: + case Intrinsic::arm_neon_vqshiftu: + case Intrinsic::arm_neon_vqshiftsu: + case Intrinsic::arm_neon_vqshiftns: + case Intrinsic::arm_neon_vqshiftnu: + case Intrinsic::arm_neon_vqshiftnsu: + case Intrinsic::arm_neon_vqrshiftns: + case Intrinsic::arm_neon_vqrshiftnu: + case Intrinsic::arm_neon_vqrshiftnsu: { + MVT VT = N->getOperand(1).getValueType(); + int64_t Cnt; + unsigned VShiftOpc = 0; + + switch (IntNo) { + case Intrinsic::arm_neon_vshifts: + case Intrinsic::arm_neon_vshiftu: + if (isVShiftLImm(N->getOperand(2), VT, false, Cnt)) { + VShiftOpc = ARMISD::VSHL; + break; + } + if (isVShiftRImm(N->getOperand(2), VT, false, true, Cnt)) { + VShiftOpc = (IntNo == Intrinsic::arm_neon_vshifts ? 
+ ARMISD::VSHRs : ARMISD::VSHRu); + break; + } + return SDValue(); + + case Intrinsic::arm_neon_vshiftls: + case Intrinsic::arm_neon_vshiftlu: + if (isVShiftLImm(N->getOperand(2), VT, true, Cnt)) + break; + assert(0 && "invalid shift count for vshll intrinsic"); + abort(); + + case Intrinsic::arm_neon_vrshifts: + case Intrinsic::arm_neon_vrshiftu: + if (isVShiftRImm(N->getOperand(2), VT, false, true, Cnt)) + break; + return SDValue(); + + case Intrinsic::arm_neon_vqshifts: + case Intrinsic::arm_neon_vqshiftu: + if (isVShiftLImm(N->getOperand(2), VT, false, Cnt)) + break; + return SDValue(); + + case Intrinsic::arm_neon_vqshiftsu: + if (isVShiftLImm(N->getOperand(2), VT, false, Cnt)) + break; + assert(0 && "invalid shift count for vqshlu intrinsic"); + abort(); + + case Intrinsic::arm_neon_vshiftn: + case Intrinsic::arm_neon_vrshiftn: + case Intrinsic::arm_neon_vqshiftns: + case Intrinsic::arm_neon_vqshiftnu: + case Intrinsic::arm_neon_vqshiftnsu: + case Intrinsic::arm_neon_vqrshiftns: + case Intrinsic::arm_neon_vqrshiftnu: + case Intrinsic::arm_neon_vqrshiftnsu: + // Narrowing shifts require an immediate right shift. + if (isVShiftRImm(N->getOperand(2), VT, true, true, Cnt)) + break; + assert(0 && "invalid shift count for narrowing vector shift intrinsic"); + abort(); + + default: + assert(0 && "unhandled vector shift"); + } + + switch (IntNo) { + case Intrinsic::arm_neon_vshifts: + case Intrinsic::arm_neon_vshiftu: + // Opcode already set above. + break; + case Intrinsic::arm_neon_vshiftls: + case Intrinsic::arm_neon_vshiftlu: + if (Cnt == VT.getVectorElementType().getSizeInBits()) + VShiftOpc = ARMISD::VSHLLi; + else + VShiftOpc = (IntNo == Intrinsic::arm_neon_vshiftls ? + ARMISD::VSHLLs : ARMISD::VSHLLu); + break; + case Intrinsic::arm_neon_vshiftn: + VShiftOpc = ARMISD::VSHRN; break; + case Intrinsic::arm_neon_vrshifts: + VShiftOpc = ARMISD::VRSHRs; break; + case Intrinsic::arm_neon_vrshiftu: + VShiftOpc = ARMISD::VRSHRu; break; + case Intrinsic::arm_neon_vrshiftn: + VShiftOpc = ARMISD::VRSHRN; break; + case Intrinsic::arm_neon_vqshifts: + VShiftOpc = ARMISD::VQSHLs; break; + case Intrinsic::arm_neon_vqshiftu: + VShiftOpc = ARMISD::VQSHLu; break; + case Intrinsic::arm_neon_vqshiftsu: + VShiftOpc = ARMISD::VQSHLsu; break; + case Intrinsic::arm_neon_vqshiftns: + VShiftOpc = ARMISD::VQSHRNs; break; + case Intrinsic::arm_neon_vqshiftnu: + VShiftOpc = ARMISD::VQSHRNu; break; + case Intrinsic::arm_neon_vqshiftnsu: + VShiftOpc = ARMISD::VQSHRNsu; break; + case Intrinsic::arm_neon_vqrshiftns: + VShiftOpc = ARMISD::VQRSHRNs; break; + case Intrinsic::arm_neon_vqrshiftnu: + VShiftOpc = ARMISD::VQRSHRNu; break; + case Intrinsic::arm_neon_vqrshiftnsu: + VShiftOpc = ARMISD::VQRSHRNsu; break; + } + + return DAG.getNode(VShiftOpc, N->getDebugLoc(), N->getValueType(0), + N->getOperand(1), DAG.getConstant(Cnt, MVT::i32)); + } + + case Intrinsic::arm_neon_vshiftins: { + MVT VT = N->getOperand(1).getValueType(); + int64_t Cnt; + unsigned VShiftOpc = 0; + + if (isVShiftLImm(N->getOperand(3), VT, false, Cnt)) + VShiftOpc = ARMISD::VSLI; + else if (isVShiftRImm(N->getOperand(3), VT, false, true, Cnt)) + VShiftOpc = ARMISD::VSRI; + else { + assert(0 && "invalid shift count for vsli/vsri intrinsic"); + abort(); + } + + return DAG.getNode(VShiftOpc, N->getDebugLoc(), N->getValueType(0), + N->getOperand(1), N->getOperand(2), + DAG.getConstant(Cnt, MVT::i32)); + } + + case Intrinsic::arm_neon_vqrshifts: + case Intrinsic::arm_neon_vqrshiftu: + // No immediate versions of these to check for. 
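The legal immediate ranges enforced by isVShiftLImm and isVShiftRImm above, restated as standalone predicates; keep in mind that for the intrinsics the right-shift count arrives negated and is flipped back before this check:

    #include <cassert>
    #include <cstdint>

    static bool isLegalVShLImm(int64_t Cnt, unsigned EltBits, bool isLong) {
      // 0 <= Cnt < EltBits for a plain left shift; 0 <= Cnt <= EltBits for vshll.
      return Cnt >= 0 && (isLong ? Cnt - 1 : Cnt) < (int64_t)EltBits;
    }

    static bool isLegalVShRImm(int64_t Cnt, unsigned EltBits, bool isNarrow) {
      // 1 <= Cnt <= EltBits for a right shift; 1 <= Cnt <= EltBits/2 when narrowing.
      return Cnt >= 1 && Cnt <= (int64_t)(isNarrow ? EltBits / 2 : EltBits);
    }

    int main() {
      assert(isLegalVShLImm(0, 8, false) && !isLegalVShLImm(8, 8, false));
      assert(isLegalVShLImm(8, 8, true));             // vshll by the full element width
      assert(isLegalVShRImm(8, 8, false) && !isLegalVShRImm(0, 8, false));
      assert(isLegalVShRImm(8, 16, true) && !isLegalVShRImm(9, 16, true));
      return 0;
    }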
+ break; + } + + return SDValue(); +} + +/// PerformShiftCombine - Checks for immediate versions of vector shifts and +/// lowers them. As with the vector shift intrinsics, this is done during DAG +/// combining instead of DAG legalizing because the build_vectors for 64-bit +/// vector element shift counts are generally not legal, and it is hard to see +/// their values after they get legalized to loads from a constant pool. +static SDValue PerformShiftCombine(SDNode *N, SelectionDAG &DAG, + const ARMSubtarget *ST) { + MVT VT = N->getValueType(0); + + // Nothing to be done for scalar shifts. + if (! VT.isVector()) + return SDValue(); + + assert(ST->hasNEON() && "unexpected vector shift"); + int64_t Cnt; + + switch (N->getOpcode()) { + default: assert(0 && "unexpected shift opcode"); + + case ISD::SHL: + if (isVShiftLImm(N->getOperand(1), VT, false, Cnt)) + return DAG.getNode(ARMISD::VSHL, N->getDebugLoc(), VT, N->getOperand(0), + DAG.getConstant(Cnt, MVT::i32)); + break; + + case ISD::SRA: + case ISD::SRL: + if (isVShiftRImm(N->getOperand(1), VT, false, false, Cnt)) { + unsigned VShiftOpc = (N->getOpcode() == ISD::SRA ? + ARMISD::VSHRs : ARMISD::VSHRu); + return DAG.getNode(VShiftOpc, N->getDebugLoc(), VT, N->getOperand(0), + DAG.getConstant(Cnt, MVT::i32)); + } + } + return SDValue(); +} + +/// PerformExtendCombine - Target-specific DAG combining for ISD::SIGN_EXTEND, +/// ISD::ZERO_EXTEND, and ISD::ANY_EXTEND. +static SDValue PerformExtendCombine(SDNode *N, SelectionDAG &DAG, + const ARMSubtarget *ST) { + SDValue N0 = N->getOperand(0); + + // Check for sign- and zero-extensions of vector extract operations of 8- + // and 16-bit vector elements. NEON supports these directly. They are + // handled during DAG combining because type legalization will promote them + // to 32-bit types and it is messy to recognize the operations after that. 
+ if (ST->hasNEON() && N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT) { + SDValue Vec = N0.getOperand(0); + SDValue Lane = N0.getOperand(1); + MVT VT = N->getValueType(0); + MVT EltVT = N0.getValueType(); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + + if (VT == MVT::i32 && + (EltVT == MVT::i8 || EltVT == MVT::i16) && + TLI.isTypeLegal(Vec.getValueType())) { + + unsigned Opc = 0; + switch (N->getOpcode()) { + default: assert(0 && "unexpected opcode"); + case ISD::SIGN_EXTEND: + Opc = ARMISD::VGETLANEs; + break; + case ISD::ZERO_EXTEND: + case ISD::ANY_EXTEND: + Opc = ARMISD::VGETLANEu; + break; + } + return DAG.getNode(Opc, N->getDebugLoc(), VT, Vec, Lane); + } + } + + return SDValue(); +} + SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const { switch (N->getOpcode()) { @@ -1907,8 +2783,17 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N, case ISD::ADD: return PerformADDCombine(N, DCI); case ISD::SUB: return PerformSUBCombine(N, DCI); case ARMISD::FMRRD: return PerformFMRRDCombine(N, DCI); + case ISD::INTRINSIC_WO_CHAIN: + return PerformIntrinsicCombine(N, DCI.DAG); + case ISD::SHL: + case ISD::SRA: + case ISD::SRL: + return PerformShiftCombine(N, DCI.DAG, Subtarget); + case ISD::SIGN_EXTEND: + case ISD::ZERO_EXTEND: + case ISD::ANY_EXTEND: + return PerformExtendCombine(N, DCI.DAG, Subtarget); } - return SDValue(); } diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h index 8f53e39..631e37f 100644 --- a/lib/Target/ARM/ARMISelLowering.h +++ b/lib/Target/ARM/ARMISelLowering.h @@ -67,10 +67,65 @@ namespace llvm { EH_SJLJ_SETJMP, // SjLj exception handling setjmp EH_SJLJ_LONGJMP, // SjLj exception handling longjmp - THREAD_POINTER + THREAD_POINTER, + + VCEQ, // Vector compare equal. + VCGE, // Vector compare greater than or equal. + VCGEU, // Vector compare unsigned greater than or equal. + VCGT, // Vector compare greater than. + VCGTU, // Vector compare unsigned greater than. + VTST, // Vector test bits. + + // Vector shift by immediate: + VSHL, // ...left + VSHRs, // ...right (signed) + VSHRu, // ...right (unsigned) + VSHLLs, // ...left long (signed) + VSHLLu, // ...left long (unsigned) + VSHLLi, // ...left long (with maximum shift count) + VSHRN, // ...right narrow + + // Vector rounding shift by immediate: + VRSHRs, // ...right (signed) + VRSHRu, // ...right (unsigned) + VRSHRN, // ...right narrow + + // Vector saturating shift by immediate: + VQSHLs, // ...left (signed) + VQSHLu, // ...left (unsigned) + VQSHLsu, // ...left (signed to unsigned) + VQSHRNs, // ...right narrow (signed) + VQSHRNu, // ...right narrow (unsigned) + VQSHRNsu, // ...right narrow (signed to unsigned) + + // Vector saturating rounding shift by immediate: + VQRSHRNs, // ...right narrow (signed) + VQRSHRNu, // ...right narrow (unsigned) + VQRSHRNsu, // ...right narrow (signed to unsigned) + + // Vector shift and insert: + VSLI, // ...left + VSRI, // ...right + + // Vector get lane (VMOV scalar to ARM core register) + // (These are used for 8- and 16-bit element types only.) + VGETLANEu, // zero-extend vector extract element + VGETLANEs, // sign-extend vector extract element + + // Vector duplicate lane (128-bit result only; 64-bit is a shuffle) + VDUPLANEQ // splat a lane from a 64-bit vector to a 128-bit vector }; } + /// Define some predicates that are used for node matching. 
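The extend combine above folds the extension into the lane move itself, choosing between VGETLANEs and VGETLANEu. The difference for an i8 lane, as a scalar model:

    #include <cassert>
    #include <cstdint>

    // VGETLANEs: move a lane to a core register with sign extension to 32 bits.
    static int32_t vgetlane_s8(int8_t lane) { return (int32_t)lane; }
    // VGETLANEu: the same move, but zero-extended.
    static int32_t vgetlane_u8(int8_t lane) { return (int32_t)(uint8_t)lane; }

    int main() {
      assert(vgetlane_s8((int8_t)0x80) == -128);
      assert(vgetlane_u8((int8_t)0x80) == 128);
      // ANY_EXTEND is handled with the unsigned form, since either extension
      // is acceptable when the upper bits are unspecified.
      return 0;
    }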
+ namespace ARM { + /// getVMOVImm - If this is a build_vector of constants which can be + /// formed by using a VMOV instruction of the specified element size, + /// return the constant being splatted. The ByteSize field indicates the + /// number of bytes of each element [1248]. + SDValue getVMOVImm(SDNode *N, unsigned ByteSize, SelectionDAG &DAG); + } + //===--------------------------------------------------------------------===// // ARMTargetLowering - ARM Implementation of the TargetLowering interface @@ -151,6 +206,21 @@ namespace llvm { /// unsigned ARMPCLabelIndex; + void addTypeForNEON(MVT VT, MVT PromotedLdStVT, MVT PromotedBitwiseVT); + void addDRTypeForNEON(MVT VT); + void addQRTypeForNEON(MVT VT); + + typedef SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPassVector; + void PassF64ArgInRegs(CallSDNode *TheCall, SelectionDAG &DAG, + SDValue Chain, SDValue &Arg, + RegsToPassVector &RegsToPass, + CCValAssign &VA, CCValAssign &NextVA, + SDValue &StackPtr, + SmallVector<SDValue, 8> &MemOpChains, + ISD::ArgFlagsTy Flags); + SDValue GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA, + SDValue &Root, SelectionDAG &DAG, DebugLoc dl); + CCAssignFn *CCAssignFnForNode(unsigned CC, bool Return) const; SDValue LowerMemOpCallTo(CallSDNode *TheCall, SelectionDAG &DAG, const SDValue &StackPtr, const CCValAssign &VA, diff --git a/lib/Target/ARM/ARMInstrFormats.td b/lib/Target/ARM/ARMInstrFormats.td index 9a1e1c2..14cca7a 100644 --- a/lib/Target/ARM/ARMInstrFormats.td +++ b/lib/Target/ARM/ARMInstrFormats.td @@ -49,6 +49,11 @@ def VFPMiscFrm : Format<22>; def ThumbFrm : Format<23>; +def NEONFrm : Format<24>; +def NEONGetLnFrm : Format<25>; +def NEONSetLnFrm : Format<26>; +def NEONDupFrm : Format<27>; + // Misc flag for data processing instructions that indicates whether // the instruction has a Rn register operand. class UnaryDP { bit isUnaryDataProc = 1; } @@ -737,6 +742,14 @@ class TIx2<dag outs, dag ins, string asm, list<dag> pattern> class TJTI<dag outs, dag ins, string asm, list<dag> pattern> : ThumbI<outs, ins, AddrModeNone, SizeSpecial, asm, "", pattern>; +// ThumbPat - Same as Pat<>, but requires that the compiler be in Thumb mode. +class ThumbPat<dag pattern, dag result> : Pat<pattern, result> { + list<Predicate> Predicates = [IsThumb]; +} + +class ThumbV5Pat<dag pattern, dag result> : Pat<pattern, result> { + list<Predicate> Predicates = [IsThumb, HasV5T]; +} //===----------------------------------------------------------------------===// @@ -857,12 +870,102 @@ class AVConv5I<bits<8> opcod1, bits<4> opcod2, dag oops, dag iops, string opc, //===----------------------------------------------------------------------===// +//===----------------------------------------------------------------------===// +// ARM NEON Instruction templates. +// -// ThumbPat - Same as Pat<>, but requires that the compiler be in Thumb mode. 
-class ThumbPat<dag pattern, dag result> : Pat<pattern, result> { - list<Predicate> Predicates = [IsThumb]; -} - -class ThumbV5Pat<dag pattern, dag result> : Pat<pattern, result> { - list<Predicate> Predicates = [IsThumb, HasV5T]; -} +class NeonI<dag oops, dag iops, AddrMode am, IndexMode im, string asm, + string cstr, list<dag> pattern> + : InstARM<am, Size4Bytes, im, NEONFrm, cstr> { + let OutOperandList = oops; + let InOperandList = iops; + let AsmString = asm; + let Pattern = pattern; + list<Predicate> Predicates = [HasNEON]; +} + +class NI<dag oops, dag iops, string asm, list<dag> pattern> + : NeonI<oops, iops, AddrModeNone, IndexModeNone, asm, "", pattern> { +} + +class NDataI<dag oops, dag iops, string asm, string cstr, list<dag> pattern> + : NeonI<oops, iops, AddrModeNone, IndexModeNone, asm, cstr, pattern> { + let Inst{31-25} = 0b1111001; +} + +// NEON "one register and a modified immediate" format. +class N1ModImm<bit op23, bits<3> op21_19, bits<4> op11_8, bit op7, bit op6, + bit op5, bit op4, + dag oops, dag iops, string asm, string cstr, list<dag> pattern> + : NDataI<oops, iops, asm, cstr, pattern> { + let Inst{23} = op23; + let Inst{21-19} = op21_19; + let Inst{11-8} = op11_8; + let Inst{7} = op7; + let Inst{6} = op6; + let Inst{5} = op5; + let Inst{4} = op4; +} + +// NEON 2 vector register format. +class N2V<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, bits<2> op17_16, + bits<5> op11_7, bit op6, bit op4, + dag oops, dag iops, string asm, string cstr, list<dag> pattern> + : NDataI<oops, iops, asm, cstr, pattern> { + let Inst{24-23} = op24_23; + let Inst{21-20} = op21_20; + let Inst{19-18} = op19_18; + let Inst{17-16} = op17_16; + let Inst{11-7} = op11_7; + let Inst{6} = op6; + let Inst{4} = op4; +} + +// NEON 2 vector register with immediate. +class N2VImm<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8, bit op7, + bit op6, bit op4, + dag oops, dag iops, string asm, string cstr, list<dag> pattern> + : NDataI<oops, iops, asm, cstr, pattern> { + let Inst{24} = op24; + let Inst{23} = op23; + let Inst{21-16} = op21_16; + let Inst{11-8} = op11_8; + let Inst{7} = op7; + let Inst{6} = op6; + let Inst{4} = op4; +} + +// NEON 3 vector register format. +class N3V<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op6, bit op4, + dag oops, dag iops, string asm, string cstr, list<dag> pattern> + : NDataI<oops, iops, asm, cstr, pattern> { + let Inst{24} = op24; + let Inst{23} = op23; + let Inst{21-20} = op21_20; + let Inst{11-8} = op11_8; + let Inst{6} = op6; + let Inst{4} = op4; +} + +// NEON VMOVs between scalar and core registers. 
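As a rough sketch of what the fixed-bit lets in a class like N3V amount to, the function below packs those fields into a 32-bit word on top of the NDataI prefix (Inst{31-25} = 0b1111001). The variable operand fields are left at zero, so this is illustrative only, not a usable encoder:

    #include <cassert>
    #include <cstdint>

    // Mirror the N3V lets: Inst{24}, Inst{23}, Inst{21-20}, Inst{11-8},
    // Inst{6}, Inst{4}, plus the NDataI data-processing prefix in bits 31-25.
    static uint32_t encodeN3VFixedBits(unsigned op24, unsigned op23,
                                       unsigned op21_20, unsigned op11_8,
                                       unsigned op6, unsigned op4) {
      uint32_t Inst = 0;
      Inst |= 0x79u << 25;           // 0b1111001 in bits 31-25
      Inst |= (op24 & 1) << 24;
      Inst |= (op23 & 1) << 23;
      Inst |= (op21_20 & 3) << 20;
      Inst |= (op11_8 & 0xf) << 8;
      Inst |= (op6 & 1) << 6;
      Inst |= (op4 & 1) << 4;
      return Inst;
    }

    int main() {
      // Even with all class parameters zero, the prefix bits are present.
      assert((encodeN3VFixedBits(0, 0, 0, 0, 0, 0) >> 25) == 0x79u);
      return 0;
    }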
+class NVLaneOp<bits<8> opcod1, bits<4> opcod2, bits<2> opcod3, + dag oops, dag iops, Format f, string opc, string asm, + list<dag> pattern> + : AI<oops, iops, f, opc, asm, pattern> { + let Inst{27-20} = opcod1; + let Inst{11-8} = opcod2; + let Inst{6-5} = opcod3; + let Inst{4} = 1; + list<Predicate> Predicates = [HasNEON]; +} +class NVGetLane<bits<8> opcod1, bits<4> opcod2, bits<2> opcod3, + dag oops, dag iops, string opc, string asm, list<dag> pattern> + : NVLaneOp<opcod1, opcod2, opcod3, oops, iops, NEONGetLnFrm, opc, asm, + pattern>; +class NVSetLane<bits<8> opcod1, bits<4> opcod2, bits<2> opcod3, + dag oops, dag iops, string opc, string asm, list<dag> pattern> + : NVLaneOp<opcod1, opcod2, opcod3, oops, iops, NEONSetLnFrm, opc, asm, + pattern>; +class NVDup<bits<8> opcod1, bits<4> opcod2, bits<2> opcod3, + dag oops, dag iops, string opc, string asm, list<dag> pattern> + : NVLaneOp<opcod1, opcod2, opcod3, oops, iops, NEONDupFrm, opc, asm, pattern>; diff --git a/lib/Target/ARM/ARMInstrInfo.cpp b/lib/Target/ARM/ARMInstrInfo.cpp index d19fb8e..e8da927 100644 --- a/lib/Target/ARM/ARMInstrInfo.cpp +++ b/lib/Target/ARM/ARMInstrInfo.cpp @@ -59,6 +59,8 @@ bool ARMInstrInfo::isMoveInstr(const MachineInstr &MI, return false; case ARM::FCPYS: case ARM::FCPYD: + case ARM::VMOVD: + case ARM::VMOVQ: SrcReg = MI.getOperand(1).getReg(); DstReg = MI.getOperand(0).getReg(); return true; @@ -528,6 +530,8 @@ bool ARMInstrInfo::copyRegToReg(MachineBasicBlock &MBB, else if (DestRC == ARM::DPRRegisterClass) AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::FCPYD), DestReg) .addReg(SrcReg)); + else if (DestRC == ARM::QPRRegisterClass) + BuildMI(MBB, I, DL, get(ARM::VMOVQ), DestReg).addReg(SrcReg); else return false; @@ -844,6 +848,10 @@ canFoldMemoryOperand(const MachineInstr *MI, case ARM::FCPYS: case ARM::FCPYD: return true; + + case ARM::VMOVD: + case ARM::VMOVQ: + return false; // FIXME } return false; diff --git a/lib/Target/ARM/ARMInstrInfo.h b/lib/Target/ARM/ARMInstrInfo.h index 13ff3fe..9658f3b 100644 --- a/lib/Target/ARM/ARMInstrInfo.h +++ b/lib/Target/ARM/ARMInstrInfo.h @@ -114,6 +114,12 @@ namespace ARMII { // Thumb format ThumbFrm = 23 << FormShift, + // NEON format + NEONFrm = 24 << FormShift, + NEONGetLnFrm = 25 << FormShift, + NEONSetLnFrm = 26 << FormShift, + NEONDupFrm = 27 << FormShift, + //===------------------------------------------------------------------===// // Field shifts - such shifts are used to set field while generating // machine instructions. diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td index 4707e3b..44e67e9 100644 --- a/lib/Target/ARM/ARMInstrInfo.td +++ b/lib/Target/ARM/ARMInstrInfo.td @@ -93,9 +93,15 @@ def ARMeh_sjlj_setjmp: SDNode<"ARMISD::EH_SJLJ_SETJMP", SDT_ARMEH_SJLJ_Setjmp>; def HasV5T : Predicate<"Subtarget->hasV5TOps()">; def HasV5TE : Predicate<"Subtarget->hasV5TEOps()">; def HasV6 : Predicate<"Subtarget->hasV6Ops()">; +def HasV7 : Predicate<"Subtarget->hasV7Ops()">; +def HasVFP2 : Predicate<"Subtarget->hasVFP2()">; +def HasVFP3 : Predicate<"Subtarget->hasVFP3()">; +def HasNEON : Predicate<"Subtarget->hasNEON()">; def IsThumb : Predicate<"Subtarget->isThumb()">; def HasThumb2 : Predicate<"Subtarget->hasThumb2()">; def IsARM : Predicate<"!Subtarget->isThumb()">; +def IsDarwin : Predicate<"Subtarget->isTargetDarwin()">; +def IsNotDarwin : Predicate<"!Subtarget->isTargetDarwin()">; //===----------------------------------------------------------------------===// // ARM Flag Definitions. 
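The new NEONFrm/NEONGetLnFrm/NEONSetLnFrm/NEONDupFrm entries extend the per-instruction format field that is packed into TSFlags as value << FormShift. A toy restatement of that packing; the shift and mask values below are stand-ins, since the real ARMII constants are not visible in this hunk:

    #include <cassert>

    namespace toy {
      // Stand-in constants for illustration only; not the real ARMII values.
      enum { FormShift = 7, FormMask = 0x3f << FormShift };
      enum { NEONFrm = 24 << FormShift, NEONGetLnFrm = 25 << FormShift };

      inline unsigned getFormat(unsigned TSFlags) { return TSFlags & FormMask; }
    }

    int main() {
      unsigned TSFlags = toy::NEONGetLnFrm;   // as if emitted by TableGen
      assert(toy::getFormat(TSFlags) == toy::NEONGetLnFrm);
      assert(toy::getFormat(TSFlags) != toy::NEONFrm);
      return 0;
    }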
@@ -518,6 +524,24 @@ def PICSTRB : AXI2stb<(outs), (ins GPR:$src, addrmodepc:$addr, pred:$p), } } // isNotDuplicable = 1 + +// LEApcrel - Load a pc-relative address into a register without offending the +// assembler. +def LEApcrel : AXI1<0x0, (outs GPR:$dst), (ins i32imm:$label, pred:$p), Pseudo, + !strconcat(!strconcat(".set PCRELV${:uid}, ($label-(", + "${:private}PCRELL${:uid}+8))\n"), + !strconcat("${:private}PCRELL${:uid}:\n\t", + "add$p $dst, pc, #PCRELV${:uid}")), + []>; + +def LEApcrelJT : AXI1<0x0, (outs GPR:$dst), (ins i32imm:$label, i32imm:$id, pred:$p), + Pseudo, + !strconcat(!strconcat(".set PCRELV${:uid}, (${label}_${id:no_hash}-(", + "${:private}PCRELL${:uid}+8))\n"), + !strconcat("${:private}PCRELL${:uid}:\n\t", + "add$p $dst, pc, #PCRELV${:uid}")), + []>; + //===----------------------------------------------------------------------===// // Control Flow Instructions. // @@ -539,21 +563,22 @@ let isReturn = 1, isTerminator = 1 in LdStMulFrm, "ldm${p}${addr:submode} $addr, $dst1", []>; +// On non-Darwin platforms R9 is callee-saved. let isCall = 1, Itinerary = IIC_Br, Defs = [R0, R1, R2, R3, R12, LR, D0, D1, D2, D3, D4, D5, D6, D7, CPSR] in { def BL : ABXI<0b1011, (outs), (ins i32imm:$func, variable_ops), "bl ${func:call}", - [(ARMcall tglobaladdr:$func)]>; + [(ARMcall tglobaladdr:$func)]>, Requires<[IsNotDarwin]>; def BL_pred : ABI<0b1011, (outs), (ins i32imm:$func, variable_ops), "bl", " ${func:call}", - [(ARMcall_pred tglobaladdr:$func)]>; + [(ARMcall_pred tglobaladdr:$func)]>, Requires<[IsNotDarwin]>; // ARMv5T and above def BLX : AXI<(outs), (ins GPR:$func, variable_ops), BrMiscFrm, "blx $func", - [(ARMcall GPR:$func)]>, Requires<[IsARM, HasV5T]> { + [(ARMcall GPR:$func)]>, Requires<[IsARM, HasV5T, IsNotDarwin]> { let Inst{7-4} = 0b0011; let Inst{19-8} = 0b111111111111; let Inst{27-20} = 0b00010010; @@ -563,7 +588,36 @@ let isCall = 1, Itinerary = IIC_Br, // ARMv4T def BX : ABXIx2<(outs), (ins GPR:$func, variable_ops), "mov lr, pc\n\tbx $func", - [(ARMcall_nolink GPR:$func)]>; + [(ARMcall_nolink GPR:$func)]>, Requires<[IsNotDarwin]>; + } +} + +// On Darwin R9 is call-clobbered. 
+let isCall = 1, Itinerary = IIC_Br, + Defs = [R0, R1, R2, R3, R9, R12, LR, + D0, D1, D2, D3, D4, D5, D6, D7, CPSR] in { + def BLr9 : ABXI<0b1011, (outs), (ins i32imm:$func, variable_ops), + "bl ${func:call}", + [(ARMcall tglobaladdr:$func)]>, Requires<[IsDarwin]>; + + def BLr9_pred : ABI<0b1011, (outs), (ins i32imm:$func, variable_ops), + "bl", " ${func:call}", + [(ARMcall_pred tglobaladdr:$func)]>, Requires<[IsDarwin]>; + + // ARMv5T and above + def BLXr9 : AXI<(outs), (ins GPR:$func, variable_ops), BrMiscFrm, + "blx $func", + [(ARMcall GPR:$func)]>, Requires<[IsARM, HasV5T, IsDarwin]> { + let Inst{7-4} = 0b0011; + let Inst{19-8} = 0b111111111111; + let Inst{27-20} = 0b00010010; + } + + let Uses = [LR] in { + // ARMv4T + def BXr9 : ABXIx2<(outs), (ins GPR:$func, variable_ops), + "mov lr, pc\n\tbx $func", + [(ARMcall_nolink GPR:$func)]>, Requires<[IsDarwin]>; } } @@ -823,9 +877,9 @@ defm UXTH : AI_unary_rrot<0b01101111, defm UXTB16 : AI_unary_rrot<0b01101100, "uxtb16", UnOpFrag<(and node:$Src, 0x00FF00FF)>>; -def : ARMV6Pat<(and (shl GPR:$Src, 8), 0xFF00FF), +def : ARMV6Pat<(and (shl GPR:$Src, (i32 8)), 0xFF00FF), (UXTB16r_rot GPR:$Src, 24)>; -def : ARMV6Pat<(and (srl GPR:$Src, 8), 0xFF00FF), +def : ARMV6Pat<(and (srl GPR:$Src, (i32 8)), 0xFF00FF), (UXTB16r_rot GPR:$Src, 8)>; defm UXTAB : AI_bin_rrot<0b01101110, "uxtab", @@ -1006,7 +1060,7 @@ multiclass AI_smul<string opc, PatFrag opnode> { def BT : AMulxyI<0b0001011, (outs GPR:$dst), (ins GPR:$a, GPR:$b), !strconcat(opc, "bt"), " $dst, $a, $b", [(set GPR:$dst, (opnode (sext_inreg GPR:$a, i16), - (sra GPR:$b, 16)))]>, + (sra GPR:$b, (i32 16))))]>, Requires<[IsARM, HasV5TE]> { let Inst{5} = 0; let Inst{6} = 1; @@ -1014,7 +1068,7 @@ multiclass AI_smul<string opc, PatFrag opnode> { def TB : AMulxyI<0b0001011, (outs GPR:$dst), (ins GPR:$a, GPR:$b), !strconcat(opc, "tb"), " $dst, $a, $b", - [(set GPR:$dst, (opnode (sra GPR:$a, 16), + [(set GPR:$dst, (opnode (sra GPR:$a, (i32 16)), (sext_inreg GPR:$b, i16)))]>, Requires<[IsARM, HasV5TE]> { let Inst{5} = 1; @@ -1023,8 +1077,8 @@ multiclass AI_smul<string opc, PatFrag opnode> { def TT : AMulxyI<0b0001011, (outs GPR:$dst), (ins GPR:$a, GPR:$b), !strconcat(opc, "tt"), " $dst, $a, $b", - [(set GPR:$dst, (opnode (sra GPR:$a, 16), - (sra GPR:$b, 16)))]>, + [(set GPR:$dst, (opnode (sra GPR:$a, (i32 16)), + (sra GPR:$b, (i32 16))))]>, Requires<[IsARM, HasV5TE]> { let Inst{5} = 1; let Inst{6} = 1; @@ -1033,7 +1087,7 @@ multiclass AI_smul<string opc, PatFrag opnode> { def WB : AMulxyI<0b0001001, (outs GPR:$dst), (ins GPR:$a, GPR:$b), !strconcat(opc, "wb"), " $dst, $a, $b", [(set GPR:$dst, (sra (opnode GPR:$a, - (sext_inreg GPR:$b, i16)), 16))]>, + (sext_inreg GPR:$b, i16)), (i32 16)))]>, Requires<[IsARM, HasV5TE]> { let Inst{5} = 1; let Inst{6} = 0; @@ -1042,7 +1096,7 @@ multiclass AI_smul<string opc, PatFrag opnode> { def WT : AMulxyI<0b0001001, (outs GPR:$dst), (ins GPR:$a, GPR:$b), !strconcat(opc, "wt"), " $dst, $a, $b", [(set GPR:$dst, (sra (opnode GPR:$a, - (sra GPR:$b, 16)), 16))]>, + (sra GPR:$b, (i32 16))), (i32 16)))]>, Requires<[IsARM, HasV5TE]> { let Inst{5} = 1; let Inst{6} = 1; @@ -1064,7 +1118,7 @@ multiclass AI_smla<string opc, PatFrag opnode> { def BT : AMulxyI<0b0001000, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), !strconcat(opc, "bt"), " $dst, $a, $b, $acc", [(set GPR:$dst, (add GPR:$acc, (opnode (sext_inreg GPR:$a, i16), - (sra GPR:$b, 16))))]>, + (sra GPR:$b, (i32 16)))))]>, Requires<[IsARM, HasV5TE]> { let Inst{5} = 0; let Inst{6} = 1; @@ -1072,7 +1126,7 @@ multiclass 
AI_smla<string opc, PatFrag opnode> { def TB : AMulxyI<0b0001000, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), !strconcat(opc, "tb"), " $dst, $a, $b, $acc", - [(set GPR:$dst, (add GPR:$acc, (opnode (sra GPR:$a, 16), + [(set GPR:$dst, (add GPR:$acc, (opnode (sra GPR:$a, (i32 16)), (sext_inreg GPR:$b, i16))))]>, Requires<[IsARM, HasV5TE]> { let Inst{5} = 1; @@ -1081,8 +1135,8 @@ multiclass AI_smla<string opc, PatFrag opnode> { def TT : AMulxyI<0b0001000, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), !strconcat(opc, "tt"), " $dst, $a, $b, $acc", - [(set GPR:$dst, (add GPR:$acc, (opnode (sra GPR:$a, 16), - (sra GPR:$b, 16))))]>, + [(set GPR:$dst, (add GPR:$acc, (opnode (sra GPR:$a, (i32 16)), + (sra GPR:$b, (i32 16)))))]>, Requires<[IsARM, HasV5TE]> { let Inst{5} = 1; let Inst{6} = 1; @@ -1091,7 +1145,7 @@ multiclass AI_smla<string opc, PatFrag opnode> { def WB : AMulxyI<0b0001001, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), !strconcat(opc, "wb"), " $dst, $a, $b, $acc", [(set GPR:$dst, (add GPR:$acc, (sra (opnode GPR:$a, - (sext_inreg GPR:$b, i16)), 16)))]>, + (sext_inreg GPR:$b, i16)), (i32 16))))]>, Requires<[IsARM, HasV5TE]> { let Inst{5} = 0; let Inst{6} = 0; @@ -1100,7 +1154,7 @@ multiclass AI_smla<string opc, PatFrag opnode> { def WT : AMulxyI<0b0001001, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), !strconcat(opc, "wt"), " $dst, $a, $b, $acc", [(set GPR:$dst, (add GPR:$acc, (sra (opnode GPR:$a, - (sra GPR:$b, 16)), 16)))]>, + (sra GPR:$b, (i32 16))), (i32 16))))]>, Requires<[IsARM, HasV5TE]> { let Inst{5} = 0; let Inst{6} = 1; @@ -1136,10 +1190,10 @@ def REV : AMiscA1I<0b01101011, (outs GPR:$dst), (ins GPR:$src), def REV16 : AMiscA1I<0b01101011, (outs GPR:$dst), (ins GPR:$src), "rev16", " $dst, $src", [(set GPR:$dst, - (or (and (srl GPR:$src, 8), 0xFF), - (or (and (shl GPR:$src, 8), 0xFF00), - (or (and (srl GPR:$src, 8), 0xFF0000), - (and (shl GPR:$src, 8), 0xFF000000)))))]>, + (or (and (srl GPR:$src, (i32 8)), 0xFF), + (or (and (shl GPR:$src, (i32 8)), 0xFF00), + (or (and (srl GPR:$src, (i32 8)), 0xFF0000), + (and (shl GPR:$src, (i32 8)), 0xFF000000)))))]>, Requires<[IsARM, HasV6]> { let Inst{7-4} = 0b1011; let Inst{11-8} = 0b1111; @@ -1150,8 +1204,8 @@ def REVSH : AMiscA1I<0b01101111, (outs GPR:$dst), (ins GPR:$src), "revsh", " $dst, $src", [(set GPR:$dst, (sext_inreg - (or (srl (and GPR:$src, 0xFF00), 8), - (shl GPR:$src, 8)), i16))]>, + (or (srl (and GPR:$src, 0xFF00), (i32 8)), + (shl GPR:$src, (i32 8))), i16))]>, Requires<[IsARM, HasV6]> { let Inst{7-4} = 0b1011; let Inst{11-8} = 0b1111; @@ -1186,7 +1240,7 @@ def PKHTB : AMiscA1I<0b01101000, (outs GPR:$dst), // Alternate cases for PKHTB where identities eliminate some nodes. Note that // a shift amount of 0 is *not legal* here, it is PKHBT instead. -def : ARMV6Pat<(or (and GPR:$src1, 0xFFFF0000), (srl GPR:$src2, 16)), +def : ARMV6Pat<(or (and GPR:$src1, 0xFFFF0000), (srl GPR:$src2, (i32 16))), (PKHTB GPR:$src1, GPR:$src2, 16)>; def : ARMV6Pat<(or (and GPR:$src1, 0xFFFF0000), (and (srl GPR:$src2, imm1_15:$shamt), 0xFFFF)), @@ -1240,23 +1294,6 @@ def MOVCCi : AI1<0b1101, (outs GPR:$dst), RegConstraint<"$false = $dst">, UnaryDP; -// LEApcrel - Load a pc-relative address into a register without offending the -// assembler. 
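The smul/smla pattern updates in this area only make the shift-amount type explicit, writing (i32 16) instead of a bare 16; the halfword-multiply semantics they select are unchanged. For reference, a scalar model of three of the selected instructions, based on the architectural definitions rather than code from the patch:

    #include <cassert>
    #include <cstdint>

    // SMULBB: bottom x bottom, SMULBT: bottom x top, SMULWB: (a * bottom) >> 16.
    static int32_t smulbb(int32_t a, int32_t b) { return (int16_t)a * (int16_t)b; }
    static int32_t smulbt(int32_t a, int32_t b) { return (int16_t)a * (int16_t)(b >> 16); }
    static int32_t smulwb(int32_t a, int32_t b) {
      return (int32_t)(((int64_t)a * (int16_t)b) >> 16);
    }

    int main() {
      assert(smulbb(0x00010002, 0x00030004) == 2 * 4);
      assert(smulbt(0x00010002, 0x00030004) == 2 * 3);
      assert(smulwb(0x00100000, 0x00000002) == (0x00100000 * 2) >> 16);
      return 0;
    }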
-def LEApcrel : AXI1<0x0, (outs GPR:$dst), (ins i32imm:$label, pred:$p), Pseudo, - !strconcat(!strconcat(".set PCRELV${:uid}, ($label-(", - "${:private}PCRELL${:uid}+8))\n"), - !strconcat("${:private}PCRELL${:uid}:\n\t", - "add$p $dst, pc, #PCRELV${:uid}")), - []>; - -def LEApcrelJT : AXI1<0x0, (outs GPR:$dst), (ins i32imm:$label, i32imm:$id, pred:$p), - Pseudo, - !strconcat(!strconcat(".set PCRELV${:uid}, (${label}_${id:no_hash}-(", - "${:private}PCRELL${:uid}+8))\n"), - !strconcat("${:private}PCRELL${:uid}:\n\t", - "add$p $dst, pc, #PCRELV${:uid}")), - []>; - //===----------------------------------------------------------------------===// // TLS Instructions // @@ -1321,7 +1358,10 @@ def : ARMPat<(xor GPR:$LHS, so_imm2part:$RHS), // Direct calls -def : ARMPat<(ARMcall texternalsym:$func), (BL texternalsym:$func)>; +def : ARMPat<(ARMcall texternalsym:$func), (BL texternalsym:$func)>, + Requires<[IsNotDarwin]>; +def : ARMPat<(ARMcall texternalsym:$func), (BLr9 texternalsym:$func)>, + Requires<[IsDarwin]>; // zextload i1 -> zextload i8 def : ARMPat<(zextloadi1 addrmode2:$addr), (LDRB addrmode2:$addr)>; @@ -1335,47 +1375,54 @@ def : ARMPat<(extloadi8 addrmodepc:$addr), (PICLDRB addrmodepc:$addr)>; def : ARMPat<(extloadi16 addrmodepc:$addr), (PICLDRH addrmodepc:$addr)>; // smul* and smla* -def : ARMV5TEPat<(mul (sra (shl GPR:$a, 16), 16), (sra (shl GPR:$b, 16), 16)), +def : ARMV5TEPat<(mul (sra (shl GPR:$a, (i32 16)), (i32 16)), + (sra (shl GPR:$b, (i32 16)), (i32 16))), (SMULBB GPR:$a, GPR:$b)>; def : ARMV5TEPat<(mul sext_16_node:$a, sext_16_node:$b), (SMULBB GPR:$a, GPR:$b)>; -def : ARMV5TEPat<(mul (sra (shl GPR:$a, 16), 16), (sra GPR:$b, 16)), +def : ARMV5TEPat<(mul (sra (shl GPR:$a, (i32 16)), (i32 16)), + (sra GPR:$b, (i32 16))), (SMULBT GPR:$a, GPR:$b)>; -def : ARMV5TEPat<(mul sext_16_node:$a, (sra GPR:$b, 16)), +def : ARMV5TEPat<(mul sext_16_node:$a, (sra GPR:$b, (i32 16))), (SMULBT GPR:$a, GPR:$b)>; -def : ARMV5TEPat<(mul (sra GPR:$a, 16), (sra (shl GPR:$b, 16), 16)), +def : ARMV5TEPat<(mul (sra GPR:$a, (i32 16)), + (sra (shl GPR:$b, (i32 16)), (i32 16))), (SMULTB GPR:$a, GPR:$b)>; -def : ARMV5TEPat<(mul (sra GPR:$a, 16), sext_16_node:$b), +def : ARMV5TEPat<(mul (sra GPR:$a, (i32 16)), sext_16_node:$b), (SMULTB GPR:$a, GPR:$b)>; -def : ARMV5TEPat<(sra (mul GPR:$a, (sra (shl GPR:$b, 16), 16)), 16), +def : ARMV5TEPat<(sra (mul GPR:$a, (sra (shl GPR:$b, (i32 16)), (i32 16))), + (i32 16)), (SMULWB GPR:$a, GPR:$b)>; -def : ARMV5TEPat<(sra (mul GPR:$a, sext_16_node:$b), 16), +def : ARMV5TEPat<(sra (mul GPR:$a, sext_16_node:$b), (i32 16)), (SMULWB GPR:$a, GPR:$b)>; def : ARMV5TEPat<(add GPR:$acc, - (mul (sra (shl GPR:$a, 16), 16), - (sra (shl GPR:$b, 16), 16))), + (mul (sra (shl GPR:$a, (i32 16)), (i32 16)), + (sra (shl GPR:$b, (i32 16)), (i32 16)))), (SMLABB GPR:$a, GPR:$b, GPR:$acc)>; def : ARMV5TEPat<(add GPR:$acc, (mul sext_16_node:$a, sext_16_node:$b)), (SMLABB GPR:$a, GPR:$b, GPR:$acc)>; def : ARMV5TEPat<(add GPR:$acc, - (mul (sra (shl GPR:$a, 16), 16), (sra GPR:$b, 16))), + (mul (sra (shl GPR:$a, (i32 16)), (i32 16)), + (sra GPR:$b, (i32 16)))), (SMLABT GPR:$a, GPR:$b, GPR:$acc)>; def : ARMV5TEPat<(add GPR:$acc, - (mul sext_16_node:$a, (sra GPR:$b, 16))), + (mul sext_16_node:$a, (sra GPR:$b, (i32 16)))), (SMLABT GPR:$a, GPR:$b, GPR:$acc)>; def : ARMV5TEPat<(add GPR:$acc, - (mul (sra GPR:$a, 16), (sra (shl GPR:$b, 16), 16))), + (mul (sra GPR:$a, (i32 16)), + (sra (shl GPR:$b, (i32 16)), (i32 16)))), (SMLATB GPR:$a, GPR:$b, GPR:$acc)>; def : ARMV5TEPat<(add GPR:$acc, - (mul (sra GPR:$a, 
16), sext_16_node:$b)), + (mul (sra GPR:$a, (i32 16)), sext_16_node:$b)), (SMLATB GPR:$a, GPR:$b, GPR:$acc)>; def : ARMV5TEPat<(add GPR:$acc, - (sra (mul GPR:$a, (sra (shl GPR:$b, 16), 16)), 16)), + (sra (mul GPR:$a, (sra (shl GPR:$b, (i32 16)), (i32 16))), + (i32 16))), (SMLAWB GPR:$a, GPR:$b, GPR:$acc)>; def : ARMV5TEPat<(add GPR:$acc, - (sra (mul GPR:$a, sext_16_node:$b), 16)), + (sra (mul GPR:$a, sext_16_node:$b), (i32 16))), (SMLAWB GPR:$a, GPR:$b, GPR:$acc)>; //===----------------------------------------------------------------------===// @@ -1395,3 +1442,9 @@ include "ARMInstrThumb2.td" // include "ARMInstrVFP.td" + +//===----------------------------------------------------------------------===// +// Advanced SIMD (NEON) Support +// + +include "ARMInstrNEON.td" diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td new file mode 100644 index 0000000..a62597b --- /dev/null +++ b/lib/Target/ARM/ARMInstrNEON.td @@ -0,0 +1,1665 @@ +//===- ARMInstrNEON.td - NEON support for ARM -----------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file describes the ARM NEON instruction set. +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// NEON-specific DAG Nodes. +//===----------------------------------------------------------------------===// + +def SDTARMVCMP : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisSameAs<1, 2>]>; + +def NEONvceq : SDNode<"ARMISD::VCEQ", SDTARMVCMP>; +def NEONvcge : SDNode<"ARMISD::VCGE", SDTARMVCMP>; +def NEONvcgeu : SDNode<"ARMISD::VCGEU", SDTARMVCMP>; +def NEONvcgt : SDNode<"ARMISD::VCGT", SDTARMVCMP>; +def NEONvcgtu : SDNode<"ARMISD::VCGTU", SDTARMVCMP>; +def NEONvtst : SDNode<"ARMISD::VTST", SDTARMVCMP>; + +// Types for vector shift by immediates. The "SHX" version is for long and +// narrow operations where the source and destination vectors have different +// types. The "SHINS" version is for shift and insert operations. 
+def SDTARMVSH : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisSameAs<0, 1>, + SDTCisVT<2, i32>]>; +def SDTARMVSHX : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisInt<1>, + SDTCisVT<2, i32>]>; +def SDTARMVSHINS : SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisSameAs<0, 1>, + SDTCisSameAs<0, 2>, SDTCisVT<3, i32>]>; + +def NEONvshl : SDNode<"ARMISD::VSHL", SDTARMVSH>; +def NEONvshrs : SDNode<"ARMISD::VSHRs", SDTARMVSH>; +def NEONvshru : SDNode<"ARMISD::VSHRu", SDTARMVSH>; +def NEONvshlls : SDNode<"ARMISD::VSHLLs", SDTARMVSHX>; +def NEONvshllu : SDNode<"ARMISD::VSHLLu", SDTARMVSHX>; +def NEONvshlli : SDNode<"ARMISD::VSHLLi", SDTARMVSHX>; +def NEONvshrn : SDNode<"ARMISD::VSHRN", SDTARMVSHX>; + +def NEONvrshrs : SDNode<"ARMISD::VRSHRs", SDTARMVSH>; +def NEONvrshru : SDNode<"ARMISD::VRSHRu", SDTARMVSH>; +def NEONvrshrn : SDNode<"ARMISD::VRSHRN", SDTARMVSHX>; + +def NEONvqshls : SDNode<"ARMISD::VQSHLs", SDTARMVSH>; +def NEONvqshlu : SDNode<"ARMISD::VQSHLu", SDTARMVSH>; +def NEONvqshlsu : SDNode<"ARMISD::VQSHLsu", SDTARMVSH>; +def NEONvqshrns : SDNode<"ARMISD::VQSHRNs", SDTARMVSHX>; +def NEONvqshrnu : SDNode<"ARMISD::VQSHRNu", SDTARMVSHX>; +def NEONvqshrnsu : SDNode<"ARMISD::VQSHRNsu", SDTARMVSHX>; + +def NEONvqrshrns : SDNode<"ARMISD::VQRSHRNs", SDTARMVSHX>; +def NEONvqrshrnu : SDNode<"ARMISD::VQRSHRNu", SDTARMVSHX>; +def NEONvqrshrnsu : SDNode<"ARMISD::VQRSHRNsu", SDTARMVSHX>; + +def NEONvsli : SDNode<"ARMISD::VSLI", SDTARMVSHINS>; +def NEONvsri : SDNode<"ARMISD::VSRI", SDTARMVSHINS>; + +def SDTARMVGETLN : SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisInt<1>, + SDTCisVT<2, i32>]>; +def NEONvgetlaneu : SDNode<"ARMISD::VGETLANEu", SDTARMVGETLN>; +def NEONvgetlanes : SDNode<"ARMISD::VGETLANEs", SDTARMVGETLN>; + +def NEONvduplaneq : SDNode<"ARMISD::VDUPLANEQ", + SDTypeProfile<1, 2, [SDTCisVT<2, i32>]>>; + +//===----------------------------------------------------------------------===// +// NEON operand definitions +//===----------------------------------------------------------------------===// + +// addrmode_neonldstm := reg +// +/* TODO: Take advantage of vldm. +def addrmode_neonldstm : Operand<i32>, + ComplexPattern<i32, 2, "SelectAddrModeNeonLdStM", []> { + let PrintMethod = "printAddrNeonLdStMOperand"; + let MIOperandInfo = (ops GPR, i32imm); +} +*/ + +//===----------------------------------------------------------------------===// +// NEON load / store instructions +//===----------------------------------------------------------------------===// + +/* TODO: Take advantage of vldm. +let mayLoad = 1 in { +def VLDMD : NI<(outs), + (ins addrmode_neonldstm:$addr, reglist:$dst1, variable_ops), + "vldm${addr:submode} ${addr:base}, $dst1", + []>; + +def VLDMS : NI<(outs), + (ins addrmode_neonldstm:$addr, reglist:$dst1, variable_ops), + "vldm${addr:submode} ${addr:base}, $dst1", + []>; +} +*/ + +// Use vldmia to load a Q register as a D register pair. +def VLDRQ : NI<(outs QPR:$dst), (ins GPR:$addr), + "vldmia $addr, ${dst:dregpair}", + [(set QPR:$dst, (v2f64 (load GPR:$addr)))]>; + +// Use vstmia to store a Q register as a D register pair. +def VSTRQ : NI<(outs), (ins QPR:$src, GPR:$addr), + "vstmia $addr, ${src:dregpair}", + [(store (v2f64 QPR:$src), GPR:$addr)]>; + + +//===----------------------------------------------------------------------===// +// NEON pattern fragments +//===----------------------------------------------------------------------===// + +// Extract D sub-registers of Q registers. 
+// (arm_dsubreg_0 is 5; arm_dsubreg_1 is 6) +def SubReg_i8_reg : SDNodeXForm<imm, [{ + return CurDAG->getTargetConstant(5 + N->getZExtValue() / 8, MVT::i32); +}]>; +def SubReg_i16_reg : SDNodeXForm<imm, [{ + return CurDAG->getTargetConstant(5 + N->getZExtValue() / 4, MVT::i32); +}]>; +def SubReg_i32_reg : SDNodeXForm<imm, [{ + return CurDAG->getTargetConstant(5 + N->getZExtValue() / 2, MVT::i32); +}]>; +def SubReg_f64_reg : SDNodeXForm<imm, [{ + return CurDAG->getTargetConstant(5 + N->getZExtValue(), MVT::i32); +}]>; + +// Translate lane numbers from Q registers to D subregs. +def SubReg_i8_lane : SDNodeXForm<imm, [{ + return CurDAG->getTargetConstant(N->getZExtValue() & 7, MVT::i32); +}]>; +def SubReg_i16_lane : SDNodeXForm<imm, [{ + return CurDAG->getTargetConstant(N->getZExtValue() & 3, MVT::i32); +}]>; +def SubReg_i32_lane : SDNodeXForm<imm, [{ + return CurDAG->getTargetConstant(N->getZExtValue() & 1, MVT::i32); +}]>; + +//===----------------------------------------------------------------------===// +// Instruction Classes +//===----------------------------------------------------------------------===// + +// Basic 2-register operations, both double- and quad-register. +class N2VD<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, + bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr, + ValueType ResTy, ValueType OpTy, SDNode OpNode> + : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4, (outs DPR:$dst), + (ins DPR:$src), !strconcat(OpcodeStr, "\t$dst, $src"), "", + [(set DPR:$dst, (ResTy (OpNode (OpTy DPR:$src))))]>; +class N2VQ<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, + bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr, + ValueType ResTy, ValueType OpTy, SDNode OpNode> + : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4, (outs QPR:$dst), + (ins QPR:$src), !strconcat(OpcodeStr, "\t$dst, $src"), "", + [(set QPR:$dst, (ResTy (OpNode (OpTy QPR:$src))))]>; + +// Basic 2-register intrinsics, both double- and quad-register. +class N2VDInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, + bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr, + ValueType ResTy, ValueType OpTy, Intrinsic IntOp> + : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4, (outs DPR:$dst), + (ins DPR:$src), !strconcat(OpcodeStr, "\t$dst, $src"), "", + [(set DPR:$dst, (ResTy (IntOp (OpTy DPR:$src))))]>; +class N2VQInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, + bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr, + ValueType ResTy, ValueType OpTy, Intrinsic IntOp> + : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4, (outs QPR:$dst), + (ins QPR:$src), !strconcat(OpcodeStr, "\t$dst, $src"), "", + [(set QPR:$dst, (ResTy (IntOp (OpTy QPR:$src))))]>; + +// Narrow 2-register intrinsics. +class N2VNInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, + bits<2> op17_16, bits<5> op11_7, bit op6, bit op4, + string OpcodeStr, ValueType TyD, ValueType TyQ, Intrinsic IntOp> + : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4, (outs DPR:$dst), + (ins QPR:$src), !strconcat(OpcodeStr, "\t$dst, $src"), "", + [(set DPR:$dst, (TyD (IntOp (TyQ QPR:$src))))]>; + +// Long 2-register intrinsics. (This is currently only used for VMOVL and is +// derived from N2VImm instead of N2V because of the way the size is encoded.) 
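The SubReg_* transforms defined earlier in this file map a Q-register lane onto one of the two D sub-registers (indices 5 and 6, per the comment) plus a lane within that D register. Restated for i16 elements as a quick standalone check:

    #include <cassert>
    #include <utility>

    // For a Q register viewed as 8 x i16, lane L lives in D sub-register
    // 5 + L/4 (the arm_dsubreg_0/1 indices) at lane L & 3 within it.
    static std::pair<unsigned, unsigned> qLaneToDSubreg_i16(unsigned Lane) {
      return { 5 + Lane / 4, Lane & 3 };
    }

    int main() {
      assert(qLaneToDSubreg_i16(0) == std::make_pair(5u, 0u));
      assert(qLaneToDSubreg_i16(3) == std::make_pair(5u, 3u));
      assert(qLaneToDSubreg_i16(4) == std::make_pair(6u, 0u));  // crosses into the high D reg
      assert(qLaneToDSubreg_i16(7) == std::make_pair(6u, 3u));
      return 0;
    }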
+class N2VLInt<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8, bit op7, + bit op6, bit op4, string OpcodeStr, ValueType TyQ, ValueType TyD, + Intrinsic IntOp> + : N2VImm<op24, op23, op21_16, op11_8, op7, op6, op4, (outs QPR:$dst), + (ins DPR:$src), !strconcat(OpcodeStr, "\t$dst, $src"), "", + [(set QPR:$dst, (TyQ (IntOp (TyD DPR:$src))))]>; + +// Basic 3-register operations, both double- and quad-register. +class N3VD<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, + string OpcodeStr, ValueType ResTy, ValueType OpTy, + SDNode OpNode, bit Commutable> + : N3V<op24, op23, op21_20, op11_8, 0, op4, + (outs DPR:$dst), (ins DPR:$src1, DPR:$src2), + !strconcat(OpcodeStr, "\t$dst, $src1, $src2"), "", + [(set DPR:$dst, (ResTy (OpNode (OpTy DPR:$src1), (OpTy DPR:$src2))))]> { + let isCommutable = Commutable; +} +class N3VQ<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, + string OpcodeStr, ValueType ResTy, ValueType OpTy, + SDNode OpNode, bit Commutable> + : N3V<op24, op23, op21_20, op11_8, 1, op4, + (outs QPR:$dst), (ins QPR:$src1, QPR:$src2), + !strconcat(OpcodeStr, "\t$dst, $src1, $src2"), "", + [(set QPR:$dst, (ResTy (OpNode (OpTy QPR:$src1), (OpTy QPR:$src2))))]> { + let isCommutable = Commutable; +} + +// Basic 3-register intrinsics, both double- and quad-register. +class N3VDInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, + string OpcodeStr, ValueType ResTy, ValueType OpTy, + Intrinsic IntOp, bit Commutable> + : N3V<op24, op23, op21_20, op11_8, 0, op4, + (outs DPR:$dst), (ins DPR:$src1, DPR:$src2), + !strconcat(OpcodeStr, "\t$dst, $src1, $src2"), "", + [(set DPR:$dst, (ResTy (IntOp (OpTy DPR:$src1), (OpTy DPR:$src2))))]> { + let isCommutable = Commutable; +} +class N3VQInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, + string OpcodeStr, ValueType ResTy, ValueType OpTy, + Intrinsic IntOp, bit Commutable> + : N3V<op24, op23, op21_20, op11_8, 1, op4, + (outs QPR:$dst), (ins QPR:$src1, QPR:$src2), + !strconcat(OpcodeStr, "\t$dst, $src1, $src2"), "", + [(set QPR:$dst, (ResTy (IntOp (OpTy QPR:$src1), (OpTy QPR:$src2))))]> { + let isCommutable = Commutable; +} + +// Multiply-Add/Sub operations, both double- and quad-register. +class N3VDMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, + string OpcodeStr, ValueType Ty, SDNode MulOp, SDNode OpNode> + : N3V<op24, op23, op21_20, op11_8, 0, op4, + (outs DPR:$dst), (ins DPR:$src1, DPR:$src2, DPR:$src3), + !strconcat(OpcodeStr, "\t$dst, $src2, $src3"), "$src1 = $dst", + [(set DPR:$dst, (Ty (OpNode DPR:$src1, + (Ty (MulOp DPR:$src2, DPR:$src3)))))]>; +class N3VQMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, + string OpcodeStr, ValueType Ty, SDNode MulOp, SDNode OpNode> + : N3V<op24, op23, op21_20, op11_8, 1, op4, + (outs QPR:$dst), (ins QPR:$src1, QPR:$src2, QPR:$src3), + !strconcat(OpcodeStr, "\t$dst, $src2, $src3"), "$src1 = $dst", + [(set QPR:$dst, (Ty (OpNode QPR:$src1, + (Ty (MulOp QPR:$src2, QPR:$src3)))))]>; + +// Neon 3-argument intrinsics, both double- and quad-register. +// The destination register is also used as the first source operand register. 
+class N3VDInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, + string OpcodeStr, ValueType ResTy, ValueType OpTy, + Intrinsic IntOp> + : N3V<op24, op23, op21_20, op11_8, 0, op4, + (outs DPR:$dst), (ins DPR:$src1, DPR:$src2, DPR:$src3), + !strconcat(OpcodeStr, "\t$dst, $src2, $src3"), "$src1 = $dst", + [(set DPR:$dst, (ResTy (IntOp (OpTy DPR:$src1), + (OpTy DPR:$src2), (OpTy DPR:$src3))))]>; +class N3VQInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, + string OpcodeStr, ValueType ResTy, ValueType OpTy, + Intrinsic IntOp> + : N3V<op24, op23, op21_20, op11_8, 1, op4, + (outs QPR:$dst), (ins QPR:$src1, QPR:$src2, QPR:$src3), + !strconcat(OpcodeStr, "\t$dst, $src2, $src3"), "$src1 = $dst", + [(set QPR:$dst, (ResTy (IntOp (OpTy QPR:$src1), + (OpTy QPR:$src2), (OpTy QPR:$src3))))]>; + +// Neon Long 3-argument intrinsic. The destination register is +// a quad-register and is also used as the first source operand register. +class N3VLInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, + string OpcodeStr, ValueType TyQ, ValueType TyD, Intrinsic IntOp> + : N3V<op24, op23, op21_20, op11_8, 0, op4, + (outs QPR:$dst), (ins QPR:$src1, DPR:$src2, DPR:$src3), + !strconcat(OpcodeStr, "\t$dst, $src2, $src3"), "$src1 = $dst", + [(set QPR:$dst, + (TyQ (IntOp (TyQ QPR:$src1), (TyD DPR:$src2), (TyD DPR:$src3))))]>; + +// Narrowing 3-register intrinsics. +class N3VNInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, + string OpcodeStr, ValueType TyD, ValueType TyQ, + Intrinsic IntOp, bit Commutable> + : N3V<op24, op23, op21_20, op11_8, 0, op4, + (outs DPR:$dst), (ins QPR:$src1, QPR:$src2), + !strconcat(OpcodeStr, "\t$dst, $src1, $src2"), "", + [(set DPR:$dst, (TyD (IntOp (TyQ QPR:$src1), (TyQ QPR:$src2))))]> { + let isCommutable = Commutable; +} + +// Long 3-register intrinsics. +class N3VLInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, + string OpcodeStr, ValueType TyQ, ValueType TyD, + Intrinsic IntOp, bit Commutable> + : N3V<op24, op23, op21_20, op11_8, 0, op4, + (outs QPR:$dst), (ins DPR:$src1, DPR:$src2), + !strconcat(OpcodeStr, "\t$dst, $src1, $src2"), "", + [(set QPR:$dst, (TyQ (IntOp (TyD DPR:$src1), (TyD DPR:$src2))))]> { + let isCommutable = Commutable; +} + +// Wide 3-register intrinsics. +class N3VWInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, + string OpcodeStr, ValueType TyQ, ValueType TyD, + Intrinsic IntOp, bit Commutable> + : N3V<op24, op23, op21_20, op11_8, 0, op4, + (outs QPR:$dst), (ins QPR:$src1, DPR:$src2), + !strconcat(OpcodeStr, "\t$dst, $src1, $src2"), "", + [(set QPR:$dst, (TyQ (IntOp (TyQ QPR:$src1), (TyD DPR:$src2))))]> { + let isCommutable = Commutable; +} + +// Pairwise long 2-register intrinsics, both double- and quad-register. 
+class N2VDPLInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, + bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr, + ValueType ResTy, ValueType OpTy, Intrinsic IntOp> + : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4, (outs DPR:$dst), + (ins DPR:$src), !strconcat(OpcodeStr, "\t$dst, $src"), "", + [(set DPR:$dst, (ResTy (IntOp (OpTy DPR:$src))))]>; +class N2VQPLInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, + bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr, + ValueType ResTy, ValueType OpTy, Intrinsic IntOp> + : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4, (outs QPR:$dst), + (ins QPR:$src), !strconcat(OpcodeStr, "\t$dst, $src"), "", + [(set QPR:$dst, (ResTy (IntOp (OpTy QPR:$src))))]>; + +// Pairwise long 2-register accumulate intrinsics, +// both double- and quad-register. +// The destination register is also used as the first source operand register. +class N2VDPLInt2<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, + bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr, + ValueType ResTy, ValueType OpTy, Intrinsic IntOp> + : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4, + (outs DPR:$dst), (ins DPR:$src1, DPR:$src2), + !strconcat(OpcodeStr, "\t$dst, $src2"), "$src1 = $dst", + [(set DPR:$dst, (ResTy (IntOp (ResTy DPR:$src1), (OpTy DPR:$src2))))]>; +class N2VQPLInt2<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, + bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr, + ValueType ResTy, ValueType OpTy, Intrinsic IntOp> + : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4, + (outs QPR:$dst), (ins QPR:$src1, QPR:$src2), + !strconcat(OpcodeStr, "\t$dst, $src2"), "$src1 = $dst", + [(set QPR:$dst, (ResTy (IntOp (ResTy QPR:$src1), (OpTy QPR:$src2))))]>; + +// Shift by immediate, +// both double- and quad-register. +class N2VDSh<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8, bit op7, + bit op4, string OpcodeStr, ValueType Ty, SDNode OpNode> + : N2VImm<op24, op23, op21_16, op11_8, op7, 0, op4, + (outs DPR:$dst), (ins DPR:$src, i32imm:$SIMM), + !strconcat(OpcodeStr, "\t$dst, $src, $SIMM"), "", + [(set DPR:$dst, (Ty (OpNode (Ty DPR:$src), (i32 imm:$SIMM))))]>; +class N2VQSh<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8, bit op7, + bit op4, string OpcodeStr, ValueType Ty, SDNode OpNode> + : N2VImm<op24, op23, op21_16, op11_8, op7, 1, op4, + (outs QPR:$dst), (ins QPR:$src, i32imm:$SIMM), + !strconcat(OpcodeStr, "\t$dst, $src, $SIMM"), "", + [(set QPR:$dst, (Ty (OpNode (Ty QPR:$src), (i32 imm:$SIMM))))]>; + +// Long shift by immediate. +class N2VLSh<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8, bit op7, + bit op6, bit op4, string OpcodeStr, ValueType ResTy, + ValueType OpTy, SDNode OpNode> + : N2VImm<op24, op23, op21_16, op11_8, op7, op6, op4, + (outs QPR:$dst), (ins DPR:$src, i32imm:$SIMM), + !strconcat(OpcodeStr, "\t$dst, $src, $SIMM"), "", + [(set QPR:$dst, (ResTy (OpNode (OpTy DPR:$src), + (i32 imm:$SIMM))))]>; + +// Narrow shift by immediate. +class N2VNSh<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8, bit op7, + bit op6, bit op4, string OpcodeStr, ValueType ResTy, + ValueType OpTy, SDNode OpNode> + : N2VImm<op24, op23, op21_16, op11_8, op7, op6, op4, + (outs DPR:$dst), (ins QPR:$src, i32imm:$SIMM), + !strconcat(OpcodeStr, "\t$dst, $src, $SIMM"), "", + [(set DPR:$dst, (ResTy (OpNode (OpTy QPR:$src), + (i32 imm:$SIMM))))]>; + +// Shift right by immediate and accumulate, +// both double- and quad-register. 
+class N2VDShAdd<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8, bit op7, + bit op4, string OpcodeStr, ValueType Ty, SDNode ShOp> + : N2VImm<op24, op23, op21_16, op11_8, op7, 0, op4, + (outs DPR:$dst), (ins DPR:$src1, DPR:$src2, i32imm:$SIMM), + !strconcat(OpcodeStr, "\t$dst, $src2, $SIMM"), "$src1 = $dst", + [(set DPR:$dst, (Ty (add DPR:$src1, + (Ty (ShOp DPR:$src2, (i32 imm:$SIMM))))))]>; +class N2VQShAdd<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8, bit op7, + bit op4, string OpcodeStr, ValueType Ty, SDNode ShOp> + : N2VImm<op24, op23, op21_16, op11_8, op7, 1, op4, + (outs QPR:$dst), (ins QPR:$src1, QPR:$src2, i32imm:$SIMM), + !strconcat(OpcodeStr, "\t$dst, $src2, $SIMM"), "$src1 = $dst", + [(set QPR:$dst, (Ty (add QPR:$src1, + (Ty (ShOp QPR:$src2, (i32 imm:$SIMM))))))]>; + +// Shift by immediate and insert, +// both double- and quad-register. +class N2VDShIns<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8, bit op7, + bit op4, string OpcodeStr, ValueType Ty, SDNode ShOp> + : N2VImm<op24, op23, op21_16, op11_8, op7, 0, op4, + (outs DPR:$dst), (ins DPR:$src1, DPR:$src2, i32imm:$SIMM), + !strconcat(OpcodeStr, "\t$dst, $src2, $SIMM"), "$src1 = $dst", + [(set DPR:$dst, (Ty (ShOp DPR:$src1, DPR:$src2, (i32 imm:$SIMM))))]>; +class N2VQShIns<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8, bit op7, + bit op4, string OpcodeStr, ValueType Ty, SDNode ShOp> + : N2VImm<op24, op23, op21_16, op11_8, op7, 1, op4, + (outs QPR:$dst), (ins QPR:$src1, QPR:$src2, i32imm:$SIMM), + !strconcat(OpcodeStr, "\t$dst, $src2, $SIMM"), "$src1 = $dst", + [(set QPR:$dst, (Ty (ShOp QPR:$src1, QPR:$src2, (i32 imm:$SIMM))))]>; + +// Convert, with fractional bits immediate, +// both double- and quad-register. +class N2VCvtD<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8, bit op7, + bit op4, string OpcodeStr, ValueType ResTy, ValueType OpTy, + Intrinsic IntOp> + : N2VImm<op24, op23, op21_16, op11_8, op7, 0, op4, + (outs DPR:$dst), (ins DPR:$src, i32imm:$SIMM), + !strconcat(OpcodeStr, "\t$dst, $src, $SIMM"), "", + [(set DPR:$dst, (ResTy (IntOp (OpTy DPR:$src), (i32 imm:$SIMM))))]>; +class N2VCvtQ<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8, bit op7, + bit op4, string OpcodeStr, ValueType ResTy, ValueType OpTy, + Intrinsic IntOp> + : N2VImm<op24, op23, op21_16, op11_8, op7, 1, op4, + (outs QPR:$dst), (ins QPR:$src, i32imm:$SIMM), + !strconcat(OpcodeStr, "\t$dst, $src, $SIMM"), "", + [(set QPR:$dst, (ResTy (IntOp (OpTy QPR:$src), (i32 imm:$SIMM))))]>; + +//===----------------------------------------------------------------------===// +// Multiclasses +//===----------------------------------------------------------------------===// + +// Neon 3-register vector operations. + +// First with only element sizes of 8, 16 and 32 bits: +multiclass N3V_QHS<bit op24, bit op23, bits<4> op11_8, bit op4, + string OpcodeStr, SDNode OpNode, bit Commutable = 0> { + // 64-bit vector types. + def v8i8 : N3VD<op24, op23, 0b00, op11_8, op4, !strconcat(OpcodeStr, "8"), + v8i8, v8i8, OpNode, Commutable>; + def v4i16 : N3VD<op24, op23, 0b01, op11_8, op4, !strconcat(OpcodeStr, "16"), + v4i16, v4i16, OpNode, Commutable>; + def v2i32 : N3VD<op24, op23, 0b10, op11_8, op4, !strconcat(OpcodeStr, "32"), + v2i32, v2i32, OpNode, Commutable>; + + // 128-bit vector types. 
+  def v16i8 : N3VQ<op24, op23, 0b00, op11_8, op4, !strconcat(OpcodeStr, "8"),
+                   v16i8, v16i8, OpNode, Commutable>;
+  def v8i16 : N3VQ<op24, op23, 0b01, op11_8, op4, !strconcat(OpcodeStr, "16"),
+                   v8i16, v8i16, OpNode, Commutable>;
+  def v4i32 : N3VQ<op24, op23, 0b10, op11_8, op4, !strconcat(OpcodeStr, "32"),
+                   v4i32, v4i32, OpNode, Commutable>;
+}
+
+// ....then also with element size of 64 bits:
+multiclass N3V_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
+                    string OpcodeStr, SDNode OpNode, bit Commutable = 0>
+  : N3V_QHS<op24, op23, op11_8, op4, OpcodeStr, OpNode, Commutable> {
+  def v1i64 : N3VD<op24, op23, 0b11, op11_8, op4, !strconcat(OpcodeStr, "64"),
+                   v1i64, v1i64, OpNode, Commutable>;
+  def v2i64 : N3VQ<op24, op23, 0b11, op11_8, op4, !strconcat(OpcodeStr, "64"),
+                   v2i64, v2i64, OpNode, Commutable>;
+}
+
+
+// Neon Narrowing 2-register vector intrinsics,
+// source operand element sizes of 16, 32 and 64 bits:
+multiclass N2VNInt_HSD<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
+                       bits<5> op11_7, bit op6, bit op4, string OpcodeStr,
+                       Intrinsic IntOp> {
+  def v8i8 : N2VNInt<op24_23, op21_20, 0b00, op17_16, op11_7, op6, op4,
+                     !strconcat(OpcodeStr, "16"), v8i8, v8i16, IntOp>;
+  def v4i16 : N2VNInt<op24_23, op21_20, 0b01, op17_16, op11_7, op6, op4,
+                      !strconcat(OpcodeStr, "32"), v4i16, v4i32, IntOp>;
+  def v2i32 : N2VNInt<op24_23, op21_20, 0b10, op17_16, op11_7, op6, op4,
+                      !strconcat(OpcodeStr, "64"), v2i32, v2i64, IntOp>;
+}
+
+
+// Neon Lengthening 2-register vector intrinsic (currently specific to VMOVL),
+// source operand element sizes of 8, 16 and 32 bits:
+multiclass N2VLInt_QHS<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6,
+                       bit op4, string OpcodeStr, Intrinsic IntOp> {
+  def v8i16 : N2VLInt<op24, op23, 0b001000, op11_8, op7, op6, op4,
+                      !strconcat(OpcodeStr, "8"), v8i16, v8i8, IntOp>;
+  def v4i32 : N2VLInt<op24, op23, 0b010000, op11_8, op7, op6, op4,
+                      !strconcat(OpcodeStr, "16"), v4i32, v4i16, IntOp>;
+  def v2i64 : N2VLInt<op24, op23, 0b100000, op11_8, op7, op6, op4,
+                      !strconcat(OpcodeStr, "32"), v2i64, v2i32, IntOp>;
+}
+
+
+// Neon 3-register vector intrinsics.
+
+// First with only element sizes of 16 and 32 bits:
+multiclass N3VInt_HS<bit op24, bit op23, bits<4> op11_8, bit op4,
+                     string OpcodeStr, Intrinsic IntOp, bit Commutable = 0> {
+  // 64-bit vector types.
+  def v4i16 : N3VDInt<op24, op23, 0b01, op11_8, op4, !strconcat(OpcodeStr,"16"),
+                      v4i16, v4i16, IntOp, Commutable>;
+  def v2i32 : N3VDInt<op24, op23, 0b10, op11_8, op4, !strconcat(OpcodeStr,"32"),
+                      v2i32, v2i32, IntOp, Commutable>;
+
+  // 128-bit vector types.
+ def v8i16 : N3VQInt<op24, op23, 0b01, op11_8, op4, !strconcat(OpcodeStr,"16"), + v8i16, v8i16, IntOp, Commutable>; + def v4i32 : N3VQInt<op24, op23, 0b10, op11_8, op4, !strconcat(OpcodeStr,"32"), + v4i32, v4i32, IntOp, Commutable>; +} + +// ....then also with element size of 8 bits: +multiclass N3VInt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4, + string OpcodeStr, Intrinsic IntOp, bit Commutable = 0> + : N3VInt_HS<op24, op23, op11_8, op4, OpcodeStr, IntOp, Commutable> { + def v8i8 : N3VDInt<op24, op23, 0b00, op11_8, op4, !strconcat(OpcodeStr, "8"), + v8i8, v8i8, IntOp, Commutable>; + def v16i8 : N3VQInt<op24, op23, 0b00, op11_8, op4, !strconcat(OpcodeStr, "8"), + v16i8, v16i8, IntOp, Commutable>; +} + +// ....then also with element size of 64 bits: +multiclass N3VInt_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4, + string OpcodeStr, Intrinsic IntOp, bit Commutable = 0> + : N3VInt_QHS<op24, op23, op11_8, op4, OpcodeStr, IntOp, Commutable> { + def v1i64 : N3VDInt<op24, op23, 0b11, op11_8, op4, !strconcat(OpcodeStr,"64"), + v1i64, v1i64, IntOp, Commutable>; + def v2i64 : N3VQInt<op24, op23, 0b11, op11_8, op4, !strconcat(OpcodeStr,"64"), + v2i64, v2i64, IntOp, Commutable>; +} + + +// Neon Narrowing 3-register vector intrinsics, +// source operand element sizes of 16, 32 and 64 bits: +multiclass N3VNInt_HSD<bit op24, bit op23, bits<4> op11_8, bit op4, + string OpcodeStr, Intrinsic IntOp, bit Commutable = 0> { + def v8i8 : N3VNInt<op24, op23, 0b00, op11_8, op4, !strconcat(OpcodeStr,"16"), + v8i8, v8i16, IntOp, Commutable>; + def v4i16 : N3VNInt<op24, op23, 0b01, op11_8, op4, !strconcat(OpcodeStr,"32"), + v4i16, v4i32, IntOp, Commutable>; + def v2i32 : N3VNInt<op24, op23, 0b10, op11_8, op4, !strconcat(OpcodeStr,"64"), + v2i32, v2i64, IntOp, Commutable>; +} + + +// Neon Long 3-register vector intrinsics. + +// First with only element sizes of 16 and 32 bits: +multiclass N3VLInt_HS<bit op24, bit op23, bits<4> op11_8, bit op4, + string OpcodeStr, Intrinsic IntOp, bit Commutable = 0> { + def v4i32 : N3VLInt<op24, op23, 0b01, op11_8, op4, !strconcat(OpcodeStr,"16"), + v4i32, v4i16, IntOp, Commutable>; + def v2i64 : N3VLInt<op24, op23, 0b10, op11_8, op4, !strconcat(OpcodeStr,"32"), + v2i64, v2i32, IntOp, Commutable>; +} + +// ....then also with element size of 8 bits: +multiclass N3VLInt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4, + string OpcodeStr, Intrinsic IntOp, bit Commutable = 0> + : N3VLInt_HS<op24, op23, op11_8, op4, OpcodeStr, IntOp, Commutable> { + def v8i16 : N3VLInt<op24, op23, 0b00, op11_8, op4, !strconcat(OpcodeStr, "8"), + v8i16, v8i8, IntOp, Commutable>; +} + + +// Neon Wide 3-register vector intrinsics, +// source operand element sizes of 8, 16 and 32 bits: +multiclass N3VWInt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4, + string OpcodeStr, Intrinsic IntOp, bit Commutable = 0> { + def v8i16 : N3VWInt<op24, op23, 0b00, op11_8, op4, !strconcat(OpcodeStr, "8"), + v8i16, v8i8, IntOp, Commutable>; + def v4i32 : N3VWInt<op24, op23, 0b01, op11_8, op4, !strconcat(OpcodeStr,"16"), + v4i32, v4i16, IntOp, Commutable>; + def v2i64 : N3VWInt<op24, op23, 0b10, op11_8, op4, !strconcat(OpcodeStr,"32"), + v2i64, v2i32, IntOp, Commutable>; +} + + +// Neon Multiply-Op vector operations, +// element sizes of 8, 16 and 32 bits: +multiclass N3VMulOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4, + string OpcodeStr, SDNode OpNode> { + // 64-bit vector types. 
+ def v8i8 : N3VDMulOp<op24, op23, 0b00, op11_8, op4, + !strconcat(OpcodeStr, "8"), v8i8, mul, OpNode>; + def v4i16 : N3VDMulOp<op24, op23, 0b01, op11_8, op4, + !strconcat(OpcodeStr, "16"), v4i16, mul, OpNode>; + def v2i32 : N3VDMulOp<op24, op23, 0b10, op11_8, op4, + !strconcat(OpcodeStr, "32"), v2i32, mul, OpNode>; + + // 128-bit vector types. + def v16i8 : N3VQMulOp<op24, op23, 0b00, op11_8, op4, + !strconcat(OpcodeStr, "8"), v16i8, mul, OpNode>; + def v8i16 : N3VQMulOp<op24, op23, 0b01, op11_8, op4, + !strconcat(OpcodeStr, "16"), v8i16, mul, OpNode>; + def v4i32 : N3VQMulOp<op24, op23, 0b10, op11_8, op4, + !strconcat(OpcodeStr, "32"), v4i32, mul, OpNode>; +} + + +// Neon 3-argument intrinsics, +// element sizes of 8, 16 and 32 bits: +multiclass N3VInt3_QHS<bit op24, bit op23, bits<4> op11_8, bit op4, + string OpcodeStr, Intrinsic IntOp> { + // 64-bit vector types. + def v8i8 : N3VDInt3<op24, op23, 0b00, op11_8, op4, + !strconcat(OpcodeStr, "8"), v8i8, v8i8, IntOp>; + def v4i16 : N3VDInt3<op24, op23, 0b01, op11_8, op4, + !strconcat(OpcodeStr, "16"), v4i16, v4i16, IntOp>; + def v2i32 : N3VDInt3<op24, op23, 0b10, op11_8, op4, + !strconcat(OpcodeStr, "32"), v2i32, v2i32, IntOp>; + + // 128-bit vector types. + def v16i8 : N3VQInt3<op24, op23, 0b00, op11_8, op4, + !strconcat(OpcodeStr, "8"), v16i8, v16i8, IntOp>; + def v8i16 : N3VQInt3<op24, op23, 0b01, op11_8, op4, + !strconcat(OpcodeStr, "16"), v8i16, v8i16, IntOp>; + def v4i32 : N3VQInt3<op24, op23, 0b10, op11_8, op4, + !strconcat(OpcodeStr, "32"), v4i32, v4i32, IntOp>; +} + + +// Neon Long 3-argument intrinsics. + +// First with only element sizes of 16 and 32 bits: +multiclass N3VLInt3_HS<bit op24, bit op23, bits<4> op11_8, bit op4, + string OpcodeStr, Intrinsic IntOp> { + def v4i32 : N3VLInt3<op24, op23, 0b01, op11_8, op4, + !strconcat(OpcodeStr, "16"), v4i32, v4i16, IntOp>; + def v2i64 : N3VLInt3<op24, op23, 0b10, op11_8, op4, + !strconcat(OpcodeStr, "32"), v2i64, v2i32, IntOp>; +} + +// ....then also with element size of 8 bits: +multiclass N3VLInt3_QHS<bit op24, bit op23, bits<4> op11_8, bit op4, + string OpcodeStr, Intrinsic IntOp> + : N3VLInt3_HS<op24, op23, op11_8, op4, OpcodeStr, IntOp> { + def v8i16 : N3VLInt3<op24, op23, 0b01, op11_8, op4, + !strconcat(OpcodeStr, "8"), v8i16, v8i8, IntOp>; +} + + +// Neon 2-register vector intrinsics, +// element sizes of 8, 16 and 32 bits: +multiclass N2VInt_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16, + bits<5> op11_7, bit op4, string OpcodeStr, + Intrinsic IntOp> { + // 64-bit vector types. + def v8i8 : N2VDInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4, + !strconcat(OpcodeStr, "8"), v8i8, v8i8, IntOp>; + def v4i16 : N2VDInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4, + !strconcat(OpcodeStr, "16"), v4i16, v4i16, IntOp>; + def v2i32 : N2VDInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4, + !strconcat(OpcodeStr, "32"), v2i32, v2i32, IntOp>; + + // 128-bit vector types. + def v16i8 : N2VQInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4, + !strconcat(OpcodeStr, "8"), v16i8, v16i8, IntOp>; + def v8i16 : N2VQInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4, + !strconcat(OpcodeStr, "16"), v8i16, v8i16, IntOp>; + def v4i32 : N2VQInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4, + !strconcat(OpcodeStr, "32"), v4i32, v4i32, IntOp>; +} + + +// Neon Pairwise long 2-register intrinsics, +// element sizes of 8, 16 and 32 bits: +multiclass N2VPLInt_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16, + bits<5> op11_7, bit op4, + string OpcodeStr, Intrinsic IntOp> { + // 64-bit vector types. 
+ def v8i8 : N2VDPLInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4, + !strconcat(OpcodeStr, "8"), v4i16, v8i8, IntOp>; + def v4i16 : N2VDPLInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4, + !strconcat(OpcodeStr, "16"), v2i32, v4i16, IntOp>; + def v2i32 : N2VDPLInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4, + !strconcat(OpcodeStr, "32"), v1i64, v2i32, IntOp>; + + // 128-bit vector types. + def v16i8 : N2VQPLInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4, + !strconcat(OpcodeStr, "8"), v8i16, v16i8, IntOp>; + def v8i16 : N2VQPLInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4, + !strconcat(OpcodeStr, "16"), v4i32, v8i16, IntOp>; + def v4i32 : N2VQPLInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4, + !strconcat(OpcodeStr, "32"), v2i64, v4i32, IntOp>; +} + + +// Neon Pairwise long 2-register accumulate intrinsics, +// element sizes of 8, 16 and 32 bits: +multiclass N2VPLInt2_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16, + bits<5> op11_7, bit op4, + string OpcodeStr, Intrinsic IntOp> { + // 64-bit vector types. + def v8i8 : N2VDPLInt2<op24_23, op21_20, 0b00, op17_16, op11_7, op4, + !strconcat(OpcodeStr, "8"), v4i16, v8i8, IntOp>; + def v4i16 : N2VDPLInt2<op24_23, op21_20, 0b01, op17_16, op11_7, op4, + !strconcat(OpcodeStr, "16"), v2i32, v4i16, IntOp>; + def v2i32 : N2VDPLInt2<op24_23, op21_20, 0b10, op17_16, op11_7, op4, + !strconcat(OpcodeStr, "32"), v1i64, v2i32, IntOp>; + + // 128-bit vector types. + def v16i8 : N2VQPLInt2<op24_23, op21_20, 0b00, op17_16, op11_7, op4, + !strconcat(OpcodeStr, "8"), v8i16, v16i8, IntOp>; + def v8i16 : N2VQPLInt2<op24_23, op21_20, 0b01, op17_16, op11_7, op4, + !strconcat(OpcodeStr, "16"), v4i32, v8i16, IntOp>; + def v4i32 : N2VQPLInt2<op24_23, op21_20, 0b10, op17_16, op11_7, op4, + !strconcat(OpcodeStr, "32"), v2i64, v4i32, IntOp>; +} + + +// Neon 2-register vector shift by immediate, +// element sizes of 8, 16, 32 and 64 bits: +multiclass N2VSh_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4, + string OpcodeStr, SDNode OpNode> { + // 64-bit vector types. + def v8i8 : N2VDSh<op24, op23, 0b001000, op11_8, 0, op4, + !strconcat(OpcodeStr, "8"), v8i8, OpNode>; + def v4i16 : N2VDSh<op24, op23, 0b010000, op11_8, 0, op4, + !strconcat(OpcodeStr, "16"), v4i16, OpNode>; + def v2i32 : N2VDSh<op24, op23, 0b100000, op11_8, 0, op4, + !strconcat(OpcodeStr, "32"), v2i32, OpNode>; + def v1i64 : N2VDSh<op24, op23, 0b000000, op11_8, 1, op4, + !strconcat(OpcodeStr, "64"), v1i64, OpNode>; + + // 128-bit vector types. + def v16i8 : N2VQSh<op24, op23, 0b001000, op11_8, 0, op4, + !strconcat(OpcodeStr, "8"), v16i8, OpNode>; + def v8i16 : N2VQSh<op24, op23, 0b010000, op11_8, 0, op4, + !strconcat(OpcodeStr, "16"), v8i16, OpNode>; + def v4i32 : N2VQSh<op24, op23, 0b100000, op11_8, 0, op4, + !strconcat(OpcodeStr, "32"), v4i32, OpNode>; + def v2i64 : N2VQSh<op24, op23, 0b000000, op11_8, 1, op4, + !strconcat(OpcodeStr, "64"), v2i64, OpNode>; +} + + +// Neon Shift-Accumulate vector operations, +// element sizes of 8, 16, 32 and 64 bits: +multiclass N2VShAdd_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4, + string OpcodeStr, SDNode ShOp> { + // 64-bit vector types. 
+ def v8i8 : N2VDShAdd<op24, op23, 0b001000, op11_8, 0, op4, + !strconcat(OpcodeStr, "8"), v8i8, ShOp>; + def v4i16 : N2VDShAdd<op24, op23, 0b010000, op11_8, 0, op4, + !strconcat(OpcodeStr, "16"), v4i16, ShOp>; + def v2i32 : N2VDShAdd<op24, op23, 0b100000, op11_8, 0, op4, + !strconcat(OpcodeStr, "32"), v2i32, ShOp>; + def v1i64 : N2VDShAdd<op24, op23, 0b000000, op11_8, 1, op4, + !strconcat(OpcodeStr, "64"), v1i64, ShOp>; + + // 128-bit vector types. + def v16i8 : N2VQShAdd<op24, op23, 0b001000, op11_8, 0, op4, + !strconcat(OpcodeStr, "8"), v16i8, ShOp>; + def v8i16 : N2VQShAdd<op24, op23, 0b010000, op11_8, 0, op4, + !strconcat(OpcodeStr, "16"), v8i16, ShOp>; + def v4i32 : N2VQShAdd<op24, op23, 0b100000, op11_8, 0, op4, + !strconcat(OpcodeStr, "32"), v4i32, ShOp>; + def v2i64 : N2VQShAdd<op24, op23, 0b000000, op11_8, 1, op4, + !strconcat(OpcodeStr, "64"), v2i64, ShOp>; +} + + +// Neon Shift-Insert vector operations, +// element sizes of 8, 16, 32 and 64 bits: +multiclass N2VShIns_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4, + string OpcodeStr, SDNode ShOp> { + // 64-bit vector types. + def v8i8 : N2VDShIns<op24, op23, 0b001000, op11_8, 0, op4, + !strconcat(OpcodeStr, "8"), v8i8, ShOp>; + def v4i16 : N2VDShIns<op24, op23, 0b010000, op11_8, 0, op4, + !strconcat(OpcodeStr, "16"), v4i16, ShOp>; + def v2i32 : N2VDShIns<op24, op23, 0b100000, op11_8, 0, op4, + !strconcat(OpcodeStr, "32"), v2i32, ShOp>; + def v1i64 : N2VDShIns<op24, op23, 0b000000, op11_8, 1, op4, + !strconcat(OpcodeStr, "64"), v1i64, ShOp>; + + // 128-bit vector types. + def v16i8 : N2VQShIns<op24, op23, 0b001000, op11_8, 0, op4, + !strconcat(OpcodeStr, "8"), v16i8, ShOp>; + def v8i16 : N2VQShIns<op24, op23, 0b010000, op11_8, 0, op4, + !strconcat(OpcodeStr, "16"), v8i16, ShOp>; + def v4i32 : N2VQShIns<op24, op23, 0b100000, op11_8, 0, op4, + !strconcat(OpcodeStr, "32"), v4i32, ShOp>; + def v2i64 : N2VQShIns<op24, op23, 0b000000, op11_8, 1, op4, + !strconcat(OpcodeStr, "64"), v2i64, ShOp>; +} + +//===----------------------------------------------------------------------===// +// Instruction Definitions. +//===----------------------------------------------------------------------===// + +// Vector Add Operations. 
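+// Note: each "defm" below instantiates the corresponding multiclass once per
+// element size, prefixing the def names; e.g. the VADD defm produces
+// VADDv8i8, VADDv4i16, VADDv2i32 and VADDv1i64 for D registers and
+// VADDv16i8, VADDv8i16, VADDv4i32 and VADDv2i64 for Q registers.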
+ +// VADD : Vector Add (integer and floating-point) +defm VADD : N3V_QHSD<0, 0, 0b1000, 0, "vadd.i", add, 1>; +def VADDfd : N3VD<0, 0, 0b00, 0b1101, 0, "vadd.f32", v2f32, v2f32, fadd, 1>; +def VADDfq : N3VQ<0, 0, 0b00, 0b1101, 0, "vadd.f32", v4f32, v4f32, fadd, 1>; +// VADDL : Vector Add Long (Q = D + D) +defm VADDLs : N3VLInt_QHS<0,1,0b0000,0, "vaddl.s", int_arm_neon_vaddls, 1>; +defm VADDLu : N3VLInt_QHS<1,1,0b0000,0, "vaddl.u", int_arm_neon_vaddlu, 1>; +// VADDW : Vector Add Wide (Q = Q + D) +defm VADDWs : N3VWInt_QHS<0,1,0b0001,0, "vaddw.s", int_arm_neon_vaddws, 0>; +defm VADDWu : N3VWInt_QHS<1,1,0b0001,0, "vaddw.u", int_arm_neon_vaddwu, 0>; +// VHADD : Vector Halving Add +defm VHADDs : N3VInt_QHS<0,0,0b0000,0, "vhadd.s", int_arm_neon_vhadds, 1>; +defm VHADDu : N3VInt_QHS<1,0,0b0000,0, "vhadd.u", int_arm_neon_vhaddu, 1>; +// VRHADD : Vector Rounding Halving Add +defm VRHADDs : N3VInt_QHS<0,0,0b0001,0, "vrhadd.s", int_arm_neon_vrhadds, 1>; +defm VRHADDu : N3VInt_QHS<1,0,0b0001,0, "vrhadd.u", int_arm_neon_vrhaddu, 1>; +// VQADD : Vector Saturating Add +defm VQADDs : N3VInt_QHSD<0,0,0b0000,1, "vqadd.s", int_arm_neon_vqadds, 1>; +defm VQADDu : N3VInt_QHSD<1,0,0b0000,1, "vqadd.u", int_arm_neon_vqaddu, 1>; +// VADDHN : Vector Add and Narrow Returning High Half (D = Q + Q) +defm VADDHN : N3VNInt_HSD<0,1,0b0100,0, "vaddhn.i", int_arm_neon_vaddhn, 1>; +// VRADDHN : Vector Rounding Add and Narrow Returning High Half (D = Q + Q) +defm VRADDHN : N3VNInt_HSD<1,1,0b0100,0, "vraddhn.i", int_arm_neon_vraddhn, 1>; + +// Vector Multiply Operations. + +// VMUL : Vector Multiply (integer, polynomial and floating-point) +defm VMUL : N3V_QHS<0, 0, 0b1001, 1, "vmul.i", mul, 1>; +def VMULpd : N3VDInt<1, 0, 0b00, 0b1001, 1, "vmul.p8", v8i8, v8i8, + int_arm_neon_vmulp, 1>; +def VMULpq : N3VQInt<1, 0, 0b00, 0b1001, 1, "vmul.p8", v16i8, v16i8, + int_arm_neon_vmulp, 1>; +def VMULfd : N3VD<1, 0, 0b00, 0b1101, 1, "vmul.f32", v2f32, v2f32, fmul, 1>; +def VMULfq : N3VQ<1, 0, 0b00, 0b1101, 1, "vmul.f32", v4f32, v4f32, fmul, 1>; +// VQDMULH : Vector Saturating Doubling Multiply Returning High Half +defm VQDMULH : N3VInt_HS<0,0,0b1011,0, "vqdmulh.s", int_arm_neon_vqdmulh, 1>; +// VQRDMULH : Vector Rounding Saturating Doubling Multiply Returning High Half +defm VQRDMULH : N3VInt_HS<1,0,0b1011,0, "vqrdmulh.s", int_arm_neon_vqrdmulh, 1>; +// VMULL : Vector Multiply Long (integer and polynomial) (Q = D * D) +defm VMULLs : N3VLInt_QHS<0,1,0b1100,0, "vmull.s", int_arm_neon_vmulls, 1>; +defm VMULLu : N3VLInt_QHS<1,1,0b1100,0, "vmull.u", int_arm_neon_vmullu, 1>; +def VMULLp : N3VLInt<0, 1, 0b00, 0b1110, 0, "vmull.p8", v8i16, v8i8, + int_arm_neon_vmullp, 1>; +// VQDMULL : Vector Saturating Doubling Multiply Long (Q = D * D) +defm VQDMULL : N3VLInt_HS<0,1,0b1101,0, "vqdmull.s", int_arm_neon_vqdmull, 1>; + +// Vector Multiply-Accumulate and Multiply-Subtract Operations. 
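+// Note: the multiply-op and 3-argument intrinsic classes above constrain
+// "$src1 = $dst", so the accumulator register is read and written in place;
+// e.g. "vmla.i8 d0, d1, d2" computes d0 += d1 * d2 element-wise.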
+
+// VMLA : Vector Multiply Accumulate (integer and floating-point)
+defm VMLA : N3VMulOp_QHS<0, 0, 0b1001, 0, "vmla.i", add>;
+def VMLAfd : N3VDMulOp<0, 0, 0b00, 0b1101, 1, "vmla.f32", v2f32, fmul, fadd>;
+def VMLAfq : N3VQMulOp<0, 0, 0b00, 0b1101, 1, "vmla.f32", v4f32, fmul, fadd>;
+// VMLAL : Vector Multiply Accumulate Long (Q += D * D)
+defm VMLALs : N3VLInt3_QHS<0,1,0b1000,0, "vmlal.s", int_arm_neon_vmlals>;
+defm VMLALu : N3VLInt3_QHS<1,1,0b1000,0, "vmlal.u", int_arm_neon_vmlalu>;
+// VQDMLAL : Vector Saturating Doubling Multiply Accumulate Long (Q += D * D)
+defm VQDMLAL : N3VLInt3_HS<0, 1, 0b1001, 0, "vqdmlal.s", int_arm_neon_vqdmlal>;
+// VMLS : Vector Multiply Subtract (integer and floating-point)
+defm VMLS : N3VMulOp_QHS<0, 0, 0b1001, 0, "vmls.i", sub>;
+def VMLSfd : N3VDMulOp<0, 0, 0b10, 0b1101, 1, "vmls.f32", v2f32, fmul, fsub>;
+def VMLSfq : N3VQMulOp<0, 0, 0b10, 0b1101, 1, "vmls.f32", v4f32, fmul, fsub>;
+// VMLSL : Vector Multiply Subtract Long (Q -= D * D)
+defm VMLSLs : N3VLInt3_QHS<0,1,0b1010,0, "vmlsl.s", int_arm_neon_vmlsls>;
+defm VMLSLu : N3VLInt3_QHS<1,1,0b1010,0, "vmlsl.u", int_arm_neon_vmlslu>;
+// VQDMLSL : Vector Saturating Doubling Multiply Subtract Long (Q -= D * D)
+defm VQDMLSL : N3VLInt3_HS<0, 1, 0b1011, 0, "vqdmlsl.s", int_arm_neon_vqdmlsl>;
+
+// Vector Subtract Operations.
+
+// VSUB : Vector Subtract (integer and floating-point)
+defm VSUB : N3V_QHSD<1, 0, 0b1000, 0, "vsub.i", sub, 0>;
+def VSUBfd : N3VD<0, 0, 0b10, 0b1101, 0, "vsub.f32", v2f32, v2f32, fsub, 0>;
+def VSUBfq : N3VQ<0, 0, 0b10, 0b1101, 0, "vsub.f32", v4f32, v4f32, fsub, 0>;
+// VSUBL : Vector Subtract Long (Q = D - D)
+defm VSUBLs : N3VLInt_QHS<0,1,0b0010,0, "vsubl.s", int_arm_neon_vsubls, 1>;
+defm VSUBLu : N3VLInt_QHS<1,1,0b0010,0, "vsubl.u", int_arm_neon_vsublu, 1>;
+// VSUBW : Vector Subtract Wide (Q = Q - D)
+defm VSUBWs : N3VWInt_QHS<0,1,0b0011,0, "vsubw.s", int_arm_neon_vsubws, 0>;
+defm VSUBWu : N3VWInt_QHS<1,1,0b0011,0, "vsubw.u", int_arm_neon_vsubwu, 0>;
+// VHSUB : Vector Halving Subtract
+defm VHSUBs : N3VInt_QHS<0, 0, 0b0010, 0, "vhsub.s", int_arm_neon_vhsubs, 0>;
+defm VHSUBu : N3VInt_QHS<1, 0, 0b0010, 0, "vhsub.u", int_arm_neon_vhsubu, 0>;
+// VQSUB : Vector Saturating Subtract
+defm VQSUBs : N3VInt_QHSD<0, 0, 0b0010, 1, "vqsub.s", int_arm_neon_vqsubs, 0>;
+defm VQSUBu : N3VInt_QHSD<1, 0, 0b0010, 1, "vqsub.u", int_arm_neon_vqsubu, 0>;
+// VSUBHN : Vector Subtract and Narrow Returning High Half (D = Q - Q)
+defm VSUBHN : N3VNInt_HSD<0,1,0b0110,0, "vsubhn.i", int_arm_neon_vsubhn, 0>;
+// VRSUBHN : Vector Rounding Subtract and Narrow Returning High Half (D=Q-Q)
+defm VRSUBHN : N3VNInt_HSD<1,1,0b0110,0, "vrsubhn.i", int_arm_neon_vrsubhn, 0>;
+
+// Vector Comparisons.
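+// Note: NEON compares set every bit of a result lane to 1 when the comparison
+// holds and to 0 otherwise, which is why the f32 variants below use integer
+// result types (v2i32, v4i32).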
+ +// VCEQ : Vector Compare Equal +defm VCEQ : N3V_QHS<1, 0, 0b1000, 1, "vceq.i", NEONvceq, 1>; +def VCEQfd : N3VD<0,0,0b00,0b1110,0, "vceq.f32", v2i32, v2f32, NEONvceq, 1>; +def VCEQfq : N3VQ<0,0,0b00,0b1110,0, "vceq.f32", v4i32, v4f32, NEONvceq, 1>; +// VCGE : Vector Compare Greater Than or Equal +defm VCGEs : N3V_QHS<0, 0, 0b0011, 1, "vcge.s", NEONvcge, 0>; +defm VCGEu : N3V_QHS<1, 0, 0b0011, 1, "vcge.u", NEONvcgeu, 0>; +def VCGEfd : N3VD<1,0,0b00,0b1110,0, "vcge.f32", v2i32, v2f32, NEONvcge, 0>; +def VCGEfq : N3VQ<1,0,0b00,0b1110,0, "vcge.f32", v4i32, v4f32, NEONvcge, 0>; +// VCGT : Vector Compare Greater Than +defm VCGTs : N3V_QHS<0, 0, 0b0011, 0, "vcgt.s", NEONvcgt, 0>; +defm VCGTu : N3V_QHS<1, 0, 0b0011, 0, "vcgt.u", NEONvcgtu, 0>; +def VCGTfd : N3VD<1,0,0b10,0b1110,0, "vcgt.f32", v2i32, v2f32, NEONvcgt, 0>; +def VCGTfq : N3VQ<1,0,0b10,0b1110,0, "vcgt.f32", v4i32, v4f32, NEONvcgt, 0>; +// VACGE : Vector Absolute Compare Greater Than or Equal (aka VCAGE) +def VACGEd : N3VDInt<1, 0, 0b00, 0b1110, 1, "vacge.f32", v2i32, v2f32, + int_arm_neon_vacged, 0>; +def VACGEq : N3VQInt<1, 0, 0b00, 0b1110, 1, "vacge.f32", v4i32, v4f32, + int_arm_neon_vacgeq, 0>; +// VACGT : Vector Absolute Compare Greater Than (aka VCAGT) +def VACGTd : N3VDInt<1, 0, 0b10, 0b1110, 1, "vacgt.f32", v2i32, v2f32, + int_arm_neon_vacgtd, 0>; +def VACGTq : N3VQInt<1, 0, 0b10, 0b1110, 1, "vacgt.f32", v4i32, v4f32, + int_arm_neon_vacgtq, 0>; +// VTST : Vector Test Bits +defm VTST : N3V_QHS<0, 0, 0b1000, 1, "vtst.i", NEONvtst, 1>; + +// Vector Bitwise Operations. + +// VAND : Vector Bitwise AND +def VANDd : N3VD<0, 0, 0b00, 0b0001, 1, "vand", v2i32, v2i32, and, 1>; +def VANDq : N3VQ<0, 0, 0b00, 0b0001, 1, "vand", v4i32, v4i32, and, 1>; + +// VEOR : Vector Bitwise Exclusive OR +def VEORd : N3VD<1, 0, 0b00, 0b0001, 1, "veor", v2i32, v2i32, xor, 1>; +def VEORq : N3VQ<1, 0, 0b00, 0b0001, 1, "veor", v4i32, v4i32, xor, 1>; + +// VORR : Vector Bitwise OR +def VORRd : N3VD<0, 0, 0b10, 0b0001, 1, "vorr", v2i32, v2i32, or, 1>; +def VORRq : N3VQ<0, 0, 0b10, 0b0001, 1, "vorr", v4i32, v4i32, or, 1>; + +// VBIC : Vector Bitwise Bit Clear (AND NOT) +def VBICd : N3V<0, 0, 0b01, 0b0001, 0, 1, (outs DPR:$dst), + (ins DPR:$src1, DPR:$src2), "vbic\t$dst, $src1, $src2", "", + [(set DPR:$dst, (v2i32 (and DPR:$src1,(vnot DPR:$src2))))]>; +def VBICq : N3V<0, 0, 0b01, 0b0001, 1, 1, (outs QPR:$dst), + (ins QPR:$src1, QPR:$src2), "vbic\t$dst, $src1, $src2", "", + [(set QPR:$dst, (v4i32 (and QPR:$src1,(vnot QPR:$src2))))]>; + +// VORN : Vector Bitwise OR NOT +def VORNd : N3V<0, 0, 0b11, 0b0001, 0, 1, (outs DPR:$dst), + (ins DPR:$src1, DPR:$src2), "vorn\t$dst, $src1, $src2", "", + [(set DPR:$dst, (v2i32 (or DPR:$src1, (vnot DPR:$src2))))]>; +def VORNq : N3V<0, 0, 0b11, 0b0001, 1, 1, (outs QPR:$dst), + (ins QPR:$src1, QPR:$src2), "vorn\t$dst, $src1, $src2", "", + [(set QPR:$dst, (v4i32 (or QPR:$src1, (vnot QPR:$src2))))]>; + +// VMVN : Vector Bitwise NOT +def VMVNd : N2V<0b11, 0b11, 0b00, 0b00, 0b01011, 0, 0, + (outs DPR:$dst), (ins DPR:$src), "vmvn\t$dst, $src", "", + [(set DPR:$dst, (v2i32 (vnot DPR:$src)))]>; +def VMVNq : N2V<0b11, 0b11, 0b00, 0b00, 0b01011, 1, 0, + (outs QPR:$dst), (ins QPR:$src), "vmvn\t$dst, $src", "", + [(set QPR:$dst, (v4i32 (vnot QPR:$src)))]>; +def : Pat<(v2i32 (vnot_conv DPR:$src)), (VMVNd DPR:$src)>; +def : Pat<(v4i32 (vnot_conv QPR:$src)), (VMVNq QPR:$src)>; + +// VBSL : Vector Bitwise Select +def VBSLd : N3V<1, 0, 0b01, 0b0001, 0, 1, (outs DPR:$dst), + (ins DPR:$src1, DPR:$src2, DPR:$src3), + "vbsl\t$dst, $src2, $src3", 
"$src1 = $dst", + [(set DPR:$dst, + (v2i32 (or (and DPR:$src2, DPR:$src1), + (and DPR:$src3, (vnot DPR:$src1)))))]>; +def VBSLq : N3V<1, 0, 0b01, 0b0001, 1, 1, (outs QPR:$dst), + (ins QPR:$src1, QPR:$src2, QPR:$src3), + "vbsl\t$dst, $src2, $src3", "$src1 = $dst", + [(set QPR:$dst, + (v4i32 (or (and QPR:$src2, QPR:$src1), + (and QPR:$src3, (vnot QPR:$src1)))))]>; + +// VBIF : Vector Bitwise Insert if False +// like VBSL but with: "vbif\t$dst, $src3, $src1", "$src2 = $dst", +// VBIT : Vector Bitwise Insert if True +// like VBSL but with: "vbit\t$dst, $src2, $src1", "$src3 = $dst", +// These are not yet implemented. The TwoAddress pass will not go looking +// for equivalent operations with different register constraints; it just +// inserts copies. + +// Vector Absolute Differences. + +// VABD : Vector Absolute Difference +defm VABDs : N3VInt_QHS<0, 0, 0b0111, 0, "vabd.s", int_arm_neon_vabds, 0>; +defm VABDu : N3VInt_QHS<1, 0, 0b0111, 0, "vabd.u", int_arm_neon_vabdu, 0>; +def VABDfd : N3VDInt<1, 0, 0b10, 0b1101, 0, "vabd.f32", v2f32, v2f32, + int_arm_neon_vabdf, 0>; +def VABDfq : N3VQInt<1, 0, 0b10, 0b1101, 0, "vabd.f32", v4f32, v4f32, + int_arm_neon_vabdf, 0>; + +// VABDL : Vector Absolute Difference Long (Q = | D - D |) +defm VABDLs : N3VLInt_QHS<0,1,0b0111,0, "vabdl.s", int_arm_neon_vabdls, 0>; +defm VABDLu : N3VLInt_QHS<1,1,0b0111,0, "vabdl.u", int_arm_neon_vabdlu, 0>; + +// VABA : Vector Absolute Difference and Accumulate +defm VABAs : N3VInt3_QHS<0,1,0b0101,0, "vaba.s", int_arm_neon_vabas>; +defm VABAu : N3VInt3_QHS<1,1,0b0101,0, "vaba.u", int_arm_neon_vabau>; + +// VABAL : Vector Absolute Difference and Accumulate Long (Q += | D - D |) +defm VABALs : N3VLInt3_QHS<0,1,0b0101,0, "vabal.s", int_arm_neon_vabals>; +defm VABALu : N3VLInt3_QHS<1,1,0b0101,0, "vabal.u", int_arm_neon_vabalu>; + +// Vector Maximum and Minimum. + +// VMAX : Vector Maximum +defm VMAXs : N3VInt_QHS<0, 0, 0b0110, 0, "vmax.s", int_arm_neon_vmaxs, 1>; +defm VMAXu : N3VInt_QHS<1, 0, 0b0110, 0, "vmax.u", int_arm_neon_vmaxu, 1>; +def VMAXfd : N3VDInt<0, 0, 0b00, 0b1111, 0, "vmax.f32", v2f32, v2f32, + int_arm_neon_vmaxf, 1>; +def VMAXfq : N3VQInt<0, 0, 0b00, 0b1111, 0, "vmax.f32", v4f32, v4f32, + int_arm_neon_vmaxf, 1>; + +// VMIN : Vector Minimum +defm VMINs : N3VInt_QHS<0, 0, 0b0110, 1, "vmin.s", int_arm_neon_vmins, 1>; +defm VMINu : N3VInt_QHS<1, 0, 0b0110, 1, "vmin.u", int_arm_neon_vminu, 1>; +def VMINfd : N3VDInt<0, 0, 0b10, 0b1111, 0, "vmin.f32", v2f32, v2f32, + int_arm_neon_vminf, 1>; +def VMINfq : N3VQInt<0, 0, 0b10, 0b1111, 0, "vmin.f32", v4f32, v4f32, + int_arm_neon_vminf, 1>; + +// Vector Pairwise Operations. 
+ +// VPADD : Vector Pairwise Add +def VPADDi8 : N3VDInt<0, 0, 0b00, 0b1011, 1, "vpadd.i8", v8i8, v8i8, + int_arm_neon_vpaddi, 0>; +def VPADDi16 : N3VDInt<0, 0, 0b01, 0b1011, 1, "vpadd.i16", v4i16, v4i16, + int_arm_neon_vpaddi, 0>; +def VPADDi32 : N3VDInt<0, 0, 0b10, 0b1011, 1, "vpadd.i32", v2i32, v2i32, + int_arm_neon_vpaddi, 0>; +def VPADDf : N3VDInt<1, 0, 0b00, 0b1101, 0, "vpadd.f32", v2f32, v2f32, + int_arm_neon_vpaddf, 0>; + +// VPADDL : Vector Pairwise Add Long +defm VPADDLs : N2VPLInt_QHS<0b11, 0b11, 0b00, 0b00100, 0, "vpaddl.s", + int_arm_neon_vpaddls>; +defm VPADDLu : N2VPLInt_QHS<0b11, 0b11, 0b00, 0b00101, 0, "vpaddl.u", + int_arm_neon_vpaddlu>; + +// VPADAL : Vector Pairwise Add and Accumulate Long +defm VPADALs : N2VPLInt2_QHS<0b11, 0b11, 0b00, 0b00100, 0, "vpadal.s", + int_arm_neon_vpadals>; +defm VPADALu : N2VPLInt2_QHS<0b11, 0b11, 0b00, 0b00101, 0, "vpadal.u", + int_arm_neon_vpadalu>; + +// VPMAX : Vector Pairwise Maximum +def VPMAXs8 : N3VDInt<0, 0, 0b00, 0b1010, 0, "vpmax.s8", v8i8, v8i8, + int_arm_neon_vpmaxs, 0>; +def VPMAXs16 : N3VDInt<0, 0, 0b01, 0b1010, 0, "vpmax.s16", v4i16, v4i16, + int_arm_neon_vpmaxs, 0>; +def VPMAXs32 : N3VDInt<0, 0, 0b10, 0b1010, 0, "vpmax.s32", v2i32, v2i32, + int_arm_neon_vpmaxs, 0>; +def VPMAXu8 : N3VDInt<1, 0, 0b00, 0b1010, 0, "vpmax.u8", v8i8, v8i8, + int_arm_neon_vpmaxu, 0>; +def VPMAXu16 : N3VDInt<1, 0, 0b01, 0b1010, 0, "vpmax.u16", v4i16, v4i16, + int_arm_neon_vpmaxu, 0>; +def VPMAXu32 : N3VDInt<1, 0, 0b10, 0b1010, 0, "vpmax.u32", v2i32, v2i32, + int_arm_neon_vpmaxu, 0>; +def VPMAXf : N3VDInt<1, 0, 0b00, 0b1111, 0, "vpmax.f32", v2f32, v2f32, + int_arm_neon_vpmaxf, 0>; + +// VPMIN : Vector Pairwise Minimum +def VPMINs8 : N3VDInt<0, 0, 0b00, 0b1010, 1, "vpmin.s8", v8i8, v8i8, + int_arm_neon_vpmins, 0>; +def VPMINs16 : N3VDInt<0, 0, 0b01, 0b1010, 1, "vpmin.s16", v4i16, v4i16, + int_arm_neon_vpmins, 0>; +def VPMINs32 : N3VDInt<0, 0, 0b10, 0b1010, 1, "vpmin.s32", v2i32, v2i32, + int_arm_neon_vpmins, 0>; +def VPMINu8 : N3VDInt<1, 0, 0b00, 0b1010, 1, "vpmin.u8", v8i8, v8i8, + int_arm_neon_vpminu, 0>; +def VPMINu16 : N3VDInt<1, 0, 0b01, 0b1010, 1, "vpmin.u16", v4i16, v4i16, + int_arm_neon_vpminu, 0>; +def VPMINu32 : N3VDInt<1, 0, 0b10, 0b1010, 1, "vpmin.u32", v2i32, v2i32, + int_arm_neon_vpminu, 0>; +def VPMINf : N3VDInt<1, 0, 0b10, 0b1111, 0, "vpmin.f32", v2f32, v2f32, + int_arm_neon_vpminf, 0>; + +// Vector Reciprocal and Reciprocal Square Root Estimate and Step. 
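+// Note (usage sketch, per the architectural definitions of these operations
+// rather than anything encoded here): VRECPS computes 2 - a*b, so starting
+// from the VRECPE estimate x0 of 1/d, x1 = x0 * vrecps(d, x0) is one
+// Newton-Raphson refinement; VRSQRTS computes (3 - a*b)/2 and plays the same
+// role for refining the VRSQRTE estimate of 1/sqrt(d).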
+ +// VRECPE : Vector Reciprocal Estimate +def VRECPEd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01000, 0, "vrecpe.u32", + v2i32, v2i32, int_arm_neon_vrecpe>; +def VRECPEq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01000, 0, "vrecpe.u32", + v4i32, v4i32, int_arm_neon_vrecpe>; +def VRECPEfd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01010, 0, "vrecpe.f32", + v2f32, v2f32, int_arm_neon_vrecpef>; +def VRECPEfq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01010, 0, "vrecpe.f32", + v4f32, v4f32, int_arm_neon_vrecpef>; + +// VRECPS : Vector Reciprocal Step +def VRECPSfd : N3VDInt<0, 0, 0b00, 0b1111, 1, "vrecps.f32", v2f32, v2f32, + int_arm_neon_vrecps, 1>; +def VRECPSfq : N3VQInt<0, 0, 0b00, 0b1111, 1, "vrecps.f32", v4f32, v4f32, + int_arm_neon_vrecps, 1>; + +// VRSQRTE : Vector Reciprocal Square Root Estimate +def VRSQRTEd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01001, 0, "vrsqrte.u32", + v2i32, v2i32, int_arm_neon_vrsqrte>; +def VRSQRTEq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01001, 0, "vrsqrte.u32", + v4i32, v4i32, int_arm_neon_vrsqrte>; +def VRSQRTEfd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01011, 0, "vrsqrte.f32", + v2f32, v2f32, int_arm_neon_vrsqrtef>; +def VRSQRTEfq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01011, 0, "vrsqrte.f32", + v4f32, v4f32, int_arm_neon_vrsqrtef>; + +// VRSQRTS : Vector Reciprocal Square Root Step +def VRSQRTSfd : N3VDInt<0, 0, 0b10, 0b1111, 1, "vrsqrts.f32", v2f32, v2f32, + int_arm_neon_vrsqrts, 1>; +def VRSQRTSfq : N3VQInt<0, 0, 0b10, 0b1111, 1, "vrsqrts.f32", v4f32, v4f32, + int_arm_neon_vrsqrts, 1>; + +// Vector Shifts. + +// VSHL : Vector Shift +defm VSHLs : N3VInt_QHSD<0, 0, 0b0100, 0, "vshl.s", int_arm_neon_vshifts, 0>; +defm VSHLu : N3VInt_QHSD<1, 0, 0b0100, 0, "vshl.u", int_arm_neon_vshiftu, 0>; +// VSHL : Vector Shift Left (Immediate) +defm VSHLi : N2VSh_QHSD<0, 1, 0b0111, 1, "vshl.i", NEONvshl>; +// VSHR : Vector Shift Right (Immediate) +defm VSHRs : N2VSh_QHSD<0, 1, 0b0000, 1, "vshr.s", NEONvshrs>; +defm VSHRu : N2VSh_QHSD<1, 1, 0b0000, 1, "vshr.u", NEONvshru>; + +// VSHLL : Vector Shift Left Long +def VSHLLs8 : N2VLSh<0, 1, 0b001000, 0b1010, 0, 0, 1, "vshll.s8", + v8i16, v8i8, NEONvshlls>; +def VSHLLs16 : N2VLSh<0, 1, 0b010000, 0b1010, 0, 0, 1, "vshll.s16", + v4i32, v4i16, NEONvshlls>; +def VSHLLs32 : N2VLSh<0, 1, 0b100000, 0b1010, 0, 0, 1, "vshll.s32", + v2i64, v2i32, NEONvshlls>; +def VSHLLu8 : N2VLSh<1, 1, 0b001000, 0b1010, 0, 0, 1, "vshll.u8", + v8i16, v8i8, NEONvshllu>; +def VSHLLu16 : N2VLSh<1, 1, 0b010000, 0b1010, 0, 0, 1, "vshll.u16", + v4i32, v4i16, NEONvshllu>; +def VSHLLu32 : N2VLSh<1, 1, 0b100000, 0b1010, 0, 0, 1, "vshll.u32", + v2i64, v2i32, NEONvshllu>; + +// VSHLL : Vector Shift Left Long (with maximum shift count) +def VSHLLi8 : N2VLSh<1, 1, 0b110010, 0b0011, 0, 0, 0, "vshll.i8", + v8i16, v8i8, NEONvshlli>; +def VSHLLi16 : N2VLSh<1, 1, 0b110110, 0b0011, 0, 0, 0, "vshll.i16", + v4i32, v4i16, NEONvshlli>; +def VSHLLi32 : N2VLSh<1, 1, 0b111010, 0b0011, 0, 0, 0, "vshll.i32", + v2i64, v2i32, NEONvshlli>; + +// VSHRN : Vector Shift Right and Narrow +def VSHRN16 : N2VNSh<0, 1, 0b001000, 0b1000, 0, 0, 1, "vshrn.i16", + v8i8, v8i16, NEONvshrn>; +def VSHRN32 : N2VNSh<0, 1, 0b010000, 0b1000, 0, 0, 1, "vshrn.i32", + v4i16, v4i32, NEONvshrn>; +def VSHRN64 : N2VNSh<0, 1, 0b100000, 0b1000, 0, 0, 1, "vshrn.i64", + v2i32, v2i64, NEONvshrn>; + +// VRSHL : Vector Rounding Shift +defm VRSHLs : N3VInt_QHSD<0,0,0b0101,0, "vrshl.s", int_arm_neon_vrshifts, 0>; +defm VRSHLu : N3VInt_QHSD<1,0,0b0101,0, "vrshl.u", int_arm_neon_vrshiftu, 0>; +// VRSHR : Vector Rounding Shift Right +defm VRSHRs : N2VSh_QHSD<0, 
1, 0b0010, 1, "vrshr.s", NEONvrshrs>; +defm VRSHRu : N2VSh_QHSD<1, 1, 0b0010, 1, "vrshr.u", NEONvrshru>; + +// VRSHRN : Vector Rounding Shift Right and Narrow +def VRSHRN16 : N2VNSh<0, 1, 0b001000, 0b1000, 0, 1, 1, "vrshrn.i16", + v8i8, v8i16, NEONvrshrn>; +def VRSHRN32 : N2VNSh<0, 1, 0b010000, 0b1000, 0, 1, 1, "vrshrn.i32", + v4i16, v4i32, NEONvrshrn>; +def VRSHRN64 : N2VNSh<0, 1, 0b100000, 0b1000, 0, 1, 1, "vrshrn.i64", + v2i32, v2i64, NEONvrshrn>; + +// VQSHL : Vector Saturating Shift +defm VQSHLs : N3VInt_QHSD<0,0,0b0100,1, "vqshl.s", int_arm_neon_vqshifts, 0>; +defm VQSHLu : N3VInt_QHSD<1,0,0b0100,1, "vqshl.u", int_arm_neon_vqshiftu, 0>; +// VQSHL : Vector Saturating Shift Left (Immediate) +defm VQSHLsi : N2VSh_QHSD<0, 1, 0b0111, 1, "vqshl.s", NEONvqshls>; +defm VQSHLui : N2VSh_QHSD<1, 1, 0b0111, 1, "vqshl.u", NEONvqshlu>; +// VQSHLU : Vector Saturating Shift Left (Immediate, Unsigned) +defm VQSHLsu : N2VSh_QHSD<1, 1, 0b0110, 1, "vqshlu.s", NEONvqshlsu>; + +// VQSHRN : Vector Saturating Shift Right and Narrow +def VQSHRNs16 : N2VNSh<0, 1, 0b001000, 0b1001, 0, 0, 1, "vqshrn.s16", + v8i8, v8i16, NEONvqshrns>; +def VQSHRNs32 : N2VNSh<0, 1, 0b010000, 0b1001, 0, 0, 1, "vqshrn.s32", + v4i16, v4i32, NEONvqshrns>; +def VQSHRNs64 : N2VNSh<0, 1, 0b100000, 0b1001, 0, 0, 1, "vqshrn.s64", + v2i32, v2i64, NEONvqshrns>; +def VQSHRNu16 : N2VNSh<1, 1, 0b001000, 0b1001, 0, 0, 1, "vqshrn.u16", + v8i8, v8i16, NEONvqshrnu>; +def VQSHRNu32 : N2VNSh<1, 1, 0b010000, 0b1001, 0, 0, 1, "vqshrn.u32", + v4i16, v4i32, NEONvqshrnu>; +def VQSHRNu64 : N2VNSh<1, 1, 0b100000, 0b1001, 0, 0, 1, "vqshrn.u64", + v2i32, v2i64, NEONvqshrnu>; + +// VQSHRUN : Vector Saturating Shift Right and Narrow (Unsigned) +def VQSHRUN16 : N2VNSh<1, 1, 0b001000, 0b1000, 0, 0, 1, "vqshrun.s16", + v8i8, v8i16, NEONvqshrnsu>; +def VQSHRUN32 : N2VNSh<1, 1, 0b010000, 0b1000, 0, 0, 1, "vqshrun.s32", + v4i16, v4i32, NEONvqshrnsu>; +def VQSHRUN64 : N2VNSh<1, 1, 0b100000, 0b1000, 0, 0, 1, "vqshrun.s64", + v2i32, v2i64, NEONvqshrnsu>; + +// VQRSHL : Vector Saturating Rounding Shift +defm VQRSHLs : N3VInt_QHSD<0, 0, 0b0101, 1, "vqrshl.s", + int_arm_neon_vqrshifts, 0>; +defm VQRSHLu : N3VInt_QHSD<1, 0, 0b0101, 1, "vqrshl.u", + int_arm_neon_vqrshiftu, 0>; + +// VQRSHRN : Vector Saturating Rounding Shift Right and Narrow +def VQRSHRNs16: N2VNSh<0, 1, 0b001000, 0b1001, 0, 1, 1, "vqrshrn.s16", + v8i8, v8i16, NEONvqrshrns>; +def VQRSHRNs32: N2VNSh<0, 1, 0b010000, 0b1001, 0, 1, 1, "vqrshrn.s32", + v4i16, v4i32, NEONvqrshrns>; +def VQRSHRNs64: N2VNSh<0, 1, 0b100000, 0b1001, 0, 1, 1, "vqrshrn.s64", + v2i32, v2i64, NEONvqrshrns>; +def VQRSHRNu16: N2VNSh<1, 1, 0b001000, 0b1001, 0, 1, 1, "vqrshrn.u16", + v8i8, v8i16, NEONvqrshrnu>; +def VQRSHRNu32: N2VNSh<1, 1, 0b010000, 0b1001, 0, 1, 1, "vqrshrn.u32", + v4i16, v4i32, NEONvqrshrnu>; +def VQRSHRNu64: N2VNSh<1, 1, 0b100000, 0b1001, 0, 1, 1, "vqrshrn.u64", + v2i32, v2i64, NEONvqrshrnu>; + +// VQRSHRUN : Vector Saturating Rounding Shift Right and Narrow (Unsigned) +def VQRSHRUN16: N2VNSh<1, 1, 0b001000, 0b1000, 0, 1, 1, "vqrshrun.s16", + v8i8, v8i16, NEONvqrshrnsu>; +def VQRSHRUN32: N2VNSh<1, 1, 0b010000, 0b1000, 0, 1, 1, "vqrshrun.s32", + v4i16, v4i32, NEONvqrshrnsu>; +def VQRSHRUN64: N2VNSh<1, 1, 0b100000, 0b1000, 0, 1, 1, "vqrshrun.s64", + v2i32, v2i64, NEONvqrshrnsu>; + +// VSRA : Vector Shift Right and Accumulate +defm VSRAs : N2VShAdd_QHSD<0, 1, 0b0001, 1, "vsra.s", NEONvshrs>; +defm VSRAu : N2VShAdd_QHSD<1, 1, 0b0001, 1, "vsra.u", NEONvshru>; +// VRSRA : Vector Rounding Shift Right and Accumulate +defm VRSRAs 
: N2VShAdd_QHSD<0, 1, 0b0011, 1, "vrsra.s", NEONvrshrs>; +defm VRSRAu : N2VShAdd_QHSD<1, 1, 0b0011, 1, "vrsra.u", NEONvrshru>; + +// VSLI : Vector Shift Left and Insert +defm VSLI : N2VShIns_QHSD<1, 1, 0b0101, 1, "vsli.", NEONvsli>; +// VSRI : Vector Shift Right and Insert +defm VSRI : N2VShIns_QHSD<1, 1, 0b0100, 1, "vsri.", NEONvsri>; + +// Vector Absolute and Saturating Absolute. + +// VABS : Vector Absolute Value +defm VABS : N2VInt_QHS<0b11, 0b11, 0b01, 0b00110, 0, "vabs.s", + int_arm_neon_vabs>; +def VABSfd : N2VDInt<0b11, 0b11, 0b10, 0b01, 0b01110, 0, "vabs.f32", + v2f32, v2f32, int_arm_neon_vabsf>; +def VABSfq : N2VQInt<0b11, 0b11, 0b10, 0b01, 0b01110, 0, "vabs.f32", + v4f32, v4f32, int_arm_neon_vabsf>; + +// VQABS : Vector Saturating Absolute Value +defm VQABS : N2VInt_QHS<0b11, 0b11, 0b00, 0b01110, 0, "vqabs.s", + int_arm_neon_vqabs>; + +// Vector Negate. + +def vneg : PatFrag<(ops node:$in), (sub immAllZerosV, node:$in)>; +def vneg_conv : PatFrag<(ops node:$in), (sub immAllZerosV_bc, node:$in)>; + +class VNEGD<bits<2> size, string OpcodeStr, ValueType Ty> + : N2V<0b11, 0b11, size, 0b01, 0b00111, 0, 0, (outs DPR:$dst), (ins DPR:$src), + !strconcat(OpcodeStr, "\t$dst, $src"), "", + [(set DPR:$dst, (Ty (vneg DPR:$src)))]>; +class VNEGQ<bits<2> size, string OpcodeStr, ValueType Ty> + : N2V<0b11, 0b11, size, 0b01, 0b00111, 1, 0, (outs QPR:$dst), (ins QPR:$src), + !strconcat(OpcodeStr, "\t$dst, $src"), "", + [(set QPR:$dst, (Ty (vneg QPR:$src)))]>; + +// VNEG : Vector Negate +def VNEGs8d : VNEGD<0b00, "vneg.s8", v8i8>; +def VNEGs16d : VNEGD<0b01, "vneg.s16", v4i16>; +def VNEGs32d : VNEGD<0b10, "vneg.s32", v2i32>; +def VNEGs8q : VNEGQ<0b00, "vneg.s8", v16i8>; +def VNEGs16q : VNEGQ<0b01, "vneg.s16", v8i16>; +def VNEGs32q : VNEGQ<0b10, "vneg.s32", v4i32>; + +// VNEG : Vector Negate (floating-point) +def VNEGf32d : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 0, 0, + (outs DPR:$dst), (ins DPR:$src), "vneg.f32\t$dst, $src", "", + [(set DPR:$dst, (v2f32 (fneg DPR:$src)))]>; +def VNEGf32q : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 1, 0, + (outs QPR:$dst), (ins QPR:$src), "vneg.f32\t$dst, $src", "", + [(set QPR:$dst, (v4f32 (fneg QPR:$src)))]>; + +def : Pat<(v8i8 (vneg_conv DPR:$src)), (VNEGs8d DPR:$src)>; +def : Pat<(v4i16 (vneg_conv DPR:$src)), (VNEGs16d DPR:$src)>; +def : Pat<(v2i32 (vneg_conv DPR:$src)), (VNEGs32d DPR:$src)>; +def : Pat<(v16i8 (vneg_conv QPR:$src)), (VNEGs8q QPR:$src)>; +def : Pat<(v8i16 (vneg_conv QPR:$src)), (VNEGs16q QPR:$src)>; +def : Pat<(v4i32 (vneg_conv QPR:$src)), (VNEGs32q QPR:$src)>; + +// VQNEG : Vector Saturating Negate +defm VQNEG : N2VInt_QHS<0b11, 0b11, 0b00, 0b01111, 0, "vqneg.s", + int_arm_neon_vqneg>; + +// Vector Bit Counting Operations. + +// VCLS : Vector Count Leading Sign Bits +defm VCLS : N2VInt_QHS<0b11, 0b11, 0b00, 0b01000, 0, "vcls.s", + int_arm_neon_vcls>; +// VCLZ : Vector Count Leading Zeros +defm VCLZ : N2VInt_QHS<0b11, 0b11, 0b00, 0b01001, 0, "vclz.i", + int_arm_neon_vclz>; +// VCNT : Vector Count One Bits +def VCNTd : N2VDInt<0b11, 0b11, 0b00, 0b00, 0b01010, 0, "vcnt.8", + v8i8, v8i8, int_arm_neon_vcnt>; +def VCNTq : N2VQInt<0b11, 0b11, 0b00, 0b00, 0b01010, 0, "vcnt.8", + v16i8, v16i8, int_arm_neon_vcnt>; + +// Vector Move Operations. 
+ +// VMOV : Vector Move (Register) + +def VMOVD : N3V<0, 0, 0b10, 0b0001, 0, 1, (outs DPR:$dst), (ins DPR:$src), + "vmov\t$dst, $src", "", []>; +def VMOVQ : N3V<0, 0, 0b10, 0b0001, 1, 1, (outs QPR:$dst), (ins QPR:$src), + "vmov\t$dst, $src", "", []>; + +// VMOV : Vector Move (Immediate) + +// VMOV_get_imm8 xform function: convert build_vector to VMOV.i8 imm. +def VMOV_get_imm8 : SDNodeXForm<build_vector, [{ + return ARM::getVMOVImm(N, 1, *CurDAG); +}]>; +def vmovImm8 : PatLeaf<(build_vector), [{ + return ARM::getVMOVImm(N, 1, *CurDAG).getNode() != 0; +}], VMOV_get_imm8>; + +// VMOV_get_imm16 xform function: convert build_vector to VMOV.i16 imm. +def VMOV_get_imm16 : SDNodeXForm<build_vector, [{ + return ARM::getVMOVImm(N, 2, *CurDAG); +}]>; +def vmovImm16 : PatLeaf<(build_vector), [{ + return ARM::getVMOVImm(N, 2, *CurDAG).getNode() != 0; +}], VMOV_get_imm16>; + +// VMOV_get_imm32 xform function: convert build_vector to VMOV.i32 imm. +def VMOV_get_imm32 : SDNodeXForm<build_vector, [{ + return ARM::getVMOVImm(N, 4, *CurDAG); +}]>; +def vmovImm32 : PatLeaf<(build_vector), [{ + return ARM::getVMOVImm(N, 4, *CurDAG).getNode() != 0; +}], VMOV_get_imm32>; + +// VMOV_get_imm64 xform function: convert build_vector to VMOV.i64 imm. +def VMOV_get_imm64 : SDNodeXForm<build_vector, [{ + return ARM::getVMOVImm(N, 8, *CurDAG); +}]>; +def vmovImm64 : PatLeaf<(build_vector), [{ + return ARM::getVMOVImm(N, 8, *CurDAG).getNode() != 0; +}], VMOV_get_imm64>; + +// Note: Some of the cmode bits in the following VMOV instructions need to +// be encoded based on the immed values. + +def VMOVv8i8 : N1ModImm<1, 0b000, 0b1110, 0, 0, 0, 1, (outs DPR:$dst), + (ins i8imm:$SIMM), "vmov.i8\t$dst, $SIMM", "", + [(set DPR:$dst, (v8i8 vmovImm8:$SIMM))]>; +def VMOVv16i8 : N1ModImm<1, 0b000, 0b1110, 0, 1, 0, 1, (outs QPR:$dst), + (ins i8imm:$SIMM), "vmov.i8\t$dst, $SIMM", "", + [(set QPR:$dst, (v16i8 vmovImm8:$SIMM))]>; + +def VMOVv4i16 : N1ModImm<1, 0b000, 0b1000, 0, 0, 0, 1, (outs DPR:$dst), + (ins i16imm:$SIMM), "vmov.i16\t$dst, $SIMM", "", + [(set DPR:$dst, (v4i16 vmovImm16:$SIMM))]>; +def VMOVv8i16 : N1ModImm<1, 0b000, 0b1000, 0, 1, 0, 1, (outs QPR:$dst), + (ins i16imm:$SIMM), "vmov.i16\t$dst, $SIMM", "", + [(set QPR:$dst, (v8i16 vmovImm16:$SIMM))]>; + +def VMOVv2i32 : N1ModImm<1, 0b000, 0b0000, 0, 0, 0, 1, (outs DPR:$dst), + (ins i32imm:$SIMM), "vmov.i32\t$dst, $SIMM", "", + [(set DPR:$dst, (v2i32 vmovImm32:$SIMM))]>; +def VMOVv4i32 : N1ModImm<1, 0b000, 0b0000, 0, 1, 0, 1, (outs QPR:$dst), + (ins i32imm:$SIMM), "vmov.i32\t$dst, $SIMM", "", + [(set QPR:$dst, (v4i32 vmovImm32:$SIMM))]>; + +def VMOVv1i64 : N1ModImm<1, 0b000, 0b1110, 0, 0, 1, 1, (outs DPR:$dst), + (ins i64imm:$SIMM), "vmov.i64\t$dst, $SIMM", "", + [(set DPR:$dst, (v1i64 vmovImm64:$SIMM))]>; +def VMOVv2i64 : N1ModImm<1, 0b000, 0b1110, 0, 1, 1, 1, (outs QPR:$dst), + (ins i64imm:$SIMM), "vmov.i64\t$dst, $SIMM", "", + [(set QPR:$dst, (v2i64 vmovImm64:$SIMM))]>; + +// VMOV : Vector Get Lane (move scalar to ARM core register) + +def VGETLNs8 : NVGetLane<0b11100101, 0b1011, 0b00, + (outs GPR:$dst), (ins DPR:$src, i32imm:$lane), + "vmov", ".s8\t$dst, $src[$lane]", + [(set GPR:$dst, (NEONvgetlanes (v8i8 DPR:$src), + imm:$lane))]>; +def VGETLNs16 : NVGetLane<0b11100001, 0b1011, 0b01, + (outs GPR:$dst), (ins DPR:$src, i32imm:$lane), + "vmov", ".s16\t$dst, $src[$lane]", + [(set GPR:$dst, (NEONvgetlanes (v4i16 DPR:$src), + imm:$lane))]>; +def VGETLNu8 : NVGetLane<0b11101101, 0b1011, 0b00, + (outs GPR:$dst), (ins DPR:$src, i32imm:$lane), + "vmov", ".u8\t$dst, 
$src[$lane]", + [(set GPR:$dst, (NEONvgetlaneu (v8i8 DPR:$src), + imm:$lane))]>; +def VGETLNu16 : NVGetLane<0b11101001, 0b1011, 0b01, + (outs GPR:$dst), (ins DPR:$src, i32imm:$lane), + "vmov", ".u16\t$dst, $src[$lane]", + [(set GPR:$dst, (NEONvgetlaneu (v4i16 DPR:$src), + imm:$lane))]>; +def VGETLNi32 : NVGetLane<0b11100001, 0b1011, 0b00, + (outs GPR:$dst), (ins DPR:$src, i32imm:$lane), + "vmov", ".32\t$dst, $src[$lane]", + [(set GPR:$dst, (extractelt (v2i32 DPR:$src), + imm:$lane))]>; +// def VGETLNf32: see FMRDH and FMRDL in ARMInstrVFP.td +def : Pat<(NEONvgetlanes (v16i8 QPR:$src), imm:$lane), + (VGETLNs8 (v8i8 (EXTRACT_SUBREG QPR:$src, + (SubReg_i8_reg imm:$lane))), + (SubReg_i8_lane imm:$lane))>; +def : Pat<(NEONvgetlanes (v8i16 QPR:$src), imm:$lane), + (VGETLNs16 (v4i16 (EXTRACT_SUBREG QPR:$src, + (SubReg_i16_reg imm:$lane))), + (SubReg_i16_lane imm:$lane))>; +def : Pat<(NEONvgetlaneu (v16i8 QPR:$src), imm:$lane), + (VGETLNu8 (v8i8 (EXTRACT_SUBREG QPR:$src, + (SubReg_i8_reg imm:$lane))), + (SubReg_i8_lane imm:$lane))>; +def : Pat<(NEONvgetlaneu (v8i16 QPR:$src), imm:$lane), + (VGETLNu16 (v4i16 (EXTRACT_SUBREG QPR:$src, + (SubReg_i16_reg imm:$lane))), + (SubReg_i16_lane imm:$lane))>; +def : Pat<(extractelt (v4i32 QPR:$src), imm:$lane), + (VGETLNi32 (v2i32 (EXTRACT_SUBREG QPR:$src, + (SubReg_i32_reg imm:$lane))), + (SubReg_i32_lane imm:$lane))>; +//def : Pat<(extractelt (v2i64 QPR:$src1), imm:$src2), +// (EXTRACT_SUBREG QPR:$src1, (SubReg_f64_reg imm:$src2))>; +def : Pat<(extractelt (v2f64 QPR:$src1), imm:$src2), + (EXTRACT_SUBREG QPR:$src1, (SubReg_f64_reg imm:$src2))>; + + +// VMOV : Vector Set Lane (move ARM core register to scalar) + +let Constraints = "$src1 = $dst" in { +def VSETLNi8 : NVSetLane<0b11100100, 0b1011, 0b00, (outs DPR:$dst), + (ins DPR:$src1, GPR:$src2, i32imm:$lane), + "vmov", ".8\t$dst[$lane], $src2", + [(set DPR:$dst, (vector_insert (v8i8 DPR:$src1), + GPR:$src2, imm:$lane))]>; +def VSETLNi16 : NVSetLane<0b11100000, 0b1011, 0b01, (outs DPR:$dst), + (ins DPR:$src1, GPR:$src2, i32imm:$lane), + "vmov", ".16\t$dst[$lane], $src2", + [(set DPR:$dst, (vector_insert (v4i16 DPR:$src1), + GPR:$src2, imm:$lane))]>; +def VSETLNi32 : NVSetLane<0b11100000, 0b1011, 0b00, (outs DPR:$dst), + (ins DPR:$src1, GPR:$src2, i32imm:$lane), + "vmov", ".32\t$dst[$lane], $src2", + [(set DPR:$dst, (insertelt (v2i32 DPR:$src1), + GPR:$src2, imm:$lane))]>; +} +def : Pat<(vector_insert (v16i8 QPR:$src1), GPR:$src2, imm:$lane), + (v16i8 (INSERT_SUBREG QPR:$src1, + (VSETLNi8 (v8i8 (EXTRACT_SUBREG QPR:$src1, + (SubReg_i8_reg imm:$lane))), + GPR:$src2, (SubReg_i8_lane imm:$lane)), + (SubReg_i8_reg imm:$lane)))>; +def : Pat<(vector_insert (v8i16 QPR:$src1), GPR:$src2, imm:$lane), + (v8i16 (INSERT_SUBREG QPR:$src1, + (VSETLNi16 (v4i16 (EXTRACT_SUBREG QPR:$src1, + (SubReg_i16_reg imm:$lane))), + GPR:$src2, (SubReg_i16_lane imm:$lane)), + (SubReg_i16_reg imm:$lane)))>; +def : Pat<(insertelt (v4i32 QPR:$src1), GPR:$src2, imm:$lane), + (v4i32 (INSERT_SUBREG QPR:$src1, + (VSETLNi32 (v2i32 (EXTRACT_SUBREG QPR:$src1, + (SubReg_i32_reg imm:$lane))), + GPR:$src2, (SubReg_i32_lane imm:$lane)), + (SubReg_i32_reg imm:$lane)))>; + +//def : Pat<(v2i64 (insertelt QPR:$src1, DPR:$src2, imm:$src3)), +// (INSERT_SUBREG QPR:$src1, DPR:$src2, (SubReg_f64_reg imm:$src3))>; +def : Pat<(v2f64 (insertelt QPR:$src1, DPR:$src2, imm:$src3)), + (INSERT_SUBREG QPR:$src1, DPR:$src2, (SubReg_f64_reg imm:$src3))>; + +// VDUP : Vector Duplicate (from ARM core register to all elements) + +def splat_lo : PatFrag<(ops node:$lhs, 
node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N); + return SVOp->isSplat() && SVOp->getSplatIndex() == 0; +}]>; + +class VDUPD<bits<8> opcod1, bits<2> opcod3, string asmSize, ValueType Ty> + : NVDup<opcod1, 0b1011, opcod3, (outs DPR:$dst), (ins GPR:$src), + "vdup", !strconcat(asmSize, "\t$dst, $src"), + [(set DPR:$dst, (Ty (splat_lo (scalar_to_vector GPR:$src), undef)))]>; +class VDUPQ<bits<8> opcod1, bits<2> opcod3, string asmSize, ValueType Ty> + : NVDup<opcod1, 0b1011, opcod3, (outs QPR:$dst), (ins GPR:$src), + "vdup", !strconcat(asmSize, "\t$dst, $src"), + [(set QPR:$dst, (Ty (splat_lo (scalar_to_vector GPR:$src), undef)))]>; + +def VDUP8d : VDUPD<0b11101100, 0b00, ".8", v8i8>; +def VDUP16d : VDUPD<0b11101000, 0b01, ".16", v4i16>; +def VDUP32d : VDUPD<0b11101000, 0b00, ".32", v2i32>; +def VDUP8q : VDUPQ<0b11101110, 0b00, ".8", v16i8>; +def VDUP16q : VDUPQ<0b11101010, 0b01, ".16", v8i16>; +def VDUP32q : VDUPQ<0b11101010, 0b00, ".32", v4i32>; + +def VDUPfd : NVDup<0b11101000, 0b1011, 0b00, (outs DPR:$dst), (ins GPR:$src), + "vdup", ".32\t$dst, $src", + [(set DPR:$dst, (v2f32 (splat_lo + (scalar_to_vector + (f32 (bitconvert GPR:$src))), + undef)))]>; +def VDUPfq : NVDup<0b11101010, 0b1011, 0b00, (outs QPR:$dst), (ins GPR:$src), + "vdup", ".32\t$dst, $src", + [(set QPR:$dst, (v4f32 (splat_lo + (scalar_to_vector + (f32 (bitconvert GPR:$src))), + undef)))]>; + +// VDUP : Vector Duplicate Lane (from scalar to all elements) + +def SHUFFLE_get_splat_lane : SDNodeXForm<vector_shuffle, [{ + ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N); + return CurDAG->getTargetConstant(SVOp->getSplatIndex(), MVT::i32); +}]>; + +def splat_lane : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N); + return SVOp->isSplat(); +}], SHUFFLE_get_splat_lane>; + +class VDUPLND<bits<2> op19_18, bits<2> op17_16, string OpcodeStr, ValueType Ty> + : N2V<0b11, 0b11, op19_18, op17_16, 0b11000, 0, 0, + (outs DPR:$dst), (ins DPR:$src, i32imm:$lane), + !strconcat(OpcodeStr, "\t$dst, $src[$lane]"), "", + [(set DPR:$dst, (Ty (splat_lane:$lane DPR:$src, undef)))]>; + +// vector_shuffle requires that the source and destination types match, so +// VDUP to a 128-bit result uses a target-specific VDUPLANEQ node. 
+class VDUPLNQ<bits<2> op19_18, bits<2> op17_16, string OpcodeStr, + ValueType ResTy, ValueType OpTy> + : N2V<0b11, 0b11, op19_18, op17_16, 0b11000, 1, 0, + (outs QPR:$dst), (ins DPR:$src, i32imm:$lane), + !strconcat(OpcodeStr, "\t$dst, $src[$lane]"), "", + [(set QPR:$dst, (ResTy (NEONvduplaneq (OpTy DPR:$src), imm:$lane)))]>; + +def VDUPLN8d : VDUPLND<0b00, 0b01, "vdup.8", v8i8>; +def VDUPLN16d : VDUPLND<0b00, 0b10, "vdup.16", v4i16>; +def VDUPLN32d : VDUPLND<0b01, 0b00, "vdup.32", v2i32>; +def VDUPLNfd : VDUPLND<0b01, 0b00, "vdup.32", v2f32>; +def VDUPLN8q : VDUPLNQ<0b00, 0b01, "vdup.8", v16i8, v8i8>; +def VDUPLN16q : VDUPLNQ<0b00, 0b10, "vdup.16", v8i16, v4i16>; +def VDUPLN32q : VDUPLNQ<0b01, 0b00, "vdup.32", v4i32, v2i32>; +def VDUPLNfq : VDUPLNQ<0b01, 0b00, "vdup.32", v4f32, v2f32>; + +// VMOVN : Vector Narrowing Move +defm VMOVN : N2VNInt_HSD<0b11,0b11,0b10,0b00100,0,0, "vmovn.i", + int_arm_neon_vmovn>; +// VQMOVN : Vector Saturating Narrowing Move +defm VQMOVNs : N2VNInt_HSD<0b11,0b11,0b10,0b00101,0,0, "vqmovn.s", + int_arm_neon_vqmovns>; +defm VQMOVNu : N2VNInt_HSD<0b11,0b11,0b10,0b00101,1,0, "vqmovn.u", + int_arm_neon_vqmovnu>; +defm VQMOVNsu : N2VNInt_HSD<0b11,0b11,0b10,0b00100,1,0, "vqmovun.s", + int_arm_neon_vqmovnsu>; +// VMOVL : Vector Lengthening Move +defm VMOVLs : N2VLInt_QHS<0,1,0b1010,0,0,1, "vmovl.s", int_arm_neon_vmovls>; +defm VMOVLu : N2VLInt_QHS<1,1,0b1010,0,0,1, "vmovl.u", int_arm_neon_vmovlu>; + +// Vector Conversions. + +// VCVT : Vector Convert Between Floating-Point and Integers +def VCVTf2sd : N2VD<0b11, 0b11, 0b10, 0b11, 0b01110, 0, "vcvt.s32.f32", + v2i32, v2f32, fp_to_sint>; +def VCVTf2ud : N2VD<0b11, 0b11, 0b10, 0b11, 0b01111, 0, "vcvt.u32.f32", + v2i32, v2f32, fp_to_uint>; +def VCVTs2fd : N2VD<0b11, 0b11, 0b10, 0b11, 0b01100, 0, "vcvt.f32.s32", + v2f32, v2i32, sint_to_fp>; +def VCVTu2fd : N2VD<0b11, 0b11, 0b10, 0b11, 0b01101, 0, "vcvt.f32.u32", + v2f32, v2i32, uint_to_fp>; + +def VCVTf2sq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01110, 0, "vcvt.s32.f32", + v4i32, v4f32, fp_to_sint>; +def VCVTf2uq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01111, 0, "vcvt.u32.f32", + v4i32, v4f32, fp_to_uint>; +def VCVTs2fq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01100, 0, "vcvt.f32.s32", + v4f32, v4i32, sint_to_fp>; +def VCVTu2fq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01101, 0, "vcvt.f32.u32", + v4f32, v4i32, uint_to_fp>; + +// VCVT : Vector Convert Between Floating-Point and Fixed-Point. +// Note: Some of the opcode bits in the following VCVT instructions need to +// be encoded based on the immed values. 
+def VCVTf2xsd : N2VCvtD<0, 1, 0b000000, 0b1111, 0, 1, "vcvt.s32.f32", + v2i32, v2f32, int_arm_neon_vcvtfp2fxs>; +def VCVTf2xud : N2VCvtD<1, 1, 0b000000, 0b1111, 0, 1, "vcvt.u32.f32", + v2i32, v2f32, int_arm_neon_vcvtfp2fxu>; +def VCVTxs2fd : N2VCvtD<0, 1, 0b000000, 0b1110, 0, 1, "vcvt.f32.s32", + v2f32, v2i32, int_arm_neon_vcvtfxs2fp>; +def VCVTxu2fd : N2VCvtD<1, 1, 0b000000, 0b1110, 0, 1, "vcvt.f32.u32", + v2f32, v2i32, int_arm_neon_vcvtfxu2fp>; + +def VCVTf2xsq : N2VCvtQ<0, 1, 0b000000, 0b1111, 0, 1, "vcvt.s32.f32", + v4i32, v4f32, int_arm_neon_vcvtfp2fxs>; +def VCVTf2xuq : N2VCvtQ<1, 1, 0b000000, 0b1111, 0, 1, "vcvt.u32.f32", + v4i32, v4f32, int_arm_neon_vcvtfp2fxu>; +def VCVTxs2fq : N2VCvtQ<0, 1, 0b000000, 0b1110, 0, 1, "vcvt.f32.s32", + v4f32, v4i32, int_arm_neon_vcvtfxs2fp>; +def VCVTxu2fq : N2VCvtQ<1, 1, 0b000000, 0b1110, 0, 1, "vcvt.f32.u32", + v4f32, v4i32, int_arm_neon_vcvtfxu2fp>; + +//===----------------------------------------------------------------------===// +// Non-Instruction Patterns +//===----------------------------------------------------------------------===// + +// bit_convert +def : Pat<(v1i64 (bitconvert (v2i32 DPR:$src))), (v1i64 DPR:$src)>; +def : Pat<(v1i64 (bitconvert (v4i16 DPR:$src))), (v1i64 DPR:$src)>; +def : Pat<(v1i64 (bitconvert (v8i8 DPR:$src))), (v1i64 DPR:$src)>; +def : Pat<(v1i64 (bitconvert (f64 DPR:$src))), (v1i64 DPR:$src)>; +def : Pat<(v1i64 (bitconvert (v2f32 DPR:$src))), (v1i64 DPR:$src)>; +def : Pat<(v2i32 (bitconvert (v1i64 DPR:$src))), (v2i32 DPR:$src)>; +def : Pat<(v2i32 (bitconvert (v4i16 DPR:$src))), (v2i32 DPR:$src)>; +def : Pat<(v2i32 (bitconvert (v8i8 DPR:$src))), (v2i32 DPR:$src)>; +def : Pat<(v2i32 (bitconvert (f64 DPR:$src))), (v2i32 DPR:$src)>; +def : Pat<(v2i32 (bitconvert (v2f32 DPR:$src))), (v2i32 DPR:$src)>; +def : Pat<(v4i16 (bitconvert (v1i64 DPR:$src))), (v4i16 DPR:$src)>; +def : Pat<(v4i16 (bitconvert (v2i32 DPR:$src))), (v4i16 DPR:$src)>; +def : Pat<(v4i16 (bitconvert (v8i8 DPR:$src))), (v4i16 DPR:$src)>; +def : Pat<(v4i16 (bitconvert (f64 DPR:$src))), (v4i16 DPR:$src)>; +def : Pat<(v4i16 (bitconvert (v2f32 DPR:$src))), (v4i16 DPR:$src)>; +def : Pat<(v8i8 (bitconvert (v1i64 DPR:$src))), (v8i8 DPR:$src)>; +def : Pat<(v8i8 (bitconvert (v2i32 DPR:$src))), (v8i8 DPR:$src)>; +def : Pat<(v8i8 (bitconvert (v4i16 DPR:$src))), (v8i8 DPR:$src)>; +def : Pat<(v8i8 (bitconvert (f64 DPR:$src))), (v8i8 DPR:$src)>; +def : Pat<(v8i8 (bitconvert (v2f32 DPR:$src))), (v8i8 DPR:$src)>; +def : Pat<(f64 (bitconvert (v1i64 DPR:$src))), (f64 DPR:$src)>; +def : Pat<(f64 (bitconvert (v2i32 DPR:$src))), (f64 DPR:$src)>; +def : Pat<(f64 (bitconvert (v4i16 DPR:$src))), (f64 DPR:$src)>; +def : Pat<(f64 (bitconvert (v8i8 DPR:$src))), (f64 DPR:$src)>; +def : Pat<(f64 (bitconvert (v2f32 DPR:$src))), (f64 DPR:$src)>; +def : Pat<(v2f32 (bitconvert (f64 DPR:$src))), (v2f32 DPR:$src)>; +def : Pat<(v2f32 (bitconvert (v1i64 DPR:$src))), (v2f32 DPR:$src)>; +def : Pat<(v2f32 (bitconvert (v2i32 DPR:$src))), (v2f32 DPR:$src)>; +def : Pat<(v2f32 (bitconvert (v4i16 DPR:$src))), (v2f32 DPR:$src)>; +def : Pat<(v2f32 (bitconvert (v8i8 DPR:$src))), (v2f32 DPR:$src)>; + +def : Pat<(v2i64 (bitconvert (v4i32 QPR:$src))), (v2i64 QPR:$src)>; +def : Pat<(v2i64 (bitconvert (v8i16 QPR:$src))), (v2i64 QPR:$src)>; +def : Pat<(v2i64 (bitconvert (v16i8 QPR:$src))), (v2i64 QPR:$src)>; +def : Pat<(v2i64 (bitconvert (v2f64 QPR:$src))), (v2i64 QPR:$src)>; +def : Pat<(v2i64 (bitconvert (v4f32 QPR:$src))), (v2i64 QPR:$src)>; +def : Pat<(v4i32 (bitconvert (v2i64 QPR:$src))), (v4i32 
QPR:$src)>; +def : Pat<(v4i32 (bitconvert (v8i16 QPR:$src))), (v4i32 QPR:$src)>; +def : Pat<(v4i32 (bitconvert (v16i8 QPR:$src))), (v4i32 QPR:$src)>; +def : Pat<(v4i32 (bitconvert (v2f64 QPR:$src))), (v4i32 QPR:$src)>; +def : Pat<(v4i32 (bitconvert (v4f32 QPR:$src))), (v4i32 QPR:$src)>; +def : Pat<(v8i16 (bitconvert (v2i64 QPR:$src))), (v8i16 QPR:$src)>; +def : Pat<(v8i16 (bitconvert (v4i32 QPR:$src))), (v8i16 QPR:$src)>; +def : Pat<(v8i16 (bitconvert (v16i8 QPR:$src))), (v8i16 QPR:$src)>; +def : Pat<(v8i16 (bitconvert (v2f64 QPR:$src))), (v8i16 QPR:$src)>; +def : Pat<(v8i16 (bitconvert (v4f32 QPR:$src))), (v8i16 QPR:$src)>; +def : Pat<(v16i8 (bitconvert (v2i64 QPR:$src))), (v16i8 QPR:$src)>; +def : Pat<(v16i8 (bitconvert (v4i32 QPR:$src))), (v16i8 QPR:$src)>; +def : Pat<(v16i8 (bitconvert (v8i16 QPR:$src))), (v16i8 QPR:$src)>; +def : Pat<(v16i8 (bitconvert (v2f64 QPR:$src))), (v16i8 QPR:$src)>; +def : Pat<(v16i8 (bitconvert (v4f32 QPR:$src))), (v16i8 QPR:$src)>; +def : Pat<(v4f32 (bitconvert (v2i64 QPR:$src))), (v4f32 QPR:$src)>; +def : Pat<(v4f32 (bitconvert (v4i32 QPR:$src))), (v4f32 QPR:$src)>; +def : Pat<(v4f32 (bitconvert (v8i16 QPR:$src))), (v4f32 QPR:$src)>; +def : Pat<(v4f32 (bitconvert (v16i8 QPR:$src))), (v4f32 QPR:$src)>; +def : Pat<(v4f32 (bitconvert (v2f64 QPR:$src))), (v4f32 QPR:$src)>; +def : Pat<(v2f64 (bitconvert (v2i64 QPR:$src))), (v2f64 QPR:$src)>; +def : Pat<(v2f64 (bitconvert (v4i32 QPR:$src))), (v2f64 QPR:$src)>; +def : Pat<(v2f64 (bitconvert (v8i16 QPR:$src))), (v2f64 QPR:$src)>; +def : Pat<(v2f64 (bitconvert (v16i8 QPR:$src))), (v2f64 QPR:$src)>; +def : Pat<(v2f64 (bitconvert (v4f32 QPR:$src))), (v2f64 QPR:$src)>; diff --git a/lib/Target/ARM/ARMInstrThumb.td b/lib/Target/ARM/ARMInstrThumb.td index 9297f08..1def093 100644 --- a/lib/Target/ARM/ARMInstrThumb.td +++ b/lib/Target/ARM/ARMInstrThumb.td @@ -319,7 +319,7 @@ def tAND : TIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), def tASRri : TI<(outs tGPR:$dst), (ins tGPR:$lhs, i32imm:$rhs), "asr $dst, $lhs, $rhs", - [(set tGPR:$dst, (sra tGPR:$lhs, imm:$rhs))]>; + [(set tGPR:$dst, (sra tGPR:$lhs, (i32 imm:$rhs)))]>; def tASRrr : TIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), "asr $dst, $rhs", @@ -367,7 +367,7 @@ def tEOR : TIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), def tLSLri : TI<(outs tGPR:$dst), (ins tGPR:$lhs, i32imm:$rhs), "lsl $dst, $lhs, $rhs", - [(set tGPR:$dst, (shl tGPR:$lhs, imm:$rhs))]>; + [(set tGPR:$dst, (shl tGPR:$lhs, (i32 imm:$rhs)))]>; def tLSLrr : TIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), "lsl $dst, $rhs", @@ -375,7 +375,7 @@ def tLSLrr : TIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), def tLSRri : TI<(outs tGPR:$dst), (ins tGPR:$lhs, i32imm:$rhs), "lsr $dst, $lhs, $rhs", - [(set tGPR:$dst, (srl tGPR:$lhs, imm:$rhs))]>; + [(set tGPR:$dst, (srl tGPR:$lhs, (i32 imm:$rhs)))]>; def tLSRrr : TIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), "lsr $dst, $rhs", @@ -429,18 +429,18 @@ def tREV : TI<(outs tGPR:$dst), (ins tGPR:$src), def tREV16 : TI<(outs tGPR:$dst), (ins tGPR:$src), "rev16 $dst, $src", [(set tGPR:$dst, - (or (and (srl tGPR:$src, 8), 0xFF), - (or (and (shl tGPR:$src, 8), 0xFF00), - (or (and (srl tGPR:$src, 8), 0xFF0000), - (and (shl tGPR:$src, 8), 0xFF000000)))))]>, + (or (and (srl tGPR:$src, (i32 8)), 0xFF), + (or (and (shl tGPR:$src, (i32 8)), 0xFF00), + (or (and (srl tGPR:$src, (i32 8)), 0xFF0000), + (and (shl tGPR:$src, (i32 8)), 0xFF000000)))))]>, Requires<[IsThumb, HasV6]>; def tREVSH : TI<(outs tGPR:$dst), (ins tGPR:$src), "revsh $dst, $src", [(set tGPR:$dst, 
(sext_inreg - (or (srl (and tGPR:$src, 0xFFFF), 8), - (shl tGPR:$src, 8)), i16))]>, + (or (srl (and tGPR:$src, 0xFFFF), (i32 8)), + (shl tGPR:$src, (i32 8))), i16))]>, Requires<[IsThumb, HasV6]>; def tROR : TIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td index 07c71da..0aba2d5 100644 --- a/lib/Target/ARM/ARMInstrThumb2.td +++ b/lib/Target/ARM/ARMInstrThumb2.td @@ -160,7 +160,7 @@ def tMOVi16 : PseudoInst<(outs GPR:$dst), (ins i32imm:$src), [(set GPR:$dst, imm0_65535:$src)]>, Requires<[HasThumb2]>; -let isTwoAddress = 1 in +let Constraints = "$src = $dst" in def tMOVTi16 : PseudoInst<(outs GPR:$dst), (ins GPR:$src, i32imm:$imm), "movt $dst, $imm", [(set GPR:$dst, (or (and GPR:$src, 0xffff), diff --git a/lib/Target/ARM/ARMRegisterInfo.cpp b/lib/Target/ARM/ARMRegisterInfo.cpp index bbc1300..bb0cc8f 100644 --- a/lib/Target/ARM/ARMRegisterInfo.cpp +++ b/lib/Target/ARM/ARMRegisterInfo.cpp @@ -235,8 +235,10 @@ ARMRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { }; static const unsigned DarwinCalleeSavedRegs[] = { + // Darwin ABI deviates from ARM standard ABI. R9 is not a callee-saved + // register. ARM::LR, ARM::R7, ARM::R6, ARM::R5, ARM::R4, - ARM::R11, ARM::R10, ARM::R9, ARM::R8, + ARM::R11, ARM::R10, ARM::R8, ARM::D15, ARM::D14, ARM::D13, ARM::D12, ARM::D11, ARM::D10, ARM::D9, ARM::D8, @@ -256,6 +258,7 @@ ARMRegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const { &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass, 0 }; + static const TargetRegisterClass * const ThumbCalleeSavedRegClasses[] = { &ARM::GPRRegClass, &ARM::GPRRegClass, &ARM::GPRRegClass, &ARM::GPRRegClass, &ARM::GPRRegClass, &ARM::tGPRRegClass, @@ -265,7 +268,33 @@ ARMRegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const { &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass, 0 }; - return STI.isThumb() ? ThumbCalleeSavedRegClasses : CalleeSavedRegClasses; + + static const TargetRegisterClass * const DarwinCalleeSavedRegClasses[] = { + &ARM::GPRRegClass, &ARM::GPRRegClass, &ARM::GPRRegClass, + &ARM::GPRRegClass, &ARM::GPRRegClass, &ARM::GPRRegClass, + &ARM::GPRRegClass, &ARM::GPRRegClass, + + &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass, + &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass, + 0 + }; + + static const TargetRegisterClass * const DarwinThumbCalleeSavedRegClasses[] ={ + &ARM::GPRRegClass, &ARM::tGPRRegClass, &ARM::tGPRRegClass, + &ARM::tGPRRegClass, &ARM::tGPRRegClass, &ARM::GPRRegClass, + &ARM::GPRRegClass, &ARM::GPRRegClass, + + &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass, + &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass, + 0 + }; + + if (STI.isThumb()) { + return STI.isTargetDarwin() + ? DarwinThumbCalleeSavedRegClasses : ThumbCalleeSavedRegClasses; + } + return STI.isTargetDarwin() + ? 
DarwinCalleeSavedRegClasses : CalleeSavedRegClasses; } BitVector ARMRegisterInfo::getReservedRegs(const MachineFunction &MF) const { @@ -497,7 +526,9 @@ ARMRegisterInfo::requiresRegisterScavenging(const MachineFunction &MF) const { /// bool ARMRegisterInfo::hasFP(const MachineFunction &MF) const { const MachineFrameInfo *MFI = MF.getFrameInfo(); - return NoFramePointerElim || MFI->hasVarSizedObjects(); + return (NoFramePointerElim || + MFI->hasVarSizedObjects() || + MFI->isFrameAddressTaken()); } // hasReservedCallFrame - Under normal circumstances, when a frame pointer is diff --git a/lib/Target/ARM/ARMRegisterInfo.td b/lib/Target/ARM/ARMRegisterInfo.td index d864079..a057e5c 100644 --- a/lib/Target/ARM/ARMRegisterInfo.td +++ b/lib/Target/ARM/ARMRegisterInfo.td @@ -77,6 +77,34 @@ def D13 : ARMReg<13, "d13", [S26, S27]>; def D14 : ARMReg<14, "d14", [S28, S29]>; def D15 : ARMReg<15, "d15", [S30, S31]>; +// VFP3 defines 16 additional double registers +def D16 : ARMFReg<16, "d16">; def D17 : ARMFReg<17, "d17">; +def D18 : ARMFReg<18, "d18">; def D19 : ARMFReg<19, "d19">; +def D20 : ARMFReg<20, "d20">; def D21 : ARMFReg<21, "d21">; +def D22 : ARMFReg<22, "d22">; def D23 : ARMFReg<23, "d23">; +def D24 : ARMFReg<24, "d24">; def D25 : ARMFReg<25, "d25">; +def D26 : ARMFReg<26, "d26">; def D27 : ARMFReg<27, "d27">; +def D28 : ARMFReg<28, "d28">; def D29 : ARMFReg<29, "d29">; +def D30 : ARMFReg<30, "d30">; def D31 : ARMFReg<31, "d31">; + +// Advanced SIMD (NEON) defines 16 quad-word aliases +def Q0 : ARMReg< 0, "q0", [D0, D1]>; +def Q1 : ARMReg< 1, "q1", [D2, D3]>; +def Q2 : ARMReg< 2, "q2", [D4, D5]>; +def Q3 : ARMReg< 3, "q3", [D6, D7]>; +def Q4 : ARMReg< 4, "q4", [D8, D9]>; +def Q5 : ARMReg< 5, "q5", [D10, D11]>; +def Q6 : ARMReg< 6, "q6", [D12, D13]>; +def Q7 : ARMReg< 7, "q7", [D14, D15]>; +def Q8 : ARMReg< 8, "q8", [D16, D17]>; +def Q9 : ARMReg< 9, "q9", [D18, D19]>; +def Q10 : ARMReg<10, "q10", [D20, D21]>; +def Q11 : ARMReg<11, "q11", [D22, D23]>; +def Q12 : ARMReg<12, "q12", [D24, D25]>; +def Q13 : ARMReg<13, "q13", [D26, D27]>; +def Q14 : ARMReg<14, "q14", [D28, D29]>; +def Q15 : ARMReg<15, "q15", [D30, D31]>; + // Current Program Status Register. def CPSR : ARMReg<0, "cpsr">; @@ -87,6 +115,7 @@ def CPSR : ARMReg<0, "cpsr">; // sp == Stack Pointer // r12 == ip (scratch) // r7 == Frame Pointer (thumb-style backtraces) +// r9 == May be reserved as Thread Register // r11 == Frame Pointer (arm-style backtraces) // r10 == Stack Limit // @@ -115,13 +144,13 @@ def GPR : RegisterClass<"ARM", [i32], 32, [R0, R1, R2, R3, R4, R5, R6, ARM::R4, ARM::R5, ARM::R6, ARM::R7, ARM::R8, ARM::R10, ARM::R11 }; - // FP is R7, R9 is available. + // FP is R7, R9 is available as a non-callee-saved register. + // This is used by Darwin. static const unsigned ARM_GPR_AO_3[] = { ARM::R0, ARM::R1, ARM::R2, ARM::R3, - ARM::R12,ARM::LR, + ARM::R9, ARM::R12,ARM::LR, ARM::R4, ARM::R5, ARM::R6, - ARM::R8, ARM::R9, ARM::R10,ARM::R11, - ARM::R7 }; + ARM::R8, ARM::R10,ARM::R11,ARM::R7 }; // FP is R7, R9 is not available.
static const unsigned ARM_GPR_AO_4[] = { ARM::R0, ARM::R1, ARM::R2, ARM::R3, @@ -155,17 +184,15 @@ def GPR : RegisterClass<"ARM", [i32], 32, [R0, R1, R2, R3, R4, R5, R6, GPRClass::iterator I; if (Subtarget.isTargetDarwin()) { - if (Subtarget.isR9Reserved()) { + if (Subtarget.isR9Reserved()) I = ARM_GPR_AO_4 + (sizeof(ARM_GPR_AO_4)/sizeof(unsigned)); - } else { + else I = ARM_GPR_AO_3 + (sizeof(ARM_GPR_AO_3)/sizeof(unsigned)); - } } else { - if (Subtarget.isR9Reserved()) { + if (Subtarget.isR9Reserved()) I = ARM_GPR_AO_2 + (sizeof(ARM_GPR_AO_2)/sizeof(unsigned)); - } else { + else I = ARM_GPR_AO_1 + (sizeof(ARM_GPR_AO_1)/sizeof(unsigned)); - } } // Mac OS X requires FP not to be clobbered for backtracing purpose. @@ -208,14 +235,67 @@ def tGPR : RegisterClass<"ARM", [i32], 32, [R0, R1, R2, R3, R4, R5, R6, R7]> { }]; } +// Scalar single precision floating point register class. def SPR : RegisterClass<"ARM", [f32], 32, [S0, S1, S2, S3, S4, S5, S6, S7, S8, S9, S10, S11, S12, S13, S14, S15, S16, S17, S18, S19, S20, S21, S22, S23, S24, S25, S26, S27, S28, S29, S30, S31]>; +// Scalar double precision floating point / generic 64-bit vector register +// class. // ARM requires only word alignment for double. It's more performant if it // is double-word alignment though. -def DPR : RegisterClass<"ARM", [f64], 64, [D0, D1, D2, D3, D4, D5, D6, D7, D8, - D9, D10, D11, D12, D13, D14, D15]>; +def DPR : RegisterClass<"ARM", [f64, v8i8, v4i16, v2i32, v1i64, v2f32], 64, + [D0, D1, D2, D3, D4, D5, D6, D7, + D8, D9, D10, D11, D12, D13, D14, D15]> { + let SubRegClassList = [SPR, SPR]; + let MethodProtos = [{ + iterator allocation_order_begin(const MachineFunction &MF) const; + iterator allocation_order_end(const MachineFunction &MF) const; + }]; + let MethodBodies = [{ + // VFP2 + static const unsigned ARM_DPR_VFP2[] = { + ARM::D0, ARM::D1, ARM::D2, ARM::D3, + ARM::D4, ARM::D5, ARM::D6, ARM::D7, + ARM::D8, ARM::D9, ARM::D10, ARM::D11, + ARM::D12, ARM::D13, ARM::D14, ARM::D15 }; + // VFP3 + static const unsigned ARM_DPR_VFP3[] = { + ARM::D0, ARM::D1, ARM::D2, ARM::D3, + ARM::D4, ARM::D5, ARM::D6, ARM::D7, + ARM::D8, ARM::D9, ARM::D10, ARM::D11, + ARM::D12, ARM::D13, ARM::D14, ARM::D15, + ARM::D16, ARM::D17, ARM::D18, ARM::D19, + ARM::D20, ARM::D21, ARM::D22, ARM::D23, + ARM::D24, ARM::D25, ARM::D26, ARM::D27, + ARM::D28, ARM::D29, ARM::D30, ARM::D31 }; + DPRClass::iterator + DPRClass::allocation_order_begin(const MachineFunction &MF) const { + const TargetMachine &TM = MF.getTarget(); + const ARMSubtarget &Subtarget = TM.getSubtarget<ARMSubtarget>(); + if (Subtarget.hasVFP3()) + return ARM_DPR_VFP3; + return ARM_DPR_VFP2; + } + + DPRClass::iterator + DPRClass::allocation_order_end(const MachineFunction &MF) const { + const TargetMachine &TM = MF.getTarget(); + const ARMSubtarget &Subtarget = TM.getSubtarget<ARMSubtarget>(); + if (Subtarget.hasVFP3()) + return ARM_DPR_VFP3 + (sizeof(ARM_DPR_VFP3)/sizeof(unsigned)); + else + return ARM_DPR_VFP2 + (sizeof(ARM_DPR_VFP2)/sizeof(unsigned)); + } + }]; +} + +// Generic 128-bit vector register class. +def QPR : RegisterClass<"ARM", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], 128, + [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7, + Q8, Q9, Q10, Q11, Q12, Q13, Q14, Q15]> { + let SubRegClassList = [SPR, SPR, SPR, SPR, DPR, DPR]; +} // Condition code registers. def CCR : RegisterClass<"ARM", [i32], 32, [CPSR]>; @@ -225,12 +305,40 @@ def CCR : RegisterClass<"ARM", [i32], 32, [CPSR]>; // sub registers for each register.
// -def : SubRegSet<1, [D0, D1, D2, D3, D4, D5, D6, D7, - D8, D9, D10, D11, D12, D13, D14, D15], - [S0, S2, S4, S6, S8, S10, S12, S14, - S16, S18, S20, S22, S24, S26, S28, S30]>; +def arm_ssubreg_0 : PatLeaf<(i32 1)>; +def arm_ssubreg_1 : PatLeaf<(i32 2)>; +def arm_ssubreg_2 : PatLeaf<(i32 3)>; +def arm_ssubreg_3 : PatLeaf<(i32 4)>; +def arm_dsubreg_0 : PatLeaf<(i32 5)>; +def arm_dsubreg_1 : PatLeaf<(i32 6)>; -def : SubRegSet<2, [D0, D1, D2, D3, D4, D5, D6, D7, - D8, D9, D10, D11, D12, D13, D14, D15], - [S1, S3, S5, S7, S9, S11, S13, S15, +// S sub-registers of D registers. +def : SubRegSet<1, [D0, D1, D2, D3, D4, D5, D6, D7, + D8, D9, D10, D11, D12, D13, D14, D15], + [S0, S2, S4, S6, S8, S10, S12, S14, + S16, S18, S20, S22, S24, S26, S28, S30]>; +def : SubRegSet<2, [D0, D1, D2, D3, D4, D5, D6, D7, + D8, D9, D10, D11, D12, D13, D14, D15], + [S1, S3, S5, S7, S9, S11, S13, S15, S17, S19, S21, S23, S25, S27, S29, S31]>; + +// S sub-registers of Q registers. +def : SubRegSet<1, [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7], + [S0, S4, S8, S12, S16, S20, S24, S28]>; +def : SubRegSet<2, [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7], + [S1, S5, S9, S13, S17, S21, S25, S29]>; +def : SubRegSet<3, [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7], + [S2, S6, S10, S14, S18, S22, S26, S30]>; +def : SubRegSet<4, [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7], + [S3, S7, S11, S15, S19, S23, S27, S31]>; + +// D sub-registers of Q registers. +def : SubRegSet<5, [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7, + Q8, Q9, Q10, Q11, Q12, Q13, Q14, Q15], + [D0, D2, D4, D6, D8, D10, D12, D14, + D16, D18, D20, D22, D24, D26, D28, D30]>; +def : SubRegSet<6, [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7, + Q8, Q9, Q10, Q11, Q12, Q13, Q14, Q15], + [D1, D3, D5, D7, D9, D11, D13, D15, + D17, D19, D21, D23, D25, D27, D29, D31]>; + diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp index 7ac7b49..e611088 100644 --- a/lib/Target/ARM/ARMSubtarget.cpp +++ b/lib/Target/ARM/ARMSubtarget.cpp @@ -16,15 +16,20 @@ #include "llvm/Module.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" +#include "llvm/Support/CommandLine.h" using namespace llvm; +static cl::opt<bool> +ReserveR9("arm-reserve-r9", cl::Hidden, + cl::desc("Reserve R9, making it unavailable as GPR")); + ARMSubtarget::ARMSubtarget(const Module &M, const std::string &FS, bool isThumb) : ARMArchVersion(V4T) , ARMFPUType(None) , IsThumb(isThumb) , ThumbMode(Thumb1) - , IsR9Reserved(false) + , IsR9Reserved(ReserveR9) , stackAlignment(4) , CPUString("generic") , TargetType(isELF) // Default to ELF unless otherwise specified. 
@@ -46,7 +51,7 @@ ARMSubtarget::ARMSubtarget(const Module &M, const std::string &FS, if (Len >= 5 && TT.substr(0, 4) == "armv") Idx = 4; - else if (Len >= 6 && TT.substr(0, 6) == "thumb") { + else if (Len >= 6 && TT.substr(0, 5) == "thumb") { IsThumb = true; if (Len >= 7 && TT[5] == 'v') Idx = 6; @@ -54,15 +59,19 @@ ARMSubtarget::ARMSubtarget(const Module &M, const std::string &FS, if (Idx) { unsigned SubVer = TT[Idx]; if (SubVer > '4' && SubVer <= '9') { - if (SubVer >= '7') + if (SubVer >= '7') { ARMArchVersion = V7A; - else if (SubVer == '6') + } else if (SubVer == '6') { ARMArchVersion = V6; - else if (SubVer == '5') { + if (Len >= Idx+3 && TT[Idx+1] == 't' && TT[Idx+2] == '2') + ARMArchVersion = V6T2; + } else if (SubVer == '5') { ARMArchVersion = V5T; if (Len >= Idx+3 && TT[Idx+1] == 't' && TT[Idx+2] == 'e') ARMArchVersion = V5TE; } + if (ARMArchVersion >= V6T2) + ThumbMode = Thumb2; } } @@ -83,5 +92,5 @@ ARMSubtarget::ARMSubtarget(const Module &M, const std::string &FS, stackAlignment = 8; if (isTargetDarwin()) - IsR9Reserved = true; + IsR9Reserved = ReserveR9 | (ARMArchVersion < V6); } diff --git a/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp b/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp index 948a100..58ba50e 100644 --- a/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp +++ b/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp @@ -285,12 +285,22 @@ void ARMAsmPrinter::printOperand(const MachineInstr *MI, int opNum, const char *Modifier) { const MachineOperand &MO = MI->getOperand(opNum); switch (MO.getType()) { - case MachineOperand::MO_Register: - if (TargetRegisterInfo::isPhysicalRegister(MO.getReg())) - O << TM.getRegisterInfo()->get(MO.getReg()).AsmName; - else + case MachineOperand::MO_Register: { + unsigned Reg = MO.getReg(); + if (TargetRegisterInfo::isPhysicalRegister(Reg)) { + if (Modifier && strcmp(Modifier, "dregpair") == 0) { + unsigned DRegLo = TRI->getSubReg(Reg, 5); // arm_dsubreg_0 + unsigned DRegHi = TRI->getSubReg(Reg, 6); // arm_dsubreg_1 + O << '{' + << TRI->getAsmName(DRegLo) << "-" << TRI->getAsmName(DRegHi) + << '}'; + } else { + O << TRI->getAsmName(Reg); + } + } else assert(0 && "not implemented"); break; + } case MachineOperand::MO_Immediate: { if (!Modifier || strcmp(Modifier, "no_hash") != 0) O << "#"; diff --git a/lib/Target/ARM/README.txt b/lib/Target/ARM/README.txt index 068c441e..0252a4a 100644 --- a/lib/Target/ARM/README.txt +++ b/lib/Target/ARM/README.txt @@ -552,3 +552,23 @@ __Z11no_overflowjj: //===---------------------------------------------------------------------===// +Some of the NEON intrinsics may be appropriate for more general use, either +as target-independent intrinsics or perhaps elsewhere in the ARM backend. +Some of them may also be lowered to target-independent SDNodes, and perhaps +some new SDNodes could be added. + +For example, maximum, minimum, and absolute value operations are well-defined +and standard operations, both for vector and scalar types. + +The current NEON-specific intrinsics for count leading zeros and count one +bits could perhaps be replaced by the target-independent ctlz and ctpop +intrinsics. It may also make sense to add a target-independent "ctls" +intrinsic for "count leading sign bits". Likewise, the backend could use +the target-independent SDNodes for these operations. + +ARMv6 has scalar saturating and halving adds and subtracts. The same +intrinsics could possibly be used for both NEON's vector implementations of +those operations and the ARMv6 scalar versions. 
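
To make the "count leading sign bits" suggestion above concrete, here is one possible reference semantics, sketched in plain C++ for illustration only; the ctls32 helper is a made-up name, not an existing intrinsic and not part of this patch:

  #include <cstdint>

  // Count how many bits directly below the sign bit are copies of it.
  // e.g. ctls32(0) == 31, ctls32(-1) == 31, ctls32(1) == 30.
  static unsigned ctls32(int32_t x) {
    uint32_t u = static_cast<uint32_t>(x);
    unsigned sign = u >> 31;   // the sign bit itself
    unsigned n = 0;
    for (int bit = 30; bit >= 0 && ((u >> bit) & 1u) == sign; --bit)
      ++n;
    return n;
  }

A vector form of such an intrinsic would presumably apply the same per-element computation to each lane.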
+ +//===---------------------------------------------------------------------===// + diff --git a/lib/Target/PIC16/PIC16ISelLowering.cpp b/lib/Target/PIC16/PIC16ISelLowering.cpp index f113a48..122af70 100644 --- a/lib/Target/PIC16/PIC16ISelLowering.cpp +++ b/lib/Target/PIC16/PIC16ISelLowering.cpp @@ -702,10 +702,12 @@ void PIC16TargetLowering::LegalizeAddress(SDValue Ptr, SelectionDAG &DAG, if (Ptr.getOpcode() == ISD::ADD) { SDValue OperLeft = Ptr.getOperand(0); SDValue OperRight = Ptr.getOperand(1); - if (OperLeft.getOpcode() == ISD::Constant) { + if ((OperLeft.getOpcode() == ISD::Constant) && + (dyn_cast<ConstantSDNode>(OperLeft)->getZExtValue() < 32 )) { Offset = dyn_cast<ConstantSDNode>(OperLeft)->getZExtValue(); Ptr = OperRight; - } else if (OperRight.getOpcode() == ISD::Constant) { + } else if ((OperRight.getOpcode() == ISD::Constant) && + (dyn_cast<ConstantSDNode>(OperRight)->getZExtValue() < 32 )){ Offset = dyn_cast<ConstantSDNode>(OperRight)->getZExtValue(); Ptr = OperLeft; } diff --git a/lib/Target/TargetData.cpp b/lib/Target/TargetData.cpp index 67fefbb..7b843df 100644 --- a/lib/Target/TargetData.cpp +++ b/lib/Target/TargetData.cpp @@ -23,6 +23,7 @@ #include "llvm/Support/GetElementPtrTypeIterator.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/ManagedStatic.h" +#include "llvm/System/Mutex.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/StringExtras.h" #include <algorithm> @@ -345,11 +346,13 @@ typedef DenseMap<LayoutKey, StructLayout*, DenseMapLayoutKeyInfo> LayoutInfoTy; } static ManagedStatic<LayoutInfoTy> LayoutInfo; +static ManagedStatic<sys::SmartMutex<true> > LayoutLock; TargetData::~TargetData() { if (!LayoutInfo.isConstructed()) return; + sys::SmartScopedLock<true> Lock(&*LayoutLock); // Remove any layouts for this TD. LayoutInfoTy &TheMap = *LayoutInfo; for (LayoutInfoTy::iterator I = TheMap.begin(), E = TheMap.end(); I != E; ) { @@ -366,6 +369,7 @@ TargetData::~TargetData() { const StructLayout *TargetData::getStructLayout(const StructType *Ty) const { LayoutInfoTy &TheMap = *LayoutInfo; + sys::SmartScopedLock<true> Lock(&*LayoutLock); StructLayout *&SL = TheMap[LayoutKey(this, Ty)]; if (SL) return SL; @@ -390,6 +394,7 @@ const StructLayout *TargetData::getStructLayout(const StructType *Ty) const { void TargetData::InvalidateStructLayoutInfo(const StructType *Ty) const { if (!LayoutInfo.isConstructed()) return; // No cache. 
+ sys::SmartScopedLock<true> Lock(&*LayoutLock); LayoutInfoTy::iterator I = LayoutInfo->find(LayoutKey(this, Ty)); if (I == LayoutInfo->end()) return; diff --git a/lib/Target/X86/X86ELFWriterInfo.cpp b/lib/Target/X86/X86ELFWriterInfo.cpp index d84034b..315118f 100644 --- a/lib/Target/X86/X86ELFWriterInfo.cpp +++ b/lib/Target/X86/X86ELFWriterInfo.cpp @@ -12,11 +12,17 @@ //===----------------------------------------------------------------------===// #include "X86ELFWriterInfo.h" +#include "X86Relocations.h" #include "llvm/Function.h" #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetMachine.h" + using namespace llvm; +//===----------------------------------------------------------------------===// +// Implementation of the X86ELFWriterInfo class +//===----------------------------------------------------------------------===// + X86ELFWriterInfo::X86ELFWriterInfo(TargetMachine &TM) : TargetELFWriterInfo(TM) { bool is64Bit = TM.getTargetData()->getPointerSizeInBits() == 64; @@ -25,6 +31,34 @@ X86ELFWriterInfo::X86ELFWriterInfo(TargetMachine &TM) X86ELFWriterInfo::~X86ELFWriterInfo() {} +unsigned X86ELFWriterInfo::getRelocationType(unsigned MachineRelTy) const { + if (is64Bit) { + switch(MachineRelTy) { + case X86::reloc_pcrel_word: + return R_X86_64_PC32; + case X86::reloc_absolute_word: + return R_X86_64_32; + case X86::reloc_absolute_dword: + return R_X86_64_64; + case X86::reloc_picrel_word: + default: + assert(0 && "unknown relocation type"); + } + } else { + switch(MachineRelTy) { + case X86::reloc_pcrel_word: + return R_386_PC32; + case X86::reloc_absolute_word: + return R_386_32; + case X86::reloc_absolute_dword: + case X86::reloc_picrel_word: + default: + assert(0 && "unknown relocation type"); + } + } + return 0; +} + unsigned X86ELFWriterInfo::getFunctionAlignment(const Function *F) const { unsigned FnAlign = 4; @@ -36,3 +70,15 @@ unsigned X86ELFWriterInfo::getFunctionAlignment(const Function *F) const { return (1 << FnAlign); } + +long int X86ELFWriterInfo::getAddendForRelTy(unsigned RelTy) const { + if (is64Bit) { + switch(RelTy) { + case R_X86_64_PC32: return -4; + break; + default: + assert(0 && "unknown x86 relocation type"); + } + } + return 0; +} diff --git a/lib/Target/X86/X86ELFWriterInfo.h b/lib/Target/X86/X86ELFWriterInfo.h index e9c5bc4..96485b8 100644 --- a/lib/Target/X86/X86ELFWriterInfo.h +++ b/lib/Target/X86/X86ELFWriterInfo.h @@ -19,11 +19,43 @@ namespace llvm { class X86ELFWriterInfo : public TargetELFWriterInfo { + + // ELF Relocation types for X86 + enum X86RelocationType { + R_386_NONE = 0, + R_386_32 = 1, + R_386_PC32 = 2 + }; + + // ELF Relocation types for X86_64 + enum X86_64RelocationType { + R_X86_64_NONE = 0, + R_X86_64_64 = 1, + R_X86_64_PC32 = 2, + R_X86_64_32 = 10, + R_X86_64_32S = 11, + R_X86_64_PC64 = 24 + }; + public: X86ELFWriterInfo(TargetMachine &TM); virtual ~X86ELFWriterInfo(); + /// getFunctionAlignment - Returns the alignment for function 'F', targets + /// with different alignment constraints should overload this method virtual unsigned getFunctionAlignment(const Function *F) const; + + /// getRelocationType - Returns the target specific ELF Relocation type. + /// 'MachineRelTy' contains the object code independent relocation type + virtual unsigned getRelocationType(unsigned MachineRelTy) const; + + /// hasRelocationAddend - True if the target uses an addend in the + /// ELF relocation entry. + virtual bool hasRelocationAddend() const { return is64Bit ? 
true : false; } + + /// getAddendForRelTy - Gets the addend value for an ELF relocation entry + /// based on the target relocation type + virtual long int getAddendForRelTy(unsigned RelTy) const; }; } // end llvm namespace diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp index 326fb38..6c20e7d 100644 --- a/lib/Transforms/Scalar/IndVarSimplify.cpp +++ b/lib/Transforms/Scalar/IndVarSimplify.cpp @@ -96,7 +96,7 @@ namespace { void RewriteNonIntegerIVs(Loop *L); - ICmpInst *LinearFunctionTestReplace(Loop *L, SCEVHandle BackedgeTakenCount, + ICmpInst *LinearFunctionTestReplace(Loop *L, const SCEV* BackedgeTakenCount, Value *IndVar, BasicBlock *ExitingBlock, BranchInst *BI, @@ -128,7 +128,7 @@ Pass *llvm::createIndVarSimplifyPass() { /// SCEV analysis can determine a loop-invariant trip count of the loop, which /// is actually a much broader range than just linear tests. ICmpInst *IndVarSimplify::LinearFunctionTestReplace(Loop *L, - SCEVHandle BackedgeTakenCount, + const SCEV* BackedgeTakenCount, Value *IndVar, BasicBlock *ExitingBlock, BranchInst *BI, @@ -137,13 +137,13 @@ ICmpInst *IndVarSimplify::LinearFunctionTestReplace(Loop *L, // against the preincremented value, otherwise we prefer to compare against // the post-incremented value. Value *CmpIndVar; - SCEVHandle RHS = BackedgeTakenCount; + const SCEV* RHS = BackedgeTakenCount; if (ExitingBlock == L->getLoopLatch()) { // Add one to the "backedge-taken" count to get the trip count. // If this addition may overflow, we have to be more pessimistic and // cast the induction variable before doing the add. - SCEVHandle Zero = SE->getIntegerSCEV(0, BackedgeTakenCount->getType()); - SCEVHandle N = + const SCEV* Zero = SE->getIntegerSCEV(0, BackedgeTakenCount->getType()); + const SCEV* N = SE->getAddExpr(BackedgeTakenCount, SE->getIntegerSCEV(1, BackedgeTakenCount->getType())); if ((isa<SCEVConstant>(N) && !N->isZero()) || @@ -278,7 +278,7 @@ void IndVarSimplify::RewriteLoopExitValues(Loop *L, // Okay, this instruction has a user outside of the current loop // and varies predictably *inside* the loop. Evaluate the value it // contains when the loop exits, if possible. - SCEVHandle ExitValue = SE->getSCEVAtScope(Inst, L->getParentLoop()); + const SCEV* ExitValue = SE->getSCEVAtScope(Inst, L->getParentLoop()); if (!ExitValue->isLoopInvariant(L)) continue; @@ -348,7 +348,7 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) { BasicBlock *Header = L->getHeader(); BasicBlock *ExitingBlock = L->getExitingBlock(); // may be null - SCEVHandle BackedgeTakenCount = SE->getBackedgeTakenCount(L); + const SCEV* BackedgeTakenCount = SE->getBackedgeTakenCount(L); // Check to see if this loop has a computable loop-invariant execution count. 
// If so, this means that we can compute the final value of any expressions @@ -373,14 +373,14 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) { NeedCannIV = true; } for (unsigned i = 0, e = IU->StrideOrder.size(); i != e; ++i) { - SCEVHandle Stride = IU->StrideOrder[i]; + const SCEV* Stride = IU->StrideOrder[i]; const Type *Ty = SE->getEffectiveSCEVType(Stride->getType()); if (!LargestType || SE->getTypeSizeInBits(Ty) > SE->getTypeSizeInBits(LargestType)) LargestType = Ty; - std::map<SCEVHandle, IVUsersOfOneStride *>::iterator SI = + std::map<const SCEV*, IVUsersOfOneStride *>::iterator SI = IU->IVUsesByStride.find(IU->StrideOrder[i]); assert(SI != IU->IVUsesByStride.end() && "Stride doesn't exist!"); @@ -473,21 +473,20 @@ void IndVarSimplify::RewriteIVExpressions(Loop *L, const Type *LargestType, // the need for the code evaluation methods to insert induction variables // of different sizes. for (unsigned i = 0, e = IU->StrideOrder.size(); i != e; ++i) { - SCEVHandle Stride = IU->StrideOrder[i]; + const SCEV* Stride = IU->StrideOrder[i]; - std::map<SCEVHandle, IVUsersOfOneStride *>::iterator SI = + std::map<const SCEV*, IVUsersOfOneStride *>::iterator SI = IU->IVUsesByStride.find(IU->StrideOrder[i]); assert(SI != IU->IVUsesByStride.end() && "Stride doesn't exist!"); ilist<IVStrideUse> &List = SI->second->Users; for (ilist<IVStrideUse>::iterator UI = List.begin(), E = List.end(); UI != E; ++UI) { - SCEVHandle Offset = UI->getOffset(); Value *Op = UI->getOperandValToReplace(); const Type *UseTy = Op->getType(); Instruction *User = UI->getUser(); // Compute the final addrec to expand into code. - SCEVHandle AR = IU->getReplacementExpr(*UI); + const SCEV* AR = IU->getReplacementExpr(*UI); Value *NewVal = 0; if (AR->isLoopInvariant(L)) { diff --git a/lib/Transforms/Scalar/LoopDeletion.cpp b/lib/Transforms/Scalar/LoopDeletion.cpp index 6512672..302cdec 100644 --- a/lib/Transforms/Scalar/LoopDeletion.cpp +++ b/lib/Transforms/Scalar/LoopDeletion.cpp @@ -187,7 +187,7 @@ bool LoopDeletion::runOnLoop(Loop* L, LPPassManager& LPM) { // Don't remove loops for which we can't solve the trip count. // They could be infinite, in which case we'd be changing program behavior. ScalarEvolution& SE = getAnalysis<ScalarEvolution>(); - SCEVHandle S = SE.getBackedgeTakenCount(L); + const SCEV* S = SE.getBackedgeTakenCount(L); if (isa<SCEVCouldNotCompute>(S)) return false; diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp index 7579748..ba60058 100644 --- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -64,11 +64,11 @@ namespace { /// StrengthReduceStridedIVUsers. It contains the stride, the common base, as /// well as the PHI node and increment value created for rewrite. 
struct VISIBILITY_HIDDEN IVExpr { - SCEVHandle Stride; - SCEVHandle Base; + const SCEV* Stride; + const SCEV* Base; PHINode *PHI; - IVExpr(const SCEVHandle &stride, const SCEVHandle &base, PHINode *phi) + IVExpr(const SCEV* const stride, const SCEV* const base, PHINode *phi) : Stride(stride), Base(base), PHI(phi) {} }; @@ -77,7 +77,7 @@ namespace { struct VISIBILITY_HIDDEN IVsOfOneStride { std::vector<IVExpr> IVs; - void addIV(const SCEVHandle &Stride, const SCEVHandle &Base, PHINode *PHI) { + void addIV(const SCEV* const Stride, const SCEV* const Base, PHINode *PHI) { IVs.push_back(IVExpr(Stride, Base, PHI)); } }; @@ -91,11 +91,11 @@ namespace { /// IVsByStride - Keep track of all IVs that have been inserted for a /// particular stride. - std::map<SCEVHandle, IVsOfOneStride> IVsByStride; + std::map<const SCEV*, IVsOfOneStride> IVsByStride; /// StrideNoReuse - Keep track of all the strides whose ivs cannot be /// reused (nor should they be rewritten to reuse other strides). - SmallSet<SCEVHandle, 4> StrideNoReuse; + SmallSet<const SCEV*, 4> StrideNoReuse; /// DeadInsts - Keep track of instructions we may have made dead, so that /// we can remove them after we are done working. @@ -133,7 +133,7 @@ namespace { private: ICmpInst *ChangeCompareStride(Loop *L, ICmpInst *Cond, IVStrideUse* &CondUse, - const SCEVHandle* &CondStride); + const SCEV* const * &CondStride); void OptimizeIndvars(Loop *L); void OptimizeLoopCountIV(Loop *L); @@ -149,16 +149,16 @@ namespace { IVStrideUse* &CondUse); bool FindIVUserForCond(ICmpInst *Cond, IVStrideUse *&CondUse, - const SCEVHandle *&CondStride); + const SCEV* const * &CondStride); bool RequiresTypeConversion(const Type *Ty, const Type *NewTy); - SCEVHandle CheckForIVReuse(bool, bool, bool, const SCEVHandle&, + const SCEV* CheckForIVReuse(bool, bool, bool, const SCEV* const&, IVExpr&, const Type*, const std::vector<BasedUser>& UsersToProcess); bool ValidScale(bool, int64_t, const std::vector<BasedUser>& UsersToProcess); bool ValidOffset(bool, int64_t, int64_t, const std::vector<BasedUser>& UsersToProcess); - SCEVHandle CollectIVUsers(const SCEVHandle &Stride, + const SCEV* CollectIVUsers(const SCEV* const &Stride, IVUsersOfOneStride &Uses, Loop *L, bool &AllUsesAreAddresses, @@ -168,11 +168,11 @@ namespace { const std::vector<BasedUser> &UsersToProcess, const Loop *L, bool AllUsesAreAddresses, - SCEVHandle Stride); + const SCEV* Stride); void PrepareToStrengthReduceFully( std::vector<BasedUser> &UsersToProcess, - SCEVHandle Stride, - SCEVHandle CommonExprs, + const SCEV* Stride, + const SCEV* CommonExprs, const Loop *L, SCEVExpander &PreheaderRewriter); void PrepareToStrengthReduceFromSmallerStride( @@ -182,13 +182,13 @@ namespace { Instruction *PreInsertPt); void PrepareToStrengthReduceWithNewPhi( std::vector<BasedUser> &UsersToProcess, - SCEVHandle Stride, - SCEVHandle CommonExprs, + const SCEV* Stride, + const SCEV* CommonExprs, Value *CommonBaseV, Instruction *IVIncInsertPt, const Loop *L, SCEVExpander &PreheaderRewriter); - void StrengthReduceStridedIVUsers(const SCEVHandle &Stride, + void StrengthReduceStridedIVUsers(const SCEV* const &Stride, IVUsersOfOneStride &Uses, Loop *L); void DeleteTriviallyDeadInstructions(); @@ -232,7 +232,7 @@ void LoopStrengthReduce::DeleteTriviallyDeadInstructions() { /// containsAddRecFromDifferentLoop - Determine whether expression S involves a /// subexpression that is an AddRec from a loop other than L. An outer loop /// of L is OK, but not an inner loop nor a disjoint loop. 
-static bool containsAddRecFromDifferentLoop(SCEVHandle S, Loop *L) { +static bool containsAddRecFromDifferentLoop(const SCEV* S, Loop *L) { // This is very common, put it first. if (isa<SCEVConstant>(S)) return false; @@ -327,7 +327,7 @@ namespace { /// this use. As the use is processed, information gets moved from this /// field to the Imm field (below). BasedUser values are sorted by this /// field. - SCEVHandle Base; + const SCEV* Base; /// Inst - The instruction using the induction variable. Instruction *Inst; @@ -340,7 +340,7 @@ namespace { /// before Inst, because it will be folded into the imm field of the /// instruction. This is also sometimes used for loop-variant values that /// must be added inside the loop. - SCEVHandle Imm; + const SCEV* Imm; /// Phi - The induction variable that performs the striding that /// should be used for this user. @@ -362,13 +362,13 @@ namespace { // Once we rewrite the code to insert the new IVs we want, update the // operands of Inst to use the new expression 'NewBase', with 'Imm' added // to it. - void RewriteInstructionToUseNewBase(const SCEVHandle &NewBase, + void RewriteInstructionToUseNewBase(const SCEV* const &NewBase, Instruction *InsertPt, SCEVExpander &Rewriter, Loop *L, Pass *P, LoopInfo &LI, SmallVectorImpl<WeakVH> &DeadInsts); - Value *InsertCodeForBaseAtPosition(const SCEVHandle &NewBase, + Value *InsertCodeForBaseAtPosition(const SCEV* const &NewBase, const Type *Ty, SCEVExpander &Rewriter, Instruction *IP, Loop *L, @@ -383,7 +383,7 @@ void BasedUser::dump() const { cerr << " Inst: " << *Inst; } -Value *BasedUser::InsertCodeForBaseAtPosition(const SCEVHandle &NewBase, +Value *BasedUser::InsertCodeForBaseAtPosition(const SCEV* const &NewBase, const Type *Ty, SCEVExpander &Rewriter, Instruction *IP, Loop *L, @@ -407,7 +407,7 @@ Value *BasedUser::InsertCodeForBaseAtPosition(const SCEVHandle &NewBase, Value *Base = Rewriter.expandCodeFor(NewBase, 0, BaseInsertPt); - SCEVHandle NewValSCEV = SE->getUnknown(Base); + const SCEV* NewValSCEV = SE->getUnknown(Base); // If there is no immediate value, skip the next part. if (!Imm->isZero()) { @@ -430,7 +430,7 @@ Value *BasedUser::InsertCodeForBaseAtPosition(const SCEVHandle &NewBase, // value of NewBase in the case that it's a diffferent instruction from // the PHI that NewBase is computed from, or null otherwise. // -void BasedUser::RewriteInstructionToUseNewBase(const SCEVHandle &NewBase, +void BasedUser::RewriteInstructionToUseNewBase(const SCEV* const &NewBase, Instruction *NewBasePt, SCEVExpander &Rewriter, Loop *L, Pass *P, LoopInfo &LI, @@ -542,7 +542,7 @@ void BasedUser::RewriteInstructionToUseNewBase(const SCEVHandle &NewBase, /// fitsInAddressMode - Return true if V can be subsumed within an addressing /// mode, and does not need to be put in a register first. -static bool fitsInAddressMode(const SCEVHandle &V, const Type *AccessTy, +static bool fitsInAddressMode(const SCEV* const &V, const Type *AccessTy, const TargetLowering *TLI, bool HasBaseReg) { if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(V)) { int64_t VC = SC->getValue()->getSExtValue(); @@ -574,12 +574,12 @@ static bool fitsInAddressMode(const SCEVHandle &V, const Type *AccessTy, /// MoveLoopVariantsToImmediateField - Move any subexpressions from Val that are /// loop varying to the Imm operand. 
-static void MoveLoopVariantsToImmediateField(SCEVHandle &Val, SCEVHandle &Imm, +static void MoveLoopVariantsToImmediateField(const SCEV* &Val, const SCEV* &Imm, Loop *L, ScalarEvolution *SE) { if (Val->isLoopInvariant(L)) return; // Nothing to do. if (const SCEVAddExpr *SAE = dyn_cast<SCEVAddExpr>(Val)) { - SmallVector<SCEVHandle, 4> NewOps; + SmallVector<const SCEV*, 4> NewOps; NewOps.reserve(SAE->getNumOperands()); for (unsigned i = 0; i != SAE->getNumOperands(); ++i) @@ -597,10 +597,10 @@ static void MoveLoopVariantsToImmediateField(SCEVHandle &Val, SCEVHandle &Imm, Val = SE->getAddExpr(NewOps); } else if (const SCEVAddRecExpr *SARE = dyn_cast<SCEVAddRecExpr>(Val)) { // Try to pull immediates out of the start value of nested addrec's. - SCEVHandle Start = SARE->getStart(); + const SCEV* Start = SARE->getStart(); MoveLoopVariantsToImmediateField(Start, Imm, L, SE); - SmallVector<SCEVHandle, 4> Ops(SARE->op_begin(), SARE->op_end()); + SmallVector<const SCEV*, 4> Ops(SARE->op_begin(), SARE->op_end()); Ops[0] = Start; Val = SE->getAddRecExpr(Ops, SARE->getLoop()); } else { @@ -616,15 +616,15 @@ static void MoveLoopVariantsToImmediateField(SCEVHandle &Val, SCEVHandle &Imm, /// Accumulate these immediate values into the Imm value. static void MoveImmediateValues(const TargetLowering *TLI, const Type *AccessTy, - SCEVHandle &Val, SCEVHandle &Imm, + const SCEV* &Val, const SCEV* &Imm, bool isAddress, Loop *L, ScalarEvolution *SE) { if (const SCEVAddExpr *SAE = dyn_cast<SCEVAddExpr>(Val)) { - SmallVector<SCEVHandle, 4> NewOps; + SmallVector<const SCEV*, 4> NewOps; NewOps.reserve(SAE->getNumOperands()); for (unsigned i = 0; i != SAE->getNumOperands(); ++i) { - SCEVHandle NewOp = SAE->getOperand(i); + const SCEV* NewOp = SAE->getOperand(i); MoveImmediateValues(TLI, AccessTy, NewOp, Imm, isAddress, L, SE); if (!NewOp->isLoopInvariant(L)) { @@ -643,11 +643,11 @@ static void MoveImmediateValues(const TargetLowering *TLI, return; } else if (const SCEVAddRecExpr *SARE = dyn_cast<SCEVAddRecExpr>(Val)) { // Try to pull immediates out of the start value of nested addrec's. - SCEVHandle Start = SARE->getStart(); + const SCEV* Start = SARE->getStart(); MoveImmediateValues(TLI, AccessTy, Start, Imm, isAddress, L, SE); if (Start != SARE->getStart()) { - SmallVector<SCEVHandle, 4> Ops(SARE->op_begin(), SARE->op_end()); + SmallVector<const SCEV*, 4> Ops(SARE->op_begin(), SARE->op_end()); Ops[0] = Start; Val = SE->getAddRecExpr(Ops, SARE->getLoop()); } @@ -658,8 +658,8 @@ static void MoveImmediateValues(const TargetLowering *TLI, fitsInAddressMode(SME->getOperand(0), AccessTy, TLI, false) && SME->getNumOperands() == 2 && SME->isLoopInvariant(L)) { - SCEVHandle SubImm = SE->getIntegerSCEV(0, Val->getType()); - SCEVHandle NewOp = SME->getOperand(1); + const SCEV* SubImm = SE->getIntegerSCEV(0, Val->getType()); + const SCEV* NewOp = SME->getOperand(1); MoveImmediateValues(TLI, AccessTy, NewOp, SubImm, isAddress, L, SE); // If we extracted something out of the subexpressions, see if we can @@ -694,7 +694,7 @@ static void MoveImmediateValues(const TargetLowering *TLI, static void MoveImmediateValues(const TargetLowering *TLI, Instruction *User, - SCEVHandle &Val, SCEVHandle &Imm, + const SCEV* &Val, const SCEV* &Imm, bool isAddress, Loop *L, ScalarEvolution *SE) { const Type *AccessTy = getAccessType(User); @@ -704,19 +704,19 @@ static void MoveImmediateValues(const TargetLowering *TLI, /// SeparateSubExprs - Decompose Expr into all of the subexpressions that are /// added together. 
This is used to reassociate common addition subexprs /// together for maximal sharing when rewriting bases. -static void SeparateSubExprs(SmallVector<SCEVHandle, 16> &SubExprs, - SCEVHandle Expr, +static void SeparateSubExprs(SmallVector<const SCEV*, 16> &SubExprs, + const SCEV* Expr, ScalarEvolution *SE) { if (const SCEVAddExpr *AE = dyn_cast<SCEVAddExpr>(Expr)) { for (unsigned j = 0, e = AE->getNumOperands(); j != e; ++j) SeparateSubExprs(SubExprs, AE->getOperand(j), SE); } else if (const SCEVAddRecExpr *SARE = dyn_cast<SCEVAddRecExpr>(Expr)) { - SCEVHandle Zero = SE->getIntegerSCEV(0, Expr->getType()); + const SCEV* Zero = SE->getIntegerSCEV(0, Expr->getType()); if (SARE->getOperand(0) == Zero) { SubExprs.push_back(Expr); } else { // Compute the addrec with zero as its base. - SmallVector<SCEVHandle, 4> Ops(SARE->op_begin(), SARE->op_end()); + SmallVector<const SCEV*, 4> Ops(SARE->op_begin(), SARE->op_end()); Ops[0] = Zero; // Start with zero base. SubExprs.push_back(SE->getAddRecExpr(Ops, SARE->getLoop())); @@ -740,7 +740,7 @@ struct SubExprUseData { unsigned Count; bool notAllUsesAreFree; }; /// not remove anything. This looks for things like (a+b+c) and /// (a+c+d) and computes the common (a+c) subexpression. The common expression /// is *removed* from the Bases and returned. -static SCEVHandle +static const SCEV* RemoveCommonExpressionsFromUseBases(std::vector<BasedUser> &Uses, ScalarEvolution *SE, Loop *L, const TargetLowering *TLI) { @@ -748,9 +748,9 @@ RemoveCommonExpressionsFromUseBases(std::vector<BasedUser> &Uses, // Only one use? This is a very common case, so we handle it specially and // cheaply. - SCEVHandle Zero = SE->getIntegerSCEV(0, Uses[0].Base->getType()); - SCEVHandle Result = Zero; - SCEVHandle FreeResult = Zero; + const SCEV* Zero = SE->getIntegerSCEV(0, Uses[0].Base->getType()); + const SCEV* Result = Zero; + const SCEV* FreeResult = Zero; if (NumUses == 1) { // If the use is inside the loop, use its base, regardless of what it is: // it is clearly shared across all the IV's. If the use is outside the loop @@ -766,13 +766,13 @@ RemoveCommonExpressionsFromUseBases(std::vector<BasedUser> &Uses, // Also track whether all uses of each expression can be moved into an // an addressing mode "for free"; such expressions are left within the loop. // struct SubExprUseData { unsigned Count; bool notAllUsesAreFree; }; - std::map<SCEVHandle, SubExprUseData> SubExpressionUseData; + std::map<const SCEV*, SubExprUseData> SubExpressionUseData; // UniqueSubExprs - Keep track of all of the subexpressions we see in the // order we see them. - SmallVector<SCEVHandle, 16> UniqueSubExprs; + SmallVector<const SCEV*, 16> UniqueSubExprs; - SmallVector<SCEVHandle, 16> SubExprs; + SmallVector<const SCEV*, 16> SubExprs; unsigned NumUsesInsideLoop = 0; for (unsigned i = 0; i != NumUses; ++i) { // If the user is outside the loop, just ignore it for base computation. @@ -816,7 +816,7 @@ RemoveCommonExpressionsFromUseBases(std::vector<BasedUser> &Uses, // Now that we know how many times each is used, build Result. Iterate over // UniqueSubexprs so that we have a stable ordering. for (unsigned i = 0, e = UniqueSubExprs.size(); i != e; ++i) { - std::map<SCEVHandle, SubExprUseData>::iterator I = + std::map<const SCEV*, SubExprUseData>::iterator I = SubExpressionUseData.find(UniqueSubExprs[i]); assert(I != SubExpressionUseData.end() && "Entry not found?"); if (I->second.Count == NumUsesInsideLoop) { // Found CSE! 
@@ -860,7 +860,7 @@ RemoveCommonExpressionsFromUseBases(std::vector<BasedUser> &Uses, if (FreeResult != Zero) { SeparateSubExprs(SubExprs, FreeResult, SE); for (unsigned j = 0, e = SubExprs.size(); j != e; ++j) { - std::map<SCEVHandle, SubExprUseData>::iterator I = + std::map<const SCEV*, SubExprUseData>::iterator I = SubExpressionUseData.find(SubExprs[j]); SubExpressionUseData.erase(I); } @@ -989,10 +989,10 @@ bool LoopStrengthReduce::RequiresTypeConversion(const Type *Ty1, /// be folded into the addressing mode, nor even that the factor be constant; /// a multiply (executed once) outside the loop is better than another IV /// within. Well, usually. -SCEVHandle LoopStrengthReduce::CheckForIVReuse(bool HasBaseReg, +const SCEV* LoopStrengthReduce::CheckForIVReuse(bool HasBaseReg, bool AllUsesAreAddresses, bool AllUsesAreOutsideLoop, - const SCEVHandle &Stride, + const SCEV* const &Stride, IVExpr &IV, const Type *Ty, const std::vector<BasedUser>& UsersToProcess) { if (StrideNoReuse.count(Stride)) @@ -1002,7 +1002,7 @@ SCEVHandle LoopStrengthReduce::CheckForIVReuse(bool HasBaseReg, int64_t SInt = SC->getValue()->getSExtValue(); for (unsigned NewStride = 0, e = IU->StrideOrder.size(); NewStride != e; ++NewStride) { - std::map<SCEVHandle, IVsOfOneStride>::iterator SI = + std::map<const SCEV*, IVsOfOneStride>::iterator SI = IVsByStride.find(IU->StrideOrder[NewStride]); if (SI == IVsByStride.end() || !isa<SCEVConstant>(SI->first) || StrideNoReuse.count(SI->first)) @@ -1055,7 +1055,7 @@ SCEVHandle LoopStrengthReduce::CheckForIVReuse(bool HasBaseReg, // an existing IV if we can. for (unsigned NewStride = 0, e = IU->StrideOrder.size(); NewStride != e; ++NewStride) { - std::map<SCEVHandle, IVsOfOneStride>::iterator SI = + std::map<const SCEV*, IVsOfOneStride>::iterator SI = IVsByStride.find(IU->StrideOrder[NewStride]); if (SI == IVsByStride.end() || !isa<SCEVConstant>(SI->first)) continue; @@ -1075,7 +1075,7 @@ SCEVHandle LoopStrengthReduce::CheckForIVReuse(bool HasBaseReg, // -1*old. for (unsigned NewStride = 0, e = IU->StrideOrder.size(); NewStride != e; ++NewStride) { - std::map<SCEVHandle, IVsOfOneStride>::iterator SI = + std::map<const SCEV*, IVsOfOneStride>::iterator SI = IVsByStride.find(IU->StrideOrder[NewStride]); if (SI == IVsByStride.end()) continue; @@ -1104,7 +1104,7 @@ static bool PartitionByIsUseOfPostIncrementedValue(const BasedUser &Val) { /// isNonConstantNegative - Return true if the specified scev is negated, but /// not a constant. -static bool isNonConstantNegative(const SCEVHandle &Expr) { +static bool isNonConstantNegative(const SCEV* const &Expr) { const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(Expr); if (!Mul) return false; @@ -1121,7 +1121,7 @@ static bool isNonConstantNegative(const SCEVHandle &Expr) { /// of the strided accesses, as well as the old information from Uses. We /// progressively move information from the Base field to the Imm field, until /// we eventually have the full access expression to rewrite the use. -SCEVHandle LoopStrengthReduce::CollectIVUsers(const SCEVHandle &Stride, +const SCEV* LoopStrengthReduce::CollectIVUsers(const SCEV* const &Stride, IVUsersOfOneStride &Uses, Loop *L, bool &AllUsesAreAddresses, @@ -1152,7 +1152,7 @@ SCEVHandle LoopStrengthReduce::CollectIVUsers(const SCEVHandle &Stride, // for the strides (e.g. if we have "A+C+B" and "A+B+D" as our bases, find // "A+B"), emit it to the preheader, then remove the expression from the // UsersToProcess base values. 
- SCEVHandle CommonExprs = + const SCEV* CommonExprs = RemoveCommonExpressionsFromUseBases(UsersToProcess, SE, L, TLI); // Next, figure out what we can represent in the immediate fields of @@ -1218,7 +1218,7 @@ bool LoopStrengthReduce::ShouldUseFullStrengthReductionMode( const std::vector<BasedUser> &UsersToProcess, const Loop *L, bool AllUsesAreAddresses, - SCEVHandle Stride) { + const SCEV* Stride) { if (!EnableFullLSRMode) return false; @@ -1255,7 +1255,7 @@ bool LoopStrengthReduce::ShouldUseFullStrengthReductionMode( if (!Imm) Imm = SE->getIntegerSCEV(0, Stride->getType()); const Instruction *Inst = UsersToProcess[i].Inst; const Type *AccessTy = getAccessType(Inst); - SCEVHandle Diff = SE->getMinusSCEV(UsersToProcess[i].Imm, Imm); + const SCEV* Diff = SE->getMinusSCEV(UsersToProcess[i].Imm, Imm); if (!Diff->isZero() && (!AllUsesAreAddresses || !fitsInAddressMode(Diff, AccessTy, TLI, /*HasBaseReg=*/true))) @@ -1289,7 +1289,7 @@ bool LoopStrengthReduce::ShouldUseFullStrengthReductionMode( /// /// Return the created phi node. /// -static PHINode *InsertAffinePhi(SCEVHandle Start, SCEVHandle Step, +static PHINode *InsertAffinePhi(const SCEV* Start, const SCEV* Step, Instruction *IVIncInsertPt, const Loop *L, SCEVExpander &Rewriter) { @@ -1309,7 +1309,7 @@ static PHINode *InsertAffinePhi(SCEVHandle Start, SCEVHandle Step, // If the stride is negative, insert a sub instead of an add for the // increment. bool isNegative = isNonConstantNegative(Step); - SCEVHandle IncAmount = Step; + const SCEV* IncAmount = Step; if (isNegative) IncAmount = Rewriter.SE.getNegativeSCEV(Step); @@ -1348,13 +1348,13 @@ static void SortUsersToProcess(std::vector<BasedUser> &UsersToProcess) { // loop before users outside of the loop with a particular base. // // We would like to use stable_sort here, but we can't. The problem is that - // SCEVHandle's don't have a deterministic ordering w.r.t to each other, so + // const SCEV*'s don't have a deterministic ordering w.r.t to each other, so // we don't have anything to do a '<' comparison on. Because we think the // number of uses is small, do a horrible bubble sort which just relies on // ==. for (unsigned i = 0, e = UsersToProcess.size(); i != e; ++i) { // Get a base value. - SCEVHandle Base = UsersToProcess[i].Base; + const SCEV* Base = UsersToProcess[i].Base; // Compact everything with this base to be consecutive with this one. for (unsigned j = i+1; j != e; ++j) { @@ -1373,8 +1373,8 @@ static void SortUsersToProcess(std::vector<BasedUser> &UsersToProcess) { void LoopStrengthReduce::PrepareToStrengthReduceFully( std::vector<BasedUser> &UsersToProcess, - SCEVHandle Stride, - SCEVHandle CommonExprs, + const SCEV* Stride, + const SCEV* CommonExprs, const Loop *L, SCEVExpander &PreheaderRewriter) { DOUT << " Fully reducing all users\n"; @@ -1386,9 +1386,9 @@ LoopStrengthReduce::PrepareToStrengthReduceFully( // TODO: The uses are grouped by base, but not sorted. We arbitrarily // pick the first Imm value here to start with, and adjust it for the // other uses. - SCEVHandle Imm = UsersToProcess[i].Imm; - SCEVHandle Base = UsersToProcess[i].Base; - SCEVHandle Start = SE->getAddExpr(CommonExprs, Base, Imm); + const SCEV* Imm = UsersToProcess[i].Imm; + const SCEV* Base = UsersToProcess[i].Base; + const SCEV* Start = SE->getAddExpr(CommonExprs, Base, Imm); PHINode *Phi = InsertAffinePhi(Start, Stride, IVIncInsertPt, L, PreheaderRewriter); // Loop over all the users with the same base. 
@@ -1420,8 +1420,8 @@ static Instruction *FindIVIncInsertPt(std::vector<BasedUser> &UsersToProcess,
 void
 LoopStrengthReduce::PrepareToStrengthReduceWithNewPhi(
                                          std::vector<BasedUser> &UsersToProcess,
-                                         SCEVHandle Stride,
-                                         SCEVHandle CommonExprs,
+                                         const SCEV* Stride,
+                                         const SCEV* CommonExprs,
                                          Value *CommonBaseV,
                                          Instruction *IVIncInsertPt,
                                          const Loop *L,
@@ -1497,7 +1497,7 @@ static bool IsImmFoldedIntoAddrMode(GlobalValue *GV, int64_t Offset,
 /// StrengthReduceStridedIVUsers - Strength reduce all of the users of a single
 /// stride of IV.  All of the users may have different starting values, and this
 /// may not be the only stride.
-void LoopStrengthReduce::StrengthReduceStridedIVUsers(const SCEVHandle &Stride,
+void LoopStrengthReduce::StrengthReduceStridedIVUsers(const SCEV* const &Stride,
                                                       IVUsersOfOneStride &Uses,
                                                       Loop *L) {
   // If all the users are moved to another stride, then there is nothing to do.
@@ -1520,7 +1520,7 @@ void LoopStrengthReduce::StrengthReduceStridedIVUsers(const SCEVHandle &Stride,
   // move information from the Base field to the Imm field, until we eventually
   // have the full access expression to rewrite the use.
   std::vector<BasedUser> UsersToProcess;
-  SCEVHandle CommonExprs = CollectIVUsers(Stride, Uses, L, AllUsesAreAddresses,
+  const SCEV* CommonExprs = CollectIVUsers(Stride, Uses, L, AllUsesAreAddresses,
                                           AllUsesAreOutsideLoop,
                                           UsersToProcess);
 
@@ -1538,8 +1538,8 @@ void LoopStrengthReduce::StrengthReduceStridedIVUsers(const SCEVHandle &Stride,
   // If all uses are addresses, consider sinking the immediate part of the
   // common expression back into uses if they can fit in the immediate fields.
   if (TLI && HaveCommonExprs && AllUsesAreAddresses) {
-    SCEVHandle NewCommon = CommonExprs;
-    SCEVHandle Imm = SE->getIntegerSCEV(0, ReplacedTy);
+    const SCEV* NewCommon = CommonExprs;
+    const SCEV* Imm = SE->getIntegerSCEV(0, ReplacedTy);
     MoveImmediateValues(TLI, Type::VoidTy, NewCommon, Imm, true, L, SE);
     if (!Imm->isZero()) {
       bool DoSink = true;
@@ -1585,7 +1585,7 @@ void LoopStrengthReduce::StrengthReduceStridedIVUsers(const SCEVHandle &Stride,
 
   Value *CommonBaseV = Constant::getNullValue(ReplacedTy);
 
-  SCEVHandle RewriteFactor = SE->getIntegerSCEV(0, ReplacedTy);
+  const SCEV* RewriteFactor = SE->getIntegerSCEV(0, ReplacedTy);
   IVExpr   ReuseIV(SE->getIntegerSCEV(0, Type::Int32Ty),
                    SE->getIntegerSCEV(0, Type::Int32Ty),
                    0);
@@ -1625,7 +1625,7 @@ void LoopStrengthReduce::StrengthReduceStridedIVUsers(const SCEVHandle &Stride,
   // strength-reduced forms.  This outer loop handles all bases, the inner
   // loop handles all users of a particular base.
   while (!UsersToProcess.empty()) {
-    SCEVHandle Base = UsersToProcess.back().Base;
+    const SCEV* Base = UsersToProcess.back().Base;
     Instruction *Inst = UsersToProcess.back().Inst;
 
     // Emit the code for Base into the preheader.
@@ -1679,7 +1679,7 @@ void LoopStrengthReduce::StrengthReduceStridedIVUsers(const SCEVHandle &Stride,
         User.Inst->moveBefore(IVIncInsertPt);
       }
 
-      SCEVHandle RewriteExpr = SE->getUnknown(RewriteOp);
+      const SCEV* RewriteExpr = SE->getUnknown(RewriteOp);
 
       if (SE->getEffectiveSCEVType(RewriteOp->getType()) !=
           SE->getEffectiveSCEVType(ReplacedTy)) {
@@ -1711,7 +1711,7 @@ void LoopStrengthReduce::StrengthReduceStridedIVUsers(const SCEVHandle &Stride,
       // The base has been used to initialize the PHI node but we don't want
      // it here.
       if (!ReuseIV.Base->isZero()) {
-        SCEVHandle typedBase = ReuseIV.Base;
+        const SCEV* typedBase = ReuseIV.Base;
         if (SE->getEffectiveSCEVType(RewriteExpr->getType()) !=
             SE->getEffectiveSCEVType(ReuseIV.Base->getType())) {
           // It's possible the original IV is a larger type than the new IV,
@@ -1776,10 +1776,10 @@ void LoopStrengthReduce::StrengthReduceStridedIVUsers(const SCEVHandle &Stride,
 /// set the IV user and stride information and return true, otherwise return
 /// false.
 bool LoopStrengthReduce::FindIVUserForCond(ICmpInst *Cond, IVStrideUse *&CondUse,
-                                    const SCEVHandle *&CondStride) {
+                                    const SCEV* const * &CondStride) {
   for (unsigned Stride = 0, e = IU->StrideOrder.size();
        Stride != e && !CondUse; ++Stride) {
-    std::map<SCEVHandle, IVUsersOfOneStride *>::iterator SI =
+    std::map<const SCEV*, IVUsersOfOneStride *>::iterator SI =
       IU->IVUsesByStride.find(IU->StrideOrder[Stride]);
     assert(SI != IU->IVUsesByStride.end() && "Stride doesn't exist!");
 
@@ -1806,7 +1806,7 @@ namespace {
     const ScalarEvolution *SE;
     explicit StrideCompare(const ScalarEvolution *se) : SE(se) {}
 
-    bool operator()(const SCEVHandle &LHS, const SCEVHandle &RHS) {
+    bool operator()(const SCEV* const &LHS, const SCEV* const &RHS) {
       const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(LHS);
       const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(RHS);
       if (LHSC && RHSC) {
@@ -1849,14 +1849,14 @@ namespace {
 ///     if (v1 < 30) goto loop
 ICmpInst *LoopStrengthReduce::ChangeCompareStride(Loop *L, ICmpInst *Cond,
                                                   IVStrideUse* &CondUse,
-                                                  const SCEVHandle* &CondStride) {
+                                                  const SCEV* const* &CondStride) {
   // If there's only one stride in the loop, there's nothing to do here.
   if (IU->StrideOrder.size() < 2)
     return Cond;
   // If there are other users of the condition's stride, don't bother
   // trying to change the condition because the stride will still
   // remain.
-  std::map<SCEVHandle, IVUsersOfOneStride *>::iterator I =
+  std::map<const SCEV*, IVUsersOfOneStride *>::iterator I =
     IU->IVUsesByStride.find(*CondStride);
   if (I == IU->IVUsesByStride.end() ||
       I->second->Users.size() != 1)
@@ -1873,11 +1873,11 @@ ICmpInst *LoopStrengthReduce::ChangeCompareStride(Loop *L, ICmpInst *Cond,
   const Type *NewCmpTy = NULL;
   unsigned TyBits = SE->getTypeSizeInBits(CmpTy);
   unsigned NewTyBits = 0;
-  SCEVHandle *NewStride = NULL;
+  const SCEV* *NewStride = NULL;
   Value *NewCmpLHS = NULL;
   Value *NewCmpRHS = NULL;
   int64_t Scale = 1;
-  SCEVHandle NewOffset = SE->getIntegerSCEV(0, CmpTy);
+  const SCEV* NewOffset = SE->getIntegerSCEV(0, CmpTy);
 
   if (ConstantInt *C = dyn_cast<ConstantInt>(Cond->getOperand(1))) {
     int64_t CmpVal = C->getValue().getSExtValue();
@@ -1889,7 +1889,7 @@ ICmpInst *LoopStrengthReduce::ChangeCompareStride(Loop *L, ICmpInst *Cond,
 
     // Look for a suitable stride / iv as replacement.
     for (unsigned i = 0, e = IU->StrideOrder.size(); i != e; ++i) {
-      std::map<SCEVHandle, IVUsersOfOneStride *>::iterator SI =
+      std::map<const SCEV*, IVUsersOfOneStride *>::iterator SI =
         IU->IVUsesByStride.find(IU->StrideOrder[i]);
       if (!isa<SCEVConstant>(SI->first))
         continue;
@@ -1969,7 +1969,7 @@ ICmpInst *LoopStrengthReduce::ChangeCompareStride(Loop *L, ICmpInst *Cond,
       bool AllUsesAreAddresses = true;
       bool AllUsesAreOutsideLoop = true;
       std::vector<BasedUser> UsersToProcess;
-      SCEVHandle CommonExprs = CollectIVUsers(SI->first, *SI->second, L,
+      const SCEV* CommonExprs = CollectIVUsers(SI->first, *SI->second, L,
                                               AllUsesAreAddresses,
                                               AllUsesAreOutsideLoop,
                                               UsersToProcess);
@@ -2104,13 +2104,13 @@ ICmpInst *LoopStrengthReduce::OptimizeMax(Loop *L, ICmpInst *Cond,
   SelectInst *Sel = dyn_cast<SelectInst>(Cond->getOperand(1));
   if (!Sel || !Sel->hasOneUse()) return Cond;
 
-  SCEVHandle BackedgeTakenCount = SE->getBackedgeTakenCount(L);
+  const SCEV* BackedgeTakenCount = SE->getBackedgeTakenCount(L);
   if (isa<SCEVCouldNotCompute>(BackedgeTakenCount))
     return Cond;
-  SCEVHandle One = SE->getIntegerSCEV(1, BackedgeTakenCount->getType());
+  const SCEV* One = SE->getIntegerSCEV(1, BackedgeTakenCount->getType());
 
   // Add one to the backedge-taken count to get the trip count.
-  SCEVHandle IterationCount = SE->getAddExpr(BackedgeTakenCount, One);
+  const SCEV* IterationCount = SE->getAddExpr(BackedgeTakenCount, One);
 
   // Check for a max calculation that matches the pattern.
   if (!isa<SCEVSMaxExpr>(IterationCount) && !isa<SCEVUMaxExpr>(IterationCount))
@@ -2123,13 +2123,13 @@ ICmpInst *LoopStrengthReduce::OptimizeMax(Loop *L, ICmpInst *Cond,
   if (Max->getNumOperands() != 2)
     return Cond;
 
-  SCEVHandle MaxLHS = Max->getOperand(0);
-  SCEVHandle MaxRHS = Max->getOperand(1);
+  const SCEV* MaxLHS = Max->getOperand(0);
+  const SCEV* MaxRHS = Max->getOperand(1);
   if (!MaxLHS || MaxLHS != One) return Cond;
 
   // Check the relevant induction variable for conformance to
   // the pattern.
-  SCEVHandle IV = SE->getSCEV(Cond->getOperand(0));
+  const SCEV* IV = SE->getSCEV(Cond->getOperand(0));
   const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(IV);
   if (!AR || !AR->isAffine() ||
       AR->getStart() != One ||
@@ -2175,13 +2175,13 @@ ICmpInst *LoopStrengthReduce::OptimizeMax(Loop *L, ICmpInst *Cond,
 /// inside the loop then try to eliminate the cast opeation.
 void LoopStrengthReduce::OptimizeShadowIV(Loop *L) {
 
-  SCEVHandle BackedgeTakenCount = SE->getBackedgeTakenCount(L);
+  const SCEV* BackedgeTakenCount = SE->getBackedgeTakenCount(L);
   if (isa<SCEVCouldNotCompute>(BackedgeTakenCount))
     return;
 
   for (unsigned Stride = 0, e = IU->StrideOrder.size(); Stride != e;
        ++Stride) {
-    std::map<SCEVHandle, IVUsersOfOneStride *>::iterator SI =
+    std::map<const SCEV*, IVUsersOfOneStride *>::iterator SI =
       IU->IVUsesByStride.find(IU->StrideOrder[Stride]);
     assert(SI != IU->IVUsesByStride.end() && "Stride doesn't exist!");
     if (!isa<SCEVConstant>(SI->first))
@@ -2311,7 +2311,7 @@ void LoopStrengthReduce::OptimizeLoopTermCond(Loop *L) {
 
     // Search IVUsesByStride to find Cond's IVUse if there is one.
     IVStrideUse *CondUse = 0;
-    const SCEVHandle *CondStride = 0;
+    const SCEV* const *CondStride = 0;
     ICmpInst *Cond = cast<ICmpInst>(TermBr->getCondition());
     if (!FindIVUserForCond(Cond, CondUse, CondStride))
       return; // setcc doesn't use the IV.
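The OptimizeMax hunks above work on the relationship trip count = backedge-taken count + 1 and then look for an smax/umax whose operand is One. The shape this appears to target is a rotated loop protected by an entry guard, where the computed trip count is smax(1, n) even though the guard already proves n >= 1, so the max in the exit test is redundant. A hedged C++ sketch of that source shape, not taken from the patch:

  // Guarded, rotated loop: the guard establishes n >= 1, so an exit test
  // expressed against smax(1, n) can be rewritten to test against n directly.
  void f(int *A, int n) {
    if (n > 0) {              // loop guard inserted by rotation
      int i = 0;
      do {
        A[i] = 0;
      } while (++i != n);     // trip count is smax(1, n); guard makes n enough
    }
  }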
@@ -2341,7 +2341,7 @@ void LoopStrengthReduce::OptimizeLoopTermCond(Loop *L) {
     int64_t SInt = SC->getValue()->getSExtValue();
     for (unsigned NewStride = 0, ee = IU->StrideOrder.size(); NewStride != ee;
          ++NewStride) {
-      std::map<SCEVHandle, IVUsersOfOneStride *>::iterator SI =
+      std::map<const SCEV*, IVUsersOfOneStride *>::iterator SI =
        IU->IVUsesByStride.find(IU->StrideOrder[NewStride]);
       if (!isa<SCEVConstant>(SI->first) || SI->first == *CondStride)
         continue;
@@ -2355,7 +2355,7 @@ void LoopStrengthReduce::OptimizeLoopTermCond(Loop *L) {
       bool AllUsesAreAddresses = true;
       bool AllUsesAreOutsideLoop = true;
       std::vector<BasedUser> UsersToProcess;
-      SCEVHandle CommonExprs = CollectIVUsers(SI->first, *SI->second, L,
+      const SCEV* CommonExprs = CollectIVUsers(SI->first, *SI->second, L,
                                               AllUsesAreAddresses,
                                               AllUsesAreOutsideLoop,
                                               UsersToProcess);
@@ -2416,7 +2416,7 @@ void LoopStrengthReduce::OptimizeLoopTermCond(Loop *L) {
 void LoopStrengthReduce::OptimizeLoopCountIV(Loop *L) {
 
   // If the number of times the loop is executed isn't computable, give up.
-  SCEVHandle BackedgeTakenCount = SE->getBackedgeTakenCount(L);
+  const SCEV* BackedgeTakenCount = SE->getBackedgeTakenCount(L);
   if (isa<SCEVCouldNotCompute>(BackedgeTakenCount))
     return;
 
@@ -2445,9 +2445,9 @@ void LoopStrengthReduce::OptimizeLoopCountIV(Loop *L) {
   // Handle only tests for equality for the moment, and only stride 1.
   if (Cond->getPredicate() != CmpInst::ICMP_EQ)
     return;
-  SCEVHandle IV = SE->getSCEV(Cond->getOperand(0));
+  const SCEV* IV = SE->getSCEV(Cond->getOperand(0));
   const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(IV);
-  SCEVHandle One = SE->getIntegerSCEV(1, BackedgeTakenCount->getType());
+  const SCEV* One = SE->getIntegerSCEV(1, BackedgeTakenCount->getType());
   if (!AR || !AR->isAffine() || AR->getStepRecurrence(*SE) != One)
     return;
   // If the RHS of the comparison is defined inside the loop, the rewrite
@@ -2563,7 +2563,7 @@ bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager &LPM) {
   // strides deterministic - not dependent on map order.
   for (unsigned Stride = 0, e = IU->StrideOrder.size(); Stride != e;
        ++Stride) {
-    std::map<SCEVHandle, IVUsersOfOneStride *>::iterator SI =
+    std::map<const SCEV*, IVUsersOfOneStride *>::iterator SI =
       IU->IVUsesByStride.find(IU->StrideOrder[Stride]);
     assert(SI != IU->IVUsesByStride.end() && "Stride doesn't exist!");
     // FIXME: Generalize to non-affine IV's.
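Taken together, the hunks in this file follow one mechanical pattern: every SCEVHandle in a signature, local variable, or map key becomes a plain const SCEV*. A hedged sketch of what client code looks like after the change; MyPassState and record are invented for illustration and are not part of this patch:

  #include "llvm/Analysis/ScalarEvolution.h"
  #include <map>
  using namespace llvm;

  struct MyPassState {
    // Was: std::map<SCEVHandle, unsigned> UseCounts;
    // Raw SCEV pointers can be stored and compared for identity, but iterating
    // a map keyed on pointers is not deterministic, which is why the pass
    // walks the separate StrideOrder vector instead of IVUsesByStride itself.
    std::map<const SCEV*, unsigned> UseCounts;

    void record(ScalarEvolution &SE, Value *V) {
      const SCEV *S = SE.getSCEV(V);   // was: SCEVHandle S = SE.getSCEV(V);
      ++UseCounts[S];
    }
  };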