Diffstat (limited to 'contrib/llvm/lib')
464 files changed, 33984 insertions, 12166 deletions
diff --git a/contrib/llvm/lib/Analysis/Analysis.cpp b/contrib/llvm/lib/Analysis/Analysis.cpp index 66e416cd..349c417 100644 --- a/contrib/llvm/lib/Analysis/Analysis.cpp +++ b/contrib/llvm/lib/Analysis/Analysis.cpp @@ -11,6 +11,8 @@ #include "llvm-c/Initialization.h" #include "llvm/Analysis/Verifier.h" #include "llvm/InitializePasses.h" +#include "llvm/IR/Module.h" +#include "llvm/PassRegistry.h" #include <cstring> using namespace llvm; diff --git a/contrib/llvm/lib/Analysis/BasicAliasAnalysis.cpp b/contrib/llvm/lib/Analysis/BasicAliasAnalysis.cpp index ae6da1a..f8509dd 100644 --- a/contrib/llvm/lib/Analysis/BasicAliasAnalysis.cpp +++ b/contrib/llvm/lib/Analysis/BasicAliasAnalysis.cpp @@ -88,7 +88,7 @@ static uint64_t getObjectSize(const Value *V, const DataLayout &TD, const TargetLibraryInfo &TLI, bool RoundToAlign = false) { uint64_t Size; - if (getUnderlyingObjectSize(V, Size, &TD, &TLI, RoundToAlign)) + if (getObjectSize(V, Size, &TD, &TLI, RoundToAlign)) return Size; return AliasAnalysis::UnknownSize; } @@ -98,6 +98,35 @@ static uint64_t getObjectSize(const Value *V, const DataLayout &TD, static bool isObjectSmallerThan(const Value *V, uint64_t Size, const DataLayout &TD, const TargetLibraryInfo &TLI) { + // Note that the meanings of the "object" are slightly different in the + // following contexts: + // c1: llvm::getObjectSize() + // c2: llvm.objectsize() intrinsic + // c3: isObjectSmallerThan() + // c1 and c2 share the same meaning; however, the meaning of "object" in c3 + // refers to the "entire object". + // + // Consider this example: + // char *p = (char*)malloc(100) + // char *q = p+80; + // + // In the context of c1 and c2, the "object" pointed by q refers to the + // stretch of memory of q[0:19]. So, getObjectSize(q) should return 20. + // + // However, in the context of c3, the "object" refers to the chunk of memory + // being allocated. So, the "object" has 100 bytes, and q points to the middle + // the "object". In case q is passed to isObjectSmallerThan() as the 1st + // parameter, before the llvm::getObjectSize() is called to get the size of + // entire object, we should: + // - either rewind the pointer q to the base-address of the object in + // question (in this case rewind to p), or + // - just give up. It is up to caller to make sure the pointer is pointing + // to the base address the object. + // + // We go for 2nd option for simplicity. + if (!isIdentifiedObject(V)) + return false; + // This function needs to use the aligned object size because we allow // reads a bit past the end given sufficient alignment. 
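The BasicAliasAnalysis hunk above drops getUnderlyingObjectSize() and instead has isObjectSmallerThan() give up unless the pointer is an identified object, because the "remaining bytes from here" answer for an interior pointer is not the size of the allocation. A minimal sketch of that distinction in plain C++ rather than LLVM's API; the 100-byte allocation and the p+80 interior pointer are taken from the comment in the hunk, everything else is illustrative only:

    #include <cstdio>
    #include <cstdlib>

    int main() {
      char *p = static_cast<char *>(std::malloc(100)); // the "entire object": 100 bytes
      char *q = p + 80;                                // interior pointer into the same allocation

      // An llvm.objectsize-style query on q answers "how many bytes are addressable
      // from q onward", i.e. 20, while isObjectSmallerThan() needs the size of the
      // whole allocation (100). Rather than rewinding q back to p, the patch simply
      // refuses to answer for pointers that are not identified objects.
      std::printf("bytes from q to end: %zu\n", static_cast<size_t>(100 - (q - p)));

      std::free(p);
      return 0;
    }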
uint64_t ObjectSize = getObjectSize(V, TD, TLI, /*RoundToAlign*/true); diff --git a/contrib/llvm/lib/Analysis/ConstantFolding.cpp b/contrib/llvm/lib/Analysis/ConstantFolding.cpp index 09d7608..bc0dffc 100644 --- a/contrib/llvm/lib/Analysis/ConstantFolding.cpp +++ b/contrib/llvm/lib/Analysis/ConstantFolding.cpp @@ -17,6 +17,7 @@ //===----------------------------------------------------------------------===// #include "llvm/Analysis/ConstantFolding.h" +#include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringMap.h" #include "llvm/Analysis/ValueTracking.h" @@ -550,7 +551,7 @@ static Constant *SymbolicallyEvaluateBinop(unsigned Opc, Constant *Op0, if (Opc == Instruction::And && DL) { - unsigned BitWidth = DL->getTypeSizeInBits(Op0->getType()); + unsigned BitWidth = DL->getTypeSizeInBits(Op0->getType()->getScalarType()); APInt KnownZero0(BitWidth, 0), KnownOne0(BitWidth, 0); APInt KnownZero1(BitWidth, 0), KnownOne1(BitWidth, 0); ComputeMaskedBits(Op0, KnownZero0, KnownOne0, DL); @@ -880,19 +881,20 @@ Constant *llvm::ConstantFoldInstruction(Instruction *I, return ConstantFoldInstOperands(I->getOpcode(), I->getType(), Ops, TD, TLI); } -/// ConstantFoldConstantExpression - Attempt to fold the constant expression -/// using the specified DataLayout. If successful, the constant result is -/// result is returned, if not, null is returned. -Constant *llvm::ConstantFoldConstantExpression(const ConstantExpr *CE, - const DataLayout *TD, - const TargetLibraryInfo *TLI) { - SmallVector<Constant*, 8> Ops; - for (User::const_op_iterator i = CE->op_begin(), e = CE->op_end(); - i != e; ++i) { +static Constant * +ConstantFoldConstantExpressionImpl(const ConstantExpr *CE, const DataLayout *TD, + const TargetLibraryInfo *TLI, + SmallPtrSet<ConstantExpr *, 4> &FoldedOps) { + SmallVector<Constant *, 8> Ops; + for (User::const_op_iterator i = CE->op_begin(), e = CE->op_end(); i != e; + ++i) { Constant *NewC = cast<Constant>(*i); - // Recursively fold the ConstantExpr's operands. - if (ConstantExpr *NewCE = dyn_cast<ConstantExpr>(NewC)) - NewC = ConstantFoldConstantExpression(NewCE, TD, TLI); + // Recursively fold the ConstantExpr's operands. If we have already folded + // a ConstantExpr, we don't have to process it again. + if (ConstantExpr *NewCE = dyn_cast<ConstantExpr>(NewC)) { + if (FoldedOps.insert(NewCE)) + NewC = ConstantFoldConstantExpressionImpl(NewCE, TD, TLI, FoldedOps); + } Ops.push_back(NewC); } @@ -902,6 +904,16 @@ Constant *llvm::ConstantFoldConstantExpression(const ConstantExpr *CE, return ConstantFoldInstOperands(CE->getOpcode(), CE->getType(), Ops, TD, TLI); } +/// ConstantFoldConstantExpression - Attempt to fold the constant expression +/// using the specified DataLayout. If successful, the constant result is +/// result is returned, if not, null is returned. +Constant *llvm::ConstantFoldConstantExpression(const ConstantExpr *CE, + const DataLayout *TD, + const TargetLibraryInfo *TLI) { + SmallPtrSet<ConstantExpr *, 4> FoldedOps; + return ConstantFoldConstantExpressionImpl(CE, TD, TLI, FoldedOps); +} + /// ConstantFoldInstOperands - Attempt to constant fold an instruction with the /// specified opcode and operands. If successful, the constant result is /// returned, if not, null is returned. 
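The ConstantFolding change splits the recursion into ConstantFoldConstantExpressionImpl() and threads a SmallPtrSet of already-folded operands through it, so a constant expression shared by many operands is folded once instead of once per use. A standalone sketch of that memoization, with a hypothetical Expr node in place of llvm::ConstantExpr and std::unordered_set in place of SmallPtrSet:

    #include <cstdio>
    #include <unordered_set>
    #include <vector>

    // Hypothetical expression node standing in for llvm::ConstantExpr; real constant
    // expressions are uniqued, so one node can appear as an operand of many others.
    struct Expr {
      std::vector<Expr *> Ops;
      int FoldCount = 0;   // how many times foldImpl() actually processed this node
    };

    // Mirrors ConstantFoldConstantExpressionImpl(): the FoldedOps set ensures each
    // shared subexpression is folded at most once instead of once per use.
    static void foldImpl(Expr *E, std::unordered_set<Expr *> &FoldedOps) {
      for (Expr *Op : E->Ops)
        if (FoldedOps.insert(Op).second)   // first visit only, like SmallPtrSet::insert
          foldImpl(Op, FoldedOps);
      ++E->FoldCount;
    }

    int main() {
      Expr Leaf, A, Root;
      A.Ops = {&Leaf, &Leaf};          // A uses the shared leaf twice
      Root.Ops = {&A, &A, &Leaf};      // the root uses A twice and the leaf again

      std::unordered_set<Expr *> FoldedOps;
      foldImpl(&Root, FoldedOps);
      std::printf("A folded %d time(s), Leaf folded %d time(s)\n",
                  A.FoldCount, Leaf.FoldCount);   // both print 1
      return 0;
    }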
Note that this function can fail when diff --git a/contrib/llvm/lib/Analysis/IPA/IPA.cpp b/contrib/llvm/lib/Analysis/IPA/IPA.cpp index aa5164e..1c1816d 100644 --- a/contrib/llvm/lib/Analysis/IPA/IPA.cpp +++ b/contrib/llvm/lib/Analysis/IPA/IPA.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "llvm/InitializePasses.h" +#include "llvm/PassRegistry.h" #include "llvm-c/Initialization.h" using namespace llvm; diff --git a/contrib/llvm/lib/Analysis/InstructionSimplify.cpp b/contrib/llvm/lib/Analysis/InstructionSimplify.cpp index 4a3c74e..bf77451 100644 --- a/contrib/llvm/lib/Analysis/InstructionSimplify.cpp +++ b/contrib/llvm/lib/Analysis/InstructionSimplify.cpp @@ -1711,7 +1711,7 @@ static Value *ExtractEquivalentCondition(Value *V, CmpInst::Predicate Pred, // subobject at its beginning) or function, both are pointers to one past the // last element of the same array object, or one is a pointer to one past the // end of one array object and the other is a pointer to the start of a -// different array object that happens to immediately follow the first array +// different array object that happens to immediately follow the first array // object in the address space.) // // C11's version is more restrictive, however there's no reason why an argument diff --git a/contrib/llvm/lib/Analysis/MemoryBuiltins.cpp b/contrib/llvm/lib/Analysis/MemoryBuiltins.cpp index d490d54..9c0d8ac 100644 --- a/contrib/llvm/lib/Analysis/MemoryBuiltins.cpp +++ b/contrib/llvm/lib/Analysis/MemoryBuiltins.cpp @@ -364,26 +364,6 @@ bool llvm::getObjectSize(const Value *Ptr, uint64_t &Size, const DataLayout *TD, return true; } -/// \brief Compute the size of the underlying object pointed by Ptr. Returns -/// true and the object size in Size if successful, and false otherwise. -/// If RoundToAlign is true, then Size is rounded up to the aligment of allocas, -/// byval arguments, and global variables. -bool llvm::getUnderlyingObjectSize(const Value *Ptr, uint64_t &Size, - const DataLayout *TD, - const TargetLibraryInfo *TLI, - bool RoundToAlign) { - if (!TD) - return false; - - ObjectSizeOffsetVisitor Visitor(TD, TLI, Ptr->getContext(), RoundToAlign); - SizeOffsetType Data = Visitor.compute(const_cast<Value*>(Ptr)); - if (!Visitor.knownSize(Data)) - return false; - - Size = Data.first.getZExtValue(); - return true; -} - STATISTIC(ObjectVisitorArgument, "Number of arguments with unsolved size and offset"); @@ -409,23 +389,16 @@ ObjectSizeOffsetVisitor::ObjectSizeOffsetVisitor(const DataLayout *TD, SizeOffsetType ObjectSizeOffsetVisitor::compute(Value *V) { V = V->stripPointerCasts(); + if (Instruction *I = dyn_cast<Instruction>(V)) { + // If we have already seen this instruction, bail out. Cycles can happen in + // unreachable code after constant propagation. + if (!SeenInsts.insert(I)) + return unknown(); - if (isa<Instruction>(V) || isa<GEPOperator>(V)) { - // Return cached value or insert unknown in cache if size of V was not - // computed yet in order to avoid recursions in PHis. 
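ObjectSizeOffsetVisitor::compute() above now records visited instructions in SeenInsts and returns unknown() on a repeat visit, because def-use chains in unreachable code can form cycles after constant propagation and the old cache-based recursion has been removed. A toy sketch of the same guard, with a hypothetical Node type standing in for an LLVM Value:

    #include <cstdio>
    #include <unordered_set>

    // Toy stand-in for a Value whose size we try to derive from its (single) input;
    // in unreachable IR such def-use chains can form cycles after constant propagation.
    struct Node {
      Node *Input = nullptr;
      int Size = -1;   // -1 means "derive the size from Input"
    };

    // Mirrors ObjectSizeOffsetVisitor::compute(): bail out with "unknown" the moment
    // a node is seen twice instead of recursing forever.
    static int computeSize(Node *N, std::unordered_set<Node *> &Seen) {
      if (!Seen.insert(N).second)
        return -1;                       // already visited: cycle, give up
      if (N->Size >= 0)
        return N->Size;                  // known size
      return N->Input ? computeSize(N->Input, Seen) : -1;
    }

    int main() {
      Node A, B;
      A.Input = &B;
      B.Input = &A;                      // artificial cycle, as in unreachable code

      std::unordered_set<Node *> Seen;
      std::printf("size = %d (unknown)\n", computeSize(&A, Seen));
      return 0;
    }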
- std::pair<CacheMapTy::iterator, bool> CacheVal = - CacheMap.insert(std::make_pair(V, unknown())); - if (!CacheVal.second) - return CacheVal.first->second; - - SizeOffsetType Result; if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) - Result = visitGEPOperator(*GEP); - else - Result = visit(cast<Instruction>(*V)); - return CacheMap[V] = Result; + return visitGEPOperator(*GEP); + return visit(*I); } - if (Argument *A = dyn_cast<Argument>(V)) return visitArgument(*A); if (ConstantPointerNull *P = dyn_cast<ConstantPointerNull>(V)) @@ -439,6 +412,8 @@ SizeOffsetType ObjectSizeOffsetVisitor::compute(Value *V) { if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) { if (CE->getOpcode() == Instruction::IntToPtr) return unknown(); // clueless + if (CE->getOpcode() == Instruction::GetElementPtr) + return visitGEPOperator(cast<GEPOperator>(*CE)); } DEBUG(dbgs() << "ObjectSizeOffsetVisitor::compute() unhandled value: " << *V @@ -572,21 +547,9 @@ SizeOffsetType ObjectSizeOffsetVisitor::visitLoadInst(LoadInst&) { return unknown(); } -SizeOffsetType ObjectSizeOffsetVisitor::visitPHINode(PHINode &PHI) { - if (PHI.getNumIncomingValues() == 0) - return unknown(); - - SizeOffsetType Ret = compute(PHI.getIncomingValue(0)); - if (!bothKnown(Ret)) - return unknown(); - - // Verify that all PHI incoming pointers have the same size and offset. - for (unsigned i = 1, e = PHI.getNumIncomingValues(); i != e; ++i) { - SizeOffsetType EdgeData = compute(PHI.getIncomingValue(i)); - if (!bothKnown(EdgeData) || EdgeData != Ret) - return unknown(); - } - return Ret; +SizeOffsetType ObjectSizeOffsetVisitor::visitPHINode(PHINode&) { + // too complex to analyze statically. + return unknown(); } SizeOffsetType ObjectSizeOffsetVisitor::visitSelectInst(SelectInst &I) { diff --git a/contrib/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp b/contrib/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp index 2240e9d..c0009cb 100644 --- a/contrib/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp +++ b/contrib/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp @@ -47,9 +47,7 @@ STATISTIC(NumCacheCompleteNonLocalPtr, "Number of block queries that were completely cached"); // Limit for the number of instructions to scan in a block. -// FIXME: Figure out what a sane value is for this. -// (500 is relatively insane.) -static const int BlockScanLimit = 500; +static const int BlockScanLimit = 100; char MemoryDependenceAnalysis::ID = 0; @@ -913,7 +911,6 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer, SmallVectorImpl<NonLocalDepResult> &Result, DenseMap<BasicBlock*, Value*> &Visited, bool SkipFirstBlock) { - // Look up the cached info for Pointer. ValueIsLoadPair CacheKey(Pointer.getAddr(), isLoad); @@ -1001,8 +998,17 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer, for (NonLocalDepInfo::iterator I = Cache->begin(), E = Cache->end(); I != E; ++I) { Visited.insert(std::make_pair(I->getBB(), Addr)); - if (!I->getResult().isNonLocal() && DT->isReachableFromEntry(I->getBB())) + if (I->getResult().isNonLocal()) { + continue; + } + + if (!DT) { + Result.push_back(NonLocalDepResult(I->getBB(), + MemDepResult::getUnknown(), + Addr)); + } else if (DT->isReachableFromEntry(I->getBB())) { Result.push_back(NonLocalDepResult(I->getBB(), I->getResult(), Addr)); + } } ++NumCacheCompleteNonLocalPtr; return false; @@ -1047,9 +1053,16 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer, NumSortedEntries); // If we got a Def or Clobber, add this to the list of results. 
- if (!Dep.isNonLocal() && DT->isReachableFromEntry(BB)) { - Result.push_back(NonLocalDepResult(BB, Dep, Pointer.getAddr())); - continue; + if (!Dep.isNonLocal()) { + if (!DT) { + Result.push_back(NonLocalDepResult(BB, + MemDepResult::getUnknown(), + Pointer.getAddr())); + continue; + } else if (DT->isReachableFromEntry(BB)) { + Result.push_back(NonLocalDepResult(BB, Dep, Pointer.getAddr())); + continue; + } } } diff --git a/contrib/llvm/lib/Analysis/RegionInfo.cpp b/contrib/llvm/lib/Analysis/RegionInfo.cpp index fad5074..8577025 100644 --- a/contrib/llvm/lib/Analysis/RegionInfo.cpp +++ b/contrib/llvm/lib/Analysis/RegionInfo.cpp @@ -79,10 +79,43 @@ void Region::replaceExit(BasicBlock *BB) { exit = BB; } +void Region::replaceEntryRecursive(BasicBlock *NewEntry) { + std::vector<Region *> RegionQueue; + BasicBlock *OldEntry = getEntry(); + + RegionQueue.push_back(this); + while (!RegionQueue.empty()) { + Region *R = RegionQueue.back(); + RegionQueue.pop_back(); + + R->replaceEntry(NewEntry); + for (Region::const_iterator RI = R->begin(), RE = R->end(); RI != RE; ++RI) + if ((*RI)->getEntry() == OldEntry) + RegionQueue.push_back(*RI); + } +} + +void Region::replaceExitRecursive(BasicBlock *NewExit) { + std::vector<Region *> RegionQueue; + BasicBlock *OldExit = getExit(); + + RegionQueue.push_back(this); + while (!RegionQueue.empty()) { + Region *R = RegionQueue.back(); + RegionQueue.pop_back(); + + R->replaceExit(NewExit); + for (Region::const_iterator RI = R->begin(), RE = R->end(); RI != RE; ++RI) + if ((*RI)->getExit() == OldExit) + RegionQueue.push_back(*RI); + } +} + bool Region::contains(const BasicBlock *B) const { BasicBlock *BB = const_cast<BasicBlock*>(B); - assert(DT->getNode(BB) && "BB not part of the dominance tree"); + if (!DT->getNode(BB)) + return false; BasicBlock *entry = getEntry(), *exit = getExit(); diff --git a/contrib/llvm/lib/Analysis/ScalarEvolution.cpp b/contrib/llvm/lib/Analysis/ScalarEvolution.cpp index 6ea915f..f876748 100644 --- a/contrib/llvm/lib/Analysis/ScalarEvolution.cpp +++ b/contrib/llvm/lib/Analysis/ScalarEvolution.cpp @@ -3937,10 +3937,19 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) { /// before taking the branch. For loops with multiple exits, it may not be the /// number times that the loop header executes because the loop may exit /// prematurely via another branch. +/// +/// FIXME: We conservatively call getBackedgeTakenCount(L) instead of +/// getExitCount(L, ExitingBlock) to compute a safe trip count considering all +/// loop exits. getExitCount() may return an exact count for this branch +/// assuming no-signed-wrap. The number of well-defined iterations may actually +/// be higher than this trip count if this exit test is skipped and the loop +/// exits via a different branch. Ideally, getExitCount() would know whether it +/// depends on a NSW assumption, and we would only fall back to a conservative +/// trip count in that case. unsigned ScalarEvolution:: -getSmallConstantTripCount(Loop *L, BasicBlock *ExitingBlock) { +getSmallConstantTripCount(Loop *L, BasicBlock */*ExitingBlock*/) { const SCEVConstant *ExitCount = - dyn_cast<SCEVConstant>(getExitCount(L, ExitingBlock)); + dyn_cast<SCEVConstant>(getBackedgeTakenCount(L)); if (!ExitCount) return 0; @@ -3967,8 +3976,8 @@ getSmallConstantTripCount(Loop *L, BasicBlock *ExitingBlock) { /// As explained in the comments for getSmallConstantTripCount, this assumes /// that control exits the loop via ExitingBlock. 
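The new Region::replaceEntryRecursive() and replaceExitRecursive() above walk child regions with an explicit worklist and rewrite every descendant that shared the old entry or exit block. A simplified, self-contained version of the entry case; Block and the Region layout here are stand-ins for the real llvm::Region interface:

    #include <cstdio>
    #include <string>
    #include <vector>

    struct Block { std::string Name; };

    // Simplified stand-in for llvm::Region: nested regions can share an entry block
    // with their parent, so replacing the parent's entry must also update every
    // child region that starts at the same block.
    struct Region {
      Block *Entry;
      std::vector<Region *> Children;
    };

    // Mirrors Region::replaceEntryRecursive(): a worklist walk that rewrites the
    // entry of this region and of every descendant whose entry was the old block.
    static void replaceEntryRecursive(Region *Top, Block *NewEntry) {
      Block *OldEntry = Top->Entry;
      std::vector<Region *> Worklist{Top};
      while (!Worklist.empty()) {
        Region *R = Worklist.back();
        Worklist.pop_back();
        R->Entry = NewEntry;
        for (Region *Child : R->Children)
          if (Child->Entry == OldEntry)
            Worklist.push_back(Child);
      }
    }

    int main() {
      Block Old{"old.entry"}, New{"new.entry"}, Other{"other"};
      Region Child1{&Old, {}}, Child2{&Other, {}};
      Region Parent{&Old, {&Child1, &Child2}};

      replaceEntryRecursive(&Parent, &New);
      std::printf("parent=%s child1=%s child2=%s\n", Parent.Entry->Name.c_str(),
                  Child1.Entry->Name.c_str(), Child2.Entry->Name.c_str());
      // parent=new.entry child1=new.entry child2=other
      return 0;
    }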
unsigned ScalarEvolution:: -getSmallConstantTripMultiple(Loop *L, BasicBlock *ExitingBlock) { - const SCEV *ExitCount = getExitCount(L, ExitingBlock); +getSmallConstantTripMultiple(Loop *L, BasicBlock */*ExitingBlock*/) { + const SCEV *ExitCount = getBackedgeTakenCount(L); if (ExitCount == getCouldNotCompute()) return 1; @@ -3997,7 +4006,7 @@ getSmallConstantTripMultiple(Loop *L, BasicBlock *ExitingBlock) { } // getExitCount - Get the expression for the number of loop iterations for which -// this loop is guaranteed not to exit via ExitintBlock. Otherwise return +// this loop is guaranteed not to exit via ExitingBlock. Otherwise return // SCEVCouldNotCompute. const SCEV *ScalarEvolution::getExitCount(Loop *L, BasicBlock *ExitingBlock) { return getBackedgeTakenInfo(L).getExact(ExitingBlock, this); @@ -4382,26 +4391,36 @@ ScalarEvolution::ComputeExitLimit(const Loop *L, BasicBlock *ExitingBlock) { // Proceed to the next level to examine the exit condition expression. return ComputeExitLimitFromCond(L, ExitBr->getCondition(), ExitBr->getSuccessor(0), - ExitBr->getSuccessor(1)); + ExitBr->getSuccessor(1), + /*IsSubExpr=*/false); } /// ComputeExitLimitFromCond - Compute the number of times the /// backedge of the specified loop will execute if its exit condition /// were a conditional branch of ExitCond, TBB, and FBB. +/// +/// @param IsSubExpr is true if ExitCond does not directly control the exit +/// branch. In this case, we cannot assume that the loop only exits when the +/// condition is true and cannot infer that failing to meet the condition prior +/// to integer wraparound results in undefined behavior. ScalarEvolution::ExitLimit ScalarEvolution::ComputeExitLimitFromCond(const Loop *L, Value *ExitCond, BasicBlock *TBB, - BasicBlock *FBB) { + BasicBlock *FBB, + bool IsSubExpr) { // Check if the controlling expression for this loop is an And or Or. if (BinaryOperator *BO = dyn_cast<BinaryOperator>(ExitCond)) { if (BO->getOpcode() == Instruction::And) { // Recurse on the operands of the and. - ExitLimit EL0 = ComputeExitLimitFromCond(L, BO->getOperand(0), TBB, FBB); - ExitLimit EL1 = ComputeExitLimitFromCond(L, BO->getOperand(1), TBB, FBB); + bool EitherMayExit = L->contains(TBB); + ExitLimit EL0 = ComputeExitLimitFromCond(L, BO->getOperand(0), TBB, FBB, + IsSubExpr || EitherMayExit); + ExitLimit EL1 = ComputeExitLimitFromCond(L, BO->getOperand(1), TBB, FBB, + IsSubExpr || EitherMayExit); const SCEV *BECount = getCouldNotCompute(); const SCEV *MaxBECount = getCouldNotCompute(); - if (L->contains(TBB)) { + if (EitherMayExit) { // Both conditions must be true for the loop to continue executing. // Choose the less conservative count. if (EL0.Exact == getCouldNotCompute() || @@ -4429,11 +4448,14 @@ ScalarEvolution::ComputeExitLimitFromCond(const Loop *L, } if (BO->getOpcode() == Instruction::Or) { // Recurse on the operands of the or. - ExitLimit EL0 = ComputeExitLimitFromCond(L, BO->getOperand(0), TBB, FBB); - ExitLimit EL1 = ComputeExitLimitFromCond(L, BO->getOperand(1), TBB, FBB); + bool EitherMayExit = L->contains(FBB); + ExitLimit EL0 = ComputeExitLimitFromCond(L, BO->getOperand(0), TBB, FBB, + IsSubExpr || EitherMayExit); + ExitLimit EL1 = ComputeExitLimitFromCond(L, BO->getOperand(1), TBB, FBB, + IsSubExpr || EitherMayExit); const SCEV *BECount = getCouldNotCompute(); const SCEV *MaxBECount = getCouldNotCompute(); - if (L->contains(FBB)) { + if (EitherMayExit) { // Both conditions must be false for the loop to continue executing. // Choose the less conservative count. 
if (EL0.Exact == getCouldNotCompute() || @@ -4464,7 +4486,7 @@ ScalarEvolution::ComputeExitLimitFromCond(const Loop *L, // With an icmp, it may be feasible to compute an exact backedge-taken count. // Proceed to the next level to examine the icmp. if (ICmpInst *ExitCondICmp = dyn_cast<ICmpInst>(ExitCond)) - return ComputeExitLimitFromICmp(L, ExitCondICmp, TBB, FBB); + return ComputeExitLimitFromICmp(L, ExitCondICmp, TBB, FBB, IsSubExpr); // Check for a constant condition. These are normally stripped out by // SimplifyCFG, but ScalarEvolution may be used by a pass which wishes to @@ -4490,7 +4512,8 @@ ScalarEvolution::ExitLimit ScalarEvolution::ComputeExitLimitFromICmp(const Loop *L, ICmpInst *ExitCond, BasicBlock *TBB, - BasicBlock *FBB) { + BasicBlock *FBB, + bool IsSubExpr) { // If the condition was exit on true, convert the condition to exit on false ICmpInst::Predicate Cond; @@ -4542,7 +4565,7 @@ ScalarEvolution::ComputeExitLimitFromICmp(const Loop *L, switch (Cond) { case ICmpInst::ICMP_NE: { // while (X != Y) // Convert to: while (X-Y != 0) - ExitLimit EL = HowFarToZero(getMinusSCEV(LHS, RHS), L); + ExitLimit EL = HowFarToZero(getMinusSCEV(LHS, RHS), L, IsSubExpr); if (EL.hasAnyInfo()) return EL; break; } @@ -4553,24 +4576,24 @@ ScalarEvolution::ComputeExitLimitFromICmp(const Loop *L, break; } case ICmpInst::ICMP_SLT: { - ExitLimit EL = HowManyLessThans(LHS, RHS, L, true); + ExitLimit EL = HowManyLessThans(LHS, RHS, L, true, IsSubExpr); if (EL.hasAnyInfo()) return EL; break; } case ICmpInst::ICMP_SGT: { ExitLimit EL = HowManyLessThans(getNotSCEV(LHS), - getNotSCEV(RHS), L, true); + getNotSCEV(RHS), L, true, IsSubExpr); if (EL.hasAnyInfo()) return EL; break; } case ICmpInst::ICMP_ULT: { - ExitLimit EL = HowManyLessThans(LHS, RHS, L, false); + ExitLimit EL = HowManyLessThans(LHS, RHS, L, false, IsSubExpr); if (EL.hasAnyInfo()) return EL; break; } case ICmpInst::ICMP_UGT: { ExitLimit EL = HowManyLessThans(getNotSCEV(LHS), - getNotSCEV(RHS), L, false); + getNotSCEV(RHS), L, false, IsSubExpr); if (EL.hasAnyInfo()) return EL; break; } @@ -5439,7 +5462,7 @@ SolveQuadraticEquation(const SCEVAddRecExpr *AddRec, ScalarEvolution &SE) { /// effectively V != 0. We know and take advantage of the fact that this /// expression only being used in a comparison by zero context. ScalarEvolution::ExitLimit -ScalarEvolution::HowFarToZero(const SCEV *V, const Loop *L) { +ScalarEvolution::HowFarToZero(const SCEV *V, const Loop *L, bool IsSubExpr) { // If the value is a constant if (const SCEVConstant *C = dyn_cast<SCEVConstant>(V)) { // If the value is already zero, the branch will execute zero times. @@ -5537,19 +5560,20 @@ ScalarEvolution::HowFarToZero(const SCEV *V, const Loop *L) { } // If the recurrence is known not to wraparound, unsigned divide computes the - // back edge count. We know that the value will either become zero (and thus - // the loop terminates), that the loop will terminate through some other exit - // condition first, or that the loop has undefined behavior. This means - // we can't "miss" the exit value, even with nonunit stride. + // back edge count. (Ideally we would have an "isexact" bit for udiv). We know + // that the value will either become zero (and thus the loop terminates), that + // the loop will terminate through some other exit condition first, or that + // the loop has undefined behavior. This means we can't "miss" the exit + // value, even with nonunit stride. // - // FIXME: Prove that loops always exhibits *acceptable* undefined - // behavior. 
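The new IsSubExpr flag records that the condition being analyzed is only one operand of an and/or, so ScalarEvolution must not assume the loop leaves exactly when that operand flips; another operand may take the loop out first. A small self-contained loop showing the kind of situation the flag guards against (the constants are made up):

    #include <cstdio>

    int main() {
      unsigned Trips = 0;
      // The first operand "i != 11" can never fire on its own: i only takes even
      // values, so i == 11 is unreachable, and a count derived from that test alone
      // (via the no-wrap udiv in HowFarToZero) would be wrong. The loop still
      // terminates, because the second operand "i < 20" exits after 10 iterations.
      for (unsigned i = 0; i != 11 && i < 20; i += 2)
        ++Trips;
      std::printf("executed %u times\n", Trips);   // 10
      return 0;
    }

If the i != 11 test were treated as the sole controlling exit, the stride-2 no-wrap reasoning would conclude that the loop either hits 11 or has undefined behavior, which is false here; hence the conservative fallback whenever IsSubExpr is set.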
Loops must exhibit defined behavior until a wrapped value is - // actually used. So the trip count computed by udiv could be smaller than the - // number of well-defined iterations. - if (AddRec->getNoWrapFlags(SCEV::FlagNW)) { - // FIXME: We really want an "isexact" bit for udiv. + // This is only valid for expressions that directly compute the loop exit. It + // is invalid for subexpressions in which the loop may exit through this + // branch even if this subexpression is false. In that case, the trip count + // computed by this udiv could be smaller than the number of well-defined + // iterations. + if (!IsSubExpr && AddRec->getNoWrapFlags(SCEV::FlagNW)) return getUDivExpr(Distance, CountDown ? getNegativeSCEV(Step) : Step); - } + // Then, try to solve the above equation provided that Start is constant. if (const SCEVConstant *StartC = dyn_cast<SCEVConstant>(Start)) return SolveLinEquationWithOverflow(StepC->getValue()->getValue(), @@ -6315,9 +6339,14 @@ const SCEV *ScalarEvolution::getBECount(const SCEV *Start, /// HowManyLessThans - Return the number of times a backedge containing the /// specified less-than comparison will execute. If not computable, return /// CouldNotCompute. +/// +/// @param IsSubExpr is true when the LHS < RHS condition does not directly +/// control the branch. In this case, we can only compute an iteration count for +/// a subexpression that cannot overflow before evaluating true. ScalarEvolution::ExitLimit ScalarEvolution::HowManyLessThans(const SCEV *LHS, const SCEV *RHS, - const Loop *L, bool isSigned) { + const Loop *L, bool isSigned, + bool IsSubExpr) { // Only handle: "ADDREC < LoopInvariant". if (!isLoopInvariant(RHS, L)) return getCouldNotCompute(); @@ -6326,10 +6355,12 @@ ScalarEvolution::HowManyLessThans(const SCEV *LHS, const SCEV *RHS, return getCouldNotCompute(); // Check to see if we have a flag which makes analysis easy. - bool NoWrap = isSigned ? - AddRec->getNoWrapFlags((SCEV::NoWrapFlags)(SCEV::FlagNSW | SCEV::FlagNW)) : - AddRec->getNoWrapFlags((SCEV::NoWrapFlags)(SCEV::FlagNUW | SCEV::FlagNW)); - + bool NoWrap = false; + if (!IsSubExpr) { + NoWrap = AddRec->getNoWrapFlags( + (SCEV::NoWrapFlags)(((isSigned ? SCEV::FlagNSW : SCEV::FlagNUW)) + | SCEV::FlagNW)); + } if (AddRec->isAffine()) { unsigned BitWidth = getTypeSizeInBits(AddRec->getType()); const SCEV *Step = AddRec->getStepRecurrence(*this); diff --git a/contrib/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp b/contrib/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp index 68e43b2..bbf3c3a 100644 --- a/contrib/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp +++ b/contrib/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp @@ -71,6 +71,7 @@ using namespace llvm; // achieved by stripping the !tbaa tags from IR, but this option is sometimes // more convenient. static cl::opt<bool> EnableTBAA("enable-tbaa", cl::init(true)); +static cl::opt<bool> EnableStructPathTBAA("struct-path-tbaa", cl::init(false)); namespace { /// TBAANode - This is a simple wrapper around an MDNode which provides a @@ -109,6 +110,97 @@ namespace { return CI->getValue()[0]; } }; + + /// This is a simple wrapper around an MDNode which provides a + /// higher-level interface by hiding the details of how alias analysis + /// information is encoded in its operands. + class TBAAStructTagNode { + /// This node should be created with createTBAAStructTagNode. + const MDNode *Node; + + public: + TBAAStructTagNode() : Node(0) {} + explicit TBAAStructTagNode(const MDNode *N) : Node(N) {} + + /// Get the MDNode for this TBAAStructTagNode. 
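When the comparison does directly control the exit and the recurrence carries a no-wrap flag, HowFarToZero can still divide the distance to the exit value by the stride, even for a non-unit stride; that is the case the !IsSubExpr test preserves. A worked instance with made-up numbers:

    #include <cstdio>

    int main() {
      // i starts at 100 and steps by -4; the exit test "i != 0" directly controls
      // the loop, so with a no-wrap recurrence the backedge-taken count is simply
      // distance / stride = 100 / 4 = 25, even though the stride is not 1.
      unsigned BackedgeTaken = 0;
      for (unsigned i = 100; i != 0; i -= 4)
        ++BackedgeTaken;   // the body runs, and the backedge is taken, 25 times
      std::printf("backedge taken %u times (100 / 4 = %u)\n",
                  BackedgeTaken, 100u / 4u);
      return 0;
    }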
+ const MDNode *getNode() const { return Node; } + + const MDNode *getBaseType() const { + return dyn_cast_or_null<MDNode>(Node->getOperand(0)); + } + const MDNode *getAccessType() const { + return dyn_cast_or_null<MDNode>(Node->getOperand(1)); + } + uint64_t getOffset() const { + return cast<ConstantInt>(Node->getOperand(2))->getZExtValue(); + } + /// TypeIsImmutable - Test if this TBAAStructTagNode represents a type for + /// objects which are not modified (by any means) in the context where this + /// AliasAnalysis is relevant. + bool TypeIsImmutable() const { + if (Node->getNumOperands() < 4) + return false; + ConstantInt *CI = dyn_cast<ConstantInt>(Node->getOperand(3)); + if (!CI) + return false; + return CI->getValue()[0]; + } + }; + + /// This is a simple wrapper around an MDNode which provides a + /// higher-level interface by hiding the details of how alias analysis + /// information is encoded in its operands. + class TBAAStructTypeNode { + /// This node should be created with createTBAAStructTypeNode. + const MDNode *Node; + + public: + TBAAStructTypeNode() : Node(0) {} + explicit TBAAStructTypeNode(const MDNode *N) : Node(N) {} + + /// Get the MDNode for this TBAAStructTypeNode. + const MDNode *getNode() const { return Node; } + + /// Get this TBAAStructTypeNode's field in the type DAG with + /// given offset. Update the offset to be relative to the field type. + TBAAStructTypeNode getParent(uint64_t &Offset) const { + // Parent can be omitted for the root node. + if (Node->getNumOperands() < 2) + return TBAAStructTypeNode(); + + // Special handling for a scalar type node. + if (Node->getNumOperands() <= 3) { + MDNode *P = dyn_cast_or_null<MDNode>(Node->getOperand(1)); + if (!P) + return TBAAStructTypeNode(); + return TBAAStructTypeNode(P); + } + + // Assume the offsets are in order. We return the previous field if + // the current offset is bigger than the given offset. + unsigned TheIdx = 0; + for (unsigned Idx = 1; Idx < Node->getNumOperands(); Idx += 2) { + uint64_t Cur = cast<ConstantInt>(Node->getOperand(Idx + 1))-> + getZExtValue(); + if (Cur > Offset) { + assert(Idx >= 3 && + "TBAAStructTypeNode::getParent should have an offset match!"); + TheIdx = Idx - 2; + break; + } + } + // Move along the last field. + if (TheIdx == 0) + TheIdx = Node->getNumOperands() - 2; + uint64_t Cur = cast<ConstantInt>(Node->getOperand(TheIdx + 1))-> + getZExtValue(); + Offset -= Cur; + MDNode *P = dyn_cast_or_null<MDNode>(Node->getOperand(TheIdx)); + if (!P) + return TBAAStructTypeNode(); + return TBAAStructTypeNode(P); + } + }; } namespace { @@ -137,6 +229,7 @@ namespace { } bool Aliases(const MDNode *A, const MDNode *B) const; + bool PathAliases(const MDNode *A, const MDNode *B) const; private: virtual void getAnalysisUsage(AnalysisUsage &AU) const; @@ -171,6 +264,9 @@ TypeBasedAliasAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { bool TypeBasedAliasAnalysis::Aliases(const MDNode *A, const MDNode *B) const { + if (EnableStructPathTBAA) + return PathAliases(A, B); + // Keep track of the root node for A and B. TBAANode RootA, RootB; @@ -209,6 +305,67 @@ TypeBasedAliasAnalysis::Aliases(const MDNode *A, return false; } +/// Test whether the struct-path tag represented by A may alias the +/// struct-path tag represented by B. +bool +TypeBasedAliasAnalysis::PathAliases(const MDNode *A, + const MDNode *B) const { + // Keep track of the root node for A and B. 
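The new TBAAStructTypeNode::getParent() above walks one level of the struct-path type DAG: given an offset into a type, it selects the field whose range contains that offset and rebases the offset relative to the field's own type. A rough stand-in using an explicit field list instead of MDNode operands; TypeNode and stepIntoField are hypothetical names, not LLVM API:

    #include <cstdio>
    #include <utility>
    #include <vector>

    // Toy struct-path type node: each field is a (type, start offset) pair, with
    // offsets sorted in increasing order, mirroring the metadata layout decoded by
    // the TBAAStructTypeNode wrapper.
    struct TypeNode {
      const char *Name;
      std::vector<std::pair<TypeNode *, unsigned>> Fields;
    };

    // Mirrors TBAAStructTypeNode::getParent(): pick the last field whose start is
    // <= Offset and rebase Offset so it is relative to that field's type.
    static TypeNode *stepIntoField(TypeNode *T, unsigned &Offset) {
      if (T->Fields.empty())
        return nullptr;                          // scalar / leaf type: nowhere to go
      size_t Idx = 0;
      while (Idx + 1 < T->Fields.size() && T->Fields[Idx + 1].second <= Offset)
        ++Idx;
      Offset -= T->Fields[Idx].second;
      return T->Fields[Idx].first;
    }

    int main() {
      TypeNode Int{"int", {}};
      TypeNode Inner{"Inner", {{&Int, 0}, {&Int, 4}}};     // struct Inner { int a, b; };
      TypeNode Outer{"Outer", {{&Int, 0}, {&Inner, 4}}};   // struct Outer { int x; Inner y; };

      unsigned Offset = 8;                                  // access to Outer::y.b
      for (TypeNode *T = &Outer; T; T = stepIntoField(T, Offset))
        std::printf("%s (remaining offset %u)\n", T->Name, Offset);
      // Outer (8) -> Inner (4) -> int (0)
      return 0;
    }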
+ TBAAStructTypeNode RootA, RootB; + TBAAStructTagNode TagA(A), TagB(B); + + // TODO: We need to check if AccessType of TagA encloses AccessType of + // TagB to support aggregate AccessType. If yes, return true. + + // Start from the base type of A, follow the edge with the correct offset in + // the type DAG and adjust the offset until we reach the base type of B or + // until we reach the Root node. + // Compare the adjusted offset once we have the same base. + + // Climb the type DAG from base type of A to see if we reach base type of B. + const MDNode *BaseA = TagA.getBaseType(); + const MDNode *BaseB = TagB.getBaseType(); + uint64_t OffsetA = TagA.getOffset(), OffsetB = TagB.getOffset(); + for (TBAAStructTypeNode T(BaseA); ; ) { + if (T.getNode() == BaseB) + // Base type of A encloses base type of B, check if the offsets match. + return OffsetA == OffsetB; + + RootA = T; + // Follow the edge with the correct offset, OffsetA will be adjusted to + // be relative to the field type. + T = T.getParent(OffsetA); + if (!T.getNode()) + break; + } + + // Reset OffsetA and climb the type DAG from base type of B to see if we reach + // base type of A. + OffsetA = TagA.getOffset(); + for (TBAAStructTypeNode T(BaseB); ; ) { + if (T.getNode() == BaseA) + // Base type of B encloses base type of A, check if the offsets match. + return OffsetA == OffsetB; + + RootB = T; + // Follow the edge with the correct offset, OffsetB will be adjusted to + // be relative to the field type. + T = T.getParent(OffsetB); + if (!T.getNode()) + break; + } + + // Neither node is an ancestor of the other. + + // If they have different roots, they're part of different potentially + // unrelated type systems, so we must be conservative. + if (RootA.getNode() != RootB.getNode()) + return true; + + // If they have the same root, then we've proved there's no alias. + return false; +} + AliasAnalysis::AliasResult TypeBasedAliasAnalysis::alias(const Location &LocA, const Location &LocB) { @@ -240,7 +397,8 @@ bool TypeBasedAliasAnalysis::pointsToConstantMemory(const Location &Loc, // If this is an "immutable" type, we can assume the pointer is pointing // to constant memory. - if (TBAANode(M).TypeIsImmutable()) + if ((!EnableStructPathTBAA && TBAANode(M).TypeIsImmutable()) || + (EnableStructPathTBAA && TBAAStructTagNode(M).TypeIsImmutable())) return true; return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal); @@ -256,7 +414,8 @@ TypeBasedAliasAnalysis::getModRefBehavior(ImmutableCallSite CS) { // If this is an "immutable" type, we can assume the call doesn't write // to memory. if (const MDNode *M = CS.getInstruction()->getMetadata(LLVMContext::MD_tbaa)) - if (TBAANode(M).TypeIsImmutable()) + if ((!EnableStructPathTBAA && TBAANode(M).TypeIsImmutable()) || + (EnableStructPathTBAA && TBAAStructTagNode(M).TypeIsImmutable())) Min = OnlyReadsMemory; return ModRefBehavior(AliasAnalysis::getModRefBehavior(CS) & Min); @@ -298,3 +457,55 @@ TypeBasedAliasAnalysis::getModRefInfo(ImmutableCallSite CS1, return AliasAnalysis::getModRefInfo(CS1, CS2); } + +MDNode *MDNode::getMostGenericTBAA(MDNode *A, MDNode *B) { + if (!A || !B) + return NULL; + + if (A == B) + return A; + + // For struct-path aware TBAA, we use the access type of the tag. + if (EnableStructPathTBAA) { + A = cast_or_null<MDNode>(A->getOperand(1)); + if (!A) return 0; + B = cast_or_null<MDNode>(B->getOperand(1)); + if (!B) return 0; + } + + SmallVector<MDNode *, 4> PathA; + MDNode *T = A; + while (T) { + PathA.push_back(T); + T = T->getNumOperands() >= 2 ? 
cast_or_null<MDNode>(T->getOperand(1)) : 0; + } + + SmallVector<MDNode *, 4> PathB; + T = B; + while (T) { + PathB.push_back(T); + T = T->getNumOperands() >= 2 ? cast_or_null<MDNode>(T->getOperand(1)) : 0; + } + + int IA = PathA.size() - 1; + int IB = PathB.size() - 1; + + MDNode *Ret = 0; + while (IA >= 0 && IB >=0) { + if (PathA[IA] == PathB[IB]) + Ret = PathA[IA]; + else + break; + --IA; + --IB; + } + if (!EnableStructPathTBAA) + return Ret; + + if (!Ret) + return 0; + // We need to convert from a type node to a tag node. + Type *Int64 = IntegerType::get(A->getContext(), 64); + Value *Ops[3] = { Ret, Ret, ConstantInt::get(Int64, 0) }; + return MDNode::get(A->getContext(), Ops); +} diff --git a/contrib/llvm/lib/AsmParser/LLLexer.cpp b/contrib/llvm/lib/AsmParser/LLLexer.cpp index f46383b..e7a9f2a 100644 --- a/contrib/llvm/lib/AsmParser/LLLexer.cpp +++ b/contrib/llvm/lib/AsmParser/LLLexer.cpp @@ -582,6 +582,7 @@ lltok::Kind LLLexer::LexIdentifier() { KEYWORD(optsize); KEYWORD(readnone); KEYWORD(readonly); + KEYWORD(returned); KEYWORD(returns_twice); KEYWORD(signext); KEYWORD(sret); diff --git a/contrib/llvm/lib/AsmParser/LLParser.cpp b/contrib/llvm/lib/AsmParser/LLParser.cpp index c8da1f8..62d8070d 100644 --- a/contrib/llvm/lib/AsmParser/LLParser.cpp +++ b/contrib/llvm/lib/AsmParser/LLParser.cpp @@ -528,7 +528,7 @@ bool LLParser::ParseMDNodeID(MDNode *&Result) { if (Result) return false; // Otherwise, create MDNode forward reference. - MDNode *FwdNode = MDNode::getTemporary(Context, ArrayRef<Value*>()); + MDNode *FwdNode = MDNode::getTemporary(Context, None); ForwardRefMDNodes[MID] = std::make_pair(FwdNode, Lex.getLoc()); if (NumberedMetadata.size() <= MID) @@ -878,8 +878,9 @@ bool LLParser::ParseFnAttributeValuePairs(AttrBuilder &B, // Target-independent attributes: case lltok::kw_align: { - // As a hack, we allow "align 2" on functions as a synonym for "alignstack - // 2". + // As a hack, we allow function alignment to be initially parsed as an + // attribute on a function declaration/definition or added to an attribute + // group and later moved to the alignment field. 
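MDNode::getMostGenericTBAA() above collects the parent chain of each tag and walks the two paths from the root downward; the deepest node the paths share is the most generic type both accesses agree on, and differing roots mean no common type at all. A simplified sketch with a toy node type (under struct-path TBAA the real function additionally rewraps the resulting type node as a tag with offset 0):

    #include <cstdio>
    #include <string>
    #include <vector>

    // Toy TBAA-style node: each node names a type and points at its parent in the
    // type tree (the root has no parent). Stand-in for the MDNode chains walked by
    // MDNode::getMostGenericTBAA().
    struct TBAANode {
      std::string Name;
      TBAANode *Parent;
    };

    static std::vector<TBAANode *> pathToRoot(TBAANode *N) {
      std::vector<TBAANode *> Path;
      for (; N; N = N->Parent)
        Path.push_back(N);
      return Path;
    }

    // Compare the two paths from the root ends downward; the last shared node is
    // the most generic common type, and null means the roots differ.
    static TBAANode *mostGeneric(TBAANode *A, TBAANode *B) {
      std::vector<TBAANode *> PA = pathToRoot(A), PB = pathToRoot(B);
      TBAANode *Common = nullptr;
      int IA = PA.size() - 1, IB = PB.size() - 1;
      while (IA >= 0 && IB >= 0 && PA[IA] == PB[IB]) {
        Common = PA[IA];
        --IA;
        --IB;
      }
      return Common;
    }

    int main() {
      TBAANode Root{"omnipotent char", nullptr};
      TBAANode AnyPtr{"any pointer", &Root};
      TBAANode IntPtr{"int*", &AnyPtr};
      TBAANode Int{"int", &Root};

      TBAANode *M = mostGeneric(&IntPtr, &Int);
      std::printf("most generic: %s\n", M ? M->Name.c_str() : "none"); // omnipotent char
      return 0;
    }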
unsigned Alignment; if (inAttrGrp) { Lex.Lex(); @@ -943,6 +944,7 @@ bool LLParser::ParseFnAttributeValuePairs(AttrBuilder &B, case lltok::kw_nest: case lltok::kw_noalias: case lltok::kw_nocapture: + case lltok::kw_returned: case lltok::kw_sret: HaveError |= Error(Lex.getLoc(), @@ -1155,21 +1157,35 @@ bool LLParser::ParseOptionalParamAttrs(AttrBuilder &B) { case lltok::kw_nest: B.addAttribute(Attribute::Nest); break; case lltok::kw_noalias: B.addAttribute(Attribute::NoAlias); break; case lltok::kw_nocapture: B.addAttribute(Attribute::NoCapture); break; + case lltok::kw_returned: B.addAttribute(Attribute::Returned); break; case lltok::kw_signext: B.addAttribute(Attribute::SExt); break; case lltok::kw_sret: B.addAttribute(Attribute::StructRet); break; case lltok::kw_zeroext: B.addAttribute(Attribute::ZExt); break; - case lltok::kw_alignstack: case lltok::kw_nounwind: - case lltok::kw_alwaysinline: case lltok::kw_optsize: - case lltok::kw_inlinehint: case lltok::kw_readnone: - case lltok::kw_minsize: case lltok::kw_readonly: - case lltok::kw_naked: case lltok::kw_returns_twice: - case lltok::kw_nobuiltin: case lltok::kw_sanitize_address: - case lltok::kw_noimplicitfloat: case lltok::kw_sanitize_memory: - case lltok::kw_noinline: case lltok::kw_sanitize_thread: - case lltok::kw_nonlazybind: case lltok::kw_ssp: - case lltok::kw_noredzone: case lltok::kw_sspreq: - case lltok::kw_noreturn: case lltok::kw_uwtable: + case lltok::kw_alignstack: + case lltok::kw_alwaysinline: + case lltok::kw_inlinehint: + case lltok::kw_minsize: + case lltok::kw_naked: + case lltok::kw_nobuiltin: + case lltok::kw_noduplicate: + case lltok::kw_noimplicitfloat: + case lltok::kw_noinline: + case lltok::kw_nonlazybind: + case lltok::kw_noredzone: + case lltok::kw_noreturn: + case lltok::kw_nounwind: + case lltok::kw_optsize: + case lltok::kw_readnone: + case lltok::kw_readonly: + case lltok::kw_returns_twice: + case lltok::kw_sanitize_address: + case lltok::kw_sanitize_memory: + case lltok::kw_sanitize_thread: + case lltok::kw_ssp: + case lltok::kw_sspreq: + case lltok::kw_sspstrong: + case lltok::kw_uwtable: HaveError |= Error(Lex.getLoc(), "invalid use of function-only attribute"); break; } @@ -1195,24 +1211,39 @@ bool LLParser::ParseOptionalReturnAttrs(AttrBuilder &B) { case lltok::kw_zeroext: B.addAttribute(Attribute::ZExt); break; // Error handling. 
- case lltok::kw_sret: case lltok::kw_nocapture: - case lltok::kw_byval: case lltok::kw_nest: + case lltok::kw_align: + case lltok::kw_byval: + case lltok::kw_nest: + case lltok::kw_nocapture: + case lltok::kw_returned: + case lltok::kw_sret: HaveError |= Error(Lex.getLoc(), "invalid use of parameter-only attribute"); break; - case lltok::kw_align: case lltok::kw_noreturn: - case lltok::kw_alignstack: case lltok::kw_nounwind: - case lltok::kw_alwaysinline: case lltok::kw_optsize: - case lltok::kw_inlinehint: case lltok::kw_readnone: - case lltok::kw_minsize: case lltok::kw_readonly: - case lltok::kw_naked: case lltok::kw_returns_twice: - case lltok::kw_nobuiltin: case lltok::kw_sanitize_address: - case lltok::kw_noduplicate: case lltok::kw_sanitize_memory: - case lltok::kw_noimplicitfloat: case lltok::kw_sanitize_thread: - case lltok::kw_noinline: case lltok::kw_ssp: - case lltok::kw_nonlazybind: case lltok::kw_sspreq: - case lltok::kw_noredzone: case lltok::kw_sspstrong: - case lltok::kw_uwtable: + case lltok::kw_alignstack: + case lltok::kw_alwaysinline: + case lltok::kw_inlinehint: + case lltok::kw_minsize: + case lltok::kw_naked: + case lltok::kw_nobuiltin: + case lltok::kw_noduplicate: + case lltok::kw_noimplicitfloat: + case lltok::kw_noinline: + case lltok::kw_nonlazybind: + case lltok::kw_noredzone: + case lltok::kw_noreturn: + case lltok::kw_nounwind: + case lltok::kw_optsize: + case lltok::kw_readnone: + case lltok::kw_readonly: + case lltok::kw_returns_twice: + case lltok::kw_sanitize_address: + case lltok::kw_sanitize_memory: + case lltok::kw_sanitize_thread: + case lltok::kw_ssp: + case lltok::kw_sspreq: + case lltok::kw_sspstrong: + case lltok::kw_uwtable: HaveError |= Error(Lex.getLoc(), "invalid use of function-only attribute"); break; } @@ -4232,7 +4263,9 @@ int LLParser::ParseGetElementPtr(Instruction *&Inst, PerFunctionState &PFS) { if (ParseTypeAndValue(Ptr, Loc, PFS)) return true; - if (!Ptr->getType()->getScalarType()->isPointerTy()) + Type *BaseType = Ptr->getType(); + PointerType *BasePointerType = dyn_cast<PointerType>(BaseType->getScalarType()); + if (!BasePointerType) return Error(Loc, "base of getelementptr must be a pointer"); SmallVector<Value*, 16> Indices; @@ -4257,7 +4290,10 @@ int LLParser::ParseGetElementPtr(Instruction *&Inst, PerFunctionState &PFS) { Indices.push_back(Val); } - if (!GetElementPtrInst::getIndexedType(Ptr->getType(), Indices)) + if (!Indices.empty() && !BasePointerType->getElementType()->isSized()) + return Error(Loc, "base element of getelementptr must be sized"); + + if (!GetElementPtrInst::getIndexedType(BaseType, Indices)) return Error(Loc, "invalid getelementptr indices"); Inst = GetElementPtrInst::Create(Ptr, Indices); if (InBounds) diff --git a/contrib/llvm/lib/AsmParser/LLToken.h b/contrib/llvm/lib/AsmParser/LLToken.h index cd25ba3..3bf54fa 100644 --- a/contrib/llvm/lib/AsmParser/LLToken.h +++ b/contrib/llvm/lib/AsmParser/LLToken.h @@ -114,6 +114,7 @@ namespace lltok { kw_optsize, kw_readnone, kw_readonly, + kw_returned, kw_returns_twice, kw_signext, kw_ssp, diff --git a/contrib/llvm/lib/Bitcode/Reader/BitReader.cpp b/contrib/llvm/lib/Bitcode/Reader/BitReader.cpp index 5cd6c55..23630e5 100644 --- a/contrib/llvm/lib/Bitcode/Reader/BitReader.cpp +++ b/contrib/llvm/lib/Bitcode/Reader/BitReader.cpp @@ -10,6 +10,7 @@ #include "llvm-c/BitReader.h" #include "llvm/Bitcode/ReaderWriter.h" #include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Module.h" #include "llvm/Support/MemoryBuffer.h" #include <cstring> #include <string> diff --git 
a/contrib/llvm/lib/Bitcode/Reader/BitcodeReader.cpp b/contrib/llvm/lib/Bitcode/Reader/BitcodeReader.cpp index f348843..e6ff4b4 100644 --- a/contrib/llvm/lib/Bitcode/Reader/BitcodeReader.cpp +++ b/contrib/llvm/lib/Bitcode/Reader/BitcodeReader.cpp @@ -405,7 +405,7 @@ Value *BitcodeReaderMDValueList::getValueFwdRef(unsigned Idx) { } // Create and return a placeholder, which will later be RAUW'd. - Value *V = MDNode::getTemporary(Context, ArrayRef<Value*>()); + Value *V = MDNode::getTemporary(Context, None); MDValuePtrs[Idx] = V; return V; } diff --git a/contrib/llvm/lib/Bitcode/Writer/BitWriter.cpp b/contrib/llvm/lib/Bitcode/Writer/BitWriter.cpp index 9f51c35..985208c 100644 --- a/contrib/llvm/lib/Bitcode/Writer/BitWriter.cpp +++ b/contrib/llvm/lib/Bitcode/Writer/BitWriter.cpp @@ -9,6 +9,7 @@ #include "llvm-c/BitWriter.h" #include "llvm/Bitcode/ReaderWriter.h" +#include "llvm/IR/Module.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; diff --git a/contrib/llvm/lib/CodeGen/Analysis.cpp b/contrib/llvm/lib/CodeGen/Analysis.cpp index dd7282c..4731af5 100644 --- a/contrib/llvm/lib/CodeGen/Analysis.cpp +++ b/contrib/llvm/lib/CodeGen/Analysis.cpp @@ -201,62 +201,161 @@ ISD::CondCode llvm::getICmpCondCode(ICmpInst::Predicate Pred) { } } +static bool isNoopBitcast(Type *T1, Type *T2, + const TargetLowering& TLI) { + return T1 == T2 || (T1->isPointerTy() && T2->isPointerTy()) || + (isa<VectorType>(T1) && isa<VectorType>(T2) && + TLI.isTypeLegal(EVT::getEVT(T1)) && TLI.isTypeLegal(EVT::getEVT(T2))); +} -/// getNoopInput - If V is a noop (i.e., lowers to no machine code), look -/// through it (and any transitive noop operands to it) and return its input -/// value. This is used to determine if a tail call can be formed. -/// -static const Value *getNoopInput(const Value *V, const TargetLowering &TLI) { - // If V is not an instruction, it can't be looked through. - const Instruction *I = dyn_cast<Instruction>(V); - if (I == 0 || !I->hasOneUse() || I->getNumOperands() == 0) return V; - - Value *Op = I->getOperand(0); +/// sameNoopInput - Return true if V1 == V2, else if either V1 or V2 is a noop +/// (i.e., lowers to no machine code), look through it (and any transitive noop +/// operands to it) and check if it has the same noop input value. This is +/// used to determine if a tail call can be formed. +static bool sameNoopInput(const Value *V1, const Value *V2, + SmallVectorImpl<unsigned> &Els1, + SmallVectorImpl<unsigned> &Els2, + const TargetLowering &TLI) { + using std::swap; + bool swapParity = false; + bool equalEls = Els1 == Els2; + while (true) { + if ((equalEls && V1 == V2) || isa<UndefValue>(V1) || isa<UndefValue>(V2)) { + if (swapParity) + // Revert to original Els1 and Els2 to avoid confusing recursive calls + swap(Els1, Els2); + return true; + } - // Look through truly no-op truncates. - if (isa<TruncInst>(I) && - TLI.isTruncateFree(I->getOperand(0)->getType(), I->getType())) - return getNoopInput(I->getOperand(0), TLI); - - // Look through truly no-op bitcasts. - if (isa<BitCastInst>(I)) { - // No type change at all. - if (Op->getType() == I->getType()) - return getNoopInput(Op, TLI); + // Try to look through V1; if V1 is not an instruction, it can't be looked + // through. + const Instruction *I = dyn_cast<Instruction>(V1); + const Value *NoopInput = 0; + if (I != 0 && I->getNumOperands() > 0) { + Value *Op = I->getOperand(0); + if (isa<TruncInst>(I)) { + // Look through truly no-op truncates. 
+ if (TLI.isTruncateFree(Op->getType(), I->getType())) + NoopInput = Op; + } else if (isa<BitCastInst>(I)) { + // Look through truly no-op bitcasts. + if (isNoopBitcast(Op->getType(), I->getType(), TLI)) + NoopInput = Op; + } else if (isa<GetElementPtrInst>(I)) { + // Look through getelementptr + if (cast<GetElementPtrInst>(I)->hasAllZeroIndices()) + NoopInput = Op; + } else if (isa<IntToPtrInst>(I)) { + // Look through inttoptr. + // Make sure this isn't a truncating or extending cast. We could + // support this eventually, but don't bother for now. + if (!isa<VectorType>(I->getType()) && + TLI.getPointerTy().getSizeInBits() == + cast<IntegerType>(Op->getType())->getBitWidth()) + NoopInput = Op; + } else if (isa<PtrToIntInst>(I)) { + // Look through ptrtoint. + // Make sure this isn't a truncating or extending cast. We could + // support this eventually, but don't bother for now. + if (!isa<VectorType>(I->getType()) && + TLI.getPointerTy().getSizeInBits() == + cast<IntegerType>(I->getType())->getBitWidth()) + NoopInput = Op; + } else if (isa<CallInst>(I)) { + // Look through call + for (User::const_op_iterator i = I->op_begin(), + // Skip Callee + e = I->op_end() - 1; + i != e; ++i) { + unsigned attrInd = i - I->op_begin() + 1; + if (cast<CallInst>(I)->paramHasAttr(attrInd, Attribute::Returned) && + isNoopBitcast((*i)->getType(), I->getType(), TLI)) { + NoopInput = *i; + break; + } + } + } else if (isa<InvokeInst>(I)) { + // Look through invoke + for (User::const_op_iterator i = I->op_begin(), + // Skip BB, BB, Callee + e = I->op_end() - 3; + i != e; ++i) { + unsigned attrInd = i - I->op_begin() + 1; + if (cast<InvokeInst>(I)->paramHasAttr(attrInd, Attribute::Returned) && + isNoopBitcast((*i)->getType(), I->getType(), TLI)) { + NoopInput = *i; + break; + } + } + } + } - // Pointer to pointer cast. - if (Op->getType()->isPointerTy() && I->getType()->isPointerTy()) - return getNoopInput(Op, TLI); - - if (isa<VectorType>(Op->getType()) && isa<VectorType>(I->getType()) && - TLI.isTypeLegal(EVT::getEVT(Op->getType())) && - TLI.isTypeLegal(EVT::getEVT(I->getType()))) - return getNoopInput(Op, TLI); - } - - // Look through inttoptr. - if (isa<IntToPtrInst>(I) && !isa<VectorType>(I->getType())) { - // Make sure this isn't a truncating or extending cast. We could support - // this eventually, but don't bother for now. - if (TLI.getPointerTy().getSizeInBits() == - cast<IntegerType>(Op->getType())->getBitWidth()) - return getNoopInput(Op, TLI); - } + if (NoopInput) { + V1 = NoopInput; + continue; + } - // Look through ptrtoint. - if (isa<PtrToIntInst>(I) && !isa<VectorType>(I->getType())) { - // Make sure this isn't a truncating or extending cast. We could support - // this eventually, but don't bother for now. 
- if (TLI.getPointerTy().getSizeInBits() == - cast<IntegerType>(I->getType())->getBitWidth()) - return getNoopInput(Op, TLI); + // If we already swapped, avoid infinite loop + if (swapParity) + break; + + // Otherwise, swap V1<->V2, Els1<->Els2 + swap(V1, V2); + swap(Els1, Els2); + swapParity = !swapParity; } + for (unsigned n = 0; n < 2; ++n) { + if (isa<InsertValueInst>(V1)) { + if (isa<StructType>(V1->getType())) { + // Look through insertvalue + unsigned i, e; + for (i = 0, e = cast<StructType>(V1->getType())->getNumElements(); + i != e; ++i) { + const Value *InScalar = FindInsertedValue(const_cast<Value*>(V1), i); + if (InScalar == 0) + break; + Els1.push_back(i); + if (!sameNoopInput(InScalar, V2, Els1, Els2, TLI)) { + Els1.pop_back(); + break; + } + Els1.pop_back(); + } + if (i == e) { + if (swapParity) + swap(Els1, Els2); + return true; + } + } + } else if (!Els1.empty() && isa<ExtractValueInst>(V1)) { + const ExtractValueInst *EVI = cast<ExtractValueInst>(V1); + unsigned i = Els1.back(); + // If the scalar value being inserted is an extractvalue of the right + // index from the call, then everything is good. + if (isa<StructType>(EVI->getOperand(0)->getType()) && + EVI->getNumIndices() == 1 && EVI->getIndices()[0] == i) { + // Look through extractvalue + Els1.pop_back(); + if (sameNoopInput(EVI->getOperand(0), V2, Els1, Els2, TLI)) { + Els1.push_back(i); + if (swapParity) + swap(Els1, Els2); + return true; + } + Els1.push_back(i); + } + } - // Otherwise it's not something we can look through. - return V; -} + swap(V1, V2); + swap(Els1, Els2); + swapParity = !swapParity; + } + if (swapParity) + swap(Els1, Els2); + return false; +} /// Test if the given instruction is in a position to be optimized /// with a tail-call. This roughly means that it's in a block with @@ -264,7 +363,8 @@ static const Value *getNoopInput(const Value *V, const TargetLowering &TLI) { /// between it and the return. /// /// This function only tests target-independent requirements. -bool llvm::isInTailCallPosition(ImmutableCallSite CS,const TargetLowering &TLI){ +bool llvm::isInTailCallPosition(ImmutableCallSite CS, + const TargetLowering &TLI) { const Instruction *I = CS.getInstruction(); const BasicBlock *ExitBB = I->getParent(); const TerminatorInst *Term = ExitBB->getTerminator(); @@ -322,28 +422,7 @@ bool llvm::isInTailCallPosition(ImmutableCallSite CS,const TargetLowering &TLI){ CallerAttrs.hasAttribute(AttributeSet::ReturnIndex, Attribute::SExt)) return false; - // Otherwise, make sure the unmodified return value of I is the return value. - // We handle two cases: multiple return values + scalars. - Value *RetVal = Ret->getOperand(0); - if (!isa<InsertValueInst>(RetVal) || !isa<StructType>(RetVal->getType())) - // Handle scalars first. - return getNoopInput(Ret->getOperand(0), TLI) == I; - - // If this is an aggregate return, look through the insert/extract values and - // see if each is transparent. - for (unsigned i = 0, e =cast<StructType>(RetVal->getType())->getNumElements(); - i != e; ++i) { - const Value *InScalar = FindInsertedValue(RetVal, i); - if (InScalar == 0) return false; - InScalar = getNoopInput(InScalar, TLI); - - // If the scalar value being inserted is an extractvalue of the right index - // from the call, then everything is good. 
- const ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(InScalar); - if (EVI == 0 || EVI->getOperand(0) != I || EVI->getNumIndices() != 1 || - EVI->getIndices()[0] != i) - return false; - } - - return true; + // Otherwise, make sure the return value and I have the same value + SmallVector<unsigned, 4> Els1, Els2; + return sameNoopInput(Ret->getOperand(0), I, Els1, Els2, TLI); } diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index d4a745d..84162ac 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -133,9 +133,13 @@ const DataLayout &AsmPrinter::getDataLayout() const { return *TM.getDataLayout(); } +StringRef AsmPrinter::getTargetTriple() const { + return TM.getTargetTriple(); +} + /// getCurrentSection() - Return the current section we are emitting to. const MCSection *AsmPrinter::getCurrentSection() const { - return OutStreamer.getCurrentSection(); + return OutStreamer.getCurrentSection().first; } @@ -813,7 +817,7 @@ void AsmPrinter::EmitDwarfRegOp(const MachineLocation &MLoc) const { // caller might be in the middle of an dwarf expression. We should // probably assert that Reg >= 0 once debug info generation is more mature. - if (int Offset = MLoc.getOffset()) { + if (MLoc.isIndirect()) { if (Reg < 32) { OutStreamer.AddComment( dwarf::OperationEncodingString(dwarf::DW_OP_breg0 + Reg)); @@ -824,7 +828,7 @@ void AsmPrinter::EmitDwarfRegOp(const MachineLocation &MLoc) const { OutStreamer.AddComment(Twine(Reg)); EmitULEB128(Reg); } - EmitSLEB128(Offset); + EmitSLEB128(MLoc.getOffset()); } else { if (Reg < 32) { OutStreamer.AddComment( @@ -1213,7 +1217,7 @@ void AsmPrinter::EmitJumpTableEntry(const MachineJumpTableInfo *MJTI, bool AsmPrinter::EmitSpecialLLVMGlobal(const GlobalVariable *GV) { if (GV->getName() == "llvm.used") { if (MAI->hasNoDeadStrip()) // No need to emit this at all. - EmitLLVMUsedList(GV->getInitializer()); + EmitLLVMUsedList(cast<ConstantArray>(GV->getInitializer())); return true; } @@ -1256,11 +1260,8 @@ bool AsmPrinter::EmitSpecialLLVMGlobal(const GlobalVariable *GV) { /// EmitLLVMUsedList - For targets that define a MAI::UsedDirective, mark each /// global in the specified llvm.used list for which emitUsedDirectiveFor /// is true, as being used with this directive. -void AsmPrinter::EmitLLVMUsedList(const Constant *List) { +void AsmPrinter::EmitLLVMUsedList(const ConstantArray *InitList) { // Should be an array of 'i8*'. - const ConstantArray *InitList = dyn_cast<ConstantArray>(List); - if (InitList == 0) return; - for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i) { const GlobalValue *GV = dyn_cast<GlobalValue>(InitList->getOperand(i)->stripPointerCasts()); diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp index 156acac..31e42d4 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp @@ -141,7 +141,7 @@ void AsmPrinter::EmitTTypeReference(const GlobalValue *GV, void AsmPrinter::EmitSectionOffset(const MCSymbol *Label, const MCSymbol *SectionLabel) const { // On COFF targets, we have to emit the special .secrel32 directive. 
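The rewritten tail-call check above replaces the one-directional getNoopInput() walk with sameNoopInput(), which peels no-op wrappers (free truncates, no-op bitcasts, all-zero GEPs, and calls forwarding a 'returned' argument) from either side, swapping the two values when one side is exhausted. A structural sketch of that two-sided peeling with a toy value type; the real function also tracks insertvalue/extractvalue element paths, which is omitted here:

    #include <cstdio>
    #include <utility>

    // Toy value node: Kind tells whether the node is a "real" value or a wrapper
    // that lowers to no machine code (think no-op bitcast, all-zero GEP, or a call
    // whose argument carries the new 'returned' attribute).
    struct Val {
      enum { Real, Noop } Kind;
      Val *Input;   // for Noop: the value it forwards
    };

    // Mirrors the shape of sameNoopInput(): peel wrappers off V1, and once V1 can't
    // be peeled any further, swap V1 and V2 and peel the other side, so the
    // comparison works no matter which side carries the wrappers.
    static bool sameNoopInput(const Val *V1, const Val *V2) {
      bool SwappedOnce = false;
      while (true) {
        if (V1 == V2)
          return true;
        if (V1->Kind == Val::Noop) {   // look through the wrapper
          V1 = V1->Input;
          continue;
        }
        if (SwappedOnce)
          return false;                // both sides fully peeled and still different
        std::swap(V1, V2);
        SwappedOnce = true;
      }
    }

    int main() {
      Val Call{Val::Real, nullptr};    // the call instruction itself
      Val Cast{Val::Noop, &Call};      // no-op bitcast of the call result
      Val Other{Val::Real, nullptr};

      std::printf("tail-call ok: %d\n", sameNoopInput(&Cast, &Call));  // 1
      std::printf("tail-call ok: %d\n", sameNoopInput(&Other, &Call)); // 0
      return 0;
    }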
- if (MAI->getDwarfSectionOffsetDirective()) { + if (MAI->needsDwarfSectionOffsetDirective()) { OutStreamer.EmitCOFFSecRel32(Label); return; } diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.cpp index 57e0acd..673867a 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.cpp @@ -112,8 +112,9 @@ DIE::~DIE() { delete Children[i]; } -/// Climb up the parent chain to get the compile unit DIE this DIE belongs to. -DIE *DIE::getCompileUnit() const{ +/// Climb up the parent chain to get the compile unit DIE to which this DIE +/// belongs. +DIE *DIE::getCompileUnit() const { DIE *p = getParent(); while (p) { if (p->getTag() == dwarf::DW_TAG_compile_unit) @@ -124,8 +125,7 @@ DIE *DIE::getCompileUnit() const{ } #ifndef NDEBUG -void DIE::print(raw_ostream &O, unsigned IncIndent) { - IndentCount += IncIndent; +void DIE::print(raw_ostream &O, unsigned IndentCount) const { const std::string Indent(IndentCount, ' '); bool isBlock = Abbrev.getTag() == 0; @@ -164,11 +164,10 @@ void DIE::print(raw_ostream &O, unsigned IncIndent) { IndentCount -= 2; for (unsigned j = 0, M = Children.size(); j < M; ++j) { - Children[j]->print(O, 4); + Children[j]->print(O, IndentCount+4); } if (!isBlock) O << "\n"; - IndentCount -= IncIndent; } void DIE::dump() { diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.h index c332aa2..3c06001 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.h +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.h @@ -139,8 +139,7 @@ namespace llvm { mutable unsigned IndentCount; public: explicit DIE(unsigned Tag) - : Offset(0), Size(0), Abbrev(Tag, dwarf::DW_CHILDREN_no), Parent(0), - IndentCount(0) {} + : Offset(0), Size(0), Abbrev(Tag, dwarf::DW_CHILDREN_no), Parent(0) {} virtual ~DIE(); // Accessors. @@ -179,7 +178,7 @@ namespace llvm { } #ifndef NDEBUG - void print(raw_ostream &O, unsigned IncIndent = 0); + void print(raw_ostream &O, unsigned IndentCount = 0) const; void dump(); #endif }; diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp index f9b6f94..89abcff 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp @@ -685,7 +685,7 @@ bool CompileUnit::addConstantValue(DIE *Die, const APInt &Val, return true; } -/// addTemplateParams - Add template parameters in buffer. +/// addTemplateParams - Add template parameters into buffer. void CompileUnit::addTemplateParams(DIE &Buffer, DIArray TParams) { // Add template parameters. for (unsigned i = 0, e = TParams.getNumElements(); i != e; ++i) { @@ -707,7 +707,7 @@ DIE *CompileUnit::getOrCreateContextDIE(DIDescriptor Context) { return getOrCreateNameSpace(DINameSpace(Context)); else if (Context.isSubprogram()) return getOrCreateSubprogramDIE(DISubprogram(Context)); - else + else return getDIE(Context); } diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h index 2b180c6..8f08c63 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h @@ -94,9 +94,6 @@ class CompileUnit { /// DWARF version doesn't handle the language, return -1. int64_t getDefaultLowerBound() const; - /// getOrCreateContextDIE - Get context owner's DIE. 
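DIE::print() above now takes the indentation as a parameter and becomes const, instead of bumping a mutable IndentCount member, so recursive calls cannot interfere with one another. A minimal sketch of the same pattern on a hypothetical node type:

    #include <cstdio>
    #include <vector>

    // Passing the indentation down as a parameter keeps the printer const and each
    // recursive call independent, rather than threading state through a mutable
    // member as the old DIE::print() did.
    struct Node {
      const char *Tag;
      std::vector<Node *> Children;

      void print(int Indent = 0) const {
        std::printf("%*s%s\n", Indent, "", Tag);   // indent, then the tag name
        for (const Node *C : Children)
          C->print(Indent + 4);                    // each child indents by four more
      }
    };

    int main() {
      Node Leaf{"DW_TAG_variable", {}};
      Node Sub{"DW_TAG_subprogram", {&Leaf}};
      Node CU{"DW_TAG_compile_unit", {&Sub}};
      CU.print();
      return 0;
    }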
- DIE *getOrCreateContextDIE(DIDescriptor Context); - public: CompileUnit(unsigned UID, unsigned L, DIE *D, AsmPrinter *A, DwarfDebug *DW, DwarfUnits *); @@ -372,6 +369,9 @@ public: /// createStaticMemberDIE - Create new static data member DIE. DIE *createStaticMemberDIE(DIDerivedType DT); + /// getOrCreateContextDIE - Get context owner's DIE. + DIE *getOrCreateContextDIE(DIDescriptor Context); + private: // DIEValueAllocator - All DIEValues are allocated through this allocator. diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index 11eb983..1e706cc 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -94,6 +94,12 @@ static cl::opt<DefaultOnOff> SplitDwarf("split-dwarf", cl::Hidden, namespace { const char *DWARFGroupName = "DWARF Emission"; const char *DbgTimerName = "DWARF Debug Writer"; + + struct CompareFirst { + template <typename T> bool operator()(const T &lhs, const T &rhs) const { + return lhs.first < rhs.first; + } + }; } // end anonymous namespace //===----------------------------------------------------------------------===// @@ -170,12 +176,13 @@ DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M) DwarfInfoSectionSym = DwarfAbbrevSectionSym = 0; DwarfStrSectionSym = TextSectionSym = 0; DwarfDebugRangeSectionSym = DwarfDebugLocSectionSym = DwarfLineSectionSym = 0; + DwarfAddrSectionSym = 0; DwarfAbbrevDWOSectionSym = DwarfStrDWOSectionSym = 0; FunctionBeginSym = FunctionEndSym = 0; // Turn on accelerator tables and older gdb compatibility // for Darwin. - bool IsDarwin = Triple(M->getTargetTriple()).isOSDarwin(); + bool IsDarwin = Triple(A->getTargetTriple()).isOSDarwin(); if (DarwinGDBCompat == Default) { if (IsDarwin) IsDarwinGDBCompat = true; @@ -596,9 +603,16 @@ DIE *DwarfDebug::constructScopeDIE(CompileUnit *TheCU, LexicalScope *Scope) { } else { // There is no need to emit empty lexical block DIE. - if (Children.empty()) + std::pair<ImportedEntityMap::const_iterator, + ImportedEntityMap::const_iterator> Range = std::equal_range( + ScopesWithImportedEntities.begin(), ScopesWithImportedEntities.end(), + std::pair<const MDNode *, const MDNode *>(DS, (const MDNode*)0), + CompareFirst()); + if (Children.empty() && Range.first == Range.second) return NULL; ScopeDIE = constructLexicalScopeDIE(TheCU, Scope); + for (ImportedEntityMap::const_iterator i = Range.first; i != Range.second; ++i) + constructImportedModuleDIE(TheCU, i->second, ScopeDIE); } if (!ScopeDIE) return NULL; @@ -643,7 +657,7 @@ unsigned DwarfDebug::getOrCreateSourceID(StringRef FileName, // We look up the CUID/file/dir by concatenating them with a zero byte. SmallString<128> NamePair; - NamePair += CUID; + NamePair += utostr(CUID); NamePair += '\0'; NamePair += DirName; NamePair += '\0'; // Zero bytes are not allowed in paths. @@ -681,9 +695,12 @@ CompileUnit *DwarfDebug::constructCompileUnit(const MDNode *N) { NewCU->addUInt(Die, dwarf::DW_AT_language, dwarf::DW_FORM_data2, DIUnit.getLanguage()); NewCU->addString(Die, dwarf::DW_AT_name, FN); + // 2.17.1 requires that we use DW_AT_low_pc for a single entry point - // into an entity. We're using 0 (or a NULL label) for this. - NewCU->addLabelAddress(Die, dwarf::DW_AT_low_pc, NULL); + // into an entity. We're using 0 (or a NULL label) for this. For + // split dwarf it's in the skeleton CU so omit it here. 
+ if (!useSplitDwarf()) + NewCU->addLabelAddress(Die, dwarf::DW_AT_low_pc, NULL); // Define start line table label for each Compile Unit. MCSymbol *LineTableStartSym = Asm->GetTempSymbol("line_table_start", @@ -691,21 +708,32 @@ CompileUnit *DwarfDebug::constructCompileUnit(const MDNode *N) { Asm->OutStreamer.getContext().setMCLineTableSymbol(LineTableStartSym, NewCU->getUniqueID()); + // Use a single line table if we are using .loc and generating assembly. + bool UseTheFirstCU = + (Asm->TM.hasMCUseLoc() && + Asm->OutStreamer.getKind() == MCStreamer::SK_AsmStreamer) || + (NewCU->getUniqueID() == 0); + // DW_AT_stmt_list is a offset of line number information for this - // compile unit in debug_line section. + // compile unit in debug_line section. For split dwarf this is + // left in the skeleton CU and so not included. // The line table entries are not always emitted in assembly, so it // is not okay to use line_table_start here. - if (Asm->MAI->doesDwarfUseRelocationsAcrossSections()) - NewCU->addLabel(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_data4, - NewCU->getUniqueID() == 0 ? - Asm->GetTempSymbol("section_line") : LineTableStartSym); - else if (NewCU->getUniqueID() == 0) - NewCU->addUInt(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_data4, 0); - else - NewCU->addDelta(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_data4, - LineTableStartSym, DwarfLineSectionSym); + if (!useSplitDwarf()) { + if (Asm->MAI->doesDwarfUseRelocationsAcrossSections()) + NewCU->addLabel(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_data4, + UseTheFirstCU ? + Asm->GetTempSymbol("section_line") : LineTableStartSym); + else if (UseTheFirstCU) + NewCU->addUInt(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_data4, 0); + else + NewCU->addDelta(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_data4, + LineTableStartSym, DwarfLineSectionSym); + } - if (!CompilationDir.empty()) + // If we're using split dwarf the compilation dir is going to be in the + // skeleton CU and so we don't need to duplicate it here. 
+ if (!useSplitDwarf() && !CompilationDir.empty()) NewCU->addString(Die, dwarf::DW_AT_comp_dir, CompilationDir); if (DIUnit.isOptimized()) NewCU->addFlag(Die, dwarf::DW_AT_APPLE_optimized); @@ -754,6 +782,41 @@ void DwarfDebug::constructSubprogramDIE(CompileUnit *TheCU, TheCU->addGlobalName(SP.getName(), SubprogramDie); } +void DwarfDebug::constructImportedModuleDIE(CompileUnit *TheCU, + const MDNode *N) { + DIImportedModule Module(N); + if (!Module.Verify()) + return; + if (DIE *D = TheCU->getOrCreateContextDIE(Module.getContext())) + constructImportedModuleDIE(TheCU, Module, D); +} + +void DwarfDebug::constructImportedModuleDIE(CompileUnit *TheCU, const MDNode *N, + DIE *Context) { + DIImportedModule Module(N); + if (!Module.Verify()) + return; + return constructImportedModuleDIE(TheCU, Module, Context); +} + +void DwarfDebug::constructImportedModuleDIE(CompileUnit *TheCU, + const DIImportedModule &Module, + DIE *Context) { + assert(Module.Verify() && + "Use one of the MDNode * overloads to handle invalid metadata"); + assert(Context && "Should always have a context for an imported_module"); + DIE *IMDie = new DIE(dwarf::DW_TAG_imported_module); + TheCU->insertDIE(Module, IMDie); + DIE *NSDie = TheCU->getOrCreateNameSpace(Module.getNameSpace()); + unsigned FileID = getOrCreateSourceID(Module.getContext().getFilename(), + Module.getContext().getDirectory(), + TheCU->getUniqueID()); + TheCU->addUInt(IMDie, dwarf::DW_AT_decl_file, 0, FileID); + TheCU->addUInt(IMDie, dwarf::DW_AT_decl_line, 0, Module.getLineNumber()); + TheCU->addDIEEntry(IMDie, dwarf::DW_AT_import, dwarf::DW_FORM_ref4, NSDie); + Context->addChild(IMDie); +} + // Emit all Dwarf sections that should come prior to the content. Create // global DIEs and emit initial debug info sections. This is invoked by // the target AsmPrinter. @@ -775,6 +838,13 @@ void DwarfDebug::beginModule() { for (unsigned i = 0, e = CU_Nodes->getNumOperands(); i != e; ++i) { DICompileUnit CUNode(CU_Nodes->getOperand(i)); CompileUnit *CU = constructCompileUnit(CUNode); + DIArray ImportedModules = CUNode.getImportedModules(); + for (unsigned i = 0, e = ImportedModules.getNumElements(); i != e; ++i) + ScopesWithImportedEntities.push_back(std::make_pair( + DIImportedModule(ImportedModules.getElement(i)).getContext(), + ImportedModules.getElement(i))); + std::sort(ScopesWithImportedEntities.begin(), + ScopesWithImportedEntities.end(), CompareFirst()); DIArray GVs = CUNode.getGlobalVariables(); for (unsigned i = 0, e = GVs.getNumElements(); i != e; ++i) CU->createGlobalVariableDIE(GVs.getElement(i)); @@ -787,11 +857,16 @@ void DwarfDebug::beginModule() { DIArray RetainedTypes = CUNode.getRetainedTypes(); for (unsigned i = 0, e = RetainedTypes.getNumElements(); i != e; ++i) CU->getOrCreateTypeDIE(RetainedTypes.getElement(i)); + // Emit imported_modules last so that the relevant context is already + // available. + for (unsigned i = 0, e = ImportedModules.getNumElements(); i != e; ++i) + constructImportedModuleDIE(CU, ImportedModules.getElement(i)); // If we're splitting the dwarf out now that we've got the entire // CU then construct a skeleton CU based upon it. if (useSplitDwarf()) { - // This should be a unique identifier when we want to build .dwp files. - CU->addUInt(CU->getCUDie(), dwarf::DW_AT_GNU_dwo_id, dwarf::DW_FORM_data8, 0); + // This should be a unique identifier when we want to build .dwp files. + CU->addUInt(CU->getCUDie(), dwarf::DW_AT_GNU_dwo_id, + dwarf::DW_FORM_data8, 0); // Now construct the skeleton CU associated. 
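ScopesWithImportedEntities above is kept as a plain vector of (scope, entity) pairs, sorted once by scope so that constructScopeDIE can later pull out every entity for a given scope with std::equal_range. A small stand-alone sketch of that sorted-vector-as-multimap idiom (generic int keys, not the DwarfDebug classes):

#include <algorithm>
#include <cstdio>
#include <utility>
#include <vector>

struct CompareFirst {
  template <typename T> bool operator()(const T &lhs, const T &rhs) const {
    return lhs.first < rhs.first;
  }
};

int main() {
  // Pretend keys are scope ids and values are imported-entity ids.
  std::vector<std::pair<int, int>> scopes = {{3, 30}, {1, 10}, {3, 31}, {2, 20}};
  std::sort(scopes.begin(), scopes.end(), CompareFirst());

  // All entities attached to scope 3, found in O(log n) plus output size.
  auto range = std::equal_range(scopes.begin(), scopes.end(),
                                std::make_pair(3, 0), CompareFirst());
  for (auto it = range.first; it != range.second; ++it)
    std::printf("scope %d -> entity %d\n", it->first, it->second);
  return 0;
}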
constructSkeletonCU(CUNode); } @@ -1099,7 +1174,13 @@ static DotDebugLocEntry getDebugLocEntry(AsmPrinter *Asm, } if (MI->getOperand(0).isReg() && MI->getOperand(1).isImm()) { MachineLocation MLoc; - MLoc.set(MI->getOperand(0).getReg(), MI->getOperand(1).getImm()); + // TODO: Currently an offset of 0 in a DBG_VALUE means + // we need to generate a direct register value. + // There is no way to specify an indirect value with offset 0. + if (MI->getOperand(1).getImm() == 0) + MLoc.set(MI->getOperand(0).getReg()); + else + MLoc.set(MI->getOperand(0).getReg(), MI->getOperand(1).getImm()); return DotDebugLocEntry(FLabel, SLabel, MLoc, Var); } if (MI->getOperand(0).isImm()) @@ -1366,7 +1447,12 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) { LexicalScope *FnScope = LScopes.getCurrentFunctionScope(); CompileUnit *TheCU = SPMap.lookup(FnScope->getScopeNode()); assert(TheCU && "Unable to find compile unit!"); - Asm->OutStreamer.getContext().setDwarfCompileUnitID(TheCU->getUniqueID()); + if (Asm->TM.hasMCUseLoc() && + Asm->OutStreamer.getKind() == MCStreamer::SK_AsmStreamer) + // Use a single line table if we are using .loc and generating assembly. + Asm->OutStreamer.getContext().setDwarfCompileUnitID(0); + else + Asm->OutStreamer.getContext().setDwarfCompileUnitID(TheCU->getUniqueID()); FunctionBeginSym = Asm->GetTempSymbol("func_begin", Asm->getFunctionNumber()); @@ -1768,9 +1854,12 @@ void DwarfDebug::emitSectionLabels() { emitSectionSym(Asm, TLOF.getDwarfPubTypesSection()); DwarfStrSectionSym = emitSectionSym(Asm, TLOF.getDwarfStrSection(), "info_string"); - if (useSplitDwarf()) + if (useSplitDwarf()) { DwarfStrDWOSectionSym = emitSectionSym(Asm, TLOF.getDwarfStrDWOSection(), "skel_string"); + DwarfAddrSectionSym = + emitSectionSym(Asm, TLOF.getDwarfAddrSection(), "addr_sec"); + } DwarfDebugRangeSectionSym = emitSectionSym(Asm, TLOF.getDwarfRangesSection(), "debug_range"); @@ -2538,9 +2627,14 @@ CompileUnit *DwarfDebug::constructSkeletonCU(const MDNode *N) { // This should be a unique identifier when we want to build .dwp files. NewCU->addUInt(Die, dwarf::DW_AT_GNU_dwo_id, dwarf::DW_FORM_data8, 0); - // FIXME: The addr base should be relative for each compile unit, however, - // this one is going to be 0 anyhow. - NewCU->addUInt(Die, dwarf::DW_AT_GNU_addr_base, dwarf::DW_FORM_sec_offset, 0); + // Relocate to the beginning of the addr_base section, else 0 for the + // beginning of the one for this compile unit. + if (Asm->MAI->doesDwarfUseRelocationsAcrossSections()) + NewCU->addLabel(Die, dwarf::DW_AT_GNU_addr_base, dwarf::DW_FORM_sec_offset, + DwarfAddrSectionSym); + else + NewCU->addUInt(Die, dwarf::DW_AT_GNU_addr_base, + dwarf::DW_FORM_sec_offset, 0); // 2.17.1 requires that we use DW_AT_low_pc for a single entry point // into an entity. We're using 0, or a NULL label for this. @@ -2548,6 +2642,7 @@ CompileUnit *DwarfDebug::constructSkeletonCU(const MDNode *N) { // DW_AT_stmt_list is a offset of line number information for this // compile unit in debug_line section. + // FIXME: Should handle multiple compile units. 
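The getDebugLocEntry change above encodes the current DBG_VALUE convention: a (register, 0) operand pair is read as the value living directly in the register, while any non-zero immediate means an indirect [reg + offset] location, so an indirect location at offset 0 cannot be expressed. A tiny sketch of that decoding rule (plain structs and a hypothetical helper, not MachineLocation):

#include <cstdint>

struct DecodedLoc {
  unsigned Reg;
  int64_t Offset;
  bool Indirect;
};

// Mirror of the TODO above: offset 0 == direct register value, anything
// else == memory at Reg + Offset.
static DecodedLoc decodeDbgValue(unsigned reg, int64_t imm) {
  if (imm == 0)
    return {reg, 0, /*Indirect=*/false};
  return {reg, imm, /*Indirect=*/true};
}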
if (Asm->MAI->doesDwarfUseRelocationsAcrossSections()) NewCU->addLabel(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_sec_offset, DwarfLineSectionSym); diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h index 81e345e..24f758d 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h @@ -392,7 +392,7 @@ class DwarfDebug { // section offsets and are created by EmitSectionLabels. MCSymbol *DwarfInfoSectionSym, *DwarfAbbrevSectionSym; MCSymbol *DwarfStrSectionSym, *TextSectionSym, *DwarfDebugRangeSectionSym; - MCSymbol *DwarfDebugLocSectionSym, *DwarfLineSectionSym; + MCSymbol *DwarfDebugLocSectionSym, *DwarfLineSectionSym, *DwarfAddrSectionSym; MCSymbol *FunctionBeginSym, *FunctionEndSym; MCSymbol *DwarfAbbrevDWOSectionSym, *DwarfStrDWOSectionSym; @@ -433,6 +433,10 @@ class DwarfDebug { // Holder for the skeleton information. DwarfUnits SkeletonHolder; + typedef SmallVector<std::pair<const MDNode *, const MDNode *>, 32> + ImportedEntityMap; + ImportedEntityMap ScopesWithImportedEntities; + private: void addScopeVariable(LexicalScope *LS, DbgVariable *Var); @@ -555,6 +559,18 @@ private: /// \brief Construct subprogram DIE. void constructSubprogramDIE(CompileUnit *TheCU, const MDNode *N); + /// \brief Construct import_module DIE. + void constructImportedModuleDIE(CompileUnit *TheCU, const MDNode *N); + + /// \brief Construct import_module DIE. + void constructImportedModuleDIE(CompileUnit *TheCU, const MDNode *N, + DIE *Context); + + /// \brief Construct import_module DIE. + void constructImportedModuleDIE(CompileUnit *TheCU, + const DIImportedModule &Module, + DIE *Context); + /// \brief Register a source line with debug info. Returns the unique /// label that was emitted and which provides correspondence to the /// source line list. diff --git a/contrib/llvm/lib/CodeGen/BasicTargetTransformInfo.cpp b/contrib/llvm/lib/CodeGen/BasicTargetTransformInfo.cpp index 012ff8a..4a99184 100644 --- a/contrib/llvm/lib/CodeGen/BasicTargetTransformInfo.cpp +++ b/contrib/llvm/lib/CodeGen/BasicTargetTransformInfo.cpp @@ -204,20 +204,25 @@ unsigned BasicTTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty, std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Ty); + bool IsFloat = Ty->getScalarType()->isFloatingPointTy(); + // Assume that floating point arithmetic operations cost twice as much as + // integer operations. + unsigned OpCost = (IsFloat ? 2 : 1); + if (TLI->isOperationLegalOrPromote(ISD, LT.second)) { // The operation is legal. Assume it costs 1. - // If the type is split to multiple registers, assume that thre is some + // If the type is split to multiple registers, assume that there is some // overhead to this. // TODO: Once we have extract/insert subvector cost we need to use them. if (LT.first > 1) - return LT.first * 2; - return LT.first * 1; + return LT.first * 2 * OpCost; + return LT.first * 1 * OpCost; } if (!TLI->isOperationExpand(ISD, LT.second)) { // If the operation is custom lowered then assume // thare the code is twice as expensive. - return LT.first * 2; + return LT.first * 2 * OpCost; } // Else, assume that we need to scalarize this op. @@ -230,7 +235,7 @@ unsigned BasicTTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty, } // We don't know anything about this scalar instruction. 
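The getArithmeticInstrCost change in this hunk folds a floating-point multiplier into each existing case: OpCost is 2 for FP and 1 for integer, a legal type costs LT.first * OpCost (doubled when the type splits into several registers), and a custom-lowered operation is assumed to cost twice a legal one. A stand-alone restatement of that formula with a worked example (hypothetical helper, not the TTI interface; scalarization of vector types is left out of the sketch):

#include <cassert>

// numParts: how many legal registers the type legalizes to (LT.first above).
static unsigned arithmeticCost(unsigned numParts, bool isFloat,
                               bool legalOrPromote, bool customLowered) {
  unsigned opCost = isFloat ? 2 : 1;            // FP assumed 2x integer
  if (legalOrPromote)
    return numParts > 1 ? numParts * 2 * opCost // split types pay overhead
                        : numParts * opCost;
  if (customLowered)
    return numParts * 2 * opCost;               // custom lowering assumed 2x
  return opCost;                                // unknown scalar instruction
}

int main() {
  // e.g. an fadd whose vector type splits into two legal halves:
  assert(arithmeticCost(2, /*isFloat=*/true, /*legalOrPromote=*/true, false) == 8);
  // a plain legal integer add:
  assert(arithmeticCost(1, /*isFloat=*/false, /*legalOrPromote=*/true, false) == 1);
  return 0;
}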
- return 1; + return OpCost; } unsigned BasicTTI::getShuffleCost(ShuffleKind Kind, Type *Tp, int Index, diff --git a/contrib/llvm/lib/CodeGen/CalcSpillWeights.cpp b/contrib/llvm/lib/CodeGen/CalcSpillWeights.cpp index dee339a..38ae17d 100644 --- a/contrib/llvm/lib/CodeGen/CalcSpillWeights.cpp +++ b/contrib/llvm/lib/CodeGen/CalcSpillWeights.cpp @@ -117,7 +117,7 @@ void VirtRegAuxInfo::CalculateWeightAndHint(LiveInterval &li) { float totalWeight = 0; SmallPtrSet<MachineInstr*, 8> visited; - // Find the best physreg hist and the best virtreg hint. + // Find the best physreg hint and the best virtreg hint. float bestPhys = 0, bestVirt = 0; unsigned hintPhys = 0, hintVirt = 0; diff --git a/contrib/llvm/lib/CodeGen/CallingConvLower.cpp b/contrib/llvm/lib/CodeGen/CallingConvLower.cpp index f1d4ace..75f4b96 100644 --- a/contrib/llvm/lib/CodeGen/CallingConvLower.cpp +++ b/contrib/llvm/lib/CodeGen/CallingConvLower.cpp @@ -32,7 +32,7 @@ CCState::CCState(CallingConv::ID CC, bool isVarArg, MachineFunction &mf, // No stack is used. StackOffset = 0; - clearFirstByValReg(); + clearByValRegsInfo(); UsedRegs.resize((TRI.getNumRegs()+31)/32); } diff --git a/contrib/llvm/lib/CodeGen/CodeGen.cpp b/contrib/llvm/lib/CodeGen/CodeGen.cpp index 35ec68d..c641991 100644 --- a/contrib/llvm/lib/CodeGen/CodeGen.cpp +++ b/contrib/llvm/lib/CodeGen/CodeGen.cpp @@ -13,6 +13,7 @@ //===----------------------------------------------------------------------===// #include "llvm/InitializePasses.h" +#include "llvm/PassRegistry.h" #include "llvm-c/Initialization.h" using namespace llvm; diff --git a/contrib/llvm/lib/CodeGen/IfConversion.cpp b/contrib/llvm/lib/CodeGen/IfConversion.cpp index 9958d7d..8264d6d 100644 --- a/contrib/llvm/lib/CodeGen/IfConversion.cpp +++ b/contrib/llvm/lib/CodeGen/IfConversion.cpp @@ -1039,6 +1039,10 @@ bool IfConverter::IfConvertSimple(BBInfo &BBI, IfcvtKind Kind) { return false; } + if (CvtBBI->BB->hasAddressTaken()) + // Conservatively abort if-conversion if BB's address is taken. + return false; + if (Kind == ICSimpleFalse) if (TII->ReverseBranchCondition(Cond)) llvm_unreachable("Unable to reverse branch condition!"); @@ -1054,6 +1058,10 @@ bool IfConverter::IfConvertSimple(BBInfo &BBI, IfcvtKind Kind) { // Copy instructions in the true block, predicate them, and add them to // the entry block. CopyAndPredicateBlock(BBI, *CvtBBI, Cond, Redefs); + + // RemoveExtraEdges won't work if the block has an unanalyzable branch, so + // explicitly remove CvtBBI as a successor. + BBI.BB->removeSuccessor(CvtBBI->BB); } else { PredicateBlock(*CvtBBI, CvtBBI->BB->end(), Cond, Redefs); @@ -1112,6 +1120,10 @@ bool IfConverter::IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind) { return false; } + if (CvtBBI->BB->hasAddressTaken()) + // Conservatively abort if-conversion if BB's address is taken. + return false; + if (Kind == ICTriangleFalse || Kind == ICTriangleFRev) if (TII->ReverseBranchCondition(Cond)) llvm_unreachable("Unable to reverse branch condition!"); @@ -1146,6 +1158,10 @@ bool IfConverter::IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind) { // Copy instructions in the true block, predicate them, and add them to // the entry block. CopyAndPredicateBlock(BBI, *CvtBBI, Cond, Redefs, true); + + // RemoveExtraEdges won't work if the block has an unanalyzable branch, so + // explicitly remove CvtBBI as a successor. + BBI.BB->removeSuccessor(CvtBBI->BB); } else { // Predicate the 'true' block after removing its branch. 
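The if-conversion changes above add the same conservative guard in several places: a block whose address is taken (for example, the target of an indirect branch through a blockaddress) may still be reached through that address, so it must not be predicated away or merged out of existence. Schematically, with plain structs rather than the IfConverter types:

// Sketch of the merge-safety test added above.
struct BlockInfo {
  bool AddressTaken;
  bool HasFallThrough;
  unsigned NumPreds;
};

// A successor can only be folded into its single predecessor when nothing
// else depends on it staying put: one predecessor, it does not fall through
// to another block, and, after this patch, its address has not been taken.
static bool canMergeInto(const BlockInfo &succ) {
  return succ.NumPreds == 1 && !succ.HasFallThrough && !succ.AddressTaken;
}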
CvtBBI->NonPredSize -= TII->RemoveBranch(*CvtBBI->BB); @@ -1176,7 +1192,8 @@ bool IfConverter::IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind) { // block. By not merging them, we make it possible to iteratively // ifcvt the blocks. if (!HasEarlyExit && - NextBBI->BB->pred_size() == 1 && !NextBBI->HasFallThrough) { + NextBBI->BB->pred_size() == 1 && !NextBBI->HasFallThrough && + !NextBBI->BB->hasAddressTaken()) { MergeBlocks(BBI, *NextBBI); FalseBBDead = true; } else { @@ -1226,6 +1243,10 @@ bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind, return false; } + if (TrueBBI.BB->hasAddressTaken() || FalseBBI.BB->hasAddressTaken()) + // Conservatively abort if-conversion if either BB has its address taken. + return false; + // Put the predicated instructions from the 'true' block before the // instructions from the 'false' block, unless the true block would clobber // the predicate, in which case, do the opposite. @@ -1374,7 +1395,8 @@ bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind, // tail, add a unconditional branch to it. if (TailBB) { BBInfo &TailBBI = BBAnalysis[TailBB->getNumber()]; - bool CanMergeTail = !TailBBI.HasFallThrough; + bool CanMergeTail = !TailBBI.HasFallThrough && + !TailBBI.BB->hasAddressTaken(); // There may still be a fall-through edge from BBI1 or BBI2 to TailBB; // check if there are any other predecessors besides those. unsigned NumPreds = TailBB->pred_size(); @@ -1543,6 +1565,9 @@ void IfConverter::CopyAndPredicateBlock(BBInfo &ToBBI, BBInfo &FromBBI, /// i.e., when FromBBI's branch is being moved, add those successor edges to /// ToBBI. void IfConverter::MergeBlocks(BBInfo &ToBBI, BBInfo &FromBBI, bool AddEdges) { + assert(!FromBBI.BB->hasAddressTaken() && + "Removing a BB whose address is taken!"); + ToBBI.BB->splice(ToBBI.BB->end(), FromBBI.BB, FromBBI.BB->begin(), FromBBI.BB->end()); diff --git a/contrib/llvm/lib/CodeGen/InlineSpiller.cpp b/contrib/llvm/lib/CodeGen/InlineSpiller.cpp index c6d1a18..35295fe 100644 --- a/contrib/llvm/lib/CodeGen/InlineSpiller.cpp +++ b/contrib/llvm/lib/CodeGen/InlineSpiller.cpp @@ -955,18 +955,21 @@ void InlineSpiller::reMaterializeAll() { Edit->eliminateDeadDefs(DeadDefs, RegsToSpill); // Get rid of deleted and empty intervals. 
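The loop rewritten just below replaces repeated RegsToSpill.erase() calls (each an O(n) shuffle) with a single in-place compaction: surviving registers are copied forward past a write cursor and the tail is erased once at the end. The generic idiom, for reference (equivalent in spirit to erase/remove_if):

#include <cstddef>
#include <vector>

// Keep only the elements satisfying `keep`, preserving order, with one
// erase at the end instead of one per removed element.
template <typename T, typename Pred>
void compactInPlace(std::vector<T> &v, Pred keep) {
  std::size_t resultPos = 0;
  for (std::size_t i = 0, e = v.size(); i != e; ++i)
    if (keep(v[i]))
      v[resultPos++] = v[i];
  v.erase(v.begin() + resultPos, v.end());
}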
- for (unsigned i = RegsToSpill.size(); i != 0; --i) { - unsigned Reg = RegsToSpill[i-1]; - if (!LIS.hasInterval(Reg)) { - RegsToSpill.erase(RegsToSpill.begin() + (i - 1)); + unsigned ResultPos = 0; + for (unsigned i = 0, e = RegsToSpill.size(); i != e; ++i) { + unsigned Reg = RegsToSpill[i]; + if (!LIS.hasInterval(Reg)) continue; - } + LiveInterval &LI = LIS.getInterval(Reg); - if (!LI.empty()) + if (LI.empty()) { + Edit->eraseVirtReg(Reg); continue; - Edit->eraseVirtReg(Reg); - RegsToSpill.erase(RegsToSpill.begin() + (i - 1)); + } + + RegsToSpill[ResultPos++] = Reg; } + RegsToSpill.erase(RegsToSpill.begin() + ResultPos, RegsToSpill.end()); DEBUG(dbgs() << RegsToSpill.size() << " registers to spill after remat.\n"); } diff --git a/contrib/llvm/lib/CodeGen/IntrinsicLowering.cpp b/contrib/llvm/lib/CodeGen/IntrinsicLowering.cpp index 07f0ccf..d894f66 100644 --- a/contrib/llvm/lib/CodeGen/IntrinsicLowering.cpp +++ b/contrib/llvm/lib/CodeGen/IntrinsicLowering.cpp @@ -453,6 +453,12 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) { CI->replaceAllUsesWith(ConstantInt::get(CI->getType(), 1)); break; + case Intrinsic::annotation: + case Intrinsic::ptr_annotation: + // Just drop the annotation, but forward the value + CI->replaceAllUsesWith(CI->getOperand(0)); + break; + case Intrinsic::var_annotation: break; // Strip out annotate intrinsic diff --git a/contrib/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp b/contrib/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp index 352ef94..26a1176 100644 --- a/contrib/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp +++ b/contrib/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp @@ -46,13 +46,16 @@ namespace { class FrameRef { MachineBasicBlock::iterator MI; // Instr referencing the frame int64_t LocalOffset; // Local offset of the frame idx referenced + int FrameIdx; // The frame index public: - FrameRef(MachineBasicBlock::iterator I, int64_t Offset) : - MI(I), LocalOffset(Offset) {} + FrameRef(MachineBasicBlock::iterator I, int64_t Offset, int Idx) : + MI(I), LocalOffset(Offset), FrameIdx(Idx) {} bool operator<(const FrameRef &RHS) const { return LocalOffset < RHS.LocalOffset; } - MachineBasicBlock::iterator getMachineInstr() { return MI; } + MachineBasicBlock::iterator getMachineInstr() const { return MI; } + int64_t getLocalOffset() const { return LocalOffset; } + int getFrameIndex() const { return FrameIdx; } }; class LocalStackSlotPass: public MachineFunctionPass { @@ -194,22 +197,15 @@ void LocalStackSlotPass::calculateFrameObjectOffsets(MachineFunction &Fn) { } static inline bool -lookupCandidateBaseReg(const SmallVector<std::pair<unsigned, int64_t>, 8> &Regs, - std::pair<unsigned, int64_t> &RegOffset, +lookupCandidateBaseReg(int64_t BaseOffset, int64_t FrameSizeAdjust, int64_t LocalFrameOffset, const MachineInstr *MI, const TargetRegisterInfo *TRI) { - unsigned e = Regs.size(); - for (unsigned i = 0; i < e; ++i) { - RegOffset = Regs[i]; - // Check if the relative offset from the where the base register references - // to the target address is in range for the instruction. - int64_t Offset = FrameSizeAdjust + LocalFrameOffset - RegOffset.second; - if (TRI->isFrameOffsetLegal(MI, Offset)) - return true; - } - return false; + // Check if the relative offset from the where the base register references + // to the target address is in range for the instruction. 
+ int64_t Offset = FrameSizeAdjust + LocalFrameOffset - BaseOffset; + return TRI->isFrameOffsetLegal(MI, Offset); } bool LocalStackSlotPass::insertFrameReferenceRegisters(MachineFunction &Fn) { @@ -233,9 +229,6 @@ bool LocalStackSlotPass::insertFrameReferenceRegisters(MachineFunction &Fn) { // choose the first one). SmallVector<FrameRef, 64> FrameReferenceInsns; - // A base register definition is a register + offset pair. - SmallVector<std::pair<unsigned, int64_t>, 8> BaseRegisters; - for (MachineFunction::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB) { for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ++I) { MachineInstr *MI = I; @@ -258,8 +251,12 @@ bool LocalStackSlotPass::insertFrameReferenceRegisters(MachineFunction &Fn) { // Don't try this with values not in the local block. if (!MFI->isObjectPreAllocated(MI->getOperand(i).getIndex())) break; + int Idx = MI->getOperand(i).getIndex(); + int64_t LocalOffset = LocalOffsets[Idx]; + if (!TRI->needsFrameBaseReg(MI, LocalOffset)) + break; FrameReferenceInsns. - push_back(FrameRef(MI, LocalOffsets[MI->getOperand(i).getIndex()])); + push_back(FrameRef(MI, LocalOffset, Idx)); break; } } @@ -271,86 +268,106 @@ bool LocalStackSlotPass::insertFrameReferenceRegisters(MachineFunction &Fn) { MachineBasicBlock *Entry = Fn.begin(); + unsigned BaseReg = 0; + int64_t BaseOffset = 0; + // Loop through the frame references and allocate for them as necessary. for (int ref = 0, e = FrameReferenceInsns.size(); ref < e ; ++ref) { - MachineBasicBlock::iterator I = - FrameReferenceInsns[ref].getMachineInstr(); + FrameRef &FR = FrameReferenceInsns[ref]; + MachineBasicBlock::iterator I = FR.getMachineInstr(); MachineInstr *MI = I; - for (unsigned idx = 0, e = MI->getNumOperands(); idx != e; ++idx) { - // Consider replacing all frame index operands that reference - // an object allocated in the local block. - if (MI->getOperand(idx).isFI()) { - int FrameIdx = MI->getOperand(idx).getIndex(); - - assert(MFI->isObjectPreAllocated(FrameIdx) && - "Only pre-allocated locals expected!"); - - DEBUG(dbgs() << "Considering: " << *MI); - if (TRI->needsFrameBaseReg(MI, LocalOffsets[FrameIdx])) { - unsigned BaseReg = 0; - int64_t Offset = 0; - int64_t FrameSizeAdjust = - StackGrowsDown ? MFI->getLocalFrameSize() : 0; - - DEBUG(dbgs() << " Replacing FI in: " << *MI); - - // If we have a suitable base register available, use it; otherwise - // create a new one. Note that any offset encoded in the - // instruction itself will be taken into account by the target, - // so we don't have to adjust for it here when reusing a base - // register. - std::pair<unsigned, int64_t> RegOffset; - if (lookupCandidateBaseReg(BaseRegisters, RegOffset, - FrameSizeAdjust, - LocalOffsets[FrameIdx], - MI, TRI)) { - DEBUG(dbgs() << " Reusing base register " << - RegOffset.first << "\n"); - // We found a register to reuse. - BaseReg = RegOffset.first; - Offset = FrameSizeAdjust + LocalOffsets[FrameIdx] - - RegOffset.second; - } else { - // No previously defined register was in range, so create a - // new one. - int64_t InstrOffset = TRI->getFrameIndexInstrOffset(MI, idx); - const MachineFunction *MF = MI->getParent()->getParent(); - const TargetRegisterClass *RC = TRI->getPointerRegClass(*MF); - BaseReg = Fn.getRegInfo().createVirtualRegister(RC); - - DEBUG(dbgs() << " Materializing base register " << BaseReg << - " at frame local offset " << - LocalOffsets[FrameIdx] + InstrOffset << "\n"); - - // Tell the target to insert the instruction to initialize - // the base register. 
- // MachineBasicBlock::iterator InsertionPt = Entry->begin(); - TRI->materializeFrameBaseRegister(Entry, BaseReg, FrameIdx, - InstrOffset); - - // The base register already includes any offset specified - // by the instruction, so account for that so it doesn't get - // applied twice. - Offset = -InstrOffset; - - int64_t BaseOffset = FrameSizeAdjust + LocalOffsets[FrameIdx] + - InstrOffset; - BaseRegisters.push_back( - std::pair<unsigned, int64_t>(BaseReg, BaseOffset)); - ++NumBaseRegisters; - UsedBaseReg = true; - } - assert(BaseReg != 0 && "Unable to allocate virtual base register!"); - - // Modify the instruction to use the new base register rather - // than the frame index operand. - TRI->resolveFrameIndex(I, BaseReg, Offset); - DEBUG(dbgs() << "Resolved: " << *MI); - - ++NumReplacements; - } + int64_t LocalOffset = FR.getLocalOffset(); + int FrameIdx = FR.getFrameIndex(); + assert(MFI->isObjectPreAllocated(FrameIdx) && + "Only pre-allocated locals expected!"); + + DEBUG(dbgs() << "Considering: " << *MI); + + unsigned idx = 0; + for (unsigned f = MI->getNumOperands(); idx != f; ++idx) { + if (!MI->getOperand(idx).isFI()) + continue; + + if (FrameIdx == I->getOperand(idx).getIndex()) + break; + } + + assert(idx < MI->getNumOperands() && "Cannot find FI operand"); + + int64_t Offset = 0; + int64_t FrameSizeAdjust = StackGrowsDown ? MFI->getLocalFrameSize() : 0; + + DEBUG(dbgs() << " Replacing FI in: " << *MI); + + // If we have a suitable base register available, use it; otherwise + // create a new one. Note that any offset encoded in the + // instruction itself will be taken into account by the target, + // so we don't have to adjust for it here when reusing a base + // register. + if (UsedBaseReg && lookupCandidateBaseReg(BaseOffset, FrameSizeAdjust, + LocalOffset, MI, TRI)) { + DEBUG(dbgs() << " Reusing base register " << BaseReg << "\n"); + // We found a register to reuse. + Offset = FrameSizeAdjust + LocalOffset - BaseOffset; + } else { + // No previously defined register was in range, so create a // new one. + + int64_t InstrOffset = TRI->getFrameIndexInstrOffset(MI, idx); + + int64_t PrevBaseOffset = BaseOffset; + BaseOffset = FrameSizeAdjust + LocalOffset + InstrOffset; + + // We'd like to avoid creating single-use virtual base registers. + // Because the FrameRefs are in sorted order, and we've already + // processed all FrameRefs before this one, just check whether or not + // the next FrameRef will be able to reuse this new register. If not, + // then don't bother creating it. + bool CanReuse = false; + for (int refn = ref + 1; refn < e; ++refn) { + FrameRef &FRN = FrameReferenceInsns[refn]; + MachineBasicBlock::iterator J = FRN.getMachineInstr(); + MachineInstr *MIN = J; + + CanReuse = lookupCandidateBaseReg(BaseOffset, FrameSizeAdjust, + FRN.getLocalOffset(), MIN, TRI); + break; } + + if (!CanReuse) { + BaseOffset = PrevBaseOffset; + continue; + } + + const MachineFunction *MF = MI->getParent()->getParent(); + const TargetRegisterClass *RC = TRI->getPointerRegClass(*MF); + BaseReg = Fn.getRegInfo().createVirtualRegister(RC); + + DEBUG(dbgs() << " Materializing base register " << BaseReg << + " at frame local offset " << LocalOffset + InstrOffset << "\n"); + + // Tell the target to insert the instruction to initialize + // the base register. 
+ // MachineBasicBlock::iterator InsertionPt = Entry->begin(); + TRI->materializeFrameBaseRegister(Entry, BaseReg, FrameIdx, + InstrOffset); + + // The base register already includes any offset specified + // by the instruction, so account for that so it doesn't get + // applied twice. + Offset = -InstrOffset; + + ++NumBaseRegisters; + UsedBaseReg = true; } + assert(BaseReg != 0 && "Unable to allocate virtual base register!"); + + // Modify the instruction to use the new base register rather + // than the frame index operand. + TRI->resolveFrameIndex(I, BaseReg, Offset); + DEBUG(dbgs() << "Resolved: " << *MI); + + ++NumReplacements; } + return UsedBaseReg; } diff --git a/contrib/llvm/lib/CodeGen/MachineBasicBlock.cpp b/contrib/llvm/lib/CodeGen/MachineBasicBlock.cpp index 898e165..78e9950 100644 --- a/contrib/llvm/lib/CodeGen/MachineBasicBlock.cpp +++ b/contrib/llvm/lib/CodeGen/MachineBasicBlock.cpp @@ -37,7 +37,7 @@ using namespace llvm; MachineBasicBlock::MachineBasicBlock(MachineFunction &mf, const BasicBlock *bb) : BB(bb), Number(-1), xParent(&mf), Alignment(0), IsLandingPad(false), - AddressTaken(false) { + AddressTaken(false), CachedMCSymbol(NULL) { Insts.Parent = this; } @@ -48,12 +48,16 @@ MachineBasicBlock::~MachineBasicBlock() { /// getSymbol - Return the MCSymbol for this basic block. /// MCSymbol *MachineBasicBlock::getSymbol() const { - const MachineFunction *MF = getParent(); - MCContext &Ctx = MF->getContext(); - const char *Prefix = Ctx.getAsmInfo().getPrivateGlobalPrefix(); - return Ctx.GetOrCreateSymbol(Twine(Prefix) + "BB" + - Twine(MF->getFunctionNumber()) + "_" + - Twine(getNumber())); + if (!CachedMCSymbol) { + const MachineFunction *MF = getParent(); + MCContext &Ctx = MF->getContext(); + const char *Prefix = Ctx.getAsmInfo().getPrivateGlobalPrefix(); + CachedMCSymbol = Ctx.GetOrCreateSymbol(Twine(Prefix) + "BB" + + Twine(MF->getFunctionNumber()) + + "_" + Twine(getNumber())); + } + + return CachedMCSymbol; } diff --git a/contrib/llvm/lib/CodeGen/MachineBlockPlacement.cpp b/contrib/llvm/lib/CodeGen/MachineBlockPlacement.cpp index cd948e2..bfba503 100644 --- a/contrib/llvm/lib/CodeGen/MachineBlockPlacement.cpp +++ b/contrib/llvm/lib/CodeGen/MachineBlockPlacement.cpp @@ -39,6 +39,7 @@ #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/Support/Allocator.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetLowering.h" @@ -52,6 +53,11 @@ STATISTIC(CondBranchTakenFreq, STATISTIC(UncondBranchTakenFreq, "Potential frequency of taking unconditional branches"); +static cl::opt<unsigned> AlignAllBlock("align-all-blocks", + cl::desc("Force the alignment of all " + "blocks in the function."), + cl::init(0), cl::Hidden); + namespace { class BlockChain; /// \brief Type for our function-wide basic block -> block chain mapping. @@ -1088,6 +1094,12 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &F) { BlockToChain.clear(); ChainAllocator.DestroyAll(); + if (AlignAllBlock) + // Align all of the blocks in the function to a specific alignment. + for (MachineFunction::iterator FI = F.begin(), FE = F.end(); + FI != FE; ++FI) + FI->setAlignment(AlignAllBlock); + // We always return true as we have no way to track whether the final order // differs from the original order. 
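The MachineBasicBlock::getSymbol() change a little further up memoizes the symbol instead of rebuilding the name and re-querying the MCContext on every call; the cached pointer is simply initialized lazily on first use. The shape of that pattern, with a plain string standing in for the MCSymbol (names here are illustrative only):

#include <string>

class BlockLabel {
  mutable std::string Cached; // stands in for the cached MCSymbol pointer
  unsigned FnNum;
  int BlockNum;
public:
  BlockLabel(unsigned fn, int bb) : FnNum(fn), BlockNum(bb) {}

  // Build the label once, reuse it afterwards (the real code checks a null
  // pointer; an empty string plays that role here).
  const std::string &name() const {
    if (Cached.empty())
      Cached = ".LBB" + std::to_string(FnNum) + "_" + std::to_string(BlockNum);
    return Cached;
  }
};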
return true; diff --git a/contrib/llvm/lib/CodeGen/MachineModuleInfo.cpp b/contrib/llvm/lib/CodeGen/MachineModuleInfo.cpp index 0ea9ae0..8af9d05 100644 --- a/contrib/llvm/lib/CodeGen/MachineModuleInfo.cpp +++ b/contrib/llvm/lib/CodeGen/MachineModuleInfo.cpp @@ -326,8 +326,7 @@ void MachineModuleInfo::AnalyzeModule(const Module &M) { if (!GV || !GV->hasInitializer()) return; // Should be an array of 'i8*'. - const ConstantArray *InitList = dyn_cast<ConstantArray>(GV->getInitializer()); - if (InitList == 0) return; + const ConstantArray *InitList = cast<ConstantArray>(GV->getInitializer()); for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i) if (const Function *F = diff --git a/contrib/llvm/lib/CodeGen/MachineRegisterInfo.cpp b/contrib/llvm/lib/CodeGen/MachineRegisterInfo.cpp index 1af00e8..68372f6 100644 --- a/contrib/llvm/lib/CodeGen/MachineRegisterInfo.cpp +++ b/contrib/llvm/lib/CodeGen/MachineRegisterInfo.cpp @@ -15,6 +15,8 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" +#include "llvm/Support/raw_os_ostream.h" + using namespace llvm; MachineRegisterInfo::MachineRegisterInfo(const TargetRegisterInfo &TRI) @@ -106,13 +108,59 @@ MachineRegisterInfo::createVirtualRegister(const TargetRegisterClass *RegClass){ /// clearVirtRegs - Remove all virtual registers (after physreg assignment). void MachineRegisterInfo::clearVirtRegs() { #ifndef NDEBUG - for (unsigned i = 0, e = getNumVirtRegs(); i != e; ++i) - assert(VRegInfo[TargetRegisterInfo::index2VirtReg(i)].second == 0 && - "Vreg use list non-empty still?"); + for (unsigned i = 0, e = getNumVirtRegs(); i != e; ++i) { + unsigned Reg = TargetRegisterInfo::index2VirtReg(i); + if (!VRegInfo[Reg].second) + continue; + verifyUseList(Reg); + llvm_unreachable("Remaining virtual register operands"); + } #endif VRegInfo.clear(); } +void MachineRegisterInfo::verifyUseList(unsigned Reg) const { +#ifndef NDEBUG + bool Valid = true; + for (reg_iterator I = reg_begin(Reg), E = reg_end(); I != E; ++I) { + MachineOperand *MO = &I.getOperand(); + MachineInstr *MI = MO->getParent(); + if (!MI) { + errs() << PrintReg(Reg, TRI) << " use list MachineOperand " << MO + << " has no parent instruction.\n"; + Valid = false; + } + MachineOperand *MO0 = &MI->getOperand(0); + unsigned NumOps = MI->getNumOperands(); + if (!(MO >= MO0 && MO < MO0+NumOps)) { + errs() << PrintReg(Reg, TRI) << " use list MachineOperand " << MO + << " doesn't belong to parent MI: " << *MI; + Valid = false; + } + if (!MO->isReg()) { + errs() << PrintReg(Reg, TRI) << " MachineOperand " << MO << ": " << *MO + << " is not a register\n"; + Valid = false; + } + if (MO->getReg() != Reg) { + errs() << PrintReg(Reg, TRI) << " use-list MachineOperand " << MO << ": " + << *MO << " is the wrong register\n"; + Valid = false; + } + } + assert(Valid && "Invalid use list"); +#endif +} + +void MachineRegisterInfo::verifyUseLists() const { +#ifndef NDEBUG + for (unsigned i = 0, e = getNumVirtRegs(); i != e; ++i) + verifyUseList(TargetRegisterInfo::index2VirtReg(i)); + for (unsigned i = 1, e = TRI->getNumRegs(); i != e; ++i) + verifyUseList(i); +#endif +} + /// Add MO to the linked list of operands for its register. 
void MachineRegisterInfo::addRegOperandToUseList(MachineOperand *MO) { assert(!MO->isOnRegUseList() && "Already on list"); diff --git a/contrib/llvm/lib/CodeGen/MachineScheduler.cpp b/contrib/llvm/lib/CodeGen/MachineScheduler.cpp index 5bd2349..fff6b2b 100644 --- a/contrib/llvm/lib/CodeGen/MachineScheduler.cpp +++ b/contrib/llvm/lib/CodeGen/MachineScheduler.cpp @@ -51,7 +51,11 @@ static cl::opt<unsigned> MISchedCutoff("misched-cutoff", cl::Hidden, static bool ViewMISchedDAGs = false; #endif // NDEBUG -// Experimental heuristics +// FIXME: remove this flag after initial testing. It should always be a good +// thing. +static cl::opt<bool> EnableCopyConstrain("misched-vcopy", cl::Hidden, + cl::desc("Constrain vreg copies."), cl::init(true)); + static cl::opt<bool> EnableLoadCluster("misched-cluster", cl::Hidden, cl::desc("Enable load clustering."), cl::init(true)); @@ -323,6 +327,10 @@ ScheduleDAGMI::~ScheduleDAGMI() { delete SchedImpl; } +bool ScheduleDAGMI::canAddEdge(SUnit *SuccSU, SUnit *PredSU) { + return SuccSU == &ExitSU || !Topo.IsReachable(PredSU, SuccSU); +} + bool ScheduleDAGMI::addEdge(SUnit *SuccSU, const SDep &PredDep) { if (SuccSU != &ExitSU) { // Do not use WillCreateCycle, it assumes SD scheduling. @@ -404,6 +412,8 @@ void ScheduleDAGMI::releasePredecessors(SUnit *SU) { } } +/// This is normally called from the main scheduler loop but may also be invoked +/// by the scheduling strategy to perform additional code motion. void ScheduleDAGMI::moveInstruction(MachineInstr *MI, MachineBasicBlock::iterator InsertPos) { // Advance RegionBegin if the first instruction moves down. @@ -505,6 +515,14 @@ updateScheduledPressure(const std::vector<unsigned> &NewMaxPressure) { if ((int)NewMaxPressure[ID] > MaxUnits) MaxUnits = NewMaxPressure[ID]; } + DEBUG( + for (unsigned i = 0, e = NewMaxPressure.size(); i < e; ++i) { + unsigned Limit = TRI->getRegPressureSetLimit(i); + if (NewMaxPressure[i] > Limit ) { + dbgs() << " " << TRI->getRegPressureSetName(i) << ": " + << NewMaxPressure[i] << " > " << Limit << "\n"; + } + }); } /// schedule - Called back from MachineScheduler::runOnMachineFunction @@ -905,6 +923,184 @@ void MacroFusion::apply(ScheduleDAGMI *DAG) { } //===----------------------------------------------------------------------===// +// CopyConstrain - DAG post-processing to encourage copy elimination. +//===----------------------------------------------------------------------===// + +namespace { +/// \brief Post-process the DAG to create weak edges from all uses of a copy to +/// the one use that defines the copy's source vreg, most likely an induction +/// variable increment. +class CopyConstrain : public ScheduleDAGMutation { + // Transient state. + SlotIndex RegionBeginIdx; + // RegionEndIdx is the slot index of the last non-debug instruction in the + // scheduling region. So we may have RegionBeginIdx == RegionEndIdx. + SlotIndex RegionEndIdx; +public: + CopyConstrain(const TargetInstrInfo *, const TargetRegisterInfo *) {} + + virtual void apply(ScheduleDAGMI *DAG); + +protected: + void constrainLocalCopy(SUnit *CopySU, ScheduleDAGMI *DAG); +}; +} // anonymous + +/// constrainLocalCopy handles two possibilities: +/// 1) Local src: +/// I0: = dst +/// I1: src = ... +/// I2: = dst +/// I3: dst = src (copy) +/// (create pred->succ edges I0->I1, I2->I1) +/// +/// 2) Local copy: +/// I0: dst = src (copy) +/// I1: = dst +/// I2: src = ... 
+/// I3: = dst +/// (create pred->succ edges I1->I2, I3->I2) +/// +/// Although the MachineScheduler is currently constrained to single blocks, +/// this algorithm should handle extended blocks. An EBB is a set of +/// contiguously numbered blocks such that the previous block in the EBB is +/// always the single predecessor. +void CopyConstrain::constrainLocalCopy(SUnit *CopySU, ScheduleDAGMI *DAG) { + LiveIntervals *LIS = DAG->getLIS(); + MachineInstr *Copy = CopySU->getInstr(); + + // Check for pure vreg copies. + unsigned SrcReg = Copy->getOperand(1).getReg(); + if (!TargetRegisterInfo::isVirtualRegister(SrcReg)) + return; + + unsigned DstReg = Copy->getOperand(0).getReg(); + if (!TargetRegisterInfo::isVirtualRegister(DstReg)) + return; + + // Check if either the dest or source is local. If it's live across a back + // edge, it's not local. Note that if both vregs are live across the back + // edge, we cannot successfully contrain the copy without cyclic scheduling. + unsigned LocalReg = DstReg; + unsigned GlobalReg = SrcReg; + LiveInterval *LocalLI = &LIS->getInterval(LocalReg); + if (!LocalLI->isLocal(RegionBeginIdx, RegionEndIdx)) { + LocalReg = SrcReg; + GlobalReg = DstReg; + LocalLI = &LIS->getInterval(LocalReg); + if (!LocalLI->isLocal(RegionBeginIdx, RegionEndIdx)) + return; + } + LiveInterval *GlobalLI = &LIS->getInterval(GlobalReg); + + // Find the global segment after the start of the local LI. + LiveInterval::iterator GlobalSegment = GlobalLI->find(LocalLI->beginIndex()); + // If GlobalLI does not overlap LocalLI->start, then a copy directly feeds a + // local live range. We could create edges from other global uses to the local + // start, but the coalescer should have already eliminated these cases, so + // don't bother dealing with it. + if (GlobalSegment == GlobalLI->end()) + return; + + // If GlobalSegment is killed at the LocalLI->start, the call to find() + // returned the next global segment. But if GlobalSegment overlaps with + // LocalLI->start, then advance to the next segement. If a hole in GlobalLI + // exists in LocalLI's vicinity, GlobalSegment will be the end of the hole. + if (GlobalSegment->contains(LocalLI->beginIndex())) + ++GlobalSegment; + + if (GlobalSegment == GlobalLI->end()) + return; + + // Check if GlobalLI contains a hole in the vicinity of LocalLI. + if (GlobalSegment != GlobalLI->begin()) { + // Two address defs have no hole. + if (SlotIndex::isSameInstr(llvm::prior(GlobalSegment)->end, + GlobalSegment->start)) { + return; + } + // If GlobalLI has a prior segment, it must be live into the EBB. Otherwise + // it would be a disconnected component in the live range. + assert(llvm::prior(GlobalSegment)->start < LocalLI->beginIndex() && + "Disconnected LRG within the scheduling region."); + } + MachineInstr *GlobalDef = LIS->getInstructionFromIndex(GlobalSegment->start); + if (!GlobalDef) + return; + + SUnit *GlobalSU = DAG->getSUnit(GlobalDef); + if (!GlobalSU) + return; + + // GlobalDef is the bottom of the GlobalLI hole. Open the hole by + // constraining the uses of the last local def to precede GlobalDef. 
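The local/global distinction used above hinges on whether a vreg's live range is confined to the current scheduling region: a range that is live across the region boundary (typically around a loop back edge) is treated as global, and the copy is only constrained when at least one side is local. A very rough sketch of that containment test on plain index ranges (not the LiveInterval API):

struct IndexRange {
  unsigned Begin, End; // half-open [Begin, End)
};

// A live range is "local" to the region if it starts and ends inside it.
static bool isLocalTo(const IndexRange &li, const IndexRange &region) {
  return li.Begin >= region.Begin && li.End <= region.End;
}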
+ SmallVector<SUnit*,8> LocalUses; + const VNInfo *LastLocalVN = LocalLI->getVNInfoBefore(LocalLI->endIndex()); + MachineInstr *LastLocalDef = LIS->getInstructionFromIndex(LastLocalVN->def); + SUnit *LastLocalSU = DAG->getSUnit(LastLocalDef); + for (SUnit::const_succ_iterator + I = LastLocalSU->Succs.begin(), E = LastLocalSU->Succs.end(); + I != E; ++I) { + if (I->getKind() != SDep::Data || I->getReg() != LocalReg) + continue; + if (I->getSUnit() == GlobalSU) + continue; + if (!DAG->canAddEdge(GlobalSU, I->getSUnit())) + return; + LocalUses.push_back(I->getSUnit()); + } + // Open the top of the GlobalLI hole by constraining any earlier global uses + // to precede the start of LocalLI. + SmallVector<SUnit*,8> GlobalUses; + MachineInstr *FirstLocalDef = + LIS->getInstructionFromIndex(LocalLI->beginIndex()); + SUnit *FirstLocalSU = DAG->getSUnit(FirstLocalDef); + for (SUnit::const_pred_iterator + I = GlobalSU->Preds.begin(), E = GlobalSU->Preds.end(); I != E; ++I) { + if (I->getKind() != SDep::Anti || I->getReg() != GlobalReg) + continue; + if (I->getSUnit() == FirstLocalSU) + continue; + if (!DAG->canAddEdge(FirstLocalSU, I->getSUnit())) + return; + GlobalUses.push_back(I->getSUnit()); + } + DEBUG(dbgs() << "Constraining copy SU(" << CopySU->NodeNum << ")\n"); + // Add the weak edges. + for (SmallVectorImpl<SUnit*>::const_iterator + I = LocalUses.begin(), E = LocalUses.end(); I != E; ++I) { + DEBUG(dbgs() << " Local use SU(" << (*I)->NodeNum << ") -> SU(" + << GlobalSU->NodeNum << ")\n"); + DAG->addEdge(GlobalSU, SDep(*I, SDep::Weak)); + } + for (SmallVectorImpl<SUnit*>::const_iterator + I = GlobalUses.begin(), E = GlobalUses.end(); I != E; ++I) { + DEBUG(dbgs() << " Global use SU(" << (*I)->NodeNum << ") -> SU(" + << FirstLocalSU->NodeNum << ")\n"); + DAG->addEdge(FirstLocalSU, SDep(*I, SDep::Weak)); + } +} + +/// \brief Callback from DAG postProcessing to create weak edges to encourage +/// copy elimination. +void CopyConstrain::apply(ScheduleDAGMI *DAG) { + MachineBasicBlock::iterator FirstPos = nextIfDebug(DAG->begin(), DAG->end()); + if (FirstPos == DAG->end()) + return; + RegionBeginIdx = DAG->getLIS()->getInstructionIndex(&*FirstPos); + RegionEndIdx = DAG->getLIS()->getInstructionIndex( + &*priorNonDebug(DAG->end(), DAG->begin())); + + for (unsigned Idx = 0, End = DAG->SUnits.size(); Idx != End; ++Idx) { + SUnit *SU = &DAG->SUnits[Idx]; + if (!SU->getInstr()->isCopy()) + continue; + + constrainLocalCopy(SU, DAG); + } +} + +//===----------------------------------------------------------------------===// // ConvergingScheduler - Implementation of the standard MachineSchedStrategy. //===----------------------------------------------------------------------===// @@ -916,7 +1112,7 @@ public: /// Represent the type of SchedCandidate found within a single queue. /// pickNodeBidirectional depends on these listed by decreasing priority. 
enum CandReason { - NoCand, SingleExcess, SingleCritical, Cluster, + NoCand, PhysRegCopy, SingleExcess, SingleCritical, Cluster, Weak, ResourceReduce, ResourceDemand, BotHeightReduce, BotPathReduce, TopDepthReduce, TopPathReduce, SingleMax, MultiPressure, NextDefUse, NodeOrder}; @@ -1191,6 +1387,8 @@ protected: const RegPressureTracker &RPTracker, SchedCandidate &Candidate); + void reschedulePhysRegCopies(SUnit *SU, bool isTop); + #ifndef NDEBUG void traceCandidate(const SchedCandidate &Cand); #endif @@ -1339,6 +1537,8 @@ void ConvergingScheduler::SchedBoundary::setLatencyPolicy(CandPolicy &Policy) { for (ReadyQueue::iterator I = Available.begin(), E = Available.end(); I != E; ++I) { unsigned L = getUnscheduledLatency(*I); + DEBUG(dbgs() << " " << Available.getName() + << " RemLatency SU(" << (*I)->NodeNum << ") " << L << '\n'); if (L > RemLatency) RemLatency = L; } @@ -1349,10 +1549,13 @@ void ConvergingScheduler::SchedBoundary::setLatencyPolicy(CandPolicy &Policy) { RemLatency = L; } unsigned CriticalPathLimit = Rem->CriticalPath + SchedModel->getILPWindow(); + DEBUG(dbgs() << " " << Available.getName() + << " ExpectedLatency " << ExpectedLatency + << " CP Limit " << CriticalPathLimit << '\n'); if (RemLatency + ExpectedLatency >= CriticalPathLimit && RemLatency > Rem->getMaxRemainingCount(SchedModel)) { Policy.ReduceLatency = true; - DEBUG(dbgs() << "Increase ILP: " << Available.getName() << '\n'); + DEBUG(dbgs() << " Increase ILP: " << Available.getName() << '\n'); } } @@ -1569,7 +1772,8 @@ void ConvergingScheduler::balanceZones( if ((int)(Rem->getMaxRemainingCount(SchedModel) - RemainingCritCount) > (int)SchedModel->getLatencyFactor()) { CriticalCand.Policy.ReduceResIdx = CriticalZone.CritResIdx; - DEBUG(dbgs() << "Balance " << CriticalZone.Available.getName() << " reduce " + DEBUG(dbgs() << " Balance " << CriticalZone.Available.getName() + << " reduce " << SchedModel->getProcResource(CriticalZone.CritResIdx)->Name << '\n'); } @@ -1580,7 +1784,8 @@ void ConvergingScheduler::balanceZones( if ((int)(OppositeZone.ExpectedCount - OppositeCount) > (int)SchedModel->getLatencyFactor()) { OppositeCand.Policy.DemandResIdx = CriticalZone.CritResIdx; - DEBUG(dbgs() << "Balance " << OppositeZone.Available.getName() << " demand " + DEBUG(dbgs() << " Balance " << OppositeZone.Available.getName() + << " demand " << SchedModel->getProcResource(OppositeZone.CritResIdx)->Name << '\n'); } @@ -1604,7 +1809,7 @@ void ConvergingScheduler::checkResourceLimits( if (Top.CritResIdx != Rem.CritResIdx) { TopCand.Policy.ReduceResIdx = Top.CritResIdx; BotCand.Policy.ReduceResIdx = Bot.CritResIdx; - DEBUG(dbgs() << "Reduce scheduled " + DEBUG(dbgs() << " Reduce scheduled " << SchedModel->getProcResource(Top.CritResIdx)->Name << '\n'); } return; @@ -1621,7 +1826,7 @@ void ConvergingScheduler::checkResourceLimits( && (Rem.CriticalPath > Top.CurrCycle + Bot.CurrCycle)) { TopCand.Policy.ReduceLatency = true; BotCand.Policy.ReduceLatency = true; - DEBUG(dbgs() << "Reduce scheduled latency " << Top.ExpectedLatency + DEBUG(dbgs() << " Reduce scheduled latency " << Top.ExpectedLatency << " + " << Bot.ExpectedLatency << '\n'); } return; @@ -1696,6 +1901,34 @@ static unsigned getWeakLeft(const SUnit *SU, bool isTop) { return (isTop) ? SU->WeakPredsLeft : SU->WeakSuccsLeft; } +/// Minimize physical register live ranges. Regalloc wants them adjacent to +/// their physreg def/use. +/// +/// FIXME: This is an unnecessary check on the critical path. Most are root/leaf +/// copies which can be prescheduled. The rest (e.g. 
x86 MUL) could be bundled +/// with the operation that produces or consumes the physreg. We'll do this when +/// regalloc has support for parallel copies. +static int biasPhysRegCopy(const SUnit *SU, bool isTop) { + const MachineInstr *MI = SU->getInstr(); + if (!MI->isCopy()) + return 0; + + unsigned ScheduledOper = isTop ? 1 : 0; + unsigned UnscheduledOper = isTop ? 0 : 1; + // If we have already scheduled the physreg produce/consumer, immediately + // schedule the copy. + if (TargetRegisterInfo::isPhysicalRegister( + MI->getOperand(ScheduledOper).getReg())) + return 1; + // If the physreg is at the boundary, defer it. Otherwise schedule it + // immediately to free the dependent. We can hoist the copy later. + bool AtBoundary = isTop ? !SU->NumSuccsLeft : !SU->NumPredsLeft; + if (TargetRegisterInfo::isPhysicalRegister( + MI->getOperand(UnscheduledOper).getReg())) + return AtBoundary ? -1 : 1; + return 0; +} + /// Apply a set of heursitics to a new candidate. Heuristics are currently /// hierarchical. This may be more efficient than a graduated cost model because /// we don't need to evaluate all aspects of the model for each node in the @@ -1723,6 +1956,12 @@ void ConvergingScheduler::tryCandidate(SchedCandidate &Cand, TryCand.Reason = NodeOrder; return; } + + if (tryGreater(biasPhysRegCopy(TryCand.SU, Zone.isTop()), + biasPhysRegCopy(Cand.SU, Zone.isTop()), + TryCand, Cand, PhysRegCopy)) + return; + // Avoid exceeding the target's limit. if (tryLess(TryCand.RPDelta.Excess.UnitIncrease, Cand.RPDelta.Excess.UnitIncrease, TryCand, Cand, SingleExcess)) @@ -1749,12 +1988,16 @@ void ConvergingScheduler::tryCandidate(SchedCandidate &Cand, if (tryGreater(TryCand.SU == NextClusterSU, Cand.SU == NextClusterSU, TryCand, Cand, Cluster)) return; - // Currently, weak edges are for clustering, so we hard-code that reason. - // However, deferring the current TryCand will not change Cand's reason. + + // Weak edges are for clustering and other constraints. + // + // Deferring TryCand here does not change Cand's reason. This is good in the + // sense that a bad candidate shouldn't affect a previous candidate's + // goodness, but bad in that it is assymetric and depends on queue order. CandReason OrigReason = Cand.Reason; if (tryLess(getWeakLeft(TryCand.SU, Zone.isTop()), getWeakLeft(Cand.SU, Zone.isTop()), - TryCand, Cand, Cluster)) { + TryCand, Cand, Weak)) { Cand.Reason = OrigReason; return; } @@ -1825,20 +2068,20 @@ static bool compareRPDelta(const RegPressureDelta &LHS, // Avoid increasing the max critical pressure in the scheduled region. if (LHS.Excess.UnitIncrease != RHS.Excess.UnitIncrease) { - DEBUG(dbgs() << "RP excess top - bot: " + DEBUG(dbgs() << " RP excess top - bot: " << (LHS.Excess.UnitIncrease - RHS.Excess.UnitIncrease) << '\n'); return LHS.Excess.UnitIncrease < RHS.Excess.UnitIncrease; } // Avoid increasing the max critical pressure in the scheduled region. if (LHS.CriticalMax.UnitIncrease != RHS.CriticalMax.UnitIncrease) { - DEBUG(dbgs() << "RP critical top - bot: " + DEBUG(dbgs() << " RP critical top - bot: " << (LHS.CriticalMax.UnitIncrease - RHS.CriticalMax.UnitIncrease) << '\n'); return LHS.CriticalMax.UnitIncrease < RHS.CriticalMax.UnitIncrease; } // Avoid increasing the max pressure of the entire region. 
if (LHS.CurrentMax.UnitIncrease != RHS.CurrentMax.UnitIncrease) { - DEBUG(dbgs() << "RP current top - bot: " + DEBUG(dbgs() << " RP current top - bot: " << (LHS.CurrentMax.UnitIncrease - RHS.CurrentMax.UnitIncrease) << '\n'); return LHS.CurrentMax.UnitIncrease < RHS.CurrentMax.UnitIncrease; @@ -1851,9 +2094,11 @@ const char *ConvergingScheduler::getReasonStr( ConvergingScheduler::CandReason Reason) { switch (Reason) { case NoCand: return "NOCAND "; + case PhysRegCopy: return "PREG-COPY"; case SingleExcess: return "REG-EXCESS"; case SingleCritical: return "REG-CRIT "; case Cluster: return "CLUSTER "; + case Weak: return "WEAK "; case SingleMax: return "REG-MAX "; case MultiPressure: return "REG-MULTI "; case ResourceReduce: return "RES-REDUCE"; @@ -1953,8 +2198,7 @@ void ConvergingScheduler::pickNodeFromQueue(SchedBoundary &Zone, static void tracePick(const ConvergingScheduler::SchedCandidate &Cand, bool IsTop) { - DEBUG(dbgs() << "Pick " << (IsTop ? "Top" : "Bot") - << " SU(" << Cand.SU->NodeNum << ") " + DEBUG(dbgs() << "Pick " << (IsTop ? "Top " : "Bot ") << ConvergingScheduler::getReasonStr(Cand.Reason) << '\n'); } @@ -1964,10 +2208,12 @@ SUnit *ConvergingScheduler::pickNodeBidirectional(bool &IsTopNode) { // efficient, but also provides the best heuristics for CriticalPSets. if (SUnit *SU = Bot.pickOnlyChoice()) { IsTopNode = false; + DEBUG(dbgs() << "Pick Top NOCAND\n"); return SU; } if (SUnit *SU = Top.pickOnlyChoice()) { IsTopNode = true; + DEBUG(dbgs() << "Pick Bot NOCAND\n"); return SU; } CandPolicy NoPolicy; @@ -2065,21 +2311,53 @@ SUnit *ConvergingScheduler::pickNode(bool &IsTopNode) { if (SU->isBottomReady()) Bot.removeReady(SU); - DEBUG(dbgs() << "Scheduling " << *SU->getInstr()); + DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") " << *SU->getInstr()); return SU; } +void ConvergingScheduler::reschedulePhysRegCopies(SUnit *SU, bool isTop) { + + MachineBasicBlock::iterator InsertPos = SU->getInstr(); + if (!isTop) + ++InsertPos; + SmallVectorImpl<SDep> &Deps = isTop ? SU->Preds : SU->Succs; + + // Find already scheduled copies with a single physreg dependence and move + // them just above the scheduled instruction. + for (SmallVectorImpl<SDep>::iterator I = Deps.begin(), E = Deps.end(); + I != E; ++I) { + if (I->getKind() != SDep::Data || !TRI->isPhysicalRegister(I->getReg())) + continue; + SUnit *DepSU = I->getSUnit(); + if (isTop ? DepSU->Succs.size() > 1 : DepSU->Preds.size() > 1) + continue; + MachineInstr *Copy = DepSU->getInstr(); + if (!Copy->isCopy()) + continue; + DEBUG(dbgs() << " Rescheduling physreg copy "; + I->getSUnit()->dump(DAG)); + DAG->moveInstruction(Copy, InsertPos); + } +} + /// Update the scheduler's state after scheduling a node. This is the same node /// that was just returned by pickNode(). However, ScheduleDAGMI needs to update /// it's state based on the current cycle before MachineSchedStrategy does. +/// +/// FIXME: Eventually, we may bundle physreg copies rather than rescheduling +/// them here. See comments in biasPhysRegCopy. 
void ConvergingScheduler::schedNode(SUnit *SU, bool IsTopNode) { if (IsTopNode) { SU->TopReadyCycle = Top.CurrCycle; Top.bumpNode(SU); + if (SU->hasPhysRegUses) + reschedulePhysRegCopies(SU, true); } else { SU->BotReadyCycle = Bot.CurrCycle; Bot.bumpNode(SU); + if (SU->hasPhysRegDefs) + reschedulePhysRegCopies(SU, false); } } @@ -2090,6 +2368,12 @@ static ScheduleDAGInstrs *createConvergingSched(MachineSchedContext *C) { "-misched-topdown incompatible with -misched-bottomup"); ScheduleDAGMI *DAG = new ScheduleDAGMI(C, new ConvergingScheduler()); // Register DAG post-processors. + // + // FIXME: extend the mutation API to allow earlier mutations to instantiate + // data and pass it to later mutations. Have a single mutation that gathers + // the interesting nodes in one pass. + if (EnableCopyConstrain) + DAG->addMutation(new CopyConstrain(DAG->TII, DAG->TRI)); if (EnableLoadCluster) DAG->addMutation(new LoadClusterMutation(DAG->TII, DAG->TRI)); if (EnableMacroFusion) @@ -2179,12 +2463,12 @@ public: SUnit *SU = ReadyQ.back(); ReadyQ.pop_back(); IsTopNode = false; - DEBUG(dbgs() << "*** Scheduling " << "SU(" << SU->NodeNum << "): " - << *SU->getInstr() + DEBUG(dbgs() << "Pick node " << "SU(" << SU->NodeNum << ") " << " ILP: " << DAG->getDFSResult()->getILP(SU) << " Tree: " << DAG->getDFSResult()->getSubtreeID(SU) << " @" << DAG->getDFSResult()->getSubtreeLevel( - DAG->getDFSResult()->getSubtreeID(SU)) << '\n'); + DAG->getDFSResult()->getSubtreeID(SU)) << '\n' + << "Scheduling " << *SU->getInstr()); return SU; } diff --git a/contrib/llvm/lib/CodeGen/MachineTraceMetrics.cpp b/contrib/llvm/lib/CodeGen/MachineTraceMetrics.cpp index 49d8c4e..00f702c 100644 --- a/contrib/llvm/lib/CodeGen/MachineTraceMetrics.cpp +++ b/contrib/llvm/lib/CodeGen/MachineTraceMetrics.cpp @@ -1200,8 +1200,10 @@ unsigned MachineTraceMetrics::Trace::getResourceDepth(bool Bottom) const { return std::max(Instrs, PRMax); } + unsigned MachineTraceMetrics::Trace:: -getResourceLength(ArrayRef<const MachineBasicBlock*> Extrablocks) const { +getResourceLength(ArrayRef<const MachineBasicBlock*> Extrablocks, + ArrayRef<const MCSchedClassDesc*> ExtraInstrs) const { // Add up resources above and below the center block. ArrayRef<unsigned> PRDepths = TE.getProcResourceDepths(getBlockNum()); ArrayRef<unsigned> PRHeights = TE.getProcResourceHeights(getBlockNum()); @@ -1210,6 +1212,18 @@ getResourceLength(ArrayRef<const MachineBasicBlock*> Extrablocks) const { unsigned PRCycles = PRDepths[K] + PRHeights[K]; for (unsigned I = 0; I != Extrablocks.size(); ++I) PRCycles += TE.MTM.getProcResourceCycles(Extrablocks[I]->getNumber())[K]; + for (unsigned I = 0; I != ExtraInstrs.size(); ++I) { + const MCSchedClassDesc* SC = ExtraInstrs[I]; + if (!SC->isValid()) + continue; + for (TargetSchedModel::ProcResIter + PI = TE.MTM.SchedModel.getWriteProcResBegin(SC), + PE = TE.MTM.SchedModel.getWriteProcResEnd(SC); PI != PE; ++PI) { + if (PI->ProcResourceIdx != K) + continue; + PRCycles += (PI->Cycles * TE.MTM.SchedModel.getResourceFactor(K)); + } + } PRMax = std::max(PRMax, PRCycles); } // Convert to cycle count. 
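// --- Illustrative sketch (standalone, simplified, made-up types) ---
// getResourceLength above takes, for every processor resource kind, the
// cycles already charged above and below the centre block, adds the cycles
// the hypothetical extra blocks/instructions would consume (scaled by that
// resource's factor), and keeps the maximum over all kinds; the real code
// then converts the result to a cycle count. A toy version of the
// accumulation:
#include <algorithm>
#include <vector>

namespace sketch {
struct ExtraUse { unsigned ResourceIdx; unsigned Cycles; };

unsigned resourceLength(const std::vector<unsigned> &Depths,   // per resource
                        const std::vector<unsigned> &Heights,  // per resource
                        const std::vector<ExtraUse> &Extra,    // extra instrs
                        const std::vector<unsigned> &Factor) { // per resource
  unsigned Max = 0;
  for (unsigned K = 0, E = Depths.size(); K != E; ++K) {
    unsigned Cycles = Depths[K] + Heights[K];
    for (const ExtraUse &U : Extra)
      if (U.ResourceIdx == K)
        Cycles += U.Cycles * Factor[K]; // scale to the common resource unit
    Max = std::max(Max, Cycles);
  }
  return Max;
}
} // namespace sketch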
diff --git a/contrib/llvm/lib/CodeGen/MachineVerifier.cpp b/contrib/llvm/lib/CodeGen/MachineVerifier.cpp index 4b12300..037043f 100644 --- a/contrib/llvm/lib/CodeGen/MachineVerifier.cpp +++ b/contrib/llvm/lib/CodeGen/MachineVerifier.cpp @@ -472,6 +472,9 @@ void MachineVerifier::visitMachineFunctionBefore() { if (MInfo.Succs.size() != I->succ_size()) report("MBB has duplicate entries in its successor list.", I); } + + // Check that the register use lists are sane. + MRI->verifyUseLists(); } // Does iterator point to a and b as the first two elements? diff --git a/contrib/llvm/lib/CodeGen/Passes.cpp b/contrib/llvm/lib/CodeGen/Passes.cpp index 1af65c8..bfbc062 100644 --- a/contrib/llvm/lib/CodeGen/Passes.cpp +++ b/contrib/llvm/lib/CodeGen/Passes.cpp @@ -93,9 +93,10 @@ static cl::opt<bool> EarlyLiveIntervals("early-live-intervals", cl::Hidden, /// simple binary flags that either suppress the pass or do nothing. /// i.e. -disable-mypass=false has no effect. /// These should be converted to boolOrDefault in order to use applyOverride. -static AnalysisID applyDisable(AnalysisID PassID, bool Override) { +static IdentifyingPassPtr applyDisable(IdentifyingPassPtr PassID, + bool Override) { if (Override) - return 0; + return IdentifyingPassPtr(); return PassID; } @@ -103,19 +104,20 @@ static AnalysisID applyDisable(AnalysisID PassID, bool Override) { /// flags with ternary conditions. TargetID is passed through by default. The /// pass is suppressed when the option is false. When the option is true, the /// StandardID is selected if the target provides no default. -static AnalysisID applyOverride(AnalysisID TargetID, cl::boolOrDefault Override, - AnalysisID StandardID) { +static IdentifyingPassPtr applyOverride(IdentifyingPassPtr TargetID, + cl::boolOrDefault Override, + AnalysisID StandardID) { switch (Override) { case cl::BOU_UNSET: return TargetID; case cl::BOU_TRUE: - if (TargetID) + if (TargetID.isValid()) return TargetID; if (StandardID == 0) report_fatal_error("Target cannot enable pass"); return StandardID; case cl::BOU_FALSE: - return 0; + return IdentifyingPassPtr(); } llvm_unreachable("Invalid command line option state"); } @@ -132,7 +134,8 @@ static AnalysisID applyOverride(AnalysisID TargetID, cl::boolOrDefault Override, /// StandardID may be a pseudo ID. In that case TargetID is the name of the real /// pass to run. This allows multiple options to control a single pass depending /// on where in the pipeline that pass is added. -static AnalysisID overridePass(AnalysisID StandardID, AnalysisID TargetID) { +static IdentifyingPassPtr overridePass(AnalysisID StandardID, + IdentifyingPassPtr TargetID) { if (StandardID == &PostRASchedulerID) return applyDisable(TargetID, DisablePostRA); @@ -200,11 +203,11 @@ public: // user interface. For example, a target may disable a standard pass by // default by substituting a pass ID of zero, and the user may still enable // that standard pass with an explicit command line option. - DenseMap<AnalysisID,AnalysisID> TargetPasses; + DenseMap<AnalysisID,IdentifyingPassPtr> TargetPasses; /// Store the pairs of <AnalysisID, AnalysisID> of which the second pass /// is inserted after each instance of the first one. - SmallVector<std::pair<AnalysisID, AnalysisID>, 4> InsertedPasses; + SmallVector<std::pair<AnalysisID, IdentifyingPassPtr>, 4> InsertedPasses; }; } // namespace llvm @@ -239,9 +242,13 @@ TargetPassConfig::TargetPassConfig(TargetMachine *tm, PassManagerBase &pm) /// Insert InsertedPassID pass after TargetPassID. 
void TargetPassConfig::insertPass(AnalysisID TargetPassID, - AnalysisID InsertedPassID) { - assert(TargetPassID != InsertedPassID && "Insert a pass after itself!"); - std::pair<AnalysisID, AnalysisID> P(TargetPassID, InsertedPassID); + IdentifyingPassPtr InsertedPassID) { + assert(((!InsertedPassID.isInstance() && + TargetPassID != InsertedPassID.getID()) || + (InsertedPassID.isInstance() && + TargetPassID != InsertedPassID.getInstance()->getPassID())) && + "Insert a pass after itself!"); + std::pair<AnalysisID, IdentifyingPassPtr> P(TargetPassID, InsertedPassID); Impl->InsertedPasses.push_back(P); } @@ -265,12 +272,12 @@ void TargetPassConfig::setOpt(bool &Opt, bool Val) { } void TargetPassConfig::substitutePass(AnalysisID StandardID, - AnalysisID TargetID) { + IdentifyingPassPtr TargetID) { Impl->TargetPasses[StandardID] = TargetID; } -AnalysisID TargetPassConfig::getPassSubstitution(AnalysisID ID) const { - DenseMap<AnalysisID, AnalysisID>::const_iterator +IdentifyingPassPtr TargetPassConfig::getPassSubstitution(AnalysisID ID) const { + DenseMap<AnalysisID, IdentifyingPassPtr>::const_iterator I = Impl->TargetPasses.find(ID); if (I == Impl->TargetPasses.end()) return ID; @@ -303,24 +310,39 @@ void TargetPassConfig::addPass(Pass *P) { /// Add a CodeGen pass at this point in the pipeline after checking for target /// and command line overrides. +/// +/// addPass cannot return a pointer to the pass instance because is internal the +/// PassManager and the instance we create here may already be freed. AnalysisID TargetPassConfig::addPass(AnalysisID PassID) { - AnalysisID TargetID = getPassSubstitution(PassID); - AnalysisID FinalID = overridePass(PassID, TargetID); - if (FinalID == 0) - return FinalID; - - Pass *P = Pass::createPass(FinalID); - if (!P) - llvm_unreachable("Pass ID not registered"); - addPass(P); + IdentifyingPassPtr TargetID = getPassSubstitution(PassID); + IdentifyingPassPtr FinalPtr = overridePass(PassID, TargetID); + if (!FinalPtr.isValid()) + return 0; + + Pass *P; + if (FinalPtr.isInstance()) + P = FinalPtr.getInstance(); + else { + P = Pass::createPass(FinalPtr.getID()); + if (!P) + llvm_unreachable("Pass ID not registered"); + } + AnalysisID FinalID = P->getPassID(); + addPass(P); // Ends the lifetime of P. + // Add the passes after the pass P if there is any. - for (SmallVector<std::pair<AnalysisID, AnalysisID>, 4>::iterator + for (SmallVector<std::pair<AnalysisID, IdentifyingPassPtr>, 4>::iterator I = Impl->InsertedPasses.begin(), E = Impl->InsertedPasses.end(); I != E; ++I) { if ((*I).first == PassID) { - assert((*I).second && "Illegal Pass ID!"); - Pass *NP = Pass::createPass((*I).second); - assert(NP && "Pass ID not registered"); + assert((*I).second.isValid() && "Illegal Pass ID!"); + Pass *NP; + if ((*I).second.isInstance()) + NP = (*I).second.getInstance(); + else { + NP = Pass::createPass((*I).second.getID()); + assert(NP && "Pass ID not registered"); + } addPass(NP); } } @@ -687,14 +709,6 @@ void TargetPassConfig::addOptimizedRegAlloc(FunctionPass *RegAllocPass) { addPass(&VirtRegRewriterID); printAndVerify("After Virtual Register Rewriter"); - // FinalizeRegAlloc is convenient until MachineInstrBundles is more mature, - // but eventually, all users of it should probably be moved to addPostRA and - // it can go away. Currently, it's the intended place for targets to run - // FinalizeMachineBundles, because passes other than MachineScheduling an - // RegAlloc itself may not be aware of bundles. 
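// --- Illustrative sketch (standalone, minimal stand-in, not the real
// IdentifyingPassPtr declaration) --- The IdentifyingPassPtr used above acts
// as a small discriminated union: it holds either a pass ID (to be
// instantiated later via Pass::createPass) or an already-constructed pass
// instance. A minimal stand-in showing the isValid/isInstance/getID/
// getInstance protocol the code above relies on:
#include <cassert>

namespace sketch {
class Pass;                      // opaque
typedef const void *AnalysisID;  // pass identity by address, as in LLVM

class PassIdOrInstance {
  AnalysisID ID;
  Pass *P;
public:
  PassIdOrInstance() : ID(nullptr), P(nullptr) {}            // "no pass"
  PassIdOrInstance(AnalysisID Id) : ID(Id), P(nullptr) {}
  PassIdOrInstance(Pass *Instance) : ID(nullptr), P(Instance) {}

  bool isValid() const { return ID || P; }
  bool isInstance() const { return P != nullptr; }
  AnalysisID getID() const { assert(!isInstance()); return ID; }
  Pass *getInstance() const { assert(isInstance()); return P; }
};
} // namespace sketch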
- if (addFinalizeRegAlloc()) - printAndVerify("After RegAlloc finalization"); - // Perform stack slot coloring and post-ra machine LICM. // // FIXME: Re-enable coloring with register when it's capable of adding diff --git a/contrib/llvm/lib/CodeGen/PrologEpilogInserter.cpp b/contrib/llvm/lib/CodeGen/PrologEpilogInserter.cpp index e5872df..959dd7d 100644 --- a/contrib/llvm/lib/CodeGen/PrologEpilogInserter.cpp +++ b/contrib/llvm/lib/CodeGen/PrologEpilogInserter.cpp @@ -824,6 +824,12 @@ void PEI::scavengeFrameVirtualRegs(MachineFunction &Fn) { // The instruction stream may change in the loop, so check BB->end() // directly. for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ) { + // We might end up here again with a NULL iterator if we scavenged a + // register for which we inserted spill code for definition by what was + // originally the first instruction in BB. + if (I == MachineBasicBlock::iterator(NULL)) + I = BB->begin(); + MachineInstr *MI = I; MachineBasicBlock::iterator J = llvm::next(I); MachineBasicBlock::iterator P = I == BB->begin() ? @@ -883,8 +889,6 @@ void PEI::scavengeFrameVirtualRegs(MachineFunction &Fn) { "The register scavenger has an unexpected position"); I = P; RS->unprocess(P); - - // RS->skipTo(I == BB->begin() ? NULL : llvm::prior(I)); } else ++I; } diff --git a/contrib/llvm/lib/CodeGen/RegAllocBasic.cpp b/contrib/llvm/lib/CodeGen/RegAllocBasic.cpp index 0b6dc68..7fcfe9e 100644 --- a/contrib/llvm/lib/CodeGen/RegAllocBasic.cpp +++ b/contrib/llvm/lib/CodeGen/RegAllocBasic.cpp @@ -63,7 +63,7 @@ class RABasic : public MachineFunctionPass, public RegAllocBase MachineFunction *MF; // state - std::auto_ptr<Spiller> SpillerInstance; + OwningPtr<Spiller> SpillerInstance; std::priority_queue<LiveInterval*, std::vector<LiveInterval*>, CompSpillWeight> Queue; diff --git a/contrib/llvm/lib/CodeGen/RegAllocGreedy.cpp b/contrib/llvm/lib/CodeGen/RegAllocGreedy.cpp index 6d84176..9eed1fc 100644 --- a/contrib/llvm/lib/CodeGen/RegAllocGreedy.cpp +++ b/contrib/llvm/lib/CodeGen/RegAllocGreedy.cpp @@ -78,7 +78,7 @@ class RAGreedy : public MachineFunctionPass, LiveDebugVariables *DebugVars; // state - std::auto_ptr<Spiller> SpillerInstance; + OwningPtr<Spiller> SpillerInstance; std::priority_queue<std::pair<unsigned, unsigned> > Queue; unsigned NextCascade; @@ -166,8 +166,8 @@ class RAGreedy : public MachineFunctionPass, }; // splitting state. - std::auto_ptr<SplitAnalysis> SA; - std::auto_ptr<SplitEditor> SE; + OwningPtr<SplitAnalysis> SA; + OwningPtr<SplitEditor> SE; /// Cached per-block interference maps InterferenceCache IntfCache; diff --git a/contrib/llvm/lib/CodeGen/RegAllocPBQP.cpp b/contrib/llvm/lib/CodeGen/RegAllocPBQP.cpp index 607edac..15a88e2 100644 --- a/contrib/llvm/lib/CodeGen/RegAllocPBQP.cpp +++ b/contrib/llvm/lib/CodeGen/RegAllocPBQP.cpp @@ -34,6 +34,7 @@ #include "llvm/CodeGen/RegAllocPBQP.h" #include "RegisterCoalescer.h" #include "Spiller.h" +#include "llvm/ADT/OwningPtr.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/CodeGen/CalcSpillWeights.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" @@ -89,8 +90,8 @@ public: static char ID; /// Construct a PBQP register allocator. 
- RegAllocPBQP(std::auto_ptr<PBQPBuilder> b, char *cPassID=0) - : MachineFunctionPass(ID), builder(b), customPassID(cPassID) { + RegAllocPBQP(OwningPtr<PBQPBuilder> &b, char *cPassID=0) + : MachineFunctionPass(ID), builder(b.take()), customPassID(cPassID) { initializeSlotIndexesPass(*PassRegistry::getPassRegistry()); initializeLiveIntervalsPass(*PassRegistry::getPassRegistry()); initializeCalculateSpillWeightsPass(*PassRegistry::getPassRegistry()); @@ -121,7 +122,7 @@ private: typedef std::set<unsigned> RegSet; - std::auto_ptr<PBQPBuilder> builder; + OwningPtr<PBQPBuilder> builder; char *customPassID; @@ -132,7 +133,7 @@ private: const MachineLoopInfo *loopInfo; MachineRegisterInfo *mri; - std::auto_ptr<Spiller> spiller; + OwningPtr<Spiller> spiller; LiveIntervals *lis; LiveStacks *lss; VirtRegMap *vrm; @@ -186,16 +187,15 @@ unsigned PBQPRAProblem::getPRegForOption(unsigned vreg, unsigned option) const { return allowedSet[option - 1]; } -std::auto_ptr<PBQPRAProblem> PBQPBuilder::build(MachineFunction *mf, - const LiveIntervals *lis, - const MachineLoopInfo *loopInfo, - const RegSet &vregs) { +PBQPRAProblem *PBQPBuilder::build(MachineFunction *mf, const LiveIntervals *lis, + const MachineLoopInfo *loopInfo, + const RegSet &vregs) { LiveIntervals *LIS = const_cast<LiveIntervals*>(lis); MachineRegisterInfo *mri = &mf->getRegInfo(); const TargetRegisterInfo *tri = mf->getTarget().getRegisterInfo(); - std::auto_ptr<PBQPRAProblem> p(new PBQPRAProblem()); + OwningPtr<PBQPRAProblem> p(new PBQPRAProblem()); PBQP::Graph &g = p->getGraph(); RegSet pregs; @@ -282,7 +282,7 @@ std::auto_ptr<PBQPRAProblem> PBQPBuilder::build(MachineFunction *mf, } } - return p; + return p.take(); } void PBQPBuilder::addSpillCosts(PBQP::Vector &costVec, @@ -311,13 +311,12 @@ void PBQPBuilder::addInterferenceCosts( } } -std::auto_ptr<PBQPRAProblem> PBQPBuilderWithCoalescing::build( - MachineFunction *mf, +PBQPRAProblem *PBQPBuilderWithCoalescing::build(MachineFunction *mf, const LiveIntervals *lis, const MachineLoopInfo *loopInfo, const RegSet &vregs) { - std::auto_ptr<PBQPRAProblem> p = PBQPBuilder::build(mf, lis, loopInfo, vregs); + OwningPtr<PBQPRAProblem> p(PBQPBuilder::build(mf, lis, loopInfo, vregs)); PBQP::Graph &g = p->getGraph(); const TargetMachine &tm = mf->getTarget(); @@ -391,7 +390,7 @@ std::auto_ptr<PBQPRAProblem> PBQPBuilderWithCoalescing::build( } } - return p; + return p.take(); } void PBQPBuilderWithCoalescing::addPhysRegCoalesce(PBQP::Vector &costVec, @@ -584,8 +583,8 @@ bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) { while (!pbqpAllocComplete) { DEBUG(dbgs() << " PBQP Regalloc round " << round << ":\n"); - std::auto_ptr<PBQPRAProblem> problem = - builder->build(mf, lis, loopInfo, vregsToAlloc); + OwningPtr<PBQPRAProblem> problem( + builder->build(mf, lis, loopInfo, vregsToAlloc)); #ifndef NDEBUG if (pbqpDumpGraphs) { @@ -621,18 +620,18 @@ bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) { } FunctionPass* llvm::createPBQPRegisterAllocator( - std::auto_ptr<PBQPBuilder> builder, + OwningPtr<PBQPBuilder> &builder, char *customPassID) { return new RegAllocPBQP(builder, customPassID); } FunctionPass* llvm::createDefaultPBQPRegisterAllocator() { - if (pbqpCoalescing) { - return createPBQPRegisterAllocator( - std::auto_ptr<PBQPBuilder>(new PBQPBuilderWithCoalescing())); - } // else - return createPBQPRegisterAllocator( - std::auto_ptr<PBQPBuilder>(new PBQPBuilder())); + OwningPtr<PBQPBuilder> Builder; + if (pbqpCoalescing) + Builder.reset(new PBQPBuilderWithCoalescing()); + else + 
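// --- Illustrative sketch (standalone, minimal holder, not LLVM's OwningPtr
// header) --- The auto_ptr -> OwningPtr migration above keeps single-owner
// semantics but makes every transfer explicit: the producer hands the raw
// pointer back with take(), and the consumer adopts it with reset() or by
// constructing a new owner.
#include <cassert>

namespace sketch {
template <class T> class Owning {
  T *Ptr;
  Owning(const Owning &);            // non-copyable
  Owning &operator=(const Owning &);
public:
  explicit Owning(T *P = 0) : Ptr(P) {}
  ~Owning() { delete Ptr; }
  void reset(T *P = 0) { if (Ptr != P) { delete Ptr; Ptr = P; } }
  T *take() { T *Tmp = Ptr; Ptr = 0; return Tmp; }   // release ownership
  T *operator->() const { assert(Ptr); return Ptr; }
};

struct Problem { int Rounds; };

// Producer returns a raw pointer; the caller immediately re-owns it,
// mirroring "OwningPtr<PBQPRAProblem> problem(builder->build(...))" above.
Problem *buildProblem() { return new Problem(); }

void run() {
  Owning<Problem> P(buildProblem());
  P->Rounds = 1;            // use through the owner
  Problem *Raw = P.take();  // explicit transfer out, as b.take() above
  delete Raw;               // the new owner is now responsible for deletion
}
} // namespace sketch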
Builder.reset(new PBQPBuilder()); + return createPBQPRegisterAllocator(Builder); } #undef DEBUG_TYPE diff --git a/contrib/llvm/lib/CodeGen/RegisterScavenging.cpp b/contrib/llvm/lib/CodeGen/RegisterScavenging.cpp index 07ace7a..f82ccbe 100644 --- a/contrib/llvm/lib/CodeGen/RegisterScavenging.cpp +++ b/contrib/llvm/lib/CodeGen/RegisterScavenging.cpp @@ -154,14 +154,13 @@ void RegScavenger::unprocess() { assert(Tracking && "Cannot unprocess because we're not tracking"); MachineInstr *MI = MBBI; - if (MI->isDebugValue()) - return; - - determineKillsAndDefs(); + if (!MI->isDebugValue()) { + determineKillsAndDefs(); - // Commit the changes. - setUsed(KillRegs); - setUnused(DefRegs); + // Commit the changes. + setUsed(KillRegs); + setUnused(DefRegs); + } if (MBBI == MBB->begin()) { MBBI = MachineBasicBlock::iterator(NULL); diff --git a/contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp b/contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp index 71e7a21..e4da6a4 100644 --- a/contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp +++ b/contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp @@ -262,6 +262,9 @@ void ScheduleDAGInstrs::addPhysRegDataDeps(SUnit *SU, unsigned OperIdx) { if (UseOp < 0) Dep = SDep(SU, SDep::Artificial); else { + // Set the hasPhysRegDefs only for physreg defs that have a use within + // the scheduling region. + SU->hasPhysRegDefs = true; Dep = SDep(SU, SDep::Data, *Alias); RegUse = UseSU->getInstr(); Dep.setMinLatency( @@ -318,6 +321,7 @@ void ScheduleDAGInstrs::addPhysRegDeps(SUnit *SU, unsigned OperIdx) { } if (!MO.isDef()) { + SU->hasPhysRegUses = true; // Either insert a new Reg2SUnits entry with an empty SUnits list, or // retrieve the existing SUnits list for this register's uses. // Push this SUnit on the use list. diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index eb16095..2e09ec0 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -205,6 +205,7 @@ namespace { SDValue visitCTTZ_ZERO_UNDEF(SDNode *N); SDValue visitCTPOP(SDNode *N); SDValue visitSELECT(SDNode *N); + SDValue visitVSELECT(SDNode *N); SDValue visitSELECT_CC(SDNode *N); SDValue visitSETCC(SDNode *N); SDValue visitSIGN_EXTEND(SDNode *N); @@ -243,7 +244,6 @@ namespace { SDValue visitCONCAT_VECTORS(SDNode *N); SDValue visitEXTRACT_SUBVECTOR(SDNode *N); SDValue visitVECTOR_SHUFFLE(SDNode *N); - SDValue visitMEMBARRIER(SDNode *N); SDValue XformToShuffleWithZero(SDNode *N); SDValue ReassociateOps(unsigned Opc, DebugLoc DL, SDValue LHS, SDValue RHS); @@ -1127,6 +1127,7 @@ SDValue DAGCombiner::visit(SDNode *N) { case ISD::CTTZ_ZERO_UNDEF: return visitCTTZ_ZERO_UNDEF(N); case ISD::CTPOP: return visitCTPOP(N); case ISD::SELECT: return visitSELECT(N); + case ISD::VSELECT: return visitVSELECT(N); case ISD::SELECT_CC: return visitSELECT_CC(N); case ISD::SETCC: return visitSETCC(N); case ISD::SIGN_EXTEND: return visitSIGN_EXTEND(N); @@ -1165,7 +1166,6 @@ SDValue DAGCombiner::visit(SDNode *N) { case ISD::CONCAT_VECTORS: return visitCONCAT_VECTORS(N); case ISD::EXTRACT_SUBVECTOR: return visitEXTRACT_SUBVECTOR(N); case ISD::VECTOR_SHUFFLE: return visitVECTOR_SHUFFLE(N); - case ISD::MEMBARRIER: return visitMEMBARRIER(N); } return SDValue(); } @@ -4164,6 +4164,46 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) { return SDValue(); } +SDValue DAGCombiner::visitVSELECT(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + SDValue N2 = N->getOperand(2); + DebugLoc DL = 
N->getDebugLoc(); + + // Canonicalize integer abs. + // vselect (setg[te] X, 0), X, -X -> + // vselect (setgt X, -1), X, -X -> + // vselect (setl[te] X, 0), -X, X -> + // Y = sra (X, size(X)-1); xor (add (X, Y), Y) + if (N0.getOpcode() == ISD::SETCC) { + SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1); + ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get(); + bool isAbs = false; + bool RHSIsAllZeros = ISD::isBuildVectorAllZeros(RHS.getNode()); + + if (((RHSIsAllZeros && (CC == ISD::SETGT || CC == ISD::SETGE)) || + (ISD::isBuildVectorAllOnes(RHS.getNode()) && CC == ISD::SETGT)) && + N1 == LHS && N2.getOpcode() == ISD::SUB && N1 == N2.getOperand(1)) + isAbs = ISD::isBuildVectorAllZeros(N2.getOperand(0).getNode()); + else if ((RHSIsAllZeros && (CC == ISD::SETLT || CC == ISD::SETLE)) && + N2 == LHS && N1.getOpcode() == ISD::SUB && N2 == N1.getOperand(1)) + isAbs = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode()); + + if (isAbs) { + EVT VT = LHS.getValueType(); + SDValue Shift = DAG.getNode( + ISD::SRA, DL, VT, LHS, + DAG.getConstant(VT.getScalarType().getSizeInBits() - 1, VT)); + SDValue Add = DAG.getNode(ISD::ADD, DL, VT, LHS, Shift); + AddToWorkList(Shift.getNode()); + AddToWorkList(Add.getNode()); + return DAG.getNode(ISD::XOR, DL, VT, Add, Shift); + } + } + + return SDValue(); +} + SDValue DAGCombiner::visitSELECT_CC(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); @@ -4453,7 +4493,9 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { if (N0.getOpcode() == ISD::SETCC) { // sext(setcc) -> sext_in_reg(vsetcc) for vectors. // Only do this before legalize for now. - if (VT.isVector() && !LegalOperations) { + if (VT.isVector() && !LegalOperations && + TLI.getBooleanContents(true) == + TargetLowering::ZeroOrNegativeOneBooleanContent) { EVT N0VT = N0.getOperand(0).getValueType(); // On some architectures (such as SSE/NEON/etc) the SETCC result type is // of the same size as the compared operands. Only optimize sext(setcc()) @@ -7110,25 +7152,40 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) { assert(OtherUses[i]->getOperand(!OffsetIdx).getNode() == BasePtr.getNode() && "Expected BasePtr operand"); - APInt OV = - cast<ConstantSDNode>(Offset)->getAPIntValue(); - if (AM == ISD::PRE_DEC) - OV = -OV; + // We need to replace ptr0 in the following expression: + // x0 * offset0 + y0 * ptr0 = t0 + // knowing that + // x1 * offset1 + y1 * ptr0 = t1 (the indexed load/store) + // + // where x0, x1, y0 and y1 in {-1, 1} are given by the types of the + // indexed load/store and the expresion that needs to be re-written. + // + // Therefore, we have: + // t0 = (x0 * offset0 - x1 * y0 * y1 *offset1) + (y0 * y1) * t1 ConstantSDNode *CN = cast<ConstantSDNode>(OtherUses[i]->getOperand(OffsetIdx)); - APInt CNV = CN->getAPIntValue(); - if (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 1) - CNV += OV; - else - CNV -= OV; + int X0, X1, Y0, Y1; + APInt Offset0 = CN->getAPIntValue(); + APInt Offset1 = cast<ConstantSDNode>(Offset)->getAPIntValue(); - SDValue NewOp1 = Result.getValue(isLoad ? 1 : 0); - SDValue NewOp2 = DAG.getConstant(CNV, CN->getValueType(0)); - if (OffsetIdx == 0) - std::swap(NewOp1, NewOp2); + X0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 1) ? -1 : 1; + Y0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 0) ? -1 : 1; + X1 = (AM == ISD::PRE_DEC && !Swapped) ? -1 : 1; + Y1 = (AM == ISD::PRE_DEC && Swapped) ? -1 : 1; - SDValue NewUse = DAG.getNode(OtherUses[i]->getOpcode(), + unsigned Opcode = (Y0 * Y1 < 0) ? 
ISD::SUB : ISD::ADD; + + APInt CNV = Offset0; + if (X0 < 0) CNV = -CNV; + if (X1 * Y0 * Y1 < 0) CNV = CNV + Offset1; + else CNV = CNV - Offset1; + + // We can now generate the new expression. + SDValue NewOp1 = DAG.getConstant(CNV, CN->getValueType(0)); + SDValue NewOp2 = Result.getValue(isLoad ? 1 : 0); + + SDValue NewUse = DAG.getNode(Opcode, OtherUses[i]->getDebugLoc(), OtherUses[i]->getValueType(0), NewOp1, NewOp2); DAG.ReplaceAllUsesOfValueWith(SDValue(OtherUses[i], 0), NewUse); @@ -9065,6 +9122,51 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) { if (ISD::allOperandsUndef(N)) return DAG.getUNDEF(N->getValueType(0)); + // Type legalization of vectors and DAG canonicalization of SHUFFLE_VECTOR + // nodes often generate nop CONCAT_VECTOR nodes. + // Scan the CONCAT_VECTOR operands and look for a CONCAT operations that + // place the incoming vectors at the exact same location. + SDValue SingleSource = SDValue(); + unsigned PartNumElem = N->getOperand(0).getValueType().getVectorNumElements(); + + for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { + SDValue Op = N->getOperand(i); + + if (Op.getOpcode() == ISD::UNDEF) + continue; + + // Check if this is the identity extract: + if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR) + return SDValue(); + + // Find the single incoming vector for the extract_subvector. + if (SingleSource.getNode()) { + if (Op.getOperand(0) != SingleSource) + return SDValue(); + } else { + SingleSource = Op.getOperand(0); + + // Check the source type is the same as the type of the result. + // If not, this concat may extend the vector, so we can not + // optimize it away. + if (SingleSource.getValueType() != N->getValueType(0)) + return SDValue(); + } + + unsigned IdentityIndex = i * PartNumElem; + ConstantSDNode *CS = dyn_cast<ConstantSDNode>(Op.getOperand(1)); + // The extract index must be constant. + if (!CS) + return SDValue(); + + // Check that we are reading from the identity index. + if (CS->getZExtValue() != IdentityIndex) + return SDValue(); + } + + if (SingleSource.getNode()) + return SingleSource; + return SDValue(); } @@ -9125,6 +9227,44 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) { return SDValue(); } +// Tries to turn a shuffle of two CONCAT_VECTORS into a single concat. +static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG) { + EVT VT = N->getValueType(0); + unsigned NumElts = VT.getVectorNumElements(); + + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N); + + SmallVector<SDValue, 4> Ops; + EVT ConcatVT = N0.getOperand(0).getValueType(); + unsigned NumElemsPerConcat = ConcatVT.getVectorNumElements(); + unsigned NumConcats = NumElts / NumElemsPerConcat; + + // Look at every vector that's inserted. We're looking for exact + // subvector-sized copies from a concatenated vector + for (unsigned I = 0; I != NumConcats; ++I) { + // Make sure we're dealing with a copy. 
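// --- Illustrative sketch (standalone predicate over plain integers, a
// simplification of the mask test above) --- partitionShuffleOfConcats only
// fires when every chunk of the shuffle mask (one chunk per concatenated
// subvector) selects a whole, aligned, consecutive subvector of the inputs.
#include <vector>

namespace sketch {
// Shuffle masks use -1 for undef lanes; this sketch treats undef as a miss.
bool isConcatOfWholeSubvectors(const std::vector<int> &Mask,
                               unsigned ElemsPerConcat) {
  if (ElemsPerConcat == 0 || Mask.size() % ElemsPerConcat != 0)
    return false;
  for (unsigned Begin = 0; Begin < Mask.size(); Begin += ElemsPerConcat) {
    // The chunk must start on a subvector boundary of the source...
    if (Mask[Begin] < 0 || Mask[Begin] % (int)ElemsPerConcat != 0)
      return false;
    // ...and read the following source elements consecutively.
    for (unsigned J = 1; J != ElemsPerConcat; ++J)
      if (Mask[Begin + J] != Mask[Begin] + (int)J)
        return false;
  }
  return true;
}
} // namespace sketch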
+ unsigned Begin = I * NumElemsPerConcat; + if (SVN->getMaskElt(Begin) % NumElemsPerConcat != 0) + return SDValue(); + + for (unsigned J = 1; J != NumElemsPerConcat; ++J) { + if (SVN->getMaskElt(Begin + J - 1) + 1 != SVN->getMaskElt(Begin + J)) + return SDValue(); + } + + unsigned FirstElt = SVN->getMaskElt(Begin) / NumElemsPerConcat; + if (FirstElt < N0.getNumOperands()) + Ops.push_back(N0.getOperand(FirstElt)); + else + Ops.push_back(N1.getOperand(FirstElt - N0.getNumOperands())); + } + + return DAG.getNode(ISD::CONCAT_VECTORS, N->getDebugLoc(), VT, Ops.data(), + Ops.size()); +} + SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { EVT VT = N->getValueType(0); unsigned NumElts = VT.getVectorNumElements(); @@ -9226,6 +9366,17 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { } } + if (N0.getOpcode() == ISD::CONCAT_VECTORS && + Level < AfterLegalizeVectorOps && + (N1.getOpcode() == ISD::UNDEF || + (N1.getOpcode() == ISD::CONCAT_VECTORS && + N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType()))) { + SDValue V = partitionShuffleOfConcats(N, DAG); + + if (V.getNode()) + return V; + } + // If this shuffle node is simply a swizzle of another shuffle node, // and it reverses the swizzle of the previous shuffle then we can // optimize shuffle(shuffle(x, undef), undef) -> x. @@ -9262,59 +9413,6 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { return SDValue(); } -SDValue DAGCombiner::visitMEMBARRIER(SDNode* N) { - if (!TLI.getShouldFoldAtomicFences()) - return SDValue(); - - SDValue atomic = N->getOperand(0); - switch (atomic.getOpcode()) { - case ISD::ATOMIC_CMP_SWAP: - case ISD::ATOMIC_SWAP: - case ISD::ATOMIC_LOAD_ADD: - case ISD::ATOMIC_LOAD_SUB: - case ISD::ATOMIC_LOAD_AND: - case ISD::ATOMIC_LOAD_OR: - case ISD::ATOMIC_LOAD_XOR: - case ISD::ATOMIC_LOAD_NAND: - case ISD::ATOMIC_LOAD_MIN: - case ISD::ATOMIC_LOAD_MAX: - case ISD::ATOMIC_LOAD_UMIN: - case ISD::ATOMIC_LOAD_UMAX: - break; - default: - return SDValue(); - } - - SDValue fence = atomic.getOperand(0); - if (fence.getOpcode() != ISD::MEMBARRIER) - return SDValue(); - - switch (atomic.getOpcode()) { - case ISD::ATOMIC_CMP_SWAP: - return SDValue(DAG.UpdateNodeOperands(atomic.getNode(), - fence.getOperand(0), - atomic.getOperand(1), atomic.getOperand(2), - atomic.getOperand(3)), atomic.getResNo()); - case ISD::ATOMIC_SWAP: - case ISD::ATOMIC_LOAD_ADD: - case ISD::ATOMIC_LOAD_SUB: - case ISD::ATOMIC_LOAD_AND: - case ISD::ATOMIC_LOAD_OR: - case ISD::ATOMIC_LOAD_XOR: - case ISD::ATOMIC_LOAD_NAND: - case ISD::ATOMIC_LOAD_MIN: - case ISD::ATOMIC_LOAD_MAX: - case ISD::ATOMIC_LOAD_UMIN: - case ISD::ATOMIC_LOAD_UMAX: - return SDValue(DAG.UpdateNodeOperands(atomic.getNode(), - fence.getOperand(0), - atomic.getOperand(1), atomic.getOperand(2)), - atomic.getResNo()); - default: - return SDValue(); - } -} - /// XformToShuffleWithZero - Returns a vector_shuffle if it able to transform /// an AND to a vector_shuffle with the destination vector and a zero vector. /// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0>. 
==> diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp index 9ac738e..288499a 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp @@ -1505,3 +1505,61 @@ bool FastISel::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) { return true; } + +bool FastISel::tryToFoldLoad(const LoadInst *LI, const Instruction *FoldInst) { + assert(LI->hasOneUse() && + "tryToFoldLoad expected a LoadInst with a single use"); + // We know that the load has a single use, but don't know what it is. If it + // isn't one of the folded instructions, then we can't succeed here. Handle + // this by scanning the single-use users of the load until we get to FoldInst. + unsigned MaxUsers = 6; // Don't scan down huge single-use chains of instrs. + + const Instruction *TheUser = LI->use_back(); + while (TheUser != FoldInst && // Scan up until we find FoldInst. + // Stay in the right block. + TheUser->getParent() == FoldInst->getParent() && + --MaxUsers) { // Don't scan too far. + // If there are multiple or no uses of this instruction, then bail out. + if (!TheUser->hasOneUse()) + return false; + + TheUser = TheUser->use_back(); + } + + // If we didn't find the fold instruction, then we failed to collapse the + // sequence. + if (TheUser != FoldInst) + return false; + + // Don't try to fold volatile loads. Target has to deal with alignment + // constraints. + if (LI->isVolatile()) + return false; + + // Figure out which vreg this is going into. If there is no assigned vreg yet + // then there actually was no reference to it. Perhaps the load is referenced + // by a dead instruction. + unsigned LoadReg = getRegForValue(LI); + if (LoadReg == 0) + return false; + + // We can't fold if this vreg has no uses or more than one use. Multiple uses + // may mean that the instruction got lowered to multiple MIs, or the use of + // the loaded value ended up being multiple operands of the result. + if (!MRI.hasOneUse(LoadReg)) + return false; + + MachineRegisterInfo::reg_iterator RI = MRI.reg_begin(LoadReg); + MachineInstr *User = &*RI; + + // Set the insertion point properly. Folding the load can cause generation of + // other random instructions (like sign extends) for addressing modes; make + // sure they get inserted in a logical place before the new instruction. + FuncInfo.InsertPt = User; + FuncInfo.MBB = User->getParent(); + + // Ask the target to try folding the load. + return tryToFoldLoadIntoMI(User, RI.getOperandNo(), LI); +} + + diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index 51cc254..2a1d8c2 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -2759,8 +2759,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { Results.push_back(DAG.getConstant(0, MVT::i32)); Results.push_back(Node->getOperand(0)); break; - case ISD::ATOMIC_FENCE: - case ISD::MEMBARRIER: { + case ISD::ATOMIC_FENCE: { // If the target didn't lower this, lower it to '__sync_synchronize()' call // FIXME: handle "fence singlethread" more efficiently. 
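// --- Illustrative sketch (standalone, toy data structure, not the IR use
// lists) --- tryToFoldLoad above walks the chain of single-use instructions
// from the load towards the candidate fold target, giving up if the chain
// leaves the block, fans out, or grows too long. The same bounded walk over
// a toy "node with one user" structure:
namespace sketch {
struct Node {
  Node *OnlyUser;  // null if the node has zero or multiple users
  int Block;       // which basic block the node lives in
};

// True if following single-use links from From reaches Target within Limit
// hops without leaving Target's block.
bool reachesViaSingleUses(const Node *From, const Node *Target,
                          unsigned Limit = 6) {
  const Node *Cur = From->OnlyUser;  // the load's one (assumed) user
  while (Cur && Cur != Target && Cur->Block == Target->Block && --Limit)
    Cur = Cur->OnlyUser;             // keep following unique users
  return Cur == Target;
}
} // namespace sketch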
TargetLowering::ArgListTy Args; diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index d19c13b..cd2f060 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -777,7 +777,6 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) { Res = PromoteIntOp_CONVERT_RNDSAT(N); break; case ISD::INSERT_VECTOR_ELT: Res = PromoteIntOp_INSERT_VECTOR_ELT(N, OpNo);break; - case ISD::MEMBARRIER: Res = PromoteIntOp_MEMBARRIER(N); break; case ISD::SCALAR_TO_VECTOR: Res = PromoteIntOp_SCALAR_TO_VECTOR(N); break; case ISD::VSELECT: @@ -961,17 +960,6 @@ SDValue DAGTypeLegalizer::PromoteIntOp_INSERT_VECTOR_ELT(SDNode *N, N->getOperand(1), Idx), 0); } -SDValue DAGTypeLegalizer::PromoteIntOp_MEMBARRIER(SDNode *N) { - SDValue NewOps[6]; - DebugLoc dl = N->getDebugLoc(); - NewOps[0] = N->getOperand(0); - for (unsigned i = 1; i < array_lengthof(NewOps); ++i) { - SDValue Flag = GetPromotedInteger(N->getOperand(i)); - NewOps[i] = DAG.getZeroExtendInReg(Flag, dl, MVT::i1); - } - return SDValue(DAG.UpdateNodeOperands(N, NewOps, array_lengthof(NewOps)), 0); -} - SDValue DAGTypeLegalizer::PromoteIntOp_SCALAR_TO_VECTOR(SDNode *N) { // Integer SCALAR_TO_VECTOR operands are implicitly truncated, so just promote // the operand in place. diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h index 54ea926..1c4274a 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -270,7 +270,6 @@ private: SDValue PromoteIntOp_EXTRACT_ELEMENT(SDNode *N); SDValue PromoteIntOp_EXTRACT_VECTOR_ELT(SDNode *N); SDValue PromoteIntOp_CONCAT_VECTORS(SDNode *N); - SDValue PromoteIntOp_MEMBARRIER(SDNode *N); SDValue PromoteIntOp_SCALAR_TO_VECTOR(SDNode *N); SDValue PromoteIntOp_SELECT(SDNode *N, unsigned OpNo); SDValue PromoteIntOp_SELECT_CC(SDNode *N, unsigned OpNo); @@ -582,6 +581,7 @@ private: SDValue SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N); SDValue SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo); SDValue SplitVecOp_CONCAT_VECTORS(SDNode *N); + SDValue SplitVecOp_TRUNCATE(SDNode *N); SDValue SplitVecOp_VSETCC(SDNode *N); SDValue SplitVecOp_FP_ROUND(SDNode *N); diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 5ec8535..04c6bfd 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -1046,6 +1046,7 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) { case ISD::EXTRACT_SUBVECTOR: Res = SplitVecOp_EXTRACT_SUBVECTOR(N); break; case ISD::EXTRACT_VECTOR_ELT:Res = SplitVecOp_EXTRACT_VECTOR_ELT(N); break; case ISD::CONCAT_VECTORS: Res = SplitVecOp_CONCAT_VECTORS(N); break; + case ISD::TRUNCATE: Res = SplitVecOp_TRUNCATE(N); break; case ISD::FP_ROUND: Res = SplitVecOp_FP_ROUND(N); break; case ISD::STORE: Res = SplitVecOp_STORE(cast<StoreSDNode>(N), OpNo); @@ -1062,7 +1063,6 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) { case ISD::SINT_TO_FP: case ISD::UINT_TO_FP: case ISD::FTRUNC: - case ISD::TRUNCATE: case ISD::SIGN_EXTEND: case ISD::ZERO_EXTEND: case ISD::ANY_EXTEND: @@ -1272,8 +1272,8 @@ SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) { SDValue 
DAGTypeLegalizer::SplitVecOp_CONCAT_VECTORS(SDNode *N) { DebugLoc DL = N->getDebugLoc(); - // The input operands all must have the same type, and we know the result the - // result type is valid. Convert this to a buildvector which extracts all the + // The input operands all must have the same type, and we know the result + // type is valid. Convert this to a buildvector which extracts all the // input elements. // TODO: If the input elements are power-two vectors, we could convert this to // a new CONCAT_VECTORS node with elements that are half-wide. @@ -1293,6 +1293,66 @@ SDValue DAGTypeLegalizer::SplitVecOp_CONCAT_VECTORS(SDNode *N) { &Elts[0], Elts.size()); } +SDValue DAGTypeLegalizer::SplitVecOp_TRUNCATE(SDNode *N) { + // The result type is legal, but the input type is illegal. If splitting + // ends up with the result type of each half still being legal, just + // do that. If, however, that would result in an illegal result type, + // we can try to get more clever with power-two vectors. Specifically, + // split the input type, but also widen the result element size, then + // concatenate the halves and truncate again. For example, consider a target + // where v8i8 is legal and v8i32 is not (ARM, which doesn't have 256-bit + // vectors). To perform a "%res = v8i8 trunc v8i32 %in" we do: + // %inlo = v4i32 extract_subvector %in, 0 + // %inhi = v4i32 extract_subvector %in, 4 + // %lo16 = v4i16 trunc v4i32 %inlo + // %hi16 = v4i16 trunc v4i32 %inhi + // %in16 = v8i16 concat_vectors v4i16 %lo16, v4i16 %hi16 + // %res = v8i8 trunc v8i16 %in16 + // + // Without this transform, the original truncate would end up being + // scalarized, which is pretty much always a last resort. + SDValue InVec = N->getOperand(0); + EVT InVT = InVec->getValueType(0); + EVT OutVT = N->getValueType(0); + unsigned NumElements = OutVT.getVectorNumElements(); + // Widening should have already made sure this is a power-two vector + // if we're trying to split it at all. assert() that's true, just in case. + assert(!(NumElements & 1) && "Splitting vector, but not in half!"); + + unsigned InElementSize = InVT.getVectorElementType().getSizeInBits(); + unsigned OutElementSize = OutVT.getVectorElementType().getSizeInBits(); + + // If the input elements are only 1/2 the width of the result elements, + // just use the normal splitting. Our trick only work if there's room + // to split more than once. + if (InElementSize <= OutElementSize * 2) + return SplitVecOp_UnaryOp(N); + DebugLoc DL = N->getDebugLoc(); + + // Extract the halves of the input via extract_subvector. + EVT SplitVT = EVT::getVectorVT(*DAG.getContext(), + InVT.getVectorElementType(), NumElements/2); + SDValue InLoVec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, InVec, + DAG.getIntPtrConstant(0)); + SDValue InHiVec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, InVec, + DAG.getIntPtrConstant(NumElements/2)); + // Truncate them to 1/2 the element size. + EVT HalfElementVT = EVT::getIntegerVT(*DAG.getContext(), InElementSize/2); + EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), HalfElementVT, + NumElements/2); + SDValue HalfLo = DAG.getNode(ISD::TRUNCATE, DL, HalfVT, InLoVec); + SDValue HalfHi = DAG.getNode(ISD::TRUNCATE, DL, HalfVT, InHiVec); + // Concatenate them to get the full intermediate truncation result. 
+ EVT InterVT = EVT::getVectorVT(*DAG.getContext(), HalfElementVT, NumElements); + SDValue InterVec = DAG.getNode(ISD::CONCAT_VECTORS, DL, InterVT, HalfLo, + HalfHi); + // Now finish up by truncating all the way down to the original result + // type. This should normally be something that ends up being legal directly, + // but in theory if a target has very wide vectors and an annoyingly + // restricted set of legal types, this split can chain to build things up. + return DAG.getNode(ISD::TRUNCATE, DL, OutVT, InterVec); +} + SDValue DAGTypeLegalizer::SplitVecOp_VSETCC(SDNode *N) { assert(N->getValueType(0).isVector() && N->getOperand(0).getValueType().isVector() && diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 6424431..15235c8 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -2785,7 +2785,7 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, EVT VT, } // Handle the scalar case first. - if (Outputs.size() == 1) + if (Scalar1 && Scalar2) return Outputs.back(); // Otherwise build a big vector out of the scalar elements we generated. @@ -5252,14 +5252,14 @@ SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc, MachineSDNode * SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT) { SDVTList VTs = getVTList(VT); - return getMachineNode(Opcode, dl, VTs, 0, 0); + return getMachineNode(Opcode, dl, VTs, None); } MachineSDNode * SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT, SDValue Op1) { SDVTList VTs = getVTList(VT); SDValue Ops[] = { Op1 }; - return getMachineNode(Opcode, dl, VTs, Ops, array_lengthof(Ops)); + return getMachineNode(Opcode, dl, VTs, Ops); } MachineSDNode * @@ -5267,7 +5267,7 @@ SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT, SDValue Op1, SDValue Op2) { SDVTList VTs = getVTList(VT); SDValue Ops[] = { Op1, Op2 }; - return getMachineNode(Opcode, dl, VTs, Ops, array_lengthof(Ops)); + return getMachineNode(Opcode, dl, VTs, Ops); } MachineSDNode * @@ -5275,20 +5275,20 @@ SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT, SDValue Op1, SDValue Op2, SDValue Op3) { SDVTList VTs = getVTList(VT); SDValue Ops[] = { Op1, Op2, Op3 }; - return getMachineNode(Opcode, dl, VTs, Ops, array_lengthof(Ops)); + return getMachineNode(Opcode, dl, VTs, Ops); } MachineSDNode * SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT, - const SDValue *Ops, unsigned NumOps) { + ArrayRef<SDValue> Ops) { SDVTList VTs = getVTList(VT); - return getMachineNode(Opcode, dl, VTs, Ops, NumOps); + return getMachineNode(Opcode, dl, VTs, Ops); } MachineSDNode * SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT1, EVT VT2) { SDVTList VTs = getVTList(VT1, VT2); - return getMachineNode(Opcode, dl, VTs, 0, 0); + return getMachineNode(Opcode, dl, VTs, None); } MachineSDNode * @@ -5296,7 +5296,7 @@ SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT1, EVT VT2, SDValue Op1) { SDVTList VTs = getVTList(VT1, VT2); SDValue Ops[] = { Op1 }; - return getMachineNode(Opcode, dl, VTs, Ops, array_lengthof(Ops)); + return getMachineNode(Opcode, dl, VTs, Ops); } MachineSDNode * @@ -5304,7 +5304,7 @@ SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT1, EVT VT2, SDValue Op1, SDValue Op2) { SDVTList VTs = getVTList(VT1, VT2); SDValue Ops[] = { Op1, Op2 }; - return getMachineNode(Opcode, dl, VTs, Ops, array_lengthof(Ops)); + return 
getMachineNode(Opcode, dl, VTs, Ops); } MachineSDNode * @@ -5313,15 +5313,15 @@ SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, SDValue Op2, SDValue Op3) { SDVTList VTs = getVTList(VT1, VT2); SDValue Ops[] = { Op1, Op2, Op3 }; - return getMachineNode(Opcode, dl, VTs, Ops, array_lengthof(Ops)); + return getMachineNode(Opcode, dl, VTs, Ops); } MachineSDNode * SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT1, EVT VT2, - const SDValue *Ops, unsigned NumOps) { + ArrayRef<SDValue> Ops) { SDVTList VTs = getVTList(VT1, VT2); - return getMachineNode(Opcode, dl, VTs, Ops, NumOps); + return getMachineNode(Opcode, dl, VTs, Ops); } MachineSDNode * @@ -5330,7 +5330,7 @@ SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, SDValue Op1, SDValue Op2) { SDVTList VTs = getVTList(VT1, VT2, VT3); SDValue Ops[] = { Op1, Op2 }; - return getMachineNode(Opcode, dl, VTs, Ops, array_lengthof(Ops)); + return getMachineNode(Opcode, dl, VTs, Ops); } MachineSDNode * @@ -5339,39 +5339,41 @@ SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, SDValue Op1, SDValue Op2, SDValue Op3) { SDVTList VTs = getVTList(VT1, VT2, VT3); SDValue Ops[] = { Op1, Op2, Op3 }; - return getMachineNode(Opcode, dl, VTs, Ops, array_lengthof(Ops)); + return getMachineNode(Opcode, dl, VTs, Ops); } MachineSDNode * SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT1, EVT VT2, EVT VT3, - const SDValue *Ops, unsigned NumOps) { + ArrayRef<SDValue> Ops) { SDVTList VTs = getVTList(VT1, VT2, VT3); - return getMachineNode(Opcode, dl, VTs, Ops, NumOps); + return getMachineNode(Opcode, dl, VTs, Ops); } MachineSDNode * SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT1, EVT VT2, EVT VT3, EVT VT4, - const SDValue *Ops, unsigned NumOps) { + ArrayRef<SDValue> Ops) { SDVTList VTs = getVTList(VT1, VT2, VT3, VT4); - return getMachineNode(Opcode, dl, VTs, Ops, NumOps); + return getMachineNode(Opcode, dl, VTs, Ops); } MachineSDNode * SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, ArrayRef<EVT> ResultTys, - const SDValue *Ops, unsigned NumOps) { + ArrayRef<SDValue> Ops) { SDVTList VTs = getVTList(&ResultTys[0], ResultTys.size()); - return getMachineNode(Opcode, dl, VTs, Ops, NumOps); + return getMachineNode(Opcode, dl, VTs, Ops); } MachineSDNode * SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc DL, SDVTList VTs, - const SDValue *Ops, unsigned NumOps) { + ArrayRef<SDValue> OpsArray) { bool DoCSE = VTs.VTs[VTs.NumVTs-1] != MVT::Glue; MachineSDNode *N; void *IP = 0; + const SDValue *Ops = OpsArray.data(); + unsigned NumOps = OpsArray.size(); if (DoCSE) { FoldingSetNodeID ID; diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index ce40cd6..67db211 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -314,7 +314,7 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL, } else { Ctx.emitError(ErrMsg); } - report_fatal_error("Cannot handle scalar-to-vector conversion!"); + return DAG.getUNDEF(ValueVT); } if (ValueVT.getVectorNumElements() == 1 && @@ -5034,6 +5034,11 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { setValue(&I, Res); return 0; } + case Intrinsic::annotation: + case Intrinsic::ptr_annotation: + // Drop the intrinsic, but forward the value + setValue(&I, getValue(I.getOperand(0))); + return 0; case Intrinsic::var_annotation: // Discard 
annotate attributes return 0; @@ -5232,6 +5237,7 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, Entry.isSRet = true; Entry.isNest = false; Entry.isByVal = false; + Entry.isReturned = false; Entry.Alignment = Align; Args.push_back(Entry); RetTy = Type::getVoidTy(FTy->getContext()); @@ -5249,13 +5255,14 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, Entry.Node = ArgNode; Entry.Ty = V->getType(); unsigned attrInd = i - CS.arg_begin() + 1; - Entry.isSExt = CS.paramHasAttr(attrInd, Attribute::SExt); - Entry.isZExt = CS.paramHasAttr(attrInd, Attribute::ZExt); - Entry.isInReg = CS.paramHasAttr(attrInd, Attribute::InReg); - Entry.isSRet = CS.paramHasAttr(attrInd, Attribute::StructRet); - Entry.isNest = CS.paramHasAttr(attrInd, Attribute::Nest); - Entry.isByVal = CS.paramHasAttr(attrInd, Attribute::ByVal); - Entry.Alignment = CS.getParamAlignment(attrInd); + Entry.isSExt = CS.paramHasAttr(attrInd, Attribute::SExt); + Entry.isZExt = CS.paramHasAttr(attrInd, Attribute::ZExt); + Entry.isInReg = CS.paramHasAttr(attrInd, Attribute::InReg); + Entry.isSRet = CS.paramHasAttr(attrInd, Attribute::StructRet); + Entry.isNest = CS.paramHasAttr(attrInd, Attribute::Nest); + Entry.isByVal = CS.paramHasAttr(attrInd, Attribute::ByVal); + Entry.isReturned = CS.paramHasAttr(attrInd, Attribute::Returned); + Entry.Alignment = CS.getParamAlignment(attrInd); Args.push_back(Entry); } @@ -6169,10 +6176,17 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { MatchedRegs.RegVTs.push_back(RegVT); MachineRegisterInfo &RegInfo = DAG.getMachineFunction().getRegInfo(); for (unsigned i = 0, e = InlineAsm::getNumOperandRegisters(OpFlag); - i != e; ++i) - MatchedRegs.Regs.push_back - (RegInfo.createVirtualRegister(TLI.getRegClassFor(RegVT))); - + i != e; ++i) { + if (const TargetRegisterClass *RC = TLI.getRegClassFor(RegVT)) + MatchedRegs.Regs.push_back(RegInfo.createVirtualRegister(RC)); + else { + LLVMContext &Ctx = *DAG.getContext(); + Ctx.emitError(CS.getInstruction(), "inline asm error: This value" + " type register class is not natively supported!"); + report_fatal_error("inline asm error: This value type register " + "class is not natively supported!"); + } + } // Use the produced MatchedRegs object to MatchedRegs.getCopyToRegs(InOperandVal, DAG, getCurDebugLoc(), Chain, &Flag, CS.getInstruction()); @@ -6389,6 +6403,28 @@ void SelectionDAGBuilder::visitVACopy(const CallInst &I) { /// migrated to using LowerCall, this hook should be integrated into SDISel. std::pair<SDValue, SDValue> TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { + // Handle the incoming return values from the call. + CLI.Ins.clear(); + SmallVector<EVT, 4> RetTys; + ComputeValueVTs(*this, CLI.RetTy, RetTys); + for (unsigned I = 0, E = RetTys.size(); I != E; ++I) { + EVT VT = RetTys[I]; + MVT RegisterVT = getRegisterType(CLI.RetTy->getContext(), VT); + unsigned NumRegs = getNumRegisters(CLI.RetTy->getContext(), VT); + for (unsigned i = 0; i != NumRegs; ++i) { + ISD::InputArg MyFlags; + MyFlags.VT = RegisterVT; + MyFlags.Used = CLI.IsReturnValueUsed; + if (CLI.RetSExt) + MyFlags.Flags.setSExt(); + if (CLI.RetZExt) + MyFlags.Flags.setZExt(); + if (CLI.IsInReg) + MyFlags.Flags.setInReg(); + CLI.Ins.push_back(MyFlags); + } + } + // Handle all of the outgoing arguments. 
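// --- Illustrative sketch (standalone, simplified; the real code asks the
// target via getRegisterType/getNumRegisters rather than rounding up) ---
// The block moved above builds one ISD::InputArg per register the return
// value will occupy: the return type is decomposed into legal value types,
// and each of those may in turn need several registers, each tagged with the
// sext/zext/inreg flags of the call.
#include <vector>

namespace sketch {
struct RetPiece {
  unsigned RegisterBits;   // width of the register used for this piece
  bool SExt, ZExt, InReg;  // how the piece is extended / passed
};

// PartBits: widths of the legal parts of the return type.
// RegisterBits: width of the register type assumed for each part.
std::vector<RetPiece> expandReturn(const std::vector<unsigned> &PartBits,
                                   unsigned RegisterBits,
                                   bool RetSExt, bool RetZExt, bool InReg) {
  std::vector<RetPiece> Pieces;
  for (unsigned Bits : PartBits) {
    unsigned NumRegs = (Bits + RegisterBits - 1) / RegisterBits; // round up
    for (unsigned i = 0; i != NumRegs; ++i)
      Pieces.push_back({RegisterBits, RetSExt, RetZExt, InReg});
  }
  return Pieces;
}
} // namespace sketch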
CLI.Outs.clear(); CLI.OutVals.clear(); @@ -6442,6 +6478,26 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { else if (Args[i].isZExt) ExtendKind = ISD::ZERO_EXTEND; + // Conservatively only handle 'returned' on non-vectors for now + if (Args[i].isReturned && !Op.getValueType().isVector()) { + assert(CLI.RetTy == Args[i].Ty && RetTys.size() == NumValues && + "unexpected use of 'returned'"); + // Before passing 'returned' to the target lowering code, ensure that + // either the register MVT and the actual EVT are the same size or that + // the return value and argument are extended in the same way; in these + // cases it's safe to pass the argument register value unchanged as the + // return register value (although it's at the target's option whether + // to do so) + // TODO: allow code generation to take advantage of partially preserved + // registers rather than clobbering the entire register when the + // parameter extension method is not compatible with the return + // extension method + if ((NumParts * PartVT.getSizeInBits() == VT.getSizeInBits()) || + (ExtendKind != ISD::ANY_EXTEND && + CLI.RetSExt == Args[i].isSExt && CLI.RetZExt == Args[i].isZExt)) + Flags.setReturned(); + } + getCopyToParts(CLI.DAG, CLI.DL, Op, &Parts[0], NumParts, PartVT, CLI.CS ? CLI.CS->getInstruction() : 0, ExtendKind); @@ -6461,28 +6517,6 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { } } - // Handle the incoming return values from the call. - CLI.Ins.clear(); - SmallVector<EVT, 4> RetTys; - ComputeValueVTs(*this, CLI.RetTy, RetTys); - for (unsigned I = 0, E = RetTys.size(); I != E; ++I) { - EVT VT = RetTys[I]; - MVT RegisterVT = getRegisterType(CLI.RetTy->getContext(), VT); - unsigned NumRegs = getNumRegisters(CLI.RetTy->getContext(), VT); - for (unsigned i = 0; i != NumRegs; ++i) { - ISD::InputArg MyFlags; - MyFlags.VT = RegisterVT; - MyFlags.Used = CLI.IsReturnValueUsed; - if (CLI.RetSExt) - MyFlags.Flags.setSExt(); - if (CLI.RetZExt) - MyFlags.Flags.setZExt(); - if (CLI.IsInReg) - MyFlags.Flags.setInReg(); - CLI.Ins.push_back(MyFlags); - } - } - SmallVector<SDValue, 4> InVals; CLI.Chain = LowerCall(CLI, InVals); diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp index 3b5823b..47b0391 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp @@ -54,7 +54,6 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::DELETED_NODE: return "<<Deleted Node!>>"; #endif case ISD::PREFETCH: return "Prefetch"; - case ISD::MEMBARRIER: return "MemBarrier"; case ISD::ATOMIC_FENCE: return "AtomicFence"; case ISD::ATOMIC_CMP_SWAP: return "AtomicCmpSwap"; case ISD::ATOMIC_SWAP: return "AtomicSwap"; diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index eeea9e4..e21f26e 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -63,12 +63,16 @@ STATISTIC(NumFastIselSuccess, "Number of instructions fast isel selected"); STATISTIC(NumFastIselBlocks, "Number of blocks selected entirely by fast isel"); STATISTIC(NumDAGBlocks, "Number of blocks selected using DAG"); STATISTIC(NumDAGIselRetries,"Number of times dag isel has to try another path"); +STATISTIC(NumEntryBlocks, "Number of entry blocks encountered"); 
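// --- Illustrative sketch (standalone predicate, a restatement of the
// condition above with plain integers) --- The 'returned' handling only tags
// the argument when reusing the argument register as the return register
// cannot change the observable value: either no extra bits are introduced
// (the parts exactly cover the value), or the argument and return value are
// extended the same, non-arbitrary way.
namespace sketch {
struct ExtInfo { bool SExt; bool ZExt; };

bool safeToMarkReturned(unsigned NumParts, unsigned PartBits,
                        unsigned ValueBits, bool ArgAnyExtended,
                        ExtInfo Arg, ExtInfo Ret) {
  const bool NoExtraBits = NumParts * PartBits == ValueBits;
  const bool SameExtension =
      !ArgAnyExtended && Arg.SExt == Ret.SExt && Arg.ZExt == Ret.ZExt;
  return NoExtraBits || SameExtension;
}
} // namespace sketch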
+STATISTIC(NumFastIselFailLowerArguments, + "Number of entry blocks where fast isel failed to lower arguments"); #ifndef NDEBUG static cl::opt<bool> EnableFastISelVerbose2("fast-isel-verbose2", cl::Hidden, cl::desc("Enable extra verbose messages in the \"fast\" " "instruction selector")); + // Terminators STATISTIC(NumFastIselFailRet,"Fast isel fails on Ret"); STATISTIC(NumFastIselFailBr,"Fast isel fails on Br"); @@ -742,7 +746,7 @@ public: } // end anonymous namespace void SelectionDAGISel::DoInstructionSelection() { - DEBUG(errs() << "===== Instruction selection begins: BB#" + DEBUG(dbgs() << "===== Instruction selection begins: BB#" << FuncInfo->MBB->getNumber() << " '" << FuncInfo->MBB->getName() << "'\n"); @@ -801,7 +805,7 @@ void SelectionDAGISel::DoInstructionSelection() { CurDAG->setRoot(Dummy.getValue()); } - DEBUG(errs() << "===== Instruction selection ends:\n"); + DEBUG(dbgs() << "===== Instruction selection ends:\n"); PostprocessISelDAG(); } @@ -831,84 +835,6 @@ void SelectionDAGISel::PrepareEHLandingPad() { if (Reg) MBB->addLiveIn(Reg); } -/// TryToFoldFastISelLoad - We're checking to see if we can fold the specified -/// load into the specified FoldInst. Note that we could have a sequence where -/// multiple LLVM IR instructions are folded into the same machineinstr. For -/// example we could have: -/// A: x = load i32 *P -/// B: y = icmp A, 42 -/// C: br y, ... -/// -/// In this scenario, LI is "A", and FoldInst is "C". We know about "B" (and -/// any other folded instructions) because it is between A and C. -/// -/// If we succeed in folding the load into the operation, return true. -/// -bool SelectionDAGISel::TryToFoldFastISelLoad(const LoadInst *LI, - const Instruction *FoldInst, - FastISel *FastIS) { - // We know that the load has a single use, but don't know what it is. If it - // isn't one of the folded instructions, then we can't succeed here. Handle - // this by scanning the single-use users of the load until we get to FoldInst. - unsigned MaxUsers = 6; // Don't scan down huge single-use chains of instrs. - - const Instruction *TheUser = LI->use_back(); - while (TheUser != FoldInst && // Scan up until we find FoldInst. - // Stay in the right block. - TheUser->getParent() == FoldInst->getParent() && - --MaxUsers) { // Don't scan too far. - // If there are multiple or no uses of this instruction, then bail out. - if (!TheUser->hasOneUse()) - return false; - - TheUser = TheUser->use_back(); - } - - // If we didn't find the fold instruction, then we failed to collapse the - // sequence. - if (TheUser != FoldInst) - return false; - - // Don't try to fold volatile loads. Target has to deal with alignment - // constraints. - if (LI->isVolatile()) return false; - - // Figure out which vreg this is going into. If there is no assigned vreg yet - // then there actually was no reference to it. Perhaps the load is referenced - // by a dead instruction. - unsigned LoadReg = FastIS->getRegForValue(LI); - if (LoadReg == 0) - return false; - - // Check to see what the uses of this vreg are. If it has no uses, or more - // than one use (at the machine instr level) then we can't fold it. - MachineRegisterInfo::reg_iterator RI = RegInfo->reg_begin(LoadReg); - if (RI == RegInfo->reg_end()) - return false; - - // See if there is exactly one use of the vreg. If there are multiple uses, - // then the instruction got lowered to multiple machine instructions or the - // use of the loaded value ended up being multiple operands of the result, in - // either case, we can't fold this. 
- MachineRegisterInfo::reg_iterator PostRI = RI; ++PostRI; - if (PostRI != RegInfo->reg_end()) - return false; - - assert(RI.getOperand().isUse() && - "The only use of the vreg must be a use, we haven't emitted the def!"); - - MachineInstr *User = &*RI; - - // Set the insertion point properly. Folding the load can cause generation of - // other random instructions (like sign extends) for addressing modes, make - // sure they get inserted in a logical place before the new instruction. - FuncInfo->InsertPt = User; - FuncInfo->MBB = User->getParent(); - - // Ask the target to try folding the load. - return FastIS->TryToFoldLoad(User, RI.getOperandNo(), LI); -} - /// isFoldedOrDeadInstruction - Return true if the specified instruction is /// side-effect free and is either dead or folded into a generated instruction. /// Return false if it needs to be emitted. @@ -1054,9 +980,12 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { // Emit code for any incoming arguments. This must happen before // beginning FastISel on the entry block. if (LLVMBB == &Fn.getEntryBlock()) { + ++NumEntryBlocks; + // Lower any arguments needed in this block if this is the entry block. if (!FastIS->LowerArguments()) { // Fast isel failed to lower these arguments + ++NumFastIselFailLowerArguments; if (EnableFastISelAbortArgs) llvm_unreachable("FastISel didn't lower all arguments"); @@ -1106,7 +1035,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { } if (BeforeInst != Inst && isa<LoadInst>(BeforeInst) && BeforeInst->hasOneUse() && - TryToFoldFastISelLoad(cast<LoadInst>(BeforeInst), Inst, FastIS)) { + FastIS->tryToFoldLoad(cast<LoadInst>(BeforeInst), Inst)) { // If we succeeded, don't re-select the load. BI = llvm::next(BasicBlock::const_iterator(BeforeInst)); --NumFastIselRemaining; @@ -1178,8 +1107,10 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { FastIS->recomputeInsertPt(); } else { // Lower any arguments needed in this block if this is the entry block. - if (LLVMBB == &Fn.getEntryBlock()) + if (LLVMBB == &Fn.getEntryBlock()) { + ++NumEntryBlocks; LowerArguments(Fn); + } } if (Begin != BI) @@ -1771,7 +1702,7 @@ UpdateChainsAndGlue(SDNode *NodeToMatch, SDValue InputChain, if (!NowDeadNodes.empty()) CurDAG->RemoveDeadNodes(NowDeadNodes); - DEBUG(errs() << "ISEL: Match complete!\n"); + DEBUG(dbgs() << "ISEL: Match complete!\n"); } enum ChainResult { @@ -2276,9 +2207,9 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, SmallVector<SDNode*, 3> ChainNodesMatched; SmallVector<SDNode*, 3> GlueResultNodesMatched; - DEBUG(errs() << "ISEL: Starting pattern match on root node: "; + DEBUG(dbgs() << "ISEL: Starting pattern match on root node: "; NodeToMatch->dump(CurDAG); - errs() << '\n'); + dbgs() << '\n'); // Determine where to start the interpreter. Normally we start at opcode #0, // but if the state machine starts with an OPC_SwitchOpcode, then we @@ -2290,7 +2221,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, // Already computed the OpcodeOffset table, just index into it. 
if (N.getOpcode() < OpcodeOffset.size()) MatcherIndex = OpcodeOffset[N.getOpcode()]; - DEBUG(errs() << " Initial Opcode index to " << MatcherIndex << "\n"); + DEBUG(dbgs() << " Initial Opcode index to " << MatcherIndex << "\n"); } else if (MatcherTable[0] == OPC_SwitchOpcode) { // Otherwise, the table isn't computed, but the state machine does start @@ -2357,7 +2288,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, if (!Result) break; - DEBUG(errs() << " Skipped scope entry (due to false predicate) at " + DEBUG(dbgs() << " Skipped scope entry (due to false predicate) at " << "index " << MatcherIndexOfPredicate << ", continuing at " << FailIndex << "\n"); ++NumDAGIselRetries; @@ -2487,7 +2418,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, if (CaseSize == 0) break; // Otherwise, execute the case we found. - DEBUG(errs() << " OpcodeSwitch from " << SwitchStart + DEBUG(dbgs() << " OpcodeSwitch from " << SwitchStart << " to " << MatcherIndex << "\n"); continue; } @@ -2519,7 +2450,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, if (CaseSize == 0) break; // Otherwise, execute the case we found. - DEBUG(errs() << " TypeSwitch[" << EVT(CurNodeVT).getEVTString() + DEBUG(dbgs() << " TypeSwitch[" << EVT(CurNodeVT).getEVTString() << "] from " << SwitchStart << " to " << MatcherIndex<<'\n'); continue; } @@ -2787,7 +2718,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, // If this is a normal EmitNode command, just create the new node and // add the results to the RecordedNodes list. Res = CurDAG->getMachineNode(TargetOpc, NodeToMatch->getDebugLoc(), - VTList, Ops.data(), Ops.size()); + VTList, Ops); // Add all the non-glue/non-chain results to the RecordedNodes list. for (unsigned i = 0, e = VTs.size(); i != e; ++i) { @@ -2863,9 +2794,9 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, ->setMemRefs(MemRefs, MemRefs + NumMemRefs); } - DEBUG(errs() << " " + DEBUG(dbgs() << " " << (Opcode == OPC_MorphNodeTo ? "Morphed" : "Created") - << " node: "; Res->dump(CurDAG); errs() << "\n"); + << " node: "; Res->dump(CurDAG); dbgs() << "\n"); // If this was a MorphNodeTo then we're completely done! if (Opcode == OPC_MorphNodeTo) { @@ -2940,7 +2871,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, // If the code reached this point, then the match failed. See if there is // another child to try in the current 'Scope', otherwise pop it until we // find a case to check. - DEBUG(errs() << " Match failed at index " << CurrentOpcodeIndex << "\n"); + DEBUG(dbgs() << " Match failed at index " << CurrentOpcodeIndex << "\n"); ++NumDAGIselRetries; while (1) { if (MatchScopes.empty()) { @@ -2960,7 +2891,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, MatchedMemRefs.resize(LastScope.NumMatchedMemRefs); MatcherIndex = LastScope.FailIndex; - DEBUG(errs() << " Continuing at " << MatcherIndex << "\n"); + DEBUG(dbgs() << " Continuing at " << MatcherIndex << "\n"); InputChain = LastScope.InputChain; InputGlue = LastScope.InputGlue; diff --git a/contrib/llvm/lib/CodeGen/ShrinkWrapping.cpp b/contrib/llvm/lib/CodeGen/ShrinkWrapping.cpp index 9ab4918..2feea59 100644 --- a/contrib/llvm/lib/CodeGen/ShrinkWrapping.cpp +++ b/contrib/llvm/lib/CodeGen/ShrinkWrapping.cpp @@ -70,14 +70,14 @@ ShrinkWrapFunc("shrink-wrap-func", cl::Hidden, // Debugging level for shrink wrapping. 
enum ShrinkWrapDebugLevel { - None, BasicInfo, Iterations, Details + Disabled, BasicInfo, Iterations, Details }; static cl::opt<enum ShrinkWrapDebugLevel> ShrinkWrapDebugging("shrink-wrap-dbg", cl::Hidden, cl::desc("Print shrink wrapping debugging information"), cl::values( - clEnumVal(None , "disable debug output"), + clEnumVal(Disabled , "disable debug output"), clEnumVal(BasicInfo , "print basic DF sets"), clEnumVal(Iterations, "print SR sets for each iteration"), clEnumVal(Details , "print all DF sets"), diff --git a/contrib/llvm/lib/CodeGen/TargetLoweringBase.cpp b/contrib/llvm/lib/CodeGen/TargetLoweringBase.cpp index f42bdbd..8074d16 100644 --- a/contrib/llvm/lib/CodeGen/TargetLoweringBase.cpp +++ b/contrib/llvm/lib/CodeGen/TargetLoweringBase.cpp @@ -620,12 +620,55 @@ static void InitCmpLibcallCCs(ISD::CondCode *CCs) { TargetLoweringBase::TargetLoweringBase(const TargetMachine &tm, const TargetLoweringObjectFile *tlof) : TM(tm), TD(TM.getDataLayout()), TLOF(*tlof) { + initActions(); + + // Perform these initializations only once. + IsLittleEndian = TD->isLittleEndian(); + PointerTy = MVT::getIntegerVT(8*TD->getPointerSize(0)); + MaxStoresPerMemset = MaxStoresPerMemcpy = MaxStoresPerMemmove = 8; + MaxStoresPerMemsetOptSize = MaxStoresPerMemcpyOptSize + = MaxStoresPerMemmoveOptSize = 4; + UseUnderscoreSetJmp = false; + UseUnderscoreLongJmp = false; + SelectIsExpensive = false; + IntDivIsCheap = false; + Pow2DivIsCheap = false; + JumpIsExpensive = false; + PredictableSelectIsExpensive = false; + StackPointerRegisterToSaveRestore = 0; + ExceptionPointerRegister = 0; + ExceptionSelectorRegister = 0; + BooleanContents = UndefinedBooleanContent; + BooleanVectorContents = UndefinedBooleanContent; + SchedPreferenceInfo = Sched::ILP; + JumpBufSize = 0; + JumpBufAlignment = 0; + MinFunctionAlignment = 0; + PrefFunctionAlignment = 0; + PrefLoopAlignment = 0; + MinStackArgumentAlignment = 1; + InsertFencesForAtomic = false; + SupportJumpTables = true; + MinimumJumpTableEntries = 4; + + InitLibcallNames(LibcallRoutineNames, TM); + InitCmpLibcallCCs(CmpLibcallCCs); + InitLibcallCallingConvs(LibcallCallingConvs); +} + +TargetLoweringBase::~TargetLoweringBase() { + delete &TLOF; +} + +void TargetLoweringBase::initActions() { // All operations default to being supported. memset(OpActions, 0, sizeof(OpActions)); memset(LoadExtActions, 0, sizeof(LoadExtActions)); memset(TruncStoreActions, 0, sizeof(TruncStoreActions)); memset(IndexedModeActions, 0, sizeof(IndexedModeActions)); memset(CondCodeActions, 0, sizeof(CondCodeActions)); + memset(RegClassForVT, 0,MVT::LAST_VALUETYPE*sizeof(TargetRegisterClass*)); + memset(TargetDAGCombineArray, 0, array_lengthof(TargetDAGCombineArray)); // Set default actions for various operations. for (unsigned VT = 0; VT != (unsigned)MVT::LAST_VALUETYPE; ++VT) { @@ -702,45 +745,6 @@ TargetLoweringBase::TargetLoweringBase(const TargetMachine &tm, // here is to inform DAG Legalizer to replace DEBUGTRAP with TRAP. 
// setOperationAction(ISD::DEBUGTRAP, MVT::Other, Expand); - - IsLittleEndian = TD->isLittleEndian(); - PointerTy = MVT::getIntegerVT(8*TD->getPointerSize(0)); - memset(RegClassForVT, 0,MVT::LAST_VALUETYPE*sizeof(TargetRegisterClass*)); - memset(TargetDAGCombineArray, 0, array_lengthof(TargetDAGCombineArray)); - MaxStoresPerMemset = MaxStoresPerMemcpy = MaxStoresPerMemmove = 8; - MaxStoresPerMemsetOptSize = MaxStoresPerMemcpyOptSize - = MaxStoresPerMemmoveOptSize = 4; - UseUnderscoreSetJmp = false; - UseUnderscoreLongJmp = false; - SelectIsExpensive = false; - IntDivIsCheap = false; - Pow2DivIsCheap = false; - JumpIsExpensive = false; - PredictableSelectIsExpensive = false; - StackPointerRegisterToSaveRestore = 0; - ExceptionPointerRegister = 0; - ExceptionSelectorRegister = 0; - BooleanContents = UndefinedBooleanContent; - BooleanVectorContents = UndefinedBooleanContent; - SchedPreferenceInfo = Sched::ILP; - JumpBufSize = 0; - JumpBufAlignment = 0; - MinFunctionAlignment = 0; - PrefFunctionAlignment = 0; - PrefLoopAlignment = 0; - MinStackArgumentAlignment = 1; - ShouldFoldAtomicFences = false; - InsertFencesForAtomic = false; - SupportJumpTables = true; - MinimumJumpTableEntries = 4; - - InitLibcallNames(LibcallRoutineNames, TM); - InitCmpLibcallCCs(CmpLibcallCCs); - InitLibcallCallingConvs(LibcallCallingConvs); -} - -TargetLoweringBase::~TargetLoweringBase() { - delete &TLOF; } MVT TargetLoweringBase::getScalarShiftAmountTy(EVT LHSTy) const { diff --git a/contrib/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/contrib/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp index 3bdca4c..7e7359a 100644 --- a/contrib/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp +++ b/contrib/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp @@ -523,11 +523,6 @@ getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind, const MCSection *TargetLoweringObjectFileMachO:: SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, Mangler *Mang, const TargetMachine &TM) const { - - // Handle thread local data. - if (Kind.isThreadBSS()) return TLSBSSSection; - if (Kind.isThreadData()) return TLSDataSection; - if (Kind.isText()) return GV->isWeakForLinker() ? TextCoalSection : TextSection; @@ -580,6 +575,10 @@ SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, if (Kind.isBSSLocal()) return DataBSSSection; + // Handle thread local data. + if (Kind.isThreadBSS()) return TLSBSSSection; + if (Kind.isThreadData()) return TLSDataSection; + // Otherwise, just drop the variable in the normal data section. return DataSection; } @@ -782,3 +781,49 @@ SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, return getDataSection(); } +void TargetLoweringObjectFileCOFF:: +emitModuleFlags(MCStreamer &Streamer, + ArrayRef<Module::ModuleFlagEntry> ModuleFlags, + Mangler *Mang, const TargetMachine &TM) const { + MDNode *LinkerOptions = 0; + + // Look for the "Linker Options" flag, since it's the only one we support. + for (ArrayRef<Module::ModuleFlagEntry>::iterator + i = ModuleFlags.begin(), e = ModuleFlags.end(); i != e; ++i) { + const Module::ModuleFlagEntry &MFE = *i; + StringRef Key = MFE.Key->getString(); + Value *Val = MFE.Val; + if (Key == "Linker Options") { + LinkerOptions = cast<MDNode>(Val); + break; + } + } + if (!LinkerOptions) + return; + + // Emit the linker options to the linker .drectve section. According to the + // spec, this section is a space-separated string containing flags for linker. 
+ const MCSection *Sec = getDrectveSection(); + Streamer.SwitchSection(Sec); + for (unsigned i = 0, e = LinkerOptions->getNumOperands(); i != e; ++i) { + MDNode *MDOptions = cast<MDNode>(LinkerOptions->getOperand(i)); + for (unsigned ii = 0, ie = MDOptions->getNumOperands(); ii != ie; ++ii) { + MDString *MDOption = cast<MDString>(MDOptions->getOperand(ii)); + StringRef Op = MDOption->getString(); + // Lead with a space for consistency with our dllexport implementation. + std::string Escaped(" "); + if (Op.find(" ") != StringRef::npos) { + // The PE-COFF spec says args with spaces must be quoted. It doesn't say + // how to escape quotes, but it probably uses this algorithm: + // http://msdn.microsoft.com/en-us/library/17w5ykft(v=vs.85).aspx + // FIXME: Reuse escaping code from Support/Windows/Program.inc + Escaped.push_back('\"'); + Escaped.append(Op); + Escaped.push_back('\"'); + } else { + Escaped.append(Op); + } + Streamer.EmitBytes(Escaped); + } + } +} diff --git a/contrib/llvm/lib/CodeGen/TargetOptionsImpl.cpp b/contrib/llvm/lib/CodeGen/TargetOptionsImpl.cpp index 0f59d01..435a5e7 100644 --- a/contrib/llvm/lib/CodeGen/TargetOptionsImpl.cpp +++ b/contrib/llvm/lib/CodeGen/TargetOptionsImpl.cpp @@ -50,3 +50,29 @@ StringRef TargetOptions::getTrapFunctionName() const { return TrapFuncName; } +bool TargetOptions::operator==(const TargetOptions &TO) { +#define ARE_EQUAL(X) X == TO.X + return + ARE_EQUAL(UnsafeFPMath) && + ARE_EQUAL(NoInfsFPMath) && + ARE_EQUAL(NoNaNsFPMath) && + ARE_EQUAL(HonorSignDependentRoundingFPMathOption) && + ARE_EQUAL(UseSoftFloat) && + ARE_EQUAL(NoZerosInBSS) && + ARE_EQUAL(JITExceptionHandling) && + ARE_EQUAL(JITEmitDebugInfo) && + ARE_EQUAL(JITEmitDebugInfoToDisk) && + ARE_EQUAL(GuaranteedTailCallOpt) && + ARE_EQUAL(DisableTailCalls) && + ARE_EQUAL(StackAlignmentOverride) && + ARE_EQUAL(RealignStack) && + ARE_EQUAL(SSPBufferSize) && + ARE_EQUAL(EnableFastISel) && + ARE_EQUAL(PositionIndependentExecutable) && + ARE_EQUAL(EnableSegmentedStacks) && + ARE_EQUAL(UseInitArray) && + ARE_EQUAL(TrapFuncName) && + ARE_EQUAL(FloatABIType) && + ARE_EQUAL(AllowFPOpFusion); +#undef ARE_EQUAL +} diff --git a/contrib/llvm/lib/CodeGen/TargetSchedule.cpp b/contrib/llvm/lib/CodeGen/TargetSchedule.cpp index 783bfa1..1bf14db 100644 --- a/contrib/llvm/lib/CodeGen/TargetSchedule.cpp +++ b/contrib/llvm/lib/CodeGen/TargetSchedule.cpp @@ -128,6 +128,8 @@ resolveSchedClass(const MachineInstr *MI) const { // Get the definition's scheduling class descriptor from this machine model. 
unsigned SchedClass = MI->getDesc().getSchedClass(); const MCSchedClassDesc *SCDesc = SchedModel.getSchedClassDesc(SchedClass); + if (!SCDesc->isValid()) + return SCDesc; #ifndef NDEBUG unsigned NIter = 0; diff --git a/contrib/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp b/contrib/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp index e6dfe10..7ca2bee 100644 --- a/contrib/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp +++ b/contrib/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp @@ -43,6 +43,7 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/IR/Function.h" #include "llvm/MC/MCInstrItineraries.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Target/TargetInstrInfo.h" @@ -58,6 +59,12 @@ STATISTIC(Num3AddrSunk, "Number of 3-address instructions sunk"); STATISTIC(NumReSchedUps, "Number of instructions re-scheduled up"); STATISTIC(NumReSchedDowns, "Number of instructions re-scheduled down"); +// Temporary flag to disable rescheduling. +static cl::opt<bool> +EnableRescheduling("twoaddr-reschedule", + cl::desc("Coalesce copies by rescheduling (default=true)"), + cl::init(true), cl::Hidden); + namespace { class TwoAddressInstructionPass : public MachineFunctionPass { MachineFunction *MF; @@ -426,10 +433,7 @@ static bool isKilled(MachineInstr &MI, unsigned Reg, /// isTwoAddrUse - Return true if the specified MI uses the specified register /// as a two-address use. If so, return the destination register by reference. static bool isTwoAddrUse(MachineInstr &MI, unsigned Reg, unsigned &DstReg) { - const MCInstrDesc &MCID = MI.getDesc(); - unsigned NumOps = MI.isInlineAsm() - ? MI.getNumOperands() : MCID.getNumOperands(); - for (unsigned i = 0; i != NumOps; ++i) { + for (unsigned i = 0, NumOps = MI.getNumOperands(); i != NumOps; ++i) { const MachineOperand &MO = MI.getOperand(i); if (!MO.isReg() || !MO.isUse() || MO.getReg() != Reg) continue; @@ -1144,7 +1148,7 @@ tryInstructionTransform(MachineBasicBlock::iterator &mi, // If there is one more use of regB later in the same MBB, consider // re-schedule this MI below it. - if (rescheduleMIBelowKill(mi, nmi, regB)) { + if (EnableRescheduling && rescheduleMIBelowKill(mi, nmi, regB)) { ++NumReSchedDowns; return true; } @@ -1163,7 +1167,7 @@ tryInstructionTransform(MachineBasicBlock::iterator &mi, // If there is one more use of regB later in the same MBB, consider // re-schedule it before this MI if it's legal. 
- if (rescheduleKillAboveMI(mi, nmi, regB)) { + if (EnableRescheduling && rescheduleKillAboveMI(mi, nmi, regB)) { ++NumReSchedUps; return true; } diff --git a/contrib/llvm/lib/DebugInfo/DWARFCompileUnit.cpp b/contrib/llvm/lib/DebugInfo/DWARFCompileUnit.cpp index e3e4ccd..4f0eed4 100644 --- a/contrib/llvm/lib/DebugInfo/DWARFCompileUnit.cpp +++ b/contrib/llvm/lib/DebugInfo/DWARFCompileUnit.cpp @@ -9,7 +9,7 @@ #include "DWARFCompileUnit.h" #include "DWARFContext.h" -#include "DWARFFormValue.h" +#include "llvm/DebugInfo/DWARFFormValue.h" #include "llvm/Support/Dwarf.h" #include "llvm/Support/Format.h" #include "llvm/Support/raw_ostream.h" @@ -165,7 +165,7 @@ size_t DWARFCompileUnit::extractDIEsIfNeeded(bool cu_die_only) { // we were told to parse const uint8_t *fixed_form_sizes = - DWARFFormValue::getFixedFormSizesForAddressSize(getAddressByteSize()); + DWARFFormValue::getFixedFormSizes(getAddressByteSize(), getVersion()); while (offset < next_cu_offset && die.extractFast(this, fixed_form_sizes, &offset)) { diff --git a/contrib/llvm/lib/DebugInfo/DWARFContext.cpp b/contrib/llvm/lib/DebugInfo/DWARFContext.cpp index 9e19310..9f52133 100644 --- a/contrib/llvm/lib/DebugInfo/DWARFContext.cpp +++ b/contrib/llvm/lib/DebugInfo/DWARFContext.cpp @@ -9,6 +9,9 @@ #include "DWARFContext.h" #include "llvm/ADT/SmallString.h" +#include "llvm/ADT/StringSwitch.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/Support/Compression.h" #include "llvm/Support/Dwarf.h" #include "llvm/Support/Format.h" #include "llvm/Support/Path.h" @@ -107,36 +110,43 @@ void DWARFContext::dump(raw_ostream &OS, DIDumpType DumpType) { } if (DumpType == DIDT_All || DumpType == DIDT_AbbrevDwo) { - OS << "\n.debug_abbrev.dwo contents:\n"; - getDebugAbbrevDWO()->dump(OS); + const DWARFDebugAbbrev *D = getDebugAbbrevDWO(); + if (D) { + OS << "\n.debug_abbrev.dwo contents:\n"; + getDebugAbbrevDWO()->dump(OS); + } } - if (DumpType == DIDT_All || DumpType == DIDT_InfoDwo) { - OS << "\n.debug_info.dwo contents:\n"; - for (unsigned i = 0, e = getNumDWOCompileUnits(); i != e; ++i) - getDWOCompileUnitAtIndex(i)->dump(OS); - } + if (DumpType == DIDT_All || DumpType == DIDT_InfoDwo) + if (getNumDWOCompileUnits()) { + OS << "\n.debug_info.dwo contents:\n"; + for (unsigned i = 0, e = getNumDWOCompileUnits(); i != e; ++i) + getDWOCompileUnitAtIndex(i)->dump(OS); + } - if (DumpType == DIDT_All || DumpType == DIDT_StrDwo) { - OS << "\n.debug_str.dwo contents:\n"; - DataExtractor strDWOData(getStringDWOSection(), isLittleEndian(), 0); - offset = 0; - uint32_t strDWOOffset = 0; - while (const char *s = strDWOData.getCStr(&offset)) { - OS << format("0x%8.8x: \"%s\"\n", strDWOOffset, s); - strDWOOffset = offset; + if (DumpType == DIDT_All || DumpType == DIDT_StrDwo) + if (!getStringDWOSection().empty()) { + OS << "\n.debug_str.dwo contents:\n"; + DataExtractor strDWOData(getStringDWOSection(), isLittleEndian(), 0); + offset = 0; + uint32_t strDWOOffset = 0; + while (const char *s = strDWOData.getCStr(&offset)) { + OS << format("0x%8.8x: \"%s\"\n", strDWOOffset, s); + strDWOOffset = offset; + } } - } - if (DumpType == DIDT_All || DumpType == DIDT_StrOffsetsDwo) { - OS << "\n.debug_str_offsets.dwo contents:\n"; - DataExtractor strOffsetExt(getStringOffsetDWOSection(), isLittleEndian(), 0); - offset = 0; - while (offset < getStringOffsetDWOSection().size()) { - OS << format("0x%8.8x: ", offset); - OS << format("%8.8x\n", strOffsetExt.getU32(&offset)); + if (DumpType == DIDT_All || DumpType == DIDT_StrOffsetsDwo) + if (!getStringOffsetDWOSection().empty()) { + 
OS << "\n.debug_str_offsets.dwo contents:\n"; + DataExtractor strOffsetExt(getStringOffsetDWOSection(), isLittleEndian(), 0); + offset = 0; + uint64_t size = getStringOffsetDWOSection().size(); + while (offset < size) { + OS << format("0x%8.8x: ", offset); + OS << format("%8.8x\n", strOffsetExt.getU32(&offset)); + } } - } } const DWARFDebugAbbrev *DWARFContext::getDebugAbbrev() { @@ -482,6 +492,22 @@ DIInliningInfo DWARFContext::getInliningInfoForAddress(uint64_t Address, return InliningInfo; } +static bool consumeCompressedDebugSectionHeader(StringRef &data, + uint64_t &OriginalSize) { + // Consume "ZLIB" prefix. + if (!data.startswith("ZLIB")) + return false; + data = data.substr(4); + // Consume uncompressed section size (big-endian 8 bytes). + DataExtractor extractor(data, false, 8); + uint32_t Offset = 0; + OriginalSize = extractor.getU64(&Offset); + if (Offset == 0) + return false; + data = data.substr(Offset); + return true; +} + DWARFContextInMemory::DWARFContextInMemory(object::ObjectFile *Obj) : IsLittleEndian(Obj->isLittleEndian()), AddressSize(Obj->getBytesInAddress()) { @@ -495,49 +521,55 @@ DWARFContextInMemory::DWARFContextInMemory(object::ObjectFile *Obj) : i->getContents(data); name = name.substr(name.find_first_not_of("._")); // Skip . and _ prefixes. - if (name == "debug_info") - InfoSection = data; - else if (name == "debug_abbrev") - AbbrevSection = data; - else if (name == "debug_line") - LineSection = data; - else if (name == "debug_aranges") - ARangeSection = data; - else if (name == "debug_frame") - DebugFrameSection = data; - else if (name == "debug_str") - StringSection = data; - else if (name == "debug_ranges") { + + // Check if debug info section is compressed with zlib. + if (name.startswith("zdebug_")) { + uint64_t OriginalSize; + if (!zlib::isAvailable() || + !consumeCompressedDebugSectionHeader(data, OriginalSize)) + continue; + OwningPtr<MemoryBuffer> UncompressedSection; + if (zlib::uncompress(data, UncompressedSection, OriginalSize) != + zlib::StatusOK) + continue; + // Make data point to uncompressed section contents and save its contents. + name = name.substr(1); + data = UncompressedSection->getBuffer(); + UncompressedSections.push_back(UncompressedSection.take()); + } + + StringRef *Section = StringSwitch<StringRef*>(name) + .Case("debug_info", &InfoSection) + .Case("debug_abbrev", &AbbrevSection) + .Case("debug_line", &LineSection) + .Case("debug_aranges", &ARangeSection) + .Case("debug_frame", &DebugFrameSection) + .Case("debug_str", &StringSection) + .Case("debug_ranges", &RangeSection) + .Case("debug_pubnames", &PubNamesSection) + .Case("debug_info.dwo", &InfoDWOSection) + .Case("debug_abbrev.dwo", &AbbrevDWOSection) + .Case("debug_str.dwo", &StringDWOSection) + .Case("debug_str_offsets.dwo", &StringOffsetDWOSection) + .Case("debug_addr", &AddrSection) + // Any more debug info sections go here. + .Default(0); + if (!Section) + continue; + *Section = data; + if (name == "debug_ranges") { // FIXME: Use the other dwo range section when we emit it. RangeDWOSection = data; - RangeSection = data; } - else if (name == "debug_pubnames") - PubNamesSection = data; - else if (name == "debug_info.dwo") - InfoDWOSection = data; - else if (name == "debug_abbrev.dwo") - AbbrevDWOSection = data; - else if (name == "debug_str.dwo") - StringDWOSection = data; - else if (name == "debug_str_offsets.dwo") - StringOffsetDWOSection = data; - else if (name == "debug_addr") - AddrSection = data; - // Any more debug info sections go here. 
- else - continue; // TODO: Add support for relocations in other sections as needed. // Record relocations for the debug_info and debug_line sections. - RelocAddrMap *Map; - if (name == "debug_info") - Map = &InfoRelocMap; - else if (name == "debug_info.dwo") - Map = &InfoDWORelocMap; - else if (name == "debug_line") - Map = &LineRelocMap; - else + RelocAddrMap *Map = StringSwitch<RelocAddrMap*>(name) + .Case("debug_info", &InfoRelocMap) + .Case("debug_info.dwo", &InfoDWORelocMap) + .Case("debug_line", &LineRelocMap) + .Default(0); + if (!Map) continue; if (i->begin_relocations() != i->end_relocations()) { @@ -547,7 +579,7 @@ DWARFContextInMemory::DWARFContextInMemory(object::ObjectFile *Obj) : reloc_e = i->end_relocations(); reloc_i != reloc_e; reloc_i.increment(ec)) { uint64_t Address; - reloc_i->getAddress(Address); + reloc_i->getOffset(Address); uint64_t Type; reloc_i->getType(Type); uint64_t SymAddr = 0; @@ -593,4 +625,8 @@ DWARFContextInMemory::DWARFContextInMemory(object::ObjectFile *Obj) : } } +DWARFContextInMemory::~DWARFContextInMemory() { + DeleteContainerPointers(UncompressedSections); +} + void DWARFContextInMemory::anchor() { } diff --git a/contrib/llvm/lib/DebugInfo/DWARFContext.h b/contrib/llvm/lib/DebugInfo/DWARFContext.h index 37b2729..78c18e6 100644 --- a/contrib/llvm/lib/DebugInfo/DWARFContext.h +++ b/contrib/llvm/lib/DebugInfo/DWARFContext.h @@ -161,8 +161,11 @@ class DWARFContextInMemory : public DWARFContext { StringRef RangeDWOSection; StringRef AddrSection; + SmallVector<MemoryBuffer*, 4> UncompressedSections; + public: DWARFContextInMemory(object::ObjectFile *); + ~DWARFContextInMemory(); virtual bool isLittleEndian() const { return IsLittleEndian; } virtual uint8_t getAddressSize() const { return AddressSize; } virtual const RelocAddrMap &infoRelocMap() const { return InfoRelocMap; } diff --git a/contrib/llvm/lib/DebugInfo/DWARFDebugInfoEntry.cpp b/contrib/llvm/lib/DebugInfo/DWARFDebugInfoEntry.cpp index 02b15d6..10be7b4 100644 --- a/contrib/llvm/lib/DebugInfo/DWARFDebugInfoEntry.cpp +++ b/contrib/llvm/lib/DebugInfo/DWARFDebugInfoEntry.cpp @@ -11,7 +11,7 @@ #include "DWARFCompileUnit.h" #include "DWARFContext.h" #include "DWARFDebugAbbrev.h" -#include "DWARFFormValue.h" +#include "llvm/DebugInfo/DWARFFormValue.h" #include "llvm/Support/Debug.h" #include "llvm/Support/Dwarf.h" #include "llvm/Support/Format.h" @@ -94,279 +94,87 @@ void DWARFDebugInfoEntryMinimal::dumpAttribute(raw_ostream &OS, OS << ")\n"; } -bool DWARFDebugInfoEntryMinimal::extractFast(const DWARFCompileUnit *cu, - const uint8_t *fixed_form_sizes, - uint32_t *offset_ptr) { - Offset = *offset_ptr; - - DataExtractor debug_info_data = cu->getDebugInfoExtractor(); - uint64_t abbrCode = debug_info_data.getULEB128(offset_ptr); - - assert(fixed_form_sizes); // For best performance this should be specified! - - if (abbrCode) { - uint32_t offset = *offset_ptr; - - AbbrevDecl = cu->getAbbreviations()->getAbbreviationDeclaration(abbrCode); - - // Skip all data in the .debug_info for the attributes - const uint32_t numAttributes = AbbrevDecl->getNumAttributes(); - uint32_t i; - uint16_t form; - for (i=0; i<numAttributes; ++i) { - - form = AbbrevDecl->getFormByIndex(i); - - // FIXME: Currently we're checking if this is less than the last - // entry in the fixed_form_sizes table, but this should be changed - // to use dynamic dispatch. - const uint8_t fixed_skip_size = (form < DW_FORM_ref_sig8) ? 
- fixed_form_sizes[form] : 0; - if (fixed_skip_size) - offset += fixed_skip_size; - else { - bool form_is_indirect = false; - do { - form_is_indirect = false; - uint32_t form_size = 0; - switch (form) { - // Blocks if inlined data that have a length field and the data bytes - // inlined in the .debug_info. - case DW_FORM_exprloc: - case DW_FORM_block: - form_size = debug_info_data.getULEB128(&offset); - break; - case DW_FORM_block1: - form_size = debug_info_data.getU8(&offset); - break; - case DW_FORM_block2: - form_size = debug_info_data.getU16(&offset); - break; - case DW_FORM_block4: - form_size = debug_info_data.getU32(&offset); - break; - - // Inlined NULL terminated C-strings - case DW_FORM_string: - debug_info_data.getCStr(&offset); - break; - - // Compile unit address sized values - case DW_FORM_addr: - case DW_FORM_ref_addr: - form_size = cu->getAddressByteSize(); - break; - - // 0 sized form. - case DW_FORM_flag_present: - form_size = 0; - break; - - // 1 byte values - case DW_FORM_data1: - case DW_FORM_flag: - case DW_FORM_ref1: - form_size = 1; - break; - - // 2 byte values - case DW_FORM_data2: - case DW_FORM_ref2: - form_size = 2; - break; - - // 4 byte values - case DW_FORM_strp: - case DW_FORM_data4: - case DW_FORM_ref4: - form_size = 4; - break; - - // 8 byte values - case DW_FORM_data8: - case DW_FORM_ref8: - case DW_FORM_ref_sig8: - form_size = 8; - break; - - // signed or unsigned LEB 128 values - case DW_FORM_sdata: - case DW_FORM_udata: - case DW_FORM_ref_udata: - case DW_FORM_GNU_str_index: - case DW_FORM_GNU_addr_index: - debug_info_data.getULEB128(&offset); - break; - - case DW_FORM_indirect: - form_is_indirect = true; - form = debug_info_data.getULEB128(&offset); - break; - - // FIXME: 64-bit for DWARF64 - case DW_FORM_sec_offset: - debug_info_data.getU32(offset_ptr); - break; - - default: - *offset_ptr = Offset; - return false; - } - offset += form_size; - } while (form_is_indirect); - } - } - *offset_ptr = offset; - return true; - } else { +bool DWARFDebugInfoEntryMinimal::extractFast(const DWARFCompileUnit *CU, + const uint8_t *FixedFormSizes, + uint32_t *OffsetPtr) { + Offset = *OffsetPtr; + DataExtractor DebugInfoData = CU->getDebugInfoExtractor(); + uint64_t AbbrCode = DebugInfoData.getULEB128(OffsetPtr); + if (0 == AbbrCode) { + // NULL debug tag entry. AbbrevDecl = NULL; - return true; // NULL debug tag entry + return true; + } + AbbrevDecl = CU->getAbbreviations()->getAbbreviationDeclaration(AbbrCode); + assert(AbbrevDecl); + assert(FixedFormSizes); // For best performance this should be specified! + + // Skip all data in the .debug_info for the attributes + for (uint32_t i = 0, n = AbbrevDecl->getNumAttributes(); i < n; ++i) { + uint16_t Form = AbbrevDecl->getFormByIndex(i); + + // FIXME: Currently we're checking if this is less than the last + // entry in the fixed_form_sizes table, but this should be changed + // to use dynamic dispatch. + uint8_t FixedFormSize = + (Form < DW_FORM_ref_sig8) ? FixedFormSizes[Form] : 0; + if (FixedFormSize) + *OffsetPtr += FixedFormSize; + else if (!DWARFFormValue::skipValue(Form, DebugInfoData, OffsetPtr, + CU)) { + // Restore the original offset. 
+ *OffsetPtr = Offset; + return false; + } } + return true; } bool -DWARFDebugInfoEntryMinimal::extract(const DWARFCompileUnit *cu, - uint32_t *offset_ptr) { - DataExtractor debug_info_data = cu->getDebugInfoExtractor(); - const uint32_t cu_end_offset = cu->getNextCompileUnitOffset(); - const uint8_t cu_addr_size = cu->getAddressByteSize(); - uint32_t offset = *offset_ptr; - if ((offset < cu_end_offset) && debug_info_data.isValidOffset(offset)) { - Offset = offset; - - uint64_t abbrCode = debug_info_data.getULEB128(&offset); - - if (abbrCode) { - AbbrevDecl = cu->getAbbreviations()->getAbbreviationDeclaration(abbrCode); - - if (AbbrevDecl) { - uint16_t tag = AbbrevDecl->getTag(); - - bool isCompileUnitTag = tag == DW_TAG_compile_unit; - if(cu && isCompileUnitTag) - const_cast<DWARFCompileUnit*>(cu)->setBaseAddress(0); - - // Skip all data in the .debug_info for the attributes - const uint32_t numAttributes = AbbrevDecl->getNumAttributes(); - for (uint32_t i = 0; i != numAttributes; ++i) { - uint16_t attr = AbbrevDecl->getAttrByIndex(i); - uint16_t form = AbbrevDecl->getFormByIndex(i); - - if (isCompileUnitTag && - ((attr == DW_AT_entry_pc) || (attr == DW_AT_low_pc))) { - DWARFFormValue form_value(form); - if (form_value.extractValue(debug_info_data, &offset, cu)) { - if (attr == DW_AT_low_pc || attr == DW_AT_entry_pc) - const_cast<DWARFCompileUnit*>(cu) - ->setBaseAddress(form_value.getUnsigned()); - } - } else { - bool form_is_indirect = false; - do { - form_is_indirect = false; - register uint32_t form_size = 0; - switch (form) { - // Blocks if inlined data that have a length field and the data - // bytes // inlined in the .debug_info - case DW_FORM_exprloc: - case DW_FORM_block: - form_size = debug_info_data.getULEB128(&offset); - break; - case DW_FORM_block1: - form_size = debug_info_data.getU8(&offset); - break; - case DW_FORM_block2: - form_size = debug_info_data.getU16(&offset); - break; - case DW_FORM_block4: - form_size = debug_info_data.getU32(&offset); - break; - - // Inlined NULL terminated C-strings - case DW_FORM_string: - debug_info_data.getCStr(&offset); - break; - - // Compile unit address sized values - case DW_FORM_addr: - case DW_FORM_ref_addr: - form_size = cu_addr_size; - break; - - // 0 byte value - case DW_FORM_flag_present: - form_size = 0; - break; - - // 1 byte values - case DW_FORM_data1: - case DW_FORM_flag: - case DW_FORM_ref1: - form_size = 1; - break; - - // 2 byte values - case DW_FORM_data2: - case DW_FORM_ref2: - form_size = 2; - break; - - // 4 byte values - case DW_FORM_strp: - form_size = 4; - break; - - case DW_FORM_data4: - case DW_FORM_ref4: - form_size = 4; - break; - - // 8 byte values - case DW_FORM_data8: - case DW_FORM_ref8: - case DW_FORM_ref_sig8: - form_size = 8; - break; - - // signed or unsigned LEB 128 values - case DW_FORM_sdata: - case DW_FORM_udata: - case DW_FORM_ref_udata: - case DW_FORM_GNU_str_index: - case DW_FORM_GNU_addr_index: - debug_info_data.getULEB128(&offset); - break; - - case DW_FORM_indirect: - form = debug_info_data.getULEB128(&offset); - form_is_indirect = true; - break; - - // FIXME: 64-bit for DWARF64. 
- case DW_FORM_sec_offset: - debug_info_data.getU32(offset_ptr); - break; - - default: - *offset_ptr = offset; - return false; - } - - offset += form_size; - } while (form_is_indirect); - } - } - *offset_ptr = offset; - return true; +DWARFDebugInfoEntryMinimal::extract(const DWARFCompileUnit *CU, + uint32_t *OffsetPtr) { + DataExtractor DebugInfoData = CU->getDebugInfoExtractor(); + const uint32_t CUEndOffset = CU->getNextCompileUnitOffset(); + Offset = *OffsetPtr; + if ((Offset >= CUEndOffset) || !DebugInfoData.isValidOffset(Offset)) + return false; + uint64_t AbbrCode = DebugInfoData.getULEB128(OffsetPtr); + if (0 == AbbrCode) { + // NULL debug tag entry. + AbbrevDecl = NULL; + return true; + } + AbbrevDecl = CU->getAbbreviations()->getAbbreviationDeclaration(AbbrCode); + if (0 == AbbrevDecl) { + // Restore the original offset. + *OffsetPtr = Offset; + return false; + } + bool IsCompileUnitTag = (AbbrevDecl->getTag() == DW_TAG_compile_unit); + if (IsCompileUnitTag) + const_cast<DWARFCompileUnit*>(CU)->setBaseAddress(0); + + // Skip all data in the .debug_info for the attributes + for (uint32_t i = 0, n = AbbrevDecl->getNumAttributes(); i < n; ++i) { + uint16_t Attr = AbbrevDecl->getAttrByIndex(i); + uint16_t Form = AbbrevDecl->getFormByIndex(i); + + if (IsCompileUnitTag && + ((Attr == DW_AT_entry_pc) || (Attr == DW_AT_low_pc))) { + DWARFFormValue FormValue(Form); + if (FormValue.extractValue(DebugInfoData, OffsetPtr, CU)) { + if (Attr == DW_AT_low_pc || Attr == DW_AT_entry_pc) + const_cast<DWARFCompileUnit*>(CU) + ->setBaseAddress(FormValue.getUnsigned()); } - } else { - AbbrevDecl = NULL; - *offset_ptr = offset; - return true; // NULL debug tag entry + } else if (!DWARFFormValue::skipValue(Form, DebugInfoData, OffsetPtr, + CU)) { + // Restore the original offset. + *OffsetPtr = Offset; + return false; } } - - return false; + return true; } bool DWARFDebugInfoEntryMinimal::isSubprogramDIE() const { diff --git a/contrib/llvm/lib/DebugInfo/DWARFDebugInfoEntry.h b/contrib/llvm/lib/DebugInfo/DWARFDebugInfoEntry.h index 9c1b2be..9003591 100644 --- a/contrib/llvm/lib/DebugInfo/DWARFDebugInfoEntry.h +++ b/contrib/llvm/lib/DebugInfo/DWARFDebugInfoEntry.h @@ -45,12 +45,17 @@ public: uint32_t *offset_ptr, uint16_t attr, uint16_t form, unsigned indent = 0) const; - bool extractFast(const DWARFCompileUnit *cu, const uint8_t *fixed_form_sizes, - uint32_t *offset_ptr); + /// Extracts a debug info entry, which is a child of a given compile unit, + /// starting at a given offset. If DIE can't be extracted, returns false and + /// doesn't change OffsetPtr. + bool extractFast(const DWARFCompileUnit *CU, const uint8_t *FixedFormSizes, + uint32_t *OffsetPtr); /// Extract a debug info entry for a given compile unit from the /// .debug_info and .debug_abbrev data starting at the given offset. - bool extract(const DWARFCompileUnit *cu, uint32_t *offset_ptr); + /// If compile unit can't be parsed, returns false and doesn't change + /// OffsetPtr. + bool extract(const DWARFCompileUnit *CU, uint32_t *OffsetPtr); uint32_t getTag() const { return AbbrevDecl ? 
AbbrevDecl->getTag() : 0; } bool isNULL() const { return AbbrevDecl == 0; } diff --git a/contrib/llvm/lib/DebugInfo/DWARFFormValue.cpp b/contrib/llvm/lib/DebugInfo/DWARFFormValue.cpp index 9f807aa..c5583f9 100644 --- a/contrib/llvm/lib/DebugInfo/DWARFFormValue.cpp +++ b/contrib/llvm/lib/DebugInfo/DWARFFormValue.cpp @@ -7,7 +7,7 @@ // //===----------------------------------------------------------------------===// -#include "DWARFFormValue.h" +#include "llvm/DebugInfo/DWARFFormValue.h" #include "DWARFCompileUnit.h" #include "DWARFContext.h" #include "llvm/Support/Debug.h" @@ -18,39 +18,16 @@ using namespace llvm; using namespace dwarf; -static const uint8_t form_sizes_addr4[] = { - 0, // 0x00 unused - 4, // 0x01 DW_FORM_addr - 0, // 0x02 unused - 0, // 0x03 DW_FORM_block2 - 0, // 0x04 DW_FORM_block4 - 2, // 0x05 DW_FORM_data2 - 4, // 0x06 DW_FORM_data4 - 8, // 0x07 DW_FORM_data8 - 0, // 0x08 DW_FORM_string - 0, // 0x09 DW_FORM_block - 0, // 0x0a DW_FORM_block1 - 1, // 0x0b DW_FORM_data1 - 1, // 0x0c DW_FORM_flag - 0, // 0x0d DW_FORM_sdata - 4, // 0x0e DW_FORM_strp - 0, // 0x0f DW_FORM_udata - 4, // 0x10 DW_FORM_ref_addr - 1, // 0x11 DW_FORM_ref1 - 2, // 0x12 DW_FORM_ref2 - 4, // 0x13 DW_FORM_ref4 - 8, // 0x14 DW_FORM_ref8 - 0, // 0x15 DW_FORM_ref_udata - 0, // 0x16 DW_FORM_indirect - 4, // 0x17 DW_FORM_sec_offset - 0, // 0x18 DW_FORM_exprloc - 0, // 0x19 DW_FORM_flag_present - 8, // 0x20 DW_FORM_ref_sig8 +namespace { +template <uint8_t AddrSize, uint8_t RefAddrSize> struct FixedFormSizes { + static const uint8_t sizes[]; }; +} -static const uint8_t form_sizes_addr8[] = { +template <uint8_t AddrSize, uint8_t RefAddrSize> +const uint8_t FixedFormSizes<AddrSize, RefAddrSize>::sizes[] = { 0, // 0x00 unused - 8, // 0x01 DW_FORM_addr + AddrSize, // 0x01 DW_FORM_addr 0, // 0x02 unused 0, // 0x03 DW_FORM_block2 0, // 0x04 DW_FORM_block4 @@ -65,7 +42,7 @@ static const uint8_t form_sizes_addr8[] = { 0, // 0x0d DW_FORM_sdata 4, // 0x0e DW_FORM_strp 0, // 0x0f DW_FORM_udata - 8, // 0x10 DW_FORM_ref_addr + RefAddrSize, // 0x10 DW_FORM_ref_addr 1, // 0x11 DW_FORM_ref1 2, // 0x12 DW_FORM_ref2 4, // 0x13 DW_FORM_ref4 @@ -78,13 +55,23 @@ static const uint8_t form_sizes_addr8[] = { 8, // 0x20 DW_FORM_ref_sig8 }; +static uint8_t getRefAddrSize(uint8_t AddrSize, uint16_t Version) { + // FIXME: Support DWARF64. + return (Version == 2) ? AddrSize : 4; +} + const uint8_t * -DWARFFormValue::getFixedFormSizesForAddressSize(uint8_t addr_size) { - switch (addr_size) { - case 4: return form_sizes_addr4; - case 8: return form_sizes_addr8; - } - return NULL; +DWARFFormValue::getFixedFormSizes(uint8_t AddrSize, uint16_t Version) { + uint8_t RefAddrSize = getRefAddrSize(AddrSize, Version); + if (AddrSize == 4 && RefAddrSize == 4) + return FixedFormSizes<4, 4>::sizes; + if (AddrSize == 4 && RefAddrSize == 8) + return FixedFormSizes<4, 8>::sizes; + if (AddrSize == 8 && RefAddrSize == 4) + return FixedFormSizes<8, 4>::sizes; + if (AddrSize == 8 && RefAddrSize == 8) + return FixedFormSizes<8, 8>::sizes; + return 0; } bool @@ -100,14 +87,16 @@ DWARFFormValue::extractValue(DataExtractor data, uint32_t *offset_ptr, switch (Form) { case DW_FORM_addr: case DW_FORM_ref_addr: { - RelocAddrMap::const_iterator AI - = cu->getRelocMap()->find(*offset_ptr); + uint16_t AddrSize = + (Form == DW_FORM_addr) + ? 
cu->getAddressByteSize() + : getRefAddrSize(cu->getAddressByteSize(), cu->getVersion()); + RelocAddrMap::const_iterator AI = cu->getRelocMap()->find(*offset_ptr); if (AI != cu->getRelocMap()->end()) { const std::pair<uint8_t, int64_t> &R = AI->second; - Value.uval = data.getUnsigned(offset_ptr, cu->getAddressByteSize()) + - R.second; + Value.uval = data.getUnsigned(offset_ptr, AddrSize) + R.second; } else - Value.uval = data.getUnsigned(offset_ptr, cu->getAddressByteSize()); + Value.uval = data.getUnsigned(offset_ptr, AddrSize); break; } case DW_FORM_exprloc: @@ -172,10 +161,17 @@ DWARFFormValue::extractValue(DataExtractor data, uint32_t *offset_ptr, Form = data.getULEB128(offset_ptr); indirect = true; break; - case DW_FORM_sec_offset: + case DW_FORM_sec_offset: { // FIXME: This is 64-bit for DWARF64. - Value.uval = data.getU32(offset_ptr); + RelocAddrMap::const_iterator AI + = cu->getRelocMap()->find(*offset_ptr); + if (AI != cu->getRelocMap()->end()) { + const std::pair<uint8_t, int64_t> &R = AI->second; + Value.uval = data.getU32(offset_ptr) + R.second; + } else + Value.uval = data.getU32(offset_ptr); break; + } case DW_FORM_flag_present: Value.uval = 1; break; @@ -216,7 +212,6 @@ DWARFFormValue::skipValue(uint16_t form, DataExtractor debug_info_data, uint32_t *offset_ptr, const DWARFCompileUnit *cu) { bool indirect = false; do { - indirect = false; switch (form) { // Blocks if inlined data that have a length field and the data bytes // inlined in the .debug_info @@ -249,9 +244,11 @@ DWARFFormValue::skipValue(uint16_t form, DataExtractor debug_info_data, // Compile unit address sized values case DW_FORM_addr: - case DW_FORM_ref_addr: *offset_ptr += cu->getAddressByteSize(); return true; + case DW_FORM_ref_addr: + *offset_ptr += getRefAddrSize(cu->getAddressByteSize(), cu->getVersion()); + return true; // 0 byte values - implied from the form. case DW_FORM_flag_present: diff --git a/contrib/llvm/lib/DebugInfo/DWARFFormValue.h b/contrib/llvm/lib/DebugInfo/DWARFFormValue.h deleted file mode 100644 index b863001..0000000 --- a/contrib/llvm/lib/DebugInfo/DWARFFormValue.h +++ /dev/null @@ -1,82 +0,0 @@ -//===-- DWARFFormValue.h ----------------------------------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_DEBUGINFO_DWARFFORMVALUE_H -#define LLVM_DEBUGINFO_DWARFFORMVALUE_H - -#include "llvm/Support/DataExtractor.h" - -namespace llvm { - -class DWARFCompileUnit; -class raw_ostream; - -class DWARFFormValue { -public: - struct ValueType { - ValueType() : data(NULL) { - uval = 0; - } - - union { - uint64_t uval; - int64_t sval; - const char* cstr; - }; - const uint8_t* data; - }; - - enum { - eValueTypeInvalid = 0, - eValueTypeUnsigned, - eValueTypeSigned, - eValueTypeCStr, - eValueTypeBlock - }; - -private: - uint16_t Form; // Form for this value. - ValueType Value; // Contains all data for the form. 
- -public: - DWARFFormValue(uint16_t form = 0) : Form(form) {} - uint16_t getForm() const { return Form; } - const ValueType& value() const { return Value; } - void dump(raw_ostream &OS, const DWARFCompileUnit* cu) const; - bool extractValue(DataExtractor data, uint32_t *offset_ptr, - const DWARFCompileUnit *cu); - bool isInlinedCStr() const { - return Value.data != NULL && Value.data == (const uint8_t*)Value.cstr; - } - const uint8_t *BlockData() const; - uint64_t getReference(const DWARFCompileUnit* cu) const; - - /// Resolve any compile unit specific references so that we don't need - /// the compile unit at a later time in order to work with the form - /// value. - bool resolveCompileUnitReferences(const DWARFCompileUnit* cu); - uint64_t getUnsigned() const { return Value.uval; } - int64_t getSigned() const { return Value.sval; } - const char *getAsCString(const DataExtractor *debug_str_data_ptr) const; - const char *getIndirectCString(const DataExtractor *, - const DataExtractor *) const; - uint64_t getIndirectAddress(const DataExtractor *, - const DWARFCompileUnit *) const; - bool skipValue(DataExtractor debug_info_data, uint32_t *offset_ptr, - const DWARFCompileUnit *cu) const; - static bool skipValue(uint16_t form, DataExtractor debug_info_data, - uint32_t *offset_ptr, const DWARFCompileUnit *cu); - static bool isBlockForm(uint16_t form); - static bool isDataForm(uint16_t form); - static const uint8_t *getFixedFormSizesForAddressSize(uint8_t addr_size); -}; - -} - -#endif diff --git a/contrib/llvm/lib/ExecutionEngine/ExecutionEngine.cpp b/contrib/llvm/lib/ExecutionEngine/ExecutionEngine.cpp index 906a3a3..e43ba4f 100644 --- a/contrib/llvm/lib/ExecutionEngine/ExecutionEngine.cpp +++ b/contrib/llvm/lib/ExecutionEngine/ExecutionEngine.cpp @@ -948,7 +948,7 @@ static void StoreIntToMemory(const APInt &IntVal, uint8_t *Dst, assert((IntVal.getBitWidth()+7)/8 >= StoreBytes && "Integer too small!"); const uint8_t *Src = (const uint8_t *)IntVal.getRawData(); - if (sys::isLittleEndianHost()) { + if (sys::IsLittleEndianHost) { // Little-endian host - the source is ordered from LSB to MSB. Order the // destination from LSB to MSB: Do a straight copy. memcpy(Dst, Src, StoreBytes); @@ -1009,7 +1009,7 @@ void ExecutionEngine::StoreValueToMemory(const GenericValue &Val, break; } - if (sys::isLittleEndianHost() != getDataLayout()->isLittleEndian()) + if (sys::IsLittleEndianHost != getDataLayout()->isLittleEndian()) // Host and target are different endian - reverse the stored bytes. std::reverse((uint8_t*)Ptr, StoreBytes + (uint8_t*)Ptr); } @@ -1021,7 +1021,7 @@ static void LoadIntFromMemory(APInt &IntVal, uint8_t *Src, unsigned LoadBytes) { uint8_t *Dst = reinterpret_cast<uint8_t *>( const_cast<uint64_t *>(IntVal.getRawData())); - if (sys::isLittleEndianHost()) + if (sys::IsLittleEndianHost) // Little-endian host - the destination must be ordered from LSB to MSB. // The source is ordered from LSB to MSB: Do a straight copy. 
memcpy(Dst, Src, LoadBytes); diff --git a/contrib/llvm/lib/ExecutionEngine/ExecutionEngineBindings.cpp b/contrib/llvm/lib/ExecutionEngine/ExecutionEngineBindings.cpp index f4e8246..f9b08a0 100644 --- a/contrib/llvm/lib/ExecutionEngine/ExecutionEngineBindings.cpp +++ b/contrib/llvm/lib/ExecutionEngine/ExecutionEngineBindings.cpp @@ -15,11 +15,33 @@ #include "llvm-c/ExecutionEngine.h" #include "llvm/ExecutionEngine/ExecutionEngine.h" #include "llvm/ExecutionEngine/GenericValue.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Module.h" #include "llvm/Support/ErrorHandling.h" #include <cstring> using namespace llvm; +// Wrapping the C bindings types. +DEFINE_SIMPLE_CONVERSION_FUNCTIONS(GenericValue, LLVMGenericValueRef) + +inline DataLayout *unwrap(LLVMTargetDataRef P) { + return reinterpret_cast<DataLayout*>(P); +} + +inline LLVMTargetDataRef wrap(const DataLayout *P) { + return reinterpret_cast<LLVMTargetDataRef>(const_cast<DataLayout*>(P)); +} + +inline TargetLibraryInfo *unwrap(LLVMTargetLibraryInfoRef P) { + return reinterpret_cast<TargetLibraryInfo*>(P); +} + +inline LLVMTargetLibraryInfoRef wrap(const TargetLibraryInfo *P) { + TargetLibraryInfo *X = const_cast<TargetLibraryInfo*>(P); + return reinterpret_cast<LLVMTargetLibraryInfoRef>(X); +} + /*===-- Operations on generic values --------------------------------------===*/ LLVMGenericValueRef LLVMCreateGenericValueOfInt(LLVMTypeRef Ty, @@ -132,6 +154,59 @@ LLVMBool LLVMCreateJITCompilerForModule(LLVMExecutionEngineRef *OutJIT, return 1; } +void LLVMInitializeMCJITCompilerOptions(LLVMMCJITCompilerOptions *PassedOptions, + size_t SizeOfPassedOptions) { + LLVMMCJITCompilerOptions options; + options.OptLevel = 0; + options.CodeModel = LLVMCodeModelJITDefault; + options.NoFramePointerElim = false; + options.EnableFastISel = false; + + memcpy(PassedOptions, &options, + std::min(sizeof(options), SizeOfPassedOptions)); +} + +LLVMBool LLVMCreateMCJITCompilerForModule( + LLVMExecutionEngineRef *OutJIT, LLVMModuleRef M, + LLVMMCJITCompilerOptions *PassedOptions, size_t SizeOfPassedOptions, + char **OutError) { + LLVMMCJITCompilerOptions options; + // If the user passed a larger sized options struct, then they were compiled + // against a newer LLVM. Tell them that something is wrong. + if (SizeOfPassedOptions > sizeof(options)) { + *OutError = strdup( + "Refusing to use options struct that is larger than my own; assuming " + "LLVM library mismatch."); + return 1; + } + + // Defend against the user having an old version of the API by ensuring that + // any fields they didn't see are cleared. We must defend against fields being + // set to the bitwise equivalent of zero, and assume that this means "do the + // default" as if that option hadn't been available. 
+ LLVMInitializeMCJITCompilerOptions(&options, sizeof(options)); + memcpy(&options, PassedOptions, SizeOfPassedOptions); + + TargetOptions targetOptions; + targetOptions.NoFramePointerElim = options.NoFramePointerElim; + targetOptions.EnableFastISel = options.EnableFastISel; + + std::string Error; + EngineBuilder builder(unwrap(M)); + builder.setEngineKind(EngineKind::JIT) + .setErrorStr(&Error) + .setUseMCJIT(true) + .setOptLevel((CodeGenOpt::Level)options.OptLevel) + .setCodeModel(unwrap(options.CodeModel)) + .setTargetOptions(targetOptions); + if (ExecutionEngine *JIT = builder.create()) { + *OutJIT = wrap(JIT); + return 0; + } + *OutError = strdup(Error.c_str()); + return 1; +} + LLVMBool LLVMCreateExecutionEngine(LLVMExecutionEngineRef *OutEE, LLVMModuleProviderRef MP, char **OutError) { @@ -176,6 +251,8 @@ void LLVMRunStaticDestructors(LLVMExecutionEngineRef EE) { int LLVMRunFunctionAsMain(LLVMExecutionEngineRef EE, LLVMValueRef F, unsigned ArgC, const char * const *ArgV, const char * const *EnvP) { + unwrap(EE)->finalizeObject(); + std::vector<std::string> ArgVec; for (unsigned I = 0; I != ArgC; ++I) ArgVec.push_back(ArgV[I]); @@ -186,6 +263,8 @@ int LLVMRunFunctionAsMain(LLVMExecutionEngineRef EE, LLVMValueRef F, LLVMGenericValueRef LLVMRunFunction(LLVMExecutionEngineRef EE, LLVMValueRef F, unsigned NumArgs, LLVMGenericValueRef *Args) { + unwrap(EE)->finalizeObject(); + std::vector<GenericValue> ArgVec; ArgVec.reserve(NumArgs); for (unsigned I = 0; I != NumArgs; ++I) @@ -234,7 +313,8 @@ LLVMBool LLVMFindFunction(LLVMExecutionEngineRef EE, const char *Name, return 1; } -void *LLVMRecompileAndRelinkFunction(LLVMExecutionEngineRef EE, LLVMValueRef Fn) { +void *LLVMRecompileAndRelinkFunction(LLVMExecutionEngineRef EE, + LLVMValueRef Fn) { return unwrap(EE)->recompileAndRelinkFunction(unwrap<Function>(Fn)); } @@ -248,5 +328,7 @@ void LLVMAddGlobalMapping(LLVMExecutionEngineRef EE, LLVMValueRef Global, } void *LLVMGetPointerToGlobal(LLVMExecutionEngineRef EE, LLVMValueRef Global) { + unwrap(EE)->finalizeObject(); + return unwrap(EE)->getPointerToGlobal(unwrap<GlobalValue>(Global)); } diff --git a/contrib/llvm/lib/ExecutionEngine/Interpreter/Execution.cpp b/contrib/llvm/lib/ExecutionEngine/Interpreter/Execution.cpp index 526c04e..b95a9e8 100644 --- a/contrib/llvm/lib/ExecutionEngine/Interpreter/Execution.cpp +++ b/contrib/llvm/lib/ExecutionEngine/Interpreter/Execution.cpp @@ -114,6 +114,15 @@ static void executeFRemInst(GenericValue &Dest, GenericValue Src1, Dest.IntVal = APInt(1,Src1.IntVal.OP(Src2.IntVal)); \ break; +#define IMPLEMENT_VECTOR_INTEGER_ICMP(OP, TY) \ + case Type::VectorTyID: { \ + assert(Src1.AggregateVal.size() == Src2.AggregateVal.size()); \ + Dest.AggregateVal.resize( Src1.AggregateVal.size() ); \ + for( uint32_t _i=0;_i<Src1.AggregateVal.size();_i++) \ + Dest.AggregateVal[_i].IntVal = APInt(1, \ + Src1.AggregateVal[_i].IntVal.OP(Src2.AggregateVal[_i].IntVal));\ + } break; + // Handle pointers specially because they must be compared with only as much // width as the host has. 
We _do not_ want to be comparing 64 bit values when // running on a 32-bit target, otherwise the upper 32 bits might mess up @@ -129,6 +138,7 @@ static GenericValue executeICMP_EQ(GenericValue Src1, GenericValue Src2, GenericValue Dest; switch (Ty->getTypeID()) { IMPLEMENT_INTEGER_ICMP(eq,Ty); + IMPLEMENT_VECTOR_INTEGER_ICMP(eq,Ty); IMPLEMENT_POINTER_ICMP(==); default: dbgs() << "Unhandled type for ICMP_EQ predicate: " << *Ty << "\n"; @@ -142,6 +152,7 @@ static GenericValue executeICMP_NE(GenericValue Src1, GenericValue Src2, GenericValue Dest; switch (Ty->getTypeID()) { IMPLEMENT_INTEGER_ICMP(ne,Ty); + IMPLEMENT_VECTOR_INTEGER_ICMP(ne,Ty); IMPLEMENT_POINTER_ICMP(!=); default: dbgs() << "Unhandled type for ICMP_NE predicate: " << *Ty << "\n"; @@ -155,6 +166,7 @@ static GenericValue executeICMP_ULT(GenericValue Src1, GenericValue Src2, GenericValue Dest; switch (Ty->getTypeID()) { IMPLEMENT_INTEGER_ICMP(ult,Ty); + IMPLEMENT_VECTOR_INTEGER_ICMP(ult,Ty); IMPLEMENT_POINTER_ICMP(<); default: dbgs() << "Unhandled type for ICMP_ULT predicate: " << *Ty << "\n"; @@ -168,6 +180,7 @@ static GenericValue executeICMP_SLT(GenericValue Src1, GenericValue Src2, GenericValue Dest; switch (Ty->getTypeID()) { IMPLEMENT_INTEGER_ICMP(slt,Ty); + IMPLEMENT_VECTOR_INTEGER_ICMP(slt,Ty); IMPLEMENT_POINTER_ICMP(<); default: dbgs() << "Unhandled type for ICMP_SLT predicate: " << *Ty << "\n"; @@ -181,6 +194,7 @@ static GenericValue executeICMP_UGT(GenericValue Src1, GenericValue Src2, GenericValue Dest; switch (Ty->getTypeID()) { IMPLEMENT_INTEGER_ICMP(ugt,Ty); + IMPLEMENT_VECTOR_INTEGER_ICMP(ugt,Ty); IMPLEMENT_POINTER_ICMP(>); default: dbgs() << "Unhandled type for ICMP_UGT predicate: " << *Ty << "\n"; @@ -194,6 +208,7 @@ static GenericValue executeICMP_SGT(GenericValue Src1, GenericValue Src2, GenericValue Dest; switch (Ty->getTypeID()) { IMPLEMENT_INTEGER_ICMP(sgt,Ty); + IMPLEMENT_VECTOR_INTEGER_ICMP(sgt,Ty); IMPLEMENT_POINTER_ICMP(>); default: dbgs() << "Unhandled type for ICMP_SGT predicate: " << *Ty << "\n"; @@ -207,6 +222,7 @@ static GenericValue executeICMP_ULE(GenericValue Src1, GenericValue Src2, GenericValue Dest; switch (Ty->getTypeID()) { IMPLEMENT_INTEGER_ICMP(ule,Ty); + IMPLEMENT_VECTOR_INTEGER_ICMP(ule,Ty); IMPLEMENT_POINTER_ICMP(<=); default: dbgs() << "Unhandled type for ICMP_ULE predicate: " << *Ty << "\n"; @@ -220,6 +236,7 @@ static GenericValue executeICMP_SLE(GenericValue Src1, GenericValue Src2, GenericValue Dest; switch (Ty->getTypeID()) { IMPLEMENT_INTEGER_ICMP(sle,Ty); + IMPLEMENT_VECTOR_INTEGER_ICMP(sle,Ty); IMPLEMENT_POINTER_ICMP(<=); default: dbgs() << "Unhandled type for ICMP_SLE predicate: " << *Ty << "\n"; @@ -233,6 +250,7 @@ static GenericValue executeICMP_UGE(GenericValue Src1, GenericValue Src2, GenericValue Dest; switch (Ty->getTypeID()) { IMPLEMENT_INTEGER_ICMP(uge,Ty); + IMPLEMENT_VECTOR_INTEGER_ICMP(uge,Ty); IMPLEMENT_POINTER_ICMP(>=); default: dbgs() << "Unhandled type for ICMP_UGE predicate: " << *Ty << "\n"; @@ -246,6 +264,7 @@ static GenericValue executeICMP_SGE(GenericValue Src1, GenericValue Src2, GenericValue Dest; switch (Ty->getTypeID()) { IMPLEMENT_INTEGER_ICMP(sge,Ty); + IMPLEMENT_VECTOR_INTEGER_ICMP(sge,Ty); IMPLEMENT_POINTER_ICMP(>=); default: dbgs() << "Unhandled type for ICMP_SGE predicate: " << *Ty << "\n"; @@ -285,12 +304,29 @@ void Interpreter::visitICmpInst(ICmpInst &I) { Dest.IntVal = APInt(1,Src1.TY##Val OP Src2.TY##Val); \ break +#define IMPLEMENT_VECTOR_FCMP_T(OP, TY) \ + assert(Src1.AggregateVal.size() == Src2.AggregateVal.size()); \ + 
Dest.AggregateVal.resize( Src1.AggregateVal.size() ); \ + for( uint32_t _i=0;_i<Src1.AggregateVal.size();_i++) \ + Dest.AggregateVal[_i].IntVal = APInt(1, \ + Src1.AggregateVal[_i].TY##Val OP Src2.AggregateVal[_i].TY##Val);\ + break; + +#define IMPLEMENT_VECTOR_FCMP(OP) \ + case Type::VectorTyID: \ + if(dyn_cast<VectorType>(Ty)->getElementType()->isFloatTy()) { \ + IMPLEMENT_VECTOR_FCMP_T(OP, Float); \ + } else { \ + IMPLEMENT_VECTOR_FCMP_T(OP, Double); \ + } + static GenericValue executeFCMP_OEQ(GenericValue Src1, GenericValue Src2, Type *Ty) { GenericValue Dest; switch (Ty->getTypeID()) { IMPLEMENT_FCMP(==, Float); IMPLEMENT_FCMP(==, Double); + IMPLEMENT_VECTOR_FCMP(==); default: dbgs() << "Unhandled type for FCmp EQ instruction: " << *Ty << "\n"; llvm_unreachable(0); @@ -298,17 +334,65 @@ static GenericValue executeFCMP_OEQ(GenericValue Src1, GenericValue Src2, return Dest; } +#define IMPLEMENT_SCALAR_NANS(TY, X,Y) \ + if (TY->isFloatTy()) { \ + if (X.FloatVal != X.FloatVal || Y.FloatVal != Y.FloatVal) { \ + Dest.IntVal = APInt(1,false); \ + return Dest; \ + } \ + } else { \ + if (X.DoubleVal != X.DoubleVal || Y.DoubleVal != Y.DoubleVal) { \ + Dest.IntVal = APInt(1,false); \ + return Dest; \ + } \ + } + +#define MASK_VECTOR_NANS_T(X,Y, TZ, FLAG) \ + assert(X.AggregateVal.size() == Y.AggregateVal.size()); \ + Dest.AggregateVal.resize( X.AggregateVal.size() ); \ + for( uint32_t _i=0;_i<X.AggregateVal.size();_i++) { \ + if (X.AggregateVal[_i].TZ##Val != X.AggregateVal[_i].TZ##Val || \ + Y.AggregateVal[_i].TZ##Val != Y.AggregateVal[_i].TZ##Val) \ + Dest.AggregateVal[_i].IntVal = APInt(1,FLAG); \ + else { \ + Dest.AggregateVal[_i].IntVal = APInt(1,!FLAG); \ + } \ + } + +#define MASK_VECTOR_NANS(TY, X,Y, FLAG) \ + if (TY->isVectorTy()) { \ + if (dyn_cast<VectorType>(TY)->getElementType()->isFloatTy()) { \ + MASK_VECTOR_NANS_T(X, Y, Float, FLAG) \ + } else { \ + MASK_VECTOR_NANS_T(X, Y, Double, FLAG) \ + } \ + } \ + + + static GenericValue executeFCMP_ONE(GenericValue Src1, GenericValue Src2, - Type *Ty) { + Type *Ty) +{ GenericValue Dest; + // if input is scalar value and Src1 or Src2 is NaN return false + IMPLEMENT_SCALAR_NANS(Ty, Src1, Src2) + // if vector input detect NaNs and fill mask + MASK_VECTOR_NANS(Ty, Src1, Src2, false) + GenericValue DestMask = Dest; switch (Ty->getTypeID()) { IMPLEMENT_FCMP(!=, Float); IMPLEMENT_FCMP(!=, Double); - - default: - dbgs() << "Unhandled type for FCmp NE instruction: " << *Ty << "\n"; - llvm_unreachable(0); + IMPLEMENT_VECTOR_FCMP(!=); + default: + dbgs() << "Unhandled type for FCmp NE instruction: " << *Ty << "\n"; + llvm_unreachable(0); } + // in vector case mask out NaN elements + if (Ty->isVectorTy()) + for( size_t _i=0; _i<Src1.AggregateVal.size(); _i++) + if (DestMask.AggregateVal[_i].IntVal == false) + Dest.AggregateVal[_i].IntVal = APInt(1,false); + return Dest; } @@ -318,6 +402,7 @@ static GenericValue executeFCMP_OLE(GenericValue Src1, GenericValue Src2, switch (Ty->getTypeID()) { IMPLEMENT_FCMP(<=, Float); IMPLEMENT_FCMP(<=, Double); + IMPLEMENT_VECTOR_FCMP(<=); default: dbgs() << "Unhandled type for FCmp LE instruction: " << *Ty << "\n"; llvm_unreachable(0); @@ -331,6 +416,7 @@ static GenericValue executeFCMP_OGE(GenericValue Src1, GenericValue Src2, switch (Ty->getTypeID()) { IMPLEMENT_FCMP(>=, Float); IMPLEMENT_FCMP(>=, Double); + IMPLEMENT_VECTOR_FCMP(>=); default: dbgs() << "Unhandled type for FCmp GE instruction: " << *Ty << "\n"; llvm_unreachable(0); @@ -344,6 +430,7 @@ static GenericValue executeFCMP_OLT(GenericValue Src1, 
GenericValue Src2, switch (Ty->getTypeID()) { IMPLEMENT_FCMP(<, Float); IMPLEMENT_FCMP(<, Double); + IMPLEMENT_VECTOR_FCMP(<); default: dbgs() << "Unhandled type for FCmp LT instruction: " << *Ty << "\n"; llvm_unreachable(0); @@ -357,6 +444,7 @@ static GenericValue executeFCMP_OGT(GenericValue Src1, GenericValue Src2, switch (Ty->getTypeID()) { IMPLEMENT_FCMP(>, Float); IMPLEMENT_FCMP(>, Double); + IMPLEMENT_VECTOR_FCMP(>); default: dbgs() << "Unhandled type for FCmp GT instruction: " << *Ty << "\n"; llvm_unreachable(0); @@ -375,18 +463,32 @@ static GenericValue executeFCMP_OGT(GenericValue Src1, GenericValue Src2, return Dest; \ } +#define IMPLEMENT_VECTOR_UNORDERED(TY, X,Y, _FUNC) \ + if (TY->isVectorTy()) { \ + GenericValue DestMask = Dest; \ + Dest = _FUNC(Src1, Src2, Ty); \ + for( size_t _i=0; _i<Src1.AggregateVal.size(); _i++) \ + if (DestMask.AggregateVal[_i].IntVal == true) \ + Dest.AggregateVal[_i].IntVal = APInt(1,true); \ + return Dest; \ + } static GenericValue executeFCMP_UEQ(GenericValue Src1, GenericValue Src2, Type *Ty) { GenericValue Dest; IMPLEMENT_UNORDERED(Ty, Src1, Src2) + MASK_VECTOR_NANS(Ty, Src1, Src2, true) + IMPLEMENT_VECTOR_UNORDERED(Ty, Src1, Src2, executeFCMP_OEQ) return executeFCMP_OEQ(Src1, Src2, Ty); + } static GenericValue executeFCMP_UNE(GenericValue Src1, GenericValue Src2, Type *Ty) { GenericValue Dest; IMPLEMENT_UNORDERED(Ty, Src1, Src2) + MASK_VECTOR_NANS(Ty, Src1, Src2, true) + IMPLEMENT_VECTOR_UNORDERED(Ty, Src1, Src2, executeFCMP_ONE) return executeFCMP_ONE(Src1, Src2, Ty); } @@ -394,6 +496,8 @@ static GenericValue executeFCMP_ULE(GenericValue Src1, GenericValue Src2, Type *Ty) { GenericValue Dest; IMPLEMENT_UNORDERED(Ty, Src1, Src2) + MASK_VECTOR_NANS(Ty, Src1, Src2, true) + IMPLEMENT_VECTOR_UNORDERED(Ty, Src1, Src2, executeFCMP_OLE) return executeFCMP_OLE(Src1, Src2, Ty); } @@ -401,6 +505,8 @@ static GenericValue executeFCMP_UGE(GenericValue Src1, GenericValue Src2, Type *Ty) { GenericValue Dest; IMPLEMENT_UNORDERED(Ty, Src1, Src2) + MASK_VECTOR_NANS(Ty, Src1, Src2, true) + IMPLEMENT_VECTOR_UNORDERED(Ty, Src1, Src2, executeFCMP_OGE) return executeFCMP_OGE(Src1, Src2, Ty); } @@ -408,6 +514,8 @@ static GenericValue executeFCMP_ULT(GenericValue Src1, GenericValue Src2, Type *Ty) { GenericValue Dest; IMPLEMENT_UNORDERED(Ty, Src1, Src2) + MASK_VECTOR_NANS(Ty, Src1, Src2, true) + IMPLEMENT_VECTOR_UNORDERED(Ty, Src1, Src2, executeFCMP_OLT) return executeFCMP_OLT(Src1, Src2, Ty); } @@ -415,33 +523,88 @@ static GenericValue executeFCMP_UGT(GenericValue Src1, GenericValue Src2, Type *Ty) { GenericValue Dest; IMPLEMENT_UNORDERED(Ty, Src1, Src2) + MASK_VECTOR_NANS(Ty, Src1, Src2, true) + IMPLEMENT_VECTOR_UNORDERED(Ty, Src1, Src2, executeFCMP_OGT) return executeFCMP_OGT(Src1, Src2, Ty); } static GenericValue executeFCMP_ORD(GenericValue Src1, GenericValue Src2, Type *Ty) { GenericValue Dest; - if (Ty->isFloatTy()) + if(Ty->isVectorTy()) { + assert(Src1.AggregateVal.size() == Src2.AggregateVal.size()); + Dest.AggregateVal.resize( Src1.AggregateVal.size() ); + if(dyn_cast<VectorType>(Ty)->getElementType()->isFloatTy()) { + for( size_t _i=0;_i<Src1.AggregateVal.size();_i++) + Dest.AggregateVal[_i].IntVal = APInt(1, + ( (Src1.AggregateVal[_i].FloatVal == + Src1.AggregateVal[_i].FloatVal) && + (Src2.AggregateVal[_i].FloatVal == + Src2.AggregateVal[_i].FloatVal))); + } else { + for( size_t _i=0;_i<Src1.AggregateVal.size();_i++) + Dest.AggregateVal[_i].IntVal = APInt(1, + ( (Src1.AggregateVal[_i].DoubleVal == + Src1.AggregateVal[_i].DoubleVal) && + 
(Src2.AggregateVal[_i].DoubleVal == + Src2.AggregateVal[_i].DoubleVal))); + } + } else if (Ty->isFloatTy()) Dest.IntVal = APInt(1,(Src1.FloatVal == Src1.FloatVal && Src2.FloatVal == Src2.FloatVal)); - else + else { Dest.IntVal = APInt(1,(Src1.DoubleVal == Src1.DoubleVal && Src2.DoubleVal == Src2.DoubleVal)); + } return Dest; } static GenericValue executeFCMP_UNO(GenericValue Src1, GenericValue Src2, Type *Ty) { GenericValue Dest; - if (Ty->isFloatTy()) + if(Ty->isVectorTy()) { + assert(Src1.AggregateVal.size() == Src2.AggregateVal.size()); + Dest.AggregateVal.resize( Src1.AggregateVal.size() ); + if(dyn_cast<VectorType>(Ty)->getElementType()->isFloatTy()) { + for( size_t _i=0;_i<Src1.AggregateVal.size();_i++) + Dest.AggregateVal[_i].IntVal = APInt(1, + ( (Src1.AggregateVal[_i].FloatVal != + Src1.AggregateVal[_i].FloatVal) || + (Src2.AggregateVal[_i].FloatVal != + Src2.AggregateVal[_i].FloatVal))); + } else { + for( size_t _i=0;_i<Src1.AggregateVal.size();_i++) + Dest.AggregateVal[_i].IntVal = APInt(1, + ( (Src1.AggregateVal[_i].DoubleVal != + Src1.AggregateVal[_i].DoubleVal) || + (Src2.AggregateVal[_i].DoubleVal != + Src2.AggregateVal[_i].DoubleVal))); + } + } else if (Ty->isFloatTy()) Dest.IntVal = APInt(1,(Src1.FloatVal != Src1.FloatVal || Src2.FloatVal != Src2.FloatVal)); - else + else { Dest.IntVal = APInt(1,(Src1.DoubleVal != Src1.DoubleVal || Src2.DoubleVal != Src2.DoubleVal)); + } return Dest; } +static GenericValue executeFCMP_BOOL(GenericValue Src1, GenericValue Src2, + const Type *Ty, const bool val) { + GenericValue Dest; + if(Ty->isVectorTy()) { + assert(Src1.AggregateVal.size() == Src2.AggregateVal.size()); + Dest.AggregateVal.resize( Src1.AggregateVal.size() ); + for( size_t _i=0; _i<Src1.AggregateVal.size(); _i++) + Dest.AggregateVal[_i].IntVal = APInt(1,val); + } else { + Dest.IntVal = APInt(1, val); + } + + return Dest; +} + void Interpreter::visitFCmpInst(FCmpInst &I) { ExecutionContext &SF = ECStack.back(); Type *Ty = I.getOperand(0)->getType(); @@ -450,8 +613,14 @@ void Interpreter::visitFCmpInst(FCmpInst &I) { GenericValue R; // Result switch (I.getPredicate()) { - case FCmpInst::FCMP_FALSE: R.IntVal = APInt(1,false); break; - case FCmpInst::FCMP_TRUE: R.IntVal = APInt(1,true); break; + default: + dbgs() << "Don't know how to handle this FCmp predicate!\n-->" << I; + llvm_unreachable(0); + break; + case FCmpInst::FCMP_FALSE: R = executeFCMP_BOOL(Src1, Src2, Ty, false); + break; + case FCmpInst::FCMP_TRUE: R = executeFCMP_BOOL(Src1, Src2, Ty, true); + break; case FCmpInst::FCMP_ORD: R = executeFCMP_ORD(Src1, Src2, Ty); break; case FCmpInst::FCMP_UNO: R = executeFCMP_UNO(Src1, Src2, Ty); break; case FCmpInst::FCMP_UEQ: R = executeFCMP_UEQ(Src1, Src2, Ty); break; @@ -466,9 +635,6 @@ void Interpreter::visitFCmpInst(FCmpInst &I) { case FCmpInst::FCMP_OLE: R = executeFCMP_OLE(Src1, Src2, Ty); break; case FCmpInst::FCMP_UGE: R = executeFCMP_UGE(Src1, Src2, Ty); break; case FCmpInst::FCMP_OGE: R = executeFCMP_OGE(Src1, Src2, Ty); break; - default: - dbgs() << "Don't know how to handle this FCmp predicate!\n-->" << I; - llvm_unreachable(0); } SetValue(&I, R, SF); @@ -502,16 +668,8 @@ static GenericValue executeCmpInst(unsigned predicate, GenericValue Src1, case FCmpInst::FCMP_ULE: return executeFCMP_ULE(Src1, Src2, Ty); case FCmpInst::FCMP_OGE: return executeFCMP_OGE(Src1, Src2, Ty); case FCmpInst::FCMP_UGE: return executeFCMP_UGE(Src1, Src2, Ty); - case FCmpInst::FCMP_FALSE: { - GenericValue Result; - Result.IntVal = APInt(1, false); - return Result; - } - case 
FCmpInst::FCMP_TRUE: { - GenericValue Result; - Result.IntVal = APInt(1, true); - return Result; - } + case FCmpInst::FCMP_FALSE: return executeFCMP_BOOL(Src1, Src2, Ty, false); + case FCmpInst::FCMP_TRUE: return executeFCMP_BOOL(Src1, Src2, Ty, true); default: dbgs() << "Unhandled Cmp predicate\n"; llvm_unreachable(0); @@ -525,27 +683,105 @@ void Interpreter::visitBinaryOperator(BinaryOperator &I) { GenericValue Src2 = getOperandValue(I.getOperand(1), SF); GenericValue R; // Result - switch (I.getOpcode()) { - case Instruction::Add: R.IntVal = Src1.IntVal + Src2.IntVal; break; - case Instruction::Sub: R.IntVal = Src1.IntVal - Src2.IntVal; break; - case Instruction::Mul: R.IntVal = Src1.IntVal * Src2.IntVal; break; - case Instruction::FAdd: executeFAddInst(R, Src1, Src2, Ty); break; - case Instruction::FSub: executeFSubInst(R, Src1, Src2, Ty); break; - case Instruction::FMul: executeFMulInst(R, Src1, Src2, Ty); break; - case Instruction::FDiv: executeFDivInst(R, Src1, Src2, Ty); break; - case Instruction::FRem: executeFRemInst(R, Src1, Src2, Ty); break; - case Instruction::UDiv: R.IntVal = Src1.IntVal.udiv(Src2.IntVal); break; - case Instruction::SDiv: R.IntVal = Src1.IntVal.sdiv(Src2.IntVal); break; - case Instruction::URem: R.IntVal = Src1.IntVal.urem(Src2.IntVal); break; - case Instruction::SRem: R.IntVal = Src1.IntVal.srem(Src2.IntVal); break; - case Instruction::And: R.IntVal = Src1.IntVal & Src2.IntVal; break; - case Instruction::Or: R.IntVal = Src1.IntVal | Src2.IntVal; break; - case Instruction::Xor: R.IntVal = Src1.IntVal ^ Src2.IntVal; break; - default: - dbgs() << "Don't know how to handle this binary operator!\n-->" << I; - llvm_unreachable(0); + // First process vector operation + if (Ty->isVectorTy()) { + assert(Src1.AggregateVal.size() == Src2.AggregateVal.size()); + R.AggregateVal.resize(Src1.AggregateVal.size()); + + // Macros to execute binary operation 'OP' over integer vectors +#define INTEGER_VECTOR_OPERATION(OP) \ + for (unsigned i = 0; i < R.AggregateVal.size(); ++i) \ + R.AggregateVal[i].IntVal = \ + Src1.AggregateVal[i].IntVal OP Src2.AggregateVal[i].IntVal; + + // Additional macros to execute binary operations udiv/sdiv/urem/srem since + // they have different notation. 
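(As an aside, a one-element illustration of the "different notation" mentioned here; this is a sketch only, with APInt values shown at an arbitrary 32-bit width, and is not part of the patch:)

    APInt A(32, 7), B(32, 3);
    APInt Sum  = A + B;      // operator form, what INTEGER_VECTOR_OPERATION uses
    APInt Quot = A.udiv(B);  // member-call form, what INTEGER_VECTOR_FUNCTION (just below) uses
    APInt Rem  = A.srem(B);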
+#define INTEGER_VECTOR_FUNCTION(OP) \ + for (unsigned i = 0; i < R.AggregateVal.size(); ++i) \ + R.AggregateVal[i].IntVal = \ + Src1.AggregateVal[i].IntVal.OP(Src2.AggregateVal[i].IntVal); + + // Macros to execute binary operation 'OP' over floating point type TY + // (float or double) vectors +#define FLOAT_VECTOR_FUNCTION(OP, TY) \ + for (unsigned i = 0; i < R.AggregateVal.size(); ++i) \ + R.AggregateVal[i].TY = \ + Src1.AggregateVal[i].TY OP Src2.AggregateVal[i].TY; + + // Macros to choose appropriate TY: float or double and run operation + // execution +#define FLOAT_VECTOR_OP(OP) { \ + if (dyn_cast<VectorType>(Ty)->getElementType()->isFloatTy()) \ + FLOAT_VECTOR_FUNCTION(OP, FloatVal) \ + else { \ + if (dyn_cast<VectorType>(Ty)->getElementType()->isDoubleTy()) \ + FLOAT_VECTOR_FUNCTION(OP, DoubleVal) \ + else { \ + dbgs() << "Unhandled type for OP instruction: " << *Ty << "\n"; \ + llvm_unreachable(0); \ + } \ + } \ +} + + switch(I.getOpcode()){ + default: + dbgs() << "Don't know how to handle this binary operator!\n-->" << I; + llvm_unreachable(0); + break; + case Instruction::Add: INTEGER_VECTOR_OPERATION(+) break; + case Instruction::Sub: INTEGER_VECTOR_OPERATION(-) break; + case Instruction::Mul: INTEGER_VECTOR_OPERATION(*) break; + case Instruction::UDiv: INTEGER_VECTOR_FUNCTION(udiv) break; + case Instruction::SDiv: INTEGER_VECTOR_FUNCTION(sdiv) break; + case Instruction::URem: INTEGER_VECTOR_FUNCTION(urem) break; + case Instruction::SRem: INTEGER_VECTOR_FUNCTION(srem) break; + case Instruction::And: INTEGER_VECTOR_OPERATION(&) break; + case Instruction::Or: INTEGER_VECTOR_OPERATION(|) break; + case Instruction::Xor: INTEGER_VECTOR_OPERATION(^) break; + case Instruction::FAdd: FLOAT_VECTOR_OP(+) break; + case Instruction::FSub: FLOAT_VECTOR_OP(-) break; + case Instruction::FMul: FLOAT_VECTOR_OP(*) break; + case Instruction::FDiv: FLOAT_VECTOR_OP(/) break; + case Instruction::FRem: + if (dyn_cast<VectorType>(Ty)->getElementType()->isFloatTy()) + for (unsigned i = 0; i < R.AggregateVal.size(); ++i) + R.AggregateVal[i].FloatVal = + fmod(Src1.AggregateVal[i].FloatVal, Src2.AggregateVal[i].FloatVal); + else { + if (dyn_cast<VectorType>(Ty)->getElementType()->isDoubleTy()) + for (unsigned i = 0; i < R.AggregateVal.size(); ++i) + R.AggregateVal[i].DoubleVal = + fmod(Src1.AggregateVal[i].DoubleVal, Src2.AggregateVal[i].DoubleVal); + else { + dbgs() << "Unhandled type for Rem instruction: " << *Ty << "\n"; + llvm_unreachable(0); + } + } + break; + } + } else { + switch (I.getOpcode()) { + default: + dbgs() << "Don't know how to handle this binary operator!\n-->" << I; + llvm_unreachable(0); + break; + case Instruction::Add: R.IntVal = Src1.IntVal + Src2.IntVal; break; + case Instruction::Sub: R.IntVal = Src1.IntVal - Src2.IntVal; break; + case Instruction::Mul: R.IntVal = Src1.IntVal * Src2.IntVal; break; + case Instruction::FAdd: executeFAddInst(R, Src1, Src2, Ty); break; + case Instruction::FSub: executeFSubInst(R, Src1, Src2, Ty); break; + case Instruction::FMul: executeFMulInst(R, Src1, Src2, Ty); break; + case Instruction::FDiv: executeFDivInst(R, Src1, Src2, Ty); break; + case Instruction::FRem: executeFRemInst(R, Src1, Src2, Ty); break; + case Instruction::UDiv: R.IntVal = Src1.IntVal.udiv(Src2.IntVal); break; + case Instruction::SDiv: R.IntVal = Src1.IntVal.sdiv(Src2.IntVal); break; + case Instruction::URem: R.IntVal = Src1.IntVal.urem(Src2.IntVal); break; + case Instruction::SRem: R.IntVal = Src1.IntVal.srem(Src2.IntVal); break; + case Instruction::And: R.IntVal = Src1.IntVal & 
Src2.IntVal; break; + case Instruction::Or: R.IntVal = Src1.IntVal | Src2.IntVal; break; + case Instruction::Xor: R.IntVal = Src1.IntVal ^ Src2.IntVal; break; + } } - SetValue(&I, R, SF); } diff --git a/contrib/llvm/lib/ExecutionEngine/MCJIT/MCJIT.cpp b/contrib/llvm/lib/ExecutionEngine/MCJIT/MCJIT.cpp index fee10e1..38aa547 100644 --- a/contrib/llvm/lib/ExecutionEngine/MCJIT/MCJIT.cpp +++ b/contrib/llvm/lib/ExecutionEngine/MCJIT/MCJIT.cpp @@ -14,6 +14,7 @@ #include "llvm/ExecutionEngine/MCJIT.h" #include "llvm/ExecutionEngine/ObjectBuffer.h" #include "llvm/ExecutionEngine/ObjectImage.h" +#include "llvm/ExecutionEngine/SectionMemoryManager.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" @@ -46,13 +47,14 @@ ExecutionEngine *MCJIT::createJIT(Module *M, // FIXME: Don't do this here. sys::DynamicLibrary::LoadLibraryPermanently(0, NULL); - return new MCJIT(M, TM, JMM, GVsWithCode); + return new MCJIT(M, TM, JMM ? JMM : new SectionMemoryManager(), GVsWithCode); } MCJIT::MCJIT(Module *m, TargetMachine *tm, RTDyldMemoryManager *MM, bool AllocateGVsWithCode) - : ExecutionEngine(m), TM(tm), Ctx(0), MemMgr(MM), Dyld(MM), - isCompiled(false), M(m) { + : ExecutionEngine(m), TM(tm), Ctx(0), + MemMgr(MM ? MM : new SectionMemoryManager()), Dyld(MemMgr), + IsLoaded(false), M(m), ObjCache(0) { setDataLayout(TM->getDataLayout()); } @@ -64,7 +66,11 @@ MCJIT::~MCJIT() { delete TM; } -void MCJIT::emitObject(Module *m) { +void MCJIT::setObjectCache(ObjectCache* NewCache) { + ObjCache = NewCache; +} + +ObjectBufferStream* MCJIT::emitObject(Module *m) { /// Currently, MCJIT only supports a single module and the module passed to /// this function call is expected to be the contained module. The module /// is passed as a parameter here to prepare for multiple module support in @@ -77,30 +83,66 @@ void MCJIT::emitObject(Module *m) { // FIXME: Track compilation state on a per-module basis when multiple modules // are supported. // Re-compilation is not supported - if (isCompiled) - return; + assert(!IsLoaded); PassManager PM; PM.add(new DataLayout(*TM->getDataLayout())); // The RuntimeDyld will take ownership of this shortly - OwningPtr<ObjectBufferStream> Buffer(new ObjectBufferStream()); + OwningPtr<ObjectBufferStream> CompiledObject(new ObjectBufferStream()); // Turn the machine code intermediate representation into bytes in memory // that may be executed. - if (TM->addPassesToEmitMC(PM, Ctx, Buffer->getOStream(), false)) { + if (TM->addPassesToEmitMC(PM, Ctx, CompiledObject->getOStream(), false)) { report_fatal_error("Target does not support MC emission!"); } // Initialize passes. PM.run(*m); // Flush the output buffer to get the generated code into memory - Buffer->flush(); + CompiledObject->flush(); + + // If we have an object cache, tell it about the new object. + // Note that we're using the compiled image, not the loaded image (as below). + if (ObjCache) { + // MemoryBuffer is a thin wrapper around the actual memory, so it's OK + // to create a temporary object here and delete it after the call. + OwningPtr<MemoryBuffer> MB(CompiledObject->getMemBuffer()); + ObjCache->notifyObjectCompiled(m, MB.get()); + } + + return CompiledObject.take(); +} + +void MCJIT::loadObject(Module *M) { + + // Get a thread lock to make sure we aren't trying to load multiple times + MutexGuard locked(lock); + + // FIXME: Track compilation state on a per-module basis when multiple modules + // are supported. 
+ // Re-compilation is not supported + if (IsLoaded) + return; + + OwningPtr<ObjectBuffer> ObjectToLoad; + // Try to load the pre-compiled object from cache if possible + if (0 != ObjCache) { + OwningPtr<MemoryBuffer> PreCompiledObject(ObjCache->getObjectCopy(M)); + if (0 != PreCompiledObject.get()) + ObjectToLoad.reset(new ObjectBuffer(PreCompiledObject.take())); + } + + // If the cache did not contain a suitable object, compile the object + if (!ObjectToLoad) { + ObjectToLoad.reset(emitObject(M)); + assert(ObjectToLoad.get() && "Compilation did not produce an object."); + } // Load the object into the dynamic linker. // handing off ownership of the buffer - LoadedObject.reset(Dyld.loadObject(Buffer.take())); + LoadedObject.reset(Dyld.loadObject(ObjectToLoad.take())); if (!LoadedObject) report_fatal_error(Dyld.getErrorString()); @@ -113,7 +155,7 @@ void MCJIT::emitObject(Module *m) { NotifyObjectEmitted(*LoadedObject); // FIXME: Add support for per-module compilation state - isCompiled = true; + IsLoaded = true; } // FIXME: Add a parameter to identify which object is being finalized when @@ -122,19 +164,18 @@ void MCJIT::emitObject(Module *m) { // protection in the interface. void MCJIT::finalizeObject() { // If the module hasn't been compiled, just do that. - if (!isCompiled) { - // If the call to Dyld.resolveRelocations() is removed from emitObject() + if (!IsLoaded) { + // If the call to Dyld.resolveRelocations() is removed from loadObject() // we'll need to do that here. - emitObject(M); - - // Set page permissions. - MemMgr->applyPermissions(); - - return; + loadObject(M); + } else { + // Resolve any relocations. + Dyld.resolveRelocations(); } - // Resolve any relocations. - Dyld.resolveRelocations(); + StringRef EHData = Dyld.getEHFrameSection(); + if (!EHData.empty()) + MemMgr->registerEHFrames(EHData); // Set page permissions. MemMgr->applyPermissions(); @@ -151,8 +192,8 @@ void *MCJIT::getPointerToFunction(Function *F) { // dies. // FIXME: Add support for per-module compilation state - if (!isCompiled) - emitObject(M); + if (!IsLoaded) + loadObject(M); if (F->isDeclaration() || F->hasAvailableExternallyLinkage()) { bool AbortOnFailure = !F->hasExternalWeakLinkage(); @@ -284,8 +325,8 @@ GenericValue MCJIT::runFunction(Function *F, void *MCJIT::getPointerToNamedFunction(const std::string &Name, bool AbortOnFailure) { // FIXME: Add support for per-module compilation state - if (!isCompiled) - emitObject(M); + if (!IsLoaded) + loadObject(M); if (!isSymbolSearchingDisabled() && MemMgr) { void *ptr = MemMgr->getPointerToNamedFunction(Name, false); diff --git a/contrib/llvm/lib/ExecutionEngine/MCJIT/MCJIT.h b/contrib/llvm/lib/ExecutionEngine/MCJIT/MCJIT.h index 283a8e5..8c4bf6e 100644 --- a/contrib/llvm/lib/ExecutionEngine/MCJIT/MCJIT.h +++ b/contrib/llvm/lib/ExecutionEngine/MCJIT/MCJIT.h @@ -12,6 +12,7 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ExecutionEngine/ExecutionEngine.h" +#include "llvm/ExecutionEngine/ObjectCache.h" #include "llvm/ExecutionEngine/RuntimeDyld.h" #include "llvm/PassManager.h" @@ -34,16 +35,23 @@ class MCJIT : public ExecutionEngine { SmallVector<JITEventListener*, 2> EventListeners; // FIXME: Add support for multiple modules - bool isCompiled; + bool IsLoaded; Module *M; OwningPtr<ObjectImage> LoadedObject; + // An optional ObjectCache to be notified of compiled objects and used to + // perform lookup of pre-compiled code to avoid re-compilation. 
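(As an aside, a minimal sketch of a client-side cache that could be handed to setObjectCache(); it assumes only the notifyObjectCompiled()/getObjectCopy() calls visible in this patch plus MemoryBuffer::getMemBufferCopy(), and the SimpleObjectCache name and its single-buffer policy are illustrative, not part of the change:)

    class SimpleObjectCache : public ObjectCache {
      OwningPtr<MemoryBuffer> Cached;   // the last object MCJIT emitted
    public:
      virtual void notifyObjectCompiled(const Module *M, const MemoryBuffer *Obj) {
        // Keep a private copy so a later engine instance can skip codegen.
        Cached.reset(MemoryBuffer::getMemBufferCopy(Obj->getBuffer(),
                                                    Obj->getBufferIdentifier()));
      }
      virtual MemoryBuffer *getObjectCopy(const Module *M) {
        if (!Cached.get())
          return 0;
        // MCJIT takes ownership of the returned buffer, so hand back a fresh copy.
        return MemoryBuffer::getMemBufferCopy(Cached->getBuffer(),
                                              Cached->getBufferIdentifier());
      }
    };
    // Typical use, also a sketch: SimpleObjectCache Cache; EE->setObjectCache(&Cache);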
+ ObjectCache *ObjCache; + public: ~MCJIT(); /// @name ExecutionEngine interface implementation /// @{ + /// Sets the object manager that MCJIT should use to avoid compilation. + virtual void setObjectCache(ObjectCache *manager); + virtual void finalizeObject(); virtual void *getPointerToBasicBlock(BasicBlock *BB); @@ -102,7 +110,9 @@ protected: /// this function call is expected to be the contained module. The module /// is passed as a parameter here to prepare for multiple module support in /// the future. - void emitObject(Module *M); + ObjectBufferStream* emitObject(Module *M); + + void loadObject(Module *M); void NotifyObjectEmitted(const ObjectImage& Obj); void NotifyFreeingObject(const ObjectImage& Obj); diff --git a/contrib/llvm/lib/ExecutionEngine/MCJIT/SectionMemoryManager.cpp b/contrib/llvm/lib/ExecutionEngine/MCJIT/SectionMemoryManager.cpp index fa35acd..bac77ce 100644 --- a/contrib/llvm/lib/ExecutionEngine/MCJIT/SectionMemoryManager.cpp +++ b/contrib/llvm/lib/ExecutionEngine/MCJIT/SectionMemoryManager.cpp @@ -138,9 +138,46 @@ bool SectionMemoryManager::applyPermissions(std::string *ErrMsg) // Read-write data memory already has the correct permissions + // Some platforms with separate data cache and instruction cache require + // explicit cache flush, otherwise JIT code manipulations (like resolved + // relocations) will get to the data cache but not to the instruction cache. + invalidateInstructionCache(); + return false; } +// Determine whether we can register EH tables. +#if (defined(__GNUC__) && !defined(__ARM_EABI__) && \ + !defined(__USING_SJLJ_EXCEPTIONS__)) +#define HAVE_EHTABLE_SUPPORT 1 +#else +#define HAVE_EHTABLE_SUPPORT 0 +#endif + +#if HAVE_EHTABLE_SUPPORT +extern "C" void __register_frame(void*); + +static const char *processFDE(const char *Entry) { + const char *P = Entry; + uint32_t Length = *((uint32_t*)P); + P += 4; + uint32_t Offset = *((uint32_t*)P); + if (Offset != 0) + __register_frame((void*)Entry); + return P + Length; +} +#endif + +void SectionMemoryManager::registerEHFrames(StringRef SectionData) { +#if HAVE_EHTABLE_SUPPORT + const char *P = SectionData.data(); + const char *End = SectionData.data() + SectionData.size(); + do { + P = processFDE(P); + } while(P != End); +#endif +} + error_code SectionMemoryManager::applyMemoryGroupPermissions(MemoryGroup &MemGroup, unsigned Permissions) { diff --git a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp index 409b25f..a08b508 100644 --- a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp +++ b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp @@ -25,10 +25,15 @@ using namespace llvm::object; // Empty out-of-line virtual destructor as the key function. RTDyldMemoryManager::~RTDyldMemoryManager() {} +void RTDyldMemoryManager::registerEHFrames(StringRef SectionData) {} RuntimeDyldImpl::~RuntimeDyldImpl() {} namespace llvm { +StringRef RuntimeDyldImpl::getEHFrameSection() { + return StringRef(); +} + // Resolve the relocations for all symbols we currently know about. void RuntimeDyldImpl::resolveRelocations() { // First, resolve relocations associated with external symbols. @@ -96,7 +101,8 @@ ObjectImage *RuntimeDyldImpl::loadObject(ObjectBuffer *InputBuffer) { bool isCommon = flags & SymbolRef::SF_Common; if (isCommon) { // Add the common symbols to a list. We'll allocate them all below. 
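(A purely illustrative mirror of the registerEHFrames()/processFDE() walk added above: each record starts with a 32-bit length followed by a 32-bit CIE pointer, and only records with a non-zero CIE pointer, i.e. FDEs, are handed to __register_frame(). The helper below assumes that same layout and merely counts them:)

    static unsigned countFDEs(StringRef SectionData) {
      const char *P = SectionData.data();
      const char *End = P + SectionData.size();
      unsigned NumFDEs = 0;
      while (P != End) {
        uint32_t Length = *reinterpret_cast<const uint32_t *>(P);
        uint32_t CIEPointer = *reinterpret_cast<const uint32_t *>(P + 4);
        if (CIEPointer != 0)   // zero marks a CIE; anything else is an FDE
          ++NumFDEs;
        P += 4 + Length;       // the length field does not count itself
      }
      return NumFDEs;
    }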
- uint64_t Align = getCommonSymbolAlignment(*i); + uint32_t Align; + Check(i->getAlignment(Align)); uint64_t Size = 0; Check(i->getSize(Size)); CommonSize += Size + Align; @@ -154,18 +160,8 @@ ObjectImage *RuntimeDyldImpl::loadObject(ObjectBuffer *InputBuffer) { isFirstRelocation = false; } - ObjRelocationInfo RI; - RI.SectionID = SectionID; - Check(i->getAdditionalInfo(RI.AdditionalInfo)); - Check(i->getOffset(RI.Offset)); - Check(i->getSymbol(RI.Symbol)); - Check(i->getType(RI.Type)); - - DEBUG(dbgs() << "\t\tAddend: " << RI.AdditionalInfo - << " Offset: " << format("%p", (uintptr_t)RI.Offset) - << " Type: " << (uint32_t)(RI.Type & 0xffffffffL) - << "\n"); - processRelocationRef(RI, *obj, LocalSections, LocalSymbols, Stubs); + processRelocationRef(SectionID, *i, *obj, LocalSections, LocalSymbols, + Stubs); } } @@ -183,7 +179,7 @@ void RuntimeDyldImpl::emitCommonSymbols(ObjectImage &Obj, if (!Addr) report_fatal_error("Unable to allocate memory for common symbols!"); uint64_t Offset = 0; - Sections.push_back(SectionEntry(StringRef(), Addr, TotalSize, TotalSize, 0)); + Sections.push_back(SectionEntry(StringRef(), Addr, TotalSize, 0)); memset(Addr, 0, TotalSize); DEBUG(dbgs() << "emitCommonSection SectionID: " << SectionID @@ -243,6 +239,12 @@ unsigned RuntimeDyldImpl::emitSection(ObjectImage &Obj, Check(Section.isReadOnlyData(IsReadOnly)); Check(Section.getSize(DataSize)); Check(Section.getName(Name)); + if (StubSize > 0) { + unsigned StubAlignment = getStubAlignment(); + unsigned EndAlignment = (DataSize | Alignment) & -(DataSize | Alignment); + if (StubAlignment > EndAlignment) + StubBufSize += StubAlignment - EndAlignment; + } unsigned Allocate; unsigned SectionID = Sections.size(); @@ -295,8 +297,7 @@ unsigned RuntimeDyldImpl::emitSection(ObjectImage &Obj, << "\n"); } - Sections.push_back(SectionEntry(Name, Addr, Allocate, DataSize, - (uintptr_t)pData)); + Sections.push_back(SectionEntry(Name, Addr, DataSize, (uintptr_t)pData)); return SectionID; } @@ -339,7 +340,25 @@ void RuntimeDyldImpl::addRelocationForSymbol(const RelocationEntry &RE, } uint8_t *RuntimeDyldImpl::createStubFunction(uint8_t *Addr) { - if (Arch == Triple::arm) { + if (Arch == Triple::aarch64) { + // This stub has to be able to access the full address space, + // since symbol lookup won't necessarily find a handy, in-range, + // PLT stub for functions which could be anywhere. + uint32_t *StubAddr = (uint32_t*)Addr; + + // Stub can use ip0 (== x16) to calculate address + *StubAddr = 0xd2e00010; // movz ip0, #:abs_g3:<addr> + StubAddr++; + *StubAddr = 0xf2c00010; // movk ip0, #:abs_g2_nc:<addr> + StubAddr++; + *StubAddr = 0xf2a00010; // movk ip0, #:abs_g1_nc:<addr> + StubAddr++; + *StubAddr = 0xf2800010; // movk ip0, #:abs_g0_nc:<addr> + StubAddr++; + *StubAddr = 0xd61f0200; // br ip0 + + return Addr; + } else if (Arch == Triple::arm) { // TODO: There is only ARM far stub now. We should add the Thumb stub, // and stubs for branches Thumb - ARM and ARM - Thumb. 
uint32_t *StubAddr = (uint32_t*)Addr; @@ -380,6 +399,13 @@ uint8_t *RuntimeDyldImpl::createStubFunction(uint8_t *Addr) { writeInt32BE(Addr+40, 0x4E800420); // bctr return Addr; + } else if (Arch == Triple::systemz) { + writeInt16BE(Addr, 0xC418); // lgrl %r1,.+8 + writeInt16BE(Addr+2, 0x0000); + writeInt16BE(Addr+4, 0x0004); + writeInt16BE(Addr+6, 0x07F1); // brc 15,%r1 + // 8-byte address stored at Addr + 8 + return Addr; } return Addr; } @@ -401,26 +427,14 @@ void RuntimeDyldImpl::reassignSectionAddress(unsigned SectionID, Sections[SectionID].LoadAddress = Addr; } -void RuntimeDyldImpl::resolveRelocationEntry(const RelocationEntry &RE, - uint64_t Value) { - // Ignore relocations for sections that were not loaded - if (Sections[RE.SectionID].Address != 0) { - DEBUG(dbgs() << "\tSectionID: " << RE.SectionID - << " + " << RE.Offset << " (" - << format("%p", Sections[RE.SectionID].Address + RE.Offset) << ")" - << " RelType: " << RE.RelType - << " Addend: " << RE.Addend - << "\n"); - - resolveRelocation(Sections[RE.SectionID], RE.Offset, - Value, RE.RelType, RE.Addend); - } -} - void RuntimeDyldImpl::resolveRelocationList(const RelocationList &Relocs, uint64_t Value) { for (unsigned i = 0, e = Relocs.size(); i != e; ++i) { - resolveRelocationEntry(Relocs[i], Value); + const RelocationEntry &RE = Relocs[i]; + // Ignore relocations for sections that were not loaded + if (Sections[RE.SectionID].Address == 0) + continue; + resolveRelocation(RE, Value); } } @@ -534,4 +548,8 @@ StringRef RuntimeDyld::getErrorString() { return Dyld->getErrorString(); } +StringRef RuntimeDyld::getEHFrameSection() { + return Dyld->getEHFrameSection(); +} + } // end namespace llvm diff --git a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp index b8537b1..d4d84d3 100644 --- a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp +++ b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp @@ -41,7 +41,7 @@ error_code check(error_code Err) { template<class ELFT> class DyldELFObject : public ELFObjectFile<ELFT> { - LLVM_ELF_IMPORT_TYPES(ELFT) + LLVM_ELF_IMPORT_TYPES_ELFT(ELFT) typedef Elf_Shdr_Impl<ELFT> Elf_Shdr; typedef Elf_Sym_Impl<ELFT> Elf_Sym; @@ -151,6 +151,14 @@ void DyldELFObject<ELFT>::updateSymbolAddress(const SymbolRef &SymRef, namespace llvm { +StringRef RuntimeDyldELF::getEHFrameSection() { + for (int i = 0, e = Sections.size(); i != e; ++i) { + if (Sections[i].Name == ".eh_frame") + return StringRef((const char*)Sections[i].Address, Sections[i].Size); + } + return StringRef(); +} + ObjectImage *RuntimeDyldELF::createObjectImage(ObjectBuffer *Buffer) { if (Buffer->getBufferSize() < ELF::EI_NIDENT) llvm_unreachable("Unexpected ELF object size"); @@ -269,6 +277,85 @@ void RuntimeDyldELF::resolveX86Relocation(const SectionEntry &Section, } } +void RuntimeDyldELF::resolveAArch64Relocation(const SectionEntry &Section, + uint64_t Offset, + uint64_t Value, + uint32_t Type, + int64_t Addend) { + uint32_t *TargetPtr = reinterpret_cast<uint32_t*>(Section.Address + Offset); + uint64_t FinalAddress = Section.LoadAddress + Offset; + + DEBUG(dbgs() << "resolveAArch64Relocation, LocalAddress: 0x" + << format("%llx", Section.Address + Offset) + << " FinalAddress: 0x" << format("%llx",FinalAddress) + << " Value: 0x" << format("%llx",Value) + << " Type: 0x" << format("%x",Type) + << " Addend: 0x" << format("%llx",Addend) + << "\n"); + + switch (Type) { + default: + llvm_unreachable("Relocation type not implemented yet!"); + 
break; + case ELF::R_AARCH64_ABS64: { + uint64_t *TargetPtr = reinterpret_cast<uint64_t*>(Section.Address + Offset); + *TargetPtr = Value + Addend; + break; + } + case ELF::R_AARCH64_PREL32: { // test-shift.ll (.eh_frame) + uint64_t Result = Value + Addend - FinalAddress; + assert(static_cast<int64_t>(Result) >= INT32_MIN && + static_cast<int64_t>(Result) <= UINT32_MAX); + *TargetPtr = static_cast<uint32_t>(Result & 0xffffffffU); + break; + } + case ELF::R_AARCH64_CALL26: // fallthrough + case ELF::R_AARCH64_JUMP26: { + // Operation: S+A-P. Set Call or B immediate value to bits fff_fffc of the + // calculation. + uint64_t BranchImm = Value + Addend - FinalAddress; + + // "Check that -2^27 <= result < 2^27". + assert(-(1LL << 27) <= static_cast<int64_t>(BranchImm) && + static_cast<int64_t>(BranchImm) < (1LL << 27)); + // Immediate goes in bits 25:0 of B and BL. + *TargetPtr |= static_cast<uint32_t>(BranchImm & 0xffffffcU) >> 2; + break; + } + case ELF::R_AARCH64_MOVW_UABS_G3: { + uint64_t Result = Value + Addend; + // Immediate goes in bits 20:5 of MOVZ/MOVK instruction + *TargetPtr |= Result >> (48 - 5); + // Shift is "lsl #48", in bits 22:21 + *TargetPtr |= 3 << 21; + break; + } + case ELF::R_AARCH64_MOVW_UABS_G2_NC: { + uint64_t Result = Value + Addend; + // Immediate goes in bits 20:5 of MOVZ/MOVK instruction + *TargetPtr |= ((Result & 0xffff00000000ULL) >> (32 - 5)); + // Shift is "lsl #32", in bits 22:21 + *TargetPtr |= 2 << 21; + break; + } + case ELF::R_AARCH64_MOVW_UABS_G1_NC: { + uint64_t Result = Value + Addend; + // Immediate goes in bits 20:5 of MOVZ/MOVK instruction + *TargetPtr |= ((Result & 0xffff0000U) >> (16 - 5)); + // Shift is "lsl #16", in bits 22:21 + *TargetPtr |= 1 << 21; + break; + } + case ELF::R_AARCH64_MOVW_UABS_G0_NC: { + uint64_t Result = Value + Addend; + // Immediate goes in bits 20:5 of MOVZ/MOVK instruction + *TargetPtr |= ((Result & 0xffffU) << 5); + // Shift is "lsl #0", in bits 22:21. No action needed. 
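(As an aside, these MOVW_UABS cases are what the 20-byte AArch64 stub added above, movz plus three movk plus br, relies on: each relocation drops one 16-bit slice of the target address into an instruction whose imm16 field starts out zero. A sketch of that encoding, assuming only the bit positions stated in the comments:)

    // imm16 occupies bits 20:5; the shift selector occupies bits 22:21
    // (0 = lsl #0, 1 = lsl #16, 2 = lsl #32, 3 = lsl #48).
    static uint32_t patchMovWide(uint32_t Insn, uint64_t Target, unsigned Chunk) {
      uint16_t Imm = uint16_t((Target >> (16 * Chunk)) & 0xffff); // one slice
      Insn |= uint32_t(Imm) << 5;
      Insn |= uint32_t(Chunk & 0x3) << 21;
      return Insn;
    }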
+ break; + } + } +} + void RuntimeDyldELF::resolveARMRelocation(const SectionEntry &Section, uint64_t Offset, uint32_t Value, @@ -541,6 +628,11 @@ void RuntimeDyldELF::resolvePPC64Relocation(const SectionEntry &Section, llvm_unreachable("Relocation R_PPC64_REL32 overflow"); writeInt32BE(LocalAddress, delta); } break; + case ELF::R_PPC64_REL64: { + uint64_t FinalAddress = (Section.LoadAddress + Offset); + uint64_t Delta = Value - FinalAddress + Addend; + writeInt64BE(LocalAddress, Delta); + } break; case ELF::R_PPC64_ADDR64 : writeInt64BE(LocalAddress, Value + Addend); break; @@ -560,6 +652,48 @@ void RuntimeDyldELF::resolvePPC64Relocation(const SectionEntry &Section, } } +void RuntimeDyldELF::resolveSystemZRelocation(const SectionEntry &Section, + uint64_t Offset, + uint64_t Value, + uint32_t Type, + int64_t Addend) { + uint8_t *LocalAddress = Section.Address + Offset; + switch (Type) { + default: + llvm_unreachable("Relocation type not implemented yet!"); + break; + case ELF::R_390_PC16DBL: + case ELF::R_390_PLT16DBL: { + int64_t Delta = (Value + Addend) - (Section.LoadAddress + Offset); + assert(int16_t(Delta / 2) * 2 == Delta && "R_390_PC16DBL overflow"); + writeInt16BE(LocalAddress, Delta / 2); + break; + } + case ELF::R_390_PC32DBL: + case ELF::R_390_PLT32DBL: { + int64_t Delta = (Value + Addend) - (Section.LoadAddress + Offset); + assert(int32_t(Delta / 2) * 2 == Delta && "R_390_PC32DBL overflow"); + writeInt32BE(LocalAddress, Delta / 2); + break; + } + case ELF::R_390_PC32: { + int64_t Delta = (Value + Addend) - (Section.LoadAddress + Offset); + assert(int32_t(Delta) == Delta && "R_390_PC32 overflow"); + writeInt32BE(LocalAddress, Delta); + break; + } + case ELF::R_390_64: + writeInt64BE(LocalAddress, Value + Addend); + break; + } +} + +void RuntimeDyldELF::resolveRelocation(const RelocationEntry &RE, + uint64_t Value) { + const SectionEntry &Section = Sections[RE.SectionID]; + return resolveRelocation(Section, RE.Offset, Value, RE.RelType, RE.Addend); +} + void RuntimeDyldELF::resolveRelocation(const SectionEntry &Section, uint64_t Offset, uint64_t Value, @@ -574,6 +708,9 @@ void RuntimeDyldELF::resolveRelocation(const SectionEntry &Section, (uint32_t)(Value & 0xffffffffL), Type, (uint32_t)(Addend & 0xffffffffL)); break; + case Triple::aarch64: + resolveAArch64Relocation(Section, Offset, Value, Type, Addend); + break; case Triple::arm: // Fall through. 
case Triple::thumb: resolveARMRelocation(Section, Offset, @@ -589,19 +726,25 @@ void RuntimeDyldELF::resolveRelocation(const SectionEntry &Section, case Triple::ppc64: resolvePPC64Relocation(Section, Offset, Value, Type, Addend); break; + case Triple::systemz: + resolveSystemZRelocation(Section, Offset, Value, Type, Addend); + break; default: llvm_unreachable("Unsupported CPU type!"); } } -void RuntimeDyldELF::processRelocationRef(const ObjRelocationInfo &Rel, +void RuntimeDyldELF::processRelocationRef(unsigned SectionID, + RelocationRef RelI, ObjectImage &Obj, ObjSectionToIDMap &ObjSectionToID, const SymbolTableMap &Symbols, StubMap &Stubs) { - - uint32_t RelType = (uint32_t)(Rel.Type & 0xffffffffL); - intptr_t Addend = (intptr_t)Rel.AdditionalInfo; - const SymbolRef &Symbol = Rel.Symbol; + uint64_t RelType; + Check(RelI.getType(RelType)); + int64_t Addend; + Check(RelI.getAdditionalInfo(Addend)); + SymbolRef Symbol; + Check(RelI.getSymbol(Symbol)); // Obtain the symbol name which is referenced in the relocation StringRef TargetName; @@ -617,14 +760,14 @@ void RuntimeDyldELF::processRelocationRef(const ObjRelocationInfo &Rel, Symbol.getType(SymType); if (lsi != Symbols.end()) { Value.SectionID = lsi->second.first; - Value.Addend = lsi->second.second; + Value.Addend = lsi->second.second + Addend; } else { // Search for the symbol in the global symbol table SymbolTableMap::const_iterator gsi = GlobalSymbolTable.find(TargetName.data()); if (gsi != GlobalSymbolTable.end()) { Value.SectionID = gsi->second.first; - Value.Addend = gsi->second.second; + Value.Addend = gsi->second.second + Addend; } else { switch (SymType) { case SymbolRef::ST_Debug: { @@ -657,21 +800,73 @@ void RuntimeDyldELF::processRelocationRef(const ObjRelocationInfo &Rel, } } } - DEBUG(dbgs() << "\t\tRel.SectionID: " << Rel.SectionID - << " Rel.Offset: " << Rel.Offset + uint64_t Offset; + Check(RelI.getOffset(Offset)); + + DEBUG(dbgs() << "\t\tSectionID: " << SectionID + << " Offset: " << Offset << "\n"); - if (Arch == Triple::arm && + if (Arch == Triple::aarch64 && + (RelType == ELF::R_AARCH64_CALL26 || + RelType == ELF::R_AARCH64_JUMP26)) { + // This is an AArch64 branch relocation, need to use a stub function. + DEBUG(dbgs() << "\t\tThis is an AArch64 branch relocation."); + SectionEntry &Section = Sections[SectionID]; + + // Look for an existing stub. + StubMap::const_iterator i = Stubs.find(Value); + if (i != Stubs.end()) { + resolveRelocation(Section, Offset, + (uint64_t)Section.Address + i->second, RelType, 0); + DEBUG(dbgs() << " Stub function found\n"); + } else { + // Create a new stub function. 
+ DEBUG(dbgs() << " Create a new stub function\n"); + Stubs[Value] = Section.StubOffset; + uint8_t *StubTargetAddr = createStubFunction(Section.Address + + Section.StubOffset); + + RelocationEntry REmovz_g3(SectionID, + StubTargetAddr - Section.Address, + ELF::R_AARCH64_MOVW_UABS_G3, Value.Addend); + RelocationEntry REmovk_g2(SectionID, + StubTargetAddr - Section.Address + 4, + ELF::R_AARCH64_MOVW_UABS_G2_NC, Value.Addend); + RelocationEntry REmovk_g1(SectionID, + StubTargetAddr - Section.Address + 8, + ELF::R_AARCH64_MOVW_UABS_G1_NC, Value.Addend); + RelocationEntry REmovk_g0(SectionID, + StubTargetAddr - Section.Address + 12, + ELF::R_AARCH64_MOVW_UABS_G0_NC, Value.Addend); + + if (Value.SymbolName) { + addRelocationForSymbol(REmovz_g3, Value.SymbolName); + addRelocationForSymbol(REmovk_g2, Value.SymbolName); + addRelocationForSymbol(REmovk_g1, Value.SymbolName); + addRelocationForSymbol(REmovk_g0, Value.SymbolName); + } else { + addRelocationForSection(REmovz_g3, Value.SectionID); + addRelocationForSection(REmovk_g2, Value.SectionID); + addRelocationForSection(REmovk_g1, Value.SectionID); + addRelocationForSection(REmovk_g0, Value.SectionID); + } + resolveRelocation(Section, Offset, + (uint64_t)Section.Address + Section.StubOffset, + RelType, 0); + Section.StubOffset += getMaxStubSize(); + } + } else if (Arch == Triple::arm && (RelType == ELF::R_ARM_PC24 || RelType == ELF::R_ARM_CALL || RelType == ELF::R_ARM_JUMP24)) { // This is an ARM branch relocation, need to use a stub function. DEBUG(dbgs() << "\t\tThis is an ARM branch relocation."); - SectionEntry &Section = Sections[Rel.SectionID]; + SectionEntry &Section = Sections[SectionID]; // Look for an existing stub. StubMap::const_iterator i = Stubs.find(Value); if (i != Stubs.end()) { - resolveRelocation(Section, Rel.Offset, + resolveRelocation(Section, Offset, (uint64_t)Section.Address + i->second, RelType, 0); DEBUG(dbgs() << " Stub function found\n"); } else { @@ -680,14 +875,14 @@ void RuntimeDyldELF::processRelocationRef(const ObjRelocationInfo &Rel, Stubs[Value] = Section.StubOffset; uint8_t *StubTargetAddr = createStubFunction(Section.Address + Section.StubOffset); - RelocationEntry RE(Rel.SectionID, StubTargetAddr - Section.Address, + RelocationEntry RE(SectionID, StubTargetAddr - Section.Address, ELF::R_ARM_ABS32, Value.Addend); if (Value.SymbolName) addRelocationForSymbol(RE, Value.SymbolName); else addRelocationForSection(RE, Value.SectionID); - resolveRelocation(Section, Rel.Offset, + resolveRelocation(Section, Offset, (uint64_t)Section.Address + Section.StubOffset, RelType, 0); Section.StubOffset += getMaxStubSize(); @@ -696,8 +891,8 @@ void RuntimeDyldELF::processRelocationRef(const ObjRelocationInfo &Rel, RelType == ELF::R_MIPS_26) { // This is an Mips branch relocation, need to use a stub function. DEBUG(dbgs() << "\t\tThis is a Mips branch relocation."); - SectionEntry &Section = Sections[Rel.SectionID]; - uint8_t *Target = Section.Address + Rel.Offset; + SectionEntry &Section = Sections[SectionID]; + uint8_t *Target = Section.Address + Offset; uint32_t *TargetAddress = (uint32_t *)Target; // Extract the addend from the instruction. @@ -708,7 +903,7 @@ void RuntimeDyldELF::processRelocationRef(const ObjRelocationInfo &Rel, // Look up for existing stub. 
StubMap::const_iterator i = Stubs.find(Value); if (i != Stubs.end()) { - resolveRelocation(Section, Rel.Offset, + resolveRelocation(Section, Offset, (uint64_t)Section.Address + i->second, RelType, 0); DEBUG(dbgs() << " Stub function found\n"); } else { @@ -719,10 +914,10 @@ void RuntimeDyldELF::processRelocationRef(const ObjRelocationInfo &Rel, Section.StubOffset); // Creating Hi and Lo relocations for the filled stub instructions. - RelocationEntry REHi(Rel.SectionID, + RelocationEntry REHi(SectionID, StubTargetAddr - Section.Address, ELF::R_MIPS_HI16, Value.Addend); - RelocationEntry RELo(Rel.SectionID, + RelocationEntry RELo(SectionID, StubTargetAddr - Section.Address + 4, ELF::R_MIPS_LO16, Value.Addend); @@ -734,7 +929,7 @@ void RuntimeDyldELF::processRelocationRef(const ObjRelocationInfo &Rel, addRelocationForSection(RELo, Value.SectionID); } - resolveRelocation(Section, Rel.Offset, + resolveRelocation(Section, Offset, (uint64_t)Section.Address + Section.StubOffset, RelType, 0); Section.StubOffset += getMaxStubSize(); @@ -744,8 +939,8 @@ void RuntimeDyldELF::processRelocationRef(const ObjRelocationInfo &Rel, // A PPC branch relocation will need a stub function if the target is // an external symbol (Symbol::ST_Unknown) or if the target address // is not within the signed 24-bits branch address. - SectionEntry &Section = Sections[Rel.SectionID]; - uint8_t *Target = Section.Address + Rel.Offset; + SectionEntry &Section = Sections[SectionID]; + uint8_t *Target = Section.Address + Offset; bool RangeOverflow = false; if (SymType != SymbolRef::ST_Unknown) { // A function call may points to the .opd entry, so the final symbol value @@ -755,7 +950,7 @@ void RuntimeDyldELF::processRelocationRef(const ObjRelocationInfo &Rel, int32_t delta = static_cast<int32_t>(Target - RelocTarget); // If it is within 24-bits branch range, just set the branch target if (SignExtend32<24>(delta) == delta) { - RelocationEntry RE(Rel.SectionID, Rel.Offset, RelType, Value.Addend); + RelocationEntry RE(SectionID, Offset, RelType, Value.Addend); if (Value.SymbolName) addRelocationForSymbol(RE, Value.SymbolName); else @@ -770,7 +965,7 @@ void RuntimeDyldELF::processRelocationRef(const ObjRelocationInfo &Rel, StubMap::const_iterator i = Stubs.find(Value); if (i != Stubs.end()) { // Symbol function stub already created, just relocate to it - resolveRelocation(Section, Rel.Offset, + resolveRelocation(Section, Offset, (uint64_t)Section.Address + i->second, RelType, 0); DEBUG(dbgs() << " Stub function found\n"); } else { @@ -779,21 +974,21 @@ void RuntimeDyldELF::processRelocationRef(const ObjRelocationInfo &Rel, Stubs[Value] = Section.StubOffset; uint8_t *StubTargetAddr = createStubFunction(Section.Address + Section.StubOffset); - RelocationEntry RE(Rel.SectionID, StubTargetAddr - Section.Address, + RelocationEntry RE(SectionID, StubTargetAddr - Section.Address, ELF::R_PPC64_ADDR64, Value.Addend); // Generates the 64-bits address loads as exemplified in section // 4.5.1 in PPC64 ELF ABI. 
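(For reference on the four relocation entries created just below, a sketch of how the PPC64 ELF ABI's HIGHEST/HIGHER/HI/LO halves carve up a 64-bit address; the Target value here is only a placeholder:)

    uint64_t Target = 0; // final symbol address, filled in at resolution time
    uint16_t Highest = uint16_t(Target >> 48); // R_PPC64_ADDR16_HIGHEST, bits 63:48
    uint16_t Higher  = uint16_t(Target >> 32); // R_PPC64_ADDR16_HIGHER,  bits 47:32
    uint16_t Hi      = uint16_t(Target >> 16); // R_PPC64_ADDR16_HI,      bits 31:16
    uint16_t Lo      = uint16_t(Target);       // R_PPC64_ADDR16_LO,      bits 15:0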
- RelocationEntry REhst(Rel.SectionID, + RelocationEntry REhst(SectionID, StubTargetAddr - Section.Address + 2, ELF::R_PPC64_ADDR16_HIGHEST, Value.Addend); - RelocationEntry REhr(Rel.SectionID, + RelocationEntry REhr(SectionID, StubTargetAddr - Section.Address + 6, ELF::R_PPC64_ADDR16_HIGHER, Value.Addend); - RelocationEntry REh(Rel.SectionID, + RelocationEntry REh(SectionID, StubTargetAddr - Section.Address + 14, ELF::R_PPC64_ADDR16_HI, Value.Addend); - RelocationEntry REl(Rel.SectionID, + RelocationEntry REl(SectionID, StubTargetAddr - Section.Address + 18, ELF::R_PPC64_ADDR16_LO, Value.Addend); @@ -809,7 +1004,7 @@ void RuntimeDyldELF::processRelocationRef(const ObjRelocationInfo &Rel, addRelocationForSection(REl, Value.SectionID); } - resolveRelocation(Section, Rel.Offset, + resolveRelocation(Section, Offset, (uint64_t)Section.Address + Section.StubOffset, RelType, 0); if (SymType == SymbolRef::ST_Unknown) @@ -819,7 +1014,7 @@ void RuntimeDyldELF::processRelocationRef(const ObjRelocationInfo &Rel, } } } else { - RelocationEntry RE(Rel.SectionID, Rel.Offset, RelType, Value.Addend); + RelocationEntry RE(SectionID, Offset, RelType, Value.Addend); // Extra check to avoid relocation againt empty symbols (usually // the R_PPC64_TOC). if (Value.SymbolName && !TargetName.empty()) @@ -827,8 +1022,55 @@ void RuntimeDyldELF::processRelocationRef(const ObjRelocationInfo &Rel, else addRelocationForSection(RE, Value.SectionID); } + } else if (Arch == Triple::systemz && + (RelType == ELF::R_390_PLT32DBL || + RelType == ELF::R_390_GOTENT)) { + // Create function stubs for both PLT and GOT references, regardless of + // whether the GOT reference is to data or code. The stub contains the + // full address of the symbol, as needed by GOT references, and the + // executable part only adds an overhead of 8 bytes. + // + // We could try to conserve space by allocating the code and data + // parts of the stub separately. However, as things stand, we allocate + // a stub for every relocation, so using a GOT in JIT code should be + // no less space efficient than using an explicit constant pool. + DEBUG(dbgs() << "\t\tThis is a SystemZ indirect relocation."); + SectionEntry &Section = Sections[SectionID]; + + // Look for an existing stub. + StubMap::const_iterator i = Stubs.find(Value); + uintptr_t StubAddress; + if (i != Stubs.end()) { + StubAddress = uintptr_t(Section.Address) + i->second; + DEBUG(dbgs() << " Stub function found\n"); + } else { + // Create a new stub function. 
+ DEBUG(dbgs() << " Create a new stub function\n"); + + uintptr_t BaseAddress = uintptr_t(Section.Address); + uintptr_t StubAlignment = getStubAlignment(); + StubAddress = (BaseAddress + Section.StubOffset + + StubAlignment - 1) & -StubAlignment; + unsigned StubOffset = StubAddress - BaseAddress; + + Stubs[Value] = StubOffset; + createStubFunction((uint8_t *)StubAddress); + RelocationEntry RE(SectionID, StubOffset + 8, + ELF::R_390_64, Value.Addend - Addend); + if (Value.SymbolName) + addRelocationForSymbol(RE, Value.SymbolName); + else + addRelocationForSection(RE, Value.SectionID); + Section.StubOffset = StubOffset + getMaxStubSize(); + } + + if (RelType == ELF::R_390_GOTENT) + resolveRelocation(Section, Offset, StubAddress + 8, + ELF::R_390_PC32DBL, Addend); + else + resolveRelocation(Section, Offset, StubAddress, RelType, Addend); } else { - RelocationEntry RE(Rel.SectionID, Rel.Offset, RelType, Value.Addend); + RelocationEntry RE(SectionID, Offset, RelType, Value.Addend); if (Value.SymbolName) addRelocationForSymbol(RE, Value.SymbolName); else @@ -836,13 +1078,6 @@ void RuntimeDyldELF::processRelocationRef(const ObjRelocationInfo &Rel, } } -unsigned RuntimeDyldELF::getCommonSymbolAlignment(const SymbolRef &Sym) { - // In ELF, the value of an SHN_COMMON symbol is its alignment requirement. - uint64_t Align; - Check(Sym.getValue(Align)); - return Align; -} - bool RuntimeDyldELF::isCompatibleFormat(const ObjectBuffer *Buffer) const { if (Buffer->getBufferSize() < strlen(ELF::ElfMagic)) return false; diff --git a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h index 07e704b..794c7ec 100644 --- a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h +++ b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h @@ -31,7 +31,12 @@ namespace { } // end anonymous namespace class RuntimeDyldELF : public RuntimeDyldImpl { -protected: + void resolveRelocation(const SectionEntry &Section, + uint64_t Offset, + uint64_t Value, + uint32_t Type, + int64_t Addend); + void resolveX86_64Relocation(const SectionEntry &Section, uint64_t Offset, uint64_t Value, @@ -44,6 +49,12 @@ protected: uint32_t Type, int32_t Addend); + void resolveAArch64Relocation(const SectionEntry &Section, + uint64_t Offset, + uint64_t Value, + uint32_t Type, + int64_t Addend); + void resolveARMRelocation(const SectionEntry &Section, uint64_t Offset, uint32_t Value, @@ -62,21 +73,11 @@ protected: uint32_t Type, int64_t Addend); - virtual void resolveRelocation(const SectionEntry &Section, - uint64_t Offset, - uint64_t Value, - uint32_t Type, - int64_t Addend); - - virtual void processRelocationRef(const ObjRelocationInfo &Rel, - ObjectImage &Obj, - ObjSectionToIDMap &ObjSectionToID, - const SymbolTableMap &Symbols, - StubMap &Stubs); - - unsigned getCommonSymbolAlignment(const SymbolRef &Sym); - - virtual ObjectImage *createObjectImage(ObjectBuffer *InputBuffer); + void resolveSystemZRelocation(const SectionEntry &Section, + uint64_t Offset, + uint64_t Value, + uint32_t Type, + int64_t Addend); uint64_t findPPC64TOC() const; void findOPDEntrySection(ObjectImage &Obj, @@ -84,12 +85,19 @@ protected: RelocationValueRef &Rel); public: - RuntimeDyldELF(RTDyldMemoryManager *mm) - : RuntimeDyldImpl(mm) {} + RuntimeDyldELF(RTDyldMemoryManager *mm) : RuntimeDyldImpl(mm) {} + virtual void resolveRelocation(const RelocationEntry &RE, uint64_t Value); + virtual void processRelocationRef(unsigned SectionID, + RelocationRef RelI, + ObjectImage &Obj, + 
ObjSectionToIDMap &ObjSectionToID, + const SymbolTableMap &Symbols, + StubMap &Stubs); + virtual bool isCompatibleFormat(const ObjectBuffer *Buffer) const; + virtual ObjectImage *createObjectImage(ObjectBuffer *InputBuffer); + virtual StringRef getEHFrameSection(); virtual ~RuntimeDyldELF(); - - bool isCompatibleFormat(const ObjectBuffer *Buffer) const; }; } // end namespace llvm diff --git a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h index f100994..383ffab 100644 --- a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h +++ b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h @@ -49,7 +49,7 @@ public: /// Address - address in the linker's memory where the section resides. uint8_t *Address; - /// Size - section size. + /// Size - section size. Doesn't include the stubs. size_t Size; /// LoadAddress - the address of the section in the target process's memory. @@ -67,9 +67,9 @@ public: uintptr_t ObjAddress; SectionEntry(StringRef name, uint8_t *address, size_t size, - uintptr_t stubOffset, uintptr_t objAddress) + uintptr_t objAddress) : Name(name), Address(address), Size(size), LoadAddress((uintptr_t)address), - StubOffset(stubOffset), ObjAddress(objAddress) {} + StubOffset(size), ObjAddress(objAddress) {} }; /// RelocationEntry - used to represent relocations internally in the dynamic @@ -89,20 +89,20 @@ public: /// used to make a relocation section relative instead of symbol relative. intptr_t Addend; + /// True if this is a PCRel relocation (MachO specific). + bool IsPCRel; + + /// The size of this relocation (MachO specific). + unsigned Size; + RelocationEntry(unsigned id, uint64_t offset, uint32_t type, int64_t addend) - : SectionID(id), Offset(offset), RelType(type), Addend(addend) {} -}; + : SectionID(id), Offset(offset), RelType(type), Addend(addend), + IsPCRel(false), Size(0) {} -/// ObjRelocationInfo - relocation information as read from the object file. -/// Used to pass around data taken from object::RelocationRef, together with -/// the section to which the relocation points (represented by a SectionID). -class ObjRelocationInfo { -public: - unsigned SectionID; - uint64_t Offset; - SymbolRef Symbol; - uint64_t Type; - int64_t AdditionalInfo; + RelocationEntry(unsigned id, uint64_t offset, uint32_t type, int64_t addend, + bool IsPCRel, unsigned Size) + : SectionID(id), Offset(offset), RelType(type), Addend(addend), + IsPCRel(IsPCRel), Size(Size) {} }; class RelocationValueRef { @@ -166,16 +166,29 @@ protected: Triple::ArchType Arch; inline unsigned getMaxStubSize() { + if (Arch == Triple::aarch64) + return 20; // movz; movk; movk; movk; br if (Arch == Triple::arm || Arch == Triple::thumb) return 8; // 32-bit instruction and 32-bit address else if (Arch == Triple::mipsel || Arch == Triple::mips) return 16; else if (Arch == Triple::ppc64) return 44; + else if (Arch == Triple::x86_64) + return 8; // GOT + else if (Arch == Triple::systemz) + return 16; else return 0; } + inline unsigned getStubAlignment() { + if (Arch == Triple::systemz) + return 8; + else + return 1; + } + bool HasError; std::string ErrorStr; @@ -194,22 +207,15 @@ protected: return (uint8_t*)Sections[SectionID].Address; } - // Subclasses can override this method to get the alignment requirement of - // a common symbol. Returns no alignment requirement if not implemented. 
- virtual unsigned getCommonSymbolAlignment(const SymbolRef &Sym) { - return 0; - } - - void writeInt16BE(uint8_t *Addr, uint16_t Value) { - if (sys::isLittleEndianHost()) + if (sys::IsLittleEndianHost) Value = sys::SwapByteOrder(Value); *Addr = (Value >> 8) & 0xFF; *(Addr+1) = Value & 0xFF; } void writeInt32BE(uint8_t *Addr, uint32_t Value) { - if (sys::isLittleEndianHost()) + if (sys::IsLittleEndianHost) Value = sys::SwapByteOrder(Value); *Addr = (Value >> 24) & 0xFF; *(Addr+1) = (Value >> 16) & 0xFF; @@ -218,7 +224,7 @@ protected: } void writeInt64BE(uint8_t *Addr, uint64_t Value) { - if (sys::isLittleEndianHost()) + if (sys::IsLittleEndianHost) Value = sys::SwapByteOrder(Value); *Addr = (Value >> 56) & 0xFF; *(Addr+1) = (Value >> 48) & 0xFF; @@ -269,24 +275,16 @@ protected: /// \brief Resolves relocations from Relocs list with address from Value. void resolveRelocationList(const RelocationList &Relocs, uint64_t Value); - void resolveRelocationEntry(const RelocationEntry &RE, uint64_t Value); /// \brief A object file specific relocation resolver - /// \param Section The section where the relocation is being applied - /// \param Offset The offset into the section for this relocation + /// \param RE The relocation to be resolved /// \param Value Target symbol address to apply the relocation action - /// \param Type object file specific relocation type - /// \param Addend A constant addend used to compute the value to be stored - /// into the relocatable field - virtual void resolveRelocation(const SectionEntry &Section, - uint64_t Offset, - uint64_t Value, - uint32_t Type, - int64_t Addend) = 0; + virtual void resolveRelocation(const RelocationEntry &RE, uint64_t Value) = 0; /// \brief Parses the object file relocation and stores it to Relocations /// or SymbolRelocations (this depends on the object file type). 
- virtual void processRelocationRef(const ObjRelocationInfo &Rel, + virtual void processRelocationRef(unsigned SectionID, + RelocationRef RelI, ObjectImage &Obj, ObjSectionToIDMap &ObjSectionToID, const SymbolTableMap &Symbols, @@ -336,6 +334,8 @@ public: StringRef getErrorString() { return ErrorStr; } virtual bool isCompatibleFormat(const ObjectBuffer *Buffer) const = 0; + + virtual StringRef getEHFrameSection(); }; } // end namespace llvm diff --git a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp index bcc3df1..01a3fd9 100644 --- a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp +++ b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp @@ -21,16 +21,87 @@ using namespace llvm::object; namespace llvm { +static unsigned char *processFDE(unsigned char *P, intptr_t DeltaForText, intptr_t DeltaForEH) { + uint32_t Length = *((uint32_t*)P); + P += 4; + unsigned char *Ret = P + Length; + uint32_t Offset = *((uint32_t*)P); + if (Offset == 0) // is a CIE + return Ret; + + P += 4; + intptr_t FDELocation = *((intptr_t*)P); + intptr_t NewLocation = FDELocation - DeltaForText; + *((intptr_t*)P) = NewLocation; + P += sizeof(intptr_t); + + // Skip the FDE address range + P += sizeof(intptr_t); + + uint8_t Augmentationsize = *P; + P += 1; + if (Augmentationsize != 0) { + intptr_t LSDA = *((intptr_t*)P); + intptr_t NewLSDA = LSDA - DeltaForEH; + *((intptr_t*)P) = NewLSDA; + } + + return Ret; +} + +static intptr_t computeDelta(SectionEntry *A, SectionEntry *B) { + intptr_t ObjDistance = A->ObjAddress - B->ObjAddress; + intptr_t MemDistance = A->LoadAddress - B->LoadAddress; + return ObjDistance - MemDistance; +} + +StringRef RuntimeDyldMachO::getEHFrameSection() { + SectionEntry *Text = NULL; + SectionEntry *EHFrame = NULL; + SectionEntry *ExceptTab = NULL; + for (int i = 0, e = Sections.size(); i != e; ++i) { + if (Sections[i].Name == "__eh_frame") + EHFrame = &Sections[i]; + else if (Sections[i].Name == "__text") + Text = &Sections[i]; + else if (Sections[i].Name == "__gcc_except_tab") + ExceptTab = &Sections[i]; + } + if (Text == NULL || EHFrame == NULL) + return StringRef(); + + intptr_t DeltaForText = computeDelta(Text, EHFrame); + intptr_t DeltaForEH = 0; + if (ExceptTab) + DeltaForEH = computeDelta(ExceptTab, EHFrame); + + unsigned char *P = EHFrame->Address; + unsigned char *End = P + EHFrame->Size; + do { + P = processFDE(P, DeltaForText, DeltaForEH); + } while(P != End); + + return StringRef((char*)EHFrame->Address, EHFrame->Size); +} + +void RuntimeDyldMachO::resolveRelocation(const RelocationEntry &RE, + uint64_t Value) { + const SectionEntry &Section = Sections[RE.SectionID]; + return resolveRelocation(Section, RE.Offset, Value, RE.RelType, RE.Addend, + RE.IsPCRel, RE.Size); +} + void RuntimeDyldMachO::resolveRelocation(const SectionEntry &Section, uint64_t Offset, uint64_t Value, uint32_t Type, - int64_t Addend) { + int64_t Addend, + bool isPCRel, + unsigned LogSize) { uint8_t *LocalAddress = Section.Address + Offset; uint64_t FinalAddress = Section.LoadAddress + Offset; - bool isPCRel = (Type >> 24) & 1; - unsigned MachoType = (Type >> 28) & 0xf; - unsigned Size = 1 << ((Type >> 25) & 3); + unsigned MachoType = Type; + unsigned Size = 1 << LogSize; DEBUG(dbgs() << "resolveRelocation LocalAddress: " << format("%p", LocalAddress) @@ -205,89 +276,111 @@ bool RuntimeDyldMachO::resolveARMRelocation(uint8_t *LocalAddress, return false; } -void 
RuntimeDyldMachO::processRelocationRef(const ObjRelocationInfo &Rel, +void RuntimeDyldMachO::processRelocationRef(unsigned SectionID, + RelocationRef RelI, ObjectImage &Obj, ObjSectionToIDMap &ObjSectionToID, const SymbolTableMap &Symbols, StubMap &Stubs) { + const ObjectFile *OF = Obj.getObjectFile(); + const MachOObjectFile *MachO = static_cast<const MachOObjectFile*>(OF); + macho::RelocationEntry RE = MachO->getRelocation(RelI.getRawDataRefImpl()); - uint32_t RelType = (uint32_t) (Rel.Type & 0xffffffffL); + uint32_t RelType = MachO->getAnyRelocationType(RE); RelocationValueRef Value; - SectionEntry &Section = Sections[Rel.SectionID]; + SectionEntry &Section = Sections[SectionID]; + + bool isExtern = MachO->getPlainRelocationExternal(RE); + bool IsPCRel = MachO->getAnyRelocationPCRel(RE); + unsigned Size = MachO->getAnyRelocationLength(RE); + uint64_t Offset; + RelI.getOffset(Offset); + uint8_t *LocalAddress = Section.Address + Offset; + unsigned NumBytes = 1 << Size; + uint64_t Addend = 0; + memcpy(&Addend, LocalAddress, NumBytes); - bool isExtern = (RelType >> 27) & 1; if (isExtern) { // Obtain the symbol name which is referenced in the relocation + SymbolRef Symbol; + RelI.getSymbol(Symbol); StringRef TargetName; - const SymbolRef &Symbol = Rel.Symbol; Symbol.getName(TargetName); // First search for the symbol in the local symbol table SymbolTableMap::const_iterator lsi = Symbols.find(TargetName.data()); if (lsi != Symbols.end()) { Value.SectionID = lsi->second.first; - Value.Addend = lsi->second.second; + Value.Addend = lsi->second.second + Addend; } else { // Search for the symbol in the global symbol table SymbolTableMap::const_iterator gsi = GlobalSymbolTable.find(TargetName.data()); if (gsi != GlobalSymbolTable.end()) { Value.SectionID = gsi->second.first; - Value.Addend = gsi->second.second; - } else + Value.Addend = gsi->second.second + Addend; + } else { Value.SymbolName = TargetName.data(); + Value.Addend = Addend; + } } } else { - error_code err; - uint8_t sectionIndex = static_cast<uint8_t>(RelType & 0xFF); - section_iterator si = Obj.begin_sections(), - se = Obj.end_sections(); - for (uint8_t i = 1; i < sectionIndex; i++) { - error_code err; - si.increment(err); - if (si == se) - break; - } - assert(si != se && "No section containing relocation!"); - Value.SectionID = findOrEmitSection(Obj, *si, true, ObjSectionToID); - Value.Addend = 0; - // FIXME: The size and type of the relocation determines if we can - // encode an Addend in the target location itself, and if so, how many - // bytes we should read in order to get it. We don't yet support doing - // that, and just assuming it's sizeof(intptr_t) is blatantly wrong. - //Value.Addend = *(const intptr_t *)Target; - if (Value.Addend) { - // The MachO addend is an offset from the current section. 
We need it - // to be an offset from the destination section - Value.Addend += Section.ObjAddress - Sections[Value.SectionID].ObjAddress; - } + SectionRef Sec = MachO->getRelocationSection(RE); + Value.SectionID = findOrEmitSection(Obj, Sec, true, ObjSectionToID); + uint64_t Addr; + Sec.getAddress(Addr); + Value.Addend = Addend - Addr; } - if (Arch == Triple::arm && (RelType & 0xf) == macho::RIT_ARM_Branch24Bit) { + if (Arch == Triple::x86_64 && RelType == macho::RIT_X86_64_GOT) { + assert(IsPCRel); + assert(Size == 2); + StubMap::const_iterator i = Stubs.find(Value); + uint8_t *Addr; + if (i != Stubs.end()) { + Addr = Section.Address + i->second; + } else { + Stubs[Value] = Section.StubOffset; + uint8_t *GOTEntry = Section.Address + Section.StubOffset; + RelocationEntry RE(SectionID, Section.StubOffset, + macho::RIT_X86_64_Unsigned, Value.Addend - 4, false, + 3); + if (Value.SymbolName) + addRelocationForSymbol(RE, Value.SymbolName); + else + addRelocationForSection(RE, Value.SectionID); + Section.StubOffset += 8; + Addr = GOTEntry; + } + resolveRelocation(Section, Offset, (uint64_t)Addr, + macho::RIT_X86_64_Unsigned, 4, true, 2); + } else if (Arch == Triple::arm && + (RelType & 0xf) == macho::RIT_ARM_Branch24Bit) { // This is an ARM branch relocation, need to use a stub function. // Look up for existing stub. StubMap::const_iterator i = Stubs.find(Value); if (i != Stubs.end()) - resolveRelocation(Section, Rel.Offset, + resolveRelocation(Section, Offset, (uint64_t)Section.Address + i->second, - RelType, 0); + RelType, 0, IsPCRel, Size); else { // Create a new stub function. Stubs[Value] = Section.StubOffset; uint8_t *StubTargetAddr = createStubFunction(Section.Address + Section.StubOffset); - RelocationEntry RE(Rel.SectionID, StubTargetAddr - Section.Address, + RelocationEntry RE(SectionID, StubTargetAddr - Section.Address, macho::RIT_Vanilla, Value.Addend); if (Value.SymbolName) addRelocationForSymbol(RE, Value.SymbolName); else addRelocationForSection(RE, Value.SectionID); - resolveRelocation(Section, Rel.Offset, + resolveRelocation(Section, Offset, (uint64_t)Section.Address + Section.StubOffset, - RelType, 0); + RelType, 0, IsPCRel, Size); Section.StubOffset += getMaxStubSize(); } } else { - RelocationEntry RE(Rel.SectionID, Rel.Offset, RelType, Value.Addend); + RelocationEntry RE(SectionID, Offset, RelType, Value.Addend, + IsPCRel, Size); if (Value.SymbolName) addRelocationForSymbol(RE, Value.SymbolName); else diff --git a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.h b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.h index 62d8487..df8d3bb 100644 --- a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.h +++ b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.h @@ -16,7 +16,7 @@ #include "RuntimeDyldImpl.h" #include "llvm/ADT/IndexedMap.h" -#include "llvm/Object/MachOObject.h" +#include "llvm/Object/MachO.h" #include "llvm/Support/Format.h" using namespace llvm; @@ -25,7 +25,6 @@ using namespace llvm::object; namespace llvm { class RuntimeDyldMachO : public RuntimeDyldImpl { -protected: bool resolveI386Relocation(uint8_t *LocalAddress, uint64_t FinalAddress, uint64_t Value, @@ -48,22 +47,25 @@ protected: unsigned Size, int64_t Addend); - virtual void processRelocationRef(const ObjRelocationInfo &Rel, + void resolveRelocation(const SectionEntry &Section, + uint64_t Offset, + uint64_t Value, + uint32_t Type, + int64_t Addend, + bool isPCRel, + unsigned Size); +public: + RuntimeDyldMachO(RTDyldMemoryManager *mm) : 
RuntimeDyldImpl(mm) {} + + virtual void resolveRelocation(const RelocationEntry &RE, uint64_t Value); + virtual void processRelocationRef(unsigned SectionID, + RelocationRef RelI, ObjectImage &Obj, ObjSectionToIDMap &ObjSectionToID, const SymbolTableMap &Symbols, StubMap &Stubs); - -public: - virtual void resolveRelocation(const SectionEntry &Section, - uint64_t Offset, - uint64_t Value, - uint32_t Type, - int64_t Addend); - - RuntimeDyldMachO(RTDyldMemoryManager *mm) : RuntimeDyldImpl(mm) {} - - bool isCompatibleFormat(const ObjectBuffer *Buffer) const; + virtual bool isCompatibleFormat(const ObjectBuffer *Buffer) const; + virtual StringRef getEHFrameSection(); }; } // end namespace llvm diff --git a/contrib/llvm/lib/IR/AsmWriter.cpp b/contrib/llvm/lib/IR/AsmWriter.cpp index fb591a8..7761127d 100644 --- a/contrib/llvm/lib/IR/AsmWriter.cpp +++ b/contrib/llvm/lib/IR/AsmWriter.cpp @@ -1605,6 +1605,29 @@ void AssemblyWriter::printFunction(const Function *F) { if (F->isMaterializable()) Out << "; Materializable\n"; + const AttributeSet &Attrs = F->getAttributes(); + if (Attrs.hasAttributes(AttributeSet::FunctionIndex)) { + AttributeSet AS = Attrs.getFnAttributes(); + std::string AttrStr; + + unsigned Idx = 0; + for (unsigned E = AS.getNumSlots(); Idx != E; ++Idx) + if (AS.getSlotIndex(Idx) == AttributeSet::FunctionIndex) + break; + + for (AttributeSet::iterator I = AS.begin(Idx), E = AS.end(Idx); + I != E; ++I) { + Attribute Attr = *I; + if (!Attr.isStringAttribute()) { + if (!AttrStr.empty()) AttrStr += ' '; + AttrStr += Attr.getAsString(); + } + } + + if (!AttrStr.empty()) + Out << "; Function Attrs: " << AttrStr << '\n'; + } + if (F->isDeclaration()) Out << "declare "; else @@ -1620,7 +1643,6 @@ void AssemblyWriter::printFunction(const Function *F) { } FunctionType *FT = F->getFunctionType(); - const AttributeSet &Attrs = F->getAttributes(); if (Attrs.hasAttributes(AttributeSet::ReturnIndex)) Out << Attrs.getAsString(AttributeSet::ReturnIndex) << ' '; TypePrinter.print(F->getReturnType(), Out); @@ -1761,10 +1783,8 @@ void AssemblyWriter::printBasicBlock(const BasicBlock *BB) { /// which slot it occupies. /// void AssemblyWriter::printInfoComment(const Value &V) { - if (AnnotationWriter) { + if (AnnotationWriter) AnnotationWriter->printInfoComment(V, Out); - return; - } } // This member is called for each Instruction in a function.. diff --git a/contrib/llvm/lib/IR/AttributeImpl.h b/contrib/llvm/lib/IR/AttributeImpl.h index ad2670d..0b6228b 100644 --- a/contrib/llvm/lib/IR/AttributeImpl.h +++ b/contrib/llvm/lib/IR/AttributeImpl.h @@ -228,7 +228,7 @@ public: /// is the index of the return, parameter, or function object that the /// attributes are applied to, not the index into the AttrNodes list where the /// attributes reside. 
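The slot/index distinction documented in the comment above is easy to trip over: slots are dense positions 0..getNumSlots()-1 into the internal AttrNodes list, while getSlotIndex() reports what the attributes stored at that slot apply to. A minimal sketch against the interfaces visible in this patch (dumpSlots is a hypothetical helper, not part of the change):

#include "llvm/IR/Attributes.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

// Hypothetical helper: walk an AttributeSet slot by slot and report which
// return/parameter/function index each slot's attributes are attached to.
static void dumpSlots(AttributeSet AS) {
  for (unsigned Slot = 0, E = AS.getNumSlots(); Slot != E; ++Slot) {
    unsigned Index = AS.getSlotIndex(Slot); // 0 = return value, 1..N = params,
                                            // AttributeSet::FunctionIndex = fn
    errs() << "slot " << Slot << " -> index " << Index << ": "
           << AS.getAsString(Index) << "\n";
  }
}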
- uint64_t getSlotIndex(unsigned Slot) const { + unsigned getSlotIndex(unsigned Slot) const { return AttrNodes[Slot].first; } @@ -248,15 +248,15 @@ public: typedef AttributeSetNode::iterator iterator; typedef AttributeSetNode::const_iterator const_iterator; - iterator begin(unsigned Idx) - { return AttrNodes[Idx].second->begin(); } - iterator end(unsigned Idx) - { return AttrNodes[Idx].second->end(); } + iterator begin(unsigned Slot) + { return AttrNodes[Slot].second->begin(); } + iterator end(unsigned Slot) + { return AttrNodes[Slot].second->end(); } - const_iterator begin(unsigned Idx) const - { return AttrNodes[Idx].second->begin(); } - const_iterator end(unsigned Idx) const - { return AttrNodes[Idx].second->end(); } + const_iterator begin(unsigned Slot) const + { return AttrNodes[Slot].second->begin(); } + const_iterator end(unsigned Slot) const + { return AttrNodes[Slot].second->end(); } void Profile(FoldingSetNodeID &ID) const { Profile(ID, AttrNodes); @@ -270,7 +270,7 @@ public: } // FIXME: This atrocity is temporary. - uint64_t Raw(uint64_t Index) const; + uint64_t Raw(unsigned Index) const; }; } // end llvm namespace diff --git a/contrib/llvm/lib/IR/Attributes.cpp b/contrib/llvm/lib/IR/Attributes.cpp index 2d82891..4fe6f9d 100644 --- a/contrib/llvm/lib/IR/Attributes.cpp +++ b/contrib/llvm/lib/IR/Attributes.cpp @@ -195,6 +195,8 @@ std::string Attribute::getAsString(bool InAttrGrp) const { return "readnone"; if (hasAttribute(Attribute::ReadOnly)) return "readonly"; + if (hasAttribute(Attribute::Returned)) + return "returned"; if (hasAttribute(Attribute::ReturnsTwice)) return "returns_twice"; if (hasAttribute(Attribute::SExt)) @@ -393,6 +395,7 @@ uint64_t AttributeImpl::getAttrMask(Attribute::AttrKind Val) { case Attribute::SanitizeThread: return 1ULL << 36; case Attribute::SanitizeMemory: return 1ULL << 37; case Attribute::NoBuiltin: return 1ULL << 38; + case Attribute::Returned: return 1ULL << 39; } llvm_unreachable("Unsupported attribute type"); } @@ -481,11 +484,12 @@ unsigned AttributeSetNode::getStackAlignment() const { } std::string AttributeSetNode::getAsString(bool InAttrGrp) const { - std::string Str = ""; + std::string Str; for (SmallVectorImpl<Attribute>::const_iterator I = AttrList.begin(), - E = AttrList.end(); I != E; ) { + E = AttrList.end(); I != E; ++I) { + if (I != AttrList.begin()) + Str += ' '; Str += I->getAsString(InAttrGrp); - if (++I != E) Str += " "; } return Str; } @@ -494,7 +498,7 @@ std::string AttributeSetNode::getAsString(bool InAttrGrp) const { // AttributeSetImpl Definition //===----------------------------------------------------------------------===// -uint64_t AttributeSetImpl::Raw(uint64_t Index) const { +uint64_t AttributeSetImpl::Raw(unsigned Index) const { for (unsigned I = 0, E = getNumAttributes(); I != E; ++I) { if (getSlotIndex(I) != Index) continue; const AttributeSetNode *ASN = AttrNodes[I].second; @@ -592,7 +596,7 @@ AttributeSet AttributeSet::get(LLVMContext &C, return getImpl(C, Attrs); } -AttributeSet AttributeSet::get(LLVMContext &C, unsigned Idx, AttrBuilder &B) { +AttributeSet AttributeSet::get(LLVMContext &C, unsigned Index, AttrBuilder &B) { if (!B.hasAttributes()) return AttributeSet(); @@ -604,29 +608,29 @@ AttributeSet AttributeSet::get(LLVMContext &C, unsigned Idx, AttrBuilder &B) { continue; if (Kind == Attribute::Alignment) - Attrs.push_back(std::make_pair(Idx, Attribute:: + Attrs.push_back(std::make_pair(Index, Attribute:: getWithAlignment(C, B.getAlignment()))); else if (Kind == Attribute::StackAlignment) - 
Attrs.push_back(std::make_pair(Idx, Attribute:: + Attrs.push_back(std::make_pair(Index, Attribute:: getWithStackAlignment(C, B.getStackAlignment()))); else - Attrs.push_back(std::make_pair(Idx, Attribute::get(C, Kind))); + Attrs.push_back(std::make_pair(Index, Attribute::get(C, Kind))); } // Add target-dependent (string) attributes. for (AttrBuilder::td_iterator I = B.td_begin(), E = B.td_end(); I != E; ++I) - Attrs.push_back(std::make_pair(Idx, Attribute::get(C, I->first,I->second))); + Attrs.push_back(std::make_pair(Index, Attribute::get(C, I->first,I->second))); return get(C, Attrs); } -AttributeSet AttributeSet::get(LLVMContext &C, unsigned Idx, +AttributeSet AttributeSet::get(LLVMContext &C, unsigned Index, ArrayRef<Attribute::AttrKind> Kind) { SmallVector<std::pair<unsigned, Attribute>, 8> Attrs; for (ArrayRef<Attribute::AttrKind>::iterator I = Kind.begin(), E = Kind.end(); I != E; ++I) - Attrs.push_back(std::make_pair(Idx, Attribute::get(C, *I))); + Attrs.push_back(std::make_pair(Index, Attribute::get(C, *I))); return get(C, Attrs); } @@ -643,20 +647,20 @@ AttributeSet AttributeSet::get(LLVMContext &C, ArrayRef<AttributeSet> Attrs) { return getImpl(C, AttrNodeVec); } -AttributeSet AttributeSet::addAttribute(LLVMContext &C, unsigned Idx, +AttributeSet AttributeSet::addAttribute(LLVMContext &C, unsigned Index, Attribute::AttrKind Attr) const { - if (hasAttribute(Idx, Attr)) return *this; - return addAttributes(C, Idx, AttributeSet::get(C, Idx, Attr)); + if (hasAttribute(Index, Attr)) return *this; + return addAttributes(C, Index, AttributeSet::get(C, Index, Attr)); } -AttributeSet AttributeSet::addAttribute(LLVMContext &C, unsigned Idx, +AttributeSet AttributeSet::addAttribute(LLVMContext &C, unsigned Index, StringRef Kind) const { llvm::AttrBuilder B; B.addAttribute(Kind); - return addAttributes(C, Idx, AttributeSet::get(C, Idx, B)); + return addAttributes(C, Index, AttributeSet::get(C, Index, B)); } -AttributeSet AttributeSet::addAttributes(LLVMContext &C, unsigned Idx, +AttributeSet AttributeSet::addAttributes(LLVMContext &C, unsigned Index, AttributeSet Attrs) const { if (!pImpl) return Attrs; if (!Attrs.pImpl) return *this; @@ -664,8 +668,8 @@ AttributeSet AttributeSet::addAttributes(LLVMContext &C, unsigned Idx, #ifndef NDEBUG // FIXME it is not obvious how this should work for alignment. For now, say // we can't change a known alignment. - unsigned OldAlign = getParamAlignment(Idx); - unsigned NewAlign = Attrs.getParamAlignment(Idx); + unsigned OldAlign = getParamAlignment(Index); + unsigned NewAlign = Attrs.getParamAlignment(Index); assert((!OldAlign || !NewAlign || OldAlign == NewAlign) && "Attempt to change alignment!"); #endif @@ -676,8 +680,8 @@ AttributeSet AttributeSet::addAttributes(LLVMContext &C, unsigned Idx, AttributeSet AS; uint64_t LastIndex = 0; for (unsigned I = 0, E = NumAttrs; I != E; ++I) { - if (getSlotIndex(I) >= Idx) { - if (getSlotIndex(I) == Idx) AS = getSlotAttributes(LastIndex++); + if (getSlotIndex(I) >= Index) { + if (getSlotIndex(I) == Index) AS = getSlotAttributes(LastIndex++); break; } LastIndex = I + 1; @@ -686,17 +690,17 @@ AttributeSet AttributeSet::addAttributes(LLVMContext &C, unsigned Idx, // Now add the attribute into the correct slot. There may already be an // AttributeSet there. 
- AttrBuilder B(AS, Idx); + AttrBuilder B(AS, Index); for (unsigned I = 0, E = Attrs.pImpl->getNumAttributes(); I != E; ++I) - if (Attrs.getSlotIndex(I) == Idx) { + if (Attrs.getSlotIndex(I) == Index) { for (AttributeSetImpl::const_iterator II = Attrs.pImpl->begin(I), IE = Attrs.pImpl->end(I); II != IE; ++II) B.addAttribute(*II); break; } - AttrSet.push_back(AttributeSet::get(C, Idx, B)); + AttrSet.push_back(AttributeSet::get(C, Index, B)); // Add the remaining attribute slots. for (unsigned I = LastIndex, E = NumAttrs; I < E; ++I) @@ -705,13 +709,13 @@ AttributeSet AttributeSet::addAttributes(LLVMContext &C, unsigned Idx, return get(C, AttrSet); } -AttributeSet AttributeSet::removeAttribute(LLVMContext &C, unsigned Idx, +AttributeSet AttributeSet::removeAttribute(LLVMContext &C, unsigned Index, Attribute::AttrKind Attr) const { - if (!hasAttribute(Idx, Attr)) return *this; - return removeAttributes(C, Idx, AttributeSet::get(C, Idx, Attr)); + if (!hasAttribute(Index, Attr)) return *this; + return removeAttributes(C, Index, AttributeSet::get(C, Index, Attr)); } -AttributeSet AttributeSet::removeAttributes(LLVMContext &C, unsigned Idx, +AttributeSet AttributeSet::removeAttributes(LLVMContext &C, unsigned Index, AttributeSet Attrs) const { if (!pImpl) return AttributeSet(); if (!Attrs.pImpl) return *this; @@ -719,7 +723,7 @@ AttributeSet AttributeSet::removeAttributes(LLVMContext &C, unsigned Idx, #ifndef NDEBUG // FIXME it is not obvious how this should work for alignment. // For now, say we can't pass in alignment, which no current use does. - assert(!Attrs.hasAttribute(Idx, Attribute::Alignment) && + assert(!Attrs.hasAttribute(Index, Attribute::Alignment) && "Attempt to change alignment!"); #endif @@ -729,8 +733,8 @@ AttributeSet AttributeSet::removeAttributes(LLVMContext &C, unsigned Idx, AttributeSet AS; uint64_t LastIndex = 0; for (unsigned I = 0, E = NumAttrs; I != E; ++I) { - if (getSlotIndex(I) >= Idx) { - if (getSlotIndex(I) == Idx) AS = getSlotAttributes(LastIndex++); + if (getSlotIndex(I) >= Index) { + if (getSlotIndex(I) == Index) AS = getSlotAttributes(LastIndex++); break; } LastIndex = I + 1; @@ -739,15 +743,15 @@ AttributeSet AttributeSet::removeAttributes(LLVMContext &C, unsigned Idx, // Now remove the attribute from the correct slot. There may already be an // AttributeSet there. - AttrBuilder B(AS, Idx); + AttrBuilder B(AS, Index); for (unsigned I = 0, E = Attrs.pImpl->getNumAttributes(); I != E; ++I) - if (Attrs.getSlotIndex(I) == Idx) { - B.removeAttributes(Attrs.pImpl->getSlotAttributes(I), Idx); + if (Attrs.getSlotIndex(I) == Index) { + B.removeAttributes(Attrs.pImpl->getSlotAttributes(I), Index); break; } - AttrSet.push_back(AttributeSet::get(C, Idx, B)); + AttrSet.push_back(AttributeSet::get(C, Index, B)); // Add the remaining attribute slots. for (unsigned I = LastIndex, E = NumAttrs; I < E; ++I) @@ -764,11 +768,11 @@ LLVMContext &AttributeSet::getContext() const { return pImpl->getContext(); } -AttributeSet AttributeSet::getParamAttributes(unsigned Idx) const { - return pImpl && hasAttributes(Idx) ? +AttributeSet AttributeSet::getParamAttributes(unsigned Index) const { + return pImpl && hasAttributes(Index) ? AttributeSet::get(pImpl->getContext(), ArrayRef<std::pair<unsigned, AttributeSetNode*> >( - std::make_pair(Idx, getAttributes(Idx)))) : + std::make_pair(Index, getAttributes(Index)))) : AttributeSet(); } @@ -848,27 +852,27 @@ std::string AttributeSet::getAsString(unsigned Index, } /// \brief The attributes for the specified index are returned. 
-AttributeSetNode *AttributeSet::getAttributes(unsigned Idx) const { +AttributeSetNode *AttributeSet::getAttributes(unsigned Index) const { if (!pImpl) return 0; // Loop through to find the attribute node we want. for (unsigned I = 0, E = pImpl->getNumAttributes(); I != E; ++I) - if (pImpl->getSlotIndex(I) == Idx) + if (pImpl->getSlotIndex(I) == Index) return pImpl->getSlotNode(I); return 0; } -AttributeSet::iterator AttributeSet::begin(unsigned Idx) const { +AttributeSet::iterator AttributeSet::begin(unsigned Slot) const { if (!pImpl) return ArrayRef<Attribute>().begin(); - return pImpl->begin(Idx); + return pImpl->begin(Slot); } -AttributeSet::iterator AttributeSet::end(unsigned Idx) const { +AttributeSet::iterator AttributeSet::end(unsigned Slot) const { if (!pImpl) return ArrayRef<Attribute>().end(); - return pImpl->end(Idx); + return pImpl->end(Slot); } //===----------------------------------------------------------------------===// @@ -882,7 +886,7 @@ unsigned AttributeSet::getNumSlots() const { return pImpl ? pImpl->getNumAttributes() : 0; } -uint64_t AttributeSet::getSlotIndex(unsigned Slot) const { +unsigned AttributeSet::getSlotIndex(unsigned Slot) const { assert(pImpl && Slot < pImpl->getNumAttributes() && "Slot # out of range!"); return pImpl->getSlotIndex(Slot); @@ -919,13 +923,13 @@ void AttributeSet::dump() const { // AttrBuilder Method Implementations //===----------------------------------------------------------------------===// -AttrBuilder::AttrBuilder(AttributeSet AS, unsigned Idx) +AttrBuilder::AttrBuilder(AttributeSet AS, unsigned Index) : Attrs(0), Alignment(0), StackAlignment(0) { AttributeSetImpl *pImpl = AS.pImpl; if (!pImpl) return; for (unsigned I = 0, E = pImpl->getNumAttributes(); I != E; ++I) { - if (pImpl->getSlotIndex(I) != Idx) continue; + if (pImpl->getSlotIndex(I) != Index) continue; for (AttributeSetImpl::const_iterator II = pImpl->begin(I), IE = pImpl->end(I); II != IE; ++II) @@ -982,16 +986,16 @@ AttrBuilder &AttrBuilder::removeAttribute(Attribute::AttrKind Val) { } AttrBuilder &AttrBuilder::removeAttributes(AttributeSet A, uint64_t Index) { - unsigned Idx = ~0U; + unsigned Slot = ~0U; for (unsigned I = 0, E = A.getNumSlots(); I != E; ++I) if (A.getSlotIndex(I) == Index) { - Idx = I; + Slot = I; break; } - assert(Idx != ~0U && "Couldn't find index in AttributeSet!"); + assert(Slot != ~0U && "Couldn't find index in AttributeSet!"); - for (AttributeSet::iterator I = A.begin(Idx), E = A.end(Idx); I != E; ++I) { + for (AttributeSet::iterator I = A.begin(Slot), E = A.end(Slot); I != E; ++I) { Attribute Attr = *I; if (Attr.isEnumAttribute() || Attr.isAlignAttribute()) { Attribute::AttrKind Kind = I->getKindAsEnum(); @@ -1069,16 +1073,16 @@ bool AttrBuilder::hasAttributes() const { } bool AttrBuilder::hasAttributes(AttributeSet A, uint64_t Index) const { - unsigned Idx = ~0U; + unsigned Slot = ~0U; for (unsigned I = 0, E = A.getNumSlots(); I != E; ++I) if (A.getSlotIndex(I) == Index) { - Idx = I; + Slot = I; break; } - assert(Idx != ~0U && "Couldn't find the index!"); + assert(Slot != ~0U && "Couldn't find the index!"); - for (AttributeSet::iterator I = A.begin(Idx), E = A.end(Idx); + for (AttributeSet::iterator I = A.begin(Slot), E = A.end(Slot); I != E; ++I) { Attribute Attr = *I; if (Attr.isEnumAttribute() || Attr.isAlignAttribute()) { @@ -1109,33 +1113,6 @@ bool AttrBuilder::operator==(const AttrBuilder &B) { return Alignment == B.Alignment && StackAlignment == B.StackAlignment; } -void AttrBuilder::removeFunctionOnlyAttrs() { - 
removeAttribute(Attribute::NoReturn) - .removeAttribute(Attribute::NoUnwind) - .removeAttribute(Attribute::ReadNone) - .removeAttribute(Attribute::ReadOnly) - .removeAttribute(Attribute::NoInline) - .removeAttribute(Attribute::AlwaysInline) - .removeAttribute(Attribute::OptimizeForSize) - .removeAttribute(Attribute::StackProtect) - .removeAttribute(Attribute::StackProtectReq) - .removeAttribute(Attribute::StackProtectStrong) - .removeAttribute(Attribute::NoRedZone) - .removeAttribute(Attribute::NoImplicitFloat) - .removeAttribute(Attribute::Naked) - .removeAttribute(Attribute::InlineHint) - .removeAttribute(Attribute::StackAlignment) - .removeAttribute(Attribute::UWTable) - .removeAttribute(Attribute::NonLazyBind) - .removeAttribute(Attribute::ReturnsTwice) - .removeAttribute(Attribute::SanitizeAddress) - .removeAttribute(Attribute::SanitizeThread) - .removeAttribute(Attribute::SanitizeMemory) - .removeAttribute(Attribute::MinSize) - .removeAttribute(Attribute::NoDuplicate) - .removeAttribute(Attribute::NoBuiltin); -} - AttrBuilder &AttrBuilder::addRawValue(uint64_t Val) { // FIXME: Remove this in 4.0. if (!Val) return *this; diff --git a/contrib/llvm/lib/IR/Constants.cpp b/contrib/llvm/lib/IR/Constants.cpp index 1abb656..2c6971c 100644 --- a/contrib/llvm/lib/IR/Constants.cpp +++ b/contrib/llvm/lib/IR/Constants.cpp @@ -237,18 +237,21 @@ void Constant::destroyConstantImpl() { delete this; } -/// canTrap - Return true if evaluation of this constant could trap. This is -/// true for things like constant expressions that could divide by zero. -bool Constant::canTrap() const { - assert(getType()->isFirstClassType() && "Cannot evaluate aggregate vals!"); +static bool canTrapImpl(const Constant *C, + SmallPtrSet<const ConstantExpr *, 4> &NonTrappingOps) { + assert(C->getType()->isFirstClassType() && "Cannot evaluate aggregate vals!"); // The only thing that could possibly trap are constant exprs. - const ConstantExpr *CE = dyn_cast<ConstantExpr>(this); - if (!CE) return false; + const ConstantExpr *CE = dyn_cast<ConstantExpr>(C); + if (!CE) + return false; // ConstantExpr traps if any operands can trap. - for (unsigned i = 0, e = getNumOperands(); i != e; ++i) - if (CE->getOperand(i)->canTrap()) - return true; + for (unsigned i = 0, e = C->getNumOperands(); i != e; ++i) { + if (ConstantExpr *Op = dyn_cast<ConstantExpr>(CE->getOperand(i))) { + if (NonTrappingOps.insert(Op) && canTrapImpl(Op, NonTrappingOps)) + return true; + } + } // Otherwise, only specific operations can trap. switch (CE->getOpcode()) { @@ -267,6 +270,13 @@ bool Constant::canTrap() const { } } +/// canTrap - Return true if evaluation of this constant could trap. This is +/// true for things like constant expressions that could divide by zero. +bool Constant::canTrap() const { + SmallPtrSet<const ConstantExpr *, 4> NonTrappingOps; + return canTrapImpl(this, NonTrappingOps); +} + /// isThreadDependent - Return true if the value can vary between threads. 
bool Constant::isThreadDependent() const { SmallPtrSet<const Constant*, 64> Visited; diff --git a/contrib/llvm/lib/IR/ConstantsContext.h b/contrib/llvm/lib/IR/ConstantsContext.h index e995858..32bed95 100644 --- a/contrib/llvm/lib/IR/ConstantsContext.h +++ b/contrib/llvm/lib/IR/ConstantsContext.h @@ -318,7 +318,7 @@ struct ExprMapKeyType { ArrayRef<Constant*> ops, unsigned short flags = 0, unsigned short optionalflags = 0, - ArrayRef<unsigned> inds = ArrayRef<unsigned>()) + ArrayRef<unsigned> inds = None) : opcode(opc), subclassoptionaldata(optionalflags), subclassdata(flags), operands(ops.begin(), ops.end()), indices(inds.begin(), inds.end()) {} uint8_t opcode; diff --git a/contrib/llvm/lib/IR/Core.cpp b/contrib/llvm/lib/IR/Core.cpp index 983b49c..889d574 100644 --- a/contrib/llvm/lib/IR/Core.cpp +++ b/contrib/llvm/lib/IR/Core.cpp @@ -21,7 +21,9 @@ #include "llvm/IR/GlobalVariable.h" #include "llvm/IR/InlineAsm.h" #include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/IRBuilder.h" #include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Module.h" #include "llvm/PassManager.h" #include "llvm/Support/CallSite.h" #include "llvm/Support/Debug.h" @@ -1301,6 +1303,53 @@ void LLVMSetGlobalConstant(LLVMValueRef GlobalVar, LLVMBool IsConstant) { unwrap<GlobalVariable>(GlobalVar)->setConstant(IsConstant != 0); } +LLVMThreadLocalMode LLVMGetThreadLocalMode(LLVMValueRef GlobalVar) { + switch (unwrap<GlobalVariable>(GlobalVar)->getThreadLocalMode()) { + case GlobalVariable::NotThreadLocal: + return LLVMNotThreadLocal; + case GlobalVariable::GeneralDynamicTLSModel: + return LLVMGeneralDynamicTLSModel; + case GlobalVariable::LocalDynamicTLSModel: + return LLVMLocalDynamicTLSModel; + case GlobalVariable::InitialExecTLSModel: + return LLVMInitialExecTLSModel; + case GlobalVariable::LocalExecTLSModel: + return LLVMLocalExecTLSModel; + } + + llvm_unreachable("Invalid GlobalVariable thread local mode"); +} + +void LLVMSetThreadLocalMode(LLVMValueRef GlobalVar, LLVMThreadLocalMode Mode) { + GlobalVariable *GV = unwrap<GlobalVariable>(GlobalVar); + + switch (Mode) { + case LLVMNotThreadLocal: + GV->setThreadLocalMode(GlobalVariable::NotThreadLocal); + break; + case LLVMGeneralDynamicTLSModel: + GV->setThreadLocalMode(GlobalVariable::GeneralDynamicTLSModel); + break; + case LLVMLocalDynamicTLSModel: + GV->setThreadLocalMode(GlobalVariable::LocalDynamicTLSModel); + break; + case LLVMInitialExecTLSModel: + GV->setThreadLocalMode(GlobalVariable::InitialExecTLSModel); + break; + case LLVMLocalExecTLSModel: + GV->setThreadLocalMode(GlobalVariable::LocalExecTLSModel); + break; + } +} + +LLVMBool LLVMIsExternallyInitialized(LLVMValueRef GlobalVar) { + return unwrap<GlobalVariable>(GlobalVar)->isExternallyInitialized(); +} + +void LLVMSetExternallyInitialized(LLVMValueRef GlobalVar, LLVMBool IsExtInit) { + unwrap<GlobalVariable>(GlobalVar)->setExternallyInitialized(IsExtInit); +} + /*--.. 
Operations on aliases ......................................--*/ LLVMValueRef LLVMAddAlias(LLVMModuleRef M, LLVMTypeRef Ty, LLVMValueRef Aliasee, @@ -1396,6 +1445,18 @@ void LLVMAddFunctionAttr(LLVMValueRef Fn, LLVMAttribute PA) { Func->setAttributes(PALnew); } +void LLVMAddTargetDependentFunctionAttr(LLVMValueRef Fn, const char *A, + const char *V) { + Function *Func = unwrap<Function>(Fn); + AttributeSet::AttrIndex Idx = + AttributeSet::AttrIndex(AttributeSet::FunctionIndex); + AttrBuilder B; + + B.addAttribute(A, V); + AttributeSet Set = AttributeSet::get(Func->getContext(), Idx, B); + Func->addAttributes(Idx, Set); +} + void LLVMRemoveFunctionAttr(LLVMValueRef Fn, LLVMAttribute PA) { Function *Func = unwrap<Function>(Fn); const AttributeSet PAL = Func->getAttributes(); @@ -2331,6 +2392,42 @@ LLVMValueRef LLVMBuildPtrDiff(LLVMBuilderRef B, LLVMValueRef LHS, return wrap(unwrap(B)->CreatePtrDiff(unwrap(LHS), unwrap(RHS), Name)); } +LLVMValueRef LLVMBuildAtomicRMW(LLVMBuilderRef B,LLVMAtomicRMWBinOp op, + LLVMValueRef PTR, LLVMValueRef Val, + LLVMAtomicOrdering ordering, + LLVMBool singleThread) { + AtomicRMWInst::BinOp intop; + switch (op) { + case LLVMAtomicRMWBinOpXchg: intop = AtomicRMWInst::Xchg; break; + case LLVMAtomicRMWBinOpAdd: intop = AtomicRMWInst::Add; break; + case LLVMAtomicRMWBinOpSub: intop = AtomicRMWInst::Sub; break; + case LLVMAtomicRMWBinOpAnd: intop = AtomicRMWInst::And; break; + case LLVMAtomicRMWBinOpNand: intop = AtomicRMWInst::Nand; break; + case LLVMAtomicRMWBinOpOr: intop = AtomicRMWInst::Or; break; + case LLVMAtomicRMWBinOpXor: intop = AtomicRMWInst::Xor; break; + case LLVMAtomicRMWBinOpMax: intop = AtomicRMWInst::Max; break; + case LLVMAtomicRMWBinOpMin: intop = AtomicRMWInst::Min; break; + case LLVMAtomicRMWBinOpUMax: intop = AtomicRMWInst::UMax; break; + case LLVMAtomicRMWBinOpUMin: intop = AtomicRMWInst::UMin; break; + } + AtomicOrdering intordering; + switch (ordering) { + case LLVMAtomicOrderingNotAtomic: intordering = NotAtomic; break; + case LLVMAtomicOrderingUnordered: intordering = Unordered; break; + case LLVMAtomicOrderingMonotonic: intordering = Monotonic; break; + case LLVMAtomicOrderingAcquire: intordering = Acquire; break; + case LLVMAtomicOrderingRelease: intordering = Release; break; + case LLVMAtomicOrderingAcquireRelease: + intordering = AcquireRelease; + break; + case LLVMAtomicOrderingSequentiallyConsistent: + intordering = SequentiallyConsistent; + break; + } + return wrap(unwrap(B)->CreateAtomicRMW(intop, unwrap(PTR), unwrap(Val), + intordering, singleThread ? 
SingleThread : CrossThread)); +} + /*===-- Module providers --------------------------------------------------===*/ @@ -2397,6 +2494,13 @@ LLVMMemoryBufferRef LLVMCreateMemoryBufferWithMemoryRangeCopy( StringRef(BufferName))); } +const char *LLVMGetBufferStart(LLVMMemoryBufferRef MemBuf) { + return unwrap(MemBuf)->getBufferStart(); +} + +size_t LLVMGetBufferSize(LLVMMemoryBufferRef MemBuf) { + return unwrap(MemBuf)->getBufferSize(); +} void LLVMDisposeMemoryBuffer(LLVMMemoryBufferRef MemBuf) { delete unwrap(MemBuf); diff --git a/contrib/llvm/lib/IR/DIBuilder.cpp b/contrib/llvm/lib/IR/DIBuilder.cpp index 9d6e840..0980e80 100644 --- a/contrib/llvm/lib/IR/DIBuilder.cpp +++ b/contrib/llvm/lib/IR/DIBuilder.cpp @@ -61,6 +61,9 @@ void DIBuilder::finalize() { DIArray GVs = getOrCreateArray(AllGVs); DIType(TempGVs).replaceAllUsesWith(GVs); + + DIArray IMs = getOrCreateArray(AllImportedModules); + DIType(TempImportedModules).replaceAllUsesWith(IMs); } /// getNonCompileUnitScope - If N is compile unit return NULL otherwise return @@ -101,6 +104,8 @@ void DIBuilder::createCompileUnit(unsigned Lang, StringRef Filename, TempGVs = MDNode::getTemporary(VMContext, TElts); + TempImportedModules = MDNode::getTemporary(VMContext, TElts); + Value *Elts[] = { GetTagConstant(VMContext, dwarf::DW_TAG_compile_unit), createFilePathPair(VMContext, Filename, Directory), @@ -113,6 +118,7 @@ void DIBuilder::createCompileUnit(unsigned Lang, StringRef Filename, TempRetainTypes, TempSubprograms, TempGVs, + TempImportedModules, MDString::get(VMContext, SplitName) }; TheCU = DICompileUnit(MDNode::get(VMContext, Elts)); @@ -122,6 +128,21 @@ void DIBuilder::createCompileUnit(unsigned Lang, StringRef Filename, NMD->addOperand(TheCU); } +DIImportedModule DIBuilder::createImportedModule(DIScope Context, + DINameSpace NS, + unsigned Line) { + Value *Elts[] = { + GetTagConstant(VMContext, dwarf::DW_TAG_imported_module), + Context, + NS, + ConstantInt::get(Type::getInt32Ty(VMContext), Line), + }; + DIImportedModule M(MDNode::get(VMContext, Elts)); + assert(M.Verify() && "Imported module should be valid"); + AllImportedModules.push_back(M); + return M; +} + /// createFile - Create a file descriptor to hold debugging information /// for a file. DIFile DIBuilder::createFile(StringRef Filename, StringRef Directory) { @@ -225,7 +246,8 @@ DIBuilder::createPointerType(DIType PointeeTy, uint64_t SizeInBits, return DIDerivedType(MDNode::get(VMContext, Elts)); } -DIDerivedType DIBuilder::createMemberPointerType(DIType PointeeTy, DIType Base) { +DIDerivedType DIBuilder::createMemberPointerType(DIType PointeeTy, + DIType Base) { // Pointer types are encoded in DIDerivedType format. Value *Elts[] = { GetTagConstant(VMContext, dwarf::DW_TAG_ptr_to_member_type), @@ -427,7 +449,7 @@ DIType DIBuilder::createObjCIVar(StringRef Name, DIObjCProperty DIBuilder::createObjCProperty(StringRef Name, DIFile File, unsigned LineNumber, StringRef GetterName, - StringRef SetterName, + StringRef SetterName, unsigned PropertyAttributes, DIType Ty) { Value *Elts[] = { @@ -601,7 +623,7 @@ DIBuilder::createSubroutineType(DIFile File, DIArray ParameterTypes) { DICompositeType DIBuilder::createEnumerationType( DIDescriptor Scope, StringRef Name, DIFile File, unsigned LineNumber, uint64_t SizeInBits, uint64_t AlignInBits, DIArray Elements, - DIType ClassType) { + DIType UnderlyingType) { // TAG_enumeration_type is encoded in DICompositeType format. 
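Of the C API additions to Core.cpp earlier in this diff, LLVMBuildAtomicRMW is the one that needs the new enum spellings; a hedged usage sketch follows (emitFetchAdd is a hypothetical wrapper, and B, Ptr, Val are assumed to be an existing builder positioned inside a function, an i32 pointer, and an i32 value):

#include "llvm-c/Core.h"

// Hypothetical wrapper: emit a sequentially consistent fetch-and-add through
// the new binding. Returns the value held at Ptr before the add.
static LLVMValueRef emitFetchAdd(LLVMBuilderRef B, LLVMValueRef Ptr,
                                 LLVMValueRef Val) {
  return LLVMBuildAtomicRMW(B, LLVMAtomicRMWBinOpAdd, Ptr, Val,
                            LLVMAtomicOrderingSequentiallyConsistent,
                            /*singleThread=*/0);
}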
Value *Elts[] = { GetTagConstant(VMContext, dwarf::DW_TAG_enumeration_type), @@ -613,7 +635,7 @@ DICompositeType DIBuilder::createEnumerationType( ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits), ConstantInt::get(Type::getInt32Ty(VMContext), 0), ConstantInt::get(Type::getInt32Ty(VMContext), 0), - ClassType, + UnderlyingType, Elements, ConstantInt::get(Type::getInt32Ty(VMContext), 0), Constant::getNullValue(Type::getInt32Ty(VMContext)) diff --git a/contrib/llvm/lib/IR/DataLayout.cpp b/contrib/llvm/lib/IR/DataLayout.cpp index ecd5216..5658f56 100644 --- a/contrib/llvm/lib/IR/DataLayout.cpp +++ b/contrib/llvm/lib/IR/DataLayout.cpp @@ -41,7 +41,7 @@ char DataLayout::ID = 0; // Support for StructLayout //===----------------------------------------------------------------------===// -StructLayout::StructLayout(StructType *ST, const DataLayout &TD) { +StructLayout::StructLayout(StructType *ST, const DataLayout &DL) { assert(!ST->isOpaque() && "Cannot get layout of opaque structs"); StructAlignment = 0; StructSize = 0; @@ -50,7 +50,7 @@ StructLayout::StructLayout(StructType *ST, const DataLayout &TD) { // Loop over each of the elements, placing them in memory. for (unsigned i = 0, e = NumElements; i != e; ++i) { Type *Ty = ST->getElementType(i); - unsigned TyAlign = ST->isPacked() ? 1 : TD.getABITypeAlignment(Ty); + unsigned TyAlign = ST->isPacked() ? 1 : DL.getABITypeAlignment(Ty); // Add padding if necessary to align the data element properly. if ((StructSize & (TyAlign-1)) != 0) @@ -60,7 +60,7 @@ StructLayout::StructLayout(StructType *ST, const DataLayout &TD) { StructAlignment = std::max(TyAlign, StructAlignment); MemberOffsets[i] = StructSize; - StructSize += TD.getTypeAllocSize(Ty); // Consume space for this data item + StructSize += DL.getTypeAllocSize(Ty); // Consume space for this data item } // Empty structures have alignment of 1 byte. diff --git a/contrib/llvm/lib/IR/DebugInfo.cpp b/contrib/llvm/lib/IR/DebugInfo.cpp index 0ffe99d..ec83dca 100644 --- a/contrib/llvm/lib/IR/DebugInfo.cpp +++ b/contrib/llvm/lib/IR/DebugInfo.cpp @@ -64,7 +64,8 @@ bool DIDescriptor::Verify() const { DISubrange(DbgNode).Verify() || DIEnumerator(DbgNode).Verify() || DIObjCProperty(DbgNode).Verify() || DITemplateTypeParameter(DbgNode).Verify() || - DITemplateValueParameter(DbgNode).Verify()); + DITemplateValueParameter(DbgNode).Verify() || + DIImportedModule(DbgNode).Verify()); } static Value *getField(const MDNode *DbgNode, unsigned Elt) { @@ -336,6 +337,12 @@ bool DIDescriptor::isEnumerator() const { bool DIDescriptor::isObjCProperty() const { return DbgNode && getTag() == dwarf::DW_TAG_APPLE_property; } + +/// \brief Return true if the specified tag is DW_TAG_imported_module. +bool DIDescriptor::isImportedModule() const { + return DbgNode && getTag() == dwarf::DW_TAG_imported_module; +} + //===----------------------------------------------------------------------===// // Simple Descriptor Constructors and other Methods //===----------------------------------------------------------------------===// @@ -418,7 +425,7 @@ bool DICompileUnit::Verify() const { if (N.empty()) return false; // It is possible that directory and produce string is empty. - return DbgNode->getNumOperands() == 12; + return DbgNode->getNumOperands() == 13; } /// Verify - Verify that an ObjC property is well formed. @@ -580,6 +587,11 @@ bool DITemplateValueParameter::Verify() const { return isTemplateValueParameter() && DbgNode->getNumOperands() == 8; } +/// \brief Verify that the imported module descriptor is well formed. 
+bool DIImportedModule::Verify() const { + return isImportedModule() && DbgNode->getNumOperands() == 4; +} + /// getOriginalTypeSize - If this type is derived from a base type then /// return base type size. uint64_t DIDerivedType::getOriginalTypeSize() const { @@ -694,7 +706,7 @@ StringRef DIScope::getDirectory() const { } DIArray DICompileUnit::getEnumTypes() const { - if (!DbgNode || DbgNode->getNumOperands() < 12) + if (!DbgNode || DbgNode->getNumOperands() < 13) return DIArray(); if (MDNode *N = dyn_cast_or_null<MDNode>(DbgNode->getOperand(7))) @@ -703,7 +715,7 @@ DIArray DICompileUnit::getEnumTypes() const { } DIArray DICompileUnit::getRetainedTypes() const { - if (!DbgNode || DbgNode->getNumOperands() < 12) + if (!DbgNode || DbgNode->getNumOperands() < 13) return DIArray(); if (MDNode *N = dyn_cast_or_null<MDNode>(DbgNode->getOperand(8))) @@ -712,7 +724,7 @@ DIArray DICompileUnit::getRetainedTypes() const { } DIArray DICompileUnit::getSubprograms() const { - if (!DbgNode || DbgNode->getNumOperands() < 12) + if (!DbgNode || DbgNode->getNumOperands() < 13) return DIArray(); if (MDNode *N = dyn_cast_or_null<MDNode>(DbgNode->getOperand(9))) @@ -722,7 +734,7 @@ DIArray DICompileUnit::getSubprograms() const { DIArray DICompileUnit::getGlobalVariables() const { - if (!DbgNode || DbgNode->getNumOperands() < 12) + if (!DbgNode || DbgNode->getNumOperands() < 13) return DIArray(); if (MDNode *N = dyn_cast_or_null<MDNode>(DbgNode->getOperand(10))) @@ -730,6 +742,15 @@ DIArray DICompileUnit::getGlobalVariables() const { return DIArray(); } +DIArray DICompileUnit::getImportedModules() const { + if (!DbgNode || DbgNode->getNumOperands() < 13) + return DIArray(); + + if (MDNode *N = dyn_cast_or_null<MDNode>(DbgNode->getOperand(11))) + return DIArray(N); + return DIArray(); +} + /// fixupObjcLikeName - Replace contains special characters used /// in a typical Objective-C names with '.' in a given string. static void fixupObjcLikeName(StringRef Str, SmallVectorImpl<char> &Out) { @@ -1054,8 +1075,13 @@ void DIScope::printInternal(raw_ostream &OS) const { void DICompileUnit::printInternal(raw_ostream &OS) const { DIScope::printInternal(OS); - if (const char *Lang = dwarf::LanguageString(getLanguage())) - OS << " [" << Lang << ']'; + OS << " ["; + unsigned Lang = getLanguage(); + if (const char *LangStr = dwarf::LanguageString(Lang)) + OS << LangStr; + else + (OS << "lang 0x").write_hex(Lang); + OS << ']'; } void DIEnumerator::printInternal(raw_ostream &OS) const { diff --git a/contrib/llvm/lib/IR/Function.cpp b/contrib/llvm/lib/IR/Function.cpp index 1e72b90..7f7efab 100644 --- a/contrib/llvm/lib/IR/Function.cpp +++ b/contrib/llvm/lib/IR/Function.cpp @@ -124,6 +124,13 @@ bool Argument::hasStructRetAttr() const { hasAttribute(1, Attribute::StructRet); } +/// hasReturnedAttr - Return true if this argument has the returned attribute on +/// it in its containing function. +bool Argument::hasReturnedAttr() const { + return getParent()->getAttributes(). + hasAttribute(getArgNo()+1, Attribute::Returned); +} + /// addAttr - Add attributes to an argument. 
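The new Returned attribute and the Argument::hasReturnedAttr() query above pair with verifier rules added later in this diff: at most one parameter may carry 'returned', and its type must be losslessly bitcastable to the return type. A hedged sketch of attaching it with the AttrBuilder pattern this change already uses elsewhere (markFirstParamReturned is a hypothetical helper):

#include "llvm/IR/Attributes.h"
#include "llvm/IR/Function.h"
#include <cassert>
using namespace llvm;

// Hypothetical helper: mark F's first parameter as 'returned'. Attribute
// index 1 is the first parameter; index 0 is the return value.
static void markFirstParamReturned(Function *F) {
  AttrBuilder B;
  B.addAttribute(Attribute::Returned);
  F->addAttributes(1, AttributeSet::get(F->getContext(), 1, B));
  assert(F->arg_begin()->hasReturnedAttr());
}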
void Argument::addAttr(AttributeSet AS) { assert(AS.getNumSlots() <= 1 && diff --git a/contrib/llvm/lib/IR/Metadata.cpp b/contrib/llvm/lib/IR/Metadata.cpp index 0228aeb..6a6b7af 100644 --- a/contrib/llvm/lib/IR/Metadata.cpp +++ b/contrib/llvm/lib/IR/Metadata.cpp @@ -403,42 +403,6 @@ void MDNode::replaceOperand(MDNodeOperand *Op, Value *To) { } } -MDNode *MDNode::getMostGenericTBAA(MDNode *A, MDNode *B) { - if (!A || !B) - return NULL; - - if (A == B) - return A; - - SmallVector<MDNode *, 4> PathA; - MDNode *T = A; - while (T) { - PathA.push_back(T); - T = T->getNumOperands() >= 2 ? cast_or_null<MDNode>(T->getOperand(1)) : 0; - } - - SmallVector<MDNode *, 4> PathB; - T = B; - while (T) { - PathB.push_back(T); - T = T->getNumOperands() >= 2 ? cast_or_null<MDNode>(T->getOperand(1)) : 0; - } - - int IA = PathA.size() - 1; - int IB = PathB.size() - 1; - - MDNode *Ret = 0; - while (IA >= 0 && IB >=0) { - if (PathA[IA] == PathB[IB]) - Ret = PathA[IA]; - else - break; - --IA; - --IB; - } - return Ret; -} - MDNode *MDNode::getMostGenericFPMath(MDNode *A, MDNode *B) { if (!A || !B) return NULL; diff --git a/contrib/llvm/lib/IR/PassManager.cpp b/contrib/llvm/lib/IR/PassManager.cpp index 3c968aa..387094a 100644 --- a/contrib/llvm/lib/IR/PassManager.cpp +++ b/contrib/llvm/lib/IR/PassManager.cpp @@ -42,14 +42,14 @@ namespace llvm { // Different debug levels that can be enabled... enum PassDebugLevel { - None, Arguments, Structure, Executions, Details + Disabled, Arguments, Structure, Executions, Details }; static cl::opt<enum PassDebugLevel> PassDebugging("debug-pass", cl::Hidden, cl::desc("Print PassManager debugging information"), cl::values( - clEnumVal(None , "disable debug output"), + clEnumVal(Disabled , "disable debug output"), clEnumVal(Arguments , "print pass arguments to pass to 'opt'"), clEnumVal(Structure , "print pass structure before run()"), clEnumVal(Executions, "print pass name before it is executed"), diff --git a/contrib/llvm/lib/IR/Type.cpp b/contrib/llvm/lib/IR/Type.cpp index 1e6a51a..46c61fc 100644 --- a/contrib/llvm/lib/IR/Type.cpp +++ b/contrib/llvm/lib/IR/Type.cpp @@ -380,7 +380,7 @@ FunctionType *FunctionType::get(Type *ReturnType, } FunctionType *FunctionType::get(Type *Result, bool isVarArg) { - return get(Result, ArrayRef<Type *>(), isVarArg); + return get(Result, None, isVarArg); } /// isValidReturnType - Return true if the specified type is valid as a return @@ -499,7 +499,7 @@ StructType *StructType::create(LLVMContext &Context, StringRef Name) { } StructType *StructType::get(LLVMContext &Context, bool isPacked) { - return get(Context, llvm::ArrayRef<Type*>(), isPacked); + return get(Context, None, isPacked); } StructType *StructType::get(Type *type, ...) { diff --git a/contrib/llvm/lib/IR/Value.cpp b/contrib/llvm/lib/IR/Value.cpp index adc702e..89a3c05 100644 --- a/contrib/llvm/lib/IR/Value.cpp +++ b/contrib/llvm/lib/IR/Value.cpp @@ -118,7 +118,7 @@ bool Value::isUsedInBasicBlock(const BasicBlock *BB) const { for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I != E; ++I) { if (std::find(I->op_begin(), I->op_end(), this) != I->op_end()) return true; - if (MaxBlockSize-- == 0) // If the block is larger fall back to use_iterator + if (--MaxBlockSize == 0) // If the block is larger fall back to use_iterator break; } @@ -333,6 +333,7 @@ namespace { // Various metrics for how much to strip off of pointers. 
enum PointerStripKind { PSK_ZeroIndices, + PSK_ZeroIndicesAndAliases, PSK_InBoundsConstantIndices, PSK_InBounds }; @@ -350,6 +351,7 @@ static Value *stripPointerCastsAndOffsets(Value *V) { do { if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) { switch (StripKind) { + case PSK_ZeroIndicesAndAliases: case PSK_ZeroIndices: if (!GEP->hasAllZeroIndices()) return V; @@ -367,7 +369,7 @@ static Value *stripPointerCastsAndOffsets(Value *V) { } else if (Operator::getOpcode(V) == Instruction::BitCast) { V = cast<Operator>(V)->getOperand(0); } else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) { - if (GA->mayBeOverridden()) + if (StripKind == PSK_ZeroIndices || GA->mayBeOverridden()) return V; V = GA->getAliasee(); } else { @@ -381,6 +383,10 @@ static Value *stripPointerCastsAndOffsets(Value *V) { } // namespace Value *Value::stripPointerCasts() { + return stripPointerCastsAndOffsets<PSK_ZeroIndicesAndAliases>(this); +} + +Value *Value::stripPointerCastsNoFollowAliases() { return stripPointerCastsAndOffsets<PSK_ZeroIndices>(this); } diff --git a/contrib/llvm/lib/IR/Verifier.cpp b/contrib/llvm/lib/IR/Verifier.cpp index 8bfbb32..d106173 100644 --- a/contrib/llvm/lib/IR/Verifier.cpp +++ b/contrib/llvm/lib/IR/Verifier.cpp @@ -301,9 +301,12 @@ namespace { bool VerifyIntrinsicType(Type *Ty, ArrayRef<Intrinsic::IITDescriptor> &Infos, SmallVectorImpl<Type*> &ArgTys); - void VerifyParameterAttrs(AttributeSet Attrs, uint64_t Idx, Type *Ty, + bool VerifyAttributeCount(AttributeSet Attrs, unsigned Params); + void VerifyAttributeTypes(AttributeSet Attrs, unsigned Idx, + bool isFunction, const Value *V); + void VerifyParameterAttrs(AttributeSet Attrs, unsigned Idx, Type *Ty, bool isReturnValue, const Value *V); - void VerifyFunctionAttrs(FunctionType *FT, const AttributeSet &Attrs, + void VerifyFunctionAttrs(FunctionType *FT, AttributeSet Attrs, const Value *V); void WriteValue(const Value *V) { @@ -446,6 +449,30 @@ void Verifier::visitGlobalVariable(GlobalVariable &GV) { } } + if (GV.hasName() && (GV.getName() == "llvm.used" || + GV.getName() == "llvm.compiler_used")) { + Assert1(!GV.hasInitializer() || GV.hasAppendingLinkage(), + "invalid linkage for intrinsic global variable", &GV); + Type *GVType = GV.getType()->getElementType(); + if (ArrayType *ATy = dyn_cast<ArrayType>(GVType)) { + PointerType *PTy = dyn_cast<PointerType>(ATy->getElementType()); + Assert1(PTy, "wrong type for intrinsic global variable", &GV); + if (GV.hasInitializer()) { + Constant *Init = GV.getInitializer(); + ConstantArray *InitArray = dyn_cast<ConstantArray>(Init); + Assert1(InitArray, "wrong initalizer for intrinsic global variable", + Init); + for (unsigned i = 0, e = InitArray->getNumOperands(); i != e; ++i) { + Value *V = Init->getOperand(i)->stripPointerCasts(); + // stripPointerCasts strips aliases, so we only need to check for + // variables and functions. 
+ Assert1(isa<GlobalVariable>(V) || isa<Function>(V), + "invalid llvm.used member", V); + } + } + } + } + visitGlobalValue(GV); } @@ -626,44 +653,74 @@ void Verifier::visitModuleFlag(MDNode *Op, DenseMap<MDString*, MDNode*>&SeenIDs, } } +void Verifier::VerifyAttributeTypes(AttributeSet Attrs, unsigned Idx, + bool isFunction, const Value* V) { + unsigned Slot = ~0U; + for (unsigned I = 0, E = Attrs.getNumSlots(); I != E; ++I) + if (Attrs.getSlotIndex(I) == Idx) { + Slot = I; + break; + } + + assert(Slot != ~0U && "Attribute set inconsistency!"); + + for (AttributeSet::iterator I = Attrs.begin(Slot), E = Attrs.end(Slot); + I != E; ++I) { + if (I->isStringAttribute()) + continue; + + if (I->getKindAsEnum() == Attribute::NoReturn || + I->getKindAsEnum() == Attribute::NoUnwind || + I->getKindAsEnum() == Attribute::ReadNone || + I->getKindAsEnum() == Attribute::ReadOnly || + I->getKindAsEnum() == Attribute::NoInline || + I->getKindAsEnum() == Attribute::AlwaysInline || + I->getKindAsEnum() == Attribute::OptimizeForSize || + I->getKindAsEnum() == Attribute::StackProtect || + I->getKindAsEnum() == Attribute::StackProtectReq || + I->getKindAsEnum() == Attribute::StackProtectStrong || + I->getKindAsEnum() == Attribute::NoRedZone || + I->getKindAsEnum() == Attribute::NoImplicitFloat || + I->getKindAsEnum() == Attribute::Naked || + I->getKindAsEnum() == Attribute::InlineHint || + I->getKindAsEnum() == Attribute::StackAlignment || + I->getKindAsEnum() == Attribute::UWTable || + I->getKindAsEnum() == Attribute::NonLazyBind || + I->getKindAsEnum() == Attribute::ReturnsTwice || + I->getKindAsEnum() == Attribute::SanitizeAddress || + I->getKindAsEnum() == Attribute::SanitizeThread || + I->getKindAsEnum() == Attribute::SanitizeMemory || + I->getKindAsEnum() == Attribute::MinSize || + I->getKindAsEnum() == Attribute::NoDuplicate || + I->getKindAsEnum() == Attribute::NoBuiltin) { + if (!isFunction) + CheckFailed("Attribute '" + I->getKindAsString() + + "' only applies to functions!", V); + return; + } else if (isFunction) { + CheckFailed("Attribute '" + I->getKindAsString() + + "' does not apply to functions!", V); + return; + } + } +} + // VerifyParameterAttrs - Check the given attributes for an argument or return // value of the specified type. The value V is printed in error messages. 
-void Verifier::VerifyParameterAttrs(AttributeSet Attrs, uint64_t Idx, Type *Ty, +void Verifier::VerifyParameterAttrs(AttributeSet Attrs, unsigned Idx, Type *Ty, bool isReturnValue, const Value *V) { if (!Attrs.hasAttributes(Idx)) return; - Assert1(!Attrs.hasAttribute(Idx, Attribute::NoReturn) && - !Attrs.hasAttribute(Idx, Attribute::NoUnwind) && - !Attrs.hasAttribute(Idx, Attribute::ReadNone) && - !Attrs.hasAttribute(Idx, Attribute::ReadOnly) && - !Attrs.hasAttribute(Idx, Attribute::NoInline) && - !Attrs.hasAttribute(Idx, Attribute::AlwaysInline) && - !Attrs.hasAttribute(Idx, Attribute::OptimizeForSize) && - !Attrs.hasAttribute(Idx, Attribute::StackProtect) && - !Attrs.hasAttribute(Idx, Attribute::StackProtectReq) && - !Attrs.hasAttribute(Idx, Attribute::NoRedZone) && - !Attrs.hasAttribute(Idx, Attribute::NoImplicitFloat) && - !Attrs.hasAttribute(Idx, Attribute::Naked) && - !Attrs.hasAttribute(Idx, Attribute::InlineHint) && - !Attrs.hasAttribute(Idx, Attribute::StackAlignment) && - !Attrs.hasAttribute(Idx, Attribute::UWTable) && - !Attrs.hasAttribute(Idx, Attribute::NonLazyBind) && - !Attrs.hasAttribute(Idx, Attribute::ReturnsTwice) && - !Attrs.hasAttribute(Idx, Attribute::SanitizeAddress) && - !Attrs.hasAttribute(Idx, Attribute::SanitizeThread) && - !Attrs.hasAttribute(Idx, Attribute::SanitizeMemory) && - !Attrs.hasAttribute(Idx, Attribute::MinSize) && - !Attrs.hasAttribute(Idx, Attribute::NoBuiltin), - "Some attributes in '" + Attrs.getAsString(Idx) + - "' only apply to functions!", V); + VerifyAttributeTypes(Attrs, Idx, false, V); if (isReturnValue) Assert1(!Attrs.hasAttribute(Idx, Attribute::ByVal) && !Attrs.hasAttribute(Idx, Attribute::Nest) && !Attrs.hasAttribute(Idx, Attribute::StructRet) && - !Attrs.hasAttribute(Idx, Attribute::NoCapture), - "Attribute 'byval', 'nest', 'sret', and 'nocapture' " + !Attrs.hasAttribute(Idx, Attribute::NoCapture) && + !Attrs.hasAttribute(Idx, Attribute::Returned), + "Attribute 'byval', 'nest', 'sret', 'nocapture', and 'returned' " "do not apply to return values!", V); // Check for mutually incompatible attributes. @@ -683,6 +740,10 @@ void Verifier::VerifyParameterAttrs(AttributeSet Attrs, uint64_t Idx, Type *Ty, Attrs.hasAttribute(Idx, Attribute::InReg))), "Attributes " "'byval, nest, and inreg' are incompatible!", V); + Assert1(!(Attrs.hasAttribute(Idx, Attribute::StructRet) && + Attrs.hasAttribute(Idx, Attribute::Returned)), "Attributes " + "'sret and returned' are incompatible!", V); + Assert1(!(Attrs.hasAttribute(Idx, Attribute::ZExt) && Attrs.hasAttribute(Idx, Attribute::SExt)), "Attributes " "'zeroext and signext' are incompatible!", V); @@ -712,81 +773,51 @@ void Verifier::VerifyParameterAttrs(AttributeSet Attrs, uint64_t Idx, Type *Ty, // VerifyFunctionAttrs - Check parameter attributes against a function type. // The value V is printed in error messages. -void Verifier::VerifyFunctionAttrs(FunctionType *FT, - const AttributeSet &Attrs, +void Verifier::VerifyFunctionAttrs(FunctionType *FT, AttributeSet Attrs, const Value *V) { if (Attrs.isEmpty()) return; bool SawNest = false; + bool SawReturned = false; for (unsigned i = 0, e = Attrs.getNumSlots(); i != e; ++i) { - unsigned Index = Attrs.getSlotIndex(i); + unsigned Idx = Attrs.getSlotIndex(i); Type *Ty; - if (Index == 0) + if (Idx == 0) Ty = FT->getReturnType(); - else if (Index-1 < FT->getNumParams()) - Ty = FT->getParamType(Index-1); + else if (Idx-1 < FT->getNumParams()) + Ty = FT->getParamType(Idx-1); else break; // VarArgs attributes, verified elsewhere. 
- VerifyParameterAttrs(Attrs, Index, Ty, Index == 0, V); + VerifyParameterAttrs(Attrs, Idx, Ty, Idx == 0, V); - if (Attrs.hasAttribute(i, Attribute::Nest)) { + if (Idx == 0) + continue; + + if (Attrs.hasAttribute(Idx, Attribute::Nest)) { Assert1(!SawNest, "More than one parameter has attribute nest!", V); SawNest = true; } - if (Attrs.hasAttribute(Index, Attribute::StructRet)) - Assert1(Index == 1, "Attribute sret is not on first parameter!", V); + if (Attrs.hasAttribute(Idx, Attribute::Returned)) { + Assert1(!SawReturned, "More than one parameter has attribute returned!", + V); + Assert1(Ty->canLosslesslyBitCastTo(FT->getReturnType()), "Incompatible " + "argument and return types for 'returned' attribute", V); + SawReturned = true; + } + + if (Attrs.hasAttribute(Idx, Attribute::StructRet)) + Assert1(Idx == 1, "Attribute sret is not on first parameter!", V); } if (!Attrs.hasAttributes(AttributeSet::FunctionIndex)) return; - AttrBuilder NotFn(Attrs, AttributeSet::FunctionIndex); - NotFn.removeFunctionOnlyAttrs(); - Assert1(NotFn.empty(), "Attributes '" + - AttributeSet::get(V->getContext(), - AttributeSet::FunctionIndex, - NotFn).getAsString(AttributeSet::FunctionIndex) + - "' do not apply to the function!", V); - - // Check for mutually incompatible attributes. - Assert1(!((Attrs.hasAttribute(AttributeSet::FunctionIndex, - Attribute::ByVal) && - Attrs.hasAttribute(AttributeSet::FunctionIndex, - Attribute::Nest)) || - (Attrs.hasAttribute(AttributeSet::FunctionIndex, - Attribute::ByVal) && - Attrs.hasAttribute(AttributeSet::FunctionIndex, - Attribute::StructRet)) || - (Attrs.hasAttribute(AttributeSet::FunctionIndex, - Attribute::Nest) && - Attrs.hasAttribute(AttributeSet::FunctionIndex, - Attribute::StructRet))), - "Attributes 'byval, nest, and sret' are incompatible!", V); - - Assert1(!((Attrs.hasAttribute(AttributeSet::FunctionIndex, - Attribute::ByVal) && - Attrs.hasAttribute(AttributeSet::FunctionIndex, - Attribute::Nest)) || - (Attrs.hasAttribute(AttributeSet::FunctionIndex, - Attribute::ByVal) && - Attrs.hasAttribute(AttributeSet::FunctionIndex, - Attribute::InReg)) || - (Attrs.hasAttribute(AttributeSet::FunctionIndex, - Attribute::Nest) && - Attrs.hasAttribute(AttributeSet::FunctionIndex, - Attribute::InReg))), - "Attributes 'byval, nest, and inreg' are incompatible!", V); - - Assert1(!(Attrs.hasAttribute(AttributeSet::FunctionIndex, - Attribute::ZExt) && - Attrs.hasAttribute(AttributeSet::FunctionIndex, - Attribute::SExt)), - "Attributes 'zeroext and signext' are incompatible!", V); + VerifyAttributeTypes(Attrs, AttributeSet::FunctionIndex, true, V); Assert1(!(Attrs.hasAttribute(AttributeSet::FunctionIndex, Attribute::ReadNone) && @@ -801,7 +832,7 @@ void Verifier::VerifyFunctionAttrs(FunctionType *FT, "Attributes 'noinline and alwaysinline' are incompatible!", V); } -static bool VerifyAttributeCount(const AttributeSet &Attrs, unsigned Params) { +bool Verifier::VerifyAttributeCount(AttributeSet Attrs, unsigned Params) { if (Attrs.getNumSlots() == 0) return true; @@ -837,7 +868,7 @@ void Verifier::visitFunction(Function &F) { Assert1(!F.hasStructRetAttr() || F.getReturnType()->isVoidTy(), "Invalid struct return type!", &F); - const AttributeSet &Attrs = F.getAttributes(); + AttributeSet Attrs = F.getAttributes(); Assert1(VerifyAttributeCount(Attrs, FT->getNumParams()), "Attribute after last parameter!", &F); @@ -1350,7 +1381,7 @@ void Verifier::VerifyCallSite(CallSite CS) { "Call parameter type does not match function signature!", CS.getArgument(i), FTy->getParamType(i), I); - const 
AttributeSet &Attrs = CS.getAttributes(); + AttributeSet Attrs = CS.getAttributes(); Assert1(VerifyAttributeCount(Attrs, CS.arg_size()), "Attribute after last parameter!", I); @@ -1358,15 +1389,41 @@ void Verifier::VerifyCallSite(CallSite CS) { // Verify call attributes. VerifyFunctionAttrs(FTy, Attrs, I); - if (FTy->isVarArg()) + if (FTy->isVarArg()) { + // FIXME? is 'nest' even legal here? + bool SawNest = false; + bool SawReturned = false; + + for (unsigned Idx = 1; Idx < 1 + FTy->getNumParams(); ++Idx) { + if (Attrs.hasAttribute(Idx, Attribute::Nest)) + SawNest = true; + if (Attrs.hasAttribute(Idx, Attribute::Returned)) + SawReturned = true; + } + // Check attributes on the varargs part. for (unsigned Idx = 1 + FTy->getNumParams(); Idx <= CS.arg_size(); ++Idx) { - VerifyParameterAttrs(Attrs, Idx, CS.getArgument(Idx-1)->getType(), - false, I); + Type *Ty = CS.getArgument(Idx-1)->getType(); + VerifyParameterAttrs(Attrs, Idx, Ty, false, I); + + if (Attrs.hasAttribute(Idx, Attribute::Nest)) { + Assert1(!SawNest, "More than one parameter has attribute nest!", I); + SawNest = true; + } + + if (Attrs.hasAttribute(Idx, Attribute::Returned)) { + Assert1(!SawReturned, "More than one parameter has attribute returned!", + I); + Assert1(Ty->canLosslesslyBitCastTo(FTy->getReturnType()), + "Incompatible argument and return types for 'returned' " + "attribute", I); + SawReturned = true; + } Assert1(!Attrs.hasAttribute(Idx, Attribute::StructRet), "Attribute 'sret' cannot be used for vararg call arguments!", I); } + } // Verify that there's no metadata unless it's a direct call to an intrinsic. if (CS.getCalledFunction() == 0 || diff --git a/contrib/llvm/lib/Linker/LinkModules.cpp b/contrib/llvm/lib/Linker/LinkModules.cpp index 74cbdad..d2e13c9 100644 --- a/contrib/llvm/lib/Linker/LinkModules.cpp +++ b/contrib/llvm/lib/Linker/LinkModules.cpp @@ -13,21 +13,15 @@ #include "llvm/Linker.h" #include "llvm-c/Linker.h" -#include "llvm/ADT/DenseSet.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/SetVector.h" -#include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallString.h" #include "llvm/IR/Constants.h" -#include "llvm/IR/DerivedTypes.h" -#include "llvm/IR/Instructions.h" #include "llvm/IR/Module.h" #include "llvm/IR/TypeFinder.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/Cloning.h" -#include "llvm/Transforms/Utils/ValueMapper.h" -#include <cctype> using namespace llvm; //===----------------------------------------------------------------------===// @@ -35,6 +29,8 @@ using namespace llvm; //===----------------------------------------------------------------------===// namespace { + typedef SmallPtrSet<StructType*, 32> TypeSet; + class TypeMapTy : public ValueMapTypeRemapper { /// MappedTypes - This is a mapping from a source type to a destination type /// to use. @@ -55,6 +51,9 @@ class TypeMapTy : public ValueMapTypeRemapper { SmallPtrSet<StructType*, 16> DstResolvedOpaqueTypes; public: + TypeMapTy(TypeSet &Set) : DstStructTypesSet(Set) {} + + TypeSet &DstStructTypesSet; /// addTypeMapping - Indicate that the specified type in the destination /// module is conceptually equivalent to the specified type in the source /// module. @@ -331,13 +330,20 @@ Type *TypeMapTy::getImpl(Type *Ty) { StructType *STy = cast<StructType>(Ty); // If the type is opaque, we can just use it directly. - if (STy->isOpaque()) + if (STy->isOpaque()) { + // A named structure type from src module is used. 
Add it to the Set of + // identified structs in the destination module. + DstStructTypesSet.insert(STy); return *Entry = STy; + } // Otherwise we create a new type and resolve its body later. This will be // resolved by the top level of get(). SrcDefinitionsToResolve.push_back(STy); StructType *DTy = StructType::create(STy->getContext()); + // A new identified structure type was created. Add it to the set of + // identified structs in the destination module. + DstStructTypesSet.insert(DTy); DstResolvedOpaqueTypes.insert(DTy); return *Entry = DTy; } @@ -379,8 +385,8 @@ namespace { public: std::string ErrorMsg; - ModuleLinker(Module *dstM, Module *srcM, unsigned mode) - : DstM(dstM), SrcM(srcM), Mode(mode) { } + ModuleLinker(Module *dstM, TypeSet &Set, Module *srcM, unsigned mode) + : DstM(dstM), SrcM(srcM), TypeMap(Set), Mode(mode) { } bool run(); @@ -594,11 +600,6 @@ void ModuleLinker::computeTypeMapping() { SmallPtrSet<StructType*, 32> SrcStructTypesSet(SrcStructTypes.begin(), SrcStructTypes.end()); - TypeFinder DstStructTypes; - DstStructTypes.run(*DstM, true); - SmallPtrSet<StructType*, 32> DstStructTypesSet(DstStructTypes.begin(), - DstStructTypes.end()); - for (unsigned i = 0, e = SrcStructTypes.size(); i != e; ++i) { StructType *ST = SrcStructTypes[i]; if (!ST->hasName()) continue; @@ -629,7 +630,7 @@ void ModuleLinker::computeTypeMapping() { // we prefer to take the '%C' version. So we are then left with both // '%C.1' and '%C' being used for the same types. This leads to some // variables using one type and some using the other. - if (!SrcStructTypesSet.count(DST) && DstStructTypesSet.count(DST)) + if (!SrcStructTypesSet.count(DST) && TypeMap.DstStructTypesSet.count(DST)) TypeMap.addTypeMapping(DST, ST); } @@ -1287,6 +1288,25 @@ bool ModuleLinker::run() { return false; } +Linker::Linker(Module *M) : Composite(M) { + TypeFinder StructTypes; + StructTypes.run(*M, true); + IdentifiedStructTypes.insert(StructTypes.begin(), StructTypes.end()); +} + +Linker::~Linker() { +} + +bool Linker::linkInModule(Module *Src, unsigned Mode, std::string *ErrorMsg) { + ModuleLinker TheLinker(Composite, IdentifiedStructTypes, Src, Mode); + if (TheLinker.run()) { + if (ErrorMsg) + *ErrorMsg = TheLinker.ErrorMsg; + return true; + } + return false; +} + //===----------------------------------------------------------------------===// // LinkModules entrypoint. //===----------------------------------------------------------------------===// @@ -1298,13 +1318,8 @@ bool ModuleLinker::run() { /// and shouldn't be relied on to be consistent. bool Linker::LinkModules(Module *Dest, Module *Src, unsigned Mode, std::string *ErrorMsg) { - ModuleLinker TheLinker(Dest, Src, Mode); - if (TheLinker.run()) { - if (ErrorMsg) *ErrorMsg = TheLinker.ErrorMsg; - return true; - } - - return false; + Linker L(Dest); + return L.linkInModule(Src, Mode, ErrorMsg); } //===----------------------------------------------------------------------===// diff --git a/contrib/llvm/lib/Linker/Linker.cpp b/contrib/llvm/lib/Linker/Linker.cpp deleted file mode 100644 index 74d24f2..0000000 --- a/contrib/llvm/lib/Linker/Linker.cpp +++ /dev/null @@ -1,70 +0,0 @@ -//===- lib/Linker/Linker.cpp - Basic Linker functionality ----------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. 
-// -//===----------------------------------------------------------------------===// -// -// This file contains basic Linker functionality that all usages will need. -// -//===----------------------------------------------------------------------===// - -#include "llvm/Linker.h" -#include "llvm/Bitcode/ReaderWriter.h" -#include "llvm/IR/Module.h" -#include "llvm/Support/MemoryBuffer.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/Support/system_error.h" -using namespace llvm; - -Linker::Linker(StringRef progname, StringRef modname, - LLVMContext& C, unsigned flags): - Context(C), - Composite(new Module(modname, C)), - Flags(flags), - Error(), - ProgramName(progname) { } - -Linker::Linker(StringRef progname, Module* aModule, unsigned flags) : - Context(aModule->getContext()), - Composite(aModule), - Flags(flags), - Error(), - ProgramName(progname) { } - -Linker::~Linker() { - delete Composite; -} - -bool -Linker::error(StringRef message) { - Error = message; - if (!(Flags&QuietErrors)) - errs() << ProgramName << ": error: " << message << "\n"; - return true; -} - -bool -Linker::warning(StringRef message) { - Error = message; - if (!(Flags&QuietWarnings)) - errs() << ProgramName << ": warning: " << message << "\n"; - return false; -} - -void -Linker::verbose(StringRef message) { - if (Flags&Verbose) - errs() << " " << message << "\n"; -} - -Module* -Linker::releaseModule() { - Module* result = Composite; - Error.clear(); - Composite = 0; - Flags = 0; - return result; -} diff --git a/contrib/llvm/lib/MC/MCAsmInfo.cpp b/contrib/llvm/lib/MC/MCAsmInfo.cpp index 51bb435..9e60884 100644 --- a/contrib/llvm/lib/MC/MCAsmInfo.cpp +++ b/contrib/llvm/lib/MC/MCAsmInfo.cpp @@ -87,10 +87,10 @@ MCAsmInfo::MCAsmInfo() { SupportsDebugInformation = false; ExceptionsType = ExceptionHandling::None; DwarfUsesInlineInfoSection = false; - DwarfSectionOffsetDirective = 0; DwarfUsesRelocationsAcrossSections = true; DwarfRegNumForCFI = false; HasMicrosoftFastStdCallMangling = false; + NeedsDwarfSectionOffsetDirective = false; } MCAsmInfo::~MCAsmInfo() { diff --git a/contrib/llvm/lib/MC/MCAsmInfoCOFF.cpp b/contrib/llvm/lib/MC/MCAsmInfoCOFF.cpp index fd79193..33350d9 100644 --- a/contrib/llvm/lib/MC/MCAsmInfoCOFF.cpp +++ b/contrib/llvm/lib/MC/MCAsmInfoCOFF.cpp @@ -36,8 +36,8 @@ MCAsmInfoCOFF::MCAsmInfoCOFF() { // Set up DWARF directives HasLEB128 = true; // Target asm supports leb128 directives (little-endian) SupportsDebugInformation = true; - DwarfSectionOffsetDirective = "\t.secrel32\t"; HasMicrosoftFastStdCallMangling = true; + NeedsDwarfSectionOffsetDirective = true; } void MCAsmInfoMicrosoft::anchor() { } diff --git a/contrib/llvm/lib/MC/MCAsmStreamer.cpp b/contrib/llvm/lib/MC/MCAsmStreamer.cpp index 35613b4..9e86785 100644 --- a/contrib/llvm/lib/MC/MCAsmStreamer.cpp +++ b/contrib/llvm/lib/MC/MCAsmStreamer.cpp @@ -124,19 +124,15 @@ public: /// @name MCStreamer Interface /// @{ - virtual void ChangeSection(const MCSection *Section); + virtual void ChangeSection(const MCSection *Section, + const MCExpr *Subsection); virtual void InitSections() { InitToTextSection(); } virtual void InitToTextSection() { - // FIXME, this is MachO specific, but the testsuite - // expects this. 
- SwitchSection(getContext().getMachOSection( - "__TEXT", "__text", - MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS, - 0, SectionKind::getText())); + SwitchSection(getContext().getObjectFileInfo()->getTextSection()); } virtual void EmitLabel(MCSymbol *Symbol); @@ -333,9 +329,10 @@ static inline int64_t truncateToSize(int64_t Value, unsigned Bytes) { return Value & ((uint64_t) (int64_t) -1 >> (64 - Bytes * 8)); } -void MCAsmStreamer::ChangeSection(const MCSection *Section) { +void MCAsmStreamer::ChangeSection(const MCSection *Section, + const MCExpr *Subsection) { assert(Section && "Cannot switch to a null section!"); - Section->PrintSwitchToSection(MAI, OS); + Section->PrintSwitchToSection(MAI, OS, Subsection); } void MCAsmStreamer::EmitEHSymAttributes(const MCSymbol *Symbol, @@ -642,7 +639,8 @@ static void PrintQuotedString(StringRef Data, raw_ostream &OS) { void MCAsmStreamer::EmitBytes(StringRef Data, unsigned AddrSpace) { - assert(getCurrentSection() && "Cannot emit contents before setting section!"); + assert(getCurrentSection().first && + "Cannot emit contents before setting section!"); if (Data.empty()) return; if (Data.size() == 1) { @@ -673,7 +671,8 @@ void MCAsmStreamer::EmitIntValue(uint64_t Value, unsigned Size, void MCAsmStreamer::EmitValueImpl(const MCExpr *Value, unsigned Size, unsigned AddrSpace) { - assert(getCurrentSection() && "Cannot emit contents before setting section!"); + assert(getCurrentSection().first && + "Cannot emit contents before setting section!"); const char *Directive = 0; switch (Size) { default: break; @@ -1368,7 +1367,8 @@ void MCAsmStreamer::EmitTCEntry(const MCSymbol &S) { } void MCAsmStreamer::EmitInstruction(const MCInst &Inst) { - assert(getCurrentSection() && "Cannot emit contents before setting section!"); + assert(getCurrentSection().first && + "Cannot emit contents before setting section!"); // Show the encoding in a comment if we have a code emitter. if (Emitter) diff --git a/contrib/llvm/lib/MC/MCAssembler.cpp b/contrib/llvm/lib/MC/MCAssembler.cpp index 1829266..fb5ab28 100644 --- a/contrib/llvm/lib/MC/MCAssembler.cpp +++ b/contrib/llvm/lib/MC/MCAssembler.cpp @@ -243,6 +243,36 @@ MCSectionData::MCSectionData(const MCSection &_Section, MCAssembler *A) A->getSectionList().push_back(this); } +MCSectionData::iterator +MCSectionData::getSubsectionInsertionPoint(unsigned Subsection) { + if (Subsection == 0 && SubsectionFragmentMap.empty()) + return end(); + + SmallVectorImpl<std::pair<unsigned, MCFragment *> >::iterator MI = + std::lower_bound(SubsectionFragmentMap.begin(), SubsectionFragmentMap.end(), + std::make_pair(Subsection, (MCFragment *)0)); + bool ExactMatch = false; + if (MI != SubsectionFragmentMap.end()) { + ExactMatch = MI->first == Subsection; + if (ExactMatch) + ++MI; + } + iterator IP; + if (MI == SubsectionFragmentMap.end()) + IP = end(); + else + IP = MI->second; + if (!ExactMatch && Subsection != 0) { + // The GNU as documentation claims that subsections have an alignment of 4, + // although this appears not to be the case. 
+ MCFragment *F = new MCDataFragment(); + SubsectionFragmentMap.insert(MI, std::make_pair(Subsection, F)); + getFragmentList().insert(IP, F); + F->setParent(this); + } + return IP; +} + /* *** */ MCSymbolData::MCSymbolData() : Symbol(0) {} diff --git a/contrib/llvm/lib/MC/MCDwarf.cpp b/contrib/llvm/lib/MC/MCDwarf.cpp index 0f8f074..18982e9 100644 --- a/contrib/llvm/lib/MC/MCDwarf.cpp +++ b/contrib/llvm/lib/MC/MCDwarf.cpp @@ -197,6 +197,8 @@ static inline void EmitDwarfLineTable(MCStreamer *MCOS, // actually a DW_LNE_end_sequence. // Switch to the section to be able to create a symbol at its end. + // TODO: keep track of the last subsection so that this symbol appears in the + // correct place. MCOS->SwitchSection(Section); MCContext &context = MCOS->getContext(); @@ -787,7 +789,7 @@ void MCGenDwarfLabelEntry::Make(MCSymbol *Symbol, MCStreamer *MCOS, if (Symbol->isTemporary()) return; MCContext &context = MCOS->getContext(); - if (context.getGenDwarfSection() != MCOS->getCurrentSection()) + if (context.getGenDwarfSection() != MCOS->getCurrentSection().first) return; // The dwarf label's name does not have the symbol name's leading @@ -899,7 +901,7 @@ namespace { /// EmitCompactUnwind - Emit the unwind information in a compact way. If /// we're successful, return 'true'. Otherwise, return 'false' and it will /// emit the normal CIE and FDE. - bool EmitCompactUnwind(MCStreamer &streamer, + void EmitCompactUnwind(MCStreamer &streamer, const MCDwarfFrameInfo &frame); const MCSymbol &EmitCIE(MCStreamer &streamer, @@ -1139,7 +1141,7 @@ void FrameEmitterImpl::EmitCFIInstructions(MCStreamer &streamer, /// EmitCompactUnwind - Emit the unwind information in a compact way. If we're /// successful, return 'true'. Otherwise, return 'false' and it will emit the /// normal CIE and FDE. -bool FrameEmitterImpl::EmitCompactUnwind(MCStreamer &Streamer, +void FrameEmitterImpl::EmitCompactUnwind(MCStreamer &Streamer, const MCDwarfFrameInfo &Frame) { MCContext &Context = Streamer.getContext(); const MCObjectFileInfo *MOFI = Context.getObjectFileInfo(); @@ -1168,14 +1170,13 @@ bool FrameEmitterImpl::EmitCompactUnwind(MCStreamer &Streamer, // .quad except_tab1 uint32_t Encoding = Frame.CompactUnwindEncoding; - if (!Encoding) return false; + if (!Encoding) return; + bool DwarfEHFrameOnly = (Encoding == MOFI->getCompactUnwindDwarfEHFrameOnly()); // The encoding needs to know we have an LSDA. 
- if (Frame.Lsda) + if (!DwarfEHFrameOnly && Frame.Lsda) Encoding |= 0x40000000; - Streamer.SwitchSection(MOFI->getCompactUnwindSection()); - // Range Start unsigned FDEEncoding = MOFI->getFDEEncoding(UsingCFI); unsigned Size = getSizeForEncoding(Streamer, FDEEncoding); @@ -1194,11 +1195,10 @@ bool FrameEmitterImpl::EmitCompactUnwind(MCStreamer &Streamer, Twine::utohexstr(Encoding)); Streamer.EmitIntValue(Encoding, Size); - // Personality Function Size = getSizeForEncoding(Streamer, dwarf::DW_EH_PE_absptr); if (VerboseAsm) Streamer.AddComment("Personality Function"); - if (Frame.Personality) + if (!DwarfEHFrameOnly && Frame.Personality) Streamer.EmitSymbolValue(Frame.Personality, Size); else Streamer.EmitIntValue(0, Size); // No personality fn @@ -1206,12 +1206,10 @@ bool FrameEmitterImpl::EmitCompactUnwind(MCStreamer &Streamer, // LSDA Size = getSizeForEncoding(Streamer, Frame.LsdaEncoding); if (VerboseAsm) Streamer.AddComment("LSDA"); - if (Frame.Lsda) + if (!DwarfEHFrameOnly && Frame.Lsda) Streamer.EmitSymbolValue(Frame.Lsda, Size); else Streamer.EmitIntValue(0, Size); // No LSDA - - return true; } const MCSymbol &FrameEmitterImpl::EmitCIE(MCStreamer &streamer, @@ -1421,7 +1419,6 @@ MCSymbol *FrameEmitterImpl::EmitFDE(MCStreamer &streamer, } // Call Frame Instructions - EmitCFIInstructions(streamer, frame.Instructions, frame.Begin); // Padding @@ -1482,12 +1479,23 @@ void MCDwarfFrameEmitter::Emit(MCStreamer &Streamer, ArrayRef<MCDwarfFrameInfo> FrameArray = Streamer.getFrameInfos(); // Emit the compact unwind info if available. - if (IsEH && MOFI->getCompactUnwindSection()) - for (unsigned i = 0, n = Streamer.getNumFrameInfos(); i < n; ++i) { - const MCDwarfFrameInfo &Frame = Streamer.getFrameInfo(i); - if (Frame.CompactUnwindEncoding) + if (IsEH && MOFI->getCompactUnwindSection()) { + unsigned NumFrameInfos = Streamer.getNumFrameInfos(); + bool SectionEmitted = false; + + if (NumFrameInfos) { + for (unsigned i = 0; i < NumFrameInfos; ++i) { + const MCDwarfFrameInfo &Frame = Streamer.getFrameInfo(i); + if (Frame.CompactUnwindEncoding == 0) continue; + if (!SectionEmitted) { + Streamer.SwitchSection(MOFI->getCompactUnwindSection()); + Streamer.EmitValueToAlignment(Context.getAsmInfo().getPointerSize()); + SectionEmitted = true; + } Emitter.EmitCompactUnwind(Streamer, Frame); + } } + } const MCSection &Section = IsEH ? 
*MOFI->getEHFrameSection() : *MOFI->getDwarfFrameSection(); diff --git a/contrib/llvm/lib/MC/MCELFStreamer.cpp b/contrib/llvm/lib/MC/MCELFStreamer.cpp index 7f5f1b6..116f86f 100644 --- a/contrib/llvm/lib/MC/MCELFStreamer.cpp +++ b/contrib/llvm/lib/MC/MCELFStreamer.cpp @@ -13,6 +13,7 @@ #include "llvm/MC/MCELFStreamer.h" #include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCCodeEmitter.h" #include "llvm/MC/MCContext.h" @@ -108,14 +109,15 @@ void MCELFStreamer::EmitAssemblerFlag(MCAssemblerFlag Flag) { llvm_unreachable("invalid assembler flag!"); } -void MCELFStreamer::ChangeSection(const MCSection *Section) { +void MCELFStreamer::ChangeSection(const MCSection *Section, + const MCExpr *Subsection) { MCSectionData *CurSection = getCurrentSectionData(); if (CurSection && CurSection->isBundleLocked()) report_fatal_error("Unterminated .bundle_lock when changing a section"); const MCSymbol *Grp = static_cast<const MCSectionELF *>(Section)->getGroup(); if (Grp) getAssembler().getOrCreateSymbolData(*Grp); - this->MCObjectStreamer::ChangeSection(Section); + this->MCObjectStreamer::ChangeSection(Section, Subsection); } void MCELFStreamer::EmitWeakReference(MCSymbol *Alias, const MCSymbol *Symbol) { @@ -126,6 +128,26 @@ void MCELFStreamer::EmitWeakReference(MCSymbol *Alias, const MCSymbol *Symbol) { Alias->setVariableValue(Value); } +// When GNU as encounters more than one .type declaration for an object it seems +// to use a mechanism similar to the one below to decide which type is actually +// used in the object file. The greater of T1 and T2 is selected based on the +// following ordering: +// STT_NOTYPE < STT_OBJECT < STT_FUNC < STT_GNU_IFUNC < STT_TLS < anything else +// If neither T1 < T2 nor T2 < T1 according to this ordering, use T2 (the user +// provided type). +static unsigned CombineSymbolTypes(unsigned T1, unsigned T2) { + unsigned TypeOrdering[] = {ELF::STT_NOTYPE, ELF::STT_OBJECT, ELF::STT_FUNC, + ELF::STT_GNU_IFUNC, ELF::STT_TLS}; + for (unsigned i = 0; i != array_lengthof(TypeOrdering); ++i) { + if (T1 == TypeOrdering[i]) + return T2; + if (T2 == TypeOrdering[i]) + return T1; + } + + return T2; +} + void MCELFStreamer::EmitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute) { // Indirect symbols are handled differently, to match how 'as' handles @@ -187,27 +209,34 @@ void MCELFStreamer::EmitSymbolAttribute(MCSymbol *Symbol, break; case MCSA_ELF_TypeFunction: - MCELF::SetType(SD, ELF::STT_FUNC); + MCELF::SetType(SD, CombineSymbolTypes(MCELF::GetType(SD), + ELF::STT_FUNC)); break; case MCSA_ELF_TypeIndFunction: - MCELF::SetType(SD, ELF::STT_GNU_IFUNC); + MCELF::SetType(SD, CombineSymbolTypes(MCELF::GetType(SD), + ELF::STT_GNU_IFUNC)); break; case MCSA_ELF_TypeObject: - MCELF::SetType(SD, ELF::STT_OBJECT); + MCELF::SetType(SD, CombineSymbolTypes(MCELF::GetType(SD), + ELF::STT_OBJECT)); break; case MCSA_ELF_TypeTLS: - MCELF::SetType(SD, ELF::STT_TLS); + MCELF::SetType(SD, CombineSymbolTypes(MCELF::GetType(SD), + ELF::STT_TLS)); break; case MCSA_ELF_TypeCommon: - MCELF::SetType(SD, ELF::STT_COMMON); + // TODO: Emit these as a common symbol. 
+ MCELF::SetType(SD, CombineSymbolTypes(MCELF::GetType(SD), + ELF::STT_OBJECT)); break; case MCSA_ELF_TypeNoType: - MCELF::SetType(SD, ELF::STT_NOTYPE); + MCELF::SetType(SD, CombineSymbolTypes(MCELF::GetType(SD), + ELF::STT_NOTYPE)); break; case MCSA_Protected: @@ -290,7 +319,7 @@ void MCELFStreamer::EmitValueToAlignment(unsigned ByteAlignment, // entry in the module's symbol table (the first being the null symbol). void MCELFStreamer::EmitFileDirective(StringRef Filename) { MCSymbol *Symbol = getAssembler().getContext().GetOrCreateSymbol(Filename); - Symbol->setSection(*getCurrentSection()); + Symbol->setSection(*getCurrentSection().first); Symbol->setAbsolute(); MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol); @@ -406,11 +435,13 @@ void MCELFStreamer::EmitInstToData(const MCInst &Inst) { // Optimize memory usage by emitting the instruction to a // MCCompactEncodedInstFragment when not in a bundle-locked group and // there are no fixups registered. - MCCompactEncodedInstFragment *CEIF = new MCCompactEncodedInstFragment(SD); + MCCompactEncodedInstFragment *CEIF = new MCCompactEncodedInstFragment(); + insert(CEIF); CEIF->getContents().append(Code.begin(), Code.end()); return; } else { - DF = new MCDataFragment(SD); + DF = new MCDataFragment(); + insert(DF); if (SD->getBundleLockState() == MCSectionData::BundleLockedAlignToEnd) { // If this is a new fragment created for a bundle-locked group, and the // group was marked as "align_to_end", set a flag in the fragment. diff --git a/contrib/llvm/lib/MC/MCExpr.cpp b/contrib/llvm/lib/MC/MCExpr.cpp index cd4d144..06bc72f 100644 --- a/contrib/llvm/lib/MC/MCExpr.cpp +++ b/contrib/llvm/lib/MC/MCExpr.cpp @@ -250,6 +250,7 @@ StringRef MCSymbolRefExpr::getVariantKindName(VariantKind Kind) { case VK_Mips_GOT_LO16: return "GOT_LO16"; case VK_Mips_CALL_HI16: return "CALL_HI16"; case VK_Mips_CALL_LO16: return "CALL_LO16"; + case VK_COFF_IMGREL32: return "IMGREL32"; } llvm_unreachable("Invalid variant kind"); } @@ -285,6 +286,44 @@ MCSymbolRefExpr::getVariantKindForName(StringRef Name) { .Case("dtpoff", VK_DTPOFF) .Case("TLVP", VK_TLVP) .Case("tlvp", VK_TLVP) + .Case("IMGREL", VK_COFF_IMGREL32) + .Case("imgrel", VK_COFF_IMGREL32) + .Case("SECREL32", VK_SECREL) + .Case("secrel32", VK_SECREL) + .Case("HA", VK_PPC_GAS_HA16) + .Case("ha", VK_PPC_GAS_HA16) + .Case("L", VK_PPC_GAS_LO16) + .Case("l", VK_PPC_GAS_LO16) + .Case("TOCBASE", VK_PPC_TOC) + .Case("tocbase", VK_PPC_TOC) + .Case("TOC", VK_PPC_TOC_ENTRY) + .Case("toc", VK_PPC_TOC_ENTRY) + .Case("TOC@HA", VK_PPC_TOC16_HA) + .Case("toc@ha", VK_PPC_TOC16_HA) + .Case("TOC@L", VK_PPC_TOC16_LO) + .Case("toc@l", VK_PPC_TOC16_LO) + .Case("TLS", VK_PPC_TLS) + .Case("tls", VK_PPC_TLS) + .Case("TPREL@HA", VK_PPC_TPREL16_HA) + .Case("tprel@ha", VK_PPC_TPREL16_HA) + .Case("TPREL@L", VK_PPC_TPREL16_LO) + .Case("tprel@l", VK_PPC_TPREL16_LO) + .Case("DTPREL@HA", VK_PPC_DTPREL16_HA) + .Case("dtprel@ha", VK_PPC_DTPREL16_HA) + .Case("DTPREL@L", VK_PPC_DTPREL16_LO) + .Case("dtprel@l", VK_PPC_DTPREL16_LO) + .Case("GOT@TPREL@HA", VK_PPC_GOT_TPREL16_HA) + .Case("got@tprel@ha", VK_PPC_GOT_TPREL16_HA) + .Case("GOT@TPREL@L", VK_PPC_GOT_TPREL16_LO) + .Case("got@tprel@l", VK_PPC_GOT_TPREL16_LO) + .Case("GOT@TLSGD@HA", VK_PPC_GOT_TLSGD16_HA) + .Case("got@tlsgd@ha", VK_PPC_GOT_TLSGD16_HA) + .Case("GOT@TLSGD@L", VK_PPC_GOT_TLSGD16_LO) + .Case("got@tlsgd@l", VK_PPC_GOT_TLSGD16_LO) + .Case("GOT@TLSLD@HA", VK_PPC_GOT_TLSLD16_HA) + .Case("got@tlsld@ha", VK_PPC_GOT_TLSLD16_HA) + .Case("GOT@TLSLD@L", VK_PPC_GOT_TLSLD16_LO) + 
.Case("got@tlsld@l", VK_PPC_GOT_TLSLD16_LO) .Default(VK_Invalid); } diff --git a/contrib/llvm/lib/MC/MCMachOStreamer.cpp b/contrib/llvm/lib/MC/MCMachOStreamer.cpp index 7d08d0e..e08b01b 100644 --- a/contrib/llvm/lib/MC/MCMachOStreamer.cpp +++ b/contrib/llvm/lib/MC/MCMachOStreamer.cpp @@ -122,11 +122,11 @@ void MCMachOStreamer::EmitLabel(MCSymbol *Symbol) { assert(Symbol->isUndefined() && "Cannot define a symbol twice!"); // isSymbolLinkerVisible uses the section. - Symbol->setSection(*getCurrentSection()); + Symbol->setSection(*getCurrentSection().first); // We have to create a new fragment if this is an atom defining symbol, // fragments cannot span atoms. if (getAssembler().isSymbolLinkerVisible(*Symbol)) - new MCDataFragment(getCurrentSectionData()); + insert(new MCDataFragment()); MCObjectStreamer::EmitLabel(Symbol); diff --git a/contrib/llvm/lib/MC/MCNullStreamer.cpp b/contrib/llvm/lib/MC/MCNullStreamer.cpp index c872b22..659706a 100644 --- a/contrib/llvm/lib/MC/MCNullStreamer.cpp +++ b/contrib/llvm/lib/MC/MCNullStreamer.cpp @@ -30,13 +30,14 @@ namespace { virtual void InitSections() { } - virtual void ChangeSection(const MCSection *Section) { + virtual void ChangeSection(const MCSection *Section, + const MCExpr *Subsection) { } virtual void EmitLabel(MCSymbol *Symbol) { assert(Symbol->isUndefined() && "Cannot define a symbol twice!"); - assert(getCurrentSection() && "Cannot emit before setting section!"); - Symbol->setSection(*getCurrentSection()); + assert(getCurrentSection().first &&"Cannot emit before setting section!"); + Symbol->setSection(*getCurrentSection().first); } virtual void EmitDebugLabel(MCSymbol *Symbol) { EmitLabel(Symbol); diff --git a/contrib/llvm/lib/MC/MCObjectFileInfo.cpp b/contrib/llvm/lib/MC/MCObjectFileInfo.cpp index d19e79a..96b62f1 100644 --- a/contrib/llvm/lib/MC/MCObjectFileInfo.cpp +++ b/contrib/llvm/lib/MC/MCObjectFileInfo.cpp @@ -145,12 +145,16 @@ void MCObjectFileInfo::InitMachOMCObjectFileInfo(Triple T) { LSDASection = Ctx->getMachOSection("__TEXT", "__gcc_except_tab", 0, SectionKind::getReadOnlyWithRel()); - if (T.isMacOSX() && !T.isMacOSXVersionLT(10, 6)) + if (T.isMacOSX() && !T.isMacOSXVersionLT(10, 6)) { CompactUnwindSection = Ctx->getMachOSection("__LD", "__compact_unwind", MCSectionMachO::S_ATTR_DEBUG, SectionKind::getReadOnly()); + if (T.getArch() == Triple::x86_64 || T.getArch() == Triple::x86) + CompactUnwindDwarfEHFrameOnly = 0x04000000; + } + // Debug Information. DwarfAccelNamesSection = Ctx->getMachOSection("__DWARF", "__apple_names", @@ -291,6 +295,22 @@ void MCObjectFileInfo::InitELFMCObjectFileInfo(Triple T) { FDEEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_udata8; TTypeEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_udata8; + } else if (T.getArch() == Triple::systemz) { + // All currently-defined code models guarantee that 4-byte PC-relative + // values will be in range. 
+ if (RelocM == Reloc::PIC_) { + PersonalityEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel | + dwarf::DW_EH_PE_sdata4; + LSDAEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4; + FDEEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4; + TTypeEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel | + dwarf::DW_EH_PE_sdata4; + } else { + PersonalityEncoding = dwarf::DW_EH_PE_absptr; + LSDAEncoding = dwarf::DW_EH_PE_absptr; + FDEEncoding = dwarf::DW_EH_PE_absptr; + TTypeEncoding = dwarf::DW_EH_PE_absptr; + } } // Solaris requires different flags for .eh_frame to seemingly every other @@ -629,6 +649,8 @@ void MCObjectFileInfo::InitMCObjectFileInfo(StringRef TT, Reloc::Model relocm, PersonalityEncoding = LSDAEncoding = FDEEncoding = FDECFIEncoding = TTypeEncoding = dwarf::DW_EH_PE_absptr; + CompactUnwindDwarfEHFrameOnly = 0; + EHFrameSection = 0; // Created on demand. CompactUnwindSection = 0; // Used only by selected targets. DwarfAccelNamesSection = 0; // Used only by selected targets. diff --git a/contrib/llvm/lib/MC/MCObjectStreamer.cpp b/contrib/llvm/lib/MC/MCObjectStreamer.cpp index 0d2ce83..d21ce8d 100644 --- a/contrib/llvm/lib/MC/MCObjectStreamer.cpp +++ b/contrib/llvm/lib/MC/MCObjectStreamer.cpp @@ -8,6 +8,7 @@ //===----------------------------------------------------------------------===// #include "llvm/MC/MCObjectStreamer.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/MC/MCAsmBackend.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCAssembler.h" @@ -45,14 +46,15 @@ void MCObjectStreamer::reset() { if (Assembler) Assembler->reset(); CurSectionData = 0; + CurInsertionPoint = MCSectionData::iterator(); MCStreamer::reset(); } MCFragment *MCObjectStreamer::getCurrentFragment() const { assert(getCurrentSectionData() && "No current section!"); - if (!getCurrentSectionData()->empty()) - return &getCurrentSectionData()->getFragmentList().back(); + if (CurInsertionPoint != getCurrentSectionData()->getFragmentList().begin()) + return prior(CurInsertionPoint); return 0; } @@ -61,8 +63,10 @@ MCDataFragment *MCObjectStreamer::getOrCreateDataFragment() const { MCDataFragment *F = dyn_cast_or_null<MCDataFragment>(getCurrentFragment()); // When bundling is enabled, we don't want to add data to a fragment that // already has instructions (see MCELFStreamer::EmitInstToData for details) - if (!F || (Assembler->isBundlingEnabled() && F->hasInstructions())) - F = new MCDataFragment(getCurrentSectionData()); + if (!F || (Assembler->isBundlingEnabled() && F->hasInstructions())) { + F = new MCDataFragment(); + insert(F); + } return F; } @@ -145,7 +149,7 @@ void MCObjectStreamer::EmitULEB128Value(const MCExpr *Value) { return; } Value = ForceExpAbs(Value); - new MCLEBFragment(*Value, false, getCurrentSectionData()); + insert(new MCLEBFragment(*Value, false)); } void MCObjectStreamer::EmitSLEB128Value(const MCExpr *Value) { @@ -155,7 +159,7 @@ void MCObjectStreamer::EmitSLEB128Value(const MCExpr *Value) { return; } Value = ForceExpAbs(Value); - new MCLEBFragment(*Value, true, getCurrentSectionData()); + insert(new MCLEBFragment(*Value, true)); } void MCObjectStreamer::EmitWeakReference(MCSymbol *Alias, @@ -163,10 +167,20 @@ void MCObjectStreamer::EmitWeakReference(MCSymbol *Alias, report_fatal_error("This file format doesn't support weak aliases."); } -void MCObjectStreamer::ChangeSection(const MCSection *Section) { +void MCObjectStreamer::ChangeSection(const MCSection *Section, + const MCExpr *Subsection) { assert(Section && "Cannot switch to a null section!"); 
CurSectionData = &getAssembler().getOrCreateSectionData(*Section); + + int64_t IntSubsection = 0; + if (Subsection && + !Subsection->EvaluateAsAbsolute(IntSubsection, getAssembler())) + report_fatal_error("Cannot evaluate subsection number"); + if (IntSubsection < 0 || IntSubsection > 8192) + report_fatal_error("Subsection number out of range"); + CurInsertionPoint = + CurSectionData->getSubsectionInsertionPoint(unsigned(IntSubsection)); } void MCObjectStreamer::EmitAssignment(MCSymbol *Symbol, const MCExpr *Value) { @@ -185,7 +199,7 @@ void MCObjectStreamer::EmitInstruction(const MCInst &Inst) { // Now that a machine instruction has been assembled into this section, make // a line entry for any .loc directive that has been seen. - MCLineEntry::Make(this, getCurrentSection()); + MCLineEntry::Make(this, getCurrentSection().first); // If this instruction doesn't need relaxation, just emit it as data. MCAssembler &Assembler = getAssembler(); @@ -216,8 +230,8 @@ void MCObjectStreamer::EmitInstruction(const MCInst &Inst) { void MCObjectStreamer::EmitInstToFragment(const MCInst &Inst) { // Always create a new, separate fragment here, because its size can change // during relaxation. - MCRelaxableFragment *IF = - new MCRelaxableFragment(Inst, getCurrentSectionData()); + MCRelaxableFragment *IF = new MCRelaxableFragment(Inst); + insert(IF); SmallString<128> Code; raw_svector_ostream VecOS(Code); @@ -258,7 +272,7 @@ void MCObjectStreamer::EmitDwarfAdvanceLineAddr(int64_t LineDelta, return; } AddrDelta = ForceExpAbs(AddrDelta); - new MCDwarfLineAddrFragment(LineDelta, *AddrDelta, getCurrentSectionData()); + insert(new MCDwarfLineAddrFragment(LineDelta, *AddrDelta)); } void MCObjectStreamer::EmitDwarfAdvanceFrameAddr(const MCSymbol *LastLabel, @@ -270,7 +284,7 @@ void MCObjectStreamer::EmitDwarfAdvanceFrameAddr(const MCSymbol *LastLabel, return; } AddrDelta = ForceExpAbs(AddrDelta); - new MCDwarfCallFrameFragment(*AddrDelta, getCurrentSectionData()); + insert(new MCDwarfCallFrameFragment(*AddrDelta)); } void MCObjectStreamer::EmitBytes(StringRef Data, unsigned AddrSpace) { @@ -284,8 +298,7 @@ void MCObjectStreamer::EmitValueToAlignment(unsigned ByteAlignment, unsigned MaxBytesToEmit) { if (MaxBytesToEmit == 0) MaxBytesToEmit = ByteAlignment; - new MCAlignFragment(ByteAlignment, Value, ValueSize, MaxBytesToEmit, - getCurrentSectionData()); + insert(new MCAlignFragment(ByteAlignment, Value, ValueSize, MaxBytesToEmit)); // Update the maximum alignment on the current section if necessary. 
if (ByteAlignment > getCurrentSectionData()->getAlignment()) @@ -302,7 +315,7 @@ bool MCObjectStreamer::EmitValueToOffset(const MCExpr *Offset, unsigned char Value) { int64_t Res; if (Offset->EvaluateAsAbsolute(Res, getAssembler())) { - new MCOrgFragment(*Offset, Value, getCurrentSectionData()); + insert(new MCOrgFragment(*Offset, Value)); return false; } diff --git a/contrib/llvm/lib/MC/MCParser/AsmParser.cpp b/contrib/llvm/lib/MC/MCParser/AsmParser.cpp index 804734c..edefdb4 100644 --- a/contrib/llvm/lib/MC/MCParser/AsmParser.cpp +++ b/contrib/llvm/lib/MC/MCParser/AsmParser.cpp @@ -201,9 +201,9 @@ public: } virtual bool Warning(SMLoc L, const Twine &Msg, - ArrayRef<SMRange> Ranges = ArrayRef<SMRange>()); + ArrayRef<SMRange> Ranges = None); virtual bool Error(SMLoc L, const Twine &Msg, - ArrayRef<SMRange> Ranges = ArrayRef<SMRange>()); + ArrayRef<SMRange> Ranges = None); virtual const AsmToken &Lex(); @@ -221,6 +221,7 @@ public: bool parseExpression(const MCExpr *&Res); virtual bool parseExpression(const MCExpr *&Res, SMLoc &EndLoc); + virtual bool parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc); virtual bool parseParenExpression(const MCExpr *&Res, SMLoc &EndLoc); virtual bool parseAbsoluteExpression(int64_t &Res); @@ -285,7 +286,7 @@ private: void PrintMacroInstantiations(); void PrintMessage(SMLoc Loc, SourceMgr::DiagKind Kind, const Twine &Msg, - ArrayRef<SMRange> Ranges = ArrayRef<SMRange>()) const { + ArrayRef<SMRange> Ranges = None) const { SrcMgr.PrintMessage(Loc, Kind, Msg, Ranges); } static void DiagHandler(const SMDiagnostic &Diag, void *Context); @@ -601,7 +602,7 @@ bool AsmParser::Run(bool NoInitialTextSection, bool NoFinalize) { // If we are generating dwarf for assembly source files save the initial text // section and generate a .file directive. if (getContext().getGenDwarfForAssembly()) { - getContext().setGenDwarfSection(getStreamer().getCurrentSection()); + getContext().setGenDwarfSection(getStreamer().getCurrentSection().first); MCSymbol *SectionStartSym = getContext().CreateTempSymbol(); getStreamer().EmitLabel(SectionStartSym); getContext().setGenDwarfSectionStartSym(SectionStartSym); @@ -666,7 +667,7 @@ bool AsmParser::Run(bool NoInitialTextSection, bool NoFinalize) { } void AsmParser::checkForValidSection() { - if (!ParsingInlineAsm && !getStreamer().getCurrentSection()) { + if (!ParsingInlineAsm && !getStreamer().getCurrentSection().first) { TokError("expected section directive before assembly directive"); Out.InitToTextSection(); } @@ -869,6 +870,10 @@ bool AsmParser::parseExpression(const MCExpr *&Res) { return parseExpression(Res, EndLoc); } +bool AsmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) { + return ParsePrimaryExpr(Res, EndLoc); +} + const MCExpr * AsmParser::ApplyModifierToExpr(const MCExpr *E, MCSymbolRefExpr::VariantKind Variant) { @@ -1087,7 +1092,7 @@ bool AsmParser::ParseBinOpRHS(unsigned Precedence, const MCExpr *&Res, MCBinaryExpr::Opcode Dummy; unsigned NextTokPrec = getBinOpPrecedence(Lexer.getKind(), Dummy); if (TokPrec < NextTokPrec) { - if (ParseBinOpRHS(Precedence+1, RHS, EndLoc)) return true; + if (ParseBinOpRHS(TokPrec+1, RHS, EndLoc)) return true; } // Merge LHS and RHS according to operator. @@ -1488,7 +1493,8 @@ bool AsmParser::ParseStatement(ParseStatementInfo &Info) { // section is the initial text section then generate a .loc directive for // the instruction. 
if (!HadError && getContext().getGenDwarfForAssembly() && - getContext().getGenDwarfSection() == getStreamer().getCurrentSection()) { + getContext().getGenDwarfSection() == + getStreamer().getCurrentSection().first) { unsigned Line = SrcMgr.FindLineNumber(IDLoc, CurBuffer); @@ -1978,7 +1984,6 @@ static bool IsUsedIn(const MCSymbol *Sym, const MCExpr *Value) { case MCExpr::Binary: { const MCBinaryExpr *BE = static_cast<const MCBinaryExpr*>(Value); return IsUsedIn(Sym, BE->getLHS()) || IsUsedIn(Sym, BE->getRHS()); - break; } case MCExpr::Target: case MCExpr::Constant: @@ -2479,7 +2484,7 @@ bool AsmParser::ParseDirectiveAlign(bool IsPow2, unsigned ValueSize) { // Check whether we should use optimal code alignment for this .align // directive. - bool UseCodeAlign = getStreamer().getCurrentSection()->UseCodeAlign(); + bool UseCodeAlign = getStreamer().getCurrentSection().first->UseCodeAlign(); if ((!HasFillExpr || Lexer.getMAI().getTextAlignFillValue() == FillExpr) && ValueSize == 1 && UseCodeAlign) { getStreamer().EmitCodeAlignment(Alignment, MaxBytesToFill); @@ -2631,12 +2636,10 @@ bool AsmParser::ParseDirectiveLoc() { Flags |= DWARF2_FLAG_IS_STMT; else return Error(Loc, "is_stmt value not 0 or 1"); - } - else { + } else { return Error(Loc, "is_stmt value not the constant value of 0 or 1"); } - } - else if (Name == "isa") { + } else if (Name == "isa") { Loc = getTok().getLoc(); const MCExpr *Value; if (parseExpression(Value)) @@ -2647,16 +2650,13 @@ bool AsmParser::ParseDirectiveLoc() { if (Value < 0) return Error(Loc, "isa number less than zero"); Isa = Value; - } - else { + } else { return Error(Loc, "isa number not a constant value"); } - } - else if (Name == "discriminator") { + } else if (Name == "discriminator") { if (parseAbsoluteExpression(Discriminator)) return true; - } - else { + } else { return Error(Loc, "unknown sub-directive in '.loc' directive"); } @@ -3615,18 +3615,17 @@ bool AsmParser::ParseDirectiveIfdef(SMLoc DirectiveLoc, bool expect_defined) { bool AsmParser::ParseDirectiveElseIf(SMLoc DirectiveLoc) { if (TheCondState.TheCond != AsmCond::IfCond && TheCondState.TheCond != AsmCond::ElseIfCond) - Error(DirectiveLoc, "Encountered a .elseif that doesn't follow a .if or " - " an .elseif"); + Error(DirectiveLoc, "Encountered a .elseif that doesn't follow a .if or " + " an .elseif"); TheCondState.TheCond = AsmCond::ElseIfCond; bool LastIgnoreState = false; if (!TheCondStack.empty()) - LastIgnoreState = TheCondStack.back().Ignore; + LastIgnoreState = TheCondStack.back().Ignore; if (LastIgnoreState || TheCondState.CondMet) { TheCondState.Ignore = true; eatToEndOfStatement(); - } - else { + } else { int64_t ExprValue; if (parseAbsoluteExpression(ExprValue)) return true; @@ -3652,8 +3651,8 @@ bool AsmParser::ParseDirectiveElse(SMLoc DirectiveLoc) { if (TheCondState.TheCond != AsmCond::IfCond && TheCondState.TheCond != AsmCond::ElseIfCond) - Error(DirectiveLoc, "Encountered a .else that doesn't follow a .if or an " - ".elseif"); + Error(DirectiveLoc, "Encountered a .else that doesn't follow a .if or an " + ".elseif"); TheCondState.TheCond = AsmCond::ElseCond; bool LastIgnoreState = false; if (!TheCondStack.empty()) @@ -4046,19 +4045,17 @@ static int RewritesSort(const void *A, const void *B) { if (AsmRewriteB->Loc.getPointer() < AsmRewriteA->Loc.getPointer()) return 1; - // It's possible to have a SizeDirective rewrite and an Input/Output rewrite - // to the same location. Make sure the SizeDirective rewrite is performed - // first. This also ensure the sort algorithm is stable. 
- if (AsmRewriteA->Kind == AOK_SizeDirective) { - assert ((AsmRewriteB->Kind == AOK_Input || AsmRewriteB->Kind == AOK_Output) && - "Expected an Input/Output rewrite!"); + // It's possible to have a SizeDirective, Imm/ImmPrefix and an Input/Output + // rewrite to the same location. Make sure the SizeDirective rewrite is + // performed first, then the Imm/ImmPrefix and finally the Input/Output. This + // ensures the sort algorithm is stable. + if (AsmRewritePrecedence [AsmRewriteA->Kind] > + AsmRewritePrecedence [AsmRewriteB->Kind]) return -1; - } - if (AsmRewriteB->Kind == AOK_SizeDirective) { - assert ((AsmRewriteA->Kind == AOK_Input || AsmRewriteA->Kind == AOK_Output) && - "Expected an Input/Output rewrite!"); + + if (AsmRewritePrecedence [AsmRewriteA->Kind] < + AsmRewritePrecedence [AsmRewriteB->Kind]) return 1; - } llvm_unreachable ("Unstable rewrite sort."); } @@ -4118,28 +4115,27 @@ AsmParser::parseMSInlineAsm(void *AsmLoc, std::string &AsmString, } // Expr/Input or Output. - bool IsVarDecl; - unsigned Length, Size, Type; - void *OpDecl = SI.LookupInlineAsmIdentifier(Operand->getName(), AsmLoc, - Length, Size, Type, - IsVarDecl); + StringRef SymName = Operand->getSymName(); + if (SymName.empty()) + continue; + + void *OpDecl = Operand->getOpDecl(); if (!OpDecl) continue; bool isOutput = (i == 1) && Desc.mayStore(); + SMLoc Start = SMLoc::getFromPointer(SymName.data()); if (isOutput) { ++InputIdx; OutputDecls.push_back(OpDecl); OutputDeclsAddressOf.push_back(Operand->needAddressOf()); OutputConstraints.push_back('=' + Operand->getConstraint().str()); - AsmStrRewrites.push_back(AsmRewrite(AOK_Output, Operand->getStartLoc(), - Operand->getNameLen())); + AsmStrRewrites.push_back(AsmRewrite(AOK_Output, Start, SymName.size())); } else { InputDecls.push_back(OpDecl); InputDeclsAddressOf.push_back(Operand->needAddressOf()); InputConstraints.push_back(Operand->getConstraint().str()); - AsmStrRewrites.push_back(AsmRewrite(AOK_Input, Operand->getStartLoc(), - Operand->getNameLen())); + AsmStrRewrites.push_back(AsmRewrite(AOK_Input, Start, SymName.size())); } } } @@ -4182,20 +4178,17 @@ AsmParser::parseMSInlineAsm(void *AsmLoc, std::string &AsmString, for (SmallVectorImpl<AsmRewrite>::iterator I = AsmStrRewrites.begin(), E = AsmStrRewrites.end(); I != E; ++I) { + AsmRewriteKind Kind = (*I).Kind; + if (Kind == AOK_Delete) + continue; + const char *Loc = (*I).Loc.getPointer(); assert(Loc >= AsmStart && "Expected Loc to be at or after Start!"); - unsigned AdditionalSkip = 0; - AsmRewriteKind Kind = (*I).Kind; - // Emit everything up to the immediate/expression. unsigned Len = Loc - AsmStart; - if (Len) { - // For Input/Output operands we need to remove the brackets, if present. - if ((Kind == AOK_Input || Kind == AOK_Output) && Loc[-1] == '[') - --Len; + if (Len) OS << StringRef(AsmStart, Len); - } // Skip the original expression. if (Kind == AOK_Skip) { @@ -4203,6 +4196,7 @@ AsmParser::parseMSInlineAsm(void *AsmLoc, std::string &AsmString, continue; } + unsigned AdditionalSkip = 0; // Rewrite expressions in $N notation. switch (Kind) { default: break; @@ -4249,11 +4243,6 @@ AsmParser::parseMSInlineAsm(void *AsmLoc, std::string &AsmString, // Skip the original expression. AsmStart = Loc + (*I).Len + AdditionalSkip; - - // For Input/Output operands we need to remove the brackets, if present. - if ((Kind == AOK_Input || Kind == AOK_Output) && AsmStart != AsmEnd && - *AsmStart == ']') - ++AsmStart; } // Emit the remainder of the asm string. 
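The RewritesSort change above replaces the hard-coded SizeDirective special case with a table-driven tie-break: when two rewrites land at the same location in the asm string, the one with the higher precedence is applied first. Below is a minimal standalone C++ sketch of that ordering idea; the kind names, the precedence values, and the use of std::stable_sort are illustrative only and do not reproduce LLVM's actual AsmRewritePrecedence table or its qsort-style comparator.

// Standalone illustration of precedence-ordered rewrite sorting.
// Kind names and precedence values are invented for the sketch.
#include <algorithm>
#include <cstdio>
#include <vector>

enum RewriteKind { SizeDirective, ImmPrefix, Imm, Input, Output };

// Higher value = applied earlier when two rewrites share a location
// (size directive first, then imm/imm-prefix, then input/output).
static const int Precedence[] = {
  /* SizeDirective */ 3,
  /* ImmPrefix     */ 2,
  /* Imm           */ 2,
  /* Input         */ 1,
  /* Output        */ 1,
};

struct Rewrite {
  const char *Loc;   // position in the asm string
  RewriteKind Kind;
};

static bool RewriteLess(const Rewrite &A, const Rewrite &B) {
  if (A.Loc != B.Loc)
    return A.Loc < B.Loc;                         // earlier location first
  return Precedence[A.Kind] > Precedence[B.Kind]; // then higher precedence first
}

int main() {
  const char *Asm = "mov eax, dword ptr [x]";
  // Two rewrites anchored at the same offset: the size directive must come
  // out before the input-operand substitution, regardless of insertion order,
  // which is exactly what the precedence tie-break guarantees.
  std::vector<Rewrite> Rewrites = {{Asm + 9, Input}, {Asm + 9, SizeDirective}};
  std::stable_sort(Rewrites.begin(), Rewrites.end(), RewriteLess);
  for (const Rewrite &R : Rewrites)
    std::printf("kind=%d at offset %td\n", (int)R.Kind, R.Loc - Asm);
  return 0;
}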
diff --git a/contrib/llvm/lib/MC/MCParser/DarwinAsmParser.cpp b/contrib/llvm/lib/MC/MCParser/DarwinAsmParser.cpp index 6d6409f..7eb8b74 100644 --- a/contrib/llvm/lib/MC/MCParser/DarwinAsmParser.cpp +++ b/contrib/llvm/lib/MC/MCParser/DarwinAsmParser.cpp @@ -566,10 +566,10 @@ bool DarwinAsmParser::ParseDirectivePopSection(StringRef, SMLoc) { /// ParseDirectivePrevious: /// ::= .previous bool DarwinAsmParser::ParseDirectivePrevious(StringRef DirName, SMLoc) { - const MCSection *PreviousSection = getStreamer().getPreviousSection(); - if (PreviousSection == NULL) + MCSectionSubPair PreviousSection = getStreamer().getPreviousSection(); + if (PreviousSection.first == NULL) return TokError(".previous without corresponding .section"); - getStreamer().SwitchSection(PreviousSection); + getStreamer().SwitchSection(PreviousSection.first, PreviousSection.second); return false; } diff --git a/contrib/llvm/lib/MC/MCParser/ELFAsmParser.cpp b/contrib/llvm/lib/MC/MCParser/ELFAsmParser.cpp index 4c45e08..3134fc3 100644 --- a/contrib/llvm/lib/MC/MCParser/ELFAsmParser.cpp +++ b/contrib/llvm/lib/MC/MCParser/ELFAsmParser.cpp @@ -76,6 +76,7 @@ public: &ELFAsmParser::ParseDirectiveSymbolAttribute>(".internal"); addDirectiveHandler< &ELFAsmParser::ParseDirectiveSymbolAttribute>(".hidden"); + addDirectiveHandler<&ELFAsmParser::ParseDirectiveSubsection>(".subsection"); } // FIXME: Part of this logic is duplicated in the MCELFStreamer. What is @@ -147,9 +148,11 @@ public: bool ParseDirectiveVersion(StringRef, SMLoc); bool ParseDirectiveWeakref(StringRef, SMLoc); bool ParseDirectiveSymbolAttribute(StringRef, SMLoc); + bool ParseDirectiveSubsection(StringRef, SMLoc); private: bool ParseSectionName(StringRef &SectionName); + bool ParseSectionArguments(bool IsPush); }; } @@ -191,12 +194,15 @@ bool ELFAsmParser::ParseDirectiveSymbolAttribute(StringRef Directive, SMLoc) { bool ELFAsmParser::ParseSectionSwitch(StringRef Section, unsigned Type, unsigned Flags, SectionKind Kind) { - if (getLexer().isNot(AsmToken::EndOfStatement)) - return TokError("unexpected token in section switching directive"); - Lex(); + const MCExpr *Subsection = 0; + if (getLexer().isNot(AsmToken::EndOfStatement)) { + if (getParser().parseExpression(Subsection)) + return true; + } getStreamer().SwitchSection(getContext().getELFSection( - Section, Type, Flags, Kind)); + Section, Type, Flags, Kind), + Subsection); return false; } @@ -316,7 +322,7 @@ static int parseSectionFlags(StringRef flagsStr) { bool ELFAsmParser::ParseDirectivePushSection(StringRef s, SMLoc loc) { getStreamer().PushSection(); - if (ParseDirectiveSection(s, loc)) { + if (ParseSectionArguments(/*IsPush=*/true)) { getStreamer().PopSection(); return true; } @@ -332,6 +338,10 @@ bool ELFAsmParser::ParseDirectivePopSection(StringRef, SMLoc) { // FIXME: This is a work in progress. bool ELFAsmParser::ParseDirectiveSection(StringRef, SMLoc) { + return ParseSectionArguments(/*IsPush=*/false); +} + +bool ELFAsmParser::ParseSectionArguments(bool IsPush) { StringRef SectionName; if (ParseSectionName(SectionName)) @@ -341,6 +351,7 @@ bool ELFAsmParser::ParseDirectiveSection(StringRef, SMLoc) { int64_t Size = 0; StringRef GroupName; unsigned Flags = 0; + const MCExpr *Subsection = 0; // Set the defaults first. 
if (SectionName == ".fini" || SectionName == ".init" || @@ -352,6 +363,14 @@ bool ELFAsmParser::ParseDirectiveSection(StringRef, SMLoc) { if (getLexer().is(AsmToken::Comma)) { Lex(); + if (IsPush && getLexer().isNot(AsmToken::String)) { + if (getParser().parseExpression(Subsection)) + return true; + if (getLexer().isNot(AsmToken::Comma)) + goto EndStmt; + Lex(); + } + if (getLexer().isNot(AsmToken::String)) return TokError("expected string in directive"); @@ -408,6 +427,7 @@ bool ELFAsmParser::ParseDirectiveSection(StringRef, SMLoc) { } } +EndStmt: if (getLexer().isNot(AsmToken::EndOfStatement)) return TokError("unexpected token in directive"); @@ -444,15 +464,16 @@ bool ELFAsmParser::ParseDirectiveSection(StringRef, SMLoc) { SectionKind Kind = computeSectionKind(Flags); getStreamer().SwitchSection(getContext().getELFSection(SectionName, Type, Flags, Kind, Size, - GroupName)); + GroupName), + Subsection); return false; } bool ELFAsmParser::ParseDirectivePrevious(StringRef DirName, SMLoc) { - const MCSection *PreviousSection = getStreamer().getPreviousSection(); - if (PreviousSection == NULL) + MCSectionSubPair PreviousSection = getStreamer().getPreviousSection(); + if (PreviousSection.first == NULL) return TokError(".previous without corresponding .section"); - getStreamer().SwitchSection(PreviousSection); + getStreamer().SwitchSection(PreviousSection.first, PreviousSection.second); return false; } @@ -613,6 +634,20 @@ bool ELFAsmParser::ParseDirectiveWeakref(StringRef, SMLoc) { return false; } +bool ELFAsmParser::ParseDirectiveSubsection(StringRef, SMLoc) { + const MCExpr *Subsection = 0; + if (getLexer().isNot(AsmToken::EndOfStatement)) { + if (getParser().parseExpression(Subsection)) + return true; + } + + if (getLexer().isNot(AsmToken::EndOfStatement)) + return TokError("unexpected token in directive"); + + getStreamer().SubSection(Subsection); + return false; +} + namespace llvm { MCAsmParserExtension *createELFAsmParser() { diff --git a/contrib/llvm/lib/MC/MCPureStreamer.cpp b/contrib/llvm/lib/MC/MCPureStreamer.cpp index 0e04c55..8ae724f 100644 --- a/contrib/llvm/lib/MC/MCPureStreamer.cpp +++ b/contrib/llvm/lib/MC/MCPureStreamer.cpp @@ -12,9 +12,8 @@ #include "llvm/MC/MCCodeEmitter.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCObjectFileInfo.h" #include "llvm/MC/MCObjectStreamer.h" -// FIXME: Remove this. -#include "llvm/MC/MCSectionMachO.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Support/ErrorHandling.h" @@ -113,25 +112,22 @@ void MCPureStreamer::InitSections() { } void MCPureStreamer::InitToTextSection() { - // FIMXE: To what!? - SwitchSection(getContext().getMachOSection("__TEXT", "__text", - MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS, - 0, SectionKind::getText())); + SwitchSection(getContext().getObjectFileInfo()->getTextSection()); } void MCPureStreamer::EmitLabel(MCSymbol *Symbol) { assert(Symbol->isUndefined() && "Cannot define a symbol twice!"); assert(!Symbol->isVariable() && "Cannot emit a variable symbol!"); - assert(getCurrentSection() && "Cannot emit before setting section!"); + assert(getCurrentSection().first && "Cannot emit before setting section!"); - Symbol->setSection(*getCurrentSection()); + Symbol->setSection(*getCurrentSection().first); MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol); // We have to create a new fragment if this is an atom defining symbol, // fragments cannot span atoms. 
if (getAssembler().isSymbolLinkerVisible(SD.getSymbol())) - new MCDataFragment(getCurrentSectionData()); + insert(new MCDataFragment()); // FIXME: This is wasteful, we don't necessarily need to create a data // fragment. Instead, we should mark the symbol as pointing into the data @@ -166,8 +162,7 @@ void MCPureStreamer::EmitValueToAlignment(unsigned ByteAlignment, // MCObjectStreamer. if (MaxBytesToEmit == 0) MaxBytesToEmit = ByteAlignment; - new MCAlignFragment(ByteAlignment, Value, ValueSize, MaxBytesToEmit, - getCurrentSectionData()); + insert(new MCAlignFragment(ByteAlignment, Value, ValueSize, MaxBytesToEmit)); // Update the maximum alignment on the current section if necessary. if (ByteAlignment > getCurrentSectionData()->getAlignment()) @@ -180,8 +175,8 @@ void MCPureStreamer::EmitCodeAlignment(unsigned ByteAlignment, // MCObjectStreamer. if (MaxBytesToEmit == 0) MaxBytesToEmit = ByteAlignment; - MCAlignFragment *F = new MCAlignFragment(ByteAlignment, 0, 1, MaxBytesToEmit, - getCurrentSectionData()); + MCAlignFragment *F = new MCAlignFragment(ByteAlignment, 0, 1, MaxBytesToEmit); + insert(F); F->setEmitNops(true); // Update the maximum alignment on the current section if necessary. @@ -191,13 +186,13 @@ void MCPureStreamer::EmitCodeAlignment(unsigned ByteAlignment, bool MCPureStreamer::EmitValueToOffset(const MCExpr *Offset, unsigned char Value) { - new MCOrgFragment(*Offset, Value, getCurrentSectionData()); + insert(new MCOrgFragment(*Offset, Value)); return false; } void MCPureStreamer::EmitInstToFragment(const MCInst &Inst) { - MCRelaxableFragment *IF = - new MCRelaxableFragment(Inst, getCurrentSectionData()); + MCRelaxableFragment *IF = new MCRelaxableFragment(Inst); + insert(IF); // Add the fixups and data. // diff --git a/contrib/llvm/lib/MC/MCSectionCOFF.cpp b/contrib/llvm/lib/MC/MCSectionCOFF.cpp index aac9377..6cedf06 100644 --- a/contrib/llvm/lib/MC/MCSectionCOFF.cpp +++ b/contrib/llvm/lib/MC/MCSectionCOFF.cpp @@ -29,7 +29,8 @@ bool MCSectionCOFF::ShouldOmitSectionDirective(StringRef Name, } void MCSectionCOFF::PrintSwitchToSection(const MCAsmInfo &MAI, - raw_ostream &OS) const { + raw_ostream &OS, + const MCExpr *Subsection) const { // standard sections don't require the '.section' if (ShouldOmitSectionDirective(SectionName, MAI)) { diff --git a/contrib/llvm/lib/MC/MCSectionELF.cpp b/contrib/llvm/lib/MC/MCSectionELF.cpp index 0775cfa..bf1a984 100644 --- a/contrib/llvm/lib/MC/MCSectionELF.cpp +++ b/contrib/llvm/lib/MC/MCSectionELF.cpp @@ -10,6 +10,7 @@ #include "llvm/MC/MCSectionELF.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" +#include "llvm/MC/MCExpr.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Support/ELF.h" #include "llvm/Support/raw_ostream.h" @@ -32,10 +33,14 @@ bool MCSectionELF::ShouldOmitSectionDirective(StringRef Name, } void MCSectionELF::PrintSwitchToSection(const MCAsmInfo &MAI, - raw_ostream &OS) const { + raw_ostream &OS, + const MCExpr *Subsection) const { if (ShouldOmitSectionDirective(SectionName, MAI)) { - OS << '\t' << getSectionName() << '\n'; + OS << '\t' << getSectionName(); + if (Subsection) + OS << '\t' << *Subsection; + OS << '\n'; return; } @@ -129,6 +134,9 @@ void MCSectionELF::PrintSwitchToSection(const MCAsmInfo &MAI, if (Flags & ELF::SHF_GROUP) OS << "," << Group->getName() << ",comdat"; OS << '\n'; + + if (Subsection) + OS << "\t.subsection\t" << *Subsection << '\n'; } bool MCSectionELF::UseCodeAlign() const { diff --git a/contrib/llvm/lib/MC/MCSectionMachO.cpp b/contrib/llvm/lib/MC/MCSectionMachO.cpp index 
fc32315..8704513 100644 --- a/contrib/llvm/lib/MC/MCSectionMachO.cpp +++ b/contrib/llvm/lib/MC/MCSectionMachO.cpp @@ -91,7 +91,8 @@ MCSectionMachO::MCSectionMachO(StringRef Segment, StringRef Section, } void MCSectionMachO::PrintSwitchToSection(const MCAsmInfo &MAI, - raw_ostream &OS) const { + raw_ostream &OS, + const MCExpr *Subsection) const { OS << "\t.section\t" << getSegmentName() << ',' << getSectionName(); // Get the section type and attributes. diff --git a/contrib/llvm/lib/MC/MCStreamer.cpp b/contrib/llvm/lib/MC/MCStreamer.cpp index d02e553..8f1895e 100644 --- a/contrib/llvm/lib/MC/MCStreamer.cpp +++ b/contrib/llvm/lib/MC/MCStreamer.cpp @@ -24,8 +24,7 @@ using namespace llvm; MCStreamer::MCStreamer(StreamerKind Kind, MCContext &Ctx) : Kind(Kind), Context(Ctx), EmitEHFrame(true), EmitDebugFrame(false), CurrentW64UnwindInfo(0), LastSymbol(0), AutoInitSections(false) { - const MCSection *section = 0; - SectionStack.push_back(std::make_pair(section, section)); + SectionStack.push_back(std::pair<MCSectionSubPair, MCSectionSubPair>()); } MCStreamer::~MCStreamer() { @@ -36,13 +35,13 @@ MCStreamer::~MCStreamer() { void MCStreamer::reset() { for (unsigned i = 0; i < getNumW64UnwindInfos(); ++i) delete W64UnwindInfos[i]; + W64UnwindInfos.clear(); EmitEHFrame = true; EmitDebugFrame = false; CurrentW64UnwindInfo = 0; LastSymbol = 0; - const MCSection *section = 0; SectionStack.clear(); - SectionStack.push_back(std::make_pair(section, section)); + SectionStack.push_back(std::pair<MCSectionSubPair, MCSectionSubPair>()); } const MCExpr *MCStreamer::BuildSymbolDiff(MCContext &Context, @@ -188,15 +187,15 @@ void MCStreamer::EmitEHSymAttributes(const MCSymbol *Symbol, void MCStreamer::EmitLabel(MCSymbol *Symbol) { assert(!Symbol->isVariable() && "Cannot emit a variable symbol!"); - assert(getCurrentSection() && "Cannot emit before setting section!"); - Symbol->setSection(*getCurrentSection()); + assert(getCurrentSection().first && "Cannot emit before setting section!"); + Symbol->setSection(*getCurrentSection().first); LastSymbol = Symbol; } void MCStreamer::EmitDebugLabel(MCSymbol *Symbol) { assert(!Symbol->isVariable() && "Cannot emit a variable symbol!"); - assert(getCurrentSection() && "Cannot emit before setting section!"); - Symbol->setSection(*getCurrentSection()); + assert(getCurrentSection().first && "Cannot emit before setting section!"); + Symbol->setSection(*getCurrentSection().first); LastSymbol = Symbol; } diff --git a/contrib/llvm/lib/MC/WinCOFFObjectWriter.cpp b/contrib/llvm/lib/MC/WinCOFFObjectWriter.cpp index 6dffed7..518b59e 100644 --- a/contrib/llvm/lib/MC/WinCOFFObjectWriter.cpp +++ b/contrib/llvm/lib/MC/WinCOFFObjectWriter.cpp @@ -147,8 +147,7 @@ public: object_t *createCOFFEntity(StringRef Name, list_t &List); void DefineSection(MCSectionData const &SectionData); - void DefineSymbol(MCSymbol const &Symbol, - MCSymbolData const &SymbolData, + void DefineSymbol(MCSymbolData const &SymbolData, MCAssembler &Assembler); void MakeSymbolReal(COFFSymbol &S, size_t Index); @@ -410,25 +409,23 @@ void WinCOFFObjectWriter::DefineSection(MCSectionData const &SectionData) { /// This function takes a section data object from the assembler /// and creates the associated COFF symbol staging object. 
-void WinCOFFObjectWriter::DefineSymbol(MCSymbol const &Symbol, - MCSymbolData const &SymbolData, +void WinCOFFObjectWriter::DefineSymbol(MCSymbolData const &SymbolData, MCAssembler &Assembler) { + MCSymbol const &Symbol = SymbolData.getSymbol(); COFFSymbol *coff_symbol = GetOrCreateCOFFSymbol(&Symbol); - - coff_symbol->Data.Type = (SymbolData.getFlags() & 0x0000FFFF) >> 0; - coff_symbol->Data.StorageClass = (SymbolData.getFlags() & 0x00FF0000) >> 16; + SymbolMap[&Symbol] = coff_symbol; if (SymbolData.getFlags() & COFF::SF_WeakExternal) { coff_symbol->Data.StorageClass = COFF::IMAGE_SYM_CLASS_WEAK_EXTERNAL; if (Symbol.isVariable()) { - coff_symbol->Data.StorageClass = COFF::IMAGE_SYM_CLASS_WEAK_EXTERNAL; + const MCSymbolRefExpr *SymRef = + dyn_cast<MCSymbolRefExpr>(Symbol.getVariableValue()); - // FIXME: This assert message isn't very good. - assert(Symbol.getVariableValue()->getKind() == MCExpr::SymbolRef && - "Value must be a SymbolRef!"); + if (!SymRef) + report_fatal_error("Weak externals may only alias symbols"); - coff_symbol->Other = GetOrCreateCOFFSymbol(&Symbol); + coff_symbol->Other = GetOrCreateCOFFSymbol(&SymRef->getSymbol()); } else { std::string WeakName = std::string(".weak.") + Symbol.getName().str() @@ -448,23 +445,29 @@ void WinCOFFObjectWriter::DefineSymbol(MCSymbol const &Symbol, coff_symbol->Aux[0].Aux.WeakExternal.TagIndex = 0; coff_symbol->Aux[0].Aux.WeakExternal.Characteristics = COFF::IMAGE_WEAK_EXTERN_SEARCH_LIBRARY; - } - // If no storage class was specified in the streamer, define it here. - if (coff_symbol->Data.StorageClass == 0) { - bool external = SymbolData.isExternal() || (SymbolData.Fragment == NULL); + coff_symbol->MCData = &SymbolData; + } else { + const MCSymbolData &ResSymData = + Assembler.getSymbolData(Symbol.AliasedSymbol()); - coff_symbol->Data.StorageClass = - external ? COFF::IMAGE_SYM_CLASS_EXTERNAL : COFF::IMAGE_SYM_CLASS_STATIC; - } + coff_symbol->Data.Type = (ResSymData.getFlags() & 0x0000FFFF) >> 0; + coff_symbol->Data.StorageClass = (ResSymData.getFlags() & 0x00FF0000) >> 16; - if (SymbolData.Fragment != NULL) - coff_symbol->Section = - SectionMap[&SymbolData.Fragment->getParent()->getSection()]; + // If no storage class was specified in the streamer, define it here. + if (coff_symbol->Data.StorageClass == 0) { + bool external = ResSymData.isExternal() || (ResSymData.Fragment == NULL); - // Bind internal COFF symbol to MC symbol. - coff_symbol->MCData = &SymbolData; - SymbolMap[&Symbol] = coff_symbol; + coff_symbol->Data.StorageClass = + external ? 
COFF::IMAGE_SYM_CLASS_EXTERNAL : COFF::IMAGE_SYM_CLASS_STATIC; + } + + if (ResSymData.Fragment != NULL) + coff_symbol->Section = + SectionMap[&ResSymData.Fragment->getParent()->getSection()]; + + coff_symbol->MCData = &ResSymData; + } } /// making a section real involves assigned it a number and putting @@ -620,9 +623,7 @@ void WinCOFFObjectWriter::ExecutePostLayoutBinding(MCAssembler &Asm, for (MCAssembler::const_symbol_iterator i = Asm.symbol_begin(), e = Asm.symbol_end(); i != e; i++) { if (ExportSymbol(*i, Asm)) { - const MCSymbol &Alias = i->getSymbol(); - const MCSymbol &Symbol = Alias.AliasedSymbol(); - DefineSymbol(Alias, Asm.getSymbolData(Symbol), Asm); + DefineSymbol(*i, Asm); } } } @@ -689,13 +690,8 @@ void WinCOFFObjectWriter::RecordRelocation(const MCAssembler &Asm, ++Reloc.Symb->Relocations; Reloc.Data.VirtualAddress += Fixup.getOffset(); - - unsigned FixupKind = Fixup.getKind(); - - if (CrossSection) - FixupKind = FK_PCRel_4; - - Reloc.Data.Type = TargetObjectWriter->getRelocType(FixupKind); + Reloc.Data.Type = TargetObjectWriter->getRelocType(Target, Fixup, + CrossSection); // FIXME: Can anyone explain what this does other than adjust for the size // of the offset? diff --git a/contrib/llvm/lib/Object/COFFObjectFile.cpp b/contrib/llvm/lib/Object/COFFObjectFile.cpp index ca90e0e..70fec32 100644 --- a/contrib/llvm/lib/Object/COFFObjectFile.cpp +++ b/contrib/llvm/lib/Object/COFFObjectFile.cpp @@ -429,7 +429,7 @@ relocation_iterator COFFObjectFile::getSectionRelEnd(DataRefImpl Sec) const { } COFFObjectFile::COFFObjectFile(MemoryBuffer *Object, error_code &ec) - : ObjectFile(Binary::ID_COFF, Object, ec) + : ObjectFile(Binary::ID_COFF, Object) , Header(0) , SectionTable(0) , SymbolTable(0) @@ -705,8 +705,7 @@ error_code COFFObjectFile::getRelocationNext(DataRefImpl Rel, } error_code COFFObjectFile::getRelocationAddress(DataRefImpl Rel, uint64_t &Res) const { - Res = toRel(Rel)->VirtualAddress; - return object_error::success; + report_fatal_error("getRelocationAddress not implemented in COFFObjectFile"); } error_code COFFObjectFile::getRelocationOffset(DataRefImpl Rel, uint64_t &Res) const { diff --git a/contrib/llvm/lib/Object/MachOObject.cpp b/contrib/llvm/lib/Object/MachOObject.cpp deleted file mode 100644 index c9c341a..0000000 --- a/contrib/llvm/lib/Object/MachOObject.cpp +++ /dev/null @@ -1,422 +0,0 @@ -//===- MachOObject.cpp - Mach-O Object File Wrapper -----------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -#include "llvm/Object/MachOObject.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/StringRef.h" -#include "llvm/Support/DataExtractor.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/Host.h" -#include "llvm/Support/MemoryBuffer.h" -#include "llvm/Support/SwapByteOrder.h" -#include "llvm/Support/raw_ostream.h" - -using namespace llvm; -using namespace llvm::object; - -/* Translation Utilities */ - -template<typename T> -static void SwapValue(T &Value) { - Value = sys::SwapByteOrder(Value); -} - -template<typename T> -static void SwapStruct(T &Value); - -template<typename T> -static void ReadInMemoryStruct(const MachOObject &MOO, - StringRef Buffer, uint64_t Base, - InMemoryStruct<T> &Res) { - typedef T struct_type; - uint64_t Size = sizeof(struct_type); - - // Check that the buffer contains the expected data. 
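
Both the Mach-O reader being deleted here and its replacement later in MachOObjectFile.cpp lean on the same idiom the comment above describes: bounds-check the buffer, copy the fixed-size struct out of it, and byte-swap every field when the file's endianness differs from the host's. A stripped-down sketch of that pattern, with a toy struct rather than the real macho:: types:

#include <cstdint>
#include <cstring>
#include <string>

// Toy header with two fields; real Mach-O structs have many more.
struct ToyHeader { uint32_t Magic; uint32_t NumLoadCommands; };

static uint32_t byteSwap32(uint32_t V) {
  return (V >> 24) | ((V >> 8) & 0xff00) | ((V << 8) & 0xff0000) | (V << 24);
}

static void swapStruct(ToyHeader &H) {
  H.Magic = byteSwap32(H.Magic);
  H.NumLoadCommands = byteSwap32(H.NumLoadCommands);
}

// Mirrors the getStruct<T>/ReadInMemoryStruct idea: bounds-check, memcpy,
// swap in place only when the file and host byte orders disagree.
static bool readHeader(const std::string &Buffer, size_t Offset,
                       bool FileIsLittleEndian, bool HostIsLittleEndian,
                       ToyHeader &Out) {
  if (Offset + sizeof(ToyHeader) > Buffer.size())
    return false;                     // not enough bytes for the struct
  std::memcpy(&Out, Buffer.data() + Offset, sizeof(ToyHeader));
  if (FileIsLittleEndian != HostIsLittleEndian)
    swapStruct(Out);                  // translate every field in place
  return true;
}
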
- if (Base + Size > Buffer.size()) { - Res = 0; - return; - } - - // Check whether we can return a direct pointer. - struct_type *Ptr = reinterpret_cast<struct_type *>( - const_cast<char *>(Buffer.data() + Base)); - if (!MOO.isSwappedEndian()) { - Res = Ptr; - return; - } - - // Otherwise, copy the struct and translate the values. - Res = *Ptr; - SwapStruct(*Res); -} - -/* *** */ - -MachOObject::MachOObject(MemoryBuffer *Buffer_, bool IsLittleEndian_, - bool Is64Bit_) - : Buffer(Buffer_), IsLittleEndian(IsLittleEndian_), Is64Bit(Is64Bit_), - IsSwappedEndian(IsLittleEndian != sys::isLittleEndianHost()), - HasStringTable(false), LoadCommands(0), NumLoadedCommands(0) { - // Load the common header. - memcpy(&Header, Buffer->getBuffer().data(), sizeof(Header)); - if (IsSwappedEndian) { - SwapValue(Header.Magic); - SwapValue(Header.CPUType); - SwapValue(Header.CPUSubtype); - SwapValue(Header.FileType); - SwapValue(Header.NumLoadCommands); - SwapValue(Header.SizeOfLoadCommands); - SwapValue(Header.Flags); - } - - if (is64Bit()) { - memcpy(&Header64Ext, Buffer->getBuffer().data() + sizeof(Header), - sizeof(Header64Ext)); - if (IsSwappedEndian) { - SwapValue(Header64Ext.Reserved); - } - } - - // Create the load command array if sane. - if (getHeader().NumLoadCommands < (1 << 20)) - LoadCommands = new LoadCommandInfo[getHeader().NumLoadCommands]; -} - -MachOObject::~MachOObject() { - delete [] LoadCommands; -} - -MachOObject *MachOObject::LoadFromBuffer(MemoryBuffer *Buffer, - std::string *ErrorStr) { - // First, check the magic value and initialize the basic object info. - bool IsLittleEndian = false, Is64Bit = false; - StringRef Magic = Buffer->getBuffer().slice(0, 4); - if (Magic == "\xFE\xED\xFA\xCE") { - } else if (Magic == "\xCE\xFA\xED\xFE") { - IsLittleEndian = true; - } else if (Magic == "\xFE\xED\xFA\xCF") { - Is64Bit = true; - } else if (Magic == "\xCF\xFA\xED\xFE") { - IsLittleEndian = true; - Is64Bit = true; - } else { - if (ErrorStr) *ErrorStr = "not a Mach object file (invalid magic)"; - return 0; - } - - // Ensure that the at least the full header is present. - unsigned HeaderSize = Is64Bit ? macho::Header64Size : macho::Header32Size; - if (Buffer->getBufferSize() < HeaderSize) { - if (ErrorStr) *ErrorStr = "not a Mach object file (invalid header)"; - return 0; - } - - OwningPtr<MachOObject> Object(new MachOObject(Buffer, IsLittleEndian, - Is64Bit)); - - // Check for bogus number of load commands. - if (Object->getHeader().NumLoadCommands >= (1 << 20)) { - if (ErrorStr) *ErrorStr = "not a Mach object file (unreasonable header)"; - return 0; - } - - if (ErrorStr) *ErrorStr = ""; - return Object.take(); -} - -StringRef MachOObject::getData(size_t Offset, size_t Size) const { - return Buffer->getBuffer().substr(Offset,Size); -} - -void MachOObject::RegisterStringTable(macho::SymtabLoadCommand &SLC) { - HasStringTable = true; - StringTable = Buffer->getBuffer().substr(SLC.StringTableOffset, - SLC.StringTableSize); -} - -const MachOObject::LoadCommandInfo & -MachOObject::getLoadCommandInfo(unsigned Index) const { - assert(Index < getHeader().NumLoadCommands && "Invalid index!"); - - // Load the command, if necessary. 
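
The load-command walk, continued just below in the deleted getLoadCommandInfo and reproduced by the new getFirstLoadCommandInfo/getNextLoadCommandInfo pair, relies on each command recording its own size: command N+1 starts where command N ends. A hedged sketch of that offset chaining, with a made-up command record:

#include <cstdint>
#include <vector>

struct ToyLoadCommand { uint32_t Type; uint32_t Size; }; // Size includes this header

// Compute the file offset of every load command, starting right after the
// Mach-O header: next offset = current offset + current command's Size.
std::vector<uint64_t> commandOffsets(const std::vector<ToyLoadCommand> &Cmds,
                                     uint64_t HeaderSize) {
  std::vector<uint64_t> Offsets;
  uint64_t Offset = HeaderSize;       // first command follows the header
  for (size_t I = 0; I != Cmds.size(); ++I) {
    Offsets.push_back(Offset);
    Offset += Cmds[I].Size;           // i.e. Prev.Offset + Prev.Command.Size
  }
  return Offsets;
}
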
- if (Index >= NumLoadedCommands) { - uint64_t Offset; - if (Index == 0) { - Offset = getHeaderSize(); - } else { - const LoadCommandInfo &Prev = getLoadCommandInfo(Index - 1); - Offset = Prev.Offset + Prev.Command.Size; - } - - LoadCommandInfo &Info = LoadCommands[Index]; - memcpy(&Info.Command, Buffer->getBuffer().data() + Offset, - sizeof(macho::LoadCommand)); - if (IsSwappedEndian) { - SwapValue(Info.Command.Type); - SwapValue(Info.Command.Size); - } - Info.Offset = Offset; - NumLoadedCommands = Index + 1; - } - - return LoadCommands[Index]; -} - -template<> -void SwapStruct(macho::SegmentLoadCommand &Value) { - SwapValue(Value.Type); - SwapValue(Value.Size); - SwapValue(Value.VMAddress); - SwapValue(Value.VMSize); - SwapValue(Value.FileOffset); - SwapValue(Value.FileSize); - SwapValue(Value.MaxVMProtection); - SwapValue(Value.InitialVMProtection); - SwapValue(Value.NumSections); - SwapValue(Value.Flags); -} -void MachOObject::ReadSegmentLoadCommand(const LoadCommandInfo &LCI, - InMemoryStruct<macho::SegmentLoadCommand> &Res) const { - ReadInMemoryStruct(*this, Buffer->getBuffer(), LCI.Offset, Res); -} - -template<> -void SwapStruct(macho::Segment64LoadCommand &Value) { - SwapValue(Value.Type); - SwapValue(Value.Size); - SwapValue(Value.VMAddress); - SwapValue(Value.VMSize); - SwapValue(Value.FileOffset); - SwapValue(Value.FileSize); - SwapValue(Value.MaxVMProtection); - SwapValue(Value.InitialVMProtection); - SwapValue(Value.NumSections); - SwapValue(Value.Flags); -} -void MachOObject::ReadSegment64LoadCommand(const LoadCommandInfo &LCI, - InMemoryStruct<macho::Segment64LoadCommand> &Res) const { - ReadInMemoryStruct(*this, Buffer->getBuffer(), LCI.Offset, Res); -} - -template<> -void SwapStruct(macho::SymtabLoadCommand &Value) { - SwapValue(Value.Type); - SwapValue(Value.Size); - SwapValue(Value.SymbolTableOffset); - SwapValue(Value.NumSymbolTableEntries); - SwapValue(Value.StringTableOffset); - SwapValue(Value.StringTableSize); -} -void MachOObject::ReadSymtabLoadCommand(const LoadCommandInfo &LCI, - InMemoryStruct<macho::SymtabLoadCommand> &Res) const { - ReadInMemoryStruct(*this, Buffer->getBuffer(), LCI.Offset, Res); -} - -template<> -void SwapStruct(macho::DysymtabLoadCommand &Value) { - SwapValue(Value.Type); - SwapValue(Value.Size); - SwapValue(Value.LocalSymbolsIndex); - SwapValue(Value.NumLocalSymbols); - SwapValue(Value.ExternalSymbolsIndex); - SwapValue(Value.NumExternalSymbols); - SwapValue(Value.UndefinedSymbolsIndex); - SwapValue(Value.NumUndefinedSymbols); - SwapValue(Value.TOCOffset); - SwapValue(Value.NumTOCEntries); - SwapValue(Value.ModuleTableOffset); - SwapValue(Value.NumModuleTableEntries); - SwapValue(Value.ReferenceSymbolTableOffset); - SwapValue(Value.NumReferencedSymbolTableEntries); - SwapValue(Value.IndirectSymbolTableOffset); - SwapValue(Value.NumIndirectSymbolTableEntries); - SwapValue(Value.ExternalRelocationTableOffset); - SwapValue(Value.NumExternalRelocationTableEntries); - SwapValue(Value.LocalRelocationTableOffset); - SwapValue(Value.NumLocalRelocationTableEntries); -} -void MachOObject::ReadDysymtabLoadCommand(const LoadCommandInfo &LCI, - InMemoryStruct<macho::DysymtabLoadCommand> &Res) const { - ReadInMemoryStruct(*this, Buffer->getBuffer(), LCI.Offset, Res); -} - -template<> -void SwapStruct(macho::LinkeditDataLoadCommand &Value) { - SwapValue(Value.Type); - SwapValue(Value.Size); - SwapValue(Value.DataOffset); - SwapValue(Value.DataSize); -} -void MachOObject::ReadLinkeditDataLoadCommand(const LoadCommandInfo &LCI, - 
InMemoryStruct<macho::LinkeditDataLoadCommand> &Res) const { - ReadInMemoryStruct(*this, Buffer->getBuffer(), LCI.Offset, Res); -} - -template<> -void SwapStruct(macho::LinkerOptionsLoadCommand &Value) { - SwapValue(Value.Type); - SwapValue(Value.Size); - SwapValue(Value.Count); -} -void MachOObject::ReadLinkerOptionsLoadCommand(const LoadCommandInfo &LCI, - InMemoryStruct<macho::LinkerOptionsLoadCommand> &Res) const { - ReadInMemoryStruct(*this, Buffer->getBuffer(), LCI.Offset, Res); -} - -template<> -void SwapStruct(macho::IndirectSymbolTableEntry &Value) { - SwapValue(Value.Index); -} -void -MachOObject::ReadIndirectSymbolTableEntry(const macho::DysymtabLoadCommand &DLC, - unsigned Index, - InMemoryStruct<macho::IndirectSymbolTableEntry> &Res) const { - uint64_t Offset = (DLC.IndirectSymbolTableOffset + - Index * sizeof(macho::IndirectSymbolTableEntry)); - ReadInMemoryStruct(*this, Buffer->getBuffer(), Offset, Res); -} - - -template<> -void SwapStruct(macho::Section &Value) { - SwapValue(Value.Address); - SwapValue(Value.Size); - SwapValue(Value.Offset); - SwapValue(Value.Align); - SwapValue(Value.RelocationTableOffset); - SwapValue(Value.NumRelocationTableEntries); - SwapValue(Value.Flags); - SwapValue(Value.Reserved1); - SwapValue(Value.Reserved2); -} -void MachOObject::ReadSection(const LoadCommandInfo &LCI, - unsigned Index, - InMemoryStruct<macho::Section> &Res) const { - assert(LCI.Command.Type == macho::LCT_Segment && - "Unexpected load command info!"); - uint64_t Offset = (LCI.Offset + sizeof(macho::SegmentLoadCommand) + - Index * sizeof(macho::Section)); - ReadInMemoryStruct(*this, Buffer->getBuffer(), Offset, Res); -} - -template<> -void SwapStruct(macho::Section64 &Value) { - SwapValue(Value.Address); - SwapValue(Value.Size); - SwapValue(Value.Offset); - SwapValue(Value.Align); - SwapValue(Value.RelocationTableOffset); - SwapValue(Value.NumRelocationTableEntries); - SwapValue(Value.Flags); - SwapValue(Value.Reserved1); - SwapValue(Value.Reserved2); - SwapValue(Value.Reserved3); -} -void MachOObject::ReadSection64(const LoadCommandInfo &LCI, - unsigned Index, - InMemoryStruct<macho::Section64> &Res) const { - assert(LCI.Command.Type == macho::LCT_Segment64 && - "Unexpected load command info!"); - uint64_t Offset = (LCI.Offset + sizeof(macho::Segment64LoadCommand) + - Index * sizeof(macho::Section64)); - ReadInMemoryStruct(*this, Buffer->getBuffer(), Offset, Res); -} - -template<> -void SwapStruct(macho::RelocationEntry &Value) { - SwapValue(Value.Word0); - SwapValue(Value.Word1); -} -void MachOObject::ReadRelocationEntry(uint64_t RelocationTableOffset, - unsigned Index, - InMemoryStruct<macho::RelocationEntry> &Res) const { - uint64_t Offset = (RelocationTableOffset + - Index * sizeof(macho::RelocationEntry)); - ReadInMemoryStruct(*this, Buffer->getBuffer(), Offset, Res); -} - -template<> -void SwapStruct(macho::SymbolTableEntry &Value) { - SwapValue(Value.StringIndex); - SwapValue(Value.Flags); - SwapValue(Value.Value); -} -void MachOObject::ReadSymbolTableEntry(uint64_t SymbolTableOffset, - unsigned Index, - InMemoryStruct<macho::SymbolTableEntry> &Res) const { - uint64_t Offset = (SymbolTableOffset + - Index * sizeof(macho::SymbolTableEntry)); - ReadInMemoryStruct(*this, Buffer->getBuffer(), Offset, Res); -} - -template<> -void SwapStruct(macho::Symbol64TableEntry &Value) { - SwapValue(Value.StringIndex); - SwapValue(Value.Flags); - SwapValue(Value.Value); -} -void MachOObject::ReadSymbol64TableEntry(uint64_t SymbolTableOffset, - unsigned Index, - 
InMemoryStruct<macho::Symbol64TableEntry> &Res) const { - uint64_t Offset = (SymbolTableOffset + - Index * sizeof(macho::Symbol64TableEntry)); - ReadInMemoryStruct(*this, Buffer->getBuffer(), Offset, Res); -} - -template<> -void SwapStruct(macho::DataInCodeTableEntry &Value) { - SwapValue(Value.Offset); - SwapValue(Value.Length); - SwapValue(Value.Kind); -} -void MachOObject::ReadDataInCodeTableEntry(uint64_t TableOffset, - unsigned Index, - InMemoryStruct<macho::DataInCodeTableEntry> &Res) const { - uint64_t Offset = (TableOffset + - Index * sizeof(macho::DataInCodeTableEntry)); - ReadInMemoryStruct(*this, Buffer->getBuffer(), Offset, Res); -} - -void MachOObject::ReadULEB128s(uint64_t Index, - SmallVectorImpl<uint64_t> &Out) const { - DataExtractor extractor(Buffer->getBuffer(), true, 0); - - uint32_t offset = Index; - uint64_t data = 0; - while (uint64_t delta = extractor.getULEB128(&offset)) { - data += delta; - Out.push_back(data); - } -} - -/* ** */ -// Object Dumping Facilities -void MachOObject::dump() const { print(dbgs()); dbgs() << '\n'; } -void MachOObject::dumpHeader() const { printHeader(dbgs()); dbgs() << '\n'; } - -void MachOObject::printHeader(raw_ostream &O) const { - O << "('cputype', " << Header.CPUType << ")\n"; - O << "('cpusubtype', " << Header.CPUSubtype << ")\n"; - O << "('filetype', " << Header.FileType << ")\n"; - O << "('num_load_commands', " << Header.NumLoadCommands << ")\n"; - O << "('load_commands_size', " << Header.SizeOfLoadCommands << ")\n"; - O << "('flag', " << Header.Flags << ")\n"; - - // Print extended header if 64-bit. - if (is64Bit()) - O << "('reserved', " << Header64Ext.Reserved << ")\n"; -} - -void MachOObject::print(raw_ostream &O) const { - O << "Header:\n"; - printHeader(O); - O << "Load Commands:\n"; - - O << "Buffer:\n"; -} diff --git a/contrib/llvm/lib/Object/MachOObjectFile.cpp b/contrib/llvm/lib/Object/MachOObjectFile.cpp index 6501df9..dfd8d3d 100644 --- a/contrib/llvm/lib/Object/MachOObjectFile.cpp +++ b/contrib/llvm/lib/Object/MachOObjectFile.cpp @@ -15,7 +15,9 @@ #include "llvm/Object/MachO.h" #include "llvm/ADT/Triple.h" #include "llvm/Object/MachOFormat.h" +#include "llvm/Support/DataExtractor.h" #include "llvm/Support/Format.h" +#include "llvm/Support/Host.h" #include "llvm/Support/MemoryBuffer.h" #include <cctype> #include <cstring> @@ -27,236 +29,560 @@ using namespace object; namespace llvm { namespace object { -MachOObjectFile::MachOObjectFile(MemoryBuffer *Object, MachOObject *MOO, - error_code &ec) - : ObjectFile(Binary::ID_MachO, Object, ec), - MachOObj(MOO), - RegisteredStringTable(std::numeric_limits<uint32_t>::max()) { - DataRefImpl DRI; - moveToNextSection(DRI); - uint32_t LoadCommandCount = MachOObj->getHeader().NumLoadCommands; - while (DRI.d.a < LoadCommandCount) { - Sections.push_back(DRI); - DRI.d.b++; - moveToNextSection(DRI); +struct SymbolTableEntryBase { + uint32_t StringIndex; + uint8_t Type; + uint8_t SectionIndex; + uint16_t Flags; +}; + +struct SectionBase { + char Name[16]; + char SegmentName[16]; +}; + +template<typename T> +static void SwapValue(T &Value) { + Value = sys::SwapByteOrder(Value); +} + +template<typename T> +static void SwapStruct(T &Value); + +template<> +void SwapStruct(macho::RelocationEntry &H) { + SwapValue(H.Word0); + SwapValue(H.Word1); +} + +template<> +void SwapStruct(macho::LoadCommand &L) { + SwapValue(L.Type); + SwapValue(L.Size); +} + +template<> +void SwapStruct(SymbolTableEntryBase &S) { + SwapValue(S.StringIndex); + SwapValue(S.Flags); +} + +template<> +void 
SwapStruct(macho::Section &S) { + SwapValue(S.Address); + SwapValue(S.Size); + SwapValue(S.Offset); + SwapValue(S.Align); + SwapValue(S.RelocationTableOffset); + SwapValue(S.NumRelocationTableEntries); + SwapValue(S.Flags); + SwapValue(S.Reserved1); + SwapValue(S.Reserved2); +} + +template<> +void SwapStruct(macho::Section64 &S) { + SwapValue(S.Address); + SwapValue(S.Size); + SwapValue(S.Offset); + SwapValue(S.Align); + SwapValue(S.RelocationTableOffset); + SwapValue(S.NumRelocationTableEntries); + SwapValue(S.Flags); + SwapValue(S.Reserved1); + SwapValue(S.Reserved2); + SwapValue(S.Reserved3); +} + +template<> +void SwapStruct(macho::SymbolTableEntry &S) { + SwapValue(S.StringIndex); + SwapValue(S.Flags); + SwapValue(S.Value); +} + +template<> +void SwapStruct(macho::Symbol64TableEntry &S) { + SwapValue(S.StringIndex); + SwapValue(S.Flags); + SwapValue(S.Value); +} + +template<> +void SwapStruct(macho::Header &H) { + SwapValue(H.Magic); + SwapValue(H.CPUType); + SwapValue(H.CPUSubtype); + SwapValue(H.FileType); + SwapValue(H.NumLoadCommands); + SwapValue(H.SizeOfLoadCommands); + SwapValue(H.Flags); +} + +template<> +void SwapStruct(macho::Header64Ext &E) { + SwapValue(E.Reserved); +} + +template<> +void SwapStruct(macho::SymtabLoadCommand &C) { + SwapValue(C.Type); + SwapValue(C.Size); + SwapValue(C.SymbolTableOffset); + SwapValue(C.NumSymbolTableEntries); + SwapValue(C.StringTableOffset); + SwapValue(C.StringTableSize); +} + +template<> +void SwapStruct(macho::DysymtabLoadCommand &C) { + SwapValue(C.Type); + SwapValue(C.Size); + SwapValue(C.LocalSymbolsIndex); + SwapValue(C.NumLocalSymbols); + SwapValue(C.ExternalSymbolsIndex); + SwapValue(C.NumExternalSymbols); + SwapValue(C.UndefinedSymbolsIndex); + SwapValue(C.NumUndefinedSymbols); + SwapValue(C.TOCOffset); + SwapValue(C.NumTOCEntries); + SwapValue(C.ModuleTableOffset); + SwapValue(C.NumModuleTableEntries); + SwapValue(C.ReferenceSymbolTableOffset); + SwapValue(C.NumReferencedSymbolTableEntries); + SwapValue(C.IndirectSymbolTableOffset); + SwapValue(C.NumIndirectSymbolTableEntries); + SwapValue(C.ExternalRelocationTableOffset); + SwapValue(C.NumExternalRelocationTableEntries); + SwapValue(C.LocalRelocationTableOffset); + SwapValue(C.NumLocalRelocationTableEntries); +} + +template<> +void SwapStruct(macho::LinkeditDataLoadCommand &C) { + SwapValue(C.Type); + SwapValue(C.Size); + SwapValue(C.DataOffset); + SwapValue(C.DataSize); +} + +template<> +void SwapStruct(macho::SegmentLoadCommand &C) { + SwapValue(C.Type); + SwapValue(C.Size); + SwapValue(C.VMAddress); + SwapValue(C.VMSize); + SwapValue(C.FileOffset); + SwapValue(C.FileSize); + SwapValue(C.MaxVMProtection); + SwapValue(C.InitialVMProtection); + SwapValue(C.NumSections); + SwapValue(C.Flags); +} + +template<> +void SwapStruct(macho::Segment64LoadCommand &C) { + SwapValue(C.Type); + SwapValue(C.Size); + SwapValue(C.VMAddress); + SwapValue(C.VMSize); + SwapValue(C.FileOffset); + SwapValue(C.FileSize); + SwapValue(C.MaxVMProtection); + SwapValue(C.InitialVMProtection); + SwapValue(C.NumSections); + SwapValue(C.Flags); +} + +template<> +void SwapStruct(macho::IndirectSymbolTableEntry &C) { + SwapValue(C.Index); +} + +template<> +void SwapStruct(macho::LinkerOptionsLoadCommand &C) { + SwapValue(C.Type); + SwapValue(C.Size); + SwapValue(C.Count); +} + +template<> +void SwapStruct(macho::DataInCodeTableEntry &C) { + SwapValue(C.Offset); + SwapValue(C.Length); + SwapValue(C.Kind); +} + +template<typename T> +T getStruct(const MachOObjectFile *O, const char *P) { + T Cmd; + memcpy(&Cmd, 
P, sizeof(T)); + if (O->isLittleEndian() != sys::IsLittleEndianHost) + SwapStruct(Cmd); + return Cmd; +} + +static uint32_t +getSegmentLoadCommandNumSections(const MachOObjectFile *O, + const MachOObjectFile::LoadCommandInfo &L) { + if (O->is64Bit()) { + macho::Segment64LoadCommand S = O->getSegment64LoadCommand(L); + return S.NumSections; } + macho::SegmentLoadCommand S = O->getSegmentLoadCommand(L); + return S.NumSections; } +static const char * +getSectionPtr(const MachOObjectFile *O, MachOObjectFile::LoadCommandInfo L, + unsigned Sec) { + uintptr_t CommandAddr = reinterpret_cast<uintptr_t>(L.Ptr); -ObjectFile *ObjectFile::createMachOObjectFile(MemoryBuffer *Buffer) { + bool Is64 = O->is64Bit(); + unsigned SegmentLoadSize = Is64 ? sizeof(macho::Segment64LoadCommand) : + sizeof(macho::SegmentLoadCommand); + unsigned SectionSize = Is64 ? sizeof(macho::Section64) : + sizeof(macho::Section); + + uintptr_t SectionAddr = CommandAddr + SegmentLoadSize + Sec * SectionSize; + return reinterpret_cast<const char*>(SectionAddr); +} + +static const char *getPtr(const MachOObjectFile *O, size_t Offset) { + return O->getData().substr(Offset, 1).data(); +} + +static SymbolTableEntryBase +getSymbolTableEntryBase(const MachOObjectFile *O, DataRefImpl DRI) { + const char *P = reinterpret_cast<const char *>(DRI.p); + return getStruct<SymbolTableEntryBase>(O, P); +} + +static StringRef parseSegmentOrSectionName(const char *P) { + if (P[15] == 0) + // Null terminated. + return P; + // Not null terminated, so this is a 16 char string. + return StringRef(P, 16); +} + +// Helper to advance a section or symbol iterator multiple increments at a time. +template<class T> +static error_code advance(T &it, size_t Val) { error_code ec; - std::string Err; - MachOObject *MachOObj = MachOObject::LoadFromBuffer(Buffer, &Err); - if (!MachOObj) - return NULL; - // MachOObject takes ownership of the Buffer we passed to it, and - // MachOObjectFile does, too, so we need to make sure they don't get the - // same object. A MemoryBuffer is cheap (it's just a reference to memory, - // not a copy of the memory itself), so just make a new copy here for - // the MachOObjectFile. - MemoryBuffer *NewBuffer = - MemoryBuffer::getMemBuffer(Buffer->getBuffer(), - Buffer->getBufferIdentifier(), false); - return new MachOObjectFile(NewBuffer, MachOObj, ec); -} - -/*===-- Symbols -----------------------------------------------------------===*/ - -void MachOObjectFile::moveToNextSymbol(DataRefImpl &DRI) const { - uint32_t LoadCommandCount = MachOObj->getHeader().NumLoadCommands; - while (DRI.d.a < LoadCommandCount) { - LoadCommandInfo LCI = MachOObj->getLoadCommandInfo(DRI.d.a); - if (LCI.Command.Type == macho::LCT_Symtab) { - InMemoryStruct<macho::SymtabLoadCommand> SymtabLoadCmd; - MachOObj->ReadSymtabLoadCommand(LCI, SymtabLoadCmd); - if (DRI.d.b < SymtabLoadCmd->NumSymbolTableEntries) - return; + while (Val--) { + it.increment(ec); + } + return ec; +} + +template<class T> +static void advanceTo(T &it, size_t Val) { + if (error_code ec = advance(it, Val)) + report_fatal_error(ec.message()); +} + +static unsigned getCPUType(const MachOObjectFile *O) { + return O->getHeader().CPUType; +} + +static void printRelocationTargetName(const MachOObjectFile *O, + const macho::RelocationEntry &RE, + raw_string_ostream &fmt) { + bool IsScattered = O->isRelocationScattered(RE); + + // Target of a scattered relocation is an address. 
In the interest of + // generating pretty output, scan through the symbol table looking for a + // symbol that aligns with that address. If we find one, print it. + // Otherwise, we just print the hex address of the target. + if (IsScattered) { + uint32_t Val = O->getPlainRelocationSymbolNum(RE); + + error_code ec; + for (symbol_iterator SI = O->begin_symbols(), SE = O->end_symbols(); + SI != SE; SI.increment(ec)) { + if (ec) report_fatal_error(ec.message()); + + uint64_t Addr; + StringRef Name; + + if ((ec = SI->getAddress(Addr))) + report_fatal_error(ec.message()); + if (Addr != Val) continue; + if ((ec = SI->getName(Name))) + report_fatal_error(ec.message()); + fmt << Name; + return; + } + + // If we couldn't find a symbol that this relocation refers to, try + // to find a section beginning instead. + for (section_iterator SI = O->begin_sections(), SE = O->end_sections(); + SI != SE; SI.increment(ec)) { + if (ec) report_fatal_error(ec.message()); + + uint64_t Addr; + StringRef Name; + + if ((ec = SI->getAddress(Addr))) + report_fatal_error(ec.message()); + if (Addr != Val) continue; + if ((ec = SI->getName(Name))) + report_fatal_error(ec.message()); + fmt << Name; + return; } - DRI.d.a++; - DRI.d.b = 0; + fmt << format("0x%x", Val); + return; } -} -void MachOObjectFile::getSymbolTableEntry(DataRefImpl DRI, - InMemoryStruct<macho::SymbolTableEntry> &Res) const { - InMemoryStruct<macho::SymtabLoadCommand> SymtabLoadCmd; - LoadCommandInfo LCI = MachOObj->getLoadCommandInfo(DRI.d.a); - MachOObj->ReadSymtabLoadCommand(LCI, SymtabLoadCmd); + StringRef S; + bool isExtern = O->getPlainRelocationExternal(RE); + uint64_t Val = O->getAnyRelocationAddress(RE); - if (RegisteredStringTable != DRI.d.a) { - MachOObj->RegisterStringTable(*SymtabLoadCmd); - RegisteredStringTable = DRI.d.a; + if (isExtern) { + symbol_iterator SI = O->begin_symbols(); + advanceTo(SI, Val); + SI->getName(S); + } else { + section_iterator SI = O->begin_sections(); + advanceTo(SI, Val); + SI->getName(S); } - MachOObj->ReadSymbolTableEntry(SymtabLoadCmd->SymbolTableOffset, DRI.d.b, - Res); + fmt << S; +} + +static uint32_t getPlainRelocationAddress(const macho::RelocationEntry &RE) { + return RE.Word0; +} + +static unsigned +getScatteredRelocationAddress(const macho::RelocationEntry &RE) { + return RE.Word0 & 0xffffff; +} + +static bool getPlainRelocationPCRel(const MachOObjectFile *O, + const macho::RelocationEntry &RE) { + if (O->isLittleEndian()) + return (RE.Word1 >> 24) & 1; + return (RE.Word1 >> 7) & 1; +} + +static bool +getScatteredRelocationPCRel(const MachOObjectFile *O, + const macho::RelocationEntry &RE) { + return (RE.Word0 >> 30) & 1; +} + +static unsigned getPlainRelocationLength(const MachOObjectFile *O, + const macho::RelocationEntry &RE) { + if (O->isLittleEndian()) + return (RE.Word1 >> 25) & 3; + return (RE.Word1 >> 5) & 3; +} + +static unsigned +getScatteredRelocationLength(const macho::RelocationEntry &RE) { + return (RE.Word0 >> 28) & 3; } -void MachOObjectFile::getSymbol64TableEntry(DataRefImpl DRI, - InMemoryStruct<macho::Symbol64TableEntry> &Res) const { - InMemoryStruct<macho::SymtabLoadCommand> SymtabLoadCmd; - LoadCommandInfo LCI = MachOObj->getLoadCommandInfo(DRI.d.a); - MachOObj->ReadSymtabLoadCommand(LCI, SymtabLoadCmd); +static unsigned getPlainRelocationType(const MachOObjectFile *O, + const macho::RelocationEntry &RE) { + if (O->isLittleEndian()) + return RE.Word1 >> 28; + return RE.Word1 & 0xf; +} - if (RegisteredStringTable != DRI.d.a) { - MachOObj->RegisterStringTable(*SymtabLoadCmd); - 
RegisteredStringTable = DRI.d.a; +static unsigned getScatteredRelocationType(const macho::RelocationEntry &RE) { + return (RE.Word0 >> 24) & 0xf; +} + +static uint32_t getSectionFlags(const MachOObjectFile *O, + DataRefImpl Sec) { + if (O->is64Bit()) { + macho::Section64 Sect = O->getSection64(Sec); + return Sect.Flags; } + macho::Section Sect = O->getSection(Sec); + return Sect.Flags; +} + +MachOObjectFile::MachOObjectFile(MemoryBuffer *Object, + bool IsLittleEndian, bool Is64bits, + error_code &ec) + : ObjectFile(getMachOType(IsLittleEndian, Is64bits), Object), + SymtabLoadCmd(NULL), DysymtabLoadCmd(NULL) { + uint32_t LoadCommandCount = this->getHeader().NumLoadCommands; + macho::LoadCommandType SegmentLoadType = is64Bit() ? + macho::LCT_Segment64 : macho::LCT_Segment; + + MachOObjectFile::LoadCommandInfo Load = getFirstLoadCommandInfo(); + for (unsigned I = 0; ; ++I) { + if (Load.C.Type == macho::LCT_Symtab) { + assert(!SymtabLoadCmd && "Multiple symbol tables"); + SymtabLoadCmd = Load.Ptr; + } else if (Load.C.Type == macho::LCT_Dysymtab) { + assert(!DysymtabLoadCmd && "Multiple dynamic symbol tables"); + DysymtabLoadCmd = Load.Ptr; + } else if (Load.C.Type == SegmentLoadType) { + uint32_t NumSections = getSegmentLoadCommandNumSections(this, Load); + for (unsigned J = 0; J < NumSections; ++J) { + const char *Sec = getSectionPtr(this, Load, J); + Sections.push_back(Sec); + } + } - MachOObj->ReadSymbol64TableEntry(SymtabLoadCmd->SymbolTableOffset, DRI.d.b, - Res); + if (I == LoadCommandCount - 1) + break; + else + Load = getNextLoadCommandInfo(Load); + } } +error_code MachOObjectFile::getSymbolNext(DataRefImpl Symb, + SymbolRef &Res) const { + unsigned SymbolTableEntrySize = is64Bit() ? + sizeof(macho::Symbol64TableEntry) : + sizeof(macho::SymbolTableEntry); + Symb.p += SymbolTableEntrySize; + Res = SymbolRef(Symb, this); + return object_error::success; +} -error_code MachOObjectFile::getSymbolNext(DataRefImpl DRI, - SymbolRef &Result) const { - DRI.d.b++; - moveToNextSymbol(DRI); - Result = SymbolRef(DRI, this); +error_code MachOObjectFile::getSymbolName(DataRefImpl Symb, + StringRef &Res) const { + StringRef StringTable = getStringTableData(); + SymbolTableEntryBase Entry = getSymbolTableEntryBase(this, Symb); + const char *Start = &StringTable.data()[Entry.StringIndex]; + Res = StringRef(Start); return object_error::success; } -error_code MachOObjectFile::getSymbolName(DataRefImpl DRI, - StringRef &Result) const { - if (MachOObj->is64Bit()) { - InMemoryStruct<macho::Symbol64TableEntry> Entry; - getSymbol64TableEntry(DRI, Entry); - Result = MachOObj->getStringAtIndex(Entry->StringIndex); +error_code MachOObjectFile::getSymbolAddress(DataRefImpl Symb, + uint64_t &Res) const { + if (is64Bit()) { + macho::Symbol64TableEntry Entry = getSymbol64TableEntry(Symb); + Res = Entry.Value; } else { - InMemoryStruct<macho::SymbolTableEntry> Entry; - getSymbolTableEntry(DRI, Entry); - Result = MachOObj->getStringAtIndex(Entry->StringIndex); + macho::SymbolTableEntry Entry = getSymbolTableEntry(Symb); + Res = Entry.Value; } return object_error::success; } -error_code MachOObjectFile::getSymbolFileOffset(DataRefImpl DRI, - uint64_t &Result) const { - if (MachOObj->is64Bit()) { - InMemoryStruct<macho::Symbol64TableEntry> Entry; - getSymbol64TableEntry(DRI, Entry); - Result = Entry->Value; - if (Entry->SectionIndex) { - InMemoryStruct<macho::Section64> Section; - getSection64(Sections[Entry->SectionIndex-1], Section); - Result += Section->Offset - Section->Address; - } - } else { - 
InMemoryStruct<macho::SymbolTableEntry> Entry; - getSymbolTableEntry(DRI, Entry); - Result = Entry->Value; - if (Entry->SectionIndex) { - InMemoryStruct<macho::Section> Section; - getSection(Sections[Entry->SectionIndex-1], Section); - Result += Section->Offset - Section->Address; +error_code +MachOObjectFile::getSymbolFileOffset(DataRefImpl Symb, + uint64_t &Res) const { + SymbolTableEntryBase Entry = getSymbolTableEntryBase(this, Symb); + getSymbolAddress(Symb, Res); + if (Entry.SectionIndex) { + uint64_t Delta; + DataRefImpl SecRel; + SecRel.d.a = Entry.SectionIndex-1; + if (is64Bit()) { + macho::Section64 Sec = getSection64(SecRel); + Delta = Sec.Offset - Sec.Address; + } else { + macho::Section Sec = getSection(SecRel); + Delta = Sec.Offset - Sec.Address; } + + Res += Delta; } return object_error::success; } -error_code MachOObjectFile::getSymbolAddress(DataRefImpl DRI, - uint64_t &Result) const { - if (MachOObj->is64Bit()) { - InMemoryStruct<macho::Symbol64TableEntry> Entry; - getSymbol64TableEntry(DRI, Entry); - Result = Entry->Value; +error_code MachOObjectFile::getSymbolAlignment(DataRefImpl DRI, + uint32_t &Result) const { + uint32_t flags; + this->getSymbolFlags(DRI, flags); + if (flags & SymbolRef::SF_Common) { + SymbolTableEntryBase Entry = getSymbolTableEntryBase(this, DRI); + Result = 1 << MachO::GET_COMM_ALIGN(Entry.Flags); } else { - InMemoryStruct<macho::SymbolTableEntry> Entry; - getSymbolTableEntry(DRI, Entry); - Result = Entry->Value; + Result = 0; } return object_error::success; } error_code MachOObjectFile::getSymbolSize(DataRefImpl DRI, uint64_t &Result) const { - uint32_t LoadCommandCount = MachOObj->getHeader().NumLoadCommands; uint64_t BeginOffset; uint64_t EndOffset = 0; uint8_t SectionIndex; - if (MachOObj->is64Bit()) { - InMemoryStruct<macho::Symbol64TableEntry> Entry; - getSymbol64TableEntry(DRI, Entry); - BeginOffset = Entry->Value; - SectionIndex = Entry->SectionIndex; - if (!SectionIndex) { - uint32_t flags = SymbolRef::SF_None; - getSymbolFlags(DRI, flags); - if (flags & SymbolRef::SF_Common) - Result = Entry->Value; - else - Result = UnknownAddressOrSize; - return object_error::success; - } - // Unfortunately symbols are unsorted so we need to touch all - // symbols from load command - DRI.d.b = 0; - uint32_t Command = DRI.d.a; - while (Command == DRI.d.a) { - moveToNextSymbol(DRI); - if (DRI.d.a < LoadCommandCount) { - getSymbol64TableEntry(DRI, Entry); - if (Entry->SectionIndex == SectionIndex && Entry->Value > BeginOffset) - if (!EndOffset || Entry->Value < EndOffset) - EndOffset = Entry->Value; - } - DRI.d.b++; - } - } else { - InMemoryStruct<macho::SymbolTableEntry> Entry; - getSymbolTableEntry(DRI, Entry); - BeginOffset = Entry->Value; - SectionIndex = Entry->SectionIndex; - if (!SectionIndex) { - uint32_t flags = SymbolRef::SF_None; - getSymbolFlags(DRI, flags); - if (flags & SymbolRef::SF_Common) - Result = Entry->Value; - else - Result = UnknownAddressOrSize; - return object_error::success; - } - // Unfortunately symbols are unsorted so we need to touch all - // symbols from load command - DRI.d.b = 0; - uint32_t Command = DRI.d.a; - while (Command == DRI.d.a) { - moveToNextSymbol(DRI); - if (DRI.d.a < LoadCommandCount) { - getSymbolTableEntry(DRI, Entry); - if (Entry->SectionIndex == SectionIndex && Entry->Value > BeginOffset) - if (!EndOffset || Entry->Value < EndOffset) - EndOffset = Entry->Value; - } - DRI.d.b++; - } + + SymbolTableEntryBase Entry = getSymbolTableEntryBase(this, DRI); + uint64_t Value; + getSymbolAddress(DRI, Value); + + 
BeginOffset = Value; + + SectionIndex = Entry.SectionIndex; + if (!SectionIndex) { + uint32_t flags = SymbolRef::SF_None; + this->getSymbolFlags(DRI, flags); + if (flags & SymbolRef::SF_Common) + Result = Value; + else + Result = UnknownAddressOrSize; + return object_error::success; + } + // Unfortunately symbols are unsorted so we need to touch all + // symbols from load command + error_code ec; + for (symbol_iterator I = begin_symbols(), E = end_symbols(); I != E; + I.increment(ec)) { + DataRefImpl DRI = I->getRawDataRefImpl(); + Entry = getSymbolTableEntryBase(this, DRI); + getSymbolAddress(DRI, Value); + if (Entry.SectionIndex == SectionIndex && Value > BeginOffset) + if (!EndOffset || Value < EndOffset) + EndOffset = Value; } if (!EndOffset) { uint64_t Size; - getSectionSize(Sections[SectionIndex-1], Size); - getSectionAddress(Sections[SectionIndex-1], EndOffset); + DataRefImpl Sec; + Sec.d.a = SectionIndex-1; + getSectionSize(Sec, Size); + getSectionAddress(Sec, EndOffset); EndOffset += Size; } Result = EndOffset - BeginOffset; return object_error::success; } -error_code MachOObjectFile::getSymbolNMTypeChar(DataRefImpl DRI, - char &Result) const { - uint8_t Type, Flags; - if (MachOObj->is64Bit()) { - InMemoryStruct<macho::Symbol64TableEntry> Entry; - getSymbol64TableEntry(DRI, Entry); - Type = Entry->Type; - Flags = Entry->Flags; - } else { - InMemoryStruct<macho::SymbolTableEntry> Entry; - getSymbolTableEntry(DRI, Entry); - Type = Entry->Type; - Flags = Entry->Flags; +error_code MachOObjectFile::getSymbolType(DataRefImpl Symb, + SymbolRef::Type &Res) const { + SymbolTableEntryBase Entry = getSymbolTableEntryBase(this, Symb); + uint8_t n_type = Entry.Type; + + Res = SymbolRef::ST_Other; + + // If this is a STAB debugging symbol, we can do nothing more. 
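
The getSymbolSize logic above has to estimate a size because a Mach-O nlist entry carries only an address: the code takes the symbol's own value as the start, scans every other symbol in the same section for the smallest value that is still greater (the table is unsorted, so all entries must be touched), and falls back to the end of the section when no later symbol exists. A compact sketch of that estimate using plain structs instead of the object-file iterators:

#include <cstdint>
#include <vector>

struct ToySymbol  { uint8_t SectionIndex; uint64_t Value; };
struct ToySection { uint64_t Address; uint64_t Size; };

// Size of Symbols[Idx]: distance to the next symbol in the same section, or
// to the end of that section if it is the last one. SectionIndex is 1-based,
// matching the Mach-O convention (0 means "no section").
uint64_t estimateSymbolSize(const std::vector<ToySymbol> &Symbols,
                            const std::vector<ToySection> &Sections,
                            size_t Idx) {
  const ToySymbol &S = Symbols[Idx];
  if (S.SectionIndex == 0)
    return 0;                                   // undefined symbol: no estimate here
  uint64_t Begin = S.Value, End = 0;
  for (size_t I = 0; I != Symbols.size(); ++I)  // unsorted: scan them all
    if (Symbols[I].SectionIndex == S.SectionIndex && Symbols[I].Value > Begin)
      if (!End || Symbols[I].Value < End)
        End = Symbols[I].Value;
  if (!End) {
    const ToySection &Sec = Sections[S.SectionIndex - 1];
    End = Sec.Address + Sec.Size;               // no later symbol: run to section end
  }
  return End - Begin;
}
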
+ if (n_type & MachO::NlistMaskStab) { + Res = SymbolRef::ST_Debug; + return object_error::success; + } + + switch (n_type & MachO::NlistMaskType) { + case MachO::NListTypeUndefined : + Res = SymbolRef::ST_Unknown; + break; + case MachO::NListTypeSection : + Res = SymbolRef::ST_Function; + break; } + return object_error::success; +} + +error_code MachOObjectFile::getSymbolNMTypeChar(DataRefImpl Symb, + char &Res) const { + SymbolTableEntryBase Entry = getSymbolTableEntryBase(this, Symb); + uint8_t Type = Entry.Type; + uint16_t Flags = Entry.Flags; char Char; switch (Type & macho::STF_TypeMask) { @@ -274,25 +600,16 @@ error_code MachOObjectFile::getSymbolNMTypeChar(DataRefImpl DRI, if (Flags & (macho::STF_External | macho::STF_PrivateExtern)) Char = toupper(static_cast<unsigned char>(Char)); - Result = Char; + Res = Char; return object_error::success; } error_code MachOObjectFile::getSymbolFlags(DataRefImpl DRI, uint32_t &Result) const { - uint16_t MachOFlags; - uint8_t MachOType; - if (MachOObj->is64Bit()) { - InMemoryStruct<macho::Symbol64TableEntry> Entry; - getSymbol64TableEntry(DRI, Entry); - MachOFlags = Entry->Flags; - MachOType = Entry->Type; - } else { - InMemoryStruct<macho::SymbolTableEntry> Entry; - getSymbolTableEntry(DRI, Entry); - MachOFlags = Entry->Flags; - MachOType = Entry->Type; - } + SymbolTableEntryBase Entry = getSymbolTableEntryBase(this, DRI); + + uint8_t MachOType = Entry.Type; + uint16_t MachOFlags = Entry.Flags; // TODO: Correctly set SF_ThreadLocal Result = SymbolRef::SF_None; @@ -305,8 +622,12 @@ error_code MachOObjectFile::getSymbolFlags(DataRefImpl DRI, if (MachOType & MachO::NlistMaskExternal) { Result |= SymbolRef::SF_Global; - if ((MachOType & MachO::NlistMaskType) == MachO::NListTypeUndefined) - Result |= SymbolRef::SF_Common; + if ((MachOType & MachO::NlistMaskType) == MachO::NListTypeUndefined) { + uint64_t Value; + getSymbolAddress(DRI, Value); + if (Value) + Result |= SymbolRef::SF_Common; + } } if (MachOFlags & (MachO::NListDescWeakRef | MachO::NListDescWeakDef)) @@ -318,55 +639,20 @@ error_code MachOObjectFile::getSymbolFlags(DataRefImpl DRI, return object_error::success; } -error_code MachOObjectFile::getSymbolSection(DataRefImpl Symb, - section_iterator &Res) const { - uint8_t index; - if (MachOObj->is64Bit()) { - InMemoryStruct<macho::Symbol64TableEntry> Entry; - getSymbol64TableEntry(Symb, Entry); - index = Entry->SectionIndex; - } else { - InMemoryStruct<macho::SymbolTableEntry> Entry; - getSymbolTableEntry(Symb, Entry); - index = Entry->SectionIndex; - } +error_code +MachOObjectFile::getSymbolSection(DataRefImpl Symb, + section_iterator &Res) const { + SymbolTableEntryBase Entry = getSymbolTableEntryBase(this, Symb); + uint8_t index = Entry.SectionIndex; - if (index == 0) + if (index == 0) { Res = end_sections(); - else - Res = section_iterator(SectionRef(Sections[index-1], this)); - - return object_error::success; -} - -error_code MachOObjectFile::getSymbolType(DataRefImpl Symb, - SymbolRef::Type &Res) const { - uint8_t n_type; - if (MachOObj->is64Bit()) { - InMemoryStruct<macho::Symbol64TableEntry> Entry; - getSymbol64TableEntry(Symb, Entry); - n_type = Entry->Type; } else { - InMemoryStruct<macho::SymbolTableEntry> Entry; - getSymbolTableEntry(Symb, Entry); - n_type = Entry->Type; - } - Res = SymbolRef::ST_Other; - - // If this is a STAB debugging symbol, we can do nothing more. 
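
The type and flag accessors in this area all decode the same few bit-fields of the nlist entry: a STAB mask that marks debugging stabs, a type field whose "section" value means a defined symbol, and an external bit that, combined with an undefined type and a non-zero value, marks a common symbol (which is why getSymbolFlags now checks the address before setting SF_Common). A rough sketch of that decoding; the mask values follow the usual Mach-O layout (N_STAB = 0xe0, N_TYPE = 0x0e, N_EXT = 0x01) but should be treated as illustrative rather than authoritative:

#include <cstdint>

enum ToyKind { Debug, Undefined, Defined, Other };

// Rough nlist n_type classification.
ToyKind classify(uint8_t n_type) {
  if (n_type & 0xe0)            // any stab bit set: purely a debug entry
    return Debug;
  switch (n_type & 0x0e) {      // type field
  case 0x0: return Undefined;   // N_UNDF
  case 0xe: return Defined;     // N_SECT: defined in some section
  default:  return Other;       // absolute, indirect, prebound, ...
  }
}

// A common symbol is an undefined external whose value is non-zero.
bool isCommon(uint8_t n_type, uint64_t Value) {
  return (n_type & 0x01) && ((n_type & 0x0e) == 0x0) && Value != 0;
}
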
- if (n_type & MachO::NlistMaskStab) { - Res = SymbolRef::ST_Debug; - return object_error::success; + DataRefImpl DRI; + DRI.d.a = index - 1; + Res = section_iterator(SectionRef(DRI, this)); } - switch (n_type & MachO::NlistMaskType) { - case MachO::NListTypeUndefined : - Res = SymbolRef::ST_Unknown; - break; - case MachO::NListTypeSection : - Res = SymbolRef::ST_Function; - break; - } return object_error::success; } @@ -375,242 +661,101 @@ error_code MachOObjectFile::getSymbolValue(DataRefImpl Symb, report_fatal_error("getSymbolValue unimplemented in MachOObjectFile"); } -symbol_iterator MachOObjectFile::begin_symbols() const { - // DRI.d.a = segment number; DRI.d.b = symbol index. - DataRefImpl DRI; - moveToNextSymbol(DRI); - return symbol_iterator(SymbolRef(DRI, this)); -} - -symbol_iterator MachOObjectFile::end_symbols() const { - DataRefImpl DRI; - DRI.d.a = MachOObj->getHeader().NumLoadCommands; - return symbol_iterator(SymbolRef(DRI, this)); -} - -symbol_iterator MachOObjectFile::begin_dynamic_symbols() const { - // TODO: implement - report_fatal_error("Dynamic symbols unimplemented in MachOObjectFile"); -} - -symbol_iterator MachOObjectFile::end_dynamic_symbols() const { - // TODO: implement - report_fatal_error("Dynamic symbols unimplemented in MachOObjectFile"); -} - -library_iterator MachOObjectFile::begin_libraries_needed() const { - // TODO: implement - report_fatal_error("Needed libraries unimplemented in MachOObjectFile"); -} - -library_iterator MachOObjectFile::end_libraries_needed() const { - // TODO: implement - report_fatal_error("Needed libraries unimplemented in MachOObjectFile"); -} - -StringRef MachOObjectFile::getLoadName() const { - // TODO: Implement - report_fatal_error("get_load_name() unimplemented in MachOObjectFile"); -} - -/*===-- Sections ----------------------------------------------------------===*/ - -void MachOObjectFile::moveToNextSection(DataRefImpl &DRI) const { - uint32_t LoadCommandCount = MachOObj->getHeader().NumLoadCommands; - while (DRI.d.a < LoadCommandCount) { - LoadCommandInfo LCI = MachOObj->getLoadCommandInfo(DRI.d.a); - if (LCI.Command.Type == macho::LCT_Segment) { - InMemoryStruct<macho::SegmentLoadCommand> SegmentLoadCmd; - MachOObj->ReadSegmentLoadCommand(LCI, SegmentLoadCmd); - if (DRI.d.b < SegmentLoadCmd->NumSections) - return; - } else if (LCI.Command.Type == macho::LCT_Segment64) { - InMemoryStruct<macho::Segment64LoadCommand> Segment64LoadCmd; - MachOObj->ReadSegment64LoadCommand(LCI, Segment64LoadCmd); - if (DRI.d.b < Segment64LoadCmd->NumSections) - return; - } - - DRI.d.a++; - DRI.d.b = 0; - } -} - -error_code MachOObjectFile::getSectionNext(DataRefImpl DRI, - SectionRef &Result) const { - DRI.d.b++; - moveToNextSection(DRI); - Result = SectionRef(DRI, this); +error_code MachOObjectFile::getSectionNext(DataRefImpl Sec, + SectionRef &Res) const { + Sec.d.a++; + Res = SectionRef(Sec, this); return object_error::success; } -void -MachOObjectFile::getSection(DataRefImpl DRI, - InMemoryStruct<macho::Section> &Res) const { - LoadCommandInfo LCI = MachOObj->getLoadCommandInfo(DRI.d.a); - MachOObj->ReadSection(LCI, DRI.d.b, Res); -} - -std::size_t MachOObjectFile::getSectionIndex(DataRefImpl Sec) const { - SectionList::const_iterator loc = - std::find(Sections.begin(), Sections.end(), Sec); - assert(loc != Sections.end() && "Sec is not a valid section!"); - return std::distance(Sections.begin(), loc); -} - -void -MachOObjectFile::getSection64(DataRefImpl DRI, - InMemoryStruct<macho::Section64> &Res) const { - LoadCommandInfo LCI = 
MachOObj->getLoadCommandInfo(DRI.d.a); - MachOObj->ReadSection64(LCI, DRI.d.b, Res); -} - -static bool is64BitLoadCommand(const MachOObject *MachOObj, DataRefImpl DRI) { - LoadCommandInfo LCI = MachOObj->getLoadCommandInfo(DRI.d.a); - if (LCI.Command.Type == macho::LCT_Segment64) - return true; - assert(LCI.Command.Type == macho::LCT_Segment && "Unexpected Type."); - return false; -} - -static StringRef parseSegmentOrSectionName(const char *P) { - if (P[15] == 0) - // Null terminated. - return P; - // Not null terminated, so this is a 16 char string. - return StringRef(P, 16); -} - -error_code MachOObjectFile::getSectionName(DataRefImpl DRI, - StringRef &Result) const { - if (is64BitLoadCommand(MachOObj.get(), DRI)) { - LoadCommandInfo LCI = MachOObj->getLoadCommandInfo(DRI.d.a); - unsigned SectionOffset = LCI.Offset + sizeof(macho::Segment64LoadCommand) + - DRI.d.b * sizeof(macho::Section64); - StringRef Data = MachOObj->getData(SectionOffset, sizeof(macho::Section64)); - const macho::Section64 *sec = - reinterpret_cast<const macho::Section64*>(Data.data()); - Result = parseSegmentOrSectionName(sec->Name); - } else { - LoadCommandInfo LCI = MachOObj->getLoadCommandInfo(DRI.d.a); - unsigned SectionOffset = LCI.Offset + sizeof(macho::SegmentLoadCommand) + - DRI.d.b * sizeof(macho::Section); - StringRef Data = MachOObj->getData(SectionOffset, sizeof(macho::Section)); - const macho::Section *sec = - reinterpret_cast<const macho::Section*>(Data.data()); - Result = parseSegmentOrSectionName(sec->Name); - } +error_code +MachOObjectFile::getSectionName(DataRefImpl Sec, StringRef &Result) const { + ArrayRef<char> Raw = getSectionRawName(Sec); + Result = parseSegmentOrSectionName(Raw.data()); return object_error::success; } -error_code MachOObjectFile::getSectionFinalSegmentName(DataRefImpl Sec, - StringRef &Res) const { - if (is64BitLoadCommand(MachOObj.get(), Sec)) { - LoadCommandInfo LCI = MachOObj->getLoadCommandInfo(Sec.d.a); - unsigned SectionOffset = LCI.Offset + sizeof(macho::Segment64LoadCommand) + - Sec.d.b * sizeof(macho::Section64); - StringRef Data = MachOObj->getData(SectionOffset, sizeof(macho::Section64)); - const macho::Section64 *sec = - reinterpret_cast<const macho::Section64*>(Data.data()); - Res = parseSegmentOrSectionName(sec->SegmentName); +error_code +MachOObjectFile::getSectionAddress(DataRefImpl Sec, uint64_t &Res) const { + if (is64Bit()) { + macho::Section64 Sect = getSection64(Sec); + Res = Sect.Address; } else { - LoadCommandInfo LCI = MachOObj->getLoadCommandInfo(Sec.d.a); - unsigned SectionOffset = LCI.Offset + sizeof(macho::SegmentLoadCommand) + - Sec.d.b * sizeof(macho::Section); - StringRef Data = MachOObj->getData(SectionOffset, sizeof(macho::Section)); - const macho::Section *sec = - reinterpret_cast<const macho::Section*>(Data.data()); - Res = parseSegmentOrSectionName(sec->SegmentName); + macho::Section Sect = getSection(Sec); + Res = Sect.Address; } return object_error::success; } -error_code MachOObjectFile::getSectionAddress(DataRefImpl DRI, - uint64_t &Result) const { - if (is64BitLoadCommand(MachOObj.get(), DRI)) { - InMemoryStruct<macho::Section64> Sect; - getSection64(DRI, Sect); - Result = Sect->Address; +error_code +MachOObjectFile::getSectionSize(DataRefImpl Sec, uint64_t &Res) const { + if (is64Bit()) { + macho::Section64 Sect = getSection64(Sec); + Res = Sect.Size; } else { - InMemoryStruct<macho::Section> Sect; - getSection(DRI, Sect); - Result = Sect->Address; + macho::Section Sect = getSection(Sec); + Res = Sect.Size; } - return 
object_error::success; -} -error_code MachOObjectFile::getSectionSize(DataRefImpl DRI, - uint64_t &Result) const { - if (is64BitLoadCommand(MachOObj.get(), DRI)) { - InMemoryStruct<macho::Section64> Sect; - getSection64(DRI, Sect); - Result = Sect->Size; - } else { - InMemoryStruct<macho::Section> Sect; - getSection(DRI, Sect); - Result = Sect->Size; - } return object_error::success; } -error_code MachOObjectFile::getSectionContents(DataRefImpl DRI, - StringRef &Result) const { - if (is64BitLoadCommand(MachOObj.get(), DRI)) { - InMemoryStruct<macho::Section64> Sect; - getSection64(DRI, Sect); - Result = MachOObj->getData(Sect->Offset, Sect->Size); +error_code +MachOObjectFile::getSectionContents(DataRefImpl Sec, StringRef &Res) const { + uint32_t Offset; + uint64_t Size; + + if (is64Bit()) { + macho::Section64 Sect = getSection64(Sec); + Offset = Sect.Offset; + Size = Sect.Size; } else { - InMemoryStruct<macho::Section> Sect; - getSection(DRI, Sect); - Result = MachOObj->getData(Sect->Offset, Sect->Size); + macho::Section Sect =getSection(Sec); + Offset = Sect.Offset; + Size = Sect.Size; } + + Res = this->getData().substr(Offset, Size); return object_error::success; } -error_code MachOObjectFile::getSectionAlignment(DataRefImpl DRI, - uint64_t &Result) const { - if (is64BitLoadCommand(MachOObj.get(), DRI)) { - InMemoryStruct<macho::Section64> Sect; - getSection64(DRI, Sect); - Result = uint64_t(1) << Sect->Align; +error_code +MachOObjectFile::getSectionAlignment(DataRefImpl Sec, uint64_t &Res) const { + uint32_t Align; + if (is64Bit()) { + macho::Section64 Sect = getSection64(Sec); + Align = Sect.Align; } else { - InMemoryStruct<macho::Section> Sect; - getSection(DRI, Sect); - Result = uint64_t(1) << Sect->Align; + macho::Section Sect = getSection(Sec); + Align = Sect.Align; } + + Res = uint64_t(1) << Align; return object_error::success; } -error_code MachOObjectFile::isSectionText(DataRefImpl DRI, - bool &Result) const { - if (is64BitLoadCommand(MachOObj.get(), DRI)) { - InMemoryStruct<macho::Section64> Sect; - getSection64(DRI, Sect); - Result = Sect->Flags & macho::SF_PureInstructions; - } else { - InMemoryStruct<macho::Section> Sect; - getSection(DRI, Sect); - Result = Sect->Flags & macho::SF_PureInstructions; - } +error_code +MachOObjectFile::isSectionText(DataRefImpl Sec, bool &Res) const { + uint32_t Flags = getSectionFlags(this, Sec); + Res = Flags & macho::SF_PureInstructions; return object_error::success; } -error_code MachOObjectFile::isSectionData(DataRefImpl DRI, - bool &Result) const { +error_code MachOObjectFile::isSectionData(DataRefImpl DRI, bool &Result) const { // FIXME: Unimplemented. Result = false; return object_error::success; } -error_code MachOObjectFile::isSectionBSS(DataRefImpl DRI, - bool &Result) const { +error_code MachOObjectFile::isSectionBSS(DataRefImpl DRI, bool &Result) const { // FIXME: Unimplemented. Result = false; return object_error::success; } -error_code MachOObjectFile::isSectionRequiredForExecution(DataRefImpl Sec, - bool &Result) const { +error_code +MachOObjectFile::isSectionRequiredForExecution(DataRefImpl Sec, + bool &Result) const { // FIXME: Unimplemented. 
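
Several of the section accessors above go through parseSegmentOrSectionName because Mach-O stores segment and section names as fixed 16-byte fields: shorter names are NUL-padded, but a name that uses all 16 bytes has no terminator at all, so the string must be built with an explicit length in that case. A tiny sketch of the two cases, with hypothetical names:

#include <cstring>
#include <string>

// Fixed 16-byte name field, as in the macho::Section layout.
std::string parseName(const char Name[16]) {
  if (Name[15] == 0)
    return std::string(Name);      // NUL-terminated: ordinary C string
  return std::string(Name, 16);    // all 16 bytes used: take explicit length
}

// Usage sketch:
//   char Short[16] = "__text";                  // padded with NULs
//   char Full[16];
//   std::memcpy(Full, "0123456789abcdef", 16);  // hypothetical full-width name
//   parseName(Short) == "__text"  and  parseName(Full).size() == 16
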
Result = true; return object_error::success; @@ -623,22 +768,12 @@ error_code MachOObjectFile::isSectionVirtual(DataRefImpl Sec, return object_error::success; } -error_code MachOObjectFile::isSectionZeroInit(DataRefImpl DRI, - bool &Result) const { - if (MachOObj->is64Bit()) { - InMemoryStruct<macho::Section64> Sect; - getSection64(DRI, Sect); - unsigned SectionType = Sect->Flags & MachO::SectionFlagMaskSectionType; - Result = (SectionType == MachO::SectionTypeZeroFill || - SectionType == MachO::SectionTypeZeroFillLarge); - } else { - InMemoryStruct<macho::Section> Sect; - getSection(DRI, Sect); - unsigned SectionType = Sect->Flags & MachO::SectionFlagMaskSectionType; - Result = (SectionType == MachO::SectionTypeZeroFill || - SectionType == MachO::SectionTypeZeroFillLarge); - } - +error_code +MachOObjectFile::isSectionZeroInit(DataRefImpl Sec, bool &Res) const { + uint32_t Flags = getSectionFlags(this, Sec); + unsigned SectionType = Flags & MachO::SectionFlagMaskSectionType; + Res = SectionType == MachO::SectionTypeZeroFill || + SectionType == MachO::SectionTypeZeroFillLarge; return object_error::success; } @@ -653,11 +788,11 @@ error_code MachOObjectFile::isSectionReadOnlyData(DataRefImpl Sec, return object_error::success; } -error_code MachOObjectFile::sectionContainsSymbol(DataRefImpl Sec, - DataRefImpl Symb, - bool &Result) const { +error_code +MachOObjectFile::sectionContainsSymbol(DataRefImpl Sec, DataRefImpl Symb, + bool &Result) const { SymbolRef::Type ST; - getSymbolType(Symb, ST); + this->getSymbolType(Symb, ST); if (ST == SymbolRef::ST_Unknown) { Result = false; return object_error::success; @@ -668,164 +803,107 @@ error_code MachOObjectFile::sectionContainsSymbol(DataRefImpl Sec, getSectionSize(Sec, SectEnd); SectEnd += SectBegin; - if (MachOObj->is64Bit()) { - InMemoryStruct<macho::Symbol64TableEntry> Entry; - getSymbol64TableEntry(Symb, Entry); - uint64_t SymAddr= Entry->Value; - Result = (SymAddr >= SectBegin) && (SymAddr < SectEnd); - } else { - InMemoryStruct<macho::SymbolTableEntry> Entry; - getSymbolTableEntry(Symb, Entry); - uint64_t SymAddr= Entry->Value; - Result = (SymAddr >= SectBegin) && (SymAddr < SectEnd); - } + uint64_t SymAddr; + getSymbolAddress(Symb, SymAddr); + Result = (SymAddr >= SectBegin) && (SymAddr < SectEnd); return object_error::success; } relocation_iterator MachOObjectFile::getSectionRelBegin(DataRefImpl Sec) const { - DataRefImpl ret; - ret.d.b = getSectionIndex(Sec); - return relocation_iterator(RelocationRef(ret, this)); -} -relocation_iterator MachOObjectFile::getSectionRelEnd(DataRefImpl Sec) const { - uint32_t last_reloc; - if (is64BitLoadCommand(MachOObj.get(), Sec)) { - InMemoryStruct<macho::Section64> Sect; - getSection64(Sec, Sect); - last_reloc = Sect->NumRelocationTableEntries; + uint32_t Offset; + if (is64Bit()) { + macho::Section64 Sect = getSection64(Sec); + Offset = Sect.RelocationTableOffset; } else { - InMemoryStruct<macho::Section> Sect; - getSection(Sec, Sect); - last_reloc = Sect->NumRelocationTableEntries; + macho::Section Sect = getSection(Sec); + Offset = Sect.RelocationTableOffset; } - DataRefImpl ret; - ret.d.a = last_reloc; - ret.d.b = getSectionIndex(Sec); - return relocation_iterator(RelocationRef(ret, this)); -} - -section_iterator MachOObjectFile::begin_sections() const { - DataRefImpl DRI; - moveToNextSection(DRI); - return section_iterator(SectionRef(DRI, this)); -} -section_iterator MachOObjectFile::end_sections() const { - DataRefImpl DRI; - DRI.d.a = MachOObj->getHeader().NumLoadCommands; - return 
section_iterator(SectionRef(DRI, this)); + DataRefImpl Ret; + Ret.p = reinterpret_cast<uintptr_t>(getPtr(this, Offset)); + return relocation_iterator(RelocationRef(Ret, this)); } -/*===-- Relocations -------------------------------------------------------===*/ - -void MachOObjectFile:: -getRelocation(DataRefImpl Rel, - InMemoryStruct<macho::RelocationEntry> &Res) const { - uint32_t relOffset; - if (MachOObj->is64Bit()) { - InMemoryStruct<macho::Section64> Sect; - getSection64(Sections[Rel.d.b], Sect); - relOffset = Sect->RelocationTableOffset; +relocation_iterator +MachOObjectFile::getSectionRelEnd(DataRefImpl Sec) const { + uint32_t Offset; + uint32_t Num; + if (is64Bit()) { + macho::Section64 Sect = getSection64(Sec); + Offset = Sect.RelocationTableOffset; + Num = Sect.NumRelocationTableEntries; } else { - InMemoryStruct<macho::Section> Sect; - getSection(Sections[Rel.d.b], Sect); - relOffset = Sect->RelocationTableOffset; + macho::Section Sect = getSection(Sec); + Offset = Sect.RelocationTableOffset; + Num = Sect.NumRelocationTableEntries; } - MachOObj->ReadRelocationEntry(relOffset, Rel.d.a, Res); + + const macho::RelocationEntry *P = + reinterpret_cast<const macho::RelocationEntry*>(getPtr(this, Offset)); + + DataRefImpl Ret; + Ret.p = reinterpret_cast<uintptr_t>(P + Num); + return relocation_iterator(RelocationRef(Ret, this)); } + error_code MachOObjectFile::getRelocationNext(DataRefImpl Rel, RelocationRef &Res) const { - ++Rel.d.a; + const macho::RelocationEntry *P = + reinterpret_cast<const macho::RelocationEntry *>(Rel.p); + Rel.p = reinterpret_cast<uintptr_t>(P + 1); Res = RelocationRef(Rel, this); return object_error::success; } -error_code MachOObjectFile::getRelocationAddress(DataRefImpl Rel, - uint64_t &Res) const { - const uint8_t* sectAddress = 0; - if (MachOObj->is64Bit()) { - InMemoryStruct<macho::Section64> Sect; - getSection64(Sections[Rel.d.b], Sect); - sectAddress += Sect->Address; - } else { - InMemoryStruct<macho::Section> Sect; - getSection(Sections[Rel.d.b], Sect); - sectAddress += Sect->Address; - } - InMemoryStruct<macho::RelocationEntry> RE; - getRelocation(Rel, RE); - - unsigned Arch = getArch(); - bool isScattered = (Arch != Triple::x86_64) && - (RE->Word0 & macho::RF_Scattered); - uint64_t RelAddr = 0; - if (isScattered) - RelAddr = RE->Word0 & 0xFFFFFF; - else - RelAddr = RE->Word0; - Res = reinterpret_cast<uintptr_t>(sectAddress + RelAddr); - return object_error::success; +error_code +MachOObjectFile::getRelocationAddress(DataRefImpl Rel, uint64_t &Res) const { + report_fatal_error("getRelocationAddress not implemented in MachOObjectFile"); } + error_code MachOObjectFile::getRelocationOffset(DataRefImpl Rel, uint64_t &Res) const { - InMemoryStruct<macho::RelocationEntry> RE; - getRelocation(Rel, RE); - - unsigned Arch = getArch(); - bool isScattered = (Arch != Triple::x86_64) && - (RE->Word0 & macho::RF_Scattered); - if (isScattered) - Res = RE->Word0 & 0xFFFFFF; - else - Res = RE->Word0; + macho::RelocationEntry RE = getRelocation(Rel); + Res = getAnyRelocationAddress(RE); return object_error::success; } -error_code MachOObjectFile::getRelocationSymbol(DataRefImpl Rel, - SymbolRef &Res) const { - InMemoryStruct<macho::RelocationEntry> RE; - getRelocation(Rel, RE); - uint32_t SymbolIdx = RE->Word1 & 0xffffff; - bool isExtern = (RE->Word1 >> 27) & 1; - DataRefImpl Sym; - moveToNextSymbol(Sym); - if (isExtern) { - for (unsigned i = 0; i < SymbolIdx; i++) { - Sym.d.b++; - moveToNextSymbol(Sym); - assert(Sym.d.a < MachOObj->getHeader().NumLoadCommands && - 
"Relocation symbol index out of range!"); - } +error_code +MachOObjectFile::getRelocationSymbol(DataRefImpl Rel, SymbolRef &Res) const { + macho::RelocationEntry RE = getRelocation(Rel); + uint32_t SymbolIdx = getPlainRelocationSymbolNum(RE); + bool isExtern = getPlainRelocationExternal(RE); + if (!isExtern) { + Res = *end_symbols(); + return object_error::success; } + + macho::SymtabLoadCommand S = getSymtabLoadCommand(); + unsigned SymbolTableEntrySize = is64Bit() ? + sizeof(macho::Symbol64TableEntry) : + sizeof(macho::SymbolTableEntry); + uint64_t Offset = S.SymbolTableOffset + SymbolIdx * SymbolTableEntrySize; + DataRefImpl Sym; + Sym.p = reinterpret_cast<uintptr_t>(getPtr(this, Offset)); Res = SymbolRef(Sym, this); return object_error::success; } + error_code MachOObjectFile::getRelocationType(DataRefImpl Rel, uint64_t &Res) const { - InMemoryStruct<macho::RelocationEntry> RE; - getRelocation(Rel, RE); - Res = RE->Word0; - Res <<= 32; - Res |= RE->Word1; + macho::RelocationEntry RE = getRelocation(Rel); + Res = getAnyRelocationType(RE); return object_error::success; } -error_code MachOObjectFile::getRelocationTypeName(DataRefImpl Rel, - SmallVectorImpl<char> &Result) const { - // TODO: Support scattered relocations. - StringRef res; - InMemoryStruct<macho::RelocationEntry> RE; - getRelocation(Rel, RE); - unsigned Arch = getArch(); - bool isScattered = (Arch != Triple::x86_64) && - (RE->Word0 & macho::RF_Scattered); +error_code +MachOObjectFile::getRelocationTypeName(DataRefImpl Rel, + SmallVectorImpl<char> &Result) const { + StringRef res; + uint64_t RType; + getRelocationType(Rel, RType); - unsigned r_type; - if (isScattered) - r_type = (RE->Word0 >> 24) & 0xF; - else - r_type = (RE->Word1 >> 28) & 0xF; + unsigned Arch = this->getArch(); switch (Arch) { case Triple::x86: { @@ -837,10 +915,10 @@ error_code MachOObjectFile::getRelocationTypeName(DataRefImpl Rel, "GENERIC_RELOC_LOCAL_SECTDIFF", "GENERIC_RELOC_TLV" }; - if (r_type > 6) + if (RType > 6) res = "Unknown"; else - res = Table[r_type]; + res = Table[RType]; break; } case Triple::x86_64: { @@ -856,10 +934,10 @@ error_code MachOObjectFile::getRelocationTypeName(DataRefImpl Rel, "X86_64_RELOC_SIGNED_4", "X86_64_RELOC_TLV" }; - if (r_type > 9) + if (RType > 9) res = "Unknown"; else - res = Table[r_type]; + res = Table[RType]; break; } case Triple::arm: { @@ -875,10 +953,10 @@ error_code MachOObjectFile::getRelocationTypeName(DataRefImpl Rel, "ARM_RELOC_HALF", "ARM_RELOC_HALF_SECTDIFF" }; - if (r_type > 9) + if (RType > 9) res = "Unknown"; else - res = Table[r_type]; + res = Table[RType]; break; } case Triple::ppc: { @@ -900,7 +978,7 @@ error_code MachOObjectFile::getRelocationTypeName(DataRefImpl Rel, "PPC_RELOC_LO14_SECTDIFF", "PPC_RELOC_LOCAL_SECTDIFF" }; - res = Table[r_type]; + res = Table[RType]; break; } case Triple::UnknownArch: @@ -910,193 +988,79 @@ error_code MachOObjectFile::getRelocationTypeName(DataRefImpl Rel, Result.append(res.begin(), res.end()); return object_error::success; } + error_code MachOObjectFile::getRelocationAdditionalInfo(DataRefImpl Rel, int64_t &Res) const { - InMemoryStruct<macho::RelocationEntry> RE; - getRelocation(Rel, RE); - bool isExtern = (RE->Word1 >> 27) & 1; Res = 0; - if (!isExtern) { - const uint8_t* sectAddress = base(); - if (MachOObj->is64Bit()) { - InMemoryStruct<macho::Section64> Sect; - getSection64(Sections[Rel.d.b], Sect); - sectAddress += Sect->Offset; - } else { - InMemoryStruct<macho::Section> Sect; - getSection(Sections[Rel.d.b], Sect); - sectAddress += Sect->Offset; - } - Res 
= reinterpret_cast<uintptr_t>(sectAddress); - } return object_error::success; } -// Helper to advance a section or symbol iterator multiple increments at a time. -template<class T> -error_code advance(T &it, size_t Val) { - error_code ec; - while (Val--) { - it.increment(ec); - } - return ec; -} - -template<class T> -void advanceTo(T &it, size_t Val) { - if (error_code ec = advance(it, Val)) - report_fatal_error(ec.message()); -} - -void MachOObjectFile::printRelocationTargetName( - InMemoryStruct<macho::RelocationEntry>& RE, - raw_string_ostream &fmt) const { - unsigned Arch = getArch(); - bool isScattered = (Arch != Triple::x86_64) && - (RE->Word0 & macho::RF_Scattered); - - // Target of a scattered relocation is an address. In the interest of - // generating pretty output, scan through the symbol table looking for a - // symbol that aligns with that address. If we find one, print it. - // Otherwise, we just print the hex address of the target. - if (isScattered) { - uint32_t Val = RE->Word1; - - error_code ec; - for (symbol_iterator SI = begin_symbols(), SE = end_symbols(); SI != SE; - SI.increment(ec)) { - if (ec) report_fatal_error(ec.message()); - - uint64_t Addr; - StringRef Name; - - if ((ec = SI->getAddress(Addr))) - report_fatal_error(ec.message()); - if (Addr != Val) continue; - if ((ec = SI->getName(Name))) - report_fatal_error(ec.message()); - fmt << Name; - return; - } - - // If we couldn't find a symbol that this relocation refers to, try - // to find a section beginning instead. - for (section_iterator SI = begin_sections(), SE = end_sections(); SI != SE; - SI.increment(ec)) { - if (ec) report_fatal_error(ec.message()); - - uint64_t Addr; - StringRef Name; - - if ((ec = SI->getAddress(Addr))) - report_fatal_error(ec.message()); - if (Addr != Val) continue; - if ((ec = SI->getName(Name))) - report_fatal_error(ec.message()); - fmt << Name; - return; - } - - fmt << format("0x%x", Val); - return; - } - - StringRef S; - bool isExtern = (RE->Word1 >> 27) & 1; - uint32_t Val = RE->Word1 & 0xFFFFFF; - - if (isExtern) { - symbol_iterator SI = begin_symbols(); - advanceTo(SI, Val); - SI->getName(S); - } else { - section_iterator SI = begin_sections(); - advanceTo(SI, Val); - SI->getName(S); - } - - fmt << S; -} - -error_code MachOObjectFile::getRelocationValueString(DataRefImpl Rel, +error_code +MachOObjectFile::getRelocationValueString(DataRefImpl Rel, SmallVectorImpl<char> &Result) const { - InMemoryStruct<macho::RelocationEntry> RE; - getRelocation(Rel, RE); + macho::RelocationEntry RE = getRelocation(Rel); - unsigned Arch = getArch(); - bool isScattered = (Arch != Triple::x86_64) && - (RE->Word0 & macho::RF_Scattered); + unsigned Arch = this->getArch(); std::string fmtbuf; raw_string_ostream fmt(fmtbuf); - - unsigned Type; - if (isScattered) - Type = (RE->Word0 >> 24) & 0xF; - else - Type = (RE->Word1 >> 28) & 0xF; - - bool isPCRel; - if (isScattered) - isPCRel = ((RE->Word0 >> 30) & 1); - else - isPCRel = ((RE->Word1 >> 24) & 1); + unsigned Type = this->getAnyRelocationType(RE); + bool IsPCRel = this->getAnyRelocationPCRel(RE); // Determine any addends that should be displayed with the relocation. // These require decoding the relocation type, which is triple-specific. // X86_64 has entirely custom relocation types. 
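
As an illustrative aside (not part of the patch): the hunks above replace ad-hoc Word0/Word1 bit-twiddling with the new getAnyRelocation*() helpers. A minimal standalone sketch of the two layouts being abstracted, using the bit positions taken from the removed inline code; the patch's real helpers additionally handle big-endian objects, and scattered entries never occur on x86_64.

```cpp
#include <cstdint>

// Raw MachO relocation entry as two 32-bit words (little-endian object assumed).
struct RelocationWords { uint32_t Word0, Word1; };

// Plain entries: Word0 holds the address; Word1 packs symbolnum, pcrel,
// length, extern and type.
inline unsigned plainSymbolNum(const RelocationWords &RE) { return RE.Word1 & 0xFFFFFF; }
inline bool     plainPCRel(const RelocationWords &RE)     { return (RE.Word1 >> 24) & 1; }
inline bool     plainExternal(const RelocationWords &RE)  { return (RE.Word1 >> 27) & 1; }
inline unsigned plainType(const RelocationWords &RE)      { return (RE.Word1 >> 28) & 0xF; }

// Scattered entries: the top bit of Word0 is set, and Word0 itself packs
// type, pcrel and address, while Word1 holds the target value.
inline bool     isScattered(const RelocationWords &RE)      { return RE.Word0 >> 31; }
inline unsigned scatteredType(const RelocationWords &RE)    { return (RE.Word0 >> 24) & 0xF; }
inline bool     scatteredPCRel(const RelocationWords &RE)   { return (RE.Word0 >> 30) & 1; }
inline unsigned scatteredAddress(const RelocationWords &RE) { return RE.Word0 & 0xFFFFFF; }

// The "any" accessors used in the patch simply dispatch on the scattered bit.
inline unsigned anyType(const RelocationWords &RE) {
  return isScattered(RE) ? scatteredType(RE) : plainType(RE);
}
inline bool anyPCRel(const RelocationWords &RE) {
  return isScattered(RE) ? scatteredPCRel(RE) : plainPCRel(RE);
}
```
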
if (Arch == Triple::x86_64) { - bool isPCRel = ((RE->Word1 >> 24) & 1); + bool isPCRel = getAnyRelocationPCRel(RE); switch (Type) { case macho::RIT_X86_64_GOTLoad: // X86_64_RELOC_GOT_LOAD case macho::RIT_X86_64_GOT: { // X86_64_RELOC_GOT - printRelocationTargetName(RE, fmt); + printRelocationTargetName(this, RE, fmt); fmt << "@GOT"; if (isPCRel) fmt << "PCREL"; break; } case macho::RIT_X86_64_Subtractor: { // X86_64_RELOC_SUBTRACTOR - InMemoryStruct<macho::RelocationEntry> RENext; DataRefImpl RelNext = Rel; RelNext.d.a++; - getRelocation(RelNext, RENext); + macho::RelocationEntry RENext = getRelocation(RelNext); // X86_64_SUBTRACTOR must be followed by a relocation of type // X86_64_RELOC_UNSIGNED. // NOTE: Scattered relocations don't exist on x86_64. - unsigned RType = (RENext->Word1 >> 28) & 0xF; + unsigned RType = getAnyRelocationType(RENext); if (RType != 0) report_fatal_error("Expected X86_64_RELOC_UNSIGNED after " "X86_64_RELOC_SUBTRACTOR."); // The X86_64_RELOC_UNSIGNED contains the minuend symbol, // X86_64_SUBTRACTOR contains to the subtrahend. - printRelocationTargetName(RENext, fmt); + printRelocationTargetName(this, RENext, fmt); fmt << "-"; - printRelocationTargetName(RE, fmt); + printRelocationTargetName(this, RE, fmt); break; } case macho::RIT_X86_64_TLV: - printRelocationTargetName(RE, fmt); + printRelocationTargetName(this, RE, fmt); fmt << "@TLV"; if (isPCRel) fmt << "P"; break; case macho::RIT_X86_64_Signed1: // X86_64_RELOC_SIGNED1 - printRelocationTargetName(RE, fmt); + printRelocationTargetName(this, RE, fmt); fmt << "-1"; break; case macho::RIT_X86_64_Signed2: // X86_64_RELOC_SIGNED2 - printRelocationTargetName(RE, fmt); + printRelocationTargetName(this, RE, fmt); fmt << "-2"; break; case macho::RIT_X86_64_Signed4: // X86_64_RELOC_SIGNED4 - printRelocationTargetName(RE, fmt); + printRelocationTargetName(this, RE, fmt); fmt << "-4"; break; default: - printRelocationTargetName(RE, fmt); + printRelocationTargetName(this, RE, fmt); break; } // X86 and ARM share some relocation types in common. @@ -1106,27 +1070,21 @@ error_code MachOObjectFile::getRelocationValueString(DataRefImpl Rel, case macho::RIT_Pair: // GENERIC_RELOC_PAIR - prints no info return object_error::success; case macho::RIT_Difference: { // GENERIC_RELOC_SECTDIFF - InMemoryStruct<macho::RelocationEntry> RENext; DataRefImpl RelNext = Rel; RelNext.d.a++; - getRelocation(RelNext, RENext); + macho::RelocationEntry RENext = getRelocation(RelNext); // X86 sect diff's must be followed by a relocation of type // GENERIC_RELOC_PAIR. - bool isNextScattered = (Arch != Triple::x86_64) && - (RENext->Word0 & macho::RF_Scattered); - unsigned RType; - if (isNextScattered) - RType = (RENext->Word0 >> 24) & 0xF; - else - RType = (RENext->Word1 >> 28) & 0xF; + unsigned RType = getAnyRelocationType(RENext); + if (RType != 1) report_fatal_error("Expected GENERIC_RELOC_PAIR after " "GENERIC_RELOC_SECTDIFF."); - printRelocationTargetName(RE, fmt); + printRelocationTargetName(this, RE, fmt); fmt << "-"; - printRelocationTargetName(RENext, fmt); + printRelocationTargetName(this, RENext, fmt); break; } } @@ -1136,37 +1094,30 @@ error_code MachOObjectFile::getRelocationValueString(DataRefImpl Rel, // handled in the generic code. 
switch (Type) { case macho::RIT_Generic_LocalDifference:{// GENERIC_RELOC_LOCAL_SECTDIFF - InMemoryStruct<macho::RelocationEntry> RENext; DataRefImpl RelNext = Rel; RelNext.d.a++; - getRelocation(RelNext, RENext); + macho::RelocationEntry RENext = getRelocation(RelNext); // X86 sect diff's must be followed by a relocation of type // GENERIC_RELOC_PAIR. - bool isNextScattered = (Arch != Triple::x86_64) && - (RENext->Word0 & macho::RF_Scattered); - unsigned RType; - if (isNextScattered) - RType = (RENext->Word0 >> 24) & 0xF; - else - RType = (RENext->Word1 >> 28) & 0xF; + unsigned RType = getAnyRelocationType(RENext); if (RType != 1) report_fatal_error("Expected GENERIC_RELOC_PAIR after " "GENERIC_RELOC_LOCAL_SECTDIFF."); - printRelocationTargetName(RE, fmt); + printRelocationTargetName(this, RE, fmt); fmt << "-"; - printRelocationTargetName(RENext, fmt); + printRelocationTargetName(this, RENext, fmt); break; } case macho::RIT_Generic_TLV: { - printRelocationTargetName(RE, fmt); + printRelocationTargetName(this, RE, fmt); fmt << "@TLV"; - if (isPCRel) fmt << "P"; + if (IsPCRel) fmt << "P"; break; } default: - printRelocationTargetName(RE, fmt); + printRelocationTargetName(this, RE, fmt); } } else { // ARM-specific relocations switch (Type) { @@ -1174,33 +1125,21 @@ error_code MachOObjectFile::getRelocationValueString(DataRefImpl Rel, case macho::RIT_ARM_HalfDifference: { // ARM_RELOC_HALF_SECTDIFF // Half relocations steal a bit from the length field to encode // whether this is an upper16 or a lower16 relocation. - bool isUpper; - if (isScattered) - isUpper = (RE->Word0 >> 28) & 1; - else - isUpper = (RE->Word1 >> 25) & 1; + bool isUpper = getAnyRelocationLength(RE) >> 1; if (isUpper) fmt << ":upper16:("; else fmt << ":lower16:("; - printRelocationTargetName(RE, fmt); + printRelocationTargetName(this, RE, fmt); - InMemoryStruct<macho::RelocationEntry> RENext; DataRefImpl RelNext = Rel; RelNext.d.a++; - getRelocation(RelNext, RENext); + macho::RelocationEntry RENext = getRelocation(RelNext); // ARM half relocs must be followed by a relocation of type // ARM_RELOC_PAIR. - bool isNextScattered = (Arch != Triple::x86_64) && - (RENext->Word0 & macho::RF_Scattered); - unsigned RType; - if (isNextScattered) - RType = (RENext->Word0 >> 24) & 0xF; - else - RType = (RENext->Word1 >> 28) & 0xF; - + unsigned RType = getAnyRelocationType(RENext); if (RType != 1) report_fatal_error("Expected ARM_RELOC_PAIR after " "GENERIC_RELOC_HALF"); @@ -1214,38 +1153,30 @@ error_code MachOObjectFile::getRelocationValueString(DataRefImpl Rel, // symbol/section pointer of the follow-on relocation. 
if (Type == macho::RIT_ARM_HalfDifference) { fmt << "-"; - printRelocationTargetName(RENext, fmt); + printRelocationTargetName(this, RENext, fmt); } fmt << ")"; break; } default: { - printRelocationTargetName(RE, fmt); + printRelocationTargetName(this, RE, fmt); } } } } else - printRelocationTargetName(RE, fmt); + printRelocationTargetName(this, RE, fmt); fmt.flush(); Result.append(fmtbuf.begin(), fmtbuf.end()); return object_error::success; } -error_code MachOObjectFile::getRelocationHidden(DataRefImpl Rel, - bool &Result) const { - InMemoryStruct<macho::RelocationEntry> RE; - getRelocation(Rel, RE); - +error_code +MachOObjectFile::getRelocationHidden(DataRefImpl Rel, bool &Result) const { unsigned Arch = getArch(); - bool isScattered = (Arch != Triple::x86_64) && - (RE->Word0 & macho::RF_Scattered); - unsigned Type; - if (isScattered) - Type = (RE->Word0 >> 24) & 0xF; - else - Type = (RE->Word1 >> 28) & 0xF; + uint64_t Type; + getRelocationType(Rel, Type); Result = false; @@ -1259,12 +1190,10 @@ error_code MachOObjectFile::getRelocationHidden(DataRefImpl Rel, if (Type == macho::RIT_X86_64_Unsigned && Rel.d.a > 0) { DataRefImpl RelPrev = Rel; RelPrev.d.a--; - InMemoryStruct<macho::RelocationEntry> REPrev; - getRelocation(RelPrev, REPrev); - - unsigned PrevType = (REPrev->Word1 >> 28) & 0xF; - - if (PrevType == macho::RIT_X86_64_Subtractor) Result = true; + uint64_t PrevType; + getRelocationType(RelPrev, PrevType); + if (PrevType == macho::RIT_X86_64_Subtractor) + Result = true; } } @@ -1281,16 +1210,70 @@ error_code MachOObjectFile::getLibraryPath(DataRefImpl LibData, report_fatal_error("Needed libraries unimplemented in MachOObjectFile"); } +symbol_iterator MachOObjectFile::begin_symbols() const { + DataRefImpl DRI; + if (!SymtabLoadCmd) + return symbol_iterator(SymbolRef(DRI, this)); + + macho::SymtabLoadCommand Symtab = getSymtabLoadCommand(); + DRI.p = reinterpret_cast<uintptr_t>(getPtr(this, Symtab.SymbolTableOffset)); + return symbol_iterator(SymbolRef(DRI, this)); +} + +symbol_iterator MachOObjectFile::end_symbols() const { + DataRefImpl DRI; + if (!SymtabLoadCmd) + return symbol_iterator(SymbolRef(DRI, this)); + + macho::SymtabLoadCommand Symtab = getSymtabLoadCommand(); + unsigned SymbolTableEntrySize = is64Bit() ? 
+ sizeof(macho::Symbol64TableEntry) : + sizeof(macho::SymbolTableEntry); + unsigned Offset = Symtab.SymbolTableOffset + + Symtab.NumSymbolTableEntries * SymbolTableEntrySize; + DRI.p = reinterpret_cast<uintptr_t>(getPtr(this, Offset)); + return symbol_iterator(SymbolRef(DRI, this)); +} + +symbol_iterator MachOObjectFile::begin_dynamic_symbols() const { + // TODO: implement + report_fatal_error("Dynamic symbols unimplemented in MachOObjectFile"); +} + +symbol_iterator MachOObjectFile::end_dynamic_symbols() const { + // TODO: implement + report_fatal_error("Dynamic symbols unimplemented in MachOObjectFile"); +} + +section_iterator MachOObjectFile::begin_sections() const { + DataRefImpl DRI; + return section_iterator(SectionRef(DRI, this)); +} + +section_iterator MachOObjectFile::end_sections() const { + DataRefImpl DRI; + DRI.d.a = Sections.size(); + return section_iterator(SectionRef(DRI, this)); +} -/*===-- Miscellaneous -----------------------------------------------------===*/ +library_iterator MachOObjectFile::begin_libraries_needed() const { + // TODO: implement + report_fatal_error("Needed libraries unimplemented in MachOObjectFile"); +} + +library_iterator MachOObjectFile::end_libraries_needed() const { + // TODO: implement + report_fatal_error("Needed libraries unimplemented in MachOObjectFile"); +} uint8_t MachOObjectFile::getBytesInAddress() const { - return MachOObj->is64Bit() ? 8 : 4; + return is64Bit() ? 8 : 4; } StringRef MachOObjectFile::getFileFormatName() const { - if (!MachOObj->is64Bit()) { - switch (MachOObj->getHeader().CPUType) { + unsigned CPUType = getCPUType(this); + if (!is64Bit()) { + switch (CPUType) { case llvm::MachO::CPUTypeI386: return "Mach-O 32-bit i386"; case llvm::MachO::CPUTypeARM: @@ -1298,18 +1281,18 @@ StringRef MachOObjectFile::getFileFormatName() const { case llvm::MachO::CPUTypePowerPC: return "Mach-O 32-bit ppc"; default: - assert((MachOObj->getHeader().CPUType & llvm::MachO::CPUArchABI64) == 0 && + assert((CPUType & llvm::MachO::CPUArchABI64) == 0 && "64-bit object file when we're not 64-bit?"); return "Mach-O 32-bit unknown"; } } // Make sure the cpu type has the correct mask. 
- assert((MachOObj->getHeader().CPUType & llvm::MachO::CPUArchABI64) + assert((CPUType & llvm::MachO::CPUArchABI64) == llvm::MachO::CPUArchABI64 && "32-bit object file when we're 64-bit?"); - switch (MachOObj->getHeader().CPUType) { + switch (CPUType) { case llvm::MachO::CPUTypeX86_64: return "Mach-O 64-bit x86-64"; case llvm::MachO::CPUTypePowerPC64: @@ -1320,7 +1303,7 @@ StringRef MachOObjectFile::getFileFormatName() const { } unsigned MachOObjectFile::getArch() const { - switch (MachOObj->getHeader().CPUType) { + switch (getCPUType(this)) { case llvm::MachO::CPUTypeI386: return Triple::x86; case llvm::MachO::CPUTypeX86_64: @@ -1336,5 +1319,260 @@ unsigned MachOObjectFile::getArch() const { } } +StringRef MachOObjectFile::getLoadName() const { + // TODO: Implement + report_fatal_error("get_load_name() unimplemented in MachOObjectFile"); +} + +relocation_iterator MachOObjectFile::getSectionRelBegin(unsigned Index) const { + DataRefImpl DRI; + DRI.d.a = Index; + return getSectionRelBegin(DRI); +} + +relocation_iterator MachOObjectFile::getSectionRelEnd(unsigned Index) const { + DataRefImpl DRI; + DRI.d.a = Index; + return getSectionRelEnd(DRI); +} + +StringRef +MachOObjectFile::getSectionFinalSegmentName(DataRefImpl Sec) const { + ArrayRef<char> Raw = getSectionRawFinalSegmentName(Sec); + return parseSegmentOrSectionName(Raw.data()); +} + +ArrayRef<char> +MachOObjectFile::getSectionRawName(DataRefImpl Sec) const { + const SectionBase *Base = + reinterpret_cast<const SectionBase*>(Sections[Sec.d.a]); + return ArrayRef<char>(Base->Name); +} + +ArrayRef<char> +MachOObjectFile::getSectionRawFinalSegmentName(DataRefImpl Sec) const { + const SectionBase *Base = + reinterpret_cast<const SectionBase*>(Sections[Sec.d.a]); + return ArrayRef<char>(Base->SegmentName); +} + +bool +MachOObjectFile::isRelocationScattered(const macho::RelocationEntry &RE) + const { + if (getCPUType(this) == llvm::MachO::CPUTypeX86_64) + return false; + return getPlainRelocationAddress(RE) & macho::RF_Scattered; +} + +unsigned MachOObjectFile::getPlainRelocationSymbolNum(const macho::RelocationEntry &RE) const { + if (isLittleEndian()) + return RE.Word1 & 0xffffff; + return RE.Word1 >> 8; +} + +bool MachOObjectFile::getPlainRelocationExternal(const macho::RelocationEntry &RE) const { + if (isLittleEndian()) + return (RE.Word1 >> 27) & 1; + return (RE.Word1 >> 4) & 1; +} + +bool +MachOObjectFile::getScatteredRelocationScattered(const macho::RelocationEntry &RE) const { + return RE.Word0 >> 31; +} + +uint32_t +MachOObjectFile::getScatteredRelocationValue(const macho::RelocationEntry &RE) const { + return RE.Word1; +} + +unsigned +MachOObjectFile::getAnyRelocationAddress(const macho::RelocationEntry &RE) const { + if (isRelocationScattered(RE)) + return getScatteredRelocationAddress(RE); + return getPlainRelocationAddress(RE); +} + +unsigned +MachOObjectFile::getAnyRelocationPCRel(const macho::RelocationEntry &RE) const { + if (isRelocationScattered(RE)) + return getScatteredRelocationPCRel(this, RE); + return getPlainRelocationPCRel(this, RE); +} + +unsigned +MachOObjectFile::getAnyRelocationLength(const macho::RelocationEntry &RE) const { + if (isRelocationScattered(RE)) + return getScatteredRelocationLength(RE); + return getPlainRelocationLength(this, RE); +} + +unsigned +MachOObjectFile::getAnyRelocationType(const macho::RelocationEntry &RE) const { + if (isRelocationScattered(RE)) + return getScatteredRelocationType(RE); + return getPlainRelocationType(this, RE); +} + +SectionRef 
+MachOObjectFile::getRelocationSection(const macho::RelocationEntry &RE) const { + if (isRelocationScattered(RE) || getPlainRelocationExternal(RE)) + return *end_sections(); + unsigned SecNum = getPlainRelocationSymbolNum(RE) - 1; + DataRefImpl DRI; + DRI.d.a = SecNum; + return SectionRef(DRI, this); +} + +MachOObjectFile::LoadCommandInfo +MachOObjectFile::getFirstLoadCommandInfo() const { + MachOObjectFile::LoadCommandInfo Load; + + unsigned HeaderSize = is64Bit() ? macho::Header64Size : macho::Header32Size; + Load.Ptr = getPtr(this, HeaderSize); + Load.C = getStruct<macho::LoadCommand>(this, Load.Ptr); + return Load; +} + +MachOObjectFile::LoadCommandInfo +MachOObjectFile::getNextLoadCommandInfo(const LoadCommandInfo &L) const { + MachOObjectFile::LoadCommandInfo Next; + Next.Ptr = L.Ptr + L.C.Size; + Next.C = getStruct<macho::LoadCommand>(this, Next.Ptr); + return Next; +} + +macho::Section MachOObjectFile::getSection(DataRefImpl DRI) const { + return getStruct<macho::Section>(this, Sections[DRI.d.a]); +} + +macho::Section64 MachOObjectFile::getSection64(DataRefImpl DRI) const { + return getStruct<macho::Section64>(this, Sections[DRI.d.a]); +} + +macho::Section MachOObjectFile::getSection(const LoadCommandInfo &L, + unsigned Index) const { + const char *Sec = getSectionPtr(this, L, Index); + return getStruct<macho::Section>(this, Sec); +} + +macho::Section64 MachOObjectFile::getSection64(const LoadCommandInfo &L, + unsigned Index) const { + const char *Sec = getSectionPtr(this, L, Index); + return getStruct<macho::Section64>(this, Sec); +} + +macho::SymbolTableEntry +MachOObjectFile::getSymbolTableEntry(DataRefImpl DRI) const { + const char *P = reinterpret_cast<const char *>(DRI.p); + return getStruct<macho::SymbolTableEntry>(this, P); +} + +macho::Symbol64TableEntry +MachOObjectFile::getSymbol64TableEntry(DataRefImpl DRI) const { + const char *P = reinterpret_cast<const char *>(DRI.p); + return getStruct<macho::Symbol64TableEntry>(this, P); +} + +macho::LinkeditDataLoadCommand +MachOObjectFile::getLinkeditDataLoadCommand(const MachOObjectFile::LoadCommandInfo &L) const { + return getStruct<macho::LinkeditDataLoadCommand>(this, L.Ptr); +} + +macho::SegmentLoadCommand +MachOObjectFile::getSegmentLoadCommand(const LoadCommandInfo &L) const { + return getStruct<macho::SegmentLoadCommand>(this, L.Ptr); +} + +macho::Segment64LoadCommand +MachOObjectFile::getSegment64LoadCommand(const LoadCommandInfo &L) const { + return getStruct<macho::Segment64LoadCommand>(this, L.Ptr); +} + +macho::LinkerOptionsLoadCommand +MachOObjectFile::getLinkerOptionsLoadCommand(const LoadCommandInfo &L) const { + return getStruct<macho::LinkerOptionsLoadCommand>(this, L.Ptr); +} + +macho::RelocationEntry +MachOObjectFile::getRelocation(DataRefImpl Rel) const { + const char *P = reinterpret_cast<const char *>(Rel.p); + return getStruct<macho::RelocationEntry>(this, P); +} + +macho::Header MachOObjectFile::getHeader() const { + return getStruct<macho::Header>(this, getPtr(this, 0)); +} + +macho::Header64Ext MachOObjectFile::getHeader64Ext() const { + return + getStruct<macho::Header64Ext>(this, getPtr(this, sizeof(macho::Header))); +} + +macho::IndirectSymbolTableEntry MachOObjectFile::getIndirectSymbolTableEntry( + const macho::DysymtabLoadCommand &DLC, + unsigned Index) const { + uint64_t Offset = DLC.IndirectSymbolTableOffset + + Index * sizeof(macho::IndirectSymbolTableEntry); + return getStruct<macho::IndirectSymbolTableEntry>(this, getPtr(this, Offset)); +} + +macho::DataInCodeTableEntry 
+MachOObjectFile::getDataInCodeTableEntry(uint32_t DataOffset, + unsigned Index) const { + uint64_t Offset = DataOffset + Index * sizeof(macho::DataInCodeTableEntry); + return getStruct<macho::DataInCodeTableEntry>(this, getPtr(this, Offset)); +} + +macho::SymtabLoadCommand MachOObjectFile::getSymtabLoadCommand() const { + return getStruct<macho::SymtabLoadCommand>(this, SymtabLoadCmd); +} + +macho::DysymtabLoadCommand MachOObjectFile::getDysymtabLoadCommand() const { + return getStruct<macho::DysymtabLoadCommand>(this, DysymtabLoadCmd); +} + +StringRef MachOObjectFile::getStringTableData() const { + macho::SymtabLoadCommand S = getSymtabLoadCommand(); + return getData().substr(S.StringTableOffset, S.StringTableSize); +} + +bool MachOObjectFile::is64Bit() const { + return getType() == getMachOType(false, true) || + getType() == getMachOType(true, true); +} + +void MachOObjectFile::ReadULEB128s(uint64_t Index, + SmallVectorImpl<uint64_t> &Out) const { + DataExtractor extractor(ObjectFile::getData(), true, 0); + + uint32_t offset = Index; + uint64_t data = 0; + while (uint64_t delta = extractor.getULEB128(&offset)) { + data += delta; + Out.push_back(data); + } +} + +ObjectFile *ObjectFile::createMachOObjectFile(MemoryBuffer *Buffer) { + StringRef Magic = Buffer->getBuffer().slice(0, 4); + error_code ec; + ObjectFile *Ret; + if (Magic == "\xFE\xED\xFA\xCE") + Ret = new MachOObjectFile(Buffer, false, false, ec); + else if (Magic == "\xCE\xFA\xED\xFE") + Ret = new MachOObjectFile(Buffer, true, false, ec); + else if (Magic == "\xFE\xED\xFA\xCF") + Ret = new MachOObjectFile(Buffer, false, true, ec); + else if (Magic == "\xCF\xFA\xED\xFE") + Ret = new MachOObjectFile(Buffer, true, true, ec); + else + return NULL; + + if (ec) + return NULL; + return Ret; +} + } // end namespace object } // end namespace llvm diff --git a/contrib/llvm/lib/Object/Object.cpp b/contrib/llvm/lib/Object/Object.cpp index f061ea7..3e2c78e 100644 --- a/contrib/llvm/lib/Object/Object.cpp +++ b/contrib/llvm/lib/Object/Object.cpp @@ -12,12 +12,51 @@ // //===----------------------------------------------------------------------===// +#include "llvm/ADT/SmallVector.h" #include "llvm/Object/ObjectFile.h" #include "llvm-c/Object.h" using namespace llvm; using namespace object; +inline ObjectFile *unwrap(LLVMObjectFileRef OF) { + return reinterpret_cast<ObjectFile*>(OF); +} + +inline LLVMObjectFileRef wrap(const ObjectFile *OF) { + return reinterpret_cast<LLVMObjectFileRef>(const_cast<ObjectFile*>(OF)); +} + +inline section_iterator *unwrap(LLVMSectionIteratorRef SI) { + return reinterpret_cast<section_iterator*>(SI); +} + +inline LLVMSectionIteratorRef +wrap(const section_iterator *SI) { + return reinterpret_cast<LLVMSectionIteratorRef> + (const_cast<section_iterator*>(SI)); +} + +inline symbol_iterator *unwrap(LLVMSymbolIteratorRef SI) { + return reinterpret_cast<symbol_iterator*>(SI); +} + +inline LLVMSymbolIteratorRef +wrap(const symbol_iterator *SI) { + return reinterpret_cast<LLVMSymbolIteratorRef> + (const_cast<symbol_iterator*>(SI)); +} + +inline relocation_iterator *unwrap(LLVMRelocationIteratorRef SI) { + return reinterpret_cast<relocation_iterator*>(SI); +} + +inline LLVMRelocationIteratorRef +wrap(const relocation_iterator *SI) { + return reinterpret_cast<LLVMRelocationIteratorRef> + (const_cast<relocation_iterator*>(SI)); +} + // ObjectFile creation LLVMObjectFileRef LLVMCreateObjectFile(LLVMMemoryBufferRef MemBuf) { return wrap(ObjectFile::createObjectFile(unwrap(MemBuf))); diff --git 
a/contrib/llvm/lib/Object/ObjectFile.cpp b/contrib/llvm/lib/Object/ObjectFile.cpp index 860c87b..77fd995 100644 --- a/contrib/llvm/lib/Object/ObjectFile.cpp +++ b/contrib/llvm/lib/Object/ObjectFile.cpp @@ -23,10 +23,16 @@ using namespace object; void ObjectFile::anchor() { } -ObjectFile::ObjectFile(unsigned int Type, MemoryBuffer *source, error_code &ec) +ObjectFile::ObjectFile(unsigned int Type, MemoryBuffer *source) : Binary(Type, source) { } +error_code ObjectFile::getSymbolAlignment(DataRefImpl DRI, + uint32_t &Result) const { + Result = 0; + return object_error::success; +} + ObjectFile *ObjectFile::createObjectFile(MemoryBuffer *Object) { if (!Object || Object->getBufferSize() < 64) return 0; diff --git a/contrib/llvm/lib/Support/CommandLine.cpp b/contrib/llvm/lib/Support/CommandLine.cpp index 560d7eb..18d3db5 100644 --- a/contrib/llvm/lib/Support/CommandLine.cpp +++ b/contrib/llvm/lib/Support/CommandLine.cpp @@ -33,6 +33,7 @@ #include "llvm/Support/system_error.h" #include <cerrno> #include <cstdlib> +#include <map> using namespace llvm; using namespace cl; @@ -106,6 +107,17 @@ void Option::addArgument() { MarkOptionsChanged(); } +// This collects the different option categories that have been registered. +typedef SmallPtrSet<OptionCategory*,16> OptionCatSet; +static ManagedStatic<OptionCatSet> RegisteredOptionCategories; + +// Initialise the general option category. +OptionCategory llvm::cl::GeneralCategory("General options"); + +void OptionCategory::registerCategory() +{ + RegisteredOptionCategories->insert(this); +} //===----------------------------------------------------------------------===// // Basic, shared command line option processing machinery. @@ -1222,11 +1234,20 @@ sortOpts(StringMap<Option*> &OptMap, namespace { class HelpPrinter { +protected: const bool ShowHidden; + typedef SmallVector<std::pair<const char *, Option*>,128> StrOptionPairVector; + // Print the options. Opts is assumed to be alphabetically sorted. + virtual void printOptions(StrOptionPairVector &Opts, size_t MaxArgLen) { + for (size_t i = 0, e = Opts.size(); i != e; ++i) + Opts[i].second->printOptionInfo(MaxArgLen); + } public: explicit HelpPrinter(bool showHidden) : ShowHidden(showHidden) {} + virtual ~HelpPrinter() {} + // Invoke the printer. void operator=(bool Value) { if (Value == false) return; @@ -1236,7 +1257,7 @@ public: StringMap<Option*> OptMap; GetOptionInfo(PositionalOpts, SinkOpts, OptMap); - SmallVector<std::pair<const char *, Option*>, 128> Opts; + StrOptionPairVector Opts; sortOpts(OptMap, Opts, ShowHidden); if (ProgramOverview) @@ -1267,12 +1288,12 @@ public: MaxArgLen = std::max(MaxArgLen, Opts[i].second->getOptionWidth()); outs() << "OPTIONS:\n"; - for (size_t i = 0, e = Opts.size(); i != e; ++i) - Opts[i].second->printOptionInfo(MaxArgLen); + printOptions(Opts, MaxArgLen); // Print any extra help the user has declared. for (std::vector<const char *>::iterator I = MoreHelp->begin(), - E = MoreHelp->end(); I != E; ++I) + E = MoreHelp->end(); + I != E; ++I) outs() << *I; MoreHelp->clear(); @@ -1280,21 +1301,152 @@ public: exit(1); } }; + +class CategorizedHelpPrinter : public HelpPrinter { +public: + explicit CategorizedHelpPrinter(bool showHidden) : HelpPrinter(showHidden) {} + + // Helper function for printOptions(). + // It shall return true if A's name should be lexographically + // ordered before B's name. It returns false otherwise. 
+ static bool OptionCategoryCompare(OptionCategory *A, OptionCategory *B) { + int Length = strcmp(A->getName(), B->getName()); + assert(Length != 0 && "Duplicate option categories"); + return Length < 0; + } + + // Make sure we inherit our base class's operator=() + using HelpPrinter::operator= ; + +protected: + virtual void printOptions(StrOptionPairVector &Opts, size_t MaxArgLen) { + std::vector<OptionCategory *> SortedCategories; + std::map<OptionCategory *, std::vector<Option *> > CategorizedOptions; + + // Collect registered option categories into vector in preperation for + // sorting. + for (OptionCatSet::const_iterator I = RegisteredOptionCategories->begin(), + E = RegisteredOptionCategories->end(); + I != E; ++I) + SortedCategories.push_back(*I); + + // Sort the different option categories alphabetically. + assert(SortedCategories.size() > 0 && "No option categories registered!"); + std::sort(SortedCategories.begin(), SortedCategories.end(), + OptionCategoryCompare); + + // Create map to empty vectors. + for (std::vector<OptionCategory *>::const_iterator + I = SortedCategories.begin(), + E = SortedCategories.end(); + I != E; ++I) + CategorizedOptions[*I] = std::vector<Option *>(); + + // Walk through pre-sorted options and assign into categories. + // Because the options are already alphabetically sorted the + // options within categories will also be alphabetically sorted. + for (size_t I = 0, E = Opts.size(); I != E; ++I) { + Option *Opt = Opts[I].second; + assert(CategorizedOptions.count(Opt->Category) > 0 && + "Option has an unregistered category"); + CategorizedOptions[Opt->Category].push_back(Opt); + } + + // Now do printing. + for (std::vector<OptionCategory *>::const_iterator + Category = SortedCategories.begin(), + E = SortedCategories.end(); + Category != E; ++Category) { + // Hide empty categories for -help, but show for -help-hidden. + bool IsEmptyCategory = CategorizedOptions[*Category].size() == 0; + if (!ShowHidden && IsEmptyCategory) + continue; + + // Print category information. + outs() << "\n"; + outs() << (*Category)->getName() << ":\n"; + + // Check if description is set. + if ((*Category)->getDescription() != 0) + outs() << (*Category)->getDescription() << "\n\n"; + else + outs() << "\n"; + + // When using -help-hidden explicitly state if the category has no + // options associated with it. + if (IsEmptyCategory) { + outs() << " This option category has no options.\n"; + continue; + } + // Loop over the options in the category and print. + for (std::vector<Option *>::const_iterator + Opt = CategorizedOptions[*Category].begin(), + E = CategorizedOptions[*Category].end(); + Opt != E; ++Opt) + (*Opt)->printOptionInfo(MaxArgLen); + } + } +}; + +// This wraps the Uncategorizing and Categorizing printers and decides +// at run time which should be invoked. +class HelpPrinterWrapper { +private: + HelpPrinter &UncategorizedPrinter; + CategorizedHelpPrinter &CategorizedPrinter; + +public: + explicit HelpPrinterWrapper(HelpPrinter &UncategorizedPrinter, + CategorizedHelpPrinter &CategorizedPrinter) : + UncategorizedPrinter(UncategorizedPrinter), + CategorizedPrinter(CategorizedPrinter) { } + + // Invoke the printer. + void operator=(bool Value); +}; + } // End anonymous namespace -// Define the two HelpPrinter instances that are used to print out help, or -// help-hidden... 
-// -static HelpPrinter NormalPrinter(false); -static HelpPrinter HiddenPrinter(true); +// Declare the four HelpPrinter instances that are used to print out help, or +// help-hidden as an uncategorized list or in categories. +static HelpPrinter UncategorizedNormalPrinter(false); +static HelpPrinter UncategorizedHiddenPrinter(true); +static CategorizedHelpPrinter CategorizedNormalPrinter(false); +static CategorizedHelpPrinter CategorizedHiddenPrinter(true); + +// Declare HelpPrinter wrappers that will decide whether or not to invoke +// a categorizing help printer +static HelpPrinterWrapper WrappedNormalPrinter(UncategorizedNormalPrinter, + CategorizedNormalPrinter); +static HelpPrinterWrapper WrappedHiddenPrinter(UncategorizedHiddenPrinter, + CategorizedHiddenPrinter); + +// Define uncategorized help printers. +// -help-list is hidden by default because if Option categories are being used +// then -help behaves the same as -help-list. static cl::opt<HelpPrinter, true, parser<bool> > -HOp("help", cl::desc("Display available options (-help-hidden for more)"), - cl::location(NormalPrinter), cl::ValueDisallowed); +HLOp("help-list", + cl::desc("Display list of available options (-help-list-hidden for more)"), + cl::location(UncategorizedNormalPrinter), cl::Hidden, cl::ValueDisallowed); static cl::opt<HelpPrinter, true, parser<bool> > +HLHOp("help-list-hidden", + cl::desc("Display list of all available options"), + cl::location(UncategorizedHiddenPrinter), cl::Hidden, cl::ValueDisallowed); + +// Define uncategorized/categorized help printers. These printers change their +// behaviour at runtime depending on whether one or more Option categories have +// been declared. +static cl::opt<HelpPrinterWrapper, true, parser<bool> > +HOp("help", cl::desc("Display available options (-help-hidden for more)"), + cl::location(WrappedNormalPrinter), cl::ValueDisallowed); + +static cl::opt<HelpPrinterWrapper, true, parser<bool> > HHOp("help-hidden", cl::desc("Display all available options"), - cl::location(HiddenPrinter), cl::Hidden, cl::ValueDisallowed); + cl::location(WrappedHiddenPrinter), cl::Hidden, cl::ValueDisallowed); + + static cl::opt<bool> PrintOptions("print-options", @@ -1306,6 +1458,24 @@ PrintAllOptions("print-all-options", cl::desc("Print all option values after command line parsing"), cl::Hidden, cl::init(false)); +void HelpPrinterWrapper::operator=(bool Value) { + if (Value == false) + return; + + // Decide which printer to invoke. If more than one option category is + // registered then it is useful to show the categorized help instead of + // uncategorized help. + if (RegisteredOptionCategories->size() > 1) { + // unhide -help-list option so user can have uncategorized output if they + // want it. + HLOp.setHiddenFlag(NotHidden); + + CategorizedPrinter = true; // Invoke categorized printer + } + else + UncategorizedPrinter = true; // Invoke uncategorized printer +} + // Print the value of each option. void cl::PrintOptionValues() { if (!PrintOptions && !PrintAllOptions) return; @@ -1393,14 +1563,22 @@ VersOp("version", cl::desc("Display the version of this program"), cl::location(VersionPrinterInstance), cl::ValueDisallowed); // Utility function for printing the help message. -void cl::PrintHelpMessage() { - // This looks weird, but it actually prints the help message. The - // NormalPrinter variable is a HelpPrinter and the help gets printed when - // its operator= is invoked. 
That's because the "normal" usages of the - // help printer is to be assigned true/false depending on whether the - // -help option was given or not. Since we're circumventing that we have - // to make it look like -help was given, so we assign true. - NormalPrinter = true; +void cl::PrintHelpMessage(bool Hidden, bool Categorized) { + // This looks weird, but it actually prints the help message. The Printers are + // types of HelpPrinter and the help gets printed when its operator= is + // invoked. That's because the "normal" usages of the help printer is to be + // assigned true/false depending on whether -help or -help-hidden was given or + // not. Since we're circumventing that we have to make it look like -help or + // -help-hidden were given, so we assign true. + + if (!Hidden && !Categorized) + UncategorizedNormalPrinter = true; + else if (!Hidden && Categorized) + CategorizedNormalPrinter = true; + else if (Hidden && !Categorized) + UncategorizedHiddenPrinter = true; + else + CategorizedHiddenPrinter = true; } /// Utility function for printing version number. @@ -1418,3 +1596,13 @@ void cl::AddExtraVersionPrinter(void (*func)()) { ExtraVersionPrinters->push_back(func); } + +void cl::getRegisteredOptions(StringMap<Option*> &Map) +{ + // Get all the options. + SmallVector<Option*, 4> PositionalOpts; //NOT USED + SmallVector<Option*, 4> SinkOpts; //NOT USED + assert(Map.size() == 0 && "StringMap must be empty"); + GetOptionInfo(PositionalOpts, SinkOpts, Map); + return; +} diff --git a/contrib/llvm/lib/Support/Compression.cpp b/contrib/llvm/lib/Support/Compression.cpp new file mode 100644 index 0000000..fd8a874 --- /dev/null +++ b/contrib/llvm/lib/Support/Compression.cpp @@ -0,0 +1,97 @@ +//===--- Compression.cpp - Compression implementation ---------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements compression functions. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/Compression.h" +#include "llvm/ADT/OwningPtr.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Config/config.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MemoryBuffer.h" +#if LLVM_ENABLE_ZLIB == 1 && HAVE_ZLIB_H +#include <zlib.h> +#endif + +using namespace llvm; + +#if LLVM_ENABLE_ZLIB == 1 && HAVE_LIBZ +static int encodeZlibCompressionLevel(zlib::CompressionLevel Level) { + switch (Level) { + case zlib::NoCompression: return 0; + case zlib::BestSpeedCompression: return 1; + case zlib::DefaultCompression: return Z_DEFAULT_COMPRESSION; + case zlib::BestSizeCompression: return 9; + } + llvm_unreachable("Invalid zlib::CompressionLevel!"); +} + +static zlib::Status encodeZlibReturnValue(int ReturnValue) { + switch (ReturnValue) { + case Z_OK: return zlib::StatusOK; + case Z_MEM_ERROR: return zlib::StatusOutOfMemory; + case Z_BUF_ERROR: return zlib::StatusBufferTooShort; + case Z_STREAM_ERROR: return zlib::StatusInvalidArg; + case Z_DATA_ERROR: return zlib::StatusInvalidData; + default: llvm_unreachable("unknown zlib return status!"); + } +} + +bool zlib::isAvailable() { return true; } +zlib::Status zlib::compress(StringRef InputBuffer, + OwningPtr<MemoryBuffer> &CompressedBuffer, + CompressionLevel Level) { + unsigned long CompressedSize = ::compressBound(InputBuffer.size()); + OwningArrayPtr<char> TmpBuffer(new char[CompressedSize]); + int CLevel = encodeZlibCompressionLevel(Level); + Status Res = encodeZlibReturnValue(::compress2( + (Bytef *)TmpBuffer.get(), &CompressedSize, + (const Bytef *)InputBuffer.data(), InputBuffer.size(), CLevel)); + if (Res == StatusOK) { + CompressedBuffer.reset(MemoryBuffer::getMemBufferCopy( + StringRef(TmpBuffer.get(), CompressedSize))); + // Tell MSan that memory initialized by zlib is valid. + __msan_unpoison(CompressedBuffer->getBufferStart(), CompressedSize); + } + return Res; +} + +zlib::Status zlib::uncompress(StringRef InputBuffer, + OwningPtr<MemoryBuffer> &UncompressedBuffer, + size_t UncompressedSize) { + OwningArrayPtr<char> TmpBuffer(new char[UncompressedSize]); + Status Res = encodeZlibReturnValue( + ::uncompress((Bytef *)TmpBuffer.get(), (uLongf *)&UncompressedSize, + (const Bytef *)InputBuffer.data(), InputBuffer.size())); + if (Res == StatusOK) { + UncompressedBuffer.reset(MemoryBuffer::getMemBufferCopy( + StringRef(TmpBuffer.get(), UncompressedSize))); + // Tell MSan that memory initialized by zlib is valid. 
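
As an illustrative aside (not part of the patch): a hedged usage sketch of the zlib wrapper this new Compression.cpp introduces. It exercises only the API visible in the hunk; the round-trip helper name is made up for the example.

```cpp
#include "llvm/ADT/OwningPtr.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Compression.h"
#include "llvm/Support/MemoryBuffer.h"

using namespace llvm;

// Compress Input, decompress it again, and check that the original came back.
static bool zlibRoundTrip(StringRef Input) {
  if (!zlib::isAvailable())
    return false;  // Built without zlib: the calls below would return StatusUnsupported.

  OwningPtr<MemoryBuffer> Compressed;
  if (zlib::compress(Input, Compressed, zlib::DefaultCompression) != zlib::StatusOK)
    return false;

  OwningPtr<MemoryBuffer> Uncompressed;
  if (zlib::uncompress(Compressed->getBuffer(), Uncompressed, Input.size()) != zlib::StatusOK)
    return false;

  return Uncompressed->getBuffer() == Input;
}
```
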
+ __msan_unpoison(UncompressedBuffer->getBufferStart(), UncompressedSize); + } + return Res; +} + +#else +bool zlib::isAvailable() { return false; } +zlib::Status zlib::compress(StringRef InputBuffer, + OwningPtr<MemoryBuffer> &CompressedBuffer, + CompressionLevel Level) { + return zlib::StatusUnsupported; +} +zlib::Status zlib::uncompress(StringRef InputBuffer, + OwningPtr<MemoryBuffer> &UncompressedBuffer, + size_t UncompressedSize) { + return zlib::StatusUnsupported; +} +#endif + diff --git a/contrib/llvm/lib/Support/DataExtractor.cpp b/contrib/llvm/lib/Support/DataExtractor.cpp index 3d5cce0..a564d21 100644 --- a/contrib/llvm/lib/Support/DataExtractor.cpp +++ b/contrib/llvm/lib/Support/DataExtractor.cpp @@ -20,7 +20,7 @@ static T getU(uint32_t *offset_ptr, const DataExtractor *de, uint32_t offset = *offset_ptr; if (de->isValidOffsetForDataOfSize(offset, sizeof(val))) { std::memcpy(&val, &Data[offset], sizeof(val)); - if (sys::isLittleEndianHost() != isLittleEndian) + if (sys::IsLittleEndianHost != isLittleEndian) val = sys::SwapByteOrder(val); // Advance the offset diff --git a/contrib/llvm/lib/Support/FoldingSet.cpp b/contrib/llvm/lib/Support/FoldingSet.cpp index 36e33b5..145f12d 100644 --- a/contrib/llvm/lib/Support/FoldingSet.cpp +++ b/contrib/llvm/lib/Support/FoldingSet.cpp @@ -101,7 +101,7 @@ void FoldingSetNodeID::AddString(StringRef String) { // Otherwise do it the hard way. // To be compatible with above bulk transfer, we need to take endianness // into account. - if (sys::isBigEndianHost()) { + if (sys::IsBigEndianHost) { for (Pos += 4; Pos <= Size; Pos += 4) { unsigned V = ((unsigned char)String[Pos - 4] << 24) | ((unsigned char)String[Pos - 3] << 16) | @@ -110,7 +110,7 @@ void FoldingSetNodeID::AddString(StringRef String) { Bits.push_back(V); } } else { - assert(sys::isLittleEndianHost() && "Unexpected host endianness"); + assert(sys::IsLittleEndianHost && "Unexpected host endianness"); for (Pos += 4; Pos <= Size; Pos += 4) { unsigned V = ((unsigned char)String[Pos - 1] << 24) | ((unsigned char)String[Pos - 2] << 16) | diff --git a/contrib/llvm/lib/Support/Host.cpp b/contrib/llvm/lib/Support/Host.cpp index 73d98d1..a7c7a95 100644 --- a/contrib/llvm/lib/Support/Host.cpp +++ b/contrib/llvm/lib/Support/Host.cpp @@ -112,19 +112,19 @@ static bool GetX86CpuIDAndInfo(unsigned value, unsigned *rEAX, #endif } -static bool OSHasAVXSupport() {
-#if defined(__GNUC__)
- // Check xgetbv; this uses a .byte sequence instead of the instruction
- // directly because older assemblers do not include support for xgetbv and
- // there is no easy way to conditionally compile based on the assembler used.
- int rEAX, rEDX;
- __asm__ (".byte 0x0f, 0x01, 0xd0" : "=a" (rEAX), "=d" (rEDX) : "c" (0));
-#elif defined(_MSC_FULL_VER) && _MSC_FULL_VER >= 160040219
- unsigned long long rEAX = _xgetbv(_XCR_XFEATURE_ENABLED_MASK);
-#else
- int rEAX = 0; // Ensures we return false
-#endif
- return (rEAX & 6) == 6;
+static bool OSHasAVXSupport() { +#if defined(__GNUC__) + // Check xgetbv; this uses a .byte sequence instead of the instruction + // directly because older assemblers do not include support for xgetbv and + // there is no easy way to conditionally compile based on the assembler used. + int rEAX, rEDX; + __asm__ (".byte 0x0f, 0x01, 0xd0" : "=a" (rEAX), "=d" (rEDX) : "c" (0)); +#elif defined(_MSC_FULL_VER) && defined(_XCR_XFEATURE_ENABLED_MASK) + unsigned long long rEAX = _xgetbv(_XCR_XFEATURE_ENABLED_MASK); +#else + int rEAX = 0; // Ensures we return false +#endif + return (rEAX & 6) == 6; } static void DetectX86FamilyModel(unsigned EAX, unsigned &Family, @@ -355,10 +355,15 @@ std::string sys::getHostCPUName() { case 20: return "btver1"; case 21: - if (Model <= 15) - return "bdver1"; - else if (Model <= 31) + if (!HasAVX) // If the OS doesn't support AVX provide a sane fallback. + return "btver1"; + if (Model > 15 && Model <= 31) return "bdver2"; + return "bdver1"; + case 22: + if (!HasAVX) // If the OS doesn't support AVX provide a sane fallback. + return "btver1"; + return "btver2"; default: return "generic"; } @@ -608,7 +613,7 @@ bool sys::getHostCPUFeatures(StringMap<bool> &Features){ #endif std::string sys::getProcessTriple() { - Triple PT(LLVM_HOSTTRIPLE); + Triple PT(LLVM_HOST_TRIPLE); if (sizeof(void *) == 8 && PT.isArch32Bit()) PT = PT.get64BitArchVariant(); diff --git a/contrib/llvm/lib/Support/LockFileManager.cpp b/contrib/llvm/lib/Support/LockFileManager.cpp index 92d8b83..2917e27 100644 --- a/contrib/llvm/lib/Support/LockFileManager.cpp +++ b/contrib/llvm/lib/Support/LockFileManager.cpp @@ -174,8 +174,8 @@ void LockFileManager::waitForUnlock() { Interval.tv_sec = 0; Interval.tv_nsec = 1000000; #endif - // Don't wait more than an hour for the file to appear. - const unsigned MaxSeconds = 3600; + // Don't wait more than five minutes for the file to appear. + unsigned MaxSeconds = 300; bool LockFileGone = false; do { // Sleep for the designated interval, to allow the owning process time to @@ -187,21 +187,48 @@ void LockFileManager::waitForUnlock() { #else nanosleep(&Interval, NULL); #endif - // If the lock file no longer exists, wait for the actual file. bool Exists = false; + bool LockFileJustDisappeared = false; + + // If the lock file is still expected to be there, check whether it still + // is. if (!LockFileGone) { if (!sys::fs::exists(LockFileName.str(), Exists) && !Exists) { LockFileGone = true; + LockFileJustDisappeared = true; Exists = false; } } + + // If the lock file is no longer there, check if the original file is + // available now. if (LockFileGone) { - if (!sys::fs::exists(FileName.str(), Exists) && Exists) + if (!sys::fs::exists(FileName.str(), Exists) && Exists) { return; + } + + // The lock file is gone, so now we're waiting for the original file to + // show up. If this just happened, reset our waiting intervals and keep + // waiting. + if (LockFileJustDisappeared) { + MaxSeconds = 5; + +#if LLVM_ON_WIN32 + Interval = 1; +#else + Interval.tv_sec = 0; + Interval.tv_nsec = 1000000; +#endif + continue; + } } - if (!processStillExecuting((*Owner).first, (*Owner).second)) + // If we're looking for the lock file to disappear, but the process + // owning the lock died without cleaning up, just bail out. + if (!LockFileGone && + !processStillExecuting((*Owner).first, (*Owner).second)) { return; + } // Exponentially increase the time we wait for the lock to be removed. 
#if LLVM_ON_WIN32 diff --git a/contrib/llvm/lib/Support/PathV2.cpp b/contrib/llvm/lib/Support/PathV2.cpp index 58a6ea7..ac53a9e9 100644 --- a/contrib/llvm/lib/Support/PathV2.cpp +++ b/contrib/llvm/lib/Support/PathV2.cpp @@ -789,8 +789,11 @@ file_magic identify_magic(StringRef magic) { case '\177': if (magic[1] == 'E' && magic[2] == 'L' && magic[3] == 'F') { - if (magic.size() >= 18 && magic[17] == 0) - switch (magic[16]) { + bool Data2MSB = magic[5] == 2; + unsigned high = Data2MSB ? 16 : 17; + unsigned low = Data2MSB ? 17 : 16; + if (magic.size() >= 18 && magic[high] == 0) + switch (magic[low]) { default: break; case 1: return file_magic::elf_relocatable; case 2: return file_magic::elf_executable; diff --git a/contrib/llvm/lib/Support/Triple.cpp b/contrib/llvm/lib/Support/Triple.cpp index d2508ac..412e34c 100644 --- a/contrib/llvm/lib/Support/Triple.cpp +++ b/contrib/llvm/lib/Support/Triple.cpp @@ -32,6 +32,7 @@ const char *Triple::getArchTypeName(ArchType Kind) { case r600: return "r600"; case sparc: return "sparc"; case sparcv9: return "sparcv9"; + case systemz: return "s390x"; case tce: return "tce"; case thumb: return "thumb"; case x86: return "i386"; @@ -76,6 +77,8 @@ const char *Triple::getArchTypePrefix(ArchType Kind) { case sparcv9: case sparc: return "sparc"; + case systemz: return "systemz"; + case x86: case x86_64: return "x86"; @@ -170,6 +173,7 @@ Triple::ArchType Triple::getArchTypeForLLVMName(StringRef Name) { .Case("hexagon", hexagon) .Case("sparc", sparc) .Case("sparcv9", sparcv9) + .Case("systemz", systemz) .Case("tce", tce) .Case("thumb", thumb) .Case("x86", x86) @@ -233,6 +237,7 @@ static Triple::ArchType parseArch(StringRef ArchName) { .Case("mips64el", Triple::mips64el) .Case("r600", Triple::r600) .Case("hexagon", Triple::hexagon) + .Case("s390x", Triple::systemz) .Case("sparc", Triple::sparc) .Case("sparcv9", Triple::sparcv9) .Case("tce", Triple::tce) @@ -687,6 +692,7 @@ static unsigned getArchPointerBitWidth(llvm::Triple::ArchType Arch) { case llvm::Triple::nvptx64: case llvm::Triple::ppc64: case llvm::Triple::sparcv9: + case llvm::Triple::systemz: case llvm::Triple::x86_64: case llvm::Triple::spir64: return 64; @@ -712,6 +718,7 @@ Triple Triple::get32BitArchVariant() const { case Triple::UnknownArch: case Triple::aarch64: case Triple::msp430: + case Triple::systemz: T.setArch(UnknownArch); break; @@ -769,6 +776,7 @@ Triple Triple::get64BitArchVariant() const { case Triple::nvptx64: case Triple::ppc64: case Triple::sparcv9: + case Triple::systemz: case Triple::x86_64: // Already 64-bit. break; diff --git a/contrib/llvm/lib/Support/Unix/Memory.inc b/contrib/llvm/lib/Support/Unix/Memory.inc index f397408..2bb9bf1 100644 --- a/contrib/llvm/lib/Support/Unix/Memory.inc +++ b/contrib/llvm/lib/Support/Unix/Memory.inc @@ -325,7 +325,7 @@ void Memory::InvalidateInstructionCache(const void *Addr, for (intptr_t Line = StartLine; Line < EndLine; Line += LineSize) asm volatile("icbi 0, %0" : : "r"(Line)); asm volatile("isync"); -# elif defined(__arm__) && defined(__GNUC__) && !defined(__FreeBSD__) +# elif (defined(__arm__) || defined(__aarch64__)) && defined(__GNUC__) && !defined(__FreeBSD__) // FIXME: Can we safely always call this for __GNUC__ everywhere? 
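
As an illustrative aside (not part of the patch): the identify_magic() fix earlier in this block reads the 16-bit ELF e_type field while honoring the file's EI_DATA byte-order byte instead of assuming little-endian. The same check written out standalone, with made-up enum and function names rather than the LLVM file_magic API.

```cpp
#include "llvm/ADT/StringRef.h"

enum ElfKind { ElfUnknown, ElfRelocatable, ElfExecutable, ElfSharedObject, ElfCore };

static ElfKind classifyElf(llvm::StringRef Magic) {
  if (Magic.size() < 18 || Magic[0] != '\177' || Magic[1] != 'E' ||
      Magic[2] != 'L' || Magic[3] != 'F')
    return ElfUnknown;

  bool Data2MSB = Magic[5] == 2;        // EI_DATA == ELFDATA2MSB -> big endian
  unsigned High = Data2MSB ? 16 : 17;   // byte that must be zero for small e_type values
  unsigned Low  = Data2MSB ? 17 : 16;   // byte that carries the e_type value

  if (Magic[High] != 0)
    return ElfUnknown;
  switch (Magic[Low]) {
  case 1: return ElfRelocatable;        // ET_REL
  case 2: return ElfExecutable;         // ET_EXEC
  case 3: return ElfSharedObject;       // ET_DYN
  case 4: return ElfCore;               // ET_CORE
  default: return ElfUnknown;
  }
}
```
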
const char *Start = static_cast<const char *>(Addr); const char *End = Start + Len; diff --git a/contrib/llvm/lib/Support/Unix/PathV2.inc b/contrib/llvm/lib/Support/Unix/PathV2.inc index a3dfd4b..7e0aead 100644 --- a/contrib/llvm/lib/Support/Unix/PathV2.inc +++ b/contrib/llvm/lib/Support/Unix/PathV2.inc @@ -430,9 +430,7 @@ rety_open_create: if (SavedErrno == errc::file_exists) goto retry_random_path; // If path prefix doesn't exist, try to create it. - if (SavedErrno == errc::no_such_file_or_directory && - !exists(path::parent_path(RandomPath)) && - !TriedToCreateParent) { + if (SavedErrno == errc::no_such_file_or_directory && !TriedToCreateParent) { TriedToCreateParent = true; StringRef p(RandomPath); SmallString<64> dir_to_create; diff --git a/contrib/llvm/lib/Support/Unix/Program.inc b/contrib/llvm/lib/Support/Unix/Program.inc index 117151c..aa03d48 100644 --- a/contrib/llvm/lib/Support/Unix/Program.inc +++ b/contrib/llvm/lib/Support/Unix/Program.inc @@ -32,6 +32,9 @@ #if HAVE_FCNTL_H #include <fcntl.h> #endif +#if HAVE_UNISTD_H +#include <unistd.h> +#endif #ifdef HAVE_POSIX_SPAWN #include <spawn.h> #if !defined(__APPLE__) @@ -409,4 +412,25 @@ error_code Program::ChangeStderrToBinary(){ return make_error_code(errc::success); } +bool llvm::sys::argumentsFitWithinSystemLimits(ArrayRef<const char*> Args) { + static long ArgMax = sysconf(_SC_ARG_MAX); + + // System says no practical limit. + if (ArgMax == -1) + return true; + + // Conservatively account for space required by environment variables. + ArgMax /= 2; + + size_t ArgLength = 0; + for (ArrayRef<const char*>::iterator I = Args.begin(), E = Args.end(); + I != E; ++I) { + ArgLength += strlen(*I) + 1; + if (ArgLength > size_t(ArgMax)) { + return false; + } + } + return true; +} + } diff --git a/contrib/llvm/lib/Support/Unix/Signals.inc b/contrib/llvm/lib/Support/Unix/Signals.inc index 66338f1..64d1fc1 100644 --- a/contrib/llvm/lib/Support/Unix/Signals.inc +++ b/contrib/llvm/lib/Support/Unix/Signals.inc @@ -27,10 +27,12 @@ #if HAVE_SYS_STAT_H #include <sys/stat.h> #endif -#if HAVE_DLFCN_H && __GNUG__ -#include <dlfcn.h> +#if HAVE_CXXABI_H #include <cxxabi.h> #endif +#if HAVE_DLFCN_H +#include <dlfcn.h> +#endif #if HAVE_MACH_MACH_H #include <mach/mach.h> #endif @@ -184,6 +186,15 @@ static RETSIGTYPE SignalHandler(int Sig) { // Otherwise if it is a fault (like SEGV) run any handler. for (unsigned i = 0, e = CallBacksToRun.size(); i != e; ++i) CallBacksToRun[i].first(CallBacksToRun[i].second); + +#ifdef __s390__ + // On S/390, certain signals are delivered with PSW Address pointing to + // *after* the faulting instruction. Simply returning from the signal + // handler would continue execution after that point, instead of + // re-raising the signal. Raise the signal manually in those cases. 
+ if (Sig == SIGILL || Sig == SIGFPE || Sig == SIGTRAP) + raise(Sig); +#endif } void llvm::sys::RunInterruptHandlers() { @@ -290,9 +301,13 @@ void llvm::sys::PrintStackTrace(FILE *FD) { (int)(sizeof(void*) * 2) + 2, (unsigned long)StackTrace[i]); if (dlinfo.dli_sname != NULL) { - int res; fputc(' ', FD); +# if HAVE_CXXABI_H + int res; char* d = abi::__cxa_demangle(dlinfo.dli_sname, NULL, NULL, &res); +# else + char* d = NULL; +# endif if (d == NULL) fputs(dlinfo.dli_sname, FD); else fputs(d, FD); free(d); diff --git a/contrib/llvm/lib/Support/Windows/Program.inc b/contrib/llvm/lib/Support/Windows/Program.inc index 691d6d4..619ae5d 100644 --- a/contrib/llvm/lib/Support/Windows/Program.inc +++ b/contrib/llvm/lib/Support/Windows/Program.inc @@ -126,20 +126,58 @@ static bool ArgNeedsQuotes(const char *Str) { return Str[0] == '\0' || strpbrk(Str, "\t \"&\'()*<>\\`^|") != 0; } +/// CountPrecedingBackslashes - Returns the number of backslashes preceding Cur +/// in the C string Start. +static unsigned int CountPrecedingBackslashes(const char *Start, + const char *Cur) { + unsigned int Count = 0; + --Cur; + while (Cur >= Start && *Cur == '\\') { + ++Count; + --Cur; + } + return Count; +} + +/// EscapePrecedingEscapes - Append a backslash to Dst for every backslash +/// preceding Cur in the Start string. Assumes Dst has enough space. +static char *EscapePrecedingEscapes(char *Dst, const char *Start, + const char *Cur) { + unsigned PrecedingEscapes = CountPrecedingBackslashes(Start, Cur); + while (PrecedingEscapes > 0) { + *Dst++ = '\\'; + --PrecedingEscapes; + } + return Dst; +} /// ArgLenWithQuotes - Check whether argument needs to be quoted when calling /// CreateProcess and returns length of quoted arg with escaped quotes static unsigned int ArgLenWithQuotes(const char *Str) { - unsigned int len = ArgNeedsQuotes(Str) ? 2 : 0; + const char *Start = Str; + bool Quoted = ArgNeedsQuotes(Str); + unsigned int len = Quoted ? 2 : 0; while (*Str != '\0') { - if (*Str == '\"') - ++len; + if (*Str == '\"') { + // We need to add a backslash, but ensure that it isn't escaped. + unsigned PrecedingEscapes = CountPrecedingBackslashes(Start, Str); + len += PrecedingEscapes + 1; + } + // Note that we *don't* need to escape runs of backslashes that don't + // precede a double quote! See MSDN: + // http://msdn.microsoft.com/en-us/library/17w5ykft%28v=vs.85%29.aspx ++len; ++Str; } + if (Quoted) { + // Make sure the closing quote doesn't get escaped by a trailing backslash. + unsigned PrecedingEscapes = CountPrecedingBackslashes(Start, Str); + len += PrecedingEscapes + 1; + } + return len; } @@ -180,20 +218,27 @@ Program::Execute(const Path& path, for (unsigned i = 0; args[i]; i++) { const char *arg = args[i]; + const char *start = arg; bool needsQuoting = ArgNeedsQuotes(arg); if (needsQuoting) *p++ = '"'; while (*arg != '\0') { - if (*arg == '\"') + if (*arg == '\"') { + // Escape all preceding escapes (if any), and then escape the quote. + p = EscapePrecedingEscapes(p, start, arg); *p++ = '\\'; + } *p++ = *arg++; } - if (needsQuoting) + if (needsQuoting) { + // Make sure our quote doesn't get escaped by a trailing backslash. + p = EscapePrecedingEscapes(p, start, arg); *p++ = '"'; + } *p++ = ' '; } @@ -396,4 +441,20 @@ error_code Program::ChangeStderrToBinary(){ return make_error_code(errc::success); } +bool llvm::sys::argumentsFitWithinSystemLimits(ArrayRef<const char*> Args) { + // The documented max length of the command line passed to CreateProcess. 
+ static const size_t MaxCommandStringLength = 32768; + size_t ArgLength = 0; + for (ArrayRef<const char*>::iterator I = Args.begin(), E = Args.end(); + I != E; ++I) { + // Account for the trailing space for every arg but the last one and the + // trailing NULL of the last argument. + ArgLength += ArgLenWithQuotes(*I) + 1; + if (ArgLength > MaxCommandStringLength) { + return false; + } + } + return true; +} + } diff --git a/contrib/llvm/lib/Support/Windows/Signals.inc b/contrib/llvm/lib/Support/Windows/Signals.inc index 3dd6660..b18b4d1 100644 --- a/contrib/llvm/lib/Support/Windows/Signals.inc +++ b/contrib/llvm/lib/Support/Windows/Signals.inc @@ -178,6 +178,19 @@ namespace llvm { //===----------------------------------------------------------------------===// #ifdef _MSC_VER +/// AvoidMessageBoxHook - Emulates hitting "retry" from an "abort, retry, +/// ignore" CRT debug report dialog. "retry" raises an exception which +/// ultimately triggers our stack dumper. +static int AvoidMessageBoxHook(int ReportType, char *Message, int *Return) { + // Set *Return to the retry code for the return value of _CrtDbgReport: + // http://msdn.microsoft.com/en-us/library/8hyw4sy7(v=vs.71).aspx + // This may also trigger just-in-time debugging via DebugBreak(). + if (Return) + *Return = 1; + // Don't call _CrtDbgReport. + return TRUE; +} + /// CRTReportHook - Function called on a CRT debugging event. static int CRTReportHook(int ReportType, char *Message, int *Return) { // Don't cause a DebugBreak() on return. @@ -238,6 +251,15 @@ static void RegisterHandler() { OldFilter = SetUnhandledExceptionFilter(LLVMUnhandledExceptionFilter); SetConsoleCtrlHandler(LLVMConsoleCtrlHandler, TRUE); +#ifdef _MSC_VER + const char *EnableMsgbox = getenv("LLVM_ENABLE_CRT_REPORT"); + if (!EnableMsgbox || strcmp("0", EnableMsgbox) == 0) { + // Setting a report hook overrides the default behavior of popping an "abort, + // retry, or ignore" dialog. + _CrtSetReportHook(AvoidMessageBoxHook); + } +#endif + // Environment variable to disable any kind of crash dialog. if (getenv("LLVM_DISABLE_CRASH_REPORT")) { #ifdef _MSC_VER diff --git a/contrib/llvm/lib/Support/YAMLParser.cpp b/contrib/llvm/lib/Support/YAMLParser.cpp index 2cead20..213f5e1 100644 --- a/contrib/llvm/lib/Support/YAMLParser.cpp +++ b/contrib/llvm/lib/Support/YAMLParser.cpp @@ -260,7 +260,7 @@ public: Token getNext(); void printError(SMLoc Loc, SourceMgr::DiagKind Kind, const Twine &Message, - ArrayRef<SMRange> Ranges = ArrayRef<SMRange>()) { + ArrayRef<SMRange> Ranges = None) { SM.PrintMessage(Loc, Kind, Message, Ranges); } diff --git a/contrib/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/contrib/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp index dc41f2f..daa7f1d 100644 --- a/contrib/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp +++ b/contrib/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp @@ -367,9 +367,8 @@ AArch64FrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, // shoving a base register and an offset into the instruction then we may well // need to scavenge registers. We should either specifically add an // callee-save register for this purpose or allocate an extra spill slot. 
- bool BigStack = - (RS && MFI->estimateStackSize(MF) >= TII.estimateRSStackLimit(MF)) + MFI->estimateStackSize(MF) >= TII.estimateRSStackLimit(MF) || MFI->hasVarSizedObjects() // Access will be from X29: messes things up || (MFI->adjustsStack() && !hasReservedCallFrame(MF)); @@ -392,6 +391,8 @@ AArch64FrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, if (ExtraReg != 0) { MF.getRegInfo().setPhysRegUsed(ExtraReg); } else { + assert(RS && "Expect register scavenger to be available"); + // Create a stack slot for scavenging purposes. PrologEpilogInserter // helpfully places it near either SP or FP for us to avoid // infinitely-regression during scavenging. diff --git a/contrib/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/contrib/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp index 46b8221..102c71b 100644 --- a/contrib/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp +++ b/contrib/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp @@ -70,6 +70,15 @@ public: return SelectCVTFixedPosOperand(N, FixedPos, RegWidth); } + /// Used for pre-lowered address-reference nodes, so we already know + /// the fields match. This operand's job is simply to add an + /// appropriate shift operand (i.e. 0) to the MOVZ/MOVK instruction. + bool SelectMOVWAddressRef(SDValue N, SDValue &Imm, SDValue &Shift) { + Imm = N; + Shift = CurDAG->getTargetConstant(0, MVT::i32); + return true; + } + bool SelectFPZeroOperand(SDValue N, SDValue &Dummy); bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos, @@ -88,6 +97,13 @@ public: bool SelectTSTBOperand(SDValue N, SDValue &FixedPos, unsigned RegWidth); + SDNode *SelectAtomic(SDNode *N, unsigned Op8, unsigned Op16, unsigned Op32, + unsigned Op64); + + /// Put the given constant into a pool and return a DAG which will give its + /// address. 
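For orientation, the kinds of source-level constants that reach the literal-pool path served by getConstantPoolItemAddress() look roughly like the sketch below; whether a given integer is actually pooled or built with a MOVZ/MOVK sequence is still decided by the existing cost heuristics, so treat these as illustrative only:

    // Illustrative inputs only; pool-vs-move-immediate selection is made by the
    // surrounding heuristics, not by anything in this sketch.
    double pi() { return 3.141592653589793; }                    // FP literal: pool load
    unsigned long long bits() { return 0x1234567890abcdefULL; }  // may be pooled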
+ SDValue getConstantPoolItemAddress(DebugLoc DL, const Constant *CV); + SDNode *TrySelectToMoveImm(SDNode *N); SDNode *LowerToFPLitPool(SDNode *Node); SDNode *SelectToLitPool(SDNode *N); @@ -224,12 +240,51 @@ SDNode *AArch64DAGToDAGISel::TrySelectToMoveImm(SDNode *Node) { return ResNode; } +SDValue +AArch64DAGToDAGISel::getConstantPoolItemAddress(DebugLoc DL, + const Constant *CV) { + EVT PtrVT = TLI.getPointerTy(); + + switch (TLI.getTargetMachine().getCodeModel()) { + case CodeModel::Small: { + unsigned Alignment = + TLI.getDataLayout()->getABITypeAlignment(CV->getType()); + return CurDAG->getNode( + AArch64ISD::WrapperSmall, DL, PtrVT, + CurDAG->getTargetConstantPool(CV, PtrVT, 0, 0, AArch64II::MO_NO_FLAG), + CurDAG->getTargetConstantPool(CV, PtrVT, 0, 0, AArch64II::MO_LO12), + CurDAG->getConstant(Alignment, MVT::i32)); + } + case CodeModel::Large: { + SDNode *LitAddr; + LitAddr = CurDAG->getMachineNode( + AArch64::MOVZxii, DL, PtrVT, + CurDAG->getTargetConstantPool(CV, PtrVT, 0, 0, AArch64II::MO_ABS_G3), + CurDAG->getTargetConstant(0, MVT::i32)); + LitAddr = CurDAG->getMachineNode( + AArch64::MOVKxii, DL, PtrVT, SDValue(LitAddr, 0), + CurDAG->getTargetConstantPool(CV, PtrVT, 0, 0, AArch64II::MO_ABS_G2_NC), + CurDAG->getTargetConstant(0, MVT::i32)); + LitAddr = CurDAG->getMachineNode( + AArch64::MOVKxii, DL, PtrVT, SDValue(LitAddr, 0), + CurDAG->getTargetConstantPool(CV, PtrVT, 0, 0, AArch64II::MO_ABS_G1_NC), + CurDAG->getTargetConstant(0, MVT::i32)); + LitAddr = CurDAG->getMachineNode( + AArch64::MOVKxii, DL, PtrVT, SDValue(LitAddr, 0), + CurDAG->getTargetConstantPool(CV, PtrVT, 0, 0, AArch64II::MO_ABS_G0_NC), + CurDAG->getTargetConstant(0, MVT::i32)); + return SDValue(LitAddr, 0); + } + default: + llvm_unreachable("Only small and large code models supported now"); + } +} + SDNode *AArch64DAGToDAGISel::SelectToLitPool(SDNode *Node) { DebugLoc DL = Node->getDebugLoc(); uint64_t UnsignedVal = cast<ConstantSDNode>(Node)->getZExtValue(); int64_t SignedVal = cast<ConstantSDNode>(Node)->getSExtValue(); EVT DestType = Node->getValueType(0); - EVT PtrVT = TLI.getPointerTy(); // Since we may end up loading a 64-bit constant from a 32-bit entry the // constant in the pool may have a different type to the eventual node. 
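The large-code-model branch above materializes the pool address as a MOVZ of the top granule followed by three MOVKs. A small standalone sketch of the G3..G0 split that the MO_ABS_G* flags describe (the address value is made up):

    #include <cstdint>
    #include <cstdio>

    int main() {
      uint64_t Addr = 0x00007fabcdef1234ULL;   // hypothetical absolute address
      for (int G = 3; G >= 0; --G)             // G3 = bits 63:48 ... G0 = bits 15:0
        std::printf("G%d = 0x%04llx\n", G,
                    (unsigned long long)((Addr >> (16 * G)) & 0xffff));
      return 0;
    }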
@@ -256,14 +311,8 @@ SDNode *AArch64DAGToDAGISel::SelectToLitPool(SDNode *Node) { Constant *CV = ConstantInt::get(Type::getIntNTy(*CurDAG->getContext(), MemType.getSizeInBits()), UnsignedVal); - SDValue PoolAddr; + SDValue PoolAddr = getConstantPoolItemAddress(DL, CV); unsigned Alignment = TLI.getDataLayout()->getABITypeAlignment(CV->getType()); - PoolAddr = CurDAG->getNode(AArch64ISD::WrapperSmall, DL, PtrVT, - CurDAG->getTargetConstantPool(CV, PtrVT, 0, 0, - AArch64II::MO_NO_FLAG), - CurDAG->getTargetConstantPool(CV, PtrVT, 0, 0, - AArch64II::MO_LO12), - CurDAG->getConstant(Alignment, MVT::i32)); return CurDAG->getExtLoad(Extension, DL, DestType, CurDAG->getEntryNode(), PoolAddr, @@ -276,20 +325,10 @@ SDNode *AArch64DAGToDAGISel::SelectToLitPool(SDNode *Node) { SDNode *AArch64DAGToDAGISel::LowerToFPLitPool(SDNode *Node) { DebugLoc DL = Node->getDebugLoc(); const ConstantFP *FV = cast<ConstantFPSDNode>(Node)->getConstantFPValue(); - EVT PtrVT = TLI.getPointerTy(); EVT DestType = Node->getValueType(0); unsigned Alignment = TLI.getDataLayout()->getABITypeAlignment(FV->getType()); - SDValue PoolAddr; - - assert(TM.getCodeModel() == CodeModel::Small && - "Only small code model supported"); - PoolAddr = CurDAG->getNode(AArch64ISD::WrapperSmall, DL, PtrVT, - CurDAG->getTargetConstantPool(FV, PtrVT, 0, 0, - AArch64II::MO_NO_FLAG), - CurDAG->getTargetConstantPool(FV, PtrVT, 0, 0, - AArch64II::MO_LO12), - CurDAG->getConstant(Alignment, MVT::i32)); + SDValue PoolAddr = getConstantPoolItemAddress(DL, FV); return CurDAG->getLoad(DestType, DL, CurDAG->getEntryNode(), PoolAddr, MachinePointerInfo::getConstantPool(), @@ -318,6 +357,38 @@ AArch64DAGToDAGISel::SelectTSTBOperand(SDValue N, SDValue &FixedPos, return true; } +SDNode *AArch64DAGToDAGISel::SelectAtomic(SDNode *Node, unsigned Op8, + unsigned Op16,unsigned Op32, + unsigned Op64) { + // Mostly direct translation to the given operations, except that we preserve + // the AtomicOrdering for use later on. 
+ AtomicSDNode *AN = cast<AtomicSDNode>(Node); + EVT VT = AN->getMemoryVT(); + + unsigned Op; + if (VT == MVT::i8) + Op = Op8; + else if (VT == MVT::i16) + Op = Op16; + else if (VT == MVT::i32) + Op = Op32; + else if (VT == MVT::i64) + Op = Op64; + else + llvm_unreachable("Unexpected atomic operation"); + + SmallVector<SDValue, 4> Ops; + for (unsigned i = 1; i < AN->getNumOperands(); ++i) + Ops.push_back(AN->getOperand(i)); + + Ops.push_back(CurDAG->getTargetConstant(AN->getOrdering(), MVT::i32)); + Ops.push_back(AN->getOperand(0)); // Chain moves to the end + + return CurDAG->SelectNodeTo(Node, Op, + AN->getValueType(0), MVT::Other, + &Ops[0], Ops.size()); +} + SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) { // Dump information about the Node being selected DEBUG(dbgs() << "Selecting: "; Node->dump(CurDAG); dbgs() << "\n"); @@ -328,6 +399,78 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) { } switch (Node->getOpcode()) { + case ISD::ATOMIC_LOAD_ADD: + return SelectAtomic(Node, + AArch64::ATOMIC_LOAD_ADD_I8, + AArch64::ATOMIC_LOAD_ADD_I16, + AArch64::ATOMIC_LOAD_ADD_I32, + AArch64::ATOMIC_LOAD_ADD_I64); + case ISD::ATOMIC_LOAD_SUB: + return SelectAtomic(Node, + AArch64::ATOMIC_LOAD_SUB_I8, + AArch64::ATOMIC_LOAD_SUB_I16, + AArch64::ATOMIC_LOAD_SUB_I32, + AArch64::ATOMIC_LOAD_SUB_I64); + case ISD::ATOMIC_LOAD_AND: + return SelectAtomic(Node, + AArch64::ATOMIC_LOAD_AND_I8, + AArch64::ATOMIC_LOAD_AND_I16, + AArch64::ATOMIC_LOAD_AND_I32, + AArch64::ATOMIC_LOAD_AND_I64); + case ISD::ATOMIC_LOAD_OR: + return SelectAtomic(Node, + AArch64::ATOMIC_LOAD_OR_I8, + AArch64::ATOMIC_LOAD_OR_I16, + AArch64::ATOMIC_LOAD_OR_I32, + AArch64::ATOMIC_LOAD_OR_I64); + case ISD::ATOMIC_LOAD_XOR: + return SelectAtomic(Node, + AArch64::ATOMIC_LOAD_XOR_I8, + AArch64::ATOMIC_LOAD_XOR_I16, + AArch64::ATOMIC_LOAD_XOR_I32, + AArch64::ATOMIC_LOAD_XOR_I64); + case ISD::ATOMIC_LOAD_NAND: + return SelectAtomic(Node, + AArch64::ATOMIC_LOAD_NAND_I8, + AArch64::ATOMIC_LOAD_NAND_I16, + AArch64::ATOMIC_LOAD_NAND_I32, + AArch64::ATOMIC_LOAD_NAND_I64); + case ISD::ATOMIC_LOAD_MIN: + return SelectAtomic(Node, + AArch64::ATOMIC_LOAD_MIN_I8, + AArch64::ATOMIC_LOAD_MIN_I16, + AArch64::ATOMIC_LOAD_MIN_I32, + AArch64::ATOMIC_LOAD_MIN_I64); + case ISD::ATOMIC_LOAD_MAX: + return SelectAtomic(Node, + AArch64::ATOMIC_LOAD_MAX_I8, + AArch64::ATOMIC_LOAD_MAX_I16, + AArch64::ATOMIC_LOAD_MAX_I32, + AArch64::ATOMIC_LOAD_MAX_I64); + case ISD::ATOMIC_LOAD_UMIN: + return SelectAtomic(Node, + AArch64::ATOMIC_LOAD_UMIN_I8, + AArch64::ATOMIC_LOAD_UMIN_I16, + AArch64::ATOMIC_LOAD_UMIN_I32, + AArch64::ATOMIC_LOAD_UMIN_I64); + case ISD::ATOMIC_LOAD_UMAX: + return SelectAtomic(Node, + AArch64::ATOMIC_LOAD_UMAX_I8, + AArch64::ATOMIC_LOAD_UMAX_I16, + AArch64::ATOMIC_LOAD_UMAX_I32, + AArch64::ATOMIC_LOAD_UMAX_I64); + case ISD::ATOMIC_SWAP: + return SelectAtomic(Node, + AArch64::ATOMIC_SWAP_I8, + AArch64::ATOMIC_SWAP_I16, + AArch64::ATOMIC_SWAP_I32, + AArch64::ATOMIC_SWAP_I64); + case ISD::ATOMIC_CMP_SWAP: + return SelectAtomic(Node, + AArch64::ATOMIC_CMP_SWAP_I8, + AArch64::ATOMIC_CMP_SWAP_I16, + AArch64::ATOMIC_CMP_SWAP_I32, + AArch64::ATOMIC_CMP_SWAP_I64); case ISD::FrameIndex: { int FI = cast<FrameIndexSDNode>(Node)->getIndex(); EVT PtrTy = TLI.getPointerTy(); diff --git a/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index e9f4497..56f6751 100644 --- a/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -59,13 
+59,6 @@ AArch64TargetLowering::AArch64TargetLowering(AArch64TargetMachine &TM) computeRegisterProperties(); - // Some atomic operations can be folded into load-acquire or store-release - // instructions on AArch64. It's marginally simpler to let LLVM expand - // everything out to a barrier and then recombine the (few) barriers we can. - setInsertFencesForAtomic(true); - setTargetDAGCombine(ISD::ATOMIC_FENCE); - setTargetDAGCombine(ISD::ATOMIC_STORE); - // We combine OR nodes for bitfield and NEON BSL operations. setTargetDAGCombine(ISD::OR); @@ -275,27 +268,34 @@ EVT AArch64TargetLowering::getSetCCResultType(EVT VT) const { return VT.changeVectorElementTypeToInteger(); } -static void getExclusiveOperation(unsigned Size, unsigned &ldrOpc, - unsigned &strOpc) { - switch (Size) { - default: llvm_unreachable("unsupported size for atomic binary op!"); - case 1: - ldrOpc = AArch64::LDXR_byte; - strOpc = AArch64::STXR_byte; - break; - case 2: - ldrOpc = AArch64::LDXR_hword; - strOpc = AArch64::STXR_hword; - break; - case 4: - ldrOpc = AArch64::LDXR_word; - strOpc = AArch64::STXR_word; - break; - case 8: - ldrOpc = AArch64::LDXR_dword; - strOpc = AArch64::STXR_dword; - break; - } +static void getExclusiveOperation(unsigned Size, AtomicOrdering Ord, + unsigned &LdrOpc, + unsigned &StrOpc) { + static unsigned LoadBares[] = {AArch64::LDXR_byte, AArch64::LDXR_hword, + AArch64::LDXR_word, AArch64::LDXR_dword}; + static unsigned LoadAcqs[] = {AArch64::LDAXR_byte, AArch64::LDAXR_hword, + AArch64::LDAXR_word, AArch64::LDAXR_dword}; + static unsigned StoreBares[] = {AArch64::STXR_byte, AArch64::STXR_hword, + AArch64::STXR_word, AArch64::STXR_dword}; + static unsigned StoreRels[] = {AArch64::STLXR_byte, AArch64::STLXR_hword, + AArch64::STLXR_word, AArch64::STLXR_dword}; + + unsigned *LoadOps, *StoreOps; + if (Ord == Acquire || Ord == AcquireRelease || Ord == SequentiallyConsistent) + LoadOps = LoadAcqs; + else + LoadOps = LoadBares; + + if (Ord == Release || Ord == AcquireRelease || Ord == SequentiallyConsistent) + StoreOps = StoreRels; + else + StoreOps = StoreBares; + + assert(isPowerOf2_32(Size) && Size <= 8 && + "unsupported size for atomic binary op!"); + + LdrOpc = LoadOps[Log2_32(Size)]; + StrOpc = StoreOps[Log2_32(Size)]; } MachineBasicBlock * @@ -313,12 +313,13 @@ AArch64TargetLowering::emitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB, unsigned dest = MI->getOperand(0).getReg(); unsigned ptr = MI->getOperand(1).getReg(); unsigned incr = MI->getOperand(2).getReg(); + AtomicOrdering Ord = static_cast<AtomicOrdering>(MI->getOperand(3).getImm()); DebugLoc dl = MI->getDebugLoc(); MachineRegisterInfo &MRI = BB->getParent()->getRegInfo(); unsigned ldrOpc, strOpc; - getExclusiveOperation(Size, ldrOpc, strOpc); + getExclusiveOperation(Size, Ord, ldrOpc, strOpc); MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB); MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB); @@ -397,6 +398,8 @@ AArch64TargetLowering::emitAtomicBinaryMinMax(MachineInstr *MI, unsigned dest = MI->getOperand(0).getReg(); unsigned ptr = MI->getOperand(1).getReg(); unsigned incr = MI->getOperand(2).getReg(); + AtomicOrdering Ord = static_cast<AtomicOrdering>(MI->getOperand(3).getImm()); + unsigned oldval = dest; DebugLoc dl = MI->getDebugLoc(); @@ -411,7 +414,7 @@ AArch64TargetLowering::emitAtomicBinaryMinMax(MachineInstr *MI, } unsigned ldrOpc, strOpc; - getExclusiveOperation(Size, ldrOpc, strOpc); + getExclusiveOperation(Size, Ord, ldrOpc, strOpc); MachineBasicBlock *loopMBB = 
MF->CreateMachineBasicBlock(LLVM_BB); MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB); @@ -479,6 +482,7 @@ AArch64TargetLowering::emitAtomicCmpSwap(MachineInstr *MI, unsigned ptr = MI->getOperand(1).getReg(); unsigned oldval = MI->getOperand(2).getReg(); unsigned newval = MI->getOperand(3).getReg(); + AtomicOrdering Ord = static_cast<AtomicOrdering>(MI->getOperand(4).getImm()); const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); DebugLoc dl = MI->getDebugLoc(); @@ -487,7 +491,7 @@ AArch64TargetLowering::emitAtomicCmpSwap(MachineInstr *MI, TRCsp = Size == 8 ? &AArch64::GPR64xspRegClass : &AArch64::GPR32wspRegClass; unsigned ldrOpc, strOpc; - getExclusiveOperation(Size, ldrOpc, strOpc); + getExclusiveOperation(Size, Ord, ldrOpc, strOpc); MachineFunction *MF = BB->getParent(); const BasicBlock *LLVM_BB = BB->getBasicBlock(); @@ -777,6 +781,7 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const { case AArch64ISD::TC_RETURN: return "AArch64ISD::TC_RETURN"; case AArch64ISD::THREAD_POINTER: return "AArch64ISD::THREAD_POINTER"; case AArch64ISD::TLSDESCCALL: return "AArch64ISD::TLSDESCCALL"; + case AArch64ISD::WrapperLarge: return "AArch64ISD::WrapperLarge"; case AArch64ISD::WrapperSmall: return "AArch64ISD::WrapperSmall"; default: return NULL; @@ -1662,17 +1667,26 @@ AArch64TargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const { EVT PtrVT = getPointerTy(); const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress(); - assert(getTargetMachine().getCodeModel() == CodeModel::Small - && "Only small code model supported at the moment"); - - // The most efficient code is PC-relative anyway for the small memory model, - // so we don't need to worry about relocation model. - return DAG.getNode(AArch64ISD::WrapperSmall, DL, PtrVT, - DAG.getTargetBlockAddress(BA, PtrVT, 0, - AArch64II::MO_NO_FLAG), - DAG.getTargetBlockAddress(BA, PtrVT, 0, - AArch64II::MO_LO12), - DAG.getConstant(/*Alignment=*/ 4, MVT::i32)); + switch(getTargetMachine().getCodeModel()) { + case CodeModel::Small: + // The most efficient code is PC-relative anyway for the small memory model, + // so we don't need to worry about relocation model. + return DAG.getNode(AArch64ISD::WrapperSmall, DL, PtrVT, + DAG.getTargetBlockAddress(BA, PtrVT, 0, + AArch64II::MO_NO_FLAG), + DAG.getTargetBlockAddress(BA, PtrVT, 0, + AArch64II::MO_LO12), + DAG.getConstant(/*Alignment=*/ 4, MVT::i32)); + case CodeModel::Large: + return DAG.getNode( + AArch64ISD::WrapperLarge, DL, PtrVT, + DAG.getTargetBlockAddress(BA, PtrVT, 0, AArch64II::MO_ABS_G3), + DAG.getTargetBlockAddress(BA, PtrVT, 0, AArch64II::MO_ABS_G2_NC), + DAG.getTargetBlockAddress(BA, PtrVT, 0, AArch64II::MO_ABS_G1_NC), + DAG.getTargetBlockAddress(BA, PtrVT, 0, AArch64II::MO_ABS_G0_NC)); + default: + llvm_unreachable("Only small and large code models supported now"); + } } @@ -1841,12 +1855,33 @@ AArch64TargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG, } SDValue -AArch64TargetLowering::LowerGlobalAddressELF(SDValue Op, - SelectionDAG &DAG) const { - // TableGen doesn't have easy access to the CodeModel or RelocationModel, so - // we make that distinction here. 
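With getExclusiveOperation() now keyed on the AtomicOrdering, the acquire/release flavours of the exclusive instructions are chosen directly instead of relying on separately inserted fences. At the source level, the operations whose lowering this should affect look like the following sketch; the instruction names in the comments are the forms these orderings are expected to map to, not guaranteed output:

    #include <atomic>

    int fetchAddAcqRel(std::atomic<int> &A) {
      return A.fetch_add(1, std::memory_order_acq_rel);   // LDAXR/STLXR retry loop
    }
    int loadAcquire(const std::atomic<int> &A) {
      return A.load(std::memory_order_acquire);           // LDAR
    }
    void storeRelease(std::atomic<int> &A, int V) {
      A.store(V, std::memory_order_release);              // STLR
    }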
+AArch64TargetLowering::LowerGlobalAddressELFLarge(SDValue Op, + SelectionDAG &DAG) const { + assert(getTargetMachine().getCodeModel() == CodeModel::Large); + assert(getTargetMachine().getRelocationModel() == Reloc::Static); + + EVT PtrVT = getPointerTy(); + DebugLoc dl = Op.getDebugLoc(); + const GlobalAddressSDNode *GN = cast<GlobalAddressSDNode>(Op); + const GlobalValue *GV = GN->getGlobal(); + + SDValue GlobalAddr = DAG.getNode( + AArch64ISD::WrapperLarge, dl, PtrVT, + DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, AArch64II::MO_ABS_G3), + DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, AArch64II::MO_ABS_G2_NC), + DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, AArch64II::MO_ABS_G1_NC), + DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, AArch64II::MO_ABS_G0_NC)); - // We support the small memory model for now. + if (GN->getOffset() != 0) + return DAG.getNode(ISD::ADD, dl, PtrVT, GlobalAddr, + DAG.getConstant(GN->getOffset(), PtrVT)); + + return GlobalAddr; +} + +SDValue +AArch64TargetLowering::LowerGlobalAddressELFSmall(SDValue Op, + SelectionDAG &DAG) const { assert(getTargetMachine().getCodeModel() == CodeModel::Small); EVT PtrVT = getPointerTy(); @@ -1925,6 +1960,22 @@ AArch64TargetLowering::LowerGlobalAddressELF(SDValue Op, return GlobalRef; } +SDValue +AArch64TargetLowering::LowerGlobalAddressELF(SDValue Op, + SelectionDAG &DAG) const { + // TableGen doesn't have easy access to the CodeModel or RelocationModel, so + // we make those distinctions here. + + switch (getTargetMachine().getCodeModel()) { + case CodeModel::Small: + return LowerGlobalAddressELFSmall(Op, DAG); + case CodeModel::Large: + return LowerGlobalAddressELFLarge(Op, DAG); + default: + llvm_unreachable("Only small and large code models supported now"); + } +} + SDValue AArch64TargetLowering::LowerTLSDescCall(SDValue SymAddr, SDValue DescAddr, DebugLoc DL, @@ -1974,6 +2025,8 @@ AArch64TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const { assert(Subtarget->isTargetELF() && "TLS not implemented for non-ELF targets"); + assert(getTargetMachine().getCodeModel() == CodeModel::Small + && "TLS only supported in small memory model"); const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op); TLSModel::Model Model = getTargetMachine().getTLSModel(GA->getGlobal()); @@ -2082,14 +2135,27 @@ SDValue AArch64TargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const { JumpTableSDNode *JT = cast<JumpTableSDNode>(Op); DebugLoc dl = JT->getDebugLoc(); + EVT PtrVT = getPointerTy(); // When compiling PIC, jump tables get put in the code section so a static // relocation-style is acceptable for both cases. 
- return DAG.getNode(AArch64ISD::WrapperSmall, dl, getPointerTy(), - DAG.getTargetJumpTable(JT->getIndex(), getPointerTy()), - DAG.getTargetJumpTable(JT->getIndex(), getPointerTy(), - AArch64II::MO_LO12), - DAG.getConstant(1, MVT::i32)); + switch (getTargetMachine().getCodeModel()) { + case CodeModel::Small: + return DAG.getNode(AArch64ISD::WrapperSmall, dl, PtrVT, + DAG.getTargetJumpTable(JT->getIndex(), PtrVT), + DAG.getTargetJumpTable(JT->getIndex(), PtrVT, + AArch64II::MO_LO12), + DAG.getConstant(1, MVT::i32)); + case CodeModel::Large: + return DAG.getNode( + AArch64ISD::WrapperLarge, dl, PtrVT, + DAG.getTargetJumpTable(JT->getIndex(), PtrVT, AArch64II::MO_ABS_G3), + DAG.getTargetJumpTable(JT->getIndex(), PtrVT, AArch64II::MO_ABS_G2_NC), + DAG.getTargetJumpTable(JT->getIndex(), PtrVT, AArch64II::MO_ABS_G1_NC), + DAG.getTargetJumpTable(JT->getIndex(), PtrVT, AArch64II::MO_ABS_G0_NC)); + default: + llvm_unreachable("Only small and large code models supported now"); + } } // (SELECT_CC lhs, rhs, iftrue, iffalse, condcode) @@ -2377,78 +2443,6 @@ static SDValue PerformANDCombine(SDNode *N, DAG.getConstant(LSB + Width - 1, MVT::i64)); } -static SDValue PerformATOMIC_FENCECombine(SDNode *FenceNode, - TargetLowering::DAGCombinerInfo &DCI) { - // An atomic operation followed by an acquiring atomic fence can be reduced to - // an acquiring load. The atomic operation provides a convenient pointer to - // load from. If the original operation was a load anyway we can actually - // combine the two operations into an acquiring load. - SelectionDAG &DAG = DCI.DAG; - SDValue AtomicOp = FenceNode->getOperand(0); - AtomicSDNode *AtomicNode = dyn_cast<AtomicSDNode>(AtomicOp); - - // A fence on its own can't be optimised - if (!AtomicNode) - return SDValue(); - - AtomicOrdering FenceOrder - = static_cast<AtomicOrdering>(FenceNode->getConstantOperandVal(1)); - SynchronizationScope FenceScope - = static_cast<SynchronizationScope>(FenceNode->getConstantOperandVal(2)); - - if (FenceOrder != Acquire || FenceScope != AtomicNode->getSynchScope()) - return SDValue(); - - // If the original operation was an ATOMIC_LOAD then we'll be replacing it, so - // the chain we use should be its input, otherwise we'll put our store after - // it so we use its output chain. - SDValue Chain = AtomicNode->getOpcode() == ISD::ATOMIC_LOAD ? - AtomicNode->getChain() : AtomicOp; - - // We have an acquire fence with a handy atomic operation nearby, we can - // convert the fence into a load-acquire, discarding the result. - DebugLoc DL = FenceNode->getDebugLoc(); - SDValue Op = DAG.getAtomic(ISD::ATOMIC_LOAD, DL, AtomicNode->getMemoryVT(), - AtomicNode->getValueType(0), - Chain, // Chain - AtomicOp.getOperand(1), // Pointer - AtomicNode->getMemOperand(), Acquire, - FenceScope); - - if (AtomicNode->getOpcode() == ISD::ATOMIC_LOAD) - DAG.ReplaceAllUsesWith(AtomicNode, Op.getNode()); - - return Op.getValue(1); -} - -static SDValue PerformATOMIC_STORECombine(SDNode *N, - TargetLowering::DAGCombinerInfo &DCI) { - // A releasing atomic fence followed by an atomic store can be combined into a - // single store operation. 
- SelectionDAG &DAG = DCI.DAG; - AtomicSDNode *AtomicNode = cast<AtomicSDNode>(N); - SDValue FenceOp = AtomicNode->getOperand(0); - - if (FenceOp.getOpcode() != ISD::ATOMIC_FENCE) - return SDValue(); - - AtomicOrdering FenceOrder - = static_cast<AtomicOrdering>(FenceOp->getConstantOperandVal(1)); - SynchronizationScope FenceScope - = static_cast<SynchronizationScope>(FenceOp->getConstantOperandVal(2)); - - if (FenceOrder != Release || FenceScope != AtomicNode->getSynchScope()) - return SDValue(); - - DebugLoc DL = AtomicNode->getDebugLoc(); - return DAG.getAtomic(ISD::ATOMIC_STORE, DL, AtomicNode->getMemoryVT(), - FenceOp.getOperand(0), // Chain - AtomicNode->getOperand(1), // Pointer - AtomicNode->getOperand(2), // Value - AtomicNode->getMemOperand(), Release, - FenceScope); -} - /// For a true bitfield insert, the bits getting into that contiguous mask /// should come from the low part of an existing value: they must be formed from /// a compatible SHL operation (unless they're already low). This function @@ -2804,8 +2798,6 @@ AArch64TargetLowering::PerformDAGCombine(SDNode *N, switch (N->getOpcode()) { default: break; case ISD::AND: return PerformANDCombine(N, DCI); - case ISD::ATOMIC_FENCE: return PerformATOMIC_FENCECombine(N, DCI); - case ISD::ATOMIC_STORE: return PerformATOMIC_STORECombine(N, DCI); case ISD::OR: return PerformORCombine(N, DCI, Subtarget); case ISD::SRA: return PerformSRACombine(N, DCI); } diff --git a/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.h index 4960d28..d49b3ee 100644 --- a/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.h +++ b/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.h @@ -103,7 +103,12 @@ namespace AArch64ISD { UBFX, // Wraps an address which the ISelLowering phase has decided should be - // created using the small absolute memory model: i.e. adrp/add or + // created using the large memory model style: i.e. a sequence of four + // movz/movk instructions. + WrapperLarge, + + // Wraps an address which the ISelLowering phase has decided should be + // created using the small memory model style: i.e. adrp/add or // adrp/mem-op. This exists to prevent bare TargetAddresses which may never // get selected. WrapperSmall @@ -206,7 +211,11 @@ public: SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG, bool IsSigned) const; + + SDValue LowerGlobalAddressELFSmall(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerGlobalAddressELFLarge(SDValue Op, SelectionDAG &DAG) const; SDValue LowerGlobalAddressELF(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerTLSDescCall(SDValue SymAddr, SDValue DescAddr, DebugLoc DL, SelectionDAG &DAG) const; SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const; diff --git a/contrib/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/contrib/llvm/lib/Target/AArch64/AArch64InstrFormats.td index cb93471..9dd122f 100644 --- a/contrib/llvm/lib/Target/AArch64/AArch64InstrFormats.td +++ b/contrib/llvm/lib/Target/AArch64/AArch64InstrFormats.td @@ -8,7 +8,7 @@ //===----------------------------------------------------------------------===// // This file describes AArch64 instruction formats, down to the level of the // instruction's overall class. 
-// ===----------------------------------------------------------------------===// +//===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// diff --git a/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.td index 37be5e4..d2cfc7d 100644 --- a/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ b/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -70,12 +70,20 @@ def A64cmn : PatFrag<(ops node:$lhs, node:$rhs), // made for a variable/address at ISelLowering. // + The output of ISelLowering should be selectable (hence the Wrapper, // rather than a bare target opcode) -def SDTAArch64Wrapper : SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>, - SDTCisSameAs<1, 2>, - SDTCisVT<3, i32>, - SDTCisPtrTy<0>]>; +def SDTAArch64WrapperLarge : SDTypeProfile<1, 4, [SDTCisSameAs<0, 1>, + SDTCisSameAs<0, 2>, + SDTCisSameAs<0, 3>, + SDTCisSameAs<0, 4>, + SDTCisPtrTy<0>]>; -def A64WrapperSmall : SDNode<"AArch64ISD::WrapperSmall", SDTAArch64Wrapper>; +def A64WrapperLarge :SDNode<"AArch64ISD::WrapperLarge", SDTAArch64WrapperLarge>; + +def SDTAArch64WrapperSmall : SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>, + SDTCisSameAs<1, 2>, + SDTCisVT<3, i32>, + SDTCisPtrTy<0>]>; + +def A64WrapperSmall :SDNode<"AArch64ISD::WrapperSmall", SDTAArch64WrapperSmall>; def SDTAArch64GOTLoad : SDTypeProfile<1, 1, [SDTCisPtrTy<0>, SDTCisPtrTy<1>]>; @@ -159,49 +167,55 @@ let Defs = [XSP], Uses = [XSP] in { // Atomic operation pseudo-instructions //===----------------------------------------------------------------------===// -let usesCustomInserter = 1 in { -multiclass AtomicSizes<string opname> { - def _I8 : PseudoInst<(outs GPR32:$dst), (ins GPR64:$ptr, GPR32:$incr), - [(set i32:$dst, (!cast<SDNode>(opname # "_8") i64:$ptr, i32:$incr))]>; - def _I16 : PseudoInst<(outs GPR32:$dst), (ins GPR64:$ptr, GPR32:$incr), - [(set i32:$dst, (!cast<SDNode>(opname # "_16") i64:$ptr, i32:$incr))]>; - def _I32 : PseudoInst<(outs GPR32:$dst), (ins GPR64:$ptr, GPR32:$incr), - [(set i32:$dst, (!cast<SDNode>(opname # "_32") i64:$ptr, i32:$incr))]>; - def _I64 : PseudoInst<(outs GPR64:$dst), (ins GPR64:$ptr, GPR64:$incr), - [(set i64:$dst, (!cast<SDNode>(opname # "_64") i64:$ptr, i64:$incr))]>; -} -} - -defm ATOMIC_LOAD_ADD : AtomicSizes<"atomic_load_add">; -defm ATOMIC_LOAD_SUB : AtomicSizes<"atomic_load_sub">; -defm ATOMIC_LOAD_AND : AtomicSizes<"atomic_load_and">; -defm ATOMIC_LOAD_OR : AtomicSizes<"atomic_load_or">; -defm ATOMIC_LOAD_XOR : AtomicSizes<"atomic_load_xor">; -defm ATOMIC_LOAD_NAND : AtomicSizes<"atomic_load_nand">; -defm ATOMIC_SWAP : AtomicSizes<"atomic_swap">; +// These get selected from C++ code as a pretty much direct translation from the +// generic DAG nodes. The one exception is the AtomicOrdering is added as an +// operand so that the eventual lowering can make use of it and choose +// acquire/release operations when required. 
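The custom inserter expands each of these pseudo-instructions into a load-exclusive/store-exclusive retry loop, with the new ordering operand deciding whether the acquire/release forms are used. A C-style sketch of that loop for a 32-bit add with acq_rel ordering; ldaxr32() and stlxr32() are hypothetical stand-ins for the LDAXR/STLXR instructions, not real intrinsics:

    extern "C" int ldaxr32(int *Ptr);            // hypothetical: LDAXR wrapper
    extern "C" int stlxr32(int Val, int *Ptr);   // hypothetical: STLXR, 0 = success

    int atomicAddAcqRel(int *Ptr, int Incr) {
      int Old;
      do {
        Old = ldaxr32(Ptr);                      // load-acquire exclusive
      } while (stlxr32(Old + Incr, Ptr) != 0);   // store-release exclusive; retry on failure
      return Old;                                // the pseudo's $dst: the old value
    }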
+ +let usesCustomInserter = 1, hasCtrlDep = 1, mayLoad = 1, mayStore = 1 in { +multiclass AtomicSizes { + def _I8 : PseudoInst<(outs GPR32:$dst), + (ins GPR64xsp:$ptr, GPR32:$incr, i32imm:$ordering), []>; + def _I16 : PseudoInst<(outs GPR32:$dst), + (ins GPR64xsp:$ptr, GPR32:$incr, i32imm:$ordering), []>; + def _I32 : PseudoInst<(outs GPR32:$dst), + (ins GPR64xsp:$ptr, GPR32:$incr, i32imm:$ordering), []>; + def _I64 : PseudoInst<(outs GPR64:$dst), + (ins GPR64xsp:$ptr, GPR64:$incr, i32imm:$ordering), []>; +} +} + +defm ATOMIC_LOAD_ADD : AtomicSizes; +defm ATOMIC_LOAD_SUB : AtomicSizes; +defm ATOMIC_LOAD_AND : AtomicSizes; +defm ATOMIC_LOAD_OR : AtomicSizes; +defm ATOMIC_LOAD_XOR : AtomicSizes; +defm ATOMIC_LOAD_NAND : AtomicSizes; +defm ATOMIC_SWAP : AtomicSizes; let Defs = [NZCV] in { // These operations need a CMP to calculate the correct value - defm ATOMIC_LOAD_MIN : AtomicSizes<"atomic_load_min">; - defm ATOMIC_LOAD_MAX : AtomicSizes<"atomic_load_max">; - defm ATOMIC_LOAD_UMIN : AtomicSizes<"atomic_load_umin">; - defm ATOMIC_LOAD_UMAX : AtomicSizes<"atomic_load_umax">; -} - -let usesCustomInserter = 1, Defs = [NZCV] in { -def ATOMIC_CMP_SWAP_I8 - : PseudoInst<(outs GPR32:$dst), (ins GPR64:$ptr, GPR32:$old, GPR32:$new), - [(set i32:$dst, (atomic_cmp_swap_8 i64:$ptr, i32:$old, i32:$new))]>; -def ATOMIC_CMP_SWAP_I16 - : PseudoInst<(outs GPR32:$dst), (ins GPR64:$ptr, GPR32:$old, GPR32:$new), - [(set i32:$dst, (atomic_cmp_swap_16 i64:$ptr, i32:$old, i32:$new))]>; -def ATOMIC_CMP_SWAP_I32 - : PseudoInst<(outs GPR32:$dst), (ins GPR64:$ptr, GPR32:$old, GPR32:$new), - [(set i32:$dst, (atomic_cmp_swap_32 i64:$ptr, i32:$old, i32:$new))]>; -def ATOMIC_CMP_SWAP_I64 - : PseudoInst<(outs GPR64:$dst), (ins GPR64:$ptr, GPR64:$old, GPR64:$new), - [(set i64:$dst, (atomic_cmp_swap_64 i64:$ptr, i64:$old, i64:$new))]>; + defm ATOMIC_LOAD_MIN : AtomicSizes; + defm ATOMIC_LOAD_MAX : AtomicSizes; + defm ATOMIC_LOAD_UMIN : AtomicSizes; + defm ATOMIC_LOAD_UMAX : AtomicSizes; +} + +class AtomicCmpSwap<RegisterClass GPRData> + : PseudoInst<(outs GPRData:$dst), + (ins GPR64xsp:$ptr, GPRData:$old, GPRData:$new, + i32imm:$ordering), []> { + let usesCustomInserter = 1; + let hasCtrlDep = 1; + let mayLoad = 1; + let mayStore = 1; + let Defs = [NZCV]; } +def ATOMIC_CMP_SWAP_I8 : AtomicCmpSwap<GPR32>; +def ATOMIC_CMP_SWAP_I16 : AtomicCmpSwap<GPR32>; +def ATOMIC_CMP_SWAP_I32 : AtomicCmpSwap<GPR32>; +def ATOMIC_CMP_SWAP_I64 : AtomicCmpSwap<GPR64>; + //===----------------------------------------------------------------------===// // Add-subtract (extended register) instructions //===----------------------------------------------------------------------===// @@ -2579,7 +2593,8 @@ defm LDAR : A64I_LRex<"ldar", 0b101>; class acquiring_load<PatFrag base> : PatFrag<(ops node:$ptr), (base node:$ptr), [{ - return cast<AtomicSDNode>(N)->getOrdering() == Acquire; + AtomicOrdering Ordering = cast<AtomicSDNode>(N)->getOrdering(); + return Ordering == Acquire || Ordering == SequentiallyConsistent; }]>; def atomic_load_acquire_8 : acquiring_load<atomic_load_8>; @@ -2610,7 +2625,8 @@ class A64I_SLexs_impl<bits<2> size, bits<3> opcode, string asm, dag outs, class releasing_store<PatFrag base> : PatFrag<(ops node:$ptr, node:$val), (base node:$ptr, node:$val), [{ - return cast<AtomicSDNode>(N)->getOrdering() == Release; + AtomicOrdering Ordering = cast<AtomicSDNode>(N)->getOrdering(); + return Ordering == Release || Ordering == SequentiallyConsistent; }]>; def atomic_store_release_8 : releasing_store<atomic_store_8>; @@ -3863,7 +3879,7 @@ 
multiclass movw_operands<string prefix, string instname, int width> { let DiagnosticType = "MOVWUImm16"; } - def _imm : Operand<i32> { + def _imm : Operand<i64> { let ParserMatchClass = !cast<AsmOperandClass>(prefix # "_imm_asmoperand"); let PrintMethod = "printMoveWideImmOperand"; let EncoderMethod = "getMoveWideImmOpValue"; @@ -3934,7 +3950,7 @@ multiclass movalias_operand<string prefix, string basename, # "A64Imms::" # immpredicate # ">"; } - def _movimm : Operand<i32> { + def _movimm : Operand<i64> { let ParserMatchClass = !cast<AsmOperandClass>(prefix # "_asmoperand"); let MIOperandInfo = (ops uimm16:$UImm16, imm:$Shift); @@ -3958,6 +3974,15 @@ def : movalias<MOVZxii, GPR64, movz64_movimm>; def : movalias<MOVNwii, GPR32, movn32_movimm>; def : movalias<MOVNxii, GPR64, movn64_movimm>; +def movw_addressref : ComplexPattern<i64, 2, "SelectMOVWAddressRef">; + +def : Pat<(A64WrapperLarge movw_addressref:$G3, movw_addressref:$G2, + movw_addressref:$G1, movw_addressref:$G0), + (MOVKxii (MOVKxii (MOVKxii (MOVZxii movw_addressref:$G3), + movw_addressref:$G2), + movw_addressref:$G1), + movw_addressref:$G0)>; + //===----------------------------------------------------------------------===// // PC-relative addressing instructions //===----------------------------------------------------------------------===// @@ -4454,8 +4479,6 @@ def : ADRP_ADD<A64WrapperSmall, tjumptable>; // GOT access patterns //===----------------------------------------------------------------------===// -// FIXME: Wibble - class GOTLoadSmall<SDNode addrfrag> : Pat<(A64GOTLoad (A64WrapperSmall addrfrag:$Hi, addrfrag:$Lo12, 8)), (LS64_LDR (ADRPxi addrfrag:$Hi), addrfrag:$Lo12)>; diff --git a/contrib/llvm/lib/Target/AArch64/AArch64MCInstLower.cpp b/contrib/llvm/lib/Target/AArch64/AArch64MCInstLower.cpp index c96bf85..3d22330 100644 --- a/contrib/llvm/lib/Target/AArch64/AArch64MCInstLower.cpp +++ b/contrib/llvm/lib/Target/AArch64/AArch64MCInstLower.cpp @@ -68,6 +68,18 @@ AArch64AsmPrinter::lowerSymbolOperand(const MachineOperand &MO, case AArch64II::MO_TPREL_G0_NC: Expr = AArch64MCExpr::CreateTPREL_G0_NC(Expr, OutContext); break; + case AArch64II::MO_ABS_G3: + Expr = AArch64MCExpr::CreateABS_G3(Expr, OutContext); + break; + case AArch64II::MO_ABS_G2_NC: + Expr = AArch64MCExpr::CreateABS_G2_NC(Expr, OutContext); + break; + case AArch64II::MO_ABS_G1_NC: + Expr = AArch64MCExpr::CreateABS_G1_NC(Expr, OutContext); + break; + case AArch64II::MO_ABS_G0_NC: + Expr = AArch64MCExpr::CreateABS_G0_NC(Expr, OutContext); + break; case AArch64II::MO_NO_FLAG: // Expr is already correct break; diff --git a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp index b83577a..3b811df 100644 --- a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp +++ b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp @@ -63,14 +63,15 @@ public: ~AArch64ELFStreamer() {} - virtual void ChangeSection(const MCSection *Section) { + virtual void ChangeSection(const MCSection *Section, + const MCExpr *Subsection) { // We have to keep track of the mapping symbol state of any sections we // use. Each one should start off as EMS_None, which is provided as the // default constructor by DenseMap::lookup. 
- LastMappingSymbols[getPreviousSection()] = LastEMS; + LastMappingSymbols[getPreviousSection().first] = LastEMS; LastEMS = LastMappingSymbols.lookup(Section); - MCELFStreamer::ChangeSection(Section); + MCELFStreamer::ChangeSection(Section, Subsection); } /// This function is the one used to emit instruction data into the ELF @@ -129,7 +130,7 @@ private: MCELF::SetType(SD, ELF::STT_NOTYPE); MCELF::SetBinding(SD, ELF::STB_LOCAL); SD.setExternal(false); - Symbol->setSection(*getCurrentSection()); + Symbol->setSection(*getCurrentSection().first); const MCExpr *Value = MCSymbolRefExpr::Create(Start, getContext()); Symbol->setVariableValue(Value); diff --git a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h index c0e3b29..d9798ae 100644 --- a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h +++ b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h @@ -133,6 +133,26 @@ public: return Create(VK_AARCH64_TPREL_G0_NC, Expr, Ctx); } + static const AArch64MCExpr *CreateABS_G3(const MCExpr *Expr, + MCContext &Ctx) { + return Create(VK_AARCH64_ABS_G3, Expr, Ctx); + } + + static const AArch64MCExpr *CreateABS_G2_NC(const MCExpr *Expr, + MCContext &Ctx) { + return Create(VK_AARCH64_ABS_G2_NC, Expr, Ctx); + } + + static const AArch64MCExpr *CreateABS_G1_NC(const MCExpr *Expr, + MCContext &Ctx) { + return Create(VK_AARCH64_ABS_G1_NC, Expr, Ctx); + } + + static const AArch64MCExpr *CreateABS_G0_NC(const MCExpr *Expr, + MCContext &Ctx) { + return Create(VK_AARCH64_ABS_G0_NC, Expr, Ctx); + } + /// @} /// @name Accessors /// @{ diff --git a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp index 7960db0..819eead 100644 --- a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp +++ b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp @@ -81,6 +81,12 @@ static MCCodeGenInfo *createAArch64MCCodeGenInfo(StringRef TT, Reloc::Model RM, if (CM == CodeModel::Default) CM = CodeModel::Small; + else if (CM == CodeModel::JITDefault) { + // The default MCJIT memory managers make no guarantees about where they can + // find an executable page; JITed code needs to be able to refer to globals + // no matter how far away they are. 
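In practice this means an MCJIT client on AArch64 that leaves the code model at JITDefault now gets the large model, so emitted code can reach globals wherever the memory manager places them. A hedged sketch against the 3.3-era EngineBuilder API (error handling trimmed):

    #include "llvm/ExecutionEngine/ExecutionEngine.h"
    #include "llvm/IR/Module.h"
    #include "llvm/Support/CodeGen.h"
    #include <string>

    llvm::ExecutionEngine *makeEngine(llvm::Module *M) {
      std::string Err;
      return llvm::EngineBuilder(M)
          .setErrorStr(&Err)
          .setUseMCJIT(true)                           // MCJIT, not the legacy JIT
          .setCodeModel(llvm::CodeModel::JITDefault)   // now resolves to Large here
          .create();
    }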
+ CM = CodeModel::Large; + } X->InitMCCodeGenInfo(RM, CM, OL); return X; diff --git a/contrib/llvm/lib/Target/AArch64/TargetInfo/AArch64TargetInfo.cpp b/contrib/llvm/lib/Target/AArch64/TargetInfo/AArch64TargetInfo.cpp index b8099cb..fc706a4 100644 --- a/contrib/llvm/lib/Target/AArch64/TargetInfo/AArch64TargetInfo.cpp +++ b/contrib/llvm/lib/Target/AArch64/TargetInfo/AArch64TargetInfo.cpp @@ -19,6 +19,6 @@ using namespace llvm; Target llvm::TheAArch64Target; extern "C" void LLVMInitializeAArch64TargetInfo() { - RegisterTarget<Triple::aarch64> + RegisterTarget<Triple::aarch64, /*HasJIT=*/true> X(TheAArch64Target, "aarch64", "AArch64"); } diff --git a/contrib/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp b/contrib/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp index 1678559..bedccb5 100644 --- a/contrib/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp +++ b/contrib/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp @@ -981,8 +981,11 @@ bool A64Imms::isLogicalImm(unsigned RegWidth, uint64_t Imm, uint32_t &Bits) { Rotation = RepeatWidth - Rotation; } - uint64_t ReplicatedOnes = (ReplicatedMask >> Rotation) - | ((ReplicatedMask << (RepeatWidth - Rotation)) & RepeatMask); + uint64_t ReplicatedOnes = ReplicatedMask; + if (Rotation != 0 && Rotation != 64) + ReplicatedOnes = (ReplicatedMask >> Rotation) + | ((ReplicatedMask << (RepeatWidth - Rotation)) & RepeatMask); + // Of course, they may not actually be ones, so we have to check that: if (!isMask_64(ReplicatedOnes)) continue; @@ -1051,13 +1054,14 @@ bool A64Imms::isLogicalImmBits(unsigned RegWidth, uint32_t Bits, int Rotation = (ImmR & (Width - 1)); uint64_t Mask = (1ULL << Num1s) - 1; uint64_t WidthMask = Width == 64 ? -1 : (1ULL << Width) - 1; - Mask = (Mask >> Rotation) - | ((Mask << (Width - Rotation)) & WidthMask); + if (Rotation != 0 && Rotation != 64) + Mask = (Mask >> Rotation) + | ((Mask << (Width - Rotation)) & WidthMask); - Imm = 0; - for (unsigned i = 0; i < RegWidth / Width; ++i) { - Imm |= Mask; + Imm = Mask; + for (unsigned i = 1; i < RegWidth / Width; ++i) { Mask <<= Width; + Imm |= Mask; } return true; diff --git a/contrib/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h b/contrib/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h index 1b773d6..9a1ca61 100644 --- a/contrib/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h +++ b/contrib/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h @@ -1037,7 +1037,14 @@ namespace AArch64II { // MO_LO12 - On a symbol operand, this represents a relocation containing // lower 12 bits of the address. Used in add/sub/ldr/str. - MO_LO12 + MO_LO12, + + // MO_ABS_G* - Represent the 16-bit granules of an absolute reference using + // movz/movk instructions. 
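The new Rotation != 0 guards in AArch64BaseInfo.cpp above sidestep a shift by the full register width: with a zero rotation the old code computed Mask << (Width - 0), and shifting a 64-bit value by 64 is undefined behaviour in C++. A standalone sketch of the guarded rotate (widths and values illustrative):

    #include <cstdint>

    static uint64_t rotateRightGuarded(uint64_t Mask, unsigned Rot, unsigned Width) {
      uint64_t WidthMask = (Width == 64) ? ~0ULL : ((1ULL << Width) - 1);
      if (Rot == 0 || Rot == Width)        // rotating by 0 (or a full turn) is a no-op;
        return Mask & WidthMask;           // skipping it avoids the shift-by-64 UB
      return ((Mask >> Rot) | (Mask << (Width - Rot))) & WidthMask;
    }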
+ MO_ABS_G3, + MO_ABS_G2_NC, + MO_ABS_G1_NC, + MO_ABS_G0_NC }; } diff --git a/contrib/llvm/lib/Target/ARM/ARM.td b/contrib/llvm/lib/Target/ARM/ARM.td index 6838084..2d747091 100644 --- a/contrib/llvm/lib/Target/ARM/ARM.td +++ b/contrib/llvm/lib/Target/ARM/ARM.td @@ -59,6 +59,8 @@ def FeatureSlowFPBrcc : SubtargetFeature<"slow-fp-brcc", "SlowFPBrcc", "true", "FP compare + branch is slow">; def FeatureVFPOnlySP : SubtargetFeature<"fp-only-sp", "FPOnlySP", "true", "Floating point unit supports single precision only">; +def FeatureTrustZone : SubtargetFeature<"trustzone", "HasTrustZone", "true", + "Enable support for TrustZone security extensions">; // Some processors have FP multiply-accumulate instructions that don't // play nicely with other VFP / NEON instructions, and it's generally better @@ -144,29 +146,33 @@ include "ARMSchedule.td" def ProcA5 : SubtargetFeature<"a5", "ARMProcFamily", "CortexA5", "Cortex-A5 ARM processors", [FeatureSlowFPBrcc, FeatureHasSlowFPVMLx, - FeatureVMLxForwarding, FeatureT2XtPk]>; + FeatureVMLxForwarding, FeatureT2XtPk, + FeatureTrustZone]>; def ProcA8 : SubtargetFeature<"a8", "ARMProcFamily", "CortexA8", "Cortex-A8 ARM processors", [FeatureSlowFPBrcc, FeatureHasSlowFPVMLx, - FeatureVMLxForwarding, FeatureT2XtPk]>; + FeatureVMLxForwarding, FeatureT2XtPk, + FeatureTrustZone]>; def ProcA9 : SubtargetFeature<"a9", "ARMProcFamily", "CortexA9", "Cortex-A9 ARM processors", [FeatureVMLxForwarding, FeatureT2XtPk, FeatureFP16, - FeatureAvoidPartialCPSR]>; + FeatureAvoidPartialCPSR, + FeatureTrustZone]>; def ProcSwift : SubtargetFeature<"swift", "ARMProcFamily", "Swift", "Swift ARM processors", [FeatureNEONForFP, FeatureT2XtPk, FeatureVFP4, FeatureMP, FeatureHWDiv, FeatureHWDivARM, FeatureAvoidPartialCPSR, FeatureAvoidMOVsShOp, - FeatureHasSlowFPVMLx]>; + FeatureHasSlowFPVMLx, FeatureTrustZone]>; // FIXME: It has not been determined if A15 has these features. def ProcA15 : SubtargetFeature<"a15", "ARMProcFamily", "CortexA15", "Cortex-A15 ARM processors", [FeatureT2XtPk, FeatureFP16, - FeatureAvoidPartialCPSR]>; + FeatureAvoidPartialCPSR, + FeatureTrustZone]>; def ProcR5 : SubtargetFeature<"r5", "ARMProcFamily", "CortexR5", "Cortex-R5 ARM processors", [FeatureSlowFPBrcc, FeatureHWDivARM, diff --git a/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp index 9e68ff4..6005054 100644 --- a/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -283,14 +283,20 @@ ARMBaseInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB, return false; --I; } - if (!isUnpredicatedTerminator(I)) - return false; // Get the last instruction in the block. MachineInstr *LastInst = I; + unsigned LastOpc = LastInst->getOpcode(); + + // Check if it's an indirect branch first, this should return 'unanalyzable' + // even if it's predicated. + if (isIndirectBranchOpcode(LastOpc)) + return true; + + if (!isUnpredicatedTerminator(I)) + return false; // If there is only one terminator instruction, process it. 
- unsigned LastOpc = LastInst->getOpcode(); if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) { if (isUncondBranchOpcode(LastOpc)) { TBB = LastInst->getOperand(0).getMBB(); @@ -747,10 +753,10 @@ void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB, Mov->addRegisterKilled(SrcReg, TRI); } -static const -MachineInstrBuilder &AddDReg(MachineInstrBuilder &MIB, - unsigned Reg, unsigned SubIdx, unsigned State, - const TargetRegisterInfo *TRI) { +const MachineInstrBuilder & +ARMBaseInstrInfo::AddDReg(MachineInstrBuilder &MIB, unsigned Reg, + unsigned SubIdx, unsigned State, + const TargetRegisterInfo *TRI) const { if (!SubIdx) return MIB.addReg(Reg, State); @@ -795,12 +801,22 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, .addReg(SrcReg, getKillRegState(isKill)) .addFrameIndex(FI).addImm(0).addMemOperand(MMO)); } else if (ARM::GPRPairRegClass.hasSubClassEq(RC)) { - MachineInstrBuilder MIB = - AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::STMIA)) - .addFrameIndex(FI)) - .addMemOperand(MMO); - MIB = AddDReg(MIB, SrcReg, ARM::gsub_0, getKillRegState(isKill), TRI); - AddDReg(MIB, SrcReg, ARM::gsub_1, 0, TRI); + if (Subtarget.hasV5TEOps()) { + MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::STRD)); + AddDReg(MIB, SrcReg, ARM::gsub_0, getKillRegState(isKill), TRI); + AddDReg(MIB, SrcReg, ARM::gsub_1, 0, TRI); + MIB.addFrameIndex(FI).addReg(0).addImm(0).addMemOperand(MMO); + + AddDefaultPred(MIB); + } else { + // Fallback to STM instruction, which has existed since the dawn of + // time. + MachineInstrBuilder MIB = + AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::STMIA)) + .addFrameIndex(FI).addMemOperand(MMO)); + AddDReg(MIB, SrcReg, ARM::gsub_0, getKillRegState(isKill), TRI); + AddDReg(MIB, SrcReg, ARM::gsub_1, 0, TRI); + } } else llvm_unreachable("Unknown reg class!"); break; @@ -948,7 +964,6 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, DebugLoc DL; if (I != MBB.end()) DL = I->getDebugLoc(); MachineFunction &MF = *MBB.getParent(); - ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); MachineFrameInfo &MFI = *MF.getFrameInfo(); unsigned Align = MFI.getObjectAlignment(FI); MachineMemOperand *MMO = @@ -975,12 +990,24 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDRD), DestReg) .addFrameIndex(FI).addImm(0).addMemOperand(MMO)); } else if (ARM::GPRPairRegClass.hasSubClassEq(RC)) { - unsigned LdmOpc = AFI->isThumbFunction() ? ARM::t2LDMIA : ARM::LDMIA; - MachineInstrBuilder MIB = - AddDefaultPred(BuildMI(MBB, I, DL, get(LdmOpc)) - .addFrameIndex(FI).addImm(0).addMemOperand(MMO)); - MIB = AddDReg(MIB, DestReg, ARM::gsub_0, RegState::DefineNoRead, TRI); - MIB = AddDReg(MIB, DestReg, ARM::gsub_1, RegState::DefineNoRead, TRI); + MachineInstrBuilder MIB; + + if (Subtarget.hasV5TEOps()) { + MIB = BuildMI(MBB, I, DL, get(ARM::LDRD)); + AddDReg(MIB, DestReg, ARM::gsub_0, RegState::DefineNoRead, TRI); + AddDReg(MIB, DestReg, ARM::gsub_1, RegState::DefineNoRead, TRI); + MIB.addFrameIndex(FI).addReg(0).addImm(0).addMemOperand(MMO); + + AddDefaultPred(MIB); + } else { + // Fallback to LDM instruction, which has existed since the dawn of + // time. 
+ MIB = AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::LDMIA)) + .addFrameIndex(FI).addMemOperand(MMO)); + MIB = AddDReg(MIB, DestReg, ARM::gsub_0, RegState::DefineNoRead, TRI); + MIB = AddDReg(MIB, DestReg, ARM::gsub_1, RegState::DefineNoRead, TRI); + } + if (TargetRegisterInfo::isPhysicalRegister(DestReg)) MIB.addReg(DestReg, RegState::ImplicitDefine); } else diff --git a/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.h b/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.h index 7c107bb..2ef659c 100644 --- a/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.h +++ b/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.h @@ -141,6 +141,10 @@ public: MachineInstr *commuteInstruction(MachineInstr*, bool=false) const; + const MachineInstrBuilder &AddDReg(MachineInstrBuilder &MIB, unsigned Reg, + unsigned SubIdx, unsigned State, + const TargetRegisterInfo *TRI) const; + virtual bool produceSameValue(const MachineInstr *MI0, const MachineInstr *MI1, const MachineRegisterInfo *MRI) const; diff --git a/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp index b6b27f8..b0d34a7 100644 --- a/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp @@ -75,6 +75,12 @@ ARMBaseRegisterInfo::getCallPreservedMask(CallingConv::ID) const { } const uint32_t* +ARMBaseRegisterInfo::getThisReturnPreservedMask(CallingConv::ID) const { + return (STI.isTargetIOS() && !STI.isAAPCS_ABI()) + ? CSR_iOS_ThisReturn_RegMask : CSR_AAPCS_ThisReturn_RegMask; +} + +const uint32_t* ARMBaseRegisterInfo::getNoPreservedMask() const { return CSR_NoRegs_RegMask; } diff --git a/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h b/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h index 725033b..0679919 100644 --- a/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h +++ b/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h @@ -96,6 +96,7 @@ public: /// Code Generation virtual methods... const uint16_t *getCalleeSavedRegs(const MachineFunction *MF = 0) const; const uint32_t *getCallPreservedMask(CallingConv::ID) const; + const uint32_t *getThisReturnPreservedMask(CallingConv::ID) const; const uint32_t *getNoPreservedMask() const; BitVector getReservedRegs(const MachineFunction &MF) const; diff --git a/contrib/llvm/lib/Target/ARM/ARMCallingConv.h b/contrib/llvm/lib/Target/ARM/ARMCallingConv.h index e6e8c3d..4f94ad2 100644 --- a/contrib/llvm/lib/Target/ARM/ARMCallingConv.h +++ b/contrib/llvm/lib/Target/ARM/ARMCallingConv.h @@ -74,9 +74,15 @@ static bool f64AssignAAPCS(unsigned &ValNo, MVT &ValVT, MVT &LocVT, static const uint16_t HiRegList[] = { ARM::R0, ARM::R2 }; static const uint16_t LoRegList[] = { ARM::R1, ARM::R3 }; static const uint16_t ShadowRegList[] = { ARM::R0, ARM::R1 }; + static const uint16_t GPRArgRegs[] = { ARM::R0, ARM::R1, ARM::R2, ARM::R3 }; unsigned Reg = State.AllocateReg(HiRegList, ShadowRegList, 2); if (Reg == 0) { + + // If we had R3 unallocated only, now we still must to waste it. + Reg = State.AllocateReg(GPRArgRegs, 4); + assert((!Reg || Reg == ARM::R3) && "Wrong GPRs usage for f64"); + // For the 2nd half of a v2f64, do not just fail. 
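The R3-wasting rule above keeps register and stack accounting consistent when an 8-byte-aligned f64 can no longer start in an even register. An illustrative signature that hits it under the soft-float AAPCS:

    // a -> r0, b -> r1, c -> r2; d needs an even/odd register pair, so r3 is
    // allocated ("wasted") without holding anything and d is passed entirely
    // on the stack.
    extern "C" double takesMixedArgs(int a, int b, int c, double d);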
if (CanFail) return false; diff --git a/contrib/llvm/lib/Target/ARM/ARMCallingConv.td b/contrib/llvm/lib/Target/ARM/ARMCallingConv.td index b378b96..8ff666e 100644 --- a/contrib/llvm/lib/Target/ARM/ARMCallingConv.td +++ b/contrib/llvm/lib/Target/ARM/ARMCallingConv.td @@ -111,8 +111,7 @@ def CC_ARM_AAPCS_Common : CallingConv<[ // i64 is 8-aligned i32 here, so we may need to eat R1 as a pad register // (and the same is true for f64 if VFP is not enabled) CCIfType<[i32], CCIfAlign<"8", CCAssignToRegWithShadow<[R0, R2], [R0, R1]>>>, - CCIfType<[i32], CCIf<"State.getNextStackOffset() == 0 &&" - "ArgFlags.getOrigAlign() != 8", + CCIfType<[i32], CCIf<"ArgFlags.getOrigAlign() != 8", CCAssignToReg<[R0, R1, R2, R3]>>>, CCIfType<[i32], CCIfAlign<"8", CCAssignToStackWithShadow<4, 8, R3>>>, @@ -195,10 +194,21 @@ def CSR_NoRegs : CalleeSavedRegs<(add)>; def CSR_AAPCS : CalleeSavedRegs<(add LR, R11, R10, R9, R8, R7, R6, R5, R4, (sequence "D%u", 15, 8))>; +// Constructors and destructors return 'this' in the ARM C++ ABI; since 'this' +// and the pointer return value are both passed in R0 in these cases, this can +// be partially modelled by treating R0 as a callee-saved register +// Only the resulting RegMask is used; the SaveList is ignored +def CSR_AAPCS_ThisReturn : CalleeSavedRegs<(add LR, R11, R10, R9, R8, R7, R6, + R5, R4, (sequence "D%u", 15, 8), + R0)>; + // iOS ABI deviates from ARM standard ABI. R9 is not a callee-saved register. // Also save R7-R4 first to match the stack frame fixed spill areas. def CSR_iOS : CalleeSavedRegs<(add LR, R7, R6, R5, R4, (sub CSR_AAPCS, R9))>; +def CSR_iOS_ThisReturn : CalleeSavedRegs<(add LR, R7, R6, R5, R4, + (sub CSR_AAPCS_ThisReturn, R9))>; + // GHC set of callee saved regs is empty as all those regs are // used for passing STG regs around // add is a workaround for not being able to compile empty list: diff --git a/contrib/llvm/lib/Target/ARM/ARMFastISel.cpp b/contrib/llvm/lib/Target/ARM/ARMFastISel.cpp index 29fcd40..5d45f64 100644 --- a/contrib/llvm/lib/Target/ARM/ARMFastISel.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMFastISel.cpp @@ -144,8 +144,8 @@ class ARMFastISel : public FastISel { virtual bool TargetSelectInstruction(const Instruction *I); virtual unsigned TargetMaterializeConstant(const Constant *C); virtual unsigned TargetMaterializeAlloca(const AllocaInst *AI); - virtual bool TryToFoldLoad(MachineInstr *MI, unsigned OpNo, - const LoadInst *LI); + virtual bool tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo, + const LoadInst *LI); virtual bool FastLowerArguments(); private: #include "ARMGenFastISel.inc" @@ -2605,7 +2605,7 @@ unsigned ARMFastISel::ARMEmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, unsigned Opc; bool isBoolZext = false; - const TargetRegisterClass *RC = TLI.getRegClassFor(MVT::i32); + const TargetRegisterClass *RC; switch (SrcVT.SimpleTy) { default: return 0; case MVT::i16: @@ -2797,12 +2797,12 @@ bool ARMFastISel::TargetSelectInstruction(const Instruction *I) { return false; } -/// TryToFoldLoad - The specified machine instr operand is a vreg, and that +/// \brief The specified machine instr operand is a vreg, and that /// vreg is being provided by the specified load instruction. If possible, /// try to fold the load as an operand to the instruction, returning true if /// successful. -bool ARMFastISel::TryToFoldLoad(MachineInstr *MI, unsigned OpNo, - const LoadInst *LI) { +bool ARMFastISel::tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo, + const LoadInst *LI) { // Verify we have a legal type before going any further. 
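The new CSR_AAPCS_ThisReturn / CSR_iOS_ThisReturn masks above model the ARM C++ ABI rule that constructors and destructors return 'this' in r0. A sketch of the caller pattern that benefits; placement new is used only to make the constructor call explicit:

    #include <new>

    struct Widget { Widget(); int Value; };

    int firstValue(void *Storage) {
      Widget *W = new (Storage) Widget();   // ctor returns 'this' in r0
      return W->Value;                      // W can simply reuse that r0 value
    }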
MVT VT; if (!isLoadTypeLegal(LI->getType(), VT)) diff --git a/contrib/llvm/lib/Target/ARM/ARMFrameLowering.cpp b/contrib/llvm/lib/Target/ARM/ARMFrameLowering.cpp index 7a02adf..483802b 100644 --- a/contrib/llvm/lib/Target/ARM/ARMFrameLowering.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMFrameLowering.cpp @@ -141,7 +141,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { assert(!AFI->isThumb1OnlyFunction() && "This emitPrologue does not support Thumb1!"); bool isARM = !AFI->isThumbFunction(); - unsigned VARegSaveSize = AFI->getVarArgsRegSaveSize(); + unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize(); unsigned NumBytes = MFI->getStackSize(); const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo(); DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc(); @@ -159,8 +159,8 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { return; // Allocate the vararg register save area. This is not counted in NumBytes. - if (VARegSaveSize) - emitSPUpdate(isARM, MBB, MBBI, dl, TII, -VARegSaveSize, + if (ArgRegsSaveSize) + emitSPUpdate(isARM, MBB, MBBI, dl, TII, -ArgRegsSaveSize, MachineInstr::FrameSetup); if (!AFI->hasStackFrame()) { @@ -357,7 +357,7 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF, "This emitEpilogue does not support Thumb1!"); bool isARM = !AFI->isThumbFunction(); - unsigned VARegSaveSize = AFI->getVarArgsRegSaveSize(); + unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize(); int NumBytes = (int)MFI->getStackSize(); unsigned FramePtr = RegInfo->getFrameRegister(MF); @@ -471,8 +471,8 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF, MBBI = NewMI; } - if (VARegSaveSize) - emitSPUpdate(isARM, MBB, MBBI, dl, TII, VARegSaveSize); + if (ArgRegsSaveSize) + emitSPUpdate(isARM, MBB, MBBI, dl, TII, ArgRegsSaveSize); } /// getFrameIndexReference - Provide a base+offset reference to an FI slot for @@ -1003,7 +1003,7 @@ bool ARMFrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB, MachineFunction &MF = *MBB.getParent(); ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); - bool isVarArg = AFI->getVarArgsRegSaveSize() > 0; + bool isVarArg = AFI->getArgRegsSaveSize() > 0; unsigned NumAlignedDPRCS2Regs = AFI->getNumAlignedDPRCS2Regs(); // The emitPopInst calls below do not insert reloads for the aligned DPRCS2 @@ -1174,7 +1174,7 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, if (AFI->isThumb1OnlyFunction()) { // Spill LR if Thumb1 function uses variable length argument lists. - if (AFI->getVarArgsRegSaveSize() > 0) + if (AFI->getArgRegsSaveSize() > 0) MRI.setPhysRegUsed(ARM::LR); // Spill R4 if Thumb1 epilogue has to restore SP from FP. 
We don't know diff --git a/contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp b/contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp index 2c51de2..9e1782e 100644 --- a/contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -1469,14 +1469,14 @@ SDNode *ARMDAGToDAGISel::SelectARMIndexedLoad(SDNode *N) { SDValue Ops[]= { Base, AMOpc, getAL(CurDAG), CurDAG->getRegister(0, MVT::i32), Chain }; return CurDAG->getMachineNode(Opcode, N->getDebugLoc(), MVT::i32, - MVT::i32, MVT::Other, Ops, 5); + MVT::i32, MVT::Other, Ops); } else { SDValue Chain = LD->getChain(); SDValue Base = LD->getBasePtr(); SDValue Ops[]= { Base, Offset, AMOpc, getAL(CurDAG), CurDAG->getRegister(0, MVT::i32), Chain }; return CurDAG->getMachineNode(Opcode, N->getDebugLoc(), MVT::i32, - MVT::i32, MVT::Other, Ops, 6); + MVT::i32, MVT::Other, Ops); } } @@ -1525,7 +1525,7 @@ SDNode *ARMDAGToDAGISel::SelectT2IndexedLoad(SDNode *N) { SDValue Ops[]= { Base, Offset, getAL(CurDAG), CurDAG->getRegister(0, MVT::i32), Chain }; return CurDAG->getMachineNode(Opcode, N->getDebugLoc(), MVT::i32, MVT::i32, - MVT::Other, Ops, 5); + MVT::Other, Ops); } return NULL; @@ -1539,7 +1539,7 @@ SDNode *ARMDAGToDAGISel::createGPRPairNode(EVT VT, SDValue V0, SDValue V1) { SDValue SubReg0 = CurDAG->getTargetConstant(ARM::gsub_0, MVT::i32); SDValue SubReg1 = CurDAG->getTargetConstant(ARM::gsub_1, MVT::i32); const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 }; - return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 5); + return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops); } /// \brief Form a D register from a pair of S registers. @@ -1550,7 +1550,7 @@ SDNode *ARMDAGToDAGISel::createSRegPairNode(EVT VT, SDValue V0, SDValue V1) { SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, MVT::i32); SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, MVT::i32); const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 }; - return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 5); + return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops); } /// \brief Form a quad register from a pair of D registers. @@ -1560,7 +1560,7 @@ SDNode *ARMDAGToDAGISel::createDRegPairNode(EVT VT, SDValue V0, SDValue V1) { SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, MVT::i32); SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, MVT::i32); const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 }; - return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 5); + return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops); } /// \brief Form 4 consecutive D registers from a pair of Q registers. @@ -1570,7 +1570,7 @@ SDNode *ARMDAGToDAGISel::createQRegPairNode(EVT VT, SDValue V0, SDValue V1) { SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, MVT::i32); SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, MVT::i32); const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 }; - return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 5); + return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops); } /// \brief Form 4 consecutive S registers. 
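The mechanical change running through this file: getMachineNode now takes its operands as an ArrayRef<SDValue>, so the trailing element count and the Ops.data()/Ops.size() pairs disappear from every call site. A hedged, self-contained sketch of the mechanism; numOps merely stands in for the new overloads:

    #include "llvm/ADT/ArrayRef.h"
    #include "llvm/ADT/SmallVector.h"

    static unsigned numOps(llvm::ArrayRef<int> Ops) { return Ops.size(); }

    void demo() {
      int Fixed[] = {1, 2, 3};                  // was: f(Fixed, 3)
      llvm::SmallVector<int, 8> Dynamic(5, 0);  // was: f(Dynamic.data(), Dynamic.size())
      (void)numOps(Fixed);                      // ArrayRef carries the length itself
      (void)numOps(Dynamic);
    }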
@@ -1585,7 +1585,7 @@ SDNode *ARMDAGToDAGISel::createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue SubReg3 = CurDAG->getTargetConstant(ARM::ssub_3, MVT::i32); const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1, V2, SubReg2, V3, SubReg3 }; - return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 9); + return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops); } /// \brief Form 4 consecutive D registers. @@ -1599,7 +1599,7 @@ SDNode *ARMDAGToDAGISel::createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue SubReg3 = CurDAG->getTargetConstant(ARM::dsub_3, MVT::i32); const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1, V2, SubReg2, V3, SubReg3 }; - return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 9); + return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops); } /// \brief Form 4 consecutive Q registers. @@ -1613,7 +1613,7 @@ SDNode *ARMDAGToDAGISel::createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue SubReg3 = CurDAG->getTargetConstant(ARM::qsub_3, MVT::i32); const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1, V2, SubReg2, V3, SubReg3 }; - return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 9); + return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops); } /// GetVLDSTAlign - Get the alignment (in bytes) for the alignment operand @@ -1761,7 +1761,7 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs, Ops.push_back(Pred); Ops.push_back(Reg0); Ops.push_back(Chain); - VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops.data(), Ops.size()); + VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); } else { // Otherwise, quad registers are loaded with two separate instructions, @@ -1774,7 +1774,7 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs, SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, ResTy), 0); const SDValue OpsA[] = { MemAddr, Align, Reg0, ImplDef, Pred, Reg0, Chain }; SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl, - ResTy, AddrTy, MVT::Other, OpsA, 7); + ResTy, AddrTy, MVT::Other, OpsA); Chain = SDValue(VLdA, 2); // Load the odd subregs. @@ -1791,8 +1791,7 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs, Ops.push_back(Pred); Ops.push_back(Reg0); Ops.push_back(Chain); - VLd = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys, - Ops.data(), Ops.size()); + VLd = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys, Ops); } // Transfer memoperands. @@ -1913,8 +1912,7 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs, Ops.push_back(Pred); Ops.push_back(Reg0); Ops.push_back(Chain); - SDNode *VSt = - CurDAG->getMachineNode(Opc, dl, ResTys, Ops.data(), Ops.size()); + SDNode *VSt = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); // Transfer memoperands. 
cast<MachineSDNode>(VSt)->setMemRefs(MemOp, MemOp + 1); @@ -1939,7 +1937,7 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs, const SDValue OpsA[] = { MemAddr, Align, Reg0, RegSeq, Pred, Reg0, Chain }; SDNode *VStA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl, MemAddr.getValueType(), - MVT::Other, OpsA, 7); + MVT::Other, OpsA); cast<MachineSDNode>(VStA)->setMemRefs(MemOp, MemOp + 1); Chain = SDValue(VStA, 1); @@ -1958,7 +1956,7 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs, Ops.push_back(Reg0); Ops.push_back(Chain); SDNode *VStB = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys, - Ops.data(), Ops.size()); + Ops); cast<MachineSDNode>(VStB)->setMemRefs(MemOp, MemOp + 1); return VStB; } @@ -2063,8 +2061,7 @@ SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] : QOpcodes[OpcodeIndex]); - SDNode *VLdLn = CurDAG->getMachineNode(Opc, dl, ResTys, - Ops.data(), Ops.size()); + SDNode *VLdLn = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); cast<MachineSDNode>(VLdLn)->setMemRefs(MemOp, MemOp + 1); if (!IsLoad) return VLdLn; @@ -2150,8 +2147,7 @@ SDNode *ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool isUpdating, if (isUpdating) ResTys.push_back(MVT::i32); ResTys.push_back(MVT::Other); - SDNode *VLdDup = - CurDAG->getMachineNode(Opc, dl, ResTys, Ops.data(), Ops.size()); + SDNode *VLdDup = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); cast<MachineSDNode>(VLdDup)->setMemRefs(MemOp, MemOp + 1); SuperReg = SDValue(VLdDup, 0); @@ -2197,7 +2193,7 @@ SDNode *ARMDAGToDAGISel::SelectVTBL(SDNode *N, bool IsExt, unsigned NumVecs, Ops.push_back(N->getOperand(FirstTblReg + NumVecs)); Ops.push_back(getAL(CurDAG)); // predicate Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // predicate register - return CurDAG->getMachineNode(Opc, dl, VT, Ops.data(), Ops.size()); + return CurDAG->getMachineNode(Opc, dl, VT, Ops); } SDNode *ARMDAGToDAGISel::SelectV6T2BitfieldExtractOp(SDNode *N, @@ -2542,7 +2538,7 @@ SDNode *ARMDAGToDAGISel::SelectAtomic64(SDNode *Node, unsigned Opc) { MemOp[0] = cast<MemSDNode>(Node)->getMemOperand(); SDNode *ResNode = CurDAG->getMachineNode(Opc, Node->getDebugLoc(), MVT::i32, MVT::i32, MVT::Other, - Ops.data() ,Ops.size()); + Ops); cast<MachineSDNode>(ResNode)->setMemRefs(MemOp, MemOp + 1); return ResNode; } @@ -2599,7 +2595,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { SDValue PredReg = CurDAG->getRegister(0, MVT::i32); SDValue Ops[] = { CPIdx, Pred, PredReg, CurDAG->getEntryNode() }; ResNode = CurDAG->getMachineNode(ARM::tLDRpci, dl, MVT::i32, MVT::Other, - Ops, 4); + Ops); } else { SDValue Ops[] = { CPIdx, @@ -2609,7 +2605,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { CurDAG->getEntryNode() }; ResNode=CurDAG->getMachineNode(ARM::LDRcp, dl, MVT::i32, MVT::Other, - Ops, 5); + Ops); } ReplaceUses(SDValue(N, 0), SDValue(ResNode, 0)); return NULL; @@ -2719,7 +2715,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { MVT::i32); SDValue Ops[] = { N0.getOperand(0), Imm16, getAL(CurDAG), CurDAG->getRegister(0, MVT::i32) }; - return CurDAG->getMachineNode(Opc, dl, VT, Ops, 4); + return CurDAG->getMachineNode(Opc, dl, VT, Ops); } } break; @@ -2733,16 +2729,15 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { break; if (Subtarget->isThumb()) { SDValue Ops[] = { N->getOperand(0), N->getOperand(1), - getAL(CurDAG), CurDAG->getRegister(0, MVT::i32), - CurDAG->getRegister(0, MVT::i32) }; - return CurDAG->getMachineNode(ARM::t2UMULL, dl, MVT::i32, 
MVT::i32,Ops,4); + getAL(CurDAG), CurDAG->getRegister(0, MVT::i32) }; + return CurDAG->getMachineNode(ARM::t2UMULL, dl, MVT::i32, MVT::i32, Ops); } else { SDValue Ops[] = { N->getOperand(0), N->getOperand(1), getAL(CurDAG), CurDAG->getRegister(0, MVT::i32), CurDAG->getRegister(0, MVT::i32) }; return CurDAG->getMachineNode(Subtarget->hasV6Ops() ? ARM::UMULL : ARM::UMULLv5, - dl, MVT::i32, MVT::i32, Ops, 5); + dl, MVT::i32, MVT::i32, Ops); } } case ISD::SMUL_LOHI: { @@ -2751,14 +2746,14 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { if (Subtarget->isThumb()) { SDValue Ops[] = { N->getOperand(0), N->getOperand(1), getAL(CurDAG), CurDAG->getRegister(0, MVT::i32) }; - return CurDAG->getMachineNode(ARM::t2SMULL, dl, MVT::i32, MVT::i32,Ops,4); + return CurDAG->getMachineNode(ARM::t2SMULL, dl, MVT::i32, MVT::i32, Ops); } else { SDValue Ops[] = { N->getOperand(0), N->getOperand(1), getAL(CurDAG), CurDAG->getRegister(0, MVT::i32), CurDAG->getRegister(0, MVT::i32) }; return CurDAG->getMachineNode(Subtarget->hasV6Ops() ? ARM::SMULL : ARM::SMULLv5, - dl, MVT::i32, MVT::i32, Ops, 5); + dl, MVT::i32, MVT::i32, Ops); } } case ARMISD::UMLAL:{ @@ -2766,7 +2761,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2), N->getOperand(3), getAL(CurDAG), CurDAG->getRegister(0, MVT::i32)}; - return CurDAG->getMachineNode(ARM::t2UMLAL, dl, MVT::i32, MVT::i32, Ops, 6); + return CurDAG->getMachineNode(ARM::t2UMLAL, dl, MVT::i32, MVT::i32, Ops); }else{ SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2), N->getOperand(3), getAL(CurDAG), @@ -2774,7 +2769,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { CurDAG->getRegister(0, MVT::i32) }; return CurDAG->getMachineNode(Subtarget->hasV6Ops() ? ARM::UMLAL : ARM::UMLALv5, - dl, MVT::i32, MVT::i32, Ops, 7); + dl, MVT::i32, MVT::i32, Ops); } } case ARMISD::SMLAL:{ @@ -2782,7 +2777,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2), N->getOperand(3), getAL(CurDAG), CurDAG->getRegister(0, MVT::i32)}; - return CurDAG->getMachineNode(ARM::t2SMLAL, dl, MVT::i32, MVT::i32, Ops, 6); + return CurDAG->getMachineNode(ARM::t2SMLAL, dl, MVT::i32, MVT::i32, Ops); }else{ SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2), N->getOperand(3), getAL(CurDAG), @@ -2790,7 +2785,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { CurDAG->getRegister(0, MVT::i32) }; return CurDAG->getMachineNode(Subtarget->hasV6Ops() ? 
ARM::SMLAL : ARM::SMLALv5, - dl, MVT::i32, MVT::i32, Ops, 7); + dl, MVT::i32, MVT::i32, Ops); } } case ISD::LOAD: { @@ -2833,7 +2828,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { MVT::i32); SDValue Ops[] = { N1, Tmp2, N3, Chain, InFlag }; SDNode *ResNode = CurDAG->getMachineNode(Opc, dl, MVT::Other, - MVT::Glue, Ops, 5); + MVT::Glue, Ops); Chain = SDValue(ResNode, 0); if (N->getNumValues() == 2) { InFlag = SDValue(ResNode, 1); @@ -2863,7 +2858,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { SDValue Pred = getAL(CurDAG); SDValue PredReg = CurDAG->getRegister(0, MVT::i32); SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg }; - return CurDAG->getMachineNode(Opc, dl, VT, VT, Ops, 4); + return CurDAG->getMachineNode(Opc, dl, VT, VT, Ops); } case ARMISD::VUZP: { unsigned Opc = 0; @@ -2883,7 +2878,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { SDValue Pred = getAL(CurDAG); SDValue PredReg = CurDAG->getRegister(0, MVT::i32); SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg }; - return CurDAG->getMachineNode(Opc, dl, VT, VT, Ops, 4); + return CurDAG->getMachineNode(Opc, dl, VT, VT, Ops); } case ARMISD::VTRN: { unsigned Opc = 0; @@ -2902,7 +2897,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { SDValue Pred = getAL(CurDAG); SDValue PredReg = CurDAG->getRegister(0, MVT::i32); SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg }; - return CurDAG->getMachineNode(Opc, dl, VT, VT, Ops, 4); + return CurDAG->getMachineNode(Opc, dl, VT, VT, Ops); } case ARMISD::BUILD_VECTOR: { EVT VecVT = N->getValueType(0); @@ -3147,8 +3142,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { Ops.push_back(getAL(CurDAG)); Ops.push_back(CurDAG->getRegister(0, MVT::i32)); Ops.push_back(Chain); - SDNode *Ld = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops.data(), - Ops.size()); + SDNode *Ld = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops); // Transfer memoperands. MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand(); @@ -3211,8 +3205,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { unsigned NewOpc = isThumb ? ARM::t2STREXD : ARM::STREXD; - SDNode *St = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops.data(), - Ops.size()); + SDNode *St = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops); // Transfer memoperands. 
MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand(); @@ -3398,7 +3391,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { Ops.push_back(N->getOperand(1)); Ops.push_back(getAL(CurDAG)); // Predicate Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // Predicate Register - return CurDAG->getMachineNode(ARM::VTBL1, dl, VT, Ops.data(), Ops.size()); + return CurDAG->getMachineNode(ARM::VTBL1, dl, VT, Ops); } case ARMISD::VTBL2: { DebugLoc dl = N->getDebugLoc(); @@ -3414,8 +3407,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { Ops.push_back(N->getOperand(2)); Ops.push_back(getAL(CurDAG)); // Predicate Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // Predicate Register - return CurDAG->getMachineNode(ARM::VTBL2, dl, VT, - Ops.data(), Ops.size()); + return CurDAG->getMachineNode(ARM::VTBL2, dl, VT, Ops); } case ISD::CONCAT_VECTORS: diff --git a/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp b/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp index bb26090..e49cfc4 100644 --- a/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -729,7 +729,6 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) (Subtarget->hasV6Ops() && !Subtarget->isThumb())) { // membarrier needs custom lowering; the rest are legal and handled // normally. - setOperationAction(ISD::MEMBARRIER, MVT::Other, Custom); setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom); // Custom lowering for 64-bit ops setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i64, Custom); @@ -747,7 +746,6 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setInsertFencesForAtomic(true); } else { // Set them all for expansion, which will force libcalls. - setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand); setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Expand); setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Expand); setOperationAction(ISD::ATOMIC_SWAP, MVT::i32, Expand); @@ -765,8 +763,6 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) // Unordered/Monotonic case. setOperationAction(ISD::ATOMIC_LOAD, MVT::i32, Custom); setOperationAction(ISD::ATOMIC_STORE, MVT::i32, Custom); - // Since the libcalls include locking, fold in the fences - setShouldFoldAtomicFences(true); } setOperationAction(ISD::PREFETCH, MVT::Other, Custom); @@ -1238,7 +1234,8 @@ ARMTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals) const { + SmallVectorImpl<SDValue> &InVals, + bool isThisReturn, SDValue ThisVal) const { // Assign locations to each value returned by this call. SmallVector<CCValAssign, 16> RVLocs; @@ -1252,6 +1249,15 @@ ARMTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag, for (unsigned i = 0; i != RVLocs.size(); ++i) { CCValAssign VA = RVLocs[i]; + // Pass 'this' value directly from the argument to return value, to avoid + // reg unit interference + if (i == 0 && isThisReturn) { + assert(!VA.needsCustom() && VA.getLocVT() == MVT::i32 && + "unexpected return calling convention register assignment"); + InVals.push_back(ThisVal); + continue; + } + SDValue Val; if (VA.needsCustom()) { // Handle f64 or half of a v2f64. @@ -1363,21 +1369,22 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, bool isVarArg = CLI.IsVarArg; MachineFunction &MF = DAG.getMachineFunction(); - bool IsStructRet = (Outs.empty()) ? 
false : Outs[0].Flags.isSRet(); - bool IsSibCall = false; + bool isStructRet = (Outs.empty()) ? false : Outs[0].Flags.isSRet(); + bool isThisReturn = false; + bool isSibCall = false; // Disable tail calls if they're not supported. if (!EnableARMTailCalls && !Subtarget->supportsTailCall()) isTailCall = false; if (isTailCall) { // Check if it's really possible to do a tail call. isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, - isVarArg, IsStructRet, MF.getFunction()->hasStructRetAttr(), + isVarArg, isStructRet, MF.getFunction()->hasStructRetAttr(), Outs, OutVals, Ins, DAG); // We don't support GuaranteedTailCallOpt for ARM, only automatically // detected sibcalls. if (isTailCall) { ++NumTailCalls; - IsSibCall = true; + isSibCall = true; } } @@ -1393,12 +1400,12 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, unsigned NumBytes = CCInfo.getNextStackOffset(); // For tail calls, memory operands are available in our caller's stack. - if (IsSibCall) + if (isSibCall) NumBytes = 0; // Adjust the stack pointer for the new arguments... // These operations are automatically eliminated by the prolog/epilog pass - if (!IsSibCall) + if (!isSibCall) Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true)); SDValue StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy()); @@ -1460,6 +1467,13 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, StackPtr, MemOpChains, Flags); } } else if (VA.isRegLoc()) { + if (realArgIdx == 0 && Flags.isReturned() && Outs[0].VT == MVT::i32) { + assert(VA.getLocVT() == MVT::i32 && + "unexpected calling convention register assignment"); + assert(!Ins.empty() && Ins[0].VT == MVT::i32 && + "unexpected use of 'returned'"); + isThisReturn = true; + } RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg)); } else if (isByVal) { assert(VA.isMemLoc()); @@ -1467,10 +1481,17 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // True if this byval aggregate will be split between registers // and memory. - if (CCInfo.isFirstByValRegValid()) { + unsigned ByValArgsCount = CCInfo.getInRegsParamsCount(); + unsigned CurByValIdx = CCInfo.getInRegsParamsProceed(); + + if (CurByValIdx < ByValArgsCount) { + + unsigned RegBegin, RegEnd; + CCInfo.getInRegsParamInfo(CurByValIdx, RegBegin, RegEnd); + EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); unsigned int i, j; - for (i = 0, j = CCInfo.getFirstByValReg(); j < ARM::R4; i++, j++) { + for (i = 0, j = RegBegin; j < RegEnd; i++, j++) { SDValue Const = DAG.getConstant(4*i, MVT::i32); SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const); SDValue Load = DAG.getLoad(PtrVT, dl, Chain, AddArg, @@ -1479,11 +1500,15 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, MemOpChains.push_back(Load.getValue(1)); RegsToPass.push_back(std::make_pair(j, Load)); } - offset = ARM::R4 - CCInfo.getFirstByValReg(); - CCInfo.clearFirstByValReg(); + + // If parameter size outsides register area, "offset" value + // helps us to calculate stack slot for remained part properly. 
+ offset = RegEnd - RegBegin; + + CCInfo.nextInRegsParam(); } - if (Flags.getByValSize() - 4*offset > 0) { + if (Flags.getByValSize() > 4*offset) { unsigned LocMemOffset = VA.getLocMemOffset(); SDValue StkPtrOff = DAG.getIntPtrConstant(LocMemOffset); SDValue Dst = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, @@ -1499,7 +1524,7 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, MemOpChains.push_back(DAG.getNode(ARMISD::COPY_STRUCT_BYVAL, dl, VTs, Ops, array_lengthof(Ops))); } - } else if (!IsSibCall) { + } else if (!isSibCall) { assert(VA.isMemLoc()); MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg, @@ -1539,7 +1564,7 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, RegsToPass[i].second, InFlag); InFlag = Chain.getValue(1); } - InFlag =SDValue(); + InFlag = SDValue(); } // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every @@ -1680,8 +1705,15 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, RegsToPass[i].second.getValueType())); // Add a register mask operand representing the call-preserved registers. + const uint32_t *Mask; const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo(); - const uint32_t *Mask = TRI->getCallPreservedMask(CallConv); + const ARMBaseRegisterInfo *ARI = static_cast<const ARMBaseRegisterInfo*>(TRI); + if (isThisReturn) + // For 'this' returns, use the R0-preserving mask + Mask = ARI->getThisReturnPreservedMask(CallConv); + else + Mask = ARI->getCallPreservedMask(CallConv); + assert(Mask && "Missing call preserved mask for calling convention"); Ops.push_back(DAG.getRegisterMask(Mask)); @@ -1703,8 +1735,9 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // Handle result values, copying them out of physregs into vregs that we // return. - return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins, - dl, DAG, InVals); + return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins, dl, DAG, + InVals, isThisReturn, + isThisReturn ? OutVals[0] : SDValue()); } /// HandleByVal - Every parameter *after* a byval parameter is passed @@ -1718,8 +1751,24 @@ ARMTargetLowering::HandleByVal( assert((State->getCallOrPrologue() == Prologue || State->getCallOrPrologue() == Call) && "unhandled ParmContext"); - if ((!State->isFirstByValRegValid()) && - (ARM::R0 <= reg) && (reg <= ARM::R3)) { + + // For in-prologue parameters handling, we also introduce stack offset + // for byval registers: see CallingConvLower.cpp, CCState::HandleByVal. + // This behaviour outsides AAPCS rules (5.5 Parameters Passing) of how + // NSAA should be evaluted (NSAA means "next stacked argument address"). + // So: NextStackOffset = NSAAOffset + SizeOfByValParamsStoredInRegs. + // Then: NSAAOffset = NextStackOffset - SizeOfByValParamsStoredInRegs. 
+ unsigned NSAAOffset = State->getNextStackOffset(); + if (State->getCallOrPrologue() != Call) { + for (unsigned i = 0, e = State->getInRegsParamsCount(); i != e; ++i) { + unsigned RB, RE; + State->getInRegsParamInfo(i, RB, RE); + assert(NSAAOffset >= (RE-RB)*4 && + "Stack offset for byval regs doesn't introduced anymore?"); + NSAAOffset -= (RE-RB)*4; + } + } + if ((ARM::R0 <= reg) && (reg <= ARM::R3)) { if (Subtarget->isAAPCS_ABI() && Align > 4) { unsigned AlignInRegs = Align / 4; unsigned Waste = (ARM::R4 - reg) % AlignInRegs; @@ -1727,22 +1776,45 @@ ARMTargetLowering::HandleByVal( reg = State->AllocateReg(GPRArgRegs, 4); } if (reg != 0) { - State->setFirstByValReg(reg); + unsigned excess = 4 * (ARM::R4 - reg); + + // Special case when NSAA != SP and parameter size greater than size of + // all remained GPR regs. In that case we can't split parameter, we must + // send it to stack. We also must set NCRN to R4, so waste all + // remained registers. + if (Subtarget->isAAPCS_ABI() && NSAAOffset != 0 && size > excess) { + while (State->AllocateReg(GPRArgRegs, 4)) + ; + return; + } + + // First register for byval parameter is the first register that wasn't + // allocated before this method call, so it would be "reg". + // If parameter is small enough to be saved in range [reg, r4), then + // the end (first after last) register would be reg + param-size-in-regs, + // else parameter would be splitted between registers and stack, + // end register would be r4 in this case. + unsigned ByValRegBegin = reg; + unsigned ByValRegEnd = (size < excess) ? reg + size/4 : ARM::R4; + State->addInRegsParamInfo(ByValRegBegin, ByValRegEnd); + // Note, first register is allocated in the beginning of function already, + // allocate remained amount of registers we need. + for (unsigned i = reg+1; i != ByValRegEnd; ++i) + State->AllocateReg(GPRArgRegs, 4); // At a call site, a byval parameter that is split between // registers and memory needs its size truncated here. In a // function prologue, such byval parameters are reassembled in // memory, and are not truncated. if (State->getCallOrPrologue() == Call) { - unsigned excess = 4 * (ARM::R4 - reg); - assert(size >= excess && "expected larger existing stack allocation"); - size -= excess; + // Make remained size equal to 0 in case, when + // the whole structure may be stored into registers. + if (size < excess) + size = 0; + else + size -= excess; } } } - // Confiscate any remaining parameter registers to preclude their - // assignment to subsequent parameters. - while (State->AllocateReg(GPRArgRegs, 4)) - ; } /// MatchingStackOffset - Return true if the given stack call argument is @@ -1874,7 +1946,7 @@ ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, // local frame. const ARMFunctionInfo *AFI_Caller = DAG.getMachineFunction(). getInfo<ARMFunctionInfo>(); - if (AFI_Caller->getVarArgsRegSaveSize()) + if (AFI_Caller->getArgRegsSaveSize()) return false; // If the callee takes no arguments then go on to check the results of the @@ -2461,35 +2533,6 @@ ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG, } } -static SDValue LowerMEMBARRIER(SDValue Op, SelectionDAG &DAG, - const ARMSubtarget *Subtarget) { - DebugLoc dl = Op.getDebugLoc(); - if (!Subtarget->hasDataBarrier()) { - // Some ARMv6 cpus can support data barriers with an mcr instruction. - // Thumb1 and pre-v6 ARM mode use a libcall instead and should never get - // here. 
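A compact restatement of the register/stack split HandleByVal computes above for an AAPCS byval argument (illustrative names; r4 marks the end of the argument registers):

    void splitByVal(unsigned FirstFreeReg /* 0..4 for r0..r4 */, unsigned ByValSize,
                    unsigned &BytesInRegs, unsigned &BytesOnStack) {
      unsigned Excess = 4 * (4 - FirstFreeReg);              // room left in r0-r3
      BytesInRegs  = ByValSize < Excess ? ByValSize : Excess; // head goes in registers
      BytesOnStack = ByValSize - BytesInRegs;                 // tail lands at the NSAA
    }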
- assert(Subtarget->hasV6Ops() && !Subtarget->isThumb() && - "Unexpected ISD::MEMBARRIER encountered. Should be libcall!"); - return DAG.getNode(ARMISD::MEMBARRIER_MCR, dl, MVT::Other, Op.getOperand(0), - DAG.getConstant(0, MVT::i32)); - } - - SDValue Op5 = Op.getOperand(5); - bool isDeviceBarrier = cast<ConstantSDNode>(Op5)->getZExtValue() != 0; - unsigned isLL = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue(); - unsigned isLS = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue(); - bool isOnlyStoreBarrier = (isLL == 0 && isLS == 0); - - ARM_MB::MemBOpt DMBOpt; - if (isDeviceBarrier) - DMBOpt = isOnlyStoreBarrier ? ARM_MB::ST : ARM_MB::SY; - else - DMBOpt = isOnlyStoreBarrier ? ARM_MB::ISHST : ARM_MB::ISH; - return DAG.getNode(ARMISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0), - DAG.getConstant(DMBOpt, MVT::i32)); -} - - static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *Subtarget) { // FIXME: handle "fence singlethread" more efficiently. @@ -2586,12 +2629,16 @@ ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA, void ARMTargetLowering::computeRegArea(CCState &CCInfo, MachineFunction &MF, - unsigned &VARegSize, unsigned &VARegSaveSize) + unsigned InRegsParamRecordIdx, + unsigned &ArgRegsSize, + unsigned &ArgRegsSaveSize) const { unsigned NumGPRs; - if (CCInfo.isFirstByValRegValid()) - NumGPRs = ARM::R4 - CCInfo.getFirstByValReg(); - else { + if (InRegsParamRecordIdx < CCInfo.getInRegsParamsCount()) { + unsigned RBegin, REnd; + CCInfo.getInRegsParamInfo(InRegsParamRecordIdx, RBegin, REnd); + NumGPRs = REnd - RBegin; + } else { unsigned int firstUnalloced; firstUnalloced = CCInfo.getFirstUnallocated(GPRArgRegs, sizeof(GPRArgRegs) / @@ -2600,8 +2647,8 @@ ARMTargetLowering::computeRegArea(CCState &CCInfo, MachineFunction &MF, } unsigned Align = MF.getTarget().getFrameLowering()->getStackAlignment(); - VARegSize = NumGPRs * 4; - VARegSaveSize = (VARegSize + Align - 1) & ~(Align - 1); + ArgRegsSize = NumGPRs * 4; + ArgRegsSaveSize = (ArgRegsSize + Align - 1) & ~(Align - 1); } // The remaining GPRs hold either the beginning of variable-argument @@ -2611,40 +2658,60 @@ ARMTargetLowering::computeRegArea(CCState &CCInfo, MachineFunction &MF, // If this is a variadic function, the va_list pointer will begin with // these values; otherwise, this reassembles a (byval) structure that // was split between registers and memory. -void -ARMTargetLowering::VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG, - DebugLoc dl, SDValue &Chain, - const Value *OrigArg, - unsigned OffsetFromOrigArg, - unsigned ArgOffset, - bool ForceMutable) const { +// Return: The frame index registers were stored into. +int +ARMTargetLowering::StoreByValRegs(CCState &CCInfo, SelectionDAG &DAG, + DebugLoc dl, SDValue &Chain, + const Value *OrigArg, + unsigned InRegsParamRecordIdx, + unsigned OffsetFromOrigArg, + unsigned ArgOffset, + bool ForceMutable) const { + + // Currently, two use-cases possible: + // Case #1. Non var-args function, and we meet first byval parameter. + // Setup first unallocated register as first byval register; + // eat all remained registers + // (these two actions are performed by HandleByVal method). + // Then, here, we initialize stack frame with + // "store-reg" instructions. + // Case #2. Var-args function, that doesn't contain byval parameters. + // The same: eat all remained unallocated registers, + // initialize stack frame. 
+ MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); - unsigned firstRegToSaveIndex; - if (CCInfo.isFirstByValRegValid()) - firstRegToSaveIndex = CCInfo.getFirstByValReg() - ARM::R0; - else { + unsigned firstRegToSaveIndex, lastRegToSaveIndex; + unsigned RBegin, REnd; + if (InRegsParamRecordIdx < CCInfo.getInRegsParamsCount()) { + CCInfo.getInRegsParamInfo(InRegsParamRecordIdx, RBegin, REnd); + firstRegToSaveIndex = RBegin - ARM::R0; + lastRegToSaveIndex = REnd - ARM::R0; + } else { firstRegToSaveIndex = CCInfo.getFirstUnallocated (GPRArgRegs, sizeof(GPRArgRegs) / sizeof(GPRArgRegs[0])); + lastRegToSaveIndex = 4; } - unsigned VARegSize, VARegSaveSize; - computeRegArea(CCInfo, MF, VARegSize, VARegSaveSize); - if (VARegSaveSize) { - // If this function is vararg, store any remaining integer argument regs - // to their spots on the stack so that they may be loaded by deferencing - // the result of va_next. - AFI->setVarArgsRegSaveSize(VARegSaveSize); - AFI->setVarArgsFrameIndex(MFI->CreateFixedObject(VARegSaveSize, - ArgOffset + VARegSaveSize - - VARegSize, - false)); - SDValue FIN = DAG.getFrameIndex(AFI->getVarArgsFrameIndex(), - getPointerTy()); + unsigned ArgRegsSize, ArgRegsSaveSize; + computeRegArea(CCInfo, MF, InRegsParamRecordIdx, ArgRegsSize, ArgRegsSaveSize); + + // Store any by-val regs to their spots on the stack so that they may be + // loaded by deferencing the result of formal parameter pointer or va_next. + // Note: once stack area for byval/varargs registers + // was initialized, it can't be initialized again. + if (ArgRegsSaveSize) { + + int FrameIndex = MFI->CreateFixedObject( + ArgRegsSaveSize, + ArgOffset + ArgRegsSaveSize - ArgRegsSize, + false); + SDValue FIN = DAG.getFrameIndex(FrameIndex, getPointerTy()); SmallVector<SDValue, 4> MemOps; - for (unsigned i = 0; firstRegToSaveIndex < 4; ++firstRegToSaveIndex, ++i) { + for (unsigned i = 0; firstRegToSaveIndex < lastRegToSaveIndex; + ++firstRegToSaveIndex, ++i) { const TargetRegisterClass *RC; if (AFI->isThumb1OnlyFunction()) RC = &ARM::tGPRRegClass; @@ -2661,13 +2728,37 @@ ARMTargetLowering::VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG, FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), FIN, DAG.getConstant(4, getPointerTy())); } + + AFI->setArgRegsSaveSize(ArgRegsSaveSize + AFI->getArgRegsSaveSize()); + if (!MemOps.empty()) Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &MemOps[0], MemOps.size()); + return FrameIndex; } else // This will point to the next argument passed via stack. - AFI->setVarArgsFrameIndex( - MFI->CreateFixedObject(4, ArgOffset, !ForceMutable)); + return MFI->CreateFixedObject(4, ArgOffset, !ForceMutable); +} + +// Setup stack frame, the va_list pointer will start from. +void +ARMTargetLowering::VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG, + DebugLoc dl, SDValue &Chain, + unsigned ArgOffset, + bool ForceMutable) const { + MachineFunction &MF = DAG.getMachineFunction(); + ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); + + // Try to store any remaining integer argument regs + // to their spots on the stack so that they may be loaded by deferencing + // the result of va_next. + // If there is no regs to be stored, just point address after last + // argument passed via stack. 
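VarArgStyleRegisters becomes a thin wrapper around StoreByValRegs: both need the same prologue spill of the remaining r0-r3 into a slot directly below the first stack-passed argument, because va_arg (and a reassembled byval aggregate) walks one contiguous region. An illustrative variadic function that relies on that layout:

    #include <stdarg.h>

    int sum(int n, ...) {
      va_list ap;
      va_start(ap, n);
      int s = 0;
      for (int i = 0; i < n; ++i)
        s += va_arg(ap, int);   // walks the register spill area, then the stack args
      va_end(ap);
      return s;
    }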
+ int FrameIndex = + StoreByValRegs(CCInfo, DAG, dl, Chain, 0, CCInfo.getInRegsParamsCount(), + 0, ArgOffset, ForceMutable); + + AFI->setVarArgsFrameIndex(FrameIndex); } SDValue @@ -2696,6 +2787,12 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain, SDValue ArgValue; Function::const_arg_iterator CurOrigArg = MF.getFunction()->arg_begin(); unsigned CurArgIdx = 0; + + // Initially ArgRegsSaveSize is zero. + // Then we increase this value each time we meet byval parameter. + // We also increase this value in case of varargs function. + AFI->setArgRegsSaveSize(0); + for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { CCValAssign &VA = ArgLocs[i]; std::advance(CurOrigArg, Ins[VA.getValNo()].OrigArgIndex - CurArgIdx); @@ -2793,20 +2890,15 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain, // Since they could be overwritten by lowering of arguments in case of // a tail call. if (Flags.isByVal()) { - ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); - if (!AFI->getVarArgsFrameIndex()) { - VarArgStyleRegisters(CCInfo, DAG, - dl, Chain, CurOrigArg, - Ins[VA.getValNo()].PartOffset, - VA.getLocMemOffset(), - true /*force mutable frames*/); - int VAFrameIndex = AFI->getVarArgsFrameIndex(); - InVals.push_back(DAG.getFrameIndex(VAFrameIndex, getPointerTy())); - } else { - int FI = MFI->CreateFixedObject(Flags.getByValSize(), - VA.getLocMemOffset(), false); - InVals.push_back(DAG.getFrameIndex(FI, getPointerTy())); - } + unsigned CurByValIndex = CCInfo.getInRegsParamsProceed(); + int FrameIndex = StoreByValRegs( + CCInfo, DAG, dl, Chain, CurOrigArg, + CurByValIndex, + Ins[VA.getValNo()].PartOffset, + VA.getLocMemOffset(), + true /*force mutable frames*/); + InVals.push_back(DAG.getFrameIndex(FrameIndex, getPointerTy())); + CCInfo.nextInRegsParam(); } else { int FI = MFI->CreateFixedObject(VA.getLocVT().getSizeInBits()/8, VA.getLocMemOffset(), true); @@ -2824,7 +2916,7 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain, // varargs if (isVarArg) - VarArgStyleRegisters(CCInfo, DAG, dl, Chain, 0, 0, + VarArgStyleRegisters(CCInfo, DAG, dl, Chain, CCInfo.getNextStackOffset()); return Chain; @@ -5165,6 +5257,23 @@ static bool isZeroExtended(SDNode *N, SelectionDAG &DAG) { return false; } +static EVT getExtensionTo64Bits(const EVT &OrigVT) { + if (OrigVT.getSizeInBits() >= 64) + return OrigVT; + + assert(OrigVT.isSimple() && "Expecting a simple value type"); + + MVT::SimpleValueType OrigSimpleTy = OrigVT.getSimpleVT().SimpleTy; + switch (OrigSimpleTy) { + default: llvm_unreachable("Unexpected Vector Type"); + case MVT::v2i8: + case MVT::v2i16: + return MVT::v2i32; + case MVT::v4i8: + return MVT::v4i16; + } +} + /// AddRequiredExtensionForVMULL - Add a sign/zero extension to extend the total /// value size to 64 bits. We need a 64-bit D register as an operand to VMULL. /// We insert the required extension here to get the vector to fill a D register. @@ -5180,18 +5289,8 @@ static SDValue AddRequiredExtensionForVMULL(SDValue N, SelectionDAG &DAG, return N; // Must extend size to at least 64 bits to be used as an operand for VMULL. 
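getExtensionTo64Bits centralizes the widening used by the VMULL helpers: v2i8 and v2i16 become v2i32, v4i8 becomes v4i16, and anything already 64 bits wide is left alone. The kind of source loop this ultimately serves is a widening multiply of narrow elements (illustrative):

    #include <stdint.h>

    // i16 x i16 -> i32 per element; once both operands are proven sign-extended
    // (or come from an extending load) this is a candidate for vmull.s16.
    void widening_mul(const int16_t *a, const int16_t *b, int32_t *out, int n) {
      for (int i = 0; i < n; ++i)
        out[i] = (int32_t)a[i] * (int32_t)b[i];
    }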
- MVT::SimpleValueType OrigSimpleTy = OrigTy.getSimpleVT().SimpleTy; - EVT NewVT; - switch (OrigSimpleTy) { - default: llvm_unreachable("Unexpected Orig Vector Type"); - case MVT::v2i8: - case MVT::v2i16: - NewVT = MVT::v2i32; - break; - case MVT::v4i8: - NewVT = MVT::v4i16; - break; - } + EVT NewVT = getExtensionTo64Bits(OrigTy); + return DAG.getNode(ExtOpcode, N->getDebugLoc(), NewVT, N); } @@ -5201,22 +5300,22 @@ static SDValue AddRequiredExtensionForVMULL(SDValue N, SelectionDAG &DAG, /// reach a total size of 64 bits. We have to add the extension separately /// because ARM does not have a sign/zero extending load for vectors. static SDValue SkipLoadExtensionForVMULL(LoadSDNode *LD, SelectionDAG& DAG) { - SDValue NonExtendingLoad = - DAG.getLoad(LD->getMemoryVT(), LD->getDebugLoc(), LD->getChain(), + EVT ExtendedTy = getExtensionTo64Bits(LD->getMemoryVT()); + + // The load already has the right type. + if (ExtendedTy == LD->getMemoryVT()) + return DAG.getLoad(LD->getMemoryVT(), LD->getDebugLoc(), LD->getChain(), LD->getBasePtr(), LD->getPointerInfo(), LD->isVolatile(), LD->isNonTemporal(), LD->isInvariant(), LD->getAlignment()); - unsigned ExtOp = 0; - switch (LD->getExtensionType()) { - default: llvm_unreachable("Unexpected LoadExtType"); - case ISD::EXTLOAD: - case ISD::SEXTLOAD: ExtOp = ISD::SIGN_EXTEND; break; - case ISD::ZEXTLOAD: ExtOp = ISD::ZERO_EXTEND; break; - } - MVT::SimpleValueType MemType = LD->getMemoryVT().getSimpleVT().SimpleTy; - MVT::SimpleValueType ExtType = LD->getValueType(0).getSimpleVT().SimpleTy; - return AddRequiredExtensionForVMULL(NonExtendingLoad, DAG, - MemType, ExtType, ExtOp); + + // We need to create a zextload/sextload. We cannot just create a load + // followed by a zext/zext node because LowerMUL is also run during normal + // operation legalization where we can't create illegal types. 
+ return DAG.getExtLoad(LD->getExtensionType(), LD->getDebugLoc(), ExtendedTy, + LD->getChain(), LD->getBasePtr(), LD->getPointerInfo(), + LD->getMemoryVT(), LD->isVolatile(), + LD->isNonTemporal(), LD->getAlignment()); } /// SkipExtensionForVMULL - For a node that is a SIGN_EXTEND, ZERO_EXTEND, @@ -5614,7 +5713,6 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::BR_CC: return LowerBR_CC(Op, DAG); case ISD::BR_JT: return LowerBR_JT(Op, DAG); case ISD::VASTART: return LowerVASTART(Op, DAG); - case ISD::MEMBARRIER: return LowerMEMBARRIER(Op, DAG, Subtarget); case ISD::ATOMIC_FENCE: return LowerATOMIC_FENCE(Op, DAG, Subtarget); case ISD::PREFETCH: return LowerPREFETCH(Op, DAG, Subtarget); case ISD::SINT_TO_FP: diff --git a/contrib/llvm/lib/Target/ARM/ARMISelLowering.h b/contrib/llvm/lib/Target/ARM/ARMISelLowering.h index 9ee17f0..426010e 100644 --- a/contrib/llvm/lib/Target/ARM/ARMISelLowering.h +++ b/contrib/llvm/lib/Target/ARM/ARMISelLowering.h @@ -464,7 +464,8 @@ namespace llvm { CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals) const; + SmallVectorImpl<SDValue> &InVals, + bool isThisReturn, SDValue ThisVal) const; virtual SDValue LowerFormalArguments(SDValue Chain, @@ -473,16 +474,23 @@ namespace llvm { DebugLoc dl, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const; + int StoreByValRegs(CCState &CCInfo, SelectionDAG &DAG, + DebugLoc dl, SDValue &Chain, + const Value *OrigArg, + unsigned InRegsParamRecordIdx, + unsigned OffsetFromOrigArg, + unsigned ArgOffset, + bool ForceMutable) const; + void VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG, DebugLoc dl, SDValue &Chain, - const Value *OrigArg, - unsigned OffsetFromOrigArg, unsigned ArgOffset, - bool ForceMutable = false) - const; + bool ForceMutable = false) const; void computeRegArea(CCState &CCInfo, MachineFunction &MF, - unsigned &VARegSize, unsigned &VARegSaveSize) const; + unsigned InRegsParamRecordIdx, + unsigned &ArgRegsSize, + unsigned &ArgRegsSaveSize) const; virtual SDValue LowerCall(TargetLowering::CallLoweringInfo &CLI, diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrInfo.td b/contrib/llvm/lib/Target/ARM/ARMInstrInfo.td index 11550c5..1bd174e 100644 --- a/contrib/llvm/lib/Target/ARM/ARMInstrInfo.td +++ b/contrib/llvm/lib/Target/ARM/ARMInstrInfo.td @@ -221,6 +221,9 @@ def HasDB : Predicate<"Subtarget->hasDataBarrier()">, def HasMP : Predicate<"Subtarget->hasMPExtension()">, AssemblerPredicate<"FeatureMP", "mp-extensions">; +def HasTrustZone : Predicate<"Subtarget->hasTrustZone()">, + AssemblerPredicate<"FeatureTrustZone", + "TrustZone">; def UseNEONForFP : Predicate<"Subtarget->useNEONForSinglePrecisionFP()">; def DontUseNEONForFP : Predicate<"!Subtarget->useNEONForSinglePrecisionFP()">; def IsThumb : Predicate<"Subtarget->isThumb()">, @@ -578,6 +581,17 @@ def imm0_1 : Operand<i32> { let ParserMatchClass = Imm0_1AsmOperand; } def Imm0_3AsmOperand: ImmAsmOperand { let Name = "Imm0_3"; } def imm0_3 : Operand<i32> { let ParserMatchClass = Imm0_3AsmOperand; } +/// imm0_4 predicate - Immediate in the range [0,4]. +def Imm0_4AsmOperand : ImmAsmOperand +{ + let Name = "Imm0_4"; + let DiagnosticType = "ImmRange0_4"; +} +def imm0_4 : Operand<i32>, ImmLeaf<i32, [{ return Imm >= 0 && Imm < 5; }]> { + let ParserMatchClass = Imm0_4AsmOperand; + let DecoderMethod = "DecodeImm0_4"; +} + /// imm0_7 predicate - Immediate in the range [0,7]. 
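The new imm0_4 operand narrows the hint immediate to the architected hints (nop, yield, wfe, wfi, sev); the HINT and t2HINT definitions further down switch to it. From C such hints are usually reached through inline assembly (illustrative, GCC/Clang syntax):

    static inline void wait_for_event(void) {
      __asm__ volatile("wfe");   // assembles to hint #2
    }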
def Imm0_7AsmOperand: ImmAsmOperand { let Name = "Imm0_7"; } def imm0_7 : Operand<i32>, ImmLeaf<i32, [{ @@ -741,18 +755,26 @@ def imm1_16 : Operand<i32>, PatLeaf<(imm), [{ return Imm > 0 && Imm <= 16; }], // addrmode_imm12 := reg +/- imm12 // def MemImm12OffsetAsmOperand : AsmOperandClass { let Name = "MemImm12Offset"; } -def addrmode_imm12 : Operand<i32>, +class AddrMode_Imm12 : Operand<i32>, ComplexPattern<i32, 2, "SelectAddrModeImm12", []> { // 12-bit immediate operand. Note that instructions using this encode // #0 and #-0 differently. We flag #-0 as the magic value INT32_MIN. All other // immediate values are as normal. let EncoderMethod = "getAddrModeImm12OpValue"; - let PrintMethod = "printAddrModeImm12Operand"; let DecoderMethod = "DecodeAddrModeImm12Operand"; let ParserMatchClass = MemImm12OffsetAsmOperand; let MIOperandInfo = (ops GPR:$base, i32imm:$offsimm); } + +def addrmode_imm12 : AddrMode_Imm12 { + let PrintMethod = "printAddrModeImm12Operand<false>"; +} + +def addrmode_imm12_pre : AddrMode_Imm12 { + let PrintMethod = "printAddrModeImm12Operand<true>"; +} + // ldst_so_reg := reg +/- reg shop imm // def MemRegOffsetAsmOperand : AsmOperandClass { let Name = "MemRegOffset"; } @@ -852,14 +874,23 @@ def am2offset_imm : Operand<i32>, // // FIXME: split into imm vs. reg versions. def AddrMode3AsmOperand : AsmOperandClass { let Name = "AddrMode3"; } -def addrmode3 : Operand<i32>, - ComplexPattern<i32, 3, "SelectAddrMode3", []> { +class AddrMode3 : Operand<i32>, + ComplexPattern<i32, 3, "SelectAddrMode3", []> { let EncoderMethod = "getAddrMode3OpValue"; - let PrintMethod = "printAddrMode3Operand"; let ParserMatchClass = AddrMode3AsmOperand; let MIOperandInfo = (ops GPR:$base, GPR:$offsreg, i32imm:$offsimm); } +def addrmode3 : AddrMode3 +{ + let PrintMethod = "printAddrMode3Operand<false>"; +} + +def addrmode3_pre : AddrMode3 +{ + let PrintMethod = "printAddrMode3Operand<true>"; +} + // FIXME: split into imm vs. reg versions. // FIXME: parser method to handle +/- register. 
def AM3OffsetAsmOperand : AsmOperandClass { @@ -885,15 +916,22 @@ def ldstm_mode : OptionalDefOperand<OtherVT, (ops i32), (ops (i32 1))> { // addrmode5 := reg +/- imm8*4 // def AddrMode5AsmOperand : AsmOperandClass { let Name = "AddrMode5"; } -def addrmode5 : Operand<i32>, - ComplexPattern<i32, 2, "SelectAddrMode5", []> { - let PrintMethod = "printAddrMode5Operand"; +class AddrMode5 : Operand<i32>, + ComplexPattern<i32, 2, "SelectAddrMode5", []> { let EncoderMethod = "getAddrMode5OpValue"; let DecoderMethod = "DecodeAddrMode5Operand"; let ParserMatchClass = AddrMode5AsmOperand; let MIOperandInfo = (ops GPR:$base, i32imm); } +def addrmode5 : AddrMode5 { + let PrintMethod = "printAddrMode5Operand<false>"; +} + +def addrmode5_pre : AddrMode5 { + let PrintMethod = "printAddrMode5Operand<true>"; +} + // addrmode6 := reg with optional alignment // def AddrMode6AsmOperand : AsmOperandClass { let Name = "AlignedMemory"; } @@ -1668,11 +1706,11 @@ def ATOMUMAX6432 : PseudoInst<(outs GPR:$dst1, GPR:$dst2), NoItinerary, []>; } -def HINT : AI<(outs), (ins imm0_255:$imm), MiscFrm, NoItinerary, +def HINT : AI<(outs), (ins imm0_4:$imm), MiscFrm, NoItinerary, "hint", "\t$imm", []>, Requires<[IsARM, HasV6]> { - bits<8> imm; - let Inst{27-8} = 0b00110010000011110000; - let Inst{7-0} = imm; + bits<3> imm; + let Inst{27-3} = 0b0011001000001111000000000; + let Inst{2-0} = imm; } def : InstAlias<"nop$p", (HINT 0, pred:$p)>, Requires<[IsARM, HasV6T2]>; @@ -2077,7 +2115,7 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [SP] in { // Secure Monitor Call is a system instruction. def SMC : ABI<0b0001, (outs), (ins imm0_15:$opt), NoItinerary, "smc", "\t$opt", - []> { + []>, Requires<[IsARM, HasTrustZone]> { bits<4> opt; let Inst{23-4} = 0b01100000000000000111; let Inst{3-0} = opt; @@ -2238,7 +2276,7 @@ def LDRD : AI3ld<0b1101, 0, (outs GPR:$Rd, GPR:$dst2), multiclass AI2_ldridx<bit isByte, string opc, InstrItinClass iii, InstrItinClass iir> { def _PRE_IMM : AI2ldstidx<1, isByte, 1, (outs GPR:$Rt, GPR:$Rn_wb), - (ins addrmode_imm12:$addr), IndexModePre, LdFrm, iii, + (ins addrmode_imm12_pre:$addr), IndexModePre, LdFrm, iii, opc, "\t$Rt, $addr!", "$addr.base = $Rn_wb", []> { bits<17> addr; let Inst{25} = 0; @@ -2275,6 +2313,7 @@ multiclass AI2_ldridx<bit isByte, string opc, let Inst{23} = offset{12}; let Inst{19-16} = addr; let Inst{11-0} = offset{11-0}; + let Inst{4} = 0; let DecoderMethod = "DecodeAddrMode2IdxInstruction"; } @@ -2307,7 +2346,7 @@ defm LDRB : AI2_ldridx<1, "ldrb", IIC_iLoad_bh_iu, IIC_iLoad_bh_ru>; multiclass AI3_ldridx<bits<4> op, string opc, InstrItinClass itin> { def _PRE : AI3ldstidx<op, 1, 1, (outs GPR:$Rt, GPR:$Rn_wb), - (ins addrmode3:$addr), IndexModePre, + (ins addrmode3_pre:$addr), IndexModePre, LdMiscFrm, itin, opc, "\t$Rt, $addr!", "$addr.base = $Rn_wb", []> { bits<14> addr; @@ -2341,7 +2380,7 @@ defm LDRSH : AI3_ldridx<0b1111, "ldrsh", IIC_iLoad_bh_ru>; defm LDRSB : AI3_ldridx<0b1101, "ldrsb", IIC_iLoad_bh_ru>; let hasExtraDefRegAllocReq = 1 in { def LDRD_PRE : AI3ldstidx<0b1101, 0, 1, (outs GPR:$Rt, GPR:$Rt2, GPR:$Rn_wb), - (ins addrmode3:$addr), IndexModePre, + (ins addrmode3_pre:$addr), IndexModePre, LdMiscFrm, IIC_iLoad_d_ru, "ldrd", "\t$Rt, $Rt2, $addr!", "$addr.base = $Rn_wb", []> { @@ -2497,7 +2536,7 @@ def STRD : AI3str<0b1111, (outs), (ins GPR:$Rt, GPR:$src2, addrmode3:$addr), multiclass AI2_stridx<bit isByte, string opc, InstrItinClass iii, InstrItinClass iir> { def _PRE_IMM : AI2ldstidx<0, isByte, 1, (outs GPR:$Rn_wb), - (ins GPR:$Rt, 
addrmode_imm12:$addr), IndexModePre, + (ins GPR:$Rt, addrmode_imm12_pre:$addr), IndexModePre, StFrm, iii, opc, "\t$Rt, $addr!", "$addr.base = $Rn_wb", []> { bits<17> addr; @@ -2619,7 +2658,7 @@ def STRH_preidx: ARMPseudoInst<(outs GPR:$Rn_wb), def STRH_PRE : AI3ldstidx<0b1011, 0, 1, (outs GPR:$Rn_wb), - (ins GPR:$Rt, addrmode3:$addr), IndexModePre, + (ins GPR:$Rt, addrmode3_pre:$addr), IndexModePre, StMiscFrm, IIC_iStore_bh_ru, "strh", "\t$Rt, $addr!", "$addr.base = $Rn_wb", []> { bits<14> addr; @@ -2651,7 +2690,7 @@ def STRH_POST : AI3ldstidx<0b1011, 0, 0, (outs GPR:$Rn_wb), let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in { def STRD_PRE : AI3ldstidx<0b1111, 0, 1, (outs GPR:$Rn_wb), - (ins GPR:$Rt, GPR:$Rt2, addrmode3:$addr), + (ins GPR:$Rt, GPR:$Rt2, addrmode3_pre:$addr), IndexModePre, StMiscFrm, IIC_iStore_d_ru, "strd", "\t$Rt, $Rt2, $addr!", "$addr.base = $Rn_wb", []> { @@ -4426,7 +4465,7 @@ multiclass LdStCop<bit load, bit Dbit, string asm> { let Inst{7-0} = addr{7-0}; let DecoderMethod = "DecodeCopMemInstruction"; } - def _PRE : ACI<(outs), (ins p_imm:$cop, c_imm:$CRd, addrmode5:$addr), + def _PRE : ACI<(outs), (ins p_imm:$cop, c_imm:$CRd, addrmode5_pre:$addr), asm, "\t$cop, $CRd, $addr!", IndexModePre> { bits<13> addr; bits<4> cop; @@ -4497,7 +4536,7 @@ multiclass LdSt2Cop<bit load, bit Dbit, string asm> { let Inst{7-0} = addr{7-0}; let DecoderMethod = "DecodeCopMemInstruction"; } - def _PRE : ACInoP<(outs), (ins p_imm:$cop, c_imm:$CRd, addrmode5:$addr), + def _PRE : ACInoP<(outs), (ins p_imm:$cop, c_imm:$CRd, addrmode5_pre:$addr), asm, "\t$cop, $CRd, $addr!", IndexModePre> { bits<13> addr; bits<4> cop; diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrNEON.td b/contrib/llvm/lib/Target/ARM/ARMInstrNEON.td index 0411ac4..896fd0f 100644 --- a/contrib/llvm/lib/Target/ARM/ARMInstrNEON.td +++ b/contrib/llvm/lib/Target/ARM/ARMInstrNEON.td @@ -4316,6 +4316,24 @@ def VACGTq : N3VQInt<1, 0, 0b10, 0b1110, 1, N3RegFrm, IIC_VBINQ, "vacgt", defm VTST : N3V_QHS<0, 0, 0b1000, 1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q, "vtst", "", NEONvtst, 1>; +def: NEONInstAlias<"vaclt${p}.f32 $Vd, $Vn, $Vm", + (VACGTd DPR:$Vd, DPR:$Vm, DPR:$Vn, pred:$p)>; +def: NEONInstAlias<"vaclt${p}.f32 $Vd, $Vn, $Vm", + (VACGTq QPR:$Vd, QPR:$Vm, QPR:$Vn, pred:$p)>; +def: NEONInstAlias<"vacle${p}.f32 $Vd, $Vn, $Vm", + (VACGEd DPR:$Vd, DPR:$Vm, DPR:$Vn, pred:$p)>; +def: NEONInstAlias<"vacle${p}.f32 $Vd, $Vn, $Vm", + (VACGEq QPR:$Vd, QPR:$Vm, QPR:$Vn, pred:$p)>; + +def: NEONInstAlias<"vaclt${p}.f32 $Vd, $Vm", + (VACGTd DPR:$Vd, DPR:$Vm, DPR:$Vd, pred:$p)>; +def: NEONInstAlias<"vaclt${p}.f32 $Vd, $Vm", + (VACGTq QPR:$Vd, QPR:$Vm, QPR:$Vd, pred:$p)>; +def: NEONInstAlias<"vacle${p}.f32 $Vd, $Vm", + (VACGEd DPR:$Vd, DPR:$Vm, DPR:$Vd, pred:$p)>; +def: NEONInstAlias<"vacle${p}.f32 $Vd, $Vm", + (VACGEq QPR:$Vd, QPR:$Vm, QPR:$Vd, pred:$p)>; + // Vector Bitwise Operations. 
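The vaclt/vacle aliases above are pure assembler conveniences: |a| < |b| is encoded as vacgt/vacge with the source operands swapped. The ACLE intrinsics expose the same comparison; a sketch of what one lowers to:

    #include <arm_neon.h>

    uint32x4_t abs_lt(float32x4_t a, float32x4_t b) {
      return vcaltq_f32(a, b);   // emitted as vacgt.f32 with the operands swapped
    }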
def vnotd : PatFrag<(ops node:$in), @@ -4889,6 +4907,29 @@ def VABSfq : N2VQ<0b11, 0b11, 0b10, 0b01, 0b01110, 0, "vabs", "f32", v4f32, v4f32, fabs>; +def : Pat<(xor (v2i32 (bitconvert (v8i8 (NEONvshrs DPR:$src, (i32 7))))), + (v2i32 (bitconvert (v8i8 (add DPR:$src, + (NEONvshrs DPR:$src, (i32 7))))))), + (VABSv8i8 DPR:$src)>; +def : Pat<(xor (v2i32 (bitconvert (v4i16 (NEONvshrs DPR:$src, (i32 15))))), + (v2i32 (bitconvert (v4i16 (add DPR:$src, + (NEONvshrs DPR:$src, (i32 15))))))), + (VABSv4i16 DPR:$src)>; +def : Pat<(xor (v2i32 (NEONvshrs DPR:$src, (i32 31))), + (v2i32 (add DPR:$src, (NEONvshrs DPR:$src, (i32 31))))), + (VABSv2i32 DPR:$src)>; +def : Pat<(xor (v4i32 (bitconvert (v16i8 (NEONvshrs QPR:$src, (i32 7))))), + (v4i32 (bitconvert (v16i8 (add QPR:$src, + (NEONvshrs QPR:$src, (i32 7))))))), + (VABSv16i8 QPR:$src)>; +def : Pat<(xor (v4i32 (bitconvert (v8i16 (NEONvshrs QPR:$src, (i32 15))))), + (v4i32 (bitconvert (v8i16 (add QPR:$src, + (NEONvshrs QPR:$src, (i32 15))))))), + (VABSv8i16 QPR:$src)>; +def : Pat<(xor (v4i32 (NEONvshrs QPR:$src, (i32 31))), + (v4i32 (add QPR:$src, (NEONvshrs QPR:$src, (i32 31))))), + (VABSv4i32 QPR:$src)>; + def : Pat<(v2f32 (int_arm_neon_vabs (v2f32 DPR:$src))), (VABSfd DPR:$src)>; def : Pat<(v4f32 (int_arm_neon_vabs (v4f32 QPR:$src))), (VABSfq QPR:$src)>; diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrThumb2.td b/contrib/llvm/lib/Target/ARM/ARMInstrThumb2.td index c9d709e..4dacb86 100644 --- a/contrib/llvm/lib/Target/ARM/ARMInstrThumb2.td +++ b/contrib/llvm/lib/Target/ARM/ARMInstrThumb2.td @@ -150,7 +150,7 @@ def lo5AllOne : PatLeaf<(i32 imm), [{ def t2addrmode_imm12_asmoperand : AsmOperandClass {let Name="MemUImm12Offset";} def t2addrmode_imm12 : Operand<i32>, ComplexPattern<i32, 2, "SelectT2AddrModeImm12", []> { - let PrintMethod = "printAddrModeImm12Operand"; + let PrintMethod = "printAddrModeImm12Operand<false>"; let EncoderMethod = "getAddrModeImm12OpValue"; let DecoderMethod = "DecodeT2AddrModeImm12"; let ParserMatchClass = t2addrmode_imm12_asmoperand; @@ -3401,12 +3401,7 @@ class t2CPS<dag iops, string asm_op> : T2XI<(outs), iops, NoItinerary, bits<5> mode; bit M; - let Inst{31-27} = 0b11110; - let Inst{26} = 0; - let Inst{25-20} = 0b111010; - let Inst{19-16} = 0b1111; - let Inst{15-14} = 0b10; - let Inst{12} = 0; + let Inst{31-11} = 0b111100111010111110000; let Inst{10-9} = imod; let Inst{8} = M; let Inst{7-5} = iflags; @@ -3425,13 +3420,13 @@ let imod = 0, iflags = 0, M = 1 in // A6.3.4 Branches and miscellaneous control // Table A6-14 Change Processor State, and hint instructions -def t2HINT : T2I<(outs), (ins imm0_255:$imm), NoItinerary, "hint", "\t$imm",[]>{ - bits<8> imm; - let Inst{31-8} = 0b111100111010111110000000; - let Inst{7-0} = imm; +def t2HINT : T2I<(outs), (ins imm0_4:$imm), NoItinerary, "hint", "\t$imm",[]> { + bits<3> imm; + let Inst{31-3} = 0b11110011101011111000000000000; + let Inst{2-0} = imm; } -def : t2InstAlias<"hint$p.w $imm", (t2HINT imm0_255:$imm, pred:$p)>; +def : t2InstAlias<"hint$p.w $imm", (t2HINT imm0_4:$imm, pred:$p)>; def : t2InstAlias<"nop$p.w", (t2HINT 0, pred:$p)>; def : t2InstAlias<"yield$p.w", (t2HINT 1, pred:$p)>; def : t2InstAlias<"wfe$p.w", (t2HINT 2, pred:$p)>; @@ -3449,7 +3444,8 @@ def t2DBG : T2I<(outs), (ins imm0_15:$opt), NoItinerary, "dbg", "\t$opt", []> { // Secure Monitor Call is a system instruction. 
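The VABSv* patterns added in the ARMInstrNEON.td hunk above recognize the branch-free integer abs idiom per lane: with m = x >> (bits-1) arithmetically, |x| = (x + m) ^ m, which the DAG expresses as xor(ashr, add(x, ashr)). Scalar sketch for one 32-bit lane:

    #include <stdint.h>

    int32_t abs_lane(int32_t x) {
      int32_t m = x >> 31;   // 0 for non-negative x, -1 for negative x
      return (x + m) ^ m;    // |x| (undefined only for INT32_MIN, as with abs())
    }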
// Option = Inst{19-16} -def t2SMC : T2I<(outs), (ins imm0_15:$opt), NoItinerary, "smc", "\t$opt", []> { +def t2SMC : T2I<(outs), (ins imm0_15:$opt), NoItinerary, "smc", "\t$opt", + []>, Requires<[IsThumb2, HasTrustZone]> { let Inst{31-27} = 0b11110; let Inst{26-20} = 0b1111111; let Inst{15-12} = 0b1000; diff --git a/contrib/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/contrib/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp index b7ac5d5..c8ed576 100644 --- a/contrib/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp @@ -87,53 +87,6 @@ namespace { MachineBasicBlock::iterator i) : Offset(o), Reg(r), isKill(k), Position(p), MBBI(i), Merged(false) {} }; - class UnitRegsMap { - public: - UnitRegsMap(const TargetRegisterInfo* _TRI) : TRI(_TRI) {} - const SmallVector<unsigned, 4>& operator[](unsigned Reg) { - DenseMap<unsigned, SmallVector<unsigned, 4> >::iterator found = - Cache.find(Reg); - if (found != Cache.end()) - return found->second; - else - return Cache.insert(std::make_pair(Reg, this->getUnitRegs(Reg))) - .first->second; - } - private: - SmallVector<unsigned, 4> getUnitRegs(unsigned Reg) { - SmallVector<unsigned, 4> Res; - - const TargetRegisterClass* TRC = TRI->getMinimalPhysRegClass(Reg); - if (TRC == &ARM::QPRRegClass) { - if (Reg > ARM::Q7) { - Res.push_back(TRI->getSubReg(Reg, ARM::dsub_0)); - Res.push_back(TRI->getSubReg(Reg, ARM::dsub_1)); - return Res; - } - - Res.push_back(TRI->getSubReg(Reg, ARM::ssub_0)); - Res.push_back(TRI->getSubReg(Reg, ARM::ssub_1)); - Res.push_back(TRI->getSubReg(Reg, ARM::ssub_2)); - Res.push_back(TRI->getSubReg(Reg, ARM::ssub_3)); - - return Res; - } - - if (TRC == &ARM::DPRRegClass && Reg < ARM::D15) { - Res.push_back(TRI->getSubReg(Reg, ARM::ssub_0)); - Res.push_back(TRI->getSubReg(Reg, ARM::ssub_1)); - - return Res; - } - - Res.push_back(Reg); - - return Res; - - } - const TargetRegisterInfo* TRI; - DenseMap<unsigned, SmallVector<unsigned, 4> > Cache; - }; typedef SmallVector<MemOpQueueEntry,8> MemOpQueue; typedef MemOpQueue::iterator MemOpQueueIter; @@ -175,11 +128,6 @@ namespace { MachineBasicBlock::iterator MBBI, bool &Advance, MachineBasicBlock::iterator &I); - unsigned AddMemOp(MemOpQueue& MemOps, - const MemOpQueueEntry newEntry, - UnitRegsMap& UnitRegsInfo, - SmallSet<unsigned, 4>& UsedUnitRegs, - unsigned At = -1U); bool LoadStoreMultipleOpti(MachineBasicBlock &MBB); bool MergeReturnIntoLDM(MachineBasicBlock &MBB); }; @@ -1265,103 +1213,12 @@ bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB, return false; } -/// AddMemOp - helper for ARMLoadStoreOpt::LoadStoreMultipleOpti. -/// It adds store mem ops with simple push_back/insert method, -/// without any additional logic. -/// For load operation it does the next: -/// 1. Adds new load operation into MemOp collection at "At" position. -/// 2. Removes any "load" operations from MemOps, that changes "Reg" register -/// contents, prior to "At". -/// UnitRegsInfo - Map of type Map< Register, UnitRegisters-vector > -/// UsedUnitRegs - set of unit-registers currently in use. -/// At - position at which it would added, and prior which the clean-up -/// should be made (for load operation). -/// FIXME: The clean-up also should be made for store operations, -/// but the memory address should be analyzed instead of unit registers. 
-unsigned ARMLoadStoreOpt::AddMemOp(MemOpQueue& MemOps, - const MemOpQueueEntry NewEntry, - UnitRegsMap& UnitRegsInfo, - SmallSet<unsigned, 4>& UsedUnitRegs, - unsigned At) { - unsigned Cleaned = 0; - - if (At == -1U) { - At = MemOps.size(); - MemOps.push_back(NewEntry); - } else - MemOps.insert(&MemOps[At], NewEntry); - - // FIXME: - // If operation is not load, leave it as is by now, - // So 0 overridden ops would cleaned in this case. - if (!NewEntry.MBBI->mayLoad()) - return 0; - - const SmallVector<unsigned, 4>& NewEntryUnitRegs = UnitRegsInfo[NewEntry.Reg]; - - bool FoundOverriddenLoads = false; - - for (unsigned i = 0, e = NewEntryUnitRegs.size(); i != e; ++i) - if (UsedUnitRegs.count(NewEntryUnitRegs[i])) { - FoundOverriddenLoads = true; - break; - } - - // If we detect that this register is used by load operations that are - // predecessors for the new one, remove them from MemOps then. - if (FoundOverriddenLoads) { - MemOpQueue UpdatedMemOps; - - // Scan through MemOps entries. - for (unsigned i = 0; i != At; ++i) { - MemOpQueueEntry& MemOpEntry = MemOps[i]; - - // FIXME: Skip non-load operations by now. - if (!MemOpEntry.MBBI->mayLoad()) - continue; - - const SmallVector<unsigned, 4>& MemOpUnitRegs = - UnitRegsInfo[MemOpEntry.Reg]; - - // Lookup entry that loads contents into register used by new entry. - bool ReleaseThisEntry = false; - for (unsigned m = 0, em = MemOpUnitRegs.size(); m != em; ++m) { - if (std::find(NewEntryUnitRegs.begin(), NewEntryUnitRegs.end(), - MemOpUnitRegs[m]) != NewEntryUnitRegs.end()) { - ReleaseThisEntry = true; - ++Cleaned; - break; - } - } - - if (ReleaseThisEntry) { - const SmallVector<unsigned, 4>& RelesedRegs = UnitRegsInfo[MemOpEntry.Reg]; - for (unsigned r = 0, er = RelesedRegs.size(); r != er; ++r) - UsedUnitRegs.erase(RelesedRegs[r]); - } else - UpdatedMemOps.push_back(MemOpEntry); - } - - // Keep anything without changes after At position. - for (unsigned i = At, e = MemOps.size(); i != e; ++i) - UpdatedMemOps.push_back(MemOps[i]); - - MemOps.swap(UpdatedMemOps); - } - - UsedUnitRegs.insert(NewEntryUnitRegs.begin(), NewEntryUnitRegs.end()); - - return Cleaned; -} - /// LoadStoreMultipleOpti - An optimization pass to turn multiple LDR / STR /// ops of the same base and incrementing offset into LDM / STM ops. bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) { unsigned NumMerges = 0; unsigned NumMemOps = 0; MemOpQueue MemOps; - UnitRegsMap UnitRegsInfo(TRI); - SmallSet<unsigned, 4> UsedRegUnits; unsigned CurrBase = 0; int CurrOpc = -1; unsigned CurrSize = 0; @@ -1401,6 +1258,22 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) { // merge the ldr's so far, including this one. But don't try to // combine the following ldr(s). Clobber = (isi32Load(Opcode) && Base == MBBI->getOperand(0).getReg()); + + // Watch out for: + // r4 := ldr [r0, #8] + // r4 := ldr [r0, #4] + // + // The optimization may reorder the second ldr in front of the first + // ldr, which violates write after write(WAW) dependence. The same as + // str. Try to merge inst(s) already in MemOps. + bool Overlap = false; + for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end(); I != E; ++I) { + if (TRI->regsOverlap(Reg, I->MBBI->getOperand(0).getReg())) { + Overlap = true; + break; + } + } + if (CurrBase == 0 && !Clobber) { // Start of a new chain. 
CurrBase = Base; @@ -1408,13 +1281,10 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) { CurrSize = Size; CurrPred = Pred; CurrPredReg = PredReg; - MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill, Position, MBBI)); ++NumMemOps; - const SmallVector<unsigned, 4>& EntryUnitRegs = UnitRegsInfo[Reg]; - UsedRegUnits.insert(EntryUnitRegs.begin(), EntryUnitRegs.end()); Advance = true; - } else { + } else if (!Overlap) { if (Clobber) { TryMerge = true; Advance = true; @@ -1424,24 +1294,20 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) { // No need to match PredReg. // Continue adding to the queue. if (Offset > MemOps.back().Offset) { - unsigned OverridesCleaned = - AddMemOp(MemOps, - MemOpQueueEntry(Offset, Reg, isKill, Position, MBBI), - UnitRegsInfo, UsedRegUnits) != 0; - NumMemOps += 1 - OverridesCleaned; + MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill, + Position, MBBI)); + ++NumMemOps; Advance = true; } else { - for (unsigned I = 0; I != NumMemOps; ++I) { - if (Offset < MemOps[I].Offset) { - MemOpQueueEntry entry(Offset, Reg, isKill, Position, MBBI); - unsigned OverridesCleaned = - AddMemOp(MemOps, entry, UnitRegsInfo, - UsedRegUnits, I) != 0; - NumMemOps += 1 - OverridesCleaned; - + for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end(); + I != E; ++I) { + if (Offset < I->Offset) { + MemOps.insert(I, MemOpQueueEntry(Offset, Reg, isKill, + Position, MBBI)); + ++NumMemOps; Advance = true; break; - } else if (Offset == MemOps[I].Offset) { + } else if (Offset == I->Offset) { // Collision! This can't be merged! break; } @@ -1512,7 +1378,6 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) { CurrPredReg = 0; if (NumMemOps) { MemOps.clear(); - UsedRegUnits.clear(); NumMemOps = 0; } diff --git a/contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h b/contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h index 88d96c0..f4248fc 100644 --- a/contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h +++ b/contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h @@ -38,7 +38,7 @@ class ARMFunctionInfo : public MachineFunctionInfo { /// VarArgsRegSaveSize - Size of the register save area for vararg functions. /// - unsigned VarArgsRegSaveSize; + unsigned ArgRegsSaveSize; /// HasStackFrame - True if this function has a stack frame. Set by /// processFunctionBeforeCalleeSavedScan(). 
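The Overlap check introduced in LoadStoreMultipleOpti above guards against reordering a later ldr/str in front of an earlier one that writes an overlapping destination register, which would break write-after-write ordering. A minimal, self-contained sketch of the same guard, using simplified stand-ins (QueuedMemOp, a trivial regsOverlap) rather than the real MachineInstr and TargetRegisterInfo types:

#include <vector>

struct QueuedMemOp {
  int Offset;    // immediate offset from the common base register
  unsigned Reg;  // destination (or source) register of the queued op
};

// Stand-in for TargetRegisterInfo::regsOverlap(); with plain register ids,
// "overlap" degenerates to equality.
static bool regsOverlap(unsigned A, unsigned B) { return A == B; }

// Returns true when the incoming op's register overlaps one already queued,
// e.g. "r4 := ldr [r0, #8]" is queued and "r4 := ldr [r0, #4]" arrives:
// merging would reorder the second load above the first (a WAW hazard).
static bool hasWAWHazard(const std::vector<QueuedMemOp> &MemOps, unsigned Reg) {
  for (std::vector<QueuedMemOp>::const_iterator I = MemOps.begin(),
                                                E = MemOps.end(); I != E; ++I)
    if (regsOverlap(Reg, I->Reg))
      return true;
  return false;
}

When such a hazard is found, the pass stops growing the current queue and instead merges the entries it has already collected, as the in-tree comment ("Try to merge inst(s) already in MemOps") notes.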
@@ -117,7 +117,7 @@ public: ARMFunctionInfo() : isThumb(false), hasThumb2(false), - VarArgsRegSaveSize(0), HasStackFrame(false), RestoreSPFromFP(false), + ArgRegsSaveSize(0), HasStackFrame(false), RestoreSPFromFP(false), LRSpilledForFarJump(false), FramePtrSpillOffset(0), GPRCS1Offset(0), GPRCS2Offset(0), DPRCSOffset(0), GPRCS1Size(0), GPRCS2Size(0), DPRCSSize(0), @@ -129,7 +129,7 @@ public: explicit ARMFunctionInfo(MachineFunction &MF) : isThumb(MF.getTarget().getSubtarget<ARMSubtarget>().isThumb()), hasThumb2(MF.getTarget().getSubtarget<ARMSubtarget>().hasThumb2()), - VarArgsRegSaveSize(0), HasStackFrame(false), RestoreSPFromFP(false), + ArgRegsSaveSize(0), HasStackFrame(false), RestoreSPFromFP(false), LRSpilledForFarJump(false), FramePtrSpillOffset(0), GPRCS1Offset(0), GPRCS2Offset(0), DPRCSOffset(0), GPRCS1Size(0), GPRCS2Size(0), DPRCSSize(0), @@ -141,8 +141,8 @@ public: bool isThumb1OnlyFunction() const { return isThumb && !hasThumb2; } bool isThumb2Function() const { return isThumb && hasThumb2; } - unsigned getVarArgsRegSaveSize() const { return VarArgsRegSaveSize; } - void setVarArgsRegSaveSize(unsigned s) { VarArgsRegSaveSize = s; } + unsigned getArgRegsSaveSize() const { return ArgRegsSaveSize; } + void setArgRegsSaveSize(unsigned s) { ArgRegsSaveSize = s; } bool hasStackFrame() const { return HasStackFrame; } void setHasStackFrame(bool s) { HasStackFrame = s; } diff --git a/contrib/llvm/lib/Target/ARM/ARMSubtarget.cpp b/contrib/llvm/lib/Target/ARM/ARMSubtarget.cpp index 739300e..8653c46 100644 --- a/contrib/llvm/lib/Target/ARM/ARMSubtarget.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMSubtarget.cpp @@ -91,6 +91,7 @@ void ARMSubtarget::initializeEnvironment() { HasRAS = false; HasMPExtension = false; FPOnlySP = false; + HasTrustZone = false; AllowsUnalignedMem = false; Thumb2DSP = false; UseNaClTrap = false; diff --git a/contrib/llvm/lib/Target/ARM/ARMSubtarget.h b/contrib/llvm/lib/Target/ARM/ARMSubtarget.h index 5b5ee6a..038eb76 100644 --- a/contrib/llvm/lib/Target/ARM/ARMSubtarget.h +++ b/contrib/llvm/lib/Target/ARM/ARMSubtarget.h @@ -148,6 +148,9 @@ protected: /// precision. bool FPOnlySP; + /// HasTrustZone - if true, processor supports TrustZone security extensions + bool HasTrustZone; + /// AllowsUnalignedMem - If true, the subtarget allows unaligned memory /// accesses for some types. For details, see /// ARMTargetLowering::allowsUnalignedMemoryAccesses(). 
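HasTrustZone follows the usual subtarget feature-bit pattern: initializeEnvironment() defaults it to false, the corresponding target feature can switch it on, and the hasTrustZone() accessor added in the hunk below lets predicates such as the new Requires<[IsThumb2, HasTrustZone]> on t2SMC take effect. A small illustrative sketch of consulting the accessor; canEmitSMC() is a made-up helper, and isThumb2() is assumed to be the existing Thumb-2 query on ARMSubtarget:

#include "ARMSubtarget.h"

using namespace llvm;

// Illustration only: this helper is not part of the patch. SMC encodings are
// only meaningful when the core implements the TrustZone security extensions.
static bool canEmitSMC(const ARMSubtarget &ST) {
  return ST.isThumb2() && ST.hasTrustZone();
}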
@@ -251,6 +254,7 @@ public: bool hasVMLxForwarding() const { return HasVMLxForwarding; } bool isFPBrccSlow() const { return SlowFPBrcc; } bool isFPOnlySP() const { return FPOnlySP; } + bool hasTrustZone() const { return HasTrustZone; } bool prefers32BitThumb() const { return Pref32BitThumb; } bool avoidCPSRPartialUpdate() const { return AvoidCPSRPartialUpdate; } bool avoidMOVsShifterOperand() const { return AvoidMOVsShifterOperand; } diff --git a/contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp index 1019b97..53ece66 100644 --- a/contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp @@ -125,6 +125,10 @@ public: unsigned getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) const; unsigned getAddressComputationCost(Type *Val) const; + + unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty, + OperandValueKind Op1Info = OK_AnyValue, + OperandValueKind Op2Info = OK_AnyValue) const; /// @} }; @@ -223,9 +227,9 @@ unsigned ARMTTI::getCastInstrCost(unsigned Opcode, Type *Dst, { ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i8, 6 }, { ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i8, 6 }, - // Operations that we legalize using load/stores to the stack. - { ISD::TRUNCATE, MVT::v16i8, MVT::v16i32, 4*1 + 16*2 + 2*1 }, - { ISD::TRUNCATE, MVT::v8i8, MVT::v8i32, 2*1 + 8*2 + 1 }, + // Operations that we legalize using splitting. + { ISD::TRUNCATE, MVT::v16i8, MVT::v16i32, 6 }, + { ISD::TRUNCATE, MVT::v8i8, MVT::v8i32, 3 }, // Vector float <-> i32 conversions. { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i32, 1 }, @@ -456,3 +460,67 @@ unsigned ARMTTI::getShuffleCost(ShuffleKind Kind, Type *Tp, int Index, return LT.first * NEONShuffleTbl[Idx].Cost; } + +unsigned ARMTTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty, OperandValueKind Op1Info, + OperandValueKind Op2Info) const { + + int ISDOpcode = TLI->InstructionOpcodeToISD(Opcode); + std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Ty); + + const unsigned FunctionCallDivCost = 20; + const unsigned ReciprocalDivCost = 10; + static const CostTblEntry<MVT> CostTbl[] = { + // Division. + // These costs are somewhat random. Choose a cost of 20 to indicate that + // vectorizing division (added function call) is going to be very expensive. + // Double register types. + { ISD::SDIV, MVT::v1i64, 1 * FunctionCallDivCost}, + { ISD::UDIV, MVT::v1i64, 1 * FunctionCallDivCost}, + { ISD::SREM, MVT::v1i64, 1 * FunctionCallDivCost}, + { ISD::UREM, MVT::v1i64, 1 * FunctionCallDivCost}, + { ISD::SDIV, MVT::v2i32, 2 * FunctionCallDivCost}, + { ISD::UDIV, MVT::v2i32, 2 * FunctionCallDivCost}, + { ISD::SREM, MVT::v2i32, 2 * FunctionCallDivCost}, + { ISD::UREM, MVT::v2i32, 2 * FunctionCallDivCost}, + { ISD::SDIV, MVT::v4i16, ReciprocalDivCost}, + { ISD::UDIV, MVT::v4i16, ReciprocalDivCost}, + { ISD::SREM, MVT::v4i16, 4 * FunctionCallDivCost}, + { ISD::UREM, MVT::v4i16, 4 * FunctionCallDivCost}, + { ISD::SDIV, MVT::v8i8, ReciprocalDivCost}, + { ISD::UDIV, MVT::v8i8, ReciprocalDivCost}, + { ISD::SREM, MVT::v8i8, 8 * FunctionCallDivCost}, + { ISD::UREM, MVT::v8i8, 8 * FunctionCallDivCost}, + // Quad register types.
+ { ISD::SDIV, MVT::v2i64, 2 * FunctionCallDivCost}, + { ISD::UDIV, MVT::v2i64, 2 * FunctionCallDivCost}, + { ISD::SREM, MVT::v2i64, 2 * FunctionCallDivCost}, + { ISD::UREM, MVT::v2i64, 2 * FunctionCallDivCost}, + { ISD::SDIV, MVT::v4i32, 4 * FunctionCallDivCost}, + { ISD::UDIV, MVT::v4i32, 4 * FunctionCallDivCost}, + { ISD::SREM, MVT::v4i32, 4 * FunctionCallDivCost}, + { ISD::UREM, MVT::v4i32, 4 * FunctionCallDivCost}, + { ISD::SDIV, MVT::v8i16, 8 * FunctionCallDivCost}, + { ISD::UDIV, MVT::v8i16, 8 * FunctionCallDivCost}, + { ISD::SREM, MVT::v8i16, 8 * FunctionCallDivCost}, + { ISD::UREM, MVT::v8i16, 8 * FunctionCallDivCost}, + { ISD::SDIV, MVT::v16i8, 16 * FunctionCallDivCost}, + { ISD::UDIV, MVT::v16i8, 16 * FunctionCallDivCost}, + { ISD::SREM, MVT::v16i8, 16 * FunctionCallDivCost}, + { ISD::UREM, MVT::v16i8, 16 * FunctionCallDivCost}, + // Multiplication. + }; + + int Idx = -1; + + if (ST->hasNEON()) + Idx = CostTableLookup<MVT>(CostTbl, array_lengthof(CostTbl), ISDOpcode, + LT.second); + + if (Idx != -1) + return LT.first * CostTbl[Idx].Cost; + + + return TargetTransformInfo::getArithmeticInstrCost(Opcode, Ty, Op1Info, + Op2Info); +} + diff --git a/contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index ed7b7ec..1dd2953 100644 --- a/contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -86,11 +86,11 @@ class ARMAsmParser : public MCTargetAsmParser { MCAsmLexer &getLexer() const { return Parser.getLexer(); } bool Warning(SMLoc L, const Twine &Msg, - ArrayRef<SMRange> Ranges = ArrayRef<SMRange>()) { + ArrayRef<SMRange> Ranges = None) { return Parser.Warning(L, Msg, Ranges); } bool Error(SMLoc L, const Twine &Msg, - ArrayRef<SMRange> Ranges = ArrayRef<SMRange>()) { + ArrayRef<SMRange> Ranges = None) { return Parser.Error(L, Msg, Ranges); } @@ -610,6 +610,13 @@ public: int64_t Value = CE->getValue(); return ((Value & 3) == 0) && Value >= -1020 && Value <= 1020; } + bool isImm0_4() const { + if (!isImm()) return false; + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + if (!CE) return false; + int64_t Value = CE->getValue(); + return Value >= 0 && Value < 5; + } bool isImm0_1020s4() const { if (!isImm()) return false; const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); @@ -4745,6 +4752,7 @@ StringRef ARMAsmParser::splitMnemonic(StringRef Mnemonic, Mnemonic == "mls" || Mnemonic == "smmls" || Mnemonic == "vcls" || Mnemonic == "vmls" || Mnemonic == "vnmls" || Mnemonic == "vacge" || Mnemonic == "vcge" || Mnemonic == "vclt" || Mnemonic == "vacgt" || + Mnemonic == "vaclt" || Mnemonic == "vacle" || Mnemonic == "vcgt" || Mnemonic == "vcle" || Mnemonic == "smlal" || Mnemonic == "umaal" || Mnemonic == "umlal" || Mnemonic == "vabal" || Mnemonic == "vmlal" || Mnemonic == "vpadal" || Mnemonic == "vqdmlal" || @@ -5014,8 +5022,8 @@ static bool isDataTypeToken(StringRef Tok) { static bool doesIgnoreDataTypeSuffix(StringRef Mnemonic, StringRef DT) { return Mnemonic.startswith("vldm") || Mnemonic.startswith("vstm"); } - -static void applyMnemonicAliases(StringRef &Mnemonic, unsigned Features); +static void applyMnemonicAliases(StringRef &Mnemonic, unsigned Features, + unsigned VariantID); /// Parse an arm instruction mnemonic followed by its operands. 
bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc, @@ -5026,7 +5034,8 @@ bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name, // MatchInstructionImpl(), but that's too late for aliases that include // any sort of suffix. unsigned AvailableFeatures = getAvailableFeatures(); - applyMnemonicAliases(Name, AvailableFeatures); + unsigned AssemblerDialect = getParser().getAssemblerDialect(); + applyMnemonicAliases(Name, AvailableFeatures, AssemblerDialect); // First check for the ARM-specific .req directive. if (Parser.getTok().is(AsmToken::Identifier) && @@ -7613,6 +7622,11 @@ MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, return Error(IDLoc, "instruction variant requires ARMv6 or later"); case Match_RequiresThumb2: return Error(IDLoc, "instruction variant requires Thumb2"); + case Match_ImmRange0_4: { + SMLoc ErrorLoc = ((ARMOperand*)Operands[ErrorInfo])->getStartLoc(); + if (ErrorLoc == SMLoc()) ErrorLoc = IDLoc; + return Error(ErrorLoc, "immediate operand must be in the range [0,4]"); + } case Match_ImmRange0_15: { SMLoc ErrorLoc = ((ARMOperand*)Operands[ErrorInfo])->getStartLoc(); if (ErrorLoc == SMLoc()) ErrorLoc = IDLoc; diff --git a/contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp index 2e009e5..ac937f3 100644 --- a/contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp +++ b/contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp @@ -308,6 +308,8 @@ static DecodeStatus DecodeVCVTD(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder); static DecodeStatus DecodeVCVTQ(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder); +static DecodeStatus DecodeImm0_4(MCInst &Inst, unsigned Insn, uint64_t Address, + const void *Decoder); static DecodeStatus DecodeThumbAddSpecialReg(MCInst &Inst, uint16_t Insn, @@ -1951,10 +1953,12 @@ static DecodeStatus DecodeT2CPSInstruction(MCInst &Inst, unsigned Insn, Inst.addOperand(MCOperand::CreateImm(mode)); if (iflags) S = MCDisassembler::SoftFail; } else { - // imod == '00' && M == '0' --> UNPREDICTABLE - Inst.setOpcode(ARM::t2CPS1p); - Inst.addOperand(MCOperand::CreateImm(mode)); - S = MCDisassembler::SoftFail; + // imod == '00' && M == '0' --> this is a HINT instruction + int imm = fieldFromInstruction(Insn, 0, 8); + // HINT are defined only for immediate in [0..4] + if(imm > 4) return MCDisassembler::Fail; + Inst.setOpcode(ARM::t2HINT); + Inst.addOperand(MCOperand::CreateImm(imm)); } return S; @@ -1996,9 +2000,10 @@ static DecodeStatus DecodeArmMOVTWInstruction(MCInst &Inst, unsigned Insn, imm |= (fieldFromInstruction(Insn, 16, 4) << 12); if (Inst.getOpcode() == ARM::MOVTi16) - if (!Check(S, DecoderGPRRegisterClass(Inst, Rd, Address, Decoder))) + if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Rd, Address, Decoder))) return MCDisassembler::Fail; - if (!Check(S, DecoderGPRRegisterClass(Inst, Rd, Address, Decoder))) + + if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Rd, Address, Decoder))) return MCDisassembler::Fail; if (!tryAddingSymbolicOperand(Address, imm, false, 4, Inst, Decoder)) @@ -3570,7 +3575,7 @@ static DecodeStatus DecodeDoubleRegStore(MCInst &Inst, unsigned Insn, unsigned Rn = fieldFromInstruction(Insn, 16, 4); unsigned pred = fieldFromInstruction(Insn, 28, 4); - if (!Check(S, DecoderGPRRegisterClass(Inst, Rd, Address, Decoder))) + if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Rd, Address, Decoder))) return MCDisassembler::Fail; if ((Rt & 1) || Rt == 0xE 
|| Rn == 0xF) return MCDisassembler::Fail; @@ -4496,6 +4501,15 @@ static DecodeStatus DecodeVCVTQ(MCInst &Inst, unsigned Insn, return S; } +static DecodeStatus DecodeImm0_4(MCInst &Inst, unsigned Insn, uint64_t Address, + const void *Decoder) +{ + unsigned Imm = fieldFromInstruction(Insn, 0, 3); + if (Imm > 4) return MCDisassembler::Fail; + Inst.addOperand(MCOperand::CreateImm(Imm)); + return MCDisassembler::Success; +} + static DecodeStatus DecodeLDR(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; diff --git a/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp b/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp index 2afb20d..3bcd083 100644 --- a/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp +++ b/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp @@ -490,7 +490,8 @@ void ARMInstPrinter::printAM3PostIndexOp(const MCInst *MI, unsigned Op, } void ARMInstPrinter::printAM3PreOrOffsetIndexOp(const MCInst *MI, unsigned Op, - raw_ostream &O) { + raw_ostream &O, + bool AlwaysPrintImm0) { const MCOperand &MO1 = MI->getOperand(Op); const MCOperand &MO2 = MI->getOperand(Op+1); const MCOperand &MO3 = MI->getOperand(Op+2); @@ -509,7 +510,7 @@ void ARMInstPrinter::printAM3PreOrOffsetIndexOp(const MCInst *MI, unsigned Op, unsigned ImmOffs = ARM_AM::getAM3Offset(MO3.getImm()); ARM_AM::AddrOpc op = ARM_AM::getAM3Op(MO3.getImm()); - if (ImmOffs || (op == ARM_AM::sub)) { + if (AlwaysPrintImm0 || ImmOffs || (op == ARM_AM::sub)) { O << ", " << markup("<imm:") << "#" @@ -520,6 +521,7 @@ void ARMInstPrinter::printAM3PreOrOffsetIndexOp(const MCInst *MI, unsigned Op, O << ']' << markup(">"); } +template <bool AlwaysPrintImm0> void ARMInstPrinter::printAddrMode3Operand(const MCInst *MI, unsigned Op, raw_ostream &O) { const MCOperand &MO1 = MI->getOperand(Op); @@ -535,7 +537,7 @@ void ARMInstPrinter::printAddrMode3Operand(const MCInst *MI, unsigned Op, printAM3PostIndexOp(MI, Op, O); return; } - printAM3PreOrOffsetIndexOp(MI, Op, O); + printAM3PreOrOffsetIndexOp(MI, Op, O, AlwaysPrintImm0); } void ARMInstPrinter::printAddrMode3OffsetOperand(const MCInst *MI, @@ -593,6 +595,7 @@ void ARMInstPrinter::printLdStmModeOperand(const MCInst *MI, unsigned OpNum, O << ARM_AM::getAMSubModeStr(Mode); } +template <bool AlwaysPrintImm0> void ARMInstPrinter::printAddrMode5Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O) { const MCOperand &MO1 = MI->getOperand(OpNum); @@ -608,7 +611,7 @@ void ARMInstPrinter::printAddrMode5Operand(const MCInst *MI, unsigned OpNum, unsigned ImmOffs = ARM_AM::getAM5Offset(MO2.getImm()); unsigned Op = ARM_AM::getAM5Op(MO2.getImm()); - if (ImmOffs || Op == ARM_AM::sub) { + if (AlwaysPrintImm0 || ImmOffs || Op == ARM_AM::sub) { O << ", " << markup("<imm:") << "#" @@ -1022,6 +1025,7 @@ void ARMInstPrinter::printT2SOOperand(const MCInst *MI, unsigned OpNum, ARM_AM::getSORegOffset(MO2.getImm()), UseMarkup); } +template <bool AlwaysPrintImm0> void ARMInstPrinter::printAddrModeImm12Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O) { const MCOperand &MO1 = MI->getOperand(OpNum); @@ -1042,13 +1046,13 @@ void ARMInstPrinter::printAddrModeImm12Operand(const MCInst *MI, unsigned OpNum, OffImm = 0; if (isSub) { O << ", " - << markup("<imm:") + << markup("<imm:") << "#-" << -OffImm << markup(">"); } - else if (OffImm > 0) { + else if (AlwaysPrintImm0 || OffImm > 0) { O << ", " - << markup("<imm:") + << markup("<imm:") << "#" << OffImm << markup(">"); } diff --git 
a/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.h b/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.h index edff75d..344104e 100644 --- a/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.h +++ b/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.h @@ -47,12 +47,13 @@ public: raw_ostream &O); void printAddrMode2OffsetOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); - + template <bool AlwaysPrintImm0> void printAddrMode3Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printAddrMode3OffsetOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printAM3PostIndexOp(const MCInst *MI, unsigned Op, raw_ostream &O); - void printAM3PreOrOffsetIndexOp(const MCInst *MI, unsigned Op,raw_ostream &O); + void printAM3PreOrOffsetIndexOp(const MCInst *MI, unsigned Op, raw_ostream &O, + bool AlwaysPrintImm0); void printPostIdxImm8Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printPostIdxRegOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); @@ -60,6 +61,7 @@ public: raw_ostream &O); void printLdStmModeOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); + template <bool AlwaysPrintImm0> void printAddrMode5Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printAddrMode6Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printAddrMode7Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O); @@ -91,6 +93,7 @@ public: raw_ostream &O); void printT2SOOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); + template<bool AlwaysPrintImm0> void printAddrModeImm12Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printT2AddrModeImm8Operand(const MCInst *MI, unsigned OpNum, diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp index 418971d..6c3d247 100644 --- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp +++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp @@ -13,7 +13,9 @@ // //===----------------------------------------------------------------------===// +#include "ARMRegisterInfo.h" #include "ARMUnwindOp.h" +#include "ARMUnwindOpAsm.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Twine.h" #include "llvm/MC/MCAsmBackend.h" @@ -26,6 +28,7 @@ #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCObjectStreamer.h" +#include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCSection.h" #include "llvm/MC/MCSectionELF.h" #include "llvm/MC/MCStreamer.h" @@ -33,11 +36,15 @@ #include "llvm/MC/MCValue.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ELF.h" -#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; +static std::string GetAEABIUnwindPersonalityName(unsigned Index) { + assert(Index < NUM_PERSONALITY_INDEX && "Invalid personality index"); + return (Twine("__aeabi_unwind_cpp_pr") + Twine(Index)).str(); +} + namespace { /// Extend the generic ELFStreamer class so that it can emit mapping symbols at @@ -57,8 +64,9 @@ public: ARMELFStreamer(MCContext &Context, MCAsmBackend &TAB, raw_ostream &OS, MCCodeEmitter *Emitter, bool IsThumb) : MCELFStreamer(SK_ARMELFStreamer, Context, TAB, OS, Emitter), - IsThumb(IsThumb), MappingSymbolCounter(0), LastEMS(EMS_None), ExTab(0), - FnStart(0), Personality(0), CantUnwind(false) {} + IsThumb(IsThumb), MappingSymbolCounter(0), LastEMS(EMS_None) { + Reset(); + } ~ARMELFStreamer() {} @@ -75,14 +83,15 @@ public: virtual void EmitRegSave(const SmallVectorImpl<unsigned> 
&RegList, bool isVector); - virtual void ChangeSection(const MCSection *Section) { + virtual void ChangeSection(const MCSection *Section, + const MCExpr *Subsection) { // We have to keep track of the mapping symbol state of any sections we // use. Each one should start off as EMS_None, which is provided as the // default constructor by DenseMap::lookup. - LastMappingSymbols[getPreviousSection()] = LastEMS; + LastMappingSymbols[getPreviousSection().first] = LastEMS; LastEMS = LastMappingSymbols.lookup(Section); - MCELFStreamer::ChangeSection(Section); + MCELFStreamer::ChangeSection(Section, Subsection); } /// This function is the one used to emit instruction data into the ELF @@ -175,7 +184,7 @@ private: MCELF::SetType(SD, ELF::STT_NOTYPE); MCELF::SetBinding(SD, ELF::STB_LOCAL); SD.setExternal(false); - Symbol->setSection(*getCurrentSection()); + Symbol->setSection(*getCurrentSection().first); const MCExpr *Value = MCSymbolRefExpr::Create(Start, getContext()); Symbol->setVariableValue(Value); @@ -194,6 +203,7 @@ private: void Reset(); void EmitPersonalityFixup(StringRef Name); + void CollectUnwindOpcodes(); void SwitchToEHSection(const char *Prefix, unsigned Type, unsigned Flags, SectionKind Kind, const MCSymbol &Fn); @@ -210,9 +220,16 @@ private: MCSymbol *ExTab; MCSymbol *FnStart; const MCSymbol *Personality; + uint32_t VFPRegSave; // Register mask for {d31-d0} + uint32_t RegSave; // Register mask for {r15-r0} + int64_t SPOffset; + uint16_t FPReg; + int64_t FPOffset; + bool UsedFP; bool CantUnwind; + UnwindOpcodeAssembler UnwindOpAsm; }; -} +} // end anonymous namespace inline void ARMELFStreamer::SwitchToEHSection(const char *Prefix, unsigned Type, @@ -238,7 +255,7 @@ inline void ARMELFStreamer::SwitchToEHSection(const char *Prefix, } else { EHSection = getContext().getELFSection(EHSecName, Type, Flags, Kind); } - assert(EHSection); + assert(EHSection && "Failed to get the required EH section"); // Switch to .ARM.extab or .ARM.exidx section SwitchSection(EHSection); @@ -262,10 +279,20 @@ inline void ARMELFStreamer::SwitchToExIdxSection(const MCSymbol &FnStart) { } void ARMELFStreamer::Reset() { + const MCRegisterInfo &MRI = getContext().getRegisterInfo(); + ExTab = NULL; FnStart = NULL; Personality = NULL; + VFPRegSave = 0; + RegSave = 0; + FPReg = MRI.getEncodingValue(ARM::SP); + FPOffset = 0; + SPOffset = 0; + UsedFP = false; CantUnwind = false; + + UnwindOpAsm.Reset(); } // Add the R_ARM_NONE fixup at the same position @@ -284,6 +311,18 @@ void ARMELFStreamer::EmitPersonalityFixup(StringRef Name) { MCFixup::getKindForSize(4, false))); } +void ARMELFStreamer::CollectUnwindOpcodes() { + if (UsedFP) { + UnwindOpAsm.EmitSetFP(FPReg); + UnwindOpAsm.EmitSPOffset(-FPOffset); + } else { + UnwindOpAsm.EmitSPOffset(SPOffset); + } + UnwindOpAsm.EmitVFPRegSave(VFPRegSave); + UnwindOpAsm.EmitRegSave(RegSave); + UnwindOpAsm.Finalize(); +} + void ARMELFStreamer::EmitFnStart() { assert(FnStart == 0); FnStart = getContext().CreateTempSymbol(); @@ -294,35 +333,29 @@ void ARMELFStreamer::EmitFnEnd() { assert(FnStart && ".fnstart must preceeds .fnend"); // Emit unwind opcodes if there is no .handlerdata directive - int PersonalityIndex = -1; if (!ExTab && !CantUnwind) { - // For __aeabi_unwind_cpp_pr1, we have to emit opcodes in .ARM.extab. 
- SwitchToExTabSection(*FnStart); - - // Create .ARM.extab label for offset in .ARM.exidx - ExTab = getContext().CreateTempSymbol(); - EmitLabel(ExTab); - - PersonalityIndex = 1; - - uint32_t Entry = 0; - uint32_t NumExtraEntryWords = 0; - Entry |= NumExtraEntryWords << 24; - Entry |= (EHT_COMPACT | PersonalityIndex) << 16; - - // TODO: This should be generated according to .save, .vsave, .setfp - // directives. Currently, we are simply generating FINISH opcode. - Entry |= UNWIND_OPCODE_FINISH << 8; - Entry |= UNWIND_OPCODE_FINISH; - - EmitIntValue(Entry, 4, 0); + CollectUnwindOpcodes(); + + unsigned PersonalityIndex = UnwindOpAsm.getPersonalityIndex(); + if (PersonalityIndex == AEABI_UNWIND_CPP_PR1 || + PersonalityIndex == AEABI_UNWIND_CPP_PR2) { + // For the __aeabi_unwind_cpp_pr1 and __aeabi_unwind_cpp_pr2, we have to + // emit the unwind opcodes in the corresponding ".ARM.extab" section, and + // then emit a reference to these unwind opcodes in the second word of + // the exception index table entry. + SwitchToExTabSection(*FnStart); + ExTab = getContext().CreateTempSymbol(); + EmitLabel(ExTab); + EmitBytes(UnwindOpAsm.data(), 0); + } } // Emit the exception index table entry SwitchToExIdxSection(*FnStart); - if (PersonalityIndex == 1) - EmitPersonalityFixup("__aeabi_unwind_cpp_pr1"); + unsigned PersonalityIndex = UnwindOpAsm.getPersonalityIndex(); + if (PersonalityIndex < NUM_PERSONALITY_INDEX) + EmitPersonalityFixup(GetAEABIUnwindPersonalityName(PersonalityIndex)); const MCSymbolRefExpr *FnStartRef = MCSymbolRefExpr::Create(FnStart, @@ -333,12 +366,22 @@ void ARMELFStreamer::EmitFnEnd() { if (CantUnwind) { EmitIntValue(EXIDX_CANTUNWIND, 4, 0); - } else { + } else if (ExTab) { + // Emit a reference to the unwind opcodes in the ".ARM.extab" section. const MCSymbolRefExpr *ExTabEntryRef = MCSymbolRefExpr::Create(ExTab, MCSymbolRefExpr::VK_ARM_PREL31, getContext()); EmitValue(ExTabEntryRef, 4, 0); + } else { + // For the __aeabi_unwind_cpp_pr0, we have to emit the unwind opcodes in + // the second word of exception index table entry. The size of the unwind + // opcodes should always be 4 bytes. + assert(PersonalityIndex == AEABI_UNWIND_CPP_PR0 && + "Compact model must use __aeabi_cpp_unwind_pr0 as personality"); + assert(UnwindOpAsm.size() == 4u && + "Unwind opcode size for __aeabi_cpp_unwind_pr0 must be equal to 4"); + EmitBytes(UnwindOpAsm.data(), 0); } // Clean exception handling frame information @@ -368,36 +411,54 @@ void ARMELFStreamer::EmitHandlerData() { EmitValue(PersonalityRef, 4, 0); // Emit unwind opcodes - uint32_t Entry = 0; - uint32_t NumExtraEntryWords = 0; - - // TODO: This should be generated according to .save, .vsave, .setfp - // directives. Currently, we are simply generating FINISH opcode. 
- Entry |= NumExtraEntryWords << 24; - Entry |= UNWIND_OPCODE_FINISH << 16; - Entry |= UNWIND_OPCODE_FINISH << 8; - Entry |= UNWIND_OPCODE_FINISH; - - EmitIntValue(Entry, 4, 0); + CollectUnwindOpcodes(); + EmitBytes(UnwindOpAsm.data(), 0); } void ARMELFStreamer::EmitPersonality(const MCSymbol *Per) { Personality = Per; + UnwindOpAsm.setPersonality(Per); } -void ARMELFStreamer::EmitSetFP(unsigned NewFpReg, - unsigned NewSpReg, +void ARMELFStreamer::EmitSetFP(unsigned NewFPReg, + unsigned NewSPReg, int64_t Offset) { - // TODO: Not implemented + assert(SPOffset == 0 && + "Current implementation assumes .setfp precedes .pad"); + + const MCRegisterInfo &MRI = getContext().getRegisterInfo(); + + uint16_t NewFPRegEncVal = MRI.getEncodingValue(NewFPReg); +#ifndef NDEBUG + uint16_t NewSPRegEncVal = MRI.getEncodingValue(NewSPReg); +#endif + + assert((NewSPReg == ARM::SP || NewSPRegEncVal == FPReg) && + "the operand of .setfp directive should be either $sp or $fp"); + + UsedFP = true; + FPReg = NewFPRegEncVal; + FPOffset = Offset; } void ARMELFStreamer::EmitPad(int64_t Offset) { - // TODO: Not implemented + SPOffset += Offset; } void ARMELFStreamer::EmitRegSave(const SmallVectorImpl<unsigned> &RegList, bool IsVector) { - // TODO: Not implemented + const MCRegisterInfo &MRI = getContext().getRegisterInfo(); + +#ifndef NDEBUG + unsigned Max = IsVector ? 32 : 16; +#endif + uint32_t &RegMask = IsVector ? VFPRegSave : RegSave; + + for (size_t i = 0; i < RegList.size(); ++i) { + unsigned Reg = MRI.getEncodingValue(RegList[i]); + assert(Reg < Max && "Register encoded value out of range"); + RegMask |= 1u << Reg; + } } namespace llvm { diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMUnwindOp.h b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMUnwindOp.h index dad5576..fa4add6 100644 --- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMUnwindOp.h +++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMUnwindOp.h @@ -107,6 +107,19 @@ namespace llvm { UNWIND_OPCODE_POP_VFP_REG_RANGE_FSTMFDD_D8 = 0xd0 }; + /// ARM-defined Personality Routine Index + enum ARMPersonalityRoutineIndex { + // To make the exception handling table become more compact, ARM defined + // several personality routines in EHABI. There are 3 different + // personality routines in ARM EHABI currently. It is possible to have 16 + // pre-defined personality routines at most. + AEABI_UNWIND_CPP_PR0 = 0, + AEABI_UNWIND_CPP_PR1 = 1, + AEABI_UNWIND_CPP_PR2 = 2, + + NUM_PERSONALITY_INDEX + }; + } #endif // ARM_UNWIND_OP_H diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.cpp new file mode 100644 index 0000000..191db69 --- /dev/null +++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.cpp @@ -0,0 +1,198 @@ +//===-- ARMUnwindOpAsm.cpp - ARM Unwind Opcodes Assembler -------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the unwind opcode assembler for ARM exception handling +// table.
+// +//===----------------------------------------------------------------------===// + +#include "ARMUnwindOpAsm.h" + +#include "ARMUnwindOp.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/LEB128.h" + +using namespace llvm; + +void UnwindOpcodeAssembler::EmitRegSave(uint32_t RegSave) { + if (RegSave == 0u) + return; + + // One byte opcode to save register r14 and r11-r4 + if (RegSave & (1u << 4)) { + // The one byte opcode will always save r4, thus we can't use the one byte + // opcode when r4 is not in .save directive. + + // Compute the consecutive registers from r4 to r11. + uint32_t Range = 0; + uint32_t Mask = (1u << 4); + for (uint32_t Bit = (1u << 5); Bit < (1u << 12); Bit <<= 1) { + if ((RegSave & Bit) == 0u) + break; + ++Range; + Mask |= Bit; + } + + // Emit this opcode when the mask covers every registers. + uint32_t UnmaskedReg = RegSave & 0xfff0u & (~Mask); + if (UnmaskedReg == 0u) { + // Pop r[4 : (4 + n)] + Ops.push_back(UNWIND_OPCODE_POP_REG_RANGE_R4 | Range); + RegSave &= 0x000fu; + } else if (UnmaskedReg == (1u << 14)) { + // Pop r[14] + r[4 : (4 + n)] + Ops.push_back(UNWIND_OPCODE_POP_REG_RANGE_R4_R14 | Range); + RegSave &= 0x000fu; + } + } + + // Two bytes opcode to save register r15-r4 + if ((RegSave & 0xfff0u) != 0) { + uint32_t Op = UNWIND_OPCODE_POP_REG_MASK_R4 | (RegSave >> 4); + Ops.push_back(static_cast<uint8_t>(Op >> 8)); + Ops.push_back(static_cast<uint8_t>(Op & 0xff)); + } + + // Opcode to save register r3-r0 + if ((RegSave & 0x000fu) != 0) { + uint32_t Op = UNWIND_OPCODE_POP_REG_MASK | (RegSave & 0x000fu); + Ops.push_back(static_cast<uint8_t>(Op >> 8)); + Ops.push_back(static_cast<uint8_t>(Op & 0xff)); + } +} + +/// Emit unwind opcodes for .vsave directives +void UnwindOpcodeAssembler::EmitVFPRegSave(uint32_t VFPRegSave) { + size_t i = 32; + + while (i > 16) { + uint32_t Bit = 1u << (i - 1); + if ((VFPRegSave & Bit) == 0u) { + --i; + continue; + } + + uint32_t Range = 0; + + --i; + Bit >>= 1; + + while (i > 16 && (VFPRegSave & Bit)) { + --i; + ++Range; + Bit >>= 1; + } + + uint32_t Op = + UNWIND_OPCODE_POP_VFP_REG_RANGE_FSTMFDD_D16 | ((i - 16) << 4) | Range; + Ops.push_back(static_cast<uint8_t>(Op >> 8)); + Ops.push_back(static_cast<uint8_t>(Op & 0xff)); + } + + while (i > 0) { + uint32_t Bit = 1u << (i - 1); + if ((VFPRegSave & Bit) == 0u) { + --i; + continue; + } + + uint32_t Range = 0; + + --i; + Bit >>= 1; + + while (i > 0 && (VFPRegSave & Bit)) { + --i; + ++Range; + Bit >>= 1; + } + + uint32_t Op = UNWIND_OPCODE_POP_VFP_REG_RANGE_FSTMFDD | (i << 4) | Range; + Ops.push_back(static_cast<uint8_t>(Op >> 8)); + Ops.push_back(static_cast<uint8_t>(Op & 0xff)); + } +} + +/// Emit unwind opcodes for .setfp directives +void UnwindOpcodeAssembler::EmitSetFP(uint16_t FPReg) { + Ops.push_back(UNWIND_OPCODE_SET_VSP | FPReg); +} + +/// Emit unwind opcodes to update stack pointer +void UnwindOpcodeAssembler::EmitSPOffset(int64_t Offset) { + if (Offset > 0x200) { + uint8_t Buff[10]; + size_t Size = encodeULEB128((Offset - 0x204) >> 2, Buff); + Ops.push_back(UNWIND_OPCODE_INC_VSP_ULEB128); + Ops.append(Buff, Buff + Size); + } else if (Offset > 0) { + if (Offset > 0x100) { + Ops.push_back(UNWIND_OPCODE_INC_VSP | 0x3fu); + Offset -= 0x100; + } + Ops.push_back(UNWIND_OPCODE_INC_VSP | + static_cast<uint8_t>((Offset - 4) >> 2)); + } else if (Offset < 0) { + while (Offset < -0x100) { + Ops.push_back(UNWIND_OPCODE_DEC_VSP | 0x3fu); + Offset += 0x100; + } + Ops.push_back(UNWIND_OPCODE_DEC_VSP | + static_cast<uint8_t>(((-Offset) - 4) >> 2)); + } +} + +void 
UnwindOpcodeAssembler::AddOpcodeSizePrefix(size_t Pos) { + size_t SizeInWords = (size() + 3) / 4; + assert(SizeInWords <= 0x100u && + "Only 256 additional words are allowed for unwind opcodes"); + Ops[Pos] = static_cast<uint8_t>(SizeInWords - 1); +} + +void UnwindOpcodeAssembler::AddPersonalityIndexPrefix(size_t Pos, unsigned PI) { + assert(PI < NUM_PERSONALITY_INDEX && "Invalid personality prefix"); + Ops[Pos] = EHT_COMPACT | PI; +} + +void UnwindOpcodeAssembler::EmitFinishOpcodes() { + for (size_t i = (0x4u - (size() & 0x3u)) & 0x3u; i > 0; --i) + Ops.push_back(UNWIND_OPCODE_FINISH); +} + +void UnwindOpcodeAssembler::Finalize() { + if (HasPersonality) { + // Personality specified by .personality directive + Offset = 1; + AddOpcodeSizePrefix(1); + } else { + if (getOpcodeSize() <= 3) { + // __aeabi_unwind_cpp_pr0: [ 0x80 , OP1 , OP2 , OP3 ] + Offset = 1; + PersonalityIndex = AEABI_UNWIND_CPP_PR0; + AddPersonalityIndexPrefix(Offset, PersonalityIndex); + } else { + // __aeabi_unwind_cpp_pr1: [ 0x81 , SIZE , OP1 , OP2 , ... ] + Offset = 0; + PersonalityIndex = AEABI_UNWIND_CPP_PR1; + AddPersonalityIndexPrefix(Offset, PersonalityIndex); + AddOpcodeSizePrefix(1); + } + } + + // Emit the padding finish opcodes if the size() is not a multiple of 4. + EmitFinishOpcodes(); + + // Swap the byte order + uint8_t *Ptr = Ops.begin() + Offset; + assert(size() % 4 == 0 && "Final unwind opcodes should align to 4"); + for (size_t i = 0, n = size(); i < n; i += 4) { + std::swap(Ptr[i], Ptr[i + 3]); + std::swap(Ptr[i + 1], Ptr[i + 2]); + } +} diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.h b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.h new file mode 100644 index 0000000..f6ecaeb --- /dev/null +++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.h @@ -0,0 +1,114 @@ +//===-- ARMUnwindOpAsm.h - ARM Unwind Opcodes Assembler ---------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file declares the unwind opcode assembler for ARM exception handling +// table. +// +//===----------------------------------------------------------------------===// + +#ifndef ARM_UNWIND_OP_ASM_H +#define ARM_UNWIND_OP_ASM_H + +#include "ARMUnwindOp.h" + +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/DataTypes.h" + +namespace llvm { + +class MCSymbol; + +class UnwindOpcodeAssembler { +private: + llvm::SmallVector<uint8_t, 8> Ops; + + unsigned Offset; + unsigned PersonalityIndex; + bool HasPersonality; + + enum { + // The number of bytes to be preserved for the size and personality index + // prefix of unwind opcodes. + NUM_PRESERVED_PREFIX_BUF = 2 + }; + +public: + UnwindOpcodeAssembler() + : Ops(NUM_PRESERVED_PREFIX_BUF), Offset(NUM_PRESERVED_PREFIX_BUF), + PersonalityIndex(NUM_PERSONALITY_INDEX), HasPersonality(0) { + } + + /// Reset the unwind opcode assembler.
+ void Reset() { + Ops.resize(NUM_PRESERVED_PREFIX_BUF); + Offset = NUM_PRESERVED_PREFIX_BUF; + PersonalityIndex = NUM_PERSONALITY_INDEX; + HasPersonality = 0; + } + + /// Get the size of the payload (including the size byte) + size_t size() const { + return Ops.size() - Offset; + } + + /// Get the beginning of the payload + const uint8_t *begin() const { + return Ops.begin() + Offset; + } + + /// Get the payload + StringRef data() const { + return StringRef(reinterpret_cast<const char *>(begin()), size()); + } + + /// Set the personality index + void setPersonality(const MCSymbol *Per) { + HasPersonality = 1; + } + + /// Get the personality index + unsigned getPersonalityIndex() const { + return PersonalityIndex; + } + + /// Emit unwind opcodes for .save directives + void EmitRegSave(uint32_t RegSave); + + /// Emit unwind opcodes for .vsave directives + void EmitVFPRegSave(uint32_t VFPRegSave); + + /// Emit unwind opcodes for .setfp directives + void EmitSetFP(uint16_t FPReg); + + /// Emit unwind opcodes to update stack pointer + void EmitSPOffset(int64_t Offset); + + /// Finalize the unwind opcode sequence for EmitBytes() + void Finalize(); + +private: + /// Get the size of the opcodes in bytes. + size_t getOpcodeSize() const { + return Ops.size() - NUM_PRESERVED_PREFIX_BUF; + } + + /// Add the length prefix to the payload + void AddOpcodeSizePrefix(size_t Pos); + + /// Add personality index prefix in some compact format + void AddPersonalityIndexPrefix(size_t Pos, unsigned PersonalityIndex); + + /// Fill the words with finish opcode if it is not aligned + void EmitFinishOpcodes(); +}; + +} // namespace llvm + +#endif // ARM_UNWIND_OP_ASM_H diff --git a/contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp b/contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp index 2c3388c..1e2a8b0 100644 --- a/contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp +++ b/contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp @@ -88,7 +88,7 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF) const { const Thumb1InstrInfo &TII = *static_cast<const Thumb1InstrInfo*>(MF.getTarget().getInstrInfo()); - unsigned VARegSaveSize = AFI->getVarArgsRegSaveSize(); + unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize(); unsigned NumBytes = MFI->getStackSize(); const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo(); DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc(); @@ -104,8 +104,8 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF) const { unsigned GPRCS1Size = 0, GPRCS2Size = 0, DPRCSSize = 0; int FramePtrSpillFI = 0; - if (VARegSaveSize) - emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, -VARegSaveSize, + if (ArgRegsSaveSize) + emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, -ArgRegsSaveSize, MachineInstr::FrameSetup); if (!AFI->hasStackFrame()) { @@ -249,7 +249,7 @@ void Thumb1FrameLowering::emitEpilogue(MachineFunction &MF, const Thumb1InstrInfo &TII = *static_cast<const Thumb1InstrInfo*>(MF.getTarget().getInstrInfo()); - unsigned VARegSaveSize = AFI->getVarArgsRegSaveSize(); + unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize(); int NumBytes = (int)MFI->getStackSize(); const uint16_t *CSRegs = RegInfo->getCalleeSavedRegs(); unsigned FramePtr = RegInfo->getFrameRegister(MF); @@ -300,7 +300,7 @@ void Thumb1FrameLowering::emitEpilogue(MachineFunction &MF, } } - if (VARegSaveSize) { + if (ArgRegsSaveSize) { // Unlike T2 and ARM mode, the T1 pop instruction cannot restore // to LR, and we can't pop the value directly to the PC since // we need to update the SP after popping the value. 
Therefore, we @@ -313,7 +313,7 @@ void Thumb1FrameLowering::emitEpilogue(MachineFunction &MF, AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tPOP))) .addReg(ARM::R3, RegState::Define); - emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, VARegSaveSize); + emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, ArgRegsSaveSize); MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII.get(ARM::tBX_RET_vararg)) @@ -376,7 +376,7 @@ restoreCalleeSavedRegisters(MachineBasicBlock &MBB, ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); - bool isVarArg = AFI->getVarArgsRegSaveSize() > 0; + bool isVarArg = AFI->getArgRegsSaveSize() > 0; DebugLoc DL = MI->getDebugLoc(); MachineInstrBuilder MIB = BuildMI(MF, DL, TII.get(ARM::tPOP)); AddDefaultPred(MIB); diff --git a/contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp b/contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp index 67e8ec7..a1b48c2 100644 --- a/contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp +++ b/contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp @@ -19,6 +19,7 @@ #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineMemOperand.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/MC/MCInst.h" #include "llvm/Support/CommandLine.h" @@ -126,25 +127,41 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, unsigned SrcReg, bool isKill, int FI, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const { + DebugLoc DL; + if (I != MBB.end()) DL = I->getDebugLoc(); + + MachineFunction &MF = *MBB.getParent(); + MachineFrameInfo &MFI = *MF.getFrameInfo(); + MachineMemOperand *MMO = + MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FI), + MachineMemOperand::MOStore, + MFI.getObjectSize(FI), + MFI.getObjectAlignment(FI)); + if (RC == &ARM::GPRRegClass || RC == &ARM::tGPRRegClass || RC == &ARM::tcGPRRegClass || RC == &ARM::rGPRRegClass || RC == &ARM::GPRnopcRegClass) { - DebugLoc DL; - if (I != MBB.end()) DL = I->getDebugLoc(); - - MachineFunction &MF = *MBB.getParent(); - MachineFrameInfo &MFI = *MF.getFrameInfo(); - MachineMemOperand *MMO = - MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FI), - MachineMemOperand::MOStore, - MFI.getObjectSize(FI), - MFI.getObjectAlignment(FI)); AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::t2STRi12)) .addReg(SrcReg, getKillRegState(isKill)) .addFrameIndex(FI).addImm(0).addMemOperand(MMO)); return; } + if (ARM::GPRPairRegClass.hasSubClassEq(RC)) { + // Thumb2 STRD expects its dest-registers to be in rGPR. Not a problem for + // gsub_0, but needs an extra constraint for gsub_1 (which could be sp + // otherwise). 
+ MachineRegisterInfo *MRI = &MF.getRegInfo(); + MRI->constrainRegClass(SrcReg, &ARM::GPRPair_with_gsub_1_in_rGPRRegClass); + + MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::t2STRDi8)); + AddDReg(MIB, SrcReg, ARM::gsub_0, getKillRegState(isKill), TRI); + AddDReg(MIB, SrcReg, ARM::gsub_1, 0, TRI); + MIB.addFrameIndex(FI).addImm(0).addMemOperand(MMO); + AddDefaultPred(MIB); + return; + } + ARMBaseInstrInfo::storeRegToStackSlot(MBB, I, SrcReg, isKill, FI, RC, TRI); } @@ -153,24 +170,42 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, unsigned DestReg, int FI, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const { + MachineFunction &MF = *MBB.getParent(); + MachineFrameInfo &MFI = *MF.getFrameInfo(); + MachineMemOperand *MMO = + MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FI), + MachineMemOperand::MOLoad, + MFI.getObjectSize(FI), + MFI.getObjectAlignment(FI)); + DebugLoc DL; + if (I != MBB.end()) DL = I->getDebugLoc(); + if (RC == &ARM::GPRRegClass || RC == &ARM::tGPRRegClass || RC == &ARM::tcGPRRegClass || RC == &ARM::rGPRRegClass || RC == &ARM::GPRnopcRegClass) { - DebugLoc DL; - if (I != MBB.end()) DL = I->getDebugLoc(); - - MachineFunction &MF = *MBB.getParent(); - MachineFrameInfo &MFI = *MF.getFrameInfo(); - MachineMemOperand *MMO = - MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FI), - MachineMemOperand::MOLoad, - MFI.getObjectSize(FI), - MFI.getObjectAlignment(FI)); AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::t2LDRi12), DestReg) .addFrameIndex(FI).addImm(0).addMemOperand(MMO)); return; } + if (ARM::GPRPairRegClass.hasSubClassEq(RC)) { + // Thumb2 LDRD expects its dest-registers to be in rGPR. Not a problem for + // gsub_0, but needs an extra constraint for gsub_1 (which could be sp + // otherwise). + MachineRegisterInfo *MRI = &MF.getRegInfo(); + MRI->constrainRegClass(DestReg, &ARM::GPRPair_with_gsub_1_in_rGPRRegClass); + + MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::t2LDRDi8)); + AddDReg(MIB, DestReg, ARM::gsub_0, RegState::DefineNoRead, TRI); + AddDReg(MIB, DestReg, ARM::gsub_1, RegState::DefineNoRead, TRI); + MIB.addFrameIndex(FI).addImm(0).addMemOperand(MMO); + AddDefaultPred(MIB); + + if (TargetRegisterInfo::isPhysicalRegister(DestReg)) + MIB.addReg(DestReg, RegState::ImplicitDefine); + return; + } + ARMBaseInstrInfo::loadRegFromStackSlot(MBB, I, DestReg, FI, RC, TRI); } @@ -514,6 +549,15 @@ bool llvm::rewriteT2FrameIndex(MachineInstr &MI, unsigned FrameRegIdx, Offset = -Offset; isSub = true; } + } else if (AddrMode == ARMII::AddrModeT2_i8s4) { + Offset += MI.getOperand(FrameRegIdx + 1).getImm() * 4; + NumBits = 8; + // MCInst operand has already scaled value. + Scale = 1; + if (Offset < 0) { + isSub = true; + Offset = -Offset; + } } else { llvm_unreachable("Unsupported addressing mode!"); } diff --git a/contrib/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp b/contrib/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp index d50f5d9..4795aae 100644 --- a/contrib/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp +++ b/contrib/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp @@ -926,13 +926,11 @@ bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) { HighLatencyCPSR = false; // Check predecessors for the latest CPSRDef. - bool HasBackEdges = false; for (MachineBasicBlock::pred_iterator I = MBB.pred_begin(), E = MBB.pred_end(); I != E; ++I) { const MBBInfo &PInfo = BlockInfo[(*I)->getNumber()]; if (!PInfo.Visited) { // Since blocks are visited in RPO, this must be a back-edge. 
- HasBackEdges = true; continue; } if (PInfo.HighLatencyCPSR) { diff --git a/contrib/llvm/lib/Target/Hexagon/Hexagon.h b/contrib/llvm/lib/Target/Hexagon/Hexagon.h index dfbefc8..a9b00a2 100644 --- a/contrib/llvm/lib/Target/Hexagon/Hexagon.h +++ b/contrib/llvm/lib/Target/Hexagon/Hexagon.h @@ -29,26 +29,25 @@ namespace llvm { class HexagonTargetMachine; class raw_ostream; - FunctionPass *createHexagonISelDag(HexagonTargetMachine &TM, + FunctionPass *createHexagonISelDag(const HexagonTargetMachine &TM, CodeGenOpt::Level OptLevel); - FunctionPass *createHexagonDelaySlotFillerPass(TargetMachine &TM); - FunctionPass *createHexagonFPMoverPass(TargetMachine &TM); - FunctionPass *createHexagonRemoveExtendOps(HexagonTargetMachine &TM); - FunctionPass *createHexagonCFGOptimizer(HexagonTargetMachine &TM); - - FunctionPass *createHexagonSplitTFRCondSets(HexagonTargetMachine &TM); - FunctionPass *createHexagonExpandPredSpillCode(HexagonTargetMachine &TM); + FunctionPass *createHexagonDelaySlotFillerPass(const TargetMachine &TM); + FunctionPass *createHexagonFPMoverPass(const TargetMachine &TM); + FunctionPass *createHexagonRemoveExtendArgs(const HexagonTargetMachine &TM); + FunctionPass *createHexagonCFGOptimizer(const HexagonTargetMachine &TM); + FunctionPass *createHexagonSplitTFRCondSets(const HexagonTargetMachine &TM); + FunctionPass *createHexagonExpandPredSpillCode( + const HexagonTargetMachine &TM); FunctionPass *createHexagonHardwareLoops(); FunctionPass *createHexagonPeephole(); FunctionPass *createHexagonFixupHwLoops(); FunctionPass *createHexagonPacketizer(); FunctionPass *createHexagonNewValueJump(); - /* TODO: object output. MCCodeEmitter *createHexagonMCCodeEmitter(const Target &, - TargetMachine &TM, + const TargetMachine &TM, MCContext &Ctx); */ /* TODO: assembler input. diff --git a/contrib/llvm/lib/Target/Hexagon/Hexagon.td b/contrib/llvm/lib/Target/Hexagon/Hexagon.td index 8a5ee40..9b3a643 100644 --- a/contrib/llvm/lib/Target/Hexagon/Hexagon.td +++ b/contrib/llvm/lib/Target/Hexagon/Hexagon.td @@ -84,12 +84,36 @@ def getPredOpcode : InstrMapping { } //===----------------------------------------------------------------------===// +// Generate mapping table to relate predicate-true instructions with their +// predicate-false forms +// +def getFalsePredOpcode : InstrMapping { + let FilterClass = "PredRel"; + let RowFields = ["BaseOpcode", "PNewValue", "isNVStore", "isBrTaken"]; + let ColFields = ["PredSense"]; + let KeyCol = ["true"]; + let ValueCols = [["false"]]; +} + +//===----------------------------------------------------------------------===// +// Generate mapping table to relate predicate-false instructions with their +// predicate-true forms +// +def getTruePredOpcode : InstrMapping { + let FilterClass = "PredRel"; + let RowFields = ["BaseOpcode", "PNewValue", "isNVStore", "isBrTaken"]; + let ColFields = ["PredSense"]; + let KeyCol = ["false"]; + let ValueCols = [["true"]]; +} + +//===----------------------------------------------------------------------===// // Generate mapping table to relate predicated instructions with their .new // format. 
// def getPredNewOpcode : InstrMapping { let FilterClass = "PredNewRel"; - let RowFields = ["BaseOpcode", "PredSense", "isNVStore"]; + let RowFields = ["BaseOpcode", "PredSense", "isNVStore", "isBrTaken"]; let ColFields = ["PNewValue"]; let KeyCol = [""]; let ValueCols = [["new"]]; diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonCFGOptimizer.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonCFGOptimizer.cpp index d4078ad..8597f11 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonCFGOptimizer.cpp +++ b/contrib/llvm/lib/Target/Hexagon/HexagonCFGOptimizer.cpp @@ -26,21 +26,27 @@ using namespace llvm; +namespace llvm { + void initializeHexagonCFGOptimizerPass(PassRegistry&); +} + + namespace { class HexagonCFGOptimizer : public MachineFunctionPass { private: - HexagonTargetMachine& QTM; + const HexagonTargetMachine& QTM; const HexagonSubtarget &QST; void InvertAndChangeJumpTarget(MachineInstr*, MachineBasicBlock*); public: static char ID; - HexagonCFGOptimizer(HexagonTargetMachine& TM) : MachineFunctionPass(ID), - QTM(TM), - QST(*TM.getSubtargetImpl()) {} + HexagonCFGOptimizer(const HexagonTargetMachine& TM) + : MachineFunctionPass(ID), QTM(TM), QST(*TM.getSubtargetImpl()) { + initializeHexagonCFGOptimizerPass(*PassRegistry::getPassRegistry()); + } const char *getPassName() const { return "Hexagon CFG Optimizer"; @@ -52,8 +58,8 @@ private: char HexagonCFGOptimizer::ID = 0; static bool IsConditionalBranch(int Opc) { - return (Opc == Hexagon::JMP_c) || (Opc == Hexagon::JMP_cNot) - || (Opc == Hexagon::JMP_cdnPt) || (Opc == Hexagon::JMP_cdnNotPt); + return (Opc == Hexagon::JMP_t) || (Opc == Hexagon::JMP_f) + || (Opc == Hexagon::JMP_tnew_t) || (Opc == Hexagon::JMP_fnew_t); } @@ -68,20 +74,20 @@ HexagonCFGOptimizer::InvertAndChangeJumpTarget(MachineInstr* MI, const HexagonInstrInfo *QII = QTM.getInstrInfo(); int NewOpcode = 0; switch(MI->getOpcode()) { - case Hexagon::JMP_c: - NewOpcode = Hexagon::JMP_cNot; + case Hexagon::JMP_t: + NewOpcode = Hexagon::JMP_f; break; - case Hexagon::JMP_cNot: - NewOpcode = Hexagon::JMP_c; + case Hexagon::JMP_f: + NewOpcode = Hexagon::JMP_t; break; - case Hexagon::JMP_cdnPt: - NewOpcode = Hexagon::JMP_cdnNotPt; + case Hexagon::JMP_tnew_t: + NewOpcode = Hexagon::JMP_fnew_t; break; - case Hexagon::JMP_cdnNotPt: - NewOpcode = Hexagon::JMP_cdnPt; + case Hexagon::JMP_fnew_t: + NewOpcode = Hexagon::JMP_tnew_t; break; default: @@ -156,8 +162,8 @@ bool HexagonCFGOptimizer::runOnMachineFunction(MachineFunction &Fn) { // The target of the unconditional branch must be JumpAroundTarget. // TODO: If not, we should not invert the unconditional branch. 
MachineBasicBlock* CondBranchTarget = NULL; - if ((MI->getOpcode() == Hexagon::JMP_c) || - (MI->getOpcode() == Hexagon::JMP_cNot)) { + if ((MI->getOpcode() == Hexagon::JMP_t) || + (MI->getOpcode() == Hexagon::JMP_f)) { CondBranchTarget = MI->getOperand(1).getMBB(); } @@ -231,6 +237,16 @@ bool HexagonCFGOptimizer::runOnMachineFunction(MachineFunction &Fn) { // Public Constructor Functions //===----------------------------------------------------------------------===// -FunctionPass *llvm::createHexagonCFGOptimizer(HexagonTargetMachine &TM) { +static void initializePassOnce(PassRegistry &Registry) { + PassInfo *PI = new PassInfo("Hexagon CFG Optimizer", "hexagon-cfg", + &HexagonCFGOptimizer::ID, 0, false, false); + Registry.registerPass(*PI, true); +} + +void llvm::initializeHexagonCFGOptimizerPass(PassRegistry &Registry) { + CALL_ONCE_INITIALIZATION(initializePassOnce) +} + +FunctionPass *llvm::createHexagonCFGOptimizer(const HexagonTargetMachine &TM) { return new HexagonCFGOptimizer(TM); } diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonExpandPredSpillCode.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonExpandPredSpillCode.cpp index 0814421..8a5991f 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonExpandPredSpillCode.cpp +++ b/contrib/llvm/lib/Target/Hexagon/HexagonExpandPredSpillCode.cpp @@ -41,16 +41,24 @@ using namespace llvm; +namespace llvm { + void initializeHexagonExpandPredSpillCodePass(PassRegistry&); +} + + namespace { class HexagonExpandPredSpillCode : public MachineFunctionPass { - HexagonTargetMachine& QTM; + const HexagonTargetMachine& QTM; const HexagonSubtarget &QST; public: static char ID; - HexagonExpandPredSpillCode(HexagonTargetMachine& TM) : - MachineFunctionPass(ID), QTM(TM), QST(*TM.getSubtargetImpl()) {} + HexagonExpandPredSpillCode(const HexagonTargetMachine& TM) : + MachineFunctionPass(ID), QTM(TM), QST(*TM.getSubtargetImpl()) { + PassRegistry &Registry = *PassRegistry::getPassRegistry(); + initializeHexagonExpandPredSpillCodePass(Registry); + } const char *getPassName() const { return "Hexagon Expand Predicate Spill Code"; @@ -175,6 +183,19 @@ bool HexagonExpandPredSpillCode::runOnMachineFunction(MachineFunction &Fn) { // Public Constructor Functions //===----------------------------------------------------------------------===// -FunctionPass *llvm::createHexagonExpandPredSpillCode(HexagonTargetMachine &TM) { +static void initializePassOnce(PassRegistry &Registry) { + const char *Name = "Hexagon Expand Predicate Spill Code"; + PassInfo *PI = new PassInfo(Name, "hexagon-spill-pred", + &HexagonExpandPredSpillCode::ID, + 0, false, false); + Registry.registerPass(*PI, true); +} + +void llvm::initializeHexagonExpandPredSpillCodePass(PassRegistry &Registry) { + CALL_ONCE_INITIALIZATION(initializePassOnce) +} + +FunctionPass* +llvm::createHexagonExpandPredSpillCode(const HexagonTargetMachine &TM) { return new HexagonExpandPredSpillCode(TM); } diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp index d6a9329..de993ee 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp +++ b/contrib/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp @@ -189,7 +189,7 @@ void HexagonFrameLowering::emitEpilogue(MachineFunction &MF, // Replace 'jumpr r31' instruction with dealloc_return for V4 and higher // versions. - if (STI.hasV4TOps() && MBBI->getOpcode() == Hexagon::JMPR + if (STI.hasV4TOps() && MBBI->getOpcode() == Hexagon::JMPret && !DisableDeallocRet) { // Remove jumpr node. 
MBB.erase(MBBI); diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonHardwareLoops.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonHardwareLoops.cpp index 1786624..d002788 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonHardwareLoops.cpp +++ b/contrib/llvm/lib/Target/Hexagon/HexagonHardwareLoops.cpp @@ -541,12 +541,6 @@ CountValue *HexagonHardwareLoops::getLoopTripCount(MachineLoop *L, case Hexagon::CMPEQrr: Cmp = !Negated ? Comparison::EQ : Comparison::NE; break; - case Hexagon::CMPLTrr: - Cmp = !Negated ? Comparison::LTs : Comparison::GEs; - break; - case Hexagon::CMPLTUrr: - Cmp = !Negated ? Comparison::LTu : Comparison::GEu; - break; case Hexagon::CMPGTUri: case Hexagon::CMPGTUrr: Cmp = !Negated ? Comparison::GTu : Comparison::LEu; @@ -1125,8 +1119,8 @@ bool HexagonHardwareLoops::convertToHardwareLoop(MachineLoop *L) { // The loop ends with either: // - a conditional branch followed by an unconditional branch, or // - a conditional branch to the loop start. - if (LastI->getOpcode() == Hexagon::JMP_c || - LastI->getOpcode() == Hexagon::JMP_cNot) { + if (LastI->getOpcode() == Hexagon::JMP_t || + LastI->getOpcode() == Hexagon::JMP_f) { // Delete one and change/add an uncond. branch to out of the loop. MachineBasicBlock *BranchTarget = LastI->getOperand(1).getMBB(); LastI = LastMBB->erase(LastI); diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp index 8fc9ba1..54ca2c9 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp +++ b/contrib/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp @@ -49,11 +49,11 @@ class HexagonDAGToDAGISel : public SelectionDAGISel { const HexagonSubtarget &Subtarget; // Keep a reference to HexagonTargetMachine. - HexagonTargetMachine& TM; + const HexagonTargetMachine& TM; const HexagonInstrInfo *TII; DenseMap<const GlobalValue *, unsigned> GlobalAddressUseCountMap; public: - explicit HexagonDAGToDAGISel(HexagonTargetMachine &targetmachine, + explicit HexagonDAGToDAGISel(const HexagonTargetMachine &targetmachine, CodeGenOpt::Level OptLevel) : SelectionDAGISel(targetmachine, OptLevel), Subtarget(targetmachine.getSubtarget<HexagonSubtarget>()), @@ -160,6 +160,17 @@ inline SDValue XformU7ToU7M1Imm(signed Imm) { return CurDAG->getTargetConstant(Imm - 1, MVT::i8); } +// XformS8ToS8M1Imm - Return a target constant decremented by 1. +inline SDValue XformSToSM1Imm(signed Imm) { + return CurDAG->getTargetConstant(Imm - 1, MVT::i32); +} + +// XformU8ToU8M1Imm - Return a target constant decremented by 1. +inline SDValue XformUToUM1Imm(unsigned Imm) { + assert((Imm >= 1) && "Cannot decrement unsigned int less than 1"); + return CurDAG->getTargetConstant(Imm - 1, MVT::i32); +} + // Include the pieces autogenerated from the target description. #include "HexagonGenDAGISel.inc" }; @@ -169,7 +180,7 @@ inline SDValue XformU7ToU7M1Imm(signed Imm) { /// createHexagonISelDag - This pass converts a legalized DAG into a /// Hexagon-specific DAG, ready for instruction scheduling. /// -FunctionPass *llvm::createHexagonISelDag(HexagonTargetMachine &TM, +FunctionPass *llvm::createHexagonISelDag(const HexagonTargetMachine &TM, CodeGenOpt::Level OptLevel) { return new HexagonDAGToDAGISel(TM, OptLevel); } @@ -697,7 +708,7 @@ SDNode *HexagonDAGToDAGISel::SelectIndexedStore(StoreSDNode *ST, DebugLoc dl) { // Build post increment store. 
SDNode* Result = CurDAG->getMachineNode(Opcode, dl, MVT::i32, - MVT::Other, Ops, 4); + MVT::Other, Ops); MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); MemOp[0] = ST->getMemOperand(); cast<MachineSDNode>(Result)->setMemRefs(MemOp, MemOp + 1); @@ -723,8 +734,7 @@ SDNode *HexagonDAGToDAGISel::SelectIndexedStore(StoreSDNode *ST, DebugLoc dl) { // Build regular store. SDValue TargetConstVal = CurDAG->getTargetConstant(Val, MVT::i32); - SDNode* Result_1 = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops, - 4); + SDNode* Result_1 = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops); // Build splitted incriment instruction. SDNode* Result_2 = CurDAG->getMachineNode(Hexagon::ADD_ri, dl, MVT::i32, Base, @@ -780,7 +790,7 @@ SDNode *HexagonDAGToDAGISel::SelectBaseOffsetStore(StoreSDNode *ST, Value, Chain}; // build indexed store SDNode* Result = CurDAG->getMachineNode(Opcode, dl, - MVT::Other, Ops, 4); + MVT::Other, Ops); MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); MemOp[0] = ST->getMemOperand(); cast<MachineSDNode>(Result)->setMemRefs(MemOp, MemOp + 1); @@ -1230,8 +1240,7 @@ SDNode *HexagonDAGToDAGISel::SelectIntrinsicWOChain(SDNode *N) { } EVT ReturnValueVT = N->getValueType(0); SDNode *Result = CurDAG->getMachineNode(IntrinsicWithPred, dl, - ReturnValueVT, - Ops.data(), Ops.size()); + ReturnValueVT, Ops); ReplaceUses(N, Result); return Result; } diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp index 15858a9..0e5b8dc 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp +++ b/contrib/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp @@ -1002,14 +1002,6 @@ HexagonTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const { return FrameAddr; } - -SDValue HexagonTargetLowering::LowerMEMBARRIER(SDValue Op, - SelectionDAG& DAG) const { - DebugLoc dl = Op.getDebugLoc(); - return DAG.getNode(HexagonISD::BARRIER, dl, MVT::Other, Op.getOperand(0)); -} - - SDValue HexagonTargetLowering::LowerATOMIC_FENCE(SDValue Op, SelectionDAG& DAG) const { DebugLoc dl = Op.getDebugLoc(); @@ -1361,7 +1353,6 @@ HexagonTargetLowering::HexagonTargetLowering(HexagonTargetMachine } - setOperationAction(ISD::BRIND, MVT::Other, Expand); if (EmitJumpTables) { setOperationAction(ISD::BR_JT, MVT::Other, Custom); } else { @@ -1377,7 +1368,6 @@ HexagonTargetLowering::HexagonTargetLowering(HexagonTargetMachine setOperationAction(ISD::BR_CC, MVT::i32, Expand); setOperationAction(ISD::BR_CC, MVT::i64, Expand); - setOperationAction(ISD::MEMBARRIER, MVT::Other, Custom); setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom); setOperationAction(ISD::FSIN , MVT::f64, Expand); @@ -1444,7 +1434,7 @@ HexagonTargetLowering::HexagonTargetLowering(HexagonTargetMachine setOperationAction(ISD::EXCEPTIONADDR, MVT::i32, Expand); setOperationAction(ISD::EHSELECTION, MVT::i32, Expand); - setOperationAction(ISD::EH_RETURN, MVT::Other, Expand); + setOperationAction(ISD::EH_RETURN, MVT::Other, Custom); if (TM.getSubtargetImpl()->isSubtargetV2()) { setExceptionPointerRegister(Hexagon::R20); @@ -1499,6 +1489,7 @@ HexagonTargetLowering::getTargetNodeName(unsigned Opcode) const { case HexagonISD::RET_FLAG: return "HexagonISD::RET_FLAG"; case HexagonISD::BR_JT: return "HexagonISD::BR_JT"; case HexagonISD::TC_RETURN: return "HexagonISD::TC_RETURN"; + case HexagonISD::EH_RETURN: return "HexagonISD::EH_RETURN"; } } @@ -1520,16 +1511,43 @@ bool HexagonTargetLowering::isTruncateFree(EVT VT1, EVT VT2) const { } SDValue 
+HexagonTargetLowering::LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const { + SDValue Chain = Op.getOperand(0); + SDValue Offset = Op.getOperand(1); + SDValue Handler = Op.getOperand(2); + DebugLoc dl = Op.getDebugLoc(); + + // Mark function as containing a call to EH_RETURN. + HexagonMachineFunctionInfo *FuncInfo = + DAG.getMachineFunction().getInfo<HexagonMachineFunctionInfo>(); + FuncInfo->setHasEHReturn(); + + unsigned OffsetReg = Hexagon::R28; + + SDValue StoreAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(), + DAG.getRegister(Hexagon::R30, getPointerTy()), + DAG.getIntPtrConstant(4)); + Chain = DAG.getStore(Chain, dl, Handler, StoreAddr, MachinePointerInfo(), + false, false, 0); + Chain = DAG.getCopyToReg(Chain, dl, OffsetReg, Offset); + + // Not needed we already use it as explict input to EH_RETURN. + // MF.getRegInfo().addLiveOut(OffsetReg); + + return DAG.getNode(HexagonISD::EH_RETURN, dl, MVT::Other, Chain); +} + +SDValue HexagonTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { switch (Op.getOpcode()) { default: llvm_unreachable("Should not custom lower this!"); case ISD::ConstantPool: return LowerConstantPool(Op, DAG); + case ISD::EH_RETURN: return LowerEH_RETURN(Op, DAG); // Frame & Return address. Currently unimplemented. case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG); case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG); case ISD::GlobalTLSAddress: llvm_unreachable("TLS not implemented for Hexagon."); - case ISD::MEMBARRIER: return LowerMEMBARRIER(Op, DAG); case ISD::ATOMIC_FENCE: return LowerATOMIC_FENCE(Op, DAG); case ISD::GlobalAddress: return LowerGLOBALADDRESS(Op, DAG); case ISD::BlockAddress: return LowerBlockAddress(Op, DAG); diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonISelLowering.h b/contrib/llvm/lib/Target/Hexagon/HexagonISelLowering.h index 3279cc6..bb1acc1 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonISelLowering.h +++ b/contrib/llvm/lib/Target/Hexagon/HexagonISelLowering.h @@ -62,7 +62,8 @@ namespace llvm { WrapperShuffEH, WrapperShuffOB, WrapperShuffOH, - TC_RETURN + TC_RETURN, + EH_RETURN }; } @@ -101,6 +102,7 @@ namespace llvm { SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const; SDValue LowerINLINEASM(SDValue Op, SelectionDAG &DAG) const; SDValue LowerEH_LABEL(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::InputArg> &Ins, @@ -122,7 +124,6 @@ namespace llvm { SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerMEMBARRIER(SDValue Op, SelectionDAG& DAG) const; SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG& DAG) const; SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const; diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp index 60b12ac..f114170 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp +++ b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp @@ -23,7 +23,9 @@ #include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/PseudoSourceValue.h" +#include "llvm/Support/Debug.h" #include "llvm/Support/MathExtras.h" +#include "llvm/Support/raw_ostream.h" #define GET_INSTRINFO_CTOR #define GET_INSTRMAP_INFO #include "HexagonGenInstrInfo.inc" @@ -118,16 +120,16 @@ 
HexagonInstrInfo::InsertBranch(MachineBasicBlock &MBB,MachineBasicBlock *TBB, DebugLoc DL) const{ int BOpc = Hexagon::JMP; - int BccOpc = Hexagon::JMP_c; + int BccOpc = Hexagon::JMP_t; assert(TBB && "InsertBranch must not be told to insert a fallthrough"); int regPos = 0; // Check if ReverseBranchCondition has asked to reverse this branch // If we want to reverse the branch an odd number of times, we want - // JMP_cNot. + // JMP_f. if (!Cond.empty() && Cond[0].isImm() && Cond[0].getImm() == 0) { - BccOpc = Hexagon::JMP_cNot; + BccOpc = Hexagon::JMP_f; regPos = 1; } @@ -174,8 +176,8 @@ bool HexagonInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, FBB = NULL; // If the block has no terminators, it just falls into the block after it. - MachineBasicBlock::iterator I = MBB.end(); - if (I == MBB.begin()) + MachineBasicBlock::instr_iterator I = MBB.instr_end(); + if (I == MBB.instr_begin()) return false; // A basic block may looks like this: @@ -194,13 +196,24 @@ bool HexagonInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, --I; if (I->isEHLabel()) return true; - } while (I != MBB.begin()); + } while (I != MBB.instr_begin()); - I = MBB.end(); + I = MBB.instr_end(); --I; while (I->isDebugValue()) { - if (I == MBB.begin()) + if (I == MBB.instr_begin()) + return false; + --I; + } + + // Delete the JMP if it's equivalent to a fall-through. + if (AllowModify && I->getOpcode() == Hexagon::JMP && + MBB.isLayoutSuccessor(I->getOperand(0).getMBB())) { + DEBUG(dbgs()<< "\nErasing the jump to successor block\n";); + I->eraseFromParent(); + I = MBB.instr_end(); + if (I == MBB.instr_begin()) return false; --I; } @@ -209,23 +222,42 @@ bool HexagonInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, // Get the last instruction in the block. MachineInstr *LastInst = I; + MachineInstr *SecondLastInst = NULL; + // Find one more terminator if present. + do { + if (&*I != LastInst && !I->isBundle() && isUnpredicatedTerminator(I)) { + if (!SecondLastInst) + SecondLastInst = I; + else + // This is a third branch. + return true; + } + if (I == MBB.instr_begin()) + break; + --I; + } while(I); + + int LastOpcode = LastInst->getOpcode(); + + bool LastOpcodeHasJMP_c = PredOpcodeHasJMP_c(LastOpcode); + bool LastOpcodeHasNot = PredOpcodeHasNot(LastOpcode); // If there is only one terminator instruction, process it. - if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) { - if (LastInst->getOpcode() == Hexagon::JMP) { + if (LastInst && !SecondLastInst) { + if (LastOpcode == Hexagon::JMP) { TBB = LastInst->getOperand(0).getMBB(); return false; } - if (LastInst->getOpcode() == Hexagon::JMP_c) { - // Block ends with fall-through true condbranch. - TBB = LastInst->getOperand(1).getMBB(); + if (LastOpcode == Hexagon::ENDLOOP0) { + TBB = LastInst->getOperand(0).getMBB(); Cond.push_back(LastInst->getOperand(0)); return false; } - if (LastInst->getOpcode() == Hexagon::JMP_cNot) { - // Block ends with fall-through false condbranch. + if (LastOpcodeHasJMP_c) { TBB = LastInst->getOperand(1).getMBB(); - Cond.push_back(MachineOperand::CreateImm(0)); + if (LastOpcodeHasNot) { + Cond.push_back(MachineOperand::CreateImm(0)); + } Cond.push_back(LastInst->getOperand(0)); return false; } @@ -233,29 +265,14 @@ bool HexagonInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, return true; } - // Get the instruction before it if it's a terminator. - MachineInstr *SecondLastInst = I; - - // If there are three terminators, we don't know what sort of block this is. 
- if (SecondLastInst && I != MBB.begin() && - isUnpredicatedTerminator(--I)) - return true; + int SecLastOpcode = SecondLastInst->getOpcode(); - // If the block ends with Hexagon::BRCOND and Hexagon:JMP, handle it. - if (((SecondLastInst->getOpcode() == Hexagon::BRCOND) || - (SecondLastInst->getOpcode() == Hexagon::JMP_c)) && - LastInst->getOpcode() == Hexagon::JMP) { + bool SecLastOpcodeHasJMP_c = PredOpcodeHasJMP_c(SecLastOpcode); + bool SecLastOpcodeHasNot = PredOpcodeHasNot(SecLastOpcode); + if (SecLastOpcodeHasJMP_c && (LastOpcode == Hexagon::JMP)) { TBB = SecondLastInst->getOperand(1).getMBB(); - Cond.push_back(SecondLastInst->getOperand(0)); - FBB = LastInst->getOperand(0).getMBB(); - return false; - } - - // If the block ends with Hexagon::JMP_cNot and Hexagon:JMP, handle it. - if ((SecondLastInst->getOpcode() == Hexagon::JMP_cNot) && - LastInst->getOpcode() == Hexagon::JMP) { - TBB = SecondLastInst->getOperand(1).getMBB(); - Cond.push_back(MachineOperand::CreateImm(0)); + if (SecLastOpcodeHasNot) + Cond.push_back(MachineOperand::CreateImm(0)); Cond.push_back(SecondLastInst->getOperand(0)); FBB = LastInst->getOperand(0).getMBB(); return false; @@ -263,8 +280,7 @@ bool HexagonInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, // If the block ends with two Hexagon:JMPs, handle it. The second one is not // executed, so remove it. - if (SecondLastInst->getOpcode() == Hexagon::JMP && - LastInst->getOpcode() == Hexagon::JMP) { + if (SecLastOpcode == Hexagon::JMP && LastOpcode == Hexagon::JMP) { TBB = SecondLastInst->getOperand(0).getMBB(); I = LastInst; if (AllowModify) @@ -272,6 +288,15 @@ bool HexagonInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, return false; } + // If the block ends with an ENDLOOP, and JMP, handle it. + if (SecLastOpcode == Hexagon::ENDLOOP0 && + LastOpcode == Hexagon::JMP) { + TBB = SecondLastInst->getOperand(0).getMBB(); + Cond.push_back(SecondLastInst->getOperand(0)); + FBB = LastInst->getOperand(0).getMBB(); + return false; + } + // Otherwise, can't handle this. 
return true; } @@ -279,8 +304,8 @@ bool HexagonInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, unsigned HexagonInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const { int BOpc = Hexagon::JMP; - int BccOpc = Hexagon::JMP_c; - int BccOpcNot = Hexagon::JMP_cNot; + int BccOpc = Hexagon::JMP_t; + int BccOpcNot = Hexagon::JMP_f; MachineBasicBlock::iterator I = MBB.end(); if (I == MBB.begin()) return 0; @@ -325,8 +350,6 @@ bool HexagonInstrInfo::analyzeCompare(const MachineInstr *MI, case Hexagon::CMPGTUrr: case Hexagon::CMPGTri: case Hexagon::CMPGTrr: - case Hexagon::CMPLTUrr: - case Hexagon::CMPLTrr: SrcReg = MI->getOperand(1).getReg(); Mask = ~0; break; @@ -366,8 +389,6 @@ bool HexagonInstrInfo::analyzeCompare(const MachineInstr *MI, case Hexagon::CMPhEQrr_xor_V4: case Hexagon::CMPhGTUrr_V4: case Hexagon::CMPhGTrr_shl_V4: - case Hexagon::CMPLTUrr: - case Hexagon::CMPLTrr: SrcReg2 = MI->getOperand(2).getReg(); return true; @@ -605,110 +626,8 @@ bool HexagonInstrInfo::isExtended(const MachineInstr *MI) const { return false; } -bool HexagonInstrInfo::isNewValueJump(const MachineInstr *MI) const { - switch (MI->getOpcode()) { - default: return false; - // JMP_EQri - case Hexagon::JMP_EQriPt_nv_V4: - case Hexagon::JMP_EQriPnt_nv_V4: - case Hexagon::JMP_EQriNotPt_nv_V4: - case Hexagon::JMP_EQriNotPnt_nv_V4: - case Hexagon::JMP_EQriPt_ie_nv_V4: - case Hexagon::JMP_EQriPnt_ie_nv_V4: - case Hexagon::JMP_EQriNotPt_ie_nv_V4: - case Hexagon::JMP_EQriNotPnt_ie_nv_V4: - - // JMP_EQri - with -1 - case Hexagon::JMP_EQriPtneg_nv_V4: - case Hexagon::JMP_EQriPntneg_nv_V4: - case Hexagon::JMP_EQriNotPtneg_nv_V4: - case Hexagon::JMP_EQriNotPntneg_nv_V4: - case Hexagon::JMP_EQriPtneg_ie_nv_V4: - case Hexagon::JMP_EQriPntneg_ie_nv_V4: - case Hexagon::JMP_EQriNotPtneg_ie_nv_V4: - case Hexagon::JMP_EQriNotPntneg_ie_nv_V4: - - // JMP_EQrr - case Hexagon::JMP_EQrrPt_nv_V4: - case Hexagon::JMP_EQrrPnt_nv_V4: - case Hexagon::JMP_EQrrNotPt_nv_V4: - case Hexagon::JMP_EQrrNotPnt_nv_V4: - case Hexagon::JMP_EQrrPt_ie_nv_V4: - case Hexagon::JMP_EQrrPnt_ie_nv_V4: - case Hexagon::JMP_EQrrNotPt_ie_nv_V4: - case Hexagon::JMP_EQrrNotPnt_ie_nv_V4: - - // JMP_GTri - case Hexagon::JMP_GTriPt_nv_V4: - case Hexagon::JMP_GTriPnt_nv_V4: - case Hexagon::JMP_GTriNotPt_nv_V4: - case Hexagon::JMP_GTriNotPnt_nv_V4: - case Hexagon::JMP_GTriPt_ie_nv_V4: - case Hexagon::JMP_GTriPnt_ie_nv_V4: - case Hexagon::JMP_GTriNotPt_ie_nv_V4: - case Hexagon::JMP_GTriNotPnt_ie_nv_V4: - - // JMP_GTri - with -1 - case Hexagon::JMP_GTriPtneg_nv_V4: - case Hexagon::JMP_GTriPntneg_nv_V4: - case Hexagon::JMP_GTriNotPtneg_nv_V4: - case Hexagon::JMP_GTriNotPntneg_nv_V4: - case Hexagon::JMP_GTriPtneg_ie_nv_V4: - case Hexagon::JMP_GTriPntneg_ie_nv_V4: - case Hexagon::JMP_GTriNotPtneg_ie_nv_V4: - case Hexagon::JMP_GTriNotPntneg_ie_nv_V4: - - // JMP_GTrr - case Hexagon::JMP_GTrrPt_nv_V4: - case Hexagon::JMP_GTrrPnt_nv_V4: - case Hexagon::JMP_GTrrNotPt_nv_V4: - case Hexagon::JMP_GTrrNotPnt_nv_V4: - case Hexagon::JMP_GTrrPt_ie_nv_V4: - case Hexagon::JMP_GTrrPnt_ie_nv_V4: - case Hexagon::JMP_GTrrNotPt_ie_nv_V4: - case Hexagon::JMP_GTrrNotPnt_ie_nv_V4: - - // JMP_GTrrdn - case Hexagon::JMP_GTrrdnPt_nv_V4: - case Hexagon::JMP_GTrrdnPnt_nv_V4: - case Hexagon::JMP_GTrrdnNotPt_nv_V4: - case Hexagon::JMP_GTrrdnNotPnt_nv_V4: - case Hexagon::JMP_GTrrdnPt_ie_nv_V4: - case Hexagon::JMP_GTrrdnPnt_ie_nv_V4: - case Hexagon::JMP_GTrrdnNotPt_ie_nv_V4: - case Hexagon::JMP_GTrrdnNotPnt_ie_nv_V4: - - // JMP_GTUri - case Hexagon::JMP_GTUriPt_nv_V4: - case Hexagon::JMP_GTUriPnt_nv_V4: - case 
Hexagon::JMP_GTUriNotPt_nv_V4: - case Hexagon::JMP_GTUriNotPnt_nv_V4: - case Hexagon::JMP_GTUriPt_ie_nv_V4: - case Hexagon::JMP_GTUriPnt_ie_nv_V4: - case Hexagon::JMP_GTUriNotPt_ie_nv_V4: - case Hexagon::JMP_GTUriNotPnt_ie_nv_V4: - - // JMP_GTUrr - case Hexagon::JMP_GTUrrPt_nv_V4: - case Hexagon::JMP_GTUrrPnt_nv_V4: - case Hexagon::JMP_GTUrrNotPt_nv_V4: - case Hexagon::JMP_GTUrrNotPnt_nv_V4: - case Hexagon::JMP_GTUrrPt_ie_nv_V4: - case Hexagon::JMP_GTUrrPnt_ie_nv_V4: - case Hexagon::JMP_GTUrrNotPt_ie_nv_V4: - case Hexagon::JMP_GTUrrNotPnt_ie_nv_V4: - - // JMP_GTUrrdn - case Hexagon::JMP_GTUrrdnPt_nv_V4: - case Hexagon::JMP_GTUrrdnPnt_nv_V4: - case Hexagon::JMP_GTUrrdnNotPt_nv_V4: - case Hexagon::JMP_GTUrrdnNotPnt_nv_V4: - case Hexagon::JMP_GTUrrdnPt_ie_nv_V4: - case Hexagon::JMP_GTUrrdnPnt_ie_nv_V4: - case Hexagon::JMP_GTUrrdnNotPt_ie_nv_V4: - case Hexagon::JMP_GTUrrdnNotPnt_ie_nv_V4: - return true; - } +bool HexagonInstrInfo::isBranch (const MachineInstr *MI) const { + return MI->getDesc().isBranch(); } bool HexagonInstrInfo::isNewValueStore(const MachineInstr *MI) const { @@ -746,11 +665,6 @@ bool HexagonInstrInfo::isNewValueStore(const MachineInstr *MI) const { case Hexagon::STrib_abs_cdnPt_nv_V4: case Hexagon::STrib_abs_cNotPt_nv_V4: case Hexagon::STrib_abs_cdnNotPt_nv_V4: - case Hexagon::STrib_imm_abs_nv_V4: - case Hexagon::STrib_imm_abs_cPt_nv_V4: - case Hexagon::STrib_imm_abs_cdnPt_nv_V4: - case Hexagon::STrib_imm_abs_cNotPt_nv_V4: - case Hexagon::STrib_imm_abs_cdnNotPt_nv_V4: // Store Halfword case Hexagon::STrih_nv_V4: @@ -784,11 +698,6 @@ bool HexagonInstrInfo::isNewValueStore(const MachineInstr *MI) const { case Hexagon::STrih_abs_cdnPt_nv_V4: case Hexagon::STrih_abs_cNotPt_nv_V4: case Hexagon::STrih_abs_cdnNotPt_nv_V4: - case Hexagon::STrih_imm_abs_nv_V4: - case Hexagon::STrih_imm_abs_cPt_nv_V4: - case Hexagon::STrih_imm_abs_cdnPt_nv_V4: - case Hexagon::STrih_imm_abs_cNotPt_nv_V4: - case Hexagon::STrih_imm_abs_cdnNotPt_nv_V4: // Store Word case Hexagon::STriw_nv_V4: @@ -822,11 +731,6 @@ bool HexagonInstrInfo::isNewValueStore(const MachineInstr *MI) const { case Hexagon::STriw_abs_cdnPt_nv_V4: case Hexagon::STriw_abs_cNotPt_nv_V4: case Hexagon::STriw_abs_cdnNotPt_nv_V4: - case Hexagon::STriw_imm_abs_nv_V4: - case Hexagon::STriw_imm_abs_cPt_nv_V4: - case Hexagon::STriw_imm_abs_cdnPt_nv_V4: - case Hexagon::STriw_imm_abs_cNotPt_nv_V4: - case Hexagon::STriw_imm_abs_cdnNotPt_nv_V4: return true; } } @@ -1003,9 +907,6 @@ bool HexagonInstrInfo::isPredicable(MachineInstr *MI) const { case Hexagon::ZXTB: case Hexagon::ZXTH: return Subtarget.hasV4TOps(); - - case Hexagon::JMPR: - return false; } return true; @@ -1030,6 +931,12 @@ bool HexagonInstrInfo::isPredicable(MachineInstr *MI) const { // cNotPt ---> cNotPt_nv // cPt ---> cPt_nv unsigned HexagonInstrInfo::getInvertedPredicatedOpcode(const int Opc) const { + int InvPredOpcode; + InvPredOpcode = isPredicatedTrue(Opc) ? Hexagon::getFalsePredOpcode(Opc) + : Hexagon::getTruePredOpcode(Opc); + if (InvPredOpcode >= 0) // Valid instruction with the inverted predicate. 
+ return InvPredOpcode; + switch(Opc) { default: llvm_unreachable("Unexpected predicated instruction"); case Hexagon::TFR_cPt: @@ -1042,10 +949,10 @@ unsigned HexagonInstrInfo::getInvertedPredicatedOpcode(const int Opc) const { case Hexagon::TFRI_cNotPt: return Hexagon::TFRI_cPt; - case Hexagon::JMP_c: - return Hexagon::JMP_cNot; - case Hexagon::JMP_cNot: - return Hexagon::JMP_c; + case Hexagon::JMP_t: + return Hexagon::JMP_f; + case Hexagon::JMP_f: + return Hexagon::JMP_t; case Hexagon::ADD_ri_cPt: return Hexagon::ADD_ri_cNotPt; @@ -1113,10 +1020,10 @@ unsigned HexagonInstrInfo::getInvertedPredicatedOpcode(const int Opc) const { return Hexagon::ZXTH_cPt_V4; - case Hexagon::JMPR_cPt: - return Hexagon::JMPR_cNotPt; - case Hexagon::JMPR_cNotPt: - return Hexagon::JMPR_cPt; + case Hexagon::JMPR_t: + return Hexagon::JMPR_f; + case Hexagon::JMPR_f: + return Hexagon::JMPR_t; // V4 indexed+scaled load. case Hexagon::LDrid_indexed_shl_cPt_V4: @@ -1362,117 +1269,6 @@ unsigned HexagonInstrInfo::getInvertedPredicatedOpcode(const int Opc) const { return Hexagon::DEALLOC_RET_cNotPt_V4; case Hexagon::DEALLOC_RET_cNotPt_V4: return Hexagon::DEALLOC_RET_cPt_V4; - - // New Value Jump. - // JMPEQ_ri - with -1. - case Hexagon::JMP_EQriPtneg_nv_V4: - return Hexagon::JMP_EQriNotPtneg_nv_V4; - case Hexagon::JMP_EQriNotPtneg_nv_V4: - return Hexagon::JMP_EQriPtneg_nv_V4; - - case Hexagon::JMP_EQriPntneg_nv_V4: - return Hexagon::JMP_EQriNotPntneg_nv_V4; - case Hexagon::JMP_EQriNotPntneg_nv_V4: - return Hexagon::JMP_EQriPntneg_nv_V4; - - // JMPEQ_ri. - case Hexagon::JMP_EQriPt_nv_V4: - return Hexagon::JMP_EQriNotPt_nv_V4; - case Hexagon::JMP_EQriNotPt_nv_V4: - return Hexagon::JMP_EQriPt_nv_V4; - - case Hexagon::JMP_EQriPnt_nv_V4: - return Hexagon::JMP_EQriNotPnt_nv_V4; - case Hexagon::JMP_EQriNotPnt_nv_V4: - return Hexagon::JMP_EQriPnt_nv_V4; - - // JMPEQ_rr. - case Hexagon::JMP_EQrrPt_nv_V4: - return Hexagon::JMP_EQrrNotPt_nv_V4; - case Hexagon::JMP_EQrrNotPt_nv_V4: - return Hexagon::JMP_EQrrPt_nv_V4; - - case Hexagon::JMP_EQrrPnt_nv_V4: - return Hexagon::JMP_EQrrNotPnt_nv_V4; - case Hexagon::JMP_EQrrNotPnt_nv_V4: - return Hexagon::JMP_EQrrPnt_nv_V4; - - // JMPGT_ri - with -1. - case Hexagon::JMP_GTriPtneg_nv_V4: - return Hexagon::JMP_GTriNotPtneg_nv_V4; - case Hexagon::JMP_GTriNotPtneg_nv_V4: - return Hexagon::JMP_GTriPtneg_nv_V4; - - case Hexagon::JMP_GTriPntneg_nv_V4: - return Hexagon::JMP_GTriNotPntneg_nv_V4; - case Hexagon::JMP_GTriNotPntneg_nv_V4: - return Hexagon::JMP_GTriPntneg_nv_V4; - - // JMPGT_ri. - case Hexagon::JMP_GTriPt_nv_V4: - return Hexagon::JMP_GTriNotPt_nv_V4; - case Hexagon::JMP_GTriNotPt_nv_V4: - return Hexagon::JMP_GTriPt_nv_V4; - - case Hexagon::JMP_GTriPnt_nv_V4: - return Hexagon::JMP_GTriNotPnt_nv_V4; - case Hexagon::JMP_GTriNotPnt_nv_V4: - return Hexagon::JMP_GTriPnt_nv_V4; - - // JMPGT_rr. - case Hexagon::JMP_GTrrPt_nv_V4: - return Hexagon::JMP_GTrrNotPt_nv_V4; - case Hexagon::JMP_GTrrNotPt_nv_V4: - return Hexagon::JMP_GTrrPt_nv_V4; - - case Hexagon::JMP_GTrrPnt_nv_V4: - return Hexagon::JMP_GTrrNotPnt_nv_V4; - case Hexagon::JMP_GTrrNotPnt_nv_V4: - return Hexagon::JMP_GTrrPnt_nv_V4; - - // JMPGT_rrdn. - case Hexagon::JMP_GTrrdnPt_nv_V4: - return Hexagon::JMP_GTrrdnNotPt_nv_V4; - case Hexagon::JMP_GTrrdnNotPt_nv_V4: - return Hexagon::JMP_GTrrdnPt_nv_V4; - - case Hexagon::JMP_GTrrdnPnt_nv_V4: - return Hexagon::JMP_GTrrdnNotPnt_nv_V4; - case Hexagon::JMP_GTrrdnNotPnt_nv_V4: - return Hexagon::JMP_GTrrdnPnt_nv_V4; - - // JMPGTU_ri. 
- case Hexagon::JMP_GTUriPt_nv_V4: - return Hexagon::JMP_GTUriNotPt_nv_V4; - case Hexagon::JMP_GTUriNotPt_nv_V4: - return Hexagon::JMP_GTUriPt_nv_V4; - - case Hexagon::JMP_GTUriPnt_nv_V4: - return Hexagon::JMP_GTUriNotPnt_nv_V4; - case Hexagon::JMP_GTUriNotPnt_nv_V4: - return Hexagon::JMP_GTUriPnt_nv_V4; - - // JMPGTU_rr. - case Hexagon::JMP_GTUrrPt_nv_V4: - return Hexagon::JMP_GTUrrNotPt_nv_V4; - case Hexagon::JMP_GTUrrNotPt_nv_V4: - return Hexagon::JMP_GTUrrPt_nv_V4; - - case Hexagon::JMP_GTUrrPnt_nv_V4: - return Hexagon::JMP_GTUrrNotPnt_nv_V4; - case Hexagon::JMP_GTUrrNotPnt_nv_V4: - return Hexagon::JMP_GTUrrPnt_nv_V4; - - // JMPGTU_rrdn. - case Hexagon::JMP_GTUrrdnPt_nv_V4: - return Hexagon::JMP_GTUrrdnNotPt_nv_V4; - case Hexagon::JMP_GTUrrdnNotPt_nv_V4: - return Hexagon::JMP_GTUrrdnPt_nv_V4; - - case Hexagon::JMP_GTUrrdnPnt_nv_V4: - return Hexagon::JMP_GTUrrdnNotPnt_nv_V4; - case Hexagon::JMP_GTUrrdnNotPnt_nv_V4: - return Hexagon::JMP_GTUrrdnPnt_nv_V4; } } @@ -1499,14 +1295,9 @@ getMatchingCondBranchOpcode(int Opc, bool invertPredicate) const { return !invertPredicate ? Hexagon::TFRI_cPt : Hexagon::TFRI_cNotPt; case Hexagon::JMP: - return !invertPredicate ? Hexagon::JMP_c : - Hexagon::JMP_cNot; - case Hexagon::JMP_EQrrPt_nv_V4: - return !invertPredicate ? Hexagon::JMP_EQrrPt_nv_V4 : - Hexagon::JMP_EQrrNotPt_nv_V4; - case Hexagon::JMP_EQriPt_nv_V4: - return !invertPredicate ? Hexagon::JMP_EQriPt_nv_V4 : - Hexagon::JMP_EQriNotPt_nv_V4; + return !invertPredicate ? Hexagon::JMP_t : + Hexagon::JMP_f; + case Hexagon::COMBINE_rr: return !invertPredicate ? Hexagon::COMBINE_rr_cPt : Hexagon::COMBINE_rr_cNotPt; @@ -1530,8 +1321,8 @@ getMatchingCondBranchOpcode(int Opc, bool invertPredicate) const { Hexagon::ZXTH_cNotPt_V4; case Hexagon::JMPR: - return !invertPredicate ? Hexagon::JMPR_cPt : - Hexagon::JMPR_cNotPt; + return !invertPredicate ? Hexagon::JMPR_t : + Hexagon::JMPR_f; // V4 indexed+scaled load. case Hexagon::LDrid_indexed_shl_V4: @@ -1830,11 +1621,15 @@ PredicateInstruction(MachineInstr *MI, // It is better to have an assert here to check this. But I don't know how // to write this assert because findFirstPredOperandIdx() would return -1 if (oper < -1) oper = -1; + MI->getOperand(oper+1).ChangeToRegister(PredMO.getReg(), PredMO.isDef(), - PredMO.isImplicit(), PredMO.isKill(), + PredMO.isImplicit(), false, PredMO.isDead(), PredMO.isUndef(), PredMO.isDebug()); + MachineRegisterInfo &RegInfo = MI->getParent()->getParent()->getRegInfo(); + RegInfo.clearKillFlags(PredMO.getReg()); + if (hasGAOpnd) { unsigned int i; @@ -1883,13 +1678,41 @@ isProfitableToIfCvt(MachineBasicBlock &TMBB, return true; } - +// Returns true if an instruction is predicated irrespective of the predicate +// sense. For example, all of the following will return true. 
+// if (p0) R1 = add(R2, R3) +// if (!p0) R1 = add(R2, R3) +// if (p0.new) R1 = add(R2, R3) +// if (!p0.new) R1 = add(R2, R3) bool HexagonInstrInfo::isPredicated(const MachineInstr *MI) const { const uint64_t F = MI->getDesc().TSFlags; return ((F >> HexagonII::PredicatedPos) & HexagonII::PredicatedMask); } +bool HexagonInstrInfo::isPredicated(unsigned Opcode) const { + const uint64_t F = get(Opcode).TSFlags; + + return ((F >> HexagonII::PredicatedPos) & HexagonII::PredicatedMask); +} + +bool HexagonInstrInfo::isPredicatedTrue(const MachineInstr *MI) const { + const uint64_t F = MI->getDesc().TSFlags; + + assert(isPredicated(MI)); + return (!((F >> HexagonII::PredicatedFalsePos) & + HexagonII::PredicatedFalseMask)); +} + +bool HexagonInstrInfo::isPredicatedTrue(unsigned Opcode) const { + const uint64_t F = get(Opcode).TSFlags; + + // Make sure that the instruction is predicated. + assert((F>> HexagonII::PredicatedPos) & HexagonII::PredicatedMask); + return (!((F >> HexagonII::PredicatedFalsePos) & + HexagonII::PredicatedFalseMask)); +} + bool HexagonInstrInfo::isPredicatedNew(const MachineInstr *MI) const { const uint64_t F = MI->getDesc().TSFlags; @@ -1897,6 +1720,13 @@ bool HexagonInstrInfo::isPredicatedNew(const MachineInstr *MI) const { return ((F >> HexagonII::PredicatedNewPos) & HexagonII::PredicatedNewMask); } +bool HexagonInstrInfo::isPredicatedNew(unsigned Opcode) const { + const uint64_t F = get(Opcode).TSFlags; + + assert(isPredicated(Opcode)); + return ((F >> HexagonII::PredicatedNewPos) & HexagonII::PredicatedNewMask); +} + bool HexagonInstrInfo::DefinesPredicate(MachineInstr *MI, std::vector<MachineOperand> &Pred) const { @@ -2129,14 +1959,10 @@ bool HexagonInstrInfo::isNewValueJumpCandidate(const MachineInstr *MI) const { default: return false; case Hexagon::CMPEQrr: case Hexagon::CMPEQri: - case Hexagon::CMPLTrr: case Hexagon::CMPGTrr: case Hexagon::CMPGTri: - case Hexagon::CMPLTUrr: case Hexagon::CMPGTUrr: case Hexagon::CMPGTUri: - case Hexagon::CMPGEri: - case Hexagon::CMPGEUri: return true; } } @@ -2369,6 +2195,18 @@ isConditionalStore (const MachineInstr* MI) const { } } + +bool HexagonInstrInfo::isNewValueJump(const MachineInstr *MI) const { + if (isNewValue(MI) && isBranch(MI)) + return true; + return false; +} + +bool HexagonInstrInfo::isNewValue(const MachineInstr* MI) const { + const uint64_t F = MI->getDesc().TSFlags; + return ((F >> HexagonII::NewValuePos) & HexagonII::NewValueMask); +} + // Returns true, if any one of the operands is a dot new // insn, whether it is predicated dot new or register dot new. bool HexagonInstrInfo::isDotNewInst (const MachineInstr* MI) const { @@ -2470,6 +2308,34 @@ bool HexagonInstrInfo::isConstExtended(MachineInstr *MI) const { return (ImmValue < MinValue || ImmValue > MaxValue); } +// Returns the opcode to use when converting MI, which is a conditional jump, +// into a conditional instruction which uses the .new value of the predicate. +// We also use branch probabilities to add a hint to the jump. +int +HexagonInstrInfo::getDotNewPredJumpOp(MachineInstr *MI, + const + MachineBranchProbabilityInfo *MBPI) const { + + // We assume that block can have at most two successors. 
+ bool taken = false; + MachineBasicBlock *Src = MI->getParent(); + MachineOperand *BrTarget = &MI->getOperand(1); + MachineBasicBlock *Dst = BrTarget->getMBB(); + + const BranchProbability Prediction = MBPI->getEdgeProbability(Src, Dst); + if (Prediction >= BranchProbability(1,2)) + taken = true; + + switch (MI->getOpcode()) { + case Hexagon::JMP_t: + return taken ? Hexagon::JMP_tnew_t : Hexagon::JMP_tnew_nt; + case Hexagon::JMP_f: + return taken ? Hexagon::JMP_fnew_t : Hexagon::JMP_fnew_nt; + + default: + llvm_unreachable("Unexpected jump instruction."); + } +} // Returns true if a particular operand is extendable for an instruction. bool HexagonInstrInfo::isOperandExtended(const MachineInstr *MI, unsigned short OperandNum) const { @@ -2574,3 +2440,18 @@ short HexagonInstrInfo::getNonExtOpcode (const MachineInstr *MI) const { } return -1; } + +bool HexagonInstrInfo::PredOpcodeHasJMP_c(Opcode_t Opcode) const { + return (Opcode == Hexagon::JMP_t) || + (Opcode == Hexagon::JMP_f) || + (Opcode == Hexagon::JMP_tnew_t) || + (Opcode == Hexagon::JMP_fnew_t) || + (Opcode == Hexagon::JMP_tnew_nt) || + (Opcode == Hexagon::JMP_fnew_nt); +} + +bool HexagonInstrInfo::PredOpcodeHasNot(Opcode_t Opcode) const { + return (Opcode == Hexagon::JMP_f) || + (Opcode == Hexagon::JMP_fnew_t) || + (Opcode == Hexagon::JMP_fnew_nt); +} diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.h b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.h index 5df13a8..b721da4 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.h +++ b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.h @@ -16,9 +16,9 @@ #include "HexagonRegisterInfo.h" #include "MCTargetDesc/HexagonBaseInfo.h" -#include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetInstrInfo.h" - +#include "llvm/Target/TargetFrameLowering.h" +#include "llvm/CodeGen/MachineBranchProbabilityInfo.h" #define GET_INSTRINFO_HEADER #include "HexagonGenInstrInfo.inc" @@ -28,6 +28,8 @@ namespace llvm { class HexagonInstrInfo : public HexagonGenInstrInfo { const HexagonRegisterInfo RI; const HexagonSubtarget& Subtarget; + typedef unsigned Opcode_t; + public: explicit HexagonInstrInfo(HexagonSubtarget &ST); @@ -111,6 +113,7 @@ public: unsigned createVR(MachineFunction* MF, MVT VT) const; + virtual bool isBranch(const MachineInstr *MI) const; virtual bool isPredicable(MachineInstr *MI) const; virtual bool PredicateInstruction(MachineInstr *MI, @@ -127,7 +130,11 @@ public: const BranchProbability &Probability) const; virtual bool isPredicated(const MachineInstr *MI) const; + virtual bool isPredicated(unsigned Opcode) const; + virtual bool isPredicatedTrue(const MachineInstr *MI) const; + virtual bool isPredicatedTrue(unsigned Opcode) const; virtual bool isPredicatedNew(const MachineInstr *MI) const; + virtual bool isPredicatedNew(unsigned Opcode) const; virtual bool DefinesPredicate(MachineInstr *MI, std::vector<MachineOperand> &Pred) const; virtual bool @@ -176,6 +183,7 @@ public: bool isConditionalLoad (const MachineInstr* MI) const; bool isConditionalStore(const MachineInstr* MI) const; bool isNewValueInst(const MachineInstr* MI) const; + bool isNewValue(const MachineInstr* MI) const; bool isDotNewInst(const MachineInstr* MI) const; bool isDeallocRet(const MachineInstr *MI) const; unsigned getInvertedPredicatedOpcode(const int Opc) const; @@ -189,6 +197,8 @@ public: void immediateExtend(MachineInstr *MI) const; bool isConstExtended(MachineInstr *MI) const; + int getDotNewPredJumpOp(MachineInstr *MI, + const MachineBranchProbabilityInfo *MBPI) const; 
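// Editorial aside, not part of the patch: the isPredicated/isPredicatedTrue/
// isPredicatedNew overloads added by this change all follow the same pattern of
// extracting a single bit field from the instruction's 64-bit TSFlags word. A
// minimal self-contained sketch of that pattern follows; the bit positions and
// masks below are made up for illustration, the real values live in
// MCTargetDesc/HexagonBaseInfo.h (HexagonII).
#include <cassert>
#include <cstdint>

namespace {

// Hypothetical flag layout, standing in for the HexagonII enums.
enum : uint64_t {
  PredicatedPos = 6,      PredicatedMask = 0x1,
  PredicatedFalsePos = 7, PredicatedFalseMask = 0x1,
  PredicatedNewPos = 8,   PredicatedNewMask = 0x1
};

bool isPredicated(uint64_t TSFlags) {
  return (TSFlags >> PredicatedPos) & PredicatedMask;
}

// "True" sense means the false-sense bit is clear; the query is only
// meaningful for predicated instructions, hence the assert.
bool isPredicatedTrue(uint64_t TSFlags) {
  assert(isPredicated(TSFlags) && "query only valid for predicated insns");
  return !((TSFlags >> PredicatedFalsePos) & PredicatedFalseMask);
}

bool isPredicatedNew(uint64_t TSFlags) {
  assert(isPredicated(TSFlags) && "query only valid for predicated insns");
  return (TSFlags >> PredicatedNewPos) & PredicatedNewMask;
}

} // namespace

int main() {
  // An instruction flagged as predicated, false-sense, dot-new:
  uint64_t Flags = (1ULL << PredicatedPos) | (1ULL << PredicatedFalsePos) |
                   (1ULL << PredicatedNewPos);
  assert(isPredicated(Flags));
  assert(!isPredicatedTrue(Flags));
  assert(isPredicatedNew(Flags));
  return 0;
}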
unsigned getAddrMode(const MachineInstr* MI) const; bool isOperandExtended(const MachineInstr *MI, unsigned short OperandNum) const; @@ -197,6 +207,9 @@ public: int getMaxValue(const MachineInstr *MI) const; bool NonExtEquivalentExists (const MachineInstr *MI) const; short getNonExtOpcode(const MachineInstr *MI) const; + bool PredOpcodeHasJMP_c(Opcode_t Opcode) const; + bool PredOpcodeHasNot(Opcode_t Opcode) const; + private: int getMatchingCondBranchOpcode(int Opc, bool sense) const; diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.td b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.td index 74dc0ca..2a4b17b 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.td +++ b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.td @@ -14,6 +14,8 @@ include "HexagonInstrFormats.td" include "HexagonOperands.td" +//===----------------------------------------------------------------------===// + // Multi-class for logical operators. multiclass ALU32_rr_ri<string OpcStr, SDNode OpNode> { def rr : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$b, IntRegs:$c), @@ -34,12 +36,6 @@ multiclass CMP64_rr<string OpcStr, PatFrag OpNode> { [(set (i1 PredRegs:$dst), (OpNode (i64 DoubleRegs:$b), (i64 DoubleRegs:$c)))]>; } -multiclass CMP32_rr<string OpcStr, PatFrag OpNode> { - def rr : ALU32_rr<(outs PredRegs:$dst), (ins IntRegs:$b, IntRegs:$c), - !strconcat("$dst = ", !strconcat(OpcStr, "($b, $c)")), - [(set (i1 PredRegs:$dst), - (OpNode (i32 IntRegs:$b), (i32 IntRegs:$c)))]>; -} multiclass CMP32_rr_ri_s10<string OpcStr, string CextOp, PatFrag OpNode> { let CextOpcode = CextOp in { @@ -75,14 +71,6 @@ multiclass CMP32_rr_ri_u9<string OpcStr, string CextOp, PatFrag OpNode> { } } -multiclass CMP32_ri_u8<string OpcStr, PatFrag OpNode> { -let isExtendable = 1, opExtendable = 2, isExtentSigned = 0, opExtentBits = 8 in - def ri : ALU32_ri<(outs PredRegs:$dst), (ins IntRegs:$b, u8Ext:$c), - !strconcat("$dst = ", !strconcat(OpcStr, "($b, #$c)")), - [(set (i1 PredRegs:$dst), (OpNode (i32 IntRegs:$b), - u8ExtPred:$c))]>; -} - multiclass CMP32_ri_s8<string OpcStr, PatFrag OpNode> { let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 8 in def ri : ALU32_ri<(outs PredRegs:$dst), (ins IntRegs:$b, s8Ext:$c), @@ -95,22 +83,30 @@ let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 8 in //===----------------------------------------------------------------------===// // ALU32/ALU (Instructions with register-register form) //===----------------------------------------------------------------------===// -multiclass ALU32_Pbase<string mnemonic, bit isNot, - bit isPredNew> { +def SDTHexagonI64I32I32 : SDTypeProfile<1, 2, + [SDTCisVT<0, i64>, SDTCisVT<1, i32>, SDTCisSameAs<1, 2>]>; + +def HexagonWrapperCombineII : + SDNode<"HexagonISD::WrapperCombineII", SDTHexagonI64I32I32>; - let PNewValue = !if(isPredNew, "new", "") in - def NAME : ALU32_rr<(outs IntRegs:$dst), +def HexagonWrapperCombineRR : + SDNode<"HexagonISD::WrapperCombineRR", SDTHexagonI64I32I32>; + +multiclass ALU32_Pbase<string mnemonic, RegisterClass RC, bit isNot, + bit isPredNew> { + let isPredicatedNew = isPredNew in + def NAME : ALU32_rr<(outs RC:$dst), (ins PredRegs:$src1, IntRegs:$src2, IntRegs: $src3), !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew,".new) $dst = ", ") $dst = ")#mnemonic#"($src2, $src3)", []>; } -multiclass ALU32_Pred<string mnemonic, bit PredNot> { - let PredSense = !if(PredNot, "false", "true") in { - defm _c#NAME : ALU32_Pbase<mnemonic, PredNot, 0>; +multiclass ALU32_Pred<string 
mnemonic, RegisterClass RC, bit PredNot> { + let isPredicatedFalse = PredNot in { + defm _c#NAME : ALU32_Pbase<mnemonic, RC, PredNot, 0>; // Predicate new - defm _cdn#NAME : ALU32_Pbase<mnemonic, PredNot, 1>; + defm _cdn#NAME : ALU32_Pbase<mnemonic, RC, PredNot, 1>; } } @@ -125,8 +121,8 @@ multiclass ALU32_base<string mnemonic, string CextOp, SDNode OpNode> { (i32 IntRegs:$src2)))]>; let neverHasSideEffects = 1, isPredicated = 1 in { - defm Pt : ALU32_Pred<mnemonic, 0>; - defm NotPt : ALU32_Pred<mnemonic, 1>; + defm Pt : ALU32_Pred<mnemonic, IntRegs, 0>; + defm NotPt : ALU32_Pred<mnemonic, IntRegs, 1>; } } } @@ -140,11 +136,42 @@ let isCommutable = 1 in { defm SUB_rr : ALU32_base<"sub", "SUB", sub>, ImmRegRel, PredNewRel; +// Combines the two integer registers SRC1 and SRC2 into a double register. +let isPredicable = 1 in +class T_Combine : ALU32_rr<(outs DoubleRegs:$dst), + (ins IntRegs:$src1, IntRegs:$src2), + "$dst = combine($src1, $src2)", + [(set (i64 DoubleRegs:$dst), + (i64 (HexagonWrapperCombineRR (i32 IntRegs:$src1), + (i32 IntRegs:$src2))))]>; + +multiclass Combine_base { + let BaseOpcode = "combine" in { + def NAME : T_Combine; + let neverHasSideEffects = 1, isPredicated = 1 in { + defm Pt : ALU32_Pred<"combine", DoubleRegs, 0>; + defm NotPt : ALU32_Pred<"combine", DoubleRegs, 1>; + } + } +} + +defm COMBINE_rr : Combine_base, PredNewRel; + +// Combines the two immediates SRC1 and SRC2 into a double register. +class COMBINE_imm<Operand imm1, Operand imm2, PatLeaf pat1, PatLeaf pat2> : + ALU32_ii<(outs DoubleRegs:$dst), (ins imm1:$src1, imm2:$src2), + "$dst = combine(#$src1, #$src2)", + [(set (i64 DoubleRegs:$dst), + (i64 (HexagonWrapperCombineII (i32 pat1:$src1), (i32 pat2:$src2))))]>; + +let isExtendable = 1, opExtendable = 1, isExtentSigned = 1, opExtentBits = 8 in +def COMBINE_Ii : COMBINE_imm<s8Ext, s8Imm, s8ExtPred, s8ImmPred>; + //===----------------------------------------------------------------------===// // ALU32/ALU (ADD with register-immediate form) //===----------------------------------------------------------------------===// multiclass ALU32ri_Pbase<string mnemonic, bit isNot, bit isPredNew> { - let PNewValue = !if(isPredNew, "new", "") in + let isPredicatedNew = isPredNew in def NAME : ALU32_ri<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2, s8Ext: $src3), !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew,".new) $dst = ", @@ -153,7 +180,7 @@ multiclass ALU32ri_Pbase<string mnemonic, bit isNot, bit isPredNew> { } multiclass ALU32ri_Pred<string mnemonic, bit PredNot> { - let PredSense = !if(PredNot, "false", "true") in { + let isPredicatedFalse = PredNot in { defm _c#NAME : ALU32ri_Pbase<mnemonic, PredNot, 0>; // Predicate new defm _cdn#NAME : ALU32ri_Pbase<mnemonic, PredNot, 1>; @@ -189,11 +216,6 @@ def OR_ri : ALU32_ri<(outs IntRegs:$dst), [(set (i32 IntRegs:$dst), (or (i32 IntRegs:$src1), s10ExtPred:$src2))]>, ImmRegRel; -def NOT_rr : ALU32_rr<(outs IntRegs:$dst), - (ins IntRegs:$src1), - "$dst = not($src1)", - [(set (i32 IntRegs:$dst), (not (i32 IntRegs:$src1)))]>; - let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 10, InputType = "imm", CextOpcode = "AND" in def AND_ri : ALU32_ri<(outs IntRegs:$dst), @@ -201,10 +223,7 @@ def AND_ri : ALU32_ri<(outs IntRegs:$dst), "$dst = and($src1, #$src2)", [(set (i32 IntRegs:$dst), (and (i32 IntRegs:$src1), s10ExtPred:$src2))]>, ImmRegRel; -// Negate. 
-def NEG : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1), - "$dst = neg($src1)", - [(set (i32 IntRegs:$dst), (ineg (i32 IntRegs:$src1)))]>; + // Nop. let neverHasSideEffects = 1 in def NOP : ALU32_rr<(outs), (ins), @@ -220,15 +239,21 @@ def SUB_ri : ALU32_ri<(outs IntRegs:$dst), [(set IntRegs:$dst, (sub s10ExtPred:$src1, IntRegs:$src2))]>, ImmRegRel; +// Rd = not(Rs) gets mapped to Rd=sub(#-1, Rs). +def : Pat<(not (i32 IntRegs:$src1)), + (SUB_ri -1, (i32 IntRegs:$src1))>; + +// Rd = neg(Rs) gets mapped to Rd=sub(#0, Rs). +// Pattern definition for 'neg' was not necessary. multiclass TFR_Pred<bit PredNot> { - let PredSense = !if(PredNot, "false", "true") in { + let isPredicatedFalse = PredNot in { def _c#NAME : ALU32_rr<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2), !if(PredNot, "if (!$src1", "if ($src1")#") $dst = $src2", []>; // Predicate new - let PNewValue = "new" in + let isPredicatedNew = 1 in def _cdn#NAME : ALU32_rr<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2), !if(PredNot, "if (!$src1", "if ($src1")#".new) $dst = $src2", @@ -274,10 +299,10 @@ class T_TFR64_Pred<bit PredNot, bit isPredNew> } multiclass TFR64_Pred<bit PredNot> { - let PredSense = !if(PredNot, "false", "true") in { + let isPredicatedFalse = PredNot in { def _c#NAME : T_TFR64_Pred<PredNot, 0>; - let PNewValue = "new" in + let isPredicatedNew = 1 in def _cdn#NAME : T_TFR64_Pred<PredNot, 1>; // Predicate new } } @@ -309,14 +334,14 @@ multiclass TFR64_base<string BaseName> { } multiclass TFRI_Pred<bit PredNot> { - let isMoveImm = 1, PredSense = !if(PredNot, "false", "true") in { + let isMoveImm = 1, isPredicatedFalse = PredNot in { def _c#NAME : ALU32_ri<(outs IntRegs:$dst), (ins PredRegs:$src1, s12Ext:$src2), !if(PredNot, "if (!$src1", "if ($src1")#") $dst = #$src2", []>; // Predicate new - let PNewValue = "new" in + let isPredicatedNew = 1 in def _cdn#NAME : ALU32_rr<(outs IntRegs:$dst), (ins PredRegs:$src1, s12Ext:$src2), !if(PredNot, "if (!$src1", "if ($src1")#".new) $dst = #$src2", @@ -359,52 +384,6 @@ def TFCR : CRInst<(outs CRRegs:$dst), (ins IntRegs:$src1), // ALU32/PERM + //===----------------------------------------------------------------------===// -// Combine. - -def SDTHexagonI64I32I32 : SDTypeProfile<1, 2, - [SDTCisVT<0, i64>, SDTCisVT<1, i32>, SDTCisSameAs<1, 2>]>; - -def HexagonWrapperCombineII : - SDNode<"HexagonISD::WrapperCombineII", SDTHexagonI64I32I32>; -def HexagonWrapperCombineRR : - SDNode<"HexagonISD::WrapperCombineRR", SDTHexagonI64I32I32>; - -// Combines the two integer registers SRC1 and SRC2 into a double register. -let isPredicable = 1 in -def COMBINE_rr : ALU32_rr<(outs DoubleRegs:$dst), (ins IntRegs:$src1, - IntRegs:$src2), - "$dst = combine($src1, $src2)", - [(set (i64 DoubleRegs:$dst), - (i64 (HexagonWrapperCombineRR (i32 IntRegs:$src1), - (i32 IntRegs:$src2))))]>; - -// Rd=combine(Rt.[HL], Rs.[HL]) -class COMBINE_halves<string A, string B>: ALU32_rr<(outs IntRegs:$dst), - (ins IntRegs:$src1, - IntRegs:$src2), - "$dst = combine($src1."# A #", $src2."# B #")", []>; - -let isPredicable = 1 in { - def COMBINE_hh : COMBINE_halves<"H", "H">; - def COMBINE_hl : COMBINE_halves<"H", "L">; - def COMBINE_lh : COMBINE_halves<"L", "H">; - def COMBINE_ll : COMBINE_halves<"L", "L">; -} - -def : Pat<(i32 (trunc (i64 (srl (i64 DoubleRegs:$a), (i32 16))))), - (COMBINE_lh (EXTRACT_SUBREG (i64 DoubleRegs:$a), subreg_hireg), - (EXTRACT_SUBREG (i64 DoubleRegs:$a), subreg_loreg))>; - -// Combines the two immediates SRC1 and SRC2 into a double register. 
-class COMBINE_imm<Operand imm1, Operand imm2, PatLeaf pat1, PatLeaf pat2> : - ALU32_ii<(outs DoubleRegs:$dst), (ins imm1:$src1, imm2:$src2), - "$dst = combine(#$src1, #$src2)", - [(set (i64 DoubleRegs:$dst), - (i64 (HexagonWrapperCombineII (i32 pat1:$src1), (i32 pat2:$src2))))]>; - -let isExtendable = 1, opExtendable = 1, isExtentSigned = 1, opExtentBits = 8 in -def COMBINE_Ii : COMBINE_imm<s8Ext, s8Imm, s8ExtPred, s8ImmPred>; - // Mux. def VMUX_prr64 : ALU64_rr<(outs DoubleRegs:$dst), (ins PredRegs:$src1, DoubleRegs:$src2, @@ -507,40 +486,24 @@ def : Pat <(sext_inreg (i32 IntRegs:$src1), i16), // ALU32/PRED + //===----------------------------------------------------------------------===// -// Conditional combine. -let neverHasSideEffects = 1, isPredicated = 1 in { -def COMBINE_rr_cPt : ALU32_rr<(outs DoubleRegs:$dst), - (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), - "if ($src1) $dst = combine($src2, $src3)", - []>; - -let isPredicatedFalse = 1 in -def COMBINE_rr_cNotPt : ALU32_rr<(outs DoubleRegs:$dst), - (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), - "if (!$src1) $dst = combine($src2, $src3)", - []>; - -let isPredicatedNew = 1 in -def COMBINE_rr_cdnPt : ALU32_rr<(outs DoubleRegs:$dst), - (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), - "if ($src1.new) $dst = combine($src2, $src3)", - []>; - -let isPredicatedNew = 1, isPredicatedFalse = 1 in -def COMBINE_rr_cdnNotPt : ALU32_rr<(outs DoubleRegs:$dst), - (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), - "if (!$src1.new) $dst = combine($src2, $src3)", - []>; -} - // Compare. defm CMPGTU : CMP32_rr_ri_u9<"cmp.gtu", "CMPGTU", setugt>, ImmRegRel; defm CMPGT : CMP32_rr_ri_s10<"cmp.gt", "CMPGT", setgt>, ImmRegRel; -defm CMPLT : CMP32_rr<"cmp.lt", setlt>; -defm CMPLTU : CMP32_rr<"cmp.ltu", setult>; defm CMPEQ : CMP32_rr_ri_s10<"cmp.eq", "CMPEQ", seteq>, ImmRegRel; -defm CMPGE : CMP32_ri_s8<"cmp.ge", setge>; -defm CMPGEU : CMP32_ri_u8<"cmp.geu", setuge>; + +// SDNode for converting immediate C to C-1. +def DEC_CONST_SIGNED : SDNodeXForm<imm, [{ + // Return the byte immediate const-1 as an SDNode. + int32_t imm = N->getSExtValue(); + return XformSToSM1Imm(imm); +}]>; + +// SDNode for converting immediate C to C-1. +def DEC_CONST_UNSIGNED : SDNodeXForm<imm, [{ + // Return the byte immediate const-1 as an SDNode. + uint32_t imm = N->getZExtValue(); + return XformUToUM1Imm(imm); +}]>; def CTLZ_rr : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1), "$dst = cl0($src1)", @@ -774,112 +737,153 @@ def XOR_pp : SInst<(outs PredRegs:$dst), (ins PredRegs:$src1, PredRegs:$src2), // CR - //===----------------------------------------------------------------------===// +def retflag : SDNode<"HexagonISD::RET_FLAG", SDTNone, + [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; +def eh_return: SDNode<"HexagonISD::EH_RETURN", SDTNone, + [SDNPHasChain]>; -//===----------------------------------------------------------------------===// -// J + -//===----------------------------------------------------------------------===// -// Jump to address. 
-let isBranch = 1, isTerminator=1, isBarrier = 1, isPredicable = 1 in { - def JMP : JInst< (outs), - (ins brtarget:$offset), - "jump $offset", - [(br bb:$offset)]>; -} +def SDHexagonBR_JT: SDTypeProfile<0, 1, [SDTCisVT<0, i32>]>; +def HexagonBR_JT: SDNode<"HexagonISD::BR_JT", SDHexagonBR_JT, [SDNPHasChain]>; -// if (p0) jump -let isBranch = 1, isTerminator=1, Defs = [PC], - isPredicated = 1 in { - def JMP_c : JInst< (outs), - (ins PredRegs:$src, brtarget:$offset), - "if ($src) jump $offset", - [(brcond (i1 PredRegs:$src), bb:$offset)]>; -} +let InputType = "imm", isBarrier = 1, isPredicable = 1, +Defs = [PC], isExtendable = 1, opExtendable = 0, isExtentSigned = 1, +opExtentBits = 24 in +class T_JMP <dag InsDag, list<dag> JumpList = []> + : JInst<(outs), InsDag, + "jump $dst" , JumpList> { + bits<24> dst; + + let IClass = 0b0101; + + let Inst{27-25} = 0b100; + let Inst{24-16} = dst{23-15}; + let Inst{13-1} = dst{14-2}; +} + +let InputType = "imm", isExtendable = 1, opExtendable = 1, isExtentSigned = 1, +Defs = [PC], isPredicated = 1, opExtentBits = 17 in +class T_JMP_c <bit PredNot, bit isPredNew, bit isTaken>: + JInst<(outs ), (ins PredRegs:$src, brtarget:$dst), + !if(PredNot, "if (!$src", "if ($src")# + !if(isPredNew, ".new) ", ") ")#"jump"# + !if(isPredNew, !if(isTaken, ":t ", ":nt "), " ")#"$dst"> { + + let isBrTaken = !if(isPredNew, !if(isTaken, "true", "false"), ""); + let isPredicatedFalse = PredNot; + let isPredicatedNew = isPredNew; + bits<2> src; + bits<17> dst; + + let IClass = 0b0101; + + let Inst{27-24} = 0b1100; + let Inst{21} = PredNot; + let Inst{12} = !if(isPredNew, isTaken, zero); + let Inst{11} = isPredNew; + let Inst{9-8} = src; + let Inst{23-22} = dst{16-15}; + let Inst{20-16} = dst{14-10}; + let Inst{13} = dst{9}; + let Inst{7-1} = dst{8-2}; + } -// if (!p0) jump -let isBranch = 1, isTerminator=1, neverHasSideEffects = 1, Defs = [PC], - isPredicated = 1 in { - def JMP_cNot : JInst< (outs), - (ins PredRegs:$src, brtarget:$offset), - "if (!$src) jump $offset", - []>; +let isBarrier = 1, Defs = [PC], isPredicable = 1, InputType = "reg" in +class T_JMPr<dag InsDag = (ins IntRegs:$dst)> + : JRInst<(outs ), InsDag, + "jumpr $dst" , + []> { + bits<5> dst; + + let IClass = 0b0101; + let Inst{27-21} = 0b0010100; + let Inst{20-16} = dst; } -let isTerminator = 1, isBranch = 1, neverHasSideEffects = 1, Defs = [PC], - isPredicated = 1 in { - def BRCOND : JInst < (outs), (ins PredRegs:$pred, brtarget:$dst), - "if ($pred) jump $dst", - []>; +let Defs = [PC], isPredicated = 1, InputType = "reg" in +class T_JMPr_c <bit PredNot, bit isPredNew, bit isTaken>: + JRInst <(outs ), (ins PredRegs:$src, IntRegs:$dst), + !if(PredNot, "if (!$src", "if ($src")# + !if(isPredNew, ".new) ", ") ")#"jumpr"# + !if(isPredNew, !if(isTaken, ":t ", ":nt "), " ")#"$dst"> { + + let isBrTaken = !if(isPredNew, !if(isTaken, "true", "false"), ""); + let isPredicatedFalse = PredNot; + let isPredicatedNew = isPredNew; + bits<2> src; + bits<5> dst; + + let IClass = 0b0101; + + let Inst{27-22} = 0b001101; + let Inst{21} = PredNot; + let Inst{20-16} = dst; + let Inst{12} = !if(isPredNew, isTaken, zero); + let Inst{11} = isPredNew; + let Inst{9-8} = src; + let Predicates = !if(isPredNew, [HasV3T], [HasV2T]); + let validSubTargets = !if(isPredNew, HasV3SubT, HasV2SubT); } -// Jump to address conditioned on new predicate. 
-// if (p0) jump:t -let isBranch = 1, isTerminator=1, neverHasSideEffects = 1, Defs = [PC], - isPredicated = 1 in { - def JMP_cdnPt : JInst< (outs), - (ins PredRegs:$src, brtarget:$offset), - "if ($src.new) jump:t $offset", - []>; +multiclass JMP_Pred<bit PredNot> { + def _#NAME : T_JMP_c<PredNot, 0, 0>; + // Predicate new + def _#NAME#new_t : T_JMP_c<PredNot, 1, 1>; // taken + def _#NAME#new_nt : T_JMP_c<PredNot, 1, 0>; // not taken } -// if (!p0) jump:t -let isBranch = 1, isTerminator=1, neverHasSideEffects = 1, Defs = [PC], - isPredicated = 1 in { - def JMP_cdnNotPt : JInst< (outs), - (ins PredRegs:$src, brtarget:$offset), - "if (!$src.new) jump:t $offset", - []>; +multiclass JMP_base<string BaseOp> { + let BaseOpcode = BaseOp in { + def NAME : T_JMP<(ins brtarget:$dst), [(br bb:$dst)]>; + defm t : JMP_Pred<0>; + defm f : JMP_Pred<1>; + } } -// Not taken. -let isBranch = 1, isTerminator=1, neverHasSideEffects = 1, Defs = [PC], - isPredicated = 1 in { - def JMP_cdnPnt : JInst< (outs), - (ins PredRegs:$src, brtarget:$offset), - "if ($src.new) jump:nt $offset", - []>; +multiclass JMPR_Pred<bit PredNot> { + def NAME: T_JMPr_c<PredNot, 0, 0>; + // Predicate new + def NAME#new_tV3 : T_JMPr_c<PredNot, 1, 1>; // taken + def NAME#new_ntV3 : T_JMPr_c<PredNot, 1, 0>; // not taken } -// Not taken. -let isBranch = 1, isTerminator=1, neverHasSideEffects = 1, Defs = [PC], - isPredicated = 1 in { - def JMP_cdnNotPnt : JInst< (outs), - (ins PredRegs:$src, brtarget:$offset), - "if (!$src.new) jump:nt $offset", - []>; +multiclass JMPR_base<string BaseOp> { + let BaseOpcode = BaseOp in { + def NAME : T_JMPr; + defm _t : JMPR_Pred<0>; + defm _f : JMPR_Pred<1>; + } } -//===----------------------------------------------------------------------===// -// J - -//===----------------------------------------------------------------------===// -//===----------------------------------------------------------------------===// -// JR + -//===----------------------------------------------------------------------===// -def retflag : SDNode<"HexagonISD::RET_FLAG", SDTNone, - [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; +let isTerminator = 1, neverHasSideEffects = 1 in { +let isBranch = 1 in +defm JMP : JMP_base<"JMP">, PredNewRel; -// Jump to address from register. -let isPredicable =1, isReturn = 1, isTerminator = 1, isBarrier = 1, - Defs = [PC], Uses = [R31] in { - def JMPR: JRInst<(outs), (ins), - "jumpr r31", - [(retflag)]>; -} +let isBranch = 1, isIndirectBranch = 1 in +defm JMPR : JMPR_base<"JMPr">, PredNewRel; -// Jump to address from register. -let isReturn = 1, isTerminator = 1, isBarrier = 1, isPredicated = 1, - Defs = [PC], Uses = [R31] in { - def JMPR_cPt: JRInst<(outs), (ins PredRegs:$src1), - "if ($src1) jumpr r31", - []>; +let isReturn = 1, isCodeGenOnly = 1 in +defm JMPret : JMPR_base<"JMPret">, PredNewRel; } -// Jump to address from register. -let isReturn = 1, isTerminator = 1, isBarrier = 1, isPredicated = 1, - Defs = [PC], Uses = [R31] in { - def JMPR_cNotPt: JRInst<(outs), (ins PredRegs:$src1), - "if (!$src1) jumpr r31", - []>; -} +def : Pat<(retflag), + (JMPret (i32 R31))>; + +def : Pat <(brcond (i1 PredRegs:$src1), bb:$offset), + (JMP_t (i1 PredRegs:$src1), bb:$offset)>; + +// A return through builtin_eh_return. 
+let isReturn = 1, isTerminator = 1, isBarrier = 1, neverHasSideEffects = 1, +isCodeGenOnly = 1, Defs = [PC], Uses = [R28], isPredicable = 0 in +def EH_RETURN_JMPR : T_JMPr; + +def : Pat<(eh_return), + (EH_RETURN_JMPR (i32 R31))>; + +def : Pat<(HexagonBR_JT (i32 IntRegs:$dst)), + (JMPR (i32 IntRegs:$dst))>; + +def : Pat<(brind (i32 IntRegs:$dst)), + (JMPR (i32 IntRegs:$dst))>; //===----------------------------------------------------------------------===// // JR - @@ -892,7 +896,7 @@ let isReturn = 1, isTerminator = 1, isBarrier = 1, isPredicated = 1, // Load -- MEMri operand multiclass LD_MEMri_Pbase<string mnemonic, RegisterClass RC, bit isNot, bit isPredNew> { - let PNewValue = !if(isPredNew, "new", "") in + let isPredicatedNew = isPredNew in def NAME : LDInst2<(outs RC:$dst), (ins PredRegs:$src1, MEMri:$addr), !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ", @@ -901,7 +905,7 @@ multiclass LD_MEMri_Pbase<string mnemonic, RegisterClass RC, } multiclass LD_MEMri_Pred<string mnemonic, RegisterClass RC, bit PredNot> { - let PredSense = !if(PredNot, "false", "true") in { + let isPredicatedFalse = PredNot in { defm _c#NAME : LD_MEMri_Pbase<mnemonic, RC, PredNot, 0>; // Predicate new defm _cdn#NAME : LD_MEMri_Pbase<mnemonic, RC, PredNot, 1>; @@ -958,7 +962,7 @@ def : Pat < (i64 (load ADDRriS11_3:$addr)), // Load - Base with Immediate offset addressing mode multiclass LD_Idxd_Pbase<string mnemonic, RegisterClass RC, Operand predImmOp, bit isNot, bit isPredNew> { - let PNewValue = !if(isPredNew, "new", "") in + let isPredicatedNew = isPredNew in def NAME : LDInst2<(outs RC:$dst), (ins PredRegs:$src1, IntRegs:$src2, predImmOp:$src3), !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ", @@ -968,7 +972,7 @@ multiclass LD_Idxd_Pbase<string mnemonic, RegisterClass RC, Operand predImmOp, multiclass LD_Idxd_Pred<string mnemonic, RegisterClass RC, Operand predImmOp, bit PredNot> { - let PredSense = !if(PredNot, "false", "true") in { + let isPredicatedFalse = PredNot in { defm _c#NAME : LD_Idxd_Pbase<mnemonic, RC, predImmOp, PredNot, 0>; // Predicate new defm _cdn#NAME : LD_Idxd_Pbase<mnemonic, RC, predImmOp, PredNot, 1>; @@ -1038,7 +1042,7 @@ def : Pat < (i64 (load (add IntRegs:$src1, s11_3ExtPred:$offset))), multiclass LD_PostInc_Pbase<string mnemonic, RegisterClass RC, Operand ImmOp, bit isNot, bit isPredNew> { - let PNewValue = !if(isPredNew, "new", "") in + let isPredicatedNew = isPredNew in def NAME : LDInst2PI<(outs RC:$dst, IntRegs:$dst2), (ins PredRegs:$src1, IntRegs:$src2, ImmOp:$offset), !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ", @@ -1049,7 +1053,7 @@ multiclass LD_PostInc_Pbase<string mnemonic, RegisterClass RC, Operand ImmOp, multiclass LD_PostInc_Pred<string mnemonic, RegisterClass RC, Operand ImmOp, bit PredNot> { - let PredSense = !if(PredNot, "false", "true") in { + let isPredicatedFalse = PredNot in { defm _c#NAME : LD_PostInc_Pbase<mnemonic, RC, ImmOp, PredNot, 0>; // Predicate new let Predicates = [HasV4T], validSubTargets = HasV4SubT in @@ -1366,7 +1370,7 @@ def SUBri_acc : MInst_acc<(outs IntRegs: $dst), (ins IntRegs:$src1, multiclass ST_PostInc_Pbase<string mnemonic, RegisterClass RC, Operand ImmOp, bit isNot, bit isPredNew> { - let PNewValue = !if(isPredNew, "new", "") in + let isPredicatedNew = isPredNew in def NAME : STInst2PI<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2, ImmOp:$offset, RC:$src3), !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ", @@ -1377,7 +1381,7 @@ multiclass ST_PostInc_Pbase<string mnemonic, 
RegisterClass RC, Operand ImmOp, multiclass ST_PostInc_Pred<string mnemonic, RegisterClass RC, Operand ImmOp, bit PredNot> { - let PredSense = !if(PredNot, "false", "true") in { + let isPredicatedFalse = PredNot in { defm _c#NAME# : ST_PostInc_Pbase<mnemonic, RC, ImmOp, PredNot, 0>; // Predicate new let Predicates = [HasV4T], validSubTargets = HasV4SubT in @@ -1431,7 +1435,7 @@ def : Pat<(post_store (i64 DoubleRegs:$src1), IntRegs:$src2, //===----------------------------------------------------------------------===// multiclass ST_MEMri_Pbase<string mnemonic, RegisterClass RC, bit isNot, bit isPredNew> { - let PNewValue = !if(isPredNew, "new", "") in + let isPredicatedNew = isPredNew in def NAME : STInst2<(outs), (ins PredRegs:$src1, MEMri:$addr, RC: $src2), !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ", @@ -1440,7 +1444,7 @@ multiclass ST_MEMri_Pbase<string mnemonic, RegisterClass RC, bit isNot, } multiclass ST_MEMri_Pred<string mnemonic, RegisterClass RC, bit PredNot> { - let PredSense = !if(PredNot, "false", "true") in { + let isPredicatedFalse = PredNot in { defm _c#NAME : ST_MEMri_Pbase<mnemonic, RC, PredNot, 0>; // Predicate new @@ -1497,7 +1501,7 @@ def : Pat<(store (i64 DoubleRegs:$src1), ADDRriS11_3:$addr), //===----------------------------------------------------------------------===// multiclass ST_Idxd_Pbase<string mnemonic, RegisterClass RC, Operand predImmOp, bit isNot, bit isPredNew> { - let PNewValue = !if(isPredNew, "new", "") in + let isPredicatedNew = isPredNew in def NAME : STInst2<(outs), (ins PredRegs:$src1, IntRegs:$src2, predImmOp:$src3, RC: $src4), !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ", @@ -1507,7 +1511,7 @@ multiclass ST_Idxd_Pbase<string mnemonic, RegisterClass RC, Operand predImmOp, multiclass ST_Idxd_Pred<string mnemonic, RegisterClass RC, Operand predImmOp, bit PredNot> { - let PredSense = !if(PredNot, "false", "true"), isPredicated = 1 in { + let isPredicatedFalse = PredNot, isPredicated = 1 in { defm _c#NAME : ST_Idxd_Pbase<mnemonic, RC, predImmOp, PredNot, 0>; // Predicate new @@ -2023,20 +2027,18 @@ let isCall = 1, neverHasSideEffects = 1, []>; } -// Tail Calls. -let isCall = 1, isBarrier = 1, isReturn = 1, isTerminator = 1 in { - def TCRETURNtg : JInst<(outs), (ins calltarget:$dst), - "jump $dst // TAILCALL", []>; -} -let isCall = 1, isBarrier = 1, isReturn = 1, isTerminator = 1 in { - def TCRETURNtext : JInst<(outs), (ins calltarget:$dst), - "jump $dst // TAILCALL", []>; -} -let isCall = 1, isBarrier = 1, isReturn = 1, isTerminator = 1 in { - def TCRETURNR : JInst<(outs), (ins IntRegs:$dst), - "jumpr $dst // TAILCALL", []>; +// Indirect tail-call. +let isCodeGenOnly = 1, isCall = 1, isReturn = 1 in +def TCRETURNR : T_JMPr; + +// Direct tail-calls. +let isCall = 1, isReturn = 1, isBarrier = 1, isPredicable = 0, +isTerminator = 1, isCodeGenOnly = 1 in { + def TCRETURNtg : T_JMP<(ins calltarget:$dst)>; + def TCRETURNtext : T_JMP<(ins calltarget:$dst)>; } + // Map call instruction. def : Pat<(call (i32 IntRegs:$dst)), (CALLR (i32 IntRegs:$dst))>, Requires<[HasV2TOnly]>; @@ -2133,10 +2135,11 @@ def : Pat <(add (i1 PredRegs:$src1), -1), // Map from p0 = setlt(r0, r1) r2 = mux(p0, r3, r4) => // p0 = cmp.lt(r0, r1), r0 = mux(p0, r2, r1). 
+// cmp.lt(r0, r1) -> cmp.gt(r1, r0) def : Pat <(select (i1 (setlt (i32 IntRegs:$src1), (i32 IntRegs:$src2))), (i32 IntRegs:$src3), (i32 IntRegs:$src4)), - (i32 (TFR_condset_rr (CMPLTrr (i32 IntRegs:$src1), (i32 IntRegs:$src2)), + (i32 (TFR_condset_rr (CMPGTrr (i32 IntRegs:$src2), (i32 IntRegs:$src1)), (i32 IntRegs:$src4), (i32 IntRegs:$src3)))>, Requires<[HasV2TOnly]>; @@ -2154,18 +2157,25 @@ def : Pat <(select (not (i1 PredRegs:$src1)), s12ImmPred:$src2, // Map from p0 = pnot(p0); r0 = mux(p0, r1, #i) // => r0 = TFR_condset_ir(p0, #i, r1) -def : Pat <(select (not PredRegs:$src1), IntRegs:$src2, s12ImmPred:$src3), +def : Pat <(select (not (i1 PredRegs:$src1)), IntRegs:$src2, s12ImmPred:$src3), (i32 (TFR_condset_ir (i1 PredRegs:$src1), s12ImmPred:$src3, (i32 IntRegs:$src2)))>; // Map from p0 = pnot(p0); if (p0) jump => if (!p0) jump. -def : Pat <(brcond (not PredRegs:$src1), bb:$offset), - (JMP_cNot (i1 PredRegs:$src1), bb:$offset)>; +def : Pat <(brcond (not (i1 PredRegs:$src1)), bb:$offset), + (JMP_f (i1 PredRegs:$src1), bb:$offset)>; // Map from p2 = pnot(p2); p1 = and(p0, p2) => p1 = and(p0, !p2). -def : Pat <(and PredRegs:$src1, (not PredRegs:$src2)), +def : Pat <(and (i1 PredRegs:$src1), (not (i1 PredRegs:$src2))), (i1 (AND_pnotp (i1 PredRegs:$src1), (i1 PredRegs:$src2)))>; + +let AddedComplexity = 100 in +def : Pat <(i64 (zextloadi1 (HexagonCONST32 tglobaladdr:$global))), + (i64 (COMBINE_rr (TFRI 0), + (LDriub_indexed (CONST32_set tglobaladdr:$global), 0)))>, + Requires<[NoV4T]>; + // Map from i1 loads to 32 bits. This assumes that the i1* is byte aligned. let AddedComplexity = 10 in def : Pat <(i32 (zextloadi1 ADDRriS11_0:$addr)), @@ -2186,43 +2196,46 @@ def : Pat <(i64 (sext_inreg (i64 DoubleRegs:$src1), i8)), subreg_loreg))))))>; // We want to prevent emitting pnot's as much as possible. -// Map brcond with an unsupported setcc to a JMP_cNot. +// Map brcond with an unsupported setcc to a JMP_f. 
def : Pat <(brcond (i1 (setne (i32 IntRegs:$src1), (i32 IntRegs:$src2))), bb:$offset), - (JMP_cNot (CMPEQrr (i32 IntRegs:$src1), (i32 IntRegs:$src2)), + (JMP_f (CMPEQrr (i32 IntRegs:$src1), (i32 IntRegs:$src2)), bb:$offset)>; def : Pat <(brcond (i1 (setne (i32 IntRegs:$src1), s10ImmPred:$src2)), bb:$offset), - (JMP_cNot (CMPEQri (i32 IntRegs:$src1), s10ImmPred:$src2), bb:$offset)>; + (JMP_f (CMPEQri (i32 IntRegs:$src1), s10ImmPred:$src2), bb:$offset)>; def : Pat <(brcond (i1 (setne (i1 PredRegs:$src1), (i1 -1))), bb:$offset), - (JMP_cNot (i1 PredRegs:$src1), bb:$offset)>; + (JMP_f (i1 PredRegs:$src1), bb:$offset)>; def : Pat <(brcond (i1 (setne (i1 PredRegs:$src1), (i1 0))), bb:$offset), - (JMP_c (i1 PredRegs:$src1), bb:$offset)>; + (JMP_t (i1 PredRegs:$src1), bb:$offset)>; +// cmp.lt(Rs, Imm) -> !cmp.ge(Rs, Imm) -> !cmp.gt(Rs, Imm-1) def : Pat <(brcond (i1 (setlt (i32 IntRegs:$src1), s8ImmPred:$src2)), bb:$offset), - (JMP_cNot (CMPGEri (i32 IntRegs:$src1), s8ImmPred:$src2), bb:$offset)>; + (JMP_f (CMPGTri (i32 IntRegs:$src1), + (DEC_CONST_SIGNED s8ImmPred:$src2)), bb:$offset)>; +// cmp.lt(r0, r1) -> cmp.gt(r1, r0) def : Pat <(brcond (i1 (setlt (i32 IntRegs:$src1), (i32 IntRegs:$src2))), bb:$offset), - (JMP_c (CMPLTrr (i32 IntRegs:$src1), (i32 IntRegs:$src2)), bb:$offset)>; + (JMP_t (CMPGTrr (i32 IntRegs:$src2), (i32 IntRegs:$src1)), bb:$offset)>; def : Pat <(brcond (i1 (setuge (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2))), bb:$offset), - (JMP_cNot (CMPGTU64rr (i64 DoubleRegs:$src2), (i64 DoubleRegs:$src1)), + (JMP_f (CMPGTU64rr (i64 DoubleRegs:$src2), (i64 DoubleRegs:$src1)), bb:$offset)>; def : Pat <(brcond (i1 (setule (i32 IntRegs:$src1), (i32 IntRegs:$src2))), bb:$offset), - (JMP_cNot (CMPGTUrr (i32 IntRegs:$src1), (i32 IntRegs:$src2)), + (JMP_f (CMPGTUrr (i32 IntRegs:$src1), (i32 IntRegs:$src2)), bb:$offset)>; def : Pat <(brcond (i1 (setule (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2))), bb:$offset), - (JMP_cNot (CMPGTU64rr (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2)), + (JMP_f (CMPGTU64rr (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2)), bb:$offset)>; // Map from a 64-bit select to an emulated 64-bit mux. @@ -2300,8 +2313,8 @@ def : Pat<(i64 (anyext (i32 IntRegs:$src1))), // Map cmple -> cmpgt. // rs <= rt -> !(rs > rt). -def : Pat<(i1 (setle (i32 IntRegs:$src1), s10ImmPred:$src2)), - (i1 (NOT_p (CMPGTri (i32 IntRegs:$src1), s10ImmPred:$src2)))>; +def : Pat<(i1 (setle (i32 IntRegs:$src1), s10ExtPred:$src2)), + (i1 (NOT_p (CMPGTri (i32 IntRegs:$src1), s10ExtPred:$src2)))>; // rs <= rt -> !(rs > rt). def : Pat<(i1 (setle (i32 IntRegs:$src1), (i32 IntRegs:$src2))), @@ -2314,8 +2327,8 @@ def : Pat<(i1 (setle (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2))), // Map cmpne -> cmpeq. // Hexagon_TODO: We should improve on this. // rs != rt -> !(rs == rt). -def : Pat <(i1 (setne (i32 IntRegs:$src1), s10ImmPred:$src2)), - (i1 (NOT_p(i1 (CMPEQri (i32 IntRegs:$src1), s10ImmPred:$src2))))>; +def : Pat <(i1 (setne (i32 IntRegs:$src1), s10ExtPred:$src2)), + (i1 (NOT_p(i1 (CMPEQri (i32 IntRegs:$src1), s10ExtPred:$src2))))>; // Map cmpne(Rs) -> !cmpeqe(Rs). // rs != rt -> !(rs == rt). 
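The JMP_f/CMPGTri rewrites above, and the setge/setlt/setuge patterns in the next hunk, replace the old CMPGEri/CMPGEUri forms by decrementing the immediate (DEC_CONST_SIGNED / DEC_CONST_UNSIGNED). They lean on two small arithmetic identities; the register-register cases need no adjustment at all, since a < b is simply b > a with the operands swapped. Below is a standalone C++ check of those identities over the relevant immediate ranges; it is purely illustrative and is not part of the Hexagon backend.

#include <cassert>
#include <climits>
#include <cstdint>

// Identities behind the "decrement the immediate" rewrites:
//   signed:    x <  imm  <=>  !(x > imm - 1)      (cmp.lt  -> !cmp.gt)
//              x >= imm  <=>    x > imm - 1       (cmp.ge  ->  cmp.gt)
//   unsigned:  x >= imm  <=>    x > imm - 1       for imm >= 1
//              x >= 0    is always true           (hence cmp.eq(Rs, Rs))
int main() {
  const int32_t xs[] = {INT32_MIN, -129, -128, -1, 0, 1, 126, 127, INT32_MAX};
  for (int32_t x : xs)
    for (int32_t imm = -128; imm <= 127; ++imm) {   // s8 immediate range
      assert((x <  imm) == !(x > imm - 1));
      assert((x >= imm) ==  (x > imm - 1));
    }
  const uint32_t us[] = {0u, 1u, 254u, 255u, UINT32_MAX};
  for (uint32_t x : us) {
    assert(x >= 0u);                                // trivially true
    for (uint32_t imm = 1; imm <= 255; ++imm)       // u8 immediate range
      assert((x >= imm) == (x > imm - 1));
  }
  return 0;
}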
@@ -2337,8 +2350,9 @@ def : Pat <(i1 (setne (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2))), def : Pat <(i1 (setge (i32 IntRegs:$src1), (i32 IntRegs:$src2))), (i1 (NOT_p (i1 (CMPGTrr (i32 IntRegs:$src2), (i32 IntRegs:$src1)))))>; -def : Pat <(i1 (setge (i32 IntRegs:$src1), s8ImmPred:$src2)), - (i1 (CMPGEri (i32 IntRegs:$src1), s8ImmPred:$src2))>; +// cmpge(Rs, Imm) -> cmpgt(Rs, Imm-1) +def : Pat <(i1 (setge (i32 IntRegs:$src1), s8ExtPred:$src2)), + (i1 (CMPGTri (i32 IntRegs:$src1), (DEC_CONST_SIGNED s8ExtPred:$src2)))>; // Map cmpge(Rss, Rtt) -> !cmpgt(Rtt, Rss). // rss >= rtt -> !(rtt > rss). @@ -2347,9 +2361,10 @@ def : Pat <(i1 (setge (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2))), (i64 DoubleRegs:$src1)))))>; // Map cmplt(Rs, Imm) -> !cmpge(Rs, Imm). +// !cmpge(Rs, Imm) -> !cmpgt(Rs, Imm-1). // rs < rt -> !(rs >= rt). -def : Pat <(i1 (setlt (i32 IntRegs:$src1), s8ImmPred:$src2)), - (i1 (NOT_p (CMPGEri (i32 IntRegs:$src1), s8ImmPred:$src2)))>; +def : Pat <(i1 (setlt (i32 IntRegs:$src1), s8ExtPred:$src2)), + (i1 (NOT_p (CMPGTri (i32 IntRegs:$src1), (DEC_CONST_SIGNED s8ExtPred:$src2))))>; // Map cmplt(Rs, Rt) -> cmpgt(Rt, Rs). // rs < rt -> rt > rs. @@ -2373,13 +2388,17 @@ def : Pat <(i1 (setult (i32 IntRegs:$src1), (i32 IntRegs:$src2))), def : Pat <(i1 (setult (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2))), (i1 (CMPGTU64rr (i64 DoubleRegs:$src2), (i64 DoubleRegs:$src1)))>; -// Generate cmpgeu(Rs, #u8) -def : Pat <(i1 (setuge (i32 IntRegs:$src1), u8ImmPred:$src2)), - (i1 (CMPGEUri (i32 IntRegs:$src1), u8ImmPred:$src2))>; +// Generate cmpgeu(Rs, #0) -> cmpeq(Rs, Rs) +def : Pat <(i1 (setuge (i32 IntRegs:$src1), 0)), + (i1 (CMPEQrr (i32 IntRegs:$src1), (i32 IntRegs:$src1)))>; + +// Generate cmpgeu(Rs, #u8) -> cmpgtu(Rs, #u8 -1) +def : Pat <(i1 (setuge (i32 IntRegs:$src1), u8ExtPred:$src2)), + (i1 (CMPGTUri (i32 IntRegs:$src1), (DEC_CONST_UNSIGNED u8ExtPred:$src2)))>; // Generate cmpgtu(Rs, #u9) -def : Pat <(i1 (setugt (i32 IntRegs:$src1), u9ImmPred:$src2)), - (i1 (CMPGTUri (i32 IntRegs:$src1), u9ImmPred:$src2))>; +def : Pat <(i1 (setugt (i32 IntRegs:$src1), u9ExtPred:$src2)), + (i1 (CMPGTUri (i32 IntRegs:$src1), u9ExtPred:$src2))>; // Map from Rs >= Rt -> !(Rt > Rs). // rs >= rt -> !(rt > rs). @@ -2391,7 +2410,7 @@ def : Pat <(i1 (setuge (i32 IntRegs:$src1), (i32 IntRegs:$src2))), def : Pat <(i1 (setuge (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2))), (i1 (NOT_p (CMPGTU64rr (i64 DoubleRegs:$src2), (i64 DoubleRegs:$src1))))>; -// Map from cmpleu(Rs, Rs) -> !cmpgtu(Rs, Rs). +// Map from cmpleu(Rs, Rt) -> !cmpgtu(Rs, Rt). // Map from (Rs <= Rt) -> !(Rs > Rt). 
def : Pat <(i1 (setule (i32 IntRegs:$src1), (i32 IntRegs:$src2))), (i1 (NOT_p (CMPGTUrr (i32 IntRegs:$src1), (i32 IntRegs:$src2))))>; @@ -2487,6 +2506,13 @@ def: Pat <(i64 (zextloadi32 ADDRriS11_2:$src1)), (i64 (COMBINE_rr (TFRI 0), (LDriw ADDRriS11_2:$src1)))>, Requires<[NoV4T]>; +let AddedComplexity = 100 in +def: Pat <(i64 (zextloadi32 (i32 (add IntRegs:$src1, s11_2ExtPred:$offset)))), + (i64 (COMBINE_rr (TFRI 0), (LDriw_indexed IntRegs:$src1, + s11_2ExtPred:$offset)))>, + Requires<[NoV4T]>; + +let AddedComplexity = 10 in def: Pat <(i32 (zextloadi1 ADDRriS11_0:$src1)), (i32 (LDriw ADDRriS11_0:$src1))>; @@ -2503,6 +2529,48 @@ def : Pat <(i64 (anyext (i1 PredRegs:$src1))), (i64 (SXTW (i32 (MUX_ii (i1 PredRegs:$src1), 1, 0))))>; +let AddedComplexity = 100 in +def: Pat<(i64 (or (i64 (shl (i64 DoubleRegs:$srcHigh), + (i32 32))), + (i64 (zextloadi32 (i32 (add IntRegs:$src2, + s11_2ExtPred:$offset2)))))), + (i64 (COMBINE_rr (EXTRACT_SUBREG (i64 DoubleRegs:$srcHigh), subreg_loreg), + (LDriw_indexed IntRegs:$src2, + s11_2ExtPred:$offset2)))>; + +def: Pat<(i64 (or (i64 (shl (i64 DoubleRegs:$srcHigh), + (i32 32))), + (i64 (zextloadi32 ADDRriS11_2:$srcLow)))), + (i64 (COMBINE_rr (EXTRACT_SUBREG (i64 DoubleRegs:$srcHigh), subreg_loreg), + (LDriw ADDRriS11_2:$srcLow)))>; + +def: Pat<(i64 (or (i64 (shl (i64 DoubleRegs:$srcHigh), + (i32 32))), + (i64 (zext (i32 IntRegs:$srcLow))))), + (i64 (COMBINE_rr (EXTRACT_SUBREG (i64 DoubleRegs:$srcHigh), subreg_loreg), + IntRegs:$srcLow))>; + +let AddedComplexity = 100 in +def: Pat<(i64 (or (i64 (shl (i64 DoubleRegs:$srcHigh), + (i32 32))), + (i64 (zextloadi32 (i32 (add IntRegs:$src2, + s11_2ExtPred:$offset2)))))), + (i64 (COMBINE_rr (EXTRACT_SUBREG (i64 DoubleRegs:$srcHigh), subreg_loreg), + (LDriw_indexed IntRegs:$src2, + s11_2ExtPred:$offset2)))>; + +def: Pat<(i64 (or (i64 (shl (i64 DoubleRegs:$srcHigh), + (i32 32))), + (i64 (zextloadi32 ADDRriS11_2:$srcLow)))), + (i64 (COMBINE_rr (EXTRACT_SUBREG (i64 DoubleRegs:$srcHigh), subreg_loreg), + (LDriw ADDRriS11_2:$srcLow)))>; + +def: Pat<(i64 (or (i64 (shl (i64 DoubleRegs:$srcHigh), + (i32 32))), + (i64 (zext (i32 IntRegs:$srcLow))))), + (i64 (COMBINE_rr (EXTRACT_SUBREG (i64 DoubleRegs:$srcHigh), subreg_loreg), + IntRegs:$srcLow))>; + // Any extended 64-bit load. 
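The COMBINE_rr patterns in this hunk fold an insert-into-the-high-half idiom: once the 64-bit value is shifted left by 32, only the low word of $srcHigh can reach the result, so its low subregister can be paired directly with the zero-extended low word. A minimal C++ illustration of that equivalence, using made-up values rather than anything from the backend:

#include <cassert>
#include <cstdint>

// (srcHigh << 32) | zext(lo) only depends on the low 32 bits of srcHigh,
// which is why the patterns can use EXTRACT_SUBREG(..., subreg_loreg).
uint64_t combineWords(uint32_t hi, uint32_t lo) {
  return (static_cast<uint64_t>(hi) << 32) | lo;
}

int main() {
  uint64_t srcHigh = 0xDEADBEEF12345678ull;   // arbitrary example value
  uint32_t lo      = 0xCAFEBABEu;
  assert(((srcHigh << 32) | lo) ==
         combineWords(static_cast<uint32_t>(srcHigh), lo));
  return 0;
}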
// anyext i32 -> i64 def: Pat <(i64 (extloadi32 ADDRriS11_2:$src1)), @@ -2637,19 +2705,6 @@ let AddedComplexity = 100 in def : Pat<(i32 (sext_inreg (Hexagon_ARGEXTEND (i32 IntRegs:$src1)), i16)), (COPY (i32 IntRegs:$src1))>; -def SDHexagonBR_JT: SDTypeProfile<0, 1, [SDTCisVT<0, i32>]>; -def HexagonBR_JT: SDNode<"HexagonISD::BR_JT", SDHexagonBR_JT, [SDNPHasChain]>; - -let isBranch=1, isIndirectBranch=1, isTerminator=1, isBarrier = 1 in -def BR_JT : JRInst<(outs), (ins IntRegs:$src), - "jumpr $src", - [(HexagonBR_JT (i32 IntRegs:$src))]>; - -let isBranch=1, isIndirectBranch=1, isTerminator=1 in -def BRIND : JRInst<(outs), (ins IntRegs:$src), - "jumpr $src", - [(brind (i32 IntRegs:$src))]>; - def HexagonWrapperJT: SDNode<"HexagonISD::WrapperJT", SDTIntUnaryOp>; def : Pat<(HexagonWrapperJT tjumptable:$dst), diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoV3.td b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoV3.td index 157ab3d..7e75554 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoV3.td +++ b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoV3.td @@ -11,6 +11,11 @@ // //===----------------------------------------------------------------------===// +def callv3 : SDNode<"HexagonISD::CALLv3", SDT_SPCall, + [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, SDNPVariadic]>; + +def callv3nr : SDNode<"HexagonISD::CALLv3nr", SDT_SPCall, + [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, SDNPVariadic]>; //===----------------------------------------------------------------------===// // J + @@ -40,41 +45,6 @@ let isCall = 1, neverHasSideEffects = 1, []>, Requires<[HasV3TOnly]>; } - -// Jump to address from register -// if(p?.new) jumpr:t r? -let isReturn = 1, isTerminator = 1, isBarrier = 1, - Defs = [PC], Uses = [R31] in { - def JMPR_cdnPt_V3: JRInst<(outs), (ins PredRegs:$src1, IntRegs:$src2), - "if ($src1.new) jumpr:t $src2", - []>, Requires<[HasV3T]>; -} - -// if (!p?.new) jumpr:t r? -let isReturn = 1, isTerminator = 1, isBarrier = 1, - Defs = [PC], Uses = [R31] in { - def JMPR_cdnNotPt_V3: JRInst<(outs), (ins PredRegs:$src1, IntRegs:$src2), - "if (!$src1.new) jumpr:t $src2", - []>, Requires<[HasV3T]>; -} - -// Not taken. -// if(p?.new) jumpr:nt r? -let isReturn = 1, isTerminator = 1, isBarrier = 1, - Defs = [PC], Uses = [R31] in { - def JMPR_cdnPnt: JRInst<(outs), (ins PredRegs:$src1, IntRegs:$src2), - "if ($src1.new) jumpr:nt $src2", - []>, Requires<[HasV3T]>; -} - -// if (!p?.new) jumpr:nt r? -let isReturn = 1, isTerminator = 1, isBarrier = 1, - Defs = [PC], Uses = [R31] in { - def JMPR_cdnNotPnt: JRInst<(outs), (ins PredRegs:$src1, IntRegs:$src2), - "if (!$src1.new) jumpr:nt $src2", - []>, Requires<[HasV3T]>; -} - //===----------------------------------------------------------------------===// // JR - //===----------------------------------------------------------------------===// diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoV4.td b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoV4.td index cd0e475..933239d 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoV4.td +++ b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoV4.td @@ -209,105 +209,31 @@ def COMBINE_iI_V4 : ALU32_ii<(outs DoubleRegs:$dst), //===----------------------------------------------------------------------===// // LD + //===----------------------------------------------------------------------===// -// -// These absolute set addressing mode instructions accept immediate as -// an operand. We have duplicated these patterns to take global address. 
- +//===----------------------------------------------------------------------===// +// Template class for load instructions with Absolute set addressing mode. +//===----------------------------------------------------------------------===// let isExtended = 1, opExtendable = 2, neverHasSideEffects = 1, -validSubTargets = HasV4SubT in { -def LDrid_abs_setimm_V4 : LDInst2<(outs DoubleRegs:$dst1, IntRegs:$dst2), - (ins u0AlwaysExt:$addr), - "$dst1 = memd($dst2=##$addr)", - []>, - Requires<[HasV4T]>; - -// Rd=memb(Re=#U6) -def LDrib_abs_setimm_V4 : LDInst2<(outs IntRegs:$dst1, IntRegs:$dst2), - (ins u0AlwaysExt:$addr), - "$dst1 = memb($dst2=##$addr)", - []>, - Requires<[HasV4T]>; - -// Rd=memh(Re=#U6) -def LDrih_abs_setimm_V4 : LDInst2<(outs IntRegs:$dst1, IntRegs:$dst2), - (ins u0AlwaysExt:$addr), - "$dst1 = memh($dst2=##$addr)", - []>, - Requires<[HasV4T]>; - -// Rd=memub(Re=#U6) -def LDriub_abs_setimm_V4 : LDInst2<(outs IntRegs:$dst1, IntRegs:$dst2), +validSubTargets = HasV4SubT in +class T_LD_abs_set<string mnemonic, RegisterClass RC>: + LDInst2<(outs RC:$dst1, IntRegs:$dst2), (ins u0AlwaysExt:$addr), - "$dst1 = memub($dst2=##$addr)", + "$dst1 = "#mnemonic#"($dst2=##$addr)", []>, Requires<[HasV4T]>; -// Rd=memuh(Re=#U6) -def LDriuh_abs_setimm_V4 : LDInst2<(outs IntRegs:$dst1, IntRegs:$dst2), - (ins u0AlwaysExt:$addr), - "$dst1 = memuh($dst2=##$addr)", - []>, - Requires<[HasV4T]>; +def LDrid_abs_set_V4 : T_LD_abs_set <"memd", DoubleRegs>; +def LDrib_abs_set_V4 : T_LD_abs_set <"memb", IntRegs>; +def LDriub_abs_set_V4 : T_LD_abs_set <"memub", IntRegs>; +def LDrih_abs_set_V4 : T_LD_abs_set <"memh", IntRegs>; +def LDriw_abs_set_V4 : T_LD_abs_set <"memw", IntRegs>; +def LDriuh_abs_set_V4 : T_LD_abs_set <"memuh", IntRegs>; -// Rd=memw(Re=#U6) -def LDriw_abs_setimm_V4 : LDInst2<(outs IntRegs:$dst1, IntRegs:$dst2), - (ins u0AlwaysExt:$addr), - "$dst1 = memw($dst2=##$addr)", - []>, - Requires<[HasV4T]>; -} - -// Following patterns are defined for absolute set addressing mode -// instruction which take global address as operand. 
-let isExtended = 1, opExtendable = 2, neverHasSideEffects = 1, -validSubTargets = HasV4SubT in { -def LDrid_abs_set_V4 : LDInst2<(outs DoubleRegs:$dst1, IntRegs:$dst2), - (ins globaladdressExt:$addr), - "$dst1 = memd($dst2=##$addr)", - []>, - Requires<[HasV4T]>; - -// Rd=memb(Re=#U6) -def LDrib_abs_set_V4 : LDInst2<(outs IntRegs:$dst1, IntRegs:$dst2), - (ins globaladdressExt:$addr), - "$dst1 = memb($dst2=##$addr)", - []>, - Requires<[HasV4T]>; - -// Rd=memh(Re=#U6) -def LDrih_abs_set_V4 : LDInst2<(outs IntRegs:$dst1, IntRegs:$dst2), - (ins globaladdressExt:$addr), - "$dst1 = memh($dst2=##$addr)", - []>, - Requires<[HasV4T]>; - -// Rd=memub(Re=#U6) -def LDriub_abs_set_V4 : LDInst2<(outs IntRegs:$dst1, IntRegs:$dst2), - (ins globaladdressExt:$addr), - "$dst1 = memub($dst2=##$addr)", - []>, - Requires<[HasV4T]>; - -// Rd=memuh(Re=#U6) -def LDriuh_abs_set_V4 : LDInst2<(outs IntRegs:$dst1, IntRegs:$dst2), - (ins globaladdressExt:$addr), - "$dst1 = memuh($dst2=##$addr)", - []>, - Requires<[HasV4T]>; - -// Rd=memw(Re=#U6) -def LDriw_abs_set_V4 : LDInst2<(outs IntRegs:$dst1, IntRegs:$dst2), - (ins globaladdressExt:$addr), - "$dst1 = memw($dst2=##$addr)", - []>, - Requires<[HasV4T]>; -} // multiclass for load instructions with base + register offset // addressing mode multiclass ld_idxd_shl_pbase<string mnemonic, RegisterClass RC, bit isNot, bit isPredNew> { - let PNewValue = !if(isPredNew, "new", "") in + let isPredicatedNew = isPredNew in def NAME : LDInst2<(outs RC:$dst), (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$offset), !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ", @@ -316,7 +242,7 @@ multiclass ld_idxd_shl_pbase<string mnemonic, RegisterClass RC, bit isNot, } multiclass ld_idxd_shl_pred<string mnemonic, RegisterClass RC, bit PredNot> { - let PredSense = !if(PredNot, "false", "true") in { + let isPredicatedFalse = PredNot in { defm _c#NAME : ld_idxd_shl_pbase<mnemonic, RC, PredNot, 0>; // Predicate new defm _cdn#NAME : ld_idxd_shl_pbase<mnemonic, RC, PredNot, 1>; @@ -527,78 +453,29 @@ def: Pat <(i64 (extloadi32 (i32 (add IntRegs:$src1, s11_2ExtPred:$offset)))), // ST + //===----------------------------------------------------------------------===// /// -/// Assumptions::: ****** DO NOT IGNORE ******** -/// 1. Make sure that in post increment store, the zero'th operand is always the -/// post increment operand. -/// 2. Make sure that the store value operand(Rt/Rtt) in a store is always the -/// last operand. 
-/// - -// memd(Re=#U)=Rtt -let isExtended = 1, opExtendable = 2, validSubTargets = HasV4SubT in { -def STrid_abs_setimm_V4 : STInst2<(outs IntRegs:$dst1), - (ins DoubleRegs:$src1, u0AlwaysExt:$src2), - "memd($dst1=##$src2) = $src1", - []>, - Requires<[HasV4T]>; - -// memb(Re=#U)=Rs -def STrib_abs_setimm_V4 : STInst2<(outs IntRegs:$dst1), - (ins IntRegs:$src1, u0AlwaysExt:$src2), - "memb($dst1=##$src2) = $src1", - []>, - Requires<[HasV4T]>; - -// memh(Re=#U)=Rs -def STrih_abs_setimm_V4 : STInst2<(outs IntRegs:$dst1), - (ins IntRegs:$src1, u0AlwaysExt:$src2), - "memh($dst1=##$src2) = $src1", - []>, - Requires<[HasV4T]>; - -// memw(Re=#U)=Rs -def STriw_abs_setimm_V4 : STInst2<(outs IntRegs:$dst1), - (ins IntRegs:$src1, u0AlwaysExt:$src2), - "memw($dst1=##$src2) = $src1", - []>, - Requires<[HasV4T]>; -} - -// memd(Re=#U)=Rtt -let isExtended = 1, opExtendable = 2, validSubTargets = HasV4SubT in { -def STrid_abs_set_V4 : STInst2<(outs IntRegs:$dst1), - (ins DoubleRegs:$src1, globaladdressExt:$src2), - "memd($dst1=##$src2) = $src1", - []>, - Requires<[HasV4T]>; - -// memb(Re=#U)=Rs -def STrib_abs_set_V4 : STInst2<(outs IntRegs:$dst1), - (ins IntRegs:$src1, globaladdressExt:$src2), - "memb($dst1=##$src2) = $src1", +//===----------------------------------------------------------------------===// +// Template class for store instructions with Absolute set addressing mode. +//===----------------------------------------------------------------------===// +let isExtended = 1, opExtendable = 2, validSubTargets = HasV4SubT in +class T_ST_abs_set<string mnemonic, RegisterClass RC>: + STInst2<(outs IntRegs:$dst1), + (ins RC:$src1, u0AlwaysExt:$src2), + mnemonic#"($dst1=##$src2) = $src1", []>, Requires<[HasV4T]>; -// memh(Re=#U)=Rs -def STrih_abs_set_V4 : STInst2<(outs IntRegs:$dst1), - (ins IntRegs:$src1, globaladdressExt:$src2), - "memh($dst1=##$src2) = $src1", - []>, - Requires<[HasV4T]>; - -// memw(Re=#U)=Rs -def STriw_abs_set_V4 : STInst2<(outs IntRegs:$dst1), - (ins IntRegs:$src1, globaladdressExt:$src2), - "memw($dst1=##$src2) = $src1", - []>, - Requires<[HasV4T]>; -} +def STrid_abs_set_V4 : T_ST_abs_set <"memd", DoubleRegs>; +def STrib_abs_set_V4 : T_ST_abs_set <"memb", IntRegs>; +def STrih_abs_set_V4 : T_ST_abs_set <"memh", IntRegs>; +def STriw_abs_set_V4 : T_ST_abs_set <"memw", IntRegs>; +//===----------------------------------------------------------------------===// // multiclass for store instructions with base + register offset addressing // mode +//===----------------------------------------------------------------------===// multiclass ST_Idxd_shl_Pbase<string mnemonic, RegisterClass RC, bit isNot, bit isPredNew> { - let PNewValue = !if(isPredNew, "new", "") in + let isPredicatedNew = isPredNew in def NAME : STInst2<(outs), (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4, RC:$src5), @@ -609,7 +486,7 @@ multiclass ST_Idxd_shl_Pbase<string mnemonic, RegisterClass RC, bit isNot, } multiclass ST_Idxd_shl_Pred<string mnemonic, RegisterClass RC, bit PredNot> { - let PredSense = !if(PredNot, "false", "true") in { + let isPredicatedFalse = PredNot in { defm _c#NAME : ST_Idxd_shl_Pbase<mnemonic, RC, PredNot, 0>; // Predicate new defm _cdn#NAME : ST_Idxd_shl_Pbase<mnemonic, RC, PredNot, 1>; @@ -637,7 +514,7 @@ multiclass ST_Idxd_shl<string mnemonic, string CextOp, RegisterClass RC> { // addressing mode. 
multiclass ST_Idxd_shl_Pbase_nv<string mnemonic, RegisterClass RC, bit isNot, bit isPredNew> { - let PNewValue = !if(isPredNew, "new", "") in + let isPredicatedNew = isPredNew in def NAME#_nv_V4 : NVInst_V4<(outs), (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4, RC:$src5), @@ -648,7 +525,7 @@ multiclass ST_Idxd_shl_Pbase_nv<string mnemonic, RegisterClass RC, bit isNot, } multiclass ST_Idxd_shl_Pred_nv<string mnemonic, RegisterClass RC, bit PredNot> { - let PredSense = !if(PredNot, "false", "true") in { + let isPredicatedFalse = PredNot in { defm _c#NAME : ST_Idxd_shl_Pbase_nv<mnemonic, RC, PredNot, 0>; // Predicate new defm _cdn#NAME : ST_Idxd_shl_Pbase_nv<mnemonic, RC, PredNot, 1>; @@ -711,17 +588,59 @@ def : Pat<(store (i64 DoubleRegs:$src4), u2ImmPred:$src3, DoubleRegs:$src4)>; } -// memd(Ru<<#u2+#U6)=Rtt -let isExtended = 1, opExtendable = 2, AddedComplexity = 10, -validSubTargets = HasV4SubT in -def STrid_shl_V4 : STInst<(outs), - (ins IntRegs:$src1, u2Imm:$src2, u0AlwaysExt:$src3, DoubleRegs:$src4), - "memd($src1<<#$src2+#$src3) = $src4", - [(store (i64 DoubleRegs:$src4), +let isExtended = 1, opExtendable = 2 in +class T_ST_LongOff <string mnemonic, PatFrag stOp, RegisterClass RC, ValueType VT> : + STInst<(outs), + (ins IntRegs:$src1, u2Imm:$src2, u0AlwaysExt:$src3, RC:$src4), + mnemonic#"($src1<<#$src2+##$src3) = $src4", + [(stOp (VT RC:$src4), (add (shl (i32 IntRegs:$src1), u2ImmPred:$src2), u0AlwaysExtPred:$src3))]>, Requires<[HasV4T]>; +let isExtended = 1, opExtendable = 2, mayStore = 1, isNVStore = 1 in +class T_ST_LongOff_nv <string mnemonic> : + NVInst_V4<(outs), + (ins IntRegs:$src1, u2Imm:$src2, u0AlwaysExt:$src3, IntRegs:$src4), + mnemonic#"($src1<<#$src2+##$src3) = $src4.new", + []>, + Requires<[HasV4T]>; + +multiclass ST_LongOff <string mnemonic, string BaseOp, PatFrag stOp> { + let BaseOpcode = BaseOp#"_shl" in { + let isNVStorable = 1 in + def NAME#_V4 : T_ST_LongOff<mnemonic, stOp, IntRegs, i32>; + + def NAME#_nv_V4 : T_ST_LongOff_nv<mnemonic>; + } +} + +let AddedComplexity = 10, validSubTargets = HasV4SubT in { + def STrid_shl_V4 : T_ST_LongOff<"memd", store, DoubleRegs, i64>; + defm STrib_shl : ST_LongOff <"memb", "STrib", truncstorei8>, NewValueRel; + defm STrih_shl : ST_LongOff <"memh", "Strih", truncstorei16>, NewValueRel; + defm STriw_shl : ST_LongOff <"memw", "STriw", store>, NewValueRel; +} + +let AddedComplexity = 40 in +multiclass T_ST_LOff_Pats <InstHexagon I, RegisterClass RC, ValueType VT, + PatFrag stOp> { + def : Pat<(stOp (VT RC:$src4), + (add (shl IntRegs:$src1, u2ImmPred:$src2), + (NumUsesBelowThresCONST32 tglobaladdr:$src3))), + (I IntRegs:$src1, u2ImmPred:$src2, tglobaladdr:$src3, RC:$src4)>; + + def : Pat<(stOp (VT RC:$src4), + (add IntRegs:$src1, + (NumUsesBelowThresCONST32 tglobaladdr:$src3))), + (I IntRegs:$src1, 0, tglobaladdr:$src3, RC:$src4)>; +} + +defm : T_ST_LOff_Pats<STrid_shl_V4, DoubleRegs, i64, store>; +defm : T_ST_LOff_Pats<STriw_shl_V4, IntRegs, i32, store>; +defm : T_ST_LOff_Pats<STrib_shl_V4, IntRegs, i32, truncstorei8>; +defm : T_ST_LOff_Pats<STrih_shl_V4, IntRegs, i32, truncstorei16>; + // memd(Rx++#s4:3)=Rtt // memd(Rx++#s4:3:circ(Mu))=Rtt // memd(Rx++I:circ(Mu))=Rtt @@ -741,7 +660,7 @@ def STrid_shl_V4 : STInst<(outs), //===----------------------------------------------------------------------===// multiclass ST_Imm_Pbase<string mnemonic, Operand OffsetOp, bit isNot, bit isPredNew> { - let PNewValue = !if(isPredNew, "new", "") in + let isPredicatedNew = isPredNew in def NAME : STInst2<(outs), (ins 
PredRegs:$src1, IntRegs:$src2, OffsetOp:$src3, s6Ext:$src4), !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ", @@ -751,7 +670,7 @@ multiclass ST_Imm_Pbase<string mnemonic, Operand OffsetOp, bit isNot, } multiclass ST_Imm_Pred<string mnemonic, Operand OffsetOp, bit PredNot> { - let PredSense = !if(PredNot, "false", "true") in { + let isPredicatedFalse = PredNot in { defm _c#NAME : ST_Imm_Pbase<mnemonic, OffsetOp, PredNot, 0>; // Predicate new defm _cdn#NAME : ST_Imm_Pbase<mnemonic, OffsetOp, PredNot, 1>; @@ -799,17 +718,6 @@ def : Pat <(truncstorei8 s8ExtPred:$src2, (i32 IntRegs:$src1)), (STrib_imm_V4 IntRegs:$src1, 0, s8ExtPred:$src2)>, Requires<[HasV4T]>; -// memb(Ru<<#u2+#U6)=Rt -let isExtended = 1, opExtendable = 2, AddedComplexity = 10, isNVStorable = 1, -validSubTargets = HasV4SubT in -def STrib_shl_V4 : STInst<(outs), - (ins IntRegs:$src1, u2Imm:$src2, u0AlwaysExt:$src3, IntRegs:$src4), - "memb($src1<<#$src2+#$src3) = $src4", - [(truncstorei8 (i32 IntRegs:$src4), - (add (shl (i32 IntRegs:$src1), u2ImmPred:$src2), - u0AlwaysExtPred:$src3))]>, - Requires<[HasV4T]>; - // memb(Rx++#s4:0:circ(Mu))=Rt // memb(Rx++I:circ(Mu))=Rt // memb(Rx++Mu)=Rt @@ -830,17 +738,6 @@ def : Pat <(truncstorei16 s8ExtPred:$src2, (i32 IntRegs:$src1)), // TODO: needs to be implemented. // memh(Ru<<#u2+#U6)=Rt.H -// memh(Ru<<#u2+#U6)=Rt -let isExtended = 1, opExtendable = 2, AddedComplexity = 10, isNVStorable = 1, -validSubTargets = HasV4SubT in -def STrih_shl_V4 : STInst<(outs), - (ins IntRegs:$src1, u2Imm:$src2, u0AlwaysExt:$src3, IntRegs:$src4), - "memh($src1<<#$src2+#$src3) = $src4", - [(truncstorei16 (i32 IntRegs:$src4), - (add (shl (i32 IntRegs:$src1), u2ImmPred:$src2), - u0AlwaysExtPred:$src3))]>, - Requires<[HasV4T]>; - // memh(Rx++#s4:1:circ(Mu))=Rt.H // memh(Rx++#s4:1:circ(Mu))=Rt // memh(Rx++I:circ(Mu))=Rt.H @@ -877,17 +774,6 @@ def : Pat <(store s8ExtPred:$src2, (i32 IntRegs:$src1)), (STriw_imm_V4 IntRegs:$src1, 0, s8ExtPred:$src2)>, Requires<[HasV4T]>; -// memw(Ru<<#u2+#U6)=Rt -let isExtended = 1, opExtendable = 2, AddedComplexity = 10, isNVStorable = 1, -validSubTargets = HasV4SubT in -def STriw_shl_V4 : STInst<(outs), - (ins IntRegs:$src1, u2Imm:$src2, u0AlwaysExt:$src3, IntRegs:$src4), - "memw($src1<<#$src2+#$src3) = $src4", - [(store (i32 IntRegs:$src4), - (add (shl (i32 IntRegs:$src1), u2ImmPred:$src2), - u0AlwaysExtPred:$src3))]>, - Requires<[HasV4T]>; - // memw(Rx++#s4:2)=Rt // memw(Rx++#s4:2:circ(Mu))=Rt // memw(Rx++I:circ(Mu))=Rt @@ -907,7 +793,7 @@ def STriw_shl_V4 : STInst<(outs), // multiclass ST_Idxd_Pbase_nv<string mnemonic, RegisterClass RC, Operand predImmOp, bit isNot, bit isPredNew> { - let PNewValue = !if(isPredNew, "new", "") in + let isPredicatedNew = isPredNew in def NAME#_nv_V4 : NVInst_V4<(outs), (ins PredRegs:$src1, IntRegs:$src2, predImmOp:$src3, RC: $src4), !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ", @@ -918,7 +804,7 @@ multiclass ST_Idxd_Pbase_nv<string mnemonic, RegisterClass RC, multiclass ST_Idxd_Pred_nv<string mnemonic, RegisterClass RC, Operand predImmOp, bit PredNot> { - let PredSense = !if(PredNot, "false", "true") in { + let isPredicatedFalse = PredNot in { defm _c#NAME : ST_Idxd_Pbase_nv<mnemonic, RC, predImmOp, PredNot, 0>; // Predicate new defm _cdn#NAME : ST_Idxd_Pbase_nv<mnemonic, RC, predImmOp, PredNot, 1>; @@ -960,7 +846,7 @@ let addrMode = BaseImmOffset, validSubTargets = HasV4SubT in { // and MEMri operand. 
multiclass ST_MEMri_Pbase_nv<string mnemonic, RegisterClass RC, bit isNot, bit isPredNew> { - let PNewValue = !if(isPredNew, "new", "") in + let isPredicatedNew = isPredNew in def NAME#_nv_V4 : NVInst_V4<(outs), (ins PredRegs:$src1, MEMri:$addr, RC: $src2), !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ", @@ -970,7 +856,7 @@ multiclass ST_MEMri_Pbase_nv<string mnemonic, RegisterClass RC, bit isNot, } multiclass ST_MEMri_Pred_nv<string mnemonic, RegisterClass RC, bit PredNot> { - let PredSense = !if(PredNot, "false", "true") in { + let isPredicatedFalse = PredNot in { defm _c#NAME : ST_MEMri_Pbase_nv<mnemonic, RC, PredNot, 0>; // Predicate new @@ -1006,15 +892,6 @@ mayStore = 1 in { defm STriw: ST_MEMri_nv<"memw", "STriw", IntRegs, 13, 8>, AddrModeRel; } -// memb(Ru<<#u2+#U6)=Nt.new -let isExtended = 1, opExtendable = 2, mayStore = 1, AddedComplexity = 10, -isNVStore = 1, validSubTargets = HasV4SubT in -def STrib_shl_nv_V4 : NVInst_V4<(outs), - (ins IntRegs:$src1, u2Imm:$src2, u0AlwaysExt:$src3, IntRegs:$src4), - "memb($src1<<#$src2+#$src3) = $src4.new", - []>, - Requires<[HasV4T]>; - //===----------------------------------------------------------------------===// // Post increment store // mem[bhwd](Rx++#s4:[0123])=Nt.new @@ -1022,7 +899,7 @@ def STrib_shl_nv_V4 : NVInst_V4<(outs), multiclass ST_PostInc_Pbase_nv<string mnemonic, RegisterClass RC, Operand ImmOp, bit isNot, bit isPredNew> { - let PNewValue = !if(isPredNew, "new", "") in + let isPredicatedNew = isPredNew in def NAME#_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2, ImmOp:$offset, RC:$src3), !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ", @@ -1034,7 +911,7 @@ multiclass ST_PostInc_Pbase_nv<string mnemonic, RegisterClass RC, Operand ImmOp, multiclass ST_PostInc_Pred_nv<string mnemonic, RegisterClass RC, Operand ImmOp, bit PredNot> { - let PredSense = !if(PredNot, "false", "true") in { + let isPredicatedFalse = PredNot in { defm _c#NAME : ST_PostInc_Pbase_nv<mnemonic, RC, ImmOp, PredNot, 0>; // Predicate new let Predicates = [HasV4T], validSubTargets = HasV4SubT in @@ -1072,29 +949,11 @@ defm POST_STwri: ST_PostInc_nv <"memw", "STriw", IntRegs, s4_2Imm>, AddrModeRel; // memb(Rx++I:circ(Mu))=Nt.new // memb(Rx++Mu)=Nt.new // memb(Rx++Mu:brev)=Nt.new -// memh(Ru<<#u2+#U6)=Nt.new -let isExtended = 1, opExtendable = 2, mayStore = 1, AddedComplexity = 10, -isNVStore = 1, validSubTargets = HasV4SubT in -def STrih_shl_nv_V4 : NVInst_V4<(outs), - (ins IntRegs:$src1, u2Imm:$src2, u0AlwaysExt:$src3, IntRegs:$src4), - "memh($src1<<#$src2+#$src3) = $src4.new", - []>, - Requires<[HasV4T]>; - // memh(Rx++#s4:1:circ(Mu))=Nt.new // memh(Rx++I:circ(Mu))=Nt.new // memh(Rx++Mu)=Nt.new // memh(Rx++Mu:brev)=Nt.new -// memw(Ru<<#u2+#U6)=Nt.new -let isExtended = 1, opExtendable = 2, mayStore = 1, AddedComplexity = 10, -isNVStore = 1, validSubTargets = HasV4SubT in -def STriw_shl_nv_V4 : NVInst_V4<(outs), - (ins IntRegs:$src1, u2Imm:$src2, u0AlwaysExt:$src3, IntRegs:$src4), - "memw($src1<<#$src2+#$src3) = $src4.new", - []>, - Requires<[HasV4T]>; - // memw(Rx++#s4:2:circ(Mu))=Nt.new // memw(Rx++I:circ(Mu))=Nt.new // memw(Rx++Mu)=Nt.new @@ -1108,179 +967,193 @@ def STriw_shl_nv_V4 : NVInst_V4<(outs), // NV/J + //===----------------------------------------------------------------------===// -multiclass NVJ_type_basic_reg<string NotStr, string OpcStr, string TakenStr> { - def _ie_nv_V4 : NVInst_V4<(outs), - (ins IntRegs:$src1, IntRegs:$src2, brtarget:$offset), - !strconcat("if (", !strconcat(NotStr, 
!strconcat(OpcStr, - !strconcat("($src1.new, $src2)) jump:", - !strconcat(TakenStr, " $offset"))))), - []>, - Requires<[HasV4T]>; +//===----------------------------------------------------------------------===// +// multiclass/template class for the new-value compare jumps with the register +// operands. +//===----------------------------------------------------------------------===// - def _nv_V4 : NVInst_V4<(outs), - (ins IntRegs:$src1, IntRegs:$src2, brtarget:$offset), - !strconcat("if (", !strconcat(NotStr, !strconcat(OpcStr, - !strconcat("($src1.new, $src2)) jump:", - !strconcat(TakenStr, " $offset"))))), - []>, - Requires<[HasV4T]>; -} +let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 11 in +class NVJrr_template<string mnemonic, bits<3> majOp, bit NvOpNum, + bit isNegCond, bit isTaken> + : NVInst_V4<(outs), + (ins IntRegs:$src1, IntRegs:$src2, brtarget:$offset), + "if ("#!if(isNegCond, "!","")#mnemonic# + "($src1"#!if(!eq(NvOpNum, 0),".new, ",", ")# + "$src2"#!if(!eq(NvOpNum, 1),".new))","))")#" jump:" + #!if(isTaken, "t","nt")#" $offset", + []>, Requires<[HasV4T]> { -multiclass NVJ_type_basic_2ndDotNew<string NotStr, string OpcStr, - string TakenStr> { - def _ie_nv_V4 : NVInst_V4<(outs), - (ins IntRegs:$src1, IntRegs:$src2, brtarget:$offset), - !strconcat("if (", !strconcat(NotStr, !strconcat(OpcStr, - !strconcat("($src1, $src2.new)) jump:", - !strconcat(TakenStr, " $offset"))))), - []>, - Requires<[HasV4T]>; + bits<5> src1; + bits<5> src2; + bits<3> Ns; // New-Value Operand + bits<5> RegOp; // Non New-Value Operand + bits<11> offset; - def _nv_V4 : NVInst_V4<(outs), - (ins IntRegs:$src1, IntRegs:$src2, brtarget:$offset), - !strconcat("if (", !strconcat(NotStr, !strconcat(OpcStr, - !strconcat("($src1, $src2.new)) jump:", - !strconcat(TakenStr, " $offset"))))), - []>, - Requires<[HasV4T]>; -} + let isBrTaken = !if(isTaken, "true", "false"); + let isPredicatedFalse = isNegCond; -multiclass NVJ_type_basic_imm<string NotStr, string OpcStr, string TakenStr> { - def _ie_nv_V4 : NVInst_V4<(outs), - (ins IntRegs:$src1, u5Imm:$src2, brtarget:$offset), - !strconcat("if (", !strconcat(NotStr, !strconcat(OpcStr, - !strconcat("($src1.new, #$src2)) jump:", - !strconcat(TakenStr, " $offset"))))), - []>, - Requires<[HasV4T]>; + let Ns = !if(!eq(NvOpNum, 0), src1{2-0}, src2{2-0}); + let RegOp = !if(!eq(NvOpNum, 0), src2, src1); - def _nv_V4 : NVInst_V4<(outs), - (ins IntRegs:$src1, u5Imm:$src2, brtarget:$offset), - !strconcat("if (", !strconcat(NotStr, !strconcat(OpcStr, - !strconcat("($src1.new, #$src2)) jump:", - !strconcat(TakenStr, " $offset"))))), - []>, - Requires<[HasV4T]>; + let IClass = 0b0010; + let Inst{26} = 0b0; + let Inst{25-23} = majOp; + let Inst{22} = isNegCond; + let Inst{18-16} = Ns; + let Inst{13} = isTaken; + let Inst{12-8} = RegOp; + let Inst{21-20} = offset{10-9}; + let Inst{7-1} = offset{8-2}; } -multiclass NVJ_type_basic_neg<string NotStr, string OpcStr, string TakenStr> { - def _ie_nv_V4 : NVInst_V4<(outs), - (ins IntRegs:$src1, nOneImm:$src2, brtarget:$offset), - !strconcat("if (", !strconcat(NotStr, !strconcat(OpcStr, - !strconcat("($src1.new, #$src2)) jump:", - !strconcat(TakenStr, " $offset"))))), - []>, - Requires<[HasV4T]>; - def _nv_V4 : NVInst_V4<(outs), - (ins IntRegs:$src1, nOneImm:$src2, brtarget:$offset), - !strconcat("if (", !strconcat(NotStr, !strconcat(OpcStr, - !strconcat("($src1.new, #$src2)) jump:", - !strconcat(TakenStr, " $offset"))))), - []>, - Requires<[HasV4T]>; +multiclass NVJrr_cond<string mnemonic, bits<3> majOp, bit 
NvOpNum, + bit isNegCond> { + // Branch not taken: + def _nt_V4: NVJrr_template<mnemonic, majOp, NvOpNum, isNegCond, 0>; + // Branch taken: + def _t_V4: NVJrr_template<mnemonic, majOp, NvOpNum, isNegCond, 1>; } -multiclass NVJ_type_basic_tstbit<string NotStr, string OpcStr, - string TakenStr> { - def _ie_nv_V4 : NVInst_V4<(outs), - (ins IntRegs:$src1, u1Imm:$src2, brtarget:$offset), - !strconcat("if (", !strconcat(NotStr, !strconcat(OpcStr, - !strconcat("($src1.new, #$src2)) jump:", - !strconcat(TakenStr, " $offset"))))), - []>, - Requires<[HasV4T]>; +// NvOpNum = 0 -> First Operand is a new-value Register +// NvOpNum = 1 -> Second Operand is a new-value Register - def _nv_V4 : NVInst_V4<(outs), - (ins IntRegs:$src1, u1Imm:$src2, brtarget:$offset), - !strconcat("if (", !strconcat(NotStr, !strconcat(OpcStr, - !strconcat("($src1.new, #$src2)) jump:", - !strconcat(TakenStr, " $offset"))))), - []>, - Requires<[HasV4T]>; +multiclass NVJrr_base<string mnemonic, string BaseOp, bits<3> majOp, + bit NvOpNum> { + let BaseOpcode = BaseOp#_NVJ in { + defm _t_Jumpnv : NVJrr_cond<mnemonic, majOp, NvOpNum, 0>; // True cond + defm _f_Jumpnv : NVJrr_cond<mnemonic, majOp, NvOpNum, 1>; // False cond + } } -// Multiclass for regular dot new of Ist operand register. -multiclass NVJ_type_br_pred_reg<string NotStr, string OpcStr> { - defm Pt : NVJ_type_basic_reg<NotStr, OpcStr, "t">; - defm Pnt : NVJ_type_basic_reg<NotStr, OpcStr, "nt">; -} +// if ([!]cmp.eq(Ns.new,Rt)) jump:[n]t #r9:2 +// if ([!]cmp.gt(Ns.new,Rt)) jump:[n]t #r9:2 +// if ([!]cmp.gtu(Ns.new,Rt)) jump:[n]t #r9:2 +// if ([!]cmp.gt(Rt,Ns.new)) jump:[n]t #r9:2 +// if ([!]cmp.gtu(Rt,Ns.new)) jump:[n]t #r9:2 -// Multiclass for dot new of 2nd operand register. -multiclass NVJ_type_br_pred_2ndDotNew<string NotStr, string OpcStr> { - defm Pt : NVJ_type_basic_2ndDotNew<NotStr, OpcStr, "t">; - defm Pnt : NVJ_type_basic_2ndDotNew<NotStr, OpcStr, "nt">; +let isPredicated = 1, isBranch = 1, isNewValue = 1, isTerminator = 1, + Defs = [PC], neverHasSideEffects = 1, validSubTargets = HasV4SubT in { + defm CMPEQrr : NVJrr_base<"cmp.eq", "CMPEQ", 0b000, 0>, PredRel; + defm CMPGTrr : NVJrr_base<"cmp.gt", "CMPGT", 0b001, 0>, PredRel; + defm CMPGTUrr : NVJrr_base<"cmp.gtu", "CMPGTU", 0b010, 0>, PredRel; + defm CMPLTrr : NVJrr_base<"cmp.gt", "CMPLT", 0b011, 1>, PredRel; + defm CMPLTUrr : NVJrr_base<"cmp.gtu", "CMPLTU", 0b100, 1>, PredRel; } -// Multiclass for 2nd operand immediate, including -1. -multiclass NVJ_type_br_pred_imm<string NotStr, string OpcStr> { - defm Pt : NVJ_type_basic_imm<NotStr, OpcStr, "t">; - defm Pnt : NVJ_type_basic_imm<NotStr, OpcStr, "nt">; - defm Ptneg : NVJ_type_basic_neg<NotStr, OpcStr, "t">; - defm Pntneg : NVJ_type_basic_neg<NotStr, OpcStr, "nt">; -} +//===----------------------------------------------------------------------===// +// multiclass/template class for the new-value compare jumps instruction +// with a register and an unsigned immediate (U5) operand. +//===----------------------------------------------------------------------===// -// Multiclass for 2nd operand immediate, excluding -1. 
-multiclass NVJ_type_br_pred_imm_only<string NotStr, string OpcStr> { - defm Pt : NVJ_type_basic_imm<NotStr, OpcStr, "t">; - defm Pnt : NVJ_type_basic_imm<NotStr, OpcStr, "nt">; -} +let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 11 in +class NVJri_template<string mnemonic, bits<3> majOp, bit isNegCond, + bit isTaken> + : NVInst_V4<(outs), + (ins IntRegs:$src1, u5Imm:$src2, brtarget:$offset), + "if ("#!if(isNegCond, "!","")#mnemonic#"($src1.new, #$src2)) jump:" + #!if(isTaken, "t","nt")#" $offset", + []>, Requires<[HasV4T]> { -// Multiclass for tstbit, where 2nd operand is always #0. -multiclass NVJ_type_br_pred_tstbit<string NotStr, string OpcStr> { - defm Pt : NVJ_type_basic_tstbit<NotStr, OpcStr, "t">; - defm Pnt : NVJ_type_basic_tstbit<NotStr, OpcStr, "nt">; + let isPredicatedFalse = isNegCond; + let isBrTaken = !if(isTaken, "true", "false"); + + bits<3> src1; + bits<5> src2; + bits<11> offset; + + let IClass = 0b0010; + let Inst{26} = 0b1; + let Inst{25-23} = majOp; + let Inst{22} = isNegCond; + let Inst{18-16} = src1; + let Inst{13} = isTaken; + let Inst{12-8} = src2; + let Inst{21-20} = offset{10-9}; + let Inst{7-1} = offset{8-2}; } -// Multiclass for GT. -multiclass NVJ_type_rr_ri<string OpcStr> { - defm rrNot : NVJ_type_br_pred_reg<"!", OpcStr>; - defm rr : NVJ_type_br_pred_reg<"", OpcStr>; - defm rrdnNot : NVJ_type_br_pred_2ndDotNew<"!", OpcStr>; - defm rrdn : NVJ_type_br_pred_2ndDotNew<"", OpcStr>; - defm riNot : NVJ_type_br_pred_imm<"!", OpcStr>; - defm ri : NVJ_type_br_pred_imm<"", OpcStr>; +multiclass NVJri_cond<string mnemonic, bits<3> majOp, bit isNegCond> { + // Branch not taken: + def _nt_V4: NVJri_template<mnemonic, majOp, isNegCond, 0>; + // Branch taken: + def _t_V4: NVJri_template<mnemonic, majOp, isNegCond, 1>; } -// Multiclass for EQ. -multiclass NVJ_type_rr_ri_no_2ndDotNew<string OpcStr> { - defm rrNot : NVJ_type_br_pred_reg<"!", OpcStr>; - defm rr : NVJ_type_br_pred_reg<"", OpcStr>; - defm riNot : NVJ_type_br_pred_imm<"!", OpcStr>; - defm ri : NVJ_type_br_pred_imm<"", OpcStr>; +multiclass NVJri_base<string mnemonic, string BaseOp, bits<3> majOp> { + let BaseOpcode = BaseOp#_NVJri in { + defm _t_Jumpnv : NVJri_cond<mnemonic, majOp, 0>; // True Cond + defm _f_Jumpnv : NVJri_cond<mnemonic, majOp, 1>; // False cond + } } -// Multiclass for GTU. -multiclass NVJ_type_rr_ri_no_nOne<string OpcStr> { - defm rrNot : NVJ_type_br_pred_reg<"!", OpcStr>; - defm rr : NVJ_type_br_pred_reg<"", OpcStr>; - defm rrdnNot : NVJ_type_br_pred_2ndDotNew<"!", OpcStr>; - defm rrdn : NVJ_type_br_pred_2ndDotNew<"", OpcStr>; - defm riNot : NVJ_type_br_pred_imm_only<"!", OpcStr>; - defm ri : NVJ_type_br_pred_imm_only<"", OpcStr>; +// if ([!]cmp.eq(Ns.new,#U5)) jump:[n]t #r9:2 +// if ([!]cmp.gt(Ns.new,#U5)) jump:[n]t #r9:2 +// if ([!]cmp.gtu(Ns.new,#U5)) jump:[n]t #r9:2 + +let isPredicated = 1, isBranch = 1, isNewValue = 1, isTerminator = 1, + Defs = [PC], neverHasSideEffects = 1, validSubTargets = HasV4SubT in { + defm CMPEQri : NVJri_base<"cmp.eq", "CMPEQ", 0b000>, PredRel; + defm CMPGTri : NVJri_base<"cmp.gt", "CMPGT", 0b001>, PredRel; + defm CMPGTUri : NVJri_base<"cmp.gtu", "CMPGTU", 0b010>, PredRel; } -// Multiclass for tstbit. 
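The NVJrr_template and NVJri_template classes above spell out the bit layout of the new-value compare-and-jump encodings: IClass 0b0010, the major opcode in bits 25-23, the negated-condition bit in 22, the taken/not-taken hint in bit 13, the new-value register number in 18-16, and the 11-bit branch-offset field split across bits 21-20 and 7-1. The sketch below is a rough standalone packer mirroring that layout for the register-register form, assuming IClass occupies the top four bits as elsewhere in the Hexagon encodings; it is only a reading aid, not the backend's MC encoder.

#include <cassert>
#include <cstdint>

// Mirrors the field placement in NVJrr_template above. 'offset11' is the
// 11-bit branch-offset field value; its bits {10-9} land in Inst{21-20}
// and bits {8-2} in Inst{7-1}, exactly as the 'let Inst{...}' lines say.
uint32_t encodeNVJrr(unsigned majOp, bool negCond, bool taken,
                     unsigned ns, unsigned regOp, uint32_t offset11) {
  assert(majOp < 8 && ns < 8 && regOp < 32 && offset11 < (1u << 11));
  uint32_t insn = 0;
  insn |= 0b0010u << 28;                   // IClass (assumed bits 31-28)
  /* Inst{26} = 0 selects the register form */
  insn |= (majOp & 0x7u) << 23;            // Inst{25-23} = majOp
  insn |= (negCond ? 1u : 0u) << 22;       // Inst{22}    = isNegCond
  insn |= ((offset11 >> 9) & 0x3u) << 20;  // Inst{21-20} = offset{10-9}
  insn |= (ns & 0x7u) << 16;               // Inst{18-16} = Ns (new-value reg)
  insn |= (taken ? 1u : 0u) << 13;         // Inst{13}    = taken hint
  insn |= (regOp & 0x1Fu) << 8;            // Inst{12-8}  = RegOp
  insn |= ((offset11 >> 2) & 0x7Fu) << 1;  // Inst{7-1}   = offset{8-2}
  return insn;
}

int main() {
  // e.g. "if (cmp.eq(r2.new, r5)) jump:t <offset>": majOp 0b000, taken hint set.
  (void)encodeNVJrr(/*majOp=*/0, /*negCond=*/false, /*taken=*/true,
                    /*ns=*/2, /*regOp=*/5, /*offset11=*/0x040);
  return 0;
}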
-multiclass NVJ_type_r0<string OpcStr> { - defm r0Not : NVJ_type_br_pred_tstbit<"!", OpcStr>; - defm r0 : NVJ_type_br_pred_tstbit<"", OpcStr>; - } +//===----------------------------------------------------------------------===// +// multiclass/template class for the new-value compare jumps instruction +// with a register and an hardcoded 0/-1 immediate value. +//===----------------------------------------------------------------------===// + +let isExtendable = 1, opExtendable = 1, isExtentSigned = 1, opExtentBits = 11 in +class NVJ_ConstImm_template<string mnemonic, bits<3> majOp, string ImmVal, + bit isNegCond, bit isTaken> + : NVInst_V4<(outs), + (ins IntRegs:$src1, brtarget:$offset), + "if ("#!if(isNegCond, "!","")#mnemonic + #"($src1.new, #"#ImmVal#")) jump:" + #!if(isTaken, "t","nt")#" $offset", + []>, Requires<[HasV4T]> { -// Base Multiclass for New Value Jump. -multiclass NVJ_type { - defm GT : NVJ_type_rr_ri<"cmp.gt">; - defm EQ : NVJ_type_rr_ri_no_2ndDotNew<"cmp.eq">; - defm GTU : NVJ_type_rr_ri_no_nOne<"cmp.gtu">; - defm TSTBIT : NVJ_type_r0<"tstbit">; + let isPredicatedFalse = isNegCond; + let isBrTaken = !if(isTaken, "true", "false"); + + bits<3> src1; + bits<11> offset; + let IClass = 0b0010; + let Inst{26} = 0b1; + let Inst{25-23} = majOp; + let Inst{22} = isNegCond; + let Inst{18-16} = src1; + let Inst{13} = isTaken; + let Inst{21-20} = offset{10-9}; + let Inst{7-1} = offset{8-2}; } -let isBranch = 1, isTerminator=1, neverHasSideEffects = 1, Defs = [PC] in { - defm JMP_ : NVJ_type; +multiclass NVJ_ConstImm_cond<string mnemonic, bits<3> majOp, string ImmVal, + bit isNegCond> { + // Branch not taken: + def _nt_V4: NVJ_ConstImm_template<mnemonic, majOp, ImmVal, isNegCond, 0>; + // Branch taken: + def _t_V4: NVJ_ConstImm_template<mnemonic, majOp, ImmVal, isNegCond, 1>; } -//===----------------------------------------------------------------------===// -// NV/J - -//===----------------------------------------------------------------------===// +multiclass NVJ_ConstImm_base<string mnemonic, string BaseOp, bits<3> majOp, + string ImmVal> { + let BaseOpcode = BaseOp#_NVJ_ConstImm in { + defm _t_Jumpnv : NVJ_ConstImm_cond<mnemonic, majOp, ImmVal, 0>; // True cond + defm _f_Jumpnv : NVJ_ConstImm_cond<mnemonic, majOp, ImmVal, 1>; // False Cond + } +} + +// if ([!]tstbit(Ns.new,#0)) jump:[n]t #r9:2 +// if ([!]cmp.eq(Ns.new,#-1)) jump:[n]t #r9:2 +// if ([!]cmp.gt(Ns.new,#-1)) jump:[n]t #r9:2 + +let isPredicated = 1, isBranch = 1, isNewValue = 1, isTerminator=1, + Defs = [PC], neverHasSideEffects = 1 in { + defm TSTBIT0 : NVJ_ConstImm_base<"tstbit", "TSTBIT", 0b011, "0">, PredRel; + defm CMPEQn1 : NVJ_ConstImm_base<"cmp.eq", "CMPEQ", 0b100, "-1">, PredRel; + defm CMPGTn1 : NVJ_ConstImm_base<"cmp.gt", "CMPGT", 0b101, "-1">, PredRel; +} //===----------------------------------------------------------------------===// // XTYPE/ALU + @@ -2286,7 +2159,7 @@ def CMPbEQri_V4 : MInst<(outs PredRegs:$dst), def : Pat <(brcond (i1 (setne (and (i32 IntRegs:$src1), 255), u8ImmPred:$src2)), bb:$offset), - (JMP_cNot (CMPbEQri_V4 (i32 IntRegs:$src1), u8ImmPred:$src2), + (JMP_f (CMPbEQri_V4 (i32 IntRegs:$src1), u8ImmPred:$src2), bb:$offset)>, Requires<[HasV4T]>; @@ -2769,9 +2642,9 @@ let isReturn = 1, isTerminator = 1, multiclass ST_Abs_Predbase<string mnemonic, RegisterClass RC, bit isNot, bit isPredNew> { - let PNewValue = !if(isPredNew, "new", "") in + let isPredicatedNew = isPredNew in def NAME#_V4 : STInst2<(outs), - (ins PredRegs:$src1, globaladdressExt:$absaddr, RC: $src2), + (ins PredRegs:$src1, 
u0AlwaysExt:$absaddr, RC: $src2), !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ", ") ")#mnemonic#"(##$absaddr) = $src2", []>, @@ -2779,7 +2652,7 @@ multiclass ST_Abs_Predbase<string mnemonic, RegisterClass RC, bit isNot, } multiclass ST_Abs_Pred<string mnemonic, RegisterClass RC, bit PredNot> { - let PredSense = !if(PredNot, "false", "true") in { + let isPredicatedFalse = PredNot in { defm _c#NAME : ST_Abs_Predbase<mnemonic, RC, PredNot, 0>; // Predicate new defm _cdn#NAME : ST_Abs_Predbase<mnemonic, RC, PredNot, 1>; @@ -2791,7 +2664,7 @@ multiclass ST_Abs<string mnemonic, string CextOp, RegisterClass RC> { let CextOpcode = CextOp, BaseOpcode = CextOp#_abs in { let opExtendable = 0, isPredicable = 1 in def NAME#_V4 : STInst2<(outs), - (ins globaladdressExt:$absaddr, RC:$src), + (ins u0AlwaysExt:$absaddr, RC:$src), mnemonic#"(##$absaddr) = $src", []>, Requires<[HasV4T]>; @@ -2805,9 +2678,9 @@ multiclass ST_Abs<string mnemonic, string CextOp, RegisterClass RC> { multiclass ST_Abs_Predbase_nv<string mnemonic, RegisterClass RC, bit isNot, bit isPredNew> { - let PNewValue = !if(isPredNew, "new", "") in + let isPredicatedNew = isPredNew in def NAME#_nv_V4 : NVInst_V4<(outs), - (ins PredRegs:$src1, globaladdressExt:$absaddr, RC: $src2), + (ins PredRegs:$src1, u0AlwaysExt:$absaddr, RC: $src2), !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ", ") ")#mnemonic#"(##$absaddr) = $src2.new", []>, @@ -2815,7 +2688,7 @@ multiclass ST_Abs_Predbase_nv<string mnemonic, RegisterClass RC, bit isNot, } multiclass ST_Abs_Pred_nv<string mnemonic, RegisterClass RC, bit PredNot> { - let PredSense = !if(PredNot, "false", "true") in { + let isPredicatedFalse = PredNot in { defm _c#NAME : ST_Abs_Predbase_nv<mnemonic, RC, PredNot, 0>; // Predicate new defm _cdn#NAME : ST_Abs_Predbase_nv<mnemonic, RC, PredNot, 1>; @@ -2827,7 +2700,7 @@ multiclass ST_Abs_nv<string mnemonic, string CextOp, RegisterClass RC> { let CextOpcode = CextOp, BaseOpcode = CextOp#_abs in { let opExtendable = 0, isPredicable = 1 in def NAME#_nv_V4 : NVInst_V4<(outs), - (ins globaladdressExt:$absaddr, RC:$src), + (ins u0AlwaysExt:$absaddr, RC:$src), mnemonic#"(##$absaddr) = $src.new", []>, Requires<[HasV4T]>; @@ -2840,16 +2713,19 @@ multiclass ST_Abs_nv<string mnemonic, string CextOp, RegisterClass RC> { } let addrMode = Absolute in { + let accessSize = ByteAccess in defm STrib_abs : ST_Abs<"memb", "STrib", IntRegs>, ST_Abs_nv<"memb", "STrib", IntRegs>, AddrModeRel; + let accessSize = HalfWordAccess in defm STrih_abs : ST_Abs<"memh", "STrih", IntRegs>, ST_Abs_nv<"memh", "STrih", IntRegs>, AddrModeRel; + let accessSize = WordAccess in defm STriw_abs : ST_Abs<"memw", "STriw", IntRegs>, ST_Abs_nv<"memw", "STriw", IntRegs>, AddrModeRel; - let isNVStorable = 0 in + let accessSize = DoubleWordAccess, isNVStorable = 0 in defm STrid_abs : ST_Abs<"memd", "STrid", DoubleRegs>, AddrModeRel; } @@ -2875,6 +2751,7 @@ def : Pat<(store (i64 DoubleRegs:$src1), // mem[bhwd](#global)=Rt // if ([!]Pv[.new]) mem[bhwd](##global) = Rt //===----------------------------------------------------------------------===// +let mayStore = 1, isNVStorable = 1 in multiclass ST_GP<string mnemonic, string BaseOp, RegisterClass RC> { let BaseOpcode = BaseOp, isPredicable = 1 in def NAME#_V4 : STInst2<(outs), @@ -2909,15 +2786,16 @@ multiclass ST_GP_nv<string mnemonic, string BaseOp, RegisterClass RC> { } } -let validSubTargets = HasV4SubT, validSubTargets = HasV4SubT in { -defm STd_GP : ST_GP <"memd", "STd_GP", DoubleRegs>, - ST_GP_nv<"memd", "STd_GP", 
DoubleRegs>, NewValueRel ; -defm STb_GP : ST_GP<"memb", "STb_GP", IntRegs>, - ST_GP_nv<"memb", "STb_GP", IntRegs>, NewValueRel ; -defm STh_GP : ST_GP<"memh", "STh_GP", IntRegs>, - ST_GP_nv<"memh", "STh_GP", IntRegs>, NewValueRel ; -defm STw_GP : ST_GP<"memw", "STw_GP", IntRegs>, - ST_GP_nv<"memw", "STw_GP", IntRegs>, NewValueRel ; +let validSubTargets = HasV4SubT, neverHasSideEffects = 1 in { + let isNVStorable = 0 in + defm STd_GP : ST_GP <"memd", "STd_GP", DoubleRegs>, PredNewRel; + + defm STb_GP : ST_GP<"memb", "STb_GP", IntRegs>, + ST_GP_nv<"memb", "STb_GP", IntRegs>, NewValueRel; + defm STh_GP : ST_GP<"memh", "STh_GP", IntRegs>, + ST_GP_nv<"memh", "STh_GP", IntRegs>, NewValueRel; + defm STw_GP : ST_GP<"memw", "STw_GP", IntRegs>, + ST_GP_nv<"memw", "STw_GP", IntRegs>, NewValueRel; } // 64 bit atomic store @@ -2974,9 +2852,9 @@ def : Pat<(store (i32 IntRegs:$src1), (HexagonCONST32_GP tglobaladdr:$global)), //===----------------------------------------------------------------------===// multiclass LD_Abs_Predbase<string mnemonic, RegisterClass RC, bit isNot, bit isPredNew> { - let PNewValue = !if(isPredNew, "new", "") in + let isPredicatedNew = isPredNew in def NAME : LDInst2<(outs RC:$dst), - (ins PredRegs:$src1, globaladdressExt:$absaddr), + (ins PredRegs:$src1, u0AlwaysExt:$absaddr), !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ", ") ")#"$dst = "#mnemonic#"(##$absaddr)", []>, @@ -2984,7 +2862,7 @@ multiclass LD_Abs_Predbase<string mnemonic, RegisterClass RC, bit isNot, } multiclass LD_Abs_Pred<string mnemonic, RegisterClass RC, bit PredNot> { - let PredSense = !if(PredNot, "false", "true") in { + let isPredicatedFalse = PredNot in { defm _c#NAME : LD_Abs_Predbase<mnemonic, RC, PredNot, 0>; // Predicate new defm _cdn#NAME : LD_Abs_Predbase<mnemonic, RC, PredNot, 1>; @@ -2996,7 +2874,7 @@ multiclass LD_Abs<string mnemonic, string CextOp, RegisterClass RC> { let CextOpcode = CextOp, BaseOpcode = CextOp#_abs in { let opExtendable = 1, isPredicable = 1 in def NAME#_V4 : LDInst2<(outs RC:$dst), - (ins globaladdressExt:$absaddr), + (ins u0AlwaysExt:$absaddr), "$dst = "#mnemonic#"(##$absaddr)", []>, Requires<[HasV4T]>; @@ -3009,33 +2887,37 @@ multiclass LD_Abs<string mnemonic, string CextOp, RegisterClass RC> { } let addrMode = Absolute in { + let accessSize = ByteAccess in { defm LDrib_abs : LD_Abs<"memb", "LDrib", IntRegs>, AddrModeRel; defm LDriub_abs : LD_Abs<"memub", "LDriub", IntRegs>, AddrModeRel; + } + let accessSize = HalfWordAccess in { defm LDrih_abs : LD_Abs<"memh", "LDrih", IntRegs>, AddrModeRel; defm LDriuh_abs : LD_Abs<"memuh", "LDriuh", IntRegs>, AddrModeRel; + } + let accessSize = WordAccess in defm LDriw_abs : LD_Abs<"memw", "LDriw", IntRegs>, AddrModeRel; + + let accessSize = DoubleWordAccess in defm LDrid_abs : LD_Abs<"memd", "LDrid", DoubleRegs>, AddrModeRel; } -let Predicates = [HasV4T], AddedComplexity = 30 in +let Predicates = [HasV4T], AddedComplexity = 30 in { def : Pat<(i32 (load (HexagonCONST32 tglobaladdr:$absaddr))), (LDriw_abs_V4 tglobaladdr: $absaddr)>; -let Predicates = [HasV4T], AddedComplexity=30 in def : Pat<(i32 (sextloadi8 (HexagonCONST32 tglobaladdr:$absaddr))), (LDrib_abs_V4 tglobaladdr:$absaddr)>; -let Predicates = [HasV4T], AddedComplexity=30 in def : Pat<(i32 (zextloadi8 (HexagonCONST32 tglobaladdr:$absaddr))), (LDriub_abs_V4 tglobaladdr:$absaddr)>; -let Predicates = [HasV4T], AddedComplexity=30 in def : Pat<(i32 (sextloadi16 (HexagonCONST32 tglobaladdr:$absaddr))), (LDrih_abs_V4 tglobaladdr:$absaddr)>; -let Predicates = [HasV4T], 
AddedComplexity=30 in def : Pat<(i32 (zextloadi16 (HexagonCONST32 tglobaladdr:$absaddr))), (LDriuh_abs_V4 tglobaladdr:$absaddr)>; +} //===----------------------------------------------------------------------===// // multiclass for load instructions with GP-relative addressing mode. @@ -3058,12 +2940,12 @@ multiclass LD_GP<string mnemonic, string BaseOp, RegisterClass RC> { } } -defm LDd_GP : LD_GP<"memd", "LDd_GP", DoubleRegs>; -defm LDb_GP : LD_GP<"memb", "LDb_GP", IntRegs>; -defm LDub_GP : LD_GP<"memub", "LDub_GP", IntRegs>; -defm LDh_GP : LD_GP<"memh", "LDh_GP", IntRegs>; -defm LDuh_GP : LD_GP<"memuh", "LDuh_GP", IntRegs>; -defm LDw_GP : LD_GP<"memw", "LDw_GP", IntRegs>; +defm LDd_GP : LD_GP<"memd", "LDd_GP", DoubleRegs>, PredNewRel; +defm LDb_GP : LD_GP<"memb", "LDb_GP", IntRegs>, PredNewRel; +defm LDub_GP : LD_GP<"memub", "LDub_GP", IntRegs>, PredNewRel; +defm LDh_GP : LD_GP<"memh", "LDh_GP", IntRegs>, PredNewRel; +defm LDuh_GP : LD_GP<"memuh", "LDuh_GP", IntRegs>, PredNewRel; +defm LDw_GP : LD_GP<"memw", "LDw_GP", IntRegs>, PredNewRel; def : Pat <(atomic_load_64 (HexagonCONST32_GP tglobaladdr:$global)), (i64 (LDd_GP_V4 tglobaladdr:$global))>; @@ -3139,9 +3021,10 @@ def : Pat <(i32 (load (HexagonCONST32_GP tglobaladdr:$global))), // Transfer global address into a register -let AddedComplexity=50, isMoveImm = 1, isReMaterializable = 1 in -def TFRI_V4 : ALU32_ri<(outs IntRegs:$dst), (ins globaladdress:$src1), - "$dst = ##$src1", +let isExtended = 1, opExtendable = 1, AddedComplexity=50, isMoveImm = 1, +isAsCheapAsAMove = 1, isReMaterializable = 1, validSubTargets = HasV4SubT in +def TFRI_V4 : ALU32_ri<(outs IntRegs:$dst), (ins s16Ext:$src1), + "$dst = #$src1", [(set IntRegs:$dst, (HexagonCONST32 tglobaladdr:$src1))]>, Requires<[HasV4T]>; @@ -3185,19 +3068,21 @@ def : Pat<(HexagonCONST32_GP tglobaladdr:$src1), // Load - Indirect with long offset: These instructions take global address // as an operand -let AddedComplexity = 10 in +let isExtended = 1, opExtendable = 3, AddedComplexity = 40, +validSubTargets = HasV4SubT in def LDrid_ind_lo_V4 : LDInst<(outs DoubleRegs:$dst), - (ins IntRegs:$src1, u2Imm:$src2, globaladdress:$offset), + (ins IntRegs:$src1, u2Imm:$src2, globaladdressExt:$offset), "$dst=memd($src1<<#$src2+##$offset)", [(set (i64 DoubleRegs:$dst), (load (add (shl IntRegs:$src1, u2ImmPred:$src2), (HexagonCONST32 tglobaladdr:$offset))))]>, Requires<[HasV4T]>; -let AddedComplexity = 10 in +let AddedComplexity = 40 in multiclass LD_indirect_lo<string OpcStr, PatFrag OpNode> { +let isExtended = 1, opExtendable = 3, validSubTargets = HasV4SubT in def _lo_V4 : LDInst<(outs IntRegs:$dst), - (ins IntRegs:$src1, u2Imm:$src2, globaladdress:$offset), + (ins IntRegs:$src1, u2Imm:$src2, globaladdressExt:$offset), !strconcat("$dst = ", !strconcat(OpcStr, "($src1<<#$src2+##$offset)")), [(set IntRegs:$dst, @@ -3208,202 +3093,53 @@ multiclass LD_indirect_lo<string OpcStr, PatFrag OpNode> { defm LDrib_ind : LD_indirect_lo<"memb", sextloadi8>; defm LDriub_ind : LD_indirect_lo<"memub", zextloadi8>; +defm LDriub_ind_anyext : LD_indirect_lo<"memub", extloadi8>; defm LDrih_ind : LD_indirect_lo<"memh", sextloadi16>; defm LDriuh_ind : LD_indirect_lo<"memuh", zextloadi16>; +defm LDriuh_ind_anyext : LD_indirect_lo<"memuh", extloadi16>; defm LDriw_ind : LD_indirect_lo<"memw", load>; -// Store - Indirect with long offset: These instructions take global address -// as an operand -let AddedComplexity = 10 in -def STrid_ind_lo_V4 : STInst<(outs), - (ins IntRegs:$src1, u2Imm:$src2, globaladdress:$src3, - 
DoubleRegs:$src4), - "memd($src1<<#$src2+#$src3) = $src4", - [(store (i64 DoubleRegs:$src4), - (add (shl IntRegs:$src1, u2ImmPred:$src2), - (HexagonCONST32 tglobaladdr:$src3)))]>, - Requires<[HasV4T]>; - -let AddedComplexity = 10 in -multiclass ST_indirect_lo<string OpcStr, PatFrag OpNode> { - def _lo_V4 : STInst<(outs), - (ins IntRegs:$src1, u2Imm:$src2, globaladdress:$src3, - IntRegs:$src4), - !strconcat(OpcStr, "($src1<<#$src2+##$src3) = $src4"), - [(OpNode (i32 IntRegs:$src4), - (add (shl IntRegs:$src1, u2ImmPred:$src2), - (HexagonCONST32 tglobaladdr:$src3)))]>, - Requires<[HasV4T]>; -} - -defm STrib_ind : ST_indirect_lo<"memb", truncstorei8>; -defm STrih_ind : ST_indirect_lo<"memh", truncstorei16>; -defm STriw_ind : ST_indirect_lo<"memw", store>; - -// Store - absolute addressing mode: These instruction take constant -// value as the extended operand. -multiclass ST_absimm<string OpcStr> { -let isExtended = 1, opExtendable = 0, isPredicable = 1, -validSubTargets = HasV4SubT in - def _abs_V4 : STInst2<(outs), - (ins u0AlwaysExt:$src1, IntRegs:$src2), - !strconcat(OpcStr, "(##$src1) = $src2"), - []>, - Requires<[HasV4T]>; - -let isExtended = 1, opExtendable = 1, isPredicated = 1, -validSubTargets = HasV4SubT in { - def _abs_cPt_V4 : STInst2<(outs), - (ins PredRegs:$src1, u0AlwaysExt:$src2, IntRegs:$src3), - !strconcat("if ($src1)", !strconcat(OpcStr, "(##$src2) = $src3")), - []>, - Requires<[HasV4T]>; - - def _abs_cNotPt_V4 : STInst2<(outs), - (ins PredRegs:$src1, u0AlwaysExt:$src2, IntRegs:$src3), - !strconcat("if (!$src1)", !strconcat(OpcStr, "(##$src2) = $src3")), - []>, - Requires<[HasV4T]>; - - def _abs_cdnPt_V4 : STInst2<(outs), - (ins PredRegs:$src1, u0AlwaysExt:$src2, IntRegs:$src3), - !strconcat("if ($src1.new)", - !strconcat(OpcStr, "(##$src2) = $src3")), - []>, - Requires<[HasV4T]>; - - def _abs_cdnNotPt_V4 : STInst2<(outs), - (ins PredRegs:$src1, u0AlwaysExt:$src2, IntRegs:$src3), - !strconcat("if (!$src1.new)", - !strconcat(OpcStr, "(##$src2) = $src3")), - []>, - Requires<[HasV4T]>; -} - -let isExtended = 1, opExtendable = 0, mayStore = 1, isNVStore = 1, -validSubTargets = HasV4SubT in - def _abs_nv_V4 : NVInst_V4<(outs), - (ins u0AlwaysExt:$src1, IntRegs:$src2), - !strconcat(OpcStr, "(##$src1) = $src2.new"), - []>, - Requires<[HasV4T]>; - -let isExtended = 1, opExtendable = 1, mayStore = 1, isPredicated = 1, -isNVStore = 1, validSubTargets = HasV4SubT in { - def _abs_cPt_nv_V4 : NVInst_V4<(outs), - (ins PredRegs:$src1, u0AlwaysExt:$src2, IntRegs:$src3), - !strconcat("if ($src1)", - !strconcat(OpcStr, "(##$src2) = $src3.new")), - []>, - Requires<[HasV4T]>; - - def _abs_cNotPt_nv_V4 : NVInst_V4<(outs), - (ins PredRegs:$src1, u0AlwaysExt:$src2, IntRegs:$src3), - !strconcat("if (!$src1)", - !strconcat(OpcStr, "(##$src2) = $src3.new")), - []>, - Requires<[HasV4T]>; - - def _abs_cdnPt_nv_V4 : NVInst_V4<(outs), - (ins PredRegs:$src1, u0AlwaysExt:$src2, IntRegs:$src3), - !strconcat("if ($src1.new)", - !strconcat(OpcStr, "(##$src2) = $src3.new")), - []>, - Requires<[HasV4T]>; - - def _abs_cdnNotPt_nv_V4 : NVInst_V4<(outs), - (ins PredRegs:$src1, u0AlwaysExt:$src2, IntRegs:$src3), - !strconcat("if (!$src1.new)", - !strconcat(OpcStr, "(##$src2) = $src3.new")), - []>, - Requires<[HasV4T]>; -} -} +let AddedComplexity = 40 in +def : Pat <(i32 (sextloadi8 (add IntRegs:$src1, + (NumUsesBelowThresCONST32 tglobaladdr:$offset)))), + (i32 (LDrib_ind_lo_V4 IntRegs:$src1, 0, tglobaladdr:$offset))>, + Requires<[HasV4T]>; -defm STrib_imm : ST_absimm<"memb">; -defm STrih_imm : ST_absimm<"memh">; 
-defm STriw_imm : ST_absimm<"memw">; +let AddedComplexity = 40 in +def : Pat <(i32 (zextloadi8 (add IntRegs:$src1, + (NumUsesBelowThresCONST32 tglobaladdr:$offset)))), + (i32 (LDriub_ind_lo_V4 IntRegs:$src1, 0, tglobaladdr:$offset))>, + Requires<[HasV4T]>; let Predicates = [HasV4T], AddedComplexity = 30 in { def : Pat<(truncstorei8 (i32 IntRegs:$src1), u0AlwaysExtPred:$src2), - (STrib_imm_abs_V4 u0AlwaysExtPred:$src2, IntRegs: $src1)>; + (STrib_abs_V4 u0AlwaysExtPred:$src2, IntRegs: $src1)>; def : Pat<(truncstorei16 (i32 IntRegs:$src1), u0AlwaysExtPred:$src2), - (STrih_imm_abs_V4 u0AlwaysExtPred:$src2, IntRegs: $src1)>; + (STrih_abs_V4 u0AlwaysExtPred:$src2, IntRegs: $src1)>; def : Pat<(store (i32 IntRegs:$src1), u0AlwaysExtPred:$src2), - (STriw_imm_abs_V4 u0AlwaysExtPred:$src2, IntRegs: $src1)>; -} - -// Load - absolute addressing mode: These instruction take constant -// value as the extended operand - -multiclass LD_absimm<string OpcStr> { -let isExtended = 1, opExtendable = 1, isPredicable = 1, -validSubTargets = HasV4SubT in - def _abs_V4 : LDInst2<(outs IntRegs:$dst), - (ins u0AlwaysExt:$src), - !strconcat("$dst = ", - !strconcat(OpcStr, "(##$src)")), - []>, - Requires<[HasV4T]>; - -let isExtended = 1, opExtendable = 2, isPredicated = 1, -validSubTargets = HasV4SubT in { - def _abs_cPt_V4 : LDInst2<(outs IntRegs:$dst), - (ins PredRegs:$src1, u0AlwaysExt:$src2), - !strconcat("if ($src1) $dst = ", - !strconcat(OpcStr, "(##$src2)")), - []>, - Requires<[HasV4T]>; - - def _abs_cNotPt_V4 : LDInst2<(outs IntRegs:$dst), - (ins PredRegs:$src1, u0AlwaysExt:$src2), - !strconcat("if (!$src1) $dst = ", - !strconcat(OpcStr, "(##$src2)")), - []>, - Requires<[HasV4T]>; - - def _abs_cdnPt_V4 : LDInst2<(outs IntRegs:$dst), - (ins PredRegs:$src1, u0AlwaysExt:$src2), - !strconcat("if ($src1.new) $dst = ", - !strconcat(OpcStr, "(##$src2)")), - []>, - Requires<[HasV4T]>; - - def _abs_cdnNotPt_V4 : LDInst2<(outs IntRegs:$dst), - (ins PredRegs:$src1, u0AlwaysExt:$src2), - !strconcat("if (!$src1.new) $dst = ", - !strconcat(OpcStr, "(##$src2)")), - []>, - Requires<[HasV4T]>; + (STriw_abs_V4 u0AlwaysExtPred:$src2, IntRegs: $src1)>; } -} - -defm LDrib_imm : LD_absimm<"memb">; -defm LDriub_imm : LD_absimm<"memub">; -defm LDrih_imm : LD_absimm<"memh">; -defm LDriuh_imm : LD_absimm<"memuh">; -defm LDriw_imm : LD_absimm<"memw">; let Predicates = [HasV4T], AddedComplexity = 30 in { def : Pat<(i32 (load u0AlwaysExtPred:$src)), - (LDriw_imm_abs_V4 u0AlwaysExtPred:$src)>; + (LDriw_abs_V4 u0AlwaysExtPred:$src)>; def : Pat<(i32 (sextloadi8 u0AlwaysExtPred:$src)), - (LDrib_imm_abs_V4 u0AlwaysExtPred:$src)>; + (LDrib_abs_V4 u0AlwaysExtPred:$src)>; def : Pat<(i32 (zextloadi8 u0AlwaysExtPred:$src)), - (LDriub_imm_abs_V4 u0AlwaysExtPred:$src)>; + (LDriub_abs_V4 u0AlwaysExtPred:$src)>; def : Pat<(i32 (sextloadi16 u0AlwaysExtPred:$src)), - (LDrih_imm_abs_V4 u0AlwaysExtPred:$src)>; + (LDrih_abs_V4 u0AlwaysExtPred:$src)>; def : Pat<(i32 (zextloadi16 u0AlwaysExtPred:$src)), - (LDriuh_imm_abs_V4 u0AlwaysExtPred:$src)>; + (LDriuh_abs_V4 u0AlwaysExtPred:$src)>; } -// Indexed store double word - global address. +// Indexed store word - global address. 
// memw(Rs+#u6:2)=#S8 let AddedComplexity = 10 in def STriw_offset_ext_V4 : STInst<(outs), diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonMachineFunctionInfo.h b/contrib/llvm/lib/Target/Hexagon/HexagonMachineFunctionInfo.h index 0318c519..bd7b26a 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonMachineFunctionInfo.h +++ b/contrib/llvm/lib/Target/Hexagon/HexagonMachineFunctionInfo.h @@ -29,15 +29,18 @@ class HexagonMachineFunctionInfo : public MachineFunctionInfo { std::vector<MachineInstr*> AllocaAdjustInsts; int VarArgsFrameIndex; bool HasClobberLR; + bool HasEHReturn; std::map<const MachineInstr*, unsigned> PacketInfo; public: - HexagonMachineFunctionInfo() : SRetReturnReg(0), HasClobberLR(0) {} + HexagonMachineFunctionInfo() : SRetReturnReg(0), HasClobberLR(0), + HasEHReturn(false) {} HexagonMachineFunctionInfo(MachineFunction &MF) : SRetReturnReg(0), - HasClobberLR(0) {} + HasClobberLR(0), + HasEHReturn(false) {} unsigned getSRetReturnReg() const { return SRetReturnReg; } void setSRetReturnReg(unsigned Reg) { SRetReturnReg = Reg; } @@ -69,6 +72,8 @@ public: void setHasClobberLR(bool v) { HasClobberLR = v; } bool hasClobberLR() const { return HasClobberLR; } + bool hasEHReturn() const { return HasEHReturn; }; + void setHasEHReturn(bool H = true) { HasEHReturn = H; }; }; } // End llvm namespace diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonNewValueJump.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonNewValueJump.cpp index 5e80e48..05e6968 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonNewValueJump.cpp +++ b/contrib/llvm/lib/Target/Hexagon/HexagonNewValueJump.cpp @@ -22,29 +22,31 @@ // //===----------------------------------------------------------------------===// #define DEBUG_TYPE "hexagon-nvj" -#include "Hexagon.h" -#include "HexagonInstrInfo.h" -#include "HexagonMachineFunctionInfo.h" -#include "HexagonRegisterInfo.h" -#include "HexagonSubtarget.h" -#include "HexagonTargetMachine.h" +#include "llvm/PassSupport.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/Debug.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/Statistic.h" -#include "llvm/CodeGen/LiveVariables.h" -#include "llvm/CodeGen/MachineFunctionAnalysis.h" -#include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/ScheduleDAGInstrs.h" -#include "llvm/PassSupport.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/Compiler.h" -#include "llvm/Support/Debug.h" -#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/LiveVariables.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/MachineFunctionAnalysis.h" #include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetRegisterInfo.h" +#include "Hexagon.h" +#include "HexagonTargetMachine.h" +#include "HexagonRegisterInfo.h" +#include "HexagonSubtarget.h" +#include "HexagonInstrInfo.h" +#include "HexagonMachineFunctionInfo.h" + #include <map> + +#include "llvm/Support/CommandLine.h" using namespace llvm; STATISTIC(NumNVJGenerated, "Number of New Value Jump Instructions created"); @@ -57,6 +59,11 @@ static cl::opt<bool> DisableNewValueJumps("disable-nvjump", cl::Hidden, cl::ZeroOrMore, cl::init(false), cl::desc("Disable New Value Jumps")); +namespace llvm { + void initializeHexagonNewValueJumpPass(PassRegistry&); +} + + namespace 
{ struct HexagonNewValueJump : public MachineFunctionPass { const HexagonInstrInfo *QII; @@ -65,9 +72,12 @@ namespace { public: static char ID; - HexagonNewValueJump() : MachineFunctionPass(ID) { } + HexagonNewValueJump() : MachineFunctionPass(ID) { + initializeHexagonNewValueJumpPass(*PassRegistry::getPassRegistry()); + } virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<MachineBranchProbabilityInfo>(); MachineFunctionPass::getAnalysisUsage(AU); } @@ -78,6 +88,8 @@ namespace { virtual bool runOnMachineFunction(MachineFunction &Fn); private: + /// \brief A handle to the branch probability pass. + const MachineBranchProbabilityInfo *MBPI; }; @@ -85,6 +97,13 @@ namespace { char HexagonNewValueJump::ID = 0; +INITIALIZE_PASS_BEGIN(HexagonNewValueJump, "hexagon-nvj", + "Hexagon NewValueJump", false, false) +INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo) +INITIALIZE_PASS_END(HexagonNewValueJump, "hexagon-nvj", + "Hexagon NewValueJump", false, false) + + // We have identified this II could be feeder to NVJ, // verify that it can be. static bool canBeFeederToNewValueJump(const HexagonInstrInfo *QII, @@ -208,19 +227,15 @@ static bool canCompareBeNewValueJump(const HexagonInstrInfo *QII, // range specified by the arch. if (!secondReg) { int64_t v = MI->getOperand(2).getImm(); - if (MI->getOpcode() == Hexagon::CMPGEri || - (MI->getOpcode() == Hexagon::CMPGEUri && v > 0)) - --v; if (!(isUInt<5>(v) || ((MI->getOpcode() == Hexagon::CMPEQri || - MI->getOpcode() == Hexagon::CMPGTri || - MI->getOpcode() == Hexagon::CMPGEri) && + MI->getOpcode() == Hexagon::CMPGTri) && (v == -1)))) return false; } - unsigned cmpReg1, cmpOp2; + unsigned cmpReg1, cmpOp2 = 0; // cmpOp2 assignment silences compiler warning. cmpReg1 = MI->getOperand(1).getReg(); if (secondReg) { @@ -271,58 +286,63 @@ static bool canCompareBeNewValueJump(const HexagonInstrInfo *QII, // Given a compare operator, return a matching New Value Jump // compare operator. Make sure that MI here is included in // HexagonInstrInfo.cpp::isNewValueJumpCandidate -static unsigned getNewValueJumpOpcode(const MachineInstr *MI, int reg, - bool secondRegNewified) { +static unsigned getNewValueJumpOpcode(MachineInstr *MI, int reg, + bool secondRegNewified, + MachineBasicBlock *jmpTarget, + const MachineBranchProbabilityInfo + *MBPI) { + bool taken = false; + MachineBasicBlock *Src = MI->getParent(); + const BranchProbability Prediction = + MBPI->getEdgeProbability(Src, jmpTarget); + + if (Prediction >= BranchProbability(1,2)) + taken = true; + switch (MI->getOpcode()) { case Hexagon::CMPEQrr: - return Hexagon::JMP_EQrrPt_nv_V4; + return taken ? Hexagon::CMPEQrr_t_Jumpnv_t_V4 + : Hexagon::CMPEQrr_t_Jumpnv_nt_V4; case Hexagon::CMPEQri: { if (reg >= 0) - return Hexagon::JMP_EQriPt_nv_V4; + return taken ? Hexagon::CMPEQri_t_Jumpnv_t_V4 + : Hexagon::CMPEQri_t_Jumpnv_nt_V4; else - return Hexagon::JMP_EQriPtneg_nv_V4; + return taken ? Hexagon::CMPEQn1_t_Jumpnv_t_V4 + : Hexagon::CMPEQn1_t_Jumpnv_nt_V4; } - case Hexagon::CMPLTrr: case Hexagon::CMPGTrr: { if (secondRegNewified) - return Hexagon::JMP_GTrrdnPt_nv_V4; + return taken ? Hexagon::CMPLTrr_t_Jumpnv_t_V4 + : Hexagon::CMPLTrr_t_Jumpnv_nt_V4; else - return Hexagon::JMP_GTrrPt_nv_V4; - } - - case Hexagon::CMPGEri: { - if (reg >= 1) - return Hexagon::JMP_GTriPt_nv_V4; - else - return Hexagon::JMP_GTriPtneg_nv_V4; + return taken ? 
Hexagon::CMPGTrr_t_Jumpnv_t_V4 + : Hexagon::CMPGTrr_t_Jumpnv_nt_V4; } case Hexagon::CMPGTri: { if (reg >= 0) - return Hexagon::JMP_GTriPt_nv_V4; + return taken ? Hexagon::CMPGTri_t_Jumpnv_t_V4 + : Hexagon::CMPGTri_t_Jumpnv_nt_V4; else - return Hexagon::JMP_GTriPtneg_nv_V4; + return taken ? Hexagon::CMPGTn1_t_Jumpnv_t_V4 + : Hexagon::CMPGTn1_t_Jumpnv_nt_V4; } - case Hexagon::CMPLTUrr: case Hexagon::CMPGTUrr: { if (secondRegNewified) - return Hexagon::JMP_GTUrrdnPt_nv_V4; + return taken ? Hexagon::CMPLTUrr_t_Jumpnv_t_V4 + : Hexagon::CMPLTUrr_t_Jumpnv_nt_V4; else - return Hexagon::JMP_GTUrrPt_nv_V4; + return taken ? Hexagon::CMPGTUrr_t_Jumpnv_t_V4 + : Hexagon::CMPGTUrr_t_Jumpnv_nt_V4; } case Hexagon::CMPGTUri: - return Hexagon::JMP_GTUriPt_nv_V4; - - case Hexagon::CMPGEUri: { - if (reg == 0) - return Hexagon::JMP_EQrrPt_nv_V4; - else - return Hexagon::JMP_GTUriPt_nv_V4; - } + return taken ? Hexagon::CMPGTUri_t_Jumpnv_t_V4 + : Hexagon::CMPGTUri_t_Jumpnv_nt_V4; default: llvm_unreachable("Could not find matching New Value Jump instruction."); @@ -346,6 +366,7 @@ bool HexagonNewValueJump::runOnMachineFunction(MachineFunction &MF) { QII = static_cast<const HexagonInstrInfo *>(MF.getTarget().getInstrInfo()); QRI = static_cast<const HexagonRegisterInfo *>(MF.getTarget().getRegisterInfo()); + MBPI = &getAnalysis<MachineBranchProbabilityInfo>(); if (!QRI->Subtarget.hasV4TOps() || DisableNewValueJumps) { @@ -393,12 +414,12 @@ bool HexagonNewValueJump::runOnMachineFunction(MachineFunction &MF) { DEBUG(dbgs() << "Instr: "; MI->dump(); dbgs() << "\n"); if (!foundJump && - (MI->getOpcode() == Hexagon::JMP_c || - MI->getOpcode() == Hexagon::JMP_cNot || - MI->getOpcode() == Hexagon::JMP_cdnPt || - MI->getOpcode() == Hexagon::JMP_cdnPnt || - MI->getOpcode() == Hexagon::JMP_cdnNotPt || - MI->getOpcode() == Hexagon::JMP_cdnNotPnt)) { + (MI->getOpcode() == Hexagon::JMP_t || + MI->getOpcode() == Hexagon::JMP_f || + MI->getOpcode() == Hexagon::JMP_tnew_t || + MI->getOpcode() == Hexagon::JMP_tnew_nt || + MI->getOpcode() == Hexagon::JMP_fnew_t || + MI->getOpcode() == Hexagon::JMP_fnew_nt)) { // This is where you would insert your compare and // instr that feeds compare jmpPos = MII; @@ -434,9 +455,9 @@ bool HexagonNewValueJump::runOnMachineFunction(MachineFunction &MF) { jmpTarget = MI->getOperand(1).getMBB(); foundJump = true; - if (MI->getOpcode() == Hexagon::JMP_cNot || - MI->getOpcode() == Hexagon::JMP_cdnNotPt || - MI->getOpcode() == Hexagon::JMP_cdnNotPnt) { + if (MI->getOpcode() == Hexagon::JMP_f || + MI->getOpcode() == Hexagon::JMP_fnew_t || + MI->getOpcode() == Hexagon::JMP_fnew_nt) { invertPredicate = true; } continue; @@ -525,10 +546,8 @@ bool HexagonNewValueJump::runOnMachineFunction(MachineFunction &MF) { if (isSecondOpReg) { // In case of CMPLT, or CMPLTU, or EQ with the second register // to newify, swap the operands. 
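// Aside: a minimal standalone sketch of the branch-prediction choice made in
// getNewValueJumpOpcode above -- an edge predicted taken (probability >= 1/2)
// selects the "_t" form of the new-value compare-and-jump, otherwise the "_nt"
// form. EdgeProb/predictedTaken/selectHint are illustrative names, not LLVM's
// BranchProbability / MachineBranchProbabilityInfo API.
#include <cstdint>

struct EdgeProb {            // probability = Num / Den
  uint32_t Num, Den;
};

// Compare Num/Den against 1/2 without floating point: 2*Num >= Den.
constexpr bool predictedTaken(EdgeProb P) {
  return 2ull * P.Num >= P.Den;
}

enum class NVJumpHint { TakenForm, NotTakenForm };

// Mirror of the `taken ? ..._Jumpnv_t_V4 : ..._Jumpnv_nt_V4` pattern.
constexpr NVJumpHint selectHint(EdgeProb P) {
  return predictedTaken(P) ? NVJumpHint::TakenForm : NVJumpHint::NotTakenForm;
}

static_assert(selectHint({3, 4}) == NVJumpHint::TakenForm, "75% -> taken");
static_assert(selectHint({1, 4}) == NVJumpHint::NotTakenForm, "25% -> not taken");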
- if (cmpInstr->getOpcode() == Hexagon::CMPLTrr || - cmpInstr->getOpcode() == Hexagon::CMPLTUrr || - (cmpInstr->getOpcode() == Hexagon::CMPEQrr && - feederReg == (unsigned) cmpOp2)) { + if (cmpInstr->getOpcode() == Hexagon::CMPEQrr && + feederReg == (unsigned) cmpOp2) { unsigned tmp = cmpReg1; bool tmpIsKill = MO1IsKill; cmpReg1 = cmpOp2; @@ -582,42 +601,34 @@ bool HexagonNewValueJump::runOnMachineFunction(MachineFunction &MF) { assert((QII->isNewValueJumpCandidate(cmpInstr)) && "This compare is not a New Value Jump candidate."); unsigned opc = getNewValueJumpOpcode(cmpInstr, cmpOp2, - isSecondOpNewified); + isSecondOpNewified, + jmpTarget, MBPI); if (invertPredicate) opc = QII->getInvertedPredicatedOpcode(opc); - // Manage the conversions from CMPGEUri to either CMPEQrr - // or CMPGTUri properly. See Arch spec for CMPGEUri instructions. - // This has to be after the getNewValueJumpOpcode function call as - // second operand of the compare could be modified in this logic. - if (cmpInstr->getOpcode() == Hexagon::CMPGEUri) { - if (cmpOp2 == 0) { - cmpOp2 = cmpReg1; - MO2IsKill = MO1IsKill; - isSecondOpReg = true; - } else - --cmpOp2; - } - - // Manage the conversions from CMPGEri to CMPGTUri properly. - // See Arch spec for CMPGEri instructions. - if (cmpInstr->getOpcode() == Hexagon::CMPGEri) - --cmpOp2; - - if (isSecondOpReg) { + if (isSecondOpReg) NewMI = BuildMI(*MBB, jmpPos, dl, QII->get(opc)) .addReg(cmpReg1, getKillRegState(MO1IsKill)) .addReg(cmpOp2, getKillRegState(MO2IsKill)) .addMBB(jmpTarget); - } - else { + + else if ((cmpInstr->getOpcode() == Hexagon::CMPEQri || + cmpInstr->getOpcode() == Hexagon::CMPGTri) && + cmpOp2 == -1 ) + // Corresponding new-value compare jump instructions don't have the + // operand for -1 immediate value. + NewMI = BuildMI(*MBB, jmpPos, dl, + QII->get(opc)) + .addReg(cmpReg1, getKillRegState(MO1IsKill)) + .addMBB(jmpTarget); + + else NewMI = BuildMI(*MBB, jmpPos, dl, QII->get(opc)) .addReg(cmpReg1, getKillRegState(MO1IsKill)) .addImm(cmpOp2) .addMBB(jmpTarget); - } assert(NewMI && "New Value Jump Instruction Not created!"); if (cmpInstr->getOperand(0).isReg() && diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonPeephole.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonPeephole.cpp index 576f1d7..89e3406 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonPeephole.cpp +++ b/contrib/llvm/lib/Target/Hexagon/HexagonPeephole.cpp @@ -61,10 +61,6 @@ static cl::opt<bool> DisableHexagonPeephole("disable-hexagon-peephole", cl::Hidden, cl::ZeroOrMore, cl::init(false), cl::desc("Disable Peephole Optimization")); -static cl::opt<int> -DbgPNPCount("pnp-count", cl::init(-1), cl::Hidden, - cl::desc("Maximum number of P=NOT(P) to be optimized")); - static cl::opt<bool> DisablePNotP("disable-hexagon-pnotp", cl::Hidden, cl::ZeroOrMore, cl::init(false), cl::desc("Disable Optimization of PNotP")); @@ -73,6 +69,14 @@ static cl::opt<bool> DisableOptSZExt("disable-hexagon-optszext", cl::Hidden, cl::ZeroOrMore, cl::init(false), cl::desc("Disable Optimization of Sign/Zero Extends")); +static cl::opt<bool> DisableOptExtTo64("disable-hexagon-opt-ext-to-64", + cl::Hidden, cl::ZeroOrMore, cl::init(false), + cl::desc("Disable Optimization of extensions to i64.")); + +namespace llvm { + void initializeHexagonPeepholePass(PassRegistry&); +} + namespace { struct HexagonPeephole : public MachineFunctionPass { const HexagonInstrInfo *QII; @@ -81,7 +85,9 @@ namespace { public: static char ID; - HexagonPeephole() : MachineFunctionPass(ID) { } + HexagonPeephole() : MachineFunctionPass(ID) { + 
initializeHexagonPeepholePass(*PassRegistry::getPassRegistry()); + } bool runOnMachineFunction(MachineFunction &MF); @@ -100,8 +106,10 @@ namespace { char HexagonPeephole::ID = 0; -bool HexagonPeephole::runOnMachineFunction(MachineFunction &MF) { +INITIALIZE_PASS(HexagonPeephole, "hexagon-peephole", "Hexagon Peephole", + false, false) +bool HexagonPeephole::runOnMachineFunction(MachineFunction &MF) { QII = static_cast<const HexagonInstrInfo *>(MF.getTarget(). getInstrInfo()); QRI = static_cast<const HexagonRegisterInfo *>(MF.getTarget(). @@ -142,6 +150,21 @@ bool HexagonPeephole::runOnMachineFunction(MachineFunction &MF) { } } + // Look for %vreg170<def> = COMBINE_ir_V4 (0, %vreg169) + // %vreg170:DoublRegs, %vreg169:IntRegs + if (!DisableOptExtTo64 && + MI->getOpcode () == Hexagon::COMBINE_Ir_V4) { + assert (MI->getNumOperands() == 3); + MachineOperand &Dst = MI->getOperand(0); + MachineOperand &Src1 = MI->getOperand(1); + MachineOperand &Src2 = MI->getOperand(2); + if (Src1.getImm() != 0) + continue; + unsigned DstReg = Dst.getReg(); + unsigned SrcReg = Src2.getReg(); + PeepholeMap[DstReg] = SrcReg; + } + // Look for this sequence below // %vregDoubleReg1 = LSRd_ri %vregDoubleReg0, 32 // %vregIntReg = COPY %vregDoubleReg1:subreg_loreg. diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonRemoveSZExtArgs.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonRemoveSZExtArgs.cpp index 34bf4ea..44234e8 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonRemoveSZExtArgs.cpp +++ b/contrib/llvm/lib/Target/Hexagon/HexagonRemoveSZExtArgs.cpp @@ -21,11 +21,18 @@ #include "llvm/Transforms/Scalar.h" using namespace llvm; + +namespace llvm { + void initializeHexagonRemoveExtendArgsPass(PassRegistry&); +} + namespace { struct HexagonRemoveExtendArgs : public FunctionPass { public: static char ID; - HexagonRemoveExtendArgs() : FunctionPass(ID) {} + HexagonRemoveExtendArgs() : FunctionPass(ID) { + initializeHexagonRemoveExtendArgsPass(*PassRegistry::getPassRegistry()); + } virtual bool runOnFunction(Function &F); const char *getPassName() const { @@ -41,11 +48,9 @@ namespace { } char HexagonRemoveExtendArgs::ID = 0; -RegisterPass<HexagonRemoveExtendArgs> X("reargs", - "Remove Sign and Zero Extends for Args" - ); - +INITIALIZE_PASS(HexagonRemoveExtendArgs, "reargs", + "Remove Sign and Zero Extends for Args", false, false) bool HexagonRemoveExtendArgs::runOnFunction(Function &F) { unsigned Idx = 1; @@ -78,6 +83,7 @@ bool HexagonRemoveExtendArgs::runOnFunction(Function &F) { -FunctionPass *llvm::createHexagonRemoveExtendOps(HexagonTargetMachine &TM) { +FunctionPass* +llvm::createHexagonRemoveExtendArgs(const HexagonTargetMachine &TM) { return new HexagonRemoveExtendArgs(); } diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonSplitTFRCondSets.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonSplitTFRCondSets.cpp index 814249f..8608e08 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonSplitTFRCondSets.cpp +++ b/contrib/llvm/lib/Target/Hexagon/HexagonSplitTFRCondSets.cpp @@ -49,16 +49,23 @@ using namespace llvm; +namespace llvm { + void initializeHexagonSplitTFRCondSetsPass(PassRegistry&); +} + + namespace { class HexagonSplitTFRCondSets : public MachineFunctionPass { - HexagonTargetMachine& QTM; + const HexagonTargetMachine &QTM; const HexagonSubtarget &QST; public: static char ID; - HexagonSplitTFRCondSets(HexagonTargetMachine& TM) : - MachineFunctionPass(ID), QTM(TM), QST(*TM.getSubtargetImpl()) {} + HexagonSplitTFRCondSets(const HexagonTargetMachine& TM) : + MachineFunctionPass(ID), QTM(TM), 
QST(*TM.getSubtargetImpl()) { + initializeHexagonSplitTFRCondSetsPass(*PassRegistry::getPassRegistry()); + } const char *getPassName() const { return "Hexagon Split TFRCondSets"; @@ -211,6 +218,18 @@ bool HexagonSplitTFRCondSets::runOnMachineFunction(MachineFunction &Fn) { // Public Constructor Functions //===----------------------------------------------------------------------===// -FunctionPass *llvm::createHexagonSplitTFRCondSets(HexagonTargetMachine &TM) { +static void initializePassOnce(PassRegistry &Registry) { + const char *Name = "Hexagon Split TFRCondSets"; + PassInfo *PI = new PassInfo(Name, "hexagon-split-tfr", + &HexagonSplitTFRCondSets::ID, 0, false, false); + Registry.registerPass(*PI, true); +} + +void llvm::initializeHexagonSplitTFRCondSetsPass(PassRegistry &Registry) { + CALL_ONCE_INITIALIZATION(initializePassOnce) +} + +FunctionPass* +llvm::createHexagonSplitTFRCondSets(const HexagonTargetMachine &TM) { return new HexagonSplitTFRCondSets(TM); } diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp index ce45c62..caa1ba4 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp +++ b/contrib/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp @@ -25,19 +25,17 @@ using namespace llvm; -static cl:: -opt<bool> DisableHardwareLoops( - "disable-hexagon-hwloops", cl::Hidden, - cl::desc("Disable Hardware Loops for Hexagon target")); +static cl:: opt<bool> DisableHardwareLoops("disable-hexagon-hwloops", + cl::Hidden, cl::desc("Disable Hardware Loops for Hexagon target")); -static cl:: -opt<bool> DisableHexagonMISched("disable-hexagon-misched", - cl::Hidden, cl::ZeroOrMore, cl::init(false), - cl::desc("Disable Hexagon MI Scheduling")); +static cl::opt<bool> DisableHexagonMISched("disable-hexagon-misched", + cl::Hidden, cl::ZeroOrMore, cl::init(false), + cl::desc("Disable Hexagon MI Scheduling")); static cl::opt<bool> DisableHexagonCFGOpt("disable-hexagon-cfgopt", - cl::Hidden, cl::ZeroOrMore, cl::init(false), - cl::desc("Disable Hexagon CFG Optimization")); + cl::Hidden, cl::ZeroOrMore, cl::init(false), + cl::desc("Disable Hexagon CFG Optimization")); + /// HexagonTargetMachineModule - Note that this is used on hosts that /// cannot link in a library unless there are references into the @@ -126,55 +124,62 @@ TargetPassConfig *HexagonTargetMachine::createPassConfig(PassManagerBase &PM) { } bool HexagonPassConfig::addInstSelector() { + const HexagonTargetMachine &TM = getHexagonTargetMachine(); + bool NoOpt = (getOptLevel() == CodeGenOpt::None); - if (getOptLevel() != CodeGenOpt::None) - addPass(createHexagonRemoveExtendOps(getHexagonTargetMachine())); + if (!NoOpt) + addPass(createHexagonRemoveExtendArgs(TM)); - addPass(createHexagonISelDag(getHexagonTargetMachine(), getOptLevel())); + addPass(createHexagonISelDag(TM, getOptLevel())); - if (getOptLevel() != CodeGenOpt::None) + if (!NoOpt) { addPass(createHexagonPeephole()); + printAndVerify("After hexagon peephole pass"); + } return false; } - bool HexagonPassConfig::addPreRegAlloc() { - if (!DisableHardwareLoops && getOptLevel() != CodeGenOpt::None) - addPass(createHexagonHardwareLoops()); + if (getOptLevel() != CodeGenOpt::None) + if (!DisableHardwareLoops) + addPass(createHexagonHardwareLoops()); return false; } bool HexagonPassConfig::addPostRegAlloc() { - if (!DisableHexagonCFGOpt && getOptLevel() != CodeGenOpt::None) - addPass(createHexagonCFGOptimizer(getHexagonTargetMachine())); - return true; + const HexagonTargetMachine &TM = 
getHexagonTargetMachine(); + if (getOptLevel() != CodeGenOpt::None) + if (!DisableHexagonCFGOpt) + addPass(createHexagonCFGOptimizer(TM)); + return false; } - bool HexagonPassConfig::addPreSched2() { if (getOptLevel() != CodeGenOpt::None) addPass(&IfConverterID); - return true; + return false; } bool HexagonPassConfig::addPreEmitPass() { + const HexagonTargetMachine &TM = getHexagonTargetMachine(); + bool NoOpt = (getOptLevel() == CodeGenOpt::None); - if (!DisableHardwareLoops && getOptLevel() != CodeGenOpt::None) - addPass(createHexagonFixupHwLoops()); - - if (getOptLevel() != CodeGenOpt::None) + if (!NoOpt) addPass(createHexagonNewValueJump()); // Expand Spill code for predicate registers. - addPass(createHexagonExpandPredSpillCode(getHexagonTargetMachine())); + addPass(createHexagonExpandPredSpillCode(TM)); // Split up TFRcondsets into conditional transfers. - addPass(createHexagonSplitTFRCondSets(getHexagonTargetMachine())); + addPass(createHexagonSplitTFRCondSets(TM)); // Create Packets. - if (getOptLevel() != CodeGenOpt::None) + if (!NoOpt) { + if (!DisableHardwareLoops) + addPass(createHexagonFixupHwLoops()); addPass(createHexagonPacketizer()); + } return false; } diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp index c0d86da..39995e1 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp +++ b/contrib/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp @@ -48,19 +48,32 @@ #include "HexagonMachineFunctionInfo.h" #include <map> +#include <vector> using namespace llvm; +static cl::opt<bool> PacketizeVolatiles("hexagon-packetize-volatiles", + cl::ZeroOrMore, cl::Hidden, cl::init(true), + cl::desc("Allow non-solo packetization of volatile memory references")); + +namespace llvm { + void initializeHexagonPacketizerPass(PassRegistry&); +} + + namespace { class HexagonPacketizer : public MachineFunctionPass { public: static char ID; - HexagonPacketizer() : MachineFunctionPass(ID) {} + HexagonPacketizer() : MachineFunctionPass(ID) { + initializeHexagonPacketizerPass(*PassRegistry::getPassRegistry()); + } void getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesCFG(); AU.addRequired<MachineDominatorTree>(); + AU.addRequired<MachineBranchProbabilityInfo>(); AU.addPreserved<MachineDominatorTree>(); AU.addRequired<MachineLoopInfo>(); AU.addPreserved<MachineLoopInfo>(); @@ -96,10 +109,17 @@ namespace { // schedule this instruction. bool FoundSequentialDependence; + /// \brief A handle to the branch probability pass. + const MachineBranchProbabilityInfo *MBPI; + + // Track MIs with ignored dependece. + std::vector<MachineInstr*> IgnoreDepMIs; + public: // Ctor. HexagonPacketizerList(MachineFunction &MF, MachineLoopInfo &MLI, - MachineDominatorTree &MDT); + MachineDominatorTree &MDT, + const MachineBranchProbabilityInfo *MBPI); // initPacketizerState - initialize some internal flags. 
void initPacketizerState(); @@ -123,20 +143,20 @@ namespace { private: bool IsCallDependent(MachineInstr* MI, SDep::Kind DepType, unsigned DepReg); bool PromoteToDotNew(MachineInstr* MI, SDep::Kind DepType, - MachineBasicBlock::iterator &MII, - const TargetRegisterClass* RC); + MachineBasicBlock::iterator &MII, + const TargetRegisterClass* RC); bool CanPromoteToDotNew(MachineInstr* MI, SUnit* PacketSU, - unsigned DepReg, - std::map <MachineInstr*, SUnit*> MIToSUnit, - MachineBasicBlock::iterator &MII, - const TargetRegisterClass* RC); + unsigned DepReg, + std::map <MachineInstr*, SUnit*> MIToSUnit, + MachineBasicBlock::iterator &MII, + const TargetRegisterClass* RC); bool CanPromoteToNewValue(MachineInstr* MI, SUnit* PacketSU, - unsigned DepReg, - std::map <MachineInstr*, SUnit*> MIToSUnit, - MachineBasicBlock::iterator &MII); + unsigned DepReg, + std::map <MachineInstr*, SUnit*> MIToSUnit, + MachineBasicBlock::iterator &MII); bool CanPromoteToNewValueStore(MachineInstr* MI, MachineInstr* PacketMI, - unsigned DepReg, - std::map <MachineInstr*, SUnit*> MIToSUnit); + unsigned DepReg, + std::map <MachineInstr*, SUnit*> MIToSUnit); bool DemoteToDotOld(MachineInstr* MI); bool ArePredicatesComplements(MachineInstr* MI1, MachineInstr* MI2, std::map <MachineInstr*, SUnit*> MIToSUnit); @@ -152,19 +172,32 @@ namespace { }; } +INITIALIZE_PASS_BEGIN(HexagonPacketizer, "packets", "Hexagon Packetizer", + false, false) +INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) +INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo) +INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) +INITIALIZE_AG_DEPENDENCY(AliasAnalysis) +INITIALIZE_PASS_END(HexagonPacketizer, "packets", "Hexagon Packetizer", + false, false) + + // HexagonPacketizerList Ctor. HexagonPacketizerList::HexagonPacketizerList( - MachineFunction &MF, MachineLoopInfo &MLI,MachineDominatorTree &MDT) + MachineFunction &MF, MachineLoopInfo &MLI,MachineDominatorTree &MDT, + const MachineBranchProbabilityInfo *MBPI) : VLIWPacketizerList(MF, MLI, MDT, true){ + this->MBPI = MBPI; } bool HexagonPacketizer::runOnMachineFunction(MachineFunction &Fn) { const TargetInstrInfo *TII = Fn.getTarget().getInstrInfo(); MachineLoopInfo &MLI = getAnalysis<MachineLoopInfo>(); MachineDominatorTree &MDT = getAnalysis<MachineDominatorTree>(); - + const MachineBranchProbabilityInfo *MBPI = + &getAnalysis<MachineBranchProbabilityInfo>(); // Instantiate the packetizer. - HexagonPacketizerList Packetizer(Fn, MLI, MDT); + HexagonPacketizerList Packetizer(Fn, MLI, MDT, MBPI); // DFA state table should not be empty. 
assert(Packetizer.getResourceTracker() && "Empty DFA table!"); @@ -710,8 +743,10 @@ static int GetDotNewOp(const int opc) { } // Return .new predicate version for an instruction -static int GetDotNewPredOp(const int opc) { - switch (opc) { +static int GetDotNewPredOp(MachineInstr *MI, + const MachineBranchProbabilityInfo *MBPI, + const HexagonInstrInfo *QII) { + switch (MI->getOpcode()) { default: llvm_unreachable("Unknown .new type"); // Conditional stores // Store byte conditionally @@ -857,17 +892,15 @@ static int GetDotNewPredOp(const int opc) { return Hexagon::STw_GP_cdnNotPt_V4; // Condtional Jumps - case Hexagon::JMP_c: - return Hexagon::JMP_cdnPt; + case Hexagon::JMP_t: + case Hexagon::JMP_f: + return QII->getDotNewPredJumpOp(MI, MBPI); - case Hexagon::JMP_cNot: - return Hexagon::JMP_cdnNotPt; + case Hexagon::JMPR_t: + return Hexagon::JMPR_tnew_tV3; - case Hexagon::JMPR_cPt: - return Hexagon::JMPR_cdnPt_V3; - - case Hexagon::JMPR_cNotPt: - return Hexagon::JMPR_cdnNotPt_V3; + case Hexagon::JMPR_f: + return Hexagon::JMPR_fnew_tV3; // Conditional Transfers case Hexagon::TFR_cPt: @@ -1261,7 +1294,7 @@ bool HexagonPacketizerList::PromoteToDotNew(MachineInstr* MI, int NewOpcode; if (RC == &Hexagon::PredRegsRegClass) - NewOpcode = GetDotNewPredOp(MI->getOpcode()); + NewOpcode = GetDotNewPredOp(MI, MBPI, QII); else NewOpcode = GetDotNewOp(MI->getOpcode()); MI->setDesc(QII->get(NewOpcode)); @@ -1306,17 +1339,17 @@ static int GetDotOldOp(const int opc) { case Hexagon::TFRI_cdnNotPt: return Hexagon::TFRI_cNotPt; - case Hexagon::JMP_cdnPt: - return Hexagon::JMP_c; + case Hexagon::JMP_tnew_t: + return Hexagon::JMP_t; - case Hexagon::JMP_cdnNotPt: - return Hexagon::JMP_cNot; + case Hexagon::JMP_fnew_t: + return Hexagon::JMP_f; - case Hexagon::JMPR_cdnPt_V3: - return Hexagon::JMPR_cPt; + case Hexagon::JMPR_tnew_tV3: + return Hexagon::JMPR_t; - case Hexagon::JMPR_cdnNotPt_V3: - return Hexagon::JMPR_cNotPt; + case Hexagon::JMPR_fnew_tV3: + return Hexagon::JMPR_f; // Load double word @@ -1912,7 +1945,7 @@ static bool GetPredicateSense(MachineInstr* MI, case Hexagon::STrih_imm_cdnPt_V4 : case Hexagon::STriw_imm_cPt_V4 : case Hexagon::STriw_imm_cdnPt_V4 : - case Hexagon::JMP_cdnPt : + case Hexagon::JMP_tnew_t : case Hexagon::LDrid_cPt : case Hexagon::LDrid_cdnPt : case Hexagon::LDrid_indexed_cPt : @@ -2051,7 +2084,7 @@ static bool GetPredicateSense(MachineInstr* MI, case Hexagon::STrih_imm_cdnNotPt_V4 : case Hexagon::STriw_imm_cNotPt_V4 : case Hexagon::STriw_imm_cdnNotPt_V4 : - case Hexagon::JMP_cdnNotPt : + case Hexagon::JMP_fnew_t : case Hexagon::LDrid_cNotPt : case Hexagon::LDrid_cdnNotPt : case Hexagon::LDrid_indexed_cNotPt : @@ -2739,9 +2772,8 @@ bool HexagonPacketizerList::isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) { // If an instruction feeds new value jump, glue it. 
MachineBasicBlock::iterator NextMII = I; ++NextMII; - MachineInstr *NextMI = NextMII; - - if (QII->isNewValueJump(NextMI)) { + if (NextMII != I->getParent()->end() && QII->isNewValueJump(NextMII)) { + MachineInstr *NextMI = NextMII; bool secondRegMatch = false; bool maintainNewValueJump = false; diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeISelDAGToDAG.cpp b/contrib/llvm/lib/Target/MBlaze/MBlazeISelDAGToDAG.cpp index 78ad24d..34e33fd 100644 --- a/contrib/llvm/lib/Target/MBlaze/MBlazeISelDAGToDAG.cpp +++ b/contrib/llvm/lib/Target/MBlaze/MBlazeISelDAGToDAG.cpp @@ -237,7 +237,7 @@ SDNode* MBlazeDAGToDAGISel::Select(SDNode *Node) { // Use load to get GOT target SDValue Ops[] = { Callee, GPReg, Chain }; SDValue Load = SDValue(CurDAG->getMachineNode(MBlaze::LW, dl, - MVT::i32, MVT::Other, Ops, 3), 0); + MVT::i32, MVT::Other, Ops), 0); Chain = Load.getValue(1); // Call target must be on T9 diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeInstrInfo.td b/contrib/llvm/lib/Target/MBlaze/MBlazeInstrInfo.td index f86bc0b..d27cd39 100644 --- a/contrib/llvm/lib/Target/MBlaze/MBlazeInstrInfo.td +++ b/contrib/llvm/lib/Target/MBlaze/MBlazeInstrInfo.td @@ -724,8 +724,7 @@ let usesCustomInserter=1 in { [(set GPR:$dst, (atomic_load_nand_32 GPR:$ptr, GPR:$val))]>; def MEMBARRIER : MBlazePseudo<(outs), (ins), - "# memory barrier", - [(membarrier (i32 imm), (i32 imm), (i32 imm), (i32 imm), (i32 imm))]>; + "# memory barrier", []>; } //===----------------------------------------------------------------------===// diff --git a/contrib/llvm/lib/Target/Mangler.cpp b/contrib/llvm/lib/Target/Mangler.cpp index edfd421..d31efa8 100644 --- a/contrib/llvm/lib/Target/Mangler.cpp +++ b/contrib/llvm/lib/Target/Mangler.cpp @@ -188,7 +188,12 @@ void Mangler::getNameWithPrefix(SmallVectorImpl<char> &OutName, // If this global has a name, handle it simply. if (GV->hasName()) { - getNameWithPrefix(OutName, GV->getName(), PrefixTy); + StringRef Name = GV->getName(); + getNameWithPrefix(OutName, Name, PrefixTy); + // No need to do anything else if the global has the special "do not mangle" + // flag in the name. + if (Name[0] == 1) + return; } else { // Get the ID for the global, assigning a new one if we haven't got one // already. 
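// Aside: a minimal sketch of the "do not mangle" convention the Mangler hunk
// above checks for -- a global whose name begins with the byte '\1' is treated
// as already final, so no target prefix or further decoration is applied
// (commonly the marker byte itself is then stripped). mangledName/Prefix are
// illustrative names, not the Mangler interface.
#include <string>

std::string mangledName(const std::string &Name, const std::string &Prefix) {
  if (!Name.empty() && Name[0] == '\1')
    return Name.substr(1); // drop the marker, keep the raw name untouched
  return Prefix + Name;    // ordinary global: apply the target's usual prefix
}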
diff --git a/contrib/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp b/contrib/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp index c403f21..0795cb9 100644 --- a/contrib/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp +++ b/contrib/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp @@ -63,7 +63,6 @@ class MipsAsmParser : public MCTargetAsmParser { MCAsmParser &Parser; MipsAssemblerOptions Options; - #define GET_ASSEMBLER_HEADER #include "MipsGenAsmMatcher.inc" @@ -127,9 +126,12 @@ class MipsAsmParser : public MCTargetAsmParser { bool isLoad,bool isImmOpnd); bool reportParseError(StringRef ErrorMsg); - bool parseMemOffset(const MCExpr *&Res); + bool parseMemOffset(const MCExpr *&Res, bool isParenExpr); bool parseRelocOperand(const MCExpr *&Res); + const MCExpr* evaluateRelocExpr(const MCExpr *Expr, StringRef RelocStr); + + bool isEvaluated(const MCExpr *Expr); bool parseDirectiveSet(); bool parseSetAtDirective(); @@ -171,7 +173,7 @@ class MipsAsmParser : public MCTargetAsmParser { bool requestsDoubleOperand(StringRef Mnemonic); - unsigned getReg(int RC,int RegNo); + unsigned getReg(int RC, int RegNo); int getATReg(); @@ -269,7 +271,7 @@ public: void addImmOperands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); const MCExpr *Expr = getImm(); - addExpr(Inst,Expr); + addExpr(Inst, Expr); } void addMemOperands(MCInst &Inst, unsigned N) const { @@ -278,7 +280,7 @@ public: Inst.addOperand(MCOperand::CreateReg(getMemBase())); const MCExpr *Expr = getMemOff(); - addExpr(Inst,Expr); + addExpr(Inst, Expr); } bool isReg() const { return Kind == k_Register; } @@ -391,15 +393,19 @@ public: } /// getStartLoc - Get the location of the first token of this operand. - SMLoc getStartLoc() const { return StartLoc; } + SMLoc getStartLoc() const { + return StartLoc; + } /// getEndLoc - Get the location of the last token of this operand. - SMLoc getEndLoc() const { return EndLoc; } + SMLoc getEndLoc() const { + return EndLoc; + } virtual void print(raw_ostream &OS) const { llvm_unreachable("unimplemented!"); } -}; -} +}; // class MipsOperand +} // namespace namespace llvm { extern const MCInstrDesc MipsInsts[]; @@ -409,39 +415,55 @@ static const MCInstrDesc &getInstDesc(unsigned Opcode) { } bool MipsAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc, - SmallVectorImpl<MCInst> &Instructions) { + SmallVectorImpl<MCInst> &Instructions) { const MCInstrDesc &MCID = getInstDesc(Inst.getOpcode()); Inst.setLoc(IDLoc); + if (MCID.hasDelaySlot() && Options.isReorder()) { + // If this instruction has a delay slot and .set reorder is active, + // emit a NOP after it. + Instructions.push_back(Inst); + MCInst NopInst; + NopInst.setOpcode(Mips::SLL); + NopInst.addOperand(MCOperand::CreateReg(Mips::ZERO)); + NopInst.addOperand(MCOperand::CreateReg(Mips::ZERO)); + NopInst.addOperand(MCOperand::CreateImm(0)); + Instructions.push_back(NopInst); + return false; + } + if (MCID.mayLoad() || MCID.mayStore()) { // Check the offset of memory operand, if it is a symbol - // reference or immediate we may have to expand instructions - for (unsigned i=0;i<MCID.getNumOperands();i++) { + // reference or immediate we may have to expand instructions. 
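// Aside: sketch of two checks from processInstruction -- (1) with ".set
// reorder" active, an instruction that has a delay slot is followed by an
// explicit NOP (MIPS spells NOP as "sll $zero, $zero, 0"); (2) a memory
// offset can only be used directly if it fits the signed 16-bit field,
// otherwise the instruction is expanded (see the lui/addu expansion in the
// loop that follows and in expandMemInst further down). The tiny Inst struct
// is an illustrative stand-in, not the MCInst API.
#include <cstdint>
#include <string>
#include <vector>

struct Inst {
  std::string Opcode;
  int Rd, Rs;
  int64_t Imm;
};

constexpr bool fitsSImm16(int64_t Offset) {
  return Offset >= -32768 && Offset <= 32767;
}

void emitWithDelaySlotNop(const Inst &I, bool HasDelaySlot, bool Reorder,
                          std::vector<Inst> &Out) {
  Out.push_back(I);
  if (HasDelaySlot && Reorder)
    Out.push_back({"sll", /*Rd=*/0, /*Rs=*/0, /*Imm=*/0}); // $zero,$zero,0 == nop
}

static_assert(fitsSImm16(32767) && !fitsSImm16(32768), "signed 16-bit range");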
+ for (unsigned i = 0; i < MCID.getNumOperands(); i++) { const MCOperandInfo &OpInfo = MCID.OpInfo[i]; - if ((OpInfo.OperandType == MCOI::OPERAND_MEMORY) || - (OpInfo.OperandType == MCOI::OPERAND_UNKNOWN)) { + if ((OpInfo.OperandType == MCOI::OPERAND_MEMORY) + || (OpInfo.OperandType == MCOI::OPERAND_UNKNOWN)) { MCOperand &Op = Inst.getOperand(i); if (Op.isImm()) { int MemOffset = Op.getImm(); if (MemOffset < -32768 || MemOffset > 32767) { - // Offset can't exceed 16bit value - expandMemInst(Inst,IDLoc,Instructions,MCID.mayLoad(),true); + // Offset can't exceed 16bit value. + expandMemInst(Inst, IDLoc, Instructions, MCID.mayLoad(), true); return false; } } else if (Op.isExpr()) { const MCExpr *Expr = Op.getExpr(); - if (Expr->getKind() == MCExpr::SymbolRef){ + if (Expr->getKind() == MCExpr::SymbolRef) { const MCSymbolRefExpr *SR = - static_cast<const MCSymbolRefExpr*>(Expr); + static_cast<const MCSymbolRefExpr*>(Expr); if (SR->getKind() == MCSymbolRefExpr::VK_None) { - // Expand symbol - expandMemInst(Inst,IDLoc,Instructions,MCID.mayLoad(),false); + // Expand symbol. + expandMemInst(Inst, IDLoc, Instructions, MCID.mayLoad(), false); return false; } + } else if (!isEvaluated(Expr)) { + expandMemInst(Inst, IDLoc, Instructions, MCID.mayLoad(), false); + return false; } } } - } - } + } // for + } // if load/store if (needsExpansion(Inst)) expandInstruction(Inst, IDLoc, Instructions); @@ -453,30 +475,30 @@ bool MipsAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc, bool MipsAsmParser::needsExpansion(MCInst &Inst) { - switch(Inst.getOpcode()) { - case Mips::LoadImm32Reg: - case Mips::LoadAddr32Imm: - case Mips::LoadAddr32Reg: - return true; - default: - return false; + switch (Inst.getOpcode()) { + case Mips::LoadImm32Reg: + case Mips::LoadAddr32Imm: + case Mips::LoadAddr32Reg: + return true; + default: + return false; } } void MipsAsmParser::expandInstruction(MCInst &Inst, SMLoc IDLoc, - SmallVectorImpl<MCInst> &Instructions){ - switch(Inst.getOpcode()) { - case Mips::LoadImm32Reg: - return expandLoadImm(Inst, IDLoc, Instructions); - case Mips::LoadAddr32Imm: - return expandLoadAddressImm(Inst,IDLoc,Instructions); - case Mips::LoadAddr32Reg: - return expandLoadAddressReg(Inst,IDLoc,Instructions); - } + SmallVectorImpl<MCInst> &Instructions) { + switch (Inst.getOpcode()) { + case Mips::LoadImm32Reg: + return expandLoadImm(Inst, IDLoc, Instructions); + case Mips::LoadAddr32Imm: + return expandLoadAddressImm(Inst, IDLoc, Instructions); + case Mips::LoadAddr32Reg: + return expandLoadAddressReg(Inst, IDLoc, Instructions); + } } void MipsAsmParser::expandLoadImm(MCInst &Inst, SMLoc IDLoc, - SmallVectorImpl<MCInst> &Instructions){ + SmallVectorImpl<MCInst> &Instructions) { MCInst tmpInst; const MCOperand &ImmOp = Inst.getOperand(1); assert(ImmOp.isImm() && "expected immediate operand kind"); @@ -485,26 +507,24 @@ void MipsAsmParser::expandLoadImm(MCInst &Inst, SMLoc IDLoc, int ImmValue = ImmOp.getImm(); tmpInst.setLoc(IDLoc); - if ( 0 <= ImmValue && ImmValue <= 65535) { - // for 0 <= j <= 65535. + if (0 <= ImmValue && ImmValue <= 65535) { + // For 0 <= j <= 65535. // li d,j => ori d,$zero,j tmpInst.setOpcode(Mips::ORi); tmpInst.addOperand(MCOperand::CreateReg(RegOp.getReg())); - tmpInst.addOperand( - MCOperand::CreateReg(Mips::ZERO)); + tmpInst.addOperand(MCOperand::CreateReg(Mips::ZERO)); tmpInst.addOperand(MCOperand::CreateImm(ImmValue)); Instructions.push_back(tmpInst); - } else if ( ImmValue < 0 && ImmValue >= -32768) { - // for -32768 <= j < 0. 
+ } else if (ImmValue < 0 && ImmValue >= -32768) { + // For -32768 <= j < 0. // li d,j => addiu d,$zero,j tmpInst.setOpcode(Mips::ADDiu); tmpInst.addOperand(MCOperand::CreateReg(RegOp.getReg())); - tmpInst.addOperand( - MCOperand::CreateReg(Mips::ZERO)); + tmpInst.addOperand(MCOperand::CreateReg(Mips::ZERO)); tmpInst.addOperand(MCOperand::CreateImm(ImmValue)); Instructions.push_back(tmpInst); } else { - // for any other value of j that is representable as a 32-bit integer. + // For any other value of j that is representable as a 32-bit integer. // li d,j => lui d,hi16(j) // ori d,d,lo16(j) tmpInst.setOpcode(Mips::LUi); @@ -522,7 +542,7 @@ void MipsAsmParser::expandLoadImm(MCInst &Inst, SMLoc IDLoc, } void MipsAsmParser::expandLoadAddressReg(MCInst &Inst, SMLoc IDLoc, - SmallVectorImpl<MCInst> &Instructions){ + SmallVectorImpl<MCInst> &Instructions) { MCInst tmpInst; const MCOperand &ImmOp = Inst.getOperand(2); assert(ImmOp.isImm() && "expected immediate operand kind"); @@ -531,19 +551,19 @@ void MipsAsmParser::expandLoadAddressReg(MCInst &Inst, SMLoc IDLoc, const MCOperand &DstRegOp = Inst.getOperand(0); assert(DstRegOp.isReg() && "expected register operand kind"); int ImmValue = ImmOp.getImm(); - if ( -32768 <= ImmValue && ImmValue <= 65535) { - //for -32768 <= j <= 65535. - //la d,j(s) => addiu d,s,j + if (-32768 <= ImmValue && ImmValue <= 65535) { + // For -32768 <= j <= 65535. + // la d,j(s) => addiu d,s,j tmpInst.setOpcode(Mips::ADDiu); tmpInst.addOperand(MCOperand::CreateReg(DstRegOp.getReg())); tmpInst.addOperand(MCOperand::CreateReg(SrcRegOp.getReg())); tmpInst.addOperand(MCOperand::CreateImm(ImmValue)); Instructions.push_back(tmpInst); } else { - //for any other value of j that is representable as a 32-bit integer. - //la d,j(s) => lui d,hi16(j) - // ori d,d,lo16(j) - // addu d,d,s + // For any other value of j that is representable as a 32-bit integer. + // la d,j(s) => lui d,hi16(j) + // ori d,d,lo16(j) + // addu d,d,s tmpInst.setOpcode(Mips::LUi); tmpInst.addOperand(MCOperand::CreateReg(DstRegOp.getReg())); tmpInst.addOperand(MCOperand::CreateImm((ImmValue & 0xffff0000) >> 16)); @@ -564,26 +584,25 @@ void MipsAsmParser::expandLoadAddressReg(MCInst &Inst, SMLoc IDLoc, } void MipsAsmParser::expandLoadAddressImm(MCInst &Inst, SMLoc IDLoc, - SmallVectorImpl<MCInst> &Instructions){ + SmallVectorImpl<MCInst> &Instructions) { MCInst tmpInst; const MCOperand &ImmOp = Inst.getOperand(1); assert(ImmOp.isImm() && "expected immediate operand kind"); const MCOperand &RegOp = Inst.getOperand(0); assert(RegOp.isReg() && "expected register operand kind"); int ImmValue = ImmOp.getImm(); - if ( -32768 <= ImmValue && ImmValue <= 65535) { - //for -32768 <= j <= 65535. - //la d,j => addiu d,$zero,j + if (-32768 <= ImmValue && ImmValue <= 65535) { + // For -32768 <= j <= 65535. + // la d,j => addiu d,$zero,j tmpInst.setOpcode(Mips::ADDiu); tmpInst.addOperand(MCOperand::CreateReg(RegOp.getReg())); - tmpInst.addOperand( - MCOperand::CreateReg(Mips::ZERO)); + tmpInst.addOperand(MCOperand::CreateReg(Mips::ZERO)); tmpInst.addOperand(MCOperand::CreateImm(ImmValue)); Instructions.push_back(tmpInst); } else { - //for any other value of j that is representable as a 32-bit integer. - //la d,j => lui d,hi16(j) - // ori d,d,lo16(j) + // For any other value of j that is representable as a 32-bit integer. 
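// Aside: sketch of the three expansions chosen above for `li d,j` (and the
// matching `la` forms), with the same ranges as the code. Because the low
// half is inserted with ORi, a zero-extended immediate, the lui/ori pair
// needs no carry correction; contrast this with the signed 16-bit offsets
// handled later in expandMemInst and %hi/%lo. Strings are illustrative only.
#include <cstdint>
#include <string>
#include <vector>

struct Expansion { std::vector<std::string> Insns; };

inline Expansion expandLi(const std::string &D, int32_t J) {
  if (J >= 0 && J <= 65535)               // fits a zero-extended u16
    return {{"ori " + D + ", $zero, " + std::to_string(J)}};
  if (J < 0 && J >= -32768)               // fits a sign-extended s16
    return {{"addiu " + D + ", $zero, " + std::to_string(J)}};
  uint32_t U = static_cast<uint32_t>(J);  // any other 32-bit value
  return {{"lui " + D + ", " + std::to_string(U >> 16),
           "ori " + D + ", " + D + ", " + std::to_string(U & 0xffff)}};
}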
+ // la d,j => lui d,hi16(j) + // ori d,d,lo16(j) tmpInst.setOpcode(Mips::LUi); tmpInst.addOperand(MCOperand::CreateReg(RegOp.getReg())); tmpInst.addOperand(MCOperand::CreateImm((ImmValue & 0xffff0000) >> 16)); @@ -598,40 +617,37 @@ void MipsAsmParser::expandLoadAddressImm(MCInst &Inst, SMLoc IDLoc, } void MipsAsmParser::expandMemInst(MCInst &Inst, SMLoc IDLoc, - SmallVectorImpl<MCInst> &Instructions, - bool isLoad,bool isImmOpnd) { + SmallVectorImpl<MCInst> &Instructions, bool isLoad, bool isImmOpnd) { const MCSymbolRefExpr *SR; MCInst TempInst; - unsigned ImmOffset,HiOffset,LoOffset; + unsigned ImmOffset, HiOffset, LoOffset; const MCExpr *ExprOffset; unsigned TmpRegNum; - unsigned AtRegNum = getReg((isMips64()) ? Mips::CPU64RegsRegClassID: - Mips::CPURegsRegClassID, - getATReg()); - // 1st operand is either source or dst register + unsigned AtRegNum = getReg((isMips64()) ? Mips::CPU64RegsRegClassID + : Mips::CPURegsRegClassID, getATReg()); + // 1st operand is either the source or destination register. assert(Inst.getOperand(0).isReg() && "expected register operand kind"); unsigned RegOpNum = Inst.getOperand(0).getReg(); - // 2nd operand is base register + // 2nd operand is the base register. assert(Inst.getOperand(1).isReg() && "expected register operand kind"); unsigned BaseRegNum = Inst.getOperand(1).getReg(); - // 3rd operand is either immediate or expression + // 3rd operand is either an immediate or expression. if (isImmOpnd) { assert(Inst.getOperand(2).isImm() && "expected immediate operand kind"); ImmOffset = Inst.getOperand(2).getImm(); LoOffset = ImmOffset & 0x0000ffff; HiOffset = (ImmOffset & 0xffff0000) >> 16; - // If msb of LoOffset is 1(negative number) we must increment HiOffset + // If msb of LoOffset is 1(negative number) we must increment HiOffset. if (LoOffset & 0x8000) HiOffset++; - } - else + } else ExprOffset = Inst.getOperand(2).getExpr(); - // All instructions will have the same location + // All instructions will have the same location. TempInst.setLoc(IDLoc); // 1st instruction in expansion is LUi. For load instruction we can use // the dst register as a temporary if base and dst are different, - // but for stores we must use $at - TmpRegNum = (isLoad && (BaseRegNum != RegOpNum))?RegOpNum:AtRegNum; + // but for stores we must use $at. + TmpRegNum = (isLoad && (BaseRegNum != RegOpNum)) ? RegOpNum : AtRegNum; TempInst.setOpcode(Mips::LUi); TempInst.addOperand(MCOperand::CreateReg(TmpRegNum)); if (isImmOpnd) @@ -639,26 +655,28 @@ void MipsAsmParser::expandMemInst(MCInst &Inst, SMLoc IDLoc, else { if (ExprOffset->getKind() == MCExpr::SymbolRef) { SR = static_cast<const MCSymbolRefExpr*>(ExprOffset); - const MCSymbolRefExpr *HiExpr = MCSymbolRefExpr:: - Create(SR->getSymbol().getName(), - MCSymbolRefExpr::VK_Mips_ABS_HI, - getContext()); + const MCSymbolRefExpr *HiExpr = MCSymbolRefExpr::Create( + SR->getSymbol().getName(), MCSymbolRefExpr::VK_Mips_ABS_HI, + getContext()); + TempInst.addOperand(MCOperand::CreateExpr(HiExpr)); + } else { + const MCExpr *HiExpr = evaluateRelocExpr(ExprOffset, "hi"); TempInst.addOperand(MCOperand::CreateExpr(HiExpr)); } } - // Add the instruction to the list + // Add the instruction to the list. Instructions.push_back(TempInst); - // and prepare TempInst for next instruction + // Prepare TempInst for next instruction. TempInst.clear(); - // which is add temp register to base + // Add temp register to base. 
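// Aside: sketch of the three-instruction rewrite expandMemInst builds above
// for offsets outside the signed 16-bit range: lui temp, hi; addu temp,
// temp, base; then the original memory op with the low offset against temp.
// For loads the destination can double as the temporary when it differs from
// the base register; stores must fall back to the assembler temporary ($at).
// Because the final op sign-extends its 16-bit offset, the high half gets a
// carry when the low half's msb is set (worked %hi/%lo example further down).
// Register and opcode strings are illustrative only.
#include <cstdint>
#include <string>
#include <vector>

inline std::vector<std::string>
expandLargeOffset(const std::string &Op, const std::string &Reg,
                  const std::string &Base, int32_t Offset, bool IsLoad) {
  uint32_t Lo = static_cast<uint32_t>(Offset) & 0xffff;
  uint32_t Hi = static_cast<uint32_t>(Offset) >> 16;
  if (Lo & 0x8000)   // low half will be sign-extended by the final op,
    ++Hi;            // so carry one into the high half to compensate
  std::string Tmp = (IsLoad && Reg != Base) ? Reg : "$at";
  return {"lui "  + Tmp + ", " + std::to_string(Hi),
          "addu " + Tmp + ", " + Tmp + ", " + Base,
          Op + " " + Reg + ", " + std::to_string(Lo) + "(" + Tmp + ")"};
}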
TempInst.setOpcode(Mips::ADDu); TempInst.addOperand(MCOperand::CreateReg(TmpRegNum)); TempInst.addOperand(MCOperand::CreateReg(TmpRegNum)); TempInst.addOperand(MCOperand::CreateReg(BaseRegNum)); Instructions.push_back(TempInst); TempInst.clear(); - // and finaly, create original instruction with low part - // of offset and new base + // And finaly, create original instruction with low part + // of offset and new base. TempInst.setOpcode(Inst.getOpcode()); TempInst.addOperand(MCOperand::CreateReg(RegOpNum)); TempInst.addOperand(MCOperand::CreateReg(TmpRegNum)); @@ -666,10 +684,12 @@ void MipsAsmParser::expandMemInst(MCInst &Inst, SMLoc IDLoc, TempInst.addOperand(MCOperand::CreateImm(LoOffset)); else { if (ExprOffset->getKind() == MCExpr::SymbolRef) { - const MCSymbolRefExpr *LoExpr = MCSymbolRefExpr:: - Create(SR->getSymbol().getName(), - MCSymbolRefExpr::VK_Mips_ABS_LO, - getContext()); + const MCSymbolRefExpr *LoExpr = MCSymbolRefExpr::Create( + SR->getSymbol().getName(), MCSymbolRefExpr::VK_Mips_ABS_LO, + getContext()); + TempInst.addOperand(MCOperand::CreateExpr(LoExpr)); + } else { + const MCExpr *LoExpr = evaluateRelocExpr(ExprOffset, "lo"); TempInst.addOperand(MCOperand::CreateExpr(LoExpr)); } } @@ -688,11 +708,12 @@ MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, MatchingInlineAsm); switch (MatchResult) { - default: break; + default: + break; case Match_Success: { - if (processInstruction(Inst,IDLoc,Instructions)) + if (processInstruction(Inst, IDLoc, Instructions)) return true; - for(unsigned i =0; i < Instructions.size(); i++) + for (unsigned i = 0; i < Instructions.size(); i++) Out.EmitInstruction(Instructions[i]); return false; } @@ -705,8 +726,9 @@ MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, if (ErrorInfo >= Operands.size()) return Error(IDLoc, "too few operands for instruction"); - ErrorLoc = ((MipsOperand*)Operands[ErrorInfo])->getStartLoc(); - if (ErrorLoc == SMLoc()) ErrorLoc = IDLoc; + ErrorLoc = ((MipsOperand*) Operands[ErrorInfo])->getStartLoc(); + if (ErrorLoc == SMLoc()) + ErrorLoc = IDLoc; } return Error(ErrorLoc, "invalid operand for instruction"); @@ -757,10 +779,10 @@ int MipsAsmParser::matchCPURegisterName(StringRef Name) { .Case("t9", 25) .Default(-1); - // Although SGI documentation just cut out t0-t3 for n32/n64, + // Although SGI documentation just cuts out t0-t3 for n32/n64, // GNU pushes the values of t0-t3 to override the o32/o64 values for t4-t7 // We are supporting both cases, so for t0-t3 we'll just push them to t4-t7. - if (isMips64() && 8 <= CC && CC <= 11) + if (isMips64() && 8 <= CC && CC <= 11) CC += 4; if (CC == -1 && isMips64()) @@ -776,19 +798,23 @@ int MipsAsmParser::matchCPURegisterName(StringRef Name) { return CC; } + int MipsAsmParser::matchRegisterName(StringRef Name, bool is64BitReg) { + if (Name.equals("fcc0")) + return Mips::FCC0; + int CC; CC = matchCPURegisterName(Name); if (CC != -1) - return matchRegisterByNumber(CC,is64BitReg?Mips::CPU64RegsRegClassID: - Mips::CPURegsRegClassID); + return matchRegisterByNumber(CC, is64BitReg ? Mips::CPU64RegsRegClassID + : Mips::CPURegsRegClassID); if (Name[0] == 'f') { StringRef NumString = Name.substr(1); unsigned IntVal; - if( NumString.getAsInteger(10, IntVal)) - return -1; // not integer + if (NumString.getAsInteger(10, IntVal)) + return -1; // This is not an integer. 
if (IntVal > 31) return -1; @@ -797,18 +823,19 @@ int MipsAsmParser::matchRegisterName(StringRef Name, bool is64BitReg) { if (Format == FP_FORMAT_S || Format == FP_FORMAT_W) return getReg(Mips::FGR32RegClassID, IntVal); if (Format == FP_FORMAT_D) { - if(isFP64()) { + if (isFP64()) { return getReg(Mips::FGR64RegClassID, IntVal); } - // only even numbers available as register pairs - if (( IntVal > 31) || (IntVal%2 != 0)) + // Only even numbers available as register pairs. + if ((IntVal > 31) || (IntVal % 2 != 0)) return -1; - return getReg(Mips::AFGR64RegClassID, IntVal/2); + return getReg(Mips::AFGR64RegClassID, IntVal / 2); } } return -1; } + void MipsAsmParser::setDefaultFpFormat() { if (isMips64() || isFP64()) @@ -828,6 +855,7 @@ bool MipsAsmParser::requestsDoubleOperand(StringRef Mnemonic){ return IsDouble; } + void MipsAsmParser::setFpFormat(StringRef Format) { FpFormat = StringSwitch<FpFormatTy>(Format.lower()) @@ -850,7 +878,7 @@ int MipsAsmParser::getATReg() { return Options.getATRegNum(); } -unsigned MipsAsmParser::getReg(int RC,int RegNo) { +unsigned MipsAsmParser::getReg(int RC, int RegNo) { return *(getContext().getRegisterInfo().getRegClass(RC).begin() + RegNo); } @@ -871,14 +899,12 @@ int MipsAsmParser::tryParseRegister(bool is64BitReg) { RegNum = matchRegisterName(lowerCase, is64BitReg); } else if (Tok.is(AsmToken::Integer)) RegNum = matchRegisterByNumber(static_cast<unsigned>(Tok.getIntVal()), - is64BitReg ? Mips::CPU64RegsRegClassID - : Mips::CPURegsRegClassID); + is64BitReg ? Mips::CPU64RegsRegClassID : Mips::CPURegsRegClassID); return RegNum; } -bool MipsAsmParser:: - tryParseRegisterOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands, - bool is64BitReg){ +bool MipsAsmParser::tryParseRegisterOperand( + SmallVectorImpl<MCParsedAsmOperand*> &Operands, bool is64BitReg) { SMLoc S = Parser.getTok().getLoc(); int RegNo = -1; @@ -888,7 +914,7 @@ bool MipsAsmParser:: return true; Operands.push_back(MipsOperand::CreateReg(RegNo, S, - Parser.getTok().getLoc())); + Parser.getTok().getLoc())); Parser.Lex(); // Eat register token. return false; } @@ -911,19 +937,19 @@ bool MipsAsmParser::ParseOperand(SmallVectorImpl<MCParsedAsmOperand*>&Operands, Error(Parser.getTok().getLoc(), "unexpected token in operand"); return true; case AsmToken::Dollar: { - // parse register + // Parse the register. SMLoc S = Parser.getTok().getLoc(); Parser.Lex(); // Eat dollar token. - // parse register operand + // Parse the register operand. if (!tryParseRegisterOperand(Operands, isMips64())) { if (getLexer().is(AsmToken::LParen)) { - // check if it is indexed addressing operand + // Check if it is indexed addressing operand. Operands.push_back(MipsOperand::CreateToken("(", S)); - Parser.Lex(); // eat parenthesis + Parser.Lex(); // Eat the parenthesis. if (getLexer().isNot(AsmToken::Dollar)) return true; - Parser.Lex(); // eat dollar + Parser.Lex(); // Eat the dollar if (tryParseRegisterOperand(Operands, isMips64())) return true; @@ -936,7 +962,7 @@ bool MipsAsmParser::ParseOperand(SmallVectorImpl<MCParsedAsmOperand*>&Operands, } return false; } - // maybe it is a symbol reference + // Maybe it is a symbol reference. StringRef Identifier; if (Parser.parseIdentifier(Identifier)) return true; @@ -945,7 +971,7 @@ bool MipsAsmParser::ParseOperand(SmallVectorImpl<MCParsedAsmOperand*>&Operands, MCSymbol *Sym = getContext().GetOrCreateSymbol("$" + Identifier); - // Otherwise create a symbol ref. + // Otherwise create a symbol reference. 
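// Aside: sketch of the "$fN" handling in matchRegisterName above --
// single-precision (or FP64) names map straight to register N, while
// double-precision on a 32-bit FPU only accepts even registers, which name an
// (fN, fN+1) pair addressed by pair index N/2. The return value here is just
// the resolved index (-1 on error), not a register-class entry.
#include <cstdlib>
#include <string>

inline int matchFpRegIndex(const std::string &Name, bool IsDoubleFmt,
                           bool HasFp64) {
  if (Name.empty() || Name[0] != 'f')
    return -1;
  char *End = nullptr;
  long N = std::strtol(Name.c_str() + 1, &End, 10);
  if (End == Name.c_str() + 1 || *End != '\0' || N < 0 || N > 31)
    return -1;                      // not "f<0..31>"
  if (!IsDoubleFmt || HasFp64)
    return static_cast<int>(N);     // one 32- or 64-bit register per name
  if (N % 2 != 0)
    return -1;                      // 32-bit FPU pairs start on even regs only
  return static_cast<int>(N / 2);   // AFGR64 pair index
}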
const MCExpr *Res = MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_None, getContext()); @@ -954,16 +980,16 @@ bool MipsAsmParser::ParseOperand(SmallVectorImpl<MCParsedAsmOperand*>&Operands, } case AsmToken::Identifier: // Look for the existing symbol, we should check if - // we need to assigne the propper RegisterKind - if (searchSymbolAlias(Operands,MipsOperand::Kind_None)) - return false; - //else drop to expression parsing + // we need to assigne the propper RegisterKind. + if (searchSymbolAlias(Operands, MipsOperand::Kind_None)) + return false; + // Else drop to expression parsing. case AsmToken::LParen: case AsmToken::Minus: case AsmToken::Plus: case AsmToken::Integer: case AsmToken::String: { - // quoted label names + // Quoted label names. const MCExpr *IdVal; SMLoc S = Parser.getTok().getLoc(); if (getParser().parseExpression(IdVal)) @@ -973,9 +999,9 @@ bool MipsAsmParser::ParseOperand(SmallVectorImpl<MCParsedAsmOperand*>&Operands, return false; } case AsmToken::Percent: { - // it is a symbol reference or constant expression + // It is a symbol reference or constant expression. const MCExpr *IdVal; - SMLoc S = Parser.getTok().getLoc(); // start location of the operand + SMLoc S = Parser.getTok().getLoc(); // Start location of the operand. if (parseRelocOperand(IdVal)) return true; @@ -988,131 +1014,200 @@ bool MipsAsmParser::ParseOperand(SmallVectorImpl<MCParsedAsmOperand*>&Operands, return true; } -bool MipsAsmParser::parseRelocOperand(const MCExpr *&Res) { +const MCExpr* MipsAsmParser::evaluateRelocExpr(const MCExpr *Expr, + StringRef RelocStr) { + const MCExpr *Res; + // Check the type of the expression. + if (const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Expr)) { + // It's a constant, evaluate lo or hi value. + if (RelocStr == "lo") { + short Val = MCE->getValue(); + Res = MCConstantExpr::Create(Val, getContext()); + } else if (RelocStr == "hi") { + int Val = MCE->getValue(); + int LoSign = Val & 0x8000; + Val = (Val & 0xffff0000) >> 16; + // Lower part is treated as a signed int, so if it is negative + // we must add 1 to the hi part to compensate. + if (LoSign) + Val++; + Res = MCConstantExpr::Create(Val, getContext()); + } else { + llvm_unreachable("Invalid RelocStr value"); + } + return Res; + } + + if (const MCSymbolRefExpr *MSRE = dyn_cast<MCSymbolRefExpr>(Expr)) { + // It's a symbol, create a symbolic expression from the symbol. + StringRef Symbol = MSRE->getSymbol().getName(); + MCSymbolRefExpr::VariantKind VK = getVariantKind(RelocStr); + Res = MCSymbolRefExpr::Create(Symbol, VK, getContext()); + return Res; + } + + if (const MCBinaryExpr *BE = dyn_cast<MCBinaryExpr>(Expr)) { + const MCExpr *LExp = evaluateRelocExpr(BE->getLHS(), RelocStr); + const MCExpr *RExp = evaluateRelocExpr(BE->getRHS(), RelocStr); + Res = MCBinaryExpr::Create(BE->getOpcode(), LExp, RExp, getContext()); + return Res; + } - Parser.Lex(); // eat % token - const AsmToken &Tok = Parser.getTok(); // get next token, operation + if (const MCUnaryExpr *UN = dyn_cast<MCUnaryExpr>(Expr)) { + const MCExpr *UnExp = evaluateRelocExpr(UN->getSubExpr(), RelocStr); + Res = MCUnaryExpr::Create(UN->getOpcode(), UnExp, getContext()); + return Res; + } + // Just return the original expression. 
+ return Expr; +} + +bool MipsAsmParser::isEvaluated(const MCExpr *Expr) { + + switch (Expr->getKind()) { + case MCExpr::Constant: + return true; + case MCExpr::SymbolRef: + return (cast<MCSymbolRefExpr>(Expr)->getKind() != MCSymbolRefExpr::VK_None); + case MCExpr::Binary: + if (const MCBinaryExpr *BE = dyn_cast<MCBinaryExpr>(Expr)) { + if (!isEvaluated(BE->getLHS())) + return false; + return isEvaluated(BE->getRHS()); + } + case MCExpr::Unary: + return isEvaluated(cast<MCUnaryExpr>(Expr)->getSubExpr()); + default: + return false; + } + return false; +} + +bool MipsAsmParser::parseRelocOperand(const MCExpr *&Res) { + Parser.Lex(); // Eat the % token. + const AsmToken &Tok = Parser.getTok(); // Get next token, operation. if (Tok.isNot(AsmToken::Identifier)) return true; std::string Str = Tok.getIdentifier().str(); - Parser.Lex(); // eat identifier - // now make expression from the rest of the operand + Parser.Lex(); // Eat the identifier. + // Now make an expression from the rest of the operand. const MCExpr *IdVal; SMLoc EndLoc; if (getLexer().getKind() == AsmToken::LParen) { while (1) { - Parser.Lex(); // eat '(' token + Parser.Lex(); // Eat the '(' token. if (getLexer().getKind() == AsmToken::Percent) { - Parser.Lex(); // eat % token + Parser.Lex(); // Eat the % token. const AsmToken &nextTok = Parser.getTok(); if (nextTok.isNot(AsmToken::Identifier)) return true; Str += "(%"; Str += nextTok.getIdentifier(); - Parser.Lex(); // eat identifier + Parser.Lex(); // Eat the identifier. if (getLexer().getKind() != AsmToken::LParen) return true; } else break; } - if (getParser().parseParenExpression(IdVal,EndLoc)) + if (getParser().parseParenExpression(IdVal, EndLoc)) return true; while (getLexer().getKind() == AsmToken::RParen) - Parser.Lex(); // eat ')' token + Parser.Lex(); // Eat the ')' token. } else - return true; // parenthesis must follow reloc operand - - // Check the type of the expression - if (const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(IdVal)) { - // It's a constant, evaluate lo or hi value - if (Str == "lo") { - short Val = MCE->getValue(); - Res = MCConstantExpr::Create(Val, getContext()); - } else if (Str == "hi") { - int Val = MCE->getValue(); - int LoSign = Val & 0x8000; - Val = (Val & 0xffff0000) >> 16; - // Lower part is treated as a signed int, so if it is negative - // we must add 1 to the hi part to compensate - if (LoSign) - Val++; - Res = MCConstantExpr::Create(Val, getContext()); - } - return false; - } + return true; // Parenthesis must follow the relocation operand. 
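The constant case of evaluateRelocExpr above implements the usual MIPS %hi/%lo split: the low half is consumed as a signed 16-bit immediate, so the high half must be bumped by one whenever that immediate turns out negative. A self-contained sketch of the same arithmetic (plain C++, not the MC API):

#include <cassert>
#include <cstdint>

// %hi: upper 16 bits, plus one when the low half will sign-extend negative.
static uint32_t hiPart(uint32_t Val) {
  uint32_t Hi = Val >> 16;
  if (Val & 0x8000)
    Hi = (Hi + 1) & 0xffff;
  return Hi;
}

// %lo: low 16 bits, interpreted as a signed immediate (as addiu/lw do).
static int32_t loPart(uint32_t Val) {
  return static_cast<int16_t>(Val & 0xffff);
}

int main() {
  uint32_t Addr = 0x1234abcd;          // arbitrary example address
  // A lui/addiu pair computes (hi << 16) + sign_extend(lo); the split must
  // therefore round-trip back to the original value.
  uint32_t Rebuilt = (hiPart(Addr) << 16) + static_cast<uint32_t>(loPart(Addr));
  assert(Rebuilt == Addr);
  return 0;
}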
- if (const MCSymbolRefExpr *MSRE = dyn_cast<MCSymbolRefExpr>(IdVal)) { - // It's a symbol, create symbolic expression from symbol - StringRef Symbol = MSRE->getSymbol().getName(); - MCSymbolRefExpr::VariantKind VK = getVariantKind(Str); - Res = MCSymbolRefExpr::Create(Symbol,VK,getContext()); - return false; - } - return true; + Res = evaluateRelocExpr(IdVal, Str); + return false; } bool MipsAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) { - StartLoc = Parser.getTok().getLoc(); RegNo = tryParseRegister(isMips64()); EndLoc = Parser.getTok().getLoc(); - return (RegNo == (unsigned)-1); + return (RegNo == (unsigned) -1); } -bool MipsAsmParser::parseMemOffset(const MCExpr *&Res) { - +bool MipsAsmParser::parseMemOffset(const MCExpr *&Res, bool isParenExpr) { SMLoc S; + bool Result = true; - switch(getLexer().getKind()) { + while (getLexer().getKind() == AsmToken::LParen) + Parser.Lex(); + + switch (getLexer().getKind()) { default: return true; case AsmToken::Identifier: + case AsmToken::LParen: case AsmToken::Integer: case AsmToken::Minus: case AsmToken::Plus: - return (getParser().parseExpression(Res)); + if (isParenExpr) + Result = getParser().parseParenExpression(Res, S); + else + Result = (getParser().parseExpression(Res)); + while (getLexer().getKind() == AsmToken::RParen) + Parser.Lex(); + break; case AsmToken::Percent: - return parseRelocOperand(Res); - case AsmToken::LParen: - return false; // it's probably assuming 0 + Result = parseRelocOperand(Res); } - return true; + return Result; } MipsAsmParser::OperandMatchResultTy MipsAsmParser::parseMemOperand( - SmallVectorImpl<MCParsedAsmOperand*>&Operands) { + SmallVectorImpl<MCParsedAsmOperand*>&Operands) { const MCExpr *IdVal = 0; SMLoc S; - // first operand is the offset + bool isParenExpr = false; + // First operand is the offset. S = Parser.getTok().getLoc(); - if (parseMemOffset(IdVal)) - return MatchOperand_ParseFail; + if (getLexer().getKind() == AsmToken::LParen) { + Parser.Lex(); + isParenExpr = true; + } - const AsmToken &Tok = Parser.getTok(); // get next token - if (Tok.isNot(AsmToken::LParen)) { - MipsOperand *Mnemonic = static_cast<MipsOperand*>(Operands[0]); - if (Mnemonic->getToken() == "la") { - SMLoc E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() -1); - Operands.push_back(MipsOperand::CreateImm(IdVal, S, E)); - return MatchOperand_Success; + if (getLexer().getKind() != AsmToken::Dollar) { + if (parseMemOffset(IdVal, isParenExpr)) + return MatchOperand_ParseFail; + + const AsmToken &Tok = Parser.getTok(); // Get the next token. + if (Tok.isNot(AsmToken::LParen)) { + MipsOperand *Mnemonic = static_cast<MipsOperand*>(Operands[0]); + if (Mnemonic->getToken() == "la") { + SMLoc E = SMLoc::getFromPointer( + Parser.getTok().getLoc().getPointer() - 1); + Operands.push_back(MipsOperand::CreateImm(IdVal, S, E)); + return MatchOperand_Success; + } + if (Tok.is(AsmToken::EndOfStatement)) { + SMLoc E = SMLoc::getFromPointer( + Parser.getTok().getLoc().getPointer() - 1); + + // Zero register assumed, add a memory operand with ZERO as its base. + Operands.push_back(MipsOperand::CreateMem(isMips64() ? Mips::ZERO_64 + : Mips::ZERO, + IdVal, S, E)); + return MatchOperand_Success; + } + Error(Parser.getTok().getLoc(), "'(' expected"); + return MatchOperand_ParseFail; } - Error(Parser.getTok().getLoc(), "'(' expected"); - return MatchOperand_ParseFail; - } - Parser.Lex(); // Eat '(' token. + Parser.Lex(); // Eat the '(' token. 
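With the changes above, parseMemOperand also accepts a bare offset with no parenthesized base register, in which case $zero is assumed. A toy string-level sketch of the accepted operand shapes (this is not the MCAsmParser machinery, just an illustration):

#include <cassert>
#include <string>

struct MemOp { std::string Offset, Base; };

// Split "offset(base)"; a missing "(base)" falls back to $zero, mirroring the
// new end-of-statement case above.
static MemOp splitMemOperand(const std::string &Text) {
  std::string::size_type L = Text.rfind('('), R = Text.rfind(')');
  if (L == std::string::npos || R == std::string::npos || R < L)
    return {Text, "$zero"};
  return {Text.substr(0, L), Text.substr(L + 1, R - L - 1)};
}

int main() {
  MemOp A = splitMemOperand("16($sp)");
  assert(A.Offset == "16" && A.Base == "$sp");
  MemOp B = splitMemOperand("%lo(sym)($gp)");  // relocation expression as offset
  assert(B.Offset == "%lo(sym)" && B.Base == "$gp");
  MemOp C = splitMemOperand("sym");            // no base: $zero assumed
  assert(C.Base == "$zero");
  return 0;
}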
+ } - const AsmToken &Tok1 = Parser.getTok(); // get next token + const AsmToken &Tok1 = Parser.getTok(); // Get next token if (Tok1.is(AsmToken::Dollar)) { - Parser.Lex(); // Eat '$' token. + Parser.Lex(); // Eat the '$' token. if (tryParseRegisterOperand(Operands, isMips64())) { Error(Parser.getTok().getLoc(), "unexpected token in operand"); return MatchOperand_ParseFail; @@ -1123,7 +1218,7 @@ MipsAsmParser::OperandMatchResultTy MipsAsmParser::parseMemOperand( return MatchOperand_ParseFail; } - const AsmToken &Tok2 = Parser.getTok(); // get next token + const AsmToken &Tok2 = Parser.getTok(); // Get next token. if (Tok2.isNot(AsmToken::RParen)) { Error(Parser.getTok().getLoc(), "')' expected"); return MatchOperand_ParseFail; @@ -1131,17 +1226,26 @@ MipsAsmParser::OperandMatchResultTy MipsAsmParser::parseMemOperand( SMLoc E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1); - Parser.Lex(); // Eat ')' token. + Parser.Lex(); // Eat the ')' token. if (IdVal == 0) IdVal = MCConstantExpr::Create(0, getContext()); - // now replace register operand with the mem operand + // Replace the register operand with the memory operand. MipsOperand* op = static_cast<MipsOperand*>(Operands.back()); int RegNo = op->getReg(); - // remove register from operands + // Remove the register from the operands. Operands.pop_back(); - // and add memory operand + // Add the memory operand. + if (const MCBinaryExpr *BE = dyn_cast<MCBinaryExpr>(IdVal)) { + int64_t Imm; + if (IdVal->EvaluateAsAbsolute(Imm)) + IdVal = MCConstantExpr::Create(Imm, getContext()); + else if (BE->getLHS()->getKind() != MCExpr::SymbolRef) + IdVal = MCBinaryExpr::Create(BE->getOpcode(), BE->getRHS(), BE->getLHS(), + getContext()); + } + Operands.push_back(MipsOperand::CreateMem(RegNo, IdVal, S, E)); delete op; return MatchOperand_Success; @@ -1153,17 +1257,17 @@ MipsAsmParser::parseCPU64Regs(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { if (!isMips64()) return MatchOperand_NoMatch; if (getLexer().getKind() == AsmToken::Identifier) { - if (searchSymbolAlias(Operands,MipsOperand::Kind_CPU64Regs)) + if (searchSymbolAlias(Operands, MipsOperand::Kind_CPU64Regs)) return MatchOperand_Success; return MatchOperand_NoMatch; } - // if the first token is not '$' we have an error + // If the first token is not '$', we have an error. if (Parser.getTok().isNot(AsmToken::Dollar)) return MatchOperand_NoMatch; Parser.Lex(); // Eat $ - if(!tryParseRegisterOperand(Operands, true)) { - // set the proper register kind + if (!tryParseRegisterOperand(Operands, true)) { + // Set the proper register kind. 
MipsOperand* op = static_cast<MipsOperand*>(Operands.back()); op->setRegKind(MipsOperand::Kind_CPU64Regs); return MatchOperand_Success; @@ -1171,9 +1275,8 @@ MipsAsmParser::parseCPU64Regs(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { return MatchOperand_NoMatch; } -bool MipsAsmParser:: -searchSymbolAlias(SmallVectorImpl<MCParsedAsmOperand*> &Operands, - unsigned RegisterKind) { +bool MipsAsmParser::searchSymbolAlias( + SmallVectorImpl<MCParsedAsmOperand*> &Operands, unsigned RegisterKind) { MCSymbol *Sym = getContext().LookupSymbol(Parser.getTok().getIdentifier()); if (Sym) { @@ -1187,13 +1290,13 @@ searchSymbolAlias(SmallVectorImpl<MCParsedAsmOperand*> &Operands, const MCSymbolRefExpr *Ref = static_cast<const MCSymbolRefExpr*>(Expr); const StringRef DefSymbol = Ref->getSymbol().getName(); if (DefSymbol.startswith("$")) { - // Lookup for the register with corresponding name - int RegNum = matchRegisterName(DefSymbol.substr(1),isMips64()); + // Lookup for the register with the corresponding name. + int RegNum = matchRegisterName(DefSymbol.substr(1), isMips64()); if (RegNum > -1) { Parser.Lex(); - MipsOperand *op = MipsOperand::CreateReg(RegNum,S, - Parser.getTok().getLoc()); - op->setRegKind((MipsOperand::RegisterKind)RegisterKind); + MipsOperand *op = MipsOperand::CreateReg(RegNum, S, + Parser.getTok().getLoc()); + op->setRegKind((MipsOperand::RegisterKind) RegisterKind); Operands.push_back(op); return true; } @@ -1201,29 +1304,30 @@ searchSymbolAlias(SmallVectorImpl<MCParsedAsmOperand*> &Operands, } else if (Expr->getKind() == MCExpr::Constant) { Parser.Lex(); const MCConstantExpr *Const = static_cast<const MCConstantExpr*>(Expr); - MipsOperand *op = MipsOperand::CreateImm(Const,S, - Parser.getTok().getLoc()); + MipsOperand *op = MipsOperand::CreateImm(Const, S, + Parser.getTok().getLoc()); Operands.push_back(op); return true; } } return false; } + MipsAsmParser::OperandMatchResultTy MipsAsmParser::parseCPURegs(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { if (getLexer().getKind() == AsmToken::Identifier) { - if (searchSymbolAlias(Operands,MipsOperand::Kind_CPURegs)) + if (searchSymbolAlias(Operands, MipsOperand::Kind_CPURegs)) return MatchOperand_Success; return MatchOperand_NoMatch; } - // if the first token is not '$' we have an error + // If the first token is not '$' we have an error. if (Parser.getTok().isNot(AsmToken::Dollar)) return MatchOperand_NoMatch; Parser.Lex(); // Eat $ - if(!tryParseRegisterOperand(Operands, false)) { - // set the propper register kind + if (!tryParseRegisterOperand(Operands, false)) { + // Set the proper register kind. MipsOperand* op = static_cast<MipsOperand*>(Operands.back()); op->setRegKind(MipsOperand::Kind_CPURegs); return MatchOperand_Success; @@ -1237,87 +1341,88 @@ MipsAsmParser::parseHWRegs(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { if (isMips64()) return MatchOperand_NoMatch; - // if the first token is not '$' we have error + // If the first token is not '$' we have error. if (Parser.getTok().isNot(AsmToken::Dollar)) return MatchOperand_NoMatch; SMLoc S = Parser.getTok().getLoc(); - Parser.Lex(); // Eat $ + Parser.Lex(); // Eat the '$'. - const AsmToken &Tok = Parser.getTok(); // get next token + const AsmToken &Tok = Parser.getTok(); // Get the next token. if (Tok.isNot(AsmToken::Integer)) return MatchOperand_NoMatch; unsigned RegNum = Tok.getIntVal(); - // at the moment only hwreg29 is supported + // At the moment only hwreg29 is supported. 
if (RegNum != 29) return MatchOperand_ParseFail; MipsOperand *op = MipsOperand::CreateReg(Mips::HWR29, S, - Parser.getTok().getLoc()); + Parser.getTok().getLoc()); op->setRegKind(MipsOperand::Kind_HWRegs); Operands.push_back(op); - Parser.Lex(); // Eat reg number + Parser.Lex(); // Eat the register number. return MatchOperand_Success; } MipsAsmParser::OperandMatchResultTy -MipsAsmParser::parseHW64Regs(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { +MipsAsmParser::parseHW64Regs( + SmallVectorImpl<MCParsedAsmOperand*> &Operands) { if (!isMips64()) return MatchOperand_NoMatch; - //if the first token is not '$' we have error + // If the first token is not '$' we have an error. if (Parser.getTok().isNot(AsmToken::Dollar)) return MatchOperand_NoMatch; SMLoc S = Parser.getTok().getLoc(); Parser.Lex(); // Eat $ - const AsmToken &Tok = Parser.getTok(); // get next token + const AsmToken &Tok = Parser.getTok(); // Get the next token. if (Tok.isNot(AsmToken::Integer)) return MatchOperand_NoMatch; unsigned RegNum = Tok.getIntVal(); - // at the moment only hwreg29 is supported + // At the moment only hwreg29 is supported. if (RegNum != 29) return MatchOperand_ParseFail; MipsOperand *op = MipsOperand::CreateReg(Mips::HWR29_64, S, - Parser.getTok().getLoc()); + Parser.getTok().getLoc()); op->setRegKind(MipsOperand::Kind_HW64Regs); Operands.push_back(op); - Parser.Lex(); // Eat reg number + Parser.Lex(); // Eat the register number. return MatchOperand_Success; } MipsAsmParser::OperandMatchResultTy MipsAsmParser::parseCCRRegs(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { unsigned RegNum; - //if the first token is not '$' we have error + // If the first token is not '$' we have an error. if (Parser.getTok().isNot(AsmToken::Dollar)) return MatchOperand_NoMatch; SMLoc S = Parser.getTok().getLoc(); - Parser.Lex(); // Eat $ + Parser.Lex(); // Eat the '$' - const AsmToken &Tok = Parser.getTok(); // get next token + const AsmToken &Tok = Parser.getTok(); // Get next token. if (Tok.is(AsmToken::Integer)) { RegNum = Tok.getIntVal(); - // at the moment only fcc0 is supported + // At the moment only fcc0 is supported. if (RegNum != 0) return MatchOperand_ParseFail; } else if (Tok.is(AsmToken::Identifier)) { - // at the moment only fcc0 is supported + // At the moment only fcc0 is supported. if (Tok.getIdentifier() != "fcc0") return MatchOperand_ParseFail; } else return MatchOperand_NoMatch; MipsOperand *op = MipsOperand::CreateReg(Mips::FCC0, S, - Parser.getTok().getLoc()); + Parser.getTok().getLoc()); op->setRegKind(MipsOperand::Kind_CCRRegs); Operands.push_back(op); - Parser.Lex(); // Eat reg number + Parser.Lex(); // Eat the register number. 
return MatchOperand_Success; } @@ -1349,23 +1454,23 @@ MCSymbolRefExpr::VariantKind MipsAsmParser::getVariantKind(StringRef Symbol) { static int ConvertCcString(StringRef CondString) { int CC = StringSwitch<unsigned>(CondString) - .Case(".f", 0) - .Case(".un", 1) - .Case(".eq", 2) - .Case(".ueq", 3) - .Case(".olt", 4) - .Case(".ult", 5) - .Case(".ole", 6) - .Case(".ule", 7) - .Case(".sf", 8) - .Case(".ngle", 9) - .Case(".seq", 10) - .Case(".ngl", 11) - .Case(".lt", 12) - .Case(".nge", 13) - .Case(".le", 14) - .Case(".ngt", 15) - .Default(-1); + .Case(".f", 0) + .Case(".un", 1) + .Case(".eq", 2) + .Case(".ueq", 3) + .Case(".olt", 4) + .Case(".ult", 5) + .Case(".ole", 6) + .Case(".ule", 7) + .Case(".sf", 8) + .Case(".ngle", 9) + .Case(".seq", 10) + .Case(".ngl", 11) + .Case(".lt", 12) + .Case(".nge", 13) + .Case(".le", 14) + .Case(".ngt", 15) + .Default(-1); return CC; } @@ -1373,16 +1478,16 @@ static int ConvertCcString(StringRef CondString) { bool MipsAsmParser:: parseMathOperation(StringRef Name, SMLoc NameLoc, SmallVectorImpl<MCParsedAsmOperand*> &Operands) { - // split the format + // Split the format. size_t Start = Name.find('.'), Next = Name.rfind('.'); StringRef Format1 = Name.slice(Start, Next); - // and add the first format to the operands + // Add the first format to the operands. Operands.push_back(MipsOperand::CreateToken(Format1, NameLoc)); - // now for the second format + // Now for the second format. StringRef Format2 = Name.slice(Next, StringRef::npos); Operands.push_back(MipsOperand::CreateToken(Format2, NameLoc)); - // set the format for the first register + // Set the format for the first register. setFpFormat(Format1); // Read the remaining operands. @@ -1398,11 +1503,10 @@ parseMathOperation(StringRef Name, SMLoc NameLoc, SMLoc Loc = getLexer().getLoc(); Parser.eatToEndOfStatement(); return Error(Loc, "unexpected token in argument list"); - } - Parser.Lex(); // Eat the comma. + Parser.Lex(); // Eat the comma. - //set the format for the first register + // Set the format for the first register setFpFormat(Format2); // Parse and remember the operand. @@ -1419,7 +1523,7 @@ parseMathOperation(StringRef Name, SMLoc NameLoc, return Error(Loc, "unexpected token in argument list"); } - Parser.Lex(); // Consume the EndOfStatement + Parser.Lex(); // Consume the EndOfStatement. return false; } @@ -1427,13 +1531,12 @@ bool MipsAsmParser:: ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc, SmallVectorImpl<MCParsedAsmOperand*> &Operands) { StringRef Mnemonic; - // floating point instructions: should register be treated as double? + // Floating point instructions: Should the register be treated as a double? if (requestsDoubleOperand(Name)) { setFpFormat(FP_FORMAT_D); - Operands.push_back(MipsOperand::CreateToken(Name, NameLoc)); - Mnemonic = Name; - } - else { + Operands.push_back(MipsOperand::CreateToken(Name, NameLoc)); + Mnemonic = Name; + } else { setDefaultFpFormat(); // Create the leading tokens for the mnemonic, split by '.' characters. size_t Start = 0, Next = Name.find('.'); @@ -1442,30 +1545,30 @@ ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc, Operands.push_back(MipsOperand::CreateToken(Mnemonic, NameLoc)); if (Next != StringRef::npos) { - // there is a format token in mnemonic - // StringRef Rest = Name.slice(Next, StringRef::npos); - size_t Dot = Name.find('.', Next+1); + // There is a format token in mnemonic. 
+ size_t Dot = Name.find('.', Next + 1); StringRef Format = Name.slice(Next, Dot); - if (Dot == StringRef::npos) //only one '.' in a string, it's a format + if (Dot == StringRef::npos) // Only one '.' in a string, it's a format. Operands.push_back(MipsOperand::CreateToken(Format, NameLoc)); else { - if (Name.startswith("c.")){ - // floating point compare, add '.' and immediate represent for cc + if (Name.startswith("c.")) { + // Floating point compare, add '.' and immediate represent for cc. Operands.push_back(MipsOperand::CreateToken(".", NameLoc)); int Cc = ConvertCcString(Format); if (Cc == -1) { return Error(NameLoc, "Invalid conditional code"); } SMLoc E = SMLoc::getFromPointer( - Parser.getTok().getLoc().getPointer() -1 ); - Operands.push_back(MipsOperand::CreateImm( - MCConstantExpr::Create(Cc, getContext()), NameLoc, E)); + Parser.getTok().getLoc().getPointer() - 1); + Operands.push_back( + MipsOperand::CreateImm(MCConstantExpr::Create(Cc, getContext()), + NameLoc, E)); } else { // trunc, ceil, floor ... return parseMathOperation(Name, NameLoc, Operands); } - // the rest is a format + // The rest is a format. Format = Name.slice(Dot, StringRef::npos); Operands.push_back(MipsOperand::CreateToken(Format, NameLoc)); } @@ -1483,8 +1586,8 @@ ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc, return Error(Loc, "unexpected token in argument list"); } - while (getLexer().is(AsmToken::Comma) ) { - Parser.Lex(); // Eat the comma. + while (getLexer().is(AsmToken::Comma)) { + Parser.Lex(); // Eat the comma. // Parse and remember the operand. if (ParseOperand(Operands, Name)) { @@ -1501,48 +1604,47 @@ ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc, return Error(Loc, "unexpected token in argument list"); } - Parser.Lex(); // Consume the EndOfStatement + Parser.Lex(); // Consume the EndOfStatement. return false; } bool MipsAsmParser::reportParseError(StringRef ErrorMsg) { - SMLoc Loc = getLexer().getLoc(); - Parser.eatToEndOfStatement(); - return Error(Loc, ErrorMsg); + SMLoc Loc = getLexer().getLoc(); + Parser.eatToEndOfStatement(); + return Error(Loc, ErrorMsg); } bool MipsAsmParser::parseSetNoAtDirective() { - // Line should look like: - // .set noat - // set at reg to 0 + // Line should look like: ".set noat". + // set at reg to 0. Options.setATReg(0); // eat noat Parser.Lex(); - // If this is not the end of the statement, report error + // If this is not the end of the statement, report an error. if (getLexer().isNot(AsmToken::EndOfStatement)) { reportParseError("unexpected token in statement"); return false; } - Parser.Lex(); // Consume the EndOfStatement + Parser.Lex(); // Consume the EndOfStatement. return false; } + bool MipsAsmParser::parseSetAtDirective() { - // line can be - // .set at - defaults to $1 + // Line can be .set at - defaults to $1 // or .set at=$reg int AtRegNo; getParser().Lex(); if (getLexer().is(AsmToken::EndOfStatement)) { Options.setATReg(1); - Parser.Lex(); // Consume the EndOfStatement + Parser.Lex(); // Consume the EndOfStatement. return false; } else if (getLexer().is(AsmToken::Equal)) { - getParser().Lex(); // eat '=' + getParser().Lex(); // Eat the '='. if (getLexer().isNot(AsmToken::Dollar)) { reportParseError("unexpected token in statement"); return false; } - Parser.Lex(); // Eat '$' + Parser.Lex(); // Eat the '$'. 
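For the floating-point compare mnemonics handled above, the parser splits c.<cond>.<fmt> and folds the condition into an immediate via ConvertCcString. A standalone sketch of that split, reusing the same condition table (splitCompare is a made-up name; this is illustration, not the parser code):

#include <cassert>
#include <string>

struct CompareMnemonic { std::string Cond, Fmt; int Cc; };

// Same ordering as ConvertCcString: .f=0, .un=1, .eq=2, ... .ngt=15.
static int condCode(const std::string &Cond) {
  static const char *Names[] = { ".f", ".un", ".eq", ".ueq", ".olt", ".ult",
                                 ".ole", ".ule", ".sf", ".ngle", ".seq",
                                 ".ngl", ".lt", ".nge", ".le", ".ngt" };
  for (int I = 0; I < 16; ++I)
    if (Cond == Names[I])
      return I;
  return -1;
}

// Split "c.<cond>.<fmt>" into its condition, format and condition code.
static CompareMnemonic splitCompare(const std::string &Name) {
  std::string::size_type First = Name.find('.');
  std::string::size_type Second = Name.find('.', First + 1);
  std::string Cond = Name.substr(First, Second - First);
  return { Cond, Name.substr(Second), condCode(Cond) };
}

int main() {
  CompareMnemonic M = splitCompare("c.eq.s");
  assert(M.Cond == ".eq" && M.Fmt == ".s" && M.Cc == 2);
  assert(splitCompare("c.ult.d").Cc == 5);
  return 0;
}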
const AsmToken &Reg = Parser.getTok(); if (Reg.is(AsmToken::Identifier)) { AtRegNo = matchCPURegisterName(Reg.getIdentifier()); @@ -1553,7 +1655,7 @@ bool MipsAsmParser::parseSetAtDirective() { return false; } - if ( AtRegNo < 1 || AtRegNo > 31) { + if (AtRegNo < 1 || AtRegNo > 31) { reportParseError("unexpected token in statement"); return false; } @@ -1562,13 +1664,13 @@ bool MipsAsmParser::parseSetAtDirective() { reportParseError("unexpected token in statement"); return false; } - getParser().Lex(); // Eat reg + getParser().Lex(); // Eat the register. if (getLexer().isNot(AsmToken::EndOfStatement)) { reportParseError("unexpected token in statement"); return false; - } - Parser.Lex(); // Consume the EndOfStatement + } + Parser.Lex(); // Consume the EndOfStatement. return false; } else { reportParseError("unexpected token in statement"); @@ -1578,43 +1680,43 @@ bool MipsAsmParser::parseSetAtDirective() { bool MipsAsmParser::parseSetReorderDirective() { Parser.Lex(); - // If this is not the end of the statement, report error + // If this is not the end of the statement, report an error. if (getLexer().isNot(AsmToken::EndOfStatement)) { reportParseError("unexpected token in statement"); return false; } Options.setReorder(); - Parser.Lex(); // Consume the EndOfStatement + Parser.Lex(); // Consume the EndOfStatement. return false; } bool MipsAsmParser::parseSetNoReorderDirective() { - Parser.Lex(); - // if this is not the end of the statement, report error - if (getLexer().isNot(AsmToken::EndOfStatement)) { - reportParseError("unexpected token in statement"); - return false; - } - Options.setNoreorder(); - Parser.Lex(); // Consume the EndOfStatement + Parser.Lex(); + // If this is not the end of the statement, report an error. + if (getLexer().isNot(AsmToken::EndOfStatement)) { + reportParseError("unexpected token in statement"); return false; + } + Options.setNoreorder(); + Parser.Lex(); // Consume the EndOfStatement. + return false; } bool MipsAsmParser::parseSetMacroDirective() { Parser.Lex(); - // if this is not the end of the statement, report error + // If this is not the end of the statement, report an error. if (getLexer().isNot(AsmToken::EndOfStatement)) { reportParseError("unexpected token in statement"); return false; } Options.setMacro(); - Parser.Lex(); // Consume the EndOfStatement + Parser.Lex(); // Consume the EndOfStatement. return false; } bool MipsAsmParser::parseSetNoMacroDirective() { Parser.Lex(); - // if this is not the end of the statement, report error + // If this is not the end of the statement, report an error. if (getLexer().isNot(AsmToken::EndOfStatement)) { reportParseError("`noreorder' must be set before `nomacro'"); return false; @@ -1624,7 +1726,7 @@ bool MipsAsmParser::parseSetNoMacroDirective() { return false; } Options.setNomacro(); - Parser.Lex(); // Consume the EndOfStatement + Parser.Lex(); // Consume the EndOfStatement. return false; } @@ -1637,24 +1739,24 @@ bool MipsAsmParser::parseSetAssignment() { if (getLexer().isNot(AsmToken::Comma)) return reportParseError("unexpected token in .set directive"); - Lex(); //eat comma + Lex(); // Eat comma if (Parser.parseExpression(Value)) reportParseError("expected valid expression after comma"); - // check if the Name already exists as a symbol + // Check if the Name already exists as a symbol. 
MCSymbol *Sym = getContext().LookupSymbol(Name); - if (Sym) { + if (Sym) return reportParseError("symbol already defined"); - } Sym = getContext().GetOrCreateSymbol(Name); Sym->setVariableValue(Value); return false; } + bool MipsAsmParser::parseDirectiveSet() { - // get next token + // Get the next token. const AsmToken &Tok = Parser.getTok(); if (Tok.getString() == "noat") { @@ -1670,15 +1772,15 @@ bool MipsAsmParser::parseDirectiveSet() { } else if (Tok.getString() == "nomacro") { return parseSetNoMacroDirective(); } else if (Tok.getString() == "nomips16") { - // ignore this directive for now + // Ignore this directive for now. Parser.eatToEndOfStatement(); return false; } else if (Tok.getString() == "nomicromips") { - // ignore this directive for now + // Ignore this directive for now. Parser.eatToEndOfStatement(); return false; } else { - // it is just an identifier, look for assignment + // It is just an identifier, look for an assignment. parseSetAssignment(); return false; } @@ -1715,20 +1817,20 @@ bool MipsAsmParser::ParseDirective(AsmToken DirectiveID) { StringRef IDVal = DirectiveID.getString(); - if ( IDVal == ".ent") { - // ignore this directive for now + if (IDVal == ".ent") { + // Ignore this directive for now. Parser.Lex(); return false; } if (IDVal == ".end") { - // ignore this directive for now + // Ignore this directive for now. Parser.Lex(); return false; } if (IDVal == ".frame") { - // ignore this directive for now + // Ignore this directive for now. Parser.eatToEndOfStatement(); return false; } @@ -1738,19 +1840,19 @@ bool MipsAsmParser::ParseDirective(AsmToken DirectiveID) { } if (IDVal == ".fmask") { - // ignore this directive for now + // Ignore this directive for now. Parser.eatToEndOfStatement(); return false; } if (IDVal == ".mask") { - // ignore this directive for now + // Ignore this directive for now. Parser.eatToEndOfStatement(); return false; } if (IDVal == ".gpword") { - // ignore this directive for now + // Ignore this directive for now. 
Parser.eatToEndOfStatement(); return false; } diff --git a/contrib/llvm/lib/Target/Mips/Disassembler/MipsDisassembler.cpp b/contrib/llvm/lib/Target/Mips/Disassembler/MipsDisassembler.cpp index 59e49d8..0dba33a 100644 --- a/contrib/llvm/lib/Target/Mips/Disassembler/MipsDisassembler.cpp +++ b/contrib/llvm/lib/Target/Mips/Disassembler/MipsDisassembler.cpp @@ -143,6 +143,16 @@ static DecodeStatus DecodeACRegsDSPRegisterClass(MCInst &Inst, uint64_t Address, const void *Decoder); +static DecodeStatus DecodeHIRegsDSPRegisterClass(MCInst &Inst, + unsigned RegNo, + uint64_t Address, + const void *Decoder); + +static DecodeStatus DecodeLORegsDSPRegisterClass(MCInst &Inst, + unsigned RegNo, + uint64_t Address, + const void *Decoder); + static DecodeStatus DecodeBranchTarget(MCInst &Inst, unsigned Offset, uint64_t Address, @@ -496,6 +506,30 @@ static DecodeStatus DecodeACRegsDSPRegisterClass(MCInst &Inst, return MCDisassembler::Success; } +static DecodeStatus DecodeHIRegsDSPRegisterClass(MCInst &Inst, + unsigned RegNo, + uint64_t Address, + const void *Decoder) { + if (RegNo >= 4) + return MCDisassembler::Fail; + + unsigned Reg = getReg(Decoder, Mips::HIRegsDSPRegClassID, RegNo); + Inst.addOperand(MCOperand::CreateReg(Reg)); + return MCDisassembler::Success; +} + +static DecodeStatus DecodeLORegsDSPRegisterClass(MCInst &Inst, + unsigned RegNo, + uint64_t Address, + const void *Decoder) { + if (RegNo >= 4) + return MCDisassembler::Fail; + + unsigned Reg = getReg(Decoder, Mips::LORegsDSPRegClassID, RegNo); + Inst.addOperand(MCOperand::CreateReg(Reg)); + return MCDisassembler::Success; +} + static DecodeStatus DecodeBranchTarget(MCInst &Inst, unsigned Offset, uint64_t Address, diff --git a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp index e198a7c..9460731 100644 --- a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp +++ b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp @@ -27,6 +27,9 @@ #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/Support/raw_ostream.h" +#define GET_INSTRMAP_INFO +#include "MipsGenInstrInfo.inc" + using namespace llvm; namespace { @@ -35,12 +38,13 @@ class MipsMCCodeEmitter : public MCCodeEmitter { void operator=(const MipsMCCodeEmitter &) LLVM_DELETED_FUNCTION; const MCInstrInfo &MCII; MCContext &Ctx; + const MCSubtargetInfo &STI; bool IsLittleEndian; public: MipsMCCodeEmitter(const MCInstrInfo &mcii, MCContext &Ctx_, const MCSubtargetInfo &sti, bool IsLittle) : - MCII(mcii), Ctx(Ctx_), IsLittleEndian(IsLittle) {} + MCII(mcii), Ctx(Ctx_), STI (sti), IsLittleEndian(IsLittle) {} ~MipsMCCodeEmitter() {} @@ -88,6 +92,9 @@ public: unsigned getSizeInsEncoding(const MCInst &MI, unsigned OpNo, SmallVectorImpl<MCFixup> &Fixups) const; + unsigned + getExprOpValue(const MCExpr *Expr,SmallVectorImpl<MCFixup> &Fixups) const; + }; // class MipsMCCodeEmitter } // namespace @@ -141,6 +148,15 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS, if ((Opcode != Mips::NOP) && (Opcode != Mips::SLL) && !Binary) llvm_unreachable("unimplemented opcode in EncodeInstruction()"); + if (STI.getFeatureBits() & Mips::FeatureMicroMips) { + int NewOpcode = Mips::Std2MicroMips (Opcode, Mips::Arch_micromips); + if (NewOpcode != -1) { + Opcode = NewOpcode; + TmpInst.setOpcode (NewOpcode); + Binary = getBinaryCodeForInstr(TmpInst, Fixups); + } + } + const MCInstrDesc &Desc = MCII.get(TmpInst.getOpcode()); // Get byte count of instruction @@ -192,35 +208,24 @@ getJumpTargetOpValue(const MCInst &MI, 
unsigned OpNo, return 0; } -/// getMachineOpValue - Return binary encoding of operand. If the machine -/// operand requires relocation, record the relocation and return zero. unsigned MipsMCCodeEmitter:: -getMachineOpValue(const MCInst &MI, const MCOperand &MO, - SmallVectorImpl<MCFixup> &Fixups) const { - if (MO.isReg()) { - unsigned Reg = MO.getReg(); - unsigned RegNo = Ctx.getRegisterInfo().getEncodingValue(Reg); - return RegNo; - } else if (MO.isImm()) { - return static_cast<unsigned>(MO.getImm()); - } else if (MO.isFPImm()) { - return static_cast<unsigned>(APFloat(MO.getFPImm()) - .bitcastToAPInt().getHiBits(32).getLimitedValue()); - } +getExprOpValue(const MCExpr *Expr,SmallVectorImpl<MCFixup> &Fixups) const { + int64_t Res; - // MO must be an Expr. - assert(MO.isExpr()); + if (Expr->EvaluateAsAbsolute(Res)) + return Res; - const MCExpr *Expr = MO.getExpr(); MCExpr::ExprKind Kind = Expr->getKind(); + if (Kind == MCExpr::Constant) { + return cast<MCConstantExpr>(Expr)->getValue(); + } if (Kind == MCExpr::Binary) { - Expr = static_cast<const MCBinaryExpr*>(Expr)->getLHS(); - Kind = Expr->getKind(); + unsigned Res = getExprOpValue(cast<MCBinaryExpr>(Expr)->getLHS(), Fixups); + Res += getExprOpValue(cast<MCBinaryExpr>(Expr)->getRHS(), Fixups); + return Res; } - - assert (Kind == MCExpr::SymbolRef); - + if (Kind == MCExpr::SymbolRef) { Mips::Fixups FixupKind = Mips::Fixups(0); switch(cast<MCSymbolRefExpr>(Expr)->getKind()) { @@ -300,12 +305,32 @@ getMachineOpValue(const MCInst &MI, const MCOperand &MO, break; } // switch - Fixups.push_back(MCFixup::Create(0, MO.getExpr(), MCFixupKind(FixupKind))); - - // All of the information is in the fixup. + Fixups.push_back(MCFixup::Create(0, Expr, MCFixupKind(FixupKind))); + return 0; + } return 0; } +/// getMachineOpValue - Return binary encoding of operand. If the machine +/// operand requires relocation, record the relocation and return zero. +unsigned MipsMCCodeEmitter:: +getMachineOpValue(const MCInst &MI, const MCOperand &MO, + SmallVectorImpl<MCFixup> &Fixups) const { + if (MO.isReg()) { + unsigned Reg = MO.getReg(); + unsigned RegNo = Ctx.getRegisterInfo().getEncodingValue(Reg); + return RegNo; + } else if (MO.isImm()) { + return static_cast<unsigned>(MO.getImm()); + } else if (MO.isFPImm()) { + return static_cast<unsigned>(APFloat(MO.getFPImm()) + .bitcastToAPInt().getHiBits(32).getLimitedValue()); + } + // MO must be an Expr. + assert(MO.isExpr()); + return getExprOpValue(MO.getExpr(),Fixups); +} + /// getMemEncoding - Return binary encoding of memory related operand. /// If the offset operand requires relocation, record the relocation. 
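getExprOpValue above folds constants to their value, encodes a binary expression as the sum of both sides, and records a fixup (contributing 0 to the encoding) for symbolic references. A self-contained sketch of that recursion over a stand-in expression type (MiniExpr is not an LLVM class):

#include <cassert>
#include <string>
#include <vector>

// Stand-in for MCExpr, for illustration only.
struct MiniExpr {
  enum Kind { Constant, Symbol, Add } K;
  long Value;                                  // valid when K == Constant
  std::string Sym;                             // valid when K == Symbol
  const MiniExpr *LHS, *RHS;                   // valid when K == Add
};

static unsigned exprOpValue(const MiniExpr &E, std::vector<std::string> &Fixups) {
  switch (E.K) {
  case MiniExpr::Constant:
    return static_cast<unsigned>(E.Value);     // constants encode directly
  case MiniExpr::Add:
    return exprOpValue(*E.LHS, Fixups) + exprOpValue(*E.RHS, Fixups);
  case MiniExpr::Symbol:
    Fixups.push_back(E.Sym);                   // the relocation carries the value
    return 0;                                  // nothing lands in the field itself
  }
  return 0;
}

int main() {
  MiniExpr Sym; Sym.K = MiniExpr::Symbol; Sym.Sym = "foo";
  MiniExpr Off; Off.K = MiniExpr::Constant; Off.Value = 8;
  MiniExpr Sum; Sum.K = MiniExpr::Add; Sum.LHS = &Sym; Sum.RHS = &Off;
  std::vector<std::string> Fixups;
  assert(exprOpValue(Sum, Fixups) == 8);       // "foo + 8": only the 8 encodes
  assert(Fixups.size() == 1 && Fixups[0] == "foo");
  return 0;
}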
unsigned diff --git a/contrib/llvm/lib/Target/Mips/MicroMipsInstrFormats.td b/contrib/llvm/lib/Target/Mips/MicroMipsInstrFormats.td new file mode 100644 index 0000000..665b4d2 --- /dev/null +++ b/contrib/llvm/lib/Target/Mips/MicroMipsInstrFormats.td @@ -0,0 +1,112 @@ +class MMArch { + string Arch = "micromips"; + list<dag> Pattern = []; +} + +class ADD_FM_MM<bits<6> op, bits<10> funct> : MMArch { + bits<5> rt; + bits<5> rs; + bits<5> rd; + + bits<32> Inst; + + let Inst{31-26} = op; + let Inst{25-21} = rt; + let Inst{20-16} = rs; + let Inst{15-11} = rd; + let Inst{10} = 0; + let Inst{9-0} = funct; +} + +class ADDI_FM_MM<bits<6> op> : MMArch { + bits<5> rs; + bits<5> rt; + bits<16> imm16; + + bits<32> Inst; + + let Inst{31-26} = op; + let Inst{25-21} = rt; + let Inst{20-16} = rs; + let Inst{15-0} = imm16; +} + +class SLTI_FM_MM<bits<6> op> : MMArch { + bits<5> rt; + bits<5> rs; + bits<16> imm16; + + bits<32> Inst; + + let Inst{31-26} = op; + let Inst{25-21} = rs; + let Inst{20-16} = rt; + let Inst{15-0} = imm16; +} + +class LUI_FM_MM : MMArch { + bits<5> rt; + bits<16> imm16; + + bits<32> Inst; + + let Inst{31-26} = 0x10; + let Inst{25-21} = 0xd; + let Inst{20-16} = rt; + let Inst{15-0} = imm16; +} + +class MULT_FM_MM<bits<10> funct> : MMArch { + bits<5> rs; + bits<5> rt; + + bits<32> Inst; + + let Inst{31-26} = 0x00; + let Inst{25-21} = rt; + let Inst{20-16} = rs; + let Inst{15-6} = funct; + let Inst{5-0} = 0x3c; +} + +class SRA_FM_MM<bits<10> funct, bit rotate> : MMArch { + bits<5> rd; + bits<5> rt; + bits<5> shamt; + + bits<32> Inst; + + let Inst{31-26} = 0; + let Inst{25-21} = rd; + let Inst{20-16} = rt; + let Inst{15-11} = shamt; + let Inst{10} = rotate; + let Inst{9-0} = funct; +} + +class SRLV_FM_MM<bits<10> funct, bit rotate> : MMArch { + bits<5> rd; + bits<5> rt; + bits<5> rs; + + bits<32> Inst; + + let Inst{31-26} = 0; + let Inst{25-21} = rt; + let Inst{20-16} = rs; + let Inst{15-11} = rd; + let Inst{10} = rotate; + let Inst{9-0} = funct; +} + +class LW_FM_MM<bits<6> op> : MMArch { + bits<5> rt; + bits<21> addr; + + bits<32> Inst; + + let Inst{31-26} = op; + let Inst{25-21} = rt; + let Inst{20-16} = addr{20-16}; + let Inst{15-0} = addr{15-0}; +} diff --git a/contrib/llvm/lib/Target/Mips/MicroMipsInstrInfo.td b/contrib/llvm/lib/Target/Mips/MicroMipsInstrInfo.td new file mode 100644 index 0000000..74cdccd --- /dev/null +++ b/contrib/llvm/lib/Target/Mips/MicroMipsInstrInfo.td @@ -0,0 +1,67 @@ +let isCodeGenOnly = 1 in { + /// Arithmetic Instructions (ALU Immediate) + def ADDiu_MM : MMRel, ArithLogicI<"addiu", simm16, CPURegsOpnd>, + ADDI_FM_MM<0xc>; + def ADDi_MM : MMRel, ArithLogicI<"addi", simm16, CPURegsOpnd>, + ADDI_FM_MM<0x4>; + def SLTi_MM : MMRel, SetCC_I<"slti", setlt, simm16, immSExt16, CPURegs>, + SLTI_FM_MM<0x24>; + def SLTiu_MM : MMRel, SetCC_I<"sltiu", setult, simm16, immSExt16, CPURegs>, + SLTI_FM_MM<0x2c>; + def ANDi_MM : MMRel, ArithLogicI<"andi", uimm16, CPURegsOpnd, immZExt16, and>, + ADDI_FM_MM<0x34>; + def ORi_MM : MMRel, ArithLogicI<"ori", uimm16, CPURegsOpnd, immZExt16, or>, + ADDI_FM_MM<0x14>; + def XORi_MM : MMRel, ArithLogicI<"xori", uimm16, CPURegsOpnd, immZExt16, xor>, + ADDI_FM_MM<0x1c>; + def LUi_MM : MMRel, LoadUpper<"lui", CPURegs, uimm16>, LUI_FM_MM; + + /// Arithmetic Instructions (3-Operand, R-Type) + def ADDu_MM : MMRel, ArithLogicR<"addu", CPURegsOpnd>, ADD_FM_MM<0, 0x150>; + def SUBu_MM : MMRel, ArithLogicR<"subu", CPURegsOpnd>, ADD_FM_MM<0, 0x1d0>; + def MUL_MM : MMRel, ArithLogicR<"mul", CPURegsOpnd>, ADD_FM_MM<0, 0x210>; + def ADD_MM : MMRel, 
ArithLogicR<"add", CPURegsOpnd>, ADD_FM_MM<0, 0x110>; + def SUB_MM : MMRel, ArithLogicR<"sub", CPURegsOpnd>, ADD_FM_MM<0, 0x190>; + def SLT_MM : MMRel, SetCC_R<"slt", setlt, CPURegs>, ADD_FM_MM<0, 0x350>; + def SLTu_MM : MMRel, SetCC_R<"sltu", setult, CPURegs>, + ADD_FM_MM<0, 0x390>; + def AND_MM : MMRel, ArithLogicR<"and", CPURegsOpnd, 1, IIAlu, and>, + ADD_FM_MM<0, 0x250>; + def OR_MM : MMRel, ArithLogicR<"or", CPURegsOpnd, 1, IIAlu, or>, + ADD_FM_MM<0, 0x290>; + def XOR_MM : MMRel, ArithLogicR<"xor", CPURegsOpnd, 1, IIAlu, xor>, + ADD_FM_MM<0, 0x310>; + def NOR_MM : MMRel, LogicNOR<"nor", CPURegsOpnd>, ADD_FM_MM<0, 0x2d0>; + def MULT_MM : MMRel, Mult<"mult", IIImul, CPURegsOpnd, [HI, LO]>, + MULT_FM_MM<0x22c>; + def MULTu_MM : MMRel, Mult<"multu", IIImul, CPURegsOpnd, [HI, LO]>, + MULT_FM_MM<0x26c>; + + /// Shift Instructions + def SLL_MM : MMRel, shift_rotate_imm<"sll", shamt, CPURegsOpnd>, + SRA_FM_MM<0, 0>; + def SRL_MM : MMRel, shift_rotate_imm<"srl", shamt, CPURegsOpnd>, + SRA_FM_MM<0x40, 0>; + def SRA_MM : MMRel, shift_rotate_imm<"sra", shamt, CPURegsOpnd>, + SRA_FM_MM<0x80, 0>; + def SLLV_MM : MMRel, shift_rotate_reg<"sllv", CPURegsOpnd>, + SRLV_FM_MM<0x10, 0>; + def SRLV_MM : MMRel, shift_rotate_reg<"srlv", CPURegsOpnd>, + SRLV_FM_MM<0x50, 0>; + def SRAV_MM : MMRel, shift_rotate_reg<"srav", CPURegsOpnd>, + SRLV_FM_MM<0x90, 0>; + def ROTR_MM : MMRel, shift_rotate_imm<"rotr", shamt, CPURegsOpnd>, + SRA_FM_MM<0xc0, 0>; + def ROTRV_MM : MMRel, shift_rotate_reg<"rotrv", CPURegsOpnd>, + SRLV_FM_MM<0xd0, 0>; + + /// Load and Store Instructions - aligned + defm LB_MM : LoadM<"lb", CPURegs, sextloadi8>, MMRel, LW_FM_MM<0x7>; + defm LBu_MM : LoadM<"lbu", CPURegs, zextloadi8>, MMRel, LW_FM_MM<0x5>; + defm LH_MM : LoadM<"lh", CPURegs, sextloadi16>, MMRel, LW_FM_MM<0xf>; + defm LHu_MM : LoadM<"lhu", CPURegs, zextloadi16>, MMRel, LW_FM_MM<0xd>; + defm LW_MM : LoadM<"lw", CPURegs>, MMRel, LW_FM_MM<0x3f>; + defm SB_MM : StoreM<"sb", CPURegs, truncstorei8>, MMRel, LW_FM_MM<0x6>; + defm SH_MM : StoreM<"sh", CPURegs, truncstorei16>, MMRel, LW_FM_MM<0xe>; + defm SW_MM : StoreM<"sw", CPURegs>, MMRel, LW_FM_MM<0x3e>; +} diff --git a/contrib/llvm/lib/Target/Mips/Mips16ISelDAGToDAG.cpp b/contrib/llvm/lib/Target/Mips/Mips16ISelDAGToDAG.cpp index 00b3449..c1c635c 100644 --- a/contrib/llvm/lib/Target/Mips/Mips16ISelDAGToDAG.cpp +++ b/contrib/llvm/lib/Target/Mips/Mips16ISelDAGToDAG.cpp @@ -35,6 +35,11 @@ #include "llvm/Target/TargetMachine.h" using namespace llvm; +bool Mips16DAGToDAGISel::runOnMachineFunction(MachineFunction &MF) { + if (!Subtarget.inMips16Mode()) + return false; + return MipsDAGToDAGISel::runOnMachineFunction(MF); +} /// Select multiply instructions. 
std::pair<SDNode*, SDNode*> Mips16DAGToDAGISel::selectMULT(SDNode *N, unsigned Opc, DebugLoc DL, EVT Ty, @@ -267,7 +272,7 @@ std::pair<bool, SDNode*> Mips16DAGToDAGISel::selectNode(SDNode *Node) { EVT VT = LHS.getValueType(); unsigned Sltu_op = Mips::SltuRxRyRz16; - SDNode *Carry = CurDAG->getMachineNode(Sltu_op, DL, VT, Ops, 2); + SDNode *Carry = CurDAG->getMachineNode(Sltu_op, DL, VT, Ops); unsigned Addu_op = Mips::AdduRxRyRz16; SDNode *AddCarry = CurDAG->getMachineNode(Addu_op, DL, VT, SDValue(Carry,0), RHS); diff --git a/contrib/llvm/lib/Target/Mips/Mips16ISelDAGToDAG.h b/contrib/llvm/lib/Target/Mips/Mips16ISelDAGToDAG.h index baa8587..f05f9b7 100644 --- a/contrib/llvm/lib/Target/Mips/Mips16ISelDAGToDAG.h +++ b/contrib/llvm/lib/Target/Mips/Mips16ISelDAGToDAG.h @@ -28,6 +28,8 @@ private: SDValue getMips16SPAliasReg(); + virtual bool runOnMachineFunction(MachineFunction &MF); + void getMips16SPRefReg(SDNode *Parent, SDValue &AliasReg); virtual bool selectAddr16(SDNode *Parent, SDValue N, SDValue &Base, diff --git a/contrib/llvm/lib/Target/Mips/Mips16ISelLowering.cpp b/contrib/llvm/lib/Target/Mips/Mips16ISelLowering.cpp index 23eb537..f63318f 100644 --- a/contrib/llvm/lib/Target/Mips/Mips16ISelLowering.cpp +++ b/contrib/llvm/lib/Target/Mips/Mips16ISelLowering.cpp @@ -53,7 +53,6 @@ Mips16TargetLowering::Mips16TargetLowering(MipsTargetMachine &TM) if (Mips16HardFloat) setMips16HardFloatLibCalls(); - setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand); setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Expand); setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Expand); setOperationAction(ISD::ATOMIC_SWAP, MVT::i32, Expand); @@ -614,7 +613,8 @@ MachineBasicBlock unsigned regX = MI->getOperand(0).getReg(); unsigned regY = MI->getOperand(1).getReg(); MachineBasicBlock *target = MI->getOperand(2).getMBB(); - BuildMI(*BB, MI, MI->getDebugLoc(), TII->get(CmpOpc)).addReg(regX).addReg(regY); + BuildMI(*BB, MI, MI->getDebugLoc(), TII->get(CmpOpc)).addReg(regX) + .addReg(regY); BuildMI(*BB, MI, MI->getDebugLoc(), TII->get(BtOpc)).addMBB(target); MI->eraseFromParent(); // The pseudo instruction is gone now. return BB; @@ -636,7 +636,8 @@ MachineBasicBlock *Mips16TargetLowering::emitFEXT_T8I8I16_ins( CmpOpc = CmpiXOpc; else llvm_unreachable("immediate field not usable"); - BuildMI(*BB, MI, MI->getDebugLoc(), TII->get(CmpOpc)).addReg(regX).addImm(imm); + BuildMI(*BB, MI, MI->getDebugLoc(), TII->get(CmpOpc)).addReg(regX) + .addImm(imm); BuildMI(*BB, MI, MI->getDebugLoc(), TII->get(BtOpc)).addMBB(target); MI->eraseFromParent(); // The pseudo instruction is gone now. 
return BB; diff --git a/contrib/llvm/lib/Target/Mips/Mips16RegisterInfo.cpp b/contrib/llvm/lib/Target/Mips/Mips16RegisterInfo.cpp index 6cca227..7ad18f2 100644 --- a/contrib/llvm/lib/Target/Mips/Mips16RegisterInfo.cpp +++ b/contrib/llvm/lib/Target/Mips/Mips16RegisterInfo.cpp @@ -1,5 +1,4 @@ - -//===-- Mips16RegisterInfo.cpp - MIPS16 Register Information -== ----------===// +//===-- Mips16RegisterInfo.cpp - MIPS16 Register Information --------------===// // // The LLVM Compiler Infrastructure // diff --git a/contrib/llvm/lib/Target/Mips/Mips64InstrInfo.td b/contrib/llvm/lib/Target/Mips/Mips64InstrInfo.td index 846a822..fc533fb 100644 --- a/contrib/llvm/lib/Target/Mips/Mips64InstrInfo.td +++ b/contrib/llvm/lib/Target/Mips/Mips64InstrInfo.td @@ -66,14 +66,12 @@ let usesCustomInserter = 1, Predicates = [HasStdEnc], defm ATOMIC_CMP_SWAP_I64 : AtomicCmpSwap64<atomic_cmp_swap_64>; } -/// Pseudo instructions for loading, storing and copying accumulator registers. +/// Pseudo instructions for loading and storing accumulator registers. let isPseudo = 1 in { defm LOAD_AC128 : LoadM<"load_ac128", ACRegs128>; defm STORE_AC128 : StoreM<"store_ac128", ACRegs128>; } -def COPY_AC128 : PseudoSE<(outs ACRegs128:$dst), (ins ACRegs128:$src), []>; - //===----------------------------------------------------------------------===// // Instruction definition //===----------------------------------------------------------------------===// diff --git a/contrib/llvm/lib/Target/Mips/MipsAsmPrinter.cpp b/contrib/llvm/lib/Target/Mips/MipsAsmPrinter.cpp index 1876cb6..6e4feda 100644 --- a/contrib/llvm/lib/Target/Mips/MipsAsmPrinter.cpp +++ b/contrib/llvm/lib/Target/Mips/MipsAsmPrinter.cpp @@ -46,6 +46,10 @@ using namespace llvm; bool MipsAsmPrinter::runOnMachineFunction(MachineFunction &MF) { + // Initialize TargetLoweringObjectFile. + if (Subtarget->allowMixed16_32()) + const_cast<TargetLoweringObjectFile&>(getObjFileLowering()) + .Initialize(OutContext, TM); MipsFI = MF.getInfo<MipsFunctionInfo>(); AsmPrinter::runOnMachineFunction(MF); return true; @@ -245,12 +249,18 @@ void MipsAsmPrinter::EmitFunctionEntryLabel() { void MipsAsmPrinter::EmitFunctionBodyStart() { MCInstLowering.Initialize(Mang, &MF->getContext()); - emitFrameDirective(); + bool IsNakedFunction = + MF->getFunction()-> + getAttributes().hasAttribute(AttributeSet::FunctionIndex, + Attribute::Naked); + if (!IsNakedFunction) + emitFrameDirective(); if (OutStreamer.hasRawTextSupport()) { SmallString<128> Str; raw_svector_ostream OS(Str); - printSavedRegsBitmask(OS); + if (!IsNakedFunction) + printSavedRegsBitmask(OS); OutStreamer.EmitRawText(OS.str()); if (!Subtarget->inMips16Mode()) { OutStreamer.EmitRawText(StringRef("\t.set\tnoreorder")); @@ -419,12 +429,18 @@ bool MipsAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNum, unsigned AsmVariant, const char *ExtraCode, raw_ostream &O) { - if (ExtraCode && ExtraCode[0]) - return true; // Unknown modifier. + int Offset = 0; + // Currently we are expecting either no ExtraCode or 'D' + if (ExtraCode) { + if (ExtraCode[0] == 'D') + Offset = 4; + else + return true; // Unknown modifier. 
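PrintAsmMemoryOperand above now also accepts a 'D' inline-asm modifier and prints the operand with an offset of 4, presumably selecting the second word of a double-word value (treat that reading as an assumption). A trivial sketch of the resulting text:

#include <cassert>
#include <string>

// Hypothetical free function mirroring the printing logic: no modifier gives
// "0($reg)", and 'D' (assumed to mean the second word) gives "4($reg)".
static std::string printAsmMemOperand(const std::string &Reg, char Modifier) {
  int Offset = (Modifier == 'D') ? 4 : 0;
  return std::to_string(Offset) + "($" + Reg + ")";
}

int main() {
  assert(printAsmMemOperand("a0", '\0') == "0($a0)");
  assert(printAsmMemOperand("a0", 'D') == "4($a0)");
  return 0;
}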
+ } const MachineOperand &MO = MI->getOperand(OpNum); assert(MO.isReg() && "unexpected inline asm memory operand"); - O << "0($" << MipsInstPrinter::getRegisterName(MO.getReg()) << ")"; + O << Offset << "($" << MipsInstPrinter::getRegisterName(MO.getReg()) << ")"; return false; } diff --git a/contrib/llvm/lib/Target/Mips/MipsCodeEmitter.cpp b/contrib/llvm/lib/Target/Mips/MipsCodeEmitter.cpp index 1d86d90..3fc402b 100644 --- a/contrib/llvm/lib/Target/Mips/MipsCodeEmitter.cpp +++ b/contrib/llvm/lib/Target/Mips/MipsCodeEmitter.cpp @@ -116,7 +116,7 @@ private: int Offset) const; /// Expand pseudo instructions with accumulator register operands. - void expandACCInstr(MachineBasicBlock::instr_iterator &MI, + void expandACCInstr(MachineBasicBlock::instr_iterator MI, MachineBasicBlock &MBB, unsigned Opc) const; /// \brief Expand pseudo instruction. Return true if MI was expanded. @@ -302,7 +302,7 @@ void MipsCodeEmitter::emitWord(unsigned Word) { MCE.emitWordBE(Word); } -void MipsCodeEmitter::expandACCInstr(MachineBasicBlock::instr_iterator &MI, +void MipsCodeEmitter::expandACCInstr(MachineBasicBlock::instr_iterator MI, MachineBasicBlock &MBB, unsigned Opc) const { // Expand "pseudomult $ac0, $t0, $t1" to "mult $t0, $t1". diff --git a/contrib/llvm/lib/Target/Mips/MipsConstantIslandPass.cpp b/contrib/llvm/lib/Target/Mips/MipsConstantIslandPass.cpp index b5de1eb..1951324 100644 --- a/contrib/llvm/lib/Target/Mips/MipsConstantIslandPass.cpp +++ b/contrib/llvm/lib/Target/Mips/MipsConstantIslandPass.cpp @@ -80,6 +80,10 @@ FunctionPass *llvm::createMipsConstantIslandPass(MipsTargetMachine &tm) { } bool MipsConstantIslands::runOnMachineFunction(MachineFunction &F) { - return true; + // The intention is for this to be a mips16 only pass for now + // FIXME: + // if (!TM.getSubtarget<MipsSubtarget>().inMips16Mode()) + // return false; + return false; } diff --git a/contrib/llvm/lib/Target/Mips/MipsDSPInstrFormats.td b/contrib/llvm/lib/Target/Mips/MipsDSPInstrFormats.td index a72a763..cf09113 100644 --- a/contrib/llvm/lib/Target/Mips/MipsDSPInstrFormats.td +++ b/contrib/llvm/lib/Target/Mips/MipsDSPInstrFormats.td @@ -219,6 +219,33 @@ class MULT_FMT<bits<6> opcode, bits<6> funct> : DSPInst { let Inst{5-0} = funct; } +// MFHI sub-class format. +class MFHI_FMT<bits<6> funct> : DSPInst { + bits<5> rd; + bits<2> ac; + + let Inst{31-26} = 0; + let Inst{25-23} = 0; + let Inst{22-21} = ac; + let Inst{20-16} = 0; + let Inst{15-11} = rd; + let Inst{10-6} = 0; + let Inst{5-0} = funct; +} + +// MTHI sub-class format. +class MTHI_FMT<bits<6> funct> : DSPInst { + bits<5> rs; + bits<2> ac; + + let Inst{31-26} = 0; + let Inst{25-21} = rs; + let Inst{20-13} = 0; + let Inst{12-11} = ac; + let Inst{10-6} = 0; + let Inst{5-0} = funct; +} + // EXTR.W sub-class format (type 1). 
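The MFHI_FMT class above fixes every field except the accumulator selector (bits 22-21), the destination register (bits 15-11) and the function code (bits 5-0). A quick bit-packing sketch of that layout (illustrative C++, not TableGen output; the field values are arbitrary examples):

#include <cassert>
#include <cstdint>

// MFHI sub-class layout: ac in 22-21, rd in 15-11, funct in 5-0, rest zero.
static uint32_t encodeMFHIFmt(unsigned Ac, unsigned Rd, unsigned Funct) {
  return (Ac & 0x3) << 21 | (Rd & 0x1f) << 11 | (Funct & 0x3f);
}

int main() {
  uint32_t Word = encodeMFHIFmt(/*ac=*/2, /*rd=*/9, /*funct=*/0x10);
  assert(Word == ((2u << 21) | (9u << 11) | 0x10));
  return 0;
}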
class EXTR_W_TY1_FMT<bits<5> op> : DSPInst { bits<5> rt; diff --git a/contrib/llvm/lib/Target/Mips/MipsDSPInstrInfo.td b/contrib/llvm/lib/Target/Mips/MipsDSPInstrInfo.td index 3c116e1..c12878a 100644 --- a/contrib/llvm/lib/Target/Mips/MipsDSPInstrInfo.td +++ b/contrib/llvm/lib/Target/Mips/MipsDSPInstrInfo.td @@ -26,6 +26,8 @@ def SDT_MipsShilo : SDTypeProfile<1, 2, [SDTCisVT<0, untyped>, SDTCisSameAs<0, 2>, SDTCisVT<1, i32>]>; def SDT_MipsDPA : SDTypeProfile<1, 3, [SDTCisVT<0, untyped>, SDTCisSameAs<0, 3>, SDTCisVT<1, i32>, SDTCisSameAs<1, 2>]>; +def SDT_MipsSHIFT_DSP : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>, + SDTCisVT<2, i32>]>; class MipsDSPBase<string Opc, SDTypeProfile Prof> : SDNode<!strconcat("MipsISD::", Opc), Prof>; @@ -74,18 +76,19 @@ def MipsMADD_DSP : MipsDSPBase<"MADD_DSP", SDT_MipsDPA>; def MipsMADDU_DSP : MipsDSPBase<"MADDU_DSP", SDT_MipsDPA>; def MipsMSUB_DSP : MipsDSPBase<"MSUB_DSP", SDT_MipsDPA>; def MipsMSUBU_DSP : MipsDSPBase<"MSUBU_DSP", SDT_MipsDPA>; +def MipsSHLL_DSP : MipsDSPBase<"SHLL_DSP", SDT_MipsSHIFT_DSP>; +def MipsSHRA_DSP : MipsDSPBase<"SHRA_DSP", SDT_MipsSHIFT_DSP>; +def MipsSHRL_DSP : MipsDSPBase<"SHRL_DSP", SDT_MipsSHIFT_DSP>; +def MipsSETCC_DSP : MipsDSPBase<"SETCC_DSP", SDTSetCC>; +def MipsSELECT_CC_DSP : MipsDSPBase<"SELECT_CC_DSP", SDTSelectCC>; // Flags. -class UseAC { - list<Register> Uses = [AC0]; +class Uses<list<Register> Regs> { + list<Register> Uses = Regs; } -class UseDSPCtrl { - list<Register> Uses = [DSPCtrl]; -} - -class ClearDefs { - list<Register> Defs = []; +class Defs<list<Register> Regs> { + list<Register> Defs = Regs; } // Instruction encoding. @@ -145,6 +148,10 @@ class MAQ_S_W_PHL_ENC : DPA_W_PH_FMT<0b10100>; class MAQ_S_W_PHR_ENC : DPA_W_PH_FMT<0b10110>; class MAQ_SA_W_PHL_ENC : DPA_W_PH_FMT<0b10000>; class MAQ_SA_W_PHR_ENC : DPA_W_PH_FMT<0b10010>; +class MFHI_ENC : MFHI_FMT<0b010000>; +class MFLO_ENC : MFHI_FMT<0b010010>; +class MTHI_ENC : MTHI_FMT<0b010001>; +class MTLO_ENC : MTHI_FMT<0b010011>; class DPAU_H_QBL_ENC : DPA_W_PH_FMT<0b00011>; class DPAU_H_QBR_ENC : DPA_W_PH_FMT<0b00111>; class DPSU_H_QBL_ENC : DPA_W_PH_FMT<0b01011>; @@ -256,7 +263,6 @@ class ADDU_QB_DESC_BASE<string instr_asm, SDPatternOperator OpNode, string AsmString = !strconcat(instr_asm, "\t$rd, $rs, $rt"); list<dag> Pattern = [(set RCD:$rd, (OpNode RCS:$rs, RCT:$rt))]; InstrItinClass Itinerary = itin; - list<Register> Defs = [DSPCtrl]; } class RADDU_W_QB_DESC_BASE<string instr_asm, SDPatternOperator OpNode, @@ -267,7 +273,6 @@ class RADDU_W_QB_DESC_BASE<string instr_asm, SDPatternOperator OpNode, string AsmString = !strconcat(instr_asm, "\t$rd, $rs"); list<dag> Pattern = [(set RCD:$rd, (OpNode RCS:$rs))]; InstrItinClass Itinerary = itin; - list<Register> Defs = [DSPCtrl]; } class CMP_EQ_QB_R2_DESC_BASE<string instr_asm, SDPatternOperator OpNode, @@ -278,7 +283,6 @@ class CMP_EQ_QB_R2_DESC_BASE<string instr_asm, SDPatternOperator OpNode, string AsmString = !strconcat(instr_asm, "\t$rs, $rt"); list<dag> Pattern = [(OpNode RCS:$rs, RCT:$rt)]; InstrItinClass Itinerary = itin; - list<Register> Defs = [DSPCtrl]; } class CMP_EQ_QB_R3_DESC_BASE<string instr_asm, SDPatternOperator OpNode, @@ -289,7 +293,6 @@ class CMP_EQ_QB_R3_DESC_BASE<string instr_asm, SDPatternOperator OpNode, string AsmString = !strconcat(instr_asm, "\t$rd, $rs, $rt"); list<dag> Pattern = [(set RCD:$rd, (OpNode RCS:$rs, RCT:$rt))]; InstrItinClass Itinerary = itin; - list<Register> Defs = [DSPCtrl]; } class PRECR_SRA_PH_W_DESC_BASE<string instr_asm, SDPatternOperator OpNode, @@ -300,7 
+303,6 @@ class PRECR_SRA_PH_W_DESC_BASE<string instr_asm, SDPatternOperator OpNode, string AsmString = !strconcat(instr_asm, "\t$rt, $rs, $sa"); list<dag> Pattern = [(set RCT:$rt, (OpNode RCS:$src, RCS:$rs, immZExt5:$sa))]; InstrItinClass Itinerary = itin; - list<Register> Defs = [DSPCtrl]; string Constraints = "$src = $rt"; } @@ -312,7 +314,6 @@ class ABSQ_S_PH_R2_DESC_BASE<string instr_asm, SDPatternOperator OpNode, string AsmString = !strconcat(instr_asm, "\t$rd, $rt"); list<dag> Pattern = [(set RCD:$rd, (OpNode RCT:$rt))]; InstrItinClass Itinerary = itin; - list<Register> Defs = [DSPCtrl]; } class REPL_DESC_BASE<string instr_asm, SDPatternOperator OpNode, @@ -322,7 +323,6 @@ class REPL_DESC_BASE<string instr_asm, SDPatternOperator OpNode, string AsmString = !strconcat(instr_asm, "\t$rd, $imm"); list<dag> Pattern = [(set RC:$rd, (OpNode immPat:$imm))]; InstrItinClass Itinerary = itin; - list<Register> Defs = [DSPCtrl]; } class SHLL_QB_R3_DESC_BASE<string instr_asm, SDPatternOperator OpNode, @@ -332,7 +332,6 @@ class SHLL_QB_R3_DESC_BASE<string instr_asm, SDPatternOperator OpNode, string AsmString = !strconcat(instr_asm, "\t$rd, $rt, $rs_sa"); list<dag> Pattern = [(set RC:$rd, (OpNode RC:$rt, CPURegs:$rs_sa))]; InstrItinClass Itinerary = itin; - list<Register> Defs = [DSPCtrl]; } class SHLL_QB_R2_DESC_BASE<string instr_asm, SDPatternOperator OpNode, @@ -343,7 +342,7 @@ class SHLL_QB_R2_DESC_BASE<string instr_asm, SDPatternOperator OpNode, string AsmString = !strconcat(instr_asm, "\t$rd, $rt, $rs_sa"); list<dag> Pattern = [(set RC:$rd, (OpNode RC:$rt, ImmPat:$rs_sa))]; InstrItinClass Itinerary = itin; - list<Register> Defs = [DSPCtrl]; + bit hasSideEffects = 1; } class LX_DESC_BASE<string instr_asm, SDPatternOperator OpNode, @@ -354,7 +353,6 @@ class LX_DESC_BASE<string instr_asm, SDPatternOperator OpNode, list<dag> Pattern = [(set CPURegs:$rd, (OpNode CPURegs:$base, CPURegs:$index))]; InstrItinClass Itinerary = itin; - list<Register> Defs = [DSPCtrl]; bit mayLoad = 1; } @@ -366,7 +364,6 @@ class ADDUH_QB_DESC_BASE<string instr_asm, SDPatternOperator OpNode, string AsmString = !strconcat(instr_asm, "\t$rd, $rs, $rt"); list<dag> Pattern = [(set RCD:$rd, (OpNode RCS:$rs, RCT:$rt))]; InstrItinClass Itinerary = itin; - list<Register> Defs = [DSPCtrl]; } class APPEND_DESC_BASE<string instr_asm, SDPatternOperator OpNode, @@ -377,7 +374,6 @@ class APPEND_DESC_BASE<string instr_asm, SDPatternOperator OpNode, list<dag> Pattern = [(set CPURegs:$rt, (OpNode CPURegs:$src, CPURegs:$rs, ImmOp:$sa))]; InstrItinClass Itinerary = itin; - list<Register> Defs = [DSPCtrl]; string Constraints = "$src = $rt"; } @@ -387,7 +383,6 @@ class EXTR_W_TY1_R2_DESC_BASE<string instr_asm, SDPatternOperator OpNode, dag InOperandList = (ins ACRegsDSP:$ac, CPURegs:$shift_rs); string AsmString = !strconcat(instr_asm, "\t$rt, $ac, $shift_rs"); InstrItinClass Itinerary = itin; - list<Register> Defs = [DSPCtrl]; } class EXTR_W_TY1_R1_DESC_BASE<string instr_asm, SDPatternOperator OpNode, @@ -396,7 +391,6 @@ class EXTR_W_TY1_R1_DESC_BASE<string instr_asm, SDPatternOperator OpNode, dag InOperandList = (ins ACRegsDSP:$ac, uimm16:$shift_rs); string AsmString = !strconcat(instr_asm, "\t$rt, $ac, $shift_rs"); InstrItinClass Itinerary = itin; - list<Register> Defs = [DSPCtrl]; } class SHILO_R1_DESC_BASE<string instr_asm, SDPatternOperator OpNode> { @@ -405,7 +399,6 @@ class SHILO_R1_DESC_BASE<string instr_asm, SDPatternOperator OpNode> { string AsmString = !strconcat(instr_asm, "\t$ac, $shift"); list<dag> Pattern = [(set 
ACRegsDSP:$ac, (OpNode immSExt6:$shift, ACRegsDSP:$acin))]; - list<Register> Defs = [DSPCtrl]; string Constraints = "$acin = $ac"; } @@ -415,7 +408,6 @@ class SHILO_R2_DESC_BASE<string instr_asm, SDPatternOperator OpNode> { string AsmString = !strconcat(instr_asm, "\t$ac, $rs"); list<dag> Pattern = [(set ACRegsDSP:$ac, (OpNode CPURegs:$rs, ACRegsDSP:$acin))]; - list<Register> Defs = [DSPCtrl]; string Constraints = "$acin = $ac"; } @@ -425,7 +417,6 @@ class MTHLIP_DESC_BASE<string instr_asm, SDPatternOperator OpNode> { string AsmString = !strconcat(instr_asm, "\t$rs, $ac"); list<dag> Pattern = [(set ACRegsDSP:$ac, (OpNode CPURegs:$rs, ACRegsDSP:$acin))]; - list<Register> Uses = [DSPCtrl]; string Constraints = "$acin = $ac"; } @@ -436,7 +427,6 @@ class RDDSP_DESC_BASE<string instr_asm, SDPatternOperator OpNode, string AsmString = !strconcat(instr_asm, "\t$rd, $mask"); list<dag> Pattern = [(set CPURegs:$rd, (OpNode immZExt10:$mask))]; InstrItinClass Itinerary = itin; - list<Register> Uses = [DSPCtrl]; } class WRDSP_DESC_BASE<string instr_asm, SDPatternOperator OpNode, @@ -446,7 +436,6 @@ class WRDSP_DESC_BASE<string instr_asm, SDPatternOperator OpNode, string AsmString = !strconcat(instr_asm, "\t$rs, $mask"); list<dag> Pattern = [(OpNode CPURegs:$rs, immZExt10:$mask)]; InstrItinClass Itinerary = itin; - list<Register> Defs = [DSPCtrl]; } class DPA_W_PH_DESC_BASE<string instr_asm, SDPatternOperator OpNode> { @@ -455,7 +444,6 @@ class DPA_W_PH_DESC_BASE<string instr_asm, SDPatternOperator OpNode> { string AsmString = !strconcat(instr_asm, "\t$ac, $rs, $rt"); list<dag> Pattern = [(set ACRegsDSP:$ac, (OpNode CPURegs:$rs, CPURegs:$rt, ACRegsDSP:$acin))]; - list<Register> Defs = [DSPCtrl]; string Constraints = "$acin = $ac"; } @@ -482,9 +470,22 @@ class MADD_DESC_BASE<string instr_asm, SDPatternOperator OpNode, string Constraints = "$acin = $ac"; } +class MFHI_DESC_BASE<string instr_asm, RegisterClass RC, InstrItinClass itin> { + dag OutOperandList = (outs CPURegs:$rd); + dag InOperandList = (ins RC:$ac); + string AsmString = !strconcat(instr_asm, "\t$rd, $ac"); + InstrItinClass Itinerary = itin; +} + +class MTHI_DESC_BASE<string instr_asm, RegisterClass RC, InstrItinClass itin> { + dag OutOperandList = (outs RC:$ac); + dag InOperandList = (ins CPURegs:$rs); + string AsmString = !strconcat(instr_asm, "\t$rs, $ac"); + InstrItinClass Itinerary = itin; +} + class BPOSGE32_PSEUDO_DESC_BASE<SDPatternOperator OpNode, InstrItinClass itin> : MipsPseudo<(outs CPURegs:$dst), (ins), [(set CPURegs:$dst, (OpNode))]> { - list<Register> Uses = [DSPCtrl]; bit usesCustomInserter = 1; } @@ -493,7 +494,6 @@ class BPOSGE32_DESC_BASE<string instr_asm, InstrItinClass itin> { dag InOperandList = (ins brtarget:$offset); string AsmString = !strconcat(instr_asm, "\t$offset"); InstrItinClass Itinerary = itin; - list<Register> Uses = [DSPCtrl]; bit isBranch = 1; bit isTerminator = 1; bit hasDelaySlot = 1; @@ -506,7 +506,6 @@ class INSV_DESC_BASE<string instr_asm, SDPatternOperator OpNode, string AsmString = !strconcat(instr_asm, "\t$rt, $rs"); list<dag> Pattern = [(set CPURegs:$rt, (OpNode CPURegs:$src, CPURegs:$rs))]; InstrItinClass Itinerary = itin; - list<Register> Uses = [DSPCtrl]; string Constraints = "$src = $rt"; } @@ -515,178 +514,183 @@ class INSV_DESC_BASE<string instr_asm, SDPatternOperator OpNode, //===----------------------------------------------------------------------===// // Addition/subtraction -class ADDU_QB_DESC : ADDU_QB_DESC_BASE<"addu.qb", int_mips_addu_qb, NoItinerary, - DSPRegs, DSPRegs>, 
IsCommutable; +class ADDU_QB_DESC : ADDU_QB_DESC_BASE<"addu.qb", null_frag, NoItinerary, + DSPRegs, DSPRegs>, IsCommutable, + Defs<[DSPOutFlag20]>; class ADDU_S_QB_DESC : ADDU_QB_DESC_BASE<"addu_s.qb", int_mips_addu_s_qb, NoItinerary, DSPRegs, DSPRegs>, - IsCommutable; + IsCommutable, Defs<[DSPOutFlag20]>; -class SUBU_QB_DESC : ADDU_QB_DESC_BASE<"subu.qb", int_mips_subu_qb, NoItinerary, - DSPRegs, DSPRegs>; +class SUBU_QB_DESC : ADDU_QB_DESC_BASE<"subu.qb", null_frag, NoItinerary, + DSPRegs, DSPRegs>, + Defs<[DSPOutFlag20]>; class SUBU_S_QB_DESC : ADDU_QB_DESC_BASE<"subu_s.qb", int_mips_subu_s_qb, - NoItinerary, DSPRegs, DSPRegs>; + NoItinerary, DSPRegs, DSPRegs>, + Defs<[DSPOutFlag20]>; -class ADDQ_PH_DESC : ADDU_QB_DESC_BASE<"addq.ph", int_mips_addq_ph, NoItinerary, - DSPRegs, DSPRegs>, IsCommutable; +class ADDQ_PH_DESC : ADDU_QB_DESC_BASE<"addq.ph", null_frag, NoItinerary, + DSPRegs, DSPRegs>, IsCommutable, + Defs<[DSPOutFlag20]>; class ADDQ_S_PH_DESC : ADDU_QB_DESC_BASE<"addq_s.ph", int_mips_addq_s_ph, NoItinerary, DSPRegs, DSPRegs>, - IsCommutable; + IsCommutable, Defs<[DSPOutFlag20]>; -class SUBQ_PH_DESC : ADDU_QB_DESC_BASE<"subq.ph", int_mips_subq_ph, NoItinerary, - DSPRegs, DSPRegs>; +class SUBQ_PH_DESC : ADDU_QB_DESC_BASE<"subq.ph", null_frag, NoItinerary, + DSPRegs, DSPRegs>, + Defs<[DSPOutFlag20]>; class SUBQ_S_PH_DESC : ADDU_QB_DESC_BASE<"subq_s.ph", int_mips_subq_s_ph, - NoItinerary, DSPRegs, DSPRegs>; + NoItinerary, DSPRegs, DSPRegs>, + Defs<[DSPOutFlag20]>; class ADDQ_S_W_DESC : ADDU_QB_DESC_BASE<"addq_s.w", int_mips_addq_s_w, NoItinerary, CPURegs, CPURegs>, - IsCommutable; + IsCommutable, Defs<[DSPOutFlag20]>; class SUBQ_S_W_DESC : ADDU_QB_DESC_BASE<"subq_s.w", int_mips_subq_s_w, - NoItinerary, CPURegs, CPURegs>; + NoItinerary, CPURegs, CPURegs>, + Defs<[DSPOutFlag20]>; -class ADDSC_DESC : ADDU_QB_DESC_BASE<"addsc", int_mips_addsc, NoItinerary, - CPURegs, CPURegs>, IsCommutable; +class ADDSC_DESC : ADDU_QB_DESC_BASE<"addsc", null_frag, NoItinerary, + CPURegs, CPURegs>, IsCommutable, + Defs<[DSPCarry]>; -class ADDWC_DESC : ADDU_QB_DESC_BASE<"addwc", int_mips_addwc, NoItinerary, +class ADDWC_DESC : ADDU_QB_DESC_BASE<"addwc", null_frag, NoItinerary, CPURegs, CPURegs>, - IsCommutable, UseDSPCtrl; + IsCommutable, Uses<[DSPCarry]>, Defs<[DSPOutFlag20]>; class MODSUB_DESC : ADDU_QB_DESC_BASE<"modsub", int_mips_modsub, NoItinerary, - CPURegs, CPURegs>, ClearDefs; + CPURegs, CPURegs>; class RADDU_W_QB_DESC : RADDU_W_QB_DESC_BASE<"raddu.w.qb", int_mips_raddu_w_qb, - NoItinerary, CPURegs, DSPRegs>, - ClearDefs; + NoItinerary, CPURegs, DSPRegs>; // Absolute value class ABSQ_S_PH_DESC : ABSQ_S_PH_R2_DESC_BASE<"absq_s.ph", int_mips_absq_s_ph, - NoItinerary, DSPRegs>; + NoItinerary, DSPRegs>, + Defs<[DSPOutFlag20]>; class ABSQ_S_W_DESC : ABSQ_S_PH_R2_DESC_BASE<"absq_s.w", int_mips_absq_s_w, - NoItinerary, CPURegs>; + NoItinerary, CPURegs>, + Defs<[DSPOutFlag20]>; // Precision reduce/expand class PRECRQ_QB_PH_DESC : CMP_EQ_QB_R3_DESC_BASE<"precrq.qb.ph", int_mips_precrq_qb_ph, - NoItinerary, DSPRegs, DSPRegs>, - ClearDefs; + NoItinerary, DSPRegs, DSPRegs>; class PRECRQ_PH_W_DESC : CMP_EQ_QB_R3_DESC_BASE<"precrq.ph.w", int_mips_precrq_ph_w, - NoItinerary, DSPRegs, CPURegs>, - ClearDefs; + NoItinerary, DSPRegs, CPURegs>; class PRECRQ_RS_PH_W_DESC : CMP_EQ_QB_R3_DESC_BASE<"precrq_rs.ph.w", int_mips_precrq_rs_ph_w, NoItinerary, DSPRegs, - CPURegs>; + CPURegs>, + Defs<[DSPOutFlag22]>; class PRECRQU_S_QB_PH_DESC : CMP_EQ_QB_R3_DESC_BASE<"precrqu_s.qb.ph", int_mips_precrqu_s_qb_ph, NoItinerary, 
DSPRegs, - DSPRegs>; + DSPRegs>, + Defs<[DSPOutFlag22]>; class PRECEQ_W_PHL_DESC : ABSQ_S_PH_R2_DESC_BASE<"preceq.w.phl", int_mips_preceq_w_phl, - NoItinerary, CPURegs, DSPRegs>, - ClearDefs; + NoItinerary, CPURegs, DSPRegs>; class PRECEQ_W_PHR_DESC : ABSQ_S_PH_R2_DESC_BASE<"preceq.w.phr", int_mips_preceq_w_phr, - NoItinerary, CPURegs, DSPRegs>, - ClearDefs; + NoItinerary, CPURegs, DSPRegs>; class PRECEQU_PH_QBL_DESC : ABSQ_S_PH_R2_DESC_BASE<"precequ.ph.qbl", int_mips_precequ_ph_qbl, - NoItinerary, DSPRegs>, - ClearDefs; + NoItinerary, DSPRegs>; class PRECEQU_PH_QBR_DESC : ABSQ_S_PH_R2_DESC_BASE<"precequ.ph.qbr", int_mips_precequ_ph_qbr, - NoItinerary, DSPRegs>, - ClearDefs; + NoItinerary, DSPRegs>; class PRECEQU_PH_QBLA_DESC : ABSQ_S_PH_R2_DESC_BASE<"precequ.ph.qbla", int_mips_precequ_ph_qbla, - NoItinerary, DSPRegs>, - ClearDefs; + NoItinerary, DSPRegs>; class PRECEQU_PH_QBRA_DESC : ABSQ_S_PH_R2_DESC_BASE<"precequ.ph.qbra", int_mips_precequ_ph_qbra, - NoItinerary, DSPRegs>, - ClearDefs; + NoItinerary, DSPRegs>; class PRECEU_PH_QBL_DESC : ABSQ_S_PH_R2_DESC_BASE<"preceu.ph.qbl", int_mips_preceu_ph_qbl, - NoItinerary, DSPRegs>, - ClearDefs; + NoItinerary, DSPRegs>; class PRECEU_PH_QBR_DESC : ABSQ_S_PH_R2_DESC_BASE<"preceu.ph.qbr", int_mips_preceu_ph_qbr, - NoItinerary, DSPRegs>, - ClearDefs; + NoItinerary, DSPRegs>; class PRECEU_PH_QBLA_DESC : ABSQ_S_PH_R2_DESC_BASE<"preceu.ph.qbla", int_mips_preceu_ph_qbla, - NoItinerary, DSPRegs>, - ClearDefs; + NoItinerary, DSPRegs>; class PRECEU_PH_QBRA_DESC : ABSQ_S_PH_R2_DESC_BASE<"preceu.ph.qbra", int_mips_preceu_ph_qbra, - NoItinerary, DSPRegs>, - ClearDefs; + NoItinerary, DSPRegs>; // Shift -class SHLL_QB_DESC : SHLL_QB_R2_DESC_BASE<"shll.qb", int_mips_shll_qb, immZExt3, - NoItinerary, DSPRegs>; +class SHLL_QB_DESC : SHLL_QB_R2_DESC_BASE<"shll.qb", null_frag, immZExt3, + NoItinerary, DSPRegs>, + Defs<[DSPOutFlag22]>; class SHLLV_QB_DESC : SHLL_QB_R3_DESC_BASE<"shllv.qb", int_mips_shll_qb, - NoItinerary, DSPRegs>; + NoItinerary, DSPRegs>, + Defs<[DSPOutFlag22]>; -class SHRL_QB_DESC : SHLL_QB_R2_DESC_BASE<"shrl.qb", int_mips_shrl_qb, immZExt3, - NoItinerary, DSPRegs>, ClearDefs; +class SHRL_QB_DESC : SHLL_QB_R2_DESC_BASE<"shrl.qb", null_frag, immZExt3, + NoItinerary, DSPRegs>; class SHRLV_QB_DESC : SHLL_QB_R3_DESC_BASE<"shrlv.qb", int_mips_shrl_qb, - NoItinerary, DSPRegs>, ClearDefs; + NoItinerary, DSPRegs>; -class SHLL_PH_DESC : SHLL_QB_R2_DESC_BASE<"shll.ph", int_mips_shll_ph, immZExt4, - NoItinerary, DSPRegs>; +class SHLL_PH_DESC : SHLL_QB_R2_DESC_BASE<"shll.ph", null_frag, immZExt4, + NoItinerary, DSPRegs>, + Defs<[DSPOutFlag22]>; class SHLLV_PH_DESC : SHLL_QB_R3_DESC_BASE<"shllv.ph", int_mips_shll_ph, - NoItinerary, DSPRegs>; + NoItinerary, DSPRegs>, + Defs<[DSPOutFlag22]>; class SHLL_S_PH_DESC : SHLL_QB_R2_DESC_BASE<"shll_s.ph", int_mips_shll_s_ph, - immZExt4, NoItinerary, DSPRegs>; + immZExt4, NoItinerary, DSPRegs>, + Defs<[DSPOutFlag22]>; class SHLLV_S_PH_DESC : SHLL_QB_R3_DESC_BASE<"shllv_s.ph", int_mips_shll_s_ph, - NoItinerary, DSPRegs>; + NoItinerary, DSPRegs>, + Defs<[DSPOutFlag22]>; -class SHRA_PH_DESC : SHLL_QB_R2_DESC_BASE<"shra.ph", int_mips_shra_ph, immZExt4, - NoItinerary, DSPRegs>, ClearDefs; +class SHRA_PH_DESC : SHLL_QB_R2_DESC_BASE<"shra.ph", null_frag, immZExt4, + NoItinerary, DSPRegs>; class SHRAV_PH_DESC : SHLL_QB_R3_DESC_BASE<"shrav.ph", int_mips_shra_ph, - NoItinerary, DSPRegs>, ClearDefs; + NoItinerary, DSPRegs>; class SHRA_R_PH_DESC : SHLL_QB_R2_DESC_BASE<"shra_r.ph", int_mips_shra_r_ph, - immZExt4, NoItinerary, 
DSPRegs>, - ClearDefs; + immZExt4, NoItinerary, DSPRegs>; class SHRAV_R_PH_DESC : SHLL_QB_R3_DESC_BASE<"shrav_r.ph", int_mips_shra_r_ph, - NoItinerary, DSPRegs>, ClearDefs; + NoItinerary, DSPRegs>; class SHLL_S_W_DESC : SHLL_QB_R2_DESC_BASE<"shll_s.w", int_mips_shll_s_w, - immZExt5, NoItinerary, CPURegs>; + immZExt5, NoItinerary, CPURegs>, + Defs<[DSPOutFlag22]>; class SHLLV_S_W_DESC : SHLL_QB_R3_DESC_BASE<"shllv_s.w", int_mips_shll_s_w, - NoItinerary, CPURegs>; + NoItinerary, CPURegs>, + Defs<[DSPOutFlag22]>; class SHRA_R_W_DESC : SHLL_QB_R2_DESC_BASE<"shra_r.w", int_mips_shra_r_w, - immZExt5, NoItinerary, CPURegs>, - ClearDefs; + immZExt5, NoItinerary, CPURegs>; class SHRAV_R_W_DESC : SHLL_QB_R3_DESC_BASE<"shrav_r.w", int_mips_shra_r_w, NoItinerary, CPURegs>; @@ -694,36 +698,49 @@ class SHRAV_R_W_DESC : SHLL_QB_R3_DESC_BASE<"shrav_r.w", int_mips_shra_r_w, // Multiplication class MULEU_S_PH_QBL_DESC : ADDU_QB_DESC_BASE<"muleu_s.ph.qbl", int_mips_muleu_s_ph_qbl, - NoItinerary, DSPRegs, DSPRegs>; + NoItinerary, DSPRegs, DSPRegs>, + Defs<[DSPOutFlag21]>; class MULEU_S_PH_QBR_DESC : ADDU_QB_DESC_BASE<"muleu_s.ph.qbr", int_mips_muleu_s_ph_qbr, - NoItinerary, DSPRegs, DSPRegs>; + NoItinerary, DSPRegs, DSPRegs>, + Defs<[DSPOutFlag21]>; class MULEQ_S_W_PHL_DESC : ADDU_QB_DESC_BASE<"muleq_s.w.phl", int_mips_muleq_s_w_phl, NoItinerary, CPURegs, DSPRegs>, - IsCommutable; + IsCommutable, Defs<[DSPOutFlag21]>; class MULEQ_S_W_PHR_DESC : ADDU_QB_DESC_BASE<"muleq_s.w.phr", int_mips_muleq_s_w_phr, NoItinerary, CPURegs, DSPRegs>, - IsCommutable; + IsCommutable, Defs<[DSPOutFlag21]>; class MULQ_RS_PH_DESC : ADDU_QB_DESC_BASE<"mulq_rs.ph", int_mips_mulq_rs_ph, NoItinerary, DSPRegs, DSPRegs>, - IsCommutable; + IsCommutable, Defs<[DSPOutFlag21]>; class MULSAQ_S_W_PH_DESC : DPA_W_PH_DESC_BASE<"mulsaq_s.w.ph", - MipsMULSAQ_S_W_PH>; + MipsMULSAQ_S_W_PH>, + Defs<[DSPOutFlag16_19]>; -class MAQ_S_W_PHL_DESC : DPA_W_PH_DESC_BASE<"maq_s.w.phl", MipsMAQ_S_W_PHL>; +class MAQ_S_W_PHL_DESC : DPA_W_PH_DESC_BASE<"maq_s.w.phl", MipsMAQ_S_W_PHL>, + Defs<[DSPOutFlag16_19]>; -class MAQ_S_W_PHR_DESC : DPA_W_PH_DESC_BASE<"maq_s.w.phr", MipsMAQ_S_W_PHR>; +class MAQ_S_W_PHR_DESC : DPA_W_PH_DESC_BASE<"maq_s.w.phr", MipsMAQ_S_W_PHR>, + Defs<[DSPOutFlag16_19]>; -class MAQ_SA_W_PHL_DESC : DPA_W_PH_DESC_BASE<"maq_sa.w.phl", MipsMAQ_SA_W_PHL>; +class MAQ_SA_W_PHL_DESC : DPA_W_PH_DESC_BASE<"maq_sa.w.phl", MipsMAQ_SA_W_PHL>, + Defs<[DSPOutFlag16_19]>; -class MAQ_SA_W_PHR_DESC : DPA_W_PH_DESC_BASE<"maq_sa.w.phr", MipsMAQ_SA_W_PHR>; +class MAQ_SA_W_PHR_DESC : DPA_W_PH_DESC_BASE<"maq_sa.w.phr", MipsMAQ_SA_W_PHR>, + Defs<[DSPOutFlag16_19]>; + +// Move from/to hi/lo. 
+class MFHI_DESC : MFHI_DESC_BASE<"mfhi", HIRegsDSP, NoItinerary>; +class MFLO_DESC : MFHI_DESC_BASE<"mflo", LORegsDSP, NoItinerary>; +class MTHI_DESC : MTHI_DESC_BASE<"mthi", HIRegsDSP, NoItinerary>; +class MTLO_DESC : MTHI_DESC_BASE<"mtlo", LORegsDSP, NoItinerary>; // Dot product with accumulate/subtract class DPAU_H_QBL_DESC : DPA_W_PH_DESC_BASE<"dpau.h.qbl", MipsDPAU_H_QBL>; @@ -734,13 +751,17 @@ class DPSU_H_QBL_DESC : DPA_W_PH_DESC_BASE<"dpsu.h.qbl", MipsDPSU_H_QBL>; class DPSU_H_QBR_DESC : DPA_W_PH_DESC_BASE<"dpsu.h.qbr", MipsDPSU_H_QBR>; -class DPAQ_S_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpaq_s.w.ph", MipsDPAQ_S_W_PH>; +class DPAQ_S_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpaq_s.w.ph", MipsDPAQ_S_W_PH>, + Defs<[DSPOutFlag16_19]>; -class DPSQ_S_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpsq_s.w.ph", MipsDPSQ_S_W_PH>; +class DPSQ_S_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpsq_s.w.ph", MipsDPSQ_S_W_PH>, + Defs<[DSPOutFlag16_19]>; -class DPAQ_SA_L_W_DESC : DPA_W_PH_DESC_BASE<"dpaq_sa.l.w", MipsDPAQ_SA_L_W>; +class DPAQ_SA_L_W_DESC : DPA_W_PH_DESC_BASE<"dpaq_sa.l.w", MipsDPAQ_SA_L_W>, + Defs<[DSPOutFlag16_19]>; -class DPSQ_SA_L_W_DESC : DPA_W_PH_DESC_BASE<"dpsq_sa.l.w", MipsDPSQ_SA_L_W>; +class DPSQ_SA_L_W_DESC : DPA_W_PH_DESC_BASE<"dpsq_sa.l.w", MipsDPSQ_SA_L_W>, + Defs<[DSPOutFlag16_19]>; class MULT_DSP_DESC : MULT_DESC_BASE<"mult", MipsMult, NoItinerary>; class MULTU_DSP_DESC : MULT_DESC_BASE<"multu", MipsMultu, NoItinerary>; @@ -752,15 +773,16 @@ class MSUBU_DSP_DESC : MADD_DESC_BASE<"msubu", MipsMSubu, NoItinerary>; // Comparison class CMPU_EQ_QB_DESC : CMP_EQ_QB_R2_DESC_BASE<"cmpu.eq.qb", int_mips_cmpu_eq_qb, NoItinerary, - DSPRegs>, IsCommutable; + DSPRegs>, + IsCommutable, Defs<[DSPCCond]>; class CMPU_LT_QB_DESC : CMP_EQ_QB_R2_DESC_BASE<"cmpu.lt.qb", int_mips_cmpu_lt_qb, NoItinerary, - DSPRegs>, IsCommutable; + DSPRegs>, Defs<[DSPCCond]>; class CMPU_LE_QB_DESC : CMP_EQ_QB_R2_DESC_BASE<"cmpu.le.qb", int_mips_cmpu_le_qb, NoItinerary, - DSPRegs>, IsCommutable; + DSPRegs>, Defs<[DSPCCond]>; class CMPGU_EQ_QB_DESC : CMP_EQ_QB_R3_DESC_BASE<"cmpgu.eq.qb", int_mips_cmpgu_eq_qb, @@ -769,222 +791,235 @@ class CMPGU_EQ_QB_DESC : CMP_EQ_QB_R3_DESC_BASE<"cmpgu.eq.qb", class CMPGU_LT_QB_DESC : CMP_EQ_QB_R3_DESC_BASE<"cmpgu.lt.qb", int_mips_cmpgu_lt_qb, - NoItinerary, CPURegs, DSPRegs>, - IsCommutable; + NoItinerary, CPURegs, DSPRegs>; class CMPGU_LE_QB_DESC : CMP_EQ_QB_R3_DESC_BASE<"cmpgu.le.qb", int_mips_cmpgu_le_qb, - NoItinerary, CPURegs, DSPRegs>, - IsCommutable; + NoItinerary, CPURegs, DSPRegs>; class CMP_EQ_PH_DESC : CMP_EQ_QB_R2_DESC_BASE<"cmp.eq.ph", int_mips_cmp_eq_ph, NoItinerary, DSPRegs>, - IsCommutable; + IsCommutable, Defs<[DSPCCond]>; class CMP_LT_PH_DESC : CMP_EQ_QB_R2_DESC_BASE<"cmp.lt.ph", int_mips_cmp_lt_ph, NoItinerary, DSPRegs>, - IsCommutable; + Defs<[DSPCCond]>; class CMP_LE_PH_DESC : CMP_EQ_QB_R2_DESC_BASE<"cmp.le.ph", int_mips_cmp_le_ph, NoItinerary, DSPRegs>, - IsCommutable; + Defs<[DSPCCond]>; // Misc class BITREV_DESC : ABSQ_S_PH_R2_DESC_BASE<"bitrev", int_mips_bitrev, - NoItinerary, CPURegs>, ClearDefs; + NoItinerary, CPURegs>; class PACKRL_PH_DESC : CMP_EQ_QB_R3_DESC_BASE<"packrl.ph", int_mips_packrl_ph, - NoItinerary, DSPRegs, DSPRegs>, - ClearDefs; + NoItinerary, DSPRegs, DSPRegs>; class REPL_QB_DESC : REPL_DESC_BASE<"repl.qb", int_mips_repl_qb, immZExt8, - NoItinerary, DSPRegs>, ClearDefs; + NoItinerary, DSPRegs>; class REPL_PH_DESC : REPL_DESC_BASE<"repl.ph", int_mips_repl_ph, immZExt10, - NoItinerary, DSPRegs>, ClearDefs; + NoItinerary, DSPRegs>; class REPLV_QB_DESC : 
ABSQ_S_PH_R2_DESC_BASE<"replv.qb", int_mips_repl_qb, - NoItinerary, DSPRegs, CPURegs>, - ClearDefs; + NoItinerary, DSPRegs, CPURegs>; class REPLV_PH_DESC : ABSQ_S_PH_R2_DESC_BASE<"replv.ph", int_mips_repl_ph, - NoItinerary, DSPRegs, CPURegs>, - ClearDefs; + NoItinerary, DSPRegs, CPURegs>; class PICK_QB_DESC : CMP_EQ_QB_R3_DESC_BASE<"pick.qb", int_mips_pick_qb, NoItinerary, DSPRegs, DSPRegs>, - ClearDefs, UseDSPCtrl; + Uses<[DSPCCond]>; class PICK_PH_DESC : CMP_EQ_QB_R3_DESC_BASE<"pick.ph", int_mips_pick_ph, NoItinerary, DSPRegs, DSPRegs>, - ClearDefs, UseDSPCtrl; + Uses<[DSPCCond]>; -class LWX_DESC : LX_DESC_BASE<"lwx", int_mips_lwx, NoItinerary>, ClearDefs; +class LWX_DESC : LX_DESC_BASE<"lwx", int_mips_lwx, NoItinerary>; -class LHX_DESC : LX_DESC_BASE<"lhx", int_mips_lhx, NoItinerary>, ClearDefs; +class LHX_DESC : LX_DESC_BASE<"lhx", int_mips_lhx, NoItinerary>; -class LBUX_DESC : LX_DESC_BASE<"lbux", int_mips_lbux, NoItinerary>, ClearDefs; +class LBUX_DESC : LX_DESC_BASE<"lbux", int_mips_lbux, NoItinerary>; class BPOSGE32_DESC : BPOSGE32_DESC_BASE<"bposge32", NoItinerary>; // Extr -class EXTP_DESC : EXTR_W_TY1_R1_DESC_BASE<"extp", MipsEXTP, NoItinerary>; +class EXTP_DESC : EXTR_W_TY1_R1_DESC_BASE<"extp", MipsEXTP, NoItinerary>, + Uses<[DSPPos]>, Defs<[DSPEFI]>; -class EXTPV_DESC : EXTR_W_TY1_R2_DESC_BASE<"extpv", MipsEXTP, NoItinerary>; +class EXTPV_DESC : EXTR_W_TY1_R2_DESC_BASE<"extpv", MipsEXTP, NoItinerary>, + Uses<[DSPPos]>, Defs<[DSPEFI]>; -class EXTPDP_DESC : EXTR_W_TY1_R1_DESC_BASE<"extpdp", MipsEXTPDP, NoItinerary>; +class EXTPDP_DESC : EXTR_W_TY1_R1_DESC_BASE<"extpdp", MipsEXTPDP, NoItinerary>, + Uses<[DSPPos]>, Defs<[DSPPos, DSPEFI]>; class EXTPDPV_DESC : EXTR_W_TY1_R2_DESC_BASE<"extpdpv", MipsEXTPDP, - NoItinerary>; + NoItinerary>, + Uses<[DSPPos]>, Defs<[DSPPos, DSPEFI]>; -class EXTR_W_DESC : EXTR_W_TY1_R1_DESC_BASE<"extr.w", MipsEXTR_W, NoItinerary>; +class EXTR_W_DESC : EXTR_W_TY1_R1_DESC_BASE<"extr.w", MipsEXTR_W, NoItinerary>, + Defs<[DSPOutFlag23]>; class EXTRV_W_DESC : EXTR_W_TY1_R2_DESC_BASE<"extrv.w", MipsEXTR_W, - NoItinerary>; + NoItinerary>, Defs<[DSPOutFlag23]>; class EXTR_R_W_DESC : EXTR_W_TY1_R1_DESC_BASE<"extr_r.w", MipsEXTR_R_W, - NoItinerary>; + NoItinerary>, + Defs<[DSPOutFlag23]>; class EXTRV_R_W_DESC : EXTR_W_TY1_R2_DESC_BASE<"extrv_r.w", MipsEXTR_R_W, - NoItinerary>; + NoItinerary>, + Defs<[DSPOutFlag23]>; class EXTR_RS_W_DESC : EXTR_W_TY1_R1_DESC_BASE<"extr_rs.w", MipsEXTR_RS_W, - NoItinerary>; + NoItinerary>, + Defs<[DSPOutFlag23]>; class EXTRV_RS_W_DESC : EXTR_W_TY1_R2_DESC_BASE<"extrv_rs.w", MipsEXTR_RS_W, - NoItinerary>; + NoItinerary>, + Defs<[DSPOutFlag23]>; class EXTR_S_H_DESC : EXTR_W_TY1_R1_DESC_BASE<"extr_s.h", MipsEXTR_S_H, - NoItinerary>; + NoItinerary>, + Defs<[DSPOutFlag23]>; class EXTRV_S_H_DESC : EXTR_W_TY1_R2_DESC_BASE<"extrv_s.h", MipsEXTR_S_H, - NoItinerary>; + NoItinerary>, + Defs<[DSPOutFlag23]>; class SHILO_DESC : SHILO_R1_DESC_BASE<"shilo", MipsSHILO>; class SHILOV_DESC : SHILO_R2_DESC_BASE<"shilov", MipsSHILO>; -class MTHLIP_DESC : MTHLIP_DESC_BASE<"mthlip", MipsMTHLIP>; +class MTHLIP_DESC : MTHLIP_DESC_BASE<"mthlip", MipsMTHLIP>, Defs<[DSPPos]>; class RDDSP_DESC : RDDSP_DESC_BASE<"rddsp", int_mips_rddsp, NoItinerary>; class WRDSP_DESC : WRDSP_DESC_BASE<"wrdsp", int_mips_wrdsp, NoItinerary>; -class INSV_DESC : INSV_DESC_BASE<"insv", int_mips_insv, NoItinerary>; +class INSV_DESC : INSV_DESC_BASE<"insv", int_mips_insv, NoItinerary>, + Uses<[DSPPos, DSPSCount]>; 
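The hunks above replace the blanket DSPCtrl def/use lists with the specific DSPControl sub-fields each instruction actually touches: DSPCCond for the compares and pick, DSPCarry for addsc/addwc, DSPPos/DSPEFI for extp/extpdp/mthlip/insv, and the DSPOutFlag16_19/20/21/22/23 overflow bits for the saturating add/sub, multiply, shift and extract groups. Roughly why, say, addq_s.ph defines DSPOutFlag20: a minimal, illustrative C++ sketch of per-halfword saturating addition that latches overflow in a separate flag (the clamp-to-int16 rule is the usual two's-complement saturation and is assumed here, not spelled out in the patch):

#include <cstdint>
#include <cstdio>

// Saturating signed 16-bit add, roughly what addq_s.ph does per halfword.
// OutFlag20 stands in for one bit of the DSPControl ouflag field; modelling it
// as its own flag register is exactly the choice the patch makes.
static bool OutFlag20 = false;

static int16_t addq_s_h(int16_t a, int16_t b) {
  int32_t sum = int32_t(a) + int32_t(b);
  if (sum > INT16_MAX) { OutFlag20 = true; return INT16_MAX; }
  if (sum < INT16_MIN) { OutFlag20 = true; return INT16_MIN; }
  return int16_t(sum);
}

int main() {
  int16_t r0 = addq_s_h(0x7000, 0x2000); // overflows, saturates to 0x7fff, sets the flag
  int16_t r1 = addq_s_h(100, -50);       // in range, flag untouched
  std::printf("%d %d flag=%d\n", r0, r1, int(OutFlag20));
}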
//===----------------------------------------------------------------------===// // MIPS DSP Rev 2 // Addition/subtraction class ADDU_PH_DESC : ADDU_QB_DESC_BASE<"addu.ph", int_mips_addu_ph, NoItinerary, - DSPRegs, DSPRegs>, IsCommutable; + DSPRegs, DSPRegs>, IsCommutable, + Defs<[DSPOutFlag20]>; class ADDU_S_PH_DESC : ADDU_QB_DESC_BASE<"addu_s.ph", int_mips_addu_s_ph, NoItinerary, DSPRegs, DSPRegs>, - IsCommutable; + IsCommutable, Defs<[DSPOutFlag20]>; class SUBU_PH_DESC : ADDU_QB_DESC_BASE<"subu.ph", int_mips_subu_ph, NoItinerary, - DSPRegs, DSPRegs>; + DSPRegs, DSPRegs>, + Defs<[DSPOutFlag20]>; class SUBU_S_PH_DESC : ADDU_QB_DESC_BASE<"subu_s.ph", int_mips_subu_s_ph, - NoItinerary, DSPRegs, DSPRegs>; + NoItinerary, DSPRegs, DSPRegs>, + Defs<[DSPOutFlag20]>; class ADDUH_QB_DESC : ADDUH_QB_DESC_BASE<"adduh.qb", int_mips_adduh_qb, - NoItinerary, DSPRegs>, - ClearDefs, IsCommutable; + NoItinerary, DSPRegs>, IsCommutable; class ADDUH_R_QB_DESC : ADDUH_QB_DESC_BASE<"adduh_r.qb", int_mips_adduh_r_qb, - NoItinerary, DSPRegs>, - ClearDefs, IsCommutable; + NoItinerary, DSPRegs>, IsCommutable; class SUBUH_QB_DESC : ADDUH_QB_DESC_BASE<"subuh.qb", int_mips_subuh_qb, - NoItinerary, DSPRegs>, ClearDefs; + NoItinerary, DSPRegs>; class SUBUH_R_QB_DESC : ADDUH_QB_DESC_BASE<"subuh_r.qb", int_mips_subuh_r_qb, - NoItinerary, DSPRegs>, ClearDefs; + NoItinerary, DSPRegs>; class ADDQH_PH_DESC : ADDUH_QB_DESC_BASE<"addqh.ph", int_mips_addqh_ph, - NoItinerary, DSPRegs>, - ClearDefs, IsCommutable; + NoItinerary, DSPRegs>, IsCommutable; class ADDQH_R_PH_DESC : ADDUH_QB_DESC_BASE<"addqh_r.ph", int_mips_addqh_r_ph, - NoItinerary, DSPRegs>, - ClearDefs, IsCommutable; + NoItinerary, DSPRegs>, IsCommutable; class SUBQH_PH_DESC : ADDUH_QB_DESC_BASE<"subqh.ph", int_mips_subqh_ph, - NoItinerary, DSPRegs>, ClearDefs; + NoItinerary, DSPRegs>; class SUBQH_R_PH_DESC : ADDUH_QB_DESC_BASE<"subqh_r.ph", int_mips_subqh_r_ph, - NoItinerary, DSPRegs>, ClearDefs; + NoItinerary, DSPRegs>; class ADDQH_W_DESC : ADDUH_QB_DESC_BASE<"addqh.w", int_mips_addqh_w, - NoItinerary, CPURegs>, - ClearDefs, IsCommutable; + NoItinerary, CPURegs>, IsCommutable; class ADDQH_R_W_DESC : ADDUH_QB_DESC_BASE<"addqh_r.w", int_mips_addqh_r_w, - NoItinerary, CPURegs>, - ClearDefs, IsCommutable; + NoItinerary, CPURegs>, IsCommutable; class SUBQH_W_DESC : ADDUH_QB_DESC_BASE<"subqh.w", int_mips_subqh_w, - NoItinerary, CPURegs>, ClearDefs; + NoItinerary, CPURegs>; class SUBQH_R_W_DESC : ADDUH_QB_DESC_BASE<"subqh_r.w", int_mips_subqh_r_w, - NoItinerary, CPURegs>, ClearDefs; + NoItinerary, CPURegs>; // Comparison class CMPGDU_EQ_QB_DESC : CMP_EQ_QB_R3_DESC_BASE<"cmpgdu.eq.qb", int_mips_cmpgdu_eq_qb, NoItinerary, CPURegs, DSPRegs>, - IsCommutable; + IsCommutable, Defs<[DSPCCond]>; class CMPGDU_LT_QB_DESC : CMP_EQ_QB_R3_DESC_BASE<"cmpgdu.lt.qb", int_mips_cmpgdu_lt_qb, NoItinerary, CPURegs, DSPRegs>, - IsCommutable; + Defs<[DSPCCond]>; class CMPGDU_LE_QB_DESC : CMP_EQ_QB_R3_DESC_BASE<"cmpgdu.le.qb", int_mips_cmpgdu_le_qb, NoItinerary, CPURegs, DSPRegs>, - IsCommutable; + Defs<[DSPCCond]>; // Absolute class ABSQ_S_QB_DESC : ABSQ_S_PH_R2_DESC_BASE<"absq_s.qb", int_mips_absq_s_qb, - NoItinerary, DSPRegs>; + NoItinerary, DSPRegs>, + Defs<[DSPOutFlag20]>; // Multiplication -class MUL_PH_DESC : ADDUH_QB_DESC_BASE<"mul.ph", int_mips_mul_ph, NoItinerary, - DSPRegs>, IsCommutable; +class MUL_PH_DESC : ADDUH_QB_DESC_BASE<"mul.ph", null_frag, NoItinerary, + DSPRegs>, IsCommutable, + Defs<[DSPOutFlag21]>; class MUL_S_PH_DESC : ADDUH_QB_DESC_BASE<"mul_s.ph", int_mips_mul_s_ph, - 
NoItinerary, DSPRegs>, IsCommutable; + NoItinerary, DSPRegs>, IsCommutable, + Defs<[DSPOutFlag21]>; class MULQ_S_W_DESC : ADDUH_QB_DESC_BASE<"mulq_s.w", int_mips_mulq_s_w, - NoItinerary, CPURegs>, IsCommutable; + NoItinerary, CPURegs>, IsCommutable, + Defs<[DSPOutFlag21]>; class MULQ_RS_W_DESC : ADDUH_QB_DESC_BASE<"mulq_rs.w", int_mips_mulq_rs_w, - NoItinerary, CPURegs>, IsCommutable; + NoItinerary, CPURegs>, IsCommutable, + Defs<[DSPOutFlag21]>; class MULQ_S_PH_DESC : ADDU_QB_DESC_BASE<"mulq_s.ph", int_mips_mulq_s_ph, NoItinerary, DSPRegs, DSPRegs>, - IsCommutable; + IsCommutable, Defs<[DSPOutFlag21]>; // Dot product with accumulate/subtract class DPA_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpa.w.ph", MipsDPA_W_PH>; class DPS_W_PH_DESC : DPA_W_PH_DESC_BASE<"dps.w.ph", MipsDPS_W_PH>; -class DPAQX_S_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpaqx_s.w.ph", MipsDPAQX_S_W_PH>; +class DPAQX_S_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpaqx_s.w.ph", MipsDPAQX_S_W_PH>, + Defs<[DSPOutFlag16_19]>; class DPAQX_SA_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpaqx_sa.w.ph", - MipsDPAQX_SA_W_PH>; + MipsDPAQX_SA_W_PH>, + Defs<[DSPOutFlag16_19]>; class DPAX_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpax.w.ph", MipsDPAX_W_PH>; class DPSX_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpsx.w.ph", MipsDPSX_W_PH>; -class DPSQX_S_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpsqx_s.w.ph", MipsDPSQX_S_W_PH>; +class DPSQX_S_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpsqx_s.w.ph", MipsDPSQX_S_W_PH>, + Defs<[DSPOutFlag16_19]>; class DPSQX_SA_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpsqx_sa.w.ph", - MipsDPSQX_SA_W_PH>; + MipsDPSQX_SA_W_PH>, + Defs<[DSPOutFlag16_19]>; class MULSA_W_PH_DESC : DPA_W_PH_DESC_BASE<"mulsa.w.ph", MipsMULSA_W_PH>; @@ -996,45 +1031,45 @@ class PRECR_QB_PH_DESC : CMP_EQ_QB_R3_DESC_BASE<"precr.qb.ph", class PRECR_SRA_PH_W_DESC : PRECR_SRA_PH_W_DESC_BASE<"precr_sra.ph.w", int_mips_precr_sra_ph_w, NoItinerary, DSPRegs, - CPURegs>, ClearDefs; + CPURegs>; class PRECR_SRA_R_PH_W_DESC : PRECR_SRA_PH_W_DESC_BASE<"precr_sra_r.ph.w", int_mips_precr_sra_r_ph_w, NoItinerary, DSPRegs, - CPURegs>, ClearDefs; + CPURegs>; // Shift -class SHRA_QB_DESC : SHLL_QB_R2_DESC_BASE<"shra.qb", int_mips_shra_qb, immZExt3, - NoItinerary, DSPRegs>, ClearDefs; +class SHRA_QB_DESC : SHLL_QB_R2_DESC_BASE<"shra.qb", null_frag, immZExt3, + NoItinerary, DSPRegs>; class SHRAV_QB_DESC : SHLL_QB_R3_DESC_BASE<"shrav.qb", int_mips_shra_qb, - NoItinerary, DSPRegs>, ClearDefs; + NoItinerary, DSPRegs>; class SHRA_R_QB_DESC : SHLL_QB_R2_DESC_BASE<"shra_r.qb", int_mips_shra_r_qb, - immZExt3, NoItinerary, DSPRegs>, - ClearDefs; + immZExt3, NoItinerary, DSPRegs>; class SHRAV_R_QB_DESC : SHLL_QB_R3_DESC_BASE<"shrav_r.qb", int_mips_shra_r_qb, - NoItinerary, DSPRegs>, ClearDefs; + NoItinerary, DSPRegs>; -class SHRL_PH_DESC : SHLL_QB_R2_DESC_BASE<"shrl.ph", int_mips_shrl_ph, immZExt4, - NoItinerary, DSPRegs>, ClearDefs; +class SHRL_PH_DESC : SHLL_QB_R2_DESC_BASE<"shrl.ph", null_frag, immZExt4, + NoItinerary, DSPRegs>; class SHRLV_PH_DESC : SHLL_QB_R3_DESC_BASE<"shrlv.ph", int_mips_shrl_ph, - NoItinerary, DSPRegs>, ClearDefs; + NoItinerary, DSPRegs>; // Misc class APPEND_DESC : APPEND_DESC_BASE<"append", int_mips_append, immZExt5, - NoItinerary>, ClearDefs; + NoItinerary>; class BALIGN_DESC : APPEND_DESC_BASE<"balign", int_mips_balign, immZExt2, - NoItinerary>, ClearDefs; + NoItinerary>; class PREPEND_DESC : APPEND_DESC_BASE<"prepend", int_mips_prepend, immZExt5, - NoItinerary>, ClearDefs; + NoItinerary>; // Pseudos. 
-def BPOSGE32_PSEUDO : BPOSGE32_PSEUDO_DESC_BASE<int_mips_bposge32, NoItinerary>; +def BPOSGE32_PSEUDO : BPOSGE32_PSEUDO_DESC_BASE<int_mips_bposge32, + NoItinerary>, Uses<[DSPPos]>; // Instruction defs. // MIPS DSP Rev 1 @@ -1094,6 +1129,10 @@ def MAQ_S_W_PHL : MAQ_S_W_PHL_ENC, MAQ_S_W_PHL_DESC; def MAQ_S_W_PHR : MAQ_S_W_PHR_ENC, MAQ_S_W_PHR_DESC; def MAQ_SA_W_PHL : MAQ_SA_W_PHL_ENC, MAQ_SA_W_PHL_DESC; def MAQ_SA_W_PHR : MAQ_SA_W_PHR_ENC, MAQ_SA_W_PHR_DESC; +def MFHI_DSP : MFHI_ENC, MFHI_DESC; +def MFLO_DSP : MFLO_ENC, MFLO_DESC; +def MTHI_DSP : MTHI_ENC, MTHI_DESC; +def MTLO_DSP : MTLO_ENC, MTLO_DESC; def DPAU_H_QBL : DPAU_H_QBL_ENC, DPAU_H_QBL_DESC; def DPAU_H_QBR : DPAU_H_QBR_ENC, DPAU_H_QBR_DESC; def DPSU_H_QBL : DPSU_H_QBL_ENC, DPSU_H_QBL_DESC; @@ -1201,13 +1240,35 @@ def PREPEND : PREPEND_ENC, PREPEND_DESC; } // Pseudos. -/// Pseudo instructions for loading, storing and copying accumulator registers. let isPseudo = 1 in { + // Pseudo instructions for loading and storing accumulator registers. defm LOAD_AC_DSP : LoadM<"load_ac_dsp", ACRegsDSP>; defm STORE_AC_DSP : StoreM<"store_ac_dsp", ACRegsDSP>; + + // Pseudos for loading and storing ccond field of DSP control register. + defm LOAD_CCOND_DSP : LoadM<"load_ccond_dsp", DSPCC>; + defm STORE_CCOND_DSP : StoreM<"store_ccond_dsp", DSPCC>; } -def COPY_AC_DSP : PseudoSE<(outs ACRegsDSP:$dst), (ins ACRegsDSP:$src), []>; +// Pseudo CMP and PICK instructions. +class PseudoCMP<Instruction RealInst> : + PseudoDSP<(outs DSPCC:$cmp), (ins DSPRegs:$rs, DSPRegs:$rt), []>, + PseudoInstExpansion<(RealInst DSPRegs:$rs, DSPRegs:$rt)>, NeverHasSideEffects; + +class PseudoPICK<Instruction RealInst> : + PseudoDSP<(outs DSPRegs:$rd), (ins DSPCC:$cmp, DSPRegs:$rs, DSPRegs:$rt), []>, + PseudoInstExpansion<(RealInst DSPRegs:$rd, DSPRegs:$rs, DSPRegs:$rt)>, + NeverHasSideEffects; + +def PseudoCMP_EQ_PH : PseudoCMP<CMP_EQ_PH>; +def PseudoCMP_LT_PH : PseudoCMP<CMP_LT_PH>; +def PseudoCMP_LE_PH : PseudoCMP<CMP_LE_PH>; +def PseudoCMPU_EQ_QB : PseudoCMP<CMPU_EQ_QB>; +def PseudoCMPU_LT_QB : PseudoCMP<CMPU_LT_QB>; +def PseudoCMPU_LE_QB : PseudoCMP<CMPU_LE_QB>; + +def PseudoPICK_PH : PseudoPICK<PICK_PH>; +def PseudoPICK_QB : PseudoPICK<PICK_QB>; // Patterns. class DSPPat<dag pattern, dag result, Predicate pred = HasDSP> : @@ -1232,6 +1293,95 @@ def : DSPPat<(store (v2i16 DSPRegs:$val), addr:$a), def : DSPPat<(store (v4i8 DSPRegs:$val), addr:$a), (SW (COPY_TO_REGCLASS DSPRegs:$val, CPURegs), addr:$a)>; +// Binary operations. +class DSPBinPat<Instruction Inst, ValueType ValTy, SDPatternOperator Node, + Predicate Pred = HasDSP> : + DSPPat<(Node ValTy:$a, ValTy:$b), (Inst ValTy:$a, ValTy:$b), Pred>; + +def : DSPBinPat<ADDQ_PH, v2i16, int_mips_addq_ph>; +def : DSPBinPat<ADDQ_PH, v2i16, add>; +def : DSPBinPat<SUBQ_PH, v2i16, int_mips_subq_ph>; +def : DSPBinPat<SUBQ_PH, v2i16, sub>; +def : DSPBinPat<MUL_PH, v2i16, int_mips_mul_ph, HasDSPR2>; +def : DSPBinPat<MUL_PH, v2i16, mul, HasDSPR2>; +def : DSPBinPat<ADDU_QB, v4i8, int_mips_addu_qb>; +def : DSPBinPat<ADDU_QB, v4i8, add>; +def : DSPBinPat<SUBU_QB, v4i8, int_mips_subu_qb>; +def : DSPBinPat<SUBU_QB, v4i8, sub>; +def : DSPBinPat<ADDSC, i32, int_mips_addsc>; +def : DSPBinPat<ADDSC, i32, addc>; +def : DSPBinPat<ADDWC, i32, int_mips_addwc>; +def : DSPBinPat<ADDWC, i32, adde>; + +// Shift immediate patterns. 
+class DSPShiftPat<Instruction Inst, ValueType ValTy, SDPatternOperator Node, + SDPatternOperator Imm, Predicate Pred = HasDSP> : + DSPPat<(Node ValTy:$a, Imm:$shamt), (Inst ValTy:$a, Imm:$shamt), Pred>; + +def : DSPShiftPat<SHLL_PH, v2i16, MipsSHLL_DSP, imm>; +def : DSPShiftPat<SHRA_PH, v2i16, MipsSHRA_DSP, imm>; +def : DSPShiftPat<SHRL_PH, v2i16, MipsSHRL_DSP, imm, HasDSPR2>; +def : DSPShiftPat<SHLL_PH, v2i16, int_mips_shll_ph, immZExt4>; +def : DSPShiftPat<SHRA_PH, v2i16, int_mips_shra_ph, immZExt4>; +def : DSPShiftPat<SHRL_PH, v2i16, int_mips_shrl_ph, immZExt4, HasDSPR2>; +def : DSPShiftPat<SHLL_QB, v4i8, MipsSHLL_DSP, imm>; +def : DSPShiftPat<SHRA_QB, v4i8, MipsSHRA_DSP, imm, HasDSPR2>; +def : DSPShiftPat<SHRL_QB, v4i8, MipsSHRL_DSP, imm>; +def : DSPShiftPat<SHLL_QB, v4i8, int_mips_shll_qb, immZExt3>; +def : DSPShiftPat<SHRA_QB, v4i8, int_mips_shra_qb, immZExt3, HasDSPR2>; +def : DSPShiftPat<SHRL_QB, v4i8, int_mips_shrl_qb, immZExt3>; + +// SETCC/SELECT_CC patterns. +class DSPSetCCPat<Instruction Cmp, Instruction Pick, ValueType ValTy, + CondCode CC> : + DSPPat<(ValTy (MipsSETCC_DSP ValTy:$a, ValTy:$b, CC)), + (ValTy (Pick (ValTy (Cmp ValTy:$a, ValTy:$b)), + (ValTy (COPY_TO_REGCLASS (ADDiu ZERO, -1), DSPRegs)), + (ValTy ZERO)))>; + +class DSPSetCCPatInv<Instruction Cmp, Instruction Pick, ValueType ValTy, + CondCode CC> : + DSPPat<(ValTy (MipsSETCC_DSP ValTy:$a, ValTy:$b, CC)), + (ValTy (Pick (ValTy (Cmp ValTy:$a, ValTy:$b)), + (ValTy ZERO), + (ValTy (COPY_TO_REGCLASS (ADDiu ZERO, -1), DSPRegs))))>; + +class DSPSelectCCPat<Instruction Cmp, Instruction Pick, ValueType ValTy, + CondCode CC> : + DSPPat<(ValTy (MipsSELECT_CC_DSP ValTy:$a, ValTy:$b, ValTy:$c, ValTy:$d, CC)), + (ValTy (Pick (ValTy (Cmp ValTy:$a, ValTy:$b)), $c, $d))>; + +class DSPSelectCCPatInv<Instruction Cmp, Instruction Pick, ValueType ValTy, + CondCode CC> : + DSPPat<(ValTy (MipsSELECT_CC_DSP ValTy:$a, ValTy:$b, ValTy:$c, ValTy:$d, CC)), + (ValTy (Pick (ValTy (Cmp ValTy:$a, ValTy:$b)), $d, $c))>; + +def : DSPSetCCPat<PseudoCMP_EQ_PH, PseudoPICK_PH, v2i16, SETEQ>; +def : DSPSetCCPat<PseudoCMP_LT_PH, PseudoPICK_PH, v2i16, SETLT>; +def : DSPSetCCPat<PseudoCMP_LE_PH, PseudoPICK_PH, v2i16, SETLE>; +def : DSPSetCCPatInv<PseudoCMP_EQ_PH, PseudoPICK_PH, v2i16, SETNE>; +def : DSPSetCCPatInv<PseudoCMP_LT_PH, PseudoPICK_PH, v2i16, SETGE>; +def : DSPSetCCPatInv<PseudoCMP_LE_PH, PseudoPICK_PH, v2i16, SETGT>; +def : DSPSetCCPat<PseudoCMPU_EQ_QB, PseudoPICK_QB, v4i8, SETEQ>; +def : DSPSetCCPat<PseudoCMPU_LT_QB, PseudoPICK_QB, v4i8, SETULT>; +def : DSPSetCCPat<PseudoCMPU_LE_QB, PseudoPICK_QB, v4i8, SETULE>; +def : DSPSetCCPatInv<PseudoCMPU_EQ_QB, PseudoPICK_QB, v4i8, SETNE>; +def : DSPSetCCPatInv<PseudoCMPU_LT_QB, PseudoPICK_QB, v4i8, SETUGE>; +def : DSPSetCCPatInv<PseudoCMPU_LE_QB, PseudoPICK_QB, v4i8, SETUGT>; + +def : DSPSelectCCPat<PseudoCMP_EQ_PH, PseudoPICK_PH, v2i16, SETEQ>; +def : DSPSelectCCPat<PseudoCMP_LT_PH, PseudoPICK_PH, v2i16, SETLT>; +def : DSPSelectCCPat<PseudoCMP_LE_PH, PseudoPICK_PH, v2i16, SETLE>; +def : DSPSelectCCPatInv<PseudoCMP_EQ_PH, PseudoPICK_PH, v2i16, SETNE>; +def : DSPSelectCCPatInv<PseudoCMP_LT_PH, PseudoPICK_PH, v2i16, SETGE>; +def : DSPSelectCCPatInv<PseudoCMP_LE_PH, PseudoPICK_PH, v2i16, SETGT>; +def : DSPSelectCCPat<PseudoCMPU_EQ_QB, PseudoPICK_QB, v4i8, SETEQ>; +def : DSPSelectCCPat<PseudoCMPU_LT_QB, PseudoPICK_QB, v4i8, SETULT>; +def : DSPSelectCCPat<PseudoCMPU_LE_QB, PseudoPICK_QB, v4i8, SETULE>; +def : DSPSelectCCPatInv<PseudoCMPU_EQ_QB, PseudoPICK_QB, v4i8, SETNE>; +def : 
DSPSelectCCPatInv<PseudoCMPU_LT_QB, PseudoPICK_QB, v4i8, SETUGE>; +def : DSPSelectCCPatInv<PseudoCMPU_LE_QB, PseudoPICK_QB, v4i8, SETUGT>; + // Extr patterns. class EXTR_W_TY1_R2_Pat<SDPatternOperator OpNode, Instruction Instr> : DSPPat<(i32 (OpNode CPURegs:$rs, ACRegsDSP:$ac)), diff --git a/contrib/llvm/lib/Target/Mips/MipsISelDAGToDAG.cpp b/contrib/llvm/lib/Target/Mips/MipsISelDAGToDAG.cpp index 77b08cb..968e536 100644 --- a/contrib/llvm/lib/Target/Mips/MipsISelDAGToDAG.cpp +++ b/contrib/llvm/lib/Target/Mips/MipsISelDAGToDAG.cpp @@ -17,7 +17,6 @@ #include "MipsSEISelDAGToDAG.h" #include "Mips.h" #include "MCTargetDesc/MipsBaseInfo.h" -#include "MipsAnalyzeImmediate.h" #include "MipsMachineFunction.h" #include "MipsRegisterInfo.h" #include "llvm/CodeGen/MachineConstantPool.h" diff --git a/contrib/llvm/lib/Target/Mips/MipsISelLowering.cpp b/contrib/llvm/lib/Target/Mips/MipsISelLowering.cpp index e2219f2..4d76181 100644 --- a/contrib/llvm/lib/Target/Mips/MipsISelLowering.cpp +++ b/contrib/llvm/lib/Target/Mips/MipsISelLowering.cpp @@ -30,7 +30,6 @@ #include "llvm/IR/CallingConv.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/GlobalVariable.h" -#include "llvm/IR/Intrinsics.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" @@ -198,6 +197,11 @@ const char *MipsTargetLowering::getTargetNodeName(unsigned Opcode) const { case MipsISD::MADDU_DSP: return "MipsISD::MADDU_DSP"; case MipsISD::MSUB_DSP: return "MipsISD::MSUB_DSP"; case MipsISD::MSUBU_DSP: return "MipsISD::MSUBU_DSP"; + case MipsISD::SHLL_DSP: return "MipsISD::SHLL_DSP"; + case MipsISD::SHRA_DSP: return "MipsISD::SHRA_DSP"; + case MipsISD::SHRL_DSP: return "MipsISD::SHRL_DSP"; + case MipsISD::SETCC_DSP: return "MipsISD::SETCC_DSP"; + case MipsISD::SELECT_CC_DSP: return "MipsISD::SELECT_CC_DSP"; default: return NULL; } } @@ -211,7 +215,7 @@ MipsTargetLowering(MipsTargetMachine &TM) // Mips does not have i1 type, so use i32 for // setcc operations results (slt, sgt, ...). setBooleanContents(ZeroOrOneBooleanContent); - setBooleanVectorContents(ZeroOrOneBooleanContent); // FIXME: Is this correct? + setBooleanVectorContents(ZeroOrNegativeOneBooleanContent); // Load extented operations for i1 types must be promoted setLoadExtAction(ISD::EXTLOAD, MVT::i1, Promote); @@ -346,9 +350,6 @@ MipsTargetLowering(MipsTargetMachine &TM) setOperationAction(ISD::VACOPY, MVT::Other, Expand); setOperationAction(ISD::VAEND, MVT::Other, Expand); - setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom); - setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i64, Custom); - // Use the default for now setOperationAction(ISD::STACKSAVE, MVT::Other, Expand); setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand); @@ -449,7 +450,7 @@ static SDValue performDivRemCombine(SDNode *N, SelectionDAG &DAG, return SDValue(); } -static Mips::CondCode FPCondCCodeToFCC(ISD::CondCode CC) { +static Mips::CondCode condCodeToFCC(ISD::CondCode CC) { switch (CC) { default: llvm_unreachable("Unknown fp condition code!"); case ISD::SETEQ: @@ -508,7 +509,7 @@ static SDValue createFPCmp(SelectionDAG &DAG, const SDValue &Op) { ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get(); return DAG.getNode(MipsISD::FPCmp, DL, MVT::Glue, LHS, RHS, - DAG.getConstant(FPCondCCodeToFCC(CC), MVT::i32)); + DAG.getConstant(condCodeToFCC(CC), MVT::i32)); } // Creates and returns a CMovFPT/F node. 
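Taken together with the ZeroOrNegativeOneBooleanContent change above, the new SETCC_DSP/SELECT_CC_DSP nodes and the DSPSetCCPat/DSPSelectCCPat patterns turn a v2i16/v4i8 setcc into a DSP compare (which writes the per-lane ccond bits) followed by a pick between an all-ones register and $zero, so each lane ends up as -1 (true) or 0 (false). An illustrative C++ sketch of what the selected cmp.lt.ph + pick.ph pair computes for a v2i16 setlt; lane layout and endianness details are glossed over:

#include <cstdint>
#include <cstdio>

// Rough emulation of the cmp.lt.ph + pick.ph pair selected by DSPSetCCPat:
// each 16-bit lane becomes 0xffff (true) or 0x0000 (false), i.e. the
// zero-or-negative-one booleans declared by setBooleanVectorContents.
static uint32_t setlt_v2i16(uint32_t a, uint32_t b) {
  uint32_t res = 0;
  for (int lane = 0; lane < 2; ++lane) {
    int16_t ae = int16_t(a >> (16 * lane));
    int16_t be = int16_t(b >> (16 * lane));
    uint32_t mask = (ae < be) ? 0xffffu : 0u; // ccond bit -> pick all-ones or zero
    res |= mask << (16 * lane);
  }
  return res;
}

int main() {
  // lanes {1, -3} < {2, -4} -> {true, false} -> 0x0000ffff
  uint32_t a = (uint32_t(uint16_t(-3)) << 16) | 1;
  uint32_t b = (uint32_t(uint16_t(-4)) << 16) | 2;
  std::printf("0x%08x\n", setlt_v2i16(a, b));
}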
@@ -712,10 +713,7 @@ void MipsTargetLowering::ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const { - SDValue Res = LowerOperation(SDValue(N, 0), DAG); - - for (unsigned I = 0, E = Res->getNumValues(); I != E; ++I) - Results.push_back(Res.getValue(I)); + return LowerOperationWrapper(N, Results, DAG); } SDValue MipsTargetLowering:: @@ -739,15 +737,12 @@ LowerOperation(SDValue Op, SelectionDAG &DAG) const case ISD::FRAMEADDR: return lowerFRAMEADDR(Op, DAG); case ISD::RETURNADDR: return lowerRETURNADDR(Op, DAG); case ISD::EH_RETURN: return lowerEH_RETURN(Op, DAG); - case ISD::MEMBARRIER: return lowerMEMBARRIER(Op, DAG); case ISD::ATOMIC_FENCE: return lowerATOMIC_FENCE(Op, DAG); case ISD::SHL_PARTS: return lowerShiftLeftParts(Op, DAG); case ISD::SRA_PARTS: return lowerShiftRightParts(Op, DAG, true); case ISD::SRL_PARTS: return lowerShiftRightParts(Op, DAG, false); case ISD::LOAD: return lowerLOAD(Op, DAG); case ISD::STORE: return lowerSTORE(Op, DAG); - case ISD::INTRINSIC_WO_CHAIN: return lowerINTRINSIC_WO_CHAIN(Op, DAG); - case ISD::INTRINSIC_W_CHAIN: return lowerINTRINSIC_W_CHAIN(Op, DAG); case ISD::ADD: return lowerADD(Op, DAG); } return SDValue(); @@ -1827,15 +1822,6 @@ SDValue MipsTargetLowering::lowerEH_RETURN(SDValue Op, SelectionDAG &DAG) Chain.getValue(1)); } -// TODO: set SType according to the desired memory barrier behavior. -SDValue -MipsTargetLowering::lowerMEMBARRIER(SDValue Op, SelectionDAG &DAG) const { - unsigned SType = 0; - DebugLoc DL = Op.getDebugLoc(); - return DAG.getNode(MipsISD::Sync, DL, MVT::Other, Op.getOperand(0), - DAG.getConstant(SType, MVT::i32)); -} - SDValue MipsTargetLowering::lowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG) const { // FIXME: Need pseudo-fence for 'singlethread' fences @@ -1918,7 +1904,7 @@ SDValue MipsTargetLowering::lowerShiftRightParts(SDValue Op, SelectionDAG &DAG, return DAG.getMergeValues(Ops, 2, DL); } -static SDValue CreateLoadLR(unsigned Opc, SelectionDAG &DAG, LoadSDNode *LD, +static SDValue createLoadLR(unsigned Opc, SelectionDAG &DAG, LoadSDNode *LD, SDValue Chain, SDValue Src, unsigned Offset) { SDValue Ptr = LD->getBasePtr(); EVT VT = LD->getValueType(0), MemVT = LD->getMemoryVT(); @@ -1958,15 +1944,15 @@ SDValue MipsTargetLowering::lowerLOAD(SDValue Op, SelectionDAG &DAG) const { // (set tmp, (ldl (add baseptr, 7), undef)) // (set dst, (ldr baseptr, tmp)) if ((VT == MVT::i64) && (ExtType == ISD::NON_EXTLOAD)) { - SDValue LDL = CreateLoadLR(MipsISD::LDL, DAG, LD, Chain, Undef, + SDValue LDL = createLoadLR(MipsISD::LDL, DAG, LD, Chain, Undef, IsLittle ? 7 : 0); - return CreateLoadLR(MipsISD::LDR, DAG, LD, LDL.getValue(1), LDL, + return createLoadLR(MipsISD::LDR, DAG, LD, LDL.getValue(1), LDL, IsLittle ? 0 : 7); } - SDValue LWL = CreateLoadLR(MipsISD::LWL, DAG, LD, Chain, Undef, + SDValue LWL = createLoadLR(MipsISD::LWL, DAG, LD, Chain, Undef, IsLittle ? 3 : 0); - SDValue LWR = CreateLoadLR(MipsISD::LWR, DAG, LD, LWL.getValue(1), LWL, + SDValue LWR = createLoadLR(MipsISD::LWR, DAG, LD, LWL.getValue(1), LWL, IsLittle ? 
0 : 3); // Expand @@ -1997,7 +1983,7 @@ SDValue MipsTargetLowering::lowerLOAD(SDValue Op, SelectionDAG &DAG) const { return DAG.getMergeValues(Ops, 2, DL); } -static SDValue CreateStoreLR(unsigned Opc, SelectionDAG &DAG, StoreSDNode *SD, +static SDValue createStoreLR(unsigned Opc, SelectionDAG &DAG, StoreSDNode *SD, SDValue Chain, unsigned Offset) { SDValue Ptr = SD->getBasePtr(), Value = SD->getValue(); EVT MemVT = SD->getMemoryVT(), BasePtrVT = Ptr.getValueType(); @@ -2034,9 +2020,9 @@ SDValue MipsTargetLowering::lowerSTORE(SDValue Op, SelectionDAG &DAG) const { // (swl val, (add baseptr, 3)) // (swr val, baseptr) if ((VT == MVT::i32) || SD->isTruncatingStore()) { - SDValue SWL = CreateStoreLR(MipsISD::SWL, DAG, SD, Chain, + SDValue SWL = createStoreLR(MipsISD::SWL, DAG, SD, Chain, IsLittle ? 3 : 0); - return CreateStoreLR(MipsISD::SWR, DAG, SD, SWL, IsLittle ? 0 : 3); + return createStoreLR(MipsISD::SWR, DAG, SD, SWL, IsLittle ? 0 : 3); } assert(VT == MVT::i64); @@ -2046,172 +2032,8 @@ SDValue MipsTargetLowering::lowerSTORE(SDValue Op, SelectionDAG &DAG) const { // to // (sdl val, (add baseptr, 7)) // (sdr val, baseptr) - SDValue SDL = CreateStoreLR(MipsISD::SDL, DAG, SD, Chain, IsLittle ? 7 : 0); - return CreateStoreLR(MipsISD::SDR, DAG, SD, SDL, IsLittle ? 0 : 7); -} - -static SDValue initAccumulator(SDValue In, DebugLoc DL, SelectionDAG &DAG) { - SDValue InLo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, In, - DAG.getConstant(0, MVT::i32)); - SDValue InHi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, In, - DAG.getConstant(1, MVT::i32)); - return DAG.getNode(MipsISD::InsertLOHI, DL, MVT::Untyped, InLo, InHi); -} - -static SDValue extractLOHI(SDValue Op, DebugLoc DL, SelectionDAG &DAG) { - SDValue Lo = DAG.getNode(MipsISD::ExtractLOHI, DL, MVT::i32, Op, - DAG.getConstant(Mips::sub_lo, MVT::i32)); - SDValue Hi = DAG.getNode(MipsISD::ExtractLOHI, DL, MVT::i32, Op, - DAG.getConstant(Mips::sub_hi, MVT::i32)); - return DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Lo, Hi); -} - -// This function expands mips intrinsic nodes which have 64-bit input operands -// or output values. -// -// out64 = intrinsic-node in64 -// => -// lo = copy (extract-element (in64, 0)) -// hi = copy (extract-element (in64, 1)) -// mips-specific-node -// v0 = copy lo -// v1 = copy hi -// out64 = merge-values (v0, v1) -// -static SDValue lowerDSPIntr(SDValue Op, SelectionDAG &DAG, unsigned Opc) { - DebugLoc DL = Op.getDebugLoc(); - bool HasChainIn = Op->getOperand(0).getValueType() == MVT::Other; - SmallVector<SDValue, 3> Ops; - unsigned OpNo = 0; - - // See if Op has a chain input. - if (HasChainIn) - Ops.push_back(Op->getOperand(OpNo++)); - - // The next operand is the intrinsic opcode. - assert(Op->getOperand(OpNo).getOpcode() == ISD::TargetConstant); - - // See if the next operand has type i64. - SDValue Opnd = Op->getOperand(++OpNo), In64; - - if (Opnd.getValueType() == MVT::i64) - In64 = initAccumulator(Opnd, DL, DAG); - else - Ops.push_back(Opnd); - - // Push the remaining operands. - for (++OpNo ; OpNo < Op->getNumOperands(); ++OpNo) - Ops.push_back(Op->getOperand(OpNo)); - - // Add In64 to the end of the list. - if (In64.getNode()) - Ops.push_back(In64); - - // Scan output. - SmallVector<EVT, 2> ResTys; - - for (SDNode::value_iterator I = Op->value_begin(), E = Op->value_end(); - I != E; ++I) - ResTys.push_back((*I == MVT::i64) ? MVT::Untyped : *I); - - // Create node. - SDValue Val = DAG.getNode(Opc, DL, ResTys, &Ops[0], Ops.size()); - SDValue Out = (ResTys[0] == MVT::Untyped) ? 
extractLOHI(Val, DL, DAG) : Val; - - if (!HasChainIn) - return Out; - - assert(Val->getValueType(1) == MVT::Other); - SDValue Vals[] = { Out, SDValue(Val.getNode(), 1) }; - return DAG.getMergeValues(Vals, 2, DL); -} - -SDValue MipsTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op, - SelectionDAG &DAG) const { - switch (cast<ConstantSDNode>(Op->getOperand(0))->getZExtValue()) { - default: - return SDValue(); - case Intrinsic::mips_shilo: - return lowerDSPIntr(Op, DAG, MipsISD::SHILO); - case Intrinsic::mips_dpau_h_qbl: - return lowerDSPIntr(Op, DAG, MipsISD::DPAU_H_QBL); - case Intrinsic::mips_dpau_h_qbr: - return lowerDSPIntr(Op, DAG, MipsISD::DPAU_H_QBR); - case Intrinsic::mips_dpsu_h_qbl: - return lowerDSPIntr(Op, DAG, MipsISD::DPSU_H_QBL); - case Intrinsic::mips_dpsu_h_qbr: - return lowerDSPIntr(Op, DAG, MipsISD::DPSU_H_QBR); - case Intrinsic::mips_dpa_w_ph: - return lowerDSPIntr(Op, DAG, MipsISD::DPA_W_PH); - case Intrinsic::mips_dps_w_ph: - return lowerDSPIntr(Op, DAG, MipsISD::DPS_W_PH); - case Intrinsic::mips_dpax_w_ph: - return lowerDSPIntr(Op, DAG, MipsISD::DPAX_W_PH); - case Intrinsic::mips_dpsx_w_ph: - return lowerDSPIntr(Op, DAG, MipsISD::DPSX_W_PH); - case Intrinsic::mips_mulsa_w_ph: - return lowerDSPIntr(Op, DAG, MipsISD::MULSA_W_PH); - case Intrinsic::mips_mult: - return lowerDSPIntr(Op, DAG, MipsISD::Mult); - case Intrinsic::mips_multu: - return lowerDSPIntr(Op, DAG, MipsISD::Multu); - case Intrinsic::mips_madd: - return lowerDSPIntr(Op, DAG, MipsISD::MAdd); - case Intrinsic::mips_maddu: - return lowerDSPIntr(Op, DAG, MipsISD::MAddu); - case Intrinsic::mips_msub: - return lowerDSPIntr(Op, DAG, MipsISD::MSub); - case Intrinsic::mips_msubu: - return lowerDSPIntr(Op, DAG, MipsISD::MSubu); - } -} - -SDValue MipsTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op, - SelectionDAG &DAG) const { - switch (cast<ConstantSDNode>(Op->getOperand(1))->getZExtValue()) { - default: - return SDValue(); - case Intrinsic::mips_extp: - return lowerDSPIntr(Op, DAG, MipsISD::EXTP); - case Intrinsic::mips_extpdp: - return lowerDSPIntr(Op, DAG, MipsISD::EXTPDP); - case Intrinsic::mips_extr_w: - return lowerDSPIntr(Op, DAG, MipsISD::EXTR_W); - case Intrinsic::mips_extr_r_w: - return lowerDSPIntr(Op, DAG, MipsISD::EXTR_R_W); - case Intrinsic::mips_extr_rs_w: - return lowerDSPIntr(Op, DAG, MipsISD::EXTR_RS_W); - case Intrinsic::mips_extr_s_h: - return lowerDSPIntr(Op, DAG, MipsISD::EXTR_S_H); - case Intrinsic::mips_mthlip: - return lowerDSPIntr(Op, DAG, MipsISD::MTHLIP); - case Intrinsic::mips_mulsaq_s_w_ph: - return lowerDSPIntr(Op, DAG, MipsISD::MULSAQ_S_W_PH); - case Intrinsic::mips_maq_s_w_phl: - return lowerDSPIntr(Op, DAG, MipsISD::MAQ_S_W_PHL); - case Intrinsic::mips_maq_s_w_phr: - return lowerDSPIntr(Op, DAG, MipsISD::MAQ_S_W_PHR); - case Intrinsic::mips_maq_sa_w_phl: - return lowerDSPIntr(Op, DAG, MipsISD::MAQ_SA_W_PHL); - case Intrinsic::mips_maq_sa_w_phr: - return lowerDSPIntr(Op, DAG, MipsISD::MAQ_SA_W_PHR); - case Intrinsic::mips_dpaq_s_w_ph: - return lowerDSPIntr(Op, DAG, MipsISD::DPAQ_S_W_PH); - case Intrinsic::mips_dpsq_s_w_ph: - return lowerDSPIntr(Op, DAG, MipsISD::DPSQ_S_W_PH); - case Intrinsic::mips_dpaq_sa_l_w: - return lowerDSPIntr(Op, DAG, MipsISD::DPAQ_SA_L_W); - case Intrinsic::mips_dpsq_sa_l_w: - return lowerDSPIntr(Op, DAG, MipsISD::DPSQ_SA_L_W); - case Intrinsic::mips_dpaqx_s_w_ph: - return lowerDSPIntr(Op, DAG, MipsISD::DPAQX_S_W_PH); - case Intrinsic::mips_dpaqx_sa_w_ph: - return lowerDSPIntr(Op, DAG, MipsISD::DPAQX_SA_W_PH); - case Intrinsic::mips_dpsqx_s_w_ph: - return 
lowerDSPIntr(Op, DAG, MipsISD::DPSQX_S_W_PH); - case Intrinsic::mips_dpsqx_sa_w_ph: - return lowerDSPIntr(Op, DAG, MipsISD::DPSQX_SA_W_PH); - } + SDValue SDL = createStoreLR(MipsISD::SDL, DAG, SD, Chain, IsLittle ? 7 : 0); + return createStoreLR(MipsISD::SDR, DAG, SD, SDL, IsLittle ? 0 : 7); } SDValue MipsTargetLowering::lowerADD(SDValue Op, SelectionDAG &DAG) const { @@ -3009,8 +2831,8 @@ getRegForInlineAsmConstraint(const std::string &Constraint, EVT VT) const return std::make_pair((unsigned)Mips::T9_64, &Mips::CPU64RegsRegClass); case 'l': // register suitable for indirect jump if (VT == MVT::i32) - return std::make_pair((unsigned)Mips::LO, &Mips::HILORegClass); - return std::make_pair((unsigned)Mips::LO64, &Mips::HILO64RegClass); + return std::make_pair((unsigned)Mips::LO, &Mips::LORegsRegClass); + return std::make_pair((unsigned)Mips::LO64, &Mips::LORegs64RegClass); case 'x': // register suitable for indirect jump // Fixme: Not triggering the use of both hi and low // This will generate an error message diff --git a/contrib/llvm/lib/Target/Mips/MipsISelLowering.h b/contrib/llvm/lib/Target/Mips/MipsISelLowering.h index cab71a6..5587e8f 100644 --- a/contrib/llvm/lib/Target/Mips/MipsISelLowering.h +++ b/contrib/llvm/lib/Target/Mips/MipsISelLowering.h @@ -143,6 +143,15 @@ namespace llvm { MSUB_DSP, MSUBU_DSP, + // DSP shift nodes. + SHLL_DSP, + SHRA_DSP, + SHRL_DSP, + + // DSP setcc and select_cc nodes. + SETCC_DSP, + SELECT_CC_DSP, + // Load/Store Left/Right nodes. LWL = ISD::FIRST_TARGET_MEMORY_OPCODE, LWR, @@ -338,15 +347,12 @@ namespace llvm { SDValue lowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const; SDValue lowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const; SDValue lowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const; - SDValue lowerMEMBARRIER(SDValue Op, SelectionDAG& DAG) const; SDValue lowerATOMIC_FENCE(SDValue Op, SelectionDAG& DAG) const; SDValue lowerShiftLeftParts(SDValue Op, SelectionDAG& DAG) const; SDValue lowerShiftRightParts(SDValue Op, SelectionDAG& DAG, bool IsSRA) const; SDValue lowerLOAD(SDValue Op, SelectionDAG &DAG) const; SDValue lowerSTORE(SDValue Op, SelectionDAG &DAG) const; - SDValue lowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const; - SDValue lowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const; SDValue lowerADD(SDValue Op, SelectionDAG &DAG) const; /// isEligibleForTailCallOptimization - Check whether the call is eligible diff --git a/contrib/llvm/lib/Target/Mips/MipsInstrFormats.td b/contrib/llvm/lib/Target/Mips/MipsInstrFormats.td index ee432c8..ea07372 100644 --- a/contrib/llvm/lib/Target/Mips/MipsInstrFormats.td +++ b/contrib/llvm/lib/Target/Mips/MipsInstrFormats.td @@ -36,6 +36,24 @@ def FrmFR : Format<4>; def FrmFI : Format<5>; def FrmOther : Format<6>; // Instruction w/ a custom format +class MMRel; + +def Std2MicroMips : InstrMapping { + let FilterClass = "MMRel"; + // Instructions with the same BaseOpcode and isNVStore values form a row. + let RowFields = ["BaseOpcode"]; + // Instructions with the same predicate sense form a column. + let ColFields = ["Arch"]; + // The key column is the unpredicated instructions. 
+ let KeyCol = ["se"]; + // Value columns are PredSense=true and PredSense=false + let ValueCols = [["se"], ["micromips"]]; +} + +class StdArch { + string Arch = "se"; +} + // Generic Mips Format class MipsInst<dag outs, dag ins, string asmstr, list<dag> pattern, InstrItinClass itin, Format f>: Instruction @@ -74,9 +92,11 @@ class MipsInst<dag outs, dag ins, string asmstr, list<dag> pattern, // Mips32/64 Instruction Format class InstSE<dag outs, dag ins, string asmstr, list<dag> pattern, - InstrItinClass itin, Format f>: + InstrItinClass itin, Format f, string opstr = ""> : MipsInst<outs, ins, asmstr, pattern, itin, f> { let Predicates = [HasStdEnc]; + string BaseOpcode = opstr; + string Arch; } // Mips Pseudo Instructions Format @@ -192,7 +212,7 @@ class MFC3OP_FM<bits<6> op, bits<5> mfmt> let Inst{2-0} = sel; } -class ADD_FM<bits<6> op, bits<6> funct> { +class ADD_FM<bits<6> op, bits<6> funct> : StdArch { bits<5> rd; bits<5> rs; bits<5> rt; @@ -207,7 +227,7 @@ class ADD_FM<bits<6> op, bits<6> funct> { let Inst{5-0} = funct; } -class ADDI_FM<bits<6> op> { +class ADDI_FM<bits<6> op> : StdArch { bits<5> rs; bits<5> rt; bits<16> imm16; @@ -220,7 +240,7 @@ class ADDI_FM<bits<6> op> { let Inst{15-0} = imm16; } -class SRA_FM<bits<6> funct, bit rotate> { +class SRA_FM<bits<6> funct, bit rotate> : StdArch { bits<5> rd; bits<5> rt; bits<5> shamt; @@ -236,7 +256,7 @@ class SRA_FM<bits<6> funct, bit rotate> { let Inst{5-0} = funct; } -class SRLV_FM<bits<6> funct, bit rotate> { +class SRLV_FM<bits<6> funct, bit rotate> : StdArch { bits<5> rd; bits<5> rt; bits<5> rs; @@ -288,7 +308,7 @@ class B_FM { let Inst{15-0} = offset; } -class SLTI_FM<bits<6> op> { +class SLTI_FM<bits<6> op> : StdArch { bits<5> rt; bits<5> rs; bits<16> imm16; @@ -413,7 +433,7 @@ class SYNC_FM { let Inst{5-0} = 0xf; } -class MULT_FM<bits<6> op, bits<6> funct> { +class MULT_FM<bits<6> op, bits<6> funct> : StdArch { bits<5> rs; bits<5> rt; @@ -529,7 +549,7 @@ class MFC1_FM<bits<5> funct> { let Inst{10-0} = 0; } -class LW_FM<bits<6> op> { +class LW_FM<bits<6> op> : StdArch { bits<5> rt; bits<21> addr; diff --git a/contrib/llvm/lib/Target/Mips/MipsInstrInfo.td b/contrib/llvm/lib/Target/Mips/MipsInstrInfo.td index 3a82e81..86ec729 100644 --- a/contrib/llvm/lib/Target/Mips/MipsInstrInfo.td +++ b/contrib/llvm/lib/Target/Mips/MipsInstrInfo.td @@ -179,6 +179,7 @@ def NoNaNsFPMath : Predicate<"TM.Options.NoNaNsFPMath">, AssemblerPredicate<"FeatureMips32">; def HasStdEnc : Predicate<"Subtarget.hasStandardEncoding()">, AssemblerPredicate<"!FeatureMips16">; +def NotDSP : Predicate<"!Subtarget.hasDSP()">; class MipsPat<dag pattern, dag result> : Pat<pattern, result> { let Predicates = [HasStdEnc]; @@ -374,11 +375,9 @@ class ArithLogicR<string opstr, RegisterOperand RO, bit isComm = 0, SDPatternOperator OpNode = null_frag>: InstSE<(outs RO:$rd), (ins RO:$rs, RO:$rt), !strconcat(opstr, "\t$rd, $rs, $rt"), - [(set RO:$rd, (OpNode RO:$rs, RO:$rt))], Itin, FrmR> { + [(set RO:$rd, (OpNode RO:$rs, RO:$rt))], Itin, FrmR, opstr> { let isCommutable = isComm; let isReMaterializable = 1; - string BaseOpcode; - string Arch; } // Arithmetic and logical instructions with 2 register operands. 
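The Std2MicroMips InstrMapping added above groups instructions into rows keyed by the new BaseOpcode string and columns keyed by Arch ("se" vs "micromips"), so TableGen can emit a lookup from a standard-encoding opcode to its microMIPS counterpart. Conceptually the generated table behaves like the hand-written sketch below; the opcode names and the lookup function are made-up stand-ins for illustration, not the TableGen-generated API:

#include <cstdio>
#include <map>
#include <string>
#include <utility>

// Illustrative only: a BaseOpcode-keyed row pairs the "se" column entry with
// the "micromips" column entry. All names below are invented for the example.
enum FakeOpcode { ADDiu_SE, ADDiu_MM, SLL_SE, SLL_MM, LW_SE, LW_MM, NoMatch = -1 };

static const std::map<std::string, std::pair<FakeOpcode, FakeOpcode>> Rows = {
  {"addiu", {ADDiu_SE, ADDiu_MM}},
  {"sll",   {SLL_SE,   SLL_MM}},
  {"lw",    {LW_SE,    LW_MM}},
};

// Map a standard-encoding opcode to the microMIPS entry in the same row.
static FakeOpcode std2micro(FakeOpcode Op) {
  for (const auto &R : Rows)
    if (R.second.first == Op)
      return R.second.second;
  return NoMatch;
}

int main() {
  std::printf("sll -> %d, match=%d\n", std2micro(SLL_SE), std2micro(SLL_SE) == SLL_MM);
}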
@@ -387,7 +386,8 @@ class ArithLogicI<string opstr, Operand Od, RegisterOperand RO, SDPatternOperator OpNode = null_frag> : InstSE<(outs RO:$rt), (ins RO:$rs, Od:$imm16), !strconcat(opstr, "\t$rt, $rs, $imm16"), - [(set RO:$rt, (OpNode RO:$rs, imm_type:$imm16))], IIAlu, FrmI> { + [(set RO:$rt, (OpNode RO:$rs, imm_type:$imm16))], + IIAlu, FrmI, opstr> { let isReMaterializable = 1; } @@ -404,7 +404,7 @@ class MArithR<string opstr, bit isComm = 0> : class LogicNOR<string opstr, RegisterOperand RC>: InstSE<(outs RC:$rd), (ins RC:$rs, RC:$rt), !strconcat(opstr, "\t$rd, $rs, $rt"), - [(set RC:$rd, (not (or RC:$rs, RC:$rt)))], IIAlu, FrmR> { + [(set RC:$rd, (not (or RC:$rs, RC:$rt)))], IIAlu, FrmR, opstr> { let isCommutable = 1; } @@ -414,13 +414,13 @@ class shift_rotate_imm<string opstr, Operand ImmOpnd, SDPatternOperator PF = null_frag> : InstSE<(outs RC:$rd), (ins RC:$rt, ImmOpnd:$shamt), !strconcat(opstr, "\t$rd, $rt, $shamt"), - [(set RC:$rd, (OpNode RC:$rt, PF:$shamt))], IIAlu, FrmR>; + [(set RC:$rd, (OpNode RC:$rt, PF:$shamt))], IIAlu, FrmR, opstr>; class shift_rotate_reg<string opstr, RegisterOperand RC, SDPatternOperator OpNode = null_frag>: InstSE<(outs RC:$rd), (ins CPURegsOpnd:$rs, RC:$rt), !strconcat(opstr, "\t$rd, $rt, $rs"), - [(set RC:$rd, (OpNode RC:$rt, CPURegsOpnd:$rs))], IIAlu, FrmR>; + [(set RC:$rd, (OpNode RC:$rt, CPURegsOpnd:$rs))], IIAlu, FrmR, opstr>; // Load Upper Imediate class LoadUpper<string opstr, RegisterClass RC, Operand Imm>: @@ -440,18 +440,20 @@ class FMem<bits<6> op, dag outs, dag ins, string asmstr, list<dag> pattern, // Memory Load/Store class Load<string opstr, SDPatternOperator OpNode, RegisterClass RC, - Operand MemOpnd, ComplexPattern Addr> : + Operand MemOpnd, ComplexPattern Addr, string ofsuffix> : InstSE<(outs RC:$rt), (ins MemOpnd:$addr), !strconcat(opstr, "\t$rt, $addr"), - [(set RC:$rt, (OpNode Addr:$addr))], NoItinerary, FrmI> { + [(set RC:$rt, (OpNode Addr:$addr))], NoItinerary, FrmI, + !strconcat(opstr, ofsuffix)> { let DecoderMethod = "DecodeMem"; let canFoldAsLoad = 1; let mayLoad = 1; } class Store<string opstr, SDPatternOperator OpNode, RegisterClass RC, - Operand MemOpnd, ComplexPattern Addr> : + Operand MemOpnd, ComplexPattern Addr, string ofsuffix> : InstSE<(outs), (ins RC:$rt, MemOpnd:$addr), !strconcat(opstr, "\t$rt, $addr"), - [(OpNode RC:$rt, Addr:$addr)], NoItinerary, FrmI> { + [(OpNode RC:$rt, Addr:$addr)], NoItinerary, FrmI, + !strconcat(opstr, ofsuffix)> { let DecoderMethod = "DecodeMem"; let mayStore = 1; } @@ -459,8 +461,9 @@ class Store<string opstr, SDPatternOperator OpNode, RegisterClass RC, multiclass LoadM<string opstr, RegisterClass RC, SDPatternOperator OpNode = null_frag, ComplexPattern Addr = addr> { - def NAME : Load<opstr, OpNode, RC, mem, Addr>, Requires<[NotN64, HasStdEnc]>; - def _P8 : Load<opstr, OpNode, RC, mem64, Addr>, + def NAME : Load<opstr, OpNode, RC, mem, Addr, "">, + Requires<[NotN64, HasStdEnc]>; + def _P8 : Load<opstr, OpNode, RC, mem64, Addr, "_p8">, Requires<[IsN64, HasStdEnc]> { let DecoderNamespace = "Mips64"; let isCodeGenOnly = 1; @@ -470,8 +473,9 @@ multiclass LoadM<string opstr, RegisterClass RC, multiclass StoreM<string opstr, RegisterClass RC, SDPatternOperator OpNode = null_frag, ComplexPattern Addr = addr> { - def NAME : Store<opstr, OpNode, RC, mem, Addr>, Requires<[NotN64, HasStdEnc]>; - def _P8 : Store<opstr, OpNode, RC, mem64, Addr>, + def NAME : Store<opstr, OpNode, RC, mem, Addr, "">, + Requires<[NotN64, HasStdEnc]>; + def _P8 : Store<opstr, OpNode, RC, mem64, Addr, "_p8">, 
Requires<[IsN64, HasStdEnc]> { let DecoderNamespace = "Mips64"; let isCodeGenOnly = 1; @@ -542,14 +546,15 @@ class CBranchZero<string opstr, PatFrag cond_op, RegisterClass RC> : class SetCC_R<string opstr, PatFrag cond_op, RegisterClass RC> : InstSE<(outs CPURegsOpnd:$rd), (ins RC:$rs, RC:$rt), !strconcat(opstr, "\t$rd, $rs, $rt"), - [(set CPURegsOpnd:$rd, (cond_op RC:$rs, RC:$rt))], IIAlu, FrmR>; + [(set CPURegsOpnd:$rd, (cond_op RC:$rs, RC:$rt))], + IIAlu, FrmR, opstr>; class SetCC_I<string opstr, PatFrag cond_op, Operand Od, PatLeaf imm_type, RegisterClass RC>: InstSE<(outs CPURegsOpnd:$rt), (ins RC:$rs, Od:$imm16), !strconcat(opstr, "\t$rt, $rs, $imm16"), [(set CPURegsOpnd:$rt, (cond_op RC:$rs, imm_type:$imm16))], - IIAlu, FrmI>; + IIAlu, FrmI, opstr>; // Jump class JumpFJ<DAGOperand opnd, string opstr, SDPatternOperator operator, @@ -636,7 +641,7 @@ class SYNC_FT : class Mult<string opstr, InstrItinClass itin, RegisterOperand RO, list<Register> DefRegs> : InstSE<(outs), (ins RO:$rs, RO:$rt), !strconcat(opstr, "\t$rs, $rt"), [], - itin, FrmR> { + itin, FrmR, opstr> { let isCommutable = 1; let Defs = DefRegs; let neverHasSideEffects = 1; @@ -832,14 +837,12 @@ let usesCustomInserter = 1 in { defm ATOMIC_CMP_SWAP_I32 : AtomicCmpSwap32<atomic_cmp_swap_32>; } -/// Pseudo instructions for loading, storing and copying accumulator registers. +/// Pseudo instructions for loading and storing accumulator registers. let isPseudo = 1 in { defm LOAD_AC64 : LoadM<"load_ac64", ACRegs>; defm STORE_AC64 : StoreM<"store_ac64", ACRegs>; } -def COPY_AC64 : PseudoSE<(outs ACRegs:$dst), (ins ACRegs:$src), []>; - //===----------------------------------------------------------------------===// // Instruction definition //===----------------------------------------------------------------------===// @@ -848,60 +851,70 @@ def COPY_AC64 : PseudoSE<(outs ACRegs:$dst), (ins ACRegs:$src), []>; //===----------------------------------------------------------------------===// /// Arithmetic Instructions (ALU Immediate) -def ADDiu : ArithLogicI<"addiu", simm16, CPURegsOpnd, immSExt16, add>, +def ADDiu : MMRel, ArithLogicI<"addiu", simm16, CPURegsOpnd, immSExt16, add>, ADDI_FM<0x9>, IsAsCheapAsAMove; -def ADDi : ArithLogicI<"addi", simm16, CPURegsOpnd>, ADDI_FM<0x8>; -def SLTi : SetCC_I<"slti", setlt, simm16, immSExt16, CPURegs>, SLTI_FM<0xa>; -def SLTiu : SetCC_I<"sltiu", setult, simm16, immSExt16, CPURegs>, SLTI_FM<0xb>; -def ANDi : ArithLogicI<"andi", uimm16, CPURegsOpnd, immZExt16, and>, +def ADDi : MMRel, ArithLogicI<"addi", simm16, CPURegsOpnd>, ADDI_FM<0x8>; +def SLTi : MMRel, SetCC_I<"slti", setlt, simm16, immSExt16, CPURegs>, + SLTI_FM<0xa>; +def SLTiu : MMRel, SetCC_I<"sltiu", setult, simm16, immSExt16, CPURegs>, + SLTI_FM<0xb>; +def ANDi : MMRel, ArithLogicI<"andi", uimm16, CPURegsOpnd, immZExt16, and>, ADDI_FM<0xc>; -def ORi : ArithLogicI<"ori", uimm16, CPURegsOpnd, immZExt16, or>, +def ORi : MMRel, ArithLogicI<"ori", uimm16, CPURegsOpnd, immZExt16, or>, ADDI_FM<0xd>; -def XORi : ArithLogicI<"xori", uimm16, CPURegsOpnd, immZExt16, xor>, +def XORi : MMRel, ArithLogicI<"xori", uimm16, CPURegsOpnd, immZExt16, xor>, ADDI_FM<0xe>; -def LUi : LoadUpper<"lui", CPURegs, uimm16>, LUI_FM; +def LUi : MMRel, LoadUpper<"lui", CPURegs, uimm16>, LUI_FM; /// Arithmetic Instructions (3-Operand, R-Type) -def ADDu : ArithLogicR<"addu", CPURegsOpnd, 1, IIAlu, add>, ADD_FM<0, 0x21>; -def SUBu : ArithLogicR<"subu", CPURegsOpnd, 0, IIAlu, sub>, ADD_FM<0, 0x23>; -def MUL : ArithLogicR<"mul", CPURegsOpnd, 1, IIImul, mul>, 
ADD_FM<0x1c, 2>; -def ADD : ArithLogicR<"add", CPURegsOpnd>, ADD_FM<0, 0x20>; -def SUB : ArithLogicR<"sub", CPURegsOpnd>, ADD_FM<0, 0x22>; -def SLT : SetCC_R<"slt", setlt, CPURegs>, ADD_FM<0, 0x2a>; -def SLTu : SetCC_R<"sltu", setult, CPURegs>, ADD_FM<0, 0x2b>; -def AND : ArithLogicR<"and", CPURegsOpnd, 1, IIAlu, and>, ADD_FM<0, 0x24>; -def OR : ArithLogicR<"or", CPURegsOpnd, 1, IIAlu, or>, ADD_FM<0, 0x25>; -def XOR : ArithLogicR<"xor", CPURegsOpnd, 1, IIAlu, xor>, ADD_FM<0, 0x26>; -def NOR : LogicNOR<"nor", CPURegsOpnd>, ADD_FM<0, 0x27>; +def ADDu : MMRel, ArithLogicR<"addu", CPURegsOpnd, 1, IIAlu, add>, + ADD_FM<0, 0x21>; +def SUBu : MMRel, ArithLogicR<"subu", CPURegsOpnd, 0, IIAlu, sub>, + ADD_FM<0, 0x23>; +def MUL : MMRel, ArithLogicR<"mul", CPURegsOpnd, 1, IIImul, mul>, + ADD_FM<0x1c, 2>; +def ADD : MMRel, ArithLogicR<"add", CPURegsOpnd>, ADD_FM<0, 0x20>; +def SUB : MMRel, ArithLogicR<"sub", CPURegsOpnd>, ADD_FM<0, 0x22>; +def SLT : MMRel, SetCC_R<"slt", setlt, CPURegs>, ADD_FM<0, 0x2a>; +def SLTu : MMRel, SetCC_R<"sltu", setult, CPURegs>, ADD_FM<0, 0x2b>; +def AND : MMRel, ArithLogicR<"and", CPURegsOpnd, 1, IIAlu, and>, + ADD_FM<0, 0x24>; +def OR : MMRel, ArithLogicR<"or", CPURegsOpnd, 1, IIAlu, or>, + ADD_FM<0, 0x25>; +def XOR : MMRel, ArithLogicR<"xor", CPURegsOpnd, 1, IIAlu, xor>, + ADD_FM<0, 0x26>; +def NOR : MMRel, LogicNOR<"nor", CPURegsOpnd>, ADD_FM<0, 0x27>; /// Shift Instructions -def SLL : shift_rotate_imm<"sll", shamt, CPURegsOpnd, shl, immZExt5>, +def SLL : MMRel, shift_rotate_imm<"sll", shamt, CPURegsOpnd, shl, immZExt5>, SRA_FM<0, 0>; -def SRL : shift_rotate_imm<"srl", shamt, CPURegsOpnd, srl, immZExt5>, +def SRL : MMRel, shift_rotate_imm<"srl", shamt, CPURegsOpnd, srl, immZExt5>, SRA_FM<2, 0>; -def SRA : shift_rotate_imm<"sra", shamt, CPURegsOpnd, sra, immZExt5>, +def SRA : MMRel, shift_rotate_imm<"sra", shamt, CPURegsOpnd, sra, immZExt5>, SRA_FM<3, 0>; -def SLLV : shift_rotate_reg<"sllv", CPURegsOpnd, shl>, SRLV_FM<4, 0>; -def SRLV : shift_rotate_reg<"srlv", CPURegsOpnd, srl>, SRLV_FM<6, 0>; -def SRAV : shift_rotate_reg<"srav", CPURegsOpnd, sra>, SRLV_FM<7, 0>; +def SLLV : MMRel, shift_rotate_reg<"sllv", CPURegsOpnd, shl>, SRLV_FM<4, 0>; +def SRLV : MMRel, shift_rotate_reg<"srlv", CPURegsOpnd, srl>, SRLV_FM<6, 0>; +def SRAV : MMRel, shift_rotate_reg<"srav", CPURegsOpnd, sra>, SRLV_FM<7, 0>; // Rotate Instructions let Predicates = [HasMips32r2, HasStdEnc] in { - def ROTR : shift_rotate_imm<"rotr", shamt, CPURegsOpnd, rotr, immZExt5>, + def ROTR : MMRel, shift_rotate_imm<"rotr", shamt, CPURegsOpnd, rotr, + immZExt5>, SRA_FM<2, 1>; - def ROTRV : shift_rotate_reg<"rotrv", CPURegsOpnd, rotr>, SRLV_FM<6, 1>; + def ROTRV : MMRel, shift_rotate_reg<"rotrv", CPURegsOpnd, rotr>, + SRLV_FM<6, 1>; } /// Load and Store Instructions /// aligned -defm LB : LoadM<"lb", CPURegs, sextloadi8>, LW_FM<0x20>; -defm LBu : LoadM<"lbu", CPURegs, zextloadi8, addrDefault>, LW_FM<0x24>; -defm LH : LoadM<"lh", CPURegs, sextloadi16, addrDefault>, LW_FM<0x21>; -defm LHu : LoadM<"lhu", CPURegs, zextloadi16>, LW_FM<0x25>; -defm LW : LoadM<"lw", CPURegs, load, addrDefault>, LW_FM<0x23>; -defm SB : StoreM<"sb", CPURegs, truncstorei8>, LW_FM<0x28>; -defm SH : StoreM<"sh", CPURegs, truncstorei16>, LW_FM<0x29>; -defm SW : StoreM<"sw", CPURegs, store>, LW_FM<0x2b>; +defm LB : LoadM<"lb", CPURegs, sextloadi8>, MMRel, LW_FM<0x20>; +defm LBu : LoadM<"lbu", CPURegs, zextloadi8, addrDefault>, MMRel, LW_FM<0x24>; +defm LH : LoadM<"lh", CPURegs, sextloadi16, addrDefault>, MMRel, LW_FM<0x21>; +defm LHu : 
LoadM<"lhu", CPURegs, zextloadi16>, MMRel, LW_FM<0x25>; +defm LW : LoadM<"lw", CPURegs, load, addrDefault>, MMRel, LW_FM<0x23>; +defm SB : StoreM<"sb", CPURegs, truncstorei8>, MMRel, LW_FM<0x28>; +defm SH : StoreM<"sh", CPURegs, truncstorei16>, MMRel, LW_FM<0x29>; +defm SW : StoreM<"sw", CPURegs, store>, MMRel, LW_FM<0x2b>; /// load/store left/right defm LWL : LoadLeftRightM<"lwl", MipsLWL, CPURegs>, LW_FM<0x22>; @@ -968,8 +981,10 @@ let Uses = [V0, V1], isTerminator = 1, isReturn = 1, isBarrier = 1 in { } /// Multiply and Divide Instructions. -def MULT : Mult<"mult", IIImul, CPURegsOpnd, [HI, LO]>, MULT_FM<0, 0x18>; -def MULTu : Mult<"multu", IIImul, CPURegsOpnd, [HI, LO]>, MULT_FM<0, 0x19>; +def MULT : MMRel, Mult<"mult", IIImul, CPURegsOpnd, [HI, LO]>, + MULT_FM<0, 0x18>; +def MULTu : MMRel, Mult<"multu", IIImul, CPURegsOpnd, [HI, LO]>, + MULT_FM<0, 0x19>; def PseudoMULT : MultDivPseudo<MULT, ACRegs, CPURegsOpnd, MipsMult, IIImul>; def PseudoMULTu : MultDivPseudo<MULTu, ACRegs, CPURegsOpnd, MipsMultu, IIImul>; def SDIV : Div<"div", IIIdiv, CPURegsOpnd, [HI, LO]>, MULT_FM<0, 0x1a>; @@ -1066,10 +1081,10 @@ def : InstAlias<"negu $rt, $rs", def : InstAlias<"slt $rs, $rt, $imm", (SLTi CPURegsOpnd:$rs, CPURegs:$rt, simm16:$imm), 0>; def : InstAlias<"xor $rs, $rt, $imm", - (XORi CPURegsOpnd:$rs, CPURegsOpnd:$rt, simm16:$imm), 0>, + (XORi CPURegsOpnd:$rs, CPURegsOpnd:$rt, uimm16:$imm), 1>, Requires<[NotMips64]>; def : InstAlias<"or $rs, $rt, $imm", - (ORi CPURegsOpnd:$rs, CPURegsOpnd:$rt, simm16:$imm), 0>, + (ORi CPURegsOpnd:$rs, CPURegsOpnd:$rt, uimm16:$imm), 1>, Requires<[NotMips64]>; def : InstAlias<"nop", (SLL ZERO, ZERO, 0), 1>; def : InstAlias<"mfc0 $rt, $rd", @@ -1128,10 +1143,12 @@ def : MipsPat<(i32 imm:$imm), // Carry MipsPatterns def : MipsPat<(subc CPURegs:$lhs, CPURegs:$rhs), (SUBu CPURegs:$lhs, CPURegs:$rhs)>; -def : MipsPat<(addc CPURegs:$lhs, CPURegs:$rhs), - (ADDu CPURegs:$lhs, CPURegs:$rhs)>; -def : MipsPat<(addc CPURegs:$src, immSExt16:$imm), - (ADDiu CPURegs:$src, imm:$imm)>; +let Predicates = [HasStdEnc, NotDSP] in { + def : MipsPat<(addc CPURegs:$lhs, CPURegs:$rhs), + (ADDu CPURegs:$lhs, CPURegs:$rhs)>; + def : MipsPat<(addc CPURegs:$src, immSExt16:$imm), + (ADDiu CPURegs:$src, imm:$imm)>; +} // Call def : MipsPat<(MipsJmpLink (i32 tglobaladdr:$dst)), @@ -1326,3 +1343,6 @@ include "Mips16InstrInfo.td" include "MipsDSPInstrFormats.td" include "MipsDSPInstrInfo.td" +// Micromips +include "MicroMipsInstrFormats.td" +include "MicroMipsInstrInfo.td" diff --git a/contrib/llvm/lib/Target/Mips/MipsLongBranch.cpp b/contrib/llvm/lib/Target/Mips/MipsLongBranch.cpp index 2efe534..bf5ad37 100644 --- a/contrib/llvm/lib/Target/Mips/MipsLongBranch.cpp +++ b/contrib/llvm/lib/Target/Mips/MipsLongBranch.cpp @@ -399,6 +399,8 @@ static void emitGPDisp(MachineFunction &F, const MipsInstrInfo *TII) { } bool MipsLongBranch::runOnMachineFunction(MachineFunction &F) { + if (TM.getSubtarget<MipsSubtarget>().inMips16Mode()) + return false; if ((TM.getRelocationModel() == Reloc::PIC_) && TM.getSubtarget<MipsSubtarget>().isABI_O32() && F.getInfo<MipsFunctionInfo>()->globalBaseRegSet()) diff --git a/contrib/llvm/lib/Target/Mips/MipsModuleISelDAGToDAG.cpp b/contrib/llvm/lib/Target/Mips/MipsModuleISelDAGToDAG.cpp new file mode 100644 index 0000000..c6abf17 --- /dev/null +++ b/contrib/llvm/lib/Target/Mips/MipsModuleISelDAGToDAG.cpp @@ -0,0 +1,34 @@ +//===----------------------------------------------------------------------===// +// Instruction Selector Subtarget Control 
+//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// This file defines a pass used to change the subtarget for the +// Mips Instruction selector. +// +//===----------------------------------------------------------------------===// + +#include "MipsISelDAGToDAG.h" +#include "MipsModuleISelDAGToDAG.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" + +namespace llvm { + +bool MipsModuleDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) { + DEBUG(errs() << "In MipsModuleDAGToDAGISel::runMachineFunction\n"); + const_cast<MipsSubtarget&>(Subtarget).resetSubtarget(&MF); + return false; +} + +char MipsModuleDAGToDAGISel::ID = 0; + +} + + +llvm::FunctionPass *llvm::createMipsModuleISelDag(MipsTargetMachine &TM) { + return new MipsModuleDAGToDAGISel(TM); +} + + diff --git a/contrib/llvm/lib/Target/Mips/MipsModuleISelDAGToDAG.h b/contrib/llvm/lib/Target/Mips/MipsModuleISelDAGToDAG.h new file mode 100644 index 0000000..fda35ae --- /dev/null +++ b/contrib/llvm/lib/Target/Mips/MipsModuleISelDAGToDAG.h @@ -0,0 +1,66 @@ +//===---- MipsModuleISelDAGToDAG.h - Change Subtarget --------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines a pass used to change the subtarget for the +// Mips Instruction selector. +// +//===----------------------------------------------------------------------===// + +#ifndef MIPSMODULEISELDAGTODAG_H +#define MIPSMODULEISELDAGTODAG_H + +#include "Mips.h" +#include "MipsSubtarget.h" +#include "MipsTargetMachine.h" +#include "llvm/CodeGen/SelectionDAGISel.h" + + +//===----------------------------------------------------------------------===// +// Instruction Selector Implementation +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// MipsModuleDAGToDAGISel - MIPS specific code to select MIPS machine +// instructions for SelectionDAG operations. +//===----------------------------------------------------------------------===// +namespace llvm { + +class MipsModuleDAGToDAGISel : public MachineFunctionPass { +public: + + static char ID; + + explicit MipsModuleDAGToDAGISel(MipsTargetMachine &TM_) + : MachineFunctionPass(ID), + TM(TM_), Subtarget(TM.getSubtarget<MipsSubtarget>()) {} + + // Pass Name + virtual const char *getPassName() const { + return "MIPS DAG->DAG Pattern Instruction Selection"; + } + + virtual bool runOnMachineFunction(MachineFunction &MF); + + virtual SDNode *Select(SDNode *N) { + llvm_unreachable("unexpected"); + } + +protected: + /// Keep a pointer to the MipsSubtarget around so that we can make the right + /// decision when generating code for different targets. + const TargetMachine &TM; + const MipsSubtarget &Subtarget; +}; + +/// createMipsISelDag - This pass converts a legalized DAG into a +/// MIPS-specific DAG, ready for instruction scheduling. 
+FunctionPass *createMipsModuleISelDag(MipsTargetMachine &TM); +} + +#endif diff --git a/contrib/llvm/lib/Target/Mips/MipsOs16.cpp b/contrib/llvm/lib/Target/Mips/MipsOs16.cpp new file mode 100644 index 0000000..1919077 --- /dev/null +++ b/contrib/llvm/lib/Target/Mips/MipsOs16.cpp @@ -0,0 +1,113 @@ +//===---- MipsOs16.cpp for Mips Option -Os16 --------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines an optimization phase for the MIPS target. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "mips-os16" +#include "MipsOs16.h" +#include "llvm/IR/Module.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" + +namespace { + + // Figure out if we need float point based on the function signature. + // We need to move variables in and/or out of floating point + // registers because of the ABI + // + bool needsFPFromSig(Function &F) { + Type* RetType = F.getReturnType(); + switch (RetType->getTypeID()) { + case Type::FloatTyID: + case Type::DoubleTyID: + return true; + default: + ; + } + if (F.arg_size() >=1) { + Argument &Arg = F.getArgumentList().front(); + switch (Arg.getType()->getTypeID()) { + case Type::FloatTyID: + case Type::DoubleTyID: + return true; + default: + ; + } + } + return false; + } + + // Figure out if the function will need floating point operations + // + bool needsFP(Function &F) { + if (needsFPFromSig(F)) + return true; + for (Function::const_iterator BB = F.begin(), E = F.end(); BB != E; ++BB) + for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); + I != E; ++I) { + const Instruction &Inst = *I; + switch (Inst.getOpcode()) { + case Instruction::FAdd: + case Instruction::FSub: + case Instruction::FMul: + case Instruction::FDiv: + case Instruction::FRem: + case Instruction::FPToUI: + case Instruction::FPToSI: + case Instruction::UIToFP: + case Instruction::SIToFP: + case Instruction::FPTrunc: + case Instruction::FPExt: + case Instruction::FCmp: + return true; + default: + ; + } + if (const CallInst *CI = dyn_cast<CallInst>(I)) { + DEBUG(dbgs() << "Working on call" << "\n"); + Function &F_ = *CI->getCalledFunction(); + if (needsFPFromSig(F_)) + return true; + } + } + return false; + } +} +namespace llvm { + + +bool MipsOs16::runOnModule(Module &M) { + DEBUG(errs() << "Run on Module MipsOs16\n"); + bool modified = false; + for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) { + if (F->isDeclaration()) continue; + DEBUG(dbgs() << "Working on " << F->getName() << "\n"); + if (needsFP(*F)) { + DEBUG(dbgs() << " need to compile as nomips16 \n"); + F->addFnAttr("nomips16"); + } + else { + F->addFnAttr("mips16"); + DEBUG(dbgs() << " no need to compile as nomips16 \n"); + } + } + return modified; +} + +char MipsOs16::ID = 0; + +} + +ModulePass *llvm::createMipsOs16(MipsTargetMachine &TM) { + return new MipsOs16; +} + + diff --git a/contrib/llvm/lib/Target/Mips/MipsOs16.h b/contrib/llvm/lib/Target/Mips/MipsOs16.h new file mode 100644 index 0000000..21beef8 --- /dev/null +++ b/contrib/llvm/lib/Target/Mips/MipsOs16.h @@ -0,0 +1,49 @@ +//===---- MipsOs16.h for Mips Option -Os16 --------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This file defines an optimization phase for the MIPS target. +// +//===----------------------------------------------------------------------===// + +#include "MCTargetDesc/MipsMCTargetDesc.h" +#include "MipsTargetMachine.h" +#include "llvm/Pass.h" +#include "llvm/Target/TargetMachine.h" + + + +#ifndef MIPSOS16_H +#define MIPSOS16_H + +using namespace llvm; + +namespace llvm { + +class MipsOs16 : public ModulePass { + +public: + static char ID; + + MipsOs16() : ModulePass(ID) { + + } + + virtual const char *getPassName() const { + return "MIPS Os16 Optimization"; + } + + virtual bool runOnModule(Module &M); + +}; + +ModulePass *createMipsOs16(MipsTargetMachine &TM); + +} + +#endif diff --git a/contrib/llvm/lib/Target/Mips/MipsRegisterInfo.cpp b/contrib/llvm/lib/Target/Mips/MipsRegisterInfo.cpp index 3250733..dead07b 100644 --- a/contrib/llvm/lib/Target/Mips/MipsRegisterInfo.cpp +++ b/contrib/llvm/lib/Target/Mips/MipsRegisterInfo.cpp @@ -145,7 +145,11 @@ getReservedRegs(const MachineFunction &MF) const { Reserved.set(Mips::HWR29_64); // Reserve DSP control register. - Reserved.set(Mips::DSPCtrl); + Reserved.set(Mips::DSPPos); + Reserved.set(Mips::DSPSCount); + Reserved.set(Mips::DSPCarry); + Reserved.set(Mips::DSPEFI); + Reserved.set(Mips::DSPOutFlag); // Reserve RA if in mips16 mode. if (Subtarget.inMips16Mode()) { diff --git a/contrib/llvm/lib/Target/Mips/MipsRegisterInfo.td b/contrib/llvm/lib/Target/Mips/MipsRegisterInfo.td index 64458bc..229f167 100644 --- a/contrib/llvm/lib/Target/Mips/MipsRegisterInfo.td +++ b/contrib/llvm/lib/Target/Mips/MipsRegisterInfo.td @@ -16,6 +16,11 @@ def sub_fpodd : SubRegIndex; def sub_32 : SubRegIndex; def sub_lo : SubRegIndex; def sub_hi : SubRegIndex; +def sub_dsp16_19 : SubRegIndex; +def sub_dsp20 : SubRegIndex; +def sub_dsp21 : SubRegIndex; +def sub_dsp22 : SubRegIndex; +def sub_dsp23 : SubRegIndex; } class Unallocatable { @@ -229,14 +234,14 @@ let Namespace = "Mips" in { def D31_64 : AFPR64<31, "f31", [F31]>, DwarfRegNum<[63]>; // Hi/Lo registers - def HI : Register<"hi">, DwarfRegNum<[64]>; - def HI1 : Register<"hi1">, DwarfRegNum<[176]>; - def HI2 : Register<"hi2">, DwarfRegNum<[178]>; - def HI3 : Register<"hi3">, DwarfRegNum<[180]>; - def LO : Register<"lo">, DwarfRegNum<[65]>; - def LO1 : Register<"lo1">, DwarfRegNum<[177]>; - def LO2 : Register<"lo2">, DwarfRegNum<[179]>; - def LO3 : Register<"lo3">, DwarfRegNum<[181]>; + def HI : Register<"ac0">, DwarfRegNum<[64]>; + def HI1 : Register<"ac1">, DwarfRegNum<[176]>; + def HI2 : Register<"ac2">, DwarfRegNum<[178]>; + def HI3 : Register<"ac3">, DwarfRegNum<[180]>; + def LO : Register<"ac0">, DwarfRegNum<[65]>; + def LO1 : Register<"ac1">, DwarfRegNum<[177]>; + def LO2 : Register<"ac2">, DwarfRegNum<[179]>; + def LO3 : Register<"ac3">, DwarfRegNum<[181]>; let SubRegIndices = [sub_32] in { def HI64 : RegisterWithSubRegs<"hi", [HI]>; @@ -264,7 +269,23 @@ let Namespace = "Mips" in { def AC0_64 : ACC<0, "ac0", [LO64, HI64]>; - def DSPCtrl : Register<"dspctrl">; + // DSP-ASE control register fields. 
+ def DSPPos : Register<"">; + def DSPSCount : Register<"">; + def DSPCarry : Register<"">; + def DSPEFI : Register<"">; + def DSPOutFlag16_19 : Register<"">; + def DSPOutFlag20 : Register<"">; + def DSPOutFlag21 : Register<"">; + def DSPOutFlag22 : Register<"">; + def DSPOutFlag23 : Register<"">; + def DSPCCond : Register<"">; + + let SubRegIndices = [sub_dsp16_19, sub_dsp20, sub_dsp21, sub_dsp22, + sub_dsp23] in + def DSPOutFlag : RegisterWithSubRegs<"", [DSPOutFlag16_19, DSPOutFlag20, + DSPOutFlag21, DSPOutFlag22, + DSPOutFlag23]>; } //===----------------------------------------------------------------------===// @@ -340,8 +361,12 @@ def FGR64 : RegisterClass<"Mips", [f64], 64, (sequence "D%u_64", 0, 31)>; def CCR : RegisterClass<"Mips", [i32], 32, (add FCR31,FCC0)>, Unallocatable; // Hi/Lo Registers -def HILO : RegisterClass<"Mips", [i32], 32, (add HI, LO)>, Unallocatable; -def HILO64 : RegisterClass<"Mips", [i64], 64, (add HI64, LO64)>, Unallocatable; +def LORegs : RegisterClass<"Mips", [i32], 32, (add LO)>; +def HIRegs : RegisterClass<"Mips", [i32], 32, (add HI)>; +def LORegsDSP : RegisterClass<"Mips", [i32], 32, (add LO, LO1, LO2, LO3)>; +def HIRegsDSP : RegisterClass<"Mips", [i32], 32, (add HI, HI1, HI2, HI3)>; +def LORegs64 : RegisterClass<"Mips", [i64], 64, (add LO64)>; +def HIRegs64 : RegisterClass<"Mips", [i64], 64, (add HI64)>; // Hardware registers def HWRegs : RegisterClass<"Mips", [i32], 32, (add HWR29)>, Unallocatable; @@ -360,6 +385,9 @@ def ACRegsDSP : RegisterClass<"Mips", [untyped], 64, (sequence "AC%u", 0, 3)> { let Size = 64; } +def DSPCC : RegisterClass<"Mips", [v4i8, v2i16], 32, (add DSPCCond)>; + +// Register Operands. def CPURegsAsmOperand : AsmOperandClass { let Name = "CPURegsAsm"; let ParserMethod = "parseCPURegs"; diff --git a/contrib/llvm/lib/Target/Mips/MipsSEFrameLowering.cpp b/contrib/llvm/lib/Target/Mips/MipsSEFrameLowering.cpp index 68ec921..b295e91 100644 --- a/contrib/llvm/lib/Target/Mips/MipsSEFrameLowering.cpp +++ b/contrib/llvm/lib/Target/Mips/MipsSEFrameLowering.cpp @@ -32,17 +32,21 @@ using namespace llvm; namespace { typedef MachineBasicBlock::iterator Iter; -/// Helper class to expand accumulator pseudos. -class ExpandACCPseudo { +/// Helper class to expand pseudos. 
+class ExpandPseudo { public: - ExpandACCPseudo(MachineFunction &MF); + ExpandPseudo(MachineFunction &MF); bool expand(); private: bool expandInstr(MachineBasicBlock &MBB, Iter I); - void expandLoad(MachineBasicBlock &MBB, Iter I, unsigned RegSize); - void expandStore(MachineBasicBlock &MBB, Iter I, unsigned RegSize); - void expandCopy(MachineBasicBlock &MBB, Iter I, unsigned RegSize); + void expandLoadCCond(MachineBasicBlock &MBB, Iter I); + void expandStoreCCond(MachineBasicBlock &MBB, Iter I); + void expandLoadACC(MachineBasicBlock &MBB, Iter I, unsigned RegSize); + void expandStoreACC(MachineBasicBlock &MBB, Iter I, unsigned RegSize); + bool expandCopy(MachineBasicBlock &MBB, Iter I); + bool expandCopyACC(MachineBasicBlock &MBB, Iter I, unsigned Dst, + unsigned Src, unsigned RegSize); MachineFunction &MF; const MipsSEInstrInfo &TII; @@ -51,12 +55,12 @@ private: }; } -ExpandACCPseudo::ExpandACCPseudo(MachineFunction &MF_) +ExpandPseudo::ExpandPseudo(MachineFunction &MF_) : MF(MF_), TII(*static_cast<const MipsSEInstrInfo*>(MF.getTarget().getInstrInfo())), RegInfo(TII.getRegisterInfo()), MRI(MF.getRegInfo()) {} -bool ExpandACCPseudo::expand() { +bool ExpandPseudo::expand() { bool Expanded = false; for (MachineFunction::iterator BB = MF.begin(), BBEnd = MF.end(); @@ -67,34 +71,39 @@ bool ExpandACCPseudo::expand() { return Expanded; } -bool ExpandACCPseudo::expandInstr(MachineBasicBlock &MBB, Iter I) { +bool ExpandPseudo::expandInstr(MachineBasicBlock &MBB, Iter I) { switch(I->getOpcode()) { + case Mips::LOAD_CCOND_DSP: + case Mips::LOAD_CCOND_DSP_P8: + expandLoadCCond(MBB, I); + break; + case Mips::STORE_CCOND_DSP: + case Mips::STORE_CCOND_DSP_P8: + expandStoreCCond(MBB, I); + break; case Mips::LOAD_AC64: case Mips::LOAD_AC64_P8: case Mips::LOAD_AC_DSP: case Mips::LOAD_AC_DSP_P8: - expandLoad(MBB, I, 4); + expandLoadACC(MBB, I, 4); break; case Mips::LOAD_AC128: case Mips::LOAD_AC128_P8: - expandLoad(MBB, I, 8); + expandLoadACC(MBB, I, 8); break; case Mips::STORE_AC64: case Mips::STORE_AC64_P8: case Mips::STORE_AC_DSP: case Mips::STORE_AC_DSP_P8: - expandStore(MBB, I, 4); + expandStoreACC(MBB, I, 4); break; case Mips::STORE_AC128: case Mips::STORE_AC128_P8: - expandStore(MBB, I, 8); + expandStoreACC(MBB, I, 8); break; - case Mips::COPY_AC64: - case Mips::COPY_AC_DSP: - expandCopy(MBB, I, 4); - break; - case Mips::COPY_AC128: - expandCopy(MBB, I, 8); + case TargetOpcode::COPY: + if (!expandCopy(MBB, I)) + return false; break; default: return false; @@ -104,7 +113,37 @@ bool ExpandACCPseudo::expandInstr(MachineBasicBlock &MBB, Iter I) { return true; } -void ExpandACCPseudo::expandLoad(MachineBasicBlock &MBB, Iter I, +void ExpandPseudo::expandLoadCCond(MachineBasicBlock &MBB, Iter I) { + // load $vr, FI + // copy ccond, $vr + + assert(I->getOperand(0).isReg() && I->getOperand(1).isFI()); + + const TargetRegisterClass *RC = RegInfo.intRegClass(4); + unsigned VR = MRI.createVirtualRegister(RC); + unsigned Dst = I->getOperand(0).getReg(), FI = I->getOperand(1).getIndex(); + + TII.loadRegFromStack(MBB, I, VR, FI, RC, &RegInfo, 0); + BuildMI(MBB, I, I->getDebugLoc(), TII.get(TargetOpcode::COPY), Dst) + .addReg(VR, RegState::Kill); +} + +void ExpandPseudo::expandStoreCCond(MachineBasicBlock &MBB, Iter I) { + // copy $vr, ccond + // store $vr, FI + + assert(I->getOperand(0).isReg() && I->getOperand(1).isFI()); + + const TargetRegisterClass *RC = RegInfo.intRegClass(4); + unsigned VR = MRI.createVirtualRegister(RC); + unsigned Src = I->getOperand(0).getReg(), FI = I->getOperand(1).getIndex(); + + 
BuildMI(MBB, I, I->getDebugLoc(), TII.get(TargetOpcode::COPY), VR) + .addReg(Src, getKillRegState(I->getOperand(0).isKill())); + TII.storeRegToStack(MBB, I, VR, true, FI, RC, &RegInfo, 0); +} + +void ExpandPseudo::expandLoadACC(MachineBasicBlock &MBB, Iter I, unsigned RegSize) { // load $vr0, FI // copy lo, $vr0 @@ -128,7 +167,7 @@ void ExpandACCPseudo::expandLoad(MachineBasicBlock &MBB, Iter I, BuildMI(MBB, I, DL, Desc, Hi).addReg(VR1, RegState::Kill); } -void ExpandACCPseudo::expandStore(MachineBasicBlock &MBB, Iter I, +void ExpandPseudo::expandStoreACC(MachineBasicBlock &MBB, Iter I, unsigned RegSize) { // copy $vr0, lo // store $vr0, FI @@ -152,8 +191,20 @@ void ExpandACCPseudo::expandStore(MachineBasicBlock &MBB, Iter I, TII.storeRegToStack(MBB, I, VR1, true, FI, RC, &RegInfo, RegSize); } -void ExpandACCPseudo::expandCopy(MachineBasicBlock &MBB, Iter I, - unsigned RegSize) { +bool ExpandPseudo::expandCopy(MachineBasicBlock &MBB, Iter I) { + unsigned Dst = I->getOperand(0).getReg(), Src = I->getOperand(1).getReg(); + + if (Mips::ACRegsDSPRegClass.contains(Dst, Src)) + return expandCopyACC(MBB, I, Dst, Src, 4); + + if (Mips::ACRegs128RegClass.contains(Dst, Src)) + return expandCopyACC(MBB, I, Dst, Src, 8); + + return false; +} + +bool ExpandPseudo::expandCopyACC(MachineBasicBlock &MBB, Iter I, unsigned Dst, + unsigned Src, unsigned RegSize) { // copy $vr0, src_lo // copy dst_lo, $vr0 // copy $vr1, src_hi @@ -162,7 +213,6 @@ void ExpandACCPseudo::expandCopy(MachineBasicBlock &MBB, Iter I, const TargetRegisterClass *RC = RegInfo.intRegClass(RegSize); unsigned VR0 = MRI.createVirtualRegister(RC); unsigned VR1 = MRI.createVirtualRegister(RC); - unsigned Dst = I->getOperand(0).getReg(), Src = I->getOperand(1).getReg(); unsigned SrcKill = getKillRegState(I->getOperand(1).isKill()); unsigned DstLo = RegInfo.getSubReg(Dst, Mips::sub_lo); unsigned DstHi = RegInfo.getSubReg(Dst, Mips::sub_hi); @@ -176,6 +226,7 @@ void ExpandACCPseudo::expandCopy(MachineBasicBlock &MBB, Iter I, BuildMI(MBB, I, DL, TII.get(TargetOpcode::COPY), VR1).addReg(SrcHi, SrcKill); BuildMI(MBB, I, DL, TII.get(TargetOpcode::COPY), DstHi) .addReg(VR1, RegState::Kill); + return true; } unsigned MipsSEFrameLowering::ehDataReg(unsigned I) const { @@ -438,7 +489,7 @@ processFunctionBeforeCalleeSavedScan(MachineFunction &MF, // Expand pseudo instructions which load, store or copy accumulators. // Add an emergency spill slot if a pseudo was expanded. - if (ExpandACCPseudo(MF).expand()) { + if (ExpandPseudo(MF).expand()) { // The spill slot should be half the size of the accumulator. If target is // mips64, it should be 64-bit, otherwise it should be 32-bt. const TargetRegisterClass *RC = STI.hasMips64() ? diff --git a/contrib/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp b/contrib/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp index d6d2207..8a6523a 100644 --- a/contrib/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp +++ b/contrib/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp @@ -35,6 +35,36 @@ #include "llvm/Target/TargetMachine.h" using namespace llvm; +bool MipsSEDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) { + if (Subtarget.inMips16Mode()) + return false; + return MipsDAGToDAGISel::runOnMachineFunction(MF); +} + +void MipsSEDAGToDAGISel::addDSPCtrlRegOperands(bool IsDef, MachineInstr &MI, + MachineFunction &MF) { + MachineInstrBuilder MIB(MF, &MI); + unsigned Mask = MI.getOperand(1).getImm(); + unsigned Flag = IsDef ? 
RegState::ImplicitDefine : RegState::Implicit; + + if (Mask & 1) + MIB.addReg(Mips::DSPPos, Flag); + + if (Mask & 2) + MIB.addReg(Mips::DSPSCount, Flag); + + if (Mask & 4) + MIB.addReg(Mips::DSPCarry, Flag); + + if (Mask & 8) + MIB.addReg(Mips::DSPOutFlag, Flag); + + if (Mask & 16) + MIB.addReg(Mips::DSPCCond, Flag); + + if (Mask & 32) + MIB.addReg(Mips::DSPEFI, Flag); +} bool MipsSEDAGToDAGISel::replaceUsesWithZeroReg(MachineRegisterInfo *MRI, const MachineInstr& MI) { @@ -173,29 +203,14 @@ void MipsSEDAGToDAGISel::processFunctionAfterISel(MachineFunction &MF) { for (MachineFunction::iterator MFI = MF.begin(), MFE = MF.end(); MFI != MFE; ++MFI) - for (MachineBasicBlock::iterator I = MFI->begin(); I != MFI->end(); ++I) - replaceUsesWithZeroReg(MRI, *I); -} - -/// Select multiply instructions. -std::pair<SDNode*, SDNode*> -MipsSEDAGToDAGISel::selectMULT(SDNode *N, unsigned Opc, DebugLoc DL, EVT Ty, - bool HasLo, bool HasHi) { - SDNode *Lo = 0, *Hi = 0; - SDNode *Mul = CurDAG->getMachineNode(Opc, DL, MVT::Glue, N->getOperand(0), - N->getOperand(1)); - SDValue InFlag = SDValue(Mul, 0); - - if (HasLo) { - unsigned Opcode = (Ty == MVT::i32 ? Mips::MFLO : Mips::MFLO64); - Lo = CurDAG->getMachineNode(Opcode, DL, Ty, MVT::Glue, InFlag); - InFlag = SDValue(Lo, 1); - } - if (HasHi) { - unsigned Opcode = (Ty == MVT::i32 ? Mips::MFHI : Mips::MFHI64); - Hi = CurDAG->getMachineNode(Opcode, DL, Ty, InFlag); - } - return std::make_pair(Lo, Hi); + for (MachineBasicBlock::iterator I = MFI->begin(); I != MFI->end(); ++I) { + if (I->getOpcode() == Mips::RDDSP) + addDSPCtrlRegOperands(false, *I, MF); + else if (I->getOpcode() == Mips::WRDSP) + addDSPCtrlRegOperands(true, *I, MF); + else + replaceUsesWithZeroReg(MRI, *I); + } } SDNode *MipsSEDAGToDAGISel::selectAddESubE(unsigned MOp, SDValue InFlag, @@ -211,7 +226,7 @@ SDNode *MipsSEDAGToDAGISel::selectAddESubE(unsigned MOp, SDValue InFlag, SDValue LHS = Node->getOperand(0), RHS = Node->getOperand(1); EVT VT = LHS.getValueType(); - SDNode *Carry = CurDAG->getMachineNode(Mips::SLTu, DL, VT, Ops, 2); + SDNode *Carry = CurDAG->getMachineNode(Mips::SLTu, DL, VT, Ops); SDNode *AddCarry = CurDAG->getMachineNode(Mips::ADDu, DL, VT, SDValue(Carry, 0), RHS); return CurDAG->SelectNodeTo(Node, MOp, VT, MVT::Glue, LHS, @@ -307,9 +322,7 @@ std::pair<bool, SDNode*> MipsSEDAGToDAGISel::selectNode(SDNode *Node) { // Instruction Selection not handled by the auto-generated // tablegen selection should be handled here. /// - EVT NodeTy = Node->getValueType(0); SDNode *Result; - unsigned MultOpc; switch(Opcode) { default: break; @@ -321,51 +334,13 @@ std::pair<bool, SDNode*> MipsSEDAGToDAGISel::selectNode(SDNode *Node) { } case ISD::ADDE: { + if (Subtarget.hasDSP()) // Select DSP instructions, ADDSC and ADDWC. + break; SDValue InFlag = Node->getOperand(2); Result = selectAddESubE(Mips::ADDu, InFlag, InFlag.getValue(0), DL, Node); return std::make_pair(true, Result); } - /// Mul with two results - case ISD::SMUL_LOHI: - case ISD::UMUL_LOHI: { - if (NodeTy == MVT::i32) - MultOpc = (Opcode == ISD::UMUL_LOHI ? Mips::MULTu : Mips::MULT); - else - MultOpc = (Opcode == ISD::UMUL_LOHI ? 
Mips::DMULTu : Mips::DMULT); - - std::pair<SDNode*, SDNode*> LoHi = selectMULT(Node, MultOpc, DL, NodeTy, - true, true); - - if (!SDValue(Node, 0).use_empty()) - ReplaceUses(SDValue(Node, 0), SDValue(LoHi.first, 0)); - - if (!SDValue(Node, 1).use_empty()) - ReplaceUses(SDValue(Node, 1), SDValue(LoHi.second, 0)); - - return std::make_pair(true, (SDNode*)NULL); - } - - /// Special Muls - case ISD::MUL: { - // Mips32 has a 32-bit three operand mul instruction. - if (Subtarget.hasMips32() && NodeTy == MVT::i32) - break; - MultOpc = NodeTy == MVT::i32 ? Mips::MULT : Mips::DMULT; - Result = selectMULT(Node, MultOpc, DL, NodeTy, true, false).first; - return std::make_pair(true, Result); - } - case ISD::MULHS: - case ISD::MULHU: { - if (NodeTy == MVT::i32) - MultOpc = (Opcode == ISD::MULHU ? Mips::MULTu : Mips::MULT); - else - MultOpc = (Opcode == ISD::MULHU ? Mips::DMULTu : Mips::DMULT); - - Result = selectMULT(Node, MultOpc, DL, NodeTy, false, true).second; - return std::make_pair(true, Result); - } - case ISD::ConstantFP: { ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(Node); if (Node->getValueType(0) == MVT::f64 && CN->isExactlyValue(+0.0)) { @@ -460,7 +435,7 @@ std::pair<bool, SDNode*> MipsSEDAGToDAGISel::selectNode(SDNode *Node) { const SDValue Ops[] = { RegClass, Node->getOperand(0), LoIdx, Node->getOperand(1), HiIdx }; SDNode *Res = CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, - MVT::Untyped, Ops, 5); + MVT::Untyped, Ops); return std::make_pair(true, Res); } } diff --git a/contrib/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.h b/contrib/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.h index 6137ab0..a235e96 100644 --- a/contrib/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.h +++ b/contrib/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.h @@ -24,6 +24,12 @@ public: explicit MipsSEDAGToDAGISel(MipsTargetMachine &TM) : MipsDAGToDAGISel(TM) {} private: + + virtual bool runOnMachineFunction(MachineFunction &MF); + + void addDSPCtrlRegOperands(bool IsDef, MachineInstr &MI, + MachineFunction &MF); + bool replaceUsesWithZeroReg(MachineRegisterInfo *MRI, const MachineInstr&); std::pair<SDNode*, SDNode*> selectMULT(SDNode *N, unsigned Opc, DebugLoc dl, diff --git a/contrib/llvm/lib/Target/Mips/MipsSEISelLowering.cpp b/contrib/llvm/lib/Target/Mips/MipsSEISelLowering.cpp index 4f21921..8544bb8 100644 --- a/contrib/llvm/lib/Target/Mips/MipsSEISelLowering.cpp +++ b/contrib/llvm/lib/Target/Mips/MipsSEISelLowering.cpp @@ -15,6 +15,7 @@ #include "MipsTargetMachine.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/IR/Intrinsics.h" #include "llvm/Support/CommandLine.h" #include "llvm/Target/TargetInstrInfo.h" @@ -27,6 +28,9 @@ EnableMipsTailCalls("enable-mips-tail-calls", cl::Hidden, MipsSETargetLowering::MipsSETargetLowering(MipsTargetMachine &TM) : MipsTargetLowering(TM) { // Set up the register classes + + clearRegisterClasses(); + addRegisterClass(MVT::i32, &Mips::CPURegsRegClass); if (HasMips64) @@ -42,12 +46,23 @@ MipsSETargetLowering::MipsSETargetLowering(MipsTargetMachine &TM) for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc) setOperationAction(Opc, VecTys[i], Expand); + setOperationAction(ISD::ADD, VecTys[i], Legal); + setOperationAction(ISD::SUB, VecTys[i], Legal); setOperationAction(ISD::LOAD, VecTys[i], Legal); setOperationAction(ISD::STORE, VecTys[i], Legal); setOperationAction(ISD::BITCAST, VecTys[i], Legal); } + + setTargetDAGCombine(ISD::SHL); + setTargetDAGCombine(ISD::SRA); + setTargetDAGCombine(ISD::SRL); + 
setTargetDAGCombine(ISD::SETCC); + setTargetDAGCombine(ISD::VSELECT); } + if (Subtarget->hasDSPR2()) + setOperationAction(ISD::MUL, MVT::v2i16, Legal); + if (!TM.Options.UseSoftFloat) { addRegisterClass(MVT::f32, &Mips::FGR32RegClass); @@ -65,14 +80,19 @@ MipsSETargetLowering::MipsSETargetLowering(MipsTargetMachine &TM) setOperationAction(ISD::MULHS, MVT::i32, Custom); setOperationAction(ISD::MULHU, MVT::i32, Custom); - if (HasMips64) + if (HasMips64) { + setOperationAction(ISD::MULHS, MVT::i64, Custom); + setOperationAction(ISD::MULHU, MVT::i64, Custom); setOperationAction(ISD::MUL, MVT::i64, Custom); + } + + setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom); + setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i64, Custom); setOperationAction(ISD::SDIVREM, MVT::i32, Custom); setOperationAction(ISD::UDIVREM, MVT::i32, Custom); setOperationAction(ISD::SDIVREM, MVT::i64, Custom); setOperationAction(ISD::UDIVREM, MVT::i64, Custom); - setOperationAction(ISD::MEMBARRIER, MVT::Other, Custom); setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom); setOperationAction(ISD::LOAD, MVT::i32, Custom); setOperationAction(ISD::STORE, MVT::i32, Custom); @@ -113,7 +133,10 @@ SDValue MipsSETargetLowering::LowerOperation(SDValue Op, case ISD::MULHU: return lowerMulDiv(Op, MipsISD::Multu, false, true, DAG); case ISD::MUL: return lowerMulDiv(Op, MipsISD::Mult, true, false, DAG); case ISD::SDIVREM: return lowerMulDiv(Op, MipsISD::DivRem, true, true, DAG); - case ISD::UDIVREM: return lowerMulDiv(Op, MipsISD::DivRemU, true, true, DAG); + case ISD::UDIVREM: return lowerMulDiv(Op, MipsISD::DivRemU, true, true, + DAG); + case ISD::INTRINSIC_WO_CHAIN: return lowerINTRINSIC_WO_CHAIN(Op, DAG); + case ISD::INTRINSIC_W_CHAIN: return lowerINTRINSIC_W_CHAIN(Op, DAG); } return MipsTargetLowering::LowerOperation(Op, DAG); @@ -297,18 +320,136 @@ static SDValue performSUBECombine(SDNode *N, SelectionDAG &DAG, return SDValue(); } +static SDValue performDSPShiftCombine(unsigned Opc, SDNode *N, EVT Ty, + SelectionDAG &DAG, + const MipsSubtarget *Subtarget) { + // See if this is a vector splat immediate node. 
+ APInt SplatValue, SplatUndef; + unsigned SplatBitSize; + bool HasAnyUndefs; + unsigned EltSize = Ty.getVectorElementType().getSizeInBits(); + BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N->getOperand(1)); + + if (!BV || + !BV->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs, + EltSize, !Subtarget->isLittle()) || + (SplatBitSize != EltSize) || + (SplatValue.getZExtValue() >= EltSize)) + return SDValue(); + + return DAG.getNode(Opc, N->getDebugLoc(), Ty, N->getOperand(0), + DAG.getConstant(SplatValue.getZExtValue(), MVT::i32)); +} + +static SDValue performSHLCombine(SDNode *N, SelectionDAG &DAG, + TargetLowering::DAGCombinerInfo &DCI, + const MipsSubtarget *Subtarget) { + EVT Ty = N->getValueType(0); + + if ((Ty != MVT::v2i16) && (Ty != MVT::v4i8)) + return SDValue(); + + return performDSPShiftCombine(MipsISD::SHLL_DSP, N, Ty, DAG, Subtarget); +} + +static SDValue performSRACombine(SDNode *N, SelectionDAG &DAG, + TargetLowering::DAGCombinerInfo &DCI, + const MipsSubtarget *Subtarget) { + EVT Ty = N->getValueType(0); + + if ((Ty != MVT::v2i16) && ((Ty != MVT::v4i8) || !Subtarget->hasDSPR2())) + return SDValue(); + + return performDSPShiftCombine(MipsISD::SHRA_DSP, N, Ty, DAG, Subtarget); +} + + +static SDValue performSRLCombine(SDNode *N, SelectionDAG &DAG, + TargetLowering::DAGCombinerInfo &DCI, + const MipsSubtarget *Subtarget) { + EVT Ty = N->getValueType(0); + + if (((Ty != MVT::v2i16) || !Subtarget->hasDSPR2()) && (Ty != MVT::v4i8)) + return SDValue(); + + return performDSPShiftCombine(MipsISD::SHRL_DSP, N, Ty, DAG, Subtarget); +} + +static bool isLegalDSPCondCode(EVT Ty, ISD::CondCode CC) { + bool IsV216 = (Ty == MVT::v2i16); + + switch (CC) { + case ISD::SETEQ: + case ISD::SETNE: return true; + case ISD::SETLT: + case ISD::SETLE: + case ISD::SETGT: + case ISD::SETGE: return IsV216; + case ISD::SETULT: + case ISD::SETULE: + case ISD::SETUGT: + case ISD::SETUGE: return !IsV216; + default: return false; + } +} + +static SDValue performSETCCCombine(SDNode *N, SelectionDAG &DAG) { + EVT Ty = N->getValueType(0); + + if ((Ty != MVT::v2i16) && (Ty != MVT::v4i8)) + return SDValue(); + + if (!isLegalDSPCondCode(Ty, cast<CondCodeSDNode>(N->getOperand(2))->get())) + return SDValue(); + + return DAG.getNode(MipsISD::SETCC_DSP, N->getDebugLoc(), Ty, N->getOperand(0), + N->getOperand(1), N->getOperand(2)); +} + +static SDValue performVSELECTCombine(SDNode *N, SelectionDAG &DAG) { + EVT Ty = N->getValueType(0); + + if ((Ty != MVT::v2i16) && (Ty != MVT::v4i8)) + return SDValue(); + + SDValue SetCC = N->getOperand(0); + + if (SetCC.getOpcode() != MipsISD::SETCC_DSP) + return SDValue(); + + return DAG.getNode(MipsISD::SELECT_CC_DSP, N->getDebugLoc(), Ty, + SetCC.getOperand(0), SetCC.getOperand(1), N->getOperand(1), + N->getOperand(2), SetCC.getOperand(2)); +} + SDValue MipsSETargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const { SelectionDAG &DAG = DCI.DAG; + SDValue Val; switch (N->getOpcode()) { case ISD::ADDE: return performADDECombine(N, DAG, DCI, Subtarget); case ISD::SUBE: return performSUBECombine(N, DAG, DCI, Subtarget); - default: - return MipsTargetLowering::PerformDAGCombine(N, DCI); + case ISD::SHL: + return performSHLCombine(N, DAG, DCI, Subtarget); + case ISD::SRA: + return performSRACombine(N, DAG, DCI, Subtarget); + case ISD::SRL: + return performSRLCombine(N, DAG, DCI, Subtarget); + case ISD::VSELECT: + return performVSELECTCombine(N, DAG); + case ISD::SETCC: { + Val = performSETCCCombine(N, DAG); + break; } + } + + if (Val.getNode()) + 
return Val; + + return MipsTargetLowering::PerformDAGCombine(N, DCI); } MachineBasicBlock * @@ -378,6 +519,171 @@ SDValue MipsSETargetLowering::lowerMulDiv(SDValue Op, unsigned NewOpc, return DAG.getMergeValues(Vals, 2, DL); } + +static SDValue initAccumulator(SDValue In, DebugLoc DL, SelectionDAG &DAG) { + SDValue InLo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, In, + DAG.getConstant(0, MVT::i32)); + SDValue InHi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, In, + DAG.getConstant(1, MVT::i32)); + return DAG.getNode(MipsISD::InsertLOHI, DL, MVT::Untyped, InLo, InHi); +} + +static SDValue extractLOHI(SDValue Op, DebugLoc DL, SelectionDAG &DAG) { + SDValue Lo = DAG.getNode(MipsISD::ExtractLOHI, DL, MVT::i32, Op, + DAG.getConstant(Mips::sub_lo, MVT::i32)); + SDValue Hi = DAG.getNode(MipsISD::ExtractLOHI, DL, MVT::i32, Op, + DAG.getConstant(Mips::sub_hi, MVT::i32)); + return DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Lo, Hi); +} + +// This function expands mips intrinsic nodes which have 64-bit input operands +// or output values. +// +// out64 = intrinsic-node in64 +// => +// lo = copy (extract-element (in64, 0)) +// hi = copy (extract-element (in64, 1)) +// mips-specific-node +// v0 = copy lo +// v1 = copy hi +// out64 = merge-values (v0, v1) +// +static SDValue lowerDSPIntr(SDValue Op, SelectionDAG &DAG, unsigned Opc) { + DebugLoc DL = Op.getDebugLoc(); + bool HasChainIn = Op->getOperand(0).getValueType() == MVT::Other; + SmallVector<SDValue, 3> Ops; + unsigned OpNo = 0; + + // See if Op has a chain input. + if (HasChainIn) + Ops.push_back(Op->getOperand(OpNo++)); + + // The next operand is the intrinsic opcode. + assert(Op->getOperand(OpNo).getOpcode() == ISD::TargetConstant); + + // See if the next operand has type i64. + SDValue Opnd = Op->getOperand(++OpNo), In64; + + if (Opnd.getValueType() == MVT::i64) + In64 = initAccumulator(Opnd, DL, DAG); + else + Ops.push_back(Opnd); + + // Push the remaining operands. + for (++OpNo ; OpNo < Op->getNumOperands(); ++OpNo) + Ops.push_back(Op->getOperand(OpNo)); + + // Add In64 to the end of the list. + if (In64.getNode()) + Ops.push_back(In64); + + // Scan output. + SmallVector<EVT, 2> ResTys; + + for (SDNode::value_iterator I = Op->value_begin(), E = Op->value_end(); + I != E; ++I) + ResTys.push_back((*I == MVT::i64) ? MVT::Untyped : *I); + + // Create node. + SDValue Val = DAG.getNode(Opc, DL, ResTys, &Ops[0], Ops.size()); + SDValue Out = (ResTys[0] == MVT::Untyped) ? 
extractLOHI(Val, DL, DAG) : Val; + + if (!HasChainIn) + return Out; + + assert(Val->getValueType(1) == MVT::Other); + SDValue Vals[] = { Out, SDValue(Val.getNode(), 1) }; + return DAG.getMergeValues(Vals, 2, DL); +} + +SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op, + SelectionDAG &DAG) const { + switch (cast<ConstantSDNode>(Op->getOperand(0))->getZExtValue()) { + default: + return SDValue(); + case Intrinsic::mips_shilo: + return lowerDSPIntr(Op, DAG, MipsISD::SHILO); + case Intrinsic::mips_dpau_h_qbl: + return lowerDSPIntr(Op, DAG, MipsISD::DPAU_H_QBL); + case Intrinsic::mips_dpau_h_qbr: + return lowerDSPIntr(Op, DAG, MipsISD::DPAU_H_QBR); + case Intrinsic::mips_dpsu_h_qbl: + return lowerDSPIntr(Op, DAG, MipsISD::DPSU_H_QBL); + case Intrinsic::mips_dpsu_h_qbr: + return lowerDSPIntr(Op, DAG, MipsISD::DPSU_H_QBR); + case Intrinsic::mips_dpa_w_ph: + return lowerDSPIntr(Op, DAG, MipsISD::DPA_W_PH); + case Intrinsic::mips_dps_w_ph: + return lowerDSPIntr(Op, DAG, MipsISD::DPS_W_PH); + case Intrinsic::mips_dpax_w_ph: + return lowerDSPIntr(Op, DAG, MipsISD::DPAX_W_PH); + case Intrinsic::mips_dpsx_w_ph: + return lowerDSPIntr(Op, DAG, MipsISD::DPSX_W_PH); + case Intrinsic::mips_mulsa_w_ph: + return lowerDSPIntr(Op, DAG, MipsISD::MULSA_W_PH); + case Intrinsic::mips_mult: + return lowerDSPIntr(Op, DAG, MipsISD::Mult); + case Intrinsic::mips_multu: + return lowerDSPIntr(Op, DAG, MipsISD::Multu); + case Intrinsic::mips_madd: + return lowerDSPIntr(Op, DAG, MipsISD::MAdd); + case Intrinsic::mips_maddu: + return lowerDSPIntr(Op, DAG, MipsISD::MAddu); + case Intrinsic::mips_msub: + return lowerDSPIntr(Op, DAG, MipsISD::MSub); + case Intrinsic::mips_msubu: + return lowerDSPIntr(Op, DAG, MipsISD::MSubu); + } +} + +SDValue MipsSETargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op, + SelectionDAG &DAG) const { + switch (cast<ConstantSDNode>(Op->getOperand(1))->getZExtValue()) { + default: + return SDValue(); + case Intrinsic::mips_extp: + return lowerDSPIntr(Op, DAG, MipsISD::EXTP); + case Intrinsic::mips_extpdp: + return lowerDSPIntr(Op, DAG, MipsISD::EXTPDP); + case Intrinsic::mips_extr_w: + return lowerDSPIntr(Op, DAG, MipsISD::EXTR_W); + case Intrinsic::mips_extr_r_w: + return lowerDSPIntr(Op, DAG, MipsISD::EXTR_R_W); + case Intrinsic::mips_extr_rs_w: + return lowerDSPIntr(Op, DAG, MipsISD::EXTR_RS_W); + case Intrinsic::mips_extr_s_h: + return lowerDSPIntr(Op, DAG, MipsISD::EXTR_S_H); + case Intrinsic::mips_mthlip: + return lowerDSPIntr(Op, DAG, MipsISD::MTHLIP); + case Intrinsic::mips_mulsaq_s_w_ph: + return lowerDSPIntr(Op, DAG, MipsISD::MULSAQ_S_W_PH); + case Intrinsic::mips_maq_s_w_phl: + return lowerDSPIntr(Op, DAG, MipsISD::MAQ_S_W_PHL); + case Intrinsic::mips_maq_s_w_phr: + return lowerDSPIntr(Op, DAG, MipsISD::MAQ_S_W_PHR); + case Intrinsic::mips_maq_sa_w_phl: + return lowerDSPIntr(Op, DAG, MipsISD::MAQ_SA_W_PHL); + case Intrinsic::mips_maq_sa_w_phr: + return lowerDSPIntr(Op, DAG, MipsISD::MAQ_SA_W_PHR); + case Intrinsic::mips_dpaq_s_w_ph: + return lowerDSPIntr(Op, DAG, MipsISD::DPAQ_S_W_PH); + case Intrinsic::mips_dpsq_s_w_ph: + return lowerDSPIntr(Op, DAG, MipsISD::DPSQ_S_W_PH); + case Intrinsic::mips_dpaq_sa_l_w: + return lowerDSPIntr(Op, DAG, MipsISD::DPAQ_SA_L_W); + case Intrinsic::mips_dpsq_sa_l_w: + return lowerDSPIntr(Op, DAG, MipsISD::DPSQ_SA_L_W); + case Intrinsic::mips_dpaqx_s_w_ph: + return lowerDSPIntr(Op, DAG, MipsISD::DPAQX_S_W_PH); + case Intrinsic::mips_dpaqx_sa_w_ph: + return lowerDSPIntr(Op, DAG, MipsISD::DPAQX_SA_W_PH); + case Intrinsic::mips_dpsqx_s_w_ph: + 
return lowerDSPIntr(Op, DAG, MipsISD::DPSQX_S_W_PH); + case Intrinsic::mips_dpsqx_sa_w_ph: + return lowerDSPIntr(Op, DAG, MipsISD::DPSQX_SA_W_PH); + } +} + MachineBasicBlock * MipsSETargetLowering:: emitBPOSGE32(MachineInstr *MI, MachineBasicBlock *BB) const{ // $bb: diff --git a/contrib/llvm/lib/Target/Mips/MipsSEISelLowering.h b/contrib/llvm/lib/Target/Mips/MipsSEISelLowering.h index 186f6a3..ec8a5c7 100644 --- a/contrib/llvm/lib/Target/Mips/MipsSEISelLowering.h +++ b/contrib/llvm/lib/Target/Mips/MipsSEISelLowering.h @@ -31,6 +31,11 @@ namespace llvm { virtual MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *MBB) const; + virtual bool isShuffleMaskLegal(const SmallVectorImpl<int> &Mask, + EVT VT) const { + return false; + } + virtual const TargetRegisterClass *getRepRegClassFor(MVT VT) const { if (VT == MVT::Untyped) return Subtarget->hasDSP() ? &Mips::ACRegsDSPRegClass : @@ -54,6 +59,9 @@ namespace llvm { SDValue lowerMulDiv(SDValue Op, unsigned NewOpc, bool HasLo, bool HasHi, SelectionDAG &DAG) const; + SDValue lowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const; + MachineBasicBlock *emitBPOSGE32(MachineInstr *MI, MachineBasicBlock *BB) const; }; diff --git a/contrib/llvm/lib/Target/Mips/MipsSEInstrInfo.cpp b/contrib/llvm/lib/Target/Mips/MipsSEInstrInfo.cpp index ca0315e..a0768e5 100644 --- a/contrib/llvm/lib/Target/Mips/MipsSEInstrInfo.cpp +++ b/contrib/llvm/lib/Target/Mips/MipsSEInstrInfo.cpp @@ -95,20 +95,39 @@ void MipsSEInstrInfo::copyPhysReg(MachineBasicBlock &MBB, Opc = Mips::CFC1; else if (Mips::FGR32RegClass.contains(SrcReg)) Opc = Mips::MFC1; - else if (SrcReg == Mips::HI) + else if (Mips::HIRegsRegClass.contains(SrcReg)) Opc = Mips::MFHI, SrcReg = 0; - else if (SrcReg == Mips::LO) + else if (Mips::LORegsRegClass.contains(SrcReg)) Opc = Mips::MFLO, SrcReg = 0; + else if (Mips::HIRegsDSPRegClass.contains(SrcReg)) + Opc = Mips::MFHI_DSP; + else if (Mips::LORegsDSPRegClass.contains(SrcReg)) + Opc = Mips::MFLO_DSP; + else if (Mips::DSPCCRegClass.contains(SrcReg)) { + BuildMI(MBB, I, DL, get(Mips::RDDSP), DestReg).addImm(1 << 4) + .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc)); + return; + } } else if (Mips::CPURegsRegClass.contains(SrcReg)) { // Copy from CPU Reg. if (Mips::CCRRegClass.contains(DestReg)) Opc = Mips::CTC1; else if (Mips::FGR32RegClass.contains(DestReg)) Opc = Mips::MTC1; - else if (DestReg == Mips::HI) + else if (Mips::HIRegsRegClass.contains(DestReg)) Opc = Mips::MTHI, DestReg = 0; - else if (DestReg == Mips::LO) + else if (Mips::LORegsRegClass.contains(DestReg)) Opc = Mips::MTLO, DestReg = 0; + else if (Mips::HIRegsDSPRegClass.contains(DestReg)) + Opc = Mips::MTHI_DSP; + else if (Mips::LORegsDSPRegClass.contains(DestReg)) + Opc = Mips::MTLO_DSP; + else if (Mips::DSPCCRegClass.contains(DestReg)) { + BuildMI(MBB, I, DL, get(Mips::WRDSP)) + .addReg(SrcReg, getKillRegState(KillSrc)).addImm(1 << 4) + .addReg(DestReg, RegState::ImplicitDefine); + return; + } } else if (Mips::FGR32RegClass.contains(DestReg, SrcReg)) Opc = Mips::FMOV_S; @@ -121,27 +140,21 @@ void MipsSEInstrInfo::copyPhysReg(MachineBasicBlock &MBB, else if (Mips::CPU64RegsRegClass.contains(DestReg)) { // Copy to CPU64 Reg. 
if (Mips::CPU64RegsRegClass.contains(SrcReg)) Opc = Mips::OR64, ZeroReg = Mips::ZERO_64; - else if (SrcReg == Mips::HI64) + else if (Mips::HIRegs64RegClass.contains(SrcReg)) Opc = Mips::MFHI64, SrcReg = 0; - else if (SrcReg == Mips::LO64) + else if (Mips::LORegs64RegClass.contains(SrcReg)) Opc = Mips::MFLO64, SrcReg = 0; else if (Mips::FGR64RegClass.contains(SrcReg)) Opc = Mips::DMFC1; } else if (Mips::CPU64RegsRegClass.contains(SrcReg)) { // Copy from CPU64 Reg. - if (DestReg == Mips::HI64) + if (Mips::HIRegs64RegClass.contains(DestReg)) Opc = Mips::MTHI64, DestReg = 0; - else if (DestReg == Mips::LO64) + else if (Mips::LORegs64RegClass.contains(DestReg)) Opc = Mips::MTLO64, DestReg = 0; else if (Mips::FGR64RegClass.contains(DestReg)) Opc = Mips::DMTC1; } - else if (Mips::ACRegsRegClass.contains(DestReg, SrcReg)) - Opc = Mips::COPY_AC64; - else if (Mips::ACRegsDSPRegClass.contains(DestReg, SrcReg)) - Opc = Mips::COPY_AC_DSP; - else if (Mips::ACRegs128RegClass.contains(DestReg, SrcReg)) - Opc = Mips::COPY_AC128; assert(Opc && "Cannot copy registers"); @@ -178,6 +191,8 @@ storeRegToStack(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, Opc = IsN64 ? Mips::STORE_AC_DSP_P8 : Mips::STORE_AC_DSP; else if (Mips::ACRegs128RegClass.hasSubClassEq(RC)) Opc = IsN64 ? Mips::STORE_AC128_P8 : Mips::STORE_AC128; + else if (Mips::DSPCCRegClass.hasSubClassEq(RC)) + Opc = IsN64 ? Mips::STORE_CCOND_DSP_P8 : Mips::STORE_CCOND_DSP; else if (Mips::FGR32RegClass.hasSubClassEq(RC)) Opc = IsN64 ? Mips::SWC1_P8 : Mips::SWC1; else if (Mips::AFGR64RegClass.hasSubClassEq(RC)) @@ -209,6 +224,8 @@ loadRegFromStack(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, Opc = IsN64 ? Mips::LOAD_AC_DSP_P8 : Mips::LOAD_AC_DSP; else if (Mips::ACRegs128RegClass.hasSubClassEq(RC)) Opc = IsN64 ? Mips::LOAD_AC128_P8 : Mips::LOAD_AC128; + else if (Mips::DSPCCRegClass.hasSubClassEq(RC)) + Opc = IsN64 ? Mips::LOAD_CCOND_DSP_P8 : Mips::LOAD_CCOND_DSP; else if (Mips::FGR32RegClass.hasSubClassEq(RC)) Opc = IsN64 ? 
Mips::LWC1_P8 : Mips::LWC1; else if (Mips::AFGR64RegClass.hasSubClassEq(RC)) diff --git a/contrib/llvm/lib/Target/Mips/MipsSubtarget.cpp b/contrib/llvm/lib/Target/Mips/MipsSubtarget.cpp index e11e5d1..14a2b27 100644 --- a/contrib/llvm/lib/Target/Mips/MipsSubtarget.cpp +++ b/contrib/llvm/lib/Target/Mips/MipsSubtarget.cpp @@ -11,29 +11,56 @@ // //===----------------------------------------------------------------------===// +#define DEBUG_TYPE "mips-subtarget" + +#include "MipsMachineFunction.h" #include "MipsSubtarget.h" +#include "MipsTargetMachine.h" #include "Mips.h" #include "MipsRegisterInfo.h" +#include "llvm/IR/Attributes.h" +#include "llvm/IR/Function.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" #include "llvm/Support/TargetRegistry.h" +#include "llvm/Support/raw_ostream.h" #define GET_SUBTARGETINFO_TARGET_DESC #define GET_SUBTARGETINFO_CTOR #include "MipsGenSubtargetInfo.inc" + using namespace llvm; +// FIXME: Maybe this should be on by default when Mips16 is specified +// +static cl::opt<bool> Mixed16_32( + "mips-mixed-16-32", + cl::init(false), + cl::desc("Allow for a mixture of Mips16 " + "and Mips32 code in a single source file"), + cl::Hidden); + +static cl::opt<bool> Mips_Os16( + "mips-os16", + cl::init(false), + cl::desc("Compile all functions that don' use " + "floating point as Mips 16"), + cl::Hidden); + void MipsSubtarget::anchor() { } MipsSubtarget::MipsSubtarget(const std::string &TT, const std::string &CPU, const std::string &FS, bool little, - Reloc::Model _RM) : + Reloc::Model _RM, MipsTargetMachine *_TM) : MipsGenSubtargetInfo(TT, CPU, FS), MipsArchVersion(Mips32), MipsABI(UnknownABI), IsLittle(little), IsSingleFloat(false), IsFP64bit(false), IsGP64bit(false), HasVFPU(false), IsLinux(true), HasSEInReg(false), HasCondMov(false), HasSwap(false), HasBitCount(false), HasFPIdx(false), InMips16Mode(false), InMicroMipsMode(false), HasDSP(false), HasDSPR2(false), - RM(_RM) + AllowMixed16_32(Mixed16_32 | Mips_Os16), Os16(Mips_Os16), + RM(_RM), OverrideMode(NoOverride), TM(_TM) { std::string CPUName = CPU; if (CPUName.empty()) @@ -42,6 +69,8 @@ MipsSubtarget::MipsSubtarget(const std::string &TT, const std::string &CPU, // Parse features string. ParseSubtargetFeatures(CPUName, FS); + PreviousInMips16Mode = InMips16Mode; + // Initialize scheduling itinerary for the specified CPU. InstrItins = getInstrItineraryForCPU(CPUName); @@ -72,3 +101,48 @@ MipsSubtarget::enablePostRAScheduler(CodeGenOpt::Level OptLevel, &Mips::CPU64RegsRegClass : &Mips::CPURegsRegClass); return OptLevel >= CodeGenOpt::Aggressive; } + +//FIXME: This logic for reseting the subtarget along with +// the helper classes can probably be simplified but there are a lot of +// cases so we will defer rewriting this to later. 
+// +void MipsSubtarget::resetSubtarget(MachineFunction *MF) { + bool ChangeToMips16 = false, ChangeToNoMips16 = false; + DEBUG(dbgs() << "resetSubtargetFeatures" << "\n"); + AttributeSet FnAttrs = MF->getFunction()->getAttributes(); + ChangeToMips16 = FnAttrs.hasAttribute(AttributeSet::FunctionIndex, + "mips16"); + ChangeToNoMips16 = FnAttrs.hasAttribute(AttributeSet::FunctionIndex, + "nomips16"); + assert (!(ChangeToMips16 & ChangeToNoMips16) && + "mips16 and nomips16 specified on the same function"); + if (ChangeToMips16) { + if (PreviousInMips16Mode) + return; + OverrideMode = Mips16Override; + PreviousInMips16Mode = true; + TM->setHelperClassesMips16(); + return; + } else if (ChangeToNoMips16) { + if (!PreviousInMips16Mode) + return; + OverrideMode = NoMips16Override; + PreviousInMips16Mode = false; + TM->setHelperClassesMipsSE(); + return; + } else { + if (OverrideMode == NoOverride) + return; + OverrideMode = NoOverride; + DEBUG(dbgs() << "back to default" << "\n"); + if (inMips16Mode() && !PreviousInMips16Mode) { + TM->setHelperClassesMips16(); + PreviousInMips16Mode = true; + } else if (!inMips16Mode() && PreviousInMips16Mode) { + TM->setHelperClassesMipsSE(); + PreviousInMips16Mode = false; + } + return; + } +} + diff --git a/contrib/llvm/lib/Target/Mips/MipsSubtarget.h b/contrib/llvm/lib/Target/Mips/MipsSubtarget.h index 7a2e47c..f2f0e15 100644 --- a/contrib/llvm/lib/Target/Mips/MipsSubtarget.h +++ b/contrib/llvm/lib/Target/Mips/MipsSubtarget.h @@ -16,7 +16,9 @@ #include "MCTargetDesc/MipsReginfo.h" #include "llvm/MC/MCInstrItineraries.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Target/TargetSubtargetInfo.h" + #include <string> #define GET_SUBTARGETINFO_HEADER @@ -25,6 +27,8 @@ namespace llvm { class StringRef; +class MipsTargetMachine; + class MipsSubtarget : public MipsGenSubtargetInfo { virtual void anchor(); @@ -89,12 +93,23 @@ protected: // InMips16 -- can process Mips16 instructions bool InMips16Mode; + // PreviousInMips16 -- the function we just processed was in Mips 16 Mode + bool PreviousInMips16Mode; + // InMicroMips -- can process MicroMips instructions bool InMicroMipsMode; // HasDSP, HasDSPR2 -- supports DSP ASE. bool HasDSP, HasDSPR2; + // Allow mixed Mips16 and Mips32 in one source file + bool AllowMixed16_32; + + // Optimize for space by compiling all functions as Mips 16 unless + // it needs floating point. Functions needing floating point are + // compiled as Mips32 + bool Os16; + InstrItineraryData InstrItins; // The instance to the register info section object @@ -103,6 +118,12 @@ protected: // Relocation Model Reloc::Model RM; + // We can override the determination of whether we are in mips16 mode + // as from the command line + enum {NoOverride, Mips16Override, NoMips16Override} OverrideMode; + + MipsTargetMachine *TM; + public: virtual bool enablePostRAScheduler(CodeGenOpt::Level OptLevel, AntiDepBreakMode& Mode, @@ -118,7 +139,8 @@ public: /// This constructor initializes the data members to match that /// of the specified triple. MipsSubtarget(const std::string &TT, const std::string &CPU, - const std::string &FS, bool little, Reloc::Model RM); + const std::string &FS, bool little, Reloc::Model RM, + MipsTargetMachine *TM); /// ParseSubtargetFeatures - Parses features string setting specified /// subtarget options. Definition of function is auto generated by tblgen. 
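resetSubtarget() above keys entirely on the "mips16"/"nomips16" IR function attributes, which are attached per function either by the user or by the MipsOs16 pass. A minimal source-level illustration, assuming a MIPS compiler that accepts the GCC-style mips16/nomips16 function attributes (the attribute names match the strings this patch checks):

// Illustrative source only: with -mips-mixed-16-32, per-function attributes
// drive MipsSubtarget::resetSubtarget() between the Mips16 and MipsSE helper
// classes. Under -mips-os16 the MipsOs16 pass adds the same "mips16"/"nomips16"
// IR attributes automatically, keeping floating-point code out of MIPS16 mode.
__attribute__((nomips16)) float scale(float x) {  // uses floating point -> stays MIPS32
  return x * 2.0f;
}

__attribute__((mips16)) int sum(int a, int b) {   // integer only -> compiled as MIPS16
  return a + b;
}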
@@ -137,7 +159,20 @@ public: bool isSingleFloat() const { return IsSingleFloat; } bool isNotSingleFloat() const { return !IsSingleFloat; } bool hasVFPU() const { return HasVFPU; } - bool inMips16Mode() const { return InMips16Mode; } + bool inMips16Mode() const { + switch (OverrideMode) { + case NoOverride: + return InMips16Mode; + case Mips16Override: + return true; + case NoMips16Override: + return false; + } + llvm_unreachable("Unexpected mode"); + } + bool inMips16ModeDefault() { + return InMips16Mode; + } bool inMicroMipsMode() const { return InMicroMipsMode; } bool hasDSP() const { return HasDSP; } bool hasDSPR2() const { return HasDSPR2; } @@ -153,11 +188,20 @@ public: bool hasBitCount() const { return HasBitCount; } bool hasFPIdx() const { return HasFPIdx; } + bool allowMixed16_32() const { return AllowMixed16_32;}; + + bool os16() const { return Os16;}; + // Grab MipsRegInfo object const MipsReginfo &getMReginfo() const { return MRI; } // Grab relocation model Reloc::Model getRelocationModel() const {return RM;} + + /// \brief Reset the subtarget for the Mips target. + void resetSubtarget(MachineFunction *MF); + + }; } // End llvm namespace diff --git a/contrib/llvm/lib/Target/Mips/MipsTargetMachine.cpp b/contrib/llvm/lib/Target/Mips/MipsTargetMachine.cpp index 3336358..ee28e2a 100644 --- a/contrib/llvm/lib/Target/Mips/MipsTargetMachine.cpp +++ b/contrib/llvm/lib/Target/Mips/MipsTargetMachine.cpp @@ -15,11 +15,26 @@ #include "Mips.h" #include "MipsFrameLowering.h" #include "MipsInstrInfo.h" +#include "MipsModuleISelDAGToDAG.h" +#include "MipsOs16.h" +#include "MipsSEFrameLowering.h" +#include "MipsSEInstrInfo.h" +#include "MipsSEISelLowering.h" +#include "MipsSEISelDAGToDAG.h" +#include "Mips16FrameLowering.h" +#include "Mips16InstrInfo.h" +#include "Mips16ISelDAGToDAG.h" +#include "Mips16ISelLowering.h" +#include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/CodeGen/Passes.h" #include "llvm/PassManager.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Support/TargetRegistry.h" using namespace llvm; + + extern "C" void LLVMInitializeMipsTarget() { // Register the target. RegisterTargetMachine<MipsebTargetMachine> X(TheMipsTarget); @@ -42,7 +57,7 @@ MipsTargetMachine(const Target &T, StringRef TT, CodeGenOpt::Level OL, bool isLittle) : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL), - Subtarget(TT, CPU, FS, isLittle, RM), + Subtarget(TT, CPU, FS, isLittle, RM, this), DL(isLittle ? (Subtarget.isABI_N64() ? 
"e-p:64:64:64-i8:8:32-i16:16:32-i64:64:64-f128:128:128-" @@ -54,9 +69,46 @@ MipsTargetMachine(const Target &T, StringRef TT, "E-p:32:32:32-i8:8:32-i16:16:32-i64:64:64-n32-S64")), InstrInfo(MipsInstrInfo::create(*this)), FrameLowering(MipsFrameLowering::create(*this, Subtarget)), - TLInfo(MipsTargetLowering::create(*this)), TSInfo(*this), JITInfo() { + TLInfo(MipsTargetLowering::create(*this)), + TSInfo(*this), JITInfo() { +} + + +void MipsTargetMachine::setHelperClassesMips16() { + InstrInfoSE.swap(InstrInfo); + FrameLoweringSE.swap(FrameLowering); + TLInfoSE.swap(TLInfo); + if (!InstrInfo16) { + InstrInfo.reset(MipsInstrInfo::create(*this)); + FrameLowering.reset(MipsFrameLowering::create(*this, Subtarget)); + TLInfo.reset(MipsTargetLowering::create(*this)); + } else { + InstrInfo16.swap(InstrInfo); + FrameLowering16.swap(FrameLowering); + TLInfo16.swap(TLInfo); + } + assert(TLInfo && "null target lowering 16"); + assert(InstrInfo && "null instr info 16"); + assert(FrameLowering && "null frame lowering 16"); } +void MipsTargetMachine::setHelperClassesMipsSE() { + InstrInfo16.swap(InstrInfo); + FrameLowering16.swap(FrameLowering); + TLInfo16.swap(TLInfo); + if (!InstrInfoSE) { + InstrInfo.reset(MipsInstrInfo::create(*this)); + FrameLowering.reset(MipsFrameLowering::create(*this, Subtarget)); + TLInfo.reset(MipsTargetLowering::create(*this)); + } else { + InstrInfoSE.swap(InstrInfo); + FrameLoweringSE.swap(FrameLowering); + TLInfoSE.swap(TLInfo); + } + assert(TLInfo && "null target lowering in SE"); + assert(InstrInfo && "null instr info SE"); + assert(FrameLowering && "null frame lowering SE"); +} void MipsebTargetMachine::anchor() { } MipsebTargetMachine:: @@ -90,6 +142,7 @@ public: return *getMipsTargetMachine().getSubtargetImpl(); } + virtual void addIRPasses(); virtual bool addInstSelector(); virtual bool addPreEmitPass(); }; @@ -99,24 +152,50 @@ TargetPassConfig *MipsTargetMachine::createPassConfig(PassManagerBase &PM) { return new MipsPassConfig(this, PM); } +void MipsPassConfig::addIRPasses() { + TargetPassConfig::addIRPasses(); + if (getMipsSubtarget().os16()) + addPass(createMipsOs16(getMipsTargetMachine())); +} // Install an instruction selector pass using // the ISelDag to gen Mips code. bool MipsPassConfig::addInstSelector() { - addPass(createMipsISelDag(getMipsTargetMachine())); + if (getMipsSubtarget().allowMixed16_32()) { + addPass(createMipsModuleISelDag(getMipsTargetMachine())); + addPass(createMips16ISelDag(getMipsTargetMachine())); + addPass(createMipsSEISelDag(getMipsTargetMachine())); + } else { + addPass(createMipsISelDag(getMipsTargetMachine())); + } return false; } +void MipsTargetMachine::addAnalysisPasses(PassManagerBase &PM) { + if (Subtarget.allowMixed16_32()) { + DEBUG(errs() << "No "); + //FIXME: The Basic Target Transform Info + // pass needs to become a function pass instead of + // being an immutable pass and then this method as it exists now + // would be unnecessary. + PM.add(createNoTargetTransformInfoPass()); + } else + LLVMTargetMachine::addAnalysisPasses(PM); + DEBUG(errs() << "Target Transform Info Pass Added\n"); +} + // Implemented by targets that want to run passes immediately before // machine code is emitted. return true if -print-machineinstrs should // print out the code after the passes. 
bool MipsPassConfig::addPreEmitPass() { MipsTargetMachine &TM = getMipsTargetMachine(); + const MipsSubtarget &Subtarget = TM.getSubtarget<MipsSubtarget>(); addPass(createMipsDelaySlotFillerPass(TM)); - // NOTE: long branch has not been implemented for mips16. - if (TM.getSubtarget<MipsSubtarget>().hasStandardEncoding()) + if (Subtarget.hasStandardEncoding() || + Subtarget.allowMixed16_32()) addPass(createMipsLongBranchPass(TM)); - if (TM.getSubtarget<MipsSubtarget>().inMips16Mode()) + if (Subtarget.inMips16Mode() || + Subtarget.allowMixed16_32()) addPass(createMipsConstantIslandPass(TM)); return true; diff --git a/contrib/llvm/lib/Target/Mips/MipsTargetMachine.h b/contrib/llvm/lib/Target/Mips/MipsTargetMachine.h index 7e5f192..ee55708 100644 --- a/contrib/llvm/lib/Target/Mips/MipsTargetMachine.h +++ b/contrib/llvm/lib/Target/Mips/MipsTargetMachine.h @@ -21,6 +21,8 @@ #include "MipsSelectionDAGInfo.h" #include "MipsSubtarget.h" #include "llvm/ADT/OwningPtr.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/SelectionDAGISel.h" #include "llvm/IR/DataLayout.h" #include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetMachine.h" @@ -35,6 +37,12 @@ class MipsTargetMachine : public LLVMTargetMachine { OwningPtr<const MipsInstrInfo> InstrInfo; OwningPtr<const MipsFrameLowering> FrameLowering; OwningPtr<const MipsTargetLowering> TLInfo; + OwningPtr<const MipsInstrInfo> InstrInfo16; + OwningPtr<const MipsFrameLowering> FrameLowering16; + OwningPtr<const MipsTargetLowering> TLInfo16; + OwningPtr<const MipsInstrInfo> InstrInfoSE; + OwningPtr<const MipsFrameLowering> FrameLoweringSE; + OwningPtr<const MipsTargetLowering> TLInfoSE; MipsSelectionDAGInfo TSInfo; MipsJITInfo JITInfo; @@ -47,6 +55,8 @@ public: virtual ~MipsTargetMachine() {} + virtual void addAnalysisPasses(PassManagerBase &PM); + virtual const MipsInstrInfo *getInstrInfo() const { return InstrInfo.get(); } virtual const TargetFrameLowering *getFrameLowering() const @@ -73,6 +83,13 @@ public: // Pass Pipeline Configuration virtual TargetPassConfig *createPassConfig(PassManagerBase &PM); virtual bool addCodeEmitter(PassManagerBase &PM, JITCodeEmitter &JCE); + + // Set helper classes + void setHelperClassesMips16(); + + void setHelperClassesMipsSE(); + + }; /// MipsebTargetMachine - Mips32/64 big endian target machine. 
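setHelperClassesMips16() and setHelperClassesMipsSE() above rotate two cached sets of helper objects (instruction info, frame lowering, target lowering) with OwningPtr::swap, building each variant at most once through the create() factories. A compressed sketch of that rotation, with std::unique_ptr standing in for OwningPtr and a placeholder Info type (all names here are illustrative, not the real classes):

#include <cassert>
#include <memory>

struct Info { bool IsMips16; };

struct TargetMachineSketch {
  std::unique_ptr<Info> Active;    // what getInstrInfo()/getFrameLowering() hand out
  std::unique_ptr<Info> Cached16;  // lazily built Mips16 variants
  std::unique_ptr<Info> CachedSE;  // lazily built standard-encoding variants

  void switchToMips16() {
    CachedSE.swap(Active);           // stash the SE objects
    if (!Cached16)
      Active.reset(new Info{true});  // first switch: build the Mips16 objects
    else
      Cached16.swap(Active);         // later switches: reuse the cached ones
    assert(Active && "null helper after mode switch");
  }
  // switchToSE() is the mirror image, swapping through Cached16/CachedSE.
};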
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTX.h b/contrib/llvm/lib/Target/NVPTX/NVPTX.h index 6a53a44..072c65d 100644 --- a/contrib/llvm/lib/Target/NVPTX/NVPTX.h +++ b/contrib/llvm/lib/Target/NVPTX/NVPTX.h @@ -16,6 +16,7 @@ #define LLVM_TARGET_NVPTX_H #include "MCTargetDesc/NVPTXBaseInfo.h" +#include "llvm/ADT/StringMap.h" #include "llvm/IR/Module.h" #include "llvm/IR/Value.h" #include "llvm/Support/ErrorHandling.h" @@ -62,6 +63,9 @@ createNVPTXISelDag(NVPTXTargetMachine &TM, llvm::CodeGenOpt::Level OptLevel); FunctionPass *createLowerStructArgsPass(NVPTXTargetMachine &); FunctionPass *createNVPTXReMatPass(NVPTXTargetMachine &); FunctionPass *createNVPTXReMatBlockPass(NVPTXTargetMachine &); +ModulePass *createGenericToNVVMPass(); +ModulePass *createNVVMReflectPass(); +ModulePass *createNVVMReflectPass(const StringMap<int>& Mapping); bool isImageOrSamplerVal(const Value *, const Module *); diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp index ce5d78a..229e4e5 100644 --- a/contrib/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp +++ b/contrib/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp @@ -68,11 +68,12 @@ InterleaveSrc("nvptx-emit-src", cl::ZeroOrMore, namespace { /// DiscoverDependentGlobals - Return a set of GlobalVariables on which \p V /// depends. -void DiscoverDependentGlobals(Value *V, DenseSet<GlobalVariable *> &Globals) { - if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V)) +void DiscoverDependentGlobals(const Value *V, + DenseSet<const GlobalVariable *> &Globals) { + if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(V)) Globals.insert(GV); else { - if (User *U = dyn_cast<User>(V)) { + if (const User *U = dyn_cast<User>(V)) { for (unsigned i = 0, e = U->getNumOperands(); i != e; ++i) { DiscoverDependentGlobals(U->getOperand(i), Globals); } @@ -84,8 +85,9 @@ void DiscoverDependentGlobals(Value *V, DenseSet<GlobalVariable *> &Globals) { /// instances to be emitted, but only after any dependents have been added /// first. void VisitGlobalVariableForEmission( - GlobalVariable *GV, SmallVectorImpl<GlobalVariable *> &Order, - DenseSet<GlobalVariable *> &Visited, DenseSet<GlobalVariable *> &Visiting) { + const GlobalVariable *GV, SmallVectorImpl<const GlobalVariable *> &Order, + DenseSet<const GlobalVariable *> &Visited, + DenseSet<const GlobalVariable *> &Visiting) { // Have we already visited this one? 
if (Visited.count(GV)) return; @@ -98,12 +100,12 @@ void VisitGlobalVariableForEmission( Visiting.insert(GV); // Make sure we visit all dependents first - DenseSet<GlobalVariable *> Others; + DenseSet<const GlobalVariable *> Others; for (unsigned i = 0, e = GV->getNumOperands(); i != e; ++i) DiscoverDependentGlobals(GV->getOperand(i), Others); - for (DenseSet<GlobalVariable *>::iterator I = Others.begin(), - E = Others.end(); + for (DenseSet<const GlobalVariable *>::iterator I = Others.begin(), + E = Others.end(); I != E; ++I) VisitGlobalVariableForEmission(*I, Order, Visited, Visiting); @@ -405,6 +407,11 @@ void NVPTXAsmPrinter::EmitFunctionEntryLabel() { SmallString<128> Str; raw_svector_ostream O(Str); + if (!GlobalsEmitted) { + emitGlobals(*MF->getFunction()->getParent()); + GlobalsEmitted = true; + } + // Set up MRI = &MF->getRegInfo(); F = MF->getFunction(); @@ -695,7 +702,7 @@ void NVPTXAsmPrinter::emitDeclaration(const Function *F, raw_ostream &O) { else O << ".func "; printReturnValStr(F, O); - O << *CurrentFnSym << "\n"; + O << *Mang->getSymbol(F) << "\n"; emitFunctionParamList(F, O); O << ";\n"; } @@ -795,7 +802,7 @@ static bool useFuncSeen(const Constant *C, return false; } -void NVPTXAsmPrinter::emitDeclarations(Module &M, raw_ostream &O) { +void NVPTXAsmPrinter::emitDeclarations(const Module &M, raw_ostream &O) { llvm::DenseMap<const Function *, bool> seenMap; for (Module::const_iterator FI = M.begin(), FE = M.end(); FI != FE; ++FI) { const Function *F = FI; @@ -805,7 +812,6 @@ void NVPTXAsmPrinter::emitDeclarations(Module &M, raw_ostream &O) { continue; if (F->getIntrinsicID()) continue; - CurrentFnSym = Mang->getSymbol(F); emitDeclaration(F, O); continue; } @@ -817,14 +823,12 @@ void NVPTXAsmPrinter::emitDeclarations(Module &M, raw_ostream &O) { // The use is in the initialization of a global variable // that is a function pointer, so print a declaration // for the original function - CurrentFnSym = Mang->getSymbol(F); emitDeclaration(F, O); break; } // Emit a declaration of this function if the function that // uses this constant expr has already been seen. if (useFuncSeen(C, seenMap)) { - CurrentFnSym = Mang->getSymbol(F); emitDeclaration(F, O); break; } @@ -844,7 +848,6 @@ void NVPTXAsmPrinter::emitDeclarations(Module &M, raw_ostream &O) { // appearing in the module before the callee. so print out // a declaration for the callee. if (seenMap.find(caller) != seenMap.end()) { - CurrentFnSym = Mang->getSymbol(F); emitDeclaration(F, O); break; } @@ -921,6 +924,12 @@ bool NVPTXAsmPrinter::doInitialization(Module &M) { if (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA) recordAndEmitFilenames(M); + GlobalsEmitted = false; + + return false; // success +} + +void NVPTXAsmPrinter::emitGlobals(const Module &M) { SmallString<128> Str2; raw_svector_ostream OS2(Str2); @@ -931,13 +940,13 @@ bool NVPTXAsmPrinter::doInitialization(Module &M) { // global variable in order, and ensure that we emit it *after* its dependent // globals. We use a little extra memory maintaining both a set and a list to // have fast searches while maintaining a strict ordering. 
- SmallVector<GlobalVariable *, 8> Globals; - DenseSet<GlobalVariable *> GVVisited; - DenseSet<GlobalVariable *> GVVisiting; + SmallVector<const GlobalVariable *, 8> Globals; + DenseSet<const GlobalVariable *> GVVisited; + DenseSet<const GlobalVariable *> GVVisiting; // Visit each global variable, in order - for (Module::global_iterator I = M.global_begin(), E = M.global_end(); I != E; - ++I) + for (Module::const_global_iterator I = M.global_begin(), E = M.global_end(); + I != E; ++I) VisitGlobalVariableForEmission(I, Globals, GVVisited, GVVisiting); assert(GVVisited.size() == M.getGlobalList().size() && @@ -951,7 +960,6 @@ bool NVPTXAsmPrinter::doInitialization(Module &M) { OS2 << '\n'; OutStreamer.EmitRawText(OS2.str()); - return false; // success } void NVPTXAsmPrinter::emitHeader(Module &M, raw_ostream &O) { @@ -989,6 +997,14 @@ void NVPTXAsmPrinter::emitHeader(Module &M, raw_ostream &O) { } bool NVPTXAsmPrinter::doFinalization(Module &M) { + + // If we did not emit any functions, then the global declarations have not + // yet been emitted. + if (!GlobalsEmitted) { + emitGlobals(M); + GlobalsEmitted = true; + } + // XXX Temproarily remove global variables so that doFinalization() will not // emit them again (global variables are emitted at beginning). @@ -1063,7 +1079,8 @@ void NVPTXAsmPrinter::emitLinkageDirective(const GlobalValue *V, } } -void NVPTXAsmPrinter::printModuleLevelGV(GlobalVariable *GVar, raw_ostream &O, +void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar, + raw_ostream &O, bool processDemoted) { // Skip meta data @@ -1107,10 +1124,10 @@ void NVPTXAsmPrinter::printModuleLevelGV(GlobalVariable *GVar, raw_ostream &O, if (llvm::isSampler(*GVar)) { O << ".global .samplerref " << llvm::getSamplerName(*GVar); - Constant *Initializer = NULL; + const Constant *Initializer = NULL; if (GVar->hasInitializer()) Initializer = GVar->getInitializer(); - ConstantInt *CI = NULL; + const ConstantInt *CI = NULL; if (Initializer) CI = dyn_cast<ConstantInt>(Initializer); if (CI) { @@ -1183,7 +1200,7 @@ void NVPTXAsmPrinter::printModuleLevelGV(GlobalVariable *GVar, raw_ostream &O, if (localDecls.find(demotedFunc) != localDecls.end()) localDecls[demotedFunc].push_back(GVar); else { - std::vector<GlobalVariable *> temp; + std::vector<const GlobalVariable *> temp; temp.push_back(GVar); localDecls[demotedFunc] = temp; } @@ -1199,7 +1216,11 @@ void NVPTXAsmPrinter::printModuleLevelGV(GlobalVariable *GVar, raw_ostream &O, if (ETy->isPrimitiveType() || ETy->isIntegerTy() || isa<PointerType>(ETy)) { O << " ."; - O << getPTXFundamentalTypeStr(ETy, false); + // Special case: ABI requires that we use .u8 for predicates + if (ETy->isIntegerTy(1)) + O << "u8"; + else + O << getPTXFundamentalTypeStr(ETy, false); O << " "; O << *Mang->getSymbol(GVar); @@ -1209,7 +1230,7 @@ void NVPTXAsmPrinter::printModuleLevelGV(GlobalVariable *GVar, raw_ostream &O, (PTy->getAddressSpace() == llvm::ADDRESS_SPACE_CONST_NOT_GEN) || (PTy->getAddressSpace() == llvm::ADDRESS_SPACE_CONST)) && GVar->hasInitializer()) { - Constant *Initializer = GVar->getInitializer(); + const Constant *Initializer = GVar->getInitializer(); if (!Initializer->isNullValue()) { O << " = "; printScalarConstant(Initializer, O); @@ -1233,7 +1254,7 @@ void NVPTXAsmPrinter::printModuleLevelGV(GlobalVariable *GVar, raw_ostream &O, (PTy->getAddressSpace() == llvm::ADDRESS_SPACE_CONST_NOT_GEN) || (PTy->getAddressSpace() == llvm::ADDRESS_SPACE_CONST)) && GVar->hasInitializer()) { - Constant *Initializer = GVar->getInitializer(); + const 
Constant *Initializer = GVar->getInitializer(); if (!isa<UndefValue>(Initializer) && !Initializer->isNullValue()) { AggBuffer aggBuffer(ElementSize, O, *this); bufferAggregateConstant(Initializer, &aggBuffer); @@ -1283,7 +1304,7 @@ void NVPTXAsmPrinter::emitDemotedVars(const Function *f, raw_ostream &O) { if (localDecls.find(f) == localDecls.end()) return; - std::vector<GlobalVariable *> &gvars = localDecls[f]; + std::vector<const GlobalVariable *> &gvars = localDecls[f]; for (unsigned i = 0, e = gvars.size(); i != e; ++i) { O << "\t// demoted variable\n\t"; @@ -1448,7 +1469,7 @@ void NVPTXAsmPrinter::printParamName(Function::const_arg_iterator I, int paramIndex, raw_ostream &O) { if ((nvptxSubtarget.getDrvInterface() == NVPTX::NVCL) || (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA)) - O << *CurrentFnSym << "_param_" << paramIndex; + O << *Mang->getSymbol(I->getParent()) << "_param_" << paramIndex; else { std::string argName = I->getName(); const char *p = argName.c_str(); @@ -1507,11 +1528,13 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) { if (llvm::isImage(*I)) { std::string sname = I->getName(); if (llvm::isImageWriteOnly(*I)) - O << "\t.param .surfref " << *CurrentFnSym << "_param_" << paramIndex; + O << "\t.param .surfref " << *Mang->getSymbol(F) << "_param_" + << paramIndex; else // Default image is read_only - O << "\t.param .texref " << *CurrentFnSym << "_param_" << paramIndex; + O << "\t.param .texref " << *Mang->getSymbol(F) << "_param_" + << paramIndex; } else // Should be llvm::isSampler(*I) - O << "\t.param .samplerref " << *CurrentFnSym << "_param_" + O << "\t.param .samplerref " << *Mang->getSymbol(F) << "_param_" << paramIndex; continue; } @@ -1564,7 +1587,13 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) { } // non-pointer scalar to kernel func - O << "\t.param ." 
<< getPTXFundamentalTypeStr(Ty) << " "; + O << "\t.param ."; + // Special case: predicate operands become .u8 types + if (Ty->isIntegerTy(1)) + O << "u8"; + else + O << getPTXFundamentalTypeStr(Ty); + O << " "; printParamName(I, paramIndex, O); continue; } @@ -1751,12 +1780,12 @@ void NVPTXAsmPrinter::printFPConstant(const ConstantFP *Fp, raw_ostream &O) { O << utohexstr(API.getZExtValue()); } -void NVPTXAsmPrinter::printScalarConstant(Constant *CPV, raw_ostream &O) { - if (ConstantInt *CI = dyn_cast<ConstantInt>(CPV)) { +void NVPTXAsmPrinter::printScalarConstant(const Constant *CPV, raw_ostream &O) { + if (const ConstantInt *CI = dyn_cast<ConstantInt>(CPV)) { O << CI->getValue(); return; } - if (ConstantFP *CFP = dyn_cast<ConstantFP>(CPV)) { + if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CPV)) { printFPConstant(CFP, O); return; } @@ -1764,13 +1793,13 @@ void NVPTXAsmPrinter::printScalarConstant(Constant *CPV, raw_ostream &O) { O << "0"; return; } - if (GlobalValue *GVar = dyn_cast<GlobalValue>(CPV)) { + if (const GlobalValue *GVar = dyn_cast<GlobalValue>(CPV)) { O << *Mang->getSymbol(GVar); return; } - if (ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) { - Value *v = Cexpr->stripPointerCasts(); - if (GlobalValue *GVar = dyn_cast<GlobalValue>(v)) { + if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) { + const Value *v = Cexpr->stripPointerCasts(); + if (const GlobalValue *GVar = dyn_cast<GlobalValue>(v)) { O << *Mang->getSymbol(GVar); return; } else { @@ -1781,7 +1810,7 @@ void NVPTXAsmPrinter::printScalarConstant(Constant *CPV, raw_ostream &O) { llvm_unreachable("Not scalar type found in printScalarConstant()"); } -void NVPTXAsmPrinter::bufferLEByte(Constant *CPV, int Bytes, +void NVPTXAsmPrinter::bufferLEByte(const Constant *CPV, int Bytes, AggBuffer *aggBuffer) { const DataLayout *TD = TM.getDataLayout(); @@ -1809,13 +1838,13 @@ void NVPTXAsmPrinter::bufferLEByte(Constant *CPV, int Bytes, ptr = (unsigned char *)&int16; aggBuffer->addBytes(ptr, 2, Bytes); } else if (ETy == Type::getInt32Ty(CPV->getContext())) { - if (ConstantInt *constInt = dyn_cast<ConstantInt>(CPV)) { + if (const ConstantInt *constInt = dyn_cast<ConstantInt>(CPV)) { int int32 = (int)(constInt->getZExtValue()); ptr = (unsigned char *)&int32; aggBuffer->addBytes(ptr, 4, Bytes); break; - } else if (ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) { - if (ConstantInt *constInt = dyn_cast<ConstantInt>( + } else if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) { + if (const ConstantInt *constInt = dyn_cast<ConstantInt>( ConstantFoldConstantExpression(Cexpr, TD))) { int int32 = (int)(constInt->getZExtValue()); ptr = (unsigned char *)&int32; @@ -1831,13 +1860,13 @@ void NVPTXAsmPrinter::bufferLEByte(Constant *CPV, int Bytes, } llvm_unreachable("unsupported integer const type"); } else if (ETy == Type::getInt64Ty(CPV->getContext())) { - if (ConstantInt *constInt = dyn_cast<ConstantInt>(CPV)) { + if (const ConstantInt *constInt = dyn_cast<ConstantInt>(CPV)) { long long int64 = (long long)(constInt->getZExtValue()); ptr = (unsigned char *)&int64; aggBuffer->addBytes(ptr, 8, Bytes); break; - } else if (ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) { - if (ConstantInt *constInt = dyn_cast<ConstantInt>( + } else if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) { + if (const ConstantInt *constInt = dyn_cast<ConstantInt>( ConstantFoldConstantExpression(Cexpr, TD))) { long long int64 = (long long)(constInt->getZExtValue()); ptr = (unsigned char *)&int64; @@ -1858,7 +1887,7 @@ void 
NVPTXAsmPrinter::bufferLEByte(Constant *CPV, int Bytes, } case Type::FloatTyID: case Type::DoubleTyID: { - ConstantFP *CFP = dyn_cast<ConstantFP>(CPV); + const ConstantFP *CFP = dyn_cast<ConstantFP>(CPV); const Type *Ty = CFP->getType(); if (Ty == Type::getFloatTy(CPV->getContext())) { float float32 = (float) CFP->getValueAPF().convertToFloat(); @@ -1874,10 +1903,10 @@ void NVPTXAsmPrinter::bufferLEByte(Constant *CPV, int Bytes, break; } case Type::PointerTyID: { - if (GlobalValue *GVar = dyn_cast<GlobalValue>(CPV)) { + if (const GlobalValue *GVar = dyn_cast<GlobalValue>(CPV)) { aggBuffer->addSymbol(GVar); - } else if (ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) { - Value *v = Cexpr->stripPointerCasts(); + } else if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) { + const Value *v = Cexpr->stripPointerCasts(); aggBuffer->addSymbol(v); } unsigned int s = TD->getTypeAllocSize(CPV->getType()); @@ -1906,7 +1935,7 @@ void NVPTXAsmPrinter::bufferLEByte(Constant *CPV, int Bytes, } } -void NVPTXAsmPrinter::bufferAggregateConstant(Constant *CPV, +void NVPTXAsmPrinter::bufferAggregateConstant(const Constant *CPV, AggBuffer *aggBuffer) { const DataLayout *TD = TM.getDataLayout(); int Bytes; diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.h b/contrib/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.h index 6dc9fc0..7faa6b2 100644 --- a/contrib/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.h +++ b/contrib/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.h @@ -91,7 +91,7 @@ class LLVM_LIBRARY_VISIBILITY NVPTXAsmPrinter : public AsmPrinter { unsigned char *buffer; // the buffer unsigned numSymbols; // number of symbol addresses SmallVector<unsigned, 4> symbolPosInBuffer; - SmallVector<Value *, 4> Symbols; + SmallVector<const Value *, 4> Symbols; private: unsigned curpos; @@ -128,7 +128,7 @@ class LLVM_LIBRARY_VISIBILITY NVPTXAsmPrinter : public AsmPrinter { } return curpos; } - void addSymbol(Value *GVar) { + void addSymbol(const Value *GVar) { symbolPosInBuffer.push_back(curpos); Symbols.push_back(GVar); numSymbols++; @@ -153,11 +153,11 @@ class LLVM_LIBRARY_VISIBILITY NVPTXAsmPrinter : public AsmPrinter { if (pos) O << ", "; if (pos == nextSymbolPos) { - Value *v = Symbols[nSym]; - if (GlobalValue *GVar = dyn_cast<GlobalValue>(v)) { + const Value *v = Symbols[nSym]; + if (const GlobalValue *GVar = dyn_cast<GlobalValue>(v)) { MCSymbol *Name = AP.Mang->getSymbol(GVar); O << *Name; - } else if (ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(v)) { + } else if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(v)) { O << *nvptx::LowerConstant(Cexpr, AP); } else llvm_unreachable("symbol type unknown"); @@ -205,10 +205,12 @@ private: void printImplicitDef(const MachineInstr *MI, raw_ostream &O) const; // definition autogenerated. void printInstruction(const MachineInstr *MI, raw_ostream &O); - void printModuleLevelGV(GlobalVariable *GVar, raw_ostream &O, bool = false); + void printModuleLevelGV(const GlobalVariable *GVar, raw_ostream &O, + bool = false); void printParamName(int paramIndex, raw_ostream &O); void printParamName(Function::const_arg_iterator I, int paramIndex, raw_ostream &O); + void emitGlobals(const Module &M); void emitHeader(Module &M, raw_ostream &O); void emitKernelFunctionDirectives(const Function &F, raw_ostream &O) const; void emitVirtualRegister(unsigned int vr, bool isVec, raw_ostream &O); @@ -234,6 +236,8 @@ protected: private: std::string CurrentBankselLabelInBasicBlock; + bool GlobalsEmitted; + // This is specific per MachineFunction. 
const MachineRegisterInfo *MRI; // The contents are specific for each @@ -247,7 +251,7 @@ private: std::map<const Type *, std::string> TypeNameMap; // List of variables demoted to a function scope. - std::map<const Function *, std::vector<GlobalVariable *> > localDecls; + std::map<const Function *, std::vector<const GlobalVariable *> > localDecls; // To record filename to ID mapping std::map<std::string, unsigned> filenameMap; @@ -256,15 +260,15 @@ private: void emitPTXGlobalVariable(const GlobalVariable *GVar, raw_ostream &O); void emitPTXAddressSpace(unsigned int AddressSpace, raw_ostream &O) const; std::string getPTXFundamentalTypeStr(const Type *Ty, bool = true) const; - void printScalarConstant(Constant *CPV, raw_ostream &O); + void printScalarConstant(const Constant *CPV, raw_ostream &O); void printFPConstant(const ConstantFP *Fp, raw_ostream &O); - void bufferLEByte(Constant *CPV, int Bytes, AggBuffer *aggBuffer); - void bufferAggregateConstant(Constant *CV, AggBuffer *aggBuffer); + void bufferLEByte(const Constant *CPV, int Bytes, AggBuffer *aggBuffer); + void bufferAggregateConstant(const Constant *CV, AggBuffer *aggBuffer); void printOperandProper(const MachineOperand &MO); void emitLinkageDirective(const GlobalValue *V, raw_ostream &O); - void emitDeclarations(Module &, raw_ostream &O); + void emitDeclarations(const Module &, raw_ostream &O); void emitDeclaration(const Function *, raw_ostream &O); static const char *getRegisterName(unsigned RegNo); diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp new file mode 100644 index 0000000..1077c46 --- /dev/null +++ b/contrib/llvm/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp @@ -0,0 +1,436 @@ +//===-- GenericToNVVM.cpp - Convert generic module to NVVM module - C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Convert generic global variables into either .global or .const access based +// on the variable's "constant" qualifier. 
+// +//===----------------------------------------------------------------------===// + +#include "NVPTX.h" +#include "NVPTXUtilities.h" +#include "MCTargetDesc/NVPTXBaseInfo.h" + +#include "llvm/PassManager.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/Operator.h" +#include "llvm/ADT/ValueMap.h" +#include "llvm/CodeGen/MachineFunctionAnalysis.h" +#include "llvm/CodeGen/ValueTypes.h" +#include "llvm/IR/IRBuilder.h" + +using namespace llvm; + +namespace llvm { +void initializeGenericToNVVMPass(PassRegistry &); +} + +namespace { +class GenericToNVVM : public ModulePass { +public: + static char ID; + + GenericToNVVM() : ModulePass(ID) {} + + virtual bool runOnModule(Module &M); + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + } + +private: + Value *getOrInsertCVTA(Module *M, Function *F, GlobalVariable *GV, + IRBuilder<> &Builder); + Value *remapConstant(Module *M, Function *F, Constant *C, + IRBuilder<> &Builder); + Value *remapConstantVectorOrConstantAggregate(Module *M, Function *F, + Constant *C, + IRBuilder<> &Builder); + Value *remapConstantExpr(Module *M, Function *F, ConstantExpr *C, + IRBuilder<> &Builder); + void remapNamedMDNode(Module *M, NamedMDNode *N); + MDNode *remapMDNode(Module *M, MDNode *N); + + typedef ValueMap<GlobalVariable *, GlobalVariable *> GVMapTy; + typedef ValueMap<Constant *, Value *> ConstantToValueMapTy; + GVMapTy GVMap; + ConstantToValueMapTy ConstantToValueMap; +}; +} + +char GenericToNVVM::ID = 0; + +ModulePass *llvm::createGenericToNVVMPass() { return new GenericToNVVM(); } + +INITIALIZE_PASS( + GenericToNVVM, "generic-to-nvvm", + "Ensure that the global variables are in the global address space", false, + false) + +bool GenericToNVVM::runOnModule(Module &M) { + // Create a clone of each global variable that has the default address space. + // The clone is created with the global address space specifier, and the pair + // of original global variable and its clone is placed in the GVMap for later + // use. + + for (Module::global_iterator I = M.global_begin(), E = M.global_end(); + I != E;) { + GlobalVariable *GV = I++; + if (GV->getType()->getAddressSpace() == llvm::ADDRESS_SPACE_GENERIC && + !llvm::isTexture(*GV) && !llvm::isSurface(*GV) && + !GV->getName().startswith("llvm.")) { + GlobalVariable *NewGV = new GlobalVariable( + M, GV->getType()->getElementType(), GV->isConstant(), + GV->getLinkage(), GV->hasInitializer() ? GV->getInitializer() : NULL, + "", GV, GV->getThreadLocalMode(), llvm::ADDRESS_SPACE_GLOBAL); + NewGV->copyAttributesFrom(GV); + GVMap[GV] = NewGV; + } + } + + // Return immediately, if every global variable has a specific address space + // specifier. + if (GVMap.empty()) { + return false; + } + + // Walk through the instructions in function defitinions, and replace any use + // of original global variables in GVMap with a use of the corresponding + // copies in GVMap. If necessary, promote constants to instructions. 
+ for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) { + if (I->isDeclaration()) { + continue; + } + IRBuilder<> Builder(I->getEntryBlock().getFirstNonPHIOrDbg()); + for (Function::iterator BBI = I->begin(), BBE = I->end(); BBI != BBE; + ++BBI) { + for (BasicBlock::iterator II = BBI->begin(), IE = BBI->end(); II != IE; + ++II) { + for (unsigned i = 0, e = II->getNumOperands(); i < e; ++i) { + Value *Operand = II->getOperand(i); + if (isa<Constant>(Operand)) { + II->setOperand( + i, remapConstant(&M, I, cast<Constant>(Operand), Builder)); + } + } + } + } + ConstantToValueMap.clear(); + } + + // Walk through the metadata section and update the debug information + // associated with the global variables in the default address space. + for (Module::named_metadata_iterator I = M.named_metadata_begin(), + E = M.named_metadata_end(); + I != E; I++) { + remapNamedMDNode(&M, I); + } + + // Walk through the global variable initializers, and replace any use of + // original global variables in GVMap with a use of the corresponding copies + // in GVMap. The copies need to be bitcast to the original global variable + // types, as we cannot use cvta in global variable initializers. + for (GVMapTy::iterator I = GVMap.begin(), E = GVMap.end(); I != E;) { + GlobalVariable *GV = I->first; + GlobalVariable *NewGV = I->second; + ++I; + Constant *BitCastNewGV = ConstantExpr::getBitCast(NewGV, GV->getType()); + // At this point, the remaining uses of GV should be found only in global + // variable initializers, as other uses have been already been removed + // while walking through the instructions in function definitions. + for (Value::use_iterator UI = GV->use_begin(), UE = GV->use_end(); + UI != UE;) { + Use &U = (UI++).getUse(); + U.set(BitCastNewGV); + } + std::string Name = GV->getName(); + GV->removeDeadConstantUsers(); + GV->eraseFromParent(); + NewGV->setName(Name); + } + GVMap.clear(); + + return true; +} + +Value *GenericToNVVM::getOrInsertCVTA(Module *M, Function *F, + GlobalVariable *GV, + IRBuilder<> &Builder) { + PointerType *GVType = GV->getType(); + Value *CVTA = NULL; + + // See if the address space conversion requires the operand to be bitcast + // to i8 addrspace(n)* first. + EVT ExtendedGVType = EVT::getEVT(GVType->getElementType(), true); + if (!ExtendedGVType.isInteger() && !ExtendedGVType.isFloatingPoint()) { + // A bitcast to i8 addrspace(n)* on the operand is needed. + LLVMContext &Context = M->getContext(); + unsigned int AddrSpace = GVType->getAddressSpace(); + Type *DestTy = PointerType::get(Type::getInt8Ty(Context), AddrSpace); + CVTA = Builder.CreateBitCast(GV, DestTy, "cvta"); + // Insert the address space conversion. + Type *ResultType = + PointerType::get(Type::getInt8Ty(Context), llvm::ADDRESS_SPACE_GENERIC); + SmallVector<Type *, 2> ParamTypes; + ParamTypes.push_back(ResultType); + ParamTypes.push_back(DestTy); + Function *CVTAFunction = Intrinsic::getDeclaration( + M, Intrinsic::nvvm_ptr_global_to_gen, ParamTypes); + CVTA = Builder.CreateCall(CVTAFunction, CVTA, "cvta"); + // Another bitcast from i8 * to <the element type of GVType> * is + // required. + DestTy = + PointerType::get(GVType->getElementType(), llvm::ADDRESS_SPACE_GENERIC); + CVTA = Builder.CreateBitCast(CVTA, DestTy, "cvta"); + } else { + // A simple CVTA is enough. 
+ SmallVector<Type *, 2> ParamTypes; + ParamTypes.push_back(PointerType::get(GVType->getElementType(), + llvm::ADDRESS_SPACE_GENERIC)); + ParamTypes.push_back(GVType); + Function *CVTAFunction = Intrinsic::getDeclaration( + M, Intrinsic::nvvm_ptr_global_to_gen, ParamTypes); + CVTA = Builder.CreateCall(CVTAFunction, GV, "cvta"); + } + + return CVTA; +} + +Value *GenericToNVVM::remapConstant(Module *M, Function *F, Constant *C, + IRBuilder<> &Builder) { + // If the constant C has been converted already in the given function F, just + // return the converted value. + ConstantToValueMapTy::iterator CTII = ConstantToValueMap.find(C); + if (CTII != ConstantToValueMap.end()) { + return CTII->second; + } + + Value *NewValue = C; + if (isa<GlobalVariable>(C)) { + // If the constant C is a global variable and is found in GVMap, generate a + // set set of instructions that convert the clone of C with the global + // address space specifier to a generic pointer. + // The constant C cannot be used here, as it will be erased from the + // module eventually. And the clone of C with the global address space + // specifier cannot be used here either, as it will affect the types of + // other instructions in the function. Hence, this address space conversion + // is required. + GVMapTy::iterator I = GVMap.find(cast<GlobalVariable>(C)); + if (I != GVMap.end()) { + NewValue = getOrInsertCVTA(M, F, I->second, Builder); + } + } else if (isa<ConstantVector>(C) || isa<ConstantArray>(C) || + isa<ConstantStruct>(C)) { + // If any element in the constant vector or aggregate C is or uses a global + // variable in GVMap, the constant C needs to be reconstructed, using a set + // of instructions. + NewValue = remapConstantVectorOrConstantAggregate(M, F, C, Builder); + } else if (isa<ConstantExpr>(C)) { + // If any operand in the constant expression C is or uses a global variable + // in GVMap, the constant expression C needs to be reconstructed, using a + // set of instructions. + NewValue = remapConstantExpr(M, F, cast<ConstantExpr>(C), Builder); + } + + ConstantToValueMap[C] = NewValue; + return NewValue; +} + +Value *GenericToNVVM::remapConstantVectorOrConstantAggregate( + Module *M, Function *F, Constant *C, IRBuilder<> &Builder) { + bool OperandChanged = false; + SmallVector<Value *, 4> NewOperands; + unsigned NumOperands = C->getNumOperands(); + + // Check if any element is or uses a global variable in GVMap, and thus + // converted to another value. + for (unsigned i = 0; i < NumOperands; ++i) { + Value *Operand = C->getOperand(i); + Value *NewOperand = remapConstant(M, F, cast<Constant>(Operand), Builder); + OperandChanged |= Operand != NewOperand; + NewOperands.push_back(NewOperand); + } + + // If none of the elements has been modified, return C as it is. + if (!OperandChanged) { + return C; + } + + // If any of the elements has been modified, construct the equivalent + // vector or aggregate value with a set instructions and the converted + // elements. 
+ Value *NewValue = UndefValue::get(C->getType()); + if (isa<ConstantVector>(C)) { + for (unsigned i = 0; i < NumOperands; ++i) { + Value *Idx = ConstantInt::get(Type::getInt32Ty(M->getContext()), i); + NewValue = Builder.CreateInsertElement(NewValue, NewOperands[i], Idx); + } + } else { + for (unsigned i = 0; i < NumOperands; ++i) { + NewValue = + Builder.CreateInsertValue(NewValue, NewOperands[i], makeArrayRef(i)); + } + } + + return NewValue; +} + +Value *GenericToNVVM::remapConstantExpr(Module *M, Function *F, ConstantExpr *C, + IRBuilder<> &Builder) { + bool OperandChanged = false; + SmallVector<Value *, 4> NewOperands; + unsigned NumOperands = C->getNumOperands(); + + // Check if any operand is or uses a global variable in GVMap, and thus + // converted to another value. + for (unsigned i = 0; i < NumOperands; ++i) { + Value *Operand = C->getOperand(i); + Value *NewOperand = remapConstant(M, F, cast<Constant>(Operand), Builder); + OperandChanged |= Operand != NewOperand; + NewOperands.push_back(NewOperand); + } + + // If none of the operands has been modified, return C as it is. + if (!OperandChanged) { + return C; + } + + // If any of the operands has been modified, construct the instruction with + // the converted operands. + unsigned Opcode = C->getOpcode(); + switch (Opcode) { + case Instruction::ICmp: + // CompareConstantExpr (icmp) + return Builder.CreateICmp(CmpInst::Predicate(C->getPredicate()), + NewOperands[0], NewOperands[1]); + case Instruction::FCmp: + // CompareConstantExpr (fcmp) + assert(false && "Address space conversion should have no effect " + "on float point CompareConstantExpr (fcmp)!"); + return C; + case Instruction::ExtractElement: + // ExtractElementConstantExpr + return Builder.CreateExtractElement(NewOperands[0], NewOperands[1]); + case Instruction::InsertElement: + // InsertElementConstantExpr + return Builder.CreateInsertElement(NewOperands[0], NewOperands[1], + NewOperands[2]); + case Instruction::ShuffleVector: + // ShuffleVector + return Builder.CreateShuffleVector(NewOperands[0], NewOperands[1], + NewOperands[2]); + case Instruction::ExtractValue: + // ExtractValueConstantExpr + return Builder.CreateExtractValue(NewOperands[0], C->getIndices()); + case Instruction::InsertValue: + // InsertValueConstantExpr + return Builder.CreateInsertValue(NewOperands[0], NewOperands[1], + C->getIndices()); + case Instruction::GetElementPtr: + // GetElementPtrConstantExpr + return cast<GEPOperator>(C)->isInBounds() + ? Builder.CreateGEP( + NewOperands[0], + makeArrayRef(&NewOperands[1], NumOperands - 1)) + : Builder.CreateInBoundsGEP( + NewOperands[0], + makeArrayRef(&NewOperands[1], NumOperands - 1)); + case Instruction::Select: + // SelectConstantExpr + return Builder.CreateSelect(NewOperands[0], NewOperands[1], NewOperands[2]); + default: + // BinaryConstantExpr + if (Instruction::isBinaryOp(Opcode)) { + return Builder.CreateBinOp(Instruction::BinaryOps(C->getOpcode()), + NewOperands[0], NewOperands[1]); + } + // UnaryConstantExpr + if (Instruction::isCast(Opcode)) { + return Builder.CreateCast(Instruction::CastOps(C->getOpcode()), + NewOperands[0], C->getType()); + } + assert(false && "GenericToNVVM encountered an unsupported ConstantExpr"); + return C; + } +} + +void GenericToNVVM::remapNamedMDNode(Module *M, NamedMDNode *N) { + + bool OperandChanged = false; + SmallVector<MDNode *, 16> NewOperands; + unsigned NumOperands = N->getNumOperands(); + + // Check if any operand is or contains a global variable in GVMap, and thus + // converted to another value. 
+ for (unsigned i = 0; i < NumOperands; ++i) { + MDNode *Operand = N->getOperand(i); + MDNode *NewOperand = remapMDNode(M, Operand); + OperandChanged |= Operand != NewOperand; + NewOperands.push_back(NewOperand); + } + + // If none of the operands has been modified, return immediately. + if (!OperandChanged) { + return; + } + + // Replace the old operands with the new operands. + N->dropAllReferences(); + for (SmallVector<MDNode *, 16>::iterator I = NewOperands.begin(), + E = NewOperands.end(); + I != E; ++I) { + N->addOperand(*I); + } +} + +MDNode *GenericToNVVM::remapMDNode(Module *M, MDNode *N) { + + bool OperandChanged = false; + SmallVector<Value *, 8> NewOperands; + unsigned NumOperands = N->getNumOperands(); + + // Check if any operand is or contains a global variable in GVMap, and thus + // converted to another value. + for (unsigned i = 0; i < NumOperands; ++i) { + Value *Operand = N->getOperand(i); + Value *NewOperand = Operand; + if (Operand) { + if (isa<GlobalVariable>(Operand)) { + GVMapTy::iterator I = GVMap.find(cast<GlobalVariable>(Operand)); + if (I != GVMap.end()) { + NewOperand = I->second; + if (++i < NumOperands) { + NewOperands.push_back(NewOperand); + // Address space of the global variable follows the global variable + // in the global variable debug info (see createGlobalVariable in + // lib/Analysis/DIBuilder.cpp). + NewOperand = + ConstantInt::get(Type::getInt32Ty(M->getContext()), + I->second->getType()->getAddressSpace()); + } + } + } else if (isa<MDNode>(Operand)) { + NewOperand = remapMDNode(M, cast<MDNode>(Operand)); + } + } + OperandChanged |= Operand != NewOperand; + NewOperands.push_back(NewOperand); + } + + // If none of the operands has been modified, return N as it is. + if (!OperandChanged) { + return N; + } + + // If any of the operands has been modified, create a new MDNode with the new + // operands. + return MDNode::get(M->getContext(), makeArrayRef(NewOperands)); +} diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp index e862988..d4378c2 100644 --- a/contrib/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp +++ b/contrib/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp @@ -42,6 +42,11 @@ static cl::opt<int> UsePrecDivF32( " IEEE Compliant F32 div.rnd if avaiable."), cl::init(2)); +static cl::opt<bool> +UsePrecSqrtF32("nvptx-prec-sqrtf32", + cl::desc("NVPTX Specific: 0 use sqrt.approx, 1 use sqrt.rn."), + cl::init(true)); + /// createNVPTXISelDag - This pass converts a legalized DAG into a /// NVPTX-specific DAG, ready for instruction scheduling. FunctionPass *llvm::createNVPTXISelDag(NVPTXTargetMachine &TM, @@ -74,6 +79,8 @@ NVPTXDAGToDAGISel::NVPTXDAGToDAGISel(NVPTXTargetMachine &tm, // Decide how to translate f32 div do_DIVF32_PREC = UsePrecDivF32; + // Decide how to translate f32 sqrt + do_SQRTF32_PREC = UsePrecSqrtF32; // sm less than sm_20 does not support div.rnd. Use div.full. if (do_DIVF32_PREC == 2 && !Subtarget.reqPTX20()) do_DIVF32_PREC = 1; @@ -241,7 +248,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLoad(SDNode *N) { SDValue Ops[] = { getI32Imm(isVolatile), getI32Imm(codeAddrSpace), getI32Imm(vecType), getI32Imm(fromType), getI32Imm(fromTypeWidth), Addr, Chain }; - NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops, 7); + NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops); } else if (Subtarget.is64Bit() ? 
SelectADDRsi64(N1.getNode(), N1, Base, Offset) : SelectADDRsi(N1.getNode(), N1, Base, Offset)) { @@ -270,7 +277,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLoad(SDNode *N) { SDValue Ops[] = { getI32Imm(isVolatile), getI32Imm(codeAddrSpace), getI32Imm(vecType), getI32Imm(fromType), getI32Imm(fromTypeWidth), Base, Offset, Chain }; - NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops, 8); + NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops); } else if (Subtarget.is64Bit() ? SelectADDRri64(N1.getNode(), N1, Base, Offset) : SelectADDRri(N1.getNode(), N1, Base, Offset)) { @@ -324,7 +331,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLoad(SDNode *N) { SDValue Ops[] = { getI32Imm(isVolatile), getI32Imm(codeAddrSpace), getI32Imm(vecType), getI32Imm(fromType), getI32Imm(fromTypeWidth), Base, Offset, Chain }; - NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops, 8); + NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops); } else { if (Subtarget.is64Bit()) { switch (TargetVT) { @@ -376,7 +383,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLoad(SDNode *N) { SDValue Ops[] = { getI32Imm(isVolatile), getI32Imm(codeAddrSpace), getI32Imm(vecType), getI32Imm(fromType), getI32Imm(fromTypeWidth), N1, Chain }; - NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops, 7); + NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops); } if (NVPTXLD != NULL) { @@ -501,7 +508,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLoadVector(SDNode *N) { SDValue Ops[] = { getI32Imm(IsVolatile), getI32Imm(CodeAddrSpace), getI32Imm(VecType), getI32Imm(FromType), getI32Imm(FromTypeWidth), Addr, Chain }; - LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops, 7); + LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops); } else if (Subtarget.is64Bit() ? SelectADDRsi64(Op1.getNode(), Op1, Base, Offset) : SelectADDRsi(Op1.getNode(), Op1, Base, Offset)) { @@ -555,7 +562,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLoadVector(SDNode *N) { SDValue Ops[] = { getI32Imm(IsVolatile), getI32Imm(CodeAddrSpace), getI32Imm(VecType), getI32Imm(FromType), getI32Imm(FromTypeWidth), Base, Offset, Chain }; - LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops, 8); + LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops); } else if (Subtarget.is64Bit() ? 
SelectADDRri64(Op1.getNode(), Op1, Base, Offset) : SelectADDRri(Op1.getNode(), Op1, Base, Offset)) { @@ -659,7 +666,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLoadVector(SDNode *N) { getI32Imm(VecType), getI32Imm(FromType), getI32Imm(FromTypeWidth), Base, Offset, Chain }; - LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops, 8); + LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops); } else { if (Subtarget.is64Bit()) { switch (N->getOpcode()) { @@ -760,7 +767,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLoadVector(SDNode *N) { SDValue Ops[] = { getI32Imm(IsVolatile), getI32Imm(CodeAddrSpace), getI32Imm(VecType), getI32Imm(FromType), getI32Imm(FromTypeWidth), Op1, Chain }; - LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops, 7); + LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops); } MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1); @@ -962,7 +969,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDUVector(SDNode *N) { } SDValue Ops[] = { Op1, Chain }; - LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), &Ops[0], 2); + LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops); MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1); MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand(); @@ -1055,7 +1062,7 @@ SDNode *NVPTXDAGToDAGISel::SelectStore(SDNode *N) { SDValue Ops[] = { N1, getI32Imm(isVolatile), getI32Imm(codeAddrSpace), getI32Imm(vecType), getI32Imm(toType), getI32Imm(toTypeWidth), Addr, Chain }; - NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops, 8); + NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops); } else if (Subtarget.is64Bit() ? SelectADDRsi64(N2.getNode(), N2, Base, Offset) : SelectADDRsi(N2.getNode(), N2, Base, Offset)) { @@ -1084,7 +1091,7 @@ SDNode *NVPTXDAGToDAGISel::SelectStore(SDNode *N) { SDValue Ops[] = { N1, getI32Imm(isVolatile), getI32Imm(codeAddrSpace), getI32Imm(vecType), getI32Imm(toType), getI32Imm(toTypeWidth), Base, Offset, Chain }; - NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops, 9); + NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops); } else if (Subtarget.is64Bit() ? 
SelectADDRri64(N2.getNode(), N2, Base, Offset) : SelectADDRri(N2.getNode(), N2, Base, Offset)) { @@ -1138,7 +1145,7 @@ SDNode *NVPTXDAGToDAGISel::SelectStore(SDNode *N) { SDValue Ops[] = { N1, getI32Imm(isVolatile), getI32Imm(codeAddrSpace), getI32Imm(vecType), getI32Imm(toType), getI32Imm(toTypeWidth), Base, Offset, Chain }; - NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops, 9); + NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops); } else { if (Subtarget.is64Bit()) { switch (SourceVT) { @@ -1190,7 +1197,7 @@ SDNode *NVPTXDAGToDAGISel::SelectStore(SDNode *N) { SDValue Ops[] = { N1, getI32Imm(isVolatile), getI32Imm(codeAddrSpace), getI32Imm(vecType), getI32Imm(toType), getI32Imm(toTypeWidth), N2, Chain }; - NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops, 8); + NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops); } if (NVPTXST != NULL) { @@ -1569,7 +1576,7 @@ SDNode *NVPTXDAGToDAGISel::SelectStoreVector(SDNode *N) { StOps.push_back(Chain); - ST = CurDAG->getMachineNode(Opcode, DL, MVT::Other, &StOps[0], StOps.size()); + ST = CurDAG->getMachineNode(Opcode, DL, MVT::Other, StOps); MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1); MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand(); diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h b/contrib/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h index 70e8e46..ed16d44 100644 --- a/contrib/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h +++ b/contrib/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h @@ -41,6 +41,10 @@ class LLVM_LIBRARY_VISIBILITY NVPTXDAGToDAGISel : public SelectionDAGISel { // Otherwise, use div.full int do_DIVF32_PREC; + // If true, generate sqrt.rn, else generate sqrt.approx. If FTZ + // is true, then generate the corresponding FTZ version. + bool do_SQRTF32_PREC; + // If true, add .ftz to f32 instructions. // This is only meaningful for sm_20 and later, as the default // is not ftz. 
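The new -nvptx-prec-sqrtf32 option (default true) and the existing doF32FTZ predicate jointly pick one of four PTX square-root forms; the patterns added to NVPTXIntrinsics.td below encode exactly this choice. As illustrative pseudocode only, not the actual selector:

#include <string>

// Mirror of the predicate combinations do_SQRTF32_RN/do_SQRTF32_APPROX x FTZ.
static std::string pickSqrtF32(bool PrecSqrt /* -nvptx-prec-sqrtf32 */,
                               bool FTZ /* doF32FTZ */) {
  if (PrecSqrt)                                        // do_SQRTF32_PREC == 1
    return FTZ ? "sqrt.rn.ftz.f32" : "sqrt.rn.f32";
  return FTZ ? "sqrt.approx.ftz.f32" : "sqrt.approx.f32";
}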
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td b/contrib/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td index f43abe2..da6dd39 100644 --- a/contrib/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td +++ b/contrib/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td @@ -75,6 +75,9 @@ def allowFMA_ftz : Predicate<"(allowFMA && UseF32FTZ)">; def do_DIVF32_APPROX : Predicate<"do_DIVF32_PREC==0">; def do_DIVF32_FULL : Predicate<"do_DIVF32_PREC==1">; +def do_SQRTF32_APPROX : Predicate<"do_SQRTF32_PREC==0">; +def do_SQRTF32_RN : Predicate<"do_SQRTF32_PREC==1">; + def hasHWROT32 : Predicate<"Subtarget.hasHWROT32()">; def true : Predicate<"1">; diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td b/contrib/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td index 49e2568..24037ca 100644 --- a/contrib/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td +++ b/contrib/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td @@ -512,6 +512,16 @@ def INT_NVVM_SQRT_RM_D : F_MATH_1<"sqrt.rm.f64 \t$dst, $src0;", Float64Regs, def INT_NVVM_SQRT_RP_D : F_MATH_1<"sqrt.rp.f64 \t$dst, $src0;", Float64Regs, Float64Regs, int_nvvm_sqrt_rp_d>; +// nvvm_sqrt intrinsic +def : Pat<(int_nvvm_sqrt_f Float32Regs:$a), + (INT_NVVM_SQRT_RN_FTZ_F Float32Regs:$a)>, Requires<[doF32FTZ, do_SQRTF32_RN]>; +def : Pat<(int_nvvm_sqrt_f Float32Regs:$a), + (INT_NVVM_SQRT_RN_F Float32Regs:$a)>, Requires<[do_SQRTF32_RN]>; +def : Pat<(int_nvvm_sqrt_f Float32Regs:$a), + (INT_NVVM_SQRT_APPROX_FTZ_F Float32Regs:$a)>, Requires<[doF32FTZ]>; +def : Pat<(int_nvvm_sqrt_f Float32Regs:$a), + (INT_NVVM_SQRT_APPROX_F Float32Regs:$a)>; + // // Rsqrt // @@ -1510,38 +1520,12 @@ multiclass G_TO_NG<string Str, Intrinsic Intrin> { defm cvta_local : NG_TO_G<"local", int_nvvm_ptr_local_to_gen>; defm cvta_shared : NG_TO_G<"shared", int_nvvm_ptr_shared_to_gen>; defm cvta_global : NG_TO_G<"global", int_nvvm_ptr_global_to_gen>; +defm cvta_const : NG_TO_G<"const", int_nvvm_ptr_constant_to_gen>; defm cvta_to_local : G_TO_NG<"local", int_nvvm_ptr_gen_to_local>; defm cvta_to_shared : G_TO_NG<"shared", int_nvvm_ptr_gen_to_shared>; defm cvta_to_global : G_TO_NG<"global", int_nvvm_ptr_gen_to_global>; - -def cvta_const : NVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src), - "mov.u32 \t$result, $src;", - [(set Int32Regs:$result, (int_nvvm_ptr_constant_to_gen Int32Regs:$src))]>; -def cvta_const_64 : NVPTXInst<(outs Int64Regs:$result), (ins Int64Regs:$src), - "mov.u64 \t$result, $src;", - [(set Int64Regs:$result, (int_nvvm_ptr_constant_to_gen Int64Regs:$src))]>; - - - -// @TODO: Revisit this. There is a type -// contradiction between iPTRAny and iPTR for the def. 
-/*def cvta_const_addr : NVPTXInst<(outs Int32Regs:$result), (ins imemAny:$src), - "mov.u32 \t$result, $src;", - [(set Int32Regs:$result, (int_nvvm_ptr_constant_to_gen - (Wrapper tglobaladdr:$src)))]>; -def cvta_const_addr_64 : NVPTXInst<(outs Int64Regs:$result), (ins imemAny:$src), - "mov.u64 \t$result, $src;", - [(set Int64Regs:$result, (int_nvvm_ptr_constant_to_gen - (Wrapper tglobaladdr:$src)))]>;*/ - - -def cvta_to_const : NVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src), - "mov.u32 \t$result, $src;", - [(set Int32Regs:$result, (int_nvvm_ptr_gen_to_constant Int32Regs:$src))]>; -def cvta_to_const_64 : NVPTXInst<(outs Int64Regs:$result), (ins Int64Regs:$src), - "mov.u64 \t$result, $src;", - [(set Int64Regs:$result, (int_nvvm_ptr_gen_to_constant Int64Regs:$src))]>; +defm cvta_to_const : G_TO_NG<"const", int_nvvm_ptr_gen_to_constant>; // nvvm.ptr.gen.to.param diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXSection.h b/contrib/llvm/lib/Target/NVPTX/NVPTXSection.h index e166be5..e57ace9 100644 --- a/contrib/llvm/lib/Target/NVPTX/NVPTXSection.h +++ b/contrib/llvm/lib/Target/NVPTX/NVPTXSection.h @@ -32,7 +32,8 @@ public: /// Override this as NVPTX has its own way of printing switching /// to a section. virtual void PrintSwitchToSection(const MCAsmInfo &MAI, - raw_ostream &OS) const {} + raw_ostream &OS, + const MCExpr *Subsection) const {} /// Base address of PTX sections is zero. virtual bool isBaseAddressKnownZero() const { return true; } diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp index 67ca6b5..1ae2a7c 100644 --- a/contrib/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp +++ b/contrib/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp @@ -49,6 +49,7 @@ using namespace llvm; namespace llvm { void initializeNVVMReflectPass(PassRegistry&); +void initializeGenericToNVVMPass(PassRegistry&); } extern "C" void LLVMInitializeNVPTXTarget() { @@ -62,6 +63,7 @@ extern "C" void LLVMInitializeNVPTXTarget() { // FIXME: This pass is really intended to be invoked during IR optimization, // but it's very NVPTX-specific. 
initializeNVVMReflectPass(*PassRegistry::getPassRegistry()); + initializeGenericToNVVMPass(*PassRegistry::getPassRegistry()); } NVPTXTargetMachine::NVPTXTargetMachine( @@ -100,6 +102,7 @@ public: return getTM<NVPTXTargetMachine>(); } + virtual void addIRPasses(); virtual bool addInstSelector(); virtual bool addPreRegAlloc(); }; @@ -110,6 +113,11 @@ TargetPassConfig *NVPTXTargetMachine::createPassConfig(PassManagerBase &PM) { return PassConfig; } +void NVPTXPassConfig::addIRPasses() { + TargetPassConfig::addIRPasses(); + addPass(createGenericToNVVMPass()); +} + bool NVPTXPassConfig::addInstSelector() { addPass(createLowerAggrCopies()); addPass(createSplitBBatBarPass()); diff --git a/contrib/llvm/lib/Target/NVPTX/NVVMReflect.cpp b/contrib/llvm/lib/Target/NVPTX/NVVMReflect.cpp index 0ad62ce..3cc324b 100644 --- a/contrib/llvm/lib/Target/NVPTX/NVVMReflect.cpp +++ b/contrib/llvm/lib/Target/NVPTX/NVVMReflect.cpp @@ -14,6 +14,7 @@ // //===----------------------------------------------------------------------===// +#include "NVPTX.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringMap.h" @@ -40,7 +41,7 @@ using namespace llvm; namespace llvm { void initializeNVVMReflectPass(PassRegistry &); } namespace { -class LLVM_LIBRARY_VISIBILITY NVVMReflect : public ModulePass { +class NVVMReflect : public ModulePass { private: StringMap<int> VarMap; typedef DenseMap<std::string, int>::iterator VarMapIter; @@ -48,9 +49,18 @@ private: public: static char ID; - NVVMReflect() : ModulePass(ID) { + NVVMReflect() : ModulePass(ID), ReflectFunction(0) { + initializeNVVMReflectPass(*PassRegistry::getPassRegistry()); VarMap.clear(); - ReflectFunction = 0; + } + + NVVMReflect(const StringMap<int> &Mapping) + : ModulePass(ID), ReflectFunction(0) { + initializeNVVMReflectPass(*PassRegistry::getPassRegistry()); + for (StringMap<int>::const_iterator I = Mapping.begin(), E = Mapping.end(); + I != E; ++I) { + VarMap[(*I).getKey()] = (*I).getValue(); + } } void getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesAll(); } @@ -60,6 +70,14 @@ public: }; } +ModulePass *llvm::createNVVMReflectPass() { + return new NVVMReflect(); +} + +ModulePass *llvm::createNVVMReflectPass(const StringMap<int>& Mapping) { + return new NVVMReflect(Mapping); +} + static cl::opt<bool> NVVMReflectEnabled("nvvm-reflect-enable", cl::init(true), cl::desc("NVVM reflection, enabled by default")); diff --git a/contrib/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp b/contrib/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp new file mode 100644 index 0000000..f2cb8b8 --- /dev/null +++ b/contrib/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp @@ -0,0 +1,739 @@ +//===-- PPCAsmParser.cpp - Parse PowerPC asm to MCInst instructions ---------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "MCTargetDesc/PPCMCTargetDesc.h" +#include "llvm/MC/MCTargetAsmParser.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/MC/MCParser/MCAsmLexer.h" +#include "llvm/MC/MCParser/MCAsmParser.h" +#include "llvm/MC/MCParser/MCParsedAsmOperand.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringSwitch.h" +#include "llvm/ADT/Twine.h" +#include "llvm/Support/SourceMgr.h" +#include "llvm/Support/TargetRegistry.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +namespace { + +static unsigned RRegs[32] = { + PPC::R0, PPC::R1, PPC::R2, PPC::R3, + PPC::R4, PPC::R5, PPC::R6, PPC::R7, + PPC::R8, PPC::R9, PPC::R10, PPC::R11, + PPC::R12, PPC::R13, PPC::R14, PPC::R15, + PPC::R16, PPC::R17, PPC::R18, PPC::R19, + PPC::R20, PPC::R21, PPC::R22, PPC::R23, + PPC::R24, PPC::R25, PPC::R26, PPC::R27, + PPC::R28, PPC::R29, PPC::R30, PPC::R31 +}; +static unsigned RRegsNoR0[32] = { + PPC::ZERO, + PPC::R1, PPC::R2, PPC::R3, + PPC::R4, PPC::R5, PPC::R6, PPC::R7, + PPC::R8, PPC::R9, PPC::R10, PPC::R11, + PPC::R12, PPC::R13, PPC::R14, PPC::R15, + PPC::R16, PPC::R17, PPC::R18, PPC::R19, + PPC::R20, PPC::R21, PPC::R22, PPC::R23, + PPC::R24, PPC::R25, PPC::R26, PPC::R27, + PPC::R28, PPC::R29, PPC::R30, PPC::R31 +}; +static unsigned XRegs[32] = { + PPC::X0, PPC::X1, PPC::X2, PPC::X3, + PPC::X4, PPC::X5, PPC::X6, PPC::X7, + PPC::X8, PPC::X9, PPC::X10, PPC::X11, + PPC::X12, PPC::X13, PPC::X14, PPC::X15, + PPC::X16, PPC::X17, PPC::X18, PPC::X19, + PPC::X20, PPC::X21, PPC::X22, PPC::X23, + PPC::X24, PPC::X25, PPC::X26, PPC::X27, + PPC::X28, PPC::X29, PPC::X30, PPC::X31 +}; +static unsigned XRegsNoX0[32] = { + PPC::ZERO8, + PPC::X1, PPC::X2, PPC::X3, + PPC::X4, PPC::X5, PPC::X6, PPC::X7, + PPC::X8, PPC::X9, PPC::X10, PPC::X11, + PPC::X12, PPC::X13, PPC::X14, PPC::X15, + PPC::X16, PPC::X17, PPC::X18, PPC::X19, + PPC::X20, PPC::X21, PPC::X22, PPC::X23, + PPC::X24, PPC::X25, PPC::X26, PPC::X27, + PPC::X28, PPC::X29, PPC::X30, PPC::X31 +}; +static unsigned FRegs[32] = { + PPC::F0, PPC::F1, PPC::F2, PPC::F3, + PPC::F4, PPC::F5, PPC::F6, PPC::F7, + PPC::F8, PPC::F9, PPC::F10, PPC::F11, + PPC::F12, PPC::F13, PPC::F14, PPC::F15, + PPC::F16, PPC::F17, PPC::F18, PPC::F19, + PPC::F20, PPC::F21, PPC::F22, PPC::F23, + PPC::F24, PPC::F25, PPC::F26, PPC::F27, + PPC::F28, PPC::F29, PPC::F30, PPC::F31 +}; +static unsigned VRegs[32] = { + PPC::V0, PPC::V1, PPC::V2, PPC::V3, + PPC::V4, PPC::V5, PPC::V6, PPC::V7, + PPC::V8, PPC::V9, PPC::V10, PPC::V11, + PPC::V12, PPC::V13, PPC::V14, PPC::V15, + PPC::V16, PPC::V17, PPC::V18, PPC::V19, + PPC::V20, PPC::V21, PPC::V22, PPC::V23, + PPC::V24, PPC::V25, PPC::V26, PPC::V27, + PPC::V28, PPC::V29, PPC::V30, PPC::V31 +}; +static unsigned CRBITRegs[32] = { + PPC::CR0LT, PPC::CR0GT, PPC::CR0EQ, PPC::CR0UN, + PPC::CR1LT, PPC::CR1GT, PPC::CR1EQ, PPC::CR1UN, + PPC::CR2LT, PPC::CR2GT, PPC::CR2EQ, PPC::CR2UN, + PPC::CR3LT, PPC::CR3GT, PPC::CR3EQ, PPC::CR3UN, + PPC::CR4LT, PPC::CR4GT, PPC::CR4EQ, PPC::CR4UN, + PPC::CR5LT, PPC::CR5GT, PPC::CR5EQ, PPC::CR5UN, + PPC::CR6LT, PPC::CR6GT, PPC::CR6EQ, PPC::CR6UN, + PPC::CR7LT, PPC::CR7GT, PPC::CR7EQ, PPC::CR7UN +}; +static unsigned CRRegs[8] = { + PPC::CR0, PPC::CR1, PPC::CR2, PPC::CR3, + PPC::CR4, PPC::CR5, PPC::CR6, PPC::CR7 +}; + +struct PPCOperand; + +class PPCAsmParser : public 
MCTargetAsmParser { + MCSubtargetInfo &STI; + MCAsmParser &Parser; + bool IsPPC64; + + MCAsmParser &getParser() const { return Parser; } + MCAsmLexer &getLexer() const { return Parser.getLexer(); } + + void Warning(SMLoc L, const Twine &Msg) { Parser.Warning(L, Msg); } + bool Error(SMLoc L, const Twine &Msg) { return Parser.Error(L, Msg); } + + bool isPPC64() const { return IsPPC64; } + + bool MatchRegisterName(const AsmToken &Tok, + unsigned &RegNo, int64_t &IntVal); + + virtual bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc); + + bool ParseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands); + + bool ParseDirectiveWord(unsigned Size, SMLoc L); + bool ParseDirectiveTC(unsigned Size, SMLoc L); + + bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, + SmallVectorImpl<MCParsedAsmOperand*> &Operands, + MCStreamer &Out, unsigned &ErrorInfo, + bool MatchingInlineAsm); + + void ProcessInstruction(MCInst &Inst, + const SmallVectorImpl<MCParsedAsmOperand*> &Ops); + + /// @name Auto-generated Match Functions + /// { + +#define GET_ASSEMBLER_HEADER +#include "PPCGenAsmMatcher.inc" + + /// } + + +public: + PPCAsmParser(MCSubtargetInfo &_STI, MCAsmParser &_Parser) + : MCTargetAsmParser(), STI(_STI), Parser(_Parser) { + // Check for 64-bit vs. 32-bit pointer mode. + Triple TheTriple(STI.getTargetTriple()); + IsPPC64 = TheTriple.getArch() == Triple::ppc64; + // Initialize the set of available features. + setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits())); + } + + virtual bool ParseInstruction(ParseInstructionInfo &Info, + StringRef Name, SMLoc NameLoc, + SmallVectorImpl<MCParsedAsmOperand*> &Operands); + + virtual bool ParseDirective(AsmToken DirectiveID); +}; + +/// PPCOperand - Instances of this class represent a parsed PowerPC machine +/// instruction. +struct PPCOperand : public MCParsedAsmOperand { + enum KindTy { + Token, + Immediate, + Expression + } Kind; + + SMLoc StartLoc, EndLoc; + bool IsPPC64; + + struct TokOp { + const char *Data; + unsigned Length; + }; + + struct ImmOp { + int64_t Val; + }; + + struct ExprOp { + const MCExpr *Val; + }; + + union { + struct TokOp Tok; + struct ImmOp Imm; + struct ExprOp Expr; + }; + + PPCOperand(KindTy K) : MCParsedAsmOperand(), Kind(K) {} +public: + PPCOperand(const PPCOperand &o) : MCParsedAsmOperand() { + Kind = o.Kind; + StartLoc = o.StartLoc; + EndLoc = o.EndLoc; + IsPPC64 = o.IsPPC64; + switch (Kind) { + case Token: + Tok = o.Tok; + break; + case Immediate: + Imm = o.Imm; + break; + case Expression: + Expr = o.Expr; + break; + } + } + + /// getStartLoc - Get the location of the first token of this operand. + SMLoc getStartLoc() const { return StartLoc; } + + /// getEndLoc - Get the location of the last token of this operand. + SMLoc getEndLoc() const { return EndLoc; } + + /// isPPC64 - True if this operand is for an instruction in 64-bit mode. 
+ bool isPPC64() const { return IsPPC64; } + + int64_t getImm() const { + assert(Kind == Immediate && "Invalid access!"); + return Imm.Val; + } + + const MCExpr *getExpr() const { + assert(Kind == Expression && "Invalid access!"); + return Expr.Val; + } + + unsigned getReg() const { + assert(isRegNumber() && "Invalid access!"); + return (unsigned) Imm.Val; + } + + unsigned getCCReg() const { + assert(isCCRegNumber() && "Invalid access!"); + return (unsigned) Imm.Val; + } + + unsigned getCRBitMask() const { + assert(isCRBitMask() && "Invalid access!"); + return 7 - CountTrailingZeros_32(Imm.Val); + } + + bool isToken() const { return Kind == Token; } + bool isImm() const { return Kind == Immediate || Kind == Expression; } + bool isU5Imm() const { return Kind == Immediate && isUInt<5>(getImm()); } + bool isS5Imm() const { return Kind == Immediate && isInt<5>(getImm()); } + bool isU6Imm() const { return Kind == Immediate && isUInt<6>(getImm()); } + bool isU16Imm() const { return Kind == Expression || + (Kind == Immediate && isUInt<16>(getImm())); } + bool isS16Imm() const { return Kind == Expression || + (Kind == Immediate && isInt<16>(getImm())); } + bool isS16ImmX4() const { return Kind == Expression || + (Kind == Immediate && isInt<16>(getImm()) && + (getImm() & 3) == 0); } + bool isRegNumber() const { return Kind == Immediate && isUInt<5>(getImm()); } + bool isCCRegNumber() const { return Kind == Immediate && + isUInt<3>(getImm()); } + bool isCRBitMask() const { return Kind == Immediate && isUInt<8>(getImm()) && + isPowerOf2_32(getImm()); } + bool isMem() const { return false; } + bool isReg() const { return false; } + + void addRegOperands(MCInst &Inst, unsigned N) const { + llvm_unreachable("addRegOperands"); + } + + void addRegGPRCOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + Inst.addOperand(MCOperand::CreateReg(RRegs[getReg()])); + } + + void addRegGPRCNoR0Operands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + Inst.addOperand(MCOperand::CreateReg(RRegsNoR0[getReg()])); + } + + void addRegG8RCOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + Inst.addOperand(MCOperand::CreateReg(XRegs[getReg()])); + } + + void addRegG8RCNoX0Operands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + Inst.addOperand(MCOperand::CreateReg(XRegsNoX0[getReg()])); + } + + void addRegGxRCOperands(MCInst &Inst, unsigned N) const { + if (isPPC64()) + addRegG8RCOperands(Inst, N); + else + addRegGPRCOperands(Inst, N); + } + + void addRegGxRCNoR0Operands(MCInst &Inst, unsigned N) const { + if (isPPC64()) + addRegG8RCNoX0Operands(Inst, N); + else + addRegGPRCNoR0Operands(Inst, N); + } + + void addRegF4RCOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + Inst.addOperand(MCOperand::CreateReg(FRegs[getReg()])); + } + + void addRegF8RCOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + Inst.addOperand(MCOperand::CreateReg(FRegs[getReg()])); + } + + void addRegVRRCOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + Inst.addOperand(MCOperand::CreateReg(VRegs[getReg()])); + } + + void addRegCRBITRCOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + Inst.addOperand(MCOperand::CreateReg(CRBITRegs[getReg()])); + } + + void addRegCRRCOperands(MCInst &Inst, unsigned N) const { + 
assert(N == 1 && "Invalid number of operands!"); + Inst.addOperand(MCOperand::CreateReg(CRRegs[getCCReg()])); + } + + void addCRBitMaskOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + Inst.addOperand(MCOperand::CreateReg(CRRegs[getCRBitMask()])); + } + + void addImmOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + if (Kind == Immediate) + Inst.addOperand(MCOperand::CreateImm(getImm())); + else + Inst.addOperand(MCOperand::CreateExpr(getExpr())); + } + + void addDispRIOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + if (Kind == Immediate) + Inst.addOperand(MCOperand::CreateImm(getImm())); + else + Inst.addOperand(MCOperand::CreateExpr(getExpr())); + } + + void addDispRIXOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + if (Kind == Immediate) + Inst.addOperand(MCOperand::CreateImm(getImm() / 4)); + else + Inst.addOperand(MCOperand::CreateExpr(getExpr())); + } + + StringRef getToken() const { + assert(Kind == Token && "Invalid access!"); + return StringRef(Tok.Data, Tok.Length); + } + + virtual void print(raw_ostream &OS) const; + + static PPCOperand *CreateToken(StringRef Str, SMLoc S, bool IsPPC64) { + PPCOperand *Op = new PPCOperand(Token); + Op->Tok.Data = Str.data(); + Op->Tok.Length = Str.size(); + Op->StartLoc = S; + Op->EndLoc = S; + Op->IsPPC64 = IsPPC64; + return Op; + } + + static PPCOperand *CreateImm(int64_t Val, SMLoc S, SMLoc E, bool IsPPC64) { + PPCOperand *Op = new PPCOperand(Immediate); + Op->Imm.Val = Val; + Op->StartLoc = S; + Op->EndLoc = E; + Op->IsPPC64 = IsPPC64; + return Op; + } + + static PPCOperand *CreateExpr(const MCExpr *Val, + SMLoc S, SMLoc E, bool IsPPC64) { + PPCOperand *Op = new PPCOperand(Expression); + Op->Expr.Val = Val; + Op->StartLoc = S; + Op->EndLoc = E; + Op->IsPPC64 = IsPPC64; + return Op; + } +}; + +} // end anonymous namespace. 
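A note on the operand design above: PPCOperand never stores a physical register directly. A register name such as %r3 is parsed into an Immediate operand holding the number 3, and only the addReg*Operands callbacks translate that number through the RRegs/XRegs/FRegs/VRegs tables when the generated matcher builds the MCInst; the IsPPC64 flag selects the 64-bit tables for the GxRC variants. A rough sketch of that flow, reusing the types defined above (it would live inside this file; the opcode is an arbitrary example, not part of the patch):

  // What ParseOperand's '%' branch produces for "%r3": an immediate 3.
  llvm::SMLoc S, E;
  PPCOperand *Op = PPCOperand::CreateImm(/*Val=*/3, S, E, /*IsPPC64=*/false);

  // The matcher later calls the add-method for the register class the
  // instruction expects; only here does 3 turn into PPC::R3.
  llvm::MCInst Inst;
  Inst.setOpcode(PPC::ADD4);            // arbitrary example opcode
  Op->addRegGPRCOperands(Inst, 1);      // adds MCOperand::CreateReg(RRegs[3])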
+ +void PPCOperand::print(raw_ostream &OS) const { + switch (Kind) { + case Token: + OS << "'" << getToken() << "'"; + break; + case Immediate: + OS << getImm(); + break; + case Expression: + getExpr()->print(OS); + break; + } +} + + +void PPCAsmParser:: +ProcessInstruction(MCInst &Inst, + const SmallVectorImpl<MCParsedAsmOperand*> &Operands) { + switch (Inst.getOpcode()) { + case PPC::SLWI: { + MCInst TmpInst; + int64_t N = Inst.getOperand(2).getImm(); + TmpInst.setOpcode(PPC::RLWINM); + TmpInst.addOperand(Inst.getOperand(0)); + TmpInst.addOperand(Inst.getOperand(1)); + TmpInst.addOperand(MCOperand::CreateImm(N)); + TmpInst.addOperand(MCOperand::CreateImm(0)); + TmpInst.addOperand(MCOperand::CreateImm(31 - N)); + Inst = TmpInst; + break; + } + case PPC::SRWI: { + MCInst TmpInst; + int64_t N = Inst.getOperand(2).getImm(); + TmpInst.setOpcode(PPC::RLWINM); + TmpInst.addOperand(Inst.getOperand(0)); + TmpInst.addOperand(Inst.getOperand(1)); + TmpInst.addOperand(MCOperand::CreateImm(32 - N)); + TmpInst.addOperand(MCOperand::CreateImm(N)); + TmpInst.addOperand(MCOperand::CreateImm(31)); + Inst = TmpInst; + break; + } + case PPC::SLDI: { + MCInst TmpInst; + int64_t N = Inst.getOperand(2).getImm(); + TmpInst.setOpcode(PPC::RLDICR); + TmpInst.addOperand(Inst.getOperand(0)); + TmpInst.addOperand(Inst.getOperand(1)); + TmpInst.addOperand(MCOperand::CreateImm(N)); + TmpInst.addOperand(MCOperand::CreateImm(63 - N)); + Inst = TmpInst; + break; + } + case PPC::SRDI: { + MCInst TmpInst; + int64_t N = Inst.getOperand(2).getImm(); + TmpInst.setOpcode(PPC::RLDICL); + TmpInst.addOperand(Inst.getOperand(0)); + TmpInst.addOperand(Inst.getOperand(1)); + TmpInst.addOperand(MCOperand::CreateImm(64 - N)); + TmpInst.addOperand(MCOperand::CreateImm(N)); + Inst = TmpInst; + break; + } + } +} + +bool PPCAsmParser:: +MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, + SmallVectorImpl<MCParsedAsmOperand*> &Operands, + MCStreamer &Out, unsigned &ErrorInfo, + bool MatchingInlineAsm) { + MCInst Inst; + + switch (MatchInstructionImpl(Operands, Inst, ErrorInfo, MatchingInlineAsm)) { + default: break; + case Match_Success: + // Post-process instructions (typically extended mnemonics) + ProcessInstruction(Inst, Operands); + Inst.setLoc(IDLoc); + Out.EmitInstruction(Inst); + return false; + case Match_MissingFeature: + return Error(IDLoc, "instruction use requires an option to be enabled"); + case Match_MnemonicFail: + return Error(IDLoc, "unrecognized instruction mnemonic"); + case Match_InvalidOperand: { + SMLoc ErrorLoc = IDLoc; + if (ErrorInfo != ~0U) { + if (ErrorInfo >= Operands.size()) + return Error(IDLoc, "too few operands for instruction"); + + ErrorLoc = ((PPCOperand*)Operands[ErrorInfo])->getStartLoc(); + if (ErrorLoc == SMLoc()) ErrorLoc = IDLoc; + } + + return Error(ErrorLoc, "invalid operand for instruction"); + } + } + + llvm_unreachable("Implement any new match types added!"); +} + +bool PPCAsmParser:: +MatchRegisterName(const AsmToken &Tok, unsigned &RegNo, int64_t &IntVal) { + if (Tok.is(AsmToken::Identifier)) { + StringRef Name = Tok.getString(); + + if (Name.equals_lower("lr")) { + RegNo = isPPC64()? PPC::LR8 : PPC::LR; + IntVal = 8; + return false; + } else if (Name.equals_lower("ctr")) { + RegNo = isPPC64()? PPC::CTR8 : PPC::CTR; + IntVal = 9; + return false; + } else if (Name.substr(0, 1).equals_lower("r") && + !Name.substr(1).getAsInteger(10, IntVal) && IntVal < 32) { + RegNo = isPPC64()? 
XRegs[IntVal] : RRegs[IntVal]; + return false; + } else if (Name.substr(0, 1).equals_lower("f") && + !Name.substr(1).getAsInteger(10, IntVal) && IntVal < 32) { + RegNo = FRegs[IntVal]; + return false; + } else if (Name.substr(0, 1).equals_lower("v") && + !Name.substr(1).getAsInteger(10, IntVal) && IntVal < 32) { + RegNo = VRegs[IntVal]; + return false; + } else if (Name.substr(0, 2).equals_lower("cr") && + !Name.substr(2).getAsInteger(10, IntVal) && IntVal < 8) { + RegNo = CRRegs[IntVal]; + return false; + } + } + + return true; +} + +bool PPCAsmParser:: +ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) { + const AsmToken &Tok = Parser.getTok(); + StartLoc = Tok.getLoc(); + EndLoc = Tok.getEndLoc(); + RegNo = 0; + int64_t IntVal; + + if (!MatchRegisterName(Tok, RegNo, IntVal)) { + Parser.Lex(); // Eat identifier token. + return false; + } + + return Error(StartLoc, "invalid register name"); +} + +bool PPCAsmParser:: +ParseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { + SMLoc S = Parser.getTok().getLoc(); + SMLoc E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1); + const MCExpr *EVal; + PPCOperand *Op; + + // Attempt to parse the next token as an immediate + switch (getLexer().getKind()) { + // Special handling for register names. These are interpreted + // as immediates corresponding to the register number. + case AsmToken::Percent: + Parser.Lex(); // Eat the '%'. + unsigned RegNo; + int64_t IntVal; + if (!MatchRegisterName(Parser.getTok(), RegNo, IntVal)) { + Parser.Lex(); // Eat the identifier token. + Op = PPCOperand::CreateImm(IntVal, S, E, isPPC64()); + Operands.push_back(Op); + return false; + } + return Error(S, "invalid register name"); + + // All other expressions + case AsmToken::LParen: + case AsmToken::Plus: + case AsmToken::Minus: + case AsmToken::Integer: + case AsmToken::Identifier: + case AsmToken::Dot: + case AsmToken::Dollar: + if (!getParser().parseExpression(EVal)) + break; + /* fall through */ + default: + return Error(S, "unknown operand"); + } + + if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(EVal)) + Op = PPCOperand::CreateImm(CE->getValue(), S, E, isPPC64()); + else + Op = PPCOperand::CreateExpr(EVal, S, E, isPPC64()); + + // Push the parsed operand into the list of operands + Operands.push_back(Op); + + // Check for D-form memory operands + if (getLexer().is(AsmToken::LParen)) { + Parser.Lex(); // Eat the '('. + S = Parser.getTok().getLoc(); + + int64_t IntVal; + switch (getLexer().getKind()) { + case AsmToken::Percent: + Parser.Lex(); // Eat the '%'. + unsigned RegNo; + if (MatchRegisterName(Parser.getTok(), RegNo, IntVal)) + return Error(S, "invalid register name"); + Parser.Lex(); // Eat the identifier token. + break; + + case AsmToken::Integer: + if (getParser().parseAbsoluteExpression(IntVal) || + IntVal < 0 || IntVal > 31) + return Error(S, "invalid register number"); + break; + + default: + return Error(S, "invalid memory operand"); + } + + if (getLexer().isNot(AsmToken::RParen)) + return Error(Parser.getTok().getLoc(), "missing ')'"); + E = Parser.getTok().getLoc(); + Parser.Lex(); // Eat the ')'. + + Op = PPCOperand::CreateImm(IntVal, S, E, isPPC64()); + Operands.push_back(Op); + } + + return false; +} + +/// Parse an instruction mnemonic followed by its operands. +bool PPCAsmParser:: +ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc, + SmallVectorImpl<MCParsedAsmOperand*> &Operands) { + // The first operand is the token for the instruction name. 
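Stepping back to ProcessInstruction a little further up: the four extended shift mnemonics are rewritten into the architectural rotate-and-mask instructions, following the standard expansions (slwi rA,rS,n becomes rlwinm rA,rS,n,0,31-n; srwi becomes rlwinm with SH=32-n, MB=n, ME=31; sldi and srdi go to rldicr and rldicl). A small standalone sketch of just the parameter arithmetic, in plain C++ with hypothetical helper names, to make the mapping explicit:

  #include <cassert>
  #include <cstdint>

  struct RLWINMArgs { int64_t SH, MB, ME; };   // 32-bit rotate-then-mask parameters
  struct RLDICArgs  { int64_t SH, M; };        // 64-bit rotate with a single mask bound

  // slwi rA,rS,n == rlwinm rA,rS,n,0,31-n
  RLWINMArgs expandSLWI(int64_t N) { assert(N >= 0 && N < 32); RLWINMArgs A = { N, 0, 31 - N }; return A; }
  // srwi rA,rS,n == rlwinm rA,rS,32-n,n,31
  RLWINMArgs expandSRWI(int64_t N) { assert(N >= 0 && N < 32); RLWINMArgs A = { 32 - N, N, 31 }; return A; }
  // sldi rA,rS,n == rldicr rA,rS,n,63-n
  RLDICArgs expandSLDI(int64_t N) { assert(N >= 0 && N < 64); RLDICArgs A = { N, 63 - N }; return A; }
  // srdi rA,rS,n == rldicl rA,rS,64-n,n
  RLDICArgs expandSRDI(int64_t N) { assert(N >= 0 && N < 64); RLDICArgs A = { 64 - N, N }; return A; }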
+ // If the instruction ends in a '.', we need to create a separate + // token for it, to match what TableGen is doing. + size_t Dot = Name.find('.'); + StringRef Mnemonic = Name.slice(0, Dot); + Operands.push_back(PPCOperand::CreateToken(Mnemonic, NameLoc, isPPC64())); + if (Dot != StringRef::npos) { + SMLoc DotLoc = SMLoc::getFromPointer(NameLoc.getPointer() + Dot); + StringRef DotStr = Name.slice(Dot, StringRef::npos); + Operands.push_back(PPCOperand::CreateToken(DotStr, DotLoc, isPPC64())); + } + + // If there are no more operands then finish + if (getLexer().is(AsmToken::EndOfStatement)) + return false; + + // Parse the first operand + if (ParseOperand(Operands)) + return true; + + while (getLexer().isNot(AsmToken::EndOfStatement) && + getLexer().is(AsmToken::Comma)) { + // Consume the comma token + getLexer().Lex(); + + // Parse the next operand + if (ParseOperand(Operands)) + return true; + } + + return false; +} + +/// ParseDirective parses the PPC specific directives +bool PPCAsmParser::ParseDirective(AsmToken DirectiveID) { + StringRef IDVal = DirectiveID.getIdentifier(); + if (IDVal == ".word") + return ParseDirectiveWord(4, DirectiveID.getLoc()); + if (IDVal == ".tc") + return ParseDirectiveTC(isPPC64()? 8 : 4, DirectiveID.getLoc()); + return true; +} + +/// ParseDirectiveWord +/// ::= .word [ expression (, expression)* ] +bool PPCAsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) { + if (getLexer().isNot(AsmToken::EndOfStatement)) { + for (;;) { + const MCExpr *Value; + if (getParser().parseExpression(Value)) + return true; + + getParser().getStreamer().EmitValue(Value, Size); + + if (getLexer().is(AsmToken::EndOfStatement)) + break; + + if (getLexer().isNot(AsmToken::Comma)) + return Error(L, "unexpected token in directive"); + Parser.Lex(); + } + } + + Parser.Lex(); + return false; +} + +/// ParseDirectiveTC +/// ::= .tc [ symbol (, expression)* ] +bool PPCAsmParser::ParseDirectiveTC(unsigned Size, SMLoc L) { + // Skip TC symbol, which is only used with XCOFF. + while (getLexer().isNot(AsmToken::EndOfStatement) + && getLexer().isNot(AsmToken::Comma)) + Parser.Lex(); + if (getLexer().isNot(AsmToken::Comma)) + return Error(L, "unexpected token in directive"); + Parser.Lex(); + + // Align to word size. + getParser().getStreamer().EmitValueToAlignment(Size); + + // Emit expressions. + return ParseDirectiveWord(Size, L); +} + +/// Force static initialization. +extern "C" void LLVMInitializePowerPCAsmParser() { + RegisterMCAsmParser<PPCAsmParser> A(ThePPC32Target); + RegisterMCAsmParser<PPCAsmParser> B(ThePPC64Target); +} + +#define GET_REGISTER_MATCHER +#define GET_MATCHER_IMPLEMENTATION +#include "PPCGenAsmMatcher.inc" diff --git a/contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp b/contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp index bacc108..93fca00 100644 --- a/contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp +++ b/contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp @@ -151,8 +151,8 @@ void PPCInstPrinter::printBranchOperand(const MCInst *MI, unsigned OpNo, return printOperand(MI, OpNo, O); // Branches can take an immediate operand. This is used by the branch - // selection pass to print $+8, an eight byte displacement from the PC. - O << "$+"; + // selection pass to print .+8, an eight byte displacement from the PC. 
+ O << ".+"; printAbsAddrOperand(MI, OpNo, O); } diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp index 84e4175..7a84723 100644 --- a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp +++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp @@ -77,6 +77,9 @@ unsigned PPCELFObjectWriter::getRelocTypeInner(const MCValue &Target, case PPC::fixup_ppc_br24: Type = ELF::R_PPC_REL24; break; + case PPC::fixup_ppc_brcond14: + Type = ELF::R_PPC_REL14; + break; case FK_Data_4: case FK_PCRel_4: Type = ELF::R_PPC_REL32; @@ -104,7 +107,8 @@ unsigned PPCELFObjectWriter::getRelocTypeInner(const MCValue &Target, case MCSymbolRefExpr::VK_PPC_DTPREL16_HA: Type = ELF::R_PPC64_DTPREL16_HA; break; - case MCSymbolRefExpr::VK_None: + case MCSymbolRefExpr::VK_PPC_GAS_HA16: + case MCSymbolRefExpr::VK_PPC_DARWIN_HA16: Type = ELF::R_PPC_ADDR16_HA; break; case MCSymbolRefExpr::VK_PPC_TOC16_HA: @@ -131,6 +135,10 @@ unsigned PPCELFObjectWriter::getRelocTypeInner(const MCValue &Target, Type = ELF::R_PPC64_DTPREL16_LO; break; case MCSymbolRefExpr::VK_None: + Type = ELF::R_PPC_ADDR16; + break; + case MCSymbolRefExpr::VK_PPC_GAS_LO16: + case MCSymbolRefExpr::VK_PPC_DARWIN_LO16: Type = ELF::R_PPC_ADDR16_LO; break; case MCSymbolRefExpr::VK_PPC_TOC_ENTRY: @@ -153,6 +161,10 @@ unsigned PPCELFObjectWriter::getRelocTypeInner(const MCValue &Target, case MCSymbolRefExpr::VK_None: Type = ELF::R_PPC64_ADDR16_DS; break; + case MCSymbolRefExpr::VK_PPC_GAS_LO16: + case MCSymbolRefExpr::VK_PPC_DARWIN_LO16: + Type = ELF::R_PPC64_ADDR16_LO_DS; + break; case MCSymbolRefExpr::VK_PPC_TOC_ENTRY: Type = ELF::R_PPC64_TOC16_DS; break; diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.cpp b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.cpp index d84eb9c..853e505 100644 --- a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.cpp +++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.cpp @@ -29,3 +29,18 @@ PPC::Predicate PPC::InvertPredicate(PPC::Predicate Opcode) { } llvm_unreachable("Unknown PPC branch opcode!"); } + +PPC::Predicate PPC::getSwappedPredicate(PPC::Predicate Opcode) { + switch (Opcode) { + case PPC::PRED_EQ: return PPC::PRED_EQ; + case PPC::PRED_NE: return PPC::PRED_NE; + case PPC::PRED_LT: return PPC::PRED_GT; + case PPC::PRED_GE: return PPC::PRED_LE; + case PPC::PRED_GT: return PPC::PRED_LT; + case PPC::PRED_LE: return PPC::PRED_GE; + case PPC::PRED_NU: return PPC::PRED_NU; + case PPC::PRED_UN: return PPC::PRED_UN; + } + llvm_unreachable("Unknown PPC branch opcode!"); +} + diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h index ad2b018..444758c 100644 --- a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h +++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h @@ -37,6 +37,10 @@ namespace PPC { /// Invert the specified predicate. != -> ==, < -> >=. Predicate InvertPredicate(Predicate Opcode); + + /// Assume the condition register is set by MI(a,b), return the predicate if + /// we modify the instructions such that condition register is set by MI(b,a). 
+ Predicate getSwappedPredicate(Predicate Opcode); } } diff --git a/contrib/llvm/lib/Target/PowerPC/PPC.h b/contrib/llvm/lib/Target/PowerPC/PPC.h index 446b685..b4be51a 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPC.h +++ b/contrib/llvm/lib/Target/PowerPC/PPC.h @@ -31,6 +31,7 @@ namespace llvm { class MCInst; FunctionPass *createPPCCTRLoops(); + FunctionPass *createPPCEarlyReturnPass(); FunctionPass *createPPCBranchSelectionPass(); FunctionPass *createPPCISelDag(PPCTargetMachine &TM); FunctionPass *createPPCJITCodeEmitterPass(PPCTargetMachine &TM, @@ -40,7 +41,7 @@ namespace llvm { /// \brief Creates an PPC-specific Target Transformation Info pass. ImmutablePass *createPPCTargetTransformInfoPass(const PPCTargetMachine *TM); - + namespace PPCII { /// Target Operand Flag enum. diff --git a/contrib/llvm/lib/Target/PowerPC/PPC.td b/contrib/llvm/lib/Target/PowerPC/PPC.td index 3892162..eb73c67 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPC.td +++ b/contrib/llvm/lib/Target/PowerPC/PPC.td @@ -95,6 +95,43 @@ def FeatureQPX : SubtargetFeature<"qpx","HasQPX", "true", // VSX p7 vector-scalar instruction set //===----------------------------------------------------------------------===// +// Classes used for relation maps. +//===----------------------------------------------------------------------===// +// RecFormRel - Filter class used to relate non-record-form instructions with +// their record-form variants. +class RecFormRel; + +//===----------------------------------------------------------------------===// +// Relation Map Definitions. +//===----------------------------------------------------------------------===// + +def getRecordFormOpcode : InstrMapping { + let FilterClass = "RecFormRel"; + // Instructions with the same BaseName and Interpretation64Bit values + // form a row. + let RowFields = ["BaseName", "Interpretation64Bit"]; + // Instructions with the same RC value form a column. + let ColFields = ["RC"]; + // The key column are the non-record-form instructions. + let KeyCol = ["0"]; + // Value columns RC=1 + let ValueCols = [["1"]]; +} + +def getNonRecordFormOpcode : InstrMapping { + let FilterClass = "RecFormRel"; + // Instructions with the same BaseName and Interpretation64Bit values + // form a row. + let RowFields = ["BaseName", "Interpretation64Bit"]; + // Instructions with the same RC value form a column. + let ColFields = ["RC"]; + // The key column are the record-form instructions. + let KeyCol = ["1"]; + // Value columns are RC=0 + let ValueCols = [["0"]]; +} + +//===----------------------------------------------------------------------===// // Register File Description //===----------------------------------------------------------------------===// @@ -216,7 +253,6 @@ def : ProcessorModel<"ppc64", G5Model, FeatureFRSQRTE, FeatureSTFIWX, Feature64Bit /*, Feature64BitRegs */]>; - //===----------------------------------------------------------------------===// // Calling Conventions //===----------------------------------------------------------------------===// @@ -232,9 +268,14 @@ def PPCAsmWriter : AsmWriter { bit isMCAsmWriter = 1; } +def PPCAsmParser : AsmParser { + let ShouldEmitMatchRegisterName = 0; +} + def PPC : Target { // Information about the instructions. 
let InstructionSet = PPCInstrInfo; let AssemblyWriters = [PPCAsmWriter]; + let AssemblyParsers = [PPCAsmParser]; } diff --git a/contrib/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp b/contrib/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp index 96a9f0a..3c7cc4e 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp @@ -721,7 +721,7 @@ void PPCLinuxAsmPrinter::EmitFunctionEntryLabel() { return AsmPrinter::EmitFunctionEntryLabel(); // Emit an official procedure descriptor. - const MCSection *Current = OutStreamer.getCurrentSection(); + MCSectionSubPair Current = OutStreamer.getCurrentSection(); const MCSectionELF *Section = OutStreamer.getContext().getELFSection(".opd", ELF::SHT_PROGBITS, ELF::SHF_WRITE | ELF::SHF_ALLOC, SectionKind::getReadOnly()); @@ -741,7 +741,7 @@ void PPCLinuxAsmPrinter::EmitFunctionEntryLabel() { 8/*size*/); // Emit a null environment pointer. OutStreamer.EmitIntValue(0, 8 /* size */); - OutStreamer.SwitchSection(Current); + OutStreamer.SwitchSection(Current.first, Current.second); MCSymbol *RealFnSym = OutContext.GetOrCreateSymbol( ".L." + Twine(CurrentFnSym->getName())); diff --git a/contrib/llvm/lib/Target/PowerPC/PPCBranchSelector.cpp b/contrib/llvm/lib/Target/PowerPC/PPCBranchSelector.cpp index bd1c378..3e608ca 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCBranchSelector.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCBranchSelector.cpp @@ -112,15 +112,21 @@ bool PPCBSel::runOnMachineFunction(MachineFunction &Fn) { unsigned MBBStartOffset = 0; for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); I != E; ++I) { - if (I->getOpcode() != PPC::BCC || I->getOperand(2).isImm()) { + MachineBasicBlock *Dest = 0; + if (I->getOpcode() == PPC::BCC && !I->getOperand(2).isImm()) + Dest = I->getOperand(2).getMBB(); + else if ((I->getOpcode() == PPC::BDNZ8 || I->getOpcode() == PPC::BDNZ || + I->getOpcode() == PPC::BDZ8 || I->getOpcode() == PPC::BDZ) && + !I->getOperand(0).isImm()) + Dest = I->getOperand(0).getMBB(); + + if (!Dest) { MBBStartOffset += TII->GetInstSizeInBytes(I); continue; } // Determine the offset from the current branch to the destination // block. - MachineBasicBlock *Dest = I->getOperand(2).getMBB(); - int BranchSize; if (Dest->getNumber() <= MBB.getNumber()) { // If this is a backwards branch, the delta is the offset from the diff --git a/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp b/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp index 3244b90..c845909 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp @@ -223,9 +223,7 @@ unsigned PPCFrameLowering::determineFrameLayout(MachineFunction &MF, // If we are a leaf function, and use up to 224 bytes of stack space, // don't have a frame pointer, calls, or dynamic alloca then we do not need - // to adjust the stack pointer (we fit in the Red Zone). For 64-bit - // SVR4, we also require a stack frame if we need to spill the CR, - // since this spill area is addressed relative to the stack pointer. + // to adjust the stack pointer (we fit in the Red Zone). // The 32-bit SVR4 ABI has no Red Zone. However, it can still generate // stackless code if all local vars are reg-allocated. bool DisableRedZone = MF.getFunction()->getAttributes(). @@ -237,9 +235,6 @@ unsigned PPCFrameLowering::determineFrameLayout(MachineFunction &MF, FrameSize <= 224 && // Fits in red zone. !MFI->hasVarSizedObjects() && // No dynamic alloca. !MFI->adjustsStack() && // No calls. 
- !(Subtarget.isPPC64() && // No 64-bit SVR4 CRsave. - Subtarget.isSVR4ABI() - && spillsCR(MF)) && (!ALIGN_STACK || MaxAlign <= TargetAlign)) { // No special alignment. // No need for frame if (UpdateMF) @@ -373,6 +368,7 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const { // Check if the link register (LR) must be saved. PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>(); bool MustSaveLR = FI->mustSaveLR(); + const SmallVector<unsigned, 3> &MustSaveCRs = FI->getMustSaveCRs(); // Do we have a frame pointer for this function? bool HasFP = hasFP(MF); @@ -394,6 +390,13 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const { if (MustSaveLR) BuildMI(MBB, MBBI, dl, TII.get(PPC::MFLR8), PPC::X0); + if (!MustSaveCRs.empty()) { + MachineInstrBuilder MIB = + BuildMI(MBB, MBBI, dl, TII.get(PPC::MFCR8), PPC::X12); + for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i) + MIB.addReg(MustSaveCRs[i], RegState::ImplicitKill); + } + if (HasFP) BuildMI(MBB, MBBI, dl, TII.get(PPC::STD)) .addReg(PPC::X31) @@ -405,6 +408,12 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const { .addReg(PPC::X0) .addImm(LROffset / 4) .addReg(PPC::X1); + + if (!MustSaveCRs.empty()) + BuildMI(MBB, MBBI, dl, TII.get(PPC::STW8)) + .addReg(PPC::X12, getKillRegState(true)) + .addImm(8) + .addReg(PPC::X1); } else { if (MustSaveLR) BuildMI(MBB, MBBI, dl, TII.get(PPC::MFLR), PPC::R0); @@ -417,6 +426,9 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const { .addImm(FPOffset) .addReg(PPC::R1); + assert(MustSaveCRs.empty() && + "Prologue CR saving supported only in 64-bit mode"); + if (MustSaveLR) BuildMI(MBB, MBBI, dl, TII.get(PPC::STW)) .addReg(PPC::R0) @@ -580,7 +592,7 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const { // spilled CRs. if (Subtarget.isSVR4ABI() && (PPC::CR2 <= Reg && Reg <= PPC::CR4) - && !spillsCR(MF)) + && MustSaveCRs.empty()) continue; // For 64-bit SVR4 when we have spilled CRs, the spill location @@ -636,6 +648,7 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF, // Check if the link register (LR) has been saved. PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>(); bool MustSaveLR = FI->mustSaveLR(); + const SmallVector<unsigned, 3> &MustSaveCRs = FI->getMustSaveCRs(); // Do we have a frame pointer for this function? 
bool HasFP = hasFP(MF); @@ -736,10 +749,19 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF, BuildMI(MBB, MBBI, dl, TII.get(PPC::LD), PPC::X0) .addImm(LROffset/4).addReg(PPC::X1); + if (!MustSaveCRs.empty()) + BuildMI(MBB, MBBI, dl, TII.get(PPC::LWZ8), PPC::X12) + .addImm(8).addReg(PPC::X1); + if (HasFP) BuildMI(MBB, MBBI, dl, TII.get(PPC::LD), PPC::X31) .addImm(FPOffset/4).addReg(PPC::X1); + if (!MustSaveCRs.empty()) + for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i) + BuildMI(MBB, MBBI, dl, TII.get(PPC::MTCRF8), MustSaveCRs[i]) + .addReg(PPC::X12, getKillRegState(i == e-1)); + if (MustSaveLR) BuildMI(MBB, MBBI, dl, TII.get(PPC::MTLR8)).addReg(PPC::X0); } else { @@ -747,6 +769,9 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF, BuildMI(MBB, MBBI, dl, TII.get(PPC::LWZ), PPC::R0) .addImm(LROffset).addReg(PPC::R1); + assert(MustSaveCRs.empty() && + "Epilogue CR restoring supported only in 64-bit mode"); + if (HasFP) BuildMI(MBB, MBBI, dl, TII.get(PPC::LWZ), PPC::R31) .addImm(FPOffset).addReg(PPC::R1); @@ -1122,44 +1147,42 @@ PPCFrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB, *static_cast<const PPCInstrInfo*>(MF->getTarget().getInstrInfo()); DebugLoc DL; bool CRSpilled = false; + MachineInstrBuilder CRMIB; for (unsigned i = 0, e = CSI.size(); i != e; ++i) { unsigned Reg = CSI[i].getReg(); // CR2 through CR4 are the nonvolatile CR fields. bool IsCRField = PPC::CR2 <= Reg && Reg <= PPC::CR4; - if (CRSpilled && IsCRField) - continue; - // Add the callee-saved register as live-in; it's killed at the spill. MBB.addLiveIn(Reg); + if (CRSpilled && IsCRField) { + CRMIB.addReg(Reg, RegState::ImplicitKill); + continue; + } + // Insert the spill to the stack frame. if (IsCRField) { - CRSpilled = true; - // The first time we see a CR field, store the whole CR into the - // save slot via GPR12 (available in the prolog for 32- and 64-bit). + PPCFunctionInfo *FuncInfo = MF->getInfo<PPCFunctionInfo>(); if (Subtarget.isPPC64()) { - // 64-bit: SP+8 - MBB.insert(MI, BuildMI(*MF, DL, TII.get(PPC::MFCR8), PPC::X12)); - MBB.insert(MI, BuildMI(*MF, DL, TII.get(PPC::STW8)) - .addReg(PPC::X12, - getKillRegState(true)) - .addImm(8) - .addReg(PPC::X1)); + // The actual spill will happen at the start of the prologue. + FuncInfo->addMustSaveCR(Reg); } else { + CRSpilled = true; + FuncInfo->setSpillsCR(); + // 32-bit: FP-relative. Note that we made sure CR2-CR4 all have // the same frame index in PPCRegisterInfo::hasReservedSpillSlot. - MBB.insert(MI, BuildMI(*MF, DL, TII.get(PPC::MFCR), PPC::R12)); + CRMIB = BuildMI(*MF, DL, TII.get(PPC::MFCR), PPC::R12) + .addReg(Reg, RegState::ImplicitKill); + + MBB.insert(MI, CRMIB); MBB.insert(MI, addFrameReference(BuildMI(*MF, DL, TII.get(PPC::STW)) .addReg(PPC::R12, getKillRegState(true)), CSI[i].getFrameIdx())); } - - // Record that we spill the CR in this function. 
- PPCFunctionInfo *FuncInfo = MF->getInfo<PPCFunctionInfo>(); - FuncInfo->setSpillsCR(); } else { const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); TII.storeRegToStackSlot(MBB, MI, Reg, true, @@ -1170,7 +1193,8 @@ PPCFrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB, } static void -restoreCRs(bool isPPC64, bool CR2Spilled, bool CR3Spilled, bool CR4Spilled, +restoreCRs(bool isPPC64, bool is31, + bool CR2Spilled, bool CR3Spilled, bool CR4Spilled, MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const std::vector<CalleeSavedInfo> &CSI, unsigned CSIIndex) { @@ -1180,14 +1204,10 @@ restoreCRs(bool isPPC64, bool CR2Spilled, bool CR3Spilled, bool CR4Spilled, DebugLoc DL; unsigned RestoreOp, MoveReg; - if (isPPC64) { - // 64-bit: SP+8 - MBB.insert(MI, BuildMI(*MF, DL, TII.get(PPC::LWZ8), PPC::X12) - .addImm(8) - .addReg(PPC::X1)); - RestoreOp = PPC::MTCRF8; - MoveReg = PPC::X12; - } else { + if (isPPC64) + // This is handled during epilogue generation. + return; + else { // 32-bit: FP-relative MBB.insert(MI, addFrameReference(BuildMI(*MF, DL, TII.get(PPC::LWZ), PPC::R12), @@ -1297,7 +1317,9 @@ PPCFrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB, // least one CR register, restore all spilled CRs together. if ((CR2Spilled || CR3Spilled || CR4Spilled) && !(PPC::CR2 <= Reg && Reg <= PPC::CR4)) { - restoreCRs(Subtarget.isPPC64(), CR2Spilled, CR3Spilled, CR4Spilled, + bool is31 = needsFP(*MF); + restoreCRs(Subtarget.isPPC64(), is31, + CR2Spilled, CR3Spilled, CR4Spilled, MBB, I, CSI, CSIIndex); CR2Spilled = CR3Spilled = CR4Spilled = false; } @@ -1320,9 +1342,11 @@ PPCFrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB, } // If we haven't yet spilled the CRs, do so now. - if (CR2Spilled || CR3Spilled || CR4Spilled) - restoreCRs(Subtarget.isPPC64(), CR2Spilled, CR3Spilled, CR4Spilled, + if (CR2Spilled || CR3Spilled || CR4Spilled) { + bool is31 = needsFP(*MF); + restoreCRs(Subtarget.isPPC64(), is31, CR2Spilled, CR3Spilled, CR4Spilled, MBB, I, CSI, CSIIndex); + } return true; } diff --git a/contrib/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/contrib/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp index 95efc11..aed0fbb 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -457,7 +457,7 @@ SDNode *PPCDAGToDAGISel::SelectBitfieldInsert(SDNode *N) { SH &= 31; SDValue Ops[] = { Op0, Op1, getI32Imm(SH), getI32Imm(MB), getI32Imm(ME) }; - return CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops, 5); + return CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops); } } return 0; @@ -780,7 +780,7 @@ SDNode *PPCDAGToDAGISel::SelectSETCC(SDNode *N) { } case ISD::SETGT: { SDValue Ops[] = { Op, getI32Imm(1), getI32Imm(31), getI32Imm(31) }; - Op = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops, 4), + Op = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops), 0); return CurDAG->SelectNodeTo(N, PPC::XORI, MVT::i32, Op, getI32Imm(1)); @@ -873,7 +873,7 @@ SDNode *PPCDAGToDAGISel::SelectSETCC(SDNode *N) { // Get the specified bit. SDValue Tmp = - SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops, 4), 0); + SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops), 0); if (Inv) { assert(OtherCondIdx == -1 && "Can't have split plus negation"); return CurDAG->SelectNodeTo(N, PPC::XORI, MVT::i32, Tmp, getI32Imm(1)); @@ -885,7 +885,7 @@ SDNode *PPCDAGToDAGISel::SelectSETCC(SDNode *N) { // Get the other bit of the comparison. 
Ops[1] = getI32Imm((32-(3-OtherCondIdx)) & 31); SDValue OtherCond = - SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops, 4), 0); + SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops), 0); return CurDAG->SelectNodeTo(N, PPC::OR, MVT::i32, Tmp, OtherCond); } @@ -1079,7 +1079,7 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) { SDValue Ops[] = { Offset, Base, Chain }; return CurDAG->getMachineNode(Opcode, dl, LD->getValueType(0), PPCLowering.getPointerTy(), - MVT::Other, Ops, 3); + MVT::Other, Ops); } else { unsigned Opcode; bool isSExt = LD->getExtensionType() == ISD::SEXTLOAD; @@ -1114,7 +1114,7 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) { SDValue Ops[] = { Base, Offset, Chain }; return CurDAG->getMachineNode(Opcode, dl, LD->getValueType(0), PPCLowering.getPointerTy(), - MVT::Other, Ops, 3); + MVT::Other, Ops); } } @@ -1163,7 +1163,7 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) { SDValue Ops[] = { N->getOperand(0).getOperand(0), N->getOperand(0).getOperand(1), getI32Imm(0), getI32Imm(MB),getI32Imm(ME) }; - return CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops, 5); + return CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops); } } diff --git a/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index 16fc8a0..3fcafdc 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -71,6 +71,7 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) : TargetLowering(TM, CreateTLOF(TM)), PPCSubTarget(*TM.getSubtargetImpl()) { const PPCSubtarget *Subtarget = &TM.getSubtarget<PPCSubtarget>(); PPCRegInfo = TM.getRegisterInfo(); + PPCII = TM.getInstrInfo(); setPow2DivIsCheap(); @@ -513,7 +514,8 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) setOperationAction(ISD::ATOMIC_STORE, MVT::i64, Expand); setBooleanContents(ZeroOrOneBooleanContent); - setBooleanVectorContents(ZeroOrOneBooleanContent); // FIXME: Is this correct? + // Altivec instructions set fields to all zeros or all ones. + setBooleanVectorContents(ZeroOrNegativeOneBooleanContent); if (isPPC64) { setStackPointerRegisterToSaveRestore(PPC::X1); @@ -4672,10 +4674,14 @@ SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { !Op.getOperand(2).getValueType().isFloatingPoint()) return Op; - ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get(); + // We might be able to do better than this under some circumstances, but in + // general, fsel-based lowering of select is a finite-math-only optimization. + // For more information, see section F.3 of the 2.06 ISA specification. + if (!DAG.getTarget().Options.NoInfsFPMath || + !DAG.getTarget().Options.NoNaNsFPMath) + return Op; - // Cannot handle SETEQ/SETNE. - if (CC == ISD::SETEQ || CC == ISD::SETNE) return Op; + ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get(); EVT ResVT = Op.getValueType(); EVT CmpVT = Op.getOperand(0).getValueType(); @@ -4685,9 +4691,20 @@ SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { // If the RHS of the comparison is a 0.0, we don't need to do the // subtraction at all. + SDValue Sel1; if (isFloatingPointZero(RHS)) switch (CC) { default: break; // SETUO etc aren't handled by fsel. 
+ case ISD::SETNE: + std::swap(TV, FV); + case ISD::SETEQ: + if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits + LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS); + Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV); + if (Sel1.getValueType() == MVT::f32) // Comparison is always 64-bits + Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1); + return DAG.getNode(PPCISD::FSEL, dl, ResVT, + DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), Sel1, FV); case ISD::SETULT: case ISD::SETLT: std::swap(TV, FV); // fsel is natively setge, swap operands for setlt @@ -4710,30 +4727,41 @@ SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { SDValue Cmp; switch (CC) { default: break; // SETUO etc aren't handled by fsel. + case ISD::SETNE: + std::swap(TV, FV); + case ISD::SETEQ: + Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS); + if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits + Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp); + Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV); + if (Sel1.getValueType() == MVT::f32) // Comparison is always 64-bits + Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1); + return DAG.getNode(PPCISD::FSEL, dl, ResVT, + DAG.getNode(ISD::FNEG, dl, MVT::f64, Cmp), Sel1, FV); case ISD::SETULT: case ISD::SETLT: Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS); if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp); - return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV); + return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV); case ISD::SETOGE: case ISD::SETGE: Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS); if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp); - return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV); + return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV); case ISD::SETUGT: case ISD::SETGT: Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS); if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp); - return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV); + return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV); case ISD::SETOLE: case ISD::SETLE: Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS); if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp); - return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV); + return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV); } return Op; } @@ -6239,29 +6267,13 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, if (PPCSubTarget.hasISEL() && (MI->getOpcode() == PPC::SELECT_CC_I4 || MI->getOpcode() == PPC::SELECT_CC_I8)) { - unsigned OpCode = MI->getOpcode() == PPC::SELECT_CC_I8 ? 
- PPC::ISEL8 : PPC::ISEL; - unsigned SelectPred = MI->getOperand(4).getImm(); - DebugLoc dl = MI->getDebugLoc(); + SmallVector<MachineOperand, 2> Cond; + Cond.push_back(MI->getOperand(4)); + Cond.push_back(MI->getOperand(1)); - unsigned SubIdx; - bool SwapOps; - switch (SelectPred) { - default: llvm_unreachable("invalid predicate for isel"); - case PPC::PRED_EQ: SubIdx = PPC::sub_eq; SwapOps = false; break; - case PPC::PRED_NE: SubIdx = PPC::sub_eq; SwapOps = true; break; - case PPC::PRED_LT: SubIdx = PPC::sub_lt; SwapOps = false; break; - case PPC::PRED_GE: SubIdx = PPC::sub_lt; SwapOps = true; break; - case PPC::PRED_GT: SubIdx = PPC::sub_gt; SwapOps = false; break; - case PPC::PRED_LE: SubIdx = PPC::sub_gt; SwapOps = true; break; - case PPC::PRED_UN: SubIdx = PPC::sub_un; SwapOps = false; break; - case PPC::PRED_NU: SubIdx = PPC::sub_un; SwapOps = true; break; - } - - BuildMI(*BB, MI, dl, TII->get(OpCode), MI->getOperand(0).getReg()) - .addReg(MI->getOperand(SwapOps? 3 : 2).getReg()) - .addReg(MI->getOperand(SwapOps? 2 : 3).getReg()) - .addReg(MI->getOperand(1).getReg(), 0, SubIdx); + DebugLoc dl = MI->getDebugLoc(); + PPCII->insertSelect(*BB, MI, dl, MI->getOperand(0).getReg(), Cond, + MI->getOperand(2).getReg(), MI->getOperand(3).getReg()); } else if (MI->getOpcode() == PPC::SELECT_CC_I4 || MI->getOpcode() == PPC::SELECT_CC_I8 || MI->getOpcode() == PPC::SELECT_CC_F4 || diff --git a/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.h b/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.h index 7157b70..423e983 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.h +++ b/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.h @@ -16,6 +16,7 @@ #define LLVM_TARGET_POWERPC_PPC32ISELLOWERING_H #include "PPC.h" +#include "PPCInstrInfo.h" #include "PPCRegisterInfo.h" #include "PPCSubtarget.h" #include "llvm/CodeGen/SelectionDAG.h" @@ -327,6 +328,7 @@ namespace llvm { class PPCTargetLowering : public TargetLowering { const PPCSubtarget &PPCSubTarget; const PPCRegisterInfo *PPCRegInfo; + const PPCInstrInfo *PPCII; public: explicit PPCTargetLowering(PPCTargetMachine &TM); diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstr64Bit.td b/contrib/llvm/lib/Target/PowerPC/PPCInstr64Bit.td index fa5b65f..bff4c23 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCInstr64Bit.td +++ b/contrib/llvm/lib/Target/PowerPC/PPCInstr64Bit.td @@ -17,17 +17,21 @@ // def s16imm64 : Operand<i64> { let PrintMethod = "printS16ImmOperand"; + let ParserMatchClass = PPCS16ImmAsmOperand; } def u16imm64 : Operand<i64> { let PrintMethod = "printU16ImmOperand"; + let ParserMatchClass = PPCU16ImmAsmOperand; } def symbolHi64 : Operand<i64> { let PrintMethod = "printSymbolHi"; let EncoderMethod = "getHA16Encoding"; + let ParserMatchClass = PPCS16ImmAsmOperand; } def symbolLo64 : Operand<i64> { let PrintMethod = "printSymbolLo"; let EncoderMethod = "getLO16Encoding"; + let ParserMatchClass = PPCS16ImmAsmOperand; } def tocentry : Operand<iPTR> { let MIOperandInfo = (ops i64imm:$imm); @@ -66,10 +70,17 @@ def HI48_64 : SDNodeXForm<imm, [{ // Calls. 
// +let Interpretation64Bit = 1 in { let isTerminator = 1, isBarrier = 1, PPC970_Unit = 7 in { - let isBranch = 1, isIndirectBranch = 1, Uses = [CTR8] in + let isBranch = 1, isIndirectBranch = 1, Uses = [CTR8] in { def BCTR8 : XLForm_2_ext<19, 528, 20, 0, 0, (outs), (ins), "bctr", BrB, []>, Requires<[In64BitMode]>; + + let isCodeGenOnly = 1 in + def BCCTR8 : XLForm_2_br<19, 528, 0, (outs), (ins pred:$cond), + "b${cond:cc}ctr ${cond:reg}", BrB, []>, + Requires<[In64BitMode]>; + } } let Defs = [LR8] in @@ -83,8 +94,17 @@ let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7 in { def BDNZ8 : BForm_1<16, 16, 0, 0, (outs), (ins condbrtarget:$dst), "bdnz $dst">; } + + let isReturn = 1, Defs = [CTR8], Uses = [CTR8, LR8, RM] in { + def BDZLR8 : XLForm_2_ext<19, 16, 18, 0, 0, (outs), (ins), + "bdzlr", BrB, []>; + def BDNZLR8 : XLForm_2_ext<19, 16, 16, 0, 0, (outs), (ins), + "bdnzlr", BrB, []>; + } } + + let isCall = 1, PPC970_Unit = 7, Defs = [LR8] in { // Convenient aliases for call instructions let Uses = [RM] in { @@ -116,9 +136,14 @@ let isCall = 1, PPC970_Unit = 7, Defs = [LR8] in { def BCTRL8 : XLForm_2_ext<19, 528, 20, 0, 1, (outs), (ins), "bctrl", BrB, [(PPCbctrl)]>, Requires<[In64BitMode]>; + + let isCodeGenOnly = 1 in + def BCCTRL8 : XLForm_2_br<19, 528, 1, (outs), (ins pred:$cond), + "b${cond:cc}ctrl ${cond:reg}", BrB, []>, + Requires<[In64BitMode]>; } } - +} // Interpretation64Bit // Calls def : Pat<(PPCcall (i64 tglobaladdr:$dst)), @@ -135,45 +160,46 @@ def : Pat<(PPCcall_nop (i64 texternalsym:$dst)), let usesCustomInserter = 1 in { let Defs = [CR0] in { def ATOMIC_LOAD_ADD_I64 : Pseudo< - (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$incr), "#ATOMIC_LOAD_ADD_I64", + (outs g8rc:$dst), (ins memrr:$ptr, g8rc:$incr), "#ATOMIC_LOAD_ADD_I64", [(set i64:$dst, (atomic_load_add_64 xoaddr:$ptr, i64:$incr))]>; def ATOMIC_LOAD_SUB_I64 : Pseudo< - (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$incr), "#ATOMIC_LOAD_SUB_I64", + (outs g8rc:$dst), (ins memrr:$ptr, g8rc:$incr), "#ATOMIC_LOAD_SUB_I64", [(set i64:$dst, (atomic_load_sub_64 xoaddr:$ptr, i64:$incr))]>; def ATOMIC_LOAD_OR_I64 : Pseudo< - (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$incr), "#ATOMIC_LOAD_OR_I64", + (outs g8rc:$dst), (ins memrr:$ptr, g8rc:$incr), "#ATOMIC_LOAD_OR_I64", [(set i64:$dst, (atomic_load_or_64 xoaddr:$ptr, i64:$incr))]>; def ATOMIC_LOAD_XOR_I64 : Pseudo< - (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$incr), "#ATOMIC_LOAD_XOR_I64", + (outs g8rc:$dst), (ins memrr:$ptr, g8rc:$incr), "#ATOMIC_LOAD_XOR_I64", [(set i64:$dst, (atomic_load_xor_64 xoaddr:$ptr, i64:$incr))]>; def ATOMIC_LOAD_AND_I64 : Pseudo< - (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$incr), "#ATOMIC_LOAD_AND_i64", + (outs g8rc:$dst), (ins memrr:$ptr, g8rc:$incr), "#ATOMIC_LOAD_AND_i64", [(set i64:$dst, (atomic_load_and_64 xoaddr:$ptr, i64:$incr))]>; def ATOMIC_LOAD_NAND_I64 : Pseudo< - (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$incr), "#ATOMIC_LOAD_NAND_I64", + (outs g8rc:$dst), (ins memrr:$ptr, g8rc:$incr), "#ATOMIC_LOAD_NAND_I64", [(set i64:$dst, (atomic_load_nand_64 xoaddr:$ptr, i64:$incr))]>; def ATOMIC_CMP_SWAP_I64 : Pseudo< - (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$old, G8RC:$new), "#ATOMIC_CMP_SWAP_I64", + (outs g8rc:$dst), (ins memrr:$ptr, g8rc:$old, g8rc:$new), "#ATOMIC_CMP_SWAP_I64", [(set i64:$dst, (atomic_cmp_swap_64 xoaddr:$ptr, i64:$old, i64:$new))]>; def ATOMIC_SWAP_I64 : Pseudo< - (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$new), "#ATOMIC_SWAP_I64", + (outs g8rc:$dst), (ins memrr:$ptr, g8rc:$new), "#ATOMIC_SWAP_I64", [(set i64:$dst, (atomic_swap_64 
xoaddr:$ptr, i64:$new))]>; } } // Instructions to support atomic operations -def LDARX : XForm_1<31, 84, (outs G8RC:$rD), (ins memrr:$ptr), +def LDARX : XForm_1<31, 84, (outs g8rc:$rD), (ins memrr:$ptr), "ldarx $rD, $ptr", LdStLDARX, [(set i64:$rD, (PPClarx xoaddr:$ptr))]>; let Defs = [CR0] in -def STDCX : XForm_1<31, 214, (outs), (ins G8RC:$rS, memrr:$dst), +def STDCX : XForm_1<31, 214, (outs), (ins g8rc:$rS, memrr:$dst), "stdcx. $rS, $dst", LdStSTDCX, [(PPCstcx i64:$rS, xoaddr:$dst)]>, isDOT; +let Interpretation64Bit = 1 in { let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [RM] in def TCRETURNdi8 :Pseudo< (outs), (ins calltarget:$dst, i32imm:$offset), @@ -212,6 +238,7 @@ def TAILBA8 : IForm<18, 0, 0, (outs), (ins aaddr:$dst), []>; } +} // Interpretation64Bit def : Pat<(PPCtc_return (i64 tglobaladdr:$dst), imm:$imm), (TCRETURNdi8 tglobaladdr:$dst, imm:$imm)>; @@ -224,21 +251,25 @@ def : Pat<(PPCtc_return CTRRC8:$dst, imm:$imm), // 64-bit CR instructions -def MTCRF8 : XFXForm_5<31, 144, (outs crbitm:$FXM), (ins G8RC:$rS), +let Interpretation64Bit = 1 in { +let neverHasSideEffects = 1 in { +def MTCRF8 : XFXForm_5<31, 144, (outs crbitm:$FXM), (ins g8rc:$rS), "mtcrf $FXM, $rS", BrMCRX>, PPC970_MicroCode, PPC970_Unit_CRU; let isCodeGenOnly = 1 in -def MFCR8pseud: XFXForm_3<31, 19, (outs G8RC:$rT), (ins crbitm:$FXM), +def MFCR8pseud: XFXForm_3<31, 19, (outs g8rc:$rT), (ins crbitm:$FXM), "#MFCR8pseud", SprMFCR>, PPC970_MicroCode, PPC970_Unit_CRU; - -def MFCR8 : XFXForm_3<31, 19, (outs G8RC:$rT), (ins), +} // neverHasSideEffects = 1 + +let neverHasSideEffects = 1 in +def MFCR8 : XFXForm_3<31, 19, (outs g8rc:$rT), (ins), "mfcr $rT", SprMFCR>, PPC970_MicroCode, PPC970_Unit_CRU; let hasSideEffects = 1, isBarrier = 1, usesCustomInserter = 1 in { - def EH_SjLj_SetJmp64 : Pseudo<(outs GPRC:$dst), (ins memr:$buf), + def EH_SjLj_SetJmp64 : Pseudo<(outs gprc:$dst), (ins memr:$buf), "#EH_SJLJ_SETJMP64", [(set i32:$dst, (PPCeh_sjlj_setjmp addr:$buf))]>, Requires<[In64BitMode]>; @@ -253,18 +284,18 @@ let hasSideEffects = 1, isBarrier = 1, usesCustomInserter = 1 in { // 64-bit SPR manipulation instrs. let Uses = [CTR8] in { -def MFCTR8 : XFXForm_1_ext<31, 339, 9, (outs G8RC:$rT), (ins), +def MFCTR8 : XFXForm_1_ext<31, 339, 9, (outs g8rc:$rT), (ins), "mfctr $rT", SprMFSPR>, PPC970_DGroup_First, PPC970_Unit_FXU; } let Pattern = [(PPCmtctr i64:$rS)], Defs = [CTR8] in { -def MTCTR8 : XFXForm_7_ext<31, 467, 9, (outs), (ins G8RC:$rS), +def MTCTR8 : XFXForm_7_ext<31, 467, 9, (outs), (ins g8rc:$rS), "mtctr $rS", SprMTSPR>, PPC970_DGroup_First, PPC970_Unit_FXU; } let Pattern = [(set i64:$rT, readcyclecounter)] in -def MFTB8 : XFXForm_1_ext<31, 339, 268, (outs G8RC:$rT), (ins), +def MFTB8 : XFXForm_1_ext<31, 339, 268, (outs g8rc:$rT), (ins), "mfspr $rT, 268", SprMFTB>, PPC970_DGroup_First, PPC970_Unit_FXU; // Note that encoding mftb using mfspr is now the preferred form, @@ -273,252 +304,265 @@ def MFTB8 : XFXForm_1_ext<31, 339, 268, (outs G8RC:$rT), (ins), // the POWER3. 
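One more aside before the defm conversions that follow: the Interpretation64Bit flag wrapped around these definitions feeds the RecFormRel relation maps declared in PPC.td earlier in this patch (getRecordFormOpcode / getNonRecordFormOpcode), keeping the 32-bit and 64-bit spellings of an encoding in separate rows while pairing each instruction with its record (dot, RC=1) form. The generated tables are consumed from C++ roughly as sketched here; the exact function name and the GET_INSTRMAP_INFO guard are TableGen conventions assumed from other targets, not quoted from this patch:

  #define GET_INSTRMAP_INFO
  #include "PPCGenInstrInfo.inc"        // generated relation tables and query functions

  // Returns true when Opcode has a dot-suffixed variant that also sets CR0,
  // e.g. (assuming the usual naming) AND8 -> AND8o; -1 means no mapping.
  static bool hasRecordForm(unsigned Opcode) {
    return PPC::getRecordFormOpcode(Opcode) != -1;
  }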
let Defs = [X1], Uses = [X1] in -def DYNALLOC8 : Pseudo<(outs G8RC:$result), (ins G8RC:$negsize, memri:$fpsi),"#DYNALLOC8", +def DYNALLOC8 : Pseudo<(outs g8rc:$result), (ins g8rc:$negsize, memri:$fpsi),"#DYNALLOC8", [(set i64:$result, (PPCdynalloc i64:$negsize, iaddr:$fpsi))]>; let Defs = [LR8] in { -def MTLR8 : XFXForm_7_ext<31, 467, 8, (outs), (ins G8RC:$rS), +def MTLR8 : XFXForm_7_ext<31, 467, 8, (outs), (ins g8rc:$rS), "mtlr $rS", SprMTSPR>, PPC970_DGroup_First, PPC970_Unit_FXU; } let Uses = [LR8] in { -def MFLR8 : XFXForm_1_ext<31, 339, 8, (outs G8RC:$rT), (ins), +def MFLR8 : XFXForm_1_ext<31, 339, 8, (outs g8rc:$rT), (ins), "mflr $rT", SprMFSPR>, PPC970_DGroup_First, PPC970_Unit_FXU; } +} // Interpretation64Bit //===----------------------------------------------------------------------===// // Fixed point instructions. // let PPC970_Unit = 1 in { // FXU Operations. +let Interpretation64Bit = 1 in { +let neverHasSideEffects = 1 in { let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in { -def LI8 : DForm_2_r0<14, (outs G8RC:$rD), (ins symbolLo64:$imm), +def LI8 : DForm_2_r0<14, (outs g8rc:$rD), (ins symbolLo64:$imm), "li $rD, $imm", IntSimple, [(set i64:$rD, immSExt16:$imm)]>; -def LIS8 : DForm_2_r0<15, (outs G8RC:$rD), (ins symbolHi64:$imm), +def LIS8 : DForm_2_r0<15, (outs g8rc:$rD), (ins symbolHi64:$imm), "lis $rD, $imm", IntSimple, [(set i64:$rD, imm16ShiftedSExt:$imm)]>; } // Logical ops. -def NAND8: XForm_6<31, 476, (outs G8RC:$rA), (ins G8RC:$rS, G8RC:$rB), - "nand $rA, $rS, $rB", IntSimple, - [(set i64:$rA, (not (and i64:$rS, i64:$rB)))]>; -def AND8 : XForm_6<31, 28, (outs G8RC:$rA), (ins G8RC:$rS, G8RC:$rB), - "and $rA, $rS, $rB", IntSimple, - [(set i64:$rA, (and i64:$rS, i64:$rB))]>; -def ANDC8: XForm_6<31, 60, (outs G8RC:$rA), (ins G8RC:$rS, G8RC:$rB), - "andc $rA, $rS, $rB", IntSimple, - [(set i64:$rA, (and i64:$rS, (not i64:$rB)))]>; -def OR8 : XForm_6<31, 444, (outs G8RC:$rA), (ins G8RC:$rS, G8RC:$rB), - "or $rA, $rS, $rB", IntSimple, - [(set i64:$rA, (or i64:$rS, i64:$rB))]>; -def NOR8 : XForm_6<31, 124, (outs G8RC:$rA), (ins G8RC:$rS, G8RC:$rB), - "nor $rA, $rS, $rB", IntSimple, - [(set i64:$rA, (not (or i64:$rS, i64:$rB)))]>; -def ORC8 : XForm_6<31, 412, (outs G8RC:$rA), (ins G8RC:$rS, G8RC:$rB), - "orc $rA, $rS, $rB", IntSimple, - [(set i64:$rA, (or i64:$rS, (not i64:$rB)))]>; -def EQV8 : XForm_6<31, 284, (outs G8RC:$rA), (ins G8RC:$rS, G8RC:$rB), - "eqv $rA, $rS, $rB", IntSimple, - [(set i64:$rA, (not (xor i64:$rS, i64:$rB)))]>; -def XOR8 : XForm_6<31, 316, (outs G8RC:$rA), (ins G8RC:$rS, G8RC:$rB), - "xor $rA, $rS, $rB", IntSimple, - [(set i64:$rA, (xor i64:$rS, i64:$rB))]>; +defm NAND8: XForm_6r<31, 476, (outs g8rc:$rA), (ins g8rc:$rS, g8rc:$rB), + "nand", "$rA, $rS, $rB", IntSimple, + [(set i64:$rA, (not (and i64:$rS, i64:$rB)))]>; +defm AND8 : XForm_6r<31, 28, (outs g8rc:$rA), (ins g8rc:$rS, g8rc:$rB), + "and", "$rA, $rS, $rB", IntSimple, + [(set i64:$rA, (and i64:$rS, i64:$rB))]>; +defm ANDC8: XForm_6r<31, 60, (outs g8rc:$rA), (ins g8rc:$rS, g8rc:$rB), + "andc", "$rA, $rS, $rB", IntSimple, + [(set i64:$rA, (and i64:$rS, (not i64:$rB)))]>; +defm OR8 : XForm_6r<31, 444, (outs g8rc:$rA), (ins g8rc:$rS, g8rc:$rB), + "or", "$rA, $rS, $rB", IntSimple, + [(set i64:$rA, (or i64:$rS, i64:$rB))]>; +defm NOR8 : XForm_6r<31, 124, (outs g8rc:$rA), (ins g8rc:$rS, g8rc:$rB), + "nor", "$rA, $rS, $rB", IntSimple, + [(set i64:$rA, (not (or i64:$rS, i64:$rB)))]>; +defm ORC8 : XForm_6r<31, 412, (outs g8rc:$rA), (ins g8rc:$rS, g8rc:$rB), + "orc", "$rA, $rS, $rB", 
IntSimple, + [(set i64:$rA, (or i64:$rS, (not i64:$rB)))]>; +defm EQV8 : XForm_6r<31, 284, (outs g8rc:$rA), (ins g8rc:$rS, g8rc:$rB), + "eqv", "$rA, $rS, $rB", IntSimple, + [(set i64:$rA, (not (xor i64:$rS, i64:$rB)))]>; +defm XOR8 : XForm_6r<31, 316, (outs g8rc:$rA), (ins g8rc:$rS, g8rc:$rB), + "xor", "$rA, $rS, $rB", IntSimple, + [(set i64:$rA, (xor i64:$rS, i64:$rB))]>; // Logical ops with immediate. -def ANDIo8 : DForm_4<28, (outs G8RC:$dst), (ins G8RC:$src1, u16imm:$src2), +let Defs = [CR0] in { +def ANDIo8 : DForm_4<28, (outs g8rc:$dst), (ins g8rc:$src1, u16imm:$src2), "andi. $dst, $src1, $src2", IntGeneral, [(set i64:$dst, (and i64:$src1, immZExt16:$src2))]>, isDOT; -def ANDISo8 : DForm_4<29, (outs G8RC:$dst), (ins G8RC:$src1, u16imm:$src2), +def ANDISo8 : DForm_4<29, (outs g8rc:$dst), (ins g8rc:$src1, u16imm:$src2), "andis. $dst, $src1, $src2", IntGeneral, [(set i64:$dst, (and i64:$src1, imm16ShiftedZExt:$src2))]>, isDOT; -def ORI8 : DForm_4<24, (outs G8RC:$dst), (ins G8RC:$src1, u16imm:$src2), +} +def ORI8 : DForm_4<24, (outs g8rc:$dst), (ins g8rc:$src1, u16imm:$src2), "ori $dst, $src1, $src2", IntSimple, [(set i64:$dst, (or i64:$src1, immZExt16:$src2))]>; -def ORIS8 : DForm_4<25, (outs G8RC:$dst), (ins G8RC:$src1, u16imm:$src2), +def ORIS8 : DForm_4<25, (outs g8rc:$dst), (ins g8rc:$src1, u16imm:$src2), "oris $dst, $src1, $src2", IntSimple, [(set i64:$dst, (or i64:$src1, imm16ShiftedZExt:$src2))]>; -def XORI8 : DForm_4<26, (outs G8RC:$dst), (ins G8RC:$src1, u16imm:$src2), +def XORI8 : DForm_4<26, (outs g8rc:$dst), (ins g8rc:$src1, u16imm:$src2), "xori $dst, $src1, $src2", IntSimple, [(set i64:$dst, (xor i64:$src1, immZExt16:$src2))]>; -def XORIS8 : DForm_4<27, (outs G8RC:$dst), (ins G8RC:$src1, u16imm:$src2), +def XORIS8 : DForm_4<27, (outs g8rc:$dst), (ins g8rc:$src1, u16imm:$src2), "xoris $dst, $src1, $src2", IntSimple, [(set i64:$dst, (xor i64:$src1, imm16ShiftedZExt:$src2))]>; -def ADD8 : XOForm_1<31, 266, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB), - "add $rT, $rA, $rB", IntSimple, - [(set i64:$rT, (add i64:$rA, i64:$rB))]>; +defm ADD8 : XOForm_1r<31, 266, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB), + "add", "$rT, $rA, $rB", IntSimple, + [(set i64:$rT, (add i64:$rA, i64:$rB))]>; // ADD8 has a special form: reg = ADD8(reg, sym@tls) for use by the // initial-exec thread-local storage model. 
let isCodeGenOnly = 1 in -def ADD8TLS : XOForm_1<31, 266, 0, (outs G8RC:$rT), (ins G8RC:$rA, tlsreg:$rB), +def ADD8TLS : XOForm_1<31, 266, 0, (outs g8rc:$rT), (ins g8rc:$rA, tlsreg:$rB), "add $rT, $rA, $rB@tls", IntSimple, [(set i64:$rT, (add i64:$rA, tglobaltlsaddr:$rB))]>; -let Defs = [CARRY] in { -def ADDC8 : XOForm_1<31, 10, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB), - "addc $rT, $rA, $rB", IntGeneral, - [(set i64:$rT, (addc i64:$rA, i64:$rB))]>, - PPC970_DGroup_Cracked; -def ADDIC8 : DForm_2<12, (outs G8RC:$rD), (ins G8RC:$rA, s16imm64:$imm), +defm ADDC8 : XOForm_1rc<31, 10, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB), + "addc", "$rT, $rA, $rB", IntGeneral, + [(set i64:$rT, (addc i64:$rA, i64:$rB))]>, + PPC970_DGroup_Cracked; +let Defs = [CARRY] in +def ADDIC8 : DForm_2<12, (outs g8rc:$rD), (ins g8rc:$rA, s16imm64:$imm), "addic $rD, $rA, $imm", IntGeneral, [(set i64:$rD, (addc i64:$rA, immSExt16:$imm))]>; -} -def ADDI8 : DForm_2<14, (outs G8RC:$rD), (ins G8RC_NOX0:$rA, symbolLo64:$imm), +def ADDI8 : DForm_2<14, (outs g8rc:$rD), (ins g8rc_nox0:$rA, symbolLo64:$imm), "addi $rD, $rA, $imm", IntSimple, [(set i64:$rD, (add i64:$rA, immSExt16:$imm))]>; -def ADDIS8 : DForm_2<15, (outs G8RC:$rD), (ins G8RC_NOX0:$rA, symbolHi64:$imm), +def ADDIS8 : DForm_2<15, (outs g8rc:$rD), (ins g8rc_nox0:$rA, symbolHi64:$imm), "addis $rD, $rA, $imm", IntSimple, [(set i64:$rD, (add i64:$rA, imm16ShiftedSExt:$imm))]>; let Defs = [CARRY] in { -def SUBFIC8: DForm_2< 8, (outs G8RC:$rD), (ins G8RC:$rA, s16imm64:$imm), +def SUBFIC8: DForm_2< 8, (outs g8rc:$rD), (ins g8rc:$rA, s16imm64:$imm), "subfic $rD, $rA, $imm", IntGeneral, [(set i64:$rD, (subc immSExt16:$imm, i64:$rA))]>; -def SUBFC8 : XOForm_1<31, 8, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB), - "subfc $rT, $rA, $rB", IntGeneral, - [(set i64:$rT, (subc i64:$rB, i64:$rA))]>, - PPC970_DGroup_Cracked; -} -def SUBF8 : XOForm_1<31, 40, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB), - "subf $rT, $rA, $rB", IntGeneral, - [(set i64:$rT, (sub i64:$rB, i64:$rA))]>; -def NEG8 : XOForm_3<31, 104, 0, (outs G8RC:$rT), (ins G8RC:$rA), - "neg $rT, $rA", IntSimple, - [(set i64:$rT, (ineg i64:$rA))]>; -let Uses = [CARRY], Defs = [CARRY] in { -def ADDE8 : XOForm_1<31, 138, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB), - "adde $rT, $rA, $rB", IntGeneral, - [(set i64:$rT, (adde i64:$rA, i64:$rB))]>; -def ADDME8 : XOForm_3<31, 234, 0, (outs G8RC:$rT), (ins G8RC:$rA), - "addme $rT, $rA", IntGeneral, - [(set i64:$rT, (adde i64:$rA, -1))]>; -def ADDZE8 : XOForm_3<31, 202, 0, (outs G8RC:$rT), (ins G8RC:$rA), - "addze $rT, $rA", IntGeneral, - [(set i64:$rT, (adde i64:$rA, 0))]>; -def SUBFE8 : XOForm_1<31, 136, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB), - "subfe $rT, $rA, $rB", IntGeneral, - [(set i64:$rT, (sube i64:$rB, i64:$rA))]>; -def SUBFME8 : XOForm_3<31, 232, 0, (outs G8RC:$rT), (ins G8RC:$rA), - "subfme $rT, $rA", IntGeneral, - [(set i64:$rT, (sube -1, i64:$rA))]>; -def SUBFZE8 : XOForm_3<31, 200, 0, (outs G8RC:$rT), (ins G8RC:$rA), - "subfze $rT, $rA", IntGeneral, - [(set i64:$rT, (sube 0, i64:$rA))]>; -} - - -def MULHD : XOForm_1<31, 73, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB), - "mulhd $rT, $rA, $rB", IntMulHW, - [(set i64:$rT, (mulhs i64:$rA, i64:$rB))]>; -def MULHDU : XOForm_1<31, 9, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB), - "mulhdu $rT, $rA, $rB", IntMulHWU, - [(set i64:$rT, (mulhu i64:$rA, i64:$rB))]>; - -def CMPD : XForm_16_ext<31, 0, (outs CRRC:$crD), (ins G8RC:$rA, G8RC:$rB), - "cmpd $crD, $rA, $rB", IntCompare>, isPPC64; -def CMPLD : 
XForm_16_ext<31, 32, (outs CRRC:$crD), (ins G8RC:$rA, G8RC:$rB), - "cmpld $crD, $rA, $rB", IntCompare>, isPPC64; -def CMPDI : DForm_5_ext<11, (outs CRRC:$crD), (ins G8RC:$rA, s16imm:$imm), - "cmpdi $crD, $rA, $imm", IntCompare>, isPPC64; -def CMPLDI : DForm_6_ext<10, (outs CRRC:$dst), (ins G8RC:$src1, u16imm:$src2), - "cmpldi $dst, $src1, $src2", IntCompare>, isPPC64; - -def SLD : XForm_6<31, 27, (outs G8RC:$rA), (ins G8RC:$rS, GPRC:$rB), - "sld $rA, $rS, $rB", IntRotateD, - [(set i64:$rA, (PPCshl i64:$rS, i32:$rB))]>, isPPC64; -def SRD : XForm_6<31, 539, (outs G8RC:$rA), (ins G8RC:$rS, GPRC:$rB), - "srd $rA, $rS, $rB", IntRotateD, - [(set i64:$rA, (PPCsrl i64:$rS, i32:$rB))]>, isPPC64; -let Defs = [CARRY] in { -def SRAD : XForm_6<31, 794, (outs G8RC:$rA), (ins G8RC:$rS, GPRC:$rB), - "srad $rA, $rS, $rB", IntRotateD, - [(set i64:$rA, (PPCsra i64:$rS, i32:$rB))]>, isPPC64; +defm SUBFC8 : XOForm_1r<31, 8, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB), + "subfc", "$rT, $rA, $rB", IntGeneral, + [(set i64:$rT, (subc i64:$rB, i64:$rA))]>, + PPC970_DGroup_Cracked; +} +defm SUBF8 : XOForm_1r<31, 40, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB), + "subf", "$rT, $rA, $rB", IntGeneral, + [(set i64:$rT, (sub i64:$rB, i64:$rA))]>; +defm NEG8 : XOForm_3r<31, 104, 0, (outs g8rc:$rT), (ins g8rc:$rA), + "neg", "$rT, $rA", IntSimple, + [(set i64:$rT, (ineg i64:$rA))]>; +let Uses = [CARRY] in { +defm ADDE8 : XOForm_1rc<31, 138, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB), + "adde", "$rT, $rA, $rB", IntGeneral, + [(set i64:$rT, (adde i64:$rA, i64:$rB))]>; +defm ADDME8 : XOForm_3rc<31, 234, 0, (outs g8rc:$rT), (ins g8rc:$rA), + "addme", "$rT, $rA", IntGeneral, + [(set i64:$rT, (adde i64:$rA, -1))]>; +defm ADDZE8 : XOForm_3rc<31, 202, 0, (outs g8rc:$rT), (ins g8rc:$rA), + "addze", "$rT, $rA", IntGeneral, + [(set i64:$rT, (adde i64:$rA, 0))]>; +defm SUBFE8 : XOForm_1rc<31, 136, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB), + "subfe", "$rT, $rA, $rB", IntGeneral, + [(set i64:$rT, (sube i64:$rB, i64:$rA))]>; +defm SUBFME8 : XOForm_3rc<31, 232, 0, (outs g8rc:$rT), (ins g8rc:$rA), + "subfme", "$rT, $rA", IntGeneral, + [(set i64:$rT, (sube -1, i64:$rA))]>; +defm SUBFZE8 : XOForm_3rc<31, 200, 0, (outs g8rc:$rT), (ins g8rc:$rA), + "subfze", "$rT, $rA", IntGeneral, + [(set i64:$rT, (sube 0, i64:$rA))]>; } - -def EXTSB8 : XForm_11<31, 954, (outs G8RC:$rA), (ins G8RC:$rS), - "extsb $rA, $rS", IntSimple, - [(set i64:$rA, (sext_inreg i64:$rS, i8))]>; -def EXTSH8 : XForm_11<31, 922, (outs G8RC:$rA), (ins G8RC:$rS), - "extsh $rA, $rS", IntSimple, - [(set i64:$rA, (sext_inreg i64:$rS, i16))]>; - -def EXTSW : XForm_11<31, 986, (outs G8RC:$rA), (ins G8RC:$rS), - "extsw $rA, $rS", IntSimple, - [(set i64:$rA, (sext_inreg i64:$rS, i32))]>, isPPC64; -def EXTSW_32_64 : XForm_11<31, 986, (outs G8RC:$rA), (ins GPRC:$rS), - "extsw $rA, $rS", IntSimple, - [(set i64:$rA, (sext i32:$rS))]>, isPPC64; -let Defs = [CARRY] in { -def SRADI : XSForm_1<31, 413, (outs G8RC:$rA), (ins G8RC:$rS, u6imm:$SH), - "sradi $rA, $rS, $SH", IntRotateDI, - [(set i64:$rA, (sra i64:$rS, (i32 imm:$SH)))]>, isPPC64; + +defm MULHD : XOForm_1r<31, 73, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB), + "mulhd", "$rT, $rA, $rB", IntMulHW, + [(set i64:$rT, (mulhs i64:$rA, i64:$rB))]>; +defm MULHDU : XOForm_1r<31, 9, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB), + "mulhdu", "$rT, $rA, $rB", IntMulHWU, + [(set i64:$rT, (mulhu i64:$rA, i64:$rB))]>; +} +} // Interpretation64Bit + +let isCompare = 1, neverHasSideEffects = 1 in { + def CMPD : XForm_16_ext<31, 0, (outs 
crrc:$crD), (ins g8rc:$rA, g8rc:$rB), + "cmpd $crD, $rA, $rB", IntCompare>, isPPC64; + def CMPLD : XForm_16_ext<31, 32, (outs crrc:$crD), (ins g8rc:$rA, g8rc:$rB), + "cmpld $crD, $rA, $rB", IntCompare>, isPPC64; + def CMPDI : DForm_5_ext<11, (outs crrc:$crD), (ins g8rc:$rA, s16imm:$imm), + "cmpdi $crD, $rA, $imm", IntCompare>, isPPC64; + def CMPLDI : DForm_6_ext<10, (outs crrc:$dst), (ins g8rc:$src1, u16imm:$src2), + "cmpldi $dst, $src1, $src2", IntCompare>, isPPC64; } -def CNTLZD : XForm_11<31, 58, (outs G8RC:$rA), (ins G8RC:$rS), - "cntlzd $rA, $rS", IntGeneral, - [(set i64:$rA, (ctlz i64:$rS))]>; -def POPCNTD : XForm_11<31, 506, (outs G8RC:$rA), (ins G8RC:$rS), - "popcntd $rA, $rS", IntGeneral, - [(set i64:$rA, (ctpop i64:$rS))]>; + +let neverHasSideEffects = 1 in { +defm SLD : XForm_6r<31, 27, (outs g8rc:$rA), (ins g8rc:$rS, gprc:$rB), + "sld", "$rA, $rS, $rB", IntRotateD, + [(set i64:$rA, (PPCshl i64:$rS, i32:$rB))]>, isPPC64; +defm SRD : XForm_6r<31, 539, (outs g8rc:$rA), (ins g8rc:$rS, gprc:$rB), + "srd", "$rA, $rS, $rB", IntRotateD, + [(set i64:$rA, (PPCsrl i64:$rS, i32:$rB))]>, isPPC64; +defm SRAD : XForm_6rc<31, 794, (outs g8rc:$rA), (ins g8rc:$rS, gprc:$rB), + "srad", "$rA, $rS, $rB", IntRotateD, + [(set i64:$rA, (PPCsra i64:$rS, i32:$rB))]>, isPPC64; + +let Interpretation64Bit = 1 in { +defm EXTSB8 : XForm_11r<31, 954, (outs g8rc:$rA), (ins g8rc:$rS), + "extsb", "$rA, $rS", IntSimple, + [(set i64:$rA, (sext_inreg i64:$rS, i8))]>; +defm EXTSH8 : XForm_11r<31, 922, (outs g8rc:$rA), (ins g8rc:$rS), + "extsh", "$rA, $rS", IntSimple, + [(set i64:$rA, (sext_inreg i64:$rS, i16))]>; +} // Interpretation64Bit + +defm EXTSW : XForm_11r<31, 986, (outs g8rc:$rA), (ins g8rc:$rS), + "extsw", "$rA, $rS", IntSimple, + [(set i64:$rA, (sext_inreg i64:$rS, i32))]>, isPPC64; +let Interpretation64Bit = 1 in +defm EXTSW_32_64 : XForm_11r<31, 986, (outs g8rc:$rA), (ins gprc:$rS), + "extsw", "$rA, $rS", IntSimple, + [(set i64:$rA, (sext i32:$rS))]>, isPPC64; + +defm SRADI : XSForm_1rc<31, 413, (outs g8rc:$rA), (ins g8rc:$rS, u6imm:$SH), + "sradi", "$rA, $rS, $SH", IntRotateDI, + [(set i64:$rA, (sra i64:$rS, (i32 imm:$SH)))]>, isPPC64; +defm CNTLZD : XForm_11r<31, 58, (outs g8rc:$rA), (ins g8rc:$rS), + "cntlzd", "$rA, $rS", IntGeneral, + [(set i64:$rA, (ctlz i64:$rS))]>; +defm POPCNTD : XForm_11r<31, 506, (outs g8rc:$rA), (ins g8rc:$rS), + "popcntd", "$rA, $rS", IntGeneral, + [(set i64:$rA, (ctpop i64:$rS))]>; // popcntw also does a population count on the high 32 bits (storing the // results in the high 32-bits of the output). We'll ignore that here (which is // safe because we never separately use the high part of the 64-bit registers). 
-def POPCNTW : XForm_11<31, 378, (outs GPRC:$rA), (ins GPRC:$rS), - "popcntw $rA, $rS", IntGeneral, - [(set i32:$rA, (ctpop i32:$rS))]>; - -def DIVD : XOForm_1<31, 489, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB), - "divd $rT, $rA, $rB", IntDivD, - [(set i64:$rT, (sdiv i64:$rA, i64:$rB))]>, isPPC64, - PPC970_DGroup_First, PPC970_DGroup_Cracked; -def DIVDU : XOForm_1<31, 457, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB), - "divdu $rT, $rA, $rB", IntDivD, - [(set i64:$rT, (udiv i64:$rA, i64:$rB))]>, isPPC64, - PPC970_DGroup_First, PPC970_DGroup_Cracked; -def MULLD : XOForm_1<31, 233, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB), - "mulld $rT, $rA, $rB", IntMulHD, - [(set i64:$rT, (mul i64:$rA, i64:$rB))]>, isPPC64; - +defm POPCNTW : XForm_11r<31, 378, (outs gprc:$rA), (ins gprc:$rS), + "popcntw", "$rA, $rS", IntGeneral, + [(set i32:$rA, (ctpop i32:$rS))]>; + +defm DIVD : XOForm_1r<31, 489, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB), + "divd", "$rT, $rA, $rB", IntDivD, + [(set i64:$rT, (sdiv i64:$rA, i64:$rB))]>, isPPC64, + PPC970_DGroup_First, PPC970_DGroup_Cracked; +defm DIVDU : XOForm_1r<31, 457, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB), + "divdu", "$rT, $rA, $rB", IntDivD, + [(set i64:$rT, (udiv i64:$rA, i64:$rB))]>, isPPC64, + PPC970_DGroup_First, PPC970_DGroup_Cracked; +defm MULLD : XOForm_1r<31, 233, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB), + "mulld", "$rT, $rA, $rB", IntMulHD, + [(set i64:$rT, (mul i64:$rA, i64:$rB))]>, isPPC64; +} +let neverHasSideEffects = 1 in { let isCommutable = 1 in { -def RLDIMI : MDForm_1<30, 3, - (outs G8RC:$rA), (ins G8RC:$rSi, G8RC:$rS, u6imm:$SH, u6imm:$MB), - "rldimi $rA, $rS, $SH, $MB", IntRotateDI, - []>, isPPC64, RegConstraint<"$rSi = $rA">, - NoEncode<"$rSi">; +defm RLDIMI : MDForm_1r<30, 3, (outs g8rc:$rA), + (ins g8rc:$rSi, g8rc:$rS, u6imm:$SH, u6imm:$MBE), + "rldimi", "$rA, $rS, $SH, $MBE", IntRotateDI, + []>, isPPC64, RegConstraint<"$rSi = $rA">, + NoEncode<"$rSi">; } // Rotate instructions. -def RLDCL : MDForm_1<30, 0, - (outs G8RC:$rA), (ins G8RC:$rS, GPRC:$rB, u6imm:$MBE), - "rldcl $rA, $rS, $rB, $MBE", IntRotateD, - []>, isPPC64; -def RLDICL : MDForm_1<30, 0, - (outs G8RC:$rA), (ins G8RC:$rS, u6imm:$SH, u6imm:$MBE), - "rldicl $rA, $rS, $SH, $MBE", IntRotateDI, - []>, isPPC64; -def RLDICR : MDForm_1<30, 1, - (outs G8RC:$rA), (ins G8RC:$rS, u6imm:$SH, u6imm:$MBE), - "rldicr $rA, $rS, $SH, $MBE", IntRotateDI, - []>, isPPC64; - -def RLWINM8 : MForm_2<21, - (outs G8RC:$rA), (ins G8RC:$rS, u5imm:$SH, u5imm:$MB, u5imm:$ME), - "rlwinm $rA, $rS, $SH, $MB, $ME", IntGeneral, - []>; - +defm RLDCL : MDSForm_1r<30, 8, + (outs g8rc:$rA), (ins g8rc:$rS, gprc:$rB, u6imm:$MBE), + "rldcl", "$rA, $rS, $rB, $MBE", IntRotateD, + []>, isPPC64; +defm RLDICL : MDForm_1r<30, 0, + (outs g8rc:$rA), (ins g8rc:$rS, u6imm:$SH, u6imm:$MBE), + "rldicl", "$rA, $rS, $SH, $MBE", IntRotateDI, + []>, isPPC64; +defm RLDICR : MDForm_1r<30, 1, + (outs g8rc:$rA), (ins g8rc:$rS, u6imm:$SH, u6imm:$MBE), + "rldicr", "$rA, $rS, $SH, $MBE", IntRotateDI, + []>, isPPC64; + +let Interpretation64Bit = 1 in { +defm RLWINM8 : MForm_2r<21, (outs g8rc:$rA), + (ins g8rc:$rS, u5imm:$SH, u5imm:$MB, u5imm:$ME), + "rlwinm", "$rA, $rS, $SH, $MB, $ME", IntGeneral, + []>; + +let isSelect = 1 in def ISEL8 : AForm_4<31, 15, - (outs G8RC:$rT), (ins G8RC_NOX0:$rA, G8RC:$rB, CRBITRC:$cond), + (outs g8rc:$rT), (ins g8rc_nox0:$rA, g8rc:$rB, crbitrc:$cond), "isel $rT, $rA, $rB, $cond", IntGeneral, []>; +} // Interpretation64Bit +} // neverHasSideEffects = 1 } // End FXU Operations. 
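The hunk above replaces single `def` records (AND8, ADD8, SUBF8, RLDICL, and so on) with `defm` instantiations of `_r`/`_rc` multiclasses. The multiclass bodies themselves are not part of this diff; as a rough sketch only (names such as XForm_6r, RecFormRel, and getRecordFormOpcode are assumed from context rather than shown here), each such multiclass emits the plain instruction plus its record (dot) form, gives both the same BaseName, and an InstrMapping keyed on BaseName and Interpretation64Bit produces the opcode relation table that PPCInstrInfo.cpp later pulls in through GET_INSTRMAP_INFO:

  class RecFormRel;   // filter/tag class for the relation map (assumed name)

  multiclass XForm_6r<bits<6> opcode, bits<10> xo, dag OOL, dag IOL,
                      string asmbase, string asmstr, InstrItinClass itin,
                      list<dag> pattern> {
    let BaseName = asmbase in {
      // Plain form, e.g. "and $rA, $rS, $rB".
      def NAME : XForm_6<opcode, xo, OOL, IOL,
                         !strconcat(asmbase, !strconcat(" ", asmstr)),
                         itin, pattern>, RecFormRel;
      // Record form, e.g. "and. $rA, $rS, $rB"; it additionally writes CR0.
      let Defs = [CR0] in
      def o : XForm_6<opcode, xo, OOL, IOL,
                      !strconcat(asmbase, !strconcat(". ", asmstr)),
                      itin, []>, isDOT, RecFormRel;
    }
  }

  // Relation map: within a row identified by (BaseName, Interpretation64Bit),
  // map the RC = 0 definition to its RC = 1 (record-form) counterpart.
  def getRecordFormOpcode : InstrMapping {
    let FilterClass = "RecFormRel";
    let RowFields   = ["BaseName", "Interpretation64Bit"];
    let ColFields   = ["RC"];
    let KeyCol      = ["0"];
    let ValueCols   = [["1"]];
  }

Keeping Interpretation64Bit among the row fields helps keep pairs such as EXTSB and EXTSB8 (the same encoding interpreted as a 32-bit and a 64-bit operation) from colliding in the table, which is the degeneracy the new bit in PPCInstrFormats.td is meant to break.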
@@ -529,39 +573,43 @@ def ISEL8 : AForm_4<31, 15, // Sign extending loads. let canFoldAsLoad = 1, PPC970_Unit = 2 in { -def LHA8: DForm_1<42, (outs G8RC:$rD), (ins memri:$src), +let Interpretation64Bit = 1 in +def LHA8: DForm_1<42, (outs g8rc:$rD), (ins memri:$src), "lha $rD, $src", LdStLHA, [(set i64:$rD, (sextloadi16 iaddr:$src))]>, PPC970_DGroup_Cracked; -def LWA : DSForm_1<58, 2, (outs G8RC:$rD), (ins memrix:$src), +def LWA : DSForm_1<58, 2, (outs g8rc:$rD), (ins memrix:$src), "lwa $rD, $src", LdStLWA, [(set i64:$rD, (aligned4sextloadi32 ixaddr:$src))]>, isPPC64, PPC970_DGroup_Cracked; -def LHAX8: XForm_1<31, 343, (outs G8RC:$rD), (ins memrr:$src), +let Interpretation64Bit = 1 in +def LHAX8: XForm_1<31, 343, (outs g8rc:$rD), (ins memrr:$src), "lhax $rD, $src", LdStLHA, [(set i64:$rD, (sextloadi16 xaddr:$src))]>, PPC970_DGroup_Cracked; -def LWAX : XForm_1<31, 341, (outs G8RC:$rD), (ins memrr:$src), +def LWAX : XForm_1<31, 341, (outs g8rc:$rD), (ins memrr:$src), "lwax $rD, $src", LdStLHA, [(set i64:$rD, (sextloadi32 xaddr:$src))]>, isPPC64, PPC970_DGroup_Cracked; // Update forms. -let mayLoad = 1 in { -def LHAU8 : DForm_1<43, (outs G8RC:$rD, ptr_rc_nor0:$ea_result), +let mayLoad = 1, neverHasSideEffects = 1 in { +let Interpretation64Bit = 1 in +def LHAU8 : DForm_1<43, (outs g8rc:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr), "lhau $rD, $addr", LdStLHAU, []>, RegConstraint<"$addr.reg = $ea_result">, NoEncode<"$ea_result">; // NO LWAU! -def LHAUX8 : XForm_1<31, 375, (outs G8RC:$rD, ptr_rc_nor0:$ea_result), +let Interpretation64Bit = 1 in +def LHAUX8 : XForm_1<31, 375, (outs g8rc:$rD, ptr_rc_nor0:$ea_result), (ins memrr:$addr), "lhaux $rD, $addr", LdStLHAU, []>, RegConstraint<"$addr.ptrreg = $ea_result">, NoEncode<"$ea_result">; -def LWAUX : XForm_1<31, 373, (outs G8RC:$rD, ptr_rc_nor0:$ea_result), +def LWAUX : XForm_1<31, 373, (outs g8rc:$rD, ptr_rc_nor0:$ea_result), (ins memrr:$addr), "lwaux $rD, $addr", LdStLHAU, []>, RegConstraint<"$addr.ptrreg = $ea_result">, @@ -569,87 +617,89 @@ def LWAUX : XForm_1<31, 373, (outs G8RC:$rD, ptr_rc_nor0:$ea_result), } } +let Interpretation64Bit = 1 in { // Zero extending loads. let canFoldAsLoad = 1, PPC970_Unit = 2 in { -def LBZ8 : DForm_1<34, (outs G8RC:$rD), (ins memri:$src), +def LBZ8 : DForm_1<34, (outs g8rc:$rD), (ins memri:$src), "lbz $rD, $src", LdStLoad, [(set i64:$rD, (zextloadi8 iaddr:$src))]>; -def LHZ8 : DForm_1<40, (outs G8RC:$rD), (ins memri:$src), +def LHZ8 : DForm_1<40, (outs g8rc:$rD), (ins memri:$src), "lhz $rD, $src", LdStLoad, [(set i64:$rD, (zextloadi16 iaddr:$src))]>; -def LWZ8 : DForm_1<32, (outs G8RC:$rD), (ins memri:$src), +def LWZ8 : DForm_1<32, (outs g8rc:$rD), (ins memri:$src), "lwz $rD, $src", LdStLoad, [(set i64:$rD, (zextloadi32 iaddr:$src))]>, isPPC64; -def LBZX8 : XForm_1<31, 87, (outs G8RC:$rD), (ins memrr:$src), +def LBZX8 : XForm_1<31, 87, (outs g8rc:$rD), (ins memrr:$src), "lbzx $rD, $src", LdStLoad, [(set i64:$rD, (zextloadi8 xaddr:$src))]>; -def LHZX8 : XForm_1<31, 279, (outs G8RC:$rD), (ins memrr:$src), +def LHZX8 : XForm_1<31, 279, (outs g8rc:$rD), (ins memrr:$src), "lhzx $rD, $src", LdStLoad, [(set i64:$rD, (zextloadi16 xaddr:$src))]>; -def LWZX8 : XForm_1<31, 23, (outs G8RC:$rD), (ins memrr:$src), +def LWZX8 : XForm_1<31, 23, (outs g8rc:$rD), (ins memrr:$src), "lwzx $rD, $src", LdStLoad, [(set i64:$rD, (zextloadi32 xaddr:$src))]>; // Update forms. 
-let mayLoad = 1 in { -def LBZU8 : DForm_1<35, (outs G8RC:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr), +let mayLoad = 1, neverHasSideEffects = 1 in { +def LBZU8 : DForm_1<35, (outs g8rc:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr), "lbzu $rD, $addr", LdStLoadUpd, []>, RegConstraint<"$addr.reg = $ea_result">, NoEncode<"$ea_result">; -def LHZU8 : DForm_1<41, (outs G8RC:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr), +def LHZU8 : DForm_1<41, (outs g8rc:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr), "lhzu $rD, $addr", LdStLoadUpd, []>, RegConstraint<"$addr.reg = $ea_result">, NoEncode<"$ea_result">; -def LWZU8 : DForm_1<33, (outs G8RC:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr), +def LWZU8 : DForm_1<33, (outs g8rc:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr), "lwzu $rD, $addr", LdStLoadUpd, []>, RegConstraint<"$addr.reg = $ea_result">, NoEncode<"$ea_result">; -def LBZUX8 : XForm_1<31, 119, (outs G8RC:$rD, ptr_rc_nor0:$ea_result), +def LBZUX8 : XForm_1<31, 119, (outs g8rc:$rD, ptr_rc_nor0:$ea_result), (ins memrr:$addr), "lbzux $rD, $addr", LdStLoadUpd, []>, RegConstraint<"$addr.ptrreg = $ea_result">, NoEncode<"$ea_result">; -def LHZUX8 : XForm_1<31, 311, (outs G8RC:$rD, ptr_rc_nor0:$ea_result), +def LHZUX8 : XForm_1<31, 311, (outs g8rc:$rD, ptr_rc_nor0:$ea_result), (ins memrr:$addr), "lhzux $rD, $addr", LdStLoadUpd, []>, RegConstraint<"$addr.ptrreg = $ea_result">, NoEncode<"$ea_result">; -def LWZUX8 : XForm_1<31, 55, (outs G8RC:$rD, ptr_rc_nor0:$ea_result), +def LWZUX8 : XForm_1<31, 55, (outs g8rc:$rD, ptr_rc_nor0:$ea_result), (ins memrr:$addr), "lwzux $rD, $addr", LdStLoadUpd, []>, RegConstraint<"$addr.ptrreg = $ea_result">, NoEncode<"$ea_result">; } } +} // Interpretation64Bit // Full 8-byte loads. let canFoldAsLoad = 1, PPC970_Unit = 2 in { -def LD : DSForm_1<58, 0, (outs G8RC:$rD), (ins memrix:$src), +def LD : DSForm_1<58, 0, (outs g8rc:$rD), (ins memrix:$src), "ld $rD, $src", LdStLD, [(set i64:$rD, (aligned4load ixaddr:$src))]>, isPPC64; // The following three definitions are selected for small code model only. // Otherwise, we need to create two instructions to form a 32-bit offset, // so we have a custom matcher for TOC_ENTRY in PPCDAGToDAGIsel::Select(). 
-def LDtoc: Pseudo<(outs G8RC:$rD), (ins tocentry:$disp, G8RC:$reg), +def LDtoc: Pseudo<(outs g8rc:$rD), (ins tocentry:$disp, g8rc:$reg), "#LDtoc", [(set i64:$rD, (PPCtoc_entry tglobaladdr:$disp, i64:$reg))]>, isPPC64; -def LDtocJTI: Pseudo<(outs G8RC:$rD), (ins tocentry:$disp, G8RC:$reg), +def LDtocJTI: Pseudo<(outs g8rc:$rD), (ins tocentry:$disp, g8rc:$reg), "#LDtocJTI", [(set i64:$rD, (PPCtoc_entry tjumptable:$disp, i64:$reg))]>, isPPC64; -def LDtocCPT: Pseudo<(outs G8RC:$rD), (ins tocentry:$disp, G8RC:$reg), +def LDtocCPT: Pseudo<(outs g8rc:$rD), (ins tocentry:$disp, g8rc:$reg), "#LDtocCPT", [(set i64:$rD, (PPCtoc_entry tconstpool:$disp, i64:$reg))]>, isPPC64; let hasSideEffects = 1, isCodeGenOnly = 1 in { let RST = 2, DS = 2 in -def LDinto_toc: DSForm_1a<58, 0, (outs), (ins G8RC:$reg), +def LDinto_toc: DSForm_1a<58, 0, (outs), (ins g8rc:$reg), "ld 2, 8($reg)", LdStLD, [(PPCload_toc i64:$reg)]>, isPPC64; @@ -658,25 +708,26 @@ def LDtoc_restore : DSForm_1a<58, 0, (outs), (ins), "ld 2, 40(1)", LdStLD, [(PPCtoc_restore)]>, isPPC64; } -def LDX : XForm_1<31, 21, (outs G8RC:$rD), (ins memrr:$src), +def LDX : XForm_1<31, 21, (outs g8rc:$rD), (ins memrr:$src), "ldx $rD, $src", LdStLD, [(set i64:$rD, (load xaddr:$src))]>, isPPC64; -def LDBRX : XForm_1<31, 532, (outs G8RC:$rD), (ins memrr:$src), +def LDBRX : XForm_1<31, 532, (outs g8rc:$rD), (ins memrr:$src), "ldbrx $rD, $src", LdStLoad, [(set i64:$rD, (PPClbrx xoaddr:$src, i64))]>, isPPC64; -let mayLoad = 1 in -def LDU : DSForm_1<58, 1, (outs G8RC:$rD, ptr_rc_nor0:$ea_result), (ins memrix:$addr), +let mayLoad = 1, neverHasSideEffects = 1 in { +def LDU : DSForm_1<58, 1, (outs g8rc:$rD, ptr_rc_nor0:$ea_result), (ins memrix:$addr), "ldu $rD, $addr", LdStLDU, []>, RegConstraint<"$addr.reg = $ea_result">, isPPC64, NoEncode<"$ea_result">; -def LDUX : XForm_1<31, 53, (outs G8RC:$rD, ptr_rc_nor0:$ea_result), +def LDUX : XForm_1<31, 53, (outs g8rc:$rD, ptr_rc_nor0:$ea_result), (ins memrr:$addr), "ldux $rD, $addr", LdStLDU, []>, RegConstraint<"$addr.ptrreg = $ea_result">, NoEncode<"$ea_result">, isPPC64; } +} def : Pat<(PPCload ixaddr:$src), (LD ixaddr:$src)>; @@ -684,108 +735,111 @@ def : Pat<(PPCload xaddr:$src), (LDX xaddr:$src)>; // Support for medium and large code model. -def ADDIStocHA: Pseudo<(outs G8RC:$rD), (ins G8RC_NOX0:$reg, tocentry:$disp), +def ADDIStocHA: Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, tocentry:$disp), "#ADDIStocHA", [(set i64:$rD, (PPCaddisTocHA i64:$reg, tglobaladdr:$disp))]>, isPPC64; -def LDtocL: Pseudo<(outs G8RC:$rD), (ins tocentry:$disp, G8RC_NOX0:$reg), +def LDtocL: Pseudo<(outs g8rc:$rD), (ins tocentry:$disp, g8rc_nox0:$reg), "#LDtocL", [(set i64:$rD, (PPCldTocL tglobaladdr:$disp, i64:$reg))]>, isPPC64; -def ADDItocL: Pseudo<(outs G8RC:$rD), (ins G8RC_NOX0:$reg, tocentry:$disp), +def ADDItocL: Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, tocentry:$disp), "#ADDItocL", [(set i64:$rD, (PPCaddiTocL i64:$reg, tglobaladdr:$disp))]>, isPPC64; // Support for thread-local storage. 
-def ADDISgotTprelHA: Pseudo<(outs G8RC:$rD), (ins G8RC_NOX0:$reg, symbolHi64:$disp), +def ADDISgotTprelHA: Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, symbolHi64:$disp), "#ADDISgotTprelHA", [(set i64:$rD, (PPCaddisGotTprelHA i64:$reg, tglobaltlsaddr:$disp))]>, isPPC64; -def LDgotTprelL: Pseudo<(outs G8RC:$rD), (ins symbolLo64:$disp, G8RC_NOX0:$reg), +def LDgotTprelL: Pseudo<(outs g8rc:$rD), (ins symbolLo64:$disp, g8rc_nox0:$reg), "#LDgotTprelL", [(set i64:$rD, (PPCldGotTprelL tglobaltlsaddr:$disp, i64:$reg))]>, isPPC64; def : Pat<(PPCaddTls i64:$in, tglobaltlsaddr:$g), (ADD8TLS $in, tglobaltlsaddr:$g)>; -def ADDIStlsgdHA: Pseudo<(outs G8RC:$rD), (ins G8RC_NOX0:$reg, symbolHi64:$disp), +def ADDIStlsgdHA: Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, symbolHi64:$disp), "#ADDIStlsgdHA", [(set i64:$rD, (PPCaddisTlsgdHA i64:$reg, tglobaltlsaddr:$disp))]>, isPPC64; -def ADDItlsgdL : Pseudo<(outs G8RC:$rD), (ins G8RC_NOX0:$reg, symbolLo64:$disp), +def ADDItlsgdL : Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, symbolLo64:$disp), "#ADDItlsgdL", [(set i64:$rD, (PPCaddiTlsgdL i64:$reg, tglobaltlsaddr:$disp))]>, isPPC64; -def GETtlsADDR : Pseudo<(outs G8RC:$rD), (ins G8RC:$reg, tlsgd:$sym), +def GETtlsADDR : Pseudo<(outs g8rc:$rD), (ins g8rc:$reg, tlsgd:$sym), "#GETtlsADDR", [(set i64:$rD, (PPCgetTlsAddr i64:$reg, tglobaltlsaddr:$sym))]>, isPPC64; -def ADDIStlsldHA: Pseudo<(outs G8RC:$rD), (ins G8RC_NOX0:$reg, symbolHi64:$disp), +def ADDIStlsldHA: Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, symbolHi64:$disp), "#ADDIStlsldHA", [(set i64:$rD, (PPCaddisTlsldHA i64:$reg, tglobaltlsaddr:$disp))]>, isPPC64; -def ADDItlsldL : Pseudo<(outs G8RC:$rD), (ins G8RC_NOX0:$reg, symbolLo64:$disp), +def ADDItlsldL : Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, symbolLo64:$disp), "#ADDItlsldL", [(set i64:$rD, (PPCaddiTlsldL i64:$reg, tglobaltlsaddr:$disp))]>, isPPC64; -def GETtlsldADDR : Pseudo<(outs G8RC:$rD), (ins G8RC:$reg, tlsgd:$sym), +def GETtlsldADDR : Pseudo<(outs g8rc:$rD), (ins g8rc:$reg, tlsgd:$sym), "#GETtlsldADDR", [(set i64:$rD, (PPCgetTlsldAddr i64:$reg, tglobaltlsaddr:$sym))]>, isPPC64; -def ADDISdtprelHA: Pseudo<(outs G8RC:$rD), (ins G8RC_NOX0:$reg, symbolHi64:$disp), +def ADDISdtprelHA: Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, symbolHi64:$disp), "#ADDISdtprelHA", [(set i64:$rD, (PPCaddisDtprelHA i64:$reg, tglobaltlsaddr:$disp))]>, isPPC64; -def ADDIdtprelL : Pseudo<(outs G8RC:$rD), (ins G8RC_NOX0:$reg, symbolLo64:$disp), +def ADDIdtprelL : Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, symbolLo64:$disp), "#ADDIdtprelL", [(set i64:$rD, (PPCaddiDtprelL i64:$reg, tglobaltlsaddr:$disp))]>, isPPC64; let PPC970_Unit = 2 in { +let Interpretation64Bit = 1 in { // Truncating stores. 
-def STB8 : DForm_1<38, (outs), (ins G8RC:$rS, memri:$src), +def STB8 : DForm_1<38, (outs), (ins g8rc:$rS, memri:$src), "stb $rS, $src", LdStStore, [(truncstorei8 i64:$rS, iaddr:$src)]>; -def STH8 : DForm_1<44, (outs), (ins G8RC:$rS, memri:$src), +def STH8 : DForm_1<44, (outs), (ins g8rc:$rS, memri:$src), "sth $rS, $src", LdStStore, [(truncstorei16 i64:$rS, iaddr:$src)]>; -def STW8 : DForm_1<36, (outs), (ins G8RC:$rS, memri:$src), +def STW8 : DForm_1<36, (outs), (ins g8rc:$rS, memri:$src), "stw $rS, $src", LdStStore, [(truncstorei32 i64:$rS, iaddr:$src)]>; -def STBX8 : XForm_8<31, 215, (outs), (ins G8RC:$rS, memrr:$dst), +def STBX8 : XForm_8<31, 215, (outs), (ins g8rc:$rS, memrr:$dst), "stbx $rS, $dst", LdStStore, [(truncstorei8 i64:$rS, xaddr:$dst)]>, PPC970_DGroup_Cracked; -def STHX8 : XForm_8<31, 407, (outs), (ins G8RC:$rS, memrr:$dst), +def STHX8 : XForm_8<31, 407, (outs), (ins g8rc:$rS, memrr:$dst), "sthx $rS, $dst", LdStStore, [(truncstorei16 i64:$rS, xaddr:$dst)]>, PPC970_DGroup_Cracked; -def STWX8 : XForm_8<31, 151, (outs), (ins G8RC:$rS, memrr:$dst), +def STWX8 : XForm_8<31, 151, (outs), (ins g8rc:$rS, memrr:$dst), "stwx $rS, $dst", LdStStore, [(truncstorei32 i64:$rS, xaddr:$dst)]>, PPC970_DGroup_Cracked; +} // Interpretation64Bit + // Normal 8-byte stores. -def STD : DSForm_1<62, 0, (outs), (ins G8RC:$rS, memrix:$dst), +def STD : DSForm_1<62, 0, (outs), (ins g8rc:$rS, memrix:$dst), "std $rS, $dst", LdStSTD, [(aligned4store i64:$rS, ixaddr:$dst)]>, isPPC64; -def STDX : XForm_8<31, 149, (outs), (ins G8RC:$rS, memrr:$dst), +def STDX : XForm_8<31, 149, (outs), (ins g8rc:$rS, memrr:$dst), "stdx $rS, $dst", LdStSTD, [(store i64:$rS, xaddr:$dst)]>, isPPC64, PPC970_DGroup_Cracked; -def STDBRX: XForm_8<31, 660, (outs), (ins G8RC:$rS, memrr:$dst), +def STDBRX: XForm_8<31, 660, (outs), (ins g8rc:$rS, memrr:$dst), "stdbrx $rS, $dst", LdStStore, [(PPCstbrx i64:$rS, xoaddr:$dst, i64)]>, isPPC64, PPC970_DGroup_Cracked; @@ -793,33 +847,36 @@ def STDBRX: XForm_8<31, 660, (outs), (ins G8RC:$rS, memrr:$dst), // Stores with Update (pre-inc). 
let PPC970_Unit = 2, mayStore = 1 in { -def STBU8 : DForm_1<39, (outs ptr_rc_nor0:$ea_res), (ins G8RC:$rS, memri:$dst), +let Interpretation64Bit = 1 in { +def STBU8 : DForm_1<39, (outs ptr_rc_nor0:$ea_res), (ins g8rc:$rS, memri:$dst), "stbu $rS, $dst", LdStStoreUpd, []>, RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">; -def STHU8 : DForm_1<45, (outs ptr_rc_nor0:$ea_res), (ins G8RC:$rS, memri:$dst), +def STHU8 : DForm_1<45, (outs ptr_rc_nor0:$ea_res), (ins g8rc:$rS, memri:$dst), "sthu $rS, $dst", LdStStoreUpd, []>, RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">; -def STWU8 : DForm_1<37, (outs ptr_rc_nor0:$ea_res), (ins G8RC:$rS, memri:$dst), +def STWU8 : DForm_1<37, (outs ptr_rc_nor0:$ea_res), (ins g8rc:$rS, memri:$dst), "stwu $rS, $dst", LdStStoreUpd, []>, RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">; -def STDU : DSForm_1<62, 1, (outs ptr_rc_nor0:$ea_res), (ins G8RC:$rS, memrix:$dst), +def STDU : DSForm_1<62, 1, (outs ptr_rc_nor0:$ea_res), (ins g8rc:$rS, memrix:$dst), "stdu $rS, $dst", LdStSTDU, []>, RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">, isPPC64; -def STBUX8: XForm_8<31, 247, (outs ptr_rc_nor0:$ea_res), (ins G8RC:$rS, memrr:$dst), +def STBUX8: XForm_8<31, 247, (outs ptr_rc_nor0:$ea_res), (ins g8rc:$rS, memrr:$dst), "stbux $rS, $dst", LdStStoreUpd, []>, RegConstraint<"$dst.ptrreg = $ea_res">, NoEncode<"$ea_res">, PPC970_DGroup_Cracked; -def STHUX8: XForm_8<31, 439, (outs ptr_rc_nor0:$ea_res), (ins G8RC:$rS, memrr:$dst), +def STHUX8: XForm_8<31, 439, (outs ptr_rc_nor0:$ea_res), (ins g8rc:$rS, memrr:$dst), "sthux $rS, $dst", LdStStoreUpd, []>, RegConstraint<"$dst.ptrreg = $ea_res">, NoEncode<"$ea_res">, PPC970_DGroup_Cracked; -def STWUX8: XForm_8<31, 183, (outs ptr_rc_nor0:$ea_res), (ins G8RC:$rS, memrr:$dst), +def STWUX8: XForm_8<31, 183, (outs ptr_rc_nor0:$ea_res), (ins g8rc:$rS, memrr:$dst), "stwux $rS, $dst", LdStStoreUpd, []>, RegConstraint<"$dst.ptrreg = $ea_res">, NoEncode<"$ea_res">, PPC970_DGroup_Cracked; -def STDUX : XForm_8<31, 181, (outs ptr_rc_nor0:$ea_res), (ins G8RC:$rS, memrr:$dst), +} // Interpretation64Bit + +def STDUX : XForm_8<31, 181, (outs ptr_rc_nor0:$ea_res), (ins g8rc:$rS, memrr:$dst), "stdux $rS, $dst", LdStSTDU, []>, RegConstraint<"$dst.ptrreg = $ea_res">, NoEncode<"$ea_res">, PPC970_DGroup_Cracked, isPPC64; @@ -852,29 +909,30 @@ def : Pat<(pre_store i64:$rS, iPTR:$ptrreg, iPTR:$ptroff), // -let PPC970_Unit = 3, Uses = [RM] in { // FPU Operations. 
-def FCFID : XForm_26<63, 846, (outs F8RC:$frD), (ins F8RC:$frB), - "fcfid $frD, $frB", FPGeneral, - [(set f64:$frD, (PPCfcfid f64:$frB))]>, isPPC64; -def FCTIDZ : XForm_26<63, 815, (outs F8RC:$frD), (ins F8RC:$frB), - "fctidz $frD, $frB", FPGeneral, - [(set f64:$frD, (PPCfctidz f64:$frB))]>, isPPC64; - -def FCFIDU : XForm_26<63, 974, (outs F8RC:$frD), (ins F8RC:$frB), - "fcfidu $frD, $frB", FPGeneral, - [(set f64:$frD, (PPCfcfidu f64:$frB))]>, isPPC64; -def FCFIDS : XForm_26<59, 846, (outs F4RC:$frD), (ins F8RC:$frB), - "fcfids $frD, $frB", FPGeneral, - [(set f32:$frD, (PPCfcfids f64:$frB))]>, isPPC64; -def FCFIDUS : XForm_26<59, 974, (outs F4RC:$frD), (ins F8RC:$frB), - "fcfidus $frD, $frB", FPGeneral, - [(set f32:$frD, (PPCfcfidus f64:$frB))]>, isPPC64; -def FCTIDUZ : XForm_26<63, 943, (outs F8RC:$frD), (ins F8RC:$frB), - "fctiduz $frD, $frB", FPGeneral, - [(set f64:$frD, (PPCfctiduz f64:$frB))]>, isPPC64; -def FCTIWUZ : XForm_26<63, 143, (outs F8RC:$frD), (ins F8RC:$frB), - "fctiwuz $frD, $frB", FPGeneral, - [(set f64:$frD, (PPCfctiwuz f64:$frB))]>, isPPC64; +let PPC970_Unit = 3, neverHasSideEffects = 1, + Uses = [RM] in { // FPU Operations. +defm FCFID : XForm_26r<63, 846, (outs f8rc:$frD), (ins f8rc:$frB), + "fcfid", "$frD, $frB", FPGeneral, + [(set f64:$frD, (PPCfcfid f64:$frB))]>, isPPC64; +defm FCTIDZ : XForm_26r<63, 815, (outs f8rc:$frD), (ins f8rc:$frB), + "fctidz", "$frD, $frB", FPGeneral, + [(set f64:$frD, (PPCfctidz f64:$frB))]>, isPPC64; + +defm FCFIDU : XForm_26r<63, 974, (outs f8rc:$frD), (ins f8rc:$frB), + "fcfidu", "$frD, $frB", FPGeneral, + [(set f64:$frD, (PPCfcfidu f64:$frB))]>, isPPC64; +defm FCFIDS : XForm_26r<59, 846, (outs f4rc:$frD), (ins f8rc:$frB), + "fcfids", "$frD, $frB", FPGeneral, + [(set f32:$frD, (PPCfcfids f64:$frB))]>, isPPC64; +defm FCFIDUS : XForm_26r<59, 974, (outs f4rc:$frD), (ins f8rc:$frB), + "fcfidus", "$frD, $frB", FPGeneral, + [(set f32:$frD, (PPCfcfidus f64:$frB))]>, isPPC64; +defm FCTIDUZ : XForm_26r<63, 943, (outs f8rc:$frD), (ins f8rc:$frB), + "fctiduz", "$frD, $frB", FPGeneral, + [(set f64:$frD, (PPCfctiduz f64:$frB))]>, isPPC64; +defm FCTIWUZ : XForm_26r<63, 143, (outs f8rc:$frD), (ins f8rc:$frB), + "fctiwuz", "$frD, $frB", FPGeneral, + [(set f64:$frD, (PPCfctiwuz f64:$frB))]>, isPPC64; } diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrAltivec.td b/contrib/llvm/lib/Target/PowerPC/PPCInstrAltivec.td index a5ba4c8..cc9cf0a 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCInstrAltivec.td +++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrAltivec.td @@ -163,7 +163,7 @@ def vecspltisw : PatLeaf<(build_vector), [{ // VA1a_Int_Ty - A VAForm_1a intrinsic definition of specific type. class VA1a_Int_Ty<bits<6> xo, string opc, Intrinsic IntID, ValueType Ty> - : VAForm_1a<xo, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB, VRRC:$vC), + : VAForm_1a<xo, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB, vrrc:$vC), !strconcat(opc, " $vD, $vA, $vB, $vC"), VecFP, [(set Ty:$vD, (IntID Ty:$vA, Ty:$vB, Ty:$vC))]>; @@ -171,7 +171,7 @@ class VA1a_Int_Ty<bits<6> xo, string opc, Intrinsic IntID, ValueType Ty> // inputs doesn't match the type of the output. 
class VA1a_Int_Ty2<bits<6> xo, string opc, Intrinsic IntID, ValueType OutTy, ValueType InTy> - : VAForm_1a<xo, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB, VRRC:$vC), + : VAForm_1a<xo, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB, vrrc:$vC), !strconcat(opc, " $vD, $vA, $vB, $vC"), VecFP, [(set OutTy:$vD, (IntID InTy:$vA, InTy:$vB, InTy:$vC))]>; @@ -179,14 +179,14 @@ class VA1a_Int_Ty2<bits<6> xo, string opc, Intrinsic IntID, ValueType OutTy, // input types and an output type. class VA1a_Int_Ty3<bits<6> xo, string opc, Intrinsic IntID, ValueType OutTy, ValueType In1Ty, ValueType In2Ty> - : VAForm_1a<xo, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB, VRRC:$vC), + : VAForm_1a<xo, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB, vrrc:$vC), !strconcat(opc, " $vD, $vA, $vB, $vC"), VecFP, [(set OutTy:$vD, (IntID In1Ty:$vA, In1Ty:$vB, In2Ty:$vC))]>; // VX1_Int_Ty - A VXForm_1 intrinsic definition of specific type. class VX1_Int_Ty<bits<11> xo, string opc, Intrinsic IntID, ValueType Ty> - : VXForm_1<xo, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB), + : VXForm_1<xo, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), !strconcat(opc, " $vD, $vA, $vB"), VecFP, [(set Ty:$vD, (IntID Ty:$vA, Ty:$vB))]>; @@ -194,7 +194,7 @@ class VX1_Int_Ty<bits<11> xo, string opc, Intrinsic IntID, ValueType Ty> // inputs doesn't match the type of the output. class VX1_Int_Ty2<bits<11> xo, string opc, Intrinsic IntID, ValueType OutTy, ValueType InTy> - : VXForm_1<xo, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB), + : VXForm_1<xo, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), !strconcat(opc, " $vD, $vA, $vB"), VecFP, [(set OutTy:$vD, (IntID InTy:$vA, InTy:$vB))]>; @@ -202,13 +202,13 @@ class VX1_Int_Ty2<bits<11> xo, string opc, Intrinsic IntID, ValueType OutTy, // input types and an output type. class VX1_Int_Ty3<bits<11> xo, string opc, Intrinsic IntID, ValueType OutTy, ValueType In1Ty, ValueType In2Ty> - : VXForm_1<xo, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB), + : VXForm_1<xo, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), !strconcat(opc, " $vD, $vA, $vB"), VecFP, [(set OutTy:$vD, (IntID In1Ty:$vA, In2Ty:$vB))]>; // VX2_Int_SP - A VXForm_2 intrinsic definition of vector single-precision type. class VX2_Int_SP<bits<11> xo, string opc, Intrinsic IntID> - : VXForm_2<xo, (outs VRRC:$vD), (ins VRRC:$vB), + : VXForm_2<xo, (outs vrrc:$vD), (ins vrrc:$vB), !strconcat(opc, " $vD, $vB"), VecFP, [(set v4f32:$vD, (IntID v4f32:$vB))]>; @@ -216,7 +216,7 @@ class VX2_Int_SP<bits<11> xo, string opc, Intrinsic IntID> // inputs doesn't match the type of the output. 
class VX2_Int_Ty2<bits<11> xo, string opc, Intrinsic IntID, ValueType OutTy, ValueType InTy> - : VXForm_2<xo, (outs VRRC:$vD), (ins VRRC:$vB), + : VXForm_2<xo, (outs vrrc:$vD), (ins vrrc:$vB), !strconcat(opc, " $vD, $vB"), VecFP, [(set OutTy:$vD, (IntID InTy:$vB))]>; @@ -234,93 +234,93 @@ def DSSALL : DSS_Form<822, (outs), (ins u5imm:$ONE, u5imm:$ZERO0,u5imm:$ZERO1,u5imm:$ZERO2), "dssall", LdStLoad /*FIXME*/, []>; def DST : DSS_Form<342, (outs), - (ins u5imm:$ZERO, u5imm:$STRM, GPRC:$rA, GPRC:$rB), + (ins u5imm:$ZERO, u5imm:$STRM, gprc:$rA, gprc:$rB), "dst $rA, $rB, $STRM", LdStLoad /*FIXME*/, []>; def DSTT : DSS_Form<342, (outs), - (ins u5imm:$ONE, u5imm:$STRM, GPRC:$rA, GPRC:$rB), + (ins u5imm:$ONE, u5imm:$STRM, gprc:$rA, gprc:$rB), "dstt $rA, $rB, $STRM", LdStLoad /*FIXME*/, []>; def DSTST : DSS_Form<374, (outs), - (ins u5imm:$ZERO, u5imm:$STRM, GPRC:$rA, GPRC:$rB), + (ins u5imm:$ZERO, u5imm:$STRM, gprc:$rA, gprc:$rB), "dstst $rA, $rB, $STRM", LdStLoad /*FIXME*/, []>; def DSTSTT : DSS_Form<374, (outs), - (ins u5imm:$ONE, u5imm:$STRM, GPRC:$rA, GPRC:$rB), + (ins u5imm:$ONE, u5imm:$STRM, gprc:$rA, gprc:$rB), "dststt $rA, $rB, $STRM", LdStLoad /*FIXME*/, []>; def DST64 : DSS_Form<342, (outs), - (ins u5imm:$ZERO, u5imm:$STRM, G8RC:$rA, GPRC:$rB), + (ins u5imm:$ZERO, u5imm:$STRM, g8rc:$rA, gprc:$rB), "dst $rA, $rB, $STRM", LdStLoad /*FIXME*/, []>; def DSTT64 : DSS_Form<342, (outs), - (ins u5imm:$ONE, u5imm:$STRM, G8RC:$rA, GPRC:$rB), + (ins u5imm:$ONE, u5imm:$STRM, g8rc:$rA, gprc:$rB), "dstt $rA, $rB, $STRM", LdStLoad /*FIXME*/, []>; def DSTST64 : DSS_Form<374, (outs), - (ins u5imm:$ZERO, u5imm:$STRM, G8RC:$rA, GPRC:$rB), + (ins u5imm:$ZERO, u5imm:$STRM, g8rc:$rA, gprc:$rB), "dstst $rA, $rB, $STRM", LdStLoad /*FIXME*/, []>; def DSTSTT64 : DSS_Form<374, (outs), - (ins u5imm:$ONE, u5imm:$STRM, G8RC:$rA, GPRC:$rB), + (ins u5imm:$ONE, u5imm:$STRM, g8rc:$rA, gprc:$rB), "dststt $rA, $rB, $STRM", LdStLoad /*FIXME*/, []>; } -def MFVSCR : VXForm_4<1540, (outs VRRC:$vD), (ins), +def MFVSCR : VXForm_4<1540, (outs vrrc:$vD), (ins), "mfvscr $vD", LdStStore, [(set v8i16:$vD, (int_ppc_altivec_mfvscr))]>; -def MTVSCR : VXForm_5<1604, (outs), (ins VRRC:$vB), +def MTVSCR : VXForm_5<1604, (outs), (ins vrrc:$vB), "mtvscr $vB", LdStLoad, [(int_ppc_altivec_mtvscr v4i32:$vB)]>; let canFoldAsLoad = 1, PPC970_Unit = 2 in { // Loads. 
-def LVEBX: XForm_1<31, 7, (outs VRRC:$vD), (ins memrr:$src), +def LVEBX: XForm_1<31, 7, (outs vrrc:$vD), (ins memrr:$src), "lvebx $vD, $src", LdStLoad, [(set v16i8:$vD, (int_ppc_altivec_lvebx xoaddr:$src))]>; -def LVEHX: XForm_1<31, 39, (outs VRRC:$vD), (ins memrr:$src), +def LVEHX: XForm_1<31, 39, (outs vrrc:$vD), (ins memrr:$src), "lvehx $vD, $src", LdStLoad, [(set v8i16:$vD, (int_ppc_altivec_lvehx xoaddr:$src))]>; -def LVEWX: XForm_1<31, 71, (outs VRRC:$vD), (ins memrr:$src), +def LVEWX: XForm_1<31, 71, (outs vrrc:$vD), (ins memrr:$src), "lvewx $vD, $src", LdStLoad, [(set v4i32:$vD, (int_ppc_altivec_lvewx xoaddr:$src))]>; -def LVX : XForm_1<31, 103, (outs VRRC:$vD), (ins memrr:$src), +def LVX : XForm_1<31, 103, (outs vrrc:$vD), (ins memrr:$src), "lvx $vD, $src", LdStLoad, [(set v4i32:$vD, (int_ppc_altivec_lvx xoaddr:$src))]>; -def LVXL : XForm_1<31, 359, (outs VRRC:$vD), (ins memrr:$src), +def LVXL : XForm_1<31, 359, (outs vrrc:$vD), (ins memrr:$src), "lvxl $vD, $src", LdStLoad, [(set v4i32:$vD, (int_ppc_altivec_lvxl xoaddr:$src))]>; } -def LVSL : XForm_1<31, 6, (outs VRRC:$vD), (ins memrr:$src), +def LVSL : XForm_1<31, 6, (outs vrrc:$vD), (ins memrr:$src), "lvsl $vD, $src", LdStLoad, [(set v16i8:$vD, (int_ppc_altivec_lvsl xoaddr:$src))]>, PPC970_Unit_LSU; -def LVSR : XForm_1<31, 38, (outs VRRC:$vD), (ins memrr:$src), +def LVSR : XForm_1<31, 38, (outs vrrc:$vD), (ins memrr:$src), "lvsr $vD, $src", LdStLoad, [(set v16i8:$vD, (int_ppc_altivec_lvsr xoaddr:$src))]>, PPC970_Unit_LSU; let PPC970_Unit = 2 in { // Stores. -def STVEBX: XForm_8<31, 135, (outs), (ins VRRC:$rS, memrr:$dst), +def STVEBX: XForm_8<31, 135, (outs), (ins vrrc:$rS, memrr:$dst), "stvebx $rS, $dst", LdStStore, [(int_ppc_altivec_stvebx v16i8:$rS, xoaddr:$dst)]>; -def STVEHX: XForm_8<31, 167, (outs), (ins VRRC:$rS, memrr:$dst), +def STVEHX: XForm_8<31, 167, (outs), (ins vrrc:$rS, memrr:$dst), "stvehx $rS, $dst", LdStStore, [(int_ppc_altivec_stvehx v8i16:$rS, xoaddr:$dst)]>; -def STVEWX: XForm_8<31, 199, (outs), (ins VRRC:$rS, memrr:$dst), +def STVEWX: XForm_8<31, 199, (outs), (ins vrrc:$rS, memrr:$dst), "stvewx $rS, $dst", LdStStore, [(int_ppc_altivec_stvewx v4i32:$rS, xoaddr:$dst)]>; -def STVX : XForm_8<31, 231, (outs), (ins VRRC:$rS, memrr:$dst), +def STVX : XForm_8<31, 231, (outs), (ins vrrc:$rS, memrr:$dst), "stvx $rS, $dst", LdStStore, [(int_ppc_altivec_stvx v4i32:$rS, xoaddr:$dst)]>; -def STVXL : XForm_8<31, 487, (outs), (ins VRRC:$rS, memrr:$dst), +def STVXL : XForm_8<31, 487, (outs), (ins vrrc:$rS, memrr:$dst), "stvxl $rS, $dst", LdStStore, [(int_ppc_altivec_stvxl v4i32:$rS, xoaddr:$dst)]>; } let PPC970_Unit = 5 in { // VALU Operations. // VA-Form instructions. 3-input AltiVec ops. -def VMADDFP : VAForm_1<46, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vC, VRRC:$vB), +def VMADDFP : VAForm_1<46, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vC, vrrc:$vB), "vmaddfp $vD, $vA, $vC, $vB", VecFP, [(set v4f32:$vD, (fma v4f32:$vA, v4f32:$vC, v4f32:$vB))]>; // FIXME: The fma+fneg pattern won't match because fneg is not legal. -def VNMSUBFP: VAForm_1<47, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vC, VRRC:$vB), +def VNMSUBFP: VAForm_1<47, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vC, vrrc:$vB), "vnmsubfp $vD, $vA, $vC, $vB", VecFP, [(set v4f32:$vD, (fneg (fma v4f32:$vA, v4f32:$vC, (fneg v4f32:$vB))))]>; @@ -335,23 +335,23 @@ def VPERM : VA1a_Int_Ty3<43, "vperm", int_ppc_altivec_vperm, def VSEL : VA1a_Int_Ty<42, "vsel", int_ppc_altivec_vsel, v4i32>; // Shuffles. 
-def VSLDOI : VAForm_2<44, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB, u5imm:$SH), +def VSLDOI : VAForm_2<44, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB, u5imm:$SH), "vsldoi $vD, $vA, $vB, $SH", VecFP, [(set v16i8:$vD, (vsldoi_shuffle:$SH v16i8:$vA, v16i8:$vB))]>; // VX-Form instructions. AltiVec arithmetic ops. -def VADDFP : VXForm_1<10, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB), +def VADDFP : VXForm_1<10, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), "vaddfp $vD, $vA, $vB", VecFP, [(set v4f32:$vD, (fadd v4f32:$vA, v4f32:$vB))]>; -def VADDUBM : VXForm_1<0, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB), +def VADDUBM : VXForm_1<0, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), "vaddubm $vD, $vA, $vB", VecGeneral, [(set v16i8:$vD, (add v16i8:$vA, v16i8:$vB))]>; -def VADDUHM : VXForm_1<64, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB), +def VADDUHM : VXForm_1<64, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), "vadduhm $vD, $vA, $vB", VecGeneral, [(set v8i16:$vD, (add v8i16:$vA, v8i16:$vB))]>; -def VADDUWM : VXForm_1<128, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB), +def VADDUWM : VXForm_1<128, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), "vadduwm $vD, $vA, $vB", VecGeneral, [(set v4i32:$vD, (add v4i32:$vA, v4i32:$vB))]>; @@ -364,27 +364,27 @@ def VADDUHS : VX1_Int_Ty<576, "vadduhs", int_ppc_altivec_vadduhs, v8i16>; def VADDUWS : VX1_Int_Ty<640, "vadduws", int_ppc_altivec_vadduws, v4i32>; -def VAND : VXForm_1<1028, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB), +def VAND : VXForm_1<1028, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), "vand $vD, $vA, $vB", VecFP, [(set v4i32:$vD, (and v4i32:$vA, v4i32:$vB))]>; -def VANDC : VXForm_1<1092, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB), +def VANDC : VXForm_1<1092, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), "vandc $vD, $vA, $vB", VecFP, [(set v4i32:$vD, (and v4i32:$vA, (vnot_ppc v4i32:$vB)))]>; -def VCFSX : VXForm_1<842, (outs VRRC:$vD), (ins u5imm:$UIMM, VRRC:$vB), +def VCFSX : VXForm_1<842, (outs vrrc:$vD), (ins u5imm:$UIMM, vrrc:$vB), "vcfsx $vD, $vB, $UIMM", VecFP, [(set v4f32:$vD, (int_ppc_altivec_vcfsx v4i32:$vB, imm:$UIMM))]>; -def VCFUX : VXForm_1<778, (outs VRRC:$vD), (ins u5imm:$UIMM, VRRC:$vB), +def VCFUX : VXForm_1<778, (outs vrrc:$vD), (ins u5imm:$UIMM, vrrc:$vB), "vcfux $vD, $vB, $UIMM", VecFP, [(set v4f32:$vD, (int_ppc_altivec_vcfux v4i32:$vB, imm:$UIMM))]>; -def VCTSXS : VXForm_1<970, (outs VRRC:$vD), (ins u5imm:$UIMM, VRRC:$vB), +def VCTSXS : VXForm_1<970, (outs vrrc:$vD), (ins u5imm:$UIMM, vrrc:$vB), "vctsxs $vD, $vB, $UIMM", VecFP, [(set v4i32:$vD, (int_ppc_altivec_vctsxs v4f32:$vB, imm:$UIMM))]>; -def VCTUXS : VXForm_1<906, (outs VRRC:$vD), (ins u5imm:$UIMM, VRRC:$vB), +def VCTUXS : VXForm_1<906, (outs vrrc:$vD), (ins u5imm:$UIMM, vrrc:$vB), "vctuxs $vD, $vB, $UIMM", VecFP, [(set v4i32:$vD, (int_ppc_altivec_vctuxs v4f32:$vB, imm:$UIMM))]>; @@ -393,19 +393,19 @@ def VCTUXS : VXForm_1<906, (outs VRRC:$vD), (ins u5imm:$UIMM, VRRC:$vB), // to integer (fp_to_sint/fp_to_uint) conversions and integer // to floating-point (sint_to_fp/uint_to_fp) conversions. 
let VA = 0 in { -def VCFSX_0 : VXForm_1<842, (outs VRRC:$vD), (ins VRRC:$vB), +def VCFSX_0 : VXForm_1<842, (outs vrrc:$vD), (ins vrrc:$vB), "vcfsx $vD, $vB, 0", VecFP, [(set v4f32:$vD, (int_ppc_altivec_vcfsx v4i32:$vB, 0))]>; -def VCTUXS_0 : VXForm_1<906, (outs VRRC:$vD), (ins VRRC:$vB), +def VCTUXS_0 : VXForm_1<906, (outs vrrc:$vD), (ins vrrc:$vB), "vctuxs $vD, $vB, 0", VecFP, [(set v4i32:$vD, (int_ppc_altivec_vctuxs v4f32:$vB, 0))]>; -def VCFUX_0 : VXForm_1<778, (outs VRRC:$vD), (ins VRRC:$vB), +def VCFUX_0 : VXForm_1<778, (outs vrrc:$vD), (ins vrrc:$vB), "vcfux $vD, $vB, 0", VecFP, [(set v4f32:$vD, (int_ppc_altivec_vcfux v4i32:$vB, 0))]>; -def VCTSXS_0 : VXForm_1<970, (outs VRRC:$vD), (ins VRRC:$vB), +def VCTSXS_0 : VXForm_1<970, (outs vrrc:$vD), (ins vrrc:$vB), "vctsxs $vD, $vB, 0", VecFP, [(set v4i32:$vD, (int_ppc_altivec_vctsxs v4f32:$vB, 0))]>; @@ -435,22 +435,22 @@ def VMINUB : VX1_Int_Ty< 514, "vminub", int_ppc_altivec_vminub, v16i8>; def VMINUH : VX1_Int_Ty< 578, "vminuh", int_ppc_altivec_vminuh, v8i16>; def VMINUW : VX1_Int_Ty< 642, "vminuw", int_ppc_altivec_vminuw, v4i32>; -def VMRGHB : VXForm_1< 12, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB), +def VMRGHB : VXForm_1< 12, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), "vmrghb $vD, $vA, $vB", VecFP, [(set v16i8:$vD, (vmrghb_shuffle v16i8:$vA, v16i8:$vB))]>; -def VMRGHH : VXForm_1< 76, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB), +def VMRGHH : VXForm_1< 76, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), "vmrghh $vD, $vA, $vB", VecFP, [(set v16i8:$vD, (vmrghh_shuffle v16i8:$vA, v16i8:$vB))]>; -def VMRGHW : VXForm_1<140, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB), +def VMRGHW : VXForm_1<140, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), "vmrghw $vD, $vA, $vB", VecFP, [(set v16i8:$vD, (vmrghw_shuffle v16i8:$vA, v16i8:$vB))]>; -def VMRGLB : VXForm_1<268, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB), +def VMRGLB : VXForm_1<268, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), "vmrglb $vD, $vA, $vB", VecFP, [(set v16i8:$vD, (vmrglb_shuffle v16i8:$vA, v16i8:$vB))]>; -def VMRGLH : VXForm_1<332, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB), +def VMRGLH : VXForm_1<332, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), "vmrglh $vD, $vA, $vB", VecFP, [(set v16i8:$vD, (vmrglh_shuffle v16i8:$vA, v16i8:$vB))]>; -def VMRGLW : VXForm_1<396, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB), +def VMRGLW : VXForm_1<396, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), "vmrglw $vD, $vA, $vB", VecFP, [(set v16i8:$vD, (vmrglw_shuffle v16i8:$vA, v16i8:$vB))]>; @@ -491,18 +491,18 @@ def VRFIP : VX2_Int_SP<650, "vrfip", int_ppc_altivec_vrfip>; def VRFIZ : VX2_Int_SP<586, "vrfiz", int_ppc_altivec_vrfiz>; def VRSQRTEFP : VX2_Int_SP<330, "vrsqrtefp", int_ppc_altivec_vrsqrtefp>; -def VSUBCUW : VX1_Int_Ty<74, "vsubcuw", int_ppc_altivec_vsubcuw, v4i32>; +def VSUBCUW : VX1_Int_Ty<1408, "vsubcuw", int_ppc_altivec_vsubcuw, v4i32>; -def VSUBFP : VXForm_1<74, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB), +def VSUBFP : VXForm_1<74, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), "vsubfp $vD, $vA, $vB", VecGeneral, [(set v4f32:$vD, (fsub v4f32:$vA, v4f32:$vB))]>; -def VSUBUBM : VXForm_1<1024, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB), +def VSUBUBM : VXForm_1<1024, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), "vsububm $vD, $vA, $vB", VecGeneral, [(set v16i8:$vD, (sub v16i8:$vA, v16i8:$vB))]>; -def VSUBUHM : VXForm_1<1088, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB), +def VSUBUHM : VXForm_1<1088, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), "vsubuhm $vD, $vA, $vB", VecGeneral, [(set v8i16:$vD, (sub v8i16:$vA, v8i16:$vB))]>; -def 
VSUBUWM : VXForm_1<1152, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB), +def VSUBUWM : VXForm_1<1152, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), "vsubuwm $vD, $vA, $vB", VecGeneral, [(set v4i32:$vD, (sub v4i32:$vA, v4i32:$vB))]>; @@ -516,21 +516,21 @@ def VSUBUWS : VX1_Int_Ty<1664, "vsubuws" , int_ppc_altivec_vsubuws, v4i32>; def VSUMSWS : VX1_Int_Ty<1928, "vsumsws" , int_ppc_altivec_vsumsws, v4i32>; def VSUM2SWS: VX1_Int_Ty<1672, "vsum2sws", int_ppc_altivec_vsum2sws, v4i32>; -def VSUM4SBS: VX1_Int_Ty3<1672, "vsum4sbs", int_ppc_altivec_vsum4sbs, +def VSUM4SBS: VX1_Int_Ty3<1800, "vsum4sbs", int_ppc_altivec_vsum4sbs, v4i32, v16i8, v4i32>; def VSUM4SHS: VX1_Int_Ty3<1608, "vsum4shs", int_ppc_altivec_vsum4shs, v4i32, v8i16, v4i32>; def VSUM4UBS: VX1_Int_Ty3<1544, "vsum4ubs", int_ppc_altivec_vsum4ubs, v4i32, v16i8, v4i32>; -def VNOR : VXForm_1<1284, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB), +def VNOR : VXForm_1<1284, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), "vnor $vD, $vA, $vB", VecFP, [(set v4i32:$vD, (vnot_ppc (or v4i32:$vA, v4i32:$vB)))]>; -def VOR : VXForm_1<1156, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB), +def VOR : VXForm_1<1156, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), "vor $vD, $vA, $vB", VecFP, [(set v4i32:$vD, (or v4i32:$vA, v4i32:$vB))]>; -def VXOR : VXForm_1<1220, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB), +def VXOR : VXForm_1<1220, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), "vxor $vD, $vA, $vB", VecFP, [(set v4i32:$vD, (xor v4i32:$vA, v4i32:$vB))]>; @@ -545,15 +545,15 @@ def VSLB : VX1_Int_Ty< 260, "vslb", int_ppc_altivec_vslb, v16i8>; def VSLH : VX1_Int_Ty< 324, "vslh", int_ppc_altivec_vslh, v8i16>; def VSLW : VX1_Int_Ty< 388, "vslw", int_ppc_altivec_vslw, v4i32>; -def VSPLTB : VXForm_1<524, (outs VRRC:$vD), (ins u5imm:$UIMM, VRRC:$vB), +def VSPLTB : VXForm_1<524, (outs vrrc:$vD), (ins u5imm:$UIMM, vrrc:$vB), "vspltb $vD, $vB, $UIMM", VecPerm, [(set v16i8:$vD, (vspltb_shuffle:$UIMM v16i8:$vB, (undef)))]>; -def VSPLTH : VXForm_1<588, (outs VRRC:$vD), (ins u5imm:$UIMM, VRRC:$vB), +def VSPLTH : VXForm_1<588, (outs vrrc:$vD), (ins u5imm:$UIMM, vrrc:$vB), "vsplth $vD, $vB, $UIMM", VecPerm, [(set v16i8:$vD, (vsplth_shuffle:$UIMM v16i8:$vB, (undef)))]>; -def VSPLTW : VXForm_1<652, (outs VRRC:$vD), (ins u5imm:$UIMM, VRRC:$vB), +def VSPLTW : VXForm_1<652, (outs vrrc:$vD), (ins u5imm:$UIMM, vrrc:$vB), "vspltw $vD, $vB, $UIMM", VecPerm, [(set v16i8:$vD, (vspltw_shuffle:$UIMM v16i8:$vB, (undef)))]>; @@ -569,13 +569,13 @@ def VSRH : VX1_Int_Ty< 580, "vsrh" , int_ppc_altivec_vsrh , v8i16>; def VSRW : VX1_Int_Ty< 644, "vsrw" , int_ppc_altivec_vsrw , v4i32>; -def VSPLTISB : VXForm_3<780, (outs VRRC:$vD), (ins s5imm:$SIMM), +def VSPLTISB : VXForm_3<780, (outs vrrc:$vD), (ins s5imm:$SIMM), "vspltisb $vD, $SIMM", VecPerm, [(set v16i8:$vD, (v16i8 vecspltisb:$SIMM))]>; -def VSPLTISH : VXForm_3<844, (outs VRRC:$vD), (ins s5imm:$SIMM), +def VSPLTISH : VXForm_3<844, (outs vrrc:$vD), (ins s5imm:$SIMM), "vspltish $vD, $SIMM", VecPerm, [(set v8i16:$vD, (v8i16 vecspltish:$SIMM))]>; -def VSPLTISW : VXForm_3<908, (outs VRRC:$vD), (ins s5imm:$SIMM), +def VSPLTISW : VXForm_3<908, (outs vrrc:$vD), (ins s5imm:$SIMM), "vspltisw $vD, $SIMM", VecPerm, [(set v4i32:$vD, (v4i32 vecspltisw:$SIMM))]>; @@ -590,13 +590,13 @@ def VPKSWSS : VX1_Int_Ty2<462, "vpkswss", int_ppc_altivec_vpkswss, v16i8, v4i32>; def VPKSWUS : VX1_Int_Ty2<334, "vpkswus", int_ppc_altivec_vpkswus, v8i16, v4i32>; -def VPKUHUM : VXForm_1<14, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB), +def VPKUHUM : VXForm_1<14, (outs vrrc:$vD), (ins vrrc:$vA, 
vrrc:$vB), "vpkuhum $vD, $vA, $vB", VecFP, [(set v16i8:$vD, (vpkuhum_shuffle v16i8:$vA, v16i8:$vB))]>; def VPKUHUS : VX1_Int_Ty2<142, "vpkuhus", int_ppc_altivec_vpkuhus, v16i8, v8i16>; -def VPKUWUM : VXForm_1<78, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB), +def VPKUWUM : VXForm_1<78, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), "vpkuwum $vD, $vA, $vB", VecFP, [(set v16i8:$vD, (vpkuwum_shuffle v16i8:$vA, v16i8:$vB))]>; @@ -621,10 +621,10 @@ def VUPKLSH : VX2_Int_Ty2<718, "vupklsh", int_ppc_altivec_vupklsh, // Altivec Comparisons. class VCMP<bits<10> xo, string asmstr, ValueType Ty> - : VXRForm_1<xo, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),asmstr,VecFPCompare, + : VXRForm_1<xo, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),asmstr,VecFPCompare, [(set Ty:$vD, (Ty (PPCvcmp Ty:$vA, Ty:$vB, xo)))]>; class VCMPo<bits<10> xo, string asmstr, ValueType Ty> - : VXRForm_1<xo, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),asmstr,VecFPCompare, + : VXRForm_1<xo, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),asmstr,VecFPCompare, [(set Ty:$vD, (Ty (PPCvcmp_o Ty:$vA, Ty:$vB, xo)))]> { let Defs = [CR6]; let RC = 1; @@ -665,11 +665,11 @@ def VCMPGTUW : VCMP <646, "vcmpgtuw $vD, $vA, $vB" , v4i32>; def VCMPGTUWo : VCMPo<646, "vcmpgtuw. $vD, $vA, $vB", v4i32>; let isCodeGenOnly = 1 in -def V_SET0 : VXForm_setzero<1220, (outs VRRC:$vD), (ins), +def V_SET0 : VXForm_setzero<1220, (outs vrrc:$vD), (ins), "vxor $vD, $vD, $vD", VecFP, [(set v4i32:$vD, (v4i32 immAllZerosV))]>; let IMM=-1 in { -def V_SETALLONES : VXForm_3<908, (outs VRRC:$vD), (ins), +def V_SETALLONES : VXForm_3<908, (outs vrrc:$vD), (ins), "vspltisw $vD, -1", VecFP, [(set v4i32:$vD, (v4i32 immAllOnesV))]>; } diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrFormats.td b/contrib/llvm/lib/Target/PowerPC/PPCInstrFormats.td index 400b7e3..b6f4e85 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCInstrFormats.td +++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrFormats.td @@ -35,6 +35,15 @@ class I<bits<6> opcode, dag OOL, dag IOL, string asmstr, InstrItinClass itin> let TSFlags{1} = PPC970_Single; let TSFlags{2} = PPC970_Cracked; let TSFlags{5-3} = PPC970_Unit; + + // Fields used for relation models. + string BaseName = ""; + + // For cases where multiple instruction definitions really represent the + // same underlying instruction but with one definition for 64-bit arguments + // and one for 32-bit arguments, this bit breaks the degeneracy between + // the two forms and allows TableGen to generate mapping tables. + bit Interpretation64Bit = 0; } class PPC970_DGroup_First { bits<1> PPC970_First = 1; } @@ -80,6 +89,10 @@ class I2<bits<6> opcode1, bits<6> opcode2, dag OOL, dag IOL, string asmstr, let TSFlags{1} = PPC970_Single; let TSFlags{2} = PPC970_Cracked; let TSFlags{5-3} = PPC970_Unit; + + // Fields used for relation models. + string BaseName = ""; + bit Interpretation64Bit = 0; } // 1.7.1 I-Form @@ -177,7 +190,12 @@ class DForm_1a<bits<6> opcode, dag OOL, dag IOL, string asmstr, class DForm_2<bits<6> opcode, dag OOL, dag IOL, string asmstr, InstrItinClass itin, list<dag> pattern> - : DForm_base<opcode, OOL, IOL, asmstr, itin, pattern>; + : DForm_base<opcode, OOL, IOL, asmstr, itin, pattern> { + + // Even though ADDICo does not really have an RC bit, provide + // the declaration of one here so that isDOT has something to set. 
+ bit RC = 0; +} class DForm_2_r0<bits<6> opcode, dag OOL, dag IOL, string asmstr, InstrItinClass itin, list<dag> pattern> @@ -347,6 +365,12 @@ class XForm_1<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr, InstrItinClass itin, list<dag> pattern> : XForm_base_r3xo<opcode, xo, OOL, IOL, asmstr, itin, pattern>; +class XForm_1a<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list<dag> pattern> + : XForm_base_r3xo<opcode, xo, OOL, IOL, asmstr, itin, pattern> { + let RST = 0; +} + class XForm_6<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr, InstrItinClass itin, list<dag> pattern> : XForm_base_r3xo_swapped<opcode, xo, OOL, IOL, asmstr, itin> { @@ -565,9 +589,9 @@ class XLForm_2_br<bits<6> opcode, bits<10> xo, bit lk, bits<7> BIBO; // 2 bits of BI and 5 bits of BO. bits<3> CR; - let BO = BIBO{2-6}; - let BI{0-1} = BIBO{0-1}; - let BI{2-4} = CR; + let BO = BIBO{4-0}; + let BI{0-1} = BIBO{5-6}; + let BI{2-4} = CR{0-2}; let BH = 0; } @@ -837,6 +861,25 @@ class MDForm_1<bits<6> opcode, bits<3> xo, dag OOL, dag IOL, string asmstr, let Inst{31} = RC; } +class MDSForm_1<bits<6> opcode, bits<4> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list<dag> pattern> + : I<opcode, OOL, IOL, asmstr, itin> { + bits<5> RA; + bits<5> RS; + bits<5> RB; + bits<6> MBE; + + let Pattern = pattern; + + bit RC = 0; // set by isDOT + + let Inst{6-10} = RS; + let Inst{11-15} = RA; + let Inst{16-20} = RB; + let Inst{21-26} = MBE{4,3,2,1,0,5}; + let Inst{27-30} = xo; + let Inst{31} = RC; +} // E-1 VA-Form diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp index 69c54ed..1fb17eb 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -18,8 +18,10 @@ #include "PPCInstrBuilder.h" #include "PPCMachineFunctionInfo.h" #include "PPCTargetMachine.h" +#include "llvm/ADT/Statistic.h" #include "llvm/ADT/STLExtras.h" #include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" @@ -30,6 +32,7 @@ #include "llvm/Support/TargetRegistry.h" #include "llvm/Support/raw_ostream.h" +#define GET_INSTRMAP_INFO #define GET_INSTRINFO_CTOR #include "PPCGenInstrInfo.inc" @@ -39,6 +42,9 @@ static cl:: opt<bool> DisableCTRLoopAnal("disable-ppc-ctrloop-analysis", cl::Hidden, cl::desc("Disable analysis for CTR loops")); +static cl::opt<bool> DisableCmpOpt("disable-ppc-cmp-opt", +cl::desc("Disable compare instruction optimization"), cl::Hidden); + PPCInstrInfo::PPCInstrInfo(PPCTargetMachine &tm) : PPCGenInstrInfo(PPC::ADJCALLSTACKDOWN, PPC::ADJCALLSTACKUP), TM(tm), RI(*TM.getSubtargetImpl(), *this) {} @@ -147,7 +153,8 @@ PPCInstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const { MachineFunction &MF = *MI->getParent()->getParent(); // Normal instructions can be commuted the obvious way. - if (MI->getOpcode() != PPC::RLWIMI) + if (MI->getOpcode() != PPC::RLWIMI && + MI->getOpcode() != PPC::RLWIMIo) return TargetInstrInfo::commuteInstruction(MI, NewMI); // Cannot commute if it has a non-zero rotate count. @@ -417,6 +424,105 @@ PPCInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, return 2; } +// Select analysis. 
+bool PPCInstrInfo::canInsertSelect(const MachineBasicBlock &MBB, + const SmallVectorImpl<MachineOperand> &Cond, + unsigned TrueReg, unsigned FalseReg, + int &CondCycles, int &TrueCycles, int &FalseCycles) const { + if (!TM.getSubtargetImpl()->hasISEL()) + return false; + + if (Cond.size() != 2) + return false; + + // If this is really a bdnz-like condition, then it cannot be turned into a + // select. + if (Cond[1].getReg() == PPC::CTR || Cond[1].getReg() == PPC::CTR8) + return false; + + // Check register classes. + const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); + const TargetRegisterClass *RC = + RI.getCommonSubClass(MRI.getRegClass(TrueReg), MRI.getRegClass(FalseReg)); + if (!RC) + return false; + + // isel is for regular integer GPRs only. + if (!PPC::GPRCRegClass.hasSubClassEq(RC) && + !PPC::G8RCRegClass.hasSubClassEq(RC)) + return false; + + // FIXME: These numbers are for the A2, how well they work for other cores is + // an open question. On the A2, the isel instruction has a 2-cycle latency + // but single-cycle throughput. These numbers are used in combination with + // the MispredictPenalty setting from the active SchedMachineModel. + CondCycles = 1; + TrueCycles = 1; + FalseCycles = 1; + + return true; +} + +void PPCInstrInfo::insertSelect(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, DebugLoc dl, + unsigned DestReg, + const SmallVectorImpl<MachineOperand> &Cond, + unsigned TrueReg, unsigned FalseReg) const { + assert(Cond.size() == 2 && + "PPC branch conditions have two components!"); + + assert(TM.getSubtargetImpl()->hasISEL() && + "Cannot insert select on target without ISEL support"); + + // Get the register classes. + MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); + const TargetRegisterClass *RC = + RI.getCommonSubClass(MRI.getRegClass(TrueReg), MRI.getRegClass(FalseReg)); + assert(RC && "TrueReg and FalseReg must have overlapping register classes"); + assert((PPC::GPRCRegClass.hasSubClassEq(RC) || + PPC::G8RCRegClass.hasSubClassEq(RC)) && + "isel is for regular integer GPRs only"); + + unsigned OpCode = + PPC::GPRCRegClass.hasSubClassEq(RC) ? PPC::ISEL : PPC::ISEL8; + unsigned SelectPred = Cond[0].getImm(); + + unsigned SubIdx; + bool SwapOps; + switch (SelectPred) { + default: llvm_unreachable("invalid predicate for isel"); + case PPC::PRED_EQ: SubIdx = PPC::sub_eq; SwapOps = false; break; + case PPC::PRED_NE: SubIdx = PPC::sub_eq; SwapOps = true; break; + case PPC::PRED_LT: SubIdx = PPC::sub_lt; SwapOps = false; break; + case PPC::PRED_GE: SubIdx = PPC::sub_lt; SwapOps = true; break; + case PPC::PRED_GT: SubIdx = PPC::sub_gt; SwapOps = false; break; + case PPC::PRED_LE: SubIdx = PPC::sub_gt; SwapOps = true; break; + case PPC::PRED_UN: SubIdx = PPC::sub_un; SwapOps = false; break; + case PPC::PRED_NU: SubIdx = PPC::sub_un; SwapOps = true; break; + } + + unsigned FirstReg = SwapOps ? FalseReg : TrueReg, + SecondReg = SwapOps ? TrueReg : FalseReg; + + // The first input register of isel cannot be r0. If it is a member + // of a register class that can be r0, then copy it first (the + // register allocator should eliminate the copy). + if (MRI.getRegClass(FirstReg)->contains(PPC::R0) || + MRI.getRegClass(FirstReg)->contains(PPC::X0)) { + const TargetRegisterClass *FirstRC = + MRI.getRegClass(FirstReg)->contains(PPC::X0) ? 
+ &PPC::G8RC_NOX0RegClass : &PPC::GPRC_NOR0RegClass; + unsigned OldFirstReg = FirstReg; + FirstReg = MRI.createVirtualRegister(FirstRC); + BuildMI(MBB, MI, dl, get(TargetOpcode::COPY), FirstReg) + .addReg(OldFirstReg); + } + + BuildMI(MBB, MI, dl, get(OpCode), DestReg) + .addReg(FirstReg).addReg(SecondReg) + .addReg(Cond[1].getReg(), 0, SubIdx); +} + void PPCInstrInfo::copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, DebugLoc DL, unsigned DestReg, unsigned SrcReg, @@ -707,6 +813,555 @@ ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const { return false; } +bool PPCInstrInfo::FoldImmediate(MachineInstr *UseMI, MachineInstr *DefMI, + unsigned Reg, MachineRegisterInfo *MRI) const { + // For some instructions, it is legal to fold ZERO into the RA register field. + // A zero immediate should always be loaded with a single li. + unsigned DefOpc = DefMI->getOpcode(); + if (DefOpc != PPC::LI && DefOpc != PPC::LI8) + return false; + if (!DefMI->getOperand(1).isImm()) + return false; + if (DefMI->getOperand(1).getImm() != 0) + return false; + + // Note that we cannot here invert the arguments of an isel in order to fold + // a ZERO into what is presented as the second argument. All we have here + // is the condition bit, and that might come from a CR-logical bit operation. + + const MCInstrDesc &UseMCID = UseMI->getDesc(); + + // Only fold into real machine instructions. + if (UseMCID.isPseudo()) + return false; + + unsigned UseIdx; + for (UseIdx = 0; UseIdx < UseMI->getNumOperands(); ++UseIdx) + if (UseMI->getOperand(UseIdx).isReg() && + UseMI->getOperand(UseIdx).getReg() == Reg) + break; + + assert(UseIdx < UseMI->getNumOperands() && "Cannot find Reg in UseMI"); + assert(UseIdx < UseMCID.getNumOperands() && "No operand description for Reg"); + + const MCOperandInfo *UseInfo = &UseMCID.OpInfo[UseIdx]; + + // We can fold the zero if this register requires a GPRC_NOR0/G8RC_NOX0 + // register (which might also be specified as a pointer class kind). + if (UseInfo->isLookupPtrRegClass()) { + if (UseInfo->RegClass /* Kind */ != 1) + return false; + } else { + if (UseInfo->RegClass != PPC::GPRC_NOR0RegClassID && + UseInfo->RegClass != PPC::G8RC_NOX0RegClassID) + return false; + } + + // Make sure this is not tied to an output register (or otherwise + // constrained). This is true for ST?UX registers, for example, which + // are tied to their output registers. + if (UseInfo->Constraints != 0) + return false; + + unsigned ZeroReg; + if (UseInfo->isLookupPtrRegClass()) { + bool isPPC64 = TM.getSubtargetImpl()->isPPC64(); + ZeroReg = isPPC64 ? PPC::ZERO8 : PPC::ZERO; + } else { + ZeroReg = UseInfo->RegClass == PPC::G8RC_NOX0RegClassID ? + PPC::ZERO8 : PPC::ZERO; + } + + bool DeleteDef = MRI->hasOneNonDBGUse(Reg); + UseMI->getOperand(UseIdx).setReg(ZeroReg); + + if (DeleteDef) + DefMI->eraseFromParent(); + + return true; +} + +static bool MBBDefinesCTR(MachineBasicBlock &MBB) { + for (MachineBasicBlock::iterator I = MBB.begin(), IE = MBB.end(); + I != IE; ++I) + if (I->definesRegister(PPC::CTR) || I->definesRegister(PPC::CTR8)) + return true; + return false; +} + +// We should make sure that, if we're going to predicate both sides of a +// condition (a diamond), that both sides don't define the counter register. We +// can predicate counter-decrement-based branches, but while that predicates +// the branching, it does not predicate the counter decrement. If we tried to +// merge the triangle into one predicated block, we'd decrement the counter +// twice. 
+bool PPCInstrInfo::isProfitableToIfCvt(MachineBasicBlock &TMBB, + unsigned NumT, unsigned ExtraT, + MachineBasicBlock &FMBB, + unsigned NumF, unsigned ExtraF, + const BranchProbability &Probability) const { + return !(MBBDefinesCTR(TMBB) && MBBDefinesCTR(FMBB)); +} + + +bool PPCInstrInfo::isPredicated(const MachineInstr *MI) const { + // The predicated branches are identified by their type, not really by the + // explicit presence of a predicate. Furthermore, some of them can be + // predicated more than once. Because if conversion won't try to predicate + // any instruction which already claims to be predicated (by returning true + // here), always return false. In doing so, we let isPredicable() be the + // final word on whether not the instruction can be (further) predicated. + + return false; +} + +bool PPCInstrInfo::isUnpredicatedTerminator(const MachineInstr *MI) const { + if (!MI->isTerminator()) + return false; + + // Conditional branch is a special case. + if (MI->isBranch() && !MI->isBarrier()) + return true; + + return !isPredicated(MI); +} + +bool PPCInstrInfo::PredicateInstruction( + MachineInstr *MI, + const SmallVectorImpl<MachineOperand> &Pred) const { + unsigned OpC = MI->getOpcode(); + if (OpC == PPC::BLR) { + if (Pred[1].getReg() == PPC::CTR8 || Pred[1].getReg() == PPC::CTR) { + bool isPPC64 = TM.getSubtargetImpl()->isPPC64(); + MI->setDesc(get(Pred[0].getImm() ? + (isPPC64 ? PPC::BDNZLR8 : PPC::BDNZLR) : + (isPPC64 ? PPC::BDZLR8 : PPC::BDZLR))); + } else { + MI->setDesc(get(PPC::BCLR)); + MachineInstrBuilder(*MI->getParent()->getParent(), MI) + .addImm(Pred[0].getImm()) + .addReg(Pred[1].getReg()); + } + + return true; + } else if (OpC == PPC::B) { + if (Pred[1].getReg() == PPC::CTR8 || Pred[1].getReg() == PPC::CTR) { + bool isPPC64 = TM.getSubtargetImpl()->isPPC64(); + MI->setDesc(get(Pred[0].getImm() ? + (isPPC64 ? PPC::BDNZ8 : PPC::BDNZ) : + (isPPC64 ? PPC::BDZ8 : PPC::BDZ))); + } else { + MachineBasicBlock *MBB = MI->getOperand(0).getMBB(); + MI->RemoveOperand(0); + + MI->setDesc(get(PPC::BCC)); + MachineInstrBuilder(*MI->getParent()->getParent(), MI) + .addImm(Pred[0].getImm()) + .addReg(Pred[1].getReg()) + .addMBB(MBB); + } + + return true; + } else if (OpC == PPC::BCTR || OpC == PPC::BCTR8 || + OpC == PPC::BCTRL || OpC == PPC::BCTRL8) { + if (Pred[1].getReg() == PPC::CTR8 || Pred[1].getReg() == PPC::CTR) + llvm_unreachable("Cannot predicate bctr[l] on the ctr register"); + + bool setLR = OpC == PPC::BCTRL || OpC == PPC::BCTRL8; + bool isPPC64 = TM.getSubtargetImpl()->isPPC64(); + MI->setDesc(get(isPPC64 ? (setLR ? PPC::BCCTRL8 : PPC::BCCTR8) : + (setLR ? PPC::BCCTRL : PPC::BCCTR))); + MachineInstrBuilder(*MI->getParent()->getParent(), MI) + .addImm(Pred[0].getImm()) + .addReg(Pred[1].getReg()); + return true; + } + + return false; +} + +bool PPCInstrInfo::SubsumesPredicate( + const SmallVectorImpl<MachineOperand> &Pred1, + const SmallVectorImpl<MachineOperand> &Pred2) const { + assert(Pred1.size() == 2 && "Invalid PPC first predicate"); + assert(Pred2.size() == 2 && "Invalid PPC second predicate"); + + if (Pred1[1].getReg() == PPC::CTR8 || Pred1[1].getReg() == PPC::CTR) + return false; + if (Pred2[1].getReg() == PPC::CTR8 || Pred2[1].getReg() == PPC::CTR) + return false; + + PPC::Predicate P1 = (PPC::Predicate) Pred1[0].getImm(); + PPC::Predicate P2 = (PPC::Predicate) Pred2[0].getImm(); + + if (P1 == P2) + return true; + + // Does P1 subsume P2, e.g. GE subsumes GT. 
+ if (P1 == PPC::PRED_LE && + (P2 == PPC::PRED_LT || P2 == PPC::PRED_EQ)) + return true; + if (P1 == PPC::PRED_GE && + (P2 == PPC::PRED_GT || P2 == PPC::PRED_EQ)) + return true; + + return false; +} + +bool PPCInstrInfo::DefinesPredicate(MachineInstr *MI, + std::vector<MachineOperand> &Pred) const { + // Note: At the present time, the contents of Pred from this function are + // unused by IfConversion. This implementation follows ARM by pushing the + // CR-defining operand. Because the 'DZ' and 'DNZ' count as types of + // predicate, instructions defining CTR or CTR8 are also included as + // predicate-defining instructions. + + const TargetRegisterClass *RCs[] = + { &PPC::CRRCRegClass, &PPC::CRBITRCRegClass, + &PPC::CTRRCRegClass, &PPC::CTRRC8RegClass }; + + bool Found = false; + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI->getOperand(i); + for (unsigned c = 0; c < array_lengthof(RCs) && !Found; ++c) { + const TargetRegisterClass *RC = RCs[c]; + if (MO.isReg()) { + if (MO.isDef() && RC->contains(MO.getReg())) { + Pred.push_back(MO); + Found = true; + } + } else if (MO.isRegMask()) { + for (TargetRegisterClass::iterator I = RC->begin(), + IE = RC->end(); I != IE; ++I) + if (MO.clobbersPhysReg(*I)) { + Pred.push_back(MO); + Found = true; + } + } + } + } + + return Found; +} + +bool PPCInstrInfo::isPredicable(MachineInstr *MI) const { + unsigned OpC = MI->getOpcode(); + switch (OpC) { + default: + return false; + case PPC::B: + case PPC::BLR: + case PPC::BCTR: + case PPC::BCTR8: + case PPC::BCTRL: + case PPC::BCTRL8: + return true; + } +} + +bool PPCInstrInfo::analyzeCompare(const MachineInstr *MI, + unsigned &SrcReg, unsigned &SrcReg2, + int &Mask, int &Value) const { + unsigned Opc = MI->getOpcode(); + + switch (Opc) { + default: return false; + case PPC::CMPWI: + case PPC::CMPLWI: + case PPC::CMPDI: + case PPC::CMPLDI: + SrcReg = MI->getOperand(1).getReg(); + SrcReg2 = 0; + Value = MI->getOperand(2).getImm(); + Mask = 0xFFFF; + return true; + case PPC::CMPW: + case PPC::CMPLW: + case PPC::CMPD: + case PPC::CMPLD: + case PPC::FCMPUS: + case PPC::FCMPUD: + SrcReg = MI->getOperand(1).getReg(); + SrcReg2 = MI->getOperand(2).getReg(); + return true; + } +} + +bool PPCInstrInfo::optimizeCompareInstr(MachineInstr *CmpInstr, + unsigned SrcReg, unsigned SrcReg2, + int Mask, int Value, + const MachineRegisterInfo *MRI) const { + if (DisableCmpOpt) + return false; + + int OpC = CmpInstr->getOpcode(); + unsigned CRReg = CmpInstr->getOperand(0).getReg(); + + // FP record forms set CR1 based on the exception status bits, not a + // comparison with zero. + if (OpC == PPC::FCMPUS || OpC == PPC::FCMPUD) + return false; + + // The record forms set the condition register based on a signed comparison + // with zero (so says the ISA manual). This is not as straightforward as it + // seems, however, because this is always a 64-bit comparison on PPC64, even + // for instructions that are 32-bit in nature (like slw for example). + // So, on PPC32, for unsigned comparisons, we can use the record forms only + // for equality checks (as those don't depend on the sign). On PPC64, + // we are restricted to equality for unsigned 64-bit comparisons and for + // signed 32-bit comparisons the applicability is more restricted.
+ bool isPPC64 = TM.getSubtargetImpl()->isPPC64(); + bool is32BitSignedCompare = OpC == PPC::CMPWI || OpC == PPC::CMPW; + bool is32BitUnsignedCompare = OpC == PPC::CMPLWI || OpC == PPC::CMPLW; + bool is64BitUnsignedCompare = OpC == PPC::CMPLDI || OpC == PPC::CMPLD; + + // Get the unique definition of SrcReg. + MachineInstr *MI = MRI->getUniqueVRegDef(SrcReg); + if (!MI) return false; + int MIOpC = MI->getOpcode(); + + bool equalityOnly = false; + bool noSub = false; + if (isPPC64) { + if (is32BitSignedCompare) { + // We can perform this optimization only if MI is sign-extending. + if (MIOpC == PPC::SRAW || MIOpC == PPC::SRAWo || + MIOpC == PPC::SRAWI || MIOpC == PPC::SRAWIo || + MIOpC == PPC::EXTSB || MIOpC == PPC::EXTSBo || + MIOpC == PPC::EXTSH || MIOpC == PPC::EXTSHo || + MIOpC == PPC::EXTSW || MIOpC == PPC::EXTSWo) { + noSub = true; + } else + return false; + } else if (is32BitUnsignedCompare) { + // We can perform this optimization, equality only, if MI is + // zero-extending. + if (MIOpC == PPC::CNTLZW || MIOpC == PPC::CNTLZWo || + MIOpC == PPC::SLW || MIOpC == PPC::SLWo || + MIOpC == PPC::SRW || MIOpC == PPC::SRWo) { + noSub = true; + equalityOnly = true; + } else + return false; + } else + equalityOnly = is64BitUnsignedCompare; + } else + equalityOnly = is32BitUnsignedCompare; + + if (equalityOnly) { + // We need to check the uses of the condition register in order to reject + // non-equality comparisons. + for (MachineRegisterInfo::use_iterator I = MRI->use_begin(CRReg), + IE = MRI->use_end(); I != IE; ++I) { + MachineInstr *UseMI = &*I; + if (UseMI->getOpcode() == PPC::BCC) { + unsigned Pred = UseMI->getOperand(0).getImm(); + if (Pred == PPC::PRED_EQ || Pred == PPC::PRED_NE) + continue; + + return false; + } else if (UseMI->getOpcode() == PPC::ISEL || + UseMI->getOpcode() == PPC::ISEL8) { + unsigned SubIdx = UseMI->getOperand(3).getSubReg(); + if (SubIdx == PPC::sub_eq) + continue; + + return false; + } else + return false; + } + } + + // Get ready to iterate backward from CmpInstr. + MachineBasicBlock::iterator I = CmpInstr, E = MI, + B = CmpInstr->getParent()->begin(); + + // Scan forward to find the first use of the compare. + for (MachineBasicBlock::iterator EL = CmpInstr->getParent()->end(); + I != EL; ++I) { + bool FoundUse = false; + for (MachineRegisterInfo::use_iterator J = MRI->use_begin(CRReg), + JE = MRI->use_end(); J != JE; ++J) + if (&*J == &*I) { + FoundUse = true; + break; + } + + if (FoundUse) + break; + } + + // Early exit if we're at the beginning of the BB. + if (I == B) return false; + + // There are two possible candidates which can be changed to set CR[01]. + // One is MI, the other is a SUB instruction. + // For CMPrr(r1,r2), we are looking for SUB(r1,r2) or SUB(r2,r1). + MachineInstr *Sub = NULL; + if (SrcReg2 != 0) + // MI is not a candidate for CMPrr. + MI = NULL; + // FIXME: Conservatively refuse to convert an instruction which isn't in the + // same BB as the comparison. This is to allow the check below to avoid calls + // (and other explicit clobbers); instead we should really check for these + // more explicitly (in at least a few predecessors). + else if (MI->getParent() != CmpInstr->getParent() || Value != 0) { + // PPC does not have a record-form SUBri. + return false; + } + + // Search for Sub. 
+ const TargetRegisterInfo *TRI = &getRegisterInfo(); + --I; + for (; I != E && !noSub; --I) { + const MachineInstr &Instr = *I; + unsigned IOpC = Instr.getOpcode(); + + if (&*I != CmpInstr && ( + Instr.modifiesRegister(PPC::CR0, TRI) || + Instr.readsRegister(PPC::CR0, TRI))) + // This instruction modifies or uses the record condition register after + // the one we want to change. While we could do this transformation, it + // would likely not be profitable. This transformation removes one + // instruction, and so even forcing RA to generate one move probably + // makes it unprofitable. + return false; + + // Check whether CmpInstr can be made redundant by the current instruction. + if ((OpC == PPC::CMPW || OpC == PPC::CMPLW || + OpC == PPC::CMPD || OpC == PPC::CMPLD) && + (IOpC == PPC::SUBF || IOpC == PPC::SUBF8) && + ((Instr.getOperand(1).getReg() == SrcReg && + Instr.getOperand(2).getReg() == SrcReg2) || + (Instr.getOperand(1).getReg() == SrcReg2 && + Instr.getOperand(2).getReg() == SrcReg))) { + Sub = &*I; + break; + } + + if (I == B) + // The 'and' is below the comparison instruction. + return false; + } + + // Return false if no candidates exist. + if (!MI && !Sub) + return false; + + // The single candidate is called MI. + if (!MI) MI = Sub; + + int NewOpC = -1; + MIOpC = MI->getOpcode(); + if (MIOpC == PPC::ANDIo || MIOpC == PPC::ANDIo8) + NewOpC = MIOpC; + else { + NewOpC = PPC::getRecordFormOpcode(MIOpC); + if (NewOpC == -1 && PPC::getNonRecordFormOpcode(MIOpC) != -1) + NewOpC = MIOpC; + } + + // FIXME: On the non-embedded POWER architectures, only some of the record + // forms are fast, and we should use only the fast ones. + + // The defining instruction has a record form (or is already a record + // form). It is possible, however, that we'll need to reverse the condition + // code of the users. + if (NewOpC == -1) + return false; + + SmallVector<std::pair<MachineOperand*, PPC::Predicate>, 4> PredsToUpdate; + SmallVector<std::pair<MachineOperand*, unsigned>, 4> SubRegsToUpdate; + + // If we have SUB(r1, r2) and CMP(r2, r1), the condition code based on CMP + // needs to be updated to be based on SUB. Push the condition code + // operands to OperandsToUpdate. If it is safe to remove CmpInstr, the + // condition code of these operands will be modified. + bool ShouldSwap = false; + if (Sub) { + ShouldSwap = SrcReg2 != 0 && Sub->getOperand(1).getReg() == SrcReg2 && + Sub->getOperand(2).getReg() == SrcReg; + + // The operands to subf are the opposite of sub, so only in the fixed-point + // case, invert the order. 
+ ShouldSwap = !ShouldSwap; + } + + if (ShouldSwap) + for (MachineRegisterInfo::use_iterator I = MRI->use_begin(CRReg), + IE = MRI->use_end(); I != IE; ++I) { + MachineInstr *UseMI = &*I; + if (UseMI->getOpcode() == PPC::BCC) { + PPC::Predicate Pred = (PPC::Predicate) UseMI->getOperand(0).getImm(); + assert((!equalityOnly || + Pred == PPC::PRED_EQ || Pred == PPC::PRED_NE) && + "Invalid predicate for equality-only optimization"); + PredsToUpdate.push_back(std::make_pair(&((*I).getOperand(0)), + PPC::getSwappedPredicate(Pred))); + } else if (UseMI->getOpcode() == PPC::ISEL || + UseMI->getOpcode() == PPC::ISEL8) { + unsigned NewSubReg = UseMI->getOperand(3).getSubReg(); + assert((!equalityOnly || NewSubReg == PPC::sub_eq) && + "Invalid CR bit for equality-only optimization"); + + if (NewSubReg == PPC::sub_lt) + NewSubReg = PPC::sub_gt; + else if (NewSubReg == PPC::sub_gt) + NewSubReg = PPC::sub_lt; + + SubRegsToUpdate.push_back(std::make_pair(&((*I).getOperand(3)), + NewSubReg)); + } else // We need to abort on a user we don't understand. + return false; + } + + // Create a new virtual register to hold the value of the CR set by the + // record-form instruction. If the instruction was not previously in + // record form, then set the kill flag on the CR. + CmpInstr->eraseFromParent(); + + MachineBasicBlock::iterator MII = MI; + BuildMI(*MI->getParent(), llvm::next(MII), MI->getDebugLoc(), + get(TargetOpcode::COPY), CRReg) + .addReg(PPC::CR0, MIOpC != NewOpC ? RegState::Kill : 0); + + if (MIOpC != NewOpC) { + // We need to be careful here: we're replacing one instruction with + // another, and we need to make sure that we get all of the right + // implicit uses and defs. On the other hand, the caller may be holding + // an iterator to this instruction, and so we can't delete it (this is + // specifically the case if this is the instruction directly after the + // compare). + + const MCInstrDesc &NewDesc = get(NewOpC); + MI->setDesc(NewDesc); + + if (NewDesc.ImplicitDefs) + for (const uint16_t *ImpDefs = NewDesc.getImplicitDefs(); + *ImpDefs; ++ImpDefs) + if (!MI->definesRegister(*ImpDefs)) + MI->addOperand(*MI->getParent()->getParent(), + MachineOperand::CreateReg(*ImpDefs, true, true)); + if (NewDesc.ImplicitUses) + for (const uint16_t *ImpUses = NewDesc.getImplicitUses(); + *ImpUses; ++ImpUses) + if (!MI->readsRegister(*ImpUses)) + MI->addOperand(*MI->getParent()->getParent(), + MachineOperand::CreateReg(*ImpUses, false, true)); + } + + // Modify the condition code of operands in OperandsToUpdate. + // Since we have SUB(r1, r2) and CMP(r2, r1), the condition code needs to + // be changed from r2 > r1 to r1 < r2, from r2 < r1 to r1 > r2, etc. + for (unsigned i = 0, e = PredsToUpdate.size(); i < e; i++) + PredsToUpdate[i].first->setImm(PredsToUpdate[i].second); + + for (unsigned i = 0, e = SubRegsToUpdate.size(); i < e; i++) + SubRegsToUpdate[i].first->setSubReg(SubRegsToUpdate[i].second); + + return true; +} + /// GetInstSize - Return the number of bytes of code the specified /// instruction may be. This returns the maximum number of bytes. 
/// @@ -729,3 +1384,152 @@ unsigned PPCInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const { return 4; // PowerPC instructions are all 4 bytes } } + +#undef DEBUG_TYPE +#define DEBUG_TYPE "ppc-early-ret" +STATISTIC(NumBCLR, "Number of early conditional returns"); +STATISTIC(NumBLR, "Number of early returns"); + +namespace llvm { + void initializePPCEarlyReturnPass(PassRegistry&); +} + +namespace { + // PPCEarlyReturn pass - For simple functions without epilogue code, move + // returns up, and create conditional returns, to avoid unnecessary + // branch-to-blr sequences. + struct PPCEarlyReturn : public MachineFunctionPass { + static char ID; + PPCEarlyReturn() : MachineFunctionPass(ID) { + initializePPCEarlyReturnPass(*PassRegistry::getPassRegistry()); + } + + const PPCTargetMachine *TM; + const PPCInstrInfo *TII; + +protected: + bool processBlock(MachineBasicBlock &ReturnMBB) { + bool Changed = false; + + MachineBasicBlock::iterator I = ReturnMBB.begin(); + I = ReturnMBB.SkipPHIsAndLabels(I); + + // The block must be essentially empty except for the blr. + if (I == ReturnMBB.end() || I->getOpcode() != PPC::BLR || + I != ReturnMBB.getLastNonDebugInstr()) + return Changed; + + SmallVector<MachineBasicBlock*, 8> PredToRemove; + for (MachineBasicBlock::pred_iterator PI = ReturnMBB.pred_begin(), + PIE = ReturnMBB.pred_end(); PI != PIE; ++PI) { + bool OtherReference = false, BlockChanged = false; + for (MachineBasicBlock::iterator J = (*PI)->getLastNonDebugInstr();;) { + if (J->getOpcode() == PPC::B) { + if (J->getOperand(0).getMBB() == &ReturnMBB) { + // This is an unconditional branch to the return. Replace the + // branch with a blr. + BuildMI(**PI, J, J->getDebugLoc(), TII->get(PPC::BLR)); + MachineBasicBlock::iterator K = J--; + K->eraseFromParent(); + BlockChanged = true; + ++NumBLR; + continue; + } + } else if (J->getOpcode() == PPC::BCC) { + if (J->getOperand(2).getMBB() == &ReturnMBB) { + // This is a conditional branch to the return. Replace the branch + // with a bclr. + BuildMI(**PI, J, J->getDebugLoc(), TII->get(PPC::BCLR)) + .addImm(J->getOperand(0).getImm()) + .addReg(J->getOperand(1).getReg()); + MachineBasicBlock::iterator K = J--; + K->eraseFromParent(); + BlockChanged = true; + ++NumBCLR; + continue; + } + } else if (J->isBranch()) { + if (J->isIndirectBranch()) { + if (ReturnMBB.hasAddressTaken()) + OtherReference = true; + } else + for (unsigned i = 0; i < J->getNumOperands(); ++i) + if (J->getOperand(i).isMBB() && + J->getOperand(i).getMBB() == &ReturnMBB) + OtherReference = true; + } else if (!J->isTerminator() && !J->isDebugValue()) + break; + + if (J == (*PI)->begin()) + break; + + --J; + } + + if ((*PI)->canFallThrough() && (*PI)->isLayoutSuccessor(&ReturnMBB)) + OtherReference = true; + + // Predecessors are stored in a vector and can't be removed here. + if (!OtherReference && BlockChanged) { + PredToRemove.push_back(*PI); + } + + if (BlockChanged) + Changed = true; + } + + for (unsigned i = 0, ie = PredToRemove.size(); i != ie; ++i) + PredToRemove[i]->removeSuccessor(&ReturnMBB); + + if (Changed && !ReturnMBB.hasAddressTaken()) { + // We now might be able to merge this blr-only block into its + // by-layout predecessor. + if (ReturnMBB.pred_size() == 1 && + (*ReturnMBB.pred_begin())->isLayoutSuccessor(&ReturnMBB)) { + // Move the blr into the preceding block. 
+ MachineBasicBlock &PrevMBB = **ReturnMBB.pred_begin(); + PrevMBB.splice(PrevMBB.end(), &ReturnMBB, I); + PrevMBB.removeSuccessor(&ReturnMBB); + } + + if (ReturnMBB.pred_empty()) + ReturnMBB.eraseFromParent(); + } + + return Changed; + } + +public: + virtual bool runOnMachineFunction(MachineFunction &MF) { + TM = static_cast<const PPCTargetMachine *>(&MF.getTarget()); + TII = TM->getInstrInfo(); + + bool Changed = false; + + // If the function does not have at least two blocks, then there is + // nothing to do. + if (MF.size() < 2) + return Changed; + + for (MachineFunction::iterator I = MF.begin(); I != MF.end();) { + MachineBasicBlock &B = *I++; + if (processBlock(B)) + Changed = true; + } + + return Changed; + } + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + MachineFunctionPass::getAnalysisUsage(AU); + } + }; +} + +INITIALIZE_PASS(PPCEarlyReturn, DEBUG_TYPE, + "PowerPC Early-Return Creation", false, false) + +char PPCEarlyReturn::ID = 0; +FunctionPass* +llvm::createPPCEarlyReturnPass() { return new PPCEarlyReturn(); } + diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.h b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.h index 635e348..34a1a73 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.h +++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.h @@ -120,6 +120,17 @@ public: MachineBasicBlock *FBB, const SmallVectorImpl<MachineOperand> &Cond, DebugLoc DL) const; + + // Select analysis. + virtual bool canInsertSelect(const MachineBasicBlock&, + const SmallVectorImpl<MachineOperand> &Cond, + unsigned, unsigned, int&, int&, int&) const; + virtual void insertSelect(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, DebugLoc DL, + unsigned DstReg, + const SmallVectorImpl<MachineOperand> &Cond, + unsigned TrueReg, unsigned FalseReg) const; + virtual void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, DebugLoc DL, unsigned DestReg, unsigned SrcReg, @@ -146,6 +157,66 @@ public: virtual bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const; + virtual bool FoldImmediate(MachineInstr *UseMI, MachineInstr *DefMI, + unsigned Reg, MachineRegisterInfo *MRI) const; + + // If conversion by predication (only supported by some branch instructions). + // All of the profitability checks always return true; it is always + // profitable to use the predicated branches. + virtual bool isProfitableToIfCvt(MachineBasicBlock &MBB, + unsigned NumCycles, unsigned ExtraPredCycles, + const BranchProbability &Probability) const { + return true; + } + + virtual bool isProfitableToIfCvt(MachineBasicBlock &TMBB, + unsigned NumT, unsigned ExtraT, + MachineBasicBlock &FMBB, + unsigned NumF, unsigned ExtraF, + const BranchProbability &Probability) const; + + virtual bool isProfitableToDupForIfCvt(MachineBasicBlock &MBB, + unsigned NumCycles, + const BranchProbability + &Probability) const { + return true; + } + + virtual bool isProfitableToUnpredicate(MachineBasicBlock &TMBB, + MachineBasicBlock &FMBB) const { + return false; + } + + // Predication support. 
+ bool isPredicated(const MachineInstr *MI) const; + + virtual bool isUnpredicatedTerminator(const MachineInstr *MI) const; + + virtual + bool PredicateInstruction(MachineInstr *MI, + const SmallVectorImpl<MachineOperand> &Pred) const; + + virtual + bool SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1, + const SmallVectorImpl<MachineOperand> &Pred2) const; + + virtual bool DefinesPredicate(MachineInstr *MI, + std::vector<MachineOperand> &Pred) const; + + virtual bool isPredicable(MachineInstr *MI) const; + + // Comparison optimization. + + + virtual bool analyzeCompare(const MachineInstr *MI, + unsigned &SrcReg, unsigned &SrcReg2, + int &Mask, int &Value) const; + + virtual bool optimizeCompareInstr(MachineInstr *CmpInstr, + unsigned SrcReg, unsigned SrcReg2, + int Mask, int Value, + const MachineRegisterInfo *MRI) const; + /// GetInstSize - Return the number of bytes of code the specified /// instruction may be. This returns the maximum number of bytes. /// diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.td index ab90762..4763069 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.td +++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.td @@ -319,10 +319,7 @@ def unaligned4sextloadi32 : PatFrag<(ops node:$ptr), (sextloadi32 node:$ptr), [{ // PowerPC Flag Definitions. class isPPC64 { bit PPC64 = 1; } -class isDOT { - list<Register> Defs = [CR0]; - bit RC = 1; -} +class isDOT { bit RC = 1; } class RegConstraint<string C> { string Constraints = C; @@ -335,20 +332,111 @@ class NoEncode<string E> { //===----------------------------------------------------------------------===// // PowerPC Operand Definitions. +// In the default PowerPC assembler syntax, registers are specified simply +// by number, so they cannot be distinguished from immediate values (without +// looking at the opcode). This means that the default operand matching logic +// for the asm parser does not work, and we need to specify custom matchers. +// Since those can only be specified with RegisterOperand classes and not +// directly on the RegisterClass, all instruction patterns used by the asm +// parser need to use a RegisterOperand (instead of a RegisterClass) for +// all their register operands. +// For this purpose, we define one RegisterOperand for each RegisterClass, +// using the same name as the class, just in lower case.
+ +def PPCRegGPRCAsmOperand : AsmOperandClass { + let Name = "RegGPRC"; let PredicateMethod = "isRegNumber"; +} +def gprc : RegisterOperand<GPRC> { + let ParserMatchClass = PPCRegGPRCAsmOperand; +} +def PPCRegG8RCAsmOperand : AsmOperandClass { + let Name = "RegG8RC"; let PredicateMethod = "isRegNumber"; +} +def g8rc : RegisterOperand<G8RC> { + let ParserMatchClass = PPCRegG8RCAsmOperand; +} +def PPCRegGPRCNoR0AsmOperand : AsmOperandClass { + let Name = "RegGPRCNoR0"; let PredicateMethod = "isRegNumber"; +} +def gprc_nor0 : RegisterOperand<GPRC_NOR0> { + let ParserMatchClass = PPCRegGPRCNoR0AsmOperand; +} +def PPCRegG8RCNoX0AsmOperand : AsmOperandClass { + let Name = "RegG8RCNoX0"; let PredicateMethod = "isRegNumber"; +} +def g8rc_nox0 : RegisterOperand<G8RC_NOX0> { + let ParserMatchClass = PPCRegG8RCNoX0AsmOperand; +} +def PPCRegF8RCAsmOperand : AsmOperandClass { + let Name = "RegF8RC"; let PredicateMethod = "isRegNumber"; +} +def f8rc : RegisterOperand<F8RC> { + let ParserMatchClass = PPCRegF8RCAsmOperand; +} +def PPCRegF4RCAsmOperand : AsmOperandClass { + let Name = "RegF4RC"; let PredicateMethod = "isRegNumber"; +} +def f4rc : RegisterOperand<F4RC> { + let ParserMatchClass = PPCRegF4RCAsmOperand; +} +def PPCRegVRRCAsmOperand : AsmOperandClass { + let Name = "RegVRRC"; let PredicateMethod = "isRegNumber"; +} +def vrrc : RegisterOperand<VRRC> { + let ParserMatchClass = PPCRegVRRCAsmOperand; +} +def PPCRegCRBITRCAsmOperand : AsmOperandClass { + let Name = "RegCRBITRC"; let PredicateMethod = "isRegNumber"; +} +def crbitrc : RegisterOperand<CRBITRC> { + let ParserMatchClass = PPCRegCRBITRCAsmOperand; +} +def PPCRegCRRCAsmOperand : AsmOperandClass { + let Name = "RegCRRC"; let PredicateMethod = "isCCRegNumber"; +} +def crrc : RegisterOperand<CRRC> { + let ParserMatchClass = PPCRegCRRCAsmOperand; +} + +def PPCS5ImmAsmOperand : AsmOperandClass { + let Name = "S5Imm"; let PredicateMethod = "isS5Imm"; + let RenderMethod = "addImmOperands"; +} def s5imm : Operand<i32> { let PrintMethod = "printS5ImmOperand"; + let ParserMatchClass = PPCS5ImmAsmOperand; +} +def PPCU5ImmAsmOperand : AsmOperandClass { + let Name = "U5Imm"; let PredicateMethod = "isU5Imm"; + let RenderMethod = "addImmOperands"; } def u5imm : Operand<i32> { let PrintMethod = "printU5ImmOperand"; + let ParserMatchClass = PPCU5ImmAsmOperand; +} +def PPCU6ImmAsmOperand : AsmOperandClass { + let Name = "U6Imm"; let PredicateMethod = "isU6Imm"; + let RenderMethod = "addImmOperands"; } def u6imm : Operand<i32> { let PrintMethod = "printU6ImmOperand"; + let ParserMatchClass = PPCU6ImmAsmOperand; +} +def PPCS16ImmAsmOperand : AsmOperandClass { + let Name = "S16Imm"; let PredicateMethod = "isS16Imm"; + let RenderMethod = "addImmOperands"; } def s16imm : Operand<i32> { let PrintMethod = "printS16ImmOperand"; + let ParserMatchClass = PPCS16ImmAsmOperand; +} +def PPCU16ImmAsmOperand : AsmOperandClass { + let Name = "U16Imm"; let PredicateMethod = "isU16Imm"; + let RenderMethod = "addImmOperands"; } def u16imm : Operand<i32> { let PrintMethod = "printU16ImmOperand"; + let ParserMatchClass = PPCU16ImmAsmOperand; } def directbrtarget : Operand<OtherVT> { let PrintMethod = "printBranchOperand"; @@ -367,21 +455,49 @@ def aaddr : Operand<iPTR> { def symbolHi: Operand<i32> { let PrintMethod = "printSymbolHi"; let EncoderMethod = "getHA16Encoding"; + let ParserMatchClass = PPCS16ImmAsmOperand; } def symbolLo: Operand<i32> { let PrintMethod = "printSymbolLo"; let EncoderMethod = "getLO16Encoding"; + let ParserMatchClass = PPCS16ImmAsmOperand; +} +def 
PPCCRBitMaskOperand : AsmOperandClass { + let Name = "CRBitMask"; let PredicateMethod = "isCRBitMask"; } def crbitm: Operand<i8> { let PrintMethod = "printcrbitm"; let EncoderMethod = "get_crbitm_encoding"; + let ParserMatchClass = PPCCRBitMaskOperand; } // Address operands // A version of ptr_rc which excludes R0 (or X0 in 64-bit mode). -def ptr_rc_nor0 : PointerLikeRegClass<1>; +def PPCRegGxRCNoR0Operand : AsmOperandClass { + let Name = "RegGxRCNoR0"; let PredicateMethod = "isRegNumber"; +} +def ptr_rc_nor0 : Operand<iPTR>, PointerLikeRegClass<1> { + let ParserMatchClass = PPCRegGxRCNoR0Operand; +} +// A version of ptr_rc usable with the asm parser. +def PPCRegGxRCOperand : AsmOperandClass { + let Name = "RegGxRC"; let PredicateMethod = "isRegNumber"; +} +def ptr_rc_idx : Operand<iPTR>, PointerLikeRegClass<0> { + let ParserMatchClass = PPCRegGxRCOperand; +} -def dispRI : Operand<iPTR>; -def dispRIX : Operand<iPTR>; +def PPCDispRIOperand : AsmOperandClass { + let Name = "DispRI"; let PredicateMethod = "isS16Imm"; +} +def dispRI : Operand<iPTR> { + let ParserMatchClass = PPCDispRIOperand; +} +def PPCDispRIXOperand : AsmOperandClass { + let Name = "DispRIX"; let PredicateMethod = "isS16ImmX4"; +} +def dispRIX : Operand<iPTR> { + let ParserMatchClass = PPCDispRIXOperand; +} def memri : Operand<iPTR> { let PrintMethod = "printMemRegImm"; @@ -390,7 +506,7 @@ def memri : Operand<iPTR> { } def memrr : Operand<iPTR> { let PrintMethod = "printMemRegReg"; - let MIOperandInfo = (ops ptr_rc_nor0:$ptrreg, ptr_rc:$offreg); + let MIOperandInfo = (ops ptr_rc_nor0:$ptrreg, ptr_rc_idx:$offreg); } def memrix : Operand<iPTR> { // memri where the imm is shifted 2 bits. let PrintMethod = "printMemRegImmShifted"; @@ -407,7 +523,7 @@ def memr : Operand<iPTR> { // PowerPC Predicate operand. def pred : Operand<OtherVT> { let PrintMethod = "printPredicateOperand"; - let MIOperandInfo = (ops i32imm:$bibo, CRRC:$reg); + let MIOperandInfo = (ops i32imm:$bibo, crrc:$reg); } // Define PowerPC specific addressing mode. @@ -430,6 +546,252 @@ def In64BitMode : Predicate<"PPCSubTarget.isPPC64()">; def IsBookE : Predicate<"PPCSubTarget.isBookE()">; //===----------------------------------------------------------------------===// +// PowerPC Multiclass Definitions. + +multiclass XForm_6r<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, + string asmbase, string asmstr, InstrItinClass itin, + list<dag> pattern> { + let BaseName = asmbase in { + def NAME : XForm_6<opcode, xo, OOL, IOL, + !strconcat(asmbase, !strconcat(" ", asmstr)), itin, + pattern>, RecFormRel; + let Defs = [CR0] in + def o : XForm_6<opcode, xo, OOL, IOL, + !strconcat(asmbase, !strconcat(". ", asmstr)), itin, + []>, isDOT, RecFormRel; + } +} + +multiclass XForm_6rc<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, + string asmbase, string asmstr, InstrItinClass itin, + list<dag> pattern> { + let BaseName = asmbase in { + let Defs = [CARRY] in + def NAME : XForm_6<opcode, xo, OOL, IOL, + !strconcat(asmbase, !strconcat(" ", asmstr)), itin, + pattern>, RecFormRel; + let Defs = [CARRY, CR0] in + def o : XForm_6<opcode, xo, OOL, IOL, + !strconcat(asmbase, !strconcat(". 
", asmstr)), itin, + []>, isDOT, RecFormRel; + } +} + +multiclass XForm_10r<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, + string asmbase, string asmstr, InstrItinClass itin, + list<dag> pattern> { + let BaseName = asmbase in { + def NAME : XForm_10<opcode, xo, OOL, IOL, + !strconcat(asmbase, !strconcat(" ", asmstr)), itin, + pattern>, RecFormRel; + let Defs = [CR0] in + def o : XForm_10<opcode, xo, OOL, IOL, + !strconcat(asmbase, !strconcat(". ", asmstr)), itin, + []>, isDOT, RecFormRel; + } +} + +multiclass XForm_10rc<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, + string asmbase, string asmstr, InstrItinClass itin, + list<dag> pattern> { + let BaseName = asmbase in { + let Defs = [CARRY] in + def NAME : XForm_10<opcode, xo, OOL, IOL, + !strconcat(asmbase, !strconcat(" ", asmstr)), itin, + pattern>, RecFormRel; + let Defs = [CARRY, CR0] in + def o : XForm_10<opcode, xo, OOL, IOL, + !strconcat(asmbase, !strconcat(". ", asmstr)), itin, + []>, isDOT, RecFormRel; + } +} + +multiclass XForm_11r<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, + string asmbase, string asmstr, InstrItinClass itin, + list<dag> pattern> { + let BaseName = asmbase in { + def NAME : XForm_11<opcode, xo, OOL, IOL, + !strconcat(asmbase, !strconcat(" ", asmstr)), itin, + pattern>, RecFormRel; + let Defs = [CR0] in + def o : XForm_11<opcode, xo, OOL, IOL, + !strconcat(asmbase, !strconcat(". ", asmstr)), itin, + []>, isDOT, RecFormRel; + } +} + +multiclass XOForm_1r<bits<6> opcode, bits<9> xo, bit oe, dag OOL, dag IOL, + string asmbase, string asmstr, InstrItinClass itin, + list<dag> pattern> { + let BaseName = asmbase in { + def NAME : XOForm_1<opcode, xo, oe, OOL, IOL, + !strconcat(asmbase, !strconcat(" ", asmstr)), itin, + pattern>, RecFormRel; + let Defs = [CR0] in + def o : XOForm_1<opcode, xo, oe, OOL, IOL, + !strconcat(asmbase, !strconcat(". ", asmstr)), itin, + []>, isDOT, RecFormRel; + } +} + +multiclass XOForm_1rc<bits<6> opcode, bits<9> xo, bit oe, dag OOL, dag IOL, + string asmbase, string asmstr, InstrItinClass itin, + list<dag> pattern> { + let BaseName = asmbase in { + let Defs = [CARRY] in + def NAME : XOForm_1<opcode, xo, oe, OOL, IOL, + !strconcat(asmbase, !strconcat(" ", asmstr)), itin, + pattern>, RecFormRel; + let Defs = [CARRY, CR0] in + def o : XOForm_1<opcode, xo, oe, OOL, IOL, + !strconcat(asmbase, !strconcat(". ", asmstr)), itin, + []>, isDOT, RecFormRel; + } +} + +multiclass XOForm_3r<bits<6> opcode, bits<9> xo, bit oe, dag OOL, dag IOL, + string asmbase, string asmstr, InstrItinClass itin, + list<dag> pattern> { + let BaseName = asmbase in { + def NAME : XOForm_3<opcode, xo, oe, OOL, IOL, + !strconcat(asmbase, !strconcat(" ", asmstr)), itin, + pattern>, RecFormRel; + let Defs = [CR0] in + def o : XOForm_3<opcode, xo, oe, OOL, IOL, + !strconcat(asmbase, !strconcat(". ", asmstr)), itin, + []>, isDOT, RecFormRel; + } +} + +multiclass XOForm_3rc<bits<6> opcode, bits<9> xo, bit oe, dag OOL, dag IOL, + string asmbase, string asmstr, InstrItinClass itin, + list<dag> pattern> { + let BaseName = asmbase in { + let Defs = [CARRY] in + def NAME : XOForm_3<opcode, xo, oe, OOL, IOL, + !strconcat(asmbase, !strconcat(" ", asmstr)), itin, + pattern>, RecFormRel; + let Defs = [CARRY, CR0] in + def o : XOForm_3<opcode, xo, oe, OOL, IOL, + !strconcat(asmbase, !strconcat(". 
", asmstr)), itin, + []>, isDOT, RecFormRel; + } +} + +multiclass MForm_2r<bits<6> opcode, dag OOL, dag IOL, + string asmbase, string asmstr, InstrItinClass itin, + list<dag> pattern> { + let BaseName = asmbase in { + def NAME : MForm_2<opcode, OOL, IOL, + !strconcat(asmbase, !strconcat(" ", asmstr)), itin, + pattern>, RecFormRel; + let Defs = [CR0] in + def o : MForm_2<opcode, OOL, IOL, + !strconcat(asmbase, !strconcat(". ", asmstr)), itin, + []>, isDOT, RecFormRel; + } +} + +multiclass MDForm_1r<bits<6> opcode, bits<3> xo, dag OOL, dag IOL, + string asmbase, string asmstr, InstrItinClass itin, + list<dag> pattern> { + let BaseName = asmbase in { + def NAME : MDForm_1<opcode, xo, OOL, IOL, + !strconcat(asmbase, !strconcat(" ", asmstr)), itin, + pattern>, RecFormRel; + let Defs = [CR0] in + def o : MDForm_1<opcode, xo, OOL, IOL, + !strconcat(asmbase, !strconcat(". ", asmstr)), itin, + []>, isDOT, RecFormRel; + } +} + +multiclass MDSForm_1r<bits<6> opcode, bits<4> xo, dag OOL, dag IOL, + string asmbase, string asmstr, InstrItinClass itin, + list<dag> pattern> { + let BaseName = asmbase in { + def NAME : MDSForm_1<opcode, xo, OOL, IOL, + !strconcat(asmbase, !strconcat(" ", asmstr)), itin, + pattern>, RecFormRel; + let Defs = [CR0] in + def o : MDSForm_1<opcode, xo, OOL, IOL, + !strconcat(asmbase, !strconcat(". ", asmstr)), itin, + []>, isDOT, RecFormRel; + } +} + +multiclass XSForm_1rc<bits<6> opcode, bits<9> xo, dag OOL, dag IOL, + string asmbase, string asmstr, InstrItinClass itin, + list<dag> pattern> { + let BaseName = asmbase in { + let Defs = [CARRY] in + def NAME : XSForm_1<opcode, xo, OOL, IOL, + !strconcat(asmbase, !strconcat(" ", asmstr)), itin, + pattern>, RecFormRel; + let Defs = [CARRY, CR0] in + def o : XSForm_1<opcode, xo, OOL, IOL, + !strconcat(asmbase, !strconcat(". ", asmstr)), itin, + []>, isDOT, RecFormRel; + } +} + +multiclass XForm_26r<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, + string asmbase, string asmstr, InstrItinClass itin, + list<dag> pattern> { + let BaseName = asmbase in { + def NAME : XForm_26<opcode, xo, OOL, IOL, + !strconcat(asmbase, !strconcat(" ", asmstr)), itin, + pattern>, RecFormRel; + let Defs = [CR1] in + def o : XForm_26<opcode, xo, OOL, IOL, + !strconcat(asmbase, !strconcat(". ", asmstr)), itin, + []>, isDOT, RecFormRel; + } +} + +multiclass AForm_1r<bits<6> opcode, bits<5> xo, dag OOL, dag IOL, + string asmbase, string asmstr, InstrItinClass itin, + list<dag> pattern> { + let BaseName = asmbase in { + def NAME : AForm_1<opcode, xo, OOL, IOL, + !strconcat(asmbase, !strconcat(" ", asmstr)), itin, + pattern>, RecFormRel; + let Defs = [CR1] in + def o : AForm_1<opcode, xo, OOL, IOL, + !strconcat(asmbase, !strconcat(". ", asmstr)), itin, + []>, isDOT, RecFormRel; + } +} + +multiclass AForm_2r<bits<6> opcode, bits<5> xo, dag OOL, dag IOL, + string asmbase, string asmstr, InstrItinClass itin, + list<dag> pattern> { + let BaseName = asmbase in { + def NAME : AForm_2<opcode, xo, OOL, IOL, + !strconcat(asmbase, !strconcat(" ", asmstr)), itin, + pattern>, RecFormRel; + let Defs = [CR1] in + def o : AForm_2<opcode, xo, OOL, IOL, + !strconcat(asmbase, !strconcat(". 
", asmstr)), itin, + []>, isDOT, RecFormRel; + } +} + +multiclass AForm_3r<bits<6> opcode, bits<5> xo, dag OOL, dag IOL, + string asmbase, string asmstr, InstrItinClass itin, + list<dag> pattern> { + let BaseName = asmbase in { + def NAME : AForm_3<opcode, xo, OOL, IOL, + !strconcat(asmbase, !strconcat(" ", asmstr)), itin, + pattern>, RecFormRel; + let Defs = [CR1] in + def o : AForm_3<opcode, xo, OOL, IOL, + !strconcat(asmbase, !strconcat(". ", asmstr)), itin, + []>, isDOT, RecFormRel; + } +} + +//===----------------------------------------------------------------------===// // PowerPC Instruction Definitions. // Pseudo-instructions: @@ -442,12 +804,12 @@ def ADJCALLSTACKUP : Pseudo<(outs), (ins u16imm:$amt1, u16imm:$amt2), "#ADJCAL [(callseq_end timm:$amt1, timm:$amt2)]>; } -def UPDATE_VRSAVE : Pseudo<(outs GPRC:$rD), (ins GPRC:$rS), +def UPDATE_VRSAVE : Pseudo<(outs gprc:$rD), (ins gprc:$rS), "UPDATE_VRSAVE $rD, $rS", []>; } let Defs = [R1], Uses = [R1] in -def DYNALLOC : Pseudo<(outs GPRC:$result), (ins GPRC:$negsize, memri:$fpsi), "#DYNALLOC", +def DYNALLOC : Pseudo<(outs gprc:$result), (ins gprc:$negsize, memri:$fpsi), "#DYNALLOC", [(set i32:$result, (PPCdynalloc i32:$negsize, iaddr:$fpsi))]>; @@ -458,21 +820,21 @@ let usesCustomInserter = 1, // Expanded after instruction selection. // Note that SELECT_CC_I4 and SELECT_CC_I8 use the no-r0 register classes // because either operand might become the first operand in an isel, and // that operand cannot be r0. - def SELECT_CC_I4 : Pseudo<(outs GPRC:$dst), (ins CRRC:$cond, - GPRC_NOR0:$T, GPRC_NOR0:$F, + def SELECT_CC_I4 : Pseudo<(outs gprc:$dst), (ins crrc:$cond, + gprc_nor0:$T, gprc_nor0:$F, i32imm:$BROPC), "#SELECT_CC_I4", []>; - def SELECT_CC_I8 : Pseudo<(outs G8RC:$dst), (ins CRRC:$cond, - G8RC_NOX0:$T, G8RC_NOX0:$F, + def SELECT_CC_I8 : Pseudo<(outs g8rc:$dst), (ins crrc:$cond, + g8rc_nox0:$T, g8rc_nox0:$F, i32imm:$BROPC), "#SELECT_CC_I8", []>; - def SELECT_CC_F4 : Pseudo<(outs F4RC:$dst), (ins CRRC:$cond, F4RC:$T, F4RC:$F, + def SELECT_CC_F4 : Pseudo<(outs f4rc:$dst), (ins crrc:$cond, f4rc:$T, f4rc:$F, i32imm:$BROPC), "#SELECT_CC_F4", []>; - def SELECT_CC_F8 : Pseudo<(outs F8RC:$dst), (ins CRRC:$cond, F8RC:$T, F8RC:$F, + def SELECT_CC_F8 : Pseudo<(outs f8rc:$dst), (ins crrc:$cond, f8rc:$T, f8rc:$F, i32imm:$BROPC), "#SELECT_CC_F8", []>; - def SELECT_CC_VRRC: Pseudo<(outs VRRC:$dst), (ins CRRC:$cond, VRRC:$T, VRRC:$F, + def SELECT_CC_VRRC: Pseudo<(outs vrrc:$dst), (ins crrc:$cond, vrrc:$T, vrrc:$F, i32imm:$BROPC), "#SELECT_CC_VRRC", []>; } @@ -480,21 +842,26 @@ let usesCustomInserter = 1, // Expanded after instruction selection. // SPILL_CR - Indicate that we're dumping the CR register, so we'll need to // scavenge a register for it. let mayStore = 1 in -def SPILL_CR : Pseudo<(outs), (ins CRRC:$cond, memri:$F), +def SPILL_CR : Pseudo<(outs), (ins crrc:$cond, memri:$F), "#SPILL_CR", []>; // RESTORE_CR - Indicate that we're restoring the CR register (previously // spilled), so we'll need to scavenge a register for it. 
let mayLoad = 1 in -def RESTORE_CR : Pseudo<(outs CRRC:$cond), (ins memri:$F), +def RESTORE_CR : Pseudo<(outs crrc:$cond), (ins memri:$F), "#RESTORE_CR", []>; let isTerminator = 1, isBarrier = 1, PPC970_Unit = 7 in { let isReturn = 1, Uses = [LR, RM] in def BLR : XLForm_2_ext<19, 16, 20, 0, 0, (outs), (ins), "blr", BrB, [(retflag)]>; - let isBranch = 1, isIndirectBranch = 1, Uses = [CTR] in + let isBranch = 1, isIndirectBranch = 1, Uses = [CTR] in { def BCTR : XLForm_2_ext<19, 528, 20, 0, 0, (outs), (ins), "bctr", BrB, []>; + + let isCodeGenOnly = 1 in + def BCCTR : XLForm_2_br<19, 528, 0, (outs), (ins pred:$cond), + "b${cond:cc}ctr ${cond:reg}", BrB, []>; + } } let Defs = [LR] in @@ -511,10 +878,21 @@ let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7 in { // BCC represents an arbitrary conditional branch on a predicate. // FIXME: should be able to write a pattern for PPCcondbranch, but can't use // a two-value operand where a dag node expects two operands. :( - let isCodeGenOnly = 1 in + let isCodeGenOnly = 1 in { def BCC : BForm<16, 0, 0, (outs), (ins pred:$cond, condbrtarget:$dst), "b${cond:cc} ${cond:reg}, $dst" - /*[(PPCcondbranch CRRC:$crS, imm:$opc, bb:$dst)]*/>; + /*[(PPCcondbranch crrc:$crS, imm:$opc, bb:$dst)]*/>; + let isReturn = 1, Uses = [LR, RM] in + def BCLR : XLForm_2_br<19, 16, 0, (outs), (ins pred:$cond), + "b${cond:cc}lr ${cond:reg}", BrB, []>; + + let isReturn = 1, Defs = [CTR], Uses = [CTR, LR, RM] in { + def BDZLR : XLForm_2_ext<19, 16, 18, 0, 0, (outs), (ins), + "bdzlr", BrB, []>; + def BDNZLR : XLForm_2_ext<19, 16, 16, 0, 0, (outs), (ins), + "bdnzlr", BrB, []>; + } + } let Defs = [CTR], Uses = [CTR] in { def BDZ : BForm_1<16, 18, 0, 0, (outs), (ins condbrtarget:$dst), @@ -544,6 +922,10 @@ let isCall = 1, PPC970_Unit = 7, Defs = [LR] in { def BCTRL : XLForm_2_ext<19, 528, 20, 0, 1, (outs), (ins), "bctrl", BrB, [(PPCbctrl)]>, Requires<[In32BitMode]>; + + let isCodeGenOnly = 1 in + def BCCTRL : XLForm_2_br<19, 528, 1, (outs), (ins pred:$cond), + "b${cond:cc}ctrl ${cond:reg}", BrB, []>; } } @@ -589,7 +971,7 @@ def TAILBA : IForm<18, 0, 0, (outs), (ins aaddr:$dst), []>; let hasSideEffects = 1, isBarrier = 1, usesCustomInserter = 1 in { - def EH_SjLj_SetJmp32 : Pseudo<(outs GPRC:$dst), (ins memr:$buf), + def EH_SjLj_SetJmp32 : Pseudo<(outs gprc:$dst), (ins memr:$buf), "#EH_SJLJ_SETJMP32", [(set i32:$dst, (PPCeh_sjlj_setjmp addr:$buf))]>, Requires<[In32BitMode]>; @@ -638,89 +1020,89 @@ def : Pat<(prefetch xoaddr:$dst, (i32 0), imm, (i32 1)), let usesCustomInserter = 1 in { let Defs = [CR0] in { def ATOMIC_LOAD_ADD_I8 : Pseudo< - (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_ADD_I8", + (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_ADD_I8", [(set i32:$dst, (atomic_load_add_8 xoaddr:$ptr, i32:$incr))]>; def ATOMIC_LOAD_SUB_I8 : Pseudo< - (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_SUB_I8", + (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_SUB_I8", [(set i32:$dst, (atomic_load_sub_8 xoaddr:$ptr, i32:$incr))]>; def ATOMIC_LOAD_AND_I8 : Pseudo< - (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_AND_I8", + (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_AND_I8", [(set i32:$dst, (atomic_load_and_8 xoaddr:$ptr, i32:$incr))]>; def ATOMIC_LOAD_OR_I8 : Pseudo< - (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_OR_I8", + (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_OR_I8", [(set i32:$dst, (atomic_load_or_8 xoaddr:$ptr, i32:$incr))]>; def 
ATOMIC_LOAD_XOR_I8 : Pseudo< - (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "ATOMIC_LOAD_XOR_I8", + (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "ATOMIC_LOAD_XOR_I8", [(set i32:$dst, (atomic_load_xor_8 xoaddr:$ptr, i32:$incr))]>; def ATOMIC_LOAD_NAND_I8 : Pseudo< - (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_NAND_I8", + (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_NAND_I8", [(set i32:$dst, (atomic_load_nand_8 xoaddr:$ptr, i32:$incr))]>; def ATOMIC_LOAD_ADD_I16 : Pseudo< - (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_ADD_I16", + (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_ADD_I16", [(set i32:$dst, (atomic_load_add_16 xoaddr:$ptr, i32:$incr))]>; def ATOMIC_LOAD_SUB_I16 : Pseudo< - (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_SUB_I16", + (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_SUB_I16", [(set i32:$dst, (atomic_load_sub_16 xoaddr:$ptr, i32:$incr))]>; def ATOMIC_LOAD_AND_I16 : Pseudo< - (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_AND_I16", + (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_AND_I16", [(set i32:$dst, (atomic_load_and_16 xoaddr:$ptr, i32:$incr))]>; def ATOMIC_LOAD_OR_I16 : Pseudo< - (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_OR_I16", + (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_OR_I16", [(set i32:$dst, (atomic_load_or_16 xoaddr:$ptr, i32:$incr))]>; def ATOMIC_LOAD_XOR_I16 : Pseudo< - (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_XOR_I16", + (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_XOR_I16", [(set i32:$dst, (atomic_load_xor_16 xoaddr:$ptr, i32:$incr))]>; def ATOMIC_LOAD_NAND_I16 : Pseudo< - (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_NAND_I16", + (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_NAND_I16", [(set i32:$dst, (atomic_load_nand_16 xoaddr:$ptr, i32:$incr))]>; def ATOMIC_LOAD_ADD_I32 : Pseudo< - (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_ADD_I32", + (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_ADD_I32", [(set i32:$dst, (atomic_load_add_32 xoaddr:$ptr, i32:$incr))]>; def ATOMIC_LOAD_SUB_I32 : Pseudo< - (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_SUB_I32", + (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_SUB_I32", [(set i32:$dst, (atomic_load_sub_32 xoaddr:$ptr, i32:$incr))]>; def ATOMIC_LOAD_AND_I32 : Pseudo< - (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_AND_I32", + (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_AND_I32", [(set i32:$dst, (atomic_load_and_32 xoaddr:$ptr, i32:$incr))]>; def ATOMIC_LOAD_OR_I32 : Pseudo< - (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_OR_I32", + (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_OR_I32", [(set i32:$dst, (atomic_load_or_32 xoaddr:$ptr, i32:$incr))]>; def ATOMIC_LOAD_XOR_I32 : Pseudo< - (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_XOR_I32", + (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_XOR_I32", [(set i32:$dst, (atomic_load_xor_32 xoaddr:$ptr, i32:$incr))]>; def ATOMIC_LOAD_NAND_I32 : Pseudo< - (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_NAND_I32", + (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_NAND_I32", [(set i32:$dst, (atomic_load_nand_32 xoaddr:$ptr, i32:$incr))]>; def ATOMIC_CMP_SWAP_I8 : Pseudo< - (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$old, GPRC:$new), "#ATOMIC_CMP_SWAP_I8", + 
(outs gprc:$dst), (ins memrr:$ptr, gprc:$old, gprc:$new), "#ATOMIC_CMP_SWAP_I8", [(set i32:$dst, (atomic_cmp_swap_8 xoaddr:$ptr, i32:$old, i32:$new))]>; def ATOMIC_CMP_SWAP_I16 : Pseudo< - (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$old, GPRC:$new), "#ATOMIC_CMP_SWAP_I16 $dst $ptr $old $new", + (outs gprc:$dst), (ins memrr:$ptr, gprc:$old, gprc:$new), "#ATOMIC_CMP_SWAP_I16 $dst $ptr $old $new", [(set i32:$dst, (atomic_cmp_swap_16 xoaddr:$ptr, i32:$old, i32:$new))]>; def ATOMIC_CMP_SWAP_I32 : Pseudo< - (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$old, GPRC:$new), "#ATOMIC_CMP_SWAP_I32 $dst $ptr $old $new", + (outs gprc:$dst), (ins memrr:$ptr, gprc:$old, gprc:$new), "#ATOMIC_CMP_SWAP_I32 $dst $ptr $old $new", [(set i32:$dst, (atomic_cmp_swap_32 xoaddr:$ptr, i32:$old, i32:$new))]>; def ATOMIC_SWAP_I8 : Pseudo< - (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$new), "#ATOMIC_SWAP_i8", + (outs gprc:$dst), (ins memrr:$ptr, gprc:$new), "#ATOMIC_SWAP_i8", [(set i32:$dst, (atomic_swap_8 xoaddr:$ptr, i32:$new))]>; def ATOMIC_SWAP_I16 : Pseudo< - (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$new), "#ATOMIC_SWAP_I16", + (outs gprc:$dst), (ins memrr:$ptr, gprc:$new), "#ATOMIC_SWAP_I16", [(set i32:$dst, (atomic_swap_16 xoaddr:$ptr, i32:$new))]>; def ATOMIC_SWAP_I32 : Pseudo< - (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$new), "#ATOMIC_SWAP_I32", + (outs gprc:$dst), (ins memrr:$ptr, gprc:$new), "#ATOMIC_SWAP_I32", [(set i32:$dst, (atomic_swap_32 xoaddr:$ptr, i32:$new))]>; } } // Instructions to support atomic operations -def LWARX : XForm_1<31, 20, (outs GPRC:$rD), (ins memrr:$src), +def LWARX : XForm_1<31, 20, (outs gprc:$rD), (ins memrr:$src), "lwarx $rD, $src", LdStLWARX, [(set i32:$rD, (PPClarx xoaddr:$src))]>; let Defs = [CR0] in -def STWCX : XForm_1<31, 150, (outs), (ins GPRC:$rS, memrr:$dst), +def STWCX : XForm_1<31, 150, (outs), (ins gprc:$rS, memrr:$dst), "stwcx. $rS, $dst", LdStSTWCX, [(PPCstcx i32:$rS, xoaddr:$dst)]>, isDOT; @@ -734,93 +1116,93 @@ def TRAP : XForm_24<31, 4, (outs), (ins), "trap", LdStLoad, [(trap)]>; // Unindexed (r+i) Loads. let canFoldAsLoad = 1, PPC970_Unit = 2 in { -def LBZ : DForm_1<34, (outs GPRC:$rD), (ins memri:$src), +def LBZ : DForm_1<34, (outs gprc:$rD), (ins memri:$src), "lbz $rD, $src", LdStLoad, [(set i32:$rD, (zextloadi8 iaddr:$src))]>; -def LHA : DForm_1<42, (outs GPRC:$rD), (ins memri:$src), +def LHA : DForm_1<42, (outs gprc:$rD), (ins memri:$src), "lha $rD, $src", LdStLHA, [(set i32:$rD, (sextloadi16 iaddr:$src))]>, PPC970_DGroup_Cracked; -def LHZ : DForm_1<40, (outs GPRC:$rD), (ins memri:$src), +def LHZ : DForm_1<40, (outs gprc:$rD), (ins memri:$src), "lhz $rD, $src", LdStLoad, [(set i32:$rD, (zextloadi16 iaddr:$src))]>; -def LWZ : DForm_1<32, (outs GPRC:$rD), (ins memri:$src), +def LWZ : DForm_1<32, (outs gprc:$rD), (ins memri:$src), "lwz $rD, $src", LdStLoad, [(set i32:$rD, (load iaddr:$src))]>; -def LFS : DForm_1<48, (outs F4RC:$rD), (ins memri:$src), +def LFS : DForm_1<48, (outs f4rc:$rD), (ins memri:$src), "lfs $rD, $src", LdStLFD, [(set f32:$rD, (load iaddr:$src))]>; -def LFD : DForm_1<50, (outs F8RC:$rD), (ins memri:$src), +def LFD : DForm_1<50, (outs f8rc:$rD), (ins memri:$src), "lfd $rD, $src", LdStLFD, [(set f64:$rD, (load iaddr:$src))]>; // Unindexed (r+i) Loads with Update (preinc). 
-let mayLoad = 1 in { -def LBZU : DForm_1<35, (outs GPRC:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr), +let mayLoad = 1, neverHasSideEffects = 1 in { +def LBZU : DForm_1<35, (outs gprc:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr), "lbzu $rD, $addr", LdStLoadUpd, []>, RegConstraint<"$addr.reg = $ea_result">, NoEncode<"$ea_result">; -def LHAU : DForm_1<43, (outs GPRC:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr), +def LHAU : DForm_1<43, (outs gprc:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr), "lhau $rD, $addr", LdStLHAU, []>, RegConstraint<"$addr.reg = $ea_result">, NoEncode<"$ea_result">; -def LHZU : DForm_1<41, (outs GPRC:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr), +def LHZU : DForm_1<41, (outs gprc:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr), "lhzu $rD, $addr", LdStLoadUpd, []>, RegConstraint<"$addr.reg = $ea_result">, NoEncode<"$ea_result">; -def LWZU : DForm_1<33, (outs GPRC:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr), +def LWZU : DForm_1<33, (outs gprc:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr), "lwzu $rD, $addr", LdStLoadUpd, []>, RegConstraint<"$addr.reg = $ea_result">, NoEncode<"$ea_result">; -def LFSU : DForm_1<49, (outs F4RC:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr), +def LFSU : DForm_1<49, (outs f4rc:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr), "lfsu $rD, $addr", LdStLFDU, []>, RegConstraint<"$addr.reg = $ea_result">, NoEncode<"$ea_result">; -def LFDU : DForm_1<51, (outs F8RC:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr), +def LFDU : DForm_1<51, (outs f8rc:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr), "lfdu $rD, $addr", LdStLFDU, []>, RegConstraint<"$addr.reg = $ea_result">, NoEncode<"$ea_result">; // Indexed (r+r) Loads with Update (preinc). -def LBZUX : XForm_1<31, 119, (outs GPRC:$rD, ptr_rc_nor0:$ea_result), +def LBZUX : XForm_1<31, 119, (outs gprc:$rD, ptr_rc_nor0:$ea_result), (ins memrr:$addr), "lbzux $rD, $addr", LdStLoadUpd, []>, RegConstraint<"$addr.ptrreg = $ea_result">, NoEncode<"$ea_result">; -def LHAUX : XForm_1<31, 375, (outs GPRC:$rD, ptr_rc_nor0:$ea_result), +def LHAUX : XForm_1<31, 375, (outs gprc:$rD, ptr_rc_nor0:$ea_result), (ins memrr:$addr), "lhaux $rD, $addr", LdStLHAU, []>, RegConstraint<"$addr.ptrreg = $ea_result">, NoEncode<"$ea_result">; -def LHZUX : XForm_1<31, 311, (outs GPRC:$rD, ptr_rc_nor0:$ea_result), +def LHZUX : XForm_1<31, 311, (outs gprc:$rD, ptr_rc_nor0:$ea_result), (ins memrr:$addr), "lhzux $rD, $addr", LdStLoadUpd, []>, RegConstraint<"$addr.ptrreg = $ea_result">, NoEncode<"$ea_result">; -def LWZUX : XForm_1<31, 55, (outs GPRC:$rD, ptr_rc_nor0:$ea_result), +def LWZUX : XForm_1<31, 55, (outs gprc:$rD, ptr_rc_nor0:$ea_result), (ins memrr:$addr), "lwzux $rD, $addr", LdStLoadUpd, []>, RegConstraint<"$addr.ptrreg = $ea_result">, NoEncode<"$ea_result">; -def LFSUX : XForm_1<31, 567, (outs F4RC:$rD, ptr_rc_nor0:$ea_result), +def LFSUX : XForm_1<31, 567, (outs f4rc:$rD, ptr_rc_nor0:$ea_result), (ins memrr:$addr), "lfsux $rD, $addr", LdStLFDU, []>, RegConstraint<"$addr.ptrreg = $ea_result">, NoEncode<"$ea_result">; -def LFDUX : XForm_1<31, 631, (outs F8RC:$rD, ptr_rc_nor0:$ea_result), +def LFDUX : XForm_1<31, 631, (outs f8rc:$rD, ptr_rc_nor0:$ea_result), (ins memrr:$addr), "lfdux $rD, $addr", LdStLFDU, []>, RegConstraint<"$addr.ptrreg = $ea_result">, @@ -831,39 +1213,39 @@ def LFDUX : XForm_1<31, 631, (outs F8RC:$rD, ptr_rc_nor0:$ea_result), // Indexed (r+r) Loads. 
// let canFoldAsLoad = 1, PPC970_Unit = 2 in { -def LBZX : XForm_1<31, 87, (outs GPRC:$rD), (ins memrr:$src), +def LBZX : XForm_1<31, 87, (outs gprc:$rD), (ins memrr:$src), "lbzx $rD, $src", LdStLoad, [(set i32:$rD, (zextloadi8 xaddr:$src))]>; -def LHAX : XForm_1<31, 343, (outs GPRC:$rD), (ins memrr:$src), +def LHAX : XForm_1<31, 343, (outs gprc:$rD), (ins memrr:$src), "lhax $rD, $src", LdStLHA, [(set i32:$rD, (sextloadi16 xaddr:$src))]>, PPC970_DGroup_Cracked; -def LHZX : XForm_1<31, 279, (outs GPRC:$rD), (ins memrr:$src), +def LHZX : XForm_1<31, 279, (outs gprc:$rD), (ins memrr:$src), "lhzx $rD, $src", LdStLoad, [(set i32:$rD, (zextloadi16 xaddr:$src))]>; -def LWZX : XForm_1<31, 23, (outs GPRC:$rD), (ins memrr:$src), +def LWZX : XForm_1<31, 23, (outs gprc:$rD), (ins memrr:$src), "lwzx $rD, $src", LdStLoad, [(set i32:$rD, (load xaddr:$src))]>; -def LHBRX : XForm_1<31, 790, (outs GPRC:$rD), (ins memrr:$src), +def LHBRX : XForm_1<31, 790, (outs gprc:$rD), (ins memrr:$src), "lhbrx $rD, $src", LdStLoad, [(set i32:$rD, (PPClbrx xoaddr:$src, i16))]>; -def LWBRX : XForm_1<31, 534, (outs GPRC:$rD), (ins memrr:$src), +def LWBRX : XForm_1<31, 534, (outs gprc:$rD), (ins memrr:$src), "lwbrx $rD, $src", LdStLoad, [(set i32:$rD, (PPClbrx xoaddr:$src, i32))]>; -def LFSX : XForm_25<31, 535, (outs F4RC:$frD), (ins memrr:$src), +def LFSX : XForm_25<31, 535, (outs f4rc:$frD), (ins memrr:$src), "lfsx $frD, $src", LdStLFD, [(set f32:$frD, (load xaddr:$src))]>; -def LFDX : XForm_25<31, 599, (outs F8RC:$frD), (ins memrr:$src), +def LFDX : XForm_25<31, 599, (outs f8rc:$frD), (ins memrr:$src), "lfdx $frD, $src", LdStLFD, [(set f64:$frD, (load xaddr:$src))]>; -def LFIWAX : XForm_25<31, 855, (outs F8RC:$frD), (ins memrr:$src), +def LFIWAX : XForm_25<31, 855, (outs f8rc:$frD), (ins memrr:$src), "lfiwax $frD, $src", LdStLFD, [(set f64:$frD, (PPClfiwax xoaddr:$src))]>; -def LFIWZX : XForm_25<31, 887, (outs F8RC:$frD), (ins memrr:$src), +def LFIWZX : XForm_25<31, 887, (outs f8rc:$frD), (ins memrr:$src), "lfiwzx $frD, $src", LdStLFD, [(set f64:$frD, (PPClfiwzx xoaddr:$src))]>; } @@ -874,38 +1256,38 @@ def LFIWZX : XForm_25<31, 887, (outs F8RC:$frD), (ins memrr:$src), // Unindexed (r+i) Stores. let PPC970_Unit = 2 in { -def STB : DForm_1<38, (outs), (ins GPRC:$rS, memri:$src), +def STB : DForm_1<38, (outs), (ins gprc:$rS, memri:$src), "stb $rS, $src", LdStStore, [(truncstorei8 i32:$rS, iaddr:$src)]>; -def STH : DForm_1<44, (outs), (ins GPRC:$rS, memri:$src), +def STH : DForm_1<44, (outs), (ins gprc:$rS, memri:$src), "sth $rS, $src", LdStStore, [(truncstorei16 i32:$rS, iaddr:$src)]>; -def STW : DForm_1<36, (outs), (ins GPRC:$rS, memri:$src), +def STW : DForm_1<36, (outs), (ins gprc:$rS, memri:$src), "stw $rS, $src", LdStStore, [(store i32:$rS, iaddr:$src)]>; -def STFS : DForm_1<52, (outs), (ins F4RC:$rS, memri:$dst), +def STFS : DForm_1<52, (outs), (ins f4rc:$rS, memri:$dst), "stfs $rS, $dst", LdStSTFD, [(store f32:$rS, iaddr:$dst)]>; -def STFD : DForm_1<54, (outs), (ins F8RC:$rS, memri:$dst), +def STFD : DForm_1<54, (outs), (ins f8rc:$rS, memri:$dst), "stfd $rS, $dst", LdStSTFD, [(store f64:$rS, iaddr:$dst)]>; } // Unindexed (r+i) Stores with Update (preinc). 
let PPC970_Unit = 2, mayStore = 1 in { -def STBU : DForm_1<39, (outs ptr_rc_nor0:$ea_res), (ins GPRC:$rS, memri:$dst), +def STBU : DForm_1<39, (outs ptr_rc_nor0:$ea_res), (ins gprc:$rS, memri:$dst), "stbu $rS, $dst", LdStStoreUpd, []>, RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">; -def STHU : DForm_1<45, (outs ptr_rc_nor0:$ea_res), (ins GPRC:$rS, memri:$dst), +def STHU : DForm_1<45, (outs ptr_rc_nor0:$ea_res), (ins gprc:$rS, memri:$dst), "sthu $rS, $dst", LdStStoreUpd, []>, RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">; -def STWU : DForm_1<37, (outs ptr_rc_nor0:$ea_res), (ins GPRC:$rS, memri:$dst), +def STWU : DForm_1<37, (outs ptr_rc_nor0:$ea_res), (ins gprc:$rS, memri:$dst), "stwu $rS, $dst", LdStStoreUpd, []>, RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">; -def STFSU : DForm_1<37, (outs ptr_rc_nor0:$ea_res), (ins F4RC:$rS, memri:$dst), +def STFSU : DForm_1<53, (outs ptr_rc_nor0:$ea_res), (ins f4rc:$rS, memri:$dst), "stfsu $rS, $dst", LdStSTFDU, []>, RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">; -def STFDU : DForm_1<37, (outs ptr_rc_nor0:$ea_res), (ins F8RC:$rS, memri:$dst), +def STFDU : DForm_1<55, (outs ptr_rc_nor0:$ea_res), (ins f8rc:$rS, memri:$dst), "stfdu $rS, $dst", LdStSTFDU, []>, RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">; } @@ -926,59 +1308,59 @@ def : Pat<(pre_store f64:$rS, iPTR:$ptrreg, iaddroff:$ptroff), // Indexed (r+r) Stores. let PPC970_Unit = 2 in { -def STBX : XForm_8<31, 215, (outs), (ins GPRC:$rS, memrr:$dst), +def STBX : XForm_8<31, 215, (outs), (ins gprc:$rS, memrr:$dst), "stbx $rS, $dst", LdStStore, [(truncstorei8 i32:$rS, xaddr:$dst)]>, PPC970_DGroup_Cracked; -def STHX : XForm_8<31, 407, (outs), (ins GPRC:$rS, memrr:$dst), +def STHX : XForm_8<31, 407, (outs), (ins gprc:$rS, memrr:$dst), "sthx $rS, $dst", LdStStore, [(truncstorei16 i32:$rS, xaddr:$dst)]>, PPC970_DGroup_Cracked; -def STWX : XForm_8<31, 151, (outs), (ins GPRC:$rS, memrr:$dst), +def STWX : XForm_8<31, 151, (outs), (ins gprc:$rS, memrr:$dst), "stwx $rS, $dst", LdStStore, [(store i32:$rS, xaddr:$dst)]>, PPC970_DGroup_Cracked; -def STHBRX: XForm_8<31, 918, (outs), (ins GPRC:$rS, memrr:$dst), +def STHBRX: XForm_8<31, 918, (outs), (ins gprc:$rS, memrr:$dst), "sthbrx $rS, $dst", LdStStore, [(PPCstbrx i32:$rS, xoaddr:$dst, i16)]>, PPC970_DGroup_Cracked; -def STWBRX: XForm_8<31, 662, (outs), (ins GPRC:$rS, memrr:$dst), +def STWBRX: XForm_8<31, 662, (outs), (ins gprc:$rS, memrr:$dst), "stwbrx $rS, $dst", LdStStore, [(PPCstbrx i32:$rS, xoaddr:$dst, i32)]>, PPC970_DGroup_Cracked; -def STFIWX: XForm_28<31, 983, (outs), (ins F8RC:$frS, memrr:$dst), +def STFIWX: XForm_28<31, 983, (outs), (ins f8rc:$frS, memrr:$dst), "stfiwx $frS, $dst", LdStSTFD, [(PPCstfiwx f64:$frS, xoaddr:$dst)]>; -def STFSX : XForm_28<31, 663, (outs), (ins F4RC:$frS, memrr:$dst), +def STFSX : XForm_28<31, 663, (outs), (ins f4rc:$frS, memrr:$dst), "stfsx $frS, $dst", LdStSTFD, [(store f32:$frS, xaddr:$dst)]>; -def STFDX : XForm_28<31, 727, (outs), (ins F8RC:$frS, memrr:$dst), +def STFDX : XForm_28<31, 727, (outs), (ins f8rc:$frS, memrr:$dst), "stfdx $frS, $dst", LdStSTFD, [(store f64:$frS, xaddr:$dst)]>; } // Indexed (r+r) Stores with Update (preinc). 
let PPC970_Unit = 2, mayStore = 1 in { -def STBUX : XForm_8<31, 247, (outs ptr_rc_nor0:$ea_res), (ins GPRC:$rS, memrr:$dst), +def STBUX : XForm_8<31, 247, (outs ptr_rc_nor0:$ea_res), (ins gprc:$rS, memrr:$dst), "stbux $rS, $dst", LdStStoreUpd, []>, RegConstraint<"$dst.ptrreg = $ea_res">, NoEncode<"$ea_res">, PPC970_DGroup_Cracked; -def STHUX : XForm_8<31, 439, (outs ptr_rc_nor0:$ea_res), (ins GPRC:$rS, memrr:$dst), +def STHUX : XForm_8<31, 439, (outs ptr_rc_nor0:$ea_res), (ins gprc:$rS, memrr:$dst), "sthux $rS, $dst", LdStStoreUpd, []>, RegConstraint<"$dst.ptrreg = $ea_res">, NoEncode<"$ea_res">, PPC970_DGroup_Cracked; -def STWUX : XForm_8<31, 183, (outs ptr_rc_nor0:$ea_res), (ins GPRC:$rS, memrr:$dst), +def STWUX : XForm_8<31, 183, (outs ptr_rc_nor0:$ea_res), (ins gprc:$rS, memrr:$dst), "stwux $rS, $dst", LdStStoreUpd, []>, RegConstraint<"$dst.ptrreg = $ea_res">, NoEncode<"$ea_res">, PPC970_DGroup_Cracked; -def STFSUX: XForm_8<31, 695, (outs ptr_rc_nor0:$ea_res), (ins F4RC:$rS, memrr:$dst), +def STFSUX: XForm_8<31, 695, (outs ptr_rc_nor0:$ea_res), (ins f4rc:$rS, memrr:$dst), "stfsux $rS, $dst", LdStSTFDU, []>, RegConstraint<"$dst.ptrreg = $ea_res">, NoEncode<"$ea_res">, PPC970_DGroup_Cracked; -def STFDUX: XForm_8<31, 759, (outs ptr_rc_nor0:$ea_res), (ins F8RC:$rS, memrr:$dst), +def STFDUX: XForm_8<31, 759, (outs ptr_rc_nor0:$ea_res), (ins f8rc:$rS, memrr:$dst), "stfdux $rS, $dst", LdStSTFDU, []>, RegConstraint<"$dst.ptrreg = $ea_res">, NoEncode<"$ea_res">, PPC970_DGroup_Cracked; @@ -1007,193 +1389,206 @@ def SYNC : XForm_24_sync<31, 598, (outs), (ins), // let PPC970_Unit = 1 in { // FXU Operations. -def ADDI : DForm_2<14, (outs GPRC:$rD), (ins GPRC_NOR0:$rA, symbolLo:$imm), +def ADDI : DForm_2<14, (outs gprc:$rD), (ins gprc_nor0:$rA, symbolLo:$imm), "addi $rD, $rA, $imm", IntSimple, [(set i32:$rD, (add i32:$rA, immSExt16:$imm))]>; -let Defs = [CARRY] in { -def ADDIC : DForm_2<12, (outs GPRC:$rD), (ins GPRC:$rA, s16imm:$imm), +let BaseName = "addic" in { +let Defs = [CARRY] in +def ADDIC : DForm_2<12, (outs gprc:$rD), (ins gprc:$rA, s16imm:$imm), "addic $rD, $rA, $imm", IntGeneral, [(set i32:$rD, (addc i32:$rA, immSExt16:$imm))]>, - PPC970_DGroup_Cracked; -def ADDICo : DForm_2<13, (outs GPRC:$rD), (ins GPRC:$rA, s16imm:$imm), + RecFormRel, PPC970_DGroup_Cracked; +let Defs = [CARRY, CR0] in +def ADDICo : DForm_2<13, (outs gprc:$rD), (ins gprc:$rA, s16imm:$imm), "addic. 
$rD, $rA, $imm", IntGeneral, - []>; + []>, isDOT, RecFormRel; } -def ADDIS : DForm_2<15, (outs GPRC:$rD), (ins GPRC_NOR0:$rA, symbolHi:$imm), +def ADDIS : DForm_2<15, (outs gprc:$rD), (ins gprc_nor0:$rA, symbolHi:$imm), "addis $rD, $rA, $imm", IntSimple, [(set i32:$rD, (add i32:$rA, imm16ShiftedSExt:$imm))]>; let isCodeGenOnly = 1 in -def LA : DForm_2<14, (outs GPRC:$rD), (ins GPRC_NOR0:$rA, symbolLo:$sym), +def LA : DForm_2<14, (outs gprc:$rD), (ins gprc_nor0:$rA, symbolLo:$sym), "la $rD, $sym($rA)", IntGeneral, [(set i32:$rD, (add i32:$rA, (PPClo tglobaladdr:$sym, 0)))]>; -def MULLI : DForm_2< 7, (outs GPRC:$rD), (ins GPRC:$rA, s16imm:$imm), +def MULLI : DForm_2< 7, (outs gprc:$rD), (ins gprc:$rA, s16imm:$imm), "mulli $rD, $rA, $imm", IntMulLI, [(set i32:$rD, (mul i32:$rA, immSExt16:$imm))]>; -let Defs = [CARRY] in { -def SUBFIC : DForm_2< 8, (outs GPRC:$rD), (ins GPRC:$rA, s16imm:$imm), +let Defs = [CARRY] in +def SUBFIC : DForm_2< 8, (outs gprc:$rD), (ins gprc:$rA, s16imm:$imm), "subfic $rD, $rA, $imm", IntGeneral, [(set i32:$rD, (subc immSExt16:$imm, i32:$rA))]>; -} let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in { - def LI : DForm_2_r0<14, (outs GPRC:$rD), (ins symbolLo:$imm), + def LI : DForm_2_r0<14, (outs gprc:$rD), (ins symbolLo:$imm), "li $rD, $imm", IntSimple, [(set i32:$rD, immSExt16:$imm)]>; - def LIS : DForm_2_r0<15, (outs GPRC:$rD), (ins symbolHi:$imm), + def LIS : DForm_2_r0<15, (outs gprc:$rD), (ins symbolHi:$imm), "lis $rD, $imm", IntSimple, [(set i32:$rD, imm16ShiftedSExt:$imm)]>; } } let PPC970_Unit = 1 in { // FXU Operations. -def ANDIo : DForm_4<28, (outs GPRC:$dst), (ins GPRC:$src1, u16imm:$src2), +let Defs = [CR0] in { +def ANDIo : DForm_4<28, (outs gprc:$dst), (ins gprc:$src1, u16imm:$src2), "andi. $dst, $src1, $src2", IntGeneral, [(set i32:$dst, (and i32:$src1, immZExt16:$src2))]>, isDOT; -def ANDISo : DForm_4<29, (outs GPRC:$dst), (ins GPRC:$src1, u16imm:$src2), +def ANDISo : DForm_4<29, (outs gprc:$dst), (ins gprc:$src1, u16imm:$src2), "andis. 
$dst, $src1, $src2", IntGeneral, [(set i32:$dst, (and i32:$src1, imm16ShiftedZExt:$src2))]>, isDOT; -def ORI : DForm_4<24, (outs GPRC:$dst), (ins GPRC:$src1, u16imm:$src2), +} +def ORI : DForm_4<24, (outs gprc:$dst), (ins gprc:$src1, u16imm:$src2), "ori $dst, $src1, $src2", IntSimple, [(set i32:$dst, (or i32:$src1, immZExt16:$src2))]>; -def ORIS : DForm_4<25, (outs GPRC:$dst), (ins GPRC:$src1, u16imm:$src2), +def ORIS : DForm_4<25, (outs gprc:$dst), (ins gprc:$src1, u16imm:$src2), "oris $dst, $src1, $src2", IntSimple, [(set i32:$dst, (or i32:$src1, imm16ShiftedZExt:$src2))]>; -def XORI : DForm_4<26, (outs GPRC:$dst), (ins GPRC:$src1, u16imm:$src2), +def XORI : DForm_4<26, (outs gprc:$dst), (ins gprc:$src1, u16imm:$src2), "xori $dst, $src1, $src2", IntSimple, [(set i32:$dst, (xor i32:$src1, immZExt16:$src2))]>; -def XORIS : DForm_4<27, (outs GPRC:$dst), (ins GPRC:$src1, u16imm:$src2), +def XORIS : DForm_4<27, (outs gprc:$dst), (ins gprc:$src1, u16imm:$src2), "xoris $dst, $src1, $src2", IntSimple, [(set i32:$dst, (xor i32:$src1, imm16ShiftedZExt:$src2))]>; def NOP : DForm_4_zero<24, (outs), (ins), "nop", IntSimple, []>; -def CMPWI : DForm_5_ext<11, (outs CRRC:$crD), (ins GPRC:$rA, s16imm:$imm), - "cmpwi $crD, $rA, $imm", IntCompare>; -def CMPLWI : DForm_6_ext<10, (outs CRRC:$dst), (ins GPRC:$src1, u16imm:$src2), - "cmplwi $dst, $src1, $src2", IntCompare>; +let isCompare = 1, neverHasSideEffects = 1 in { + def CMPWI : DForm_5_ext<11, (outs crrc:$crD), (ins gprc:$rA, s16imm:$imm), + "cmpwi $crD, $rA, $imm", IntCompare>; + def CMPLWI : DForm_6_ext<10, (outs crrc:$dst), (ins gprc:$src1, u16imm:$src2), + "cmplwi $dst, $src1, $src2", IntCompare>; +} +} + +let PPC970_Unit = 1, neverHasSideEffects = 1 in { // FXU Operations. +defm NAND : XForm_6r<31, 476, (outs gprc:$rA), (ins gprc:$rS, gprc:$rB), + "nand", "$rA, $rS, $rB", IntSimple, + [(set i32:$rA, (not (and i32:$rS, i32:$rB)))]>; +defm AND : XForm_6r<31, 28, (outs gprc:$rA), (ins gprc:$rS, gprc:$rB), + "and", "$rA, $rS, $rB", IntSimple, + [(set i32:$rA, (and i32:$rS, i32:$rB))]>; +defm ANDC : XForm_6r<31, 60, (outs gprc:$rA), (ins gprc:$rS, gprc:$rB), + "andc", "$rA, $rS, $rB", IntSimple, + [(set i32:$rA, (and i32:$rS, (not i32:$rB)))]>; +defm OR : XForm_6r<31, 444, (outs gprc:$rA), (ins gprc:$rS, gprc:$rB), + "or", "$rA, $rS, $rB", IntSimple, + [(set i32:$rA, (or i32:$rS, i32:$rB))]>; +defm NOR : XForm_6r<31, 124, (outs gprc:$rA), (ins gprc:$rS, gprc:$rB), + "nor", "$rA, $rS, $rB", IntSimple, + [(set i32:$rA, (not (or i32:$rS, i32:$rB)))]>; +defm ORC : XForm_6r<31, 412, (outs gprc:$rA), (ins gprc:$rS, gprc:$rB), + "orc", "$rA, $rS, $rB", IntSimple, + [(set i32:$rA, (or i32:$rS, (not i32:$rB)))]>; +defm EQV : XForm_6r<31, 284, (outs gprc:$rA), (ins gprc:$rS, gprc:$rB), + "eqv", "$rA, $rS, $rB", IntSimple, + [(set i32:$rA, (not (xor i32:$rS, i32:$rB)))]>; +defm XOR : XForm_6r<31, 316, (outs gprc:$rA), (ins gprc:$rS, gprc:$rB), + "xor", "$rA, $rS, $rB", IntSimple, + [(set i32:$rA, (xor i32:$rS, i32:$rB))]>; +defm SLW : XForm_6r<31, 24, (outs gprc:$rA), (ins gprc:$rS, gprc:$rB), + "slw", "$rA, $rS, $rB", IntGeneral, + [(set i32:$rA, (PPCshl i32:$rS, i32:$rB))]>; +defm SRW : XForm_6r<31, 536, (outs gprc:$rA), (ins gprc:$rS, gprc:$rB), + "srw", "$rA, $rS, $rB", IntGeneral, + [(set i32:$rA, (PPCsrl i32:$rS, i32:$rB))]>; +defm SRAW : XForm_6rc<31, 792, (outs gprc:$rA), (ins gprc:$rS, gprc:$rB), + "sraw", "$rA, $rS, $rB", IntShift, + [(set i32:$rA, (PPCsra i32:$rS, i32:$rB))]>; } - let PPC970_Unit = 1 in { // FXU Operations. 
-def NAND : XForm_6<31, 476, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB), - "nand $rA, $rS, $rB", IntSimple, - [(set i32:$rA, (not (and i32:$rS, i32:$rB)))]>; -def AND : XForm_6<31, 28, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB), - "and $rA, $rS, $rB", IntSimple, - [(set i32:$rA, (and i32:$rS, i32:$rB))]>; -def ANDC : XForm_6<31, 60, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB), - "andc $rA, $rS, $rB", IntSimple, - [(set i32:$rA, (and i32:$rS, (not i32:$rB)))]>; -def OR : XForm_6<31, 444, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB), - "or $rA, $rS, $rB", IntSimple, - [(set i32:$rA, (or i32:$rS, i32:$rB))]>; -def NOR : XForm_6<31, 124, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB), - "nor $rA, $rS, $rB", IntSimple, - [(set i32:$rA, (not (or i32:$rS, i32:$rB)))]>; -def ORC : XForm_6<31, 412, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB), - "orc $rA, $rS, $rB", IntSimple, - [(set i32:$rA, (or i32:$rS, (not i32:$rB)))]>; -def EQV : XForm_6<31, 284, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB), - "eqv $rA, $rS, $rB", IntSimple, - [(set i32:$rA, (not (xor i32:$rS, i32:$rB)))]>; -def XOR : XForm_6<31, 316, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB), - "xor $rA, $rS, $rB", IntSimple, - [(set i32:$rA, (xor i32:$rS, i32:$rB))]>; -def SLW : XForm_6<31, 24, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB), - "slw $rA, $rS, $rB", IntGeneral, - [(set i32:$rA, (PPCshl i32:$rS, i32:$rB))]>; -def SRW : XForm_6<31, 536, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB), - "srw $rA, $rS, $rB", IntGeneral, - [(set i32:$rA, (PPCsrl i32:$rS, i32:$rB))]>; -let Defs = [CARRY] in { -def SRAW : XForm_6<31, 792, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB), - "sraw $rA, $rS, $rB", IntShift, - [(set i32:$rA, (PPCsra i32:$rS, i32:$rB))]>; +let neverHasSideEffects = 1 in { +defm SRAWI : XForm_10rc<31, 824, (outs gprc:$rA), (ins gprc:$rS, u5imm:$SH), + "srawi", "$rA, $rS, $SH", IntShift, + [(set i32:$rA, (sra i32:$rS, (i32 imm:$SH)))]>; +defm CNTLZW : XForm_11r<31, 26, (outs gprc:$rA), (ins gprc:$rS), + "cntlzw", "$rA, $rS", IntGeneral, + [(set i32:$rA, (ctlz i32:$rS))]>; +defm EXTSB : XForm_11r<31, 954, (outs gprc:$rA), (ins gprc:$rS), + "extsb", "$rA, $rS", IntSimple, + [(set i32:$rA, (sext_inreg i32:$rS, i8))]>; +defm EXTSH : XForm_11r<31, 922, (outs gprc:$rA), (ins gprc:$rS), + "extsh", "$rA, $rS", IntSimple, + [(set i32:$rA, (sext_inreg i32:$rS, i16))]>; +} +let isCompare = 1, neverHasSideEffects = 1 in { + def CMPW : XForm_16_ext<31, 0, (outs crrc:$crD), (ins gprc:$rA, gprc:$rB), + "cmpw $crD, $rA, $rB", IntCompare>; + def CMPLW : XForm_16_ext<31, 32, (outs crrc:$crD), (ins gprc:$rA, gprc:$rB), + "cmplw $crD, $rA, $rB", IntCompare>; } } - -let PPC970_Unit = 1 in { // FXU Operations. -let Defs = [CARRY] in { -def SRAWI : XForm_10<31, 824, (outs GPRC:$rA), (ins GPRC:$rS, u5imm:$SH), - "srawi $rA, $rS, $SH", IntShift, - [(set i32:$rA, (sra i32:$rS, (i32 imm:$SH)))]>; -} -def CNTLZW : XForm_11<31, 26, (outs GPRC:$rA), (ins GPRC:$rS), - "cntlzw $rA, $rS", IntGeneral, - [(set i32:$rA, (ctlz i32:$rS))]>; -def EXTSB : XForm_11<31, 954, (outs GPRC:$rA), (ins GPRC:$rS), - "extsb $rA, $rS", IntSimple, - [(set i32:$rA, (sext_inreg i32:$rS, i8))]>; -def EXTSH : XForm_11<31, 922, (outs GPRC:$rA), (ins GPRC:$rS), - "extsh $rA, $rS", IntSimple, - [(set i32:$rA, (sext_inreg i32:$rS, i16))]>; - -def CMPW : XForm_16_ext<31, 0, (outs CRRC:$crD), (ins GPRC:$rA, GPRC:$rB), - "cmpw $crD, $rA, $rB", IntCompare>; -def CMPLW : XForm_16_ext<31, 32, (outs CRRC:$crD), (ins GPRC:$rA, GPRC:$rB), - "cmplw $crD, $rA, $rB", IntCompare>; -} let PPC970_Unit = 3 in { // FPU Operations. 
//def FCMPO : XForm_17<63, 32, (outs CRRC:$crD), (ins FPRC:$fA, FPRC:$fB), // "fcmpo $crD, $fA, $fB", FPCompare>; -def FCMPUS : XForm_17<63, 0, (outs CRRC:$crD), (ins F4RC:$fA, F4RC:$fB), - "fcmpu $crD, $fA, $fB", FPCompare>; -def FCMPUD : XForm_17<63, 0, (outs CRRC:$crD), (ins F8RC:$fA, F8RC:$fB), - "fcmpu $crD, $fA, $fB", FPCompare>; +let isCompare = 1, neverHasSideEffects = 1 in { + def FCMPUS : XForm_17<63, 0, (outs crrc:$crD), (ins f4rc:$fA, f4rc:$fB), + "fcmpu $crD, $fA, $fB", FPCompare>; + def FCMPUD : XForm_17<63, 0, (outs crrc:$crD), (ins f8rc:$fA, f8rc:$fB), + "fcmpu $crD, $fA, $fB", FPCompare>; +} let Uses = [RM] in { - def FCTIWZ : XForm_26<63, 15, (outs F8RC:$frD), (ins F8RC:$frB), - "fctiwz $frD, $frB", FPGeneral, - [(set f64:$frD, (PPCfctiwz f64:$frB))]>; + let neverHasSideEffects = 1 in { + defm FCTIWZ : XForm_26r<63, 15, (outs f8rc:$frD), (ins f8rc:$frB), + "fctiwz", "$frD, $frB", FPGeneral, + [(set f64:$frD, (PPCfctiwz f64:$frB))]>; - def FRSP : XForm_26<63, 12, (outs F4RC:$frD), (ins F8RC:$frB), - "frsp $frD, $frB", FPGeneral, - [(set f32:$frD, (fround f64:$frB))]>; + defm FRSP : XForm_26r<63, 12, (outs f4rc:$frD), (ins f8rc:$frB), + "frsp", "$frD, $frB", FPGeneral, + [(set f32:$frD, (fround f64:$frB))]>; // The frin -> nearbyint mapping is valid only in fast-math mode. - def FRIND : XForm_26<63, 392, (outs F8RC:$frD), (ins F8RC:$frB), - "frin $frD, $frB", FPGeneral, - [(set f64:$frD, (fnearbyint f64:$frB))]>; - def FRINS : XForm_26<63, 392, (outs F4RC:$frD), (ins F4RC:$frB), - "frin $frD, $frB", FPGeneral, - [(set f32:$frD, (fnearbyint f32:$frB))]>; + let Interpretation64Bit = 1 in + defm FRIND : XForm_26r<63, 392, (outs f8rc:$frD), (ins f8rc:$frB), + "frin", "$frD, $frB", FPGeneral, + [(set f64:$frD, (fnearbyint f64:$frB))]>; + defm FRINS : XForm_26r<63, 392, (outs f4rc:$frD), (ins f4rc:$frB), + "frin", "$frD, $frB", FPGeneral, + [(set f32:$frD, (fnearbyint f32:$frB))]>; + } // These pseudos expand to rint but also set FE_INEXACT when the result does // not equal the argument. let usesCustomInserter = 1, Defs = [RM] in { // FIXME: Model FPSCR! 
- def FRINDrint : Pseudo<(outs F8RC:$frD), (ins F8RC:$frB), + def FRINDrint : Pseudo<(outs f8rc:$frD), (ins f8rc:$frB), "#FRINDrint", [(set f64:$frD, (frint f64:$frB))]>; - def FRINSrint : Pseudo<(outs F4RC:$frD), (ins F4RC:$frB), + def FRINSrint : Pseudo<(outs f4rc:$frD), (ins f4rc:$frB), "#FRINSrint", [(set f32:$frD, (frint f32:$frB))]>; } - def FRIPD : XForm_26<63, 456, (outs F8RC:$frD), (ins F8RC:$frB), - "frip $frD, $frB", FPGeneral, - [(set f64:$frD, (fceil f64:$frB))]>; - def FRIPS : XForm_26<63, 456, (outs F4RC:$frD), (ins F4RC:$frB), - "frip $frD, $frB", FPGeneral, - [(set f32:$frD, (fceil f32:$frB))]>; - def FRIZD : XForm_26<63, 424, (outs F8RC:$frD), (ins F8RC:$frB), - "friz $frD, $frB", FPGeneral, - [(set f64:$frD, (ftrunc f64:$frB))]>; - def FRIZS : XForm_26<63, 424, (outs F4RC:$frD), (ins F4RC:$frB), - "friz $frD, $frB", FPGeneral, - [(set f32:$frD, (ftrunc f32:$frB))]>; - def FRIMD : XForm_26<63, 488, (outs F8RC:$frD), (ins F8RC:$frB), - "frim $frD, $frB", FPGeneral, - [(set f64:$frD, (ffloor f64:$frB))]>; - def FRIMS : XForm_26<63, 488, (outs F4RC:$frD), (ins F4RC:$frB), - "frim $frD, $frB", FPGeneral, - [(set f32:$frD, (ffloor f32:$frB))]>; - - def FSQRT : XForm_26<63, 22, (outs F8RC:$frD), (ins F8RC:$frB), - "fsqrt $frD, $frB", FPSqrt, - [(set f64:$frD, (fsqrt f64:$frB))]>; - def FSQRTS : XForm_26<59, 22, (outs F4RC:$frD), (ins F4RC:$frB), - "fsqrts $frD, $frB", FPSqrt, - [(set f32:$frD, (fsqrt f32:$frB))]>; + let neverHasSideEffects = 1 in { + let Interpretation64Bit = 1 in + defm FRIPD : XForm_26r<63, 456, (outs f8rc:$frD), (ins f8rc:$frB), + "frip", "$frD, $frB", FPGeneral, + [(set f64:$frD, (fceil f64:$frB))]>; + defm FRIPS : XForm_26r<63, 456, (outs f4rc:$frD), (ins f4rc:$frB), + "frip", "$frD, $frB", FPGeneral, + [(set f32:$frD, (fceil f32:$frB))]>; + let Interpretation64Bit = 1 in + defm FRIZD : XForm_26r<63, 424, (outs f8rc:$frD), (ins f8rc:$frB), + "friz", "$frD, $frB", FPGeneral, + [(set f64:$frD, (ftrunc f64:$frB))]>; + defm FRIZS : XForm_26r<63, 424, (outs f4rc:$frD), (ins f4rc:$frB), + "friz", "$frD, $frB", FPGeneral, + [(set f32:$frD, (ftrunc f32:$frB))]>; + let Interpretation64Bit = 1 in + defm FRIMD : XForm_26r<63, 488, (outs f8rc:$frD), (ins f8rc:$frB), + "frim", "$frD, $frB", FPGeneral, + [(set f64:$frD, (ffloor f64:$frB))]>; + defm FRIMS : XForm_26r<63, 488, (outs f4rc:$frD), (ins f4rc:$frB), + "frim", "$frD, $frB", FPGeneral, + [(set f32:$frD, (ffloor f32:$frB))]>; + + defm FSQRT : XForm_26r<63, 22, (outs f8rc:$frD), (ins f8rc:$frB), + "fsqrt", "$frD, $frB", FPSqrt, + [(set f64:$frD, (fsqrt f64:$frB))]>; + defm FSQRTS : XForm_26r<59, 22, (outs f4rc:$frD), (ins f4rc:$frB), + "fsqrts", "$frD, $frB", FPSqrt, + [(set f32:$frD, (fsqrt f32:$frB))]>; + } } } @@ -1201,69 +1596,74 @@ let Uses = [RM] in { /// often coalesced away and we don't want the dispatch group builder to think /// that they will fill slots (which could cause the load of a LSU reject to /// sneak into a d-group with a store). -def FMR : XForm_26<63, 72, (outs F4RC:$frD), (ins F4RC:$frB), - "fmr $frD, $frB", FPGeneral, - []>, // (set f32:$frD, f32:$frB) - PPC970_Unit_Pseudo; +let neverHasSideEffects = 1 in +defm FMR : XForm_26r<63, 72, (outs f4rc:$frD), (ins f4rc:$frB), + "fmr", "$frD, $frB", FPGeneral, + []>, // (set f32:$frD, f32:$frB) + PPC970_Unit_Pseudo; -let PPC970_Unit = 3 in { // FPU Operations. +let PPC970_Unit = 3, neverHasSideEffects = 1 in { // FPU Operations. // These are artificially split into two different forms, for 4/8 byte FP. 
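// The single- and double-precision variants below share the same opcode
// (FABSS and FABSD are both 63/264, FNEGS and FNEGD both 63/40); the split
// exists only so the f4rc/f8rc register class matches the value type used in
// the selection patterns.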
-def FABSS : XForm_26<63, 264, (outs F4RC:$frD), (ins F4RC:$frB), - "fabs $frD, $frB", FPGeneral, - [(set f32:$frD, (fabs f32:$frB))]>; -def FABSD : XForm_26<63, 264, (outs F8RC:$frD), (ins F8RC:$frB), - "fabs $frD, $frB", FPGeneral, - [(set f64:$frD, (fabs f64:$frB))]>; -def FNABSS : XForm_26<63, 136, (outs F4RC:$frD), (ins F4RC:$frB), - "fnabs $frD, $frB", FPGeneral, - [(set f32:$frD, (fneg (fabs f32:$frB)))]>; -def FNABSD : XForm_26<63, 136, (outs F8RC:$frD), (ins F8RC:$frB), - "fnabs $frD, $frB", FPGeneral, - [(set f64:$frD, (fneg (fabs f64:$frB)))]>; -def FNEGS : XForm_26<63, 40, (outs F4RC:$frD), (ins F4RC:$frB), - "fneg $frD, $frB", FPGeneral, - [(set f32:$frD, (fneg f32:$frB))]>; -def FNEGD : XForm_26<63, 40, (outs F8RC:$frD), (ins F8RC:$frB), - "fneg $frD, $frB", FPGeneral, - [(set f64:$frD, (fneg f64:$frB))]>; +defm FABSS : XForm_26r<63, 264, (outs f4rc:$frD), (ins f4rc:$frB), + "fabs", "$frD, $frB", FPGeneral, + [(set f32:$frD, (fabs f32:$frB))]>; +let Interpretation64Bit = 1 in +defm FABSD : XForm_26r<63, 264, (outs f8rc:$frD), (ins f8rc:$frB), + "fabs", "$frD, $frB", FPGeneral, + [(set f64:$frD, (fabs f64:$frB))]>; +defm FNABSS : XForm_26r<63, 136, (outs f4rc:$frD), (ins f4rc:$frB), + "fnabs", "$frD, $frB", FPGeneral, + [(set f32:$frD, (fneg (fabs f32:$frB)))]>; +let Interpretation64Bit = 1 in +defm FNABSD : XForm_26r<63, 136, (outs f8rc:$frD), (ins f8rc:$frB), + "fnabs", "$frD, $frB", FPGeneral, + [(set f64:$frD, (fneg (fabs f64:$frB)))]>; +defm FNEGS : XForm_26r<63, 40, (outs f4rc:$frD), (ins f4rc:$frB), + "fneg", "$frD, $frB", FPGeneral, + [(set f32:$frD, (fneg f32:$frB))]>; +let Interpretation64Bit = 1 in +defm FNEGD : XForm_26r<63, 40, (outs f8rc:$frD), (ins f8rc:$frB), + "fneg", "$frD, $frB", FPGeneral, + [(set f64:$frD, (fneg f64:$frB))]>; // Reciprocal estimates. -def FRE : XForm_26<63, 24, (outs F8RC:$frD), (ins F8RC:$frB), - "fre $frD, $frB", FPGeneral, - [(set f64:$frD, (PPCfre f64:$frB))]>; -def FRES : XForm_26<59, 24, (outs F4RC:$frD), (ins F4RC:$frB), - "fres $frD, $frB", FPGeneral, - [(set f32:$frD, (PPCfre f32:$frB))]>; -def FRSQRTE : XForm_26<63, 26, (outs F8RC:$frD), (ins F8RC:$frB), - "frsqrte $frD, $frB", FPGeneral, - [(set f64:$frD, (PPCfrsqrte f64:$frB))]>; -def FRSQRTES : XForm_26<59, 26, (outs F4RC:$frD), (ins F4RC:$frB), - "frsqrtes $frD, $frB", FPGeneral, - [(set f32:$frD, (PPCfrsqrte f32:$frB))]>; +defm FRE : XForm_26r<63, 24, (outs f8rc:$frD), (ins f8rc:$frB), + "fre", "$frD, $frB", FPGeneral, + [(set f64:$frD, (PPCfre f64:$frB))]>; +defm FRES : XForm_26r<59, 24, (outs f4rc:$frD), (ins f4rc:$frB), + "fres", "$frD, $frB", FPGeneral, + [(set f32:$frD, (PPCfre f32:$frB))]>; +defm FRSQRTE : XForm_26r<63, 26, (outs f8rc:$frD), (ins f8rc:$frB), + "frsqrte", "$frD, $frB", FPGeneral, + [(set f64:$frD, (PPCfrsqrte f64:$frB))]>; +defm FRSQRTES : XForm_26r<59, 26, (outs f4rc:$frD), (ins f4rc:$frB), + "frsqrtes", "$frD, $frB", FPGeneral, + [(set f32:$frD, (PPCfrsqrte f32:$frB))]>; } // XL-Form instructions. condition register logical ops. 
// -def MCRF : XLForm_3<19, 0, (outs CRRC:$BF), (ins CRRC:$BFA), +let neverHasSideEffects = 1 in +def MCRF : XLForm_3<19, 0, (outs crrc:$BF), (ins crrc:$BFA), "mcrf $BF, $BFA", BrMCR>, PPC970_DGroup_First, PPC970_Unit_CRU; -def CREQV : XLForm_1<19, 289, (outs CRBITRC:$CRD), - (ins CRBITRC:$CRA, CRBITRC:$CRB), +def CREQV : XLForm_1<19, 289, (outs crbitrc:$CRD), + (ins crbitrc:$CRA, crbitrc:$CRB), "creqv $CRD, $CRA, $CRB", BrCR, []>; -def CROR : XLForm_1<19, 449, (outs CRBITRC:$CRD), - (ins CRBITRC:$CRA, CRBITRC:$CRB), +def CROR : XLForm_1<19, 449, (outs crbitrc:$CRD), + (ins crbitrc:$CRA, crbitrc:$CRB), "cror $CRD, $CRA, $CRB", BrCR, []>; let isCodeGenOnly = 1 in { -def CRSET : XLForm_1_ext<19, 289, (outs CRBITRC:$dst), (ins), +def CRSET : XLForm_1_ext<19, 289, (outs crbitrc:$dst), (ins), "creqv $dst, $dst, $dst", BrCR, []>; -def CRUNSET: XLForm_1_ext<19, 193, (outs CRBITRC:$dst), (ins), +def CRUNSET: XLForm_1_ext<19, 193, (outs crbitrc:$dst), (ins), "crxor $dst, $dst, $dst", BrCR, []>; @@ -1281,23 +1681,23 @@ def CR6UNSET: XLForm_1_ext<19, 193, (outs), (ins), // XFX-Form instructions. Instructions that deal with SPRs. // let Uses = [CTR] in { -def MFCTR : XFXForm_1_ext<31, 339, 9, (outs GPRC:$rT), (ins), +def MFCTR : XFXForm_1_ext<31, 339, 9, (outs gprc:$rT), (ins), "mfctr $rT", SprMFSPR>, PPC970_DGroup_First, PPC970_Unit_FXU; } let Defs = [CTR], Pattern = [(PPCmtctr i32:$rS)] in { -def MTCTR : XFXForm_7_ext<31, 467, 9, (outs), (ins GPRC:$rS), +def MTCTR : XFXForm_7_ext<31, 467, 9, (outs), (ins gprc:$rS), "mtctr $rS", SprMTSPR>, PPC970_DGroup_First, PPC970_Unit_FXU; } let Defs = [LR] in { -def MTLR : XFXForm_7_ext<31, 467, 8, (outs), (ins GPRC:$rS), +def MTLR : XFXForm_7_ext<31, 467, 8, (outs), (ins gprc:$rS), "mtlr $rS", SprMTSPR>, PPC970_DGroup_First, PPC970_Unit_FXU; } let Uses = [LR] in { -def MFLR : XFXForm_1_ext<31, 339, 8, (outs GPRC:$rT), (ins), +def MFLR : XFXForm_1_ext<31, 339, 8, (outs gprc:$rT), (ins), "mflr $rT", SprMFSPR>, PPC970_DGroup_First, PPC970_Unit_FXU; } @@ -1305,19 +1705,19 @@ def MFLR : XFXForm_1_ext<31, 339, 8, (outs GPRC:$rT), (ins), // Move to/from VRSAVE: despite being a SPR, the VRSAVE register is renamed like // a GPR on the PPC970. As such, copies in and out have the same performance // characteristics as an OR instruction. 
-def MTVRSAVE : XFXForm_7_ext<31, 467, 256, (outs), (ins GPRC:$rS), +def MTVRSAVE : XFXForm_7_ext<31, 467, 256, (outs), (ins gprc:$rS), "mtspr 256, $rS", IntGeneral>, PPC970_DGroup_Single, PPC970_Unit_FXU; -def MFVRSAVE : XFXForm_1_ext<31, 339, 256, (outs GPRC:$rT), (ins), +def MFVRSAVE : XFXForm_1_ext<31, 339, 256, (outs gprc:$rT), (ins), "mfspr $rT, 256", IntGeneral>, PPC970_DGroup_First, PPC970_Unit_FXU; let isCodeGenOnly = 1 in { def MTVRSAVEv : XFXForm_7_ext<31, 467, 256, - (outs VRSAVERC:$reg), (ins GPRC:$rS), + (outs VRSAVERC:$reg), (ins gprc:$rS), "mtspr 256, $rS", IntGeneral>, PPC970_DGroup_Single, PPC970_Unit_FXU; - def MFVRSAVEv : XFXForm_1_ext<31, 339, 256, (outs GPRC:$rT), + def MFVRSAVEv : XFXForm_1_ext<31, 339, 256, (outs gprc:$rT), (ins VRSAVERC:$reg), "mfspr $rT, 256", IntGeneral>, PPC970_DGroup_First, PPC970_Unit_FXU; @@ -1335,7 +1735,8 @@ let mayLoad = 1 in def RESTORE_VRSAVE : Pseudo<(outs VRSAVERC:$vrsave), (ins memri:$F), "#RESTORE_VRSAVE", []>; -def MTCRF : XFXForm_5<31, 144, (outs crbitm:$FXM), (ins GPRC:$rS), +let neverHasSideEffects = 1 in { +def MTCRF : XFXForm_5<31, 144, (outs crbitm:$FXM), (ins gprc:$rS), "mtcrf $FXM, $rS", BrMCRX>, PPC970_MicroCode, PPC970_Unit_CRU; @@ -1350,21 +1751,23 @@ def MTCRF : XFXForm_5<31, 144, (outs crbitm:$FXM), (ins GPRC:$rS), // // FIXME: Make this a real Pseudo instruction when the JIT switches to MC. let isCodeGenOnly = 1 in -def MFCRpseud: XFXForm_3<31, 19, (outs GPRC:$rT), (ins crbitm:$FXM), +def MFCRpseud: XFXForm_3<31, 19, (outs gprc:$rT), (ins crbitm:$FXM), "#MFCRpseud", SprMFCR>, PPC970_MicroCode, PPC970_Unit_CRU; - -def MFCR : XFXForm_3<31, 19, (outs GPRC:$rT), (ins), - "mfcr $rT", SprMFCR>, - PPC970_MicroCode, PPC970_Unit_CRU; -def MFOCRF: XFXForm_5a<31, 19, (outs GPRC:$rT), (ins crbitm:$FXM), +def MFOCRF: XFXForm_5a<31, 19, (outs gprc:$rT), (ins crbitm:$FXM), "mfocrf $rT, $FXM", SprMFCR>, PPC970_DGroup_First, PPC970_Unit_CRU; +} // neverHasSideEffects = 1 + +let neverHasSideEffects = 1 in +def MFCR : XFXForm_3<31, 19, (outs gprc:$rT), (ins), + "mfcr $rT", SprMFCR>, + PPC970_MicroCode, PPC970_Unit_CRU; // Pseudo instruction to perform FADD in round-to-zero mode. let usesCustomInserter = 1, Uses = [RM] in { - def FADDrtz: Pseudo<(outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRB), "", + def FADDrtz: Pseudo<(outs f8rc:$FRT), (ins f8rc:$FRA, f8rc:$FRB), "", [(set f64:$FRT, (PPCfaddrtz f64:$FRA, f64:$FRB))]>; } @@ -1377,123 +1780,118 @@ let Uses = [RM], Defs = [RM] in { def MTFSB1 : XForm_43<63, 38, (outs), (ins u5imm:$FM), "mtfsb1 $FM", IntMTFSB0, []>, PPC970_DGroup_Single, PPC970_Unit_FPU; - def MTFSF : XFLForm<63, 711, (outs), (ins i32imm:$FM, F8RC:$rT), + def MTFSF : XFLForm<63, 711, (outs), (ins i32imm:$FM, f8rc:$rT), "mtfsf $FM, $rT", IntMTFSB0, []>, PPC970_DGroup_Single, PPC970_Unit_FPU; } let Uses = [RM] in { - def MFFS : XForm_42<63, 583, (outs F8RC:$rT), (ins), + def MFFS : XForm_42<63, 583, (outs f8rc:$rT), (ins), "mffs $rT", IntMFFS, [(set f64:$rT, (PPCmffs))]>, PPC970_DGroup_Single, PPC970_Unit_FPU; } -let PPC970_Unit = 1 in { // FXU Operations. - +let PPC970_Unit = 1, neverHasSideEffects = 1 in { // FXU Operations. // XO-Form instructions. 
Arithmetic instructions that can set overflow bit // -def ADD4 : XOForm_1<31, 266, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB), - "add $rT, $rA, $rB", IntSimple, - [(set i32:$rT, (add i32:$rA, i32:$rB))]>; -let Defs = [CARRY] in { -def ADDC : XOForm_1<31, 10, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB), - "addc $rT, $rA, $rB", IntGeneral, - [(set i32:$rT, (addc i32:$rA, i32:$rB))]>, - PPC970_DGroup_Cracked; -} -def DIVW : XOForm_1<31, 491, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB), - "divw $rT, $rA, $rB", IntDivW, - [(set i32:$rT, (sdiv i32:$rA, i32:$rB))]>, - PPC970_DGroup_First, PPC970_DGroup_Cracked; -def DIVWU : XOForm_1<31, 459, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB), - "divwu $rT, $rA, $rB", IntDivW, - [(set i32:$rT, (udiv i32:$rA, i32:$rB))]>, - PPC970_DGroup_First, PPC970_DGroup_Cracked; -def MULHW : XOForm_1<31, 75, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB), - "mulhw $rT, $rA, $rB", IntMulHW, - [(set i32:$rT, (mulhs i32:$rA, i32:$rB))]>; -def MULHWU : XOForm_1<31, 11, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB), - "mulhwu $rT, $rA, $rB", IntMulHWU, - [(set i32:$rT, (mulhu i32:$rA, i32:$rB))]>; -def MULLW : XOForm_1<31, 235, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB), - "mullw $rT, $rA, $rB", IntMulHW, - [(set i32:$rT, (mul i32:$rA, i32:$rB))]>; -def SUBF : XOForm_1<31, 40, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB), - "subf $rT, $rA, $rB", IntGeneral, - [(set i32:$rT, (sub i32:$rB, i32:$rA))]>; -let Defs = [CARRY] in { -def SUBFC : XOForm_1<31, 8, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB), - "subfc $rT, $rA, $rB", IntGeneral, - [(set i32:$rT, (subc i32:$rB, i32:$rA))]>, - PPC970_DGroup_Cracked; -} -def NEG : XOForm_3<31, 104, 0, (outs GPRC:$rT), (ins GPRC:$rA), - "neg $rT, $rA", IntSimple, - [(set i32:$rT, (ineg i32:$rA))]>; -let Uses = [CARRY], Defs = [CARRY] in { -def ADDE : XOForm_1<31, 138, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB), - "adde $rT, $rA, $rB", IntGeneral, - [(set i32:$rT, (adde i32:$rA, i32:$rB))]>; -def ADDME : XOForm_3<31, 234, 0, (outs GPRC:$rT), (ins GPRC:$rA), - "addme $rT, $rA", IntGeneral, - [(set i32:$rT, (adde i32:$rA, -1))]>; -def ADDZE : XOForm_3<31, 202, 0, (outs GPRC:$rT), (ins GPRC:$rA), - "addze $rT, $rA", IntGeneral, - [(set i32:$rT, (adde i32:$rA, 0))]>; -def SUBFE : XOForm_1<31, 136, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB), - "subfe $rT, $rA, $rB", IntGeneral, - [(set i32:$rT, (sube i32:$rB, i32:$rA))]>; -def SUBFME : XOForm_3<31, 232, 0, (outs GPRC:$rT), (ins GPRC:$rA), - "subfme $rT, $rA", IntGeneral, - [(set i32:$rT, (sube -1, i32:$rA))]>; -def SUBFZE : XOForm_3<31, 200, 0, (outs GPRC:$rT), (ins GPRC:$rA), - "subfze $rT, $rA", IntGeneral, - [(set i32:$rT, (sube 0, i32:$rA))]>; +defm ADD4 : XOForm_1r<31, 266, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB), + "add", "$rT, $rA, $rB", IntSimple, + [(set i32:$rT, (add i32:$rA, i32:$rB))]>; +defm ADDC : XOForm_1rc<31, 10, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB), + "addc", "$rT, $rA, $rB", IntGeneral, + [(set i32:$rT, (addc i32:$rA, i32:$rB))]>, + PPC970_DGroup_Cracked; +defm DIVW : XOForm_1r<31, 491, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB), + "divw", "$rT, $rA, $rB", IntDivW, + [(set i32:$rT, (sdiv i32:$rA, i32:$rB))]>, + PPC970_DGroup_First, PPC970_DGroup_Cracked; +defm DIVWU : XOForm_1r<31, 459, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB), + "divwu", "$rT, $rA, $rB", IntDivW, + [(set i32:$rT, (udiv i32:$rA, i32:$rB))]>, + PPC970_DGroup_First, PPC970_DGroup_Cracked; +defm MULHW : XOForm_1r<31, 75, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB), + "mulhw", "$rT, 
$rA, $rB", IntMulHW, + [(set i32:$rT, (mulhs i32:$rA, i32:$rB))]>; +defm MULHWU : XOForm_1r<31, 11, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB), + "mulhwu", "$rT, $rA, $rB", IntMulHWU, + [(set i32:$rT, (mulhu i32:$rA, i32:$rB))]>; +defm MULLW : XOForm_1r<31, 235, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB), + "mullw", "$rT, $rA, $rB", IntMulHW, + [(set i32:$rT, (mul i32:$rA, i32:$rB))]>; +defm SUBF : XOForm_1r<31, 40, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB), + "subf", "$rT, $rA, $rB", IntGeneral, + [(set i32:$rT, (sub i32:$rB, i32:$rA))]>; +defm SUBFC : XOForm_1rc<31, 8, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB), + "subfc", "$rT, $rA, $rB", IntGeneral, + [(set i32:$rT, (subc i32:$rB, i32:$rA))]>, + PPC970_DGroup_Cracked; +defm NEG : XOForm_3r<31, 104, 0, (outs gprc:$rT), (ins gprc:$rA), + "neg", "$rT, $rA", IntSimple, + [(set i32:$rT, (ineg i32:$rA))]>; +let Uses = [CARRY] in { +defm ADDE : XOForm_1rc<31, 138, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB), + "adde", "$rT, $rA, $rB", IntGeneral, + [(set i32:$rT, (adde i32:$rA, i32:$rB))]>; +defm ADDME : XOForm_3rc<31, 234, 0, (outs gprc:$rT), (ins gprc:$rA), + "addme", "$rT, $rA", IntGeneral, + [(set i32:$rT, (adde i32:$rA, -1))]>; +defm ADDZE : XOForm_3rc<31, 202, 0, (outs gprc:$rT), (ins gprc:$rA), + "addze", "$rT, $rA", IntGeneral, + [(set i32:$rT, (adde i32:$rA, 0))]>; +defm SUBFE : XOForm_1rc<31, 136, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB), + "subfe", "$rT, $rA, $rB", IntGeneral, + [(set i32:$rT, (sube i32:$rB, i32:$rA))]>; +defm SUBFME : XOForm_3rc<31, 232, 0, (outs gprc:$rT), (ins gprc:$rA), + "subfme", "$rT, $rA", IntGeneral, + [(set i32:$rT, (sube -1, i32:$rA))]>; +defm SUBFZE : XOForm_3rc<31, 200, 0, (outs gprc:$rT), (ins gprc:$rA), + "subfze", "$rT, $rA", IntGeneral, + [(set i32:$rT, (sube 0, i32:$rA))]>; } } // A-Form instructions. Most of the instructions executed in the FPU are of // this type. // -let PPC970_Unit = 3 in { // FPU Operations. +let PPC970_Unit = 3, neverHasSideEffects = 1 in { // FPU Operations. 
let Uses = [RM] in { - def FMADD : AForm_1<63, 29, - (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRC, F8RC:$FRB), - "fmadd $FRT, $FRA, $FRC, $FRB", FPFused, + defm FMADD : AForm_1r<63, 29, + (outs f8rc:$FRT), (ins f8rc:$FRA, f8rc:$FRC, f8rc:$FRB), + "fmadd", "$FRT, $FRA, $FRC, $FRB", FPFused, [(set f64:$FRT, (fma f64:$FRA, f64:$FRC, f64:$FRB))]>; - def FMADDS : AForm_1<59, 29, - (outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRC, F4RC:$FRB), - "fmadds $FRT, $FRA, $FRC, $FRB", FPGeneral, + defm FMADDS : AForm_1r<59, 29, + (outs f4rc:$FRT), (ins f4rc:$FRA, f4rc:$FRC, f4rc:$FRB), + "fmadds", "$FRT, $FRA, $FRC, $FRB", FPGeneral, [(set f32:$FRT, (fma f32:$FRA, f32:$FRC, f32:$FRB))]>; - def FMSUB : AForm_1<63, 28, - (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRC, F8RC:$FRB), - "fmsub $FRT, $FRA, $FRC, $FRB", FPFused, + defm FMSUB : AForm_1r<63, 28, + (outs f8rc:$FRT), (ins f8rc:$FRA, f8rc:$FRC, f8rc:$FRB), + "fmsub", "$FRT, $FRA, $FRC, $FRB", FPFused, [(set f64:$FRT, (fma f64:$FRA, f64:$FRC, (fneg f64:$FRB)))]>; - def FMSUBS : AForm_1<59, 28, - (outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRC, F4RC:$FRB), - "fmsubs $FRT, $FRA, $FRC, $FRB", FPGeneral, + defm FMSUBS : AForm_1r<59, 28, + (outs f4rc:$FRT), (ins f4rc:$FRA, f4rc:$FRC, f4rc:$FRB), + "fmsubs", "$FRT, $FRA, $FRC, $FRB", FPGeneral, [(set f32:$FRT, (fma f32:$FRA, f32:$FRC, (fneg f32:$FRB)))]>; - def FNMADD : AForm_1<63, 31, - (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRC, F8RC:$FRB), - "fnmadd $FRT, $FRA, $FRC, $FRB", FPFused, + defm FNMADD : AForm_1r<63, 31, + (outs f8rc:$FRT), (ins f8rc:$FRA, f8rc:$FRC, f8rc:$FRB), + "fnmadd", "$FRT, $FRA, $FRC, $FRB", FPFused, [(set f64:$FRT, (fneg (fma f64:$FRA, f64:$FRC, f64:$FRB)))]>; - def FNMADDS : AForm_1<59, 31, - (outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRC, F4RC:$FRB), - "fnmadds $FRT, $FRA, $FRC, $FRB", FPGeneral, + defm FNMADDS : AForm_1r<59, 31, + (outs f4rc:$FRT), (ins f4rc:$FRA, f4rc:$FRC, f4rc:$FRB), + "fnmadds", "$FRT, $FRA, $FRC, $FRB", FPGeneral, [(set f32:$FRT, (fneg (fma f32:$FRA, f32:$FRC, f32:$FRB)))]>; - def FNMSUB : AForm_1<63, 30, - (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRC, F8RC:$FRB), - "fnmsub $FRT, $FRA, $FRC, $FRB", FPFused, + defm FNMSUB : AForm_1r<63, 30, + (outs f8rc:$FRT), (ins f8rc:$FRA, f8rc:$FRC, f8rc:$FRB), + "fnmsub", "$FRT, $FRA, $FRC, $FRB", FPFused, [(set f64:$FRT, (fneg (fma f64:$FRA, f64:$FRC, (fneg f64:$FRB))))]>; - def FNMSUBS : AForm_1<59, 30, - (outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRC, F4RC:$FRB), - "fnmsubs $FRT, $FRA, $FRC, $FRB", FPGeneral, + defm FNMSUBS : AForm_1r<59, 30, + (outs f4rc:$FRT), (ins f4rc:$FRA, f4rc:$FRC, f4rc:$FRB), + "fnmsubs", "$FRT, $FRA, $FRC, $FRB", FPGeneral, [(set f32:$FRT, (fneg (fma f32:$FRA, f32:$FRC, (fneg f32:$FRB))))]>; } @@ -1501,53 +1899,56 @@ let Uses = [RM] in { // having 4 of these, force the comparison to always be an 8-byte double (code // should use an FMRSD if the input comparison value really wants to be a float) // and 4/8 byte forms for the result and operand type.. 
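// fsel computes FRT = (FRA >= 0.0) ? FRC : FRB, which is why even the
// single-precision FSELS variant below takes its comparison operand $FRA from
// f8rc while the selected operands stay in f4rc.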
-def FSELD : AForm_1<63, 23, - (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRC, F8RC:$FRB), - "fsel $FRT, $FRA, $FRC, $FRB", FPGeneral, - [(set f64:$FRT, (PPCfsel f64:$FRA, f64:$FRC, f64:$FRB))]>; -def FSELS : AForm_1<63, 23, - (outs F4RC:$FRT), (ins F8RC:$FRA, F4RC:$FRC, F4RC:$FRB), - "fsel $FRT, $FRA, $FRC, $FRB", FPGeneral, - [(set f32:$FRT, (PPCfsel f64:$FRA, f32:$FRC, f32:$FRB))]>; +let Interpretation64Bit = 1 in +defm FSELD : AForm_1r<63, 23, + (outs f8rc:$FRT), (ins f8rc:$FRA, f8rc:$FRC, f8rc:$FRB), + "fsel", "$FRT, $FRA, $FRC, $FRB", FPGeneral, + [(set f64:$FRT, (PPCfsel f64:$FRA, f64:$FRC, f64:$FRB))]>; +defm FSELS : AForm_1r<63, 23, + (outs f4rc:$FRT), (ins f8rc:$FRA, f4rc:$FRC, f4rc:$FRB), + "fsel", "$FRT, $FRA, $FRC, $FRB", FPGeneral, + [(set f32:$FRT, (PPCfsel f64:$FRA, f32:$FRC, f32:$FRB))]>; let Uses = [RM] in { - def FADD : AForm_2<63, 21, - (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRB), - "fadd $FRT, $FRA, $FRB", FPAddSub, - [(set f64:$FRT, (fadd f64:$FRA, f64:$FRB))]>; - def FADDS : AForm_2<59, 21, - (outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRB), - "fadds $FRT, $FRA, $FRB", FPGeneral, - [(set f32:$FRT, (fadd f32:$FRA, f32:$FRB))]>; - def FDIV : AForm_2<63, 18, - (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRB), - "fdiv $FRT, $FRA, $FRB", FPDivD, - [(set f64:$FRT, (fdiv f64:$FRA, f64:$FRB))]>; - def FDIVS : AForm_2<59, 18, - (outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRB), - "fdivs $FRT, $FRA, $FRB", FPDivS, - [(set f32:$FRT, (fdiv f32:$FRA, f32:$FRB))]>; - def FMUL : AForm_3<63, 25, - (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRC), - "fmul $FRT, $FRA, $FRC", FPFused, - [(set f64:$FRT, (fmul f64:$FRA, f64:$FRC))]>; - def FMULS : AForm_3<59, 25, - (outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRC), - "fmuls $FRT, $FRA, $FRC", FPGeneral, - [(set f32:$FRT, (fmul f32:$FRA, f32:$FRC))]>; - def FSUB : AForm_2<63, 20, - (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRB), - "fsub $FRT, $FRA, $FRB", FPAddSub, - [(set f64:$FRT, (fsub f64:$FRA, f64:$FRB))]>; - def FSUBS : AForm_2<59, 20, - (outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRB), - "fsubs $FRT, $FRA, $FRB", FPGeneral, - [(set f32:$FRT, (fsub f32:$FRA, f32:$FRB))]>; + defm FADD : AForm_2r<63, 21, + (outs f8rc:$FRT), (ins f8rc:$FRA, f8rc:$FRB), + "fadd", "$FRT, $FRA, $FRB", FPAddSub, + [(set f64:$FRT, (fadd f64:$FRA, f64:$FRB))]>; + defm FADDS : AForm_2r<59, 21, + (outs f4rc:$FRT), (ins f4rc:$FRA, f4rc:$FRB), + "fadds", "$FRT, $FRA, $FRB", FPGeneral, + [(set f32:$FRT, (fadd f32:$FRA, f32:$FRB))]>; + defm FDIV : AForm_2r<63, 18, + (outs f8rc:$FRT), (ins f8rc:$FRA, f8rc:$FRB), + "fdiv", "$FRT, $FRA, $FRB", FPDivD, + [(set f64:$FRT, (fdiv f64:$FRA, f64:$FRB))]>; + defm FDIVS : AForm_2r<59, 18, + (outs f4rc:$FRT), (ins f4rc:$FRA, f4rc:$FRB), + "fdivs", "$FRT, $FRA, $FRB", FPDivS, + [(set f32:$FRT, (fdiv f32:$FRA, f32:$FRB))]>; + defm FMUL : AForm_3r<63, 25, + (outs f8rc:$FRT), (ins f8rc:$FRA, f8rc:$FRC), + "fmul", "$FRT, $FRA, $FRC", FPFused, + [(set f64:$FRT, (fmul f64:$FRA, f64:$FRC))]>; + defm FMULS : AForm_3r<59, 25, + (outs f4rc:$FRT), (ins f4rc:$FRA, f4rc:$FRC), + "fmuls", "$FRT, $FRA, $FRC", FPGeneral, + [(set f32:$FRT, (fmul f32:$FRA, f32:$FRC))]>; + defm FSUB : AForm_2r<63, 20, + (outs f8rc:$FRT), (ins f8rc:$FRA, f8rc:$FRB), + "fsub", "$FRT, $FRA, $FRB", FPAddSub, + [(set f64:$FRT, (fsub f64:$FRA, f64:$FRB))]>; + defm FSUBS : AForm_2r<59, 20, + (outs f4rc:$FRT), (ins f4rc:$FRA, f4rc:$FRB), + "fsubs", "$FRT, $FRA, $FRB", FPGeneral, + [(set f32:$FRT, (fsub f32:$FRA, f32:$FRB))]>; } } +let neverHasSideEffects = 1 in { let PPC970_Unit = 1 in { // FXU 
Operations. + let isSelect = 1 in def ISEL : AForm_4<31, 15, - (outs GPRC:$rT), (ins GPRC_NOR0:$rA, GPRC:$rB, CRBITRC:$cond), + (outs gprc:$rT), (ins gprc_nor0:$rA, gprc:$rB, crbitrc:$cond), "isel $rT, $rA, $rB, $cond", IntGeneral, []>; } @@ -1557,26 +1958,29 @@ let PPC970_Unit = 1 in { // FXU Operations. // let isCommutable = 1 in { // RLWIMI can be commuted if the rotate amount is zero. -def RLWIMI : MForm_2<20, - (outs GPRC:$rA), (ins GPRC:$rSi, GPRC:$rS, u5imm:$SH, u5imm:$MB, - u5imm:$ME), "rlwimi $rA, $rS, $SH, $MB, $ME", IntRotate, - []>, PPC970_DGroup_Cracked, RegConstraint<"$rSi = $rA">, - NoEncode<"$rSi">; +defm RLWIMI : MForm_2r<20, (outs gprc:$rA), + (ins gprc:$rSi, gprc:$rS, u5imm:$SH, u5imm:$MB, + u5imm:$ME), "rlwimi", "$rA, $rS, $SH, $MB, $ME", IntRotate, + []>, PPC970_DGroup_Cracked, RegConstraint<"$rSi = $rA">, + NoEncode<"$rSi">; } +let BaseName = "rlwinm" in { def RLWINM : MForm_2<21, - (outs GPRC:$rA), (ins GPRC:$rS, u5imm:$SH, u5imm:$MB, u5imm:$ME), + (outs gprc:$rA), (ins gprc:$rS, u5imm:$SH, u5imm:$MB, u5imm:$ME), "rlwinm $rA, $rS, $SH, $MB, $ME", IntGeneral, - []>; + []>, RecFormRel; +let Defs = [CR0] in def RLWINMo : MForm_2<21, - (outs GPRC:$rA), (ins GPRC:$rS, u5imm:$SH, u5imm:$MB, u5imm:$ME), - "rlwinm. $rA, $rS, $SH, $MB, $ME", IntGeneral, - []>, isDOT, PPC970_DGroup_Cracked; -def RLWNM : MForm_2<23, - (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB, u5imm:$MB, u5imm:$ME), - "rlwnm $rA, $rS, $rB, $MB, $ME", IntGeneral, - []>; + (outs gprc:$rA), (ins gprc:$rS, u5imm:$SH, u5imm:$MB, u5imm:$ME), + "rlwinm. $rA, $rS, $SH, $MB, $ME", IntGeneral, + []>, isDOT, RecFormRel, PPC970_DGroup_Cracked; } - +defm RLWNM : MForm_2r<23, (outs gprc:$rA), + (ins gprc:$rS, gprc:$rB, u5imm:$MB, u5imm:$ME), + "rlwnm", "$rA, $rS, $rB, $MB, $ME", IntGeneral, + []>; +} +} // neverHasSideEffects = 1 //===----------------------------------------------------------------------===// // PowerPC Instruction Patterns @@ -1693,14 +2097,6 @@ def : Pat<(f64 (extloadf32 xaddr:$src)), def : Pat<(f64 (fextend f32:$src)), (COPY_TO_REGCLASS $src, F8RC)>; -// Memory barriers -def : Pat<(membarrier (i32 imm /*ll*/), - (i32 imm /*ls*/), - (i32 imm /*sl*/), - (i32 imm /*ss*/), - (i32 imm /*device*/)), - (SYNC)>; - def : Pat<(atomic_fence (imm), (imm)), (SYNC)>; // Additional FNMSUB patterns: -a*c + b == -(a*c - b) @@ -1715,3 +2111,98 @@ def : Pat<(fma f32:$A, (fneg f32:$C), f32:$B), include "PPCInstrAltivec.td" include "PPCInstr64Bit.td" + + +//===----------------------------------------------------------------------===// +// PowerPC Instructions used for assembler/disassembler only +// + +def ISYNC : XLForm_2_ext<19, 150, 0, 0, 0, (outs), (ins), + "isync", SprISYNC, []>; + +def ICBI : XForm_1a<31, 982, (outs), (ins memrr:$src), + "icbi $src", LdStICBI, []>; + +//===----------------------------------------------------------------------===// +// PowerPC Assembler Instruction Aliases +// + +// Pseudo-instructions for alternate assembly syntax (never used by codegen). +// These are aliases that require C++ handling to convert to the target +// instruction, while InstAliases can be handled directly by tblgen. 
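// For example, slwi rA, rS, n expands to rlwinm rA, rS, n, 0, 31-n and
// srwi rA, rS, n expands to rlwinm rA, rS, 32-n, n, 31; computing the rotate
// amount and mask bounds from $n is the part a plain InstAlias cannot express.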
+class PPCAsmPseudo<string asm, dag iops> + : Instruction { + let Namespace = "PPC"; + bit PPC64 = 0; // Default value, override with isPPC64 + + let OutOperandList = (outs); + let InOperandList = iops; + let Pattern = []; + let AsmString = asm; + let isAsmParserOnly = 1; + let isPseudo = 1; +} + +def : InstAlias<"mr $rA, $rB", (OR8 g8rc:$rA, g8rc:$rB, g8rc:$rB)>; + +def SLWI : PPCAsmPseudo<"slwi $rA, $rS, $n", + (ins gprc:$rA, gprc:$rS, u5imm:$n)>; +def SRWI : PPCAsmPseudo<"srwi $rA, $rS, $n", + (ins gprc:$rA, gprc:$rS, u5imm:$n)>; +def SLDI : PPCAsmPseudo<"sldi $rA, $rS, $n", + (ins g8rc:$rA, g8rc:$rS, u6imm:$n)>; +def SRDI : PPCAsmPseudo<"srdi $rA, $rS, $n", + (ins g8rc:$rA, g8rc:$rS, u6imm:$n)>; + +def : InstAlias<"blt $cc, $dst", (BCC 12, crrc:$cc, condbrtarget:$dst)>; +def : InstAlias<"bgt $cc, $dst", (BCC 44, crrc:$cc, condbrtarget:$dst)>; +def : InstAlias<"beq $cc, $dst", (BCC 76, crrc:$cc, condbrtarget:$dst)>; +def : InstAlias<"bun $cc, $dst", (BCC 108, crrc:$cc, condbrtarget:$dst)>; +def : InstAlias<"bso $cc, $dst", (BCC 108, crrc:$cc, condbrtarget:$dst)>; +def : InstAlias<"bge $cc, $dst", (BCC 4, crrc:$cc, condbrtarget:$dst)>; +def : InstAlias<"bnl $cc, $dst", (BCC 4, crrc:$cc, condbrtarget:$dst)>; +def : InstAlias<"ble $cc, $dst", (BCC 36, crrc:$cc, condbrtarget:$dst)>; +def : InstAlias<"bng $cc, $dst", (BCC 36, crrc:$cc, condbrtarget:$dst)>; +def : InstAlias<"bne $cc, $dst", (BCC 68, crrc:$cc, condbrtarget:$dst)>; +def : InstAlias<"bnu $cc, $dst", (BCC 100, crrc:$cc, condbrtarget:$dst)>; +def : InstAlias<"bns $cc, $dst", (BCC 100, crrc:$cc, condbrtarget:$dst)>; + +def : InstAlias<"bltlr $cc", (BCLR 12, crrc:$cc)>; +def : InstAlias<"bgtlr $cc", (BCLR 44, crrc:$cc)>; +def : InstAlias<"beqlr $cc", (BCLR 76, crrc:$cc)>; +def : InstAlias<"bunlr $cc", (BCLR 108, crrc:$cc)>; +def : InstAlias<"bsolr $cc", (BCLR 108, crrc:$cc)>; +def : InstAlias<"bgelr $cc", (BCLR 4, crrc:$cc)>; +def : InstAlias<"bnllr $cc", (BCLR 4, crrc:$cc)>; +def : InstAlias<"blelr $cc", (BCLR 36, crrc:$cc)>; +def : InstAlias<"bnglr $cc", (BCLR 36, crrc:$cc)>; +def : InstAlias<"bnelr $cc", (BCLR 68, crrc:$cc)>; +def : InstAlias<"bnulr $cc", (BCLR 100, crrc:$cc)>; +def : InstAlias<"bnslr $cc", (BCLR 100, crrc:$cc)>; + +def : InstAlias<"bltctr $cc", (BCCTR 12, crrc:$cc)>; +def : InstAlias<"bgtctr $cc", (BCCTR 44, crrc:$cc)>; +def : InstAlias<"beqctr $cc", (BCCTR 76, crrc:$cc)>; +def : InstAlias<"bunctr $cc", (BCCTR 108, crrc:$cc)>; +def : InstAlias<"bsoctr $cc", (BCCTR 108, crrc:$cc)>; +def : InstAlias<"bgectr $cc", (BCCTR 4, crrc:$cc)>; +def : InstAlias<"bnlctr $cc", (BCCTR 4, crrc:$cc)>; +def : InstAlias<"blectr $cc", (BCCTR 36, crrc:$cc)>; +def : InstAlias<"bngctr $cc", (BCCTR 36, crrc:$cc)>; +def : InstAlias<"bnectr $cc", (BCCTR 68, crrc:$cc)>; +def : InstAlias<"bnuctr $cc", (BCCTR 100, crrc:$cc)>; +def : InstAlias<"bnsctr $cc", (BCCTR 100, crrc:$cc)>; + +def : InstAlias<"bltctrl $cc", (BCCTRL 12, crrc:$cc)>; +def : InstAlias<"bgtctrl $cc", (BCCTRL 44, crrc:$cc)>; +def : InstAlias<"beqctrl $cc", (BCCTRL 76, crrc:$cc)>; +def : InstAlias<"bunctrl $cc", (BCCTRL 108, crrc:$cc)>; +def : InstAlias<"bsoctrl $cc", (BCCTRL 108, crrc:$cc)>; +def : InstAlias<"bgectrl $cc", (BCCTRL 4, crrc:$cc)>; +def : InstAlias<"bnlctrl $cc", (BCCTRL 4, crrc:$cc)>; +def : InstAlias<"blectrl $cc", (BCCTRL 36, crrc:$cc)>; +def : InstAlias<"bngctrl $cc", (BCCTRL 36, crrc:$cc)>; +def : InstAlias<"bnectrl $cc", (BCCTRL 68, crrc:$cc)>; +def : InstAlias<"bnuctrl $cc", (BCCTRL 100, crrc:$cc)>; +def : InstAlias<"bnsctrl $cc", (BCCTRL 100, 
crrc:$cc)>; + diff --git a/contrib/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp b/contrib/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp index 9b0df3e..f8cf3a5 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp @@ -14,6 +14,7 @@ #include "PPC.h" #include "llvm/ADT/SmallString.h" +#include "llvm/ADT/Twine.h" #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineModuleInfoImpls.h" @@ -51,7 +52,14 @@ static MCSymbol *GetSymbolFromOperand(const MachineOperand &MO, AsmPrinter &AP){ // before we return the symbol. if (MO.getTargetFlags() == PPCII::MO_DARWIN_STUB) { Name += "$stub"; - MCSymbol *Sym = Ctx.GetOrCreateSymbol(Name.str()); + const char *PGP = AP.MAI->getPrivateGlobalPrefix(); + const char *Prefix = ""; + if (!Name.startswith(PGP)) { + // http://llvm.org/bugs/show_bug.cgi?id=15763 + // all stubs and lazy_ptrs should be local symbols, which need leading 'L' + Prefix = PGP; + } + MCSymbol *Sym = Ctx.GetOrCreateSymbol(Twine(Prefix) + Twine(Name)); MachineModuleInfoImpl::StubValueTy &StubSym = getMachOMMI(AP).getFnStubEntry(Sym); if (StubSym.getPointer()) diff --git a/contrib/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.h b/contrib/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.h index ee18ead..40d1f3a 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.h +++ b/contrib/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.h @@ -84,6 +84,11 @@ class PPCFunctionInfo : public MachineFunctionInfo { /// CRSpillFrameIndex - FrameIndex for CR spill slot for 32-bit SVR4. int CRSpillFrameIndex; + /// If any of CR[2-4] need to be saved in the prologue and restored in the + /// epilogue then they are added to this array. This is used for the + /// 64-bit SVR4 ABI. + SmallVector<unsigned, 3> MustSaveCRs; + public: explicit PPCFunctionInfo(MachineFunction &MF) : FramePointerSaveIndex(0), @@ -154,6 +159,10 @@ public: int getCRSpillFrameIndex() const { return CRSpillFrameIndex; } void setCRSpillFrameIndex(int idx) { CRSpillFrameIndex = idx; } + + const SmallVector<unsigned, 3> & + getMustSaveCRs() const { return MustSaveCRs; } + void addMustSaveCR(unsigned Reg) { MustSaveCRs.push_back(Reg); } }; } // end of namespace llvm diff --git a/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp b/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp index 1d61a3a..2be6324 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp @@ -76,6 +76,8 @@ PPCRegisterInfo::PPCRegisterInfo(const PPCSubtarget &ST, const TargetRegisterClass * PPCRegisterInfo::getPointerRegClass(const MachineFunction &MF, unsigned Kind) const { + // Note that PPCInstrInfo::FoldImmediate also directly uses this Kind value + // when it checks for ZERO folding. if (Kind == 1) { if (Subtarget.isPPC64()) return &PPC::G8RC_NOX0RegClass; @@ -452,6 +454,33 @@ PPCRegisterInfo::hasReservedSpillSlot(const MachineFunction &MF, return false; } +// Figure out if the offset in the instruction is shifted right two bits. This +// is true for instructions like "STD", which the machine implicitly adds two +// low zeros to. +static bool usesIXAddr(const MachineInstr &MI) { + unsigned OpC = MI.getOpcode(); + + switch (OpC) { + default: + return false; + case PPC::LWA: + case PPC::LD: + case PPC::STD: + return true; + } +} + +// Return the OffsetOperandNo given the FIOperandNum (and the instruction). 
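// For the r+i memory forms the frame index appears as the base-register
// operand (operand 2) with the displacement just before it (operand 1), while
// ADDI-style instructions carry the frame index as operand 1 with the
// immediate at operand 2; hence the FIOperandNum == 2 test below.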
+static unsigned getOffsetONFromFION(const MachineInstr &MI, + unsigned FIOperandNum) { + // Take into account whether it's an add or mem instruction + unsigned OffsetOperandNo = (FIOperandNum == 2) ? 1 : 2; + if (MI.isInlineAsm()) + OffsetOperandNo = FIOperandNum-1; + + return OffsetOperandNo; +} + void PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, unsigned FIOperandNum, @@ -469,10 +498,7 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); DebugLoc dl = MI.getDebugLoc(); - // Take into account whether it's an add or mem instruction - unsigned OffsetOperandNo = (FIOperandNum == 2) ? 1 : 2; - if (MI.isInlineAsm()) - OffsetOperandNo = FIOperandNum-1; + unsigned OffsetOperandNo = getOffsetONFromFION(MI, FIOperandNum); // Get the frame index. int FrameIndex = MI.getOperand(FIOperandNum).getIndex(); @@ -514,17 +540,8 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, (is64Bit ? PPC::X1 : PPC::R1), false); - // Figure out if the offset in the instruction is shifted right two bits. This - // is true for instructions like "STD", which the machine implicitly adds two - // low zeros to. - bool isIXAddr = false; - switch (OpC) { - case PPC::LWA: - case PPC::LD: - case PPC::STD: - isIXAddr = true; - break; - } + // Figure out if the offset in the instruction is shifted right two bits. + bool isIXAddr = usesIXAddr(MI); // If the instruction is not present in ImmToIdxMap, then it has no immediate // form (and must be r+r). @@ -616,3 +633,124 @@ unsigned PPCRegisterInfo::getEHExceptionRegister() const { unsigned PPCRegisterInfo::getEHHandlerRegister() const { return !Subtarget.isPPC64() ? PPC::R4 : PPC::X4; } + +/// Returns true if the instruction's frame index +/// reference would be better served by a base register other than FP +/// or SP. Used by LocalStackFrameAllocation to determine which frame index +/// references it should create new base registers for. +bool PPCRegisterInfo:: +needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const { + assert(Offset < 0 && "Local offset must be negative"); + + unsigned FIOperandNum = 0; + while (!MI->getOperand(FIOperandNum).isFI()) { + ++FIOperandNum; + assert(FIOperandNum < MI->getNumOperands() && + "Instr doesn't have FrameIndex operand!"); + } + + unsigned OffsetOperandNo = getOffsetONFromFION(*MI, FIOperandNum); + + if (!usesIXAddr(*MI)) + Offset += MI->getOperand(OffsetOperandNo).getImm(); + else + Offset += MI->getOperand(OffsetOperandNo).getImm() << 2; + + // It's the load/store FI references that cause issues, as it can be difficult + // to materialize the offset if it won't fit in the literal field. Estimate + // based on the size of the local frame and some conservative assumptions + // about the rest of the stack frame (note, this is pre-regalloc, so + // we don't know everything for certain yet) whether this offset is likely + // to be out of range of the immediate. Return true if so. + + // We only generate virtual base registers for loads and stores that have + // an r+i form. Return false for everything else. + unsigned OpC = MI->getOpcode(); + if (!ImmToIdxMap.count(OpC)) + return false; + + // Don't generate a new virtual base register just to add zero to it. 
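usesIXAddr() above flags the DS-form memory instructions (LWA, LD, STD), whose displacement field stores the byte offset divided by four; as the comment says, the machine implicitly appends two low zero bits when forming the address. That is why the surrounding code scales the stored immediate up by << 2 when accumulating a byte offset and back down by >> 2 before re-encoding it. A minimal sketch of that round trip follows; decodeDSOffset/encodeDSOffset are hypothetical helper names used only for illustration.

// Sketch of DS-form displacement handling (helper names are illustrative).
#include <cassert>
#include <cstdint>

// Convert the immediate stored in a DS-form instruction to a byte offset.
int64_t decodeDSOffset(int64_t StoredImm) { return StoredImm << 2; }

// Re-encode a byte offset; DS-form requires the low two bits to be zero.
int64_t encodeDSOffset(int64_t ByteOffset) {
  assert((ByteOffset & 3) == 0 && "DS-form offsets must be 4-byte aligned");
  return ByteOffset >> 2;
}

int main() {
  int64_t Stored = 10;                      // field as it appears in the instruction
  int64_t Bytes  = decodeDSOffset(Stored);  // 40 bytes from the base register
  Bytes += 8;                               // e.g. a frame-index adjustment
  assert(encodeDSOffset(Bytes) == 12);      // written back as 12
  return 0;
}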
+ if ((OpC == PPC::ADDI || OpC == PPC::ADDI8) && + MI->getOperand(2).getImm() == 0) + return false; + + MachineBasicBlock &MBB = *MI->getParent(); + MachineFunction &MF = *MBB.getParent(); + + const PPCFrameLowering *PPCFI = + static_cast<const PPCFrameLowering*>(MF.getTarget().getFrameLowering()); + unsigned StackEst = + PPCFI->determineFrameLayout(MF, false, true); + + // If we likely don't need a stack frame, then we probably don't need a + // virtual base register either. + if (!StackEst) + return false; + + // Estimate an offset from the stack pointer. + // The incoming offset is relating to the SP at the start of the function, + // but when we access the local it'll be relative to the SP after local + // allocation, so adjust our SP-relative offset by that allocation size. + Offset += StackEst; + + // The frame pointer will point to the end of the stack, so estimate the + // offset as the difference between the object offset and the FP location. + return !isFrameOffsetLegal(MI, Offset); +} + +/// Insert defining instruction(s) for BaseReg to +/// be a pointer to FrameIdx at the beginning of the basic block. +void PPCRegisterInfo:: +materializeFrameBaseRegister(MachineBasicBlock *MBB, + unsigned BaseReg, int FrameIdx, + int64_t Offset) const { + unsigned ADDriOpc = Subtarget.isPPC64() ? PPC::ADDI8 : PPC::ADDI; + + MachineBasicBlock::iterator Ins = MBB->begin(); + DebugLoc DL; // Defaults to "unknown" + if (Ins != MBB->end()) + DL = Ins->getDebugLoc(); + + const MCInstrDesc &MCID = TII.get(ADDriOpc); + MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); + const MachineFunction &MF = *MBB->getParent(); + MRI.constrainRegClass(BaseReg, TII.getRegClass(MCID, 0, this, MF)); + + BuildMI(*MBB, Ins, DL, MCID, BaseReg) + .addFrameIndex(FrameIdx).addImm(Offset); +} + +void +PPCRegisterInfo::resolveFrameIndex(MachineBasicBlock::iterator I, + unsigned BaseReg, int64_t Offset) const { + MachineInstr &MI = *I; + + unsigned FIOperandNum = 0; + while (!MI.getOperand(FIOperandNum).isFI()) { + ++FIOperandNum; + assert(FIOperandNum < MI.getNumOperands() && + "Instr doesn't have FrameIndex operand!"); + } + + MI.getOperand(FIOperandNum).ChangeToRegister(BaseReg, false); + unsigned OffsetOperandNo = getOffsetONFromFION(MI, FIOperandNum); + + bool isIXAddr = usesIXAddr(MI); + if (!isIXAddr) + Offset += MI.getOperand(OffsetOperandNo).getImm(); + else + Offset += MI.getOperand(OffsetOperandNo).getImm() << 2; + + // Figure out if the offset in the instruction is shifted right two bits. + if (isIXAddr) + Offset >>= 2; // The actual encoded value has the low two bits zero. 
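needsFrameBaseReg() above boils down to a range check: take the instruction's displacement, add a conservative estimate of the final stack-frame size, and ask whether the result still fits the signed 16-bit r+i form (with 4-byte alignment for DS-form accesses). A worked restatement of that test is sketched below; fitsRPlusI and the numeric StackEstimate are illustrative stand-ins, not the patch's own names, and the real value comes from PPCFrameLowering::determineFrameLayout().

// Restated legality test with an assumed frame-size estimate.
#include <cstdint>
#include <cstdio>

bool fitsRPlusI(int64_t Offset, bool IsDSForm) {
  bool InRange = Offset >= INT16_MIN && Offset <= INT16_MAX; // isInt<16>
  bool Aligned = !IsDSForm || (Offset & 3) == 0;             // DS-form needs low bits clear
  return InRange && Aligned;
}

int main() {
  int64_t LocalOffset   = -48;    // offset of the local within its area
  int64_t StackEstimate = 70000;  // assumed large frame (e.g. big local arrays)
  // Mirrors needsFrameBaseReg(): adjust by the estimated allocation, then check.
  bool NeedsBaseReg = !fitsRPlusI(LocalOffset + StackEstimate, /*IsDSForm=*/true);
  std::printf("needs virtual base register: %s\n", NeedsBaseReg ? "yes" : "no");
  return 0;
}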
+ + MI.getOperand(OffsetOperandNo).ChangeToImmediate(Offset); +} + +bool PPCRegisterInfo::isFrameOffsetLegal(const MachineInstr *MI, + int64_t Offset) const { + return MI->getOpcode() == PPC::DBG_VALUE || // DBG_VALUE is always Reg+Imm + (isInt<16>(Offset) && (!usesIXAddr(*MI) || (Offset & 3) == 0)); +} + diff --git a/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.h b/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.h index 7e6683e..7a48b4b 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.h +++ b/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.h @@ -61,6 +61,10 @@ public: return true; } + virtual bool requiresVirtualBaseRegisters(const MachineFunction &MF) const { + return true; + } + void lowerDynamicAlloc(MachineBasicBlock::iterator II) const; void lowerCRSpilling(MachineBasicBlock::iterator II, unsigned FrameIndex) const; @@ -77,6 +81,15 @@ public: int SPAdj, unsigned FIOperandNum, RegScavenger *RS = NULL) const; + // Support for virtual base registers. + bool needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const; + void materializeFrameBaseRegister(MachineBasicBlock *MBB, + unsigned BaseReg, int FrameIdx, + int64_t Offset) const; + void resolveFrameIndex(MachineBasicBlock::iterator I, + unsigned BaseReg, int64_t Offset) const; + bool isFrameOffsetLegal(const MachineInstr *MI, int64_t Offset) const; + // Debug information queries. unsigned getFrameRegister(const MachineFunction &MF) const; diff --git a/contrib/llvm/lib/Target/PowerPC/PPCScheduleA2.td b/contrib/llvm/lib/Target/PowerPC/PPCScheduleA2.td index ae084aa..8d5838e 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCScheduleA2.td +++ b/contrib/llvm/lib/Target/PowerPC/PPCScheduleA2.td @@ -759,7 +759,7 @@ def PPCA2Model : SchedMachineModel { let LoadLatency = 6; // Optimistic load latency assuming bypass. // This is overriden by OperandCycles if the // Itineraries are queried instead. - let MispredictPenalty = 6; + let MispredictPenalty = 13; let Itineraries = PPCA2Itineraries; } diff --git a/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp b/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp index fe851c1..14dc794 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp @@ -86,8 +86,14 @@ public: return getTM<PPCTargetMachine>(); } + const PPCSubtarget &getPPCSubtarget() const { + return *getPPCTargetMachine().getSubtargetImpl(); + } + virtual bool addPreRegAlloc(); + virtual bool addILPOpts(); virtual bool addInstSelector(); + virtual bool addPreSched2(); virtual bool addPreEmitPass(); }; } // namespace @@ -103,13 +109,31 @@ bool PPCPassConfig::addPreRegAlloc() { return false; } +bool PPCPassConfig::addILPOpts() { + if (getPPCSubtarget().hasISEL()) { + addPass(&EarlyIfConverterID); + return true; + } + + return false; +} + bool PPCPassConfig::addInstSelector() { // Install an instruction selector. addPass(createPPCISelDag(getPPCTargetMachine())); return false; } +bool PPCPassConfig::addPreSched2() { + if (getOptLevel() != CodeGenOpt::None) + addPass(&IfConverterID); + + return true; +} + bool PPCPassConfig::addPreEmitPass() { + if (getOptLevel() != CodeGenOpt::None) + addPass(createPPCEarlyReturnPass()); // Must run branch selection immediately preceding the asm printer. 
addPass(createPPCBranchSelectionPass()); return false; diff --git a/contrib/llvm/lib/Target/R600/AMDGPU.h b/contrib/llvm/lib/Target/R600/AMDGPU.h index 0b01433..9792bd8 100644 --- a/contrib/llvm/lib/Target/R600/AMDGPU.h +++ b/contrib/llvm/lib/Target/R600/AMDGPU.h @@ -24,6 +24,7 @@ class AMDGPUTargetMachine; FunctionPass* createR600KernelParametersPass(const DataLayout *TD); FunctionPass *createR600ExpandSpecialInstrsPass(TargetMachine &tm); FunctionPass *createR600EmitClauseMarkers(TargetMachine &tm); +FunctionPass *createR600Packetizer(TargetMachine &tm); FunctionPass *createR600ControlFlowFinalizer(TargetMachine &tm); // SI Passes diff --git a/contrib/llvm/lib/Target/R600/AMDGPUAsmPrinter.cpp b/contrib/llvm/lib/Target/R600/AMDGPUAsmPrinter.cpp index f600144..4c35ecf 100644 --- a/contrib/llvm/lib/Target/R600/AMDGPUAsmPrinter.cpp +++ b/contrib/llvm/lib/Target/R600/AMDGPUAsmPrinter.cpp @@ -19,9 +19,16 @@ #include "AMDGPUAsmPrinter.h" #include "AMDGPU.h" +#include "SIDefines.h" #include "SIMachineFunctionInfo.h" #include "SIRegisterInfo.h" +#include "R600Defines.h" +#include "R600MachineFunctionInfo.h" +#include "R600RegisterInfo.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCSectionELF.h" #include "llvm/MC/MCStreamer.h" +#include "llvm/Support/ELF.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Target/TargetLoweringObjectFile.h" @@ -50,15 +57,82 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) { if (OutStreamer.hasRawTextSupport()) { OutStreamer.EmitRawText("@" + MF.getName() + ":"); } - OutStreamer.SwitchSection(getObjFileLowering().getTextSection()); + + const MCSectionELF *ConfigSection = getObjFileLowering().getContext() + .getELFSection(".AMDGPU.config", + ELF::SHT_PROGBITS, 0, + SectionKind::getReadOnly()); + OutStreamer.SwitchSection(ConfigSection); if (STM.device()->getGeneration() > AMDGPUDeviceInfo::HD6XXX) { - EmitProgramInfo(MF); + EmitProgramInfoSI(MF); + } else { + EmitProgramInfoR600(MF); } + OutStreamer.SwitchSection(getObjFileLowering().getTextSection()); EmitFunctionBody(); return false; } -void AMDGPUAsmPrinter::EmitProgramInfo(MachineFunction &MF) { +void AMDGPUAsmPrinter::EmitProgramInfoR600(MachineFunction &MF) { + unsigned MaxGPR = 0; + bool killPixel = false; + const R600RegisterInfo * RI = + static_cast<const R600RegisterInfo*>(TM.getRegisterInfo()); + R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>(); + const AMDGPUSubtarget &STM = TM.getSubtarget<AMDGPUSubtarget>(); + + for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end(); + BB != BB_E; ++BB) { + MachineBasicBlock &MBB = *BB; + for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); + I != E; ++I) { + MachineInstr &MI = *I; + if (MI.getOpcode() == AMDGPU::KILLGT) + killPixel = true; + unsigned numOperands = MI.getNumOperands(); + for (unsigned op_idx = 0; op_idx < numOperands; op_idx++) { + MachineOperand & MO = MI.getOperand(op_idx); + if (!MO.isReg()) + continue; + unsigned HWReg = RI->getEncodingValue(MO.getReg()) & 0xff; + + // Register with value > 127 aren't GPR + if (HWReg > 127) + continue; + MaxGPR = std::max(MaxGPR, HWReg); + } + } + } + + unsigned RsrcReg; + if (STM.device()->getGeneration() >= AMDGPUDeviceInfo::HD5XXX) { + // Evergreen / Northern Islands + switch (MFI->ShaderType) { + default: // Fall through + case ShaderType::COMPUTE: RsrcReg = R_0288D4_SQ_PGM_RESOURCES_LS; break; + case ShaderType::GEOMETRY: RsrcReg = R_028878_SQ_PGM_RESOURCES_GS; break; + case ShaderType::PIXEL: RsrcReg = R_028844_SQ_PGM_RESOURCES_PS; 
break; + case ShaderType::VERTEX: RsrcReg = R_028860_SQ_PGM_RESOURCES_VS; break; + } + } else { + // R600 / R700 + switch (MFI->ShaderType) { + default: // Fall through + case ShaderType::GEOMETRY: // Fall through + case ShaderType::COMPUTE: // Fall through + case ShaderType::VERTEX: RsrcReg = R_028868_SQ_PGM_RESOURCES_VS; break; + case ShaderType::PIXEL: RsrcReg = R_028850_SQ_PGM_RESOURCES_PS; break; + } + } + + OutStreamer.EmitIntValue(RsrcReg, 4); + OutStreamer.EmitIntValue(S_NUM_GPRS(MaxGPR + 1) | + S_STACK_SIZE(MFI->StackSize), 4); + OutStreamer.EmitIntValue(R_02880C_DB_SHADER_CONTROL, 4); + OutStreamer.EmitIntValue(S_02880C_KILL_ENABLE(killPixel), 4); +} + +void AMDGPUAsmPrinter::EmitProgramInfoSI(MachineFunction &MF) { unsigned MaxSGPR = 0; unsigned MaxVGPR = 0; bool VCCUsed = false; @@ -107,6 +181,9 @@ void AMDGPUAsmPrinter::EmitProgramInfo(MachineFunction &MF) { } else if (AMDGPU::VReg_64RegClass.contains(reg)) { isSGPR = false; width = 2; + } else if (AMDGPU::VReg_96RegClass.contains(reg)) { + isSGPR = false; + width = 3; } else if (AMDGPU::SReg_128RegClass.contains(reg)) { isSGPR = true; width = 4; @@ -139,7 +216,19 @@ void AMDGPUAsmPrinter::EmitProgramInfo(MachineFunction &MF) { MaxSGPR += 2; } SIMachineFunctionInfo * MFI = MF.getInfo<SIMachineFunctionInfo>(); - OutStreamer.EmitIntValue(MaxSGPR + 1, 4); - OutStreamer.EmitIntValue(MaxVGPR + 1, 4); - OutStreamer.EmitIntValue(MFI->PSInputAddr, 4); + unsigned RsrcReg; + switch (MFI->ShaderType) { + default: // Fall through + case ShaderType::COMPUTE: RsrcReg = R_00B848_COMPUTE_PGM_RSRC1; break; + case ShaderType::GEOMETRY: RsrcReg = R_00B228_SPI_SHADER_PGM_RSRC1_GS; break; + case ShaderType::PIXEL: RsrcReg = R_00B028_SPI_SHADER_PGM_RSRC1_PS; break; + case ShaderType::VERTEX: RsrcReg = R_00B128_SPI_SHADER_PGM_RSRC1_VS; break; + } + + OutStreamer.EmitIntValue(RsrcReg, 4); + OutStreamer.EmitIntValue(S_00B028_VGPRS(MaxVGPR / 4) | S_00B028_SGPRS(MaxSGPR / 8), 4); + if (MFI->ShaderType == ShaderType::PIXEL) { + OutStreamer.EmitIntValue(R_0286CC_SPI_PS_INPUT_ENA, 4); + OutStreamer.EmitIntValue(MFI->PSInputAddr, 4); + } } diff --git a/contrib/llvm/lib/Target/R600/AMDGPUAsmPrinter.h b/contrib/llvm/lib/Target/R600/AMDGPUAsmPrinter.h index 3812282..f425ef4 100644 --- a/contrib/llvm/lib/Target/R600/AMDGPUAsmPrinter.h +++ b/contrib/llvm/lib/Target/R600/AMDGPUAsmPrinter.h @@ -33,7 +33,8 @@ public: /// \brief Emit register usage information so that the GPU driver /// can correctly setup the GPU state. - void EmitProgramInfo(MachineFunction &MF); + void EmitProgramInfoR600(MachineFunction &MF); + void EmitProgramInfoSI(MachineFunction &MF); /// Implemented in AMDGPUMCInstLower.cpp virtual void EmitInstruction(const MachineInstr *MI); diff --git a/contrib/llvm/lib/Target/R600/AMDGPUCallingConv.td b/contrib/llvm/lib/Target/R600/AMDGPUCallingConv.td index 45ae37e..9c30515 100644 --- a/contrib/llvm/lib/Target/R600/AMDGPUCallingConv.td +++ b/contrib/llvm/lib/Target/R600/AMDGPUCallingConv.td @@ -32,8 +32,14 @@ def CC_SI : CallingConv<[ VGPR8, VGPR9, VGPR10, VGPR11, VGPR12, VGPR13, VGPR14, VGPR15, VGPR16, VGPR17, VGPR18, VGPR19, VGPR20, VGPR21, VGPR22, VGPR23, VGPR24, VGPR25, VGPR26, VGPR27, VGPR28, VGPR29, VGPR30, VGPR31 - ]>>> + ]>>>, + // This is the default for i64 values. + // XXX: We should change this once clang understands the CC_AMDGPU. 
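Both EmitProgramInfoR600 and EmitProgramInfoSI above derive the driver configuration words by scanning every machine operand and recording the highest register slot actually touched; multi-word register classes (VReg_64, VReg_96, SReg_128, ...) simply widen the footprint of their base register. The sketch below illustrates that bookkeeping with made-up register uses; the RegUse struct and sample values are assumptions for illustration, not code from the patch.

// Track the highest register slot touched, in the spirit of the asm-printer hunks.
#include <algorithm>
#include <cstdio>

struct RegUse { unsigned BaseIndex; unsigned Width; }; // Width in 32-bit registers

int main() {
  // E.g. v[0:1] (width 2), v5 (width 1), s[8:11] (SReg_128, width 4).
  const RegUse VGPRs[] = {{0, 2}, {5, 1}};
  const RegUse SGPRs[] = {{8, 4}};

  unsigned MaxVGPR = 0, MaxSGPR = 0;
  for (const RegUse &U : VGPRs) MaxVGPR = std::max(MaxVGPR, U.BaseIndex + U.Width - 1);
  for (const RegUse &U : SGPRs) MaxSGPR = std::max(MaxSGPR, U.BaseIndex + U.Width - 1);

  // The SI config word then carries the counts in coarse granules, mirroring
  // S_00B028_VGPRS(MaxVGPR / 4) | S_00B028_SGPRS(MaxSGPR / 8) in the patch.
  std::printf("MaxVGPR=%u MaxSGPR=%u\n", MaxVGPR, MaxSGPR); // 5 and 11
  return 0;
}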
+ CCIfType<[i64], CCAssignToRegWithShadow< + [ SGPR0, SGPR2, SGPR4, SGPR6, SGPR8, SGPR10, SGPR12, SGPR14 ], + [ SGPR1, SGPR3, SGPR5, SGPR7, SGPR9, SGPR11, SGPR13, SGPR15 ] + >> ]>; def CC_AMDGPU : CallingConv<[ diff --git a/contrib/llvm/lib/Target/R600/AMDGPUISelLowering.h b/contrib/llvm/lib/Target/R600/AMDGPUISelLowering.h index f31b646..c2a79ea 100644 --- a/contrib/llvm/lib/Target/R600/AMDGPUISelLowering.h +++ b/contrib/llvm/lib/Target/R600/AMDGPUISelLowering.h @@ -116,6 +116,7 @@ enum { BRANCH_COND, // End AMDIL ISD Opcodes BITALIGN, + BUFFER_STORE, DWORDADDR, FRACT, FMAX, diff --git a/contrib/llvm/lib/Target/R600/AMDGPUInstructions.td b/contrib/llvm/lib/Target/R600/AMDGPUInstructions.td index e740348..d2620b2 100644 --- a/contrib/llvm/lib/Target/R600/AMDGPUInstructions.td +++ b/contrib/llvm/lib/Target/R600/AMDGPUInstructions.td @@ -94,6 +94,7 @@ class Constants { int TWO_PI = 0x40c90fdb; int PI = 0x40490fdb; int TWO_PI_INV = 0x3e22f983; +int FP_UINT_MAX_PLUS_1 = 0x4f800000; // 1 << 32 in floating point encoding } def CONST : Constants; @@ -115,21 +116,21 @@ class CLAMP <RegisterClass rc> : AMDGPUShaderInst < (outs rc:$dst), (ins rc:$src0), "CLAMP $dst, $src0", - [(set rc:$dst, (int_AMDIL_clamp rc:$src0, (f32 FP_ZERO), (f32 FP_ONE)))] + [(set f32:$dst, (int_AMDIL_clamp f32:$src0, (f32 FP_ZERO), (f32 FP_ONE)))] >; class FABS <RegisterClass rc> : AMDGPUShaderInst < (outs rc:$dst), (ins rc:$src0), "FABS $dst, $src0", - [(set rc:$dst, (fabs rc:$src0))] + [(set f32:$dst, (fabs f32:$src0))] >; class FNEG <RegisterClass rc> : AMDGPUShaderInst < (outs rc:$dst), (ins rc:$src0), "FNEG $dst, $src0", - [(set rc:$dst, (fneg rc:$src0))] + [(set f32:$dst, (fneg f32:$src0))] >; } // usesCustomInserter = 1 @@ -140,8 +141,7 @@ multiclass RegisterLoadStore <RegisterClass dstClass, Operand addrClass, (outs dstClass:$dst), (ins addrClass:$addr, i32imm:$chan), "RegisterLoad $dst, $addr", - [(set (i32 dstClass:$dst), (AMDGPUregister_load addrPat:$addr, - (i32 timm:$chan)))] + [(set i32:$dst, (AMDGPUregister_load addrPat:$addr, (i32 timm:$chan)))] > { let isRegisterLoad = 1; } @@ -150,7 +150,7 @@ multiclass RegisterLoadStore <RegisterClass dstClass, Operand addrClass, (outs), (ins dstClass:$val, addrClass:$addr, i32imm:$chan), "RegisterStore $val, $addr", - [(AMDGPUregister_store (i32 dstClass:$val), addrPat:$addr, (i32 timm:$chan))] + [(AMDGPUregister_store i32:$val, addrPat:$addr, (i32 timm:$chan))] > { let isRegisterStore = 1; } @@ -161,105 +161,140 @@ multiclass RegisterLoadStore <RegisterClass dstClass, Operand addrClass, /* Generic helper patterns for intrinsics */ /* -------------------------------------- */ -class POW_Common <AMDGPUInst log_ieee, AMDGPUInst exp_ieee, AMDGPUInst mul, - RegisterClass rc> : Pat < - (fpow rc:$src0, rc:$src1), - (exp_ieee (mul rc:$src1, (log_ieee rc:$src0))) +class POW_Common <AMDGPUInst log_ieee, AMDGPUInst exp_ieee, AMDGPUInst mul> + : Pat < + (fpow f32:$src0, f32:$src1), + (exp_ieee (mul f32:$src1, (log_ieee f32:$src0))) >; /* Other helper patterns */ /* --------------------- */ /* Extract element pattern */ -class Extract_Element <ValueType sub_type, ValueType vec_type, - RegisterClass vec_class, int sub_idx, - SubRegIndex sub_reg>: Pat< - (sub_type (vector_extract (vec_type vec_class:$src), sub_idx)), - (EXTRACT_SUBREG vec_class:$src, sub_reg) +class Extract_Element <ValueType sub_type, ValueType vec_type, int sub_idx, + SubRegIndex sub_reg> + : Pat< + (sub_type (vector_extract vec_type:$src, sub_idx)), + (EXTRACT_SUBREG $src, sub_reg) >; /* Insert element pattern */ 
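The retyped POW_Common pattern above expands fpow as exp_ieee(src1 * log_ieee(src0)); assuming EXP_IEEE and LOG_IEEE are the base-2 exponential and logarithm on this hardware family, that is the usual identity pow(x, y) = exp2(y * log2(x)). Likewise, the new FP_UINT_MAX_PLUS_1 constant 0x4f800000 is simply 2^32 written as an IEEE-754 single, as its comment states. Both facts are easy to sanity-check on the host; this check is editorial, not part of the patch.

// Host-side sanity check of the two facts noted above.
#include <cassert>
#include <cmath>
#include <cstdint>
#include <cstring>

int main() {
  // pow(x, y) == exp2(y * log2(x)) for positive x -- the shape of POW_Common.
  float x = 3.5f, y = 1.25f;
  assert(std::fabs(std::pow(x, y) - std::exp2(y * std::log2(x))) < 1e-4f);

  // 0x4f800000 is the IEEE-754 single-precision encoding of 2^32.
  float f = 4294967296.0f; // 1 << 32
  uint32_t bits;
  std::memcpy(&bits, &f, sizeof(bits));
  assert(bits == 0x4f800000u);
  return 0;
}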
class Insert_Element <ValueType elem_type, ValueType vec_type, - RegisterClass elem_class, RegisterClass vec_class, - int sub_idx, SubRegIndex sub_reg> : Pat < - - (vec_type (vector_insert (vec_type vec_class:$vec), - (elem_type elem_class:$elem), sub_idx)), - (INSERT_SUBREG vec_class:$vec, elem_class:$elem, sub_reg) + int sub_idx, SubRegIndex sub_reg> + : Pat < + (vector_insert vec_type:$vec, elem_type:$elem, sub_idx), + (INSERT_SUBREG $vec, $elem, sub_reg) >; // Vector Build pattern -class Vector1_Build <ValueType vecType, RegisterClass vectorClass, - ValueType elemType, RegisterClass elemClass> : Pat < - (vecType (build_vector (elemType elemClass:$src))), - (vecType elemClass:$src) +class Vector1_Build <ValueType vecType, ValueType elemType, + RegisterClass rc> : Pat < + (vecType (build_vector elemType:$src)), + (vecType (COPY_TO_REGCLASS $src, rc)) >; -class Vector2_Build <ValueType vecType, RegisterClass vectorClass, - ValueType elemType, RegisterClass elemClass> : Pat < - (vecType (build_vector (elemType elemClass:$sub0), (elemType elemClass:$sub1))), +class Vector2_Build <ValueType vecType, ValueType elemType> : Pat < + (vecType (build_vector elemType:$sub0, elemType:$sub1)), (INSERT_SUBREG (INSERT_SUBREG - (vecType (IMPLICIT_DEF)), elemClass:$sub0, sub0), elemClass:$sub1, sub1) + (vecType (IMPLICIT_DEF)), $sub0, sub0), $sub1, sub1) >; -class Vector4_Build <ValueType vecType, RegisterClass vectorClass, - ValueType elemType, RegisterClass elemClass> : Pat < - (vecType (build_vector (elemType elemClass:$x), (elemType elemClass:$y), - (elemType elemClass:$z), (elemType elemClass:$w))), +class Vector4_Build <ValueType vecType, ValueType elemType> : Pat < + (vecType (build_vector elemType:$x, elemType:$y, elemType:$z, elemType:$w)), (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG - (vecType (IMPLICIT_DEF)), elemClass:$x, sub0), elemClass:$y, sub1), - elemClass:$z, sub2), elemClass:$w, sub3) + (vecType (IMPLICIT_DEF)), $x, sub0), $y, sub1), $z, sub2), $w, sub3) >; -class Vector8_Build <ValueType vecType, RegisterClass vectorClass, - ValueType elemType, RegisterClass elemClass> : Pat < - (vecType (build_vector (elemType elemClass:$sub0), (elemType elemClass:$sub1), - (elemType elemClass:$sub2), (elemType elemClass:$sub3), - (elemType elemClass:$sub4), (elemType elemClass:$sub5), - (elemType elemClass:$sub6), (elemType elemClass:$sub7))), - (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG +class Vector8_Build <ValueType vecType, ValueType elemType> : Pat < + (vecType (build_vector elemType:$sub0, elemType:$sub1, + elemType:$sub2, elemType:$sub3, + elemType:$sub4, elemType:$sub5, + elemType:$sub6, elemType:$sub7)), (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG - (vecType (IMPLICIT_DEF)), elemClass:$sub0, sub0), elemClass:$sub1, sub1), - elemClass:$sub2, sub2), elemClass:$sub3, sub3), - elemClass:$sub4, sub4), elemClass:$sub5, sub5), - elemClass:$sub6, sub6), elemClass:$sub7, sub7) + (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG + (vecType (IMPLICIT_DEF)), $sub0, sub0), $sub1, sub1), + $sub2, sub2), $sub3, sub3), + $sub4, sub4), $sub5, sub5), + $sub6, sub6), $sub7, sub7) >; -class Vector16_Build <ValueType vecType, RegisterClass vectorClass, - ValueType elemType, RegisterClass elemClass> : Pat < - (vecType (build_vector (elemType elemClass:$sub0), (elemType elemClass:$sub1), - (elemType elemClass:$sub2), (elemType elemClass:$sub3), - (elemType elemClass:$sub4), (elemType elemClass:$sub5), - (elemType elemClass:$sub6), (elemType 
elemClass:$sub7), - (elemType elemClass:$sub8), (elemType elemClass:$sub9), - (elemType elemClass:$sub10), (elemType elemClass:$sub11), - (elemType elemClass:$sub12), (elemType elemClass:$sub13), - (elemType elemClass:$sub14), (elemType elemClass:$sub15))), - (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG - (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG - (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG +class Vector16_Build <ValueType vecType, ValueType elemType> : Pat < + (vecType (build_vector elemType:$sub0, elemType:$sub1, + elemType:$sub2, elemType:$sub3, + elemType:$sub4, elemType:$sub5, + elemType:$sub6, elemType:$sub7, + elemType:$sub8, elemType:$sub9, + elemType:$sub10, elemType:$sub11, + elemType:$sub12, elemType:$sub13, + elemType:$sub14, elemType:$sub15)), (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG - (vecType (IMPLICIT_DEF)), elemClass:$sub0, sub0), elemClass:$sub1, sub1), - elemClass:$sub2, sub2), elemClass:$sub3, sub3), - elemClass:$sub4, sub4), elemClass:$sub5, sub5), - elemClass:$sub6, sub6), elemClass:$sub7, sub7), - elemClass:$sub8, sub8), elemClass:$sub9, sub9), - elemClass:$sub10, sub10), elemClass:$sub11, sub11), - elemClass:$sub12, sub12), elemClass:$sub13, sub13), - elemClass:$sub14, sub14), elemClass:$sub15, sub15) + (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG + (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG + (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG + (vecType (IMPLICIT_DEF)), $sub0, sub0), $sub1, sub1), + $sub2, sub2), $sub3, sub3), + $sub4, sub4), $sub5, sub5), + $sub6, sub6), $sub7, sub7), + $sub8, sub8), $sub9, sub9), + $sub10, sub10), $sub11, sub11), + $sub12, sub12), $sub13, sub13), + $sub14, sub14), $sub15, sub15) >; +// XXX: Convert to new syntax and use COPY_TO_REG, once the DFAPacketizer +// can handle COPY instructions. // bitconvert pattern class BitConvert <ValueType dt, ValueType st, RegisterClass rc> : Pat < (dt (bitconvert (st rc:$src0))), (dt rc:$src0) >; +// XXX: Convert to new syntax and use COPY_TO_REG, once the DFAPacketizer +// can handle COPY instructions. 
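The BFI_INT and SHA-256 patterns added a little further down (the BFIPatterns multiclass and SHA256MaPattern) rest on two boolean identities: with BFI(a, b, c) = (b & a) | (c & ~a) as in the ISA-doc comment, the Ch function z ^ (x & (y ^ z)) equals BFI(x, y, z), and the Ma (majority) function (x & z) | (y & (x | z)) equals BFI(x ^ y, z, y). A brute-force check is sketched below; the bfi helper name is illustrative only.

// Exhaustive check of the identities behind BFIPatterns / SHA256MaPattern,
// evaluated bitwise so the eight single-bit cases cover every lane.
#include <cassert>
#include <cstdint>

static uint32_t bfi(uint32_t a, uint32_t b, uint32_t c) {
  return (b & a) | (c & ~a); // "(y & x) | (z & ~x)" from the ISA-doc comment
}

int main() {
  for (uint32_t x = 0; x <= 1; ++x)
    for (uint32_t y = 0; y <= 1; ++y)
      for (uint32_t z = 0; z <= 1; ++z) {
        uint32_t ch = z ^ (x & (y ^ z));        // SHA-256 Ch
        uint32_t ma = (x & z) | (y & (x | z));  // SHA-256 Ma (majority)
        assert(ch == bfi(x, y, z));
        assert(ma == bfi(x ^ y, z, y));
      }
  return 0;
}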
class DwordAddrPat<ValueType vt, RegisterClass rc> : Pat < (vt (AMDGPUdwordaddr (vt rc:$addr))), (vt rc:$addr) >; +// BFI_INT patterns + +multiclass BFIPatterns <Instruction BFI_INT> { + + // Definition from ISA doc: + // (y & x) | (z & ~x) + def : Pat < + (or (and i32:$y, i32:$x), (and i32:$z, (not i32:$x))), + (BFI_INT $x, $y, $z) + >; + + // SHA-256 Ch function + // z ^ (x & (y ^ z)) + def : Pat < + (xor i32:$z, (and i32:$x, (xor i32:$y, i32:$z))), + (BFI_INT $x, $y, $z) + >; + +} + +// SHA-256 Ma patterns + +// ((x & z) | (y & (x | z))) -> BFI_INT (XOR x, y), z, y +class SHA256MaPattern <Instruction BFI_INT, Instruction XOR> : Pat < + (or (and i32:$x, i32:$z), (and i32:$y, (or i32:$x, i32:$z))), + (BFI_INT (XOR i32:$x, i32:$y), i32:$z, i32:$y) +>; + +// Bitfield extract patterns + +def legalshift32 : ImmLeaf <i32, [{return Imm >=0 && Imm < 32;}]>; +def bfemask : PatLeaf <(imm), [{return isMask_32(N->getZExtValue());}], + SDNodeXForm<imm, [{ return CurDAG->getTargetConstant(CountTrailingOnes_32(N->getZExtValue()), MVT::i32);}]>>; + +class BFEPattern <Instruction BFE> : Pat < + (and (srl i32:$x, legalshift32:$y), bfemask:$z), + (BFE $x, $y, $z) +>; + include "R600Instructions.td" include "SIInstrInfo.td" diff --git a/contrib/llvm/lib/Target/R600/AMDGPUMachineFunction.cpp b/contrib/llvm/lib/Target/R600/AMDGPUMachineFunction.cpp index 0223ec8..0461025 100644 --- a/contrib/llvm/lib/Target/R600/AMDGPUMachineFunction.cpp +++ b/contrib/llvm/lib/Target/R600/AMDGPUMachineFunction.cpp @@ -1,4 +1,5 @@ #include "AMDGPUMachineFunction.h" +#include "AMDGPU.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/Function.h" @@ -8,6 +9,7 @@ const char *AMDGPUMachineFunction::ShaderTypeAttribute = "ShaderType"; AMDGPUMachineFunction::AMDGPUMachineFunction(const MachineFunction &MF) : MachineFunctionInfo() { + ShaderType = ShaderType::COMPUTE; AttributeSet Set = MF.getFunction()->getAttributes(); Attribute A = Set.getAttribute(AttributeSet::FunctionIndex, ShaderTypeAttribute); diff --git a/contrib/llvm/lib/Target/R600/AMDGPUSubtarget.cpp b/contrib/llvm/lib/Target/R600/AMDGPUSubtarget.cpp index 0f356a1..a7e1d7b 100644 --- a/contrib/llvm/lib/Target/R600/AMDGPUSubtarget.cpp +++ b/contrib/llvm/lib/Target/R600/AMDGPUSubtarget.cpp @@ -33,6 +33,7 @@ AMDGPUSubtarget::AMDGPUSubtarget(StringRef TT, StringRef CPU, StringRef FS) : DefaultSize[0] = 64; DefaultSize[1] = 1; DefaultSize[2] = 1; + HasVertexCache = false; ParseSubtargetFeatures(GPU, FS); DevName = GPU; Device = AMDGPUDeviceInfo::getDeviceFromName(DevName, this, Is64bit); @@ -53,6 +54,10 @@ AMDGPUSubtarget::is64bit() const { return Is64bit; } bool +AMDGPUSubtarget::hasVertexCache() const { + return HasVertexCache; +} +bool AMDGPUSubtarget::isTargetELF() const { return false; } diff --git a/contrib/llvm/lib/Target/R600/AMDGPUSubtarget.h b/contrib/llvm/lib/Target/R600/AMDGPUSubtarget.h index 1973fc6..b6501a4 100644 --- a/contrib/llvm/lib/Target/R600/AMDGPUSubtarget.h +++ b/contrib/llvm/lib/Target/R600/AMDGPUSubtarget.h @@ -36,6 +36,7 @@ private: bool Is32on64bit; bool DumpCode; bool R600ALUInst; + bool HasVertexCache; InstrItineraryData InstrItins; @@ -48,6 +49,7 @@ public: bool isOverride(AMDGPUDeviceInfo::Caps) const; bool is64bit() const; + bool hasVertexCache() const; // Helper functions to simplify if statements bool isTargetELF() const; diff --git a/contrib/llvm/lib/Target/R600/AMDGPUTargetMachine.cpp b/contrib/llvm/lib/Target/R600/AMDGPUTargetMachine.cpp index e7ea876..31fbf32 100644 --- a/contrib/llvm/lib/Target/R600/AMDGPUTargetMachine.cpp +++ 
b/contrib/llvm/lib/Target/R600/AMDGPUTargetMachine.cpp @@ -115,7 +115,6 @@ AMDGPUPassConfig::addPreISel() { } bool AMDGPUPassConfig::addInstSelector() { - addPass(createAMDGPUPeepholeOpt(*TM)); addPass(createAMDGPUISelDag(getAMDGPUTargetMachine())); const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>(); @@ -153,8 +152,9 @@ bool AMDGPUPassConfig::addPreEmitPass() { addPass(createAMDGPUCFGStructurizerPass(*TM)); addPass(createR600EmitClauseMarkers(*TM)); addPass(createR600ExpandSpecialInstrsPass(*TM)); - addPass(createR600ControlFlowFinalizer(*TM)); addPass(&FinalizeMachineBundlesID); + addPass(createR600Packetizer(*TM)); + addPass(createR600ControlFlowFinalizer(*TM)); } else { addPass(createSILowerControlFlowPass(*TM)); } diff --git a/contrib/llvm/lib/Target/R600/AMDILBase.td b/contrib/llvm/lib/Target/R600/AMDILBase.td index c12cedc..e221110 100644 --- a/contrib/llvm/lib/Target/R600/AMDILBase.td +++ b/contrib/llvm/lib/Target/R600/AMDILBase.td @@ -74,6 +74,10 @@ def FeatureR600ALUInst : SubtargetFeature<"R600ALUInst", "false", "Older version of ALU instructions encoding.">; +def FeatureVertexCache : SubtargetFeature<"HasVertexCache", + "HasVertexCache", + "true", + "Specify use of dedicated vertex cache.">; //===----------------------------------------------------------------------===// // Register File, Calling Conv, Instruction Descriptions diff --git a/contrib/llvm/lib/Target/R600/AMDILDeviceInfo.cpp b/contrib/llvm/lib/Target/R600/AMDILDeviceInfo.cpp index 9605fbe..126514b 100644 --- a/contrib/llvm/lib/Target/R600/AMDILDeviceInfo.cpp +++ b/contrib/llvm/lib/Target/R600/AMDILDeviceInfo.cpp @@ -44,7 +44,7 @@ AMDGPUDevice* getDeviceFromName(const std::string &deviceName, " on 32bit pointers!"); #endif return new AMDGPUEvergreenDevice(ptr); - } else if (deviceName == "redwood") { + } else if (deviceName == "redwood" || deviceName == "sumo") { #if DEBUG assert(!is64bit && "This device does not support 64bit pointers!"); assert(!is64on32bit && "This device does not support 64bit" @@ -79,7 +79,10 @@ AMDGPUDevice* getDeviceFromName(const std::string &deviceName, " on 32bit pointers!"); #endif return new AMDGPUNIDevice(ptr); - } else if (deviceName == "SI") { + } else if (deviceName == "SI" || + deviceName == "tahiti" || deviceName == "pitcairn" || + deviceName == "verde" || deviceName == "oland" || + deviceName == "hainan") { return new AMDGPUSIDevice(ptr); } else { #if DEBUG diff --git a/contrib/llvm/lib/Target/R600/AMDILISelDAGToDAG.cpp b/contrib/llvm/lib/Target/R600/AMDILISelDAGToDAG.cpp index fa8f62d..ba75a44 100644 --- a/contrib/llvm/lib/Target/R600/AMDILISelDAGToDAG.cpp +++ b/contrib/llvm/lib/Target/R600/AMDILISelDAGToDAG.cpp @@ -191,6 +191,29 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) { return CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, N->getVTList(), RegSeqArgs, 2 * N->getNumOperands() + 1); } + case ISD::BUILD_PAIR: { + SDValue RC, SubReg0, SubReg1; + const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>(); + if (ST.device()->getGeneration() <= AMDGPUDeviceInfo::HD6XXX) { + break; + } + if (N->getValueType(0) == MVT::i128) { + RC = CurDAG->getTargetConstant(AMDGPU::SReg_128RegClassID, MVT::i32); + SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0_sub1, MVT::i32); + SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub2_sub3, MVT::i32); + } else if (N->getValueType(0) == MVT::i64) { + RC = CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, MVT::i32); + SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0, MVT::i32); + SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub1, 
MVT::i32); + } else { + llvm_unreachable("Unhandled value type for BUILD_PAIR"); + } + const SDValue Ops[] = { RC, N->getOperand(0), SubReg0, + N->getOperand(1), SubReg1 }; + return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, + N->getDebugLoc(), N->getValueType(0), Ops); + } + case ISD::ConstantFP: case ISD::Constant: { const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>(); diff --git a/contrib/llvm/lib/Target/R600/AMDILPeepholeOptimizer.cpp b/contrib/llvm/lib/Target/R600/AMDILPeepholeOptimizer.cpp deleted file mode 100644 index 3a28038..0000000 --- a/contrib/llvm/lib/Target/R600/AMDILPeepholeOptimizer.cpp +++ /dev/null @@ -1,1215 +0,0 @@ -//===-- AMDILPeepholeOptimizer.cpp - AMDGPU Peephole optimizations ---------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -/// \file -//==-----------------------------------------------------------------------===// - -#define DEBUG_TYPE "PeepholeOpt" -#ifdef DEBUG -#define DEBUGME (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) -#else -#define DEBUGME 0 -#endif - -#include "AMDILDevices.h" -#include "AMDGPUInstrInfo.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/ADT/StringExtras.h" -#include "llvm/ADT/StringRef.h" -#include "llvm/ADT/Twine.h" -#include "llvm/IR/Constants.h" -#include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineFunctionAnalysis.h" -#include "llvm/IR/Function.h" -#include "llvm/IR/Instructions.h" -#include "llvm/IR/Module.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/MathExtras.h" - -#include <sstream> - -#if 0 -STATISTIC(PointerAssignments, "Number of dynamic pointer " - "assigments discovered"); -STATISTIC(PointerSubtract, "Number of pointer subtractions discovered"); -#endif - -using namespace llvm; -// The Peephole optimization pass is used to do simple last minute optimizations -// that are required for correct code or to remove redundant functions -namespace { - -class OpaqueType; - -class LLVM_LIBRARY_VISIBILITY AMDGPUPeepholeOpt : public FunctionPass { -public: - TargetMachine &TM; - static char ID; - AMDGPUPeepholeOpt(TargetMachine &tm); - ~AMDGPUPeepholeOpt(); - const char *getPassName() const; - bool runOnFunction(Function &F); - bool doInitialization(Module &M); - bool doFinalization(Module &M); - void getAnalysisUsage(AnalysisUsage &AU) const; -protected: -private: - // Function to initiate all of the instruction level optimizations. - bool instLevelOptimizations(BasicBlock::iterator *inst); - // Quick check to see if we need to dump all of the pointers into the - // arena. If this is correct, then we set all pointers to exist in arena. This - // is a workaround for aliasing of pointers in a struct/union. - bool dumpAllIntoArena(Function &F); - // Because I don't want to invalidate any pointers while in the - // safeNestedForEachFunction. I push atomic conversions to a vector and handle - // it later. This function does the conversions if required. - void doAtomicConversionIfNeeded(Function &F); - // Because __amdil_is_constant cannot be properly evaluated if - // optimizations are disabled, the call's are placed in a vector - // and evaluated after the __amdil_image* functions are evaluated - // which should allow the __amdil_is_constant function to be - // evaluated correctly. 
- void doIsConstCallConversionIfNeeded(); - bool mChanged; - bool mDebug; - bool mConvertAtomics; - CodeGenOpt::Level optLevel; - // Run a series of tests to see if we can optimize a CALL instruction. - bool optimizeCallInst(BasicBlock::iterator *bbb); - // A peephole optimization to optimize bit extract sequences. - bool optimizeBitExtract(Instruction *inst); - // A peephole optimization to optimize bit insert sequences. - bool optimizeBitInsert(Instruction *inst); - bool setupBitInsert(Instruction *base, - Instruction *&src, - Constant *&mask, - Constant *&shift); - // Expand the bit field insert instruction on versions of OpenCL that - // don't support it. - bool expandBFI(CallInst *CI); - // Expand the bit field mask instruction on version of OpenCL that - // don't support it. - bool expandBFM(CallInst *CI); - // On 7XX and 8XX operations, we do not have 24 bit signed operations. So in - // this case we need to expand them. These functions check for 24bit functions - // and then expand. - bool isSigned24BitOps(CallInst *CI); - void expandSigned24BitOps(CallInst *CI); - // One optimization that can occur is that if the required workgroup size is - // specified then the result of get_local_size is known at compile time and - // can be returned accordingly. - bool isRWGLocalOpt(CallInst *CI); - // On northern island cards, the division is slightly less accurate than on - // previous generations, so we need to utilize a more accurate division. So we - // can translate the accurate divide to a normal divide on all other cards. - bool convertAccurateDivide(CallInst *CI); - void expandAccurateDivide(CallInst *CI); - // If the alignment is set incorrectly, it can produce really inefficient - // code. This checks for this scenario and fixes it if possible. - bool correctMisalignedMemOp(Instruction *inst); - - // If we are in no opt mode, then we need to make sure that - // local samplers are properly propagated as constant propagation - // doesn't occur and we need to know the value of kernel defined - // samplers at compile time. - bool propagateSamplerInst(CallInst *CI); - - // Helper functions - - // Group of functions that recursively calculate the size of a structure based - // on it's sub-types. - size_t getTypeSize(Type * const T, bool dereferencePtr = false); - size_t getTypeSize(StructType * const ST, bool dereferencePtr = false); - size_t getTypeSize(IntegerType * const IT, bool dereferencePtr = false); - size_t getTypeSize(FunctionType * const FT,bool dereferencePtr = false); - size_t getTypeSize(ArrayType * const AT, bool dereferencePtr = false); - size_t getTypeSize(VectorType * const VT, bool dereferencePtr = false); - size_t getTypeSize(PointerType * const PT, bool dereferencePtr = false); - size_t getTypeSize(OpaqueType * const OT, bool dereferencePtr = false); - - LLVMContext *mCTX; - Function *mF; - const AMDGPUSubtarget *mSTM; - SmallVector< std::pair<CallInst *, Function *>, 16> atomicFuncs; - SmallVector<CallInst *, 16> isConstVec; -}; // class AMDGPUPeepholeOpt - char AMDGPUPeepholeOpt::ID = 0; - -// A template function that has two levels of looping before calling the -// function with a pointer to the current iterator. 
-template<class InputIterator, class SecondIterator, class Function> -Function safeNestedForEach(InputIterator First, InputIterator Last, - SecondIterator S, Function F) { - for ( ; First != Last; ++First) { - SecondIterator sf, sl; - for (sf = First->begin(), sl = First->end(); - sf != sl; ) { - if (!F(&sf)) { - ++sf; - } - } - } - return F; -} - -} // anonymous namespace - -namespace llvm { - FunctionPass * - createAMDGPUPeepholeOpt(TargetMachine &tm) { - return new AMDGPUPeepholeOpt(tm); - } -} // llvm namespace - -AMDGPUPeepholeOpt::AMDGPUPeepholeOpt(TargetMachine &tm) - : FunctionPass(ID), TM(tm) { - mDebug = DEBUGME; - optLevel = TM.getOptLevel(); - -} - -AMDGPUPeepholeOpt::~AMDGPUPeepholeOpt() { -} - -const char * -AMDGPUPeepholeOpt::getPassName() const { - return "AMDGPU PeepHole Optimization Pass"; -} - -bool -containsPointerType(Type *Ty) { - if (!Ty) { - return false; - } - switch(Ty->getTypeID()) { - default: - return false; - case Type::StructTyID: { - const StructType *ST = dyn_cast<StructType>(Ty); - for (StructType::element_iterator stb = ST->element_begin(), - ste = ST->element_end(); stb != ste; ++stb) { - if (!containsPointerType(*stb)) { - continue; - } - return true; - } - break; - } - case Type::VectorTyID: - case Type::ArrayTyID: - return containsPointerType(dyn_cast<SequentialType>(Ty)->getElementType()); - case Type::PointerTyID: - return true; - }; - return false; -} - -bool -AMDGPUPeepholeOpt::dumpAllIntoArena(Function &F) { - bool dumpAll = false; - for (Function::const_arg_iterator cab = F.arg_begin(), - cae = F.arg_end(); cab != cae; ++cab) { - const Argument *arg = cab; - const PointerType *PT = dyn_cast<PointerType>(arg->getType()); - if (!PT) { - continue; - } - Type *DereferencedType = PT->getElementType(); - if (!dyn_cast<StructType>(DereferencedType) - ) { - continue; - } - if (!containsPointerType(DereferencedType)) { - continue; - } - // FIXME: Because a pointer inside of a struct/union may be aliased to - // another pointer we need to take the conservative approach and place all - // pointers into the arena until more advanced detection is implemented. - dumpAll = true; - } - return dumpAll; -} -void -AMDGPUPeepholeOpt::doIsConstCallConversionIfNeeded() { - if (isConstVec.empty()) { - return; - } - for (unsigned x = 0, y = isConstVec.size(); x < y; ++x) { - CallInst *CI = isConstVec[x]; - Constant *CV = dyn_cast<Constant>(CI->getOperand(0)); - Type *aType = Type::getInt32Ty(*mCTX); - Value *Val = (CV != NULL) ? ConstantInt::get(aType, 1) - : ConstantInt::get(aType, 0); - CI->replaceAllUsesWith(Val); - CI->eraseFromParent(); - } - isConstVec.clear(); -} -void -AMDGPUPeepholeOpt::doAtomicConversionIfNeeded(Function &F) { - // Don't do anything if we don't have any atomic operations. 
- if (atomicFuncs.empty()) { - return; - } - // Change the function name for the atomic if it is required - uint32_t size = atomicFuncs.size(); - for (uint32_t x = 0; x < size; ++x) { - atomicFuncs[x].first->setOperand( - atomicFuncs[x].first->getNumOperands()-1, - atomicFuncs[x].second); - - } - mChanged = true; - if (mConvertAtomics) { - return; - } -} - -bool -AMDGPUPeepholeOpt::runOnFunction(Function &MF) { - mChanged = false; - mF = &MF; - mSTM = &TM.getSubtarget<AMDGPUSubtarget>(); - if (mDebug) { - MF.dump(); - } - mCTX = &MF.getType()->getContext(); - mConvertAtomics = true; - safeNestedForEach(MF.begin(), MF.end(), MF.begin()->begin(), - std::bind1st(std::mem_fun(&AMDGPUPeepholeOpt::instLevelOptimizations), - this)); - - doAtomicConversionIfNeeded(MF); - doIsConstCallConversionIfNeeded(); - - if (mDebug) { - MF.dump(); - } - return mChanged; -} - -bool -AMDGPUPeepholeOpt::optimizeCallInst(BasicBlock::iterator *bbb) { - Instruction *inst = (*bbb); - CallInst *CI = dyn_cast<CallInst>(inst); - if (!CI) { - return false; - } - if (isSigned24BitOps(CI)) { - expandSigned24BitOps(CI); - ++(*bbb); - CI->eraseFromParent(); - return true; - } - if (propagateSamplerInst(CI)) { - return false; - } - if (expandBFI(CI) || expandBFM(CI)) { - ++(*bbb); - CI->eraseFromParent(); - return true; - } - if (convertAccurateDivide(CI)) { - expandAccurateDivide(CI); - ++(*bbb); - CI->eraseFromParent(); - return true; - } - - StringRef calleeName = CI->getOperand(CI->getNumOperands()-1)->getName(); - if (calleeName.startswith("__amdil_is_constant")) { - // If we do not have optimizations, then this - // cannot be properly evaluated, so we add the - // call instruction to a vector and process - // them at the end of processing after the - // samplers have been correctly handled. - if (optLevel == CodeGenOpt::None) { - isConstVec.push_back(CI); - return false; - } else { - Constant *CV = dyn_cast<Constant>(CI->getOperand(0)); - Type *aType = Type::getInt32Ty(*mCTX); - Value *Val = (CV != NULL) ? 
ConstantInt::get(aType, 1) - : ConstantInt::get(aType, 0); - CI->replaceAllUsesWith(Val); - ++(*bbb); - CI->eraseFromParent(); - return true; - } - } - - if (calleeName.equals("__amdil_is_asic_id_i32")) { - ConstantInt *CV = dyn_cast<ConstantInt>(CI->getOperand(0)); - Type *aType = Type::getInt32Ty(*mCTX); - Value *Val = CV; - if (Val) { - Val = ConstantInt::get(aType, - mSTM->device()->getDeviceFlag() & CV->getZExtValue()); - } else { - Val = ConstantInt::get(aType, 0); - } - CI->replaceAllUsesWith(Val); - ++(*bbb); - CI->eraseFromParent(); - return true; - } - Function *F = dyn_cast<Function>(CI->getOperand(CI->getNumOperands()-1)); - if (!F) { - return false; - } - if (F->getName().startswith("__atom") && !CI->getNumUses() - && F->getName().find("_xchg") == StringRef::npos) { - std::string buffer(F->getName().str() + "_noret"); - F = dyn_cast<Function>( - F->getParent()->getOrInsertFunction(buffer, F->getFunctionType())); - atomicFuncs.push_back(std::make_pair(CI, F)); - } - - if (!mSTM->device()->isSupported(AMDGPUDeviceInfo::ArenaSegment) - && !mSTM->device()->isSupported(AMDGPUDeviceInfo::MultiUAV)) { - return false; - } - if (!mConvertAtomics) { - return false; - } - StringRef name = F->getName(); - if (name.startswith("__atom") && name.find("_g") != StringRef::npos) { - mConvertAtomics = false; - } - return false; -} - -bool -AMDGPUPeepholeOpt::setupBitInsert(Instruction *base, - Instruction *&src, - Constant *&mask, - Constant *&shift) { - if (!base) { - if (mDebug) { - dbgs() << "Null pointer passed into function.\n"; - } - return false; - } - bool andOp = false; - if (base->getOpcode() == Instruction::Shl) { - shift = dyn_cast<Constant>(base->getOperand(1)); - } else if (base->getOpcode() == Instruction::And) { - mask = dyn_cast<Constant>(base->getOperand(1)); - andOp = true; - } else { - if (mDebug) { - dbgs() << "Failed setup with no Shl or And instruction on base opcode!\n"; - } - // If the base is neither a Shl or a And, we don't fit any of the patterns above. - return false; - } - src = dyn_cast<Instruction>(base->getOperand(0)); - if (!src) { - if (mDebug) { - dbgs() << "Failed setup since the base operand is not an instruction!\n"; - } - return false; - } - // If we find an 'and' operation, then we don't need to - // find the next operation as we already know the - // bits that are valid at this point. - if (andOp) { - return true; - } - if (src->getOpcode() == Instruction::Shl && !shift) { - shift = dyn_cast<Constant>(src->getOperand(1)); - src = dyn_cast<Instruction>(src->getOperand(0)); - } else if (src->getOpcode() == Instruction::And && !mask) { - mask = dyn_cast<Constant>(src->getOperand(1)); - } - if (!mask && !shift) { - if (mDebug) { - dbgs() << "Failed setup since both mask and shift are NULL!\n"; - } - // Did not find a constant mask or a shift. - return false; - } - return true; -} -bool -AMDGPUPeepholeOpt::optimizeBitInsert(Instruction *inst) { - if (!inst) { - return false; - } - if (!inst->isBinaryOp()) { - return false; - } - if (inst->getOpcode() != Instruction::Or) { - return false; - } - if (optLevel == CodeGenOpt::None) { - return false; - } - // We want to do an optimization on a sequence of ops that in the end equals a - // single ISA instruction. 
- // The base pattern for this optimization is - ((A & B) << C) | ((D & E) << F) - // Some simplified versions of this pattern are as follows: - // (A & B) | (D & E) when B & E == 0 && C == 0 && F == 0 - // ((A & B) << C) | (D & E) when B ^ E == 0 && (1 << C) >= E - // (A & B) | ((D & E) << F) when B ^ E == 0 && (1 << F) >= B - // (A & B) | (D << F) when (1 << F) >= B - // (A << C) | (D & E) when (1 << C) >= E - if (mSTM->device()->getGeneration() == AMDGPUDeviceInfo::HD4XXX) { - // The HD4XXX hardware doesn't support the ubit_insert instruction. - return false; - } - Type *aType = inst->getType(); - bool isVector = aType->isVectorTy(); - int numEle = 1; - // This optimization only works on 32bit integers. - if (aType->getScalarType() - != Type::getInt32Ty(inst->getContext())) { - return false; - } - if (isVector) { - const VectorType *VT = dyn_cast<VectorType>(aType); - numEle = VT->getNumElements(); - // We currently cannot support more than 4 elements in a intrinsic and we - // cannot support Vec3 types. - if (numEle > 4 || numEle == 3) { - return false; - } - } - // TODO: Handle vectors. - if (isVector) { - if (mDebug) { - dbgs() << "!!! Vectors are not supported yet!\n"; - } - return false; - } - Instruction *LHSSrc = NULL, *RHSSrc = NULL; - Constant *LHSMask = NULL, *RHSMask = NULL; - Constant *LHSShift = NULL, *RHSShift = NULL; - Instruction *LHS = dyn_cast<Instruction>(inst->getOperand(0)); - Instruction *RHS = dyn_cast<Instruction>(inst->getOperand(1)); - if (!setupBitInsert(LHS, LHSSrc, LHSMask, LHSShift)) { - if (mDebug) { - dbgs() << "Found an OR Operation that failed setup!\n"; - inst->dump(); - if (LHS) { LHS->dump(); } - if (LHSSrc) { LHSSrc->dump(); } - if (LHSMask) { LHSMask->dump(); } - if (LHSShift) { LHSShift->dump(); } - } - // There was an issue with the setup for BitInsert. - return false; - } - if (!setupBitInsert(RHS, RHSSrc, RHSMask, RHSShift)) { - if (mDebug) { - dbgs() << "Found an OR Operation that failed setup!\n"; - inst->dump(); - if (RHS) { RHS->dump(); } - if (RHSSrc) { RHSSrc->dump(); } - if (RHSMask) { RHSMask->dump(); } - if (RHSShift) { RHSShift->dump(); } - } - // There was an issue with the setup for BitInsert. - return false; - } - if (mDebug) { - dbgs() << "Found an OR operation that can possible be optimized to ubit insert!\n"; - dbgs() << "Op: "; inst->dump(); - dbgs() << "LHS: "; if (LHS) { LHS->dump(); } else { dbgs() << "(None)\n"; } - dbgs() << "LHS Src: "; if (LHSSrc) { LHSSrc->dump(); } else { dbgs() << "(None)\n"; } - dbgs() << "LHS Mask: "; if (LHSMask) { LHSMask->dump(); } else { dbgs() << "(None)\n"; } - dbgs() << "LHS Shift: "; if (LHSShift) { LHSShift->dump(); } else { dbgs() << "(None)\n"; } - dbgs() << "RHS: "; if (RHS) { RHS->dump(); } else { dbgs() << "(None)\n"; } - dbgs() << "RHS Src: "; if (RHSSrc) { RHSSrc->dump(); } else { dbgs() << "(None)\n"; } - dbgs() << "RHS Mask: "; if (RHSMask) { RHSMask->dump(); } else { dbgs() << "(None)\n"; } - dbgs() << "RHS Shift: "; if (RHSShift) { RHSShift->dump(); } else { dbgs() << "(None)\n"; } - } - Constant *offset = NULL; - Constant *width = NULL; - uint32_t lhsMaskVal = 0, rhsMaskVal = 0; - uint32_t lhsShiftVal = 0, rhsShiftVal = 0; - uint32_t lhsMaskWidth = 0, rhsMaskWidth = 0; - uint32_t lhsMaskOffset = 0, rhsMaskOffset = 0; - lhsMaskVal = (LHSMask - ? dyn_cast<ConstantInt>(LHSMask)->getZExtValue() : 0); - rhsMaskVal = (RHSMask - ? dyn_cast<ConstantInt>(RHSMask)->getZExtValue() : 0); - lhsShiftVal = (LHSShift - ? 
dyn_cast<ConstantInt>(LHSShift)->getZExtValue() : 0); - rhsShiftVal = (RHSShift - ? dyn_cast<ConstantInt>(RHSShift)->getZExtValue() : 0); - lhsMaskWidth = lhsMaskVal ? CountPopulation_32(lhsMaskVal) : 32 - lhsShiftVal; - rhsMaskWidth = rhsMaskVal ? CountPopulation_32(rhsMaskVal) : 32 - rhsShiftVal; - lhsMaskOffset = lhsMaskVal ? CountTrailingZeros_32(lhsMaskVal) : lhsShiftVal; - rhsMaskOffset = rhsMaskVal ? CountTrailingZeros_32(rhsMaskVal) : rhsShiftVal; - // TODO: Handle the case of A & B | D & ~B(i.e. inverted masks). - if ((lhsMaskVal || rhsMaskVal) && !(lhsMaskVal ^ rhsMaskVal)) { - return false; - } - if (lhsMaskOffset >= (rhsMaskWidth + rhsMaskOffset)) { - offset = ConstantInt::get(aType, lhsMaskOffset, false); - width = ConstantInt::get(aType, lhsMaskWidth, false); - RHSSrc = RHS; - if (!isMask_32(lhsMaskVal) && !isShiftedMask_32(lhsMaskVal)) { - return false; - } - if (!LHSShift) { - LHSSrc = BinaryOperator::Create(Instruction::LShr, LHSSrc, offset, - "MaskShr", LHS); - } else if (lhsShiftVal != lhsMaskOffset) { - LHSSrc = BinaryOperator::Create(Instruction::LShr, LHSSrc, offset, - "MaskShr", LHS); - } - if (mDebug) { - dbgs() << "Optimizing LHS!\n"; - } - } else if (rhsMaskOffset >= (lhsMaskWidth + lhsMaskOffset)) { - offset = ConstantInt::get(aType, rhsMaskOffset, false); - width = ConstantInt::get(aType, rhsMaskWidth, false); - LHSSrc = RHSSrc; - RHSSrc = LHS; - if (!isMask_32(rhsMaskVal) && !isShiftedMask_32(rhsMaskVal)) { - return false; - } - if (!RHSShift) { - LHSSrc = BinaryOperator::Create(Instruction::LShr, LHSSrc, offset, - "MaskShr", RHS); - } else if (rhsShiftVal != rhsMaskOffset) { - LHSSrc = BinaryOperator::Create(Instruction::LShr, LHSSrc, offset, - "MaskShr", RHS); - } - if (mDebug) { - dbgs() << "Optimizing RHS!\n"; - } - } else { - if (mDebug) { - dbgs() << "Failed constraint 3!\n"; - } - return false; - } - if (mDebug) { - dbgs() << "Width: "; if (width) { width->dump(); } else { dbgs() << "(0)\n"; } - dbgs() << "Offset: "; if (offset) { offset->dump(); } else { dbgs() << "(0)\n"; } - dbgs() << "LHSSrc: "; if (LHSSrc) { LHSSrc->dump(); } else { dbgs() << "(0)\n"; } - dbgs() << "RHSSrc: "; if (RHSSrc) { RHSSrc->dump(); } else { dbgs() << "(0)\n"; } - } - if (!offset || !width) { - if (mDebug) { - dbgs() << "Either width or offset are NULL, failed detection!\n"; - } - return false; - } - // Lets create the function signature. 
- std::vector<Type *> callTypes; - callTypes.push_back(aType); - callTypes.push_back(aType); - callTypes.push_back(aType); - callTypes.push_back(aType); - FunctionType *funcType = FunctionType::get(aType, callTypes, false); - std::string name = "__amdil_ubit_insert"; - if (isVector) { name += "_v" + itostr(numEle) + "u32"; } else { name += "_u32"; } - Function *Func = - dyn_cast<Function>(inst->getParent()->getParent()->getParent()-> - getOrInsertFunction(StringRef(name), funcType)); - Value *Operands[4] = { - width, - offset, - LHSSrc, - RHSSrc - }; - CallInst *CI = CallInst::Create(Func, Operands, "BitInsertOpt"); - if (mDebug) { - dbgs() << "Old Inst: "; - inst->dump(); - dbgs() << "New Inst: "; - CI->dump(); - dbgs() << "\n\n"; - } - CI->insertBefore(inst); - inst->replaceAllUsesWith(CI); - return true; -} - -bool -AMDGPUPeepholeOpt::optimizeBitExtract(Instruction *inst) { - if (!inst) { - return false; - } - if (!inst->isBinaryOp()) { - return false; - } - if (inst->getOpcode() != Instruction::And) { - return false; - } - if (optLevel == CodeGenOpt::None) { - return false; - } - // We want to do some simple optimizations on Shift right/And patterns. The - // basic optimization is to turn (A >> B) & C where A is a 32bit type, B is a - // value smaller than 32 and C is a mask. If C is a constant value, then the - // following transformation can occur. For signed integers, it turns into the - // function call dst = __amdil_ibit_extract(log2(C), B, A) For unsigned - // integers, it turns into the function call dst = - // __amdil_ubit_extract(log2(C), B, A) The function __amdil_[u|i]bit_extract - // can be found in Section 7.9 of the ATI IL spec of the stream SDK for - // Evergreen hardware. - if (mSTM->device()->getGeneration() == AMDGPUDeviceInfo::HD4XXX) { - // This does not work on HD4XXX hardware. - return false; - } - Type *aType = inst->getType(); - bool isVector = aType->isVectorTy(); - - // XXX Support vector types - if (isVector) { - return false; - } - int numEle = 1; - // This only works on 32bit integers - if (aType->getScalarType() - != Type::getInt32Ty(inst->getContext())) { - return false; - } - if (isVector) { - const VectorType *VT = dyn_cast<VectorType>(aType); - numEle = VT->getNumElements(); - // We currently cannot support more than 4 elements in a intrinsic and we - // cannot support Vec3 types. - if (numEle > 4 || numEle == 3) { - return false; - } - } - BinaryOperator *ShiftInst = dyn_cast<BinaryOperator>(inst->getOperand(0)); - // If the first operand is not a shift instruction, then we can return as it - // doesn't match this pattern. - if (!ShiftInst || !ShiftInst->isShift()) { - return false; - } - // If we are a shift left, then we need don't match this pattern. - if (ShiftInst->getOpcode() == Instruction::Shl) { - return false; - } - bool isSigned = ShiftInst->isArithmeticShift(); - Constant *AndMask = dyn_cast<Constant>(inst->getOperand(1)); - Constant *ShrVal = dyn_cast<Constant>(ShiftInst->getOperand(1)); - // Lets make sure that the shift value and the and mask are constant integers. 
- if (!AndMask || !ShrVal) { - return false; - } - Constant *newMaskConst; - Constant *shiftValConst; - if (isVector) { - // Handle the vector case - std::vector<Constant *> maskVals; - std::vector<Constant *> shiftVals; - ConstantVector *AndMaskVec = dyn_cast<ConstantVector>(AndMask); - ConstantVector *ShrValVec = dyn_cast<ConstantVector>(ShrVal); - Type *scalarType = AndMaskVec->getType()->getScalarType(); - assert(AndMaskVec->getNumOperands() == - ShrValVec->getNumOperands() && "cannot have a " - "combination where the number of elements to a " - "shift and an and are different!"); - for (size_t x = 0, y = AndMaskVec->getNumOperands(); x < y; ++x) { - ConstantInt *AndCI = dyn_cast<ConstantInt>(AndMaskVec->getOperand(x)); - ConstantInt *ShiftIC = dyn_cast<ConstantInt>(ShrValVec->getOperand(x)); - if (!AndCI || !ShiftIC) { - return false; - } - uint32_t maskVal = (uint32_t)AndCI->getZExtValue(); - if (!isMask_32(maskVal)) { - return false; - } - maskVal = (uint32_t)CountTrailingOnes_32(maskVal); - uint32_t shiftVal = (uint32_t)ShiftIC->getZExtValue(); - // If the mask or shiftval is greater than the bitcount, then break out. - if (maskVal >= 32 || shiftVal >= 32) { - return false; - } - // If the mask val is greater than the the number of original bits left - // then this optimization is invalid. - if (maskVal > (32 - shiftVal)) { - return false; - } - maskVals.push_back(ConstantInt::get(scalarType, maskVal, isSigned)); - shiftVals.push_back(ConstantInt::get(scalarType, shiftVal, isSigned)); - } - newMaskConst = ConstantVector::get(maskVals); - shiftValConst = ConstantVector::get(shiftVals); - } else { - // Handle the scalar case - uint32_t maskVal = (uint32_t)dyn_cast<ConstantInt>(AndMask)->getZExtValue(); - // This must be a mask value where all lower bits are set to 1 and then any - // bit higher is set to 0. - if (!isMask_32(maskVal)) { - return false; - } - maskVal = (uint32_t)CountTrailingOnes_32(maskVal); - // Count the number of bits set in the mask, this is the width of the - // resulting bit set that is extracted from the source value. - uint32_t shiftVal = (uint32_t)dyn_cast<ConstantInt>(ShrVal)->getZExtValue(); - // If the mask or shift val is greater than the bitcount, then break out. - if (maskVal >= 32 || shiftVal >= 32) { - return false; - } - // If the mask val is greater than the the number of original bits left then - // this optimization is invalid. - if (maskVal > (32 - shiftVal)) { - return false; - } - newMaskConst = ConstantInt::get(aType, maskVal, isSigned); - shiftValConst = ConstantInt::get(aType, shiftVal, isSigned); - } - // Lets create the function signature. - std::vector<Type *> callTypes; - callTypes.push_back(aType); - callTypes.push_back(aType); - callTypes.push_back(aType); - FunctionType *funcType = FunctionType::get(aType, callTypes, false); - std::string name = "llvm.AMDGPU.bit.extract.u32"; - if (isVector) { - name += ".v" + itostr(numEle) + "i32"; - } else { - name += "."; - } - // Lets create the function. 
- Function *Func = - dyn_cast<Function>(inst->getParent()->getParent()->getParent()-> - getOrInsertFunction(StringRef(name), funcType)); - Value *Operands[3] = { - ShiftInst->getOperand(0), - shiftValConst, - newMaskConst - }; - // Lets create the Call with the operands - CallInst *CI = CallInst::Create(Func, Operands, "ByteExtractOpt"); - CI->setDoesNotAccessMemory(); - CI->insertBefore(inst); - inst->replaceAllUsesWith(CI); - return true; -} - -bool -AMDGPUPeepholeOpt::expandBFI(CallInst *CI) { - if (!CI) { - return false; - } - Value *LHS = CI->getOperand(CI->getNumOperands() - 1); - if (!LHS->getName().startswith("__amdil_bfi")) { - return false; - } - Type* type = CI->getOperand(0)->getType(); - Constant *negOneConst = NULL; - if (type->isVectorTy()) { - std::vector<Constant *> negOneVals; - negOneConst = ConstantInt::get(CI->getContext(), - APInt(32, StringRef("-1"), 10)); - for (size_t x = 0, - y = dyn_cast<VectorType>(type)->getNumElements(); x < y; ++x) { - negOneVals.push_back(negOneConst); - } - negOneConst = ConstantVector::get(negOneVals); - } else { - negOneConst = ConstantInt::get(CI->getContext(), - APInt(32, StringRef("-1"), 10)); - } - // __amdil_bfi => (A & B) | (~A & C) - BinaryOperator *lhs = - BinaryOperator::Create(Instruction::And, CI->getOperand(0), - CI->getOperand(1), "bfi_and", CI); - BinaryOperator *rhs = - BinaryOperator::Create(Instruction::Xor, CI->getOperand(0), negOneConst, - "bfi_not", CI); - rhs = BinaryOperator::Create(Instruction::And, rhs, CI->getOperand(2), - "bfi_and", CI); - lhs = BinaryOperator::Create(Instruction::Or, lhs, rhs, "bfi_or", CI); - CI->replaceAllUsesWith(lhs); - return true; -} - -bool -AMDGPUPeepholeOpt::expandBFM(CallInst *CI) { - if (!CI) { - return false; - } - Value *LHS = CI->getOperand(CI->getNumOperands() - 1); - if (!LHS->getName().startswith("__amdil_bfm")) { - return false; - } - // __amdil_bfm => ((1 << (src0 & 0x1F)) - 1) << (src1 & 0x1f) - Constant *newMaskConst = NULL; - Constant *newShiftConst = NULL; - Type* type = CI->getOperand(0)->getType(); - if (type->isVectorTy()) { - std::vector<Constant*> newMaskVals, newShiftVals; - newMaskConst = ConstantInt::get(Type::getInt32Ty(*mCTX), 0x1F); - newShiftConst = ConstantInt::get(Type::getInt32Ty(*mCTX), 1); - for (size_t x = 0, - y = dyn_cast<VectorType>(type)->getNumElements(); x < y; ++x) { - newMaskVals.push_back(newMaskConst); - newShiftVals.push_back(newShiftConst); - } - newMaskConst = ConstantVector::get(newMaskVals); - newShiftConst = ConstantVector::get(newShiftVals); - } else { - newMaskConst = ConstantInt::get(Type::getInt32Ty(*mCTX), 0x1F); - newShiftConst = ConstantInt::get(Type::getInt32Ty(*mCTX), 1); - } - BinaryOperator *lhs = - BinaryOperator::Create(Instruction::And, CI->getOperand(0), - newMaskConst, "bfm_mask", CI); - lhs = BinaryOperator::Create(Instruction::Shl, newShiftConst, - lhs, "bfm_shl", CI); - lhs = BinaryOperator::Create(Instruction::Sub, lhs, - newShiftConst, "bfm_sub", CI); - BinaryOperator *rhs = - BinaryOperator::Create(Instruction::And, CI->getOperand(1), - newMaskConst, "bfm_mask", CI); - lhs = BinaryOperator::Create(Instruction::Shl, lhs, rhs, "bfm_shl", CI); - CI->replaceAllUsesWith(lhs); - return true; -} - -bool -AMDGPUPeepholeOpt::instLevelOptimizations(BasicBlock::iterator *bbb) { - Instruction *inst = (*bbb); - if (optimizeCallInst(bbb)) { - return true; - } - if (optimizeBitExtract(inst)) { - return false; - } - if (optimizeBitInsert(inst)) { - return false; - } - if (correctMisalignedMemOp(inst)) { - return false; - } - return 
false; -} -bool -AMDGPUPeepholeOpt::correctMisalignedMemOp(Instruction *inst) { - LoadInst *linst = dyn_cast<LoadInst>(inst); - StoreInst *sinst = dyn_cast<StoreInst>(inst); - unsigned alignment; - Type* Ty = inst->getType(); - if (linst) { - alignment = linst->getAlignment(); - Ty = inst->getType(); - } else if (sinst) { - alignment = sinst->getAlignment(); - Ty = sinst->getValueOperand()->getType(); - } else { - return false; - } - unsigned size = getTypeSize(Ty); - if (size == alignment || size < alignment) { - return false; - } - if (!Ty->isStructTy()) { - return false; - } - if (alignment < 4) { - if (linst) { - linst->setAlignment(0); - return true; - } else if (sinst) { - sinst->setAlignment(0); - return true; - } - } - return false; -} -bool -AMDGPUPeepholeOpt::isSigned24BitOps(CallInst *CI) { - if (!CI) { - return false; - } - Value *LHS = CI->getOperand(CI->getNumOperands() - 1); - std::string namePrefix = LHS->getName().substr(0, 14); - if (namePrefix != "__amdil_imad24" && namePrefix != "__amdil_imul24" - && namePrefix != "__amdil__imul24_high") { - return false; - } - if (mSTM->device()->usesHardware(AMDGPUDeviceInfo::Signed24BitOps)) { - return false; - } - return true; -} - -void -AMDGPUPeepholeOpt::expandSigned24BitOps(CallInst *CI) { - assert(isSigned24BitOps(CI) && "Must be a " - "signed 24 bit operation to call this function!"); - Value *LHS = CI->getOperand(CI->getNumOperands()-1); - // On 7XX and 8XX we do not have signed 24bit, so we need to - // expand it to the following: - // imul24 turns into 32bit imul - // imad24 turns into 32bit imad - // imul24_high turns into 32bit imulhigh - if (LHS->getName().substr(0, 14) == "__amdil_imad24") { - Type *aType = CI->getOperand(0)->getType(); - bool isVector = aType->isVectorTy(); - int numEle = isVector ? dyn_cast<VectorType>(aType)->getNumElements() : 1; - std::vector<Type*> callTypes; - callTypes.push_back(CI->getOperand(0)->getType()); - callTypes.push_back(CI->getOperand(1)->getType()); - callTypes.push_back(CI->getOperand(2)->getType()); - FunctionType *funcType = - FunctionType::get(CI->getOperand(0)->getType(), callTypes, false); - std::string name = "__amdil_imad"; - if (isVector) { - name += "_v" + itostr(numEle) + "i32"; - } else { - name += "_i32"; - } - Function *Func = dyn_cast<Function>( - CI->getParent()->getParent()->getParent()-> - getOrInsertFunction(StringRef(name), funcType)); - Value *Operands[3] = { - CI->getOperand(0), - CI->getOperand(1), - CI->getOperand(2) - }; - CallInst *nCI = CallInst::Create(Func, Operands, "imad24"); - nCI->insertBefore(CI); - CI->replaceAllUsesWith(nCI); - } else if (LHS->getName().substr(0, 14) == "__amdil_imul24") { - BinaryOperator *mulOp = - BinaryOperator::Create(Instruction::Mul, CI->getOperand(0), - CI->getOperand(1), "imul24", CI); - CI->replaceAllUsesWith(mulOp); - } else if (LHS->getName().substr(0, 19) == "__amdil_imul24_high") { - Type *aType = CI->getOperand(0)->getType(); - - bool isVector = aType->isVectorTy(); - int numEle = isVector ? 
dyn_cast<VectorType>(aType)->getNumElements() : 1; - std::vector<Type*> callTypes; - callTypes.push_back(CI->getOperand(0)->getType()); - callTypes.push_back(CI->getOperand(1)->getType()); - FunctionType *funcType = - FunctionType::get(CI->getOperand(0)->getType(), callTypes, false); - std::string name = "__amdil_imul_high"; - if (isVector) { - name += "_v" + itostr(numEle) + "i32"; - } else { - name += "_i32"; - } - Function *Func = dyn_cast<Function>( - CI->getParent()->getParent()->getParent()-> - getOrInsertFunction(StringRef(name), funcType)); - Value *Operands[2] = { - CI->getOperand(0), - CI->getOperand(1) - }; - CallInst *nCI = CallInst::Create(Func, Operands, "imul24_high"); - nCI->insertBefore(CI); - CI->replaceAllUsesWith(nCI); - } -} - -bool -AMDGPUPeepholeOpt::isRWGLocalOpt(CallInst *CI) { - return (CI != NULL - && CI->getOperand(CI->getNumOperands() - 1)->getName() - == "__amdil_get_local_size_int"); -} - -bool -AMDGPUPeepholeOpt::convertAccurateDivide(CallInst *CI) { - if (!CI) { - return false; - } - if (mSTM->device()->getGeneration() == AMDGPUDeviceInfo::HD6XXX - && (mSTM->getDeviceName() == "cayman")) { - return false; - } - return CI->getOperand(CI->getNumOperands() - 1)->getName().substr(0, 20) - == "__amdil_improved_div"; -} - -void -AMDGPUPeepholeOpt::expandAccurateDivide(CallInst *CI) { - assert(convertAccurateDivide(CI) - && "expanding accurate divide can only happen if it is expandable!"); - BinaryOperator *divOp = - BinaryOperator::Create(Instruction::FDiv, CI->getOperand(0), - CI->getOperand(1), "fdiv32", CI); - CI->replaceAllUsesWith(divOp); -} - -bool -AMDGPUPeepholeOpt::propagateSamplerInst(CallInst *CI) { - if (optLevel != CodeGenOpt::None) { - return false; - } - - if (!CI) { - return false; - } - - unsigned funcNameIdx = 0; - funcNameIdx = CI->getNumOperands() - 1; - StringRef calleeName = CI->getOperand(funcNameIdx)->getName(); - if (calleeName != "__amdil_image2d_read_norm" - && calleeName != "__amdil_image2d_read_unnorm" - && calleeName != "__amdil_image3d_read_norm" - && calleeName != "__amdil_image3d_read_unnorm") { - return false; - } - - unsigned samplerIdx = 2; - samplerIdx = 1; - Value *sampler = CI->getOperand(samplerIdx); - LoadInst *lInst = dyn_cast<LoadInst>(sampler); - if (!lInst) { - return false; - } - - if (lInst->getPointerAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) { - return false; - } - - GlobalVariable *gv = dyn_cast<GlobalVariable>(lInst->getPointerOperand()); - // If we are loading from what is not a global value, then we - // fail and return. - if (!gv) { - return false; - } - - // If we don't have an initializer or we have an initializer and - // the initializer is not a 32bit integer, we fail. - if (!gv->hasInitializer() - || !gv->getInitializer()->getType()->isIntegerTy(32)) { - return false; - } - - // Now that we have the global variable initializer, lets replace - // all uses of the load instruction with the samplerVal and - // reparse the __amdil_is_constant() function. 
- Constant *samplerVal = gv->getInitializer(); - lInst->replaceAllUsesWith(samplerVal); - return true; -} - -bool -AMDGPUPeepholeOpt::doInitialization(Module &M) { - return false; -} - -bool -AMDGPUPeepholeOpt::doFinalization(Module &M) { - return false; -} - -void -AMDGPUPeepholeOpt::getAnalysisUsage(AnalysisUsage &AU) const { - AU.addRequired<MachineFunctionAnalysis>(); - FunctionPass::getAnalysisUsage(AU); - AU.setPreservesAll(); -} - -size_t AMDGPUPeepholeOpt::getTypeSize(Type * const T, bool dereferencePtr) { - size_t size = 0; - if (!T) { - return size; - } - switch (T->getTypeID()) { - case Type::X86_FP80TyID: - case Type::FP128TyID: - case Type::PPC_FP128TyID: - case Type::LabelTyID: - assert(0 && "These types are not supported by this backend"); - default: - case Type::FloatTyID: - case Type::DoubleTyID: - size = T->getPrimitiveSizeInBits() >> 3; - break; - case Type::PointerTyID: - size = getTypeSize(dyn_cast<PointerType>(T), dereferencePtr); - break; - case Type::IntegerTyID: - size = getTypeSize(dyn_cast<IntegerType>(T), dereferencePtr); - break; - case Type::StructTyID: - size = getTypeSize(dyn_cast<StructType>(T), dereferencePtr); - break; - case Type::ArrayTyID: - size = getTypeSize(dyn_cast<ArrayType>(T), dereferencePtr); - break; - case Type::FunctionTyID: - size = getTypeSize(dyn_cast<FunctionType>(T), dereferencePtr); - break; - case Type::VectorTyID: - size = getTypeSize(dyn_cast<VectorType>(T), dereferencePtr); - break; - }; - return size; -} - -size_t AMDGPUPeepholeOpt::getTypeSize(StructType * const ST, - bool dereferencePtr) { - size_t size = 0; - if (!ST) { - return size; - } - Type *curType; - StructType::element_iterator eib; - StructType::element_iterator eie; - for (eib = ST->element_begin(), eie = ST->element_end(); eib != eie; ++eib) { - curType = *eib; - size += getTypeSize(curType, dereferencePtr); - } - return size; -} - -size_t AMDGPUPeepholeOpt::getTypeSize(IntegerType * const IT, - bool dereferencePtr) { - return IT ? (IT->getBitWidth() >> 3) : 0; -} - -size_t AMDGPUPeepholeOpt::getTypeSize(FunctionType * const FT, - bool dereferencePtr) { - assert(0 && "Should not be able to calculate the size of an function type"); - return 0; -} - -size_t AMDGPUPeepholeOpt::getTypeSize(ArrayType * const AT, - bool dereferencePtr) { - return (size_t)(AT ? (getTypeSize(AT->getElementType(), - dereferencePtr) * AT->getNumElements()) - : 0); -} - -size_t AMDGPUPeepholeOpt::getTypeSize(VectorType * const VT, - bool dereferencePtr) { - return VT ? 
(VT->getBitWidth() >> 3) : 0; -} - -size_t AMDGPUPeepholeOpt::getTypeSize(PointerType * const PT, - bool dereferencePtr) { - if (!PT) { - return 0; - } - Type *CT = PT->getElementType(); - if (CT->getTypeID() == Type::StructTyID && - PT->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS) { - return getTypeSize(dyn_cast<StructType>(CT)); - } else if (dereferencePtr) { - size_t size = 0; - for (size_t x = 0, y = PT->getNumContainedTypes(); x < y; ++x) { - size += getTypeSize(PT->getContainedType(x), dereferencePtr); - } - return size; - } else { - return 4; - } -} - -size_t AMDGPUPeepholeOpt::getTypeSize(OpaqueType * const OT, - bool dereferencePtr) { - //assert(0 && "Should not be able to calculate the size of an opaque type"); - return 4; -} diff --git a/contrib/llvm/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp b/contrib/llvm/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp index 10547a5..303cdf2 100644 --- a/contrib/llvm/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp +++ b/contrib/llvm/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp @@ -17,6 +17,7 @@ using namespace llvm; void AMDGPUInstPrinter::printInst(const MCInst *MI, raw_ostream &OS, StringRef Annot) { + OS.flush(); printInstruction(MI, OS); printAnnotation(OS, Annot); @@ -67,11 +68,14 @@ void AMDGPUInstPrinter::printMemOperand(const MCInst *MI, unsigned OpNo, } void AMDGPUInstPrinter::printIfSet(const MCInst *MI, unsigned OpNo, - raw_ostream &O, StringRef Asm) { + raw_ostream &O, StringRef Asm, + StringRef Default) { const MCOperand &Op = MI->getOperand(OpNo); assert(Op.isImm()); if (Op.getImm() == 1) { O << Asm; + } else { + O << Default; } } @@ -98,7 +102,7 @@ void AMDGPUInstPrinter::printLiteral(const MCInst *MI, unsigned OpNo, void AMDGPUInstPrinter::printLast(const MCInst *MI, unsigned OpNo, raw_ostream &O) { - printIfSet(MI, OpNo, O, " *"); + printIfSet(MI, OpNo, O.indent(20 - O.GetNumBytesInBuffer()), "*", " "); } void AMDGPUInstPrinter::printNeg(const MCInst *MI, unsigned OpNo, @@ -169,4 +173,41 @@ void AMDGPUInstPrinter::printSel(const MCInst *MI, unsigned OpNo, O << "." 
<< chans[chan]; } +void AMDGPUInstPrinter::printBankSwizzle(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + int BankSwizzle = MI->getOperand(OpNo).getImm(); + switch (BankSwizzle) { + case 1: + O << "BS:VEC_021"; + break; + case 2: + O << "BS:VEC_120"; + break; + case 3: + O << "BS:VEC_102"; + break; + case 4: + O << "BS:VEC_201"; + break; + case 5: + O << "BS:VEC_210"; + break; + default: + break; + } + return; +} + +void AMDGPUInstPrinter::printKCache(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + int KCacheMode = MI->getOperand(OpNo).getImm(); + if (KCacheMode > 0) { + int KCacheBank = MI->getOperand(OpNo - 2).getImm(); + O << "CB" << KCacheBank <<":"; + int KCacheAddr = MI->getOperand(OpNo + 2).getImm(); + int LineSize = (KCacheMode == 1)?16:32; + O << KCacheAddr * 16 << "-" << KCacheAddr * 16 + LineSize; + } +} + #include "AMDGPUGenAsmWriter.inc" diff --git a/contrib/llvm/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h b/contrib/llvm/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h index 767a708..c6fd053 100644 --- a/contrib/llvm/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h +++ b/contrib/llvm/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h @@ -35,7 +35,8 @@ private: void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); void printInterpSlot(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printMemOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); - void printIfSet(const MCInst *MI, unsigned OpNo, raw_ostream &O, StringRef Asm); + void printIfSet(const MCInst *MI, unsigned OpNo, raw_ostream &O, + StringRef Asm, StringRef Default = ""); void printAbs(const MCInst *MI, unsigned OpNo, raw_ostream &O); void printClamp(const MCInst *MI, unsigned OpNo, raw_ostream &O); void printLiteral(const MCInst *MI, unsigned OpNo, raw_ostream &O); @@ -47,6 +48,8 @@ private: void printUpdatePred(const MCInst *MI, unsigned OpNo, raw_ostream &O); void printWrite(const MCInst *MI, unsigned OpNo, raw_ostream &O); void printSel(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printBankSwizzle(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printKCache(const MCInst *MI, unsigned OpNo, raw_ostream &O); }; } // End namespace llvm diff --git a/contrib/llvm/lib/Target/R600/MCTargetDesc/AMDGPUAsmBackend.cpp b/contrib/llvm/lib/Target/R600/MCTargetDesc/AMDGPUAsmBackend.cpp index 98fca43..a3397f3 100644 --- a/contrib/llvm/lib/Target/R600/MCTargetDesc/AMDGPUAsmBackend.cpp +++ b/contrib/llvm/lib/Target/R600/MCTargetDesc/AMDGPUAsmBackend.cpp @@ -44,7 +44,6 @@ public: AMDGPUAsmBackend(const Target &T) : MCAsmBackend() {} - virtual AMDGPUMCObjectWriter *createObjectWriter(raw_ostream &OS) const; virtual unsigned getNumFixupKinds() const { return 0; }; virtual void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize, uint64_t Value) const; @@ -71,16 +70,6 @@ void AMDGPUMCObjectWriter::WriteObject(MCAssembler &Asm, } } -MCAsmBackend *llvm::createAMDGPUAsmBackend(const Target &T, StringRef TT, - StringRef CPU) { - return new AMDGPUAsmBackend(T); -} - -AMDGPUMCObjectWriter * AMDGPUAsmBackend::createObjectWriter( - raw_ostream &OS) const { - return new AMDGPUMCObjectWriter(OS); -} - void AMDGPUAsmBackend::applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize, uint64_t Value) const { @@ -88,3 +77,21 @@ void AMDGPUAsmBackend::applyFixup(const MCFixup &Fixup, char *Data, assert(Fixup.getKind() == FK_PCRel_4); *Dst = (Value - 4) / 4; } + +//===----------------------------------------------------------------------===// +// ELFAMDGPUAsmBackend class 
+//===----------------------------------------------------------------------===// + +class ELFAMDGPUAsmBackend : public AMDGPUAsmBackend { +public: + ELFAMDGPUAsmBackend(const Target &T) : AMDGPUAsmBackend(T) { } + + MCObjectWriter *createObjectWriter(raw_ostream &OS) const { + return createAMDGPUELFObjectWriter(OS); + } +}; + +MCAsmBackend *llvm::createAMDGPUAsmBackend(const Target &T, StringRef TT, + StringRef CPU) { + return new ELFAMDGPUAsmBackend(T); +} diff --git a/contrib/llvm/lib/Target/R600/MCTargetDesc/AMDGPUELFObjectWriter.cpp b/contrib/llvm/lib/Target/R600/MCTargetDesc/AMDGPUELFObjectWriter.cpp new file mode 100644 index 0000000..48fac9f --- /dev/null +++ b/contrib/llvm/lib/Target/R600/MCTargetDesc/AMDGPUELFObjectWriter.cpp @@ -0,0 +1,39 @@ +//===-- AMDGPUELFObjectWriter.cpp - AMDGPU ELF Writer ----------------------==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +/// \file +//===----------------------------------------------------------------------===// + +#include "AMDGPUMCTargetDesc.h" +#include "llvm/MC/MCELFObjectWriter.h" + +using namespace llvm; + +namespace { + +class AMDGPUELFObjectWriter : public MCELFObjectTargetWriter { +public: + AMDGPUELFObjectWriter(); +protected: + virtual unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup, + bool IsPCRel, bool IsRelocWithSymbol, + int64_t Addend) const { + llvm_unreachable("Not implemented"); + } + +}; + + +} // End anonymous namespace + +AMDGPUELFObjectWriter::AMDGPUELFObjectWriter() + : MCELFObjectTargetWriter(false, 0, 0, false) { } + +MCObjectWriter *llvm::createAMDGPUELFObjectWriter(raw_ostream &OS) { + MCELFObjectTargetWriter *MOTW = new AMDGPUELFObjectWriter(); + return createELFObjectWriter(MOTW, OS, true); +} diff --git a/contrib/llvm/lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.cpp b/contrib/llvm/lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.cpp index b7cdd7c..2aae26a 100644 --- a/contrib/llvm/lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.cpp +++ b/contrib/llvm/lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.cpp @@ -68,8 +68,6 @@ AMDGPUMCAsmInfo::AMDGPUMCAsmInfo(const Target &T, StringRef &TT) : MCAsmInfo() { //===--- Dwarf Emission Directives -----------------------------------===// HasLEB128 = true; SupportsDebugInformation = true; - DwarfSectionOffsetDirective = ".offset"; - } const char* diff --git a/contrib/llvm/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.cpp b/contrib/llvm/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.cpp index 072ee49..61d70bb 100644 --- a/contrib/llvm/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.cpp +++ b/contrib/llvm/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.cpp @@ -78,7 +78,7 @@ static MCCodeEmitter *createAMDGPUMCCodeEmitter(const MCInstrInfo &MCII, if (STI.getFeatureBits() & AMDGPU::Feature64BitPtr) { return createSIMCCodeEmitter(MCII, MRI, STI, Ctx); } else { - return createR600MCCodeEmitter(MCII, MRI, STI, Ctx); + return createR600MCCodeEmitter(MCII, MRI, STI); } } @@ -88,7 +88,7 @@ static MCStreamer *createMCStreamer(const Target &T, StringRef TT, MCCodeEmitter *_Emitter, bool RelaxAll, bool NoExecStack) { - return createPureStreamer(Ctx, MAB, _OS, _Emitter); + return createELFStreamer(Ctx, MAB, _OS, _Emitter, false, false); } extern "C" void LLVMInitializeR600TargetMC() { diff --git a/contrib/llvm/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.h b/contrib/llvm/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.h index 363a4af..abb0320 100644 
--- a/contrib/llvm/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.h +++ b/contrib/llvm/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.h @@ -23,16 +23,17 @@ class MCAsmBackend; class MCCodeEmitter; class MCContext; class MCInstrInfo; +class MCObjectWriter; class MCRegisterInfo; class MCSubtargetInfo; class Target; +class raw_ostream; extern Target TheAMDGPUTarget; MCCodeEmitter *createR600MCCodeEmitter(const MCInstrInfo &MCII, const MCRegisterInfo &MRI, - const MCSubtargetInfo &STI, - MCContext &Ctx); + const MCSubtargetInfo &STI); MCCodeEmitter *createSIMCCodeEmitter(const MCInstrInfo &MCII, const MCRegisterInfo &MRI, @@ -41,6 +42,8 @@ MCCodeEmitter *createSIMCCodeEmitter(const MCInstrInfo &MCII, MCAsmBackend *createAMDGPUAsmBackend(const Target &T, StringRef TT, StringRef CPU); + +MCObjectWriter *createAMDGPUELFObjectWriter(raw_ostream &OS); } // End llvm namespace #define GET_REGINFO_ENUM diff --git a/contrib/llvm/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp b/contrib/llvm/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp index 927bcbd..cb4cf0c 100644 --- a/contrib/llvm/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp +++ b/contrib/llvm/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp @@ -9,12 +9,8 @@ // /// \file /// -/// This code emitter outputs bytecode that is understood by the r600g driver -/// in the Mesa [1] project. The bytecode is very similar to the hardware's ISA, -/// but it still needs to be run through a finalizer in order to be executed -/// by the GPU. -/// -/// [1] http://www.mesa3d.org/ +/// \brief The R600 code emitter produces machine code that can be executed +/// directly on the GPU device. // //===----------------------------------------------------------------------===// @@ -30,9 +26,6 @@ #include "llvm/Support/raw_ostream.h" #include <stdio.h> -#define SRC_BYTE_COUNT 11 -#define DST_BYTE_COUNT 5 - using namespace llvm; namespace { @@ -43,13 +36,12 @@ class R600MCCodeEmitter : public AMDGPUMCCodeEmitter { const MCInstrInfo &MCII; const MCRegisterInfo &MRI; const MCSubtargetInfo &STI; - MCContext &Ctx; public: R600MCCodeEmitter(const MCInstrInfo &mcii, const MCRegisterInfo &mri, - const MCSubtargetInfo &sti, MCContext &ctx) - : MCII(mcii), MRI(mri), STI(sti), Ctx(ctx) { } + const MCSubtargetInfo &sti) + : MCII(mcii), MRI(mri), STI(sti) { } /// \brief Encode the instruction and write it to the OS. 
virtual void EncodeInstruction(const MCInst &MI, raw_ostream &OS, @@ -60,30 +52,14 @@ public: SmallVectorImpl<MCFixup> &Fixups) const; private: - void EmitALUInstr(const MCInst &MI, SmallVectorImpl<MCFixup> &Fixups, - raw_ostream &OS) const; - void EmitSrc(const MCInst &MI, unsigned OpIdx, raw_ostream &OS) const; - void EmitSrcISA(const MCInst &MI, unsigned RegOpIdx, unsigned SelOpIdx, - raw_ostream &OS) const; - void EmitDst(const MCInst &MI, raw_ostream &OS) const; - void EmitFCInstr(const MCInst &MI, raw_ostream &OS) const; - - void EmitNullBytes(unsigned int byteCount, raw_ostream &OS) const; - void EmitByte(unsigned int byte, raw_ostream &OS) const; - void EmitTwoBytes(uint32_t bytes, raw_ostream &OS) const; - void Emit(uint32_t value, raw_ostream &OS) const; void Emit(uint64_t value, raw_ostream &OS) const; unsigned getHWRegChan(unsigned reg) const; unsigned getHWReg(unsigned regNo) const; - bool isFCOp(unsigned opcode) const; - bool isTexOp(unsigned opcode) const; - bool isFlagSet(const MCInst &MI, unsigned Operand, unsigned Flag) const; - }; } // End anonymous namespace @@ -95,16 +71,6 @@ enum RegElement { ELEMENT_W }; -enum InstrTypes { - INSTR_ALU = 0, - INSTR_TEX, - INSTR_FC, - INSTR_NATIVE, - INSTR_VTX, - INSTR_EXPORT, - INSTR_CFALU -}; - enum FCInstr { FC_IF_PREDICATE = 0, FC_ELSE, @@ -132,355 +98,95 @@ enum TextureTypes { MCCodeEmitter *llvm::createR600MCCodeEmitter(const MCInstrInfo &MCII, const MCRegisterInfo &MRI, - const MCSubtargetInfo &STI, - MCContext &Ctx) { - return new R600MCCodeEmitter(MCII, MRI, STI, Ctx); + const MCSubtargetInfo &STI) { + return new R600MCCodeEmitter(MCII, MRI, STI); } void R600MCCodeEmitter::EncodeInstruction(const MCInst &MI, raw_ostream &OS, SmallVectorImpl<MCFixup> &Fixups) const { - if (isFCOp(MI.getOpcode())){ - EmitFCInstr(MI, OS); - } else if (MI.getOpcode() == AMDGPU::RETURN || + const MCInstrDesc &Desc = MCII.get(MI.getOpcode()); + if (MI.getOpcode() == AMDGPU::RETURN || + MI.getOpcode() == AMDGPU::FETCH_CLAUSE || + MI.getOpcode() == AMDGPU::ALU_CLAUSE || MI.getOpcode() == AMDGPU::BUNDLE || MI.getOpcode() == AMDGPU::KILL) { return; - } else { - switch(MI.getOpcode()) { - case AMDGPU::STACK_SIZE: { - EmitByte(MI.getOperand(0).getImm(), OS); - break; - } - case AMDGPU::RAT_WRITE_CACHELESS_32_eg: - case AMDGPU::RAT_WRITE_CACHELESS_128_eg: { - uint64_t inst = getBinaryCodeForInstr(MI, Fixups); - EmitByte(INSTR_NATIVE, OS); - Emit(inst, OS); - break; - } - case AMDGPU::CONSTANT_LOAD_eg: - case AMDGPU::VTX_READ_PARAM_8_eg: - case AMDGPU::VTX_READ_PARAM_16_eg: - case AMDGPU::VTX_READ_PARAM_32_eg: - case AMDGPU::VTX_READ_PARAM_128_eg: - case AMDGPU::VTX_READ_GLOBAL_8_eg: - case AMDGPU::VTX_READ_GLOBAL_32_eg: - case AMDGPU::VTX_READ_GLOBAL_128_eg: - case AMDGPU::TEX_VTX_CONSTBUF: - case AMDGPU::TEX_VTX_TEXBUF : { - uint64_t InstWord01 = getBinaryCodeForInstr(MI, Fixups); - uint32_t InstWord2 = MI.getOperand(2).getImm(); // Offset - - EmitByte(INSTR_VTX, OS); - Emit(InstWord01, OS); - Emit(InstWord2, OS); - break; - } - case AMDGPU::TEX_LD: - case AMDGPU::TEX_GET_TEXTURE_RESINFO: - case AMDGPU::TEX_SAMPLE: - case AMDGPU::TEX_SAMPLE_C: - case AMDGPU::TEX_SAMPLE_L: - case AMDGPU::TEX_SAMPLE_C_L: - case AMDGPU::TEX_SAMPLE_LB: - case AMDGPU::TEX_SAMPLE_C_LB: - case AMDGPU::TEX_SAMPLE_G: - case AMDGPU::TEX_SAMPLE_C_G: - case AMDGPU::TEX_GET_GRADIENTS_H: - case AMDGPU::TEX_GET_GRADIENTS_V: - case AMDGPU::TEX_SET_GRADIENTS_H: - case AMDGPU::TEX_SET_GRADIENTS_V: { - unsigned Opcode = MI.getOpcode(); - bool HasOffsets = (Opcode == AMDGPU::TEX_LD); - 
unsigned OpOffset = HasOffsets ? 3 : 0; - int64_t Sampler = MI.getOperand(OpOffset + 3).getImm(); - int64_t TextureType = MI.getOperand(OpOffset + 4).getImm(); - - uint32_t SrcSelect[4] = {0, 1, 2, 3}; - uint32_t Offsets[3] = {0, 0, 0}; - uint64_t CoordType[4] = {1, 1, 1, 1}; - - if (HasOffsets) - for (unsigned i = 0; i < 3; i++) { - int SignedOffset = MI.getOperand(i + 2).getImm(); - Offsets[i] = (SignedOffset & 0x1F); - } - - - if (TextureType == TEXTURE_RECT || - TextureType == TEXTURE_SHADOWRECT) { - CoordType[ELEMENT_X] = 0; - CoordType[ELEMENT_Y] = 0; - } - - if (TextureType == TEXTURE_1D_ARRAY || - TextureType == TEXTURE_SHADOW1D_ARRAY) { - if (Opcode == AMDGPU::TEX_SAMPLE_C_L || - Opcode == AMDGPU::TEX_SAMPLE_C_LB) { - CoordType[ELEMENT_Y] = 0; - } else { - CoordType[ELEMENT_Z] = 0; - SrcSelect[ELEMENT_Z] = ELEMENT_Y; - } - } else if (TextureType == TEXTURE_2D_ARRAY || - TextureType == TEXTURE_SHADOW2D_ARRAY) { - CoordType[ELEMENT_Z] = 0; + } else if (IS_VTX(Desc)) { + uint64_t InstWord01 = getBinaryCodeForInstr(MI, Fixups); + uint32_t InstWord2 = MI.getOperand(2).getImm(); // Offset + InstWord2 |= 1 << 19; + + Emit(InstWord01, OS); + Emit(InstWord2, OS); + Emit((u_int32_t) 0, OS); + } else if (IS_TEX(Desc)) { + unsigned Opcode = MI.getOpcode(); + bool HasOffsets = (Opcode == AMDGPU::TEX_LD); + unsigned OpOffset = HasOffsets ? 3 : 0; + int64_t Sampler = MI.getOperand(OpOffset + 3).getImm(); + int64_t TextureType = MI.getOperand(OpOffset + 4).getImm(); + + uint32_t SrcSelect[4] = {0, 1, 2, 3}; + uint32_t Offsets[3] = {0, 0, 0}; + uint64_t CoordType[4] = {1, 1, 1, 1}; + + if (HasOffsets) + for (unsigned i = 0; i < 3; i++) { + int SignedOffset = MI.getOperand(i + 2).getImm(); + Offsets[i] = (SignedOffset & 0x1F); } - - if ((TextureType == TEXTURE_SHADOW1D || - TextureType == TEXTURE_SHADOW2D || - TextureType == TEXTURE_SHADOWRECT || - TextureType == TEXTURE_SHADOW1D_ARRAY) && - Opcode != AMDGPU::TEX_SAMPLE_C_L && - Opcode != AMDGPU::TEX_SAMPLE_C_LB) { - SrcSelect[ELEMENT_W] = ELEMENT_Z; - } - - uint64_t Word01 = getBinaryCodeForInstr(MI, Fixups) | - CoordType[ELEMENT_X] << 60 | CoordType[ELEMENT_Y] << 61 | - CoordType[ELEMENT_Z] << 62 | CoordType[ELEMENT_W] << 63; - uint32_t Word2 = Sampler << 15 | SrcSelect[ELEMENT_X] << 20 | - SrcSelect[ELEMENT_Y] << 23 | SrcSelect[ELEMENT_Z] << 26 | - SrcSelect[ELEMENT_W] << 29 | Offsets[0] << 0 | Offsets[1] << 5 | - Offsets[2] << 10; - - EmitByte(INSTR_TEX, OS); - Emit(Word01, OS); - Emit(Word2, OS); - break; - } - case AMDGPU::EG_ExportSwz: - case AMDGPU::R600_ExportSwz: - case AMDGPU::EG_ExportBuf: - case AMDGPU::R600_ExportBuf: { - uint64_t Inst = getBinaryCodeForInstr(MI, Fixups); - EmitByte(INSTR_EXPORT, OS); - Emit(Inst, OS); - break; - } - case AMDGPU::CF_ALU: - case AMDGPU::CF_ALU_PUSH_BEFORE: { - uint64_t Inst = getBinaryCodeForInstr(MI, Fixups); - EmitByte(INSTR_CFALU, OS); - Emit(Inst, OS); - break; - } - case AMDGPU::CF_TC: - case AMDGPU::CF_VC: - case AMDGPU::CF_CALL_FS: - return; - case AMDGPU::WHILE_LOOP: - case AMDGPU::END_LOOP: - case AMDGPU::LOOP_BREAK: - case AMDGPU::CF_CONTINUE: - case AMDGPU::CF_JUMP: - case AMDGPU::CF_ELSE: - case AMDGPU::POP: { - uint64_t Inst = getBinaryCodeForInstr(MI, Fixups); - EmitByte(INSTR_NATIVE, OS); - Emit(Inst, OS); - break; + if (TextureType == TEXTURE_RECT || + TextureType == TEXTURE_SHADOWRECT) { + CoordType[ELEMENT_X] = 0; + CoordType[ELEMENT_Y] = 0; } - default: - EmitALUInstr(MI, Fixups, OS); - break; - } - } -} - -void R600MCCodeEmitter::EmitALUInstr(const MCInst &MI, - 
SmallVectorImpl<MCFixup> &Fixups, - raw_ostream &OS) const { - const MCInstrDesc &MCDesc = MCII.get(MI.getOpcode()); - - // Emit instruction type - EmitByte(INSTR_ALU, OS); - - uint64_t InstWord01 = getBinaryCodeForInstr(MI, Fixups); - - //older alu have different encoding for instructions with one or two src - //parameters. - if ((STI.getFeatureBits() & AMDGPU::FeatureR600ALUInst) && - !(MCDesc.TSFlags & R600_InstFlag::OP3)) { - uint64_t ISAOpCode = InstWord01 & (0x3FFULL << 39); - InstWord01 &= ~(0x3FFULL << 39); - InstWord01 |= ISAOpCode << 1; - } - - unsigned SrcNum = MCDesc.TSFlags & R600_InstFlag::OP3 ? 3 : - MCDesc.TSFlags & R600_InstFlag::OP2 ? 2 : 1; - - EmitByte(SrcNum, OS); - - const unsigned SrcOps[3][2] = { - {R600Operands::SRC0, R600Operands::SRC0_SEL}, - {R600Operands::SRC1, R600Operands::SRC1_SEL}, - {R600Operands::SRC2, R600Operands::SRC2_SEL} - }; - for (unsigned SrcIdx = 0; SrcIdx < SrcNum; ++SrcIdx) { - unsigned RegOpIdx = R600Operands::ALUOpTable[SrcNum-1][SrcOps[SrcIdx][0]]; - unsigned SelOpIdx = R600Operands::ALUOpTable[SrcNum-1][SrcOps[SrcIdx][1]]; - EmitSrcISA(MI, RegOpIdx, SelOpIdx, OS); - } - - Emit(InstWord01, OS); - return; -} - -void R600MCCodeEmitter::EmitSrc(const MCInst &MI, unsigned OpIdx, - raw_ostream &OS) const { - const MCOperand &MO = MI.getOperand(OpIdx); - union { - float f; - uint32_t i; - } Value; - Value.i = 0; - // Emit the source select (2 bytes). For GPRs, this is the register index. - // For other potential instruction operands, (e.g. constant registers) the - // value of the source select is defined in the r600isa docs. - if (MO.isReg()) { - unsigned reg = MO.getReg(); - EmitTwoBytes(getHWReg(reg), OS); - if (reg == AMDGPU::ALU_LITERAL_X) { - unsigned ImmOpIndex = MI.getNumOperands() - 1; - MCOperand ImmOp = MI.getOperand(ImmOpIndex); - if (ImmOp.isFPImm()) { - Value.f = ImmOp.getFPImm(); + if (TextureType == TEXTURE_1D_ARRAY || + TextureType == TEXTURE_SHADOW1D_ARRAY) { + if (Opcode == AMDGPU::TEX_SAMPLE_C_L || + Opcode == AMDGPU::TEX_SAMPLE_C_LB) { + CoordType[ELEMENT_Y] = 0; } else { - assert(ImmOp.isImm()); - Value.i = ImmOp.getImm(); + CoordType[ELEMENT_Z] = 0; + SrcSelect[ELEMENT_Z] = ELEMENT_Y; } + } else if (TextureType == TEXTURE_2D_ARRAY || + TextureType == TEXTURE_SHADOW2D_ARRAY) { + CoordType[ELEMENT_Z] = 0; } - } else { - // XXX: Handle other operand types. - EmitTwoBytes(0, OS); - } - - // Emit the source channel (1 byte) - if (MO.isReg()) { - EmitByte(getHWRegChan(MO.getReg()), OS); - } else { - EmitByte(0, OS); - } - - // XXX: Emit isNegated (1 byte) - if ((!(isFlagSet(MI, OpIdx, MO_FLAG_ABS))) - && (isFlagSet(MI, OpIdx, MO_FLAG_NEG) || - (MO.isReg() && - (MO.getReg() == AMDGPU::NEG_ONE || MO.getReg() == AMDGPU::NEG_HALF)))){ - EmitByte(1, OS); - } else { - EmitByte(0, OS); - } - - // Emit isAbsolute (1 byte) - if (isFlagSet(MI, OpIdx, MO_FLAG_ABS)) { - EmitByte(1, OS); - } else { - EmitByte(0, OS); - } - - // XXX: Emit relative addressing mode (1 byte) - EmitByte(0, OS); - - // Emit kc_bank, This will be adjusted later by r600_asm - EmitByte(0, OS); - // Emit the literal value, if applicable (4 bytes). - Emit(Value.i, OS); -} - -void R600MCCodeEmitter::EmitSrcISA(const MCInst &MI, unsigned RegOpIdx, - unsigned SelOpIdx, raw_ostream &OS) const { - const MCOperand &RegMO = MI.getOperand(RegOpIdx); - const MCOperand &SelMO = MI.getOperand(SelOpIdx); - - union { - float f; - uint32_t i; - } InlineConstant; - InlineConstant.i = 0; - // Emit source type (1 byte) and source select (4 bytes). 
For GPRs type is 0 - // and select is 0 (GPR index is encoded in the instr encoding. For constants - // type is 1 and select is the original const select passed from the driver. - unsigned Reg = RegMO.getReg(); - if (Reg == AMDGPU::ALU_CONST) { - EmitByte(1, OS); - uint32_t Sel = SelMO.getImm(); - Emit(Sel, OS); - } else { - EmitByte(0, OS); - Emit((uint32_t)0, OS); - } - - if (Reg == AMDGPU::ALU_LITERAL_X) { - unsigned ImmOpIndex = MI.getNumOperands() - 1; - MCOperand ImmOp = MI.getOperand(ImmOpIndex); - if (ImmOp.isFPImm()) { - InlineConstant.f = ImmOp.getFPImm(); - } else { - assert(ImmOp.isImm()); - InlineConstant.i = ImmOp.getImm(); + if ((TextureType == TEXTURE_SHADOW1D || + TextureType == TEXTURE_SHADOW2D || + TextureType == TEXTURE_SHADOWRECT || + TextureType == TEXTURE_SHADOW1D_ARRAY) && + Opcode != AMDGPU::TEX_SAMPLE_C_L && + Opcode != AMDGPU::TEX_SAMPLE_C_LB) { + SrcSelect[ELEMENT_W] = ELEMENT_Z; } - } - - // Emit the literal value, if applicable (4 bytes). - Emit(InlineConstant.i, OS); -} - -void R600MCCodeEmitter::EmitFCInstr(const MCInst &MI, raw_ostream &OS) const { - - // Emit instruction type - EmitByte(INSTR_FC, OS); - // Emit SRC - unsigned NumOperands = MI.getNumOperands(); - if (NumOperands > 0) { - assert(NumOperands == 1); - EmitSrc(MI, 0, OS); + uint64_t Word01 = getBinaryCodeForInstr(MI, Fixups) | + CoordType[ELEMENT_X] << 60 | CoordType[ELEMENT_Y] << 61 | + CoordType[ELEMENT_Z] << 62 | CoordType[ELEMENT_W] << 63; + uint32_t Word2 = Sampler << 15 | SrcSelect[ELEMENT_X] << 20 | + SrcSelect[ELEMENT_Y] << 23 | SrcSelect[ELEMENT_Z] << 26 | + SrcSelect[ELEMENT_W] << 29 | Offsets[0] << 0 | Offsets[1] << 5 | + Offsets[2] << 10; + + Emit(Word01, OS); + Emit(Word2, OS); + Emit((u_int32_t) 0, OS); } else { - EmitNullBytes(SRC_BYTE_COUNT, OS); - } - - // Emit FC Instruction - enum FCInstr instr; - switch (MI.getOpcode()) { - case AMDGPU::PREDICATED_BREAK: - instr = FC_BREAK_PREDICATE; - break; - case AMDGPU::CONTINUE: - instr = FC_CONTINUE; - break; - case AMDGPU::IF_PREDICATE_SET: - instr = FC_IF_PREDICATE; - break; - case AMDGPU::ELSE: - instr = FC_ELSE; - break; - case AMDGPU::ENDIF: - instr = FC_ENDIF; - break; - case AMDGPU::ENDLOOP: - instr = FC_ENDLOOP; - break; - case AMDGPU::WHILELOOP: - instr = FC_BGNLOOP; - break; - default: - abort(); - break; - } - EmitByte(instr, OS); -} - -void R600MCCodeEmitter::EmitNullBytes(unsigned int ByteCount, - raw_ostream &OS) const { - - for (unsigned int i = 0; i < ByteCount; i++) { - EmitByte(0, OS); + uint64_t Inst = getBinaryCodeForInstr(MI, Fixups); + if ((STI.getFeatureBits() & AMDGPU::FeatureR600ALUInst) && + ((Desc.TSFlags & R600_InstFlag::OP1) || + Desc.TSFlags & R600_InstFlag::OP2)) { + uint64_t ISAOpCode = Inst & (0x3FFULL << 39); + Inst &= ~(0x3FFULL << 39); + Inst |= ISAOpCode << 1; + } + Emit(Inst, OS); } } @@ -488,12 +194,6 @@ void R600MCCodeEmitter::EmitByte(unsigned int Byte, raw_ostream &OS) const { OS.write((uint8_t) Byte & 0xff); } -void R600MCCodeEmitter::EmitTwoBytes(unsigned int Bytes, - raw_ostream &OS) const { - OS.write((uint8_t) (Bytes & 0xff)); - OS.write((uint8_t) ((Bytes >> 8) & 0xff)); -} - void R600MCCodeEmitter::Emit(uint32_t Value, raw_ostream &OS) const { for (unsigned i = 0; i < 4; i++) { OS.write((uint8_t) ((Value >> (8 * i)) & 0xff)); @@ -531,55 +231,4 @@ uint64_t R600MCCodeEmitter::getMachineOpValue(const MCInst &MI, } } -//===----------------------------------------------------------------------===// -// Encoding helper functions 
-//===----------------------------------------------------------------------===// - -bool R600MCCodeEmitter::isFCOp(unsigned opcode) const { - switch(opcode) { - default: return false; - case AMDGPU::PREDICATED_BREAK: - case AMDGPU::CONTINUE: - case AMDGPU::IF_PREDICATE_SET: - case AMDGPU::ELSE: - case AMDGPU::ENDIF: - case AMDGPU::ENDLOOP: - case AMDGPU::WHILELOOP: - return true; - } -} - -bool R600MCCodeEmitter::isTexOp(unsigned opcode) const { - switch(opcode) { - default: return false; - case AMDGPU::TEX_LD: - case AMDGPU::TEX_GET_TEXTURE_RESINFO: - case AMDGPU::TEX_SAMPLE: - case AMDGPU::TEX_SAMPLE_C: - case AMDGPU::TEX_SAMPLE_L: - case AMDGPU::TEX_SAMPLE_C_L: - case AMDGPU::TEX_SAMPLE_LB: - case AMDGPU::TEX_SAMPLE_C_LB: - case AMDGPU::TEX_SAMPLE_G: - case AMDGPU::TEX_SAMPLE_C_G: - case AMDGPU::TEX_GET_GRADIENTS_H: - case AMDGPU::TEX_GET_GRADIENTS_V: - case AMDGPU::TEX_SET_GRADIENTS_H: - case AMDGPU::TEX_SET_GRADIENTS_V: - return true; - } -} - -bool R600MCCodeEmitter::isFlagSet(const MCInst &MI, unsigned Operand, - unsigned Flag) const { - const MCInstrDesc &MCDesc = MCII.get(MI.getOpcode()); - unsigned FlagIndex = GET_FLAG_OPERAND_IDX(MCDesc.TSFlags); - if (FlagIndex == 0) { - return false; - } - assert(MI.getOperand(FlagIndex).isImm()); - return !!((MI.getOperand(FlagIndex).getImm() >> - (NUM_MO_FLAGS * Operand)) & Flag); -} - #include "AMDGPUGenMCCodeEmitter.inc" diff --git a/contrib/llvm/lib/Target/R600/Processors.td b/contrib/llvm/lib/Target/R600/Processors.td index 868810c..0cbe919 100644 --- a/contrib/llvm/lib/Target/R600/Processors.td +++ b/contrib/llvm/lib/Target/R600/Processors.td @@ -1,4 +1,4 @@ -//===-- Processors.td - TODO: Add brief description -------===// +//===-- Processors.td - R600 Processor definitions ------------------------===// // // The LLVM Compiler Infrastructure // @@ -6,25 +6,43 @@ // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// -// -// AMDIL processors supported. 
-// -//===----------------------------------------------------------------------===// class Proc<string Name, ProcessorItineraries itin, list<SubtargetFeature> Features> : Processor<Name, itin, Features>; -def : Proc<"", R600_EG_Itin, [FeatureR600ALUInst]>; -def : Proc<"r600", R600_EG_Itin, [FeatureR600ALUInst]>; -def : Proc<"rv710", R600_EG_Itin, []>; -def : Proc<"rv730", R600_EG_Itin, []>; -def : Proc<"rv770", R600_EG_Itin, [FeatureFP64]>; -def : Proc<"cedar", R600_EG_Itin, [FeatureByteAddress, FeatureImages]>; -def : Proc<"redwood", R600_EG_Itin, [FeatureByteAddress, FeatureImages]>; -def : Proc<"juniper", R600_EG_Itin, [FeatureByteAddress, FeatureImages]>; -def : Proc<"cypress", R600_EG_Itin, [FeatureByteAddress, FeatureImages, FeatureFP64]>; -def : Proc<"barts", R600_EG_Itin, [FeatureByteAddress, FeatureImages]>; -def : Proc<"turks", R600_EG_Itin, [FeatureByteAddress, FeatureImages]>; -def : Proc<"caicos", R600_EG_Itin, [FeatureByteAddress, FeatureImages]>; -def : Proc<"cayman", R600_EG_Itin, [FeatureByteAddress, FeatureImages, FeatureFP64]>; -def : Proc<"SI", SI_Itin, [Feature64BitPtr]>; - +def : Proc<"", R600_VLIW5_Itin, + [FeatureR600ALUInst, FeatureVertexCache]>; +def : Proc<"r600", R600_VLIW5_Itin, + [FeatureR600ALUInst , FeatureVertexCache]>; +def : Proc<"rs880", R600_VLIW5_Itin, + [FeatureR600ALUInst]>; +def : Proc<"rv670", R600_VLIW5_Itin, + [FeatureR600ALUInst, FeatureFP64, FeatureVertexCache]>; +def : Proc<"rv710", R600_VLIW5_Itin, + [FeatureVertexCache]>; +def : Proc<"rv730", R600_VLIW5_Itin, + [FeatureVertexCache]>; +def : Proc<"rv770", R600_VLIW5_Itin, + [FeatureFP64, FeatureVertexCache]>; +def : Proc<"cedar", R600_VLIW5_Itin, + [FeatureByteAddress, FeatureImages, FeatureVertexCache]>; +def : Proc<"redwood", R600_VLIW5_Itin, + [FeatureByteAddress, FeatureImages, FeatureVertexCache]>; +def : Proc<"sumo", R600_VLIW5_Itin, + [FeatureByteAddress, FeatureImages]>; +def : Proc<"juniper", R600_VLIW5_Itin, + [FeatureByteAddress, FeatureImages, FeatureVertexCache]>; +def : Proc<"cypress", R600_VLIW5_Itin, + [FeatureByteAddress, FeatureImages, FeatureFP64, FeatureVertexCache]>; +def : Proc<"barts", R600_VLIW5_Itin, + [FeatureByteAddress, FeatureImages, FeatureVertexCache]>; +def : Proc<"turks", R600_VLIW5_Itin, + [FeatureByteAddress, FeatureImages, FeatureVertexCache]>; +def : Proc<"caicos", R600_VLIW5_Itin, + [FeatureByteAddress, FeatureImages]>; +def : Proc<"cayman", R600_VLIW4_Itin, + [FeatureByteAddress, FeatureImages, FeatureFP64]>;def : Proc<"SI", SI_Itin, [Feature64BitPtr, FeatureFP64]>; +def : Proc<"tahiti", SI_Itin, [Feature64BitPtr, FeatureFP64]>; +def : Proc<"pitcairn", SI_Itin, [Feature64BitPtr, FeatureFP64]>; +def : Proc<"verde", SI_Itin, [Feature64BitPtr, FeatureFP64]>; +def : Proc<"oland", SI_Itin, [Feature64BitPtr, FeatureFP64]>; +def : Proc<"hainan", SI_Itin, [Feature64BitPtr, FeatureFP64]>; diff --git a/contrib/llvm/lib/Target/R600/R600ControlFlowFinalizer.cpp b/contrib/llvm/lib/Target/R600/R600ControlFlowFinalizer.cpp index 3a6c7ea..ffe3414 100644 --- a/contrib/llvm/lib/Target/R600/R600ControlFlowFinalizer.cpp +++ b/contrib/llvm/lib/Target/R600/R600ControlFlowFinalizer.cpp @@ -30,35 +30,27 @@ namespace llvm { class R600ControlFlowFinalizer : public MachineFunctionPass { private: + typedef std::pair<MachineInstr *, std::vector<MachineInstr *> > ClauseFile; + + enum ControlFlowInstruction { + CF_TC, + CF_VC, + CF_CALL_FS, + CF_WHILE_LOOP, + CF_END_LOOP, + CF_LOOP_BREAK, + CF_LOOP_CONTINUE, + CF_JUMP, + CF_ELSE, + CF_POP, + CF_END + }; + static char ID; const 
R600InstrInfo *TII; + const R600RegisterInfo &TRI; unsigned MaxFetchInst; - - bool isFetch(const MachineInstr *MI) const { - switch (MI->getOpcode()) { - case AMDGPU::TEX_VTX_CONSTBUF: - case AMDGPU::TEX_VTX_TEXBUF: - case AMDGPU::TEX_LD: - case AMDGPU::TEX_GET_TEXTURE_RESINFO: - case AMDGPU::TEX_GET_GRADIENTS_H: - case AMDGPU::TEX_GET_GRADIENTS_V: - case AMDGPU::TEX_SET_GRADIENTS_H: - case AMDGPU::TEX_SET_GRADIENTS_V: - case AMDGPU::TEX_SAMPLE: - case AMDGPU::TEX_SAMPLE_C: - case AMDGPU::TEX_SAMPLE_L: - case AMDGPU::TEX_SAMPLE_C_L: - case AMDGPU::TEX_SAMPLE_LB: - case AMDGPU::TEX_SAMPLE_C_LB: - case AMDGPU::TEX_SAMPLE_G: - case AMDGPU::TEX_SAMPLE_C_G: - case AMDGPU::TXD: - case AMDGPU::TXD_SHADOW: - return true; - default: - return false; - } - } + const AMDGPUSubtarget &ST; bool IsTrivialInst(MachineInstr *MI) const { switch (MI->getOpcode()) { @@ -70,26 +62,226 @@ private: } } - MachineBasicBlock::iterator - MakeFetchClause(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, - unsigned CfAddress) const { + const MCInstrDesc &getHWInstrDesc(ControlFlowInstruction CFI) const { + unsigned Opcode = 0; + bool isEg = (ST.device()->getGeneration() >= AMDGPUDeviceInfo::HD5XXX); + switch (CFI) { + case CF_TC: + Opcode = isEg ? AMDGPU::CF_TC_EG : AMDGPU::CF_TC_R600; + break; + case CF_VC: + Opcode = isEg ? AMDGPU::CF_VC_EG : AMDGPU::CF_VC_R600; + break; + case CF_CALL_FS: + Opcode = isEg ? AMDGPU::CF_CALL_FS_EG : AMDGPU::CF_CALL_FS_R600; + break; + case CF_WHILE_LOOP: + Opcode = isEg ? AMDGPU::WHILE_LOOP_EG : AMDGPU::WHILE_LOOP_R600; + break; + case CF_END_LOOP: + Opcode = isEg ? AMDGPU::END_LOOP_EG : AMDGPU::END_LOOP_R600; + break; + case CF_LOOP_BREAK: + Opcode = isEg ? AMDGPU::LOOP_BREAK_EG : AMDGPU::LOOP_BREAK_R600; + break; + case CF_LOOP_CONTINUE: + Opcode = isEg ? AMDGPU::CF_CONTINUE_EG : AMDGPU::CF_CONTINUE_R600; + break; + case CF_JUMP: + Opcode = isEg ? AMDGPU::CF_JUMP_EG : AMDGPU::CF_JUMP_R600; + break; + case CF_ELSE: + Opcode = isEg ? AMDGPU::CF_ELSE_EG : AMDGPU::CF_ELSE_R600; + break; + case CF_POP: + Opcode = isEg ? AMDGPU::POP_EG : AMDGPU::POP_R600; + break; + case CF_END: + if (ST.device()->getDeviceFlag() == OCL_DEVICE_CAYMAN) { + Opcode = AMDGPU::CF_END_CM; + break; + } + Opcode = isEg ? 
AMDGPU::CF_END_EG : AMDGPU::CF_END_R600; + break; + } + assert (Opcode && "No opcode selected"); + return TII->get(Opcode); + } + + bool isCompatibleWithClause(const MachineInstr *MI, + std::set<unsigned> &DstRegs, std::set<unsigned> &SrcRegs) const { + unsigned DstMI, SrcMI; + for (MachineInstr::const_mop_iterator I = MI->operands_begin(), + E = MI->operands_end(); I != E; ++I) { + const MachineOperand &MO = *I; + if (!MO.isReg()) + continue; + if (MO.isDef()) + DstMI = MO.getReg(); + if (MO.isUse()) { + unsigned Reg = MO.getReg(); + if (AMDGPU::R600_Reg128RegClass.contains(Reg)) + SrcMI = Reg; + else + SrcMI = TRI.getMatchingSuperReg(Reg, + TRI.getSubRegFromChannel(TRI.getHWRegChan(Reg)), + &AMDGPU::R600_Reg128RegClass); + } + } + if ((DstRegs.find(SrcMI) == DstRegs.end()) && + (SrcRegs.find(DstMI) == SrcRegs.end())) { + SrcRegs.insert(SrcMI); + DstRegs.insert(DstMI); + return true; + } else + return false; + } + + ClauseFile + MakeFetchClause(MachineBasicBlock &MBB, MachineBasicBlock::iterator &I) + const { MachineBasicBlock::iterator ClauseHead = I; + std::vector<MachineInstr *> ClauseContent; unsigned AluInstCount = 0; + bool IsTex = TII->usesTextureCache(ClauseHead); + std::set<unsigned> DstRegs, SrcRegs; for (MachineBasicBlock::iterator E = MBB.end(); I != E; ++I) { if (IsTrivialInst(I)) continue; - if (!isFetch(I)) + if (AluInstCount > MaxFetchInst) + break; + if ((IsTex && !TII->usesTextureCache(I)) || + (!IsTex && !TII->usesVertexCache(I))) + break; + if (!isCompatibleWithClause(I, DstRegs, SrcRegs)) break; AluInstCount ++; - if (AluInstCount > MaxFetchInst) + ClauseContent.push_back(I); + } + MachineInstr *MIb = BuildMI(MBB, ClauseHead, MBB.findDebugLoc(ClauseHead), + getHWInstrDesc(IsTex?CF_TC:CF_VC)) + .addImm(0) // ADDR + .addImm(AluInstCount - 1); // COUNT + return ClauseFile(MIb, ClauseContent); + } + + void getLiteral(MachineInstr *MI, std::vector<int64_t> &Lits) const { + unsigned LiteralRegs[] = { + AMDGPU::ALU_LITERAL_X, + AMDGPU::ALU_LITERAL_Y, + AMDGPU::ALU_LITERAL_Z, + AMDGPU::ALU_LITERAL_W + }; + for (unsigned i = 0, e = MI->getNumOperands(); i < e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg()) + continue; + if (MO.getReg() != AMDGPU::ALU_LITERAL_X) + continue; + unsigned ImmIdx = TII->getOperandIdx(MI->getOpcode(), R600Operands::IMM); + int64_t Imm = MI->getOperand(ImmIdx).getImm(); + std::vector<int64_t>::iterator It = + std::find(Lits.begin(), Lits.end(), Imm); + if (It != Lits.end()) { + unsigned Index = It - Lits.begin(); + MO.setReg(LiteralRegs[Index]); + } else { + assert(Lits.size() < 4 && "Too many literals in Instruction Group"); + MO.setReg(LiteralRegs[Lits.size()]); + Lits.push_back(Imm); + } + } + } + + MachineBasicBlock::iterator insertLiterals( + MachineBasicBlock::iterator InsertPos, + const std::vector<unsigned> &Literals) const { + MachineBasicBlock *MBB = InsertPos->getParent(); + for (unsigned i = 0, e = Literals.size(); i < e; i+=2) { + unsigned LiteralPair0 = Literals[i]; + unsigned LiteralPair1 = (i + 1 < e)?Literals[i + 1]:0; + InsertPos = BuildMI(MBB, InsertPos->getDebugLoc(), + TII->get(AMDGPU::LITERALS)) + .addImm(LiteralPair0) + .addImm(LiteralPair1); + } + return InsertPos; + } + + ClauseFile + MakeALUClause(MachineBasicBlock &MBB, MachineBasicBlock::iterator &I) + const { + MachineBasicBlock::iterator ClauseHead = I; + std::vector<MachineInstr *> ClauseContent; + I++; + for (MachineBasicBlock::instr_iterator E = MBB.instr_end(); I != E;) { + if (IsTrivialInst(I)) { + ++I; + continue; + } + if (!I->isBundle() && 
!TII->isALUInstr(I->getOpcode())) break; + std::vector<int64_t> Literals; + if (I->isBundle()) { + MachineInstr *DeleteMI = I; + MachineBasicBlock::instr_iterator BI = I.getInstrIterator(); + while (++BI != E && BI->isBundledWithPred()) { + BI->unbundleFromPred(); + for (unsigned i = 0, e = BI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = BI->getOperand(i); + if (MO.isReg() && MO.isInternalRead()) + MO.setIsInternalRead(false); + } + getLiteral(BI, Literals); + ClauseContent.push_back(BI); + } + I = BI; + DeleteMI->eraseFromParent(); + } else { + getLiteral(I, Literals); + ClauseContent.push_back(I); + I++; + } + for (unsigned i = 0, e = Literals.size(); i < e; i+=2) { + unsigned literal0 = Literals[i]; + unsigned literal2 = (i + 1 < e)?Literals[i + 1]:0; + MachineInstr *MILit = BuildMI(MBB, I, I->getDebugLoc(), + TII->get(AMDGPU::LITERALS)) + .addImm(literal0) + .addImm(literal2); + ClauseContent.push_back(MILit); + } } - BuildMI(MBB, ClauseHead, MBB.findDebugLoc(ClauseHead), - TII->get(AMDGPU::CF_TC)) - .addImm(CfAddress) // ADDR - .addImm(AluInstCount); // COUNT - return I; + ClauseHead->getOperand(7).setImm(ClauseContent.size() - 1); + return ClauseFile(ClauseHead, ClauseContent); } + + void + EmitFetchClause(MachineBasicBlock::iterator InsertPos, ClauseFile &Clause, + unsigned &CfCount) { + CounterPropagateAddr(Clause.first, CfCount); + MachineBasicBlock *BB = Clause.first->getParent(); + BuildMI(BB, InsertPos->getDebugLoc(), TII->get(AMDGPU::FETCH_CLAUSE)) + .addImm(CfCount); + for (unsigned i = 0, e = Clause.second.size(); i < e; ++i) { + BB->splice(InsertPos, BB, Clause.second[i]); + } + CfCount += 2 * Clause.second.size(); + } + + void + EmitALUClause(MachineBasicBlock::iterator InsertPos, ClauseFile &Clause, + unsigned &CfCount) { + CounterPropagateAddr(Clause.first, CfCount); + MachineBasicBlock *BB = Clause.first->getParent(); + BuildMI(BB, InsertPos->getDebugLoc(), TII->get(AMDGPU::ALU_CLAUSE)) + .addImm(CfCount); + for (unsigned i = 0, e = Clause.second.size(); i < e; ++i) { + BB->splice(InsertPos, BB, Clause.second[i]); + } + CfCount += Clause.second.size(); + } + void CounterPropagateAddr(MachineInstr *MI, unsigned Addr) const { MI->getOperand(0).setImm(Addr + MI->getOperand(0).getImm()); } @@ -102,9 +294,27 @@ private: } } + unsigned getHWStackSize(unsigned StackSubEntry, bool hasPush) const { + switch (ST.device()->getGeneration()) { + case AMDGPUDeviceInfo::HD4XXX: + if (hasPush) + StackSubEntry += 2; + break; + case AMDGPUDeviceInfo::HD5XXX: + if (hasPush) + StackSubEntry ++; + case AMDGPUDeviceInfo::HD6XXX: + StackSubEntry += 2; + break; + } + return (StackSubEntry + 3)/4; // Need ceil value of StackSubEntry/4 + } + public: R600ControlFlowFinalizer(TargetMachine &tm) : MachineFunctionPass(ID), - TII (static_cast<const R600InstrInfo *>(tm.getInstrInfo())) { + TII (static_cast<const R600InstrInfo *>(tm.getInstrInfo())), + TRI(TII->getRegisterInfo()), + ST(tm.getSubtarget<AMDGPUSubtarget>()) { const AMDGPUSubtarget &ST = tm.getSubtarget<AMDGPUSubtarget>(); if (ST.device()->getGeneration() <= AMDGPUDeviceInfo::HD4XXX) MaxFetchInst = 8; @@ -115,6 +325,7 @@ public: virtual bool runOnMachineFunction(MachineFunction &MF) { unsigned MaxStack = 0; unsigned CurrentStack = 0; + bool HasPush = false; for (MachineFunction::iterator MB = MF.begin(), ME = MF.end(); MB != ME; ++MB) { MachineBasicBlock &MBB = *MB; @@ -124,14 +335,16 @@ public: R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>(); if (MFI->ShaderType == 1) { BuildMI(MBB, MBB.begin(), 
MBB.findDebugLoc(MBB.begin()), - TII->get(AMDGPU::CF_CALL_FS)); + getHWInstrDesc(CF_CALL_FS)); CfCount++; + MaxStack = 1; } + std::vector<ClauseFile> FetchClauses, AluClauses; for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); I != E;) { - if (isFetch(I)) { + if (TII->usesTextureCache(I) || TII->usesVertexCache(I)) { DEBUG(dbgs() << CfCount << ":"; I->dump();); - I = MakeFetchClause(MBB, I, 0); + FetchClauses.push_back(MakeFetchClause(MBB, I)); CfCount++; continue; } @@ -142,20 +355,25 @@ public: case AMDGPU::CF_ALU_PUSH_BEFORE: CurrentStack++; MaxStack = std::max(MaxStack, CurrentStack); + HasPush = true; case AMDGPU::CF_ALU: + I = MI; + AluClauses.push_back(MakeALUClause(MBB, I)); case AMDGPU::EG_ExportBuf: case AMDGPU::EG_ExportSwz: case AMDGPU::R600_ExportBuf: case AMDGPU::R600_ExportSwz: + case AMDGPU::RAT_WRITE_CACHELESS_32_eg: + case AMDGPU::RAT_WRITE_CACHELESS_128_eg: DEBUG(dbgs() << CfCount << ":"; MI->dump();); CfCount++; break; case AMDGPU::WHILELOOP: { - CurrentStack++; + CurrentStack+=4; MaxStack = std::max(MaxStack, CurrentStack); MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI), - TII->get(AMDGPU::WHILE_LOOP)) - .addImm(2); + getHWInstrDesc(CF_WHILE_LOOP)) + .addImm(1); std::pair<unsigned, std::set<MachineInstr *> > Pair(CfCount, std::set<MachineInstr *>()); Pair.second.insert(MIb); @@ -165,12 +383,12 @@ public: break; } case AMDGPU::ENDLOOP: { - CurrentStack--; + CurrentStack-=4; std::pair<unsigned, std::set<MachineInstr *> > Pair = LoopStack.back(); LoopStack.pop_back(); CounterPropagateAddr(Pair.second, CfCount); - BuildMI(MBB, MI, MBB.findDebugLoc(MI), TII->get(AMDGPU::END_LOOP)) + BuildMI(MBB, MI, MBB.findDebugLoc(MI), getHWInstrDesc(CF_END_LOOP)) .addImm(Pair.first + 1); MI->eraseFromParent(); CfCount++; @@ -178,7 +396,7 @@ public: } case AMDGPU::IF_PREDICATE_SET: { MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI), - TII->get(AMDGPU::CF_JUMP)) + getHWInstrDesc(CF_JUMP)) .addImm(0) .addImm(0); IfThenElseStack.push_back(MIb); @@ -192,7 +410,7 @@ public: IfThenElseStack.pop_back(); CounterPropagateAddr(JumpInst, CfCount); MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI), - TII->get(AMDGPU::CF_ELSE)) + getHWInstrDesc(CF_ELSE)) .addImm(0) .addImm(1); DEBUG(dbgs() << CfCount << ":"; MIb->dump();); @@ -207,9 +425,10 @@ public: IfThenElseStack.pop_back(); CounterPropagateAddr(IfOrElseInst, CfCount + 1); MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI), - TII->get(AMDGPU::POP)) + getHWInstrDesc(CF_POP)) .addImm(CfCount + 1) .addImm(1); + (void)MIb; DEBUG(dbgs() << CfCount << ":"; MIb->dump();); MI->eraseFromParent(); CfCount++; @@ -218,13 +437,13 @@ public: case AMDGPU::PREDICATED_BREAK: { CurrentStack--; CfCount += 3; - BuildMI(MBB, MI, MBB.findDebugLoc(MI), TII->get(AMDGPU::CF_JUMP)) + BuildMI(MBB, MI, MBB.findDebugLoc(MI), getHWInstrDesc(CF_JUMP)) .addImm(CfCount) .addImm(1); MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI), - TII->get(AMDGPU::LOOP_BREAK)) + getHWInstrDesc(CF_LOOP_BREAK)) .addImm(0); - BuildMI(MBB, MI, MBB.findDebugLoc(MI), TII->get(AMDGPU::POP)) + BuildMI(MBB, MI, MBB.findDebugLoc(MI), getHWInstrDesc(CF_POP)) .addImm(CfCount) .addImm(1); LoopStack.back().second.insert(MIb); @@ -233,20 +452,31 @@ public: } case AMDGPU::CONTINUE: { MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI), - TII->get(AMDGPU::CF_CONTINUE)) + getHWInstrDesc(CF_LOOP_CONTINUE)) .addImm(0); LoopStack.back().second.insert(MIb); MI->eraseFromParent(); CfCount++; break; } + case AMDGPU::RETURN: { + BuildMI(MBB, MI, 
MBB.findDebugLoc(MI), getHWInstrDesc(CF_END)); + CfCount++; + MI->eraseFromParent(); + if (CfCount % 2) { + BuildMI(MBB, I, MBB.findDebugLoc(MI), TII->get(AMDGPU::PAD)); + CfCount++; + } + for (unsigned i = 0, e = FetchClauses.size(); i < e; i++) + EmitFetchClause(I, FetchClauses[i], CfCount); + for (unsigned i = 0, e = AluClauses.size(); i < e; i++) + EmitALUClause(I, AluClauses[i], CfCount); + } default: break; } } - BuildMI(MBB, MBB.begin(), MBB.findDebugLoc(MBB.begin()), - TII->get(AMDGPU::STACK_SIZE)) - .addImm(MaxStack); + MFI->StackSize = getHWStackSize(MaxStack, HasPush); } return false; @@ -265,4 +495,3 @@ char R600ControlFlowFinalizer::ID = 0; llvm::FunctionPass *llvm::createR600ControlFlowFinalizer(TargetMachine &TM) { return new R600ControlFlowFinalizer(TM); } - diff --git a/contrib/llvm/lib/Target/R600/R600Defines.h b/contrib/llvm/lib/Target/R600/R600Defines.h index 16cfcf5..36bfb18 100644 --- a/contrib/llvm/lib/Target/R600/R600Defines.h +++ b/contrib/llvm/lib/Target/R600/R600Defines.h @@ -39,7 +39,9 @@ namespace R600_InstFlag { //FlagOperand bits 7, 8 NATIVE_OPERANDS = (1 << 9), OP1 = (1 << 10), - OP2 = (1 << 11) + OP2 = (1 << 11), + VTX_INST = (1 << 12), + TEX_INST = (1 << 13) }; } @@ -52,6 +54,9 @@ namespace R600_InstFlag { #define GET_REG_CHAN(reg) ((reg) >> HW_CHAN_SHIFT) #define GET_REG_INDEX(reg) ((reg) & HW_REG_MASK) +#define IS_VTX(desc) ((desc).TSFlags & R600_InstFlag::VTX_INST) +#define IS_TEX(desc) ((desc).TSFlags & R600_InstFlag::TEX_INST) + namespace R600Operands { enum Ops { DST, @@ -78,6 +83,7 @@ namespace R600Operands { LAST, PRED_SEL, IMM, + BANK_SWIZZLE, COUNT }; @@ -85,13 +91,39 @@ namespace R600Operands { // W C S S S S S S S S S S S // R O D L S R R R R S R R R R S R R R L P // D U I M R A R C C C C R C C C C R C C C A R I -// S E U T O E M C 0 0 0 0 C 1 1 1 1 C 2 2 2 S E M -// T M P E D L P 0 N R A S 1 N R A S 2 N R S T D M - {0,-1,-1, 1, 2, 3, 4, 5, 6, 7, 8, 9,-1,-1,-1,-1,-1,-1,-1,-1,-1,10,11,12}, - {0, 1, 2, 3, 4 ,5 ,6 ,7, 8, 9,10,11,12,13,14,15,16,-1,-1,-1,-1,17,18,19}, - {0,-1,-1,-1,-1, 1, 2, 3, 4, 5,-1, 6, 7, 8, 9,-1,10,11,12,13,14,15,16,17} +// S E U T O E M C 0 0 0 0 C 1 1 1 1 C 2 2 2 S E M B +// T M P E D L P 0 N R A S 1 N R A S 2 N R S T D M S + {0,-1,-1, 1, 2, 3, 4, 5, 6, 7, 8, 9,-1,-1,-1,-1,-1,-1,-1,-1,-1,10,11,12,13}, + {0, 1, 2, 3, 4 ,5 ,6 ,7, 8, 9,10,11,12,13,14,15,16,-1,-1,-1,-1,17,18,19,20}, + {0,-1,-1,-1,-1, 1, 2, 3, 4, 5,-1, 6, 7, 8, 9,-1,10,11,12,13,14,15,16,17,18} }; } +//===----------------------------------------------------------------------===// +// Config register definitions +//===----------------------------------------------------------------------===// + +#define R_02880C_DB_SHADER_CONTROL 0x02880C +#define S_02880C_KILL_ENABLE(x) (((x) & 0x1) << 6) + +// These fields are the same for all shader types and families. 
+#define S_NUM_GPRS(x) (((x) & 0xFF) << 0) +#define S_STACK_SIZE(x) (((x) & 0xFF) << 8) +//===----------------------------------------------------------------------===// +// R600, R700 Registers +//===----------------------------------------------------------------------===// + +#define R_028850_SQ_PGM_RESOURCES_PS 0x028850 +#define R_028868_SQ_PGM_RESOURCES_VS 0x028868 + +//===----------------------------------------------------------------------===// +// Evergreen, Northern Islands Registers +//===----------------------------------------------------------------------===// + +#define R_028844_SQ_PGM_RESOURCES_PS 0x028844 +#define R_028860_SQ_PGM_RESOURCES_VS 0x028860 +#define R_028878_SQ_PGM_RESOURCES_GS 0x028878 +#define R_0288D4_SQ_PGM_RESOURCES_LS 0x0288d4 + #endif // R600DEFINES_H_ diff --git a/contrib/llvm/lib/Target/R600/R600ISelLowering.cpp b/contrib/llvm/lib/Target/R600/R600ISelLowering.cpp index 53e6e51..7252235 100644 --- a/contrib/llvm/lib/Target/R600/R600ISelLowering.cpp +++ b/contrib/llvm/lib/Target/R600/R600ISelLowering.cpp @@ -43,11 +43,25 @@ R600TargetLowering::R600TargetLowering(TargetMachine &TM) : setOperationAction(ISD::AND, MVT::v4i32, Expand); setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Expand); setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Expand); + setOperationAction(ISD::MUL, MVT::v2i32, Expand); + setOperationAction(ISD::MUL, MVT::v4i32, Expand); + setOperationAction(ISD::OR, MVT::v4i32, Expand); + setOperationAction(ISD::OR, MVT::v2i32, Expand); setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Expand); + setOperationAction(ISD::SHL, MVT::v4i32, Expand); + setOperationAction(ISD::SHL, MVT::v2i32, Expand); + setOperationAction(ISD::SRL, MVT::v4i32, Expand); + setOperationAction(ISD::SRL, MVT::v2i32, Expand); + setOperationAction(ISD::SRA, MVT::v4i32, Expand); + setOperationAction(ISD::SRA, MVT::v2i32, Expand); + setOperationAction(ISD::SUB, MVT::v4i32, Expand); + setOperationAction(ISD::SUB, MVT::v2i32, Expand); setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Expand); setOperationAction(ISD::UDIV, MVT::v4i32, Expand); setOperationAction(ISD::UREM, MVT::v4i32, Expand); setOperationAction(ISD::SETCC, MVT::v4i32, Expand); + setOperationAction(ISD::XOR, MVT::v4i32, Expand); + setOperationAction(ISD::XOR, MVT::v2i32, Expand); setOperationAction(ISD::BR_CC, MVT::i32, Expand); setOperationAction(ISD::BR_CC, MVT::f32, Expand); @@ -70,6 +84,9 @@ R600TargetLowering::R600TargetLowering(TargetMachine &TM) : setOperationAction(ISD::SELECT, MVT::i32, Custom); setOperationAction(ISD::SELECT, MVT::f32, Custom); + setOperationAction(ISD::VSELECT, MVT::v4i32, Expand); + setOperationAction(ISD::VSELECT, MVT::v2i32, Expand); + // Legalize loads and stores to the private address space. 
setOperationAction(ISD::LOAD, MVT::i32, Custom); setOperationAction(ISD::LOAD, MVT::v2i32, Custom); @@ -93,6 +110,7 @@ R600TargetLowering::R600TargetLowering(TargetMachine &TM) : setTargetDAGCombine(ISD::SELECT_CC); setBooleanContents(ZeroOrNegativeOneBooleanContent); + setBooleanVectorContents(ZeroOrNegativeOneBooleanContent); setSchedulingPreference(Sched::VLIW); } diff --git a/contrib/llvm/lib/Target/R600/R600InstrInfo.cpp b/contrib/llvm/lib/Target/R600/R600InstrInfo.cpp index b232188..37150c4 100644 --- a/contrib/llvm/lib/Target/R600/R600InstrInfo.cpp +++ b/contrib/llvm/lib/Target/R600/R600InstrInfo.cpp @@ -13,6 +13,7 @@ //===----------------------------------------------------------------------===// #include "R600InstrInfo.h" +#include "AMDGPU.h" #include "AMDGPUSubtarget.h" #include "AMDGPUTargetMachine.h" #include "R600Defines.h" @@ -29,7 +30,8 @@ using namespace llvm; R600InstrInfo::R600InstrInfo(AMDGPUTargetMachine &tm) : AMDGPUInstrInfo(tm), - RI(tm, *this) + RI(tm, *this), + ST(tm.getSubtarget<AMDGPUSubtarget>()) { } const R600RegisterInfo &R600InstrInfo::getRegisterInfo() const { @@ -139,6 +141,33 @@ bool R600InstrInfo::isALUInstr(unsigned Opcode) const { (TargetFlags & R600_InstFlag::OP3)); } +bool R600InstrInfo::isTransOnly(unsigned Opcode) const { + return (get(Opcode).TSFlags & R600_InstFlag::TRANS_ONLY); +} + +bool R600InstrInfo::isTransOnly(const MachineInstr *MI) const { + return isTransOnly(MI->getOpcode()); +} + +bool R600InstrInfo::usesVertexCache(unsigned Opcode) const { + return ST.hasVertexCache() && IS_VTX(get(Opcode)); +} + +bool R600InstrInfo::usesVertexCache(const MachineInstr *MI) const { + const R600MachineFunctionInfo *MFI = MI->getParent()->getParent()->getInfo<R600MachineFunctionInfo>(); + return MFI->ShaderType != ShaderType::COMPUTE && usesVertexCache(MI->getOpcode()); +} + +bool R600InstrInfo::usesTextureCache(unsigned Opcode) const { + return (!ST.hasVertexCache() && IS_VTX(get(Opcode))) || IS_TEX(get(Opcode)); +} + +bool R600InstrInfo::usesTextureCache(const MachineInstr *MI) const { + const R600MachineFunctionInfo *MFI = MI->getParent()->getParent()->getInfo<R600MachineFunctionInfo>(); + return (MFI->ShaderType == ShaderType::COMPUTE && usesVertexCache(MI->getOpcode())) || + usesTextureCache(MI->getOpcode()); +} + bool R600InstrInfo::fitsConstReadLimitations(const std::vector<unsigned> &Consts) const { @@ -183,10 +212,19 @@ R600InstrInfo::canBundle(const std::vector<MachineInstr *> &MIs) const { int SrcIdx = getOperandIdx(MI->getOpcode(), OpTable[j][0]); if (SrcIdx < 0) break; - if (MI->getOperand(SrcIdx).getReg() == AMDGPU::ALU_CONST) { + unsigned Reg = MI->getOperand(SrcIdx).getReg(); + if (Reg == AMDGPU::ALU_CONST) { unsigned Const = MI->getOperand( getOperandIdx(MI->getOpcode(), OpTable[j][1])).getImm(); Consts.push_back(Const); + continue; + } + if (AMDGPU::R600_KC0RegClass.contains(Reg) || + AMDGPU::R600_KC1RegClass.contains(Reg)) { + unsigned Index = RI.getEncodingValue(Reg) & 0xff; + unsigned Chan = RI.getHWRegChan(Reg); + Consts.push_back((Index << 2) | Chan); + continue; } } } @@ -684,7 +722,8 @@ MachineInstrBuilder R600InstrInfo::buildDefaultInstruction(MachineBasicBlock &MB //scheduling to the backend, we can change the default to 0. 
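The default of 1 for the $last flag below marks every ALU instruction as closing its own instruction group; the R600Packetizer added later in this diff clears the flag on all but the final instruction of a bundle (see setIsLastBit()). A minimal standalone sketch of that post-processing, with AluInstr as a made-up stand-in:

#include <vector>

struct AluInstr { unsigned opcode; bool last = true; };  // every ALU op defaults to last = 1

// Mirror of what the packetizer ends up with after growing a VLIW group:
// only the final slot keeps the $last bit.
void markGroup(std::vector<AluInstr> &group) {
  for (size_t i = 0; i + 1 < group.size(); ++i)
    group[i].last = false;
}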
MIB.addImm(1) // $last .addReg(AMDGPU::PRED_SEL_OFF) // $pred_sel - .addImm(0); // $literal + .addImm(0) // $literal + .addImm(0); // $bank_swizzle return MIB; } diff --git a/contrib/llvm/lib/Target/R600/R600InstrInfo.h b/contrib/llvm/lib/Target/R600/R600InstrInfo.h index dbae900..babe4b8 100644 --- a/contrib/llvm/lib/Target/R600/R600InstrInfo.h +++ b/contrib/llvm/lib/Target/R600/R600InstrInfo.h @@ -33,6 +33,7 @@ namespace llvm { class R600InstrInfo : public AMDGPUInstrInfo { private: const R600RegisterInfo RI; + const AMDGPUSubtarget &ST; int getBranchInstr(const MachineOperand &op) const; @@ -53,6 +54,14 @@ namespace llvm { /// \returns true if this \p Opcode represents an ALU instruction. bool isALUInstr(unsigned Opcode) const; + bool isTransOnly(unsigned Opcode) const; + bool isTransOnly(const MachineInstr *MI) const; + + bool usesVertexCache(unsigned Opcode) const; + bool usesVertexCache(const MachineInstr *MI) const; + bool usesTextureCache(unsigned Opcode) const; + bool usesTextureCache(const MachineInstr *MI) const; + bool fitsConstReadLimitations(const std::vector<unsigned>&) const; bool canBundle(const std::vector<MachineInstr *> &) const; diff --git a/contrib/llvm/lib/Target/R600/R600Instructions.td b/contrib/llvm/lib/Target/R600/R600Instructions.td index 663b41a..8f47523 100644 --- a/contrib/llvm/lib/Target/R600/R600Instructions.td +++ b/contrib/llvm/lib/Target/R600/R600Instructions.td @@ -13,11 +13,12 @@ include "R600Intrinsics.td" -class InstR600 <bits<11> inst, dag outs, dag ins, string asm, list<dag> pattern, +class InstR600 <dag outs, dag ins, string asm, list<dag> pattern, InstrItinClass itin> : AMDGPUInst <outs, ins, asm, pattern> { field bits<64> Inst; + bit TransOnly = 0; bit Trig = 0; bit Op3 = 0; bit isVector = 0; @@ -25,9 +26,9 @@ class InstR600 <bits<11> inst, dag outs, dag ins, string asm, list<dag> pattern, bit Op1 = 0; bit Op2 = 0; bit HasNativeOperands = 0; + bit VTXInst = 0; + bit TEXInst = 0; - bits<11> op_code = inst; - //let Inst = inst; let Namespace = "AMDGPU"; let OutOperandList = outs; let InOperandList = ins; @@ -35,6 +36,7 @@ class InstR600 <bits<11> inst, dag outs, dag ins, string asm, list<dag> pattern, let Pattern = pattern; let Itinerary = itin; + let TSFlags{0} = TransOnly; let TSFlags{4} = Trig; let TSFlags{5} = Op3; @@ -45,11 +47,12 @@ class InstR600 <bits<11> inst, dag outs, dag ins, string asm, list<dag> pattern, let TSFlags{9} = HasNativeOperands; let TSFlags{10} = Op1; let TSFlags{11} = Op2; + let TSFlags{12} = VTXInst; + let TSFlags{13} = TEXInst; } class InstR600ISA <dag outs, dag ins, string asm, list<dag> pattern> : - AMDGPUInst <outs, ins, asm, pattern> { - field bits<64> Inst; + InstR600 <outs, ins, asm, pattern, NullALU> { let Namespace = "AMDGPU"; } @@ -74,6 +77,9 @@ class InstFlag<string PM = "printOperand", int Default = 0> def SEL : OperandWithDefaultOps <i32, (ops (i32 -1))> { let PrintMethod = "printSel"; } +def BANK_SWIZZLE : OperandWithDefaultOps <i32, (ops (i32 0))> { + let PrintMethod = "printBankSwizzle"; +} def LITERAL : InstFlag<"printLiteral">; @@ -137,7 +143,7 @@ class R600ALU_Word1 { field bits<32> Word1; bits<11> dst; - bits<3> bank_swizzle = 0; + bits<3> bank_swizzle; bits<1> dst_rel; bits<1> clamp; @@ -346,15 +352,15 @@ let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in { // and R600InstrInfo::getOperandIdx(). 
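The new $bank_swizzle operand carried by the ALU formats below is printed via printBankSwizzle and defaults to 0; the same ALU_VEC_* values appear in the R600Packetizer added later in this diff, where they appear to permute which read slot each of src0/src1/src2 occupies when a group is checked for register read-port conflicts. A standalone sketch of that permutation (applySwizzle is a made-up name mirroring the packetizer's Swizzle() helper, same enum order):

#include <array>
#include <utility>

enum BankSwizzle { ALU_VEC_012, ALU_VEC_021, ALU_VEC_120,
                   ALU_VEC_102, ALU_VEC_201, ALU_VEC_210 };

// Src[i] is the source occupying read slot i once the swizzle is applied;
// the swap sequences follow the Swizzle() helper in R600Packetizer.cpp.
std::array<int, 3> applySwizzle(std::array<int, 3> Src, BankSwizzle Swz) {
  switch (Swz) {
  case ALU_VEC_012: break;
  case ALU_VEC_021: std::swap(Src[1], Src[2]); break;
  case ALU_VEC_102: std::swap(Src[0], Src[1]); break;
  case ALU_VEC_120: std::swap(Src[0], Src[1]); std::swap(Src[0], Src[2]); break;
  case ALU_VEC_201: std::swap(Src[0], Src[2]); std::swap(Src[0], Src[1]); break;
  case ALU_VEC_210: std::swap(Src[0], Src[2]); break;
  }
  return Src;
}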
class R600_1OP <bits<11> inst, string opName, list<dag> pattern, InstrItinClass itin = AnyALU> : - InstR600 <0, - (outs R600_Reg32:$dst), + InstR600 <(outs R600_Reg32:$dst), (ins WRITE:$write, OMOD:$omod, REL:$dst_rel, CLAMP:$clamp, R600_Reg32:$src0, NEG:$src0_neg, REL:$src0_rel, ABS:$src0_abs, SEL:$src0_sel, - LAST:$last, R600_Pred:$pred_sel, LITERAL:$literal), + LAST:$last, R600_Pred:$pred_sel, LITERAL:$literal, + BANK_SWIZZLE:$bank_swizzle), !strconcat(" ", opName, - "$clamp $dst$write$dst_rel$omod, " + "$last$clamp $dst$write$dst_rel$omod, " "$src0_neg$src0_abs$src0$src0_abs$src0_rel, " - "$literal $pred_sel$last"), + "$pred_sel $bank_swizzle"), pattern, itin>, R600ALU_Word0, @@ -385,18 +391,18 @@ class R600_1OP_Helper <bits<11> inst, string opName, SDPatternOperator node, // R600InstrInfo::buildDefaultInstruction(), and R600InstrInfo::getOperandIdx(). class R600_2OP <bits<11> inst, string opName, list<dag> pattern, InstrItinClass itin = AnyALU> : - InstR600 <inst, - (outs R600_Reg32:$dst), + InstR600 <(outs R600_Reg32:$dst), (ins UEM:$update_exec_mask, UP:$update_pred, WRITE:$write, OMOD:$omod, REL:$dst_rel, CLAMP:$clamp, R600_Reg32:$src0, NEG:$src0_neg, REL:$src0_rel, ABS:$src0_abs, SEL:$src0_sel, R600_Reg32:$src1, NEG:$src1_neg, REL:$src1_rel, ABS:$src1_abs, SEL:$src1_sel, - LAST:$last, R600_Pred:$pred_sel, LITERAL:$literal), + LAST:$last, R600_Pred:$pred_sel, LITERAL:$literal, + BANK_SWIZZLE:$bank_swizzle), !strconcat(" ", opName, - "$clamp $update_exec_mask$update_pred$dst$write$dst_rel$omod, " + "$last$clamp $update_exec_mask$update_pred$dst$write$dst_rel$omod, " "$src0_neg$src0_abs$src0$src0_abs$src0_rel, " "$src1_neg$src1_abs$src1$src1_abs$src1_rel, " - "$literal $pred_sel$last"), + "$pred_sel $bank_swizzle"), pattern, itin>, R600ALU_Word0, @@ -423,18 +429,19 @@ class R600_2OP_Helper <bits<11> inst, string opName, SDPatternOperator node, // R600InstrInfo::getOperandIdx(). 
class R600_3OP <bits<5> inst, string opName, list<dag> pattern, InstrItinClass itin = AnyALU> : - InstR600 <0, - (outs R600_Reg32:$dst), + InstR600 <(outs R600_Reg32:$dst), (ins REL:$dst_rel, CLAMP:$clamp, R600_Reg32:$src0, NEG:$src0_neg, REL:$src0_rel, SEL:$src0_sel, R600_Reg32:$src1, NEG:$src1_neg, REL:$src1_rel, SEL:$src1_sel, R600_Reg32:$src2, NEG:$src2_neg, REL:$src2_rel, SEL:$src2_sel, - LAST:$last, R600_Pred:$pred_sel, LITERAL:$literal), - !strconcat(" ", opName, "$clamp $dst$dst_rel, " + LAST:$last, R600_Pred:$pred_sel, LITERAL:$literal, + BANK_SWIZZLE:$bank_swizzle), + !strconcat(" ", opName, "$last$clamp $dst$dst_rel, " "$src0_neg$src0$src0_rel, " "$src1_neg$src1$src1_rel, " "$src2_neg$src2$src2_rel, " - "$literal $pred_sel$last"), + "$pred_sel" + "$bank_swizzle"), pattern, itin>, R600ALU_Word0, @@ -450,8 +457,7 @@ class R600_3OP <bits<5> inst, string opName, list<dag> pattern, class R600_REDUCTION <bits<11> inst, dag ins, string asm, list<dag> pattern, InstrItinClass itin = VecALU> : - InstR600 <inst, - (outs R600_Reg32:$dst), + InstR600 <(outs R600_Reg32:$dst), ins, asm, pattern, @@ -459,8 +465,7 @@ class R600_REDUCTION <bits<11> inst, dag ins, string asm, list<dag> pattern, class R600_TEX <bits<11> inst, string opName, list<dag> pattern, InstrItinClass itin = AnyALU> : - InstR600 <inst, - (outs R600_Reg128:$DST_GPR), + InstR600 <(outs R600_Reg128:$DST_GPR), (ins R600_Reg128:$SRC_GPR, i32imm:$RESOURCE_ID, i32imm:$SAMPLER_ID, i32imm:$textureTarget), !strconcat(opName, "$DST_GPR, $SRC_GPR, $RESOURCE_ID, $SAMPLER_ID, $textureTarget"), pattern, @@ -481,11 +486,14 @@ class R600_TEX <bits<11> inst, string opName, list<dag> pattern, let FETCH_WHOLE_QUAD = 0; let ALT_CONST = 0; let SAMPLER_INDEX_MODE = 0; + let RESOURCE_INDEX_MODE = 0; let COORD_TYPE_X = 0; let COORD_TYPE_Y = 0; let COORD_TYPE_Z = 0; let COORD_TYPE_W = 0; + + let TEXInst = 1; } } // End mayLoad = 1, mayStore = 0, hasSideEffects = 0 @@ -738,7 +746,9 @@ multiclass SteamOutputExportPattern<Instruction ExportInst, 4095, imm:$mask, buf3inst, 0)>; } -let usesCustomInserter = 1 in { +// Export Instructions should not be duplicated by TailDuplication pass +// (which assumes that duplicable instruction are affected by exec mask) +let usesCustomInserter = 1, isNotDuplicable = 1 in { class ExportSwzInst : InstR600ISA<( outs), @@ -805,12 +815,15 @@ class CF_ALU_WORD1 { let Word1{31} = BARRIER; } +def KCACHE : InstFlag<"printKCache">; + class ALU_CLAUSE<bits<4> inst, string OpName> : AMDGPUInst <(outs), -(ins i32imm:$ADDR, i32imm:$KCACHE_BANK0, i32imm:$KCACHE_BANK1, i32imm:$KCACHE_MODE0, i32imm:$KCACHE_MODE1, -i32imm:$KCACHE_ADDR0, i32imm:$KCACHE_ADDR1, i32imm:$COUNT), +(ins i32imm:$ADDR, i32imm:$KCACHE_BANK0, i32imm:$KCACHE_BANK1, +KCACHE:$KCACHE_MODE0, KCACHE:$KCACHE_MODE1, +i32imm:$KCACHE_ADDR0, i32imm:$KCACHE_ADDR1, +i32imm:$COUNT), !strconcat(OpName, " $COUNT, @$ADDR, " -"KC0[CB$KCACHE_BANK0:$KCACHE_ADDR0-$KCACHE_ADDR0+32]" -", KC1[CB$KCACHE_BANK1:$KCACHE_ADDR1-$KCACHE_ADDR1+32]"), +"KC0[$KCACHE_MODE0], KC1[$KCACHE_MODE1]"), [] >, CF_ALU_WORD0, CF_ALU_WORD1 { field bits<64> Inst; @@ -823,109 +836,139 @@ i32imm:$KCACHE_ADDR0, i32imm:$KCACHE_ADDR1, i32imm:$COUNT), let Inst{63-32} = Word1; } -class CF_WORD0 { +class CF_WORD0_R600 { field bits<32> Word0; - bits<24> ADDR; - bits<3> JUMPTABLE_SEL; + bits<32> ADDR; - let Word0{23-0} = ADDR; - let Word0{26-24} = JUMPTABLE_SEL; + let Word0 = ADDR; } -class CF_WORD1 { +class CF_WORD1_R600 { field bits<32> Word1; bits<3> POP_COUNT; bits<5> CF_CONST; bits<2> COND; - bits<6> COUNT; + 
bits<3> COUNT; + bits<6> CALL_COUNT; + bits<1> COUNT_3; + bits<1> END_OF_PROGRAM; bits<1> VALID_PIXEL_MODE; - bits<8> CF_INST; + bits<7> CF_INST; + bits<1> WHOLE_QUAD_MODE; bits<1> BARRIER; let Word1{2-0} = POP_COUNT; let Word1{7-3} = CF_CONST; let Word1{9-8} = COND; - let Word1{15-10} = COUNT; - let Word1{20} = VALID_PIXEL_MODE; - let Word1{29-22} = CF_INST; + let Word1{12-10} = COUNT; + let Word1{18-13} = CALL_COUNT; + let Word1{19} = COUNT_3; + let Word1{21} = END_OF_PROGRAM; + let Word1{22} = VALID_PIXEL_MODE; + let Word1{29-23} = CF_INST; + let Word1{30} = WHOLE_QUAD_MODE; let Word1{31} = BARRIER; } -class CF_CLAUSE <bits<8> inst, dag ins, string AsmPrint> : AMDGPUInst <(outs), -ins, AsmPrint, [] >, CF_WORD0, CF_WORD1 { +class CF_CLAUSE_R600 <bits<7> inst, dag ins, string AsmPrint> : AMDGPUInst <(outs), +ins, AsmPrint, [] >, CF_WORD0_R600, CF_WORD1_R600 { field bits<64> Inst; let CF_INST = inst; let BARRIER = 1; - let JUMPTABLE_SEL = 0; let CF_CONST = 0; let VALID_PIXEL_MODE = 0; let COND = 0; + let CALL_COUNT = 0; + let COUNT_3 = 0; + let END_OF_PROGRAM = 0; + let WHOLE_QUAD_MODE = 0; let Inst{31-0} = Word0; let Inst{63-32} = Word1; } -def CF_TC : CF_CLAUSE<1, (ins i32imm:$ADDR, i32imm:$COUNT), -"TEX $COUNT @$ADDR"> { - let POP_COUNT = 0; -} - -def CF_VC : CF_CLAUSE<2, (ins i32imm:$ADDR, i32imm:$COUNT), -"VTX $COUNT @$ADDR"> { - let POP_COUNT = 0; -} +class CF_WORD0_EG { + field bits<32> Word0; -def WHILE_LOOP : CF_CLAUSE<6, (ins i32imm:$ADDR), "LOOP_START_DX10 @$ADDR"> { - let POP_COUNT = 0; - let COUNT = 0; -} + bits<24> ADDR; + bits<3> JUMPTABLE_SEL; -def END_LOOP : CF_CLAUSE<5, (ins i32imm:$ADDR), "END_LOOP @$ADDR"> { - let POP_COUNT = 0; - let COUNT = 0; + let Word0{23-0} = ADDR; + let Word0{26-24} = JUMPTABLE_SEL; } -def LOOP_BREAK : CF_CLAUSE<9, (ins i32imm:$ADDR), "LOOP_BREAK @$ADDR"> { - let POP_COUNT = 0; - let COUNT = 0; -} +class CF_WORD1_EG { + field bits<32> Word1; -def CF_CONTINUE : CF_CLAUSE<8, (ins i32imm:$ADDR), "CONTINUE @$ADDR"> { - let POP_COUNT = 0; - let COUNT = 0; -} + bits<3> POP_COUNT; + bits<5> CF_CONST; + bits<2> COND; + bits<6> COUNT; + bits<1> VALID_PIXEL_MODE; + bits<1> END_OF_PROGRAM; + bits<8> CF_INST; + bits<1> BARRIER; -def CF_JUMP : CF_CLAUSE<10, (ins i32imm:$ADDR, i32imm:$POP_COUNT), "JUMP @$ADDR POP:$POP_COUNT"> { - let COUNT = 0; + let Word1{2-0} = POP_COUNT; + let Word1{7-3} = CF_CONST; + let Word1{9-8} = COND; + let Word1{15-10} = COUNT; + let Word1{20} = VALID_PIXEL_MODE; + let Word1{21} = END_OF_PROGRAM; + let Word1{29-22} = CF_INST; + let Word1{31} = BARRIER; } -def CF_ELSE : CF_CLAUSE<13, (ins i32imm:$ADDR, i32imm:$POP_COUNT), "ELSE @$ADDR POP:$POP_COUNT"> { - let COUNT = 0; -} +class CF_CLAUSE_EG <bits<8> inst, dag ins, string AsmPrint> : AMDGPUInst <(outs), +ins, AsmPrint, [] >, CF_WORD0_EG, CF_WORD1_EG { + field bits<64> Inst; -def CF_CALL_FS : CF_CLAUSE<19, (ins), "CALL_FS"> { - let ADDR = 0; - let COUNT = 0; - let POP_COUNT = 0; -} + let CF_INST = inst; + let BARRIER = 1; + let JUMPTABLE_SEL = 0; + let CF_CONST = 0; + let VALID_PIXEL_MODE = 0; + let COND = 0; + let END_OF_PROGRAM = 0; -def POP : CF_CLAUSE<14, (ins i32imm:$ADDR, i32imm:$POP_COUNT), "POP @$ADDR POP:$POP_COUNT"> { - let COUNT = 0; + let Inst{31-0} = Word0; + let Inst{63-32} = Word1; } def CF_ALU : ALU_CLAUSE<8, "ALU">; def CF_ALU_PUSH_BEFORE : ALU_CLAUSE<9, "ALU_PUSH_BEFORE">; -def STACK_SIZE : AMDGPUInst <(outs), -(ins i32imm:$num), "nstack $num", [] > { +def FETCH_CLAUSE : AMDGPUInst <(outs), +(ins i32imm:$addr), "Fetch clause starting at $addr:", [] > { field bits<8> 
Inst; bits<8> num; let Inst = num; } +def ALU_CLAUSE : AMDGPUInst <(outs), +(ins i32imm:$addr), "ALU clause starting at $addr:", [] > { + field bits<8> Inst; + bits<8> num; + let Inst = num; +} + +def LITERALS : AMDGPUInst <(outs), +(ins LITERAL:$literal1, LITERAL:$literal2), "$literal1, $literal2", [] > { + field bits<64> Inst; + bits<32> literal1; + bits<32> literal2; + + let Inst{31-0} = literal1; + let Inst{63-32} = literal2; +} + +def PAD : AMDGPUInst <(outs), (ins), "PAD", [] > { + field bits<64> Inst; +} + let Predicates = [isR600toCayman] in { //===----------------------------------------------------------------------===// @@ -944,58 +987,42 @@ def MIN : R600_2OP_Helper <0x4, "MIN", AMDGPUfmin>; // XXX: Use the defs in TargetSelectionDAG.td instead of intrinsics. def SETE : R600_2OP < 0x08, "SETE", - [(set R600_Reg32:$dst, - (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, FP_ONE, FP_ZERO, - COND_EQ))] + [(set f32:$dst, (selectcc f32:$src0, f32:$src1, FP_ONE, FP_ZERO, COND_EQ))] >; def SGT : R600_2OP < 0x09, "SETGT", - [(set R600_Reg32:$dst, - (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, FP_ONE, FP_ZERO, - COND_GT))] + [(set f32:$dst, (selectcc f32:$src0, f32:$src1, FP_ONE, FP_ZERO, COND_GT))] >; def SGE : R600_2OP < 0xA, "SETGE", - [(set R600_Reg32:$dst, - (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, FP_ONE, FP_ZERO, - COND_GE))] + [(set f32:$dst, (selectcc f32:$src0, f32:$src1, FP_ONE, FP_ZERO, COND_GE))] >; def SNE : R600_2OP < 0xB, "SETNE", - [(set R600_Reg32:$dst, - (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, FP_ONE, FP_ZERO, - COND_NE))] + [(set f32:$dst, (selectcc f32:$src0, f32:$src1, FP_ONE, FP_ZERO, COND_NE))] >; def SETE_DX10 : R600_2OP < 0xC, "SETE_DX10", - [(set R600_Reg32:$dst, - (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, (i32 -1), (i32 0), - COND_EQ))] + [(set i32:$dst, (selectcc f32:$src0, f32:$src1, -1, 0, COND_EQ))] >; def SETGT_DX10 : R600_2OP < 0xD, "SETGT_DX10", - [(set R600_Reg32:$dst, - (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, (i32 -1), (i32 0), - COND_GT))] + [(set i32:$dst, (selectcc f32:$src0, f32:$src1, -1, 0, COND_GT))] >; def SETGE_DX10 : R600_2OP < 0xE, "SETGE_DX10", - [(set R600_Reg32:$dst, - (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, (i32 -1), (i32 0), - COND_GE))] + [(set i32:$dst, (selectcc f32:$src0, f32:$src1, -1, 0, COND_GE))] >; def SETNE_DX10 : R600_2OP < 0xF, "SETNE_DX10", - [(set R600_Reg32:$dst, - (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, (i32 -1), (i32 0), - COND_NE))] + [(set i32:$dst, (selectcc f32:$src0, f32:$src1, -1, 0, COND_NE))] >; def FRACT : R600_1OP_Helper <0x10, "FRACT", AMDGPUfract>; @@ -1053,38 +1080,32 @@ def MIN_UINT : R600_2OP_Helper <0x39, "MIN_UINT", AMDGPUumin>; def SETE_INT : R600_2OP < 0x3A, "SETE_INT", - [(set (i32 R600_Reg32:$dst), - (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETEQ))] + [(set i32:$dst, (selectcc i32:$src0, i32:$src1, -1, 0, SETEQ))] >; def SETGT_INT : R600_2OP < 0x3B, "SETGT_INT", - [(set (i32 R600_Reg32:$dst), - (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETGT))] + [(set i32:$dst, (selectcc i32:$src0, i32:$src1, -1, 0, SETGT))] >; def SETGE_INT : R600_2OP < 0x3C, "SETGE_INT", - [(set (i32 R600_Reg32:$dst), - (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETGE))] + [(set i32:$dst, (selectcc i32:$src0, i32:$src1, -1, 0, SETGE))] >; def SETNE_INT : R600_2OP < 0x3D, "SETNE_INT", - [(set (i32 R600_Reg32:$dst), - (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETNE))] + [(set i32:$dst, 
(selectcc i32:$src0, i32:$src1, -1, 0, SETNE))] >; def SETGT_UINT : R600_2OP < 0x3E, "SETGT_UINT", - [(set (i32 R600_Reg32:$dst), - (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETUGT))] + [(set i32:$dst, (selectcc i32:$src0, i32:$src1, -1, 0, SETUGT))] >; def SETGE_UINT : R600_2OP < 0x3F, "SETGE_UINT", - [(set (i32 R600_Reg32:$dst), - (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETUGE))] + [(set i32:$dst, (selectcc i32:$src0, i32:$src1, -1, 0, SETUGE))] >; def PRED_SETE_INT : R600_2OP <0x42, "PRED_SETE_INT", []>; @@ -1094,26 +1115,17 @@ def PRED_SETNE_INT : R600_2OP <0x45, "PRED_SETNE_INT", []>; def CNDE_INT : R600_3OP < 0x1C, "CNDE_INT", - [(set (i32 R600_Reg32:$dst), - (selectcc (i32 R600_Reg32:$src0), 0, - (i32 R600_Reg32:$src1), (i32 R600_Reg32:$src2), - COND_EQ))] + [(set i32:$dst, (selectcc i32:$src0, 0, i32:$src1, i32:$src2, COND_EQ))] >; def CNDGE_INT : R600_3OP < 0x1E, "CNDGE_INT", - [(set (i32 R600_Reg32:$dst), - (selectcc (i32 R600_Reg32:$src0), 0, - (i32 R600_Reg32:$src1), (i32 R600_Reg32:$src2), - COND_GE))] + [(set i32:$dst, (selectcc i32:$src0, 0, i32:$src1, i32:$src2, COND_GE))] >; def CNDGT_INT : R600_3OP < 0x1D, "CNDGT_INT", - [(set (i32 R600_Reg32:$dst), - (selectcc (i32 R600_Reg32:$src0), 0, - (i32 R600_Reg32:$src1), (i32 R600_Reg32:$src2), - COND_GT))] + [(set i32:$dst, (selectcc i32:$src0, 0, i32:$src1, i32:$src2, COND_GT))] >; //===----------------------------------------------------------------------===// @@ -1122,7 +1134,7 @@ def CNDGT_INT : R600_3OP < def TEX_LD : R600_TEX < 0x03, "TEX_LD", - [(set R600_Reg128:$DST_GPR, (int_AMDGPU_txf R600_Reg128:$SRC_GPR, + [(set v4f32:$DST_GPR, (int_AMDGPU_txf v4f32:$SRC_GPR, imm:$OFFSET_X, imm:$OFFSET_Y, imm:$OFFSET_Z, imm:$RESOURCE_ID, imm:$SAMPLER_ID, imm:$textureTarget))] > { @@ -1135,19 +1147,19 @@ let InOperandList = (ins R600_Reg128:$SRC_GPR, i32imm:$OFFSET_X, def TEX_GET_TEXTURE_RESINFO : R600_TEX < 0x04, "TEX_GET_TEXTURE_RESINFO", - [(set R600_Reg128:$DST_GPR, (int_AMDGPU_txq R600_Reg128:$SRC_GPR, + [(set v4f32:$DST_GPR, (int_AMDGPU_txq v4f32:$SRC_GPR, imm:$RESOURCE_ID, imm:$SAMPLER_ID, imm:$textureTarget))] >; def TEX_GET_GRADIENTS_H : R600_TEX < 0x07, "TEX_GET_GRADIENTS_H", - [(set R600_Reg128:$DST_GPR, (int_AMDGPU_ddx R600_Reg128:$SRC_GPR, + [(set v4f32:$DST_GPR, (int_AMDGPU_ddx v4f32:$SRC_GPR, imm:$RESOURCE_ID, imm:$SAMPLER_ID, imm:$textureTarget))] >; def TEX_GET_GRADIENTS_V : R600_TEX < 0x08, "TEX_GET_GRADIENTS_V", - [(set R600_Reg128:$DST_GPR, (int_AMDGPU_ddy R600_Reg128:$SRC_GPR, + [(set v4f32:$DST_GPR, (int_AMDGPU_ddy v4f32:$SRC_GPR, imm:$RESOURCE_ID, imm:$SAMPLER_ID, imm:$textureTarget))] >; @@ -1163,37 +1175,37 @@ def TEX_SET_GRADIENTS_V : R600_TEX < def TEX_SAMPLE : R600_TEX < 0x10, "TEX_SAMPLE", - [(set R600_Reg128:$DST_GPR, (int_AMDGPU_tex R600_Reg128:$SRC_GPR, + [(set v4f32:$DST_GPR, (int_AMDGPU_tex v4f32:$SRC_GPR, imm:$RESOURCE_ID, imm:$SAMPLER_ID, imm:$textureTarget))] >; def TEX_SAMPLE_C : R600_TEX < 0x18, "TEX_SAMPLE_C", - [(set R600_Reg128:$DST_GPR, (int_AMDGPU_tex R600_Reg128:$SRC_GPR, + [(set v4f32:$DST_GPR, (int_AMDGPU_tex v4f32:$SRC_GPR, imm:$RESOURCE_ID, imm:$SAMPLER_ID, TEX_SHADOW:$textureTarget))] >; def TEX_SAMPLE_L : R600_TEX < 0x11, "TEX_SAMPLE_L", - [(set R600_Reg128:$DST_GPR, (int_AMDGPU_txl R600_Reg128:$SRC_GPR, + [(set v4f32:$DST_GPR, (int_AMDGPU_txl v4f32:$SRC_GPR, imm:$RESOURCE_ID, imm:$SAMPLER_ID, imm:$textureTarget))] >; def TEX_SAMPLE_C_L : R600_TEX < 0x19, "TEX_SAMPLE_C_L", - [(set R600_Reg128:$DST_GPR, (int_AMDGPU_txl R600_Reg128:$SRC_GPR, + [(set 
v4f32:$DST_GPR, (int_AMDGPU_txl v4f32:$SRC_GPR, imm:$RESOURCE_ID, imm:$SAMPLER_ID, TEX_SHADOW:$textureTarget))] >; def TEX_SAMPLE_LB : R600_TEX < 0x12, "TEX_SAMPLE_LB", - [(set R600_Reg128:$DST_GPR, (int_AMDGPU_txb R600_Reg128:$SRC_GPR, + [(set v4f32:$DST_GPR, (int_AMDGPU_txb v4f32:$SRC_GPR, imm:$RESOURCE_ID, imm:$SAMPLER_ID, imm:$textureTarget))] >; def TEX_SAMPLE_C_LB : R600_TEX < 0x1A, "TEX_SAMPLE_C_LB", - [(set R600_Reg128:$DST_GPR, (int_AMDGPU_txb R600_Reg128:$SRC_GPR, + [(set v4f32:$DST_GPR, (int_AMDGPU_txb v4f32:$SRC_GPR, imm:$RESOURCE_ID, imm:$SAMPLER_ID, TEX_SHADOW:$textureTarget))] >; @@ -1223,32 +1235,22 @@ class MULADD_Common <bits<5> inst> : R600_3OP < class MULADD_IEEE_Common <bits<5> inst> : R600_3OP < inst, "MULADD_IEEE", - [(set (f32 R600_Reg32:$dst), - (fadd (fmul R600_Reg32:$src0, R600_Reg32:$src1), R600_Reg32:$src2))] + [(set f32:$dst, (fadd (fmul f32:$src0, f32:$src1), f32:$src2))] >; class CNDE_Common <bits<5> inst> : R600_3OP < inst, "CNDE", - [(set R600_Reg32:$dst, - (selectcc (f32 R600_Reg32:$src0), FP_ZERO, - (f32 R600_Reg32:$src1), (f32 R600_Reg32:$src2), - COND_EQ))] + [(set f32:$dst, (selectcc f32:$src0, FP_ZERO, f32:$src1, f32:$src2, COND_EQ))] >; class CNDGT_Common <bits<5> inst> : R600_3OP < inst, "CNDGT", - [(set R600_Reg32:$dst, - (selectcc (f32 R600_Reg32:$src0), FP_ZERO, - (f32 R600_Reg32:$src1), (f32 R600_Reg32:$src2), - COND_GT))] + [(set f32:$dst, (selectcc f32:$src0, FP_ZERO, f32:$src1, f32:$src2, COND_GT))] >; class CNDGE_Common <bits<5> inst> : R600_3OP < inst, "CNDGE", - [(set R600_Reg32:$dst, - (selectcc (f32 R600_Reg32:$src0), FP_ZERO, - (f32 R600_Reg32:$src1), (f32 R600_Reg32:$src2), - COND_GE))] + [(set f32:$dst, (selectcc f32:$src0, FP_ZERO, f32:$src1, f32:$src2, COND_GE))] >; multiclass DOT4_Common <bits<11> inst> { @@ -1256,7 +1258,7 @@ multiclass DOT4_Common <bits<11> inst> { def _pseudo : R600_REDUCTION <inst, (ins R600_Reg128:$src0, R600_Reg128:$src1), "DOT4 $dst $src0, $src1", - [(set R600_Reg32:$dst, (int_AMDGPU_dp4 R600_Reg128:$src0, R600_Reg128:$src1))] + [(set f32:$dst, (int_AMDGPU_dp4 v4f32:$src0, v4f32:$src1))] >; def _real : R600_2OP <inst, "DOT4", []>; @@ -1266,11 +1268,10 @@ let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in { multiclass CUBE_Common <bits<11> inst> { def _pseudo : InstR600 < - inst, (outs R600_Reg128:$dst), (ins R600_Reg128:$src), "CUBE $dst $src", - [(set R600_Reg128:$dst, (int_AMDGPU_cube R600_Reg128:$src))], + [(set v4f32:$dst, (int_AMDGPU_cube v4f32:$src))], VecALU > { let isPseudo = 1; @@ -1282,23 +1283,38 @@ multiclass CUBE_Common <bits<11> inst> { class EXP_IEEE_Common <bits<11> inst> : R600_1OP_Helper < inst, "EXP_IEEE", fexp2 ->; +> { + let TransOnly = 1; + let Itinerary = TransALU; +} class FLT_TO_INT_Common <bits<11> inst> : R600_1OP_Helper < inst, "FLT_TO_INT", fp_to_sint ->; +> { + let TransOnly = 1; + let Itinerary = TransALU; +} class INT_TO_FLT_Common <bits<11> inst> : R600_1OP_Helper < inst, "INT_TO_FLT", sint_to_fp ->; +> { + let TransOnly = 1; + let Itinerary = TransALU; +} class FLT_TO_UINT_Common <bits<11> inst> : R600_1OP_Helper < inst, "FLT_TO_UINT", fp_to_uint ->; +> { + let TransOnly = 1; + let Itinerary = TransALU; +} class UINT_TO_FLT_Common <bits<11> inst> : R600_1OP_Helper < inst, "UINT_TO_FLT", uint_to_fp ->; +> { + let TransOnly = 1; + let Itinerary = TransALU; +} class LOG_CLAMPED_Common <bits<11> inst> : R600_1OP < inst, "LOG_CLAMPED", [] @@ -1306,50 +1322,84 @@ class LOG_CLAMPED_Common <bits<11> inst> : R600_1OP < class LOG_IEEE_Common <bits<11> inst> : R600_1OP_Helper < 
inst, "LOG_IEEE", flog2 ->; +> { + let TransOnly = 1; + let Itinerary = TransALU; +} class LSHL_Common <bits<11> inst> : R600_2OP_Helper <inst, "LSHL", shl>; class LSHR_Common <bits<11> inst> : R600_2OP_Helper <inst, "LSHR", srl>; class ASHR_Common <bits<11> inst> : R600_2OP_Helper <inst, "ASHR", sra>; class MULHI_INT_Common <bits<11> inst> : R600_2OP_Helper < inst, "MULHI_INT", mulhs ->; +> { + let TransOnly = 1; + let Itinerary = TransALU; +} class MULHI_UINT_Common <bits<11> inst> : R600_2OP_Helper < inst, "MULHI", mulhu ->; +> { + let TransOnly = 1; + let Itinerary = TransALU; +} class MULLO_INT_Common <bits<11> inst> : R600_2OP_Helper < inst, "MULLO_INT", mul ->; -class MULLO_UINT_Common <bits<11> inst> : R600_2OP <inst, "MULLO_UINT", []>; +> { + let TransOnly = 1; + let Itinerary = TransALU; +} +class MULLO_UINT_Common <bits<11> inst> : R600_2OP <inst, "MULLO_UINT", []> { + let TransOnly = 1; + let Itinerary = TransALU; +} class RECIP_CLAMPED_Common <bits<11> inst> : R600_1OP < inst, "RECIP_CLAMPED", [] ->; +> { + let TransOnly = 1; + let Itinerary = TransALU; +} class RECIP_IEEE_Common <bits<11> inst> : R600_1OP < - inst, "RECIP_IEEE", [(set R600_Reg32:$dst, (fdiv FP_ONE, R600_Reg32:$src0))] ->; + inst, "RECIP_IEEE", [(set f32:$dst, (fdiv FP_ONE, f32:$src0))] +> { + let TransOnly = 1; + let Itinerary = TransALU; +} class RECIP_UINT_Common <bits<11> inst> : R600_1OP_Helper < inst, "RECIP_UINT", AMDGPUurecip ->; +> { + let TransOnly = 1; + let Itinerary = TransALU; +} class RECIPSQRT_CLAMPED_Common <bits<11> inst> : R600_1OP_Helper < inst, "RECIPSQRT_CLAMPED", int_AMDGPU_rsq ->; +> { + let TransOnly = 1; + let Itinerary = TransALU; +} class RECIPSQRT_IEEE_Common <bits<11> inst> : R600_1OP < inst, "RECIPSQRT_IEEE", [] ->; +> { + let TransOnly = 1; + let Itinerary = TransALU; +} class SIN_Common <bits<11> inst> : R600_1OP < inst, "SIN", []>{ let Trig = 1; + let TransOnly = 1; + let Itinerary = TransALU; } class COS_Common <bits<11> inst> : R600_1OP < inst, "COS", []> { let Trig = 1; + let TransOnly = 1; + let Itinerary = TransALU; } //===----------------------------------------------------------------------===// @@ -1358,19 +1408,20 @@ class COS_Common <bits<11> inst> : R600_1OP < multiclass DIV_Common <InstR600 recip_ieee> { def : Pat< - (int_AMDGPU_div R600_Reg32:$src0, R600_Reg32:$src1), - (MUL_IEEE R600_Reg32:$src0, (recip_ieee R600_Reg32:$src1)) + (int_AMDGPU_div f32:$src0, f32:$src1), + (MUL_IEEE $src0, (recip_ieee $src1)) >; def : Pat< - (fdiv R600_Reg32:$src0, R600_Reg32:$src1), - (MUL_IEEE R600_Reg32:$src0, (recip_ieee R600_Reg32:$src1)) + (fdiv f32:$src0, f32:$src1), + (MUL_IEEE $src0, (recip_ieee $src1)) >; } -class TGSI_LIT_Z_Common <InstR600 mul_lit, InstR600 log_clamped, InstR600 exp_ieee> : Pat < - (int_TGSI_lit_z R600_Reg32:$src_x, R600_Reg32:$src_y, R600_Reg32:$src_w), - (exp_ieee (mul_lit (log_clamped (MAX R600_Reg32:$src_y, (f32 ZERO))), R600_Reg32:$src_w, R600_Reg32:$src_x)) +class TGSI_LIT_Z_Common <InstR600 mul_lit, InstR600 log_clamped, InstR600 exp_ieee> + : Pat < + (int_TGSI_lit_z f32:$src_x, f32:$src_y, f32:$src_w), + (exp_ieee (mul_lit (log_clamped (MAX $src_y, (f32 ZERO))), $src_w, $src_x)) >; //===----------------------------------------------------------------------===// @@ -1410,14 +1461,13 @@ let Predicates = [isR600] in { def RECIP_UINT_r600 : RECIP_UINT_Common <0x78>; defm DIV_r600 : DIV_Common<RECIP_IEEE_r600>; - def : POW_Common <LOG_IEEE_r600, EXP_IEEE_r600, MUL, R600_Reg32>; + def : POW_Common <LOG_IEEE_r600, EXP_IEEE_r600, MUL>; def TGSI_LIT_Z_r600 
: TGSI_LIT_Z_Common<MUL_LIT_r600, LOG_CLAMPED_r600, EXP_IEEE_r600>; - def : Pat<(fsqrt R600_Reg32:$src), - (MUL R600_Reg32:$src, (RECIPSQRT_CLAMPED_r600 R600_Reg32:$src))>; + def : Pat<(fsqrt f32:$src), (MUL $src, (RECIPSQRT_CLAMPED_r600 $src))>; def R600_ExportSwz : ExportSwzInst { - let Word1{20-17} = 1; // BURST_COUNT + let Word1{20-17} = 0; // BURST_COUNT let Word1{21} = eop; let Word1{22} = 1; // VALID_PIXEL_MODE let Word1{30-23} = inst; @@ -1426,25 +1476,77 @@ let Predicates = [isR600] in { defm : ExportPattern<R600_ExportSwz, 39>; def R600_ExportBuf : ExportBufInst { - let Word1{20-17} = 1; // BURST_COUNT + let Word1{20-17} = 0; // BURST_COUNT let Word1{21} = eop; let Word1{22} = 1; // VALID_PIXEL_MODE let Word1{30-23} = inst; let Word1{31} = 1; // BARRIER } defm : SteamOutputExportPattern<R600_ExportBuf, 0x20, 0x21, 0x22, 0x23>; + + def CF_TC_R600 : CF_CLAUSE_R600<1, (ins i32imm:$ADDR, i32imm:$COUNT), + "TEX $COUNT @$ADDR"> { + let POP_COUNT = 0; + } + def CF_VC_R600 : CF_CLAUSE_R600<2, (ins i32imm:$ADDR, i32imm:$COUNT), + "VTX $COUNT @$ADDR"> { + let POP_COUNT = 0; + } + def WHILE_LOOP_R600 : CF_CLAUSE_R600<6, (ins i32imm:$ADDR), + "LOOP_START_DX10 @$ADDR"> { + let POP_COUNT = 0; + let COUNT = 0; + } + def END_LOOP_R600 : CF_CLAUSE_R600<5, (ins i32imm:$ADDR), "END_LOOP @$ADDR"> { + let POP_COUNT = 0; + let COUNT = 0; + } + def LOOP_BREAK_R600 : CF_CLAUSE_R600<9, (ins i32imm:$ADDR), + "LOOP_BREAK @$ADDR"> { + let POP_COUNT = 0; + let COUNT = 0; + } + def CF_CONTINUE_R600 : CF_CLAUSE_R600<8, (ins i32imm:$ADDR), + "CONTINUE @$ADDR"> { + let POP_COUNT = 0; + let COUNT = 0; + } + def CF_JUMP_R600 : CF_CLAUSE_R600<10, (ins i32imm:$ADDR, i32imm:$POP_COUNT), + "JUMP @$ADDR POP:$POP_COUNT"> { + let COUNT = 0; + } + def CF_ELSE_R600 : CF_CLAUSE_R600<13, (ins i32imm:$ADDR, i32imm:$POP_COUNT), + "ELSE @$ADDR POP:$POP_COUNT"> { + let COUNT = 0; + } + def CF_CALL_FS_R600 : CF_CLAUSE_R600<19, (ins), "CALL_FS"> { + let ADDR = 0; + let COUNT = 0; + let POP_COUNT = 0; + } + def POP_R600 : CF_CLAUSE_R600<14, (ins i32imm:$ADDR, i32imm:$POP_COUNT), + "POP @$ADDR POP:$POP_COUNT"> { + let COUNT = 0; + } + def CF_END_R600 : CF_CLAUSE_R600<0, (ins), "CF_END"> { + let COUNT = 0; + let POP_COUNT = 0; + let ADDR = 0; + let END_OF_PROGRAM = 1; + } + } // Helper pattern for normalizing inputs to triginomic instructions for R700+ // cards. 
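The COS_PAT/SIN_PAT classes below wrap the operand in a MUL_IEEE by CONST.TWO_PI_INV, i.e. the angle is pre-scaled by 1/(2*pi) before it reaches the hardware SIN/COS, presumably because those units take the angle as a fraction of a full turn. A quick standalone check of the scaling:

#include <cstdio>

int main() {
  const float TwoPiInv = 0.15915494f;   // 1/(2*pi), cf. CONST.TWO_PI_INV
  float x = 1.5707963f;                 // pi/2
  std::printf("scaled operand = %f\n", x * TwoPiInv);  // ~0.25 of a full turn
  return 0;
}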
class COS_PAT <InstR600 trig> : Pat< - (fcos R600_Reg32:$src), - (trig (MUL_IEEE (MOV_IMM_I32 CONST.TWO_PI_INV), R600_Reg32:$src)) + (fcos f32:$src), + (trig (MUL_IEEE (MOV_IMM_I32 CONST.TWO_PI_INV), $src)) >; class SIN_PAT <InstR600 trig> : Pat< - (fsin R600_Reg32:$src), - (trig (MUL_IEEE (MOV_IMM_I32 CONST.TWO_PI_INV), R600_Reg32:$src)) + (fsin f32:$src), + (trig (MUL_IEEE (MOV_IMM_I32 CONST.TWO_PI_INV), $src)) >; //===----------------------------------------------------------------------===// @@ -1482,11 +1584,10 @@ def RECIPSQRT_IEEE_eg : RECIPSQRT_IEEE_Common<0x89>; def SIN_eg : SIN_Common<0x8D>; def COS_eg : COS_Common<0x8E>; -def : POW_Common <LOG_IEEE_eg, EXP_IEEE_eg, MUL, R600_Reg32>; +def : POW_Common <LOG_IEEE_eg, EXP_IEEE_eg, MUL>; def : SIN_PAT <SIN_eg>; def : COS_PAT <COS_eg>; -def : Pat<(fsqrt R600_Reg32:$src), - (MUL R600_Reg32:$src, (RECIPSQRT_CLAMPED_eg R600_Reg32:$src))>; +def : Pat<(fsqrt f32:$src), (MUL $src, (RECIPSQRT_CLAMPED_eg $src))>; } // End Predicates = [isEG] //===----------------------------------------------------------------------===// @@ -1510,15 +1611,17 @@ let Predicates = [isEGorCayman] in { // (16,8) = (Input << 8) >> 24 = (Input & 0xffffff) >> 16 // (24,8) = (Input << 0) >> 24 = (Input & 0xffffffff) >> 24 def BFE_UINT_eg : R600_3OP <0x4, "BFE_UINT", - [(set R600_Reg32:$dst, (int_AMDIL_bit_extract_u32 R600_Reg32:$src0, - R600_Reg32:$src1, - R600_Reg32:$src2))], + [(set i32:$dst, (int_AMDIL_bit_extract_u32 i32:$src0, i32:$src1, + i32:$src2))], VecALU >; + def : BFEPattern <BFE_UINT_eg>; + + def BFI_INT_eg : R600_3OP <0x06, "BFI_INT", [], VecALU>; + defm : BFIPatterns <BFI_INT_eg>; def BIT_ALIGN_INT_eg : R600_3OP <0xC, "BIT_ALIGN_INT", - [(set R600_Reg32:$dst, (AMDGPUbitalign R600_Reg32:$src0, R600_Reg32:$src1, - R600_Reg32:$src2))], + [(set i32:$dst, (AMDGPUbitalign i32:$src0, i32:$src1, i32:$src2))], VecALU >; @@ -1563,14 +1666,15 @@ let hasSideEffects = 1 in { // XXX: Lowering SELECT_CC will sometimes generate fp_to_[su]int nodes, // which do not need to be truncated since the fp values are 0.0f or 1.0f. // We should look into handling these cases separately. 
- def : Pat<(fp_to_sint R600_Reg32:$src0), - (FLT_TO_INT_eg (TRUNC R600_Reg32:$src0))>; + def : Pat<(fp_to_sint f32:$src0), (FLT_TO_INT_eg (TRUNC $src0))>; + + def : Pat<(fp_to_uint f32:$src0), (FLT_TO_UINT_eg (TRUNC $src0))>; - def : Pat<(fp_to_uint R600_Reg32:$src0), - (FLT_TO_UINT_eg (TRUNC R600_Reg32:$src0))>; + // SHA-256 Patterns + def : SHA256MaPattern <BFI_INT_eg, XOR_INT>; def EG_ExportSwz : ExportSwzInst { - let Word1{19-16} = 1; // BURST_COUNT + let Word1{19-16} = 0; // BURST_COUNT let Word1{20} = 1; // VALID_PIXEL_MODE let Word1{21} = eop; let Word1{29-22} = inst; @@ -1580,7 +1684,7 @@ let hasSideEffects = 1 in { defm : ExportPattern<EG_ExportSwz, 83>; def EG_ExportBuf : ExportBufInst { - let Word1{19-16} = 1; // BURST_COUNT + let Word1{19-16} = 0; // BURST_COUNT let Word1{20} = 1; // VALID_PIXEL_MODE let Word1{21} = eop; let Word1{29-22} = inst; @@ -1589,6 +1693,57 @@ let hasSideEffects = 1 in { } defm : SteamOutputExportPattern<EG_ExportBuf, 0x40, 0x41, 0x42, 0x43>; + def CF_TC_EG : CF_CLAUSE_EG<1, (ins i32imm:$ADDR, i32imm:$COUNT), + "TEX $COUNT @$ADDR"> { + let POP_COUNT = 0; + } + def CF_VC_EG : CF_CLAUSE_EG<2, (ins i32imm:$ADDR, i32imm:$COUNT), + "VTX $COUNT @$ADDR"> { + let POP_COUNT = 0; + } + def WHILE_LOOP_EG : CF_CLAUSE_EG<6, (ins i32imm:$ADDR), + "LOOP_START_DX10 @$ADDR"> { + let POP_COUNT = 0; + let COUNT = 0; + } + def END_LOOP_EG : CF_CLAUSE_EG<5, (ins i32imm:$ADDR), "END_LOOP @$ADDR"> { + let POP_COUNT = 0; + let COUNT = 0; + } + def LOOP_BREAK_EG : CF_CLAUSE_EG<9, (ins i32imm:$ADDR), + "LOOP_BREAK @$ADDR"> { + let POP_COUNT = 0; + let COUNT = 0; + } + def CF_CONTINUE_EG : CF_CLAUSE_EG<8, (ins i32imm:$ADDR), + "CONTINUE @$ADDR"> { + let POP_COUNT = 0; + let COUNT = 0; + } + def CF_JUMP_EG : CF_CLAUSE_EG<10, (ins i32imm:$ADDR, i32imm:$POP_COUNT), + "JUMP @$ADDR POP:$POP_COUNT"> { + let COUNT = 0; + } + def CF_ELSE_EG : CF_CLAUSE_EG<13, (ins i32imm:$ADDR, i32imm:$POP_COUNT), + "ELSE @$ADDR POP:$POP_COUNT"> { + let COUNT = 0; + } + def CF_CALL_FS_EG : CF_CLAUSE_EG<19, (ins), "CALL_FS"> { + let ADDR = 0; + let COUNT = 0; + let POP_COUNT = 0; + } + def POP_EG : CF_CLAUSE_EG<14, (ins i32imm:$ADDR, i32imm:$POP_COUNT), + "POP @$ADDR POP:$POP_COUNT"> { + let COUNT = 0; + } + def CF_END_EG : CF_CLAUSE_EG<0, (ins), "CF_END"> { + let COUNT = 0; + let POP_COUNT = 0; + let ADDR = 0; + let END_OF_PROGRAM = 1; + } + //===----------------------------------------------------------------------===// // Memory read/write instructions //===----------------------------------------------------------------------===// @@ -1618,14 +1773,14 @@ class RAT_WRITE_CACHELESS_eg <dag ins, bits<4> comp_mask, string name, def RAT_WRITE_CACHELESS_32_eg : RAT_WRITE_CACHELESS_eg < (ins R600_TReg32_X:$rw_gpr, R600_TReg32_X:$index_gpr, InstFlag:$eop), 0x1, "RAT_WRITE_CACHELESS_32_eg", - [(global_store (i32 R600_TReg32_X:$rw_gpr), R600_TReg32_X:$index_gpr)] + [(global_store i32:$rw_gpr, i32:$index_gpr)] >; //128-bit store def RAT_WRITE_CACHELESS_128_eg : RAT_WRITE_CACHELESS_eg < (ins R600_Reg128:$rw_gpr, R600_TReg32_X:$index_gpr, InstFlag:$eop), 0xf, "RAT_WRITE_CACHELESS_128", - [(global_store (v4i32 R600_Reg128:$rw_gpr), R600_TReg32_X:$index_gpr)] + [(global_store v4i32:$rw_gpr, i32:$index_gpr)] >; class VTX_READ_eg <string name, bits<8> buffer_id, dag outs, list<dag> pattern> @@ -1679,6 +1834,8 @@ class VTX_READ_eg <string name, bits<8> buffer_id, dag outs, list<dag> pattern> // VTX_WORD3 (Padding) // // Inst{127-96} = 0; + + let VTXInst = 1; } class VTX_READ_8_eg <bits<8> buffer_id, list<dag> pattern> 
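The VTX_READ_* readers just defined are instantiated in the next hunk once per buffer id: 0 for kernel parameters (load_param) and 1 for global/constant memory. Together with the new VTXInst flag they feed the clause selection added in R600InstrInfo.cpp earlier in this diff; a standalone restatement of that predicate logic, with plain bools standing in for the subtarget and shader-type queries:

struct FetchDesc { bool isVtx; bool isTex; };   // stand-in for the IS_VTX/IS_TEX TSFlags

// Opcode-level test, as in R600InstrInfo::usesVertexCache(unsigned).
bool vtxOpcode(bool hasVC, FetchDesc d) { return hasVC && d.isVtx; }

// MI-level tests: compute shaders never use the vertex cache, and parts
// without one route VTX fetches through the texture cache instead.
bool usesVertexCache(bool hasVC, bool isCompute, FetchDesc d) {
  return !isCompute && vtxOpcode(hasVC, d);
}
bool usesTextureCache(bool hasVC, bool isCompute, FetchDesc d) {
  return (isCompute && vtxOpcode(hasVC, d)) || (!hasVC && d.isVtx) || d.isTex;
}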
@@ -1748,19 +1905,19 @@ class VTX_READ_128_eg <bits<8> buffer_id, list<dag> pattern> //===----------------------------------------------------------------------===// def VTX_READ_PARAM_8_eg : VTX_READ_8_eg <0, - [(set (i32 R600_TReg32_X:$dst), (load_param_zexti8 ADDRVTX_READ:$ptr))] + [(set i32:$dst, (load_param_zexti8 ADDRVTX_READ:$ptr))] >; def VTX_READ_PARAM_16_eg : VTX_READ_16_eg <0, - [(set (i32 R600_TReg32_X:$dst), (load_param_zexti16 ADDRVTX_READ:$ptr))] + [(set i32:$dst, (load_param_zexti16 ADDRVTX_READ:$ptr))] >; def VTX_READ_PARAM_32_eg : VTX_READ_32_eg <0, - [(set (i32 R600_TReg32_X:$dst), (load_param ADDRVTX_READ:$ptr))] + [(set i32:$dst, (load_param ADDRVTX_READ:$ptr))] >; def VTX_READ_PARAM_128_eg : VTX_READ_128_eg <0, - [(set (v4i32 R600_Reg128:$dst), (load_param ADDRVTX_READ:$ptr))] + [(set v4i32:$dst, (load_param ADDRVTX_READ:$ptr))] >; //===----------------------------------------------------------------------===// @@ -1769,17 +1926,17 @@ def VTX_READ_PARAM_128_eg : VTX_READ_128_eg <0, // 8-bit reads def VTX_READ_GLOBAL_8_eg : VTX_READ_8_eg <1, - [(set (i32 R600_TReg32_X:$dst), (zextloadi8_global ADDRVTX_READ:$ptr))] + [(set i32:$dst, (zextloadi8_global ADDRVTX_READ:$ptr))] >; // 32-bit reads def VTX_READ_GLOBAL_32_eg : VTX_READ_32_eg <1, - [(set (i32 R600_TReg32_X:$dst), (global_load ADDRVTX_READ:$ptr))] + [(set i32:$dst, (global_load ADDRVTX_READ:$ptr))] >; // 128-bit reads def VTX_READ_GLOBAL_128_eg : VTX_READ_128_eg <1, - [(set (v4i32 R600_Reg128:$dst), (global_load ADDRVTX_READ:$ptr))] + [(set v4i32:$dst, (global_load ADDRVTX_READ:$ptr))] >; //===----------------------------------------------------------------------===// @@ -1788,7 +1945,7 @@ def VTX_READ_GLOBAL_128_eg : VTX_READ_128_eg <1, //===----------------------------------------------------------------------===// def CONSTANT_LOAD_eg : VTX_READ_32_eg <1, - [(set (i32 R600_TReg32_X:$dst), (constant_load ADDRVTX_READ:$ptr))] + [(set i32:$dst, (constant_load ADDRVTX_READ:$ptr))] >; } @@ -1818,22 +1975,27 @@ def SIN_cm : SIN_Common<0x8D>; def COS_cm : COS_Common<0x8E>; } // End isVector = 1 -def : POW_Common <LOG_IEEE_cm, EXP_IEEE_cm, MUL, R600_Reg32>; +def : POW_Common <LOG_IEEE_cm, EXP_IEEE_cm, MUL>; def : SIN_PAT <SIN_cm>; def : COS_PAT <COS_cm>; defm DIV_cm : DIV_Common<RECIP_IEEE_cm>; // RECIP_UINT emulation for Cayman +// The multiplication scales from [0,1] to the unsigned integer range def : Pat < - (AMDGPUurecip R600_Reg32:$src0), - (FLT_TO_UINT_eg (MUL_IEEE (RECIP_IEEE_cm (UINT_TO_FLT_eg R600_Reg32:$src0)), - (MOV_IMM_I32 0x4f800000))) + (AMDGPUurecip i32:$src0), + (FLT_TO_UINT_eg (MUL_IEEE (RECIP_IEEE_cm (UINT_TO_FLT_eg $src0)), + (MOV_IMM_I32 CONST.FP_UINT_MAX_PLUS_1))) >; + def CF_END_CM : CF_CLAUSE_EG<32, (ins), "CF_END"> { + let ADDR = 0; + let POP_COUNT = 0; + let COUNT = 0; + } -def : Pat<(fsqrt R600_Reg32:$src), - (MUL R600_Reg32:$src, (RECIPSQRT_CLAMPED_cm R600_Reg32:$src))>; +def : Pat<(fsqrt f32:$src), (MUL R600_Reg32:$src, (RECIPSQRT_CLAMPED_cm $src))>; } // End isCayman @@ -1855,21 +2017,21 @@ def PREDICATED_BREAK : ILFormat<(outs), (ins GPRI32:$src), let isPseudo = 1 in { def PRED_X : InstR600 < - 0, (outs R600_Predicate_Bit:$dst), + (outs R600_Predicate_Bit:$dst), (ins R600_Reg32:$src0, i32imm:$src1, i32imm:$flags), "", [], NullALU> { let FlagOperandIdx = 3; } let isTerminator = 1, isBranch = 1 in { -def JUMP_COND : InstR600 <0x10, +def JUMP_COND : InstR600 < (outs), (ins brtarget:$target, R600_Predicate_Bit:$p), "JUMP $target ($p)", [], AnyALU >; -def JUMP : InstR600 <0x10, +def JUMP : 
InstR600 < (outs), (ins brtarget:$target), "JUMP $target", @@ -1896,20 +2058,28 @@ def MASK_WRITE : AMDGPUShaderInst < } // End mayLoad = 0, mayStore = 0, hasSideEffects = 1 -def TXD: AMDGPUShaderInst < +def TXD: InstR600 < (outs R600_Reg128:$dst), - (ins R600_Reg128:$src0, R600_Reg128:$src1, R600_Reg128:$src2, i32imm:$resourceId, i32imm:$samplerId, i32imm:$textureTarget), + (ins R600_Reg128:$src0, R600_Reg128:$src1, R600_Reg128:$src2, + i32imm:$resourceId, i32imm:$samplerId, i32imm:$textureTarget), "TXD $dst, $src0, $src1, $src2, $resourceId, $samplerId, $textureTarget", - [(set R600_Reg128:$dst, (int_AMDGPU_txd R600_Reg128:$src0, R600_Reg128:$src1, R600_Reg128:$src2, imm:$resourceId, imm:$samplerId, imm:$textureTarget))] ->; + [(set v4f32:$dst, (int_AMDGPU_txd v4f32:$src0, v4f32:$src1, v4f32:$src2, + imm:$resourceId, imm:$samplerId, imm:$textureTarget))], + NullALU > { + let TEXInst = 1; +} -def TXD_SHADOW: AMDGPUShaderInst < +def TXD_SHADOW: InstR600 < (outs R600_Reg128:$dst), - (ins R600_Reg128:$src0, R600_Reg128:$src1, R600_Reg128:$src2, i32imm:$resourceId, i32imm:$samplerId, i32imm:$textureTarget), + (ins R600_Reg128:$src0, R600_Reg128:$src1, R600_Reg128:$src2, + i32imm:$resourceId, i32imm:$samplerId, i32imm:$textureTarget), "TXD_SHADOW $dst, $src0, $src1, $src2, $resourceId, $samplerId, $textureTarget", - [(set R600_Reg128:$dst, (int_AMDGPU_txd R600_Reg128:$src0, R600_Reg128:$src1, R600_Reg128:$src2, imm:$resourceId, imm:$samplerId, TEX_SHADOW:$textureTarget))] ->; - + [(set v4f32:$dst, (int_AMDGPU_txd v4f32:$src0, v4f32:$src1, v4f32:$src2, + imm:$resourceId, imm:$samplerId, TEX_SHADOW:$textureTarget))], + NullALU +> { + let TEXInst = 1; +} } // End isPseudo = 1 } // End usesCustomInserter = 1 @@ -1946,7 +2116,7 @@ def CONST_COPY : Instruction { def TEX_VTX_CONSTBUF : InstR600ISA <(outs R600_Reg128:$dst), (ins MEMxi:$ptr, i32imm:$BUFFER_ID), "VTX_READ_eg $dst, $ptr", - [(set R600_Reg128:$dst, (CONST_ADDRESS ADDRGA_VAR_OFFSET:$ptr, (i32 imm:$BUFFER_ID)))]>, + [(set v4i32:$dst, (CONST_ADDRESS ADDRGA_VAR_OFFSET:$ptr, (i32 imm:$BUFFER_ID)))]>, VTX_WORD1_GPR, VTX_WORD0 { let VC_INST = 0; @@ -1995,11 +2165,12 @@ def TEX_VTX_CONSTBUF : // VTX_WORD3 (Padding) // // Inst{127-96} = 0; + let VTXInst = 1; } def TEX_VTX_TEXBUF: InstR600ISA <(outs R600_Reg128:$dst), (ins MEMxi:$ptr, i32imm:$BUFFER_ID), "TEX_VTX_EXPLICIT_READ $dst, $ptr", - [(set R600_Reg128:$dst, (int_R600_load_texbuf ADDRGA_VAR_OFFSET:$ptr, imm:$BUFFER_ID))]>, + [(set v4f32:$dst, (int_R600_load_texbuf ADDRGA_VAR_OFFSET:$ptr, imm:$BUFFER_ID))]>, VTX_WORD1_GPR, VTX_WORD0 { let VC_INST = 0; @@ -2048,6 +2219,7 @@ let Inst{63-32} = Word1; // VTX_WORD3 (Padding) // // Inst{127-96} = 0; + let VTXInst = 1; } @@ -2124,9 +2296,8 @@ let isTerminator=1 in { // CND*_INT Pattterns for f32 True / False values class CND_INT_f32 <InstR600 cnd, CondCode cc> : Pat < - (selectcc (i32 R600_Reg32:$src0), 0, (f32 R600_Reg32:$src1), - R600_Reg32:$src2, cc), - (cnd R600_Reg32:$src0, R600_Reg32:$src1, R600_Reg32:$src2) + (selectcc i32:$src0, 0, f32:$src1, f32:$src2, cc), + (cnd $src0, $src1, $src2) >; def : CND_INT_f32 <CNDE_INT, SETEQ>; @@ -2135,9 +2306,8 @@ def : CND_INT_f32 <CNDGE_INT, SETGE>; //CNDGE_INT extra pattern def : Pat < - (selectcc (i32 R600_Reg32:$src0), -1, (i32 R600_Reg32:$src1), - (i32 R600_Reg32:$src2), COND_GT), - (CNDGE_INT R600_Reg32:$src0, R600_Reg32:$src1, R600_Reg32:$src2) + (selectcc i32:$src0, -1, i32:$src1, i32:$src2, COND_GT), + (CNDGE_INT $src0, $src1, $src2) >; // KIL Patterns @@ -2147,56 +2317,56 @@ def KILP : Pat < >; def 
KIL : Pat < - (int_AMDGPU_kill R600_Reg32:$src0), - (MASK_WRITE (KILLGT (f32 ZERO), (f32 R600_Reg32:$src0))) + (int_AMDGPU_kill f32:$src0), + (MASK_WRITE (KILLGT (f32 ZERO), $src0)) >; // SGT Reverse args def : Pat < - (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, FP_ONE, FP_ZERO, COND_LT), - (SGT R600_Reg32:$src1, R600_Reg32:$src0) + (selectcc f32:$src0, f32:$src1, FP_ONE, FP_ZERO, COND_LT), + (SGT $src1, $src0) >; // SGE Reverse args def : Pat < - (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, FP_ONE, FP_ZERO, COND_LE), - (SGE R600_Reg32:$src1, R600_Reg32:$src0) + (selectcc f32:$src0, f32:$src1, FP_ONE, FP_ZERO, COND_LE), + (SGE $src1, $src0) >; // SETGT_DX10 reverse args def : Pat < - (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, COND_LT), - (SETGT_DX10 R600_Reg32:$src1, R600_Reg32:$src0) + (selectcc f32:$src0, f32:$src1, -1, 0, COND_LT), + (SETGT_DX10 $src1, $src0) >; // SETGE_DX10 reverse args def : Pat < - (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, COND_LE), - (SETGE_DX10 R600_Reg32:$src1, R600_Reg32:$src0) + (selectcc f32:$src0, f32:$src1, -1, 0, COND_LE), + (SETGE_DX10 $src1, $src0) >; // SETGT_INT reverse args def : Pat < - (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETLT), - (SETGT_INT R600_Reg32:$src1, R600_Reg32:$src0) + (selectcc i32:$src0, i32:$src1, -1, 0, SETLT), + (SETGT_INT $src1, $src0) >; // SETGE_INT reverse args def : Pat < - (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETLE), - (SETGE_INT R600_Reg32:$src1, R600_Reg32:$src0) + (selectcc i32:$src0, i32:$src1, -1, 0, SETLE), + (SETGE_INT $src1, $src0) >; // SETGT_UINT reverse args def : Pat < - (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETULT), - (SETGT_UINT R600_Reg32:$src1, R600_Reg32:$src0) + (selectcc i32:$src0, i32:$src1, -1, 0, SETULT), + (SETGT_UINT $src1, $src0) >; // SETGE_UINT reverse args def : Pat < - (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETULE), - (SETGE_UINT R600_Reg32:$src1, R600_Reg32:$src0) + (selectcc i32:$src0, i32:$src1, -1, 0, SETULE), + (SETGE_UINT $src1, $src0) >; // The next two patterns are special cases for handling 'true if ordered' and @@ -2209,50 +2379,50 @@ def : Pat < //SETE - 'true if ordered' def : Pat < - (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, FP_ONE, FP_ZERO, SETO), - (SETE R600_Reg32:$src0, R600_Reg32:$src1) + (selectcc f32:$src0, f32:$src1, FP_ONE, FP_ZERO, SETO), + (SETE $src0, $src1) >; //SETE_DX10 - 'true if ordered' def : Pat < - (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETO), - (SETE_DX10 R600_Reg32:$src0, R600_Reg32:$src1) + (selectcc f32:$src0, f32:$src1, -1, 0, SETO), + (SETE_DX10 $src0, $src1) >; //SNE - 'true if unordered' def : Pat < - (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, FP_ONE, FP_ZERO, SETUO), - (SNE R600_Reg32:$src0, R600_Reg32:$src1) + (selectcc f32:$src0, f32:$src1, FP_ONE, FP_ZERO, SETUO), + (SNE $src0, $src1) >; //SETNE_DX10 - 'true if ordered' def : Pat < - (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETUO), - (SETNE_DX10 R600_Reg32:$src0, R600_Reg32:$src1) + (selectcc f32:$src0, f32:$src1, -1, 0, SETUO), + (SETNE_DX10 $src0, $src1) >; -def : Extract_Element <f32, v4f32, R600_Reg128, 0, sub0>; -def : Extract_Element <f32, v4f32, R600_Reg128, 1, sub1>; -def : Extract_Element <f32, v4f32, R600_Reg128, 2, sub2>; -def : Extract_Element <f32, v4f32, R600_Reg128, 3, sub3>; +def : Extract_Element <f32, v4f32, 0, sub0>; +def : Extract_Element <f32, v4f32, 1, sub1>; +def : Extract_Element <f32, v4f32, 2, 
sub2>; +def : Extract_Element <f32, v4f32, 3, sub3>; -def : Insert_Element <f32, v4f32, R600_Reg32, R600_Reg128, 0, sub0>; -def : Insert_Element <f32, v4f32, R600_Reg32, R600_Reg128, 1, sub1>; -def : Insert_Element <f32, v4f32, R600_Reg32, R600_Reg128, 2, sub2>; -def : Insert_Element <f32, v4f32, R600_Reg32, R600_Reg128, 3, sub3>; +def : Insert_Element <f32, v4f32, 0, sub0>; +def : Insert_Element <f32, v4f32, 1, sub1>; +def : Insert_Element <f32, v4f32, 2, sub2>; +def : Insert_Element <f32, v4f32, 3, sub3>; -def : Extract_Element <i32, v4i32, R600_Reg128, 0, sub0>; -def : Extract_Element <i32, v4i32, R600_Reg128, 1, sub1>; -def : Extract_Element <i32, v4i32, R600_Reg128, 2, sub2>; -def : Extract_Element <i32, v4i32, R600_Reg128, 3, sub3>; +def : Extract_Element <i32, v4i32, 0, sub0>; +def : Extract_Element <i32, v4i32, 1, sub1>; +def : Extract_Element <i32, v4i32, 2, sub2>; +def : Extract_Element <i32, v4i32, 3, sub3>; -def : Insert_Element <i32, v4i32, R600_Reg32, R600_Reg128, 0, sub0>; -def : Insert_Element <i32, v4i32, R600_Reg32, R600_Reg128, 1, sub1>; -def : Insert_Element <i32, v4i32, R600_Reg32, R600_Reg128, 2, sub2>; -def : Insert_Element <i32, v4i32, R600_Reg32, R600_Reg128, 3, sub3>; +def : Insert_Element <i32, v4i32, 0, sub0>; +def : Insert_Element <i32, v4i32, 1, sub1>; +def : Insert_Element <i32, v4i32, 2, sub2>; +def : Insert_Element <i32, v4i32, 3, sub3>; -def : Vector4_Build <v4f32, R600_Reg128, f32, R600_Reg32>; -def : Vector4_Build <v4i32, R600_Reg128, i32, R600_Reg32>; +def : Vector4_Build <v4f32, f32>; +def : Vector4_Build <v4i32, i32>; // bitconvert patterns diff --git a/contrib/llvm/lib/Target/R600/R600MachineFunctionInfo.h b/contrib/llvm/lib/Target/R600/R600MachineFunctionInfo.h index 99c1f91..70fddbb 100644 --- a/contrib/llvm/lib/Target/R600/R600MachineFunctionInfo.h +++ b/contrib/llvm/lib/Target/R600/R600MachineFunctionInfo.h @@ -25,6 +25,7 @@ public: R600MachineFunctionInfo(const MachineFunction &MF); SmallVector<unsigned, 4> LiveOuts; std::vector<unsigned> IndirectRegs; + unsigned StackSize; }; } // End llvm namespace diff --git a/contrib/llvm/lib/Target/R600/R600Packetizer.cpp b/contrib/llvm/lib/Target/R600/R600Packetizer.cpp new file mode 100644 index 0000000..cd7b7d0 --- /dev/null +++ b/contrib/llvm/lib/Target/R600/R600Packetizer.cpp @@ -0,0 +1,459 @@ +//===----- R600Packetizer.cpp - VLIW packetizer ---------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +/// \file +/// This pass implements instructions packetization for R600. It unsets isLast +/// bit of instructions inside a bundle and substitutes src register with +/// PreviousVector when applicable. 
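In other words, a result written by the immediately preceding instruction group can be read back through the PV.X..PV.W channels instead of the GPR it was written to; getPreviousVector() builds that register-to-PV map per destination channel and substitutePV() rewrites matching sources. A minimal standalone sketch of the rewrite (the register encodings and function name here are made up):

#include <map>
#include <vector>

enum PVChan { PV_X = 1000, PV_Y, PV_Z, PV_W };   // made-up encodings

// pv maps a GPR written by the previous group to the PV channel holding it.
void substitutePVSources(std::vector<int> &srcs, const std::map<int, int> &pv) {
  for (int &src : srcs) {
    auto it = pv.find(src);
    if (it != pv.end())
      src = it->second;     // read the forwarded PV channel instead of the GPR
  }
}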
+// +//===----------------------------------------------------------------------===// + +#ifndef R600PACKETIZER_CPP +#define R600PACKETIZER_CPP + +#define DEBUG_TYPE "packets" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/CodeGen/DFAPacketizer.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/ScheduleDAG.h" +#include "AMDGPU.h" +#include "R600InstrInfo.h" + +namespace llvm { + +class R600Packetizer : public MachineFunctionPass { + +public: + static char ID; + R600Packetizer(const TargetMachine &TM) : MachineFunctionPass(ID) {} + + void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); + AU.addRequired<MachineDominatorTree>(); + AU.addPreserved<MachineDominatorTree>(); + AU.addRequired<MachineLoopInfo>(); + AU.addPreserved<MachineLoopInfo>(); + MachineFunctionPass::getAnalysisUsage(AU); + } + + const char *getPassName() const { + return "R600 Packetizer"; + } + + bool runOnMachineFunction(MachineFunction &Fn); +}; +char R600Packetizer::ID = 0; + +class R600PacketizerList : public VLIWPacketizerList { + +private: + const R600InstrInfo *TII; + const R600RegisterInfo &TRI; + + enum BankSwizzle { + ALU_VEC_012 = 0, + ALU_VEC_021, + ALU_VEC_120, + ALU_VEC_102, + ALU_VEC_201, + ALU_VEC_210 + }; + + unsigned getSlot(const MachineInstr *MI) const { + return TRI.getHWRegChan(MI->getOperand(0).getReg()); + } + + /// \returns register to PV chan mapping for bundle/single instructions that + /// immediatly precedes I. + DenseMap<unsigned, unsigned> getPreviousVector(MachineBasicBlock::iterator I) + const { + DenseMap<unsigned, unsigned> Result; + I--; + if (!TII->isALUInstr(I->getOpcode()) && !I->isBundle()) + return Result; + MachineBasicBlock::instr_iterator BI = I.getInstrIterator(); + if (I->isBundle()) + BI++; + do { + if (TII->isPredicated(BI)) + continue; + if (TII->isTransOnly(BI)) + continue; + int OperandIdx = TII->getOperandIdx(BI->getOpcode(), R600Operands::WRITE); + if (OperandIdx < 0) + continue; + if (BI->getOperand(OperandIdx).getImm() == 0) + continue; + unsigned Dst = BI->getOperand(0).getReg(); + if (BI->getOpcode() == AMDGPU::DOT4_r600_real) { + Result[Dst] = AMDGPU::PV_X; + continue; + } + unsigned PVReg = 0; + switch (TRI.getHWRegChan(Dst)) { + case 0: + PVReg = AMDGPU::PV_X; + break; + case 1: + PVReg = AMDGPU::PV_Y; + break; + case 2: + PVReg = AMDGPU::PV_Z; + break; + case 3: + PVReg = AMDGPU::PV_W; + break; + default: + llvm_unreachable("Invalid Chan"); + } + Result[Dst] = PVReg; + } while ((++BI)->isBundledWithPred()); + return Result; + } + + void substitutePV(MachineInstr *MI, const DenseMap<unsigned, unsigned> &PVs) + const { + R600Operands::Ops Ops[] = { + R600Operands::SRC0, + R600Operands::SRC1, + R600Operands::SRC2 + }; + for (unsigned i = 0; i < 3; i++) { + int OperandIdx = TII->getOperandIdx(MI->getOpcode(), Ops[i]); + if (OperandIdx < 0) + continue; + unsigned Src = MI->getOperand(OperandIdx).getReg(); + const DenseMap<unsigned, unsigned>::const_iterator It = PVs.find(Src); + if (It != PVs.end()) + MI->getOperand(OperandIdx).setReg(It->second); + } + } +public: + // Ctor. 
+  R600PacketizerList(MachineFunction &MF, MachineLoopInfo &MLI,
+                        MachineDominatorTree &MDT)
+  : VLIWPacketizerList(MF, MLI, MDT, true),
+    TII (static_cast<const R600InstrInfo *>(MF.getTarget().getInstrInfo())),
+    TRI(TII->getRegisterInfo()) { }
+
+  // initPacketizerState - initialize some internal flags.
+  void initPacketizerState() { }
+
+  // ignorePseudoInstruction - Ignore bundling of pseudo instructions.
+  bool ignorePseudoInstruction(MachineInstr *MI, MachineBasicBlock *MBB) {
+    return false;
+  }
+
+  // isSoloInstruction - return true if instruction MI cannot be packetized
+  // with any other instruction, which means that MI itself is a packet.
+  bool isSoloInstruction(MachineInstr *MI) {
+    if (TII->isVector(*MI))
+      return true;
+    if (!TII->isALUInstr(MI->getOpcode()))
+      return true;
+    if (TII->get(MI->getOpcode()).TSFlags & R600_InstFlag::TRANS_ONLY)
+      return true;
+    if (TII->isTransOnly(MI))
+      return true;
+    return false;
+  }
+
+  // isLegalToPacketizeTogether - Is it legal to packetize SUI and SUJ
+  // together.
+  bool isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) {
+    MachineInstr *MII = SUI->getInstr(), *MIJ = SUJ->getInstr();
+    if (getSlot(MII) <= getSlot(MIJ))
+      return false;
+    // Do MII and MIJ share the same pred_sel?
+    int OpI = TII->getOperandIdx(MII->getOpcode(), R600Operands::PRED_SEL),
+        OpJ = TII->getOperandIdx(MIJ->getOpcode(), R600Operands::PRED_SEL);
+    unsigned PredI = (OpI > -1)?MII->getOperand(OpI).getReg():0,
+        PredJ = (OpJ > -1)?MIJ->getOperand(OpJ).getReg():0;
+    if (PredI != PredJ)
+      return false;
+    if (SUJ->isSucc(SUI)) {
+      for (unsigned i = 0, e = SUJ->Succs.size(); i < e; ++i) {
+        const SDep &Dep = SUJ->Succs[i];
+        if (Dep.getSUnit() != SUI)
+          continue;
+        if (Dep.getKind() == SDep::Anti)
+          continue;
+        if (Dep.getKind() == SDep::Output)
+          if (MII->getOperand(0).getReg() != MIJ->getOperand(0).getReg())
+            continue;
+        return false;
+      }
+    }
+    return true;
+  }
+
+  // isLegalToPruneDependencies - Is it legal to prune dependence between SUI
+  // and SUJ.
+ bool isLegalToPruneDependencies(SUnit *SUI, SUnit *SUJ) {return false;} + + void setIsLastBit(MachineInstr *MI, unsigned Bit) const { + unsigned LastOp = TII->getOperandIdx(MI->getOpcode(), R600Operands::LAST); + MI->getOperand(LastOp).setImm(Bit); + } + + MachineBasicBlock::iterator addToPacket(MachineInstr *MI) { + CurrentPacketMIs.push_back(MI); + bool FitsConstLimits = TII->canBundle(CurrentPacketMIs); + DEBUG( + if (!FitsConstLimits) { + dbgs() << "Couldn't pack :\n"; + MI->dump(); + dbgs() << "with the following packets :\n"; + for (unsigned i = 0, e = CurrentPacketMIs.size() - 1; i < e; i++) { + CurrentPacketMIs[i]->dump(); + dbgs() << "\n"; + } + dbgs() << "because of Consts read limitations\n"; + }); + const DenseMap<unsigned, unsigned> &PV = + getPreviousVector(CurrentPacketMIs.front()); + bool FitsReadPortLimits = fitsReadPortLimitation(CurrentPacketMIs, PV); + DEBUG( + if (!FitsReadPortLimits) { + dbgs() << "Couldn't pack :\n"; + MI->dump(); + dbgs() << "with the following packets :\n"; + for (unsigned i = 0, e = CurrentPacketMIs.size() - 1; i < e; i++) { + CurrentPacketMIs[i]->dump(); + dbgs() << "\n"; + } + dbgs() << "because of Read port limitations\n"; + }); + bool isBundlable = FitsConstLimits && FitsReadPortLimits; + CurrentPacketMIs.pop_back(); + if (!isBundlable) { + endPacket(MI->getParent(), MI); + substitutePV(MI, getPreviousVector(MI)); + return VLIWPacketizerList::addToPacket(MI); + } + if (!CurrentPacketMIs.empty()) + setIsLastBit(CurrentPacketMIs.back(), 0); + substitutePV(MI, PV); + return VLIWPacketizerList::addToPacket(MI); + } +private: + std::vector<std::pair<int, unsigned> > + ExtractSrcs(const MachineInstr *MI, const DenseMap<unsigned, unsigned> &PV) + const { + R600Operands::Ops Ops[] = { + R600Operands::SRC0, + R600Operands::SRC1, + R600Operands::SRC2 + }; + std::vector<std::pair<int, unsigned> > Result; + for (unsigned i = 0; i < 3; i++) { + int OperandIdx = TII->getOperandIdx(MI->getOpcode(), Ops[i]); + if (OperandIdx < 0){ + Result.push_back(std::pair<int, unsigned>(-1,0)); + continue; + } + unsigned Src = MI->getOperand(OperandIdx).getReg(); + if (PV.find(Src) != PV.end()) { + Result.push_back(std::pair<int, unsigned>(-1,0)); + continue; + } + unsigned Reg = TRI.getEncodingValue(Src) & 0xff; + if (Reg > 127) { + Result.push_back(std::pair<int, unsigned>(-1,0)); + continue; + } + unsigned Chan = TRI.getHWRegChan(Src); + Result.push_back(std::pair<int, unsigned>(Reg, Chan)); + } + return Result; + } + + std::vector<std::pair<int, unsigned> > + Swizzle(std::vector<std::pair<int, unsigned> > Src, + BankSwizzle Swz) const { + switch (Swz) { + case ALU_VEC_012: + break; + case ALU_VEC_021: + std::swap(Src[1], Src[2]); + break; + case ALU_VEC_102: + std::swap(Src[0], Src[1]); + break; + case ALU_VEC_120: + std::swap(Src[0], Src[1]); + std::swap(Src[0], Src[2]); + break; + case ALU_VEC_201: + std::swap(Src[0], Src[2]); + std::swap(Src[0], Src[1]); + break; + case ALU_VEC_210: + std::swap(Src[0], Src[2]); + break; + } + return Src; + } + + bool isLegal(const std::vector<MachineInstr *> &IG, + const std::vector<BankSwizzle> &Swz, + const DenseMap<unsigned, unsigned> &PV) const { + assert (Swz.size() == IG.size()); + int Vector[4][3]; + memset(Vector, -1, sizeof(Vector)); + for (unsigned i = 0, e = IG.size(); i < e; i++) { + const std::vector<std::pair<int, unsigned> > &Srcs = + Swizzle(ExtractSrcs(IG[i], PV), Swz[i]); + for (unsigned j = 0; j < 3; j++) { + const std::pair<int, unsigned> &Src = Srcs[j]; + if (Src.first < 0) + continue; + if 
(Vector[Src.second][j] < 0) + Vector[Src.second][j] = Src.first; + if (Vector[Src.second][j] != Src.first) + return false; + } + } + return true; + } + + bool recursiveFitsFPLimitation( + std::vector<MachineInstr *> IG, + const DenseMap<unsigned, unsigned> &PV, + std::vector<BankSwizzle> &SwzCandidate, + std::vector<MachineInstr *> CurrentlyChecked) + const { + if (!isLegal(CurrentlyChecked, SwzCandidate, PV)) + return false; + if (IG.size() == CurrentlyChecked.size()) { + return true; + } + BankSwizzle AvailableSwizzle[] = { + ALU_VEC_012, + ALU_VEC_021, + ALU_VEC_120, + ALU_VEC_102, + ALU_VEC_201, + ALU_VEC_210 + }; + CurrentlyChecked.push_back(IG[CurrentlyChecked.size()]); + for (unsigned i = 0; i < 6; i++) { + SwzCandidate.push_back(AvailableSwizzle[i]); + if (recursiveFitsFPLimitation(IG, PV, SwzCandidate, CurrentlyChecked)) + return true; + SwzCandidate.pop_back(); + } + return false; + } + + bool fitsReadPortLimitation( + std::vector<MachineInstr *> IG, + const DenseMap<unsigned, unsigned> &PV) + const { + //Todo : support shared src0 - src1 operand + std::vector<BankSwizzle> SwzCandidate; + bool Result = recursiveFitsFPLimitation(IG, PV, SwzCandidate, + std::vector<MachineInstr *>()); + if (!Result) + return false; + for (unsigned i = 0, e = IG.size(); i < e; i++) { + MachineInstr *MI = IG[i]; + unsigned Op = TII->getOperandIdx(MI->getOpcode(), + R600Operands::BANK_SWIZZLE); + MI->getOperand(Op).setImm(SwzCandidate[i]); + } + return true; + } +}; + +bool R600Packetizer::runOnMachineFunction(MachineFunction &Fn) { + const TargetInstrInfo *TII = Fn.getTarget().getInstrInfo(); + MachineLoopInfo &MLI = getAnalysis<MachineLoopInfo>(); + MachineDominatorTree &MDT = getAnalysis<MachineDominatorTree>(); + + // Instantiate the packetizer. + R600PacketizerList Packetizer(Fn, MLI, MDT); + + // DFA state table should not be empty. + assert(Packetizer.getResourceTracker() && "Empty DFA table!"); + + // + // Loop over all basic blocks and remove KILL pseudo-instructions + // These instructions confuse the dependence analysis. Consider: + // D0 = ... (Insn 0) + // R0 = KILL R0, D0 (Insn 1) + // R0 = ... (Insn 2) + // Here, Insn 1 will result in the dependence graph not emitting an output + // dependence between Insn 0 and Insn 2. This can lead to incorrect + // packetization + // + for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end(); + MBB != MBBe; ++MBB) { + MachineBasicBlock::iterator End = MBB->end(); + MachineBasicBlock::iterator MI = MBB->begin(); + while (MI != End) { + if (MI->isKill()) { + MachineBasicBlock::iterator DeleteMI = MI; + ++MI; + MBB->erase(DeleteMI); + End = MBB->end(); + continue; + } + ++MI; + } + } + + // Loop over all of the basic blocks. + for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end(); + MBB != MBBe; ++MBB) { + // Find scheduling regions and schedule / packetize each region. + unsigned RemainingCount = MBB->size(); + for(MachineBasicBlock::iterator RegionEnd = MBB->end(); + RegionEnd != MBB->begin();) { + // The next region starts above the previous region. Look backward in the + // instruction stream until we find the nearest boundary. + MachineBasicBlock::iterator I = RegionEnd; + for(;I != MBB->begin(); --I, --RemainingCount) { + if (TII->isSchedulingBoundary(llvm::prior(I), MBB, Fn)) + break; + } + I = MBB->begin(); + + // Skip empty scheduling regions. + if (I == RegionEnd) { + RegionEnd = llvm::prior(RegionEnd); + --RemainingCount; + continue; + } + // Skip regions with one instruction. 
+ if (I == llvm::prior(RegionEnd)) { + RegionEnd = llvm::prior(RegionEnd); + continue; + } + + Packetizer.PacketizeMIs(MBB, I, RegionEnd); + RegionEnd = I; + } + } + + return true; + +} + +} + +llvm::FunctionPass *llvm::createR600Packetizer(TargetMachine &tm) { + return new R600Packetizer(tm); +} + +#endif // R600PACKETIZER_CPP diff --git a/contrib/llvm/lib/Target/R600/R600RegisterInfo.td b/contrib/llvm/lib/Target/R600/R600RegisterInfo.td index 03f4976..bfc546b 100644 --- a/contrib/llvm/lib/Target/R600/R600RegisterInfo.td +++ b/contrib/llvm/lib/Target/R600/R600RegisterInfo.td @@ -88,8 +88,14 @@ def NEG_ONE : R600Reg<"-1.0", 249>; def ONE_INT : R600Reg<"1", 250>; def HALF : R600Reg<"0.5", 252>; def NEG_HALF : R600Reg<"-0.5", 252>; -def ALU_LITERAL_X : R600Reg<"literal.x", 253>; -def PV_X : R600Reg<"pv.x", 254>; +def ALU_LITERAL_X : R600RegWithChan<"literal.x", 253, "X">; +def ALU_LITERAL_Y : R600RegWithChan<"literal.y", 253, "Y">; +def ALU_LITERAL_Z : R600RegWithChan<"literal.z", 253, "Z">; +def ALU_LITERAL_W : R600RegWithChan<"literal.w", 253, "W">; +def PV_X : R600RegWithChan<"PV.x", 254, "X">; +def PV_Y : R600RegWithChan<"PV.y", 254, "Y">; +def PV_Z : R600RegWithChan<"PV.z", 254, "Z">; +def PV_W : R600RegWithChan<"PV.w", 254, "W">; def PREDICATE_BIT : R600Reg<"PredicateBit", 0>; def PRED_SEL_OFF: R600Reg<"Pred_sel_off", 0>; def PRED_SEL_ZERO : R600Reg<"Pred_sel_zero", 2>; diff --git a/contrib/llvm/lib/Target/R600/R600Schedule.td b/contrib/llvm/lib/Target/R600/R600Schedule.td index 7ede181..78a460a 100644 --- a/contrib/llvm/lib/Target/R600/R600Schedule.td +++ b/contrib/llvm/lib/Target/R600/R600Schedule.td @@ -24,7 +24,7 @@ def AnyALU : InstrItinClass; def VecALU : InstrItinClass; def TransALU : InstrItinClass; -def R600_EG_Itin : ProcessorItineraries < +def R600_VLIW5_Itin : ProcessorItineraries < [ALU_X, ALU_Y, ALU_Z, ALU_W, TRANS, ALU_NULL], [], [ @@ -34,3 +34,14 @@ def R600_EG_Itin : ProcessorItineraries < InstrItinData<NullALU, [InstrStage<1, [ALU_NULL]>]> ] >; + +def R600_VLIW4_Itin : ProcessorItineraries < + [ALU_X, ALU_Y, ALU_Z, ALU_W, ALU_NULL], + [], + [ + InstrItinData<AnyALU, [InstrStage<1, [ALU_X, ALU_Y, ALU_Z, ALU_W]>]>, + InstrItinData<VecALU, [InstrStage<1, [ALU_X, ALU_Y, ALU_X, ALU_W]>]>, + InstrItinData<TransALU, [InstrStage<1, [ALU_NULL]>]>, + InstrItinData<NullALU, [InstrStage<1, [ALU_NULL]>]> + ] +>; diff --git a/contrib/llvm/lib/Target/R600/SIDefines.h b/contrib/llvm/lib/Target/R600/SIDefines.h new file mode 100644 index 0000000..716b093 --- /dev/null +++ b/contrib/llvm/lib/Target/R600/SIDefines.h @@ -0,0 +1,22 @@ +//===-- SIDefines.h - SI Helper Macros ----------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +/// \file +//===----------------------------------------------------------------------===// + +#ifndef SIDEFINES_H_ +#define SIDEFINES_H_ + +#define R_00B028_SPI_SHADER_PGM_RSRC1_PS 0x00B028 +#define R_00B128_SPI_SHADER_PGM_RSRC1_VS 0x00B128 +#define R_00B228_SPI_SHADER_PGM_RSRC1_GS 0x00B228 +#define R_00B848_COMPUTE_PGM_RSRC1 0x00B848 +#define S_00B028_VGPRS(x) (((x) & 0x3F) << 0) +#define S_00B028_SGPRS(x) (((x) & 0x0F) << 6) +#define R_0286CC_SPI_PS_INPUT_ENA 0x0286CC + +#endif // SIDEFINES_H_ diff --git a/contrib/llvm/lib/Target/R600/SIISelLowering.cpp b/contrib/llvm/lib/Target/R600/SIISelLowering.cpp index 6f0c307..6bd82a5 100644 --- a/contrib/llvm/lib/Target/R600/SIISelLowering.cpp +++ b/contrib/llvm/lib/Target/R600/SIISelLowering.cpp @@ -49,6 +49,7 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) : addRegisterClass(MVT::v4i32, &AMDGPU::VReg_128RegClass); addRegisterClass(MVT::v4f32, &AMDGPU::VReg_128RegClass); + addRegisterClass(MVT::i128, &AMDGPU::SReg_128RegClass); addRegisterClass(MVT::v8i32, &AMDGPU::VReg_256RegClass); addRegisterClass(MVT::v8f32, &AMDGPU::VReg_256RegClass); @@ -70,6 +71,10 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) : setOperationAction(ISD::SELECT_CC, MVT::i32, Custom); setOperationAction(ISD::SELECT_CC, MVT::Other, Expand); + + setOperationAction(ISD::STORE, MVT::i32, Custom); + setOperationAction(ISD::STORE, MVT::i64, Custom); + setTargetDAGCombine(ISD::SELECT_CC); setTargetDAGCombine(ISD::SETCC); @@ -234,6 +239,7 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { default: return AMDGPUTargetLowering::LowerOperation(Op, DAG); case ISD::BRCOND: return LowerBRCOND(Op, DAG); case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG); + case ISD::STORE: return LowerSTORE(Op, DAG); } return SDValue(); } @@ -332,6 +338,32 @@ SDValue SITargetLowering::LowerBRCOND(SDValue BRCOND, return Chain; } +#define RSRC_DATA_FORMAT 0xf00000000000 + +SDValue SITargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const { + StoreSDNode *StoreNode = cast<StoreSDNode>(Op); + SDValue Chain = Op.getOperand(0); + SDValue Value = Op.getOperand(1); + SDValue VirtualAddress = Op.getOperand(2); + DebugLoc DL = Op.getDebugLoc(); + + if (StoreNode->getAddressSpace() != AMDGPUAS::GLOBAL_ADDRESS) { + return SDValue(); + } + + SDValue SrcSrc = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i128, + DAG.getConstant(0, MVT::i64), + DAG.getConstant(RSRC_DATA_FORMAT, MVT::i64)); + + SDValue Ops[2]; + Ops[0] = DAG.getNode(AMDGPUISD::BUFFER_STORE, DL, MVT::Other, Chain, + Value, SrcSrc, VirtualAddress); + Ops[1] = Chain; + + return DAG.getMergeValues(Ops, 2, DL); + +} + SDValue SITargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { SDValue LHS = Op.getOperand(0); SDValue RHS = Op.getOperand(1); @@ -424,9 +456,12 @@ int32_t SITargetLowering::analyzeImmediate(const SDNode *N) const { float F; } Imm; - if (const ConstantSDNode *Node = dyn_cast<ConstantSDNode>(N)) + if (const ConstantSDNode *Node = dyn_cast<ConstantSDNode>(N)) { + if (Node->getZExtValue() >> 32) { + return -1; + } Imm.I = Node->getSExtValue(); - else if (const ConstantFPSDNode *Node = dyn_cast<ConstantFPSDNode>(N)) + } else if (const ConstantFPSDNode *Node = dyn_cast<ConstantFPSDNode>(N)) Imm.F = Node->getValueAPF().convertToFloat(); else return -1; // It isn't an immediate @@ -534,8 +569,9 @@ void SITargetLowering::ensureSRegLimit(SelectionDAG &DAG, SDValue &Operand, Operand = SDValue(Node, 0); } -SDNode *SITargetLowering::PostISelFolding(MachineSDNode *Node, - SelectionDAG 
&DAG) const { +/// \brief Try to fold the Nodes operands into the Node +SDNode *SITargetLowering::foldOperands(MachineSDNode *Node, + SelectionDAG &DAG) const { // Original encoding (either e32 or e64) int Opcode = Node->getMachineOpcode(); @@ -666,5 +702,116 @@ SDNode *SITargetLowering::PostISelFolding(MachineSDNode *Node, // Create a complete new instruction return DAG.getMachineNode(Desc->Opcode, Node->getDebugLoc(), - Node->getVTList(), Ops.data(), Ops.size()); + Node->getVTList(), Ops); +} + +/// \brief Helper function for adjustWritemask +unsigned SubIdx2Lane(unsigned Idx) { + switch (Idx) { + default: return 0; + case AMDGPU::sub0: return 0; + case AMDGPU::sub1: return 1; + case AMDGPU::sub2: return 2; + case AMDGPU::sub3: return 3; + } +} + +/// \brief Adjust the writemask of MIMG instructions +void SITargetLowering::adjustWritemask(MachineSDNode *&Node, + SelectionDAG &DAG) const { + SDNode *Users[4] = { }; + unsigned Writemask = 0, Lane = 0; + + // Try to figure out the used register components + for (SDNode::use_iterator I = Node->use_begin(), E = Node->use_end(); + I != E; ++I) { + + // Abort if we can't understand the usage + if (!I->isMachineOpcode() || + I->getMachineOpcode() != TargetOpcode::EXTRACT_SUBREG) + return; + + Lane = SubIdx2Lane(I->getConstantOperandVal(1)); + + // Abort if we have more than one user per component + if (Users[Lane]) + return; + + Users[Lane] = *I; + Writemask |= 1 << Lane; + } + + // Abort if all components are used + if (Writemask == 0xf) + return; + + // Adjust the writemask in the node + std::vector<SDValue> Ops; + Ops.push_back(DAG.getTargetConstant(Writemask, MVT::i32)); + for (unsigned i = 1, e = Node->getNumOperands(); i != e; ++i) + Ops.push_back(Node->getOperand(i)); + Node = (MachineSDNode*)DAG.UpdateNodeOperands(Node, Ops.data(), Ops.size()); + + // If we only got one lane, replace it with a copy + if (Writemask == (1U << Lane)) { + SDValue RC = DAG.getTargetConstant(AMDGPU::VReg_32RegClassID, MVT::i32); + SDNode *Copy = DAG.getMachineNode(TargetOpcode::COPY_TO_REGCLASS, + DebugLoc(), Users[Lane]->getValueType(0), + SDValue(Node, 0), RC); + DAG.ReplaceAllUsesWith(Users[Lane], Copy); + return; + } + + // Update the users of the node with the new indices + for (unsigned i = 0, Idx = AMDGPU::sub0; i < 4; ++i) { + + SDNode *User = Users[i]; + if (!User) + continue; + + SDValue Op = DAG.getTargetConstant(Idx, MVT::i32); + DAG.UpdateNodeOperands(User, User->getOperand(0), Op); + + switch (Idx) { + default: break; + case AMDGPU::sub0: Idx = AMDGPU::sub1; break; + case AMDGPU::sub1: Idx = AMDGPU::sub2; break; + case AMDGPU::sub2: Idx = AMDGPU::sub3; break; + } + } +} + +/// \brief Fold the instructions after slecting them +SDNode *SITargetLowering::PostISelFolding(MachineSDNode *Node, + SelectionDAG &DAG) const { + + if (AMDGPU::isMIMG(Node->getMachineOpcode()) != -1) + adjustWritemask(Node, DAG); + + return foldOperands(Node, DAG); +} + +/// \brief Assign the register class depending on the number of +/// bits set in the writemask +void SITargetLowering::AdjustInstrPostInstrSelection(MachineInstr *MI, + SDNode *Node) const { + if (AMDGPU::isMIMG(MI->getOpcode()) == -1) + return; + + unsigned VReg = MI->getOperand(0).getReg(); + unsigned Writemask = MI->getOperand(1).getImm(); + unsigned BitsSet = 0; + for (unsigned i = 0; i < 4; ++i) + BitsSet += Writemask & (1 << i) ? 
1 : 0; + + const TargetRegisterClass *RC; + switch (BitsSet) { + default: return; + case 1: RC = &AMDGPU::VReg_32RegClass; break; + case 2: RC = &AMDGPU::VReg_64RegClass; break; + case 3: RC = &AMDGPU::VReg_96RegClass; break; + } + + MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo(); + MRI.setRegClass(VReg, RC); } diff --git a/contrib/llvm/lib/Target/R600/SIISelLowering.h b/contrib/llvm/lib/Target/R600/SIISelLowering.h index 5ad2f40..de637be 100644 --- a/contrib/llvm/lib/Target/R600/SIISelLowering.h +++ b/contrib/llvm/lib/Target/R600/SIISelLowering.h @@ -24,6 +24,7 @@ class SITargetLowering : public AMDGPUTargetLowering { const SIInstrInfo * TII; const TargetRegisterInfo * TRI; + SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const; SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const; @@ -33,6 +34,9 @@ class SITargetLowering : public AMDGPUTargetLowering { void ensureSRegLimit(SelectionDAG &DAG, SDValue &Operand, unsigned RegClass, bool &ScalarSlotUsed) const; + SDNode *foldOperands(MachineSDNode *N, SelectionDAG &DAG) const; + void adjustWritemask(MachineSDNode *&N, SelectionDAG &DAG) const; + public: SITargetLowering(TargetMachine &tm); @@ -49,6 +53,8 @@ public: virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const; virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const; virtual SDNode *PostISelFolding(MachineSDNode *N, SelectionDAG &DAG) const; + virtual void AdjustInstrPostInstrSelection(MachineInstr *MI, + SDNode *Node) const; int32_t analyzeImmediate(const SDNode *N) const; }; diff --git a/contrib/llvm/lib/Target/R600/SIInstrFormats.td b/contrib/llvm/lib/Target/R600/SIInstrFormats.td index 3891ddb..f737ddd 100644 --- a/contrib/llvm/lib/Target/R600/SIInstrFormats.td +++ b/contrib/llvm/lib/Target/R600/SIInstrFormats.td @@ -284,33 +284,33 @@ let Uses = [EXEC] in { class MUBUF <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> : Enc64<outs, ins, asm, pattern> { - bits<8> VDATA; - bits<12> OFFSET; - bits<1> OFFEN; - bits<1> IDXEN; - bits<1> GLC; - bits<1> ADDR64; - bits<1> LDS; - bits<8> VADDR; - bits<7> SRSRC; - bits<1> SLC; - bits<1> TFE; - bits<8> SOFFSET; - - let Inst{11-0} = OFFSET; - let Inst{12} = OFFEN; - let Inst{13} = IDXEN; - let Inst{14} = GLC; - let Inst{15} = ADDR64; - let Inst{16} = LDS; + bits<12> offset; + bits<1> offen; + bits<1> idxen; + bits<1> glc; + bits<1> addr64; + bits<1> lds; + bits<8> vaddr; + bits<8> vdata; + bits<7> srsrc; + bits<1> slc; + bits<1> tfe; + bits<8> soffset; + + let Inst{11-0} = offset; + let Inst{12} = offen; + let Inst{13} = idxen; + let Inst{14} = glc; + let Inst{15} = addr64; + let Inst{16} = lds; let Inst{24-18} = op; let Inst{31-26} = 0x38; //encoding - let Inst{39-32} = VADDR; - let Inst{47-40} = VDATA; - let Inst{52-48} = SRSRC{6-2}; - let Inst{54} = SLC; - let Inst{55} = TFE; - let Inst{63-56} = SOFFSET; + let Inst{39-32} = vaddr; + let Inst{47-40} = vdata; + let Inst{52-48} = srsrc{6-2}; + let Inst{54} = slc; + let Inst{55} = tfe; + let Inst{63-56} = soffset; let VM_CNT = 1; let EXP_CNT = 1; diff --git a/contrib/llvm/lib/Target/R600/SIInstrInfo.cpp b/contrib/llvm/lib/Target/R600/SIInstrInfo.cpp index 0bfcef5..9a04c60 100644 --- a/contrib/llvm/lib/Target/R600/SIInstrInfo.cpp +++ b/contrib/llvm/lib/Target/R600/SIInstrInfo.cpp @@ -58,6 +58,10 @@ SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB, AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3, 0 }; + const int16_t Sub0_2[] = { + AMDGPU::sub0, 
AMDGPU::sub1, AMDGPU::sub2, 0 + }; + const int16_t Sub0_1[] = { AMDGPU::sub0, AMDGPU::sub1, 0 }; @@ -125,6 +129,11 @@ SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB, Opcode = AMDGPU::V_MOV_B32_e32; SubIndices = Sub0_1; + } else if (AMDGPU::VReg_96RegClass.contains(DestReg)) { + assert(AMDGPU::VReg_96RegClass.contains(SrcReg)); + Opcode = AMDGPU::V_MOV_B32_e32; + SubIndices = Sub0_2; + } else if (AMDGPU::VReg_128RegClass.contains(DestReg)) { assert(AMDGPU::VReg_128RegClass.contains(SrcReg) || AMDGPU::SReg_128RegClass.contains(SrcReg)); diff --git a/contrib/llvm/lib/Target/R600/SIInstrInfo.h b/contrib/llvm/lib/Target/R600/SIInstrInfo.h index d4e60e5..87eff4d 100644 --- a/contrib/llvm/lib/Target/R600/SIInstrInfo.h +++ b/contrib/llvm/lib/Target/R600/SIInstrInfo.h @@ -80,6 +80,7 @@ namespace AMDGPU { int getVOPe64(uint16_t Opcode); int getCommuteRev(uint16_t Opcode); int getCommuteOrig(uint16_t Opcode); + int isMIMG(uint16_t Opcode); } // End namespace AMDGPU diff --git a/contrib/llvm/lib/Target/R600/SIInstrInfo.td b/contrib/llvm/lib/Target/R600/SIInstrInfo.td index 617f0b8..c8aecb7 100644 --- a/contrib/llvm/lib/Target/R600/SIInstrInfo.td +++ b/contrib/llvm/lib/Target/R600/SIInstrInfo.td @@ -26,6 +26,10 @@ def HI32 : SDNodeXForm<imm, [{ return CurDAG->getTargetConstant(N->getZExtValue() >> 32, MVT::i32); }]>; +def SIbuffer_store : SDNode<"AMDGPUISD::BUFFER_STORE", + SDTypeProfile<0, 3, [SDTCisPtrTy<1>, SDTCisInt<2>]>, + [SDNPHasChain, SDNPMayStore]>; + def IMM8bitDWORD : ImmLeaf < i32, [{ return (Imm & ~0x3FC) == 0; @@ -255,14 +259,14 @@ multiclass VOPC_64 <bits<8> op, string opName, class VOP3_32 <bits<9> op, string opName, list<dag> pattern> : VOP3 < op, (outs VReg_32:$dst), (ins VSrc_32:$src0, VSrc_32:$src1, VSrc_32:$src2, - i32imm:$abs, i32imm:$clamp, i32imm:$omod, i32imm:$neg), + InstFlag:$abs, InstFlag:$clamp, InstFlag:$omod, InstFlag:$neg), opName#" $dst, $src0, $src1, $src2, $abs, $clamp, $omod, $neg", pattern >, VOP <opName>; class VOP3_64 <bits<9> op, string opName, list<dag> pattern> : VOP3 < op, (outs VReg_64:$dst), (ins VSrc_64:$src0, VSrc_64:$src1, VSrc_64:$src2, - i32imm:$abs, i32imm:$clamp, i32imm:$omod, i32imm:$neg), + InstFlag:$abs, InstFlag:$clamp, InstFlag:$omod, InstFlag:$neg), opName#" $dst, $src0, $src1, $src2, $abs, $clamp, $omod, $neg", pattern >, VOP <opName>; @@ -285,17 +289,39 @@ class MTBUF_Store_Helper <bits<3> op, string asm, RegisterClass regClass> : MTBU class MUBUF_Load_Helper <bits<7> op, string asm, RegisterClass regClass> : MUBUF < op, - (outs regClass:$dst), + (outs regClass:$vdata), (ins i16imm:$offset, i1imm:$offen, i1imm:$idxen, i1imm:$glc, i1imm:$addr64, i1imm:$lds, VReg_32:$vaddr, SReg_128:$srsrc, i1imm:$slc, i1imm:$tfe, SSrc_32:$soffset), - asm#" $dst, $offset, $offen, $idxen, $glc, $addr64, " + asm#" $vdata, $offset, $offen, $idxen, $glc, $addr64, " #"$lds, $vaddr, $srsrc, $slc, $tfe, $soffset", []> { let mayLoad = 1; let mayStore = 0; } +class MUBUF_Store_Helper <bits<7> op, string name, RegisterClass vdataClass, + ValueType VT> : + MUBUF <op, (outs), (ins vdataClass:$vdata, SReg_128:$srsrc, VReg_64:$vaddr), + name#" $vdata, $srsrc + $vaddr", + [(SIbuffer_store (VT vdataClass:$vdata), (i128 SReg_128:$srsrc), + (i64 VReg_64:$vaddr))]> { + + let mayLoad = 0; + let mayStore = 1; + + // Encoding + let offset = 0; + let offen = 0; + let idxen = 0; + let glc = 0; + let addr64 = 1; + let lds = 0; + let slc = 0; + let tfe = 0; + let soffset = 128; // ZERO +} + class MTBUF_Load_Helper <bits<3> op, string asm, RegisterClass regClass> : MTBUF < op, (outs 
regClass:$dst), @@ -309,7 +335,22 @@ class MTBUF_Load_Helper <bits<3> op, string asm, RegisterClass regClass> : MTBUF let mayStore = 0; } -class MIMG_Load_Helper <bits<7> op, string asm> : MIMG < +class MIMG_NoSampler_Helper <bits<7> op, string asm> : MIMG < + op, + (outs VReg_128:$vdata), + (ins i32imm:$dmask, i1imm:$unorm, i1imm:$glc, i1imm:$da, i1imm:$r128, + i1imm:$tfe, i1imm:$lwe, i1imm:$slc, unknown:$vaddr, + SReg_256:$srsrc), + asm#" $vdata, $dmask, $unorm, $glc, $da, $r128," + #" $tfe, $lwe, $slc, $vaddr, $srsrc", + []> { + let SSAMP = 0; + let mayLoad = 1; + let mayStore = 0; + let hasPostISelHook = 1; +} + +class MIMG_Sampler_Helper <bits<7> op, string asm> : MIMG < op, (outs VReg_128:$vdata), (ins i32imm:$dmask, i1imm:$unorm, i1imm:$glc, i1imm:$da, i1imm:$r128, @@ -320,6 +361,7 @@ class MIMG_Load_Helper <bits<7> op, string asm> : MIMG < []> { let mayLoad = 1; let mayStore = 0; + let hasPostISelHook = 1; } //===----------------------------------------------------------------------===// @@ -353,4 +395,13 @@ def getCommuteOrig : InstrMapping { let ValueCols = [["1"]]; } +// Test if the supplied opcode is an MIMG instruction +def isMIMG : InstrMapping { + let FilterClass = "MIMG"; + let RowFields = ["Inst"]; + let ColFields = ["Size"]; + let KeyCol = ["8"]; + let ValueCols = [["8"]]; +} + include "SIInstructions.td" diff --git a/contrib/llvm/lib/Target/R600/SIInstructions.td b/contrib/llvm/lib/Target/R600/SIInstructions.td index 4f734f9..0d50c5d 100644 --- a/contrib/llvm/lib/Target/R600/SIInstructions.td +++ b/contrib/llvm/lib/Target/R600/SIInstructions.td @@ -108,7 +108,7 @@ VGPR0 = V_CNDMASK VCC, VGPR0, VGPR1 def S_CMPK_EQ_I32 : SOPK < 0x00000003, (outs SCCReg:$dst), (ins SReg_32:$src0, i32imm:$src1), "S_CMPK_EQ_I32", - [(set SCCReg:$dst, (setcc SReg_32:$src0, imm:$src1, SETEQ))] + [(set i1:$dst, (setcc i32:$src0, imm:$src1, SETEQ))] >; */ @@ -408,8 +408,14 @@ def BUFFER_LOAD_DWORDX2 : MUBUF_Load_Helper <0x0000000d, "BUFFER_LOAD_DWORDX2", def BUFFER_LOAD_DWORDX4 : MUBUF_Load_Helper <0x0000000e, "BUFFER_LOAD_DWORDX4", VReg_128>; //def BUFFER_STORE_BYTE : MUBUF_ <0x00000018, "BUFFER_STORE_BYTE", []>; //def BUFFER_STORE_SHORT : MUBUF_ <0x0000001a, "BUFFER_STORE_SHORT", []>; -//def BUFFER_STORE_DWORD : MUBUF_ <0x0000001c, "BUFFER_STORE_DWORD", []>; -//def BUFFER_STORE_DWORDX2 : MUBUF_DWORDX2 <0x0000001d, "BUFFER_STORE_DWORDX2", []>; + +def BUFFER_STORE_DWORD : MUBUF_Store_Helper < + 0x0000001c, "BUFFER_STORE_DWORD", VReg_32, i32 +>; + +def BUFFER_STORE_DWORDX2 : MUBUF_Store_Helper < + 0x0000001d, "BUFFER_STORE_DWORDX2", VReg_64, i64 +>; //def BUFFER_STORE_DWORDX4 : MUBUF_DWORDX4 <0x0000001e, "BUFFER_STORE_DWORDX4", []>; //def BUFFER_ATOMIC_SWAP : MUBUF_ <0x00000030, "BUFFER_ATOMIC_SWAP", []>; //def BUFFER_ATOMIC_CMPSWAP : MUBUF_ <0x00000031, "BUFFER_ATOMIC_CMPSWAP", []>; @@ -489,7 +495,7 @@ defm S_BUFFER_LOAD_DWORDX16 : SMRD_Helper < //def S_MEMTIME : SMRD_ <0x0000001e, "S_MEMTIME", []>; //def S_DCACHE_INV : SMRD_ <0x0000001f, "S_DCACHE_INV", []>; //def IMAGE_LOAD : MIMG_NoPattern_ <"IMAGE_LOAD", 0x00000000>; -//def IMAGE_LOAD_MIP : MIMG_NoPattern_ <"IMAGE_LOAD_MIP", 0x00000001>; +def IMAGE_LOAD_MIP : MIMG_NoSampler_Helper <0x00000001, "IMAGE_LOAD_MIP">; //def IMAGE_LOAD_PCK : MIMG_NoPattern_ <"IMAGE_LOAD_PCK", 0x00000002>; //def IMAGE_LOAD_PCK_SGN : MIMG_NoPattern_ <"IMAGE_LOAD_PCK_SGN", 0x00000003>; //def IMAGE_LOAD_MIP_PCK : MIMG_NoPattern_ <"IMAGE_LOAD_MIP_PCK", 0x00000004>; @@ -498,7 +504,7 @@ defm S_BUFFER_LOAD_DWORDX16 : SMRD_Helper < //def IMAGE_STORE_MIP : MIMG_NoPattern_ 
<"IMAGE_STORE_MIP", 0x00000009>; //def IMAGE_STORE_PCK : MIMG_NoPattern_ <"IMAGE_STORE_PCK", 0x0000000a>; //def IMAGE_STORE_MIP_PCK : MIMG_NoPattern_ <"IMAGE_STORE_MIP_PCK", 0x0000000b>; -//def IMAGE_GET_RESINFO : MIMG_NoPattern_ <"IMAGE_GET_RESINFO", 0x0000000e>; +def IMAGE_GET_RESINFO : MIMG_NoSampler_Helper <0x0000000e, "IMAGE_GET_RESINFO">; //def IMAGE_ATOMIC_SWAP : MIMG_NoPattern_ <"IMAGE_ATOMIC_SWAP", 0x0000000f>; //def IMAGE_ATOMIC_CMPSWAP : MIMG_NoPattern_ <"IMAGE_ATOMIC_CMPSWAP", 0x00000010>; //def IMAGE_ATOMIC_ADD : MIMG_NoPattern_ <"IMAGE_ATOMIC_ADD", 0x00000011>; @@ -516,20 +522,20 @@ defm S_BUFFER_LOAD_DWORDX16 : SMRD_Helper < //def IMAGE_ATOMIC_FCMPSWAP : MIMG_NoPattern_ <"IMAGE_ATOMIC_FCMPSWAP", 0x0000001d>; //def IMAGE_ATOMIC_FMIN : MIMG_NoPattern_ <"IMAGE_ATOMIC_FMIN", 0x0000001e>; //def IMAGE_ATOMIC_FMAX : MIMG_NoPattern_ <"IMAGE_ATOMIC_FMAX", 0x0000001f>; -def IMAGE_SAMPLE : MIMG_Load_Helper <0x00000020, "IMAGE_SAMPLE">; +def IMAGE_SAMPLE : MIMG_Sampler_Helper <0x00000020, "IMAGE_SAMPLE">; //def IMAGE_SAMPLE_CL : MIMG_NoPattern_ <"IMAGE_SAMPLE_CL", 0x00000021>; -def IMAGE_SAMPLE_D : MIMG_Load_Helper <0x00000022, "IMAGE_SAMPLE_D">; +def IMAGE_SAMPLE_D : MIMG_Sampler_Helper <0x00000022, "IMAGE_SAMPLE_D">; //def IMAGE_SAMPLE_D_CL : MIMG_NoPattern_ <"IMAGE_SAMPLE_D_CL", 0x00000023>; -def IMAGE_SAMPLE_L : MIMG_Load_Helper <0x00000024, "IMAGE_SAMPLE_L">; -def IMAGE_SAMPLE_B : MIMG_Load_Helper <0x00000025, "IMAGE_SAMPLE_B">; +def IMAGE_SAMPLE_L : MIMG_Sampler_Helper <0x00000024, "IMAGE_SAMPLE_L">; +def IMAGE_SAMPLE_B : MIMG_Sampler_Helper <0x00000025, "IMAGE_SAMPLE_B">; //def IMAGE_SAMPLE_B_CL : MIMG_NoPattern_ <"IMAGE_SAMPLE_B_CL", 0x00000026>; //def IMAGE_SAMPLE_LZ : MIMG_NoPattern_ <"IMAGE_SAMPLE_LZ", 0x00000027>; -def IMAGE_SAMPLE_C : MIMG_Load_Helper <0x00000028, "IMAGE_SAMPLE_C">; +def IMAGE_SAMPLE_C : MIMG_Sampler_Helper <0x00000028, "IMAGE_SAMPLE_C">; //def IMAGE_SAMPLE_C_CL : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_CL", 0x00000029>; //def IMAGE_SAMPLE_C_D : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_D", 0x0000002a>; //def IMAGE_SAMPLE_C_D_CL : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_D_CL", 0x0000002b>; -def IMAGE_SAMPLE_C_L : MIMG_Load_Helper <0x0000002c, "IMAGE_SAMPLE_C_L">; -def IMAGE_SAMPLE_C_B : MIMG_Load_Helper <0x0000002d, "IMAGE_SAMPLE_C_B">; +def IMAGE_SAMPLE_C_L : MIMG_Sampler_Helper <0x0000002c, "IMAGE_SAMPLE_C_L">; +def IMAGE_SAMPLE_C_B : MIMG_Sampler_Helper <0x0000002d, "IMAGE_SAMPLE_C_B">; //def IMAGE_SAMPLE_C_B_CL : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_B_CL", 0x0000002e>; //def IMAGE_SAMPLE_C_LZ : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_LZ", 0x0000002f>; //def IMAGE_SAMPLE_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_O", 0x00000030>; @@ -594,12 +600,14 @@ defm V_READFIRSTLANE_B32 : VOP1_32 <0x00000002, "V_READFIRSTLANE_B32", []>; //defm V_CVT_I32_F64 : VOP1_32 <0x00000003, "V_CVT_I32_F64", []>; //defm V_CVT_F64_I32 : VOP1_64 <0x00000004, "V_CVT_F64_I32", []>; defm V_CVT_F32_I32 : VOP1_32 <0x00000005, "V_CVT_F32_I32", - [(set VReg_32:$dst, (sint_to_fp VSrc_32:$src0))] + [(set f32:$dst, (sint_to_fp i32:$src0))] +>; +defm V_CVT_F32_U32 : VOP1_32 <0x00000006, "V_CVT_F32_U32", + [(set f32:$dst, (uint_to_fp i32:$src0))] >; -//defm V_CVT_F32_U32 : VOP1_32 <0x00000006, "V_CVT_F32_U32", []>; -//defm V_CVT_U32_F32 : VOP1_32 <0x00000007, "V_CVT_U32_F32", []>; +defm V_CVT_U32_F32 : VOP1_32 <0x00000007, "V_CVT_U32_F32", []>; defm V_CVT_I32_F32 : VOP1_32 <0x00000008, "V_CVT_I32_F32", - [(set (i32 VReg_32:$dst), (fp_to_sint VSrc_32:$src0))] + [(set i32:$dst, (fp_to_sint f32:$src0))] >; defm V_MOV_FED_B32 : VOP1_32 
<0x00000009, "V_MOV_FED_B32", []>; ////def V_CVT_F16_F32 : VOP1_F16 <0x0000000a, "V_CVT_F16_F32", []>; @@ -616,35 +624,37 @@ defm V_MOV_FED_B32 : VOP1_32 <0x00000009, "V_MOV_FED_B32", []>; //defm V_CVT_U32_F64 : VOP1_32 <0x00000015, "V_CVT_U32_F64", []>; //defm V_CVT_F64_U32 : VOP1_64 <0x00000016, "V_CVT_F64_U32", []>; defm V_FRACT_F32 : VOP1_32 <0x00000020, "V_FRACT_F32", - [(set VReg_32:$dst, (AMDGPUfract VSrc_32:$src0))] + [(set f32:$dst, (AMDGPUfract f32:$src0))] +>; +defm V_TRUNC_F32 : VOP1_32 <0x00000021, "V_TRUNC_F32", + [(set f32:$dst, (int_AMDGPU_trunc f32:$src0))] >; -defm V_TRUNC_F32 : VOP1_32 <0x00000021, "V_TRUNC_F32", []>; defm V_CEIL_F32 : VOP1_32 <0x00000022, "V_CEIL_F32", - [(set VReg_32:$dst, (fceil VSrc_32:$src0))] + [(set f32:$dst, (fceil f32:$src0))] >; defm V_RNDNE_F32 : VOP1_32 <0x00000023, "V_RNDNE_F32", - [(set VReg_32:$dst, (frint VSrc_32:$src0))] + [(set f32:$dst, (frint f32:$src0))] >; defm V_FLOOR_F32 : VOP1_32 <0x00000024, "V_FLOOR_F32", - [(set VReg_32:$dst, (ffloor VSrc_32:$src0))] + [(set f32:$dst, (ffloor f32:$src0))] >; defm V_EXP_F32 : VOP1_32 <0x00000025, "V_EXP_F32", - [(set VReg_32:$dst, (fexp2 VSrc_32:$src0))] + [(set f32:$dst, (fexp2 f32:$src0))] >; defm V_LOG_CLAMP_F32 : VOP1_32 <0x00000026, "V_LOG_CLAMP_F32", []>; defm V_LOG_F32 : VOP1_32 <0x00000027, "V_LOG_F32", - [(set VReg_32:$dst, (flog2 VSrc_32:$src0))] + [(set f32:$dst, (flog2 f32:$src0))] >; defm V_RCP_CLAMP_F32 : VOP1_32 <0x00000028, "V_RCP_CLAMP_F32", []>; defm V_RCP_LEGACY_F32 : VOP1_32 <0x00000029, "V_RCP_LEGACY_F32", []>; defm V_RCP_F32 : VOP1_32 <0x0000002a, "V_RCP_F32", - [(set VReg_32:$dst, (fdiv FP_ONE, VSrc_32:$src0))] + [(set f32:$dst, (fdiv FP_ONE, f32:$src0))] >; defm V_RCP_IFLAG_F32 : VOP1_32 <0x0000002b, "V_RCP_IFLAG_F32", []>; defm V_RSQ_CLAMP_F32 : VOP1_32 <0x0000002c, "V_RSQ_CLAMP_F32", []>; defm V_RSQ_LEGACY_F32 : VOP1_32 < 0x0000002d, "V_RSQ_LEGACY_F32", - [(set VReg_32:$dst, (int_AMDGPU_rsq VSrc_32:$src0))] + [(set f32:$dst, (int_AMDGPU_rsq f32:$src0))] >; defm V_RSQ_F32 : VOP1_32 <0x0000002e, "V_RSQ_F32", []>; defm V_RCP_F64 : VOP1_64 <0x0000002f, "V_RCP_F64", []>; @@ -787,14 +797,13 @@ def V_CNDMASK_B32_e64 : VOP3 <0x00000100, (outs VReg_32:$dst), (ins VSrc_32:$src0, VSrc_32:$src1, SSrc_64:$src2, InstFlag:$abs, InstFlag:$clamp, InstFlag:$omod, InstFlag:$neg), "V_CNDMASK_B32_e64 $dst, $src0, $src1, $src2, $abs, $clamp, $omod, $neg", - [(set (i32 VReg_32:$dst), (select (i1 SSrc_64:$src2), - VSrc_32:$src1, VSrc_32:$src0))] + [(set i32:$dst, (select i1:$src2, i32:$src1, i32:$src0))] >; //f32 pattern for V_CNDMASK_B32_e64 def : Pat < - (f32 (select (i1 SSrc_64:$src2), VSrc_32:$src1, VSrc_32:$src0)), - (V_CNDMASK_B32_e64 VSrc_32:$src0, VSrc_32:$src1, SSrc_64:$src2) + (f32 (select i1:$src2, f32:$src1, f32:$src0)), + (V_CNDMASK_B32_e64 $src0, $src1, $src2) >; defm V_READLANE_B32 : VOP2_32 <0x00000001, "V_READLANE_B32", []>; @@ -802,11 +811,11 @@ defm V_WRITELANE_B32 : VOP2_32 <0x00000002, "V_WRITELANE_B32", []>; let isCommutable = 1 in { defm V_ADD_F32 : VOP2_32 <0x00000003, "V_ADD_F32", - [(set VReg_32:$dst, (fadd VSrc_32:$src0, VReg_32:$src1))] + [(set f32:$dst, (fadd f32:$src0, f32:$src1))] >; defm V_SUB_F32 : VOP2_32 <0x00000004, "V_SUB_F32", - [(set VReg_32:$dst, (fsub VSrc_32:$src0, VReg_32:$src1))] + [(set f32:$dst, (fsub f32:$src0, f32:$src1))] >; defm V_SUBREV_F32 : VOP2_32 <0x00000005, "V_SUBREV_F32", [], "V_SUB_F32">; } // End isCommutable = 1 @@ -817,11 +826,11 @@ let isCommutable = 1 in { defm V_MUL_LEGACY_F32 : VOP2_32 < 0x00000007, "V_MUL_LEGACY_F32", - [(set 
VReg_32:$dst, (int_AMDGPU_mul VSrc_32:$src0, VReg_32:$src1))] + [(set f32:$dst, (int_AMDGPU_mul f32:$src0, f32:$src1))] >; defm V_MUL_F32 : VOP2_32 <0x00000008, "V_MUL_F32", - [(set VReg_32:$dst, (fmul VSrc_32:$src0, VReg_32:$src1))] + [(set f32:$dst, (fmul f32:$src0, f32:$src1))] >; } // End isCommutable = 1 @@ -834,43 +843,51 @@ defm V_MUL_F32 : VOP2_32 <0x00000008, "V_MUL_F32", let isCommutable = 1 in { defm V_MIN_LEGACY_F32 : VOP2_32 <0x0000000d, "V_MIN_LEGACY_F32", - [(set VReg_32:$dst, (AMDGPUfmin VSrc_32:$src0, VReg_32:$src1))] + [(set f32:$dst, (AMDGPUfmin f32:$src0, f32:$src1))] >; defm V_MAX_LEGACY_F32 : VOP2_32 <0x0000000e, "V_MAX_LEGACY_F32", - [(set VReg_32:$dst, (AMDGPUfmax VSrc_32:$src0, VReg_32:$src1))] + [(set f32:$dst, (AMDGPUfmax f32:$src0, f32:$src1))] >; defm V_MIN_F32 : VOP2_32 <0x0000000f, "V_MIN_F32", []>; defm V_MAX_F32 : VOP2_32 <0x00000010, "V_MAX_F32", []>; -defm V_MIN_I32 : VOP2_32 <0x00000011, "V_MIN_I32", []>; -defm V_MAX_I32 : VOP2_32 <0x00000012, "V_MAX_I32", []>; -defm V_MIN_U32 : VOP2_32 <0x00000013, "V_MIN_U32", []>; -defm V_MAX_U32 : VOP2_32 <0x00000014, "V_MAX_U32", []>; +defm V_MIN_I32 : VOP2_32 <0x00000011, "V_MIN_I32", + [(set i32:$dst, (AMDGPUsmin i32:$src0, i32:$src1))] +>; +defm V_MAX_I32 : VOP2_32 <0x00000012, "V_MAX_I32", + [(set i32:$dst, (AMDGPUsmax i32:$src0, i32:$src1))] +>; +defm V_MIN_U32 : VOP2_32 <0x00000013, "V_MIN_U32", + [(set i32:$dst, (AMDGPUumin i32:$src0, i32:$src1))] +>; +defm V_MAX_U32 : VOP2_32 <0x00000014, "V_MAX_U32", + [(set i32:$dst, (AMDGPUumax i32:$src0, i32:$src1))] +>; defm V_LSHR_B32 : VOP2_32 <0x00000015, "V_LSHR_B32", - [(set VReg_32:$dst, (srl VSrc_32:$src0, (i32 VReg_32:$src1)))] + [(set i32:$dst, (srl i32:$src0, i32:$src1))] >; defm V_LSHRREV_B32 : VOP2_32 <0x00000016, "V_LSHRREV_B32", [], "V_LSHR_B32">; defm V_ASHR_I32 : VOP2_32 <0x00000017, "V_ASHR_I32", - [(set VReg_32:$dst, (sra VSrc_32:$src0, (i32 VReg_32:$src1)))] + [(set i32:$dst, (sra i32:$src0, i32:$src1))] >; defm V_ASHRREV_I32 : VOP2_32 <0x00000018, "V_ASHRREV_I32", [], "V_ASHR_I32">; defm V_LSHL_B32 : VOP2_32 <0x00000019, "V_LSHL_B32", - [(set VReg_32:$dst, (shl VSrc_32:$src0, (i32 VReg_32:$src1)))] + [(set i32:$dst, (shl i32:$src0, i32:$src1))] >; defm V_LSHLREV_B32 : VOP2_32 <0x0000001a, "V_LSHLREV_B32", [], "V_LSHL_B32">; defm V_AND_B32 : VOP2_32 <0x0000001b, "V_AND_B32", - [(set VReg_32:$dst, (and VSrc_32:$src0, VReg_32:$src1))] + [(set i32:$dst, (and i32:$src0, i32:$src1))] >; defm V_OR_B32 : VOP2_32 <0x0000001c, "V_OR_B32", - [(set VReg_32:$dst, (or VSrc_32:$src0, VReg_32:$src1))] + [(set i32:$dst, (or i32:$src0, i32:$src1))] >; defm V_XOR_B32 : VOP2_32 <0x0000001d, "V_XOR_B32", - [(set VReg_32:$dst, (xor VSrc_32:$src0, VReg_32:$src1))] + [(set i32:$dst, (xor i32:$src0, i32:$src1))] >; } // End isCommutable = 1 @@ -885,11 +902,11 @@ defm V_MADAK_F32 : VOP2_32 <0x00000021, "V_MADAK_F32", []>; let isCommutable = 1, Defs = [VCC] in { // Carry-out goes to VCC defm V_ADD_I32 : VOP2b_32 <0x00000025, "V_ADD_I32", - [(set VReg_32:$dst, (add (i32 VSrc_32:$src0), (i32 VReg_32:$src1)))] + [(set i32:$dst, (add (i32 VSrc_32:$src0), (i32 VReg_32:$src1)))] >; defm V_SUB_I32 : VOP2b_32 <0x00000026, "V_SUB_I32", - [(set VReg_32:$dst, (sub (i32 VSrc_32:$src0), (i32 VReg_32:$src1)))] + [(set i32:$dst, (sub i32:$src0, i32:$src1))] >; defm V_SUBREV_I32 : VOP2b_32 <0x00000027, "V_SUBREV_I32", [], "V_SUB_I32">; @@ -905,7 +922,7 @@ defm V_LDEXP_F32 : VOP2_32 <0x0000002b, "V_LDEXP_F32", []>; ////def V_CVT_PKNORM_I16_F32 : VOP2_I16 <0x0000002d, "V_CVT_PKNORM_I16_F32", 
[]>; ////def V_CVT_PKNORM_U16_F32 : VOP2_U16 <0x0000002e, "V_CVT_PKNORM_U16_F32", []>; defm V_CVT_PKRTZ_F16_F32 : VOP2_32 <0x0000002f, "V_CVT_PKRTZ_F16_F32", - [(set VReg_32:$dst, (int_SI_packf16 VSrc_32:$src0, VReg_32:$src1))] + [(set i32:$dst, (int_SI_packf16 f32:$src0, f32:$src1))] >; ////def V_CVT_PK_U16_U32 : VOP2_U16 <0x00000030, "V_CVT_PK_U16_U32", []>; ////def V_CVT_PK_I16_I32 : VOP2_I16 <0x00000031, "V_CVT_PK_I16_I32", []>; @@ -942,6 +959,7 @@ def V_CUBEMA_F32 : VOP3_32 <0x00000147, "V_CUBEMA_F32", []>; def V_BFE_U32 : VOP3_32 <0x00000148, "V_BFE_U32", []>; def V_BFE_I32 : VOP3_32 <0x00000149, "V_BFE_I32", []>; def V_BFI_B32 : VOP3_32 <0x0000014a, "V_BFI_B32", []>; +defm : BFIPatterns <V_BFI_B32>; def V_FMA_F32 : VOP3_32 <0x0000014b, "V_FMA_F32", []>; def V_FMA_F64 : VOP3_64 <0x0000014c, "V_FMA_F64", []>; //def V_LERP_U8 : VOP3_U8 <0x0000014d, "V_LERP_U8", []>; @@ -983,18 +1001,18 @@ def V_MUL_HI_I32 : VOP3_32 <0x0000016c, "V_MUL_HI_I32", []>; } // isCommutable = 1 def : Pat < - (mul VSrc_32:$src0, VReg_32:$src1), - (V_MUL_LO_I32 VSrc_32:$src0, VReg_32:$src1, (i32 0), 0, 0, 0, 0) + (mul i32:$src0, i32:$src1), + (V_MUL_LO_I32 $src0, $src1, (i32 0)) >; def : Pat < - (mulhu VSrc_32:$src0, VReg_32:$src1), - (V_MUL_HI_U32 VSrc_32:$src0, VReg_32:$src1, (i32 0), 0, 0, 0, 0) + (mulhu i32:$src0, i32:$src1), + (V_MUL_HI_U32 $src0, $src1, (i32 0)) >; def : Pat < - (mulhs VSrc_32:$src0, VReg_32:$src1), - (V_MUL_HI_I32 VSrc_32:$src0, VReg_32:$src1, (i32 0), 0, 0, 0, 0) + (mulhs i32:$src0, i32:$src1), + (V_MUL_HI_I32 $src0, $src1, (i32 0)) >; def V_DIV_SCALE_F32 : VOP3_32 <0x0000016d, "V_DIV_SCALE_F32", []>; @@ -1019,34 +1037,27 @@ def S_MAX_U32 : SOP2_32 <0x00000009, "S_MAX_U32", []>; def S_CSELECT_B32 : SOP2 < 0x0000000a, (outs SReg_32:$dst), (ins SReg_32:$src0, SReg_32:$src1, SCCReg:$scc), "S_CSELECT_B32", - [(set (i32 SReg_32:$dst), (select (i1 SCCReg:$scc), - SReg_32:$src0, SReg_32:$src1))] + [] >; def S_CSELECT_B64 : SOP2_64 <0x0000000b, "S_CSELECT_B64", []>; -// f32 pattern for S_CSELECT_B32 -def : Pat < - (f32 (select (i1 SCCReg:$scc), SReg_32:$src0, SReg_32:$src1)), - (S_CSELECT_B32 SReg_32:$src0, SReg_32:$src1, SCCReg:$scc) ->; - def S_AND_B32 : SOP2_32 <0x0000000e, "S_AND_B32", []>; def S_AND_B64 : SOP2_64 <0x0000000f, "S_AND_B64", - [(set SReg_64:$dst, (i64 (and SSrc_64:$src0, SSrc_64:$src1)))] + [(set i64:$dst, (and i64:$src0, i64:$src1))] >; def : Pat < - (i1 (and SSrc_64:$src0, SSrc_64:$src1)), - (S_AND_B64 SSrc_64:$src0, SSrc_64:$src1) + (i1 (and i1:$src0, i1:$src1)), + (S_AND_B64 $src0, $src1) >; def S_OR_B32 : SOP2_32 <0x00000010, "S_OR_B32", []>; def S_OR_B64 : SOP2_64 <0x00000011, "S_OR_B64", []>; def : Pat < - (i1 (or SSrc_64:$src0, SSrc_64:$src1)), - (S_OR_B64 SSrc_64:$src0, SSrc_64:$src1) + (i1 (or i1:$src0, i1:$src1)), + (S_OR_B64 $src0, $src1) >; def S_XOR_B32 : SOP2_32 <0x00000012, "S_XOR_B32", []>; def S_XOR_B64 : SOP2_64 <0x00000013, "S_XOR_B64", []>; @@ -1097,14 +1108,14 @@ def SI_IF : InstSI < (outs SReg_64:$dst), (ins SReg_64:$vcc, brtarget:$target), "SI_IF $dst, $vcc, $target", - [(set SReg_64:$dst, (int_SI_if SReg_64:$vcc, bb:$target))] + [(set i64:$dst, (int_SI_if i1:$vcc, bb:$target))] >; def SI_ELSE : InstSI < (outs SReg_64:$dst), (ins SReg_64:$src, brtarget:$target), "SI_ELSE $dst, $src, $target", - [(set SReg_64:$dst, (int_SI_else SReg_64:$src, bb:$target))]> { + [(set i64:$dst, (int_SI_else i64:$src, bb:$target))]> { let Constraints = "$src = $dst"; } @@ -1113,7 +1124,7 @@ def SI_LOOP : InstSI < (outs), (ins SReg_64:$saved, brtarget:$target), "SI_LOOP 
$saved, $target", - [(int_SI_loop SReg_64:$saved, bb:$target)] + [(int_SI_loop i64:$saved, bb:$target)] >; } // end isBranch = 1, isTerminator = 1 @@ -1122,35 +1133,35 @@ def SI_BREAK : InstSI < (outs SReg_64:$dst), (ins SReg_64:$src), "SI_ELSE $dst, $src", - [(set SReg_64:$dst, (int_SI_break SReg_64:$src))] + [(set i64:$dst, (int_SI_break i64:$src))] >; def SI_IF_BREAK : InstSI < (outs SReg_64:$dst), (ins SReg_64:$vcc, SReg_64:$src), "SI_IF_BREAK $dst, $vcc, $src", - [(set SReg_64:$dst, (int_SI_if_break SReg_64:$vcc, SReg_64:$src))] + [(set i64:$dst, (int_SI_if_break i1:$vcc, i64:$src))] >; def SI_ELSE_BREAK : InstSI < (outs SReg_64:$dst), (ins SReg_64:$src0, SReg_64:$src1), "SI_ELSE_BREAK $dst, $src0, $src1", - [(set SReg_64:$dst, (int_SI_else_break SReg_64:$src0, SReg_64:$src1))] + [(set i64:$dst, (int_SI_else_break i64:$src0, i64:$src1))] >; def SI_END_CF : InstSI < (outs), (ins SReg_64:$saved), "SI_END_CF $saved", - [(int_SI_end_cf SReg_64:$saved)] + [(int_SI_end_cf i64:$saved)] >; def SI_KILL : InstSI < (outs), (ins VReg_32:$src), "SI_KIL $src", - [(int_AMDGPU_kill VReg_32:$src)] + [(int_AMDGPU_kill f32:$src)] >; } // end mayLoad = 1, mayStore = 1, hasSideEffects = 1 @@ -1184,8 +1195,8 @@ def SI_INDIRECT_DST_V16 : SI_INDIRECT_DST<VReg_512>; } // end IsCodeGenOnly, isPseudo def : Pat< - (int_AMDGPU_cndlt VReg_32:$src0, VReg_32:$src1, VReg_32:$src2), - (V_CNDMASK_B32_e64 VReg_32:$src2, VReg_32:$src1, (V_CMP_GT_F32_e64 0, VReg_32:$src0)) + (int_AMDGPU_cndlt f32:$src0, f32:$src1, f32:$src2), + (V_CNDMASK_B32_e64 $src2, $src1, (V_CMP_GT_F32_e64 0, $src0)) >; def : Pat < @@ -1195,93 +1206,110 @@ def : Pat < /* int_SI_vs_load_input */ def : Pat< - (int_SI_vs_load_input SReg_128:$tlst, IMM12bit:$attr_offset, - VReg_32:$buf_idx_vgpr), + (int_SI_vs_load_input v16i8:$tlst, IMM12bit:$attr_offset, + i32:$buf_idx_vgpr), (BUFFER_LOAD_FORMAT_XYZW imm:$attr_offset, 0, 1, 0, 0, 0, - VReg_32:$buf_idx_vgpr, SReg_128:$tlst, - 0, 0, 0) + $buf_idx_vgpr, $tlst, 0, 0, 0) >; /* int_SI_export */ def : Pat < (int_SI_export imm:$en, imm:$vm, imm:$done, imm:$tgt, imm:$compr, - VReg_32:$src0,VReg_32:$src1, VReg_32:$src2, VReg_32:$src3), + f32:$src0, f32:$src1, f32:$src2, f32:$src3), (EXP imm:$en, imm:$tgt, imm:$compr, imm:$done, imm:$vm, - VReg_32:$src0, VReg_32:$src1, VReg_32:$src2, VReg_32:$src3) + $src0, $src1, $src2, $src3) >; +/********** ======================= **********/ +/********** Image sampling patterns **********/ +/********** ======================= **********/ /* int_SI_sample for simple 1D texture lookup */ def : Pat < - (int_SI_sample imm:$writemask, VReg_32:$addr, - SReg_256:$rsrc, SReg_128:$sampler, imm), - (IMAGE_SAMPLE imm:$writemask, 0, 0, 0, 0, 0, 0, 0, VReg_32:$addr, - SReg_256:$rsrc, SReg_128:$sampler) + (int_SI_sample v1i32:$addr, v32i8:$rsrc, v16i8:$sampler, imm), + (IMAGE_SAMPLE 0xf, 0, 0, 0, 0, 0, 0, 0, $addr, $rsrc, $sampler) >; -class SamplePattern<Intrinsic name, MIMG opcode, RegisterClass addr_class, - ValueType addr_type> : Pat < - (name imm:$writemask, (addr_type addr_class:$addr), - SReg_256:$rsrc, SReg_128:$sampler, imm), - (opcode imm:$writemask, 0, 0, 0, 0, 0, 0, 0, addr_class:$addr, - SReg_256:$rsrc, SReg_128:$sampler) +class SamplePattern<Intrinsic name, MIMG opcode, ValueType vt> : Pat < + (name vt:$addr, v32i8:$rsrc, v16i8:$sampler, imm), + (opcode 0xf, 0, 0, 0, 0, 0, 0, 0, $addr, $rsrc, $sampler) >; -class SampleRectPattern<Intrinsic name, MIMG opcode, RegisterClass addr_class, - ValueType addr_type> : Pat < - (name imm:$writemask, (addr_type addr_class:$addr), - 
SReg_256:$rsrc, SReg_128:$sampler, TEX_RECT), - (opcode imm:$writemask, 1, 0, 0, 0, 0, 0, 0, addr_class:$addr, - SReg_256:$rsrc, SReg_128:$sampler) +class SampleRectPattern<Intrinsic name, MIMG opcode, ValueType vt> : Pat < + (name vt:$addr, v32i8:$rsrc, v16i8:$sampler, TEX_RECT), + (opcode 0xf, 1, 0, 0, 0, 0, 0, 0, $addr, $rsrc, $sampler) >; -class SampleArrayPattern<Intrinsic name, MIMG opcode, RegisterClass addr_class, - ValueType addr_type> : Pat < - (name imm:$writemask, (addr_type addr_class:$addr), - SReg_256:$rsrc, SReg_128:$sampler, TEX_ARRAY), - (opcode imm:$writemask, 0, 0, 1, 0, 0, 0, 0, addr_class:$addr, - SReg_256:$rsrc, SReg_128:$sampler) +class SampleArrayPattern<Intrinsic name, MIMG opcode, ValueType vt> : Pat < + (name vt:$addr, v32i8:$rsrc, v16i8:$sampler, TEX_ARRAY), + (opcode 0xf, 0, 0, 1, 0, 0, 0, 0, $addr, $rsrc, $sampler) >; class SampleShadowPattern<Intrinsic name, MIMG opcode, - RegisterClass addr_class, ValueType addr_type> : Pat < - (name imm:$writemask, (addr_type addr_class:$addr), - SReg_256:$rsrc, SReg_128:$sampler, TEX_SHADOW), - (opcode imm:$writemask, 0, 0, 0, 0, 0, 0, 0, addr_class:$addr, - SReg_256:$rsrc, SReg_128:$sampler) + ValueType vt> : Pat < + (name vt:$addr, v32i8:$rsrc, v16i8:$sampler, TEX_SHADOW), + (opcode 0xf, 0, 0, 0, 0, 0, 0, 0, $addr, $rsrc, $sampler) >; class SampleShadowArrayPattern<Intrinsic name, MIMG opcode, - RegisterClass addr_class, ValueType addr_type> : Pat < - (name imm:$writemask, (addr_type addr_class:$addr), - SReg_256:$rsrc, SReg_128:$sampler, TEX_SHADOW_ARRAY), - (opcode imm:$writemask, 0, 0, 1, 0, 0, 0, 0, addr_class:$addr, - SReg_256:$rsrc, SReg_128:$sampler) + ValueType vt> : Pat < + (name vt:$addr, v32i8:$rsrc, v16i8:$sampler, TEX_SHADOW_ARRAY), + (opcode 0xf, 0, 0, 1, 0, 0, 0, 0, $addr, $rsrc, $sampler) >; /* int_SI_sample* for texture lookups consuming more address parameters */ -multiclass SamplePatterns<RegisterClass addr_class, ValueType addr_type> { - def : SamplePattern <int_SI_sample, IMAGE_SAMPLE, addr_class, addr_type>; - def : SampleRectPattern <int_SI_sample, IMAGE_SAMPLE, addr_class, addr_type>; - def : SampleArrayPattern <int_SI_sample, IMAGE_SAMPLE, addr_class, addr_type>; - def : SampleShadowPattern <int_SI_sample, IMAGE_SAMPLE_C, addr_class, addr_type>; - def : SampleShadowArrayPattern <int_SI_sample, IMAGE_SAMPLE_C, addr_class, addr_type>; - - def : SamplePattern <int_SI_samplel, IMAGE_SAMPLE_L, addr_class, addr_type>; - def : SampleArrayPattern <int_SI_samplel, IMAGE_SAMPLE_L, addr_class, addr_type>; - def : SampleShadowPattern <int_SI_samplel, IMAGE_SAMPLE_C_L, addr_class, addr_type>; - def : SampleShadowArrayPattern <int_SI_samplel, IMAGE_SAMPLE_C_L, addr_class, addr_type>; - - def : SamplePattern <int_SI_sampleb, IMAGE_SAMPLE_B, addr_class, addr_type>; - def : SampleArrayPattern <int_SI_sampleb, IMAGE_SAMPLE_B, addr_class, addr_type>; - def : SampleShadowPattern <int_SI_sampleb, IMAGE_SAMPLE_C_B, addr_class, addr_type>; - def : SampleShadowArrayPattern <int_SI_sampleb, IMAGE_SAMPLE_C_B, addr_class, addr_type>; +multiclass SamplePatterns<ValueType addr_type> { + def : SamplePattern <int_SI_sample, IMAGE_SAMPLE, addr_type>; + def : SampleRectPattern <int_SI_sample, IMAGE_SAMPLE, addr_type>; + def : SampleArrayPattern <int_SI_sample, IMAGE_SAMPLE, addr_type>; + def : SampleShadowPattern <int_SI_sample, IMAGE_SAMPLE_C, addr_type>; + def : SampleShadowArrayPattern <int_SI_sample, IMAGE_SAMPLE_C, addr_type>; + + def : SamplePattern <int_SI_samplel, IMAGE_SAMPLE_L, addr_type>; + def : SampleArrayPattern 
<int_SI_samplel, IMAGE_SAMPLE_L, addr_type>; + def : SampleShadowPattern <int_SI_samplel, IMAGE_SAMPLE_C_L, addr_type>; + def : SampleShadowArrayPattern <int_SI_samplel, IMAGE_SAMPLE_C_L, addr_type>; + + def : SamplePattern <int_SI_sampleb, IMAGE_SAMPLE_B, addr_type>; + def : SampleArrayPattern <int_SI_sampleb, IMAGE_SAMPLE_B, addr_type>; + def : SampleShadowPattern <int_SI_sampleb, IMAGE_SAMPLE_C_B, addr_type>; + def : SampleShadowArrayPattern <int_SI_sampleb, IMAGE_SAMPLE_C_B, addr_type>; +} + +defm : SamplePatterns<v2i32>; +defm : SamplePatterns<v4i32>; +defm : SamplePatterns<v8i32>; +defm : SamplePatterns<v16i32>; + +/* int_SI_imageload for texture fetches consuming varying address parameters */ +class ImageLoadPattern<Intrinsic name, MIMG opcode, ValueType addr_type> : Pat < + (name addr_type:$addr, v32i8:$rsrc, imm), + (opcode 0xf, 0, 0, 0, 0, 0, 0, 0, $addr, $rsrc) +>; + +class ImageLoadArrayPattern<Intrinsic name, MIMG opcode, ValueType addr_type> : Pat < + (name addr_type:$addr, v32i8:$rsrc, TEX_ARRAY), + (opcode 0xf, 0, 0, 1, 0, 0, 0, 0, $addr, $rsrc) +>; + +multiclass ImageLoadPatterns<ValueType addr_type> { + def : ImageLoadPattern <int_SI_imageload, IMAGE_LOAD_MIP, addr_type>; + def : ImageLoadArrayPattern <int_SI_imageload, IMAGE_LOAD_MIP, addr_type>; } -defm : SamplePatterns<VReg_64, v2i32>; -defm : SamplePatterns<VReg_128, v4i32>; -defm : SamplePatterns<VReg_256, v8i32>; -defm : SamplePatterns<VReg_512, v16i32>; +defm : ImageLoadPatterns<v2i32>; +defm : ImageLoadPatterns<v4i32>; + +/* Image resource information */ +def : Pat < + (int_SI_resinfo i32:$mipid, v32i8:$rsrc, imm), + (IMAGE_GET_RESINFO 0xf, 0, 0, 0, 0, 0, 0, 0, (V_MOV_B32_e32 $mipid), $rsrc) +>; + +def : Pat < + (int_SI_resinfo i32:$mipid, v32i8:$rsrc, TEX_ARRAY), + (IMAGE_GET_RESINFO 0xf, 0, 0, 1, 0, 0, 0, 0, (V_MOV_B32_e32 $mipid), $rsrc) +>; /********** ============================================ **********/ /********** Extraction, Insertion, Building and Casting **********/ @@ -1289,77 +1317,77 @@ defm : SamplePatterns<VReg_512, v16i32>; foreach Index = 0-2 in { def Extract_Element_v2i32_#Index : Extract_Element < - i32, v2i32, VReg_64, Index, !cast<SubRegIndex>(sub#Index) + i32, v2i32, Index, !cast<SubRegIndex>(sub#Index) >; def Insert_Element_v2i32_#Index : Insert_Element < - i32, v2i32, VReg_32, VReg_64, Index, !cast<SubRegIndex>(sub#Index) + i32, v2i32, Index, !cast<SubRegIndex>(sub#Index) >; def Extract_Element_v2f32_#Index : Extract_Element < - f32, v2f32, VReg_64, Index, !cast<SubRegIndex>(sub#Index) + f32, v2f32, Index, !cast<SubRegIndex>(sub#Index) >; def Insert_Element_v2f32_#Index : Insert_Element < - f32, v2f32, VReg_32, VReg_64, Index, !cast<SubRegIndex>(sub#Index) + f32, v2f32, Index, !cast<SubRegIndex>(sub#Index) >; } foreach Index = 0-3 in { def Extract_Element_v4i32_#Index : Extract_Element < - i32, v4i32, VReg_128, Index, !cast<SubRegIndex>(sub#Index) + i32, v4i32, Index, !cast<SubRegIndex>(sub#Index) >; def Insert_Element_v4i32_#Index : Insert_Element < - i32, v4i32, VReg_32, VReg_128, Index, !cast<SubRegIndex>(sub#Index) + i32, v4i32, Index, !cast<SubRegIndex>(sub#Index) >; def Extract_Element_v4f32_#Index : Extract_Element < - f32, v4f32, VReg_128, Index, !cast<SubRegIndex>(sub#Index) + f32, v4f32, Index, !cast<SubRegIndex>(sub#Index) >; def Insert_Element_v4f32_#Index : Insert_Element < - f32, v4f32, VReg_32, VReg_128, Index, !cast<SubRegIndex>(sub#Index) + f32, v4f32, Index, !cast<SubRegIndex>(sub#Index) >; } foreach Index = 0-7 in { def Extract_Element_v8i32_#Index : Extract_Element < 
- i32, v8i32, VReg_256, Index, !cast<SubRegIndex>(sub#Index) + i32, v8i32, Index, !cast<SubRegIndex>(sub#Index) >; def Insert_Element_v8i32_#Index : Insert_Element < - i32, v8i32, VReg_32, VReg_256, Index, !cast<SubRegIndex>(sub#Index) + i32, v8i32, Index, !cast<SubRegIndex>(sub#Index) >; def Extract_Element_v8f32_#Index : Extract_Element < - f32, v8f32, VReg_256, Index, !cast<SubRegIndex>(sub#Index) + f32, v8f32, Index, !cast<SubRegIndex>(sub#Index) >; def Insert_Element_v8f32_#Index : Insert_Element < - f32, v8f32, VReg_32, VReg_256, Index, !cast<SubRegIndex>(sub#Index) + f32, v8f32, Index, !cast<SubRegIndex>(sub#Index) >; } foreach Index = 0-15 in { def Extract_Element_v16i32_#Index : Extract_Element < - i32, v16i32, VReg_512, Index, !cast<SubRegIndex>(sub#Index) + i32, v16i32, Index, !cast<SubRegIndex>(sub#Index) >; def Insert_Element_v16i32_#Index : Insert_Element < - i32, v16i32, VReg_32, VReg_512, Index, !cast<SubRegIndex>(sub#Index) + i32, v16i32, Index, !cast<SubRegIndex>(sub#Index) >; def Extract_Element_v16f32_#Index : Extract_Element < - f32, v16f32, VReg_512, Index, !cast<SubRegIndex>(sub#Index) + f32, v16f32, Index, !cast<SubRegIndex>(sub#Index) >; def Insert_Element_v16f32_#Index : Insert_Element < - f32, v16f32, VReg_32, VReg_512, Index, !cast<SubRegIndex>(sub#Index) + f32, v16f32, Index, !cast<SubRegIndex>(sub#Index) >; } -def : Vector1_Build <v1i32, VReg_32, i32, VReg_32>; -def : Vector2_Build <v2i32, VReg_64, i32, VReg_32>; -def : Vector2_Build <v2f32, VReg_64, f32, VReg_32>; -def : Vector4_Build <v4i32, VReg_128, i32, VReg_32>; -def : Vector4_Build <v4f32, VReg_128, f32, VReg_32>; -def : Vector8_Build <v8i32, VReg_256, i32, VReg_32>; -def : Vector8_Build <v8f32, VReg_256, f32, VReg_32>; -def : Vector16_Build <v16i32, VReg_512, i32, VReg_32>; -def : Vector16_Build <v16f32, VReg_512, f32, VReg_32>; +def : Vector1_Build <v1i32, i32, VReg_32>; +def : Vector2_Build <v2i32, i32>; +def : Vector2_Build <v2f32, f32>; +def : Vector4_Build <v4i32, i32>; +def : Vector4_Build <v4f32, f32>; +def : Vector8_Build <v8i32, i32>; +def : Vector8_Build <v8f32, f32>; +def : Vector16_Build <v16i32, i32>; +def : Vector16_Build <v16f32, f32>; def : BitConvert <i32, f32, SReg_32>; def : BitConvert <i32, f32, VReg_32>; @@ -1372,20 +1400,20 @@ def : BitConvert <f32, i32, VReg_32>; /********** =================== **********/ def : Pat < - (int_AMDIL_clamp VReg_32:$src, (f32 FP_ZERO), (f32 FP_ONE)), - (V_ADD_F32_e64 VReg_32:$src, (i32 0 /* SRC1 */), + (int_AMDIL_clamp f32:$src, (f32 FP_ZERO), (f32 FP_ONE)), + (V_ADD_F32_e64 $src, (i32 0 /* SRC1 */), 0 /* ABS */, 1 /* CLAMP */, 0 /* OMOD */, 0 /* NEG */) >; def : Pat < - (fabs VReg_32:$src), - (V_ADD_F32_e64 VReg_32:$src, (i32 0 /* SRC1 */), + (fabs f32:$src), + (V_ADD_F32_e64 $src, (i32 0 /* SRC1 */), 1 /* ABS */, 0 /* CLAMP */, 0 /* OMOD */, 0 /* NEG */) >; def : Pat < - (fneg VReg_32:$src), - (V_ADD_F32_e64 VReg_32:$src, (i32 0 /* SRC1 */), + (fneg f32:$src), + (V_ADD_F32_e64 $src, (i32 0 /* SRC1 */), 0 /* ABS */, 0 /* CLAMP */, 0 /* OMOD */, 1 /* NEG */) >; @@ -1426,16 +1454,16 @@ def : Pat < /********** ===================== **********/ def : Pat < - (int_SI_fs_constant imm:$attr_chan, imm:$attr, M0Reg:$params), - (V_INTERP_MOV_F32 INTERP.P0, imm:$attr_chan, imm:$attr, M0Reg:$params) + (int_SI_fs_constant imm:$attr_chan, imm:$attr, i32:$params), + (V_INTERP_MOV_F32 INTERP.P0, imm:$attr_chan, imm:$attr, $params) >; def : Pat < - (int_SI_fs_interp imm:$attr_chan, imm:$attr, M0Reg:$params, VReg_64:$ij), - (V_INTERP_P2_F32 (V_INTERP_P1_F32 
(EXTRACT_SUBREG VReg_64:$ij, sub0), - imm:$attr_chan, imm:$attr, M0Reg:$params), - (EXTRACT_SUBREG VReg_64:$ij, sub1), - imm:$attr_chan, imm:$attr, M0Reg:$params) + (int_SI_fs_interp imm:$attr_chan, imm:$attr, M0Reg:$params, v2i32:$ij), + (V_INTERP_P2_F32 (V_INTERP_P1_F32 (EXTRACT_SUBREG v2i32:$ij, sub0), + imm:$attr_chan, imm:$attr, i32:$params), + (EXTRACT_SUBREG $ij, sub1), + imm:$attr_chan, imm:$attr, $params) >; /********** ================== **********/ @@ -1443,101 +1471,111 @@ def : Pat < /********** ================== **********/ /* llvm.AMDGPU.pow */ -def : POW_Common <V_LOG_F32_e32, V_EXP_F32_e32, V_MUL_LEGACY_F32_e32, VReg_32>; +def : POW_Common <V_LOG_F32_e32, V_EXP_F32_e32, V_MUL_LEGACY_F32_e32>; def : Pat < - (int_AMDGPU_div VSrc_32:$src0, VSrc_32:$src1), - (V_MUL_LEGACY_F32_e32 VSrc_32:$src0, (V_RCP_LEGACY_F32_e32 VSrc_32:$src1)) + (int_AMDGPU_div f32:$src0, f32:$src1), + (V_MUL_LEGACY_F32_e32 $src0, (V_RCP_LEGACY_F32_e32 $src1)) >; def : Pat< - (fdiv VSrc_32:$src0, VSrc_32:$src1), - (V_MUL_F32_e32 VSrc_32:$src0, (V_RCP_F32_e32 VSrc_32:$src1)) + (fdiv f32:$src0, f32:$src1), + (V_MUL_F32_e32 $src0, (V_RCP_F32_e32 $src1)) >; def : Pat < - (fcos VSrc_32:$src0), - (V_COS_F32_e32 (V_MUL_F32_e32 VSrc_32:$src0, (V_MOV_B32_e32 CONST.TWO_PI_INV))) + (fcos f32:$src0), + (V_COS_F32_e32 (V_MUL_F32_e32 $src0, (V_MOV_B32_e32 CONST.TWO_PI_INV))) >; def : Pat < - (fsin VSrc_32:$src0), - (V_SIN_F32_e32 (V_MUL_F32_e32 VSrc_32:$src0, (V_MOV_B32_e32 CONST.TWO_PI_INV))) + (fsin f32:$src0), + (V_SIN_F32_e32 (V_MUL_F32_e32 $src0, (V_MOV_B32_e32 CONST.TWO_PI_INV))) >; def : Pat < - (int_AMDGPU_cube VReg_128:$src), + (int_AMDGPU_cube v4f32:$src), (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), - (V_CUBETC_F32 (EXTRACT_SUBREG VReg_128:$src, sub0), - (EXTRACT_SUBREG VReg_128:$src, sub1), - (EXTRACT_SUBREG VReg_128:$src, sub2), - 0, 0, 0, 0), sub0), - (V_CUBESC_F32 (EXTRACT_SUBREG VReg_128:$src, sub0), - (EXTRACT_SUBREG VReg_128:$src, sub1), - (EXTRACT_SUBREG VReg_128:$src, sub2), - 0, 0, 0, 0), sub1), - (V_CUBEMA_F32 (EXTRACT_SUBREG VReg_128:$src, sub0), - (EXTRACT_SUBREG VReg_128:$src, sub1), - (EXTRACT_SUBREG VReg_128:$src, sub2), - 0, 0, 0, 0), sub2), - (V_CUBEID_F32 (EXTRACT_SUBREG VReg_128:$src, sub0), - (EXTRACT_SUBREG VReg_128:$src, sub1), - (EXTRACT_SUBREG VReg_128:$src, sub2), - 0, 0, 0, 0), sub3) + (V_CUBETC_F32 (EXTRACT_SUBREG $src, sub0), + (EXTRACT_SUBREG $src, sub1), + (EXTRACT_SUBREG $src, sub2)), + sub0), + (V_CUBESC_F32 (EXTRACT_SUBREG $src, sub0), + (EXTRACT_SUBREG $src, sub1), + (EXTRACT_SUBREG $src, sub2)), + sub1), + (V_CUBEMA_F32 (EXTRACT_SUBREG $src, sub0), + (EXTRACT_SUBREG $src, sub1), + (EXTRACT_SUBREG $src, sub2)), + sub2), + (V_CUBEID_F32 (EXTRACT_SUBREG $src, sub0), + (EXTRACT_SUBREG $src, sub1), + (EXTRACT_SUBREG $src, sub2)), + sub3) >; def : Pat < - (i32 (sext (i1 SReg_64:$src0))), - (V_CNDMASK_B32_e64 (i32 0), (i32 -1), SReg_64:$src0) + (i32 (sext i1:$src0)), + (V_CNDMASK_B32_e64 (i32 0), (i32 -1), $src0) >; // 1. Offset as 8bit DWORD immediate def : Pat < - (int_SI_load_const SReg_128:$sbase, IMM8bitDWORD:$offset), - (S_BUFFER_LOAD_DWORD_IMM SReg_128:$sbase, IMM8bitDWORD:$offset) + (int_SI_load_const v16i8:$sbase, IMM8bitDWORD:$offset), + (S_BUFFER_LOAD_DWORD_IMM $sbase, IMM8bitDWORD:$offset) >; // 2. 
Offset loaded in an 32bit SGPR def : Pat < - (int_SI_load_const SReg_128:$sbase, imm:$offset), - (S_BUFFER_LOAD_DWORD_SGPR SReg_128:$sbase, (S_MOV_B32 imm:$offset)) + (int_SI_load_const v16i8:$sbase, imm:$offset), + (S_BUFFER_LOAD_DWORD_SGPR $sbase, (S_MOV_B32 imm:$offset)) >; // 3. Offset in an 32Bit VGPR def : Pat < - (int_SI_load_const SReg_128:$sbase, VReg_32:$voff), - (BUFFER_LOAD_DWORD 0, 1, 0, 0, 0, 0, VReg_32:$voff, SReg_128:$sbase, 0, 0, 0) + (int_SI_load_const v16i8:$sbase, i32:$voff), + (BUFFER_LOAD_DWORD 0, 1, 0, 0, 0, 0, $voff, $sbase, 0, 0, 0) +>; + +// The multiplication scales from [0,1] to the unsigned integer range +def : Pat < + (AMDGPUurecip i32:$src0), + (V_CVT_U32_F32_e32 + (V_MUL_F32_e32 CONST.FP_UINT_MAX_PLUS_1, + (V_RCP_IFLAG_F32_e32 (V_CVT_F32_U32_e32 $src0)))) >; /********** ================== **********/ /********** VOP3 Patterns **********/ /********** ================== **********/ -def : Pat <(f32 (fadd (fmul VSrc_32:$src0, VSrc_32:$src1), VSrc_32:$src2)), - (V_MAD_F32 VSrc_32:$src0, VSrc_32:$src1, VSrc_32:$src2, - 0, 0, 0, 0)>; +def : Pat < + (f32 (fadd (fmul f32:$src0, f32:$src1), f32:$src2)), + (V_MAD_F32 $src0, $src1, $src2) +>; /********** ================== **********/ /********** SMRD Patterns **********/ /********** ================== **********/ multiclass SMRD_Pattern <SMRD Instr_IMM, SMRD Instr_SGPR, ValueType vt> { + // 1. Offset as 8bit DWORD immediate def : Pat < - (constant_load (SIadd64bit32bit SReg_64:$sbase, IMM8bitDWORD:$offset)), - (vt (Instr_IMM SReg_64:$sbase, IMM8bitDWORD:$offset)) + (constant_load (SIadd64bit32bit i64:$sbase, IMM8bitDWORD:$offset)), + (vt (Instr_IMM $sbase, IMM8bitDWORD:$offset)) >; // 2. Offset loaded in an 32bit SGPR def : Pat < - (constant_load (SIadd64bit32bit SReg_64:$sbase, imm:$offset)), - (vt (Instr_SGPR SReg_64:$sbase, (S_MOV_B32 imm:$offset))) + (constant_load (SIadd64bit32bit i64:$sbase, imm:$offset)), + (vt (Instr_SGPR $sbase, (S_MOV_B32 imm:$offset))) >; // 3. No offset at all def : Pat < - (constant_load SReg_64:$sbase), - (vt (Instr_IMM SReg_64:$sbase, 0)) + (constant_load i64:$sbase), + (vt (Instr_IMM $sbase, 0)) >; } @@ -1550,45 +1588,37 @@ defm : SMRD_Pattern <S_LOAD_DWORDX8_IMM, S_LOAD_DWORDX8_SGPR, v32i8>; /********** Indirect adressing **********/ /********** ====================== **********/ -multiclass SI_INDIRECT_Pattern <RegisterClass rc, ValueType vt, - SI_INDIRECT_DST IndDst> { +multiclass SI_INDIRECT_Pattern <ValueType vt, SI_INDIRECT_DST IndDst> { + // 1. Extract with offset def : Pat< - (vector_extract (vt rc:$vec), - (i64 (zext (i32 (add VReg_32:$idx, imm:$off)))) - ), - (f32 (SI_INDIRECT_SRC (IMPLICIT_DEF), rc:$vec, VReg_32:$idx, imm:$off)) + (vector_extract vt:$vec, (i64 (zext (add i32:$idx, imm:$off)))), + (f32 (SI_INDIRECT_SRC (IMPLICIT_DEF), $vec, $idx, imm:$off)) >; // 2. Extract without offset def : Pat< - (vector_extract (vt rc:$vec), - (i64 (zext (i32 VReg_32:$idx))) - ), - (f32 (SI_INDIRECT_SRC (IMPLICIT_DEF), rc:$vec, VReg_32:$idx, 0)) + (vector_extract vt:$vec, (i64 (zext i32:$idx))), + (f32 (SI_INDIRECT_SRC (IMPLICIT_DEF), $vec, $idx, 0)) >; // 3. Insert with offset def : Pat< - (vector_insert (vt rc:$vec), (f32 VReg_32:$val), - (i64 (zext (i32 (add VReg_32:$idx, imm:$off)))) - ), - (vt (IndDst (IMPLICIT_DEF), rc:$vec, VReg_32:$idx, imm:$off, VReg_32:$val)) + (vector_insert vt:$vec, f32:$val, (i64 (zext (add i32:$idx, imm:$off)))), + (IndDst (IMPLICIT_DEF), $vec, $idx, imm:$off, $val) >; // 4. 
Insert without offset def : Pat< - (vector_insert (vt rc:$vec), (f32 VReg_32:$val), - (i64 (zext (i32 VReg_32:$idx))) - ), - (vt (IndDst (IMPLICIT_DEF), rc:$vec, VReg_32:$idx, 0, VReg_32:$val)) + (vector_insert vt:$vec, f32:$val, (i64 (zext i32:$idx))), + (IndDst (IMPLICIT_DEF), $vec, $idx, 0, $val) >; } -defm : SI_INDIRECT_Pattern <VReg_64, v2f32, SI_INDIRECT_DST_V2>; -defm : SI_INDIRECT_Pattern <VReg_128, v4f32, SI_INDIRECT_DST_V4>; -defm : SI_INDIRECT_Pattern <VReg_256, v8f32, SI_INDIRECT_DST_V8>; -defm : SI_INDIRECT_Pattern <VReg_512, v16f32, SI_INDIRECT_DST_V16>; +defm : SI_INDIRECT_Pattern <v2f32, SI_INDIRECT_DST_V2>; +defm : SI_INDIRECT_Pattern <v4f32, SI_INDIRECT_DST_V4>; +defm : SI_INDIRECT_Pattern <v8f32, SI_INDIRECT_DST_V8>; +defm : SI_INDIRECT_Pattern <v16f32, SI_INDIRECT_DST_V16>; /********** =============== **********/ /********** Conditions **********/ @@ -1596,12 +1626,18 @@ defm : SI_INDIRECT_Pattern <VReg_512, v16f32, SI_INDIRECT_DST_V16>; def : Pat< (i1 (setcc f32:$src0, f32:$src1, SETO)), - (V_CMP_O_F32_e64 f32:$src0, f32:$src1) + (V_CMP_O_F32_e64 $src0, $src1) >; def : Pat< (i1 (setcc f32:$src0, f32:$src1, SETUO)), - (V_CMP_U_F32_e64 f32:$src0, f32:$src1) + (V_CMP_U_F32_e64 $src0, $src1) >; +//============================================================================// +// Miscellaneous Optimization Patterns +//============================================================================// + +def : SHA256MaPattern <V_BFI_B32, V_XOR_B32_e32>; + } // End isSI predicate diff --git a/contrib/llvm/lib/Target/R600/SIIntrinsics.td b/contrib/llvm/lib/Target/R600/SIIntrinsics.td index 0af378e..224cd2f 100644 --- a/contrib/llvm/lib/Target/R600/SIIntrinsics.td +++ b/contrib/llvm/lib/Target/R600/SIIntrinsics.td @@ -19,12 +19,16 @@ let TargetPrefix = "SI", isTarget = 1 in { def int_SI_load_const : Intrinsic <[llvm_float_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; def int_SI_vs_load_input : Intrinsic <[llvm_v4f32_ty], [llvm_v16i8_ty, llvm_i16_ty, llvm_i32_ty], [IntrNoMem]> ; - class Sample : Intrinsic <[llvm_v4f32_ty], [llvm_i32_ty, llvm_anyvector_ty, llvm_v32i8_ty, llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; + class Sample : Intrinsic <[llvm_v4f32_ty], [llvm_anyvector_ty, llvm_v32i8_ty, llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; def int_SI_sample : Sample; def int_SI_sampleb : Sample; def int_SI_samplel : Sample; + def int_SI_imageload : Intrinsic <[llvm_v4i32_ty], [llvm_anyvector_ty, llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; + + def int_SI_resinfo : Intrinsic <[llvm_v4i32_ty], [llvm_i32_ty, llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; + /* Interpolation Intrinsics */ def int_SI_fs_constant : Intrinsic <[llvm_float_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; diff --git a/contrib/llvm/lib/Target/R600/SIRegisterInfo.td b/contrib/llvm/lib/Target/R600/SIRegisterInfo.td index 4f14931..244d4c00 100644 --- a/contrib/llvm/lib/Target/R600/SIRegisterInfo.td +++ b/contrib/llvm/lib/Target/R600/SIRegisterInfo.td @@ -94,6 +94,12 @@ def VGPR_64 : RegisterTuples<[sub0, sub1], [(add (trunc VGPR_32, 255)), (add (shl VGPR_32, 1))]>; +// VGPR 96-bit registers +def VGPR_96 : RegisterTuples<[sub0, sub1, sub2], + [(add (trunc VGPR_32, 254)), + (add (shl VGPR_32, 1)), + (add (shl VGPR_32, 2))]>; + // VGPR 128-bit registers def VGPR_128 : RegisterTuples<[sub0, sub1, sub2, sub3], [(add (trunc VGPR_32, 253)), @@ -151,7 +157,7 @@ def SReg_64 : RegisterClass<"AMDGPU", [i64, i1], 64, (add SGPR_64, VCCReg, EXECReg) >; -def SReg_128 : RegisterClass<"AMDGPU", [v16i8], 128, (add SGPR_128)>; +def 
SReg_128 : RegisterClass<"AMDGPU", [v16i8, i128], 128, (add SGPR_128)>; def SReg_256 : RegisterClass<"AMDGPU", [v32i8], 256, (add SGPR_256)>; @@ -162,6 +168,10 @@ def VReg_32 : RegisterClass<"AMDGPU", [i32, f32, v1i32], 32, (add VGPR_32)>; def VReg_64 : RegisterClass<"AMDGPU", [i64, f64, v2i32, v2f32], 64, (add VGPR_64)>; +def VReg_96 : RegisterClass<"AMDGPU", [untyped], 96, (add VGPR_96)> { + let Size = 96; +} + def VReg_128 : RegisterClass<"AMDGPU", [v4i32, v4f32], 128, (add VGPR_128)>; def VReg_256 : RegisterClass<"AMDGPU", [v8i32, v8f32], 256, (add VGPR_256)>; diff --git a/contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcBaseInfo.h b/contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcBaseInfo.h new file mode 100644 index 0000000..aac0e8d --- /dev/null +++ b/contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcBaseInfo.h @@ -0,0 +1,62 @@ +//===-- SparcBaseInfo.h - Top level definitions for Sparc ---- --*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains small standalone helper functions and enum definitions +// for the Sparc target useful for the compiler back-end and the MC libraries. +// As such, it deliberately does not include references to LLVM core code gen +// types, passes, etc.. +// +//===----------------------------------------------------------------------===// + +#ifndef SPARCBASEINFO_H +#define SPARCBASEINFO_H + +namespace llvm { + +/// SPII - This namespace holds target specific flags for instruction info. +namespace SPII { + +/// Target Operand Flags. Sparc specific TargetFlags for MachineOperands and +/// SDNodes. +enum TOF { + MO_NO_FLAG, + + // Extract the low 10 bits of an address. + // Assembler: %lo(addr) + MO_LO, + + // Extract bits 31-10 of an address. Only for sethi. + // Assembler: %hi(addr) or %lm(addr) + MO_HI, + + // Extract bits 43-22 of an adress. Only for sethi. + // Assembler: %h44(addr) + MO_H44, + + // Extract bits 21-12 of an address. + // Assembler: %m44(addr) + MO_M44, + + // Extract bits 11-0 of an address. + // Assembler: %l44(addr) + MO_L44, + + // Extract bits 63-42 of an address. Only for sethi. + // Assembler: %hh(addr) + MO_HH, + + // Extract bits 41-32 of an address. + // Assembler: %hm(addr) + MO_HM +}; + +} // end namespace SPII +} // end namespace llvm + +#endif diff --git a/contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp b/contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp index 7fdb0c3..1c64e1b 100644 --- a/contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp +++ b/contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp @@ -50,14 +50,42 @@ static MCSubtargetInfo *createSparcMCSubtargetInfo(StringRef TT, StringRef CPU, return X; } +// Code models. Some only make sense for 64-bit code. +// +// SunCC Reloc CodeModel Constraints +// abs32 Static Small text+data+bss linked below 2^32 bytes +// abs44 Static Medium text+data+bss linked below 2^44 bytes +// abs64 Static Large text smaller than 2^31 bytes +// pic13 PIC_ Small GOT < 2^13 bytes +// pic32 PIC_ Medium GOT < 2^32 bytes +// +// All code models require that the text segment is smaller than 2GB. + static MCCodeGenInfo *createSparcMCCodeGenInfo(StringRef TT, Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL) { MCCodeGenInfo *X = new MCCodeGenInfo(); + + // The default 32-bit code model is abs32/pic32. 
+ if (CM == CodeModel::Default) + CM = RM == Reloc::PIC_ ? CodeModel::Medium : CodeModel::Small; + X->InitMCCodeGenInfo(RM, CM, OL); return X; } +static MCCodeGenInfo *createSparcV9MCCodeGenInfo(StringRef TT, Reloc::Model RM, + CodeModel::Model CM, + CodeGenOpt::Level OL) { + MCCodeGenInfo *X = new MCCodeGenInfo(); + + // The default 64-bit code model is abs44/pic32. + if (CM == CodeModel::Default) + CM = CodeModel::Medium; + + X->InitMCCodeGenInfo(RM, CM, OL); + return X; +} extern "C" void LLVMInitializeSparcTargetMC() { // Register the MC asm info. RegisterMCAsmInfo<SparcELFMCAsmInfo> X(TheSparcTarget); @@ -67,7 +95,7 @@ extern "C" void LLVMInitializeSparcTargetMC() { TargetRegistry::RegisterMCCodeGenInfo(TheSparcTarget, createSparcMCCodeGenInfo); TargetRegistry::RegisterMCCodeGenInfo(TheSparcV9Target, - createSparcMCCodeGenInfo); + createSparcV9MCCodeGenInfo); // Register the MC instruction info. TargetRegistry::RegisterMCInstrInfo(TheSparcTarget, createSparcMCInstrInfo); diff --git a/contrib/llvm/lib/Target/Sparc/SparcAsmPrinter.cpp b/contrib/llvm/lib/Target/Sparc/SparcAsmPrinter.cpp index e14b3cb..108eb90 100644 --- a/contrib/llvm/lib/Target/Sparc/SparcAsmPrinter.cpp +++ b/contrib/llvm/lib/Target/Sparc/SparcAsmPrinter.cpp @@ -16,6 +16,7 @@ #include "Sparc.h" #include "SparcInstrInfo.h" #include "SparcTargetMachine.h" +#include "MCTargetDesc/SparcBaseInfo.h" #include "llvm/ADT/SmallString.h" #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/CodeGen/MachineInstr.h" @@ -72,15 +73,39 @@ namespace { void SparcAsmPrinter::printOperand(const MachineInstr *MI, int opNum, raw_ostream &O) { const MachineOperand &MO = MI->getOperand (opNum); - bool CloseParen = false; - if (MI->getOpcode() == SP::SETHIi && !MO.isReg() && !MO.isImm()) { - O << "%hi("; - CloseParen = true; - } else if ((MI->getOpcode() == SP::ORri || MI->getOpcode() == SP::ADDri) && - !MO.isReg() && !MO.isImm()) { - O << "%lo("; - CloseParen = true; + unsigned TF = MO.getTargetFlags(); +#ifndef NDEBUG + // Verify the target flags. 
+ if (MO.isGlobal() || MO.isSymbol() || MO.isCPI()) { + if (MI->getOpcode() == SP::CALL) + assert(TF == SPII::MO_NO_FLAG && + "Cannot handle target flags on call address"); + else if (MI->getOpcode() == SP::SETHIi) + assert((TF == SPII::MO_HI || TF == SPII::MO_H44 || TF == SPII::MO_HH) && + "Invalid target flags for address operand on sethi"); + else + assert((TF == SPII::MO_LO || TF == SPII::MO_M44 || TF == SPII::MO_L44 || + TF == SPII::MO_HM) && + "Invalid target flags for small address operand"); } +#endif + + bool CloseParen = true; + switch (TF) { + default: + llvm_unreachable("Unknown target flags on operand"); + case SPII::MO_NO_FLAG: + CloseParen = false; + break; + case SPII::MO_LO: O << "%lo("; break; + case SPII::MO_HI: O << "%hi("; break; + case SPII::MO_H44: O << "%h44("; break; + case SPII::MO_M44: O << "%m44("; break; + case SPII::MO_L44: O << "%l44("; break; + case SPII::MO_HH: O << "%hh("; break; + case SPII::MO_HM: O << "%hm("; break; + } + switch (MO.getType()) { case MachineOperand::MO_Register: O << "%" << StringRef(getRegisterName(MO.getReg())).lower(); @@ -127,14 +152,7 @@ void SparcAsmPrinter::printMemOperand(const MachineInstr *MI, int opNum, return; // don't print "+0" O << "+"; - if (MI->getOperand(opNum+1).isGlobal() || - MI->getOperand(opNum+1).isCPI()) { - O << "%lo("; - printOperand(MI, opNum+1, O); - O << ")"; - } else { - printOperand(MI, opNum+1, O); - } + printOperand(MI, opNum+1, O); } bool SparcAsmPrinter::printGetPCX(const MachineInstr *MI, unsigned opNum, diff --git a/contrib/llvm/lib/Target/Sparc/SparcCallingConv.td b/contrib/llvm/lib/Target/Sparc/SparcCallingConv.td index b38ac61..54784e0 100644 --- a/contrib/llvm/lib/Target/Sparc/SparcCallingConv.td +++ b/contrib/llvm/lib/Target/Sparc/SparcCallingConv.td @@ -12,25 +12,9 @@ //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// -// Return Value Calling Conventions +// SPARC v8 32-bit. //===----------------------------------------------------------------------===// -// Sparc 32-bit C return-value convention. -def RetCC_Sparc32 : CallingConv<[ - CCIfType<[i32], CCAssignToReg<[I0, I1, I2, I3, I4, I5]>>, - CCIfType<[f32], CCAssignToReg<[F0, F1, F2, F3]>>, - CCIfType<[f64], CCAssignToReg<[D0, D1]>> -]>; - -// Sparc 64-bit C return-value convention. -def RetCC_Sparc64 : CallingConv<[ - CCIfType<[i32], CCPromoteToType<i64>>, - CCIfType<[i64], CCAssignToReg<[I0, I1, I2, I3, I4, I5]>>, - CCIfType<[f32], CCAssignToReg<[F0, F1, F2, F3]>>, - CCIfType<[f64], CCAssignToReg<[D0, D1]>> -]>; - -// Sparc 32-bit C Calling convention. def CC_Sparc32 : CallingConv<[ //Custom assign SRet to [sp+64]. CCIfSRet<CCCustom<"CC_Sparc_Assign_SRet">>, @@ -43,14 +27,93 @@ def CC_Sparc32 : CallingConv<[ CCAssignToStack<4, 4> ]>; -// Sparc 64-bit C Calling convention. +def RetCC_Sparc32 : CallingConv<[ + CCIfType<[i32], CCAssignToReg<[I0, I1, I2, I3, I4, I5]>>, + CCIfType<[f32], CCAssignToReg<[F0, F1, F2, F3]>>, + CCIfType<[f64], CCAssignToReg<[D0, D1]>> +]>; + + +//===----------------------------------------------------------------------===// +// SPARC v9 64-bit. +//===----------------------------------------------------------------------===// +// +// The 64-bit ABI conceptually assigns all function arguments to a parameter +// array starting at [%fp+BIAS+128] in the callee's stack frame. All arguments +// occupy a multiple of 8 bytes in the array. Integer arguments are extended to +// 64 bits by the caller. 
Floats are right-aligned in their 8-byte slot, the +// first 4 bytes in the slot are undefined. +// +// The integer registers %i0 to %i5 shadow the first 48 bytes of the parameter +// array at fixed offsets. Integer arguments are promoted to registers when +// possible. +// +// The floating point registers %f0 to %f31 shadow the first 128 bytes of the +// parameter array at fixed offsets. Float and double parameters are promoted +// to these registers when possible. +// +// Structs up to 16 bytes in size are passed by value. They are right-aligned +// in one or two 8-byte slots in the parameter array. Struct members are +// promoted to both floating point and integer registers when possible. A +// struct containing two floats would thus be passed in %f0 and %f1, while two +// float function arguments would occupy 8 bytes each, and be passed in %f1 and +// %f3. +// +// When a struct { int, float } is passed by value, the int goes in the high +// bits of an integer register while the float goes in a floating point +// register. +// +// The difference is encoded in LLVM IR using the inreg atttribute on function +// arguments: +// +// C: void f(float, float); +// IR: declare void f(float %f1, float %f3) +// +// C: void f(struct { float f0, f1; }); +// IR: declare void f(float inreg %f0, float inreg %f1) +// +// C: void f(int, float); +// IR: declare void f(int signext %i0, float %f3) +// +// C: void f(struct { int i0high; float f1; }); +// IR: declare void f(i32 inreg %i0high, float inreg %f1) +// +// Two ints in a struct are simply coerced to i64: +// +// C: void f(struct { int i0high, i0low; }); +// IR: declare void f(i64 %i0.coerced) +// +// The frontend and backend divide the task of producing ABI compliant code for +// C functions. The C frontend will: +// +// - Annotate integer arguments with zeroext or signext attributes. +// +// - Split structs into one or two 64-bit sized chunks, or 32-bit chunks with +// inreg attributes. +// +// - Pass structs larger than 16 bytes indirectly with an explicit pointer +// argument. The byval attribute is not used. +// +// The backend will: +// +// - Assign all arguments to 64-bit aligned stack slots, 32-bits for inreg. +// +// - Promote to integer or floating point registers depending on type. +// +// Function return values are passed exactly like function arguments, except a +// struct up to 32 bytes in size can be returned in registers. + +// Function arguments AND return values. def CC_Sparc64 : CallingConv<[ + // The frontend uses the inreg flag to indicate i32 and float arguments from + // structs. These arguments are not promoted to 64 bits, but they can still + // be assigned to integer and float registers. + CCIfInReg<CCIfType<[i32, f32], CCCustom<"CC_Sparc64_Half">>>, + // All integers are promoted to i64 by the caller. CCIfType<[i32], CCPromoteToType<i64>>, - // Integer arguments get passed in integer registers if there is space. - CCIfType<[i64], CCAssignToReg<[I0, I1, I2, I3, I4, I5]>>, - // FIXME: Floating point arguments. - // Alternatively, they are assigned to the stack in 8-byte aligned units. - CCAssignToStack<8, 8> + // Custom assignment is required because stack space is reserved for all + // arguments whether they are passed in registers or not. 
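As a rough, self-contained illustration of the fixed-offset shadowing described above, the sketch below maps an 8-byte-aligned parameter-array offset to the location the 64-bit ABI gives it, using the same cutoffs as the CC_Sparc64_Full hook named here: six 8-byte slots shadowed by %i0-%i5, sixteen slots shadowed by the floating point registers, and everything else left in its stack slot at [%fp+BIAS+128+offset]. The helper, the enum and the example offsets are illustrative only, not LLVM APIs.

#include <cstdio>

enum ArgKind { Int64, Float64, Float32 };

// Illustrative helper: print where the 64-bit ABI puts an argument whose
// 8-byte slot starts at Offset bytes into the parameter array.
static void printLocation(ArgKind Kind, unsigned Offset) {
  if (Kind == Int64 && Offset < 6 * 8)
    std::printf("offset %3u -> %%i%u\n", Offset, Offset / 8);
  else if (Kind == Float64 && Offset < 16 * 8)
    std::printf("offset %3u -> %%d%u\n", Offset, Offset / 4);     // %d0, %d2, ..., %d30
  else if (Kind == Float32 && Offset < 16 * 8)
    std::printf("offset %3u -> %%f%u\n", Offset, Offset / 4 + 1); // %f1, %f3, ..., %f31
  else
    std::printf("offset %3u -> stack slot at [%%fp+BIAS+128+%u]\n", Offset, Offset);
}

int main() {
  printLocation(Int64,   0);  // first integer argument               -> %i0
  printLocation(Float64, 8);  // a double in the second slot          -> %d2
  printLocation(Float32, 16); // a lone float in the third slot       -> %f5
  printLocation(Int64,  48);  // seventh slot, %i registers exhausted -> stack
  return 0;
}

The same offset arithmetic reappears in the CC_Sparc64_Full implementation further down, where the register is derived from the offset returned by CCState::AllocateStack.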
+ CCCustom<"CC_Sparc64_Full"> ]>; diff --git a/contrib/llvm/lib/Target/Sparc/SparcFrameLowering.cpp b/contrib/llvm/lib/Target/Sparc/SparcFrameLowering.cpp index a0dae6e..7874240 100644 --- a/contrib/llvm/lib/Target/Sparc/SparcFrameLowering.cpp +++ b/contrib/llvm/lib/Target/Sparc/SparcFrameLowering.cpp @@ -37,18 +37,27 @@ void SparcFrameLowering::emitPrologue(MachineFunction &MF) const { // Get the number of bytes to allocate from the FrameInfo int NumBytes = (int) MFI->getStackSize(); - // Emit the correct save instruction based on the number of bytes in - // the frame. Minimum stack frame size according to V8 ABI is: - // 16 words for register window spill - // 1 word for address of returned aggregate-value - // + 6 words for passing parameters on the stack - // ---------- - // 23 words * 4 bytes per word = 92 bytes - NumBytes += 92; + if (SubTarget.is64Bit()) { + // All 64-bit stack frames must be 16-byte aligned, and must reserve space + // for spilling the 16 window registers at %sp+BIAS..%sp+BIAS+128. + NumBytes += 128; + // Frames with calls must also reserve space for 6 outgoing arguments + // whether they are used or not. LowerCall_64 takes care of that. + assert(NumBytes % 16 == 0 && "Stack size not 16-byte aligned"); + } else { + // Emit the correct save instruction based on the number of bytes in + // the frame. Minimum stack frame size according to V8 ABI is: + // 16 words for register window spill + // 1 word for address of returned aggregate-value + // + 6 words for passing parameters on the stack + // ---------- + // 23 words * 4 bytes per word = 92 bytes + NumBytes += 92; - // Round up to next doubleword boundary -- a double-word boundary - // is required by the ABI. - NumBytes = (NumBytes + 7) & ~7; + // Round up to next doubleword boundary -- a double-word boundary + // is required by the ABI. 
+ NumBytes = RoundUpToAlignment(NumBytes, 8); + } NumBytes = -NumBytes; if (NumBytes >= -4096) { @@ -70,15 +79,18 @@ void SparcFrameLowering::emitPrologue(MachineFunction &MF) const { void SparcFrameLowering:: eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const { - MachineInstr &MI = *I; - DebugLoc dl = MI.getDebugLoc(); - int Size = MI.getOperand(0).getImm(); - if (MI.getOpcode() == SP::ADJCALLSTACKDOWN) - Size = -Size; - const SparcInstrInfo &TII = - *static_cast<const SparcInstrInfo*>(MF.getTarget().getInstrInfo()); - if (Size) - BuildMI(MBB, I, dl, TII.get(SP::ADDri), SP::O6).addReg(SP::O6).addImm(Size); + if (!hasReservedCallFrame(MF)) { + MachineInstr &MI = *I; + DebugLoc DL = MI.getDebugLoc(); + int Size = MI.getOperand(0).getImm(); + if (MI.getOpcode() == SP::ADJCALLSTACKDOWN) + Size = -Size; + const SparcInstrInfo &TII = + *static_cast<const SparcInstrInfo*>(MF.getTarget().getInstrInfo()); + if (Size) + BuildMI(MBB, I, DL, TII.get(SP::ADDri), SP::O6).addReg(SP::O6) + .addImm(Size); + } MBB.erase(I); } diff --git a/contrib/llvm/lib/Target/Sparc/SparcFrameLowering.h b/contrib/llvm/lib/Target/Sparc/SparcFrameLowering.h index 464233e..c375662 100644 --- a/contrib/llvm/lib/Target/Sparc/SparcFrameLowering.h +++ b/contrib/llvm/lib/Target/Sparc/SparcFrameLowering.h @@ -22,10 +22,12 @@ namespace llvm { class SparcSubtarget; class SparcFrameLowering : public TargetFrameLowering { + const SparcSubtarget &SubTarget; public: - explicit SparcFrameLowering(const SparcSubtarget &/*sti*/) - : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, 8, 0) { - } + explicit SparcFrameLowering(const SparcSubtarget &ST) + : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, + ST.is64Bit() ? 16 : 8, 0, ST.is64Bit() ? 16 : 8), + SubTarget(ST) {} /// emitProlog/emitEpilog - These methods insert prolog and epilog code into /// the function. diff --git a/contrib/llvm/lib/Target/Sparc/SparcISelDAGToDAG.cpp b/contrib/llvm/lib/Target/Sparc/SparcISelDAGToDAG.cpp index 5fa545d..a709685 100644 --- a/contrib/llvm/lib/Target/Sparc/SparcISelDAGToDAG.cpp +++ b/contrib/llvm/lib/Target/Sparc/SparcISelDAGToDAG.cpp @@ -73,7 +73,7 @@ SDNode* SparcDAGToDAGISel::getGlobalBaseReg() { bool SparcDAGToDAGISel::SelectADDRri(SDValue Addr, SDValue &Base, SDValue &Offset) { if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) { - Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32); + Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), TLI.getPointerTy()); Offset = CurDAG->getTargetConstant(0, MVT::i32); return true; } @@ -87,7 +87,8 @@ bool SparcDAGToDAGISel::SelectADDRri(SDValue Addr, if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr.getOperand(0))) { // Constant offset from frame ref. - Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32); + Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), + TLI.getPointerTy()); } else { Base = Addr.getOperand(0); } @@ -130,7 +131,7 @@ bool SparcDAGToDAGISel::SelectADDRrr(SDValue Addr, SDValue &R1, SDValue &R2) { } R1 = Addr; - R2 = CurDAG->getRegister(SP::G0, MVT::i32); + R2 = CurDAG->getRegister(SP::G0, TLI.getPointerTy()); return true; } @@ -146,6 +147,9 @@ SDNode *SparcDAGToDAGISel::Select(SDNode *N) { case ISD::SDIV: case ISD::UDIV: { + // sdivx / udivx handle 64-bit divides. + if (N->getValueType(0) == MVT::i64) + break; // FIXME: should use a custom expander to expose the SRA to the dag. 
SDValue DivLHS = N->getOperand(0); SDValue DivRHS = N->getOperand(1); diff --git a/contrib/llvm/lib/Target/Sparc/SparcISelLowering.cpp b/contrib/llvm/lib/Target/Sparc/SparcISelLowering.cpp index 325f134..3863e2c 100644 --- a/contrib/llvm/lib/Target/Sparc/SparcISelLowering.cpp +++ b/contrib/llvm/lib/Target/Sparc/SparcISelLowering.cpp @@ -15,6 +15,7 @@ #include "SparcISelLowering.h" #include "SparcMachineFunctionInfo.h" #include "SparcTargetMachine.h" +#include "MCTargetDesc/SparcBaseInfo.h" #include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" @@ -74,27 +75,118 @@ static bool CC_Sparc_Assign_f64(unsigned &ValNo, MVT &ValVT, return true; } +// Allocate a full-sized argument for the 64-bit ABI. +static bool CC_Sparc64_Full(unsigned &ValNo, MVT &ValVT, + MVT &LocVT, CCValAssign::LocInfo &LocInfo, + ISD::ArgFlagsTy &ArgFlags, CCState &State) { + assert((LocVT == MVT::f32 || LocVT.getSizeInBits() == 64) && + "Can't handle non-64 bits locations"); + + // Stack space is allocated for all arguments starting from [%fp+BIAS+128]. + unsigned Offset = State.AllocateStack(8, 8); + unsigned Reg = 0; + + if (LocVT == MVT::i64 && Offset < 6*8) + // Promote integers to %i0-%i5. + Reg = SP::I0 + Offset/8; + else if (LocVT == MVT::f64 && Offset < 16*8) + // Promote doubles to %d0-%d30. (Which LLVM calls D0-D15). + Reg = SP::D0 + Offset/8; + else if (LocVT == MVT::f32 && Offset < 16*8) + // Promote floats to %f1, %f3, ... + Reg = SP::F1 + Offset/4; + + // Promote to register when possible, otherwise use the stack slot. + if (Reg) { + State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); + return true; + } + + // This argument goes on the stack in an 8-byte slot. + // When passing floats, LocVT is smaller than 8 bytes. Adjust the offset to + // the right-aligned float. The first 4 bytes of the stack slot are undefined. + if (LocVT == MVT::f32) + Offset += 4; + + State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo)); + return true; +} + +// Allocate a half-sized argument for the 64-bit ABI. +// +// This is used when passing { float, int } structs by value in registers. +static bool CC_Sparc64_Half(unsigned &ValNo, MVT &ValVT, + MVT &LocVT, CCValAssign::LocInfo &LocInfo, + ISD::ArgFlagsTy &ArgFlags, CCState &State) { + assert(LocVT.getSizeInBits() == 32 && "Can't handle non-32 bits locations"); + unsigned Offset = State.AllocateStack(4, 4); + + if (LocVT == MVT::f32 && Offset < 16*8) { + // Promote floats to %f0-%f31. + State.addLoc(CCValAssign::getReg(ValNo, ValVT, SP::F0 + Offset/4, + LocVT, LocInfo)); + return true; + } + + if (LocVT == MVT::i32 && Offset < 6*8) { + // Promote integers to %i0-%i5, using half the register. + unsigned Reg = SP::I0 + Offset/8; + LocVT = MVT::i64; + LocInfo = CCValAssign::AExt; + + // Set the Custom bit if this i32 goes in the high bits of a register. + if (Offset % 8 == 0) + State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, + LocVT, LocInfo)); + else + State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); + return true; + } + + State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo)); + return true; +} + #include "SparcGenCallingConv.inc" +// The calling conventions in SparcCallingConv.td are described in terms of the +// callee's register window. This function translates registers to the +// corresponding caller window %o register. 
+static unsigned toCallerWindow(unsigned Reg) { + assert(SP::I0 + 7 == SP::I7 && SP::O0 + 7 == SP::O7 && "Unexpected enum"); + if (Reg >= SP::I0 && Reg <= SP::I7) + return Reg - SP::I0 + SP::O0; + return Reg; +} + SDValue SparcTargetLowering::LowerReturn(SDValue Chain, - CallingConv::ID CallConv, bool isVarArg, + CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<SDValue> &OutVals, - DebugLoc dl, SelectionDAG &DAG) const { + DebugLoc DL, SelectionDAG &DAG) const { + if (Subtarget->is64Bit()) + return LowerReturn_64(Chain, CallConv, IsVarArg, Outs, OutVals, DL, DAG); + return LowerReturn_32(Chain, CallConv, IsVarArg, Outs, OutVals, DL, DAG); +} +SDValue +SparcTargetLowering::LowerReturn_32(SDValue Chain, + CallingConv::ID CallConv, bool IsVarArg, + const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, + DebugLoc DL, SelectionDAG &DAG) const { MachineFunction &MF = DAG.getMachineFunction(); // CCValAssign - represent the assignment of the return value to locations. SmallVector<CCValAssign, 16> RVLocs; // CCState - Info about the registers and stack slot. - CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), + CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), DAG.getTarget(), RVLocs, *DAG.getContext()); - // Analize return values. - CCInfo.AnalyzeReturn(Outs, Subtarget->is64Bit() ? - RetCC_Sparc64 : RetCC_Sparc32); + // Analyze return values. + CCInfo.AnalyzeReturn(Outs, RetCC_Sparc32); SDValue Flag; SmallVector<SDValue, 4> RetOps(1, Chain); @@ -106,7 +198,7 @@ SparcTargetLowering::LowerReturn(SDValue Chain, CCValAssign &VA = RVLocs[i]; assert(VA.isRegLoc() && "Can only return in registers!"); - Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), + Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), OutVals[i], Flag); // Guarantee that all emitted copies are stuck together with flags. @@ -121,8 +213,8 @@ SparcTargetLowering::LowerReturn(SDValue Chain, unsigned Reg = SFI->getSRetReturnReg(); if (!Reg) llvm_unreachable("sret virtual register not created in the entry block"); - SDValue Val = DAG.getCopyFromReg(Chain, dl, Reg, getPointerTy()); - Chain = DAG.getCopyToReg(Chain, dl, SP::I0, Val, Flag); + SDValue Val = DAG.getCopyFromReg(Chain, DL, Reg, getPointerTy()); + Chain = DAG.getCopyToReg(Chain, DL, SP::I0, Val, Flag); Flag = Chain.getValue(1); RetOps.push_back(DAG.getRegister(SP::I0, getPointerTy())); RetAddrOffset = 12; // CallInst + Delay Slot + Unimp @@ -135,7 +227,85 @@ SparcTargetLowering::LowerReturn(SDValue Chain, if (Flag.getNode()) RetOps.push_back(Flag); - return DAG.getNode(SPISD::RET_FLAG, dl, MVT::Other, + return DAG.getNode(SPISD::RET_FLAG, DL, MVT::Other, + &RetOps[0], RetOps.size()); +} + +// Lower return values for the 64-bit ABI. +// Return values are passed the exactly the same way as function arguments. +SDValue +SparcTargetLowering::LowerReturn_64(SDValue Chain, + CallingConv::ID CallConv, bool IsVarArg, + const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, + DebugLoc DL, SelectionDAG &DAG) const { + // CCValAssign - represent the assignment of the return value to locations. + SmallVector<CCValAssign, 16> RVLocs; + + // CCState - Info about the registers and stack slot. + CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), + DAG.getTarget(), RVLocs, *DAG.getContext()); + + // Analyze return values. 
+ CCInfo.AnalyzeReturn(Outs, CC_Sparc64); + + SDValue Flag; + SmallVector<SDValue, 4> RetOps(1, Chain); + + // The second operand on the return instruction is the return address offset. + // The return address is always %i7+8 with the 64-bit ABI. + RetOps.push_back(DAG.getConstant(8, MVT::i32)); + + // Copy the result values into the output registers. + for (unsigned i = 0; i != RVLocs.size(); ++i) { + CCValAssign &VA = RVLocs[i]; + assert(VA.isRegLoc() && "Can only return in registers!"); + SDValue OutVal = OutVals[i]; + + // Integer return values must be sign or zero extended by the callee. + switch (VA.getLocInfo()) { + case CCValAssign::SExt: + OutVal = DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), OutVal); + break; + case CCValAssign::ZExt: + OutVal = DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), OutVal); + break; + case CCValAssign::AExt: + OutVal = DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), OutVal); + default: + break; + } + + // The custom bit on an i32 return value indicates that it should be passed + // in the high bits of the register. + if (VA.getValVT() == MVT::i32 && VA.needsCustom()) { + OutVal = DAG.getNode(ISD::SHL, DL, MVT::i64, OutVal, + DAG.getConstant(32, MVT::i32)); + + // The next value may go in the low bits of the same register. + // Handle both at once. + if (i+1 < RVLocs.size() && RVLocs[i+1].getLocReg() == VA.getLocReg()) { + SDValue NV = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, OutVals[i+1]); + OutVal = DAG.getNode(ISD::OR, DL, MVT::i64, OutVal, NV); + // Skip the next value, it's already done. + ++i; + } + } + + Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), OutVal, Flag); + + // Guarantee that all emitted copies are stuck together with flags. + Flag = Chain.getValue(1); + RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT())); + } + + RetOps[0] = Chain; // Update chain. + + // Add the flag if we have it. + if (Flag.getNode()) + RetOps.push_back(Flag); + + return DAG.getNode(SPISD::RET_FLAG, DL, MVT::Other, &RetOps[0], RetOps.size()); } @@ -373,6 +543,9 @@ LowerFormalArguments_64(SDValue Chain, getTargetMachine(), ArgLocs, *DAG.getContext()); CCInfo.AnalyzeFormalArguments(Ins, CC_Sparc64); + // The argument array begins at %fp+BIAS+128, after the register save area. + const unsigned ArgArea = 128; + for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { CCValAssign &VA = ArgLocs[i]; if (VA.isRegLoc()) { @@ -384,6 +557,11 @@ LowerFormalArguments_64(SDValue Chain, getRegClassFor(VA.getLocVT())); SDValue Arg = DAG.getCopyFromReg(Chain, DL, VReg, VA.getLocVT()); + // Get the high bits for i32 struct elements. + if (VA.getValVT() == MVT::i32 && VA.needsCustom()) + Arg = DAG.getNode(ISD::SRL, DL, VA.getLocVT(), Arg, + DAG.getConstant(32, MVT::i32)); + // The caller promoted the argument, so insert an Assert?ext SDNode so we // won't promote the value again in this function. switch (VA.getLocInfo()) { @@ -409,13 +587,71 @@ LowerFormalArguments_64(SDValue Chain, // The registers are exhausted. This argument was passed on the stack. assert(VA.isMemLoc()); + // The CC_Sparc64_Full/Half functions compute stack offsets relative to the + // beginning of the arguments area at %fp+BIAS+128. + unsigned Offset = VA.getLocMemOffset() + ArgArea; + unsigned ValSize = VA.getValVT().getSizeInBits() / 8; + // Adjust offset for extended arguments, SPARC is big-endian. + // The caller will have written the full slot with extended bytes, but we + // prefer our own extending loads. 
+ if (VA.isExtInLoc()) + Offset += 8 - ValSize; + int FI = MF.getFrameInfo()->CreateFixedObject(ValSize, Offset, true); + InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, + DAG.getFrameIndex(FI, getPointerTy()), + MachinePointerInfo::getFixedStack(FI), + false, false, false, 0)); } + + if (!IsVarArg) + return Chain; + + // This function takes variable arguments, some of which may have been passed + // in registers %i0-%i5. Variable floating point arguments are never passed + // in floating point registers. They go on %i0-%i5 or on the stack like + // integer arguments. + // + // The va_start intrinsic needs to know the offset to the first variable + // argument. + unsigned ArgOffset = CCInfo.getNextStackOffset(); + SparcMachineFunctionInfo *FuncInfo = MF.getInfo<SparcMachineFunctionInfo>(); + // Skip the 128 bytes of register save area. + FuncInfo->setVarArgsFrameOffset(ArgOffset + ArgArea + + Subtarget->getStackPointerBias()); + + // Save the variable arguments that were passed in registers. + // The caller is required to reserve stack space for 6 arguments regardless + // of how many arguments were actually passed. + SmallVector<SDValue, 8> OutChains; + for (; ArgOffset < 6*8; ArgOffset += 8) { + unsigned VReg = MF.addLiveIn(SP::I0 + ArgOffset/8, &SP::I64RegsRegClass); + SDValue VArg = DAG.getCopyFromReg(Chain, DL, VReg, MVT::i64); + int FI = MF.getFrameInfo()->CreateFixedObject(8, ArgOffset + ArgArea, true); + OutChains.push_back(DAG.getStore(Chain, DL, VArg, + DAG.getFrameIndex(FI, getPointerTy()), + MachinePointerInfo::getFixedStack(FI), + false, false, 0)); + } + + if (!OutChains.empty()) + Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, + &OutChains[0], OutChains.size()); + return Chain; } SDValue SparcTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, SmallVectorImpl<SDValue> &InVals) const { + if (Subtarget->is64Bit()) + return LowerCall_64(CLI, InVals); + return LowerCall_32(CLI, InVals); +} + +// Lower a call for the 32-bit ABI. +SDValue +SparcTargetLowering::LowerCall_32(TargetLowering::CallLoweringInfo &CLI, + SmallVectorImpl<SDValue> &InVals) const { SelectionDAG &DAG = CLI.DAG; DebugLoc &dl = CLI.DL; SmallVector<ISD::OutputArg, 32> &Outs = CLI.Outs; @@ -618,11 +854,7 @@ SparcTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // stuck together. SDValue InFlag; for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { - unsigned Reg = RegsToPass[i].first; - // Remap I0->I7 -> O0->O7. - if (Reg >= SP::I0 && Reg <= SP::I7) - Reg = Reg-SP::I0+SP::O0; - + unsigned Reg = toCallerWindow(RegsToPass[i].first); Chain = DAG.getCopyToReg(Chain, dl, Reg, RegsToPass[i].second, InFlag); InFlag = Chain.getValue(1); } @@ -644,13 +876,9 @@ SparcTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, Ops.push_back(Callee); if (hasStructRetAttr) Ops.push_back(DAG.getTargetConstant(SRetArgSize, MVT::i32)); - for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { - unsigned Reg = RegsToPass[i].first; - if (Reg >= SP::I0 && Reg <= SP::I7) - Reg = Reg-SP::I0+SP::O0; - - Ops.push_back(DAG.getRegister(Reg, RegsToPass[i].second.getValueType())); - } + for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) + Ops.push_back(DAG.getRegister(toCallerWindow(RegsToPass[i].first), + RegsToPass[i].second.getValueType())); if (InFlag.getNode()) Ops.push_back(InFlag); @@ -670,13 +898,7 @@ SparcTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // Copy all of the result registers out of their specified physreg. 
for (unsigned i = 0; i != RVLocs.size(); ++i) { - unsigned Reg = RVLocs[i].getLocReg(); - - // Remap I0->I7 -> O0->O7. - if (Reg >= SP::I0 && Reg <= SP::I7) - Reg = Reg-SP::I0+SP::O0; - - Chain = DAG.getCopyFromReg(Chain, dl, Reg, + Chain = DAG.getCopyFromReg(Chain, dl, toCallerWindow(RVLocs[i].getLocReg()), RVLocs[i].getValVT(), InFlag).getValue(1); InFlag = Chain.getValue(2); InVals.push_back(Chain.getValue(0)); @@ -709,6 +931,259 @@ SparcTargetLowering::getSRetArgSize(SelectionDAG &DAG, SDValue Callee) const return getDataLayout()->getTypeAllocSize(ElementTy); } + +// Fixup floating point arguments in the ... part of a varargs call. +// +// The SPARC v9 ABI requires that floating point arguments are treated the same +// as integers when calling a varargs function. This does not apply to the +// fixed arguments that are part of the function's prototype. +// +// This function post-processes a CCValAssign array created by +// AnalyzeCallOperands(). +static void fixupVariableFloatArgs(SmallVectorImpl<CCValAssign> &ArgLocs, + ArrayRef<ISD::OutputArg> Outs) { + for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { + const CCValAssign &VA = ArgLocs[i]; + // FIXME: What about f32 arguments? C promotes them to f64 when calling + // varargs functions. + if (!VA.isRegLoc() || VA.getLocVT() != MVT::f64) + continue; + // The fixed arguments to a varargs function still go in FP registers. + if (Outs[VA.getValNo()].IsFixed) + continue; + + // This floating point argument should be reassigned. + CCValAssign NewVA; + + // Determine the offset into the argument array. + unsigned Offset = 8 * (VA.getLocReg() - SP::D0); + assert(Offset < 16*8 && "Offset out of range, bad register enum?"); + + if (Offset < 6*8) { + // This argument should go in %i0-%i5. + unsigned IReg = SP::I0 + Offset/8; + // Full register, just bitconvert into i64. + NewVA = CCValAssign::getReg(VA.getValNo(), VA.getValVT(), + IReg, MVT::i64, CCValAssign::BCvt); + } else { + // This needs to go to memory, we're out of integer registers. + NewVA = CCValAssign::getMem(VA.getValNo(), VA.getValVT(), + Offset, VA.getLocVT(), VA.getLocInfo()); + } + ArgLocs[i] = NewVA; + } +} + +// Lower a call for the 64-bit ABI. +SDValue +SparcTargetLowering::LowerCall_64(TargetLowering::CallLoweringInfo &CLI, + SmallVectorImpl<SDValue> &InVals) const { + SelectionDAG &DAG = CLI.DAG; + DebugLoc DL = CLI.DL; + SDValue Chain = CLI.Chain; + + // Analyze operands of the call, assigning locations to each operand. + SmallVector<CCValAssign, 16> ArgLocs; + CCState CCInfo(CLI.CallConv, CLI.IsVarArg, DAG.getMachineFunction(), + DAG.getTarget(), ArgLocs, *DAG.getContext()); + CCInfo.AnalyzeCallOperands(CLI.Outs, CC_Sparc64); + + // Get the size of the outgoing arguments stack space requirement. + // The stack offset computed by CC_Sparc64 includes all arguments. + // Called functions expect 6 argument words to exist in the stack frame, used + // or not. + unsigned ArgsSize = std::max(6*8u, CCInfo.getNextStackOffset()); + + // Keep stack frames 16-byte aligned. + ArgsSize = RoundUpToAlignment(ArgsSize, 16); + + // Varargs calls require special treatment. + if (CLI.IsVarArg) + fixupVariableFloatArgs(ArgLocs, CLI.Outs); + + // Adjust the stack pointer to make room for the arguments. + // FIXME: Use hasReservedCallFrame to avoid %sp adjustments around all calls + // with more than 6 arguments. + Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(ArgsSize, true)); + + // Collect the set of registers to pass to the function and their values. 
+ // This will be emitted as a sequence of CopyToReg nodes glued to the call + // instruction. + SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass; + + // Collect chains from all the memory opeations that copy arguments to the + // stack. They must follow the stack pointer adjustment above and precede the + // call instruction itself. + SmallVector<SDValue, 8> MemOpChains; + + for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { + const CCValAssign &VA = ArgLocs[i]; + SDValue Arg = CLI.OutVals[i]; + + // Promote the value if needed. + switch (VA.getLocInfo()) { + default: + llvm_unreachable("Unknown location info!"); + case CCValAssign::Full: + break; + case CCValAssign::SExt: + Arg = DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), Arg); + break; + case CCValAssign::ZExt: + Arg = DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Arg); + break; + case CCValAssign::AExt: + Arg = DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Arg); + break; + case CCValAssign::BCvt: + Arg = DAG.getNode(ISD::BITCAST, DL, VA.getLocVT(), Arg); + break; + } + + if (VA.isRegLoc()) { + // The custom bit on an i32 return value indicates that it should be + // passed in the high bits of the register. + if (VA.getValVT() == MVT::i32 && VA.needsCustom()) { + Arg = DAG.getNode(ISD::SHL, DL, MVT::i64, Arg, + DAG.getConstant(32, MVT::i32)); + + // The next value may go in the low bits of the same register. + // Handle both at once. + if (i+1 < ArgLocs.size() && ArgLocs[i+1].isRegLoc() && + ArgLocs[i+1].getLocReg() == VA.getLocReg()) { + SDValue NV = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, + CLI.OutVals[i+1]); + Arg = DAG.getNode(ISD::OR, DL, MVT::i64, Arg, NV); + // Skip the next value, it's already done. + ++i; + } + } + RegsToPass.push_back(std::make_pair(toCallerWindow(VA.getLocReg()), Arg)); + continue; + } + + assert(VA.isMemLoc()); + + // Create a store off the stack pointer for this argument. + SDValue StackPtr = DAG.getRegister(SP::O6, getPointerTy()); + // The argument area starts at %fp+BIAS+128 in the callee frame, + // %sp+BIAS+128 in ours. + SDValue PtrOff = DAG.getIntPtrConstant(VA.getLocMemOffset() + + Subtarget->getStackPointerBias() + + 128); + PtrOff = DAG.getNode(ISD::ADD, DL, getPointerTy(), StackPtr, PtrOff); + MemOpChains.push_back(DAG.getStore(Chain, DL, Arg, PtrOff, + MachinePointerInfo(), + false, false, 0)); + } + + // Emit all stores, make sure they occur before the call. + if (!MemOpChains.empty()) + Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, + &MemOpChains[0], MemOpChains.size()); + + // Build a sequence of CopyToReg nodes glued together with token chain and + // glue operands which copy the outgoing args into registers. The InGlue is + // necessary since all emitted instructions must be stuck together in order + // to pass the live physical registers. + SDValue InGlue; + for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { + Chain = DAG.getCopyToReg(Chain, DL, + RegsToPass[i].first, RegsToPass[i].second, InGlue); + InGlue = Chain.getValue(1); + } + + // If the callee is a GlobalAddress node (quite common, every direct call is) + // turn it into a TargetGlobalAddress node so that legalize doesn't hack it. + // Likewise ExternalSymbol -> TargetExternalSymbol. 
+ SDValue Callee = CLI.Callee; + if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) + Callee = DAG.getTargetGlobalAddress(G->getGlobal(), DL, getPointerTy()); + else if (ExternalSymbolSDNode *E = dyn_cast<ExternalSymbolSDNode>(Callee)) + Callee = DAG.getTargetExternalSymbol(E->getSymbol(), getPointerTy()); + + // Build the operands for the call instruction itself. + SmallVector<SDValue, 8> Ops; + Ops.push_back(Chain); + Ops.push_back(Callee); + for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) + Ops.push_back(DAG.getRegister(RegsToPass[i].first, + RegsToPass[i].second.getValueType())); + + // Make sure the CopyToReg nodes are glued to the call instruction which + // consumes the registers. + if (InGlue.getNode()) + Ops.push_back(InGlue); + + // Now the call itself. + SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); + Chain = DAG.getNode(SPISD::CALL, DL, NodeTys, &Ops[0], Ops.size()); + InGlue = Chain.getValue(1); + + // Revert the stack pointer immediately after the call. + Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(ArgsSize, true), + DAG.getIntPtrConstant(0, true), InGlue); + InGlue = Chain.getValue(1); + + // Now extract the return values. This is more or less the same as + // LowerFormalArguments_64. + + // Assign locations to each value returned by this call. + SmallVector<CCValAssign, 16> RVLocs; + CCState RVInfo(CLI.CallConv, CLI.IsVarArg, DAG.getMachineFunction(), + DAG.getTarget(), RVLocs, *DAG.getContext()); + RVInfo.AnalyzeCallResult(CLI.Ins, CC_Sparc64); + + // Copy all of the result registers out of their specified physreg. + for (unsigned i = 0; i != RVLocs.size(); ++i) { + CCValAssign &VA = RVLocs[i]; + unsigned Reg = toCallerWindow(VA.getLocReg()); + + // When returning 'inreg {i32, i32 }', two consecutive i32 arguments can + // reside in the same register in the high and low bits. Reuse the + // CopyFromReg previous node to avoid duplicate copies. + SDValue RV; + if (RegisterSDNode *SrcReg = dyn_cast<RegisterSDNode>(Chain.getOperand(1))) + if (SrcReg->getReg() == Reg && Chain->getOpcode() == ISD::CopyFromReg) + RV = Chain.getValue(0); + + // But usually we'll create a new CopyFromReg for a different register. + if (!RV.getNode()) { + RV = DAG.getCopyFromReg(Chain, DL, Reg, RVLocs[i].getLocVT(), InGlue); + Chain = RV.getValue(1); + InGlue = Chain.getValue(2); + } + + // Get the high bits for i32 struct elements. + if (VA.getValVT() == MVT::i32 && VA.needsCustom()) + RV = DAG.getNode(ISD::SRL, DL, VA.getLocVT(), RV, + DAG.getConstant(32, MVT::i32)); + + // The callee promoted the return value, so insert an Assert?ext SDNode so + // we won't promote the value again in this function. + switch (VA.getLocInfo()) { + case CCValAssign::SExt: + RV = DAG.getNode(ISD::AssertSext, DL, VA.getLocVT(), RV, + DAG.getValueType(VA.getValVT())); + break; + case CCValAssign::ZExt: + RV = DAG.getNode(ISD::AssertZext, DL, VA.getLocVT(), RV, + DAG.getValueType(VA.getValVT())); + break; + default: + break; + } + + // Truncate the register down to the return value type. 
+ if (VA.isExtInLoc()) + RV = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), RV); + + InVals.push_back(RV); + } + + return Chain; +} + //===----------------------------------------------------------------------===// // TargetLowering Implementation //===----------------------------------------------------------------------===// @@ -778,9 +1253,9 @@ SparcTargetLowering::SparcTargetLowering(TargetMachine &TM) setTruncStoreAction(MVT::f64, MVT::f32, Expand); // Custom legalize GlobalAddress nodes into LO/HI parts. - setOperationAction(ISD::GlobalAddress, MVT::i32, Custom); - setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom); - setOperationAction(ISD::ConstantPool , MVT::i32, Custom); + setOperationAction(ISD::GlobalAddress, getPointerTy(), Custom); + setOperationAction(ISD::GlobalTLSAddress, getPointerTy(), Custom); + setOperationAction(ISD::ConstantPool, getPointerTy(), Custom); // Sparc doesn't have sext_inreg, replace them with shl/sra setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand); @@ -831,7 +1306,6 @@ SparcTargetLowering::SparcTargetLowering(TargetMachine &TM) // FIXME: There are instructions available for ATOMIC_FENCE // on SparcV8 and later. - setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand); setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Expand); setOperationAction(ISD::FSIN , MVT::f64, Expand); @@ -965,46 +1439,89 @@ static void LookThroughSetCC(SDValue &LHS, SDValue &RHS, } } +// Convert to a target node and set target flags. +SDValue SparcTargetLowering::withTargetFlags(SDValue Op, unsigned TF, + SelectionDAG &DAG) const { + if (const GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op)) + return DAG.getTargetGlobalAddress(GA->getGlobal(), + GA->getDebugLoc(), + GA->getValueType(0), + GA->getOffset(), TF); + + if (const ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op)) + return DAG.getTargetConstantPool(CP->getConstVal(), + CP->getValueType(0), + CP->getAlignment(), + CP->getOffset(), TF); + + if (const ExternalSymbolSDNode *ES = dyn_cast<ExternalSymbolSDNode>(Op)) + return DAG.getTargetExternalSymbol(ES->getSymbol(), + ES->getValueType(0), TF); + + llvm_unreachable("Unhandled address SDNode"); +} + +// Split Op into high and low parts according to HiTF and LoTF. +// Return an ADD node combining the parts. +SDValue SparcTargetLowering::makeHiLoPair(SDValue Op, + unsigned HiTF, unsigned LoTF, + SelectionDAG &DAG) const { + DebugLoc DL = Op.getDebugLoc(); + EVT VT = Op.getValueType(); + SDValue Hi = DAG.getNode(SPISD::Hi, DL, VT, withTargetFlags(Op, HiTF, DAG)); + SDValue Lo = DAG.getNode(SPISD::Lo, DL, VT, withTargetFlags(Op, LoTF, DAG)); + return DAG.getNode(ISD::ADD, DL, VT, Hi, Lo); +} + +// Build SDNodes for producing an address from a GlobalAddress, ConstantPool, +// or ExternalSymbol SDNode. +SDValue SparcTargetLowering::makeAddress(SDValue Op, SelectionDAG &DAG) const { + DebugLoc DL = Op.getDebugLoc(); + EVT VT = getPointerTy(); + + // Handle PIC mode first. + if (getTargetMachine().getRelocationModel() == Reloc::PIC_) { + // This is the pic32 code model, the GOT is known to be smaller than 4GB. + SDValue HiLo = makeHiLoPair(Op, SPII::MO_HI, SPII::MO_LO, DAG); + SDValue GlobalBase = DAG.getNode(SPISD::GLOBAL_BASE_REG, DL, VT); + SDValue AbsAddr = DAG.getNode(ISD::ADD, DL, VT, GlobalBase, HiLo); + return DAG.getLoad(VT, DL, DAG.getEntryNode(), AbsAddr, + MachinePointerInfo::getGOT(), false, false, false, 0); + } + + // This is one of the absolute code models. 
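For the Medium (abs44) case handled in the switch that follows, the address is rebuilt from the %h44, %m44 and %l44 fields defined in SparcBaseInfo.h: sethi places the 22-bit %h44 field in bits 31-10 of the register, an or fills in the 10-bit %m44 field, the 12-bit shift moves both into their final position, and the trailing add supplies %l44. Below is a minimal standalone sketch of that reassembly; the address value is made up for the example.

#include <cassert>
#include <cstdint>

int main() {
  const uint64_t Addr = 0xABCDE123456ULL;       // any 44-bit absolute address

  const uint64_t H44 = (Addr >> 22) & 0x3FFFFF; // bits 43-22, the sethi immediate
  const uint64_t M44 = (Addr >> 12) & 0x3FF;    // bits 21-12, or'd into the low bits
  const uint64_t L44 = Addr & 0xFFF;            // bits 11-0, added last

  // sethi %h44(Addr) ; or %m44(Addr) ; sllx 12 ; add %l44(Addr)
  const uint64_t Rebuilt = (((H44 << 10) | M44) << 12) + L44;
  assert(Rebuilt == Addr);
  (void)Rebuilt;
  return 0;
}

The Large (abs64) case is the same idea with two sethi/or pairs (%hh/%hm and %hi/%lo) joined by a 32-bit shift.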
+ switch(getTargetMachine().getCodeModel()) { + default: + llvm_unreachable("Unsupported absolute code model"); + case CodeModel::Small: + // abs32. + return makeHiLoPair(Op, SPII::MO_HI, SPII::MO_LO, DAG); + case CodeModel::Medium: { + // abs44. + SDValue H44 = makeHiLoPair(Op, SPII::MO_H44, SPII::MO_M44, DAG); + H44 = DAG.getNode(ISD::SHL, DL, VT, H44, DAG.getConstant(12, MVT::i32)); + SDValue L44 = withTargetFlags(Op, SPII::MO_L44, DAG); + L44 = DAG.getNode(SPISD::Lo, DL, VT, L44); + return DAG.getNode(ISD::ADD, DL, VT, H44, L44); + } + case CodeModel::Large: { + // abs64. + SDValue Hi = makeHiLoPair(Op, SPII::MO_HH, SPII::MO_HM, DAG); + Hi = DAG.getNode(ISD::SHL, DL, VT, Hi, DAG.getConstant(32, MVT::i32)); + SDValue Lo = makeHiLoPair(Op, SPII::MO_HI, SPII::MO_LO, DAG); + return DAG.getNode(ISD::ADD, DL, VT, Hi, Lo); + } + } +} + SDValue SparcTargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const { - const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); - // FIXME there isn't really any debug info here - DebugLoc dl = Op.getDebugLoc(); - SDValue GA = DAG.getTargetGlobalAddress(GV, dl, MVT::i32); - SDValue Hi = DAG.getNode(SPISD::Hi, dl, MVT::i32, GA); - SDValue Lo = DAG.getNode(SPISD::Lo, dl, MVT::i32, GA); - - if (getTargetMachine().getRelocationModel() != Reloc::PIC_) - return DAG.getNode(ISD::ADD, dl, MVT::i32, Lo, Hi); - - SDValue GlobalBase = DAG.getNode(SPISD::GLOBAL_BASE_REG, dl, - getPointerTy()); - SDValue RelAddr = DAG.getNode(ISD::ADD, dl, MVT::i32, Lo, Hi); - SDValue AbsAddr = DAG.getNode(ISD::ADD, dl, MVT::i32, - GlobalBase, RelAddr); - return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), - AbsAddr, MachinePointerInfo(), false, false, false, 0); + return makeAddress(Op, DAG); } SDValue SparcTargetLowering::LowerConstantPool(SDValue Op, SelectionDAG &DAG) const { - ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Op); - // FIXME there isn't really any debug info here - DebugLoc dl = Op.getDebugLoc(); - const Constant *C = N->getConstVal(); - SDValue CP = DAG.getTargetConstantPool(C, MVT::i32, N->getAlignment()); - SDValue Hi = DAG.getNode(SPISD::Hi, dl, MVT::i32, CP); - SDValue Lo = DAG.getNode(SPISD::Lo, dl, MVT::i32, CP); - if (getTargetMachine().getRelocationModel() != Reloc::PIC_) - return DAG.getNode(ISD::ADD, dl, MVT::i32, Lo, Hi); - - SDValue GlobalBase = DAG.getNode(SPISD::GLOBAL_BASE_REG, dl, - getPointerTy()); - SDValue RelAddr = DAG.getNode(ISD::ADD, dl, MVT::i32, Lo, Hi); - SDValue AbsAddr = DAG.getNode(ISD::ADD, dl, MVT::i32, - GlobalBase, RelAddr); - return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), - AbsAddr, MachinePointerInfo(), false, false, false, 0); + return makeAddress(Op, DAG); } static SDValue LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) { @@ -1092,14 +1609,13 @@ static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG, // vastart just stores the address of the VarArgsFrameIndex slot into the // memory location argument. 
- DebugLoc dl = Op.getDebugLoc(); + DebugLoc DL = Op.getDebugLoc(); SDValue Offset = - DAG.getNode(ISD::ADD, dl, MVT::i32, - DAG.getRegister(SP::I6, MVT::i32), - DAG.getConstant(FuncInfo->getVarArgsFrameOffset(), - MVT::i32)); + DAG.getNode(ISD::ADD, DL, TLI.getPointerTy(), + DAG.getRegister(SP::I6, TLI.getPointerTy()), + DAG.getIntPtrConstant(FuncInfo->getVarArgsFrameOffset())); const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); - return DAG.getStore(Op.getOperand(0), dl, Offset, Op.getOperand(1), + return DAG.getStore(Op.getOperand(0), DL, Offset, Op.getOperand(1), MachinePointerInfo(SV), false, false, 0); } @@ -1108,33 +1624,22 @@ static SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) { EVT VT = Node->getValueType(0); SDValue InChain = Node->getOperand(0); SDValue VAListPtr = Node->getOperand(1); + EVT PtrVT = VAListPtr.getValueType(); const Value *SV = cast<SrcValueSDNode>(Node->getOperand(2))->getValue(); - DebugLoc dl = Node->getDebugLoc(); - SDValue VAList = DAG.getLoad(MVT::i32, dl, InChain, VAListPtr, + DebugLoc DL = Node->getDebugLoc(); + SDValue VAList = DAG.getLoad(PtrVT, DL, InChain, VAListPtr, MachinePointerInfo(SV), false, false, false, 0); - // Increment the pointer, VAList, to the next vaarg - SDValue NextPtr = DAG.getNode(ISD::ADD, dl, MVT::i32, VAList, - DAG.getConstant(VT.getSizeInBits()/8, - MVT::i32)); - // Store the incremented VAList to the legalized pointer - InChain = DAG.getStore(VAList.getValue(1), dl, NextPtr, + // Increment the pointer, VAList, to the next vaarg. + SDValue NextPtr = DAG.getNode(ISD::ADD, DL, PtrVT, VAList, + DAG.getIntPtrConstant(VT.getSizeInBits()/8)); + // Store the incremented VAList to the legalized pointer. + InChain = DAG.getStore(VAList.getValue(1), DL, NextPtr, VAListPtr, MachinePointerInfo(SV), false, false, 0); - // Load the actual argument out of the pointer VAList, unless this is an - // f64 load. - if (VT != MVT::f64) - return DAG.getLoad(VT, dl, InChain, VAList, MachinePointerInfo(), - false, false, false, 0); - - // Otherwise, load it as i64, then do a bitconvert. - SDValue V = DAG.getLoad(MVT::i64, dl, InChain, VAList, MachinePointerInfo(), - false, false, false, 0); - - // Bit-Convert the value to f64. - SDValue Ops[2] = { - DAG.getNode(ISD::BITCAST, dl, MVT::f64, V), - V.getValue(1) - }; - return DAG.getMergeValues(Ops, 2, dl); + // Load the actual argument out of the pointer VAList. + // We can't count on greater alignment than the word size. 
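An editor's note on the alignment argument in the load that follows: the va_list pointer only guarantees word alignment, so the load's alignment is clamped to the pointer size. For an f64 argument on 32-bit SPARC that is std::min(32, 64) / 8 = 4 bytes instead of the natural 8, which lets the rewritten code load the f64 directly rather than loading an i64 and bit-casting it as the removed lines did.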
+ return DAG.getLoad(VT, DL, InChain, VAList, MachinePointerInfo(), + false, false, false, + std::min(PtrVT.getSizeInBits(), VT.getSizeInBits())/8); } static SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) { diff --git a/contrib/llvm/lib/Target/Sparc/SparcISelLowering.h b/contrib/llvm/lib/Target/Sparc/SparcISelLowering.h index aa2ef71..fd706be 100644 --- a/contrib/llvm/lib/Target/Sparc/SparcISelLowering.h +++ b/contrib/llvm/lib/Target/Sparc/SparcISelLowering.h @@ -71,6 +71,7 @@ namespace llvm { getRegForInlineAsmConstraint(const std::string &Constraint, EVT VT) const; virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const; + virtual MVT getScalarShiftAmountTy(EVT LHSTy) const { return MVT::i32; } virtual SDValue LowerFormalArguments(SDValue Chain, @@ -95,6 +96,10 @@ namespace llvm { virtual SDValue LowerCall(TargetLowering::CallLoweringInfo &CLI, SmallVectorImpl<SDValue> &InVals) const; + SDValue LowerCall_32(TargetLowering::CallLoweringInfo &CLI, + SmallVectorImpl<SDValue> &InVals) const; + SDValue LowerCall_64(TargetLowering::CallLoweringInfo &CLI, + SmallVectorImpl<SDValue> &InVals) const; virtual SDValue LowerReturn(SDValue Chain, @@ -102,11 +107,25 @@ namespace llvm { const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<SDValue> &OutVals, DebugLoc dl, SelectionDAG &DAG) const; + SDValue LowerReturn_32(SDValue Chain, + CallingConv::ID CallConv, bool IsVarArg, + const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, + DebugLoc DL, SelectionDAG &DAG) const; + SDValue LowerReturn_64(SDValue Chain, + CallingConv::ID CallConv, bool IsVarArg, + const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, + DebugLoc DL, SelectionDAG &DAG) const; SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const; SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const; unsigned getSRetArgSize(SelectionDAG &DAG, SDValue Callee) const; + SDValue withTargetFlags(SDValue Op, unsigned TF, SelectionDAG &DAG) const; + SDValue makeHiLoPair(SDValue Op, unsigned HiTF, unsigned LoTF, + SelectionDAG &DAG) const; + SDValue makeAddress(SDValue Op, SelectionDAG &DAG) const; }; } // end namespace llvm diff --git a/contrib/llvm/lib/Target/Sparc/SparcInstr64Bit.td b/contrib/llvm/lib/Target/Sparc/SparcInstr64Bit.td index ca1153b..91805f9 100644 --- a/contrib/llvm/lib/Target/Sparc/SparcInstr64Bit.td +++ b/contrib/llvm/lib/Target/Sparc/SparcInstr64Bit.td @@ -40,6 +40,9 @@ let Predicates = [Is64Bit] in { def : Pat<(i64 (zext i32:$val)), (SRLri $val, 0)>; def : Pat<(i64 (sext i32:$val)), (SRAri $val, 0)>; +def : Pat<(i64 (and i64:$val, 0xffffffff)), (SRLri $val, 0)>; +def : Pat<(i64 (sext_inreg i64:$val, i32)), (SRAri $val, 0)>; + defm SLLX : F3_S<"sllx", 0b100101, 1, shl, i64, I64Regs>; defm SRLX : F3_S<"srlx", 0b100110, 1, srl, i64, I64Regs>; defm SRAX : F3_S<"srax", 0b100111, 1, sra, i64, I64Regs>; @@ -130,7 +133,7 @@ def HM10 : SDNodeXForm<imm, [{ return CurDAG->getTargetConstant(Val, MVT::i32); }]>; def : Pat<(i64 imm:$val), - (ORrr (SLLXri (ORri (SETHIi (HH22 $val)), (HM10 $val)), (i64 32)), + (ORrr (SLLXri (ORri (SETHIi (HH22 $val)), (HM10 $val)), (i32 32)), (ORri (SETHIi (HI22 $val)), (LO10 $val)))>, Requires<[Is64Bit]>; @@ -178,6 +181,45 @@ def : Pat<(SPcmpicc i64:$a, (i64 simm13:$b)), (SUBCCri $a, (as_i32imm $b))>; //===----------------------------------------------------------------------===// +// 64-bit Integer Multiply and Divide. 
+//===----------------------------------------------------------------------===// + +let Predicates = [Is64Bit] in { + +def MULXrr : F3_1<2, 0b001001, + (outs I64Regs:$rd), (ins I64Regs:$rs1, I64Regs:$rs2), + "mulx $rs1, $rs2, $rd", + [(set i64:$rd, (mul i64:$rs1, i64:$rs2))]>; +def MULXri : F3_2<2, 0b001001, + (outs IntRegs:$rd), (ins IntRegs:$rs1, i64imm:$i), + "mulx $rs1, $i, $rd", + [(set i64:$rd, (mul i64:$rs1, (i64 simm13:$i)))]>; + +// Division can trap. +let hasSideEffects = 1 in { +def SDIVXrr : F3_1<2, 0b101101, + (outs I64Regs:$rd), (ins I64Regs:$rs1, I64Regs:$rs2), + "sdivx $rs1, $rs2, $rd", + [(set i64:$rd, (sdiv i64:$rs1, i64:$rs2))]>; +def SDIVXri : F3_2<2, 0b101101, + (outs IntRegs:$rd), (ins IntRegs:$rs1, i64imm:$i), + "sdivx $rs1, $i, $rd", + [(set i64:$rd, (sdiv i64:$rs1, (i64 simm13:$i)))]>; + +def UDIVXrr : F3_1<2, 0b001101, + (outs I64Regs:$rd), (ins I64Regs:$rs1, I64Regs:$rs2), + "udivx $rs1, $rs2, $rd", + [(set i64:$rd, (udiv i64:$rs1, i64:$rs2))]>; +def UDIVXri : F3_2<2, 0b001101, + (outs IntRegs:$rd), (ins IntRegs:$rs1, i64imm:$i), + "udivx $rs1, $i, $rd", + [(set i64:$rd, (udiv i64:$rs1, (i64 simm13:$i)))]>; +} // hasSideEffects = 1 + +} // Predicates = [Is64Bit] + + +//===----------------------------------------------------------------------===// // 64-bit Loads and Stores. //===----------------------------------------------------------------------===// // @@ -203,16 +245,22 @@ def LDXri : F3_2<3, 0b001011, // Extending loads to i64. def : Pat<(i64 (zextloadi8 ADDRrr:$addr)), (LDUBrr ADDRrr:$addr)>; def : Pat<(i64 (zextloadi8 ADDRri:$addr)), (LDUBri ADDRri:$addr)>; +def : Pat<(i64 (extloadi8 ADDRrr:$addr)), (LDUBrr ADDRrr:$addr)>; +def : Pat<(i64 (extloadi8 ADDRri:$addr)), (LDUBri ADDRri:$addr)>; def : Pat<(i64 (sextloadi8 ADDRrr:$addr)), (LDSBrr ADDRrr:$addr)>; def : Pat<(i64 (sextloadi8 ADDRri:$addr)), (LDSBri ADDRri:$addr)>; def : Pat<(i64 (zextloadi16 ADDRrr:$addr)), (LDUHrr ADDRrr:$addr)>; def : Pat<(i64 (zextloadi16 ADDRri:$addr)), (LDUHri ADDRri:$addr)>; +def : Pat<(i64 (extloadi16 ADDRrr:$addr)), (LDUHrr ADDRrr:$addr)>; +def : Pat<(i64 (extloadi16 ADDRri:$addr)), (LDUHri ADDRri:$addr)>; def : Pat<(i64 (sextloadi16 ADDRrr:$addr)), (LDSHrr ADDRrr:$addr)>; def : Pat<(i64 (sextloadi16 ADDRri:$addr)), (LDSHri ADDRri:$addr)>; def : Pat<(i64 (zextloadi32 ADDRrr:$addr)), (LDrr ADDRrr:$addr)>; def : Pat<(i64 (zextloadi32 ADDRri:$addr)), (LDri ADDRri:$addr)>; +def : Pat<(i64 (extloadi32 ADDRrr:$addr)), (LDrr ADDRrr:$addr)>; +def : Pat<(i64 (extloadi32 ADDRri:$addr)), (LDri ADDRri:$addr)>; // Sign-extending load of i32 into i64 is a new SPARC v9 instruction. def LDSWrr : F3_1<3, 0b001011, diff --git a/contrib/llvm/lib/Target/Sparc/SparcInstrFormats.td b/contrib/llvm/lib/Target/Sparc/SparcInstrFormats.td index f101856..e7fde08 100644 --- a/contrib/llvm/lib/Target/Sparc/SparcInstrFormats.td +++ b/contrib/llvm/lib/Target/Sparc/SparcInstrFormats.td @@ -142,10 +142,10 @@ class F3_Si<bits<2> opVal, bits<6> op3val, bit xVal, dag outs, dag ins, // Define rr and ri shift instructions with patterns. 
multiclass F3_S<string OpcStr, bits<6> Op3Val, bit XVal, SDNode OpNode, ValueType VT, RegisterClass RC> { - def rr : F3_Sr<2, Op3Val, XVal, (outs RC:$rd), (ins RC:$rs, RC:$rs2), + def rr : F3_Sr<2, Op3Val, XVal, (outs RC:$rd), (ins RC:$rs, IntRegs:$rs2), !strconcat(OpcStr, " $rs, $rs2, $rd"), - [(set VT:$rd, (OpNode VT:$rs, VT:$rs2))]>; - def ri : F3_Si<2, Op3Val, XVal, (outs RC:$rd), (ins RC:$rs, unknown:$shcnt), + [(set VT:$rd, (OpNode VT:$rs, i32:$rs2))]>; + def ri : F3_Si<2, Op3Val, XVal, (outs RC:$rd), (ins RC:$rs, i32imm:$shcnt), !strconcat(OpcStr, " $rs, $shcnt, $rd"), - [(set VT:$rd, (OpNode VT:$rs, (VT imm:$shcnt)))]>; + [(set VT:$rd, (OpNode VT:$rs, (i32 imm:$shcnt)))]>; } diff --git a/contrib/llvm/lib/Target/Sparc/SparcInstrInfo.td b/contrib/llvm/lib/Target/Sparc/SparcInstrInfo.td index 5ff4395..baefb06 100644 --- a/contrib/llvm/lib/Target/Sparc/SparcInstrInfo.td +++ b/contrib/llvm/lib/Target/Sparc/SparcInstrInfo.td @@ -64,8 +64,7 @@ def HI22 : SDNodeXForm<imm, [{ }]>; def SETHIimm : PatLeaf<(imm), [{ - return (((unsigned)N->getZExtValue() >> 10) << 10) == - (unsigned)N->getZExtValue(); + return isShiftedUInt<22, 10>(N->getZExtValue()); }], HI22>; // Addressing modes. @@ -796,10 +795,8 @@ def : Pat<(SPhi tconstpool:$in), (SETHIi tconstpool:$in)>; def : Pat<(SPlo tconstpool:$in), (ORri (i32 G0), tconstpool:$in)>; // Add reg, lo. This is used when taking the addr of a global/constpool entry. -def : Pat<(add i32:$r, (SPlo tglobaladdr:$in)), - (ADDri $r, tglobaladdr:$in)>; -def : Pat<(add i32:$r, (SPlo tconstpool:$in)), - (ADDri $r, tconstpool:$in)>; +def : Pat<(add iPTR:$r, (SPlo tglobaladdr:$in)), (ADDri $r, tglobaladdr:$in)>; +def : Pat<(add iPTR:$r, (SPlo tconstpool:$in)), (ADDri $r, tconstpool:$in)>; // Calls: def : Pat<(call tglobaladdr:$dst), diff --git a/contrib/llvm/lib/Target/Sparc/SparcRegisterInfo.cpp b/contrib/llvm/lib/Target/Sparc/SparcRegisterInfo.cpp index db9b30e..3af4c61 100644 --- a/contrib/llvm/lib/Target/Sparc/SparcRegisterInfo.cpp +++ b/contrib/llvm/lib/Target/Sparc/SparcRegisterInfo.cpp @@ -74,8 +74,9 @@ SparcRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, // Addressable stack objects are accessed using neg. offsets from %fp MachineFunction &MF = *MI.getParent()->getParent(); - int Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex) + - MI.getOperand(FIOperandNum + 1).getImm(); + int64_t Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex) + + MI.getOperand(FIOperandNum + 1).getImm() + + Subtarget.getStackPointerBias(); // Replace frame index with a frame pointer reference. if (Offset >= -4096 && Offset <= 4095) { diff --git a/contrib/llvm/lib/Target/Sparc/SparcSubtarget.h b/contrib/llvm/lib/Target/Sparc/SparcSubtarget.h index a81931b..b94dd11 100644 --- a/contrib/llvm/lib/Target/Sparc/SparcSubtarget.h +++ b/contrib/llvm/lib/Target/Sparc/SparcSubtarget.h @@ -52,6 +52,12 @@ public: } return std::string(p); } + + /// The 64-bit ABI uses biased stack and frame pointers, so the stack frame + /// of the current function is the area from [%sp+BIAS] to [%fp+BIAS]. + int64_t getStackPointerBias() const { + return is64Bit() ? 
2047 : 0; + } }; } // end namespace llvm diff --git a/contrib/llvm/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp b/contrib/llvm/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp new file mode 100644 index 0000000..c7725a1 --- /dev/null +++ b/contrib/llvm/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp @@ -0,0 +1,689 @@ +//===-- SystemZAsmParser.cpp - Parse SystemZ assembly instructions --------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "MCTargetDesc/SystemZMCTargetDesc.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCParser/MCParsedAsmOperand.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/MC/MCTargetAsmParser.h" +#include "llvm/Support/TargetRegistry.h" + +using namespace llvm; + +// Return true if Expr is in the range [MinValue, MaxValue]. +static bool inRange(const MCExpr *Expr, int64_t MinValue, int64_t MaxValue) { + if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Expr)) { + int64_t Value = CE->getValue(); + return Value >= MinValue && Value <= MaxValue; + } + return false; +} + +namespace { +class SystemZOperand : public MCParsedAsmOperand { +public: + enum RegisterKind { + GR32Reg, + GR64Reg, + GR128Reg, + ADDR32Reg, + ADDR64Reg, + FP32Reg, + FP64Reg, + FP128Reg + }; + +private: + enum OperandKind { + KindToken, + KindReg, + KindAccessReg, + KindImm, + KindMem + }; + + OperandKind Kind; + SMLoc StartLoc, EndLoc; + + // A string of length Length, starting at Data. + struct TokenOp { + const char *Data; + unsigned Length; + }; + + // LLVM register Num, which has kind Kind. + struct RegOp { + RegisterKind Kind; + unsigned Num; + }; + + // Base + Disp + Index, where Base and Index are LLVM registers or 0. + // RegKind says what type the registers have (ADDR32Reg or ADDR64Reg). + struct MemOp { + unsigned Base : 8; + unsigned Index : 8; + unsigned RegKind : 8; + unsigned Unused : 8; + const MCExpr *Disp; + }; + + union { + TokenOp Token; + RegOp Reg; + unsigned AccessReg; + const MCExpr *Imm; + MemOp Mem; + }; + + SystemZOperand(OperandKind kind, SMLoc startLoc, SMLoc endLoc) + : Kind(kind), StartLoc(startLoc), EndLoc(endLoc) + {} + + void addExpr(MCInst &Inst, const MCExpr *Expr) const { + // Add as immediates when possible. Null MCExpr = 0. + if (Expr == 0) + Inst.addOperand(MCOperand::CreateImm(0)); + else if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Expr)) + Inst.addOperand(MCOperand::CreateImm(CE->getValue())); + else + Inst.addOperand(MCOperand::CreateExpr(Expr)); + } + +public: + // Create particular kinds of operand. 
+ static SystemZOperand *createToken(StringRef Str, SMLoc Loc) { + SystemZOperand *Op = new SystemZOperand(KindToken, Loc, Loc); + Op->Token.Data = Str.data(); + Op->Token.Length = Str.size(); + return Op; + } + static SystemZOperand *createReg(RegisterKind Kind, unsigned Num, + SMLoc StartLoc, SMLoc EndLoc) { + SystemZOperand *Op = new SystemZOperand(KindReg, StartLoc, EndLoc); + Op->Reg.Kind = Kind; + Op->Reg.Num = Num; + return Op; + } + static SystemZOperand *createAccessReg(unsigned Num, SMLoc StartLoc, + SMLoc EndLoc) { + SystemZOperand *Op = new SystemZOperand(KindAccessReg, StartLoc, EndLoc); + Op->AccessReg = Num; + return Op; + } + static SystemZOperand *createImm(const MCExpr *Expr, SMLoc StartLoc, + SMLoc EndLoc) { + SystemZOperand *Op = new SystemZOperand(KindImm, StartLoc, EndLoc); + Op->Imm = Expr; + return Op; + } + static SystemZOperand *createMem(RegisterKind RegKind, unsigned Base, + const MCExpr *Disp, unsigned Index, + SMLoc StartLoc, SMLoc EndLoc) { + SystemZOperand *Op = new SystemZOperand(KindMem, StartLoc, EndLoc); + Op->Mem.RegKind = RegKind; + Op->Mem.Base = Base; + Op->Mem.Index = Index; + Op->Mem.Disp = Disp; + return Op; + } + + // Token operands + virtual bool isToken() const LLVM_OVERRIDE { + return Kind == KindToken; + } + StringRef getToken() const { + assert(Kind == KindToken && "Not a token"); + return StringRef(Token.Data, Token.Length); + } + + // Register operands. + virtual bool isReg() const LLVM_OVERRIDE { + return Kind == KindReg; + } + bool isReg(RegisterKind RegKind) const { + return Kind == KindReg && Reg.Kind == RegKind; + } + virtual unsigned getReg() const LLVM_OVERRIDE { + assert(Kind == KindReg && "Not a register"); + return Reg.Num; + } + + // Access register operands. Access registers aren't exposed to LLVM + // as registers. + bool isAccessReg() const { + return Kind == KindAccessReg; + } + + // Immediate operands. + virtual bool isImm() const LLVM_OVERRIDE { + return Kind == KindImm; + } + bool isImm(int64_t MinValue, int64_t MaxValue) const { + return Kind == KindImm && inRange(Imm, MinValue, MaxValue); + } + const MCExpr *getImm() const { + assert(Kind == KindImm && "Not an immediate"); + return Imm; + } + + // Memory operands. + virtual bool isMem() const LLVM_OVERRIDE { + return Kind == KindMem; + } + bool isMem(RegisterKind RegKind, bool HasIndex) const { + return (Kind == KindMem && + Mem.RegKind == RegKind && + (HasIndex || !Mem.Index)); + } + bool isMemDisp12(RegisterKind RegKind, bool HasIndex) const { + return isMem(RegKind, HasIndex) && inRange(Mem.Disp, 0, 0xfff); + } + bool isMemDisp20(RegisterKind RegKind, bool HasIndex) const { + return isMem(RegKind, HasIndex) && inRange(Mem.Disp, -524288, 524287); + } + + // Override MCParsedAsmOperand. + virtual SMLoc getStartLoc() const LLVM_OVERRIDE { return StartLoc; } + virtual SMLoc getEndLoc() const LLVM_OVERRIDE { return EndLoc; } + virtual void print(raw_ostream &OS) const LLVM_OVERRIDE; + + // Used by the TableGen code to add particular types of operand + // to an instruction. 
+ void addRegOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands"); + Inst.addOperand(MCOperand::CreateReg(getReg())); + } + void addAccessRegOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands"); + assert(Kind == KindAccessReg && "Invalid operand type"); + Inst.addOperand(MCOperand::CreateImm(AccessReg)); + } + void addImmOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands"); + addExpr(Inst, getImm()); + } + void addBDAddrOperands(MCInst &Inst, unsigned N) const { + assert(N == 2 && "Invalid number of operands"); + assert(Kind == KindMem && Mem.Index == 0 && "Invalid operand type"); + Inst.addOperand(MCOperand::CreateReg(Mem.Base)); + addExpr(Inst, Mem.Disp); + } + void addBDXAddrOperands(MCInst &Inst, unsigned N) const { + assert(N == 3 && "Invalid number of operands"); + assert(Kind == KindMem && "Invalid operand type"); + Inst.addOperand(MCOperand::CreateReg(Mem.Base)); + addExpr(Inst, Mem.Disp); + Inst.addOperand(MCOperand::CreateReg(Mem.Index)); + } + + // Used by the TableGen code to check for particular operand types. + bool isGR32() const { return isReg(GR32Reg); } + bool isGR64() const { return isReg(GR64Reg); } + bool isGR128() const { return isReg(GR128Reg); } + bool isADDR32() const { return isReg(ADDR32Reg); } + bool isADDR64() const { return isReg(ADDR64Reg); } + bool isADDR128() const { return false; } + bool isFP32() const { return isReg(FP32Reg); } + bool isFP64() const { return isReg(FP64Reg); } + bool isFP128() const { return isReg(FP128Reg); } + bool isBDAddr32Disp12() const { return isMemDisp12(ADDR32Reg, false); } + bool isBDAddr32Disp20() const { return isMemDisp20(ADDR32Reg, false); } + bool isBDAddr64Disp12() const { return isMemDisp12(ADDR64Reg, false); } + bool isBDAddr64Disp20() const { return isMemDisp20(ADDR64Reg, false); } + bool isBDXAddr64Disp12() const { return isMemDisp12(ADDR64Reg, true); } + bool isBDXAddr64Disp20() const { return isMemDisp20(ADDR64Reg, true); } + bool isU4Imm() const { return isImm(0, 15); } + bool isU6Imm() const { return isImm(0, 63); } + bool isU8Imm() const { return isImm(0, 255); } + bool isS8Imm() const { return isImm(-128, 127); } + bool isU16Imm() const { return isImm(0, 65535); } + bool isS16Imm() const { return isImm(-32768, 32767); } + bool isU32Imm() const { return isImm(0, (1LL << 32) - 1); } + bool isS32Imm() const { return isImm(-(1LL << 31), (1LL << 31) - 1); } +}; + +// Maps of asm register numbers to LLVM register numbers, with 0 indicating +// an invalid register. We don't use register class directly because that +// specifies the allocation order. 
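A quick reference for the displacement ranges used above (editor's sketch, not from the patch): base+displacement operands come in a 12-bit unsigned and a 20-bit signed form, which is what isMemDisp12/isMemDisp20 check via inRange().

#include <cstdint>

static bool fitsDisp12(int64_t D) { return D >= 0 && D <= 0xfff; }         // 0 .. 4095
static bool fitsDisp20(int64_t D) { return D >= -524288 && D <= 524287; }  // -2^19 .. 2^19-1

// 4088(%r15) satisfies both predicates; -8(%r15) or 4096(%r15) satisfies only
// the 20-bit form, so such an operand can only match a Disp20 instruction.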
+static const unsigned GR32Regs[] = { + SystemZ::R0W, SystemZ::R1W, SystemZ::R2W, SystemZ::R3W, + SystemZ::R4W, SystemZ::R5W, SystemZ::R6W, SystemZ::R7W, + SystemZ::R8W, SystemZ::R9W, SystemZ::R10W, SystemZ::R11W, + SystemZ::R12W, SystemZ::R13W, SystemZ::R14W, SystemZ::R15W +}; +static const unsigned GR64Regs[] = { + SystemZ::R0D, SystemZ::R1D, SystemZ::R2D, SystemZ::R3D, + SystemZ::R4D, SystemZ::R5D, SystemZ::R6D, SystemZ::R7D, + SystemZ::R8D, SystemZ::R9D, SystemZ::R10D, SystemZ::R11D, + SystemZ::R12D, SystemZ::R13D, SystemZ::R14D, SystemZ::R15D +}; +static const unsigned GR128Regs[] = { + SystemZ::R0Q, 0, SystemZ::R2Q, 0, + SystemZ::R4Q, 0, SystemZ::R6Q, 0, + SystemZ::R8Q, 0, SystemZ::R10Q, 0, + SystemZ::R12Q, 0, SystemZ::R14Q, 0 +}; +static const unsigned FP32Regs[] = { + SystemZ::F0S, SystemZ::F1S, SystemZ::F2S, SystemZ::F3S, + SystemZ::F4S, SystemZ::F5S, SystemZ::F6S, SystemZ::F7S, + SystemZ::F8S, SystemZ::F9S, SystemZ::F10S, SystemZ::F11S, + SystemZ::F12S, SystemZ::F13S, SystemZ::F14S, SystemZ::F15S +}; +static const unsigned FP64Regs[] = { + SystemZ::F0D, SystemZ::F1D, SystemZ::F2D, SystemZ::F3D, + SystemZ::F4D, SystemZ::F5D, SystemZ::F6D, SystemZ::F7D, + SystemZ::F8D, SystemZ::F9D, SystemZ::F10D, SystemZ::F11D, + SystemZ::F12D, SystemZ::F13D, SystemZ::F14D, SystemZ::F15D +}; +static const unsigned FP128Regs[] = { + SystemZ::F0Q, SystemZ::F1Q, 0, 0, + SystemZ::F4Q, SystemZ::F5Q, 0, 0, + SystemZ::F8Q, SystemZ::F9Q, 0, 0, + SystemZ::F12Q, SystemZ::F13Q, 0, 0 +}; + +class SystemZAsmParser : public MCTargetAsmParser { +#define GET_ASSEMBLER_HEADER +#include "SystemZGenAsmMatcher.inc" + +private: + MCSubtargetInfo &STI; + MCAsmParser &Parser; + struct Register { + char Prefix; + unsigned Number; + SMLoc StartLoc, EndLoc; + }; + + bool parseRegister(Register &Reg); + + OperandMatchResultTy + parseRegister(Register &Reg, char Prefix, const unsigned *Regs, + bool IsAddress = false); + + OperandMatchResultTy + parseRegister(SmallVectorImpl<MCParsedAsmOperand*> &Operands, + char Prefix, const unsigned *Regs, + SystemZOperand::RegisterKind Kind, + bool IsAddress = false); + + OperandMatchResultTy + parseAddress(SmallVectorImpl<MCParsedAsmOperand*> &Operands, + const unsigned *Regs, SystemZOperand::RegisterKind RegKind, + bool HasIndex); + + bool parseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands, + StringRef Mnemonic); + +public: + SystemZAsmParser(MCSubtargetInfo &sti, MCAsmParser &parser) + : MCTargetAsmParser(), STI(sti), Parser(parser) { + MCAsmParserExtension::Initialize(Parser); + + // Initialize the set of available features. + setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits())); + } + + // Override MCTargetAsmParser. + virtual bool ParseDirective(AsmToken DirectiveID) LLVM_OVERRIDE; + virtual bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, + SMLoc &EndLoc) LLVM_OVERRIDE; + virtual bool ParseInstruction(ParseInstructionInfo &Info, + StringRef Name, SMLoc NameLoc, + SmallVectorImpl<MCParsedAsmOperand*> &Operands) + LLVM_OVERRIDE; + virtual bool + MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, + SmallVectorImpl<MCParsedAsmOperand*> &Operands, + MCStreamer &Out, unsigned &ErrorInfo, + bool MatchingInlineAsm) LLVM_OVERRIDE; + + // Used by the TableGen code to parse particular operand types. 
+ OperandMatchResultTy + parseGR32(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { + return parseRegister(Operands, 'r', GR32Regs, SystemZOperand::GR32Reg); + } + OperandMatchResultTy + parseGR64(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { + return parseRegister(Operands, 'r', GR64Regs, SystemZOperand::GR64Reg); + } + OperandMatchResultTy + parseGR128(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { + return parseRegister(Operands, 'r', GR128Regs, SystemZOperand::GR128Reg); + } + OperandMatchResultTy + parseADDR32(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { + return parseRegister(Operands, 'r', GR32Regs, SystemZOperand::ADDR32Reg, + true); + } + OperandMatchResultTy + parseADDR64(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { + return parseRegister(Operands, 'r', GR64Regs, SystemZOperand::ADDR64Reg, + true); + } + OperandMatchResultTy + parseADDR128(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { + llvm_unreachable("Shouldn't be used as an operand"); + } + OperandMatchResultTy + parseFP32(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { + return parseRegister(Operands, 'f', FP32Regs, SystemZOperand::FP32Reg); + } + OperandMatchResultTy + parseFP64(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { + return parseRegister(Operands, 'f', FP64Regs, SystemZOperand::FP64Reg); + } + OperandMatchResultTy + parseFP128(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { + return parseRegister(Operands, 'f', FP128Regs, SystemZOperand::FP128Reg); + } + OperandMatchResultTy + parseBDAddr32(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { + return parseAddress(Operands, GR32Regs, SystemZOperand::ADDR32Reg, false); + } + OperandMatchResultTy + parseBDAddr64(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { + return parseAddress(Operands, GR64Regs, SystemZOperand::ADDR64Reg, false); + } + OperandMatchResultTy + parseBDXAddr64(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { + return parseAddress(Operands, GR64Regs, SystemZOperand::ADDR64Reg, true); + } + OperandMatchResultTy + parseAccessReg(SmallVectorImpl<MCParsedAsmOperand*> &Operands); +}; +} + +#define GET_REGISTER_MATCHER +#define GET_SUBTARGET_FEATURE_NAME +#define GET_MATCHER_IMPLEMENTATION +#include "SystemZGenAsmMatcher.inc" + +void SystemZOperand::print(raw_ostream &OS) const { + llvm_unreachable("Not implemented"); +} + +// Parse one register of the form %<prefix><number>. +bool SystemZAsmParser::parseRegister(Register &Reg) { + Reg.StartLoc = Parser.getTok().getLoc(); + + // Eat the % prefix. + if (Parser.getTok().isNot(AsmToken::Percent)) + return true; + Parser.Lex(); + + // Expect a register name. + if (Parser.getTok().isNot(AsmToken::Identifier)) + return true; + + // Check the prefix. + StringRef Name = Parser.getTok().getString(); + if (Name.size() < 2) + return true; + Reg.Prefix = Name[0]; + + // Treat the rest of the register name as a register number. + if (Name.substr(1).getAsInteger(10, Reg.Number)) + return true; + + Reg.EndLoc = Parser.getTok().getLoc(); + Parser.Lex(); + return false; +} + +// Parse a register with prefix Prefix and convert it to LLVM numbering. +// Regs maps asm register numbers to LLVM register numbers, with zero +// entries indicating an invalid register. IsAddress says whether the +// register appears in an address context. 
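A rough usage note (editor's addition): the low-level parseRegister above accepts tokens of the form %<prefix><number>, and the table-driven wrappers then validate the prefix and map the number through the arrays defined earlier, for example:

// "%r15" -> Prefix 'r', Number 15; through GR64Regs becomes SystemZ::R15D
// "%f4"  -> Prefix 'f', Number 4;  through FP64Regs becomes SystemZ::F4D
// "%r20" -> Number > 15, rejected with "invalid register"
// "%r0"  -> valid as a value register, but rejected when IsAddress is set,
//           since a base/index field of zero means "no register"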
+SystemZAsmParser::OperandMatchResultTy +SystemZAsmParser::parseRegister(Register &Reg, char Prefix, + const unsigned *Regs, bool IsAddress) { + if (parseRegister(Reg)) + return MatchOperand_NoMatch; + if (Reg.Prefix != Prefix || Reg.Number > 15 || Regs[Reg.Number] == 0) { + Error(Reg.StartLoc, "invalid register"); + return MatchOperand_ParseFail; + } + if (Reg.Number == 0 && IsAddress) { + Error(Reg.StartLoc, "%r0 used in an address"); + return MatchOperand_ParseFail; + } + Reg.Number = Regs[Reg.Number]; + return MatchOperand_Success; +} + +// Parse a register and add it to Operands. Prefix is 'r' for GPRs, +// 'f' for FPRs, etc. Regs maps asm register numbers to LLVM register numbers, +// with zero entries indicating an invalid register. Kind is the type of +// register represented by Regs and IsAddress says whether the register is +// being parsed in an address context, meaning that %r0 evaluates as 0. +SystemZAsmParser::OperandMatchResultTy +SystemZAsmParser::parseRegister(SmallVectorImpl<MCParsedAsmOperand*> &Operands, + char Prefix, const unsigned *Regs, + SystemZOperand::RegisterKind Kind, + bool IsAddress) { + Register Reg; + OperandMatchResultTy Result = parseRegister(Reg, Prefix, Regs, IsAddress); + if (Result == MatchOperand_Success) + Operands.push_back(SystemZOperand::createReg(Kind, Reg.Number, + Reg.StartLoc, Reg.EndLoc)); + return Result; +} + +// Parse a memory operand and add it to Operands. Regs maps asm register +// numbers to LLVM address registers and RegKind says what kind of address +// register we're using (ADDR32Reg or ADDR64Reg). HasIndex says whether +// the address allows index registers. +SystemZAsmParser::OperandMatchResultTy +SystemZAsmParser::parseAddress(SmallVectorImpl<MCParsedAsmOperand*> &Operands, + const unsigned *Regs, + SystemZOperand::RegisterKind RegKind, + bool HasIndex) { + SMLoc StartLoc = Parser.getTok().getLoc(); + + // Parse the displacement, which must always be present. + const MCExpr *Disp; + if (getParser().parseExpression(Disp)) + return MatchOperand_NoMatch; + + // Parse the optional base and index. + unsigned Index = 0; + unsigned Base = 0; + if (getLexer().is(AsmToken::LParen)) { + Parser.Lex(); + + // Parse the first register. + Register Reg; + OperandMatchResultTy Result = parseRegister(Reg, 'r', GR64Regs, true); + if (Result != MatchOperand_Success) + return Result; + + // Check whether there's a second register. If so, the one that we + // just parsed was the index. + if (getLexer().is(AsmToken::Comma)) { + Parser.Lex(); + + if (!HasIndex) { + Error(Reg.StartLoc, "invalid use of indexed addressing"); + return MatchOperand_ParseFail; + } + + Index = Reg.Number; + Result = parseRegister(Reg, 'r', GR64Regs, true); + if (Result != MatchOperand_Success) + return Result; + } + Base = Reg.Number; + + // Consume the closing bracket. 
+ if (getLexer().isNot(AsmToken::RParen)) + return MatchOperand_NoMatch; + Parser.Lex(); + } + + SMLoc EndLoc = + SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1); + Operands.push_back(SystemZOperand::createMem(RegKind, Base, Disp, Index, + StartLoc, EndLoc)); + return MatchOperand_Success; +} + +bool SystemZAsmParser::ParseDirective(AsmToken DirectiveID) { + return true; +} + +bool SystemZAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, + SMLoc &EndLoc) { + Register Reg; + if (parseRegister(Reg)) + return Error(Reg.StartLoc, "register expected"); + if (Reg.Prefix == 'r' && Reg.Number < 16) + RegNo = GR64Regs[Reg.Number]; + else if (Reg.Prefix == 'f' && Reg.Number < 16) + RegNo = FP64Regs[Reg.Number]; + else + return Error(Reg.StartLoc, "invalid register"); + StartLoc = Reg.StartLoc; + EndLoc = Reg.EndLoc; + return false; +} + +bool SystemZAsmParser:: +ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc, + SmallVectorImpl<MCParsedAsmOperand*> &Operands) { + Operands.push_back(SystemZOperand::createToken(Name, NameLoc)); + + // Read the remaining operands. + if (getLexer().isNot(AsmToken::EndOfStatement)) { + // Read the first operand. + if (parseOperand(Operands, Name)) { + Parser.eatToEndOfStatement(); + return true; + } + + // Read any subsequent operands. + while (getLexer().is(AsmToken::Comma)) { + Parser.Lex(); + if (parseOperand(Operands, Name)) { + Parser.eatToEndOfStatement(); + return true; + } + } + if (getLexer().isNot(AsmToken::EndOfStatement)) { + SMLoc Loc = getLexer().getLoc(); + Parser.eatToEndOfStatement(); + return Error(Loc, "unexpected token in argument list"); + } + } + + // Consume the EndOfStatement. + Parser.Lex(); + return false; +} + +bool SystemZAsmParser:: +parseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands, + StringRef Mnemonic) { + // Check if the current operand has a custom associated parser, if so, try to + // custom parse the operand, or fallback to the general approach. + OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic); + if (ResTy == MatchOperand_Success) + return false; + + // If there wasn't a custom match, try the generic matcher below. Otherwise, + // there was a match, but an error occurred, in which case, just return that + // the operand parsing failed. + if (ResTy == MatchOperand_ParseFail) + return true; + + // The only other type of operand is an immediate. 
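For readers new to the syntax being parsed, an editor's note (not part of the patch): parseAddress expects a displacement expression first, then an optional parenthesised register list in which an index may precede the base, giving these shapes:

// "160"        -> Disp=160, Base=0,   Index=0
// "160(%r15)"  -> Disp=160, Base=r15, Index=0
// "8(%r2,%r3)" -> Disp=8,   Index=r2, Base=r3 (allowed only when HasIndex)
// "8(%r2,%r3)" against a base-only operand is rejected with
//              "invalid use of indexed addressing"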
+ const MCExpr *Expr; + SMLoc StartLoc = Parser.getTok().getLoc(); + if (getParser().parseExpression(Expr)) + return true; + + SMLoc EndLoc = + SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1); + Operands.push_back(SystemZOperand::createImm(Expr, StartLoc, EndLoc)); + return false; +} + +bool SystemZAsmParser:: +MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, + SmallVectorImpl<MCParsedAsmOperand*> &Operands, + MCStreamer &Out, unsigned &ErrorInfo, + bool MatchingInlineAsm) { + MCInst Inst; + unsigned MatchResult; + + MatchResult = MatchInstructionImpl(Operands, Inst, ErrorInfo, + MatchingInlineAsm); + switch (MatchResult) { + default: break; + case Match_Success: + Inst.setLoc(IDLoc); + Out.EmitInstruction(Inst); + return false; + + case Match_MissingFeature: { + assert(ErrorInfo && "Unknown missing feature!"); + // Special case the error message for the very common case where only + // a single subtarget feature is missing + std::string Msg = "instruction requires:"; + unsigned Mask = 1; + for (unsigned I = 0; I < sizeof(ErrorInfo) * 8 - 1; ++I) { + if (ErrorInfo & Mask) { + Msg += " "; + Msg += getSubtargetFeatureName(ErrorInfo & Mask); + } + Mask <<= 1; + } + return Error(IDLoc, Msg); + } + + case Match_InvalidOperand: { + SMLoc ErrorLoc = IDLoc; + if (ErrorInfo != ~0U) { + if (ErrorInfo >= Operands.size()) + return Error(IDLoc, "too few operands for instruction"); + + ErrorLoc = ((SystemZOperand*)Operands[ErrorInfo])->getStartLoc(); + if (ErrorLoc == SMLoc()) + ErrorLoc = IDLoc; + } + return Error(ErrorLoc, "invalid operand for instruction"); + } + + case Match_MnemonicFail: + return Error(IDLoc, "invalid instruction"); + } + + llvm_unreachable("Unexpected match type"); +} + +SystemZAsmParser::OperandMatchResultTy SystemZAsmParser:: +parseAccessReg(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { + Register Reg; + if (parseRegister(Reg)) + return MatchOperand_NoMatch; + if (Reg.Prefix != 'a' || Reg.Number > 15) { + Error(Reg.StartLoc, "invalid register"); + return MatchOperand_ParseFail; + } + Operands.push_back(SystemZOperand::createAccessReg(Reg.Number, + Reg.StartLoc, Reg.EndLoc)); + return MatchOperand_Success; +} + +// Force static initialization. +extern "C" void LLVMInitializeSystemZAsmParser() { + RegisterMCAsmParser<SystemZAsmParser> X(TheSystemZTarget); +} diff --git a/contrib/llvm/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.cpp b/contrib/llvm/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.cpp new file mode 100644 index 0000000..d73cf49 --- /dev/null +++ b/contrib/llvm/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.cpp @@ -0,0 +1,150 @@ +//===-- SystemZInstPrinter.cpp - Convert SystemZ MCInst to assembly syntax ===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "asm-printer" + +#include "SystemZInstPrinter.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCInstrInfo.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +#include "SystemZGenAsmWriter.inc" + +void SystemZInstPrinter::printAddress(unsigned Base, int64_t Disp, + unsigned Index, raw_ostream &O) { + O << Disp; + if (Base) { + O << '('; + if (Index) + O << '%' << getRegisterName(Index) << ','; + O << '%' << getRegisterName(Base) << ')'; + } else + assert(!Index && "Shouldn't have an index without a base"); +} + +void SystemZInstPrinter::printOperand(const MCOperand &MO, raw_ostream &O) { + if (MO.isReg()) + O << '%' << getRegisterName(MO.getReg()); + else if (MO.isImm()) + O << MO.getImm(); + else if (MO.isExpr()) + O << *MO.getExpr(); + else + llvm_unreachable("Invalid operand"); +} + +void SystemZInstPrinter::printInst(const MCInst *MI, raw_ostream &O, + StringRef Annot) { + printInstruction(MI, O); + printAnnotation(O, Annot); +} + +void SystemZInstPrinter::printRegName(raw_ostream &O, unsigned RegNo) const { + O << '%' << getRegisterName(RegNo); +} + +void SystemZInstPrinter::printU4ImmOperand(const MCInst *MI, int OpNum, + raw_ostream &O) { + int64_t Value = MI->getOperand(OpNum).getImm(); + assert(isUInt<4>(Value) && "Invalid u4imm argument"); + O << Value; +} + +void SystemZInstPrinter::printU6ImmOperand(const MCInst *MI, int OpNum, + raw_ostream &O) { + int64_t Value = MI->getOperand(OpNum).getImm(); + assert(isUInt<6>(Value) && "Invalid u6imm argument"); + O << Value; +} + +void SystemZInstPrinter::printS8ImmOperand(const MCInst *MI, int OpNum, + raw_ostream &O) { + int64_t Value = MI->getOperand(OpNum).getImm(); + assert(isInt<8>(Value) && "Invalid s8imm argument"); + O << Value; +} + +void SystemZInstPrinter::printU8ImmOperand(const MCInst *MI, int OpNum, + raw_ostream &O) { + int64_t Value = MI->getOperand(OpNum).getImm(); + assert(isUInt<8>(Value) && "Invalid u8imm argument"); + O << Value; +} + +void SystemZInstPrinter::printS16ImmOperand(const MCInst *MI, int OpNum, + raw_ostream &O) { + int64_t Value = MI->getOperand(OpNum).getImm(); + assert(isInt<16>(Value) && "Invalid s16imm argument"); + O << Value; +} + +void SystemZInstPrinter::printU16ImmOperand(const MCInst *MI, int OpNum, + raw_ostream &O) { + int64_t Value = MI->getOperand(OpNum).getImm(); + assert(isUInt<16>(Value) && "Invalid u16imm argument"); + O << Value; +} + +void SystemZInstPrinter::printS32ImmOperand(const MCInst *MI, int OpNum, + raw_ostream &O) { + int64_t Value = MI->getOperand(OpNum).getImm(); + assert(isInt<32>(Value) && "Invalid s32imm argument"); + O << Value; +} + +void SystemZInstPrinter::printU32ImmOperand(const MCInst *MI, int OpNum, + raw_ostream &O) { + int64_t Value = MI->getOperand(OpNum).getImm(); + assert(isUInt<32>(Value) && "Invalid u32imm argument"); + O << Value; +} + +void SystemZInstPrinter::printAccessRegOperand(const MCInst *MI, int OpNum, + raw_ostream &O) { + uint64_t Value = MI->getOperand(OpNum).getImm(); + assert(Value < 16 && "Invalid access register number"); + O << "%a" << (unsigned int)Value; +} + +void SystemZInstPrinter::printCallOperand(const MCInst *MI, int OpNum, + raw_ostream &O) { + printOperand(MI, OpNum, O); + O << "@PLT"; +} + +void SystemZInstPrinter::printOperand(const MCInst *MI, int OpNum, + raw_ostream &O) { + printOperand(MI->getOperand(OpNum), O); +} + +void SystemZInstPrinter::printBDAddrOperand(const MCInst *MI, int OpNum, + 
raw_ostream &O) { + printAddress(MI->getOperand(OpNum).getReg(), + MI->getOperand(OpNum + 1).getImm(), 0, O); +} + +void SystemZInstPrinter::printBDXAddrOperand(const MCInst *MI, int OpNum, + raw_ostream &O) { + printAddress(MI->getOperand(OpNum).getReg(), + MI->getOperand(OpNum + 1).getImm(), + MI->getOperand(OpNum + 2).getReg(), O); +} + +void SystemZInstPrinter::printCond4Operand(const MCInst *MI, int OpNum, + raw_ostream &O) { + static const char *const CondNames[] = { + "o", "h", "nle", "l", "nhe", "lh", "ne", + "e", "nlh", "he", "nl", "le", "nh", "no" + }; + uint64_t Imm = MI->getOperand(OpNum).getImm(); + assert(Imm > 0 && Imm < 15 && "Invalid condition"); + O << CondNames[Imm - 1]; +} diff --git a/contrib/llvm/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.h b/contrib/llvm/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.h new file mode 100644 index 0000000..b82e79d --- /dev/null +++ b/contrib/llvm/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.h @@ -0,0 +1,68 @@ +//==- SystemZInstPrinter.h - Convert SystemZ MCInst to assembly --*- C++ -*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This class prints a SystemZ MCInst to a .s file. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_SYSTEMZINSTPRINTER_H +#define LLVM_SYSTEMZINSTPRINTER_H + +#include "llvm/MC/MCInstPrinter.h" +#include "llvm/Support/Compiler.h" + +namespace llvm { +class MCOperand; + +class SystemZInstPrinter : public MCInstPrinter { +public: + SystemZInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII, + const MCRegisterInfo &MRI) + : MCInstPrinter(MAI, MII, MRI) {} + + // Automatically generated by tblgen. + void printInstruction(const MCInst *MI, raw_ostream &O); + static const char *getRegisterName(unsigned RegNo); + + // Print an address with the given base, displacement and index. + static void printAddress(unsigned Base, int64_t Disp, unsigned Index, + raw_ostream &O); + + // Print the given operand. + static void printOperand(const MCOperand &MO, raw_ostream &O); + + // Override MCInstPrinter. + virtual void printRegName(raw_ostream &O, unsigned RegNo) const + LLVM_OVERRIDE; + virtual void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot) + LLVM_OVERRIDE; + +private: + // Print various types of operand. + void printOperand(const MCInst *MI, int OpNum, raw_ostream &O); + void printBDAddrOperand(const MCInst *MI, int OpNum, raw_ostream &O); + void printBDXAddrOperand(const MCInst *MI, int OpNum, raw_ostream &O); + void printU4ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O); + void printU6ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O); + void printS8ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O); + void printU8ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O); + void printS16ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O); + void printU16ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O); + void printS32ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O); + void printU32ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O); + void printCallOperand(const MCInst *MI, int OpNum, raw_ostream &O); + void printAccessRegOperand(const MCInst *MI, int OpNum, raw_ostream &O); + + // Print the mnemonic for a condition-code mask ("ne", "lh", etc.) 
+ // This forms part of the instruction name rather than the operand list. + void printCond4Operand(const MCInst *MI, int OpNum, raw_ostream &O); +}; +} // end namespace llvm + +#endif diff --git a/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp b/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp new file mode 100644 index 0000000..e901c6c --- /dev/null +++ b/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp @@ -0,0 +1,151 @@ +//===-- SystemZMCAsmBackend.cpp - SystemZ assembler backend ---------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "MCTargetDesc/SystemZMCTargetDesc.h" +#include "MCTargetDesc/SystemZMCFixups.h" +#include "llvm/MC/MCAsmBackend.h" +#include "llvm/MC/MCELFObjectWriter.h" +#include "llvm/MC/MCFixupKindInfo.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCObjectWriter.h" + +using namespace llvm; + +// Value is a fully-resolved relocation value: Symbol + Addend [- Pivot]. +// Return the bits that should be installed in a relocation field for +// fixup kind Kind. +static uint64_t extractBitsForFixup(MCFixupKind Kind, uint64_t Value) { + if (Kind < FirstTargetFixupKind) + return Value; + + switch (unsigned(Kind)) { + case SystemZ::FK_390_PC16DBL: + case SystemZ::FK_390_PC32DBL: + case SystemZ::FK_390_PLT16DBL: + case SystemZ::FK_390_PLT32DBL: + return (int64_t)Value / 2; + } + + llvm_unreachable("Unknown fixup kind!"); +} + +// If Opcode can be relaxed, return the relaxed form, otherwise return 0. +static unsigned getRelaxedOpcode(unsigned Opcode) { + switch (Opcode) { + case SystemZ::BRC: return SystemZ::BRCL; + case SystemZ::J: return SystemZ::JG; + case SystemZ::BRAS: return SystemZ::BRASL; + } + return 0; +} + +namespace { +class SystemZMCAsmBackend : public MCAsmBackend { + uint8_t OSABI; +public: + SystemZMCAsmBackend(uint8_t osABI) + : OSABI(osABI) {} + + // Override MCAsmBackend + virtual unsigned getNumFixupKinds() const LLVM_OVERRIDE { + return SystemZ::NumTargetFixupKinds; + } + virtual const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const + LLVM_OVERRIDE; + virtual void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize, + uint64_t Value) const LLVM_OVERRIDE; + virtual bool mayNeedRelaxation(const MCInst &Inst) const LLVM_OVERRIDE; + virtual bool fixupNeedsRelaxation(const MCFixup &Fixup, + uint64_t Value, + const MCRelaxableFragment *Fragment, + const MCAsmLayout &Layout) const + LLVM_OVERRIDE; + virtual void relaxInstruction(const MCInst &Inst, + MCInst &Res) const LLVM_OVERRIDE; + virtual bool writeNopData(uint64_t Count, + MCObjectWriter *OW) const LLVM_OVERRIDE; + virtual MCObjectWriter *createObjectWriter(raw_ostream &OS) const + LLVM_OVERRIDE { + return createSystemZObjectWriter(OS, OSABI); + } + virtual bool doesSectionRequireSymbols(const MCSection &Section) const + LLVM_OVERRIDE { + return false; + } +}; +} // end anonymous namespace + +const MCFixupKindInfo & +SystemZMCAsmBackend::getFixupKindInfo(MCFixupKind Kind) const { + const static MCFixupKindInfo Infos[SystemZ::NumTargetFixupKinds] = { + { "FK_390_PC16DBL", 0, 16, MCFixupKindInfo::FKF_IsPCRel }, + { "FK_390_PC32DBL", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, + { "FK_390_PLT16DBL", 0, 16, MCFixupKindInfo::FKF_IsPCRel }, + { "FK_390_PLT32DBL", 0, 32, MCFixupKindInfo::FKF_IsPCRel } + }; + + if 
(Kind < FirstTargetFixupKind) + return MCAsmBackend::getFixupKindInfo(Kind); + + assert(unsigned(Kind - FirstTargetFixupKind) < getNumFixupKinds() && + "Invalid kind!"); + return Infos[Kind - FirstTargetFixupKind]; +} + +void SystemZMCAsmBackend::applyFixup(const MCFixup &Fixup, char *Data, + unsigned DataSize, uint64_t Value) const { + MCFixupKind Kind = Fixup.getKind(); + unsigned Offset = Fixup.getOffset(); + unsigned Size = (getFixupKindInfo(Kind).TargetSize + 7) / 8; + + assert(Offset + Size <= DataSize && "Invalid fixup offset!"); + + // Big-endian insertion of Size bytes. + Value = extractBitsForFixup(Kind, Value); + unsigned ShiftValue = (Size * 8) - 8; + for (unsigned I = 0; I != Size; ++I) { + Data[Offset + I] |= uint8_t(Value >> ShiftValue); + ShiftValue -= 8; + } +} + +bool SystemZMCAsmBackend::mayNeedRelaxation(const MCInst &Inst) const { + return getRelaxedOpcode(Inst.getOpcode()) != 0; +} + +bool +SystemZMCAsmBackend::fixupNeedsRelaxation(const MCFixup &Fixup, + uint64_t Value, + const MCRelaxableFragment *Fragment, + const MCAsmLayout &Layout) const { + // At the moment we just need to relax 16-bit fields to wider fields. + Value = extractBitsForFixup(Fixup.getKind(), Value); + return (int16_t)Value != (int64_t)Value; +} + +void SystemZMCAsmBackend::relaxInstruction(const MCInst &Inst, + MCInst &Res) const { + unsigned Opcode = getRelaxedOpcode(Inst.getOpcode()); + assert(Opcode && "Unexpected insn to relax"); + Res = Inst; + Res.setOpcode(Opcode); +} + +bool SystemZMCAsmBackend::writeNopData(uint64_t Count, + MCObjectWriter *OW) const { + for (uint64_t I = 0; I != Count; ++I) + OW->Write8(7); + return true; +} + +MCAsmBackend *llvm::createSystemZMCAsmBackend(const Target &T, StringRef TT, + StringRef CPU) { + uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(Triple(TT).getOS()); + return new SystemZMCAsmBackend(OSABI); +} diff --git a/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.cpp b/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.cpp new file mode 100644 index 0000000..c96a0d4 --- /dev/null +++ b/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.cpp @@ -0,0 +1,38 @@ +//===-- SystemZMCAsmInfo.cpp - SystemZ asm properties ---------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "SystemZMCAsmInfo.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCSectionELF.h" + +using namespace llvm; + +SystemZMCAsmInfo::SystemZMCAsmInfo(const Target &T, StringRef TT) { + PointerSize = 8; + CalleeSaveStackSlotSize = 8; + IsLittleEndian = false; + + CommentString = "#"; + PCSymbol = "."; + GlobalPrefix = ""; + PrivateGlobalPrefix = ".L"; + WeakRefDirective = "\t.weak\t"; + ZeroDirective = "\t.space\t"; + Data64bitsDirective = "\t.quad\t"; + UsesELFSectionDirectiveForBSS = true; + SupportsDebugInformation = true; + HasLEB128 = true; + ExceptionsType = ExceptionHandling::DwarfCFI; +} + +const MCSection * +SystemZMCAsmInfo::getNonexecutableStackSection(MCContext &Ctx) const { + return Ctx.getELFSection(".note.GNU-stack", ELF::SHT_PROGBITS, + 0, SectionKind::getMetadata()); +} diff --git a/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.h b/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.h new file mode 100644 index 0000000..bac1bca --- /dev/null +++ b/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.h @@ -0,0 +1,31 @@ +//====-- SystemZMCAsmInfo.h - SystemZ asm properties -----------*- C++ -*--===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef SystemZTARGETASMINFO_H +#define SystemZTARGETASMINFO_H + +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/Support/Compiler.h" + +namespace llvm { +class Target; +class StringRef; + +class SystemZMCAsmInfo : public MCAsmInfo { +public: + explicit SystemZMCAsmInfo(const Target &T, StringRef TT); + + // Override MCAsmInfo; + virtual const MCSection *getNonexecutableStackSection(MCContext &Ctx) const + LLVM_OVERRIDE; +}; + +} // namespace llvm + +#endif diff --git a/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp b/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp new file mode 100644 index 0000000..ea2250f --- /dev/null +++ b/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp @@ -0,0 +1,131 @@ +//===-- SystemZMCCodeEmitter.cpp - Convert SystemZ code to machine code ---===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the SystemZMCCodeEmitter class. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "mccodeemitter" +#include "MCTargetDesc/SystemZMCTargetDesc.h" +#include "MCTargetDesc/SystemZMCFixups.h" +#include "llvm/MC/MCCodeEmitter.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCInstrInfo.h" + +using namespace llvm; + +namespace { +class SystemZMCCodeEmitter : public MCCodeEmitter { + const MCInstrInfo &MCII; + MCContext &Ctx; + +public: + SystemZMCCodeEmitter(const MCInstrInfo &mcii, MCContext &ctx) + : MCII(mcii), Ctx(ctx) { + } + + ~SystemZMCCodeEmitter() {} + + // OVerride MCCodeEmitter. + virtual void EncodeInstruction(const MCInst &MI, raw_ostream &OS, + SmallVectorImpl<MCFixup> &Fixups) const + LLVM_OVERRIDE; + +private: + // Automatically generated by TableGen. 
+ uint64_t getBinaryCodeForInstr(const MCInst &MI, + SmallVectorImpl<MCFixup> &Fixups) const; + + // Called by the TableGen code to get the binary encoding of operand + // MO in MI. Fixups is the list of fixups against MI. + unsigned getMachineOpValue(const MCInst &MI, const MCOperand &MO, + SmallVectorImpl<MCFixup> &Fixups) const; + + // Operand OpNum of MI needs a PC-relative fixup of kind Kind at + // Offset bytes from the start of MI. Add the fixup to Fixups + // and return the in-place addend, which since we're a RELA target + // is always 0. + unsigned getPCRelEncoding(const MCInst &MI, unsigned int OpNum, + SmallVectorImpl<MCFixup> &Fixups, + unsigned Kind, int64_t Offset) const; + + unsigned getPC16DBLEncoding(const MCInst &MI, unsigned int OpNum, + SmallVectorImpl<MCFixup> &Fixups) const { + return getPCRelEncoding(MI, OpNum, Fixups, SystemZ::FK_390_PC16DBL, 2); + } + unsigned getPC32DBLEncoding(const MCInst &MI, unsigned int OpNum, + SmallVectorImpl<MCFixup> &Fixups) const { + return getPCRelEncoding(MI, OpNum, Fixups, SystemZ::FK_390_PC32DBL, 2); + } + unsigned getPLT16DBLEncoding(const MCInst &MI, unsigned int OpNum, + SmallVectorImpl<MCFixup> &Fixups) const { + return getPCRelEncoding(MI, OpNum, Fixups, SystemZ::FK_390_PLT16DBL, 2); + } + unsigned getPLT32DBLEncoding(const MCInst &MI, unsigned int OpNum, + SmallVectorImpl<MCFixup> &Fixups) const { + return getPCRelEncoding(MI, OpNum, Fixups, SystemZ::FK_390_PLT32DBL, 2); + } +}; +} + +MCCodeEmitter *llvm::createSystemZMCCodeEmitter(const MCInstrInfo &MCII, + const MCRegisterInfo &MRI, + const MCSubtargetInfo &MCSTI, + MCContext &Ctx) { + return new SystemZMCCodeEmitter(MCII, Ctx); +} + +void SystemZMCCodeEmitter:: +EncodeInstruction(const MCInst &MI, raw_ostream &OS, + SmallVectorImpl<MCFixup> &Fixups) const { + uint64_t Bits = getBinaryCodeForInstr(MI, Fixups); + unsigned Size = MCII.get(MI.getOpcode()).getSize(); + // Big-endian insertion of Size bytes. + unsigned ShiftValue = (Size * 8) - 8; + for (unsigned I = 0; I != Size; ++I) { + OS << uint8_t(Bits >> ShiftValue); + ShiftValue -= 8; + } +} + +unsigned SystemZMCCodeEmitter:: +getMachineOpValue(const MCInst &MI, const MCOperand &MO, + SmallVectorImpl<MCFixup> &Fixups) const { + if (MO.isReg()) + return Ctx.getRegisterInfo().getEncodingValue(MO.getReg()); + if (MO.isImm()) + return static_cast<unsigned>(MO.getImm()); + llvm_unreachable("Unexpected operand type!"); +} + +unsigned +SystemZMCCodeEmitter::getPCRelEncoding(const MCInst &MI, unsigned int OpNum, + SmallVectorImpl<MCFixup> &Fixups, + unsigned Kind, int64_t Offset) const { + const MCOperand &MO = MI.getOperand(OpNum); + // For compatibility with the GNU assembler, treat constant operands as + // unadjusted PC-relative offsets. + if (MO.isImm()) + return MO.getImm() / 2; + + const MCExpr *Expr = MO.getExpr(); + if (Offset) { + // The operand value is relative to the start of MI, but the fixup + // is relative to the operand field itself, which is Offset bytes + // into MI. Add Offset to the relocation value to cancel out + // this difference. 
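To make the offset-cancellation comment above concrete, a small worked example (editor's addition; the 2-byte field offset matches the Offset passed by the getPC*DBLEncoding helpers above):

// Suppose a 6-byte call instruction at address A targets Sym, and its 32-bit
// PC-relative field starts at byte A+2.  The architecture wants
//   field = (Sym - A) / 2            (halfword units, relative to the insn)
// but the fixup is recorded at A+2, so layout would compute Sym - (A + 2).
// Attaching "Sym + 2" as the fixup expression cancels the difference:
//   (Sym + 2) - (A + 2) = Sym - A
// and extractBitsForFixup(FK_390_PC32DBL, ...) then divides by 2 before the
// bytes are written.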
+ const MCExpr *OffsetExpr = MCConstantExpr::Create(Offset, Ctx); + Expr = MCBinaryExpr::CreateAdd(Expr, OffsetExpr, Ctx); + } + Fixups.push_back(MCFixup::Create(Offset, Expr, (MCFixupKind)Kind)); + return 0; +} + +#include "SystemZGenMCCodeEmitter.inc" diff --git a/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCFixups.h b/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCFixups.h new file mode 100644 index 0000000..9c94ebb --- /dev/null +++ b/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCFixups.h @@ -0,0 +1,31 @@ +//===-- SystemZMCFixups.h - SystemZ-specific fixup entries ------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_SYSTEMZMCFIXUPS_H +#define LLVM_SYSTEMZMCFIXUPS_H + +#include "llvm/MC/MCFixup.h" + +namespace llvm { +namespace SystemZ { + enum FixupKind { + // These correspond directly to R_390_* relocations. + FK_390_PC16DBL = FirstTargetFixupKind, + FK_390_PC32DBL, + FK_390_PLT16DBL, + FK_390_PLT32DBL, + + // Marker + LastTargetFixupKind, + NumTargetFixupKinds = LastTargetFixupKind - FirstTargetFixupKind + }; +} +} // end namespace llvm + +#endif diff --git a/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCObjectWriter.cpp b/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCObjectWriter.cpp new file mode 100644 index 0000000..36e3d83 --- /dev/null +++ b/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCObjectWriter.cpp @@ -0,0 +1,140 @@ +//===-- SystemZMCObjectWriter.cpp - SystemZ ELF writer --------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "MCTargetDesc/SystemZMCTargetDesc.h" +#include "MCTargetDesc/SystemZMCFixups.h" +#include "llvm/MC/MCELFObjectWriter.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCValue.h" + +using namespace llvm; + +namespace { +class SystemZObjectWriter : public MCELFObjectTargetWriter { +public: + SystemZObjectWriter(uint8_t OSABI); + + virtual ~SystemZObjectWriter(); + +protected: + // Override MCELFObjectTargetWriter. + virtual unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup, + bool IsPCRel, bool IsRelocWithSymbol, + int64_t Addend) const LLVM_OVERRIDE; + virtual const MCSymbol *ExplicitRelSym(const MCAssembler &Asm, + const MCValue &Target, + const MCFragment &F, + const MCFixup &Fixup, + bool IsPCRel) const LLVM_OVERRIDE; +}; +} // end anonymouse namespace + +SystemZObjectWriter::SystemZObjectWriter(uint8_t OSABI) + : MCELFObjectTargetWriter(/*Is64Bit=*/true, OSABI, ELF::EM_S390, + /*HasRelocationAddend=*/ true) {} + +SystemZObjectWriter::~SystemZObjectWriter() { +} + +// Return the relocation type for an absolute value of MCFixupKind Kind. +static unsigned getAbsoluteReloc(unsigned Kind) { + switch (Kind) { + case FK_Data_1: return ELF::R_390_8; + case FK_Data_2: return ELF::R_390_16; + case FK_Data_4: return ELF::R_390_32; + case FK_Data_8: return ELF::R_390_64; + } + llvm_unreachable("Unsupported absolute address"); +} + +// Return the relocation type for a PC-relative value of MCFixupKind Kind. 
+static unsigned getPCRelReloc(unsigned Kind) { + switch (Kind) { + case FK_Data_2: return ELF::R_390_PC16; + case FK_Data_4: return ELF::R_390_PC32; + case FK_Data_8: return ELF::R_390_PC64; + case SystemZ::FK_390_PC16DBL: return ELF::R_390_PC16DBL; + case SystemZ::FK_390_PC32DBL: return ELF::R_390_PC32DBL; + case SystemZ::FK_390_PLT16DBL: return ELF::R_390_PLT16DBL; + case SystemZ::FK_390_PLT32DBL: return ELF::R_390_PLT32DBL; + } + llvm_unreachable("Unsupported PC-relative address"); +} + +// Return the R_390_TLS_LE* relocation type for MCFixupKind Kind. +static unsigned getTLSLEReloc(unsigned Kind) { + switch (Kind) { + case FK_Data_4: return ELF::R_390_TLS_LE32; + case FK_Data_8: return ELF::R_390_TLS_LE64; + } + llvm_unreachable("Unsupported absolute address"); +} + +// Return the PLT relocation counterpart of MCFixupKind Kind. +static unsigned getPLTReloc(unsigned Kind) { + switch (Kind) { + case SystemZ::FK_390_PC16DBL: return ELF::R_390_PLT16DBL; + case SystemZ::FK_390_PC32DBL: return ELF::R_390_PLT32DBL; + } + llvm_unreachable("Unsupported absolute address"); +} + +unsigned SystemZObjectWriter::GetRelocType(const MCValue &Target, + const MCFixup &Fixup, + bool IsPCRel, + bool IsRelocWithSymbol, + int64_t Addend) const { + MCSymbolRefExpr::VariantKind Modifier = (Target.isAbsolute() ? + MCSymbolRefExpr::VK_None : + Target.getSymA()->getKind()); + unsigned Kind = Fixup.getKind(); + switch (Modifier) { + case MCSymbolRefExpr::VK_None: + if (IsPCRel) + return getPCRelReloc(Kind); + return getAbsoluteReloc(Kind); + + case MCSymbolRefExpr::VK_NTPOFF: + assert(!IsPCRel && "NTPOFF shouldn't be PC-relative"); + return getTLSLEReloc(Kind); + + case MCSymbolRefExpr::VK_GOT: + if (IsPCRel && Kind == SystemZ::FK_390_PC32DBL) + return ELF::R_390_GOTENT; + llvm_unreachable("Only PC-relative GOT accesses are supported for now"); + + case MCSymbolRefExpr::VK_PLT: + assert(IsPCRel && "@PLT shouldt be PC-relative"); + return getPLTReloc(Kind); + + default: + llvm_unreachable("Modifier not supported"); + } +} + +const MCSymbol *SystemZObjectWriter::ExplicitRelSym(const MCAssembler &Asm, + const MCValue &Target, + const MCFragment &F, + const MCFixup &Fixup, + bool IsPCRel) const { + // The addend in a PC-relative R_390_* relocation is always applied to + // the PC-relative part of the address. If some kind of indirection + // is applied to the symbol first, we can't use an addend there too. + if (!Target.isAbsolute() && + Target.getSymA()->getKind() != MCSymbolRefExpr::VK_None && + IsPCRel) + return &Target.getSymA()->getSymbol().AliasedSymbol(); + return NULL; +} + +MCObjectWriter *llvm::createSystemZObjectWriter(raw_ostream &OS, + uint8_t OSABI) { + MCELFObjectTargetWriter *MOTW = new SystemZObjectWriter(OSABI); + return createELFObjectWriter(MOTW, OS, /*IsLittleEndian=*/false); +} diff --git a/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp b/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp new file mode 100644 index 0000000..49a7f47 --- /dev/null +++ b/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp @@ -0,0 +1,160 @@ +//===-- SystemZMCTargetDesc.cpp - SystemZ target descriptions -------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "SystemZMCTargetDesc.h" +#include "InstPrinter/SystemZInstPrinter.h" +#include "SystemZMCAsmInfo.h" +#include "llvm/MC/MCCodeGenInfo.h" +#include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/Support/TargetRegistry.h" + +#define GET_INSTRINFO_MC_DESC +#include "SystemZGenInstrInfo.inc" + +#define GET_SUBTARGETINFO_MC_DESC +#include "SystemZGenSubtargetInfo.inc" + +#define GET_REGINFO_MC_DESC +#include "SystemZGenRegisterInfo.inc" + +using namespace llvm; + +static MCAsmInfo *createSystemZMCAsmInfo(const Target &T, StringRef TT) { + MCAsmInfo *MAI = new SystemZMCAsmInfo(T, TT); + MachineLocation FPDst(MachineLocation::VirtualFP); + MachineLocation FPSrc(SystemZ::R15D, -SystemZMC::CFAOffsetFromInitialSP); + MAI->addInitialFrameState(0, FPDst, FPSrc); + return MAI; +} + +static MCInstrInfo *createSystemZMCInstrInfo() { + MCInstrInfo *X = new MCInstrInfo(); + InitSystemZMCInstrInfo(X); + return X; +} + +static MCRegisterInfo *createSystemZMCRegisterInfo(StringRef TT) { + MCRegisterInfo *X = new MCRegisterInfo(); + InitSystemZMCRegisterInfo(X, SystemZ::R14D); + return X; +} + +static MCSubtargetInfo *createSystemZMCSubtargetInfo(StringRef TT, + StringRef CPU, + StringRef FS) { + MCSubtargetInfo *X = new MCSubtargetInfo(); + InitSystemZMCSubtargetInfo(X, TT, CPU, FS); + return X; +} + +static MCCodeGenInfo *createSystemZMCCodeGenInfo(StringRef TT, Reloc::Model RM, + CodeModel::Model CM, + CodeGenOpt::Level OL) { + MCCodeGenInfo *X = new MCCodeGenInfo(); + + // Static code is suitable for use in a dynamic executable; there is no + // separate DynamicNoPIC model. + if (RM == Reloc::Default || RM == Reloc::DynamicNoPIC) + RM = Reloc::Static; + + // For SystemZ we define the models as follows: + // + // Small: BRASL can call any function and will use a stub if necessary. + // Locally-binding symbols will always be in range of LARL. + // + // Medium: BRASL can call any function and will use a stub if necessary. + // GOT slots and locally-defined text will always be in range + // of LARL, but other symbols might not be. + // + // Large: Equivalent to Medium for now. + // + // Kernel: Equivalent to Medium for now. + // + // This means that any PIC module smaller than 4GB meets the + // requirements of Small, so Small seems like the best default there. + // + // All symbols bind locally in a non-PIC module, so the choice is less + // obvious. There are two cases: + // + // - When creating an executable, PLTs and copy relocations allow + // us to treat external symbols as part of the executable. + // Any executable smaller than 4GB meets the requirements of Small, + // so that seems like the best default. + // + // - When creating JIT code, stubs will be in range of BRASL if the + // image is less than 4GB in size. GOT entries will likewise be + // in range of LARL. However, the JIT environment has no equivalent + // of copy relocs, so locally-binding data symbols might not be in + // the range of LARL. We need the Medium model in that case. + if (CM == CodeModel::Default) + CM = CodeModel::Small; + else if (CM == CodeModel::JITDefault) + CM = RM == Reloc::PIC_ ? 
CodeModel::Small : CodeModel::Medium; + X->InitMCCodeGenInfo(RM, CM, OL); + return X; +} + +static MCInstPrinter *createSystemZMCInstPrinter(const Target &T, + unsigned SyntaxVariant, + const MCAsmInfo &MAI, + const MCInstrInfo &MII, + const MCRegisterInfo &MRI, + const MCSubtargetInfo &STI) { + return new SystemZInstPrinter(MAI, MII, MRI); +} + +static MCStreamer *createSystemZMCObjectStreamer(const Target &T, StringRef TT, + MCContext &Ctx, + MCAsmBackend &MAB, + raw_ostream &OS, + MCCodeEmitter *Emitter, + bool RelaxAll, + bool NoExecStack) { + return createELFStreamer(Ctx, MAB, OS, Emitter, RelaxAll, NoExecStack); +} + +extern "C" void LLVMInitializeSystemZTargetMC() { + // Register the MCAsmInfo. + TargetRegistry::RegisterMCAsmInfo(TheSystemZTarget, + createSystemZMCAsmInfo); + + // Register the MCCodeGenInfo. + TargetRegistry::RegisterMCCodeGenInfo(TheSystemZTarget, + createSystemZMCCodeGenInfo); + + // Register the MCCodeEmitter. + TargetRegistry::RegisterMCCodeEmitter(TheSystemZTarget, + createSystemZMCCodeEmitter); + + // Register the MCInstrInfo. + TargetRegistry::RegisterMCInstrInfo(TheSystemZTarget, + createSystemZMCInstrInfo); + + // Register the MCRegisterInfo. + TargetRegistry::RegisterMCRegInfo(TheSystemZTarget, + createSystemZMCRegisterInfo); + + // Register the MCSubtargetInfo. + TargetRegistry::RegisterMCSubtargetInfo(TheSystemZTarget, + createSystemZMCSubtargetInfo); + + // Register the MCAsmBackend. + TargetRegistry::RegisterMCAsmBackend(TheSystemZTarget, + createSystemZMCAsmBackend); + + // Register the MCInstPrinter. + TargetRegistry::RegisterMCInstPrinter(TheSystemZTarget, + createSystemZMCInstPrinter); + + // Register the MCObjectStreamer; + TargetRegistry::RegisterMCObjectStreamer(TheSystemZTarget, + createSystemZMCObjectStreamer); +} diff --git a/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h b/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h new file mode 100644 index 0000000..229912f --- /dev/null +++ b/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h @@ -0,0 +1,62 @@ +//===-- SystemZMCTargetDesc.h - SystemZ target descriptions -----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef SYSTEMZMCTARGETDESC_H +#define SYSTEMZMCTARGETDESC_H + +#include "llvm/Support/DataTypes.h" + +namespace llvm { + +class MCAsmBackend; +class MCCodeEmitter; +class MCContext; +class MCInstrInfo; +class MCObjectWriter; +class MCRegisterInfo; +class MCSubtargetInfo; +class StringRef; +class Target; +class raw_ostream; + +extern Target TheSystemZTarget; + +namespace SystemZMC { + // How many bytes are in the ABI-defined, caller-allocated part of + // a stack frame. + const int64_t CallFrameSize = 160; + + // The offset of the DWARF CFA from the incoming stack pointer. + const int64_t CFAOffsetFromInitialSP = CallFrameSize; +} + +MCCodeEmitter *createSystemZMCCodeEmitter(const MCInstrInfo &MCII, + const MCRegisterInfo &MRI, + const MCSubtargetInfo &STI, + MCContext &Ctx); + +MCAsmBackend *createSystemZMCAsmBackend(const Target &T, StringRef TT, + StringRef CPU); + +MCObjectWriter *createSystemZObjectWriter(raw_ostream &OS, uint8_t OSABI); +} // end namespace llvm + +// Defines symbolic names for SystemZ registers. +// This defines a mapping from register name to register number. 
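
The long comment in createSystemZMCCodeGenInfo above boils down to a small decision table. A simplified restatement in standalone C++ (the enum names are placeholders, not the real LLVM ones):

  #include <cassert>

  enum RelocModel { RM_Default, RM_Static, RM_PIC, RM_DynamicNoPIC };
  enum CodeModel  { CM_Default, CM_JITDefault, CM_Small, CM_Medium };

  static void applyDefaults(RelocModel &RM, CodeModel &CM) {
    // Static code also works in a dynamic executable, so DynamicNoPIC folds away.
    if (RM == RM_Default || RM == RM_DynamicNoPIC)
      RM = RM_Static;
    if (CM == CM_Default)
      CM = CM_Small;                               // anything under 4GB satisfies Small
    else if (CM == CM_JITDefault)
      CM = (RM == RM_PIC) ? CM_Small : CM_Medium;  // JITed non-PIC data may be out of LARL range
  }

  int main() {
    RelocModel RM = RM_Default;
    CodeModel CM = CM_JITDefault;
    applyDefaults(RM, CM);
    assert(RM == RM_Static && CM == CM_Medium);
    return 0;
  }
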
+#define GET_REGINFO_ENUM +#include "SystemZGenRegisterInfo.inc" + +// Defines symbolic names for the SystemZ instructions. +#define GET_INSTRINFO_ENUM +#include "SystemZGenInstrInfo.inc" + +#define GET_SUBTARGETINFO_ENUM +#include "SystemZGenSubtargetInfo.inc" + +#endif diff --git a/contrib/llvm/lib/Target/SystemZ/README.txt b/contrib/llvm/lib/Target/SystemZ/README.txt new file mode 100644 index 0000000..d1f56a4 --- /dev/null +++ b/contrib/llvm/lib/Target/SystemZ/README.txt @@ -0,0 +1,146 @@ +//===---------------------------------------------------------------------===// +// Random notes about and ideas for the SystemZ backend. +//===---------------------------------------------------------------------===// + +The initial backend is deliberately restricted to z10. We should add support +for later architectures at some point. + +-- + +SystemZDAGToDAGISel::SelectInlineAsmMemoryOperand() is passed "m" for all +inline asm memory constraints; it doesn't get to see the original constraint. +This means that it must conservatively treat all inline asm constraints +as the most restricted type, "R". + +-- + +If an inline asm ties an i32 "r" result to an i64 input, the input +will be treated as an i32, leaving the upper bits uninitialised. +For example: + +define void @f4(i32 *%dst) { + %val = call i32 asm "blah $0", "=r,0" (i64 103) + store i32 %val, i32 *%dst + ret void +} + +from CodeGen/SystemZ/asm-09.ll will use LHI rather than LGHI. +to load 103. This seems to be a general target-independent problem. + +-- + +The tuning of the choice between Load Address (LA) and addition in +SystemZISelDAGToDAG.cpp is suspect. It should be tweaked based on +performance measurements. + +-- + +There is no scheduling support. + +-- + +We don't use the Branch on Count or Branch on Index families of instruction. + +-- + +We don't use the condition code results of anything except comparisons. + +Implementing this may need something more finely grained than the z_cmp +and z_ucmp that we have now. It might (or might not) also be useful to +have a mask of "don't care" values in conditional branches. For example, +integer comparisons never set CC to 3, so the bottom bit of the CC mask +isn't particularly relevant. JNLH and JE are equally good for testing +equality after an integer comparison, etc. + +-- + +We don't optimize string and block memory operations. + +-- + +We don't take full advantage of builtins like fabsl because the calling +conventions require f128s to be returned by invisible reference. + +-- + +DAGCombiner can detect integer absolute, but there's not yet an associated +ISD opcode. We could add one and implement it using Load Positive. +Negated absolutes could use Load Negative. + +-- + +DAGCombiner doesn't yet fold truncations of extended loads. Functions like: + + unsigned long f (unsigned long x, unsigned short *y) + { + return (x << 32) | *y; + } + +therefore end up as: + + sllg %r2, %r2, 32 + llgh %r0, 0(%r3) + lr %r2, %r0 + br %r14 + +but truncating the load would give: + + sllg %r2, %r2, 32 + lh %r2, 0(%r3) + br %r14 + +-- + +Functions like: + +define i64 @f1(i64 %a) { + %and = and i64 %a, 1 + ret i64 %and +} + +ought to be implemented as: + + lhi %r0, 1 + ngr %r2, %r0 + br %r14 + +but two-address optimisations reverse the order of the AND and force: + + lhi %r0, 1 + ngr %r0, %r2 + lgr %r2, %r0 + br %r14 + +CodeGen/SystemZ/and-04.ll has several examples of this. + +-- + +Out-of-range displacements are usually handled by loading the full +address into a register. 
In many cases it would be better to create +an anchor point instead. E.g. for: + +define void @f4a(i128 *%aptr, i64 %base) { + %addr = add i64 %base, 524288 + %bptr = inttoptr i64 %addr to i128 * + %a = load volatile i128 *%aptr + %b = load i128 *%bptr + %add = add i128 %a, %b + store i128 %add, i128 *%aptr + ret void +} + +(from CodeGen/SystemZ/int-add-08.ll) we load %base+524288 and %base+524296 +into separate registers, rather than using %base+524288 as a base for both. + +-- + +Dynamic stack allocations round the size to 8 bytes and then allocate +that rounded amount. It would be simpler to subtract the unrounded +size from the copy of the stack pointer and then align the result. +See CodeGen/SystemZ/alloca-01.ll for an example. + +-- + +Atomic loads and stores use the default compare-and-swap based implementation. +This is probably much too conservative in practice, and the overhead is +especially bad for 8- and 16-bit accesses. diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZ.h b/contrib/llvm/lib/Target/SystemZ/SystemZ.h new file mode 100644 index 0000000..b811cbe --- /dev/null +++ b/contrib/llvm/lib/Target/SystemZ/SystemZ.h @@ -0,0 +1,77 @@ +//==- SystemZ.h - Top-Level Interface for SystemZ representation -*- C++ -*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the entry points for global functions defined in +// the LLVM SystemZ backend. +// +//===----------------------------------------------------------------------===// + +#ifndef SYSTEMZ_H +#define SYSTEMZ_H + +#include "MCTargetDesc/SystemZMCTargetDesc.h" +#include "llvm/Support/CodeGen.h" + +namespace llvm { + class SystemZTargetMachine; + class FunctionPass; + + namespace SystemZ { + // Condition-code mask values. + const unsigned CCMASK_0 = 1 << 3; + const unsigned CCMASK_1 = 1 << 2; + const unsigned CCMASK_2 = 1 << 1; + const unsigned CCMASK_3 = 1 << 0; + const unsigned CCMASK_ANY = CCMASK_0 | CCMASK_1 | CCMASK_2 | CCMASK_3; + + // Condition-code mask assignments for floating-point comparisons. + const unsigned CCMASK_CMP_EQ = CCMASK_0; + const unsigned CCMASK_CMP_LT = CCMASK_1; + const unsigned CCMASK_CMP_GT = CCMASK_2; + const unsigned CCMASK_CMP_UO = CCMASK_3; + const unsigned CCMASK_CMP_NE = CCMASK_CMP_LT | CCMASK_CMP_GT; + const unsigned CCMASK_CMP_LE = CCMASK_CMP_EQ | CCMASK_CMP_LT; + const unsigned CCMASK_CMP_GE = CCMASK_CMP_EQ | CCMASK_CMP_GT; + const unsigned CCMASK_CMP_O = CCMASK_ANY ^ CCMASK_CMP_UO; + + // Return true if Val fits an LLILL operand. + static inline bool isImmLL(uint64_t Val) { + return (Val & ~0x000000000000ffffULL) == 0; + } + + // Return true if Val fits an LLILH operand. + static inline bool isImmLH(uint64_t Val) { + return (Val & ~0x00000000ffff0000ULL) == 0; + } + + // Return true if Val fits an LLIHL operand. + static inline bool isImmHL(uint64_t Val) { + return (Val & ~0x00000ffff00000000ULL) == 0; + } + + // Return true if Val fits an LLIHH operand. + static inline bool isImmHH(uint64_t Val) { + return (Val & ~0xffff000000000000ULL) == 0; + } + + // Return true if Val fits an LLILF operand. + static inline bool isImmLF(uint64_t Val) { + return (Val & ~0x00000000ffffffffULL) == 0; + } + + // Return true if Val fits an LLIHF operand. 
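
These predicates correspond to the LOAD LOGICAL IMMEDIATE family: LL/LH/HL/HH each test one 16-bit chunk of the value, LF/HF one 32-bit half. (The isImmHL mask above appears to have picked up a stray digit in transit; bits 32-47 correspond to 0x0000ffff00000000.) A standalone sketch that uses the same masks to pick a single-instruction materialization, with the instruction names as plain strings:

  #include <cassert>
  #include <cstdint>
  #include <cstring>

  // Which LLI* instruction, if any, can materialize Val on its own?
  static const char *pickLLI(uint64_t Val) {
    if ((Val & ~0x000000000000ffffULL) == 0) return "LLILL";   // bits 0-15
    if ((Val & ~0x00000000ffff0000ULL) == 0) return "LLILH";   // bits 16-31
    if ((Val & ~0x0000ffff00000000ULL) == 0) return "LLIHL";   // bits 32-47
    if ((Val & ~0xffff000000000000ULL) == 0) return "LLIHH";   // bits 48-63
    if ((Val & ~0x00000000ffffffffULL) == 0) return "LLILF";   // low 32 bits
    if ((Val & ~0xffffffff00000000ULL) == 0) return "LLIHF";   // high 32 bits
    return "";                                                 // needs a multi-instruction sequence
  }

  int main() {
    assert(std::strcmp(pickLLI(0x1234), "LLILL") == 0);
    assert(std::strcmp(pickLLI(0x00120000), "LLILH") == 0);
    assert(std::strcmp(pickLLI(0x12345678), "LLILF") == 0);
    assert(std::strcmp(pickLLI(0x123456789a), "") == 0);
    return 0;
  }
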
+ static inline bool isImmHF(uint64_t Val) { + return (Val & ~0xffffffff00000000ULL) == 0; + } + } + + FunctionPass *createSystemZISelDag(SystemZTargetMachine &TM, + CodeGenOpt::Level OptLevel); +} // end namespace llvm; +#endif diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZ.td b/contrib/llvm/lib/Target/SystemZ/SystemZ.td new file mode 100644 index 0000000..e03c32f --- /dev/null +++ b/contrib/llvm/lib/Target/SystemZ/SystemZ.td @@ -0,0 +1,75 @@ +//===-- SystemZ.td - Describe the SystemZ target machine -----*- tblgen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Target-independent interfaces which we are implementing +//===----------------------------------------------------------------------===// + +include "llvm/Target/Target.td" + +//===----------------------------------------------------------------------===// +// SystemZ supported processors +//===----------------------------------------------------------------------===// + +class Proc<string Name, list<SubtargetFeature> Features> + : Processor<Name, NoItineraries, Features>; + +def : Proc<"z10", []>; + +//===----------------------------------------------------------------------===// +// Register file description +//===----------------------------------------------------------------------===// + +include "SystemZRegisterInfo.td" + +//===----------------------------------------------------------------------===// +// Calling convention description +//===----------------------------------------------------------------------===// + +include "SystemZCallingConv.td" + +//===----------------------------------------------------------------------===// +// Instruction descriptions +//===----------------------------------------------------------------------===// + +include "SystemZOperators.td" +include "SystemZOperands.td" +include "SystemZPatterns.td" +include "SystemZInstrFormats.td" +include "SystemZInstrInfo.td" +include "SystemZInstrFP.td" + +def SystemZInstrInfo : InstrInfo {} + +//===----------------------------------------------------------------------===// +// Assembly parser +//===----------------------------------------------------------------------===// + +def SystemZAsmParser : AsmParser { + let ShouldEmitMatchRegisterName = 0; +} + +//===----------------------------------------------------------------------===// +// Assembly writer +//===----------------------------------------------------------------------===// + +def SystemZAsmWriter : AsmWriter { + string AsmWriterClassName = "InstPrinter"; + bit isMCAsmWriter = 1; +} + +//===----------------------------------------------------------------------===// +// Top-level target declaration +//===----------------------------------------------------------------------===// + +def SystemZ : Target { + let InstructionSet = SystemZInstrInfo; + let AssemblyParsers = [SystemZAsmParser]; + let AssemblyWriters = [SystemZAsmWriter]; +} diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp new file mode 100644 index 0000000..1e15ab1 --- /dev/null +++ b/contrib/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp @@ -0,0 +1,113 @@ +//===-- SystemZAsmPrinter.cpp - SystemZ LLVM assembly printer -------------===// +// +// The LLVM Compiler 
Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Streams SystemZ assembly language and associated data, in the form of +// MCInsts and MCExprs respectively. +// +//===----------------------------------------------------------------------===// + +#include "SystemZAsmPrinter.h" +#include "InstPrinter/SystemZInstPrinter.h" +#include "SystemZConstantPoolValue.h" +#include "SystemZMCInstLower.h" +#include "llvm/CodeGen/MachineModuleInfoImpls.h" +#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/Support/TargetRegistry.h" +#include "llvm/Target/Mangler.h" + +using namespace llvm; + +void SystemZAsmPrinter::EmitInstruction(const MachineInstr *MI) { + SystemZMCInstLower Lower(Mang, MF->getContext(), *this); + MCInst LoweredMI; + Lower.lower(MI, LoweredMI); + OutStreamer.EmitInstruction(LoweredMI); +} + +// Convert a SystemZ-specific constant pool modifier into the associated +// MCSymbolRefExpr variant kind. +static MCSymbolRefExpr::VariantKind +getModifierVariantKind(SystemZCP::SystemZCPModifier Modifier) { + switch (Modifier) { + case SystemZCP::NTPOFF: return MCSymbolRefExpr::VK_NTPOFF; + } + llvm_unreachable("Invalid SystemCPModifier!"); +} + +void SystemZAsmPrinter:: +EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV) { + SystemZConstantPoolValue *ZCPV = + static_cast<SystemZConstantPoolValue*>(MCPV); + + const MCExpr *Expr = + MCSymbolRefExpr::Create(Mang->getSymbol(ZCPV->getGlobalValue()), + getModifierVariantKind(ZCPV->getModifier()), + OutContext); + uint64_t Size = TM.getDataLayout()->getTypeAllocSize(ZCPV->getType()); + + OutStreamer.EmitValue(Expr, Size); +} + +bool SystemZAsmPrinter::PrintAsmOperand(const MachineInstr *MI, + unsigned OpNo, + unsigned AsmVariant, + const char *ExtraCode, + raw_ostream &OS) { + if (ExtraCode && *ExtraCode == 'n') { + if (!MI->getOperand(OpNo).isImm()) + return true; + OS << -int64_t(MI->getOperand(OpNo).getImm()); + } else { + SystemZMCInstLower Lower(Mang, MF->getContext(), *this); + MCOperand MO(Lower.lowerOperand(MI->getOperand(OpNo))); + SystemZInstPrinter::printOperand(MO, OS); + } + return false; +} + +bool SystemZAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, + unsigned OpNo, + unsigned AsmVariant, + const char *ExtraCode, + raw_ostream &OS) { + SystemZInstPrinter::printAddress(MI->getOperand(OpNo).getReg(), + MI->getOperand(OpNo + 1).getImm(), + MI->getOperand(OpNo + 2).getReg(), OS); + return false; +} + +void SystemZAsmPrinter::EmitEndOfAsmFile(Module &M) { + if (Subtarget->isTargetELF()) { + const TargetLoweringObjectFileELF &TLOFELF = + static_cast<const TargetLoweringObjectFileELF &>(getObjFileLowering()); + + MachineModuleInfoELF &MMIELF = MMI->getObjFileInfo<MachineModuleInfoELF>(); + + // Output stubs for external and common global variables. + MachineModuleInfoELF::SymbolListTy Stubs = MMIELF.GetGVStubList(); + if (!Stubs.empty()) { + OutStreamer.SwitchSection(TLOFELF.getDataRelSection()); + const DataLayout *TD = TM.getDataLayout(); + + for (unsigned i = 0, e = Stubs.size(); i != e; ++i) { + OutStreamer.EmitLabel(Stubs[i].first); + OutStreamer.EmitSymbolValue(Stubs[i].second.getPointer(), + TD->getPointerSize(0), 0); + } + Stubs.clear(); + } + } +} + +// Force static initialization. 
+extern "C" void LLVMInitializeSystemZAsmPrinter() { + RegisterAsmPrinter<SystemZAsmPrinter> X(TheSystemZTarget); +} diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZAsmPrinter.h b/contrib/llvm/lib/Target/SystemZ/SystemZAsmPrinter.h new file mode 100644 index 0000000..4b6c51b --- /dev/null +++ b/contrib/llvm/lib/Target/SystemZ/SystemZAsmPrinter.h @@ -0,0 +1,52 @@ +//===-- SystemZAsmPrinter.h - SystemZ LLVM assembly printer ----*- C++ -*--===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef SYSTEMZASMPRINTER_H +#define SYSTEMZASMPRINTER_H + +#include "SystemZTargetMachine.h" +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/Support/Compiler.h" + +namespace llvm { +class MCStreamer; +class MachineBasicBlock; +class MachineInstr; +class Module; +class raw_ostream; + +class LLVM_LIBRARY_VISIBILITY SystemZAsmPrinter : public AsmPrinter { +private: + const SystemZSubtarget *Subtarget; + +public: + SystemZAsmPrinter(TargetMachine &TM, MCStreamer &Streamer) + : AsmPrinter(TM, Streamer) { + Subtarget = &TM.getSubtarget<SystemZSubtarget>(); + } + + // Override AsmPrinter. + virtual const char *getPassName() const LLVM_OVERRIDE { + return "SystemZ Assembly Printer"; + } + virtual void EmitInstruction(const MachineInstr *MI) LLVM_OVERRIDE; + virtual void EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV) + LLVM_OVERRIDE; + virtual bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, + unsigned AsmVariant, const char *ExtraCode, + raw_ostream &OS) LLVM_OVERRIDE; + virtual bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo, + unsigned AsmVariant, + const char *ExtraCode, + raw_ostream &OS) LLVM_OVERRIDE; + virtual void EmitEndOfAsmFile(Module &M) LLVM_OVERRIDE; +}; +} // end namespace llvm + +#endif diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZCallingConv.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZCallingConv.cpp new file mode 100644 index 0000000..cc9c84b --- /dev/null +++ b/contrib/llvm/lib/Target/SystemZ/SystemZCallingConv.cpp @@ -0,0 +1,21 @@ +//===-- SystemZCallingConv.cpp - Calling conventions for SystemZ ----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "SystemZCallingConv.h" +#include "SystemZRegisterInfo.h" + +using namespace llvm; + +const unsigned SystemZ::ArgGPRs[SystemZ::NumArgGPRs] = { + SystemZ::R2D, SystemZ::R3D, SystemZ::R4D, SystemZ::R5D, SystemZ::R6D +}; + +const unsigned SystemZ::ArgFPRs[SystemZ::NumArgFPRs] = { + SystemZ::F0D, SystemZ::F2D, SystemZ::F4D, SystemZ::F6D +}; diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZCallingConv.h b/contrib/llvm/lib/Target/SystemZ/SystemZCallingConv.h new file mode 100644 index 0000000..298985e --- /dev/null +++ b/contrib/llvm/lib/Target/SystemZ/SystemZCallingConv.h @@ -0,0 +1,23 @@ +//===-- SystemZCallingConv.h - Calling conventions for SystemZ --*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
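
ArgGPRs and ArgFPRs above are the SVR4 argument registers that the calling-convention description which follows hands out in order: up to five integer arguments in %r2-%r6 and up to four float/double arguments in %f0/%f2/%f4/%f6, with the remainder going to 8-byte stack slots. A toy standalone model of that assignment (it ignores extension attributes and f128, and the 160-byte starting offset for stack arguments is this sketch's assumption about where outgoing parameters begin):

  #include <cstdio>
  #include <string>

  int main() {
    const char *GPRs[] = { "%r2", "%r3", "%r4", "%r5", "%r6" };
    const char *FPRs[] = { "%f0", "%f2", "%f4", "%f6" };

    // 'i' = integer argument (i32/i64), 'f' = float/double argument,
    // e.g. long f(long, double, long, int, float, long, long, long).
    std::string Args = "ifiifiii";

    unsigned NextGPR = 0, NextFPR = 0, StackOffset = 160;
    for (unsigned I = 0; I < Args.size(); ++I) {
      if (Args[I] == 'i' && NextGPR < 5)
        std::printf("arg %u -> %s\n", I + 1, GPRs[NextGPR++]);
      else if (Args[I] == 'f' && NextFPR < 4)
        std::printf("arg %u -> %s\n", I + 1, FPRs[NextFPR++]);
      else {
        std::printf("arg %u -> %u(%%r15)\n", I + 1, StackOffset);
        StackOffset += 8;
      }
    }
    return 0;
  }

For the sample signature this prints %r2, %f0, %r3, %r4, %f2, %r5, %r6 and one stack slot for the eighth argument.
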
+// +//===----------------------------------------------------------------------===// + +#ifndef SYSTEMZCALLINGCONV_H +#define SYSTEMZCALLINGCONV_H + +namespace llvm { + namespace SystemZ { + const unsigned NumArgGPRs = 5; + extern const unsigned ArgGPRs[NumArgGPRs]; + + const unsigned NumArgFPRs = 4; + extern const unsigned ArgFPRs[NumArgFPRs]; + } +} + +#endif diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZCallingConv.td b/contrib/llvm/lib/Target/SystemZ/SystemZCallingConv.td new file mode 100644 index 0000000..c2d727f --- /dev/null +++ b/contrib/llvm/lib/Target/SystemZ/SystemZCallingConv.td @@ -0,0 +1,65 @@ +//=- SystemZCallingConv.td - Calling conventions for SystemZ -*- tablegen -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// This describes the calling conventions for the SystemZ ABI. +//===----------------------------------------------------------------------===// + +class CCIfExtend<CCAction A> + : CCIf<"ArgFlags.isSExt() || ArgFlags.isZExt()", A>; + +//===----------------------------------------------------------------------===// +// SVR4 return value calling convention +//===----------------------------------------------------------------------===// +def RetCC_SystemZ : CallingConv<[ + // Promote i32 to i64 if it has an explicit extension type. + CCIfType<[i32], CCIfExtend<CCPromoteToType<i64>>>, + + // ABI-compliant code returns 64-bit integers in R2. Make the other + // call-clobbered argument registers available for code that doesn't + // care about the ABI. (R6 is an argument register too, but is + // call-saved and therefore not suitable for return values.) + CCIfType<[i32], CCAssignToReg<[R2W, R3W, R4W, R5W]>>, + CCIfType<[i64], CCAssignToReg<[R2D, R3D, R4D, R5D]>>, + + // ABI-complaint code returns float and double in F0. Make the + // other floating-point argument registers available for code that + // doesn't care about the ABI. All floating-point argument registers + // are call-clobbered, so we can use all of them here. + CCIfType<[f32], CCAssignToReg<[F0S, F2S, F4S, F6S]>>, + CCIfType<[f64], CCAssignToReg<[F0D, F2D, F4D, F6D]>> + + // ABI-compliant code returns long double by reference, but that conversion + // is left to higher-level code. Perhaps we could add an f128 definition + // here for code that doesn't care about the ABI? +]>; + +//===----------------------------------------------------------------------===// +// SVR4 argument calling conventions +//===----------------------------------------------------------------------===// +def CC_SystemZ : CallingConv<[ + // Promote i32 to i64 if it has an explicit extension type. + // The convention is that true integer arguments that are smaller + // than 64 bits should be marked as extended, but structures that + // are smaller than 64 bits shouldn't. + CCIfType<[i32], CCIfExtend<CCPromoteToType<i64>>>, + + // Force long double values to the stack and pass i64 pointers to them. + CCIfType<[f128], CCPassIndirect<i64>>, + + // The first 5 integer arguments are passed in R2-R6. Note that R6 + // is call-saved. + CCIfType<[i32], CCAssignToReg<[R2W, R3W, R4W, R5W, R6W]>>, + CCIfType<[i64], CCAssignToReg<[R2D, R3D, R4D, R5D, R6D]>>, + + // The first 4 float and double arguments are passed in even registers F0-F6. 
+ CCIfType<[f32], CCAssignToReg<[F0S, F2S, F4S, F6S]>>, + CCIfType<[f64], CCAssignToReg<[F0D, F2D, F4D, F6D]>>, + + // Other arguments are passed in 8-byte-aligned 8-byte stack slots. + CCIfType<[i32, i64, f32, f64], CCAssignToStack<8, 8>> +]>; diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZConstantPoolValue.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZConstantPoolValue.cpp new file mode 100644 index 0000000..e9c4f6d --- /dev/null +++ b/contrib/llvm/lib/Target/SystemZ/SystemZConstantPoolValue.cpp @@ -0,0 +1,62 @@ +//===-- SystemZConstantPoolValue.cpp - SystemZ constant-pool value --------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "SystemZConstantPoolValue.h" +#include "llvm/ADT/FoldingSet.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/GlobalValue.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +SystemZConstantPoolValue:: +SystemZConstantPoolValue(const GlobalValue *gv, + SystemZCP::SystemZCPModifier modifier) + : MachineConstantPoolValue(gv->getType()), GV(gv), Modifier(modifier) {} + +SystemZConstantPoolValue * +SystemZConstantPoolValue::Create(const GlobalValue *GV, + SystemZCP::SystemZCPModifier Modifier) { + return new SystemZConstantPoolValue(GV, Modifier); +} + +unsigned SystemZConstantPoolValue::getRelocationInfo() const { + switch (Modifier) { + case SystemZCP::NTPOFF: + // May require a relocation, but the relocations are always resolved + // by the static linker. + return 1; + } + llvm_unreachable("Unknown modifier"); +} + +int SystemZConstantPoolValue:: +getExistingMachineCPValue(MachineConstantPool *CP, unsigned Alignment) { + unsigned AlignMask = Alignment - 1; + const std::vector<MachineConstantPoolEntry> Constants = CP->getConstants(); + for (unsigned I = 0, E = Constants.size(); I != E; ++I) { + if (Constants[I].isMachineConstantPoolEntry() && + (Constants[I].getAlignment() & AlignMask) == 0) { + SystemZConstantPoolValue *ZCPV = + static_cast<SystemZConstantPoolValue *>(Constants[I].Val.MachineCPVal); + if (ZCPV->GV == GV && ZCPV->Modifier == Modifier) + return I; + } + } + return -1; +} + +void SystemZConstantPoolValue::addSelectionDAGCSEId(FoldingSetNodeID &ID) { + ID.AddPointer(GV); + ID.AddInteger(Modifier); +} + +void SystemZConstantPoolValue::print(raw_ostream &O) const { + O << GV << "@" << int(Modifier); +} diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZConstantPoolValue.h b/contrib/llvm/lib/Target/SystemZ/SystemZConstantPoolValue.h new file mode 100644 index 0000000..9927bdb --- /dev/null +++ b/contrib/llvm/lib/Target/SystemZ/SystemZConstantPoolValue.h @@ -0,0 +1,55 @@ +//===- SystemZConstantPoolValue.h - SystemZ constant-pool value -*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef SYSTEMZCONSTANTPOOLVALUE_H +#define SYSTEMZCONSTANTPOOLVALUE_H + +#include "llvm/CodeGen/MachineConstantPool.h" +#include "llvm/Support/ErrorHandling.h" + +namespace llvm { + +class GlobalValue; + +namespace SystemZCP { + enum SystemZCPModifier { + NTPOFF + }; +} + +/// A SystemZ-specific constant pool value. 
At present, the only +/// defined constant pool values are offsets of thread-local variables +/// (written x@NTPOFF). +class SystemZConstantPoolValue : public MachineConstantPoolValue { + const GlobalValue *GV; + SystemZCP::SystemZCPModifier Modifier; + +protected: + SystemZConstantPoolValue(const GlobalValue *GV, + SystemZCP::SystemZCPModifier Modifier); + +public: + static SystemZConstantPoolValue * + Create(const GlobalValue *GV, SystemZCP::SystemZCPModifier Modifier); + + // Override MachineConstantPoolValue. + virtual unsigned getRelocationInfo() const LLVM_OVERRIDE; + virtual int getExistingMachineCPValue(MachineConstantPool *CP, + unsigned Alignment) LLVM_OVERRIDE; + virtual void addSelectionDAGCSEId(FoldingSetNodeID &ID) LLVM_OVERRIDE; + virtual void print(raw_ostream &O) const LLVM_OVERRIDE; + + // Access SystemZ-specific fields. + const GlobalValue *getGlobalValue() const { return GV; } + SystemZCP::SystemZCPModifier getModifier() const { return Modifier; } +}; + +} // End llvm namespace + +#endif diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp new file mode 100644 index 0000000..fda33de --- /dev/null +++ b/contrib/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp @@ -0,0 +1,535 @@ +//===-- SystemZFrameLowering.cpp - Frame lowering for SystemZ -------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "SystemZFrameLowering.h" +#include "SystemZCallingConv.h" +#include "SystemZInstrBuilder.h" +#include "SystemZMachineFunctionInfo.h" +#include "SystemZTargetMachine.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/IR/Function.h" + +using namespace llvm; + +SystemZFrameLowering::SystemZFrameLowering(const SystemZTargetMachine &tm, + const SystemZSubtarget &sti) + : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, 8, + -SystemZMC::CallFrameSize), + TM(tm), + STI(sti) { + // The ABI-defined register save slots, relative to the incoming stack + // pointer. + static const unsigned SpillOffsetTable[][2] = { + { SystemZ::R2D, 0x10 }, + { SystemZ::R3D, 0x18 }, + { SystemZ::R4D, 0x20 }, + { SystemZ::R5D, 0x28 }, + { SystemZ::R6D, 0x30 }, + { SystemZ::R7D, 0x38 }, + { SystemZ::R8D, 0x40 }, + { SystemZ::R9D, 0x48 }, + { SystemZ::R10D, 0x50 }, + { SystemZ::R11D, 0x58 }, + { SystemZ::R12D, 0x60 }, + { SystemZ::R13D, 0x68 }, + { SystemZ::R14D, 0x70 }, + { SystemZ::R15D, 0x78 }, + { SystemZ::F0D, 0x80 }, + { SystemZ::F2D, 0x88 }, + { SystemZ::F4D, 0x90 }, + { SystemZ::F6D, 0x98 } + }; + + // Create a mapping from register number to save slot offset. 
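
The table follows a simple pattern relative to the incoming %r15: GPR %rN has its ABI-defined save slot at offset 8*N, and the argument FPRs %f0/%f2/%f4/%f6 follow at 128 + 4*N, with the last slot ending exactly at the 160-byte boundary. A quick standalone check of a few entries:

  #include <cassert>

  static unsigned gprSlot(unsigned N) { return 8 * N; }        // %rN
  static unsigned fprSlot(unsigned N) { return 128 + 4 * N; }  // %fN, N even

  int main() {
    assert(gprSlot(2)  == 0x10);
    assert(gprSlot(6)  == 0x30);
    assert(gprSlot(15) == 0x78);
    assert(fprSlot(0)  == 0x80);
    assert(fprSlot(6)  == 0x98);
    assert(fprSlot(6) + 8 == 160);   // the final slot ends at the ABI call frame size
    return 0;
  }
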
+ RegSpillOffsets.grow(SystemZ::NUM_TARGET_REGS); + for (unsigned I = 0, E = array_lengthof(SpillOffsetTable); I != E; ++I) + RegSpillOffsets[SpillOffsetTable[I][0]] = SpillOffsetTable[I][1]; +} + +void SystemZFrameLowering:: +processFunctionBeforeCalleeSavedScan(MachineFunction &MF, + RegScavenger *RS) const { + MachineFrameInfo *MFFrame = MF.getFrameInfo(); + MachineRegisterInfo &MRI = MF.getRegInfo(); + const TargetRegisterInfo *TRI = MF.getTarget().getRegisterInfo(); + bool HasFP = hasFP(MF); + SystemZMachineFunctionInfo *MFI = MF.getInfo<SystemZMachineFunctionInfo>(); + bool IsVarArg = MF.getFunction()->isVarArg(); + + // va_start stores incoming FPR varargs in the normal way, but delegates + // the saving of incoming GPR varargs to spillCalleeSavedRegisters(). + // Record these pending uses, which typically include the call-saved + // argument register R6D. + if (IsVarArg) + for (unsigned I = MFI->getVarArgsFirstGPR(); I < SystemZ::NumArgGPRs; ++I) + MRI.setPhysRegUsed(SystemZ::ArgGPRs[I]); + + // If the function requires a frame pointer, record that the hard + // frame pointer will be clobbered. + if (HasFP) + MRI.setPhysRegUsed(SystemZ::R11D); + + // If the function calls other functions, record that the return + // address register will be clobbered. + if (MFFrame->hasCalls()) + MRI.setPhysRegUsed(SystemZ::R14D); + + // If we are saving GPRs other than the stack pointer, we might as well + // save and restore the stack pointer at the same time, via STMG and LMG. + // This allows the deallocation to be done by the LMG, rather than needing + // a separate %r15 addition. + const uint16_t *CSRegs = TRI->getCalleeSavedRegs(&MF); + for (unsigned I = 0; CSRegs[I]; ++I) { + unsigned Reg = CSRegs[I]; + if (SystemZ::GR64BitRegClass.contains(Reg) && MRI.isPhysRegUsed(Reg)) { + MRI.setPhysRegUsed(SystemZ::R15D); + break; + } + } +} + +// Add GPR64 to the save instruction being built by MIB, which is in basic +// block MBB. IsImplicit says whether this is an explicit operand to the +// instruction, or an implicit one that comes between the explicit start +// and end registers. +static void addSavedGPR(MachineBasicBlock &MBB, MachineInstrBuilder &MIB, + const SystemZTargetMachine &TM, + unsigned GPR64, bool IsImplicit) { + const SystemZRegisterInfo *RI = TM.getRegisterInfo(); + unsigned GPR32 = RI->getSubReg(GPR64, SystemZ::subreg_32bit); + bool IsLive = MBB.isLiveIn(GPR64) || MBB.isLiveIn(GPR32); + if (!IsLive || !IsImplicit) { + MIB.addReg(GPR64, getImplRegState(IsImplicit) | getKillRegState(!IsLive)); + if (!IsLive) + MBB.addLiveIn(GPR64); + } +} + +bool SystemZFrameLowering:: +spillCalleeSavedRegisters(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + const std::vector<CalleeSavedInfo> &CSI, + const TargetRegisterInfo *TRI) const { + if (CSI.empty()) + return false; + + MachineFunction &MF = *MBB.getParent(); + const TargetInstrInfo *TII = MF.getTarget().getInstrInfo(); + SystemZMachineFunctionInfo *ZFI = MF.getInfo<SystemZMachineFunctionInfo>(); + bool IsVarArg = MF.getFunction()->isVarArg(); + DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc(); + + // Scan the call-saved GPRs and find the bounds of the register spill area. 
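
The scan below works because the save slots sit in register order, so a single STMG from the lowest saved GPR up to %r15 drops every register into its own ABI slot. A standalone sketch with a hypothetical clobber set (the real loop walks CSI and RegSpillOffsets rather than a plain array):

  #include <cstdio>

  int main() {
    bool Saved[16] = {};
    Saved[6] = Saved[12] = Saved[13] = Saved[14] = Saved[15] = true;  // made-up clobber set

    unsigned LowGPR = 0, StartOffset = 0;
    for (unsigned N = 2; N <= 15; ++N)
      if (Saved[N]) { LowGPR = N; StartOffset = 8 * N; break; }       // slot offset is 8*N

    // Everything from LowGPR up to %r15 goes out with one store-multiple.
    std::printf("STMG %%r%u, %%r15, %u(%%r15)\n", LowGPR, StartOffset);  // STMG %r6, %r15, 48(%r15)
    return 0;
  }
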
+ unsigned SavedGPRFrameSize = 0; + unsigned LowGPR = 0; + unsigned HighGPR = SystemZ::R15D; + unsigned StartOffset = -1U; + for (unsigned I = 0, E = CSI.size(); I != E; ++I) { + unsigned Reg = CSI[I].getReg(); + if (SystemZ::GR64BitRegClass.contains(Reg)) { + SavedGPRFrameSize += 8; + unsigned Offset = RegSpillOffsets[Reg]; + assert(Offset && "Unexpected GPR save"); + if (StartOffset > Offset) { + LowGPR = Reg; + StartOffset = Offset; + } + } + } + + // Save information about the range and location of the call-saved + // registers, for use by the epilogue inserter. + ZFI->setSavedGPRFrameSize(SavedGPRFrameSize); + ZFI->setLowSavedGPR(LowGPR); + ZFI->setHighSavedGPR(HighGPR); + + // Include the GPR varargs, if any. R6D is call-saved, so would + // be included by the loop above, but we also need to handle the + // call-clobbered argument registers. + if (IsVarArg) { + unsigned FirstGPR = ZFI->getVarArgsFirstGPR(); + if (FirstGPR < SystemZ::NumArgGPRs) { + unsigned Reg = SystemZ::ArgGPRs[FirstGPR]; + unsigned Offset = RegSpillOffsets[Reg]; + if (StartOffset > Offset) { + LowGPR = Reg; StartOffset = Offset; + } + } + } + + // Save GPRs + if (LowGPR) { + assert(LowGPR != HighGPR && "Should be saving %r15 and something else"); + + // Build an STMG instruction. + MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(SystemZ::STMG)); + + // Add the explicit register operands. + addSavedGPR(MBB, MIB, TM, LowGPR, false); + addSavedGPR(MBB, MIB, TM, HighGPR, false); + + // Add the address. + MIB.addReg(SystemZ::R15D).addImm(StartOffset); + + // Make sure all call-saved GPRs are included as operands and are + // marked as live on entry. + for (unsigned I = 0, E = CSI.size(); I != E; ++I) { + unsigned Reg = CSI[I].getReg(); + if (SystemZ::GR64BitRegClass.contains(Reg)) + addSavedGPR(MBB, MIB, TM, Reg, true); + } + + // ...likewise GPR varargs. + if (IsVarArg) + for (unsigned I = ZFI->getVarArgsFirstGPR(); I < SystemZ::NumArgGPRs; ++I) + addSavedGPR(MBB, MIB, TM, SystemZ::ArgGPRs[I], true); + } + + // Save FPRs in the normal TargetInstrInfo way. + for (unsigned I = 0, E = CSI.size(); I != E; ++I) { + unsigned Reg = CSI[I].getReg(); + if (SystemZ::FP64BitRegClass.contains(Reg)) { + MBB.addLiveIn(Reg); + TII->storeRegToStackSlot(MBB, MBBI, Reg, true, CSI[I].getFrameIdx(), + &SystemZ::FP64BitRegClass, TRI); + } + } + + return true; +} + +bool SystemZFrameLowering:: +restoreCalleeSavedRegisters(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + const std::vector<CalleeSavedInfo> &CSI, + const TargetRegisterInfo *TRI) const { + if (CSI.empty()) + return false; + + MachineFunction &MF = *MBB.getParent(); + const TargetInstrInfo *TII = MF.getTarget().getInstrInfo(); + SystemZMachineFunctionInfo *ZFI = MF.getInfo<SystemZMachineFunctionInfo>(); + bool HasFP = hasFP(MF); + DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc(); + + // Restore FPRs in the normal TargetInstrInfo way. + for (unsigned I = 0, E = CSI.size(); I != E; ++I) { + unsigned Reg = CSI[I].getReg(); + if (SystemZ::FP64BitRegClass.contains(Reg)) + TII->loadRegFromStackSlot(MBB, MBBI, Reg, CSI[I].getFrameIdx(), + &SystemZ::FP64BitRegClass, TRI); + } + + // Restore call-saved GPRs (but not call-clobbered varargs, which at + // this point might hold return values). + unsigned LowGPR = ZFI->getLowSavedGPR(); + unsigned HighGPR = ZFI->getHighSavedGPR(); + unsigned StartOffset = RegSpillOffsets[LowGPR]; + if (LowGPR) { + // If we saved any of %r2-%r5 as varargs, we should also be saving + // and restoring %r6. 
If we're saving %r6 or above, we should be + // restoring it too. + assert(LowGPR != HighGPR && "Should be loading %r15 and something else"); + + // Build an LMG instruction. + MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(SystemZ::LMG)); + + // Add the explicit register operands. + MIB.addReg(LowGPR, RegState::Define); + MIB.addReg(HighGPR, RegState::Define); + + // Add the address. + MIB.addReg(HasFP ? SystemZ::R11D : SystemZ::R15D); + MIB.addImm(StartOffset); + + // Do a second scan adding regs as being defined by instruction + for (unsigned I = 0, E = CSI.size(); I != E; ++I) { + unsigned Reg = CSI[I].getReg(); + if (Reg != LowGPR && Reg != HighGPR) + MIB.addReg(Reg, RegState::ImplicitDefine); + } + } + + return true; +} + +// Emit instructions before MBBI (in MBB) to add NumBytes to Reg. +static void emitIncrement(MachineBasicBlock &MBB, + MachineBasicBlock::iterator &MBBI, + const DebugLoc &DL, + unsigned Reg, int64_t NumBytes, + const TargetInstrInfo *TII) { + while (NumBytes) { + unsigned Opcode; + int64_t ThisVal = NumBytes; + if (isInt<16>(NumBytes)) + Opcode = SystemZ::AGHI; + else { + Opcode = SystemZ::AGFI; + // Make sure we maintain 8-byte stack alignment. + int64_t MinVal = -int64_t(1) << 31; + int64_t MaxVal = (int64_t(1) << 31) - 8; + if (ThisVal < MinVal) + ThisVal = MinVal; + else if (ThisVal > MaxVal) + ThisVal = MaxVal; + } + MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII->get(Opcode), Reg) + .addReg(Reg).addImm(ThisVal); + // The PSW implicit def is dead. + MI->getOperand(3).setIsDead(); + NumBytes -= ThisVal; + } +} + +void SystemZFrameLowering::emitPrologue(MachineFunction &MF) const { + MachineBasicBlock &MBB = MF.front(); + MachineFrameInfo *MFFrame = MF.getFrameInfo(); + const SystemZInstrInfo *ZII = + static_cast<const SystemZInstrInfo*>(MF.getTarget().getInstrInfo()); + SystemZMachineFunctionInfo *ZFI = MF.getInfo<SystemZMachineFunctionInfo>(); + MachineBasicBlock::iterator MBBI = MBB.begin(); + MachineModuleInfo &MMI = MF.getMMI(); + std::vector<MachineMove> &Moves = MMI.getFrameMoves(); + const std::vector<CalleeSavedInfo> &CSI = MFFrame->getCalleeSavedInfo(); + bool HasFP = hasFP(MF); + DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc(); + + // The current offset of the stack pointer from the CFA. + int64_t SPOffsetFromCFA = -SystemZMC::CFAOffsetFromInitialSP; + + if (ZFI->getLowSavedGPR()) { + // Skip over the GPR saves. + if (MBBI != MBB.end() && MBBI->getOpcode() == SystemZ::STMG) + ++MBBI; + else + llvm_unreachable("Couldn't skip over GPR saves"); + + // Add CFI for the GPR saves. + MCSymbol *GPRSaveLabel = MMI.getContext().CreateTempSymbol(); + BuildMI(MBB, MBBI, DL, + ZII->get(TargetOpcode::PROLOG_LABEL)).addSym(GPRSaveLabel); + for (std::vector<CalleeSavedInfo>::const_iterator + I = CSI.begin(), E = CSI.end(); I != E; ++I) { + unsigned Reg = I->getReg(); + if (SystemZ::GR64BitRegClass.contains(Reg)) { + int64_t Offset = SPOffsetFromCFA + RegSpillOffsets[Reg]; + MachineLocation StackSlot(MachineLocation::VirtualFP, Offset); + MachineLocation RegValue(Reg); + Moves.push_back(MachineMove(GPRSaveLabel, StackSlot, RegValue)); + } + } + } + + uint64_t StackSize = getAllocatedStackSize(MF); + if (StackSize) { + // Allocate StackSize bytes. + int64_t Delta = -int64_t(StackSize); + emitIncrement(MBB, MBBI, DL, SystemZ::R15D, Delta, ZII); + + // Add CFI for the allocation. 
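
To put numbers on the allocation and its CFI record: on entry the CFA is the incoming %r15 plus 160, and after the prologue subtracts the frame size the same CFA has to be described relative to the new %r15. A standalone check with a hypothetical 184-byte frame:

  #include <cassert>
  #include <cstdint>

  int main() {
    int64_t SPOffsetFromCFA = -160;   // incoming %r15 relative to the CFA
    int64_t StackSize = 184;          // hypothetical allocated frame size
    int64_t Delta = -StackSize;

    // 184 fits a signed 16-bit immediate, so emitIncrement needs just one AGHI.
    assert(Delta >= INT16_MIN && Delta <= INT16_MAX);

    SPOffsetFromCFA += Delta;         // what the CFI move below records as the new CFA offset
    assert(SPOffsetFromCFA == -344);  // i.e. CFA = new %r15 + 344
    return 0;
  }
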
+ MCSymbol *AdjustSPLabel = MMI.getContext().CreateTempSymbol(); + BuildMI(MBB, MBBI, DL, ZII->get(TargetOpcode::PROLOG_LABEL)) + .addSym(AdjustSPLabel); + MachineLocation FPDest(MachineLocation::VirtualFP); + MachineLocation FPSrc(MachineLocation::VirtualFP, SPOffsetFromCFA + Delta); + Moves.push_back(MachineMove(AdjustSPLabel, FPDest, FPSrc)); + SPOffsetFromCFA += Delta; + } + + if (HasFP) { + // Copy the base of the frame to R11. + BuildMI(MBB, MBBI, DL, ZII->get(SystemZ::LGR), SystemZ::R11D) + .addReg(SystemZ::R15D); + + // Add CFI for the new frame location. + MCSymbol *SetFPLabel = MMI.getContext().CreateTempSymbol(); + BuildMI(MBB, MBBI, DL, ZII->get(TargetOpcode::PROLOG_LABEL)) + .addSym(SetFPLabel); + MachineLocation HardFP(SystemZ::R11D); + MachineLocation VirtualFP(MachineLocation::VirtualFP); + Moves.push_back(MachineMove(SetFPLabel, HardFP, VirtualFP)); + + // Mark the FramePtr as live at the beginning of every block except + // the entry block. (We'll have marked R11 as live on entry when + // saving the GPRs.) + for (MachineFunction::iterator + I = llvm::next(MF.begin()), E = MF.end(); I != E; ++I) + I->addLiveIn(SystemZ::R11D); + } + + // Skip over the FPR saves. + MCSymbol *FPRSaveLabel = 0; + for (std::vector<CalleeSavedInfo>::const_iterator + I = CSI.begin(), E = CSI.end(); I != E; ++I) { + unsigned Reg = I->getReg(); + if (SystemZ::FP64BitRegClass.contains(Reg)) { + if (MBBI != MBB.end() && + (MBBI->getOpcode() == SystemZ::STD || + MBBI->getOpcode() == SystemZ::STDY)) + ++MBBI; + else + llvm_unreachable("Couldn't skip over FPR save"); + + // Add CFI for the this save. + if (!FPRSaveLabel) + FPRSaveLabel = MMI.getContext().CreateTempSymbol(); + unsigned Reg = I->getReg(); + int64_t Offset = getFrameIndexOffset(MF, I->getFrameIdx()); + MachineLocation Slot(MachineLocation::VirtualFP, + SPOffsetFromCFA + Offset); + MachineLocation RegValue(Reg); + Moves.push_back(MachineMove(FPRSaveLabel, Slot, RegValue)); + } + } + // Complete the CFI for the FPR saves, modelling them as taking effect + // after the last save. + if (FPRSaveLabel) + BuildMI(MBB, MBBI, DL, ZII->get(TargetOpcode::PROLOG_LABEL)) + .addSym(FPRSaveLabel); +} + +void SystemZFrameLowering::emitEpilogue(MachineFunction &MF, + MachineBasicBlock &MBB) const { + MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr(); + const SystemZInstrInfo *ZII = + static_cast<const SystemZInstrInfo*>(MF.getTarget().getInstrInfo()); + SystemZMachineFunctionInfo *ZFI = MF.getInfo<SystemZMachineFunctionInfo>(); + + // Skip the return instruction. + assert(MBBI->getOpcode() == SystemZ::RET && + "Can only insert epilogue into returning blocks"); + + uint64_t StackSize = getAllocatedStackSize(MF); + if (ZFI->getLowSavedGPR()) { + --MBBI; + unsigned Opcode = MBBI->getOpcode(); + if (Opcode != SystemZ::LMG) + llvm_unreachable("Expected to see callee-save register restore code"); + + unsigned AddrOpNo = 2; + DebugLoc DL = MBBI->getDebugLoc(); + uint64_t Offset = StackSize + MBBI->getOperand(AddrOpNo + 1).getImm(); + unsigned NewOpcode = ZII->getOpcodeForOffset(Opcode, Offset); + + // If the offset is too large, use the largest stack-aligned offset + // and add the rest to the base register (the stack or frame pointer). 
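
Concretely, LMG only has a signed 20-bit displacement, so a very large frame cannot reach the save slots directly once StackSize has been folded back into the offset. A standalone illustration with made-up sizes; 0x7fff8 is the largest 8-byte-aligned value such a displacement can hold, matching the constant used just below:

  #include <cstdio>
  #include <cstdint>

  int main() {
    uint64_t StackSize = 0x90000;              // hypothetical 576KiB frame
    uint64_t Offset = StackSize + 48;          // %r6's slot with the deallocation folded in
    const uint64_t MaxDisp = 0x7fff8;

    if (Offset > MaxDisp) {                    // stand-in for getOpcodeForOffset failing
      uint64_t NumBytes = Offset - MaxDisp;
      std::printf("AGFI %%r15, %llu\n", (unsigned long long)NumBytes);  // peel off the excess first
      Offset -= NumBytes;
    }
    std::printf("LMG %%r6, %%r15, %llu(%%r15)\n", (unsigned long long)Offset);
    return 0;
  }
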
+ if (!NewOpcode) { + uint64_t NumBytes = Offset - 0x7fff8; + emitIncrement(MBB, MBBI, DL, MBBI->getOperand(AddrOpNo).getReg(), + NumBytes, ZII); + Offset -= NumBytes; + NewOpcode = ZII->getOpcodeForOffset(Opcode, Offset); + assert(NewOpcode && "No restore instruction available"); + } + + MBBI->setDesc(ZII->get(NewOpcode)); + MBBI->getOperand(AddrOpNo + 1).ChangeToImmediate(Offset); + } else if (StackSize) { + DebugLoc DL = MBBI->getDebugLoc(); + emitIncrement(MBB, MBBI, DL, SystemZ::R15D, StackSize, ZII); + } +} + +bool SystemZFrameLowering::hasFP(const MachineFunction &MF) const { + return (MF.getTarget().Options.DisableFramePointerElim(MF) || + MF.getFrameInfo()->hasVarSizedObjects() || + MF.getInfo<SystemZMachineFunctionInfo>()->getManipulatesSP()); +} + +int SystemZFrameLowering::getFrameIndexOffset(const MachineFunction &MF, + int FI) const { + const MachineFrameInfo *MFFrame = MF.getFrameInfo(); + + // Start with the offset of FI from the top of the caller-allocated frame + // (i.e. the top of the 160 bytes allocated by the caller). This initial + // offset is therefore negative. + int64_t Offset = (MFFrame->getObjectOffset(FI) + + MFFrame->getOffsetAdjustment()); + if (FI >= 0) + // Non-fixed objects are allocated below the incoming stack pointer. + // Account for the space at the top of the frame that we choose not + // to allocate. + Offset += getUnallocatedTopBytes(MF); + + // Make the offset relative to the incoming stack pointer. + Offset -= getOffsetOfLocalArea(); + + // Make the offset relative to the bottom of the frame. + Offset += getAllocatedStackSize(MF); + + return Offset; +} + +uint64_t SystemZFrameLowering:: +getUnallocatedTopBytes(const MachineFunction &MF) const { + return MF.getInfo<SystemZMachineFunctionInfo>()->getSavedGPRFrameSize(); +} + +uint64_t SystemZFrameLowering:: +getAllocatedStackSize(const MachineFunction &MF) const { + const MachineFrameInfo *MFFrame = MF.getFrameInfo(); + + // Start with the size of the local variables and spill slots. + uint64_t StackSize = MFFrame->getStackSize(); + + // Remove any bytes that we choose not to allocate. + StackSize -= getUnallocatedTopBytes(MF); + + // Include space for an emergency spill slot, if one might be needed. + StackSize += getEmergencySpillSlotSize(MF); + + // We need to allocate the ABI-defined 160-byte base area whenever + // we allocate stack space for our own use and whenever we call another + // function. + if (StackSize || MFFrame->hasVarSizedObjects() || MFFrame->hasCalls()) + StackSize += SystemZMC::CallFrameSize; + + return StackSize; +} + +unsigned SystemZFrameLowering:: +getEmergencySpillSlotSize(const MachineFunction &MF) const { + const MachineFrameInfo *MFFrame = MF.getFrameInfo(); + uint64_t MaxReach = MFFrame->getStackSize() + SystemZMC::CallFrameSize * 2; + return isUInt<12>(MaxReach) ? 0 : 8; +} + +unsigned SystemZFrameLowering:: +getEmergencySpillSlotOffset(const MachineFunction &MF) const { + assert(getEmergencySpillSlotSize(MF) && "No emergency spill slot"); + return SystemZMC::CallFrameSize; +} + +bool +SystemZFrameLowering::hasReservedCallFrame(const MachineFunction &MF) const { + // The ABI requires us to allocate 160 bytes of stack space for the callee, + // with any outgoing stack arguments being placed above that. It seems + // better to make that area a permanent feature of the frame even if + // we're using a frame pointer. 
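
Two of the size computations above are easy to sanity-check by hand: getAllocatedStackSize drops the unneeded GPR save slots and then adds the 160-byte area for our own callees, and getEmergencySpillSlotSize only reserves a slot once the worst-case spill offset stops fitting a 12-bit unsigned displacement. A standalone check with made-up sizes:

  #include <cassert>
  #include <cstdint>

  int main() {
    // getAllocatedStackSize: 40 bytes of locals/spills, 16 of which are the
    // generic GPR save slots we never use; the function makes calls.
    uint64_t StackSize = 40;
    StackSize -= 16;            // getUnallocatedTopBytes
    StackSize += 0;             // no emergency spill slot needed for this frame
    StackSize += 160;           // non-leaf, so allocate the callee's ABI area
    assert(StackSize == 184);

    // getEmergencySpillSlotSize: MaxReach = raw frame size + 2 * 160.
    auto slotSize = [](uint64_t FrameBytes) -> unsigned {
      uint64_t MaxReach = FrameBytes + 2 * 160;
      return MaxReach <= 4095 ? 0u : 8u;
    };
    assert(slotSize(3775) == 0);   // 4095 still fits a 12-bit displacement
    assert(slotSize(3776) == 8);   // 4096 does not, so reserve 8 bytes
    return 0;
  }
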
+ return true; +} + +void SystemZFrameLowering:: +eliminateCallFramePseudoInstr(MachineFunction &MF, + MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI) const { + switch (MI->getOpcode()) { + case SystemZ::ADJCALLSTACKDOWN: + case SystemZ::ADJCALLSTACKUP: + assert(hasReservedCallFrame(MF) && + "ADJSTACKDOWN and ADJSTACKUP should be no-ops"); + MBB.erase(MI); + break; + + default: + llvm_unreachable("Unexpected call frame instruction"); + } +} diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZFrameLowering.h b/contrib/llvm/lib/Target/SystemZ/SystemZFrameLowering.h new file mode 100644 index 0000000..5ca049c --- /dev/null +++ b/contrib/llvm/lib/Target/SystemZ/SystemZFrameLowering.h @@ -0,0 +1,93 @@ +//===-- SystemZFrameLowering.h - Frame lowering for SystemZ -----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef SYSTEMZFRAMELOWERING_H +#define SYSTEMZFRAMELOWERING_H + +#include "SystemZSubtarget.h" +#include "llvm/ADT/IndexedMap.h" +#include "llvm/Target/TargetFrameLowering.h" + +namespace llvm { +class SystemZTargetMachine; +class SystemZSubtarget; + +class SystemZFrameLowering : public TargetFrameLowering { + IndexedMap<unsigned> RegSpillOffsets; + +protected: + const SystemZTargetMachine &TM; + const SystemZSubtarget &STI; + +public: + SystemZFrameLowering(const SystemZTargetMachine &tm, + const SystemZSubtarget &sti); + + // Override FrameLowering. + virtual void + processFunctionBeforeCalleeSavedScan(MachineFunction &MF, + RegScavenger *RS) const LLVM_OVERRIDE; + virtual bool + spillCalleeSavedRegisters(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + const std::vector<CalleeSavedInfo> &CSI, + const TargetRegisterInfo *TRI) const + LLVM_OVERRIDE; + virtual bool + restoreCalleeSavedRegisters(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBII, + const std::vector<CalleeSavedInfo> &CSI, + const TargetRegisterInfo *TRI) const + LLVM_OVERRIDE; + virtual void emitPrologue(MachineFunction &MF) const LLVM_OVERRIDE; + virtual void emitEpilogue(MachineFunction &MF, + MachineBasicBlock &MBB) const LLVM_OVERRIDE; + virtual bool hasFP(const MachineFunction &MF) const LLVM_OVERRIDE; + virtual int getFrameIndexOffset(const MachineFunction &MF, + int FI) const LLVM_OVERRIDE; + virtual bool hasReservedCallFrame(const MachineFunction &MF) const + LLVM_OVERRIDE; + virtual void + eliminateCallFramePseudoInstr(MachineFunction &MF, + MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI) const + LLVM_OVERRIDE; + + // The target-independent code automatically allocates save slots for + // call-saved GPRs. However, we don't need those slots for SystemZ, + // because the ABI sets aside GPR save slots in the caller-allocated part + // of the frame. Since the target-independent code puts this unneeded + // area at the top of the callee-allocated part of frame, we choose not + // to allocate it and adjust the offsets accordingly. Return the + // size of this unallocated area. + // FIXME: seems a bit hackish. + uint64_t getUnallocatedTopBytes(const MachineFunction &MF) const; + + // Return the number of bytes in the callee-allocated part of the frame. + uint64_t getAllocatedStackSize(const MachineFunction &MF) const; + + // Return the number of frame bytes that should be reserved for + // an emergency spill slot, for use by the register scaveneger. 
+ // Return 0 if register scaveging won't be needed. + unsigned getEmergencySpillSlotSize(const MachineFunction &MF) const; + + // Return the offset from the frame pointer of the emergency spill slot, + // which always fits within a 12-bit unsigned displacement field. + // Only valid if getEmergencySpillSlotSize(MF) returns nonzero. + unsigned getEmergencySpillSlotOffset(const MachineFunction &MF) const; + + // Return the byte offset from the incoming stack pointer of Reg's + // ABI-defined save slot. Return 0 if no slot is defined for Reg. + unsigned getRegSpillOffset(unsigned Reg) const { + return RegSpillOffsets[Reg]; + } +}; +} // end namespace llvm + +#endif diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp new file mode 100644 index 0000000..d436ba9 --- /dev/null +++ b/contrib/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp @@ -0,0 +1,616 @@ +//===-- SystemZISelDAGToDAG.cpp - A dag to dag inst selector for SystemZ --===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines an instruction selector for the SystemZ target. +// +//===----------------------------------------------------------------------===// + +#include "SystemZTargetMachine.h" +#include "llvm/CodeGen/SelectionDAGISel.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +namespace { +// Used to build addressing modes. +struct SystemZAddressingMode { + // The shape of the address. + enum AddrForm { + // base+displacement + FormBD, + + // base+displacement+index for load and store operands + FormBDXNormal, + + // base+displacement+index for load address operands + FormBDXLA, + + // base+displacement+index+ADJDYNALLOC + FormBDXDynAlloc + }; + AddrForm Form; + + // The type of displacement. The enum names here correspond directly + // to the definitions in SystemZOperand.td. We could split them into + // flags -- single/pair, 128-bit, etc. -- but it hardly seems worth it. + enum DispRange { + Disp12Only, + Disp12Pair, + Disp20Only, + Disp20Only128, + Disp20Pair + }; + DispRange DR; + + // The parts of the address. The address is equivalent to: + // + // Base + Disp + Index + (IncludesDynAlloc ? ADJDYNALLOC : 0) + SDValue Base; + int64_t Disp; + SDValue Index; + bool IncludesDynAlloc; + + SystemZAddressingMode(AddrForm form, DispRange dr) + : Form(form), DR(dr), Base(), Disp(0), Index(), + IncludesDynAlloc(false) {} + + // True if the address can have an index register. + bool hasIndexField() { return Form != FormBD; } + + // True if the address can (and must) include ADJDYNALLOC. + bool isDynAlloc() { return Form == FormBDXDynAlloc; } + + void dump() { + errs() << "SystemZAddressingMode " << this << '\n'; + + errs() << " Base "; + if (Base.getNode() != 0) + Base.getNode()->dump(); + else + errs() << "null\n"; + + if (hasIndexField()) { + errs() << " Index "; + if (Index.getNode() != 0) + Index.getNode()->dump(); + else + errs() << "null\n"; + } + + errs() << " Disp " << Disp; + if (IncludesDynAlloc) + errs() << " + ADJDYNALLOC"; + errs() << '\n'; + } +}; + +class SystemZDAGToDAGISel : public SelectionDAGISel { + const SystemZTargetLowering &Lowering; + const SystemZSubtarget &Subtarget; + + // Used by SystemZOperands.td to create integer constants. 
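  // Illustrative example (exposition only): matching the address
  // (add (add FrameIndex:%stack.0, %index), 4092) in FormBDXNormal mode
  // with a Disp12Pair range would leave the SystemZAddressingMode above as
  //
  //   Base  = FrameIndex:%stack.0   (later lowered to a TargetFrameIndex)
  //   Index = %index
  //   Disp  = 4092
  //   IncludesDynAlloc = false
  //
  // which getAddressOperands then turns into the base, displacement and
  // index operands of an RX- or RXY-format memory instruction.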
+ inline SDValue getImm(const SDNode *Node, uint64_t Imm) { + return CurDAG->getTargetConstant(Imm, Node->getValueType(0)); + } + + // Try to fold more of the base or index of AM into AM, where IsBase + // selects between the base and index. + bool expandAddress(SystemZAddressingMode &AM, bool IsBase); + + // Try to describe N in AM, returning true on success. + bool selectAddress(SDValue N, SystemZAddressingMode &AM); + + // Extract individual target operands from matched address AM. + void getAddressOperands(const SystemZAddressingMode &AM, EVT VT, + SDValue &Base, SDValue &Disp); + void getAddressOperands(const SystemZAddressingMode &AM, EVT VT, + SDValue &Base, SDValue &Disp, SDValue &Index); + + // Try to match Addr as a FormBD address with displacement type DR. + // Return true on success, storing the base and displacement in + // Base and Disp respectively. + bool selectBDAddr(SystemZAddressingMode::DispRange DR, SDValue Addr, + SDValue &Base, SDValue &Disp); + + // Try to match Addr as a FormBDX* address of form Form with + // displacement type DR. Return true on success, storing the base, + // displacement and index in Base, Disp and Index respectively. + bool selectBDXAddr(SystemZAddressingMode::AddrForm Form, + SystemZAddressingMode::DispRange DR, SDValue Addr, + SDValue &Base, SDValue &Disp, SDValue &Index); + + // PC-relative address matching routines used by SystemZOperands.td. + bool selectPCRelAddress(SDValue Addr, SDValue &Target) { + if (Addr.getOpcode() == SystemZISD::PCREL_WRAPPER) { + Target = Addr.getOperand(0); + return true; + } + return false; + } + + // BD matching routines used by SystemZOperands.td. + bool selectBDAddr12Only(SDValue Addr, SDValue &Base, SDValue &Disp) { + return selectBDAddr(SystemZAddressingMode::Disp12Only, Addr, Base, Disp); + } + bool selectBDAddr12Pair(SDValue Addr, SDValue &Base, SDValue &Disp) { + return selectBDAddr(SystemZAddressingMode::Disp12Pair, Addr, Base, Disp); + } + bool selectBDAddr20Only(SDValue Addr, SDValue &Base, SDValue &Disp) { + return selectBDAddr(SystemZAddressingMode::Disp20Only, Addr, Base, Disp); + } + bool selectBDAddr20Pair(SDValue Addr, SDValue &Base, SDValue &Disp) { + return selectBDAddr(SystemZAddressingMode::Disp20Pair, Addr, Base, Disp); + } + + // BDX matching routines used by SystemZOperands.td. 
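  // For exposition: the .td side refers to these routines through
  // ComplexPatterns.  The real definitions live in SystemZOperands.td; a
  // hypothetical binding would look roughly like
  //
  //   def bdxaddr12only : ComplexPattern<i64, 3, "selectBDXAddr12Only",
  //                                      [add, sub, or, frameindex]>;
  //
  // so that instruction patterns can write (load bdxaddr12only:$addr) and
  // have the base, displacement and index operands produced by the
  // selectors below.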
+ bool selectBDXAddr12Only(SDValue Addr, SDValue &Base, SDValue &Disp, + SDValue &Index) { + return selectBDXAddr(SystemZAddressingMode::FormBDXNormal, + SystemZAddressingMode::Disp12Only, + Addr, Base, Disp, Index); + } + bool selectBDXAddr12Pair(SDValue Addr, SDValue &Base, SDValue &Disp, + SDValue &Index) { + return selectBDXAddr(SystemZAddressingMode::FormBDXNormal, + SystemZAddressingMode::Disp12Pair, + Addr, Base, Disp, Index); + } + bool selectDynAlloc12Only(SDValue Addr, SDValue &Base, SDValue &Disp, + SDValue &Index) { + return selectBDXAddr(SystemZAddressingMode::FormBDXDynAlloc, + SystemZAddressingMode::Disp12Only, + Addr, Base, Disp, Index); + } + bool selectBDXAddr20Only(SDValue Addr, SDValue &Base, SDValue &Disp, + SDValue &Index) { + return selectBDXAddr(SystemZAddressingMode::FormBDXNormal, + SystemZAddressingMode::Disp20Only, + Addr, Base, Disp, Index); + } + bool selectBDXAddr20Only128(SDValue Addr, SDValue &Base, SDValue &Disp, + SDValue &Index) { + return selectBDXAddr(SystemZAddressingMode::FormBDXNormal, + SystemZAddressingMode::Disp20Only128, + Addr, Base, Disp, Index); + } + bool selectBDXAddr20Pair(SDValue Addr, SDValue &Base, SDValue &Disp, + SDValue &Index) { + return selectBDXAddr(SystemZAddressingMode::FormBDXNormal, + SystemZAddressingMode::Disp20Pair, + Addr, Base, Disp, Index); + } + bool selectLAAddr12Pair(SDValue Addr, SDValue &Base, SDValue &Disp, + SDValue &Index) { + return selectBDXAddr(SystemZAddressingMode::FormBDXLA, + SystemZAddressingMode::Disp12Pair, + Addr, Base, Disp, Index); + } + bool selectLAAddr20Pair(SDValue Addr, SDValue &Base, SDValue &Disp, + SDValue &Index) { + return selectBDXAddr(SystemZAddressingMode::FormBDXLA, + SystemZAddressingMode::Disp20Pair, + Addr, Base, Disp, Index); + } + + // If Op0 is null, then Node is a constant that can be loaded using: + // + // (Opcode UpperVal LowerVal) + // + // If Op0 is nonnull, then Node can be implemented using: + // + // (Opcode (Opcode Op0 UpperVal) LowerVal) + SDNode *splitLargeImmediate(unsigned Opcode, SDNode *Node, SDValue Op0, + uint64_t UpperVal, uint64_t LowerVal); + +public: + SystemZDAGToDAGISel(SystemZTargetMachine &TM, CodeGenOpt::Level OptLevel) + : SelectionDAGISel(TM, OptLevel), + Lowering(*TM.getTargetLowering()), + Subtarget(*TM.getSubtargetImpl()) { } + + // Override MachineFunctionPass. + virtual const char *getPassName() const LLVM_OVERRIDE { + return "SystemZ DAG->DAG Pattern Instruction Selection"; + } + + // Override SelectionDAGISel. + virtual SDNode *Select(SDNode *Node) LLVM_OVERRIDE; + virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op, + char ConstraintCode, + std::vector<SDValue> &OutOps) + LLVM_OVERRIDE; + + // Include the pieces autogenerated from the target description. + #include "SystemZGenDAGISel.inc" +}; +} // end anonymous namespace + +FunctionPass *llvm::createSystemZISelDag(SystemZTargetMachine &TM, + CodeGenOpt::Level OptLevel) { + return new SystemZDAGToDAGISel(TM, OptLevel); +} + +// Return true if Val should be selected as a displacement for an address +// with range DR. Here we're interested in the range of both the instruction +// described by DR and of any pairing instruction. 
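// For example (illustrative, using the standard z/Architecture pairs):
// a 32-bit load can use either L, with an unsigned 12-bit displacement, or
// its long-displacement pair LY, with a signed 20-bit displacement, so i32
// accesses use Disp12Pair and accept anything that fits the 20-bit form.
// LG exists only in the 20-bit form, so i64 accesses use Disp20Only.
// Disp20Only128 additionally checks Val + 8 so that both halves of a
// 128-bit access stay addressable.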
+static bool selectDisp(SystemZAddressingMode::DispRange DR, int64_t Val) { + switch (DR) { + case SystemZAddressingMode::Disp12Only: + return isUInt<12>(Val); + + case SystemZAddressingMode::Disp12Pair: + case SystemZAddressingMode::Disp20Only: + case SystemZAddressingMode::Disp20Pair: + return isInt<20>(Val); + + case SystemZAddressingMode::Disp20Only128: + return isInt<20>(Val) && isInt<20>(Val + 8); + } + llvm_unreachable("Unhandled displacement range"); +} + +// Change the base or index in AM to Value, where IsBase selects +// between the base and index. +static void changeComponent(SystemZAddressingMode &AM, bool IsBase, + SDValue Value) { + if (IsBase) + AM.Base = Value; + else + AM.Index = Value; +} + +// The base or index of AM is equivalent to Value + ADJDYNALLOC, +// where IsBase selects between the base and index. Try to fold the +// ADJDYNALLOC into AM. +static bool expandAdjDynAlloc(SystemZAddressingMode &AM, bool IsBase, + SDValue Value) { + if (AM.isDynAlloc() && !AM.IncludesDynAlloc) { + changeComponent(AM, IsBase, Value); + AM.IncludesDynAlloc = true; + return true; + } + return false; +} + +// The base of AM is equivalent to Base + Index. Try to use Index as +// the index register. +static bool expandIndex(SystemZAddressingMode &AM, SDValue Base, + SDValue Index) { + if (AM.hasIndexField() && !AM.Index.getNode()) { + AM.Base = Base; + AM.Index = Index; + return true; + } + return false; +} + +// The base or index of AM is equivalent to Op0 + Op1, where IsBase selects +// between the base and index. Try to fold Op1 into AM's displacement. +static bool expandDisp(SystemZAddressingMode &AM, bool IsBase, + SDValue Op0, ConstantSDNode *Op1) { + // First try adjusting the displacement. + int64_t TestDisp = AM.Disp + Op1->getSExtValue(); + if (selectDisp(AM.DR, TestDisp)) { + changeComponent(AM, IsBase, Op0); + AM.Disp = TestDisp; + return true; + } + + // We could consider forcing the displacement into a register and + // using it as an index, but it would need to be carefully tuned. + return false; +} + +bool SystemZDAGToDAGISel::expandAddress(SystemZAddressingMode &AM, + bool IsBase) { + SDValue N = IsBase ? AM.Base : AM.Index; + unsigned Opcode = N.getOpcode(); + if (Opcode == ISD::TRUNCATE) { + N = N.getOperand(0); + Opcode = N.getOpcode(); + } + if (Opcode == ISD::ADD || CurDAG->isBaseWithConstantOffset(N)) { + SDValue Op0 = N.getOperand(0); + SDValue Op1 = N.getOperand(1); + + unsigned Op0Code = Op0->getOpcode(); + unsigned Op1Code = Op1->getOpcode(); + + if (Op0Code == SystemZISD::ADJDYNALLOC) + return expandAdjDynAlloc(AM, IsBase, Op1); + if (Op1Code == SystemZISD::ADJDYNALLOC) + return expandAdjDynAlloc(AM, IsBase, Op0); + + if (Op0Code == ISD::Constant) + return expandDisp(AM, IsBase, Op1, cast<ConstantSDNode>(Op0)); + if (Op1Code == ISD::Constant) + return expandDisp(AM, IsBase, Op0, cast<ConstantSDNode>(Op1)); + + if (IsBase && expandIndex(AM, Op0, Op1)) + return true; + } + return false; +} + +// Return true if an instruction with displacement range DR should be +// used for displacement value Val. selectDisp(DR, Val) must already hold. +static bool isValidDisp(SystemZAddressingMode::DispRange DR, int64_t Val) { + assert(selectDisp(DR, Val) && "Invalid displacement"); + switch (DR) { + case SystemZAddressingMode::Disp12Only: + case SystemZAddressingMode::Disp20Only: + case SystemZAddressingMode::Disp20Only128: + return true; + + case SystemZAddressingMode::Disp12Pair: + // Use the other instruction if the displacement is too large. 
+ return isUInt<12>(Val); + + case SystemZAddressingMode::Disp20Pair: + // Use the other instruction if the displacement is small enough. + return !isUInt<12>(Val); + } + llvm_unreachable("Unhandled displacement range"); +} + +// Return true if Base + Disp + Index should be performed by LA(Y). +static bool shouldUseLA(SDNode *Base, int64_t Disp, SDNode *Index) { + // Don't use LA(Y) for constants. + if (!Base) + return false; + + // Always use LA(Y) for frame addresses, since we know that the destination + // register is almost always (perhaps always) going to be different from + // the frame register. + if (Base->getOpcode() == ISD::FrameIndex) + return true; + + if (Disp) { + // Always use LA(Y) if there is a base, displacement and index. + if (Index) + return true; + + // Always use LA if the displacement is small enough. It should always + // be no worse than AGHI (and better if it avoids a move). + if (isUInt<12>(Disp)) + return true; + + // For similar reasons, always use LAY if the constant is too big for AGHI. + // LAY should be no worse than AGFI. + if (!isInt<16>(Disp)) + return true; + } else { + // Don't use LA for plain registers. + if (!Index) + return false; + + // Don't use LA for plain addition if the index operand is only used + // once. It should be a natural two-operand addition in that case. + if (Index->hasOneUse()) + return false; + + // Prefer addition if the second operation is sign-extended, in the + // hope of using AGF. + unsigned IndexOpcode = Index->getOpcode(); + if (IndexOpcode == ISD::SIGN_EXTEND || + IndexOpcode == ISD::SIGN_EXTEND_INREG) + return false; + } + + // Don't use LA for two-operand addition if either operand is only + // used once. The addition instructions are better in that case. + if (Base->hasOneUse()) + return false; + + return true; +} + +// Return true if Addr is suitable for AM, updating AM if so. +bool SystemZDAGToDAGISel::selectAddress(SDValue Addr, + SystemZAddressingMode &AM) { + // Start out assuming that the address will need to be loaded separately, + // then try to extend it as much as we can. + AM.Base = Addr; + + // First try treating the address as a constant. + if (Addr.getOpcode() == ISD::Constant && + expandDisp(AM, true, SDValue(), cast<ConstantSDNode>(Addr))) + ; + else + // Otherwise try expanding each component. + while (expandAddress(AM, true) || + (AM.Index.getNode() && expandAddress(AM, false))) + continue; + + // Reject cases where it isn't profitable to use LA(Y). + if (AM.Form == SystemZAddressingMode::FormBDXLA && + !shouldUseLA(AM.Base.getNode(), AM.Disp, AM.Index.getNode())) + return false; + + // Reject cases where the other instruction in a pair should be used. + if (!isValidDisp(AM.DR, AM.Disp)) + return false; + + // Make sure that ADJDYNALLOC is included where necessary. + if (AM.isDynAlloc() && !AM.IncludesDynAlloc) + return false; + + DEBUG(AM.dump()); + return true; +} + +// Insert a node into the DAG at least before Pos. This will reposition +// the node as needed, and will assign it a node ID that is <= Pos's ID. +// Note that this does *not* preserve the uniqueness of node IDs! +// The selection DAG must no longer depend on their uniqueness when this +// function is used. 
+static void insertDAGNode(SelectionDAG *DAG, SDNode *Pos, SDValue N) { + if (N.getNode()->getNodeId() == -1 || + N.getNode()->getNodeId() > Pos->getNodeId()) { + DAG->RepositionNode(Pos, N.getNode()); + N.getNode()->setNodeId(Pos->getNodeId()); + } +} + +void SystemZDAGToDAGISel::getAddressOperands(const SystemZAddressingMode &AM, + EVT VT, SDValue &Base, + SDValue &Disp) { + Base = AM.Base; + if (!Base.getNode()) + // Register 0 means "no base". This is mostly useful for shifts. + Base = CurDAG->getRegister(0, VT); + else if (Base.getOpcode() == ISD::FrameIndex) { + // Lower a FrameIndex to a TargetFrameIndex. + int64_t FrameIndex = cast<FrameIndexSDNode>(Base)->getIndex(); + Base = CurDAG->getTargetFrameIndex(FrameIndex, VT); + } else if (Base.getValueType() != VT) { + // Truncate values from i64 to i32, for shifts. + assert(VT == MVT::i32 && Base.getValueType() == MVT::i64 && + "Unexpected truncation"); + DebugLoc DL = Base.getDebugLoc(); + SDValue Trunc = CurDAG->getNode(ISD::TRUNCATE, DL, VT, Base); + insertDAGNode(CurDAG, Base.getNode(), Trunc); + Base = Trunc; + } + + // Lower the displacement to a TargetConstant. + Disp = CurDAG->getTargetConstant(AM.Disp, VT); +} + +void SystemZDAGToDAGISel::getAddressOperands(const SystemZAddressingMode &AM, + EVT VT, SDValue &Base, + SDValue &Disp, SDValue &Index) { + getAddressOperands(AM, VT, Base, Disp); + + Index = AM.Index; + if (!Index.getNode()) + // Register 0 means "no index". + Index = CurDAG->getRegister(0, VT); +} + +bool SystemZDAGToDAGISel::selectBDAddr(SystemZAddressingMode::DispRange DR, + SDValue Addr, SDValue &Base, + SDValue &Disp) { + SystemZAddressingMode AM(SystemZAddressingMode::FormBD, DR); + if (!selectAddress(Addr, AM)) + return false; + + getAddressOperands(AM, Addr.getValueType(), Base, Disp); + return true; +} + +bool SystemZDAGToDAGISel::selectBDXAddr(SystemZAddressingMode::AddrForm Form, + SystemZAddressingMode::DispRange DR, + SDValue Addr, SDValue &Base, + SDValue &Disp, SDValue &Index) { + SystemZAddressingMode AM(Form, DR); + if (!selectAddress(Addr, AM)) + return false; + + getAddressOperands(AM, Addr.getValueType(), Base, Disp, Index); + return true; +} + +SDNode *SystemZDAGToDAGISel::splitLargeImmediate(unsigned Opcode, SDNode *Node, + SDValue Op0, uint64_t UpperVal, + uint64_t LowerVal) { + EVT VT = Node->getValueType(0); + DebugLoc DL = Node->getDebugLoc(); + SDValue Upper = CurDAG->getConstant(UpperVal, VT); + if (Op0.getNode()) + Upper = CurDAG->getNode(Opcode, DL, VT, Op0, Upper); + Upper = SDValue(Select(Upper.getNode()), 0); + + SDValue Lower = CurDAG->getConstant(LowerVal, VT); + SDValue Or = CurDAG->getNode(Opcode, DL, VT, Upper, Lower); + return Or.getNode(); +} + +SDNode *SystemZDAGToDAGISel::Select(SDNode *Node) { + // Dump information about the Node being selected + DEBUG(errs() << "Selecting: "; Node->dump(CurDAG); errs() << "\n"); + + // If we have a custom node, we already have selected! + if (Node->isMachineOpcode()) { + DEBUG(errs() << "== "; Node->dump(CurDAG); errs() << "\n"); + return 0; + } + + unsigned Opcode = Node->getOpcode(); + switch (Opcode) { + case ISD::OR: + case ISD::XOR: + // If this is a 64-bit operation in which both 32-bit halves are nonzero, + // split the operation into two. 
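      // Illustrative example (the final selection shown is approximate):
      // for (or i64 %x, 0x0000123400005678) both 32-bit halves are nonzero,
      // so the node is rewritten below as
      //   (or (or %x, 0x0000123400000000), 0x5678)
      // which can then be selected as an or-immediate of the high word
      // followed by an or-immediate of the low word (OIHF + OILF).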
+ if (Node->getValueType(0) == MVT::i64) + if (ConstantSDNode *Op1 = dyn_cast<ConstantSDNode>(Node->getOperand(1))) { + uint64_t Val = Op1->getZExtValue(); + if (!SystemZ::isImmLF(Val) && !SystemZ::isImmHF(Val)) + Node = splitLargeImmediate(Opcode, Node, Node->getOperand(0), + Val - uint32_t(Val), uint32_t(Val)); + } + break; + + case ISD::Constant: + // If this is a 64-bit constant that is out of the range of LLILF, + // LLIHF and LGFI, split it into two 32-bit pieces. + if (Node->getValueType(0) == MVT::i64) { + uint64_t Val = cast<ConstantSDNode>(Node)->getZExtValue(); + if (!SystemZ::isImmLF(Val) && !SystemZ::isImmHF(Val) && !isInt<32>(Val)) + Node = splitLargeImmediate(ISD::OR, Node, SDValue(), + Val - uint32_t(Val), uint32_t(Val)); + } + break; + + case ISD::ATOMIC_LOAD_SUB: + // Try to convert subtractions of constants to additions. + if (ConstantSDNode *Op2 = dyn_cast<ConstantSDNode>(Node->getOperand(2))) { + uint64_t Value = -Op2->getZExtValue(); + EVT VT = Node->getValueType(0); + if (VT == MVT::i32 || isInt<32>(Value)) { + SDValue Ops[] = { Node->getOperand(0), Node->getOperand(1), + CurDAG->getConstant(int32_t(Value), VT) }; + Node = CurDAG->MorphNodeTo(Node, ISD::ATOMIC_LOAD_ADD, + Node->getVTList(), Ops, array_lengthof(Ops)); + } + } + break; + } + + // Select the default instruction + SDNode *ResNode = SelectCode(Node); + + DEBUG(errs() << "=> "; + if (ResNode == NULL || ResNode == Node) + Node->dump(CurDAG); + else + ResNode->dump(CurDAG); + errs() << "\n"; + ); + return ResNode; +} + +bool SystemZDAGToDAGISel:: +SelectInlineAsmMemoryOperand(const SDValue &Op, + char ConstraintCode, + std::vector<SDValue> &OutOps) { + assert(ConstraintCode == 'm' && "Unexpected constraint code"); + // Accept addresses with short displacements, which are compatible + // with Q, R, S and T. But keep the index operand for future expansion. + SDValue Base, Disp, Index; + if (!selectBDXAddr(SystemZAddressingMode::FormBD, + SystemZAddressingMode::Disp12Only, + Op, Base, Disp, Index)) + return true; + OutOps.push_back(Base); + OutOps.push_back(Disp); + OutOps.push_back(Index); + return false; +} diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp new file mode 100644 index 0000000..eb21b31 --- /dev/null +++ b/contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -0,0 +1,2233 @@ +//===-- SystemZISelLowering.cpp - SystemZ DAG lowering implementation -----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the SystemZTargetLowering class. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "systemz-lower" + +#include "SystemZISelLowering.h" +#include "SystemZCallingConv.h" +#include "SystemZConstantPoolValue.h" +#include "SystemZMachineFunctionInfo.h" +#include "SystemZTargetMachine.h" +#include "llvm/CodeGen/CallingConvLower.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" + +using namespace llvm; + +// Classify VT as either 32 or 64 bit. 
+static bool is32Bit(EVT VT) { + switch (VT.getSimpleVT().SimpleTy) { + case MVT::i32: + return true; + case MVT::i64: + return false; + default: + llvm_unreachable("Unsupported type"); + } +} + +// Return a version of MachineOperand that can be safely used before the +// final use. +static MachineOperand earlyUseOperand(MachineOperand Op) { + if (Op.isReg()) + Op.setIsKill(false); + return Op; +} + +SystemZTargetLowering::SystemZTargetLowering(SystemZTargetMachine &tm) + : TargetLowering(tm, new TargetLoweringObjectFileELF()), + Subtarget(*tm.getSubtargetImpl()), TM(tm) { + MVT PtrVT = getPointerTy(); + + // Set up the register classes. + addRegisterClass(MVT::i32, &SystemZ::GR32BitRegClass); + addRegisterClass(MVT::i64, &SystemZ::GR64BitRegClass); + addRegisterClass(MVT::f32, &SystemZ::FP32BitRegClass); + addRegisterClass(MVT::f64, &SystemZ::FP64BitRegClass); + addRegisterClass(MVT::f128, &SystemZ::FP128BitRegClass); + + // Compute derived properties from the register classes + computeRegisterProperties(); + + // Set up special registers. + setExceptionPointerRegister(SystemZ::R6D); + setExceptionSelectorRegister(SystemZ::R7D); + setStackPointerRegisterToSaveRestore(SystemZ::R15D); + + // TODO: It may be better to default to latency-oriented scheduling, however + // LLVM's current latency-oriented scheduler can't handle physreg definitions + // such as SystemZ has with PSW, so set this to the register-pressure + // scheduler, because it can. + setSchedulingPreference(Sched::RegPressure); + + setBooleanContents(ZeroOrOneBooleanContent); + setBooleanVectorContents(ZeroOrOneBooleanContent); // FIXME: Is this correct? + + // Instructions are strings of 2-byte aligned 2-byte values. + setMinFunctionAlignment(2); + + // Handle operations that are handled in a similar way for all types. + for (unsigned I = MVT::FIRST_INTEGER_VALUETYPE; + I <= MVT::LAST_FP_VALUETYPE; + ++I) { + MVT VT = MVT::SimpleValueType(I); + if (isTypeLegal(VT)) { + // Expand SETCC(X, Y, COND) into SELECT_CC(X, Y, 1, 0, COND). + setOperationAction(ISD::SETCC, VT, Expand); + + // Expand SELECT(C, A, B) into SELECT_CC(X, 0, A, B, NE). + setOperationAction(ISD::SELECT, VT, Expand); + + // Lower SELECT_CC and BR_CC into separate comparisons and branches. + setOperationAction(ISD::SELECT_CC, VT, Custom); + setOperationAction(ISD::BR_CC, VT, Custom); + } + } + + // Expand jump table branches as address arithmetic followed by an + // indirect jump. + setOperationAction(ISD::BR_JT, MVT::Other, Expand); + + // Expand BRCOND into a BR_CC (see above). + setOperationAction(ISD::BRCOND, MVT::Other, Expand); + + // Handle integer types. + for (unsigned I = MVT::FIRST_INTEGER_VALUETYPE; + I <= MVT::LAST_INTEGER_VALUETYPE; + ++I) { + MVT VT = MVT::SimpleValueType(I); + if (isTypeLegal(VT)) { + // Expand individual DIV and REMs into DIVREMs. + setOperationAction(ISD::SDIV, VT, Expand); + setOperationAction(ISD::UDIV, VT, Expand); + setOperationAction(ISD::SREM, VT, Expand); + setOperationAction(ISD::UREM, VT, Expand); + setOperationAction(ISD::SDIVREM, VT, Custom); + setOperationAction(ISD::UDIVREM, VT, Custom); + + // Expand ATOMIC_LOAD and ATOMIC_STORE using ATOMIC_CMP_SWAP. + // FIXME: probably much too conservative. + setOperationAction(ISD::ATOMIC_LOAD, VT, Expand); + setOperationAction(ISD::ATOMIC_STORE, VT, Expand); + + // No special instructions for these. 
+ setOperationAction(ISD::CTPOP, VT, Expand); + setOperationAction(ISD::CTTZ, VT, Expand); + setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Expand); + setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Expand); + setOperationAction(ISD::ROTR, VT, Expand); + + // Use *MUL_LOHI where possible and a wider multiplication otherwise. + setOperationAction(ISD::MULHS, VT, Expand); + setOperationAction(ISD::MULHU, VT, Expand); + + // We have instructions for signed but not unsigned FP conversion. + setOperationAction(ISD::FP_TO_UINT, VT, Expand); + } + } + + // Type legalization will convert 8- and 16-bit atomic operations into + // forms that operate on i32s (but still keeping the original memory VT). + // Lower them into full i32 operations. + setOperationAction(ISD::ATOMIC_SWAP, MVT::i32, Custom); + setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i32, Custom); + setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, Custom); + setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i32, Custom); + setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i32, Custom); + setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i32, Custom); + setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i32, Custom); + setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i32, Custom); + setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i32, Custom); + setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i32, Custom); + setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i32, Custom); + setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Custom); + + // We have instructions for signed but not unsigned FP conversion. + // Handle unsigned 32-bit types as signed 64-bit types. + setOperationAction(ISD::UINT_TO_FP, MVT::i32, Promote); + setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand); + + // We have native support for a 64-bit CTLZ, via FLOGR. + setOperationAction(ISD::CTLZ, MVT::i32, Promote); + setOperationAction(ISD::CTLZ, MVT::i64, Legal); + + // Give LowerOperation the chance to replace 64-bit ORs with subregs. + setOperationAction(ISD::OR, MVT::i64, Custom); + + // The architecture has 32-bit SMUL_LOHI and UMUL_LOHI (MR and MLR), + // but they aren't really worth using. There is no 64-bit SMUL_LOHI, + // but there is a 64-bit UMUL_LOHI: MLGR. + setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand); + setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand); + setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand); + setOperationAction(ISD::UMUL_LOHI, MVT::i64, Custom); + + // FIXME: Can we support these natively? + setOperationAction(ISD::SRL_PARTS, MVT::i64, Expand); + setOperationAction(ISD::SHL_PARTS, MVT::i64, Expand); + setOperationAction(ISD::SRA_PARTS, MVT::i64, Expand); + + // We have native instructions for i8, i16 and i32 extensions, but not i1. + setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote); + setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote); + setLoadExtAction(ISD::EXTLOAD, MVT::i1, Promote); + setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); + + // Handle the various types of symbolic address. + setOperationAction(ISD::ConstantPool, PtrVT, Custom); + setOperationAction(ISD::GlobalAddress, PtrVT, Custom); + setOperationAction(ISD::GlobalTLSAddress, PtrVT, Custom); + setOperationAction(ISD::BlockAddress, PtrVT, Custom); + setOperationAction(ISD::JumpTable, PtrVT, Custom); + + // We need to handle dynamic allocations specially because of the + // 160-byte area at the bottom of the stack. + setOperationAction(ISD::DYNAMIC_STACKALLOC, PtrVT, Custom); + + // Use custom expanders so that we can force the function to use + // a frame pointer. 
+ setOperationAction(ISD::STACKSAVE, MVT::Other, Custom); + setOperationAction(ISD::STACKRESTORE, MVT::Other, Custom); + + // Expand these using getExceptionSelectorRegister() and + // getExceptionPointerRegister(). + setOperationAction(ISD::EXCEPTIONADDR, PtrVT, Expand); + setOperationAction(ISD::EHSELECTION, PtrVT, Expand); + + // Handle floating-point types. + for (unsigned I = MVT::FIRST_FP_VALUETYPE; + I <= MVT::LAST_FP_VALUETYPE; + ++I) { + MVT VT = MVT::SimpleValueType(I); + if (isTypeLegal(VT)) { + // We can use FI for FRINT. + setOperationAction(ISD::FRINT, VT, Legal); + + // No special instructions for these. + setOperationAction(ISD::FSIN, VT, Expand); + setOperationAction(ISD::FCOS, VT, Expand); + setOperationAction(ISD::FREM, VT, Expand); + } + } + + // We have fused multiply-addition for f32 and f64 but not f128. + setOperationAction(ISD::FMA, MVT::f32, Legal); + setOperationAction(ISD::FMA, MVT::f64, Legal); + setOperationAction(ISD::FMA, MVT::f128, Expand); + + // Needed so that we don't try to implement f128 constant loads using + // a load-and-extend of a f80 constant (in cases where the constant + // would fit in an f80). + setLoadExtAction(ISD::EXTLOAD, MVT::f80, Expand); + + // Floating-point truncation and stores need to be done separately. + setTruncStoreAction(MVT::f64, MVT::f32, Expand); + setTruncStoreAction(MVT::f128, MVT::f32, Expand); + setTruncStoreAction(MVT::f128, MVT::f64, Expand); + + // We have 64-bit FPR<->GPR moves, but need special handling for + // 32-bit forms. + setOperationAction(ISD::BITCAST, MVT::i32, Custom); + setOperationAction(ISD::BITCAST, MVT::f32, Custom); + + // VASTART and VACOPY need to deal with the SystemZ-specific varargs + // structure, but VAEND is a no-op. + setOperationAction(ISD::VASTART, MVT::Other, Custom); + setOperationAction(ISD::VACOPY, MVT::Other, Custom); + setOperationAction(ISD::VAEND, MVT::Other, Expand); +} + +bool SystemZTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const { + // We can load zero using LZ?R and negative zero using LZ?R;LC?BR. + return Imm.isZero() || Imm.isNegZero(); +} + +//===----------------------------------------------------------------------===// +// Inline asm support +//===----------------------------------------------------------------------===// + +TargetLowering::ConstraintType +SystemZTargetLowering::getConstraintType(const std::string &Constraint) const { + if (Constraint.size() == 1) { + switch (Constraint[0]) { + case 'a': // Address register + case 'd': // Data register (equivalent to 'r') + case 'f': // Floating-point register + case 'r': // General-purpose register + return C_RegisterClass; + + case 'Q': // Memory with base and unsigned 12-bit displacement + case 'R': // Likewise, plus an index + case 'S': // Memory with base and signed 20-bit displacement + case 'T': // Likewise, plus an index + case 'm': // Equivalent to 'T'. 
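    // Illustrative use (hypothetical user code, not part of this file):
    //
    //   asm volatile("lg %0,%1" : "=d" (val) : "T" (*ptr));
    //
    // where "d" requests a general-purpose register and "T" a memory
    // operand with base, index and signed 20-bit displacement, matching the
    // classifications returned here.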
+ return C_Memory; + + case 'I': // Unsigned 8-bit constant + case 'J': // Unsigned 12-bit constant + case 'K': // Signed 16-bit constant + case 'L': // Signed 20-bit displacement (on all targets we support) + case 'M': // 0x7fffffff + return C_Other; + + default: + break; + } + } + return TargetLowering::getConstraintType(Constraint); +} + +TargetLowering::ConstraintWeight SystemZTargetLowering:: +getSingleConstraintMatchWeight(AsmOperandInfo &info, + const char *constraint) const { + ConstraintWeight weight = CW_Invalid; + Value *CallOperandVal = info.CallOperandVal; + // If we don't have a value, we can't do a match, + // but allow it at the lowest weight. + if (CallOperandVal == NULL) + return CW_Default; + Type *type = CallOperandVal->getType(); + // Look at the constraint type. + switch (*constraint) { + default: + weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint); + break; + + case 'a': // Address register + case 'd': // Data register (equivalent to 'r') + case 'r': // General-purpose register + if (CallOperandVal->getType()->isIntegerTy()) + weight = CW_Register; + break; + + case 'f': // Floating-point register + if (type->isFloatingPointTy()) + weight = CW_Register; + break; + + case 'I': // Unsigned 8-bit constant + if (ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal)) + if (isUInt<8>(C->getZExtValue())) + weight = CW_Constant; + break; + + case 'J': // Unsigned 12-bit constant + if (ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal)) + if (isUInt<12>(C->getZExtValue())) + weight = CW_Constant; + break; + + case 'K': // Signed 16-bit constant + if (ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal)) + if (isInt<16>(C->getSExtValue())) + weight = CW_Constant; + break; + + case 'L': // Signed 20-bit displacement (on all targets we support) + if (ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal)) + if (isInt<20>(C->getSExtValue())) + weight = CW_Constant; + break; + + case 'M': // 0x7fffffff + if (ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal)) + if (C->getZExtValue() == 0x7fffffff) + weight = CW_Constant; + break; + } + return weight; +} + +std::pair<unsigned, const TargetRegisterClass *> SystemZTargetLowering:: +getRegForInlineAsmConstraint(const std::string &Constraint, EVT VT) const { + if (Constraint.size() == 1) { + // GCC Constraint Letters + switch (Constraint[0]) { + default: break; + case 'd': // Data register (equivalent to 'r') + case 'r': // General-purpose register + if (VT == MVT::i64) + return std::make_pair(0U, &SystemZ::GR64BitRegClass); + else if (VT == MVT::i128) + return std::make_pair(0U, &SystemZ::GR128BitRegClass); + return std::make_pair(0U, &SystemZ::GR32BitRegClass); + + case 'a': // Address register + if (VT == MVT::i64) + return std::make_pair(0U, &SystemZ::ADDR64BitRegClass); + else if (VT == MVT::i128) + return std::make_pair(0U, &SystemZ::ADDR128BitRegClass); + return std::make_pair(0U, &SystemZ::ADDR32BitRegClass); + + case 'f': // Floating-point register + if (VT == MVT::f64) + return std::make_pair(0U, &SystemZ::FP64BitRegClass); + else if (VT == MVT::f128) + return std::make_pair(0U, &SystemZ::FP128BitRegClass); + return std::make_pair(0U, &SystemZ::FP32BitRegClass); + } + } + return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT); +} + +void SystemZTargetLowering:: +LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint, + std::vector<SDValue> &Ops, + SelectionDAG &DAG) const { + // Only support length 1 constraints for now. 
+ if (Constraint.length() == 1) { + switch (Constraint[0]) { + case 'I': // Unsigned 8-bit constant + if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) + if (isUInt<8>(C->getZExtValue())) + Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), + Op.getValueType())); + return; + + case 'J': // Unsigned 12-bit constant + if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) + if (isUInt<12>(C->getZExtValue())) + Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), + Op.getValueType())); + return; + + case 'K': // Signed 16-bit constant + if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) + if (isInt<16>(C->getSExtValue())) + Ops.push_back(DAG.getTargetConstant(C->getSExtValue(), + Op.getValueType())); + return; + + case 'L': // Signed 20-bit displacement (on all targets we support) + if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) + if (isInt<20>(C->getSExtValue())) + Ops.push_back(DAG.getTargetConstant(C->getSExtValue(), + Op.getValueType())); + return; + + case 'M': // 0x7fffffff + if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) + if (C->getZExtValue() == 0x7fffffff) + Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), + Op.getValueType())); + return; + } + } + TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG); +} + +//===----------------------------------------------------------------------===// +// Calling conventions +//===----------------------------------------------------------------------===// + +#include "SystemZGenCallingConv.inc" + +// Value is a value that has been passed to us in the location described by VA +// (and so has type VA.getLocVT()). Convert Value to VA.getValVT(), chaining +// any loads onto Chain. +static SDValue convertLocVTToValVT(SelectionDAG &DAG, DebugLoc DL, + CCValAssign &VA, SDValue Chain, + SDValue Value) { + // If the argument has been promoted from a smaller type, insert an + // assertion to capture this. + if (VA.getLocInfo() == CCValAssign::SExt) + Value = DAG.getNode(ISD::AssertSext, DL, VA.getLocVT(), Value, + DAG.getValueType(VA.getValVT())); + else if (VA.getLocInfo() == CCValAssign::ZExt) + Value = DAG.getNode(ISD::AssertZext, DL, VA.getLocVT(), Value, + DAG.getValueType(VA.getValVT())); + + if (VA.isExtInLoc()) + Value = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), Value); + else if (VA.getLocInfo() == CCValAssign::Indirect) + Value = DAG.getLoad(VA.getValVT(), DL, Chain, Value, + MachinePointerInfo(), false, false, false, 0); + else + assert(VA.getLocInfo() == CCValAssign::Full && "Unsupported getLocInfo"); + return Value; +} + +// Value is a value of type VA.getValVT() that we need to copy into +// the location described by VA. Return a copy of Value converted to +// VA.getValVT(). The caller is responsible for handling indirect values. 
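// Worked example (exposition only): suppose the calling convention records
// an i8 value as promoted into a wider LocVT with LocInfo == SExt.  On the
// call side, convertValVTToLocVT below emits SIGN_EXTEND to widen the
// value; on the receiving side, convertLocVTToValVT wraps the live-in value
// in AssertSext and truncates it back to i8, so the rest of the DAG keeps
// the original type while the register holds a correctly extended value.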
+static SDValue convertValVTToLocVT(SelectionDAG &DAG, DebugLoc DL, + CCValAssign &VA, SDValue Value) { + switch (VA.getLocInfo()) { + case CCValAssign::SExt: + return DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), Value); + case CCValAssign::ZExt: + return DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Value); + case CCValAssign::AExt: + return DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Value); + case CCValAssign::Full: + return Value; + default: + llvm_unreachable("Unhandled getLocInfo()"); + } +} + +SDValue SystemZTargetLowering:: +LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, + const SmallVectorImpl<ISD::InputArg> &Ins, + DebugLoc DL, SelectionDAG &DAG, + SmallVectorImpl<SDValue> &InVals) const { + MachineFunction &MF = DAG.getMachineFunction(); + MachineFrameInfo *MFI = MF.getFrameInfo(); + MachineRegisterInfo &MRI = MF.getRegInfo(); + SystemZMachineFunctionInfo *FuncInfo = + MF.getInfo<SystemZMachineFunctionInfo>(); + const SystemZFrameLowering *TFL = + static_cast<const SystemZFrameLowering *>(TM.getFrameLowering()); + + // Assign locations to all of the incoming arguments. + SmallVector<CCValAssign, 16> ArgLocs; + CCState CCInfo(CallConv, IsVarArg, MF, TM, ArgLocs, *DAG.getContext()); + CCInfo.AnalyzeFormalArguments(Ins, CC_SystemZ); + + unsigned NumFixedGPRs = 0; + unsigned NumFixedFPRs = 0; + for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) { + SDValue ArgValue; + CCValAssign &VA = ArgLocs[I]; + EVT LocVT = VA.getLocVT(); + if (VA.isRegLoc()) { + // Arguments passed in registers + const TargetRegisterClass *RC; + switch (LocVT.getSimpleVT().SimpleTy) { + default: + // Integers smaller than i64 should be promoted to i64. + llvm_unreachable("Unexpected argument type"); + case MVT::i32: + NumFixedGPRs += 1; + RC = &SystemZ::GR32BitRegClass; + break; + case MVT::i64: + NumFixedGPRs += 1; + RC = &SystemZ::GR64BitRegClass; + break; + case MVT::f32: + NumFixedFPRs += 1; + RC = &SystemZ::FP32BitRegClass; + break; + case MVT::f64: + NumFixedFPRs += 1; + RC = &SystemZ::FP64BitRegClass; + break; + } + + unsigned VReg = MRI.createVirtualRegister(RC); + MRI.addLiveIn(VA.getLocReg(), VReg); + ArgValue = DAG.getCopyFromReg(Chain, DL, VReg, LocVT); + } else { + assert(VA.isMemLoc() && "Argument not register or memory"); + + // Create the frame index object for this incoming parameter. + int FI = MFI->CreateFixedObject(LocVT.getSizeInBits() / 8, + VA.getLocMemOffset(), true); + + // Create the SelectionDAG nodes corresponding to a load + // from this parameter. Unpromoted ints and floats are + // passed as right-justified 8-byte values. + EVT PtrVT = getPointerTy(); + SDValue FIN = DAG.getFrameIndex(FI, PtrVT); + if (VA.getLocVT() == MVT::i32 || VA.getLocVT() == MVT::f32) + FIN = DAG.getNode(ISD::ADD, DL, PtrVT, FIN, DAG.getIntPtrConstant(4)); + ArgValue = DAG.getLoad(LocVT, DL, Chain, FIN, + MachinePointerInfo::getFixedStack(FI), + false, false, false, 0); + } + + // Convert the value of the argument register into the value that's + // being passed. + InVals.push_back(convertLocVTToValVT(DAG, DL, VA, Chain, ArgValue)); + } + + if (IsVarArg) { + // Save the number of non-varargs registers for later use by va_start, etc. + FuncInfo->setVarArgsFirstGPR(NumFixedGPRs); + FuncInfo->setVarArgsFirstFPR(NumFixedFPRs); + + // Likewise the address (in the form of a frame index) of where the + // first stack vararg would be. The 1-byte size here is arbitrary. 
+ int64_t StackSize = CCInfo.getNextStackOffset(); + FuncInfo->setVarArgsFrameIndex(MFI->CreateFixedObject(1, StackSize, true)); + + // ...and a similar frame index for the caller-allocated save area + // that will be used to store the incoming registers. + int64_t RegSaveOffset = TFL->getOffsetOfLocalArea(); + unsigned RegSaveIndex = MFI->CreateFixedObject(1, RegSaveOffset, true); + FuncInfo->setRegSaveFrameIndex(RegSaveIndex); + + // Store the FPR varargs in the reserved frame slots. (We store the + // GPRs as part of the prologue.) + if (NumFixedFPRs < SystemZ::NumArgFPRs) { + SDValue MemOps[SystemZ::NumArgFPRs]; + for (unsigned I = NumFixedFPRs; I < SystemZ::NumArgFPRs; ++I) { + unsigned Offset = TFL->getRegSpillOffset(SystemZ::ArgFPRs[I]); + int FI = MFI->CreateFixedObject(8, RegSaveOffset + Offset, true); + SDValue FIN = DAG.getFrameIndex(FI, getPointerTy()); + unsigned VReg = MF.addLiveIn(SystemZ::ArgFPRs[I], + &SystemZ::FP64BitRegClass); + SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, VReg, MVT::f64); + MemOps[I] = DAG.getStore(ArgValue.getValue(1), DL, ArgValue, FIN, + MachinePointerInfo::getFixedStack(FI), + false, false, 0); + + } + // Join the stores, which are independent of one another. + Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, + &MemOps[NumFixedFPRs], + SystemZ::NumArgFPRs - NumFixedFPRs); + } + } + + return Chain; +} + +SDValue +SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI, + SmallVectorImpl<SDValue> &InVals) const { + SelectionDAG &DAG = CLI.DAG; + DebugLoc &DL = CLI.DL; + SmallVector<ISD::OutputArg, 32> &Outs = CLI.Outs; + SmallVector<SDValue, 32> &OutVals = CLI.OutVals; + SmallVector<ISD::InputArg, 32> &Ins = CLI.Ins; + SDValue Chain = CLI.Chain; + SDValue Callee = CLI.Callee; + bool &isTailCall = CLI.IsTailCall; + CallingConv::ID CallConv = CLI.CallConv; + bool IsVarArg = CLI.IsVarArg; + MachineFunction &MF = DAG.getMachineFunction(); + EVT PtrVT = getPointerTy(); + + // SystemZ target does not yet support tail call optimization. + isTailCall = false; + + // Analyze the operands of the call, assigning locations to each operand. + SmallVector<CCValAssign, 16> ArgLocs; + CCState ArgCCInfo(CallConv, IsVarArg, MF, TM, ArgLocs, *DAG.getContext()); + ArgCCInfo.AnalyzeCallOperands(Outs, CC_SystemZ); + + // Get a count of how many bytes are to be pushed on the stack. + unsigned NumBytes = ArgCCInfo.getNextStackOffset(); + + // Mark the start of the call. + Chain = DAG.getCALLSEQ_START(Chain, DAG.getConstant(NumBytes, PtrVT, true)); + + // Copy argument values to their designated locations. + SmallVector<std::pair<unsigned, SDValue>, 9> RegsToPass; + SmallVector<SDValue, 8> MemOpChains; + SDValue StackPtr; + for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) { + CCValAssign &VA = ArgLocs[I]; + SDValue ArgValue = OutVals[I]; + + if (VA.getLocInfo() == CCValAssign::Indirect) { + // Store the argument in a stack slot and pass its address. + SDValue SpillSlot = DAG.CreateStackTemporary(VA.getValVT()); + int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex(); + MemOpChains.push_back(DAG.getStore(Chain, DL, ArgValue, SpillSlot, + MachinePointerInfo::getFixedStack(FI), + false, false, 0)); + ArgValue = SpillSlot; + } else + ArgValue = convertValVTToLocVT(DAG, DL, VA, ArgValue); + + if (VA.isRegLoc()) + // Queue up the argument copies and emit them at the end. + RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue)); + else { + assert(VA.isMemLoc() && "Argument not register or memory"); + + // Work out the address of the stack slot. 
Unpromoted ints and + // floats are passed as right-justified 8-byte values. + if (!StackPtr.getNode()) + StackPtr = DAG.getCopyFromReg(Chain, DL, SystemZ::R15D, PtrVT); + unsigned Offset = SystemZMC::CallFrameSize + VA.getLocMemOffset(); + if (VA.getLocVT() == MVT::i32 || VA.getLocVT() == MVT::f32) + Offset += 4; + SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, + DAG.getIntPtrConstant(Offset)); + + // Emit the store. + MemOpChains.push_back(DAG.getStore(Chain, DL, ArgValue, Address, + MachinePointerInfo(), + false, false, 0)); + } + } + + // Join the stores, which are independent of one another. + if (!MemOpChains.empty()) + Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, + &MemOpChains[0], MemOpChains.size()); + + // Build a sequence of copy-to-reg nodes, chained and glued together. + SDValue Glue; + for (unsigned I = 0, E = RegsToPass.size(); I != E; ++I) { + Chain = DAG.getCopyToReg(Chain, DL, RegsToPass[I].first, + RegsToPass[I].second, Glue); + Glue = Chain.getValue(1); + } + + // Accept direct calls by converting symbolic call addresses to the + // associated Target* opcodes. + if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) { + Callee = DAG.getTargetGlobalAddress(G->getGlobal(), DL, PtrVT); + Callee = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Callee); + } else if (ExternalSymbolSDNode *E = dyn_cast<ExternalSymbolSDNode>(Callee)) { + Callee = DAG.getTargetExternalSymbol(E->getSymbol(), PtrVT); + Callee = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Callee); + } + + // The first call operand is the chain and the second is the target address. + SmallVector<SDValue, 8> Ops; + Ops.push_back(Chain); + Ops.push_back(Callee); + + // Add argument registers to the end of the list so that they are + // known live into the call. + for (unsigned I = 0, E = RegsToPass.size(); I != E; ++I) + Ops.push_back(DAG.getRegister(RegsToPass[I].first, + RegsToPass[I].second.getValueType())); + + // Glue the call to the argument copies, if any. + if (Glue.getNode()) + Ops.push_back(Glue); + + // Emit the call. + SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); + Chain = DAG.getNode(SystemZISD::CALL, DL, NodeTys, &Ops[0], Ops.size()); + Glue = Chain.getValue(1); + + // Mark the end of the call, which is glued to the call itself. + Chain = DAG.getCALLSEQ_END(Chain, + DAG.getConstant(NumBytes, PtrVT, true), + DAG.getConstant(0, PtrVT, true), + Glue); + Glue = Chain.getValue(1); + + // Assign locations to each value returned by this call. + SmallVector<CCValAssign, 16> RetLocs; + CCState RetCCInfo(CallConv, IsVarArg, MF, TM, RetLocs, *DAG.getContext()); + RetCCInfo.AnalyzeCallResult(Ins, RetCC_SystemZ); + + // Copy all of the result registers out of their specified physreg. + for (unsigned I = 0, E = RetLocs.size(); I != E; ++I) { + CCValAssign &VA = RetLocs[I]; + + // Copy the value out, gluing the copy to the end of the call sequence. + SDValue RetValue = DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), + VA.getLocVT(), Glue); + Chain = RetValue.getValue(1); + Glue = RetValue.getValue(2); + + // Convert the value of the return register into the value that's + // being returned. 
+ InVals.push_back(convertLocVTToValVT(DAG, DL, VA, Chain, RetValue)); + } + + return Chain; +} + +SDValue +SystemZTargetLowering::LowerReturn(SDValue Chain, + CallingConv::ID CallConv, bool IsVarArg, + const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, + DebugLoc DL, SelectionDAG &DAG) const { + MachineFunction &MF = DAG.getMachineFunction(); + + // Assign locations to each returned value. + SmallVector<CCValAssign, 16> RetLocs; + CCState RetCCInfo(CallConv, IsVarArg, MF, TM, RetLocs, *DAG.getContext()); + RetCCInfo.AnalyzeReturn(Outs, RetCC_SystemZ); + + // Quick exit for void returns + if (RetLocs.empty()) + return DAG.getNode(SystemZISD::RET_FLAG, DL, MVT::Other, Chain); + + // Copy the result values into the output registers. + SDValue Glue; + SmallVector<SDValue, 4> RetOps; + RetOps.push_back(Chain); + for (unsigned I = 0, E = RetLocs.size(); I != E; ++I) { + CCValAssign &VA = RetLocs[I]; + SDValue RetValue = OutVals[I]; + + // Make the return register live on exit. + assert(VA.isRegLoc() && "Can only return in registers!"); + + // Promote the value as required. + RetValue = convertValVTToLocVT(DAG, DL, VA, RetValue); + + // Chain and glue the copies together. + unsigned Reg = VA.getLocReg(); + Chain = DAG.getCopyToReg(Chain, DL, Reg, RetValue, Glue); + Glue = Chain.getValue(1); + RetOps.push_back(DAG.getRegister(Reg, VA.getLocVT())); + } + + // Update chain and glue. + RetOps[0] = Chain; + if (Glue.getNode()) + RetOps.push_back(Glue); + + return DAG.getNode(SystemZISD::RET_FLAG, DL, MVT::Other, + RetOps.data(), RetOps.size()); +} + +// CC is a comparison that will be implemented using an integer or +// floating-point comparison. Return the condition code mask for +// a branch on true. In the integer case, CCMASK_CMP_UO is set for +// unsigned comparisons and clear for signed ones. In the floating-point +// case, CCMASK_CMP_UO has its normal mask meaning (unordered). +static unsigned CCMaskForCondCode(ISD::CondCode CC) { +#define CONV(X) \ + case ISD::SET##X: return SystemZ::CCMASK_CMP_##X; \ + case ISD::SETO##X: return SystemZ::CCMASK_CMP_##X; \ + case ISD::SETU##X: return SystemZ::CCMASK_CMP_UO | SystemZ::CCMASK_CMP_##X + + switch (CC) { + default: + llvm_unreachable("Invalid integer condition!"); + + CONV(EQ); + CONV(NE); + CONV(GT); + CONV(GE); + CONV(LT); + CONV(LE); + + case ISD::SETO: return SystemZ::CCMASK_CMP_O; + case ISD::SETUO: return SystemZ::CCMASK_CMP_UO; + } +#undef CONV +} + +// If a comparison described by IsUnsigned, CCMask, CmpOp0 and CmpOp1 +// is suitable for CLI(Y), CHHSI or CLHHSI, adjust the operands as necessary. +static void adjustSubwordCmp(SelectionDAG &DAG, bool &IsUnsigned, + SDValue &CmpOp0, SDValue &CmpOp1, + unsigned &CCMask) { + // For us to make any changes, it must a comparison between a single-use + // load and a constant. + if (!CmpOp0.hasOneUse() || + CmpOp0.getOpcode() != ISD::LOAD || + CmpOp1.getOpcode() != ISD::Constant) + return; + + // We must have an 8- or 16-bit load. + LoadSDNode *Load = cast<LoadSDNode>(CmpOp0); + unsigned NumBits = Load->getMemoryVT().getStoreSizeInBits(); + if (NumBits != 8 && NumBits != 16) + return; + + // The load must be an extending one and the constant must be within the + // range of the unextended value. 
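  // Worked example (exposition): for "icmp slt (i32 (sextload i8 *P)), 0"
  // the code below turns the test into an unsigned "loaded byte > 127"
  // comparison, which matches CLI, the 8-bit compare-logical-immediate
  // instruction, instead of requiring the sign-extended value in a register.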
+ ConstantSDNode *Constant = cast<ConstantSDNode>(CmpOp1); + uint64_t Value = Constant->getZExtValue(); + uint64_t Mask = (1 << NumBits) - 1; + if (Load->getExtensionType() == ISD::SEXTLOAD) { + int64_t SignedValue = Constant->getSExtValue(); + if (uint64_t(SignedValue) + (1 << (NumBits - 1)) > Mask) + return; + // Unsigned comparison between two sign-extended values is equivalent + // to unsigned comparison between two zero-extended values. + if (IsUnsigned) + Value &= Mask; + else if (CCMask == SystemZ::CCMASK_CMP_EQ || + CCMask == SystemZ::CCMASK_CMP_NE) + // Any choice of IsUnsigned is OK for equality comparisons. + // We could use either CHHSI or CLHHSI for 16-bit comparisons, + // but since we use CLHHSI for zero extensions, it seems better + // to be consistent and do the same here. + Value &= Mask, IsUnsigned = true; + else if (NumBits == 8) { + // Try to treat the comparison as unsigned, so that we can use CLI. + // Adjust CCMask and Value as necessary. + if (Value == 0 && CCMask == SystemZ::CCMASK_CMP_LT) + // Test whether the high bit of the byte is set. + Value = 127, CCMask = SystemZ::CCMASK_CMP_GT, IsUnsigned = true; + else if (SignedValue == -1 && CCMask == SystemZ::CCMASK_CMP_GT) + // Test whether the high bit of the byte is clear. + Value = 128, CCMask = SystemZ::CCMASK_CMP_LT, IsUnsigned = true; + else + // No instruction exists for this combination. + return; + } + } else if (Load->getExtensionType() == ISD::ZEXTLOAD) { + if (Value > Mask) + return; + // Signed comparison between two zero-extended values is equivalent + // to unsigned comparison. + IsUnsigned = true; + } else + return; + + // Make sure that the first operand is an i32 of the right extension type. + ISD::LoadExtType ExtType = IsUnsigned ? ISD::ZEXTLOAD : ISD::SEXTLOAD; + if (CmpOp0.getValueType() != MVT::i32 || + Load->getExtensionType() != ExtType) + CmpOp0 = DAG.getExtLoad(ExtType, Load->getDebugLoc(), MVT::i32, + Load->getChain(), Load->getBasePtr(), + Load->getPointerInfo(), Load->getMemoryVT(), + Load->isVolatile(), Load->isNonTemporal(), + Load->getAlignment()); + + // Make sure that the second operand is an i32 with the right value. + if (CmpOp1.getValueType() != MVT::i32 || + Value != Constant->getZExtValue()) + CmpOp1 = DAG.getConstant(Value, MVT::i32); +} + +// Return true if a comparison described by CCMask, CmpOp0 and CmpOp1 +// is an equality comparison that is better implemented using unsigned +// rather than signed comparison instructions. +static bool preferUnsignedComparison(SelectionDAG &DAG, SDValue CmpOp0, + SDValue CmpOp1, unsigned CCMask) { + // The test must be for equality or inequality. + if (CCMask != SystemZ::CCMASK_CMP_EQ && CCMask != SystemZ::CCMASK_CMP_NE) + return false; + + if (CmpOp1.getOpcode() == ISD::Constant) { + uint64_t Value = cast<ConstantSDNode>(CmpOp1)->getSExtValue(); + + // If we're comparing with memory, prefer unsigned comparisons for + // values that are in the unsigned 16-bit range but not the signed + // 16-bit range. We want to use CLFHSI and CLGHSI. + if (CmpOp0.hasOneUse() && + ISD::isNormalLoad(CmpOp0.getNode()) && + (Value >= 32768 && Value < 65536)) + return true; + + // Use unsigned comparisons for values that are in the CLGFI range + // but not in the CGFI range. + if (CmpOp0.getValueType() == MVT::i64 && (Value >> 31) == 1) + return true; + + return false; + } + + // Prefer CL for zero-extended loads. + if (CmpOp1.getOpcode() == ISD::ZERO_EXTEND || + ISD::isZEXTLoad(CmpOp1.getNode())) + return true; + + // ...and for "in-register" zero extensions. 
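  // Illustrative examples: comparing a loaded value against 40000 falls in
  // the unsigned-but-not-signed 16-bit range above, so CLFHSI/CLGHSI become
  // usable; and (and i64 %x, 0xffffffff), checked next, is just a zero
  // extension in disguise, so an unsigned compare is again the natural fit.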
+ if (CmpOp1.getOpcode() == ISD::AND && CmpOp1.getValueType() == MVT::i64) { + SDValue Mask = CmpOp1.getOperand(1); + if (Mask.getOpcode() == ISD::Constant && + cast<ConstantSDNode>(Mask)->getZExtValue() == 0xffffffff) + return true; + } + + return false; +} + +// Return a target node that compares CmpOp0 and CmpOp1. Set CCMask to the +// 4-bit condition-code mask for CC. +static SDValue emitCmp(SelectionDAG &DAG, SDValue CmpOp0, SDValue CmpOp1, + ISD::CondCode CC, unsigned &CCMask) { + bool IsUnsigned = false; + CCMask = CCMaskForCondCode(CC); + if (!CmpOp0.getValueType().isFloatingPoint()) { + IsUnsigned = CCMask & SystemZ::CCMASK_CMP_UO; + CCMask &= ~SystemZ::CCMASK_CMP_UO; + adjustSubwordCmp(DAG, IsUnsigned, CmpOp0, CmpOp1, CCMask); + if (preferUnsignedComparison(DAG, CmpOp0, CmpOp1, CCMask)) + IsUnsigned = true; + } + + DebugLoc DL = CmpOp0.getDebugLoc(); + return DAG.getNode((IsUnsigned ? SystemZISD::UCMP : SystemZISD::CMP), + DL, MVT::Glue, CmpOp0, CmpOp1); +} + +// Lower a binary operation that produces two VT results, one in each +// half of a GR128 pair. Op0 and Op1 are the VT operands to the operation, +// Extend extends Op0 to a GR128, and Opcode performs the GR128 operation +// on the extended Op0 and (unextended) Op1. Store the even register result +// in Even and the odd register result in Odd. +static void lowerGR128Binary(SelectionDAG &DAG, DebugLoc DL, EVT VT, + unsigned Extend, unsigned Opcode, + SDValue Op0, SDValue Op1, + SDValue &Even, SDValue &Odd) { + SDNode *In128 = DAG.getMachineNode(Extend, DL, MVT::Untyped, Op0); + SDValue Result = DAG.getNode(Opcode, DL, MVT::Untyped, + SDValue(In128, 0), Op1); + bool Is32Bit = is32Bit(VT); + SDValue SubReg0 = DAG.getTargetConstant(SystemZ::even128(Is32Bit), VT); + SDValue SubReg1 = DAG.getTargetConstant(SystemZ::odd128(Is32Bit), VT); + SDNode *Reg0 = DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, + VT, Result, SubReg0); + SDNode *Reg1 = DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, + VT, Result, SubReg1); + Even = SDValue(Reg0, 0); + Odd = SDValue(Reg1, 0); +} + +SDValue SystemZTargetLowering::lowerBR_CC(SDValue Op, SelectionDAG &DAG) const { + SDValue Chain = Op.getOperand(0); + ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get(); + SDValue CmpOp0 = Op.getOperand(2); + SDValue CmpOp1 = Op.getOperand(3); + SDValue Dest = Op.getOperand(4); + DebugLoc DL = Op.getDebugLoc(); + + unsigned CCMask; + SDValue Flags = emitCmp(DAG, CmpOp0, CmpOp1, CC, CCMask); + return DAG.getNode(SystemZISD::BR_CCMASK, DL, Op.getValueType(), + Chain, DAG.getConstant(CCMask, MVT::i32), Dest, Flags); +} + +SDValue SystemZTargetLowering::lowerSELECT_CC(SDValue Op, + SelectionDAG &DAG) const { + SDValue CmpOp0 = Op.getOperand(0); + SDValue CmpOp1 = Op.getOperand(1); + SDValue TrueOp = Op.getOperand(2); + SDValue FalseOp = Op.getOperand(3); + ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get(); + DebugLoc DL = Op.getDebugLoc(); + + unsigned CCMask; + SDValue Flags = emitCmp(DAG, CmpOp0, CmpOp1, CC, CCMask); + + SmallVector<SDValue, 4> Ops; + Ops.push_back(TrueOp); + Ops.push_back(FalseOp); + Ops.push_back(DAG.getConstant(CCMask, MVT::i32)); + Ops.push_back(Flags); + + SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Glue); + return DAG.getNode(SystemZISD::SELECT_CCMASK, DL, VTs, &Ops[0], Ops.size()); +} + +SDValue SystemZTargetLowering::lowerGlobalAddress(GlobalAddressSDNode *Node, + SelectionDAG &DAG) const { + DebugLoc DL = Node->getDebugLoc(); + const GlobalValue *GV = Node->getGlobal(); + int64_t Offset = 
Node->getOffset(); + EVT PtrVT = getPointerTy(); + Reloc::Model RM = TM.getRelocationModel(); + CodeModel::Model CM = TM.getCodeModel(); + + SDValue Result; + if (Subtarget.isPC32DBLSymbol(GV, RM, CM)) { + // Make sure that the offset is aligned to a halfword. If it isn't, + // create an "anchor" at the previous 12-bit boundary. + // FIXME check whether there is a better way of handling this. + if (Offset & 1) { + Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT, + Offset & ~uint64_t(0xfff)); + Offset &= 0xfff; + } else { + Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT, Offset); + Offset = 0; + } + Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result); + } else { + Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, SystemZII::MO_GOT); + Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result); + Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Result, + MachinePointerInfo::getGOT(), false, false, false, 0); + } + + // If there was a non-zero offset that we didn't fold, create an explicit + // addition for it. + if (Offset != 0) + Result = DAG.getNode(ISD::ADD, DL, PtrVT, Result, + DAG.getConstant(Offset, PtrVT)); + + return Result; +} + +SDValue SystemZTargetLowering::lowerGlobalTLSAddress(GlobalAddressSDNode *Node, + SelectionDAG &DAG) const { + DebugLoc DL = Node->getDebugLoc(); + const GlobalValue *GV = Node->getGlobal(); + EVT PtrVT = getPointerTy(); + TLSModel::Model model = TM.getTLSModel(GV); + + if (model != TLSModel::LocalExec) + llvm_unreachable("only local-exec TLS mode supported"); + + // The high part of the thread pointer is in access register 0. + SDValue TPHi = DAG.getNode(SystemZISD::EXTRACT_ACCESS, DL, MVT::i32, + DAG.getConstant(0, MVT::i32)); + TPHi = DAG.getNode(ISD::ANY_EXTEND, DL, PtrVT, TPHi); + + // The low part of the thread pointer is in access register 1. + SDValue TPLo = DAG.getNode(SystemZISD::EXTRACT_ACCESS, DL, MVT::i32, + DAG.getConstant(1, MVT::i32)); + TPLo = DAG.getNode(ISD::ZERO_EXTEND, DL, PtrVT, TPLo); + + // Merge them into a single 64-bit address. + SDValue TPHiShifted = DAG.getNode(ISD::SHL, DL, PtrVT, TPHi, + DAG.getConstant(32, PtrVT)); + SDValue TP = DAG.getNode(ISD::OR, DL, PtrVT, TPHiShifted, TPLo); + + // Get the offset of GA from the thread pointer. + SystemZConstantPoolValue *CPV = + SystemZConstantPoolValue::Create(GV, SystemZCP::NTPOFF); + + // Force the offset into the constant pool and load it from there. + SDValue CPAddr = DAG.getConstantPool(CPV, PtrVT, 8); + SDValue Offset = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), + CPAddr, MachinePointerInfo::getConstantPool(), + false, false, false, 0); + + // Add the base and offset together. + return DAG.getNode(ISD::ADD, DL, PtrVT, TP, Offset); +} + +SDValue SystemZTargetLowering::lowerBlockAddress(BlockAddressSDNode *Node, + SelectionDAG &DAG) const { + DebugLoc DL = Node->getDebugLoc(); + const BlockAddress *BA = Node->getBlockAddress(); + int64_t Offset = Node->getOffset(); + EVT PtrVT = getPointerTy(); + + SDValue Result = DAG.getTargetBlockAddress(BA, PtrVT, Offset); + Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result); + return Result; +} + +SDValue SystemZTargetLowering::lowerJumpTable(JumpTableSDNode *JT, + SelectionDAG &DAG) const { + DebugLoc DL = JT->getDebugLoc(); + EVT PtrVT = getPointerTy(); + SDValue Result = DAG.getTargetJumpTable(JT->getIndex(), PtrVT); + + // Use LARL to load the address of the table. 
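Back in lowerGlobalAddress above: LARL can only reach halfword-aligned targets, so an odd offset is split into an anchor at the previous 0x1000 boundary of the offset (which stays even, given an aligned symbol) plus a small addend that becomes an explicit ADD. A minimal sketch of that split, mirroring the Offset & ~0xfff / Offset & 0xfff arithmetic (the helper name is illustrative); the PCREL_WRAPPER return for the jump table continues right after this aside.

#include <cassert>
#include <cstdint>
#include <utility>

// Split a symbol offset into { anchor, addend }, as the odd-offset path in
// lowerGlobalAddress does.  The anchor can be folded into the LARL
// relocation; the addend is added back afterwards.
static std::pair<uint64_t, uint64_t> splitSymbolOffset(uint64_t Offset) {
  if (Offset & 1)
    return { Offset & ~uint64_t(0xfff),   // previous 4KB boundary
             Offset & 0xfff };            // re-added with an explicit ADD
  return { Offset, 0 };                   // even offsets fold directly
}

int main() {
  assert(splitSymbolOffset(0x1234).second == 0);   // even: no extra ADD
  std::pair<uint64_t, uint64_t> S = splitSymbolOffset(0x1235);
  assert(S.first == 0x1000 && S.second == 0x235);  // anchor plus addend
  assert(S.first + S.second == 0x1235);            // value unchanged
  return 0;
}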
+ return DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result); +} + +SDValue SystemZTargetLowering::lowerConstantPool(ConstantPoolSDNode *CP, + SelectionDAG &DAG) const { + DebugLoc DL = CP->getDebugLoc(); + EVT PtrVT = getPointerTy(); + + SDValue Result; + if (CP->isMachineConstantPoolEntry()) + Result = DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT, + CP->getAlignment()); + else + Result = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, + CP->getAlignment(), CP->getOffset()); + + // Use LARL to load the address of the constant pool entry. + return DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result); +} + +SDValue SystemZTargetLowering::lowerBITCAST(SDValue Op, + SelectionDAG &DAG) const { + DebugLoc DL = Op.getDebugLoc(); + SDValue In = Op.getOperand(0); + EVT InVT = In.getValueType(); + EVT ResVT = Op.getValueType(); + + SDValue SubReg32 = DAG.getTargetConstant(SystemZ::subreg_32bit, MVT::i64); + SDValue Shift32 = DAG.getConstant(32, MVT::i64); + if (InVT == MVT::i32 && ResVT == MVT::f32) { + SDValue In64 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, In); + SDValue Shift = DAG.getNode(ISD::SHL, DL, MVT::i64, In64, Shift32); + SDValue Out64 = DAG.getNode(ISD::BITCAST, DL, MVT::f64, Shift); + SDNode *Out = DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, + MVT::f32, Out64, SubReg32); + return SDValue(Out, 0); + } + if (InVT == MVT::f32 && ResVT == MVT::i32) { + SDNode *U64 = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::f64); + SDNode *In64 = DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, DL, + MVT::f64, SDValue(U64, 0), In, SubReg32); + SDValue Out64 = DAG.getNode(ISD::BITCAST, DL, MVT::i64, SDValue(In64, 0)); + SDValue Shift = DAG.getNode(ISD::SRL, DL, MVT::i64, Out64, Shift32); + SDValue Out = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Shift); + return Out; + } + llvm_unreachable("Unexpected bitcast combination"); +} + +SDValue SystemZTargetLowering::lowerVASTART(SDValue Op, + SelectionDAG &DAG) const { + MachineFunction &MF = DAG.getMachineFunction(); + SystemZMachineFunctionInfo *FuncInfo = + MF.getInfo<SystemZMachineFunctionInfo>(); + EVT PtrVT = getPointerTy(); + + SDValue Chain = Op.getOperand(0); + SDValue Addr = Op.getOperand(1); + const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); + DebugLoc DL = Op.getDebugLoc(); + + // The initial values of each field. + const unsigned NumFields = 4; + SDValue Fields[NumFields] = { + DAG.getConstant(FuncInfo->getVarArgsFirstGPR(), PtrVT), + DAG.getConstant(FuncInfo->getVarArgsFirstFPR(), PtrVT), + DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT), + DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(), PtrVT) + }; + + // Store each field into its respective slot. 
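Before the store loop below, it helps to see what those four 8-byte slots are. Assuming a 64-bit host and the usual ELF s390x va_list layout, they line up with the structure sketched here; the field names are illustrative, only the offsets matter.

#include <cstddef>
#include <cstdint>

// Rough picture of the va_list object that lowerVASTART fills in: four
// 8-byte fields written at offsets 0, 8, 16 and 24 by the loop that follows.
struct VaListSketch {
  int64_t NumNamedGPRs;     // getVarArgsFirstGPR(): first vararg GPR index
  int64_t NumNamedFPRs;     // getVarArgsFirstFPR(): first vararg FPR index
  void   *OverflowArgArea;  // frame index of the incoming stack arguments
  void   *RegSaveArea;      // frame index of the register save area
};

static_assert(offsetof(VaListSketch, NumNamedFPRs)    == 8,  "slot 1 at +8");
static_assert(offsetof(VaListSketch, OverflowArgArea) == 16, "slot 2 at +16");
static_assert(offsetof(VaListSketch, RegSaveArea)     == 24, "slot 3 at +24");
static_assert(sizeof(VaListSketch) == 32, "lowerVACOPY copies 32 bytes");

int main() { return 0; }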
+ SDValue MemOps[NumFields]; + unsigned Offset = 0; + for (unsigned I = 0; I < NumFields; ++I) { + SDValue FieldAddr = Addr; + if (Offset != 0) + FieldAddr = DAG.getNode(ISD::ADD, DL, PtrVT, FieldAddr, + DAG.getIntPtrConstant(Offset)); + MemOps[I] = DAG.getStore(Chain, DL, Fields[I], FieldAddr, + MachinePointerInfo(SV, Offset), + false, false, 0); + Offset += 8; + } + return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps, NumFields); +} + +SDValue SystemZTargetLowering::lowerVACOPY(SDValue Op, + SelectionDAG &DAG) const { + SDValue Chain = Op.getOperand(0); + SDValue DstPtr = Op.getOperand(1); + SDValue SrcPtr = Op.getOperand(2); + const Value *DstSV = cast<SrcValueSDNode>(Op.getOperand(3))->getValue(); + const Value *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4))->getValue(); + DebugLoc DL = Op.getDebugLoc(); + + return DAG.getMemcpy(Chain, DL, DstPtr, SrcPtr, DAG.getIntPtrConstant(32), + /*Align*/8, /*isVolatile*/false, /*AlwaysInline*/false, + MachinePointerInfo(DstSV), MachinePointerInfo(SrcSV)); +} + +SDValue SystemZTargetLowering:: +lowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const { + SDValue Chain = Op.getOperand(0); + SDValue Size = Op.getOperand(1); + DebugLoc DL = Op.getDebugLoc(); + + unsigned SPReg = getStackPointerRegisterToSaveRestore(); + + // Get a reference to the stack pointer. + SDValue OldSP = DAG.getCopyFromReg(Chain, DL, SPReg, MVT::i64); + + // Get the new stack pointer value. + SDValue NewSP = DAG.getNode(ISD::SUB, DL, MVT::i64, OldSP, Size); + + // Copy the new stack pointer back. + Chain = DAG.getCopyToReg(Chain, DL, SPReg, NewSP); + + // The allocated data lives above the 160 bytes allocated for the standard + // frame, plus any outgoing stack arguments. We don't know how much that + // amounts to yet, so emit a special ADJDYNALLOC placeholder. + SDValue ArgAdjust = DAG.getNode(SystemZISD::ADJDYNALLOC, DL, MVT::i64); + SDValue Result = DAG.getNode(ISD::ADD, DL, MVT::i64, NewSP, ArgAdjust); + + SDValue Ops[2] = { Result, Chain }; + return DAG.getMergeValues(Ops, 2, DL); +} + +SDValue SystemZTargetLowering::lowerUMUL_LOHI(SDValue Op, + SelectionDAG &DAG) const { + EVT VT = Op.getValueType(); + DebugLoc DL = Op.getDebugLoc(); + assert(!is32Bit(VT) && "Only support 64-bit UMUL_LOHI"); + + // UMUL_LOHI64 returns the low result in the odd register and the high + // result in the even register. UMUL_LOHI is defined to return the + // low half first, so the results are in reverse order. + SDValue Ops[2]; + lowerGR128Binary(DAG, DL, VT, SystemZ::AEXT128_64, SystemZISD::UMUL_LOHI64, + Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]); + return DAG.getMergeValues(Ops, 2, DL); +} + +SDValue SystemZTargetLowering::lowerSDIVREM(SDValue Op, + SelectionDAG &DAG) const { + SDValue Op0 = Op.getOperand(0); + SDValue Op1 = Op.getOperand(1); + EVT VT = Op.getValueType(); + DebugLoc DL = Op.getDebugLoc(); + + // We use DSGF for 32-bit division. + if (is32Bit(VT)) { + Op0 = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op0); + Op1 = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op1); + } + + // DSG(F) takes a 64-bit dividend, so the even register in the GR128 + // input is "don't care". The instruction returns the remainder in + // the even register and the quotient in the odd register. 
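The even/odd register split that lowerGR128Binary encodes is easy to misread, so here is the convention as a tiny host-side model before the call that follows: DSG(F) leaves the remainder in the even half and the quotient in the odd half, while ISD::SDIVREM wants (quotient, remainder), hence the swapped Ops[1]/Ops[0] arguments. The struct and function below are purely illustrative.

#include <cassert>
#include <cstdint>

// Model of a GR128 even/odd pair after a divide: remainder lands in the even
// register, quotient in the odd one.  lowerSDIVREM then returns
// Ops[0] = odd (quotient) and Ops[1] = even (remainder).
struct GR128Pair { int64_t Even, Odd; };

static GR128Pair dsgfModel(int64_t Dividend, int64_t Divisor) {
  GR128Pair P;
  P.Even = Dividend % Divisor;  // remainder -> even register
  P.Odd  = Dividend / Divisor;  // quotient  -> odd register
  return P;
}

int main() {
  GR128Pair P = dsgfModel(23, 5);
  assert(P.Odd == 4 && P.Even == 3);  // quotient 4, remainder 3
  return 0;
}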
+ SDValue Ops[2]; + lowerGR128Binary(DAG, DL, VT, SystemZ::AEXT128_64, SystemZISD::SDIVREM64, + Op0, Op1, Ops[1], Ops[0]); + return DAG.getMergeValues(Ops, 2, DL); +} + +SDValue SystemZTargetLowering::lowerUDIVREM(SDValue Op, + SelectionDAG &DAG) const { + EVT VT = Op.getValueType(); + DebugLoc DL = Op.getDebugLoc(); + + // DL(G) uses a double-width dividend, so we need to clear the even + // register in the GR128 input. The instruction returns the remainder + // in the even register and the quotient in the odd register. + SDValue Ops[2]; + if (is32Bit(VT)) + lowerGR128Binary(DAG, DL, VT, SystemZ::ZEXT128_32, SystemZISD::UDIVREM32, + Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]); + else + lowerGR128Binary(DAG, DL, VT, SystemZ::ZEXT128_64, SystemZISD::UDIVREM64, + Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]); + return DAG.getMergeValues(Ops, 2, DL); +} + +SDValue SystemZTargetLowering::lowerOR(SDValue Op, SelectionDAG &DAG) const { + assert(Op.getValueType() == MVT::i64 && "Should be 64-bit operation"); + + // Get the known-zero masks for each operand. + SDValue Ops[] = { Op.getOperand(0), Op.getOperand(1) }; + APInt KnownZero[2], KnownOne[2]; + DAG.ComputeMaskedBits(Ops[0], KnownZero[0], KnownOne[0]); + DAG.ComputeMaskedBits(Ops[1], KnownZero[1], KnownOne[1]); + + // See if the upper 32 bits of one operand and the lower 32 bits of the + // other are known zero. They are the low and high operands respectively. + uint64_t Masks[] = { KnownZero[0].getZExtValue(), + KnownZero[1].getZExtValue() }; + unsigned High, Low; + if ((Masks[0] >> 32) == 0xffffffff && uint32_t(Masks[1]) == 0xffffffff) + High = 1, Low = 0; + else if ((Masks[1] >> 32) == 0xffffffff && uint32_t(Masks[0]) == 0xffffffff) + High = 0, Low = 1; + else + return Op; + + SDValue LowOp = Ops[Low]; + SDValue HighOp = Ops[High]; + + // If the high part is a constant, we're better off using IILH. + if (HighOp.getOpcode() == ISD::Constant) + return Op; + + // If the low part is a constant that is outside the range of LHI, + // then we're better off using IILF. + if (LowOp.getOpcode() == ISD::Constant) { + int64_t Value = int32_t(cast<ConstantSDNode>(LowOp)->getZExtValue()); + if (!isInt<16>(Value)) + return Op; + } + + // Check whether the high part is an AND that doesn't change the + // high 32 bits and just masks out low bits. We can skip it if so. + if (HighOp.getOpcode() == ISD::AND && + HighOp.getOperand(1).getOpcode() == ISD::Constant) { + ConstantSDNode *MaskNode = cast<ConstantSDNode>(HighOp.getOperand(1)); + uint64_t Mask = MaskNode->getZExtValue() | Masks[High]; + if ((Mask >> 32) == 0xffffffff) + HighOp = HighOp.getOperand(0); + } + + // Take advantage of the fact that all GR32 operations only change the + // low 32 bits by truncating Low to an i32 and inserting it directly + // using a subreg. The interesting cases are those where the truncation + // can be folded. + DebugLoc DL = Op.getDebugLoc(); + SDValue Low32 = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, LowOp); + SDValue SubReg32 = DAG.getTargetConstant(SystemZ::subreg_32bit, MVT::i64); + SDNode *Result = DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, DL, + MVT::i64, HighOp, Low32, SubReg32); + return SDValue(Result, 0); +} + +// Op is an 8-, 16-bit or 32-bit ATOMIC_LOAD_* operation. Lower the first +// two into the fullword ATOMIC_LOADW_* operation given by Opcode. 
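One more aside before the ATOMIC_LOADW lowering defined next: the known-bits combine in lowerOR above is just the observation that, when one operand only occupies the high word and the other only the low word, the OR degenerates to inserting the low 32 bits into the other operand, which the GR32 subreg insertion provides for free. A plain-integer check of that equivalence, with illustrative names:

#include <cassert>
#include <cstdint>

// If HighOp's low 32 bits and LowOp's high 32 bits are known zero, then
// HighOp | LowOp == INSERT_SUBREG(HighOp, trunc-to-i32(LowOp), subreg_32bit).
static uint64_t insertLowWord(uint64_t HighOp, uint32_t Low32) {
  return (HighOp & ~uint64_t(0xffffffff)) | Low32;
}

int main() {
  uint64_t HighOp = 0x1234567800000000;  // low word known zero
  uint64_t LowOp  = 0x000000009abcdef0;  // high word known zero
  assert((HighOp | LowOp) == insertLowWord(HighOp, uint32_t(LowOp)));
  return 0;
}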
+SDValue SystemZTargetLowering::lowerATOMIC_LOAD(SDValue Op, + SelectionDAG &DAG, + unsigned Opcode) const { + AtomicSDNode *Node = cast<AtomicSDNode>(Op.getNode()); + + // 32-bit operations need no code outside the main loop. + EVT NarrowVT = Node->getMemoryVT(); + EVT WideVT = MVT::i32; + if (NarrowVT == WideVT) + return Op; + + int64_t BitSize = NarrowVT.getSizeInBits(); + SDValue ChainIn = Node->getChain(); + SDValue Addr = Node->getBasePtr(); + SDValue Src2 = Node->getVal(); + MachineMemOperand *MMO = Node->getMemOperand(); + DebugLoc DL = Node->getDebugLoc(); + EVT PtrVT = Addr.getValueType(); + + // Convert atomic subtracts of constants into additions. + if (Opcode == SystemZISD::ATOMIC_LOADW_SUB) + if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Src2)) { + Opcode = SystemZISD::ATOMIC_LOADW_ADD; + Src2 = DAG.getConstant(-Const->getSExtValue(), Src2.getValueType()); + } + + // Get the address of the containing word. + SDValue AlignedAddr = DAG.getNode(ISD::AND, DL, PtrVT, Addr, + DAG.getConstant(-4, PtrVT)); + + // Get the number of bits that the word must be rotated left in order + // to bring the field to the top bits of a GR32. + SDValue BitShift = DAG.getNode(ISD::SHL, DL, PtrVT, Addr, + DAG.getConstant(3, PtrVT)); + BitShift = DAG.getNode(ISD::TRUNCATE, DL, WideVT, BitShift); + + // Get the complementing shift amount, for rotating a field in the top + // bits back to its proper position. + SDValue NegBitShift = DAG.getNode(ISD::SUB, DL, WideVT, + DAG.getConstant(0, WideVT), BitShift); + + // Extend the source operand to 32 bits and prepare it for the inner loop. + // ATOMIC_SWAPW uses RISBG to rotate the field left, but all other + // operations require the source to be shifted in advance. (This shift + // can be folded if the source is constant.) For AND and NAND, the lower + // bits must be set, while for other opcodes they should be left clear. + if (Opcode != SystemZISD::ATOMIC_SWAPW) + Src2 = DAG.getNode(ISD::SHL, DL, WideVT, Src2, + DAG.getConstant(32 - BitSize, WideVT)); + if (Opcode == SystemZISD::ATOMIC_LOADW_AND || + Opcode == SystemZISD::ATOMIC_LOADW_NAND) + Src2 = DAG.getNode(ISD::OR, DL, WideVT, Src2, + DAG.getConstant(uint32_t(-1) >> BitSize, WideVT)); + + // Construct the ATOMIC_LOADW_* node. + SDVTList VTList = DAG.getVTList(WideVT, MVT::Other); + SDValue Ops[] = { ChainIn, AlignedAddr, Src2, BitShift, NegBitShift, + DAG.getConstant(BitSize, WideVT) }; + SDValue AtomicOp = DAG.getMemIntrinsicNode(Opcode, DL, VTList, Ops, + array_lengthof(Ops), + NarrowVT, MMO); + + // Rotate the result of the final CS so that the field is in the lower + // bits of a GR32, then truncate it. + SDValue ResultShift = DAG.getNode(ISD::ADD, DL, WideVT, BitShift, + DAG.getConstant(BitSize, WideVT)); + SDValue Result = DAG.getNode(ISD::ROTL, DL, WideVT, AtomicOp, ResultShift); + + SDValue RetOps[2] = { Result, AtomicOp.getValue(1) }; + return DAG.getMergeValues(RetOps, 2, DL); +} + +// Node is an 8- or 16-bit ATOMIC_CMP_SWAP operation. Lower the first two +// into a fullword ATOMIC_CMP_SWAPW operation. +SDValue SystemZTargetLowering::lowerATOMIC_CMP_SWAP(SDValue Op, + SelectionDAG &DAG) const { + AtomicSDNode *Node = cast<AtomicSDNode>(Op.getNode()); + + // We have native support for 32-bit compare and swap. 
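The same containing-word trick drives the compare-and-swap lowering that continues below, so the address arithmetic is worth spelling out once: the aligned word is loaded from Addr & -4, and rotating it left by 8 * (Addr & 3) bits brings the addressed subword into the high bits of a GR32. A host-side model of just that step, with illustrative helper names:

#include <cassert>
#include <cstdint>

// Field addressing used by the subword atomic lowerings: load the aligned
// word, then rotate left by the byte offset times 8 so the field occupies
// the top bits (SystemZ is big-endian, so byte 0 already sits there).
static uint32_t rotl32(uint32_t V, unsigned N) {
  N &= 31;
  return N ? (V << N) | (V >> (32 - N)) : V;
}

int main() {
  const uint8_t Bytes[4] = { 0x11, 0x22, 0x33, 0x44 };  // big-endian word
  uint32_t Word = 0;
  for (int I = 0; I < 4; ++I)
    Word = (Word << 8) | Bytes[I];                      // 0x11223344

  for (unsigned ByteOfs = 0; ByteOfs < 4; ++ByteOfs) {
    unsigned BitShift = 8 * ByteOfs;            // low bits of (Addr << 3)
    uint32_t Rotated = rotl32(Word, BitShift);  // RLL %OldVal, 0(%BitShift)
    assert((Rotated >> 24) == Bytes[ByteOfs]);  // field now in the top byte
  }
  return 0;
}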
+ EVT NarrowVT = Node->getMemoryVT(); + EVT WideVT = MVT::i32; + if (NarrowVT == WideVT) + return Op; + + int64_t BitSize = NarrowVT.getSizeInBits(); + SDValue ChainIn = Node->getOperand(0); + SDValue Addr = Node->getOperand(1); + SDValue CmpVal = Node->getOperand(2); + SDValue SwapVal = Node->getOperand(3); + MachineMemOperand *MMO = Node->getMemOperand(); + DebugLoc DL = Node->getDebugLoc(); + EVT PtrVT = Addr.getValueType(); + + // Get the address of the containing word. + SDValue AlignedAddr = DAG.getNode(ISD::AND, DL, PtrVT, Addr, + DAG.getConstant(-4, PtrVT)); + + // Get the number of bits that the word must be rotated left in order + // to bring the field to the top bits of a GR32. + SDValue BitShift = DAG.getNode(ISD::SHL, DL, PtrVT, Addr, + DAG.getConstant(3, PtrVT)); + BitShift = DAG.getNode(ISD::TRUNCATE, DL, WideVT, BitShift); + + // Get the complementing shift amount, for rotating a field in the top + // bits back to its proper position. + SDValue NegBitShift = DAG.getNode(ISD::SUB, DL, WideVT, + DAG.getConstant(0, WideVT), BitShift); + + // Construct the ATOMIC_CMP_SWAPW node. + SDVTList VTList = DAG.getVTList(WideVT, MVT::Other); + SDValue Ops[] = { ChainIn, AlignedAddr, CmpVal, SwapVal, BitShift, + NegBitShift, DAG.getConstant(BitSize, WideVT) }; + SDValue AtomicOp = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_CMP_SWAPW, DL, + VTList, Ops, array_lengthof(Ops), + NarrowVT, MMO); + return AtomicOp; +} + +SDValue SystemZTargetLowering::lowerSTACKSAVE(SDValue Op, + SelectionDAG &DAG) const { + MachineFunction &MF = DAG.getMachineFunction(); + MF.getInfo<SystemZMachineFunctionInfo>()->setManipulatesSP(true); + return DAG.getCopyFromReg(Op.getOperand(0), Op.getDebugLoc(), + SystemZ::R15D, Op.getValueType()); +} + +SDValue SystemZTargetLowering::lowerSTACKRESTORE(SDValue Op, + SelectionDAG &DAG) const { + MachineFunction &MF = DAG.getMachineFunction(); + MF.getInfo<SystemZMachineFunctionInfo>()->setManipulatesSP(true); + return DAG.getCopyToReg(Op.getOperand(0), Op.getDebugLoc(), + SystemZ::R15D, Op.getOperand(1)); +} + +SDValue SystemZTargetLowering::LowerOperation(SDValue Op, + SelectionDAG &DAG) const { + switch (Op.getOpcode()) { + case ISD::BR_CC: + return lowerBR_CC(Op, DAG); + case ISD::SELECT_CC: + return lowerSELECT_CC(Op, DAG); + case ISD::GlobalAddress: + return lowerGlobalAddress(cast<GlobalAddressSDNode>(Op), DAG); + case ISD::GlobalTLSAddress: + return lowerGlobalTLSAddress(cast<GlobalAddressSDNode>(Op), DAG); + case ISD::BlockAddress: + return lowerBlockAddress(cast<BlockAddressSDNode>(Op), DAG); + case ISD::JumpTable: + return lowerJumpTable(cast<JumpTableSDNode>(Op), DAG); + case ISD::ConstantPool: + return lowerConstantPool(cast<ConstantPoolSDNode>(Op), DAG); + case ISD::BITCAST: + return lowerBITCAST(Op, DAG); + case ISD::VASTART: + return lowerVASTART(Op, DAG); + case ISD::VACOPY: + return lowerVACOPY(Op, DAG); + case ISD::DYNAMIC_STACKALLOC: + return lowerDYNAMIC_STACKALLOC(Op, DAG); + case ISD::UMUL_LOHI: + return lowerUMUL_LOHI(Op, DAG); + case ISD::SDIVREM: + return lowerSDIVREM(Op, DAG); + case ISD::UDIVREM: + return lowerUDIVREM(Op, DAG); + case ISD::OR: + return lowerOR(Op, DAG); + case ISD::ATOMIC_SWAP: + return lowerATOMIC_LOAD(Op, DAG, SystemZISD::ATOMIC_SWAPW); + case ISD::ATOMIC_LOAD_ADD: + return lowerATOMIC_LOAD(Op, DAG, SystemZISD::ATOMIC_LOADW_ADD); + case ISD::ATOMIC_LOAD_SUB: + return lowerATOMIC_LOAD(Op, DAG, SystemZISD::ATOMIC_LOADW_SUB); + case ISD::ATOMIC_LOAD_AND: + return lowerATOMIC_LOAD(Op, DAG, SystemZISD::ATOMIC_LOADW_AND); + case 
ISD::ATOMIC_LOAD_OR: + return lowerATOMIC_LOAD(Op, DAG, SystemZISD::ATOMIC_LOADW_OR); + case ISD::ATOMIC_LOAD_XOR: + return lowerATOMIC_LOAD(Op, DAG, SystemZISD::ATOMIC_LOADW_XOR); + case ISD::ATOMIC_LOAD_NAND: + return lowerATOMIC_LOAD(Op, DAG, SystemZISD::ATOMIC_LOADW_NAND); + case ISD::ATOMIC_LOAD_MIN: + return lowerATOMIC_LOAD(Op, DAG, SystemZISD::ATOMIC_LOADW_MIN); + case ISD::ATOMIC_LOAD_MAX: + return lowerATOMIC_LOAD(Op, DAG, SystemZISD::ATOMIC_LOADW_MAX); + case ISD::ATOMIC_LOAD_UMIN: + return lowerATOMIC_LOAD(Op, DAG, SystemZISD::ATOMIC_LOADW_UMIN); + case ISD::ATOMIC_LOAD_UMAX: + return lowerATOMIC_LOAD(Op, DAG, SystemZISD::ATOMIC_LOADW_UMAX); + case ISD::ATOMIC_CMP_SWAP: + return lowerATOMIC_CMP_SWAP(Op, DAG); + case ISD::STACKSAVE: + return lowerSTACKSAVE(Op, DAG); + case ISD::STACKRESTORE: + return lowerSTACKRESTORE(Op, DAG); + default: + llvm_unreachable("Unexpected node to lower"); + } +} + +const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const { +#define OPCODE(NAME) case SystemZISD::NAME: return "SystemZISD::" #NAME + switch (Opcode) { + OPCODE(RET_FLAG); + OPCODE(CALL); + OPCODE(PCREL_WRAPPER); + OPCODE(CMP); + OPCODE(UCMP); + OPCODE(BR_CCMASK); + OPCODE(SELECT_CCMASK); + OPCODE(ADJDYNALLOC); + OPCODE(EXTRACT_ACCESS); + OPCODE(UMUL_LOHI64); + OPCODE(SDIVREM64); + OPCODE(UDIVREM32); + OPCODE(UDIVREM64); + OPCODE(ATOMIC_SWAPW); + OPCODE(ATOMIC_LOADW_ADD); + OPCODE(ATOMIC_LOADW_SUB); + OPCODE(ATOMIC_LOADW_AND); + OPCODE(ATOMIC_LOADW_OR); + OPCODE(ATOMIC_LOADW_XOR); + OPCODE(ATOMIC_LOADW_NAND); + OPCODE(ATOMIC_LOADW_MIN); + OPCODE(ATOMIC_LOADW_MAX); + OPCODE(ATOMIC_LOADW_UMIN); + OPCODE(ATOMIC_LOADW_UMAX); + OPCODE(ATOMIC_CMP_SWAPW); + } + return NULL; +#undef OPCODE +} + +//===----------------------------------------------------------------------===// +// Custom insertion +//===----------------------------------------------------------------------===// + +// Create a new basic block after MBB. +static MachineBasicBlock *emitBlockAfter(MachineBasicBlock *MBB) { + MachineFunction &MF = *MBB->getParent(); + MachineBasicBlock *NewMBB = MF.CreateMachineBasicBlock(MBB->getBasicBlock()); + MF.insert(llvm::next(MachineFunction::iterator(MBB)), NewMBB); + return NewMBB; +} + +// Split MBB after MI and return the new block (the one that contains +// instructions after MI). +static MachineBasicBlock *splitBlockAfter(MachineInstr *MI, + MachineBasicBlock *MBB) { + MachineBasicBlock *NewMBB = emitBlockAfter(MBB); + NewMBB->splice(NewMBB->begin(), MBB, + llvm::next(MachineBasicBlock::iterator(MI)), + MBB->end()); + NewMBB->transferSuccessorsAndUpdatePHIs(MBB); + return NewMBB; +} + +// Implement EmitInstrWithCustomInserter for pseudo Select* instruction MI. +MachineBasicBlock * +SystemZTargetLowering::emitSelect(MachineInstr *MI, + MachineBasicBlock *MBB) const { + const SystemZInstrInfo *TII = TM.getInstrInfo(); + + unsigned DestReg = MI->getOperand(0).getReg(); + unsigned TrueReg = MI->getOperand(1).getReg(); + unsigned FalseReg = MI->getOperand(2).getReg(); + unsigned CCMask = MI->getOperand(3).getImm(); + DebugLoc DL = MI->getDebugLoc(); + + MachineBasicBlock *StartMBB = MBB; + MachineBasicBlock *JoinMBB = splitBlockAfter(MI, MBB); + MachineBasicBlock *FalseMBB = emitBlockAfter(StartMBB); + + // StartMBB: + // ... + // TrueVal = ... 
+ // cmpTY ccX, r1, r2 + // jCC JoinMBB + // # fallthrough to FalseMBB + MBB = StartMBB; + BuildMI(MBB, DL, TII->get(SystemZ::BRCL)).addImm(CCMask).addMBB(JoinMBB); + MBB->addSuccessor(JoinMBB); + MBB->addSuccessor(FalseMBB); + + // FalseMBB: + // # fallthrough to JoinMBB + MBB = FalseMBB; + MBB->addSuccessor(JoinMBB); + + // JoinMBB: + // %Result = phi [ %FalseReg, FalseMBB ], [ %TrueReg, StartMBB ] + // ... + MBB = JoinMBB; + BuildMI(*MBB, MBB->begin(), DL, TII->get(SystemZ::PHI), DestReg) + .addReg(TrueReg).addMBB(StartMBB) + .addReg(FalseReg).addMBB(FalseMBB); + + MI->eraseFromParent(); + return JoinMBB; +} + +// Implement EmitInstrWithCustomInserter for pseudo ATOMIC_LOAD{,W}_* +// or ATOMIC_SWAP{,W} instruction MI. BinOpcode is the instruction that +// performs the binary operation elided by "*", or 0 for ATOMIC_SWAP{,W}. +// BitSize is the width of the field in bits, or 0 if this is a partword +// ATOMIC_LOADW_* or ATOMIC_SWAPW instruction, in which case the bitsize +// is one of the operands. Invert says whether the field should be +// inverted after performing BinOpcode (e.g. for NAND). +MachineBasicBlock * +SystemZTargetLowering::emitAtomicLoadBinary(MachineInstr *MI, + MachineBasicBlock *MBB, + unsigned BinOpcode, + unsigned BitSize, + bool Invert) const { + const SystemZInstrInfo *TII = TM.getInstrInfo(); + MachineFunction &MF = *MBB->getParent(); + MachineRegisterInfo &MRI = MF.getRegInfo(); + unsigned MaskNE = CCMaskForCondCode(ISD::SETNE); + bool IsSubWord = (BitSize < 32); + + // Extract the operands. Base can be a register or a frame index. + // Src2 can be a register or immediate. + unsigned Dest = MI->getOperand(0).getReg(); + MachineOperand Base = earlyUseOperand(MI->getOperand(1)); + int64_t Disp = MI->getOperand(2).getImm(); + MachineOperand Src2 = earlyUseOperand(MI->getOperand(3)); + unsigned BitShift = (IsSubWord ? MI->getOperand(4).getReg() : 0); + unsigned NegBitShift = (IsSubWord ? MI->getOperand(5).getReg() : 0); + DebugLoc DL = MI->getDebugLoc(); + if (IsSubWord) + BitSize = MI->getOperand(6).getImm(); + + // Subword operations use 32-bit registers. + const TargetRegisterClass *RC = (BitSize <= 32 ? + &SystemZ::GR32BitRegClass : + &SystemZ::GR64BitRegClass); + unsigned LOpcode = BitSize <= 32 ? SystemZ::L : SystemZ::LG; + unsigned CSOpcode = BitSize <= 32 ? SystemZ::CS : SystemZ::CSG; + + // Get the right opcodes for the displacement. + LOpcode = TII->getOpcodeForOffset(LOpcode, Disp); + CSOpcode = TII->getOpcodeForOffset(CSOpcode, Disp); + assert(LOpcode && CSOpcode && "Displacement out of range"); + + // Create virtual registers for temporary results. + unsigned OrigVal = MRI.createVirtualRegister(RC); + unsigned OldVal = MRI.createVirtualRegister(RC); + unsigned NewVal = (BinOpcode || IsSubWord ? + MRI.createVirtualRegister(RC) : Src2.getReg()); + unsigned RotatedOldVal = (IsSubWord ? MRI.createVirtualRegister(RC) : OldVal); + unsigned RotatedNewVal = (IsSubWord ? MRI.createVirtualRegister(RC) : NewVal); + + // Insert a basic block for the main loop. + MachineBasicBlock *StartMBB = MBB; + MachineBasicBlock *DoneMBB = splitBlockAfter(MI, MBB); + MachineBasicBlock *LoopMBB = emitBlockAfter(StartMBB); + + // StartMBB: + // ... 
+ // %OrigVal = L Disp(%Base) + // # fall through to LoopMMB + MBB = StartMBB; + BuildMI(MBB, DL, TII->get(LOpcode), OrigVal) + .addOperand(Base).addImm(Disp).addReg(0); + MBB->addSuccessor(LoopMBB); + + // LoopMBB: + // %OldVal = phi [ %OrigVal, StartMBB ], [ %Dest, LoopMBB ] + // %RotatedOldVal = RLL %OldVal, 0(%BitShift) + // %RotatedNewVal = OP %RotatedOldVal, %Src2 + // %NewVal = RLL %RotatedNewVal, 0(%NegBitShift) + // %Dest = CS %OldVal, %NewVal, Disp(%Base) + // JNE LoopMBB + // # fall through to DoneMMB + MBB = LoopMBB; + BuildMI(MBB, DL, TII->get(SystemZ::PHI), OldVal) + .addReg(OrigVal).addMBB(StartMBB) + .addReg(Dest).addMBB(LoopMBB); + if (IsSubWord) + BuildMI(MBB, DL, TII->get(SystemZ::RLL), RotatedOldVal) + .addReg(OldVal).addReg(BitShift).addImm(0); + if (Invert) { + // Perform the operation normally and then invert every bit of the field. + unsigned Tmp = MRI.createVirtualRegister(RC); + BuildMI(MBB, DL, TII->get(BinOpcode), Tmp) + .addReg(RotatedOldVal).addOperand(Src2); + if (BitSize < 32) + // XILF with the upper BitSize bits set. + BuildMI(MBB, DL, TII->get(SystemZ::XILF32), RotatedNewVal) + .addReg(Tmp).addImm(uint32_t(~0 << (32 - BitSize))); + else if (BitSize == 32) + // XILF with every bit set. + BuildMI(MBB, DL, TII->get(SystemZ::XILF32), RotatedNewVal) + .addReg(Tmp).addImm(~uint32_t(0)); + else { + // Use LCGR and add -1 to the result, which is more compact than + // an XILF, XILH pair. + unsigned Tmp2 = MRI.createVirtualRegister(RC); + BuildMI(MBB, DL, TII->get(SystemZ::LCGR), Tmp2).addReg(Tmp); + BuildMI(MBB, DL, TII->get(SystemZ::AGHI), RotatedNewVal) + .addReg(Tmp2).addImm(-1); + } + } else if (BinOpcode) + // A simply binary operation. + BuildMI(MBB, DL, TII->get(BinOpcode), RotatedNewVal) + .addReg(RotatedOldVal).addOperand(Src2); + else if (IsSubWord) + // Use RISBG to rotate Src2 into position and use it to replace the + // field in RotatedOldVal. + BuildMI(MBB, DL, TII->get(SystemZ::RISBG32), RotatedNewVal) + .addReg(RotatedOldVal).addReg(Src2.getReg()) + .addImm(32).addImm(31 + BitSize).addImm(32 - BitSize); + if (IsSubWord) + BuildMI(MBB, DL, TII->get(SystemZ::RLL), NewVal) + .addReg(RotatedNewVal).addReg(NegBitShift).addImm(0); + BuildMI(MBB, DL, TII->get(CSOpcode), Dest) + .addReg(OldVal).addReg(NewVal).addOperand(Base).addImm(Disp); + BuildMI(MBB, DL, TII->get(SystemZ::BRCL)).addImm(MaskNE).addMBB(LoopMBB); + MBB->addSuccessor(LoopMBB); + MBB->addSuccessor(DoneMBB); + + MI->eraseFromParent(); + return DoneMBB; +} + +// Implement EmitInstrWithCustomInserter for pseudo +// ATOMIC_LOAD{,W}_{,U}{MIN,MAX} instruction MI. CompareOpcode is the +// instruction that should be used to compare the current field with the +// minimum or maximum value. KeepOldMask is the BRC condition-code mask +// for when the current field should be kept. BitSize is the width of +// the field in bits, or 0 if this is a partword ATOMIC_LOADW_* instruction. +MachineBasicBlock * +SystemZTargetLowering::emitAtomicLoadMinMax(MachineInstr *MI, + MachineBasicBlock *MBB, + unsigned CompareOpcode, + unsigned KeepOldMask, + unsigned BitSize) const { + const SystemZInstrInfo *TII = TM.getInstrInfo(); + MachineFunction &MF = *MBB->getParent(); + MachineRegisterInfo &MRI = MF.getRegInfo(); + unsigned MaskNE = CCMaskForCondCode(ISD::SETNE); + bool IsSubWord = (BitSize < 32); + + // Extract the operands. Base can be a register or a frame index. 
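Before the min/max variant extracts its operands below, the loop just emitted is easier to follow as host-level C++ over std::atomic. This is only a sketch of the control flow: rotl32, atomicRmwWord and the callable BinOp are illustrative stand-ins rather than LLVM API, and the rotation amounts mirror BitShift/NegBitShift.

#include <atomic>
#include <cstdint>

// Shape of the compare-and-swap retry loop built by emitAtomicLoadBinary,
// specialised to a 32-bit containing word.
static uint32_t rotl32(uint32_t V, unsigned N) {
  N &= 31;
  return N ? (V << N) | (V >> (32 - N)) : V;
}

template <typename OpT>
uint32_t atomicRmwWord(std::atomic<uint32_t> &Mem, unsigned BitShift,
                       uint32_t Src2, OpT BinOp) {
  uint32_t OldVal = Mem.load();                          // L %OrigVal
  for (;;) {
    uint32_t RotatedOld = rotl32(OldVal, BitShift);      // RLL
    uint32_t RotatedNew = BinOp(RotatedOld, Src2);       // AR/NR/OR/XR/...
    uint32_t NewVal = rotl32(RotatedNew, 32 - BitShift); // RLL by NegBitShift
    if (Mem.compare_exchange_weak(OldVal, NewVal))       // CS; JNE retries
      return OldVal;                                     // %Dest: old word
  }
}

int main() {
  std::atomic<uint32_t> Word(0x11223344);
  // OR 0xFF into the byte at offset 1 (BitShift = 8), as an ATOMIC_LOADW_OR
  // with Src2 already shifted into the high bits would do.
  atomicRmwWord(Word, 8, 0xFF000000u,
                [](uint32_t A, uint32_t B) { return A | B; });
  return Word.load() == 0x11FF3344 ? 0 : 1;
}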
+ unsigned Dest = MI->getOperand(0).getReg(); + MachineOperand Base = earlyUseOperand(MI->getOperand(1)); + int64_t Disp = MI->getOperand(2).getImm(); + unsigned Src2 = MI->getOperand(3).getReg(); + unsigned BitShift = (IsSubWord ? MI->getOperand(4).getReg() : 0); + unsigned NegBitShift = (IsSubWord ? MI->getOperand(5).getReg() : 0); + DebugLoc DL = MI->getDebugLoc(); + if (IsSubWord) + BitSize = MI->getOperand(6).getImm(); + + // Subword operations use 32-bit registers. + const TargetRegisterClass *RC = (BitSize <= 32 ? + &SystemZ::GR32BitRegClass : + &SystemZ::GR64BitRegClass); + unsigned LOpcode = BitSize <= 32 ? SystemZ::L : SystemZ::LG; + unsigned CSOpcode = BitSize <= 32 ? SystemZ::CS : SystemZ::CSG; + + // Get the right opcodes for the displacement. + LOpcode = TII->getOpcodeForOffset(LOpcode, Disp); + CSOpcode = TII->getOpcodeForOffset(CSOpcode, Disp); + assert(LOpcode && CSOpcode && "Displacement out of range"); + + // Create virtual registers for temporary results. + unsigned OrigVal = MRI.createVirtualRegister(RC); + unsigned OldVal = MRI.createVirtualRegister(RC); + unsigned NewVal = MRI.createVirtualRegister(RC); + unsigned RotatedOldVal = (IsSubWord ? MRI.createVirtualRegister(RC) : OldVal); + unsigned RotatedAltVal = (IsSubWord ? MRI.createVirtualRegister(RC) : Src2); + unsigned RotatedNewVal = (IsSubWord ? MRI.createVirtualRegister(RC) : NewVal); + + // Insert 3 basic blocks for the loop. + MachineBasicBlock *StartMBB = MBB; + MachineBasicBlock *DoneMBB = splitBlockAfter(MI, MBB); + MachineBasicBlock *LoopMBB = emitBlockAfter(StartMBB); + MachineBasicBlock *UseAltMBB = emitBlockAfter(LoopMBB); + MachineBasicBlock *UpdateMBB = emitBlockAfter(UseAltMBB); + + // StartMBB: + // ... + // %OrigVal = L Disp(%Base) + // # fall through to LoopMMB + MBB = StartMBB; + BuildMI(MBB, DL, TII->get(LOpcode), OrigVal) + .addOperand(Base).addImm(Disp).addReg(0); + MBB->addSuccessor(LoopMBB); + + // LoopMBB: + // %OldVal = phi [ %OrigVal, StartMBB ], [ %Dest, UpdateMBB ] + // %RotatedOldVal = RLL %OldVal, 0(%BitShift) + // CompareOpcode %RotatedOldVal, %Src2 + // BRCL KeepOldMask, UpdateMBB + MBB = LoopMBB; + BuildMI(MBB, DL, TII->get(SystemZ::PHI), OldVal) + .addReg(OrigVal).addMBB(StartMBB) + .addReg(Dest).addMBB(UpdateMBB); + if (IsSubWord) + BuildMI(MBB, DL, TII->get(SystemZ::RLL), RotatedOldVal) + .addReg(OldVal).addReg(BitShift).addImm(0); + BuildMI(MBB, DL, TII->get(CompareOpcode)) + .addReg(RotatedOldVal).addReg(Src2); + BuildMI(MBB, DL, TII->get(SystemZ::BRCL)) + .addImm(KeepOldMask).addMBB(UpdateMBB); + MBB->addSuccessor(UpdateMBB); + MBB->addSuccessor(UseAltMBB); + + // UseAltMBB: + // %RotatedAltVal = RISBG %RotatedOldVal, %Src2, 32, 31 + BitSize, 0 + // # fall through to UpdateMMB + MBB = UseAltMBB; + if (IsSubWord) + BuildMI(MBB, DL, TII->get(SystemZ::RISBG32), RotatedAltVal) + .addReg(RotatedOldVal).addReg(Src2) + .addImm(32).addImm(31 + BitSize).addImm(0); + MBB->addSuccessor(UpdateMBB); + + // UpdateMBB: + // %RotatedNewVal = PHI [ %RotatedOldVal, LoopMBB ], + // [ %RotatedAltVal, UseAltMBB ] + // %NewVal = RLL %RotatedNewVal, 0(%NegBitShift) + // %Dest = CS %OldVal, %NewVal, Disp(%Base) + // JNE LoopMBB + // # fall through to DoneMMB + MBB = UpdateMBB; + BuildMI(MBB, DL, TII->get(SystemZ::PHI), RotatedNewVal) + .addReg(RotatedOldVal).addMBB(LoopMBB) + .addReg(RotatedAltVal).addMBB(UseAltMBB); + if (IsSubWord) + BuildMI(MBB, DL, TII->get(SystemZ::RLL), NewVal) + .addReg(RotatedNewVal).addReg(NegBitShift).addImm(0); + BuildMI(MBB, DL, TII->get(CSOpcode), Dest) + 
.addReg(OldVal).addReg(NewVal).addOperand(Base).addImm(Disp); + BuildMI(MBB, DL, TII->get(SystemZ::BRCL)).addImm(MaskNE).addMBB(LoopMBB); + MBB->addSuccessor(LoopMBB); + MBB->addSuccessor(DoneMBB); + + MI->eraseFromParent(); + return DoneMBB; +} + +// Implement EmitInstrWithCustomInserter for pseudo ATOMIC_CMP_SWAPW +// instruction MI. +MachineBasicBlock * +SystemZTargetLowering::emitAtomicCmpSwapW(MachineInstr *MI, + MachineBasicBlock *MBB) const { + const SystemZInstrInfo *TII = TM.getInstrInfo(); + MachineFunction &MF = *MBB->getParent(); + MachineRegisterInfo &MRI = MF.getRegInfo(); + unsigned MaskNE = CCMaskForCondCode(ISD::SETNE); + + // Extract the operands. Base can be a register or a frame index. + unsigned Dest = MI->getOperand(0).getReg(); + MachineOperand Base = earlyUseOperand(MI->getOperand(1)); + int64_t Disp = MI->getOperand(2).getImm(); + unsigned OrigCmpVal = MI->getOperand(3).getReg(); + unsigned OrigSwapVal = MI->getOperand(4).getReg(); + unsigned BitShift = MI->getOperand(5).getReg(); + unsigned NegBitShift = MI->getOperand(6).getReg(); + int64_t BitSize = MI->getOperand(7).getImm(); + DebugLoc DL = MI->getDebugLoc(); + + const TargetRegisterClass *RC = &SystemZ::GR32BitRegClass; + + // Get the right opcodes for the displacement. + unsigned LOpcode = TII->getOpcodeForOffset(SystemZ::L, Disp); + unsigned CSOpcode = TII->getOpcodeForOffset(SystemZ::CS, Disp); + assert(LOpcode && CSOpcode && "Displacement out of range"); + + // Create virtual registers for temporary results. + unsigned OrigOldVal = MRI.createVirtualRegister(RC); + unsigned OldVal = MRI.createVirtualRegister(RC); + unsigned CmpVal = MRI.createVirtualRegister(RC); + unsigned SwapVal = MRI.createVirtualRegister(RC); + unsigned StoreVal = MRI.createVirtualRegister(RC); + unsigned RetryOldVal = MRI.createVirtualRegister(RC); + unsigned RetryCmpVal = MRI.createVirtualRegister(RC); + unsigned RetrySwapVal = MRI.createVirtualRegister(RC); + + // Insert 2 basic blocks for the loop. + MachineBasicBlock *StartMBB = MBB; + MachineBasicBlock *DoneMBB = splitBlockAfter(MI, MBB); + MachineBasicBlock *LoopMBB = emitBlockAfter(StartMBB); + MachineBasicBlock *SetMBB = emitBlockAfter(LoopMBB); + + // StartMBB: + // ... + // %OrigOldVal = L Disp(%Base) + // # fall through to LoopMMB + MBB = StartMBB; + BuildMI(MBB, DL, TII->get(LOpcode), OrigOldVal) + .addOperand(Base).addImm(Disp).addReg(0); + MBB->addSuccessor(LoopMBB); + + // LoopMBB: + // %OldVal = phi [ %OrigOldVal, EntryBB ], [ %RetryOldVal, SetMBB ] + // %CmpVal = phi [ %OrigCmpVal, EntryBB ], [ %RetryCmpVal, SetMBB ] + // %SwapVal = phi [ %OrigSwapVal, EntryBB ], [ %RetrySwapVal, SetMBB ] + // %Dest = RLL %OldVal, BitSize(%BitShift) + // ^^ The low BitSize bits contain the field + // of interest. + // %RetryCmpVal = RISBG32 %CmpVal, %Dest, 32, 63-BitSize, 0 + // ^^ Replace the upper 32-BitSize bits of the + // comparison value with those that we loaded, + // so that we can use a full word comparison. 
+ // CR %Dest, %RetryCmpVal + // JNE DoneMBB + // # Fall through to SetMBB + MBB = LoopMBB; + BuildMI(MBB, DL, TII->get(SystemZ::PHI), OldVal) + .addReg(OrigOldVal).addMBB(StartMBB) + .addReg(RetryOldVal).addMBB(SetMBB); + BuildMI(MBB, DL, TII->get(SystemZ::PHI), CmpVal) + .addReg(OrigCmpVal).addMBB(StartMBB) + .addReg(RetryCmpVal).addMBB(SetMBB); + BuildMI(MBB, DL, TII->get(SystemZ::PHI), SwapVal) + .addReg(OrigSwapVal).addMBB(StartMBB) + .addReg(RetrySwapVal).addMBB(SetMBB); + BuildMI(MBB, DL, TII->get(SystemZ::RLL), Dest) + .addReg(OldVal).addReg(BitShift).addImm(BitSize); + BuildMI(MBB, DL, TII->get(SystemZ::RISBG32), RetryCmpVal) + .addReg(CmpVal).addReg(Dest).addImm(32).addImm(63 - BitSize).addImm(0); + BuildMI(MBB, DL, TII->get(SystemZ::CR)) + .addReg(Dest).addReg(RetryCmpVal); + BuildMI(MBB, DL, TII->get(SystemZ::BRCL)).addImm(MaskNE).addMBB(DoneMBB); + MBB->addSuccessor(DoneMBB); + MBB->addSuccessor(SetMBB); + + // SetMBB: + // %RetrySwapVal = RISBG32 %SwapVal, %Dest, 32, 63-BitSize, 0 + // ^^ Replace the upper 32-BitSize bits of the new + // value with those that we loaded. + // %StoreVal = RLL %RetrySwapVal, -BitSize(%NegBitShift) + // ^^ Rotate the new field to its proper position. + // %RetryOldVal = CS %Dest, %StoreVal, Disp(%Base) + // JNE LoopMBB + // # fall through to DoneMBB + MBB = SetMBB; + BuildMI(MBB, DL, TII->get(SystemZ::RISBG32), RetrySwapVal) + .addReg(SwapVal).addReg(Dest).addImm(32).addImm(63 - BitSize).addImm(0); + BuildMI(MBB, DL, TII->get(SystemZ::RLL), StoreVal) + .addReg(RetrySwapVal).addReg(NegBitShift).addImm(-BitSize); + BuildMI(MBB, DL, TII->get(CSOpcode), RetryOldVal) + .addReg(OldVal).addReg(StoreVal).addOperand(Base).addImm(Disp); + BuildMI(MBB, DL, TII->get(SystemZ::BRCL)).addImm(MaskNE).addMBB(LoopMBB); + MBB->addSuccessor(LoopMBB); + MBB->addSuccessor(DoneMBB); + + MI->eraseFromParent(); + return DoneMBB; +} + +// Emit an extension from a GR32 or GR64 to a GR128. ClearEven is true +// if the high register of the GR128 value must be cleared or false if +// it's "don't care". SubReg is subreg_low32 when extending a GR32 +// and subreg_low when extending a GR64.
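Since the GR128 pairs keep reappearing, here is what the *EXT128 pseudos expanded by emitExt128 (defined next) mean at the value level: the source ends up in the low (odd) half and the high (even) half is either zeroed or left undefined. A small host-side model with illustrative names, not the DAG code itself:

#include <cassert>
#include <cstdint>

// ZEXT128_64 / ZEXT128_32 clear the even (high) register; AEXT128_64 leaves
// it undefined.  DL(G) then divides the full High:Low value, which is why
// the unsigned DIVREM path needs the zero-extending form.
struct GR128Sketch { uint64_t High, Low; };   // even, odd registers

static GR128Sketch zext128_64(uint64_t Src) { return { 0, Src }; }
static GR128Sketch zext128_32(uint32_t Src) { return { 0, Src }; }

int main() {
  GR128Sketch A = zext128_64(0x123456789abcdef0);
  assert(A.High == 0 && A.Low == 0x123456789abcdef0);
  GR128Sketch B = zext128_32(0x9abcdef0u);
  assert(B.High == 0 && B.Low == 0x9abcdef0);
  return 0;
}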
+MachineBasicBlock * +SystemZTargetLowering::emitExt128(MachineInstr *MI, + MachineBasicBlock *MBB, + bool ClearEven, unsigned SubReg) const { + const SystemZInstrInfo *TII = TM.getInstrInfo(); + MachineFunction &MF = *MBB->getParent(); + MachineRegisterInfo &MRI = MF.getRegInfo(); + DebugLoc DL = MI->getDebugLoc(); + + unsigned Dest = MI->getOperand(0).getReg(); + unsigned Src = MI->getOperand(1).getReg(); + unsigned In128 = MRI.createVirtualRegister(&SystemZ::GR128BitRegClass); + + BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::IMPLICIT_DEF), In128); + if (ClearEven) { + unsigned NewIn128 = MRI.createVirtualRegister(&SystemZ::GR128BitRegClass); + unsigned Zero64 = MRI.createVirtualRegister(&SystemZ::GR64BitRegClass); + + BuildMI(*MBB, MI, DL, TII->get(SystemZ::LLILL), Zero64) + .addImm(0); + BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::INSERT_SUBREG), NewIn128) + .addReg(In128).addReg(Zero64).addImm(SystemZ::subreg_high); + In128 = NewIn128; + } + BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::INSERT_SUBREG), Dest) + .addReg(In128).addReg(Src).addImm(SubReg); + + MI->eraseFromParent(); + return MBB; +} + +MachineBasicBlock *SystemZTargetLowering:: +EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *MBB) const { + switch (MI->getOpcode()) { + case SystemZ::Select32: + case SystemZ::SelectF32: + case SystemZ::Select64: + case SystemZ::SelectF64: + case SystemZ::SelectF128: + return emitSelect(MI, MBB); + + case SystemZ::AEXT128_64: + return emitExt128(MI, MBB, false, SystemZ::subreg_low); + case SystemZ::ZEXT128_32: + return emitExt128(MI, MBB, true, SystemZ::subreg_low32); + case SystemZ::ZEXT128_64: + return emitExt128(MI, MBB, true, SystemZ::subreg_low); + + case SystemZ::ATOMIC_SWAPW: + return emitAtomicLoadBinary(MI, MBB, 0, 0); + case SystemZ::ATOMIC_SWAP_32: + return emitAtomicLoadBinary(MI, MBB, 0, 32); + case SystemZ::ATOMIC_SWAP_64: + return emitAtomicLoadBinary(MI, MBB, 0, 64); + + case SystemZ::ATOMIC_LOADW_AR: + return emitAtomicLoadBinary(MI, MBB, SystemZ::AR, 0); + case SystemZ::ATOMIC_LOADW_AFI: + return emitAtomicLoadBinary(MI, MBB, SystemZ::AFI, 0); + case SystemZ::ATOMIC_LOAD_AR: + return emitAtomicLoadBinary(MI, MBB, SystemZ::AR, 32); + case SystemZ::ATOMIC_LOAD_AHI: + return emitAtomicLoadBinary(MI, MBB, SystemZ::AHI, 32); + case SystemZ::ATOMIC_LOAD_AFI: + return emitAtomicLoadBinary(MI, MBB, SystemZ::AFI, 32); + case SystemZ::ATOMIC_LOAD_AGR: + return emitAtomicLoadBinary(MI, MBB, SystemZ::AGR, 64); + case SystemZ::ATOMIC_LOAD_AGHI: + return emitAtomicLoadBinary(MI, MBB, SystemZ::AGHI, 64); + case SystemZ::ATOMIC_LOAD_AGFI: + return emitAtomicLoadBinary(MI, MBB, SystemZ::AGFI, 64); + + case SystemZ::ATOMIC_LOADW_SR: + return emitAtomicLoadBinary(MI, MBB, SystemZ::SR, 0); + case SystemZ::ATOMIC_LOAD_SR: + return emitAtomicLoadBinary(MI, MBB, SystemZ::SR, 32); + case SystemZ::ATOMIC_LOAD_SGR: + return emitAtomicLoadBinary(MI, MBB, SystemZ::SGR, 64); + + case SystemZ::ATOMIC_LOADW_NR: + return emitAtomicLoadBinary(MI, MBB, SystemZ::NR, 0); + case SystemZ::ATOMIC_LOADW_NILH: + return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH32, 0); + case SystemZ::ATOMIC_LOAD_NR: + return emitAtomicLoadBinary(MI, MBB, SystemZ::NR, 32); + case SystemZ::ATOMIC_LOAD_NILL32: + return emitAtomicLoadBinary(MI, MBB, SystemZ::NILL32, 32); + case SystemZ::ATOMIC_LOAD_NILH32: + return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH32, 32); + case SystemZ::ATOMIC_LOAD_NILF32: + return emitAtomicLoadBinary(MI, MBB, SystemZ::NILF32, 32); + case SystemZ::ATOMIC_LOAD_NGR: + return 
emitAtomicLoadBinary(MI, MBB, SystemZ::NGR, 64); + case SystemZ::ATOMIC_LOAD_NILL: + return emitAtomicLoadBinary(MI, MBB, SystemZ::NILL, 64); + case SystemZ::ATOMIC_LOAD_NILH: + return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH, 64); + case SystemZ::ATOMIC_LOAD_NIHL: + return emitAtomicLoadBinary(MI, MBB, SystemZ::NIHL, 64); + case SystemZ::ATOMIC_LOAD_NIHH: + return emitAtomicLoadBinary(MI, MBB, SystemZ::NIHH, 64); + case SystemZ::ATOMIC_LOAD_NILF: + return emitAtomicLoadBinary(MI, MBB, SystemZ::NILF, 64); + case SystemZ::ATOMIC_LOAD_NIHF: + return emitAtomicLoadBinary(MI, MBB, SystemZ::NIHF, 64); + + case SystemZ::ATOMIC_LOADW_OR: + return emitAtomicLoadBinary(MI, MBB, SystemZ::OR, 0); + case SystemZ::ATOMIC_LOADW_OILH: + return emitAtomicLoadBinary(MI, MBB, SystemZ::OILH32, 0); + case SystemZ::ATOMIC_LOAD_OR: + return emitAtomicLoadBinary(MI, MBB, SystemZ::OR, 32); + case SystemZ::ATOMIC_LOAD_OILL32: + return emitAtomicLoadBinary(MI, MBB, SystemZ::OILL32, 32); + case SystemZ::ATOMIC_LOAD_OILH32: + return emitAtomicLoadBinary(MI, MBB, SystemZ::OILH32, 32); + case SystemZ::ATOMIC_LOAD_OILF32: + return emitAtomicLoadBinary(MI, MBB, SystemZ::OILF32, 32); + case SystemZ::ATOMIC_LOAD_OGR: + return emitAtomicLoadBinary(MI, MBB, SystemZ::OGR, 64); + case SystemZ::ATOMIC_LOAD_OILL: + return emitAtomicLoadBinary(MI, MBB, SystemZ::OILL, 64); + case SystemZ::ATOMIC_LOAD_OILH: + return emitAtomicLoadBinary(MI, MBB, SystemZ::OILH, 64); + case SystemZ::ATOMIC_LOAD_OIHL: + return emitAtomicLoadBinary(MI, MBB, SystemZ::OIHL, 64); + case SystemZ::ATOMIC_LOAD_OIHH: + return emitAtomicLoadBinary(MI, MBB, SystemZ::OIHH, 64); + case SystemZ::ATOMIC_LOAD_OILF: + return emitAtomicLoadBinary(MI, MBB, SystemZ::OILF, 64); + case SystemZ::ATOMIC_LOAD_OIHF: + return emitAtomicLoadBinary(MI, MBB, SystemZ::OIHF, 64); + + case SystemZ::ATOMIC_LOADW_XR: + return emitAtomicLoadBinary(MI, MBB, SystemZ::XR, 0); + case SystemZ::ATOMIC_LOADW_XILF: + return emitAtomicLoadBinary(MI, MBB, SystemZ::XILF32, 0); + case SystemZ::ATOMIC_LOAD_XR: + return emitAtomicLoadBinary(MI, MBB, SystemZ::XR, 32); + case SystemZ::ATOMIC_LOAD_XILF32: + return emitAtomicLoadBinary(MI, MBB, SystemZ::XILF32, 32); + case SystemZ::ATOMIC_LOAD_XGR: + return emitAtomicLoadBinary(MI, MBB, SystemZ::XGR, 64); + case SystemZ::ATOMIC_LOAD_XILF: + return emitAtomicLoadBinary(MI, MBB, SystemZ::XILF, 64); + case SystemZ::ATOMIC_LOAD_XIHF: + return emitAtomicLoadBinary(MI, MBB, SystemZ::XIHF, 64); + + case SystemZ::ATOMIC_LOADW_NRi: + return emitAtomicLoadBinary(MI, MBB, SystemZ::NR, 0, true); + case SystemZ::ATOMIC_LOADW_NILHi: + return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH32, 0, true); + case SystemZ::ATOMIC_LOAD_NRi: + return emitAtomicLoadBinary(MI, MBB, SystemZ::NR, 32, true); + case SystemZ::ATOMIC_LOAD_NILL32i: + return emitAtomicLoadBinary(MI, MBB, SystemZ::NILL32, 32, true); + case SystemZ::ATOMIC_LOAD_NILH32i: + return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH32, 32, true); + case SystemZ::ATOMIC_LOAD_NILF32i: + return emitAtomicLoadBinary(MI, MBB, SystemZ::NILF32, 32, true); + case SystemZ::ATOMIC_LOAD_NGRi: + return emitAtomicLoadBinary(MI, MBB, SystemZ::NGR, 64, true); + case SystemZ::ATOMIC_LOAD_NILLi: + return emitAtomicLoadBinary(MI, MBB, SystemZ::NILL, 64, true); + case SystemZ::ATOMIC_LOAD_NILHi: + return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH, 64, true); + case SystemZ::ATOMIC_LOAD_NIHLi: + return emitAtomicLoadBinary(MI, MBB, SystemZ::NIHL, 64, true); + case SystemZ::ATOMIC_LOAD_NIHHi: + return emitAtomicLoadBinary(MI, MBB, 
SystemZ::NIHH, 64, true); + case SystemZ::ATOMIC_LOAD_NILFi: + return emitAtomicLoadBinary(MI, MBB, SystemZ::NILF, 64, true); + case SystemZ::ATOMIC_LOAD_NIHFi: + return emitAtomicLoadBinary(MI, MBB, SystemZ::NIHF, 64, true); + + case SystemZ::ATOMIC_LOADW_MIN: + return emitAtomicLoadMinMax(MI, MBB, SystemZ::CR, + SystemZ::CCMASK_CMP_LE, 0); + case SystemZ::ATOMIC_LOAD_MIN_32: + return emitAtomicLoadMinMax(MI, MBB, SystemZ::CR, + SystemZ::CCMASK_CMP_LE, 32); + case SystemZ::ATOMIC_LOAD_MIN_64: + return emitAtomicLoadMinMax(MI, MBB, SystemZ::CGR, + SystemZ::CCMASK_CMP_LE, 64); + + case SystemZ::ATOMIC_LOADW_MAX: + return emitAtomicLoadMinMax(MI, MBB, SystemZ::CR, + SystemZ::CCMASK_CMP_GE, 0); + case SystemZ::ATOMIC_LOAD_MAX_32: + return emitAtomicLoadMinMax(MI, MBB, SystemZ::CR, + SystemZ::CCMASK_CMP_GE, 32); + case SystemZ::ATOMIC_LOAD_MAX_64: + return emitAtomicLoadMinMax(MI, MBB, SystemZ::CGR, + SystemZ::CCMASK_CMP_GE, 64); + + case SystemZ::ATOMIC_LOADW_UMIN: + return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLR, + SystemZ::CCMASK_CMP_LE, 0); + case SystemZ::ATOMIC_LOAD_UMIN_32: + return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLR, + SystemZ::CCMASK_CMP_LE, 32); + case SystemZ::ATOMIC_LOAD_UMIN_64: + return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLGR, + SystemZ::CCMASK_CMP_LE, 64); + + case SystemZ::ATOMIC_LOADW_UMAX: + return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLR, + SystemZ::CCMASK_CMP_GE, 0); + case SystemZ::ATOMIC_LOAD_UMAX_32: + return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLR, + SystemZ::CCMASK_CMP_GE, 32); + case SystemZ::ATOMIC_LOAD_UMAX_64: + return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLGR, + SystemZ::CCMASK_CMP_GE, 64); + + case SystemZ::ATOMIC_CMP_SWAPW: + return emitAtomicCmpSwapW(MI, MBB); + default: + llvm_unreachable("Unexpected instr type to insert"); + } +} diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.h b/contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.h new file mode 100644 index 0000000..eea820c --- /dev/null +++ b/contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.h @@ -0,0 +1,212 @@ +//===-- SystemZISelLowering.h - SystemZ DAG lowering interface --*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the interfaces that SystemZ uses to lower LLVM code into a +// selection DAG. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TARGET_SystemZ_ISELLOWERING_H +#define LLVM_TARGET_SystemZ_ISELLOWERING_H + +#include "SystemZ.h" +#include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/Target/TargetLowering.h" + +namespace llvm { +namespace SystemZISD { + enum { + FIRST_NUMBER = ISD::BUILTIN_OP_END, + + // Return with a flag operand. Operand 0 is the chain operand. + RET_FLAG, + + // Calls a function. Operand 0 is the chain operand and operand 1 + // is the target address. The arguments start at operand 2. + // There is an optional glue operand at the end. + CALL, + + // Wraps a TargetGlobalAddress that should be loaded using PC-relative + // accesses (LARL). Operand 0 is the address. + PCREL_WRAPPER, + + // Signed integer and floating-point comparisons. The operands are the + // two values to compare. + CMP, + + // Likewise unsigned integer comparison. + UCMP, + + // Branches if a condition is true. 
Operand 0 is the chain operand; + // operand 1 is the 4-bit condition-code mask, with bit N in + // big-endian order meaning "branch if CC=N"; operand 2 is the + // target block and operand 3 is the flag operand. + BR_CCMASK, + + // Selects between operand 0 and operand 1. Operand 2 is the + // mask of condition-code values for which operand 0 should be + // chosen over operand 1; it has the same form as BR_CCMASK. + // Operand 3 is the flag operand. + SELECT_CCMASK, + + // Evaluates to the gap between the stack pointer and the + // base of the dynamically-allocatable area. + ADJDYNALLOC, + + // Extracts the value of a 32-bit access register. Operand 0 is + // the number of the register. + EXTRACT_ACCESS, + + // Wrappers around the ISD opcodes of the same name. The output and + // first input operands are GR128s. The trailing numbers are the + // widths of the second operand in bits. + UMUL_LOHI64, + SDIVREM64, + UDIVREM32, + UDIVREM64, + + // Wrappers around the inner loop of an 8- or 16-bit ATOMIC_SWAP or + // ATOMIC_LOAD_<op>. + // + // Operand 0: the address of the containing 32-bit-aligned field + // Operand 1: the second operand of <op>, in the high bits of an i32 + // for everything except ATOMIC_SWAPW + // Operand 2: how many bits to rotate the i32 left to bring the first + // operand into the high bits + // Operand 3: the negative of operand 2, for rotating the other way + // Operand 4: the width of the field in bits (8 or 16) + ATOMIC_SWAPW = ISD::FIRST_TARGET_MEMORY_OPCODE, + ATOMIC_LOADW_ADD, + ATOMIC_LOADW_SUB, + ATOMIC_LOADW_AND, + ATOMIC_LOADW_OR, + ATOMIC_LOADW_XOR, + ATOMIC_LOADW_NAND, + ATOMIC_LOADW_MIN, + ATOMIC_LOADW_MAX, + ATOMIC_LOADW_UMIN, + ATOMIC_LOADW_UMAX, + + // A wrapper around the inner loop of an ATOMIC_CMP_SWAP. + // + // Operand 0: the address of the containing 32-bit-aligned field + // Operand 1: the compare value, in the low bits of an i32 + // Operand 2: the swap value, in the low bits of an i32 + // Operand 3: how many bits to rotate the i32 left to bring the first + // operand into the high bits + // Operand 4: the negative of operand 2, for rotating the other way + // Operand 5: the width of the field in bits (8 or 16) + ATOMIC_CMP_SWAPW + }; +} + +class SystemZSubtarget; +class SystemZTargetMachine; + +class SystemZTargetLowering : public TargetLowering { +public: + explicit SystemZTargetLowering(SystemZTargetMachine &TM); + + // Override TargetLowering. 
+ virtual MVT getScalarShiftAmountTy(EVT LHSTy) const LLVM_OVERRIDE { + return MVT::i32; + } + virtual EVT getSetCCResultType(EVT VT) const { + return MVT::i32; + } + virtual bool isFMAFasterThanMulAndAdd(EVT) const LLVM_OVERRIDE { + return true; + } + virtual bool isFPImmLegal(const APFloat &Imm, EVT VT) const; + virtual const char *getTargetNodeName(unsigned Opcode) const LLVM_OVERRIDE; + virtual std::pair<unsigned, const TargetRegisterClass *> + getRegForInlineAsmConstraint(const std::string &Constraint, + EVT VT) const LLVM_OVERRIDE; + virtual TargetLowering::ConstraintType + getConstraintType(const std::string &Constraint) const LLVM_OVERRIDE; + virtual TargetLowering::ConstraintWeight + getSingleConstraintMatchWeight(AsmOperandInfo &info, + const char *constraint) const LLVM_OVERRIDE; + virtual void + LowerAsmOperandForConstraint(SDValue Op, + std::string &Constraint, + std::vector<SDValue> &Ops, + SelectionDAG &DAG) const LLVM_OVERRIDE; + virtual MachineBasicBlock * + EmitInstrWithCustomInserter(MachineInstr *MI, + MachineBasicBlock *BB) const LLVM_OVERRIDE; + virtual SDValue LowerOperation(SDValue Op, + SelectionDAG &DAG) const LLVM_OVERRIDE; + virtual SDValue + LowerFormalArguments(SDValue Chain, + CallingConv::ID CallConv, bool isVarArg, + const SmallVectorImpl<ISD::InputArg> &Ins, + DebugLoc DL, SelectionDAG &DAG, + SmallVectorImpl<SDValue> &InVals) const LLVM_OVERRIDE; + virtual SDValue + LowerCall(CallLoweringInfo &CLI, + SmallVectorImpl<SDValue> &InVals) const LLVM_OVERRIDE; + + virtual SDValue + LowerReturn(SDValue Chain, + CallingConv::ID CallConv, bool IsVarArg, + const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, + DebugLoc DL, SelectionDAG &DAG) const LLVM_OVERRIDE; + +private: + const SystemZSubtarget &Subtarget; + const SystemZTargetMachine &TM; + + // Implement LowerOperation for individual opcodes. + SDValue lowerBR_CC(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerGlobalAddress(GlobalAddressSDNode *Node, + SelectionDAG &DAG) const; + SDValue lowerGlobalTLSAddress(GlobalAddressSDNode *Node, + SelectionDAG &DAG) const; + SDValue lowerBlockAddress(BlockAddressSDNode *Node, + SelectionDAG &DAG) const; + SDValue lowerJumpTable(JumpTableSDNode *JT, SelectionDAG &DAG) const; + SDValue lowerConstantPool(ConstantPoolSDNode *CP, SelectionDAG &DAG) const; + SDValue lowerVASTART(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerVACOPY(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerUMUL_LOHI(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerSDIVREM(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerUDIVREM(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerBITCAST(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerOR(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerATOMIC_LOAD(SDValue Op, SelectionDAG &DAG, + unsigned Opcode) const; + SDValue lowerATOMIC_CMP_SWAP(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerSTACKSAVE(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerSTACKRESTORE(SDValue Op, SelectionDAG &DAG) const; + + // Implement EmitInstrWithCustomInserter for individual operation types. 
+ MachineBasicBlock *emitSelect(MachineInstr *MI, + MachineBasicBlock *BB) const; + MachineBasicBlock *emitExt128(MachineInstr *MI, + MachineBasicBlock *MBB, + bool ClearEven, unsigned SubReg) const; + MachineBasicBlock *emitAtomicLoadBinary(MachineInstr *MI, + MachineBasicBlock *BB, + unsigned BinOpcode, unsigned BitSize, + bool Invert = false) const; + MachineBasicBlock *emitAtomicLoadMinMax(MachineInstr *MI, + MachineBasicBlock *MBB, + unsigned CompareOpcode, + unsigned KeepOldMask, + unsigned BitSize) const; + MachineBasicBlock *emitAtomicCmpSwapW(MachineInstr *MI, + MachineBasicBlock *BB) const; +}; +} // end namespace llvm + +#endif // LLVM_TARGET_SystemZ_ISELLOWERING_H diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZInstrBuilder.h b/contrib/llvm/lib/Target/SystemZ/SystemZInstrBuilder.h new file mode 100644 index 0000000..fb699b9 --- /dev/null +++ b/contrib/llvm/lib/Target/SystemZ/SystemZInstrBuilder.h @@ -0,0 +1,48 @@ +//===-- SystemZInstrBuilder.h - Functions to aid building insts -*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file exposes functions that may be used with BuildMI from the +// MachineInstrBuilder.h file to handle SystemZ'isms in a clean way. +// +//===----------------------------------------------------------------------===// + +#ifndef SYSTEMZINSTRBUILDER_H +#define SYSTEMZINSTRBUILDER_H + +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineMemOperand.h" +#include "llvm/CodeGen/PseudoSourceValue.h" + +namespace llvm { + +/// Add a BDX memory reference for frame object FI to MIB. +static inline const MachineInstrBuilder & +addFrameReference(const MachineInstrBuilder &MIB, int FI) { + MachineInstr *MI = MIB; + MachineFunction &MF = *MI->getParent()->getParent(); + MachineFrameInfo *MFFrame = MF.getFrameInfo(); + const MCInstrDesc &MCID = MI->getDesc(); + unsigned Flags = 0; + if (MCID.mayLoad()) + Flags |= MachineMemOperand::MOLoad; + if (MCID.mayStore()) + Flags |= MachineMemOperand::MOStore; + int64_t Offset = 0; + MachineMemOperand *MMO = + MF.getMachineMemOperand(MachinePointerInfo( + PseudoSourceValue::getFixedStack(FI), Offset), + Flags, MFFrame->getObjectSize(FI), + MFFrame->getObjectAlignment(FI)); + return MIB.addFrameIndex(FI).addImm(Offset).addReg(0).addMemOperand(MMO); +} + +} // End llvm namespace + +#endif diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZInstrFP.td b/contrib/llvm/lib/Target/SystemZ/SystemZInstrFP.td new file mode 100644 index 0000000..7c9f0e6 --- /dev/null +++ b/contrib/llvm/lib/Target/SystemZ/SystemZInstrFP.td @@ -0,0 +1,318 @@ +//==- SystemZInstrFP.td - Floating-point SystemZ instructions --*- tblgen-*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Control-flow instructions +//===----------------------------------------------------------------------===// + +// C's ?: operator for floating-point operands. 
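The SelectWrapper defs just below are how a floating-point ?: reaches EmitInstrWithCustomInserter; emitSelect, earlier in the patch, then rebuilds the choice as a StartMBB / FalseMBB / JoinMBB diamond with a PHI. A source-level view of the same control flow, purely for illustration:

// The SelectF* pseudos carry C's ?: on floating-point values through
// instruction selection; emitSelect expands each one into a branch diamond.
static double selectF64(bool CCMatches, double TrueVal, double FalseVal) {
  // StartMBB:  BRCL CCMask, JoinMBB    (take TrueVal if the mask matches)
  // FalseMBB:  fall through
  // JoinMBB:   Result = PHI(TrueVal from StartMBB, FalseVal from FalseMBB)
  return CCMatches ? TrueVal : FalseVal;
}

int main() {
  return (selectF64(true, 1.0, 2.0) == 1.0 &&
          selectF64(false, 1.0, 2.0) == 2.0) ? 0 : 1;
}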
+def SelectF32 : SelectWrapper<FP32>; +def SelectF64 : SelectWrapper<FP64>; +def SelectF128 : SelectWrapper<FP128>; + +//===----------------------------------------------------------------------===// +// Move instructions +//===----------------------------------------------------------------------===// + +// Load zero. +let neverHasSideEffects = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in { + def LZER : InherentRRE<"lzer", 0xB374, FP32, (fpimm0)>; + def LZDR : InherentRRE<"lzdr", 0xB375, FP64, (fpimm0)>; + def LZXR : InherentRRE<"lzxr", 0xB376, FP128, (fpimm0)>; +} + +// Moves between two floating-point registers. +let neverHasSideEffects = 1 in { + def LER : UnaryRR <"ler", 0x38, null_frag, FP32, FP32>; + def LDR : UnaryRR <"ldr", 0x28, null_frag, FP64, FP64>; + def LXR : UnaryRRE<"lxr", 0xB365, null_frag, FP128, FP128>; +} + +// Moves between 64-bit integer and floating-point registers. +def LGDR : UnaryRRE<"lgdr", 0xB3CD, bitconvert, GR64, FP64>; +def LDGR : UnaryRRE<"ldgr", 0xB3C1, bitconvert, FP64, GR64>; + +// fcopysign with an FP32 result. +let isCodeGenOnly = 1 in { + def CPSDRss : BinaryRevRRF<"cpsdr", 0xB372, fcopysign, FP32, FP32>; + def CPSDRsd : BinaryRevRRF<"cpsdr", 0xB372, fcopysign, FP32, FP64>; +} + +// The sign of an FP128 is in the high register. Give the CPSDRsd +// operands in R1, R2, R3 order. +def : Pat<(fcopysign FP32:$src1, FP128:$src2), + (CPSDRsd (EXTRACT_SUBREG FP128:$src2, subreg_high), FP32:$src1)>; + +// fcopysign with an FP64 result. +let isCodeGenOnly = 1 in + def CPSDRds : BinaryRevRRF<"cpsdr", 0xB372, fcopysign, FP64, FP32>; +def CPSDRdd : BinaryRevRRF<"cpsdr", 0xB372, fcopysign, FP64, FP64>; + +// The sign of an FP128 is in the high register. Give the CPSDRdd +// operands in R1, R2, R3 order. +def : Pat<(fcopysign FP64:$src1, FP128:$src2), + (CPSDRdd (EXTRACT_SUBREG FP128:$src2, subreg_high), FP64:$src1)>; + +// fcopysign with an FP128 result. Use "upper" as the high half and leave +// the low half as-is. +class CopySign128<RegisterOperand cls, dag upper> + : Pat<(fcopysign FP128:$src1, cls:$src2), + (INSERT_SUBREG FP128:$src1, upper, subreg_high)>; + +// Give the CPSDR* operands in R1, R2, R3 order. +def : CopySign128<FP32, (CPSDRds FP32:$src2, + (EXTRACT_SUBREG FP128:$src1, subreg_high))>; +def : CopySign128<FP64, (CPSDRdd FP64:$src2, + (EXTRACT_SUBREG FP128:$src1, subreg_high))>; +def : CopySign128<FP128, (CPSDRdd (EXTRACT_SUBREG FP128:$src2, subreg_high), + (EXTRACT_SUBREG FP128:$src1, subreg_high))>; + +//===----------------------------------------------------------------------===// +// Load instructions +//===----------------------------------------------------------------------===// + +let canFoldAsLoad = 1, SimpleBDXLoad = 1 in { + defm LE : UnaryRXPair<"le", 0x78, 0xED64, load, FP32>; + defm LD : UnaryRXPair<"ld", 0x68, 0xED65, load, FP64>; + + // These instructions are split after register allocation, so we don't + // want a custom inserter. 
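  // (Cross-reference, inferred from later in this patch: the splitting is
  //  done by SystemZInstrInfo::expandPostRAPseudo(), which rewrites LX into
  //  two 64-bit LD loads -- and STX below into two STD stores -- via
  //  splitMove().)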
+ let Has20BitOffset = 1, HasIndex = 1, Is128Bit = 1 in { + def LX : Pseudo<(outs FP128:$dst), (ins bdxaddr20only128:$src), + [(set FP128:$dst, (load bdxaddr20only128:$src))]>; + } +} + +//===----------------------------------------------------------------------===// +// Store instructions +//===----------------------------------------------------------------------===// + +let SimpleBDXStore = 1 in { + defm STE : StoreRXPair<"ste", 0x70, 0xED66, store, FP32>; + defm STD : StoreRXPair<"std", 0x60, 0xED67, store, FP64>; + + // These instructions are split after register allocation, so we don't + // want a custom inserter. + let Has20BitOffset = 1, HasIndex = 1, Is128Bit = 1 in { + def STX : Pseudo<(outs), (ins FP128:$src, bdxaddr20only128:$dst), + [(store FP128:$src, bdxaddr20only128:$dst)]>; + } +} + +//===----------------------------------------------------------------------===// +// Conversion instructions +//===----------------------------------------------------------------------===// + +// Convert floating-point values to narrower representations, rounding +// according to the current mode. The destination of LEXBR and LDXBR +// is a 128-bit value, but only the first register of the pair is used. +def LEDBR : UnaryRRE<"ledbr", 0xB344, fround, FP32, FP64>; +def LEXBR : UnaryRRE<"lexbr", 0xB346, null_frag, FP128, FP128>; +def LDXBR : UnaryRRE<"ldxbr", 0xB345, null_frag, FP128, FP128>; + +def : Pat<(f32 (fround FP128:$src)), + (EXTRACT_SUBREG (LEXBR FP128:$src), subreg_32bit)>; +def : Pat<(f64 (fround FP128:$src)), + (EXTRACT_SUBREG (LDXBR FP128:$src), subreg_high)>; + +// Extend register floating-point values to wider representations. +def LDEBR : UnaryRRE<"ldebr", 0xB304, fextend, FP64, FP32>; +def LXEBR : UnaryRRE<"lxebr", 0xB306, fextend, FP128, FP32>; +def LXDBR : UnaryRRE<"lxdbr", 0xB305, fextend, FP128, FP64>; + +// Extend memory floating-point values to wider representations. +def LDEB : UnaryRXE<"ldeb", 0xED04, extloadf32, FP64>; +def LXEB : UnaryRXE<"lxeb", 0xED06, extloadf32, FP128>; +def LXDB : UnaryRXE<"lxdb", 0xED05, extloadf64, FP128>; + +// Convert a signed integer register value to a floating-point one. +let Defs = [PSW] in { + def CEFBR : UnaryRRE<"cefbr", 0xB394, sint_to_fp, FP32, GR32>; + def CDFBR : UnaryRRE<"cdfbr", 0xB395, sint_to_fp, FP64, GR32>; + def CXFBR : UnaryRRE<"cxfbr", 0xB396, sint_to_fp, FP128, GR32>; + + def CEGBR : UnaryRRE<"cegbr", 0xB3A4, sint_to_fp, FP32, GR64>; + def CDGBR : UnaryRRE<"cdgbr", 0xB3A5, sint_to_fp, FP64, GR64>; + def CXGBR : UnaryRRE<"cxgbr", 0xB3A6, sint_to_fp, FP128, GR64>; +} + +// Convert a floating-point register value to a signed integer value, +// with the second operand (modifier M3) specifying the rounding mode. +let Defs = [PSW] in { + def CFEBR : UnaryRRF<"cfebr", 0xB398, GR32, FP32>; + def CFDBR : UnaryRRF<"cfdbr", 0xB399, GR32, FP64>; + def CFXBR : UnaryRRF<"cfxbr", 0xB39A, GR32, FP128>; + + def CGEBR : UnaryRRF<"cgebr", 0xB3A8, GR64, FP32>; + def CGDBR : UnaryRRF<"cgdbr", 0xB3A9, GR64, FP64>; + def CGXBR : UnaryRRF<"cgxbr", 0xB3AA, GR64, FP128>; +} + +// fp_to_sint always rounds towards zero, which is modifier value 5. 
+def : Pat<(i32 (fp_to_sint FP32:$src)), (CFEBR FP32:$src, 5)>;
+def : Pat<(i32 (fp_to_sint FP64:$src)), (CFDBR FP64:$src, 5)>;
+def : Pat<(i32 (fp_to_sint FP128:$src)), (CFXBR FP128:$src, 5)>;
+
+def : Pat<(i64 (fp_to_sint FP32:$src)), (CGEBR FP32:$src, 5)>;
+def : Pat<(i64 (fp_to_sint FP64:$src)), (CGDBR FP64:$src, 5)>;
+def : Pat<(i64 (fp_to_sint FP128:$src)), (CGXBR FP128:$src, 5)>;
+
+//===----------------------------------------------------------------------===//
+// Unary arithmetic
+//===----------------------------------------------------------------------===//
+
+// Negation (Load Complement).
+let Defs = [PSW] in {
+  def LCEBR : UnaryRRE<"lcebr", 0xB303, fneg, FP32, FP32>;
+  def LCDBR : UnaryRRE<"lcdbr", 0xB313, fneg, FP64, FP64>;
+  def LCXBR : UnaryRRE<"lcxbr", 0xB343, fneg, FP128, FP128>;
+}
+
+// Absolute value (Load Positive).
+let Defs = [PSW] in {
+  def LPEBR : UnaryRRE<"lpebr", 0xB300, fabs, FP32, FP32>;
+  def LPDBR : UnaryRRE<"lpdbr", 0xB310, fabs, FP64, FP64>;
+  def LPXBR : UnaryRRE<"lpxbr", 0xB340, fabs, FP128, FP128>;
+}
+
+// Negative absolute value (Load Negative).
+let Defs = [PSW] in {
+  def LNEBR : UnaryRRE<"lnebr", 0xB301, fnabs, FP32, FP32>;
+  def LNDBR : UnaryRRE<"lndbr", 0xB311, fnabs, FP64, FP64>;
+  def LNXBR : UnaryRRE<"lnxbr", 0xB341, fnabs, FP128, FP128>;
+}
+
+// Square root.
+def SQEBR : UnaryRRE<"sqebr", 0xB314, fsqrt, FP32, FP32>;
+def SQDBR : UnaryRRE<"sqdbr", 0xB315, fsqrt, FP64, FP64>;
+def SQXBR : UnaryRRE<"sqxbr", 0xB316, fsqrt, FP128, FP128>;
+
+def SQEB : UnaryRXE<"sqeb", 0xED14, loadu<fsqrt>, FP32>;
+def SQDB : UnaryRXE<"sqdb", 0xED15, loadu<fsqrt>, FP64>;
+
+// Round to an integer, with the second operand (modifier M3) specifying
+// the rounding mode.
+//
+// These forms always check for inexact conditions. z196 added versions
+// that allow this to be suppressed (as for fnearbyint), but we don't yet
+// support -march=z196.
+let Defs = [PSW] in {
+  def FIEBR : UnaryRRF<"fiebr", 0xB357, FP32, FP32>;
+  def FIDBR : UnaryRRF<"fidbr", 0xB35F, FP64, FP64>;
+  def FIXBR : UnaryRRF<"fixbr", 0xB347, FP128, FP128>;
+}
+
+// frint rounds according to the current mode (modifier 0) and detects
+// inexact conditions.
+def : Pat<(frint FP32:$src), (FIEBR FP32:$src, 0)>;
+def : Pat<(frint FP64:$src), (FIDBR FP64:$src, 0)>;
+def : Pat<(frint FP128:$src), (FIXBR FP128:$src, 0)>;
+
+//===----------------------------------------------------------------------===//
+// Binary arithmetic
+//===----------------------------------------------------------------------===//
+
+// Addition.
+let Defs = [PSW] in {
+  let isCommutable = 1 in {
+    def AEBR : BinaryRRE<"aebr", 0xB30A, fadd, FP32, FP32>;
+    def ADBR : BinaryRRE<"adbr", 0xB31A, fadd, FP64, FP64>;
+    def AXBR : BinaryRRE<"axbr", 0xB34A, fadd, FP128, FP128>;
+  }
+  def AEB : BinaryRXE<"aeb", 0xED0A, fadd, FP32, load>;
+  def ADB : BinaryRXE<"adb", 0xED1A, fadd, FP64, load>;
+}
+
+// Subtraction.
+let Defs = [PSW] in {
+  def SEBR : BinaryRRE<"sebr", 0xB30B, fsub, FP32, FP32>;
+  def SDBR : BinaryRRE<"sdbr", 0xB31B, fsub, FP64, FP64>;
+  def SXBR : BinaryRRE<"sxbr", 0xB34B, fsub, FP128, FP128>;
+
+  def SEB : BinaryRXE<"seb", 0xED0B, fsub, FP32, load>;
+  def SDB : BinaryRXE<"sdb", 0xED1B, fsub, FP64, load>;
+}
+
+// Multiplication.
+let isCommutable = 1 in { + def MEEBR : BinaryRRE<"meebr", 0xB317, fmul, FP32, FP32>; + def MDBR : BinaryRRE<"mdbr", 0xB31C, fmul, FP64, FP64>; + def MXBR : BinaryRRE<"mxbr", 0xB34C, fmul, FP128, FP128>; +} +def MEEB : BinaryRXE<"meeb", 0xED17, fmul, FP32, load>; +def MDB : BinaryRXE<"mdb", 0xED1C, fmul, FP64, load>; + +// f64 multiplication of two FP32 registers. +def MDEBR : BinaryRRE<"mdebr", 0xB30C, null_frag, FP64, FP32>; +def : Pat<(fmul (f64 (fextend FP32:$src1)), (f64 (fextend FP32:$src2))), + (MDEBR (INSERT_SUBREG (f64 (IMPLICIT_DEF)), + FP32:$src1, subreg_32bit), FP32:$src2)>; + +// f64 multiplication of an FP32 register and an f32 memory. +def MDEB : BinaryRXE<"mdeb", 0xED0C, null_frag, FP64, load>; +def : Pat<(fmul (f64 (fextend FP32:$src1)), + (f64 (extloadf32 bdxaddr12only:$addr))), + (MDEB (INSERT_SUBREG (f64 (IMPLICIT_DEF)), FP32:$src1, subreg_32bit), + bdxaddr12only:$addr)>; + +// f128 multiplication of two FP64 registers. +def MXDBR : BinaryRRE<"mxdbr", 0xB307, null_frag, FP128, FP64>; +def : Pat<(fmul (f128 (fextend FP64:$src1)), (f128 (fextend FP64:$src2))), + (MXDBR (INSERT_SUBREG (f128 (IMPLICIT_DEF)), + FP64:$src1, subreg_high), FP64:$src2)>; + +// f128 multiplication of an FP64 register and an f64 memory. +def MXDB : BinaryRXE<"mxdb", 0xED07, null_frag, FP128, load>; +def : Pat<(fmul (f128 (fextend FP64:$src1)), + (f128 (extloadf64 bdxaddr12only:$addr))), + (MXDB (INSERT_SUBREG (f128 (IMPLICIT_DEF)), FP64:$src1, subreg_high), + bdxaddr12only:$addr)>; + +// Fused multiply-add. +def MAEBR : TernaryRRD<"maebr", 0xB30E, z_fma, FP32>; +def MADBR : TernaryRRD<"madbr", 0xB31E, z_fma, FP64>; + +def MAEB : TernaryRXF<"maeb", 0xED0E, z_fma, FP32, load>; +def MADB : TernaryRXF<"madb", 0xED1E, z_fma, FP64, load>; + +// Fused multiply-subtract. +def MSEBR : TernaryRRD<"msebr", 0xB30F, z_fms, FP32>; +def MSDBR : TernaryRRD<"msdbr", 0xB31F, z_fms, FP64>; + +def MSEB : TernaryRXF<"mseb", 0xED0F, z_fms, FP32, load>; +def MSDB : TernaryRXF<"msdb", 0xED1F, z_fms, FP64, load>; + +// Division. 
+def DEBR : BinaryRRE<"debr", 0xB30D, fdiv, FP32, FP32>; +def DDBR : BinaryRRE<"ddbr", 0xB31D, fdiv, FP64, FP64>; +def DXBR : BinaryRRE<"dxbr", 0xB34D, fdiv, FP128, FP128>; + +def DEB : BinaryRXE<"deb", 0xED0D, fdiv, FP32, load>; +def DDB : BinaryRXE<"ddb", 0xED1D, fdiv, FP64, load>; + +//===----------------------------------------------------------------------===// +// Comparisons +//===----------------------------------------------------------------------===// + +let Defs = [PSW] in { + def CEBR : CompareRRE<"cebr", 0xB309, z_cmp, FP32, FP32>; + def CDBR : CompareRRE<"cdbr", 0xB319, z_cmp, FP64, FP64>; + def CXBR : CompareRRE<"cxbr", 0xB349, z_cmp, FP128, FP128>; + + def CEB : CompareRXE<"ceb", 0xED09, z_cmp, FP32, load>; + def CDB : CompareRXE<"cdb", 0xED19, z_cmp, FP64, load>; +} + +//===----------------------------------------------------------------------===// +// Peepholes +//===----------------------------------------------------------------------===// + +def : Pat<(f32 fpimmneg0), (LCEBR (LZER))>; +def : Pat<(f64 fpimmneg0), (LCDBR (LZDR))>; +def : Pat<(f128 fpimmneg0), (LCXBR (LZXR))>; diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZInstrFormats.td b/contrib/llvm/lib/Target/SystemZ/SystemZInstrFormats.td new file mode 100644 index 0000000..b32b7eb --- /dev/null +++ b/contrib/llvm/lib/Target/SystemZ/SystemZInstrFormats.td @@ -0,0 +1,987 @@ +//==- SystemZInstrFormats.td - SystemZ Instruction Formats --*- tablegen -*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Basic SystemZ instruction definition +//===----------------------------------------------------------------------===// + +class InstSystemZ<int size, dag outs, dag ins, string asmstr, + list<dag> pattern> : Instruction { + let Namespace = "SystemZ"; + + dag OutOperandList = outs; + dag InOperandList = ins; + let Size = size; + let Pattern = pattern; + let AsmString = asmstr; + + // Used to identify a group of related instructions, such as ST and STY. + string Function = ""; + + // "12" for an instruction that has a ...Y equivalent, "20" for that + // ...Y equivalent. + string PairType = "none"; + + // True if this instruction is a simple D(X,B) load of a register + // (with no sign or zero extension). + bit SimpleBDXLoad = 0; + + // True if this instruction is a simple D(X,B) store of a register + // (with no truncation). + bit SimpleBDXStore = 0; + + // True if this instruction has a 20-bit displacement field. + bit Has20BitOffset = 0; + + // True if addresses in this instruction have an index register. + bit HasIndex = 0; + + // True if this is a 128-bit pseudo instruction that combines two 64-bit + // operations. + bit Is128Bit = 0; + + let TSFlags{0} = SimpleBDXLoad; + let TSFlags{1} = SimpleBDXStore; + let TSFlags{2} = Has20BitOffset; + let TSFlags{3} = HasIndex; + let TSFlags{4} = Is128Bit; +} + +//===----------------------------------------------------------------------===// +// Mappings between instructions +//===----------------------------------------------------------------------===// + +// Return the version of an instruction that has an unsigned 12-bit +// displacement. 
+def getDisp12Opcode : InstrMapping { + let FilterClass = "InstSystemZ"; + let RowFields = ["Function"]; + let ColFields = ["PairType"]; + let KeyCol = ["20"]; + let ValueCols = [["12"]]; +} + +// Return the version of an instruction that has a signed 20-bit displacement. +def getDisp20Opcode : InstrMapping { + let FilterClass = "InstSystemZ"; + let RowFields = ["Function"]; + let ColFields = ["PairType"]; + let KeyCol = ["12"]; + let ValueCols = [["20"]]; +} + +//===----------------------------------------------------------------------===// +// Instruction formats +//===----------------------------------------------------------------------===// +// +// Formats are specified using operand field declarations of the form: +// +// bits<4> Rn : register input or output for operand n +// bits<m> In : immediate value of width m for operand n +// bits<4> Bn : base register for address operand n +// bits<m> Dn : displacement value of width m for address operand n +// bits<4> Xn : index register for address operand n +// bits<4> Mn : mode value for operand n +// +// The operand numbers ("n" in the list above) follow the architecture manual, +// but the fields are always declared in assembly order, so there are some +// cases where operand "2" comes after operand "3". For address operands, +// the base register field is declared first, followed by the displacement, +// followed by the index (if any). This matches the bdaddr* and bdxaddr* +// orders. +// +//===----------------------------------------------------------------------===// + +class InstRI<bits<12> op, dag outs, dag ins, string asmstr, list<dag> pattern> + : InstSystemZ<4, outs, ins, asmstr, pattern> { + field bits<32> Inst; + + bits<4> R1; + bits<16> I2; + + let Inst{31-24} = op{11-4}; + let Inst{23-20} = R1; + let Inst{19-16} = op{3-0}; + let Inst{15-0} = I2; +} + +class InstRIEf<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern> + : InstSystemZ<6, outs, ins, asmstr, pattern> { + field bits<48> Inst; + + bits<4> R1; + bits<4> R2; + bits<8> I3; + bits<8> I4; + bits<8> I5; + + let Inst{47-40} = op{15-8}; + let Inst{39-36} = R1; + let Inst{35-32} = R2; + let Inst{31-24} = I3; + let Inst{23-16} = I4; + let Inst{15-8} = I5; + let Inst{7-0} = op{7-0}; +} + +class InstRIL<bits<12> op, dag outs, dag ins, string asmstr, list<dag> pattern> + : InstSystemZ<6, outs, ins, asmstr, pattern> { + field bits<48> Inst; + + bits<4> R1; + bits<32> I2; + + let Inst{47-40} = op{11-4}; + let Inst{39-36} = R1; + let Inst{35-32} = op{3-0}; + let Inst{31-0} = I2; +} + +class InstRR<bits<8> op, dag outs, dag ins, string asmstr, list<dag> pattern> + : InstSystemZ<2, outs, ins, asmstr, pattern> { + field bits<16> Inst; + + bits<4> R1; + bits<4> R2; + + let Inst{15-8} = op; + let Inst{7-4} = R1; + let Inst{3-0} = R2; +} + +class InstRRD<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern> + : InstSystemZ<4, outs, ins, asmstr, pattern> { + field bits<32> Inst; + + bits<4> R1; + bits<4> R3; + bits<4> R2; + + let Inst{31-16} = op; + let Inst{15-12} = R1; + let Inst{11-8} = 0; + let Inst{7-4} = R3; + let Inst{3-0} = R2; +} + +class InstRRE<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern> + : InstSystemZ<4, outs, ins, asmstr, pattern> { + field bits<32> Inst; + + bits<4> R1; + bits<4> R2; + + let Inst{31-16} = op; + let Inst{15-8} = 0; + let Inst{7-4} = R1; + let Inst{3-0} = R2; +} + +class InstRRF<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern> + : InstSystemZ<4, outs, ins, asmstr, pattern> { + field bits<32> 
Inst; + + bits<4> R1; + bits<4> R2; + bits<4> R3; + + let Inst{31-16} = op; + let Inst{15-12} = R3; + let Inst{11-8} = 0; + let Inst{7-4} = R1; + let Inst{3-0} = R2; +} + +class InstRX<bits<8> op, dag outs, dag ins, string asmstr, list<dag> pattern> + : InstSystemZ<4, outs, ins, asmstr, pattern> { + field bits<32> Inst; + + bits<4> R1; + bits<4> B2; + bits<12> D2; + bits<4> X2; + + let Inst{31-24} = op; + let Inst{23-20} = R1; + let Inst{19-16} = X2; + let Inst{15-12} = B2; + let Inst{11-0} = D2; + + let HasIndex = 1; +} + +class InstRXE<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern> + : InstSystemZ<6, outs, ins, asmstr, pattern> { + field bits<48> Inst; + + bits<4> R1; + bits<4> B2; + bits<12> D2; + bits<4> X2; + + let Inst{47-40} = op{15-8}; + let Inst{39-36} = R1; + let Inst{35-32} = X2; + let Inst{31-28} = B2; + let Inst{27-16} = D2; + let Inst{15-8} = 0; + let Inst{7-0} = op{7-0}; + + let HasIndex = 1; +} + +class InstRXF<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern> + : InstSystemZ<6, outs, ins, asmstr, pattern> { + field bits<48> Inst; + + bits<4> R1; + bits<4> R3; + bits<4> B2; + bits<12> D2; + bits<4> X2; + + let Inst{47-40} = op{15-8}; + let Inst{39-36} = R3; + let Inst{35-32} = X2; + let Inst{31-28} = B2; + let Inst{27-16} = D2; + let Inst{15-12} = R1; + let Inst{11-8} = 0; + let Inst{7-0} = op{7-0}; + + let HasIndex = 1; +} + +class InstRXY<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern> + : InstSystemZ<6, outs, ins, asmstr, pattern> { + field bits<48> Inst; + + bits<4> R1; + bits<4> B2; + bits<20> D2; + bits<4> X2; + + let Inst{47-40} = op{15-8}; + let Inst{39-36} = R1; + let Inst{35-32} = X2; + let Inst{31-28} = B2; + let Inst{27-16} = D2{11-0}; + let Inst{15-8} = D2{19-12}; + let Inst{7-0} = op{7-0}; + + let Has20BitOffset = 1; + let HasIndex = 1; +} + +class InstRS<bits<8> op, dag outs, dag ins, string asmstr, list<dag> pattern> + : InstSystemZ<4, outs, ins, asmstr, pattern> { + field bits<32> Inst; + + bits<4> R1; + bits<4> R3; + bits<4> B2; + bits<12> D2; + + let Inst{31-24} = op; + let Inst{23-20} = R1; + let Inst{19-16} = R3; + let Inst{15-12} = B2; + let Inst{11-0} = D2; +} + +class InstRSY<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern> + : InstSystemZ<6, outs, ins, asmstr, pattern> { + field bits<48> Inst; + + bits<4> R1; + bits<4> R3; + bits<4> B2; + bits<20> D2; + + let Inst{47-40} = op{15-8}; + let Inst{39-36} = R1; + let Inst{35-32} = R3; + let Inst{31-28} = B2; + let Inst{27-16} = D2{11-0}; + let Inst{15-8} = D2{19-12}; + let Inst{7-0} = op{7-0}; + + let Has20BitOffset = 1; +} + +class InstSI<bits<8> op, dag outs, dag ins, string asmstr, list<dag> pattern> + : InstSystemZ<4, outs, ins, asmstr, pattern> { + field bits<32> Inst; + + bits<4> B1; + bits<12> D1; + bits<8> I2; + + let Inst{31-24} = op; + let Inst{23-16} = I2; + let Inst{15-12} = B1; + let Inst{11-0} = D1; +} + +class InstSIL<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern> + : InstSystemZ<6, outs, ins, asmstr, pattern> { + field bits<48> Inst; + + bits<4> B1; + bits<12> D1; + bits<16> I2; + + let Inst{47-32} = op; + let Inst{31-28} = B1; + let Inst{27-16} = D1; + let Inst{15-0} = I2; +} + +class InstSIY<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern> + : InstSystemZ<6, outs, ins, asmstr, pattern> { + field bits<48> Inst; + + bits<4> B1; + bits<20> D1; + bits<8> I2; + + let Inst{47-40} = op{15-8}; + let Inst{39-32} = I2; + let Inst{31-28} = B1; + let Inst{27-16} = D1{11-0}; + let Inst{15-8} = 
D1{19-12};
+  let Inst{7-0} = op{7-0};
+
+  let Has20BitOffset = 1;
+}
+
+//===----------------------------------------------------------------------===//
+// Instruction definitions with semantics
+//===----------------------------------------------------------------------===//
+//
+// These classes have the form <Category><Format>, where <Format> is one
+// of the formats defined above and where <Category> describes the inputs
+// and outputs. <Category> can be one of:
+//
+//   Inherent:
+//     One register output operand and no input operands.
+//
+//   Store:
+//     One register or immediate input operand and one address input operand.
+//     The instruction stores the first operand to the address.
+//
+//     This category is used for both pure and truncating stores.
+//
+//   LoadMultiple:
+//     One address input operand and two explicit output operands.
+//     The instruction loads a range of registers from the address,
+//     with the explicit operands giving the first and last register
+//     to load. Other loaded registers are added as implicit definitions.
+//
+//   StoreMultiple:
+//     Two explicit input register operands and an address operand.
+//     The instruction stores a range of registers to the address,
+//     with the explicit operands giving the first and last register
+//     to store. Other stored registers are added as implicit uses.
+//
+//   Unary:
+//     One register output operand and one input operand. The input
+//     operand may be a register, immediate or memory.
+//
+//   Binary:
+//     One register output operand and two input operands. The first
+//     input operand is always a register and the second may be a register,
+//     immediate or memory.
+//
+//   Shift:
+//     One register output operand and two input operands. The first
+//     input operand is a register and the second has the same form as
+//     an address (although it isn't actually used to address memory).
+//
+//   Compare:
+//     Two input operands. The first operand is always a register,
+//     the second may be a register, immediate or memory.
+//
+//   Ternary:
+//     One register output operand and three register input operands.
+//
+//   CmpSwap:
+//     One output operand and three input operands. The first two
+//     operands are registers and the third is an address. The instruction
+//     both reads from and writes to the address.
+//
+//   RotateSelect:
+//     One output operand and five input operands. The first two operands
+//     are registers and the other three are immediates.
+//
+// The format determines which input operands are tied to output operands,
+// and also determines the shape of any address operand.
+//
+// Multiclasses of the form <Category><Format>Pair define two instructions,
+// one with <Category><Format> and one with <Category><Format>Y. The name
+// of the first instruction has no suffix, the name of the second has
+// an extra "y".
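// (Worked illustration, using a definition that appears earlier in this
//  patch in SystemZInstrFP.td:
//
//    defm LE : UnaryRXPair<"le", 0x78, 0xED64, load, FP32>;
//
//  creates LE with the RX format and PairType "12", and LEY with the RXY
//  format and PairType "20". Both share the same Function string, so the
//  getDisp12Opcode and getDisp20Opcode mappings above translate between
//  them; SystemZInstrInfo::getOpcodeForOffset() uses exactly those mappings
//  to pick whichever form can encode a given displacement.)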
+// +//===----------------------------------------------------------------------===// + +class InherentRRE<string mnemonic, bits<16> opcode, RegisterOperand cls, + dag src> + : InstRRE<opcode, (outs cls:$dst), (ins), + mnemonic#"\t$dst", + [(set cls:$dst, src)]> { + let R2 = 0; +} + +class LoadMultipleRSY<string mnemonic, bits<16> opcode, RegisterOperand cls> + : InstRSY<opcode, (outs cls:$dst1, cls:$dst2), (ins bdaddr20only:$addr), + mnemonic#"\t$dst1, $dst2, $addr", []> { + let mayLoad = 1; +} + +class StoreRILPC<string mnemonic, bits<12> opcode, SDPatternOperator operator, + RegisterOperand cls> + : InstRIL<opcode, (outs), (ins cls:$src, pcrel32:$addr), + mnemonic#"\t$src, $addr", + [(operator cls:$src, pcrel32:$addr)]> { + let mayStore = 1; + // We want PC-relative addresses to be tried ahead of BD and BDX addresses. + // However, BDXs have two extra operands and are therefore 6 units more + // complex. + let AddedComplexity = 7; +} + +class StoreRX<string mnemonic, bits<8> opcode, SDPatternOperator operator, + RegisterOperand cls, AddressingMode mode = bdxaddr12only> + : InstRX<opcode, (outs), (ins cls:$src, mode:$addr), + mnemonic#"\t$src, $addr", + [(operator cls:$src, mode:$addr)]> { + let mayStore = 1; +} + +class StoreRXY<string mnemonic, bits<16> opcode, SDPatternOperator operator, + RegisterOperand cls, AddressingMode mode = bdxaddr20only> + : InstRXY<opcode, (outs), (ins cls:$src, mode:$addr), + mnemonic#"\t$src, $addr", + [(operator cls:$src, mode:$addr)]> { + let mayStore = 1; +} + +multiclass StoreRXPair<string mnemonic, bits<8> rxOpcode, bits<16> rxyOpcode, + SDPatternOperator operator, RegisterOperand cls> { + let Function = mnemonic ## #cls in { + let PairType = "12" in + def "" : StoreRX<mnemonic, rxOpcode, operator, cls, bdxaddr12pair>; + let PairType = "20" in + def Y : StoreRXY<mnemonic#"y", rxyOpcode, operator, cls, bdxaddr20pair>; + } +} + +class StoreMultipleRSY<string mnemonic, bits<16> opcode, RegisterOperand cls> + : InstRSY<opcode, (outs), (ins cls:$from, cls:$to, bdaddr20only:$addr), + mnemonic#"\t$from, $to, $addr", []> { + let mayStore = 1; +} + +class StoreSI<string mnemonic, bits<8> opcode, SDPatternOperator operator, + Immediate imm, AddressingMode mode = bdaddr12only> + : InstSI<opcode, (outs), (ins mode:$addr, imm:$src), + mnemonic#"\t$addr, $src", + [(operator imm:$src, mode:$addr)]> { + let mayStore = 1; +} + +class StoreSIY<string mnemonic, bits<16> opcode, SDPatternOperator operator, + Immediate imm, AddressingMode mode = bdaddr20only> + : InstSIY<opcode, (outs), (ins mode:$addr, imm:$src), + mnemonic#"\t$addr, $src", + [(operator imm:$src, mode:$addr)]> { + let mayStore = 1; +} + +class StoreSIL<string mnemonic, bits<16> opcode, SDPatternOperator operator, + Immediate imm> + : InstSIL<opcode, (outs), (ins bdaddr12only:$addr, imm:$src), + mnemonic#"\t$addr, $src", + [(operator imm:$src, bdaddr12only:$addr)]> { + let mayStore = 1; +} + +multiclass StoreSIPair<string mnemonic, bits<8> siOpcode, bits<16> siyOpcode, + SDPatternOperator operator, Immediate imm> { + let Function = mnemonic in { + let PairType = "12" in + def "" : StoreSI<mnemonic, siOpcode, operator, imm, bdaddr12pair>; + let PairType = "20" in + def Y : StoreSIY<mnemonic#"y", siyOpcode, operator, imm, bdaddr20pair>; + } +} + +class UnaryRR<string mnemonic, bits<8> opcode, SDPatternOperator operator, + RegisterOperand cls1, RegisterOperand cls2> + : InstRR<opcode, (outs cls1:$dst), (ins cls2:$src), + mnemonic#"\t$dst, $src", + [(set cls1:$dst, (operator cls2:$src))]>; + +class 
UnaryRRE<string mnemonic, bits<16> opcode, SDPatternOperator operator, + RegisterOperand cls1, RegisterOperand cls2> + : InstRRE<opcode, (outs cls1:$dst), (ins cls2:$src), + mnemonic#"\t$dst, $src", + [(set cls1:$dst, (operator cls2:$src))]>; + +class UnaryRRF<string mnemonic, bits<16> opcode, RegisterOperand cls1, + RegisterOperand cls2> + : InstRRF<opcode, (outs cls1:$dst), (ins cls2:$src, uimm8zx4:$mode), + mnemonic#"\t$dst, $mode, $src", []>; + +class UnaryRI<string mnemonic, bits<12> opcode, SDPatternOperator operator, + RegisterOperand cls, Immediate imm> + : InstRI<opcode, (outs cls:$dst), (ins imm:$src), + mnemonic#"\t$dst, $src", + [(set cls:$dst, (operator imm:$src))]>; + +class UnaryRIL<string mnemonic, bits<12> opcode, SDPatternOperator operator, + RegisterOperand cls, Immediate imm> + : InstRIL<opcode, (outs cls:$dst), (ins imm:$src), + mnemonic#"\t$dst, $src", + [(set cls:$dst, (operator imm:$src))]>; + +class UnaryRILPC<string mnemonic, bits<12> opcode, SDPatternOperator operator, + RegisterOperand cls> + : InstRIL<opcode, (outs cls:$dst), (ins pcrel32:$addr), + mnemonic#"\t$dst, $addr", + [(set cls:$dst, (operator pcrel32:$addr))]> { + let mayLoad = 1; + // We want PC-relative addresses to be tried ahead of BD and BDX addresses. + // However, BDXs have two extra operands and are therefore 6 units more + // complex. + let AddedComplexity = 7; +} + +class UnaryRX<string mnemonic, bits<8> opcode, SDPatternOperator operator, + RegisterOperand cls, AddressingMode mode = bdxaddr12only> + : InstRX<opcode, (outs cls:$dst), (ins mode:$addr), + mnemonic#"\t$dst, $addr", + [(set cls:$dst, (operator mode:$addr))]> { + let mayLoad = 1; +} + +class UnaryRXE<string mnemonic, bits<16> opcode, SDPatternOperator operator, + RegisterOperand cls> + : InstRXE<opcode, (outs cls:$dst), (ins bdxaddr12only:$addr), + mnemonic#"\t$dst, $addr", + [(set cls:$dst, (operator bdxaddr12only:$addr))]> { + let mayLoad = 1; +} + +class UnaryRXY<string mnemonic, bits<16> opcode, SDPatternOperator operator, + RegisterOperand cls, AddressingMode mode = bdxaddr20only> + : InstRXY<opcode, (outs cls:$dst), (ins mode:$addr), + mnemonic#"\t$dst, $addr", + [(set cls:$dst, (operator mode:$addr))]> { + let mayLoad = 1; +} + +multiclass UnaryRXPair<string mnemonic, bits<8> rxOpcode, bits<16> rxyOpcode, + SDPatternOperator operator, RegisterOperand cls> { + let Function = mnemonic ## #cls in { + let PairType = "12" in + def "" : UnaryRX<mnemonic, rxOpcode, operator, cls, bdxaddr12pair>; + let PairType = "20" in + def Y : UnaryRXY<mnemonic#"y", rxyOpcode, operator, cls, bdxaddr20pair>; + } +} + +class BinaryRR<string mnemonic, bits<8> opcode, SDPatternOperator operator, + RegisterOperand cls1, RegisterOperand cls2> + : InstRR<opcode, (outs cls1:$dst), (ins cls1:$src1, cls2:$src2), + mnemonic#"\t$dst, $src2", + [(set cls1:$dst, (operator cls1:$src1, cls2:$src2))]> { + let Constraints = "$src1 = $dst"; + let DisableEncoding = "$src1"; +} + +class BinaryRRE<string mnemonic, bits<16> opcode, SDPatternOperator operator, + RegisterOperand cls1, RegisterOperand cls2> + : InstRRE<opcode, (outs cls1:$dst), (ins cls1:$src1, cls2:$src2), + mnemonic#"\t$dst, $src2", + [(set cls1:$dst, (operator cls1:$src1, cls2:$src2))]> { + let Constraints = "$src1 = $dst"; + let DisableEncoding = "$src1"; +} + +// Here the assembly and dag operands are in natural order, +// but the first input operand maps to R3 and the second to R2. +// This is used for "CPSDR R1, R3, R2", which is equivalent to +// R1 = copysign (R3, R2). 
+// +// Direct uses of the instruction must pass operands in encoding order -- +// R1, R2, R3 -- so they must pass the source operands in reverse order. +class BinaryRevRRF<string mnemonic, bits<16> opcode, SDPatternOperator operator, + RegisterOperand cls1, RegisterOperand cls2> + : InstRRF<opcode, (outs cls1:$dst), (ins cls2:$src2, cls1:$src1), + mnemonic#"\t$dst, $src1, $src2", + [(set cls1:$dst, (operator cls1:$src1, cls2:$src2))]>; + +class BinaryRI<string mnemonic, bits<12> opcode, SDPatternOperator operator, + RegisterOperand cls, Immediate imm> + : InstRI<opcode, (outs cls:$dst), (ins cls:$src1, imm:$src2), + mnemonic#"\t$dst, $src2", + [(set cls:$dst, (operator cls:$src1, imm:$src2))]> { + let Constraints = "$src1 = $dst"; + let DisableEncoding = "$src1"; +} + +class BinaryRIL<string mnemonic, bits<12> opcode, SDPatternOperator operator, + RegisterOperand cls, Immediate imm> + : InstRIL<opcode, (outs cls:$dst), (ins cls:$src1, imm:$src2), + mnemonic#"\t$dst, $src2", + [(set cls:$dst, (operator cls:$src1, imm:$src2))]> { + let Constraints = "$src1 = $dst"; + let DisableEncoding = "$src1"; +} + +class BinaryRX<string mnemonic, bits<8> opcode, SDPatternOperator operator, + RegisterOperand cls, SDPatternOperator load, + AddressingMode mode = bdxaddr12only> + : InstRX<opcode, (outs cls:$dst), (ins cls:$src1, mode:$src2), + mnemonic#"\t$dst, $src2", + [(set cls:$dst, (operator cls:$src1, (load mode:$src2)))]> { + let Constraints = "$src1 = $dst"; + let DisableEncoding = "$src1"; + let mayLoad = 1; +} + +class BinaryRXE<string mnemonic, bits<16> opcode, SDPatternOperator operator, + RegisterOperand cls, SDPatternOperator load> + : InstRXE<opcode, (outs cls:$dst), (ins cls:$src1, bdxaddr12only:$src2), + mnemonic#"\t$dst, $src2", + [(set cls:$dst, (operator cls:$src1, + (load bdxaddr12only:$src2)))]> { + let Constraints = "$src1 = $dst"; + let DisableEncoding = "$src1"; + let mayLoad = 1; +} + +class BinaryRXY<string mnemonic, bits<16> opcode, SDPatternOperator operator, + RegisterOperand cls, SDPatternOperator load, + AddressingMode mode = bdxaddr20only> + : InstRXY<opcode, (outs cls:$dst), (ins cls:$src1, mode:$src2), + mnemonic#"\t$dst, $src2", + [(set cls:$dst, (operator cls:$src1, (load mode:$src2)))]> { + let Constraints = "$src1 = $dst"; + let DisableEncoding = "$src1"; + let mayLoad = 1; +} + +multiclass BinaryRXPair<string mnemonic, bits<8> rxOpcode, bits<16> rxyOpcode, + SDPatternOperator operator, RegisterOperand cls, + SDPatternOperator load> { + let Function = mnemonic ## #cls in { + let PairType = "12" in + def "" : BinaryRX<mnemonic, rxOpcode, operator, cls, load, bdxaddr12pair>; + let PairType = "20" in + def Y : BinaryRXY<mnemonic#"y", rxyOpcode, operator, cls, load, + bdxaddr20pair>; + } +} + +class BinarySI<string mnemonic, bits<8> opcode, SDPatternOperator operator, + Operand imm, AddressingMode mode = bdaddr12only> + : InstSI<opcode, (outs), (ins mode:$addr, imm:$src), + mnemonic#"\t$addr, $src", + [(store (operator (load mode:$addr), imm:$src), mode:$addr)]> { + let mayLoad = 1; + let mayStore = 1; +} + +class BinarySIY<string mnemonic, bits<16> opcode, SDPatternOperator operator, + Operand imm, AddressingMode mode = bdaddr20only> + : InstSIY<opcode, (outs), (ins mode:$addr, imm:$src), + mnemonic#"\t$addr, $src", + [(store (operator (load mode:$addr), imm:$src), mode:$addr)]> { + let mayLoad = 1; + let mayStore = 1; +} + +multiclass BinarySIPair<string mnemonic, bits<8> siOpcode, + bits<16> siyOpcode, SDPatternOperator operator, + Operand imm> { + let Function = 
mnemonic in {
+    let PairType = "12" in
+      def "" : BinarySI<mnemonic, siOpcode, operator, imm, bdaddr12pair>;
+    let PairType = "20" in
+      def Y : BinarySIY<mnemonic#"y", siyOpcode, operator, imm, bdaddr20pair>;
+  }
+}
+
+class ShiftRS<string mnemonic, bits<8> opcode, SDPatternOperator operator,
+              RegisterOperand cls, AddressingMode mode>
+  : InstRS<opcode, (outs cls:$dst), (ins cls:$src1, mode:$src2),
+           mnemonic#"\t$dst, $src2",
+           [(set cls:$dst, (operator cls:$src1, mode:$src2))]> {
+  let R3 = 0;
+  let Constraints = "$src1 = $dst";
+  let DisableEncoding = "$src1";
+}
+
+class ShiftRSY<string mnemonic, bits<16> opcode, SDPatternOperator operator,
+               RegisterOperand cls, AddressingMode mode>
+  : InstRSY<opcode, (outs cls:$dst), (ins cls:$src1, mode:$src2),
+            mnemonic#"\t$dst, $src1, $src2",
+            [(set cls:$dst, (operator cls:$src1, mode:$src2))]>;
+
+class CompareRR<string mnemonic, bits<8> opcode, SDPatternOperator operator,
+                RegisterOperand cls1, RegisterOperand cls2>
+  : InstRR<opcode, (outs), (ins cls1:$src1, cls2:$src2),
+           mnemonic#"\t$src1, $src2",
+           [(operator cls1:$src1, cls2:$src2)]>;
+
+class CompareRRE<string mnemonic, bits<16> opcode, SDPatternOperator operator,
+                 RegisterOperand cls1, RegisterOperand cls2>
+  : InstRRE<opcode, (outs), (ins cls1:$src1, cls2:$src2),
+            mnemonic#"\t$src1, $src2",
+            [(operator cls1:$src1, cls2:$src2)]>;
+
+class CompareRI<string mnemonic, bits<12> opcode, SDPatternOperator operator,
+                RegisterOperand cls, Immediate imm>
+  : InstRI<opcode, (outs), (ins cls:$src1, imm:$src2),
+           mnemonic#"\t$src1, $src2",
+           [(operator cls:$src1, imm:$src2)]>;
+
+class CompareRIL<string mnemonic, bits<12> opcode, SDPatternOperator operator,
+                 RegisterOperand cls, Immediate imm>
+  : InstRIL<opcode, (outs), (ins cls:$src1, imm:$src2),
+            mnemonic#"\t$src1, $src2",
+            [(operator cls:$src1, imm:$src2)]>;
+
+class CompareRILPC<string mnemonic, bits<12> opcode, SDPatternOperator operator,
+                   RegisterOperand cls, SDPatternOperator load>
+  : InstRIL<opcode, (outs), (ins cls:$src1, pcrel32:$src2),
+            mnemonic#"\t$src1, $src2",
+            [(operator cls:$src1, (load pcrel32:$src2))]> {
+  let mayLoad = 1;
+  // We want PC-relative addresses to be tried ahead of BD and BDX addresses.
+  // However, BDXs have two extra operands and are therefore 6 units more
+  // complex.
+ let AddedComplexity = 7; +} + +class CompareRX<string mnemonic, bits<8> opcode, SDPatternOperator operator, + RegisterOperand cls, SDPatternOperator load, + AddressingMode mode = bdxaddr12only> + : InstRX<opcode, (outs), (ins cls:$src1, mode:$src2), + mnemonic#"\t$src1, $src2", + [(operator cls:$src1, (load mode:$src2))]> { + let mayLoad = 1; +} + +class CompareRXE<string mnemonic, bits<16> opcode, SDPatternOperator operator, + RegisterOperand cls, SDPatternOperator load> + : InstRXE<opcode, (outs), (ins cls:$src1, bdxaddr12only:$src2), + mnemonic#"\t$src1, $src2", + [(operator cls:$src1, (load bdxaddr12only:$src2))]> { + let mayLoad = 1; +} + +class CompareRXY<string mnemonic, bits<16> opcode, SDPatternOperator operator, + RegisterOperand cls, SDPatternOperator load, + AddressingMode mode = bdxaddr20only> + : InstRXY<opcode, (outs), (ins cls:$src1, mode:$src2), + mnemonic#"\t$src1, $src2", + [(operator cls:$src1, (load mode:$src2))]> { + let mayLoad = 1; +} + +multiclass CompareRXPair<string mnemonic, bits<8> rxOpcode, bits<16> rxyOpcode, + SDPatternOperator operator, RegisterOperand cls, + SDPatternOperator load> { + let Function = mnemonic ## #cls in { + let PairType = "12" in + def "" : CompareRX<mnemonic, rxOpcode, operator, cls, + load, bdxaddr12pair>; + let PairType = "20" in + def Y : CompareRXY<mnemonic#"y", rxyOpcode, operator, cls, + load, bdxaddr20pair>; + } +} + +class CompareSI<string mnemonic, bits<8> opcode, SDPatternOperator operator, + SDPatternOperator load, Immediate imm, + AddressingMode mode = bdaddr12only> + : InstSI<opcode, (outs), (ins mode:$addr, imm:$src), + mnemonic#"\t$addr, $src", + [(operator (load mode:$addr), imm:$src)]> { + let mayLoad = 1; +} + +class CompareSIL<string mnemonic, bits<16> opcode, SDPatternOperator operator, + SDPatternOperator load, Immediate imm> + : InstSIL<opcode, (outs), (ins bdaddr12only:$addr, imm:$src), + mnemonic#"\t$addr, $src", + [(operator (load bdaddr12only:$addr), imm:$src)]> { + let mayLoad = 1; +} + +class CompareSIY<string mnemonic, bits<16> opcode, SDPatternOperator operator, + SDPatternOperator load, Immediate imm, + AddressingMode mode = bdaddr20only> + : InstSIY<opcode, (outs), (ins mode:$addr, imm:$src), + mnemonic#"\t$addr, $src", + [(operator (load mode:$addr), imm:$src)]> { + let mayLoad = 1; +} + +multiclass CompareSIPair<string mnemonic, bits<8> siOpcode, bits<16> siyOpcode, + SDPatternOperator operator, SDPatternOperator load, + Immediate imm> { + let Function = mnemonic in { + let PairType = "12" in + def "" : CompareSI<mnemonic, siOpcode, operator, load, imm, bdaddr12pair>; + let PairType = "20" in + def Y : CompareSIY<mnemonic#"y", siyOpcode, operator, load, imm, + bdaddr20pair>; + } +} + +class TernaryRRD<string mnemonic, bits<16> opcode, + SDPatternOperator operator, RegisterOperand cls> + : InstRRD<opcode, (outs cls:$dst), (ins cls:$src1, cls:$src2, cls:$src3), + mnemonic#"\t$dst, $src2, $src3", + [(set cls:$dst, (operator cls:$src1, cls:$src2, cls:$src3))]> { + let Constraints = "$src1 = $dst"; + let DisableEncoding = "$src1"; +} + +class TernaryRXF<string mnemonic, bits<16> opcode, SDPatternOperator operator, + RegisterOperand cls, SDPatternOperator load> + : InstRXF<opcode, (outs cls:$dst), + (ins cls:$src1, cls:$src2, bdxaddr12only:$src3), + mnemonic#"\t$dst, $src2, $src3", + [(set cls:$dst, (operator cls:$src1, cls:$src2, + (load bdxaddr12only:$src3)))]> { + let Constraints = "$src1 = $dst"; + let DisableEncoding = "$src1"; + let mayLoad = 1; +} + +class CmpSwapRS<string mnemonic, bits<8> opcode, 
SDPatternOperator operator,
+                RegisterOperand cls, AddressingMode mode = bdaddr12only>
+  : InstRS<opcode, (outs cls:$dst), (ins cls:$old, cls:$new, mode:$ptr),
+           mnemonic#"\t$dst, $new, $ptr",
+           [(set cls:$dst, (operator mode:$ptr, cls:$old, cls:$new))]> {
+  let Constraints = "$old = $dst";
+  let DisableEncoding = "$old";
+  let mayLoad = 1;
+  let mayStore = 1;
+}
+
+class CmpSwapRSY<string mnemonic, bits<16> opcode, SDPatternOperator operator,
+                 RegisterOperand cls, AddressingMode mode = bdaddr20only>
+  : InstRSY<opcode, (outs cls:$dst), (ins cls:$old, cls:$new, mode:$ptr),
+            mnemonic#"\t$dst, $new, $ptr",
+            [(set cls:$dst, (operator mode:$ptr, cls:$old, cls:$new))]> {
+  let Constraints = "$old = $dst";
+  let DisableEncoding = "$old";
+  let mayLoad = 1;
+  let mayStore = 1;
+}
+
+multiclass CmpSwapRSPair<string mnemonic, bits<8> rsOpcode, bits<16> rsyOpcode,
+                         SDPatternOperator operator, RegisterOperand cls> {
+  let Function = mnemonic ## #cls in {
+    let PairType = "12" in
+      def "" : CmpSwapRS<mnemonic, rsOpcode, operator, cls, bdaddr12pair>;
+    let PairType = "20" in
+      def Y : CmpSwapRSY<mnemonic#"y", rsyOpcode, operator, cls, bdaddr20pair>;
+  }
+}
+
+class RotateSelectRIEf<string mnemonic, bits<16> opcode, RegisterOperand cls1,
+                       RegisterOperand cls2>
+  : InstRIEf<opcode, (outs cls1:$dst),
+             (ins cls1:$src1, cls2:$src2,
+                  uimm8zx6:$imm1, uimm8zx6:$imm2, uimm8zx6:$imm3),
+             mnemonic#"\t$dst, $src2, $imm1, $imm2, $imm3", []> {
+  let Constraints = "$src1 = $dst";
+  let DisableEncoding = "$src1";
+}
+
+//===----------------------------------------------------------------------===//
+// Pseudo instructions
+//===----------------------------------------------------------------------===//
+//
+// Convenience instructions that get lowered to real instructions
+// by either SystemZTargetLowering::EmitInstrWithCustomInserter()
+// or SystemZInstrInfo::expandPostRAPseudo().
+//
+//===----------------------------------------------------------------------===//
+
+class Pseudo<dag outs, dag ins, list<dag> pattern>
+  : InstSystemZ<0, outs, ins, "", pattern> {
+  let isPseudo = 1;
+  let isCodeGenOnly = 1;
+}
+
+// Implements "$dst = $cc & (8 >> CC) ? $src1 : $src2", where CC is
+// the value of the PSW's 2-bit condition code field.
+class SelectWrapper<RegisterOperand cls>
+  : Pseudo<(outs cls:$dst), (ins cls:$src1, cls:$src2, i8imm:$cc),
+           [(set cls:$dst, (z_select_ccmask cls:$src1, cls:$src2, imm:$cc))]> {
+  let usesCustomInserter = 1;
+  // Although the instructions used by these nodes do not in themselves
+  // change the PSW, the insertion requires new blocks, and the PSW cannot
+  // be live across them.
+  let Defs = [PSW];
+  let Uses = [PSW];
+}
+
+// OPERATOR is ATOMIC_SWAP or an ATOMIC_LOAD_* operation. PAT and OPERAND
+// describe the second (non-memory) operand.
+class AtomicLoadBinary<SDPatternOperator operator, RegisterOperand cls,
+                       dag pat, DAGOperand operand>
+  : Pseudo<(outs cls:$dst), (ins bdaddr20only:$ptr, operand:$src2),
+           [(set cls:$dst, (operator bdaddr20only:$ptr, pat))]> {
+  let Defs = [PSW];
+  let Has20BitOffset = 1;
+  let mayLoad = 1;
+  let mayStore = 1;
+  let usesCustomInserter = 1;
+}
+
+// Specializations of AtomicLoadBinary.
+class AtomicLoadBinaryReg32<SDPatternOperator operator>
+  : AtomicLoadBinary<operator, GR32, (i32 GR32:$src2), GR32>;
+class AtomicLoadBinaryImm32<SDPatternOperator operator, Immediate imm>
+  : AtomicLoadBinary<operator, GR32, (i32 imm:$src2), imm>;
+class AtomicLoadBinaryReg64<SDPatternOperator operator>
+  : AtomicLoadBinary<operator, GR64, (i64 GR64:$src2), GR64>;
+class AtomicLoadBinaryImm64<SDPatternOperator operator, Immediate imm>
+  : AtomicLoadBinary<operator, GR64, (i64 imm:$src2), imm>;
+
+// OPERATOR is ATOMIC_SWAPW or an ATOMIC_LOADW_* operation. PAT and OPERAND
+// describe the second (non-memory) operand.
+class AtomicLoadWBinary<SDPatternOperator operator, dag pat,
+                        DAGOperand operand>
+  : Pseudo<(outs GR32:$dst),
+           (ins bdaddr20only:$ptr, operand:$src2, ADDR32:$bitshift,
+                ADDR32:$negbitshift, uimm32:$bitsize),
+           [(set GR32:$dst, (operator bdaddr20only:$ptr, pat, ADDR32:$bitshift,
+                             ADDR32:$negbitshift, uimm32:$bitsize))]> {
+  let Defs = [PSW];
+  let Has20BitOffset = 1;
+  let mayLoad = 1;
+  let mayStore = 1;
+  let usesCustomInserter = 1;
+}
+
+// Specializations of AtomicLoadWBinary.
+class AtomicLoadWBinaryReg<SDPatternOperator operator>
+  : AtomicLoadWBinary<operator, (i32 GR32:$src2), GR32>;
+class AtomicLoadWBinaryImm<SDPatternOperator operator, Immediate imm>
+  : AtomicLoadWBinary<operator, (i32 imm:$src2), imm>;
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
new file mode 100644
index 0000000..0718c83
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
@@ -0,0 +1,444 @@
+//===-- SystemZInstrInfo.cpp - SystemZ instruction information ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the SystemZ implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SystemZInstrInfo.h"
+#include "SystemZInstrBuilder.h"
+
+#define GET_INSTRINFO_CTOR
+#define GET_INSTRMAP_INFO
+#include "SystemZGenInstrInfo.inc"
+
+using namespace llvm;
+
+SystemZInstrInfo::SystemZInstrInfo(SystemZTargetMachine &tm)
+  : SystemZGenInstrInfo(SystemZ::ADJCALLSTACKDOWN, SystemZ::ADJCALLSTACKUP),
+    RI(tm, *this) {
+}
+
+// MI is a 128-bit load or store. Split it into two 64-bit loads or stores,
+// each having the opcode given by NewOpcode.
+void SystemZInstrInfo::splitMove(MachineBasicBlock::iterator MI,
+                                 unsigned NewOpcode) const {
+  MachineBasicBlock *MBB = MI->getParent();
+  MachineFunction &MF = *MBB->getParent();
+
+  // Get two load or store instructions. Use the original instruction for one
+  // of them (arbitrarily the second here) and create a clone for the other.
+  MachineInstr *EarlierMI = MF.CloneMachineInstr(MI);
+  MBB->insert(MI, EarlierMI);
+
+  // Set up the two 64-bit registers.
+  MachineOperand &HighRegOp = EarlierMI->getOperand(0);
+  MachineOperand &LowRegOp = MI->getOperand(0);
+  HighRegOp.setReg(RI.getSubReg(HighRegOp.getReg(), SystemZ::subreg_high));
+  LowRegOp.setReg(RI.getSubReg(LowRegOp.getReg(), SystemZ::subreg_low));
+
+  // The address in the first (high) instruction is already correct.
+  // Adjust the offset in the second (low) instruction.
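  // (Worked example, for illustration only: a 128-bit L128 load of a frame
  //  slot at displacement D becomes an LG of the high 64-bit subregister
  //  from D and an LG of the low subregister from D+8; getOpcodeForOffset()
  //  below then confirms that each resulting displacement still fits the
  //  chosen instruction's displacement field.)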
+  MachineOperand &HighOffsetOp = EarlierMI->getOperand(2);
+  MachineOperand &LowOffsetOp = MI->getOperand(2);
+  LowOffsetOp.setImm(LowOffsetOp.getImm() + 8);
+
+  // Set the opcodes.
+  unsigned HighOpcode = getOpcodeForOffset(NewOpcode, HighOffsetOp.getImm());
+  unsigned LowOpcode = getOpcodeForOffset(NewOpcode, LowOffsetOp.getImm());
+  assert(HighOpcode && LowOpcode && "Both offsets should be in range");
+
+  EarlierMI->setDesc(get(HighOpcode));
+  MI->setDesc(get(LowOpcode));
+}
+
+// Split ADJDYNALLOC instruction MI.
+void SystemZInstrInfo::splitAdjDynAlloc(MachineBasicBlock::iterator MI) const {
+  MachineBasicBlock *MBB = MI->getParent();
+  MachineFunction &MF = *MBB->getParent();
+  MachineFrameInfo *MFFrame = MF.getFrameInfo();
+  MachineOperand &OffsetMO = MI->getOperand(2);
+
+  uint64_t Offset = (MFFrame->getMaxCallFrameSize() +
+                     SystemZMC::CallFrameSize +
+                     OffsetMO.getImm());
+  unsigned NewOpcode = getOpcodeForOffset(SystemZ::LA, Offset);
+  assert(NewOpcode && "No support for huge argument lists yet");
+  MI->setDesc(get(NewOpcode));
+  OffsetMO.setImm(Offset);
+}
+
+// If MI is a simple load or store for a frame object, return the register
+// it loads or stores and set FrameIndex to the index of the frame object.
+// Return 0 otherwise.
+//
+// Flag is SimpleBDXLoad for loads and SimpleBDXStore for stores.
+static int isSimpleMove(const MachineInstr *MI, int &FrameIndex, int Flag) {
+  const MCInstrDesc &MCID = MI->getDesc();
+  if ((MCID.TSFlags & Flag) &&
+      MI->getOperand(1).isFI() &&
+      MI->getOperand(2).getImm() == 0 &&
+      MI->getOperand(3).getReg() == 0) {
+    FrameIndex = MI->getOperand(1).getIndex();
+    return MI->getOperand(0).getReg();
+  }
+  return 0;
+}
+
+unsigned SystemZInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
+                                               int &FrameIndex) const {
+  return isSimpleMove(MI, FrameIndex, SystemZII::SimpleBDXLoad);
+}
+
+unsigned SystemZInstrInfo::isStoreToStackSlot(const MachineInstr *MI,
+                                              int &FrameIndex) const {
+  return isSimpleMove(MI, FrameIndex, SystemZII::SimpleBDXStore);
+}
+
+bool SystemZInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
+                                     MachineBasicBlock *&TBB,
+                                     MachineBasicBlock *&FBB,
+                                     SmallVectorImpl<MachineOperand> &Cond,
+                                     bool AllowModify) const {
+  // Most of the code and comments here are boilerplate.
+
+  // Start from the bottom of the block and work up, examining the
+  // terminator instructions.
+  MachineBasicBlock::iterator I = MBB.end();
+  while (I != MBB.begin()) {
+    --I;
+    if (I->isDebugValue())
+      continue;
+
+    // Working from the bottom, when we see a non-terminator instruction, we're
+    // done.
+    if (!isUnpredicatedTerminator(I))
+      break;
+
+    // A terminator that isn't a branch can't easily be handled by this
+    // analysis.
+    unsigned ThisCond;
+    const MachineOperand *ThisTarget;
+    if (!isBranch(I, ThisCond, ThisTarget))
+      return true;
+
+    // Can't handle indirect branches.
+    if (!ThisTarget->isMBB())
+      return true;
+
+    if (ThisCond == SystemZ::CCMASK_ANY) {
+      // Handle unconditional branches.
+      if (!AllowModify) {
+        TBB = ThisTarget->getMBB();
+        continue;
+      }
+
+      // If the block has any instructions after a JMP, delete them.
+      while (llvm::next(I) != MBB.end())
+        llvm::next(I)->eraseFromParent();
+
+      Cond.clear();
+      FBB = 0;
+
+      // Delete the JMP if it's equivalent to a fall-through.
+      if (MBB.isLayoutSuccessor(ThisTarget->getMBB())) {
+        TBB = 0;
+        I->eraseFromParent();
+        I = MBB.end();
+        continue;
+      }
+
+      // TBB is used to indicate the unconditional destination.
+ TBB = ThisTarget->getMBB(); + continue; + } + + // Working from the bottom, handle the first conditional branch. + if (Cond.empty()) { + // FIXME: add X86-style branch swap + FBB = TBB; + TBB = ThisTarget->getMBB(); + Cond.push_back(MachineOperand::CreateImm(ThisCond)); + continue; + } + + // Handle subsequent conditional branches. + assert(Cond.size() == 1); + assert(TBB); + + // Only handle the case where all conditional branches branch to the same + // destination. + if (TBB != ThisTarget->getMBB()) + return true; + + // If the conditions are the same, we can leave them alone. + unsigned OldCond = Cond[0].getImm(); + if (OldCond == ThisCond) + continue; + + // FIXME: Try combining conditions like X86 does. Should be easy on Z! + } + + return false; +} + +unsigned SystemZInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const { + // Most of the code and comments here are boilerplate. + MachineBasicBlock::iterator I = MBB.end(); + unsigned Count = 0; + + while (I != MBB.begin()) { + --I; + if (I->isDebugValue()) + continue; + unsigned Cond; + const MachineOperand *Target; + if (!isBranch(I, Cond, Target)) + break; + if (!Target->isMBB()) + break; + // Remove the branch. + I->eraseFromParent(); + I = MBB.end(); + ++Count; + } + + return Count; +} + +unsigned +SystemZInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, + MachineBasicBlock *FBB, + const SmallVectorImpl<MachineOperand> &Cond, + DebugLoc DL) const { + // In this function we output 32-bit branches, which should always + // have enough range. They can be shortened and relaxed by later code + // in the pipeline, if desired. + + // Shouldn't be a fall through. + assert(TBB && "InsertBranch must not be told to insert a fallthrough"); + assert((Cond.size() == 1 || Cond.size() == 0) && + "SystemZ branch conditions have one component!"); + + if (Cond.empty()) { + // Unconditional branch? + assert(!FBB && "Unconditional branch with multiple successors!"); + BuildMI(&MBB, DL, get(SystemZ::JG)).addMBB(TBB); + return 1; + } + + // Conditional branch. + unsigned Count = 0; + unsigned CC = Cond[0].getImm(); + BuildMI(&MBB, DL, get(SystemZ::BRCL)).addImm(CC).addMBB(TBB); + ++Count; + + if (FBB) { + // Two-way Conditional branch. Insert the second branch. + BuildMI(&MBB, DL, get(SystemZ::JG)).addMBB(FBB); + ++Count; + } + return Count; +} + +void +SystemZInstrInfo::copyPhysReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, DebugLoc DL, + unsigned DestReg, unsigned SrcReg, + bool KillSrc) const { + // Split 128-bit GPR moves into two 64-bit moves. This handles ADDR128 too. + if (SystemZ::GR128BitRegClass.contains(DestReg, SrcReg)) { + copyPhysReg(MBB, MBBI, DL, RI.getSubReg(DestReg, SystemZ::subreg_high), + RI.getSubReg(SrcReg, SystemZ::subreg_high), KillSrc); + copyPhysReg(MBB, MBBI, DL, RI.getSubReg(DestReg, SystemZ::subreg_low), + RI.getSubReg(SrcReg, SystemZ::subreg_low), KillSrc); + return; + } + + // Everything else needs only one instruction. 
+ unsigned Opcode; + if (SystemZ::GR32BitRegClass.contains(DestReg, SrcReg)) + Opcode = SystemZ::LR; + else if (SystemZ::GR64BitRegClass.contains(DestReg, SrcReg)) + Opcode = SystemZ::LGR; + else if (SystemZ::FP32BitRegClass.contains(DestReg, SrcReg)) + Opcode = SystemZ::LER; + else if (SystemZ::FP64BitRegClass.contains(DestReg, SrcReg)) + Opcode = SystemZ::LDR; + else if (SystemZ::FP128BitRegClass.contains(DestReg, SrcReg)) + Opcode = SystemZ::LXR; + else + llvm_unreachable("Impossible reg-to-reg copy"); + + BuildMI(MBB, MBBI, DL, get(Opcode), DestReg) + .addReg(SrcReg, getKillRegState(KillSrc)); +} + +void +SystemZInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + unsigned SrcReg, bool isKill, + int FrameIdx, + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const { + DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc(); + + // Callers may expect a single instruction, so keep 128-bit moves + // together for now and lower them after register allocation. + unsigned LoadOpcode, StoreOpcode; + getLoadStoreOpcodes(RC, LoadOpcode, StoreOpcode); + addFrameReference(BuildMI(MBB, MBBI, DL, get(StoreOpcode)) + .addReg(SrcReg, getKillRegState(isKill)), FrameIdx); +} + +void +SystemZInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + unsigned DestReg, int FrameIdx, + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const { + DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc(); + + // Callers may expect a single instruction, so keep 128-bit moves + // together for now and lower them after register allocation. + unsigned LoadOpcode, StoreOpcode; + getLoadStoreOpcodes(RC, LoadOpcode, StoreOpcode); + addFrameReference(BuildMI(MBB, MBBI, DL, get(LoadOpcode), DestReg), + FrameIdx); +} + +bool +SystemZInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const { + switch (MI->getOpcode()) { + case SystemZ::L128: + splitMove(MI, SystemZ::LG); + return true; + + case SystemZ::ST128: + splitMove(MI, SystemZ::STG); + return true; + + case SystemZ::LX: + splitMove(MI, SystemZ::LD); + return true; + + case SystemZ::STX: + splitMove(MI, SystemZ::STD); + return true; + + case SystemZ::ADJDYNALLOC: + splitAdjDynAlloc(MI); + return true; + + default: + return false; + } +} + +bool SystemZInstrInfo:: +ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const { + assert(Cond.size() == 1 && "Invalid branch condition!"); + Cond[0].setImm(Cond[0].getImm() ^ SystemZ::CCMASK_ANY); + return false; +} + +bool SystemZInstrInfo::isBranch(const MachineInstr *MI, unsigned &Cond, + const MachineOperand *&Target) const { + switch (MI->getOpcode()) { + case SystemZ::BR: + case SystemZ::J: + case SystemZ::JG: + Cond = SystemZ::CCMASK_ANY; + Target = &MI->getOperand(0); + return true; + + case SystemZ::BRC: + case SystemZ::BRCL: + Cond = MI->getOperand(0).getImm(); + Target = &MI->getOperand(1); + return true; + + default: + assert(!MI->getDesc().isBranch() && "Unknown branch opcode"); + return false; + } +} + +void SystemZInstrInfo::getLoadStoreOpcodes(const TargetRegisterClass *RC, + unsigned &LoadOpcode, + unsigned &StoreOpcode) const { + if (RC == &SystemZ::GR32BitRegClass || RC == &SystemZ::ADDR32BitRegClass) { + LoadOpcode = SystemZ::L; + StoreOpcode = SystemZ::ST32; + } else if (RC == &SystemZ::GR64BitRegClass || + RC == &SystemZ::ADDR64BitRegClass) { + LoadOpcode = SystemZ::LG; + StoreOpcode = SystemZ::STG; + } else if (RC == &SystemZ::GR128BitRegClass || + RC == 
&SystemZ::ADDR128BitRegClass) { + LoadOpcode = SystemZ::L128; + StoreOpcode = SystemZ::ST128; + } else if (RC == &SystemZ::FP32BitRegClass) { + LoadOpcode = SystemZ::LE; + StoreOpcode = SystemZ::STE; + } else if (RC == &SystemZ::FP64BitRegClass) { + LoadOpcode = SystemZ::LD; + StoreOpcode = SystemZ::STD; + } else if (RC == &SystemZ::FP128BitRegClass) { + LoadOpcode = SystemZ::LX; + StoreOpcode = SystemZ::STX; + } else + llvm_unreachable("Unsupported regclass to load or store"); +} + +unsigned SystemZInstrInfo::getOpcodeForOffset(unsigned Opcode, + int64_t Offset) const { + const MCInstrDesc &MCID = get(Opcode); + int64_t Offset2 = (MCID.TSFlags & SystemZII::Is128Bit ? Offset + 8 : Offset); + if (isUInt<12>(Offset) && isUInt<12>(Offset2)) { + // Get the instruction to use for unsigned 12-bit displacements. + int Disp12Opcode = SystemZ::getDisp12Opcode(Opcode); + if (Disp12Opcode >= 0) + return Disp12Opcode; + + // All address-related instructions can use unsigned 12-bit + // displacements. + return Opcode; + } + if (isInt<20>(Offset) && isInt<20>(Offset2)) { + // Get the instruction to use for signed 20-bit displacements. + int Disp20Opcode = SystemZ::getDisp20Opcode(Opcode); + if (Disp20Opcode >= 0) + return Disp20Opcode; + + // Check whether Opcode allows signed 20-bit displacements. + if (MCID.TSFlags & SystemZII::Has20BitOffset) + return Opcode; + } + return 0; +} + +void SystemZInstrInfo::loadImmediate(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + unsigned Reg, uint64_t Value) const { + DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc(); + unsigned Opcode; + if (isInt<16>(Value)) + Opcode = SystemZ::LGHI; + else if (SystemZ::isImmLL(Value)) + Opcode = SystemZ::LLILL; + else if (SystemZ::isImmLH(Value)) { + Opcode = SystemZ::LLILH; + Value >>= 16; + } else { + assert(isInt<32>(Value) && "Huge values not handled yet"); + Opcode = SystemZ::LGFI; + } + BuildMI(MBB, MBBI, DL, get(Opcode), Reg).addImm(Value); +} diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.h b/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.h new file mode 100644 index 0000000..0fc4761 --- /dev/null +++ b/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.h @@ -0,0 +1,123 @@ +//===-- SystemZInstrInfo.h - SystemZ instruction information ----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the SystemZ implementation of the TargetInstrInfo class. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TARGET_SYSTEMZINSTRINFO_H +#define LLVM_TARGET_SYSTEMZINSTRINFO_H + +#include "SystemZ.h" +#include "SystemZRegisterInfo.h" +#include "llvm/Target/TargetInstrInfo.h" + +#define GET_INSTRINFO_HEADER +#include "SystemZGenInstrInfo.inc" + +namespace llvm { + +class SystemZTargetMachine; + +namespace SystemZII { + enum { + // See comments in SystemZInstrFormats.td. + SimpleBDXLoad = (1 << 0), + SimpleBDXStore = (1 << 1), + Has20BitOffset = (1 << 2), + HasIndex = (1 << 3), + Is128Bit = (1 << 4) + }; + // SystemZ MachineOperand target flags. + enum { + // Masks out the bits for the access model. 
+ MO_SYMBOL_MODIFIER = (1 << 0), + + // @GOT (aka @GOTENT) + MO_GOT = (1 << 0) + }; +} + +class SystemZInstrInfo : public SystemZGenInstrInfo { + const SystemZRegisterInfo RI; + + void splitMove(MachineBasicBlock::iterator MI, unsigned NewOpcode) const; + void splitAdjDynAlloc(MachineBasicBlock::iterator MI) const; + +public: + explicit SystemZInstrInfo(SystemZTargetMachine &TM); + + // Override TargetInstrInfo. + virtual unsigned isLoadFromStackSlot(const MachineInstr *MI, + int &FrameIndex) const LLVM_OVERRIDE; + virtual unsigned isStoreToStackSlot(const MachineInstr *MI, + int &FrameIndex) const LLVM_OVERRIDE; + virtual bool AnalyzeBranch(MachineBasicBlock &MBB, + MachineBasicBlock *&TBB, + MachineBasicBlock *&FBB, + SmallVectorImpl<MachineOperand> &Cond, + bool AllowModify) const LLVM_OVERRIDE; + virtual unsigned RemoveBranch(MachineBasicBlock &MBB) const LLVM_OVERRIDE; + virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, + MachineBasicBlock *FBB, + const SmallVectorImpl<MachineOperand> &Cond, + DebugLoc DL) const LLVM_OVERRIDE; + virtual void copyPhysReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, DebugLoc DL, + unsigned DestReg, unsigned SrcReg, + bool KillSrc) const LLVM_OVERRIDE; + virtual void + storeRegToStackSlot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + unsigned SrcReg, bool isKill, int FrameIndex, + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const LLVM_OVERRIDE; + virtual void + loadRegFromStackSlot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + unsigned DestReg, int FrameIdx, + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const LLVM_OVERRIDE; + virtual bool + expandPostRAPseudo(MachineBasicBlock::iterator MBBI) const LLVM_OVERRIDE; + virtual bool + ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const + LLVM_OVERRIDE; + + // Return the SystemZRegisterInfo, which this class owns. + const SystemZRegisterInfo &getRegisterInfo() const { return RI; } + + // Return true if MI is a conditional or unconditional branch. + // When returning true, set Cond to the mask of condition-code + // values on which the instruction will branch, and set Target + // to the operand that contains the branch target. This target + // can be a register or a basic block. + bool isBranch(const MachineInstr *MI, unsigned &Cond, + const MachineOperand *&Target) const; + + // Get the load and store opcodes for a given register class. + void getLoadStoreOpcodes(const TargetRegisterClass *RC, + unsigned &LoadOpcode, unsigned &StoreOpcode) const; + + // Opcode is the opcode of an instruction that has an address operand, + // and the caller wants to perform that instruction's operation on an + // address that has displacement Offset. Return the opcode of a suitable + // instruction (which might be Opcode itself) or 0 if no such instruction + // exists. + unsigned getOpcodeForOffset(unsigned Opcode, int64_t Offset) const; + + // Emit code before MBBI in MI to move immediate value Value into + // physical register Reg. 
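+  // Only values that fit one of the forms handled by the implementation are
+  // supported: signed 16-bit immediates (LGHI), unsigned 16-bit values placed
+  // in bits 0-15 or 16-31 (LLILL/LLILH), and other signed 32-bit values
+  // (LGFI).  Wider values are not handled yet.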
+ void loadImmediate(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + unsigned Reg, uint64_t Value) const; +}; +} // end namespace llvm + +#endif diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.td b/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.td new file mode 100644 index 0000000..7ffa382 --- /dev/null +++ b/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.td @@ -0,0 +1,955 @@ +//===-- SystemZInstrInfo.td - General SystemZ instructions ----*- tblgen-*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Stack allocation +//===----------------------------------------------------------------------===// + +def ADJCALLSTACKDOWN : Pseudo<(outs), (ins i64imm:$amt), + [(callseq_start timm:$amt)]>; +def ADJCALLSTACKUP : Pseudo<(outs), (ins i64imm:$amt1, i64imm:$amt2), + [(callseq_end timm:$amt1, timm:$amt2)]>; + +let neverHasSideEffects = 1 in { + // Takes as input the value of the stack pointer after a dynamic allocation + // has been made. Sets the output to the address of the dynamically- + // allocated area itself, skipping the outgoing arguments. + // + // This expands to an LA or LAY instruction. We restrict the offset + // to the range of LA and keep the LAY range in reserve for when + // the size of the outgoing arguments is added. + def ADJDYNALLOC : Pseudo<(outs GR64:$dst), (ins dynalloc12only:$src), + [(set GR64:$dst, dynalloc12only:$src)]>; +} + +//===----------------------------------------------------------------------===// +// Control flow instructions +//===----------------------------------------------------------------------===// + +// A return instruction. R1 is the condition-code mask (all 1s) +// and R2 is the target address, which is always stored in %r14. +let isReturn = 1, isTerminator = 1, isBarrier = 1, hasCtrlDep = 1, + R1 = 15, R2 = 14, isCodeGenOnly = 1 in { + def RET : InstRR<0x07, (outs), (ins), "br\t%r14", [(z_retflag)]>; +} + +// Unconditional branches. R1 is the condition-code mask (all 1s). +let isBranch = 1, isTerminator = 1, isBarrier = 1, R1 = 15 in { + let isIndirectBranch = 1 in + def BR : InstRR<0x07, (outs), (ins ADDR64:$dst), + "br\t$dst", [(brind ADDR64:$dst)]>; + + // An assembler extended mnemonic for BRC. Use a separate instruction for + // the asm parser, so that we don't relax Js to external symbols into JGs. + let isCodeGenOnly = 1 in + def J : InstRI<0xA74, (outs), (ins brtarget16:$dst), "j\t$dst", []>; + let isAsmParserOnly = 1 in + def AsmJ : InstRI<0xA74, (outs), (ins brtarget16:$dst), "j\t$dst", []>; + + // An assembler extended mnemonic for BRCL. (The extension is "G" + // rather than "L" because "JL" is "Jump if Less".) + def JG : InstRIL<0xC04, (outs), (ins brtarget32:$dst), + "jg\t$dst", [(br bb:$dst)]>; +} + +// Conditional branches. It's easier for LLVM to handle these branches +// in their raw BRC/BRCL form, with the 4-bit condition-code mask being +// the first operand. It seems friendlier to use mnemonic forms like +// JE and JLH when writing out the assembly though. 
+multiclass CondBranches<Operand imm, string short, string long> { + let isBranch = 1, isTerminator = 1, Uses = [PSW] in { + def "" : InstRI<0xA74, (outs), (ins imm:$cond, brtarget16:$dst), short, []>; + def L : InstRIL<0xC04, (outs), (ins imm:$cond, brtarget32:$dst), long, []>; + } +} +let isCodeGenOnly = 1 in + defm BRC : CondBranches<cond4, "j$cond\t$dst", "jg$cond\t$dst">; +let isAsmParserOnly = 1 in + defm AsmBRC : CondBranches<uimm8zx4, "brc\t$cond, $dst", "brcl\t$cond, $dst">; + +def : Pat<(z_br_ccmask cond4:$cond, bb:$dst), (BRCL cond4:$cond, bb:$dst)>; + +// Define AsmParser mnemonics for each condition code. +multiclass CondExtendedMnemonic<bits<4> Cond, string name> { + let R1 = Cond in { + def "" : InstRI<0xA74, (outs), (ins brtarget16:$dst), + "j"##name##"\t$dst", []>; + def L : InstRIL<0xC04, (outs), (ins brtarget32:$dst), + "jg"##name##"\t$dst", []>; + } +} +let isAsmParserOnly = 1 in { + defm AsmJO : CondExtendedMnemonic<1, "o">; + defm AsmJH : CondExtendedMnemonic<2, "h">; + defm AsmJNLE : CondExtendedMnemonic<3, "nle">; + defm AsmJL : CondExtendedMnemonic<4, "l">; + defm AsmJNHE : CondExtendedMnemonic<5, "nhe">; + defm AsmJLH : CondExtendedMnemonic<6, "lh">; + defm AsmJNE : CondExtendedMnemonic<7, "ne">; + defm AsmJE : CondExtendedMnemonic<8, "e">; + defm AsmJNLH : CondExtendedMnemonic<9, "nlh">; + defm AsmJHE : CondExtendedMnemonic<10, "he">; + defm AsmJNL : CondExtendedMnemonic<11, "nl">; + defm AsmJLE : CondExtendedMnemonic<12, "le">; + defm AsmJNH : CondExtendedMnemonic<13, "nh">; + defm AsmJNO : CondExtendedMnemonic<14, "no">; +} + +def Select32 : SelectWrapper<GR32>; +def Select64 : SelectWrapper<GR64>; + +//===----------------------------------------------------------------------===// +// Call instructions +//===----------------------------------------------------------------------===// + +// The definitions here are for the call-clobbered registers. +let isCall = 1, Defs = [R0D, R1D, R2D, R3D, R4D, R5D, R14D, + F0D, F1D, F2D, F3D, F4D, F5D, F6D, F7D], + R1 = 14, isCodeGenOnly = 1 in { + def BRAS : InstRI<0xA75, (outs), (ins pcrel16call:$dst, variable_ops), + "bras\t%r14, $dst", []>; + def BRASL : InstRIL<0xC05, (outs), (ins pcrel32call:$dst, variable_ops), + "brasl\t%r14, $dst", [(z_call pcrel32call:$dst)]>; + def BASR : InstRR<0x0D, (outs), (ins ADDR64:$dst, variable_ops), + "basr\t%r14, $dst", [(z_call ADDR64:$dst)]>; +} + +// Define the general form of the call instructions for the asm parser. +// These instructions don't hard-code %r14 as the return address register. +let isAsmParserOnly = 1 in { + def AsmBRAS : InstRI<0xA75, (outs), (ins GR64:$save, brtarget16:$dst), + "bras\t$save, $dst", []>; + def AsmBRASL : InstRIL<0xC05, (outs), (ins GR64:$save, brtarget32:$dst), + "brasl\t$save, $dst", []>; + def AsmBASR : InstRR<0x0D, (outs), (ins GR64:$save, ADDR64:$dst), + "basr\t$save, $dst", []>; +} + +//===----------------------------------------------------------------------===// +// Move instructions +//===----------------------------------------------------------------------===// + +// Register moves. +let neverHasSideEffects = 1 in { + def LR : UnaryRR <"lr", 0x18, null_frag, GR32, GR32>; + def LGR : UnaryRRE<"lgr", 0xB904, null_frag, GR64, GR64>; +} + +// Immediate moves. +let neverHasSideEffects = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in { + // 16-bit sign-extended immediates. + def LHI : UnaryRI<"lhi", 0xA78, bitconvert, GR32, imm32sx16>; + def LGHI : UnaryRI<"lghi", 0xA79, bitconvert, GR64, imm64sx16>; + + // Other 16-bit immediates. 
+ def LLILL : UnaryRI<"llill", 0xA5F, bitconvert, GR64, imm64ll16>; + def LLILH : UnaryRI<"llilh", 0xA5E, bitconvert, GR64, imm64lh16>; + def LLIHL : UnaryRI<"llihl", 0xA5D, bitconvert, GR64, imm64hl16>; + def LLIHH : UnaryRI<"llihh", 0xA5C, bitconvert, GR64, imm64hh16>; + + // 32-bit immediates. + def LGFI : UnaryRIL<"lgfi", 0xC01, bitconvert, GR64, imm64sx32>; + def LLILF : UnaryRIL<"llilf", 0xC0F, bitconvert, GR64, imm64lf32>; + def LLIHF : UnaryRIL<"llihf", 0xC0E, bitconvert, GR64, imm64hf32>; +} + +// Register loads. +let canFoldAsLoad = 1, SimpleBDXLoad = 1 in { + defm L : UnaryRXPair<"l", 0x58, 0xE358, load, GR32>; + def LRL : UnaryRILPC<"lrl", 0xC4D, aligned_load, GR32>; + + def LG : UnaryRXY<"lg", 0xE304, load, GR64>; + def LGRL : UnaryRILPC<"lgrl", 0xC48, aligned_load, GR64>; + + // These instructions are split after register allocation, so we don't + // want a custom inserter. + let Has20BitOffset = 1, HasIndex = 1, Is128Bit = 1 in { + def L128 : Pseudo<(outs GR128:$dst), (ins bdxaddr20only128:$src), + [(set GR128:$dst, (load bdxaddr20only128:$src))]>; + } +} + +// Register stores. +let SimpleBDXStore = 1 in { + let isCodeGenOnly = 1 in { + defm ST32 : StoreRXPair<"st", 0x50, 0xE350, store, GR32>; + def STRL32 : StoreRILPC<"strl", 0xC4F, aligned_store, GR32>; + } + + def STG : StoreRXY<"stg", 0xE324, store, GR64>; + def STGRL : StoreRILPC<"stgrl", 0xC4B, aligned_store, GR64>; + + // These instructions are split after register allocation, so we don't + // want a custom inserter. + let Has20BitOffset = 1, HasIndex = 1, Is128Bit = 1 in { + def ST128 : Pseudo<(outs), (ins GR128:$src, bdxaddr20only128:$dst), + [(store GR128:$src, bdxaddr20only128:$dst)]>; + } +} + +// 8-bit immediate stores to 8-bit fields. +defm MVI : StoreSIPair<"mvi", 0x92, 0xEB52, truncstorei8, imm32zx8trunc>; + +// 16-bit immediate stores to 16-, 32- or 64-bit fields. +def MVHHI : StoreSIL<"mvhhi", 0xE544, truncstorei16, imm32sx16trunc>; +def MVHI : StoreSIL<"mvhi", 0xE54C, store, imm32sx16>; +def MVGHI : StoreSIL<"mvghi", 0xE548, store, imm64sx16>; + +//===----------------------------------------------------------------------===// +// Sign extensions +//===----------------------------------------------------------------------===// + +// 32-bit extensions from registers. +let neverHasSideEffects = 1 in { + def LBR : UnaryRRE<"lbr", 0xB926, sext8, GR32, GR32>; + def LHR : UnaryRRE<"lhr", 0xB927, sext16, GR32, GR32>; +} + +// 64-bit extensions from registers. +let neverHasSideEffects = 1 in { + def LGBR : UnaryRRE<"lgbr", 0xB906, sext8, GR64, GR64>; + def LGHR : UnaryRRE<"lghr", 0xB907, sext16, GR64, GR64>; + def LGFR : UnaryRRE<"lgfr", 0xB914, sext32, GR64, GR32>; +} + +// Match 32-to-64-bit sign extensions in which the source is already +// in a 64-bit register. +def : Pat<(sext_inreg GR64:$src, i32), + (LGFR (EXTRACT_SUBREG GR64:$src, subreg_32bit))>; + +// 32-bit extensions from memory. +def LB : UnaryRXY<"lb", 0xE376, sextloadi8, GR32>; +defm LH : UnaryRXPair<"lh", 0x48, 0xE378, sextloadi16, GR32>; +def LHRL : UnaryRILPC<"lhrl", 0xC45, aligned_sextloadi16, GR32>; + +// 64-bit extensions from memory. +def LGB : UnaryRXY<"lgb", 0xE377, sextloadi8, GR64>; +def LGH : UnaryRXY<"lgh", 0xE315, sextloadi16, GR64>; +def LGF : UnaryRXY<"lgf", 0xE314, sextloadi32, GR64>; +def LGHRL : UnaryRILPC<"lghrl", 0xC44, aligned_sextloadi16, GR64>; +def LGFRL : UnaryRILPC<"lgfrl", 0xC4C, aligned_sextloadi32, GR64>; + +// If the sign of a load-extend operation doesn't matter, use the signed ones. 
+// There's not really much to choose between the sign and zero extensions, +// but LH is more compact than LLH for small offsets. +def : Pat<(i32 (extloadi8 bdxaddr20only:$src)), (LB bdxaddr20only:$src)>; +def : Pat<(i32 (extloadi16 bdxaddr12pair:$src)), (LH bdxaddr12pair:$src)>; +def : Pat<(i32 (extloadi16 bdxaddr20pair:$src)), (LHY bdxaddr20pair:$src)>; + +def : Pat<(i64 (extloadi8 bdxaddr20only:$src)), (LGB bdxaddr20only:$src)>; +def : Pat<(i64 (extloadi16 bdxaddr20only:$src)), (LGH bdxaddr20only:$src)>; +def : Pat<(i64 (extloadi32 bdxaddr20only:$src)), (LGF bdxaddr20only:$src)>; + +//===----------------------------------------------------------------------===// +// Zero extensions +//===----------------------------------------------------------------------===// + +// 32-bit extensions from registers. +let neverHasSideEffects = 1 in { + def LLCR : UnaryRRE<"llcr", 0xB994, zext8, GR32, GR32>; + def LLHR : UnaryRRE<"llhr", 0xB995, zext16, GR32, GR32>; +} + +// 64-bit extensions from registers. +let neverHasSideEffects = 1 in { + def LLGCR : UnaryRRE<"llgcr", 0xB984, zext8, GR64, GR64>; + def LLGHR : UnaryRRE<"llghr", 0xB985, zext16, GR64, GR64>; + def LLGFR : UnaryRRE<"llgfr", 0xB916, zext32, GR64, GR32>; +} + +// Match 32-to-64-bit zero extensions in which the source is already +// in a 64-bit register. +def : Pat<(and GR64:$src, 0xffffffff), + (LLGFR (EXTRACT_SUBREG GR64:$src, subreg_32bit))>; + +// 32-bit extensions from memory. +def LLC : UnaryRXY<"llc", 0xE394, zextloadi8, GR32>; +def LLH : UnaryRXY<"llh", 0xE395, zextloadi16, GR32>; +def LLHRL : UnaryRILPC<"llhrl", 0xC42, aligned_zextloadi16, GR32>; + +// 64-bit extensions from memory. +def LLGC : UnaryRXY<"llgc", 0xE390, zextloadi8, GR64>; +def LLGH : UnaryRXY<"llgh", 0xE391, zextloadi16, GR64>; +def LLGF : UnaryRXY<"llgf", 0xE316, zextloadi32, GR64>; +def LLGHRL : UnaryRILPC<"llghrl", 0xC46, aligned_zextloadi16, GR64>; +def LLGFRL : UnaryRILPC<"llgfrl", 0xC4E, aligned_zextloadi32, GR64>; + +//===----------------------------------------------------------------------===// +// Truncations +//===----------------------------------------------------------------------===// + +// Truncations of 64-bit registers to 32-bit registers. +def : Pat<(i32 (trunc GR64:$src)), + (EXTRACT_SUBREG GR64:$src, subreg_32bit)>; + +// Truncations of 32-bit registers to memory. +let isCodeGenOnly = 1 in { + defm STC32 : StoreRXPair<"stc", 0x42, 0xE372, truncstorei8, GR32>; + defm STH32 : StoreRXPair<"sth", 0x40, 0xE370, truncstorei16, GR32>; + def STHRL32 : StoreRILPC<"sthrl", 0xC47, aligned_truncstorei16, GR32>; +} + +// Truncations of 64-bit registers to memory. +defm STC : StoreRXPair<"stc", 0x42, 0xE372, truncstorei8, GR64>; +defm STH : StoreRXPair<"sth", 0x40, 0xE370, truncstorei16, GR64>; +def STHRL : StoreRILPC<"sthrl", 0xC47, aligned_truncstorei16, GR64>; +defm ST : StoreRXPair<"st", 0x50, 0xE350, truncstorei32, GR64>; +def STRL : StoreRILPC<"strl", 0xC4F, aligned_truncstorei32, GR64>; + +//===----------------------------------------------------------------------===// +// Multi-register moves +//===----------------------------------------------------------------------===// + +// Multi-register loads. +def LMG : LoadMultipleRSY<"lmg", 0xEB04, GR64>; + +// Multi-register stores. +def STMG : StoreMultipleRSY<"stmg", 0xEB24, GR64>; + +//===----------------------------------------------------------------------===// +// Byte swaps +//===----------------------------------------------------------------------===// + +// Byte-swapping register moves. 
+let neverHasSideEffects = 1 in { + def LRVR : UnaryRRE<"lrvr", 0xB91F, bswap, GR32, GR32>; + def LRVGR : UnaryRRE<"lrvgr", 0xB90F, bswap, GR64, GR64>; +} + +// Byte-swapping loads. +def LRV : UnaryRXY<"lrv", 0xE31E, loadu<bswap>, GR32>; +def LRVG : UnaryRXY<"lrvg", 0xE30F, loadu<bswap>, GR64>; + +// Byte-swapping stores. +def STRV : StoreRXY<"strv", 0xE33E, storeu<bswap>, GR32>; +def STRVG : StoreRXY<"strvg", 0xE32F, storeu<bswap>, GR64>; + +//===----------------------------------------------------------------------===// +// Load address instructions +//===----------------------------------------------------------------------===// + +// Load BDX-style addresses. +let neverHasSideEffects = 1, Function = "la" in { + let PairType = "12" in + def LA : InstRX<0x41, (outs GR64:$dst), (ins laaddr12pair:$src), + "la\t$dst, $src", + [(set GR64:$dst, laaddr12pair:$src)]>; + let PairType = "20" in + def LAY : InstRXY<0xE371, (outs GR64:$dst), (ins laaddr20pair:$src), + "lay\t$dst, $src", + [(set GR64:$dst, laaddr20pair:$src)]>; +} + +// Load a PC-relative address. There's no version of this instruction +// with a 16-bit offset, so there's no relaxation. +let neverHasSideEffects = 1 in { + def LARL : InstRIL<0xC00, (outs GR64:$dst), (ins pcrel32:$src), + "larl\t$dst, $src", + [(set GR64:$dst, pcrel32:$src)]>; +} + +//===----------------------------------------------------------------------===// +// Negation +//===----------------------------------------------------------------------===// + +let Defs = [PSW] in { + def LCR : UnaryRR <"lcr", 0x13, ineg, GR32, GR32>; + def LCGR : UnaryRRE<"lcgr", 0xB903, ineg, GR64, GR64>; + def LCGFR : UnaryRRE<"lcgfr", 0xB913, null_frag, GR64, GR32>; +} +defm : SXU<ineg, LCGFR>; + +//===----------------------------------------------------------------------===// +// Insertion +//===----------------------------------------------------------------------===// + +let isCodeGenOnly = 1 in + defm IC32 : BinaryRXPair<"ic", 0x43, 0xE373, inserti8, GR32, zextloadi8>; +defm IC : BinaryRXPair<"ic", 0x43, 0xE373, inserti8, GR64, zextloadi8>; + +defm : InsertMem<"inserti8", IC32, GR32, zextloadi8, bdxaddr12pair>; +defm : InsertMem<"inserti8", IC32Y, GR32, zextloadi8, bdxaddr20pair>; + +defm : InsertMem<"inserti8", IC, GR64, zextloadi8, bdxaddr12pair>; +defm : InsertMem<"inserti8", ICY, GR64, zextloadi8, bdxaddr20pair>; + +// Insertions of a 16-bit immediate, leaving other bits unaffected. +// We don't have or_as_insert equivalents of these operations because +// OI is available instead. +let isCodeGenOnly = 1 in { + def IILL32 : BinaryRI<"iill", 0xA53, insertll, GR32, imm32ll16>; + def IILH32 : BinaryRI<"iilh", 0xA52, insertlh, GR32, imm32lh16>; +} +def IILL : BinaryRI<"iill", 0xA53, insertll, GR64, imm64ll16>; +def IILH : BinaryRI<"iilh", 0xA52, insertlh, GR64, imm64lh16>; +def IIHL : BinaryRI<"iihl", 0xA51, inserthl, GR64, imm64hl16>; +def IIHH : BinaryRI<"iihh", 0xA50, inserthh, GR64, imm64hh16>; + +// ...likewise for 32-bit immediates. For GR32s this is a general +// full-width move. (We use IILF rather than something like LLILF +// for 32-bit moves because IILF leaves the upper 32 bits of the +// GR64 unchanged.) +let isCodeGenOnly = 1 in { + def IILF32 : UnaryRIL<"iilf", 0xC09, bitconvert, GR32, uimm32>; +} +def IILF : BinaryRIL<"iilf", 0xC09, insertlf, GR64, imm64lf32>; +def IIHF : BinaryRIL<"iihf", 0xC08, inserthf, GR64, imm64hf32>; + +// An alternative model of inserthf, with the first operand being +// a zero-extended value. 
+def : Pat<(or (zext32 GR32:$src), imm64hf32:$imm), + (IIHF (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR32:$src, subreg_32bit), + imm64hf32:$imm)>; + +//===----------------------------------------------------------------------===// +// Addition +//===----------------------------------------------------------------------===// + +// Plain addition. +let Defs = [PSW] in { + // Addition of a register. + let isCommutable = 1 in { + def AR : BinaryRR <"ar", 0x1A, add, GR32, GR32>; + def AGR : BinaryRRE<"agr", 0xB908, add, GR64, GR64>; + } + def AGFR : BinaryRRE<"agfr", 0xB918, null_frag, GR64, GR32>; + + // Addition of signed 16-bit immediates. + def AHI : BinaryRI<"ahi", 0xA7A, add, GR32, imm32sx16>; + def AGHI : BinaryRI<"aghi", 0xA7B, add, GR64, imm64sx16>; + + // Addition of signed 32-bit immediates. + def AFI : BinaryRIL<"afi", 0xC29, add, GR32, simm32>; + def AGFI : BinaryRIL<"agfi", 0xC28, add, GR64, imm64sx32>; + + // Addition of memory. + defm AH : BinaryRXPair<"ah", 0x4A, 0xE37A, add, GR32, sextloadi16>; + defm A : BinaryRXPair<"a", 0x5A, 0xE35A, add, GR32, load>; + def AGF : BinaryRXY<"agf", 0xE318, add, GR64, sextloadi32>; + def AG : BinaryRXY<"ag", 0xE308, add, GR64, load>; + + // Addition to memory. + def ASI : BinarySIY<"asi", 0xEB6A, add, imm32sx8>; + def AGSI : BinarySIY<"agsi", 0xEB7A, add, imm64sx8>; +} +defm : SXB<add, GR64, AGFR>; + +// Addition producing a carry. +let Defs = [PSW] in { + // Addition of a register. + let isCommutable = 1 in { + def ALR : BinaryRR <"alr", 0x1E, addc, GR32, GR32>; + def ALGR : BinaryRRE<"algr", 0xB90A, addc, GR64, GR64>; + } + def ALGFR : BinaryRRE<"algfr", 0xB91A, null_frag, GR64, GR32>; + + // Addition of unsigned 32-bit immediates. + def ALFI : BinaryRIL<"alfi", 0xC2B, addc, GR32, uimm32>; + def ALGFI : BinaryRIL<"algfi", 0xC2A, addc, GR64, imm64zx32>; + + // Addition of memory. + defm AL : BinaryRXPair<"al", 0x5E, 0xE35E, addc, GR32, load>; + def ALGF : BinaryRXY<"algf", 0xE31A, addc, GR64, zextloadi32>; + def ALG : BinaryRXY<"alg", 0xE30A, addc, GR64, load>; +} +defm : ZXB<addc, GR64, ALGFR>; + +// Addition producing and using a carry. +let Defs = [PSW], Uses = [PSW] in { + // Addition of a register. + def ALCR : BinaryRRE<"alcr", 0xB998, adde, GR32, GR32>; + def ALCGR : BinaryRRE<"alcgr", 0xB988, adde, GR64, GR64>; + + // Addition of memory. + def ALC : BinaryRXY<"alc", 0xE398, adde, GR32, load>; + def ALCG : BinaryRXY<"alcg", 0xE388, adde, GR64, load>; +} + +//===----------------------------------------------------------------------===// +// Subtraction +//===----------------------------------------------------------------------===// + +// Plain substraction. Although immediate forms exist, we use the +// add-immediate instruction instead. +let Defs = [PSW] in { + // Subtraction of a register. + def SR : BinaryRR <"sr", 0x1B, sub, GR32, GR32>; + def SGFR : BinaryRRE<"sgfr", 0xB919, null_frag, GR64, GR32>; + def SGR : BinaryRRE<"sgr", 0xB909, sub, GR64, GR64>; + + // Subtraction of memory. + defm S : BinaryRXPair<"s", 0x5B, 0xE35B, sub, GR32, load>; + def SGF : BinaryRXY<"sgf", 0xE319, sub, GR64, sextloadi32>; + def SG : BinaryRXY<"sg", 0xE309, sub, GR64, load>; +} +defm : SXB<sub, GR64, SGFR>; + +// Subtraction producing a carry. +let Defs = [PSW] in { + // Subtraction of a register. + def SLR : BinaryRR <"slr", 0x1F, subc, GR32, GR32>; + def SLGFR : BinaryRRE<"slgfr", 0xB91B, null_frag, GR64, GR32>; + def SLGR : BinaryRRE<"slgr", 0xB90B, subc, GR64, GR64>; + + // Subtraction of unsigned 32-bit immediates. 
These don't match + // subc because we prefer addc for constants. + def SLFI : BinaryRIL<"slfi", 0xC25, null_frag, GR32, uimm32>; + def SLGFI : BinaryRIL<"slgfi", 0xC24, null_frag, GR64, imm64zx32>; + + // Subtraction of memory. + defm SL : BinaryRXPair<"sl", 0x5F, 0xE35F, subc, GR32, load>; + def SLGF : BinaryRXY<"slgf", 0xE31B, subc, GR64, zextloadi32>; + def SLG : BinaryRXY<"slg", 0xE30B, subc, GR64, load>; +} +defm : ZXB<subc, GR64, SLGFR>; + +// Subtraction producing and using a carry. +let Defs = [PSW], Uses = [PSW] in { + // Subtraction of a register. + def SLBR : BinaryRRE<"slbr", 0xB999, sube, GR32, GR32>; + def SLGBR : BinaryRRE<"slbgr", 0xB989, sube, GR64, GR64>; + + // Subtraction of memory. + def SLB : BinaryRXY<"slb", 0xE399, sube, GR32, load>; + def SLBG : BinaryRXY<"slbg", 0xE389, sube, GR64, load>; +} + +//===----------------------------------------------------------------------===// +// AND +//===----------------------------------------------------------------------===// + +let Defs = [PSW] in { + // ANDs of a register. + let isCommutable = 1 in { + def NR : BinaryRR <"nr", 0x14, and, GR32, GR32>; + def NGR : BinaryRRE<"ngr", 0xB980, and, GR64, GR64>; + } + + // ANDs of a 16-bit immediate, leaving other bits unaffected. + let isCodeGenOnly = 1 in { + def NILL32 : BinaryRI<"nill", 0xA57, and, GR32, imm32ll16c>; + def NILH32 : BinaryRI<"nilh", 0xA56, and, GR32, imm32lh16c>; + } + def NILL : BinaryRI<"nill", 0xA57, and, GR64, imm64ll16c>; + def NILH : BinaryRI<"nilh", 0xA56, and, GR64, imm64lh16c>; + def NIHL : BinaryRI<"nihl", 0xA55, and, GR64, imm64hl16c>; + def NIHH : BinaryRI<"nihh", 0xA54, and, GR64, imm64hh16c>; + + // ANDs of a 32-bit immediate, leaving other bits unaffected. + let isCodeGenOnly = 1 in + def NILF32 : BinaryRIL<"nilf", 0xC0B, and, GR32, uimm32>; + def NILF : BinaryRIL<"nilf", 0xC0B, and, GR64, imm64lf32c>; + def NIHF : BinaryRIL<"nihf", 0xC0A, and, GR64, imm64hf32c>; + + // ANDs of memory. + defm N : BinaryRXPair<"n", 0x54, 0xE354, and, GR32, load>; + def NG : BinaryRXY<"ng", 0xE380, and, GR64, load>; + + // AND to memory + defm NI : BinarySIPair<"ni", 0x94, 0xEB54, null_frag, uimm8>; +} +defm : RMWIByte<and, bdaddr12pair, NI>; +defm : RMWIByte<and, bdaddr20pair, NIY>; + +//===----------------------------------------------------------------------===// +// OR +//===----------------------------------------------------------------------===// + +let Defs = [PSW] in { + // ORs of a register. + let isCommutable = 1 in { + def OR : BinaryRR <"or", 0x16, or, GR32, GR32>; + def OGR : BinaryRRE<"ogr", 0xB981, or, GR64, GR64>; + } + + // ORs of a 16-bit immediate, leaving other bits unaffected. + let isCodeGenOnly = 1 in { + def OILL32 : BinaryRI<"oill", 0xA5B, or, GR32, imm32ll16>; + def OILH32 : BinaryRI<"oilh", 0xA5A, or, GR32, imm32lh16>; + } + def OILL : BinaryRI<"oill", 0xA5B, or, GR64, imm64ll16>; + def OILH : BinaryRI<"oilh", 0xA5A, or, GR64, imm64lh16>; + def OIHL : BinaryRI<"oihl", 0xA59, or, GR64, imm64hl16>; + def OIHH : BinaryRI<"oihh", 0xA58, or, GR64, imm64hh16>; + + // ORs of a 32-bit immediate, leaving other bits unaffected. + let isCodeGenOnly = 1 in + def OILF32 : BinaryRIL<"oilf", 0xC0D, or, GR32, uimm32>; + def OILF : BinaryRIL<"oilf", 0xC0D, or, GR64, imm64lf32>; + def OIHF : BinaryRIL<"oihf", 0xC0C, or, GR64, imm64hf32>; + + // ORs of memory. 
+ defm O : BinaryRXPair<"o", 0x56, 0xE356, or, GR32, load>; + def OG : BinaryRXY<"og", 0xE381, or, GR64, load>; + + // OR to memory + defm OI : BinarySIPair<"oi", 0x96, 0xEB56, null_frag, uimm8>; +} +defm : RMWIByte<or, bdaddr12pair, OI>; +defm : RMWIByte<or, bdaddr20pair, OIY>; + +//===----------------------------------------------------------------------===// +// XOR +//===----------------------------------------------------------------------===// + +let Defs = [PSW] in { + // XORs of a register. + let isCommutable = 1 in { + def XR : BinaryRR <"xr", 0x17, xor, GR32, GR32>; + def XGR : BinaryRRE<"xgr", 0xB982, xor, GR64, GR64>; + } + + // XORs of a 32-bit immediate, leaving other bits unaffected. + let isCodeGenOnly = 1 in + def XILF32 : BinaryRIL<"xilf", 0xC07, xor, GR32, uimm32>; + def XILF : BinaryRIL<"xilf", 0xC07, xor, GR64, imm64lf32>; + def XIHF : BinaryRIL<"xihf", 0xC06, xor, GR64, imm64hf32>; + + // XORs of memory. + defm X : BinaryRXPair<"x",0x57, 0xE357, xor, GR32, load>; + def XG : BinaryRXY<"xg", 0xE382, xor, GR64, load>; + + // XOR to memory + defm XI : BinarySIPair<"xi", 0x97, 0xEB57, null_frag, uimm8>; +} +defm : RMWIByte<xor, bdaddr12pair, XI>; +defm : RMWIByte<xor, bdaddr20pair, XIY>; + +//===----------------------------------------------------------------------===// +// Multiplication +//===----------------------------------------------------------------------===// + +// Multiplication of a register. +let isCommutable = 1 in { + def MSR : BinaryRRE<"msr", 0xB252, mul, GR32, GR32>; + def MSGR : BinaryRRE<"msgr", 0xB90C, mul, GR64, GR64>; +} +def MSGFR : BinaryRRE<"msgfr", 0xB91C, null_frag, GR64, GR32>; +defm : SXB<mul, GR64, MSGFR>; + +// Multiplication of a signed 16-bit immediate. +def MHI : BinaryRI<"mhi", 0xA7C, mul, GR32, imm32sx16>; +def MGHI : BinaryRI<"mghi", 0xA7D, mul, GR64, imm64sx16>; + +// Multiplication of a signed 32-bit immediate. +def MSFI : BinaryRIL<"msfi", 0xC21, mul, GR32, simm32>; +def MSGFI : BinaryRIL<"msgfi", 0xC20, mul, GR64, imm64sx32>; + +// Multiplication of memory. +defm MH : BinaryRXPair<"mh", 0x4C, 0xE37C, mul, GR32, sextloadi16>; +defm MS : BinaryRXPair<"ms", 0x71, 0xE351, mul, GR32, load>; +def MSGF : BinaryRXY<"msgf", 0xE31C, mul, GR64, sextloadi32>; +def MSG : BinaryRXY<"msg", 0xE30C, mul, GR64, load>; + +// Multiplication of a register, producing two results. +def MLGR : BinaryRRE<"mlgr", 0xB986, z_umul_lohi64, GR128, GR64>; + +// Multiplication of memory, producing two results. +def MLG : BinaryRXY<"mlg", 0xE386, z_umul_lohi64, GR128, load>; + +//===----------------------------------------------------------------------===// +// Division and remainder +//===----------------------------------------------------------------------===// + +// Division and remainder, from registers. +def DSGFR : BinaryRRE<"dsgfr", 0xB91D, null_frag, GR128, GR32>; +def DSGR : BinaryRRE<"dsgr", 0xB90D, z_sdivrem64, GR128, GR64>; +def DLR : BinaryRRE<"dlr", 0xB997, z_udivrem32, GR128, GR32>; +def DLGR : BinaryRRE<"dlgr", 0xB987, z_udivrem64, GR128, GR64>; +defm : SXB<z_sdivrem64, GR128, DSGFR>; + +// Division and remainder, from memory. 
+def DSGF : BinaryRXY<"dsgf", 0xE31D, z_sdivrem64, GR128, sextloadi32>; +def DSG : BinaryRXY<"dsg", 0xE30D, z_sdivrem64, GR128, load>; +def DL : BinaryRXY<"dl", 0xE397, z_udivrem32, GR128, load>; +def DLG : BinaryRXY<"dlg", 0xE387, z_udivrem64, GR128, load>; + +//===----------------------------------------------------------------------===// +// Shifts +//===----------------------------------------------------------------------===// + +// Shift left. +let neverHasSideEffects = 1 in { + def SLL : ShiftRS <"sll", 0x89, shl, GR32, shift12only>; + def SLLG : ShiftRSY<"sllg", 0xEB0D, shl, GR64, shift20only>; +} + +// Logical shift right. +let neverHasSideEffects = 1 in { + def SRL : ShiftRS <"srl", 0x88, srl, GR32, shift12only>; + def SRLG : ShiftRSY<"srlg", 0xEB0C, srl, GR64, shift20only>; +} + +// Arithmetic shift right. +let Defs = [PSW] in { + def SRA : ShiftRS <"sra", 0x8A, sra, GR32, shift12only>; + def SRAG : ShiftRSY<"srag", 0xEB0A, sra, GR64, shift20only>; +} + +// Rotate left. +let neverHasSideEffects = 1 in { + def RLL : ShiftRSY<"rll", 0xEB1D, rotl, GR32, shift20only>; + def RLLG : ShiftRSY<"rllg", 0xEB1C, rotl, GR64, shift20only>; +} + +// Rotate second operand left and inserted selected bits into first operand. +// These can act like 32-bit operands provided that the constant start and +// end bits (operands 2 and 3) are in the range [32, 64) +let Defs = [PSW] in { + let isCodeGenOnly = 1 in + def RISBG32 : RotateSelectRIEf<"risbg", 0xEC55, GR32, GR32>; + def RISBG : RotateSelectRIEf<"risbg", 0xEC55, GR64, GR64>; +} + +//===----------------------------------------------------------------------===// +// Comparison +//===----------------------------------------------------------------------===// + +// Signed comparisons. +let Defs = [PSW] in { + // Comparison with a register. + def CR : CompareRR <"cr", 0x19, z_cmp, GR32, GR32>; + def CGFR : CompareRRE<"cgfr", 0xB930, null_frag, GR64, GR32>; + def CGR : CompareRRE<"cgr", 0xB920, z_cmp, GR64, GR64>; + + // Comparison with a signed 16-bit immediate. + def CHI : CompareRI<"chi", 0xA7E, z_cmp, GR32, imm32sx16>; + def CGHI : CompareRI<"cghi", 0xA7F, z_cmp, GR64, imm64sx16>; + + // Comparison with a signed 32-bit immediate. + def CFI : CompareRIL<"cfi", 0xC2D, z_cmp, GR32, simm32>; + def CGFI : CompareRIL<"cgfi", 0xC2C, z_cmp, GR64, imm64sx32>; + + // Comparison with memory. + defm CH : CompareRXPair<"ch", 0x49, 0xE379, z_cmp, GR32, sextloadi16>; + defm C : CompareRXPair<"c", 0x59, 0xE359, z_cmp, GR32, load>; + def CGH : CompareRXY<"cgh", 0xE334, z_cmp, GR64, sextloadi16>; + def CGF : CompareRXY<"cgf", 0xE330, z_cmp, GR64, sextloadi32>; + def CG : CompareRXY<"cg", 0xE320, z_cmp, GR64, load>; + def CHRL : CompareRILPC<"chrl", 0xC65, z_cmp, GR32, aligned_sextloadi16>; + def CRL : CompareRILPC<"crl", 0xC6D, z_cmp, GR32, aligned_load>; + def CGHRL : CompareRILPC<"cghrl", 0xC64, z_cmp, GR64, aligned_sextloadi16>; + def CGFRL : CompareRILPC<"cgfrl", 0xC6C, z_cmp, GR64, aligned_sextloadi32>; + def CGRL : CompareRILPC<"cgrl", 0xC68, z_cmp, GR64, aligned_load>; + + // Comparison between memory and a signed 16-bit immediate. + def CHHSI : CompareSIL<"chhsi", 0xE554, z_cmp, sextloadi16, imm32sx16>; + def CHSI : CompareSIL<"chsi", 0xE55C, z_cmp, load, imm32sx16>; + def CGHSI : CompareSIL<"cghsi", 0xE558, z_cmp, load, imm64sx16>; +} +defm : SXB<z_cmp, GR64, CGFR>; + +// Unsigned comparisons. +let Defs = [PSW] in { + // Comparison with a register. 
+ def CLR : CompareRR <"clr", 0x15, z_ucmp, GR32, GR32>; + def CLGFR : CompareRRE<"clgfr", 0xB931, null_frag, GR64, GR32>; + def CLGR : CompareRRE<"clgr", 0xB921, z_ucmp, GR64, GR64>; + + // Comparison with a signed 32-bit immediate. + def CLFI : CompareRIL<"clfi", 0xC2F, z_ucmp, GR32, uimm32>; + def CLGFI : CompareRIL<"clgfi", 0xC2E, z_ucmp, GR64, imm64zx32>; + + // Comparison with memory. + defm CL : CompareRXPair<"cl", 0x55, 0xE355, z_ucmp, GR32, load>; + def CLGF : CompareRXY<"clgf", 0xE331, z_ucmp, GR64, zextloadi32>; + def CLG : CompareRXY<"clg", 0xE321, z_ucmp, GR64, load>; + def CLHRL : CompareRILPC<"clhrl", 0xC67, z_ucmp, GR32, + aligned_zextloadi16>; + def CLRL : CompareRILPC<"clrl", 0xC6F, z_ucmp, GR32, + aligned_load>; + def CLGHRL : CompareRILPC<"clghrl", 0xC66, z_ucmp, GR64, + aligned_zextloadi16>; + def CLGFRL : CompareRILPC<"clgfrl", 0xC6E, z_ucmp, GR64, + aligned_zextloadi32>; + def CLGRL : CompareRILPC<"clgrl", 0xC6A, z_ucmp, GR64, + aligned_load>; + + // Comparison between memory and an unsigned 8-bit immediate. + defm CLI : CompareSIPair<"cli", 0x95, 0xEB55, z_ucmp, zextloadi8, imm32zx8>; + + // Comparison between memory and an unsigned 16-bit immediate. + def CLHHSI : CompareSIL<"clhhsi", 0xE555, z_ucmp, zextloadi16, imm32zx16>; + def CLFHSI : CompareSIL<"clfhsi", 0xE55D, z_ucmp, load, imm32zx16>; + def CLGHSI : CompareSIL<"clghsi", 0xE559, z_ucmp, load, imm64zx16>; +} +defm : ZXB<z_ucmp, GR64, CLGFR>; + +//===----------------------------------------------------------------------===// +// Atomic operations +//===----------------------------------------------------------------------===// + +def ATOMIC_SWAPW : AtomicLoadWBinaryReg<z_atomic_swapw>; +def ATOMIC_SWAP_32 : AtomicLoadBinaryReg32<atomic_swap_32>; +def ATOMIC_SWAP_64 : AtomicLoadBinaryReg64<atomic_swap_64>; + +def ATOMIC_LOADW_AR : AtomicLoadWBinaryReg<z_atomic_loadw_add>; +def ATOMIC_LOADW_AFI : AtomicLoadWBinaryImm<z_atomic_loadw_add, simm32>; +def ATOMIC_LOAD_AR : AtomicLoadBinaryReg32<atomic_load_add_32>; +def ATOMIC_LOAD_AHI : AtomicLoadBinaryImm32<atomic_load_add_32, imm32sx16>; +def ATOMIC_LOAD_AFI : AtomicLoadBinaryImm32<atomic_load_add_32, simm32>; +def ATOMIC_LOAD_AGR : AtomicLoadBinaryReg64<atomic_load_add_64>; +def ATOMIC_LOAD_AGHI : AtomicLoadBinaryImm64<atomic_load_add_64, imm64sx16>; +def ATOMIC_LOAD_AGFI : AtomicLoadBinaryImm64<atomic_load_add_64, imm64sx32>; + +def ATOMIC_LOADW_SR : AtomicLoadWBinaryReg<z_atomic_loadw_sub>; +def ATOMIC_LOAD_SR : AtomicLoadBinaryReg32<atomic_load_sub_32>; +def ATOMIC_LOAD_SGR : AtomicLoadBinaryReg64<atomic_load_sub_64>; + +def ATOMIC_LOADW_NR : AtomicLoadWBinaryReg<z_atomic_loadw_and>; +def ATOMIC_LOADW_NILH : AtomicLoadWBinaryImm<z_atomic_loadw_and, imm32lh16c>; +def ATOMIC_LOAD_NR : AtomicLoadBinaryReg32<atomic_load_and_32>; +def ATOMIC_LOAD_NILL32 : AtomicLoadBinaryImm32<atomic_load_and_32, imm32ll16c>; +def ATOMIC_LOAD_NILH32 : AtomicLoadBinaryImm32<atomic_load_and_32, imm32lh16c>; +def ATOMIC_LOAD_NILF32 : AtomicLoadBinaryImm32<atomic_load_and_32, uimm32>; +def ATOMIC_LOAD_NGR : AtomicLoadBinaryReg64<atomic_load_and_64>; +def ATOMIC_LOAD_NILL : AtomicLoadBinaryImm64<atomic_load_and_64, imm64ll16c>; +def ATOMIC_LOAD_NILH : AtomicLoadBinaryImm64<atomic_load_and_64, imm64lh16c>; +def ATOMIC_LOAD_NIHL : AtomicLoadBinaryImm64<atomic_load_and_64, imm64hl16c>; +def ATOMIC_LOAD_NIHH : AtomicLoadBinaryImm64<atomic_load_and_64, imm64hh16c>; +def ATOMIC_LOAD_NILF : AtomicLoadBinaryImm64<atomic_load_and_64, imm64lf32c>; +def ATOMIC_LOAD_NIHF : 
AtomicLoadBinaryImm64<atomic_load_and_64, imm64hf32c>; + +def ATOMIC_LOADW_OR : AtomicLoadWBinaryReg<z_atomic_loadw_or>; +def ATOMIC_LOADW_OILH : AtomicLoadWBinaryImm<z_atomic_loadw_or, imm32lh16>; +def ATOMIC_LOAD_OR : AtomicLoadBinaryReg32<atomic_load_or_32>; +def ATOMIC_LOAD_OILL32 : AtomicLoadBinaryImm32<atomic_load_or_32, imm32ll16>; +def ATOMIC_LOAD_OILH32 : AtomicLoadBinaryImm32<atomic_load_or_32, imm32lh16>; +def ATOMIC_LOAD_OILF32 : AtomicLoadBinaryImm32<atomic_load_or_32, uimm32>; +def ATOMIC_LOAD_OGR : AtomicLoadBinaryReg64<atomic_load_or_64>; +def ATOMIC_LOAD_OILL : AtomicLoadBinaryImm64<atomic_load_or_64, imm64ll16>; +def ATOMIC_LOAD_OILH : AtomicLoadBinaryImm64<atomic_load_or_64, imm64lh16>; +def ATOMIC_LOAD_OIHL : AtomicLoadBinaryImm64<atomic_load_or_64, imm64hl16>; +def ATOMIC_LOAD_OIHH : AtomicLoadBinaryImm64<atomic_load_or_64, imm64hh16>; +def ATOMIC_LOAD_OILF : AtomicLoadBinaryImm64<atomic_load_or_64, imm64lf32>; +def ATOMIC_LOAD_OIHF : AtomicLoadBinaryImm64<atomic_load_or_64, imm64hf32>; + +def ATOMIC_LOADW_XR : AtomicLoadWBinaryReg<z_atomic_loadw_xor>; +def ATOMIC_LOADW_XILF : AtomicLoadWBinaryImm<z_atomic_loadw_xor, uimm32>; +def ATOMIC_LOAD_XR : AtomicLoadBinaryReg32<atomic_load_xor_32>; +def ATOMIC_LOAD_XILF32 : AtomicLoadBinaryImm32<atomic_load_xor_32, uimm32>; +def ATOMIC_LOAD_XGR : AtomicLoadBinaryReg64<atomic_load_xor_64>; +def ATOMIC_LOAD_XILF : AtomicLoadBinaryImm64<atomic_load_xor_64, imm64lf32>; +def ATOMIC_LOAD_XIHF : AtomicLoadBinaryImm64<atomic_load_xor_64, imm64hf32>; + +def ATOMIC_LOADW_NRi : AtomicLoadWBinaryReg<z_atomic_loadw_nand>; +def ATOMIC_LOADW_NILHi : AtomicLoadWBinaryImm<z_atomic_loadw_nand, + imm32lh16c>; +def ATOMIC_LOAD_NRi : AtomicLoadBinaryReg32<atomic_load_nand_32>; +def ATOMIC_LOAD_NILL32i : AtomicLoadBinaryImm32<atomic_load_nand_32, + imm32ll16c>; +def ATOMIC_LOAD_NILH32i : AtomicLoadBinaryImm32<atomic_load_nand_32, + imm32lh16c>; +def ATOMIC_LOAD_NILF32i : AtomicLoadBinaryImm32<atomic_load_nand_32, uimm32>; +def ATOMIC_LOAD_NGRi : AtomicLoadBinaryReg64<atomic_load_nand_64>; +def ATOMIC_LOAD_NILLi : AtomicLoadBinaryImm64<atomic_load_nand_64, + imm64ll16c>; +def ATOMIC_LOAD_NILHi : AtomicLoadBinaryImm64<atomic_load_nand_64, + imm64lh16c>; +def ATOMIC_LOAD_NIHLi : AtomicLoadBinaryImm64<atomic_load_nand_64, + imm64hl16c>; +def ATOMIC_LOAD_NIHHi : AtomicLoadBinaryImm64<atomic_load_nand_64, + imm64hh16c>; +def ATOMIC_LOAD_NILFi : AtomicLoadBinaryImm64<atomic_load_nand_64, + imm64lf32c>; +def ATOMIC_LOAD_NIHFi : AtomicLoadBinaryImm64<atomic_load_nand_64, + imm64hf32c>; + +def ATOMIC_LOADW_MIN : AtomicLoadWBinaryReg<z_atomic_loadw_min>; +def ATOMIC_LOAD_MIN_32 : AtomicLoadBinaryReg32<atomic_load_min_32>; +def ATOMIC_LOAD_MIN_64 : AtomicLoadBinaryReg64<atomic_load_min_64>; + +def ATOMIC_LOADW_MAX : AtomicLoadWBinaryReg<z_atomic_loadw_max>; +def ATOMIC_LOAD_MAX_32 : AtomicLoadBinaryReg32<atomic_load_max_32>; +def ATOMIC_LOAD_MAX_64 : AtomicLoadBinaryReg64<atomic_load_max_64>; + +def ATOMIC_LOADW_UMIN : AtomicLoadWBinaryReg<z_atomic_loadw_umin>; +def ATOMIC_LOAD_UMIN_32 : AtomicLoadBinaryReg32<atomic_load_umin_32>; +def ATOMIC_LOAD_UMIN_64 : AtomicLoadBinaryReg64<atomic_load_umin_64>; + +def ATOMIC_LOADW_UMAX : AtomicLoadWBinaryReg<z_atomic_loadw_umax>; +def ATOMIC_LOAD_UMAX_32 : AtomicLoadBinaryReg32<atomic_load_umax_32>; +def ATOMIC_LOAD_UMAX_64 : AtomicLoadBinaryReg64<atomic_load_umax_64>; + +def ATOMIC_CMP_SWAPW + : Pseudo<(outs GR32:$dst), (ins bdaddr20only:$addr, GR32:$cmp, GR32:$swap, + ADDR32:$bitshift, ADDR32:$negbitshift, + 
uimm32:$bitsize), + [(set GR32:$dst, + (z_atomic_cmp_swapw bdaddr20only:$addr, GR32:$cmp, GR32:$swap, + ADDR32:$bitshift, ADDR32:$negbitshift, + uimm32:$bitsize))]> { + let Defs = [PSW]; + let mayLoad = 1; + let mayStore = 1; + let usesCustomInserter = 1; +} + +let Defs = [PSW] in { + defm CS : CmpSwapRSPair<"cs", 0xBA, 0xEB14, atomic_cmp_swap_32, GR32>; + def CSG : CmpSwapRSY<"csg", 0xEB30, atomic_cmp_swap_64, GR64>; +} + +//===----------------------------------------------------------------------===// +// Miscellaneous Instructions. +//===----------------------------------------------------------------------===// + +// Read a 32-bit access register into a GR32. As with all GR32 operations, +// the upper 32 bits of the enclosing GR64 remain unchanged, which is useful +// when a 64-bit address is stored in a pair of access registers. +def EAR : InstRRE<0xB24F, (outs GR32:$dst), (ins access_reg:$src), + "ear\t$dst, $src", + [(set GR32:$dst, (z_extract_access access_reg:$src))]>; + +// Find leftmost one, AKA count leading zeros. The instruction actually +// returns a pair of GR64s, the first giving the number of leading zeros +// and the second giving a copy of the source with the leftmost one bit +// cleared. We only use the first result here. +let Defs = [PSW] in { + def FLOGR : UnaryRRE<"flogr", 0xB983, null_frag, GR128, GR64>; +} +def : Pat<(ctlz GR64:$src), + (EXTRACT_SUBREG (FLOGR GR64:$src), subreg_high)>; + +// Use subregs to populate the "don't care" bits in a 32-bit to 64-bit anyext. +def : Pat<(i64 (anyext GR32:$src)), + (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR32:$src, subreg_32bit)>; + +// There are no 32-bit equivalents of LLILL and LLILH, so use a full +// 64-bit move followed by a subreg. This preserves the invariant that +// all GR32 operations only modify the low 32 bits. +def : Pat<(i32 imm32ll16:$src), + (EXTRACT_SUBREG (LLILL (LL16 imm:$src)), subreg_32bit)>; +def : Pat<(i32 imm32lh16:$src), + (EXTRACT_SUBREG (LLILH (LH16 imm:$src)), subreg_32bit)>; + +// Extend GR32s and GR64s to GR128s. +let usesCustomInserter = 1 in { + def AEXT128_64 : Pseudo<(outs GR128:$dst), (ins GR64:$src), []>; + def ZEXT128_32 : Pseudo<(outs GR128:$dst), (ins GR32:$src), []>; + def ZEXT128_64 : Pseudo<(outs GR128:$dst), (ins GR64:$src), []>; +} + +//===----------------------------------------------------------------------===// +// Peepholes. +//===----------------------------------------------------------------------===// + +// Use AL* for GR64 additions of unsigned 32-bit values. +defm : ZXB<add, GR64, ALGFR>; +def : Pat<(add GR64:$src1, imm64zx32:$src2), + (ALGFI GR64:$src1, imm64zx32:$src2)>; +def : Pat<(add GR64:$src1, (zextloadi32 bdxaddr20only:$addr)), + (ALGF GR64:$src1, bdxaddr20only:$addr)>; + +// Use SL* for GR64 subtractions of unsigned 32-bit values. +defm : ZXB<sub, GR64, SLGFR>; +def : Pat<(add GR64:$src1, imm64zx32n:$src2), + (SLGFI GR64:$src1, imm64zx32n:$src2)>; +def : Pat<(sub GR64:$src1, (zextloadi32 bdxaddr20only:$addr)), + (SLGF GR64:$src1, bdxaddr20only:$addr)>; diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZMCInstLower.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZMCInstLower.cpp new file mode 100644 index 0000000..5d83321 --- /dev/null +++ b/contrib/llvm/lib/Target/SystemZ/SystemZMCInstLower.cpp @@ -0,0 +1,116 @@ +//===-- SystemZMCInstLower.cpp - Lower MachineInstr to MCInst -------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "SystemZMCInstLower.h" +#include "SystemZAsmPrinter.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/Target/Mangler.h" + +using namespace llvm; + +// Where relaxable pairs of reloc-generating instructions exist, +// we tend to use the longest form by default, since that produces +// correct assembly in cases where no relaxation is performed. +// If Opcode is one such instruction, return the opcode for the +// shortest possible form instead, otherwise return Opcode itself. +static unsigned getShortenedInstr(unsigned Opcode) { + switch (Opcode) { + case SystemZ::BRCL: return SystemZ::BRC; + case SystemZ::JG: return SystemZ::J; + case SystemZ::BRASL: return SystemZ::BRAS; + } + return Opcode; +} + +// Return the VK_* enumeration for MachineOperand target flags Flags. +static MCSymbolRefExpr::VariantKind getVariantKind(unsigned Flags) { + switch (Flags & SystemZII::MO_SYMBOL_MODIFIER) { + case 0: + return MCSymbolRefExpr::VK_None; + case SystemZII::MO_GOT: + return MCSymbolRefExpr::VK_GOT; + } + llvm_unreachable("Unrecognised MO_ACCESS_MODEL"); +} + +SystemZMCInstLower::SystemZMCInstLower(Mangler *mang, MCContext &ctx, + SystemZAsmPrinter &asmprinter) + : Mang(mang), Ctx(ctx), AsmPrinter(asmprinter) {} + +MCOperand SystemZMCInstLower::lowerSymbolOperand(const MachineOperand &MO, + const MCSymbol *Symbol, + int64_t Offset) const { + MCSymbolRefExpr::VariantKind Kind = getVariantKind(MO.getTargetFlags()); + const MCExpr *Expr = MCSymbolRefExpr::Create(Symbol, Kind, Ctx); + if (Offset) { + const MCExpr *OffsetExpr = MCConstantExpr::Create(Offset, Ctx); + Expr = MCBinaryExpr::CreateAdd(Expr, OffsetExpr, Ctx); + } + return MCOperand::CreateExpr(Expr); +} + +MCOperand SystemZMCInstLower::lowerOperand(const MachineOperand &MO) const { + switch (MO.getType()) { + default: + llvm_unreachable("unknown operand type"); + + case MachineOperand::MO_Register: + // Ignore all implicit register operands. + if (MO.isImplicit()) + return MCOperand(); + return MCOperand::CreateReg(MO.getReg()); + + case MachineOperand::MO_Immediate: + return MCOperand::CreateImm(MO.getImm()); + + case MachineOperand::MO_MachineBasicBlock: + return lowerSymbolOperand(MO, MO.getMBB()->getSymbol(), + /* MO has no offset field */0); + + case MachineOperand::MO_GlobalAddress: + return lowerSymbolOperand(MO, Mang->getSymbol(MO.getGlobal()), + MO.getOffset()); + + case MachineOperand::MO_ExternalSymbol: { + StringRef Name = MO.getSymbolName(); + return lowerSymbolOperand(MO, AsmPrinter.GetExternalSymbolSymbol(Name), + MO.getOffset()); + } + + case MachineOperand::MO_JumpTableIndex: + return lowerSymbolOperand(MO, AsmPrinter.GetJTISymbol(MO.getIndex()), + /* MO has no offset field */0); + + case MachineOperand::MO_ConstantPoolIndex: + return lowerSymbolOperand(MO, AsmPrinter.GetCPISymbol(MO.getIndex()), + MO.getOffset()); + + case MachineOperand::MO_BlockAddress: { + const BlockAddress *BA = MO.getBlockAddress(); + return lowerSymbolOperand(MO, AsmPrinter.GetBlockAddressSymbol(BA), + MO.getOffset()); + } + } +} + +void SystemZMCInstLower::lower(const MachineInstr *MI, MCInst &OutMI) const { + unsigned Opcode = MI->getOpcode(); + // When emitting binary code, start with the shortest form of an instruction + // and then relax it where necessary. 
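+  // When emitting textual assembly instead, keep the long form, since no
+  // relaxation will be performed on it (see getShortenedInstr above).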
+ if (!AsmPrinter.OutStreamer.hasRawTextSupport()) + Opcode = getShortenedInstr(Opcode); + OutMI.setOpcode(Opcode); + for (unsigned I = 0, E = MI->getNumOperands(); I != E; ++I) { + const MachineOperand &MO = MI->getOperand(I); + MCOperand MCOp = lowerOperand(MO); + if (MCOp.isValid()) + OutMI.addOperand(MCOp); + } +} diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZMCInstLower.h b/contrib/llvm/lib/Target/SystemZ/SystemZMCInstLower.h new file mode 100644 index 0000000..afa72f3 --- /dev/null +++ b/contrib/llvm/lib/Target/SystemZ/SystemZMCInstLower.h @@ -0,0 +1,47 @@ +//===-- SystemZMCInstLower.h - Lower MachineInstr to MCInst ----*- C++ -*--===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_SYSTEMZMCINSTLOWER_H +#define LLVM_SYSTEMZMCINSTLOWER_H + +#include "llvm/Support/DataTypes.h" +#include "llvm/Support/Compiler.h" + +namespace llvm { +class MCContext; +class MCInst; +class MCOperand; +class MCSymbol; +class MachineInstr; +class MachineOperand; +class Mangler; +class SystemZAsmPrinter; + +class LLVM_LIBRARY_VISIBILITY SystemZMCInstLower { + Mangler *Mang; + MCContext &Ctx; + SystemZAsmPrinter &AsmPrinter; + +public: + SystemZMCInstLower(Mangler *mang, MCContext &ctx, + SystemZAsmPrinter &asmPrinter); + + // Lower MachineInstr MI to MCInst OutMI. + void lower(const MachineInstr *MI, MCInst &OutMI) const; + + // Return an MCOperand for MO. Return an empty operand if MO is implicit. + MCOperand lowerOperand(const MachineOperand& MO) const; + + // Return an MCOperand for MO, given that it equals Symbol + Offset. + MCOperand lowerSymbolOperand(const MachineOperand &MO, + const MCSymbol *Symbol, int64_t Offset) const; +}; +} // end namespace llvm + +#endif diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZMachineFunctionInfo.h b/contrib/llvm/lib/Target/SystemZ/SystemZMachineFunctionInfo.h new file mode 100644 index 0000000..1dc05a7e --- /dev/null +++ b/contrib/llvm/lib/Target/SystemZ/SystemZMachineFunctionInfo.h @@ -0,0 +1,74 @@ +//==- SystemZMachineFuctionInfo.h - SystemZ machine function info -*- C++ -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef SYSTEMZMACHINEFUNCTIONINFO_H +#define SYSTEMZMACHINEFUNCTIONINFO_H + +#include "llvm/CodeGen/MachineFunction.h" + +namespace llvm { + +class SystemZMachineFunctionInfo : public MachineFunctionInfo { + unsigned SavedGPRFrameSize; + unsigned LowSavedGPR; + unsigned HighSavedGPR; + unsigned VarArgsFirstGPR; + unsigned VarArgsFirstFPR; + unsigned VarArgsFrameIndex; + unsigned RegSaveFrameIndex; + bool ManipulatesSP; + +public: + explicit SystemZMachineFunctionInfo(MachineFunction &MF) + : SavedGPRFrameSize(0), LowSavedGPR(0), HighSavedGPR(0), VarArgsFirstGPR(0), + VarArgsFirstFPR(0), VarArgsFrameIndex(0), RegSaveFrameIndex(0), + ManipulatesSP(false) {} + + // Get and set the number of bytes allocated by generic code to store + // call-saved GPRs. + unsigned getSavedGPRFrameSize() const { return SavedGPRFrameSize; } + void setSavedGPRFrameSize(unsigned bytes) { SavedGPRFrameSize = bytes; } + + // Get and set the first call-saved GPR that should be saved and restored + // by this function. 
This is 0 if no GPRs need to be saved or restored. + unsigned getLowSavedGPR() const { return LowSavedGPR; } + void setLowSavedGPR(unsigned Reg) { LowSavedGPR = Reg; } + + // Get and set the last call-saved GPR that should be saved and restored + // by this function. + unsigned getHighSavedGPR() const { return HighSavedGPR; } + void setHighSavedGPR(unsigned Reg) { HighSavedGPR = Reg; } + + // Get and set the number of fixed (as opposed to variable) arguments + // that are passed in GPRs to this function. + unsigned getVarArgsFirstGPR() const { return VarArgsFirstGPR; } + void setVarArgsFirstGPR(unsigned GPR) { VarArgsFirstGPR = GPR; } + + // Likewise FPRs. + unsigned getVarArgsFirstFPR() const { return VarArgsFirstFPR; } + void setVarArgsFirstFPR(unsigned FPR) { VarArgsFirstFPR = FPR; } + + // Get and set the frame index of the first stack vararg. + unsigned getVarArgsFrameIndex() const { return VarArgsFrameIndex; } + void setVarArgsFrameIndex(unsigned FI) { VarArgsFrameIndex = FI; } + + // Get and set the frame index of the register save area + // (i.e. the incoming stack pointer). + unsigned getRegSaveFrameIndex() const { return RegSaveFrameIndex; } + void setRegSaveFrameIndex(unsigned FI) { RegSaveFrameIndex = FI; } + + // Get and set whether the function directly manipulates the stack pointer, + // e.g. through STACKSAVE or STACKRESTORE. + bool getManipulatesSP() const { return ManipulatesSP; } + void setManipulatesSP(bool MSP) { ManipulatesSP = MSP; } +}; + +} // end llvm namespace + +#endif diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZOperands.td b/contrib/llvm/lib/Target/SystemZ/SystemZOperands.td new file mode 100644 index 0000000..0abc3f7 --- /dev/null +++ b/contrib/llvm/lib/Target/SystemZ/SystemZOperands.td @@ -0,0 +1,435 @@ +//===-- SystemZOperands.td - SystemZ instruction operands ----*- tblgen-*--===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Class definitions +//===----------------------------------------------------------------------===// + +class ImmediateAsmOperand<string name> + : AsmOperandClass { + let Name = name; + let RenderMethod = "addImmOperands"; +} + +// Constructs both a DAG pattern and instruction operand for an immediate +// of type VT. PRED returns true if a node is acceptable and XFORM returns +// the operand value associated with the node. ASMOP is the name of the +// associated asm operand, and also forms the basis of the asm print method. +class Immediate<ValueType vt, code pred, SDNodeXForm xform, string asmop> + : PatLeaf<(vt imm), pred, xform>, Operand<vt> { + let PrintMethod = "print"##asmop##"Operand"; + let ParserMatchClass = !cast<AsmOperandClass>(asmop); +} + +// Constructs both a DAG pattern and instruction operand for a PC-relative +// address with address size VT. SELF is the name of the operand. +class PCRelAddress<ValueType vt, string self> + : ComplexPattern<vt, 1, "selectPCRelAddress", [z_pcrel_wrapper]>, + Operand<vt> { + let MIOperandInfo = (ops !cast<Operand>(self)); +} + +// Constructs an AsmOperandClass for addressing mode FORMAT, treating the +// registers as having BITSIZE bits and displacements as having DISPSIZE bits. 
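+// For example, format "BDAddr" with 64-bit registers and a 12-bit
+// displacement produces the asm operand class BDAddr64Disp12, parsed by
+// parseBDAddr64 and rendered by addBDAddrOperands.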
+class AddressAsmOperand<string format, string bitsize, string dispsize> + : AsmOperandClass { + let Name = format##bitsize##"Disp"##dispsize; + let ParserMethod = "parse"##format##bitsize; + let RenderMethod = "add"##format##"Operands"; +} + +// Constructs both a DAG pattern and instruction operand for an addressing mode. +// The mode is selected by custom code in selectTYPE...SUFFIX(). The address +// registers have BITSIZE bits and displacements have DISPSIZE bits. NUMOPS is +// the number of operands that make up an address and OPERANDS lists the types +// of those operands using (ops ...). FORMAT is the type of addressing mode, +// which needs to match the names used in AddressAsmOperand. +class AddressingMode<string type, string bitsize, string dispsize, + string suffix, int numops, string format, dag operands> + : ComplexPattern<!cast<ValueType>("i"##bitsize), numops, + "select"##type##dispsize##suffix, + [add, sub, or, frameindex, z_adjdynalloc]>, + Operand<!cast<ValueType>("i"##bitsize)> { + let PrintMethod = "print"##format##"Operand"; + let MIOperandInfo = operands; + let ParserMatchClass = + !cast<AddressAsmOperand>(format##bitsize##"Disp"##dispsize); +} + +// An addressing mode with a base and displacement but no index. +class BDMode<string type, string bitsize, string dispsize, string suffix> + : AddressingMode<type, bitsize, dispsize, suffix, 2, "BDAddr", + (ops !cast<RegisterOperand>("ADDR"##bitsize), + !cast<Immediate>("disp"##dispsize##"imm"##bitsize))>; + +// An addressing mode with a base, displacement and index. +class BDXMode<string type, string bitsize, string dispsize, string suffix> + : AddressingMode<type, bitsize, dispsize, suffix, 3, "BDXAddr", + (ops !cast<RegisterOperand>("ADDR"##bitsize), + !cast<Immediate>("disp"##dispsize##"imm"##bitsize), + !cast<RegisterOperand>("ADDR"##bitsize))>; + +//===----------------------------------------------------------------------===// +// Extracting immediate operands from nodes +// These all create MVT::i64 nodes to ensure the value is not sign-extended +// when converted from an SDNode to a MachineOperand later on. +//===----------------------------------------------------------------------===// + +// Bits 0-15 (counting from the lsb). +def LL16 : SDNodeXForm<imm, [{ + uint64_t Value = N->getZExtValue() & 0x000000000000FFFFULL; + return CurDAG->getTargetConstant(Value, MVT::i64); +}]>; + +// Bits 16-31 (counting from the lsb). +def LH16 : SDNodeXForm<imm, [{ + uint64_t Value = (N->getZExtValue() & 0x00000000FFFF0000ULL) >> 16; + return CurDAG->getTargetConstant(Value, MVT::i64); +}]>; + +// Bits 32-47 (counting from the lsb). +def HL16 : SDNodeXForm<imm, [{ + uint64_t Value = (N->getZExtValue() & 0x0000FFFF00000000ULL) >> 32; + return CurDAG->getTargetConstant(Value, MVT::i64); +}]>; + +// Bits 48-63 (counting from the lsb). +def HH16 : SDNodeXForm<imm, [{ + uint64_t Value = (N->getZExtValue() & 0xFFFF000000000000ULL) >> 48; + return CurDAG->getTargetConstant(Value, MVT::i64); +}]>; + +// Low 32 bits. +def LF32 : SDNodeXForm<imm, [{ + uint64_t Value = N->getZExtValue() & 0x00000000FFFFFFFFULL; + return CurDAG->getTargetConstant(Value, MVT::i64); +}]>; + +// High 32 bits. +def HF32 : SDNodeXForm<imm, [{ + uint64_t Value = N->getZExtValue() >> 32; + return CurDAG->getTargetConstant(Value, MVT::i64); +}]>; + +// Truncate an immediate to a 8-bit signed quantity. 
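[Editorial aside, not part of the patch: the LL16/LH16/HL16/HH16 and LF32/HF32 SDNodeXForms above extract 16- and 32-bit chunks of a 64-bit immediate, counting bits from the LSB. A minimal standalone C++ sketch of the same bit arithmetic, with hypothetical helper names chosen here for illustration:]

#include <cassert>
#include <cstdint>

// Same chunking as the SDNodeXForms above, counting from the LSB.
static uint64_t chunkLL16(uint64_t V) { return V & 0xFFFFULL; }          // bits 0-15
static uint64_t chunkLH16(uint64_t V) { return (V >> 16) & 0xFFFFULL; }  // bits 16-31
static uint64_t chunkHL16(uint64_t V) { return (V >> 32) & 0xFFFFULL; }  // bits 32-47
static uint64_t chunkHH16(uint64_t V) { return (V >> 48) & 0xFFFFULL; }  // bits 48-63

int main() {
  uint64_t V = 0x123456789ABCDEF0ULL;
  assert(chunkLL16(V) == 0xDEF0 && chunkLH16(V) == 0x9ABC);
  assert(chunkHL16(V) == 0x5678 && chunkHH16(V) == 0x1234);
  // Reassembling the four chunks gives back the original value, which is why
  // a 64-bit constant can be materialized 16 bits at a time.
  assert(((chunkHH16(V) << 48) | (chunkHL16(V) << 32) |
          (chunkLH16(V) << 16) | chunkLL16(V)) == V);
  return 0;
}

[The truncation transforms that follow (SIMM8, UIMM8, ...) are the same idea applied to sign- or zero-truncation rather than chunk extraction.]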
+def SIMM8 : SDNodeXForm<imm, [{ + return CurDAG->getTargetConstant(int8_t(N->getZExtValue()), MVT::i64); +}]>; + +// Truncate an immediate to a 8-bit unsigned quantity. +def UIMM8 : SDNodeXForm<imm, [{ + return CurDAG->getTargetConstant(uint8_t(N->getZExtValue()), MVT::i64); +}]>; + +// Truncate an immediate to a 16-bit signed quantity. +def SIMM16 : SDNodeXForm<imm, [{ + return CurDAG->getTargetConstant(int16_t(N->getZExtValue()), MVT::i64); +}]>; + +// Truncate an immediate to a 16-bit unsigned quantity. +def UIMM16 : SDNodeXForm<imm, [{ + return CurDAG->getTargetConstant(uint16_t(N->getZExtValue()), MVT::i64); +}]>; + +// Truncate an immediate to a 32-bit signed quantity. +def SIMM32 : SDNodeXForm<imm, [{ + return CurDAG->getTargetConstant(int32_t(N->getZExtValue()), MVT::i64); +}]>; + +// Truncate an immediate to a 32-bit unsigned quantity. +def UIMM32 : SDNodeXForm<imm, [{ + return CurDAG->getTargetConstant(uint32_t(N->getZExtValue()), MVT::i64); +}]>; + +// Negate and then truncate an immediate to a 32-bit unsigned quantity. +def NEGIMM32 : SDNodeXForm<imm, [{ + return CurDAG->getTargetConstant(uint32_t(-N->getZExtValue()), MVT::i64); +}]>; + +//===----------------------------------------------------------------------===// +// Immediate asm operands. +//===----------------------------------------------------------------------===// + +def U4Imm : ImmediateAsmOperand<"U4Imm">; +def U6Imm : ImmediateAsmOperand<"U6Imm">; +def S8Imm : ImmediateAsmOperand<"S8Imm">; +def U8Imm : ImmediateAsmOperand<"U8Imm">; +def S16Imm : ImmediateAsmOperand<"S16Imm">; +def U16Imm : ImmediateAsmOperand<"U16Imm">; +def S32Imm : ImmediateAsmOperand<"S32Imm">; +def U32Imm : ImmediateAsmOperand<"U32Imm">; + +//===----------------------------------------------------------------------===// +// 8-bit immediates +//===----------------------------------------------------------------------===// + +def uimm8zx4 : Immediate<i8, [{ + return isUInt<4>(N->getZExtValue()); +}], NOOP_SDNodeXForm, "U4Imm">; + +def uimm8zx6 : Immediate<i8, [{ + return isUInt<6>(N->getZExtValue()); +}], NOOP_SDNodeXForm, "U6Imm">; + +def simm8 : Immediate<i8, [{}], SIMM8, "S8Imm">; +def uimm8 : Immediate<i8, [{}], UIMM8, "U8Imm">; + +//===----------------------------------------------------------------------===// +// i32 immediates +//===----------------------------------------------------------------------===// + +// Immediates for the lower and upper 16 bits of an i32, with the other +// bits of the i32 being zero. +def imm32ll16 : Immediate<i32, [{ + return SystemZ::isImmLL(N->getZExtValue()); +}], LL16, "U16Imm">; + +def imm32lh16 : Immediate<i32, [{ + return SystemZ::isImmLH(N->getZExtValue()); +}], LH16, "U16Imm">; + +// Immediates for the lower and upper 16 bits of an i32, with the other +// bits of the i32 being one. 
+def imm32ll16c : Immediate<i32, [{ + return SystemZ::isImmLL(uint32_t(~N->getZExtValue())); +}], LL16, "U16Imm">; + +def imm32lh16c : Immediate<i32, [{ + return SystemZ::isImmLH(uint32_t(~N->getZExtValue())); +}], LH16, "U16Imm">; + +// Short immediates +def imm32sx8 : Immediate<i32, [{ + return isInt<8>(N->getSExtValue()); +}], SIMM8, "S8Imm">; + +def imm32zx8 : Immediate<i32, [{ + return isUInt<8>(N->getZExtValue()); +}], UIMM8, "U8Imm">; + +def imm32zx8trunc : Immediate<i32, [{}], UIMM8, "U8Imm">; + +def imm32sx16 : Immediate<i32, [{ + return isInt<16>(N->getSExtValue()); +}], SIMM16, "S16Imm">; + +def imm32zx16 : Immediate<i32, [{ + return isUInt<16>(N->getZExtValue()); +}], UIMM16, "U16Imm">; + +def imm32sx16trunc : Immediate<i32, [{}], SIMM16, "S16Imm">; + +// Full 32-bit immediates. we need both signed and unsigned versions +// because the assembler is picky. E.g. AFI requires signed operands +// while NILF requires unsigned ones. +def simm32 : Immediate<i32, [{}], SIMM32, "S32Imm">; +def uimm32 : Immediate<i32, [{}], UIMM32, "U32Imm">; + +def imm32 : ImmLeaf<i32, [{}]>; + +//===----------------------------------------------------------------------===// +// 64-bit immediates +//===----------------------------------------------------------------------===// + +// Immediates for 16-bit chunks of an i64, with the other bits of the +// i32 being zero. +def imm64ll16 : Immediate<i64, [{ + return SystemZ::isImmLL(N->getZExtValue()); +}], LL16, "U16Imm">; + +def imm64lh16 : Immediate<i64, [{ + return SystemZ::isImmLH(N->getZExtValue()); +}], LH16, "U16Imm">; + +def imm64hl16 : Immediate<i64, [{ + return SystemZ::isImmHL(N->getZExtValue()); +}], HL16, "U16Imm">; + +def imm64hh16 : Immediate<i64, [{ + return SystemZ::isImmHH(N->getZExtValue()); +}], HH16, "U16Imm">; + +// Immediates for 16-bit chunks of an i64, with the other bits of the +// i32 being one. +def imm64ll16c : Immediate<i64, [{ + return SystemZ::isImmLL(uint64_t(~N->getZExtValue())); +}], LL16, "U16Imm">; + +def imm64lh16c : Immediate<i64, [{ + return SystemZ::isImmLH(uint64_t(~N->getZExtValue())); +}], LH16, "U16Imm">; + +def imm64hl16c : Immediate<i64, [{ + return SystemZ::isImmHL(uint64_t(~N->getZExtValue())); +}], HL16, "U16Imm">; + +def imm64hh16c : Immediate<i64, [{ + return SystemZ::isImmHH(uint64_t(~N->getZExtValue())); +}], HH16, "U16Imm">; + +// Immediates for the lower and upper 32 bits of an i64, with the other +// bits of the i32 being zero. +def imm64lf32 : Immediate<i64, [{ + return SystemZ::isImmLF(N->getZExtValue()); +}], LF32, "U32Imm">; + +def imm64hf32 : Immediate<i64, [{ + return SystemZ::isImmHF(N->getZExtValue()); +}], HF32, "U32Imm">; + +// Immediates for the lower and upper 32 bits of an i64, with the other +// bits of the i32 being one. +def imm64lf32c : Immediate<i64, [{ + return SystemZ::isImmLF(uint64_t(~N->getZExtValue())); +}], LF32, "U32Imm">; + +def imm64hf32c : Immediate<i64, [{ + return SystemZ::isImmHF(uint64_t(~N->getZExtValue())); +}], HF32, "U32Imm">; + +// Short immediates. 
+def imm64sx8 : Immediate<i64, [{ + return isInt<8>(N->getSExtValue()); +}], SIMM8, "S8Imm">; + +def imm64sx16 : Immediate<i64, [{ + return isInt<16>(N->getSExtValue()); +}], SIMM16, "S16Imm">; + +def imm64zx16 : Immediate<i64, [{ + return isUInt<16>(N->getZExtValue()); +}], UIMM16, "U16Imm">; + +def imm64sx32 : Immediate<i64, [{ + return isInt<32>(N->getSExtValue()); +}], SIMM32, "S32Imm">; + +def imm64zx32 : Immediate<i64, [{ + return isUInt<32>(N->getZExtValue()); +}], UIMM32, "U32Imm">; + +def imm64zx32n : Immediate<i64, [{ + return isUInt<32>(-N->getSExtValue()); +}], NEGIMM32, "U32Imm">; + +def imm64 : ImmLeaf<i64, [{}]>; + +//===----------------------------------------------------------------------===// +// Floating-point immediates +//===----------------------------------------------------------------------===// + +// Floating-point zero. +def fpimm0 : PatLeaf<(fpimm), [{ return N->isExactlyValue(+0.0); }]>; + +// Floating point negative zero. +def fpimmneg0 : PatLeaf<(fpimm), [{ return N->isExactlyValue(-0.0); }]>; + +//===----------------------------------------------------------------------===// +// Symbolic address operands +//===----------------------------------------------------------------------===// + +// PC-relative offsets of a basic block. The offset is sign-extended +// and multiplied by 2. +def brtarget16 : Operand<OtherVT> { + let EncoderMethod = "getPC16DBLEncoding"; +} +def brtarget32 : Operand<OtherVT> { + let EncoderMethod = "getPC32DBLEncoding"; +} + +// A PC-relative offset of a global value. The offset is sign-extended +// and multiplied by 2. +def pcrel32 : PCRelAddress<i64, "pcrel32"> { + let EncoderMethod = "getPC32DBLEncoding"; +} + +// A PC-relative offset of a global value when the value is used as a +// call target. The offset is sign-extended and multiplied by 2. +def pcrel16call : PCRelAddress<i64, "pcrel16call"> { + let PrintMethod = "printCallOperand"; + let EncoderMethod = "getPLT16DBLEncoding"; +} +def pcrel32call : PCRelAddress<i64, "pcrel32call"> { + let PrintMethod = "printCallOperand"; + let EncoderMethod = "getPLT32DBLEncoding"; +} + +//===----------------------------------------------------------------------===// +// Addressing modes +//===----------------------------------------------------------------------===// + +// 12-bit displacement operands. +def disp12imm32 : Operand<i32>; +def disp12imm64 : Operand<i64>; + +// 20-bit displacement operands. +def disp20imm32 : Operand<i32>; +def disp20imm64 : Operand<i64>; + +def BDAddr32Disp12 : AddressAsmOperand<"BDAddr", "32", "12">; +def BDAddr32Disp20 : AddressAsmOperand<"BDAddr", "32", "20">; +def BDAddr64Disp12 : AddressAsmOperand<"BDAddr", "64", "12">; +def BDAddr64Disp20 : AddressAsmOperand<"BDAddr", "64", "20">; +def BDXAddr64Disp12 : AddressAsmOperand<"BDXAddr", "64", "12">; +def BDXAddr64Disp20 : AddressAsmOperand<"BDXAddr", "64", "20">; + +// DAG patterns and operands for addressing modes. 
Each mode has +// the form <type><range><group> where: +// +// <type> is one of: +// shift : base + displacement (32-bit) +// bdaddr : base + displacement +// bdxaddr : base + displacement + index +// laaddr : like bdxaddr, but used for Load Address operations +// dynalloc : base + displacement + index + ADJDYNALLOC +// +// <range> is one of: +// 12 : the displacement is an unsigned 12-bit value +// 20 : the displacement is a signed 20-bit value +// +// <group> is one of: +// pair : used when there is an equivalent instruction with the opposite +// range value (12 or 20) +// only : used when there is no equivalent instruction with the opposite +// range value +def shift12only : BDMode <"BDAddr", "32", "12", "Only">; +def shift20only : BDMode <"BDAddr", "32", "20", "Only">; +def bdaddr12only : BDMode <"BDAddr", "64", "12", "Only">; +def bdaddr12pair : BDMode <"BDAddr", "64", "12", "Pair">; +def bdaddr20only : BDMode <"BDAddr", "64", "20", "Only">; +def bdaddr20pair : BDMode <"BDAddr", "64", "20", "Pair">; +def bdxaddr12only : BDXMode<"BDXAddr", "64", "12", "Only">; +def bdxaddr12pair : BDXMode<"BDXAddr", "64", "12", "Pair">; +def bdxaddr20only : BDXMode<"BDXAddr", "64", "20", "Only">; +def bdxaddr20only128 : BDXMode<"BDXAddr", "64", "20", "Only128">; +def bdxaddr20pair : BDXMode<"BDXAddr", "64", "20", "Pair">; +def dynalloc12only : BDXMode<"DynAlloc", "64", "12", "Only">; +def laaddr12pair : BDXMode<"LAAddr", "64", "12", "Pair">; +def laaddr20pair : BDXMode<"LAAddr", "64", "20", "Pair">; + +//===----------------------------------------------------------------------===// +// Miscellaneous +//===----------------------------------------------------------------------===// + +// Access registers. At present we just use them for accessing the thread +// pointer, so we don't expose them as register to LLVM. +def AccessReg : AsmOperandClass { + let Name = "AccessReg"; + let ParserMethod = "parseAccessReg"; +} +def access_reg : Immediate<i8, [{ return N->getZExtValue() < 16; }], + NOOP_SDNodeXForm, "AccessReg"> { + let ParserMatchClass = AccessReg; +} + +// A 4-bit condition-code mask. +def cond4 : PatLeaf<(i8 imm), [{ return (N->getZExtValue() < 16); }]>, + Operand<i8> { + let PrintMethod = "printCond4Operand"; +} diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZOperators.td b/contrib/llvm/lib/Target/SystemZ/SystemZOperators.td new file mode 100644 index 0000000..8c4df56 --- /dev/null +++ b/contrib/llvm/lib/Target/SystemZ/SystemZOperators.td @@ -0,0 +1,196 @@ +//===-- SystemZOperators.td - SystemZ-specific operators ------*- tblgen-*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Type profiles +//===----------------------------------------------------------------------===// +def SDT_CallSeqStart : SDCallSeqStart<[SDTCisVT<0, i64>]>; +def SDT_CallSeqEnd : SDCallSeqEnd<[SDTCisVT<0, i64>, + SDTCisVT<1, i64>]>; +def SDT_ZCall : SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>; +def SDT_ZCmp : SDTypeProfile<0, 2, [SDTCisSameAs<0, 1>]>; +def SDT_ZBRCCMask : SDTypeProfile<0, 2, + [SDTCisVT<0, i8>, + SDTCisVT<1, OtherVT>]>; +def SDT_ZSelectCCMask : SDTypeProfile<1, 3, + [SDTCisSameAs<0, 1>, + SDTCisSameAs<1, 2>, + SDTCisVT<3, i8>]>; +def SDT_ZWrapPtr : SDTypeProfile<1, 1, + [SDTCisSameAs<0, 1>, + SDTCisPtrTy<0>]>; +def SDT_ZAdjDynAlloc : SDTypeProfile<1, 0, [SDTCisVT<0, i64>]>; +def SDT_ZExtractAccess : SDTypeProfile<1, 1, + [SDTCisVT<0, i32>, + SDTCisVT<1, i8>]>; +def SDT_ZGR128Binary32 : SDTypeProfile<1, 2, + [SDTCisVT<0, untyped>, + SDTCisVT<1, untyped>, + SDTCisVT<2, i32>]>; +def SDT_ZGR128Binary64 : SDTypeProfile<1, 2, + [SDTCisVT<0, untyped>, + SDTCisVT<1, untyped>, + SDTCisVT<2, i64>]>; +def SDT_ZAtomicLoadBinaryW : SDTypeProfile<1, 5, + [SDTCisVT<0, i32>, + SDTCisPtrTy<1>, + SDTCisVT<2, i32>, + SDTCisVT<3, i32>, + SDTCisVT<4, i32>, + SDTCisVT<5, i32>]>; +def SDT_ZAtomicCmpSwapW : SDTypeProfile<1, 6, + [SDTCisVT<0, i32>, + SDTCisPtrTy<1>, + SDTCisVT<2, i32>, + SDTCisVT<3, i32>, + SDTCisVT<4, i32>, + SDTCisVT<5, i32>, + SDTCisVT<6, i32>]>; + +//===----------------------------------------------------------------------===// +// Node definitions +//===----------------------------------------------------------------------===// + +// These are target-independent nodes, but have target-specific formats. +def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_CallSeqStart, + [SDNPHasChain, SDNPSideEffect, SDNPOutGlue]>; +def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_CallSeqEnd, + [SDNPHasChain, SDNPSideEffect, SDNPOptInGlue, + SDNPOutGlue]>; + +// Nodes for SystemZISD::*. See SystemZISelLowering.h for more details. 
+def z_retflag : SDNode<"SystemZISD::RET_FLAG", SDTNone, + [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; +def z_call : SDNode<"SystemZISD::CALL", SDT_ZCall, + [SDNPHasChain, SDNPOutGlue, SDNPOptInGlue, + SDNPVariadic]>; +def z_pcrel_wrapper : SDNode<"SystemZISD::PCREL_WRAPPER", SDT_ZWrapPtr, []>; +def z_cmp : SDNode<"SystemZISD::CMP", SDT_ZCmp, [SDNPOutGlue]>; +def z_ucmp : SDNode<"SystemZISD::UCMP", SDT_ZCmp, [SDNPOutGlue]>; +def z_br_ccmask : SDNode<"SystemZISD::BR_CCMASK", SDT_ZBRCCMask, + [SDNPHasChain, SDNPInGlue]>; +def z_select_ccmask : SDNode<"SystemZISD::SELECT_CCMASK", SDT_ZSelectCCMask, + [SDNPInGlue]>; +def z_adjdynalloc : SDNode<"SystemZISD::ADJDYNALLOC", SDT_ZAdjDynAlloc>; +def z_extract_access : SDNode<"SystemZISD::EXTRACT_ACCESS", + SDT_ZExtractAccess>; +def z_umul_lohi64 : SDNode<"SystemZISD::UMUL_LOHI64", SDT_ZGR128Binary64>; +def z_sdivrem64 : SDNode<"SystemZISD::SDIVREM64", SDT_ZGR128Binary64>; +def z_udivrem32 : SDNode<"SystemZISD::UDIVREM32", SDT_ZGR128Binary32>; +def z_udivrem64 : SDNode<"SystemZISD::UDIVREM64", SDT_ZGR128Binary64>; + +class AtomicWOp<string name, SDTypeProfile profile = SDT_ZAtomicLoadBinaryW> + : SDNode<"SystemZISD::"##name, profile, + [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>; + +def z_atomic_swapw : AtomicWOp<"ATOMIC_SWAPW">; +def z_atomic_loadw_add : AtomicWOp<"ATOMIC_LOADW_ADD">; +def z_atomic_loadw_sub : AtomicWOp<"ATOMIC_LOADW_SUB">; +def z_atomic_loadw_and : AtomicWOp<"ATOMIC_LOADW_AND">; +def z_atomic_loadw_or : AtomicWOp<"ATOMIC_LOADW_OR">; +def z_atomic_loadw_xor : AtomicWOp<"ATOMIC_LOADW_XOR">; +def z_atomic_loadw_nand : AtomicWOp<"ATOMIC_LOADW_NAND">; +def z_atomic_loadw_min : AtomicWOp<"ATOMIC_LOADW_MIN">; +def z_atomic_loadw_max : AtomicWOp<"ATOMIC_LOADW_MAX">; +def z_atomic_loadw_umin : AtomicWOp<"ATOMIC_LOADW_UMIN">; +def z_atomic_loadw_umax : AtomicWOp<"ATOMIC_LOADW_UMAX">; +def z_atomic_cmp_swapw : AtomicWOp<"ATOMIC_CMP_SWAPW", SDT_ZAtomicCmpSwapW>; + +//===----------------------------------------------------------------------===// +// Pattern fragments +//===----------------------------------------------------------------------===// + +// Register sign-extend operations. Sub-32-bit values are represented as i32s. +def sext8 : PatFrag<(ops node:$src), (sext_inreg node:$src, i8)>; +def sext16 : PatFrag<(ops node:$src), (sext_inreg node:$src, i16)>; +def sext32 : PatFrag<(ops node:$src), (sext (i32 node:$src))>; + +// Register zero-extend operations. Sub-32-bit values are represented as i32s. +def zext8 : PatFrag<(ops node:$src), (and node:$src, 0xff)>; +def zext16 : PatFrag<(ops node:$src), (and node:$src, 0xffff)>; +def zext32 : PatFrag<(ops node:$src), (zext (i32 node:$src))>; + +// Typed floating-point loads. +def loadf32 : PatFrag<(ops node:$src), (f32 (load node:$src))>; +def loadf64 : PatFrag<(ops node:$src), (f64 (load node:$src))>; + +// Aligned loads. +class AlignedLoad<SDPatternOperator load> + : PatFrag<(ops node:$addr), (load node:$addr), [{ + LoadSDNode *Load = cast<LoadSDNode>(N); + return Load->getAlignment() >= Load->getMemoryVT().getStoreSize(); +}]>; +def aligned_load : AlignedLoad<load>; +def aligned_sextloadi16 : AlignedLoad<sextloadi16>; +def aligned_sextloadi32 : AlignedLoad<sextloadi32>; +def aligned_zextloadi16 : AlignedLoad<zextloadi16>; +def aligned_zextloadi32 : AlignedLoad<zextloadi32>; + +// Aligned stores. 
+class AlignedStore<SDPatternOperator store> + : PatFrag<(ops node:$src, node:$addr), (store node:$src, node:$addr), [{ + StoreSDNode *Store = cast<StoreSDNode>(N); + return Store->getAlignment() >= Store->getMemoryVT().getStoreSize(); +}]>; +def aligned_store : AlignedStore<store>; +def aligned_truncstorei16 : AlignedStore<truncstorei16>; +def aligned_truncstorei32 : AlignedStore<truncstorei32>; + +// Insertions. +def inserti8 : PatFrag<(ops node:$src1, node:$src2), + (or (and node:$src1, -256), node:$src2)>; +def insertll : PatFrag<(ops node:$src1, node:$src2), + (or (and node:$src1, 0xffffffffffff0000), node:$src2)>; +def insertlh : PatFrag<(ops node:$src1, node:$src2), + (or (and node:$src1, 0xffffffff0000ffff), node:$src2)>; +def inserthl : PatFrag<(ops node:$src1, node:$src2), + (or (and node:$src1, 0xffff0000ffffffff), node:$src2)>; +def inserthh : PatFrag<(ops node:$src1, node:$src2), + (or (and node:$src1, 0x0000ffffffffffff), node:$src2)>; +def insertlf : PatFrag<(ops node:$src1, node:$src2), + (or (and node:$src1, 0xffffffff00000000), node:$src2)>; +def inserthf : PatFrag<(ops node:$src1, node:$src2), + (or (and node:$src1, 0x00000000ffffffff), node:$src2)>; + +// ORs that can be treated as insertions. +def or_as_inserti8 : PatFrag<(ops node:$src1, node:$src2), + (or node:$src1, node:$src2), [{ + unsigned BitWidth = N->getValueType(0).getScalarType().getSizeInBits(); + return CurDAG->MaskedValueIsZero(N->getOperand(0), + APInt::getLowBitsSet(BitWidth, 8)); +}]>; + +// ORs that can be treated as reversed insertions. +def or_as_revinserti8 : PatFrag<(ops node:$src1, node:$src2), + (or node:$src1, node:$src2), [{ + unsigned BitWidth = N->getValueType(0).getScalarType().getSizeInBits(); + return CurDAG->MaskedValueIsZero(N->getOperand(1), + APInt::getLowBitsSet(BitWidth, 8)); +}]>; + +// Fused multiply-add and multiply-subtract, but with the order of the +// operands matching SystemZ's MA and MS instructions. +def z_fma : PatFrag<(ops node:$src1, node:$src2, node:$src3), + (fma node:$src2, node:$src3, node:$src1)>; +def z_fms : PatFrag<(ops node:$src1, node:$src2, node:$src3), + (fma node:$src2, node:$src3, (fneg node:$src1))>; + +// Floating-point negative absolute. +def fnabs : PatFrag<(ops node:$ptr), (fneg (fabs node:$ptr))>; + +// Create a unary operator that loads from memory and then performs +// the given operation on it. +class loadu<SDPatternOperator operator> + : PatFrag<(ops node:$addr), (operator (load node:$addr))>; + +// Create a store operator that performs the given unary operation +// on the value before storing it. +class storeu<SDPatternOperator operator> + : PatFrag<(ops node:$value, node:$addr), + (store (operator node:$value), node:$addr)>; diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZPatterns.td b/contrib/llvm/lib/Target/SystemZ/SystemZPatterns.td new file mode 100644 index 0000000..3689f74 --- /dev/null +++ b/contrib/llvm/lib/Target/SystemZ/SystemZPatterns.td @@ -0,0 +1,71 @@ +//===-- SystemZPatterns.td - SystemZ-specific pattern rules ---*- tblgen-*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +// Record that INSN performs a 64-bit version of unary operator OPERATOR +// in which the operand is sign-extended from 32 to 64 bits. 
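[Editorial aside, not part of the patch: the insertion fragments in SystemZOperators.td above (inserti8, insertll ... inserthf, or_as_inserti8) all describe "keep most of the destination, OR a new field in". A small C++ sketch of that arithmetic, under the assumption that the second operand already carries its bits in the target field:]

#include <cassert>
#include <cstdint>

// insertlh above is (dst & 0xffffffff0000ffff) | src, i.e. it replaces
// bits 16-31 of dst with the corresponding bits of src.
static uint64_t insertlh(uint64_t Dst, uint64_t Src) {
  return (Dst & 0xFFFFFFFF0000FFFFULL) | Src;
}

int main() {
  assert(insertlh(0x1111222233334444ULL, 0x00000000ABCD0000ULL) ==
         0x11112222ABCD4444ULL);
  // or_as_inserti8 style: a plain OR is only equivalent to such an insertion
  // when the field being written is already known to be zero in the other
  // operand, which is what the MaskedValueIsZero check above verifies.
  uint64_t X = 0x1200, Y = 0x34;
  assert((X & 0xFF) == 0 && (X | Y) == ((X & ~(uint64_t)0xFF) | Y));
  return 0;
}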
+multiclass SXU<SDPatternOperator operator, Instruction insn> { + def : Pat<(operator (sext (i32 GR32:$src))), + (insn GR32:$src)>; + def : Pat<(operator (sext_inreg GR64:$src, i32)), + (insn (EXTRACT_SUBREG GR64:$src, subreg_32bit))>; +} + +// Record that INSN performs a 64-bit version of binary operator OPERATOR +// in which the first operand has class CLS and which the second operand +// is sign-extended from a 32-bit register. +multiclass SXB<SDPatternOperator operator, RegisterOperand cls, + Instruction insn> { + def : Pat<(operator cls:$src1, (sext GR32:$src2)), + (insn cls:$src1, GR32:$src2)>; + def : Pat<(operator cls:$src1, (sext_inreg GR64:$src2, i32)), + (insn cls:$src1, (EXTRACT_SUBREG GR64:$src2, subreg_32bit))>; +} + +// Like SXB, but for zero extension. +multiclass ZXB<SDPatternOperator operator, RegisterOperand cls, + Instruction insn> { + def : Pat<(operator cls:$src1, (zext GR32:$src2)), + (insn cls:$src1, GR32:$src2)>; + def : Pat<(operator cls:$src1, (and GR64:$src2, 0xffffffff)), + (insn cls:$src1, (EXTRACT_SUBREG GR64:$src2, subreg_32bit))>; +} + +// Record that INSN performs a binary read-modify-write operation, +// with LOAD, OPERATOR and STORE being the read, modify and write +// respectively. MODE is the addressing mode and IMM is the type +// of the second operand. +class RMWI<SDPatternOperator load, SDPatternOperator operator, + SDPatternOperator store, AddressingMode mode, + PatFrag imm, Instruction insn> + : Pat<(store (operator (load mode:$addr), imm:$src), mode:$addr), + (insn mode:$addr, (UIMM8 imm:$src))>; + +// Record that INSN performs binary operation OPERATION on a byte +// memory location. IMM is the type of the second operand. +multiclass RMWIByte<SDPatternOperator operator, AddressingMode mode, + Instruction insn> { + def : RMWI<zextloadi8, operator, truncstorei8, mode, imm32, insn>; + def : RMWI<zextloadi8, operator, truncstorei8, mode, imm64, insn>; + def : RMWI<sextloadi8, operator, truncstorei8, mode, imm32, insn>; + def : RMWI<sextloadi8, operator, truncstorei8, mode, imm64, insn>; + def : RMWI<extloadi8, operator, truncstorei8, mode, imm32, insn>; + def : RMWI<extloadi8, operator, truncstorei8, mode, imm64, insn>; +} + +// Record that INSN performs insertion TYPE into a register of class CLS. +// The inserted operand is loaded using LOAD from an address of mode MODE. +multiclass InsertMem<string type, Instruction insn, RegisterOperand cls, + SDPatternOperator load, AddressingMode mode> { + def : Pat<(!cast<SDPatternOperator>("or_as_"##type) + cls:$src1, (load mode:$src2)), + (insn cls:$src1, mode:$src2)>; + def : Pat<(!cast<SDPatternOperator>("or_as_rev"##type) + (load mode:$src2), cls:$src1), + (insn cls:$src1, mode:$src2)>; +} diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp new file mode 100644 index 0000000..a0ae7ed --- /dev/null +++ b/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp @@ -0,0 +1,162 @@ +//===-- SystemZRegisterInfo.cpp - SystemZ register information ------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "SystemZRegisterInfo.h" +#include "SystemZTargetMachine.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" + +#define GET_REGINFO_TARGET_DESC +#include "SystemZGenRegisterInfo.inc" + +using namespace llvm; + +SystemZRegisterInfo::SystemZRegisterInfo(SystemZTargetMachine &tm, + const SystemZInstrInfo &tii) + : SystemZGenRegisterInfo(SystemZ::R14D), TM(tm), TII(tii) {} + +const uint16_t* +SystemZRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { + static const uint16_t CalleeSavedRegs[] = { + SystemZ::R6D, SystemZ::R7D, SystemZ::R8D, SystemZ::R9D, + SystemZ::R10D, SystemZ::R11D, SystemZ::R12D, SystemZ::R13D, + SystemZ::R14D, SystemZ::R15D, + SystemZ::F8D, SystemZ::F9D, SystemZ::F10D, SystemZ::F11D, + SystemZ::F12D, SystemZ::F13D, SystemZ::F14D, SystemZ::F15D, + 0 + }; + + return CalleeSavedRegs; +} + +BitVector +SystemZRegisterInfo::getReservedRegs(const MachineFunction &MF) const { + BitVector Reserved(getNumRegs()); + const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); + + if (TFI->hasFP(MF)) { + // R11D is the frame pointer. Reserve all aliases. + Reserved.set(SystemZ::R11D); + Reserved.set(SystemZ::R11W); + Reserved.set(SystemZ::R10Q); + } + + // R15D is the stack pointer. Reserve all aliases. + Reserved.set(SystemZ::R15D); + Reserved.set(SystemZ::R15W); + Reserved.set(SystemZ::R14Q); + return Reserved; +} + +bool +SystemZRegisterInfo::saveScavengerRegister(MachineBasicBlock &MBB, + MachineBasicBlock::iterator SaveMBBI, + MachineBasicBlock::iterator &UseMBBI, + const TargetRegisterClass *RC, + unsigned Reg) const { + MachineFunction &MF = *MBB.getParent(); + const SystemZFrameLowering *TFI = + static_cast<const SystemZFrameLowering *>(TM.getFrameLowering()); + unsigned Base = getFrameRegister(MF); + uint64_t Offset = TFI->getEmergencySpillSlotOffset(MF); + DebugLoc DL; + + unsigned LoadOpcode, StoreOpcode; + TII.getLoadStoreOpcodes(RC, LoadOpcode, StoreOpcode); + + // The offset must always be in range of a 12-bit unsigned displacement. + BuildMI(MBB, SaveMBBI, DL, TII.get(StoreOpcode)) + .addReg(Reg, RegState::Kill).addReg(Base).addImm(Offset).addReg(0); + BuildMI(MBB, UseMBBI, DL, TII.get(LoadOpcode), Reg) + .addReg(Base).addImm(Offset).addReg(0); + return true; +} + +void +SystemZRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI, + int SPAdj, unsigned FIOperandNum, + RegScavenger *RS) const { + assert(SPAdj == 0 && "Outgoing arguments should be part of the frame"); + + MachineBasicBlock &MBB = *MI->getParent(); + MachineFunction &MF = *MBB.getParent(); + const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); + DebugLoc DL = MI->getDebugLoc(); + + // Decompose the frame index into a base and offset. + int FrameIndex = MI->getOperand(FIOperandNum).getIndex(); + unsigned BasePtr = getFrameRegister(MF); + int64_t Offset = (TFI->getFrameIndexOffset(MF, FrameIndex) + + MI->getOperand(FIOperandNum + 1).getImm()); + + // Special handling of dbg_value instructions. + if (MI->isDebugValue()) { + MI->getOperand(FIOperandNum).ChangeToRegister(BasePtr, /*isDef*/ false); + MI->getOperand(FIOperandNum + 1).ChangeToImmediate(Offset); + return; + } + + // See if the offset is in range, or if an equivalent instruction that + // accepts the offset exists. 
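[Editorial aside, not part of the patch: the eliminateFrameIndex body that follows handles displacements that do not fit the instruction by splitting the offset into a large "anchor" part, materialized through a scratch register, plus a small remainder that the instruction can encode. A simplified sketch of that decomposition, assuming for illustration that the remainder must fit a 12-bit unsigned displacement:]

#include <cassert>
#include <cstdint>

struct Split { int64_t Anchor; int64_t Remainder; };

// Mirror the masking loop below: shrink the mask until the masked-off
// remainder is acceptable, then treat the rest as the anchor.
static Split splitOffset(int64_t Offset) {
  int64_t Mask = 0xffff;
  int64_t Rem;
  do {
    Rem = Offset & Mask;
    Mask >>= 1;
  } while (!(Rem >= 0 && Rem < (1 << 12)));
  Split S;
  S.Anchor = Offset - Rem;
  S.Remainder = Rem;
  return S;
}

int main() {
  Split S = splitOffset(0x12345);            // too large for 12 bits
  assert(S.Anchor + S.Remainder == 0x12345); // decomposition is lossless
  assert(S.Remainder < (1 << 12));           // remainder now encodable
  return 0;
}

[In the real code the "acceptable" test is getOpcodeForOffset(), so the remainder may also use a 20-bit signed displacement when an equivalent instruction exists.]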
+ unsigned Opcode = MI->getOpcode(); + unsigned OpcodeForOffset = TII.getOpcodeForOffset(Opcode, Offset); + if (OpcodeForOffset) + MI->getOperand(FIOperandNum).ChangeToRegister(BasePtr, false); + else { + // Create an anchor point that is in range. Start at 0xffff so that + // can use LLILH to load the immediate. + int64_t OldOffset = Offset; + int64_t Mask = 0xffff; + do { + Offset = OldOffset & Mask; + OpcodeForOffset = TII.getOpcodeForOffset(Opcode, Offset); + Mask >>= 1; + assert(Mask && "One offset must be OK"); + } while (!OpcodeForOffset); + + unsigned ScratchReg = + MF.getRegInfo().createVirtualRegister(&SystemZ::ADDR64BitRegClass); + int64_t HighOffset = OldOffset - Offset; + + if (MI->getDesc().TSFlags & SystemZII::HasIndex + && MI->getOperand(FIOperandNum + 2).getReg() == 0) { + // Load the offset into the scratch register and use it as an index. + // The scratch register then dies here. + TII.loadImmediate(MBB, MI, ScratchReg, HighOffset); + MI->getOperand(FIOperandNum).ChangeToRegister(BasePtr, false); + MI->getOperand(FIOperandNum + 2).ChangeToRegister(ScratchReg, + false, false, true); + } else { + // Load the anchor address into a scratch register. + unsigned LAOpcode = TII.getOpcodeForOffset(SystemZ::LA, HighOffset); + if (LAOpcode) + BuildMI(MBB, MI, DL, TII.get(LAOpcode),ScratchReg) + .addReg(BasePtr).addImm(HighOffset).addReg(0); + else { + // Load the high offset into the scratch register and use it as + // an index. + TII.loadImmediate(MBB, MI, ScratchReg, HighOffset); + BuildMI(MBB, MI, DL, TII.get(SystemZ::AGR),ScratchReg) + .addReg(ScratchReg, RegState::Kill).addReg(BasePtr); + } + + // Use the scratch register as the base. It then dies here. + MI->getOperand(FIOperandNum).ChangeToRegister(ScratchReg, + false, false, true); + } + } + MI->setDesc(TII.get(OpcodeForOffset)); + MI->getOperand(FIOperandNum + 1).ChangeToImmediate(Offset); +} + +unsigned +SystemZRegisterInfo::getFrameRegister(const MachineFunction &MF) const { + const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); + return TFI->hasFP(MF) ? SystemZ::R11D : SystemZ::R15D; +} diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.h b/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.h new file mode 100644 index 0000000..91a70de --- /dev/null +++ b/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.h @@ -0,0 +1,70 @@ +//===-- SystemZRegisterInfo.h - SystemZ register information ----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef SystemZREGISTERINFO_H +#define SystemZREGISTERINFO_H + +#include "SystemZ.h" +#include "llvm/Target/TargetRegisterInfo.h" + +#define GET_REGINFO_HEADER +#include "SystemZGenRegisterInfo.inc" + +namespace llvm { + +namespace SystemZ { + // Return the subreg to use for referring to the even and odd registers + // in a GR128 pair. Is32Bit says whether we want a GR32 or GR64. + inline unsigned even128(bool Is32bit) { + return Is32bit ? subreg_32bit : subreg_high; + } + inline unsigned odd128(bool Is32bit) { + return Is32bit ? 
subreg_low32 : subreg_low; + } +} + +class SystemZSubtarget; +class SystemZInstrInfo; + +struct SystemZRegisterInfo : public SystemZGenRegisterInfo { +private: + SystemZTargetMachine &TM; + const SystemZInstrInfo &TII; + +public: + SystemZRegisterInfo(SystemZTargetMachine &tm, const SystemZInstrInfo &tii); + + // Override TargetRegisterInfo.h. + virtual bool requiresRegisterScavenging(const MachineFunction &MF) const + LLVM_OVERRIDE { + return true; + } + virtual bool requiresFrameIndexScavenging(const MachineFunction &MF) const + LLVM_OVERRIDE { + return true; + } + virtual const uint16_t *getCalleeSavedRegs(const MachineFunction *MF = 0) + const LLVM_OVERRIDE; + virtual BitVector getReservedRegs(const MachineFunction &MF) + const LLVM_OVERRIDE; + virtual bool saveScavengerRegister(MachineBasicBlock &MBB, + MachineBasicBlock::iterator SaveMBBI, + MachineBasicBlock::iterator &UseMBBI, + const TargetRegisterClass *RC, + unsigned Reg) const LLVM_OVERRIDE; + virtual void eliminateFrameIndex(MachineBasicBlock::iterator MI, + int SPAdj, unsigned FIOperandNum, + RegScavenger *RS) const LLVM_OVERRIDE; + virtual unsigned getFrameRegister(const MachineFunction &MF) const + LLVM_OVERRIDE; +}; + +} // end namespace llvm + +#endif diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.td b/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.td new file mode 100644 index 0000000..bd1b563 --- /dev/null +++ b/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.td @@ -0,0 +1,150 @@ +//==- SystemZRegisterInfo.td - SystemZ register definitions -*- tablegen -*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Class definitions. +//===----------------------------------------------------------------------===// + +class SystemZReg<string n> : Register<n> { + let Namespace = "SystemZ"; +} + +class SystemZRegWithSubregs<string n, list<Register> subregs> + : RegisterWithSubRegs<n, subregs> { + let Namespace = "SystemZ"; +} + +let Namespace = "SystemZ" in { +def subreg_32bit : SubRegIndex; // could also be known as "subreg_high32" +def subreg_high : SubRegIndex; +def subreg_low : SubRegIndex; +def subreg_low32 : SubRegIndex<[subreg_low, subreg_32bit]>; +} + +// Define a register class that contains values of type TYPE and an +// associated operand called NAME. SIZE is the size and alignment +// of the registers and REGLIST is the list of individual registers. +multiclass SystemZRegClass<string name, ValueType type, int size, dag regList> { + def AsmOperand : AsmOperandClass { + let Name = name; + let ParserMethod = "parse"##name; + let RenderMethod = "addRegOperands"; + } + def Bit : RegisterClass<"SystemZ", [type], size, regList> { + let Size = size; + } + def "" : RegisterOperand<!cast<RegisterClass>(name##"Bit")> { + let ParserMatchClass = !cast<AsmOperandClass>(name##"AsmOperand"); + } +} + +//===----------------------------------------------------------------------===// +// General-purpose registers +//===----------------------------------------------------------------------===// + +// Lower 32 bits of one of the 16 64-bit general-purpose registers +class GPR32<bits<16> num, string n> : SystemZReg<n> { + let HWEncoding = num; +} + +// One of the 16 64-bit general-purpose registers. 
+class GPR64<bits<16> num, string n, GPR32 low> + : SystemZRegWithSubregs<n, [low]> { + let HWEncoding = num; + let SubRegIndices = [subreg_32bit]; +} + +// 8 even-odd pairs of GPR64s. +class GPR128<bits<16> num, string n, GPR64 high, GPR64 low> + : SystemZRegWithSubregs<n, [high, low]> { + let HWEncoding = num; + let SubRegIndices = [subreg_high, subreg_low]; +} + +// General-purpose registers +foreach I = 0-15 in { + def R#I#W : GPR32<I, "r"#I>; + def R#I#D : GPR64<I, "r"#I, !cast<GPR32>("R"#I#"W")>, DwarfRegNum<[I]>; +} + +foreach I = [0, 2, 4, 6, 8, 10, 12, 14] in { + def R#I#Q : GPR128<I, "r"#I, !cast<GPR64>("R"#I#"D"), + !cast<GPR64>("R"#!add(I, 1)#"D")>; +} + +/// Allocate the callee-saved R6-R13 backwards. That way they can be saved +/// together with R14 and R15 in one prolog instruction. +defm GR32 : SystemZRegClass<"GR32", i32, 32, (add (sequence "R%uW", 0, 5), + (sequence "R%uW", 15, 6))>; +defm GR64 : SystemZRegClass<"GR64", i64, 64, (add (sequence "R%uD", 0, 5), + (sequence "R%uD", 15, 6))>; + +// The architecture doesn't really have any i128 support, so model the +// register pairs as untyped instead. +defm GR128 : SystemZRegClass<"GR128", untyped, 128, (add R0Q, R2Q, R4Q, + R12Q, R10Q, R8Q, R6Q, + R14Q)>; + +// Base and index registers. Everything except R0, which in an address +// context evaluates as 0. +defm ADDR32 : SystemZRegClass<"ADDR32", i32, 32, (sub GR32Bit, R0W)>; +defm ADDR64 : SystemZRegClass<"ADDR64", i64, 64, (sub GR64Bit, R0D)>; + +// Not used directly, but needs to exist for ADDR32 and ADDR64 subregs +// of a GR128. +defm ADDR128 : SystemZRegClass<"ADDR128", untyped, 128, (sub GR128Bit, R0Q)>; + +//===----------------------------------------------------------------------===// +// Floating-point registers +//===----------------------------------------------------------------------===// + +// Lower 32 bits of one of the 16 64-bit floating-point registers +class FPR32<bits<16> num, string n> : SystemZReg<n> { + let HWEncoding = num; +} + +// One of the 16 64-bit floating-point registers +class FPR64<bits<16> num, string n, FPR32 low> + : SystemZRegWithSubregs<n, [low]> { + let HWEncoding = num; + let SubRegIndices = [subreg_32bit]; +} + +// 8 pairs of FPR64s, with a one-register gap inbetween. +class FPR128<bits<16> num, string n, FPR64 high, FPR64 low> + : SystemZRegWithSubregs<n, [high, low]> { + let HWEncoding = num; + let SubRegIndices = [subreg_high, subreg_low]; +} + +// Floating-point registers +foreach I = 0-15 in { + def F#I#S : FPR32<I, "f"#I>; + def F#I#D : FPR64<I, "f"#I, !cast<FPR32>("F"#I#"S")>, + DwarfRegNum<[!add(I, 16)]>; +} + +foreach I = [0, 1, 4, 5, 8, 9, 12, 13] in { + def F#I#Q : FPR128<I, "f"#I, !cast<FPR64>("F"#I#"D"), + !cast<FPR64>("F"#!add(I, 2)#"D")>; +} + +// There's no store-multiple instruction for FPRs, so we're not fussy +// about the order in which call-saved registers are allocated. 
+defm FP32 : SystemZRegClass<"FP32", f32, 32, (sequence "F%uS", 0, 15)>; +defm FP64 : SystemZRegClass<"FP64", f64, 64, (sequence "F%uD", 0, 15)>; +defm FP128 : SystemZRegClass<"FP128", f128, 128, (add F0Q, F1Q, F4Q, F5Q, + F8Q, F9Q, F12Q, F13Q)>; + +//===----------------------------------------------------------------------===// +// Other registers +//===----------------------------------------------------------------------===// + +// Status register +def PSW : SystemZReg<"psw">; diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZSubtarget.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZSubtarget.cpp new file mode 100644 index 0000000..cfd3324 --- /dev/null +++ b/contrib/llvm/lib/Target/SystemZ/SystemZSubtarget.cpp @@ -0,0 +1,56 @@ +//===-- SystemZSubtarget.cpp - SystemZ subtarget information --------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "SystemZSubtarget.h" +#include "llvm/IR/GlobalValue.h" + +#define GET_SUBTARGETINFO_TARGET_DESC +#define GET_SUBTARGETINFO_CTOR +#include "SystemZGenSubtargetInfo.inc" + +using namespace llvm; + +SystemZSubtarget::SystemZSubtarget(const std::string &TT, + const std::string &CPU, + const std::string &FS) + : SystemZGenSubtargetInfo(TT, CPU, FS), TargetTriple(TT) { + std::string CPUName = CPU; + if (CPUName.empty()) + CPUName = "z10"; + + // Parse features string. + ParseSubtargetFeatures(CPUName, FS); +} + +// Return true if GV binds locally under reloc model RM. +static bool bindsLocally(const GlobalValue *GV, Reloc::Model RM) { + // For non-PIC, all symbols bind locally. + if (RM == Reloc::Static) + return true; + + return GV->hasLocalLinkage() || !GV->hasDefaultVisibility(); +} + +bool SystemZSubtarget::isPC32DBLSymbol(const GlobalValue *GV, + Reloc::Model RM, + CodeModel::Model CM) const { + // PC32DBL accesses require the low bit to be clear. Note that a zero + // value selects the default alignment and is therefore OK. + if (GV->getAlignment() == 1) + return false; + + // For the small model, all locally-binding symbols are in range. + if (CM == CodeModel::Small) + return bindsLocally(GV, RM); + + // For Medium and above, assume that the symbol is not within the 4GB range. + // Taking the address of locally-defined text would be OK, but that + // case isn't easy to detect. + return false; +} diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZSubtarget.h b/contrib/llvm/lib/Target/SystemZ/SystemZSubtarget.h new file mode 100644 index 0000000..8d4d450 --- /dev/null +++ b/contrib/llvm/lib/Target/SystemZ/SystemZSubtarget.h @@ -0,0 +1,48 @@ +//===-- SystemZSubtarget.h - SystemZ subtarget information -----*- C++ -*--===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file declares the SystemZ specific subclass of TargetSubtargetInfo. 
+// +//===----------------------------------------------------------------------===// + +#ifndef SYSTEMZSUBTARGET_H +#define SYSTEMZSUBTARGET_H + +#include "llvm/ADT/Triple.h" +#include "llvm/Target/TargetSubtargetInfo.h" +#include <string> + +#define GET_SUBTARGETINFO_HEADER +#include "SystemZGenSubtargetInfo.inc" + +namespace llvm { +class GlobalValue; +class StringRef; + +class SystemZSubtarget : public SystemZGenSubtargetInfo { +private: + Triple TargetTriple; + +public: + SystemZSubtarget(const std::string &TT, const std::string &CPU, + const std::string &FS); + + // Automatically generated by tblgen. + void ParseSubtargetFeatures(StringRef CPU, StringRef FS); + + // Return true if GV can be accessed using LARL for reloc model RM + // and code model CM. + bool isPC32DBLSymbol(const GlobalValue *GV, Reloc::Model RM, + CodeModel::Model CM) const; + + bool isTargetELF() const { return TargetTriple.isOSBinFormatELF(); } +}; +} // end namespace llvm + +#endif diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp new file mode 100644 index 0000000..8c4c456 --- /dev/null +++ b/contrib/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp @@ -0,0 +1,60 @@ +//===-- SystemZTargetMachine.cpp - Define TargetMachine for SystemZ -------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "SystemZTargetMachine.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/Support/TargetRegistry.h" + +using namespace llvm; + +extern "C" void LLVMInitializeSystemZTarget() { + // Register the target. + RegisterTargetMachine<SystemZTargetMachine> X(TheSystemZTarget); +} + +SystemZTargetMachine::SystemZTargetMachine(const Target &T, StringRef TT, + StringRef CPU, StringRef FS, + const TargetOptions &Options, + Reloc::Model RM, + CodeModel::Model CM, + CodeGenOpt::Level OL) + : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL), + Subtarget(TT, CPU, FS), + // Make sure that global data has at least 16 bits of alignment by default, + // so that we can refer to it using LARL. We don't have any special + // requirements for stack variables though. + DL("E-p:64:64:64-i1:8:16-i8:8:16-i16:16-i32:32-i64:64" + "-f32:32-f64:64-f128:64-a0:8:16-n32:64"), + InstrInfo(*this), TLInfo(*this), TSInfo(*this), + FrameLowering(*this, Subtarget) { +} + +namespace { +/// SystemZ Code Generator Pass Configuration Options. 
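[Editorial aside, not part of the patch: the DataLayout string in the SystemZTargetMachine constructor above encodes big-endianness, 64-bit pointers, and the 16-bit preferred alignment on small types that backs the "refer to it using LARL" comment. A hedged sketch of querying it, assuming the LLVM 3.3-era C++ headers and API; alignments are reported in bytes:]

#include "llvm/IR/DataLayout.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Type.h"
#include <cassert>

using namespace llvm;

int main() {
  LLVMContext Ctx;
  DataLayout DL("E-p:64:64:64-i1:8:16-i8:8:16-i16:16-i32:32-i64:64"
                "-f32:32-f64:64-f128:64-a0:8:16-n32:64");
  assert(DL.isBigEndian());              // "E"
  assert(DL.getPointerSize() == 8);      // "p:64:64:64"
  // "i8:8:16": 1-byte ABI alignment, 2-byte *preferred* alignment, which is
  // roughly what lets unannotated globals default to even addresses for LARL.
  assert(DL.getABITypeAlignment(Type::getInt8Ty(Ctx)) == 1);
  assert(DL.getPrefTypeAlignment(Type::getInt8Ty(Ctx)) == 2);
  return 0;
}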
+class SystemZPassConfig : public TargetPassConfig { +public: + SystemZPassConfig(SystemZTargetMachine *TM, PassManagerBase &PM) + : TargetPassConfig(TM, PM) {} + + SystemZTargetMachine &getSystemZTargetMachine() const { + return getTM<SystemZTargetMachine>(); + } + + virtual bool addInstSelector(); +}; +} // end anonymous namespace + +bool SystemZPassConfig::addInstSelector() { + addPass(createSystemZISelDag(getSystemZTargetMachine(), getOptLevel())); + return false; +} + +TargetPassConfig *SystemZTargetMachine::createPassConfig(PassManagerBase &PM) { + return new SystemZPassConfig(this, PM); +} diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZTargetMachine.h b/contrib/llvm/lib/Target/SystemZ/SystemZTargetMachine.h new file mode 100644 index 0000000..98614e7 --- /dev/null +++ b/contrib/llvm/lib/Target/SystemZ/SystemZTargetMachine.h @@ -0,0 +1,74 @@ +//==- SystemZTargetMachine.h - Define TargetMachine for SystemZ ---*- C++ -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file declares the SystemZ specific subclass of TargetMachine. +// +//===----------------------------------------------------------------------===// + + +#ifndef SYSTEMZTARGETMACHINE_H +#define SYSTEMZTARGETMACHINE_H + +#include "SystemZFrameLowering.h" +#include "SystemZISelLowering.h" +#include "SystemZInstrInfo.h" +#include "SystemZRegisterInfo.h" +#include "SystemZSubtarget.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/Target/TargetFrameLowering.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetSelectionDAGInfo.h" + +namespace llvm { + +class SystemZTargetMachine : public LLVMTargetMachine { + SystemZSubtarget Subtarget; + const DataLayout DL; + SystemZInstrInfo InstrInfo; + SystemZTargetLowering TLInfo; + TargetSelectionDAGInfo TSInfo; + SystemZFrameLowering FrameLowering; + +public: + SystemZTargetMachine(const Target &T, StringRef TT, StringRef CPU, + StringRef FS, const TargetOptions &Options, + Reloc::Model RM, CodeModel::Model CM, + CodeGenOpt::Level OL); + + // Override TargetMachine. 
+ virtual const TargetFrameLowering *getFrameLowering() const LLVM_OVERRIDE { + return &FrameLowering; + } + virtual const SystemZInstrInfo *getInstrInfo() const LLVM_OVERRIDE { + return &InstrInfo; + } + virtual const SystemZSubtarget *getSubtargetImpl() const LLVM_OVERRIDE { + return &Subtarget; + } + virtual const DataLayout *getDataLayout() const LLVM_OVERRIDE { + return &DL; + } + virtual const SystemZRegisterInfo *getRegisterInfo() const LLVM_OVERRIDE { + return &InstrInfo.getRegisterInfo(); + } + virtual const SystemZTargetLowering *getTargetLowering() const LLVM_OVERRIDE { + return &TLInfo; + } + virtual const TargetSelectionDAGInfo *getSelectionDAGInfo() const + LLVM_OVERRIDE { + return &TSInfo; + } + + // Override LLVMTargetMachine + virtual TargetPassConfig *createPassConfig(PassManagerBase &PM) LLVM_OVERRIDE; +}; + +} // end namespace llvm + +#endif diff --git a/contrib/llvm/lib/Target/SystemZ/TargetInfo/SystemZTargetInfo.cpp b/contrib/llvm/lib/Target/SystemZ/TargetInfo/SystemZTargetInfo.cpp new file mode 100644 index 0000000..8f9aa28 --- /dev/null +++ b/contrib/llvm/lib/Target/SystemZ/TargetInfo/SystemZTargetInfo.cpp @@ -0,0 +1,20 @@ +//===-- SystemZTargetInfo.cpp - SystemZ target implementation -------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "SystemZ.h" +#include "llvm/Support/TargetRegistry.h" + +using namespace llvm; + +Target llvm::TheSystemZTarget; + +extern "C" void LLVMInitializeSystemZTargetInfo() { + RegisterTarget<Triple::systemz, /*HasJIT=*/true> + X(TheSystemZTarget, "systemz", "SystemZ"); +} diff --git a/contrib/llvm/lib/Target/Target.cpp b/contrib/llvm/lib/Target/Target.cpp index 9a78ebc..3d92f29 100644 --- a/contrib/llvm/lib/Target/Target.cpp +++ b/contrib/llvm/lib/Target/Target.cpp @@ -16,6 +16,7 @@ #include "llvm-c/Initialization.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Value.h" #include "llvm/InitializePasses.h" #include "llvm/PassManager.h" #include "llvm/Target/TargetLibraryInfo.h" @@ -23,6 +24,23 @@ using namespace llvm; +inline DataLayout *unwrap(LLVMTargetDataRef P) { + return reinterpret_cast<DataLayout*>(P); +} + +inline LLVMTargetDataRef wrap(const DataLayout *P) { + return reinterpret_cast<LLVMTargetDataRef>(const_cast<DataLayout*>(P)); +} + +inline TargetLibraryInfo *unwrap(LLVMTargetLibraryInfoRef P) { + return reinterpret_cast<TargetLibraryInfo*>(P); +} + +inline LLVMTargetLibraryInfoRef wrap(const TargetLibraryInfo *P) { + TargetLibraryInfo *X = const_cast<TargetLibraryInfo*>(P); + return reinterpret_cast<LLVMTargetLibraryInfoRef>(X); +} + void llvm::initializeTarget(PassRegistry &Registry) { initializeDataLayoutPass(Registry); initializeTargetLibraryInfoPass(Registry); diff --git a/contrib/llvm/lib/Target/TargetMachineC.cpp b/contrib/llvm/lib/Target/TargetMachineC.cpp index 79f74bd..01d12e8 100644 --- a/contrib/llvm/lib/Target/TargetMachineC.cpp +++ b/contrib/llvm/lib/Target/TargetMachineC.cpp @@ -28,7 +28,36 @@ using namespace llvm; +inline DataLayout *unwrap(LLVMTargetDataRef P) { + return reinterpret_cast<DataLayout*>(P); +} + +inline LLVMTargetDataRef wrap(const DataLayout *P) { + return reinterpret_cast<LLVMTargetDataRef>(const_cast<DataLayout*>(P)); +} + +inline TargetLibraryInfo *unwrap(LLVMTargetLibraryInfoRef P) { + return reinterpret_cast<TargetLibraryInfo*>(P); +} 
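[Editorial aside, not part of the patch: the TargetMachineC.cpp hunk below routes file emission through a shared LLVMTargetMachineEmit helper and adds LLVMTargetMachineEmitToMemoryBuffer. A hedged usage sketch of the new entry point, assuming TM and Mod were already created through the LLVM-C API and that LLVMGetBufferSize/LLVMDisposeMemoryBuffer from llvm-c/Core.h are available:]

#include "llvm-c/Core.h"
#include "llvm-c/TargetMachine.h"
#include <cstdio>

static int emitToBuffer(LLVMTargetMachineRef TM, LLVMModuleRef Mod) {
  char *Error = 0;
  LLVMMemoryBufferRef Buf = 0;
  // Nonzero return means failure; ErrorMessage is then strdup'd for us.
  if (LLVMTargetMachineEmitToMemoryBuffer(TM, Mod, LLVMObjectFile,
                                          &Error, &Buf)) {
    fprintf(stderr, "emit failed: %s\n", Error);
    return 1;
  }
  // The buffer owns a private copy of the object code.
  fprintf(stderr, "emitted %zu bytes\n", LLVMGetBufferSize(Buf));
  LLVMDisposeMemoryBuffer(Buf);
  return 0;
}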
+ +inline LLVMTargetLibraryInfoRef wrap(const TargetLibraryInfo *P) { + TargetLibraryInfo *X = const_cast<TargetLibraryInfo*>(P); + return reinterpret_cast<LLVMTargetLibraryInfoRef>(X); +} +inline TargetMachine *unwrap(LLVMTargetMachineRef P) { + return reinterpret_cast<TargetMachine*>(P); +} +inline Target *unwrap(LLVMTargetRef P) { + return reinterpret_cast<Target*>(P); +} +inline LLVMTargetMachineRef wrap(const TargetMachine *P) { + return + reinterpret_cast<LLVMTargetMachineRef>(const_cast<TargetMachine*>(P)); +} +inline LLVMTargetRef wrap(const Target * P) { + return reinterpret_cast<LLVMTargetRef>(const_cast<Target*>(P)); +} LLVMTargetRef LLVMGetFirstTarget() { const Target* target = &*TargetRegistry::begin(); @@ -77,29 +106,9 @@ LLVMTargetMachineRef LLVMCreateTargetMachine(LLVMTargetRef T, char* Triple, break; } - CodeModel::Model CM; - switch (CodeModel) { - case LLVMCodeModelJITDefault: - CM = CodeModel::JITDefault; - break; - case LLVMCodeModelSmall: - CM = CodeModel::Small; - break; - case LLVMCodeModelKernel: - CM = CodeModel::Kernel; - break; - case LLVMCodeModelMedium: - CM = CodeModel::Medium; - break; - case LLVMCodeModelLarge: - CM = CodeModel::Large; - break; - default: - CM = CodeModel::Default; - break; - } - CodeGenOpt::Level OL; + CodeModel::Model CM = unwrap(CodeModel); + CodeGenOpt::Level OL; switch (Level) { case LLVMCodeGenLevelNone: OL = CodeGenOpt::None; @@ -149,8 +158,8 @@ LLVMTargetDataRef LLVMGetTargetMachineData(LLVMTargetMachineRef T) { return wrap(unwrap(T)->getDataLayout()); } -LLVMBool LLVMTargetMachineEmitToFile(LLVMTargetMachineRef T, LLVMModuleRef M, - char* Filename, LLVMCodeGenFileType codegen, char** ErrorMessage) { +static LLVMBool LLVMTargetMachineEmit(LLVMTargetMachineRef T, LLVMModuleRef M, + formatted_raw_ostream &OS, LLVMCodeGenFileType codegen, char **ErrorMessage) { TargetMachine* TM = unwrap(T); Module* Mod = unwrap(M); @@ -176,14 +185,7 @@ LLVMBool LLVMTargetMachineEmitToFile(LLVMTargetMachineRef T, LLVMModuleRef M, ft = TargetMachine::CGFT_ObjectFile; break; } - raw_fd_ostream dest(Filename, error, raw_fd_ostream::F_Binary); - formatted_raw_ostream destf(dest); - if (!error.empty()) { - *ErrorMessage = strdup(error.c_str()); - return true; - } - - if (TM->addPassesToEmitFile(pass, destf, ft)) { + if (TM->addPassesToEmitFile(pass, OS, ft)) { error = "TargetMachine can't emit a file of this type"; *ErrorMessage = strdup(error.c_str()); return true; @@ -191,7 +193,35 @@ LLVMBool LLVMTargetMachineEmitToFile(LLVMTargetMachineRef T, LLVMModuleRef M, pass.run(*Mod); - destf.flush(); - dest.flush(); + OS.flush(); return false; } + +LLVMBool LLVMTargetMachineEmitToFile(LLVMTargetMachineRef T, LLVMModuleRef M, + char* Filename, LLVMCodeGenFileType codegen, char** ErrorMessage) { + std::string error; + raw_fd_ostream dest(Filename, error, raw_fd_ostream::F_Binary); + formatted_raw_ostream destf(dest); + if (!error.empty()) { + *ErrorMessage = strdup(error.c_str()); + return true; + } + bool Result = LLVMTargetMachineEmit(T, M, destf, codegen, ErrorMessage); + dest.flush(); + return Result; +} + +LLVMBool LLVMTargetMachineEmitToMemoryBuffer(LLVMTargetMachineRef T, + LLVMModuleRef M, LLVMCodeGenFileType codegen, char** ErrorMessage, + LLVMMemoryBufferRef *OutMemBuf) { + std::string CodeString; + raw_string_ostream OStream(CodeString); + formatted_raw_ostream Out(OStream); + bool Result = LLVMTargetMachineEmit(T, M, Out, codegen, ErrorMessage); + OStream.flush(); + + std::string &Data = OStream.str(); + *OutMemBuf = 
LLVMCreateMemoryBufferWithMemoryRangeCopy(Data.c_str(), + Data.length(), ""); + return Result; +} diff --git a/contrib/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp b/contrib/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp index e462322..68908ab 100644 --- a/contrib/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp +++ b/contrib/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp @@ -33,17 +33,451 @@ using namespace llvm; namespace { struct X86Operand; +static const char OpPrecedence[] = { + 0, // IC_PLUS + 0, // IC_MINUS + 1, // IC_MULTIPLY + 1, // IC_DIVIDE + 2, // IC_RPAREN + 3, // IC_LPAREN + 0, // IC_IMM + 0 // IC_REGISTER +}; + class X86AsmParser : public MCTargetAsmParser { MCSubtargetInfo &STI; MCAsmParser &Parser; ParseInstructionInfo *InstInfo; private: + enum InfixCalculatorTok { + IC_PLUS = 0, + IC_MINUS, + IC_MULTIPLY, + IC_DIVIDE, + IC_RPAREN, + IC_LPAREN, + IC_IMM, + IC_REGISTER + }; + + class InfixCalculator { + typedef std::pair< InfixCalculatorTok, int64_t > ICToken; + SmallVector<InfixCalculatorTok, 4> InfixOperatorStack; + SmallVector<ICToken, 4> PostfixStack; + + public: + int64_t popOperand() { + assert (!PostfixStack.empty() && "Poped an empty stack!"); + ICToken Op = PostfixStack.pop_back_val(); + assert ((Op.first == IC_IMM || Op.first == IC_REGISTER) + && "Expected and immediate or register!"); + return Op.second; + } + void pushOperand(InfixCalculatorTok Op, int64_t Val = 0) { + assert ((Op == IC_IMM || Op == IC_REGISTER) && + "Unexpected operand!"); + PostfixStack.push_back(std::make_pair(Op, Val)); + } + + void popOperator() { InfixOperatorStack.pop_back_val(); } + void pushOperator(InfixCalculatorTok Op) { + // Push the new operator if the stack is empty. + if (InfixOperatorStack.empty()) { + InfixOperatorStack.push_back(Op); + return; + } + + // Push the new operator if it has a higher precedence than the operator + // on the top of the stack or the operator on the top of the stack is a + // left parentheses. + unsigned Idx = InfixOperatorStack.size() - 1; + InfixCalculatorTok StackOp = InfixOperatorStack[Idx]; + if (OpPrecedence[Op] > OpPrecedence[StackOp] || StackOp == IC_LPAREN) { + InfixOperatorStack.push_back(Op); + return; + } + + // The operator on the top of the stack has higher precedence than the + // new operator. + unsigned ParenCount = 0; + while (1) { + // Nothing to process. + if (InfixOperatorStack.empty()) + break; + + Idx = InfixOperatorStack.size() - 1; + StackOp = InfixOperatorStack[Idx]; + if (!(OpPrecedence[StackOp] >= OpPrecedence[Op] || ParenCount)) + break; + + // If we have an even parentheses count and we see a left parentheses, + // then stop processing. + if (!ParenCount && StackOp == IC_LPAREN) + break; + + if (StackOp == IC_RPAREN) { + ++ParenCount; + InfixOperatorStack.pop_back_val(); + } else if (StackOp == IC_LPAREN) { + --ParenCount; + InfixOperatorStack.pop_back_val(); + } else { + InfixOperatorStack.pop_back_val(); + PostfixStack.push_back(std::make_pair(StackOp, 0)); + } + } + // Push the new operator. + InfixOperatorStack.push_back(Op); + } + int64_t execute() { + // Push any remaining operators onto the postfix stack. 
+ while (!InfixOperatorStack.empty()) { + InfixCalculatorTok StackOp = InfixOperatorStack.pop_back_val(); + if (StackOp != IC_LPAREN && StackOp != IC_RPAREN) + PostfixStack.push_back(std::make_pair(StackOp, 0)); + } + + if (PostfixStack.empty()) + return 0; + + SmallVector<ICToken, 16> OperandStack; + for (unsigned i = 0, e = PostfixStack.size(); i != e; ++i) { + ICToken Op = PostfixStack[i]; + if (Op.first == IC_IMM || Op.first == IC_REGISTER) { + OperandStack.push_back(Op); + } else { + assert (OperandStack.size() > 1 && "Too few operands."); + int64_t Val; + ICToken Op2 = OperandStack.pop_back_val(); + ICToken Op1 = OperandStack.pop_back_val(); + switch (Op.first) { + default: + report_fatal_error("Unexpected operator!"); + break; + case IC_PLUS: + Val = Op1.second + Op2.second; + OperandStack.push_back(std::make_pair(IC_IMM, Val)); + break; + case IC_MINUS: + Val = Op1.second - Op2.second; + OperandStack.push_back(std::make_pair(IC_IMM, Val)); + break; + case IC_MULTIPLY: + assert (Op1.first == IC_IMM && Op2.first == IC_IMM && + "Multiply operation with an immediate and a register!"); + Val = Op1.second * Op2.second; + OperandStack.push_back(std::make_pair(IC_IMM, Val)); + break; + case IC_DIVIDE: + assert (Op1.first == IC_IMM && Op2.first == IC_IMM && + "Divide operation with an immediate and a register!"); + assert (Op2.second != 0 && "Division by zero!"); + Val = Op1.second / Op2.second; + OperandStack.push_back(std::make_pair(IC_IMM, Val)); + break; + } + } + } + assert (OperandStack.size() == 1 && "Expected a single result."); + return OperandStack.pop_back_val().second; + } + }; + + enum IntelExprState { + IES_PLUS, + IES_MINUS, + IES_MULTIPLY, + IES_DIVIDE, + IES_LBRAC, + IES_RBRAC, + IES_LPAREN, + IES_RPAREN, + IES_REGISTER, + IES_INTEGER, + IES_IDENTIFIER, + IES_ERROR + }; + + class IntelExprStateMachine { + IntelExprState State, PrevState; + unsigned BaseReg, IndexReg, TmpReg, Scale; + int64_t Imm; + const MCExpr *Sym; + StringRef SymName; + bool StopOnLBrac, AddImmPrefix; + InfixCalculator IC; + InlineAsmIdentifierInfo Info; + public: + IntelExprStateMachine(int64_t imm, bool stoponlbrac, bool addimmprefix) : + State(IES_PLUS), PrevState(IES_ERROR), BaseReg(0), IndexReg(0), TmpReg(0), + Scale(1), Imm(imm), Sym(0), StopOnLBrac(stoponlbrac), + AddImmPrefix(addimmprefix) { Info.clear(); } + + unsigned getBaseReg() { return BaseReg; } + unsigned getIndexReg() { return IndexReg; } + unsigned getScale() { return Scale; } + const MCExpr *getSym() { return Sym; } + StringRef getSymName() { return SymName; } + int64_t getImm() { return Imm + IC.execute(); } + bool isValidEndState() { return State == IES_RBRAC; } + bool getStopOnLBrac() { return StopOnLBrac; } + bool getAddImmPrefix() { return AddImmPrefix; } + bool hadError() { return State == IES_ERROR; } + + InlineAsmIdentifierInfo &getIdentifierInfo() { + return Info; + } + + void onPlus() { + IntelExprState CurrState = State; + switch (State) { + default: + State = IES_ERROR; + break; + case IES_INTEGER: + case IES_RPAREN: + case IES_REGISTER: + State = IES_PLUS; + IC.pushOperator(IC_PLUS); + if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) { + // If we already have a BaseReg, then assume this is the IndexReg with + // a scale of 1. 
+ if (!BaseReg) { + BaseReg = TmpReg; + } else { + assert (!IndexReg && "BaseReg/IndexReg already set!"); + IndexReg = TmpReg; + Scale = 1; + } + } + break; + } + PrevState = CurrState; + } + void onMinus() { + IntelExprState CurrState = State; + switch (State) { + default: + State = IES_ERROR; + break; + case IES_PLUS: + case IES_MULTIPLY: + case IES_DIVIDE: + case IES_LPAREN: + case IES_RPAREN: + case IES_LBRAC: + case IES_RBRAC: + case IES_INTEGER: + case IES_REGISTER: + State = IES_MINUS; + // Only push the minus operator if it is not a unary operator. + if (!(CurrState == IES_PLUS || CurrState == IES_MINUS || + CurrState == IES_MULTIPLY || CurrState == IES_DIVIDE || + CurrState == IES_LPAREN || CurrState == IES_LBRAC)) + IC.pushOperator(IC_MINUS); + if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) { + // If we already have a BaseReg, then assume this is the IndexReg with + // a scale of 1. + if (!BaseReg) { + BaseReg = TmpReg; + } else { + assert (!IndexReg && "BaseReg/IndexReg already set!"); + IndexReg = TmpReg; + Scale = 1; + } + } + break; + } + PrevState = CurrState; + } + void onRegister(unsigned Reg) { + IntelExprState CurrState = State; + switch (State) { + default: + State = IES_ERROR; + break; + case IES_PLUS: + case IES_LPAREN: + State = IES_REGISTER; + TmpReg = Reg; + IC.pushOperand(IC_REGISTER); + break; + case IES_MULTIPLY: + // Index Register - Scale * Register + if (PrevState == IES_INTEGER) { + assert (!IndexReg && "IndexReg already set!"); + State = IES_REGISTER; + IndexReg = Reg; + // Get the scale and replace the 'Scale * Register' with '0'. + Scale = IC.popOperand(); + IC.pushOperand(IC_IMM); + IC.popOperator(); + } else { + State = IES_ERROR; + } + break; + } + PrevState = CurrState; + } + void onIdentifierExpr(const MCExpr *SymRef, StringRef SymRefName) { + PrevState = State; + switch (State) { + default: + State = IES_ERROR; + break; + case IES_PLUS: + case IES_MINUS: + State = IES_INTEGER; + Sym = SymRef; + SymName = SymRefName; + IC.pushOperand(IC_IMM); + break; + } + } + void onInteger(int64_t TmpInt) { + IntelExprState CurrState = State; + switch (State) { + default: + State = IES_ERROR; + break; + case IES_PLUS: + case IES_MINUS: + case IES_DIVIDE: + case IES_MULTIPLY: + case IES_LPAREN: + State = IES_INTEGER; + if (PrevState == IES_REGISTER && CurrState == IES_MULTIPLY) { + // Index Register - Register * Scale + assert (!IndexReg && "IndexReg already set!"); + IndexReg = TmpReg; + Scale = TmpInt; + // Get the scale and replace the 'Register * Scale' with '0'. + IC.popOperator(); + } else if ((PrevState == IES_PLUS || PrevState == IES_MINUS || + PrevState == IES_MULTIPLY || PrevState == IES_DIVIDE || + PrevState == IES_LPAREN || PrevState == IES_LBRAC) && + CurrState == IES_MINUS) { + // Unary minus. No need to pop the minus operand because it was never + // pushed. + IC.pushOperand(IC_IMM, -TmpInt); // Push -Imm. 
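// For instance, for the bare operand [-42] the '-' arrives while the state
// machine is still in IES_PLUS, so no IC_MINUS operator was pushed and this
// branch pushes the already-negated immediate -42 onto the postfix stack.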
+ } else { + IC.pushOperand(IC_IMM, TmpInt); + } + break; + } + PrevState = CurrState; + } + void onStar() { + PrevState = State; + switch (State) { + default: + State = IES_ERROR; + break; + case IES_INTEGER: + case IES_REGISTER: + case IES_RPAREN: + State = IES_MULTIPLY; + IC.pushOperator(IC_MULTIPLY); + break; + } + } + void onDivide() { + PrevState = State; + switch (State) { + default: + State = IES_ERROR; + break; + case IES_INTEGER: + case IES_RPAREN: + State = IES_DIVIDE; + IC.pushOperator(IC_DIVIDE); + break; + } + } + void onLBrac() { + PrevState = State; + switch (State) { + default: + State = IES_ERROR; + break; + case IES_RBRAC: + State = IES_PLUS; + IC.pushOperator(IC_PLUS); + break; + } + } + void onRBrac() { + IntelExprState CurrState = State; + switch (State) { + default: + State = IES_ERROR; + break; + case IES_INTEGER: + case IES_REGISTER: + case IES_RPAREN: + State = IES_RBRAC; + if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) { + // If we already have a BaseReg, then assume this is the IndexReg with + // a scale of 1. + if (!BaseReg) { + BaseReg = TmpReg; + } else { + assert (!IndexReg && "BaseReg/IndexReg already set!"); + IndexReg = TmpReg; + Scale = 1; + } + } + break; + } + PrevState = CurrState; + } + void onLParen() { + IntelExprState CurrState = State; + switch (State) { + default: + State = IES_ERROR; + break; + case IES_PLUS: + case IES_MINUS: + case IES_MULTIPLY: + case IES_DIVIDE: + case IES_LPAREN: + // FIXME: We don't handle this type of unary minus, yet. + if ((PrevState == IES_PLUS || PrevState == IES_MINUS || + PrevState == IES_MULTIPLY || PrevState == IES_DIVIDE || + PrevState == IES_LPAREN || PrevState == IES_LBRAC) && + CurrState == IES_MINUS) { + State = IES_ERROR; + break; + } + State = IES_LPAREN; + IC.pushOperator(IC_LPAREN); + break; + } + PrevState = CurrState; + } + void onRParen() { + PrevState = State; + switch (State) { + default: + State = IES_ERROR; + break; + case IES_INTEGER: + case IES_REGISTER: + case IES_RPAREN: + State = IES_RPAREN; + IC.pushOperator(IC_RPAREN); + break; + } + } + }; + MCAsmParser &getParser() const { return Parser; } MCAsmLexer &getLexer() const { return Parser.getLexer(); } bool Error(SMLoc L, const Twine &Msg, - ArrayRef<SMRange> Ranges = ArrayRef<SMRange>(), + ArrayRef<SMRange> Ranges = None, bool MatchingInlineAsm = false) { if (MatchingInlineAsm) return true; return Parser.Error(L, Msg, Ranges); @@ -57,21 +491,25 @@ private: X86Operand *ParseOperand(); X86Operand *ParseATTOperand(); X86Operand *ParseIntelOperand(); - X86Operand *ParseIntelOffsetOfOperator(SMLoc StartLoc); - X86Operand *ParseIntelOperator(SMLoc StartLoc, unsigned OpKind); - X86Operand *ParseIntelMemOperand(unsigned SegReg, uint64_t ImmDisp, + X86Operand *ParseIntelOffsetOfOperator(); + X86Operand *ParseIntelDotOperator(const MCExpr *Disp, const MCExpr *&NewDisp); + X86Operand *ParseIntelOperator(unsigned OpKind); + X86Operand *ParseIntelMemOperand(unsigned SegReg, int64_t ImmDisp, SMLoc StartLoc); - X86Operand *ParseIntelBracExpression(unsigned SegReg, uint64_t ImmDisp, - unsigned Size); - X86Operand *ParseIntelVarWithQualifier(const MCExpr *&Disp, - SMLoc &IdentStart); - X86Operand *ParseMemOperand(unsigned SegReg, SMLoc StartLoc); + X86Operand *ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End); + X86Operand *ParseIntelBracExpression(unsigned SegReg, SMLoc Start, + int64_t ImmDisp, unsigned Size); + X86Operand *ParseIntelIdentifier(const MCExpr *&Val, StringRef &Identifier, + InlineAsmIdentifierInfo &Info, + bool 
IsUnevaluatedOperand, SMLoc &End); - X86Operand *CreateMemForInlineAsm(const MCExpr *Disp, SMLoc Start, SMLoc End, - SMLoc SizeDirLoc, unsigned Size); + X86Operand *ParseMemOperand(unsigned SegReg, SMLoc StartLoc); - bool ParseIntelDotOperator(const MCExpr *Disp, const MCExpr **NewDisp, - SmallString<64> &Err); + X86Operand *CreateMemForInlineAsm(unsigned SegReg, const MCExpr *Disp, + unsigned BaseReg, unsigned IndexReg, + unsigned Scale, SMLoc Start, SMLoc End, + unsigned Size, StringRef Identifier, + InlineAsmIdentifierInfo &Info); bool ParseDirectiveWord(unsigned Size, SMLoc L); bool ParseDirectiveCode(StringRef IDVal, SMLoc L); @@ -101,6 +539,10 @@ private: setAvailableFeatures(FB); } + bool isParsingIntelSyntax() { + return getParser().getAssemblerDialect(); + } + /// @name Auto-generated Matcher Functions /// { @@ -123,10 +565,6 @@ public: SmallVectorImpl<MCParsedAsmOperand*> &Operands); virtual bool ParseDirective(AsmToken DirectiveID); - - bool isParsingIntelSyntax() { - return getParser().getAssemblerDialect(); - } }; } // end anonymous namespace @@ -176,6 +614,8 @@ struct X86Operand : public MCParsedAsmOperand { SMLoc StartLoc, EndLoc; SMLoc OffsetOfLoc; + StringRef SymName; + void *OpDecl; bool AddressOf; struct TokOp { @@ -210,6 +650,9 @@ struct X86Operand : public MCParsedAsmOperand { X86Operand(KindTy K, SMLoc Start, SMLoc End) : Kind(K), StartLoc(Start), EndLoc(End) {} + StringRef getSymName() { return SymName; } + void *getOpDecl() { return OpDecl; } + /// getStartLoc - Get the location of the first token of this operand. SMLoc getStartLoc() const { return StartLoc; } /// getEndLoc - Get the location of the last token of this operand. @@ -473,11 +916,15 @@ struct X86Operand : public MCParsedAsmOperand { static X86Operand *CreateReg(unsigned RegNo, SMLoc StartLoc, SMLoc EndLoc, bool AddressOf = false, - SMLoc OffsetOfLoc = SMLoc()) { + SMLoc OffsetOfLoc = SMLoc(), + StringRef SymName = StringRef(), + void *OpDecl = 0) { X86Operand *Res = new X86Operand(Register, StartLoc, EndLoc); Res->Reg.RegNo = RegNo; Res->AddressOf = AddressOf; Res->OffsetOfLoc = OffsetOfLoc; + Res->SymName = SymName; + Res->OpDecl = OpDecl; return Res; } @@ -489,7 +936,8 @@ struct X86Operand : public MCParsedAsmOperand { /// Create an absolute memory operand. static X86Operand *CreateMem(const MCExpr *Disp, SMLoc StartLoc, SMLoc EndLoc, - unsigned Size = 0) { + unsigned Size = 0, StringRef SymName = StringRef(), + void *OpDecl = 0) { X86Operand *Res = new X86Operand(Memory, StartLoc, EndLoc); Res->Mem.SegReg = 0; Res->Mem.Disp = Disp; @@ -497,7 +945,9 @@ struct X86Operand : public MCParsedAsmOperand { Res->Mem.IndexReg = 0; Res->Mem.Scale = 1; Res->Mem.Size = Size; - Res->AddressOf = false; + Res->SymName = SymName; + Res->OpDecl = OpDecl; + Res->AddressOf = false; return Res; } @@ -505,7 +955,9 @@ struct X86Operand : public MCParsedAsmOperand { static X86Operand *CreateMem(unsigned SegReg, const MCExpr *Disp, unsigned BaseReg, unsigned IndexReg, unsigned Scale, SMLoc StartLoc, SMLoc EndLoc, - unsigned Size = 0) { + unsigned Size = 0, + StringRef SymName = StringRef(), + void *OpDecl = 0) { // We should never just have a displacement, that should be parsed as an // absolute memory operand. 
assert((SegReg || BaseReg || IndexReg) && "Invalid memory operand!"); @@ -520,7 +972,9 @@ struct X86Operand : public MCParsedAsmOperand { Res->Mem.IndexReg = IndexReg; Res->Mem.Scale = Scale; Res->Mem.Size = Size; - Res->AddressOf = false; + Res->SymName = SymName; + Res->OpDecl = OpDecl; + Res->AddressOf = false; return Res; } }; @@ -676,306 +1130,104 @@ static unsigned getIntelMemOperandSize(StringRef OpStr) { return Size; } -enum IntelBracExprState { - IBES_START, - IBES_LBRAC, - IBES_RBRAC, - IBES_REGISTER, - IBES_REGISTER_STAR, - IBES_REGISTER_STAR_INTEGER, - IBES_INTEGER, - IBES_INTEGER_STAR, - IBES_INDEX_REGISTER, - IBES_IDENTIFIER, - IBES_DISP_EXPR, - IBES_MINUS, - IBES_ERROR -}; - -class IntelBracExprStateMachine { - IntelBracExprState State; - unsigned BaseReg, IndexReg, Scale; - int64_t Disp; - - unsigned TmpReg; - int64_t TmpInteger; - - bool isPlus; - -public: - IntelBracExprStateMachine(MCAsmParser &parser, int64_t disp) : - State(IBES_START), BaseReg(0), IndexReg(0), Scale(1), Disp(disp), - TmpReg(0), TmpInteger(0), isPlus(true) {} - - unsigned getBaseReg() { return BaseReg; } - unsigned getIndexReg() { return IndexReg; } - unsigned getScale() { return Scale; } - int64_t getDisp() { return Disp; } - bool isValidEndState() { return State == IBES_RBRAC; } - - void onPlus() { - switch (State) { - default: - State = IBES_ERROR; - break; - case IBES_INTEGER: - State = IBES_START; - if (isPlus) - Disp += TmpInteger; - else - Disp -= TmpInteger; - break; - case IBES_REGISTER: - State = IBES_START; - // If we already have a BaseReg, then assume this is the IndexReg with a - // scale of 1. - if (!BaseReg) { - BaseReg = TmpReg; - } else { - assert (!IndexReg && "BaseReg/IndexReg already set!"); - IndexReg = TmpReg; - Scale = 1; - } - break; - case IBES_INDEX_REGISTER: - State = IBES_START; - break; - } - isPlus = true; - } - void onMinus() { - switch (State) { - default: - State = IBES_ERROR; - break; - case IBES_START: - State = IBES_MINUS; - break; - case IBES_INTEGER: - State = IBES_START; - if (isPlus) - Disp += TmpInteger; - else - Disp -= TmpInteger; - break; - case IBES_REGISTER: - State = IBES_START; - // If we already have a BaseReg, then assume this is the IndexReg with a - // scale of 1. - if (!BaseReg) { - BaseReg = TmpReg; - } else { - assert (!IndexReg && "BaseReg/IndexReg already set!"); - IndexReg = TmpReg; - Scale = 1; - } - break; - case IBES_INDEX_REGISTER: - State = IBES_START; - break; - } - isPlus = false; - } - void onRegister(unsigned Reg) { - switch (State) { - default: - State = IBES_ERROR; - break; - case IBES_START: - State = IBES_REGISTER; - TmpReg = Reg; - break; - case IBES_INTEGER_STAR: - assert (!IndexReg && "IndexReg already set!"); - State = IBES_INDEX_REGISTER; - IndexReg = Reg; - Scale = TmpInteger; - break; - } - } - void onDispExpr() { - switch (State) { - default: - State = IBES_ERROR; - break; - case IBES_START: - State = IBES_DISP_EXPR; - break; +X86Operand * +X86AsmParser::CreateMemForInlineAsm(unsigned SegReg, const MCExpr *Disp, + unsigned BaseReg, unsigned IndexReg, + unsigned Scale, SMLoc Start, SMLoc End, + unsigned Size, StringRef Identifier, + InlineAsmIdentifierInfo &Info){ + if (isa<MCSymbolRefExpr>(Disp)) { + // If this is not a VarDecl then assume it is a FuncDecl or some other label + // reference. We need an 'r' constraint here, so we need to create register + // operand to ensure proper matching. Just pick a GPR based on the size of + // a pointer. + if (!Info.IsVarDecl) { + unsigned RegNo = is64BitMode() ? 
X86::RBX : X86::EBX; + return X86Operand::CreateReg(RegNo, Start, End, /*AddressOf=*/true, + SMLoc(), Identifier, Info.OpDecl); } - } - void onInteger(int64_t TmpInt) { - switch (State) { - default: - State = IBES_ERROR; - break; - case IBES_START: - State = IBES_INTEGER; - TmpInteger = TmpInt; - break; - case IBES_MINUS: - State = IBES_INTEGER; - TmpInteger = TmpInt; - break; - case IBES_REGISTER_STAR: - assert (!IndexReg && "IndexReg already set!"); - State = IBES_INDEX_REGISTER; - IndexReg = TmpReg; - Scale = TmpInt; - break; - } - } - void onStar() { - switch (State) { - default: - State = IBES_ERROR; - break; - case IBES_INTEGER: - State = IBES_INTEGER_STAR; - break; - case IBES_REGISTER: - State = IBES_REGISTER_STAR; - break; - } - } - void onLBrac() { - switch (State) { - default: - State = IBES_ERROR; - break; - case IBES_RBRAC: - State = IBES_START; - isPlus = true; - break; - } - } - void onRBrac() { - switch (State) { - default: - State = IBES_ERROR; - break; - case IBES_DISP_EXPR: - State = IBES_RBRAC; - break; - case IBES_INTEGER: - State = IBES_RBRAC; - if (isPlus) - Disp += TmpInteger; - else - Disp -= TmpInteger; - break; - case IBES_REGISTER: - State = IBES_RBRAC; - // If we already have a BaseReg, then assume this is the IndexReg with a - // scale of 1. - if (!BaseReg) { - BaseReg = TmpReg; - } else { - assert (!IndexReg && "BaseReg/IndexReg already set!"); - IndexReg = TmpReg; - Scale = 1; - } - break; - case IBES_INDEX_REGISTER: - State = IBES_RBRAC; - break; - } - } -}; - -X86Operand *X86AsmParser::CreateMemForInlineAsm(const MCExpr *Disp, SMLoc Start, - SMLoc End, SMLoc SizeDirLoc, - unsigned Size) { - bool NeedSizeDir = false; - bool IsVarDecl = false; - if (const MCSymbolRefExpr *SymRef = dyn_cast<MCSymbolRefExpr>(Disp)) { - const MCSymbol &Sym = SymRef->getSymbol(); - // FIXME: The SemaLookup will fail if the name is anything other then an - // identifier. - // FIXME: Pass a valid SMLoc. - unsigned tLength, tSize, tType; - SemaCallback->LookupInlineAsmIdentifier(Sym.getName(), NULL, tLength, - tSize, tType, IsVarDecl); if (!Size) { - Size = tType * 8; // Size is in terms of bits in this context. - NeedSizeDir = Size > 0; + Size = Info.Type * 8; // Size is in terms of bits in this context. + if (Size) + InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_SizeDirective, Start, + /*Len=*/0, Size)); } } - // If this is not a VarDecl then assume it is a FuncDecl or some other label - // reference. We need an 'r' constraint here, so we need to create register - // operand to ensure proper matching. Just pick a GPR based on the size of - // a pointer. - if (!IsVarDecl) { - unsigned RegNo = is64BitMode() ? X86::RBX : X86::EBX; - return X86Operand::CreateReg(RegNo, Start, End, /*AddressOf=*/true); - } - - if (NeedSizeDir) - InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_SizeDirective, SizeDirLoc, - /*Len*/0, Size)); - // When parsing inline assembly we set the base register to a non-zero value - // as we don't know the actual value at this time. This is necessary to + // if we don't know the actual value at this time. This is necessary to // get the matching correct in some cases. - return X86Operand::CreateMem(/*SegReg*/0, Disp, /*BaseReg*/1, /*IndexReg*/0, - /*Scale*/1, Start, End, Size); + BaseReg = BaseReg ? 
BaseReg : 1; + return X86Operand::CreateMem(SegReg, Disp, BaseReg, IndexReg, Scale, Start, + End, Size, Identifier, Info.OpDecl); } -X86Operand *X86AsmParser::ParseIntelBracExpression(unsigned SegReg, - uint64_t ImmDisp, - unsigned Size) { - const AsmToken &Tok = Parser.getTok(); - SMLoc Start = Tok.getLoc(), End = Tok.getEndLoc(); - - // Eat '[' - if (getLexer().isNot(AsmToken::LBrac)) - return ErrorOperand(Start, "Expected '[' token!"); - Parser.Lex(); - - unsigned TmpReg = 0; - - // Try to handle '[' 'Symbol' ']' - if (getLexer().is(AsmToken::Identifier)) { - if (ParseRegister(TmpReg, Start, End)) { - const MCExpr *Disp; - SMLoc IdentStart = Tok.getLoc(); - if (getParser().parseExpression(Disp, End)) - return 0; - - if (X86Operand *Err = ParseIntelVarWithQualifier(Disp, IdentStart)) - return Err; - - if (getLexer().isNot(AsmToken::RBrac)) - return ErrorOperand(Parser.getTok().getLoc(), "Expected ']' token!"); - - // FIXME: We don't handle 'ImmDisp' '[' 'Symbol' ']'. - if (ImmDisp) - return ErrorOperand(Start, "Unsupported immediate displacement!"); - - // Adjust the EndLoc due to the ']'. - End = SMLoc::getFromPointer(Parser.getTok().getEndLoc().getPointer()-1); - Parser.Lex(); - if (!isParsingInlineAsm()) - return X86Operand::CreateMem(Disp, Start, End, Size); - - // We want the size directive before the '['. - SMLoc SizeDirLoc = SMLoc::getFromPointer(Start.getPointer()-1); - return CreateMemForInlineAsm(Disp, Start, End, SizeDirLoc, Size); +static void +RewriteIntelBracExpression(SmallVectorImpl<AsmRewrite> *AsmRewrites, + StringRef SymName, int64_t ImmDisp, + int64_t FinalImmDisp, SMLoc &BracLoc, + SMLoc &StartInBrac, SMLoc &End) { + // Remove the '[' and ']' from the IR string. + AsmRewrites->push_back(AsmRewrite(AOK_Skip, BracLoc, 1)); + AsmRewrites->push_back(AsmRewrite(AOK_Skip, End, 1)); + + // If ImmDisp is non-zero, then we parsed a displacement before the + // bracketed expression (i.e., ImmDisp [ BaseReg + Scale*IndexReg + Disp]) + // If ImmDisp doesn't match the displacement computed by the state machine + // then we have an additional displacement in the bracketed expression. + if (ImmDisp != FinalImmDisp) { + if (ImmDisp) { + // We have an immediate displacement before the bracketed expression. + // Adjust this to match the final immediate displacement. + bool Found = false; + for (SmallVectorImpl<AsmRewrite>::iterator I = AsmRewrites->begin(), + E = AsmRewrites->end(); I != E; ++I) { + if ((*I).Loc.getPointer() > BracLoc.getPointer()) + continue; + if ((*I).Kind == AOK_ImmPrefix || (*I).Kind == AOK_Imm) { + assert (!Found && "ImmDisp already rewritten."); + (*I).Kind = AOK_Imm; + (*I).Len = BracLoc.getPointer() - (*I).Loc.getPointer(); + (*I).Val = FinalImmDisp; + Found = true; + break; + } + } + assert (Found && "Unable to rewrite ImmDisp."); + } else { + // We have a symbolic and an immediate displacement, but no displacement + // before the bracketed expression. Put the immediate displacement + // before the bracketed expression. + AsmRewrites->push_back(AsmRewrite(AOK_Imm, BracLoc, 0, FinalImmDisp)); } } + // Remove all the ImmPrefix rewrites within the brackets. + for (SmallVectorImpl<AsmRewrite>::iterator I = AsmRewrites->begin(), + E = AsmRewrites->end(); I != E; ++I) { + if ((*I).Loc.getPointer() < StartInBrac.getPointer()) + continue; + if ((*I).Kind == AOK_ImmPrefix) + (*I).Kind = AOK_Delete; + } + const char *SymLocPtr = SymName.data(); + // Skip everything before the symbol. 
+ if (unsigned Len = SymLocPtr - StartInBrac.getPointer()) { + assert(Len > 0 && "Expected a non-negative length."); + AsmRewrites->push_back(AsmRewrite(AOK_Skip, StartInBrac, Len)); + } + // Skip everything after the symbol. + if (unsigned Len = End.getPointer() - (SymLocPtr + SymName.size())) { + SMLoc Loc = SMLoc::getFromPointer(SymLocPtr + SymName.size()); + assert(Len > 0 && "Expected a non-negative length."); + AsmRewrites->push_back(AsmRewrite(AOK_Skip, Loc, Len)); + } +} - // Parse [ BaseReg + Scale*IndexReg + Disp ]. We may have already parsed an - // immediate displacement before the bracketed expression. - bool Done = false; - IntelBracExprStateMachine SM(Parser, ImmDisp); - - // If we parsed a register, then the end loc has already been set and - // the identifier has already been lexed. We also need to update the - // state. - if (TmpReg) - SM.onRegister(TmpReg); +X86Operand * +X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) { + const AsmToken &Tok = Parser.getTok(); - const MCExpr *Disp = 0; + bool Done = false; while (!Done) { bool UpdateLocLex = true; @@ -983,6 +1235,10 @@ X86Operand *X86AsmParser::ParseIntelBracExpression(unsigned SegReg, // identifier. Don't try an parse it as a register. if (Tok.getString().startswith(".")) break; + + // If we're parsing an immediate expression, we don't expect a '['. + if (SM.getStopOnLBrac() && getLexer().getKind() == AsmToken::LBrac) + break; switch (getLexer().getKind()) { default: { @@ -992,139 +1248,185 @@ X86Operand *X86AsmParser::ParseIntelBracExpression(unsigned SegReg, } return ErrorOperand(Tok.getLoc(), "Unexpected token!"); } + case AsmToken::EndOfStatement: { + Done = true; + break; + } case AsmToken::Identifier: { - // This could be a register or a displacement expression. - if(!ParseRegister(TmpReg, Start, End)) { + // This could be a register or a symbolic displacement. + unsigned TmpReg; + const MCExpr *Val; + SMLoc IdentLoc = Tok.getLoc(); + StringRef Identifier = Tok.getString(); + if(!ParseRegister(TmpReg, IdentLoc, End)) { SM.onRegister(TmpReg); UpdateLocLex = false; break; - } else if (!getParser().parseExpression(Disp, End)) { - SM.onDispExpr(); + } else { + if (!isParsingInlineAsm()) { + if (getParser().parsePrimaryExpr(Val, End)) + return ErrorOperand(Tok.getLoc(), "Unexpected identifier!"); + } else { + InlineAsmIdentifierInfo &Info = SM.getIdentifierInfo(); + if (X86Operand *Err = ParseIntelIdentifier(Val, Identifier, Info, + /*Unevaluated*/ false, End)) + return Err; + } + SM.onIdentifierExpr(Val, Identifier); UpdateLocLex = false; break; } return ErrorOperand(Tok.getLoc(), "Unexpected identifier!"); } - case AsmToken::Integer: { - int64_t Val = Tok.getIntVal(); - SM.onInteger(Val); + case AsmToken::Integer: + if (isParsingInlineAsm() && SM.getAddImmPrefix()) + InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_ImmPrefix, + Tok.getLoc())); + SM.onInteger(Tok.getIntVal()); break; - } case AsmToken::Plus: SM.onPlus(); break; case AsmToken::Minus: SM.onMinus(); break; case AsmToken::Star: SM.onStar(); break; + case AsmToken::Slash: SM.onDivide(); break; case AsmToken::LBrac: SM.onLBrac(); break; case AsmToken::RBrac: SM.onRBrac(); break; + case AsmToken::LParen: SM.onLParen(); break; + case AsmToken::RParen: SM.onRParen(); break; } + if (SM.hadError()) + return ErrorOperand(Tok.getLoc(), "Unexpected token!"); + if (!Done && UpdateLocLex) { End = Tok.getLoc(); Parser.Lex(); // Consume the token. 
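// Worked example: for the bracketed operand [eax + 4*ebx + 16] this loop
// issues onRegister(eax), onPlus (recording EAX as the base register),
// onInteger(4), onStar, onRegister(ebx) (recording EBX as the index register
// with scale 4), onPlus, onInteger(16) and finally onRBrac; afterwards
// getImm() returns the displacement 16 and isValidEndState() is true.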
} } + return 0; +} - if (!Disp) - Disp = MCConstantExpr::Create(SM.getDisp(), getContext()); +X86Operand *X86AsmParser::ParseIntelBracExpression(unsigned SegReg, SMLoc Start, + int64_t ImmDisp, + unsigned Size) { + const AsmToken &Tok = Parser.getTok(); + SMLoc BracLoc = Tok.getLoc(), End = Tok.getEndLoc(); + if (getLexer().isNot(AsmToken::LBrac)) + return ErrorOperand(BracLoc, "Expected '[' token!"); + Parser.Lex(); // Eat '[' + + SMLoc StartInBrac = Tok.getLoc(); + // Parse [ Symbol + ImmDisp ] and [ BaseReg + Scale*IndexReg + ImmDisp ]. We + // may have already parsed an immediate displacement before the bracketed + // expression. + IntelExprStateMachine SM(ImmDisp, /*StopOnLBrac=*/false, /*AddImmPrefix=*/true); + if (X86Operand *Err = ParseIntelExpression(SM, End)) + return Err; + + const MCExpr *Disp; + if (const MCExpr *Sym = SM.getSym()) { + // A symbolic displacement. + Disp = Sym; + if (isParsingInlineAsm()) + RewriteIntelBracExpression(InstInfo->AsmRewrites, SM.getSymName(), + ImmDisp, SM.getImm(), BracLoc, StartInBrac, + End); + } else { + // An immediate displacement only. + Disp = MCConstantExpr::Create(SM.getImm(), getContext()); + } // Parse the dot operator (e.g., [ebx].foo.bar). if (Tok.getString().startswith(".")) { - SmallString<64> Err; const MCExpr *NewDisp; - if (ParseIntelDotOperator(Disp, &NewDisp, Err)) - return ErrorOperand(Tok.getLoc(), Err); + if (X86Operand *Err = ParseIntelDotOperator(Disp, NewDisp)) + return Err; - End = Parser.getTok().getEndLoc(); + End = Tok.getEndLoc(); Parser.Lex(); // Eat the field. Disp = NewDisp; } int BaseReg = SM.getBaseReg(); int IndexReg = SM.getIndexReg(); - - // handle [-42] - if (!BaseReg && !IndexReg) { - if (!SegReg) - return X86Operand::CreateMem(Disp, Start, End); - else - return X86Operand::CreateMem(SegReg, Disp, 0, 0, 1, Start, End, Size); + int Scale = SM.getScale(); + if (!isParsingInlineAsm()) { + // handle [-42] + if (!BaseReg && !IndexReg) { + if (!SegReg) + return X86Operand::CreateMem(Disp, Start, End, Size); + else + return X86Operand::CreateMem(SegReg, Disp, 0, 0, 1, Start, End, Size); + } + return X86Operand::CreateMem(SegReg, Disp, BaseReg, IndexReg, Scale, Start, + End, Size); } - int Scale = SM.getScale(); - return X86Operand::CreateMem(SegReg, Disp, BaseReg, IndexReg, Scale, - Start, End, Size); + InlineAsmIdentifierInfo &Info = SM.getIdentifierInfo(); + return CreateMemForInlineAsm(SegReg, Disp, BaseReg, IndexReg, Scale, Start, + End, Size, SM.getSymName(), Info); } // Inline assembly may use variable names with namespace alias qualifiers. -X86Operand *X86AsmParser::ParseIntelVarWithQualifier(const MCExpr *&Disp, - SMLoc &IdentStart) { - // We should only see Foo::Bar if we're parsing inline assembly. - if (!isParsingInlineAsm()) - return 0; - - // If we don't see a ':' then there can't be a qualifier. - if (getLexer().isNot(AsmToken::Colon)) - return 0; +X86Operand *X86AsmParser::ParseIntelIdentifier(const MCExpr *&Val, + StringRef &Identifier, + InlineAsmIdentifierInfo &Info, + bool IsUnevaluatedOperand, + SMLoc &End) { + assert (isParsingInlineAsm() && "Expected to be parsing inline assembly."); + Val = 0; + StringRef LineBuf(Identifier.data()); + SemaCallback->LookupInlineAsmIdentifier(LineBuf, Info, IsUnevaluatedOperand); - bool Done = false; const AsmToken &Tok = Parser.getTok(); - SMLoc IdentEnd = Tok.getEndLoc(); - while (!Done) { - switch (getLexer().getKind()) { - default: - Done = true; - break; - case AsmToken::Colon: - getLexer().Lex(); // Consume ':'. 
- if (getLexer().isNot(AsmToken::Colon)) - return ErrorOperand(Tok.getLoc(), "Expected ':' token!"); - getLexer().Lex(); // Consume second ':'. - if (getLexer().isNot(AsmToken::Identifier)) - return ErrorOperand(Tok.getLoc(), "Expected an identifier token!"); - break; - case AsmToken::Identifier: - IdentEnd = Tok.getEndLoc(); - getLexer().Lex(); // Consume the identifier. - break; - } + + // Advance the token stream until the end of the current token is + // after the end of what the frontend claimed. + const char *EndPtr = Tok.getLoc().getPointer() + LineBuf.size(); + while (true) { + End = Tok.getEndLoc(); + getLexer().Lex(); + + assert(End.getPointer() <= EndPtr && "frontend claimed part of a token?"); + if (End.getPointer() == EndPtr) break; } - size_t Len = IdentEnd.getPointer() - IdentStart.getPointer(); - StringRef Identifier(IdentStart.getPointer(), Len); + + // Create the symbol reference. + Identifier = LineBuf; MCSymbol *Sym = getContext().GetOrCreateSymbol(Identifier); MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None; - Disp = MCSymbolRefExpr::Create(Sym, Variant, getParser().getContext()); + Val = MCSymbolRefExpr::Create(Sym, Variant, getParser().getContext()); return 0; } /// ParseIntelMemOperand - Parse intel style memory operand. X86Operand *X86AsmParser::ParseIntelMemOperand(unsigned SegReg, - uint64_t ImmDisp, + int64_t ImmDisp, SMLoc Start) { const AsmToken &Tok = Parser.getTok(); SMLoc End; unsigned Size = getIntelMemOperandSize(Tok.getString()); if (Size) { - Parser.Lex(); - assert ((Tok.getString() == "PTR" || Tok.getString() == "ptr") && - "Unexpected token!"); - Parser.Lex(); + Parser.Lex(); // Eat operand size (e.g., byte, word). + if (Tok.getString() != "PTR" && Tok.getString() != "ptr") + return ErrorOperand(Start, "Expected 'PTR' or 'ptr' token!"); + Parser.Lex(); // Eat ptr. } // Parse ImmDisp [ BaseReg + Scale*IndexReg + Disp ]. if (getLexer().is(AsmToken::Integer)) { - const AsmToken &IntTok = Parser.getTok(); if (isParsingInlineAsm()) InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_ImmPrefix, - IntTok.getLoc())); - uint64_t ImmDisp = IntTok.getIntVal(); + Tok.getLoc())); + int64_t ImmDisp = Tok.getIntVal(); Parser.Lex(); // Eat the integer. if (getLexer().isNot(AsmToken::LBrac)) return ErrorOperand(Start, "Expected '[' token!"); - return ParseIntelBracExpression(SegReg, ImmDisp, Size); + return ParseIntelBracExpression(SegReg, Start, ImmDisp, Size); } if (getLexer().is(AsmToken::LBrac)) - return ParseIntelBracExpression(SegReg, ImmDisp, Size); + return ParseIntelBracExpression(SegReg, Start, ImmDisp, Size); if (!ParseRegister(SegReg, Start, End)) { // Handel SegReg : [ ... 
] @@ -1133,37 +1435,37 @@ X86Operand *X86AsmParser::ParseIntelMemOperand(unsigned SegReg, Parser.Lex(); // Eat : if (getLexer().isNot(AsmToken::LBrac)) return ErrorOperand(Start, "Expected '[' token!"); - return ParseIntelBracExpression(SegReg, ImmDisp, Size); + return ParseIntelBracExpression(SegReg, Start, ImmDisp, Size); } - const MCExpr *Disp = MCConstantExpr::Create(0, getParser().getContext()); - SMLoc IdentStart = Tok.getLoc(); - if (getParser().parseExpression(Disp, End)) - return 0; + const MCExpr *Val; + if (!isParsingInlineAsm()) { + if (getParser().parsePrimaryExpr(Val, End)) + return ErrorOperand(Tok.getLoc(), "Unexpected token!"); - if (!isParsingInlineAsm()) - return X86Operand::CreateMem(Disp, Start, End, Size); + return X86Operand::CreateMem(Val, Start, End, Size); + } - if (X86Operand *Err = ParseIntelVarWithQualifier(Disp, IdentStart)) + InlineAsmIdentifierInfo Info; + StringRef Identifier = Tok.getString(); + if (X86Operand *Err = ParseIntelIdentifier(Val, Identifier, Info, + /*Unevaluated*/ false, End)) return Err; - - return CreateMemForInlineAsm(Disp, Start, End, Start, Size); + return CreateMemForInlineAsm(/*SegReg=*/0, Val, /*BaseReg=*/0,/*IndexReg=*/0, + /*Scale=*/1, Start, End, Size, Identifier, Info); } /// Parse the '.' operator. -bool X86AsmParser::ParseIntelDotOperator(const MCExpr *Disp, - const MCExpr **NewDisp, - SmallString<64> &Err) { - AsmToken Tok = *&Parser.getTok(); - uint64_t OrigDispVal, DotDispVal; +X86Operand *X86AsmParser::ParseIntelDotOperator(const MCExpr *Disp, + const MCExpr *&NewDisp) { + const AsmToken &Tok = Parser.getTok(); + int64_t OrigDispVal, DotDispVal; // FIXME: Handle non-constant expressions. - if (const MCConstantExpr *OrigDisp = dyn_cast<MCConstantExpr>(Disp)) { + if (const MCConstantExpr *OrigDisp = dyn_cast<MCConstantExpr>(Disp)) OrigDispVal = OrigDisp->getValue(); - } else { - Err = "Non-constant offsets are not supported!"; - return true; - } + else + return ErrorOperand(Tok.getLoc(), "Non-constant offsets are not supported!"); // Drop the '.'. StringRef DotDispStr = Tok.getString().drop_front(1); @@ -1173,23 +1475,15 @@ bool X86AsmParser::ParseIntelDotOperator(const MCExpr *Disp, APInt DotDisp; DotDispStr.getAsInteger(10, DotDisp); DotDispVal = DotDisp.getZExtValue(); - } else if (Tok.is(AsmToken::Identifier)) { - // We should only see an identifier when parsing the original inline asm. - // The front-end should rewrite this in terms of immediates. - assert (isParsingInlineAsm() && "Unexpected field name!"); - + } else if (isParsingInlineAsm() && Tok.is(AsmToken::Identifier)) { unsigned DotDisp; std::pair<StringRef, StringRef> BaseMember = DotDispStr.split('.'); if (SemaCallback->LookupInlineAsmField(BaseMember.first, BaseMember.second, - DotDisp)) { - Err = "Unable to lookup field reference!"; - return true; - } + DotDisp)) + return ErrorOperand(Tok.getLoc(), "Unable to lookup field reference!"); DotDispVal = DotDisp; - } else { - Err = "Unexpected token type!"; - return true; - } + } else + return ErrorOperand(Tok.getLoc(), "Unexpected token type!"); if (isParsingInlineAsm() && Tok.is(AsmToken::Identifier)) { SMLoc Loc = SMLoc::getFromPointer(DotDispStr.data()); @@ -1199,22 +1493,24 @@ bool X86AsmParser::ParseIntelDotOperator(const MCExpr *Disp, Val)); } - *NewDisp = MCConstantExpr::Create(OrigDispVal + DotDispVal, getContext()); - return false; + NewDisp = MCConstantExpr::Create(OrigDispVal + DotDispVal, getContext()); + return 0; } /// Parse the 'offset' operator. 
This operator is used to specify the /// location rather then the content of a variable. -X86Operand *X86AsmParser::ParseIntelOffsetOfOperator(SMLoc Start) { - SMLoc OffsetOfLoc = Start; +X86Operand *X86AsmParser::ParseIntelOffsetOfOperator() { + const AsmToken &Tok = Parser.getTok(); + SMLoc OffsetOfLoc = Tok.getLoc(); Parser.Lex(); // Eat offset. - Start = Parser.getTok().getLoc(); - assert (Parser.getTok().is(AsmToken::Identifier) && "Expected an identifier"); - SMLoc End; const MCExpr *Val; - if (getParser().parseExpression(Val, End)) - return ErrorOperand(Start, "Unable to parse expression!"); + InlineAsmIdentifierInfo Info; + SMLoc Start = Tok.getLoc(), End; + StringRef Identifier = Tok.getString(); + if (X86Operand *Err = ParseIntelIdentifier(Val, Identifier, Info, + /*Unevaluated*/ false, End)) + return Err; // Don't emit the offset operator. InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Skip, OffsetOfLoc, 7)); @@ -1224,7 +1520,7 @@ X86Operand *X86AsmParser::ParseIntelOffsetOfOperator(SMLoc Start) { // the size of a pointer. unsigned RegNo = is64BitMode() ? X86::RBX : X86::EBX; return X86Operand::CreateReg(RegNo, Start, End, /*GetAddress=*/true, - OffsetOfLoc); + OffsetOfLoc, Identifier, Info.OpDecl); } enum IntelOperatorKind { @@ -1239,34 +1535,25 @@ enum IntelOperatorKind { /// variable. A variable's size is the product of its LENGTH and TYPE. The /// TYPE operator returns the size of a C or C++ type or variable. If the /// variable is an array, TYPE returns the size of a single element. -X86Operand *X86AsmParser::ParseIntelOperator(SMLoc Start, unsigned OpKind) { - SMLoc TypeLoc = Start; - Parser.Lex(); // Eat offset. - Start = Parser.getTok().getLoc(); - assert (Parser.getTok().is(AsmToken::Identifier) && "Expected an identifier"); - - SMLoc End; - const MCExpr *Val; - if (getParser().parseExpression(Val, End)) - return 0; +X86Operand *X86AsmParser::ParseIntelOperator(unsigned OpKind) { + const AsmToken &Tok = Parser.getTok(); + SMLoc TypeLoc = Tok.getLoc(); + Parser.Lex(); // Eat operator. + + const MCExpr *Val = 0; + InlineAsmIdentifierInfo Info; + SMLoc Start = Tok.getLoc(), End; + StringRef Identifier = Tok.getString(); + if (X86Operand *Err = ParseIntelIdentifier(Val, Identifier, Info, + /*Unevaluated*/ true, End)) + return Err; - unsigned Length = 0, Size = 0, Type = 0; - if (const MCSymbolRefExpr *SymRef = dyn_cast<MCSymbolRefExpr>(Val)) { - const MCSymbol &Sym = SymRef->getSymbol(); - // FIXME: The SemaLookup will fail if the name is anything other then an - // identifier. - // FIXME: Pass a valid SMLoc. 
- bool IsVarDecl; - if (!SemaCallback->LookupInlineAsmIdentifier(Sym.getName(), NULL, Length, - Size, Type, IsVarDecl)) - return ErrorOperand(Start, "Unable to lookup expr!"); - } - unsigned CVal; + unsigned CVal = 0; switch(OpKind) { default: llvm_unreachable("Unexpected operand kind!"); - case IOK_LENGTH: CVal = Length; break; - case IOK_SIZE: CVal = Size; break; - case IOK_TYPE: CVal = Type; break; + case IOK_LENGTH: CVal = Info.Length; break; + case IOK_SIZE: CVal = Info.Size; break; + case IOK_TYPE: CVal = Info.Type; break; } // Rewrite the type operator and the C or C++ type or variable in terms of an @@ -1279,44 +1566,54 @@ X86Operand *X86AsmParser::ParseIntelOperator(SMLoc Start, unsigned OpKind) { } X86Operand *X86AsmParser::ParseIntelOperand() { - SMLoc Start = Parser.getTok().getLoc(), End; - StringRef AsmTokStr = Parser.getTok().getString(); + const AsmToken &Tok = Parser.getTok(); + SMLoc Start = Tok.getLoc(), End; // Offset, length, type and size operators. if (isParsingInlineAsm()) { + StringRef AsmTokStr = Tok.getString(); if (AsmTokStr == "offset" || AsmTokStr == "OFFSET") - return ParseIntelOffsetOfOperator(Start); + return ParseIntelOffsetOfOperator(); if (AsmTokStr == "length" || AsmTokStr == "LENGTH") - return ParseIntelOperator(Start, IOK_LENGTH); + return ParseIntelOperator(IOK_LENGTH); if (AsmTokStr == "size" || AsmTokStr == "SIZE") - return ParseIntelOperator(Start, IOK_SIZE); + return ParseIntelOperator(IOK_SIZE); if (AsmTokStr == "type" || AsmTokStr == "TYPE") - return ParseIntelOperator(Start, IOK_TYPE); + return ParseIntelOperator(IOK_TYPE); } // Immediate. - if (getLexer().is(AsmToken::Integer) || getLexer().is(AsmToken::Real) || - getLexer().is(AsmToken::Minus)) { - const MCExpr *Val; - bool isInteger = getLexer().is(AsmToken::Integer); - if (!getParser().parseExpression(Val, End)) { - if (isParsingInlineAsm()) + if (getLexer().is(AsmToken::Integer) || getLexer().is(AsmToken::Minus) || + getLexer().is(AsmToken::LParen)) { + AsmToken StartTok = Tok; + IntelExprStateMachine SM(/*Imm=*/0, /*StopOnLBrac=*/true, + /*AddImmPrefix=*/false); + if (X86Operand *Err = ParseIntelExpression(SM, End)) + return Err; + + int64_t Imm = SM.getImm(); + if (isParsingInlineAsm()) { + unsigned Len = Tok.getLoc().getPointer() - Start.getPointer(); + if (StartTok.getString().size() == Len) + // Just add a prefix if this wasn't a complex immediate expression. InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_ImmPrefix, Start)); - // Immediate. - if (getLexer().isNot(AsmToken::LBrac)) - return X86Operand::CreateImm(Val, Start, End); - - // Only positive immediates are valid. - if (!isInteger) { - Error(Parser.getTok().getLoc(), "expected a positive immediate " - "displacement before bracketed expr."); - return 0; - } + else + // Otherwise, rewrite the complex expression as a single immediate. + InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Imm, Start, Len, Imm)); + } - // Parse ImmDisp [ BaseReg + Scale*IndexReg + Disp ]. - if (uint64_t ImmDisp = dyn_cast<MCConstantExpr>(Val)->getValue()) - return ParseIntelMemOperand(/*SegReg=*/0, ImmDisp, Start); + if (getLexer().isNot(AsmToken::LBrac)) { + const MCExpr *ImmExpr = MCConstantExpr::Create(Imm, getContext()); + return X86Operand::CreateImm(ImmExpr, Start, End); } + + // Only positive immediates are valid. + if (Imm < 0) + return ErrorOperand(Start, "expected a positive immediate displacement " + "before bracketed expr."); + + // Parse ImmDisp [ BaseReg + Scale*IndexReg + Disp ]. 
+ return ParseIntelMemOperand(/*SegReg=*/0, Imm, Start); } // Register. @@ -1907,7 +2204,7 @@ MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, assert(!Operands.empty() && "Unexpect empty operand list!"); X86Operand *Op = static_cast<X86Operand*>(Operands[0]); assert(Op->isToken() && "Leading operand should always be a mnemonic!"); - ArrayRef<SMRange> EmptyRanges = ArrayRef<SMRange>(); + ArrayRef<SMRange> EmptyRanges = None; // First, handle aliases that expand to multiple instructions. // FIXME: This should be replaced with a real .td file alias mechanism. diff --git a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h index 3669560..d8f7278 100644 --- a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h +++ b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h @@ -20,6 +20,7 @@ #include "X86MCTargetDesc.h" #include "llvm/Support/DataTypes.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/MC/MCInstrInfo.h" namespace llvm { @@ -41,7 +42,6 @@ namespace X86 { AddrNumOperands = 5 }; } // end namespace X86; - /// X86II - This namespace holds all of the target specific flags that /// instruction info tracks. @@ -274,11 +274,12 @@ namespace X86II { //// MRM_XX - A mod/rm byte of exactly 0xXX. MRM_C1 = 33, MRM_C2 = 34, MRM_C3 = 35, MRM_C4 = 36, - MRM_C8 = 37, MRM_C9 = 38, MRM_E8 = 39, MRM_F0 = 40, - MRM_F8 = 41, MRM_F9 = 42, MRM_D0 = 45, MRM_D1 = 46, - MRM_D4 = 47, MRM_D5 = 48, MRM_D6 = 49, MRM_D8 = 50, - MRM_D9 = 51, MRM_DA = 52, MRM_DB = 53, MRM_DC = 54, - MRM_DD = 55, MRM_DE = 56, MRM_DF = 57, + MRM_C8 = 37, MRM_C9 = 38, MRM_CA = 39, MRM_CB = 40, + MRM_E8 = 41, MRM_F0 = 42, MRM_F8 = 45, MRM_F9 = 46, + MRM_D0 = 47, MRM_D1 = 48, MRM_D4 = 49, MRM_D5 = 50, + MRM_D6 = 51, MRM_D8 = 52, MRM_D9 = 53, MRM_DA = 54, + MRM_DB = 55, MRM_DC = 56, MRM_DD = 57, MRM_DE = 58, + MRM_DF = 59, /// RawFrmImm8 - This is used for the ENTER instruction, which has two /// immediates, the first of which is a 16-bit immediate (specified by @@ -521,6 +522,26 @@ namespace X86II { } } + /// getOperandBias - compute any additional adjustment needed to + /// the offset to the start of the memory operand + /// in this instruction. + /// If this is a two-address instruction,skip one of the register operands. + /// FIXME: This should be handled during MCInst lowering. + inline int getOperandBias(const MCInstrDesc& Desc) + { + unsigned NumOps = Desc.getNumOperands(); + unsigned CurOp = 0; + if (NumOps > 1 && Desc.getOperandConstraint(1, MCOI::TIED_TO) == 0) + ++CurOp; + else if (NumOps > 3 && Desc.getOperandConstraint(2, MCOI::TIED_TO) == 0) { + assert(Desc.getOperandConstraint(NumOps - 1, MCOI::TIED_TO) == 1); + // Special case for GATHER with 2 TIED_TO operands + // Skip the first 2 operands: dst, mask_wb + CurOp += 2; + } + return CurOp; + } + /// getMemoryOperandNo - The function returns the MCInst operand # for the /// first field of the memory operand. If the instruction doesn't have a /// memory operand, this returns -1. 
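The new X86II::getOperandBias helper centralizes the tied-operand skipping that X86MCCodeEmitter previously open-coded (its updated call site appears in the X86MCCodeEmitter hunk below). A minimal sketch of one way a caller might use it follows; the walkMemOperands name is invented for illustration, and the include paths are those used inside the X86 target:

#include "MCTargetDesc/X86BaseInfo.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrDesc.h"

// Skip any TIED_TO register operands first, then visit the remaining
// MCInst operands in order.
static void walkMemOperands(const llvm::MCInst &MI,
                            const llvm::MCInstrDesc &Desc) {
  unsigned CurOp = llvm::X86II::getOperandBias(Desc);
  for (unsigned e = Desc.getNumOperands(); CurOp != e; ++CurOp) {
    const llvm::MCOperand &Op = MI.getOperand(CurOp);
    (void)Op; // encode or inspect Op here
  }
}

Keeping the GATHER two-TIED_TO special case inside the helper means any emitter that starts from getOperandBias picks up the same adjustment.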
@@ -576,12 +597,13 @@ namespace X86II { } case X86II::MRM_C1: case X86II::MRM_C2: case X86II::MRM_C3: case X86II::MRM_C4: case X86II::MRM_C8: case X86II::MRM_C9: - case X86II::MRM_E8: case X86II::MRM_F0: case X86II::MRM_F8: - case X86II::MRM_F9: case X86II::MRM_D0: case X86II::MRM_D1: - case X86II::MRM_D4: case X86II::MRM_D5: case X86II::MRM_D6: - case X86II::MRM_D8: case X86II::MRM_D9: case X86II::MRM_DA: - case X86II::MRM_DB: case X86II::MRM_DC: case X86II::MRM_DD: - case X86II::MRM_DE: case X86II::MRM_DF: + case X86II::MRM_CA: case X86II::MRM_CB: case X86II::MRM_E8: + case X86II::MRM_F0: case X86II::MRM_F8: case X86II::MRM_F9: + case X86II::MRM_D0: case X86II::MRM_D1: case X86II::MRM_D4: + case X86II::MRM_D5: case X86II::MRM_D6: case X86II::MRM_D8: + case X86II::MRM_D9: case X86II::MRM_DA: case X86II::MRM_DB: + case X86II::MRM_DC: case X86II::MRM_DD: case X86II::MRM_DE: + case X86II::MRM_DF: return -1; } } diff --git a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp index 776cee1..016af71 100644 --- a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp +++ b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp @@ -237,6 +237,14 @@ StartsWithGlobalOffsetTable(const MCExpr *Expr) { return GOT_Normal; } +static bool HasSecRelSymbolRef(const MCExpr *Expr) { + if (Expr->getKind() == MCExpr::SymbolRef) { + const MCSymbolRefExpr *Ref = static_cast<const MCSymbolRefExpr*>(Expr); + return Ref->getKind() == MCSymbolRefExpr::VK_SECREL; + } + return false; +} + void X86MCCodeEmitter:: EmitImmediate(const MCOperand &DispOp, SMLoc Loc, unsigned Size, MCFixupKind FixupKind, unsigned &CurByte, raw_ostream &OS, @@ -268,8 +276,13 @@ EmitImmediate(const MCOperand &DispOp, SMLoc Loc, unsigned Size, if (Kind == GOT_Normal) ImmOffset = CurByte; } else if (Expr->getKind() == MCExpr::SymbolRef) { - const MCSymbolRefExpr *Ref = static_cast<const MCSymbolRefExpr*>(Expr); - if (Ref->getKind() == MCSymbolRefExpr::VK_SECREL) { + if (HasSecRelSymbolRef(Expr)) { + FixupKind = MCFixupKind(FK_SecRel_4); + } + } else if (Expr->getKind() == MCExpr::Binary) { + const MCBinaryExpr *Bin = static_cast<const MCBinaryExpr*>(Expr); + if (HasSecRelSymbolRef(Bin->getLHS()) + || HasSecRelSymbolRef(Bin->getRHS())) { FixupKind = MCFixupKind(FK_SecRel_4); } } @@ -979,18 +992,8 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS, if ((TSFlags & X86II::FormMask) == X86II::Pseudo) return; - // If this is a two-address instruction, skip one of the register operands. - // FIXME: This should be handled during MCInst lowering. unsigned NumOps = Desc.getNumOperands(); - unsigned CurOp = 0; - if (NumOps > 1 && Desc.getOperandConstraint(1, MCOI::TIED_TO) == 0) - ++CurOp; - else if (NumOps > 3 && Desc.getOperandConstraint(2, MCOI::TIED_TO) == 0) { - assert(Desc.getOperandConstraint(NumOps - 1, MCOI::TIED_TO) == 1); - // Special case for GATHER with 2 TIED_TO operands - // Skip the first 2 operands: dst, mask_wb - CurOp += 2; - } + unsigned CurOp = X86II::getOperandBias(Desc); // Keep track of the current byte being emitted. 
unsigned CurByte = 0; @@ -1138,12 +1141,13 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS, break; case X86II::MRM_C1: case X86II::MRM_C2: case X86II::MRM_C3: case X86II::MRM_C4: case X86II::MRM_C8: case X86II::MRM_C9: - case X86II::MRM_D0: case X86II::MRM_D1: case X86II::MRM_D4: - case X86II::MRM_D5: case X86II::MRM_D6: case X86II::MRM_D8: - case X86II::MRM_D9: case X86II::MRM_DA: case X86II::MRM_DB: - case X86II::MRM_DC: case X86II::MRM_DD: case X86II::MRM_DE: - case X86II::MRM_DF: case X86II::MRM_E8: case X86II::MRM_F0: - case X86II::MRM_F8: case X86II::MRM_F9: + case X86II::MRM_CA: case X86II::MRM_CB: case X86II::MRM_D0: + case X86II::MRM_D1: case X86II::MRM_D4: case X86II::MRM_D5: + case X86II::MRM_D6: case X86II::MRM_D8: case X86II::MRM_D9: + case X86II::MRM_DA: case X86II::MRM_DB: case X86II::MRM_DC: + case X86II::MRM_DD: case X86II::MRM_DE: case X86II::MRM_DF: + case X86II::MRM_E8: case X86II::MRM_F0: case X86II::MRM_F8: + case X86II::MRM_F9: EmitByte(BaseOpcode, CurByte, OS); unsigned char MRM; @@ -1155,6 +1159,8 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS, case X86II::MRM_C4: MRM = 0xC4; break; case X86II::MRM_C8: MRM = 0xC8; break; case X86II::MRM_C9: MRM = 0xC9; break; + case X86II::MRM_CA: MRM = 0xCA; break; + case X86II::MRM_CB: MRM = 0xCB; break; case X86II::MRM_D0: MRM = 0xD0; break; case X86II::MRM_D1: MRM = 0xD1; break; case X86II::MRM_D4: MRM = 0xD4; break; diff --git a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp index bc272ef..ed64a32 100644 --- a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp +++ b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp @@ -9,6 +9,8 @@ #include "MCTargetDesc/X86FixupKinds.h" #include "MCTargetDesc/X86MCTargetDesc.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCValue.h" #include "llvm/MC/MCWinCOFFObjectWriter.h" #include "llvm/Support/COFF.h" #include "llvm/Support/ErrorHandling.h" @@ -27,7 +29,9 @@ namespace { X86WinCOFFObjectWriter(bool Is64Bit_); ~X86WinCOFFObjectWriter(); - virtual unsigned getRelocType(unsigned FixupKind) const; + virtual unsigned getRelocType(const MCValue &Target, + const MCFixup &Fixup, + bool IsCrossSection) const LLVM_OVERRIDE; }; } @@ -38,7 +42,14 @@ X86WinCOFFObjectWriter::X86WinCOFFObjectWriter(bool Is64Bit_) X86WinCOFFObjectWriter::~X86WinCOFFObjectWriter() {} -unsigned X86WinCOFFObjectWriter::getRelocType(unsigned FixupKind) const { +unsigned X86WinCOFFObjectWriter::getRelocType(const MCValue &Target, + const MCFixup &Fixup, + bool IsCrossSection) const { + unsigned FixupKind = IsCrossSection ? FK_PCRel_4 : Fixup.getKind(); + + MCSymbolRefExpr::VariantKind Modifier = Target.isAbsolute() ? + MCSymbolRefExpr::VK_None : Target.getSymA()->getKind(); + switch (FixupKind) { case FK_PCRel_4: case X86::reloc_riprel_4byte: @@ -46,6 +57,9 @@ unsigned X86WinCOFFObjectWriter::getRelocType(unsigned FixupKind) const { return Is64Bit ? COFF::IMAGE_REL_AMD64_REL32 : COFF::IMAGE_REL_I386_REL32; case FK_Data_4: case X86::reloc_signed_4byte: + if (Modifier == MCSymbolRefExpr::VK_COFF_IMGREL32) + return Is64Bit ? COFF::IMAGE_REL_AMD64_ADDR32NB : + COFF::IMAGE_REL_I386_DIR32NB; return Is64Bit ? 
COFF::IMAGE_REL_AMD64_ADDR32 : COFF::IMAGE_REL_I386_DIR32; case FK_Data_8: if (Is64Bit) diff --git a/contrib/llvm/lib/Target/X86/X86.h b/contrib/llvm/lib/Target/X86/X86.h index 1f9919f..947002f 100644 --- a/contrib/llvm/lib/Target/X86/X86.h +++ b/contrib/llvm/lib/Target/X86/X86.h @@ -69,6 +69,11 @@ ImmutablePass *createX86TargetTransformInfoPass(const X86TargetMachine *TM); /// createX86PadShortFunctions - Return a pass that pads short functions /// with NOOPs. This will prevent a stall when returning on the Atom. FunctionPass *createX86PadShortFunctions(); +/// createX86FixupLEAs - Return a a pass that selectively replaces +/// certain instructions (like add, sub, inc, dec, some shifts, +/// and some multiplies) by equivalent LEA instructions, in order +/// to eliminate execution delays in some Atom processors. +FunctionPass *createX86FixupLEAs(); } // End llvm namespace diff --git a/contrib/llvm/lib/Target/X86/X86.td b/contrib/llvm/lib/Target/X86/X86.td index 1dcc344..c865500 100644 --- a/contrib/llvm/lib/Target/X86/X86.td +++ b/contrib/llvm/lib/Target/X86/X86.td @@ -139,6 +139,8 @@ def FeaturePadShortFunctions : SubtargetFeature<"pad-short-functions", def FeatureCallRegIndirect : SubtargetFeature<"call-reg-indirect", "CallRegIndirect", "true", "Call register indirect">; +def FeatureLEAUsesAG : SubtargetFeature<"lea-uses-ag", "LEAUsesAG", "true", + "LEA instruction needs inputs at AG stage">; //===----------------------------------------------------------------------===// // X86 processors supported. @@ -188,6 +190,7 @@ def : ProcessorModel<"atom", AtomModel, FeatureMOVBE, FeatureSlowBTMem, FeatureLeaForSP, FeatureSlowDivide, FeatureCallRegIndirect, + FeatureLEAUsesAG, FeaturePadShortFunctions]>; // "Arrandale" along with corei3 and corei5 @@ -252,11 +255,16 @@ def : Proc<"amdfam10", [FeatureSSE4A, // Bobcat def : Proc<"btver1", [FeatureSSSE3, FeatureSSE4A, FeatureCMPXCHG16B, FeatureLZCNT, FeaturePOPCNT]>; +// Jaguar +def : Proc<"btver2", [FeatureAVX, FeatureSSE4A, FeatureCMPXCHG16B, + FeatureAES, FeaturePCLMUL, FeatureBMI, + FeatureF16C, FeatureMOVBE, FeatureLZCNT, + FeaturePOPCNT]>; // Bulldozer def : Proc<"bdver1", [FeatureXOP, FeatureFMA4, FeatureCMPXCHG16B, FeatureAES, FeaturePCLMUL, FeatureLZCNT, FeaturePOPCNT]>; -// Enhanced Bulldozer +// Piledriver def : Proc<"bdver2", [FeatureXOP, FeatureFMA4, FeatureCMPXCHG16B, FeatureAES, FeaturePCLMUL, FeatureF16C, FeatureLZCNT, @@ -300,6 +308,9 @@ def ATTAsmParser : AsmParser { def ATTAsmParserVariant : AsmParserVariant { int Variant = 0; + // Variant name. + string Name = "att"; + // Discard comments in assembly strings. string CommentDelimiter = "#"; @@ -310,6 +321,9 @@ def ATTAsmParserVariant : AsmParserVariant { def IntelAsmParserVariant : AsmParserVariant { int Variant = 1; + // Variant name. + string Name = "intel"; + // Discard comments in assembly strings. 
string CommentDelimiter = ";"; diff --git a/contrib/llvm/lib/Target/X86/X86CodeEmitter.cpp b/contrib/llvm/lib/Target/X86/X86CodeEmitter.cpp index 2518e02..8fea6ed 100644 --- a/contrib/llvm/lib/Target/X86/X86CodeEmitter.cpp +++ b/contrib/llvm/lib/Target/X86/X86CodeEmitter.cpp @@ -1451,6 +1451,14 @@ void Emitter<CodeEmitter>::emitInstruction(MachineInstr &MI, MCE.emitByte(BaseOpcode); MCE.emitByte(0xC9); break; + case X86II::MRM_CA: + MCE.emitByte(BaseOpcode); + MCE.emitByte(0xCA); + break; + case X86II::MRM_CB: + MCE.emitByte(BaseOpcode); + MCE.emitByte(0xCB); + break; case X86II::MRM_E8: MCE.emitByte(BaseOpcode); MCE.emitByte(0xE8); diff --git a/contrib/llvm/lib/Target/X86/X86FastISel.cpp b/contrib/llvm/lib/Target/X86/X86FastISel.cpp index cadec68..cf44bd0 100644 --- a/contrib/llvm/lib/Target/X86/X86FastISel.cpp +++ b/contrib/llvm/lib/Target/X86/X86FastISel.cpp @@ -68,12 +68,12 @@ public: virtual bool TargetSelectInstruction(const Instruction *I); - /// TryToFoldLoad - The specified machine instr operand is a vreg, and that + /// \brief The specified machine instr operand is a vreg, and that /// vreg is being provided by the specified load instruction. If possible, /// try to fold the load as an operand to the instruction, returning true if /// possible. - virtual bool TryToFoldLoad(MachineInstr *MI, unsigned OpNo, - const LoadInst *LI); + virtual bool tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo, + const LoadInst *LI); virtual bool FastLowerArguments(); @@ -107,6 +107,8 @@ private: bool X86SelectShift(const Instruction *I); + bool X86SelectDivRem(const Instruction *I); + bool X86SelectSelect(const Instruction *I); bool X86SelectTrunc(const Instruction *I); @@ -691,11 +693,6 @@ bool X86FastISel::X86SelectStore(const Instruction *I) { if (S->isAtomic()) return false; - unsigned SABIAlignment = - TD.getABITypeAlignment(S->getValueOperand()->getType()); - if (S->getAlignment() != 0 && S->getAlignment() < SABIAlignment) - return false; - MVT VT; if (!isTypeLegal(I->getOperand(0)->getType(), VT, /*AllowI1=*/true)) return false; @@ -1235,6 +1232,124 @@ bool X86FastISel::X86SelectShift(const Instruction *I) { return true; } +bool X86FastISel::X86SelectDivRem(const Instruction *I) { + const static unsigned NumTypes = 4; // i8, i16, i32, i64 + const static unsigned NumOps = 4; // SDiv, SRem, UDiv, URem + const static bool S = true; // IsSigned + const static bool U = false; // !IsSigned + const static unsigned Copy = TargetOpcode::COPY; + // For the X86 DIV/IDIV instruction, in most cases the dividend + // (numerator) must be in a specific register pair highreg:lowreg, + // producing the quotient in lowreg and the remainder in highreg. + // For most data types, to set up the instruction, the dividend is + // copied into lowreg, and lowreg is sign-extended or zero-extended + // into highreg. The exception is i8, where the dividend is defined + // as a single register rather than a register pair, and we + // therefore directly sign-extend or zero-extend the dividend into + // lowreg, instead of copying, and ignore the highreg. + const static struct DivRemEntry { + // The following portion depends only on the data type. + const TargetRegisterClass *RC; + unsigned LowInReg; // low part of the register pair + unsigned HighInReg; // high part of the register pair + // The following portion depends on both the data type and the operation. + struct DivRemResult { + unsigned OpDivRem; // The specific DIV/IDIV opcode to use. 
+ unsigned OpSignExtend; // Opcode for sign-extending lowreg into + // highreg, or copying a zero into highreg. + unsigned OpCopy; // Opcode for copying dividend into lowreg, or + // zero/sign-extending into lowreg for i8. + unsigned DivRemResultReg; // Register containing the desired result. + bool IsOpSigned; // Whether to use signed or unsigned form. + } ResultTable[NumOps]; + } OpTable[NumTypes] = { + { &X86::GR8RegClass, X86::AX, 0, { + { X86::IDIV8r, 0, X86::MOVSX16rr8, X86::AL, S }, // SDiv + { X86::IDIV8r, 0, X86::MOVSX16rr8, X86::AH, S }, // SRem + { X86::DIV8r, 0, X86::MOVZX16rr8, X86::AL, U }, // UDiv + { X86::DIV8r, 0, X86::MOVZX16rr8, X86::AH, U }, // URem + } + }, // i8 + { &X86::GR16RegClass, X86::AX, X86::DX, { + { X86::IDIV16r, X86::CWD, Copy, X86::AX, S }, // SDiv + { X86::IDIV16r, X86::CWD, Copy, X86::DX, S }, // SRem + { X86::DIV16r, X86::MOV16r0, Copy, X86::AX, U }, // UDiv + { X86::DIV16r, X86::MOV16r0, Copy, X86::DX, U }, // URem + } + }, // i16 + { &X86::GR32RegClass, X86::EAX, X86::EDX, { + { X86::IDIV32r, X86::CDQ, Copy, X86::EAX, S }, // SDiv + { X86::IDIV32r, X86::CDQ, Copy, X86::EDX, S }, // SRem + { X86::DIV32r, X86::MOV32r0, Copy, X86::EAX, U }, // UDiv + { X86::DIV32r, X86::MOV32r0, Copy, X86::EDX, U }, // URem + } + }, // i32 + { &X86::GR64RegClass, X86::RAX, X86::RDX, { + { X86::IDIV64r, X86::CQO, Copy, X86::RAX, S }, // SDiv + { X86::IDIV64r, X86::CQO, Copy, X86::RDX, S }, // SRem + { X86::DIV64r, X86::MOV64r0, Copy, X86::RAX, U }, // UDiv + { X86::DIV64r, X86::MOV64r0, Copy, X86::RDX, U }, // URem + } + }, // i64 + }; + + MVT VT; + if (!isTypeLegal(I->getType(), VT)) + return false; + + unsigned TypeIndex, OpIndex; + switch (VT.SimpleTy) { + default: return false; + case MVT::i8: TypeIndex = 0; break; + case MVT::i16: TypeIndex = 1; break; + case MVT::i32: TypeIndex = 2; break; + case MVT::i64: TypeIndex = 3; + if (!Subtarget->is64Bit()) + return false; + break; + } + + switch (I->getOpcode()) { + default: llvm_unreachable("Unexpected div/rem opcode"); + case Instruction::SDiv: OpIndex = 0; break; + case Instruction::SRem: OpIndex = 1; break; + case Instruction::UDiv: OpIndex = 2; break; + case Instruction::URem: OpIndex = 3; break; + } + + const DivRemEntry &TypeEntry = OpTable[TypeIndex]; + const DivRemEntry::DivRemResult &OpEntry = TypeEntry.ResultTable[OpIndex]; + unsigned Op0Reg = getRegForValue(I->getOperand(0)); + if (Op0Reg == 0) + return false; + unsigned Op1Reg = getRegForValue(I->getOperand(1)); + if (Op1Reg == 0) + return false; + + // Move op0 into low-order input register. + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(OpEntry.OpCopy), TypeEntry.LowInReg).addReg(Op0Reg); + // Zero-extend or sign-extend into high-order input register. + if (OpEntry.OpSignExtend) { + if (OpEntry.IsOpSigned) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(OpEntry.OpSignExtend)); + else + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(OpEntry.OpSignExtend), TypeEntry.HighInReg); + } + // Generate the DIV/IDIV instruction. + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(OpEntry.OpDivRem)).addReg(Op1Reg); + // Copy output register into result register. 
+ unsigned ResultReg = createResultReg(TypeEntry.RC); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(Copy), ResultReg).addReg(OpEntry.DivRemResultReg); + UpdateValueMap(I, ResultReg); + + return true; +} + bool X86FastISel::X86SelectSelect(const Instruction *I) { MVT VT; if (!isTypeLegal(I->getType(), VT)) @@ -2084,6 +2199,11 @@ X86FastISel::TargetSelectInstruction(const Instruction *I) { case Instruction::AShr: case Instruction::Shl: return X86SelectShift(I); + case Instruction::SDiv: + case Instruction::UDiv: + case Instruction::SRem: + case Instruction::URem: + return X86SelectDivRem(I); case Instruction::Select: return X86SelectSelect(I); case Instruction::Trunc: @@ -2275,12 +2395,8 @@ unsigned X86FastISel::TargetMaterializeFloatZero(const ConstantFP *CF) { } -/// TryToFoldLoad - The specified machine instr operand is a vreg, and that -/// vreg is being provided by the specified load instruction. If possible, -/// try to fold the load as an operand to the instruction, returning true if -/// possible. -bool X86FastISel::TryToFoldLoad(MachineInstr *MI, unsigned OpNo, - const LoadInst *LI) { +bool X86FastISel::tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo, + const LoadInst *LI) { X86AddressMode AM; if (!X86SelectAddress(LI->getOperand(0), AM)) return false; diff --git a/contrib/llvm/lib/Target/X86/X86FixupLEAs.cpp b/contrib/llvm/lib/Target/X86/X86FixupLEAs.cpp new file mode 100644 index 0000000..0dd034c --- /dev/null +++ b/contrib/llvm/lib/Target/X86/X86FixupLEAs.cpp @@ -0,0 +1,253 @@ +//===-- X86FixupLEAs.cpp - use or replace LEA instructions -----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the pass which will find instructions which +// can be re-written as LEA instructions in order to reduce pipeline +// delays for some models of the Intel Atom family. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "x86-fixup-LEAs" +#include "X86.h" +#include "X86InstrInfo.h" +#include "X86Subtarget.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/LiveVariables.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetInstrInfo.h" +using namespace llvm; + +STATISTIC(NumLEAs, "Number of LEA instructions created"); + +namespace { + class FixupLEAPass : public MachineFunctionPass { + enum RegUsageState { RU_NotUsed, RU_Write, RU_Read }; + static char ID; + /// \brief Loop over all of the instructions in the basic block + /// replacing applicable instructions with LEA instructions, + /// where appropriate. + bool processBasicBlock(MachineFunction &MF, MachineFunction::iterator MFI); + + virtual const char *getPassName() const { return "X86 Atom LEA Fixup";} + + /// \brief Given a machine register, look for the instruction + /// which writes it in the current basic block. If found, + /// try to replace it with an equivalent LEA instruction. + /// If replacement succeeds, then also process the the newly created + /// instruction. 
+ void seekLEAFixup(MachineOperand& p, MachineBasicBlock::iterator& I, + MachineFunction::iterator MFI); + + /// \brief Given a memory access or LEA instruction + /// whose address mode uses a base and/or index register, look for + /// an opportunity to replace the instruction which sets the base or index + /// register with an equivalent LEA instruction. + void processInstruction(MachineBasicBlock::iterator& I, + MachineFunction::iterator MFI); + + /// \brief Determine if an instruction references a machine register + /// and, if so, whether it reads or writes the register. + RegUsageState usesRegister(MachineOperand& p, + MachineBasicBlock::iterator I); + + /// \brief Step backwards through a basic block, looking + /// for an instruction which writes a register within + /// a maximum of INSTR_DISTANCE_THRESHOLD instruction latency cycles. + MachineBasicBlock::iterator searchBackwards(MachineOperand& p, + MachineBasicBlock::iterator& I, + MachineFunction::iterator MFI); + + /// \brief if an instruction can be converted to an + /// equivalent LEA, insert the new instruction into the basic block + /// and return a pointer to it. Otherwise, return zero. + MachineInstr* postRAConvertToLEA(MachineFunction::iterator &MFI, + MachineBasicBlock::iterator &MBBI) const; + + public: + FixupLEAPass() : MachineFunctionPass(ID) {} + + /// \brief Loop over all of the basic blocks, + /// replacing instructions by equivalent LEA instructions + /// if needed and when possible. + virtual bool runOnMachineFunction(MachineFunction &MF); + + private: + MachineFunction *MF; + const TargetMachine *TM; + const TargetInstrInfo *TII; // Machine instruction info. + + }; + char FixupLEAPass::ID = 0; +} + +MachineInstr * +FixupLEAPass::postRAConvertToLEA(MachineFunction::iterator &MFI, + MachineBasicBlock::iterator &MBBI) const { + MachineInstr* MI = MBBI; + MachineInstr* NewMI; + switch (MI->getOpcode()) { + case X86::MOV32rr: + case X86::MOV64rr: { + const MachineOperand& Src = MI->getOperand(1); + const MachineOperand& Dest = MI->getOperand(0); + NewMI = BuildMI(*MF, MI->getDebugLoc(), + TII->get( MI->getOpcode() == X86::MOV32rr ? X86::LEA32r : X86::LEA64r)) + .addOperand(Dest) + .addOperand(Src).addImm(1).addReg(0).addImm(0).addReg(0); + MFI->insert(MBBI, NewMI); // Insert the new inst + return NewMI; + } + case X86::ADD64ri32: + case X86::ADD64ri8: + case X86::ADD64ri32_DB: + case X86::ADD64ri8_DB: + case X86::ADD32ri: + case X86::ADD32ri8: + case X86::ADD32ri_DB: + case X86::ADD32ri8_DB: + case X86::ADD16ri: + case X86::ADD16ri8: + case X86::ADD16ri_DB: + case X86::ADD16ri8_DB: + if (!MI->getOperand(2).isImm()) { + // convertToThreeAddress will call getImm() + // which requires isImm() to be true + return 0; + } + } + return TII->convertToThreeAddress(MFI, MBBI, 0); +} + +FunctionPass *llvm::createX86FixupLEAs() { + return new FixupLEAPass(); +} + +bool FixupLEAPass::runOnMachineFunction(MachineFunction &Func) { + MF = &Func; + TII = Func.getTarget().getInstrInfo(); + TM = &MF->getTarget(); + + DEBUG(dbgs() << "Start X86FixupLEAs\n";); + // Process all basic blocks. 
+ for (MachineFunction::iterator I = Func.begin(), E = Func.end(); I != E; ++I) + processBasicBlock(Func, I); + DEBUG(dbgs() << "End X86FixupLEAs\n";); + + return true; +} + +FixupLEAPass::RegUsageState FixupLEAPass::usesRegister(MachineOperand& p, + MachineBasicBlock::iterator I) { + RegUsageState RegUsage = RU_NotUsed; + MachineInstr* MI = I; + + for (unsigned int i = 0; i < MI->getNumOperands(); ++i) { + MachineOperand& opnd = MI->getOperand(i); + if (opnd.isReg() && opnd.getReg() == p.getReg()){ + if (opnd.isDef()) + return RU_Write; + RegUsage = RU_Read; + } + } + return RegUsage; +} + +/// getPreviousInstr - Given a reference to an instruction in a basic +/// block, return a reference to the previous instruction in the block, +/// wrapping around to the last instruction of the block if the block +/// branches to itself. +static inline bool getPreviousInstr(MachineBasicBlock::iterator& I, + MachineFunction::iterator MFI) { + if (I == MFI->begin()) { + if (MFI->isPredecessor(MFI)) { + I = --MFI->end(); + return true; + } + else + return false; + } + --I; + return true; +} + +MachineBasicBlock::iterator FixupLEAPass::searchBackwards(MachineOperand& p, + MachineBasicBlock::iterator& I, + MachineFunction::iterator MFI) { + int InstrDistance = 1; + MachineBasicBlock::iterator CurInst; + static const int INSTR_DISTANCE_THRESHOLD = 5; + + CurInst = I; + bool Found; + Found = getPreviousInstr(CurInst, MFI); + while( Found && I != CurInst) { + if (CurInst->isCall() || CurInst->isInlineAsm()) + break; + if (InstrDistance > INSTR_DISTANCE_THRESHOLD) + break; // too far back to make a difference + if (usesRegister(p, CurInst) == RU_Write){ + return CurInst; + } + InstrDistance += TII->getInstrLatency(TM->getInstrItineraryData(), CurInst); + Found = getPreviousInstr(CurInst, MFI); + } + return 0; +} + +void FixupLEAPass::processInstruction(MachineBasicBlock::iterator& I, + MachineFunction::iterator MFI) { + // Process a load, store, or LEA instruction. + MachineInstr *MI = I; + int opcode = MI->getOpcode(); + const MCInstrDesc& Desc = MI->getDesc(); + int AddrOffset = X86II::getMemoryOperandNo(Desc.TSFlags, opcode); + if (AddrOffset >= 0) { + AddrOffset += X86II::getOperandBias(Desc); + MachineOperand& p = MI->getOperand(AddrOffset + X86::AddrBaseReg); + if (p.isReg() && p.getReg() != X86::ESP) { + seekLEAFixup(p, I, MFI); + } + MachineOperand& q = MI->getOperand(AddrOffset + X86::AddrIndexReg); + if (q.isReg() && q.getReg() != X86::ESP) { + seekLEAFixup(q, I, MFI); + } + } +} + +void FixupLEAPass::seekLEAFixup(MachineOperand& p, + MachineBasicBlock::iterator& I, + MachineFunction::iterator MFI) { + MachineBasicBlock::iterator MBI = searchBackwards(p, I, MFI); + if (MBI) { + MachineInstr* NewMI = postRAConvertToLEA(MFI, MBI); + if (NewMI) { + ++NumLEAs; + DEBUG(dbgs() << "Candidate to replace:"; MBI->dump();); + // now to replace with an equivalent LEA... 
+ DEBUG(dbgs() << "Replaced by: "; NewMI->dump();); + MFI->erase(MBI); + MachineBasicBlock::iterator J = + static_cast<MachineBasicBlock::iterator> (NewMI); + processInstruction(J, MFI); + } + } +} + +bool FixupLEAPass::processBasicBlock(MachineFunction &MF, + MachineFunction::iterator MFI) { + + for (MachineBasicBlock::iterator I = MFI->begin(); I != MFI->end(); ++I) + processInstruction(I, MFI); + return false; +} diff --git a/contrib/llvm/lib/Target/X86/X86FrameLowering.cpp b/contrib/llvm/lib/Target/X86/X86FrameLowering.cpp index 54cbd40..42b4e73 100644 --- a/contrib/llvm/lib/Target/X86/X86FrameLowering.cpp +++ b/contrib/llvm/lib/Target/X86/X86FrameLowering.cpp @@ -369,7 +369,14 @@ void X86FrameLowering::emitCalleeSavedFrameMoves(MachineFunction &MF, /// getCompactUnwindRegNum - Get the compact unwind number for a given /// register. The number corresponds to the enum lists in /// compact_unwind_encoding.h. -static int getCompactUnwindRegNum(const uint16_t *CURegs, unsigned Reg) { +static int getCompactUnwindRegNum(unsigned Reg, bool is64Bit) { + static const uint16_t CU32BitRegs[] = { + X86::EBX, X86::ECX, X86::EDX, X86::EDI, X86::ESI, X86::EBP, 0 + }; + static const uint16_t CU64BitRegs[] = { + X86::RBX, X86::R12, X86::R13, X86::R14, X86::R15, X86::RBP, 0 + }; + const uint16_t *CURegs = is64Bit ? CU64BitRegs : CU32BitRegs; for (int Idx = 1; *CURegs; ++CURegs, ++Idx) if (*CURegs == Reg) return Idx; @@ -398,16 +405,8 @@ encodeCompactUnwindRegistersWithoutFrame(unsigned SavedRegs[CU_NUM_SAVED_REGS], // 4 3 // 5 3 // - static const uint16_t CU32BitRegs[] = { - X86::EBX, X86::ECX, X86::EDX, X86::EDI, X86::ESI, X86::EBP, 0 - }; - static const uint16_t CU64BitRegs[] = { - X86::RBX, X86::R12, X86::R13, X86::R14, X86::R15, X86::RBP, 0 - }; - const uint16_t *CURegs = (Is64Bit ? CU64BitRegs : CU32BitRegs); - for (unsigned i = 0; i != CU_NUM_SAVED_REGS; ++i) { - int CUReg = getCompactUnwindRegNum(CURegs, SavedRegs[i]); + int CUReg = getCompactUnwindRegNum(SavedRegs[i], Is64Bit); if (CUReg == -1) return ~0U; SavedRegs[i] = CUReg; } @@ -466,14 +465,6 @@ encodeCompactUnwindRegistersWithoutFrame(unsigned SavedRegs[CU_NUM_SAVED_REGS], static uint32_t encodeCompactUnwindRegistersWithFrame(unsigned SavedRegs[CU_NUM_SAVED_REGS], bool Is64Bit) { - static const uint16_t CU32BitRegs[] = { - X86::EBX, X86::ECX, X86::EDX, X86::EDI, X86::ESI, X86::EBP, 0 - }; - static const uint16_t CU64BitRegs[] = { - X86::RBX, X86::R12, X86::R13, X86::R14, X86::R15, X86::RBP, 0 - }; - const uint16_t *CURegs = (Is64Bit ? CU64BitRegs : CU32BitRegs); - // Encode the registers in the order they were saved, 3-bits per register. The // registers are numbered from 1 to CU_NUM_SAVED_REGS. uint32_t RegEnc = 0; @@ -481,7 +472,7 @@ encodeCompactUnwindRegistersWithFrame(unsigned SavedRegs[CU_NUM_SAVED_REGS], unsigned Reg = SavedRegs[I]; if (Reg == 0) continue; - int CURegNum = getCompactUnwindRegNum(CURegs, Reg); + int CURegNum = getCompactUnwindRegNum(Reg, Is64Bit); if (CURegNum == -1) return ~0U; // Encode the 3-bit register number in order, skipping over 3-bits for each @@ -528,11 +519,17 @@ uint32_t X86FrameLowering::getCompactUnwindEncoding(MachineFunction &MF) const { if (!MI.getFlag(MachineInstr::FrameSetup)) break; // We don't exect any more prolog instructions. - if (ExpectEnd) return 0; + if (ExpectEnd) return CU::UNWIND_MODE_DWARF; if (Opc == PushInstr) { // If there are too many saved registers, we cannot use compact encoding. 
- if (SavedRegIdx >= CU_NUM_SAVED_REGS) return 0; + if (SavedRegIdx >= CU_NUM_SAVED_REGS) return CU::UNWIND_MODE_DWARF; + + unsigned Reg = MI.getOperand(0).getReg(); + if (Reg == (Is64Bit ? X86::RAX : X86::EAX)) { + ExpectEnd = true; + continue; + } SavedRegs[SavedRegIdx++] = MI.getOperand(0).getReg(); StackAdjust += OffsetSize; @@ -542,7 +539,7 @@ uint32_t X86FrameLowering::getCompactUnwindEncoding(MachineFunction &MF) const { unsigned DstReg = MI.getOperand(0).getReg(); if (DstReg != FramePtr || SrcReg != StackPtr) - return 0; + return CU::UNWIND_MODE_DWARF; StackAdjust = 0; memset(SavedRegs, 0, sizeof(SavedRegs)); @@ -552,7 +549,7 @@ uint32_t X86FrameLowering::getCompactUnwindEncoding(MachineFunction &MF) const { Opc == X86::SUB32ri || Opc == X86::SUB32ri8) { if (StackSize) // We already have a stack size. - return 0; + return CU::UNWIND_MODE_DWARF; if (!MI.getOperand(0).isReg() || MI.getOperand(0).getReg() != MI.getOperand(1).getReg() || @@ -560,7 +557,7 @@ uint32_t X86FrameLowering::getCompactUnwindEncoding(MachineFunction &MF) const { // We need this to be a stack adjustment pointer. Something like: // // %RSP<def> = SUB64ri8 %RSP, 48 - return 0; + return CU::UNWIND_MODE_DWARF; StackSize = MI.getOperand(2).getImm() / StackDivide; SubtractInstrIdx += InstrOffset; @@ -574,31 +571,31 @@ uint32_t X86FrameLowering::getCompactUnwindEncoding(MachineFunction &MF) const { if (HasFP) { if ((StackAdjust & 0xFF) != StackAdjust) // Offset was too big for compact encoding. - return 0; + return CU::UNWIND_MODE_DWARF; // Get the encoding of the saved registers when we have a frame pointer. uint32_t RegEnc = encodeCompactUnwindRegistersWithFrame(SavedRegs, Is64Bit); - if (RegEnc == ~0U) return 0; + if (RegEnc == ~0U) return CU::UNWIND_MODE_DWARF; - CompactUnwindEncoding |= 0x01000000; + CompactUnwindEncoding |= CU::UNWIND_MODE_BP_FRAME; CompactUnwindEncoding |= (StackAdjust & 0xFF) << 16; - CompactUnwindEncoding |= RegEnc & 0x7FFF; + CompactUnwindEncoding |= RegEnc & CU::UNWIND_BP_FRAME_REGISTERS; } else { ++StackAdjust; uint32_t TotalStackSize = StackAdjust + StackSize; if ((TotalStackSize & 0xFF) == TotalStackSize) { // Frameless stack with a small stack size. - CompactUnwindEncoding |= 0x02000000; + CompactUnwindEncoding |= CU::UNWIND_MODE_STACK_IMMD; // Encode the stack size. CompactUnwindEncoding |= (TotalStackSize & 0xFF) << 16; } else { if ((StackAdjust & 0x7) != StackAdjust) // The extra stack adjustments are too big for us to handle. - return 0; + return CU::UNWIND_MODE_DWARF; // Frameless stack with an offset too large for us to encode compactly. - CompactUnwindEncoding |= 0x03000000; + CompactUnwindEncoding |= CU::UNWIND_MODE_STACK_IND; // Encode the offset to the nnnnnn value in the 'subl $nnnnnn, ESP' // instruction. @@ -616,10 +613,11 @@ uint32_t X86FrameLowering::getCompactUnwindEncoding(MachineFunction &MF) const { uint32_t RegEnc = encodeCompactUnwindRegistersWithoutFrame(SavedRegs, SavedRegIdx, Is64Bit); - if (RegEnc == ~0U) return 0; + if (RegEnc == ~0U) return CU::UNWIND_MODE_DWARF; // Encode the register encoding. 
- CompactUnwindEncoding |= RegEnc & 0x3FF; + CompactUnwindEncoding |= + RegEnc & CU::UNWIND_FRAMELESS_STACK_REG_PERMUTATION; } return CompactUnwindEncoding; diff --git a/contrib/llvm/lib/Target/X86/X86FrameLowering.h b/contrib/llvm/lib/Target/X86/X86FrameLowering.h index 3f08b9a..6e309d8 100644 --- a/contrib/llvm/lib/Target/X86/X86FrameLowering.h +++ b/contrib/llvm/lib/Target/X86/X86FrameLowering.h @@ -19,8 +19,35 @@ #include "llvm/Target/TargetFrameLowering.h" namespace llvm { - class MCSymbol; - class X86TargetMachine; + +namespace CU { + + /// Compact unwind encoding values. + enum CompactUnwindEncodings { + /// [RE]BP based frame where [RE]BP is pused on the stack immediately after + /// the return address, then [RE]SP is moved to [RE]BP. + UNWIND_MODE_BP_FRAME = 0x01000000, + + /// A frameless function with a small constant stack size. + UNWIND_MODE_STACK_IMMD = 0x02000000, + + /// A frameless function with a large constant stack size. + UNWIND_MODE_STACK_IND = 0x03000000, + + /// No compact unwind encoding is available. + UNWIND_MODE_DWARF = 0x04000000, + + /// Mask for encoding the frame registers. + UNWIND_BP_FRAME_REGISTERS = 0x00007FFF, + + /// Mask for encoding the frameless registers. + UNWIND_FRAMELESS_STACK_REG_PERMUTATION = 0x000003FF + }; + +} // end CU namespace + +class MCSymbol; +class X86TargetMachine; class X86FrameLowering : public TargetFrameLowering { const X86TargetMachine &TM; diff --git a/contrib/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/contrib/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp index 6041669..968b358 100644 --- a/contrib/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/contrib/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -1503,8 +1503,7 @@ SDNode *X86DAGToDAGISel::SelectAtomic64(SDNode *Node, unsigned Opc) { MemOp[0] = cast<MemSDNode>(Node)->getMemOperand(); const SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, In2L, In2H, Chain}; SDNode *ResNode = CurDAG->getMachineNode(Opc, Node->getDebugLoc(), - MVT::i32, MVT::i32, MVT::Other, Ops, - array_lengthof(Ops)); + MVT::i32, MVT::i32, MVT::Other, Ops); cast<MachineSDNode>(ResNode)->setMemRefs(MemOp, MemOp + 1); return ResNode; } @@ -1720,7 +1719,7 @@ SDNode *X86DAGToDAGISel::SelectAtomicLoadArith(SDNode *Node, EVT NVT) { Op = ADD; break; } - + Val = getAtomicLoadArithTargetConstant(CurDAG, dl, Op, NVT, Val); bool isUnOp = !Val.getNode(); bool isCN = Val.getNode() && (Val.getOpcode() == ISD::TargetConstant); @@ -1772,12 +1771,10 @@ SDNode *X86DAGToDAGISel::SelectAtomicLoadArith(SDNode *Node, EVT NVT) { MemOp[0] = cast<MemSDNode>(Node)->getMemOperand(); if (isUnOp) { SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Chain }; - Ret = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops, - array_lengthof(Ops)), 0); + Ret = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops), 0); } else { SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Val, Chain }; - Ret = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops, - array_lengthof(Ops)), 0); + Ret = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops), 0); } cast<MachineSDNode>(Ret)->setMemRefs(MemOp, MemOp + 1); SDValue RetVals[] = { Undef, Ret }; @@ -1971,8 +1968,7 @@ SDNode *X86DAGToDAGISel::SelectGather(SDNode *Node, unsigned Opc) { SDValue Segment = CurDAG->getRegister(0, MVT::i32); const SDValue Ops[] = { VSrc, Base, getI8Imm(Scale->getSExtValue()), VIdx, Disp, Segment, VMask, Chain}; - SDNode *ResNode = CurDAG->getMachineNode(Opc, Node->getDebugLoc(), - VTs, Ops, array_lengthof(Ops)); + SDNode *ResNode = CurDAG->getMachineNode(Opc, Node->getDebugLoc(), 
VTs, Ops); // Node has 2 outputs: VDst and MVT::Other. // ResNode has 3 outputs: VDst, VMask_wb, and MVT::Other. // We replace VDst of Node with VDst of ResNode, and Other of Node with Other @@ -2186,7 +2182,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { SDVTList VTs = CurDAG->getVTList(NVT, NVT, MVT::i32); SDValue Ops[] = {N1, InFlag}; - SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops, 2); + SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops); ReplaceUses(SDValue(Node, 0), SDValue(CNode, 0)); ReplaceUses(SDValue(Node, 1), SDValue(CNode, 1)); @@ -2267,16 +2263,14 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { InFlag }; if (MOpc == X86::MULX32rm || MOpc == X86::MULX64rm) { SDVTList VTs = CurDAG->getVTList(NVT, NVT, MVT::Other, MVT::Glue); - SDNode *CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops, - array_lengthof(Ops)); + SDNode *CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops); ResHi = SDValue(CNode, 0); ResLo = SDValue(CNode, 1); Chain = SDValue(CNode, 2); InFlag = SDValue(CNode, 3); } else { SDVTList VTs = CurDAG->getVTList(MVT::Other, MVT::Glue); - SDNode *CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops, - array_lengthof(Ops)); + SDNode *CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops); Chain = SDValue(CNode, 0); InFlag = SDValue(CNode, 1); } @@ -2287,15 +2281,13 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { SDValue Ops[] = { N1, InFlag }; if (Opc == X86::MULX32rr || Opc == X86::MULX64rr) { SDVTList VTs = CurDAG->getVTList(NVT, NVT, MVT::Glue); - SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops, - array_lengthof(Ops)); + SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops); ResHi = SDValue(CNode, 0); ResLo = SDValue(CNode, 1); InFlag = SDValue(CNode, 2); } else { SDVTList VTs = CurDAG->getVTList(MVT::Glue); - SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops, - array_lengthof(Ops)); + SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops); InFlag = SDValue(CNode, 0); } } @@ -2343,6 +2335,9 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { DEBUG(dbgs() << "=> "; ResHi.getNode()->dump(CurDAG); dbgs() << '\n'); } + // Propagate ordering to the last node, for now. + CurDAG->AssignOrdering(InFlag.getNode(), CurDAG->GetOrdering(Node)); + return NULL; } @@ -2409,8 +2404,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N0.getOperand(0) }; Move = SDValue(CurDAG->getMachineNode(X86::MOVZX32rm8, dl, MVT::i32, - MVT::Other, Ops, - array_lengthof(Ops)), 0); + MVT::Other, Ops), 0); Chain = Move.getValue(1); ReplaceUses(N0.getValue(1), Chain); } else { @@ -2441,8 +2435,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0), InFlag }; SDNode *CNode = - CurDAG->getMachineNode(MOpc, dl, MVT::Other, MVT::Glue, Ops, - array_lengthof(Ops)); + CurDAG->getMachineNode(MOpc, dl, MVT::Other, MVT::Glue, Ops); InFlag = SDValue(CNode, 1); // Update the chain. 
ReplaceUses(N1.getValue(1), SDValue(CNode, 0)); @@ -2674,8 +2667,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { unsigned newOpc = getFusedLdStOpcode(LdVT, Opc); MachineSDNode *Result = CurDAG->getMachineNode(newOpc, Node->getDebugLoc(), - MVT::i32, MVT::Other, Ops, - array_lengthof(Ops)); + MVT::i32, MVT::Other, Ops); Result->setMemRefs(MemOp, MemOp + 2); ReplaceUses(SDValue(StoreNode, 0), SDValue(Result, 1)); diff --git a/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp b/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp index 6934186..f69f5d8 100644 --- a/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -163,10 +163,28 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) Subtarget = &TM.getSubtarget<X86Subtarget>(); X86ScalarSSEf64 = Subtarget->hasSSE2(); X86ScalarSSEf32 = Subtarget->hasSSE1(); - RegInfo = TM.getRegisterInfo(); TD = getDataLayout(); + resetOperationActions(); +} + +void X86TargetLowering::resetOperationActions() { + const TargetMachine &TM = getTargetMachine(); + static bool FirstTimeThrough = true; + + // If none of the target options have changed, then we don't need to reset the + // operation actions. + if (!FirstTimeThrough && TO == TM.Options) return; + + if (!FirstTimeThrough) { + // Reinitialize the actions. + initActions(); + FirstTimeThrough = false; + } + + TO = TM.Options; + // Set up the TargetLowering object. static const MVT IntVTs[] = { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }; @@ -508,16 +526,8 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) if (Subtarget->hasSSE1()) setOperationAction(ISD::PREFETCH , MVT::Other, Legal); - setOperationAction(ISD::MEMBARRIER , MVT::Other, Custom); setOperationAction(ISD::ATOMIC_FENCE , MVT::Other, Custom); - // On X86 and X86-64, atomic operations are lowered to locked instructions. - // Locked instructions, in turn, have implicit fence semantics (all memory - // operations are flushed before issuing the locked instruction, and they - // are not buffered), so we can fold away the common pattern of - // fence-atomic-fence. - setShouldFoldAtomicFences(true); - // Expand certain atomics for (unsigned i = 0; i != array_lengthof(IntVTs); ++i) { MVT VT = IntVTs[i]; @@ -1785,7 +1795,7 @@ X86TargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag, if (isScalarFPTypeInSSEReg(VA.getValVT())) CopyVT = MVT::f80; SDValue Ops[] = { Chain, InFlag }; Chain = SDValue(DAG.getMachineNode(X86::FpPOP_RETVAL, dl, CopyVT, - MVT::Other, MVT::Glue, Ops, 2), 1); + MVT::Other, MVT::Glue, Ops), 1); Val = Chain.getValue(0); // Round the f80 to the right size, which also moves it to the appropriate @@ -4404,13 +4414,15 @@ static SDValue getZeroVector(EVT VT, const X86Subtarget *Subtarget, if (Subtarget->hasInt256()) { // AVX2 SDValue Cst = DAG.getTargetConstant(0, MVT::i32); SDValue Ops[] = { Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst }; - Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8i32, Ops, 8); + Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8i32, Ops, + array_lengthof(Ops)); } else { // 256-bit logic and arithmetic instructions in AVX are all // floating-point, no support for integer ops. Emit fp zeroed vectors. 
SDValue Cst = DAG.getTargetConstantFP(+0.0, MVT::f32); SDValue Ops[] = { Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst }; - Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8f32, Ops, 8); + Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8f32, Ops, + array_lengthof(Ops)); } } else llvm_unreachable("Unexpected vector type"); @@ -4431,7 +4443,8 @@ static SDValue getOnesVector(MVT VT, bool HasInt256, SelectionDAG &DAG, if (VT.is256BitVector()) { if (HasInt256) { // AVX2 SDValue Ops[] = { Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst }; - Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8i32, Ops, 8); + Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8i32, Ops, + array_lengthof(Ops)); } else { // AVX Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Cst, Cst, Cst, Cst); Vec = Concat128BitVectors(Vec, Vec, MVT::v8i32, 8, DAG, dl); @@ -5101,7 +5114,8 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, SmallVectorImpl<SDValue> &Elts, SDVTList Tys = DAG.getVTList(MVT::v2i64, MVT::Other); SDValue Ops[] = { LDBase->getChain(), LDBase->getBasePtr() }; SDValue ResNode = - DAG.getMemIntrinsicNode(X86ISD::VZEXT_LOAD, DL, Tys, Ops, 2, MVT::i64, + DAG.getMemIntrinsicNode(X86ISD::VZEXT_LOAD, DL, Tys, Ops, + array_lengthof(Ops), MVT::i64, LDBase->getPointerInfo(), LDBase->getAlignment(), false/*isVolatile*/, true/*ReadMem*/, @@ -7624,10 +7638,10 @@ GetTLSADDR(SelectionDAG &DAG, SDValue Chain, GlobalAddressSDNode *GA, if (InFlag) { SDValue Ops[] = { Chain, TGA, *InFlag }; - Chain = DAG.getNode(CallType, dl, NodeTys, Ops, 3); + Chain = DAG.getNode(CallType, dl, NodeTys, Ops, array_lengthof(Ops)); } else { SDValue Ops[] = { Chain, TGA }; - Chain = DAG.getNode(CallType, dl, NodeTys, Ops, 2); + Chain = DAG.getNode(CallType, dl, NodeTys, Ops, array_lengthof(Ops)); } // TLSADDR will be codegen'ed as call. Inform MFI that function has calls. 
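A recurring change in the surrounding X86ISelLowering.cpp hunks is replacing hand-written operand counts (Ops, 2 / Ops, 3 / Ops, 8) with array_lengthof(Ops) in calls that still take a pointer-plus-count pair. The short stand-alone sketch below shows the idiom; array_lengthof itself lives in llvm/ADT/STLExtras.h, and the SDValue struct and main() here are illustrative stand-ins only, not part of the patch.

// Minimal sketch of the array_lengthof idiom: the operand count is derived
// from the array's type instead of being written out by hand, so it cannot
// drift out of sync when operands are added or removed.
// LLVM provides this template in llvm/ADT/STLExtras.h; it is restated here
// only so the sketch compiles on its own.
#include <cstddef>
#include <iostream>

template <class T, std::size_t N>
inline std::size_t array_lengthof(T (&)[N]) {
  return N;
}

struct SDValue { int Id; };  // illustrative stand-in for llvm::SDValue

int main() {
  SDValue Ops[] = { {0}, {1}, {2} };
  // Before: DAG.getMergeValues(Ops, 2, dl)   -- silently wrong if Ops grows.
  // After:  DAG.getMergeValues(Ops, array_lengthof(Ops), dl)
  std::cout << array_lengthof(Ops) << "\n";  // prints 3
  return 0;
}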
@@ -7937,7 +7951,7 @@ SDValue X86TargetLowering::LowerShiftParts(SDValue Op, SelectionDAG &DAG) const{ } SDValue Ops[2] = { Lo, Hi }; - return DAG.getMergeValues(Ops, 2, dl); + return DAG.getMergeValues(Ops, array_lengthof(Ops), dl); } SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op, @@ -8220,8 +8234,8 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op, SDVTList Tys = DAG.getVTList(MVT::f80, MVT::Other); SDValue Ops[] = { Store, StackSlot, DAG.getValueType(MVT::i64) }; - SDValue Fild = DAG.getMemIntrinsicNode(X86ISD::FILD, dl, Tys, Ops, 3, - MVT::i64, MMO); + SDValue Fild = DAG.getMemIntrinsicNode(X86ISD::FILD, dl, Tys, Ops, + array_lengthof(Ops), MVT::i64, MMO); APInt FF(32, 0x5F800000ULL); @@ -8313,8 +8327,8 @@ X86TargetLowering:: FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, MachineMemOperand *MMO = MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(SSFI), MachineMemOperand::MOLoad, MemSize, MemSize); - Value = DAG.getMemIntrinsicNode(X86ISD::FLD, DL, Tys, Ops, 3, - DstTy, MMO); + Value = DAG.getMemIntrinsicNode(X86ISD::FLD, DL, Tys, Ops, + array_lengthof(Ops), DstTy, MMO); Chain = Value.getValue(1); SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize, false); StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); @@ -8328,7 +8342,8 @@ X86TargetLowering:: FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, // Build the FP_TO_INT*_IN_MEM SDValue Ops[] = { Chain, Value, StackSlot }; SDValue FIST = DAG.getMemIntrinsicNode(Opc, DL, DAG.getVTList(MVT::Other), - Ops, 3, DstTy, MMO); + Ops, array_lengthof(Ops), DstTy, + MMO); return std::make_pair(FIST, StackSlot); } else { SDValue ftol = DAG.getNode(X86ISD::WIN_FTOL, DL, @@ -8340,8 +8355,8 @@ X86TargetLowering:: FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, MVT::i32, eax.getValue(2)); SDValue Ops[] = { eax, edx }; SDValue pair = IsReplace - ? DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Ops, 2) - : DAG.getMergeValues(Ops, 2, DL); + ? DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Ops, array_lengthof(Ops)) + : DAG.getMergeValues(Ops, array_lengthof(Ops), DL); return std::make_pair(pair, SDValue()); } } @@ -9165,14 +9180,6 @@ SDValue X86TargetLowering::LowerToBT(SDValue And, ISD::CondCode CC, } if (LHS.getNode()) { - // If the LHS is of the form (x ^ -1) then replace the LHS with x and flip - // the condition code later. - bool Invert = false; - if (LHS.getOpcode() == ISD::XOR && isAllOnes(LHS.getOperand(1))) { - Invert = true; - LHS = LHS.getOperand(0); - } - // If LHS is i8, promote it to i32 with any_extend. There is no i8 BT // instruction. Since the shift amount is in-range-or-undefined, we know // that doing a bittest on the i32 value is ok. We extend to i32 because @@ -9189,9 +9196,6 @@ SDValue X86TargetLowering::LowerToBT(SDValue And, ISD::CondCode CC, SDValue BT = DAG.getNode(X86ISD::BT, dl, MVT::i32, LHS, RHS); X86::CondCode Cond = CC == ISD::SETEQ ? X86::COND_AE : X86::COND_B; - // Flip the condition if the LHS was a not instruction - if (Invert) - Cond = X86::GetOppositeBranchCondition(Cond); return DAG.getNode(X86ISD::SETCC, dl, MVT::i8, DAG.getConstant(Cond, MVT::i8), BT); } @@ -9335,14 +9339,54 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget *Subtarget, // Check that the operation in question is available (most are plain SSE2, // but PCMPGTQ and PCMPEQQ have different requirements). 
if (VT == MVT::v2i64) { - if (Opc == X86ISD::PCMPGT && !Subtarget->hasSSE42()) - return SDValue(); + if (Opc == X86ISD::PCMPGT && !Subtarget->hasSSE42()) { + assert(Subtarget->hasSSE2() && "Don't know how to lower!"); + + // First cast everything to the right type. + Op0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op0); + Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op1); + + // Since SSE has no unsigned integer comparisons, we need to flip the sign + // bits of the inputs before performing those operations. The lower + // compare is always unsigned. + SDValue SB; + if (FlipSigns) { + SB = DAG.getConstant(0x80000000U, MVT::v4i32); + } else { + SDValue Sign = DAG.getConstant(0x80000000U, MVT::i32); + SDValue Zero = DAG.getConstant(0x00000000U, MVT::i32); + SB = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, + Sign, Zero, Sign, Zero); + } + Op0 = DAG.getNode(ISD::XOR, dl, MVT::v4i32, Op0, SB); + Op1 = DAG.getNode(ISD::XOR, dl, MVT::v4i32, Op1, SB); + + // Emulate PCMPGTQ with (hi1 > hi2) | ((hi1 == hi2) & (lo1 > lo2)) + SDValue GT = DAG.getNode(X86ISD::PCMPGT, dl, MVT::v4i32, Op0, Op1); + SDValue EQ = DAG.getNode(X86ISD::PCMPEQ, dl, MVT::v4i32, Op0, Op1); + + // Create masks for only the low parts/high parts of the 64 bit integers. + const int MaskHi[] = { 1, 1, 3, 3 }; + const int MaskLo[] = { 0, 0, 2, 2 }; + SDValue EQHi = DAG.getVectorShuffle(MVT::v4i32, dl, EQ, EQ, MaskHi); + SDValue GTLo = DAG.getVectorShuffle(MVT::v4i32, dl, GT, GT, MaskLo); + SDValue GTHi = DAG.getVectorShuffle(MVT::v4i32, dl, GT, GT, MaskHi); + + SDValue Result = DAG.getNode(ISD::AND, dl, MVT::v4i32, EQHi, GTLo); + Result = DAG.getNode(ISD::OR, dl, MVT::v4i32, Result, GTHi); + + if (Invert) + Result = DAG.getNOT(dl, Result, MVT::v4i32); + + return DAG.getNode(ISD::BITCAST, dl, VT, Result); + } + if (Opc == X86ISD::PCMPEQ && !Subtarget->hasSSE41()) { // If pcmpeqq is missing but pcmpeqd is available synthesize pcmpeqq with // pcmpeqd + pshufd + pand. assert(Subtarget->hasSSE2() && !FlipSigns && "Don't know how to lower!"); - // First cast everything to the right type, + // First cast everything to the right type. Op0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op0); Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op1); @@ -9361,17 +9405,13 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget *Subtarget, } } - // Since SSE has no unsigned integer comparisons, we need to flip the sign + // Since SSE has no unsigned integer comparisons, we need to flip the sign // bits of the inputs before performing those operations. if (FlipSigns) { EVT EltVT = VT.getVectorElementType(); - SDValue SignBit = DAG.getConstant(APInt::getSignBit(EltVT.getSizeInBits()), - EltVT); - std::vector<SDValue> SignBits(VT.getVectorNumElements(), SignBit); - SDValue SignVec = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &SignBits[0], - SignBits.size()); - Op0 = DAG.getNode(ISD::XOR, dl, VT, Op0, SignVec); - Op1 = DAG.getNode(ISD::XOR, dl, VT, Op1, SignVec); + SDValue SB = DAG.getConstant(APInt::getSignBit(EltVT.getSizeInBits()), VT); + Op0 = DAG.getNode(ISD::XOR, dl, VT, Op0, SB); + Op1 = DAG.getNode(ISD::XOR, dl, VT, Op1, SB); } SDValue Result = DAG.getNode(Opc, dl, VT, Op0, Op1); @@ -10937,7 +10977,7 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) { SDValue(Result.getNode(), 1) }; SDValue isValid = DAG.getNode(X86ISD::CMOV, dl, DAG.getVTList(Op->getValueType(1), MVT::Glue), - Ops, 4); + Ops, array_lengthof(Ops)); // Return { result, isValid, chain }. 
return DAG.getNode(ISD::MERGE_VALUES, dl, Op->getVTList(), Result, isValid, @@ -10990,7 +11030,10 @@ SDValue X86TargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const { EVT VT = Op.getValueType(); DebugLoc dl = Op.getDebugLoc(); // FIXME probably not meaningful unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); - unsigned FrameReg = Subtarget->is64Bit() ? X86::RBP : X86::EBP; + unsigned FrameReg = RegInfo->getFrameRegister(DAG.getMachineFunction()); + assert(((FrameReg == X86::RBP && VT == MVT::i64) || + (FrameReg == X86::EBP && VT == MVT::i32)) && + "Invalid Frame Register!"); SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, VT); while (Depth--) FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr, @@ -11010,21 +11053,23 @@ SDValue X86TargetLowering::LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const { SDValue Handler = Op.getOperand(2); DebugLoc dl = Op.getDebugLoc(); - SDValue Frame = DAG.getCopyFromReg(DAG.getEntryNode(), dl, - Subtarget->is64Bit() ? X86::RBP : X86::EBP, - getPointerTy()); - unsigned StoreAddrReg = (Subtarget->is64Bit() ? X86::RCX : X86::ECX); - - SDValue StoreAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(), Frame, - DAG.getIntPtrConstant(RegInfo->getSlotSize())); - StoreAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(), StoreAddr, Offset); + EVT PtrVT = getPointerTy(); + unsigned FrameReg = RegInfo->getFrameRegister(DAG.getMachineFunction()); + assert(((FrameReg == X86::RBP && PtrVT == MVT::i64) || + (FrameReg == X86::EBP && PtrVT == MVT::i32)) && + "Invalid Frame Register!"); + SDValue Frame = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, PtrVT); + unsigned StoreAddrReg = (PtrVT == MVT::i64) ? X86::RCX : X86::ECX; + + SDValue StoreAddr = DAG.getNode(ISD::ADD, dl, PtrVT, Frame, + DAG.getIntPtrConstant(RegInfo->getSlotSize())); + StoreAddr = DAG.getNode(ISD::ADD, dl, PtrVT, StoreAddr, Offset); Chain = DAG.getStore(Chain, dl, Handler, StoreAddr, MachinePointerInfo(), false, false, 0); Chain = DAG.getCopyToReg(Chain, dl, StoreAddrReg, StoreAddr); - return DAG.getNode(X86ISD::EH_RETURN, dl, - MVT::Other, - Chain, DAG.getRegister(StoreAddrReg, getPointerTy())); + return DAG.getNode(X86ISD::EH_RETURN, dl, MVT::Other, Chain, + DAG.getRegister(StoreAddrReg, PtrVT)); } SDValue X86TargetLowering::lowerEH_SJLJ_SETJMP(SDValue Op, @@ -11235,7 +11280,8 @@ SDValue X86TargetLowering::LowerFLT_ROUNDS_(SDValue Op, SDValue Ops[] = { DAG.getEntryNode(), StackSlot }; SDValue Chain = DAG.getMemIntrinsicNode(X86ISD::FNSTCW16m, DL, DAG.getVTList(MVT::Other), - Ops, 2, MVT::i16, MMO); + Ops, array_lengthof(Ops), MVT::i16, + MMO); // Load FP Control Word from stack slot SDValue CWD = DAG.getLoad(MVT::i16, DL, Chain, StackSlot, @@ -12075,52 +12121,6 @@ SDValue X86TargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op, } } -static SDValue LowerMEMBARRIER(SDValue Op, const X86Subtarget *Subtarget, - SelectionDAG &DAG) { - DebugLoc dl = Op.getDebugLoc(); - - // Go ahead and emit the fence on x86-64 even if we asked for no-sse2. - // There isn't any reason to disable it if the target processor supports it. - if (!Subtarget->hasSSE2() && !Subtarget->is64Bit()) { - SDValue Chain = Op.getOperand(0); - SDValue Zero = DAG.getConstant(0, MVT::i32); - SDValue Ops[] = { - DAG.getRegister(X86::ESP, MVT::i32), // Base - DAG.getTargetConstant(1, MVT::i8), // Scale - DAG.getRegister(0, MVT::i32), // Index - DAG.getTargetConstant(0, MVT::i32), // Disp - DAG.getRegister(0, MVT::i32), // Segment. 
- Zero, - Chain - }; - SDNode *Res = - DAG.getMachineNode(X86::OR32mrLocked, dl, MVT::Other, Ops, - array_lengthof(Ops)); - return SDValue(Res, 0); - } - - unsigned isDev = cast<ConstantSDNode>(Op.getOperand(5))->getZExtValue(); - if (!isDev) - return DAG.getNode(X86ISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0)); - - unsigned Op1 = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue(); - unsigned Op2 = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue(); - unsigned Op3 = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue(); - unsigned Op4 = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue(); - - // def : Pat<(membarrier (i8 0), (i8 0), (i8 0), (i8 1), (i8 1)), (SFENCE)>; - if (!Op1 && !Op2 && !Op3 && Op4) - return DAG.getNode(X86ISD::SFENCE, dl, MVT::Other, Op.getOperand(0)); - - // def : Pat<(membarrier (i8 1), (i8 0), (i8 0), (i8 0), (i8 1)), (LFENCE)>; - if (Op1 && !Op2 && !Op3 && !Op4) - return DAG.getNode(X86ISD::LFENCE, dl, MVT::Other, Op.getOperand(0)); - - // def : Pat<(membarrier (i8 imm), (i8 imm), (i8 imm), (i8 imm), (i8 1)), - // (MFENCE)>; - return DAG.getNode(X86ISD::MFENCE, dl, MVT::Other, Op.getOperand(0)); -} - static SDValue LowerATOMIC_FENCE(SDValue Op, const X86Subtarget *Subtarget, SelectionDAG &DAG) { DebugLoc dl = Op.getDebugLoc(); @@ -12149,9 +12149,7 @@ static SDValue LowerATOMIC_FENCE(SDValue Op, const X86Subtarget *Subtarget, Zero, Chain }; - SDNode *Res = - DAG.getMachineNode(X86::OR32mrLocked, dl, MVT::Other, Ops, - array_lengthof(Ops)); + SDNode *Res = DAG.getMachineNode(X86::OR32mrLocked, dl, MVT::Other, Ops); return SDValue(Res, 0); } @@ -12185,7 +12183,7 @@ static SDValue LowerCMP_SWAP(SDValue Op, const X86Subtarget *Subtarget, SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue); MachineMemOperand *MMO = cast<AtomicSDNode>(Op)->getMemOperand(); SDValue Result = DAG.getMemIntrinsicNode(X86ISD::LCMPXCHG_DAG, DL, Tys, - Ops, 5, T, MMO); + Ops, array_lengthof(Ops), T, MMO); SDValue cpOut = DAG.getCopyFromReg(Result.getValue(0), DL, Reg, T, Result.getValue(1)); return cpOut; @@ -12207,7 +12205,7 @@ static SDValue LowerREADCYCLECOUNTER(SDValue Op, const X86Subtarget *Subtarget, DAG.getNode(ISD::OR, dl, MVT::i64, rax, Tmp), rdx.getValue(1) }; - return DAG.getMergeValues(Ops, 2, dl); + return DAG.getMergeValues(Ops, array_lengthof(Ops), dl); } SDValue X86TargetLowering::LowerBITCAST(SDValue Op, SelectionDAG &DAG) const { @@ -12301,7 +12299,8 @@ SDValue X86TargetLowering::LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const { assert(Subtarget->isTargetDarwin() && Subtarget->is64Bit()); // For MacOSX, we want to call an alternative entry point: __sincos_stret, - // which returns the values in two XMM registers. + // which returns the values as { float, float } (in XMM0) or + // { double, double } (which is returned in XMM0, XMM1). DebugLoc dl = Op.getDebugLoc(); SDValue Arg = Op.getOperand(0); EVT ArgVT = Arg.getValueType(); @@ -12316,14 +12315,16 @@ SDValue X86TargetLowering::LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const { Entry.isZExt = false; Args.push_back(Entry); + bool isF64 = ArgVT == MVT::f64; // Only optimize x86_64 for now. i386 is a bit messy. For f32, // the small struct {f32, f32} is returned in (eax, edx). For f64, // the results are returned via SRet in memory. - const char *LibcallName = (ArgVT == MVT::f64) - ? "__sincos_stret" : "__sincosf_stret"; + const char *LibcallName = isF64 ? 
"__sincos_stret" : "__sincosf_stret"; SDValue Callee = DAG.getExternalSymbol(LibcallName, getPointerTy()); - StructType *RetTy = StructType::get(ArgTy, ArgTy, NULL); + Type *RetTy = isF64 + ? (Type*)StructType::get(ArgTy, ArgTy, NULL) + : (Type*)VectorType::get(ArgTy, 4); TargetLowering:: CallLoweringInfo CLI(DAG.getEntryNode(), RetTy, false, false, false, false, 0, @@ -12331,7 +12332,18 @@ SDValue X86TargetLowering::LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const { /*doesNotRet=*/false, /*isReturnValueUsed*/true, Callee, Args, DAG, dl); std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI); - return CallResult.first; + + if (isF64) + // Returned in xmm0 and xmm1. + return CallResult.first; + + // Returned in bits 0:31 and 32:64 xmm0. + SDValue SinVal = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ArgVT, + CallResult.first, DAG.getIntPtrConstant(0)); + SDValue CosVal = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ArgVT, + CallResult.first, DAG.getIntPtrConstant(1)); + SDVTList Tys = DAG.getVTList(ArgVT, ArgVT); + return DAG.getNode(ISD::MERGE_VALUES, dl, Tys, SinVal, CosVal); } /// LowerOperation - Provide custom lowering hooks for some operations. @@ -12340,7 +12352,6 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { switch (Op.getOpcode()) { default: llvm_unreachable("Should not custom lower this!"); case ISD::SIGN_EXTEND_INREG: return LowerSIGN_EXTEND_INREG(Op,DAG); - case ISD::MEMBARRIER: return LowerMEMBARRIER(Op, Subtarget, DAG); case ISD::ATOMIC_FENCE: return LowerATOMIC_FENCE(Op, Subtarget, DAG); case ISD::ATOMIC_CMP_SWAP: return LowerCMP_SWAP(Op, Subtarget, DAG); case ISD::ATOMIC_LOAD_SUB: return LowerLOAD_SUB(Op,DAG); @@ -12457,7 +12468,7 @@ ReplaceATOMIC_BINARY_64(SDNode *Node, SmallVectorImpl<SDValue>&Results, SDValue Ops[] = { Chain, In1, In2L, In2H }; SDVTList Tys = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other); SDValue Result = - DAG.getMemIntrinsicNode(NewOp, dl, Tys, Ops, 4, MVT::i64, + DAG.getMemIntrinsicNode(NewOp, dl, Tys, Ops, array_lengthof(Ops), MVT::i64, cast<MemSDNode>(Node)->getMemOperand()); SDValue OpsF[] = { Result.getValue(0), Result.getValue(1)}; Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, OpsF, 2)); @@ -12537,7 +12548,8 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N, eax.getValue(2)); // Use a buildpair to merge the two 32-bit values into a 64-bit one. SDValue Ops[] = { eax, edx }; - Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Ops, 2)); + Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Ops, + array_lengthof(Ops))); Results.push_back(edx.getValue(1)); return; } @@ -12576,7 +12588,7 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N, unsigned Opcode = Regs64bit ? X86ISD::LCMPXCHG16_DAG : X86ISD::LCMPXCHG8_DAG; SDValue Result = DAG.getMemIntrinsicNode(Opcode, dl, Tys, - Ops, 3, T, MMO); + Ops, array_lengthof(Ops), T, MMO); SDValue cpOutL = DAG.getCopyFromReg(Result.getValue(0), dl, Regs64bit ? 
X86::RAX : X86::EAX, HalfT, Result.getValue(1)); @@ -15063,7 +15075,8 @@ static SDValue PerformShuffleCombine256(SDNode *N, SelectionDAG &DAG, SDVTList Tys = DAG.getVTList(MVT::v4i64, MVT::Other); SDValue Ops[] = { Ld->getChain(), Ld->getBasePtr() }; SDValue ResNode = - DAG.getMemIntrinsicNode(X86ISD::VZEXT_LOAD, dl, Tys, Ops, 2, + DAG.getMemIntrinsicNode(X86ISD::VZEXT_LOAD, dl, Tys, Ops, + array_lengthof(Ops), Ld->getMemoryVT(), Ld->getPointerInfo(), Ld->getAlignment(), @@ -15755,6 +15768,51 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG, if (unsigned Op = matchIntegerMINMAX(Cond, VT, LHS, RHS, DAG, Subtarget)) return DAG.getNode(Op, DL, N->getValueType(0), LHS, RHS); + // Simplify vector selection if the selector will be produced by CMPP*/PCMP*. + if (!DCI.isBeforeLegalize() && N->getOpcode() == ISD::VSELECT && + Cond.getOpcode() == ISD::SETCC) { + + assert(Cond.getValueType().isVector() && + "vector select expects a vector selector!"); + + EVT IntVT = Cond.getValueType(); + bool TValIsAllOnes = ISD::isBuildVectorAllOnes(LHS.getNode()); + bool FValIsAllZeros = ISD::isBuildVectorAllZeros(RHS.getNode()); + + if (!TValIsAllOnes && !FValIsAllZeros) { + // Try invert the condition if true value is not all 1s and false value + // is not all 0s. + bool TValIsAllZeros = ISD::isBuildVectorAllZeros(LHS.getNode()); + bool FValIsAllOnes = ISD::isBuildVectorAllOnes(RHS.getNode()); + + if (TValIsAllZeros || FValIsAllOnes) { + SDValue CC = Cond.getOperand(2); + ISD::CondCode NewCC = + ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(), + Cond.getOperand(0).getValueType().isInteger()); + Cond = DAG.getSetCC(DL, IntVT, Cond.getOperand(0), Cond.getOperand(1), NewCC); + std::swap(LHS, RHS); + TValIsAllOnes = FValIsAllOnes; + FValIsAllZeros = TValIsAllZeros; + } + } + + if (TValIsAllOnes || FValIsAllZeros) { + SDValue Ret; + + if (TValIsAllOnes && FValIsAllZeros) + Ret = Cond; + else if (TValIsAllOnes) + Ret = DAG.getNode(ISD::OR, DL, IntVT, Cond, + DAG.getNode(ISD::BITCAST, DL, IntVT, RHS)); + else if (FValIsAllZeros) + Ret = DAG.getNode(ISD::AND, DL, IntVT, Cond, + DAG.getNode(ISD::BITCAST, DL, IntVT, LHS)); + + return DAG.getNode(ISD::BITCAST, DL, VT, Ret); + } + } + // If we know that this node is legal then we know that it is going to be // matched by one of the SSE/AVX BLEND instructions. These instructions only // depend on the highest bit in each word. Try to use SimplifyDemandedBits @@ -15815,6 +15873,7 @@ static SDValue checkBoolTestSetCCCombine(SDValue Cmp, X86::CondCode &CC) { SDValue SetCC; const ConstantSDNode* C = 0; bool needOppositeCond = (CC == X86::COND_E); + bool checkAgainstTrue = false; // Is it a comparison against 1? if ((C = dyn_cast<ConstantSDNode>(Op1))) SetCC = Op2; @@ -15823,18 +15882,46 @@ static SDValue checkBoolTestSetCCCombine(SDValue Cmp, X86::CondCode &CC) { else // Quit if all operands are not constants. return SDValue(); - if (C->getZExtValue() == 1) + if (C->getZExtValue() == 1) { needOppositeCond = !needOppositeCond; - else if (C->getZExtValue() != 0) + checkAgainstTrue = true; + } else if (C->getZExtValue() != 0) // Quit if the constant is neither 0 or 1. return SDValue(); - // Skip 'zext' or 'trunc' node. - if (SetCC.getOpcode() == ISD::ZERO_EXTEND || - SetCC.getOpcode() == ISD::TRUNCATE) - SetCC = SetCC.getOperand(0); + bool truncatedToBoolWithAnd = false; + // Skip (zext $x), (trunc $x), or (and $x, 1) node. 
+ while (SetCC.getOpcode() == ISD::ZERO_EXTEND || + SetCC.getOpcode() == ISD::TRUNCATE || + SetCC.getOpcode() == ISD::AND) { + if (SetCC.getOpcode() == ISD::AND) { + int OpIdx = -1; + ConstantSDNode *CS; + if ((CS = dyn_cast<ConstantSDNode>(SetCC.getOperand(0))) && + CS->getZExtValue() == 1) + OpIdx = 1; + if ((CS = dyn_cast<ConstantSDNode>(SetCC.getOperand(1))) && + CS->getZExtValue() == 1) + OpIdx = 0; + if (OpIdx == -1) + break; + SetCC = SetCC.getOperand(OpIdx); + truncatedToBoolWithAnd = true; + } else + SetCC = SetCC.getOperand(0); + } switch (SetCC.getOpcode()) { + case X86ISD::SETCC_CARRY: + // Since SETCC_CARRY gives output based on R = CF ? ~0 : 0, it's unsafe to + // simplify it if the result of SETCC_CARRY is not canonicalized to 0 or 1, + // i.e. it's a comparison against true but the result of SETCC_CARRY is not + // truncated to i1 using 'and'. + if (checkAgainstTrue && !truncatedToBoolWithAnd) + break; + assert(X86::CondCode(SetCC.getConstantOperandVal(0)) == X86::COND_B && + "Invalid use of SETCC_CARRY!"); + // FALL THROUGH case X86ISD::SETCC: // Set the condition code or opposite one if necessary. CC = X86::CondCode(SetCC.getConstantOperandVal(0)); @@ -16165,8 +16252,7 @@ static SDValue PerformSHLCombine(SDNode *N, SelectionDAG &DAG) { return SDValue(); } -/// PerformShiftCombine - Transforms vector shift nodes to use vector shifts -/// when possible. +/// PerformShiftCombine - Combine shifts. static SDValue PerformShiftCombine(SDNode* N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget *Subtarget) { diff --git a/contrib/llvm/lib/Target/X86/X86ISelLowering.h b/contrib/llvm/lib/Target/X86/X86ISelLowering.h index 5725f7a..2727e22 100644 --- a/contrib/llvm/lib/Target/X86/X86ISelLowering.h +++ b/contrib/llvm/lib/Target/X86/X86ISelLowering.h @@ -723,6 +723,9 @@ namespace llvm { SDValue BuildFILD(SDValue Op, EVT SrcVT, SDValue Chain, SDValue StackSlot, SelectionDAG &DAG) const; + /// \brief Reset the operation actions based on target options. + virtual void resetOperationActions(); + protected: std::pair<const TargetRegisterClass*, uint8_t> findRepresentativeClass(MVT VT) const; @@ -734,6 +737,10 @@ namespace llvm { const X86RegisterInfo *RegInfo; const DataLayout *TD; + /// Used to store the TargetOptions so that we don't waste time resetting + /// the operation actions unless we have to. + TargetOptions TO; + /// X86ScalarSSEf32, X86ScalarSSEf64 - Select between SSE or x87 /// floating point ops. /// When SSE is available, use it for f32 operations. 
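The TargetOptions member TO added to X86ISelLowering.h just above, together with the resetOperationActions() body shown earlier in X86ISelLowering.cpp ("if (!FirstTimeThrough && TO == TM.Options) return;"), caches the last options the operation actions were configured for so the expensive re-initialization is skipped when nothing changed. The sketch below shows a simplified version of that pattern; Options, Lowering and the member names are hypothetical stand-ins, and the real patch additionally keys off a FirstTimeThrough flag.

// Simplified sketch (hypothetical types/names) of the options-caching pattern:
// remember the options the actions were last initialized for and re-run the
// initialization only when they differ.
#include <iostream>

struct Options {                        // stand-in for llvm::TargetOptions
  bool UnsafeFPMath = false;
  bool NoInfsFPMath = false;
  bool operator==(const Options &RHS) const {
    return UnsafeFPMath == RHS.UnsafeFPMath &&
           NoInfsFPMath == RHS.NoInfsFPMath;
  }
};

class Lowering {
  Options TO;                           // options the actions were last built for
  bool Initialized = false;

  void initActions() { std::cout << "re-initializing operation actions\n"; }

public:
  void resetOperationActions(const Options &Current) {
    if (Initialized && TO == Current)
      return;                           // nothing changed, keep cached actions
    initActions();
    TO = Current;                       // remember what we configured for
    Initialized = true;
  }
};

int main() {
  Lowering L;
  Options O;
  L.resetOperationActions(O);           // initializes
  L.resetOperationActions(O);           // no-op, options unchanged
  O.UnsafeFPMath = true;
  L.resetOperationActions(O);           // re-initializes
  return 0;
}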
diff --git a/contrib/llvm/lib/Target/X86/X86InstrFormats.td b/contrib/llvm/lib/Target/X86/X86InstrFormats.td index 0ef9491..a71e024 100644 --- a/contrib/llvm/lib/Target/X86/X86InstrFormats.td +++ b/contrib/llvm/lib/Target/X86/X86InstrFormats.td @@ -35,25 +35,27 @@ def MRM_C3 : Format<35>; def MRM_C4 : Format<36>; def MRM_C8 : Format<37>; def MRM_C9 : Format<38>; -def MRM_E8 : Format<39>; -def MRM_F0 : Format<40>; -def MRM_F8 : Format<41>; -def MRM_F9 : Format<42>; +def MRM_CA : Format<39>; +def MRM_CB : Format<40>; +def MRM_E8 : Format<41>; +def MRM_F0 : Format<42>; def RawFrmImm8 : Format<43>; def RawFrmImm16 : Format<44>; -def MRM_D0 : Format<45>; -def MRM_D1 : Format<46>; -def MRM_D4 : Format<47>; -def MRM_D5 : Format<48>; -def MRM_D6 : Format<49>; -def MRM_D8 : Format<50>; -def MRM_D9 : Format<51>; -def MRM_DA : Format<52>; -def MRM_DB : Format<53>; -def MRM_DC : Format<54>; -def MRM_DD : Format<55>; -def MRM_DE : Format<56>; -def MRM_DF : Format<57>; +def MRM_F8 : Format<45>; +def MRM_F9 : Format<46>; +def MRM_D0 : Format<47>; +def MRM_D1 : Format<48>; +def MRM_D4 : Format<49>; +def MRM_D5 : Format<50>; +def MRM_D6 : Format<51>; +def MRM_D8 : Format<52>; +def MRM_D9 : Format<53>; +def MRM_DA : Format<54>; +def MRM_DB : Format<55>; +def MRM_DC : Format<56>; +def MRM_DD : Format<57>; +def MRM_DE : Format<58>; +def MRM_DF : Format<59>; // ImmType - This specifies the immediate type used by an instruction. This is // part of the ad-hoc solution used to emit machine instruction encodings by our diff --git a/contrib/llvm/lib/Target/X86/X86InstrInfo.cpp b/contrib/llvm/lib/Target/X86/X86InstrInfo.cpp index 7ba542c..7c0423f 100644 --- a/contrib/llvm/lib/Target/X86/X86InstrInfo.cpp +++ b/contrib/llvm/lib/Target/X86/X86InstrInfo.cpp @@ -4281,7 +4281,7 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N, bool isAligned = (*MMOs.first) && (*MMOs.first)->getAlignment() >= Alignment; Load = DAG.getMachineNode(getLoadRegOpcode(0, RC, isAligned, TM), dl, - VT, MVT::Other, &AddrOps[0], AddrOps.size()); + VT, MVT::Other, AddrOps); NewNodes.push_back(Load); // Preserve memory reference information. @@ -4303,8 +4303,7 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N, if (Load) BeforeOps.push_back(SDValue(Load, 0)); std::copy(AfterOps.begin(), AfterOps.end(), std::back_inserter(BeforeOps)); - SDNode *NewNode= DAG.getMachineNode(Opc, dl, VTs, &BeforeOps[0], - BeforeOps.size()); + SDNode *NewNode= DAG.getMachineNode(Opc, dl, VTs, BeforeOps); NewNodes.push_back(NewNode); // Emit the store instruction. @@ -4326,8 +4325,7 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N, (*MMOs.first)->getAlignment() >= Alignment; SDNode *Store = DAG.getMachineNode(getStoreRegOpcode(0, DstRC, isAligned, TM), - dl, MVT::Other, - &AddrOps[0], AddrOps.size()); + dl, MVT::Other, AddrOps); NewNodes.push_back(Store); // Preserve memory reference information. 
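The unfoldMemoryOperand hunks just above drop the explicit "&AddrOps[0], AddrOps.size()" pair because getMachineNode now takes the operand list as a single array-view argument (ArrayRef in LLVM), to which both C arrays and SmallVector-style containers convert implicitly. The sketch below only illustrates why the count argument disappears; View, SDValue and the getMachineNode signature here are hypothetical stand-ins, not the real LLVM API.

// Sketch of an array-view parameter: the callee receives pointer and length
// together, so callers no longer pass a separate count.
#include <cstddef>
#include <iostream>
#include <vector>

struct SDValue { int Id; };             // stand-in for llvm::SDValue

class View {                            // stand-in for ArrayRef<SDValue>
  const SDValue *Data;
  std::size_t Length;
public:
  template <std::size_t N>
  View(const SDValue (&Arr)[N]) : Data(Arr), Length(N) {}      // from C array
  View(const std::vector<SDValue> &V) : Data(V.data()), Length(V.size()) {}
  std::size_t size() const { return Length; }
};

// Old style: getMachineNode(Opc, ..., Ops, array_lengthof(Ops))
// New style: getMachineNode(Opc, ..., Ops) -- the count travels with the view.
void getMachineNode(unsigned Opc, View Ops) {
  std::cout << "opcode " << Opc << " with " << Ops.size() << " operands\n";
}

int main() {
  SDValue Ops[] = { {0}, {1}, {2} };
  std::vector<SDValue> BeforeOps = { {3}, {4} };
  getMachineNode(1, Ops);        // count deduced from the array type
  getMachineNode(2, BeforeOps);  // count taken from the container
  return 0;
}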
diff --git a/contrib/llvm/lib/Target/X86/X86InstrInfo.td b/contrib/llvm/lib/Target/X86/X86InstrInfo.td index ccc1aa2..3380d8c 100644 --- a/contrib/llvm/lib/Target/X86/X86InstrInfo.td +++ b/contrib/llvm/lib/Target/X86/X86InstrInfo.td @@ -1833,90 +1833,90 @@ include "X86InstrCompiler.td" // Assembler Mnemonic Aliases //===----------------------------------------------------------------------===// -def : MnemonicAlias<"call", "calll">, Requires<[In32BitMode]>; -def : MnemonicAlias<"call", "callq">, Requires<[In64BitMode]>; +def : MnemonicAlias<"call", "calll", "att">, Requires<[In32BitMode]>; +def : MnemonicAlias<"call", "callq", "att">, Requires<[In64BitMode]>; -def : MnemonicAlias<"cbw", "cbtw">; -def : MnemonicAlias<"cwde", "cwtl">; -def : MnemonicAlias<"cwd", "cwtd">; -def : MnemonicAlias<"cdq", "cltd">; -def : MnemonicAlias<"cdqe", "cltq">; -def : MnemonicAlias<"cqo", "cqto">; +def : MnemonicAlias<"cbw", "cbtw", "att">; +def : MnemonicAlias<"cwde", "cwtl", "att">; +def : MnemonicAlias<"cwd", "cwtd", "att">; +def : MnemonicAlias<"cdq", "cltd", "att">; +def : MnemonicAlias<"cdqe", "cltq", "att">; +def : MnemonicAlias<"cqo", "cqto", "att">; // lret maps to lretl, it is not ambiguous with lretq. -def : MnemonicAlias<"lret", "lretl">; +def : MnemonicAlias<"lret", "lretl", "att">; -def : MnemonicAlias<"leavel", "leave">, Requires<[In32BitMode]>; -def : MnemonicAlias<"leaveq", "leave">, Requires<[In64BitMode]>; +def : MnemonicAlias<"leavel", "leave", "att">, Requires<[In32BitMode]>; +def : MnemonicAlias<"leaveq", "leave", "att">, Requires<[In64BitMode]>; -def : MnemonicAlias<"loopz", "loope">; -def : MnemonicAlias<"loopnz", "loopne">; +def : MnemonicAlias<"loopz", "loope", "att">; +def : MnemonicAlias<"loopnz", "loopne", "att">; -def : MnemonicAlias<"pop", "popl">, Requires<[In32BitMode]>; -def : MnemonicAlias<"pop", "popq">, Requires<[In64BitMode]>; -def : MnemonicAlias<"popf", "popfl">, Requires<[In32BitMode]>; -def : MnemonicAlias<"popf", "popfq">, Requires<[In64BitMode]>; -def : MnemonicAlias<"popfd", "popfl">; +def : MnemonicAlias<"pop", "popl", "att">, Requires<[In32BitMode]>; +def : MnemonicAlias<"pop", "popq", "att">, Requires<[In64BitMode]>; +def : MnemonicAlias<"popf", "popfl", "att">, Requires<[In32BitMode]>; +def : MnemonicAlias<"popf", "popfq", "att">, Requires<[In64BitMode]>; +def : MnemonicAlias<"popfd", "popfl", "att">; // FIXME: This is wrong for "push reg". "push %bx" should turn into pushw in // all modes. However: "push (addr)" and "push $42" should default to // pushl/pushq depending on the current mode. 
Similar for "pop %bx" -def : MnemonicAlias<"push", "pushl">, Requires<[In32BitMode]>; -def : MnemonicAlias<"push", "pushq">, Requires<[In64BitMode]>; -def : MnemonicAlias<"pushf", "pushfl">, Requires<[In32BitMode]>; -def : MnemonicAlias<"pushf", "pushfq">, Requires<[In64BitMode]>; -def : MnemonicAlias<"pushfd", "pushfl">; +def : MnemonicAlias<"push", "pushl", "att">, Requires<[In32BitMode]>; +def : MnemonicAlias<"push", "pushq", "att">, Requires<[In64BitMode]>; +def : MnemonicAlias<"pushf", "pushfl", "att">, Requires<[In32BitMode]>; +def : MnemonicAlias<"pushf", "pushfq", "att">, Requires<[In64BitMode]>; +def : MnemonicAlias<"pushfd", "pushfl", "att">; -def : MnemonicAlias<"repe", "rep">; -def : MnemonicAlias<"repz", "rep">; -def : MnemonicAlias<"repnz", "repne">; +def : MnemonicAlias<"repe", "rep", "att">; +def : MnemonicAlias<"repz", "rep", "att">; +def : MnemonicAlias<"repnz", "repne", "att">; -def : MnemonicAlias<"retl", "ret">, Requires<[In32BitMode]>; -def : MnemonicAlias<"retq", "ret">, Requires<[In64BitMode]>; +def : MnemonicAlias<"retl", "ret", "att">, Requires<[In32BitMode]>; +def : MnemonicAlias<"retq", "ret", "att">, Requires<[In64BitMode]>; -def : MnemonicAlias<"salb", "shlb">; -def : MnemonicAlias<"salw", "shlw">; -def : MnemonicAlias<"sall", "shll">; -def : MnemonicAlias<"salq", "shlq">; +def : MnemonicAlias<"salb", "shlb", "att">; +def : MnemonicAlias<"salw", "shlw", "att">; +def : MnemonicAlias<"sall", "shll", "att">; +def : MnemonicAlias<"salq", "shlq", "att">; -def : MnemonicAlias<"smovb", "movsb">; -def : MnemonicAlias<"smovw", "movsw">; -def : MnemonicAlias<"smovl", "movsl">; -def : MnemonicAlias<"smovq", "movsq">; +def : MnemonicAlias<"smovb", "movsb", "att">; +def : MnemonicAlias<"smovw", "movsw", "att">; +def : MnemonicAlias<"smovl", "movsl", "att">; +def : MnemonicAlias<"smovq", "movsq", "att">; -def : MnemonicAlias<"ud2a", "ud2">; -def : MnemonicAlias<"verrw", "verr">; +def : MnemonicAlias<"ud2a", "ud2", "att">; +def : MnemonicAlias<"verrw", "verr", "att">; // System instruction aliases. -def : MnemonicAlias<"iret", "iretl">; -def : MnemonicAlias<"sysret", "sysretl">; -def : MnemonicAlias<"sysexit", "sysexitl">; +def : MnemonicAlias<"iret", "iretl", "att">; +def : MnemonicAlias<"sysret", "sysretl", "att">; +def : MnemonicAlias<"sysexit", "sysexitl", "att">; -def : MnemonicAlias<"lgdtl", "lgdt">, Requires<[In32BitMode]>; -def : MnemonicAlias<"lgdtq", "lgdt">, Requires<[In64BitMode]>; -def : MnemonicAlias<"lidtl", "lidt">, Requires<[In32BitMode]>; -def : MnemonicAlias<"lidtq", "lidt">, Requires<[In64BitMode]>; -def : MnemonicAlias<"sgdtl", "sgdt">, Requires<[In32BitMode]>; -def : MnemonicAlias<"sgdtq", "sgdt">, Requires<[In64BitMode]>; -def : MnemonicAlias<"sidtl", "sidt">, Requires<[In32BitMode]>; -def : MnemonicAlias<"sidtq", "sidt">, Requires<[In64BitMode]>; +def : MnemonicAlias<"lgdtl", "lgdt", "att">, Requires<[In32BitMode]>; +def : MnemonicAlias<"lgdtq", "lgdt", "att">, Requires<[In64BitMode]>; +def : MnemonicAlias<"lidtl", "lidt", "att">, Requires<[In32BitMode]>; +def : MnemonicAlias<"lidtq", "lidt", "att">, Requires<[In64BitMode]>; +def : MnemonicAlias<"sgdtl", "sgdt", "att">, Requires<[In32BitMode]>; +def : MnemonicAlias<"sgdtq", "sgdt", "att">, Requires<[In64BitMode]>; +def : MnemonicAlias<"sidtl", "sidt", "att">, Requires<[In32BitMode]>; +def : MnemonicAlias<"sidtq", "sidt", "att">, Requires<[In64BitMode]>; // Floating point stack aliases. 
-def : MnemonicAlias<"fcmovz", "fcmove">; -def : MnemonicAlias<"fcmova", "fcmovnbe">; -def : MnemonicAlias<"fcmovnae", "fcmovb">; -def : MnemonicAlias<"fcmovna", "fcmovbe">; -def : MnemonicAlias<"fcmovae", "fcmovnb">; -def : MnemonicAlias<"fcomip", "fcompi">; -def : MnemonicAlias<"fildq", "fildll">; -def : MnemonicAlias<"fistpq", "fistpll">; -def : MnemonicAlias<"fisttpq", "fisttpll">; -def : MnemonicAlias<"fldcww", "fldcw">; -def : MnemonicAlias<"fnstcww", "fnstcw">; -def : MnemonicAlias<"fnstsww", "fnstsw">; -def : MnemonicAlias<"fucomip", "fucompi">; -def : MnemonicAlias<"fwait", "wait">; +def : MnemonicAlias<"fcmovz", "fcmove", "att">; +def : MnemonicAlias<"fcmova", "fcmovnbe", "att">; +def : MnemonicAlias<"fcmovnae", "fcmovb", "att">; +def : MnemonicAlias<"fcmovna", "fcmovbe", "att">; +def : MnemonicAlias<"fcmovae", "fcmovnb", "att">; +def : MnemonicAlias<"fcomip", "fcompi", "att">; +def : MnemonicAlias<"fildq", "fildll", "att">; +def : MnemonicAlias<"fistpq", "fistpll", "att">; +def : MnemonicAlias<"fisttpq", "fisttpll", "att">; +def : MnemonicAlias<"fldcww", "fldcw", "att">; +def : MnemonicAlias<"fnstcww", "fnstcw", "att">; +def : MnemonicAlias<"fnstsww", "fnstsw", "att">; +def : MnemonicAlias<"fucomip", "fucompi", "att">; +def : MnemonicAlias<"fwait", "wait", "att">; class CondCodeAlias<string Prefix,string Suffix, string OldCond, string NewCond> diff --git a/contrib/llvm/lib/Target/X86/X86InstrSSE.td b/contrib/llvm/lib/Target/X86/X86InstrSSE.td index 3842387..cce938b 100644 --- a/contrib/llvm/lib/Target/X86/X86InstrSSE.td +++ b/contrib/llvm/lib/Target/X86/X86InstrSSE.td @@ -4462,12 +4462,12 @@ def MOVPDI2DImr : PDI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, VR128:$src), // Move Packed Doubleword Int first element to Doubleword Int // let SchedRW = [WriteMove] in { -def VMOVPQIto64rr : I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src), - "vmov{d|q}\t{$src, $dst|$dst, $src}", +def VMOVPQIto64rr : VRPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src), + "mov{d|q}\t{$src, $dst|$dst, $src}", [(set GR64:$dst, (vector_extract (v2i64 VR128:$src), (iPTR 0)))], IIC_SSE_MOVD_ToGP>, - TB, OpSize, VEX, VEX_W, Requires<[HasAVX, In64BitMode]>; + VEX; def MOVPQIto64rr : RPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src), "mov{d|q}\t{$src, $dst|$dst, $src}", @@ -5094,6 +5094,16 @@ multiclass SS3I_unop_rm_int_y<bits<8> opc, string OpcodeStr, Sched<[WriteVecALULd]>; } +// Helper fragments to match sext vXi1 to vXiY. 
+def v16i1sextv16i8 : PatLeaf<(v16i8 (X86pcmpgt (bc_v16i8 (v4i32 immAllZerosV)), + VR128:$src))>; +def v8i1sextv8i16 : PatLeaf<(v8i16 (X86vsrai VR128:$src, (i32 15)))>; +def v4i1sextv4i32 : PatLeaf<(v4i32 (X86vsrai VR128:$src, (i32 31)))>; +def v32i1sextv32i8 : PatLeaf<(v32i8 (X86pcmpgt (bc_v32i8 (v8i32 immAllZerosV)), + VR256:$src))>; +def v16i1sextv16i16: PatLeaf<(v16i16 (X86vsrai VR256:$src, (i32 15)))>; +def v8i1sextv8i32 : PatLeaf<(v8i32 (X86vsrai VR256:$src, (i32 31)))>; + let Predicates = [HasAVX] in { defm VPABSB : SS3I_unop_rm_int<0x1C, "vpabsb", int_x86_ssse3_pabs_b_128>, VEX; @@ -5101,6 +5111,19 @@ let Predicates = [HasAVX] in { int_x86_ssse3_pabs_w_128>, VEX; defm VPABSD : SS3I_unop_rm_int<0x1E, "vpabsd", int_x86_ssse3_pabs_d_128>, VEX; + + def : Pat<(xor + (bc_v2i64 (v16i1sextv16i8)), + (bc_v2i64 (add (v16i8 VR128:$src), (v16i1sextv16i8)))), + (VPABSBrr128 VR128:$src)>; + def : Pat<(xor + (bc_v2i64 (v8i1sextv8i16)), + (bc_v2i64 (add (v8i16 VR128:$src), (v8i1sextv8i16)))), + (VPABSWrr128 VR128:$src)>; + def : Pat<(xor + (bc_v2i64 (v4i1sextv4i32)), + (bc_v2i64 (add (v4i32 VR128:$src), (v4i1sextv4i32)))), + (VPABSDrr128 VR128:$src)>; } let Predicates = [HasAVX2] in { @@ -5110,6 +5133,19 @@ let Predicates = [HasAVX2] in { int_x86_avx2_pabs_w>, VEX, VEX_L; defm VPABSD : SS3I_unop_rm_int_y<0x1E, "vpabsd", int_x86_avx2_pabs_d>, VEX, VEX_L; + + def : Pat<(xor + (bc_v4i64 (v32i1sextv32i8)), + (bc_v4i64 (add (v32i8 VR256:$src), (v32i1sextv32i8)))), + (VPABSBrr256 VR256:$src)>; + def : Pat<(xor + (bc_v4i64 (v16i1sextv16i16)), + (bc_v4i64 (add (v16i16 VR256:$src), (v16i1sextv16i16)))), + (VPABSWrr256 VR256:$src)>; + def : Pat<(xor + (bc_v4i64 (v8i1sextv8i32)), + (bc_v4i64 (add (v8i32 VR256:$src), (v8i1sextv8i32)))), + (VPABSDrr256 VR256:$src)>; } defm PABSB : SS3I_unop_rm_int<0x1C, "pabsb", @@ -5119,6 +5155,21 @@ defm PABSW : SS3I_unop_rm_int<0x1D, "pabsw", defm PABSD : SS3I_unop_rm_int<0x1E, "pabsd", int_x86_ssse3_pabs_d_128>; +let Predicates = [HasSSSE3] in { + def : Pat<(xor + (bc_v2i64 (v16i1sextv16i8)), + (bc_v2i64 (add (v16i8 VR128:$src), (v16i1sextv16i8)))), + (PABSBrr128 VR128:$src)>; + def : Pat<(xor + (bc_v2i64 (v8i1sextv8i16)), + (bc_v2i64 (add (v8i16 VR128:$src), (v8i1sextv8i16)))), + (PABSWrr128 VR128:$src)>; + def : Pat<(xor + (bc_v2i64 (v4i1sextv4i32)), + (bc_v2i64 (add (v4i32 VR128:$src), (v4i1sextv4i32)))), + (PABSDrr128 VR128:$src)>; +} + //===---------------------------------------------------------------------===// // SSSE3 - Packed Binary Operator Instructions //===---------------------------------------------------------------------===// diff --git a/contrib/llvm/lib/Target/X86/X86InstrShiftRotate.td b/contrib/llvm/lib/Target/X86/X86InstrShiftRotate.td index 5b6298b..89c1a68 100644 --- a/contrib/llvm/lib/Target/X86/X86InstrShiftRotate.td +++ b/contrib/llvm/lib/Target/X86/X86InstrShiftRotate.td @@ -34,7 +34,7 @@ def SHL64rCL : RI<0xD3, MRM4r, (outs GR64:$dst), (ins GR64:$src1), def SHL8ri : Ii8<0xC0, MRM4r, (outs GR8 :$dst), (ins GR8 :$src1, i8imm:$src2), "shl{b}\t{$src2, $dst|$dst, $src2}", [(set GR8:$dst, (shl GR8:$src1, (i8 imm:$src2)))], IIC_SR>; - + let isConvertibleToThreeAddress = 1 in { // Can transform into LEA. 
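The PatLeaf helpers and Pat entries added in the X86InstrSSE.td hunk above let instruction selection recognise the classic branchless absolute-value idiom and emit PABSB/PABSW/PABSD for it: the sign mask is materialised with an arithmetic shift (or a compare against zero for the byte case), and abs(x) is then (x + mask) ^ mask. A scalar sketch of the idiom being matched, illustrative only and not the TableGen pattern itself:

    #include <cassert>
    #include <cstdint>

    // Branchless abs: mask is 0 for non-negative x and all-ones for negative x,
    // so (x + mask) ^ mask == x when x >= 0 and == -x otherwise (two's complement).
    // INT32_MIN maps to itself, just as the vector instruction does.
    int32_t branchlessAbs(int32_t x) {
      int32_t mask = x >> 31;    // arithmetic shift fills with the sign bit
      return (x + mask) ^ mask;  // the add/xor shape the new patterns match
    }

    int main() {
      assert(branchlessAbs(5) == 5);
      assert(branchlessAbs(-7) == 7);
    }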
def SHL16ri : Ii8<0xC1, MRM4r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$src2), "shl{w}\t{$src2, $dst|$dst, $src2}", @@ -43,7 +43,7 @@ def SHL16ri : Ii8<0xC1, MRM4r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$src2), def SHL32ri : Ii8<0xC1, MRM4r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$src2), "shl{l}\t{$src2, $dst|$dst, $src2}", [(set GR32:$dst, (shl GR32:$src1, (i8 imm:$src2)))], IIC_SR>; -def SHL64ri : RIi8<0xC1, MRM4r, (outs GR64:$dst), +def SHL64ri : RIi8<0xC1, MRM4r, (outs GR64:$dst), (ins GR64:$src1, i8imm:$src2), "shl{q}\t{$src2, $dst|$dst, $src2}", [(set GR64:$dst, (shl GR64:$src1, (i8 imm:$src2)))], diff --git a/contrib/llvm/lib/Target/X86/X86InstrSystem.td b/contrib/llvm/lib/Target/X86/X86InstrSystem.td index 053417c..bab3cdd 100644 --- a/contrib/llvm/lib/Target/X86/X86InstrSystem.td +++ b/contrib/llvm/lib/Target/X86/X86InstrSystem.td @@ -449,15 +449,15 @@ let Uses = [RDX, RAX] in { def XSAVE : I<0xAE, MRM4m, (outs opaque512mem:$dst), (ins), "xsave\t$dst", []>, TB; def XSAVE64 : I<0xAE, MRM4m, (outs opaque512mem:$dst), (ins), - "xsaveq\t$dst", []>, TB, REX_W, Requires<[In64BitMode]>; + "xsave{q|64}\t$dst", []>, TB, REX_W, Requires<[In64BitMode]>; def XRSTOR : I<0xAE, MRM5m, (outs), (ins opaque512mem:$dst), "xrstor\t$dst", []>, TB; def XRSTOR64 : I<0xAE, MRM5m, (outs), (ins opaque512mem:$dst), - "xrstorq\t$dst", []>, TB, REX_W, Requires<[In64BitMode]>; + "xrstor{q|64}\t$dst", []>, TB, REX_W, Requires<[In64BitMode]>; def XSAVEOPT : I<0xAE, MRM6m, (outs opaque512mem:$dst), (ins), "xsaveopt\t$dst", []>, TB; def XSAVEOPT64 : I<0xAE, MRM6m, (outs opaque512mem:$dst), (ins), - "xsaveoptq\t$dst", []>, TB, REX_W, Requires<[In64BitMode]>; + "xsaveopt{q|64}\t$dst", []>, TB, REX_W, Requires<[In64BitMode]>; } } // SchedRW @@ -515,8 +515,15 @@ let Predicates = [HasFSGSBase, In64BitMode] in { //===----------------------------------------------------------------------===// // INVPCID Instruction def INVPCID32 : I<0x82, MRMSrcMem, (outs), (ins GR32:$src1, i128mem:$src2), - "invpcid {$src2, $src1|$src1, $src2}", []>, OpSize, T8, + "invpcid\t{$src2, $src1|$src1, $src2}", []>, OpSize, T8, Requires<[In32BitMode]>; def INVPCID64 : I<0x82, MRMSrcMem, (outs), (ins GR64:$src1, i128mem:$src2), - "invpcid {$src2, $src1|$src1, $src2}", []>, OpSize, T8, + "invpcid\t{$src2, $src1|$src1, $src2}", []>, OpSize, T8, Requires<[In64BitMode]>; + +//===----------------------------------------------------------------------===// +// SMAP Instruction +let Defs = [EFLAGS], Uses = [EFLAGS] in { + def CLAC : I<0x01, MRM_CA, (outs), (ins), "clac", []>, TB; + def STAC : I<0x01, MRM_CB, (outs), (ins), "stac", []>, TB; +} diff --git a/contrib/llvm/lib/Target/X86/X86SchedHaswell.td b/contrib/llvm/lib/Target/X86/X86SchedHaswell.td index 7de6791..84c9203 100644 --- a/contrib/llvm/lib/Target/X86/X86SchedHaswell.td +++ b/contrib/llvm/lib/Target/X86/X86SchedHaswell.td @@ -18,7 +18,7 @@ def HaswellModel : SchedMachineModel { let IssueWidth = 4; let MinLatency = 0; // 0 = Out-of-order execution. let LoadLatency = 4; - let ILPWindow = 40; + let ILPWindow = 30; let MispredictPenalty = 16; } diff --git a/contrib/llvm/lib/Target/X86/X86SchedSandyBridge.td b/contrib/llvm/lib/Target/X86/X86SchedSandyBridge.td index 74d5f1b..b36b3ad 100644 --- a/contrib/llvm/lib/Target/X86/X86SchedSandyBridge.td +++ b/contrib/llvm/lib/Target/X86/X86SchedSandyBridge.td @@ -19,7 +19,7 @@ def SandyBridgeModel : SchedMachineModel { let IssueWidth = 4; let MinLatency = 0; // 0 = Out-of-order execution. 
let LoadLatency = 4; - let ILPWindow = 30; + let ILPWindow = 20; let MispredictPenalty = 16; } diff --git a/contrib/llvm/lib/Target/X86/X86Subtarget.cpp b/contrib/llvm/lib/Target/X86/X86Subtarget.cpp index 14619b6..74da2a9 100644 --- a/contrib/llvm/lib/Target/X86/X86Subtarget.cpp +++ b/contrib/llvm/lib/Target/X86/X86Subtarget.cpp @@ -170,6 +170,26 @@ bool X86Subtarget::IsLegalToCallImmediateAddr(const TargetMachine &TM) const { return isTargetELF() || TM.getRelocationModel() == Reloc::Static; } +static bool OSHasAVXSupport() { +#if defined(i386) || defined(__i386__) || defined(__x86__) || defined(_M_IX86)\ + || defined(__x86_64__) || defined(_M_AMD64) || defined (_M_X64) +#if defined(__GNUC__) + // Check xgetbv; this uses a .byte sequence instead of the instruction + // directly because older assemblers do not include support for xgetbv and + // there is no easy way to conditionally compile based on the assembler used. + int rEAX, rEDX; + __asm__ (".byte 0x0f, 0x01, 0xd0" : "=a" (rEAX), "=d" (rEDX) : "c" (0)); +#elif defined(_MSC_FULL_VER) && defined(_XCR_XFEATURE_ENABLED_MASK) + unsigned long long rEAX = _xgetbv(_XCR_XFEATURE_ENABLED_MASK); +#else + int rEAX = 0; // Ensures we return false +#endif + return (rEAX & 6) == 6; +#else + return false; +#endif +} + void X86Subtarget::AutoDetectSubtargetFeatures() { unsigned EAX = 0, EBX = 0, ECX = 0, EDX = 0; unsigned MaxLevel; @@ -192,7 +212,9 @@ void X86Subtarget::AutoDetectSubtargetFeatures() { if ((ECX >> 9) & 1) { X86SSELevel = SSSE3; ToggleFeature(X86::FeatureSSSE3);} if ((ECX >> 19) & 1) { X86SSELevel = SSE41; ToggleFeature(X86::FeatureSSE41);} if ((ECX >> 20) & 1) { X86SSELevel = SSE42; ToggleFeature(X86::FeatureSSE42);} - if ((ECX >> 28) & 1) { X86SSELevel = AVX; ToggleFeature(X86::FeatureAVX); } + if (((ECX >> 27) & 1) && ((ECX >> 28) & 1) && OSHasAVXSupport()) { + X86SSELevel = AVX; ToggleFeature(X86::FeatureAVX); + } bool IsIntel = memcmp(text.c, "GenuineIntel", 12) == 0; bool IsAMD = !IsIntel && memcmp(text.c, "AuthenticAMD", 12) == 0; @@ -467,6 +489,7 @@ void X86Subtarget::initializeEnvironment() { PostRAScheduler = false; PadShortFunctions = false; CallRegIndirect = false; + LEAUsesAG = false; stackAlignment = 4; // FIXME: this is a known good value for Yonah. How about others? MaxInlineSizeThreshold = 128; diff --git a/contrib/llvm/lib/Target/X86/X86Subtarget.h b/contrib/llvm/lib/Target/X86/X86Subtarget.h index 6fbdb1d..66832b9 100644 --- a/contrib/llvm/lib/Target/X86/X86Subtarget.h +++ b/contrib/llvm/lib/Target/X86/X86Subtarget.h @@ -165,6 +165,9 @@ protected: /// CallRegIndirect - True if the Calls with memory reference should be converted /// to a register-based indirect call. bool CallRegIndirect; + /// LEAUsesAG - True if the LEA instruction inputs have to be ready at + /// address generation (AG) time. + bool LEAUsesAG; /// stackAlignment - The minimum alignment known to hold of the stack frame on /// entry to the function and which must be maintained by every function. 
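The OSHasAVXSupport helper added in the X86Subtarget.cpp hunk above gates AVX auto-detection on operating-system support: CPUID must report OSXSAVE (ECX bit 27) and AVX (ECX bit 28), and XGETBV must show that the OS saves both SSE and AVX register state, i.e. bits 1 and 2 of XCR0, hence the (rEAX & 6) == 6 test. A minimal GCC-style sketch of that probe, mirroring the .byte-encoded xgetbv used in the hunk:

    #include <cstdio>

    // Returns true when XCR0 says the OS preserves XMM (bit 1) and YMM (bit 2)
    // state across context switches; both are required before enabling AVX.
    // A real caller must first confirm OSXSAVE via CPUID (ECX bit 27), since
    // executing xgetbv without it raises #UD.
    static bool osSavesAVXState() {
    #if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
      unsigned int eax, edx;
      // xgetbv with ECX = 0 reads XCR0; emitted as raw bytes so that old
      // assemblers without xgetbv support can still build the file.
      __asm__(".byte 0x0f, 0x01, 0xd0" : "=a"(eax), "=d"(edx) : "c"(0));
      return (eax & 6) == 6;
    #else
      return false;
    #endif
    }

    int main() {
      std::printf("OS saves AVX state: %s\n", osSavesAVXState() ? "yes" : "no");
    }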
@@ -278,6 +281,7 @@ public: bool hasSlowDivide() const { return HasSlowDivide; } bool padShortFunctions() const { return PadShortFunctions; } bool callRegIndirect() const { return CallRegIndirect; } + bool LEAusesAG() const { return LEAUsesAG; } bool isAtom() const { return X86ProcFamily == IntelAtom; } diff --git a/contrib/llvm/lib/Target/X86/X86TargetMachine.cpp b/contrib/llvm/lib/Target/X86/X86TargetMachine.cpp index 8aa58a2..00fa47f 100644 --- a/contrib/llvm/lib/Target/X86/X86TargetMachine.cpp +++ b/contrib/llvm/lib/Target/X86/X86TargetMachine.cpp @@ -215,6 +215,11 @@ bool X86PassConfig::addPreEmitPass() { addPass(createX86PadShortFunctions()); ShouldPrint = true; } + if (getOptLevel() != CodeGenOpt::None && + getX86Subtarget().LEAusesAG()){ + addPass(createX86FixupLEAs()); + ShouldPrint = true; + } return ShouldPrint; } diff --git a/contrib/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/contrib/llvm/lib/Target/X86/X86TargetTransformInfo.cpp index a98c699..eba9d78 100644 --- a/contrib/llvm/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/contrib/llvm/lib/Target/X86/X86TargetTransformInfo.cpp @@ -334,9 +334,44 @@ unsigned X86TTI::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) const { int ISD = TLI->InstructionOpcodeToISD(Opcode); assert(ISD && "Invalid opcode"); + std::pair<unsigned, MVT> LTSrc = TLI->getTypeLegalizationCost(Src); + std::pair<unsigned, MVT> LTDest = TLI->getTypeLegalizationCost(Dst); + + static const TypeConversionCostTblEntry<MVT> SSE2ConvTbl[] = { + // These are somewhat magic numbers justified by looking at the output of + // Intel's IACA, running some kernels and making sure when we take + // legalization into account the throughput will be overestimated. + { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i64, 2*10 }, + { ISD::UINT_TO_FP, MVT::v2f64, MVT::v4i32, 4*10 }, + { ISD::UINT_TO_FP, MVT::v2f64, MVT::v8i16, 8*10 }, + { ISD::UINT_TO_FP, MVT::v2f64, MVT::v16i8, 16*10 }, + { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i64, 2*10 }, + { ISD::SINT_TO_FP, MVT::v2f64, MVT::v4i32, 4*10 }, + { ISD::SINT_TO_FP, MVT::v2f64, MVT::v8i16, 8*10 }, + { ISD::SINT_TO_FP, MVT::v2f64, MVT::v16i8, 16*10 }, + // There are faster sequences for float conversions. + { ISD::UINT_TO_FP, MVT::v4f32, MVT::v2i64, 15 }, + { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i32, 15 }, + { ISD::UINT_TO_FP, MVT::v4f32, MVT::v8i16, 15 }, + { ISD::UINT_TO_FP, MVT::v4f32, MVT::v16i8, 8 }, + { ISD::SINT_TO_FP, MVT::v4f32, MVT::v2i64, 15 }, + { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i32, 15 }, + { ISD::SINT_TO_FP, MVT::v4f32, MVT::v8i16, 15 }, + { ISD::SINT_TO_FP, MVT::v4f32, MVT::v16i8, 8 }, + }; + + if (ST->hasSSE2() && !ST->hasAVX()) { + int Idx = ConvertCostTableLookup<MVT>(SSE2ConvTbl, + array_lengthof(SSE2ConvTbl), + ISD, LTDest.second, LTSrc.second); + if (Idx != -1) + return LTSrc.first * SSE2ConvTbl[Idx].Cost; + } + EVT SrcTy = TLI->getValueType(Src); EVT DstTy = TLI->getValueType(Dst); + // The function getSimpleVT only handles simple value types. 
if (!SrcTy.isSimple() || !DstTy.isSimple()) return TargetTransformInfo::getCastInstrCost(Opcode, Dst, Src); diff --git a/contrib/llvm/lib/Target/XCore/Disassembler/XCoreDisassembler.cpp b/contrib/llvm/lib/Target/XCore/Disassembler/XCoreDisassembler.cpp index 7b99967..a2ae40c 100644 --- a/contrib/llvm/lib/Target/XCore/Disassembler/XCoreDisassembler.cpp +++ b/contrib/llvm/lib/Target/XCore/Disassembler/XCoreDisassembler.cpp @@ -97,8 +97,8 @@ static DecodeStatus DecodeRRegsRegisterClass(MCInst &Inst, static DecodeStatus DecodeBitpOperand(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeMEMiiOperand(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder); +static DecodeStatus DecodeNegImmOperand(MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder); static DecodeStatus Decode2RInstruction(MCInst &Inst, unsigned Insn, @@ -242,10 +242,9 @@ static DecodeStatus DecodeBitpOperand(MCInst &Inst, unsigned Val, return MCDisassembler::Success; } -static DecodeStatus DecodeMEMiiOperand(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder) { - Inst.addOperand(MCOperand::CreateImm(Val)); - Inst.addOperand(MCOperand::CreateImm(0)); +static DecodeStatus DecodeNegImmOperand(MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder) { + Inst.addOperand(MCOperand::CreateImm(-(int64_t)Val)); return MCDisassembler::Success; } diff --git a/contrib/llvm/lib/Target/XCore/InstPrinter/XCoreInstPrinter.cpp b/contrib/llvm/lib/Target/XCore/InstPrinter/XCoreInstPrinter.cpp index 1592351..9ae8c0d 100644 --- a/contrib/llvm/lib/Target/XCore/InstPrinter/XCoreInstPrinter.cpp +++ b/contrib/llvm/lib/Target/XCore/InstPrinter/XCoreInstPrinter.cpp @@ -84,14 +84,3 @@ printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O) { assert(Op.isExpr() && "unknown operand kind in printOperand"); printExpr(Op.getExpr(), O); } - -void XCoreInstPrinter:: -printMemOperand(const MCInst *MI, int opNum, raw_ostream &O) { - printOperand(MI, opNum, O); - - if (MI->getOperand(opNum+1).isImm() && MI->getOperand(opNum+1).getImm() == 0) - return; - - O << "+"; - printOperand(MI, opNum+1, O); -} diff --git a/contrib/llvm/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp b/contrib/llvm/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp index b5b072d..c177365 100644 --- a/contrib/llvm/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp +++ b/contrib/llvm/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp @@ -66,6 +66,9 @@ static MCCodeGenInfo *createXCoreMCCodeGenInfo(StringRef TT, Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL) { MCCodeGenInfo *X = new MCCodeGenInfo(); + if (RM == Reloc::Default) { + RM = Reloc::Static; + } X->InitMCCodeGenInfo(RM, CM, OL); return X; } diff --git a/contrib/llvm/lib/Target/XCore/XCore.h b/contrib/llvm/lib/Target/XCore/XCore.h index 08f091e..2f375fc 100644 --- a/contrib/llvm/lib/Target/XCore/XCore.h +++ b/contrib/llvm/lib/Target/XCore/XCore.h @@ -20,12 +20,16 @@ namespace llvm { class FunctionPass; + class ModulePass; class TargetMachine; class XCoreTargetMachine; class formatted_raw_ostream; + void initializeXCoreLowerThreadLocalPass(PassRegistry &p); + FunctionPass *createXCoreISelDag(XCoreTargetMachine &TM, CodeGenOpt::Level OptLevel); + ModulePass *createXCoreLowerThreadLocalPass(); } // end namespace llvm; diff --git a/contrib/llvm/lib/Target/XCore/XCoreAsmPrinter.cpp b/contrib/llvm/lib/Target/XCore/XCoreAsmPrinter.cpp index 0d146ba..e177ad3 100644 --- a/contrib/llvm/lib/Target/XCore/XCoreAsmPrinter.cpp 
+++ b/contrib/llvm/lib/Target/XCore/XCoreAsmPrinter.cpp @@ -36,7 +36,6 @@ #include "llvm/MC/MCInst.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" -#include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Support/raw_ostream.h" @@ -46,12 +45,6 @@ #include <cctype> using namespace llvm; -static cl::opt<unsigned> MaxThreads("xcore-max-threads", cl::Optional, - cl::desc("Maximum number of threads (for emulation thread-local storage)"), - cl::Hidden, - cl::value_desc("number"), - cl::init(8)); - namespace { class XCoreAsmPrinter : public AsmPrinter { const XCoreSubtarget &Subtarget; @@ -152,10 +145,10 @@ void XCoreAsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { EmitAlignment(Align > 2 ? Align : 2, GV); - unsigned Size = TD->getTypeAllocSize(C->getType()); if (GV->isThreadLocal()) { - Size *= MaxThreads; + report_fatal_error("TLS is not supported by this target!"); } + unsigned Size = TD->getTypeAllocSize(C->getType()); if (MAI->hasDotTypeDotSizeDirective()) { OutStreamer.EmitSymbolAttribute(GVSym, MCSA_ELF_TypeObject); OutStreamer.EmitRawText("\t.size " + Twine(GVSym->getName()) + "," + @@ -164,10 +157,6 @@ void XCoreAsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { OutStreamer.EmitLabel(GVSym); EmitGlobalConstant(C); - if (GV->isThreadLocal()) { - for (unsigned i = 1; i < MaxThreads; ++i) - EmitGlobalConstant(C); - } // The ABI requires that unsigned scalar types smaller than 32 bits // are padded to 32 bits. if (Size < 4) diff --git a/contrib/llvm/lib/Target/XCore/XCoreISelDAGToDAG.cpp b/contrib/llvm/lib/Target/XCore/XCoreISelDAGToDAG.cpp index fbf86c5..eb29b50 100644 --- a/contrib/llvm/lib/Target/XCore/XCoreISelDAGToDAG.cpp +++ b/contrib/llvm/lib/Target/XCore/XCoreISelDAGToDAG.cpp @@ -68,8 +68,6 @@ namespace { // Complex Pattern Selectors. 
bool SelectADDRspii(SDValue Addr, SDValue &Base, SDValue &Offset); - bool SelectADDRdpii(SDValue Addr, SDValue &Base, SDValue &Offset); - bool SelectADDRcpii(SDValue Addr, SDValue &Base, SDValue &Offset); virtual const char *getPassName() const { return "XCore DAG->DAG Pattern Instruction Selection"; @@ -110,48 +108,6 @@ bool XCoreDAGToDAGISel::SelectADDRspii(SDValue Addr, SDValue &Base, return false; } -bool XCoreDAGToDAGISel::SelectADDRdpii(SDValue Addr, SDValue &Base, - SDValue &Offset) { - if (Addr.getOpcode() == XCoreISD::DPRelativeWrapper) { - Base = Addr.getOperand(0); - Offset = CurDAG->getTargetConstant(0, MVT::i32); - return true; - } - if (Addr.getOpcode() == ISD::ADD) { - ConstantSDNode *CN = 0; - if ((Addr.getOperand(0).getOpcode() == XCoreISD::DPRelativeWrapper) - && (CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) - && (CN->getSExtValue() % 4 == 0 && CN->getSExtValue() >= 0)) { - // Constant word offset from a object in the data region - Base = Addr.getOperand(0).getOperand(0); - Offset = CurDAG->getTargetConstant(CN->getSExtValue(), MVT::i32); - return true; - } - } - return false; -} - -bool XCoreDAGToDAGISel::SelectADDRcpii(SDValue Addr, SDValue &Base, - SDValue &Offset) { - if (Addr.getOpcode() == XCoreISD::CPRelativeWrapper) { - Base = Addr.getOperand(0); - Offset = CurDAG->getTargetConstant(0, MVT::i32); - return true; - } - if (Addr.getOpcode() == ISD::ADD) { - ConstantSDNode *CN = 0; - if ((Addr.getOperand(0).getOpcode() == XCoreISD::CPRelativeWrapper) - && (CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) - && (CN->getSExtValue() % 4 == 0 && CN->getSExtValue() >= 0)) { - // Constant word offset from a object in the data region - Base = Addr.getOperand(0).getOperand(0); - Offset = CurDAG->getTargetConstant(CN->getSExtValue(), MVT::i32); - return true; - } - } - return false; -} - SDNode *XCoreDAGToDAGISel::Select(SDNode *N) { DebugLoc dl = N->getDebugLoc(); switch (N->getOpcode()) { @@ -185,36 +141,36 @@ SDNode *XCoreDAGToDAGISel::Select(SDNode *N) { SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2) }; return CurDAG->getMachineNode(XCore::LADD_l5r, dl, MVT::i32, MVT::i32, - Ops, 3); + Ops); } case XCoreISD::LSUB: { SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2) }; return CurDAG->getMachineNode(XCore::LSUB_l5r, dl, MVT::i32, MVT::i32, - Ops, 3); + Ops); } case XCoreISD::MACCU: { SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2), N->getOperand(3) }; return CurDAG->getMachineNode(XCore::MACCU_l4r, dl, MVT::i32, MVT::i32, - Ops, 4); + Ops); } case XCoreISD::MACCS: { SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2), N->getOperand(3) }; return CurDAG->getMachineNode(XCore::MACCS_l4r, dl, MVT::i32, MVT::i32, - Ops, 4); + Ops); } case XCoreISD::LMUL: { SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2), N->getOperand(3) }; return CurDAG->getMachineNode(XCore::LMUL_l6r, dl, MVT::i32, MVT::i32, - Ops, 4); + Ops); } case XCoreISD::CRC8: { SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2) }; return CurDAG->getMachineNode(XCore::CRC8_l4r, dl, MVT::i32, MVT::i32, - Ops, 3); + Ops); } case ISD::BRIND: if (SDNode *ResNode = SelectBRIND(N)) diff --git a/contrib/llvm/lib/Target/XCore/XCoreISelLowering.cpp b/contrib/llvm/lib/Target/XCore/XCoreISelLowering.cpp index a5d2be8..2d27f1a 100644 --- a/contrib/llvm/lib/Target/XCore/XCoreISelLowering.cpp +++ b/contrib/llvm/lib/Target/XCore/XCoreISelLowering.cpp @@ -36,6 +36,8 @@ #include "llvm/Support/Debug.h" 
#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" +#include <algorithm> + using namespace llvm; const char *XCoreTargetLowering:: @@ -120,9 +122,6 @@ XCoreTargetLowering::XCoreTargetLowering(XCoreTargetMachine &XTM) setOperationAction(ISD::GlobalAddress, MVT::i32, Custom); setOperationAction(ISD::BlockAddress, MVT::i32 , Custom); - // Thread Local Storage - setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom); - // Conversion of i64 -> double produces constantpool nodes setOperationAction(ISD::ConstantPool, MVT::i32, Custom); @@ -172,7 +171,6 @@ LowerOperation(SDValue Op, SelectionDAG &DAG) const { switch (Op.getOpcode()) { case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG); - case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG); case ISD::BlockAddress: return LowerBlockAddress(Op, DAG); case ISD::ConstantPool: return LowerConstantPool(Op, DAG); case ISD::BR_JT: return LowerBR_JT(Op, DAG); @@ -245,9 +243,20 @@ getGlobalAddressWrapper(SDValue GA, const GlobalValue *GV, SDValue XCoreTargetLowering:: LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const { - const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); - SDValue GA = DAG.getTargetGlobalAddress(GV, Op.getDebugLoc(), MVT::i32); - return getGlobalAddressWrapper(GA, GV, DAG); + DebugLoc DL = Op.getDebugLoc(); + const GlobalAddressSDNode *GN = cast<GlobalAddressSDNode>(Op); + const GlobalValue *GV = GN->getGlobal(); + int64_t Offset = GN->getOffset(); + // We can only fold positive offsets that are a multiple of the word size. + int64_t FoldedOffset = std::max(Offset & ~3, (int64_t)0); + SDValue GA = DAG.getTargetGlobalAddress(GV, DL, MVT::i32, FoldedOffset); + GA = getGlobalAddressWrapper(GA, GV, DAG); + // Handle the rest of the offset. 
+ if (Offset != FoldedOffset) { + SDValue Remaining = DAG.getConstant(Offset - FoldedOffset, MVT::i32); + GA = DAG.getNode(ISD::ADD, DL, MVT::i32, GA, Remaining); + } + return GA; } static inline SDValue BuildGetId(SelectionDAG &DAG, DebugLoc dl) { @@ -255,44 +264,6 @@ static inline SDValue BuildGetId(SelectionDAG &DAG, DebugLoc dl) { DAG.getConstant(Intrinsic::xcore_getid, MVT::i32)); } -static inline bool isZeroLengthArray(Type *Ty) { - ArrayType *AT = dyn_cast_or_null<ArrayType>(Ty); - return AT && (AT->getNumElements() == 0); -} - -SDValue XCoreTargetLowering:: -LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const -{ - // FIXME there isn't really debug info here - DebugLoc dl = Op.getDebugLoc(); - // transform to label + getid() * size - const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); - SDValue GA = DAG.getTargetGlobalAddress(GV, dl, MVT::i32); - const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV); - if (!GVar) { - // If GV is an alias then use the aliasee to determine size - if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV)) - GVar = dyn_cast_or_null<GlobalVariable>(GA->resolveAliasedGlobal()); - } - if (!GVar) { - llvm_unreachable("Thread local object not a GlobalVariable?"); - } - Type *Ty = cast<PointerType>(GV->getType())->getElementType(); - if (!Ty->isSized() || isZeroLengthArray(Ty)) { -#ifndef NDEBUG - errs() << "Size of thread local object " << GVar->getName() - << " is unknown\n"; -#endif - llvm_unreachable(0); - } - SDValue base = getGlobalAddressWrapper(GA, GV, DAG); - const DataLayout *TD = TM.getDataLayout(); - unsigned Size = TD->getTypeAllocSize(Ty); - SDValue offset = DAG.getNode(ISD::MUL, dl, MVT::i32, BuildGetId(DAG, dl), - DAG.getConstant(Size, MVT::i32)); - return DAG.getNode(ISD::ADD, dl, MVT::i32, base, offset); -} - SDValue XCoreTargetLowering:: LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const { @@ -350,55 +321,58 @@ LowerBR_JT(SDValue Op, SelectionDAG &DAG) const ScaledIndex); } -static bool -IsWordAlignedBasePlusConstantOffset(SDValue Addr, SDValue &AlignedBase, - int64_t &Offset) +SDValue XCoreTargetLowering:: +lowerLoadWordFromAlignedBasePlusOffset(DebugLoc DL, SDValue Chain, SDValue Base, + int64_t Offset, SelectionDAG &DAG) const { - if (Addr.getOpcode() != ISD::ADD) { - return false; + if ((Offset & 0x3) == 0) { + return DAG.getLoad(getPointerTy(), DL, Chain, Base, MachinePointerInfo(), + false, false, false, 0); } - ConstantSDNode *CN = 0; - if (!(CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) { - return false; - } - int64_t off = CN->getSExtValue(); - const SDValue &Base = Addr.getOperand(0); - const SDValue *Root = &Base; - if (Base.getOpcode() == ISD::ADD && - Base.getOperand(1).getOpcode() == ISD::SHL) { - ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Base.getOperand(1) - .getOperand(1)); - if (CN && (CN->getSExtValue() >= 2)) { - Root = &Base.getOperand(0); - } - } - if (isa<FrameIndexSDNode>(*Root)) { - // All frame indicies are word aligned - AlignedBase = Base; - Offset = off; - return true; - } - if (Root->getOpcode() == XCoreISD::DPRelativeWrapper || - Root->getOpcode() == XCoreISD::CPRelativeWrapper) { - // All dp / cp relative addresses are word aligned - AlignedBase = Base; - Offset = off; - return true; - } - // Check for an aligned global variable. 
- if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(*Root)) { - const GlobalValue *GV = GA->getGlobal(); - if (GA->getOffset() == 0 && GV->getAlignment() >= 4) { - AlignedBase = Base; - Offset = off; - return true; - } + // Lower to pair of consecutive word aligned loads plus some bit shifting. + int32_t HighOffset = RoundUpToAlignment(Offset, 4); + int32_t LowOffset = HighOffset - 4; + SDValue LowAddr, HighAddr; + if (GlobalAddressSDNode *GASD = + dyn_cast<GlobalAddressSDNode>(Base.getNode())) { + LowAddr = DAG.getGlobalAddress(GASD->getGlobal(), DL, Base.getValueType(), + LowOffset); + HighAddr = DAG.getGlobalAddress(GASD->getGlobal(), DL, Base.getValueType(), + HighOffset); + } else { + LowAddr = DAG.getNode(ISD::ADD, DL, MVT::i32, Base, + DAG.getConstant(LowOffset, MVT::i32)); + HighAddr = DAG.getNode(ISD::ADD, DL, MVT::i32, Base, + DAG.getConstant(HighOffset, MVT::i32)); } - return false; + SDValue LowShift = DAG.getConstant((Offset - LowOffset) * 8, MVT::i32); + SDValue HighShift = DAG.getConstant((HighOffset - Offset) * 8, MVT::i32); + + SDValue Low = DAG.getLoad(getPointerTy(), DL, Chain, + LowAddr, MachinePointerInfo(), + false, false, false, 0); + SDValue High = DAG.getLoad(getPointerTy(), DL, Chain, + HighAddr, MachinePointerInfo(), + false, false, false, 0); + SDValue LowShifted = DAG.getNode(ISD::SRL, DL, MVT::i32, Low, LowShift); + SDValue HighShifted = DAG.getNode(ISD::SHL, DL, MVT::i32, High, HighShift); + SDValue Result = DAG.getNode(ISD::OR, DL, MVT::i32, LowShifted, HighShifted); + Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Low.getValue(1), + High.getValue(1)); + SDValue Ops[] = { Result, Chain }; + return DAG.getMergeValues(Ops, 2, DL); +} + +static bool isWordAligned(SDValue Value, SelectionDAG &DAG) +{ + APInt KnownZero, KnownOne; + DAG.ComputeMaskedBits(Value, KnownZero, KnownOne); + return KnownZero.countTrailingOnes() >= 2; } SDValue XCoreTargetLowering:: LowerLOAD(SDValue Op, SelectionDAG &DAG) const { + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); LoadSDNode *LD = cast<LoadSDNode>(Op); assert(LD->getExtensionType() == ISD::NON_EXTLOAD && "Unexpected extension type"); @@ -416,45 +390,23 @@ LowerLOAD(SDValue Op, SelectionDAG &DAG) const { SDValue BasePtr = LD->getBasePtr(); DebugLoc DL = Op.getDebugLoc(); - SDValue Base; - int64_t Offset; - if (!LD->isVolatile() && - IsWordAlignedBasePlusConstantOffset(BasePtr, Base, Offset)) { - if (Offset % 4 == 0) { - // We've managed to infer better alignment information than the load - // already has. Use an aligned load. 
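The lowerLoadWordFromAlignedBasePlusOffset routine added in the XCoreISelLowering.cpp hunk above keeps a single load when the offset is a multiple of four, and otherwise splits the access into the two surrounding word-aligned loads combined with shifts and an or. The same transformation in plain C++, illustrative and little-endian, not the SelectionDAG code; it assumes, as the lowering does, that the object spans both aligned words touched:

    #include <cstdint>
    #include <cstring>

    // Load a 32-bit word at (base + offset) where base is 4-byte aligned but
    // offset need not be; only word-aligned loads are issued.
    uint32_t loadWordFromAlignedBase(const unsigned char *base, int64_t offset) {
      if ((offset & 3) == 0) {
        uint32_t word;
        std::memcpy(&word, base + offset, 4);  // already aligned: one load
        return word;
      }
      int64_t highOffset = (offset + 3) & ~int64_t(3); // round up to alignment
      int64_t lowOffset  = highOffset - 4;
      uint32_t low, high;
      std::memcpy(&low,  base + lowOffset,  4);
      std::memcpy(&high, base + highOffset, 4);
      unsigned lowShift  = unsigned(offset - lowOffset) * 8;   // 8, 16 or 24
      unsigned highShift = unsigned(highOffset - offset) * 8;  // 24, 16 or 8
      return (low >> lowShift) | (high << highShift);          // little-endian
    }

Because the aligned case returns early, neither shift amount can reach 32, so the combine is well defined.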
- // - return DAG.getLoad(getPointerTy(), DL, Chain, BasePtr, - MachinePointerInfo(), - false, false, false, 0); + if (!LD->isVolatile()) { + const GlobalValue *GV; + int64_t Offset = 0; + if (DAG.isBaseWithConstantOffset(BasePtr) && + isWordAligned(BasePtr->getOperand(0), DAG)) { + SDValue NewBasePtr = BasePtr->getOperand(0); + Offset = cast<ConstantSDNode>(BasePtr->getOperand(1))->getSExtValue(); + return lowerLoadWordFromAlignedBasePlusOffset(DL, Chain, NewBasePtr, + Offset, DAG); + } + if (TLI.isGAPlusOffset(BasePtr.getNode(), GV, Offset) && + MinAlign(GV->getAlignment(), 4) == 4) { + SDValue NewBasePtr = DAG.getGlobalAddress(GV, DL, + BasePtr->getValueType(0)); + return lowerLoadWordFromAlignedBasePlusOffset(DL, Chain, NewBasePtr, + Offset, DAG); } - // Lower to - // ldw low, base[offset >> 2] - // ldw high, base[(offset >> 2) + 1] - // shr low_shifted, low, (offset & 0x3) * 8 - // shl high_shifted, high, 32 - (offset & 0x3) * 8 - // or result, low_shifted, high_shifted - SDValue LowOffset = DAG.getConstant(Offset & ~0x3, MVT::i32); - SDValue HighOffset = DAG.getConstant((Offset & ~0x3) + 4, MVT::i32); - SDValue LowShift = DAG.getConstant((Offset & 0x3) * 8, MVT::i32); - SDValue HighShift = DAG.getConstant(32 - (Offset & 0x3) * 8, MVT::i32); - - SDValue LowAddr = DAG.getNode(ISD::ADD, DL, MVT::i32, Base, LowOffset); - SDValue HighAddr = DAG.getNode(ISD::ADD, DL, MVT::i32, Base, HighOffset); - - SDValue Low = DAG.getLoad(getPointerTy(), DL, Chain, - LowAddr, MachinePointerInfo(), - false, false, false, 0); - SDValue High = DAG.getLoad(getPointerTy(), DL, Chain, - HighAddr, MachinePointerInfo(), - false, false, false, 0); - SDValue LowShifted = DAG.getNode(ISD::SRL, DL, MVT::i32, Low, LowShift); - SDValue HighShifted = DAG.getNode(ISD::SHL, DL, MVT::i32, High, HighShift); - SDValue Result = DAG.getNode(ISD::OR, DL, MVT::i32, LowShifted, HighShifted); - Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Low.getValue(1), - High.getValue(1)); - SDValue Ops[] = { Result, Chain }; - return DAG.getMergeValues(Ops, 2, DL); } if (LD->getAlignment() == 2) { diff --git a/contrib/llvm/lib/Target/XCore/XCoreISelLowering.h b/contrib/llvm/lib/Target/XCore/XCoreISelLowering.h index 8d258f5..c7dfa26 100644 --- a/contrib/llvm/lib/Target/XCore/XCoreISelLowering.h +++ b/contrib/llvm/lib/Target/XCore/XCoreISelLowering.h @@ -133,6 +133,9 @@ namespace llvm { SDValue getReturnAddressFrameIndex(SelectionDAG &DAG) const; SDValue getGlobalAddressWrapper(SDValue GA, const GlobalValue *GV, SelectionDAG &DAG) const; + SDValue lowerLoadWordFromAlignedBasePlusOffset(DebugLoc DL, SDValue Chain, + SDValue Base, int64_t Offset, + SelectionDAG &DAG) const; // Lower Operand specifics SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const; diff --git a/contrib/llvm/lib/Target/XCore/XCoreInstrInfo.td b/contrib/llvm/lib/Target/XCore/XCoreInstrInfo.td index 03653cb..587166c 100644 --- a/contrib/llvm/lib/Target/XCore/XCoreInstrInfo.td +++ b/contrib/llvm/lib/Target/XCore/XCoreInstrInfo.td @@ -168,21 +168,20 @@ def ldawb : PatFrag<(ops node:$addr, node:$offset), (sub node:$addr, (shl node:$offset, 2))>; // Instruction operand types -def calltarget : Operand<i32>; +def pcrel_imm : Operand<i32>; +def pcrel_imm_neg : Operand<i32> { + let DecoderMethod = "DecodeNegImmOperand"; +} def brtarget : Operand<OtherVT>; -def pclabel : Operand<i32>; +def brtarget_neg : Operand<OtherVT> { + let DecoderMethod = "DecodeNegImmOperand"; +} // Addressing modes def ADDRspii : ComplexPattern<i32, 2, "SelectADDRspii", [add, frameindex], []>; -def 
ADDRdpii : ComplexPattern<i32, 2, "SelectADDRdpii", [add, dprelwrapper], - []>; -def ADDRcpii : ComplexPattern<i32, 2, "SelectADDRcpii", [add, cprelwrapper], - []>; // Address operands def MEMii : Operand<i32> { - let PrintMethod = "printMemOperand"; - let DecoderMethod = "DecodeMEMiiOperand"; let MIOperandInfo = (ops i32imm, i32imm); } @@ -274,10 +273,10 @@ multiclass FRU6_LRU6_branch<bits<6> opc, string OpcStr> { } multiclass FRU6_LRU6_backwards_branch<bits<6> opc, string OpcStr> { - def _ru6: _FRU6<opc, (outs), (ins GRRegs:$a, brtarget:$b), - !strconcat(OpcStr, " $a, -$b"), []>; - def _lru6: _FLRU6<opc, (outs), (ins GRRegs:$a, brtarget:$b), - !strconcat(OpcStr, " $a, -$b"), []>; + def _ru6: _FRU6<opc, (outs), (ins GRRegs:$a, brtarget_neg:$b), + !strconcat(OpcStr, " $a, $b"), []>; + def _lru6: _FLRU6<opc, (outs), (ins GRRegs:$a, brtarget_neg:$b), + !strconcat(OpcStr, " $a, $b"), []>; } multiclass FRU6_LRU6_cp<bits<6> opc, string OpcStr> { @@ -515,29 +514,29 @@ def LMUL_l6r : _FL6R< //let Uses = [DP] in ... let neverHasSideEffects = 1, isReMaterializable = 1 in -def LDAWDP_ru6: _FRU6<0b011000, (outs RRegs:$a), (ins MEMii:$b), +def LDAWDP_ru6: _FRU6<0b011000, (outs RRegs:$a), (ins i32imm:$b), "ldaw $a, dp[$b]", []>; let isReMaterializable = 1 in -def LDAWDP_lru6: _FLRU6<0b011000, (outs RRegs:$a), (ins MEMii:$b), +def LDAWDP_lru6: _FLRU6<0b011000, (outs RRegs:$a), (ins i32imm:$b), "ldaw $a, dp[$b]", - [(set RRegs:$a, ADDRdpii:$b)]>; + [(set RRegs:$a, (dprelwrapper tglobaladdr:$b))]>; let mayLoad=1 in -def LDWDP_ru6: _FRU6<0b010110, (outs RRegs:$a), (ins MEMii:$b), +def LDWDP_ru6: _FRU6<0b010110, (outs RRegs:$a), (ins i32imm:$b), "ldw $a, dp[$b]", []>; -def LDWDP_lru6: _FLRU6<0b010110, (outs RRegs:$a), (ins MEMii:$b), +def LDWDP_lru6: _FLRU6<0b010110, (outs RRegs:$a), (ins i32imm:$b), "ldw $a, dp[$b]", - [(set RRegs:$a, (load ADDRdpii:$b))]>; + [(set RRegs:$a, (load (dprelwrapper tglobaladdr:$b)))]>; let mayStore=1 in -def STWDP_ru6 : _FRU6<0b010100, (outs), (ins RRegs:$a, MEMii:$b), +def STWDP_ru6 : _FRU6<0b010100, (outs), (ins RRegs:$a, i32imm:$b), "stw $a, dp[$b]", []>; -def STWDP_lru6 : _FLRU6<0b010100, (outs), (ins RRegs:$a, MEMii:$b), +def STWDP_lru6 : _FLRU6<0b010100, (outs), (ins RRegs:$a, i32imm:$b), "stw $a, dp[$b]", - [(store RRegs:$a, ADDRdpii:$b)]>; + [(store RRegs:$a, (dprelwrapper tglobaladdr:$b))]>; //let Uses = [CP] in .. let mayLoad = 1, isReMaterializable = 1, neverHasSideEffects = 1 in @@ -615,9 +614,9 @@ let Uses = [R11], isCall=1 in defm BLAT : FU6_LU6_np<0b0111001101, "blat">; let isBranch = 1, isTerminator = 1, isBarrier = 1 in { -def BRBU_u6 : _FU6<0b0111011100, (outs), (ins brtarget:$a), "bu -$a", []>; +def BRBU_u6 : _FU6<0b0111011100, (outs), (ins brtarget_neg:$a), "bu $a", []>; -def BRBU_lu6 : _FLU6<0b0111011100, (outs), (ins brtarget:$a), "bu -$a", []>; +def BRBU_lu6 : _FLU6<0b0111011100, (outs), (ins brtarget_neg:$a), "bu $a", []>; def BRFU_u6 : _FU6<0b0111001100, (outs), (ins brtarget:$a), "bu $a", []>; @@ -626,12 +625,12 @@ def BRFU_lu6 : _FLU6<0b0111001100, (outs), (ins brtarget:$a), "bu $a", []>; //let Uses = [CP] in ... 
let Defs = [R11], neverHasSideEffects = 1, isReMaterializable = 1 in -def LDAWCP_u6: _FU6<0b0111111101, (outs), (ins MEMii:$a), "ldaw r11, cp[$a]", +def LDAWCP_u6: _FU6<0b0111111101, (outs), (ins i32imm:$a), "ldaw r11, cp[$a]", []>; let Defs = [R11], isReMaterializable = 1 in -def LDAWCP_lu6: _FLU6<0b0111111101, (outs), (ins MEMii:$a), "ldaw r11, cp[$a]", - [(set R11, ADDRcpii:$a)]>; +def LDAWCP_lu6: _FLU6<0b0111111101, (outs), (ins i32imm:$a), "ldaw r11, cp[$a]", + [(set R11, (cprelwrapper tglobaladdr:$a))]>; let Defs = [R11] in defm GETSR : FU6_LU6_np<0b0111111100, "getsr r11,">; @@ -658,16 +657,26 @@ defm KRESTSP : FU6_LU6_np<0b0111101111, "krestsp">; // U10 -let Defs = [R11], isReMaterializable = 1, neverHasSideEffects = 1 in -def LDAPF_u10 : _FU10<0b110110, (outs), (ins i32imm:$a), "ldap r11, $a", []>; +let Defs = [R11], isReMaterializable = 1 in { +let neverHasSideEffects = 1 in +def LDAPF_u10 : _FU10<0b110110, (outs), (ins pcrel_imm:$a), "ldap r11, $a", []>; + +def LDAPF_lu10 : _FLU10<0b110110, (outs), (ins pcrel_imm:$a), "ldap r11, $a", + [(set R11, (pcrelwrapper tglobaladdr:$a))]>; -let Defs = [R11], isReMaterializable = 1 in -def LDAPF_lu10 : _FLU10<0b110110, (outs), (ins i32imm:$a), "ldap r11, $a", +let neverHasSideEffects = 1 in +def LDAPB_u10 : _FU10<0b110111, (outs), (ins pcrel_imm_neg:$a), "ldap r11, $a", + []>; + +let neverHasSideEffects = 1 in +def LDAPB_lu10 : _FLU10<0b110111, (outs), (ins pcrel_imm_neg:$a), + "ldap r11, $a", [(set R11, (pcrelwrapper tglobaladdr:$a))]>; -let Defs = [R11], isReMaterializable = 1, isCodeGenOnly = 1 in -def LDAPF_lu10_ba : _FLU10<0b110110, (outs), (ins i32imm:$a), "ldap r11, $a", +let isCodeGenOnly = 1 in +def LDAPF_lu10_ba : _FLU10<0b110110, (outs), (ins pcrel_imm:$a), "ldap r11, $a", [(set R11, (pcrelwrapper tblockaddress:$a))]>; +} let isCall=1, // All calls clobber the link register and the non-callee-saved registers: @@ -676,11 +685,15 @@ def BLACP_u10 : _FU10<0b111000, (outs), (ins i32imm:$a), "bla cp[$a]", []>; def BLACP_lu10 : _FLU10<0b111000, (outs), (ins i32imm:$a), "bla cp[$a]", []>; -def BLRF_u10 : _FU10<0b110100, (outs), (ins calltarget:$a), "bl $a", +def BLRF_u10 : _FU10<0b110100, (outs), (ins pcrel_imm:$a), "bl $a", [(XCoreBranchLink immU10:$a)]>; -def BLRF_lu10 : _FLU10<0b110100, (outs), (ins calltarget:$a), "bl $a", +def BLRF_lu10 : _FLU10<0b110100, (outs), (ins pcrel_imm:$a), "bl $a", [(XCoreBranchLink immU20:$a)]>; + +def BLRB_u10 : _FU10<0b110101, (outs), (ins pcrel_imm_neg:$a), "bl $a", []>; + +def BLRB_lu10 : _FLU10<0b110101, (outs), (ins pcrel_imm_neg:$a), "bl $a", []>; } let Defs = [R11], mayLoad = 1, isReMaterializable = 1, diff --git a/contrib/llvm/lib/Target/XCore/XCoreLowerThreadLocal.cpp b/contrib/llvm/lib/Target/XCore/XCoreLowerThreadLocal.cpp new file mode 100644 index 0000000..2e328b4 --- /dev/null +++ b/contrib/llvm/lib/Target/XCore/XCoreLowerThreadLocal.cpp @@ -0,0 +1,145 @@ +//===-- XCoreLowerThreadLocal - Lower thread local variables --------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// \brief This file contains a pass that lowers thread local variables on the +/// XCore. 
+/// +//===----------------------------------------------------------------------===// + +#include "XCore.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Module.h" +#include "llvm/Pass.h" +#include "llvm/Support/CommandLine.h" + +#define DEBUG_TYPE "xcore-lower-thread-local" + +using namespace llvm; + +static cl::opt<unsigned> MaxThreads( + "xcore-max-threads", cl::Optional, + cl::desc("Maximum number of threads (for emulation thread-local storage)"), + cl::Hidden, cl::value_desc("number"), cl::init(8)); + +namespace { + /// Lowers thread local variables on the XCore. Each thread local variable is + /// expanded to an array of n elements indexed by the thread ID where n is the + /// fixed number hardware threads supported by the device. + struct XCoreLowerThreadLocal : public ModulePass { + static char ID; + + XCoreLowerThreadLocal() : ModulePass(ID) { + initializeXCoreLowerThreadLocalPass(*PassRegistry::getPassRegistry()); + } + + bool lowerGlobal(GlobalVariable *GV); + + bool runOnModule(Module &M); + }; +} + +char XCoreLowerThreadLocal::ID = 0; + +INITIALIZE_PASS(XCoreLowerThreadLocal, "xcore-lower-thread-local", + "Lower thread local variables", false, false) + +ModulePass *llvm::createXCoreLowerThreadLocalPass() { + return new XCoreLowerThreadLocal(); +} + +static ArrayType *createLoweredType(Type *OriginalType) { + return ArrayType::get(OriginalType, MaxThreads); +} + +static Constant * +createLoweredInitializer(ArrayType *NewType, Constant *OriginalInitializer) { + SmallVector<Constant *, 8> Elements(MaxThreads); + for (unsigned i = 0; i != MaxThreads; ++i) { + Elements[i] = OriginalInitializer; + } + return ConstantArray::get(NewType, Elements); +} + +static bool hasNonInstructionUse(GlobalVariable *GV) { + for (Value::use_iterator UI = GV->use_begin(), E = GV->use_end(); UI != E; + ++UI) + if (!isa<Instruction>(*UI)) + return true; + + return false; +} + +static bool isZeroLengthArray(Type *Ty) { + ArrayType *AT = dyn_cast<ArrayType>(Ty); + return AT && (AT->getNumElements() == 0); +} + +bool XCoreLowerThreadLocal::lowerGlobal(GlobalVariable *GV) { + Module *M = GV->getParent(); + LLVMContext &Ctx = M->getContext(); + if (!GV->isThreadLocal()) + return false; + + // Skip globals that we can't lower and leave it for the backend to error. + if (hasNonInstructionUse(GV) || + !GV->getType()->isSized() || isZeroLengthArray(GV->getType())) + return false; + + // Create replacement global. + ArrayType *NewType = createLoweredType(GV->getType()->getElementType()); + Constant *NewInitializer = createLoweredInitializer(NewType, + GV->getInitializer()); + GlobalVariable *NewGV = + new GlobalVariable(*M, NewType, GV->isConstant(), GV->getLinkage(), + NewInitializer, "", 0, GlobalVariable::NotThreadLocal, + GV->getType()->getAddressSpace(), + GV->isExternallyInitialized()); + + // Update uses. 
+ SmallVector<User *, 16> Users(GV->use_begin(), GV->use_end()); + for (unsigned I = 0, E = Users.size(); I != E; ++I) { + User *U = Users[I]; + Instruction *Inst = cast<Instruction>(U); + IRBuilder<> Builder(Inst); + Function *GetID = Intrinsic::getDeclaration(GV->getParent(), + Intrinsic::xcore_getid); + Value *ThreadID = Builder.CreateCall(GetID); + SmallVector<Value *, 2> Indices; + Indices.push_back(Constant::getNullValue(Type::getInt64Ty(Ctx))); + Indices.push_back(ThreadID); + Value *Addr = Builder.CreateInBoundsGEP(NewGV, Indices); + U->replaceUsesOfWith(GV, Addr); + } + + // Remove old global. + NewGV->takeName(GV); + GV->eraseFromParent(); + return true; +} + +bool XCoreLowerThreadLocal::runOnModule(Module &M) { + // Find thread local globals. + bool MadeChange = false; + SmallVector<GlobalVariable *, 16> ThreadLocalGlobals; + for (Module::global_iterator GVI = M.global_begin(), E = M.global_end(); + GVI != E; ++GVI) { + GlobalVariable *GV = GVI; + if (GV->isThreadLocal()) + ThreadLocalGlobals.push_back(GV); + } + for (unsigned I = 0, E = ThreadLocalGlobals.size(); I != E; ++I) { + MadeChange |= lowerGlobal(ThreadLocalGlobals[I]); + } + return MadeChange; +} diff --git a/contrib/llvm/lib/Target/XCore/XCoreTargetMachine.cpp b/contrib/llvm/lib/Target/XCore/XCoreTargetMachine.cpp index 28c3d12..07e5fff 100644 --- a/contrib/llvm/lib/Target/XCore/XCoreTargetMachine.cpp +++ b/contrib/llvm/lib/Target/XCore/XCoreTargetMachine.cpp @@ -46,6 +46,7 @@ public: return getTM<XCoreTargetMachine>(); } + virtual bool addPreISel(); virtual bool addInstSelector(); }; } // namespace @@ -54,6 +55,11 @@ TargetPassConfig *XCoreTargetMachine::createPassConfig(PassManagerBase &PM) { return new XCorePassConfig(this, PM); } +bool XCorePassConfig::addPreISel() { + addPass(createXCoreLowerThreadLocalPass()); + return false; +} + bool XCorePassConfig::addInstSelector() { addPass(createXCoreISelDag(getXCoreTargetMachine(), getOptLevel())); return false; diff --git a/contrib/llvm/lib/Target/XCore/XCoreTargetObjectFile.cpp b/contrib/llvm/lib/Target/XCore/XCoreTargetObjectFile.cpp index 8203899..88e3bfd 100644 --- a/contrib/llvm/lib/Target/XCore/XCoreTargetObjectFile.cpp +++ b/contrib/llvm/lib/Target/XCore/XCoreTargetObjectFile.cpp @@ -57,9 +57,4 @@ void XCoreTargetObjectFile::Initialize(MCContext &Ctx, const TargetMachine &TM){ ELF::SHF_ALLOC | ELF::XCORE_SHF_CP_SECTION, SectionKind::getReadOnlyWithRel()); - - // Dynamic linking is not supported. Data with relocations is placed in the - // same section as data without relocations. 
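The new XCoreLowerThreadLocal pass shown above replaces each thread-local global with an array of MaxThreads copies and rewrites every use to index that array with the hardware thread ID (the xcore_getid intrinsic), superseding the emulation that the removed XCoreAsmPrinter code performed at emission time. At the source level the rewrite is roughly the following sketch; get_thread_id() is a hypothetical stand-in for the intrinsic:

    // Before lowering: one thread-local variable.
    //   __thread int counter;
    //   counter += 1;

    constexpr unsigned MaxThreads = 8;  // matches the -xcore-max-threads default

    // Hypothetical stand-in for llvm.xcore.getid; always thread 0 here.
    inline unsigned get_thread_id() { return 0; }

    // After lowering: one slot per hardware thread, indexed by thread ID.
    int counter[MaxThreads];

    void increment() {
      counter[get_thread_id()] += 1;
    }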
- DataRelSection = DataRelLocalSection = DataSection; - DataRelROSection = DataRelROLocalSection = ReadOnlySection; } diff --git a/contrib/llvm/lib/Transforms/IPO/ConstantMerge.cpp b/contrib/llvm/lib/Transforms/IPO/ConstantMerge.cpp index 8336d3a..a7bf188 100644 --- a/contrib/llvm/lib/Transforms/IPO/ConstantMerge.cpp +++ b/contrib/llvm/lib/Transforms/IPO/ConstantMerge.cpp @@ -27,6 +27,7 @@ #include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Module.h" +#include "llvm/IR/Operator.h" #include "llvm/Pass.h" using namespace llvm; @@ -66,13 +67,13 @@ ModulePass *llvm::createConstantMergePass() { return new ConstantMerge(); } static void FindUsedValues(GlobalVariable *LLVMUsed, SmallPtrSet<const GlobalValue*, 8> &UsedValues) { if (LLVMUsed == 0) return; - ConstantArray *Inits = dyn_cast<ConstantArray>(LLVMUsed->getInitializer()); - if (Inits == 0) return; - - for (unsigned i = 0, e = Inits->getNumOperands(); i != e; ++i) - if (GlobalValue *GV = - dyn_cast<GlobalValue>(Inits->getOperand(i)->stripPointerCasts())) - UsedValues.insert(GV); + ConstantArray *Inits = cast<ConstantArray>(LLVMUsed->getInitializer()); + + for (unsigned i = 0, e = Inits->getNumOperands(); i != e; ++i) { + Value *Operand = Inits->getOperand(i)->stripPointerCastsNoFollowAliases(); + GlobalValue *GV = cast<GlobalValue>(Operand); + UsedValues.insert(GV); + } } // True if A is better than B. diff --git a/contrib/llvm/lib/Transforms/IPO/GlobalDCE.cpp b/contrib/llvm/lib/Transforms/IPO/GlobalDCE.cpp index dc99492..201f320 100644 --- a/contrib/llvm/lib/Transforms/IPO/GlobalDCE.cpp +++ b/contrib/llvm/lib/Transforms/IPO/GlobalDCE.cpp @@ -42,6 +42,7 @@ namespace { private: SmallPtrSet<GlobalValue*, 32> AliveGlobals; + SmallPtrSet<Constant *, 8> SeenConstants; /// GlobalIsNeeded - mark the specific global value as needed, and /// recursively mark anything that it uses as also needed. @@ -151,6 +152,7 @@ bool GlobalDCE::runOnModule(Module &M) { // Make sure that all memory is released AliveGlobals.clear(); + SeenConstants.clear(); return Changed; } @@ -190,12 +192,15 @@ void GlobalDCE::GlobalIsNeeded(GlobalValue *G) { void GlobalDCE::MarkUsedGlobalsAsNeeded(Constant *C) { if (GlobalValue *GV = dyn_cast<GlobalValue>(C)) return GlobalIsNeeded(GV); - + // Loop over all of the operands of the constant, adding any globals they // use to the list of needed globals. - for (User::op_iterator I = C->op_begin(), E = C->op_end(); I != E; ++I) - if (Constant *OpC = dyn_cast<Constant>(*I)) - MarkUsedGlobalsAsNeeded(OpC); + for (User::op_iterator I = C->op_begin(), E = C->op_end(); I != E; ++I) { + // If we've already processed this constant there's no need to do it again. 
+ Constant *Op = dyn_cast<Constant>(*I); + if (Op && SeenConstants.insert(Op)) + MarkUsedGlobalsAsNeeded(Op); + } } // RemoveUnusedGlobalValue - Loop over all of the uses of the specified diff --git a/contrib/llvm/lib/Transforms/IPO/GlobalOpt.cpp b/contrib/llvm/lib/Transforms/IPO/GlobalOpt.cpp index b035a82..0ef900e 100644 --- a/contrib/llvm/lib/Transforms/IPO/GlobalOpt.cpp +++ b/contrib/llvm/lib/Transforms/IPO/GlobalOpt.cpp @@ -3041,6 +3041,105 @@ bool GlobalOpt::OptimizeGlobalCtorsList(GlobalVariable *&GCL) { return true; } +static Value::use_iterator getFirst(Value *V, SmallPtrSet<Use*, 8> &Tried) { + for (Value::use_iterator I = V->use_begin(), E = V->use_end(); I != E; ++I) { + Use *U = &I.getUse(); + if (Tried.count(U)) + continue; + + User *Usr = *I; + GlobalVariable *GV = dyn_cast<GlobalVariable>(Usr); + if (!GV || !GV->hasName()) { + Tried.insert(U); + return I; + } + + StringRef Name = GV->getName(); + if (Name != "llvm.used" && Name != "llvm.compiler_used") { + Tried.insert(U); + return I; + } + } + return V->use_end(); +} + +static bool replaceAllNonLLVMUsedUsesWith(Constant *Old, Constant *New); + +static bool replaceUsesOfWithOnConstant(ConstantArray *CA, Value *From, + Value *ToV, Use *U) { + Constant *To = cast<Constant>(ToV); + + SmallVector<Constant*, 8> NewOps; + for (unsigned i = 0, e = CA->getNumOperands(); i != e; ++i) { + Constant *Op = CA->getOperand(i); + NewOps.push_back(Op == From ? To : Op); + } + + Constant *Replacement = ConstantArray::get(CA->getType(), NewOps); + assert(Replacement != CA && "CA didn't contain From!"); + + bool Ret = replaceAllNonLLVMUsedUsesWith(CA, Replacement); + if (Replacement->use_empty()) + Replacement->destroyConstant(); + if (CA->use_empty()) + CA->destroyConstant(); + return Ret; +} + +static bool replaceUsesOfWithOnConstant(ConstantExpr *CE, Value *From, + Value *ToV, Use *U) { + Constant *To = cast<Constant>(ToV); + SmallVector<Constant*, 8> NewOps; + for (unsigned i = 0, e = CE->getNumOperands(); i != e; ++i) { + Constant *Op = CE->getOperand(i); + NewOps.push_back(Op == From ? To : Op); + } + + Constant *Replacement = CE->getWithOperands(NewOps); + assert(Replacement != CE && "CE didn't contain From!"); + + bool Ret = replaceAllNonLLVMUsedUsesWith(CE, Replacement); + if (Replacement->use_empty()) + Replacement->destroyConstant(); + if (CE->use_empty()) + CE->destroyConstant(); + return Ret; +} + +static bool replaceUsesOfWithOnConstant(Constant *C, Value *From, Value *To, + Use *U) { + if (ConstantArray *CA = dyn_cast<ConstantArray>(C)) + return replaceUsesOfWithOnConstant(CA, From, To, U); + if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) + return replaceUsesOfWithOnConstant(CE, From, To, U); + C->replaceUsesOfWithOnConstant(From, To, U); + return true; +} + +static bool replaceAllNonLLVMUsedUsesWith(Constant *Old, Constant *New) { + SmallPtrSet<Use*, 8> Tried; + bool Ret = false; + for (;;) { + Value::use_iterator I = getFirst(Old, Tried); + if (I == Old->use_end()) + break; + Use &U = I.getUse(); + + // Must handle Constants specially, we cannot call replaceUsesOfWith on a + // constant because they are uniqued. 
+ if (Constant *C = dyn_cast<Constant>(U.getUser())) { + if (!isa<GlobalValue>(C)) { + Ret |= replaceUsesOfWithOnConstant(C, Old, New, &U); + continue; + } + } + + U.set(New); + Ret = true; + } + return Ret; +} + bool GlobalOpt::OptimizeGlobalAliases(Module &M) { bool Changed = false; @@ -3060,11 +3159,12 @@ bool GlobalOpt::OptimizeGlobalAliases(Module &M) { bool hasOneUse = Target->hasOneUse() && Aliasee->hasOneUse(); // Make all users of the alias use the aliasee instead. - if (!J->use_empty()) { - J->replaceAllUsesWith(Aliasee); + if (replaceAllNonLLVMUsedUsesWith(J, Aliasee)) { ++NumAliasesResolved; Changed = true; } + if (!J->use_empty()) + continue; // If the alias is externally visible, we may still be able to simplify it. if (!J->hasLocalLinkage()) { diff --git a/contrib/llvm/lib/Transforms/IPO/MergeFunctions.cpp b/contrib/llvm/lib/Transforms/IPO/MergeFunctions.cpp index 892100f..4ce749c 100644 --- a/contrib/llvm/lib/Transforms/IPO/MergeFunctions.cpp +++ b/contrib/llvm/lib/Transforms/IPO/MergeFunctions.cpp @@ -72,6 +72,15 @@ STATISTIC(NumThunksWritten, "Number of thunks generated"); STATISTIC(NumAliasesWritten, "Number of aliases generated"); STATISTIC(NumDoubleWeak, "Number of new functions created"); +/// Returns the type id for a type to be hashed. We turn pointer types into +/// integers here because the actual compare logic below considers pointers and +/// integers of the same size as equal. +static Type::TypeID getTypeIDForHash(Type *Ty) { + if (Ty->isPointerTy()) + return Type::IntegerTyID; + return Ty->getTypeID(); +} + /// Creates a hash-code for the function which is the same for any two /// functions that will compare equal, without looking at the instructions /// inside the function. @@ -83,9 +92,9 @@ static unsigned profileFunction(const Function *F) { ID.AddInteger(F->getCallingConv()); ID.AddBoolean(F->hasGC()); ID.AddBoolean(FTy->isVarArg()); - ID.AddInteger(FTy->getReturnType()->getTypeID()); + ID.AddInteger(getTypeIDForHash(FTy->getReturnType())); for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i) - ID.AddInteger(FTy->getParamType(i)->getTypeID()); + ID.AddInteger(getTypeIDForHash(FTy->getParamType(i))); return ID.ComputeHash(); } @@ -200,8 +209,7 @@ private: // Any two pointers in the same address space are equivalent, intptr_t and // pointers are equivalent. Otherwise, standard type equivalence rules apply. 
-bool FunctionComparator::isEquivalentType(Type *Ty1, - Type *Ty2) const { +bool FunctionComparator::isEquivalentType(Type *Ty1, Type *Ty2) const { if (Ty1 == Ty2) return true; if (Ty1->getTypeID() != Ty2->getTypeID()) { @@ -740,7 +748,13 @@ void MergeFunctions::writeThunk(Function *F, Function *G) { if (NewG->getReturnType()->isVoidTy()) { Builder.CreateRetVoid(); } else { - Builder.CreateRet(Builder.CreateBitCast(CI, NewG->getReturnType())); + Type *RetTy = NewG->getReturnType(); + if (CI->getType()->isIntegerTy() && RetTy->isPointerTy()) + Builder.CreateRet(Builder.CreateIntToPtr(CI, RetTy)); + else if (CI->getType()->isPointerTy() && RetTy->isIntegerTy()) + Builder.CreateRet(Builder.CreatePtrToInt(CI, RetTy)); + else + Builder.CreateRet(Builder.CreateBitCast(CI, RetTy)); } NewG->copyAttributesFrom(G); diff --git a/contrib/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp b/contrib/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp index 027a9f2..986c0b8 100644 --- a/contrib/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp +++ b/contrib/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp @@ -33,7 +33,12 @@ RunLoopVectorization("vectorize-loops", cl::desc("Run the Loop vectorization passes")); static cl::opt<bool> -RunBBVectorization("vectorize", cl::desc("Run the BB vectorization passes")); +RunSLPVectorization("vectorize-slp", + cl::desc("Run the SLP vectorization passes")); + +static cl::opt<bool> +RunBBVectorization("vectorize-slp-aggressive", + cl::desc("Run the BB vectorization passes")); static cl::opt<bool> UseGVNAfterVectorization("use-gvn-after-vectorization", @@ -52,7 +57,8 @@ PassManagerBuilder::PassManagerBuilder() { DisableSimplifyLibCalls = false; DisableUnitAtATime = false; DisableUnrollLoops = false; - Vectorize = RunBBVectorization; + BBVectorize = RunBBVectorization; + SLPVectorize = RunSLPVectorization; LoopVectorize = RunLoopVectorization; } @@ -207,7 +213,10 @@ void PassManagerBuilder::populateModulePassManager(PassManagerBase &MPM) { addExtensionsToPM(EP_ScalarOptimizerLate, MPM); - if (Vectorize) { + if (SLPVectorize) + MPM.add(createSLPVectorizerPass()); // Vectorize parallel scalar chains. 
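A note on the writeThunk change above (illustrative sketch, not part of the diff): when the surviving function and the thunk disagree only in whether the return type is a pointer or a same-sized integer, a plain bitcast of the return value is not valid IR, so the patch emits inttoptr or ptrtoint instead. Roughly the source-level situation, with made-up names:

    #include <cstdint>

    // Two functions whose bodies are identical except for the pointer/integer
    // flavour of the value they return. After merging, one becomes a thunk that
    // calls the other and converts the result back, which is an int<->pointer
    // cast rather than a bitcast.
    static intptr_t canonical(intptr_t p) { return p; }

    static void *thunk(void *p) {
      // conceptually ptrtoint on the way in, inttoptr on the way out
      return reinterpret_cast<void *>(canonical(reinterpret_cast<intptr_t>(p)));
    }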
+ + if (BBVectorize) { MPM.add(createBBVectorizePass()); MPM.add(createInstructionCombiningPass()); if (OptLevel > 1 && UseGVNAfterVectorization) @@ -321,6 +330,14 @@ void PassManagerBuilder::populateLTOPassManager(PassManagerBase &PM, PM.add(createGlobalDCEPass()); } +inline PassManagerBuilder *unwrap(LLVMPassManagerBuilderRef P) { + return reinterpret_cast<PassManagerBuilder*>(P); +} + +inline LLVMPassManagerBuilderRef wrap(PassManagerBuilder *P) { + return reinterpret_cast<LLVMPassManagerBuilderRef>(P); +} + LLVMPassManagerBuilderRef LLVMPassManagerBuilderCreate() { PassManagerBuilder *PMB = new PassManagerBuilder(); return wrap(PMB); diff --git a/contrib/llvm/lib/Transforms/IPO/StripSymbols.cpp b/contrib/llvm/lib/Transforms/IPO/StripSymbols.cpp index 5f8681f..3396f79 100644 --- a/contrib/llvm/lib/Transforms/IPO/StripSymbols.cpp +++ b/contrib/llvm/lib/Transforms/IPO/StripSymbols.cpp @@ -195,10 +195,9 @@ static void findUsedValues(GlobalVariable *LLVMUsed, SmallPtrSet<const GlobalValue*, 8> &UsedValues) { if (LLVMUsed == 0) return; UsedValues.insert(LLVMUsed); - - ConstantArray *Inits = dyn_cast<ConstantArray>(LLVMUsed->getInitializer()); - if (Inits == 0) return; - + + ConstantArray *Inits = cast<ConstantArray>(LLVMUsed->getInitializer()); + for (unsigned i = 0, e = Inits->getNumOperands(); i != e; ++i) if (GlobalValue *GV = dyn_cast<GlobalValue>(Inits->getOperand(i)->stripPointerCasts())) diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombine.h b/contrib/llvm/lib/Transforms/InstCombine/InstCombine.h index 1f6a3a5..2a36074 100644 --- a/contrib/llvm/lib/Transforms/InstCombine/InstCombine.h +++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombine.h @@ -233,6 +233,7 @@ private: Instruction *transformSExtICmp(ICmpInst *ICI, Instruction &CI); bool WillNotOverflowSignedAdd(Value *LHS, Value *RHS); Value *EmitGEPOffset(User *GEP); + Instruction *scalarizePHI(ExtractElementInst &EI, PHINode *PN); public: // InsertNewInstBefore - insert an instruction New before instruction Old diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp index 7595da0..166f8df 100644 --- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp +++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp @@ -24,9 +24,9 @@ namespace { /// Class representing coefficient of floating-point addend. /// This class needs to be highly efficient, which is especially true for /// the constructor. As of I write this comment, the cost of the default - /// constructor is merely 4-byte-store-zero (Assuming compiler is able to + /// constructor is merely 4-byte-store-zero (Assuming compiler is able to /// perform write-merging). - /// + /// class FAddendCoef { public: // The constructor has to initialize a APFloat, which is uncessary for @@ -37,31 +37,31 @@ namespace { // FAddendCoef() : IsFp(false), BufHasFpVal(false), IntVal(0) {} ~FAddendCoef(); - + void set(short C) { assert(!insaneIntVal(C) && "Insane coefficient"); IsFp = false; IntVal = C; } - + void set(const APFloat& C); void negate(); - + bool isZero() const { return isInt() ? !IntVal : getFpVal().isZero(); } Value *getValue(Type *) const; - + // If possible, don't define operator+/operator- etc because these // operators inevitably call FAddendCoef's constructor which is not cheap. 
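For context on the PassManagerBuilder hunk above: the builder now carries separate SLPVectorize and BBVectorize flags, wired to -vectorize-slp and -vectorize-slp-aggressive. A rough usage sketch against the 3.3-era C++ API; the header paths and the PassManager type are quoted from memory, so treat them as assumptions:

    #include "llvm/PassManager.h"
    #include "llvm/Transforms/IPO/PassManagerBuilder.h"

    using namespace llvm;

    // Build an -O3-style module pipeline with SLP vectorization enabled and the
    // older, more aggressive basic-block vectorizer left off.
    static void buildPipeline(PassManager &MPM) {
      PassManagerBuilder PMB;
      PMB.OptLevel = 3;
      PMB.SLPVectorize = true;   // corresponds to -vectorize-slp
      PMB.BBVectorize = false;   // corresponds to -vectorize-slp-aggressive
      PMB.populateModulePassManager(MPM);
    }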
void operator=(const FAddendCoef &A); void operator+=(const FAddendCoef &A); void operator-=(const FAddendCoef &A); void operator*=(const FAddendCoef &S); - + bool isOne() const { return isInt() && IntVal == 1; } bool isTwo() const { return isInt() && IntVal == 2; } bool isMinusOne() const { return isInt() && IntVal == -1; } bool isMinusTwo() const { return isInt() && IntVal == -2; } - + private: bool insaneIntVal(int V) { return V > 4 || V < -4; } APFloat *getFpValPtr(void) @@ -74,26 +74,28 @@ namespace { return *getFpValPtr(); } - APFloat &getFpVal(void) - { assert(IsFp && BufHasFpVal && "Incorret state"); return *getFpValPtr(); } - + APFloat &getFpVal(void) { + assert(IsFp && BufHasFpVal && "Incorret state"); + return *getFpValPtr(); + } + bool isInt() const { return !IsFp; } // If the coefficient is represented by an integer, promote it to a - // floating point. + // floating point. void convertToFpType(const fltSemantics &Sem); // Construct an APFloat from a signed integer. // TODO: We should get rid of this function when APFloat can be constructed - // from an *SIGNED* integer. + // from an *SIGNED* integer. APFloat createAPFloatFromInt(const fltSemantics &Sem, int Val); private: bool IsFp; - + // True iff FpValBuf contains an instance of APFloat. bool BufHasFpVal; - + // The integer coefficient of an individual addend is either 1 or -1, // and we try to simplify at most 4 addends from neighboring at most // two instructions. So the range of <IntVal> falls in [-4, 4]. APInt @@ -102,7 +104,7 @@ namespace { AlignedCharArrayUnion<APFloat> FpValBuf; }; - + /// FAddend is used to represent floating-point addend. An addend is /// represented as <C, V>, where the V is a symbolic value, and C is a /// constant coefficient. A constant addend is represented as <C, 0>. @@ -110,10 +112,10 @@ namespace { class FAddend { public: FAddend() { Val = 0; } - + Value *getSymVal (void) const { return Val; } const FAddendCoef &getCoef(void) const { return Coeff; } - + bool isConstant() const { return Val == 0; } bool isZero() const { return Coeff.isZero(); } @@ -122,17 +124,17 @@ namespace { { Coeff.set(Coefficient); Val = V; } void set(const ConstantFP* Coefficient, Value *V) { Coeff.set(Coefficient->getValueAPF()); Val = V; } - + void negate() { Coeff.negate(); } - + /// Drill down the U-D chain one step to find the definition of V, and /// try to break the definition into one or two addends. static unsigned drillValueDownOneStep(Value* V, FAddend &A0, FAddend &A1); - + /// Similar to FAddend::drillDownOneStep() except that the value being /// splitted is the addend itself. unsigned drillAddendDownOneStep(FAddend &Addend0, FAddend &Addend1) const; - + void operator+=(const FAddend &T) { assert((Val == T.Val) && "Symbolic-values disagree"); Coeff += T.Coeff; @@ -140,12 +142,12 @@ namespace { private: void Scale(const FAddendCoef& ScaleAmt) { Coeff *= ScaleAmt; } - + // This addend has the value of "Coeff * Val". Value *Val; FAddendCoef Coeff; }; - + /// FAddCombine is the class for optimizing an unsafe fadd/fsub along /// with its neighboring at most two instructions. 
/// @@ -153,17 +155,17 @@ namespace { public: FAddCombine(InstCombiner::BuilderTy *B) : Builder(B), Instr(0) {} Value *simplify(Instruction *FAdd); - + private: typedef SmallVector<const FAddend*, 4> AddendVect; - + Value *simplifyFAdd(AddendVect& V, unsigned InstrQuota); Value *performFactorization(Instruction *I); /// Convert given addend to a Value Value *createAddendVal(const FAddend &A, bool& NeedNeg); - + /// Return the number of instructions needed to emit the N-ary addition. unsigned calcInstrNumber(const AddendVect& Vect); Value *createFSub(Value *Opnd0, Value *Opnd1); @@ -173,10 +175,10 @@ namespace { Value *createFNeg(Value *V); Value *createNaryFAdd(const AddendVect& Opnds, unsigned InstrQuota); void createInstPostProc(Instruction *NewInst); - + InstCombiner::BuilderTy *Builder; Instruction *Instr; - + private: // Debugging stuff are clustered here. #ifndef NDEBUG @@ -188,7 +190,7 @@ namespace { void incCreateInstNum() {} #endif }; -} +} //===----------------------------------------------------------------------===// // @@ -211,7 +213,7 @@ void FAddendCoef::set(const APFloat& C) { } else *P = C; - IsFp = BufHasFpVal = true; + IsFp = BufHasFpVal = true; } void FAddendCoef::convertToFpType(const fltSemantics &Sem) { @@ -225,7 +227,7 @@ void FAddendCoef::convertToFpType(const fltSemantics &Sem) { new(P) APFloat(Sem, 0 - IntVal); P->changeSign(); } - IsFp = BufHasFpVal = true; + IsFp = BufHasFpVal = true; } APFloat FAddendCoef::createAPFloatFromInt(const fltSemantics &Sem, int Val) { @@ -254,14 +256,14 @@ void FAddendCoef::operator+=(const FAddendCoef &That) { getFpVal().add(That.getFpVal(), RndMode); return; } - + if (isInt()) { const APFloat &T = That.getFpVal(); convertToFpType(T.getSemantics()); getFpVal().add(T, RndMode); return; } - + APFloat &T = getFpVal(); T.add(createAPFloatFromInt(T.getSemantics(), That.IntVal), RndMode); } @@ -275,7 +277,7 @@ void FAddendCoef::operator-=(const FAddendCoef &That) { getFpVal().subtract(That.getFpVal(), RndMode); return; } - + if (isInt()) { const APFloat &T = That.getFpVal(); convertToFpType(T.getSemantics()); @@ -303,7 +305,7 @@ void FAddendCoef::operator*=(const FAddendCoef &That) { return; } - const fltSemantics &Semantic = + const fltSemantics &Semantic = isInt() ? 
That.getFpVal().getSemantics() : getFpVal().getSemantics(); if (isInt()) @@ -338,11 +340,11 @@ Value *FAddendCoef::getValue(Type *Ty) const { // A - B <1, A>, <1,B> // 0 - B <-1, B> // C * A, <C, A> -// A + C <1, A> <C, NULL> +// A + C <1, A> <C, NULL> // 0 +/- 0 <0, NULL> (corner case) // // Legend: A and B are not constant, C is constant -// +// unsigned FAddend::drillValueDownOneStep (Value *Val, FAddend &Addend0, FAddend &Addend1) { Instruction *I = 0; @@ -413,7 +415,7 @@ unsigned FAddend::drillAddendDownOneStep return 0; unsigned BreakNum = FAddend::drillValueDownOneStep(Val, Addend0, Addend1); - if (!BreakNum || Coeff.isOne()) + if (!BreakNum || Coeff.isOne()) return BreakNum; Addend0.Scale(Coeff); @@ -435,10 +437,10 @@ unsigned FAddend::drillAddendDownOneStep Value *FAddCombine::performFactorization(Instruction *I) { assert((I->getOpcode() == Instruction::FAdd || I->getOpcode() == Instruction::FSub) && "Expect add/sub"); - + Instruction *I0 = dyn_cast<Instruction>(I->getOperand(0)); Instruction *I1 = dyn_cast<Instruction>(I->getOperand(1)); - + if (!I0 || !I1 || I0->getOpcode() != I1->getOpcode()) return 0; @@ -453,14 +455,14 @@ Value *FAddCombine::performFactorization(Instruction *I) { Value *Opnd1_0 = I1->getOperand(0); Value *Opnd1_1 = I1->getOperand(1); - // Input Instr I Factor AddSub0 AddSub1 + // Input Instr I Factor AddSub0 AddSub1 // ---------------------------------------------- // (x*y) +/- (x*z) x y z // (y/x) +/- (z/x) x y z // Value *Factor = 0; Value *AddSub0 = 0, *AddSub1 = 0; - + if (isMpy) { if (Opnd0_0 == Opnd1_0 || Opnd0_0 == Opnd1_1) Factor = Opnd0_0; @@ -492,7 +494,7 @@ Value *FAddCombine::performFactorization(Instruction *I) { if (isMpy) return createFMul(Factor, NewAddSub); - + return createFDiv(NewAddSub, Factor); } @@ -506,7 +508,7 @@ Value *FAddCombine::simplify(Instruction *I) { assert((I->getOpcode() == Instruction::FAdd || I->getOpcode() == Instruction::FSub) && "Expect add/sub"); - // Save the instruction before calling other member-functions. + // Save the instruction before calling other member-functions. Instr = I; FAddend Opnd0, Opnd1, Opnd0_0, Opnd0_1, Opnd1_0, Opnd1_1; @@ -517,7 +519,7 @@ Value *FAddCombine::simplify(Instruction *I) { unsigned Opnd0_ExpNum = 0; unsigned Opnd1_ExpNum = 0; - if (!Opnd0.isConstant()) + if (!Opnd0.isConstant()) Opnd0_ExpNum = Opnd0.drillAddendDownOneStep(Opnd0_0, Opnd0_1); // Step 2: Expand the 2nd addend into Opnd1_0 and Opnd1_1. @@ -539,7 +541,7 @@ Value *FAddCombine::simplify(Instruction *I) { Value *V0 = I->getOperand(0); Value *V1 = I->getOperand(1); - InstQuota = ((!isa<Constant>(V0) && V0->hasOneUse()) && + InstQuota = ((!isa<Constant>(V0) && V0->hasOneUse()) && (!isa<Constant>(V1) && V1->hasOneUse())) ? 2 : 1; if (Value *R = simplifyFAdd(AllOpnds, InstQuota)) @@ -579,7 +581,7 @@ Value *FAddCombine::simplify(Instruction *I) { return R; } - // step 6: Try factorization as the last resort, + // step 6: Try factorization as the last resort, return performFactorization(I); } @@ -588,7 +590,7 @@ Value *FAddCombine::simplifyFAdd(AddendVect& Addends, unsigned InstrQuota) { unsigned AddendNum = Addends.size(); assert(AddendNum <= 4 && "Too many addends"); - // For saving intermediate results; + // For saving intermediate results; unsigned NextTmpIdx = 0; FAddend TmpResult[3]; @@ -604,7 +606,7 @@ Value *FAddCombine::simplifyFAdd(AddendVect& Addends, unsigned InstrQuota) { AddendVect SimpVect; // The outer loop works on one symbolic-value at a time. 
Suppose the input - // addends are : <a1, x>, <b1, y>, <a2, x>, <c1, z>, <b2, y>, ... + // addends are : <a1, x>, <b1, y>, <a2, x>, <c1, z>, <b2, y>, ... // The symbolic-values will be processed in this order: x, y, z. // for (unsigned SymIdx = 0; SymIdx < AddendNum; SymIdx++) { @@ -631,7 +633,7 @@ Value *FAddCombine::simplifyFAdd(AddendVect& Addends, unsigned InstrQuota) { if (T && T->getSymVal() == Val) { // Set null such that next iteration of the outer loop will not process // this addend again. - Addends[SameSymIdx] = 0; + Addends[SameSymIdx] = 0; SimpVect.push_back(T); } } @@ -644,7 +646,7 @@ Value *FAddCombine::simplifyFAdd(AddendVect& Addends, unsigned InstrQuota) { R += *SimpVect[Idx]; // Pop all addends being folded and push the resulting folded addend. - SimpVect.resize(StartIdx); + SimpVect.resize(StartIdx); if (Val != 0) { if (!R.isZero()) { SimpVect.push_back(&R); @@ -657,7 +659,7 @@ Value *FAddCombine::simplifyFAdd(AddendVect& Addends, unsigned InstrQuota) { } } - assert((NextTmpIdx <= sizeof(TmpResult)/sizeof(TmpResult[0]) + 1) && + assert((NextTmpIdx <= sizeof(TmpResult)/sizeof(TmpResult[0]) + 1) && "out-of-bound access"); if (ConstAdd) @@ -679,7 +681,7 @@ Value *FAddCombine::createNaryFAdd assert(!Opnds.empty() && "Expect at least one addend"); // Step 1: Check if the # of instructions needed exceeds the quota. - // + // unsigned InstrNeeded = calcInstrNumber(Opnds); if (InstrNeeded > InstrQuota) return 0; @@ -700,7 +702,7 @@ Value *FAddCombine::createNaryFAdd // Iterate the addends, creating fadd/fsub using adjacent two addends. for (AddendVect::const_iterator I = Opnds.begin(), E = Opnds.end(); I != E; I++) { - bool NeedNeg; + bool NeedNeg; Value *V = createAddendVal(**I, NeedNeg); if (!LastVal) { LastVal = V; @@ -726,7 +728,7 @@ Value *FAddCombine::createNaryFAdd } #ifndef NDEBUG - assert(CreateInstrNum == InstrNeeded && + assert(CreateInstrNum == InstrNeeded && "Inconsistent in instruction numbers"); #endif @@ -784,8 +786,8 @@ unsigned FAddCombine::calcInstrNumber(const AddendVect &Opnds) { unsigned OpndNum = Opnds.size(); unsigned InstrNeeded = OpndNum - 1; - // The number of addends in the form of "(-1)*x". - unsigned NegOpndNum = 0; + // The number of addends in the form of "(-1)*x". + unsigned NegOpndNum = 0; // Adjust the number of instructions needed to emit the N-ary add. 
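Aside on performFactorization above (illustrative, not part of the diff): it implements the table quoted earlier, turning (x*y) +/- (x*z) into x*(y +/- z) and (y/x) +/- (z/x) into (y +/- z)/x. The rewrite is only legal because these are unsafe-algebra (fast-math) operations that may be reassociated. The multiply case at the source level:

    // Before: two fmuls feeding one fadd.
    static double unfactored(double x, double y, double z) { return x * y + x * z; }

    // After: one fadd feeding one fmul, the same value under fast-math
    // assumptions and one fewer multiply on the critical path.
    static double factored(double x, double y, double z) { return x * (y + z); }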
for (AddendVect::const_iterator I = Opnds.begin(), E = Opnds.end(); @@ -972,6 +974,11 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) { return BinaryOperator::CreateSub(ConstantExpr::getAdd(XorRHS, CI), XorLHS); } + // (X + signbit) + C could have gotten canonicalized to (X ^ signbit) + C, + // transform them into (X + (signbit ^ C)) + if (XorRHS->getValue().isSignBit()) + return BinaryOperator::CreateAdd(XorLHS, + ConstantExpr::getXor(XorRHS, CI)); } } @@ -1230,6 +1237,31 @@ Instruction *InstCombiner::visitFAdd(BinaryOperator &I) { } } + // select C, 0, B + select C, A, 0 -> select C, A, B + { + Value *A1, *B1, *C1, *A2, *B2, *C2; + if (match(LHS, m_Select(m_Value(C1), m_Value(A1), m_Value(B1))) && + match(RHS, m_Select(m_Value(C2), m_Value(A2), m_Value(B2)))) { + if (C1 == C2) { + Constant *Z1=0, *Z2=0; + Value *A, *B, *C=C1; + if (match(A1, m_AnyZero()) && match(B2, m_AnyZero())) { + Z1 = dyn_cast<Constant>(A1); A = A2; + Z2 = dyn_cast<Constant>(B2); B = B1; + } else if (match(B1, m_AnyZero()) && match(A2, m_AnyZero())) { + Z1 = dyn_cast<Constant>(B1); B = B2; + Z2 = dyn_cast<Constant>(A2); A = A1; + } + + if (Z1 && Z2 && + (I.hasNoSignedZeros() || + (Z1->isNegativeZeroValue() && Z2->isNegativeZeroValue()))) { + return SelectInst::Create(C, A, B); + } + } + } + } + if (I.hasUnsafeAlgebra()) { if (Value *V = FAddCombine(Builder).simplify(&I)) return ReplaceInstUsesWith(I, V); diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp index 990cbc3..ec75dd2 100644 --- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp +++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp @@ -266,9 +266,8 @@ Instruction *InstCombiner::OptAndOp(Instruction *Op, return 0; } - -/// InsertRangeTest - Emit a computation of: (V >= Lo && V < Hi) if Inside is -/// true, otherwise (V < Lo || V >= Hi). In practice, we emit the more efficient +/// Emit a computation of: (V >= Lo && V < Hi) if Inside is true, otherwise +/// (V < Lo || V >= Hi). In practice, we emit the more efficient /// (V-Lo) \<u Hi-Lo. This method expects that Lo <= Hi. isSigned indicates /// whether to treat the V, Lo and HI as signed or not. IB is the location to /// insert new instructions. 
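The new visitAdd case above undoes the (X + signbit) -> (X ^ signbit) canonicalization; it relies on the fact that, in two's-complement, xor with the sign bit equals adding it, so (X ^ signbit) + C == X + (signbit ^ C). A small standalone check of that identity for 32-bit values (editorial sketch, not part of the diff):

    #include <cassert>
    #include <cstdint>

    static const uint32_t SignBit = 0x80000000u;

    // (X ^ signbit) + C == X + (signbit ^ C) modulo 2^32: flipping the top bit
    // is the same as adding the top bit, since the carry falls off the word.
    static uint32_t asXorThenAdd(uint32_t x, uint32_t c) { return (x ^ SignBit) + c; }
    static uint32_t asAddOfXor  (uint32_t x, uint32_t c) { return x + (SignBit ^ c); }

    int main() {
      assert(asXorThenAdd(0x12345678u, 7u) == asAddOfXor(0x12345678u, 7u));
      assert(asXorThenAdd(0xFFFFFFFFu, 0x80000001u) == asAddOfXor(0xFFFFFFFFu, 0x80000001u));
      return 0;
    }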
@@ -935,6 +934,9 @@ Value *InstCombiner::FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS) { Value *InstCombiner::FoldAndOfFCmps(FCmpInst *LHS, FCmpInst *RHS) { if (LHS->getPredicate() == FCmpInst::FCMP_ORD && RHS->getPredicate() == FCmpInst::FCMP_ORD) { + if (LHS->getOperand(0)->getType() != RHS->getOperand(0)->getType()) + return 0; + // (fcmp ord x, c) & (fcmp ord y, c) -> (fcmp ord x, y) if (ConstantFP *LHSC = dyn_cast<ConstantFP>(LHS->getOperand(1))) if (ConstantFP *RHSC = dyn_cast<ConstantFP>(RHS->getOperand(1))) { @@ -1545,14 +1547,6 @@ Value *InstCombiner::FoldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS) { switch (RHSCC) { default: llvm_unreachable("Unknown integer condition code!"); case ICmpInst::ICMP_EQ: - if (LHSCst == SubOne(RHSCst)) { - // (X == 13 | X == 14) -> X-13 <u 2 - Constant *AddCST = ConstantExpr::getNeg(LHSCst); - Value *Add = Builder->CreateAdd(Val, AddCST, Val->getName()+".off"); - AddCST = ConstantExpr::getSub(AddOne(RHSCst), LHSCst); - return Builder->CreateICmpULT(Add, AddCST); - } - if (LHS->getOperand(0) == RHS->getOperand(0)) { // if LHSCst and RHSCst differ only by one bit: // (A == C1 || A == C2) -> (A & ~(C1 ^ C2)) == C1 @@ -1566,6 +1560,14 @@ Value *InstCombiner::FoldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS) { } } + if (LHSCst == SubOne(RHSCst)) { + // (X == 13 | X == 14) -> X-13 <u 2 + Constant *AddCST = ConstantExpr::getNeg(LHSCst); + Value *Add = Builder->CreateAdd(Val, AddCST, Val->getName()+".off"); + AddCST = ConstantExpr::getSub(AddOne(RHSCst), LHSCst); + return Builder->CreateICmpULT(Add, AddCST); + } + break; // (X == 13 | X == 15) -> no change case ICmpInst::ICMP_UGT: // (X == 13 | X u> 14) -> no change case ICmpInst::ICMP_SGT: // (X == 13 | X s> 14) -> no change diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp index 64cd1bd..78b4a2c 100644 --- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -1372,7 +1372,8 @@ InstCombiner::transformCallThroughTrampoline(CallSite CS, NestF->getType() == PointerType::getUnqual(NewFTy) ? NestF : ConstantExpr::getBitCast(NestF, PointerType::getUnqual(NewFTy)); - const AttributeSet &NewPAL = AttributeSet::get(FTy->getContext(), NewAttrs); + const AttributeSet &NewPAL = + AttributeSet::get(FTy->getContext(), NewAttrs); Instruction *NewCaller; if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) { diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp index a96e754..4c252c0 100644 --- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -232,7 +232,7 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV, Constant *Init = GV->getInitializer(); if (!isa<ConstantArray>(Init) && !isa<ConstantDataArray>(Init)) return 0; - + uint64_t ArrayElementCount = Init->getType()->getArrayNumElements(); if (ArrayElementCount > 1024) return 0; // Don't blow up on huge arrays. 
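On the FoldOrOfICmps hunk above (illustrative sketch, not part of the diff): the patch only reorders the adjacent-constants case behind the differ-by-one-bit mask fold; the rewrite itself is unchanged, turning an equality test against two consecutive constants into one subtract and one unsigned compare:

    #include <cstdint>

    // (X == 13 || X == 14)  ->  (X - 13) <u 2
    // Unsigned wraparound sends every X below 13 to a huge value, so the single
    // range check accepts exactly {13, 14}.
    static bool asTwoCompares(uint32_t x) { return x == 13u || x == 14u; }
    static bool asRangeCheck (uint32_t x) { return (x - 13u) < 2u; }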
@@ -2487,6 +2487,55 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { return new ICmpInst(Pred, Y, Z); } + // icmp slt (X + -1), Y -> icmp sle X, Y + if (A && NoOp0WrapProblem && Pred == CmpInst::ICMP_SLT && + match(B, m_AllOnes())) + return new ICmpInst(CmpInst::ICMP_SLE, A, Op1); + + // icmp sge (X + -1), Y -> icmp sgt X, Y + if (A && NoOp0WrapProblem && Pred == CmpInst::ICMP_SGE && + match(B, m_AllOnes())) + return new ICmpInst(CmpInst::ICMP_SGT, A, Op1); + + // icmp sle (X + 1), Y -> icmp slt X, Y + if (A && NoOp0WrapProblem && Pred == CmpInst::ICMP_SLE && + match(B, m_One())) + return new ICmpInst(CmpInst::ICMP_SLT, A, Op1); + + // icmp sgt (X + 1), Y -> icmp sge X, Y + if (A && NoOp0WrapProblem && Pred == CmpInst::ICMP_SGT && + match(B, m_One())) + return new ICmpInst(CmpInst::ICMP_SGE, A, Op1); + + // if C1 has greater magnitude than C2: + // icmp (X + C1), (Y + C2) -> icmp (X + C3), Y + // s.t. C3 = C1 - C2 + // + // if C2 has greater magnitude than C1: + // icmp (X + C1), (Y + C2) -> icmp X, (Y + C3) + // s.t. C3 = C2 - C1 + if (A && C && NoOp0WrapProblem && NoOp1WrapProblem && + (BO0->hasOneUse() || BO1->hasOneUse()) && !I.isUnsigned()) + if (ConstantInt *C1 = dyn_cast<ConstantInt>(B)) + if (ConstantInt *C2 = dyn_cast<ConstantInt>(D)) { + const APInt &AP1 = C1->getValue(); + const APInt &AP2 = C2->getValue(); + if (AP1.isNegative() == AP2.isNegative()) { + APInt AP1Abs = C1->getValue().abs(); + APInt AP2Abs = C2->getValue().abs(); + if (AP1Abs.uge(AP2Abs)) { + ConstantInt *C3 = Builder->getInt(AP1 - AP2); + Value *NewAdd = Builder->CreateNSWAdd(A, C3); + return new ICmpInst(Pred, NewAdd, C); + } else { + ConstantInt *C3 = Builder->getInt(AP2 - AP1); + Value *NewAdd = Builder->CreateNSWAdd(C, C3); + return new ICmpInst(Pred, A, NewAdd); + } + } + } + + // Analyze the case when either Op0 or Op1 is a sub instruction. // Op0 = A - B (or A and B are null); Op1 = C - D (or C and D are null). A = 0; B = 0; C = 0; D = 0; @@ -2620,6 +2669,15 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { } { Value *A, *B; + // Transform (A & ~B) == 0 --> (A & B) != 0 + // and (A & ~B) != 0 --> (A & B) == 0 + // if A is a power of 2. + if (match(Op0, m_And(m_Value(A), m_Not(m_Value(B)))) && + match(Op1, m_Zero()) && isKnownToBeAPowerOfTwo(A) && I.isEquality()) + return new ICmpInst(I.getInversePredicate(), + Builder->CreateAnd(A, B), + Op1); + // ~x < ~y --> y < x // ~x < cst --> ~cst < x if (match(Op0, m_Not(m_Value(A)))) { diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp index 337cfe3..e2d7966 100644 --- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp +++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp @@ -69,8 +69,8 @@ isOnlyCopiedFromConstantGlobal(Value *V, MemTransferInst *&TheCopy, if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(U)) { // If the GEP has all zero indices, it doesn't offset the pointer. If it // doesn't, it does. 
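Among the new visitICmpInst folds above, the (A & ~B) == 0 rewrite depends on A being a power of two: a single bit survives ~B exactly when B has that bit clear, so testing the and-not result for zero is the same as testing A & B with the predicate inverted. A standalone sketch of the equivalence (editorial addition, not part of the diff):

    #include <cstdint>

    // Valid only when a has exactly one bit set:
    //   (a & ~b) == 0  <=>  (a & b) != 0
    static bool beforeFold(uint32_t a, uint32_t b) { return (a & ~b) == 0u; }
    static bool afterFold (uint32_t a, uint32_t b) { return (a & b) != 0u; }
    // e.g. a = 0x8: beforeFold(0x8, 0xC) == afterFold(0x8, 0xC) == true,
    //      beforeFold(0x8, 0x3) == afterFold(0x8, 0x3) == false.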
- if (!isOnlyCopiedFromConstantGlobal(GEP, TheCopy, ToDelete, - IsOffset || !GEP->hasAllZeroIndices())) + if (!isOnlyCopiedFromConstantGlobal( + GEP, TheCopy, ToDelete, IsOffset || !GEP->hasAllZeroIndices())) return false; continue; } @@ -166,7 +166,7 @@ Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) { // Convert: alloca Ty, C - where C is a constant != 1 into: alloca [C x Ty], 1 if (AI.isArrayAllocation()) { // Check C != 1 if (const ConstantInt *C = dyn_cast<ConstantInt>(AI.getArraySize())) { - Type *NewTy = + Type *NewTy = ArrayType::get(AI.getAllocatedType(), C->getZExtValue()); AllocaInst *New = Builder->CreateAlloca(NewTy, 0, AI.getName()); New->setAlignment(AI.getAlignment()); @@ -294,7 +294,7 @@ static Instruction *InstCombineLoadCast(InstCombiner &IC, LoadInst &LI, Type *SrcPTy = SrcTy->getElementType(); - if (DestPTy->isIntegerTy() || DestPTy->isPointerTy() || + if (DestPTy->isIntegerTy() || DestPTy->isPointerTy() || DestPTy->isVectorTy()) { // If the source is an array, the code below will not succeed. Check to // see if a trivial 'gep P, 0, 0' will help matters. Only do this for @@ -311,7 +311,7 @@ static Instruction *InstCombineLoadCast(InstCombiner &IC, LoadInst &LI, } if (IC.getDataLayout() && - (SrcPTy->isIntegerTy() || SrcPTy->isPointerTy() || + (SrcPTy->isIntegerTy() || SrcPTy->isPointerTy() || SrcPTy->isVectorTy()) && // Do not allow turning this into a load of an integer, which is then // casted to a pointer, this pessimizes pointer analysis a lot. @@ -322,7 +322,7 @@ static Instruction *InstCombineLoadCast(InstCombiner &IC, LoadInst &LI, // Okay, we are casting from one integer or pointer type to another of // the same size. Instead of casting the pointer before the load, cast // the result of the loaded value. - LoadInst *NewLoad = + LoadInst *NewLoad = IC.Builder->CreateLoad(CastOp, LI.isVolatile(), CI->getName()); NewLoad->setAlignment(LI.getAlignment()); NewLoad->setAtomic(LI.getOrdering(), LI.getSynchScope()); @@ -359,7 +359,7 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) { // None of the following transforms are legal for volatile/atomic loads. // FIXME: Some of it is okay for atomic loads; needs refactoring. if (!LI.isSimple()) return 0; - + // Do really simple store-to-load forwarding and load CSE, to catch cases // where there are several consecutive memory accesses to the same location, // separated by a few arithmetic operations. @@ -380,7 +380,7 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) { Constant::getNullValue(Op->getType()), &LI); return ReplaceInstUsesWith(LI, UndefValue::get(LI.getType())); } - } + } // load null/undef -> unreachable // TODO: Consider a target hook for valid address spaces for this xform. @@ -399,7 +399,7 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) { if (CE->isCast()) if (Instruction *Res = InstCombineLoadCast(*this, LI, TD)) return Res; - + if (Op->hasOneUse()) { // Change select and PHI nodes to select values instead of addresses: this // helps alias analysis out a lot, allows many others simplifications, and @@ -453,18 +453,18 @@ static Instruction *InstCombineStoreToCast(InstCombiner &IC, StoreInst &SI) { Type *DestPTy = cast<PointerType>(CI->getType())->getElementType(); PointerType *SrcTy = dyn_cast<PointerType>(CastOp->getType()); if (SrcTy == 0) return 0; - + Type *SrcPTy = SrcTy->getElementType(); if (!DestPTy->isIntegerTy() && !DestPTy->isPointerTy()) return 0; - + /// NewGEPIndices - If SrcPTy is an aggregate type, we can emit a "noop gep" /// to its first element. 
This allows us to handle things like: /// store i32 xxx, (bitcast {foo*, float}* %P to i32*) /// on 32-bit hosts. SmallVector<Value*, 4> NewGEPIndices; - + // If the source is an array, the code below will not succeed. Check to // see if a trivial 'gep P, 0, 0' will help matters. Only do this for // constants. @@ -472,7 +472,7 @@ static Instruction *InstCombineStoreToCast(InstCombiner &IC, StoreInst &SI) { // Index through pointer. Constant *Zero = Constant::getNullValue(Type::getInt32Ty(SI.getContext())); NewGEPIndices.push_back(Zero); - + while (1) { if (StructType *STy = dyn_cast<StructType>(SrcPTy)) { if (!STy->getNumElements()) /* Struct can be empty {} */ @@ -486,24 +486,24 @@ static Instruction *InstCombineStoreToCast(InstCombiner &IC, StoreInst &SI) { break; } } - + SrcTy = PointerType::get(SrcPTy, SrcTy->getAddressSpace()); } if (!SrcPTy->isIntegerTy() && !SrcPTy->isPointerTy()) return 0; - + // If the pointers point into different address spaces or if they point to // values with different sizes, we can't do the transformation. if (!IC.getDataLayout() || - SrcTy->getAddressSpace() != + SrcTy->getAddressSpace() != cast<PointerType>(CI->getType())->getAddressSpace() || IC.getDataLayout()->getTypeSizeInBits(SrcPTy) != IC.getDataLayout()->getTypeSizeInBits(DestPTy)) return 0; // Okay, we are casting from one integer or pointer type to another of - // the same size. Instead of casting the pointer before + // the same size. Instead of casting the pointer before // the store, cast the value to be stored. Value *NewCast; Value *SIOp0 = SI.getOperand(0); @@ -517,12 +517,12 @@ static Instruction *InstCombineStoreToCast(InstCombiner &IC, StoreInst &SI) { if (SIOp0->getType()->isPointerTy()) opcode = Instruction::PtrToInt; } - + // SIOp0 is a pointer to aggregate and this is a store to the first field, // emit a GEP to index into its first field. if (!NewGEPIndices.empty()) CastOp = IC.Builder->CreateInBoundsGEP(CastOp, NewGEPIndices); - + NewCast = IC.Builder->CreateCast(opcode, SIOp0, CastDstTy, SIOp0->getName()+".c"); SI.setOperand(0, NewCast); @@ -541,7 +541,7 @@ static Instruction *InstCombineStoreToCast(InstCombiner &IC, StoreInst &SI) { static bool equivalentAddressValues(Value *A, Value *B) { // Test if the values are trivially equivalent. if (A == B) return true; - + // Test if the values come form identical arithmetic instructions. // This uses isIdenticalToWhenDefined instead of isIdenticalTo because // its only used to compare two uses within the same basic block, which @@ -554,7 +554,7 @@ static bool equivalentAddressValues(Value *A, Value *B) { if (Instruction *BI = dyn_cast<Instruction>(B)) if (cast<Instruction>(A)->isIdenticalToWhenDefined(BI)) return true; - + // Otherwise they may not be equivalent. return false; } @@ -585,7 +585,7 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) { // If the RHS is an alloca with a single use, zapify the store, making the // alloca dead. if (Ptr->hasOneUse()) { - if (isa<AllocaInst>(Ptr)) + if (isa<AllocaInst>(Ptr)) return EraseInstFromFunction(SI); if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr)) { if (isa<AllocaInst>(GEP->getOperand(0))) { @@ -608,8 +608,8 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) { (isa<BitCastInst>(BBI) && BBI->getType()->isPointerTy())) { ScanInsts++; continue; - } - + } + if (StoreInst *PrevSI = dyn_cast<StoreInst>(BBI)) { // Prev store isn't volatile, and stores to the same location? 
if (PrevSI->isSimple() && equivalentAddressValues(PrevSI->getOperand(1), @@ -621,7 +621,7 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) { } break; } - + // If this is a load, we have to stop. However, if the loaded value is from // the pointer we're loading and is producing the pointer we're storing, // then *this* store is dead (X = load P; store X -> P). @@ -629,12 +629,12 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) { if (LI == Val && equivalentAddressValues(LI->getOperand(0), Ptr) && LI->isSimple()) return EraseInstFromFunction(SI); - + // Otherwise, this is a load from some other location. Stores before it // may not be dead. break; } - + // Don't skip over loads or things that can modify memory. if (BBI->mayWriteToMemory() || BBI->mayReadFromMemory()) break; @@ -664,11 +664,11 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) { if (Instruction *Res = InstCombineStoreToCast(*this, SI)) return Res; - + // If this store is the last instruction in the basic block (possibly // excepting debug info instructions), and if the block ends with an // unconditional branch, try to move it to the successor block. - BBI = &SI; + BBI = &SI; do { ++BBI; } while (isa<DbgInfoIntrinsic>(BBI) || @@ -677,7 +677,7 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) { if (BI->isUnconditional()) if (SimplifyStoreAtEndOfBlock(SI)) return 0; // xform done! - + return 0; } @@ -691,12 +691,12 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) { /// bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) { BasicBlock *StoreBB = SI.getParent(); - + // Check to see if the successor block has exactly two incoming edges. If // so, see if the other predecessor contains a store to the same location. // if so, insert a PHI node (if needed) and move the stores down. BasicBlock *DestBB = StoreBB->getTerminator()->getSuccessor(0); - + // Determine whether Dest has exactly two predecessors and, if so, compute // the other predecessor. pred_iterator PI = pred_begin(DestBB); @@ -708,7 +708,7 @@ bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) { if (++PI == pred_end(DestBB)) return false; - + P = *PI; if (P != StoreBB) { if (OtherBB) @@ -728,7 +728,7 @@ bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) { BranchInst *OtherBr = dyn_cast<BranchInst>(BBI); if (!OtherBr || BBI == OtherBB->begin()) return false; - + // If the other block ends in an unconditional branch, check for the 'if then // else' case. there is an instruction before the branch. StoreInst *OtherStore = 0; @@ -750,10 +750,10 @@ bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) { } else { // Otherwise, the other block ended with a conditional branch. If one of the // destinations is StoreBB, then we have the if/then case. - if (OtherBr->getSuccessor(0) != StoreBB && + if (OtherBr->getSuccessor(0) != StoreBB && OtherBr->getSuccessor(1) != StoreBB) return false; - + // Okay, we know that OtherBr now goes to Dest and StoreBB, so this is an // if/then triangle. See if there is a store to the same ptr as SI that // lives in OtherBB. @@ -771,7 +771,7 @@ bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) { BBI == OtherBB->begin()) return false; } - + // In order to eliminate the store in OtherBr, we have to // make sure nothing reads or overwrites the stored value in // StoreBB. @@ -781,7 +781,7 @@ bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) { return false; } } - + // Insert a PHI node now if we need it. 
Value *MergedVal = OtherStore->getOperand(0); if (MergedVal != SI.getOperand(0)) { @@ -790,7 +790,7 @@ bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) { PN->addIncoming(OtherStore->getOperand(0), OtherBB); MergedVal = InsertNewInstBefore(PN, DestBB->front()); } - + // Advance to a place where it is safe to insert the new store and // insert it. BBI = DestBB->getFirstInsertionPt(); @@ -800,7 +800,7 @@ bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) { SI.getOrdering(), SI.getSynchScope()); InsertNewInstBefore(NewSI, *BBI); - NewSI->setDebugLoc(OtherStore->getDebugLoc()); + NewSI->setDebugLoc(OtherStore->getDebugLoc()); // If the two stores had the same TBAA tag, preserve it. if (MDNode *TBAATag = SI.getMetadata(LLVMContext::MD_tbaa)) @@ -808,7 +808,7 @@ bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) { OtherStore->getMetadata(LLVMContext::MD_tbaa)))) NewSI->setMetadata(LLVMContext::MD_tbaa, TBAATag); - + // Nuke the old stores. EraseInstFromFunction(SI); EraseInstFromFunction(*OtherStore); diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp index 173f2bf..ecc9fc3 100644 --- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp +++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp @@ -28,7 +28,7 @@ static Value *simplifyValueKnownNonZero(Value *V, InstCombiner &IC) { // if this is safe. For example, the use could be in dynamically unreached // code. if (!V->hasOneUse()) return 0; - + bool MadeChange = false; // ((1 << A) >>u B) --> (1 << (A-B)) @@ -41,7 +41,7 @@ static Value *simplifyValueKnownNonZero(Value *V, InstCombiner &IC) { A = IC.Builder->CreateSub(A, B); return IC.Builder->CreateShl(PowerOf2, A); } - + // (PowerOfTwo >>u B) --> isExact since shifting out the result would make it // inexact. Similarly for <<. if (BinaryOperator *I = dyn_cast<BinaryOperator>(V)) @@ -52,12 +52,12 @@ static Value *simplifyValueKnownNonZero(Value *V, InstCombiner &IC) { I->setOperand(0, V2); MadeChange = true; } - + if (I->getOpcode() == Instruction::LShr && !I->isExact()) { I->setIsExact(); MadeChange = true; } - + if (I->getOpcode() == Instruction::Shl && !I->hasNoUnsignedWrap()) { I->setHasNoUnsignedWrap(); MadeChange = true; @@ -67,7 +67,7 @@ static Value *simplifyValueKnownNonZero(Value *V, InstCombiner &IC) { // TODO: Lots more we could do here: // If V is a phi node, we can call this on each of its operands. // "select cond, X, 0" can simplify to "X". - + return MadeChange ? 
V : 0; } @@ -84,12 +84,12 @@ static bool MultiplyOverflows(ConstantInt *C1, ConstantInt *C2, bool sign) { LHSExt = LHSExt.zext(W * 2); RHSExt = RHSExt.zext(W * 2); } - + APInt MulExt = LHSExt * RHSExt; - + if (!sign) return MulExt.ugt(APInt::getLowBitsSet(W * 2, W)); - + APInt Min = APInt::getSignedMinValue(W).sext(W * 2); APInt Max = APInt::getSignedMaxValue(W).sext(W * 2); return MulExt.slt(Min) || MulExt.sgt(Max); @@ -107,16 +107,16 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) { if (match(Op1, m_AllOnes())) // X * -1 == 0 - X return BinaryOperator::CreateNeg(Op0, I.getName()); - + if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) { - + // ((X << C1)*C2) == (X * (C2 << C1)) if (BinaryOperator *SI = dyn_cast<BinaryOperator>(Op0)) if (SI->getOpcode() == Instruction::Shl) if (Constant *ShOp = dyn_cast<Constant>(SI->getOperand(1))) return BinaryOperator::CreateMul(SI->getOperand(0), ConstantExpr::getShl(CI, ShOp)); - + const APInt &Val = CI->getValue(); if (Val.isPowerOf2()) { // Replace X*(2^C) with X << C Constant *NewCst = ConstantInt::get(Op0->getType(), Val.logBase2()); @@ -125,7 +125,7 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) { if (I.hasNoUnsignedWrap()) Shl->setHasNoUnsignedWrap(); return Shl; } - + // Canonicalize (X+C1)*CI -> X*CI+C1*CI. { Value *X; ConstantInt *C1; if (Op0->hasOneUse() && @@ -158,9 +158,9 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) { } } } - + // Simplify mul instructions with a constant RHS. - if (isa<Constant>(Op1)) { + if (isa<Constant>(Op1)) { // Try to fold constant mul into select arguments. if (SelectInst *SI = dyn_cast<SelectInst>(Op0)) if (Instruction *R = FoldOpIntoSelect(I, SI)) @@ -181,7 +181,7 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) { Value *Op1C = Op1; BinaryOperator *BO = dyn_cast<BinaryOperator>(Op0); if (!BO || - (BO->getOpcode() != Instruction::UDiv && + (BO->getOpcode() != Instruction::UDiv && BO->getOpcode() != Instruction::SDiv)) { Op1C = Op0; BO = dyn_cast<BinaryOperator>(Op1); @@ -227,14 +227,14 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) { if (match(Op1, m_Shl(m_One(), m_Value(Y)))) return BinaryOperator::CreateShl(Op0, Y); } - + // If one of the operands of the multiply is a cast from a boolean value, then // we know the bool is either zero or one, so this is a 'masking' multiply. // X * Y (where Y is 0 or 1) -> X & (0-Y) if (!I.getType()->isVectorTy()) { // -2 is "-1 << 1" so it is all bits set except the low one. APInt Negative2(I.getType()->getPrimitiveSizeInBits(), (uint64_t)-2, true); - + Value *BoolCast = 0, *OtherOp = 0; if (MaskedValueIsZero(Op0, Negative2)) BoolCast = Op0, OtherOp = Op1; @@ -280,7 +280,7 @@ static void detectLog2OfHalf(Value *&Op, Value *&Y, IntrinsicInst *&Log2) { return; if (I->getOpcode() != Instruction::FMul || !I->hasUnsafeAlgebra()) return; - + ConstantFP *CFP = dyn_cast<ConstantFP>(I->getOperand(0)); if (CFP && CFP->isExactlyValue(0.5)) { Y = I->getOperand(1); @@ -289,14 +289,14 @@ static void detectLog2OfHalf(Value *&Op, Value *&Y, IntrinsicInst *&Log2) { CFP = dyn_cast<ConstantFP>(I->getOperand(1)); if (CFP && CFP->isExactlyValue(0.5)) Y = I->getOperand(0); -} +} /// Helper function of InstCombiner::visitFMul(BinaryOperator(). It returns /// true iff the given value is FMul or FDiv with one and only one operand /// being a normal constant (i.e. not Zero/NaN/Infinity). 
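A short aside on visitMul above (illustrative, not part of the diff): the "masking multiply" mentioned in its comment applies when one operand is a boolean widened to the integer type, so it can only be 0 or 1; multiplying is then the same as AND-ing with 0 - Y, which is all-ones for 1 and zero for 0:

    #include <cstdint>

    // X * Y with Y known to be 0 or 1:  X * Y == X & (0 - Y)
    static uint32_t byMultiply(uint32_t x, uint32_t y01) { return x * y01; }
    static uint32_t byMask    (uint32_t x, uint32_t y01) { return x & (0u - y01); }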
static bool isFMulOrFDivWithConstant(Value *V) { Instruction *I = dyn_cast<Instruction>(V); - if (!I || (I->getOpcode() != Instruction::FMul && + if (!I || (I->getOpcode() != Instruction::FMul && I->getOpcode() != Instruction::FDiv)) return false; @@ -318,10 +318,10 @@ static bool isNormalFp(const ConstantFP *C) { /// foldFMulConst() is a helper routine of InstCombiner::visitFMul(). /// The input \p FMulOrDiv is a FMul/FDiv with one and only one operand /// being a constant (i.e. isFMulOrFDivWithConstant(FMulOrDiv) == true). -/// This function is to simplify "FMulOrDiv * C" and returns the +/// This function is to simplify "FMulOrDiv * C" and returns the /// resulting expression. Note that this function could return NULL in /// case the constants cannot be folded into a normal floating-point. -/// +/// Value *InstCombiner::foldFMulConst(Instruction *FMulOrDiv, ConstantFP *C, Instruction *InsertBefore) { assert(isFMulOrFDivWithConstant(FMulOrDiv) && "V is invalid"); @@ -351,7 +351,7 @@ Value *InstCombiner::foldFMulConst(Instruction *FMulOrDiv, ConstantFP *C, if (isNormalFp(F)) { R = BinaryOperator::CreateFMul(Opnd0, F); } else { - // (X / C1) * C => X / (C1/C) + // (X / C1) * C => X / (C1/C) Constant *F = ConstantExpr::getFDiv(C1, C); if (isNormalFp(cast<ConstantFP>(F))) R = BinaryOperator::CreateFDiv(Opnd0, F); @@ -415,13 +415,13 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) { if (C0) { std::swap(C0, C1); std::swap(Opnd0, Opnd1); - Swap = true; + Swap = true; } if (C1 && C1->getValueAPF().isNormal() && isFMulOrFDivWithConstant(Opnd0)) { Value *M1 = ConstantExpr::getFMul(C1, C); - Value *M0 = isNormalFp(cast<ConstantFP>(M1)) ? + Value *M0 = isNormalFp(cast<ConstantFP>(M1)) ? foldFMulConst(cast<Instruction>(Opnd0), C, &I) : 0; if (M0 && M1) { @@ -495,7 +495,7 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) { } // (X*Y) * X => (X*X) * Y where Y != X - // The purpose is two-fold: + // The purpose is two-fold: // 1) to form a power expression (of X). // 2) potentially shorten the critical path: After transformation, the // latency of the instruction Y is amortized by the expression of X*X, @@ -524,6 +524,35 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) { } } + // B * (uitofp i1 C) -> select C, B, 0 + if (I.hasNoNaNs() && I.hasNoInfs() && I.hasNoSignedZeros()) { + Value *LHS = Op0, *RHS = Op1; + Value *B, *C; + if (!match(RHS, m_UIToFp(m_Value(C)))) + std::swap(LHS, RHS); + + if (match(RHS, m_UIToFp(m_Value(C))) && C->getType()->isIntegerTy(1)) { + B = LHS; + Value *Zero = ConstantFP::getNegativeZero(B->getType()); + return SelectInst::Create(C, B, Zero); + } + } + + // A * (1 - uitofp i1 C) -> select C, 0, A + if (I.hasNoNaNs() && I.hasNoInfs() && I.hasNoSignedZeros()) { + Value *LHS = Op0, *RHS = Op1; + Value *A, *C; + if (!match(RHS, m_FSub(m_FPOne(), m_UIToFp(m_Value(C))))) + std::swap(LHS, RHS); + + if (match(RHS, m_FSub(m_FPOne(), m_UIToFp(m_Value(C)))) && + C->getType()->isIntegerTy(1)) { + A = LHS; + Value *Zero = ConstantFP::getNegativeZero(A->getType()); + return SelectInst::Create(C, Zero, A); + } + } + if (!isa<Constant>(Op1)) std::swap(Opnd0, Opnd1); else @@ -537,7 +566,7 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) { /// instruction. bool InstCombiner::SimplifyDivRemOfSelect(BinaryOperator &I) { SelectInst *SI = cast<SelectInst>(I.getOperand(1)); - + // div/rem X, (Cond ? 
0 : Y) -> div/rem X, Y int NonNullOperand = -1; if (Constant *ST = dyn_cast<Constant>(SI->getOperand(1))) @@ -547,36 +576,36 @@ bool InstCombiner::SimplifyDivRemOfSelect(BinaryOperator &I) { if (Constant *ST = dyn_cast<Constant>(SI->getOperand(2))) if (ST->isNullValue()) NonNullOperand = 1; - + if (NonNullOperand == -1) return false; - + Value *SelectCond = SI->getOperand(0); - + // Change the div/rem to use 'Y' instead of the select. I.setOperand(1, SI->getOperand(NonNullOperand)); - + // Okay, we know we replace the operand of the div/rem with 'Y' with no // problem. However, the select, or the condition of the select may have // multiple uses. Based on our knowledge that the operand must be non-zero, // propagate the known value for the select into other uses of it, and // propagate a known value of the condition into its other users. - + // If the select and condition only have a single use, don't bother with this, // early exit. if (SI->use_empty() && SelectCond->hasOneUse()) return true; - + // Scan the current block backward, looking for other uses of SI. BasicBlock::iterator BBI = &I, BBFront = I.getParent()->begin(); - + while (BBI != BBFront) { --BBI; // If we found a call to a function, we can't assume it will return, so // information from below it cannot be propagated above it. if (isa<CallInst>(BBI) && !isa<IntrinsicInst>(BBI)) break; - + // Replace uses of the select or its condition with the known values. for (Instruction::op_iterator I = BBI->op_begin(), E = BBI->op_end(); I != E; ++I) { @@ -589,17 +618,17 @@ bool InstCombiner::SimplifyDivRemOfSelect(BinaryOperator &I) { Worklist.Add(BBI); } } - + // If we past the instruction, quit looking for it. if (&*BBI == SI) SI = 0; if (&*BBI == SelectCond) SelectCond = 0; - + // If we ran out of things to eliminate, break out of the loop. if (SelectCond == 0 && SI == 0) break; - + } return true; } @@ -617,7 +646,7 @@ Instruction *InstCombiner::commonIDivTransforms(BinaryOperator &I) { I.setOperand(1, V); return &I; } - + // Handle cases involving: [su]div X, (select Cond, Y, Z) // This does not apply for fdiv. if (isa<SelectInst>(Op1) && SimplifyDivRemOfSelect(I)) @@ -683,16 +712,16 @@ Instruction *InstCombiner::visitUDiv(BinaryOperator &I) { // Handle the integer div common cases if (Instruction *Common = commonIDivTransforms(I)) return Common; - - { + + { // X udiv 2^C -> X >> C // Check to see if this is an unsigned division with an exact power of 2, // if so, convert to a right shift. const APInt *C; if (match(Op1, m_Power2(C))) { BinaryOperator *LShr = - BinaryOperator::CreateLShr(Op0, - ConstantInt::get(Op0->getType(), + BinaryOperator::CreateLShr(Op0, + ConstantInt::get(Op0->getType(), C->logBase2())); if (I.isExact()) LShr->setIsExact(); return LShr; @@ -732,7 +761,7 @@ Instruction *InstCombiner::visitUDiv(BinaryOperator &I) { return BinaryOperator::CreateLShr(Op0, N); } } - + // udiv X, (Select Cond, C1, C2) --> Select Cond, (shr X, C1), (shr X, C2) // where C1&C2 are powers of two. { Value *Cond; const APInt *C1, *C2; @@ -740,11 +769,11 @@ Instruction *InstCombiner::visitUDiv(BinaryOperator &I) { // Construct the "on true" case of the select Value *TSI = Builder->CreateLShr(Op0, C1->logBase2(), Op1->getName()+".t", I.isExact()); - + // Construct the "on false" case of the select Value *FSI = Builder->CreateLShr(Op0, C2->logBase2(), Op1->getName()+".f", I.isExact()); - + // construct the select instruction and return it. 
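Looping back to the new visitFMul folds a little further up (editorial sketch, not part of the diff): B * (uitofp i1 C) becomes a select because the i1 converts to exactly 0.0 or 1.0. The patch uses -0.0 as the false arm; nsz makes the sign of that zero irrelevant, and nnan/ninf are needed because B * 0.0 would otherwise be NaN for NaN or infinite B:

    // B * (double)C for a bool C, rewritten as a select.
    static double byMultiply(double b, bool c) { return b * static_cast<double>(c); }
    static double bySelect  (double b, bool c) { return c ? b : -0.0; }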
return SelectInst::Create(Cond, TSI, FSI); } @@ -799,7 +828,7 @@ Instruction *InstCombiner::visitSDiv(BinaryOperator &I) { // X sdiv Y -> X udiv Y, iff X and Y don't have sign bit set return BinaryOperator::CreateUDiv(Op0, Op1, I.getName()); } - + if (match(Op1, m_Shl(m_Power2(), m_Value()))) { // X sdiv (1 << Y) -> X udiv (1 << Y) ( -> X u>> Y) // Safe because the only negative value (1 << Y) can take on is @@ -809,13 +838,13 @@ Instruction *InstCombiner::visitSDiv(BinaryOperator &I) { } } } - + return 0; } /// CvtFDivConstToReciprocal tries to convert X/C into X*1/C if C not a special /// FP value and: -/// 1) 1/C is exact, or +/// 1) 1/C is exact, or /// 2) reciprocal is allowed. /// If the convertion was successful, the simplified expression "X * 1/C" is /// returned; otherwise, NULL is returned. @@ -826,7 +855,7 @@ static Instruction *CvtFDivConstToReciprocal(Value *Dividend, const APFloat &FpVal = Divisor->getValueAPF(); APFloat Reciprocal(FpVal.getSemantics()); bool Cvt = FpVal.getExactInverse(&Reciprocal); - + if (!Cvt && AllowReciprocal && FpVal.isNormal()) { Reciprocal = APFloat(FpVal.getSemantics(), 1.0f); (void)Reciprocal.divide(FpVal, APFloat::rmNearestTiesToEven); @@ -870,10 +899,10 @@ Instruction *InstCombiner::visitFDiv(BinaryOperator &I) { Constant *C = ConstantExpr::getFMul(C1, C2); const APFloat &F = cast<ConstantFP>(C)->getValueAPF(); if (F.isNormal() && !F.isDenormal()) { - Res = CvtFDivConstToReciprocal(X, cast<ConstantFP>(C), + Res = CvtFDivConstToReciprocal(X, cast<ConstantFP>(C), AllowReciprocal); if (!Res) - Res = BinaryOperator::CreateFDiv(X, C); + Res = BinaryOperator::CreateFDiv(X, C); } } @@ -911,7 +940,7 @@ Instruction *InstCombiner::visitFDiv(BinaryOperator &I) { if (Fold) { const APFloat &FoldC = cast<ConstantFP>(Fold)->getValueAPF(); if (FoldC.isNormal() && !FoldC.isDenormal()) { - Instruction *R = CreateDiv ? + Instruction *R = CreateDiv ? 
BinaryOperator::CreateFDiv(Fold, X) : BinaryOperator::CreateFMul(X, Fold); R->setFastMathFlags(I.getFastMathFlags()); @@ -997,7 +1026,7 @@ Instruction *InstCombiner::visitURem(BinaryOperator &I) { if (Instruction *common = commonIRemTransforms(I)) return common; - + // X urem C^2 -> X and C-1 { const APInt *C; if (match(Op1, m_Power2(C))) @@ -1005,7 +1034,7 @@ Instruction *InstCombiner::visitURem(BinaryOperator &I) { ConstantInt::get(I.getType(), *C-1)); } - // Turn A % (C << N), where C is 2^k, into A & ((C << N)-1) + // Turn A % (C << N), where C is 2^k, into A & ((C << N)-1) if (match(Op1, m_Shl(m_Power2(), m_Value()))) { Constant *N1 = Constant::getAllOnesValue(I.getType()); Value *Add = Builder->CreateAdd(Op1, N1); @@ -1041,7 +1070,7 @@ Instruction *InstCombiner::visitSRem(BinaryOperator &I) { // Handle the integer rem common cases if (Instruction *Common = commonIRemTransforms(I)) return Common; - + if (Value *RHSNeg = dyn_castNegVal(Op1)) if (!isa<Constant>(RHSNeg) || (isa<ConstantInt>(RHSNeg) && diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp index b0a998c..bd14e81 100644 --- a/contrib/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp +++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp @@ -27,10 +27,10 @@ Instruction *InstCombiner::FoldPHIArgBinOpIntoPHI(PHINode &PN) { unsigned Opc = FirstInst->getOpcode(); Value *LHSVal = FirstInst->getOperand(0); Value *RHSVal = FirstInst->getOperand(1); - + Type *LHSType = LHSVal->getType(); Type *RHSType = RHSVal->getType(); - + bool isNUW = false, isNSW = false, isExact = false; if (OverflowingBinaryOperator *BO = dyn_cast<OverflowingBinaryOperator>(FirstInst)) { @@ -39,7 +39,7 @@ Instruction *InstCombiner::FoldPHIArgBinOpIntoPHI(PHINode &PN) { } else if (PossiblyExactOperator *PEO = dyn_cast<PossiblyExactOperator>(FirstInst)) isExact = PEO->isExact(); - + // Scan to see if all operands are the same opcode, and all have one use. for (unsigned i = 1; i != PN.getNumIncomingValues(); ++i) { Instruction *I = dyn_cast<Instruction>(PN.getIncomingValue(i)); @@ -54,14 +54,14 @@ Instruction *InstCombiner::FoldPHIArgBinOpIntoPHI(PHINode &PN) { if (CmpInst *CI = dyn_cast<CmpInst>(I)) if (CI->getPredicate() != cast<CmpInst>(FirstInst)->getPredicate()) return 0; - + if (isNUW) isNUW = cast<OverflowingBinaryOperator>(I)->hasNoUnsignedWrap(); if (isNSW) isNSW = cast<OverflowingBinaryOperator>(I)->hasNoSignedWrap(); if (isExact) isExact = cast<PossiblyExactOperator>(I)->isExact(); - + // Keep track of which operand needs a phi node. if (I->getOperand(0) != LHSVal) LHSVal = 0; if (I->getOperand(1) != RHSVal) RHSVal = 0; @@ -73,9 +73,9 @@ Instruction *InstCombiner::FoldPHIArgBinOpIntoPHI(PHINode &PN) { // bad when the PHIs are in the header of a loop. if (!LHSVal && !RHSVal) return 0; - + // Otherwise, this is safe to transform! - + Value *InLHS = FirstInst->getOperand(0); Value *InRHS = FirstInst->getOperand(1); PHINode *NewLHS = 0, *NewRHS = 0; @@ -86,7 +86,7 @@ Instruction *InstCombiner::FoldPHIArgBinOpIntoPHI(PHINode &PN) { InsertNewInstBefore(NewLHS, PN); LHSVal = NewLHS; } - + if (RHSVal == 0) { NewRHS = PHINode::Create(RHSType, PN.getNumIncomingValues(), FirstInst->getOperand(1)->getName() + ".pn"); @@ -94,7 +94,7 @@ Instruction *InstCombiner::FoldPHIArgBinOpIntoPHI(PHINode &PN) { InsertNewInstBefore(NewRHS, PN); RHSVal = NewRHS; } - + // Add all operands to the new PHIs. 
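On visitURem above (illustrative, not part of the diff): an unsigned remainder by a power of two is a mask with that power minus one, and the follow-on case extends the same idea to A % (C << N) by building the mask with an add of all-ones. The basic case at the source level:

    #include <cstdint>

    // X urem 2^k  ->  X & (2^k - 1); here with 2^k = 8.
    static uint32_t byRemainder(uint32_t x) { return x % 8u; }
    static uint32_t byMask     (uint32_t x) { return x & 7u; }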
if (NewLHS || NewRHS) { for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i) { @@ -109,7 +109,7 @@ Instruction *InstCombiner::FoldPHIArgBinOpIntoPHI(PHINode &PN) { } } } - + if (CmpInst *CIOp = dyn_cast<CmpInst>(FirstInst)) { CmpInst *NewCI = CmpInst::Create(CIOp->getOpcode(), CIOp->getPredicate(), LHSVal, RHSVal); @@ -129,8 +129,8 @@ Instruction *InstCombiner::FoldPHIArgBinOpIntoPHI(PHINode &PN) { Instruction *InstCombiner::FoldPHIArgGEPIntoPHI(PHINode &PN) { GetElementPtrInst *FirstInst =cast<GetElementPtrInst>(PN.getIncomingValue(0)); - - SmallVector<Value*, 16> FixedOperands(FirstInst->op_begin(), + + SmallVector<Value*, 16> FixedOperands(FirstInst->op_begin(), FirstInst->op_end()); // This is true if all GEP bases are allocas and if all indices into them are // constants. @@ -140,9 +140,9 @@ Instruction *InstCombiner::FoldPHIArgGEPIntoPHI(PHINode &PN) { // more than one phi, which leads to higher register pressure. This is // especially bad when the PHIs are in the header of a loop. bool NeededPhi = false; - + bool AllInBounds = true; - + // Scan to see if all operands are the same opcode, and all have one use. for (unsigned i = 1; i != PN.getNumIncomingValues(); ++i) { GetElementPtrInst *GEP= dyn_cast<GetElementPtrInst>(PN.getIncomingValue(i)); @@ -151,18 +151,18 @@ Instruction *InstCombiner::FoldPHIArgGEPIntoPHI(PHINode &PN) { return 0; AllInBounds &= GEP->isInBounds(); - + // Keep track of whether or not all GEPs are of alloca pointers. if (AllBasePointersAreAllocas && (!isa<AllocaInst>(GEP->getOperand(0)) || !GEP->hasAllConstantIndices())) AllBasePointersAreAllocas = false; - + // Compare the operand lists. for (unsigned op = 0, e = FirstInst->getNumOperands(); op != e; ++op) { if (FirstInst->getOperand(op) == GEP->getOperand(op)) continue; - + // Don't merge two GEPs when two operands differ (introducing phi nodes) // if one of the PHIs has a constant for the index. The index may be // substantially cheaper to compute for the constants, so making it a @@ -171,7 +171,7 @@ Instruction *InstCombiner::FoldPHIArgGEPIntoPHI(PHINode &PN) { if (isa<ConstantInt>(FirstInst->getOperand(op)) || isa<ConstantInt>(GEP->getOperand(op))) return 0; - + if (FirstInst->getOperand(op)->getType() !=GEP->getOperand(op)->getType()) return 0; @@ -186,7 +186,7 @@ Instruction *InstCombiner::FoldPHIArgGEPIntoPHI(PHINode &PN) { NeededPhi = true; } } - + // If all of the base pointers of the PHI'd GEPs are from allocas, don't // bother doing this transformation. At best, this will just save a bit of // offset calculation, but all the predecessors will have to materialize the @@ -195,11 +195,11 @@ Instruction *InstCombiner::FoldPHIArgGEPIntoPHI(PHINode &PN) { // which can usually all be folded into the load. if (AllBasePointersAreAllocas) return 0; - + // Otherwise, this is safe to transform. Insert PHI nodes for each operand // that is variable. SmallVector<PHINode*, 16> OperandPhis(FixedOperands.size()); - + bool HasAnyPHIs = false; for (unsigned i = 0, e = FixedOperands.size(); i != e; ++i) { if (FixedOperands[i]) continue; // operand doesn't need a phi. @@ -207,28 +207,28 @@ Instruction *InstCombiner::FoldPHIArgGEPIntoPHI(PHINode &PN) { PHINode *NewPN = PHINode::Create(FirstOp->getType(), e, FirstOp->getName()+".pn"); InsertNewInstBefore(NewPN, PN); - + NewPN->addIncoming(FirstOp, PN.getIncomingBlock(0)); OperandPhis[i] = NewPN; FixedOperands[i] = NewPN; HasAnyPHIs = true; } - + // Add all operands to the new PHIs. 
if (HasAnyPHIs) { for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i) { GetElementPtrInst *InGEP =cast<GetElementPtrInst>(PN.getIncomingValue(i)); BasicBlock *InBB = PN.getIncomingBlock(i); - + for (unsigned op = 0, e = OperandPhis.size(); op != e; ++op) if (PHINode *OpPhi = OperandPhis[op]) OpPhi->addIncoming(InGEP->getOperand(op), InBB); } } - + Value *Base = FixedOperands[0]; - GetElementPtrInst *NewGEP = + GetElementPtrInst *NewGEP = GetElementPtrInst::Create(Base, makeArrayRef(FixedOperands).slice(1)); if (AllInBounds) NewGEP->setIsInBounds(); NewGEP->setDebugLoc(FirstInst->getDebugLoc()); @@ -246,11 +246,11 @@ Instruction *InstCombiner::FoldPHIArgGEPIntoPHI(PHINode &PN) { /// to a register. static bool isSafeAndProfitableToSinkLoad(LoadInst *L) { BasicBlock::iterator BBI = L, E = L->getParent()->end(); - + for (++BBI; BBI != E; ++BBI) if (BBI->mayWriteToMemory()) return false; - + // Check for non-address taken alloca. If not address-taken already, it isn't // profitable to do this xform. if (AllocaInst *AI = dyn_cast<AllocaInst>(L->getOperand(0))) { @@ -266,11 +266,11 @@ static bool isSafeAndProfitableToSinkLoad(LoadInst *L) { isAddressTaken = true; break; } - + if (!isAddressTaken && AI->isStaticAlloca()) return false; } - + // If this load is a load from a GEP with a constant offset from an alloca, // then we don't want to sink it. In its present form, it will be // load [constant stack offset]. Sinking it will cause us to have to @@ -280,7 +280,7 @@ static bool isSafeAndProfitableToSinkLoad(LoadInst *L) { if (AllocaInst *AI = dyn_cast<AllocaInst>(GEP->getOperand(0))) if (AI->isStaticAlloca() && GEP->hasAllConstantIndices()) return false; - + return true; } @@ -300,41 +300,41 @@ Instruction *InstCombiner::FoldPHIArgLoadIntoPHI(PHINode &PN) { bool isVolatile = FirstLI->isVolatile(); unsigned LoadAlignment = FirstLI->getAlignment(); unsigned LoadAddrSpace = FirstLI->getPointerAddressSpace(); - + // We can't sink the load if the loaded value could be modified between the // load and the PHI. if (FirstLI->getParent() != PN.getIncomingBlock(0) || !isSafeAndProfitableToSinkLoad(FirstLI)) return 0; - + // If the PHI is of volatile loads and the load block has multiple // successors, sinking it would remove a load of the volatile value from // the path through the other successor. - if (isVolatile && + if (isVolatile && FirstLI->getParent()->getTerminator()->getNumSuccessors() != 1) return 0; - + // Check to see if all arguments are the same operation. for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i) { LoadInst *LI = dyn_cast<LoadInst>(PN.getIncomingValue(i)); if (!LI || !LI->hasOneUse()) return 0; - - // We can't sink the load if the loaded value could be modified between + + // We can't sink the load if the loaded value could be modified between // the load and the PHI. if (LI->isVolatile() != isVolatile || LI->getParent() != PN.getIncomingBlock(i) || LI->getPointerAddressSpace() != LoadAddrSpace || !isSafeAndProfitableToSinkLoad(LI)) return 0; - + // If some of the loads have an alignment specified but not all of them, // we can't do the transformation. if ((LoadAlignment != 0) != (LI->getAlignment() != 0)) return 0; - + LoadAlignment = std::min(LoadAlignment, LI->getAlignment()); - + // If the PHI is of volatile loads and the load block has multiple // successors, sinking it would remove a load of the volatile value from // the path through the other successor. 
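The FoldPHIArg*IntoPHI helpers touched above all follow the same idea: when every incoming value of a PHI is the same kind of single-use instruction, the instruction is performed once after the merge point, on a PHI of its operands. A rough source-level picture of the load case; the struct and function names are invented for the illustration and are not part of the patch:

    #include <cassert>

    struct Node { int value; };

    // Shape before the fold: each predecessor performs its own load and
    // the merge point joins the two loaded values.
    int beforeFold(bool cond, Node *a, Node *b) {
      int v;
      if (cond)
        v = a->value;   // load in the true predecessor
      else
        v = b->value;   // load in the false predecessor
      return v;         // PHI of two loads
    }

    // Shape after the fold: the pointers are merged first and a single
    // load is performed after the merge point.
    int afterFold(bool cond, Node *a, Node *b) {
      Node *p = cond ? a : b;  // PHI of the load operands
      return p->value;         // one load of the merged pointer
    }

    int main() {
      Node x = {1}, y = {2};
      assert(beforeFold(true, &x, &y) == afterFold(true, &x, &y));
      assert(beforeFold(false, &x, &y) == afterFold(false, &x, &y));
      return 0;
    }

In IR terms the first shape is a PHI of two loads and the second is a single load of a PHI of pointers, which is what FoldPHIArgLoadIntoPHI produces when isSafeAndProfitableToSinkLoad allows it.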
@@ -342,16 +342,16 @@ Instruction *InstCombiner::FoldPHIArgLoadIntoPHI(PHINode &PN) { LI->getParent()->getTerminator()->getNumSuccessors() != 1) return 0; } - + // Okay, they are all the same operation. Create a new PHI node of the // correct type, and PHI together all of the LHS's of the instructions. PHINode *NewPN = PHINode::Create(FirstLI->getOperand(0)->getType(), PN.getNumIncomingValues(), PN.getName()+".in"); - + Value *InVal = FirstLI->getOperand(0); NewPN->addIncoming(InVal, PN.getIncomingBlock(0)); - + // Add all operands to the new PHI. for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i) { Value *NewInVal = cast<LoadInst>(PN.getIncomingValue(i))->getOperand(0); @@ -359,7 +359,7 @@ Instruction *InstCombiner::FoldPHIArgLoadIntoPHI(PHINode &PN) { InVal = 0; NewPN->addIncoming(NewInVal, PN.getIncomingBlock(i)); } - + Value *PhiVal; if (InVal) { // The new PHI unions all of the same values together. This is really @@ -370,14 +370,14 @@ Instruction *InstCombiner::FoldPHIArgLoadIntoPHI(PHINode &PN) { InsertNewInstBefore(NewPN, PN); PhiVal = NewPN; } - + // If this was a volatile load that we are merging, make sure to loop through // and mark all the input loads as non-volatile. If we don't do this, we will // insert a new volatile load and the old ones will not be deletable. if (isVolatile) for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i) cast<LoadInst>(PN.getIncomingValue(i))->setVolatile(false); - + LoadInst *NewLI = new LoadInst(PhiVal, "", isVolatile, LoadAlignment); NewLI->setDebugLoc(FirstLI->getDebugLoc()); return NewLI; @@ -395,7 +395,7 @@ Instruction *InstCombiner::FoldPHIArgOpIntoPHI(PHINode &PN) { return FoldPHIArgGEPIntoPHI(PN); if (isa<LoadInst>(FirstInst)) return FoldPHIArgLoadIntoPHI(PN); - + // Scan the instruction, looking for input operations that can be folded away. // If all input operands to the phi are the same instruction (e.g. a cast from // the same type or "+42") we can pull the operation through the PHI, reducing @@ -403,7 +403,7 @@ Instruction *InstCombiner::FoldPHIArgOpIntoPHI(PHINode &PN) { Constant *ConstantOp = 0; Type *CastSrcTy = 0; bool isNUW = false, isNSW = false, isExact = false; - + if (isa<CastInst>(FirstInst)) { CastSrcTy = FirstInst->getOperand(0)->getType(); @@ -414,12 +414,12 @@ Instruction *InstCombiner::FoldPHIArgOpIntoPHI(PHINode &PN) { return 0; } } else if (isa<BinaryOperator>(FirstInst) || isa<CmpInst>(FirstInst)) { - // Can fold binop, compare or shift here if the RHS is a constant, + // Can fold binop, compare or shift here if the RHS is a constant, // otherwise call FoldPHIArgBinOpIntoPHI. 
ConstantOp = dyn_cast<Constant>(FirstInst->getOperand(1)); if (ConstantOp == 0) return FoldPHIArgBinOpIntoPHI(PN); - + if (OverflowingBinaryOperator *BO = dyn_cast<OverflowingBinaryOperator>(FirstInst)) { isNUW = BO->hasNoUnsignedWrap(); @@ -442,7 +442,7 @@ Instruction *InstCombiner::FoldPHIArgOpIntoPHI(PHINode &PN) { } else if (I->getOperand(1) != ConstantOp) { return 0; } - + if (isNUW) isNUW = cast<OverflowingBinaryOperator>(I)->hasNoUnsignedWrap(); if (isNSW) @@ -486,7 +486,7 @@ Instruction *InstCombiner::FoldPHIArgOpIntoPHI(PHINode &PN) { NewCI->setDebugLoc(FirstInst->getDebugLoc()); return NewCI; } - + if (BinaryOperator *BinOp = dyn_cast<BinaryOperator>(FirstInst)) { BinOp = BinaryOperator::Create(BinOp->getOpcode(), PhiVal, ConstantOp); if (isNUW) BinOp->setHasNoUnsignedWrap(); @@ -495,7 +495,7 @@ Instruction *InstCombiner::FoldPHIArgOpIntoPHI(PHINode &PN) { BinOp->setDebugLoc(FirstInst->getDebugLoc()); return BinOp; } - + CmpInst *CIOp = cast<CmpInst>(FirstInst); CmpInst *NewCI = CmpInst::Create(CIOp->getOpcode(), CIOp->getPredicate(), PhiVal, ConstantOp); @@ -513,7 +513,7 @@ static bool DeadPHICycle(PHINode *PN, // Remember this node, and if we find the cycle, return. if (!PotentiallyDeadPHIs.insert(PN)) return true; - + // Don't scan crazily complex things. if (PotentiallyDeadPHIs.size() == 16) return false; @@ -527,16 +527,16 @@ static bool DeadPHICycle(PHINode *PN, /// PHIsEqualValue - Return true if this phi node is always equal to /// NonPhiInVal. This happens with mutually cyclic phi nodes like: /// z = some value; x = phi (y, z); y = phi (x, z) -static bool PHIsEqualValue(PHINode *PN, Value *NonPhiInVal, +static bool PHIsEqualValue(PHINode *PN, Value *NonPhiInVal, SmallPtrSet<PHINode*, 16> &ValueEqualPHIs) { // See if we already saw this PHI node. if (!ValueEqualPHIs.insert(PN)) return true; - + // Don't scan crazily complex things. if (ValueEqualPHIs.size() == 16) return false; - + // Scan the operands to see if they are either phi nodes or are equal to // the value. for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { @@ -547,7 +547,7 @@ static bool PHIsEqualValue(PHINode *PN, Value *NonPhiInVal, } else if (Op != NonPhiInVal) return false; } - + return true; } @@ -557,10 +557,10 @@ struct PHIUsageRecord { unsigned PHIId; // The ID # of the PHI (something determinstic to sort on) unsigned Shift; // The amount shifted. Instruction *Inst; // The trunc instruction. - + PHIUsageRecord(unsigned pn, unsigned Sh, Instruction *User) : PHIId(pn), Shift(Sh), Inst(User) {} - + bool operator<(const PHIUsageRecord &RHS) const { if (PHIId < RHS.PHIId) return true; if (PHIId > RHS.PHIId) return false; @@ -570,15 +570,15 @@ struct PHIUsageRecord { RHS.Inst->getType()->getPrimitiveSizeInBits(); } }; - + struct LoweredPHIRecord { PHINode *PN; // The PHI that was lowered. unsigned Shift; // The amount shifted. unsigned Width; // The width extracted. - + LoweredPHIRecord(PHINode *pn, unsigned Sh, Type *Ty) : PN(pn), Shift(Sh), Width(Ty->getPrimitiveSizeInBits()) {} - + // Ctor form used by DenseMap. LoweredPHIRecord(PHINode *pn, unsigned Sh) : PN(pn), Shift(Sh), Width(0) {} @@ -621,20 +621,20 @@ Instruction *InstCombiner::SliceUpIllegalIntegerPHI(PHINode &FirstPhi) { // PHIUsers - Keep track of all of the truncated values extracted from a set // of PHIs, along with their offset. These are the things we want to rewrite. SmallVector<PHIUsageRecord, 16> PHIUsers; - + // PHIs are often mutually cyclic, so we keep track of a whole set of PHI // nodes which are extracted from. 
PHIsToSlice is a set we use to avoid // revisiting PHIs, PHIsInspected is a ordered list of PHIs that we need to // check the uses of (to ensure they are all extracts). SmallVector<PHINode*, 8> PHIsToSlice; SmallPtrSet<PHINode*, 8> PHIsInspected; - + PHIsToSlice.push_back(&FirstPhi); PHIsInspected.insert(&FirstPhi); - + for (unsigned PHIId = 0; PHIId != PHIsToSlice.size(); ++PHIId) { PHINode *PN = PHIsToSlice[PHIId]; - + // Scan the input list of the PHI. If any input is an invoke, and if the // input is defined in the predecessor, then we won't be split the critical // edge which is required to insert a truncate. Because of this, we have to @@ -644,85 +644,85 @@ Instruction *InstCombiner::SliceUpIllegalIntegerPHI(PHINode &FirstPhi) { if (II == 0) continue; if (II->getParent() != PN->getIncomingBlock(i)) continue; - + // If we have a phi, and if it's directly in the predecessor, then we have // a critical edge where we need to put the truncate. Since we can't // split the edge in instcombine, we have to bail out. return 0; } - - + + for (Value::use_iterator UI = PN->use_begin(), E = PN->use_end(); UI != E; ++UI) { Instruction *User = cast<Instruction>(*UI); - + // If the user is a PHI, inspect its uses recursively. if (PHINode *UserPN = dyn_cast<PHINode>(User)) { if (PHIsInspected.insert(UserPN)) PHIsToSlice.push_back(UserPN); continue; } - + // Truncates are always ok. if (isa<TruncInst>(User)) { PHIUsers.push_back(PHIUsageRecord(PHIId, 0, User)); continue; } - + // Otherwise it must be a lshr which can only be used by one trunc. if (User->getOpcode() != Instruction::LShr || !User->hasOneUse() || !isa<TruncInst>(User->use_back()) || !isa<ConstantInt>(User->getOperand(1))) return 0; - + unsigned Shift = cast<ConstantInt>(User->getOperand(1))->getZExtValue(); PHIUsers.push_back(PHIUsageRecord(PHIId, Shift, User->use_back())); } } - + // If we have no users, they must be all self uses, just nuke the PHI. if (PHIUsers.empty()) return ReplaceInstUsesWith(FirstPhi, UndefValue::get(FirstPhi.getType())); - + // If this phi node is transformable, create new PHIs for all the pieces // extracted out of it. First, sort the users by their offset and size. array_pod_sort(PHIUsers.begin(), PHIUsers.end()); - + DEBUG(errs() << "SLICING UP PHI: " << FirstPhi << '\n'; for (unsigned i = 1, e = PHIsToSlice.size(); i != e; ++i) errs() << "AND USER PHI #" << i << ": " << *PHIsToSlice[i] <<'\n'; ); - + // PredValues - This is a temporary used when rewriting PHI nodes. It is // hoisted out here to avoid construction/destruction thrashing. DenseMap<BasicBlock*, Value*> PredValues; - + // ExtractedVals - Each new PHI we introduce is saved here so we don't // introduce redundant PHIs. DenseMap<LoweredPHIRecord, PHINode*> ExtractedVals; - + for (unsigned UserI = 0, UserE = PHIUsers.size(); UserI != UserE; ++UserI) { unsigned PHIId = PHIUsers[UserI].PHIId; PHINode *PN = PHIsToSlice[PHIId]; unsigned Offset = PHIUsers[UserI].Shift; Type *Ty = PHIUsers[UserI].Inst->getType(); - + PHINode *EltPHI; - + // If we've already lowered a user like this, reuse the previously lowered // value. if ((EltPHI = ExtractedVals[LoweredPHIRecord(PN, Offset, Ty)]) == 0) { - + // Otherwise, Create the new PHI node for this user. 
EltPHI = PHINode::Create(Ty, PN->getNumIncomingValues(), PN->getName()+".off"+Twine(Offset), PN); assert(EltPHI->getType() != PN->getType() && "Truncate didn't shrink phi?"); - + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { BasicBlock *Pred = PN->getIncomingBlock(i); Value *&PredVal = PredValues[Pred]; - + // If we already have a value for this predecessor, reuse it. if (PredVal) { EltPHI->addIncoming(PredVal, Pred); @@ -736,7 +736,7 @@ Instruction *InstCombiner::SliceUpIllegalIntegerPHI(PHINode &FirstPhi) { EltPHI->addIncoming(PredVal, Pred); continue; } - + if (PHINode *InPHI = dyn_cast<PHINode>(PN)) { // If the incoming value was a PHI, and if it was one of the PHIs we // already rewrote it, just use the lowered value. @@ -746,7 +746,7 @@ Instruction *InstCombiner::SliceUpIllegalIntegerPHI(PHINode &FirstPhi) { continue; } } - + // Otherwise, do an extract in the predecessor. Builder->SetInsertPoint(Pred, Pred->getTerminator()); Value *Res = InVal; @@ -756,7 +756,7 @@ Instruction *InstCombiner::SliceUpIllegalIntegerPHI(PHINode &FirstPhi) { Res = Builder->CreateTrunc(Res, Ty, "extract.t"); PredVal = Res; EltPHI->addIncoming(Res, Pred); - + // If the incoming value was a PHI, and if it was one of the PHIs we are // rewriting, we will ultimately delete the code we inserted. This // means we need to revisit that PHI to make sure we extract out the @@ -765,22 +765,22 @@ Instruction *InstCombiner::SliceUpIllegalIntegerPHI(PHINode &FirstPhi) { if (PHIsInspected.count(OldInVal)) { unsigned RefPHIId = std::find(PHIsToSlice.begin(),PHIsToSlice.end(), OldInVal)-PHIsToSlice.begin(); - PHIUsers.push_back(PHIUsageRecord(RefPHIId, Offset, + PHIUsers.push_back(PHIUsageRecord(RefPHIId, Offset, cast<Instruction>(Res))); ++UserE; } } PredValues.clear(); - + DEBUG(errs() << " Made element PHI for offset " << Offset << ": " << *EltPHI << '\n'); ExtractedVals[LoweredPHIRecord(PN, Offset, Ty)] = EltPHI; } - + // Replace the use of this piece with the PHI node. ReplaceInstUsesWith(*PHIUsers[UserI].Inst, EltPHI); } - + // Replace all the remaining uses of the PHI nodes (self uses and the lshrs) // with undefs. Value *Undef = UndefValue::get(FirstPhi.getType()); @@ -818,7 +818,7 @@ Instruction *InstCombiner::visitPHINode(PHINode &PN) { if (DeadPHICycle(PU, PotentiallyDeadPHIs)) return ReplaceInstUsesWith(PN, UndefValue::get(PN.getType())); } - + // If this phi has a single use, and if that use just computes a value for // the next iteration of a loop, delete the phi. This occurs with unused // induction variables, e.g. "for (int j = 0; ; ++j);". Detecting this @@ -847,7 +847,7 @@ Instruction *InstCombiner::visitPHINode(PHINode &PN) { if (InValNo != NumIncomingVals) { Value *NonPhiInVal = PN.getIncomingValue(InValNo); - + // Scan the rest of the operands to see if there are any conflicts, if so // there is no need to recursively scan other phis. for (++InValNo; InValNo != NumIncomingVals; ++InValNo) { @@ -855,7 +855,7 @@ Instruction *InstCombiner::visitPHINode(PHINode &PN) { if (OpVal != NonPhiInVal && !isa<PHINode>(OpVal)) break; } - + // If we scanned over all operands, then we have one unique value plus // phi values. Scan PHI nodes to see if they all merge in each other or // the value. 
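SliceUpIllegalIntegerPHI, whose body appears in the hunks above, breaks a PHI of an integer type that is illegal for the target into PHIs of smaller legal pieces, provided every use is either a trunc or an lshr by a constant feeding a trunc. A rough C++ illustration of why that is sound; the 64-bit to 32-bit split is illustrative, not taken from the patch:

    #include <cassert>
    #include <cstdint>

    // Shape before slicing: a 64-bit value is merged across a branch even
    // though its uses only ever look at the 32-bit halves.
    uint32_t wideMerge(bool cond, uint64_t a, uint64_t b, bool wantHigh) {
      uint64_t merged = cond ? a : b;           // PHI of an "illegal" i64
      uint32_t lo = (uint32_t)merged;           // trunc
      uint32_t hi = (uint32_t)(merged >> 32);   // lshr + trunc
      return wantHigh ? hi : lo;
    }

    // Shape after slicing: the halves are merged independently, so no
    // 64-bit value has to live across the merge point.
    uint32_t slicedMerge(bool cond, uint64_t a, uint64_t b, bool wantHigh) {
      uint32_t lo = cond ? (uint32_t)a : (uint32_t)b;                 // PHI of i32
      uint32_t hi = cond ? (uint32_t)(a >> 32) : (uint32_t)(b >> 32); // PHI of i32
      return wantHigh ? hi : lo;
    }

    int main() {
      uint64_t a = 0x1122334455667788ULL, b = 0x99aabbccddeeff00ULL;
      for (int cond = 0; cond < 2; ++cond)
        for (int high = 0; high < 2; ++high)
          assert(wideMerge(cond, a, b, high) == slicedMerge(cond, a, b, high));
      return 0;
    }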
@@ -899,6 +899,6 @@ Instruction *InstCombiner::visitPHINode(PHINode &PN) { !TD->isLegalInteger(PN.getType()->getPrimitiveSizeInBits())) if (Instruction *Res = SliceUpIllegalIntegerPHI(PN)) return Res; - + return 0; } diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp index 121aa1f..59502fb 100644 --- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp +++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp @@ -350,6 +350,68 @@ static Value *SimplifyWithOpReplaced(Value *V, Value *Op, Value *RepOp, return 0; } +/// foldSelectICmpAndOr - We want to turn: +/// (select (icmp eq (and X, C1), 0), Y, (or Y, C2)) +/// into: +/// (or (shl (and X, C1), C3), y) +/// iff: +/// C1 and C2 are both powers of 2 +/// where: +/// C3 = Log(C2) - Log(C1) +/// +/// This transform handles cases where: +/// 1. The icmp predicate is inverted +/// 2. The select operands are reversed +/// 3. The magnitude of C2 and C1 are flipped +static Value *foldSelectICmpAndOr(const SelectInst &SI, Value *TrueVal, + Value *FalseVal, + InstCombiner::BuilderTy *Builder) { + const ICmpInst *IC = dyn_cast<ICmpInst>(SI.getCondition()); + if (!IC || !IC->isEquality()) + return 0; + + Value *CmpLHS = IC->getOperand(0); + Value *CmpRHS = IC->getOperand(1); + + if (!match(CmpRHS, m_Zero())) + return 0; + + Value *X; + const APInt *C1; + if (!match(CmpLHS, m_And(m_Value(X), m_Power2(C1)))) + return 0; + + const APInt *C2; + bool OrOnTrueVal = false; + bool OrOnFalseVal = match(FalseVal, m_Or(m_Specific(TrueVal), m_Power2(C2))); + if (!OrOnFalseVal) + OrOnTrueVal = match(TrueVal, m_Or(m_Specific(FalseVal), m_Power2(C2))); + + if (!OrOnFalseVal && !OrOnTrueVal) + return 0; + + Value *V = CmpLHS; + Value *Y = OrOnFalseVal ? TrueVal : FalseVal; + + unsigned C1Log = C1->logBase2(); + unsigned C2Log = C2->logBase2(); + if (C2Log > C1Log) { + V = Builder->CreateZExtOrTrunc(V, Y->getType()); + V = Builder->CreateShl(V, C2Log - C1Log); + } else if (C1Log > C2Log) { + V = Builder->CreateLShr(V, C1Log - C2Log); + V = Builder->CreateZExtOrTrunc(V, Y->getType()); + } else + V = Builder->CreateZExtOrTrunc(V, Y->getType()); + + ICmpInst::Predicate Pred = IC->getPredicate(); + if ((Pred == ICmpInst::ICMP_NE && OrOnFalseVal) || + (Pred == ICmpInst::ICMP_EQ && OrOnTrueVal)) + V = Builder->CreateXor(V, *C2); + + return Builder->CreateOr(V, Y); +} + /// visitSelectInstWithICmp - Visit a SelectInst that has an /// ICmpInst as its first operand. /// @@ -521,6 +583,9 @@ Instruction *InstCombiner::visitSelectInstWithICmp(SelectInst &SI, } } + if (Value *V = foldSelectICmpAndOr(SI, TrueVal, FalseVal, Builder)) + return ReplaceInstUsesWith(SI, V); + return Changed ? 
&SI : 0; } @@ -676,7 +741,8 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { // Change: A = select B, false, C --> A = and !B, C Value *NotCond = Builder->CreateNot(CondVal, "not."+CondVal->getName()); return BinaryOperator::CreateAnd(NotCond, FalseVal); - } else if (ConstantInt *C = dyn_cast<ConstantInt>(FalseVal)) { + } + if (ConstantInt *C = dyn_cast<ConstantInt>(FalseVal)) { if (C->getZExtValue() == false) { // Change: A = select B, C, false --> A = and B, C return BinaryOperator::CreateAnd(CondVal, TrueVal); @@ -690,14 +756,14 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { // select a, a, b -> a|b if (CondVal == TrueVal) return BinaryOperator::CreateOr(CondVal, FalseVal); - else if (CondVal == FalseVal) + if (CondVal == FalseVal) return BinaryOperator::CreateAnd(CondVal, TrueVal); // select a, ~a, b -> (~a)&b // select a, b, ~a -> (~a)|b if (match(TrueVal, m_Not(m_Specific(CondVal)))) return BinaryOperator::CreateAnd(TrueVal, FalseVal); - else if (match(FalseVal, m_Not(m_Specific(CondVal)))) + if (match(FalseVal, m_Not(m_Specific(CondVal)))) return BinaryOperator::CreateOr(TrueVal, FalseVal); } @@ -838,7 +904,7 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { Value *NewFalseOp = NegVal; if (AddOp != TI) std::swap(NewTrueOp, NewFalseOp); - Value *NewSel = + Value *NewSel = Builder->CreateSelect(CondVal, NewTrueOp, NewFalseOp, SI.getName() + ".p"); @@ -862,7 +928,7 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { Value *LHS, *RHS, *LHS2, *RHS2; if (SelectPatternFlavor SPF = MatchSelectPattern(&SI, LHS, RHS)) { if (SelectPatternFlavor SPF2 = MatchSelectPattern(LHS, LHS2, RHS2)) - if (Instruction *R = FoldSPFofSPF(cast<Instruction>(LHS),SPF2,LHS2,RHS2, + if (Instruction *R = FoldSPFofSPF(cast<Instruction>(LHS),SPF2,LHS2,RHS2, SI, SPF, RHS)) return R; if (SelectPatternFlavor SPF2 = MatchSelectPattern(RHS, LHS2, RHS2)) @@ -908,7 +974,7 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { return &SI; } - if (VectorType *VecTy = dyn_cast<VectorType>(SI.getType())) { + if (VectorType* VecTy = dyn_cast<VectorType>(SI.getType())) { unsigned VWidth = VecTy->getNumElements(); APInt UndefElts(VWidth, 0); APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth)); @@ -918,24 +984,6 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { return &SI; } - if (ConstantVector *CV = dyn_cast<ConstantVector>(CondVal)) { - // Form a shufflevector instruction. - SmallVector<Constant *, 8> Mask(VWidth); - Type *Int32Ty = Type::getInt32Ty(CV->getContext()); - for (unsigned i = 0; i != VWidth; ++i) { - Constant *Elem = cast<Constant>(CV->getOperand(i)); - if (ConstantInt *E = dyn_cast<ConstantInt>(Elem)) - Mask[i] = ConstantInt::get(Int32Ty, i + (E->isZero() ? 
VWidth : 0)); - else if (isa<UndefValue>(Elem)) - Mask[i] = UndefValue::get(Int32Ty); - else - return 0; - } - Constant *MaskVal = ConstantVector::get(Mask); - Value *V = Builder->CreateShuffleVector(TrueVal, FalseVal, MaskVal); - return ReplaceInstUsesWith(SI, V); - } - if (isa<ConstantAggregateZero>(CondVal)) { return ReplaceInstUsesWith(SI, FalseVal); } diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp index 4f71db1..4301ddb 100644 --- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp +++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp @@ -105,6 +105,75 @@ static Value *FindScalarElement(Value *V, unsigned EltNo) { return 0; } +// If we have a PHI node with a vector type that has only 2 uses: feed +// itself and be an operand of extractelemnt at a constant location, +// try to replace the PHI of the vector type with a PHI of a scalar type +Instruction *InstCombiner::scalarizePHI(ExtractElementInst &EI, PHINode *PN) { + // Verify that the PHI node has exactly 2 uses. Otherwise return NULL. + if (!PN->hasNUses(2)) + return NULL; + + // If so, it's known at this point that one operand is PHI and the other is + // an extractelement node. Find the PHI user that is not the extractelement + // node. + Value::use_iterator iu = PN->use_begin(); + Instruction *PHIUser = dyn_cast<Instruction>(*iu); + if (PHIUser == cast<Instruction>(&EI)) + PHIUser = cast<Instruction>(*(++iu)); + + // Verify that this PHI user has one use, which is the PHI itself, + // and that it is a binary operation which is cheap to scalarize. + // otherwise return NULL. + if (!PHIUser->hasOneUse() || !(PHIUser->use_back() == PN) || + !(isa<BinaryOperator>(PHIUser)) || + !CheapToScalarize(PHIUser, true)) + return NULL; + + // Create a scalar PHI node that will replace the vector PHI node + // just before the current PHI node. + PHINode * scalarPHI = cast<PHINode>( + InsertNewInstWith(PHINode::Create(EI.getType(), + PN->getNumIncomingValues(), ""), *PN)); + // Scalarize each PHI operand. + for (unsigned i=0; i < PN->getNumIncomingValues(); i++) { + Value *PHIInVal = PN->getIncomingValue(i); + BasicBlock *inBB = PN->getIncomingBlock(i); + Value *Elt = EI.getIndexOperand(); + // If the operand is the PHI induction variable: + if (PHIInVal == PHIUser) { + // Scalarize the binary operation. Its first operand is the + // scalar PHI and the second operand is extracted from the other + // vector operand. + BinaryOperator *B0 = cast<BinaryOperator>(PHIUser); + unsigned opId = (B0->getOperand(0) == PN) ? 1: 0; + Value *Op = Builder->CreateExtractElement( + B0->getOperand(opId), Elt, B0->getOperand(opId)->getName()+".Elt"); + Value *newPHIUser = InsertNewInstWith( + BinaryOperator::Create(B0->getOpcode(), scalarPHI,Op), + *B0); + scalarPHI->addIncoming(newPHIUser, inBB); + } else { + // Scalarize PHI input: + Instruction *newEI = + ExtractElementInst::Create(PHIInVal, Elt, ""); + // Insert the new instruction into the predecessor basic block. 
+ Instruction *pos = dyn_cast<Instruction>(PHIInVal); + BasicBlock::iterator InsertPos; + if (pos && !isa<PHINode>(pos)) { + InsertPos = pos; + ++InsertPos; + } else { + InsertPos = inBB->getFirstInsertionPt(); + } + + InsertNewInstWith(newEI, *InsertPos); + + scalarPHI->addIncoming(newEI, inBB); + } + } + return ReplaceInstUsesWith(EI, scalarPHI); +} + Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) { // If vector val is constant with all elements the same, replace EI with // that element. We handle a known element # below. @@ -149,6 +218,14 @@ Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) { if (Value *Elt = FindScalarElement(BCI->getOperand(0), IndexVal)) return new BitCastInst(Elt, EI.getType()); } + + // If there's a vector PHI feeding a scalar use through this extractelement + // instruction, try to scalarize the PHI. + if (PHINode *PN = dyn_cast<PHINode>(EI.getOperand(0))) { + Instruction *scalarPHI = scalarizePHI(EI, PN); + if (scalarPHI) + return (scalarPHI); + } } if (Instruction *I = dyn_cast<Instruction>(EI.getOperand(0))) { @@ -201,10 +278,10 @@ Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) { } else if (CastInst *CI = dyn_cast<CastInst>(I)) { // Canonicalize extractelement(cast) -> cast(extractelement) // bitcasts can change the number of vector elements and they cost nothing - if (CI->hasOneUse() && EI.hasOneUse() && - (CI->getOpcode() != Instruction::BitCast)) { + if (CI->hasOneUse() && (CI->getOpcode() != Instruction::BitCast)) { Value *EE = Builder->CreateExtractElement(CI->getOperand(0), EI.getIndexOperand()); + Worklist.AddValue(EE); return CastInst::Create(CI->getOpcode(), EE, EI.getType()); } } @@ -336,6 +413,10 @@ static Value *CollectShuffleElements(Value *V, SmallVectorImpl<Constant*> &Mask, if (VecOp == RHS) { Value *V = CollectShuffleElements(EI->getOperand(0), Mask, RHS); + // Update Mask to reflect that `ScalarOp' has been inserted at + // position `InsertedIdx' within the vector returned by IEI. + Mask[InsertedIdx % NumElts] = Mask[ExtractedIdx]; + // Everything but the extracted element is replaced with the RHS. 
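The new scalarizePHI above recognizes a vector PHI with exactly two uses, the binary operation that feeds it back and an extractelement at a constant position, and carries the recurrence in a scalar PHI instead, since only one lane is ever observed. A rough source-level picture of the pattern; the small reduction below is invented for the illustration:

    #include <cassert>

    // Shape before scalarization: a 4-wide accumulator is updated every
    // iteration, but only lane 0 is ever extracted afterwards.
    int vectorAccum(int (*rows)[4], int n) {
      int acc[4] = {0, 0, 0, 0};            // stands in for the vector PHI
      for (int i = 0; i < n; ++i)
        for (int lane = 0; lane < 4; ++lane)
          acc[lane] += rows[i][lane];       // vector add feeding the PHI
      return acc[0];                        // extractelement at index 0
    }

    // Shape after scalarization: only the extracted lane is carried.
    int scalarAccum(int (*rows)[4], int n) {
      int acc0 = 0;                         // scalar PHI
      for (int i = 0; i < n; ++i)
        acc0 += rows[i][0];                 // extract the operand, then add
      return acc0;
    }

    int main() {
      int rows[3][4] = {{1, 2, 3, 4}, {5, 6, 7, 8}, {9, 10, 11, 12}};
      assert(vectorAccum(rows, 3) == scalarAccum(rows, 3));
      return 0;
    }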
for (unsigned i = 0; i != NumElts; ++i) { if (i != InsertedIdx) diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp index c6115e3..ec10751 100644 --- a/contrib/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/contrib/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -1483,7 +1483,7 @@ Instruction *InstCombiner::visitAllocSite(Instruction &MI) { Module *M = II->getParent()->getParent()->getParent(); Function *F = Intrinsic::getDeclaration(M, Intrinsic::donothing); InvokeInst::Create(F, II->getNormalDest(), II->getUnwindDest(), - ArrayRef<Value *>(), "", II->getParent()); + None, "", II->getParent()); } return EraseInstFromFunction(MI); } diff --git a/contrib/llvm/lib/Transforms/Instrumentation/BlackList.cpp b/contrib/llvm/lib/Transforms/Instrumentation/BlackList.cpp index 927982d..39de4b0 100644 --- a/contrib/llvm/lib/Transforms/Instrumentation/BlackList.cpp +++ b/contrib/llvm/lib/Transforms/Instrumentation/BlackList.cpp @@ -110,7 +110,8 @@ static StringRef GetGVTypeString(const GlobalVariable &G) { bool BlackList::isInInit(const GlobalVariable &G) const { return (isIn(*G.getParent()) || inSection("global-init", G.getName()) || - inSection("global-init-type", GetGVTypeString(G))); + inSection("global-init-type", GetGVTypeString(G)) || + inSection("global-init-src", G.getParent()->getModuleIdentifier())); } bool BlackList::inSection(const StringRef Section, diff --git a/contrib/llvm/lib/Transforms/Instrumentation/Instrumentation.cpp b/contrib/llvm/lib/Transforms/Instrumentation/Instrumentation.cpp index 8ba1025..9f35396 100644 --- a/contrib/llvm/lib/Transforms/Instrumentation/Instrumentation.cpp +++ b/contrib/llvm/lib/Transforms/Instrumentation/Instrumentation.cpp @@ -13,6 +13,7 @@ //===----------------------------------------------------------------------===// #include "llvm/InitializePasses.h" +#include "llvm/PassRegistry.h" #include "llvm-c/Initialization.h" using namespace llvm; diff --git a/contrib/llvm/lib/Transforms/ObjCARC/ObjCARC.cpp b/contrib/llvm/lib/Transforms/ObjCARC/ObjCARC.cpp index 53a31b0..373168e 100644 --- a/contrib/llvm/lib/Transforms/ObjCARC/ObjCARC.cpp +++ b/contrib/llvm/lib/Transforms/ObjCARC/ObjCARC.cpp @@ -30,6 +30,7 @@ using namespace llvm::objcarc; bool llvm::objcarc::EnableARCOpts; static cl::opt<bool, true> EnableARCOptimizations("enable-objc-arc-opts", + cl::desc("enable/disable all ARC Optimizations"), cl::location(EnableARCOpts), cl::init(true)); diff --git a/contrib/llvm/lib/Transforms/ObjCARC/ObjCARCContract.cpp b/contrib/llvm/lib/Transforms/ObjCARC/ObjCARCContract.cpp index b96c64f..c43f4f4 100644 --- a/contrib/llvm/lib/Transforms/ObjCARC/ObjCARCContract.cpp +++ b/contrib/llvm/lib/Transforms/ObjCARC/ObjCARCContract.cpp @@ -66,6 +66,8 @@ namespace { Constant *RetainAutoreleaseCallee; /// Declaration for objc_retainAutoreleaseReturnValue(). Constant *RetainAutoreleaseRVCallee; + /// Declaration for objc_retainAutoreleasedReturnValue(). + Constant *RetainRVCallee; /// The inline asm string to insert between calls and RetainRV calls to make /// the optimization work on targets which need it. 
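Returning to the foldSelectICmpAndOr helper added to InstCombineSelect.cpp earlier in this section: when C1 and C2 are both powers of two, the select reduces to shifting the masked bit into position and OR-ing it in, with the shift amount C3 = log2(C2) - log2(C1). A small exhaustive C++ check of that identity; the constants C1 = 4 and C2 = 32 are illustrative only:

    #include <cassert>
    #include <cstdint>

    int main() {
      const uint32_t C1 = 4, C2 = 32;   // both powers of two (illustrative)
      const uint32_t C3 = 3;            // log2(C2) - log2(C1)
      for (uint32_t x = 0; x < 256; ++x)
        for (uint32_t y = 0; y < 256; ++y) {
          // Original form: select (icmp eq (and X, C1), 0), Y, (or Y, C2)
          uint32_t sel = ((x & C1) == 0) ? y : (y | C2);
          // Folded form: or (shl (and X, C1), C3), Y
          uint32_t folded = ((x & C1) << C3) | y;
          assert(sel == folded);
        }
      return 0;
    }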
@@ -77,9 +79,12 @@ namespace { SmallPtrSet<CallInst *, 8> StoreStrongCalls; Constant *getStoreStrongCallee(Module *M); + Constant *getRetainRVCallee(Module *M); Constant *getRetainAutoreleaseCallee(Module *M); Constant *getRetainAutoreleaseRVCallee(Module *M); + bool OptimizeRetainCall(Function &F, Instruction *Retain); + bool ContractAutorelease(Function &F, Instruction *Autorelease, InstructionClass Class, SmallPtrSet<Instruction *, 4> @@ -172,6 +177,57 @@ Constant *ObjCARCContract::getRetainAutoreleaseRVCallee(Module *M) { return RetainAutoreleaseRVCallee; } +Constant *ObjCARCContract::getRetainRVCallee(Module *M) { + if (!RetainRVCallee) { + LLVMContext &C = M->getContext(); + Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C)); + Type *Params[] = { I8X }; + FunctionType *FTy = FunctionType::get(I8X, Params, /*isVarArg=*/false); + AttributeSet Attribute = + AttributeSet().addAttribute(M->getContext(), AttributeSet::FunctionIndex, + Attribute::NoUnwind); + RetainRVCallee = + M->getOrInsertFunction("objc_retainAutoreleasedReturnValue", FTy, + Attribute); + } + return RetainRVCallee; +} + +/// Turn objc_retain into objc_retainAutoreleasedReturnValue if the operand is a +/// return value. We do this late so we do not disrupt the dataflow analysis in +/// ObjCARCOpt. +bool +ObjCARCContract::OptimizeRetainCall(Function &F, Instruction *Retain) { + ImmutableCallSite CS(GetObjCArg(Retain)); + const Instruction *Call = CS.getInstruction(); + if (!Call) + return false; + if (Call->getParent() != Retain->getParent()) + return false; + + // Check that the call is next to the retain. + BasicBlock::const_iterator I = Call; + ++I; + while (IsNoopInstruction(I)) ++I; + if (&*I != Retain) + return false; + + // Turn it to an objc_retainAutoreleasedReturnValue. + Changed = true; + ++NumPeeps; + + DEBUG(dbgs() << "Transforming objc_retain => " + "objc_retainAutoreleasedReturnValue since the operand is a " + "return value.\nOld: "<< *Retain << "\n"); + + // We do not have to worry about tail calls/does not throw since + // retain/retainRV have the same properties. + cast<CallInst>(Retain)->setCalledFunction(getRetainRVCallee(F.getParent())); + + DEBUG(dbgs() << "New: " << *Retain << "\n"); + return true; +} + /// Merge an autorelease with a retain into a fused call. bool ObjCARCContract::ContractAutorelease(Function &F, Instruction *Autorelease, @@ -329,6 +385,7 @@ bool ObjCARCContract::doInitialization(Module &M) { StoreStrongCallee = 0; RetainAutoreleaseCallee = 0; RetainAutoreleaseRVCallee = 0; + RetainRVCallee = 0; // Initialize RetainRVMarker. RetainRVMarker = 0; @@ -380,7 +437,6 @@ bool ObjCARCContract::runOnFunction(Function &F) { // objc_retainBlock does not necessarily return its argument. InstructionClass Class = GetBasicInstructionClass(Inst); switch (Class) { - case IC_Retain: case IC_FusedRetainAutorelease: case IC_FusedRetainAutoreleaseRV: break; @@ -389,6 +445,13 @@ bool ObjCARCContract::runOnFunction(Function &F) { if (ContractAutorelease(F, Inst, Class, DependingInstructions, Visited)) continue; break; + case IC_Retain: + // Attempt to convert retains to retainrvs if they are next to function + // calls. + if (!OptimizeRetainCall(F, Inst)) + break; + // If we succeed in our optimization, fall through. 
+ // FALLTHROUGH case IC_RetainRV: { // If we're compiling for a target which needs a special inline-asm // marker to do the retainAutoreleasedReturnValue optimization, diff --git a/contrib/llvm/lib/Transforms/ObjCARC/ObjCARCOpts.cpp b/contrib/llvm/lib/Transforms/ObjCARC/ObjCARCOpts.cpp index 92d6fc4..43e2e20 100644 --- a/contrib/llvm/lib/Transforms/ObjCARC/ObjCARCOpts.cpp +++ b/contrib/llvm/lib/Transforms/ObjCARC/ObjCARCOpts.cpp @@ -191,13 +191,13 @@ static bool DoesRetainableObjPtrEscape(const User *Ptr) { do { const Value *V = Worklist.pop_back_val(); - DEBUG(dbgs() << "DoesRetainableObjPtrEscape: Visiting: " << *V << "\n"); + DEBUG(dbgs() << "Visiting: " << *V << "\n"); for (Value::const_use_iterator UI = V->use_begin(), UE = V->use_end(); UI != UE; ++UI) { const User *UUser = *UI; - DEBUG(dbgs() << "DoesRetainableObjPtrEscape: User: " << *UUser << "\n"); + DEBUG(dbgs() << "User: " << *UUser << "\n"); // Special - Use by a call (callee or argument) is not considered // to be an escape. @@ -207,8 +207,7 @@ static bool DoesRetainableObjPtrEscape(const User *Ptr) { case IC_StoreStrong: case IC_Autorelease: case IC_AutoreleaseRV: { - DEBUG(dbgs() << "DoesRetainableObjPtrEscape: User copies pointer " - "arguments. Pointer Escapes!\n"); + DEBUG(dbgs() << "User copies pointer arguments. Pointer Escapes!\n"); // These special functions make copies of their pointer arguments. return true; } @@ -223,12 +222,11 @@ static bool DoesRetainableObjPtrEscape(const User *Ptr) { isa<PHINode>(UUser) || isa<SelectInst>(UUser)) { if (VisitedSet.insert(UUser)) { - DEBUG(dbgs() << "DoesRetainableObjPtrEscape: User copies value. " - "Ptr escapes if result escapes. Adding to list.\n"); + DEBUG(dbgs() << "User copies value. Ptr escapes if result escapes." + " Adding to list.\n"); Worklist.push_back(UUser); } else { - DEBUG(dbgs() << "DoesRetainableObjPtrEscape: Already visited node." - "\n"); + DEBUG(dbgs() << "Already visited node.\n"); } continue; } @@ -245,13 +243,13 @@ static bool DoesRetainableObjPtrEscape(const User *Ptr) { continue; } // Otherwise, conservatively assume an escape. - DEBUG(dbgs() << "DoesRetainableObjPtrEscape: Assuming ptr escapes.\n"); + DEBUG(dbgs() << "Assuming ptr escapes.\n"); return true; } } while (!Worklist.empty()); // No escapes found. - DEBUG(dbgs() << "DoesRetainableObjPtrEscape: Ptr does not escape.\n"); + DEBUG(dbgs() << "Ptr does not escape.\n"); return false; } @@ -305,6 +303,16 @@ STATISTIC(NumRets, "Number of return value forwarding " "retain+autoreleaes eliminated"); STATISTIC(NumRRs, "Number of retain+release paths eliminated"); STATISTIC(NumPeeps, "Number of calls peephole-optimized"); +STATISTIC(NumRetainsBeforeOpt, + "Number of retains before optimization."); +STATISTIC(NumReleasesBeforeOpt, + "Number of releases before optimization."); +#ifndef NDEBUG +STATISTIC(NumRetainsAfterOpt, + "Number of retains after optimization."); +STATISTIC(NumReleasesAfterOpt, + "Number of releases after optimization."); +#endif namespace { /// \enum Sequence @@ -375,7 +383,7 @@ static Sequence MergeSeqs(Sequence A, Sequence B, bool TopDown) { namespace { /// \brief Unidirectional information about either a /// retain-decrement-use-release sequence or release-use-decrement-retain - /// reverese sequence. + /// reverse sequence. struct RRInfo { /// After an objc_retain, the reference count of the referenced /// object is known to be positive. 
Similarly, before an objc_release, the @@ -410,6 +418,10 @@ namespace { KnownSafe(false), IsTailCallRelease(false), ReleaseMetadata(0) {} void clear(); + + bool IsTrackingImpreciseReleases() { + return ReleaseMetadata != 0; + } }; } @@ -428,7 +440,7 @@ namespace { /// True if the reference count is known to be incremented. bool KnownPositiveRefCount; - /// True of we've seen an opportunity for partial RR elimination, such as + /// True if we've seen an opportunity for partial RR elimination, such as /// pushing calls into a CFG triangle or into one side of a CFG diamond. bool Partial; @@ -457,6 +469,7 @@ namespace { } void SetSeq(Sequence NewSeq) { + DEBUG(dbgs() << "Old: " << Seq << "; New: " << NewSeq << "\n"); Seq = NewSeq; } @@ -469,7 +482,8 @@ namespace { } void ResetSequenceProgress(Sequence NewSeq) { - Seq = NewSeq; + DEBUG(dbgs() << "Resetting sequence progress.\n"); + SetSeq(NewSeq); Partial = false; RRI.clear(); } @@ -706,7 +720,19 @@ void BBState::MergeSucc(const BBState &Other) { /// Enable/disable ARC sequence annotations. static cl::opt<bool> -EnableARCAnnotations("enable-objc-arc-annotations", cl::init(false)); +EnableARCAnnotations("enable-objc-arc-annotations", cl::init(false), + cl::desc("Enable emission of arc data flow analysis " + "annotations")); +static cl::opt<bool> +DisableCheckForCFGHazards("disable-objc-arc-checkforcfghazards", cl::init(false), + cl::desc("Disable check for cfg hazards when " + "annotating")); +static cl::opt<std::string> +ARCAnnotationTargetIdentifier("objc-arc-annotation-target-identifier", + cl::init(""), + cl::desc("filter out all data flow annotations " + "but those that apply to the given " + "target llvm identifier.")); /// This function appends a unique ARCAnnotationProvenanceSourceMDKind id to an /// instruction so that we can track backwards when post processing via the llvm @@ -791,6 +817,12 @@ static void AppendMDNodeToInstForPtr(unsigned NodeId, /// state of a pointer at the entrance to a basic block. static void GenerateARCBBEntranceAnnotation(const char *Name, BasicBlock *BB, Value *Ptr, Sequence Seq) { + // If we have a target identifier, make sure that we match it before + // continuing. + if(!ARCAnnotationTargetIdentifier.empty() && + !Ptr->getName().equals(ARCAnnotationTargetIdentifier)) + return; + Module *M = BB->getParent()->getParent(); LLVMContext &C = M->getContext(); Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C)); @@ -828,6 +860,12 @@ static void GenerateARCBBEntranceAnnotation(const char *Name, BasicBlock *BB, /// of the pointer at the bottom of the basic block. static void GenerateARCBBTerminatorAnnotation(const char *Name, BasicBlock *BB, Value *Ptr, Sequence Seq) { + // If we have a target identifier, make sure that we match it before emitting + // an annotation. + if(!ARCAnnotationTargetIdentifier.empty() && + !Ptr->getName().equals(ARCAnnotationTargetIdentifier)) + return; + Module *M = BB->getParent()->getParent(); LLVMContext &C = M->getContext(); Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C)); @@ -869,6 +907,12 @@ static void GenerateARCAnnotation(unsigned InstMDId, Sequence OldSeq, Sequence NewSeq) { if (EnableARCAnnotations) { + // If we have a target identifier, make sure that we match it before + // emitting an annotation. + if(!ARCAnnotationTargetIdentifier.empty() && + !Ptr->getName().equals(ARCAnnotationTargetIdentifier)) + return; + // First generate the source annotation on our pointer. 
This will return an // MDString* if Ptr actually comes from an instruction implying we can put // in a source annotation. If AppendMDNodeToSourcePtr returns 0 (i.e. NULL), @@ -909,27 +953,27 @@ static void GenerateARCAnnotation(unsigned InstMDId, #define ANNOTATE_BB(_states, _bb, _name, _type, _direction) \ do { \ - if (EnableARCAnnotations) { \ - for(BBState::ptr_const_iterator I = (_states)._direction##_ptr_begin(), \ + if (EnableARCAnnotations) { \ + for(BBState::ptr_const_iterator I = (_states)._direction##_ptr_begin(), \ E = (_states)._direction##_ptr_end(); I != E; ++I) { \ - Value *Ptr = const_cast<Value*>(I->first); \ - Sequence Seq = I->second.GetSeq(); \ - GenerateARCBB ## _type ## Annotation(_name, (_bb), Ptr, Seq); \ + Value *Ptr = const_cast<Value*>(I->first); \ + Sequence Seq = I->second.GetSeq(); \ + GenerateARCBB ## _type ## Annotation(_name, (_bb), Ptr, Seq); \ + } \ } \ - } \ -} while (0) + } while (0) -#define ANNOTATE_BOTTOMUP_BBSTART(_states, _basicblock) \ +#define ANNOTATE_BOTTOMUP_BBSTART(_states, _basicblock) \ ANNOTATE_BB(_states, _basicblock, "llvm.arc.annotation.bottomup.bbstart", \ Entrance, bottom_up) -#define ANNOTATE_BOTTOMUP_BBEND(_states, _basicblock) \ - ANNOTATE_BB(_states, _basicblock, "llvm.arc.annotation.bottomup.bbend", \ +#define ANNOTATE_BOTTOMUP_BBEND(_states, _basicblock) \ + ANNOTATE_BB(_states, _basicblock, "llvm.arc.annotation.bottomup.bbend", \ Terminator, bottom_up) -#define ANNOTATE_TOPDOWN_BBSTART(_states, _basicblock) \ - ANNOTATE_BB(_states, _basicblock, "llvm.arc.annotation.topdown.bbstart", \ +#define ANNOTATE_TOPDOWN_BBSTART(_states, _basicblock) \ + ANNOTATE_BB(_states, _basicblock, "llvm.arc.annotation.topdown.bbstart", \ Entrance, top_down) -#define ANNOTATE_TOPDOWN_BBEND(_states, _basicblock) \ - ANNOTATE_BB(_states, _basicblock, "llvm.arc.annotation.topdown.bbend", \ +#define ANNOTATE_TOPDOWN_BBEND(_states, _basicblock) \ + ANNOTATE_BB(_states, _basicblock, "llvm.arc.annotation.topdown.bbend", \ Terminator, top_down) #else // !ARC_ANNOTATION @@ -955,9 +999,6 @@ namespace { /// them. These are initialized lazily to avoid cluttering up the Module /// with unused declarations. - /// Declaration for ObjC runtime function - /// objc_retainAutoreleasedReturnValue. - Constant *RetainRVCallee; /// Declaration for ObjC runtime function objc_autoreleaseReturnValue. Constant *AutoreleaseRVCallee; /// Declaration for ObjC runtime function objc_release. 
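The ObjCARC hunks above move the objc_retain to objc_retainAutoreleasedReturnValue rewrite out of ObjCARCOpt and into ObjCARCContract so it runs late and does not perturb the dataflow analysis. The rewrite is only attempted when the retain follows the call that produces its operand with nothing but no-op instructions in between, which is what OptimizeRetainCall checks. A simplified sketch of that adjacency test; the string-based block model here is invented purely for illustration:

    #include <cassert>
    #include <cstddef>
    #include <string>
    #include <vector>

    // Illustrative stand-in for the adjacency test OptimizeRetainCall
    // performs: the objc_retain must follow the call producing its operand,
    // with only no-op instructions (e.g. casts of the same value) between.
    static bool isNoop(const std::string &inst) { return inst == "bitcast"; }

    static bool retainImmediatelyFollowsCall(const std::vector<std::string> &block,
                                             std::size_t callIdx,
                                             std::size_t retainIdx) {
      std::size_t i = callIdx + 1;
      while (i < block.size() && isNoop(block[i]))
        ++i;                               // skip over no-op instructions
      return i == retainIdx;
    }

    int main() {
      std::vector<std::string> bb;
      bb.push_back("call foo");
      bb.push_back("bitcast");
      bb.push_back("objc_retain");
      bb.push_back("store");
      assert(retainImmediatelyFollowsCall(bb, 0, 2));   // adjacent: rewrite ok
      assert(!retainImmediatelyFollowsCall(bb, 0, 3));  // not adjacent: give up
      return 0;
    }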
@@ -991,7 +1032,6 @@ namespace { unsigned ARCAnnotationProvenanceSourceMDKind; #endif // ARC_ANNOATIONS - Constant *getRetainRVCallee(Module *M); Constant *getAutoreleaseRVCallee(Module *M); Constant *getReleaseCallee(Module *M); Constant *getRetainCallee(Module *M); @@ -1000,7 +1040,6 @@ namespace { bool IsRetainBlockOptimizable(const Instruction *Inst); - void OptimizeRetainCall(Function &F, Instruction *Retain); bool OptimizeRetainRVCall(Function &F, Instruction *RetainRV); void OptimizeAutoreleaseRVCall(Function &F, Instruction *AutoreleaseRV, InstructionClass &Class); @@ -1059,6 +1098,10 @@ namespace { void OptimizeReturns(Function &F); +#ifndef NDEBUG + void GatherStatistics(Function &F, bool AfterOptimization = false); +#endif + virtual void getAnalysisUsage(AnalysisUsage &AU) const; virtual bool doInitialization(Module &M); virtual bool runOnFunction(Function &F); @@ -1106,22 +1149,6 @@ bool ObjCARCOpt::IsRetainBlockOptimizable(const Instruction *Inst) { return true; } -Constant *ObjCARCOpt::getRetainRVCallee(Module *M) { - if (!RetainRVCallee) { - LLVMContext &C = M->getContext(); - Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C)); - Type *Params[] = { I8X }; - FunctionType *FTy = FunctionType::get(I8X, Params, /*isVarArg=*/false); - AttributeSet Attribute = - AttributeSet().addAttribute(M->getContext(), AttributeSet::FunctionIndex, - Attribute::NoUnwind); - RetainRVCallee = - M->getOrInsertFunction("objc_retainAutoreleasedReturnValue", FTy, - Attribute); - } - return RetainRVCallee; -} - Constant *ObjCARCOpt::getAutoreleaseRVCallee(Module *M) { if (!AutoreleaseRVCallee) { LLVMContext &C = M->getContext(); @@ -1201,38 +1228,6 @@ Constant *ObjCARCOpt::getAutoreleaseCallee(Module *M) { return AutoreleaseCallee; } -/// Turn objc_retain into objc_retainAutoreleasedReturnValue if the operand is a -/// return value. -void -ObjCARCOpt::OptimizeRetainCall(Function &F, Instruction *Retain) { - ImmutableCallSite CS(GetObjCArg(Retain)); - const Instruction *Call = CS.getInstruction(); - if (!Call) return; - if (Call->getParent() != Retain->getParent()) return; - - // Check that the call is next to the retain. - BasicBlock::const_iterator I = Call; - ++I; - while (IsNoopInstruction(I)) ++I; - if (&*I != Retain) - return; - - // Turn it to an objc_retainAutoreleasedReturnValue.. - Changed = true; - ++NumPeeps; - - DEBUG(dbgs() << "ObjCARCOpt::OptimizeRetainCall: Transforming " - "objc_retain => objc_retainAutoreleasedReturnValue" - " since the operand is a return value.\n" - " Old: " - << *Retain << "\n"); - - cast<CallInst>(Retain)->setCalledFunction(getRetainRVCallee(F.getParent())); - - DEBUG(dbgs() << " New: " - << *Retain << "\n"); -} - /// Turn objc_retainAutoreleasedReturnValue into objc_retain if the operand is /// not a return value. Or, if it can be paired with an /// objc_autoreleaseReturnValue, delete the pair and return true. 
@@ -1269,9 +1264,8 @@ ObjCARCOpt::OptimizeRetainRVCall(Function &F, Instruction *RetainRV) { Changed = true; ++NumPeeps; - DEBUG(dbgs() << "ObjCARCOpt::OptimizeRetainRVCall: Erasing " << *I << "\n" - << " Erasing " << *RetainRV - << "\n"); + DEBUG(dbgs() << "Erasing autoreleaseRV,retainRV pair: " << *I << "\n" + << "Erasing " << *RetainRV << "\n"); EraseInstruction(I); EraseInstruction(RetainRV); @@ -1283,16 +1277,13 @@ ObjCARCOpt::OptimizeRetainRVCall(Function &F, Instruction *RetainRV) { Changed = true; ++NumPeeps; - DEBUG(dbgs() << "ObjCARCOpt::OptimizeRetainRVCall: Transforming " - "objc_retainAutoreleasedReturnValue => " + DEBUG(dbgs() << "Transforming objc_retainAutoreleasedReturnValue => " "objc_retain since the operand is not a return value.\n" - " Old: " - << *RetainRV << "\n"); + "Old = " << *RetainRV << "\n"); cast<CallInst>(RetainRV)->setCalledFunction(getRetainCallee(F.getParent())); - DEBUG(dbgs() << " New: " - << *RetainRV << "\n"); + DEBUG(dbgs() << "New = " << *RetainRV << "\n"); return false; } @@ -1321,12 +1312,10 @@ ObjCARCOpt::OptimizeAutoreleaseRVCall(Function &F, Instruction *AutoreleaseRV, Changed = true; ++NumPeeps; - DEBUG(dbgs() << "ObjCARCOpt::OptimizeAutoreleaseRVCall: Transforming " - "objc_autoreleaseReturnValue => " + DEBUG(dbgs() << "Transforming objc_autoreleaseReturnValue => " "objc_autorelease since its operand is not used as a return " "value.\n" - " Old: " - << *AutoreleaseRV << "\n"); + "Old = " << *AutoreleaseRV << "\n"); CallInst *AutoreleaseRVCI = cast<CallInst>(AutoreleaseRV); AutoreleaseRVCI-> @@ -1334,8 +1323,7 @@ ObjCARCOpt::OptimizeAutoreleaseRVCall(Function &F, Instruction *AutoreleaseRV, AutoreleaseRVCI->setTailCall(false); // Never tail call objc_autorelease. Class = IC_Autorelease; - DEBUG(dbgs() << " New: " - << *AutoreleaseRV << "\n"); + DEBUG(dbgs() << "New: " << *AutoreleaseRV << "\n"); } @@ -1359,18 +1347,24 @@ ObjCARCOpt::OptimizeRetainBlockCall(Function &F, Instruction *Inst, if (!IsRetainBlockOptimizable(Inst)) return false; + Changed = true; + ++NumPeeps; + + DEBUG(dbgs() << "Strength reduced retainBlock => retain.\n"); + DEBUG(dbgs() << "Old: " << *Inst << "\n"); CallInst *RetainBlock = cast<CallInst>(Inst); RetainBlock->setCalledFunction(getRetainCallee(F.getParent())); // Remove copy_on_escape metadata. RetainBlock->setMetadata(CopyOnEscapeMDKind, 0); Class = IC_Retain; - + DEBUG(dbgs() << "New: " << *Inst << "\n"); return true; } /// Visit each call, one at a time, and make simplifications without doing any /// additional analysis. void ObjCARCOpt::OptimizeIndividualCalls(Function &F) { + DEBUG(dbgs() << "\n== ObjCARCOpt::OptimizeIndividualCalls ==\n"); // Reset all the flags in preparation for recomputing them. 
UsedInThisFunction = 0; @@ -1380,8 +1374,7 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) { InstructionClass Class = GetBasicInstructionClass(Inst); - DEBUG(dbgs() << "ObjCARCOpt::OptimizeIndividualCalls: Visiting: Class: " - << Class << "; " << *Inst << "\n"); + DEBUG(dbgs() << "Visiting: Class: " << Class << "; " << *Inst << "\n"); switch (Class) { default: break; @@ -1397,8 +1390,7 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) { case IC_NoopCast: Changed = true; ++NumNoops; - DEBUG(dbgs() << "ObjCARCOpt::OptimizeIndividualCalls: Erasing no-op cast:" - " " << *Inst << "\n"); + DEBUG(dbgs() << "Erasing no-op cast: " << *Inst << "\n"); EraseInstruction(Inst); continue; @@ -1416,11 +1408,8 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) { Constant::getNullValue(Ty), CI); llvm::Value *NewValue = UndefValue::get(CI->getType()); - DEBUG(dbgs() << "ObjCARCOpt::OptimizeIndividualCalls: A null " - "pointer-to-weak-pointer is undefined behavior.\n" - " Old = " << *CI << - "\n New = " << - *NewValue << "\n"); + DEBUG(dbgs() << "A null pointer-to-weak-pointer is undefined behavior." + "\nOld = " << *CI << "\nNew = " << *NewValue << "\n"); CI->replaceAllUsesWith(NewValue); CI->eraseFromParent(); continue; @@ -1439,11 +1428,8 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) { CI); llvm::Value *NewValue = UndefValue::get(CI->getType()); - DEBUG(dbgs() << "ObjCARCOpt::OptimizeIndividualCalls: A null " - "pointer-to-weak-pointer is undefined behavior.\n" - " Old = " << *CI << - "\n New = " << - *NewValue << "\n"); + DEBUG(dbgs() << "A null pointer-to-weak-pointer is undefined behavior." + "\nOld = " << *CI << "\nNew = " << *NewValue << "\n"); CI->replaceAllUsesWith(NewValue); CI->eraseFromParent(); @@ -1452,13 +1438,13 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) { break; } case IC_RetainBlock: - // If we strength reduce an objc_retainBlock to amn objc_retain, continue + // If we strength reduce an objc_retainBlock to an objc_retain, continue // onto the objc_retain peephole optimizations. Otherwise break. if (!OptimizeRetainBlockCall(F, Inst, Class)) break; // FALLTHROUGH case IC_Retain: - OptimizeRetainCall(F, Inst); + ++NumRetainsBeforeOpt; break; case IC_RetainRV: if (OptimizeRetainRVCall(F, Inst)) @@ -1467,6 +1453,9 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) { case IC_AutoreleaseRV: OptimizeAutoreleaseRVCall(F, Inst, Class); break; + case IC_Release: + ++NumReleasesBeforeOpt; + break; } // objc_autorelease(x) -> objc_release(x) if x is otherwise unused. @@ -1483,15 +1472,11 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) { CallInst *NewCall = CallInst::Create(getReleaseCallee(F.getParent()), Call->getArgOperand(0), "", Call); - NewCall->setMetadata(ImpreciseReleaseMDKind, - MDNode::get(C, ArrayRef<Value *>())); + NewCall->setMetadata(ImpreciseReleaseMDKind, MDNode::get(C, None)); - DEBUG(dbgs() << "ObjCARCOpt::OptimizeIndividualCalls: Replacing " - "objc_autorelease(x) with objc_release(x) since x is " - "otherwise unused.\n" - " Old: " << *Call << - "\n New: " << - *NewCall << "\n"); + DEBUG(dbgs() << "Replacing autorelease{,RV}(x) with objc_release(x) " + "since x is otherwise unused.\nOld: " << *Call << "\nNew: " + << *NewCall << "\n"); EraseInstruction(Call); Inst = NewCall; @@ -1503,9 +1488,8 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) { // a tail keyword. 
if (IsAlwaysTail(Class)) { Changed = true; - DEBUG(dbgs() << "ObjCARCOpt::OptimizeIndividualCalls: Adding tail keyword" - " to function since it can never be passed stack args: " << *Inst << - "\n"); + DEBUG(dbgs() << "Adding tail keyword to function since it can never be " + "passed stack args: " << *Inst << "\n"); cast<CallInst>(Inst)->setTailCall(); } @@ -1513,8 +1497,7 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) { // semantics of ARC truly do not do so. if (IsNeverTail(Class)) { Changed = true; - DEBUG(dbgs() << "ObjCARCOpt::OptimizeIndividualCalls: Removing tail " - "keyword from function: " << *Inst << + DEBUG(dbgs() << "Removing tail keyword from function: " << *Inst << "\n"); cast<CallInst>(Inst)->setTailCall(false); } @@ -1522,8 +1505,8 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) { // Set nounwind as needed. if (IsNoThrow(Class)) { Changed = true; - DEBUG(dbgs() << "ObjCARCOpt::OptimizeIndividualCalls: Found no throw" - " class. Setting nounwind on: " << *Inst << "\n"); + DEBUG(dbgs() << "Found no throw class. Setting nounwind on: " << *Inst + << "\n"); cast<CallInst>(Inst)->setDoesNotThrow(); } @@ -1538,8 +1521,8 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) { if (IsNullOrUndef(Arg)) { Changed = true; ++NumNoops; - DEBUG(dbgs() << "ObjCARCOpt::OptimizeIndividualCalls: ARC calls with " - " null are no-ops. Erasing: " << *Inst << "\n"); + DEBUG(dbgs() << "ARC calls with null are no-ops. Erasing: " << *Inst + << "\n"); EraseInstruction(Inst); continue; } @@ -1633,10 +1616,9 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) { Clone->setArgOperand(0, Op); Clone->insertBefore(InsertPos); - DEBUG(dbgs() << "ObjCARCOpt::OptimizeIndividualCalls: Cloning " + DEBUG(dbgs() << "Cloning " << *CInst << "\n" - " And inserting " - "clone at " << *InsertPos << "\n"); + "And inserting clone at " << *InsertPos << "\n"); Worklist.push_back(std::make_pair(Clone, Incoming)); } } @@ -1648,7 +1630,65 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) { } } while (!Worklist.empty()); } - DEBUG(dbgs() << "ObjCARCOpt::OptimizeIndividualCalls: Finished List.\n"); +} + +/// If we have a top down pointer in the S_Use state, make sure that there are +/// no CFG hazards by checking the states of various bottom up pointers. +static void CheckForUseCFGHazard(const Sequence SuccSSeq, + const bool SuccSRRIKnownSafe, + PtrState &S, + bool &SomeSuccHasSame, + bool &AllSuccsHaveSame, + bool &ShouldContinue) { + switch (SuccSSeq) { + case S_CanRelease: { + if (!S.RRI.KnownSafe && !SuccSRRIKnownSafe) { + S.ClearSequenceProgress(); + break; + } + ShouldContinue = true; + break; + } + case S_Use: + SomeSuccHasSame = true; + break; + case S_Stop: + case S_Release: + case S_MovableRelease: + if (!S.RRI.KnownSafe && !SuccSRRIKnownSafe) + AllSuccsHaveSame = false; + break; + case S_Retain: + llvm_unreachable("bottom-up pointer in retain state!"); + case S_None: + llvm_unreachable("This should have been handled earlier."); + } +} + +/// If we have a Top Down pointer in the S_CanRelease state, make sure that +/// there are no CFG hazards by checking the states of various bottom up +/// pointers. 
+static void CheckForCanReleaseCFGHazard(const Sequence SuccSSeq, + const bool SuccSRRIKnownSafe, + PtrState &S, + bool &SomeSuccHasSame, + bool &AllSuccsHaveSame) { + switch (SuccSSeq) { + case S_CanRelease: + SomeSuccHasSame = true; + break; + case S_Stop: + case S_Release: + case S_MovableRelease: + case S_Use: + if (!S.RRI.KnownSafe && !SuccSRRIKnownSafe) + AllSuccsHaveSame = false; + break; + case S_Retain: + llvm_unreachable("bottom-up pointer in retain state!"); + case S_None: + llvm_unreachable("This should have been handled earlier."); + } } /// Check for critical edges, loop boundaries, irreducible control flow, or @@ -1661,106 +1701,82 @@ ObjCARCOpt::CheckForCFGHazards(const BasicBlock *BB, // If any top-down local-use or possible-dec has a succ which is earlier in // the sequence, forget it. for (BBState::ptr_iterator I = MyStates.top_down_ptr_begin(), - E = MyStates.top_down_ptr_end(); I != E; ++I) - switch (I->second.GetSeq()) { - default: break; - case S_Use: { - const Value *Arg = I->first; - const TerminatorInst *TI = cast<TerminatorInst>(&BB->back()); - bool SomeSuccHasSame = false; - bool AllSuccsHaveSame = true; - PtrState &S = I->second; - succ_const_iterator SI(TI), SE(TI, false); - - for (; SI != SE; ++SI) { - Sequence SuccSSeq = S_None; - bool SuccSRRIKnownSafe = false; - // If VisitBottomUp has pointer information for this successor, take - // what we know about it. - DenseMap<const BasicBlock *, BBState>::iterator BBI = - BBStates.find(*SI); - assert(BBI != BBStates.end()); - const PtrState &SuccS = BBI->second.getPtrBottomUpState(Arg); - SuccSSeq = SuccS.GetSeq(); - SuccSRRIKnownSafe = SuccS.RRI.KnownSafe; - switch (SuccSSeq) { - case S_None: - case S_CanRelease: { - if (!S.RRI.KnownSafe && !SuccSRRIKnownSafe) { - S.ClearSequenceProgress(); - break; - } - continue; - } - case S_Use: - SomeSuccHasSame = true; - break; - case S_Stop: - case S_Release: - case S_MovableRelease: - if (!S.RRI.KnownSafe && !SuccSRRIKnownSafe) - AllSuccsHaveSame = false; - break; - case S_Retain: - llvm_unreachable("bottom-up pointer in retain state!"); - } - } - // If the state at the other end of any of the successor edges - // matches the current state, require all edges to match. This - // guards against loops in the middle of a sequence. - if (SomeSuccHasSame && !AllSuccsHaveSame) + E = MyStates.top_down_ptr_end(); I != E; ++I) { + PtrState &S = I->second; + const Sequence Seq = I->second.GetSeq(); + + // We only care about S_Retain, S_CanRelease, and S_Use. + if (Seq == S_None) + continue; + + // Make sure that if extra top down states are added in the future that this + // code is updated to handle it. + assert((Seq == S_Retain || Seq == S_CanRelease || Seq == S_Use) && + "Unknown top down sequence state."); + + const Value *Arg = I->first; + const TerminatorInst *TI = cast<TerminatorInst>(&BB->back()); + bool SomeSuccHasSame = false; + bool AllSuccsHaveSame = true; + + succ_const_iterator SI(TI), SE(TI, false); + + for (; SI != SE; ++SI) { + // If VisitBottomUp has pointer information for this successor, take + // what we know about it. + const DenseMap<const BasicBlock *, BBState>::iterator BBI = + BBStates.find(*SI); + assert(BBI != BBStates.end()); + const PtrState &SuccS = BBI->second.getPtrBottomUpState(Arg); + const Sequence SuccSSeq = SuccS.GetSeq(); + + // If bottom up, the pointer is in an S_None state, clear the sequence + // progress since the sequence in the bottom up state finished + // suggesting a mismatch in between retains/releases. 
This is true for + // all three cases that we are handling here: S_Retain, S_Use, and + // S_CanRelease. + if (SuccSSeq == S_None) { S.ClearSequenceProgress(); - break; - } - case S_CanRelease: { - const Value *Arg = I->first; - const TerminatorInst *TI = cast<TerminatorInst>(&BB->back()); - bool SomeSuccHasSame = false; - bool AllSuccsHaveSame = true; - PtrState &S = I->second; - succ_const_iterator SI(TI), SE(TI, false); - - for (; SI != SE; ++SI) { - Sequence SuccSSeq = S_None; - bool SuccSRRIKnownSafe = false; - // If VisitBottomUp has pointer information for this successor, take - // what we know about it. - DenseMap<const BasicBlock *, BBState>::iterator BBI = - BBStates.find(*SI); - assert(BBI != BBStates.end()); - const PtrState &SuccS = BBI->second.getPtrBottomUpState(Arg); - SuccSSeq = SuccS.GetSeq(); - SuccSRRIKnownSafe = SuccS.RRI.KnownSafe; - switch (SuccSSeq) { - case S_None: { - if (!S.RRI.KnownSafe && !SuccSRRIKnownSafe) { - S.ClearSequenceProgress(); - break; - } + continue; + } + + // If we have S_Use or S_CanRelease, perform our check for cfg hazard + // checks. + const bool SuccSRRIKnownSafe = SuccS.RRI.KnownSafe; + + // *NOTE* We do not use Seq from above here since we are allowing for + // S.GetSeq() to change while we are visiting basic blocks. + switch(S.GetSeq()) { + case S_Use: { + bool ShouldContinue = false; + CheckForUseCFGHazard(SuccSSeq, SuccSRRIKnownSafe, S, + SomeSuccHasSame, AllSuccsHaveSame, + ShouldContinue); + if (ShouldContinue) continue; - } - case S_CanRelease: - SomeSuccHasSame = true; - break; - case S_Stop: - case S_Release: - case S_MovableRelease: - case S_Use: - if (!S.RRI.KnownSafe && !SuccSRRIKnownSafe) - AllSuccsHaveSame = false; - break; - case S_Retain: - llvm_unreachable("bottom-up pointer in retain state!"); - } + break; + } + case S_CanRelease: { + CheckForCanReleaseCFGHazard(SuccSSeq, SuccSRRIKnownSafe, + S, SomeSuccHasSame, + AllSuccsHaveSame); + break; + } + case S_Retain: + case S_None: + case S_Stop: + case S_Release: + case S_MovableRelease: + break; } - // If the state at the other end of any of the successor edges - // matches the current state, require all edges to match. This - // guards against loops in the middle of a sequence. - if (SomeSuccHasSame && !AllSuccsHaveSame) - S.ClearSequenceProgress(); - break; - } } + + // If the state at the other end of any of the successor edges + // matches the current state, require all edges to match. This + // guards against loops in the middle of a sequence. + if (SomeSuccHasSame && !AllSuccsHaveSame) + S.ClearSequenceProgress(); + } } bool @@ -1772,6 +1788,8 @@ ObjCARCOpt::VisitInstructionBottomUp(Instruction *Inst, InstructionClass Class = GetInstructionClass(Inst); const Value *Arg = 0; + DEBUG(dbgs() << "Class: " << Class << "\n"); + switch (Class) { case IC_Release: { Arg = GetObjCArg(Inst); @@ -1786,8 +1804,7 @@ ObjCARCOpt::VisitInstructionBottomUp(Instruction *Inst, // pairs by making PtrState hold a stack of states, but this is // simple and avoids adding overhead for the non-nested case. if (S.GetSeq() == S_Release || S.GetSeq() == S_MovableRelease) { - DEBUG(dbgs() << "ObjCARCOpt::VisitInstructionBottomUp: Found nested " - "releases (i.e. a release pair)\n"); + DEBUG(dbgs() << "Found nested releases (i.e. 
a release pair)\n"); NestingDetected = true; } @@ -1820,7 +1837,10 @@ ObjCARCOpt::VisitInstructionBottomUp(Instruction *Inst, case S_Release: case S_MovableRelease: case S_Use: - S.RRI.ReverseInsertPts.clear(); + // If OldSeq is not S_Use or OldSeq is S_Use and we are tracking an + // imprecise release, clear our reverse insertion points. + if (OldSeq != S_Use || S.RRI.IsTrackingImpreciseReleases()) + S.RRI.ReverseInsertPts.clear(); // FALL THROUGH case S_CanRelease: // Don't do retain+release tracking for IC_RetainRV, because it's @@ -1835,7 +1855,8 @@ ObjCARCOpt::VisitInstructionBottomUp(Instruction *Inst, llvm_unreachable("bottom-up pointer in retain state!"); } ANNOTATE_BOTTOMUP(Inst, Arg, OldSeq, S.GetSeq()); - return NestingDetected; + // A retain moving bottom up can be a use. + break; } case IC_AutoreleasepoolPop: // Conservatively, clear MyStates for all known pointers. @@ -1861,6 +1882,8 @@ ObjCARCOpt::VisitInstructionBottomUp(Instruction *Inst, // Check for possible releases. if (CanAlterRefCount(Inst, Ptr, PA, Class)) { + DEBUG(dbgs() << "CanAlterRefCount: Seq: " << Seq << "; " << *Ptr + << "\n"); S.ClearKnownPositiveRefCount(); switch (Seq) { case S_Use: @@ -1883,6 +1906,8 @@ ObjCARCOpt::VisitInstructionBottomUp(Instruction *Inst, case S_Release: case S_MovableRelease: if (CanUse(Inst, Ptr, PA, Class)) { + DEBUG(dbgs() << "CanUse: Seq: " << Seq << "; " << *Ptr + << "\n"); assert(S.RRI.ReverseInsertPts.empty()); // If this is an invoke instruction, we're scanning it as part of // one of its successor blocks, since we can't insert code after it @@ -1894,6 +1919,8 @@ ObjCARCOpt::VisitInstructionBottomUp(Instruction *Inst, S.SetSeq(S_Use); ANNOTATE_BOTTOMUP(Inst, Ptr, Seq, S_Use); } else if (Seq == S_Release && IsUser(Class)) { + DEBUG(dbgs() << "PreciseReleaseUse: Seq: " << Seq << "; " << *Ptr + << "\n"); // Non-movable releases depend on any possible objc pointer use. 
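            // (Editorial note, not part of the patch: a release is "movable"
            // only when it carries the clang.imprecise_release metadata;
            // without it the object must stay alive up to every possible use
            // of the pointer, so a potential use here demotes the sequence
            // from S_Release to S_Stop instead of letting the release float
            // past the use.)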
S.SetSeq(S_Stop); ANNOTATE_BOTTOMUP(Inst, Ptr, S_Release, S_Stop); @@ -1907,6 +1934,8 @@ ObjCARCOpt::VisitInstructionBottomUp(Instruction *Inst, break; case S_Stop: if (CanUse(Inst, Ptr, PA, Class)) { + DEBUG(dbgs() << "PreciseStopUse: Seq: " << Seq << "; " << *Ptr + << "\n"); S.SetSeq(S_Use); ANNOTATE_BOTTOMUP(Inst, Ptr, Seq, S_Use); } @@ -1927,6 +1956,9 @@ bool ObjCARCOpt::VisitBottomUp(BasicBlock *BB, DenseMap<const BasicBlock *, BBState> &BBStates, MapVector<Value *, RRInfo> &Retains) { + + DEBUG(dbgs() << "\n== ObjCARCOpt::VisitBottomUp ==\n"); + bool NestingDetected = false; BBState &MyStates = BBStates[BB]; @@ -1960,7 +1992,7 @@ ObjCARCOpt::VisitBottomUp(BasicBlock *BB, if (isa<InvokeInst>(Inst)) continue; - DEBUG(dbgs() << "ObjCARCOpt::VisitButtonUp: Visiting " << *Inst << "\n"); + DEBUG(dbgs() << "Visiting " << *Inst << "\n"); NestingDetected |= VisitInstructionBottomUp(Inst, BB, Retains, MyStates); } @@ -2033,13 +2065,18 @@ ObjCARCOpt::VisitInstructionTopDown(Instruction *Inst, PtrState &S = MyStates.getPtrTopDownState(Arg); S.ClearKnownPositiveRefCount(); - switch (S.GetSeq()) { + Sequence OldSeq = S.GetSeq(); + + MDNode *ReleaseMetadata = Inst->getMetadata(ImpreciseReleaseMDKind); + + switch (OldSeq) { case S_Retain: case S_CanRelease: - S.RRI.ReverseInsertPts.clear(); + if (OldSeq == S_Retain || ReleaseMetadata != 0) + S.RRI.ReverseInsertPts.clear(); // FALL THROUGH case S_Use: - S.RRI.ReleaseMetadata = Inst->getMetadata(ImpreciseReleaseMDKind); + S.RRI.ReleaseMetadata = ReleaseMetadata; S.RRI.IsTailCallRelease = cast<CallInst>(Inst)->isTailCall(); Releases[Inst] = S.RRI; ANNOTATE_TOPDOWN(Inst, Arg, S.GetSeq(), S_None); @@ -2078,6 +2115,8 @@ ObjCARCOpt::VisitInstructionTopDown(Instruction *Inst, // Check for possible releases. if (CanAlterRefCount(Inst, Ptr, PA, Class)) { + DEBUG(dbgs() << "CanAlterRefCount: Seq: " << Seq << "; " << *Ptr + << "\n"); S.ClearKnownPositiveRefCount(); switch (Seq) { case S_Retain: @@ -2105,6 +2144,8 @@ ObjCARCOpt::VisitInstructionTopDown(Instruction *Inst, switch (Seq) { case S_CanRelease: if (CanUse(Inst, Ptr, PA, Class)) { + DEBUG(dbgs() << "CanUse: Seq: " << Seq << "; " << *Ptr + << "\n"); S.SetSeq(S_Use); ANNOTATE_TOPDOWN(Inst, Ptr, Seq, S_Use); } @@ -2127,6 +2168,7 @@ bool ObjCARCOpt::VisitTopDown(BasicBlock *BB, DenseMap<const BasicBlock *, BBState> &BBStates, DenseMap<Value *, RRInfo> &Releases) { + DEBUG(dbgs() << "\n== ObjCARCOpt::VisitTopDown ==\n"); bool NestingDetected = false; BBState &MyStates = BBStates[BB]; @@ -2156,7 +2198,7 @@ ObjCARCOpt::VisitTopDown(BasicBlock *BB, for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) { Instruction *Inst = I; - DEBUG(dbgs() << "ObjCARCOpt::VisitTopDown: Visiting " << *Inst << "\n"); + DEBUG(dbgs() << "Visiting " << *Inst << "\n"); NestingDetected |= VisitInstructionTopDown(Inst, Releases, MyStates); } @@ -2165,6 +2207,9 @@ ObjCARCOpt::VisitTopDown(BasicBlock *BB, // bottom of the basic block. ANNOTATE_TOPDOWN_BBEND(MyStates, BB); +#ifdef ARC_ANNOTATIONS + if (!(EnableARCAnnotations && DisableCheckForCFGHazards)) +#endif CheckForCFGHazards(BB, BBStates, MyStates); return NestingDetected; } @@ -2296,6 +2341,8 @@ void ObjCARCOpt::MoveCalls(Value *Arg, Type *ArgTy = Arg->getType(); Type *ParamTy = PointerType::getUnqual(Type::getInt8Ty(ArgTy->getContext())); + DEBUG(dbgs() << "== ObjCARCOpt::MoveCalls ==\n"); + // Insert the new retain and release calls. 
for (SmallPtrSet<Instruction *, 2>::const_iterator PI = ReleasesToMove.ReverseInsertPts.begin(), @@ -2308,10 +2355,8 @@ void ObjCARCOpt::MoveCalls(Value *Arg, Call->setDoesNotThrow(); Call->setTailCall(); - DEBUG(dbgs() << "ObjCARCOpt::MoveCalls: Inserting new Release: " << *Call - << "\n" - " At insertion point: " << *InsertPt - << "\n"); + DEBUG(dbgs() << "Inserting new Retain: " << *Call << "\n" + "At insertion point: " << *InsertPt << "\n"); } for (SmallPtrSet<Instruction *, 2>::const_iterator PI = RetainsToMove.ReverseInsertPts.begin(), @@ -2328,10 +2373,8 @@ void ObjCARCOpt::MoveCalls(Value *Arg, if (ReleasesToMove.IsTailCallRelease) Call->setTailCall(); - DEBUG(dbgs() << "ObjCARCOpt::MoveCalls: Inserting new Retain: " << *Call - << "\n" - " At insertion point: " << *InsertPt - << "\n"); + DEBUG(dbgs() << "Inserting new Release: " << *Call << "\n" + "At insertion point: " << *InsertPt << "\n"); } // Delete the original retain and release calls. @@ -2341,8 +2384,7 @@ void ObjCARCOpt::MoveCalls(Value *Arg, Instruction *OrigRetain = *AI; Retains.blot(OrigRetain); DeadInsts.push_back(OrigRetain); - DEBUG(dbgs() << "ObjCARCOpt::MoveCalls: Deleting retain: " << *OrigRetain << - "\n"); + DEBUG(dbgs() << "Deleting retain: " << *OrigRetain << "\n"); } for (SmallPtrSet<Instruction *, 2>::const_iterator AI = ReleasesToMove.Calls.begin(), @@ -2350,9 +2392,9 @@ void ObjCARCOpt::MoveCalls(Value *Arg, Instruction *OrigRelease = *AI; Releases.erase(OrigRelease); DeadInsts.push_back(OrigRelease); - DEBUG(dbgs() << "ObjCARCOpt::MoveCalls: Deleting release: " << *OrigRelease - << "\n"); + DEBUG(dbgs() << "Deleting release: " << *OrigRelease << "\n"); } + } bool @@ -2506,6 +2548,12 @@ ObjCARCOpt::ConnectTDBUTraversals(DenseMap<const BasicBlock *, BBState> if (OldDelta != 0) return false; +#ifdef ARC_ANNOTATIONS + // Do not move calls if ARC annotations are requested. + if (EnableARCAnnotations) + return false; +#endif // ARC_ANNOTATIONS + Changed = true; assert(OldCount != 0 && "Unreachable code?"); NumRRs += OldCount - NewCount; @@ -2524,6 +2572,8 @@ ObjCARCOpt::PerformCodePlacement(DenseMap<const BasicBlock *, BBState> MapVector<Value *, RRInfo> &Retains, DenseMap<Value *, RRInfo> &Releases, Module *M) { + DEBUG(dbgs() << "\n== ObjCARCOpt::PerformCodePlacement ==\n"); + bool AnyPairsCompletelyEliminated = false; RRInfo RetainsToMove; RRInfo ReleasesToMove; @@ -2539,8 +2589,7 @@ ObjCARCOpt::PerformCodePlacement(DenseMap<const BasicBlock *, BBState> Instruction *Retain = cast<Instruction>(V); - DEBUG(dbgs() << "ObjCARCOpt::PerformCodePlacement: Visiting: " << *Retain - << "\n"); + DEBUG(dbgs() << "Visiting: " << *Retain << "\n"); Value *Arg = GetObjCArg(Retain); @@ -2567,12 +2616,6 @@ ObjCARCOpt::PerformCodePlacement(DenseMap<const BasicBlock *, BBState> ReleasesToMove, Arg, KnownSafe, AnyPairsCompletelyEliminated); -#ifdef ARC_ANNOTATIONS - // Do not move calls if ARC annotations are requested. If we were to move - // calls in this case, we would not be able - PerformMoveCalls = PerformMoveCalls && !EnableARCAnnotations; -#endif // ARC_ANNOTATIONS - if (PerformMoveCalls) { // Ok, everything checks out and we're all set. Let's move/delete some // code! @@ -2597,14 +2640,15 @@ ObjCARCOpt::PerformCodePlacement(DenseMap<const BasicBlock *, BBState> /// Weak pointer optimizations. void ObjCARCOpt::OptimizeWeakCalls(Function &F) { + DEBUG(dbgs() << "\n== ObjCARCOpt::OptimizeWeakCalls ==\n"); + // First, do memdep-style RLE and S2L optimizations. 
We can't use memdep // itself because it uses AliasAnalysis and we need to do provenance // queries instead. for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E; ) { Instruction *Inst = &*I++; - DEBUG(dbgs() << "ObjCARCOpt::OptimizeWeakCalls: Visiting: " << *Inst << - "\n"); + DEBUG(dbgs() << "Visiting: " << *Inst << "\n"); InstructionClass Class = GetBasicInstructionClass(Inst); if (Class != IC_LoadWeak && Class != IC_LoadWeakRetained) @@ -2752,9 +2796,6 @@ void ObjCARCOpt::OptimizeWeakCalls(Function &F) { done:; } } - - DEBUG(dbgs() << "ObjCARCOpt::OptimizeWeakCalls: Finished List.\n\n"); - } /// Identify program paths which execute sequences of retains and releases which @@ -2820,17 +2861,17 @@ FindPredecessorRetainWithSafePath(const Value *Arg, BasicBlock *BB, BB, Autorelease, DepInsts, Visited, PA); if (DepInsts.size() != 1) return 0; - + CallInst *Retain = dyn_cast_or_null<CallInst>(*DepInsts.begin()); - + // Check that we found a retain with the same argument. if (!Retain || !IsRetain(GetBasicInstructionClass(Retain)) || GetObjCArg(Retain) != Arg) { return 0; } - + return Retain; } @@ -2847,7 +2888,7 @@ FindPredecessorAutoreleaseWithSafePath(const Value *Arg, BasicBlock *BB, BB, Ret, DepInsts, V, PA); if (DepInsts.size() != 1) return 0; - + CallInst *Autorelease = dyn_cast_or_null<CallInst>(*DepInsts.begin()); if (!Autorelease) @@ -2857,7 +2898,7 @@ FindPredecessorAutoreleaseWithSafePath(const Value *Arg, BasicBlock *BB, return 0; if (GetObjCArg(Autorelease) != Arg) return 0; - + return Autorelease; } @@ -2873,60 +2914,87 @@ void ObjCARCOpt::OptimizeReturns(Function &F) { if (!F.getReturnType()->isPointerTy()) return; + DEBUG(dbgs() << "\n== ObjCARCOpt::OptimizeReturns ==\n"); + SmallPtrSet<Instruction *, 4> DependingInstructions; SmallPtrSet<const BasicBlock *, 4> Visited; for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) { BasicBlock *BB = FI; ReturnInst *Ret = dyn_cast<ReturnInst>(&BB->back()); - DEBUG(dbgs() << "ObjCARCOpt::OptimizeReturns: Visiting: " << *Ret << "\n"); + DEBUG(dbgs() << "Visiting: " << *Ret << "\n"); if (!Ret) continue; - + const Value *Arg = StripPointerCastsAndObjCCalls(Ret->getOperand(0)); - - // Look for an ``autorelease'' instruction that is a predecssor of Ret and + + // Look for an ``autorelease'' instruction that is a predecessor of Ret and // dependent on Arg such that there are no instructions dependent on Arg // that need a positive ref count in between the autorelease and Ret. CallInst *Autorelease = FindPredecessorAutoreleaseWithSafePath(Arg, BB, Ret, DependingInstructions, Visited, PA); - if (Autorelease) { - DependingInstructions.clear(); - Visited.clear(); - - CallInst *Retain = - FindPredecessorRetainWithSafePath(Arg, BB, Autorelease, - DependingInstructions, Visited, PA); - if (Retain) { - DependingInstructions.clear(); - Visited.clear(); - - // Check that there is nothing that can affect the reference count - // between the retain and the call. Note that Retain need not be in BB. - if (HasSafePathToPredecessorCall(Arg, Retain, DependingInstructions, - Visited, PA)) { - // If so, we can zap the retain and autorelease. 
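// (Editorial sketch, not part of the patch: the source-level pattern
// OptimizeReturns is looking for.  The ARC runtime entry points are declared
// loosely as void* here purely for illustration; the real prototypes use id.
// A value that is retained and then autoreleased on the return path, with
// nothing in between that can touch its reference count, needs neither call.)

extern "C" void *objc_retain(void *obj);
extern "C" void *objc_autorelease(void *obj);

void *returnsBefore(void *obj) {
  void *tmp = objc_retain(obj);    // +1 on the returned object
  return objc_autorelease(tmp);    // hands that +1 to the autorelease pool
}

void *returnsAfter(void *obj) {    // what is left once the pair is zapped
  return obj;
}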
- Changed = true; - ++NumRets; - DEBUG(dbgs() << "ObjCARCOpt::OptimizeReturns: Erasing: " << *Retain - << "\n Erasing: " - << *Autorelease << "\n"); - EraseInstruction(Retain); - EraseInstruction(Autorelease); - } - } - } - DependingInstructions.clear(); Visited.clear(); + + if (!Autorelease) + continue; + + CallInst *Retain = + FindPredecessorRetainWithSafePath(Arg, BB, Autorelease, + DependingInstructions, Visited, PA); + DependingInstructions.clear(); + Visited.clear(); + + if (!Retain) + continue; + + // Check that there is nothing that can affect the reference count + // between the retain and the call. Note that Retain need not be in BB. + bool HasSafePathToCall = HasSafePathToPredecessorCall(Arg, Retain, + DependingInstructions, + Visited, PA); + DependingInstructions.clear(); + Visited.clear(); + + if (!HasSafePathToCall) + continue; + + // If so, we can zap the retain and autorelease. + Changed = true; + ++NumRets; + DEBUG(dbgs() << "Erasing: " << *Retain << "\nErasing: " + << *Autorelease << "\n"); + EraseInstruction(Retain); + EraseInstruction(Autorelease); } +} - DEBUG(dbgs() << "ObjCARCOpt::OptimizeReturns: Finished List.\n\n"); +#ifndef NDEBUG +void +ObjCARCOpt::GatherStatistics(Function &F, bool AfterOptimization) { + llvm::Statistic &NumRetains = + AfterOptimization? NumRetainsAfterOpt : NumRetainsBeforeOpt; + llvm::Statistic &NumReleases = + AfterOptimization? NumReleasesAfterOpt : NumReleasesBeforeOpt; + for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E; ) { + Instruction *Inst = &*I++; + switch (GetBasicInstructionClass(Inst)) { + default: + break; + case IC_Retain: + ++NumRetains; + break; + case IC_Release: + ++NumReleases; + break; + } + } } +#endif bool ObjCARCOpt::doInitialization(Module &M) { if (!EnableARCOpts) @@ -2958,7 +3026,6 @@ bool ObjCARCOpt::doInitialization(Module &M) { // calls finalizers which can have arbitrary side effects. // These are initialized lazily. - RetainRVCallee = 0; AutoreleaseRVCallee = 0; ReleaseCallee = 0; RetainCallee = 0; @@ -2978,7 +3045,8 @@ bool ObjCARCOpt::runOnFunction(Function &F) { Changed = false; - DEBUG(dbgs() << "ObjCARCOpt: Visiting Function: " << F.getName() << "\n"); + DEBUG(dbgs() << "<<< ObjCARCOpt: Visiting Function: " << F.getName() << " >>>" + "\n"); PA.setAA(&getAnalysis<AliasAnalysis>()); @@ -2986,7 +3054,7 @@ bool ObjCARCOpt::runOnFunction(Function &F) { // when compiling code that isn't ObjC, skip these if the relevant ObjC // library functions aren't declared. - // Preliminary optimizations. This also computs UsedInThisFunction. + // Preliminary optimizations. This also computes UsedInThisFunction. OptimizeIndividualCalls(F); // Optimizations for weak pointers. @@ -3013,6 +3081,13 @@ bool ObjCARCOpt::runOnFunction(Function &F) { (1 << IC_AutoreleaseRV))) OptimizeReturns(F); + // Gather statistics after optimization. 
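  // (Editorial note, not part of the patch: NumRetainsBeforeOpt and friends
  // are ordinary llvm::Statistic counters, so in an assertions-enabled build
  // something along the lines of "opt -objc-arc -stats" should print the
  // retain/release counts gathered both before and after the pass runs.)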
+#ifndef NDEBUG + if (AreStatisticsEnabled()) { + GatherStatistics(F, true); + } +#endif + DEBUG(dbgs() << "\n"); return Changed; diff --git a/contrib/llvm/lib/Transforms/Scalar/CodeGenPrepare.cpp b/contrib/llvm/lib/Transforms/Scalar/CodeGenPrepare.cpp index 015fd2e..f0d29c8 100644 --- a/contrib/llvm/lib/Transforms/Scalar/CodeGenPrepare.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/CodeGenPrepare.cpp @@ -18,6 +18,7 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" +#include "llvm/ADT/ValueMap.h" #include "llvm/Analysis/DominatorInternals.h" #include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/InstructionSimplify.h" @@ -88,7 +89,7 @@ namespace { /// Keeps track of non-local addresses that have been sunk into a block. /// This allows us to avoid inserting duplicate code for blocks with /// multiple load/stores of the same address. - DenseMap<Value*, Value*> SunkAddrs; + ValueMap<Value*, Value*> SunkAddrs; /// ModifiedDT - If CFG is modified in anyway, dominator tree may need to /// be updated. @@ -1653,10 +1654,6 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr, // start of the block. CurInstIterator = BB->begin(); SunkAddrs.clear(); - } else { - // This address is now available for reassignment, so erase the table - // entry; we don't want to match some completely different instruction. - SunkAddrs[Addr] = 0; } } ++NumMemoryInsts; @@ -1761,7 +1758,7 @@ bool CodeGenPrepare::OptimizeExtUses(Instruction *I) { if (!DefIsLiveOut) return false; - // Make sure non of the uses are PHI nodes. + // Make sure none of the uses are PHI nodes. for (Value::use_iterator UI = Src->use_begin(), E = Src->use_end(); UI != E; ++UI) { Instruction *User = cast<Instruction>(*UI); diff --git a/contrib/llvm/lib/Transforms/Scalar/GVN.cpp b/contrib/llvm/lib/Transforms/Scalar/GVN.cpp index 129af8d..f350b9b 100644 --- a/contrib/llvm/lib/Transforms/Scalar/GVN.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/GVN.cpp @@ -498,6 +498,75 @@ void ValueTable::verifyRemoved(const Value *V) const { //===----------------------------------------------------------------------===// namespace { + class GVN; + struct AvailableValueInBlock { + /// BB - The basic block in question. + BasicBlock *BB; + enum ValType { + SimpleVal, // A simple offsetted value that is accessed. + LoadVal, // A value produced by a load. + MemIntrin // A memory intrinsic which is loaded from. + }; + + /// V - The value that is live out of the block. + PointerIntPair<Value *, 2, ValType> Val; + + /// Offset - The byte offset in Val that is interesting for the load query. 
+ unsigned Offset; + + static AvailableValueInBlock get(BasicBlock *BB, Value *V, + unsigned Offset = 0) { + AvailableValueInBlock Res; + Res.BB = BB; + Res.Val.setPointer(V); + Res.Val.setInt(SimpleVal); + Res.Offset = Offset; + return Res; + } + + static AvailableValueInBlock getMI(BasicBlock *BB, MemIntrinsic *MI, + unsigned Offset = 0) { + AvailableValueInBlock Res; + Res.BB = BB; + Res.Val.setPointer(MI); + Res.Val.setInt(MemIntrin); + Res.Offset = Offset; + return Res; + } + + static AvailableValueInBlock getLoad(BasicBlock *BB, LoadInst *LI, + unsigned Offset = 0) { + AvailableValueInBlock Res; + Res.BB = BB; + Res.Val.setPointer(LI); + Res.Val.setInt(LoadVal); + Res.Offset = Offset; + return Res; + } + + bool isSimpleValue() const { return Val.getInt() == SimpleVal; } + bool isCoercedLoadValue() const { return Val.getInt() == LoadVal; } + bool isMemIntrinValue() const { return Val.getInt() == MemIntrin; } + + Value *getSimpleValue() const { + assert(isSimpleValue() && "Wrong accessor"); + return Val.getPointer(); + } + + LoadInst *getCoercedLoadValue() const { + assert(isCoercedLoadValue() && "Wrong accessor"); + return cast<LoadInst>(Val.getPointer()); + } + + MemIntrinsic *getMemIntrinValue() const { + assert(isMemIntrinValue() && "Wrong accessor"); + return cast<MemIntrinsic>(Val.getPointer()); + } + + /// MaterializeAdjustedValue - Emit code into this block to adjust the value + /// defined here to the specified type. This handles various coercion cases. + Value *MaterializeAdjustedValue(Type *LoadTy, GVN &gvn) const; + }; class GVN : public FunctionPass { bool NoLoads; @@ -519,6 +588,11 @@ namespace { BumpPtrAllocator TableAllocator; SmallVector<Instruction*, 8> InstrsToErase; + + typedef SmallVector<NonLocalDepResult, 64> LoadDepVect; + typedef SmallVector<AvailableValueInBlock, 64> AvailValInBlkVect; + typedef SmallVector<BasicBlock*, 64> UnavailBlkVect; + public: static char ID; // Pass identification, replacement for typeid explicit GVN(bool noloads = false) @@ -599,11 +673,17 @@ namespace { } - // Helper fuctions - // FIXME: eliminate or document these better + // Helper fuctions of redundant load elimination bool processLoad(LoadInst *L); - bool processInstruction(Instruction *I); bool processNonLocalLoad(LoadInst *L); + void AnalyzeLoadAvailability(LoadInst *LI, LoadDepVect &Deps, + AvailValInBlkVect &ValuesPerBlock, + UnavailBlkVect &UnavailableBlocks); + bool PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock, + UnavailBlkVect &UnavailableBlocks); + + // Other helper routines + bool processInstruction(Instruction *I); bool processBlock(BasicBlock *BB); void dump(DenseMap<uint32_t, Value*> &d); bool iterateOnFunction(Function &F); @@ -1159,114 +1239,6 @@ static Value *GetMemInstValueForLoad(MemIntrinsic *SrcInst, unsigned Offset, return ConstantFoldLoadFromConstPtr(Src, &TD); } -namespace { - -struct AvailableValueInBlock { - /// BB - The basic block in question. - BasicBlock *BB; - enum ValType { - SimpleVal, // A simple offsetted value that is accessed. - LoadVal, // A value produced by a load. - MemIntrin // A memory intrinsic which is loaded from. - }; - - /// V - The value that is live out of the block. - PointerIntPair<Value *, 2, ValType> Val; - - /// Offset - The byte offset in Val that is interesting for the load query. 
- unsigned Offset; - - static AvailableValueInBlock get(BasicBlock *BB, Value *V, - unsigned Offset = 0) { - AvailableValueInBlock Res; - Res.BB = BB; - Res.Val.setPointer(V); - Res.Val.setInt(SimpleVal); - Res.Offset = Offset; - return Res; - } - - static AvailableValueInBlock getMI(BasicBlock *BB, MemIntrinsic *MI, - unsigned Offset = 0) { - AvailableValueInBlock Res; - Res.BB = BB; - Res.Val.setPointer(MI); - Res.Val.setInt(MemIntrin); - Res.Offset = Offset; - return Res; - } - - static AvailableValueInBlock getLoad(BasicBlock *BB, LoadInst *LI, - unsigned Offset = 0) { - AvailableValueInBlock Res; - Res.BB = BB; - Res.Val.setPointer(LI); - Res.Val.setInt(LoadVal); - Res.Offset = Offset; - return Res; - } - - bool isSimpleValue() const { return Val.getInt() == SimpleVal; } - bool isCoercedLoadValue() const { return Val.getInt() == LoadVal; } - bool isMemIntrinValue() const { return Val.getInt() == MemIntrin; } - - Value *getSimpleValue() const { - assert(isSimpleValue() && "Wrong accessor"); - return Val.getPointer(); - } - - LoadInst *getCoercedLoadValue() const { - assert(isCoercedLoadValue() && "Wrong accessor"); - return cast<LoadInst>(Val.getPointer()); - } - - MemIntrinsic *getMemIntrinValue() const { - assert(isMemIntrinValue() && "Wrong accessor"); - return cast<MemIntrinsic>(Val.getPointer()); - } - - /// MaterializeAdjustedValue - Emit code into this block to adjust the value - /// defined here to the specified type. This handles various coercion cases. - Value *MaterializeAdjustedValue(Type *LoadTy, GVN &gvn) const { - Value *Res; - if (isSimpleValue()) { - Res = getSimpleValue(); - if (Res->getType() != LoadTy) { - const DataLayout *TD = gvn.getDataLayout(); - assert(TD && "Need target data to handle type mismatch case"); - Res = GetStoreValueForLoad(Res, Offset, LoadTy, BB->getTerminator(), - *TD); - - DEBUG(dbgs() << "GVN COERCED NONLOCAL VAL:\nOffset: " << Offset << " " - << *getSimpleValue() << '\n' - << *Res << '\n' << "\n\n\n"); - } - } else if (isCoercedLoadValue()) { - LoadInst *Load = getCoercedLoadValue(); - if (Load->getType() == LoadTy && Offset == 0) { - Res = Load; - } else { - Res = GetLoadValueForLoad(Load, Offset, LoadTy, BB->getTerminator(), - gvn); - - DEBUG(dbgs() << "GVN COERCED NONLOCAL LOAD:\nOffset: " << Offset << " " - << *getCoercedLoadValue() << '\n' - << *Res << '\n' << "\n\n\n"); - } - } else { - const DataLayout *TD = gvn.getDataLayout(); - assert(TD && "Need target data to handle type mismatch case"); - Res = GetMemInstValueForLoad(getMemIntrinValue(), Offset, - LoadTy, BB->getTerminator(), *TD); - DEBUG(dbgs() << "GVN COERCED NONLOCAL MEM INTRIN:\nOffset: " << Offset - << " " << *getMemIntrinValue() << '\n' - << *Res << '\n' << "\n\n\n"); - } - return Res; - } -}; - -} // end anonymous namespace /// ConstructSSAForLoadSet - Given a set of loads specified by ValuesPerBlock, /// construct SSA form, allowing us to eliminate LI. 
This returns the value @@ -1323,48 +1295,59 @@ static Value *ConstructSSAForLoadSet(LoadInst *LI, return V; } +Value *AvailableValueInBlock::MaterializeAdjustedValue(Type *LoadTy, GVN &gvn) const { + Value *Res; + if (isSimpleValue()) { + Res = getSimpleValue(); + if (Res->getType() != LoadTy) { + const DataLayout *TD = gvn.getDataLayout(); + assert(TD && "Need target data to handle type mismatch case"); + Res = GetStoreValueForLoad(Res, Offset, LoadTy, BB->getTerminator(), + *TD); + + DEBUG(dbgs() << "GVN COERCED NONLOCAL VAL:\nOffset: " << Offset << " " + << *getSimpleValue() << '\n' + << *Res << '\n' << "\n\n\n"); + } + } else if (isCoercedLoadValue()) { + LoadInst *Load = getCoercedLoadValue(); + if (Load->getType() == LoadTy && Offset == 0) { + Res = Load; + } else { + Res = GetLoadValueForLoad(Load, Offset, LoadTy, BB->getTerminator(), + gvn); + + DEBUG(dbgs() << "GVN COERCED NONLOCAL LOAD:\nOffset: " << Offset << " " + << *getCoercedLoadValue() << '\n' + << *Res << '\n' << "\n\n\n"); + } + } else { + const DataLayout *TD = gvn.getDataLayout(); + assert(TD && "Need target data to handle type mismatch case"); + Res = GetMemInstValueForLoad(getMemIntrinValue(), Offset, + LoadTy, BB->getTerminator(), *TD); + DEBUG(dbgs() << "GVN COERCED NONLOCAL MEM INTRIN:\nOffset: " << Offset + << " " << *getMemIntrinValue() << '\n' + << *Res << '\n' << "\n\n\n"); + } + return Res; +} + static bool isLifetimeStart(const Instruction *Inst) { if (const IntrinsicInst* II = dyn_cast<IntrinsicInst>(Inst)) return II->getIntrinsicID() == Intrinsic::lifetime_start; return false; } -/// processNonLocalLoad - Attempt to eliminate a load whose dependencies are -/// non-local by performing PHI construction. -bool GVN::processNonLocalLoad(LoadInst *LI) { - // Find the non-local dependencies of the load. - SmallVector<NonLocalDepResult, 64> Deps; - AliasAnalysis::Location Loc = VN.getAliasAnalysis()->getLocation(LI); - MD->getNonLocalPointerDependency(Loc, true, LI->getParent(), Deps); - //DEBUG(dbgs() << "INVESTIGATING NONLOCAL LOAD: " - // << Deps.size() << *LI << '\n'); - - // If we had to process more than one hundred blocks to find the - // dependencies, this load isn't worth worrying about. Optimizing - // it will be too expensive. - unsigned NumDeps = Deps.size(); - if (NumDeps > 100) - return false; - - // If we had a phi translation failure, we'll have a single entry which is a - // clobber in the current block. Reject this early. - if (NumDeps == 1 && - !Deps[0].getResult().isDef() && !Deps[0].getResult().isClobber()) { - DEBUG( - dbgs() << "GVN: non-local load "; - WriteAsOperand(dbgs(), LI); - dbgs() << " has unknown dependencies\n"; - ); - return false; - } +void GVN::AnalyzeLoadAvailability(LoadInst *LI, LoadDepVect &Deps, + AvailValInBlkVect &ValuesPerBlock, + UnavailBlkVect &UnavailableBlocks) { // Filter out useless results (non-locals, etc). Keep track of the blocks // where we have a value available in repl, also keep track of whether we see // dependencies that produce an unknown value for the load (such as a call // that could potentially clobber the load). 
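// (Editorial sketch, not part of the patch: the source-level shape of the
// redundancy that AnalyzeLoadAvailability and PerformLoadPRE deal with.
// Names and the hand-applied "after" form are illustrative only.)

// Before: the load of *p after the branch is partially redundant -- its value
// is known from the store on the "then" path but not on the "else" path.
int loadBeforePRE(int *p, bool c) {
  if (c)
    *p = 4;
  return *p;
}

// After (conceptually): load PRE inserts the load in the predecessor where the
// value was unavailable and joins the two values with a PHI, so the load that
// followed the branch disappears.
int loadAfterPRE(int *p, bool c) {
  int v;
  if (c) {
    *p = 4;
    v = 4;        // value available from the store
  } else {
    v = *p;       // load inserted into the previously unavailable predecessor
  }
  return v;       // the PHI of {4, v} replaces the original load
}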
- SmallVector<AvailableValueInBlock, 64> ValuesPerBlock; - SmallVector<BasicBlock*, 64> UnavailableBlocks; - + unsigned NumDeps = Deps.size(); for (unsigned i = 0, e = NumDeps; i != e; ++i) { BasicBlock *DepBB = Deps[i].getBB(); MemDepResult DepInfo = Deps[i].getResult(); @@ -1480,35 +1463,11 @@ bool GVN::processNonLocalLoad(LoadInst *LI) { } UnavailableBlocks.push_back(DepBB); - continue; } +} - // If we have no predecessors that produce a known value for this load, exit - // early. - if (ValuesPerBlock.empty()) return false; - - // If all of the instructions we depend on produce a known value for this - // load, then it is fully redundant and we can use PHI insertion to compute - // its value. Insert PHIs and remove the fully redundant value now. - if (UnavailableBlocks.empty()) { - DEBUG(dbgs() << "GVN REMOVING NONLOCAL LOAD: " << *LI << '\n'); - - // Perform PHI construction. - Value *V = ConstructSSAForLoadSet(LI, ValuesPerBlock, *this); - LI->replaceAllUsesWith(V); - - if (isa<PHINode>(V)) - V->takeName(LI); - if (V->getType()->getScalarType()->isPointerTy()) - MD->invalidateCachedPointerInfo(V); - markInstructionForDeletion(LI); - ++NumGVNLoad; - return true; - } - - if (!EnablePRE || !EnableLoadPRE) - return false; - +bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock, + UnavailBlkVect &UnavailableBlocks) { // Okay, we have *some* definitions of the value. This means that the value // is available in some of our (transitive) predecessors. Lets think about // doing PRE of this load. This will involve inserting a new load into the @@ -1526,7 +1485,6 @@ bool GVN::processNonLocalLoad(LoadInst *LI) { BasicBlock *LoadBB = LI->getParent(); BasicBlock *TmpBB = LoadBB; - bool allSingleSucc = true; while (TmpBB->getSinglePredecessor()) { TmpBB = TmpBB->getSinglePredecessor(); if (TmpBB == LoadBB) // Infinite (unreachable) loop. @@ -1615,13 +1573,8 @@ bool GVN::processNonLocalLoad(LoadInst *LI) { // pointer if it is not available. PHITransAddr Address(LI->getPointerOperand(), TD); Value *LoadPtr = 0; - if (allSingleSucc) { - LoadPtr = Address.PHITranslateWithInsertion(LoadBB, UnavailablePred, - *DT, NewInsts); - } else { - Address.PHITranslateValue(LoadBB, UnavailablePred, DT); - LoadPtr = Address.getAddr(); - } + LoadPtr = Address.PHITranslateWithInsertion(LoadBB, UnavailablePred, + *DT, NewInsts); // If we couldn't find or insert a computation of this phi translated value, // we fail PRE. @@ -1632,24 +1585,6 @@ bool GVN::processNonLocalLoad(LoadInst *LI) { break; } - // Make sure it is valid to move this load here. We have to watch out for: - // @1 = getelementptr (i8* p, ... - // test p and branch if == 0 - // load @1 - // It is valid to have the getelementptr before the test, even if p can - // be 0, as getelementptr only does address arithmetic. - // If we are not pushing the value through any multiple-successor blocks - // we do not have this case. Otherwise, check that the load is safe to - // put anywhere; this can be improved, but should be conservatively safe. - if (!allSingleSucc && - // FIXME: REEVALUTE THIS. - !isSafeToLoadUnconditionally(LoadPtr, - UnavailablePred->getTerminator(), - LI->getAlignment(), TD)) { - CanDoPRE = false; - break; - } - I->second = LoadPtr; } @@ -1714,6 +1649,72 @@ bool GVN::processNonLocalLoad(LoadInst *LI) { return true; } +/// processNonLocalLoad - Attempt to eliminate a load whose dependencies are +/// non-local by performing PHI construction. 
+bool GVN::processNonLocalLoad(LoadInst *LI) { + // Step 1: Find the non-local dependencies of the load. + LoadDepVect Deps; + AliasAnalysis::Location Loc = VN.getAliasAnalysis()->getLocation(LI); + MD->getNonLocalPointerDependency(Loc, true, LI->getParent(), Deps); + + // If we had to process more than one hundred blocks to find the + // dependencies, this load isn't worth worrying about. Optimizing + // it will be too expensive. + unsigned NumDeps = Deps.size(); + if (NumDeps > 100) + return false; + + // If we had a phi translation failure, we'll have a single entry which is a + // clobber in the current block. Reject this early. + if (NumDeps == 1 && + !Deps[0].getResult().isDef() && !Deps[0].getResult().isClobber()) { + DEBUG( + dbgs() << "GVN: non-local load "; + WriteAsOperand(dbgs(), LI); + dbgs() << " has unknown dependencies\n"; + ); + return false; + } + + // Step 2: Analyze the availability of the load + AvailValInBlkVect ValuesPerBlock; + UnavailBlkVect UnavailableBlocks; + AnalyzeLoadAvailability(LI, Deps, ValuesPerBlock, UnavailableBlocks); + + // If we have no predecessors that produce a known value for this load, exit + // early. + if (ValuesPerBlock.empty()) + return false; + + // Step 3: Eliminate fully redundancy. + // + // If all of the instructions we depend on produce a known value for this + // load, then it is fully redundant and we can use PHI insertion to compute + // its value. Insert PHIs and remove the fully redundant value now. + if (UnavailableBlocks.empty()) { + DEBUG(dbgs() << "GVN REMOVING NONLOCAL LOAD: " << *LI << '\n'); + + // Perform PHI construction. + Value *V = ConstructSSAForLoadSet(LI, ValuesPerBlock, *this); + LI->replaceAllUsesWith(V); + + if (isa<PHINode>(V)) + V->takeName(LI); + if (V->getType()->getScalarType()->isPointerTy()) + MD->invalidateCachedPointerInfo(V); + markInstructionForDeletion(LI); + ++NumGVNLoad; + return true; + } + + // Step 4: Eliminate partial redundancy. + if (!EnablePRE || !EnableLoadPRE) + return false; + + return PerformLoadPRE(LI, ValuesPerBlock, UnavailableBlocks); +} + + static void patchReplacementInstruction(Instruction *I, Value *Repl) { // Patch the replacement so that it is not more restrictive than the value // being replaced. diff --git a/contrib/llvm/lib/Transforms/Scalar/GlobalMerge.cpp b/contrib/llvm/lib/Transforms/Scalar/GlobalMerge.cpp index 5d02c68..4796eb2 100644 --- a/contrib/llvm/lib/Transforms/Scalar/GlobalMerge.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/GlobalMerge.cpp @@ -200,9 +200,8 @@ void GlobalMerge::collectUsedGlobalVariables(Module &M) { if (!GV || !GV->hasInitializer()) return; // Should be an array of 'i8*'. 
- const ConstantArray *InitList = dyn_cast<ConstantArray>(GV->getInitializer()); - if (InitList == 0) return; - + const ConstantArray *InitList = cast<ConstantArray>(GV->getInitializer()); + for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i) if (const GlobalVariable *G = dyn_cast<GlobalVariable>(InitList->getOperand(i)->stripPointerCasts())) diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopRotation.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopRotation.cpp index e98ae95..14c5655 100644 --- a/contrib/llvm/lib/Transforms/Scalar/LoopRotation.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/LoopRotation.cpp @@ -56,8 +56,8 @@ namespace { } bool runOnLoop(Loop *L, LPPassManager &LPM); - void simplifyLoopLatch(Loop *L); - bool rotateLoop(Loop *L); + bool simplifyLoopLatch(Loop *L); + bool rotateLoop(Loop *L, bool SimplifiedLatch); private: LoopInfo *LI; @@ -84,13 +84,14 @@ bool LoopRotate::runOnLoop(Loop *L, LPPassManager &LPM) { // Simplify the loop latch before attempting to rotate the header // upward. Rotation may not be needed if the loop tail can be folded into the // loop exit. - simplifyLoopLatch(L); + bool SimplifiedLatch = simplifyLoopLatch(L); // One loop can be rotated multiple times. bool MadeChange = false; - while (rotateLoop(L)) + while (rotateLoop(L, SimplifiedLatch)) { MadeChange = true; - + SimplifiedLatch = false; + } return MadeChange; } @@ -212,25 +213,25 @@ static bool shouldSpeculateInstrs(BasicBlock::iterator Begin, /// canonical form so downstream passes can handle it. /// /// I don't believe this invalidates SCEV. -void LoopRotate::simplifyLoopLatch(Loop *L) { +bool LoopRotate::simplifyLoopLatch(Loop *L) { BasicBlock *Latch = L->getLoopLatch(); if (!Latch || Latch->hasAddressTaken()) - return; + return false; BranchInst *Jmp = dyn_cast<BranchInst>(Latch->getTerminator()); if (!Jmp || !Jmp->isUnconditional()) - return; + return false; BasicBlock *LastExit = Latch->getSinglePredecessor(); if (!LastExit || !L->isLoopExiting(LastExit)) - return; + return false; BranchInst *BI = dyn_cast<BranchInst>(LastExit->getTerminator()); if (!BI) - return; + return false; if (!shouldSpeculateInstrs(Latch->begin(), Jmp)) - return; + return false; DEBUG(dbgs() << "Folding loop latch " << Latch->getName() << " into " << LastExit->getName() << "\n"); @@ -253,10 +254,20 @@ void LoopRotate::simplifyLoopLatch(Loop *L) { if (DominatorTree *DT = getAnalysisIfAvailable<DominatorTree>()) DT->eraseNode(Latch); Latch->eraseFromParent(); + return true; } /// Rotate loop LP. Return true if the loop is rotated. -bool LoopRotate::rotateLoop(Loop *L) { +/// +/// \param SimplifiedLatch is true if the latch was just folded into the final +/// loop exit. In this case we may want to rotate even though the new latch is +/// now an exiting branch. This rotation would have happened had the latch not +/// been simplified. However, if SimplifiedLatch is false, then we avoid +/// rotating loops in which the latch exits to avoid excessive or endless +/// rotation. LoopRotate should be repeatable and converge to a canonical +/// form. This property is satisfied because simplifying the loop latch can only +/// happen once across multiple invocations of the LoopRotate pass. +bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) { // If the loop has only one block then there is not much to rotate. 
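// (Editorial sketch, not part of the patch: what rotation looks like at the
// source level for a simple counted loop.  After rotation the loop-closing
// branch, the latch, performs the exit test, which is why an already-exiting
// latch normally means "already rotated" -- unless that latch was only just
// produced by simplifyLoopLatch, the case the SimplifiedLatch flag covers.)
void rotationBefore(int n, void (*body)(int)) {
  int i = 0;
  while (i < n) {    // header both tests and exits; the latch branch is unconditional
    body(i);
    ++i;
  }
}

void rotationAfter(int n, void (*body)(int)) {
  int i = 0;
  if (i < n) {       // guard created by rotation
    do {
      body(i);
      ++i;
    } while (i < n); // the latch now ends in the exiting test
  }
}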
if (L->getBlocks().size() == 1) return false; @@ -276,7 +287,12 @@ bool LoopRotate::rotateLoop(Loop *L) { // If the loop latch already contains a branch that leaves the loop then the // loop is already rotated. - if (OrigLatch == 0 || L->isLoopExiting(OrigLatch)) + if (OrigLatch == 0) + return false; + + // Rotate if either the loop latch does *not* exit the loop, or if the loop + // latch was just simplified. + if (L->isLoopExiting(OrigLatch) && !SimplifiedLatch) return false; // Check size of original header and reject loop if it is very big or we can't @@ -505,4 +521,3 @@ bool LoopRotate::rotateLoop(Loop *L) { ++NumRotated; return true; } - diff --git a/contrib/llvm/lib/Transforms/Scalar/Reassociate.cpp b/contrib/llvm/lib/Transforms/Scalar/Reassociate.cpp index 7ee4027..a3c241d 100644 --- a/contrib/llvm/lib/Transforms/Scalar/Reassociate.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/Reassociate.cpp @@ -143,13 +143,9 @@ namespace { // So, if Rank(X) < Rank(Y) < Rank(Z), it means X is defined earlier // than Y which is defined earlier than Z. Permute "x | 1", "Y & 2", // "z" in the order of X-Y-Z is better than any other orders. - class PtrSortFunctor { - ArrayRef<XorOpnd> A; - - public: - PtrSortFunctor(ArrayRef<XorOpnd> Array) : A(Array) {} - bool operator()(unsigned LHSIndex, unsigned RHSIndex) { - return A[LHSIndex].getSymbolicRank() < A[RHSIndex].getSymbolicRank(); + struct PtrSortFunctor { + bool operator()(XorOpnd * const &LHS, XorOpnd * const &RHS) { + return LHS->getSymbolicRank() < RHS->getSymbolicRank(); } }; private: @@ -1199,9 +1195,6 @@ bool Reassociate::CombineXorOpnd(Instruction *I, XorOpnd *Opnd1, XorOpnd *Opnd2, if (X != Opnd2->getSymbolicPart()) return false; - const APInt &C1 = Opnd1->getConstPart(); - const APInt &C2 = Opnd2->getConstPart(); - // This many instruction become dead.(At least "Opnd1 ^ Opnd2" will die.) int DeadInstNum = 1; if (Opnd1->getValue()->hasOneUse()) @@ -1219,6 +1212,8 @@ bool Reassociate::CombineXorOpnd(Instruction *I, XorOpnd *Opnd1, XorOpnd *Opnd2, if (Opnd2->isOrExpr()) std::swap(Opnd1, Opnd2); + const APInt &C1 = Opnd1->getConstPart(); + const APInt &C2 = Opnd2->getConstPart(); APInt C3((~C1) ^ C2); // Do not increase code size! @@ -1234,6 +1229,8 @@ bool Reassociate::CombineXorOpnd(Instruction *I, XorOpnd *Opnd1, XorOpnd *Opnd2, } else if (Opnd1->isOrExpr()) { // Xor-Rule 3: (x | c1) ^ (x | c2) = (x & c3) ^ c3 where c3 = c1 ^ c2 // + const APInt &C1 = Opnd1->getConstPart(); + const APInt &C2 = Opnd2->getConstPart(); APInt C3 = C1 ^ C2; // Do not increase code size @@ -1248,6 +1245,8 @@ bool Reassociate::CombineXorOpnd(Instruction *I, XorOpnd *Opnd1, XorOpnd *Opnd2, } else { // Xor-Rule 4: (x & c1) ^ (x & c2) = (x & (c1^c2)) // + const APInt &C1 = Opnd1->getConstPart(); + const APInt &C2 = Opnd2->getConstPart(); APInt C3 = C1 ^ C2; Res = createAndInstr(I, X, C3); } @@ -1274,7 +1273,7 @@ Value *Reassociate::OptimizeXor(Instruction *I, return 0; SmallVector<XorOpnd, 8> Opnds; - SmallVector<unsigned, 8> OpndIndices; + SmallVector<XorOpnd*, 8> OpndPtrs; Type *Ty = Ops[0].Op->getType(); APInt ConstOpnd(Ty->getIntegerBitWidth(), 0); @@ -1285,23 +1284,29 @@ Value *Reassociate::OptimizeXor(Instruction *I, XorOpnd O(V); O.setSymbolicRank(getRank(O.getSymbolicPart())); Opnds.push_back(O); - OpndIndices.push_back(Opnds.size() - 1); } else ConstOpnd ^= cast<ConstantInt>(V)->getValue(); } + // NOTE: From this point on, do *NOT* add/delete element to/from "Opnds". 
+ // It would otherwise invalidate the "Opnds"'s iterator, and hence invalidate + // the "OpndPtrs" as well. For the similar reason, do not fuse this loop + // with the previous loop --- the iterator of the "Opnds" may be invalidated + // when new elements are added to the vector. + for (unsigned i = 0, e = Opnds.size(); i != e; ++i) + OpndPtrs.push_back(&Opnds[i]); + // Step 2: Sort the Xor-Operands in a way such that the operands containing // the same symbolic value cluster together. For instance, the input operand // sequence ("x | 123", "y & 456", "x & 789") will be sorted into: // ("x | 123", "x & 789", "y & 456"). - std::sort(OpndIndices.begin(), OpndIndices.end(), - XorOpnd::PtrSortFunctor(Opnds)); + std::sort(OpndPtrs.begin(), OpndPtrs.end(), XorOpnd::PtrSortFunctor()); // Step 3: Combine adjacent operands XorOpnd *PrevOpnd = 0; bool Changed = false; for (unsigned i = 0, e = Opnds.size(); i < e; i++) { - XorOpnd *CurrOpnd = &Opnds[OpndIndices[i]]; + XorOpnd *CurrOpnd = OpndPtrs[i]; // The combined value Value *CV; diff --git a/contrib/llvm/lib/Transforms/Scalar/SROA.cpp b/contrib/llvm/lib/Transforms/Scalar/SROA.cpp index f6bb365..d073e78 100644 --- a/contrib/llvm/lib/Transforms/Scalar/SROA.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/SROA.cpp @@ -2322,17 +2322,15 @@ static Value *insertVector(IRBuilderTy &IRB, Value *Old, Value *V, V = IRB.CreateShuffleVector(V, UndefValue::get(V->getType()), ConstantVector::get(Mask), Name + ".expand"); - DEBUG(dbgs() << " shuffle1: " << *V << "\n"); + DEBUG(dbgs() << " shuffle: " << *V << "\n"); Mask.clear(); for (unsigned i = 0; i != VecTy->getNumElements(); ++i) - if (i >= BeginIndex && i < EndIndex) - Mask.push_back(IRB.getInt32(i)); - else - Mask.push_back(IRB.getInt32(i + VecTy->getNumElements())); - V = IRB.CreateShuffleVector(V, Old, ConstantVector::get(Mask), - Name + "insert"); - DEBUG(dbgs() << " shuffle2: " << *V << "\n"); + Mask.push_back(IRB.getInt1(i >= BeginIndex && i < EndIndex)); + + V = IRB.CreateSelect(ConstantVector::get(Mask), V, Old, Name + "blend"); + + DEBUG(dbgs() << " blend: " << *V << "\n"); return V; } @@ -2671,6 +2669,7 @@ private: StoreInst *NewSI; if (BeginOffset == NewAllocaBeginOffset && + EndOffset == NewAllocaEndOffset && canConvertValue(TD, V->getType(), NewAllocaTy)) { V = convertValue(TD, IRB, V, NewAllocaTy); NewSI = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlignment(), @@ -3050,16 +3049,16 @@ private: bool visitSelectInst(SelectInst &SI) { DEBUG(dbgs() << " original: " << SI << "\n"); - - // Find the operand we need to rewrite here. - bool IsTrueVal = SI.getTrueValue() == OldPtr; - if (IsTrueVal) - assert(SI.getFalseValue() != OldPtr && "Pointer is both operands!"); - else - assert(SI.getFalseValue() == OldPtr && "Pointer isn't an operand!"); + assert((SI.getTrueValue() == OldPtr || SI.getFalseValue() == OldPtr) && + "Pointer isn't an operand!"); Value *NewPtr = getAdjustedAllocaPtr(IRB, OldPtr->getType()); - SI.setOperand(IsTrueVal ? 1 : 2, NewPtr); + // Replace the operands which were using the old pointer. 
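    // (Editorial note, not part of the patch: the code this replaces asserted
    // that exactly one select operand was the old alloca pointer, so a
    // degenerate select whose true and false operands are both that pointer
    // would hit the "Pointer is both operands!" assertion.  Checking each
    // operand independently handles that case while still rewriting the
    // ordinary single-operand form.)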
+ if (SI.getOperand(1) == OldPtr) + SI.setOperand(1, NewPtr); + if (SI.getOperand(2) == OldPtr) + SI.setOperand(2, NewPtr); + DEBUG(dbgs() << " to: " << SI << "\n"); deleteIfTriviallyDead(OldPtr); return false; diff --git a/contrib/llvm/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/contrib/llvm/lib/Transforms/Scalar/ScalarReplAggregates.cpp index e590a37..bfde334 100644 --- a/contrib/llvm/lib/Transforms/Scalar/ScalarReplAggregates.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/ScalarReplAggregates.cpp @@ -1462,8 +1462,8 @@ bool SROA::ShouldAttemptScalarRepl(AllocaInst *AI) { } // performScalarRepl - This algorithm is a simple worklist driven algorithm, -// which runs on all of the alloca instructions in the function, removing them -// if they are only used by getelementptr instructions. +// which runs on all of the alloca instructions in the entry block, removing +// them if they are only used by getelementptr instructions. // bool SROA::performScalarRepl(Function &F) { std::vector<AllocaInst*> WorkList; @@ -1724,17 +1724,8 @@ void SROA::isSafeGEP(GetElementPtrInst *GEPI, continue; ConstantInt *IdxVal = dyn_cast<ConstantInt>(GEPIt.getOperand()); - if (!IdxVal) { - // Non constant GEPs are only a problem on arrays, structs, and pointers - // Vectors can be dynamically indexed. - // FIXME: Add support for dynamic indexing on arrays. This should be - // ok on any subarrays of the alloca array, eg, a[0][i] is ok, but a[i][0] - // isn't. - if (!(*GEPIt)->isVectorTy()) - return MarkUnsafe(Info, GEPI); - NonConstant = true; - NonConstantIdxSize = TD->getTypeAllocSize(*GEPIt); - } + if (!IdxVal) + return MarkUnsafe(Info, GEPI); } // Compute the offset due to this GEP and check if the alloca has a diff --git a/contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp b/contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp index 63d7a1d..be8d39e 100644 --- a/contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp +++ b/contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp @@ -87,29 +87,26 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc, assert(VMap.count(I) && "No mapping from source argument specified!"); #endif - // Clone any attributes. - if (NewFunc->arg_size() == OldFunc->arg_size()) - NewFunc->copyAttributesFrom(OldFunc); - else { - //Some arguments were deleted with the VMap. Copy arguments one by one - for (Function::const_arg_iterator I = OldFunc->arg_begin(), - E = OldFunc->arg_end(); I != E; ++I) - if (Argument* Anew = dyn_cast<Argument>(VMap[I])) { - AttributeSet attrs = OldFunc->getAttributes() - .getParamAttributes(I->getArgNo() + 1); - if (attrs.getNumSlots() > 0) - Anew->addAttr(attrs); - } - NewFunc->setAttributes(NewFunc->getAttributes() - .addAttributes(NewFunc->getContext(), - AttributeSet::ReturnIndex, - OldFunc->getAttributes())); - NewFunc->setAttributes(NewFunc->getAttributes() - .addAttributes(NewFunc->getContext(), - AttributeSet::FunctionIndex, - OldFunc->getAttributes())); + AttributeSet OldAttrs = OldFunc->getAttributes(); + // Clone any argument attributes that are present in the VMap. 
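  // (Editorial note, not part of the patch: copying attributes one surviving
  // argument at a time is what keeps this correct when the VMap has dropped
  // some arguments -- a blanket copyAttributesFrom() would line parameter
  // attributes up with the wrong positions, which is exactly the case the
  // deleted "arguments were deleted with the VMap" branch was guarding.)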
+ for (Function::const_arg_iterator I = OldFunc->arg_begin(), + E = OldFunc->arg_end(); + I != E; ++I) + if (Argument *Anew = dyn_cast<Argument>(VMap[I])) { + AttributeSet attrs = + OldAttrs.getParamAttributes(I->getArgNo() + 1); + if (attrs.getNumSlots() > 0) + Anew->addAttr(attrs); + } - } + NewFunc->setAttributes(NewFunc->getAttributes() + .addAttributes(NewFunc->getContext(), + AttributeSet::ReturnIndex, + OldAttrs.getRetAttributes())); + NewFunc->setAttributes(NewFunc->getAttributes() + .addAttributes(NewFunc->getContext(), + AttributeSet::FunctionIndex, + OldAttrs.getFnAttributes())); // Loop over all of the basic blocks in the function, cloning them as // appropriate. Note that we save BE this way in order to handle cloning of diff --git a/contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp b/contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp index e9828d6..dabb67b9 100644 --- a/contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp +++ b/contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp @@ -758,8 +758,10 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, // If the call site was an invoke instruction, add a branch to the normal // destination. - if (InvokeInst *II = dyn_cast<InvokeInst>(TheCall)) - BranchInst::Create(II->getNormalDest(), TheCall); + if (InvokeInst *II = dyn_cast<InvokeInst>(TheCall)) { + BranchInst *NewBr = BranchInst::Create(II->getNormalDest(), TheCall); + NewBr->setDebugLoc(Returns[0]->getDebugLoc()); + } // If the return instruction returned a value, replace uses of the call with // uses of the returned value. @@ -787,15 +789,16 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, // "starter" and "ender" blocks. How we accomplish this depends on whether // this is an invoke instruction or a call instruction. BasicBlock *AfterCallBB; + BranchInst *CreatedBranchToNormalDest = NULL; if (InvokeInst *II = dyn_cast<InvokeInst>(TheCall)) { // Add an unconditional branch to make this look like the CallInst case... - BranchInst *NewBr = BranchInst::Create(II->getNormalDest(), TheCall); + CreatedBranchToNormalDest = BranchInst::Create(II->getNormalDest(), TheCall); // Split the basic block. This guarantees that no PHI nodes will have to be // updated due to new incoming edges, and make the invoke case more // symmetric to the call case. - AfterCallBB = OrigBB->splitBasicBlock(NewBr, + AfterCallBB = OrigBB->splitBasicBlock(CreatedBranchToNormalDest, CalledFunc->getName()+".exit"); } else { // It's a call @@ -850,11 +853,20 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, // Add a branch to the merge points and remove return instructions. + DebugLoc Loc; for (unsigned i = 0, e = Returns.size(); i != e; ++i) { ReturnInst *RI = Returns[i]; - BranchInst::Create(AfterCallBB, RI); + BranchInst* BI = BranchInst::Create(AfterCallBB, RI); + Loc = RI->getDebugLoc(); + BI->setDebugLoc(Loc); RI->eraseFromParent(); } + // We need to set the debug location to *somewhere* inside the + // inlined function. The line number may be nonsensical, but the + // instruction will at least be associated with the right + // function. + if (CreatedBranchToNormalDest) + CreatedBranchToNormalDest->setDebugLoc(Loc); } else if (!Returns.empty()) { // Otherwise, if there is exactly one return value, just replace anything // using the return value of the call with the computed value. 
@@ -874,6 +886,9 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, AfterCallBB->getInstList().splice(AfterCallBB->begin(), ReturnBB->getInstList()); + if (CreatedBranchToNormalDest) + CreatedBranchToNormalDest->setDebugLoc(Returns[0]->getDebugLoc()); + // Delete the return instruction now and empty ReturnBB now. Returns[0]->eraseFromParent(); ReturnBB->eraseFromParent(); diff --git a/contrib/llvm/lib/Transforms/Utils/Local.cpp b/contrib/llvm/lib/Transforms/Utils/Local.cpp index be80d34..12e5b3e 100644 --- a/contrib/llvm/lib/Transforms/Utils/Local.cpp +++ b/contrib/llvm/lib/Transforms/Utils/Local.cpp @@ -832,7 +832,24 @@ unsigned llvm::getOrEnforceKnownAlignment(Value *V, unsigned PrefAlign, /// Dbg Intrinsic utilities /// -/// Inserts a llvm.dbg.value instrinsic before the stores to an alloca'd value +/// See if there is a dbg.value intrinsic for DIVar before I. +static bool LdStHasDebugValue(DIVariable &DIVar, Instruction *I) { + // Since we can't guarantee that the original dbg.declare instrinsic + // is removed by LowerDbgDeclare(), we need to make sure that we are + // not inserting the same dbg.value intrinsic over and over. + llvm::BasicBlock::InstListType::iterator PrevI(I); + if (PrevI != I->getParent()->getInstList().begin()) { + --PrevI; + if (DbgValueInst *DVI = dyn_cast<DbgValueInst>(PrevI)) + if (DVI->getValue() == I->getOperand(0) && + DVI->getOffset() == 0 && + DVI->getVariable() == DIVar) + return true; + } + return false; +} + +/// Inserts a llvm.dbg.value intrinsic before a store to an alloca'd value /// that has an associated llvm.dbg.decl intrinsic. bool llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI, StoreInst *SI, DIBuilder &Builder) { @@ -840,6 +857,9 @@ bool llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI, if (!DIVar.Verify()) return false; + if (LdStHasDebugValue(DIVar, SI)) + return true; + Instruction *DbgVal = NULL; // If an argument is zero extended then use argument directly. The ZExt // may be zapped by an optimization pass in future. @@ -863,7 +883,7 @@ bool llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI, return true; } -/// Inserts a llvm.dbg.value instrinsic before the stores to an alloca'd value +/// Inserts a llvm.dbg.value intrinsic before a load of an alloca'd value /// that has an associated llvm.dbg.decl intrinsic. bool llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI, LoadInst *LI, DIBuilder &Builder) { @@ -871,6 +891,9 @@ bool llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI, if (!DIVar.Verify()) return false; + if (LdStHasDebugValue(DIVar, LI)) + return true; + Instruction *DbgVal = Builder.insertDbgValueIntrinsic(LI->getOperand(0), 0, DIVar, LI); @@ -902,6 +925,8 @@ bool llvm::LowerDbgDeclare(Function &F) { E = Dbgs.end(); I != E; ++I) { DbgDeclareInst *DDI = *I; if (AllocaInst *AI = dyn_cast_or_null<AllocaInst>(DDI->getAddress())) { + // We only remove the dbg.declare intrinsic if all uses are + // converted to dbg.value intrinsics. 
bool RemoveDDI = true; for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); UI != E; ++UI) diff --git a/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp index 681bf9c..052ad85 100644 --- a/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -59,6 +59,10 @@ static cl::opt<bool> SinkCommon("simplifycfg-sink-common", cl::Hidden, cl::init(true), cl::desc("Sink common instructions down to the end block")); +static cl::opt<bool> +HoistCondStores("simplifycfg-hoist-cond-stores", cl::Hidden, cl::init(true), + cl::desc("Hoist conditional stores if an unconditional store preceeds")); + STATISTIC(NumBitMaps, "Number of switch instructions turned into bitmaps"); STATISTIC(NumLookupTables, "Number of switch instructions turned into lookup tables"); STATISTIC(NumSinkCommons, "Number of common instructions sunk down to the end block"); @@ -1332,6 +1336,66 @@ static bool SinkThenElseCodeToEnd(BranchInst *BI1) { return Changed; } +/// \brief Determine if we can hoist sink a sole store instruction out of a +/// conditional block. +/// +/// We are looking for code like the following: +/// BrBB: +/// store i32 %add, i32* %arrayidx2 +/// ... // No other stores or function calls (we could be calling a memory +/// ... // function). +/// %cmp = icmp ult %x, %y +/// br i1 %cmp, label %EndBB, label %ThenBB +/// ThenBB: +/// store i32 %add5, i32* %arrayidx2 +/// br label EndBB +/// EndBB: +/// ... +/// We are going to transform this into: +/// BrBB: +/// store i32 %add, i32* %arrayidx2 +/// ... // +/// %cmp = icmp ult %x, %y +/// %add.add5 = select i1 %cmp, i32 %add, %add5 +/// store i32 %add.add5, i32* %arrayidx2 +/// ... +/// +/// \return The pointer to the value of the previous store if the store can be +/// hoisted into the predecessor block. 0 otherwise. +Value *isSafeToSpeculateStore(Instruction *I, BasicBlock *BrBB, + BasicBlock *StoreBB, BasicBlock *EndBB) { + StoreInst *StoreToHoist = dyn_cast<StoreInst>(I); + if (!StoreToHoist) + return 0; + + // Volatile or atomic. + if (!StoreToHoist->isSimple()) + return 0; + + Value *StorePtr = StoreToHoist->getPointerOperand(); + + // Look for a store to the same pointer in BrBB. + unsigned MaxNumInstToLookAt = 10; + for (BasicBlock::reverse_iterator RI = BrBB->rbegin(), + RE = BrBB->rend(); RI != RE && (--MaxNumInstToLookAt); ++RI) { + Instruction *CurI = &*RI; + + // Could be calling an instruction that effects memory like free(). + if (CurI->mayHaveSideEffects() && !isa<StoreInst>(CurI)) + return 0; + + StoreInst *SI = dyn_cast<StoreInst>(CurI); + // Found the previous store make sure it stores to the same location. + if (SI && SI->getPointerOperand() == StorePtr) + // Found the previous store, return its value operand. + return SI->getValueOperand(); + else if (SI) + return 0; // Unknown store. + } + + return 0; +} + /// \brief Speculate a conditional basic block flattening the CFG. /// /// Note that this is a very risky transform currently. 
Speculating @@ -1395,6 +1459,8 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB) { SmallDenseMap<Instruction *, unsigned, 4> SinkCandidateUseCounts; unsigned SpeculationCost = 0; + Value *SpeculatedStoreValue = 0; + StoreInst *SpeculatedStore = 0; for (BasicBlock::iterator BBI = ThenBB->begin(), BBE = llvm::prior(ThenBB->end()); BBI != BBE; ++BBI) { @@ -1410,13 +1476,21 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB) { return false; // Don't hoist the instruction if it's unsafe or expensive. - if (!isSafeToSpeculativelyExecute(I)) + if (!isSafeToSpeculativelyExecute(I) && + !(HoistCondStores && + (SpeculatedStoreValue = isSafeToSpeculateStore(I, BB, ThenBB, + EndBB)))) return false; - if (ComputeSpeculationCost(I) > PHINodeFoldingThreshold) + if (!SpeculatedStoreValue && + ComputeSpeculationCost(I) > PHINodeFoldingThreshold) return false; + // Store the store speculation candidate. + if (SpeculatedStoreValue) + SpeculatedStore = cast<StoreInst>(I); + // Do not hoist the instruction if any of its operands are defined but not - // used in this BB. The transformation will prevent the operand from + // used in BB. The transformation will prevent the operand from // being sunk into the use block. for (User::op_iterator i = I->op_begin(), e = I->op_end(); i != e; ++i) { @@ -1473,12 +1547,24 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB) { // If there are no PHIs to process, bail early. This helps ensure idempotence // as well. - if (!HaveRewritablePHIs) + if (!HaveRewritablePHIs && !(HoistCondStores && SpeculatedStoreValue)) return false; // If we get here, we can hoist the instruction and if-convert. DEBUG(dbgs() << "SPECULATIVELY EXECUTING BB" << *ThenBB << "\n";); + // Insert a select of the value of the speculated store. + if (SpeculatedStoreValue) { + IRBuilder<true, NoFolder> Builder(BI); + Value *TrueV = SpeculatedStore->getValueOperand(); + Value *FalseV = SpeculatedStoreValue; + if (Invert) + std::swap(TrueV, FalseV); + Value *S = Builder.CreateSelect(BrCond, TrueV, FalseV, TrueV->getName() + + "." + FalseV->getName()); + SpeculatedStore->setOperand(0, S); + } + // Hoist the instructions. BB->getInstList().splice(BI, ThenBB->getInstList(), ThenBB->begin(), llvm::prior(ThenBB->end())); @@ -3073,7 +3159,12 @@ static bool TurnSwitchRangeIntoICmp(SwitchInst *SI, IRBuilder<> &Builder) { Value *Sub = SI->getCondition(); if (!Offset->isNullValue()) Sub = Builder.CreateAdd(Sub, Offset, Sub->getName()+".off"); - Value *Cmp = Builder.CreateICmpULT(Sub, NumCases, "switch"); + Value *Cmp; + // If NumCases overflowed, then all possible values jump to the successor. 
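// Illustrative corner case for the guard below, not from the patch: when the
// contiguous case range covers every value of the condition type (e.g. a switch on an
// i1 with both cases present, or an i8 with all 256 values), the NumCases constant
// wraps to zero, so 'icmp ult %sub, 0' would be always false; emitting a constant
// 'true' instead is correct because every possible value jumps to a case successor.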
+ if (NumCases->isNullValue() && SI->getNumCases() != 0) + Cmp = ConstantInt::getTrue(SI->getContext()); + else + Cmp = Builder.CreateICmpULT(Sub, NumCases, "switch"); BranchInst *NewBI = Builder.CreateCondBr( Cmp, SI->case_begin().getCaseSuccessor(), SI->getDefaultDest()); diff --git a/contrib/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp b/contrib/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp index c231704..6bea2dd 100644 --- a/contrib/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp +++ b/contrib/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp @@ -1518,6 +1518,12 @@ struct FPrintFOpt : public LibCallOptimization { if (!getConstantStringInfo(CI->getArgOperand(1), FormatStr)) return 0; + // Do not do any of the following transformations if the fprintf return + // value is used, in general the fprintf return value is not compatible + // with fwrite(), fputc() or fputs(). + if (!CI->use_empty()) + return 0; + // fprintf(F, "foo") --> fwrite("foo", 3, 1, F) if (CI->getNumArgOperands() == 2) { for (unsigned i = 0, e = FormatStr.size(); i != e; ++i) @@ -1527,11 +1533,10 @@ struct FPrintFOpt : public LibCallOptimization { // These optimizations require DataLayout. if (!TD) return 0; - Value *NewCI = EmitFWrite(CI->getArgOperand(1), - ConstantInt::get(TD->getIntPtrType(*Context), - FormatStr.size()), - CI->getArgOperand(0), B, TD, TLI); - return NewCI ? ConstantInt::get(CI->getType(), FormatStr.size()) : 0; + return EmitFWrite(CI->getArgOperand(1), + ConstantInt::get(TD->getIntPtrType(*Context), + FormatStr.size()), + CI->getArgOperand(0), B, TD, TLI); } // The remaining optimizations require the format string to be "%s" or "%c" @@ -1544,14 +1549,12 @@ struct FPrintFOpt : public LibCallOptimization { if (FormatStr[1] == 'c') { // fprintf(F, "%c", chr) --> fputc(chr, F) if (!CI->getArgOperand(2)->getType()->isIntegerTy()) return 0; - Value *NewCI = EmitFPutC(CI->getArgOperand(2), CI->getArgOperand(0), B, - TD, TLI); - return NewCI ? ConstantInt::get(CI->getType(), 1) : 0; + return EmitFPutC(CI->getArgOperand(2), CI->getArgOperand(0), B, TD, TLI); } if (FormatStr[1] == 's') { // fprintf(F, "%s", str) --> fputs(str, F) - if (!CI->getArgOperand(2)->getType()->isPointerTy() || !CI->use_empty()) + if (!CI->getArgOperand(2)->getType()->isPointerTy()) return 0; return EmitFPutS(CI->getArgOperand(2), CI->getArgOperand(0), B, TD, TLI); } diff --git a/contrib/llvm/lib/Transforms/Utils/Utils.cpp b/contrib/llvm/lib/Transforms/Utils/Utils.cpp index 5812d46..c3df215 100644 --- a/contrib/llvm/lib/Transforms/Utils/Utils.cpp +++ b/contrib/llvm/lib/Transforms/Utils/Utils.cpp @@ -13,6 +13,7 @@ //===----------------------------------------------------------------------===// #include "llvm/InitializePasses.h" +#include "llvm/PassRegistry.h" #include "llvm-c/Initialization.h" using namespace llvm; diff --git a/contrib/llvm/lib/Transforms/Utils/ValueMapper.cpp b/contrib/llvm/lib/Transforms/Utils/ValueMapper.cpp index b5941bd..544c5ee 100644 --- a/contrib/llvm/lib/Transforms/Utils/ValueMapper.cpp +++ b/contrib/llvm/lib/Transforms/Utils/ValueMapper.cpp @@ -57,7 +57,7 @@ Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM, RemapFlags Flags, return VM[V] = const_cast<Value*>(V); // Create a dummy node in case we have a metadata cycle. - MDNode *Dummy = MDNode::getTemporary(V->getContext(), ArrayRef<Value*>()); + MDNode *Dummy = MDNode::getTemporary(V->getContext(), None); VM[V] = Dummy; // Check all operands to see if any need to be remapped. 
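// Illustrative use of the FPrintFOpt change above, not from the patch (names made up):
// the rewrite to fwrite/fputc/fputs is now skipped whenever the fprintf return value
// is observed, because those callees do not return the printed character count.
#include <cstdio>
void fprintf_example(FILE *f, const char *s) {
  fprintf(f, "hi");              // result unused: may become fwrite("hi", 2, 1, f)
  int n = fprintf(f, "%s", s);   // result used: left untouched after this change
  (void)n;
}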
diff --git a/contrib/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/contrib/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index d26154e..08d3725 100644 --- a/contrib/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/contrib/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -8,9 +8,9 @@ //===----------------------------------------------------------------------===// // // This is the LLVM loop vectorizer. This pass modifies 'vectorizable' loops -// and generates target-independent LLVM-IR. Legalization of the IR is done -// in the codegen. However, the vectorizer uses (will use) the codegen -// interfaces to generate IR that is likely to result in an optimal binary. +// and generates target-independent LLVM-IR. +// The vectorizer uses the TargetTransformInfo analysis to estimate the costs +// of instructions in order to estimate the profitability of vectorization. // // The loop vectorizer combines consecutive loop iterations into a single // 'wide' iteration. After this transformation the index is incremented @@ -78,7 +78,9 @@ #include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/PatternMatch.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Support/ValueHandle.h" #include "llvm/Target/TargetLibraryInfo.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" @@ -87,6 +89,7 @@ #include <map> using namespace llvm; +using namespace llvm::PatternMatch; static cl::opt<unsigned> VectorizationFactor("force-vector-width", cl::init(0), cl::Hidden, @@ -112,9 +115,9 @@ TinyTripCountVectorThreshold("vectorizer-min-trip-count", cl::init(16), /// We don't unroll loops with a known constant trip count below this number. static const unsigned TinyTripCountUnrollThreshold = 128; -/// When performing a runtime memory check, do not check more than this -/// number of pointers. Notice that the check is quadratic! -static const unsigned RuntimeMemoryCheckThreshold = 4; +/// When performing memory disambiguation checks at runtime do not make more +/// than this number of comparisons. +static const unsigned RuntimeMemoryCheckThreshold = 8; /// We use a metadata with this name to indicate that a scalar loop was /// vectorized and that we don't need to re-vectorize it if we run into it @@ -333,7 +336,7 @@ public: DominatorTree *DT, TargetTransformInfo* TTI, AliasAnalysis *AA, TargetLibraryInfo *TLI) : TheLoop(L), SE(SE), DL(DL), DT(DT), TTI(TTI), AA(AA), TLI(TLI), - Induction(0) {} + Induction(0), HasFunNoNaNAttr(false) {} /// This enum represents the kinds of reductions that we support. enum ReductionKind { @@ -343,8 +346,10 @@ public: RK_IntegerOr, ///< Bitwise or logical OR of numbers. RK_IntegerAnd, ///< Bitwise or logical AND of numbers. RK_IntegerXor, ///< Bitwise or logical XOR of numbers. + RK_IntegerMinMax, ///< Min/max implemented in terms of select(cmp()). RK_FloatAdd, ///< Sum of floats. - RK_FloatMult ///< Product of floats. + RK_FloatMult, ///< Product of floats. + RK_FloatMinMax ///< Min/max implemented in terms of select(cmp()). }; /// This enum represents the kinds of inductions that we support. @@ -356,21 +361,52 @@ public: IK_ReversePtrInduction ///< Reverse ptr indvar. Step = - sizeof(elem). }; + // This enum represents the kind of minmax reduction. + enum MinMaxReductionKind { + MRK_Invalid, + MRK_UIntMin, + MRK_UIntMax, + MRK_SIntMin, + MRK_SIntMax, + MRK_FloatMin, + MRK_FloatMax + }; + /// This POD struct holds information about reduction variables. 
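// Illustrative source loop for the new min/max reduction kinds, not from the patch
// (names made up): the compare/select pair is the select(cmp()) shape that
// RK_IntegerMinMax with MRK_SIntMax describes.
int smax_example(const int *a, int n) {
  int mx = a[0];
  for (int i = 1; i < n; ++i)
    mx = (a[i] > mx) ? a[i] : mx;   // icmp sgt + select -> signed max reduction
  return mx;
}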
struct ReductionDescriptor { ReductionDescriptor() : StartValue(0), LoopExitInstr(0), - Kind(RK_NoReduction) {} + Kind(RK_NoReduction), MinMaxKind(MRK_Invalid) {} - ReductionDescriptor(Value *Start, Instruction *Exit, ReductionKind K) - : StartValue(Start), LoopExitInstr(Exit), Kind(K) {} + ReductionDescriptor(Value *Start, Instruction *Exit, ReductionKind K, + MinMaxReductionKind MK) + : StartValue(Start), LoopExitInstr(Exit), Kind(K), MinMaxKind(MK) {} // The starting value of the reduction. // It does not have to be zero! - Value *StartValue; + TrackingVH<Value> StartValue; // The instruction who's value is used outside the loop. Instruction *LoopExitInstr; // The kind of the reduction. ReductionKind Kind; + // If this a min/max reduction the kind of reduction. + MinMaxReductionKind MinMaxKind; + }; + + /// This POD struct holds information about a potential reduction operation. + struct ReductionInstDesc { + ReductionInstDesc(bool IsRedux, Instruction *I) : + IsReduction(IsRedux), PatternLastInst(I), MinMaxKind(MRK_Invalid) {} + + ReductionInstDesc(Instruction *I, MinMaxReductionKind K) : + IsReduction(true), PatternLastInst(I), MinMaxKind(K) {} + + // Is this instruction a reduction candidate. + bool IsReduction; + // The last instruction in a min/max pattern (select of the select(icmp()) + // pattern), or the current reduction instruction otherwise. + Instruction *PatternLastInst; + // If this is a min/max pattern the comparison predicate. + MinMaxReductionKind MinMaxKind; }; // This POD struct holds information about the memory runtime legality @@ -387,16 +423,18 @@ public: } /// Insert a pointer and calculate the start and end SCEVs. - void insert(ScalarEvolution *SE, Loop *Lp, Value *Ptr); + void insert(ScalarEvolution *SE, Loop *Lp, Value *Ptr, bool WritePtr); /// This flag indicates if we need to add the runtime check. bool Need; /// Holds the pointers that we need to check. - SmallVector<Value*, 2> Pointers; + SmallVector<TrackingVH<Value>, 2> Pointers; /// Holds the pointer value at the beginning of the loop. SmallVector<const SCEV*, 2> Starts; /// Holds the pointer value at the end of the loop. SmallVector<const SCEV*, 2> Ends; + /// Holds the information if this pointer is used for writing to memory. + SmallVector<bool, 2> IsWritePtr; }; /// A POD for saving information about induction variables. @@ -404,7 +442,7 @@ public: InductionInfo(Value *Start, InductionKind K) : StartValue(Start), IK(K) {} InductionInfo() : StartValue(0), IK(IK_NoInduction) {} /// Start value. - Value *StartValue; + TrackingVH<Value> StartValue; /// Induction kind. InductionKind IK; }; @@ -461,6 +499,10 @@ public: /// Returns the information that we collected about runtime memory check. RuntimePointerCheck *getRuntimePointerCheck() { return &PtrRtCheck; } + + /// This function returns the identity element (or neutral element) for + /// the operation K. + static Constant *getReductionIdentity(ReductionKind K, Type *Tp); private: /// Check if a single basic block loop is vectorizable. /// At this point we know that this is a loop with a constant trip count @@ -487,9 +529,17 @@ private: /// Returns True, if 'Phi' is the kind of reduction variable for type /// 'Kind'. If this is a reduction variable, it adds it to ReductionList. bool AddReductionVar(PHINode *Phi, ReductionKind Kind); - /// Returns true if the instruction I can be a reduction variable of type - /// 'Kind'. 
- bool isReductionInstr(Instruction *I, ReductionKind Kind); + /// Returns a struct describing if the instruction 'I' can be a reduction + /// variable of type 'Kind'. If the reduction is a min/max pattern of + /// select(icmp()) this function advances the instruction pointer 'I' from the + /// compare instruction to the select instruction and stores this pointer in + /// 'PatternLastInst' member of the returned struct. + ReductionInstDesc isReductionInstr(Instruction *I, ReductionKind Kind, + ReductionInstDesc &Desc); + /// Returns true if the instruction is a Select(ICmp(X, Y), X, Y) instruction + /// pattern corresponding to a min(X, Y) or max(X, Y). + static ReductionInstDesc isMinMaxSelectCmpPattern(Instruction *I, + ReductionInstDesc &Prev); /// Returns the induction kind of Phi. This function may return NoInduction /// if the PHI is not an induction variable. InductionKind isInductionVariable(PHINode *Phi); @@ -540,6 +590,8 @@ private: /// We need to check that all of the pointers in this list are disjoint /// at runtime. RuntimePointerCheck PtrRtCheck; + /// Can we assume the absence of NaNs. + bool HasFunNoNaNAttr; }; /// LoopVectorizationCostModel - estimates the expected speedups due to @@ -662,6 +714,11 @@ struct LoopVectorize : public LoopPass { AA = getAnalysisIfAvailable<AliasAnalysis>(); TLI = getAnalysisIfAvailable<TargetLibraryInfo>(); + if (DL == NULL) { + DEBUG(dbgs() << "LV: Not vectorizing because of missing data layout"); + return false; + } + DEBUG(dbgs() << "LV: Checking a loop in \"" << L->getHeader()->getParent()->getName() << "\"\n"); @@ -737,7 +794,8 @@ struct LoopVectorize : public LoopPass { void LoopVectorizationLegality::RuntimePointerCheck::insert(ScalarEvolution *SE, - Loop *Lp, Value *Ptr) { + Loop *Lp, Value *Ptr, + bool WritePtr) { const SCEV *Sc = SE->getSCEV(Ptr); const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Sc); assert(AR && "Invalid addrec expression"); @@ -746,6 +804,7 @@ LoopVectorizationLegality::RuntimePointerCheck::insert(ScalarEvolution *SE, Pointers.push_back(Ptr); Starts.push_back(AR->getStart()); Ends.push_back(ScEnd); + IsWritePtr.push_back(WritePtr); } Value *InnerLoopVectorizer::getBroadcastInstrs(Value *V) { @@ -906,12 +965,18 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr, Value *Ptr = LI ? LI->getPointerOperand() : SI->getPointerOperand(); unsigned Alignment = LI ? LI->getAlignment() : SI->getAlignment(); + unsigned ScalarAllocatedSize = DL->getTypeAllocSize(ScalarDataTy); + unsigned VectorElementSize = DL->getTypeStoreSize(DataTy)/VF; + + if (ScalarAllocatedSize != VectorElementSize) + return scalarizeInstruction(Instr); + // If the pointer is loop invariant or if it is non consecutive, // scalarize the load. - int Stride = Legal->isConsecutivePtr(Ptr); - bool Reverse = Stride < 0; + int ConsecutiveStride = Legal->isConsecutivePtr(Ptr); + bool Reverse = ConsecutiveStride < 0; bool UniformLoad = LI && Legal->isUniform(Ptr); - if (Stride == 0 || UniformLoad) + if (!ConsecutiveStride || UniformLoad) return scalarizeInstruction(Instr); Constant *Zero = Builder.getInt32(0); @@ -1110,6 +1175,10 @@ InnerLoopVectorizer::addRuntimeCheck(LoopVectorizationLegality *Legal, for (unsigned i = 0; i < NumPointers; ++i) { for (unsigned j = i+1; j < NumPointers; ++j) { + // No need to check if two readonly pointers intersect. 
+ if (!PtrRtCheck->IsWritePtr[i] && !PtrRtCheck->IsWritePtr[j]) + continue; + Value *Start0 = ChkBuilder.CreateBitCast(Starts[i], PtrArithTy, "bc"); Value *Start1 = ChkBuilder.CreateBitCast(Starts[j], PtrArithTy, "bc"); Value *End0 = ChkBuilder.CreateBitCast(Ends[i], PtrArithTy, "bc"); @@ -1167,7 +1236,7 @@ InnerLoopVectorizer::createEmptyLoop(LoopVectorizationLegality *Legal) { // Mark the old scalar loop with metadata that tells us not to vectorize this // loop again if we run into it. - MDNode *MD = MDNode::get(OldBasicBlock->getContext(), ArrayRef<Value*>()); + MDNode *MD = MDNode::get(OldBasicBlock->getContext(), None); OldBasicBlock->getTerminator()->setMetadata(AlreadyVectorizedMDName, MD); // Some loops have a single integer induction variable, while other loops @@ -1436,24 +1505,24 @@ InnerLoopVectorizer::createEmptyLoop(LoopVectorizationLegality *Legal) { /// This function returns the identity element (or neutral element) for /// the operation K. -static Constant* -getReductionIdentity(LoopVectorizationLegality::ReductionKind K, Type *Tp) { +Constant* +LoopVectorizationLegality::getReductionIdentity(ReductionKind K, Type *Tp) { switch (K) { - case LoopVectorizationLegality:: RK_IntegerXor: - case LoopVectorizationLegality:: RK_IntegerAdd: - case LoopVectorizationLegality:: RK_IntegerOr: + case RK_IntegerXor: + case RK_IntegerAdd: + case RK_IntegerOr: // Adding, Xoring, Oring zero to a number does not change it. return ConstantInt::get(Tp, 0); - case LoopVectorizationLegality:: RK_IntegerMult: + case RK_IntegerMult: // Multiplying a number by 1 does not change it. return ConstantInt::get(Tp, 1); - case LoopVectorizationLegality:: RK_IntegerAnd: + case RK_IntegerAnd: // AND-ing a number with an all-1 value does not change it. return ConstantInt::get(Tp, -1, true); - case LoopVectorizationLegality:: RK_FloatMult: + case RK_FloatMult: // Multiplying a number by 1 does not change it. return ConstantFP::get(Tp, 1.0L); - case LoopVectorizationLegality:: RK_FloatAdd: + case RK_FloatAdd: // Adding zero to a number does not change it. return ConstantFP::get(Tp, 0.0L); default: @@ -1566,7 +1635,7 @@ getIntrinsicIDForCall(CallInst *CI, const TargetLibraryInfo *TLI) { } /// This function translates the reduction kind to an LLVM binary operator. 
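// Quick restatement of the identity elements above (illustrative, not from the patch):
// add/or/xor use 0, mul uses 1, and uses all-ones, fadd uses 0.0, fmul uses 1.0, so
// seeding unused vector lanes with the identity leaves the reduced result unchanged.
static_assert((0 + 5) == 5 && (1 * 5) == 5 && (~0 & 5) == 5 && (0 ^ 5) == 5,
              "identity elements leave the reduced value unchanged");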
-static Instruction::BinaryOps +static unsigned getReductionBinOp(LoopVectorizationLegality::ReductionKind Kind) { switch (Kind) { case LoopVectorizationLegality::RK_IntegerAdd: @@ -1583,11 +1652,53 @@ getReductionBinOp(LoopVectorizationLegality::ReductionKind Kind) { return Instruction::FMul; case LoopVectorizationLegality::RK_FloatAdd: return Instruction::FAdd; + case LoopVectorizationLegality::RK_IntegerMinMax: + return Instruction::ICmp; + case LoopVectorizationLegality::RK_FloatMinMax: + return Instruction::FCmp; default: llvm_unreachable("Unknown reduction operation"); } } +Value *createMinMaxOp(IRBuilder<> &Builder, + LoopVectorizationLegality::MinMaxReductionKind RK, + Value *Left, + Value *Right) { + CmpInst::Predicate P = CmpInst::ICMP_NE; + switch (RK) { + default: + llvm_unreachable("Unknown min/max reduction kind"); + case LoopVectorizationLegality::MRK_UIntMin: + P = CmpInst::ICMP_ULT; + break; + case LoopVectorizationLegality::MRK_UIntMax: + P = CmpInst::ICMP_UGT; + break; + case LoopVectorizationLegality::MRK_SIntMin: + P = CmpInst::ICMP_SLT; + break; + case LoopVectorizationLegality::MRK_SIntMax: + P = CmpInst::ICMP_SGT; + break; + case LoopVectorizationLegality::MRK_FloatMin: + P = CmpInst::FCMP_OLT; + break; + case LoopVectorizationLegality::MRK_FloatMax: + P = CmpInst::FCMP_OGT; + break; + } + + Value *Cmp; + if (RK == LoopVectorizationLegality::MRK_FloatMin || RK == LoopVectorizationLegality::MRK_FloatMax) + Cmp = Builder.CreateFCmp(P, Left, Right, "rdx.minmax.cmp"); + else + Cmp = Builder.CreateICmp(P, Left, Right, "rdx.minmax.cmp"); + + Value *Select = Builder.CreateSelect(Cmp, Left, Right, "rdx.minmax.select"); + return Select; +} + void InnerLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) { //===------------------------------------------------===// @@ -1651,13 +1762,24 @@ InnerLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) { // Find the reduction identity variable. Zero for addition, or, xor, // one for multiplication, -1 for And. - Constant *Iden = getReductionIdentity(RdxDesc.Kind, VecTy->getScalarType()); - Constant *Identity = ConstantVector::getSplat(VF, Iden); - - // This vector is the Identity vector where the first element is the - // incoming scalar reduction. - Value *VectorStart = Builder.CreateInsertElement(Identity, - RdxDesc.StartValue, Zero); + Value *Identity; + Value *VectorStart; + if (RdxDesc.Kind == LoopVectorizationLegality::RK_IntegerMinMax || + RdxDesc.Kind == LoopVectorizationLegality::RK_FloatMinMax) { + // MinMax reduction have the start value as their identify. + VectorStart = Identity = Builder.CreateVectorSplat(VF, RdxDesc.StartValue, + "minmax.ident"); + } else { + Constant *Iden = + LoopVectorizationLegality::getReductionIdentity(RdxDesc.Kind, + VecTy->getScalarType()); + Identity = ConstantVector::getSplat(VF, Iden); + + // This vector is the Identity vector where the first element is the + // incoming scalar reduction. + VectorStart = Builder.CreateInsertElement(Identity, + RdxDesc.StartValue, Zero); + } // Fix the vector-loop phi. // We created the induction variable so we know that the @@ -1699,10 +1821,15 @@ InnerLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) { // Reduce all of the unrolled parts into a single vector. 
Value *ReducedPartRdx = RdxParts[0]; + unsigned Op = getReductionBinOp(RdxDesc.Kind); for (unsigned part = 1; part < UF; ++part) { - Instruction::BinaryOps Op = getReductionBinOp(RdxDesc.Kind); - ReducedPartRdx = Builder.CreateBinOp(Op, RdxParts[part], ReducedPartRdx, - "bin.rdx"); + if (Op != Instruction::ICmp && Op != Instruction::FCmp) + ReducedPartRdx = Builder.CreateBinOp((Instruction::BinaryOps)Op, + RdxParts[part], ReducedPartRdx, + "bin.rdx"); + else + ReducedPartRdx = createMinMaxOp(Builder, RdxDesc.MinMaxKind, + ReducedPartRdx, RdxParts[part]); } // VF is a power of 2 so we can emit the reduction using log2(VF) shuffles @@ -1727,8 +1854,11 @@ InnerLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) { ConstantVector::get(ShuffleMask), "rdx.shuf"); - Instruction::BinaryOps Op = getReductionBinOp(RdxDesc.Kind); - TmpVec = Builder.CreateBinOp(Op, TmpVec, Shuf, "bin.rdx"); + if (Op != Instruction::ICmp && Op != Instruction::FCmp) + TmpVec = Builder.CreateBinOp((Instruction::BinaryOps)Op, TmpVec, Shuf, + "bin.rdx"); + else + TmpVec = createMinMaxOp(Builder, RdxDesc.MinMaxKind, TmpVec, Shuf); } // The result is in the first element of the vector. @@ -1861,18 +1991,33 @@ InnerLoopVectorizer::vectorizeBlockInLoop(LoopVectorizationLegality *Legal, // We know that all PHIs in non header blocks are converted into // selects, so we don't have to worry about the insertion order and we // can just use the builder. - // At this point we generate the predication tree. There may be // duplications since this is a simple recursive scan, but future // optimizations will clean it up. - VectorParts Cond = createEdgeMask(P->getIncomingBlock(0), - P->getParent()); - for (unsigned part = 0; part < UF; ++part) { - VectorParts &In0 = getVectorValue(P->getIncomingValue(0)); - VectorParts &In1 = getVectorValue(P->getIncomingValue(1)); - Entry[part] = Builder.CreateSelect(Cond[part], In0[part], In1[part], - "predphi"); + unsigned NumIncoming = P->getNumIncomingValues(); + assert(NumIncoming > 1 && "Invalid PHI"); + + // Generate a sequence of selects of the form: + // SELECT(Mask3, In3, + // SELECT(Mask2, In2, + // ( ...))) + for (unsigned In = 0; In < NumIncoming; In++) { + VectorParts Cond = createEdgeMask(P->getIncomingBlock(In), + P->getParent()); + VectorParts &In0 = getVectorValue(P->getIncomingValue(In)); + + for (unsigned part = 0; part < UF; ++part) { + // We don't need to 'select' the first PHI operand because it is + // the default value if all of the other masks don't match. + if (In == 0) + Entry[part] = In0[part]; + else + // Select between the current value and the previous incoming edge + // based on the incoming mask. + Entry[part] = Builder.CreateSelect(Cond[part], In0[part], + Entry[part], "predphi"); + } } continue; } @@ -2153,12 +2298,6 @@ bool LoopVectorizationLegality::canVectorizeWithIfConvert() { if (!isa<BranchInst>(BB->getTerminator())) return false; - // We must have at most two predecessors because we need to convert - // all PHIs to selects. - unsigned Preds = std::distance(pred_begin(BB), pred_end(BB)); - if (Preds > 2) - return false; - // We must be able to predicate all blocks that need to be predicated. if (blockNeedsPredication(BB) && !blockCanBePredicated(BB)) return false; @@ -2239,6 +2378,26 @@ bool LoopVectorizationLegality::canVectorize() { return true; } +/// \brief Check that the instruction has outside loop users and is not an +/// identified reduction variable. 
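// Scalar sketch of the log2(VF) shuffle reduction above (illustrative, not from the
// patch): each step folds the upper half of the lanes onto the lower half, so reducing
// 8 lanes takes log2(8) = 3 vector operations; min/max kinds use createMinMaxOp here.
int reduce8_example(int v[8]) {
  for (int half = 4; half >= 1; half /= 2)
    for (int i = 0; i < half; ++i)
      v[i] = v[i] + v[i + half];   // one vector binop per 'half' step in the real code
  return v[0];
}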
+static bool hasOutsideLoopUser(const Loop *TheLoop, Instruction *Inst, + SmallPtrSet<Value *, 4> &Reductions) { + // Reduction instructions are allowed to have exit users. All other + // instructions must not have external users. + if (!Reductions.count(Inst)) + //Check that all of the users of the loop are inside the BB. + for (Value::use_iterator I = Inst->use_begin(), E = Inst->use_end(); + I != E; ++I) { + Instruction *U = cast<Instruction>(*I); + // This user may be a reduction exit value. + if (!TheLoop->contains(U)) { + DEBUG(dbgs() << "LV: Found an outside user for : "<< *U << "\n"); + return true; + } + } + return false; +} + bool LoopVectorizationLegality::canVectorizeInstrs() { BasicBlock *PreHeader = TheLoop->getLoopPreheader(); BasicBlock *Header = TheLoop->getHeader(); @@ -2250,6 +2409,13 @@ bool LoopVectorizationLegality::canVectorizeInstrs() { return false; } + // Look for the attribute signaling the absence of NaNs. + Function &F = *Header->getParent(); + if (F.hasFnAttribute("no-nans-fp-math")) + HasFunNoNaNAttr = F.getAttributes().getAttribute( + AttributeSet::FunctionIndex, + "no-nans-fp-math").getValueAsString() == "true"; + // For each block in the loop. for (Loop::block_iterator bb = TheLoop->block_begin(), be = TheLoop->block_end(); bb != be; ++bb) { @@ -2259,12 +2425,6 @@ bool LoopVectorizationLegality::canVectorizeInstrs() { ++it) { if (PHINode *Phi = dyn_cast<PHINode>(it)) { - // This should not happen because the loop should be normalized. - if (Phi->getNumIncomingValues() != 2) { - DEBUG(dbgs() << "LV: Found an invalid PHI.\n"); - return false; - } - // Check that this PHI type is allowed. if (!Phi->getType()->isIntegerTy() && !Phi->getType()->isFloatingPointTy() && @@ -2276,8 +2436,19 @@ bool LoopVectorizationLegality::canVectorizeInstrs() { // If this PHINode is not in the header block, then we know that we // can convert it to select during if-conversion. No need to check if // the PHIs in this block are induction or reduction variables. - if (*bb != Header) - continue; + if (*bb != Header) { + // Check that this instruction has no outside users or is an + // identified reduction value with an outside user. + if(!hasOutsideLoopUser(TheLoop, it, AllowedExit)) + continue; + return false; + } + + // We only allow if-converted PHIs with more than two incoming values. + if (Phi->getNumIncomingValues() != 2) { + DEBUG(dbgs() << "LV: Found an invalid PHI.\n"); + return false; + } // This is the value coming from the preheader. Value *StartValue = Phi->getIncomingValueForBlock(PreHeader); @@ -2319,6 +2490,10 @@ bool LoopVectorizationLegality::canVectorizeInstrs() { DEBUG(dbgs() << "LV: Found a XOR reduction PHI."<< *Phi <<"\n"); continue; } + if (AddReductionVar(Phi, RK_IntegerMinMax)) { + DEBUG(dbgs() << "LV: Found a MINMAX reduction PHI."<< *Phi <<"\n"); + continue; + } if (AddReductionVar(Phi, RK_FloatMult)) { DEBUG(dbgs() << "LV: Found an FMult reduction PHI."<< *Phi <<"\n"); continue; @@ -2327,6 +2502,10 @@ bool LoopVectorizationLegality::canVectorizeInstrs() { DEBUG(dbgs() << "LV: Found an FAdd reduction PHI."<< *Phi <<"\n"); continue; } + if (AddReductionVar(Phi, RK_FloatMinMax)) { + DEBUG(dbgs() << "LV: Found an float MINMAX reduction PHI."<< *Phi <<"\n"); + continue; + } DEBUG(dbgs() << "LV: Found an unidentified PHI."<< *Phi <<"\n"); return false; @@ -2356,17 +2535,9 @@ bool LoopVectorizationLegality::canVectorizeInstrs() { // Reduction instructions are allowed to have exit users. // All other instructions must not have external users. 
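// Illustrative loop for hasOutsideLoopUser above, not from the patch (names made up):
// only values recognized as reductions (or inductions) may be read after the loop;
// any other instruction with a user outside the loop blocks vectorization.
int outside_user_example(const int *a, const int *b, int n, int *lastp) {
  int sum = 0, last = 0;
  for (int i = 0; i < n; ++i) {
    sum += a[i];       // reduction: its exit value may be used below
    last = b[i] + 1;   // not a reduction; the use after the loop is an outside user
  }
  *lastp = last;
  return sum;
}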
- if (!AllowedExit.count(it)) - //Check that all of the users of the loop are inside the BB. - for (Value::use_iterator I = it->use_begin(), E = it->use_end(); - I != E; ++I) { - Instruction *U = cast<Instruction>(*I); - // This user may be a reduction exit value. - if (!TheLoop->contains(U)) { - DEBUG(dbgs() << "LV: Found an outside user for : "<< *U << "\n"); - return false; - } - } + if (hasOutsideLoopUser(TheLoop, it, AllowedExit)) + return false; + } // next instr. } @@ -2446,13 +2617,6 @@ LoopVectorizationLegality::hasPossibleGlobalWriteReorder( bool LoopVectorizationLegality::canVectorizeMemory() { - if (TheLoop->isAnnotatedParallel()) { - DEBUG(dbgs() - << "LV: A loop annotated parallel, ignore memory dependency " - << "checks.\n"); - return true; - } - typedef SmallVector<Value*, 16> ValueVector; typedef SmallPtrSet<Value*, 16> ValueSet; // Holds the Load and Store *instructions*. @@ -2461,6 +2625,8 @@ bool LoopVectorizationLegality::canVectorizeMemory() { PtrRtCheck.Pointers.clear(); PtrRtCheck.Need = false; + const bool IsAnnotatedParallel = TheLoop->isAnnotatedParallel(); + // For each block. for (Loop::block_iterator bb = TheLoop->block_begin(), be = TheLoop->block_end(); bb != be; ++bb) { @@ -2475,7 +2641,7 @@ bool LoopVectorizationLegality::canVectorizeMemory() { if (it->mayReadFromMemory()) { LoadInst *Ld = dyn_cast<LoadInst>(it); if (!Ld) return false; - if (!Ld->isSimple()) { + if (!Ld->isSimple() && !IsAnnotatedParallel) { DEBUG(dbgs() << "LV: Found a non-simple load.\n"); return false; } @@ -2487,7 +2653,7 @@ bool LoopVectorizationLegality::canVectorizeMemory() { if (it->mayWriteToMemory()) { StoreInst *St = dyn_cast<StoreInst>(it); if (!St) return false; - if (!St->isSimple()) { + if (!St->isSimple() && !IsAnnotatedParallel) { DEBUG(dbgs() << "LV: Found a non-simple store.\n"); return false; } @@ -2534,6 +2700,13 @@ bool LoopVectorizationLegality::canVectorizeMemory() { ReadWrites.insert(std::make_pair(Ptr, ST)); } + if (IsAnnotatedParallel) { + DEBUG(dbgs() + << "LV: A loop annotated parallel, ignore memory dependency " + << "checks.\n"); + return true; + } + for (I = Loads.begin(), IE = Loads.end(); I != IE; ++I) { LoadInst *LD = cast<LoadInst>(*I); Value* Ptr = LD->getPointerOperand(); @@ -2556,6 +2729,9 @@ bool LoopVectorizationLegality::canVectorizeMemory() { return true; } + unsigned NumReadPtrs = 0; + unsigned NumWritePtrs = 0; + // Find pointers with computable bounds. We are going to use this information // to place a runtime bound check. bool CanDoRT = true; @@ -2563,7 +2739,8 @@ bool LoopVectorizationLegality::canVectorizeMemory() { for (MI = ReadWrites.begin(), ME = ReadWrites.end(); MI != ME; ++MI) { Value *V = (*MI).first; if (hasComputableBounds(V)) { - PtrRtCheck.insert(SE, TheLoop, V); + PtrRtCheck.insert(SE, TheLoop, V, true); + NumWritePtrs++; DEBUG(dbgs() << "LV: Found a runtime check ptr:" << *V <<"\n"); } else { CanDoRT = false; @@ -2573,7 +2750,8 @@ bool LoopVectorizationLegality::canVectorizeMemory() { for (MI = Reads.begin(), ME = Reads.end(); MI != ME; ++MI) { Value *V = (*MI).first; if (hasComputableBounds(V)) { - PtrRtCheck.insert(SE, TheLoop, V); + PtrRtCheck.insert(SE, TheLoop, V, false); + NumReadPtrs++; DEBUG(dbgs() << "LV: Found a runtime check ptr:" << *V <<"\n"); } else { CanDoRT = false; @@ -2583,7 +2761,9 @@ bool LoopVectorizationLegality::canVectorizeMemory() { // Check that we did not collect too many pointers or found a // unsizeable pointer. 
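// Worked example for the bound computed just below (illustrative, not from the patch):
// read/read pairs never need a runtime overlap check, so the comparison count is
// NumWritePtrs * (NumReadPtrs + NumWritePtrs - 1). For 2 write and 3 read pointers
// that is 2 * (3 + 2 - 1) = 8, exactly the new RuntimeMemoryCheckThreshold.
static_assert(2 * (3 + 2 - 1) == 8, "example runtime-check comparison count");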
- if (!CanDoRT || PtrRtCheck.Pointers.size() > RuntimeMemoryCheckThreshold) { + unsigned NumComparisons = (NumWritePtrs * (NumReadPtrs + NumWritePtrs - 1)); + DEBUG(dbgs() << "LV: We need to compare " << NumComparisons << " ptrs.\n"); + if (!CanDoRT || NumComparisons > RuntimeMemoryCheckThreshold) { PtrRtCheck.reset(); CanDoRT = false; } @@ -2646,8 +2826,8 @@ bool LoopVectorizationLegality::canVectorizeMemory() { Inst, WriteObjects, MaxByteWidth)) { - DEBUG(dbgs() << "LV: Found a possible write-write reorder:" - << *UI <<"\n"); + DEBUG(dbgs() << "LV: Found a possible write-write reorder:" << **UI + << "\n"); return false; } @@ -2690,8 +2870,8 @@ bool LoopVectorizationLegality::canVectorizeMemory() { Inst, WriteObjects, MaxByteWidth)) { - DEBUG(dbgs() << "LV: Found a possible read-write reorder:" - << *UI <<"\n"); + DEBUG(dbgs() << "LV: Found a possible read-write reorder:" << **UI + << "\n"); return false; } } @@ -2737,7 +2917,18 @@ bool LoopVectorizationLegality::AddReductionVar(PHINode *Phi, // used as reduction variables (such as ADD). We may have a single // out-of-block user. The cycle must end with the original PHI. Instruction *Iter = Phi; - while (true) { + + // To recognize min/max patterns formed by a icmp select sequence, we store + // the number of instruction we saw from the recognized min/max pattern, + // such that we don't stop when we see the phi has two uses (one by the select + // and one by the icmp) and to make sure we only see exactly the two + // instructions. + unsigned NumCmpSelectPatternInst = 0; + ReductionInstDesc ReduxDesc(false, 0); + + // Avoid cycles in the chain. + SmallPtrSet<Instruction *, 8> VisitedInsts; + while (VisitedInsts.insert(Iter)) { // If the instruction has no users then this is a broken // chain and can't be a reduction variable. if (Iter->use_empty()) @@ -2751,9 +2942,6 @@ bool LoopVectorizationLegality::AddReductionVar(PHINode *Phi, // Is this a bin op ? FoundBinOp |= !isa<PHINode>(Iter); - // Remember the current instruction. - Instruction *OldIter = Iter; - // For each of the *users* of iter. for (Value::use_iterator it = Iter->use_begin(), e = Iter->use_end(); it != e; ++it) { @@ -2782,25 +2970,35 @@ bool LoopVectorizationLegality::AddReductionVar(PHINode *Phi, Iter->hasNUsesOrMore(2)) continue; - // We can't have multiple inside users. - if (FoundInBlockUser) + // We can't have multiple inside users except for a combination of + // icmp/select both using the phi. + if (FoundInBlockUser && !NumCmpSelectPatternInst) return false; FoundInBlockUser = true; // Any reduction instr must be of one of the allowed kinds. - if (!isReductionInstr(U, Kind)) + ReduxDesc = isReductionInstr(U, Kind, ReduxDesc); + if (!ReduxDesc.IsReduction) return false; + if (Kind == RK_IntegerMinMax && (isa<ICmpInst>(U) || isa<SelectInst>(U))) + ++NumCmpSelectPatternInst; + if (Kind == RK_FloatMinMax && (isa<FCmpInst>(U) || isa<SelectInst>(U))) + ++NumCmpSelectPatternInst; + // Reductions of instructions such as Div, and Sub is only // possible if the LHS is the reduction variable. - if (!U->isCommutative() && !isa<PHINode>(U) && U->getOperand(0) != Iter) + if (!U->isCommutative() && !isa<PHINode>(U) && !isa<SelectInst>(U) && + !isa<ICmpInst>(U) && !isa<FCmpInst>(U) && U->getOperand(0) != Iter) return false; - Iter = U; + Iter = ReduxDesc.PatternLastInst; } - // If all uses were skipped this can't be a reduction variable. - if (Iter == OldIter) + // This means we have seen one but not the other instruction of the + // pattern or more than just a select and cmp. 
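// IR sketch of why NumCmpSelectPatternInst must end up as exactly two (illustrative,
// not from the patch): the reduction phi of a min/max has two in-loop users, the
// compare and the select, and nothing else:
//   %cmp      = icmp sgt i32 %max.phi, %elem
//   %max.next = select i1 %cmp, i32 %max.phi, i32 %elem
// A lone compare without its select, or any extra in-block user, fails the check below.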
+ if ((Kind == RK_IntegerMinMax || Kind == RK_FloatMinMax) && + NumCmpSelectPatternInst != 2) return false; // We found a reduction var if we have reached the original @@ -2811,47 +3009,107 @@ bool LoopVectorizationLegality::AddReductionVar(PHINode *Phi, AllowedExit.insert(ExitInstruction); // Save the description of this reduction variable. - ReductionDescriptor RD(RdxStart, ExitInstruction, Kind); + ReductionDescriptor RD(RdxStart, ExitInstruction, Kind, + ReduxDesc.MinMaxKind); Reductions[Phi] = RD; // We've ended the cycle. This is a reduction variable if we have an // outside user and it has a binary op. return FoundBinOp && ExitInstruction; } } + + return false; } -bool +/// Returns true if the instruction is a Select(ICmp(X, Y), X, Y) instruction +/// pattern corresponding to a min(X, Y) or max(X, Y). +LoopVectorizationLegality::ReductionInstDesc +LoopVectorizationLegality::isMinMaxSelectCmpPattern(Instruction *I, + ReductionInstDesc &Prev) { + + assert((isa<ICmpInst>(I) || isa<FCmpInst>(I) || isa<SelectInst>(I)) && + "Expect a select instruction"); + Instruction *Cmp = 0; + SelectInst *Select = 0; + + // We must handle the select(cmp()) as a single instruction. Advance to the + // select. + if ((Cmp = dyn_cast<ICmpInst>(I)) || (Cmp = dyn_cast<FCmpInst>(I))) { + if (!Cmp->hasOneUse() || !(Select = dyn_cast<SelectInst>(*I->use_begin()))) + return ReductionInstDesc(false, I); + return ReductionInstDesc(Select, Prev.MinMaxKind); + } + + // Only handle single use cases for now. + if (!(Select = dyn_cast<SelectInst>(I))) + return ReductionInstDesc(false, I); + if (!(Cmp = dyn_cast<ICmpInst>(I->getOperand(0))) && + !(Cmp = dyn_cast<FCmpInst>(I->getOperand(0)))) + return ReductionInstDesc(false, I); + if (!Cmp->hasOneUse()) + return ReductionInstDesc(false, I); + + Value *CmpLeft; + Value *CmpRight; + + // Look for a min/max pattern. + if (m_UMin(m_Value(CmpLeft), m_Value(CmpRight)).match(Select)) + return ReductionInstDesc(Select, MRK_UIntMin); + else if (m_UMax(m_Value(CmpLeft), m_Value(CmpRight)).match(Select)) + return ReductionInstDesc(Select, MRK_UIntMax); + else if (m_SMax(m_Value(CmpLeft), m_Value(CmpRight)).match(Select)) + return ReductionInstDesc(Select, MRK_SIntMax); + else if (m_SMin(m_Value(CmpLeft), m_Value(CmpRight)).match(Select)) + return ReductionInstDesc(Select, MRK_SIntMin); + else if (m_OrdFMin(m_Value(CmpLeft), m_Value(CmpRight)).match(Select)) + return ReductionInstDesc(Select, MRK_FloatMin); + else if (m_OrdFMax(m_Value(CmpLeft), m_Value(CmpRight)).match(Select)) + return ReductionInstDesc(Select, MRK_FloatMax); + else if (m_UnordFMin(m_Value(CmpLeft), m_Value(CmpRight)).match(Select)) + return ReductionInstDesc(Select, MRK_FloatMin); + else if (m_UnordFMax(m_Value(CmpLeft), m_Value(CmpRight)).match(Select)) + return ReductionInstDesc(Select, MRK_FloatMax); + + return ReductionInstDesc(false, I); +} + +LoopVectorizationLegality::ReductionInstDesc LoopVectorizationLegality::isReductionInstr(Instruction *I, - ReductionKind Kind) { + ReductionKind Kind, + ReductionInstDesc &Prev) { bool FP = I->getType()->isFloatingPointTy(); bool FastMath = (FP && I->isCommutative() && I->isAssociative()); - switch (I->getOpcode()) { default: - return false; + return ReductionInstDesc(false, I); case Instruction::PHI: - if (FP && (Kind != RK_FloatMult && Kind != RK_FloatAdd)) - return false; - // possibly. 
- return true; + if (FP && (Kind != RK_FloatMult && Kind != RK_FloatAdd && + Kind != RK_FloatMinMax)) + return ReductionInstDesc(false, I); + return ReductionInstDesc(I, Prev.MinMaxKind); case Instruction::Sub: case Instruction::Add: - return Kind == RK_IntegerAdd; - case Instruction::SDiv: - case Instruction::UDiv: + return ReductionInstDesc(Kind == RK_IntegerAdd, I); case Instruction::Mul: - return Kind == RK_IntegerMult; + return ReductionInstDesc(Kind == RK_IntegerMult, I); case Instruction::And: - return Kind == RK_IntegerAnd; + return ReductionInstDesc(Kind == RK_IntegerAnd, I); case Instruction::Or: - return Kind == RK_IntegerOr; + return ReductionInstDesc(Kind == RK_IntegerOr, I); case Instruction::Xor: - return Kind == RK_IntegerXor; + return ReductionInstDesc(Kind == RK_IntegerXor, I); case Instruction::FMul: - return Kind == RK_FloatMult && FastMath; + return ReductionInstDesc(Kind == RK_FloatMult && FastMath, I); case Instruction::FAdd: - return Kind == RK_FloatAdd && FastMath; - } + return ReductionInstDesc(Kind == RK_FloatAdd && FastMath, I); + case Instruction::FCmp: + case Instruction::ICmp: + case Instruction::Select: + if (Kind != RK_IntegerMinMax && + (!HasFunNoNaNAttr || Kind != RK_FloatMinMax)) + return ReductionInstDesc(false, I); + return isMinMaxSelectCmpPattern(I, Prev); + } } LoopVectorizationLegality::InductionKind @@ -3384,9 +3642,11 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) { TTI.getMemoryOpCost(I->getOpcode(), VectorTy, Alignment, AS); // Scalarized loads/stores. - int Stride = Legal->isConsecutivePtr(Ptr); - bool Reverse = Stride < 0; - if (0 == Stride) { + int ConsecutiveStride = Legal->isConsecutivePtr(Ptr); + bool Reverse = ConsecutiveStride < 0; + unsigned ScalarAllocatedSize = DL->getTypeAllocSize(ValTy); + unsigned VectorElementSize = DL->getTypeStoreSize(VectorTy)/VF; + if (!ConsecutiveStride || ScalarAllocatedSize != VectorElementSize) { unsigned Cost = 0; // The cost of extracting from the value vector and pointer vector. Type *PtrTy = ToVectorTy(Ptr->getType(), VF); diff --git a/contrib/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/contrib/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp new file mode 100644 index 0000000..cc30cc9 --- /dev/null +++ b/contrib/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -0,0 +1,348 @@ +//===- SLPVectorizer.cpp - A bottom up SLP Vectorizer ---------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// This pass implements the Bottom Up SLP vectorizer. It detects consecutive +// stores that can be put together into vector-stores. Next, it attempts to +// construct vectorizable tree using the use-def chains. If a profitable tree +// was found, the SLP vectorizer performs vectorization on the tree. +// +// The pass is inspired by the work described in the paper: +// "Loop-Aware SLP in GCC" by Ira Rosen, Dorit Nuzman, Ayal Zaks. 
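// Illustrative input for this pass, not from the patch (names made up): four
// consecutive i32 stores whose use-def trees have the same shape, which the bottom-up
// SLP vectorizer can turn into one <4 x i32> add plus one vector store when profitable.
void slp_example(int *a, const int *b) {
  a[0] = b[0] + 7;
  a[1] = b[1] + 7;
  a[2] = b[2] + 7;
  a[3] = b[3] + 7;
}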
+// +//===----------------------------------------------------------------------===// +#define SV_NAME "slp-vectorizer" +#define DEBUG_TYPE SV_NAME + +#include "VecUtils.h" +#include "llvm/Transforms/Vectorize.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/Analysis/Verifier.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/Value.h" +#include "llvm/Pass.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include <map> + +using namespace llvm; + +static cl::opt<int> +SLPCostThreshold("slp-threshold", cl::init(0), cl::Hidden, + cl::desc("Only vectorize trees if the gain is above this " + "number. (gain = -cost of vectorization)")); +namespace { + +/// The SLPVectorizer Pass. +struct SLPVectorizer : public FunctionPass { + typedef std::map<Value*, BoUpSLP::StoreList> StoreListMap; + + /// Pass identification, replacement for typeid + static char ID; + + explicit SLPVectorizer() : FunctionPass(ID) { + initializeSLPVectorizerPass(*PassRegistry::getPassRegistry()); + } + + ScalarEvolution *SE; + DataLayout *DL; + TargetTransformInfo *TTI; + AliasAnalysis *AA; + LoopInfo *LI; + + virtual bool runOnFunction(Function &F) { + SE = &getAnalysis<ScalarEvolution>(); + DL = getAnalysisIfAvailable<DataLayout>(); + TTI = &getAnalysis<TargetTransformInfo>(); + AA = &getAnalysis<AliasAnalysis>(); + LI = &getAnalysis<LoopInfo>(); + + StoreRefs.clear(); + bool Changed = false; + + // Must have DataLayout. We can't require it because some tests run w/o + // triple. + if (!DL) + return false; + + for (Function::iterator it = F.begin(), e = F.end(); it != e; ++it) { + BasicBlock *BB = it; + bool BBChanged = false; + + // Use the bollom up slp vectorizer to construct chains that start with + // he store instructions. + BoUpSLP R(BB, SE, DL, TTI, AA, LI->getLoopFor(BB)); + + // Vectorize trees that end at reductions. + BBChanged |= vectorizeReductions(BB, R); + + // Vectorize trees that end at stores. + if (unsigned count = collectStores(BB, R)) { + (void)count; + DEBUG(dbgs()<<"SLP: Found " << count << " stores to vectorize.\n"); + BBChanged |= vectorizeStoreChains(R); + } + + // Try to hoist some of the scalarization code to the preheader. + if (BBChanged) hoistGatherSequence(LI, BB, R); + + Changed |= BBChanged; + } + + if (Changed) { + DEBUG(dbgs()<<"SLP: vectorized \""<<F.getName()<<"\"\n"); + DEBUG(verifyFunction(F)); + } + return Changed; + } + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + FunctionPass::getAnalysisUsage(AU); + AU.addRequired<ScalarEvolution>(); + AU.addRequired<AliasAnalysis>(); + AU.addRequired<TargetTransformInfo>(); + AU.addRequired<LoopInfo>(); + } + +private: + + /// \brief Collect memory references and sort them according to their base + /// object. We sort the stores to their base objects to reduce the cost of the + /// quadratic search on the stores. TODO: We can further reduce this cost + /// if we flush the chain creation every time we run into a memory barrier. + unsigned collectStores(BasicBlock *BB, BoUpSLP &R); + + /// \brief Try to vectorize a chain that starts at two arithmetic instrs. + bool tryToVectorizePair(Value *A, Value *B, BoUpSLP &R); + + /// \brief Try to vectorize a list of operands. 
+ bool tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R); + + /// \brief Try to vectorize a chain that may start at the operands of \V; + bool tryToVectorize(BinaryOperator *V, BoUpSLP &R); + + /// \brief Vectorize the stores that were collected in StoreRefs. + bool vectorizeStoreChains(BoUpSLP &R); + + /// \brief Try to hoist gather sequences outside of the loop in cases where + /// all of the sources are loop invariant. + void hoistGatherSequence(LoopInfo *LI, BasicBlock *BB, BoUpSLP &R); + + /// \brief Scan the basic block and look for reductions that may start a + /// vectorization chain. + bool vectorizeReductions(BasicBlock *BB, BoUpSLP &R); + +private: + StoreListMap StoreRefs; +}; + +unsigned SLPVectorizer::collectStores(BasicBlock *BB, BoUpSLP &R) { + unsigned count = 0; + StoreRefs.clear(); + for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; ++it) { + StoreInst *SI = dyn_cast<StoreInst>(it); + if (!SI) + continue; + + // Check that the pointer points to scalars. + Type *Ty = SI->getValueOperand()->getType(); + if (Ty->isAggregateType() || Ty->isVectorTy()) + return 0; + + // Find the base of the GEP. + Value *Ptr = SI->getPointerOperand(); + if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr)) + Ptr = GEP->getPointerOperand(); + + // Save the store locations. + StoreRefs[Ptr].push_back(SI); + count++; + } + return count; +} + +bool SLPVectorizer::tryToVectorizePair(Value *A, Value *B, BoUpSLP &R) { + if (!A || !B) return false; + Value *VL[] = { A, B }; + return tryToVectorizeList(VL, R); +} + +bool SLPVectorizer::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R) { + DEBUG(dbgs()<<"SLP: Vectorizing a list of length = " << VL.size() << ".\n"); + + // Check that all of the parts are scalar. + for (int i = 0, e = VL.size(); i < e; ++i) { + Type *Ty = VL[i]->getType(); + if (Ty->isAggregateType() || Ty->isVectorTy()) + return 0; + } + + int Cost = R.getTreeCost(VL); + int ExtrCost = R.getScalarizationCost(VL); + DEBUG(dbgs()<<"SLP: Cost of pair:" << Cost << + " Cost of extract:" << ExtrCost << ".\n"); + if ((Cost+ExtrCost) >= -SLPCostThreshold) return false; + DEBUG(dbgs()<<"SLP: Vectorizing pair.\n"); + R.vectorizeArith(VL); + return true; +} + +bool SLPVectorizer::tryToVectorize(BinaryOperator *V, BoUpSLP &R) { + if (!V) return false; + // Try to vectorize V. + if (tryToVectorizePair(V->getOperand(0), V->getOperand(1), R)) + return true; + + BinaryOperator *A = dyn_cast<BinaryOperator>(V->getOperand(0)); + BinaryOperator *B = dyn_cast<BinaryOperator>(V->getOperand(1)); + // Try to skip B. + if (B && B->hasOneUse()) { + BinaryOperator *B0 = dyn_cast<BinaryOperator>(B->getOperand(0)); + BinaryOperator *B1 = dyn_cast<BinaryOperator>(B->getOperand(1)); + if (tryToVectorizePair(A, B0, R)) { + B->moveBefore(V); + return true; + } + if (tryToVectorizePair(A, B1, R)) { + B->moveBefore(V); + return true; + } + } + + // Try to skip A. + if (A && A->hasOneUse()) { + BinaryOperator *A0 = dyn_cast<BinaryOperator>(A->getOperand(0)); + BinaryOperator *A1 = dyn_cast<BinaryOperator>(A->getOperand(1)); + if (tryToVectorizePair(A0, B, R)) { + A->moveBefore(V); + return true; + } + if (tryToVectorizePair(A1, B, R)) { + A->moveBefore(V); + return true; + } + } + return 0; +} + +bool SLPVectorizer::vectorizeReductions(BasicBlock *BB, BoUpSLP &R) { + bool Changed = false; + for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; ++it) { + if (isa<DbgInfoIntrinsic>(it)) continue; + + // Try to vectorize reductions that use PHINodes. 
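// Illustrative shape for the reduction handling just below, not from the patch (names
// made up): a two-way reduction phi in this block feeds a binary operator, and the
// pass then tries to vectorize the operands of that operator as a pair, e.g. the two
// multiplies here may become a single <2 x float> fmul plus two extracts.
float reduction_pair_example(const float *x, const float *y,
                             const float *z, const float *w, int n) {
  float r = 0.0f;
  for (int i = 0; i < n; ++i)
    r += x[i] * y[i] + z[i] * w[i];
  return r;
}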
+ if (PHINode *P = dyn_cast<PHINode>(it)) { + // Check that the PHI is a reduction PHI. + if (P->getNumIncomingValues() != 2) return Changed; + Value *Rdx = (P->getIncomingBlock(0) == BB ? P->getIncomingValue(0) : + (P->getIncomingBlock(1) == BB ? P->getIncomingValue(1) : + 0)); + // Check if this is a Binary Operator. + BinaryOperator *BI = dyn_cast_or_null<BinaryOperator>(Rdx); + if (!BI) + continue; + + Value *Inst = BI->getOperand(0); + if (Inst == P) Inst = BI->getOperand(1); + Changed |= tryToVectorize(dyn_cast<BinaryOperator>(Inst), R); + continue; + } + + // Try to vectorize trees that start at compare instructions. + if (CmpInst *CI = dyn_cast<CmpInst>(it)) { + if (tryToVectorizePair(CI->getOperand(0), CI->getOperand(1), R)) { + Changed |= true; + continue; + } + for (int i = 0; i < 2; ++i) + if (BinaryOperator *BI = dyn_cast<BinaryOperator>(CI->getOperand(i))) + Changed |= tryToVectorizePair(BI->getOperand(0), BI->getOperand(1), R); + continue; + } + } + + return Changed; +} + +bool SLPVectorizer::vectorizeStoreChains(BoUpSLP &R) { + bool Changed = false; + // Attempt to sort and vectorize each of the store-groups. + for (StoreListMap::iterator it = StoreRefs.begin(), e = StoreRefs.end(); + it != e; ++it) { + if (it->second.size() < 2) + continue; + + DEBUG(dbgs()<<"SLP: Analyzing a store chain of length " << + it->second.size() << ".\n"); + + Changed |= R.vectorizeStores(it->second, -SLPCostThreshold); + } + return Changed; +} + +void SLPVectorizer::hoistGatherSequence(LoopInfo *LI, BasicBlock *BB, + BoUpSLP &R) { + // Check if this block is inside a loop. + Loop *L = LI->getLoopFor(BB); + if (!L) + return; + + // Check if it has a preheader. + BasicBlock *PreHeader = L->getLoopPreheader(); + if (!PreHeader) + return; + + // Mark the insertion point for the block. + Instruction *Location = PreHeader->getTerminator(); + + BoUpSLP::ValueList &Gathers = R.getGatherSeqInstructions(); + for (BoUpSLP::ValueList::iterator it = Gathers.begin(), e = Gathers.end(); + it != e; ++it) { + InsertElementInst *Insert = dyn_cast<InsertElementInst>(*it); + + // The InsertElement sequence can be simplified into a constant. + if (!Insert) + continue; + + // If the vector or the element that we insert into it are + // instructions that are defined in this basic block then we can't + // hoist this instruction. + Instruction *CurrVec = dyn_cast<Instruction>(Insert->getOperand(0)); + Instruction *NewElem = dyn_cast<Instruction>(Insert->getOperand(1)); + if (CurrVec && L->contains(CurrVec)) continue; + if (NewElem && L->contains(NewElem)) continue; + + // We can hoist this instruction. Move it to the pre-header. 
+ Insert->moveBefore(Location); + } +} + +} // end anonymous namespace + +char SLPVectorizer::ID = 0; +static const char lv_name[] = "SLP Vectorizer"; +INITIALIZE_PASS_BEGIN(SLPVectorizer, SV_NAME, lv_name, false, false) +INITIALIZE_AG_DEPENDENCY(AliasAnalysis) +INITIALIZE_AG_DEPENDENCY(TargetTransformInfo) +INITIALIZE_PASS_DEPENDENCY(ScalarEvolution) +INITIALIZE_PASS_DEPENDENCY(LoopSimplify) +INITIALIZE_PASS_END(SLPVectorizer, SV_NAME, lv_name, false, false) + +namespace llvm { + Pass *createSLPVectorizerPass() { + return new SLPVectorizer(); + } +} + diff --git a/contrib/llvm/lib/Transforms/Vectorize/VecUtils.cpp b/contrib/llvm/lib/Transforms/Vectorize/VecUtils.cpp new file mode 100644 index 0000000..9b94366 --- /dev/null +++ b/contrib/llvm/lib/Transforms/Vectorize/VecUtils.cpp @@ -0,0 +1,730 @@ +//===- VecUtils.cpp --- Vectorization Utilities ---------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +#define DEBUG_TYPE "SLP" + +#include "VecUtils.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/Analysis/ScalarEvolutionExpressions.h" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/Analysis/Verifier.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/Value.h" +#include "llvm/Pass.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetLibraryInfo.h" +#include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Utils/Local.h" +#include <algorithm> +#include <map> + +using namespace llvm; + +static const unsigned MinVecRegSize = 128; + +static const unsigned RecursionMaxDepth = 6; + +namespace llvm { + +BoUpSLP::BoUpSLP(BasicBlock *Bb, ScalarEvolution *S, DataLayout *Dl, + TargetTransformInfo *Tti, AliasAnalysis *Aa, Loop *Lp) : + BB(Bb), SE(S), DL(Dl), TTI(Tti), AA(Aa), L(Lp) { + numberInstructions(); +} + +void BoUpSLP::numberInstructions() { + int Loc = 0; + InstrIdx.clear(); + InstrVec.clear(); + // Number the instructions in the block. + for (BasicBlock::iterator it=BB->begin(), e=BB->end(); it != e; ++it) { + InstrIdx[it] = Loc++; + InstrVec.push_back(it); + assert(InstrVec[InstrIdx[it]] == it && "Invalid allocation"); + } +} + +Value *BoUpSLP::getPointerOperand(Value *I) { + if (LoadInst *LI = dyn_cast<LoadInst>(I)) return LI->getPointerOperand(); + if (StoreInst *SI = dyn_cast<StoreInst>(I)) return SI->getPointerOperand(); + return 0; +} + +unsigned BoUpSLP::getAddressSpaceOperand(Value *I) { + if (LoadInst *L=dyn_cast<LoadInst>(I)) return L->getPointerAddressSpace(); + if (StoreInst *S=dyn_cast<StoreInst>(I)) return S->getPointerAddressSpace(); + return -1; +} + +bool BoUpSLP::isConsecutiveAccess(Value *A, Value *B) { + Value *PtrA = getPointerOperand(A); + Value *PtrB = getPointerOperand(B); + unsigned ASA = getAddressSpaceOperand(A); + unsigned ASB = getAddressSpaceOperand(B); + + // Check that the address spaces match and that the pointers are valid. 
+ if (!PtrA || !PtrB || (ASA != ASB)) return false; + + // Check that A and B are of the same type. + if (PtrA->getType() != PtrB->getType()) return false; + + // Calculate the distance. + const SCEV *PtrSCEVA = SE->getSCEV(PtrA); + const SCEV *PtrSCEVB = SE->getSCEV(PtrB); + const SCEV *OffsetSCEV = SE->getMinusSCEV(PtrSCEVA, PtrSCEVB); + const SCEVConstant *ConstOffSCEV = dyn_cast<SCEVConstant>(OffsetSCEV); + + // Non constant distance. + if (!ConstOffSCEV) return false; + + int64_t Offset = ConstOffSCEV->getValue()->getSExtValue(); + Type *Ty = cast<PointerType>(PtrA->getType())->getElementType(); + // The Instructions are connsecutive if the size of the first load/store is + // the same as the offset. + int64_t Sz = DL->getTypeStoreSize(Ty); + return ((-Offset) == Sz); +} + +bool BoUpSLP::vectorizeStoreChain(ArrayRef<Value *> Chain, int CostThreshold) { + Type *StoreTy = cast<StoreInst>(Chain[0])->getValueOperand()->getType(); + unsigned Sz = DL->getTypeSizeInBits(StoreTy); + unsigned VF = MinVecRegSize / Sz; + + if (!isPowerOf2_32(Sz) || VF < 2) return false; + + bool Changed = false; + // Look for profitable vectorizable trees at all offsets, starting at zero. + for (unsigned i = 0, e = Chain.size(); i < e; ++i) { + if (i + VF > e) return Changed; + DEBUG(dbgs()<<"SLP: Analyzing " << VF << " stores at offset "<< i << "\n"); + ArrayRef<Value *> Operands = Chain.slice(i, VF); + + int Cost = getTreeCost(Operands); + DEBUG(dbgs() << "SLP: Found cost=" << Cost << " for VF=" << VF << "\n"); + if (Cost < CostThreshold) { + DEBUG(dbgs() << "SLP: Decided to vectorize cost=" << Cost << "\n"); + vectorizeTree(Operands, VF); + i += VF - 1; + Changed = true; + } + } + + return Changed; +} + +bool BoUpSLP::vectorizeStores(ArrayRef<StoreInst *> Stores, int costThreshold) { + ValueSet Heads, Tails; + SmallDenseMap<Value*, Value*> ConsecutiveChain; + + // We may run into multiple chains that merge into a single chain. We mark the + // stores that we vectorized so that we don't visit the same store twice. + ValueSet VectorizedStores; + bool Changed = false; + + // Do a quadratic search on all of the given stores and find + // all of the pairs of loads that follow each other. + for (unsigned i = 0, e = Stores.size(); i < e; ++i) + for (unsigned j = 0; j < e; ++j) { + if (i == j) continue; + if (isConsecutiveAccess(Stores[i], Stores[j])) { + Tails.insert(Stores[j]); + Heads.insert(Stores[i]); + ConsecutiveChain[Stores[i]] = Stores[j]; + } + } + + // For stores that start but don't end a link in the chain: + for (ValueSet::iterator it = Heads.begin(), e = Heads.end();it != e; ++it) { + if (Tails.count(*it)) continue; + + // We found a store instr that starts a chain. Now follow the chain and try + // to vectorize it. + ValueList Operands; + Value *I = *it; + // Collect the chain into a list. + while (Tails.count(I) || Heads.count(I)) { + if (VectorizedStores.count(I)) break; + Operands.push_back(I); + // Move to the next value in the chain. + I = ConsecutiveChain[I]; + } + + bool Vectorized = vectorizeStoreChain(Operands, costThreshold); + + // Mark the vectorized stores so that we don't vectorize them again. + if (Vectorized) + VectorizedStores.insert(Operands.begin(), Operands.end()); + Changed |= Vectorized; + } + + return Changed; +} + +int BoUpSLP::getScalarizationCost(ArrayRef<Value *> VL) { + // Find the type of the operands in VL. 
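// Worked example for the consecutive-access check above (illustrative, not from the
// patch): for two i32 stores to &a[3] and &a[4], SCEV gives PtrA - PtrB = -4, and the
// i32 store size is 4 bytes, so (-Offset) == Sz holds and the accesses are consecutive.
// With the 128-bit MinVecRegSize that element size also yields VF = 128 / 32 = 4 lanes.
static_assert(128 / 32 == 4, "i32 lanes in a 128-bit vector register");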
+ Type *ScalarTy = VL[0]->getType(); + if (StoreInst *SI = dyn_cast<StoreInst>(VL[0])) + ScalarTy = SI->getValueOperand()->getType(); + VectorType *VecTy = VectorType::get(ScalarTy, VL.size()); + // Find the cost of inserting/extracting values from the vector. + return getScalarizationCost(VecTy); +} + +int BoUpSLP::getScalarizationCost(Type *Ty) { + int Cost = 0; + for (unsigned i = 0, e = cast<VectorType>(Ty)->getNumElements(); i < e; ++i) + Cost += TTI->getVectorInstrCost(Instruction::InsertElement, Ty, i); + return Cost; +} + +AliasAnalysis::Location BoUpSLP::getLocation(Instruction *I) { + if (StoreInst *SI = dyn_cast<StoreInst>(I)) return AA->getLocation(SI); + if (LoadInst *LI = dyn_cast<LoadInst>(I)) return AA->getLocation(LI); + return AliasAnalysis::Location(); +} + +Value *BoUpSLP::isUnsafeToSink(Instruction *Src, Instruction *Dst) { + assert(Src->getParent() == Dst->getParent() && "Not the same BB"); + BasicBlock::iterator I = Src, E = Dst; + /// Scan all of the instruction from SRC to DST and check if + /// the source may alias. + for (++I; I != E; ++I) { + // Ignore store instructions that are marked as 'ignore'. + if (MemBarrierIgnoreList.count(I)) continue; + if (Src->mayWriteToMemory()) /* Write */ { + if (!I->mayReadOrWriteMemory()) continue; + } else /* Read */ { + if (!I->mayWriteToMemory()) continue; + } + AliasAnalysis::Location A = getLocation(&*I); + AliasAnalysis::Location B = getLocation(Src); + + if (!A.Ptr || !B.Ptr || AA->alias(A, B)) + return I; + } + return 0; +} + +void BoUpSLP::vectorizeArith(ArrayRef<Value *> Operands) { + Value *Vec = vectorizeTree(Operands, Operands.size()); + BasicBlock::iterator Loc = cast<Instruction>(Vec); + IRBuilder<> Builder(++Loc); + // After vectorizing the operands we need to generate extractelement + // instructions and replace all of the uses of the scalar values with + // the values that we extracted from the vectorized tree. + for (unsigned i = 0, e = Operands.size(); i != e; ++i) { + Value *S = Builder.CreateExtractElement(Vec, Builder.getInt32(i)); + Operands[i]->replaceAllUsesWith(S); + } +} + +int BoUpSLP::getTreeCost(ArrayRef<Value *> VL) { + // Get rid of the list of stores that were removed, and from the + // lists of instructions with multiple users. + MemBarrierIgnoreList.clear(); + LaneMap.clear(); + MultiUserVals.clear(); + MustScalarize.clear(); + + // Scan the tree and find which value is used by which lane, and which values + // must be scalarized. + getTreeUses_rec(VL, 0); + + // Check that instructions with multiple users can be vectorized. Mark unsafe + // instructions. + for (ValueSet::iterator it = MultiUserVals.begin(), + e = MultiUserVals.end(); it != e; ++it) { + // Check that all of the users of this instr are within the tree + // and that they are all from the same lane. + int Lane = -1; + for (Value::use_iterator I = (*it)->use_begin(), E = (*it)->use_end(); + I != E; ++I) { + if (LaneMap.find(*I) == LaneMap.end()) { + MustScalarize.insert(*it); + DEBUG(dbgs()<<"SLP: Adding " << **it << + " to MustScalarize because of an out of tree usage.\n"); + break; + } + if (Lane == -1) Lane = LaneMap[*I]; + if (Lane != LaneMap[*I]) { + MustScalarize.insert(*it); + DEBUG(dbgs()<<"Adding " << **it << + " to MustScalarize because multiple lane use it: " + << Lane << " and " << LaneMap[*I] << ".\n"); + break; + } + } + } + + // Now calculate the cost of vectorizing the tree. 
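// A hypothetical source-level case for the multi-user check above: t0 feeds
// lane 0 of the store tree but is also stored through g, a use outside the
// tree, so getTreeCost() would add t0 to MustScalarize and the {t0, t1}
// bundle would be priced as a gather instead of a vector operation.
void out_of_tree_use(int *a, int *g, int x, int y) {
  int t0 = x + y;  // lane 0 value, used twice
  int t1 = x + 1;  // lane 1 value (same opcode, so the pair forms a tree)
  a[0] = t0;
  a[1] = t1;
  *g = t0;         // out-of-tree use: t0 must stay scalar
}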
+ return getTreeCost_rec(VL, 0); +} + +void BoUpSLP::getTreeUses_rec(ArrayRef<Value *> VL, unsigned Depth) { + if (Depth == RecursionMaxDepth) return; + + // Don't handle vectors. + if (VL[0]->getType()->isVectorTy()) return; + if (StoreInst *SI = dyn_cast<StoreInst>(VL[0])) + if (SI->getValueOperand()->getType()->isVectorTy()) return; + + // Check if all of the operands are constants. + bool AllConst = true; + bool AllSameScalar = true; + for (unsigned i = 0, e = VL.size(); i < e; ++i) { + AllConst &= isa<Constant>(VL[i]); + AllSameScalar &= (VL[0] == VL[i]); + Instruction *I = dyn_cast<Instruction>(VL[i]); + // If one of the instructions is out of this BB, we need to scalarize all. + if (I && I->getParent() != BB) return; + } + + // If all of the operands are identical or constant we have a simple solution. + if (AllConst || AllSameScalar) return; + + // Scalarize unknown structures. + Instruction *VL0 = dyn_cast<Instruction>(VL[0]); + if (!VL0) return; + + unsigned Opcode = VL0->getOpcode(); + for (unsigned i = 0, e = VL.size(); i < e; ++i) { + Instruction *I = dyn_cast<Instruction>(VL[i]); + // If not all of the instructions are identical then we have to scalarize. + if (!I || Opcode != I->getOpcode()) return; + } + + // Mark instructions with multiple users. + for (unsigned i = 0, e = VL.size(); i < e; ++i) { + Instruction *I = dyn_cast<Instruction>(VL[i]); + // Remember to check if all of the users of this instr are vectorized + // within our tree. + if (I && I->getNumUses() > 1) MultiUserVals.insert(I); + } + + for (int i = 0, e = VL.size(); i < e; ++i) { + // Check that the instruction is only used within + // one lane. + if (LaneMap.count(VL[i]) && LaneMap[VL[i]] != i) return; + // Make this instruction as 'seen' and remember the lane. + LaneMap[VL[i]] = i; + } + + switch (Opcode) { + case Instruction::ZExt: + case Instruction::SExt: + case Instruction::FPToUI: + case Instruction::FPToSI: + case Instruction::FPExt: + case Instruction::PtrToInt: + case Instruction::IntToPtr: + case Instruction::SIToFP: + case Instruction::UIToFP: + case Instruction::Trunc: + case Instruction::FPTrunc: + case Instruction::BitCast: + case Instruction::Add: + case Instruction::FAdd: + case Instruction::Sub: + case Instruction::FSub: + case Instruction::Mul: + case Instruction::FMul: + case Instruction::UDiv: + case Instruction::SDiv: + case Instruction::FDiv: + case Instruction::URem: + case Instruction::SRem: + case Instruction::FRem: + case Instruction::Shl: + case Instruction::LShr: + case Instruction::AShr: + case Instruction::And: + case Instruction::Or: + case Instruction::Xor: { + for (unsigned i = 0, e = VL0->getNumOperands(); i < e; ++i) { + ValueList Operands; + // Prepare the operand vector. + for (unsigned j = 0; j < VL.size(); ++j) + Operands.push_back(cast<Instruction>(VL[j])->getOperand(i)); + + getTreeUses_rec(Operands, Depth+1); + } + return; + } + case Instruction::Store: { + ValueList Operands; + for (unsigned j = 0; j < VL.size(); ++j) + Operands.push_back(cast<Instruction>(VL[j])->getOperand(0)); + getTreeUses_rec(Operands, Depth+1); + return; + } + default: + return; + } +} + +int BoUpSLP::getTreeCost_rec(ArrayRef<Value *> VL, unsigned Depth) { + Type *ScalarTy = VL[0]->getType(); + + if (StoreInst *SI = dyn_cast<StoreInst>(VL[0])) + ScalarTy = SI->getValueOperand()->getType(); + + /// Don't mess with vectors. 
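// A hypothetical case for the one-lane-per-value rule checked above: t1 is an
// operand of lane 0 and of lane 1, so the use-walk stops for that bundle and
// getTreeCost() later moves t1 into MustScalarize because its users live in
// two different lanes.
void cross_lane(int *a, int x, int y, int z, int w) {
  int t0 = x + y;
  int t1 = x + z;  // needed by both lanes below
  int t2 = y + w;
  a[0] = t0 + t1;  // lane 0
  a[1] = t1 + t2;  // lane 1
}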
+ if (ScalarTy->isVectorTy()) return max_cost; + VectorType *VecTy = VectorType::get(ScalarTy, VL.size()); + + if (Depth == RecursionMaxDepth) return getScalarizationCost(VecTy); + + // Check if all of the operands are constants. + bool AllConst = true; + bool AllSameScalar = true; + bool MustScalarizeFlag = false; + for (unsigned i = 0, e = VL.size(); i < e; ++i) { + AllConst &= isa<Constant>(VL[i]); + AllSameScalar &= (VL[0] == VL[i]); + // Must have a single use. + Instruction *I = dyn_cast<Instruction>(VL[i]); + MustScalarizeFlag |= MustScalarize.count(VL[i]); + // This instruction is outside the basic block. + if (I && I->getParent() != BB) + return getScalarizationCost(VecTy); + } + + // Is this a simple vector constant. + if (AllConst) return 0; + + // If all of the operands are identical we can broadcast them. + Instruction *VL0 = dyn_cast<Instruction>(VL[0]); + if (AllSameScalar) { + // If we are in a loop, and this is not an instruction (e.g. constant or + // argument) or the instruction is defined outside the loop then assume + // that the cost is zero. + if (L && (!VL0 || !L->contains(VL0))) + return 0; + + // We need to broadcast the scalar. + return TTI->getShuffleCost(TargetTransformInfo::SK_Broadcast, VecTy, 0); + } + + // If this is not a constant, or a scalar from outside the loop then we + // need to scalarize it. + if (MustScalarizeFlag) + return getScalarizationCost(VecTy); + + if (!VL0) return getScalarizationCost(VecTy); + assert(VL0->getParent() == BB && "Wrong BB"); + + unsigned Opcode = VL0->getOpcode(); + for (unsigned i = 0, e = VL.size(); i < e; ++i) { + Instruction *I = dyn_cast<Instruction>(VL[i]); + // If not all of the instructions are identical then we have to scalarize. + if (!I || Opcode != I->getOpcode()) return getScalarizationCost(VecTy); + } + + // Check if it is safe to sink the loads or the stores. + if (Opcode == Instruction::Load || Opcode == Instruction::Store) { + int MaxIdx = InstrIdx[VL0]; + for (unsigned i = 1, e = VL.size(); i < e; ++i ) + MaxIdx = std::max(MaxIdx, InstrIdx[VL[i]]); + + Instruction *Last = InstrVec[MaxIdx]; + for (unsigned i = 0, e = VL.size(); i < e; ++i ) { + if (VL[i] == Last) continue; + Value *Barrier = isUnsafeToSink(cast<Instruction>(VL[i]), Last); + if (Barrier) { + DEBUG(dbgs() << "SLP: Can't sink " << *VL[i] << "\n down to " << + *Last << "\n because of " << *Barrier << "\n"); + return max_cost; + } + } + } + + switch (Opcode) { + case Instruction::ZExt: + case Instruction::SExt: + case Instruction::FPToUI: + case Instruction::FPToSI: + case Instruction::FPExt: + case Instruction::PtrToInt: + case Instruction::IntToPtr: + case Instruction::SIToFP: + case Instruction::UIToFP: + case Instruction::Trunc: + case Instruction::FPTrunc: + case Instruction::BitCast: { + int Cost = 0; + ValueList Operands; + Type *SrcTy = VL0->getOperand(0)->getType(); + // Prepare the operand vector. + for (unsigned j = 0; j < VL.size(); ++j) { + Operands.push_back(cast<Instruction>(VL[j])->getOperand(0)); + // Check that the casted type is the same for all users. + if (cast<Instruction>(VL[j])->getOperand(0)->getType() != SrcTy) + return getScalarizationCost(VecTy); + } + + Cost += getTreeCost_rec(Operands, Depth+1); + if (Cost >= max_cost) return max_cost; + + // Calculate the cost of this instruction. 
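// A hypothetical case for the sink-safety check above: vectorizing the two
// loads requires sinking the load of a[0] down to the load of a[1], but the
// store through q in between may alias them, so isUnsafeToSink() returns that
// store and the bundle is rejected with max_cost.
int blocked_sink(int *a, int *q) {
  int x = a[0];
  *q = 7;          // possible alias of a[0..1]; acts as a memory barrier
  int y = a[1];
  return x + y;
}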
+ int ScalarCost = VL.size() * TTI->getCastInstrCost(VL0->getOpcode(), + VL0->getType(), SrcTy); + + VectorType *SrcVecTy = VectorType::get(SrcTy, VL.size()); + int VecCost = TTI->getCastInstrCost(VL0->getOpcode(), VecTy, SrcVecTy); + Cost += (VecCost - ScalarCost); + return Cost; + } + case Instruction::Add: + case Instruction::FAdd: + case Instruction::Sub: + case Instruction::FSub: + case Instruction::Mul: + case Instruction::FMul: + case Instruction::UDiv: + case Instruction::SDiv: + case Instruction::FDiv: + case Instruction::URem: + case Instruction::SRem: + case Instruction::FRem: + case Instruction::Shl: + case Instruction::LShr: + case Instruction::AShr: + case Instruction::And: + case Instruction::Or: + case Instruction::Xor: { + int Cost = 0; + // Calculate the cost of all of the operands. + for (unsigned i = 0, e = VL0->getNumOperands(); i < e; ++i) { + ValueList Operands; + // Prepare the operand vector. + for (unsigned j = 0; j < VL.size(); ++j) + Operands.push_back(cast<Instruction>(VL[j])->getOperand(i)); + + Cost += getTreeCost_rec(Operands, Depth+1); + if (Cost >= max_cost) return max_cost; + } + + // Calculate the cost of this instruction. + int ScalarCost = VecTy->getNumElements() * + TTI->getArithmeticInstrCost(Opcode, ScalarTy); + + int VecCost = TTI->getArithmeticInstrCost(Opcode, VecTy); + Cost += (VecCost - ScalarCost); + return Cost; + } + case Instruction::Load: { + // If we are scalarize the loads, add the cost of forming the vector. + for (unsigned i = 0, e = VL.size()-1; i < e; ++i) + if (!isConsecutiveAccess(VL[i], VL[i+1])) + return getScalarizationCost(VecTy); + + // Cost of wide load - cost of scalar loads. + int ScalarLdCost = VecTy->getNumElements() * + TTI->getMemoryOpCost(Instruction::Load, ScalarTy, 1, 0); + int VecLdCost = TTI->getMemoryOpCost(Instruction::Load, ScalarTy, 1, 0); + return VecLdCost - ScalarLdCost; + } + case Instruction::Store: { + // We know that we can merge the stores. Calculate the cost. + int ScalarStCost = VecTy->getNumElements() * + TTI->getMemoryOpCost(Instruction::Store, ScalarTy, 1, 0); + int VecStCost = TTI->getMemoryOpCost(Instruction::Store, ScalarTy, 1,0); + int StoreCost = VecStCost - ScalarStCost; + + ValueList Operands; + for (unsigned j = 0; j < VL.size(); ++j) { + Operands.push_back(cast<Instruction>(VL[j])->getOperand(0)); + MemBarrierIgnoreList.insert(VL[j]); + } + + int TotalCost = StoreCost + getTreeCost_rec(Operands, Depth + 1); + return TotalCost; + } + default: + // Unable to vectorize unknown instructions. + return getScalarizationCost(VecTy); + } +} + +Instruction *BoUpSLP::GetLastInstr(ArrayRef<Value *> VL, unsigned VF) { + int MaxIdx = InstrIdx[BB->getFirstNonPHI()]; + for (unsigned i = 0; i < VF; ++i ) + MaxIdx = std::max(MaxIdx, InstrIdx[VL[i]]); + return InstrVec[MaxIdx + 1]; +} + +Value *BoUpSLP::Scalarize(ArrayRef<Value *> VL, VectorType *Ty) { + IRBuilder<> Builder(GetLastInstr(VL, Ty->getNumElements())); + Value *Vec = UndefValue::get(Ty); + for (unsigned i=0; i < Ty->getNumElements(); ++i) { + // Generate the 'InsertElement' instruction. + Vec = Builder.CreateInsertElement(Vec, VL[i], Builder.getInt32(i)); + // Remember that this instruction is used as part of a 'gather' sequence. + // The caller of the bottom-up slp vectorizer can try to hoist the sequence + // if the users are outside of the basic block. 
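// Hypothetical numbers to make the "VecCost - ScalarCost" bookkeeping above
// concrete for a 4 x i32 add bundle (the real values depend entirely on the
// target's TargetTransformInfo):
//   ScalarCost = 4 * getArithmeticInstrCost(Add, i32)       = 4 * 1 = 4
//   VecCost    =     getArithmeticInstrCost(Add, <4 x i32>) = 1
//   Cost      +=     VecCost - ScalarCost                   = -3
// A negative running total is what lets getTreeCost() report the whole tree
// as profitable to vectorize.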
+ GatherInstructions.push_back(Vec); + } + + return Vec; +} + +Value *BoUpSLP::vectorizeTree(ArrayRef<Value *> VL, int VF) { + Value *V = vectorizeTree_rec(VL, VF); + // We moved some instructions around. We have to number them again + // before we can do any analysis. + numberInstructions(); + MustScalarize.clear(); + return V; +} + +Value *BoUpSLP::vectorizeTree_rec(ArrayRef<Value *> VL, int VF) { + Type *ScalarTy = VL[0]->getType(); + if (StoreInst *SI = dyn_cast<StoreInst>(VL[0])) + ScalarTy = SI->getValueOperand()->getType(); + VectorType *VecTy = VectorType::get(ScalarTy, VF); + + // Check if all of the operands are constants or identical. + bool AllConst = true; + bool AllSameScalar = true; + for (unsigned i = 0, e = VF; i < e; ++i) { + AllConst &= isa<Constant>(VL[i]); + AllSameScalar &= (VL[0] == VL[i]); + // The instruction must be in the same BB, and it must be vectorizable. + Instruction *I = dyn_cast<Instruction>(VL[i]); + if (MustScalarize.count(VL[i]) || (I && I->getParent() != BB)) + return Scalarize(VL, VecTy); + } + + // Check that this is a simple vector constant. + if (AllConst || AllSameScalar) return Scalarize(VL, VecTy); + + // Scalarize unknown structures. + Instruction *VL0 = dyn_cast<Instruction>(VL[0]); + if (!VL0) return Scalarize(VL, VecTy); + + if (VectorizedValues.count(VL0)) return VectorizedValues[VL0]; + + unsigned Opcode = VL0->getOpcode(); + for (unsigned i = 0, e = VF; i < e; ++i) { + Instruction *I = dyn_cast<Instruction>(VL[i]); + // If not all of the instructions are identical then we have to scalarize. + if (!I || Opcode != I->getOpcode()) return Scalarize(VL, VecTy); + } + + switch (Opcode) { + case Instruction::ZExt: + case Instruction::SExt: + case Instruction::FPToUI: + case Instruction::FPToSI: + case Instruction::FPExt: + case Instruction::PtrToInt: + case Instruction::IntToPtr: + case Instruction::SIToFP: + case Instruction::UIToFP: + case Instruction::Trunc: + case Instruction::FPTrunc: + case Instruction::BitCast: { + ValueList INVL; + for (int i = 0; i < VF; ++i) + INVL.push_back(cast<Instruction>(VL[i])->getOperand(0)); + Value *InVec = vectorizeTree_rec(INVL, VF); + IRBuilder<> Builder(GetLastInstr(VL, VF)); + CastInst *CI = dyn_cast<CastInst>(VL0); + Value *V = Builder.CreateCast(CI->getOpcode(), InVec, VecTy); + VectorizedValues[VL0] = V; + return V; + } + case Instruction::Add: + case Instruction::FAdd: + case Instruction::Sub: + case Instruction::FSub: + case Instruction::Mul: + case Instruction::FMul: + case Instruction::UDiv: + case Instruction::SDiv: + case Instruction::FDiv: + case Instruction::URem: + case Instruction::SRem: + case Instruction::FRem: + case Instruction::Shl: + case Instruction::LShr: + case Instruction::AShr: + case Instruction::And: + case Instruction::Or: + case Instruction::Xor: { + ValueList LHSVL, RHSVL; + for (int i = 0; i < VF; ++i) { + RHSVL.push_back(cast<Instruction>(VL[i])->getOperand(0)); + LHSVL.push_back(cast<Instruction>(VL[i])->getOperand(1)); + } + + Value *RHS = vectorizeTree_rec(RHSVL, VF); + Value *LHS = vectorizeTree_rec(LHSVL, VF); + IRBuilder<> Builder(GetLastInstr(VL, VF)); + BinaryOperator *BinOp = cast<BinaryOperator>(VL0); + Value *V = Builder.CreateBinOp(BinOp->getOpcode(), RHS,LHS); + VectorizedValues[VL0] = V; + return V; + } + case Instruction::Load: { + LoadInst *LI = cast<LoadInst>(VL0); + unsigned Alignment = LI->getAlignment(); + + // Check if all of the loads are consecutive. 
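// An illustrative picture of what vectorizeTree_rec() emits for a bundle of
// four consecutive stores of adds, combining the binary-operator case above
// with the Load/Store cases that follow (all IR names are made up). For a
// source fragment such as
//   for (i = 0; i < 4; ++i) a[i] = b[i] + c[i];   // fully unrolled
// the scalar operands are vectorized first, then one wide operation and one
// wide store replace the four scalar copies, roughly:
//   %pb   = bitcast i32* %b to <4 x i32>*
//   %vb   = load <4 x i32>* %pb
//   %vc   = load <4 x i32>* %pc
//   %vadd = add <4 x i32> %vb, %vc
//   %pa   = bitcast i32* %a to <4 x i32>*
//   store <4 x i32> %vadd, <4 x i32>* %pa
// and the four original scalar stores are erased.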
+ for (unsigned i = 1, e = VF; i < e; ++i) + if (!isConsecutiveAccess(VL[i-1], VL[i])) + return Scalarize(VL, VecTy); + + IRBuilder<> Builder(GetLastInstr(VL, VF)); + Value *VecPtr = Builder.CreateBitCast(LI->getPointerOperand(), + VecTy->getPointerTo()); + LI = Builder.CreateLoad(VecPtr); + LI->setAlignment(Alignment); + VectorizedValues[VL0] = LI; + return LI; + } + case Instruction::Store: { + StoreInst *SI = cast<StoreInst>(VL0); + unsigned Alignment = SI->getAlignment(); + + ValueList ValueOp; + for (int i = 0; i < VF; ++i) + ValueOp.push_back(cast<StoreInst>(VL[i])->getValueOperand()); + + Value *VecValue = vectorizeTree_rec(ValueOp, VF); + + IRBuilder<> Builder(GetLastInstr(VL, VF)); + Value *VecPtr = Builder.CreateBitCast(SI->getPointerOperand(), + VecTy->getPointerTo()); + Builder.CreateStore(VecValue, VecPtr)->setAlignment(Alignment); + + for (int i = 0; i < VF; ++i) + cast<Instruction>(VL[i])->eraseFromParent(); + return 0; + } + default: + Value *S = Scalarize(VL, VecTy); + VectorizedValues[VL0] = S; + return S; + } +} + +} // end of namespace diff --git a/contrib/llvm/lib/Transforms/Vectorize/VecUtils.h b/contrib/llvm/lib/Transforms/Vectorize/VecUtils.h new file mode 100644 index 0000000..5456c6c --- /dev/null +++ b/contrib/llvm/lib/Transforms/Vectorize/VecUtils.h @@ -0,0 +1,164 @@ +//===- VecUtils.h - Vectorization Utilities -------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This family of classes and functions manipulate vectors and chains of +// vectors. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TRANSFORMS_VECTORIZE_VECUTILS_H +#define LLVM_TRANSFORMS_VECTORIZE_VECUTILS_H + +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include <vector> + +namespace llvm { + +class BasicBlock; class Instruction; class Type; +class VectorType; class StoreInst; class Value; +class ScalarEvolution; class DataLayout; +class TargetTransformInfo; class AliasAnalysis; +class Loop; + +/// Bottom Up SLP vectorization utility class. +struct BoUpSLP { + typedef SmallVector<Value*, 8> ValueList; + typedef SmallPtrSet<Value*, 16> ValueSet; + typedef SmallVector<StoreInst*, 8> StoreList; + static const int max_cost = 1<<20; + + // \brief C'tor. + BoUpSLP(BasicBlock *Bb, ScalarEvolution *Se, DataLayout *Dl, + TargetTransformInfo *Tti, AliasAnalysis *Aa, Loop *Lp); + + /// \brief Take the pointer operand from the Load/Store instruction. + /// \returns NULL if this is not a valid Load/Store instruction. + static Value *getPointerOperand(Value *I); + + /// \brief Take the address space operand from the Load/Store instruction. + /// \returns -1 if this is not a valid Load/Store instruction. + static unsigned getAddressSpaceOperand(Value *I); + + /// \returns true if the memory operations A and B are consecutive. + bool isConsecutiveAccess(Value *A, Value *B); + + /// \brief Vectorize the tree that starts with the elements in \p VL. + /// \returns the vectorized value. + Value *vectorizeTree(ArrayRef<Value *> VL, int VF); + + /// \returns the vectorization cost of the subtree that starts at \p VL. + /// A negative number means that this is profitable. 
+ int getTreeCost(ArrayRef<Value *> VL); + + /// \returns the scalarization cost for this list of values. Assuming that + /// this subtree gets vectorized, we may need to extract the values from the + /// roots. This method calculates the cost of extracting the values. + int getScalarizationCost(ArrayRef<Value *> VL); + + /// \brief Attempts to order and vectorize a sequence of stores. This + /// function does a quadratic scan of the given stores. + /// \returns true if the basic block was modified. + bool vectorizeStores(ArrayRef<StoreInst *> Stores, int costThreshold); + + /// \brief Vectorize a group of scalars into a vector tree. + void vectorizeArith(ArrayRef<Value *> Operands); + + /// \returns the list of new instructions that were added in order to collect + /// scalars into vectors. This list can be used to further optimize the gather + /// sequences. + ValueList &getGatherSeqInstructions() {return GatherInstructions; } + +private: + /// \brief This method contains the recursive part of getTreeCost. + int getTreeCost_rec(ArrayRef<Value *> VL, unsigned Depth); + + /// \brief This recursive method looks for vectorization hazards such as + /// values that are used by multiple users and checks that values are used + /// by only one vector lane. It updates the variables LaneMap, MultiUserVals. + void getTreeUses_rec(ArrayRef<Value *> VL, unsigned Depth); + + /// \brief This method contains the recursive part of vectorizeTree. + Value *vectorizeTree_rec(ArrayRef<Value *> VL, int VF); + + /// \brief Number all of the instructions in the block. + void numberInstructions(); + + /// \brief Vectorize a sorted sequence of stores. + bool vectorizeStoreChain(ArrayRef<Value *> Chain, int CostThreshold); + + /// \returns the scalarization cost for this type. Scalarization in this + /// context means the creation of vectors from a group of scalars. + int getScalarizationCost(Type *Ty); + + /// \returns the AA location that is being access by the instruction. + AliasAnalysis::Location getLocation(Instruction *I); + + /// \brief Checks if it is possible to sink an instruction from + /// \p Src to \p Dst. + /// \returns the pointer to the barrier instruction if we can't sink. + Value *isUnsafeToSink(Instruction *Src, Instruction *Dst); + + /// \returns the instruction that appears last in the BB from \p VL. + /// Only consider the first \p VF elements. + Instruction *GetLastInstr(ArrayRef<Value *> VL, unsigned VF); + + /// \returns a vector from a collection of scalars in \p VL. + Value *Scalarize(ArrayRef<Value *> VL, VectorType *Ty); + +private: + /// Maps instructions to numbers and back. + SmallDenseMap<Value*, int> InstrIdx; + /// Maps integers to Instructions. + std::vector<Instruction*> InstrVec; + + // -- containers that are used during getTreeCost -- // + + /// Contains values that must be scalarized because they are used + /// by multiple lanes, or by users outside the tree. + /// NOTICE: The vectorization methods also use this set. + ValueSet MustScalarize; + + /// Contains a list of values that are used outside the current tree. This + /// set must be reset between runs. + ValueSet MultiUserVals; + /// Maps values in the tree to the vector lanes that uses them. This map must + /// be reset between runs of getCost. + std::map<Value*, int> LaneMap; + /// A list of instructions to ignore while sinking + /// memory instructions. This map must be reset between runs of getCost. 
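// A minimal driver sketch for the BoUpSLP interface declared above, assuming
// VecUtils.h and the usual analysis headers are already included; 'Bundle' is
// a hypothetical list of isomorphic scalars taken from BB.
static bool trySLPBundle(BasicBlock *BB, ScalarEvolution *SE, DataLayout *DL,
                         TargetTransformInfo *TTI, AliasAnalysis *AA, Loop *L,
                         ArrayRef<Value *> Bundle) {
  BoUpSLP R(BB, SE, DL, TTI, AA, L);        // analyses are owned by the caller
  if (R.getTreeCost(Bundle) >= 0)           // negative cost means profitable
    return false;
  R.vectorizeTree(Bundle, Bundle.size());   // emit the vector form
  return true;
}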
+ SmallPtrSet<Value *, 8> MemBarrierIgnoreList; + + // -- Containers that are used during vectorizeTree -- // + + /// Maps between the first scalar to the vector. This map must be reset + ///between runs. + DenseMap<Value*, Value*> VectorizedValues; + + // -- Containers that are used after vectorization by the caller -- // + + /// A list of instructions that are used when gathering scalars into vectors. + /// In many cases these instructions can be hoisted outside of the BB. + /// Iterating over this list is faster than calling LICM. + ValueList GatherInstructions; + + // Analysis and block reference. + BasicBlock *BB; + ScalarEvolution *SE; + DataLayout *DL; + TargetTransformInfo *TTI; + AliasAnalysis *AA; + Loop *L; +}; + +} // end of namespace + +#endif // LLVM_TRANSFORMS_VECTORIZE_VECUTILS_H diff --git a/contrib/llvm/lib/Transforms/Vectorize/Vectorize.cpp b/contrib/llvm/lib/Transforms/Vectorize/Vectorize.cpp index 19eefd2..a927fe1 100644 --- a/contrib/llvm/lib/Transforms/Vectorize/Vectorize.cpp +++ b/contrib/llvm/lib/Transforms/Vectorize/Vectorize.cpp @@ -1,4 +1,4 @@ - //===-- Vectorize.cpp -----------------------------------------------------===// +//===-- Vectorize.cpp -----------------------------------------------------===// // // The LLVM Compiler Infrastructure // @@ -28,6 +28,7 @@ using namespace llvm; void llvm::initializeVectorization(PassRegistry &Registry) { initializeBBVectorizePass(Registry); initializeLoopVectorizePass(Registry); + initializeSLPVectorizerPass(Registry); } void LLVMInitializeVectorization(LLVMPassRegistryRef R) { @@ -41,3 +42,7 @@ void LLVMAddBBVectorizePass(LLVMPassManagerRef PM) { void LLVMAddLoopVectorizePass(LLVMPassManagerRef PM) { unwrap(PM)->add(createLoopVectorizePass()); } + +void LLVMAddSLPVectorizePass(LLVMPassManagerRef PM) { + unwrap(PM)->add(createSLPVectorizerPass()); +} |
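A short usage sketch for the new C binding above; the surrounding setup is hypothetical, and the header path assumes the declaration sits next to the existing vectorizer bindings in llvm-c/Transforms/Vectorize.h.

#include "llvm-c/Core.h"
#include "llvm-c/Transforms/Vectorize.h"

static void addVectorizePasses(LLVMPassManagerRef PM) {
  LLVMAddLoopVectorizePass(PM); /* existing binding */
  LLVMAddSLPVectorizePass(PM);  /* binding added by this change */
}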