Diffstat (limited to 'lib')
435 files changed, 16611 insertions, 9788 deletions
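One of the central changes in the lib/Analysis/CallGraphSCCPass.cpp hunks below is that the CGSCC pass manager now re-runs its passes on an SCC when a function pass devirtualizes a call (turns an indirect call into a direct one). As a minimal sketch, paraphrasing the runOnModule hunk in this diff (the names, the -max-cg-scc-iterations option, and its default of 4 are taken from the hunks below, not a restatement of the final upstream code):

    unsigned Iteration = 0;
    bool DevirtualizedCall = false;
    do {
      // Reset each iteration; RunAllPassesOnSCC sets it again if a
      // function pass devirtualized a call on this SCC.
      DevirtualizedCall = false;
      Changed |= RunAllPassesOnSCC(CurSCC, CG, DevirtualizedCall);
    } while (Iteration++ < MaxIterations && DevirtualizedCall);  // cl::init(4)

Per the RefreshCallGraph hunk, devirtualization is detected either directly, when an existing indirect call edge is replaced by a direct one, or heuristically, when a refresh removes more indirect call edges than it adds while adding more direct ones.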
diff --git a/lib/Analysis/AliasAnalysisEvaluator.cpp b/lib/Analysis/AliasAnalysisEvaluator.cpp index 308b9e3..bfa3ff1 100644 --- a/lib/Analysis/AliasAnalysisEvaluator.cpp +++ b/lib/Analysis/AliasAnalysisEvaluator.cpp @@ -108,6 +108,11 @@ PrintModRefResults(const char *Msg, bool P, Instruction *I, Value *Ptr, } } +static inline bool isInterestingPointer(Value *V) { + return V->getType()->isPointerTy() + && !isa<ConstantPointerNull>(V); +} + bool AAEval::runOnFunction(Function &F) { AliasAnalysis &AA = getAnalysis<AliasAnalysis>(); @@ -115,21 +120,31 @@ bool AAEval::runOnFunction(Function &F) { SetVector<CallSite> CallSites; for (Function::arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E; ++I) - if (I->getType()->isPointerTy()) // Add all pointer arguments + if (I->getType()->isPointerTy()) // Add all pointer arguments. Pointers.insert(I); for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I) { - if (I->getType()->isPointerTy()) // Add all pointer instructions + if (I->getType()->isPointerTy()) // Add all pointer instructions. Pointers.insert(&*I); Instruction &Inst = *I; - User::op_iterator OI = Inst.op_begin(); CallSite CS = CallSite::get(&Inst); - if (CS.getInstruction() && - isa<Function>(CS.getCalledValue())) - ++OI; // Skip actual functions for direct function calls. - for (; OI != Inst.op_end(); ++OI) - if ((*OI)->getType()->isPointerTy() && !isa<ConstantPointerNull>(*OI)) - Pointers.insert(*OI); + if (CS) { + Value *Callee = CS.getCalledValue(); + // Skip actual functions for direct function calls. + if (!isa<Function>(Callee) && isInterestingPointer(Callee)) + Pointers.insert(Callee); + // Consider formals. + for (CallSite::arg_iterator AI = CS.arg_begin(), AE = CS.arg_end(); + AI != AE; ++AI) + if (isInterestingPointer(*AI)) + Pointers.insert(*AI); + } else { + // Consider all operands. + for (Instruction::op_iterator OI = Inst.op_begin(), OE = Inst.op_end(); + OI != OE; ++OI) + if (isInterestingPointer(*OI)) + Pointers.insert(*OI); + } if (CS.getInstruction()) CallSites.insert(CS); } diff --git a/lib/Analysis/BasicAliasAnalysis.cpp b/lib/Analysis/BasicAliasAnalysis.cpp index 31a649d..cfe7a1c 100644 --- a/lib/Analysis/BasicAliasAnalysis.cpp +++ b/lib/Analysis/BasicAliasAnalysis.cpp @@ -655,6 +655,11 @@ BasicAliasAnalysis::aliasPHI(const PHINode *PN, unsigned PNSize, AliasAnalysis::AliasResult BasicAliasAnalysis::aliasCheck(const Value *V1, unsigned V1Size, const Value *V2, unsigned V2Size) { + // If either of the memory references is empty, it doesn't matter what the + // pointer values are. + if (V1Size == 0 || V2Size == 0) + return NoAlias; + // Strip off any casts if they exist. 
V1 = V1->stripPointerCasts(); V2 = V2->stripPointerCasts(); diff --git a/lib/Analysis/CMakeLists.txt b/lib/Analysis/CMakeLists.txt index 17c9b86..ad05dd9 100644 --- a/lib/Analysis/CMakeLists.txt +++ b/lib/Analysis/CMakeLists.txt @@ -21,6 +21,7 @@ add_llvm_library(LLVMAnalysis LazyValueInfo.cpp LibCallAliasAnalysis.cpp LibCallSemantics.cpp + Lint.cpp LiveValues.cpp LoopDependenceAnalysis.cpp LoopInfo.cpp @@ -38,6 +39,7 @@ add_llvm_library(LLVMAnalysis ScalarEvolution.cpp ScalarEvolutionAliasAnalysis.cpp ScalarEvolutionExpander.cpp + ScalarEvolutionNormalization.cpp SparsePropagation.cpp Trace.cpp ValueTracking.cpp diff --git a/lib/Analysis/ConstantFolding.cpp b/lib/Analysis/ConstantFolding.cpp index dda1fba..37cda02 100644 --- a/lib/Analysis/ConstantFolding.cpp +++ b/lib/Analysis/ConstantFolding.cpp @@ -401,7 +401,7 @@ static Constant *FoldReinterpretLoadFromConstPtr(Constant *C, APInt ResultVal = APInt(IntType->getBitWidth(), RawBytes[BytesLoaded-1]); for (unsigned i = 1; i != BytesLoaded; ++i) { ResultVal <<= 8; - ResultVal |= APInt(IntType->getBitWidth(), RawBytes[BytesLoaded-1-i]); + ResultVal |= RawBytes[BytesLoaded-1-i]; } return ConstantInt::get(IntType->getContext(), ResultVal); diff --git a/lib/Analysis/DebugInfo.cpp b/lib/Analysis/DebugInfo.cpp index 8ba1902..141b181 100644 --- a/lib/Analysis/DebugInfo.cpp +++ b/lib/Analysis/DebugInfo.cpp @@ -402,6 +402,17 @@ uint64_t DIDerivedType::getOriginalTypeSize() const { return getSizeInBits(); } +/// isInlinedFnArgument - Return trule if this variable provides debugging +/// information for an inlined function arguments. +bool DIVariable::isInlinedFnArgument(const Function *CurFn) { + assert(CurFn && "Invalid function"); + if (!getContext().isSubprogram()) + return false; + // This variable is not inlined function argument if its scope + // does not describe current function. + return !(DISubprogram(getContext().getNode()).describes(CurFn)); +} + /// describes - Return true if this subprogram provides debugging /// information for the function F. 
bool DISubprogram::describes(const Function *F) { @@ -414,6 +425,13 @@ bool DISubprogram::describes(const Function *F) { return false; } +unsigned DISubprogram::isOptimized() const { + assert (DbgNode && "Invalid subprogram descriptor!"); + if (DbgNode->getNumOperands() == 16) + return getUnsignedField(15); + return 0; +} + StringRef DIScope::getFilename() const { if (!DbgNode) return StringRef(); @@ -901,7 +919,8 @@ DISubprogram DIFactory::CreateSubprogram(DIDescriptor Context, bool isDefinition, unsigned VK, unsigned VIndex, DIType ContainingType, - bool isArtificial) { + bool isArtificial, + bool isOptimized) { Value *Elts[] = { GetTagConstant(dwarf::DW_TAG_subprogram), @@ -918,9 +937,10 @@ DISubprogram DIFactory::CreateSubprogram(DIDescriptor Context, ConstantInt::get(Type::getInt32Ty(VMContext), (unsigned)VK), ConstantInt::get(Type::getInt32Ty(VMContext), VIndex), ContainingType.getNode(), - ConstantInt::get(Type::getInt1Ty(VMContext), isArtificial) + ConstantInt::get(Type::getInt1Ty(VMContext), isArtificial), + ConstantInt::get(Type::getInt1Ty(VMContext), isOptimized) }; - return DISubprogram(MDNode::get(VMContext, &Elts[0], 15)); + return DISubprogram(MDNode::get(VMContext, &Elts[0], 16)); } /// CreateSubprogramDefinition - Create new subprogram descriptor for the @@ -945,9 +965,10 @@ DISubprogram DIFactory::CreateSubprogramDefinition(DISubprogram &SPDeclaration) DeclNode->getOperand(11), // Virtuality DeclNode->getOperand(12), // VIndex DeclNode->getOperand(13), // Containting Type - DeclNode->getOperand(14) // isArtificial + DeclNode->getOperand(14), // isArtificial + DeclNode->getOperand(15) // isOptimized }; - return DISubprogram(MDNode::get(VMContext, &Elts[0], 15)); + return DISubprogram(MDNode::get(VMContext, &Elts[0], 16)); } /// CreateGlobalVariable - Create a new descriptor for the specified global. 
@@ -1150,10 +1171,8 @@ void DebugInfoFinder::processModule(Module &M) { for (Function::iterator FI = (*I).begin(), FE = (*I).end(); FI != FE; ++FI) for (BasicBlock::iterator BI = (*FI).begin(), BE = (*FI).end(); BI != BE; ++BI) { - if (DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(BI)) { + if (DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(BI)) processDeclare(DDI); - continue; - } DebugLoc Loc = BI->getDebugLoc(); if (Loc.isUnknown()) diff --git a/lib/Analysis/DomPrinter.cpp b/lib/Analysis/DomPrinter.cpp index 3af687a..a1676e5 100644 --- a/lib/Analysis/DomPrinter.cpp +++ b/lib/Analysis/DomPrinter.cpp @@ -83,31 +83,6 @@ struct DOTGraphTraits<PostDominatorTree*> } namespace { -template <class Analysis, bool OnlyBBS> -struct GenericGraphViewer : public FunctionPass { - std::string Name; - - GenericGraphViewer(std::string GraphName, const void *ID) : FunctionPass(ID) { - Name = GraphName; - } - - virtual bool runOnFunction(Function &F) { - Analysis *Graph; - std::string Title, GraphName; - Graph = &getAnalysis<Analysis>(); - GraphName = DOTGraphTraits<Analysis*>::getGraphName(Graph); - Title = GraphName + " for '" + F.getNameStr() + "' function"; - ViewGraph(Graph, Name, OnlyBBS, Title); - - return false; - } - - virtual void getAnalysisUsage(AnalysisUsage &AU) const { - AU.setPreservesAll(); - AU.addRequired<Analysis>(); - } -}; - struct DomViewer : public DOTGraphTraitsViewer<DominatorTree, false> { static char ID; diff --git a/lib/Analysis/IPA/CallGraph.cpp b/lib/Analysis/IPA/CallGraph.cpp index 8c43aa1..2bde56d7 100644 --- a/lib/Analysis/IPA/CallGraph.cpp +++ b/lib/Analysis/IPA/CallGraph.cpp @@ -158,8 +158,11 @@ private: // destroy - Release memory for the call graph virtual void destroy() { /// CallsExternalNode is not in the function map, delete it explicitly. - delete CallsExternalNode; - CallsExternalNode = 0; + if (CallsExternalNode) { + CallsExternalNode->allReferencesDropped(); + delete CallsExternalNode; + CallsExternalNode = 0; + } CallGraph::destroy(); } }; @@ -181,6 +184,14 @@ void CallGraph::initialize(Module &M) { void CallGraph::destroy() { if (FunctionMap.empty()) return; + // Reset all node's use counts to zero before deleting them to prevent an + // assertion from firing. 
+#ifndef NDEBUG + for (FunctionMapTy::iterator I = FunctionMap.begin(), E = FunctionMap.end(); + I != E; ++I) + I->second->allReferencesDropped(); +#endif + for (FunctionMapTy::iterator I = FunctionMap.begin(), E = FunctionMap.end(); I != E; ++I) delete I->second; @@ -233,14 +244,16 @@ void CallGraphNode::print(raw_ostream &OS) const { else OS << "Call graph node <<null function>>"; - OS << "<<0x" << this << ">> #uses=" << getNumReferences() << '\n'; + OS << "<<" << this << ">> #uses=" << getNumReferences() << '\n'; - for (const_iterator I = begin(), E = end(); I != E; ++I) + for (const_iterator I = begin(), E = end(); I != E; ++I) { + OS << " CS<" << I->first << "> calls "; if (Function *FI = I->second->getFunction()) - OS << " Calls function '" << FI->getName() <<"'\n"; - else - OS << " Calls external node\n"; - OS << "\n"; + OS << "function '" << FI->getName() <<"'\n"; + else + OS << "external node\n"; + } + OS << '\n'; } void CallGraphNode::dump() const { print(dbgs()); } diff --git a/lib/Analysis/IPA/CallGraphSCCPass.cpp b/lib/Analysis/IPA/CallGraphSCCPass.cpp index fb08041..0c01ee5 100644 --- a/lib/Analysis/IPA/CallGraphSCCPass.cpp +++ b/lib/Analysis/IPA/CallGraphSCCPass.cpp @@ -22,11 +22,18 @@ #include "llvm/PassManagers.h" #include "llvm/Analysis/CallGraph.h" #include "llvm/ADT/SCCIterator.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/Timer.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; +static cl::opt<unsigned> +MaxIterations("max-cg-scc-iterations", cl::ReallyHidden, cl::init(4)); + +STATISTIC(MaxSCCIterations, "Maximum CGSCCPassMgr iterations on one SCC"); + //===----------------------------------------------------------------------===// // CGPassManager // @@ -81,55 +88,31 @@ public: } private: - bool RunPassOnSCC(Pass *P, std::vector<CallGraphNode*> &CurSCC, - CallGraph &CG, bool &CallGraphUpToDate); - void RefreshCallGraph(std::vector<CallGraphNode*> &CurSCC, CallGraph &CG, + bool RunAllPassesOnSCC(CallGraphSCC &CurSCC, CallGraph &CG, + bool &DevirtualizedCall); + + bool RunPassOnSCC(Pass *P, CallGraphSCC &CurSCC, + CallGraph &CG, bool &CallGraphUpToDate, + bool &DevirtualizedCall); + bool RefreshCallGraph(CallGraphSCC &CurSCC, CallGraph &CG, bool IsCheckingMode); }; -/// PrintCallGraphPass - Print a Module corresponding to a call graph. -/// -class PrintCallGraphPass : public CallGraphSCCPass { -private: - std::string Banner; - raw_ostream &Out; // raw_ostream to print on. - -public: - static char ID; - PrintCallGraphPass() : CallGraphSCCPass(&ID), Out(dbgs()) {} - PrintCallGraphPass(const std::string &B, raw_ostream &o) - : CallGraphSCCPass(&ID), Banner(B), Out(o) {} - - virtual void getAnalysisUsage(AnalysisUsage &AU) const { - AU.setPreservesAll(); - } - - bool runOnSCC(std::vector<CallGraphNode *> &SCC) { - Out << Banner; - for (std::vector<CallGraphNode *>::iterator n = SCC.begin(), ne = SCC.end(); - n != ne; - ++n) { - (*n)->getFunction()->print(Out); - } - return false; - } -}; - } // end anonymous namespace. 
char CGPassManager::ID = 0; -char PrintCallGraphPass::ID = 0; -bool CGPassManager::RunPassOnSCC(Pass *P, std::vector<CallGraphNode*> &CurSCC, - CallGraph &CG, bool &CallGraphUpToDate) { +bool CGPassManager::RunPassOnSCC(Pass *P, CallGraphSCC &CurSCC, + CallGraph &CG, bool &CallGraphUpToDate, + bool &DevirtualizedCall) { bool Changed = false; PMDataManager *PM = P->getAsPMDataManager(); if (PM == 0) { CallGraphSCCPass *CGSP = (CallGraphSCCPass*)P; if (!CallGraphUpToDate) { - RefreshCallGraph(CurSCC, CG, false); + DevirtualizedCall |= RefreshCallGraph(CurSCC, CG, false); CallGraphUpToDate = true; } @@ -154,8 +137,9 @@ bool CGPassManager::RunPassOnSCC(Pass *P, std::vector<CallGraphNode*> &CurSCC, FPPassManager *FPP = (FPPassManager*)P; // Run pass P on all functions in the current SCC. - for (unsigned i = 0, e = CurSCC.size(); i != e; ++i) { - if (Function *F = CurSCC[i]->getFunction()) { + for (CallGraphSCC::iterator I = CurSCC.begin(), E = CurSCC.end(); + I != E; ++I) { + if (Function *F = (*I)->getFunction()) { dumpPassInfo(P, EXECUTION_MSG, ON_FUNCTION_MSG, F->getName()); TimeRegion PassTimer(getPassTimer(FPP)); Changed |= FPP->runOnFunction(*F); @@ -178,26 +162,39 @@ bool CGPassManager::RunPassOnSCC(Pass *P, std::vector<CallGraphNode*> &CurSCC, /// FunctionPasses have potentially munged the callgraph, and can be used after /// CallGraphSCC passes to verify that they correctly updated the callgraph. /// -void CGPassManager::RefreshCallGraph(std::vector<CallGraphNode*> &CurSCC, +/// This function returns true if it devirtualized an existing function call, +/// meaning it turned an indirect call into a direct call. This happens when +/// a function pass like GVN optimizes away stuff feeding the indirect call. +/// This never happens in checking mode. +/// +bool CGPassManager::RefreshCallGraph(CallGraphSCC &CurSCC, CallGraph &CG, bool CheckingMode) { DenseMap<Value*, CallGraphNode*> CallSites; DEBUG(dbgs() << "CGSCCPASSMGR: Refreshing SCC with " << CurSCC.size() << " nodes:\n"; - for (unsigned i = 0, e = CurSCC.size(); i != e; ++i) - CurSCC[i]->dump(); + for (CallGraphSCC::iterator I = CurSCC.begin(), E = CurSCC.end(); + I != E; ++I) + (*I)->dump(); ); bool MadeChange = false; + bool DevirtualizedCall = false; // Scan all functions in the SCC. - for (unsigned sccidx = 0, e = CurSCC.size(); sccidx != e; ++sccidx) { - CallGraphNode *CGN = CurSCC[sccidx]; + unsigned FunctionNo = 0; + for (CallGraphSCC::iterator SCCIdx = CurSCC.begin(), E = CurSCC.end(); + SCCIdx != E; ++SCCIdx, ++FunctionNo) { + CallGraphNode *CGN = *SCCIdx; Function *F = CGN->getFunction(); if (F == 0 || F->isDeclaration()) continue; // Walk the function body looking for call sites. Sync up the call sites in // CGN with those actually in the function. + + // Keep track of the number of direct and indirect calls that were + // invalidated and removed. + unsigned NumDirectRemoved = 0, NumIndirectRemoved = 0; // Get the set of call sites currently in the function. for (CallGraphNode::iterator I = CGN->begin(), E = CGN->end(); I != E; ) { @@ -216,6 +213,12 @@ void CGPassManager::RefreshCallGraph(std::vector<CallGraphNode*> &CurSCC, assert(!CheckingMode && "CallGraphSCCPass did not update the CallGraph correctly!"); + // If this was an indirect call site, count it. + if (I->second->getFunction() == 0) + ++NumIndirectRemoved; + else + ++NumDirectRemoved; + // Just remove the edge from the set of callees, keep track of whether // I points to the last element of the vector. 
bool WasLast = I + 1 == E; @@ -237,6 +240,9 @@ void CGPassManager::RefreshCallGraph(std::vector<CallGraphNode*> &CurSCC, } // Loop over all of the instructions in the function, getting the callsites. + // Keep track of the number of direct/indirect calls added. + unsigned NumDirectAdded = 0, NumIndirectAdded = 0; + for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) { CallSite CS = CallSite::get(I); @@ -271,19 +277,21 @@ void CGPassManager::RefreshCallGraph(std::vector<CallGraphNode*> &CurSCC, // If not, we either went from a direct call to indirect, indirect to // direct, or direct to different direct. CallGraphNode *CalleeNode; - if (Function *Callee = CS.getCalledFunction()) + if (Function *Callee = CS.getCalledFunction()) { CalleeNode = CG.getOrInsertFunction(Callee); - else + // Keep track of whether we turned an indirect call into a direct + // one. + if (ExistingNode->getFunction() == 0) { + DevirtualizedCall = true; + DEBUG(dbgs() << " CGSCCPASSMGR: Devirtualized call to '" + << Callee->getName() << "'\n"); + } + } else { CalleeNode = CG.getCallsExternalNode(); + } // Update the edge target in CGN. - for (CallGraphNode::iterator I = CGN->begin(); ; ++I) { - assert(I != CGN->end() && "Didn't find call entry"); - if (I->first == CS.getInstruction()) { - I->second = CalleeNode; - break; - } - } + CGN->replaceCallEdge(CS, CS, CalleeNode); MadeChange = true; continue; } @@ -291,19 +299,34 @@ void CGPassManager::RefreshCallGraph(std::vector<CallGraphNode*> &CurSCC, assert(!CheckingMode && "CallGraphSCCPass did not update the CallGraph correctly!"); - // If the call site didn't exist in the CGN yet, add it. We assume that - // newly introduced call sites won't be indirect. This could be fixed - // in the future. + // If the call site didn't exist in the CGN yet, add it. CallGraphNode *CalleeNode; - if (Function *Callee = CS.getCalledFunction()) + if (Function *Callee = CS.getCalledFunction()) { CalleeNode = CG.getOrInsertFunction(Callee); - else + ++NumDirectAdded; + } else { CalleeNode = CG.getCallsExternalNode(); + ++NumIndirectAdded; + } CGN->addCalledFunction(CS, CalleeNode); MadeChange = true; } + // We scanned the old callgraph node, removing invalidated call sites and + // then added back newly found call sites. One thing that can happen is + // that an old indirect call site was deleted and replaced with a new direct + // call. In this case, we have devirtualized a call, and CGSCCPM would like + // to iteratively optimize the new code. Unfortunately, we don't really + // have a great way to detect when this happens. As an approximation, we + // just look at whether the number of indirect calls is reduced and the + // number of direct calls is increased. There are tons of ways to fool this + // (e.g. DCE'ing an indirect call and duplicating an unrelated block with a + // direct call) but this is close enough. + if (NumIndirectRemoved > NumIndirectAdded && + NumDirectRemoved < NumDirectAdded) + DevirtualizedCall = true; + // After scanning this function, if we still have entries in callsites, then // they are dangling pointers. WeakVH should save us for this, so abort if // this happens. @@ -311,18 +334,85 @@ void CGPassManager::RefreshCallGraph(std::vector<CallGraphNode*> &CurSCC, // Periodically do an explicit clear to remove tombstones when processing // large scc's. 
- if ((sccidx & 15) == 0) + if ((FunctionNo & 15) == 15) CallSites.clear(); } DEBUG(if (MadeChange) { dbgs() << "CGSCCPASSMGR: Refreshed SCC is now:\n"; - for (unsigned i = 0, e = CurSCC.size(); i != e; ++i) - CurSCC[i]->dump(); + for (CallGraphSCC::iterator I = CurSCC.begin(), E = CurSCC.end(); + I != E; ++I) + (*I)->dump(); + if (DevirtualizedCall) + dbgs() << "CGSCCPASSMGR: Refresh devirtualized a call!\n"; + } else { dbgs() << "CGSCCPASSMGR: SCC Refresh didn't change call graph.\n"; } ); + + return DevirtualizedCall; +} + +/// RunAllPassesOnSCC - Execute the body of the entire pass manager on the +/// specified SCC. This keeps track of whether a function pass devirtualizes +/// any calls and returns it in DevirtualizedCall. +bool CGPassManager::RunAllPassesOnSCC(CallGraphSCC &CurSCC, CallGraph &CG, + bool &DevirtualizedCall) { + bool Changed = false; + + // CallGraphUpToDate - Keep track of whether the callgraph is known to be + // up-to-date or not. The CGSSC pass manager runs two types of passes: + // CallGraphSCC Passes and other random function passes. Because other + // random function passes are not CallGraph aware, they may clobber the + // call graph by introducing new calls or deleting other ones. This flag + // is set to false when we run a function pass so that we know to clean up + // the callgraph when we need to run a CGSCCPass again. + bool CallGraphUpToDate = true; + + // Run all passes on current SCC. + for (unsigned PassNo = 0, e = getNumContainedPasses(); + PassNo != e; ++PassNo) { + Pass *P = getContainedPass(PassNo); + + // If we're in -debug-pass=Executions mode, construct the SCC node list, + // otherwise avoid constructing this string as it is expensive. + if (isPassDebuggingExecutionsOrMore()) { + std::string Functions; + #ifndef NDEBUG + raw_string_ostream OS(Functions); + for (CallGraphSCC::iterator I = CurSCC.begin(), E = CurSCC.end(); + I != E; ++I) { + if (I != CurSCC.begin()) OS << ", "; + (*I)->print(OS); + } + OS.flush(); + #endif + dumpPassInfo(P, EXECUTION_MSG, ON_CG_MSG, Functions); + } + dumpRequiredSet(P); + + initializeAnalysisImpl(P); + + // Actually run this pass on the current SCC. + Changed |= RunPassOnSCC(P, CurSCC, CG, + CallGraphUpToDate, DevirtualizedCall); + + if (Changed) + dumpPassInfo(P, MODIFICATION_MSG, ON_CG_MSG, ""); + dumpPreservedSet(P); + + verifyPreservedAnalysis(P); + removeNotPreservedAnalysis(P); + recordAvailableAnalysis(P); + removeDeadPasses(P, "", ON_CG_MSG); + } + + // If the callgraph was left out of date (because the last pass run was a + // functionpass), refresh it before we move on to the next SCC. + if (!CallGraphUpToDate) + DevirtualizedCall |= RefreshCallGraph(CurSCC, CG, false); + return Changed; } /// run - Execute all of the passes scheduled for execution. Keep track of @@ -330,72 +420,53 @@ void CGPassManager::RefreshCallGraph(std::vector<CallGraphNode*> &CurSCC, bool CGPassManager::runOnModule(Module &M) { CallGraph &CG = getAnalysis<CallGraph>(); bool Changed = doInitialization(CG); - - std::vector<CallGraphNode*> CurSCC; // Walk the callgraph in bottom-up SCC order. - for (scc_iterator<CallGraph*> CGI = scc_begin(&CG), E = scc_end(&CG); - CGI != E;) { + scc_iterator<CallGraph*> CGI = scc_begin(&CG); + + CallGraphSCC CurSCC(&CGI); + while (!CGI.isAtEnd()) { // Copy the current SCC and increment past it so that the pass can hack // on the SCC if it wants to without invalidating our iterator. 
- CurSCC = *CGI; + std::vector<CallGraphNode*> &NodeVec = *CGI; + CurSCC.initialize(&NodeVec[0], &NodeVec[0]+NodeVec.size()); ++CGI; + // At the top level, we run all the passes in this pass manager on the + // functions in this SCC. However, we support iterative compilation in the + // case where a function pass devirtualizes a call to a function. For + // example, it is very common for a function pass (often GVN or instcombine) + // to eliminate the addressing that feeds into a call. With that improved + // information, we would like the call to be an inline candidate, infer + // mod-ref information etc. + // + // Because of this, we allow iteration up to a specified iteration count. + // This only happens in the case of a devirtualized call, so we only burn + // compile time in the case that we're making progress. We also have a hard + // iteration count limit in case there is crazy code. + unsigned Iteration = 0; + bool DevirtualizedCall = false; + do { + DEBUG(if (Iteration) + dbgs() << " SCCPASSMGR: Re-visiting SCC, iteration #" + << Iteration << '\n'); + DevirtualizedCall = false; + Changed |= RunAllPassesOnSCC(CurSCC, CG, DevirtualizedCall); + } while (Iteration++ < MaxIterations && DevirtualizedCall); - // CallGraphUpToDate - Keep track of whether the callgraph is known to be - // up-to-date or not. The CGSSC pass manager runs two types of passes: - // CallGraphSCC Passes and other random function passes. Because other - // random function passes are not CallGraph aware, they may clobber the - // call graph by introducing new calls or deleting other ones. This flag - // is set to false when we run a function pass so that we know to clean up - // the callgraph when we need to run a CGSCCPass again. - bool CallGraphUpToDate = true; + if (DevirtualizedCall) + DEBUG(dbgs() << " CGSCCPASSMGR: Stopped iteration after " << Iteration + << " times, due to -max-cg-scc-iterations\n"); - // Run all passes on current SCC. - for (unsigned PassNo = 0, e = getNumContainedPasses(); - PassNo != e; ++PassNo) { - Pass *P = getContainedPass(PassNo); - - // If we're in -debug-pass=Executions mode, construct the SCC node list, - // otherwise avoid constructing this string as it is expensive. - if (isPassDebuggingExecutionsOrMore()) { - std::string Functions; -#ifndef NDEBUG - raw_string_ostream OS(Functions); - for (unsigned i = 0, e = CurSCC.size(); i != e; ++i) { - if (i) OS << ", "; - CurSCC[i]->print(OS); - } - OS.flush(); -#endif - dumpPassInfo(P, EXECUTION_MSG, ON_CG_MSG, Functions); - } - dumpRequiredSet(P); - - initializeAnalysisImpl(P); - - // Actually run this pass on the current SCC. - Changed |= RunPassOnSCC(P, CurSCC, CG, CallGraphUpToDate); - - if (Changed) - dumpPassInfo(P, MODIFICATION_MSG, ON_CG_MSG, ""); - dumpPreservedSet(P); - - verifyPreservedAnalysis(P); - removeNotPreservedAnalysis(P); - recordAvailableAnalysis(P); - removeDeadPasses(P, "", ON_CG_MSG); - } + if (Iteration > MaxSCCIterations) + MaxSCCIterations = Iteration; - // If the callgraph was left out of date (because the last pass run was a - // functionpass), refresh it before we move on to the next SCC. 
- if (!CallGraphUpToDate) - RefreshCallGraph(CurSCC, CG, false); } Changed |= doFinalization(CG); return Changed; } + /// Initialize CG bool CGPassManager::doInitialization(CallGraph &CG) { bool Changed = false; @@ -426,11 +497,32 @@ bool CGPassManager::doFinalization(CallGraph &CG) { return Changed; } -Pass *CallGraphSCCPass::createPrinterPass(raw_ostream &O, - const std::string &Banner) const { - return new PrintCallGraphPass(Banner, O); +//===----------------------------------------------------------------------===// +// CallGraphSCC Implementation +//===----------------------------------------------------------------------===// + +/// ReplaceNode - This informs the SCC and the pass manager that the specified +/// Old node has been deleted, and New is to be used in its place. +void CallGraphSCC::ReplaceNode(CallGraphNode *Old, CallGraphNode *New) { + assert(Old != New && "Should not replace node with self"); + for (unsigned i = 0; ; ++i) { + assert(i != Nodes.size() && "Node not in SCC"); + if (Nodes[i] != Old) continue; + Nodes[i] = New; + break; + } + + // Update the active scc_iterator so that it doesn't contain dangling + // pointers to the old CallGraphNode. + scc_iterator<CallGraph*> *CGI = (scc_iterator<CallGraph*>*)Context; + CGI->ReplaceNode(Old, New); } + +//===----------------------------------------------------------------------===// +// CallGraphSCCPass Implementation +//===----------------------------------------------------------------------===// + /// Assign pass manager to manage this pass. void CallGraphSCCPass::assignPassManager(PMStack &PMS, PassManagerType PreferredType) { @@ -475,3 +567,43 @@ void CallGraphSCCPass::getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired<CallGraph>(); AU.addPreserved<CallGraph>(); } + + +//===----------------------------------------------------------------------===// +// PrintCallGraphPass Implementation +//===----------------------------------------------------------------------===// + +namespace { + /// PrintCallGraphPass - Print a Module corresponding to a call graph. + /// + class PrintCallGraphPass : public CallGraphSCCPass { + std::string Banner; + raw_ostream &Out; // raw_ostream to print on. + + public: + static char ID; + PrintCallGraphPass() : CallGraphSCCPass(&ID), Out(dbgs()) {} + PrintCallGraphPass(const std::string &B, raw_ostream &o) + : CallGraphSCCPass(&ID), Banner(B), Out(o) {} + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + } + + bool runOnSCC(CallGraphSCC &SCC) { + Out << Banner; + for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) + (*I)->getFunction()->print(Out); + return false; + } + }; + +} // end anonymous namespace. + +char PrintCallGraphPass::ID = 0; + +Pass *CallGraphSCCPass::createPrinterPass(raw_ostream &O, + const std::string &Banner) const { + return new PrintCallGraphPass(Banner, O); +} + diff --git a/lib/Analysis/IVUsers.cpp b/lib/Analysis/IVUsers.cpp index 47b5d4a..2c997da 100644 --- a/lib/Analysis/IVUsers.cpp +++ b/lib/Analysis/IVUsers.cpp @@ -36,146 +36,34 @@ Pass *llvm::createIVUsersPass() { return new IVUsers(); } -/// CollectSubexprs - Split S into subexpressions which can be pulled out into -/// separate registers. -static void CollectSubexprs(const SCEV *S, - SmallVectorImpl<const SCEV *> &Ops, - ScalarEvolution &SE) { - if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) { - // Break out add operands. 
- for (SCEVAddExpr::op_iterator I = Add->op_begin(), E = Add->op_end(); - I != E; ++I) - CollectSubexprs(*I, Ops, SE); - return; - } else if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) { - // Split a non-zero base out of an addrec. - if (!AR->getStart()->isZero()) { - CollectSubexprs(AR->getStart(), Ops, SE); - CollectSubexprs(SE.getAddRecExpr(SE.getIntegerSCEV(0, AR->getType()), - AR->getStepRecurrence(SE), - AR->getLoop()), Ops, SE); - return; - } - } - - // Otherwise use the value itself. - Ops.push_back(S); -} +/// isInteresting - Test whether the given expression is "interesting" when +/// used by the given expression, within the context of analyzing the +/// given loop. +static bool isInteresting(const SCEV *S, const Instruction *I, const Loop *L) { + // Anything loop-invariant is interesting. + if (!isa<SCEVUnknown>(S) && S->isLoopInvariant(L)) + return true; -/// getSCEVStartAndStride - Compute the start and stride of this expression, -/// returning false if the expression is not a start/stride pair, or true if it -/// is. The stride must be a loop invariant expression, but the start may be -/// a mix of loop invariant and loop variant expressions. The start cannot, -/// however, contain an AddRec from a different loop, unless that loop is an -/// outer loop of the current loop. -static bool getSCEVStartAndStride(const SCEV *&SH, Loop *L, Loop *UseLoop, - const SCEV *&Start, const SCEV *&Stride, - ScalarEvolution *SE, DominatorTree *DT) { - const SCEV *TheAddRec = Start; // Initialize to zero. - - // If the outer level is an AddExpr, the operands are all start values except - // for a nested AddRecExpr. - if (const SCEVAddExpr *AE = dyn_cast<SCEVAddExpr>(SH)) { - for (unsigned i = 0, e = AE->getNumOperands(); i != e; ++i) - if (const SCEVAddRecExpr *AddRec = - dyn_cast<SCEVAddRecExpr>(AE->getOperand(i))) - TheAddRec = SE->getAddExpr(AddRec, TheAddRec); - else - Start = SE->getAddExpr(Start, AE->getOperand(i)); - } else if (isa<SCEVAddRecExpr>(SH)) { - TheAddRec = SH; - } else { - return false; // not analyzable. + // An addrec is interesting if it's affine or if it has an interesting start. + if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) { + // Keep things simple. Don't touch loop-variant strides. + if (AR->getLoop() == L) + return AR->isAffine() || !L->contains(I); + // Otherwise recurse to see if the start value is interesting. + return isInteresting(AR->getStart(), I, L); } - // Break down TheAddRec into its component parts. - SmallVector<const SCEV *, 4> Subexprs; - CollectSubexprs(TheAddRec, Subexprs, *SE); - - // Look for an addrec on the current loop among the parts. - const SCEV *AddRecStride = 0; - for (SmallVectorImpl<const SCEV *>::iterator I = Subexprs.begin(), - E = Subexprs.end(); I != E; ++I) { - const SCEV *S = *I; - if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) - if (AR->getLoop() == L) { - *I = AR->getStart(); - AddRecStride = AR->getStepRecurrence(*SE); - break; - } - } - if (!AddRecStride) + // An add is interesting if any of its operands is. + if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) { + for (SCEVAddExpr::op_iterator OI = Add->op_begin(), OE = Add->op_end(); + OI != OE; ++OI) + if (isInteresting(*OI, I, L)) + return true; return false; - - // Add up everything else into a start value (which may not be - // loop-invariant). - const SCEV *AddRecStart = SE->getAddExpr(Subexprs); - - // Use getSCEVAtScope to attempt to simplify other loops out of - // the picture. 
- AddRecStart = SE->getSCEVAtScope(AddRecStart, UseLoop); - - Start = SE->getAddExpr(Start, AddRecStart); - - // If stride is an instruction, make sure it properly dominates the header. - // Otherwise we could end up with a use before def situation. - if (!isa<SCEVConstant>(AddRecStride)) { - BasicBlock *Header = L->getHeader(); - if (!AddRecStride->properlyDominates(Header, DT)) - return false; - - DEBUG(dbgs() << "["; - WriteAsOperand(dbgs(), L->getHeader(), /*PrintType=*/false); - dbgs() << "] Variable stride: " << *AddRecStride << "\n"); } - Stride = AddRecStride; - return true; -} - -/// IVUseShouldUsePostIncValue - We have discovered a "User" of an IV expression -/// and now we need to decide whether the user should use the preinc or post-inc -/// value. If this user should use the post-inc version of the IV, return true. -/// -/// Choosing wrong here can break dominance properties (if we choose to use the -/// post-inc value when we cannot) or it can end up adding extra live-ranges to -/// the loop, resulting in reg-reg copies (if we use the pre-inc value when we -/// should use the post-inc value). -static bool IVUseShouldUsePostIncValue(Instruction *User, Instruction *IV, - const Loop *L, DominatorTree *DT) { - // If the user is in the loop, use the preinc value. - if (L->contains(User)) return false; - - BasicBlock *LatchBlock = L->getLoopLatch(); - if (!LatchBlock) - return false; - - // Ok, the user is outside of the loop. If it is dominated by the latch - // block, use the post-inc value. - if (DT->dominates(LatchBlock, User->getParent())) - return true; - - // There is one case we have to be careful of: PHI nodes. These little guys - // can live in blocks that are not dominated by the latch block, but (since - // their uses occur in the predecessor block, not the block the PHI lives in) - // should still use the post-inc value. Check for this case now. - PHINode *PN = dyn_cast<PHINode>(User); - if (!PN) return false; // not a phi, not dominated by latch block. - - // Look at all of the uses of IV by the PHI node. If any use corresponds to - // a block that is not dominated by the latch block, give up and use the - // preincremented value. - unsigned NumUses = 0; - for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) - if (PN->getIncomingValue(i) == IV) { - ++NumUses; - if (!DT->dominates(LatchBlock, PN->getIncomingBlock(i))) - return false; - } - - // Okay, all uses of IV by PN are in predecessor blocks that really are - // dominated by the latch block. Use the post-incremented value. - return true; + // Nothing else is interesting here. + return false; } /// AddUsersIfInteresting - Inspect the specified instruction. If it is a @@ -194,18 +82,10 @@ bool IVUsers::AddUsersIfInteresting(Instruction *I) { // Get the symbolic expression for this instruction. const SCEV *ISE = SE->getSCEV(I); - if (isa<SCEVCouldNotCompute>(ISE)) return false; - // Get the start and stride for this expression. - Loop *UseLoop = LI->getLoopFor(I->getParent()); - const SCEV *Start = SE->getIntegerSCEV(0, ISE->getType()); - const SCEV *Stride = Start; - - if (!getSCEVStartAndStride(ISE, L, UseLoop, Start, Stride, SE, DT)) - return false; // Non-reducible symbolic expression, bail out. - - // Keep things simple. Don't touch loop-variant strides. - if (!Stride->isLoopInvariant(L) && L->contains(I)) + // If we've come to an uninteresting expression, stop the traversal and + // call this a user. 
+ if (!isInteresting(ISE, I, L)) return false; SmallPtrSet<Instruction *, 4> UniqueUsers; @@ -241,27 +121,22 @@ bool IVUsers::AddUsersIfInteresting(Instruction *I) { } if (AddUserToIVUsers) { - // Okay, we found a user that we cannot reduce. Analyze the instruction - // and decide what to do with it. If we are a use inside of the loop, use - // the value before incrementation, otherwise use it after incrementation. - if (IVUseShouldUsePostIncValue(User, I, L, DT)) { - // The value used will be incremented by the stride more than we are - // expecting, so subtract this off. - const SCEV *NewStart = SE->getMinusSCEV(Start, Stride); - IVUses.push_back(new IVStrideUse(this, Stride, NewStart, User, I)); - IVUses.back().setIsUseOfPostIncrementedValue(true); - DEBUG(dbgs() << " USING POSTINC SCEV, START=" << *NewStart<< "\n"); - } else { - IVUses.push_back(new IVStrideUse(this, Stride, Start, User, I)); - } + // Okay, we found a user that we cannot reduce. + IVUses.push_back(new IVStrideUse(this, User, I)); + IVStrideUse &NewUse = IVUses.back(); + // Transform the expression into a normalized form. + ISE = TransformForPostIncUse(NormalizeAutodetect, + ISE, User, I, + NewUse.PostIncLoops, + *SE, *DT); + DEBUG(dbgs() << " NORMALIZED TO: " << *ISE << '\n'); } } return true; } -IVStrideUse &IVUsers::AddUser(const SCEV *Stride, const SCEV *Offset, - Instruction *User, Value *Operand) { - IVUses.push_back(new IVStrideUse(this, Stride, Offset, User, Operand)); +IVStrideUse &IVUsers::AddUser(Instruction *User, Value *Operand) { + IVUses.push_back(new IVStrideUse(this, User, Operand)); return IVUses.back(); } @@ -287,40 +162,11 @@ bool IVUsers::runOnLoop(Loop *l, LPPassManager &LPM) { // them by stride. Start by finding all of the PHI nodes in the header for // this loop. If they are induction variables, inspect their uses. for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) - AddUsersIfInteresting(I); + (void)AddUsersIfInteresting(I); return false; } -/// getReplacementExpr - Return a SCEV expression which computes the -/// value of the OperandValToReplace of the given IVStrideUse. -const SCEV *IVUsers::getReplacementExpr(const IVStrideUse &U) const { - // Start with zero. - const SCEV *RetVal = SE->getIntegerSCEV(0, U.getStride()->getType()); - // Create the basic add recurrence. - RetVal = SE->getAddRecExpr(RetVal, U.getStride(), L); - // Add the offset in a separate step, because it may be loop-variant. - RetVal = SE->getAddExpr(RetVal, U.getOffset()); - // For uses of post-incremented values, add an extra stride to compute - // the actual replacement value. - if (U.isUseOfPostIncrementedValue()) - RetVal = SE->getAddExpr(RetVal, U.getStride()); - return RetVal; -} - -/// getCanonicalExpr - Return a SCEV expression which computes the -/// value of the SCEV of the given IVStrideUse, ignoring the -/// isUseOfPostIncrementedValue flag. -const SCEV *IVUsers::getCanonicalExpr(const IVStrideUse &U) const { - // Start with zero. - const SCEV *RetVal = SE->getIntegerSCEV(0, U.getStride()->getType()); - // Create the basic add recurrence. - RetVal = SE->getAddRecExpr(RetVal, U.getStride(), L); - // Add the offset in a separate step, because it may be loop-variant. 
- RetVal = SE->getAddExpr(RetVal, U.getOffset()); - return RetVal; -} - void IVUsers::print(raw_ostream &OS, const Module *M) const { OS << "IV Users for loop "; WriteAsOperand(OS, L->getHeader(), false); @@ -337,10 +183,14 @@ void IVUsers::print(raw_ostream &OS, const Module *M) const { E = IVUses.end(); UI != E; ++UI) { OS << " "; WriteAsOperand(OS, UI->getOperandValToReplace(), false); - OS << " = " - << *getReplacementExpr(*UI); - if (UI->isUseOfPostIncrementedValue()) - OS << " (post-inc)"; + OS << " = " << *getReplacementExpr(*UI); + for (PostIncLoopSet::const_iterator + I = UI->PostIncLoops.begin(), + E = UI->PostIncLoops.end(); I != E; ++I) { + OS << " (post-inc with loop "; + WriteAsOperand(OS, (*I)->getHeader(), false); + OS << ")"; + } OS << " in "; UI->getUser()->print(OS, &Annotator); OS << '\n'; @@ -356,6 +206,49 @@ void IVUsers::releaseMemory() { IVUses.clear(); } +/// getReplacementExpr - Return a SCEV expression which computes the +/// value of the OperandValToReplace. +const SCEV *IVUsers::getReplacementExpr(const IVStrideUse &IU) const { + return SE->getSCEV(IU.getOperandValToReplace()); +} + +/// getExpr - Return the expression for the use. +const SCEV *IVUsers::getExpr(const IVStrideUse &IU) const { + return + TransformForPostIncUse(Normalize, getReplacementExpr(IU), + IU.getUser(), IU.getOperandValToReplace(), + const_cast<PostIncLoopSet &>(IU.getPostIncLoops()), + *SE, *DT); +} + +static const SCEVAddRecExpr *findAddRecForLoop(const SCEV *S, const Loop *L) { + if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) { + if (AR->getLoop() == L) + return AR; + return findAddRecForLoop(AR->getStart(), L); + } + + if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) { + for (SCEVAddExpr::op_iterator I = Add->op_begin(), E = Add->op_end(); + I != E; ++I) + if (const SCEVAddRecExpr *AR = findAddRecForLoop(*I, L)) + return AR; + return 0; + } + + return 0; +} + +const SCEV *IVUsers::getStride(const IVStrideUse &IU, const Loop *L) const { + if (const SCEVAddRecExpr *AR = findAddRecForLoop(getExpr(IU), L)) + return AR->getStepRecurrence(*SE); + return 0; +} + +void IVStrideUse::transformToPostInc(const Loop *L) { + PostIncLoops.insert(L); +} + void IVStrideUse::deleted() { // Remove this user from the list. Parent->IVUses.erase(this); diff --git a/lib/Analysis/InlineCost.cpp b/lib/Analysis/InlineCost.cpp index c599e90..6271371 100644 --- a/lib/Analysis/InlineCost.cpp +++ b/lib/Analysis/InlineCost.cpp @@ -24,28 +24,29 @@ using namespace llvm; unsigned InlineCostAnalyzer::FunctionInfo:: CountCodeReductionForConstant(Value *V) { unsigned Reduction = 0; - for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E; ++UI) - if (isa<BranchInst>(*UI) || isa<SwitchInst>(*UI)) { + for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;++UI){ + User *U = *UI; + if (isa<BranchInst>(U) || isa<SwitchInst>(U)) { // We will be able to eliminate all but one of the successors. - const TerminatorInst &TI = cast<TerminatorInst>(**UI); + const TerminatorInst &TI = cast<TerminatorInst>(*U); const unsigned NumSucc = TI.getNumSuccessors(); unsigned Instrs = 0; for (unsigned I = 0; I != NumSucc; ++I) Instrs += Metrics.NumBBInsts[TI.getSuccessor(I)]; // We don't know which blocks will be eliminated, so use the average size. 
Reduction += InlineConstants::InstrCost*Instrs*(NumSucc-1)/NumSucc; - } else if (CallInst *CI = dyn_cast<CallInst>(*UI)) { + } else if (CallInst *CI = dyn_cast<CallInst>(U)) { // Turning an indirect call into a direct call is a BIG win if (CI->getCalledValue() == V) Reduction += InlineConstants::IndirectCallBonus; - } else if (InvokeInst *II = dyn_cast<InvokeInst>(*UI)) { + } else if (InvokeInst *II = dyn_cast<InvokeInst>(U)) { // Turning an indirect call into a direct call is a BIG win if (II->getCalledValue() == V) Reduction += InlineConstants::IndirectCallBonus; } else { // Figure out if this instruction will be removed due to simple constant // propagation. - Instruction &Inst = cast<Instruction>(**UI); + Instruction &Inst = cast<Instruction>(*U); // We can't constant propagate instructions which have effects or // read memory. @@ -74,7 +75,7 @@ CountCodeReductionForConstant(Value *V) { Reduction += CountCodeReductionForConstant(&Inst); } } - + } return Reduction; } @@ -107,10 +108,10 @@ unsigned InlineCostAnalyzer::FunctionInfo:: return Reduction; } -// callIsSmall - If a call is likely to lower to a single target instruction, or -// is otherwise deemed small return true. -// TODO: Perhaps calls like memcpy, strcpy, etc? -static bool callIsSmall(const Function *F) { +/// callIsSmall - If a call is likely to lower to a single target instruction, +/// or is otherwise deemed small return true. +/// TODO: Perhaps calls like memcpy, strcpy, etc? +bool llvm::callIsSmall(const Function *F) { if (!F) return false; if (F->hasLocalLinkage()) return false; @@ -158,10 +159,18 @@ void CodeMetrics::analyzeBasicBlock(const BasicBlock *BB) { // it. This is a hack because we depend on the user marking their local // variables as volatile if they are live across a setjmp call, and they // probably won't do this in callers. - if (Function *F = CS.getCalledFunction()) + if (Function *F = CS.getCalledFunction()) { if (F->isDeclaration() && (F->getName() == "setjmp" || F->getName() == "_setjmp")) NeverInline = true; + + // If this call is to function itself, then the function is recursive. + // Inlining it into other functions is a bad idea, because this is + // basically just a form of loop peeling, and our metrics aren't useful + // for that case. + if (F == BB->getParent()) + NeverInline = true; + } if (!isa<IntrinsicInst>(II) && !callIsSmall(CS.getCalledFunction())) { // Each argument to a call takes on average one instruction to set up. @@ -249,10 +258,16 @@ void InlineCostAnalyzer::FunctionInfo::analyzeFunction(Function *F) { // function call or not. // InlineCost InlineCostAnalyzer::getInlineCost(CallSite CS, - SmallPtrSet<const Function *, 16> &NeverInline) { + SmallPtrSet<const Function*, 16> &NeverInline) { + return getInlineCost(CS, CS.getCalledFunction(), NeverInline); +} + +InlineCost InlineCostAnalyzer::getInlineCost(CallSite CS, + Function *Callee, + SmallPtrSet<const Function*, 16> &NeverInline) { Instruction *TheCall = CS.getInstruction(); - Function *Callee = CS.getCalledFunction(); Function *Caller = TheCall->getParent()->getParent(); + bool isDirectCall = CS.getCalledFunction() == Callee; // Don't inline functions which can be redefined at link-time to mean // something else. Don't inline functions marked noinline or call sites @@ -267,11 +282,11 @@ InlineCost InlineCostAnalyzer::getInlineCost(CallSite CS, // be inlined. This value may go negative. 
// int InlineCost = 0; - + // If there is only one call of the function, and it has internal linkage, // make it almost guaranteed to be inlined. // - if (Callee->hasLocalLinkage() && Callee->hasOneUse()) + if (Callee->hasLocalLinkage() && Callee->hasOneUse() && isDirectCall) InlineCost += InlineConstants::LastCallToStaticBonus; // If this function uses the coldcc calling convention, prefer not to inline @@ -288,31 +303,36 @@ InlineCost InlineCostAnalyzer::getInlineCost(CallSite CS, } else if (isa<UnreachableInst>(++BasicBlock::iterator(TheCall))) InlineCost += InlineConstants::NoreturnPenalty; - // Get information about the callee... - FunctionInfo &CalleeFI = CachedFunctionInfo[Callee]; + // Get information about the callee. + FunctionInfo *CalleeFI = &CachedFunctionInfo[Callee]; // If we haven't calculated this information yet, do so now. - if (CalleeFI.Metrics.NumBlocks == 0) - CalleeFI.analyzeFunction(Callee); + if (CalleeFI->Metrics.NumBlocks == 0) + CalleeFI->analyzeFunction(Callee); // If we should never inline this, return a huge cost. - if (CalleeFI.Metrics.NeverInline) + if (CalleeFI->Metrics.NeverInline) return InlineCost::getNever(); - // FIXME: It would be nice to kill off CalleeFI.NeverInline. Then we + // FIXME: It would be nice to kill off CalleeFI->NeverInline. Then we // could move this up and avoid computing the FunctionInfo for // things we are going to just return always inline for. This // requires handling setjmp somewhere else, however. if (!Callee->isDeclaration() && Callee->hasFnAttr(Attribute::AlwaysInline)) return InlineCost::getAlways(); - if (CalleeFI.Metrics.usesDynamicAlloca) { - // Get infomation about the caller... + if (CalleeFI->Metrics.usesDynamicAlloca) { + // Get infomation about the caller. FunctionInfo &CallerFI = CachedFunctionInfo[Caller]; // If we haven't calculated this information yet, do so now. - if (CallerFI.Metrics.NumBlocks == 0) + if (CallerFI.Metrics.NumBlocks == 0) { CallerFI.analyzeFunction(Caller); + + // Recompute the CalleeFI pointer, getting Caller could have invalidated + // it. + CalleeFI = &CachedFunctionInfo[Callee]; + } // Don't inline a callee with dynamic alloca into a caller without them. // Functions containing dynamic alloca's are inefficient in various ways; @@ -339,15 +359,15 @@ InlineCost InlineCostAnalyzer::getInlineCost(CallSite CS, // scalarization), so encourage the inlining of the function. // if (isa<AllocaInst>(I)) { - if (ArgNo < CalleeFI.ArgumentWeights.size()) - InlineCost -= CalleeFI.ArgumentWeights[ArgNo].AllocaWeight; + if (ArgNo < CalleeFI->ArgumentWeights.size()) + InlineCost -= CalleeFI->ArgumentWeights[ArgNo].AllocaWeight; // If this is a constant being passed into the function, use the argument // weights calculated for the callee to determine how much will be folded // away with this information. } else if (isa<Constant>(I)) { - if (ArgNo < CalleeFI.ArgumentWeights.size()) - InlineCost -= CalleeFI.ArgumentWeights[ArgNo].ConstantWeight; + if (ArgNo < CalleeFI->ArgumentWeights.size()) + InlineCost -= CalleeFI->ArgumentWeights[ArgNo].ConstantWeight; } } @@ -355,10 +375,10 @@ InlineCost InlineCostAnalyzer::getInlineCost(CallSite CS, // likely to be inlined, look at factors that make us not want to inline it. // Calls usually take a long time, so they make the inlining gain smaller. - InlineCost += CalleeFI.Metrics.NumCalls * InlineConstants::CallPenalty; + InlineCost += CalleeFI->Metrics.NumCalls * InlineConstants::CallPenalty; // Look at the size of the callee. Each instruction counts as 5. 
- InlineCost += CalleeFI.Metrics.NumInsts*InlineConstants::InstrCost; + InlineCost += CalleeFI->Metrics.NumInsts*InlineConstants::InstrCost; return llvm::InlineCost::get(InlineCost); } @@ -368,7 +388,7 @@ InlineCost InlineCostAnalyzer::getInlineCost(CallSite CS, float InlineCostAnalyzer::getInlineFudgeFactor(CallSite CS) { Function *Callee = CS.getCalledFunction(); - // Get information about the callee... + // Get information about the callee. FunctionInfo &CalleeFI = CachedFunctionInfo[Callee]; // If we haven't calculated this information yet, do so now. @@ -392,41 +412,49 @@ float InlineCostAnalyzer::getInlineFudgeFactor(CallSite CS) { /// growCachedCostInfo - update the cached cost info for Caller after Callee has /// been inlined. void -InlineCostAnalyzer::growCachedCostInfo(Function* Caller, Function* Callee) { - FunctionInfo &CallerFI = CachedFunctionInfo[Caller]; +InlineCostAnalyzer::growCachedCostInfo(Function *Caller, Function *Callee) { + CodeMetrics &CallerMetrics = CachedFunctionInfo[Caller].Metrics; // For small functions we prefer to recalculate the cost for better accuracy. - if (CallerFI.Metrics.NumBlocks < 10 || CallerFI.Metrics.NumInsts < 1000) { + if (CallerMetrics.NumBlocks < 10 || CallerMetrics.NumInsts < 1000) { resetCachedCostInfo(Caller); return; } // For large functions, we can save a lot of computation time by skipping // recalculations. - if (CallerFI.Metrics.NumCalls > 0) - --CallerFI.Metrics.NumCalls; - - if (Callee) { - FunctionInfo &CalleeFI = CachedFunctionInfo[Callee]; - if (!CalleeFI.Metrics.NumBlocks) { - resetCachedCostInfo(Caller); - return; - } - CallerFI.Metrics.NeverInline |= CalleeFI.Metrics.NeverInline; - CallerFI.Metrics.usesDynamicAlloca |= CalleeFI.Metrics.usesDynamicAlloca; - - CallerFI.Metrics.NumInsts += CalleeFI.Metrics.NumInsts; - CallerFI.Metrics.NumBlocks += CalleeFI.Metrics.NumBlocks; - CallerFI.Metrics.NumCalls += CalleeFI.Metrics.NumCalls; - CallerFI.Metrics.NumVectorInsts += CalleeFI.Metrics.NumVectorInsts; - CallerFI.Metrics.NumRets += CalleeFI.Metrics.NumRets; - - // analyzeBasicBlock counts each function argument as an inst. - if (CallerFI.Metrics.NumInsts >= Callee->arg_size()) - CallerFI.Metrics.NumInsts -= Callee->arg_size(); - else - CallerFI.Metrics.NumInsts = 0; + if (CallerMetrics.NumCalls > 0) + --CallerMetrics.NumCalls; + + if (Callee == 0) return; + + CodeMetrics &CalleeMetrics = CachedFunctionInfo[Callee].Metrics; + + // If we don't have metrics for the callee, don't recalculate them just to + // update an approximation in the caller. Instead, just recalculate the + // caller info from scratch. + if (CalleeMetrics.NumBlocks == 0) { + resetCachedCostInfo(Caller); + return; } + + // Since CalleeMetrics were already calculated, we know that the CallerMetrics + // reference isn't invalidated: both were in the DenseMap. + CallerMetrics.NeverInline |= CalleeMetrics.NeverInline; + CallerMetrics.usesDynamicAlloca |= CalleeMetrics.usesDynamicAlloca; + + CallerMetrics.NumInsts += CalleeMetrics.NumInsts; + CallerMetrics.NumBlocks += CalleeMetrics.NumBlocks; + CallerMetrics.NumCalls += CalleeMetrics.NumCalls; + CallerMetrics.NumVectorInsts += CalleeMetrics.NumVectorInsts; + CallerMetrics.NumRets += CalleeMetrics.NumRets; + + // analyzeBasicBlock counts each function argument as an inst. + if (CallerMetrics.NumInsts >= Callee->arg_size()) + CallerMetrics.NumInsts -= Callee->arg_size(); + else + CallerMetrics.NumInsts = 0; + // We are not updating the argumentweights. 
We have already determined that // Caller is a fairly large function, so we accept the loss of precision. } diff --git a/lib/Analysis/InstructionSimplify.cpp b/lib/Analysis/InstructionSimplify.cpp index 8288e96..dbefc2d 100644 --- a/lib/Analysis/InstructionSimplify.cpp +++ b/lib/Analysis/InstructionSimplify.cpp @@ -314,6 +314,35 @@ Value *llvm::SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS, return 0; } +/// SimplifySelectInst - Given operands for a SelectInst, see if we can fold +/// the result. If not, this returns null. +Value *llvm::SimplifySelectInst(Value *CondVal, Value *TrueVal, Value *FalseVal, + const TargetData *TD) { + // select true, X, Y -> X + // select false, X, Y -> Y + if (ConstantInt *CB = dyn_cast<ConstantInt>(CondVal)) + return CB->getZExtValue() ? TrueVal : FalseVal; + + // select C, X, X -> X + if (TrueVal == FalseVal) + return TrueVal; + + if (isa<UndefValue>(TrueVal)) // select C, undef, X -> X + return FalseVal; + if (isa<UndefValue>(FalseVal)) // select C, X, undef -> X + return TrueVal; + if (isa<UndefValue>(CondVal)) { // select undef, X, Y -> X or Y + if (isa<Constant>(TrueVal)) + return TrueVal; + return FalseVal; + } + + + + return 0; +} + + /// SimplifyGEPInst - Given operands for an GetElementPtrInst, see if we can /// fold the result. If not, this returns null. Value *llvm::SimplifyGEPInst(Value *const *Ops, unsigned NumOps, @@ -391,6 +420,9 @@ Value *llvm::SimplifyInstruction(Instruction *I, const TargetData *TD) { case Instruction::FCmp: return SimplifyFCmpInst(cast<FCmpInst>(I)->getPredicate(), I->getOperand(0), I->getOperand(1), TD); + case Instruction::Select: + return SimplifySelectInst(I->getOperand(0), I->getOperand(1), + I->getOperand(2), TD); case Instruction::GetElementPtr: { SmallVector<Value*, 8> Ops(I->op_begin(), I->op_end()); return SimplifyGEPInst(&Ops[0], Ops.size(), TD); diff --git a/lib/Analysis/Lint.cpp b/lib/Analysis/Lint.cpp new file mode 100644 index 0000000..25d4f95 --- /dev/null +++ b/lib/Analysis/Lint.cpp @@ -0,0 +1,495 @@ +//===-- Lint.cpp - Check for common errors in LLVM IR ---------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass statically checks for common and easily-identified constructs +// which produce undefined or likely unintended behavior in LLVM IR. +// +// It is not a guarantee of correctness, in two ways. First, it isn't +// comprehensive. There are checks which could be done statically which are +// not yet implemented. Some of these are indicated by TODO comments, but +// those aren't comprehensive either. Second, many conditions cannot be +// checked statically. This pass does no dynamic instrumentation, so it +// can't check for all possible problems. +// +// Another limitation is that it assumes all code will be executed. A store +// through a null pointer in a basic block which is never reached is harmless, +// but this pass will warn about it anyway. +// +// Optimization passes may make conditions that this pass checks for more or +// less obvious. If an optimization pass appears to be introducing a warning, +// it may be that the optimization pass is merely exposing an existing +// condition in the code. +// +// This code may be run before instcombine. 
In many cases, instcombine checks +// for the same kinds of things and turns instructions with undefined behavior +// into unreachable (or equivalent). Because of this, this pass makes some +// effort to look through bitcasts and so on. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/Passes.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/Lint.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/Assembly/Writer.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Pass.h" +#include "llvm/PassManager.h" +#include "llvm/IntrinsicInst.h" +#include "llvm/Function.h" +#include "llvm/Support/CallSite.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/InstVisitor.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/ADT/STLExtras.h" +using namespace llvm; + +namespace { + namespace MemRef { + static unsigned Read = 1; + static unsigned Write = 2; + static unsigned Callee = 4; + static unsigned Branchee = 8; + } + + class Lint : public FunctionPass, public InstVisitor<Lint> { + friend class InstVisitor<Lint>; + + void visitFunction(Function &F); + + void visitCallSite(CallSite CS); + void visitMemoryReference(Instruction &I, Value *Ptr, unsigned Align, + const Type *Ty, unsigned Flags); + + void visitCallInst(CallInst &I); + void visitInvokeInst(InvokeInst &I); + void visitReturnInst(ReturnInst &I); + void visitLoadInst(LoadInst &I); + void visitStoreInst(StoreInst &I); + void visitXor(BinaryOperator &I); + void visitSub(BinaryOperator &I); + void visitLShr(BinaryOperator &I); + void visitAShr(BinaryOperator &I); + void visitShl(BinaryOperator &I); + void visitSDiv(BinaryOperator &I); + void visitUDiv(BinaryOperator &I); + void visitSRem(BinaryOperator &I); + void visitURem(BinaryOperator &I); + void visitAllocaInst(AllocaInst &I); + void visitVAArgInst(VAArgInst &I); + void visitIndirectBrInst(IndirectBrInst &I); + void visitExtractElementInst(ExtractElementInst &I); + void visitInsertElementInst(InsertElementInst &I); + void visitUnreachableInst(UnreachableInst &I); + + public: + Module *Mod; + AliasAnalysis *AA; + TargetData *TD; + + std::string Messages; + raw_string_ostream MessagesStr; + + static char ID; // Pass identification, replacement for typeid + Lint() : FunctionPass(&ID), MessagesStr(Messages) {} + + virtual bool runOnFunction(Function &F); + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + AU.addRequired<AliasAnalysis>(); + } + virtual void print(raw_ostream &O, const Module *M) const {} + + void WriteValue(const Value *V) { + if (!V) return; + if (isa<Instruction>(V)) { + MessagesStr << *V << '\n'; + } else { + WriteAsOperand(MessagesStr, V, true, Mod); + MessagesStr << '\n'; + } + } + + void WriteType(const Type *T) { + if (!T) return; + MessagesStr << ' '; + WriteTypeSymbolic(MessagesStr, T, Mod); + } + + // CheckFailed - A check failed, so print out the condition and the message + // that failed. This provides a nice place to put a breakpoint if you want + // to see why something is not correct. 
+ void CheckFailed(const Twine &Message, + const Value *V1 = 0, const Value *V2 = 0, + const Value *V3 = 0, const Value *V4 = 0) { + MessagesStr << Message.str() << "\n"; + WriteValue(V1); + WriteValue(V2); + WriteValue(V3); + WriteValue(V4); + } + + void CheckFailed(const Twine &Message, const Value *V1, + const Type *T2, const Value *V3 = 0) { + MessagesStr << Message.str() << "\n"; + WriteValue(V1); + WriteType(T2); + WriteValue(V3); + } + + void CheckFailed(const Twine &Message, const Type *T1, + const Type *T2 = 0, const Type *T3 = 0) { + MessagesStr << Message.str() << "\n"; + WriteType(T1); + WriteType(T2); + WriteType(T3); + } + }; +} + +char Lint::ID = 0; +static RegisterPass<Lint> +X("lint", "Statically lint-checks LLVM IR", false, true); + +// Assert - We know that cond should be true, if not print an error message. +#define Assert(C, M) \ + do { if (!(C)) { CheckFailed(M); return; } } while (0) +#define Assert1(C, M, V1) \ + do { if (!(C)) { CheckFailed(M, V1); return; } } while (0) +#define Assert2(C, M, V1, V2) \ + do { if (!(C)) { CheckFailed(M, V1, V2); return; } } while (0) +#define Assert3(C, M, V1, V2, V3) \ + do { if (!(C)) { CheckFailed(M, V1, V2, V3); return; } } while (0) +#define Assert4(C, M, V1, V2, V3, V4) \ + do { if (!(C)) { CheckFailed(M, V1, V2, V3, V4); return; } } while (0) + +// Lint::run - This is the main Analysis entry point for a +// function. +// +bool Lint::runOnFunction(Function &F) { + Mod = F.getParent(); + AA = &getAnalysis<AliasAnalysis>(); + TD = getAnalysisIfAvailable<TargetData>(); + visit(F); + dbgs() << MessagesStr.str(); + return false; +} + +void Lint::visitFunction(Function &F) { + // This isn't undefined behavior, it's just a little unusual, and it's a + // fairly common mistake to neglect to name a function. + Assert1(F.hasName() || F.hasLocalLinkage(), + "Unusual: Unnamed function with non-local linkage", &F); +} + +void Lint::visitCallSite(CallSite CS) { + Instruction &I = *CS.getInstruction(); + Value *Callee = CS.getCalledValue(); + + // TODO: Check function alignment? + visitMemoryReference(I, Callee, 0, 0, MemRef::Callee); + + if (Function *F = dyn_cast<Function>(Callee->stripPointerCasts())) { + Assert1(CS.getCallingConv() == F->getCallingConv(), + "Undefined behavior: Caller and callee calling convention differ", + &I); + + const FunctionType *FT = F->getFunctionType(); + unsigned NumActualArgs = unsigned(CS.arg_end()-CS.arg_begin()); + + Assert1(FT->isVarArg() ? + FT->getNumParams() <= NumActualArgs : + FT->getNumParams() == NumActualArgs, + "Undefined behavior: Call argument count mismatches callee " + "argument count", &I); + + // TODO: Check argument types (in case the callee was casted) + + // TODO: Check ABI-significant attributes. + + // TODO: Check noalias attribute. + + // TODO: Check sret attribute. + } + + // TODO: Check the "tail" keyword constraints. + + if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(&I)) + switch (II->getIntrinsicID()) { + default: break; + + // TODO: Check more intrinsics + + case Intrinsic::memcpy: { + MemCpyInst *MCI = cast<MemCpyInst>(&I); + visitMemoryReference(I, MCI->getSource(), MCI->getAlignment(), 0, + MemRef::Write); + visitMemoryReference(I, MCI->getDest(), MCI->getAlignment(), 0, + MemRef::Read); + + // Check that the memcpy arguments don't overlap. The AliasAnalysis API + // isn't expressive enough for what we really want to do. Known partial + // overlap is not distinguished from the case where nothing is known. 
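The Assert1 that follows only fires when the analysis proves MustAlias for a known, constant length: identical pointers with a nonzero length are guaranteed to overlap, while MayAlias covers both "completely disjoint" and "partially overlapping" and so cannot justify a warning. A minimal standalone sketch of the underlying interval reasoning, using plain integers in place of the AliasAnalysis interface (the names here are illustrative, not LLVM APIs):

#include <cassert>
#include <cstdint>

// Two byte ranges [p, p+len) and [q, q+len) overlap iff each one starts
// before the other one ends.
static bool rangesOverlap(uint64_t P, uint64_t Q, uint64_t Len) {
  return P < Q + Len && Q < P + Len;
}

// Mirror of the lint rule: with only an alias result to go on, a warning is
// justified only for MustAlias (identical pointers, guaranteed overlap when
// the length is nonzero). MayAlias cannot distinguish "disjoint" from
// "partially overlapping", so it must stay silent.
enum AliasResult { NoAlias, MayAlias, MustAlias };

static bool shouldWarnMemcpyOverlap(AliasResult AR, uint64_t KnownLen) {
  return AR == MustAlias && KnownLen > 0;
}

int main() {
  assert(rangesOverlap(100, 100, 8));   // identical pointers always overlap
  assert(rangesOverlap(100, 104, 8));   // partial overlap
  assert(!rangesOverlap(100, 108, 8));  // adjacent but disjoint
  assert(shouldWarnMemcpyOverlap(MustAlias, 8));
  assert(!shouldWarnMemcpyOverlap(MayAlias, 8)); // could be disjoint: no warning
  return 0;
}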
+ unsigned Size = 0; + if (const ConstantInt *Len = + dyn_cast<ConstantInt>(MCI->getLength()->stripPointerCasts())) + if (Len->getValue().isIntN(32)) + Size = Len->getValue().getZExtValue(); + Assert1(AA->alias(MCI->getSource(), Size, MCI->getDest(), Size) != + AliasAnalysis::MustAlias, + "Undefined behavior: memcpy source and destination overlap", &I); + break; + } + case Intrinsic::memmove: { + MemMoveInst *MMI = cast<MemMoveInst>(&I); + visitMemoryReference(I, MMI->getSource(), MMI->getAlignment(), 0, + MemRef::Write); + visitMemoryReference(I, MMI->getDest(), MMI->getAlignment(), 0, + MemRef::Read); + break; + } + case Intrinsic::memset: { + MemSetInst *MSI = cast<MemSetInst>(&I); + visitMemoryReference(I, MSI->getDest(), MSI->getAlignment(), 0, + MemRef::Write); + break; + } + + case Intrinsic::vastart: + Assert1(I.getParent()->getParent()->isVarArg(), + "Undefined behavior: va_start called in a non-varargs function", + &I); + + visitMemoryReference(I, CS.getArgument(0), 0, 0, + MemRef::Read | MemRef::Write); + break; + case Intrinsic::vacopy: + visitMemoryReference(I, CS.getArgument(0), 0, 0, MemRef::Write); + visitMemoryReference(I, CS.getArgument(1), 0, 0, MemRef::Read); + break; + case Intrinsic::vaend: + visitMemoryReference(I, CS.getArgument(0), 0, 0, + MemRef::Read | MemRef::Write); + break; + + case Intrinsic::stackrestore: + visitMemoryReference(I, CS.getArgument(0), 0, 0, + MemRef::Read); + break; + } +} + +void Lint::visitCallInst(CallInst &I) { + return visitCallSite(&I); +} + +void Lint::visitInvokeInst(InvokeInst &I) { + return visitCallSite(&I); +} + +void Lint::visitReturnInst(ReturnInst &I) { + Function *F = I.getParent()->getParent(); + Assert1(!F->doesNotReturn(), + "Unusual: Return statement in function with noreturn attribute", + &I); +} + +// TODO: Add a length argument and check that the reference is in bounds +void Lint::visitMemoryReference(Instruction &I, + Value *Ptr, unsigned Align, const Type *Ty, + unsigned Flags) { + Value *UnderlyingObject = Ptr->getUnderlyingObject(); + Assert1(!isa<ConstantPointerNull>(UnderlyingObject), + "Undefined behavior: Null pointer dereference", &I); + Assert1(!isa<UndefValue>(UnderlyingObject), + "Undefined behavior: Undef pointer dereference", &I); + + if (Flags & MemRef::Write) { + if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(UnderlyingObject)) + Assert1(!GV->isConstant(), + "Undefined behavior: Write to read-only memory", &I); + Assert1(!isa<Function>(UnderlyingObject) && + !isa<BlockAddress>(UnderlyingObject), + "Undefined behavior: Write to text section", &I); + } + if (Flags & MemRef::Read) { + Assert1(!isa<Function>(UnderlyingObject), + "Unusual: Load from function body", &I); + Assert1(!isa<BlockAddress>(UnderlyingObject), + "Undefined behavior: Load from block address", &I); + } + if (Flags & MemRef::Callee) { + Assert1(!isa<BlockAddress>(UnderlyingObject), + "Undefined behavior: Call to block address", &I); + } + if (Flags & MemRef::Branchee) { + Assert1(!isa<Constant>(UnderlyingObject) || + isa<BlockAddress>(UnderlyingObject), + "Undefined behavior: Branch to non-blockaddress", &I); + } + + if (TD) { + if (Align == 0 && Ty) Align = TD->getABITypeAlignment(Ty); + + if (Align != 0) { + unsigned BitWidth = TD->getTypeSizeInBits(Ptr->getType()); + APInt Mask = APInt::getAllOnesValue(BitWidth), + KnownZero(BitWidth, 0), KnownOne(BitWidth, 0); + ComputeMaskedBits(Ptr, Mask, KnownZero, KnownOne, TD); + Assert1(!(KnownOne & APInt::getLowBitsSet(BitWidth, Log2_32(Align))), + "Undefined behavior: Memory reference 
address is misaligned", &I); + } + } +} + +void Lint::visitLoadInst(LoadInst &I) { + visitMemoryReference(I, I.getPointerOperand(), I.getAlignment(), I.getType(), + MemRef::Read); +} + +void Lint::visitStoreInst(StoreInst &I) { + visitMemoryReference(I, I.getPointerOperand(), I.getAlignment(), + I.getOperand(0)->getType(), MemRef::Write); +} + +void Lint::visitXor(BinaryOperator &I) { + Assert1(!isa<UndefValue>(I.getOperand(0)) || + !isa<UndefValue>(I.getOperand(1)), + "Undefined result: xor(undef, undef)", &I); +} + +void Lint::visitSub(BinaryOperator &I) { + Assert1(!isa<UndefValue>(I.getOperand(0)) || + !isa<UndefValue>(I.getOperand(1)), + "Undefined result: sub(undef, undef)", &I); +} + +void Lint::visitLShr(BinaryOperator &I) { + if (ConstantInt *CI = + dyn_cast<ConstantInt>(I.getOperand(1)->stripPointerCasts())) + Assert1(CI->getValue().ult(cast<IntegerType>(I.getType())->getBitWidth()), + "Undefined result: Shift count out of range", &I); +} + +void Lint::visitAShr(BinaryOperator &I) { + if (ConstantInt *CI = + dyn_cast<ConstantInt>(I.getOperand(1)->stripPointerCasts())) + Assert1(CI->getValue().ult(cast<IntegerType>(I.getType())->getBitWidth()), + "Undefined result: Shift count out of range", &I); +} + +void Lint::visitShl(BinaryOperator &I) { + if (ConstantInt *CI = + dyn_cast<ConstantInt>(I.getOperand(1)->stripPointerCasts())) + Assert1(CI->getValue().ult(cast<IntegerType>(I.getType())->getBitWidth()), + "Undefined result: Shift count out of range", &I); +} + +static bool isZero(Value *V, TargetData *TD) { + // Assume undef could be zero. + if (isa<UndefValue>(V)) return true; + + unsigned BitWidth = cast<IntegerType>(V->getType())->getBitWidth(); + APInt Mask = APInt::getAllOnesValue(BitWidth), + KnownZero(BitWidth, 0), KnownOne(BitWidth, 0); + ComputeMaskedBits(V, Mask, KnownZero, KnownOne, TD); + return KnownZero.isAllOnesValue(); +} + +void Lint::visitSDiv(BinaryOperator &I) { + Assert1(!isZero(I.getOperand(1), TD), + "Undefined behavior: Division by zero", &I); +} + +void Lint::visitUDiv(BinaryOperator &I) { + Assert1(!isZero(I.getOperand(1), TD), + "Undefined behavior: Division by zero", &I); +} + +void Lint::visitSRem(BinaryOperator &I) { + Assert1(!isZero(I.getOperand(1), TD), + "Undefined behavior: Division by zero", &I); +} + +void Lint::visitURem(BinaryOperator &I) { + Assert1(!isZero(I.getOperand(1), TD), + "Undefined behavior: Division by zero", &I); +} + +void Lint::visitAllocaInst(AllocaInst &I) { + if (isa<ConstantInt>(I.getArraySize())) + // This isn't undefined behavior, it's just an obvious pessimization. 
+ Assert1(&I.getParent()->getParent()->getEntryBlock() == I.getParent(), + "Pessimization: Static alloca outside of entry block", &I); +} + +void Lint::visitVAArgInst(VAArgInst &I) { + visitMemoryReference(I, I.getOperand(0), 0, 0, + MemRef::Read | MemRef::Write); +} + +void Lint::visitIndirectBrInst(IndirectBrInst &I) { + visitMemoryReference(I, I.getAddress(), 0, 0, MemRef::Branchee); +} + +void Lint::visitExtractElementInst(ExtractElementInst &I) { + if (ConstantInt *CI = + dyn_cast<ConstantInt>(I.getIndexOperand()->stripPointerCasts())) + Assert1(CI->getValue().ult(I.getVectorOperandType()->getNumElements()), + "Undefined result: extractelement index out of range", &I); +} + +void Lint::visitInsertElementInst(InsertElementInst &I) { + if (ConstantInt *CI = + dyn_cast<ConstantInt>(I.getOperand(2)->stripPointerCasts())) + Assert1(CI->getValue().ult(I.getType()->getNumElements()), + "Undefined result: insertelement index out of range", &I); +} + +void Lint::visitUnreachableInst(UnreachableInst &I) { + // This isn't undefined behavior, it's merely suspicious. + Assert1(&I == I.getParent()->begin() || + prior(BasicBlock::iterator(&I))->mayHaveSideEffects(), + "Unusual: unreachable immediately preceded by instruction without " + "side effects", &I); +} + +//===----------------------------------------------------------------------===// +// Implement the public interfaces to this file... +//===----------------------------------------------------------------------===// + +FunctionPass *llvm::createLintPass() { + return new Lint(); +} + +/// lintFunction - Check a function for errors, printing messages on stderr. +/// +void llvm::lintFunction(const Function &f) { + Function &F = const_cast<Function&>(f); + assert(!F.isDeclaration() && "Cannot lint external functions"); + + FunctionPassManager FPM(F.getParent()); + Lint *V = new Lint(); + FPM.add(V); + FPM.run(F); +} + +/// lintModule - Check a module for errors, printing messages on stderr. +/// Return true if the module is corrupt. +/// +void llvm::lintModule(const Module &M, std::string *ErrorInfo) { + PassManager PM; + Lint *V = new Lint(); + PM.add(V); + PM.run(const_cast<Module&>(M)); + + if (ErrorInfo) + *ErrorInfo = V->MessagesStr.str(); +} diff --git a/lib/Analysis/LoopInfo.cpp b/lib/Analysis/LoopInfo.cpp index 1001d2b..735e31f 100644 --- a/lib/Analysis/LoopInfo.cpp +++ b/lib/Analysis/LoopInfo.cpp @@ -29,9 +29,9 @@ using namespace llvm; // Always verify loopinfo if expensive checking is enabled. 
#ifdef XDEBUG -bool VerifyLoopInfo = true; +static bool VerifyLoopInfo = true; #else -bool VerifyLoopInfo = false; +static bool VerifyLoopInfo = false; #endif static cl::opt<bool,true> VerifyLoopInfoX("verify-loop-info", cl::location(VerifyLoopInfo), diff --git a/lib/Analysis/PointerTracking.cpp b/lib/Analysis/PointerTracking.cpp index ce7ac89..14df0b7 100644 --- a/lib/Analysis/PointerTracking.cpp +++ b/lib/Analysis/PointerTracking.cpp @@ -183,17 +183,17 @@ enum SolverResult PointerTracking::isLoopGuardedBy(const Loop *L, Predicate Pred, const SCEV *A, const SCEV *B) const { - if (SE->isLoopGuardedByCond(L, Pred, A, B)) + if (SE->isLoopEntryGuardedByCond(L, Pred, A, B)) return AlwaysTrue; Pred = ICmpInst::getSwappedPredicate(Pred); - if (SE->isLoopGuardedByCond(L, Pred, B, A)) + if (SE->isLoopEntryGuardedByCond(L, Pred, B, A)) return AlwaysTrue; Pred = ICmpInst::getInversePredicate(Pred); - if (SE->isLoopGuardedByCond(L, Pred, B, A)) + if (SE->isLoopEntryGuardedByCond(L, Pred, B, A)) return AlwaysFalse; Pred = ICmpInst::getSwappedPredicate(Pred); - if (SE->isLoopGuardedByCond(L, Pred, A, B)) + if (SE->isLoopEntryGuardedByCond(L, Pred, A, B)) return AlwaysTrue; return Unknown; } diff --git a/lib/Analysis/PostDominators.cpp b/lib/Analysis/PostDominators.cpp index c38e050..f0f3a05 100644 --- a/lib/Analysis/PostDominators.cpp +++ b/lib/Analysis/PostDominators.cpp @@ -33,7 +33,6 @@ F("postdomtree", "Post-Dominator Tree Construction", true, true); bool PostDominatorTree::runOnFunction(Function &F) { DT->recalculate(F); - DEBUG(DT->print(dbgs())); return false; } diff --git a/lib/Analysis/README.txt b/lib/Analysis/README.txt index c401090..0e96e4c 100644 --- a/lib/Analysis/README.txt +++ b/lib/Analysis/README.txt @@ -16,3 +16,15 @@ In addition to being much more complicated, it involves i65 arithmetic, which is very inefficient when expanded into code. 
//===---------------------------------------------------------------------===// + +In formatValue in test/CodeGen/X86/lsr-delayed-fold.ll, + +ScalarEvolution is forming this expression: + +((trunc i64 (-1 * %arg5) to i32) + (trunc i64 %arg5 to i32) + (-1 * (trunc i64 undef to i32))) + +This could be folded to + +(-1 * (trunc i64 undef to i32)) + +//===---------------------------------------------------------------------===// diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp index 1af271a..6870268 100644 --- a/lib/Analysis/ScalarEvolution.cpp +++ b/lib/Analysis/ScalarEvolution.cpp @@ -188,8 +188,8 @@ const SCEV *ScalarEvolution::getConstant(const APInt& Val) { const SCEV * ScalarEvolution::getConstant(const Type *Ty, uint64_t V, bool isSigned) { - return getConstant( - ConstantInt::get(cast<IntegerType>(Ty), V, isSigned)); + const IntegerType *ITy = cast<IntegerType>(getEffectiveSCEVType(Ty)); + return getConstant(ConstantInt::get(ITy, V, isSigned)); } const Type *SCEVConstant::getType() const { return V->getType(); } @@ -247,11 +247,13 @@ void SCEVSignExtendExpr::print(raw_ostream &OS) const { } void SCEVCommutativeExpr::print(raw_ostream &OS) const { - assert(NumOperands > 1 && "This plus expr shouldn't exist!"); const char *OpStr = getOperationStr(); - OS << "(" << *Operands[0]; - for (unsigned i = 1, e = NumOperands; i != e; ++i) - OS << OpStr << *Operands[i]; + OS << "("; + for (op_iterator I = op_begin(), E = op_end(); I != E; ++I) { + OS << **I; + if (next(I) != E) + OS << OpStr; + } OS << ")"; } @@ -759,7 +761,7 @@ static const SCEV *BinomialCoefficient(const SCEV *It, unsigned K, CalculationBits); const SCEV *Dividend = SE.getTruncateOrZeroExtend(It, CalculationTy); for (unsigned i = 1; i != K; ++i) { - const SCEV *S = SE.getMinusSCEV(It, SE.getIntegerSCEV(i, It->getType())); + const SCEV *S = SE.getMinusSCEV(It, SE.getConstant(It->getType(), i)); Dividend = SE.getMulExpr(Dividend, SE.getTruncateOrZeroExtend(S, CalculationTy)); } @@ -955,7 +957,7 @@ const SCEV *ScalarEvolution::getZeroExtendExpr(const SCEV *Op, const SCEV *N = getConstant(APInt::getMinValue(BitWidth) - getUnsignedRange(Step).getUnsignedMax()); if (isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_ULT, AR, N) || - (isLoopGuardedByCond(L, ICmpInst::ICMP_ULT, Start, N) && + (isLoopEntryGuardedByCond(L, ICmpInst::ICMP_ULT, Start, N) && isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_ULT, AR->getPostIncExpr(*this), N))) // Return the expression with the addrec on the outside. @@ -965,8 +967,8 @@ const SCEV *ScalarEvolution::getZeroExtendExpr(const SCEV *Op, } else if (isKnownNegative(Step)) { const SCEV *N = getConstant(APInt::getMaxValue(BitWidth) - getSignedRange(Step).getSignedMin()); - if (isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_UGT, AR, N) && - (isLoopGuardedByCond(L, ICmpInst::ICMP_UGT, Start, N) || + if (isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_UGT, AR, N) || + (isLoopEntryGuardedByCond(L, ICmpInst::ICMP_UGT, Start, N) && isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_UGT, AR->getPostIncExpr(*this), N))) // Return the expression with the addrec on the outside. 
@@ -1090,7 +1092,7 @@ const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op, const SCEV *N = getConstant(APInt::getSignedMinValue(BitWidth) - getSignedRange(Step).getSignedMax()); if (isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_SLT, AR, N) || - (isLoopGuardedByCond(L, ICmpInst::ICMP_SLT, Start, N) && + (isLoopEntryGuardedByCond(L, ICmpInst::ICMP_SLT, Start, N) && isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_SLT, AR->getPostIncExpr(*this), N))) // Return the expression with the addrec on the outside. @@ -1101,7 +1103,7 @@ const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op, const SCEV *N = getConstant(APInt::getSignedMaxValue(BitWidth) - getSignedRange(Step).getSignedMin()); if (isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_SGT, AR, N) || - (isLoopGuardedByCond(L, ICmpInst::ICMP_SGT, Start, N) && + (isLoopEntryGuardedByCond(L, ICmpInst::ICMP_SGT, Start, N) && isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_SGT, AR->getPostIncExpr(*this), N))) // Return the expression with the addrec on the outside. @@ -1237,7 +1239,7 @@ CollectAddOperandsWithScales(DenseMap<const SCEV *, APInt> &M, } } else if (const SCEVConstant *C = dyn_cast<SCEVConstant>(Ops[i])) { // Pull a buried constant out to the outside. - if (Scale != 1 || AccumulatedConstant != 0 || C->isZero()) + if (Scale != 1 || AccumulatedConstant != 0 || C->getValue()->isZero()) Interesting = true; AccumulatedConstant += Scale * C->getValue()->getValue(); } else { @@ -1308,13 +1310,13 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops, } // If we are left with a constant zero being added, strip it off. - if (cast<SCEVConstant>(Ops[0])->getValue()->isZero()) { + if (LHSC->getValue()->isZero()) { Ops.erase(Ops.begin()); --Idx; } - } - if (Ops.size() == 1) return Ops[0]; + if (Ops.size() == 1) return Ops[0]; + } // Okay, check to see if the same value occurs in the operand list twice. If // so, merge them together into an multiply expression. Since we sorted the @@ -1324,7 +1326,7 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops, if (Ops[i] == Ops[i+1]) { // X + Y + Y --> X + Y*2 // Found a match, merge the two values into a multiply, and add any // remaining values to the result. - const SCEV *Two = getIntegerSCEV(2, Ty); + const SCEV *Two = getConstant(Ty, 2); const SCEV *Mul = getMulExpr(Ops[i], Two); if (Ops.size() == 2) return Mul; @@ -1353,9 +1355,7 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops, } LargeOps.push_back(T->getOperand()); } else if (const SCEVConstant *C = dyn_cast<SCEVConstant>(Ops[i])) { - // This could be either sign or zero extension, but sign extension - // is much more likely to be foldable here. - LargeOps.push_back(getSignExtendExpr(C, SrcType)); + LargeOps.push_back(getAnyExtendExpr(C, SrcType)); } else if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(Ops[i])) { SmallVector<const SCEV *, 8> LargeMulOps; for (unsigned j = 0, f = M->getNumOperands(); j != f && Ok; ++j) { @@ -1368,9 +1368,7 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops, LargeMulOps.push_back(T->getOperand()); } else if (const SCEVConstant *C = dyn_cast<SCEVConstant>(M->getOperand(j))) { - // This could be either sign or zero extension, but sign extension - // is much more likely to be foldable here. 
- LargeMulOps.push_back(getSignExtendExpr(C, SrcType)); + LargeMulOps.push_back(getAnyExtendExpr(C, SrcType)); } else { Ok = false; break; @@ -1445,7 +1443,7 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops, Ops.push_back(getMulExpr(getConstant(I->first), getAddExpr(I->second))); if (Ops.empty()) - return getIntegerSCEV(0, Ty); + return getConstant(Ty, 0); if (Ops.size() == 1) return Ops[0]; return getAddExpr(Ops); @@ -1470,7 +1468,7 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops, MulOps.erase(MulOps.begin()+MulOp); InnerMul = getMulExpr(MulOps); } - const SCEV *One = getIntegerSCEV(1, Ty); + const SCEV *One = getConstant(Ty, 1); const SCEV *AddOne = getAddExpr(InnerMul, One); const SCEV *OuterMul = getMulExpr(AddOne, Ops[AddOp]); if (Ops.size() == 2) return OuterMul; @@ -1534,8 +1532,9 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops, // they are loop invariant w.r.t. the recurrence. SmallVector<const SCEV *, 8> LIOps; const SCEVAddRecExpr *AddRec = cast<SCEVAddRecExpr>(Ops[Idx]); + const Loop *AddRecLoop = AddRec->getLoop(); for (unsigned i = 0, e = Ops.size(); i != e; ++i) - if (Ops[i]->isLoopInvariant(AddRec->getLoop())) { + if (Ops[i]->isLoopInvariant(AddRecLoop)) { LIOps.push_back(Ops[i]); Ops.erase(Ops.begin()+i); --i; --e; @@ -1552,7 +1551,7 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops, // It's tempting to propagate NUW/NSW flags here, but nuw/nsw addition // is not associative so this isn't necessarily safe. - const SCEV *NewRec = getAddRecExpr(AddRecOps, AddRec->getLoop()); + const SCEV *NewRec = getAddRecExpr(AddRecOps, AddRecLoop); // If all of the other operands were loop invariant, we are done. if (Ops.size() == 1) return NewRec; @@ -1573,7 +1572,7 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops, OtherIdx < Ops.size() && isa<SCEVAddRecExpr>(Ops[OtherIdx]);++OtherIdx) if (OtherIdx != Idx) { const SCEVAddRecExpr *OtherAddRec = cast<SCEVAddRecExpr>(Ops[OtherIdx]); - if (AddRec->getLoop() == OtherAddRec->getLoop()) { + if (AddRecLoop == OtherAddRec->getLoop()) { // Other + {A,+,B} + {C,+,D} --> Other + {A+C,+,B+D} SmallVector<const SCEV *, 4> NewOps(AddRec->op_begin(), AddRec->op_end()); @@ -1585,7 +1584,7 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops, } NewOps[i] = getAddExpr(NewOps[i], OtherAddRec->getOperand(i)); } - const SCEV *NewAddRec = getAddRecExpr(NewOps, AddRec->getLoop()); + const SCEV *NewAddRec = getAddRecExpr(NewOps, AddRecLoop); if (Ops.size() == 2) return NewAddRec; @@ -1697,15 +1696,15 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops, return getAddExpr(NewOps); } } + + if (Ops.size() == 1) + return Ops[0]; } // Skip over the add expression until we get to a multiply. while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scMulExpr) ++Idx; - if (Ops.size() == 1) - return Ops[0]; - // If there are mul operands inline them all into this expression. if (Idx < Ops.size()) { bool DeletedMul = false; @@ -1843,77 +1842,81 @@ const SCEV *ScalarEvolution::getUDivExpr(const SCEV *LHS, if (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(RHS)) { if (RHSC->getValue()->equalsInt(1)) return LHS; // X udiv 1 --> x - if (RHSC->isZero()) - return getIntegerSCEV(0, LHS->getType()); // value is undefined - - // Determine if the division can be folded into the operands of - // its operands. - // TODO: Generalize this to non-constants by using known-bits information. 
- const Type *Ty = LHS->getType(); - unsigned LZ = RHSC->getValue()->getValue().countLeadingZeros(); - unsigned MaxShiftAmt = getTypeSizeInBits(Ty) - LZ; - // For non-power-of-two values, effectively round the value up to the - // nearest power of two. - if (!RHSC->getValue()->getValue().isPowerOf2()) - ++MaxShiftAmt; - const IntegerType *ExtTy = - IntegerType::get(getContext(), getTypeSizeInBits(Ty) + MaxShiftAmt); - // {X,+,N}/C --> {X/C,+,N/C} if safe and N/C can be folded. - if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(LHS)) - if (const SCEVConstant *Step = - dyn_cast<SCEVConstant>(AR->getStepRecurrence(*this))) - if (!Step->getValue()->getValue() - .urem(RHSC->getValue()->getValue()) && - getZeroExtendExpr(AR, ExtTy) == - getAddRecExpr(getZeroExtendExpr(AR->getStart(), ExtTy), - getZeroExtendExpr(Step, ExtTy), - AR->getLoop())) { - SmallVector<const SCEV *, 4> Operands; - for (unsigned i = 0, e = AR->getNumOperands(); i != e; ++i) - Operands.push_back(getUDivExpr(AR->getOperand(i), RHS)); - return getAddRecExpr(Operands, AR->getLoop()); - } - // (A*B)/C --> A*(B/C) if safe and B/C can be folded. - if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(LHS)) { - SmallVector<const SCEV *, 4> Operands; - for (unsigned i = 0, e = M->getNumOperands(); i != e; ++i) - Operands.push_back(getZeroExtendExpr(M->getOperand(i), ExtTy)); - if (getZeroExtendExpr(M, ExtTy) == getMulExpr(Operands)) - // Find an operand that's safely divisible. - for (unsigned i = 0, e = M->getNumOperands(); i != e; ++i) { - const SCEV *Op = M->getOperand(i); - const SCEV *Div = getUDivExpr(Op, RHSC); - if (!isa<SCEVUDivExpr>(Div) && getMulExpr(Div, RHSC) == Op) { - Operands = SmallVector<const SCEV *, 4>(M->op_begin(), M->op_end()); - Operands[i] = Div; - return getMulExpr(Operands); + // If the denominator is zero, the result of the udiv is undefined. Don't + // try to analyze it, because the resolution chosen here may differ from + // the resolution chosen in other parts of the compiler. + if (!RHSC->getValue()->isZero()) { + // Determine if the division can be folded into the operands of + // its operands. + // TODO: Generalize this to non-constants by using known-bits information. + const Type *Ty = LHS->getType(); + unsigned LZ = RHSC->getValue()->getValue().countLeadingZeros(); + unsigned MaxShiftAmt = getTypeSizeInBits(Ty) - LZ; + // For non-power-of-two values, effectively round the value up to the + // nearest power of two. + if (!RHSC->getValue()->getValue().isPowerOf2()) + ++MaxShiftAmt; + const IntegerType *ExtTy = + IntegerType::get(getContext(), getTypeSizeInBits(Ty) + MaxShiftAmt); + // {X,+,N}/C --> {X/C,+,N/C} if safe and N/C can be folded. + if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(LHS)) + if (const SCEVConstant *Step = + dyn_cast<SCEVConstant>(AR->getStepRecurrence(*this))) + if (!Step->getValue()->getValue() + .urem(RHSC->getValue()->getValue()) && + getZeroExtendExpr(AR, ExtTy) == + getAddRecExpr(getZeroExtendExpr(AR->getStart(), ExtTy), + getZeroExtendExpr(Step, ExtTy), + AR->getLoop())) { + SmallVector<const SCEV *, 4> Operands; + for (unsigned i = 0, e = AR->getNumOperands(); i != e; ++i) + Operands.push_back(getUDivExpr(AR->getOperand(i), RHS)); + return getAddRecExpr(Operands, AR->getLoop()); } + // (A*B)/C --> A*(B/C) if safe and B/C can be folded. 
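Both the {X,+,N}/C rewrite above and the (A*B)/C rewrite that follows only fire when the division distributes exactly over the operands. A rough standalone model of the addrec case, with plain unsigned arithmetic standing in for SCEV (illustrative only; the real code also rules out wrapping by comparing zero-extended forms, and it does not require the start to divide evenly, since each operand is itself divided with getUDivExpr):

#include <cassert>
#include <cstdint>

// Model an affine recurrence {Start,+,Step}: its value at iteration i.
static uint64_t recAt(uint64_t Start, uint64_t Step, uint64_t I) {
  return Start + Step * I;
}

// {Start,+,Step} /u C --> {Start/C,+,Step/C}. This toy version insists that C
// divide both operands exactly; dividing only when that holds keeps the
// identity exact at every iteration (assuming no wrap).
static bool foldUDivOfRec(uint64_t Start, uint64_t Step, uint64_t C,
                          uint64_t &NewStart, uint64_t &NewStep) {
  if (C == 0 || Step % C != 0 || Start % C != 0)
    return false;
  NewStart = Start / C;
  NewStep = Step / C;
  return true;
}

int main() {
  uint64_t NS, NT;
  assert(foldUDivOfRec(8, 12, 4, NS, NT) && NS == 2 && NT == 3);
  // Spot-check the identity over a few iterations: (8 + 12i)/4 == 2 + 3i.
  for (uint64_t I = 0; I != 10; ++I)
    assert(recAt(8, 12, I) / 4 == recAt(2, 3, I));
  // 4 does not divide the step 10, so the fold is rejected.
  assert(!foldUDivOfRec(8, 10, 4, NS, NT));
  return 0;
}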
+ if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(LHS)) { + SmallVector<const SCEV *, 4> Operands; + for (unsigned i = 0, e = M->getNumOperands(); i != e; ++i) + Operands.push_back(getZeroExtendExpr(M->getOperand(i), ExtTy)); + if (getZeroExtendExpr(M, ExtTy) == getMulExpr(Operands)) + // Find an operand that's safely divisible. + for (unsigned i = 0, e = M->getNumOperands(); i != e; ++i) { + const SCEV *Op = M->getOperand(i); + const SCEV *Div = getUDivExpr(Op, RHSC); + if (!isa<SCEVUDivExpr>(Div) && getMulExpr(Div, RHSC) == Op) { + Operands = SmallVector<const SCEV *, 4>(M->op_begin(), + M->op_end()); + Operands[i] = Div; + return getMulExpr(Operands); + } + } + } + // (A+B)/C --> (A/C + B/C) if safe and A/C and B/C can be folded. + if (const SCEVAddRecExpr *A = dyn_cast<SCEVAddRecExpr>(LHS)) { + SmallVector<const SCEV *, 4> Operands; + for (unsigned i = 0, e = A->getNumOperands(); i != e; ++i) + Operands.push_back(getZeroExtendExpr(A->getOperand(i), ExtTy)); + if (getZeroExtendExpr(A, ExtTy) == getAddExpr(Operands)) { + Operands.clear(); + for (unsigned i = 0, e = A->getNumOperands(); i != e; ++i) { + const SCEV *Op = getUDivExpr(A->getOperand(i), RHS); + if (isa<SCEVUDivExpr>(Op) || + getMulExpr(Op, RHS) != A->getOperand(i)) + break; + Operands.push_back(Op); + } + if (Operands.size() == A->getNumOperands()) + return getAddExpr(Operands); } - } - // (A+B)/C --> (A/C + B/C) if safe and A/C and B/C can be folded. - if (const SCEVAddRecExpr *A = dyn_cast<SCEVAddRecExpr>(LHS)) { - SmallVector<const SCEV *, 4> Operands; - for (unsigned i = 0, e = A->getNumOperands(); i != e; ++i) - Operands.push_back(getZeroExtendExpr(A->getOperand(i), ExtTy)); - if (getZeroExtendExpr(A, ExtTy) == getAddExpr(Operands)) { - Operands.clear(); - for (unsigned i = 0, e = A->getNumOperands(); i != e; ++i) { - const SCEV *Op = getUDivExpr(A->getOperand(i), RHS); - if (isa<SCEVUDivExpr>(Op) || getMulExpr(Op, RHS) != A->getOperand(i)) - break; - Operands.push_back(Op); - } - if (Operands.size() == A->getNumOperands()) - return getAddExpr(Operands); } - } - // Fold if both operands are constant. - if (const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(LHS)) { - Constant *LHSCV = LHSC->getValue(); - Constant *RHSCV = RHSC->getValue(); - return getConstant(cast<ConstantInt>(ConstantExpr::getUDiv(LHSCV, - RHSCV))); + // Fold if both operands are constant. + if (const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(LHS)) { + Constant *LHSCV = LHSC->getValue(); + Constant *RHSCV = RHSC->getValue(); + return getConstant(cast<ConstantInt>(ConstantExpr::getUDiv(LHSCV, + RHSCV))); + } } } @@ -2090,9 +2093,9 @@ ScalarEvolution::getSMaxExpr(SmallVectorImpl<const SCEV *> &Ops) { // maximum-int. return Ops[0]; } - } - if (Ops.size() == 1) return Ops[0]; + if (Ops.size() == 1) return Ops[0]; + } // Find the first SMax while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scSMaxExpr) @@ -2116,7 +2119,13 @@ ScalarEvolution::getSMaxExpr(SmallVectorImpl<const SCEV *> &Ops) { // so, delete one. Since we sorted the list, these values are required to // be adjacent. 
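The loop below now drops not only exact duplicates but also any operand that isKnownPredicate can prove is always dominated by its neighbor; because the operand list is kept sorted, only adjacent pairs need to be examined. A standalone sketch of that collapse, with known signed ranges standing in for the predicate query (hypothetical types, not SCEV):

#include <cassert>
#include <vector>

// Each smax operand is modelled by the signed range its value is known to
// lie in, mirroring how a range-based predicate query works.
struct Op { int Lo, Hi; };                     // value is somewhere in [Lo, Hi]

static bool knownSGE(const Op &A, const Op &B) { return A.Lo >= B.Hi; }
static bool knownSLE(const Op &A, const Op &B) { return A.Hi <= B.Lo; }

// Walk adjacent pairs and drop any operand that is provably redundant:
// X smax Y == X whenever X is always >= Y (and symmetrically for <=).
static void collapseSMaxOperands(std::vector<Op> &Ops) {
  for (size_t I = 0; I + 1 < Ops.size(); )
    if (knownSGE(Ops[I], Ops[I + 1]))
      Ops.erase(Ops.begin() + I + 1);          // right operand never wins
    else if (knownSLE(Ops[I], Ops[I + 1]))
      Ops.erase(Ops.begin() + I);              // left operand never wins
    else
      ++I;
}

int main() {
  // [0,3] smax [5,9] smax [7,8]: the first operand can never be the maximum,
  // and nothing is known about the ordering of the last two, so both stay.
  std::vector<Op> Ops = {{0, 3}, {5, 9}, {7, 8}};
  collapseSMaxOperands(Ops);
  assert(Ops.size() == 2 && Ops[0].Lo == 5 && Ops[1].Lo == 7);
  return 0;
}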
for (unsigned i = 0, e = Ops.size()-1; i != e; ++i) - if (Ops[i] == Ops[i+1]) { // X smax Y smax Y --> X smax Y + // X smax Y smax Y --> X smax Y + // X smax Y --> X, if X is always greater than Y + if (Ops[i] == Ops[i+1] || + isKnownPredicate(ICmpInst::ICMP_SGE, Ops[i], Ops[i+1])) { + Ops.erase(Ops.begin()+i+1, Ops.begin()+i+2); + --i; --e; + } else if (isKnownPredicate(ICmpInst::ICMP_SLE, Ops[i], Ops[i+1])) { Ops.erase(Ops.begin()+i, Ops.begin()+i+1); --i; --e; } @@ -2189,9 +2198,9 @@ ScalarEvolution::getUMaxExpr(SmallVectorImpl<const SCEV *> &Ops) { // maximum-int. return Ops[0]; } - } - if (Ops.size() == 1) return Ops[0]; + if (Ops.size() == 1) return Ops[0]; + } // Find the first UMax while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scUMaxExpr) @@ -2215,7 +2224,13 @@ ScalarEvolution::getUMaxExpr(SmallVectorImpl<const SCEV *> &Ops) { // so, delete one. Since we sorted the list, these values are required to // be adjacent. for (unsigned i = 0, e = Ops.size()-1; i != e; ++i) - if (Ops[i] == Ops[i+1]) { // X umax Y umax Y --> X umax Y + // X umax Y umax Y --> X umax Y + // X umax Y --> X, if X is always greater than Y + if (Ops[i] == Ops[i+1] || + isKnownPredicate(ICmpInst::ICMP_UGE, Ops[i], Ops[i+1])) { + Ops.erase(Ops.begin()+i+1, Ops.begin()+i+2); + --i; --e; + } else if (isKnownPredicate(ICmpInst::ICMP_ULE, Ops[i], Ops[i+1])) { Ops.erase(Ops.begin()+i, Ops.begin()+i+1); --i; --e; } @@ -2254,6 +2269,13 @@ const SCEV *ScalarEvolution::getUMinExpr(const SCEV *LHS, } const SCEV *ScalarEvolution::getSizeOfExpr(const Type *AllocTy) { + // If we have TargetData, we can bypass creating a target-independent + // constant expression and then folding it back into a ConstantInt. + // This is just a compile-time optimization. + if (TD) + return getConstant(TD->getIntPtrType(getContext()), + TD->getTypeAllocSize(AllocTy)); + Constant *C = ConstantExpr::getSizeOf(AllocTy); if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) C = ConstantFoldConstantExpression(CE, TD); @@ -2271,6 +2293,13 @@ const SCEV *ScalarEvolution::getAlignOfExpr(const Type *AllocTy) { const SCEV *ScalarEvolution::getOffsetOfExpr(const StructType *STy, unsigned FieldNo) { + // If we have TargetData, we can bypass creating a target-independent + // constant expression and then folding it back into a ConstantInt. + // This is just a compile-time optimization. + if (TD) + return getConstant(TD->getIntPtrType(getContext()), + TD->getStructLayout(STy)->getElementOffset(FieldNo)); + Constant *C = ConstantExpr::getOffsetOf(STy, FieldNo); if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) C = ConstantFoldConstantExpression(CE, TD); @@ -2597,14 +2626,29 @@ ScalarEvolution::ForgetSymbolicName(Instruction *PN, const SCEV *SymName) { /// a loop header, making it a potential recurrence, or it doesn't. /// const SCEV *ScalarEvolution::createNodeForPHI(PHINode *PN) { - if (PN->getNumIncomingValues() == 2) // The loops have been canonicalized. - if (const Loop *L = LI->getLoopFor(PN->getParent())) - if (L->getHeader() == PN->getParent()) { - // If it lives in the loop header, it has two incoming values, one - // from outside the loop, and one from inside. - unsigned IncomingEdge = L->contains(PN->getIncomingBlock(0)); - unsigned BackEdge = IncomingEdge^1; - + if (const Loop *L = LI->getLoopFor(PN->getParent())) + if (L->getHeader() == PN->getParent()) { + // The loop may have multiple entrances or multiple exits; we can analyze + // this phi as an addrec if it has a unique entry value and a unique + // backedge value. 
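The scan that follows classifies each incoming value of the PHI by whether its incoming block lies inside the loop (a backedge) or outside it (an entry edge), and gives up unless each side contributes exactly one value. A minimal standalone model of that classification, with deliberately simplified, hypothetical types:

#include <cassert>
#include <string>
#include <vector>

// One incoming (block, value) pair of a phi; FromInsideLoop says whether the
// incoming block is contained in the loop, i.e. whether the edge is a backedge.
struct Incoming { bool FromInsideLoop; std::string Value; };

// Returns true and fills Start/Backedge only if every edge from outside the
// loop carries the same value and every backedge carries the same value.
static bool classifyPHI(const std::vector<Incoming> &In,
                        std::string &Start, std::string &Backedge) {
  Start.clear(); Backedge.clear();
  for (const Incoming &I : In) {
    std::string &Slot = I.FromInsideLoop ? Backedge : Start;
    if (Slot.empty())
      Slot = I.Value;               // first value seen on this kind of edge
    else if (Slot != I.Value)
      return false;                 // two different values: not analyzable
  }
  return !Start.empty() && !Backedge.empty();
}

int main() {
  std::string S, B;
  // Two entry edges carrying the same start value plus one backedge: fine.
  assert(classifyPHI({{false, "%init"}, {false, "%init"}, {true, "%next"}},
                     S, B) && S == "%init" && B == "%next");
  // Two different backedge values: give up.
  assert(!classifyPHI({{false, "%init"}, {true, "%a"}, {true, "%b"}}, S, B));
  return 0;
}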
+ Value *BEValueV = 0, *StartValueV = 0; + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { + Value *V = PN->getIncomingValue(i); + if (L->contains(PN->getIncomingBlock(i))) { + if (!BEValueV) { + BEValueV = V; + } else if (BEValueV != V) { + BEValueV = 0; + break; + } + } else if (!StartValueV) { + StartValueV = V; + } else if (StartValueV != V) { + StartValueV = 0; + break; + } + } + if (BEValueV && StartValueV) { // While we are analyzing this PHI node, handle its value symbolically. const SCEV *SymbolicName = getUnknown(PN); assert(Scalars.find(PN) == Scalars.end() && @@ -2613,7 +2657,6 @@ const SCEV *ScalarEvolution::createNodeForPHI(PHINode *PN) { // Using this symbolic name for the PHI, analyze the value coming around // the back-edge. - Value *BEValueV = PN->getIncomingValue(BackEdge); const SCEV *BEValue = getSCEV(BEValueV); // NOTE: If BEValue is loop invariant, we know that the PHI node just @@ -2657,8 +2700,7 @@ const SCEV *ScalarEvolution::createNodeForPHI(PHINode *PN) { HasNSW = true; } - const SCEV *StartVal = - getSCEV(PN->getIncomingValue(IncomingEdge)); + const SCEV *StartVal = getSCEV(StartValueV); const SCEV *PHISCEV = getAddRecExpr(StartVal, Accum, L, HasNUW, HasNSW); @@ -2684,12 +2726,12 @@ const SCEV *ScalarEvolution::createNodeForPHI(PHINode *PN) { // Because the other in-value of i (0) fits the evolution of BEValue // i really is an addrec evolution. if (AddRec->getLoop() == L && AddRec->isAffine()) { - const SCEV *StartVal = getSCEV(PN->getIncomingValue(IncomingEdge)); + const SCEV *StartVal = getSCEV(StartValueV); // If StartVal = j.start - j.stride, we can use StartVal as the // initial step of the addrec evolution. if (StartVal == getMinusSCEV(AddRec->getOperand(0), - AddRec->getOperand(1))) { + AddRec->getOperand(1))) { const SCEV *PHISCEV = getAddRecExpr(StartVal, AddRec->getOperand(1), L); @@ -2702,9 +2744,8 @@ const SCEV *ScalarEvolution::createNodeForPHI(PHINode *PN) { } } } - - return SymbolicName; } + } // If the PHI has a single incoming value, follow that value, unless the // PHI's incoming blocks are in a different loop, in which case doing so @@ -2737,7 +2778,7 @@ const SCEV *ScalarEvolution::createNodeForGEP(GEPOperator *GEP) { // Don't attempt to analyze GEPs over unsized objects. if (!cast<PointerType>(Base->getType())->getElementType()->isSized()) return getUnknown(GEP); - const SCEV *TotalOffset = getIntegerSCEV(0, IntPtrTy); + const SCEV *TotalOffset = getConstant(IntPtrTy, 0); gep_type_iterator GTI = gep_type_begin(GEP); for (GetElementPtrInst::op_iterator I = next(GEP->op_begin()), E = GEP->op_end(); @@ -2920,9 +2961,9 @@ ScalarEvolution::getUnsignedRange(const SCEV *S) { // initial value. if (AddRec->hasNoUnsignedWrap()) if (const SCEVConstant *C = dyn_cast<SCEVConstant>(AddRec->getStart())) - ConservativeResult = - ConstantRange(C->getValue()->getValue(), - APInt(getTypeSizeInBits(C->getType()), 0)); + if (!C->getValue()->isZero()) + ConservativeResult = + ConstantRange(C->getValue()->getValue(), APInt(BitWidth, 0)); // TODO: non-affine addrec if (AddRec->isAffine()) { @@ -2933,14 +2974,26 @@ ScalarEvolution::getUnsignedRange(const SCEV *S) { MaxBECount = getNoopOrZeroExtend(MaxBECount, Ty); const SCEV *Start = AddRec->getStart(); - const SCEV *End = AddRec->evaluateAtIteration(MaxBECount, *this); + const SCEV *Step = AddRec->getStepRecurrence(*this); - // Check for overflow. 
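The replacement code below drops the old hasNoUnsignedWrap bail-out in favor of an explicit wrap test: the end range is recomputed with every input widened, and if the widened result disagrees with the narrow one, the narrow arithmetic wrapped and the conservative answer is returned. A standalone scalar model of that test, using plain unsigned integers rather than ConstantRange (the real code widens to 2*BitWidth+1 bits so that signed steps fit as well):

#include <cassert>
#include <cstdint>

// Detect wrap in a 32-bit computation of Start + Count * Step by redoing it
// in a wider type and comparing: if widening changes the answer, the narrow
// result wrapped and cannot be trusted.
static bool wraps32(uint32_t Start, uint32_t Count, uint32_t Step) {
  uint32_t Narrow = Start + Count * Step;                       // mod 2^32
  uint64_t Wide = uint64_t(Start) + uint64_t(Count) * uint64_t(Step);
  return uint64_t(Narrow) != Wide;
}

int main() {
  assert(!wraps32(10, 100, 3));          // 310 fits comfortably in 32 bits
  assert(wraps32(0, 0x80000000u, 2));    // 2^32 wraps around to 0
  return 0;
}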
- if (!AddRec->hasNoUnsignedWrap()) + ConstantRange StartRange = getUnsignedRange(Start); + ConstantRange StepRange = getSignedRange(Step); + ConstantRange MaxBECountRange = getUnsignedRange(MaxBECount); + ConstantRange EndRange = + StartRange.add(MaxBECountRange.multiply(StepRange)); + + // Check for overflow. This must be done with ConstantRange arithmetic + // because we could be called from within the ScalarEvolution overflow + // checking code. + ConstantRange ExtStartRange = StartRange.zextOrTrunc(BitWidth*2+1); + ConstantRange ExtStepRange = StepRange.sextOrTrunc(BitWidth*2+1); + ConstantRange ExtMaxBECountRange = + MaxBECountRange.zextOrTrunc(BitWidth*2+1); + ConstantRange ExtEndRange = EndRange.zextOrTrunc(BitWidth*2+1); + if (ExtStartRange.add(ExtMaxBECountRange.multiply(ExtStepRange)) != + ExtEndRange) return ConservativeResult; - ConstantRange StartRange = getUnsignedRange(Start); - ConstantRange EndRange = getUnsignedRange(End); APInt Min = APIntOps::umin(StartRange.getUnsignedMin(), EndRange.getUnsignedMin()); APInt Max = APIntOps::umax(StartRange.getUnsignedMax(), @@ -3064,14 +3117,26 @@ ScalarEvolution::getSignedRange(const SCEV *S) { MaxBECount = getNoopOrZeroExtend(MaxBECount, Ty); const SCEV *Start = AddRec->getStart(); - const SCEV *End = AddRec->evaluateAtIteration(MaxBECount, *this); + const SCEV *Step = AddRec->getStepRecurrence(*this); - // Check for overflow. - if (!AddRec->hasNoSignedWrap()) + ConstantRange StartRange = getSignedRange(Start); + ConstantRange StepRange = getSignedRange(Step); + ConstantRange MaxBECountRange = getUnsignedRange(MaxBECount); + ConstantRange EndRange = + StartRange.add(MaxBECountRange.multiply(StepRange)); + + // Check for overflow. This must be done with ConstantRange arithmetic + // because we could be called from within the ScalarEvolution overflow + // checking code. + ConstantRange ExtStartRange = StartRange.sextOrTrunc(BitWidth*2+1); + ConstantRange ExtStepRange = StepRange.sextOrTrunc(BitWidth*2+1); + ConstantRange ExtMaxBECountRange = + MaxBECountRange.zextOrTrunc(BitWidth*2+1); + ConstantRange ExtEndRange = EndRange.sextOrTrunc(BitWidth*2+1); + if (ExtStartRange.add(ExtMaxBECountRange.multiply(ExtStepRange)) != + ExtEndRange) return ConservativeResult; - ConstantRange StartRange = getSignedRange(Start); - ConstantRange EndRange = getSignedRange(End); APInt Min = APIntOps::smin(StartRange.getSignedMin(), EndRange.getSignedMin()); APInt Max = APIntOps::smax(StartRange.getSignedMax(), @@ -3122,9 +3187,7 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) { else if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) return getConstant(CI); else if (isa<ConstantPointerNull>(V)) - return getIntegerSCEV(0, V->getType()); - else if (isa<UndefValue>(V)) - return getIntegerSCEV(0, V->getType()); + return getConstant(V->getType(), 0); else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) return GA->mayBeOverridden() ? getUnknown(V) : getSCEV(GA->getAliasee()); else @@ -3256,8 +3319,16 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) { // Turn shift left of a constant amount into a multiply. if (ConstantInt *SA = dyn_cast<ConstantInt>(U->getOperand(1))) { uint32_t BitWidth = cast<IntegerType>(U->getType())->getBitWidth(); + + // If the shift count is not less than the bitwidth, the result of + // the shift is undefined. Don't try to analyze it, because the + // resolution chosen here may differ from the resolution chosen in + // other parts of the compiler. 
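The guard that follows (and its twins in the lshr and ashr cases) simply refuses to model a shift whose constant count is not smaller than the bit width. A small standalone illustration of the shl-to-multiply rewrite with the same guard, using plain fixed-width integers rather than SCEV:

#include <cassert>
#include <cstdint>

// x << c is equivalent to x * (1 << c) only when c is a valid shift count,
// i.e. strictly less than the bit width; otherwise the shift has no defined
// result, so no particular multiply can be chosen to stand for it.
static bool foldShlToMul(uint32_t X, unsigned C, uint32_t &Product) {
  const unsigned BitWidth = 32;
  if (C >= BitWidth)
    return false;                        // undefined shift: refuse to analyze
  Product = X * (uint32_t(1) << C);
  return true;
}

int main() {
  uint32_t P;
  assert(foldShlToMul(7, 3, P) && P == 56);   // 7 << 3 == 7 * 8
  assert(!foldShlToMul(7, 32, P));            // count == bit width: undefined
  return 0;
}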
+ if (SA->getValue().uge(BitWidth)) + break; + Constant *X = ConstantInt::get(getContext(), - APInt(BitWidth, 1).shl(SA->getLimitedValue(BitWidth))); + APInt(BitWidth, 1).shl(SA->getZExtValue())); return getMulExpr(getSCEV(U->getOperand(0)), getSCEV(X)); } break; @@ -3266,8 +3337,16 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) { // Turn logical shift right of a constant into a unsigned divide. if (ConstantInt *SA = dyn_cast<ConstantInt>(U->getOperand(1))) { uint32_t BitWidth = cast<IntegerType>(U->getType())->getBitWidth(); + + // If the shift count is not less than the bitwidth, the result of + // the shift is undefined. Don't try to analyze it, because the + // resolution chosen here may differ from the resolution chosen in + // other parts of the compiler. + if (SA->getValue().uge(BitWidth)) + break; + Constant *X = ConstantInt::get(getContext(), - APInt(BitWidth, 1).shl(SA->getLimitedValue(BitWidth))); + APInt(BitWidth, 1).shl(SA->getZExtValue())); return getUDivExpr(getSCEV(U->getOperand(0)), getSCEV(X)); } break; @@ -3275,19 +3354,26 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) { case Instruction::AShr: // For a two-shift sext-inreg, use sext(trunc(x)) as the SCEV expression. if (ConstantInt *CI = dyn_cast<ConstantInt>(U->getOperand(1))) - if (Instruction *L = dyn_cast<Instruction>(U->getOperand(0))) + if (Operator *L = dyn_cast<Operator>(U->getOperand(0))) if (L->getOpcode() == Instruction::Shl && L->getOperand(1) == U->getOperand(1)) { - unsigned BitWidth = getTypeSizeInBits(U->getType()); + uint64_t BitWidth = getTypeSizeInBits(U->getType()); + + // If the shift count is not less than the bitwidth, the result of + // the shift is undefined. Don't try to analyze it, because the + // resolution chosen here may differ from the resolution chosen in + // other parts of the compiler. + if (CI->getValue().uge(BitWidth)) + break; + uint64_t Amt = BitWidth - CI->getZExtValue(); if (Amt == BitWidth) return getSCEV(L->getOperand(0)); // shift by zero --> noop - if (Amt > BitWidth) - return getIntegerSCEV(0, U->getType()); // value is undefined return getSignExtendExpr(getTruncateExpr(getSCEV(L->getOperand(0)), - IntegerType::get(getContext(), Amt)), - U->getType()); + IntegerType::get(getContext(), + Amt)), + U->getType()); } break; @@ -3330,10 +3416,22 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) { // fall through case ICmpInst::ICMP_SGT: case ICmpInst::ICMP_SGE: - if (LHS == U->getOperand(1) && RHS == U->getOperand(2)) - return getSMaxExpr(getSCEV(LHS), getSCEV(RHS)); - else if (LHS == U->getOperand(2) && RHS == U->getOperand(1)) - return getSMinExpr(getSCEV(LHS), getSCEV(RHS)); + // a >s b ? a+x : b+x -> smax(a, b)+x + // a >s b ? 
b+x : a+x -> smin(a, b)+x + if (LHS->getType() == U->getType()) { + const SCEV *LS = getSCEV(LHS); + const SCEV *RS = getSCEV(RHS); + const SCEV *LA = getSCEV(U->getOperand(1)); + const SCEV *RA = getSCEV(U->getOperand(2)); + const SCEV *LDiff = getMinusSCEV(LA, LS); + const SCEV *RDiff = getMinusSCEV(RA, RS); + if (LDiff == RDiff) + return getAddExpr(getSMaxExpr(LS, RS), LDiff); + LDiff = getMinusSCEV(LA, RS); + RDiff = getMinusSCEV(RA, LS); + if (LDiff == RDiff) + return getAddExpr(getSMinExpr(LS, RS), LDiff); + } break; case ICmpInst::ICMP_ULT: case ICmpInst::ICMP_ULE: @@ -3341,28 +3439,52 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) { // fall through case ICmpInst::ICMP_UGT: case ICmpInst::ICMP_UGE: - if (LHS == U->getOperand(1) && RHS == U->getOperand(2)) - return getUMaxExpr(getSCEV(LHS), getSCEV(RHS)); - else if (LHS == U->getOperand(2) && RHS == U->getOperand(1)) - return getUMinExpr(getSCEV(LHS), getSCEV(RHS)); + // a >u b ? a+x : b+x -> umax(a, b)+x + // a >u b ? b+x : a+x -> umin(a, b)+x + if (LHS->getType() == U->getType()) { + const SCEV *LS = getSCEV(LHS); + const SCEV *RS = getSCEV(RHS); + const SCEV *LA = getSCEV(U->getOperand(1)); + const SCEV *RA = getSCEV(U->getOperand(2)); + const SCEV *LDiff = getMinusSCEV(LA, LS); + const SCEV *RDiff = getMinusSCEV(RA, RS); + if (LDiff == RDiff) + return getAddExpr(getUMaxExpr(LS, RS), LDiff); + LDiff = getMinusSCEV(LA, RS); + RDiff = getMinusSCEV(RA, LS); + if (LDiff == RDiff) + return getAddExpr(getUMinExpr(LS, RS), LDiff); + } break; case ICmpInst::ICMP_NE: - // n != 0 ? n : 1 -> umax(n, 1) - if (LHS == U->getOperand(1) && - isa<ConstantInt>(U->getOperand(2)) && - cast<ConstantInt>(U->getOperand(2))->isOne() && + // n != 0 ? n+x : 1+x -> umax(n, 1)+x + if (LHS->getType() == U->getType() && isa<ConstantInt>(RHS) && - cast<ConstantInt>(RHS)->isZero()) - return getUMaxExpr(getSCEV(LHS), getSCEV(U->getOperand(2))); + cast<ConstantInt>(RHS)->isZero()) { + const SCEV *One = getConstant(LHS->getType(), 1); + const SCEV *LS = getSCEV(LHS); + const SCEV *LA = getSCEV(U->getOperand(1)); + const SCEV *RA = getSCEV(U->getOperand(2)); + const SCEV *LDiff = getMinusSCEV(LA, LS); + const SCEV *RDiff = getMinusSCEV(RA, One); + if (LDiff == RDiff) + return getAddExpr(getUMaxExpr(LS, One), LDiff); + } break; case ICmpInst::ICMP_EQ: - // n == 0 ? 1 : n -> umax(n, 1) - if (LHS == U->getOperand(2) && - isa<ConstantInt>(U->getOperand(1)) && - cast<ConstantInt>(U->getOperand(1))->isOne() && + // n == 0 ? 1+x : n+x -> umax(n, 1)+x + if (LHS->getType() == U->getType() && isa<ConstantInt>(RHS) && - cast<ConstantInt>(RHS)->isZero()) - return getUMaxExpr(getSCEV(LHS), getSCEV(U->getOperand(1))); + cast<ConstantInt>(RHS)->isZero()) { + const SCEV *One = getConstant(LHS->getType(), 1); + const SCEV *LS = getSCEV(LHS); + const SCEV *LA = getSCEV(U->getOperand(1)); + const SCEV *RA = getSCEV(U->getOperand(2)); + const SCEV *LDiff = getMinusSCEV(LA, One); + const SCEV *RDiff = getMinusSCEV(RA, LS); + if (LDiff == RDiff) + return getAddExpr(getUMaxExpr(LS, One), LDiff); + } break; default: break; @@ -3739,7 +3861,7 @@ ScalarEvolution::ComputeBackedgeTakenCountFromExitCond(const Loop *L, return getCouldNotCompute(); else // The backedge is never taken. - return getIntegerSCEV(0, CI->getType()); + return getConstant(CI->getType(), 0); } // If it's not an integer or pointer comparison then compute it the hard way. 
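Returning to the select patterns recognized above: a >s b ? a+x : b+x picks whichever of a and b is larger and then adds the same offset, so it equals smax(a, b) + x, and the code detects this shape by checking that the true arm minus the left comparison operand equals the false arm minus the right one (LDiff == RDiff). A small standalone check of the identity (small values only, to stay clear of signed overflow; not LLVM code):

#include <algorithm>
#include <cassert>

int main() {
  // a >s b ? a+x : b+x  ==  smax(a, b) + x, and with the arms swapped the
  // same reasoning gives smin(a, b) + x.
  for (int A = -4; A <= 4; ++A)
    for (int B = -4; B <= 4; ++B)
      for (int X = -4; X <= 4; ++X) {
        assert((A > B ? A + X : B + X) == std::max(A, B) + X);
        assert((A > B ? B + X : A + X) == std::min(A, B) + X);
      }
  return 0;
}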
@@ -3786,6 +3908,9 @@ ScalarEvolution::ComputeBackedgeTakenCountFromExitCondICmp(const Loop *L, Cond = ICmpInst::getSwappedPredicate(Cond); } + // Simplify the operands before analyzing them. + (void)SimplifyICmpOperands(Cond, LHS, RHS); + // If we have a comparison of a chrec against a constant, try to use value // ranges to answer this query. if (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(RHS)) @@ -4067,7 +4192,7 @@ ScalarEvolution::getConstantEvolutionLoopExitValue(PHINode *PN, if (I != ConstantEvolutionLoopExitValue.end()) return I->second; - if (BEs.ugt(APInt(BEs.getBitWidth(),MaxBruteForceIterations))) + if (BEs.ugt(MaxBruteForceIterations)) return ConstantEvolutionLoopExitValue[PN] = 0; // Not going to evaluate it. Constant *&RetVal = ConstantEvolutionLoopExitValue[PN]; @@ -4562,7 +4687,7 @@ ScalarEvolution::HowFarToNonZero(const SCEV *V, const Loop *L) { // already. If so, the backedge will execute zero times. if (const SCEVConstant *C = dyn_cast<SCEVConstant>(V)) { if (!C->getValue()->isNullValue()) - return getIntegerSCEV(0, C->getType()); + return getConstant(C->getType(), 0); return getCouldNotCompute(); // Otherwise it will loop infinitely. } @@ -4573,6 +4698,8 @@ ScalarEvolution::HowFarToNonZero(const SCEV *V, const Loop *L) { /// getLoopPredecessor - If the given loop's header has exactly one unique /// predecessor outside the loop, return it. Otherwise return null. +/// This is less strict that the loop "preheader" concept, which requires +/// the predecessor to have only one single successor. /// BasicBlock *ScalarEvolution::getLoopPredecessor(const Loop *L) { BasicBlock *Header = L->getHeader(); @@ -4591,21 +4718,21 @@ BasicBlock *ScalarEvolution::getLoopPredecessor(const Loop *L) { /// successor from which BB is reachable, or null if no such block is /// found. /// -BasicBlock * +std::pair<BasicBlock *, BasicBlock *> ScalarEvolution::getPredecessorWithUniqueSuccessorForBB(BasicBlock *BB) { // If the block has a unique predecessor, then there is no path from the // predecessor to the block that does not go through the direct edge // from the predecessor to the block. if (BasicBlock *Pred = BB->getSinglePredecessor()) - return Pred; + return std::make_pair(Pred, BB); // A loop's header is defined to be a block that dominates the loop. // If the header has a unique predecessor outside the loop, it must be // a block that has exactly one successor that can reach the loop. if (Loop *L = LI->getLoopFor(BB)) - return getLoopPredecessor(L); + return std::make_pair(getLoopPredecessor(L), L->getHeader()); - return 0; + return std::pair<BasicBlock *, BasicBlock *>(); } /// HasSameValue - SCEV structural equivalence is usually sufficient for @@ -4631,6 +4758,266 @@ static bool HasSameValue(const SCEV *A, const SCEV *B) { return false; } +/// SimplifyICmpOperands - Simplify LHS and RHS in a comparison with +/// predicate Pred. Return true iff any changes were made. +/// +bool ScalarEvolution::SimplifyICmpOperands(ICmpInst::Predicate &Pred, + const SCEV *&LHS, const SCEV *&RHS) { + bool Changed = false; + + // Canonicalize a constant to the right side. + if (const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(LHS)) { + // Check for both operands constant. + if (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(RHS)) { + if (ConstantExpr::getICmp(Pred, + LHSC->getValue(), + RHSC->getValue())->isNullValue()) + goto trivially_false; + else + goto trivially_true; + } + // Otherwise swap the operands to put the constant on the right. 
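Moving a constant to the right-hand side by swapping the operands, as done just below, must be paired with getSwappedPredicate (5 <s x becomes x >s 5), which is not the same as inverting the predicate. A tiny standalone model of why the swapped form preserves the comparison, with hypothetical names rather than the ICmpInst API:

#include <cassert>

// A tiny stand-in for a signed comparison predicate and its helpers.
enum Pred { SLT, SGT, SLE, SGE };

static Pred swapped(Pred P) {  // compare(P, a, b) == compare(swapped(P), b, a)
  switch (P) { case SLT: return SGT; case SGT: return SLT;
               case SLE: return SGE; default:  return SLE; }
}

static bool compare(Pred P, int L, int R) {
  switch (P) { case SLT: return L < R;  case SGT: return L > R;
               case SLE: return L <= R; default:  return L >= R; }
}

int main() {
  // Canonicalizing "5 <s x" by swapping operands yields "x >s 5": the
  // swapped predicate keeps the meaning for every x.
  for (int X = -10; X <= 10; ++X)
    assert(compare(SLT, 5, X) == compare(swapped(SLT), X, 5));
  return 0;
}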
+ std::swap(LHS, RHS); + Pred = ICmpInst::getSwappedPredicate(Pred); + Changed = true; + } + + // If we're comparing an addrec with a value which is loop-invariant in the + // addrec's loop, put the addrec on the left. Also make a dominance check, + // as both operands could be addrecs loop-invariant in each other's loop. + if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(RHS)) { + const Loop *L = AR->getLoop(); + if (LHS->isLoopInvariant(L) && LHS->properlyDominates(L->getHeader(), DT)) { + std::swap(LHS, RHS); + Pred = ICmpInst::getSwappedPredicate(Pred); + Changed = true; + } + } + + // If there's a constant operand, canonicalize comparisons with boundary + // cases, and canonicalize *-or-equal comparisons to regular comparisons. + if (const SCEVConstant *RC = dyn_cast<SCEVConstant>(RHS)) { + const APInt &RA = RC->getValue()->getValue(); + switch (Pred) { + default: llvm_unreachable("Unexpected ICmpInst::Predicate value!"); + case ICmpInst::ICMP_EQ: + case ICmpInst::ICMP_NE: + break; + case ICmpInst::ICMP_UGE: + if ((RA - 1).isMinValue()) { + Pred = ICmpInst::ICMP_NE; + RHS = getConstant(RA - 1); + Changed = true; + break; + } + if (RA.isMaxValue()) { + Pred = ICmpInst::ICMP_EQ; + Changed = true; + break; + } + if (RA.isMinValue()) goto trivially_true; + + Pred = ICmpInst::ICMP_UGT; + RHS = getConstant(RA - 1); + Changed = true; + break; + case ICmpInst::ICMP_ULE: + if ((RA + 1).isMaxValue()) { + Pred = ICmpInst::ICMP_NE; + RHS = getConstant(RA + 1); + Changed = true; + break; + } + if (RA.isMinValue()) { + Pred = ICmpInst::ICMP_EQ; + Changed = true; + break; + } + if (RA.isMaxValue()) goto trivially_true; + + Pred = ICmpInst::ICMP_ULT; + RHS = getConstant(RA + 1); + Changed = true; + break; + case ICmpInst::ICMP_SGE: + if ((RA - 1).isMinSignedValue()) { + Pred = ICmpInst::ICMP_NE; + RHS = getConstant(RA - 1); + Changed = true; + break; + } + if (RA.isMaxSignedValue()) { + Pred = ICmpInst::ICMP_EQ; + Changed = true; + break; + } + if (RA.isMinSignedValue()) goto trivially_true; + + Pred = ICmpInst::ICMP_SGT; + RHS = getConstant(RA - 1); + Changed = true; + break; + case ICmpInst::ICMP_SLE: + if ((RA + 1).isMaxSignedValue()) { + Pred = ICmpInst::ICMP_NE; + RHS = getConstant(RA + 1); + Changed = true; + break; + } + if (RA.isMinSignedValue()) { + Pred = ICmpInst::ICMP_EQ; + Changed = true; + break; + } + if (RA.isMaxSignedValue()) goto trivially_true; + + Pred = ICmpInst::ICMP_SLT; + RHS = getConstant(RA + 1); + Changed = true; + break; + case ICmpInst::ICMP_UGT: + if (RA.isMinValue()) { + Pred = ICmpInst::ICMP_NE; + Changed = true; + break; + } + if ((RA + 1).isMaxValue()) { + Pred = ICmpInst::ICMP_EQ; + RHS = getConstant(RA + 1); + Changed = true; + break; + } + if (RA.isMaxValue()) goto trivially_false; + break; + case ICmpInst::ICMP_ULT: + if (RA.isMaxValue()) { + Pred = ICmpInst::ICMP_NE; + Changed = true; + break; + } + if ((RA - 1).isMinValue()) { + Pred = ICmpInst::ICMP_EQ; + RHS = getConstant(RA - 1); + Changed = true; + break; + } + if (RA.isMinValue()) goto trivially_false; + break; + case ICmpInst::ICMP_SGT: + if (RA.isMinSignedValue()) { + Pred = ICmpInst::ICMP_NE; + Changed = true; + break; + } + if ((RA + 1).isMaxSignedValue()) { + Pred = ICmpInst::ICMP_EQ; + RHS = getConstant(RA + 1); + Changed = true; + break; + } + if (RA.isMaxSignedValue()) goto trivially_false; + break; + case ICmpInst::ICMP_SLT: + if (RA.isMaxSignedValue()) { + Pred = ICmpInst::ICMP_NE; + Changed = true; + break; + } + if ((RA - 1).isMinSignedValue()) { + Pred = ICmpInst::ICMP_EQ; + RHS = 
getConstant(RA - 1); + Changed = true; + break; + } + if (RA.isMinSignedValue()) goto trivially_false; + break; + } + } + + // Check for obvious equality. + if (HasSameValue(LHS, RHS)) { + if (ICmpInst::isTrueWhenEqual(Pred)) + goto trivially_true; + if (ICmpInst::isFalseWhenEqual(Pred)) + goto trivially_false; + } + + // If possible, canonicalize GE/LE comparisons to GT/LT comparisons, by + // adding or subtracting 1 from one of the operands. + switch (Pred) { + case ICmpInst::ICMP_SLE: + if (!getSignedRange(RHS).getSignedMax().isMaxSignedValue()) { + RHS = getAddExpr(getConstant(RHS->getType(), 1, true), RHS, + /*HasNUW=*/false, /*HasNSW=*/true); + Pred = ICmpInst::ICMP_SLT; + Changed = true; + } else if (!getSignedRange(LHS).getSignedMin().isMinSignedValue()) { + LHS = getAddExpr(getConstant(RHS->getType(), (uint64_t)-1, true), LHS, + /*HasNUW=*/false, /*HasNSW=*/true); + Pred = ICmpInst::ICMP_SLT; + Changed = true; + } + break; + case ICmpInst::ICMP_SGE: + if (!getSignedRange(RHS).getSignedMin().isMinSignedValue()) { + RHS = getAddExpr(getConstant(RHS->getType(), (uint64_t)-1, true), RHS, + /*HasNUW=*/false, /*HasNSW=*/true); + Pred = ICmpInst::ICMP_SGT; + Changed = true; + } else if (!getSignedRange(LHS).getSignedMax().isMaxSignedValue()) { + LHS = getAddExpr(getConstant(RHS->getType(), 1, true), LHS, + /*HasNUW=*/false, /*HasNSW=*/true); + Pred = ICmpInst::ICMP_SGT; + Changed = true; + } + break; + case ICmpInst::ICMP_ULE: + if (!getUnsignedRange(RHS).getUnsignedMax().isMaxValue()) { + RHS = getAddExpr(getConstant(RHS->getType(), 1, true), RHS, + /*HasNUW=*/true, /*HasNSW=*/false); + Pred = ICmpInst::ICMP_ULT; + Changed = true; + } else if (!getUnsignedRange(LHS).getUnsignedMin().isMinValue()) { + LHS = getAddExpr(getConstant(RHS->getType(), (uint64_t)-1, true), LHS, + /*HasNUW=*/true, /*HasNSW=*/false); + Pred = ICmpInst::ICMP_ULT; + Changed = true; + } + break; + case ICmpInst::ICMP_UGE: + if (!getUnsignedRange(RHS).getUnsignedMin().isMinValue()) { + RHS = getAddExpr(getConstant(RHS->getType(), (uint64_t)-1, true), RHS, + /*HasNUW=*/true, /*HasNSW=*/false); + Pred = ICmpInst::ICMP_UGT; + Changed = true; + } else if (!getUnsignedRange(LHS).getUnsignedMax().isMaxValue()) { + LHS = getAddExpr(getConstant(RHS->getType(), 1, true), LHS, + /*HasNUW=*/true, /*HasNSW=*/false); + Pred = ICmpInst::ICMP_UGT; + Changed = true; + } + break; + default: + break; + } + + // TODO: More simplifications are possible here. + + return Changed; + +trivially_true: + // Return 0 == 0. + LHS = RHS = getConstant(Type::getInt1Ty(getContext()), 0); + Pred = ICmpInst::ICMP_EQ; + return true; + +trivially_false: + // Return 0 != 0. + LHS = RHS = getConstant(Type::getInt1Ty(getContext()), 0); + Pred = ICmpInst::ICMP_NE; + return true; +} + bool ScalarEvolution::isKnownNegative(const SCEV *S) { return getSignedRange(S).getSignedMax().isNegative(); } @@ -4653,10 +5040,36 @@ bool ScalarEvolution::isKnownNonZero(const SCEV *S) { bool ScalarEvolution::isKnownPredicate(ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS) { + // Canonicalize the inputs first. + (void)SimplifyICmpOperands(Pred, LHS, RHS); + + // If LHS or RHS is an addrec, check to see if the condition is true in + // every iteration of the loop. 
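The two queries that follow encode a simple induction argument: if the predicate holds for the addrec's start value whenever the loop is entered, and for the post-increment value whenever the backedge is taken, then it holds on every iteration. A small standalone simulation of that argument for the recurrence {0,+,1} compared against N (illustrative only):

#include <cassert>

int main() {
  const unsigned N = 10;
  if (0u < N) {               // loop entry guard establishes Pred(Start, N)
    unsigned I = 0;           // the addrec {0,+,1}
    do {
      assert(I < N);          // the predicate holds on every iteration
      ++I;                    // post-increment value
    } while (I < N);          // backedge guard establishes Pred(PostInc, N)
  }
  return 0;
}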
+ if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(LHS)) + if (isLoopEntryGuardedByCond( + AR->getLoop(), Pred, AR->getStart(), RHS) && + isLoopBackedgeGuardedByCond( + AR->getLoop(), Pred, AR->getPostIncExpr(*this), RHS)) + return true; + if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(RHS)) + if (isLoopEntryGuardedByCond( + AR->getLoop(), Pred, LHS, AR->getStart()) && + isLoopBackedgeGuardedByCond( + AR->getLoop(), Pred, LHS, AR->getPostIncExpr(*this))) + return true; + + // Otherwise see what can be done with known constant ranges. + return isKnownPredicateWithRanges(Pred, LHS, RHS); +} +bool +ScalarEvolution::isKnownPredicateWithRanges(ICmpInst::Predicate Pred, + const SCEV *LHS, const SCEV *RHS) { if (HasSameValue(LHS, RHS)) return ICmpInst::isTrueWhenEqual(Pred); + // This code is split out from isKnownPredicate because it is called from + // within isLoopEntryGuardedByCond. switch (Pred) { default: llvm_unreachable("Unexpected ICmpInst::Predicate value!"); @@ -4753,35 +5166,33 @@ ScalarEvolution::isLoopBackedgeGuardedByCond(const Loop *L, LoopContinuePredicate->getSuccessor(0) != L->getHeader()); } -/// isLoopGuardedByCond - Test whether entry to the loop is protected +/// isLoopEntryGuardedByCond - Test whether entry to the loop is protected /// by a conditional between LHS and RHS. This is used to help avoid max /// expressions in loop trip counts, and to eliminate casts. bool -ScalarEvolution::isLoopGuardedByCond(const Loop *L, - ICmpInst::Predicate Pred, - const SCEV *LHS, const SCEV *RHS) { +ScalarEvolution::isLoopEntryGuardedByCond(const Loop *L, + ICmpInst::Predicate Pred, + const SCEV *LHS, const SCEV *RHS) { // Interpret a null as meaning no loop, where there is obviously no guard // (interprocedural conditions notwithstanding). if (!L) return false; - BasicBlock *Predecessor = getLoopPredecessor(L); - BasicBlock *PredecessorDest = L->getHeader(); - // Starting at the loop predecessor, climb up the predecessor chain, as long // as there are predecessors that can be found that have unique successors // leading to the original header. - for (; Predecessor; - PredecessorDest = Predecessor, - Predecessor = getPredecessorWithUniqueSuccessorForBB(Predecessor)) { + for (std::pair<BasicBlock *, BasicBlock *> + Pair(getLoopPredecessor(L), L->getHeader()); + Pair.first; + Pair = getPredecessorWithUniqueSuccessorForBB(Pair.first)) { BranchInst *LoopEntryPredicate = - dyn_cast<BranchInst>(Predecessor->getTerminator()); + dyn_cast<BranchInst>(Pair.first->getTerminator()); if (!LoopEntryPredicate || LoopEntryPredicate->isUnconditional()) continue; if (isImpliedCond(LoopEntryPredicate->getCondition(), Pred, LHS, RHS, - LoopEntryPredicate->getSuccessor(0) != PredecessorDest)) + LoopEntryPredicate->getSuccessor(0) != Pair.second)) return true; } @@ -4845,117 +5256,12 @@ bool ScalarEvolution::isImpliedCond(Value *CondValue, // Canonicalize the query to match the way instcombine will have // canonicalized the comparison. - // First, put a constant operand on the right. - if (isa<SCEVConstant>(LHS)) { - std::swap(LHS, RHS); - Pred = ICmpInst::getSwappedPredicate(Pred); - } - // Then, canonicalize comparisons with boundary cases. 
- if (const SCEVConstant *RC = dyn_cast<SCEVConstant>(RHS)) { - const APInt &RA = RC->getValue()->getValue(); - switch (Pred) { - default: llvm_unreachable("Unexpected ICmpInst::Predicate value!"); - case ICmpInst::ICMP_EQ: - case ICmpInst::ICMP_NE: - break; - case ICmpInst::ICMP_UGE: - if ((RA - 1).isMinValue()) { - Pred = ICmpInst::ICMP_NE; - RHS = getConstant(RA - 1); - break; - } - if (RA.isMaxValue()) { - Pred = ICmpInst::ICMP_EQ; - break; - } - if (RA.isMinValue()) return true; - break; - case ICmpInst::ICMP_ULE: - if ((RA + 1).isMaxValue()) { - Pred = ICmpInst::ICMP_NE; - RHS = getConstant(RA + 1); - break; - } - if (RA.isMinValue()) { - Pred = ICmpInst::ICMP_EQ; - break; - } - if (RA.isMaxValue()) return true; - break; - case ICmpInst::ICMP_SGE: - if ((RA - 1).isMinSignedValue()) { - Pred = ICmpInst::ICMP_NE; - RHS = getConstant(RA - 1); - break; - } - if (RA.isMaxSignedValue()) { - Pred = ICmpInst::ICMP_EQ; - break; - } - if (RA.isMinSignedValue()) return true; - break; - case ICmpInst::ICMP_SLE: - if ((RA + 1).isMaxSignedValue()) { - Pred = ICmpInst::ICMP_NE; - RHS = getConstant(RA + 1); - break; - } - if (RA.isMinSignedValue()) { - Pred = ICmpInst::ICMP_EQ; - break; - } - if (RA.isMaxSignedValue()) return true; - break; - case ICmpInst::ICMP_UGT: - if (RA.isMinValue()) { - Pred = ICmpInst::ICMP_NE; - break; - } - if ((RA + 1).isMaxValue()) { - Pred = ICmpInst::ICMP_EQ; - RHS = getConstant(RA + 1); - break; - } - if (RA.isMaxValue()) return false; - break; - case ICmpInst::ICMP_ULT: - if (RA.isMaxValue()) { - Pred = ICmpInst::ICMP_NE; - break; - } - if ((RA - 1).isMinValue()) { - Pred = ICmpInst::ICMP_EQ; - RHS = getConstant(RA - 1); - break; - } - if (RA.isMinValue()) return false; - break; - case ICmpInst::ICMP_SGT: - if (RA.isMinSignedValue()) { - Pred = ICmpInst::ICMP_NE; - break; - } - if ((RA + 1).isMaxSignedValue()) { - Pred = ICmpInst::ICMP_EQ; - RHS = getConstant(RA + 1); - break; - } - if (RA.isMaxSignedValue()) return false; - break; - case ICmpInst::ICMP_SLT: - if (RA.isMaxSignedValue()) { - Pred = ICmpInst::ICMP_NE; - break; - } - if ((RA - 1).isMinSignedValue()) { - Pred = ICmpInst::ICMP_EQ; - RHS = getConstant(RA - 1); - break; - } - if (RA.isMinSignedValue()) return false; - break; - } - } + if (SimplifyICmpOperands(Pred, LHS, RHS)) + if (LHS == RHS) + return CmpInst::isTrueWhenEqual(Pred); + if (SimplifyICmpOperands(FoundPred, FoundLHS, FoundRHS)) + if (FoundLHS == FoundRHS) + return CmpInst::isFalseWhenEqual(Pred); // Check to see if we can make the LHS or RHS match. 
if (LHS == FoundRHS || RHS == FoundLHS) { @@ -5028,26 +5334,26 @@ ScalarEvolution::isImpliedCondOperandsHelper(ICmpInst::Predicate Pred, break; case ICmpInst::ICMP_SLT: case ICmpInst::ICMP_SLE: - if (isKnownPredicate(ICmpInst::ICMP_SLE, LHS, FoundLHS) && - isKnownPredicate(ICmpInst::ICMP_SGE, RHS, FoundRHS)) + if (isKnownPredicateWithRanges(ICmpInst::ICMP_SLE, LHS, FoundLHS) && + isKnownPredicateWithRanges(ICmpInst::ICMP_SGE, RHS, FoundRHS)) return true; break; case ICmpInst::ICMP_SGT: case ICmpInst::ICMP_SGE: - if (isKnownPredicate(ICmpInst::ICMP_SGE, LHS, FoundLHS) && - isKnownPredicate(ICmpInst::ICMP_SLE, RHS, FoundRHS)) + if (isKnownPredicateWithRanges(ICmpInst::ICMP_SGE, LHS, FoundLHS) && + isKnownPredicateWithRanges(ICmpInst::ICMP_SLE, RHS, FoundRHS)) return true; break; case ICmpInst::ICMP_ULT: case ICmpInst::ICMP_ULE: - if (isKnownPredicate(ICmpInst::ICMP_ULE, LHS, FoundLHS) && - isKnownPredicate(ICmpInst::ICMP_UGE, RHS, FoundRHS)) + if (isKnownPredicateWithRanges(ICmpInst::ICMP_ULE, LHS, FoundLHS) && + isKnownPredicateWithRanges(ICmpInst::ICMP_UGE, RHS, FoundRHS)) return true; break; case ICmpInst::ICMP_UGT: case ICmpInst::ICMP_UGE: - if (isKnownPredicate(ICmpInst::ICMP_UGE, LHS, FoundLHS) && - isKnownPredicate(ICmpInst::ICMP_ULE, RHS, FoundRHS)) + if (isKnownPredicateWithRanges(ICmpInst::ICMP_UGE, LHS, FoundLHS) && + isKnownPredicateWithRanges(ICmpInst::ICMP_ULE, RHS, FoundRHS)) return true; break; } @@ -5066,7 +5372,7 @@ const SCEV *ScalarEvolution::getBECount(const SCEV *Start, "This code doesn't handle negative strides yet!"); const Type *Ty = Start->getType(); - const SCEV *NegOne = getIntegerSCEV(-1, Ty); + const SCEV *NegOne = getConstant(Ty, (uint64_t)-1); const SCEV *Diff = getMinusSCEV(End, Start); const SCEV *RoundUp = getAddExpr(Step, NegOne); @@ -5122,7 +5428,7 @@ ScalarEvolution::HowManyLessThans(const SCEV *LHS, const SCEV *RHS, // behavior, so if wrap does occur, the loop could either terminate or // loop infinitely, but in either case, the loop is guaranteed to // iterate at least until the iteration where the wrapping occurs. - const SCEV *One = getIntegerSCEV(1, Step->getType()); + const SCEV *One = getConstant(Step->getType(), 1); if (isSigned) { APInt Max = APInt::getSignedMaxValue(BitWidth); if ((Max - getSignedRange(getMinusSCEV(Step, One)).getSignedMax()) @@ -5156,10 +5462,10 @@ ScalarEvolution::HowManyLessThans(const SCEV *LHS, const SCEV *RHS, // only know that it will execute (max(m,n)-n)/s times. In both cases, // the division must round up. const SCEV *End = RHS; - if (!isLoopGuardedByCond(L, - isSigned ? ICmpInst::ICMP_SLT : - ICmpInst::ICMP_ULT, - getMinusSCEV(Start, Step), RHS)) + if (!isLoopEntryGuardedByCond(L, + isSigned ? ICmpInst::ICMP_SLT : + ICmpInst::ICMP_ULT, + getMinusSCEV(Start, Step), RHS)) End = isSigned ? getSMaxExpr(RHS, Start) : getUMaxExpr(RHS, Start); @@ -5173,7 +5479,7 @@ ScalarEvolution::HowManyLessThans(const SCEV *LHS, const SCEV *RHS, // This allows the subsequent ceiling division of (N+(step-1))/step to // compute the correct value. const SCEV *StepMinusOne = getMinusSCEV(Step, - getIntegerSCEV(1, Step->getType())); + getConstant(Step->getType(), 1)); MaxEnd = isSigned ? 
getSMinExpr(MaxEnd, getMinusSCEV(getConstant(APInt::getSignedMaxValue(BitWidth)), @@ -5210,7 +5516,7 @@ const SCEV *SCEVAddRecExpr::getNumIterationsInRange(ConstantRange Range, if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(getStart())) if (!SC->getValue()->isZero()) { SmallVector<const SCEV *, 4> Operands(op_begin(), op_end()); - Operands[0] = SE.getIntegerSCEV(0, SC->getType()); + Operands[0] = SE.getConstant(SC->getType(), 0); const SCEV *Shifted = SE.getAddRecExpr(Operands, getLoop()); if (const SCEVAddRecExpr *ShiftedAddRec = dyn_cast<SCEVAddRecExpr>(Shifted)) @@ -5234,7 +5540,7 @@ const SCEV *SCEVAddRecExpr::getNumIterationsInRange(ConstantRange Range, // iteration exits. unsigned BitWidth = SE.getTypeSizeInBits(getType()); if (!Range.contains(APInt(BitWidth, 0))) - return SE.getIntegerSCEV(0, getType()); + return SE.getConstant(getType(), 0); if (isAffine()) { // If this is an affine expression then we have this situation: @@ -5459,7 +5765,7 @@ void ScalarEvolution::print(raw_ostream &OS, const Module *) const { WriteAsOperand(OS, F, /*PrintType=*/false); OS << "\n"; for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I) - if (isSCEVable(I->getType())) { + if (isSCEVable(I->getType()) && !isa<CmpInst>(*I)) { OS << *I << '\n'; OS << " --> "; const SCEV *SV = SE.getSCEV(&*I); diff --git a/lib/Analysis/ScalarEvolutionExpander.cpp b/lib/Analysis/ScalarEvolutionExpander.cpp index 2e18cea..0012b84 100644 --- a/lib/Analysis/ScalarEvolutionExpander.cpp +++ b/lib/Analysis/ScalarEvolutionExpander.cpp @@ -192,7 +192,7 @@ static bool FactorOutConstant(const SCEV *&S, // x/x == 1. if (S == Factor) { - S = SE.getIntegerSCEV(1, S->getType()); + S = SE.getConstant(S->getType(), 1); return true; } @@ -244,7 +244,7 @@ static bool FactorOutConstant(const SCEV *&S, // Mul's operands. If so, we can just remove it. for (unsigned i = 0, e = M->getNumOperands(); i != e; ++i) { const SCEV *SOp = M->getOperand(i); - const SCEV *Remainder = SE.getIntegerSCEV(0, SOp->getType()); + const SCEV *Remainder = SE.getConstant(SOp->getType(), 0); if (FactorOutConstant(SOp, Remainder, Factor, SE, TD) && Remainder->isZero()) { SmallVector<const SCEV *, 4> NewMulOps(M->op_begin(), M->op_end()); @@ -259,7 +259,7 @@ static bool FactorOutConstant(const SCEV *&S, // In an AddRec, check if both start and step are divisible. if (const SCEVAddRecExpr *A = dyn_cast<SCEVAddRecExpr>(S)) { const SCEV *Step = A->getStepRecurrence(SE); - const SCEV *StepRem = SE.getIntegerSCEV(0, Step->getType()); + const SCEV *StepRem = SE.getConstant(Step->getType(), 0); if (!FactorOutConstant(Step, StepRem, Factor, SE, TD)) return false; if (!StepRem->isZero()) @@ -289,7 +289,7 @@ static void SimplifyAddOperands(SmallVectorImpl<const SCEV *> &Ops, SmallVector<const SCEV *, 8> AddRecs(Ops.end() - NumAddRecs, Ops.end()); // Let ScalarEvolution sort and simplify the non-addrecs list. const SCEV *Sum = NoAddRecs.empty() ? - SE.getIntegerSCEV(0, Ty) : + SE.getConstant(Ty, 0) : SE.getAddExpr(NoAddRecs); // If it returned an add, use the operands. Otherwise it simplified // the sum into a single value, so just use that. 
@@ -316,7 +316,7 @@ static void SplitAddRecs(SmallVectorImpl<const SCEV *> &Ops, while (const SCEVAddRecExpr *A = dyn_cast<SCEVAddRecExpr>(Ops[i])) { const SCEV *Start = A->getStart(); if (Start->isZero()) break; - const SCEV *Zero = SE.getIntegerSCEV(0, Ty); + const SCEV *Zero = SE.getConstant(Ty, 0); AddRecs.push_back(SE.getAddRecExpr(Zero, A->getStepRecurrence(SE), A->getLoop())); @@ -392,7 +392,7 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin, SmallVector<const SCEV *, 8> NewOps; for (unsigned i = 0, e = Ops.size(); i != e; ++i) { const SCEV *Op = Ops[i]; - const SCEV *Remainder = SE.getIntegerSCEV(0, Ty); + const SCEV *Remainder = SE.getConstant(Ty, 0); if (FactorOutConstant(Op, Remainder, ElSize, SE, SE.TD)) { // Op now has ElSize factored out. ScaledOps.push_back(Op); @@ -642,6 +642,8 @@ static const Loop *GetRelevantLoop(const SCEV *S, LoopInfo &LI, llvm_unreachable("Unexpected SCEV type!"); } +namespace { + /// LoopCompare - Compare loops by PickMostRelevantLoop. class LoopCompare { DominatorTree &DT; @@ -668,6 +670,8 @@ public: } }; +} + Value *SCEVExpander::visitAddExpr(const SCEVAddExpr *S) { const Type *Ty = SE.getEffectiveSCEVType(S->getType()); @@ -705,9 +709,11 @@ Value *SCEVExpander::visitAddExpr(const SCEVAddExpr *S) { Sum = expandAddToGEP(NewOps.begin(), NewOps.end(), PTy, Ty, Sum); } else if (const PointerType *PTy = dyn_cast<PointerType>(Op->getType())) { // The running sum is an integer, and there's a pointer at this level. - // Try to form a getelementptr. + // Try to form a getelementptr. If the running sum is instructions, + // use a SCEVUnknown to avoid re-analyzing them. SmallVector<const SCEV *, 4> NewOps; - NewOps.push_back(SE.getUnknown(Sum)); + NewOps.push_back(isa<Instruction>(Sum) ? SE.getUnknown(Sum) : + SE.getSCEV(Sum)); for (++I; I != E && I->first == CurLoop; ++I) NewOps.push_back(I->second); Sum = expandAddToGEP(NewOps.begin(), NewOps.end(), PTy, Ty, expand(Op)); @@ -797,7 +803,7 @@ static void ExposePointerBase(const SCEV *&Base, const SCEV *&Rest, while (const SCEVAddRecExpr *A = dyn_cast<SCEVAddRecExpr>(Base)) { Base = A->getStart(); Rest = SE.getAddExpr(Rest, - SE.getAddRecExpr(SE.getIntegerSCEV(0, A->getType()), + SE.getAddRecExpr(SE.getConstant(A->getType(), 0), A->getStepRecurrence(SE), A->getLoop())); } @@ -966,9 +972,12 @@ Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) { // Determine a normalized form of this expression, which is the expression // before any post-inc adjustment is made. const SCEVAddRecExpr *Normalized = S; - if (L == PostIncLoop) { - const SCEV *Step = S->getStepRecurrence(SE); - Normalized = cast<SCEVAddRecExpr>(SE.getMinusSCEV(S, Step)); + if (PostIncLoops.count(L)) { + PostIncLoopSet Loops; + Loops.insert(L); + Normalized = + cast<SCEVAddRecExpr>(TransformForPostIncUse(Normalize, S, 0, 0, + Loops, SE, *SE.DT)); } // Strip off any non-loop-dominating component from the addrec start. @@ -976,7 +985,7 @@ Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) { const SCEV *PostLoopOffset = 0; if (!Start->properlyDominates(L->getHeader(), SE.DT)) { PostLoopOffset = Start; - Start = SE.getIntegerSCEV(0, Normalized->getType()); + Start = SE.getConstant(Normalized->getType(), 0); Normalized = cast<SCEVAddRecExpr>(SE.getAddRecExpr(Start, Normalized->getStepRecurrence(SE), @@ -986,10 +995,9 @@ Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) { // Strip off any non-loop-dominating component from the addrec step. 
const SCEV *Step = Normalized->getStepRecurrence(SE); const SCEV *PostLoopScale = 0; - if (!Step->hasComputableLoopEvolution(L) && - !Step->dominates(L->getHeader(), SE.DT)) { + if (!Step->dominates(L->getHeader(), SE.DT)) { PostLoopScale = Step; - Step = SE.getIntegerSCEV(1, Normalized->getType()); + Step = SE.getConstant(Normalized->getType(), 1); Normalized = cast<SCEVAddRecExpr>(SE.getAddRecExpr(Start, Step, Normalized->getLoop())); @@ -1002,7 +1010,7 @@ Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) { // Accommodate post-inc mode, if necessary. Value *Result; - if (L != PostIncLoop) + if (!PostIncLoops.count(L)) Result = PN; else { // In PostInc mode, use the post-incremented value. @@ -1072,7 +1080,7 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) { // {X,+,F} --> X + {0,+,F} if (!S->getStart()->isZero()) { SmallVector<const SCEV *, 4> NewOps(S->op_begin(), S->op_end()); - NewOps[0] = SE.getIntegerSCEV(0, Ty); + NewOps[0] = SE.getConstant(Ty, 0); const SCEV *Rest = SE.getAddRecExpr(NewOps, L); // Turn things like ptrtoint+arithmetic+inttoptr into GEP. See the @@ -1100,7 +1108,7 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) { // {0,+,1} --> Insert a canonical induction variable into the loop! if (S->isAffine() && - S->getOperand(1) == SE.getIntegerSCEV(1, Ty)) { + S->getOperand(1) == SE.getConstant(Ty, 1)) { // If there's a canonical IV, just use it. if (CanonicalIV) { assert(Ty == SE.getEffectiveSCEVType(CanonicalIV->getType()) && @@ -1274,7 +1282,7 @@ Value *SCEVExpander::expand(const SCEV *S) { // If the SCEV is computable at this level, insert it into the header // after the PHIs (and after any other instructions that we've inserted // there) so that it is guaranteed to dominate any user inside the loop. - if (L && S->hasComputableLoopEvolution(L) && L != PostIncLoop) + if (L && S->hasComputableLoopEvolution(L) && !PostIncLoops.count(L)) InsertPt = L->getHeader()->getFirstNonPHI(); while (isInsertedInstruction(InsertPt) || isa<DbgInfoIntrinsic>(InsertPt)) InsertPt = llvm::next(BasicBlock::iterator(InsertPt)); @@ -1296,7 +1304,7 @@ Value *SCEVExpander::expand(const SCEV *S) { Value *V = visit(S); // Remember the expanded value for this SCEV at this location. 
- if (!PostIncLoop) + if (PostIncLoops.empty()) InsertedExpressions[std::make_pair(S, InsertPt)] = V; restoreInsertPoint(SaveInsertBB, SaveInsertPt); @@ -1304,7 +1312,7 @@ Value *SCEVExpander::expand(const SCEV *S) { } void SCEVExpander::rememberInstruction(Value *I) { - if (!PostIncLoop) + if (PostIncLoops.empty()) InsertedValues.insert(I); // If we just claimed an existing instruction and that instruction had @@ -1334,8 +1342,8 @@ Value * SCEVExpander::getOrInsertCanonicalInductionVariable(const Loop *L, const Type *Ty) { assert(Ty->isIntegerTy() && "Can only insert integer induction variables!"); - const SCEV *H = SE.getAddRecExpr(SE.getIntegerSCEV(0, Ty), - SE.getIntegerSCEV(1, Ty), L); + const SCEV *H = SE.getAddRecExpr(SE.getConstant(Ty, 0), + SE.getConstant(Ty, 1), L); BasicBlock *SaveInsertBB = Builder.GetInsertBlock(); BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint(); Value *V = expandCodeFor(H, 0, L->getHeader()->begin()); diff --git a/lib/Analysis/ScalarEvolutionNormalization.cpp b/lib/Analysis/ScalarEvolutionNormalization.cpp new file mode 100644 index 0000000..75c381d --- /dev/null +++ b/lib/Analysis/ScalarEvolutionNormalization.cpp @@ -0,0 +1,150 @@ +//===- ScalarEvolutionNormalization.cpp - See below -------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements utilities for working with "normalized" expressions. +// See the comments at the top of ScalarEvolutionNormalization.h for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/Dominators.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/ScalarEvolutionExpressions.h" +#include "llvm/Analysis/ScalarEvolutionNormalization.h" +using namespace llvm; + +/// IVUseShouldUsePostIncValue - We have discovered a "User" of an IV expression +/// and now we need to decide whether the user should use the preinc or post-inc +/// value. If this user should use the post-inc version of the IV, return true. +/// +/// Choosing wrong here can break dominance properties (if we choose to use the +/// post-inc value when we cannot) or it can end up adding extra live-ranges to +/// the loop, resulting in reg-reg copies (if we use the pre-inc value when we +/// should use the post-inc value). +static bool IVUseShouldUsePostIncValue(Instruction *User, Instruction *IV, + const Loop *L, DominatorTree *DT) { + // If the user is in the loop, use the preinc value. + if (L->contains(User)) return false; + + BasicBlock *LatchBlock = L->getLoopLatch(); + if (!LatchBlock) + return false; + + // Ok, the user is outside of the loop. If it is dominated by the latch + // block, use the post-inc value. + if (DT->dominates(LatchBlock, User->getParent())) + return true; + + // There is one case we have to be careful of: PHI nodes. These little guys + // can live in blocks that are not dominated by the latch block, but (since + // their uses occur in the predecessor block, not the block the PHI lives in) + // should still use the post-inc value. Check for this case now. + PHINode *PN = dyn_cast<PHINode>(User); + if (!PN) return false; // not a phi, not dominated by latch block. + + // Look at all of the uses of IV by the PHI node. 
If any use corresponds to + // a block that is not dominated by the latch block, give up and use the + // preincremented value. + unsigned NumUses = 0; + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) + if (PN->getIncomingValue(i) == IV) { + ++NumUses; + if (!DT->dominates(LatchBlock, PN->getIncomingBlock(i))) + return false; + } + + // Okay, all uses of IV by PN are in predecessor blocks that really are + // dominated by the latch block. Use the post-incremented value. + return true; +} + +const SCEV *llvm::TransformForPostIncUse(TransformKind Kind, + const SCEV *S, + Instruction *User, + Value *OperandValToReplace, + PostIncLoopSet &Loops, + ScalarEvolution &SE, + DominatorTree &DT) { + if (isa<SCEVConstant>(S) || isa<SCEVUnknown>(S)) + return S; + if (const SCEVCastExpr *X = dyn_cast<SCEVCastExpr>(S)) { + const SCEV *O = X->getOperand(); + const SCEV *N = TransformForPostIncUse(Kind, O, User, OperandValToReplace, + Loops, SE, DT); + if (O != N) + switch (S->getSCEVType()) { + case scZeroExtend: return SE.getZeroExtendExpr(N, S->getType()); + case scSignExtend: return SE.getSignExtendExpr(N, S->getType()); + case scTruncate: return SE.getTruncateExpr(N, S->getType()); + default: llvm_unreachable("Unexpected SCEVCastExpr kind!"); + } + return S; + } + if (const SCEVNAryExpr *X = dyn_cast<SCEVNAryExpr>(S)) { + SmallVector<const SCEV *, 8> Operands; + bool Changed = false; + for (SCEVNAryExpr::op_iterator I = X->op_begin(), E = X->op_end(); + I != E; ++I) { + const SCEV *O = *I; + const SCEV *N = TransformForPostIncUse(Kind, O, User, OperandValToReplace, + Loops, SE, DT); + Changed |= N != O; + Operands.push_back(N); + } + if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) { + // An addrec. This is the interesting part. + const Loop *L = AR->getLoop(); + const SCEV *Result = SE.getAddRecExpr(Operands, L); + switch (Kind) { + default: llvm_unreachable("Unexpected transform name!"); + case NormalizeAutodetect: + if (Instruction *OI = dyn_cast<Instruction>(OperandValToReplace)) + if (IVUseShouldUsePostIncValue(User, OI, L, &DT)) { + Result = SE.getMinusSCEV(Result, AR->getStepRecurrence(SE)); + Loops.insert(L); + } + break; + case Normalize: + if (Loops.count(L)) + Result = SE.getMinusSCEV(Result, AR->getStepRecurrence(SE)); + break; + case Denormalize: + if (Loops.count(L)) { + const SCEV *TransformedStep = + TransformForPostIncUse(Kind, AR->getStepRecurrence(SE), + User, OperandValToReplace, Loops, SE, DT); + Result = SE.getAddExpr(Result, TransformedStep); + } + break; + } + return Result; + } + if (Changed) + switch (S->getSCEVType()) { + case scAddExpr: return SE.getAddExpr(Operands); + case scMulExpr: return SE.getMulExpr(Operands); + case scSMaxExpr: return SE.getSMaxExpr(Operands); + case scUMaxExpr: return SE.getUMaxExpr(Operands); + default: llvm_unreachable("Unexpected SCEVNAryExpr kind!"); + } + return S; + } + if (const SCEVUDivExpr *X = dyn_cast<SCEVUDivExpr>(S)) { + const SCEV *LO = X->getLHS(); + const SCEV *RO = X->getRHS(); + const SCEV *LN = TransformForPostIncUse(Kind, LO, User, OperandValToReplace, + Loops, SE, DT); + const SCEV *RN = TransformForPostIncUse(Kind, RO, User, OperandValToReplace, + Loops, SE, DT); + if (LO != LN || RO != RN) + return SE.getUDivExpr(LN, RN); + return S; + } + llvm_unreachable("Unexpected SCEV kind!"); + return 0; +} diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp index 5ae72f7..7e8ec2e 100644 --- a/lib/Analysis/ValueTracking.cpp +++ b/lib/Analysis/ValueTracking.cpp @@ -1342,22 +1342,23 
@@ Value *llvm::FindInsertedValue(Value *V, const unsigned *idx_begin, /// GetConstantStringInfo - This function computes the length of a /// null-terminated C string pointed to by V. If successful, it returns true /// and returns the string in Str. If unsuccessful, it returns false. -bool llvm::GetConstantStringInfo(Value *V, std::string &Str, uint64_t Offset, +bool llvm::GetConstantStringInfo(const Value *V, std::string &Str, + uint64_t Offset, bool StopAtNul) { // If V is NULL then return false; if (V == NULL) return false; // Look through bitcast instructions. - if (BitCastInst *BCI = dyn_cast<BitCastInst>(V)) + if (const BitCastInst *BCI = dyn_cast<BitCastInst>(V)) return GetConstantStringInfo(BCI->getOperand(0), Str, Offset, StopAtNul); // If the value is not a GEP instruction nor a constant expression with a // GEP instruction, then return false because ConstantArray can't occur // any other way - User *GEP = 0; - if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(V)) { + const User *GEP = 0; + if (const GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(V)) { GEP = GEPI; - } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) { + } else if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) { if (CE->getOpcode() == Instruction::BitCast) return GetConstantStringInfo(CE->getOperand(0), Str, Offset, StopAtNul); if (CE->getOpcode() != Instruction::GetElementPtr) @@ -1378,7 +1379,7 @@ bool llvm::GetConstantStringInfo(Value *V, std::string &Str, uint64_t Offset, // Check to make sure that the first operand of the GEP is an integer and // has value 0 so that we are sure we're indexing into the initializer. - ConstantInt *FirstIdx = dyn_cast<ConstantInt>(GEP->getOperand(1)); + const ConstantInt *FirstIdx = dyn_cast<ConstantInt>(GEP->getOperand(1)); if (FirstIdx == 0 || !FirstIdx->isZero()) return false; @@ -1386,7 +1387,7 @@ bool llvm::GetConstantStringInfo(Value *V, std::string &Str, uint64_t Offset, // into the array. If this occurs, we can't say anything meaningful about // the string. uint64_t StartIdx = 0; - if (ConstantInt *CI = dyn_cast<ConstantInt>(GEP->getOperand(2))) + if (const ConstantInt *CI = dyn_cast<ConstantInt>(GEP->getOperand(2))) StartIdx = CI->getZExtValue(); else return false; @@ -1397,10 +1398,10 @@ bool llvm::GetConstantStringInfo(Value *V, std::string &Str, uint64_t Offset, // The GEP instruction, constant or instruction, must reference a global // variable that is a constant and is initialized. The referenced constant // initializer is the array that we'll use for optimization. - GlobalVariable* GV = dyn_cast<GlobalVariable>(V); + const GlobalVariable* GV = dyn_cast<GlobalVariable>(V); if (!GV || !GV->isConstant() || !GV->hasDefinitiveInitializer()) return false; - Constant *GlobalInit = GV->getInitializer(); + const Constant *GlobalInit = GV->getInitializer(); // Handle the ConstantAggregateZero case if (isa<ConstantAggregateZero>(GlobalInit)) { @@ -1411,7 +1412,7 @@ bool llvm::GetConstantStringInfo(Value *V, std::string &Str, uint64_t Offset, } // Must be a Constant Array - ConstantArray *Array = dyn_cast<ConstantArray>(GlobalInit); + const ConstantArray *Array = dyn_cast<ConstantArray>(GlobalInit); if (Array == 0 || !Array->getType()->getElementType()->isIntegerTy(8)) return false; @@ -1425,8 +1426,8 @@ bool llvm::GetConstantStringInfo(Value *V, std::string &Str, uint64_t Offset, // to in the array. 
Str.reserve(NumElts-Offset); for (unsigned i = Offset; i != NumElts; ++i) { - Constant *Elt = Array->getOperand(i); - ConstantInt *CI = dyn_cast<ConstantInt>(Elt); + const Constant *Elt = Array->getOperand(i); + const ConstantInt *CI = dyn_cast<ConstantInt>(Elt); if (!CI) // This array isn't suitable, non-int initializer. return false; if (StopAtNul && CI->isZero()) diff --git a/lib/Archive/Archive.cpp b/lib/Archive/Archive.cpp index f4f8a43..54c715c 100644 --- a/lib/Archive/Archive.cpp +++ b/lib/Archive/Archive.cpp @@ -233,15 +233,14 @@ bool llvm::GetBitcodeSymbols(const sys::Path& fName, } Module* -llvm::GetBitcodeSymbols(const unsigned char *BufPtr, unsigned Length, +llvm::GetBitcodeSymbols(const char *BufPtr, unsigned Length, const std::string& ModuleID, LLVMContext& Context, std::vector<std::string>& symbols, std::string* ErrMsg) { // Get the module. std::auto_ptr<MemoryBuffer> Buffer( - MemoryBuffer::getNewMemBuffer(Length, ModuleID.c_str())); - memcpy((char*)Buffer->getBufferStart(), BufPtr, Length); + MemoryBuffer::getMemBufferCopy(StringRef(BufPtr, Length),ModuleID.c_str())); Module *M = ParseBitcodeFile(Buffer.get(), Context, ErrMsg); if (!M) diff --git a/lib/Archive/ArchiveInternals.h b/lib/Archive/ArchiveInternals.h index baea544..08f20e7 100644 --- a/lib/Archive/ArchiveInternals.h +++ b/lib/Archive/ArchiveInternals.h @@ -77,7 +77,7 @@ namespace llvm { std::vector<std::string>& symbols, std::string* ErrMsg); - Module* GetBitcodeSymbols(const unsigned char*Buffer,unsigned Length, + Module* GetBitcodeSymbols(const char *Buffer, unsigned Length, const std::string& ModuleID, LLVMContext& Context, std::vector<std::string>& symbols, diff --git a/lib/Archive/ArchiveReader.cpp b/lib/Archive/ArchiveReader.cpp index 3ef15d2..eef6fe0 100644 --- a/lib/Archive/ArchiveReader.cpp +++ b/lib/Archive/ArchiveReader.cpp @@ -121,7 +121,7 @@ Archive::parseMemberHeader(const char*& At, const char* End, std::string* error) if (isdigit(Hdr->name[3])) { unsigned len = atoi(&Hdr->name[3]); const char *nulp = (const char *)memchr(At, '\0', len); - pathname.assign(At, nulp != 0 ? nulp - At : len); + pathname.assign(At, nulp != 0 ? (uintptr_t)(nulp - At) : len); At += len; MemberSize -= len; flags |= ArchiveMember::HasLongFilenameFlag; @@ -348,8 +348,8 @@ Archive::getAllModules(std::vector<Module*>& Modules, std::string FullMemberName = archPath.str() + "(" + I->getPath().str() + ")"; MemoryBuffer *Buffer = - MemoryBuffer::getNewMemBuffer(I->getSize(), FullMemberName.c_str()); - memcpy((char*)Buffer->getBufferStart(), I->getData(), I->getSize()); + MemoryBuffer::getMemBufferCopy(StringRef(I->getData(), I->getSize()), + FullMemberName.c_str()); Module *M = ParseBitcodeFile(Buffer, Context, ErrMessage); delete Buffer; @@ -487,9 +487,9 @@ Archive::findModuleDefiningSymbol(const std::string& symbol, // Now, load the bitcode module to get the Module. 
std::string FullMemberName = archPath.str() + "(" + mbr->getPath().str() + ")"; - MemoryBuffer *Buffer =MemoryBuffer::getNewMemBuffer(mbr->getSize(), - FullMemberName.c_str()); - memcpy((char*)Buffer->getBufferStart(), mbr->getData(), mbr->getSize()); + MemoryBuffer *Buffer = + MemoryBuffer::getMemBufferCopy(StringRef(mbr->getData(), mbr->getSize()), + FullMemberName.c_str()); Module *m = getLazyBitcodeModule(Buffer, Context, ErrMsg); if (!m) @@ -538,8 +538,8 @@ Archive::findModulesDefiningSymbols(std::set<std::string>& symbols, std::string FullMemberName = archPath.str() + "(" + mbr->getPath().str() + ")"; Module* M = - GetBitcodeSymbols((const unsigned char*)At, mbr->getSize(), - FullMemberName, Context, symbols, error); + GetBitcodeSymbols(At, mbr->getSize(), FullMemberName, Context, + symbols, error); if (M) { // Insert the module's symbols into the symbol table @@ -616,8 +616,8 @@ bool Archive::isBitcodeArchive() { archPath.str() + "(" + I->getPath().str() + ")"; MemoryBuffer *Buffer = - MemoryBuffer::getNewMemBuffer(I->getSize(), FullMemberName.c_str()); - memcpy((char*)Buffer->getBufferStart(), I->getData(), I->getSize()); + MemoryBuffer::getMemBufferCopy(StringRef(I->getData(), I->getSize()), + FullMemberName.c_str()); Module *M = ParseBitcodeFile(Buffer, Context); delete Buffer; if (!M) diff --git a/lib/Archive/ArchiveWriter.cpp b/lib/Archive/ArchiveWriter.cpp index a02601a..21d4f65 100644 --- a/lib/Archive/ArchiveWriter.cpp +++ b/lib/Archive/ArchiveWriter.cpp @@ -226,8 +226,7 @@ Archive::writeMember( std::string FullMemberName = archPath.str() + "(" + member.getPath().str() + ")"; Module* M = - GetBitcodeSymbols((const unsigned char*)data,fSize, - FullMemberName, Context, symbols, ErrMsg); + GetBitcodeSymbols(data, fSize, FullMemberName, Context, symbols, ErrMsg); // If the bitcode parsed successfully if ( M ) { diff --git a/lib/Archive/CMakeLists.txt b/lib/Archive/CMakeLists.txt index 27698cb..7ff478a 100644 --- a/lib/Archive/CMakeLists.txt +++ b/lib/Archive/CMakeLists.txt @@ -2,4 +2,4 @@ add_llvm_library(LLVMArchive Archive.cpp ArchiveReader.cpp ArchiveWriter.cpp - )
\ No newline at end of file + ) diff --git a/lib/AsmParser/LLParser.cpp b/lib/AsmParser/LLParser.cpp index cdad077..3b08ca1 100644 --- a/lib/AsmParser/LLParser.cpp +++ b/lib/AsmParser/LLParser.cpp @@ -322,9 +322,8 @@ bool LLParser::ParseUnnamedType() { return true; } - assert(Lex.getKind() == lltok::kw_type); LocTy TypeLoc = Lex.getLoc(); - Lex.Lex(); // eat kw_type + if (ParseToken(lltok::kw_type, "expected 'type' after '='")) return true; PATypeHolder Ty(Type::getVoidTy(Context)); if (ParseType(Ty)) return true; @@ -1171,10 +1170,10 @@ bool LLParser::ParseOptionalCommaAlign(unsigned &Alignment, return false; } - if (Lex.getKind() == lltok::kw_align) { - if (ParseOptionalAlignment(Alignment)) return true; - } else - return true; + if (Lex.getKind() != lltok::kw_align) + return Error(Lex.getLoc(), "expected metadata or 'align'"); + + if (ParseOptionalAlignment(Alignment)) return true; } return false; @@ -2352,11 +2351,28 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) { if (NSW) return Error(ModifierLoc, "nsw only applies to integer operations"); } - // API compatibility: Accept either integer or floating-point types with - // add, sub, and mul. - if (!Val0->getType()->isIntOrIntVectorTy() && - !Val0->getType()->isFPOrFPVectorTy()) - return Error(ID.Loc,"constexpr requires integer, fp, or vector operands"); + // Check that the type is valid for the operator. + switch (Opc) { + case Instruction::Add: + case Instruction::Sub: + case Instruction::Mul: + case Instruction::UDiv: + case Instruction::SDiv: + case Instruction::URem: + case Instruction::SRem: + if (!Val0->getType()->isIntOrIntVectorTy()) + return Error(ID.Loc, "constexpr requires integer operands"); + break; + case Instruction::FAdd: + case Instruction::FSub: + case Instruction::FMul: + case Instruction::FDiv: + case Instruction::FRem: + if (!Val0->getType()->isFPOrFPVectorTy()) + return Error(ID.Loc, "constexpr requires fp operands"); + break; + default: llvm_unreachable("Unknown binary operator!"); + } unsigned Flags = 0; if (NUW) Flags |= OverflowingBinaryOperator::NoUnsignedWrap; if (NSW) Flags |= OverflowingBinaryOperator::NoSignedWrap; @@ -2788,6 +2804,10 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) { ForwardRefVals.find(FunctionName); if (FRVI != ForwardRefVals.end()) { Fn = M->getFunction(FunctionName); + if (Fn->getType() != PFT) + return Error(FRVI->second.second, "invalid forward reference to " + "function '" + FunctionName + "' with wrong type!"); + ForwardRefVals.erase(FRVI); } else if ((Fn = M->getFunction(FunctionName))) { // If this function already exists in the symbol table, then it is @@ -2933,6 +2953,8 @@ bool LLParser::ParseBasicBlock(PerFunctionState &PFS) { default: assert(0 && "Unknown ParseInstruction result!"); case InstError: return true; case InstNormal: + BB->getInstList().push_back(Inst); + // With a normal result, we check to see if the instruction is followed by // a comma and metadata. if (EatIfPresent(lltok::comma)) @@ -2940,6 +2962,8 @@ bool LLParser::ParseBasicBlock(PerFunctionState &PFS) { return true; break; case InstExtraComma: + BB->getInstList().push_back(Inst); + // If the instruction parser ate an extra comma at the end of it, it // *must* be followed by metadata. if (ParseInstructionMetadata(Inst)) @@ -2947,8 +2971,6 @@ bool LLParser::ParseBasicBlock(PerFunctionState &PFS) { break; } - BB->getInstList().push_back(Inst); - // Set the name on the instruction. 
if (PFS.SetInstName(NameID, NameStr, NameLoc, Inst)) return true; } while (!isa<TerminatorInst>(Inst)); @@ -2995,8 +3017,7 @@ int LLParser::ParseInstruction(Instruction *&Inst, BasicBlock *BB, if (EatIfPresent(lltok::kw_nuw)) NUW = true; } - // API compatibility: Accept either integer or floating-point types. - bool Result = ParseArithmetic(Inst, PFS, KeywordVal, 0); + bool Result = ParseArithmetic(Inst, PFS, KeywordVal, 1); if (!Result) { if (!Inst->getType()->isIntOrIntVectorTy()) { if (NUW) diff --git a/lib/AsmParser/LLParser.h b/lib/AsmParser/LLParser.h index ae460bb..c8f669f 100644 --- a/lib/AsmParser/LLParser.h +++ b/lib/AsmParser/LLParser.h @@ -74,7 +74,7 @@ namespace llvm { public: typedef LLLexer::LocTy LocTy; private: - LLVMContext& Context; + LLVMContext &Context; LLLexer Lex; Module *M; diff --git a/lib/AsmParser/Parser.cpp b/lib/AsmParser/Parser.cpp index 7280cf4..e511cbe 100644 --- a/lib/AsmParser/Parser.cpp +++ b/lib/AsmParser/Parser.cpp @@ -44,9 +44,9 @@ Module *llvm::ParseAssemblyFile(const std::string &Filename, SMDiagnostic &Err, std::string ErrorStr; MemoryBuffer *F = MemoryBuffer::getFileOrSTDIN(Filename.c_str(), &ErrorStr); if (F == 0) { - Err = SMDiagnostic(SMLoc(), "", -1, -1, + Err = SMDiagnostic(Filename, "Could not open input file '" + Filename + "': " + - ErrorStr, ""); + ErrorStr); return 0; } diff --git a/lib/CodeGen/AggressiveAntiDepBreaker.cpp b/lib/CodeGen/AggressiveAntiDepBreaker.cpp index 8840622f..4008a6a 100644 --- a/lib/CodeGen/AggressiveAntiDepBreaker.cpp +++ b/lib/CodeGen/AggressiveAntiDepBreaker.cpp @@ -31,12 +31,12 @@ using namespace llvm; // If DebugDiv > 0 then only break antidep with (ID % DebugDiv) == DebugMod static cl::opt<int> DebugDiv("agg-antidep-debugdiv", - cl::desc("Debug control for aggressive anti-dep breaker"), - cl::init(0), cl::Hidden); + cl::desc("Debug control for aggressive anti-dep breaker"), + cl::init(0), cl::Hidden); static cl::opt<int> DebugMod("agg-antidep-debugmod", - cl::desc("Debug control for aggressive anti-dep breaker"), - cl::init(0), cl::Hidden); + cl::desc("Debug control for aggressive anti-dep breaker"), + cl::init(0), cl::Hidden); AggressiveAntiDepState::AggressiveAntiDepState(const unsigned TargetRegs, MachineBasicBlock *BB) : @@ -210,7 +210,7 @@ void AggressiveAntiDepBreaker::FinishBlock() { } void AggressiveAntiDepBreaker::Observe(MachineInstr *MI, unsigned Count, - unsigned InsertPosIndex) { + unsigned InsertPosIndex) { assert(Count < InsertPosIndex && "Instruction index out of expected range!"); std::set<unsigned> PassthruRegs; @@ -244,7 +244,7 @@ void AggressiveAntiDepBreaker::Observe(MachineInstr *MI, unsigned Count, } bool AggressiveAntiDepBreaker::IsImplicitDefUse(MachineInstr *MI, - MachineOperand& MO) + MachineOperand& MO) { if (!MO.isReg() || !MO.isImplicit()) return false; @@ -281,9 +281,9 @@ void AggressiveAntiDepBreaker::GetPassthruRegs(MachineInstr *MI, /// AntiDepEdges - Return in Edges the anti- and output- dependencies /// in SU that we want to consider for breaking. 
-static void AntiDepEdges(SUnit *SU, std::vector<SDep*>& Edges) { +static void AntiDepEdges(const SUnit *SU, std::vector<const SDep*>& Edges) { SmallSet<unsigned, 4> RegSet; - for (SUnit::pred_iterator P = SU->Preds.begin(), PE = SU->Preds.end(); + for (SUnit::const_pred_iterator P = SU->Preds.begin(), PE = SU->Preds.end(); P != PE; ++P) { if ((P->getKind() == SDep::Anti) || (P->getKind() == SDep::Output)) { unsigned Reg = P->getReg(); @@ -297,14 +297,14 @@ static void AntiDepEdges(SUnit *SU, std::vector<SDep*>& Edges) { /// CriticalPathStep - Return the next SUnit after SU on the bottom-up /// critical path. -static SUnit *CriticalPathStep(SUnit *SU) { - SDep *Next = 0; +static const SUnit *CriticalPathStep(const SUnit *SU) { + const SDep *Next = 0; unsigned NextDepth = 0; // Find the predecessor edge with the greatest depth. if (SU != 0) { - for (SUnit::pred_iterator P = SU->Preds.begin(), PE = SU->Preds.end(); + for (SUnit::const_pred_iterator P = SU->Preds.begin(), PE = SU->Preds.end(); P != PE; ++P) { - SUnit *PredSU = P->getSUnit(); + const SUnit *PredSU = P->getSUnit(); unsigned PredLatency = P->getLatency(); unsigned PredTotalLatency = PredSU->getDepth() + PredLatency; // In the case of a latency tie, prefer an anti-dependency edge over @@ -359,8 +359,7 @@ void AggressiveAntiDepBreaker::HandleLastUse(unsigned Reg, unsigned KillIdx, void AggressiveAntiDepBreaker::PrescanInstruction(MachineInstr *MI, unsigned Count, - std::set<unsigned>& PassthruRegs) -{ + std::set<unsigned>& PassthruRegs) { unsigned *DefIndices = State->GetDefIndices(); std::multimap<unsigned, AggressiveAntiDepState::RegisterReference>& RegRefs = State->GetRegRefs(); @@ -439,7 +438,7 @@ void AggressiveAntiDepBreaker::PrescanInstruction(MachineInstr *MI, } void AggressiveAntiDepBreaker::ScanInstruction(MachineInstr *MI, - unsigned Count) { + unsigned Count) { DEBUG(dbgs() << "\tUse Groups:"); std::multimap<unsigned, AggressiveAntiDepState::RegisterReference>& RegRefs = State->GetRegRefs(); @@ -704,9 +703,9 @@ bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters( /// ScheduleDAG and break them by renaming registers. /// unsigned AggressiveAntiDepBreaker::BreakAntiDependencies( - std::vector<SUnit>& SUnits, - MachineBasicBlock::iterator& Begin, - MachineBasicBlock::iterator& End, + const std::vector<SUnit>& SUnits, + MachineBasicBlock::iterator Begin, + MachineBasicBlock::iterator End, unsigned InsertPosIndex) { unsigned *KillIndices = State->GetKillIndices(); unsigned *DefIndices = State->GetDefIndices(); @@ -721,20 +720,21 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies( RenameOrderType RenameOrder; // ...need a map from MI to SUnit. - std::map<MachineInstr *, SUnit *> MISUnitMap; + std::map<MachineInstr *, const SUnit *> MISUnitMap; for (unsigned i = 0, e = SUnits.size(); i != e; ++i) { - SUnit *SU = &SUnits[i]; - MISUnitMap.insert(std::pair<MachineInstr *, SUnit *>(SU->getInstr(), SU)); + const SUnit *SU = &SUnits[i]; + MISUnitMap.insert(std::pair<MachineInstr *, const SUnit *>(SU->getInstr(), + SU)); } // Track progress along the critical path through the SUnit graph as // we walk the instructions. This is needed for regclasses that only // break critical-path anti-dependencies. 
- SUnit *CriticalPathSU = 0; + const SUnit *CriticalPathSU = 0; MachineInstr *CriticalPathMI = 0; if (CriticalPathSet.any()) { for (unsigned i = 0, e = SUnits.size(); i != e; ++i) { - SUnit *SU = &SUnits[i]; + const SUnit *SU = &SUnits[i]; if (!CriticalPathSU || ((SU->getDepth() + SU->Latency) > (CriticalPathSU->getDepth() + CriticalPathSU->Latency))) { @@ -775,8 +775,8 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies( // The dependence edges that represent anti- and output- // dependencies that are candidates for breaking. - std::vector<SDep*> Edges; - SUnit *PathSU = MISUnitMap[MI]; + std::vector<const SDep *> Edges; + const SUnit *PathSU = MISUnitMap[MI]; AntiDepEdges(PathSU, Edges); // If MI is not on the critical path, then we don't rename @@ -794,7 +794,7 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies( if (!MI->isKill()) { // Attempt to break each anti-dependency... for (unsigned i = 0, e = Edges.size(); i != e; ++i) { - SDep *Edge = Edges[i]; + const SDep *Edge = Edges[i]; SUnit *NextSU = Edge->getSUnit(); if ((Edge->getKind() != SDep::Anti) && @@ -838,7 +838,7 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies( // Also, if there are dependencies on other SUnits with the // same register as the anti-dependency, don't attempt to // break it. - for (SUnit::pred_iterator P = PathSU->Preds.begin(), + for (SUnit::const_pred_iterator P = PathSU->Preds.begin(), PE = PathSU->Preds.end(); P != PE; ++P) { if (P->getSUnit() == NextSU ? (P->getKind() != SDep::Anti || P->getReg() != AntiDepReg) : @@ -847,7 +847,7 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies( break; } } - for (SUnit::pred_iterator P = PathSU->Preds.begin(), + for (SUnit::const_pred_iterator P = PathSU->Preds.begin(), PE = PathSU->Preds.end(); P != PE; ++P) { if ((P->getSUnit() == NextSU) && (P->getKind() != SDep::Anti) && (P->getKind() != SDep::Output)) { diff --git a/lib/CodeGen/AggressiveAntiDepBreaker.h b/lib/CodeGen/AggressiveAntiDepBreaker.h index a62d68c..506d43e 100644 --- a/lib/CodeGen/AggressiveAntiDepBreaker.h +++ b/lib/CodeGen/AggressiveAntiDepBreaker.h @@ -142,9 +142,9 @@ namespace llvm { /// path /// of the ScheduleDAG and break them by renaming registers. /// - unsigned BreakAntiDependencies(std::vector<SUnit>& SUnits, - MachineBasicBlock::iterator& Begin, - MachineBasicBlock::iterator& End, + unsigned BreakAntiDependencies(const std::vector<SUnit>& SUnits, + MachineBasicBlock::iterator Begin, + MachineBasicBlock::iterator End, unsigned InsertPosIndex); /// Observe - Update liveness information to account for the current diff --git a/lib/CodeGen/Analysis.cpp b/lib/CodeGen/Analysis.cpp new file mode 100644 index 0000000..f71eee5 --- /dev/null +++ b/lib/CodeGen/Analysis.cpp @@ -0,0 +1,285 @@ +//===-- Analysis.cpp - CodeGen LLVM IR Analysis Utilities --*- C++ ------*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines several CodeGen-specific LLVM IR analysis utilties. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/Analysis.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Function.h" +#include "llvm/Instructions.h" +#include "llvm/IntrinsicInst.h" +#include "llvm/LLVMContext.h" +#include "llvm/Module.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetLowering.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MathExtras.h" +using namespace llvm; + +/// ComputeLinearIndex - Given an LLVM IR aggregate type and a sequence +/// of insertvalue or extractvalue indices that identify a member, return +/// the linearized index of the start of the member. +/// +unsigned llvm::ComputeLinearIndex(const TargetLowering &TLI, const Type *Ty, + const unsigned *Indices, + const unsigned *IndicesEnd, + unsigned CurIndex) { + // Base case: We're done. + if (Indices && Indices == IndicesEnd) + return CurIndex; + + // Given a struct type, recursively traverse the elements. + if (const StructType *STy = dyn_cast<StructType>(Ty)) { + for (StructType::element_iterator EB = STy->element_begin(), + EI = EB, + EE = STy->element_end(); + EI != EE; ++EI) { + if (Indices && *Indices == unsigned(EI - EB)) + return ComputeLinearIndex(TLI, *EI, Indices+1, IndicesEnd, CurIndex); + CurIndex = ComputeLinearIndex(TLI, *EI, 0, 0, CurIndex); + } + return CurIndex; + } + // Given an array type, recursively traverse the elements. + else if (const ArrayType *ATy = dyn_cast<ArrayType>(Ty)) { + const Type *EltTy = ATy->getElementType(); + for (unsigned i = 0, e = ATy->getNumElements(); i != e; ++i) { + if (Indices && *Indices == i) + return ComputeLinearIndex(TLI, EltTy, Indices+1, IndicesEnd, CurIndex); + CurIndex = ComputeLinearIndex(TLI, EltTy, 0, 0, CurIndex); + } + return CurIndex; + } + // We haven't found the type we're looking for, so keep searching. + return CurIndex + 1; +} + +/// ComputeValueVTs - Given an LLVM IR type, compute a sequence of +/// EVTs that represent all the individual underlying +/// non-aggregate types that comprise it. +/// +/// If Offsets is non-null, it points to a vector to be filled in +/// with the in-memory offsets of each of the individual values. +/// +void llvm::ComputeValueVTs(const TargetLowering &TLI, const Type *Ty, + SmallVectorImpl<EVT> &ValueVTs, + SmallVectorImpl<uint64_t> *Offsets, + uint64_t StartingOffset) { + // Given a struct type, recursively traverse the elements. + if (const StructType *STy = dyn_cast<StructType>(Ty)) { + const StructLayout *SL = TLI.getTargetData()->getStructLayout(STy); + for (StructType::element_iterator EB = STy->element_begin(), + EI = EB, + EE = STy->element_end(); + EI != EE; ++EI) + ComputeValueVTs(TLI, *EI, ValueVTs, Offsets, + StartingOffset + SL->getElementOffset(EI - EB)); + return; + } + // Given an array type, recursively traverse the elements. + if (const ArrayType *ATy = dyn_cast<ArrayType>(Ty)) { + const Type *EltTy = ATy->getElementType(); + uint64_t EltSize = TLI.getTargetData()->getTypeAllocSize(EltTy); + for (unsigned i = 0, e = ATy->getNumElements(); i != e; ++i) + ComputeValueVTs(TLI, EltTy, ValueVTs, Offsets, + StartingOffset + i * EltSize); + return; + } + // Interpret void as zero return values. + if (Ty->isVoidTy()) + return; + // Base case: we can get an EVT for this LLVM IR type. 
+ ValueVTs.push_back(TLI.getValueType(Ty)); + if (Offsets) + Offsets->push_back(StartingOffset); +} + +/// ExtractTypeInfo - Returns the type info, possibly bitcast, encoded in V. +GlobalVariable *llvm::ExtractTypeInfo(Value *V) { + V = V->stripPointerCasts(); + GlobalVariable *GV = dyn_cast<GlobalVariable>(V); + + if (GV && GV->getName() == ".llvm.eh.catch.all.value") { + assert(GV->hasInitializer() && + "The EH catch-all value must have an initializer"); + Value *Init = GV->getInitializer(); + GV = dyn_cast<GlobalVariable>(Init); + if (!GV) V = cast<ConstantPointerNull>(Init); + } + + assert((GV || isa<ConstantPointerNull>(V)) && + "TypeInfo must be a global variable or NULL"); + return GV; +} + +/// hasInlineAsmMemConstraint - Return true if the inline asm instruction being +/// processed uses a memory 'm' constraint. +bool +llvm::hasInlineAsmMemConstraint(std::vector<InlineAsm::ConstraintInfo> &CInfos, + const TargetLowering &TLI) { + for (unsigned i = 0, e = CInfos.size(); i != e; ++i) { + InlineAsm::ConstraintInfo &CI = CInfos[i]; + for (unsigned j = 0, ee = CI.Codes.size(); j != ee; ++j) { + TargetLowering::ConstraintType CType = TLI.getConstraintType(CI.Codes[j]); + if (CType == TargetLowering::C_Memory) + return true; + } + + // Indirect operand accesses access memory. + if (CI.isIndirect) + return true; + } + + return false; +} + +/// getFCmpCondCode - Return the ISD condition code corresponding to +/// the given LLVM IR floating-point condition code. This includes +/// consideration of global floating-point math flags. +/// +ISD::CondCode llvm::getFCmpCondCode(FCmpInst::Predicate Pred) { + ISD::CondCode FPC, FOC; + switch (Pred) { + case FCmpInst::FCMP_FALSE: FOC = FPC = ISD::SETFALSE; break; + case FCmpInst::FCMP_OEQ: FOC = ISD::SETEQ; FPC = ISD::SETOEQ; break; + case FCmpInst::FCMP_OGT: FOC = ISD::SETGT; FPC = ISD::SETOGT; break; + case FCmpInst::FCMP_OGE: FOC = ISD::SETGE; FPC = ISD::SETOGE; break; + case FCmpInst::FCMP_OLT: FOC = ISD::SETLT; FPC = ISD::SETOLT; break; + case FCmpInst::FCMP_OLE: FOC = ISD::SETLE; FPC = ISD::SETOLE; break; + case FCmpInst::FCMP_ONE: FOC = ISD::SETNE; FPC = ISD::SETONE; break; + case FCmpInst::FCMP_ORD: FOC = FPC = ISD::SETO; break; + case FCmpInst::FCMP_UNO: FOC = FPC = ISD::SETUO; break; + case FCmpInst::FCMP_UEQ: FOC = ISD::SETEQ; FPC = ISD::SETUEQ; break; + case FCmpInst::FCMP_UGT: FOC = ISD::SETGT; FPC = ISD::SETUGT; break; + case FCmpInst::FCMP_UGE: FOC = ISD::SETGE; FPC = ISD::SETUGE; break; + case FCmpInst::FCMP_ULT: FOC = ISD::SETLT; FPC = ISD::SETULT; break; + case FCmpInst::FCMP_ULE: FOC = ISD::SETLE; FPC = ISD::SETULE; break; + case FCmpInst::FCMP_UNE: FOC = ISD::SETNE; FPC = ISD::SETUNE; break; + case FCmpInst::FCMP_TRUE: FOC = FPC = ISD::SETTRUE; break; + default: + llvm_unreachable("Invalid FCmp predicate opcode!"); + FOC = FPC = ISD::SETFALSE; + break; + } + if (FiniteOnlyFPMath()) + return FOC; + else + return FPC; +} + +/// getICmpCondCode - Return the ISD condition code corresponding to +/// the given LLVM IR integer condition code. 
+/// +ISD::CondCode llvm::getICmpCondCode(ICmpInst::Predicate Pred) { + switch (Pred) { + case ICmpInst::ICMP_EQ: return ISD::SETEQ; + case ICmpInst::ICMP_NE: return ISD::SETNE; + case ICmpInst::ICMP_SLE: return ISD::SETLE; + case ICmpInst::ICMP_ULE: return ISD::SETULE; + case ICmpInst::ICMP_SGE: return ISD::SETGE; + case ICmpInst::ICMP_UGE: return ISD::SETUGE; + case ICmpInst::ICMP_SLT: return ISD::SETLT; + case ICmpInst::ICMP_ULT: return ISD::SETULT; + case ICmpInst::ICMP_SGT: return ISD::SETGT; + case ICmpInst::ICMP_UGT: return ISD::SETUGT; + default: + llvm_unreachable("Invalid ICmp predicate opcode!"); + return ISD::SETNE; + } +} + +/// Test if the given instruction is in a position to be optimized +/// with a tail-call. This roughly means that it's in a block with +/// a return and there's nothing that needs to be scheduled +/// between it and the return. +/// +/// This function only tests target-independent requirements. +bool llvm::isInTailCallPosition(ImmutableCallSite CS, Attributes CalleeRetAttr, + const TargetLowering &TLI) { + const Instruction *I = CS.getInstruction(); + const BasicBlock *ExitBB = I->getParent(); + const TerminatorInst *Term = ExitBB->getTerminator(); + const ReturnInst *Ret = dyn_cast<ReturnInst>(Term); + const Function *F = ExitBB->getParent(); + + // The block must end in a return statement or unreachable. + // + // FIXME: Decline tailcall if it's not guaranteed and if the block ends in + // an unreachable, for now. The way tailcall optimization is currently + // implemented means it will add an epilogue followed by a jump. That is + // not profitable. Also, if the callee is a special function (e.g. + // longjmp on x86), it can end up causing miscompilation that has not + // been fully understood. + if (!Ret && + (!GuaranteedTailCallOpt || !isa<UnreachableInst>(Term))) return false; + + // If I will have a chain, make sure no other instruction that will have a + // chain interposes between I and the return. + if (I->mayHaveSideEffects() || I->mayReadFromMemory() || + !I->isSafeToSpeculativelyExecute()) + for (BasicBlock::const_iterator BBI = prior(prior(ExitBB->end())); ; + --BBI) { + if (&*BBI == I) + break; + // Debug info intrinsics do not get in the way of tail call optimization. + if (isa<DbgInfoIntrinsic>(BBI)) + continue; + if (BBI->mayHaveSideEffects() || BBI->mayReadFromMemory() || + !BBI->isSafeToSpeculativelyExecute()) + return false; + } + + // If the block ends with a void return or unreachable, it doesn't matter + // what the call's return type is. + if (!Ret || Ret->getNumOperands() == 0) return true; + + // If the return value is undef, it doesn't matter what the call's + // return type is. + if (isa<UndefValue>(Ret->getOperand(0))) return true; + + // Conservatively require the attributes of the call to match those of + // the return. Ignore noalias because it doesn't affect the call sequence. + unsigned CallerRetAttr = F->getAttributes().getRetAttributes(); + if ((CalleeRetAttr ^ CallerRetAttr) & ~Attribute::NoAlias) + return false; + + // It's not safe to eliminate the sign / zero extension of the return value. + if ((CallerRetAttr & Attribute::ZExt) || (CallerRetAttr & Attribute::SExt)) + return false; + + // Otherwise, make sure the unmodified return value of I is the return value. + for (const Instruction *U = dyn_cast<Instruction>(Ret->getOperand(0)); ; + U = dyn_cast<Instruction>(U->getOperand(0))) { + if (!U) + return false; + if (!U->hasOneUse()) + return false; + if (U == I) + break; + // Check for a truly no-op truncate. 
+ if (isa<TruncInst>(U) && + TLI.isTruncateFree(U->getOperand(0)->getType(), U->getType())) + continue; + // Check for a truly no-op bitcast. + if (isa<BitCastInst>(U) && + (U->getOperand(0)->getType() == U->getType() || + (U->getOperand(0)->getType()->isPointerTy() && + U->getType()->isPointerTy()))) + continue; + // Otherwise it's not a true no-op. + return false; + } + + return true; +} + diff --git a/lib/CodeGen/AntiDepBreaker.h b/lib/CodeGen/AntiDepBreaker.h index 3ee30c6..086b757 100644 --- a/lib/CodeGen/AntiDepBreaker.h +++ b/lib/CodeGen/AntiDepBreaker.h @@ -39,9 +39,9 @@ public: /// basic-block region and break them by renaming registers. Return /// the number of anti-dependencies broken. /// - virtual unsigned BreakAntiDependencies(std::vector<SUnit>& SUnits, - MachineBasicBlock::iterator& Begin, - MachineBasicBlock::iterator& End, + virtual unsigned BreakAntiDependencies(const std::vector<SUnit>& SUnits, + MachineBasicBlock::iterator Begin, + MachineBasicBlock::iterator End, unsigned InsertPosIndex) =0; /// Observe - Update liveness information to account for the current diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index c86e241..ded4b3f 100644 --- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -42,8 +42,13 @@ #include "llvm/ADT/Statistic.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Format.h" +#include "llvm/Support/Timer.h" using namespace llvm; +static const char *DWARFGroupName = "DWARF Emission"; +static const char *DbgTimerName = "DWARF Debug Writer"; +static const char *EHTimerName = "DWARF Exception Writer"; + STATISTIC(EmittedInsts, "Number of machine instrs printed"); char AsmPrinter::ID = 0; @@ -56,6 +61,35 @@ static gcp_map_type &getGCMap(void *&P) { } +/// getGVAlignmentLog2 - Return the alignment to use for the specified global +/// value in log2 form. This rounds up to the preferred alignment if possible +/// and legal. +static unsigned getGVAlignmentLog2(const GlobalValue *GV, const TargetData &TD, + unsigned InBits = 0) { + unsigned NumBits = 0; + if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV)) + NumBits = TD.getPreferredAlignmentLog(GVar); + + // If InBits is specified, round it to it. + if (InBits > NumBits) + NumBits = InBits; + + // If the GV has a specified alignment, take it into account. + if (GV->getAlignment() == 0) + return NumBits; + + unsigned GVAlign = Log2_32(GV->getAlignment()); + + // If the GVAlign is larger than NumBits, or if we are required to obey + // NumBits because the GV has an assigned section, obey it. 
+ if (GVAlign > NumBits || GV->hasSection()) + NumBits = GVAlign; + return NumBits; +} + + + + AsmPrinter::AsmPrinter(TargetMachine &tm, MCStreamer &Streamer) : MachineFunctionPass(&ID), TM(tm), MAI(tm.getMCAsmInfo()), @@ -88,7 +122,7 @@ unsigned AsmPrinter::getFunctionNumber() const { return MF->getFunctionNumber(); } -TargetLoweringObjectFile &AsmPrinter::getObjFileLowering() const { +const TargetLoweringObjectFile &AsmPrinter::getObjFileLowering() const { return TM.getTargetLowering()->getObjFileLowering(); } @@ -222,8 +256,12 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { SectionKind GVKind = TargetLoweringObjectFile::getKindForGlobal(GV, TM); const TargetData *TD = TM.getTargetData(); - unsigned Size = TD->getTypeAllocSize(GV->getType()->getElementType()); - unsigned AlignLog = TD->getPreferredAlignmentLog(GV); + uint64_t Size = TD->getTypeAllocSize(GV->getType()->getElementType()); + + // If the alignment is specified, we *must* obey it. Overaligning a global + // with a specified alignment is a prompt way to break globals emitted to + // sections and expected to be contiguous (e.g. ObjC metadata). + unsigned AlignLog = getGVAlignmentLog2(GV, *TD); // Handle common and BSS local symbols (.lcomm). if (GVKind.isCommon() || GVKind.isBSSLocal()) { @@ -270,6 +308,8 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { // Handle the zerofill directive on darwin, which is a special form of BSS // emission. if (GVKind.isBSSExtern() && MAI->hasMachoZeroFillDirective()) { + if (Size == 0) Size = 1; // zerofill of 0 bytes is undefined. + // .globl _foo OutStreamer.EmitSymbolAttribute(GVSym, MCSA_Global); // .zerofill __DATA, __common, _foo, 400, 5 @@ -347,8 +387,22 @@ void AsmPrinter::EmitFunctionHeader() { } // Emit pre-function debug and/or EH information. - if (DE) DE->BeginFunction(MF); - if (DD) DD->beginFunction(MF); + if (DE) { + if (TimePassesIsEnabled) { + NamedRegionTimer T(EHTimerName, DWARFGroupName); + DE->BeginFunction(MF); + } else { + DE->BeginFunction(MF); + } + } + if (DD) { + if (TimePassesIsEnabled) { + NamedRegionTimer T(DbgTimerName, DWARFGroupName); + DD->beginFunction(MF); + } else { + DD->beginFunction(MF); + } + } } /// EmitFunctionEntryLabel - Emit the label that is the entrypoint for the @@ -434,7 +488,58 @@ static void EmitKill(const MachineInstr *MI, AsmPrinter &AP) { AP.OutStreamer.AddBlankLine(); } +/// EmitDebugValueComment - This method handles the target-independent form +/// of DBG_VALUE, returning true if it was able to do so. A false return +/// means the target will need to handle MI in EmitInstruction. +static bool EmitDebugValueComment(const MachineInstr *MI, AsmPrinter &AP) { + // This code handles only the 3-operand target-independent form. + if (MI->getNumOperands() != 3) + return false; + SmallString<128> Str; + raw_svector_ostream OS(Str); + OS << '\t' << AP.MAI->getCommentString() << "DEBUG_VALUE: "; + + // cast away const; DIetc do not take const operands for some reason. + DIVariable V(const_cast<MDNode*>(MI->getOperand(2).getMetadata())); + if (V.getContext().isSubprogram()) + OS << DISubprogram(V.getContext().getNode()).getDisplayName() << ":"; + OS << V.getName() << " <- "; + + // Register or immediate value. Register 0 means undef. 
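  // The net effect is an assembly comment of roughly this shape (names and
  // the comment marker are illustrative and target-dependent):
  //   #DEBUG_VALUE: myFunc:myVar <- RDI+0
  // or, when the location register is 0,
  //   #DEBUG_VALUE: myFunc:myVar <- undef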
+ if (MI->getOperand(0).isFPImm()) { + APFloat APF = APFloat(MI->getOperand(0).getFPImm()->getValueAPF()); + if (MI->getOperand(0).getFPImm()->getType()->isFloatTy()) { + OS << (double)APF.convertToFloat(); + } else if (MI->getOperand(0).getFPImm()->getType()->isDoubleTy()) { + OS << APF.convertToDouble(); + } else { + // There is no good way to print long double. Convert a copy to + // double. Ah well, it's only a comment. + bool ignored; + APF.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven, + &ignored); + OS << "(long double) " << APF.convertToDouble(); + } + } else if (MI->getOperand(0).isImm()) { + OS << MI->getOperand(0).getImm(); + } else { + assert(MI->getOperand(0).isReg() && "Unknown operand type"); + if (MI->getOperand(0).getReg() == 0) { + // Suppress offset, it is not meaningful here. + OS << "undef"; + // NOTE: Want this comment at start of line, don't emit with AddComment. + AP.OutStreamer.EmitRawText(OS.str()); + return true; + } + OS << AP.TM.getRegisterInfo()->getName(MI->getOperand(0).getReg()); + } + + OS << '+' << MI->getOperand(1).getImm(); + // NOTE: Want this comment at start of line, don't emit with AddComment. + AP.OutStreamer.EmitRawText(OS.str()); + return true; +} /// EmitFunctionBody - This method emits the body and trailer for a /// function. @@ -453,13 +558,20 @@ void AsmPrinter::EmitFunctionBody() { for (MachineBasicBlock::const_iterator II = I->begin(), IE = I->end(); II != IE; ++II) { // Print the assembly for the instruction. - if (!II->isLabel()) + if (!II->isLabel() && !II->isImplicitDef() && !II->isKill() && + !II->isDebugValue()) { HasAnyRealCode = true; - - ++EmittedInsts; - - if (ShouldPrintDebugScopes) - DD->beginScope(II); + ++EmittedInsts; + } + + if (ShouldPrintDebugScopes) { + if (TimePassesIsEnabled) { + NamedRegionTimer T(DbgTimerName, DWARFGroupName); + DD->beginScope(II); + } else { + DD->beginScope(II); + } + } if (isVerbose()) EmitComments(*II, OutStreamer.GetCommentOS()); @@ -473,6 +585,12 @@ void AsmPrinter::EmitFunctionBody() { case TargetOpcode::INLINEASM: EmitInlineAsm(II); break; + case TargetOpcode::DBG_VALUE: + if (isVerbose()) { + if (!EmitDebugValueComment(II, *this)) + EmitInstruction(II); + } + break; case TargetOpcode::IMPLICIT_DEF: if (isVerbose()) EmitImplicitDef(II, *this); break; @@ -484,16 +602,29 @@ void AsmPrinter::EmitFunctionBody() { break; } - if (ShouldPrintDebugScopes) - DD->endScope(II); + if (ShouldPrintDebugScopes) { + if (TimePassesIsEnabled) { + NamedRegionTimer T(DbgTimerName, DWARFGroupName); + DD->endScope(II); + } else { + DD->endScope(II); + } + } } } // If the function is empty and the object file uses .subsections_via_symbols, // then we need to emit *something* to the function body to prevent the - // labels from collapsing together. Just emit a 0 byte. - if (MAI->hasSubsectionsViaSymbols() && !HasAnyRealCode) - OutStreamer.EmitIntValue(0, 1, 0/*addrspace*/); + // labels from collapsing together. Just emit a noop. + if (MAI->hasSubsectionsViaSymbols() && !HasAnyRealCode) { + MCInst Noop; + TM.getInstrInfo()->getNoopForMachoTarget(Noop); + if (Noop.getOpcode()) { + OutStreamer.AddComment("avoids zero-length function"); + OutStreamer.EmitInstruction(Noop); + } else // Target not mc-ized yet. + OutStreamer.EmitRawText(StringRef("\tnop\n")); + } // Emit target-specific gunk after the function body. EmitFunctionBodyEnd(); @@ -514,8 +645,22 @@ void AsmPrinter::EmitFunctionBody() { } // Emit post-function debug information. 
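  // As elsewhere in this patch, the emission below is wrapped in a
  // NamedRegionTimer only when TimePassesIsEnabled is set, so no timer
  // object is constructed (and no timing overhead paid) on ordinary runs;
  // the duplicated call in the else branch is the cost of keeping the RAII
  // timer scoped exactly to the work being measured.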
- if (DD) DD->endFunction(MF); - if (DE) DE->EndFunction(); + if (DD) { + if (TimePassesIsEnabled) { + NamedRegionTimer T(DbgTimerName, DWARFGroupName); + DD->endFunction(MF); + } else { + DD->endFunction(MF); + } + } + if (DE) { + if (TimePassesIsEnabled) { + NamedRegionTimer T(EHTimerName, DWARFGroupName); + DE->EndFunction(); + } else { + DE->EndFunction(); + } + } MMI->EndFunction(); // Print out jump tables referenced by the function. @@ -524,6 +669,12 @@ void AsmPrinter::EmitFunctionBody() { OutStreamer.AddBlankLine(); } +/// getDebugValueLocation - Get location information encoded by DBG_VALUE +/// operands. +MachineLocation AsmPrinter::getDebugValueLocation(const MachineInstr *MI) const { + // Target specific DBG_VALUE instructions are handled by each target. + return MachineLocation(); +} bool AsmPrinter::doFinalization(Module &M) { // Emit global variables. @@ -533,11 +684,21 @@ bool AsmPrinter::doFinalization(Module &M) { // Finalize debug and EH information. if (DE) { - DE->EndModule(); + if (TimePassesIsEnabled) { + NamedRegionTimer T(EHTimerName, DWARFGroupName); + DE->EndModule(); + } else { + DE->EndModule(); + } delete DE; DE = 0; } if (DD) { - DD->endModule(); + if (TimePassesIsEnabled) { + NamedRegionTimer T(DbgTimerName, DWARFGroupName); + DD->endModule(); + } else { + DD->endModule(); + } delete DD; DD = 0; } @@ -595,7 +756,7 @@ bool AsmPrinter::doFinalization(Module &M) { // to be executable. Some targets have a directive to declare this. Function *InitTrampolineIntrinsic = M.getFunction("llvm.init.trampoline"); if (!InitTrampolineIntrinsic || InitTrampolineIntrinsic->use_empty()) - if (MCSection *S = MAI->getNonexecutableStackSection(OutContext)) + if (const MCSection *S = MAI->getNonexecutableStackSection(OutContext)) OutStreamer.SwitchSection(S); // Allow the target to emit any magic that it wants at the end of the file, @@ -877,7 +1038,7 @@ bool AsmPrinter::EmitSpecialLLVMGlobal(const GlobalVariable *GV) { unsigned Align = Log2_32(TD->getPointerPrefAlignment()); if (GV->getName() == "llvm.global_ctors") { OutStreamer.SwitchSection(getObjFileLowering().getStaticCtorSection()); - EmitAlignment(Align, 0); + EmitAlignment(Align); EmitXXStructorList(GV->getInitializer()); if (TM.getRelocationModel() == Reloc::Static && @@ -891,7 +1052,7 @@ bool AsmPrinter::EmitSpecialLLVMGlobal(const GlobalVariable *GV) { if (GV->getName() == "llvm.global_dtors") { OutStreamer.SwitchSection(getObjFileLowering().getStaticDtorSection()); - EmitAlignment(Align, 0); + EmitAlignment(Align); EmitXXStructorList(GV->getInitializer()); if (TM.getRelocationModel() == Reloc::Static && @@ -984,30 +1145,49 @@ void AsmPrinter::EmitLabelDifference(const MCSymbol *Hi, const MCSymbol *Lo, OutStreamer.EmitSymbolValue(SetLabel, Size, 0/*AddrSpace*/); } +/// EmitLabelOffsetDifference - Emit something like ".long Hi+Offset-Lo" +/// where the size in bytes of the directive is specified by Size and Hi/Lo +/// specify the labels. This implicitly uses .set if it is available. +void AsmPrinter::EmitLabelOffsetDifference(const MCSymbol *Hi, uint64_t Offset, + const MCSymbol *Lo, unsigned Size) + const { + + // Emit Hi+Offset - Lo + // Get the Hi+Offset expression. + const MCExpr *Plus = + MCBinaryExpr::CreateAdd(MCSymbolRefExpr::Create(Hi, OutContext), + MCConstantExpr::Create(Offset, OutContext), + OutContext); + + // Get the Hi+Offset-Lo expression. 
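  // Illustrative output (symbol names hypothetical): on an assembler without
  // .set support this is emitted directly as
  //   .long Lhi+16-Llo
  // whereas with .set available it becomes
  //   .set  Lset0, Lhi+16-Llo
  //   .long Lset0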
+ const MCExpr *Diff = + MCBinaryExpr::CreateSub(Plus, + MCSymbolRefExpr::Create(Lo, OutContext), + OutContext); + + if (!MAI->hasSetDirective()) + OutStreamer.EmitValue(Diff, 4, 0/*AddrSpace*/); + else { + // Otherwise, emit with .set (aka assignment). + MCSymbol *SetLabel = GetTempSymbol("set", SetCounter++); + OutStreamer.EmitAssignment(SetLabel, Diff); + OutStreamer.EmitSymbolValue(SetLabel, 4, 0/*AddrSpace*/); + } +} + //===----------------------------------------------------------------------===// // EmitAlignment - Emit an alignment directive to the specified power of // two boundary. For example, if you pass in 3 here, you will get an 8 // byte alignment. If a global value is specified, and if that global has -// an explicit alignment requested, it will unconditionally override the -// alignment request. However, if ForcedAlignBits is specified, this value -// has final say: the ultimate alignment will be the max of ForcedAlignBits -// and the alignment computed with NumBits and the global. +// an explicit alignment requested, it will override the alignment request +// if required for correctness. // -// The algorithm is: -// Align = NumBits; -// if (GV && GV->hasalignment) Align = GV->getalignment(); -// Align = std::max(Align, ForcedAlignBits); -// -void AsmPrinter::EmitAlignment(unsigned NumBits, const GlobalValue *GV, - unsigned ForcedAlignBits, - bool UseFillExpr) const { - if (GV && GV->getAlignment()) - NumBits = Log2_32(GV->getAlignment()); - NumBits = std::max(NumBits, ForcedAlignBits); +void AsmPrinter::EmitAlignment(unsigned NumBits, const GlobalValue *GV) const { + if (GV) NumBits = getGVAlignmentLog2(GV, *TM.getTargetData(), NumBits); - if (NumBits == 0) return; // No need to emit alignment. + if (NumBits == 0) return; // 1-byte aligned: no need to emit alignment. if (getCurrentSection()->getKind().isText()) OutStreamer.EmitCodeAlignment(1 << NumBits); @@ -1015,6 +1195,10 @@ void AsmPrinter::EmitAlignment(unsigned NumBits, const GlobalValue *GV, OutStreamer.EmitValueToAlignment(1 << NumBits, 0, 1, 0); } +//===----------------------------------------------------------------------===// +// Constant emission. +//===----------------------------------------------------------------------===// + /// LowerConstant - Lower the specified LLVM Constant to an MCExpr. /// static const MCExpr *LowerConstant(const Constant *CV, AsmPrinter &AP) { @@ -1142,12 +1326,15 @@ static const MCExpr *LowerConstant(const Constant *CV, AsmPrinter &AP) { } } +static void EmitGlobalConstantImpl(const Constant *C, unsigned AddrSpace, + AsmPrinter &AP); + static void EmitGlobalConstantArray(const ConstantArray *CA, unsigned AddrSpace, AsmPrinter &AP) { if (AddrSpace != 0 || !CA->isString()) { // Not a string. 
Print the values in successive locations for (unsigned i = 0, e = CA->getNumOperands(); i != e; ++i) - AP.EmitGlobalConstant(CA->getOperand(i), AddrSpace); + EmitGlobalConstantImpl(CA->getOperand(i), AddrSpace, AP); return; } @@ -1163,7 +1350,7 @@ static void EmitGlobalConstantArray(const ConstantArray *CA, unsigned AddrSpace, static void EmitGlobalConstantVector(const ConstantVector *CV, unsigned AddrSpace, AsmPrinter &AP) { for (unsigned i = 0, e = CV->getType()->getNumElements(); i != e; ++i) - AP.EmitGlobalConstant(CV->getOperand(i), AddrSpace); + EmitGlobalConstantImpl(CV->getOperand(i), AddrSpace, AP); } static void EmitGlobalConstantStruct(const ConstantStruct *CS, @@ -1183,7 +1370,7 @@ static void EmitGlobalConstantStruct(const ConstantStruct *CS, SizeSoFar += FieldSize + PadSize; // Now print the actual field value. - AP.EmitGlobalConstant(Field, AddrSpace); + EmitGlobalConstantImpl(Field, AddrSpace, AP); // Insert padding - this may include padding to increase the size of the // current field up to the ABI size (if the struct is not packed) as well @@ -1203,7 +1390,7 @@ static void EmitGlobalConstantUnion(const ConstantUnion *CU, unsigned FilledSize = TD->getTypeAllocSize(Contents->getType()); // Print the actually filled part - AP.EmitGlobalConstant(Contents, AddrSpace); + EmitGlobalConstantImpl(Contents, AddrSpace, AP); // And pad with enough zeroes AP.OutStreamer.EmitZeros(Size-FilledSize, AddrSpace); @@ -1236,7 +1423,7 @@ static void EmitGlobalConstantFP(const ConstantFP *CFP, unsigned AddrSpace, if (CFP->getType()->isX86_FP80Ty()) { // all long double variants are printed as hex - // api needed to prevent premature destruction + // API needed to prevent premature destruction APInt API = CFP->getValueAPF().bitcastToAPInt(); const uint64_t *p = API.getRawData(); if (AP.isVerbose()) { @@ -1266,8 +1453,8 @@ static void EmitGlobalConstantFP(const ConstantFP *CFP, unsigned AddrSpace, assert(CFP->getType()->isPPC_FP128Ty() && "Floating point constant type not handled"); - // All long double variants are printed as hex api needed to prevent - // premature destruction. + // All long double variants are printed as hex + // API needed to prevent premature destruction. APInt API = CFP->getValueAPF().bitcastToAPInt(); const uint64_t *p = API.getRawData(); if (AP.TM.getTargetData()->isBigEndian()) { @@ -1295,57 +1482,68 @@ static void EmitGlobalConstantLargeInt(const ConstantInt *CI, } } -/// EmitGlobalConstant - Print a general LLVM constant to the .s file. -void AsmPrinter::EmitGlobalConstant(const Constant *CV, unsigned AddrSpace) { +static void EmitGlobalConstantImpl(const Constant *CV, unsigned AddrSpace, + AsmPrinter &AP) { if (isa<ConstantAggregateZero>(CV) || isa<UndefValue>(CV)) { - uint64_t Size = TM.getTargetData()->getTypeAllocSize(CV->getType()); - if (Size == 0) Size = 1; // An empty "_foo:" followed by a section is undef. 
- return OutStreamer.EmitZeros(Size, AddrSpace); + uint64_t Size = AP.TM.getTargetData()->getTypeAllocSize(CV->getType()); + return AP.OutStreamer.EmitZeros(Size, AddrSpace); } if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV)) { - unsigned Size = TM.getTargetData()->getTypeAllocSize(CV->getType()); + unsigned Size = AP.TM.getTargetData()->getTypeAllocSize(CV->getType()); switch (Size) { case 1: case 2: case 4: case 8: - if (isVerbose()) - OutStreamer.GetCommentOS() << format("0x%llx\n", CI->getZExtValue()); - OutStreamer.EmitIntValue(CI->getZExtValue(), Size, AddrSpace); + if (AP.isVerbose()) + AP.OutStreamer.GetCommentOS() << format("0x%llx\n", CI->getZExtValue()); + AP.OutStreamer.EmitIntValue(CI->getZExtValue(), Size, AddrSpace); return; default: - EmitGlobalConstantLargeInt(CI, AddrSpace, *this); + EmitGlobalConstantLargeInt(CI, AddrSpace, AP); return; } } if (const ConstantArray *CVA = dyn_cast<ConstantArray>(CV)) - return EmitGlobalConstantArray(CVA, AddrSpace, *this); + return EmitGlobalConstantArray(CVA, AddrSpace, AP); if (const ConstantStruct *CVS = dyn_cast<ConstantStruct>(CV)) - return EmitGlobalConstantStruct(CVS, AddrSpace, *this); + return EmitGlobalConstantStruct(CVS, AddrSpace, AP); if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CV)) - return EmitGlobalConstantFP(CFP, AddrSpace, *this); + return EmitGlobalConstantFP(CFP, AddrSpace, AP); if (isa<ConstantPointerNull>(CV)) { - unsigned Size = TM.getTargetData()->getTypeAllocSize(CV->getType()); - OutStreamer.EmitIntValue(0, Size, AddrSpace); + unsigned Size = AP.TM.getTargetData()->getTypeAllocSize(CV->getType()); + AP.OutStreamer.EmitIntValue(0, Size, AddrSpace); return; } if (const ConstantUnion *CVU = dyn_cast<ConstantUnion>(CV)) - return EmitGlobalConstantUnion(CVU, AddrSpace, *this); + return EmitGlobalConstantUnion(CVU, AddrSpace, AP); if (const ConstantVector *V = dyn_cast<ConstantVector>(CV)) - return EmitGlobalConstantVector(V, AddrSpace, *this); + return EmitGlobalConstantVector(V, AddrSpace, AP); // Otherwise, it must be a ConstantExpr. Lower it to an MCExpr, then emit it // thread the streamer with EmitValue. - OutStreamer.EmitValue(LowerConstant(CV, *this), - TM.getTargetData()->getTypeAllocSize(CV->getType()), - AddrSpace); + AP.OutStreamer.EmitValue(LowerConstant(CV, AP), + AP.TM.getTargetData()->getTypeAllocSize(CV->getType()), + AddrSpace); +} + +/// EmitGlobalConstant - Print a general LLVM constant to the .s file. +void AsmPrinter::EmitGlobalConstant(const Constant *CV, unsigned AddrSpace) { + uint64_t Size = TM.getTargetData()->getTypeAllocSize(CV->getType()); + if (Size) + EmitGlobalConstantImpl(CV, AddrSpace, *this); + else if (MAI->hasSubsectionsViaSymbols()) { + // If the global has zero size, emit a single byte so that two labels don't + // look like they are at the same location. 
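    // Hypothetical illustration: two zero-sized globals emitted back to back
    // under .subsections_via_symbols would otherwise produce
    //   _a:
    //   _b:
    // with both labels at the same address, letting the linker fold them
    // into one atom; the single zero byte below keeps the labels distinct.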
+ OutStreamer.EmitIntValue(0, 1, AddrSpace); + } } void AsmPrinter::EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV) { @@ -1613,7 +1811,7 @@ GCMetadataPrinter *AsmPrinter::GetOrCreateGCPrinter(GCStrategy *S) { return GMP; } - llvm_report_error("no GCMetadataPrinter registered for GC: " + Twine(Name)); + report_fatal_error("no GCMetadataPrinter registered for GC: " + Twine(Name)); return 0; } diff --git a/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp b/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp index 255bcd4..37d10e5 100644 --- a/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp +++ b/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp @@ -13,6 +13,7 @@ #define DEBUG_TYPE "asm-printer" #include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/Constants.h" #include "llvm/InlineAsm.h" #include "llvm/LLVMContext.h" #include "llvm/Module.h" @@ -74,15 +75,15 @@ void AsmPrinter::EmitInlineAsm(StringRef Str, unsigned LocCookie) const { AsmParser Parser(SrcMgr, OutContext, OutStreamer, *MAI); OwningPtr<TargetAsmParser> TAP(TM.getTarget().createAsmParser(Parser)); if (!TAP) - llvm_report_error("Inline asm not supported by this streamer because" - " we don't have an asm parser for this target\n"); + report_fatal_error("Inline asm not supported by this streamer because" + " we don't have an asm parser for this target\n"); Parser.setTargetParser(*TAP.get()); // Don't implicitly switch to the text section before the asm. int Res = Parser.Run(/*NoInitialTextSection*/ true, /*NoFinalize*/ true); if (Res && !HasDiagHandler) - llvm_report_error("Error parsing inline asm\n"); + report_fatal_error("Error parsing inline asm\n"); } @@ -97,7 +98,7 @@ void AsmPrinter::EmitInlineAsm(const MachineInstr *MI) const { unsigned NumDefs = 0; for (; MI->getOperand(NumDefs).isReg() && MI->getOperand(NumDefs).isDef(); ++NumDefs) - assert(NumDefs != NumOperands-1 && "No asm string?"); + assert(NumDefs != NumOperands-2 && "No asm string?"); assert(MI->getOperand(NumDefs).isSymbol() && "No asm string?"); @@ -123,6 +124,20 @@ void AsmPrinter::EmitInlineAsm(const MachineInstr *MI) const { OutStreamer.EmitRawText(Twine("\t")+MAI->getCommentString()+ MAI->getInlineAsmStart()); + // Get the !srcloc metadata node if we have it, and decode the loc cookie from + // it. + unsigned LocCookie = 0; + for (unsigned i = MI->getNumOperands(); i != 0; --i) { + if (MI->getOperand(i-1).isMetadata()) + if (const MDNode *SrcLoc = MI->getOperand(i-1).getMetadata()) + if (SrcLoc->getNumOperands() != 0) + if (const ConstantInt *CI = + dyn_cast<ConstantInt>(SrcLoc->getOperand(0))) { + LocCookie = CI->getZExtValue(); + break; + } + } + // Emit the inline asm to a temporary string so we can emit it through // EmitInlineAsm. SmallString<256> StringData; @@ -167,10 +182,9 @@ void AsmPrinter::EmitInlineAsm(const MachineInstr *MI) const { break; case '(': // $( -> same as GCC's { character. ++LastEmitted; // Consume '(' character. - if (CurVariant != -1) { - llvm_report_error("Nested variants found in inline asm string: '" - + std::string(AsmStr) + "'"); - } + if (CurVariant != -1) + report_fatal_error("Nested variants found in inline asm string: '" + + Twine(AsmStr) + "'"); CurVariant = 0; // We're in the first variant now. 
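      // For example (hypothetical asm string), "$(addl $1, $0$|add $0, $1$)"
      // carries two dialect variants, mirroring GCC's "{...|...}" syntax;
      // only the variant whose index matches the printer's asm dialect is
      // kept in the emitted output.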
break; case '|': @@ -204,8 +218,8 @@ void AsmPrinter::EmitInlineAsm(const MachineInstr *MI) const { const char *StrStart = LastEmitted; const char *StrEnd = strchr(StrStart, '}'); if (StrEnd == 0) - llvm_report_error(Twine("Unterminated ${:foo} operand in inline asm" - " string: '") + Twine(AsmStr) + "'"); + report_fatal_error("Unterminated ${:foo} operand in inline asm" + " string: '" + Twine(AsmStr) + "'"); std::string Val(StrStart, StrEnd); PrintSpecial(MI, OS, Val.c_str()); @@ -219,8 +233,8 @@ void AsmPrinter::EmitInlineAsm(const MachineInstr *MI) const { unsigned Val; if (StringRef(IDStart, IDEnd-IDStart).getAsInteger(10, Val)) - llvm_report_error("Bad $ operand number in inline asm string: '" - + std::string(AsmStr) + "'"); + report_fatal_error("Bad $ operand number in inline asm string: '" + + Twine(AsmStr) + "'"); LastEmitted = IDEnd; char Modifier[2] = { 0, 0 }; @@ -231,22 +245,22 @@ void AsmPrinter::EmitInlineAsm(const MachineInstr *MI) const { if (*LastEmitted == ':') { ++LastEmitted; // Consume ':' character. if (*LastEmitted == 0) - llvm_report_error("Bad ${:} expression in inline asm string: '" + - std::string(AsmStr) + "'"); + report_fatal_error("Bad ${:} expression in inline asm string: '" + + Twine(AsmStr) + "'"); Modifier[0] = *LastEmitted; ++LastEmitted; // Consume modifier character. } if (*LastEmitted != '}') - llvm_report_error("Bad ${} expression in inline asm string: '" - + std::string(AsmStr) + "'"); + report_fatal_error("Bad ${} expression in inline asm string: '" + + Twine(AsmStr) + "'"); ++LastEmitted; // Consume '}' character. } if (Val >= NumOperands-1) - llvm_report_error("Invalid $ operand number in inline asm string: '" - + std::string(AsmStr) + "'"); + report_fatal_error("Invalid $ operand number in inline asm string: '" + + Twine(AsmStr) + "'"); // Okay, we finally have a value number. Ask the target to print this // operand! @@ -273,7 +287,7 @@ void AsmPrinter::EmitInlineAsm(const MachineInstr *MI) const { OS << *MI->getOperand(OpNo).getMBB()->getSymbol(); else { AsmPrinter *AP = const_cast<AsmPrinter*>(this); - if ((OpFlags & 7) == 4) { + if (InlineAsm::isMemKind(OpFlags)) { Error = AP->PrintAsmMemoryOperand(MI, OpNo, AsmPrinterVariant, Modifier[0] ? Modifier : 0, OS); @@ -286,9 +300,8 @@ void AsmPrinter::EmitInlineAsm(const MachineInstr *MI) const { if (Error) { std::string msg; raw_string_ostream Msg(msg); - Msg << "Invalid operand found in inline asm: '" << AsmStr << "'\n"; - MI->print(Msg); - llvm_report_error(Msg.str()); + Msg << "invalid operand in inline asm: '" << AsmStr << "'"; + MMI->getModule()->getContext().emitError(LocCookie, Msg.str()); } } break; @@ -296,7 +309,7 @@ void AsmPrinter::EmitInlineAsm(const MachineInstr *MI) const { } } OS << '\n' << (char)0; // null terminate string. - EmitInlineAsm(OS.str(), 0/*no loc cookie*/); + EmitInlineAsm(OS.str(), LocCookie); // Emit the #NOAPP end marker. This has to happen even if verbose-asm isn't // enabled, so we use EmitRawText. 
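  // Sketch of the IR side of the cookie (hypothetical module; syntax shown
  // roughly as in this LLVM version):
  //   call void asm sideeffect "bogus instruction", ""(), !srcloc !0
  //   !0 = metadata !{i32 42}
  // The operand scan above recovers 42 as LocCookie, so the "invalid
  // operand" diagnostic raised through LLVMContext::emitError can be
  // attributed to the source location the front end recorded for this asm
  // statement.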
@@ -334,7 +347,7 @@ void AsmPrinter::PrintSpecial(const MachineInstr *MI, raw_ostream &OS, raw_string_ostream Msg(msg); Msg << "Unknown special formatter '" << Code << "' for machine instr: " << *MI; - llvm_report_error(Msg.str()); + report_fatal_error(Msg.str()); } } diff --git a/lib/CodeGen/AsmPrinter/CMakeLists.txt b/lib/CodeGen/AsmPrinter/CMakeLists.txt index afc482d..ca8b843 100644 --- a/lib/CodeGen/AsmPrinter/CMakeLists.txt +++ b/lib/CodeGen/AsmPrinter/CMakeLists.txt @@ -7,3 +7,5 @@ add_llvm_library(LLVMAsmPrinter DwarfException.cpp OcamlGCPrinter.cpp ) + +target_link_libraries (LLVMAsmPrinter LLVMMCParser) diff --git a/lib/CodeGen/AsmPrinter/DIE.h b/lib/CodeGen/AsmPrinter/DIE.h index 454326d..9cb8314 100644 --- a/lib/CodeGen/AsmPrinter/DIE.h +++ b/lib/CodeGen/AsmPrinter/DIE.h @@ -261,11 +261,12 @@ namespace llvm { /// virtual void EmitValue(AsmPrinter *AP, unsigned Form) const; + uint64_t getValue() const { return Integer; } + /// SizeOf - Determine size of integer value in bytes. /// virtual unsigned SizeOf(AsmPrinter *AP, unsigned Form) const; - // Implement isa/cast/dyncast. static bool classof(const DIEInteger *) { return true; } static bool classof(const DIEValue *I) { return I->getType() == isInteger; } diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index b472d1e..e9e9ba5 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -28,9 +28,11 @@ #include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetOptions.h" #include "llvm/Analysis/DebugInfo.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringExtras.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/ValueHandle.h" @@ -39,6 +41,17 @@ #include "llvm/System/Path.h" using namespace llvm; +static cl::opt<bool> PrintDbgScope("print-dbgscope", cl::Hidden, + cl::desc("Print DbgScope information for each machine instruction")); + +static cl::opt<bool> DisableDebugInfoPrinting("disable-debug-info-print", cl::Hidden, + cl::desc("Disable debug info printing")); + +namespace { + const char *DWARFGroupName = "DWARF Emission"; + const char *DbgTimerName = "DWARF Debug Writer"; +} // end anonymous namespace + //===----------------------------------------------------------------------===// /// Configuration values for initial hash set sizes (log2). @@ -179,6 +192,12 @@ public: }; //===----------------------------------------------------------------------===// +/// DbgRange - This is used to track range of instructions with identical +/// debug info scope. +/// +typedef std::pair<const MachineInstr *, const MachineInstr *> DbgRange; + +//===----------------------------------------------------------------------===// /// DbgScope - This class is used to track scope information. /// class DbgScope { @@ -187,22 +206,21 @@ class DbgScope { // Location at which this scope is inlined. AssertingVH<MDNode> InlinedAtLocation; bool AbstractScope; // Abstract Scope - MCSymbol *StartLabel; // Label ID of the beginning of scope. - MCSymbol *EndLabel; // Label ID of the end of scope. const MachineInstr *LastInsn; // Last instruction of this scope. const MachineInstr *FirstInsn; // First instruction of this scope. + unsigned DFSIn, DFSOut; // Scopes defined in scope. Contents not owned. SmallVector<DbgScope *, 4> Scopes; // Variables declared in scope. Contents owned. 
SmallVector<DbgVariable *, 8> Variables; - + SmallVector<DbgRange, 4> Ranges; // Private state for dump() mutable unsigned IndentLevel; public: DbgScope(DbgScope *P, DIDescriptor D, MDNode *I = 0) : Parent(P), Desc(D), InlinedAtLocation(I), AbstractScope(false), - StartLabel(0), EndLabel(0), - LastInsn(0), FirstInsn(0), IndentLevel(0) {} + LastInsn(0), FirstInsn(0), + DFSIn(0), DFSOut(0), IndentLevel(0) {} virtual ~DbgScope(); // Accessors. @@ -211,18 +229,57 @@ public: DIDescriptor getDesc() const { return Desc; } MDNode *getInlinedAt() const { return InlinedAtLocation; } MDNode *getScopeNode() const { return Desc.getNode(); } - MCSymbol *getStartLabel() const { return StartLabel; } - MCSymbol *getEndLabel() const { return EndLabel; } const SmallVector<DbgScope *, 4> &getScopes() { return Scopes; } const SmallVector<DbgVariable *, 8> &getVariables() { return Variables; } - void setStartLabel(MCSymbol *S) { StartLabel = S; } - void setEndLabel(MCSymbol *E) { EndLabel = E; } - void setLastInsn(const MachineInstr *MI) { LastInsn = MI; } - const MachineInstr *getLastInsn() { return LastInsn; } - void setFirstInsn(const MachineInstr *MI) { FirstInsn = MI; } + const SmallVector<DbgRange, 4> &getRanges() { return Ranges; } + + /// openInsnRange - This scope covers instruction range starting from MI. + void openInsnRange(const MachineInstr *MI) { + if (!FirstInsn) + FirstInsn = MI; + + if (Parent) + Parent->openInsnRange(MI); + } + + /// extendInsnRange - Extend the current instruction range covered by + /// this scope. + void extendInsnRange(const MachineInstr *MI) { + assert (FirstInsn && "MI Range is not open!"); + LastInsn = MI; + if (Parent) + Parent->extendInsnRange(MI); + } + + /// closeInsnRange - Create a range based on FirstInsn and LastInsn collected + /// until now. This is used when a new scope is encountered while walking + /// machine instructions. + void closeInsnRange(DbgScope *NewScope = NULL) { + assert (LastInsn && "Last insn missing!"); + Ranges.push_back(DbgRange(FirstInsn, LastInsn)); + FirstInsn = NULL; + LastInsn = NULL; + // If Parent dominates NewScope then do not close Parent's instruction + // range. + if (Parent && (!NewScope || !Parent->dominates(NewScope))) + Parent->closeInsnRange(NewScope); + } + void setAbstractScope() { AbstractScope = true; } bool isAbstractScope() const { return AbstractScope; } - const MachineInstr *getFirstInsn() { return FirstInsn; } + + // Depth First Search support to walk and mainpluate DbgScope hierarchy. + unsigned getDFSOut() const { return DFSOut; } + void setDFSOut(unsigned O) { DFSOut = O; } + unsigned getDFSIn() const { return DFSIn; } + void setDFSIn(unsigned I) { DFSIn = I; } + bool dominates(const DbgScope *S) { + if (S == this) + return true; + if (DFSIn < S->getDFSIn() && DFSOut > S->getDFSOut()) + return true; + return false; + } /// addScope - Add a scope to the scope. /// @@ -232,48 +289,11 @@ public: /// void addVariable(DbgVariable *V) { Variables.push_back(V); } - void fixInstructionMarkers(DenseMap<const MachineInstr *, - unsigned> &MIIndexMap) { - assert(getFirstInsn() && "First instruction is missing!"); - - // Use the end of last child scope as end of this scope. 
- const SmallVector<DbgScope *, 4> &Scopes = getScopes(); - const MachineInstr *LastInsn = getFirstInsn(); - unsigned LIndex = 0; - if (Scopes.empty()) { - assert(getLastInsn() && "Inner most scope does not have last insn!"); - return; - } - for (SmallVector<DbgScope *, 4>::const_iterator SI = Scopes.begin(), - SE = Scopes.end(); SI != SE; ++SI) { - DbgScope *DS = *SI; - DS->fixInstructionMarkers(MIIndexMap); - const MachineInstr *DSLastInsn = DS->getLastInsn(); - unsigned DSI = MIIndexMap[DSLastInsn]; - if (DSI > LIndex) { - LastInsn = DSLastInsn; - LIndex = DSI; - } - } - - unsigned CurrentLastInsnIndex = 0; - if (const MachineInstr *CL = getLastInsn()) - CurrentLastInsnIndex = MIIndexMap[CL]; - unsigned FIndex = MIIndexMap[getFirstInsn()]; - - // Set LastInsn as the last instruction for this scope only if - // it follows - // 1) this scope's first instruction and - // 2) current last instruction for this scope, if any. - if (LIndex >= CurrentLastInsnIndex && LIndex >= FIndex) - setLastInsn(LastInsn); - } - #ifndef NDEBUG void dump() const; #endif }; - + } // end llvm namespace #ifndef NDEBUG @@ -282,7 +302,6 @@ void DbgScope::dump() const { err.indent(IndentLevel); MDNode *N = Desc.getNode(); N->dump(); - err << " [" << StartLabel << ", " << EndLabel << "]\n"; if (AbstractScope) err << "Abstract Scope\n"; @@ -305,22 +324,23 @@ DbgScope::~DbgScope() { DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M) : Asm(A), MMI(Asm->MMI), ModuleCU(0), AbbreviationsSet(InitAbbreviationsSetSize), - CurrentFnDbgScope(0), DebugTimer(0) { + CurrentFnDbgScope(0), PrevLabel(NULL) { NextStringPoolNumber = 0; DwarfFrameSectionSym = DwarfInfoSectionSym = DwarfAbbrevSectionSym = 0; DwarfStrSectionSym = TextSectionSym = 0; - - if (TimePassesIsEnabled) - DebugTimer = new Timer("Dwarf Debug Writer"); - - beginModule(M); + DwarfDebugRangeSectionSym = 0; + FunctionBeginSym = 0; + if (TimePassesIsEnabled) { + NamedRegionTimer T(DbgTimerName, DWARFGroupName); + beginModule(M); + } else { + beginModule(M); + } } DwarfDebug::~DwarfDebug() { for (unsigned j = 0, M = DIEBlocks.size(); j < M; ++j) DIEBlocks[j]->~DIEBlock(); - - delete DebugTimer; } MCSymbol *DwarfDebug::getStringPoolEntry(StringRef Str) { @@ -792,6 +812,64 @@ void DwarfDebug::addAddress(DIE *Die, unsigned Attribute, addBlock(Die, Attribute, 0, Block); } +/// addRegisterAddress - Add register location entry in variable DIE. +bool DwarfDebug::addRegisterAddress(DIE *Die, DbgVariable *DV, + const MachineOperand &MO) { + assert (MO.isReg() && "Invalid machine operand!"); + if (!MO.getReg()) + return false; + MachineLocation Location; + Location.set(MO.getReg()); + addAddress(Die, dwarf::DW_AT_location, Location); + if (MCSymbol *VS = DV->getDbgValueLabel()) + addLabel(Die, dwarf::DW_AT_start_scope, dwarf::DW_FORM_addr, VS); + return true; +} + +/// addConstantValue - Add constant value entry in variable DIE. +bool DwarfDebug::addConstantValue(DIE *Die, DbgVariable *DV, + const MachineOperand &MO) { + assert (MO.isImm() && "Invalid machine operand!"); + DIEBlock *Block = new (DIEValueAllocator) DIEBlock(); + unsigned Imm = MO.getImm(); + addUInt(Block, 0, dwarf::DW_FORM_udata, Imm); + addBlock(Die, dwarf::DW_AT_const_value, 0, Block); + if (MCSymbol *VS = DV->getDbgValueLabel()) + addLabel(Die, dwarf::DW_AT_start_scope, dwarf::DW_FORM_addr, VS); + return true; +} + +/// addConstantFPValue - Add constant value entry in variable DIE. 
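/// Worked example (hedged; assumes the usual little-endian host): for a
/// 32-bit float constant 1.0f, bit pattern 0x3F800000, a little-endian
/// target emits the DW_AT_const_value block as the bytes
/// 0x00 0x00 0x80 0x3F, while a big-endian target walks the same buffer
/// from the other end and emits 0x3F 0x80 0x00 0x00.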
+bool DwarfDebug::addConstantFPValue(DIE *Die, DbgVariable *DV, + const MachineOperand &MO) { + assert (MO.isFPImm() && "Invalid machine operand!"); + DIEBlock *Block = new (DIEValueAllocator) DIEBlock(); + APFloat FPImm = MO.getFPImm()->getValueAPF(); + + // Get the raw data form of the floating point. + const APInt FltVal = FPImm.bitcastToAPInt(); + const char *FltPtr = (const char*)FltVal.getRawData(); + + int NumBytes = FltVal.getBitWidth() / 8; // 8 bits per byte. + bool LittleEndian = Asm->getTargetData().isLittleEndian(); + int Incr = (LittleEndian ? 1 : -1); + int Start = (LittleEndian ? 0 : NumBytes - 1); + int Stop = (LittleEndian ? NumBytes : -1); + + // Output the constant to DWARF one byte at a time. + for (; Start != Stop; Start += Incr) + addUInt(Block, 0, dwarf::DW_FORM_data1, + (unsigned char)0xFF & FltPtr[Start]); + + addBlock(Die, dwarf::DW_AT_const_value, 0, Block); + + if (MCSymbol *VS = DV->getDbgValueLabel()) + addLabel(Die, dwarf::DW_AT_start_scope, dwarf::DW_FORM_addr, + VS); + return true; +} + + /// addToContextOwner - Add Die into the list of its context owner's children. void DwarfDebug::addToContextOwner(DIE *Die, DIDescriptor Context) { if (Context.isType()) { @@ -1250,59 +1328,16 @@ DIE *DwarfDebug::createSubprogramDIE(const DISubprogram &SP, bool MakeDecl) { if (SP.isArtificial()) addUInt(SPDie, dwarf::DW_AT_artificial, dwarf::DW_FORM_flag, 1); + if (!SP.isLocalToUnit()) + addUInt(SPDie, dwarf::DW_AT_external, dwarf::DW_FORM_flag, 1); + + if (SP.isOptimized()) + addUInt(SPDie, dwarf::DW_AT_APPLE_optimized, dwarf::DW_FORM_flag, 1); + // DW_TAG_inlined_subroutine may refer to this DIE. ModuleCU->insertDIE(SP.getNode(), SPDie); - return SPDie; -} - -/// getUpdatedDbgScope - Find DbgScope assicated with the instruction. -/// Update scope hierarchy. Create abstract scope if required. -DbgScope *DwarfDebug::getUpdatedDbgScope(MDNode *N, const MachineInstr *MI, - MDNode *InlinedAt) { - assert(N && "Invalid Scope encoding!"); - assert(MI && "Missing machine instruction!"); - bool isAConcreteScope = InlinedAt != 0; - - DbgScope *NScope = NULL; - - if (InlinedAt) - NScope = DbgScopeMap.lookup(InlinedAt); - else - NScope = DbgScopeMap.lookup(N); - assert(NScope && "Unable to find working scope!"); - - if (NScope->getFirstInsn()) - return NScope; - DbgScope *Parent = NULL; - if (isAConcreteScope) { - DILocation IL(InlinedAt); - Parent = getUpdatedDbgScope(IL.getScope().getNode(), MI, - IL.getOrigLocation().getNode()); - assert(Parent && "Unable to find Parent scope!"); - NScope->setParent(Parent); - Parent->addScope(NScope); - } else if (DIDescriptor(N).isLexicalBlock()) { - DILexicalBlock DB(N); - Parent = getUpdatedDbgScope(DB.getContext().getNode(), MI, InlinedAt); - NScope->setParent(Parent); - Parent->addScope(NScope); - } - - NScope->setFirstInsn(MI); - - if (!Parent && !InlinedAt) { - StringRef SPName = DISubprogram(N).getLinkageName(); - if (SPName == Asm->MF->getFunction()->getName()) - CurrentFnDbgScope = NScope; - } - - if (isAConcreteScope) { - ConcreteScopes[InlinedAt] = NScope; - getOrCreateAbstractScope(N); - } - - return NScope; + return SPDie; } DbgScope *DwarfDebug::getOrCreateAbstractScope(MDNode *N) { @@ -1332,6 +1367,19 @@ DbgScope *DwarfDebug::getOrCreateAbstractScope(MDNode *N) { return AScope; } +/// isSubprogramContext - Return true if Context is either a subprogram +/// or another context nested inside a subprogram. 
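/// For example, a method of a class type that is itself declared inside a
/// function: the method's context is the class, whose context is the
/// subprogram, so the recursion below reports true.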
+static bool isSubprogramContext(MDNode *Context) { + if (!Context) + return false; + DIDescriptor D(Context); + if (D.isSubprogram()) + return true; + if (D.isType()) + return isSubprogramContext(DIType(Context).getContext().getNode()); + return false; +} + /// updateSubprogramScopeDIE - Find DIE for the given subprogram and /// attach appropriate DW_AT_low_pc and DW_AT_high_pc attributes. /// If there are global variables in this scope then create and insert @@ -1347,7 +1395,8 @@ DIE *DwarfDebug::updateSubprogramScopeDIE(MDNode *SPNode) { // expect specification DIE in parent function. So avoid creating // specification DIE for a function defined inside a function. if (SP.isDefinition() && !SP.getContext().isCompileUnit() && - !SP.getContext().isFile() && !SP.getContext().isSubprogram()) { + !SP.getContext().isFile() && + !isSubprogramContext(SP.getContext().getNode())) { addUInt(SPDie, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1); // Add arguments. @@ -1378,31 +1427,48 @@ DIE *DwarfDebug::updateSubprogramScopeDIE(MDNode *SPNode) { MachineLocation Location(RI->getFrameRegister(*Asm->MF)); addAddress(SPDie, dwarf::DW_AT_frame_base, Location); - if (!DISubprogram(SPNode).isLocalToUnit()) - addUInt(SPDie, dwarf::DW_AT_external, dwarf::DW_FORM_flag, 1); - return SPDie; } /// constructLexicalScope - Construct new DW_TAG_lexical_block /// for this scope and attach DW_AT_low_pc/DW_AT_high_pc labels. DIE *DwarfDebug::constructLexicalScopeDIE(DbgScope *Scope) { - MCSymbol *Start = Scope->getStartLabel(); - MCSymbol *End = Scope->getEndLabel(); - if (Start == 0 || End == 0) return 0; - assert(Start->isDefined() && "Invalid starting label for an inlined scope!"); - assert(End->isDefined() && "Invalid end label for an inlined scope!"); - DIE *ScopeDIE = new DIE(dwarf::DW_TAG_lexical_block); if (Scope->isAbstractScope()) return ScopeDIE; - addLabel(ScopeDIE, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr, - Start ? Start : Asm->GetTempSymbol("func_begin", - Asm->getFunctionNumber())); - addLabel(ScopeDIE, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr, - End ? End : Asm->GetTempSymbol("func_end",Asm->getFunctionNumber())); + const SmallVector<DbgRange, 4> &Ranges = Scope->getRanges(); + if (Ranges.empty()) + return 0; + + SmallVector<DbgRange, 4>::const_iterator RI = Ranges.begin(); + if (Ranges.size() > 1) { + // .debug_range section has not been laid out yet. Emit offset in + // .debug_range as a uint, size 4, for now. emitDIE will handle + // DW_AT_ranges appropriately. + addUInt(ScopeDIE, dwarf::DW_AT_ranges, dwarf::DW_FORM_data4, + DebugRangeSymbols.size() * Asm->getTargetData().getPointerSize()); + for (SmallVector<DbgRange, 4>::const_iterator RI = Ranges.begin(), + RE = Ranges.end(); RI != RE; ++RI) { + DebugRangeSymbols.push_back(LabelsBeforeInsn.lookup(RI->first)); + DebugRangeSymbols.push_back(LabelsAfterInsn.lookup(RI->second)); + } + DebugRangeSymbols.push_back(NULL); + DebugRangeSymbols.push_back(NULL); + return ScopeDIE; + } + + MCSymbol *Start = LabelsBeforeInsn.lookup(RI->first); + MCSymbol *End = LabelsAfterInsn.lookup(RI->second); + + if (Start == 0 || End == 0) return 0; + + assert(Start->isDefined() && "Invalid starting label for an inlined scope!"); + assert(End->isDefined() && "Invalid end label for an inlined scope!"); + + addLabel(ScopeDIE, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr, Start); + addLabel(ScopeDIE, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr, End); return ScopeDIE; } @@ -1411,14 +1477,28 @@ DIE *DwarfDebug::constructLexicalScopeDIE(DbgScope *Scope) { /// a function. 
Construct DIE to represent this concrete inlined copy /// of the function. DIE *DwarfDebug::constructInlinedScopeDIE(DbgScope *Scope) { - MCSymbol *StartLabel = Scope->getStartLabel(); - MCSymbol *EndLabel = Scope->getEndLabel(); - if (StartLabel == 0 || EndLabel == 0) return 0; - + + const SmallVector<DbgRange, 4> &Ranges = Scope->getRanges(); + assert (Ranges.empty() == false + && "DbgScope does not have instruction markers!"); + + // FIXME : .debug_inlined section specification does not clearly state how + // to emit inlined scope that is split into multiple instruction ranges. + // For now, use first instruction range and emit low_pc/high_pc pair and + // corresponding .debug_inlined section entry for this pair. + SmallVector<DbgRange, 4>::const_iterator RI = Ranges.begin(); + MCSymbol *StartLabel = LabelsBeforeInsn.lookup(RI->first); + MCSymbol *EndLabel = LabelsAfterInsn.lookup(RI->second); + + if (StartLabel == 0 || EndLabel == 0) { + assert (0 && "Unexpected Start and End labels for a inlined scope!"); + return 0; + } assert(StartLabel->isDefined() && "Invalid starting label for an inlined scope!"); assert(EndLabel->isDefined() && "Invalid end label for an inlined scope!"); + if (!Scope->getScopeNode()) return NULL; DIScope DS(Scope->getScopeNode()); @@ -1512,59 +1592,34 @@ DIE *DwarfDebug::constructVariableDIE(DbgVariable *DV, DbgScope *Scope) { // Add variable address. if (!Scope->isAbstractScope()) { // Check if variable is described by DBG_VALUE instruction. - if (const MachineInstr *DbgValueInsn = DV->getDbgValue()) { - if (DbgValueInsn->getNumOperands() == 3) { - // FIXME : Handle getNumOperands != 3 - if (DbgValueInsn->getOperand(0).getType() - == MachineOperand::MO_Register - && DbgValueInsn->getOperand(0).getReg()) { - MachineLocation Location; - Location.set(DbgValueInsn->getOperand(0).getReg()); + if (const MachineInstr *DVInsn = DV->getDbgValue()) { + bool updated = false; + // FIXME : Handle getNumOperands != 3 + if (DVInsn->getNumOperands() == 3) { + if (DVInsn->getOperand(0).isReg()) + updated = addRegisterAddress(VariableDie, DV, DVInsn->getOperand(0)); + else if (DVInsn->getOperand(0).isImm()) + updated = addConstantValue(VariableDie, DV, DVInsn->getOperand(0)); + else if (DVInsn->getOperand(0).isFPImm()) + updated = addConstantFPValue(VariableDie, DV, DVInsn->getOperand(0)); + } else { + MachineLocation Location = Asm->getDebugValueLocation(DVInsn); + if (Location.getReg()) { addAddress(VariableDie, dwarf::DW_AT_location, Location); if (MCSymbol *VS = DV->getDbgValueLabel()) addLabel(VariableDie, dwarf::DW_AT_start_scope, dwarf::DW_FORM_addr, VS); - } else if (DbgValueInsn->getOperand(0).getType() == - MachineOperand::MO_Immediate) { - DIEBlock *Block = new (DIEValueAllocator) DIEBlock(); - unsigned Imm = DbgValueInsn->getOperand(0).getImm(); - addUInt(Block, 0, dwarf::DW_FORM_udata, Imm); - addBlock(VariableDie, dwarf::DW_AT_const_value, 0, Block); - if (MCSymbol *VS = DV->getDbgValueLabel()) - addLabel(VariableDie, dwarf::DW_AT_start_scope, dwarf::DW_FORM_addr, - VS); - } else if (DbgValueInsn->getOperand(0).getType() == - MachineOperand::MO_FPImmediate) { - DIEBlock *Block = new (DIEValueAllocator) DIEBlock(); - APFloat FPImm = DbgValueInsn->getOperand(0).getFPImm()->getValueAPF(); - - // Get the raw data form of the floating point. - const APInt FltVal = FPImm.bitcastToAPInt(); - const char *FltPtr = (const char*)FltVal.getRawData(); - - unsigned NumBytes = FltVal.getBitWidth() / 8; // 8 bits per byte. 
- bool LittleEndian = Asm->getTargetData().isLittleEndian(); - int Incr = (LittleEndian ? 1 : -1); - int Start = (LittleEndian ? 0 : NumBytes - 1); - int Stop = (LittleEndian ? NumBytes : -1); - - // Output the constant to DWARF one byte at a time. - for (; Start != Stop; Start += Incr) - addUInt(Block, 0, dwarf::DW_FORM_data1, - (unsigned char)0xFF & FltPtr[Start]); - - addBlock(VariableDie, dwarf::DW_AT_const_value, 0, Block); - - if (MCSymbol *VS = DV->getDbgValueLabel()) - addLabel(VariableDie, dwarf::DW_AT_start_scope, dwarf::DW_FORM_addr, - VS); - } else { - //FIXME : Handle other operand types. - delete VariableDie; - return NULL; + updated = true; } - } - } else { + } + if (!updated) { + // If variableDie is not updated then DBG_VALUE instruction does not + // have valid variable info. + delete VariableDie; + return NULL; + } + } + else { MachineLocation Location; unsigned FrameReg; const TargetRegisterInfo *RI = Asm->TM.getRegisterInfo(); @@ -1600,7 +1655,8 @@ void DwarfDebug::addPubTypes(DISubprogram SP) { if (!ATy.isValid()) continue; DICompositeType CATy = getDICompositeType(ATy); - if (DIDescriptor(CATy.getNode()).Verify() && !CATy.getName().empty()) { + if (DIDescriptor(CATy.getNode()).Verify() && !CATy.getName().empty() + && !CATy.isForwardDecl()) { if (DIEEntry *Entry = ModuleCU->getDIEEntry(CATy.getNode())) ModuleCU->addGlobalType(CATy.getName(), Entry->getEntry()); } @@ -1716,9 +1772,9 @@ void DwarfDebug::constructCompileUnit(MDNode *N) { addUInt(Die, dwarf::DW_AT_language, dwarf::DW_FORM_data1, DIUnit.getLanguage()); addString(Die, dwarf::DW_AT_name, dwarf::DW_FORM_string, FN); - addLabel(Die, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr, TextSectionSym); - addLabel(Die, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr, - Asm->GetTempSymbol("text_end")); + // Use DW_AT_entry_pc instead of DW_AT_low_pc/DW_AT_high_pc pair. This + // simplifies debug range entries. + addUInt(Die, dwarf::DW_AT_entry_pc, dwarf::DW_FORM_data4, 0); // DW_AT_stmt_list is a offset of line number information for this // compile unit in debug_line section. It is always zero when only one // compile unit is emitted in one object file. @@ -1766,7 +1822,8 @@ void DwarfDebug::constructGlobalVariableDIE(MDNode *N) { // Do not create specification DIE if context is either compile unit // or a subprogram. if (DI_GV.isDefinition() && !GVContext.isCompileUnit() && - !GVContext.isFile() && !GVContext.isSubprogram()) { + !GVContext.isFile() && + !isSubprogramContext(GVContext.getNode())) { // Create specification DIE. DIE *VariableSpecDIE = new DIE(dwarf::DW_TAG_variable); addDIEEntry(VariableSpecDIE, dwarf::DW_AT_specification, @@ -1791,7 +1848,8 @@ void DwarfDebug::constructGlobalVariableDIE(MDNode *N) { ModuleCU->addGlobal(DI_GV.getName(), VariableDie); DIType GTy = DI_GV.getType(); - if (GTy.isCompositeType() && !GTy.getName().empty()) { + if (GTy.isCompositeType() && !GTy.getName().empty() + && !GTy.isForwardDecl()) { DIEEntry *Entry = ModuleCU->getDIEEntry(GTy.getNode()); assert(Entry && "Missing global type!"); ModuleCU->addGlobalType(GTy.getName(), Entry->getEntry()); @@ -1829,7 +1887,8 @@ void DwarfDebug::constructSubprogramDIE(MDNode *N) { /// content. Create global DIEs and emit initial debug info sections. /// This is inovked by the target AsmPrinter. 
void DwarfDebug::beginModule(Module *M) { - TimeRegion Timer(DebugTimer); + if (DisableDebugInfoPrinting) + return; DebugInfoFinder DbgFinder; DbgFinder.processModule(*M); @@ -1893,10 +1952,7 @@ void DwarfDebug::beginModule(Module *M) { /// endModule - Emit all Dwarf sections that should come after the content. /// void DwarfDebug::endModule() { - if (!ModuleCU) - return; - - TimeRegion Timer(DebugTimer); + if (!ModuleCU) return; // Attach DW_AT_inline attribute with inlined subprogram DIEs. for (SmallPtrSet<DIE *, 4>::iterator AI = InlinedSubprogramDIEs.begin(), @@ -1905,11 +1961,6 @@ void DwarfDebug::endModule() { addUInt(ISP, dwarf::DW_AT_inline, 0, dwarf::DW_INL_inlined); } - // Insert top level DIEs. - for (SmallVector<DIE *, 4>::iterator TI = TopLevelDIEsVector.begin(), - TE = TopLevelDIEsVector.end(); TI != TE; ++TI) - ModuleCU->getCUDie()->addChild(*TI); - for (DenseMap<DIE *, MDNode *>::iterator CI = ContainingTypeMap.begin(), CE = ContainingTypeMap.end(); CI != CE; ++CI) { DIE *SPDie = CI->first; @@ -1918,8 +1969,6 @@ void DwarfDebug::endModule() { DIE *NDie = ModuleCU->getDIE(N); if (!NDie) continue; addDIEEntry(SPDie, dwarf::DW_AT_containing_type, dwarf::DW_FORM_ref4, NDie); - // FIXME - This is not the correct approach. - //addDIEEntry(NDie, dwarf::DW_AT_containing_type, dwarf::DW_FORM_ref4, NDie } // Standard sections final addresses. @@ -2062,11 +2111,13 @@ void DwarfDebug::collectVariableInfo() { if (!MInsn->isDebugValue()) continue; - // FIXME : Lift this restriction. - if (MInsn->getNumOperands() != 3) + // Ignore Undef values. + if (MInsn->getOperand(0).isReg() && !MInsn->getOperand(0).getReg()) continue; - DIVariable DV((MDNode*)(MInsn->getOperand(MInsn->getNumOperands() - - 1).getMetadata())); + + DIVariable DV( + const_cast<MDNode *>(MInsn->getOperand(MInsn->getNumOperands() - 1) + .getMetadata())); if (DV.getTag() == dwarf::DW_TAG_arg_variable) { // FIXME Handle inlined subroutine arguments. DbgVariable *ArgVar = new DbgVariable(DV, MInsn, NULL); @@ -2102,10 +2153,6 @@ void DwarfDebug::beginScope(const MachineInstr *MI) { if (DL.isUnknown()) return; - // Check and update last known location info. - if (DL == PrevInstLoc) - return; - MDNode *Scope = DL.getScope(Asm->MF->getFunction()->getContext()); // FIXME: Should only verify each scope once! @@ -2117,78 +2164,174 @@ void DwarfDebug::beginScope(const MachineInstr *MI) { DenseMap<const MachineInstr *, DbgVariable *>::iterator DI = DbgValueStartMap.find(MI); if (DI != DbgValueStartMap.end()) { - MCSymbol *Label = recordSourceLine(DL.getLine(), DL.getCol(), Scope); - PrevInstLoc = DL; + MCSymbol *Label = NULL; + if (DL == PrevInstLoc) + Label = PrevLabel; + else { + Label = recordSourceLine(DL.getLine(), DL.getCol(), Scope); + PrevInstLoc = DL; + PrevLabel = Label; + } DI->second->setDbgValueLabel(Label); } return; } // Emit a label to indicate location change. This is used for line - // table even if this instruction does start a new scope. - MCSymbol *Label = recordSourceLine(DL.getLine(), DL.getCol(), Scope); - PrevInstLoc = DL; - - // update DbgScope if this instruction starts a new scope. - InsnToDbgScopeMapTy::iterator I = DbgScopeBeginMap.find(MI); - if (I == DbgScopeBeginMap.end()) - return; + // table even if this instruction does not start a new scope. 
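  // Note the caching below: consecutive instructions sharing one DebugLoc
  // reuse PrevLabel rather than recording a fresh source-line entry, so a
  // straight-line run from a single source line contributes one label; any
  // instruction that opens a scope still gets that label recorded in
  // LabelsBeforeInsn for later use as a range start.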
+ MCSymbol *Label = NULL; + if (DL == PrevInstLoc) + Label = PrevLabel; + else { + Label = recordSourceLine(DL.getLine(), DL.getCol(), Scope); + PrevInstLoc = DL; + PrevLabel = Label; + } - ScopeVector &SD = I->second; - for (ScopeVector::iterator SDI = SD.begin(), SDE = SD.end(); - SDI != SDE; ++SDI) - (*SDI)->setStartLabel(Label); + // If this instruction begins a scope then note down corresponding label. + if (InsnsBeginScopeSet.count(MI) != 0) + LabelsBeforeInsn[MI] = Label; } /// endScope - Process end of a scope. void DwarfDebug::endScope(const MachineInstr *MI) { - // Ignore DBG_VALUE instruction. - if (MI->isDebugValue()) - return; - - // Check location. - DebugLoc DL = MI->getDebugLoc(); - if (DL.isUnknown()) - return; - - // Emit a label and update DbgScope if this instruction ends a scope. - InsnToDbgScopeMapTy::iterator I = DbgScopeEndMap.find(MI); - if (I == DbgScopeEndMap.end()) - return; - - MCSymbol *Label = MMI->getContext().CreateTempSymbol(); - Asm->OutStreamer.EmitLabel(Label); - - SmallVector<DbgScope*, 2> &SD = I->second; - for (SmallVector<DbgScope *, 2>::iterator SDI = SD.begin(), SDE = SD.end(); - SDI != SDE; ++SDI) - (*SDI)->setEndLabel(Label); - return; + if (InsnsEndScopeSet.count(MI) != 0) { + // Emit a label if this instruction ends a scope. + MCSymbol *Label = MMI->getContext().CreateTempSymbol(); + Asm->OutStreamer.EmitLabel(Label); + LabelsAfterInsn[MI] = Label; + } } -/// createDbgScope - Create DbgScope for the scope. -void DwarfDebug::createDbgScope(MDNode *Scope, MDNode *InlinedAt) { +/// getOrCreateDbgScope - Create DbgScope for the scope. +DbgScope *DwarfDebug::getOrCreateDbgScope(MDNode *Scope, MDNode *InlinedAt) { if (!InlinedAt) { DbgScope *WScope = DbgScopeMap.lookup(Scope); if (WScope) - return; + return WScope; WScope = new DbgScope(NULL, DIDescriptor(Scope), NULL); DbgScopeMap.insert(std::make_pair(Scope, WScope)); - if (DIDescriptor(Scope).isLexicalBlock()) - createDbgScope(DILexicalBlock(Scope).getContext().getNode(), NULL); - return; + if (DIDescriptor(Scope).isLexicalBlock()) { + DbgScope *Parent = + getOrCreateDbgScope(DILexicalBlock(Scope).getContext().getNode(), NULL); + WScope->setParent(Parent); + Parent->addScope(WScope); + } + + if (!WScope->getParent()) { + StringRef SPName = DISubprogram(Scope).getLinkageName(); + if (SPName == Asm->MF->getFunction()->getName()) + CurrentFnDbgScope = WScope; + } + + return WScope; } DbgScope *WScope = DbgScopeMap.lookup(InlinedAt); if (WScope) - return; + return WScope; WScope = new DbgScope(NULL, DIDescriptor(Scope), InlinedAt); DbgScopeMap.insert(std::make_pair(InlinedAt, WScope)); DILocation DL(InlinedAt); - createDbgScope(DL.getScope().getNode(), DL.getOrigLocation().getNode()); + DbgScope *Parent = + getOrCreateDbgScope(DL.getScope().getNode(), DL.getOrigLocation().getNode()); + WScope->setParent(Parent); + Parent->addScope(WScope); + + ConcreteScopes[InlinedAt] = WScope; + getOrCreateAbstractScope(Scope); + + return WScope; } +/// hasValidLocation - Return true if debug location entry attached with +/// machine instruction encodes valid location info. +static bool hasValidLocation(LLVMContext &Ctx, + const MachineInstr *MInsn, + MDNode *&Scope, MDNode *&InlinedAt) { + if (MInsn->isDebugValue()) + return false; + DebugLoc DL = MInsn->getDebugLoc(); + if (DL.isUnknown()) return false; + + MDNode *S = DL.getScope(Ctx); + + // There is no need to create another DIE for compile unit. For all + // other scopes, create one DbgScope now. This will be translated + // into a scope DIE at the end. 
+ if (DIScope(S).isCompileUnit()) return false; + + Scope = S; + InlinedAt = DL.getInlinedAt(Ctx); + return true; +} + +/// calculateDominanceGraph - Calculate dominance graph for DbgScope +/// hierarchy. +static void calculateDominanceGraph(DbgScope *Scope) { + assert (Scope && "Unable to calculate scop edominance graph!"); + SmallVector<DbgScope *, 4> WorkStack; + WorkStack.push_back(Scope); + unsigned Counter = 0; + while (!WorkStack.empty()) { + DbgScope *WS = WorkStack.back(); + const SmallVector<DbgScope *, 4> &Children = WS->getScopes(); + bool visitedChildren = false; + for (SmallVector<DbgScope *, 4>::const_iterator SI = Children.begin(), + SE = Children.end(); SI != SE; ++SI) { + DbgScope *ChildScope = *SI; + if (!ChildScope->getDFSOut()) { + WorkStack.push_back(ChildScope); + visitedChildren = true; + ChildScope->setDFSIn(++Counter); + break; + } + } + if (!visitedChildren) { + WorkStack.pop_back(); + WS->setDFSOut(++Counter); + } + } +} + +/// printDbgScopeInfo - Print DbgScope info for each machine instruction. +static +void printDbgScopeInfo(LLVMContext &Ctx, const MachineFunction *MF, + DenseMap<const MachineInstr *, DbgScope *> &MI2ScopeMap) +{ +#ifndef NDEBUG + unsigned PrevDFSIn = 0; + for (MachineFunction::const_iterator I = MF->begin(), E = MF->end(); + I != E; ++I) { + for (MachineBasicBlock::const_iterator II = I->begin(), IE = I->end(); + II != IE; ++II) { + const MachineInstr *MInsn = II; + MDNode *Scope = NULL; + MDNode *InlinedAt = NULL; + + // Check if instruction has valid location information. + if (hasValidLocation(Ctx, MInsn, Scope, InlinedAt)) { + dbgs() << " [ "; + if (InlinedAt) + dbgs() << "*"; + DenseMap<const MachineInstr *, DbgScope *>::iterator DI = + MI2ScopeMap.find(MInsn); + if (DI != MI2ScopeMap.end()) { + DbgScope *S = DI->second; + dbgs() << S->getDFSIn(); + PrevDFSIn = S->getDFSIn(); + } else + dbgs() << PrevDFSIn; + } else + dbgs() << " [ x" << PrevDFSIn; + dbgs() << " ]"; + MInsn->dump(); + } + dbgs() << "\n"; + } +#endif +} /// extractScopeInformation - Scan machine instructions in this function /// and collect DbgScopes. Return true, if at least one scope was found. bool DwarfDebug::extractScopeInformation() { @@ -2197,71 +2340,100 @@ bool DwarfDebug::extractScopeInformation() { if (!DbgScopeMap.empty()) return false; - DenseMap<const MachineInstr *, unsigned> MIIndexMap; - unsigned MIIndex = 0; - LLVMContext &Ctx = Asm->MF->getFunction()->getContext(); - // Scan each instruction and create scopes. First build working set of scopes. + LLVMContext &Ctx = Asm->MF->getFunction()->getContext(); + SmallVector<DbgRange, 4> MIRanges; + DenseMap<const MachineInstr *, DbgScope *> MI2ScopeMap; + MDNode *PrevScope = NULL; + MDNode *PrevInlinedAt = NULL; + const MachineInstr *RangeBeginMI = NULL; + const MachineInstr *PrevMI = NULL; for (MachineFunction::const_iterator I = Asm->MF->begin(), E = Asm->MF->end(); I != E; ++I) { for (MachineBasicBlock::const_iterator II = I->begin(), IE = I->end(); II != IE; ++II) { const MachineInstr *MInsn = II; - // FIXME : Remove DBG_VALUE check. - if (MInsn->isDebugValue()) continue; - MIIndexMap[MInsn] = MIIndex++; - - DebugLoc DL = MInsn->getDebugLoc(); - if (DL.isUnknown()) continue; - - MDNode *Scope = DL.getScope(Ctx); - - // There is no need to create another DIE for compile unit. For all - // other scopes, create one DbgScope now. This will be translated - // into a scope DIE at the end. 
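// Worked illustration of the DFS numbering (hypothetical scope tree): for a
// function scope F with children A and B, where A contains A1, the walk in
// calculateDominanceGraph can assign A(in=1,out=4), A1(in=2,out=3),
// B(in=5,out=6), with F keeping in=0 and receiving out=7. dominates() is
// then a simple interval test: A dominates A1 (1<2, 4>3) but not B (4>6
// fails), which is how closeInsnRange() decides whether an ancestor's range
// may stay open when a new scope begins.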
- if (DIScope(Scope).isCompileUnit()) continue; - createDbgScope(Scope, DL.getInlinedAt(Ctx)); - } - } + MDNode *Scope = NULL; + MDNode *InlinedAt = NULL; + // Check if instruction has valid location information. + if (!hasValidLocation(Ctx, MInsn, Scope, InlinedAt)) { + PrevMI = MInsn; + continue; + } + + // If scope has not changed then skip this instruction. + if (Scope == PrevScope && PrevInlinedAt == InlinedAt) { + PrevMI = MInsn; + continue; + } - // Build scope hierarchy using working set of scopes. - for (MachineFunction::const_iterator I = Asm->MF->begin(), E = Asm->MF->end(); - I != E; ++I) { - for (MachineBasicBlock::const_iterator II = I->begin(), IE = I->end(); - II != IE; ++II) { - const MachineInstr *MInsn = II; - // FIXME : Remove DBG_VALUE check. - if (MInsn->isDebugValue()) continue; - DebugLoc DL = MInsn->getDebugLoc(); - if (DL.isUnknown()) continue; + if (RangeBeginMI) { + // If we have already seen the beginning of an instruction range and + // the current instruction scope does not match the scope of the first + // instruction in this range then create a new instruction range. + DbgRange R(RangeBeginMI, PrevMI); + MI2ScopeMap[RangeBeginMI] = getOrCreateDbgScope(PrevScope, PrevInlinedAt); + MIRanges.push_back(R); + } - MDNode *Scope = DL.getScope(Ctx); - if (Scope == 0) continue; + // This is the beginning of a new instruction range. + RangeBeginMI = MInsn; - // There is no need to create another DIE for compile unit. For all - // other scopes, create one DbgScope now. This will be translated - // into a scope DIE at the end. - if (DIScope(Scope).isCompileUnit()) continue; - DbgScope *DScope = getUpdatedDbgScope(Scope, MInsn, DL.getInlinedAt(Ctx)); - DScope->setLastInsn(MInsn); + // Reset previous markers. + PrevMI = MInsn; + PrevScope = Scope; + PrevInlinedAt = InlinedAt; } } + // Create last instruction range. + if (RangeBeginMI && PrevMI && PrevScope) { + DbgRange R(RangeBeginMI, PrevMI); + MIRanges.push_back(R); + MI2ScopeMap[RangeBeginMI] = getOrCreateDbgScope(PrevScope, PrevInlinedAt); + } + if (!CurrentFnDbgScope) return false; - CurrentFnDbgScope->fixInstructionMarkers(MIIndexMap); + calculateDominanceGraph(CurrentFnDbgScope); + if (PrintDbgScope) + printDbgScopeInfo(Ctx, Asm->MF, MI2ScopeMap); + + // Find ranges of instructions covered by each DbgScope. + DbgScope *PrevDbgScope = NULL; + for (SmallVector<DbgRange, 4>::const_iterator RI = MIRanges.begin(), + RE = MIRanges.end(); RI != RE; ++RI) { + const DbgRange &R = *RI; + DbgScope *S = MI2ScopeMap.lookup(R.first); + assert (S && "Lost DbgScope for a machine instruction!"); + if (PrevDbgScope && !PrevDbgScope->dominates(S)) + PrevDbgScope->closeInsnRange(S); + S->openInsnRange(R.first); + S->extendInsnRange(R.second); + PrevDbgScope = S; + } - // Each scope has first instruction and last instruction to mark beginning - // and end of a scope respectively. Create an inverse map that list scopes - // starts (and ends) with an instruction. One instruction may start (or end) - // multiple scopes. Ignore scopes that are not reachable. + if (PrevDbgScope) + PrevDbgScope->closeInsnRange(); + + identifyScopeMarkers(); + + return !DbgScopeMap.empty(); +} + +/// identifyScopeMarkers() - +/// Each DbgScope has a first and a last instruction to mark the beginning and +/// end of the scope, respectively. Create an inverse map that lists the scopes +/// that start (and end) with an instruction. One instruction may start (or end) +/// multiple scopes. Ignore scopes that are not reachable.
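The rewritten extractScopeInformation above groups consecutive instructions that share the same (scope, inlined-at) pair into DbgRanges keyed by the range's first instruction, and calculateDominanceGraph assigns DFS in/out numbers so that scope nesting can later be tested by interval containment. The following is a minimal standalone sketch of that bookkeeping, not the LLVM implementation; Scope, Insn, and the containers are illustrative stand-ins.

#include <cstdio>
#include <map>
#include <utility>
#include <vector>

// Illustrative stand-ins for DbgScope and MachineInstr.
struct Scope {
  std::vector<Scope *> Children;
  unsigned DFSIn = 0, DFSOut = 0;
  // A scope encloses another iff its DFS interval contains the other's.
  bool dominates(const Scope *S) const {
    return DFSIn <= S->DFSIn && S->DFSOut <= DFSOut;
  }
};
struct Insn { int Id; Scope *S; };

// Assign DFS in/out numbers with an explicit work stack (no recursion).
static void numberScopes(Scope *Root) {
  unsigned Counter = 0;
  std::vector<Scope *> Stack{Root};
  Root->DFSIn = ++Counter;
  while (!Stack.empty()) {
    Scope *Top = Stack.back();
    bool Descended = false;
    for (Scope *Child : Top->Children)
      if (Child->DFSIn == 0) {          // not discovered yet
        Child->DFSIn = ++Counter;
        Stack.push_back(Child);
        Descended = true;
        break;
      }
    if (!Descended) {                   // all children finished
      Top->DFSOut = ++Counter;
      Stack.pop_back();
    }
  }
}

int main() {
  Scope Fn, Block;                      // Block is lexically nested in Fn
  Fn.Children.push_back(&Block);
  numberScopes(&Fn);

  // Instructions in program order; a range closes whenever the scope changes.
  std::vector<Insn> Insns = {{0, &Fn}, {1, &Fn}, {2, &Block}, {3, &Block}, {4, &Fn}};
  std::map<int, std::pair<int, Scope *>> Ranges;   // first id -> (last id, scope)
  int Begin = -1, Last = -1;
  Scope *Prev = nullptr;
  for (const Insn &I : Insns) {
    if (I.S != Prev) {
      if (Prev) Ranges[Begin] = {Last, Prev};      // close the previous range
      Begin = I.Id;
      Prev = I.S;
    }
    Last = I.Id;
  }
  if (Prev) Ranges[Begin] = {Last, Prev};          // close the final range

  for (const auto &R : Ranges)
    std::printf("range [%d,%d] in scope [%u,%u], nested in Fn: %d\n",
                R.first, R.second.first, R.second.second->DFSIn,
                R.second.second->DFSOut, Fn.dominates(R.second.second));
  return 0;
}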
+void DwarfDebug::identifyScopeMarkers() { SmallVector<DbgScope *, 4> WorkList; WorkList.push_back(CurrentFnDbgScope); while (!WorkList.empty()) { DbgScope *S = WorkList.pop_back_val(); - + const SmallVector<DbgScope *, 4> &Children = S->getScopes(); if (!Children.empty()) for (SmallVector<DbgScope *, 4>::const_iterator SI = Children.begin(), @@ -2270,45 +2442,51 @@ bool DwarfDebug::extractScopeInformation() { if (S->isAbstractScope()) continue; - const MachineInstr *MI = S->getFirstInsn(); - assert(MI && "DbgScope does not have first instruction!"); - - InsnToDbgScopeMapTy::iterator IDI = DbgScopeBeginMap.find(MI); - if (IDI != DbgScopeBeginMap.end()) - IDI->second.push_back(S); - else - DbgScopeBeginMap[MI].push_back(S); - - MI = S->getLastInsn(); - assert(MI && "DbgScope does not have last instruction!"); - IDI = DbgScopeEndMap.find(MI); - if (IDI != DbgScopeEndMap.end()) - IDI->second.push_back(S); - else - DbgScopeEndMap[MI].push_back(S); + + const SmallVector<DbgRange, 4> &Ranges = S->getRanges(); + if (Ranges.empty()) + continue; + for (SmallVector<DbgRange, 4>::const_iterator RI = Ranges.begin(), + RE = Ranges.end(); RI != RE; ++RI) { + assert(RI->first && "DbgRange does not have first instruction!"); + assert(RI->second && "DbgRange does not have second instruction!"); + InsnsBeginScopeSet.insert(RI->first); + InsnsEndScopeSet.insert(RI->second); + } } +} - return !DbgScopeMap.empty(); +/// FindFirstDebugLoc - Find the first debug location in the function. This +/// is intended to be an approximation for the source position of the +/// beginning of the function. +static DebugLoc FindFirstDebugLoc(const MachineFunction *MF) { + for (MachineFunction::const_iterator I = MF->begin(), E = MF->end(); + I != E; ++I) + for (MachineBasicBlock::const_iterator MBBI = I->begin(), MBBE = I->end(); + MBBI != MBBE; ++MBBI) { + DebugLoc DL = MBBI->getDebugLoc(); + if (!DL.isUnknown()) + return DL; + } + return DebugLoc(); } /// beginFunction - Gather pre-function debug information. Assumes being /// emitted immediately after the function entry point. void DwarfDebug::beginFunction(const MachineFunction *MF) { if (!MMI->hasDebugInfo()) return; - - TimeRegion Timer(DebugTimer); - if (!extractScopeInformation()) - return; + if (!extractScopeInformation()) return; collectVariableInfo(); + FunctionBeginSym = Asm->GetTempSymbol("func_begin", + Asm->getFunctionNumber()); // Assumes in correct section after the entry point. - Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("func_begin", - Asm->getFunctionNumber())); + Asm->OutStreamer.EmitLabel(FunctionBeginSym); // Emit label for the implicitly defined dbg.stoppoint at the start of the // function. - DebugLoc FDL = MF->getDefaultDebugLoc(); + DebugLoc FDL = FindFirstDebugLoc(MF); if (FDL.isUnknown()) return; MDNode *Scope = FDL.getScope(MF->getFunction()->getContext()); @@ -2329,10 +2507,7 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) { /// endFunction - Gather and emit post-function debug information. /// void DwarfDebug::endFunction(const MachineFunction *MF) { - if (!MMI->hasDebugInfo() || - DbgScopeMap.empty()) return; - - TimeRegion Timer(DebugTimer); + if (!MMI->hasDebugInfo() || DbgScopeMap.empty()) return; if (CurrentFnDbgScope) { // Define end label for subprogram. 
@@ -2355,8 +2530,13 @@ void DwarfDebug::endFunction(const MachineFunction *MF) { AE = AbstractScopesList.end(); AI != AE; ++AI) constructScopeDIE(*AI); - constructScopeDIE(CurrentFnDbgScope); + DIE *CurFnDIE = constructScopeDIE(CurrentFnDbgScope); + if (!DisableFramePointerElim(*MF)) + addUInt(CurFnDIE, dwarf::DW_AT_APPLE_omit_frame_ptr, + dwarf::DW_FORM_flag, 1); + + DebugFrames.push_back(FunctionDebugFrameInfo(Asm->getFunctionNumber(), MMI->getFrameMoves())); } @@ -2364,22 +2544,23 @@ void DwarfDebug::endFunction(const MachineFunction *MF) { // Clear debug info CurrentFnDbgScope = NULL; DeleteContainerSeconds(DbgScopeMap); - DbgScopeBeginMap.clear(); - DbgScopeEndMap.clear(); + InsnsBeginScopeSet.clear(); + InsnsEndScopeSet.clear(); DbgValueStartMap.clear(); ConcreteScopes.clear(); DeleteContainerSeconds(AbstractScopes); AbstractScopesList.clear(); AbstractVariables.clear(); + LabelsBeforeInsn.clear(); + LabelsAfterInsn.clear(); Lines.clear(); + PrevLabel = NULL; } /// recordSourceLine - Register a source line with debug info. Returns the /// unique label that was emitted and which provides correspondence to /// the source line list. MCSymbol *DwarfDebug::recordSourceLine(unsigned Line, unsigned Col, MDNode *S) { - TimeRegion Timer(DebugTimer); - StringRef Dir; StringRef Fn; @@ -2407,17 +2588,6 @@ MCSymbol *DwarfDebug::recordSourceLine(unsigned Line, unsigned Col, MDNode *S) { return Label; } -/// getOrCreateSourceID - Public version of GetOrCreateSourceID. This can be -/// timed. Look up the source id with the given directory and source file -/// names. If none currently exists, create a new id and insert it in the -/// SourceIds map. This can update DirectoryNames and SourceFileNames maps as -/// well. -unsigned DwarfDebug::getOrCreateSourceID(const std::string &DirName, - const std::string &FileName) { - TimeRegion Timer(DebugTimer); - return GetOrCreateSourceID(DirName.c_str(), FileName.c_str()); -} - //===----------------------------------------------------------------------===// // Emit Methods //===----------------------------------------------------------------------===// @@ -2481,7 +2651,6 @@ void DwarfDebug::computeSizeAndOffsets() { sizeof(int8_t); // Pointer Size (in bytes) computeSizeAndOffset(ModuleCU->getCUDie(), Offset, true); - CompileUnitOffsets[ModuleCU] = 0; } /// EmitSectionSym - Switch to the specified MCSection and emit an assembler @@ -2522,7 +2691,8 @@ void DwarfDebug::EmitSectionLabels() { EmitSectionSym(Asm, TLOF.getDwarfPubTypesSection()); DwarfStrSectionSym = EmitSectionSym(Asm, TLOF.getDwarfStrSection(), "section_str"); - EmitSectionSym(Asm, TLOF.getDwarfRangesSection()); + DwarfDebugRangeSectionSym = EmitSectionSym(Asm, TLOF.getDwarfRangesSection(), + "debug_range"); TextSectionSym = EmitSectionSym(Asm, TLOF.getTextSection(), "text_begin"); EmitSectionSym(Asm, TLOF.getDataSection()); @@ -2566,6 +2736,15 @@ void DwarfDebug::emitDIE(DIE *Die) { Asm->EmitInt32(Addr); break; } + case dwarf::DW_AT_ranges: { + // DW_AT_range Value encodes offset in debug_range section. + DIEInteger *V = cast<DIEInteger>(Values[i]); + Asm->EmitLabelOffsetDifference(DwarfDebugRangeSectionSym, + V->getValue(), + DwarfDebugRangeSectionSym, + 4); + break; + } default: // Emit an attribute using the defined form. 
Values[i]->EmitValue(Asm, Form); @@ -2900,7 +3079,7 @@ void DwarfDebug::emitCommonDebugFrame() { Asm->EmitFrameMoves(Moves, 0, false); - Asm->EmitAlignment(2, 0, 0, false); + Asm->EmitAlignment(2); Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("debug_frame_common_end")); } @@ -2942,7 +3121,7 @@ emitFunctionDebugFrame(const FunctionDebugFrameInfo &DebugFrameInfo) { Asm->EmitFrameMoves(DebugFrameInfo.Moves, FuncBeginSym, false); - Asm->EmitAlignment(2, 0, 0, false); + Asm->EmitAlignment(2); Asm->OutStreamer.EmitLabel(DebugFrameEnd); } @@ -3087,7 +3266,16 @@ void DwarfDebug::EmitDebugARanges() { void DwarfDebug::emitDebugRanges() { // Start the dwarf ranges section. Asm->OutStreamer.SwitchSection( - Asm->getObjFileLowering().getDwarfRangesSection()); + Asm->getObjFileLowering().getDwarfRangesSection()); + unsigned char Size = Asm->getTargetData().getPointerSize(); + for (SmallVector<const MCSymbol *, 8>::iterator + I = DebugRangeSymbols.begin(), E = DebugRangeSymbols.end(); + I != E; ++I) { + if (*I) + Asm->OutStreamer.EmitSymbolValue(const_cast<MCSymbol*>(*I), Size, 0); + else + Asm->OutStreamer.EmitIntValue(0, Size, /*addrspace*/0); + } } /// emitDebugMacInfo - Emit visible names into a debug macinfo section. diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.h b/lib/CodeGen/AsmPrinter/DwarfDebug.h index c7baf5f..b964b23 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.h +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.h @@ -32,8 +32,8 @@ class DbgVariable; class MachineFrameInfo; class MachineLocation; class MachineModuleInfo; +class MachineOperand; class MCAsmInfo; -class Timer; class DIEAbbrev; class DIE; class DIEBlock; @@ -174,24 +174,14 @@ class DwarfDebug { /// (at the end of the module) as DW_AT_inline. SmallPtrSet<DIE *, 4> InlinedSubprogramDIEs; + /// ContainingTypeMap - This map is used to keep track of subprogram DIEs that + /// need DW_AT_containing_type attribute. This attribute points to a DIE that + /// corresponds to the MDNode mapped with the subprogram DIE. DenseMap<DIE *, MDNode *> ContainingTypeMap; - /// AbstractSubprogramDIEs - Collection of abstruct subprogram DIEs. - SmallPtrSet<DIE *, 4> AbstractSubprogramDIEs; - - /// TopLevelDIEs - Collection of top level DIEs. - SmallPtrSet<DIE *, 4> TopLevelDIEs; - SmallVector<DIE *, 4> TopLevelDIEsVector; - typedef SmallVector<DbgScope *, 2> ScopeVector; - typedef DenseMap<const MachineInstr *, ScopeVector> - InsnToDbgScopeMapTy; - - /// DbgScopeBeginMap - Maps instruction with a list of DbgScopes it starts. - InsnToDbgScopeMapTy DbgScopeBeginMap; - - /// DbgScopeEndMap - Maps instruction with a list DbgScopes it ends. - InsnToDbgScopeMapTy DbgScopeEndMap; + SmallPtrSet<const MachineInstr *, 8> InsnsBeginScopeSet; + SmallPtrSet<const MachineInstr *, 8> InsnsEndScopeSet; /// InlineInfo - Keep track of inlined functions and their location. This /// information is used to populate debug_inlined section. @@ -199,18 +189,21 @@ class DwarfDebug { DenseMap<MDNode*, SmallVector<InlineInfoLabels, 4> > InlineInfo; SmallVector<MDNode *, 4> InlinedSPNodes; - /// CompileUnitOffsets - A vector of the offsets of the compile units. This is - /// used when calculating the "origin" of a concrete instance of an inlined - /// function. - DenseMap<CompileUnit *, unsigned> CompileUnitOffsets; + /// LabelsBeforeInsn - Maps instruction with label emitted before + /// instruction. + DenseMap<const MachineInstr *, MCSymbol *> LabelsBeforeInsn; + + /// LabelsAfterInsn - Maps instruction with label emitted after + /// instruction. 
+ DenseMap<const MachineInstr *, MCSymbol *> LabelsAfterInsn; + + SmallVector<const MCSymbol *, 8> DebugRangeSymbols; /// Previous instruction's location information. This is used to determine /// label location to indicate scope boundaries in dwarf debug info. DebugLoc PrevInstLoc; + MCSymbol *PrevLabel; - /// DebugTimer - Timer for the Dwarf debug writer. - Timer *DebugTimer; - struct FunctionDebugFrameInfo { unsigned Number; std::vector<MachineMove> Moves; @@ -225,8 +218,9 @@ class DwarfDebug { // the beginning of each supported dwarf section. These are used to form // section offsets and are created by EmitSectionLabels. MCSymbol *DwarfFrameSectionSym, *DwarfInfoSectionSym, *DwarfAbbrevSectionSym; - MCSymbol *DwarfStrSectionSym, *TextSectionSym; - + MCSymbol *DwarfStrSectionSym, *TextSectionSym, *DwarfDebugRangeSectionSym; + + MCSymbol *FunctionBeginSym; private: /// getSourceDirectoryAndFileIds - Return the directory and file ids that @@ -311,6 +305,15 @@ private: void addAddress(DIE *Die, unsigned Attribute, const MachineLocation &Location); + /// addRegisterAddress - Add register location entry in variable DIE. + bool addRegisterAddress(DIE *Die, DbgVariable *DV, const MachineOperand &MO); + + /// addConstantValue - Add constant value entry in variable DIE. + bool addConstantValue(DIE *Die, DbgVariable *DV, const MachineOperand &MO); + + /// addConstantFPValue - Add constant value entry in variable DIE. + bool addConstantFPValue(DIE *Die, DbgVariable *DV, const MachineOperand &MO); + /// addComplexAddress - Start with the address based on the location provided, /// and generate the DWARF information necessary to find the actual variable /// (navigating the extra location information encoded in the type) based on @@ -376,13 +379,8 @@ private: /// createSubprogramDIE - Create new DIE using SP. DIE *createSubprogramDIE(const DISubprogram &SP, bool MakeDecl = false); - /// getUpdatedDbgScope - Find or create DbgScope assicated with - /// the instruction. Initialize scope and update scope hierarchy. - DbgScope *getUpdatedDbgScope(MDNode *N, const MachineInstr *MI, - MDNode *InlinedAt); - - /// createDbgScope - Create DbgScope for the scope. - void createDbgScope(MDNode *Scope, MDNode *InlinedAt); + /// getOrCreateDbgScope - Create DbgScope for the scope. + DbgScope *getOrCreateDbgScope(MDNode *Scope, MDNode *InlinedAt); DbgScope *getOrCreateAbstractScope(MDNode *N); @@ -531,14 +529,10 @@ private: return Lines.size(); } - /// getOrCreateSourceID - Public version of GetOrCreateSourceID. This can be - /// timed. Look up the source id with the given directory and source file - /// names. If none currently exists, create a new id and insert it in the - /// SourceIds map. This can update DirectoryNames and SourceFileNames maps as - /// well. - unsigned getOrCreateSourceID(const std::string &DirName, - const std::string &FileName); - + /// identifyScopeMarkers() - Identify instructions that mark the + /// beginning or end of a scope. + void identifyScopeMarkers(); + /// extractScopeInformation - Scan machine instructions in this function /// and collect DbgScopes. Return true if at least one scope was found.
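The new LabelsBeforeInsn/LabelsAfterInsn maps above pair scope-opening and scope-closing instructions with the label emitted next to them, while PrevInstLoc/PrevLabel let beginScope reuse one label for a run of instructions that share a source location. A small self-contained sketch of that reuse pattern, with Loc and plain label ids standing in for DebugLoc and MCSymbol:

#include <cstdio>
#include <map>
#include <set>
#include <vector>

// Loc stands in for DebugLoc; label ids stand in for MCSymbols.
struct Loc {
  unsigned Line = 0, Col = 0;
  bool operator==(const Loc &O) const { return Line == O.Line && Col == O.Col; }
};

int main() {
  std::vector<Loc> Insns = {{1, 1}, {1, 1}, {2, 5}, {2, 5}, {3, 1}};
  std::set<unsigned> BeginsScope = {0, 2};        // indices that open a scope
  std::map<unsigned, unsigned> LabelsBeforeInsn;  // insn index -> label id

  Loc PrevLoc;                                    // "unknown" location
  unsigned PrevLabel = 0, NextLabel = 0;
  for (unsigned i = 0; i != Insns.size(); ++i) {
    unsigned Label;
    if (PrevLabel && Insns[i] == PrevLoc) {
      Label = PrevLabel;                          // same location: reuse the label
    } else {
      Label = ++NextLabel;                        // new location: "emit" a fresh label
      PrevLoc = Insns[i];
      PrevLabel = Label;
    }
    if (BeginsScope.count(i))
      LabelsBeforeInsn[i] = Label;                // remember it for the scope entry
  }
  for (const auto &KV : LabelsBeforeInsn)
    std::printf("scope starting at insn %u uses label L%u\n", KV.first, KV.second);
  return 0;
}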
bool extractScopeInformation(); diff --git a/lib/CodeGen/AsmPrinter/DwarfException.cpp b/lib/CodeGen/AsmPrinter/DwarfException.cpp index 72c97a4..0ff1036 100644 --- a/lib/CodeGen/AsmPrinter/DwarfException.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfException.cpp @@ -33,7 +33,6 @@ #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Support/Dwarf.h" #include "llvm/Support/FormattedStream.h" -#include "llvm/Support/Timer.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/Twine.h" @@ -41,15 +40,9 @@ using namespace llvm; DwarfException::DwarfException(AsmPrinter *A) : Asm(A), MMI(Asm->MMI), shouldEmitTable(false), shouldEmitMoves(false), - shouldEmitTableModule(false), shouldEmitMovesModule(false), - ExceptionTimer(0) { - if (TimePassesIsEnabled) - ExceptionTimer = new Timer("DWARF Exception Writer"); -} + shouldEmitTableModule(false), shouldEmitMovesModule(false) {} -DwarfException::~DwarfException() { - delete ExceptionTimer; -} +DwarfException::~DwarfException() {} /// EmitCIE - Emit a Common Information Entry (CIE). This holds information that /// is shared among many Frame Description Entries. There is at least one CIE @@ -159,8 +152,7 @@ void DwarfException::EmitCIE(const Function *PersonalityFn, unsigned Index) { // On Darwin the linker honors the alignment of eh_frame, which means it must // be 8-byte on 64-bit targets to match what gcc does. Otherwise you get // holes which confuse readers of eh_frame. - Asm->EmitAlignment(Asm->getTargetData().getPointerSize() == 4 ? 2 : 3, - 0, 0, false); + Asm->EmitAlignment(Asm->getTargetData().getPointerSize() == 4 ? 2 : 3); Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_frame_common_end", Index)); } @@ -262,8 +254,7 @@ void DwarfException::EmitFDE(const FunctionEHFrameInfo &EHFrameInfo) { // On Darwin the linker honors the alignment of eh_frame, which means it // must be 8-byte on 64-bit targets to match what gcc does. Otherwise you // get holes which confuse readers of eh_frame. - Asm->EmitAlignment(Asm->getTargetData().getPointerSize() == 4 ? 2 : 3, - 0, 0, false); + Asm->EmitAlignment(Asm->getTargetData().getPointerSize() == 4 ? 2 : 3); Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_frame_end", EHFrameInfo.Number)); @@ -432,7 +423,7 @@ bool DwarfException::CallToNoUnwindFunction(const MachineInstr *MI) { if (!MO.isGlobal()) continue; - Function *F = dyn_cast<Function>(MO.getGlobal()); + const Function *F = dyn_cast<Function>(MO.getGlobal()); if (F == 0) continue; if (SawFunc) { @@ -586,7 +577,7 @@ ComputeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites, /// 3. Type ID table contains references to all the C++ typeinfo for all /// catches in the function. This tables is reverse indexed base 1. void DwarfException::EmitExceptionTable() { - const std::vector<GlobalVariable *> &TypeInfos = MMI->getTypeInfos(); + const std::vector<const GlobalVariable *> &TypeInfos = MMI->getTypeInfos(); const std::vector<unsigned> &FilterIds = MMI->getFilterIds(); const std::vector<LandingPadInfo> &PadInfos = MMI->getLandingPads(); @@ -692,7 +683,7 @@ void DwarfException::EmitExceptionTable() { // Begin the exception table. Asm->OutStreamer.SwitchSection(LSDASection); - Asm->EmitAlignment(2, 0, 0, false); + Asm->EmitAlignment(2); // Emit the LSDA. 
MCSymbol *GCCETSym = @@ -868,7 +859,7 @@ void DwarfException::EmitExceptionTable() { Asm->OutStreamer.AddComment("-- Catch TypeInfos --"); Asm->OutStreamer.AddBlankLine(); } - for (std::vector<GlobalVariable *>::const_reverse_iterator + for (std::vector<const GlobalVariable *>::const_reverse_iterator I = TypeInfos.rbegin(), E = TypeInfos.rend(); I != E; ++I) { const GlobalVariable *GV = *I; @@ -891,7 +882,7 @@ void DwarfException::EmitExceptionTable() { Asm->EmitULEB128(TypeID, TypeID != 0 ? "Exception specification" : 0); } - Asm->EmitAlignment(2, 0, 0, false); + Asm->EmitAlignment(2); } /// EndModule - Emit all exception information that should come after the @@ -903,9 +894,7 @@ void DwarfException::EndModule() { if (!shouldEmitMovesModule && !shouldEmitTableModule) return; - TimeRegion Timer(ExceptionTimer); - - const std::vector<Function *> Personalities = MMI->getPersonalities(); + const std::vector<const Function *> Personalities = MMI->getPersonalities(); for (unsigned I = 0, E = Personalities.size(); I < E; ++I) EmitCIE(Personalities[I], I); @@ -918,7 +907,6 @@ void DwarfException::EndModule() { /// BeginFunction - Gather pre-function exception information. Assumes it's /// being emitted immediately after the function entry point. void DwarfException::BeginFunction(const MachineFunction *MF) { - TimeRegion Timer(ExceptionTimer); shouldEmitTable = shouldEmitMoves = false; // If any landing pads survive, we need an EH table. @@ -942,7 +930,6 @@ void DwarfException::BeginFunction(const MachineFunction *MF) { void DwarfException::EndFunction() { if (!shouldEmitMoves && !shouldEmitTable) return; - TimeRegion Timer(ExceptionTimer); Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_func_end", Asm->getFunctionNumber())); diff --git a/lib/CodeGen/AsmPrinter/DwarfException.h b/lib/CodeGen/AsmPrinter/DwarfException.h index f35c0b6..5839f8c 100644 --- a/lib/CodeGen/AsmPrinter/DwarfException.h +++ b/lib/CodeGen/AsmPrinter/DwarfException.h @@ -28,7 +28,6 @@ class MachineFunction; class MCAsmInfo; class MCExpr; class MCSymbol; -class Timer; class Function; class AsmPrinter; @@ -82,9 +81,6 @@ class DwarfException { /// should be emitted. bool shouldEmitMovesModule; - /// ExceptionTimer - Timer for the Dwarf exception writer. - Timer *ExceptionTimer; - /// EmitCIE - Emit a Common Information Entry (CIE). This holds information /// that is shared among many Frame Description Entries. There is at least /// one CIE in every non-empty .debug_frame section. diff --git a/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp b/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp index 1db178f..a8c3c7b 100644 --- a/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp +++ b/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp @@ -109,13 +109,11 @@ void OcamlGCMetadataPrinter::finishAssembly(AsmPrinter &AP) { uint64_t FrameSize = FI.getFrameSize(); if (FrameSize >= 1<<16) { - std::string msg; - raw_string_ostream Msg(msg); - Msg << "Function '" << FI.getFunction().getName() - << "' is too large for the ocaml GC! " - << "Frame size " << FrameSize << " >= 65536.\n"; - Msg << "(" << uintptr_t(&FI) << ")"; - llvm_report_error(Msg.str()); // Very rude! + // Very rude! + report_fatal_error("Function '" + FI.getFunction().getName() + + "' is too large for the ocaml GC! 
" + "Frame size " + Twine(FrameSize) + ">= 65536.\n" + "(" + Twine(uintptr_t(&FI)) + ")"); } AP.OutStreamer.AddComment("live roots for " + @@ -125,12 +123,10 @@ void OcamlGCMetadataPrinter::finishAssembly(AsmPrinter &AP) { for (GCFunctionInfo::iterator J = FI.begin(), JE = FI.end(); J != JE; ++J) { size_t LiveCount = FI.live_size(J); if (LiveCount >= 1<<16) { - std::string msg; - raw_string_ostream Msg(msg); - Msg << "Function '" << FI.getFunction().getName() - << "' is too large for the ocaml GC! " - << "Live root count " << LiveCount << " >= 65536."; - llvm_report_error(Msg.str()); // Very rude! + // Very rude! + report_fatal_error("Function '" + FI.getFunction().getName() + + "' is too large for the ocaml GC! " + "Live root count "+Twine(LiveCount)+" >= 65536."); } AP.OutStreamer.EmitSymbolValue(J->Label, IntPtrSize, 0); diff --git a/lib/CodeGen/BranchFolding.cpp b/lib/CodeGen/BranchFolding.cpp index 8f51940..9dec22e 100644 --- a/lib/CodeGen/BranchFolding.cpp +++ b/lib/CodeGen/BranchFolding.cpp @@ -264,14 +264,8 @@ static unsigned HashMachineInstr(const MachineInstr *MI) { return Hash; } -/// HashEndOfMBB - Hash the last few instructions in the MBB. For blocks -/// with no successors, we hash two instructions, because cross-jumping -/// only saves code when at least two instructions are removed (since a -/// branch must be inserted). For blocks with a successor, one of the -/// two blocks to be tail-merged will end with a branch already, so -/// it gains to cross-jump even for one instruction. -static unsigned HashEndOfMBB(const MachineBasicBlock *MBB, - unsigned minCommonTailLength) { +/// HashEndOfMBB - Hash the last instruction in the MBB. +static unsigned HashEndOfMBB(const MachineBasicBlock *MBB) { MachineBasicBlock::const_iterator I = MBB->end(); if (I == MBB->begin()) return 0; // Empty MBB. @@ -283,20 +277,8 @@ static unsigned HashEndOfMBB(const MachineBasicBlock *MBB, return 0; // MBB empty except for debug info. --I; } - unsigned Hash = HashMachineInstr(I); - if (I == MBB->begin() || minCommonTailLength == 1) - return Hash; // Single instr MBB. - - --I; - while (I->isDebugValue()) { - if (I==MBB->begin()) - return Hash; // MBB with single non-debug instr. - --I; - } - // Hash in the second-to-last instruction. - Hash ^= HashMachineInstr(I) << 2; - return Hash; + return HashMachineInstr(I); } /// ComputeCommonTailLength - Given two machine basic blocks, compute the number @@ -811,7 +793,7 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) { MergePotentials.clear(); for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) { if (I->succ_empty()) - MergePotentials.push_back(MergePotentialsElt(HashEndOfMBB(I, 2U), I)); + MergePotentials.push_back(MergePotentialsElt(HashEndOfMBB(I), I)); } // See if we can do any tail merging on those. @@ -897,8 +879,7 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) { // reinsert conditional branch only, for now TII->InsertBranch(*PBB, (TBB == IBB) ? 
FBB : TBB, 0, NewCond); } - MergePotentials.push_back(MergePotentialsElt(HashEndOfMBB(PBB, 1U), - *P)); + MergePotentials.push_back(MergePotentialsElt(HashEndOfMBB(PBB), *P)); } } if (MergePotentials.size() >= 2) diff --git a/lib/CodeGen/CMakeLists.txt b/lib/CodeGen/CMakeLists.txt index 62d1883..3e38872 100644 --- a/lib/CodeGen/CMakeLists.txt +++ b/lib/CodeGen/CMakeLists.txt @@ -1,4 +1,5 @@ add_llvm_library(LLVMCodeGen + Analysis.cpp AggressiveAntiDepBreaker.cpp BranchFolding.cpp CalcSpillWeights.cpp @@ -49,6 +50,7 @@ add_llvm_library(LLVMCodeGen ProcessImplicitDefs.cpp PrologEpilogInserter.cpp PseudoSourceValue.cpp + RegAllocFast.cpp RegAllocLinearScan.cpp RegAllocLocal.cpp RegAllocPBQP.cpp diff --git a/lib/CodeGen/CriticalAntiDepBreaker.cpp b/lib/CodeGen/CriticalAntiDepBreaker.cpp index 7d3de89..759fbaa 100644 --- a/lib/CodeGen/CriticalAntiDepBreaker.cpp +++ b/lib/CodeGen/CriticalAntiDepBreaker.cpp @@ -143,13 +143,13 @@ void CriticalAntiDepBreaker::Observe(MachineInstr *MI, unsigned Count, /// CriticalPathStep - Return the next SUnit after SU on the bottom-up /// critical path. -static SDep *CriticalPathStep(SUnit *SU) { - SDep *Next = 0; +static const SDep *CriticalPathStep(const SUnit *SU) { + const SDep *Next = 0; unsigned NextDepth = 0; // Find the predecessor edge with the greatest depth. - for (SUnit::pred_iterator P = SU->Preds.begin(), PE = SU->Preds.end(); + for (SUnit::const_pred_iterator P = SU->Preds.begin(), PE = SU->Preds.end(); P != PE; ++P) { - SUnit *PredSU = P->getSUnit(); + const SUnit *PredSU = P->getSUnit(); unsigned PredLatency = P->getLatency(); unsigned PredTotalLatency = PredSU->getDepth() + PredLatency; // In the case of a latency tie, prefer an anti-dependency edge over @@ -326,18 +326,18 @@ CriticalAntiDepBreaker::findSuitableFreeRegister(MachineInstr *MI, } unsigned CriticalAntiDepBreaker:: -BreakAntiDependencies(std::vector<SUnit>& SUnits, - MachineBasicBlock::iterator& Begin, - MachineBasicBlock::iterator& End, +BreakAntiDependencies(const std::vector<SUnit>& SUnits, + MachineBasicBlock::iterator Begin, + MachineBasicBlock::iterator End, unsigned InsertPosIndex) { // The code below assumes that there is at least one instruction, // so just duck out immediately if the block is empty. if (SUnits.empty()) return 0; // Find the node at the bottom of the critical path. - SUnit *Max = 0; + const SUnit *Max = 0; for (unsigned i = 0, e = SUnits.size(); i != e; ++i) { - SUnit *SU = &SUnits[i]; + const SUnit *SU = &SUnits[i]; if (!Max || SU->getDepth() + SU->Latency > Max->getDepth() + Max->Latency) Max = SU; } @@ -357,7 +357,7 @@ BreakAntiDependencies(std::vector<SUnit>& SUnits, // Track progress along the critical path through the SUnit graph as we walk // the instructions. - SUnit *CriticalPathSU = Max; + const SUnit *CriticalPathSU = Max; MachineInstr *CriticalPathMI = CriticalPathSU->getInstr(); // Consider this pattern: @@ -429,8 +429,8 @@ BreakAntiDependencies(std::vector<SUnit>& SUnits, // the anti-dependencies in an instruction in order to be effective. unsigned AntiDepReg = 0; if (MI == CriticalPathMI) { - if (SDep *Edge = CriticalPathStep(CriticalPathSU)) { - SUnit *NextSU = Edge->getSUnit(); + if (const SDep *Edge = CriticalPathStep(CriticalPathSU)) { + const SUnit *NextSU = Edge->getSUnit(); // Only consider anti-dependence edges. 
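The constified CriticalPathStep above walks the critical path bottom-up by picking, among a node's predecessor edges, the one whose source depth plus edge latency is largest. A minimal sketch of that selection step, with Node and Edge as illustrative stand-ins for SUnit and SDep (the real code additionally prefers anti-dependence edges on latency ties):

#include <cstdio>
#include <vector>

struct Node;
struct Edge { Node *Pred; unsigned Latency; };
struct Node { unsigned Depth; std::vector<Edge> Preds; };

// Pick the predecessor edge that maximizes depth(pred) + latency(edge).
static const Edge *criticalPathStep(const Node *N) {
  const Edge *Next = nullptr;
  unsigned NextDepth = 0;
  for (const Edge &E : N->Preds) {
    unsigned Total = E.Pred->Depth + E.Latency;
    if (!Next || Total > NextDepth) {
      NextDepth = Total;
      Next = &E;
    }
  }
  return Next;                         // null if N has no predecessors
}

int main() {
  Node A{3, {}}, B{5, {}}, C{0, {}};
  C.Preds = {{&A, 2}, {&B, 1}};        // A contributes 3+2=5, B contributes 5+1=6
  const Edge *E = criticalPathStep(&C);
  std::printf("next critical-path node has depth %u\n", E->Pred->Depth);  // prints 5
  return 0;
}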
if (Edge->getKind() == SDep::Anti) { @@ -452,7 +452,7 @@ BreakAntiDependencies(std::vector<SUnit>& SUnits, // Also, if there are dependencies on other SUnits with the // same register as the anti-dependency, don't attempt to // break it. - for (SUnit::pred_iterator P = CriticalPathSU->Preds.begin(), + for (SUnit::const_pred_iterator P = CriticalPathSU->Preds.begin(), PE = CriticalPathSU->Preds.end(); P != PE; ++P) if (P->getSUnit() == NextSU ? (P->getKind() != SDep::Anti || P->getReg() != AntiDepReg) : diff --git a/lib/CodeGen/CriticalAntiDepBreaker.h b/lib/CodeGen/CriticalAntiDepBreaker.h index 9e8db02..cc42dd2 100644 --- a/lib/CodeGen/CriticalAntiDepBreaker.h +++ b/lib/CodeGen/CriticalAntiDepBreaker.h @@ -72,9 +72,9 @@ namespace llvm { /// path /// of the ScheduleDAG and break them by renaming registers. /// - unsigned BreakAntiDependencies(std::vector<SUnit>& SUnits, - MachineBasicBlock::iterator& Begin, - MachineBasicBlock::iterator& End, + unsigned BreakAntiDependencies(const std::vector<SUnit>& SUnits, + MachineBasicBlock::iterator Begin, + MachineBasicBlock::iterator End, unsigned InsertPosIndex); /// Observe - Update liveness information to account for the current diff --git a/lib/CodeGen/DwarfEHPrepare.cpp b/lib/CodeGen/DwarfEHPrepare.cpp index 7dbfd7d..f6739f4 100644 --- a/lib/CodeGen/DwarfEHPrepare.cpp +++ b/lib/CodeGen/DwarfEHPrepare.cpp @@ -34,6 +34,7 @@ STATISTIC(NumStackTempsIntroduced, "Number of stack temporaries introduced"); namespace { class DwarfEHPrepare : public FunctionPass { + const TargetMachine *TM; const TargetLowering *TLI; bool CompileFast; @@ -84,7 +85,7 @@ namespace { } /// CleanupSelectors - Any remaining eh.selector intrinsic calls which still - /// use the ".llvm.eh.catch.all.value" call need to convert to using it's + /// use the ".llvm.eh.catch.all.value" call need to convert to using its /// initializer instead. bool CleanupSelectors(); @@ -154,8 +155,9 @@ namespace { public: static char ID; // Pass identification, replacement for typeid. - DwarfEHPrepare(const TargetLowering *tli, bool fast) : - FunctionPass(&ID), TLI(tli), CompileFast(fast), + DwarfEHPrepare(const TargetMachine *tm, bool fast) : + FunctionPass(&ID), TM(tm), TLI(TM->getTargetLowering()), + CompileFast(fast), ExceptionValueIntrinsic(0), SelectorIntrinsic(0), URoR(0), EHCatchAllValue(0), RewindFunction(0) {} @@ -180,8 +182,8 @@ namespace { char DwarfEHPrepare::ID = 0; -FunctionPass *llvm::createDwarfEHPass(const TargetLowering *tli, bool fast) { - return new DwarfEHPrepare(tli, fast); +FunctionPass *llvm::createDwarfEHPass(const TargetMachine *tm, bool fast) { + return new DwarfEHPrepare(tm, fast); } /// FindAllCleanupSelectors - Find all eh.selector calls that are clean-ups. @@ -218,7 +220,7 @@ FindAllURoRInvokes(SmallPtrSet<InvokeInst*, 32> &URoRInvokes) { } /// CleanupSelectors - Any remaining eh.selector intrinsic calls which still use -/// the ".llvm.eh.catch.all.value" call need to convert to using it's +/// the ".llvm.eh.catch.all.value" call need to convert to using its /// initializer instead. 
bool DwarfEHPrepare::CleanupSelectors() { if (!EHCatchAllValue) return false; @@ -421,7 +423,7 @@ bool DwarfEHPrepare::HandleURoRInvokes() { bool DwarfEHPrepare::NormalizeLandingPads() { bool Changed = false; - const MCAsmInfo *MAI = TLI->getTargetMachine().getMCAsmInfo(); + const MCAsmInfo *MAI = TM->getMCAsmInfo(); bool usingSjLjEH = MAI->getExceptionHandlingType() == ExceptionHandling::SjLj; for (Function::iterator I = F->begin(), E = F->end(); I != E; ++I) { diff --git a/lib/CodeGen/ELFWriter.cpp b/lib/CodeGen/ELFWriter.cpp index eda167c..b644ebe 100644 --- a/lib/CodeGen/ELFWriter.cpp +++ b/lib/CodeGen/ELFWriter.cpp @@ -208,7 +208,7 @@ ELFSection &ELFWriter::getDtorSection() { } // getTextSection - Get the text section for the specified function -ELFSection &ELFWriter::getTextSection(Function *F) { +ELFSection &ELFWriter::getTextSection(const Function *F) { const MCSectionELF *Text = (const MCSectionELF *)TLOF.SectionForGlobal(F, Mang, TM); return getSection(Text->getSectionName(), Text->getType(), Text->getFlags()); @@ -507,7 +507,7 @@ void ELFWriter::EmitGlobalConstant(const Constant *CV, ELFSection &GblS) { std::string msg; raw_string_ostream ErrorMsg(msg); ErrorMsg << "Constant unimp for type: " << *CV->getType(); - llvm_report_error(ErrorMsg.str()); + report_fatal_error(ErrorMsg.str()); } // ResolveConstantExpr - Resolve the constant expression until it stop @@ -572,10 +572,8 @@ CstExprResTy ELFWriter::ResolveConstantExpr(const Constant *CV) { } } - std::string msg(CE->getOpcodeName()); - raw_string_ostream ErrorMsg(msg); - ErrorMsg << ": Unsupported ConstantExpr type"; - llvm_report_error(ErrorMsg.str()); + report_fatal_error(CE->getOpcodeName() + + StringRef(": Unsupported ConstantExpr type")); return std::make_pair(CV, 0); // silence warning } diff --git a/lib/CodeGen/ELFWriter.h b/lib/CodeGen/ELFWriter.h index b61b484..db66ecc 100644 --- a/lib/CodeGen/ELFWriter.h +++ b/lib/CodeGen/ELFWriter.h @@ -191,7 +191,7 @@ namespace llvm { ELFSection &getDtorSection(); ELFSection &getJumpTableSection(); ELFSection &getConstantPoolSection(MachineConstantPoolEntry &CPE); - ELFSection &getTextSection(Function *F); + ELFSection &getTextSection(const Function *F); ELFSection &getRelocSection(ELFSection &S); // Helpers for obtaining ELF specific info. diff --git a/lib/CodeGen/ExactHazardRecognizer.cpp b/lib/CodeGen/ExactHazardRecognizer.cpp index 61959bb..af5f289 100644 --- a/lib/CodeGen/ExactHazardRecognizer.cpp +++ b/lib/CodeGen/ExactHazardRecognizer.cpp @@ -29,7 +29,7 @@ ExactHazardRecognizer(const InstrItineraryData &LItinData) : // Determine the maximum depth of any itinerary. This determines the // depth of the scoreboard. We always make the scoreboard at least 1 // cycle deep to avoid dealing with the boundary condition. 
- ScoreboardDepth = 1; + unsigned ScoreboardDepth = 1; if (!ItinData.isEmpty()) { for (unsigned idx = 0; ; ++idx) { if (ItinData.isEndMarker(idx)) @@ -45,35 +45,27 @@ ExactHazardRecognizer(const InstrItineraryData &LItinData) : } } - Scoreboard = new unsigned[ScoreboardDepth]; - ScoreboardHead = 0; + ReservedScoreboard.reset(ScoreboardDepth); + RequiredScoreboard.reset(ScoreboardDepth); DEBUG(dbgs() << "Using exact hazard recognizer: ScoreboardDepth = " << ScoreboardDepth << '\n'); } -ExactHazardRecognizer::~ExactHazardRecognizer() { - delete [] Scoreboard; -} - void ExactHazardRecognizer::Reset() { - memset(Scoreboard, 0, ScoreboardDepth * sizeof(unsigned)); - ScoreboardHead = 0; + RequiredScoreboard.reset(); + ReservedScoreboard.reset(); } -unsigned ExactHazardRecognizer::getFutureIndex(unsigned offset) { - return (ScoreboardHead + offset) % ScoreboardDepth; -} - -void ExactHazardRecognizer::dumpScoreboard() { +void ExactHazardRecognizer::ScoreBoard::dump() const { dbgs() << "Scoreboard:\n"; - - unsigned last = ScoreboardDepth - 1; - while ((last > 0) && (Scoreboard[getFutureIndex(last)] == 0)) + + unsigned last = Depth - 1; + while ((last > 0) && ((*this)[last] == 0)) last--; for (unsigned i = 0; i <= last; i++) { - unsigned FUs = Scoreboard[getFutureIndex(i)]; + unsigned FUs = (*this)[i]; dbgs() << "\t"; for (int j = 31; j >= 0; j--) dbgs() << ((FUs & (1 << j)) ? '1' : '0'); @@ -96,11 +88,23 @@ ExactHazardRecognizer::HazardType ExactHazardRecognizer::getHazardType(SUnit *SU // stage is occupied. FIXME it would be more accurate to find the // same unit free in all the cycles. for (unsigned int i = 0; i < IS->getCycles(); ++i) { - assert(((cycle + i) < ScoreboardDepth) && + assert(((cycle + i) < RequiredScoreboard.getDepth()) && "Scoreboard depth exceeded!"); - - unsigned index = getFutureIndex(cycle + i); - unsigned freeUnits = IS->getUnits() & ~Scoreboard[index]; + + unsigned freeUnits = IS->getUnits(); + switch (IS->getReservationKind()) { + default: + assert(0 && "Invalid FU reservation"); + case InstrStage::Required: + // Required FUs conflict with both reserved and required ones + freeUnits &= ~ReservedScoreboard[cycle + i]; + // FALLTHROUGH + case InstrStage::Reserved: + // Reserved FUs can conflict only with required ones. + freeUnits &= ~RequiredScoreboard[cycle + i]; + break; + } + if (!freeUnits) { DEBUG(dbgs() << "*** Hazard in cycle " << (cycle + i) << ", "); DEBUG(dbgs() << "SU(" << SU->NodeNum << "): "); @@ -108,14 +112,14 @@ ExactHazardRecognizer::HazardType ExactHazardRecognizer::getHazardType(SUnit *SU return Hazard; } } - + // Advance the cycle to the next stage. cycle += IS->getNextCycles(); } return NoHazard; } - + void ExactHazardRecognizer::EmitInstruction(SUnit *SU) { if (ItinData.isEmpty()) return; @@ -125,37 +129,52 @@ void ExactHazardRecognizer::EmitInstruction(SUnit *SU) { // Use the itinerary for the underlying instruction to reserve FU's // in the scoreboard at the appropriate future cycles. unsigned idx = SU->getInstr()->getDesc().getSchedClass(); - for (const InstrStage *IS = ItinData.beginStage(idx), + for (const InstrStage *IS = ItinData.beginStage(idx), *E = ItinData.endStage(idx); IS != E; ++IS) { // We must reserve one of the stage's units for every cycle the // stage is occupied. FIXME it would be more accurate to reserve // the same unit free in all the cycles. 
for (unsigned int i = 0; i < IS->getCycles(); ++i) { - assert(((cycle + i) < ScoreboardDepth) && + assert(((cycle + i) < RequiredScoreboard.getDepth()) && "Scoreboard depth exceeded!"); - - unsigned index = getFutureIndex(cycle + i); - unsigned freeUnits = IS->getUnits() & ~Scoreboard[index]; - + + unsigned freeUnits = IS->getUnits(); + switch (IS->getReservationKind()) { + default: + assert(0 && "Invalid FU reservation"); + case InstrStage::Required: + // Required FUs conflict with both reserved and required ones + freeUnits &= ~ReservedScoreboard[cycle + i]; + // FALLTHROUGH + case InstrStage::Reserved: + // Reserved FUs can conflict only with required ones. + freeUnits &= ~RequiredScoreboard[cycle + i]; + break; + } + // reduce to a single unit unsigned freeUnit = 0; do { freeUnit = freeUnits; freeUnits = freeUnit & (freeUnit - 1); } while (freeUnits); - + assert(freeUnit && "No function unit available!"); - Scoreboard[index] |= freeUnit; + if (IS->getReservationKind() == InstrStage::Required) + RequiredScoreboard[cycle + i] |= freeUnit; + else + ReservedScoreboard[cycle + i] |= freeUnit; } - + // Advance the cycle to the next stage. cycle += IS->getNextCycles(); } - - DEBUG(dumpScoreboard()); + + DEBUG(ReservedScoreboard.dump()); + DEBUG(RequiredScoreboard.dump()); } - + void ExactHazardRecognizer::AdvanceCycle() { - Scoreboard[ScoreboardHead] = 0; - ScoreboardHead = getFutureIndex(1); + ReservedScoreboard[0] = 0; ReservedScoreboard.advance(); + RequiredScoreboard[0] = 0; RequiredScoreboard.advance(); } diff --git a/lib/CodeGen/ExactHazardRecognizer.h b/lib/CodeGen/ExactHazardRecognizer.h index 71ac979..91c81a9 100644 --- a/lib/CodeGen/ExactHazardRecognizer.h +++ b/lib/CodeGen/ExactHazardRecognizer.h @@ -22,35 +22,60 @@ namespace llvm { class ExactHazardRecognizer : public ScheduleHazardRecognizer { - // Itinerary data for the target. - const InstrItineraryData &ItinData; - - // Scoreboard to track function unit usage. Scoreboard[0] is a + // ScoreBoard to track function unit usage. ScoreBoard[0] is a // mask of the FUs in use in the cycle currently being - // schedule. Scoreboard[1] is a mask for the next cycle. The - // Scoreboard is used as a circular buffer with the current cycle - // indicated by ScoreboardHead. - unsigned *Scoreboard; + // schedule. ScoreBoard[1] is a mask for the next cycle. The + // ScoreBoard is used as a circular buffer with the current cycle + // indicated by Head. + class ScoreBoard { + unsigned *Data; + + // The maximum number of cycles monitored by the Scoreboard. This + // value is determined based on the target itineraries to ensure + // that all hazards can be tracked. + size_t Depth; + // Indices into the Scoreboard that represent the current cycle. + size_t Head; + public: + ScoreBoard():Data(NULL), Depth(0), Head(0) { } + ~ScoreBoard() { + delete[] Data; + } + + size_t getDepth() const { return Depth; } + unsigned& operator[](size_t idx) const { + assert(Depth && "ScoreBoard was not initialized properly!"); + + return Data[(Head + idx) % Depth]; + } - // The maximum number of cycles monitored by the Scoreboard. This - // value is determined based on the target itineraries to ensure - // that all hazards can be tracked. - unsigned ScoreboardDepth; + void reset(size_t d = 1) { + if (Data == NULL) { + Depth = d; + Data = new unsigned[Depth]; + } - // Indices into the Scoreboard that represent the current cycle. 
- unsigned ScoreboardHead; + memset(Data, 0, Depth * sizeof(Data[0])); + Head = 0; + } - // Return the scoreboard index to use for 'offset' cycles in the - // future. 'offset' of 0 returns ScoreboardHead. - unsigned getFutureIndex(unsigned offset); + void advance() { + Head = (Head + 1) % Depth; + } - // Print the scoreboard. - void dumpScoreboard(); + // Print the scoreboard. + void dump() const; + }; + + // Itinerary data for the target. + const InstrItineraryData &ItinData; + + ScoreBoard ReservedScoreboard; + ScoreBoard RequiredScoreboard; public: ExactHazardRecognizer(const InstrItineraryData &ItinData); - ~ExactHazardRecognizer(); - + virtual HazardType getHazardType(SUnit *SU); virtual void Reset(); virtual void EmitInstruction(SUnit *SU); diff --git a/lib/CodeGen/GCStrategy.cpp b/lib/CodeGen/GCStrategy.cpp index 6d7cc51..790cb21 100644 --- a/lib/CodeGen/GCStrategy.cpp +++ b/lib/CodeGen/GCStrategy.cpp @@ -181,9 +181,10 @@ bool LowerIntrinsics::InsertRootInitializers(Function &F, AllocaInst **Roots, for (AllocaInst **I = Roots, **E = Roots + Count; I != E; ++I) if (!InitedRoots.count(*I)) { - new StoreInst(ConstantPointerNull::get(cast<PointerType>( - cast<PointerType>((*I)->getType())->getElementType())), - *I, IP); + StoreInst* SI = new StoreInst(ConstantPointerNull::get(cast<PointerType>( + cast<PointerType>((*I)->getType())->getElementType())), + *I); + SI->insertAfter(*I); MadeChange = true; } diff --git a/lib/CodeGen/IntrinsicLowering.cpp b/lib/CodeGen/IntrinsicLowering.cpp index 87ab7ef..e1c52f7 100644 --- a/lib/CodeGen/IntrinsicLowering.cpp +++ b/lib/CodeGen/IntrinsicLowering.cpp @@ -331,15 +331,15 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) { IRBuilder<> Builder(CI->getParent(), CI); LLVMContext &Context = CI->getContext(); - Function *Callee = CI->getCalledFunction(); + const Function *Callee = CI->getCalledFunction(); assert(Callee && "Cannot lower an indirect call!"); switch (Callee->getIntrinsicID()) { case Intrinsic::not_intrinsic: - llvm_report_error("Cannot lower a call to a non-intrinsic function '"+ + report_fatal_error("Cannot lower a call to a non-intrinsic function '"+ Callee->getName() + "'!"); default: - llvm_report_error("Code generator does not support intrinsic function '"+ + report_fatal_error("Code generator does not support intrinsic function '"+ Callee->getName()+"'!"); // The setjmp/longjmp intrinsics should only exist in the code if it was diff --git a/lib/CodeGen/LLVMTargetMachine.cpp b/lib/CodeGen/LLVMTargetMachine.cpp index ed57f4c..331dc7d 100644 --- a/lib/CodeGen/LLVMTargetMachine.cpp +++ b/lib/CodeGen/LLVMTargetMachine.cpp @@ -50,6 +50,9 @@ static cl::opt<bool> DisableSSC("disable-ssc", cl::Hidden, cl::desc("Disable Stack Slot Coloring")); static cl::opt<bool> DisableMachineLICM("disable-machine-licm", cl::Hidden, cl::desc("Disable Machine LICM")); +static cl::opt<bool> DisablePostRAMachineLICM("disable-postra-machine-licm", + cl::Hidden, + cl::desc("Disable Machine LICM")); static cl::opt<bool> DisableMachineSink("disable-machine-sink", cl::Hidden, cl::desc("Disable Machine Sinking")); static cl::opt<bool> DisableLSR("disable-lsr", cl::Hidden, @@ -245,10 +248,10 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM, // pad is shared by multiple invokes and is also a target of a normal // edge from elsewhere. 
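The ScoreBoard helper added to ExactHazardRecognizer.h above keeps one function-unit mask per future cycle in a fixed-depth circular buffer, with index 0 meaning the current cycle. A standalone sketch of that structure under illustrative names; the real recognizer keeps two such boards, one for required and one for reserved units, and clears slot 0 before advancing rather than inside advance():

#include <cassert>
#include <cstdio>
#include <cstring>

// A fixed-depth circular buffer of bitmasks indexed relative to "now":
// buf[0] is the current cycle, buf[1] the next, and advance() retires one cycle.
class CycleMaskBuffer {
  unsigned *Data = nullptr;
  size_t Depth = 0;
  size_t Head = 0;
public:
  ~CycleMaskBuffer() { delete[] Data; }
  size_t depth() const { return Depth; }
  void reset(size_t D = 1) {
    if (!Data) { Depth = D; Data = new unsigned[Depth]; }
    std::memset(Data, 0, Depth * sizeof(Data[0]));
    Head = 0;
  }
  unsigned &operator[](size_t Idx) {
    assert(Depth && Idx < Depth && "buffer not initialized or index too far ahead");
    return Data[(Head + Idx) % Depth];
  }
  void advance() {
    (*this)[0] = 0;                    // the cycle we leave becomes a free future slot
    Head = (Head + 1) % Depth;
  }
};

int main() {
  CycleMaskBuffer FUs;
  FUs.reset(4);                        // track four cycles of function-unit usage
  FUs[0] |= 0x1;                       // unit 0 busy this cycle
  FUs[2] |= 0x2;                       // unit 1 taken two cycles from now
  FUs.advance();                       // move to the next cycle
  std::printf("mask one cycle ahead: 0x%x\n", FUs[1]);   // prints 0x2
  return 0;
}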
PM.add(createSjLjEHPass(getTargetLowering())); - PM.add(createDwarfEHPass(getTargetLowering(), OptLevel==CodeGenOpt::None)); + PM.add(createDwarfEHPass(this, OptLevel==CodeGenOpt::None)); break; case ExceptionHandling::Dwarf: - PM.add(createDwarfEHPass(getTargetLowering(), OptLevel==CodeGenOpt::None)); + PM.add(createDwarfEHPass(this, OptLevel==CodeGenOpt::None)); break; case ExceptionHandling::None: PM.add(createLowerInvokePass(getTargetLowering())); @@ -337,12 +340,18 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM, PM.add(createRegisterAllocator()); printAndVerify(PM, "After Register Allocation"); - // Perform stack slot coloring. - if (OptLevel != CodeGenOpt::None && !DisableSSC) { + // Perform stack slot coloring and post-ra machine LICM. + if (OptLevel != CodeGenOpt::None) { // FIXME: Re-enable coloring with register when it's capable of adding // kill markers. - PM.add(createStackSlotColoringPass(false)); - printAndVerify(PM, "After StackSlotColoring"); + if (!DisableSSC) + PM.add(createStackSlotColoringPass(false)); + + // Run post-ra machine LICM to hoist reloads / remats. + if (!DisablePostRAMachineLICM) + PM.add(createMachineLICMPass(false)); + + printAndVerify(PM, "After StackSlotColoring and postra Machine LICM"); } // Run post-ra passes. diff --git a/lib/CodeGen/LiveIntervalAnalysis.cpp b/lib/CodeGen/LiveIntervalAnalysis.cpp index 23cff07..26a7190 100644 --- a/lib/CodeGen/LiveIntervalAnalysis.cpp +++ b/lib/CodeGen/LiveIntervalAnalysis.cpp @@ -665,10 +665,11 @@ void LiveIntervals::computeIntervals() { // Track the index of the current machine instr. SlotIndex MIIndex = getMBBStartIdx(MBB); - DEBUG(dbgs() << MBB->getName() << ":\n"); + DEBUG(dbgs() << "BB#" << MBB->getNumber() + << ":\t\t# derived from " << MBB->getName() << "\n"); // Create intervals for live-ins to this BB first. - for (MachineBasicBlock::const_livein_iterator LI = MBB->livein_begin(), + for (MachineBasicBlock::livein_iterator LI = MBB->livein_begin(), LE = MBB->livein_end(); LI != LE; ++LI) { handleLiveInRegister(MBB, MIIndex, getOrCreateInterval(*LI)); // Multiple live-ins can alias the same register. @@ -1296,9 +1297,26 @@ rewriteInstructionsForSpills(const LiveInterval &li, bool TrySplit, MachineOperand &O = ri.getOperand(); ++ri; if (MI->isDebugValue()) { - // Remove debug info for now. - O.setReg(0U); + // Modify DBG_VALUE now that the value is in a spill slot. + if (Slot != VirtRegMap::MAX_STACK_SLOT || isLoadSS) { + uint64_t Offset = MI->getOperand(1).getImm(); + const MDNode *MDPtr = MI->getOperand(2).getMetadata(); + DebugLoc DL = MI->getDebugLoc(); + int FI = isLoadSS ? 
LdSlot : (int)Slot; + if (MachineInstr *NewDV = tii_->emitFrameIndexDebugValue(*mf_, FI, + Offset, MDPtr, DL)) { + DEBUG(dbgs() << "Modifying debug info due to spill:" << "\t" << *MI); + ReplaceMachineInstrInMaps(MI, NewDV); + MachineBasicBlock *MBB = MI->getParent(); + MBB->insert(MBB->erase(MI), NewDV); + continue; + } + } + DEBUG(dbgs() << "Removing debug info due to spill:" << "\t" << *MI); + RemoveMachineInstrFromMaps(MI); + vrm.RemoveMachineInstrFromMaps(MI); + MI->eraseFromParent(); continue; } assert(!O.isImplicit() && "Spilling register that's used as implicit use?"); @@ -2085,7 +2103,7 @@ bool LiveIntervals::spillPhysRegAroundRegDefsUses(const LiveInterval &li, << "constraints:\n"; MI->print(Msg, tm_); } - llvm_report_error(Msg.str()); + report_fatal_error(Msg.str()); } for (const unsigned* AS = tri_->getSubRegisters(PReg); *AS; ++AS) { if (!hasInterval(*AS)) diff --git a/lib/CodeGen/LiveVariables.cpp b/lib/CodeGen/LiveVariables.cpp index ca8ecff..079684e 100644 --- a/lib/CodeGen/LiveVariables.cpp +++ b/lib/CodeGen/LiveVariables.cpp @@ -531,7 +531,7 @@ bool LiveVariables::runOnMachineFunction(MachineFunction &mf) { // Mark live-in registers as live-in. SmallVector<unsigned, 4> Defs; - for (MachineBasicBlock::const_livein_iterator II = MBB->livein_begin(), + for (MachineBasicBlock::livein_iterator II = MBB->livein_begin(), EE = MBB->livein_end(); II != EE; ++II) { assert(TargetRegisterInfo::isPhysicalRegister(*II) && "Cannot have a live-in virtual register!"); diff --git a/lib/CodeGen/MachineBasicBlock.cpp b/lib/CodeGen/MachineBasicBlock.cpp index bd0ccb4..eaaa1f8 100644 --- a/lib/CodeGen/MachineBasicBlock.cpp +++ b/lib/CodeGen/MachineBasicBlock.cpp @@ -191,7 +191,7 @@ void MachineBasicBlock::print(raw_ostream &OS) const { const TargetRegisterInfo *TRI = MF->getTarget().getRegisterInfo(); if (!livein_empty()) { OS << " Live Ins:"; - for (const_livein_iterator I = livein_begin(),E = livein_end(); I != E; ++I) + for (livein_iterator I = livein_begin(),E = livein_end(); I != E; ++I) OutputReg(OS, *I, TRI); OS << '\n'; } @@ -218,13 +218,14 @@ void MachineBasicBlock::print(raw_ostream &OS) const { } void MachineBasicBlock::removeLiveIn(unsigned Reg) { - livein_iterator I = std::find(livein_begin(), livein_end(), Reg); - assert(I != livein_end() && "Not a live in!"); + std::vector<unsigned>::iterator I = + std::find(LiveIns.begin(), LiveIns.end(), Reg); + assert(I != LiveIns.end() && "Not a live in!"); LiveIns.erase(I); } bool MachineBasicBlock::isLiveIn(unsigned Reg) const { - const_livein_iterator I = std::find(livein_begin(), livein_end(), Reg); + livein_iterator I = std::find(livein_begin(), livein_end(), Reg); return I != livein_end(); } diff --git a/lib/CodeGen/MachineCSE.cpp b/lib/CodeGen/MachineCSE.cpp index 597d51d..84c3d71 100644 --- a/lib/CodeGen/MachineCSE.cpp +++ b/lib/CodeGen/MachineCSE.cpp @@ -20,6 +20,7 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Target/TargetInstrInfo.h" +#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/ScopedHashTable.h" #include "llvm/ADT/Statistic.h" #include "llvm/Support/Debug.h" @@ -51,9 +52,12 @@ namespace { } private: - unsigned CurrVN; + typedef ScopedHashTableScope<MachineInstr*, unsigned, + MachineInstrExpressionTrait> ScopeType; + DenseMap<MachineBasicBlock*, ScopeType*> ScopeMap; ScopedHashTable<MachineInstr*, unsigned, MachineInstrExpressionTrait> VNT; SmallVector<MachineInstr*, 64> Exps; + unsigned CurrVN; bool PerformTrivialCoalescing(MachineInstr *MI, MachineBasicBlock *MBB); 
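MachineCSE now replaces its recursive dominator-tree walk with an explicit work list: each block opens a scope when it is entered (see EnterScope and PerformCSE below), and scopes are closed bottom-up once every child in the dominator tree has been processed (ExitScopeIfDone). A self-contained sketch of that traversal shape, with Node and the printouts standing in for dominator-tree nodes and the scoped hash-table scopes:

#include <cstdio>
#include <map>
#include <string>
#include <vector>

struct Node { std::string Name; std::vector<Node *> Children; };

int main() {
  Node D{"D", {}}, C{"C", {}}, B{"B", {&D}}, A{"A", {&B, &C}};

  // DFS to fix the visit order and record how many children remain open.
  std::vector<Node *> Order, Work{&A};
  std::map<Node *, Node *> Parent;
  std::map<Node *, unsigned> OpenChildren;
  while (!Work.empty()) {
    Node *N = Work.back(); Work.pop_back();
    Order.push_back(N);
    OpenChildren[N] = N->Children.size();
    for (Node *Child : N->Children) { Parent[Child] = N; Work.push_back(Child); }
  }

  // Process blocks; a node's scope stays open until all of its children are done.
  for (Node *N : Order) {
    std::printf("enter %s\n", N->Name.c_str());
    // ... value-number the block's instructions here ...
    Node *Done = N;
    while (Done && OpenChildren[Done] == 0) {      // leaf, or all children closed
      std::printf("exit  %s\n", Done->Name.c_str());
      Node *P = Parent.count(Done) ? Parent[Done] : nullptr;
      if (P) --OpenChildren[P];
      Done = P;
    }
  }
  return 0;
}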
bool isPhysDefTriviallyDead(unsigned Reg, @@ -63,7 +67,13 @@ namespace { bool isCSECandidate(MachineInstr *MI); bool isProfitableToCSE(unsigned CSReg, unsigned Reg, MachineInstr *CSMI, MachineInstr *MI); - bool ProcessBlock(MachineDomTreeNode *Node); + void EnterScope(MachineBasicBlock *MBB); + void ExitScope(MachineBasicBlock *MBB); + bool ProcessBlock(MachineBasicBlock *MBB); + void ExitScopeIfDone(MachineDomTreeNode *Node, + DenseMap<MachineDomTreeNode*, unsigned> &OpenChildren, + DenseMap<MachineDomTreeNode*, MachineDomTreeNode*> &ParentMap); + bool PerformCSE(MachineDomTreeNode *Node); }; } // end anonymous namespace @@ -277,13 +287,24 @@ bool MachineCSE::isProfitableToCSE(unsigned CSReg, unsigned Reg, return CSBBs.count(MI->getParent()); } -bool MachineCSE::ProcessBlock(MachineDomTreeNode *Node) { +void MachineCSE::EnterScope(MachineBasicBlock *MBB) { + DEBUG(dbgs() << "Entering: " << MBB->getName() << '\n'); + ScopeType *Scope = new ScopeType(VNT); + ScopeMap[MBB] = Scope; +} + +void MachineCSE::ExitScope(MachineBasicBlock *MBB) { + DEBUG(dbgs() << "Exiting: " << MBB->getName() << '\n'); + DenseMap<MachineBasicBlock*, ScopeType*>::iterator SI = ScopeMap.find(MBB); + assert(SI != ScopeMap.end()); + ScopeMap.erase(SI); + delete SI->second; +} + +bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) { bool Changed = false; SmallVector<std::pair<unsigned, unsigned>, 8> CSEPairs; - ScopedHashTableScope<MachineInstr*, unsigned, - MachineInstrExpressionTrait> VNTS(VNT); - MachineBasicBlock *MBB = Node->getBlock(); for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E; ) { MachineInstr *MI = &*I; ++I; @@ -356,10 +377,63 @@ bool MachineCSE::ProcessBlock(MachineDomTreeNode *Node) { CSEPairs.clear(); } - // Recursively call ProcessBlock with childred. - const std::vector<MachineDomTreeNode*> &Children = Node->getChildren(); - for (unsigned i = 0, e = Children.size(); i != e; ++i) - Changed |= ProcessBlock(Children[i]); + return Changed; +} + +/// ExitScopeIfDone - Destroy scope for the MBB that corresponds to the given +/// dominator tree node if its a leaf or all of its children are done. Walk +/// up the dominator tree to destroy ancestors which are now done. +void +MachineCSE::ExitScopeIfDone(MachineDomTreeNode *Node, + DenseMap<MachineDomTreeNode*, unsigned> &OpenChildren, + DenseMap<MachineDomTreeNode*, MachineDomTreeNode*> &ParentMap) { + if (OpenChildren[Node]) + return; + + // Pop scope. + ExitScope(Node->getBlock()); + + // Now traverse upwards to pop ancestors whose offsprings are all done. + while (MachineDomTreeNode *Parent = ParentMap[Node]) { + unsigned Left = --OpenChildren[Parent]; + if (Left != 0) + break; + ExitScope(Parent->getBlock()); + Node = Parent; + } +} + +bool MachineCSE::PerformCSE(MachineDomTreeNode *Node) { + SmallVector<MachineDomTreeNode*, 32> Scopes; + SmallVector<MachineDomTreeNode*, 8> WorkList; + DenseMap<MachineDomTreeNode*, MachineDomTreeNode*> ParentMap; + DenseMap<MachineDomTreeNode*, unsigned> OpenChildren; + + // Perform a DFS walk to determine the order of visit. + WorkList.push_back(Node); + do { + Node = WorkList.pop_back_val(); + Scopes.push_back(Node); + const std::vector<MachineDomTreeNode*> &Children = Node->getChildren(); + unsigned NumChildren = Children.size(); + OpenChildren[Node] = NumChildren; + for (unsigned i = 0; i != NumChildren; ++i) { + MachineDomTreeNode *Child = Children[i]; + ParentMap[Child] = Node; + WorkList.push_back(Child); + } + } while (!WorkList.empty()); + + // Now perform CSE. 
+ bool Changed = false; + for (unsigned i = 0, e = Scopes.size(); i != e; ++i) { + MachineDomTreeNode *Node = Scopes[i]; + MachineBasicBlock *MBB = Node->getBlock(); + EnterScope(MBB); + Changed |= ProcessBlock(MBB); + // If it's a leaf node, it's done. Traverse upwards to pop ancestors. + ExitScopeIfDone(Node, OpenChildren, ParentMap); + } return Changed; } @@ -370,5 +444,5 @@ bool MachineCSE::runOnMachineFunction(MachineFunction &MF) { MRI = &MF.getRegInfo(); AA = &getAnalysis<AliasAnalysis>(); DT = &getAnalysis<MachineDominatorTree>(); - return ProcessBlock(DT->getRootNode()); + return PerformCSE(DT->getRootNode()); } diff --git a/lib/CodeGen/MachineFunction.cpp b/lib/CodeGen/MachineFunction.cpp index e4ed7db..3cf10b3 100644 --- a/lib/CodeGen/MachineFunction.cpp +++ b/lib/CodeGen/MachineFunction.cpp @@ -51,7 +51,7 @@ void ilist_traits<MachineBasicBlock>::deleteNode(MachineBasicBlock *MBB) { MBB->getParent()->DeleteMachineBasicBlock(MBB); } -MachineFunction::MachineFunction(Function *F, const TargetMachine &TM, +MachineFunction::MachineFunction(const Function *F, const TargetMachine &TM, unsigned FunctionNum, MachineModuleInfo &mmi) : Fn(F), Target(TM), Ctx(mmi.getContext()), MMI(mmi) { if (TM.getRegisterInfo()) @@ -630,7 +630,7 @@ MachineConstantPool::~MachineConstantPool() { /// CanShareConstantPoolEntry - Test whether the given two constants /// can be allocated the same constant pool entry. -static bool CanShareConstantPoolEntry(Constant *A, Constant *B, +static bool CanShareConstantPoolEntry(const Constant *A, const Constant *B, const TargetData *TD) { // Handle the trivial case quickly. if (A == B) return true; @@ -645,17 +645,17 @@ static bool CanShareConstantPoolEntry(Constant *A, Constant *B, // If a floating-point value and an integer value have the same encoding, // they can share a constant-pool entry. - if (ConstantFP *AFP = dyn_cast<ConstantFP>(A)) - if (ConstantInt *BI = dyn_cast<ConstantInt>(B)) + if (const ConstantFP *AFP = dyn_cast<ConstantFP>(A)) + if (const ConstantInt *BI = dyn_cast<ConstantInt>(B)) return AFP->getValueAPF().bitcastToAPInt() == BI->getValue(); - if (ConstantFP *BFP = dyn_cast<ConstantFP>(B)) - if (ConstantInt *AI = dyn_cast<ConstantInt>(A)) + if (const ConstantFP *BFP = dyn_cast<ConstantFP>(B)) + if (const ConstantInt *AI = dyn_cast<ConstantInt>(A)) return BFP->getValueAPF().bitcastToAPInt() == AI->getValue(); // Two vectors can share an entry if each pair of corresponding // elements could. - if (ConstantVector *AV = dyn_cast<ConstantVector>(A)) - if (ConstantVector *BV = dyn_cast<ConstantVector>(B)) { + if (const ConstantVector *AV = dyn_cast<ConstantVector>(A)) + if (const ConstantVector *BV = dyn_cast<ConstantVector>(B)) { if (AV->getType()->getNumElements() != BV->getType()->getNumElements()) return false; for (unsigned i = 0, e = AV->getType()->getNumElements(); i != e; ++i) @@ -674,7 +674,7 @@ static bool CanShareConstantPoolEntry(Constant *A, Constant *B, /// an existing one. User must specify the log2 of the minimum required /// alignment for the object. 
/// -unsigned MachineConstantPool::getConstantPoolIndex(Constant *C, +unsigned MachineConstantPool::getConstantPoolIndex(const Constant *C, unsigned Alignment) { assert(Alignment && "Alignment must be specified!"); if (Alignment > PoolAlignment) PoolAlignment = Alignment; diff --git a/lib/CodeGen/MachineFunctionAnalysis.cpp b/lib/CodeGen/MachineFunctionAnalysis.cpp index 3b2eb6d..07a0f45 100644 --- a/lib/CodeGen/MachineFunctionAnalysis.cpp +++ b/lib/CodeGen/MachineFunctionAnalysis.cpp @@ -44,7 +44,8 @@ bool MachineFunctionAnalysis::doInitialization(Module &M) { MachineModuleInfo *MMI = getAnalysisIfAvailable<MachineModuleInfo>(); assert(MMI && "MMI not around yet??"); MMI->setModule(&M); - NextFnNum = 1; return false; + NextFnNum = 0; + return false; } diff --git a/lib/CodeGen/MachineInstr.cpp b/lib/CodeGen/MachineInstr.cpp index 39b7fb5..99b5beb 100644 --- a/lib/CodeGen/MachineInstr.cpp +++ b/lib/CodeGen/MachineInstr.cpp @@ -192,6 +192,8 @@ bool MachineOperand::isIdenticalTo(const MachineOperand &Other) const { return getBlockAddress() == Other.getBlockAddress(); case MachineOperand::MO_MCSymbol: return getMCSymbol() == Other.getMCSymbol(); + case MachineOperand::MO_Metadata: + return getMetadata() == Other.getMetadata(); } } @@ -409,19 +411,14 @@ void MachineInstr::addImplicitDefUseOperands() { addOperand(MachineOperand::CreateReg(*ImpUses, false, true)); } -/// MachineInstr ctor - This constructor create a MachineInstr and add the -/// implicit operands. It reserves space for number of operands specified by -/// TargetInstrDesc or the numOperands if it is not zero. (for -/// instructions with variable number of operands). +/// MachineInstr ctor - This constructor creates a MachineInstr and adds the +/// implicit operands. It reserves space for the number of operands specified by +/// the TargetInstrDesc. MachineInstr::MachineInstr(const TargetInstrDesc &tid, bool NoImp) : TID(&tid), NumImplicitOps(0), AsmPrinterFlags(0), MemRefs(0), MemRefsEnd(0), Parent(0) { - if (!NoImp && TID->getImplicitDefs()) - for (const unsigned *ImpDefs = TID->getImplicitDefs(); *ImpDefs; ++ImpDefs) - NumImplicitOps++; - if (!NoImp && TID->getImplicitUses()) - for (const unsigned *ImpUses = TID->getImplicitUses(); *ImpUses; ++ImpUses) - NumImplicitOps++; + if (!NoImp) + NumImplicitOps = TID->getNumImplicitDefs() + TID->getNumImplicitUses(); Operands.reserve(NumImplicitOps + TID->getNumOperands()); if (!NoImp) addImplicitDefUseOperands(); @@ -434,12 +431,8 @@ MachineInstr::MachineInstr(const TargetInstrDesc &tid, const DebugLoc dl, bool NoImp) : TID(&tid), NumImplicitOps(0), AsmPrinterFlags(0), MemRefs(0), MemRefsEnd(0), Parent(0), debugLoc(dl) { - if (!NoImp && TID->getImplicitDefs()) - for (const unsigned *ImpDefs = TID->getImplicitDefs(); *ImpDefs; ++ImpDefs) - NumImplicitOps++; - if (!NoImp && TID->getImplicitUses()) - for (const unsigned *ImpUses = TID->getImplicitUses(); *ImpUses; ++ImpUses) - NumImplicitOps++; + if (!NoImp) + NumImplicitOps = TID->getNumImplicitDefs() + TID->getNumImplicitUses(); Operands.reserve(NumImplicitOps + TID->getNumOperands()); if (!NoImp) addImplicitDefUseOperands(); @@ -450,17 +443,11 @@ MachineInstr::MachineInstr(const TargetInstrDesc &tid, const DebugLoc dl, /// MachineInstr ctor - Work exactly the same as the ctor two above, except /// that the MachineInstr is created and added to the end of the specified /// basic block. 
-/// MachineInstr::MachineInstr(MachineBasicBlock *MBB, const TargetInstrDesc &tid) : TID(&tid), NumImplicitOps(0), AsmPrinterFlags(0), MemRefs(0), MemRefsEnd(0), Parent(0) { assert(MBB && "Cannot use inserting ctor with null basic block!"); - if (TID->ImplicitDefs) - for (const unsigned *ImpDefs = TID->getImplicitDefs(); *ImpDefs; ++ImpDefs) - NumImplicitOps++; - if (TID->ImplicitUses) - for (const unsigned *ImpUses = TID->getImplicitUses(); *ImpUses; ++ImpUses) - NumImplicitOps++; + NumImplicitOps = TID->getNumImplicitDefs() + TID->getNumImplicitUses(); Operands.reserve(NumImplicitOps + TID->getNumOperands()); addImplicitDefUseOperands(); // Make sure that we get added to a machine basicblock @@ -475,12 +462,7 @@ MachineInstr::MachineInstr(MachineBasicBlock *MBB, const DebugLoc dl, : TID(&tid), NumImplicitOps(0), AsmPrinterFlags(0), MemRefs(0), MemRefsEnd(0), Parent(0), debugLoc(dl) { assert(MBB && "Cannot use inserting ctor with null basic block!"); - if (TID->ImplicitDefs) - for (const unsigned *ImpDefs = TID->getImplicitDefs(); *ImpDefs; ++ImpDefs) - NumImplicitOps++; - if (TID->ImplicitUses) - for (const unsigned *ImpUses = TID->getImplicitUses(); *ImpUses; ++ImpUses) - NumImplicitOps++; + NumImplicitOps = TID->getNumImplicitDefs() + TID->getNumImplicitUses(); Operands.reserve(NumImplicitOps + TID->getNumOperands()); addImplicitDefUseOperands(); // Make sure that we get added to a machine basicblock @@ -1123,6 +1105,19 @@ unsigned MachineInstr::isConstantValuePHI() const { return Reg; } +/// allDefsAreDead - Return true if all the defs of this instruction are dead. +/// +bool MachineInstr::allDefsAreDead() const { + for (unsigned i = 0, e = getNumOperands(); i < e; ++i) { + const MachineOperand &MO = getOperand(i); + if (!MO.isReg() || MO.isUse()) + continue; + if (!MO.isDead()) + return false; + } + return true; +} + void MachineInstr::dump() const { dbgs() << " " << *this; } @@ -1192,7 +1187,15 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM) const { if (TOI.isOptionalDef()) OS << "opt:"; } - MO.print(OS, TM); + if (isDebugValue() && MO.isMetadata()) { + // Pretty print DBG_VALUE instructions. + const MDNode *MD = MO.getMetadata(); + if (const MDString *MDS = dyn_cast<MDString>(MD->getOperand(2))) + OS << "!\"" << MDS->getString() << '\"'; + else + MO.print(OS, TM); + } else + MO.print(OS, TM); } // Briefly indicate whether any call clobbers were omitted. 
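The MachineInstr constructor changes above rely on the implicit def and use lists in TargetInstrDesc being zero-terminated arrays, so walking a list to its sentinel counts the same entries that the cached getNumImplicitDefs() and getNumImplicitUses() values report. A minimal standalone sketch of that counting pattern (function name and register values here are illustrative only, not part of the patch):

    #include <cassert>

    // Count the entries of a zero-terminated register list, the same walk the
    // removed constructor loops performed over the implicit def/use arrays.
    static unsigned countSentinelTerminated(const unsigned *List) {
      unsigned N = 0;
      if (List)
        for (const unsigned *P = List; *P; ++P)
          ++N;
      return N;
    }

    int main() {
      const unsigned ImpDefs[] = { 3, 7, 0 };   // hypothetical register numbers
      assert(countSentinelTerminated(ImpDefs) == 2);
      assert(countSentinelTerminated(0) == 0);  // a null list means no implicit operands
      return 0;
    }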
diff --git a/lib/CodeGen/MachineLICM.cpp b/lib/CodeGen/MachineLICM.cpp index 0361694..b2e757d 100644 --- a/lib/CodeGen/MachineLICM.cpp +++ b/lib/CodeGen/MachineLICM.cpp @@ -22,8 +22,8 @@ #define DEBUG_TYPE "machine-licm" #include "llvm/CodeGen/Passes.h" -#include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" @@ -33,6 +33,7 @@ #include "llvm/Target/TargetMachine.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" @@ -41,32 +42,41 @@ using namespace llvm; STATISTIC(NumHoisted, "Number of machine instructions hoisted out of loops"); STATISTIC(NumCSEed, "Number of hoisted machine instructions CSEed"); +STATISTIC(NumPostRAHoisted, + "Number of machine instructions hoisted out of loops post regalloc"); namespace { class MachineLICM : public MachineFunctionPass { - MachineConstantPool *MCP; + bool PreRegAlloc; + const TargetMachine *TM; const TargetInstrInfo *TII; const TargetRegisterInfo *TRI; - BitVector AllocatableSet; + const MachineFrameInfo *MFI; + MachineRegisterInfo *RegInfo; // Various analyses that we use... AliasAnalysis *AA; // Alias analysis info. - MachineLoopInfo *LI; // Current MachineLoopInfo + MachineLoopInfo *MLI; // Current MachineLoopInfo MachineDominatorTree *DT; // Machine dominator tree for the cur loop - MachineRegisterInfo *RegInfo; // Machine register information // State that is updated as we process loops bool Changed; // True if a loop is changed. - bool FirstInLoop; // True if it's the first LICM in the loop. MachineLoop *CurLoop; // The current loop we are working on. MachineBasicBlock *CurPreheader; // The preheader for CurLoop. + BitVector AllocatableSet; + // For each opcode, keep a list of potentail CSE instructions. DenseMap<unsigned, std::vector<const MachineInstr*> > CSEMap; + public: static char ID; // Pass identification, replacement for typeid - MachineLICM() : MachineFunctionPass(&ID) {} + MachineLICM() : + MachineFunctionPass(&ID), PreRegAlloc(true) {} + + explicit MachineLICM(bool PreRA) : + MachineFunctionPass(&ID), PreRegAlloc(PreRA) {} virtual bool runOnMachineFunction(MachineFunction &MF); @@ -88,6 +98,39 @@ namespace { } private: + /// CandidateInfo - Keep track of information about hoisting candidates. + struct CandidateInfo { + MachineInstr *MI; + unsigned Def; + int FI; + CandidateInfo(MachineInstr *mi, unsigned def, int fi) + : MI(mi), Def(def), FI(fi) {} + }; + + /// HoistRegionPostRA - Walk the specified region of the CFG and hoist loop + /// invariants out to the preheader. + void HoistRegionPostRA(); + + /// HoistPostRA - When an instruction is found to only use loop invariant + /// operands that is safe to hoist, this instruction is called to do the + /// dirty work. + void HoistPostRA(MachineInstr *MI, unsigned Def); + + /// ProcessMI - Examine the instruction for potentai LICM candidate. Also + /// gather register def and frame object update information. + void ProcessMI(MachineInstr *MI, unsigned *PhysRegDefs, + SmallSet<int, 32> &StoredFIs, + SmallVector<CandidateInfo, 32> &Candidates); + + /// AddToLiveIns - Add register 'Reg' to the livein sets of BBs in the + /// current loop. 
+ void AddToLiveIns(unsigned Reg); + + /// IsLICMCandidate - Returns true if the instruction may be a suitable + /// candidate for LICM. e.g. If the instruction is a call, then it's obviously + /// not safe to hoist it. + bool IsLICMCandidate(MachineInstr &I); + /// IsLoopInvariantInst - Returns true if the instruction is loop /// invariant. I.e., all virtual register operands are defined outside of /// the loop, physical registers aren't accessed (explicitly or implicitly), @@ -145,7 +188,9 @@ char MachineLICM::ID = 0; static RegisterPass<MachineLICM> X("machinelicm", "Machine Loop Invariant Code Motion"); -FunctionPass *llvm::createMachineLICMPass() { return new MachineLICM(); } +FunctionPass *llvm::createMachineLICMPass(bool PreRegAlloc) { + return new MachineLICM(PreRegAlloc); +} /// LoopIsOuterMostWithPreheader - Test if the given loop is the outer-most /// loop that has a preheader. @@ -156,31 +201,31 @@ static bool LoopIsOuterMostWithPreheader(MachineLoop *CurLoop) { return true; } -/// Hoist expressions out of the specified loop. Note, alias info for inner loop -/// is not preserved so it is not a good idea to run LICM multiple times on one -/// loop. -/// bool MachineLICM::runOnMachineFunction(MachineFunction &MF) { - DEBUG(dbgs() << "******** Machine LICM ********\n"); + if (PreRegAlloc) + DEBUG(dbgs() << "******** Pre-regalloc Machine LICM ********\n"); + else + DEBUG(dbgs() << "******** Post-regalloc Machine LICM ********\n"); - Changed = FirstInLoop = false; - MCP = MF.getConstantPool(); + Changed = false; TM = &MF.getTarget(); TII = TM->getInstrInfo(); TRI = TM->getRegisterInfo(); + MFI = MF.getFrameInfo(); RegInfo = &MF.getRegInfo(); AllocatableSet = TRI->getAllocatableSet(MF); // Get our Loop information... - LI = &getAnalysis<MachineLoopInfo>(); - DT = &getAnalysis<MachineDominatorTree>(); - AA = &getAnalysis<AliasAnalysis>(); + MLI = &getAnalysis<MachineLoopInfo>(); + DT = &getAnalysis<MachineDominatorTree>(); + AA = &getAnalysis<AliasAnalysis>(); - for (MachineLoopInfo::iterator I = LI->begin(), E = LI->end(); I != E; ++I) { + for (MachineLoopInfo::iterator I = MLI->begin(), E = MLI->end(); I != E; ++I){ CurLoop = *I; - // Only visit outer-most preheader-sporting loops. - if (!LoopIsOuterMostWithPreheader(CurLoop)) + // If this is done before regalloc, only visit outer-most preheader-sporting + // loops. + if (PreRegAlloc && !LoopIsOuterMostWithPreheader(CurLoop)) continue; // Determine the block to which to hoist instructions. If we can't find a @@ -193,16 +238,230 @@ bool MachineLICM::runOnMachineFunction(MachineFunction &MF) { if (!CurPreheader) continue; - // CSEMap is initialized for loop header when the first instruction is - // being hoisted. - FirstInLoop = true; - HoistRegion(DT->getNode(CurLoop->getHeader())); - CSEMap.clear(); + if (!PreRegAlloc) + HoistRegionPostRA(); + else { + // CSEMap is initialized for loop header when the first instruction is + // being hoisted. + MachineDomTreeNode *N = DT->getNode(CurLoop->getHeader()); + HoistRegion(N); + CSEMap.clear(); + } } return Changed; } +/// InstructionStoresToFI - Return true if instruction stores to the +/// specified frame. 
+static bool InstructionStoresToFI(const MachineInstr *MI, int FI) { + for (MachineInstr::mmo_iterator o = MI->memoperands_begin(), + oe = MI->memoperands_end(); o != oe; ++o) { + if (!(*o)->isStore() || !(*o)->getValue()) + continue; + if (const FixedStackPseudoSourceValue *Value = + dyn_cast<const FixedStackPseudoSourceValue>((*o)->getValue())) { + if (Value->getFrameIndex() == FI) + return true; + } + } + return false; +} + +/// ProcessMI - Examine the instruction for potentai LICM candidate. Also +/// gather register def and frame object update information. +void MachineLICM::ProcessMI(MachineInstr *MI, + unsigned *PhysRegDefs, + SmallSet<int, 32> &StoredFIs, + SmallVector<CandidateInfo, 32> &Candidates) { + bool RuledOut = false; + bool HasNonInvariantUse = false; + unsigned Def = 0; + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI->getOperand(i); + if (MO.isFI()) { + // Remember if the instruction stores to the frame index. + int FI = MO.getIndex(); + if (!StoredFIs.count(FI) && + MFI->isSpillSlotObjectIndex(FI) && + InstructionStoresToFI(MI, FI)) + StoredFIs.insert(FI); + HasNonInvariantUse = true; + continue; + } + + if (!MO.isReg()) + continue; + unsigned Reg = MO.getReg(); + if (!Reg) + continue; + assert(TargetRegisterInfo::isPhysicalRegister(Reg) && + "Not expecting virtual register!"); + + if (!MO.isDef()) { + if (Reg && PhysRegDefs[Reg]) + // If it's using a non-loop-invariant register, then it's obviously not + // safe to hoist. + HasNonInvariantUse = true; + continue; + } + + if (MO.isImplicit()) { + ++PhysRegDefs[Reg]; + for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS) + ++PhysRegDefs[*AS]; + if (!MO.isDead()) + // Non-dead implicit def? This cannot be hoisted. + RuledOut = true; + // No need to check if a dead implicit def is also defined by + // another instruction. + continue; + } + + // FIXME: For now, avoid instructions with multiple defs, unless + // it's a dead implicit def. + if (Def) + RuledOut = true; + else + Def = Reg; + + // If we have already seen another instruction that defines the same + // register, then this is not safe. + if (++PhysRegDefs[Reg] > 1) + // MI defined register is seen defined by another instruction in + // the loop, it cannot be a LICM candidate. + RuledOut = true; + for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS) + if (++PhysRegDefs[*AS] > 1) + RuledOut = true; + } + + // Only consider reloads for now and remats which do not have register + // operands. FIXME: Consider unfold load folding instructions. + if (Def && !RuledOut) { + int FI = INT_MIN; + if ((!HasNonInvariantUse && IsLICMCandidate(*MI)) || + (TII->isLoadFromStackSlot(MI, FI) && MFI->isSpillSlotObjectIndex(FI))) + Candidates.push_back(CandidateInfo(MI, Def, FI)); + } +} + +/// HoistRegionPostRA - Walk the specified region of the CFG and hoist loop +/// invariants out to the preheader. +void MachineLICM::HoistRegionPostRA() { + unsigned NumRegs = TRI->getNumRegs(); + unsigned *PhysRegDefs = new unsigned[NumRegs]; + std::fill(PhysRegDefs, PhysRegDefs + NumRegs, 0); + + SmallVector<CandidateInfo, 32> Candidates; + SmallSet<int, 32> StoredFIs; + + // Walk the entire region, count number of defs for each register, and + // collect potential LICM candidates. + const std::vector<MachineBasicBlock*> Blocks = CurLoop->getBlocks(); + for (unsigned i = 0, e = Blocks.size(); i != e; ++i) { + MachineBasicBlock *BB = Blocks[i]; + // Conservatively treat live-in's as an external def. 
+ // FIXME: That means a reload that're reused in successor block(s) will not + // be LICM'ed. + for (MachineBasicBlock::livein_iterator I = BB->livein_begin(), + E = BB->livein_end(); I != E; ++I) { + unsigned Reg = *I; + ++PhysRegDefs[Reg]; + for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS) + ++PhysRegDefs[*AS]; + } + + for (MachineBasicBlock::iterator + MII = BB->begin(), E = BB->end(); MII != E; ++MII) { + MachineInstr *MI = &*MII; + ProcessMI(MI, PhysRegDefs, StoredFIs, Candidates); + } + } + + // Now evaluate whether the potential candidates qualify. + // 1. Check if the candidate defined register is defined by another + // instruction in the loop. + // 2. If the candidate is a load from stack slot (always true for now), + // check if the slot is stored anywhere in the loop. + for (unsigned i = 0, e = Candidates.size(); i != e; ++i) { + if (Candidates[i].FI != INT_MIN && + StoredFIs.count(Candidates[i].FI)) + continue; + + if (PhysRegDefs[Candidates[i].Def] == 1) { + bool Safe = true; + MachineInstr *MI = Candidates[i].MI; + for (unsigned j = 0, ee = MI->getNumOperands(); j != ee; ++j) { + const MachineOperand &MO = MI->getOperand(j); + if (!MO.isReg() || MO.isDef() || !MO.getReg()) + continue; + if (PhysRegDefs[MO.getReg()]) { + // If it's using a non-loop-invariant register, then it's obviously + // not safe to hoist. + Safe = false; + break; + } + } + if (Safe) + HoistPostRA(MI, Candidates[i].Def); + } + } + + delete[] PhysRegDefs; +} + +/// AddToLiveIns - Add register 'Reg' to the livein sets of BBs in the current +/// loop, and make sure it is not killed by any instructions in the loop. +void MachineLICM::AddToLiveIns(unsigned Reg) { + const std::vector<MachineBasicBlock*> Blocks = CurLoop->getBlocks(); + for (unsigned i = 0, e = Blocks.size(); i != e; ++i) { + MachineBasicBlock *BB = Blocks[i]; + if (!BB->isLiveIn(Reg)) + BB->addLiveIn(Reg); + for (MachineBasicBlock::iterator + MII = BB->begin(), E = BB->end(); MII != E; ++MII) { + MachineInstr *MI = &*MII; + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg() || !MO.getReg() || MO.isDef()) continue; + if (MO.getReg() == Reg || TRI->isSuperRegister(Reg, MO.getReg())) + MO.setIsKill(false); + } + } + } +} + +/// HoistPostRA - When an instruction is found to only use loop invariant +/// operands that is safe to hoist, this instruction is called to do the +/// dirty work. +void MachineLICM::HoistPostRA(MachineInstr *MI, unsigned Def) { + // Now move the instructions to the predecessor, inserting it before any + // terminator instructions. + DEBUG({ + dbgs() << "Hoisting " << *MI; + if (CurPreheader->getBasicBlock()) + dbgs() << " to MachineBasicBlock " + << CurPreheader->getName(); + if (MI->getParent()->getBasicBlock()) + dbgs() << " from MachineBasicBlock " + << MI->getParent()->getName(); + dbgs() << "\n"; + }); + + // Splice the instruction to the preheader. + MachineBasicBlock *MBB = MI->getParent(); + CurPreheader->splice(CurPreheader->getFirstTerminator(), MBB, MI); + + // Add register to livein list to all the BBs in the current loop since a + // loop invariant must be kept live throughout the whole loop. This is + // important to ensure later passes do not scavenge the def register. 
+ AddToLiveIns(Def); + + ++NumPostRAHoisted; + Changed = true; +} + /// HoistRegion - Walk the specified region of the CFG (defined by all blocks /// dominated by the specified block, and that are in the current loop) in depth /// first order w.r.t the DominatorTree. This allows us to visit definitions @@ -223,17 +482,17 @@ void MachineLICM::HoistRegion(MachineDomTreeNode *N) { } const std::vector<MachineDomTreeNode*> &Children = N->getChildren(); - for (unsigned I = 0, E = Children.size(); I != E; ++I) HoistRegion(Children[I]); } -/// IsLoopInvariantInst - Returns true if the instruction is loop -/// invariant. I.e., all virtual register operands are defined outside of the -/// loop, physical registers aren't accessed explicitly, and there are no side -/// effects that aren't captured by the operands or other flags. -/// -bool MachineLICM::IsLoopInvariantInst(MachineInstr &I) { +/// IsLICMCandidate - Returns true if the instruction may be a suitable +/// candidate for LICM. e.g. If the instruction is a call, then it's obviously +/// not safe to hoist it. +bool MachineLICM::IsLICMCandidate(MachineInstr &I) { + if (I.isImplicitDef()) + return false; + const TargetInstrDesc &TID = I.getDesc(); // Ignore stuff that we obviously can't hoist. @@ -251,6 +510,17 @@ bool MachineLICM::IsLoopInvariantInst(MachineInstr &I) { // This is a trivial form of alias analysis. return false; } + return true; +} + +/// IsLoopInvariantInst - Returns true if the instruction is loop +/// invariant. I.e., all virtual register operands are defined outside of the +/// loop, physical registers aren't accessed explicitly, and there are no side +/// effects that aren't captured by the operands or other flags. +/// +bool MachineLICM::IsLoopInvariantInst(MachineInstr &I) { + if (!IsLICMCandidate(I)) + return false; // The instruction is loop invariant if all of its operands are. for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i) { @@ -341,9 +611,6 @@ bool MachineLICM::isLoadFromConstantMemory(MachineInstr *MI) { /// IsProfitableToHoist - Return true if it is potentially profitable to hoist /// the given loop invariant. bool MachineLICM::IsProfitableToHoist(MachineInstr &MI) { - if (MI.isImplicitDef()) - return false; - // FIXME: For now, only hoist re-materilizable instructions. LICM will // increase register pressure. We want to make sure it doesn't increase // spilling. diff --git a/lib/CodeGen/MachineModuleInfo.cpp b/lib/CodeGen/MachineModuleInfo.cpp index f813a55..25284d6 100644 --- a/lib/CodeGen/MachineModuleInfo.cpp +++ b/lib/CodeGen/MachineModuleInfo.cpp @@ -12,8 +12,6 @@ #include "llvm/Constants.h" #include "llvm/DerivedTypes.h" #include "llvm/GlobalVariable.h" -#include "llvm/Intrinsics.h" -#include "llvm/Instructions.h" #include "llvm/Module.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/CodeGen/MachineFunctionPass.h" @@ -315,18 +313,18 @@ void MachineModuleInfo::EndFunction() { /// AnalyzeModule - Scan the module for global debug information. /// -void MachineModuleInfo::AnalyzeModule(Module &M) { +void MachineModuleInfo::AnalyzeModule(const Module &M) { // Insert functions in the llvm.used array (but not llvm.compiler.used) into // UsedFunctions. - GlobalVariable *GV = M.getGlobalVariable("llvm.used"); + const GlobalVariable *GV = M.getGlobalVariable("llvm.used"); if (!GV || !GV->hasInitializer()) return; // Should be an array of 'i8*'. 
- ConstantArray *InitList = dyn_cast<ConstantArray>(GV->getInitializer()); + const ConstantArray *InitList = dyn_cast<ConstantArray>(GV->getInitializer()); if (InitList == 0) return; for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i) - if (Function *F = + if (const Function *F = dyn_cast<Function>(InitList->getOperand(i)->stripPointerCasts())) UsedFunctions.insert(F); } @@ -407,7 +405,7 @@ MCSymbol *MachineModuleInfo::addLandingPad(MachineBasicBlock *LandingPad) { /// addPersonality - Provide the personality function for the exception /// information. void MachineModuleInfo::addPersonality(MachineBasicBlock *LandingPad, - Function *Personality) { + const Function *Personality) { LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad); LP.Personality = Personality; @@ -426,7 +424,7 @@ void MachineModuleInfo::addPersonality(MachineBasicBlock *LandingPad, /// addCatchTypeInfo - Provide the catch typeinfo for a landing pad. /// void MachineModuleInfo::addCatchTypeInfo(MachineBasicBlock *LandingPad, - std::vector<GlobalVariable *> &TyInfo) { + std::vector<const GlobalVariable *> &TyInfo) { LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad); for (unsigned N = TyInfo.size(); N; --N) LP.TypeIds.push_back(getTypeIDFor(TyInfo[N - 1])); @@ -435,7 +433,7 @@ void MachineModuleInfo::addCatchTypeInfo(MachineBasicBlock *LandingPad, /// addFilterTypeInfo - Provide the filter typeinfo for a landing pad. /// void MachineModuleInfo::addFilterTypeInfo(MachineBasicBlock *LandingPad, - std::vector<GlobalVariable *> &TyInfo) { + std::vector<const GlobalVariable *> &TyInfo) { LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad); std::vector<unsigned> IdsInFilter(TyInfo.size()); for (unsigned I = 0, E = TyInfo.size(); I != E; ++I) @@ -452,10 +450,12 @@ void MachineModuleInfo::addCleanup(MachineBasicBlock *LandingPad) { /// TidyLandingPads - Remap landing pad labels and remove any deleted landing /// pads. -void MachineModuleInfo::TidyLandingPads() { +void MachineModuleInfo::TidyLandingPads(DenseMap<MCSymbol*, uintptr_t> *LPMap) { for (unsigned i = 0; i != LandingPads.size(); ) { LandingPadInfo &LandingPad = LandingPads[i]; - if (LandingPad.LandingPadLabel && !LandingPad.LandingPadLabel->isDefined()) + if (LandingPad.LandingPadLabel && + !LandingPad.LandingPadLabel->isDefined() && + (!LPMap || (*LPMap)[LandingPad.LandingPadLabel] == 0)) LandingPad.LandingPadLabel = 0; // Special case: we *should* emit LPs with null LP MBB. This indicates @@ -468,7 +468,10 @@ void MachineModuleInfo::TidyLandingPads() { for (unsigned j = 0, e = LandingPads[i].BeginLabels.size(); j != e; ++j) { MCSymbol *BeginLabel = LandingPad.BeginLabels[j]; MCSymbol *EndLabel = LandingPad.EndLabels[j]; - if (BeginLabel->isDefined() && EndLabel->isDefined()) continue; + if ((BeginLabel->isDefined() || + (LPMap && (*LPMap)[BeginLabel] != 0)) && + (EndLabel->isDefined() || + (LPMap && (*LPMap)[EndLabel] != 0))) continue; LandingPad.BeginLabels.erase(LandingPad.BeginLabels.begin() + j); LandingPad.EndLabels.erase(LandingPad.EndLabels.begin() + j); @@ -492,7 +495,7 @@ void MachineModuleInfo::TidyLandingPads() { /// getTypeIDFor - Return the type id for the specified typeinfo. This is /// function wide. 
-unsigned MachineModuleInfo::getTypeIDFor(GlobalVariable *TI) { +unsigned MachineModuleInfo::getTypeIDFor(const GlobalVariable *TI) { for (unsigned i = 0, N = TypeInfos.size(); i != N; ++i) if (TypeInfos[i] == TI) return i + 1; @@ -532,7 +535,7 @@ try_next:; } /// getPersonality - Return the personality function for the current function. -Function *MachineModuleInfo::getPersonality() const { +const Function *MachineModuleInfo::getPersonality() const { // FIXME: Until PR1414 will be fixed, we're using 1 personality function per // function return !LandingPads.empty() ? LandingPads[0].Personality : NULL; diff --git a/lib/CodeGen/MachineRegisterInfo.cpp b/lib/CodeGen/MachineRegisterInfo.cpp index d9ab677..ea5ca0c 100644 --- a/lib/CodeGen/MachineRegisterInfo.cpp +++ b/lib/CodeGen/MachineRegisterInfo.cpp @@ -12,6 +12,9 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Support/CommandLine.h" using namespace llvm; MachineRegisterInfo::MachineRegisterInfo(const TargetRegisterInfo &TRI) { @@ -130,6 +133,138 @@ bool MachineRegisterInfo::hasOneNonDBGUse(unsigned RegNo) const { return ++UI == use_nodbg_end(); } +bool MachineRegisterInfo::isLiveIn(unsigned Reg) const { + for (livein_iterator I = livein_begin(), E = livein_end(); I != E; ++I) + if (I->first == Reg || I->second == Reg) + return true; + return false; +} + +bool MachineRegisterInfo::isLiveOut(unsigned Reg) const { + for (liveout_iterator I = liveout_begin(), E = liveout_end(); I != E; ++I) + if (*I == Reg) + return true; + return false; +} + +/// getLiveInPhysReg - If VReg is a live-in virtual register, return the +/// corresponding live-in physical register. +unsigned MachineRegisterInfo::getLiveInPhysReg(unsigned VReg) const { + for (livein_iterator I = livein_begin(), E = livein_end(); I != E; ++I) + if (I->second == VReg) + return I->first; + return 0; +} + +static cl::opt<bool> +SchedLiveInCopies("schedule-livein-copies", cl::Hidden, + cl::desc("Schedule copies of livein registers"), + cl::init(false)); + +/// EmitLiveInCopy - Emit a copy for a live in physical register. If the +/// physical register has only a single copy use, then coalesced the copy +/// if possible. +static void EmitLiveInCopy(MachineBasicBlock *MBB, + MachineBasicBlock::iterator &InsertPos, + unsigned VirtReg, unsigned PhysReg, + const TargetRegisterClass *RC, + DenseMap<MachineInstr*, unsigned> &CopyRegMap, + const MachineRegisterInfo &MRI, + const TargetRegisterInfo &TRI, + const TargetInstrInfo &TII) { + unsigned NumUses = 0; + MachineInstr *UseMI = NULL; + for (MachineRegisterInfo::use_iterator UI = MRI.use_begin(VirtReg), + UE = MRI.use_end(); UI != UE; ++UI) { + UseMI = &*UI; + if (++NumUses > 1) + break; + } + + // If the number of uses is not one, or the use is not a move instruction, + // don't coalesce. Also, only coalesce away a virtual register to virtual + // register copy. + bool Coalesced = false; + unsigned SrcReg, DstReg, SrcSubReg, DstSubReg; + if (NumUses == 1 && + TII.isMoveInstr(*UseMI, SrcReg, DstReg, SrcSubReg, DstSubReg) && + TargetRegisterInfo::isVirtualRegister(DstReg)) { + VirtReg = DstReg; + Coalesced = true; + } + + // Now find an ideal location to insert the copy. 
+ MachineBasicBlock::iterator Pos = InsertPos; + while (Pos != MBB->begin()) { + MachineInstr *PrevMI = prior(Pos); + DenseMap<MachineInstr*, unsigned>::iterator RI = CopyRegMap.find(PrevMI); + // copyRegToReg might emit multiple instructions to do a copy. + unsigned CopyDstReg = (RI == CopyRegMap.end()) ? 0 : RI->second; + if (CopyDstReg && !TRI.regsOverlap(CopyDstReg, PhysReg)) + // This is what the BB looks like right now: + // r1024 = mov r0 + // ... + // r1 = mov r1024 + // + // We want to insert "r1025 = mov r1". Inserting this copy below the + // move to r1024 makes it impossible for that move to be coalesced. + // + // r1025 = mov r1 + // r1024 = mov r0 + // ... + // r1 = mov 1024 + // r2 = mov 1025 + break; // Woot! Found a good location. + --Pos; + } + + bool Emitted = TII.copyRegToReg(*MBB, Pos, VirtReg, PhysReg, RC, RC); + assert(Emitted && "Unable to issue a live-in copy instruction!\n"); + (void) Emitted; + + CopyRegMap.insert(std::make_pair(prior(Pos), VirtReg)); + if (Coalesced) { + if (&*InsertPos == UseMI) ++InsertPos; + MBB->erase(UseMI); + } +} + +/// EmitLiveInCopies - Emit copies to initialize livein virtual registers +/// into the given entry block. +void +MachineRegisterInfo::EmitLiveInCopies(MachineBasicBlock *EntryMBB, + const TargetRegisterInfo &TRI, + const TargetInstrInfo &TII) { + if (SchedLiveInCopies) { + // Emit the copies at a heuristically-determined location in the block. + DenseMap<MachineInstr*, unsigned> CopyRegMap; + MachineBasicBlock::iterator InsertPos = EntryMBB->begin(); + for (MachineRegisterInfo::livein_iterator LI = livein_begin(), + E = livein_end(); LI != E; ++LI) + if (LI->second) { + const TargetRegisterClass *RC = getRegClass(LI->second); + EmitLiveInCopy(EntryMBB, InsertPos, LI->second, LI->first, + RC, CopyRegMap, *this, TRI, TII); + } + } else { + // Emit the copies into the top of the block. + for (MachineRegisterInfo::livein_iterator LI = livein_begin(), + E = livein_end(); LI != E; ++LI) + if (LI->second) { + const TargetRegisterClass *RC = getRegClass(LI->second); + bool Emitted = TII.copyRegToReg(*EntryMBB, EntryMBB->begin(), + LI->second, LI->first, RC, RC); + assert(Emitted && "Unable to issue a live-in copy instruction!\n"); + (void) Emitted; + } + } + + // Add function live-ins to entry block live-in set. + for (MachineRegisterInfo::livein_iterator I = livein_begin(), + E = livein_end(); I != E; ++I) + EntryMBB->addLiveIn(I->first); +} + #ifndef NDEBUG void MachineRegisterInfo::dumpUses(unsigned Reg) const { for (use_iterator I = use_begin(Reg), E = use_end(); I != E; ++I) diff --git a/lib/CodeGen/MachineSSAUpdater.cpp b/lib/CodeGen/MachineSSAUpdater.cpp index b79cdbb..b8996d4 100644 --- a/lib/CodeGen/MachineSSAUpdater.cpp +++ b/lib/CodeGen/MachineSSAUpdater.cpp @@ -21,34 +21,50 @@ #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/Support/AlignOf.h" +#include "llvm/Support/Allocator.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; -typedef DenseMap<MachineBasicBlock*, unsigned> AvailableValsTy; -typedef std::vector<std::pair<MachineBasicBlock*, unsigned> > - IncomingPredInfoTy; +/// BBInfo - Per-basic block information used internally by MachineSSAUpdater. +class MachineSSAUpdater::BBInfo { +public: + MachineBasicBlock *BB; // Back-pointer to the corresponding block. + unsigned AvailableVal; // Value to use in this block. 
+ BBInfo *DefBB; // Block that defines the available value. + int BlkNum; // Postorder number. + BBInfo *IDom; // Immediate dominator. + unsigned NumPreds; // Number of predecessor blocks. + BBInfo **Preds; // Array[NumPreds] of predecessor blocks. + MachineInstr *PHITag; // Marker for existing PHIs that match. + + BBInfo(MachineBasicBlock *ThisBB, unsigned V) + : BB(ThisBB), AvailableVal(V), DefBB(V ? this : 0), BlkNum(0), IDom(0), + NumPreds(0), Preds(0), PHITag(0) { } +}; + +typedef DenseMap<MachineBasicBlock*, MachineSSAUpdater::BBInfo*> BBMapTy; +typedef DenseMap<MachineBasicBlock*, unsigned> AvailableValsTy; static AvailableValsTy &getAvailableVals(void *AV) { return *static_cast<AvailableValsTy*>(AV); } -static IncomingPredInfoTy &getIncomingPredInfo(void *IPI) { - return *static_cast<IncomingPredInfoTy*>(IPI); +static BBMapTy *getBBMap(void *BM) { + return static_cast<BBMapTy*>(BM); } - MachineSSAUpdater::MachineSSAUpdater(MachineFunction &MF, SmallVectorImpl<MachineInstr*> *NewPHI) - : AV(0), IPI(0), InsertedPHIs(NewPHI) { + : AV(0), BM(0), InsertedPHIs(NewPHI) { TII = MF.getTarget().getInstrInfo(); MRI = &MF.getRegInfo(); } MachineSSAUpdater::~MachineSSAUpdater() { delete &getAvailableVals(AV); - delete &getIncomingPredInfo(IPI); } /// Initialize - Reset this object to get ready for a new set of SSA @@ -59,11 +75,6 @@ void MachineSSAUpdater::Initialize(unsigned V) { else getAvailableVals(AV).clear(); - if (IPI == 0) - IPI = new IncomingPredInfoTy(); - else - getIncomingPredInfo(IPI).clear(); - VR = V; VRC = MRI->getRegClass(VR); } @@ -127,7 +138,7 @@ MachineInstr *InsertNewDef(unsigned Opcode, unsigned NewVR = MRI->createVirtualRegister(RC); return BuildMI(*BB, I, DebugLoc(), TII->get(Opcode), NewVR); } - + /// GetValueInMiddleOfBlock - Construct SSA form, materializing a value that /// is live in the middle of the specified block. /// @@ -150,7 +161,7 @@ MachineInstr *InsertNewDef(unsigned Opcode, unsigned MachineSSAUpdater::GetValueInMiddleOfBlock(MachineBasicBlock *BB) { // If there is no definition of the renamed variable in this block, just use // GetValueAtEndOfBlock to do our work. - if (!getAvailableVals(AV).count(BB)) + if (!HasValueForBlock(BB)) return GetValueAtEndOfBlockInternal(BB); // If there are no predecessors, just return undef. @@ -254,141 +265,436 @@ void MachineSSAUpdater::ReplaceRegWith(unsigned OldReg, unsigned NewReg) { /// GetValueAtEndOfBlockInternal - Check to see if AvailableVals has an entry /// for the specified BB and if so, return it. If not, construct SSA form by -/// walking predecessors inserting PHI nodes as needed until we get to a block -/// where the value is available. -/// +/// first calculating the required placement of PHIs and then inserting new +/// PHIs where needed. unsigned MachineSSAUpdater::GetValueAtEndOfBlockInternal(MachineBasicBlock *BB){ AvailableValsTy &AvailableVals = getAvailableVals(AV); + if (unsigned V = AvailableVals[BB]) + return V; - // Query AvailableVals by doing an insertion of null. - std::pair<AvailableValsTy::iterator, bool> InsertRes = - AvailableVals.insert(std::make_pair(BB, 0)); - - // Handle the case when the insertion fails because we have already seen BB. - if (!InsertRes.second) { - // If the insertion failed, there are two cases. The first case is that the - // value is already available for the specified block. If we get this, just - // return the value. 
- if (InsertRes.first->second != 0) - return InsertRes.first->second; - - // Otherwise, if the value we find is null, then this is the value is not - // known but it is being computed elsewhere in our recursion. This means - // that we have a cycle. Handle this by inserting a PHI node and returning - // it. When we get back to the first instance of the recursion we will fill - // in the PHI node. - MachineBasicBlock::iterator Loc = BB->empty() ? BB->end() : BB->front(); - MachineInstr *NewPHI = InsertNewDef(TargetOpcode::PHI, BB, Loc, - VRC, MRI,TII); - unsigned NewVR = NewPHI->getOperand(0).getReg(); - InsertRes.first->second = NewVR; - return NewVR; - } + // Pool allocation used internally by GetValueAtEndOfBlock. + BumpPtrAllocator Allocator; + BBMapTy BBMapObj; + BM = &BBMapObj; - // If there are no predecessors, then we must have found an unreachable block - // just return 'undef'. Since there are no predecessors, InsertRes must not - // be invalidated. - if (BB->pred_empty()) { + SmallVector<BBInfo*, 100> BlockList; + BuildBlockList(BB, &BlockList, &Allocator); + + // Special case: bail out if BB is unreachable. + if (BlockList.size() == 0) { + BM = 0; // Insert an implicit_def to represent an undef value. MachineInstr *NewDef = InsertNewDef(TargetOpcode::IMPLICIT_DEF, BB, BB->getFirstTerminator(), VRC, MRI, TII); - return InsertRes.first->second = NewDef->getOperand(0).getReg(); + unsigned V = NewDef->getOperand(0).getReg(); + AvailableVals[BB] = V; + return V; } - // Okay, the value isn't in the map and we just inserted a null in the entry - // to indicate that we're processing the block. Since we have no idea what - // value is in this block, we have to recurse through our predecessors. - // - // While we're walking our predecessors, we keep track of them in a vector, - // then insert a PHI node in the end if we actually need one. We could use a - // smallvector here, but that would take a lot of stack space for every level - // of the recursion, just use IncomingPredInfo as an explicit stack. - IncomingPredInfoTy &IncomingPredInfo = getIncomingPredInfo(IPI); - unsigned FirstPredInfoEntry = IncomingPredInfo.size(); - - // As we're walking the predecessors, keep track of whether they are all - // producing the same value. If so, this value will capture it, if not, it - // will get reset to null. We distinguish the no-predecessor case explicitly - // below. - unsigned SingularValue = 0; - bool isFirstPred = true; + FindDominators(&BlockList); + FindPHIPlacement(&BlockList); + FindAvailableVals(&BlockList); + + BM = 0; + return BBMapObj[BB]->DefBB->AvailableVal; +} + +/// FindPredecessorBlocks - Put the predecessors of Info->BB into the Preds +/// vector, set Info->NumPreds, and allocate space in Info->Preds. +static void FindPredecessorBlocks(MachineSSAUpdater::BBInfo *Info, + SmallVectorImpl<MachineBasicBlock*> *Preds, + BumpPtrAllocator *Allocator) { + MachineBasicBlock *BB = Info->BB; for (MachineBasicBlock::pred_iterator PI = BB->pred_begin(), - E = BB->pred_end(); PI != E; ++PI) { - MachineBasicBlock *PredBB = *PI; - unsigned PredVal = GetValueAtEndOfBlockInternal(PredBB); - IncomingPredInfo.push_back(std::make_pair(PredBB, PredVal)); + E = BB->pred_end(); PI != E; ++PI) + Preds->push_back(*PI); - // Compute SingularValue. 
- if (isFirstPred) { - SingularValue = PredVal; - isFirstPred = false; - } else if (PredVal != SingularValue) - SingularValue = 0; + Info->NumPreds = Preds->size(); + Info->Preds = static_cast<MachineSSAUpdater::BBInfo**> + (Allocator->Allocate(Info->NumPreds * sizeof(MachineSSAUpdater::BBInfo*), + AlignOf<MachineSSAUpdater::BBInfo*>::Alignment)); +} + +/// BuildBlockList - Starting from the specified basic block, traverse back +/// through its predecessors until reaching blocks with known values. Create +/// BBInfo structures for the blocks and append them to the block list. +void MachineSSAUpdater::BuildBlockList(MachineBasicBlock *BB, + BlockListTy *BlockList, + BumpPtrAllocator *Allocator) { + AvailableValsTy &AvailableVals = getAvailableVals(AV); + BBMapTy *BBMap = getBBMap(BM); + SmallVector<BBInfo*, 10> RootList; + SmallVector<BBInfo*, 64> WorkList; + + BBInfo *Info = new (*Allocator) BBInfo(BB, 0); + (*BBMap)[BB] = Info; + WorkList.push_back(Info); + + // Search backward from BB, creating BBInfos along the way and stopping when + // reaching blocks that define the value. Record those defining blocks on + // the RootList. + SmallVector<MachineBasicBlock*, 10> Preds; + while (!WorkList.empty()) { + Info = WorkList.pop_back_val(); + Preds.clear(); + FindPredecessorBlocks(Info, &Preds, Allocator); + + // Treat an unreachable predecessor as a definition with 'undef'. + if (Info->NumPreds == 0) { + // Insert an implicit_def to represent an undef value. + MachineInstr *NewDef = InsertNewDef(TargetOpcode::IMPLICIT_DEF, + Info->BB, + Info->BB->getFirstTerminator(), + VRC, MRI, TII); + Info->AvailableVal = NewDef->getOperand(0).getReg(); + Info->DefBB = Info; + RootList.push_back(Info); + continue; + } + + for (unsigned p = 0; p != Info->NumPreds; ++p) { + MachineBasicBlock *Pred = Preds[p]; + // Check if BBMap already has a BBInfo for the predecessor block. + BBMapTy::value_type &BBMapBucket = BBMap->FindAndConstruct(Pred); + if (BBMapBucket.second) { + Info->Preds[p] = BBMapBucket.second; + continue; + } + + // Create a new BBInfo for the predecessor. + unsigned PredVal = AvailableVals.lookup(Pred); + BBInfo *PredInfo = new (*Allocator) BBInfo(Pred, PredVal); + BBMapBucket.second = PredInfo; + Info->Preds[p] = PredInfo; + + if (PredInfo->AvailableVal) { + RootList.push_back(PredInfo); + continue; + } + WorkList.push_back(PredInfo); + } + } + + // Now that we know what blocks are backwards-reachable from the starting + // block, do a forward depth-first traversal to assign postorder numbers + // to those blocks. + BBInfo *PseudoEntry = new (*Allocator) BBInfo(0, 0); + unsigned BlkNum = 1; + + // Initialize the worklist with the roots from the backward traversal. + while (!RootList.empty()) { + Info = RootList.pop_back_val(); + Info->IDom = PseudoEntry; + Info->BlkNum = -1; + WorkList.push_back(Info); + } + + while (!WorkList.empty()) { + Info = WorkList.back(); + + if (Info->BlkNum == -2) { + // All the successors have been handled; assign the postorder number. + Info->BlkNum = BlkNum++; + // If not a root, put it on the BlockList. + if (!Info->AvailableVal) + BlockList->push_back(Info); + WorkList.pop_back(); + continue; + } + + // Leave this entry on the worklist, but set its BlkNum to mark that its + // successors have been put on the worklist. When it returns to the top + // the list, after handling its successors, it will be assigned a number. + Info->BlkNum = -2; + + // Add unvisited successors to the work list. 
+ for (MachineBasicBlock::succ_iterator SI = Info->BB->succ_begin(), + E = Info->BB->succ_end(); SI != E; ++SI) { + BBInfo *SuccInfo = (*BBMap)[*SI]; + if (!SuccInfo || SuccInfo->BlkNum) + continue; + SuccInfo->BlkNum = -1; + WorkList.push_back(SuccInfo); + } } + PseudoEntry->BlkNum = BlkNum; +} - /// Look up BB's entry in AvailableVals. 'InsertRes' may be invalidated. If - /// this block is involved in a loop, a no-entry PHI node will have been - /// inserted as InsertedVal. Otherwise, we'll still have the null we inserted - /// above. - unsigned &InsertedVal = AvailableVals[BB]; - - // If all the predecessor values are the same then we don't need to insert a - // PHI. This is the simple and common case. - if (SingularValue) { - // If a PHI node got inserted, replace it with the singlar value and delete - // it. - if (InsertedVal) { - MachineInstr *OldVal = MRI->getVRegDef(InsertedVal); - // Be careful about dead loops. These RAUW's also update InsertedVal. - assert(InsertedVal != SingularValue && "Dead loop?"); - ReplaceRegWith(InsertedVal, SingularValue); - OldVal->eraseFromParent(); +/// IntersectDominators - This is the dataflow lattice "meet" operation for +/// finding dominators. Given two basic blocks, it walks up the dominator +/// tree until it finds a common dominator of both. It uses the postorder +/// number of the blocks to determine how to do that. +static MachineSSAUpdater::BBInfo * +IntersectDominators(MachineSSAUpdater::BBInfo *Blk1, + MachineSSAUpdater::BBInfo *Blk2) { + while (Blk1 != Blk2) { + while (Blk1->BlkNum < Blk2->BlkNum) { + Blk1 = Blk1->IDom; + if (!Blk1) + return Blk2; } + while (Blk2->BlkNum < Blk1->BlkNum) { + Blk2 = Blk2->IDom; + if (!Blk2) + return Blk1; + } + } + return Blk1; +} - InsertedVal = SingularValue; +/// FindDominators - Calculate the dominator tree for the subset of the CFG +/// corresponding to the basic blocks on the BlockList. This uses the +/// algorithm from: "A Simple, Fast Dominance Algorithm" by Cooper, Harvey and +/// Kennedy, published in Software--Practice and Experience, 2001, 4:1-10. +/// Because the CFG subset does not include any edges leading into blocks that +/// define the value, the results are not the usual dominator tree. The CFG +/// subset has a single pseudo-entry node with edges to a set of root nodes +/// for blocks that define the value. The dominators for this subset CFG are +/// not the standard dominators but they are adequate for placing PHIs within +/// the subset CFG. +void MachineSSAUpdater::FindDominators(BlockListTy *BlockList) { + bool Changed; + do { + Changed = false; + // Iterate over the list in reverse order, i.e., forward on CFG edges. + for (BlockListTy::reverse_iterator I = BlockList->rbegin(), + E = BlockList->rend(); I != E; ++I) { + BBInfo *Info = *I; + + // Start with the first predecessor. + assert(Info->NumPreds > 0 && "unreachable block"); + BBInfo *NewIDom = Info->Preds[0]; + + // Iterate through the block's other predecessors. + for (unsigned p = 1; p != Info->NumPreds; ++p) { + BBInfo *Pred = Info->Preds[p]; + NewIDom = IntersectDominators(NewIDom, Pred); + } - // Drop the entries we added in IncomingPredInfo to restore the stack. - IncomingPredInfo.erase(IncomingPredInfo.begin()+FirstPredInfoEntry, - IncomingPredInfo.end()); - return InsertedVal; + // Check if the IDom value has changed. 
+ if (NewIDom != Info->IDom) { + Info->IDom = NewIDom; + Changed = true; + } + } + } while (Changed); +} + +/// IsDefInDomFrontier - Search up the dominator tree from Pred to IDom for +/// any blocks containing definitions of the value. If one is found, then the +/// successor of Pred is in the dominance frontier for the definition, and +/// this function returns true. +static bool IsDefInDomFrontier(const MachineSSAUpdater::BBInfo *Pred, + const MachineSSAUpdater::BBInfo *IDom) { + for (; Pred != IDom; Pred = Pred->IDom) { + if (Pred->DefBB == Pred) + return true; } + return false; +} +/// FindPHIPlacement - PHIs are needed in the iterated dominance frontiers of +/// the known definitions. Iteratively add PHIs in the dom frontiers until +/// nothing changes. Along the way, keep track of the nearest dominating +/// definitions for non-PHI blocks. +void MachineSSAUpdater::FindPHIPlacement(BlockListTy *BlockList) { + bool Changed; + do { + Changed = false; + // Iterate over the list in reverse order, i.e., forward on CFG edges. + for (BlockListTy::reverse_iterator I = BlockList->rbegin(), + E = BlockList->rend(); I != E; ++I) { + BBInfo *Info = *I; + + // If this block already needs a PHI, there is nothing to do here. + if (Info->DefBB == Info) + continue; + + // Default to use the same def as the immediate dominator. + BBInfo *NewDefBB = Info->IDom->DefBB; + for (unsigned p = 0; p != Info->NumPreds; ++p) { + if (IsDefInDomFrontier(Info->Preds[p], Info->IDom)) { + // Need a PHI here. + NewDefBB = Info; + break; + } + } - // Otherwise, we do need a PHI: insert one now if we don't already have one. - MachineInstr *InsertedPHI; - if (InsertedVal == 0) { - MachineBasicBlock::iterator Loc = BB->empty() ? BB->end() : BB->front(); - InsertedPHI = InsertNewDef(TargetOpcode::PHI, BB, Loc, - VRC, MRI, TII); - InsertedVal = InsertedPHI->getOperand(0).getReg(); - } else { - InsertedPHI = MRI->getVRegDef(InsertedVal); + // Check if anything changed. + if (NewDefBB != Info->DefBB) { + Info->DefBB = NewDefBB; + Changed = true; + } + } + } while (Changed); +} + +/// FindAvailableVal - If this block requires a PHI, first check if an existing +/// PHI matches the PHI placement and reaching definitions computed earlier, +/// and if not, create a new PHI. Visit all the block's predecessors to +/// calculate the available value for each one and fill in the incoming values +/// for a new PHI. +void MachineSSAUpdater::FindAvailableVals(BlockListTy *BlockList) { + AvailableValsTy &AvailableVals = getAvailableVals(AV); + + // Go through the worklist in forward order (i.e., backward through the CFG) + // and check if existing PHIs can be used. If not, create empty PHIs where + // they are needed. + for (BlockListTy::iterator I = BlockList->begin(), E = BlockList->end(); + I != E; ++I) { + BBInfo *Info = *I; + // Check if there needs to be a PHI in BB. + if (Info->DefBB != Info) + continue; + + // Look for an existing PHI. + FindExistingPHI(Info->BB, BlockList); + if (Info->AvailableVal) + continue; + + MachineBasicBlock::iterator Loc = + Info->BB->empty() ? Info->BB->end() : Info->BB->front(); + MachineInstr *InsertedPHI = InsertNewDef(TargetOpcode::PHI, Info->BB, Loc, + VRC, MRI, TII); + unsigned PHI = InsertedPHI->getOperand(0).getReg(); + Info->AvailableVal = PHI; + AvailableVals[Info->BB] = PHI; } - // Fill in all the predecessors of the PHI. 
- MachineInstrBuilder MIB(InsertedPHI); - for (IncomingPredInfoTy::iterator I = - IncomingPredInfo.begin()+FirstPredInfoEntry, - E = IncomingPredInfo.end(); I != E; ++I) - MIB.addReg(I->second).addMBB(I->first); + // Now go back through the worklist in reverse order to fill in the arguments + // for any new PHIs added in the forward traversal. + for (BlockListTy::reverse_iterator I = BlockList->rbegin(), + E = BlockList->rend(); I != E; ++I) { + BBInfo *Info = *I; + + if (Info->DefBB != Info) { + // Record the available value at join nodes to speed up subsequent + // uses of this SSAUpdater for the same value. + if (Info->NumPreds > 1) + AvailableVals[Info->BB] = Info->DefBB->AvailableVal; + continue; + } - // Drop the entries we added in IncomingPredInfo to restore the stack. - IncomingPredInfo.erase(IncomingPredInfo.begin()+FirstPredInfoEntry, - IncomingPredInfo.end()); + // Check if this block contains a newly added PHI. + unsigned PHI = Info->AvailableVal; + MachineInstr *InsertedPHI = MRI->getVRegDef(PHI); + if (!InsertedPHI->isPHI() || InsertedPHI->getNumOperands() > 1) + continue; + + // Iterate through the block's predecessors. + MachineInstrBuilder MIB(InsertedPHI); + for (unsigned p = 0; p != Info->NumPreds; ++p) { + BBInfo *PredInfo = Info->Preds[p]; + MachineBasicBlock *Pred = PredInfo->BB; + // Skip to the nearest preceding definition. + if (PredInfo->DefBB != PredInfo) + PredInfo = PredInfo->DefBB; + MIB.addReg(PredInfo->AvailableVal).addMBB(Pred); + } - // See if the PHI node can be merged to a single value. This can happen in - // loop cases when we get a PHI of itself and one other value. - if (unsigned ConstVal = InsertedPHI->isConstantValuePHI()) { - MRI->replaceRegWith(InsertedVal, ConstVal); - InsertedPHI->eraseFromParent(); - InsertedVal = ConstVal; - } else { DEBUG(dbgs() << " Inserted PHI: " << *InsertedPHI << "\n"); // If the client wants to know about all new instructions, tell it. if (InsertedPHIs) InsertedPHIs->push_back(InsertedPHI); } +} - return InsertedVal; +/// FindExistingPHI - Look through the PHI nodes in a block to see if any of +/// them match what is needed. +void MachineSSAUpdater::FindExistingPHI(MachineBasicBlock *BB, + BlockListTy *BlockList) { + for (MachineBasicBlock::iterator BBI = BB->begin(), BBE = BB->end(); + BBI != BBE && BBI->isPHI(); ++BBI) { + if (CheckIfPHIMatches(BBI)) { + RecordMatchingPHI(BBI); + break; + } + // Match failed: clear all the PHITag values. + for (BlockListTy::iterator I = BlockList->begin(), E = BlockList->end(); + I != E; ++I) + (*I)->PHITag = 0; + } +} + +/// CheckIfPHIMatches - Check if a PHI node matches the placement and values +/// in the BBMap. +bool MachineSSAUpdater::CheckIfPHIMatches(MachineInstr *PHI) { + BBMapTy *BBMap = getBBMap(BM); + SmallVector<MachineInstr*, 20> WorkList; + WorkList.push_back(PHI); + + // Mark that the block containing this PHI has been visited. + (*BBMap)[PHI->getParent()]->PHITag = PHI; + + while (!WorkList.empty()) { + PHI = WorkList.pop_back_val(); + + // Iterate through the PHI's incoming values. + for (unsigned i = 1, e = PHI->getNumOperands(); i != e; i += 2) { + unsigned IncomingVal = PHI->getOperand(i).getReg(); + BBInfo *PredInfo = (*BBMap)[PHI->getOperand(i+1).getMBB()]; + // Skip to the nearest preceding definition. + if (PredInfo->DefBB != PredInfo) + PredInfo = PredInfo->DefBB; + + // Check if it matches the expected value. 
+ if (PredInfo->AvailableVal) { + if (IncomingVal == PredInfo->AvailableVal) + continue; + return false; + } + + // Check if the value is a PHI in the correct block. + MachineInstr *IncomingPHIVal = MRI->getVRegDef(IncomingVal); + if (!IncomingPHIVal->isPHI() || + IncomingPHIVal->getParent() != PredInfo->BB) + return false; + + // If this block has already been visited, check if this PHI matches. + if (PredInfo->PHITag) { + if (IncomingPHIVal == PredInfo->PHITag) + continue; + return false; + } + PredInfo->PHITag = IncomingPHIVal; + + WorkList.push_back(IncomingPHIVal); + } + } + return true; +} + +/// RecordMatchingPHI - For a PHI node that matches, record it and its input +/// PHIs in both the BBMap and the AvailableVals mapping. +void MachineSSAUpdater::RecordMatchingPHI(MachineInstr *PHI) { + BBMapTy *BBMap = getBBMap(BM); + AvailableValsTy &AvailableVals = getAvailableVals(AV); + SmallVector<MachineInstr*, 20> WorkList; + WorkList.push_back(PHI); + + // Record this PHI. + MachineBasicBlock *BB = PHI->getParent(); + AvailableVals[BB] = PHI->getOperand(0).getReg(); + (*BBMap)[BB]->AvailableVal = PHI->getOperand(0).getReg(); + + while (!WorkList.empty()) { + PHI = WorkList.pop_back_val(); + + // Iterate through the PHI's incoming values. + for (unsigned i = 1, e = PHI->getNumOperands(); i != e; i += 2) { + unsigned IncomingVal = PHI->getOperand(i).getReg(); + MachineInstr *IncomingPHIVal = MRI->getVRegDef(IncomingVal); + if (!IncomingPHIVal->isPHI()) continue; + BB = IncomingPHIVal->getParent(); + BBInfo *Info = (*BBMap)[BB]; + if (!Info || Info->AvailableVal) + continue; + + // Record the PHI and add it to the worklist. + AvailableVals[BB] = IncomingVal; + Info->AvailableVal = IncomingVal; + WorkList.push_back(IncomingPHIVal); + } + } } diff --git a/lib/CodeGen/MachineSink.cpp b/lib/CodeGen/MachineSink.cpp index e659619..ef489dc 100644 --- a/lib/CodeGen/MachineSink.cpp +++ b/lib/CodeGen/MachineSink.cpp @@ -20,6 +20,7 @@ #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetInstrInfo.h" @@ -37,6 +38,7 @@ namespace { const TargetRegisterInfo *TRI; MachineRegisterInfo *RegInfo; // Machine register information MachineDominatorTree *DT; // Machine dominator tree + MachineLoopInfo *LI; AliasAnalysis *AA; BitVector AllocatableSet; // Which physregs are allocatable? @@ -51,7 +53,9 @@ namespace { MachineFunctionPass::getAnalysisUsage(AU); AU.addRequired<AliasAnalysis>(); AU.addRequired<MachineDominatorTree>(); + AU.addRequired<MachineLoopInfo>(); AU.addPreserved<MachineDominatorTree>(); + AU.addPreserved<MachineLoopInfo>(); } private: bool ProcessBlock(MachineBasicBlock &MBB); @@ -102,6 +106,7 @@ bool MachineSinking::runOnMachineFunction(MachineFunction &MF) { TRI = TM.getRegisterInfo(); RegInfo = &MF.getRegInfo(); DT = &getAnalysis<MachineDominatorTree>(); + LI = &getAnalysis<MachineLoopInfo>(); AA = &getAnalysis<AliasAnalysis>(); AllocatableSet = TRI->getAllocatableSet(MF); @@ -276,8 +281,29 @@ bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) { // but for now we just punt. // FIXME: Split critical edges if not backedges. if (SuccToSinkTo->pred_size() > 1) { - DEBUG(dbgs() << " *** PUNTING: Critical edge found\n"); - return false; + // We cannot sink a load across a critical edge - there may be stores in + // other code paths. 
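    // SawStore is seeded to true here, so isSafeToMove behaves as if a store
    // had already been seen on the path and conservatively rejects any load
    // that is not known to be invariant; that is what keeps loads from being
    // sunk across the critical edge.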
+ bool store = true; + if (!MI->isSafeToMove(TII, AA, store)) { + DEBUG(dbgs() << " *** PUNTING: Wont sink load along critical edge.\n"); + return false; + } + + // We don't want to sink across a critical edge if we don't dominate the + // successor. We could be introducing calculations to new code paths. + if (!DT->dominates(ParentBlock, SuccToSinkTo)) { + DEBUG(dbgs() << " *** PUNTING: Critical edge found\n"); + return false; + } + + // Don't sink instructions into a loop. + if (LI->isLoopHeader(SuccToSinkTo)) { + DEBUG(dbgs() << " *** PUNTING: Loop header found\n"); + return false; + } + + // Otherwise we are OK with sinking along a critical edge. + DEBUG(dbgs() << "Sinking along critical edge.\n"); } // Determine where to insert into. Skip phi nodes. diff --git a/lib/CodeGen/MachineVerifier.cpp b/lib/CodeGen/MachineVerifier.cpp index 434a1e8..0b75c55 100644 --- a/lib/CodeGen/MachineVerifier.cpp +++ b/lib/CodeGen/MachineVerifier.cpp @@ -279,7 +279,7 @@ bool MachineVerifier::runOnMachineFunction(MachineFunction &MF) { if (OutFile) delete OutFile; else if (foundErrors) - llvm_report_error("Found "+Twine(foundErrors)+" machine code errors."); + report_fatal_error("Found "+Twine(foundErrors)+" machine code errors."); // Clean up. regsLive.clear(); @@ -351,8 +351,8 @@ void MachineVerifier::visitMachineFunctionBefore() { } // Does iterator point to a and b as the first two elements? -bool matchPair(MachineBasicBlock::const_succ_iterator i, - const MachineBasicBlock *a, const MachineBasicBlock *b) { +static bool matchPair(MachineBasicBlock::const_succ_iterator i, + const MachineBasicBlock *a, const MachineBasicBlock *b) { if (*i == a) return *++i == b; if (*i == b) @@ -470,7 +470,7 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) { } regsLive.clear(); - for (MachineBasicBlock::const_livein_iterator I = MBB->livein_begin(), + for (MachineBasicBlock::livein_iterator I = MBB->livein_begin(), E = MBB->livein_end(); I != E; ++I) { if (!TargetRegisterInfo::isPhysicalRegister(*I)) { report("MBB live-in list contains non-physical register", MBB); diff --git a/lib/CodeGen/PostRASchedulerList.cpp b/lib/CodeGen/PostRASchedulerList.cpp index 424181c..d3e1295 100644 --- a/lib/CodeGen/PostRASchedulerList.cpp +++ b/lib/CodeGen/PostRASchedulerList.cpp @@ -46,7 +46,6 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/Statistic.h" -#include <map> #include <set> using namespace llvm; @@ -266,6 +265,17 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) { // Initialize register live-range state for scheduling in this block. Scheduler.StartBlock(MBB); + // FIXME: Temporary workaround for <rdar://problem/7759363>: The post-RA + // scheduler has some sort of problem with DebugValue instructions that + // causes an assertion in LeaksContext.h to fail occasionally. Just + // remove all those instructions for now. + for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); + I != E; ) { + MachineInstr *MI = &*I++; + if (MI->isDebugValue()) + MI->eraseFromParent(); + } + // Schedule each sequence of instructions not interrupted by a label // or anything else that effectively needs to shut down scheduling. 
MachineBasicBlock::iterator Current = MBB->end(); @@ -274,7 +284,7 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) { MachineInstr *MI = prior(I); if (isSchedulingBoundary(MI, Fn)) { Scheduler.Run(MBB, I, Current, CurrentCount); - Scheduler.EmitSchedule(0); + Scheduler.EmitSchedule(); Current = MI; CurrentCount = Count - 1; Scheduler.Observe(MI, CurrentCount); @@ -286,7 +296,7 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) { assert((MBB->begin() == Current || CurrentCount != 0) && "Instruction count mismatch!"); Scheduler.Run(MBB, MBB->begin(), Current, CurrentCount); - Scheduler.EmitSchedule(0); + Scheduler.EmitSchedule(); // Clean up register live-range state. Scheduler.FinishBlock(); diff --git a/lib/CodeGen/PrologEpilogInserter.cpp b/lib/CodeGen/PrologEpilogInserter.cpp index 27cb566..a454b62 100644 --- a/lib/CodeGen/PrologEpilogInserter.cpp +++ b/lib/CodeGen/PrologEpilogInserter.cpp @@ -131,10 +131,10 @@ void PEI::getAnalysisUsage(AnalysisUsage &AU) const { /// pseudo instructions. void PEI::calculateCallsInformation(MachineFunction &Fn) { const TargetRegisterInfo *RegInfo = Fn.getTarget().getRegisterInfo(); - MachineFrameInfo *FFI = Fn.getFrameInfo(); + MachineFrameInfo *MFI = Fn.getFrameInfo(); unsigned MaxCallFrameSize = 0; - bool HasCalls = FFI->hasCalls(); + bool HasCalls = MFI->hasCalls(); // Get the function call frame set-up and tear-down instruction opcode int FrameSetupOpcode = RegInfo->getCallFrameSetupOpcode(); @@ -162,8 +162,8 @@ void PEI::calculateCallsInformation(MachineFunction &Fn) { HasCalls = true; } - FFI->setHasCalls(HasCalls); - FFI->setMaxCallFrameSize(MaxCallFrameSize); + MFI->setHasCalls(HasCalls); + MFI->setMaxCallFrameSize(MaxCallFrameSize); for (std::vector<MachineBasicBlock::iterator>::iterator i = FrameSDOps.begin(), e = FrameSDOps.end(); i != e; ++i) { @@ -184,7 +184,7 @@ void PEI::calculateCallsInformation(MachineFunction &Fn) { void PEI::calculateCalleeSavedRegisters(MachineFunction &Fn) { const TargetRegisterInfo *RegInfo = Fn.getTarget().getRegisterInfo(); const TargetFrameInfo *TFI = Fn.getTarget().getFrameInfo(); - MachineFrameInfo *FFI = Fn.getFrameInfo(); + MachineFrameInfo *MFI = Fn.getFrameInfo(); // Get the callee saved register list... const unsigned *CSRegs = RegInfo->getCalleeSavedRegs(&Fn); @@ -197,6 +197,10 @@ void PEI::calculateCalleeSavedRegisters(MachineFunction &Fn) { if (CSRegs == 0 || CSRegs[0] == 0) return; + // In Naked functions we aren't going to save any registers. + if (Fn.getFunction()->hasFnAttr(Attribute::Naked)) + return; + // Figure out which *callee saved* registers are modified by the current // function, thus needing to be saved and restored in the prolog/epilog. const TargetRegisterClass * const *CSRegClasses = @@ -255,19 +259,19 @@ void PEI::calculateCalleeSavedRegisters(MachineFunction &Fn) { // the TargetRegisterClass if the stack alignment is smaller. Use the // min. Align = std::min(Align, StackAlign); - FrameIdx = FFI->CreateStackObject(RC->getSize(), Align, true); + FrameIdx = MFI->CreateStackObject(RC->getSize(), Align, true); if ((unsigned)FrameIdx < MinCSFrameIndex) MinCSFrameIndex = FrameIdx; if ((unsigned)FrameIdx > MaxCSFrameIndex) MaxCSFrameIndex = FrameIdx; } else { // Spill it to the stack where we must. 
- FrameIdx = FFI->CreateFixedObject(RC->getSize(), FixedSlot->Offset, + FrameIdx = MFI->CreateFixedObject(RC->getSize(), FixedSlot->Offset, true, false); } I->setFrameIdx(FrameIdx); } - FFI->setCalleeSavedInfo(CSI); + MFI->setCalleeSavedInfo(CSI); } /// insertCSRSpillsAndRestores - Insert spill and restore code for @@ -275,10 +279,10 @@ void PEI::calculateCalleeSavedRegisters(MachineFunction &Fn) { /// void PEI::insertCSRSpillsAndRestores(MachineFunction &Fn) { // Get callee saved register information. - MachineFrameInfo *FFI = Fn.getFrameInfo(); - const std::vector<CalleeSavedInfo> &CSI = FFI->getCalleeSavedInfo(); + MachineFrameInfo *MFI = Fn.getFrameInfo(); + const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo(); - FFI->setCalleeSavedInfoValid(true); + MFI->setCalleeSavedInfoValid(true); // Early exit if no callee saved registers are modified! if (CSI.empty()) @@ -436,14 +440,14 @@ void PEI::insertCSRSpillsAndRestores(MachineFunction &Fn) { /// AdjustStackOffset - Helper function used to adjust the stack frame offset. static inline void -AdjustStackOffset(MachineFrameInfo *FFI, int FrameIdx, +AdjustStackOffset(MachineFrameInfo *MFI, int FrameIdx, bool StackGrowsDown, int64_t &Offset, unsigned &MaxAlign) { // If the stack grows down, add the object size to find the lowest address. if (StackGrowsDown) - Offset += FFI->getObjectSize(FrameIdx); + Offset += MFI->getObjectSize(FrameIdx); - unsigned Align = FFI->getObjectAlignment(FrameIdx); + unsigned Align = MFI->getObjectAlignment(FrameIdx); // If the alignment of this object is greater than that of the stack, then // increase the stack alignment to match. @@ -453,10 +457,10 @@ AdjustStackOffset(MachineFrameInfo *FFI, int FrameIdx, Offset = (Offset + Align - 1) / Align * Align; if (StackGrowsDown) { - FFI->setObjectOffset(FrameIdx, -Offset); // Set the computed offset + MFI->setObjectOffset(FrameIdx, -Offset); // Set the computed offset } else { - FFI->setObjectOffset(FrameIdx, Offset); - Offset += FFI->getObjectSize(FrameIdx); + MFI->setObjectOffset(FrameIdx, Offset); + Offset += MFI->getObjectSize(FrameIdx); } } @@ -470,7 +474,7 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { TFI.getStackGrowthDirection() == TargetFrameInfo::StackGrowsDown; // Loop over all of the stack objects, assigning sequential addresses... - MachineFrameInfo *FFI = Fn.getFrameInfo(); + MachineFrameInfo *MFI = Fn.getFrameInfo(); // Start at the beginning of the local area. // The Offset is the distance from the stack top in the direction @@ -487,17 +491,17 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { // We currently don't support filling in holes in between fixed sized // objects, so we adjust 'Offset' to point to the end of last fixed sized // preallocated object. - for (int i = FFI->getObjectIndexBegin(); i != 0; ++i) { + for (int i = MFI->getObjectIndexBegin(); i != 0; ++i) { int64_t FixedOff; if (StackGrowsDown) { // The maximum distance from the stack pointer is at lower address of // the object -- which is given by offset. For down growing stack // the offset is negative, so we negate the offset to get the distance. - FixedOff = -FFI->getObjectOffset(i); + FixedOff = -MFI->getObjectOffset(i); } else { // The maximum distance from the start pointer is at the upper // address of the object. 
- FixedOff = FFI->getObjectOffset(i) + FFI->getObjectSize(i); + FixedOff = MFI->getObjectOffset(i) + MFI->getObjectSize(i); } if (FixedOff > Offset) Offset = FixedOff; } @@ -506,29 +510,29 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { // callee saved registers. if (StackGrowsDown) { for (unsigned i = MinCSFrameIndex; i <= MaxCSFrameIndex; ++i) { - // If stack grows down, we need to add size of find the lowest + // If the stack grows down, we need to add the size to find the lowest // address of the object. - Offset += FFI->getObjectSize(i); + Offset += MFI->getObjectSize(i); - unsigned Align = FFI->getObjectAlignment(i); + unsigned Align = MFI->getObjectAlignment(i); // Adjust to alignment boundary Offset = (Offset+Align-1)/Align*Align; - FFI->setObjectOffset(i, -Offset); // Set the computed offset + MFI->setObjectOffset(i, -Offset); // Set the computed offset } } else { int MaxCSFI = MaxCSFrameIndex, MinCSFI = MinCSFrameIndex; for (int i = MaxCSFI; i >= MinCSFI ; --i) { - unsigned Align = FFI->getObjectAlignment(i); + unsigned Align = MFI->getObjectAlignment(i); // Adjust to alignment boundary Offset = (Offset+Align-1)/Align*Align; - FFI->setObjectOffset(i, Offset); - Offset += FFI->getObjectSize(i); + MFI->setObjectOffset(i, Offset); + Offset += MFI->getObjectSize(i); } } - unsigned MaxAlign = FFI->getMaxAlignment(); + unsigned MaxAlign = MFI->getMaxAlignment(); // Make sure the special register scavenging spill slot is closest to the // frame pointer if a frame pointer is required. @@ -536,28 +540,28 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { if (RS && RegInfo->hasFP(Fn) && !RegInfo->needsStackRealignment(Fn)) { int SFI = RS->getScavengingFrameIndex(); if (SFI >= 0) - AdjustStackOffset(FFI, SFI, StackGrowsDown, Offset, MaxAlign); + AdjustStackOffset(MFI, SFI, StackGrowsDown, Offset, MaxAlign); } // Make sure that the stack protector comes before the local variables on the // stack. - if (FFI->getStackProtectorIndex() >= 0) - AdjustStackOffset(FFI, FFI->getStackProtectorIndex(), StackGrowsDown, + if (MFI->getStackProtectorIndex() >= 0) + AdjustStackOffset(MFI, MFI->getStackProtectorIndex(), StackGrowsDown, Offset, MaxAlign); // Then assign frame offsets to stack objects that are not used to spill // callee saved registers. - for (unsigned i = 0, e = FFI->getObjectIndexEnd(); i != e; ++i) { + for (unsigned i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i) { if (i >= MinCSFrameIndex && i <= MaxCSFrameIndex) continue; if (RS && (int)i == RS->getScavengingFrameIndex()) continue; - if (FFI->isDeadObjectIndex(i)) + if (MFI->isDeadObjectIndex(i)) continue; - if (FFI->getStackProtectorIndex() == (int)i) + if (MFI->getStackProtectorIndex() == (int)i) continue; - AdjustStackOffset(FFI, i, StackGrowsDown, Offset, MaxAlign); + AdjustStackOffset(MFI, i, StackGrowsDown, Offset, MaxAlign); } // Make sure the special register scavenging spill slot is closest to the @@ -565,15 +569,15 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { if (RS && (!RegInfo->hasFP(Fn) || RegInfo->needsStackRealignment(Fn))) { int SFI = RS->getScavengingFrameIndex(); if (SFI >= 0) - AdjustStackOffset(FFI, SFI, StackGrowsDown, Offset, MaxAlign); + AdjustStackOffset(MFI, SFI, StackGrowsDown, Offset, MaxAlign); } if (!RegInfo->targetHandlesStackFrameRounding()) { // If we have reserved argument space for call sites in the function // immediately on entry to the current function, count it as part of the // overall stack size. 
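(Editorial aside, not part of the patch: the offset assignment above repeatedly rounds the running offset up to each object's alignment with (Offset + Align - 1) / Align * Align. A self-contained worked example of that formula, assuming only that Align is positive:)

#include <cassert>
#include <cstdint>

// Round Offset up to the next multiple of Align, as AdjustStackOffset and
// calculateFrameObjectOffsets do when laying out stack objects.
static int64_t roundUpToAlignment(int64_t Offset, unsigned Align) {
  return (Offset + Align - 1) / Align * Align;
}

int main() {
  assert(roundUpToAlignment(0, 8)  == 0);   // already aligned
  assert(roundUpToAlignment(13, 8) == 16);  // 13 bumps up to the next 8-byte boundary
  assert(roundUpToAlignment(16, 8) == 16);  // exact multiples are left unchanged
  return 0;
}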
- if (FFI->hasCalls() && RegInfo->hasReservedCallFrame(Fn)) - Offset += FFI->getMaxCallFrameSize(); + if (MFI->hasCalls() && RegInfo->hasReservedCallFrame(Fn)) + Offset += MFI->getMaxCallFrameSize(); // Round up the size to a multiple of the alignment. If the function has // any calls or alloca's, align to the target's StackAlignment value to @@ -581,8 +585,8 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { // otherwise, for leaf functions, align to the TransientStackAlignment // value. unsigned StackAlign; - if (FFI->hasCalls() || FFI->hasVarSizedObjects() || - (RegInfo->needsStackRealignment(Fn) && FFI->getObjectIndexEnd() != 0)) + if (MFI->hasCalls() || MFI->hasVarSizedObjects() || + (RegInfo->needsStackRealignment(Fn) && MFI->getObjectIndexEnd() != 0)) StackAlign = TFI.getStackAlignment(); else StackAlign = TFI.getTransientStackAlignment(); @@ -594,7 +598,7 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { } // Update frame info to pretend that this is part of the stack... - FFI->setStackSize(Offset - LocalAreaOffset); + MFI->setStackSize(Offset - LocalAreaOffset); } diff --git a/lib/CodeGen/RegAllocFast.cpp b/lib/CodeGen/RegAllocFast.cpp new file mode 100644 index 0000000..2caf1df --- /dev/null +++ b/lib/CodeGen/RegAllocFast.cpp @@ -0,0 +1,932 @@ +//===-- RegAllocFast.cpp - A fast register allocator for debug code -------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This register allocator allocates registers to a basic block at a time, +// attempting to keep values in registers and reusing registers as appropriate. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "regalloc" +#include "llvm/BasicBlock.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/LiveVariables.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/RegAllocRegistry.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/IndexedMap.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/STLExtras.h" +#include <algorithm> +using namespace llvm; + +STATISTIC(NumStores, "Number of stores added"); +STATISTIC(NumLoads , "Number of loads added"); + +static RegisterRegAlloc + fastRegAlloc("fast", "fast register allocator", createFastRegisterAllocator); + +namespace { + class RAFast : public MachineFunctionPass { + public: + static char ID; + RAFast() : MachineFunctionPass(&ID), StackSlotForVirtReg(-1) {} + private: + const TargetMachine *TM; + MachineFunction *MF; + const TargetRegisterInfo *TRI; + const TargetInstrInfo *TII; + + // StackSlotForVirtReg - Maps virtual regs to the frame index where these + // values are spilled. + IndexedMap<int, VirtReg2IndexFunctor> StackSlotForVirtReg; + + // Virt2PhysRegMap - This map contains entries for each virtual register + // that is currently available in a physical register. 
+ IndexedMap<unsigned, VirtReg2IndexFunctor> Virt2PhysRegMap; + + unsigned &getVirt2PhysRegMapSlot(unsigned VirtReg) { + return Virt2PhysRegMap[VirtReg]; + } + + // PhysRegsUsed - This array is effectively a map, containing entries for + // each physical register that currently has a value (ie, it is in + // Virt2PhysRegMap). The value mapped to is the virtual register + // corresponding to the physical register (the inverse of the + // Virt2PhysRegMap), or 0. The value is set to 0 if this register is pinned + // because it is used by a future instruction, and to -2 if it is not + // allocatable. If the entry for a physical register is -1, then the + // physical register is "not in the map". + // + std::vector<int> PhysRegsUsed; + + // UsedInInstr - BitVector of physregs that are used in the current + // instruction, and so cannot be allocated. + BitVector UsedInInstr; + + // Virt2LastUseMap - This maps each virtual register to its last use + // (MachineInstr*, operand index pair). + IndexedMap<std::pair<MachineInstr*, unsigned>, VirtReg2IndexFunctor> + Virt2LastUseMap; + + std::pair<MachineInstr*,unsigned>& getVirtRegLastUse(unsigned Reg) { + assert(TargetRegisterInfo::isVirtualRegister(Reg) && "Illegal VirtReg!"); + return Virt2LastUseMap[Reg]; + } + + // VirtRegModified - This bitset contains information about which virtual + // registers need to be spilled back to memory when their registers are + // scavenged. If a virtual register has simply been rematerialized, there + // is no reason to spill it to memory when we need the register back. + // + BitVector VirtRegModified; + + // UsedInMultipleBlocks - Tracks whether a particular register is used in + // more than one block. + BitVector UsedInMultipleBlocks; + + void markVirtRegModified(unsigned Reg, bool Val = true) { + assert(TargetRegisterInfo::isVirtualRegister(Reg) && "Illegal VirtReg!"); + Reg -= TargetRegisterInfo::FirstVirtualRegister; + if (Val) + VirtRegModified.set(Reg); + else + VirtRegModified.reset(Reg); + } + + bool isVirtRegModified(unsigned Reg) const { + assert(TargetRegisterInfo::isVirtualRegister(Reg) && "Illegal VirtReg!"); + assert(Reg - TargetRegisterInfo::FirstVirtualRegister < + VirtRegModified.size() && "Illegal virtual register!"); + return VirtRegModified[Reg - TargetRegisterInfo::FirstVirtualRegister]; + } + + public: + virtual const char *getPassName() const { + return "Fast Register Allocator"; + } + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); + AU.addRequired<LiveVariables>(); + AU.addRequiredID(PHIEliminationID); + AU.addRequiredID(TwoAddressInstructionPassID); + MachineFunctionPass::getAnalysisUsage(AU); + } + + private: + /// runOnMachineFunction - Register allocate the whole function + bool runOnMachineFunction(MachineFunction &Fn); + + /// AllocateBasicBlock - Register allocate the specified basic block. + void AllocateBasicBlock(MachineBasicBlock &MBB); + + + /// areRegsEqual - This method returns true if the specified registers are + /// related to each other. To do this, it checks to see if they are equal + /// or if the first register is in the alias set of the second register. + /// + bool areRegsEqual(unsigned R1, unsigned R2) const { + if (R1 == R2) return true; + for (const unsigned *AliasSet = TRI->getAliasSet(R2); + *AliasSet; ++AliasSet) { + if (*AliasSet == R1) return true; + } + return false; + } + + /// getStackSpaceFor - This returns the frame index of the specified virtual + /// register on the stack, allocating space if necessary. 
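(Editorial aside, not part of the patch: the PhysRegsUsed comment above describes a small integer encoding. The hypothetical helpers below spell that encoding out, with a plain std::vector standing in for RAFast's member:)

#include <vector>

// Encoding used by PhysRegsUsed in the new allocator:
//   -2  register is not allocatable
//   -1  register is free ("not in the map")
//    0  register is pinned/reserved for a later use
//   >0  the virtual register currently held in this physical register
static bool isFree(const std::vector<int> &PhysRegsUsed, unsigned Reg) {
  return PhysRegsUsed[Reg] == -1;
}
static bool isAllocatable(const std::vector<int> &PhysRegsUsed, unsigned Reg) {
  return PhysRegsUsed[Reg] != -2;
}
static unsigned heldVirtReg(const std::vector<int> &PhysRegsUsed, unsigned Reg) {
  return PhysRegsUsed[Reg] > 0 ? unsigned(PhysRegsUsed[Reg]) : 0;
}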
+ int getStackSpaceFor(unsigned VirtReg, const TargetRegisterClass *RC); + + /// removePhysReg - This method marks the specified physical register as no + /// longer being in use. + /// + void removePhysReg(unsigned PhysReg); + + /// spillVirtReg - This method spills the value specified by PhysReg into + /// the virtual register slot specified by VirtReg. It then updates the RA + /// data structures to indicate the fact that PhysReg is now available. + /// + void spillVirtReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, + unsigned VirtReg, unsigned PhysReg); + + /// spillPhysReg - This method spills the specified physical register into + /// the virtual register slot associated with it. If OnlyVirtRegs is set to + /// true, then the request is ignored if the physical register does not + /// contain a virtual register. + /// + void spillPhysReg(MachineBasicBlock &MBB, MachineInstr *I, + unsigned PhysReg, bool OnlyVirtRegs = false); + + /// assignVirtToPhysReg - This method updates local state so that we know + /// that PhysReg is the proper container for VirtReg now. The physical + /// register must not be used for anything else when this is called. + /// + void assignVirtToPhysReg(unsigned VirtReg, unsigned PhysReg); + + /// isPhysRegAvailable - Return true if the specified physical register is + /// free and available for use. This also includes checking to see if + /// aliased registers are all free... + /// + bool isPhysRegAvailable(unsigned PhysReg) const; + + /// isPhysRegSpillable - Can PhysReg be freed by spilling? + bool isPhysRegSpillable(unsigned PhysReg) const; + + /// getFreeReg - Look to see if there is a free register available in the + /// specified register class. If not, return 0. + /// + unsigned getFreeReg(const TargetRegisterClass *RC); + + /// getReg - Find a physical register to hold the specified virtual + /// register. If all compatible physical registers are used, this method + /// spills the last used virtual register to the stack, and uses that + /// register. If NoFree is true, that means the caller knows there isn't + /// a free register, do not call getFreeReg(). + unsigned getReg(MachineBasicBlock &MBB, MachineInstr *MI, + unsigned VirtReg, bool NoFree = false); + + /// reloadVirtReg - This method transforms the specified virtual + /// register use to refer to a physical register. This method may do this + /// in one of several ways: if the register is available in a physical + /// register already, it uses that physical register. If the value is not + /// in a physical register, and if there are physical registers available, + /// it loads it into a register: PhysReg if that is an available physical + /// register, otherwise any physical register of the right class. + /// If register pressure is high, and it is possible, it tries to fold the + /// load of the virtual register into the instruction itself. It avoids + /// doing this if register pressure is low to improve the chance that + /// subsequent instructions can use the reloaded value. This method + /// returns the modified instruction. + /// + MachineInstr *reloadVirtReg(MachineBasicBlock &MBB, MachineInstr *MI, + unsigned OpNum, SmallSet<unsigned, 4> &RRegs, + unsigned PhysReg); + + void reloadPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator &I, + unsigned PhysReg); + }; + char RAFast::ID = 0; +} + +/// getStackSpaceFor - This allocates space for the specified virtual register +/// to be held on the stack. 
+int RAFast::getStackSpaceFor(unsigned VirtReg, const TargetRegisterClass *RC) { + // Find the location Reg would belong... + int SS = StackSlotForVirtReg[VirtReg]; + if (SS != -1) + return SS; // Already has space allocated? + + // Allocate a new stack object for this spill location... + int FrameIdx = MF->getFrameInfo()->CreateSpillStackObject(RC->getSize(), + RC->getAlignment()); + + // Assign the slot. + StackSlotForVirtReg[VirtReg] = FrameIdx; + return FrameIdx; +} + + +/// removePhysReg - This method marks the specified physical register as no +/// longer being in use. +/// +void RAFast::removePhysReg(unsigned PhysReg) { + PhysRegsUsed[PhysReg] = -1; // PhyReg no longer used +} + + +/// spillVirtReg - This method spills the value specified by PhysReg into the +/// virtual register slot specified by VirtReg. It then updates the RA data +/// structures to indicate the fact that PhysReg is now available. +/// +void RAFast::spillVirtReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, + unsigned VirtReg, unsigned PhysReg) { + assert(VirtReg && "Spilling a physical register is illegal!" + " Must not have appropriate kill for the register or use exists beyond" + " the intended one."); + DEBUG(dbgs() << " Spilling register " << TRI->getName(PhysReg) + << " containing %reg" << VirtReg); + + if (!isVirtRegModified(VirtReg)) { + DEBUG(dbgs() << " which has not been modified, so no store necessary!"); + std::pair<MachineInstr*, unsigned> &LastUse = getVirtRegLastUse(VirtReg); + if (LastUse.first) + LastUse.first->getOperand(LastUse.second).setIsKill(); + } else { + // Otherwise, there is a virtual register corresponding to this physical + // register. We only need to spill it into its stack slot if it has been + // modified. + const TargetRegisterClass *RC = MF->getRegInfo().getRegClass(VirtReg); + int FrameIndex = getStackSpaceFor(VirtReg, RC); + DEBUG(dbgs() << " to stack slot #" << FrameIndex); + // If the instruction reads the register that's spilled, (e.g. this can + // happen if it is a move to a physical register), then the spill + // instruction is not a kill. + bool isKill = !(I != MBB.end() && I->readsRegister(PhysReg)); + TII->storeRegToStackSlot(MBB, I, PhysReg, isKill, FrameIndex, RC); + ++NumStores; // Update statistics + } + + getVirt2PhysRegMapSlot(VirtReg) = 0; // VirtReg no longer available + + DEBUG(dbgs() << '\n'); + removePhysReg(PhysReg); +} + + +/// spillPhysReg - This method spills the specified physical register into the +/// virtual register slot associated with it. If OnlyVirtRegs is set to true, +/// then the request is ignored if the physical register does not contain a +/// virtual register. +/// +void RAFast::spillPhysReg(MachineBasicBlock &MBB, MachineInstr *I, + unsigned PhysReg, bool OnlyVirtRegs) { + if (PhysRegsUsed[PhysReg] != -1) { // Only spill it if it's used! + assert(PhysRegsUsed[PhysReg] != -2 && "Non allocable reg used!"); + if (PhysRegsUsed[PhysReg] || !OnlyVirtRegs) + spillVirtReg(MBB, I, PhysRegsUsed[PhysReg], PhysReg); + return; + } + + // If the selected register aliases any other registers, we must make + // sure that one of the aliases isn't alive. + for (const unsigned *AliasSet = TRI->getAliasSet(PhysReg); + *AliasSet; ++AliasSet) { + if (PhysRegsUsed[*AliasSet] == -1 || // Spill aliased register. + PhysRegsUsed[*AliasSet] == -2) // If allocatable. 
+ continue; + + if (PhysRegsUsed[*AliasSet]) + spillVirtReg(MBB, I, PhysRegsUsed[*AliasSet], *AliasSet); + } +} + + +/// assignVirtToPhysReg - This method updates local state so that we know +/// that PhysReg is the proper container for VirtReg now. The physical +/// register must not be used for anything else when this is called. +/// +void RAFast::assignVirtToPhysReg(unsigned VirtReg, unsigned PhysReg) { + assert(PhysRegsUsed[PhysReg] == -1 && "Phys reg already assigned!"); + // Update information to note the fact that this register was just used, and + // it holds VirtReg. + PhysRegsUsed[PhysReg] = VirtReg; + getVirt2PhysRegMapSlot(VirtReg) = PhysReg; + UsedInInstr.set(PhysReg); +} + + +/// isPhysRegAvailable - Return true if the specified physical register is free +/// and available for use. This also includes checking to see if aliased +/// registers are all free... +/// +bool RAFast::isPhysRegAvailable(unsigned PhysReg) const { + if (PhysRegsUsed[PhysReg] != -1) return false; + + // If the selected register aliases any other allocated registers, it is + // not free! + for (const unsigned *AliasSet = TRI->getAliasSet(PhysReg); + *AliasSet; ++AliasSet) + if (PhysRegsUsed[*AliasSet] >= 0) // Aliased register in use? + return false; // Can't use this reg then. + return true; +} + +/// isPhysRegSpillable - Return true if the specified physical register can be +/// spilled for use in the current instruction. +/// +bool RAFast::isPhysRegSpillable(unsigned PhysReg) const { + // Test that PhysReg and all aliases are either free or assigned to a VirtReg + // that is not used in the instruction. + if (PhysRegsUsed[PhysReg] != -1 && + (PhysRegsUsed[PhysReg] <= 0 || UsedInInstr.test(PhysReg))) + return false; + + for (const unsigned *AliasSet = TRI->getAliasSet(PhysReg); + *AliasSet; ++AliasSet) + if (PhysRegsUsed[*AliasSet] != -1 && + (PhysRegsUsed[*AliasSet] <= 0 || UsedInInstr.test(*AliasSet))) + return false; + return true; +} + + +/// getFreeReg - Look to see if there is a free register available in the +/// specified register class. If not, return 0. +/// +unsigned RAFast::getFreeReg(const TargetRegisterClass *RC) { + // Get iterators defining the range of registers that are valid to allocate in + // this class, which also specifies the preferred allocation order. + TargetRegisterClass::iterator RI = RC->allocation_order_begin(*MF); + TargetRegisterClass::iterator RE = RC->allocation_order_end(*MF); + + for (; RI != RE; ++RI) + if (isPhysRegAvailable(*RI)) { // Is reg unused? + assert(*RI != 0 && "Cannot use register!"); + return *RI; // Found an unused register! + } + return 0; +} + + +/// getReg - Find a physical register to hold the specified virtual +/// register. If all compatible physical registers are used, this method spills +/// the last used virtual register to the stack, and uses that register. +/// +unsigned RAFast::getReg(MachineBasicBlock &MBB, MachineInstr *I, + unsigned VirtReg, bool NoFree) { + const TargetRegisterClass *RC = MF->getRegInfo().getRegClass(VirtReg); + + // First check to see if we have a free register of the requested type... + unsigned PhysReg = NoFree ? 0 : getFreeReg(RC); + + if (PhysReg != 0) { + // Assign the register. + assignVirtToPhysReg(VirtReg, PhysReg); + return PhysReg; + } + + // If we didn't find an unused register, scavenge one now! Don't be fancy, + // just grab the first possible register. 
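(Editorial aside, not part of the patch: isPhysRegAvailable and isPhysRegSpillable above both walk TargetRegisterInfo::getAliasSet, which yields a 0-terminated array of register numbers. A tiny sketch of that pattern, with the AliasSet and PhysRegsUsed arrays as hypothetical inputs:)

// Return true if the register itself or any alias currently holds a value,
// i.e. its PhysRegsUsed entry is 0 or a virtual register number.
static bool regOrAliasInUse(int RegState, const unsigned *AliasSet,
                            const int *PhysRegsUsed) {
  if (RegState >= 0)
    return true;                       // the register itself is occupied
  for (; *AliasSet; ++AliasSet)        // alias list is terminated by register 0
    if (PhysRegsUsed[*AliasSet] >= 0)  // an alias holds a value
      return true;
  return false;
}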
+ TargetRegisterClass::iterator RI = RC->allocation_order_begin(*MF); + TargetRegisterClass::iterator RE = RC->allocation_order_end(*MF); + + for (; RI != RE; ++RI) + if (isPhysRegSpillable(*RI)) { + PhysReg = *RI; + break; + } + + assert(PhysReg && "Physical register not assigned!?!?"); + spillPhysReg(MBB, I, PhysReg); + assignVirtToPhysReg(VirtReg, PhysReg); + return PhysReg; +} + + +/// reloadVirtReg - This method transforms the specified virtual +/// register use to refer to a physical register. This method may do this in +/// one of several ways: if the register is available in a physical register +/// already, it uses that physical register. If the value is not in a physical +/// register, and if there are physical registers available, it loads it into a +/// register: PhysReg if that is an available physical register, otherwise any +/// register. If register pressure is high, and it is possible, it tries to +/// fold the load of the virtual register into the instruction itself. It +/// avoids doing this if register pressure is low to improve the chance that +/// subsequent instructions can use the reloaded value. This method returns +/// the modified instruction. +/// +MachineInstr *RAFast::reloadVirtReg(MachineBasicBlock &MBB, MachineInstr *MI, + unsigned OpNum, + SmallSet<unsigned, 4> &ReloadedRegs, + unsigned PhysReg) { + unsigned VirtReg = MI->getOperand(OpNum).getReg(); + + // If the virtual register is already available, just update the instruction + // and return. + if (unsigned PR = getVirt2PhysRegMapSlot(VirtReg)) { + MI->getOperand(OpNum).setReg(PR); // Assign the input register + if (!MI->isDebugValue()) { + // Do not do these for DBG_VALUE as they can affect codegen. + UsedInInstr.set(PR); + getVirtRegLastUse(VirtReg) = std::make_pair(MI, OpNum); + } + return MI; + } + + // Otherwise, we need to fold it into the current instruction, or reload it. + // If we have registers available to hold the value, use them. + const TargetRegisterClass *RC = MF->getRegInfo().getRegClass(VirtReg); + // If we already have a PhysReg (this happens when the instruction is a + // reg-to-reg copy with a PhysReg destination) use that. + if (!PhysReg || !TargetRegisterInfo::isPhysicalRegister(PhysReg) || + !isPhysRegAvailable(PhysReg)) + PhysReg = getFreeReg(RC); + int FrameIndex = getStackSpaceFor(VirtReg, RC); + + if (PhysReg) { // Register is available, allocate it! + assignVirtToPhysReg(VirtReg, PhysReg); + } else { // No registers available. + // Force some poor hapless value out of the register file to + // make room for the new register, and reload it. 
+ PhysReg = getReg(MBB, MI, VirtReg, true); + } + + markVirtRegModified(VirtReg, false); // Note that this reg was just reloaded + + DEBUG(dbgs() << " Reloading %reg" << VirtReg << " into " + << TRI->getName(PhysReg) << "\n"); + + // Add move instruction(s) + TII->loadRegFromStackSlot(MBB, MI, PhysReg, FrameIndex, RC); + ++NumLoads; // Update statistics + + MF->getRegInfo().setPhysRegUsed(PhysReg); + MI->getOperand(OpNum).setReg(PhysReg); // Assign the input register + getVirtRegLastUse(VirtReg) = std::make_pair(MI, OpNum); + + if (!ReloadedRegs.insert(PhysReg)) { + std::string msg; + raw_string_ostream Msg(msg); + Msg << "Ran out of registers during register allocation!"; + if (MI->isInlineAsm()) { + Msg << "\nPlease check your inline asm statement for invalid " + << "constraints:\n"; + MI->print(Msg, TM); + } + report_fatal_error(Msg.str()); + } + for (const unsigned *SubRegs = TRI->getSubRegisters(PhysReg); + *SubRegs; ++SubRegs) { + if (ReloadedRegs.insert(*SubRegs)) continue; + + std::string msg; + raw_string_ostream Msg(msg); + Msg << "Ran out of registers during register allocation!"; + if (MI->isInlineAsm()) { + Msg << "\nPlease check your inline asm statement for invalid " + << "constraints:\n"; + MI->print(Msg, TM); + } + report_fatal_error(Msg.str()); + } + + return MI; +} + +/// isReadModWriteImplicitKill - True if this is an implicit kill for a +/// read/mod/write register, i.e. update partial register. +static bool isReadModWriteImplicitKill(MachineInstr *MI, unsigned Reg) { + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (MO.isReg() && MO.getReg() == Reg && MO.isImplicit() && + MO.isDef() && !MO.isDead()) + return true; + } + return false; +} + +/// isReadModWriteImplicitDef - True if this is an implicit def for a +/// read/mod/write register, i.e. update partial register. +static bool isReadModWriteImplicitDef(MachineInstr *MI, unsigned Reg) { + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (MO.isReg() && MO.getReg() == Reg && MO.isImplicit() && + !MO.isDef() && MO.isKill()) + return true; + } + return false; +} + +void RAFast::AllocateBasicBlock(MachineBasicBlock &MBB) { + // loop over each instruction + MachineBasicBlock::iterator MII = MBB.begin(); + + DEBUG({ + const BasicBlock *LBB = MBB.getBasicBlock(); + if (LBB) + dbgs() << "\nStarting RegAlloc of BB: " << LBB->getName(); + }); + + // Add live-in registers as active. + for (MachineBasicBlock::livein_iterator I = MBB.livein_begin(), + E = MBB.livein_end(); I != E; ++I) { + unsigned Reg = *I; + MF->getRegInfo().setPhysRegUsed(Reg); + PhysRegsUsed[Reg] = 0; // It is free and reserved now + for (const unsigned *SubRegs = TRI->getSubRegisters(Reg); + *SubRegs; ++SubRegs) { + if (PhysRegsUsed[*SubRegs] == -2) continue; + PhysRegsUsed[*SubRegs] = 0; // It is free and reserved now + MF->getRegInfo().setPhysRegUsed(*SubRegs); + } + } + + // Otherwise, sequentially allocate each instruction in the MBB. + while (MII != MBB.end()) { + MachineInstr *MI = MII++; + const TargetInstrDesc &TID = MI->getDesc(); + DEBUG({ + dbgs() << "\nStarting RegAlloc of: " << *MI; + dbgs() << " Regs have values: "; + for (unsigned i = 0; i != TRI->getNumRegs(); ++i) + if (PhysRegsUsed[i] != -1 && PhysRegsUsed[i] != -2) + dbgs() << "[" << TRI->getName(i) + << ",%reg" << PhysRegsUsed[i] << "] "; + dbgs() << '\n'; + }); + + // Track registers used by instruction. 
+ UsedInInstr.reset(); + + // Determine whether this is a copy instruction. The cases where the + // source or destination are phys regs are handled specially. + unsigned SrcCopyReg, DstCopyReg, SrcCopySubReg, DstCopySubReg; + unsigned SrcCopyPhysReg = 0U; + bool isCopy = TII->isMoveInstr(*MI, SrcCopyReg, DstCopyReg, + SrcCopySubReg, DstCopySubReg); + if (isCopy && TargetRegisterInfo::isVirtualRegister(SrcCopyReg)) + SrcCopyPhysReg = getVirt2PhysRegMapSlot(SrcCopyReg); + + // Loop over the implicit uses, making sure they don't get reallocated. + if (TID.ImplicitUses) { + for (const unsigned *ImplicitUses = TID.ImplicitUses; + *ImplicitUses; ++ImplicitUses) + UsedInInstr.set(*ImplicitUses); + } + + SmallVector<unsigned, 8> Kills; + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg() || !MO.isKill()) continue; + + if (!MO.isImplicit()) + Kills.push_back(MO.getReg()); + else if (!isReadModWriteImplicitKill(MI, MO.getReg())) + // These are extra physical register kills when a sub-register + // is defined (def of a sub-register is a read/mod/write of the + // larger registers). Ignore. + Kills.push_back(MO.getReg()); + } + + // If any physical regs are earlyclobber, spill any value they might + // have in them, then mark them unallocatable. + // If any virtual regs are earlyclobber, allocate them now (before + // freeing inputs that are killed). + if (MI->isInlineAsm()) { + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg() || !MO.isDef() || !MO.isEarlyClobber() || + !MO.getReg()) + continue; + + if (TargetRegisterInfo::isVirtualRegister(MO.getReg())) { + unsigned DestVirtReg = MO.getReg(); + unsigned DestPhysReg; + + // If DestVirtReg already has a value, use it. + if (!(DestPhysReg = getVirt2PhysRegMapSlot(DestVirtReg))) + DestPhysReg = getReg(MBB, MI, DestVirtReg); + MF->getRegInfo().setPhysRegUsed(DestPhysReg); + markVirtRegModified(DestVirtReg); + getVirtRegLastUse(DestVirtReg) = + std::make_pair((MachineInstr*)0, 0); + DEBUG(dbgs() << " Assigning " << TRI->getName(DestPhysReg) + << " to %reg" << DestVirtReg << "\n"); + MO.setReg(DestPhysReg); // Assign the earlyclobber register + } else { + unsigned Reg = MO.getReg(); + if (PhysRegsUsed[Reg] == -2) continue; // Something like ESP. + // These are extra physical register defs when a sub-register + // is defined (def of a sub-register is a read/mod/write of the + // larger registers). Ignore. + if (isReadModWriteImplicitDef(MI, MO.getReg())) continue; + + MF->getRegInfo().setPhysRegUsed(Reg); + spillPhysReg(MBB, MI, Reg, true); // Spill any existing value in reg + PhysRegsUsed[Reg] = 0; // It is free and reserved now + + for (const unsigned *SubRegs = TRI->getSubRegisters(Reg); + *SubRegs; ++SubRegs) { + if (PhysRegsUsed[*SubRegs] == -2) continue; + MF->getRegInfo().setPhysRegUsed(*SubRegs); + PhysRegsUsed[*SubRegs] = 0; // It is free and reserved now + } + } + } + } + + // If a DBG_VALUE says something is located in a spilled register, + // change the DBG_VALUE to be undef, which prevents the register + // from being reloaded here. Doing that would change the generated + // code, unless another use immediately follows this instruction. 
+ if (MI->isDebugValue() && + MI->getNumOperands()==3 && MI->getOperand(0).isReg()) { + unsigned VirtReg = MI->getOperand(0).getReg(); + if (VirtReg && TargetRegisterInfo::isVirtualRegister(VirtReg) && + !getVirt2PhysRegMapSlot(VirtReg)) + MI->getOperand(0).setReg(0U); + } + + // Get the used operands into registers. This has the potential to spill + // incoming values if we are out of registers. Note that we completely + // ignore physical register uses here. We assume that if an explicit + // physical register is referenced by the instruction, that it is guaranteed + // to be live-in, or the input is badly hosed. + // + SmallSet<unsigned, 4> ReloadedRegs; + for (unsigned i = 0; i != MI->getNumOperands(); ++i) { + MachineOperand &MO = MI->getOperand(i); + // here we are looking for only used operands (never def&use) + if (MO.isReg() && !MO.isDef() && MO.getReg() && !MO.isImplicit() && + TargetRegisterInfo::isVirtualRegister(MO.getReg())) + MI = reloadVirtReg(MBB, MI, i, ReloadedRegs, + isCopy ? DstCopyReg : 0); + } + + // If this instruction is the last user of this register, kill the + // value, freeing the register being used, so it doesn't need to be + // spilled to memory. + // + for (unsigned i = 0, e = Kills.size(); i != e; ++i) { + unsigned VirtReg = Kills[i]; + unsigned PhysReg = VirtReg; + if (TargetRegisterInfo::isVirtualRegister(VirtReg)) { + // If the virtual register was never materialized into a register, it + // might not be in the map, but it won't hurt to zero it out anyway. + unsigned &PhysRegSlot = getVirt2PhysRegMapSlot(VirtReg); + PhysReg = PhysRegSlot; + PhysRegSlot = 0; + } else if (PhysRegsUsed[PhysReg] == -2) { + // Unallocatable register dead, ignore. + continue; + } else { + assert((!PhysRegsUsed[PhysReg] || PhysRegsUsed[PhysReg] == -1) && + "Silently clearing a virtual register?"); + } + + if (!PhysReg) continue; + + DEBUG(dbgs() << " Last use of " << TRI->getName(PhysReg) + << "[%reg" << VirtReg <<"], removing it from live set\n"); + removePhysReg(PhysReg); + for (const unsigned *SubRegs = TRI->getSubRegisters(PhysReg); + *SubRegs; ++SubRegs) { + if (PhysRegsUsed[*SubRegs] != -2) { + DEBUG(dbgs() << " Last use of " + << TRI->getName(*SubRegs) << "[%reg" << VirtReg + <<"], removing it from live set\n"); + removePhysReg(*SubRegs); + } + } + } + + // Track registers defined by instruction. + UsedInInstr.reset(); + + // Loop over all of the operands of the instruction, spilling registers that + // are defined, and marking explicit destinations in the PhysRegsUsed map. + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg() || !MO.isDef() || MO.isImplicit() || !MO.getReg() || + MO.isEarlyClobber() || + !TargetRegisterInfo::isPhysicalRegister(MO.getReg())) + continue; + + unsigned Reg = MO.getReg(); + if (PhysRegsUsed[Reg] == -2) continue; // Something like ESP. + // These are extra physical register defs when a sub-register + // is defined (def of a sub-register is a read/mod/write of the + // larger registers). Ignore. 
+ if (isReadModWriteImplicitDef(MI, MO.getReg())) continue; + + MF->getRegInfo().setPhysRegUsed(Reg); + spillPhysReg(MBB, MI, Reg, true); // Spill any existing value in reg + PhysRegsUsed[Reg] = 0; // It is free and reserved now + + for (const unsigned *SubRegs = TRI->getSubRegisters(Reg); + *SubRegs; ++SubRegs) { + if (PhysRegsUsed[*SubRegs] == -2) continue; + + MF->getRegInfo().setPhysRegUsed(*SubRegs); + PhysRegsUsed[*SubRegs] = 0; // It is free and reserved now + } + } + + // Loop over the implicit defs, spilling them as well. + if (TID.ImplicitDefs) { + for (const unsigned *ImplicitDefs = TID.ImplicitDefs; + *ImplicitDefs; ++ImplicitDefs) { + unsigned Reg = *ImplicitDefs; + if (PhysRegsUsed[Reg] != -2) { + spillPhysReg(MBB, MI, Reg, true); + PhysRegsUsed[Reg] = 0; // It is free and reserved now + } + MF->getRegInfo().setPhysRegUsed(Reg); + for (const unsigned *SubRegs = TRI->getSubRegisters(Reg); + *SubRegs; ++SubRegs) { + if (PhysRegsUsed[*SubRegs] == -2) continue; + + PhysRegsUsed[*SubRegs] = 0; // It is free and reserved now + MF->getRegInfo().setPhysRegUsed(*SubRegs); + } + } + } + + SmallVector<unsigned, 8> DeadDefs; + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (MO.isReg() && MO.isDead()) + DeadDefs.push_back(MO.getReg()); + } + + // Okay, we have allocated all of the source operands and spilled any values + // that would be destroyed by defs of this instruction. Loop over the + // explicit defs and assign them to a register, spilling incoming values if + // we need to scavenge a register. + // + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg() || !MO.isDef() || !MO.getReg() || + MO.isEarlyClobber() || + !TargetRegisterInfo::isVirtualRegister(MO.getReg())) + continue; + + unsigned DestVirtReg = MO.getReg(); + unsigned DestPhysReg; + + // If DestVirtReg already has a value, use it. + if (!(DestPhysReg = getVirt2PhysRegMapSlot(DestVirtReg))) { + // If this is a copy try to reuse the input as the output; + // that will make the copy go away. + // If this is a copy, the source reg is a phys reg, and + // that reg is available, use that phys reg for DestPhysReg. + // If this is a copy, the source reg is a virtual reg, and + // the phys reg that was assigned to that virtual reg is now + // available, use that phys reg for DestPhysReg. (If it's now + // available that means this was the last use of the source.) + if (isCopy && + TargetRegisterInfo::isPhysicalRegister(SrcCopyReg) && + isPhysRegAvailable(SrcCopyReg)) { + DestPhysReg = SrcCopyReg; + assignVirtToPhysReg(DestVirtReg, DestPhysReg); + } else if (isCopy && + TargetRegisterInfo::isVirtualRegister(SrcCopyReg) && + SrcCopyPhysReg && isPhysRegAvailable(SrcCopyPhysReg) && + MF->getRegInfo().getRegClass(DestVirtReg)-> + contains(SrcCopyPhysReg)) { + DestPhysReg = SrcCopyPhysReg; + assignVirtToPhysReg(DestVirtReg, DestPhysReg); + } else + DestPhysReg = getReg(MBB, MI, DestVirtReg); + } + MF->getRegInfo().setPhysRegUsed(DestPhysReg); + markVirtRegModified(DestVirtReg); + getVirtRegLastUse(DestVirtReg) = std::make_pair((MachineInstr*)0, 0); + DEBUG(dbgs() << " Assigning " << TRI->getName(DestPhysReg) + << " to %reg" << DestVirtReg << "\n"); + MO.setReg(DestPhysReg); // Assign the output register + UsedInInstr.set(DestPhysReg); + } + + // If this instruction defines any registers that are immediately dead, + // kill them now. 
+ // + for (unsigned i = 0, e = DeadDefs.size(); i != e; ++i) { + unsigned VirtReg = DeadDefs[i]; + unsigned PhysReg = VirtReg; + if (TargetRegisterInfo::isVirtualRegister(VirtReg)) { + unsigned &PhysRegSlot = getVirt2PhysRegMapSlot(VirtReg); + PhysReg = PhysRegSlot; + assert(PhysReg != 0); + PhysRegSlot = 0; + } else if (PhysRegsUsed[PhysReg] == -2) { + // Unallocatable register dead, ignore. + continue; + } else if (!PhysReg) + continue; + + DEBUG(dbgs() << " Register " << TRI->getName(PhysReg) + << " [%reg" << VirtReg + << "] is never used, removing it from live set\n"); + removePhysReg(PhysReg); + for (const unsigned *AliasSet = TRI->getAliasSet(PhysReg); + *AliasSet; ++AliasSet) { + if (PhysRegsUsed[*AliasSet] != -2) { + DEBUG(dbgs() << " Register " << TRI->getName(*AliasSet) + << " [%reg" << *AliasSet + << "] is never used, removing it from live set\n"); + removePhysReg(*AliasSet); + } + } + } + + // Finally, if this is a noop copy instruction, zap it. (Except that if + // the copy is dead, it must be kept to avoid messing up liveness info for + // the register scavenger. See pr4100.) + if (TII->isMoveInstr(*MI, SrcCopyReg, DstCopyReg, + SrcCopySubReg, DstCopySubReg) && + SrcCopyReg == DstCopyReg && DeadDefs.empty()) + MBB.erase(MI); + } + + MachineBasicBlock::iterator MI = MBB.getFirstTerminator(); + + // Spill all physical registers holding virtual registers now. + for (unsigned i = 0, e = TRI->getNumRegs(); i != e; ++i) + if (PhysRegsUsed[i] != -1 && PhysRegsUsed[i] != -2) { + if (unsigned VirtReg = PhysRegsUsed[i]) + spillVirtReg(MBB, MI, VirtReg, i); + else + removePhysReg(i); + } +} + +/// runOnMachineFunction - Register allocate the whole function +/// +bool RAFast::runOnMachineFunction(MachineFunction &Fn) { + DEBUG(dbgs() << "Machine Function\n"); + MF = &Fn; + TM = &Fn.getTarget(); + TRI = TM->getRegisterInfo(); + TII = TM->getInstrInfo(); + + PhysRegsUsed.assign(TRI->getNumRegs(), -1); + UsedInInstr.resize(TRI->getNumRegs()); + + // At various places we want to efficiently check to see whether a register + // is allocatable. To handle this, we mark all unallocatable registers as + // being pinned down, permanently. + { + BitVector Allocable = TRI->getAllocatableSet(Fn); + for (unsigned i = 0, e = Allocable.size(); i != e; ++i) + if (!Allocable[i]) + PhysRegsUsed[i] = -2; // Mark the reg unallocable. 
+ } + + // initialize the virtual->physical register map to have a 'null' + // mapping for all virtual registers + unsigned LastVirtReg = MF->getRegInfo().getLastVirtReg(); + StackSlotForVirtReg.grow(LastVirtReg); + Virt2PhysRegMap.grow(LastVirtReg); + Virt2LastUseMap.grow(LastVirtReg); + VirtRegModified.resize(LastVirtReg+1 - + TargetRegisterInfo::FirstVirtualRegister); + UsedInMultipleBlocks.resize(LastVirtReg+1 - + TargetRegisterInfo::FirstVirtualRegister); + + // Loop over all of the basic blocks, eliminating virtual register references + for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end(); + MBB != MBBe; ++MBB) + AllocateBasicBlock(*MBB); + + StackSlotForVirtReg.clear(); + PhysRegsUsed.clear(); + VirtRegModified.clear(); + UsedInMultipleBlocks.clear(); + Virt2PhysRegMap.clear(); + Virt2LastUseMap.clear(); + return true; +} + +FunctionPass *llvm::createFastRegisterAllocator() { + return new RAFast(); +} diff --git a/lib/CodeGen/RegAllocLinearScan.cpp b/lib/CodeGen/RegAllocLinearScan.cpp index 5c5a394..6c8fc0c 100644 --- a/lib/CodeGen/RegAllocLinearScan.cpp +++ b/lib/CodeGen/RegAllocLinearScan.cpp @@ -1177,7 +1177,7 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) { assignRegOrStackSlotAtInterval(cur); } else { assert(false && "Ran out of registers during register allocation!"); - llvm_report_error("Ran out of registers during register allocation!"); + report_fatal_error("Ran out of registers during register allocation!"); } return; } diff --git a/lib/CodeGen/RegAllocLocal.cpp b/lib/CodeGen/RegAllocLocal.cpp index 0ef041e..94456d1 100644 --- a/lib/CodeGen/RegAllocLocal.cpp +++ b/lib/CodeGen/RegAllocLocal.cpp @@ -189,6 +189,9 @@ namespace { /// void removePhysReg(unsigned PhysReg); + void storeVirtReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, + unsigned VirtReg, unsigned PhysReg, bool isKill); + /// spillVirtReg - This method spills the value specified by PhysReg into /// the virtual register slot specified by VirtReg. It then updates the RA /// data structures to indicate the fact that PhysReg is now available. @@ -286,6 +289,17 @@ void RALocal::removePhysReg(unsigned PhysReg) { PhysRegsUseOrder.erase(It); } +/// storeVirtReg - Store a virtual register to its assigned stack slot. +void RALocal::storeVirtReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, + unsigned VirtReg, unsigned PhysReg, + bool isKill) { + const TargetRegisterClass *RC = MF->getRegInfo().getRegClass(VirtReg); + int FrameIndex = getStackSpaceFor(VirtReg, RC); + DEBUG(dbgs() << " to stack slot #" << FrameIndex); + TII->storeRegToStackSlot(MBB, I, PhysReg, isKill, FrameIndex, RC); + ++NumStores; // Update statistics +} /// spillVirtReg - This method spills the value specified by PhysReg into the /// virtual register slot specified by VirtReg. It then updates the RA data @@ -309,15 +323,11 @@ void RALocal::spillVirtReg(MachineBasicBlock &MBB, // Otherwise, there is a virtual register corresponding to this physical // register. We only need to spill it into its stack slot if it has been // modified. - const TargetRegisterClass *RC = MF->getRegInfo().getRegClass(VirtReg); - int FrameIndex = getStackSpaceFor(VirtReg, RC); - DEBUG(dbgs() << " to stack slot #" << FrameIndex); // If the instruction reads the register that's spilled, (e.g. this can // happen if it is a move to a physical register), then the spill // instruction is not a kill. 
bool isKill = !(I != MBB.end() && I->readsRegister(PhysReg)); - TII->storeRegToStackSlot(MBB, I, PhysReg, isKill, FrameIndex, RC); - ++NumStores; // Update statistics + storeVirtReg(MBB, I, VirtReg, PhysReg, isKill); } getVirt2PhysRegMapSlot(VirtReg) = 0; // VirtReg no longer available @@ -549,7 +559,7 @@ MachineInstr *RALocal::reloadVirtReg(MachineBasicBlock &MBB, MachineInstr *MI, << "constraints:\n"; MI->print(Msg, TM); } - llvm_report_error(Msg.str()); + report_fatal_error(Msg.str()); } for (const unsigned *SubRegs = TRI->getSubRegisters(PhysReg); *SubRegs; ++SubRegs) { @@ -563,7 +573,7 @@ MachineInstr *RALocal::reloadVirtReg(MachineBasicBlock &MBB, MachineInstr *MI, << "constraints:\n"; MI->print(Msg, TM); } - llvm_report_error(Msg.str()); + report_fatal_error(Msg.str()); } return MI; @@ -633,7 +643,10 @@ void RALocal::ComputeLocalLiveness(MachineBasicBlock& MBB) { // uses regs before it defs them. if (!MO.isReg() || !MO.getReg() || !MO.isUse()) continue; - + + // Ignore helpful kill flags from earlier passes. + MO.setIsKill(false); + LastUseDef[MO.getReg()] = std::make_pair(I, i); if (TargetRegisterInfo::isVirtualRegister(MO.getReg())) continue; @@ -801,9 +814,12 @@ void RALocal::AllocateBasicBlock(MachineBasicBlock &MBB) { dbgs() << "\nStarting RegAlloc of: " << *MI; dbgs() << " Regs have values: "; for (unsigned i = 0; i != TRI->getNumRegs(); ++i) - if (PhysRegsUsed[i] != -1 && PhysRegsUsed[i] != -2) + if (PhysRegsUsed[i] != -1 && PhysRegsUsed[i] != -2) { + if (PhysRegsUsed[i] && isVirtRegModified(PhysRegsUsed[i])) + dbgs() << "*"; dbgs() << "[" << TRI->getName(i) << ",%reg" << PhysRegsUsed[i] << "] "; + } dbgs() << '\n'; }); @@ -1092,6 +1108,20 @@ void RALocal::AllocateBasicBlock(MachineBasicBlock &MBB) { } } + // If this instruction is a call, make sure there are no dirty registers. The + // call might throw an exception, and the landing pad expects to find all + // registers in stack slots. + if (TID.isCall()) + for (unsigned i = 0, e = TRI->getNumRegs(); i != e; ++i) { + if (PhysRegsUsed[i] <= 0) continue; + unsigned VirtReg = PhysRegsUsed[i]; + if (!isVirtRegModified(VirtReg)) continue; + DEBUG(dbgs() << " Storing dirty %reg" << VirtReg); + storeVirtReg(MBB, MI, VirtReg, i, false); + markVirtRegModified(VirtReg, false); + DEBUG(dbgs() << " because the call might throw\n"); + } + // Finally, if this is a noop copy instruction, zap it. (Except that if // the copy is dead, it must be kept to avoid messing up liveness info for // the register scavenger. See pr4100.) diff --git a/lib/CodeGen/RegisterScavenging.cpp b/lib/CodeGen/RegisterScavenging.cpp index 67bf209..179984f 100644 --- a/lib/CodeGen/RegisterScavenging.cpp +++ b/lib/CodeGen/RegisterScavenging.cpp @@ -64,7 +64,7 @@ void RegScavenger::initRegState() { return; // Live-in registers are in use. - for (MachineBasicBlock::const_livein_iterator I = MBB->livein_begin(), + for (MachineBasicBlock::livein_iterator I = MBB->livein_begin(), E = MBB->livein_end(); I != E; ++I) setUsed(*I); @@ -136,6 +136,9 @@ void RegScavenger::forward() { ScavengeRestore = NULL; } + if (MI->isDebugValue()) + return; + // Find out which registers are early clobbered, killed, defined, and marked // def-dead in this instruction. 
BitVector EarlyClobberRegs(NumPhysRegs); diff --git a/lib/CodeGen/ScheduleDAG.cpp b/lib/CodeGen/ScheduleDAG.cpp index 1f3e295..587f001 100644 --- a/lib/CodeGen/ScheduleDAG.cpp +++ b/lib/CodeGen/ScheduleDAG.cpp @@ -29,7 +29,6 @@ ScheduleDAG::ScheduleDAG(MachineFunction &mf) TRI(TM.getRegisterInfo()), TLI(TM.getTargetLowering()), MF(mf), MRI(mf.getRegInfo()), - ConstPool(MF.getConstantPool()), EntrySU(), ExitSU() { } diff --git a/lib/CodeGen/ScheduleDAGInstrs.cpp b/lib/CodeGen/ScheduleDAGInstrs.cpp index ecc49e2..ca235c3 100644 --- a/lib/CodeGen/ScheduleDAGInstrs.cpp +++ b/lib/CodeGen/ScheduleDAGInstrs.cpp @@ -272,8 +272,8 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) { // perform its own adjustments. const SDep& dep = SDep(SU, SDep::Data, LDataLatency, Reg); if (!UnitLatencies) { - ComputeOperandLatency(SU, UseSU, (SDep &)dep); - ST.adjustSchedDependency(SU, UseSU, (SDep &)dep); + ComputeOperandLatency(SU, UseSU, const_cast<SDep &>(dep)); + ST.adjustSchedDependency(SU, UseSU, const_cast<SDep &>(dep)); } UseSU->addPred(dep); } @@ -285,8 +285,8 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) { continue; const SDep& dep = SDep(SU, SDep::Data, DataLatency, *Alias); if (!UnitLatencies) { - ComputeOperandLatency(SU, UseSU, (SDep &)dep); - ST.adjustSchedDependency(SU, UseSU, (SDep &)dep); + ComputeOperandLatency(SU, UseSU, const_cast<SDep &>(dep)); + ST.adjustSchedDependency(SU, UseSU, const_cast<SDep &>(dep)); } UseSU->addPred(dep); } @@ -572,8 +572,7 @@ std::string ScheduleDAGInstrs::getGraphNodeLabel(const SUnit *SU) const { } // EmitSchedule - Emit the machine code in scheduled order. -MachineBasicBlock *ScheduleDAGInstrs:: -EmitSchedule(DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) { +MachineBasicBlock *ScheduleDAGInstrs::EmitSchedule() { // For MachineInstr-based scheduling, we're rescheduling the instructions in // the block, so start by removing them from the block. while (Begin != InsertPos) { diff --git a/lib/CodeGen/ScheduleDAGInstrs.h b/lib/CodeGen/ScheduleDAGInstrs.h index c9b44de..d70608f 100644 --- a/lib/CodeGen/ScheduleDAGInstrs.h +++ b/lib/CodeGen/ScheduleDAGInstrs.h @@ -20,7 +20,6 @@ #include "llvm/CodeGen/ScheduleDAG.h" #include "llvm/Support/Compiler.h" #include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallSet.h" #include <map> @@ -171,8 +170,7 @@ namespace llvm { virtual void ComputeOperandLatency(SUnit *Def, SUnit *Use, SDep& dep) const; - virtual MachineBasicBlock* - EmitSchedule(DenseMap<MachineBasicBlock*, MachineBasicBlock*>*); + virtual MachineBasicBlock *EmitSchedule(); /// StartBlock - Prepare to perform scheduling in the given block. 
/// diff --git a/lib/CodeGen/SelectionDAG/CMakeLists.txt b/lib/CodeGen/SelectionDAG/CMakeLists.txt index 80c7d7c..0cfd5e1 100644 --- a/lib/CodeGen/SelectionDAG/CMakeLists.txt +++ b/lib/CodeGen/SelectionDAG/CMakeLists.txt @@ -20,6 +20,7 @@ add_llvm_library(LLVMSelectionDAG SelectionDAGISel.cpp SelectionDAGPrinter.cpp TargetLowering.cpp + TargetSelectionDAGInfo.cpp ) target_link_libraries (LLVMSelectionDAG LLVMAnalysis LLVMAsmPrinter LLVMCodeGen) diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index a336e0a..3639f80 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -129,6 +129,14 @@ namespace { bool CombineToPreIndexedLoadStore(SDNode *N); bool CombineToPostIndexedLoadStore(SDNode *N); + void ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad); + SDValue PromoteOperand(SDValue Op, EVT PVT, bool &Replace); + SDValue SExtPromoteOperand(SDValue Op, EVT PVT); + SDValue ZExtPromoteOperand(SDValue Op, EVT PVT); + SDValue PromoteIntBinOp(SDValue Op); + SDValue PromoteIntShiftOp(SDValue Op); + SDValue PromoteExtend(SDValue Op); + bool PromoteLoad(SDValue Op); /// combine - call the node-specific routine that knows how to fold each /// particular type of node. If that doesn't do anything, try the @@ -254,24 +262,28 @@ namespace { /// looking for a better chain (aliasing node.) SDValue FindBetterChain(SDNode *N, SDValue Chain); - /// getShiftAmountTy - Returns a type large enough to hold any valid - /// shift amount - before type legalization these can be huge. - EVT getShiftAmountTy() { - return LegalTypes ? TLI.getShiftAmountTy() : TLI.getPointerTy(); - } - -public: + public: DAGCombiner(SelectionDAG &D, AliasAnalysis &A, CodeGenOpt::Level OL) - : DAG(D), - TLI(D.getTargetLoweringInfo()), - Level(Unrestricted), - OptLevel(OL), - LegalOperations(false), - LegalTypes(false), - AA(A) {} + : DAG(D), TLI(D.getTargetLoweringInfo()), Level(Unrestricted), + OptLevel(OL), LegalOperations(false), LegalTypes(false), AA(A) {} /// Run - runs the dag combiner on all nodes in the work list void Run(CombineLevel AtLevel); + + SelectionDAG &getDAG() const { return DAG; } + + /// getShiftAmountTy - Returns a type large enough to hold any valid + /// shift amount - before type legalization these can be huge. + EVT getShiftAmountTy() { + return LegalTypes ? TLI.getShiftAmountTy() : TLI.getPointerTy(); + } + + /// isTypeLegal - This method returns true if we are running before type + /// legalization or if the specified VT is legal. + bool isTypeLegal(const EVT &VT) { + if (!LegalTypes) return true; + return TLI.isTypeLegal(VT); + } }; } @@ -577,9 +589,8 @@ SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo, return SDValue(N, 0); } -void -DAGCombiner::CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt & - TLO) { +void DAGCombiner:: +CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) { // Replace all uses. If any nodes become isomorphic to other nodes and // are deleted, make sure to remove them from our worklist. WorkListRemover DeadNodes(*this); @@ -609,7 +620,7 @@ DAGCombiner::CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt & /// it can be simplified or if things it uses can be simplified by bit /// propagation. If so, return true. 
bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &Demanded) { - TargetLowering::TargetLoweringOpt TLO(DAG); + TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations); APInt KnownZero, KnownOne; if (!TLI.SimplifyDemandedBits(Op, Demanded, KnownZero, KnownOne, TLO)) return false; @@ -629,6 +640,274 @@ bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &Demanded) { return true; } +void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) { + DebugLoc dl = Load->getDebugLoc(); + EVT VT = Load->getValueType(0); + SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, VT, SDValue(ExtLoad, 0)); + + DEBUG(dbgs() << "\nReplacing.9 "; + Load->dump(&DAG); + dbgs() << "\nWith: "; + Trunc.getNode()->dump(&DAG); + dbgs() << '\n'); + WorkListRemover DeadNodes(*this); + DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), Trunc, &DeadNodes); + DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), SDValue(ExtLoad, 1), + &DeadNodes); + removeFromWorkList(Load); + DAG.DeleteNode(Load); + AddToWorkList(Trunc.getNode()); +} + +SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) { + Replace = false; + DebugLoc dl = Op.getDebugLoc(); + if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Op)) { + EVT MemVT = LD->getMemoryVT(); + ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) + ? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT) ? ISD::ZEXTLOAD : ISD::EXTLOAD) + : LD->getExtensionType(); + Replace = true; + return DAG.getExtLoad(ExtType, dl, PVT, + LD->getChain(), LD->getBasePtr(), + LD->getSrcValue(), LD->getSrcValueOffset(), + MemVT, LD->isVolatile(), + LD->isNonTemporal(), LD->getAlignment()); + } + + unsigned Opc = Op.getOpcode(); + switch (Opc) { + default: break; + case ISD::AssertSext: + return DAG.getNode(ISD::AssertSext, dl, PVT, + SExtPromoteOperand(Op.getOperand(0), PVT), + Op.getOperand(1)); + case ISD::AssertZext: + return DAG.getNode(ISD::AssertZext, dl, PVT, + ZExtPromoteOperand(Op.getOperand(0), PVT), + Op.getOperand(1)); + case ISD::Constant: { + unsigned ExtOpc = + Op.getValueType().isByteSized() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND; + return DAG.getNode(ExtOpc, dl, PVT, Op); + } + } + + if (!TLI.isOperationLegal(ISD::ANY_EXTEND, PVT)) + return SDValue(); + return DAG.getNode(ISD::ANY_EXTEND, dl, PVT, Op); +} + +SDValue DAGCombiner::SExtPromoteOperand(SDValue Op, EVT PVT) { + if (!TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, PVT)) + return SDValue(); + EVT OldVT = Op.getValueType(); + DebugLoc dl = Op.getDebugLoc(); + bool Replace = false; + SDValue NewOp = PromoteOperand(Op, PVT, Replace); + if (NewOp.getNode() == 0) + return SDValue(); + AddToWorkList(NewOp.getNode()); + + if (Replace) + ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode()); + return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, NewOp.getValueType(), NewOp, + DAG.getValueType(OldVT)); +} + +SDValue DAGCombiner::ZExtPromoteOperand(SDValue Op, EVT PVT) { + EVT OldVT = Op.getValueType(); + DebugLoc dl = Op.getDebugLoc(); + bool Replace = false; + SDValue NewOp = PromoteOperand(Op, PVT, Replace); + if (NewOp.getNode() == 0) + return SDValue(); + AddToWorkList(NewOp.getNode()); + + if (Replace) + ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode()); + return DAG.getZeroExtendInReg(NewOp, dl, OldVT); +} + +/// PromoteIntBinOp - Promote the specified integer binary operation if the +/// target indicates it is beneficial. e.g. On x86, it's usually better to +/// promote i16 operations to i32 since i16 instructions are longer. 
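The promotion helpers defined next rely on the usual modular-arithmetic fact that, for the wrapping binary operations (add, sub, mul, and, or, xor), the low bits of the wide result equal the narrow result, so an i16 operation can be rewritten as an i32 operation followed by a truncate. A minimal standalone check of that identity, independent of the DAG and using hypothetical operand values:

#include <cassert>
#include <cstdint>

int main() {
  uint16_t A = 0xBEEF, B = 0x1234;
  // Widen to 32 bits, operate, truncate back: the low 16 bits match the
  // native 16-bit operation for every wrapping binary opcode.
  assert(uint16_t(uint32_t(A) + uint32_t(B)) == uint16_t(A + B));
  assert(uint16_t(uint32_t(A) * uint32_t(B)) == uint16_t(A * B));
  assert(uint16_t(uint32_t(A) ^ uint32_t(B)) == uint16_t(A ^ B));
  return 0;
}

Because the high bits of the operands never influence the truncated result, PromoteIntBinOp below can get away with any-extending its operands.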
+SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) { + if (!LegalOperations) + return SDValue(); + + EVT VT = Op.getValueType(); + if (VT.isVector() || !VT.isInteger()) + return SDValue(); + + // If operation type is 'undesirable', e.g. i16 on x86, consider + // promoting it. + unsigned Opc = Op.getOpcode(); + if (TLI.isTypeDesirableForOp(Opc, VT)) + return SDValue(); + + EVT PVT = VT; + // Consult target whether it is a good idea to promote this operation and + // what's the right type to promote it to. + if (TLI.IsDesirableToPromoteOp(Op, PVT)) { + assert(PVT != VT && "Don't know what type to promote to!"); + + bool Replace0 = false; + SDValue N0 = Op.getOperand(0); + SDValue NN0 = PromoteOperand(N0, PVT, Replace0); + if (NN0.getNode() == 0) + return SDValue(); + + bool Replace1 = false; + SDValue N1 = Op.getOperand(1); + SDValue NN1 = PromoteOperand(N1, PVT, Replace1); + if (NN1.getNode() == 0) + return SDValue(); + + AddToWorkList(NN0.getNode()); + AddToWorkList(NN1.getNode()); + + if (Replace0) + ReplaceLoadWithPromotedLoad(N0.getNode(), NN0.getNode()); + if (Replace1) + ReplaceLoadWithPromotedLoad(N1.getNode(), NN1.getNode()); + + DEBUG(dbgs() << "\nPromoting "; + Op.getNode()->dump(&DAG)); + DebugLoc dl = Op.getDebugLoc(); + return DAG.getNode(ISD::TRUNCATE, dl, VT, + DAG.getNode(Opc, dl, PVT, NN0, NN1)); + } + return SDValue(); +} + +/// PromoteIntShiftOp - Promote the specified integer shift operation if the +/// target indicates it is beneficial. e.g. On x86, it's usually better to +/// promote i16 operations to i32 since i16 instructions are longer. +SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) { + if (!LegalOperations) + return SDValue(); + + EVT VT = Op.getValueType(); + if (VT.isVector() || !VT.isInteger()) + return SDValue(); + + // If operation type is 'undesirable', e.g. i16 on x86, consider + // promoting it. + unsigned Opc = Op.getOpcode(); + if (TLI.isTypeDesirableForOp(Opc, VT)) + return SDValue(); + + EVT PVT = VT; + // Consult target whether it is a good idea to promote this operation and + // what's the right type to promote it to. + if (TLI.IsDesirableToPromoteOp(Op, PVT)) { + assert(PVT != VT && "Don't know what type to promote to!"); + + bool Replace = false; + SDValue N0 = Op.getOperand(0); + if (Opc == ISD::SRA) + N0 = SExtPromoteOperand(Op.getOperand(0), PVT); + else if (Opc == ISD::SRL) + N0 = ZExtPromoteOperand(Op.getOperand(0), PVT); + else + N0 = PromoteOperand(N0, PVT, Replace); + if (N0.getNode() == 0) + return SDValue(); + + AddToWorkList(N0.getNode()); + if (Replace) + ReplaceLoadWithPromotedLoad(Op.getOperand(0).getNode(), N0.getNode()); + + DEBUG(dbgs() << "\nPromoting "; + Op.getNode()->dump(&DAG)); + DebugLoc dl = Op.getDebugLoc(); + return DAG.getNode(ISD::TRUNCATE, dl, VT, + DAG.getNode(Opc, dl, PVT, N0, Op.getOperand(1))); + } + return SDValue(); +} + +SDValue DAGCombiner::PromoteExtend(SDValue Op) { + if (!LegalOperations) + return SDValue(); + + EVT VT = Op.getValueType(); + if (VT.isVector() || !VT.isInteger()) + return SDValue(); + + // If operation type is 'undesirable', e.g. i16 on x86, consider + // promoting it. + unsigned Opc = Op.getOpcode(); + if (TLI.isTypeDesirableForOp(Opc, VT)) + return SDValue(); + + EVT PVT = VT; + // Consult target whether it is a good idea to promote this operation and + // what's the right type to promote it to. 
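Right shifts are the one place where an operand cannot simply be any-extended: a logical shift right moves whatever happens to sit in the promoted high bits down into the result. That is why PromoteIntShiftOp above routes SRL through ZExtPromoteOperand and SRA through SExtPromoteOperand before shifting. A small standalone illustration of the SRL case, with hypothetical values:

#include <cassert>
#include <cstdint>

int main() {
  uint16_t X = 0x8000;
  uint32_t ZExt   = X;                  // high 16 bits cleared
  uint32_t AnyExt = 0xDEAD0000u | X;    // high 16 bits are arbitrary junk
  // Zero-extended operand: the narrow logical shift survives truncation.
  assert(uint16_t(ZExt >> 4) == uint16_t(X >> 4));     // both 0x0800
  // Junk in the high bits leaks into the truncated result.
  assert(uint16_t(AnyExt >> 4) != uint16_t(X >> 4));   // 0xD800 vs 0x0800
  return 0;
}

The SRA case is analogous, except the promoted high bits must replicate the narrow sign bit instead of being zero.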
+ if (TLI.IsDesirableToPromoteOp(Op, PVT)) { + assert(PVT != VT && "Don't know what type to promote to!"); + // fold (aext (aext x)) -> (aext x) + // fold (aext (zext x)) -> (zext x) + // fold (aext (sext x)) -> (sext x) + DEBUG(dbgs() << "\nPromoting "; + Op.getNode()->dump(&DAG)); + return DAG.getNode(Op.getOpcode(), Op.getDebugLoc(), VT, Op.getOperand(0)); + } + return SDValue(); +} + +bool DAGCombiner::PromoteLoad(SDValue Op) { + if (!LegalOperations) + return false; + + EVT VT = Op.getValueType(); + if (VT.isVector() || !VT.isInteger()) + return false; + + // If operation type is 'undesirable', e.g. i16 on x86, consider + // promoting it. + unsigned Opc = Op.getOpcode(); + if (TLI.isTypeDesirableForOp(Opc, VT)) + return false; + + EVT PVT = VT; + // Consult target whether it is a good idea to promote this operation and + // what's the right type to promote it to. + if (TLI.IsDesirableToPromoteOp(Op, PVT)) { + assert(PVT != VT && "Don't know what type to promote to!"); + + DebugLoc dl = Op.getDebugLoc(); + SDNode *N = Op.getNode(); + LoadSDNode *LD = cast<LoadSDNode>(N); + EVT MemVT = LD->getMemoryVT(); + ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) + ? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT) ? ISD::ZEXTLOAD : ISD::EXTLOAD) + : LD->getExtensionType(); + SDValue NewLD = DAG.getExtLoad(ExtType, dl, PVT, + LD->getChain(), LD->getBasePtr(), + LD->getSrcValue(), LD->getSrcValueOffset(), + MemVT, LD->isVolatile(), + LD->isNonTemporal(), LD->getAlignment()); + SDValue Result = DAG.getNode(ISD::TRUNCATE, dl, VT, NewLD); + + DEBUG(dbgs() << "\nPromoting "; + N->dump(&DAG); + dbgs() << "\nTo: "; + Result.getNode()->dump(&DAG); + dbgs() << '\n'); + WorkListRemover DeadNodes(*this); + DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result, &DeadNodes); + DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLD.getValue(1), &DeadNodes); + removeFromWorkList(N); + DAG.DeleteNode(N); + AddToWorkList(Result.getNode()); + return true; + } + return false; +} + + //===----------------------------------------------------------------------===// // Main DAG Combiner implementation //===----------------------------------------------------------------------===// @@ -732,7 +1011,7 @@ void DAGCombiner::Run(CombineLevel AtLevel) { } SDValue DAGCombiner::visit(SDNode *N) { - switch(N->getOpcode()) { + switch (N->getOpcode()) { default: break; case ISD::TokenFactor: return visitTokenFactor(N); case ISD::MERGE_VALUES: return visitMERGE_VALUES(N); @@ -817,6 +1096,35 @@ SDValue DAGCombiner::combine(SDNode *N) { } } + // If nothing happened still, try promoting the operation. + if (RV.getNode() == 0) { + switch (N->getOpcode()) { + default: break; + case ISD::ADD: + case ISD::SUB: + case ISD::MUL: + case ISD::AND: + case ISD::OR: + case ISD::XOR: + RV = PromoteIntBinOp(SDValue(N, 0)); + break; + case ISD::SHL: + case ISD::SRA: + case ISD::SRL: + RV = PromoteIntShiftOp(SDValue(N, 0)); + break; + case ISD::SIGN_EXTEND: + case ISD::ZERO_EXTEND: + case ISD::ANY_EXTEND: + RV = PromoteExtend(SDValue(N, 0)); + break; + case ISD::LOAD: + if (PromoteLoad(SDValue(N, 0))) + RV = SDValue(N, 0); + break; + } + } + // If N is a commutative binary node, try commuting it to enable more // sdisel CSE. if (RV.getNode() == 0 && @@ -1720,8 +2028,10 @@ SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) { // into a vsetcc. 
EVT Op0VT = N0.getOperand(0).getValueType(); if ((N0.getOpcode() == ISD::ZERO_EXTEND || - N0.getOpcode() == ISD::ANY_EXTEND || N0.getOpcode() == ISD::SIGN_EXTEND || + // Avoid infinite looping with PromoteIntBinOp. + (N0.getOpcode() == ISD::ANY_EXTEND && + (!LegalTypes || TLI.isTypeDesirableForOp(N->getOpcode(), Op0VT))) || (N0.getOpcode() == ISD::TRUNCATE && TLI.isTypeLegal(Op0VT))) && !VT.isVector() && Op0VT == N1.getOperand(0).getValueType() && @@ -2579,7 +2889,13 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { HiBitsMask); } - return N1C ? visitShiftByConstant(N, N1C->getZExtValue()) : SDValue(); + if (N1C) { + SDValue NewSHL = visitShiftByConstant(N, N1C->getZExtValue()); + if (NewSHL.getNode()) + return NewSHL; + } + + return SDValue(); } SDValue DAGCombiner::visitSRA(SDNode *N) { @@ -2693,7 +3009,13 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { if (DAG.SignBitIsZero(N0)) return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, N0, N1); - return N1C ? visitShiftByConstant(N, N1C->getZExtValue()) : SDValue(); + if (N1C) { + SDValue NewSRA = visitShiftByConstant(N, N1C->getZExtValue()); + if (NewSRA.getNode()) + return NewSRA; + } + + return SDValue(); } SDValue DAGCombiner::visitSRL(SDNode *N) { @@ -2731,6 +3053,15 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, N0.getOperand(0), DAG.getConstant(c1 + c2, N1.getValueType())); } + + // fold (srl (shl x, c), c) -> (and x, cst2) + if (N1C && N0.getOpcode() == ISD::SHL && N0.getOperand(1) == N1 && + N0.getValueSizeInBits() <= 64) { + uint64_t ShAmt = N1C->getZExtValue()+64-N0.getValueSizeInBits(); + return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, N0.getOperand(0), + DAG.getConstant(~0ULL >> ShAmt, VT)); + } + // fold (srl (anyextend x), c) -> (anyextend (srl x, c)) if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) { @@ -2739,10 +3070,12 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { if (N1C->getZExtValue() >= SmallVT.getSizeInBits()) return DAG.getUNDEF(VT); - SDValue SmallShift = DAG.getNode(ISD::SRL, N0.getDebugLoc(), SmallVT, - N0.getOperand(0), N1); - AddToWorkList(SmallShift.getNode()); - return DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, SmallShift); + if (!LegalTypes || TLI.isTypeDesirableForOp(ISD::SRL, SmallVT)) { + SDValue SmallShift = DAG.getNode(ISD::SRL, N0.getDebugLoc(), SmallVT, + N0.getOperand(0), N1); + AddToWorkList(SmallShift.getNode()); + return DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, SmallShift); + } } // fold (srl (sra X, Y), 31) -> (srl X, 31). This srl only looks at the sign @@ -3205,24 +3538,40 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { if (N0.getOpcode() == ISD::SETCC) { // sext(setcc) -> sext_in_reg(vsetcc) for vectors. - if (VT.isVector() && + // Only do this before legalize for now. + if (VT.isVector() && !LegalOperations) { + EVT N0VT = N0.getOperand(0).getValueType(); // We know that the # elements of the results is the same as the // # elements of the compare (and the # elements of the compare result // for that matter). Check to see that they are the same size. If so, // we know that the element size of the sext'd result matches the // element size of the compare operands. - VT.getSizeInBits() == N0.getOperand(0).getValueType().getSizeInBits() && - - // Only do this before legalize for now. 
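The new visitSRL fold above rewrites (srl (shl x, c), c) into a single AND that clears the top c bits; the constant is built as ~0ULL shifted right by c plus the difference between 64 and the value width. A standalone check of that mask arithmetic for a hypothetical 32-bit value, exhaustive over the shift amount:

#include <cassert>
#include <cstdint>

int main() {
  const unsigned Width = 32;
  const uint32_t X = 0xDEADBEEFu;
  for (unsigned C = 1; C < Width; ++C) {
    uint32_t ViaShifts = (uint32_t)(X << C) >> C;   // shl then logical srl
    uint64_t Mask = ~0ULL >> (C + 64 - Width);      // the fold's AND constant
    assert(ViaShifts == (X & (uint32_t)Mask));      // identical low Width-C bits
  }
  return 0;
}

Expressing the shift pair as a mask also leaves the value in the (and x, cst) shape that other combines, such as the store narrowing added later in this patch, look for.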
- !LegalOperations) { - return DAG.getVSetCC(N->getDebugLoc(), VT, N0.getOperand(0), - N0.getOperand(1), - cast<CondCodeSDNode>(N0.getOperand(2))->get()); + if (VT.getSizeInBits() == N0VT.getSizeInBits()) + return DAG.getVSetCC(N->getDebugLoc(), VT, N0.getOperand(0), + N0.getOperand(1), + cast<CondCodeSDNode>(N0.getOperand(2))->get()); + // If the desired elements are smaller or larger than the source + // elements we can use a matching integer vector type and then + // truncate/sign extend + else { + EVT MatchingElementType = + EVT::getIntegerVT(*DAG.getContext(), + N0VT.getScalarType().getSizeInBits()); + EVT MatchingVectorType = + EVT::getVectorVT(*DAG.getContext(), MatchingElementType, + N0VT.getVectorNumElements()); + SDValue VsetCC = + DAG.getVSetCC(N->getDebugLoc(), MatchingVectorType, N0.getOperand(0), + N0.getOperand(1), + cast<CondCodeSDNode>(N0.getOperand(2))->get()); + return DAG.getSExtOrTrunc(VsetCC, N->getDebugLoc(), VT); + } } - + // sext(setcc x, y, cc) -> (select_cc x, y, -1, 0, cc) + unsigned ElementWidth = VT.getScalarType().getSizeInBits(); SDValue NegOne = - DAG.getConstant(APInt::getAllOnesValue(VT.getSizeInBits()), VT); + DAG.getConstant(APInt::getAllOnesValue(ElementWidth), VT); SDValue SCC = SimplifySelectCC(N->getDebugLoc(), N0.getOperand(0), N0.getOperand(1), NegOne, DAG.getConstant(0, VT), @@ -3624,7 +3973,7 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) { // Do not generate loads of non-round integer types since these can // be expensive (and would be wrong if the type is not byte sized). if (isa<LoadSDNode>(N0) && N0.hasOneUse() && ExtVT.isRound() && - cast<LoadSDNode>(N0)->getMemoryVT().getSizeInBits() > EVTBits && + cast<LoadSDNode>(N0)->getMemoryVT().getSizeInBits() >= EVTBits && // Do not change the width of a volatile load. !cast<LoadSDNode>(N0)->isVolatile()) { LoadSDNode *LN0 = cast<LoadSDNode>(N0); @@ -3694,7 +4043,8 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { // if x is small enough. 
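In the scalar case handled above, sign-extending a setcc result is the same as selecting between all-ones and zero, and the NegOne constant is now sized by the scalar element width rather than the full type width, presumably so the same expression stays meaningful when VT is a vector. The identity in plain integer terms, as a minimal sketch:

#include <cassert>
#include <cstdint>

// sext(i1 (x < y)) to i32: negating the 0/1 compare result yields 0 or -1,
// i.e. exactly the select_cc(x, y, -1, 0, setlt) form produced here.
static int32_t SExtOfSetCC(int32_t X, int32_t Y) { return -int32_t(X < Y); }

int main() {
  assert(SExtOfSetCC(1, 2) == -1);
  assert(SExtOfSetCC(2, 1) == 0);
  assert(SExtOfSetCC(3, 3) == ((3 < 3) ? -1 : 0));
  return 0;
}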
if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) { SDValue N00 = N0.getOperand(0); - if (N00.getValueType().getScalarType().getSizeInBits() < EVTBits) + if (N00.getValueType().getScalarType().getSizeInBits() <= EVTBits && + (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT))) return DAG.getNode(ISD::SIGN_EXTEND, N->getDebugLoc(), VT, N00, N1); } @@ -3779,7 +4129,8 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { if (N0.getOpcode() == ISD::TRUNCATE) return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, N0.getOperand(0)); // fold (truncate (ext x)) -> (ext x) or (truncate x) or x - if (N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::SIGN_EXTEND|| + if (N0.getOpcode() == ISD::ZERO_EXTEND || + N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) { if (N0.getOperand(0).getValueType().bitsLT(VT)) // if the source is smaller than the dest, we still need an extend @@ -3805,7 +4156,9 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { // fold (truncate (load x)) -> (smaller load x) // fold (truncate (srl (load x), c)) -> (smaller load (x+c/evtbits)) - return ReduceLoadWidth(N); + if (!LegalTypes || TLI.isTypeDesirableForOp(N0.getOpcode(), VT)) + return ReduceLoadWidth(N); + return SDValue(); } static SDNode *getBuildPairElt(SDNode *N, unsigned i) { @@ -3949,7 +4302,7 @@ SDValue DAGCombiner::visitBIT_CONVERT(SDNode *N) { VT.isInteger() && !VT.isVector()) { unsigned OrigXWidth = N0.getOperand(1).getValueType().getSizeInBits(); EVT IntXVT = EVT::getIntegerVT(*DAG.getContext(), OrigXWidth); - if (TLI.isTypeLegal(IntXVT) || !LegalTypes) { + if (isTypeLegal(IntXVT)) { SDValue X = DAG.getNode(ISD::BIT_CONVERT, N0.getDebugLoc(), IntXVT, N0.getOperand(1)); AddToWorkList(X.getNode()); @@ -4075,8 +4428,8 @@ ConstantFoldBIT_CONVERTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) { if (Op.getOpcode() == ISD::UNDEF) continue; EltIsUndef = false; - NewBits |= (APInt(cast<ConstantSDNode>(Op)->getAPIntValue()). - zextOrTrunc(SrcBitSize).zext(DstBitSize)); + NewBits |= APInt(cast<ConstantSDNode>(Op)->getAPIntValue()). + zextOrTrunc(SrcBitSize).zext(DstBitSize); } if (EltIsUndef) @@ -4464,7 +4817,7 @@ SDValue DAGCombiner::visitFP_ROUND_INREG(SDNode *N) { ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); // fold (fp_round_inreg c1fp) -> c1fp - if (N0CFP && (TLI.isTypeLegal(EVT) || !LegalTypes)) { + if (N0CFP && isTypeLegal(EVT)) { SDValue Round = DAG.getConstantFP(*N0CFP->getConstantFPValue(), EVT); return DAG.getNode(ISD::FP_EXTEND, N->getDebugLoc(), VT, Round); } @@ -4676,7 +5029,7 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) { if (Op0.getOpcode() == Op1.getOpcode()) { // Avoid missing important xor optimizations. SDValue Tmp = visitXOR(TheXor); - if (Tmp.getNode()) { + if (Tmp.getNode() && Tmp.getNode() != TheXor) { DEBUG(dbgs() << "\nReplacing.8 "; TheXor->dump(&DAG); dbgs() << "\nWith: "; @@ -5145,6 +5498,136 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) { return SDValue(); } +/// CheckForMaskedLoad - Check to see if V is (and load (ptr), imm), where the +/// load is having specific bytes cleared out. If so, return the byte size +/// being masked out and the shift amount. +static std::pair<unsigned, unsigned> +CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) { + std::pair<unsigned, unsigned> Result(0, 0); + + // Check for the structure we're looking for. 
+ if (V->getOpcode() != ISD::AND || + !isa<ConstantSDNode>(V->getOperand(1)) || + !ISD::isNormalLoad(V->getOperand(0).getNode())) + return Result; + + // Check the chain and pointer. + LoadSDNode *LD = cast<LoadSDNode>(V->getOperand(0)); + if (LD->getBasePtr() != Ptr) return Result; // Not from same pointer. + + // The store should be chained directly to the load or be an operand of a + // tokenfactor. + if (LD == Chain.getNode()) + ; // ok. + else if (Chain->getOpcode() != ISD::TokenFactor) + return Result; // Fail. + else { + bool isOk = false; + for (unsigned i = 0, e = Chain->getNumOperands(); i != e; ++i) + if (Chain->getOperand(i).getNode() == LD) { + isOk = true; + break; + } + if (!isOk) return Result; + } + + // This only handles simple types. + if (V.getValueType() != MVT::i16 && + V.getValueType() != MVT::i32 && + V.getValueType() != MVT::i64) + return Result; + + // Check the constant mask. Invert it so that the bits being masked out are + // 0 and the bits being kept are 1. Use getSExtValue so that leading bits + // follow the sign bit for uniformity. + uint64_t NotMask = ~cast<ConstantSDNode>(V->getOperand(1))->getSExtValue(); + unsigned NotMaskLZ = CountLeadingZeros_64(NotMask); + if (NotMaskLZ & 7) return Result; // Must be multiple of a byte. + unsigned NotMaskTZ = CountTrailingZeros_64(NotMask); + if (NotMaskTZ & 7) return Result; // Must be multiple of a byte. + if (NotMaskLZ == 64) return Result; // All zero mask. + + // See if we have a continuous run of bits. If so, we have 0*1+0* + if (CountTrailingOnes_64(NotMask >> NotMaskTZ)+NotMaskTZ+NotMaskLZ != 64) + return Result; + + // Adjust NotMaskLZ down to be from the actual size of the int instead of i64. + if (V.getValueType() != MVT::i64 && NotMaskLZ) + NotMaskLZ -= 64-V.getValueSizeInBits(); + + unsigned MaskedBytes = (V.getValueSizeInBits()-NotMaskLZ-NotMaskTZ)/8; + switch (MaskedBytes) { + case 1: + case 2: + case 4: break; + default: return Result; // All one mask, or 5-byte mask. + } + + // Verify that the first bit starts at a multiple of mask so that the access + // is aligned the same as the access width. + if (NotMaskTZ && NotMaskTZ/8 % MaskedBytes) return Result; + + Result.first = MaskedBytes; + Result.second = NotMaskTZ/8; + return Result; +} + + +/// ShrinkLoadReplaceStoreWithStore - Check to see if IVal is something that +/// provides a value as specified by MaskInfo. If so, replace the specified +/// store with a narrower store of truncated IVal. +static SDNode * +ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo, + SDValue IVal, StoreSDNode *St, + DAGCombiner *DC) { + unsigned NumBytes = MaskInfo.first; + unsigned ByteShift = MaskInfo.second; + SelectionDAG &DAG = DC->getDAG(); + + // Check to see if IVal is all zeros in the part being masked in by the 'or' + // that uses this. If not, this is not a replacement. + APInt Mask = ~APInt::getBitsSet(IVal.getValueSizeInBits(), + ByteShift*8, (ByteShift+NumBytes)*8); + if (!DAG.MaskedValueIsZero(IVal, Mask)) return 0; + + // Check that it is legal on the target to do this. It is legal if the new + // VT we're shrinking to (i8/i16/i32) is legal or we're still before type + // legalization. + MVT VT = MVT::getIntegerVT(NumBytes*8); + if (!DC->isTypeLegal(VT)) + return 0; + + // Okay, we can do this! Replace the 'St' store with a store of IVal that is + // shifted by ByteShift and truncated down to NumBytes. 
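CheckForMaskedLoad above recovers the width and byte offset of the field being replaced purely from the AND constant: sign-extend and invert the mask, require the resulting run of set bits to start and end on byte boundaries, and rescale the leading-zero count from 64 bits down to the value width. The same arithmetic on a concrete i32 mask, as a standalone sketch with portable helpers standing in for CountLeadingZeros_64 and CountTrailingZeros_64:

#include <cassert>
#include <cstdint>

static unsigned Clz64(uint64_t V) { unsigned N = 0; while (!(V >> 63)) { V <<= 1; ++N; } return N; }
static unsigned Ctz64(uint64_t V) { unsigned N = 0; while (!(V & 1)) { V >>= 1; ++N; } return N; }

int main() {
  // (or (and (load p), 0xFFFF00FF), X) on i32: the mask clears byte 1.
  uint64_t SExtMask = 0xFFFFFFFFFFFF00FFULL;  // the i32 constant, sign-extended
  uint64_t NotMask  = ~SExtMask;              // 0xFF00: the bits being replaced
  unsigned NotMaskLZ = Clz64(NotMask);        // 48
  unsigned NotMaskTZ = Ctz64(NotMask);        // 8
  assert((NotMaskLZ & 7) == 0 && (NotMaskTZ & 7) == 0);                   // byte-aligned run
  assert(Ctz64(~(NotMask >> NotMaskTZ)) + NotMaskTZ + NotMaskLZ == 64);   // contiguous 0*1+0*
  NotMaskLZ -= 64 - 32;                       // rescale from i64 to the i32 value width
  unsigned MaskedBytes = (32 - NotMaskLZ - NotMaskTZ) / 8;   // 1: a single byte is replaced
  unsigned ByteShift   = NotMaskTZ / 8;                      // 1: at byte offset 1
  assert(MaskedBytes == 1 && ByteShift == 1);
  return 0;
}

ShrinkLoadReplaceStoreWithStore then collapses the whole load/and/or/store sequence into one narrow store at that byte offset, mirroring the offset from the other end of the value on big-endian targets.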
+ if (ByteShift) + IVal = DAG.getNode(ISD::SRL, IVal->getDebugLoc(), IVal.getValueType(), IVal, + DAG.getConstant(ByteShift*8, DC->getShiftAmountTy())); + + // Figure out the offset for the store and the alignment of the access. + unsigned StOffset; + unsigned NewAlign = St->getAlignment(); + + if (DAG.getTargetLoweringInfo().isLittleEndian()) + StOffset = ByteShift; + else + StOffset = IVal.getValueType().getStoreSize() - ByteShift - NumBytes; + + SDValue Ptr = St->getBasePtr(); + if (StOffset) { + Ptr = DAG.getNode(ISD::ADD, IVal->getDebugLoc(), Ptr.getValueType(), + Ptr, DAG.getConstant(StOffset, Ptr.getValueType())); + NewAlign = MinAlign(NewAlign, StOffset); + } + + // Truncate down to the new size. + IVal = DAG.getNode(ISD::TRUNCATE, IVal->getDebugLoc(), VT, IVal); + + ++OpsNarrowed; + return DAG.getStore(St->getChain(), St->getDebugLoc(), IVal, Ptr, + St->getSrcValue(), St->getSrcValueOffset()+StOffset, + false, false, NewAlign).getNode(); +} + /// ReduceLoadOpStoreWidth - Look for sequence of load / op / store where op is /// one of 'or', 'xor', and 'and' of immediates. If 'op' is only touching some @@ -5164,6 +5647,28 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) { return SDValue(); unsigned Opc = Value.getOpcode(); + + // If this is "store (or X, Y), P" and X is "(and (load P), cst)", where cst + // is a byte mask indicating a consecutive number of bytes, check to see if + // Y is known to provide just those bytes. If so, we try to replace the + // load + replace + store sequence with a single (narrower) store, which makes + // the load dead. + if (Opc == ISD::OR) { + std::pair<unsigned, unsigned> MaskedLoad; + MaskedLoad = CheckForMaskedLoad(Value.getOperand(0), Ptr, Chain); + if (MaskedLoad.first) + if (SDNode *NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad, + Value.getOperand(1), ST,this)) + return SDValue(NewST, 0); + + // Or is commutative, so try swapping X and Y. + MaskedLoad = CheckForMaskedLoad(Value.getOperand(1), Ptr, Chain); + if (MaskedLoad.first) + if (SDNode *NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad, + Value.getOperand(0), ST,this)) + return SDValue(NewST, 0); + } + if ((Opc != ISD::OR && Opc != ISD::XOR && Opc != ISD::AND) || Value.getOperand(1).getOpcode() != ISD::Constant) return SDValue(); @@ -5211,8 +5716,8 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) { PtrOff = (BitWidth + 7 - NewBW) / 8 - PtrOff; unsigned NewAlign = MinAlign(LD->getAlignment(), PtrOff); - if (NewAlign < - TLI.getTargetData()->getABITypeAlignment(NewVT.getTypeForEVT(*DAG.getContext()))) + const Type *NewVTTy = NewVT.getTypeForEVT(*DAG.getContext()); + if (NewAlign < TLI.getTargetData()->getABITypeAlignment(NewVTTy)) return SDValue(); SDValue NewPtr = DAG.getNode(ISD::ADD, LD->getDebugLoc(), @@ -5282,8 +5787,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { case MVT::ppcf128: break; case MVT::f32: - if (((TLI.isTypeLegal(MVT::i32) || !LegalTypes) && !LegalOperations && - !ST->isVolatile()) || + if ((isTypeLegal(MVT::i32) && !LegalOperations && !ST->isVolatile()) || TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) { Tmp = DAG.getConstant((uint32_t)CFP->getValueAPF(). 
bitcastToAPInt().getZExtValue(), MVT::i32); @@ -5294,7 +5798,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { } break; case MVT::f64: - if (((TLI.isTypeLegal(MVT::i64) || !LegalTypes) && !LegalOperations && + if ((TLI.isTypeLegal(MVT::i64) && !LegalOperations && !ST->isVolatile()) || TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i64)) { Tmp = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt(). @@ -5551,7 +6055,7 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { InVec = InVec.getOperand(0); if (ISD::isNormalLoad(InVec.getNode())) { LN0 = cast<LoadSDNode>(InVec); - Elt = (Idx < (int)NumElems) ? Idx : Idx - NumElems; + Elt = (Idx < (int)NumElems) ? Idx : Idx - (int)NumElems; } } @@ -5659,7 +6163,7 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { } // Add count and size info. - if (!TLI.isTypeLegal(VT) && LegalTypes) + if (!isTypeLegal(VT)) return SDValue(); // Return the new VECTOR_SHUFFLE node. @@ -6287,7 +6791,7 @@ SDValue DAGCombiner::BuildUDIV(SDNode *N) { /// FindBaseOffset - Return true if base is a frame index, which is known not // to alias with anything but itself. Provides base object and offset as results. static bool FindBaseOffset(SDValue Ptr, SDValue &Base, int64_t &Offset, - GlobalValue *&GV, void *&CV) { + const GlobalValue *&GV, void *&CV) { // Assume it is a primitive operation. Base = Ptr; Offset = 0; GV = 0; CV = 0; @@ -6335,7 +6839,7 @@ bool DAGCombiner::isAlias(SDValue Ptr1, int64_t Size1, // Gather base node and offset information. SDValue Base1, Base2; int64_t Offset1, Offset2; - GlobalValue *GV1, *GV2; + const GlobalValue *GV1, *GV2; void *CV1, *CV2; bool isFrameIndex1 = FindBaseOffset(Ptr1, Base1, Offset1, GV1, CV1); bool isFrameIndex2 = FindBaseOffset(Ptr2, Base2, Offset2, GV2, CV2); diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp index 4bf41f2..b4c3833 100644 --- a/lib/CodeGen/SelectionDAG/FastISel.cpp +++ b/lib/CodeGen/SelectionDAG/FastISel.cpp @@ -1,4 +1,4 @@ -///===-- FastISel.cpp - Implementation of the FastISel class --------------===// +//===-- FastISel.cpp - Implementation of the FastISel class ---------------===// // // The LLVM Compiler Infrastructure // @@ -52,10 +52,11 @@ #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetMachine.h" +#include "llvm/Support/ErrorHandling.h" #include "FunctionLoweringInfo.h" using namespace llvm; -unsigned FastISel::getRegForValue(Value *V) { +unsigned FastISel::getRegForValue(const Value *V) { EVT RealVT = TLI.getValueType(V->getType(), /*AllowUnknown=*/true); // Don't handle non-simple values in FastISel. if (!RealVT.isSimple()) @@ -83,7 +84,16 @@ unsigned FastISel::getRegForValue(Value *V) { if (Reg != 0) return Reg; - if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) { + return materializeRegForValue(V, VT); +} + +/// materializeRegForValue - Helper for getRegForVale. This function is +/// called when the value isn't already available in a register and must +/// be materialized with new instructions. +unsigned FastISel::materializeRegForValue(const Value *V, MVT VT) { + unsigned Reg = 0; + + if (const ConstantInt *CI = dyn_cast<ConstantInt>(V)) { if (CI->getValue().getActiveBits() <= 64) Reg = FastEmit_i(VT, VT, ISD::Constant, CI->getZExtValue()); } else if (isa<AllocaInst>(V)) { @@ -93,10 +103,12 @@ unsigned FastISel::getRegForValue(Value *V) { // local-CSE'd with actual integer zeros. 
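The f32 and f64 cases above store a floating-point constant through its integer bit pattern whenever an integer store of that width is available; the pattern is simply the IEEE-754 encoding of the value. A standalone illustration for 1.0f on an IEEE-754 host, using memcpy as the portable bit cast:

#include <cassert>
#include <cstdint>
#include <cstring>

int main() {
  float F = 1.0f;
  uint32_t Bits;
  std::memcpy(&Bits, &F, sizeof(Bits));   // the value an i32 store would write
  assert(Bits == 0x3F800000u);            // sign 0, biased exponent 127, mantissa 0
  return 0;
}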
Reg = getRegForValue(Constant::getNullValue(TD.getIntPtrType(V->getContext()))); - } else if (ConstantFP *CF = dyn_cast<ConstantFP>(V)) { + } else if (const ConstantFP *CF = dyn_cast<ConstantFP>(V)) { + // Try to emit the constant directly. Reg = FastEmit_f(VT, VT, ISD::ConstantFP, CF); if (!Reg) { + // Try to emit the constant by using an integer constant with a cast. const APFloat &Flt = CF->getValueAPF(); EVT IntVT = TLI.getPointerTy(); @@ -114,9 +126,9 @@ unsigned FastISel::getRegForValue(Value *V) { Reg = FastEmit_r(IntVT.getSimpleVT(), VT, ISD::SINT_TO_FP, IntegerReg); } } - } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) { - if (!SelectOperator(CE, CE->getOpcode())) return 0; - Reg = LocalValueMap[CE]; + } else if (const Operator *Op = dyn_cast<Operator>(V)) { + if (!SelectOperator(Op, Op->getOpcode())) return 0; + Reg = LocalValueMap[Op]; } else if (isa<UndefValue>(V)) { Reg = createResultReg(TLI.getRegClassFor(VT)); BuildMI(MBB, DL, TII.get(TargetOpcode::IMPLICIT_DEF), Reg); @@ -134,11 +146,11 @@ unsigned FastISel::getRegForValue(Value *V) { return Reg; } -unsigned FastISel::lookUpRegForValue(Value *V) { +unsigned FastISel::lookUpRegForValue(const Value *V) { // Look up the value to see if we already have a register for it. We // cache values defined by Instructions across blocks, and other values // only locally. This is because Instructions already have the SSA - // def-dominatess-use requirement enforced. + // def-dominates-use requirement enforced. if (ValueMap.count(V)) return ValueMap[V]; return LocalValueMap[V]; @@ -150,7 +162,7 @@ unsigned FastISel::lookUpRegForValue(Value *V) { /// NOTE: This is only necessary because we might select a block that uses /// a value before we select the block that defines the value. It might be /// possible to fix this by selecting blocks in reverse postorder. -unsigned FastISel::UpdateValueMap(Value* I, unsigned Reg) { +unsigned FastISel::UpdateValueMap(const Value *I, unsigned Reg) { if (!isa<Instruction>(I)) { LocalValueMap[I] = Reg; return Reg; @@ -167,7 +179,7 @@ unsigned FastISel::UpdateValueMap(Value* I, unsigned Reg) { return AssignedReg; } -unsigned FastISel::getRegForGEPIndex(Value *Idx) { +unsigned FastISel::getRegForGEPIndex(const Value *Idx) { unsigned IdxN = getRegForValue(Idx); if (IdxN == 0) // Unhandled operand. Halt "fast" selection and bail. @@ -186,7 +198,7 @@ unsigned FastISel::getRegForGEPIndex(Value *Idx) { /// SelectBinaryOp - Select and emit code for a binary operator instruction, /// which has an opcode which directly corresponds to the given ISD opcode. /// -bool FastISel::SelectBinaryOp(User *I, unsigned ISDOpcode) { +bool FastISel::SelectBinaryOp(const User *I, unsigned ISDOpcode) { EVT VT = EVT::getEVT(I->getType(), /*HandleUnknown=*/true); if (VT == MVT::Other || !VT.isSimple()) // Unhandled type. Halt "fast" selection and bail. @@ -252,7 +264,7 @@ bool FastISel::SelectBinaryOp(User *I, unsigned ISDOpcode) { return true; } -bool FastISel::SelectGetElementPtr(User *I) { +bool FastISel::SelectGetElementPtr(const User *I) { unsigned N = getRegForValue(I->getOperand(0)); if (N == 0) // Unhandled operand. Halt "fast" selection and bail. 
@@ -260,9 +272,9 @@ bool FastISel::SelectGetElementPtr(User *I) { const Type *Ty = I->getOperand(0)->getType(); MVT VT = TLI.getPointerTy(); - for (GetElementPtrInst::op_iterator OI = I->op_begin()+1, E = I->op_end(); - OI != E; ++OI) { - Value *Idx = *OI; + for (GetElementPtrInst::const_op_iterator OI = I->op_begin()+1, + E = I->op_end(); OI != E; ++OI) { + const Value *Idx = *OI; if (const StructType *StTy = dyn_cast<StructType>(Ty)) { unsigned Field = cast<ConstantInt>(Idx)->getZExtValue(); if (Field) { @@ -280,7 +292,7 @@ bool FastISel::SelectGetElementPtr(User *I) { Ty = cast<SequentialType>(Ty)->getElementType(); // If this is a constant subscript, handle it quickly. - if (ConstantInt *CI = dyn_cast<ConstantInt>(Idx)) { + if (const ConstantInt *CI = dyn_cast<ConstantInt>(Idx)) { if (CI->getZExtValue() == 0) continue; uint64_t Offs = TD.getTypeAllocSize(Ty)*cast<ConstantInt>(CI)->getSExtValue(); @@ -316,51 +328,56 @@ bool FastISel::SelectGetElementPtr(User *I) { return true; } -bool FastISel::SelectCall(User *I) { - Function *F = cast<CallInst>(I)->getCalledFunction(); +bool FastISel::SelectCall(const User *I) { + const Function *F = cast<CallInst>(I)->getCalledFunction(); if (!F) return false; + // Handle selected intrinsic function calls. unsigned IID = F->getIntrinsicID(); switch (IID) { default: break; case Intrinsic::dbg_declare: { - DbgDeclareInst *DI = cast<DbgDeclareInst>(I); + const DbgDeclareInst *DI = cast<DbgDeclareInst>(I); if (!DIDescriptor::ValidDebugInfo(DI->getVariable(), CodeGenOpt::None) || !MF.getMMI().hasDebugInfo()) return true; - Value *Address = DI->getAddress(); + const Value *Address = DI->getAddress(); if (!Address) return true; - AllocaInst *AI = dyn_cast<AllocaInst>(Address); + if (isa<UndefValue>(Address)) + return true; + const AllocaInst *AI = dyn_cast<AllocaInst>(Address); // Don't handle byval struct arguments or VLAs, for example. - if (!AI) break; - DenseMap<const AllocaInst*, int>::iterator SI = - StaticAllocaMap.find(AI); - if (SI == StaticAllocaMap.end()) break; // VLAs. - int FI = SI->second; - if (!DI->getDebugLoc().isUnknown()) - MF.getMMI().setVariableDbgInfo(DI->getVariable(), FI, DI->getDebugLoc()); - - // Building the map above is target independent. Generating DBG_VALUE - // inline is target dependent; do this now. - (void)TargetSelectInstruction(cast<Instruction>(I)); + // Note that if we have a byval struct argument, fast ISel is turned off; + // those are handled in SelectionDAGBuilder. + if (AI) { + DenseMap<const AllocaInst*, int>::iterator SI = + StaticAllocaMap.find(AI); + if (SI == StaticAllocaMap.end()) break; // VLAs. + int FI = SI->second; + if (!DI->getDebugLoc().isUnknown()) + MF.getMMI().setVariableDbgInfo(DI->getVariable(), FI, DI->getDebugLoc()); + } else + // Building the map above is target independent. Generating DBG_VALUE + // inline is target dependent; do this now. + (void)TargetSelectInstruction(cast<Instruction>(I)); return true; } case Intrinsic::dbg_value: { - // This requires target support, but right now X86 is the only Fast target. - DbgValueInst *DI = cast<DbgValueInst>(I); + // This form of DBG_VALUE is target-independent. + const DbgValueInst *DI = cast<DbgValueInst>(I); const TargetInstrDesc &II = TII.get(TargetOpcode::DBG_VALUE); - Value *V = DI->getValue(); + const Value *V = DI->getValue(); if (!V) { // Currently the optimizer can produce this; insert an undef to // help debugging. Probably the optimizer should not do this. BuildMI(MBB, DL, II).addReg(0U).addImm(DI->getOffset()). 
addMetadata(DI->getVariable()); - } else if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) { + } else if (const ConstantInt *CI = dyn_cast<ConstantInt>(V)) { BuildMI(MBB, DL, II).addImm(CI->getZExtValue()).addImm(DI->getOffset()). addMetadata(DI->getVariable()); - } else if (ConstantFP *CF = dyn_cast<ConstantFP>(V)) { + } else if (const ConstantFP *CF = dyn_cast<ConstantFP>(V)) { BuildMI(MBB, DL, II).addFPImm(CF).addImm(DI->getOffset()). addMetadata(DI->getVariable()); } else if (unsigned Reg = lookUpRegForValue(V)) { @@ -438,10 +455,12 @@ bool FastISel::SelectCall(User *I) { break; } } + + // An arbitrary call. Bail. return false; } -bool FastISel::SelectCast(User *I, unsigned Opcode) { +bool FastISel::SelectCast(const User *I, unsigned Opcode) { EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType()); EVT DstVT = TLI.getValueType(I->getType()); @@ -493,7 +512,7 @@ bool FastISel::SelectCast(User *I, unsigned Opcode) { return true; } -bool FastISel::SelectBitCast(User *I) { +bool FastISel::SelectBitCast(const User *I) { // If the bitcast doesn't change the type, just use the operand value. if (I->getType() == I->getOperand(0)->getType()) { unsigned Reg = getRegForValue(I->getOperand(0)); @@ -544,15 +563,28 @@ bool FastISel::SelectBitCast(User *I) { } bool -FastISel::SelectInstruction(Instruction *I) { +FastISel::SelectInstruction(const Instruction *I) { + // Just before the terminator instruction, insert instructions to + // feed PHI nodes in successor blocks. + if (isa<TerminatorInst>(I)) + if (!HandlePHINodesInSuccessorBlocks(I->getParent())) + return false; + + DL = I->getDebugLoc(); + // First, try doing target-independent selection. - if (SelectOperator(I, I->getOpcode())) + if (SelectOperator(I, I->getOpcode())) { + DL = DebugLoc(); return true; + } // Next, try calling the target to attempt to handle the instruction. - if (TargetSelectInstruction(I)) + if (TargetSelectInstruction(I)) { + DL = DebugLoc(); return true; + } + DL = DebugLoc(); return false; } @@ -573,7 +605,7 @@ FastISel::FastEmitBranch(MachineBasicBlock *MSucc) { /// SelectFNeg - Emit an FNeg operation. /// bool -FastISel::SelectFNeg(User *I) { +FastISel::SelectFNeg(const User *I) { unsigned OpReg = getRegForValue(BinaryOperator::getFNegArgument(I)); if (OpReg == 0) return false; @@ -614,7 +646,7 @@ FastISel::SelectFNeg(User *I) { } bool -FastISel::SelectOperator(User *I, unsigned Opcode) { +FastISel::SelectOperator(const User *I, unsigned Opcode) { switch (Opcode) { case Instruction::Add: return SelectBinaryOp(I, ISD::ADD); @@ -660,10 +692,10 @@ FastISel::SelectOperator(User *I, unsigned Opcode) { return SelectGetElementPtr(I); case Instruction::Br: { - BranchInst *BI = cast<BranchInst>(I); + const BranchInst *BI = cast<BranchInst>(I); if (BI->isUnconditional()) { - BasicBlock *LLVMSucc = BI->getSuccessor(0); + const BasicBlock *LLVMSucc = BI->getSuccessor(0); MachineBasicBlock *MSucc = MBBMap[LLVMSucc]; FastEmitBranch(MSucc); return true; @@ -678,10 +710,6 @@ FastISel::SelectOperator(User *I, unsigned Opcode) { // Nothing to emit. return true; - case Instruction::PHI: - // PHI nodes are already emitted. - return true; - case Instruction::Alloca: // FunctionLowering has the static-sized case covered. if (StaticAllocaMap.count(cast<AllocaInst>(I))) @@ -721,6 +749,9 @@ FastISel::SelectOperator(User *I, unsigned Opcode) { return true; } + case Instruction::PHI: + llvm_unreachable("FastISel shouldn't visit PHI nodes!"); + default: // Unhandled instruction. Halt "fast" selection and bail. 
return false; @@ -730,15 +761,17 @@ FastISel::SelectOperator(User *I, unsigned Opcode) { FastISel::FastISel(MachineFunction &mf, DenseMap<const Value *, unsigned> &vm, DenseMap<const BasicBlock *, MachineBasicBlock *> &bm, - DenseMap<const AllocaInst *, int> &am + DenseMap<const AllocaInst *, int> &am, + std::vector<std::pair<MachineInstr*, unsigned> > &pn #ifndef NDEBUG - , SmallSet<Instruction*, 8> &cil + , SmallSet<const Instruction *, 8> &cil #endif ) : MBB(0), ValueMap(vm), MBBMap(bm), StaticAllocaMap(am), + PHINodesToUpdate(pn), #ifndef NDEBUG CatchInfoLost(cil), #endif @@ -775,7 +808,7 @@ unsigned FastISel::FastEmit_i(MVT, MVT, unsigned, uint64_t /*Imm*/) { } unsigned FastISel::FastEmit_f(MVT, MVT, - unsigned, ConstantFP * /*FPImm*/) { + unsigned, const ConstantFP * /*FPImm*/) { return 0; } @@ -787,7 +820,7 @@ unsigned FastISel::FastEmit_ri(MVT, MVT, unsigned FastISel::FastEmit_rf(MVT, MVT, unsigned, unsigned /*Op0*/, - ConstantFP * /*FPImm*/) { + const ConstantFP * /*FPImm*/) { return 0; } @@ -820,7 +853,7 @@ unsigned FastISel::FastEmit_ri_(MVT VT, unsigned Opcode, /// FastEmit_rf. If that fails, it materializes the immediate into a register /// and try FastEmit_rr instead. unsigned FastISel::FastEmit_rf_(MVT VT, unsigned Opcode, - unsigned Op0, ConstantFP *FPImm, + unsigned Op0, const ConstantFP *FPImm, MVT ImmType) { // First check if immediate type is legal. If not, we can't use the rf form. unsigned ResultReg = FastEmit_rf(VT, VT, Opcode, Op0, FPImm); @@ -930,7 +963,7 @@ unsigned FastISel::FastEmitInst_ri(unsigned MachineInstOpcode, unsigned FastISel::FastEmitInst_rf(unsigned MachineInstOpcode, const TargetRegisterClass *RC, - unsigned Op0, ConstantFP *FPImm) { + unsigned Op0, const ConstantFP *FPImm) { unsigned ResultReg = createResultReg(RC); const TargetInstrDesc &II = TII.get(MachineInstOpcode); @@ -1006,3 +1039,67 @@ unsigned FastISel::FastEmitInst_extractsubreg(MVT RetVT, unsigned FastISel::FastEmitZExtFromI1(MVT VT, unsigned Op) { return FastEmit_ri(VT, VT, ISD::AND, Op, 1); } + +/// HandlePHINodesInSuccessorBlocks - Handle PHI nodes in successor blocks. +/// Emit code to ensure constants are copied into registers when needed. +/// Remember the virtual registers that need to be added to the Machine PHI +/// nodes as input. We cannot just directly add them, because expansion +/// might result in multiple MBB's for one BB. As such, the start of the +/// BB might correspond to a different MBB than the end. +bool FastISel::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) { + const TerminatorInst *TI = LLVMBB->getTerminator(); + + SmallPtrSet<MachineBasicBlock *, 4> SuccsHandled; + unsigned OrigNumPHINodesToUpdate = PHINodesToUpdate.size(); + + // Check successor nodes' PHI nodes that expect a constant to be available + // from this block. + for (unsigned succ = 0, e = TI->getNumSuccessors(); succ != e; ++succ) { + const BasicBlock *SuccBB = TI->getSuccessor(succ); + if (!isa<PHINode>(SuccBB->begin())) continue; + MachineBasicBlock *SuccMBB = MBBMap[SuccBB]; + + // If this terminator has multiple identical successors (common for + // switches), only handle each succ once. + if (!SuccsHandled.insert(SuccMBB)) continue; + + MachineBasicBlock::iterator MBBI = SuccMBB->begin(); + + // At this point we know that there is a 1-1 correspondence between LLVM PHI + // nodes and Machine PHI nodes, but the incoming operands have not been + // emitted yet. 
+ for (BasicBlock::const_iterator I = SuccBB->begin(); + const PHINode *PN = dyn_cast<PHINode>(I); ++I) { + // Ignore dead phi's. + if (PN->use_empty()) continue; + + // Only handle legal types. Two interesting things to note here. First, + // by bailing out early, we may leave behind some dead instructions, + // since SelectionDAG's HandlePHINodesInSuccessorBlocks will insert its + // own moves. Second, this check is necessary becuase FastISel doesn't + // use CreateRegForValue to create registers, so it always creates + // exactly one register for each non-void instruction. + EVT VT = TLI.getValueType(PN->getType(), /*AllowUnknown=*/true); + if (VT == MVT::Other || !TLI.isTypeLegal(VT)) { + // Promote MVT::i1. + if (VT == MVT::i1) + VT = TLI.getTypeToTransformTo(LLVMBB->getContext(), VT); + else { + PHINodesToUpdate.resize(OrigNumPHINodesToUpdate); + return false; + } + } + + const Value *PHIOp = PN->getIncomingValueForBlock(LLVMBB); + + unsigned Reg = getRegForValue(PHIOp); + if (Reg == 0) { + PHINodesToUpdate.resize(OrigNumPHINodesToUpdate); + return false; + } + PHINodesToUpdate.push_back(std::make_pair(MBBI++, Reg)); + } + } + + return true; +} diff --git a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp index 4fb2aa2..65c36c1 100644 --- a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp +++ b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp @@ -14,19 +14,18 @@ #define DEBUG_TYPE "function-lowering-info" #include "FunctionLoweringInfo.h" -#include "llvm/CallingConv.h" #include "llvm/DerivedTypes.h" #include "llvm/Function.h" #include "llvm/Instructions.h" #include "llvm/IntrinsicInst.h" #include "llvm/LLVMContext.h" #include "llvm/Module.h" +#include "llvm/CodeGen/Analysis.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/Analysis/DebugInfo.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetFrameInfo.h" @@ -34,99 +33,21 @@ #include "llvm/Target/TargetIntrinsicInfo.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetOptions.h" -#include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" -#include "llvm/Support/raw_ostream.h" #include <algorithm> using namespace llvm; -/// ComputeLinearIndex - Given an LLVM IR aggregate type and a sequence -/// of insertvalue or extractvalue indices that identify a member, return -/// the linearized index of the start of the member. -/// -unsigned llvm::ComputeLinearIndex(const TargetLowering &TLI, const Type *Ty, - const unsigned *Indices, - const unsigned *IndicesEnd, - unsigned CurIndex) { - // Base case: We're done. - if (Indices && Indices == IndicesEnd) - return CurIndex; - - // Given a struct type, recursively traverse the elements. - if (const StructType *STy = dyn_cast<StructType>(Ty)) { - for (StructType::element_iterator EB = STy->element_begin(), - EI = EB, - EE = STy->element_end(); - EI != EE; ++EI) { - if (Indices && *Indices == unsigned(EI - EB)) - return ComputeLinearIndex(TLI, *EI, Indices+1, IndicesEnd, CurIndex); - CurIndex = ComputeLinearIndex(TLI, *EI, 0, 0, CurIndex); - } - return CurIndex; - } - // Given an array type, recursively traverse the elements. 
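When any PHI in any successor cannot be handled, the loop above resizes PHINodesToUpdate back to the length it had on entry, so a partially processed terminator leaves no stale entries behind once selection falls back to the SelectionDAG path. The rollback idiom in isolation, with a stand-in element type:

#include <cassert>
#include <utility>
#include <vector>

int main() {
  std::vector<std::pair<int, unsigned> > PHINodesToUpdate;   // (PHI id, vreg) stand-ins
  PHINodesToUpdate.push_back(std::make_pair(0, 100u));       // recorded by earlier blocks

  unsigned OrigNumPHINodesToUpdate = PHINodesToUpdate.size();
  PHINodesToUpdate.push_back(std::make_pair(1, 101u));       // tentatively recorded here
  bool Failed = true;                                        // e.g. an illegal PHI type
  if (Failed)
    PHINodesToUpdate.resize(OrigNumPHINodesToUpdate);        // drop only this block's entries

  assert(PHINodesToUpdate.size() == 1 && PHINodesToUpdate[0].first == 0);
  return 0;
}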
- else if (const ArrayType *ATy = dyn_cast<ArrayType>(Ty)) { - const Type *EltTy = ATy->getElementType(); - for (unsigned i = 0, e = ATy->getNumElements(); i != e; ++i) { - if (Indices && *Indices == i) - return ComputeLinearIndex(TLI, EltTy, Indices+1, IndicesEnd, CurIndex); - CurIndex = ComputeLinearIndex(TLI, EltTy, 0, 0, CurIndex); - } - return CurIndex; - } - // We haven't found the type we're looking for, so keep searching. - return CurIndex + 1; -} - -/// ComputeValueVTs - Given an LLVM IR type, compute a sequence of -/// EVTs that represent all the individual underlying -/// non-aggregate types that comprise it. -/// -/// If Offsets is non-null, it points to a vector to be filled in -/// with the in-memory offsets of each of the individual values. -/// -void llvm::ComputeValueVTs(const TargetLowering &TLI, const Type *Ty, - SmallVectorImpl<EVT> &ValueVTs, - SmallVectorImpl<uint64_t> *Offsets, - uint64_t StartingOffset) { - // Given a struct type, recursively traverse the elements. - if (const StructType *STy = dyn_cast<StructType>(Ty)) { - const StructLayout *SL = TLI.getTargetData()->getStructLayout(STy); - for (StructType::element_iterator EB = STy->element_begin(), - EI = EB, - EE = STy->element_end(); - EI != EE; ++EI) - ComputeValueVTs(TLI, *EI, ValueVTs, Offsets, - StartingOffset + SL->getElementOffset(EI - EB)); - return; - } - // Given an array type, recursively traverse the elements. - if (const ArrayType *ATy = dyn_cast<ArrayType>(Ty)) { - const Type *EltTy = ATy->getElementType(); - uint64_t EltSize = TLI.getTargetData()->getTypeAllocSize(EltTy); - for (unsigned i = 0, e = ATy->getNumElements(); i != e; ++i) - ComputeValueVTs(TLI, EltTy, ValueVTs, Offsets, - StartingOffset + i * EltSize); - return; - } - // Interpret void as zero return values. - if (Ty->isVoidTy()) - return; - // Base case: we can get an EVT for this LLVM IR type. - ValueVTs.push_back(TLI.getValueType(Ty)); - if (Offsets) - Offsets->push_back(StartingOffset); -} - /// isUsedOutsideOfDefiningBlock - Return true if this instruction is used by /// PHI nodes or outside of the basic block that defines it, or used by a /// switch or atomic instruction, which may expand to multiple basic blocks. -static bool isUsedOutsideOfDefiningBlock(Instruction *I) { +static bool isUsedOutsideOfDefiningBlock(const Instruction *I) { + if (I->use_empty()) return false; if (isa<PHINode>(I)) return true; - BasicBlock *BB = I->getParent(); - for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI != E; ++UI) + const BasicBlock *BB = I->getParent(); + for (Value::const_use_iterator UI = I->use_begin(), E = I->use_end(); + UI != E; ++UI) if (cast<Instruction>(*UI)->getParent() != BB || isa<PHINode>(*UI)) return true; return false; @@ -135,26 +56,25 @@ static bool isUsedOutsideOfDefiningBlock(Instruction *I) { /// isOnlyUsedInEntryBlock - If the specified argument is only used in the /// entry block, return true. This includes arguments used by switches, since /// the switch may expand into multiple basic blocks. -static bool isOnlyUsedInEntryBlock(Argument *A, bool EnableFastISel) { +static bool isOnlyUsedInEntryBlock(const Argument *A, bool EnableFastISel) { // With FastISel active, we may be splitting blocks, so force creation // of virtual registers for all non-dead arguments. - // Don't force virtual registers for byval arguments though, because - // fast-isel can't handle those in all cases. 
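ComputeLinearIndex and ComputeValueVTs are deleted from this file; presumably the new llvm/CodeGen/Analysis.h include added above now supplies them. The removed comment describes ComputeLinearIndex as returning the flattened position of an aggregate member, i.e. the number of scalar leaves that precede it in a depth-first walk. A miniature of that linearization over a toy type representation rather than real IR types:

#include <cassert>
#include <vector>

struct Ty { std::vector<Ty> Members; };              // empty Members == scalar leaf

static unsigned CountLeaves(const Ty &T) {
  if (T.Members.empty()) return 1;
  unsigned N = 0;
  for (unsigned i = 0, e = T.Members.size(); i != e; ++i)
    N += CountLeaves(T.Members[i]);
  return N;
}

static unsigned LinearIndex(const Ty &T, const unsigned *I, const unsigned *E) {
  if (I == E) return 0;                              // reached the member
  unsigned Index = 0;
  for (unsigned i = 0; i != *I; ++i)                 // leaves in preceding siblings
    Index += CountLeaves(T.Members[i]);
  return Index + LinearIndex(T.Members[*I], I + 1, E);
}

int main() {
  Ty Leaf;                                           // e.g. i32
  Ty Inner;  Inner.Members.assign(2, Leaf);          // { i32, i32 }
  Ty Outer;                                          // { i32, { i32, i32 }, i32 }
  Outer.Members.push_back(Leaf);
  Outer.Members.push_back(Inner);
  Outer.Members.push_back(Leaf);
  unsigned Idx[] = { 1, 1 };                         // second element of the inner struct
  assert(LinearIndex(Outer, Idx, Idx + 2) == 2);     // preceded by outer.0 and inner.0
  return 0;
}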
- if (EnableFastISel && !A->hasByValAttr()) + if (EnableFastISel) return A->use_empty(); - BasicBlock *Entry = A->getParent()->begin(); - for (Value::use_iterator UI = A->use_begin(), E = A->use_end(); UI != E; ++UI) + const BasicBlock *Entry = A->getParent()->begin(); + for (Value::const_use_iterator UI = A->use_begin(), E = A->use_end(); + UI != E; ++UI) if (cast<Instruction>(*UI)->getParent() != Entry || isa<SwitchInst>(*UI)) return false; // Use not in entry block. return true; } -FunctionLoweringInfo::FunctionLoweringInfo(TargetLowering &tli) +FunctionLoweringInfo::FunctionLoweringInfo(const TargetLowering &tli) : TLI(tli) { } -void FunctionLoweringInfo::set(Function &fn, MachineFunction &mf, +void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf, bool EnableFastISel) { Fn = &fn; MF = &mf; @@ -162,7 +82,7 @@ void FunctionLoweringInfo::set(Function &fn, MachineFunction &mf, // Create a vreg for each argument register that is not dead and is used // outside of the entry block for the function. - for (Function::arg_iterator AI = Fn->arg_begin(), E = Fn->arg_end(); + for (Function::const_arg_iterator AI = Fn->arg_begin(), E = Fn->arg_end(); AI != E; ++AI) if (!isOnlyUsedInEntryBlock(AI, EnableFastISel)) InitializeRegForValue(AI); @@ -170,10 +90,10 @@ void FunctionLoweringInfo::set(Function &fn, MachineFunction &mf, // Initialize the mapping of values to registers. This is only set up for // instruction values that are used outside of the block that defines // them. - Function::iterator BB = Fn->begin(), EB = Fn->end(); - for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) - if (AllocaInst *AI = dyn_cast<AllocaInst>(I)) - if (ConstantInt *CUI = dyn_cast<ConstantInt>(AI->getArraySize())) { + Function::const_iterator BB = Fn->begin(), EB = Fn->end(); + for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I != E; ++I) + if (const AllocaInst *AI = dyn_cast<AllocaInst>(I)) + if (const ConstantInt *CUI = dyn_cast<ConstantInt>(AI->getArraySize())) { const Type *Ty = AI->getAllocatedType(); uint64_t TySize = TLI.getTargetData()->getTypeAllocSize(Ty); unsigned Align = @@ -187,8 +107,8 @@ void FunctionLoweringInfo::set(Function &fn, MachineFunction &mf, } for (; BB != EB; ++BB) - for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) - if (!I->use_empty() && isUsedOutsideOfDefiningBlock(I)) + for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I != E; ++I) + if (isUsedOutsideOfDefiningBlock(I)) if (!isa<AllocaInst>(I) || !StaticAllocaMap.count(cast<AllocaInst>(I))) InitializeRegForValue(I); @@ -196,7 +116,7 @@ void FunctionLoweringInfo::set(Function &fn, MachineFunction &mf, // Create an initial MachineBasicBlock for each LLVM BasicBlock in F. This // also creates the initial PHI MachineInstrs, though none of the input // operands are populated. - for (BB = Fn->begin(), EB = Fn->end(); BB != EB; ++BB) { + for (BB = Fn->begin(); BB != EB; ++BB) { MachineBasicBlock *MBB = mf.CreateMachineBasicBlock(BB); MBBMap[BB] = MBB; MF->push_back(MBB); @@ -209,14 +129,11 @@ void FunctionLoweringInfo::set(Function &fn, MachineFunction &mf, // Create Machine PHI nodes for LLVM PHI nodes, lowering them as // appropriate. 
- PHINode *PN; - DebugLoc DL; - for (BasicBlock::iterator - I = BB->begin(), E = BB->end(); I != E; ++I) { - - PN = dyn_cast<PHINode>(I); - if (!PN || PN->use_empty()) continue; + for (BasicBlock::const_iterator I = BB->begin(); + const PHINode *PN = dyn_cast<PHINode>(I); ++I) { + if (PN->use_empty()) continue; + DebugLoc DL = PN->getDebugLoc(); unsigned PHIReg = ValueMap[PN]; assert(PHIReg && "PHI node does not have an assigned virtual register!"); @@ -232,12 +149,20 @@ void FunctionLoweringInfo::set(Function &fn, MachineFunction &mf, } } } + + // Mark landing pad blocks. + for (BB = Fn->begin(); BB != EB; ++BB) + if (const InvokeInst *Invoke = dyn_cast<InvokeInst>(BB->getTerminator())) + MBBMap[Invoke->getSuccessor(1)]->setIsLandingPad(); } /// clear - Clear out all the function-specific state. This returns this /// FunctionLoweringInfo to an empty state, ready to be used for a /// different function. void FunctionLoweringInfo::clear() { + assert(CatchInfoFound.size() == CatchInfoLost.size() && + "Not all catch info was assigned to a landing pad!"); + MBBMap.clear(); ValueMap.clear(); StaticAllocaMap.clear(); @@ -246,6 +171,7 @@ void FunctionLoweringInfo::clear() { CatchInfoFound.clear(); #endif LiveOutRegInfo.clear(); + ArgDbgValues.clear(); } unsigned FunctionLoweringInfo::MakeReg(EVT VT) { @@ -277,30 +203,12 @@ unsigned FunctionLoweringInfo::CreateRegForValue(const Value *V) { return FirstReg; } -/// ExtractTypeInfo - Returns the type info, possibly bitcast, encoded in V. -GlobalVariable *llvm::ExtractTypeInfo(Value *V) { - V = V->stripPointerCasts(); - GlobalVariable *GV = dyn_cast<GlobalVariable>(V); - - if (GV && GV->getName() == ".llvm.eh.catch.all.value") { - assert(GV->hasInitializer() && - "The EH catch-all value must have an initializer"); - Value *Init = GV->getInitializer(); - GV = dyn_cast<GlobalVariable>(Init); - if (!GV) V = cast<ConstantPointerNull>(Init); - } - - assert((GV || isa<ConstantPointerNull>(V)) && - "TypeInfo must be a global variable or NULL"); - return GV; -} - /// AddCatchInfo - Extract the personality and type infos from an eh.selector /// call, and add them to the specified machine basic block. -void llvm::AddCatchInfo(CallInst &I, MachineModuleInfo *MMI, +void llvm::AddCatchInfo(const CallInst &I, MachineModuleInfo *MMI, MachineBasicBlock *MBB) { // Inform the MachineModuleInfo of the personality for this landing pad. - ConstantExpr *CE = cast<ConstantExpr>(I.getOperand(2)); + const ConstantExpr *CE = cast<ConstantExpr>(I.getOperand(2)); assert(CE->getOpcode() == Instruction::BitCast && isa<Function>(CE->getOperand(0)) && "Personality should be a function"); @@ -308,11 +216,11 @@ void llvm::AddCatchInfo(CallInst &I, MachineModuleInfo *MMI, // Gather all the type infos for this landing pad and pass them along to // MachineModuleInfo. 
- std::vector<GlobalVariable *> TyInfo; + std::vector<const GlobalVariable *> TyInfo; unsigned N = I.getNumOperands(); for (unsigned i = N - 1; i > 2; --i) { - if (ConstantInt *CI = dyn_cast<ConstantInt>(I.getOperand(i))) { + if (const ConstantInt *CI = dyn_cast<ConstantInt>(I.getOperand(i))) { unsigned FilterLength = CI->getZExtValue(); unsigned FirstCatch = i + FilterLength + !FilterLength; assert (FirstCatch <= N && "Invalid filter length"); @@ -349,10 +257,11 @@ void llvm::AddCatchInfo(CallInst &I, MachineModuleInfo *MMI, } } -void llvm::CopyCatchInfo(BasicBlock *SrcBB, BasicBlock *DestBB, +void llvm::CopyCatchInfo(const BasicBlock *SrcBB, const BasicBlock *DestBB, MachineModuleInfo *MMI, FunctionLoweringInfo &FLI) { - for (BasicBlock::iterator I = SrcBB->begin(), E = --SrcBB->end(); I != E; ++I) - if (EHSelectorInst *EHSel = dyn_cast<EHSelectorInst>(I)) { + for (BasicBlock::const_iterator I = SrcBB->begin(), E = --SrcBB->end(); + I != E; ++I) + if (const EHSelectorInst *EHSel = dyn_cast<EHSelectorInst>(I)) { // Apply the catch info to DestBB. AddCatchInfo(*EHSel, MMI, FLI.MBBMap[DestBB]); #ifndef NDEBUG diff --git a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.h b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.h index d851e64..4067a5b 100644 --- a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.h +++ b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.h @@ -15,12 +15,17 @@ #ifndef FUNCTIONLOWERINGINFO_H #define FUNCTIONLOWERINGINFO_H +#include "llvm/InlineAsm.h" +#include "llvm/Instructions.h" #include "llvm/ADT/APInt.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallVector.h" #ifndef NDEBUG #include "llvm/ADT/SmallSet.h" #endif #include "llvm/CodeGen/ValueTypes.h" +#include "llvm/CodeGen/ISDOpcodes.h" +#include "llvm/Support/CallSite.h" #include <vector> namespace llvm { @@ -31,6 +36,7 @@ class CallInst; class Function; class GlobalVariable; class Instruction; +class MachineInstr; class MachineBasicBlock; class MachineFunction; class MachineModuleInfo; @@ -44,8 +50,8 @@ class Value; /// class FunctionLoweringInfo { public: - TargetLowering &TLI; - Function *Fn; + const TargetLowering &TLI; + const Function *Fn; MachineFunction *MF; MachineRegisterInfo *RegInfo; @@ -57,13 +63,6 @@ public: /// allocated to hold a pointer to the hidden sret parameter. unsigned DemoteRegister; - explicit FunctionLoweringInfo(TargetLowering &TLI); - - /// set - Initialize this FunctionLoweringInfo with the given Function - /// and its associated MachineFunction. - /// - void set(Function &Fn, MachineFunction &MF, bool EnableFastISel); - /// MBBMap - A mapping from LLVM basic blocks to their machine code entry. DenseMap<const BasicBlock*, MachineBasicBlock *> MBBMap; @@ -77,27 +76,15 @@ public: /// anywhere in the function. DenseMap<const AllocaInst*, int> StaticAllocaMap; + /// ArgDbgValues - A list of DBG_VALUE instructions created during isel for + /// function arguments that are inserted after scheduling is completed. + SmallVector<MachineInstr*, 8> ArgDbgValues; + #ifndef NDEBUG - SmallSet<Instruction*, 8> CatchInfoLost; - SmallSet<Instruction*, 8> CatchInfoFound; + SmallSet<const Instruction *, 8> CatchInfoLost; + SmallSet<const Instruction *, 8> CatchInfoFound; #endif - unsigned MakeReg(EVT VT); - - /// isExportedInst - Return true if the specified value is an instruction - /// exported from its block. 
- bool isExportedInst(const Value *V) { - return ValueMap.count(V); - } - - unsigned CreateRegForValue(const Value *V); - - unsigned InitializeRegForValue(const Value *V) { - unsigned &R = ValueMap[V]; - assert(R == 0 && "Already initialized this value register!"); - return R = CreateRegForValue(V); - } - struct LiveOutInfo { unsigned NumSignBits; APInt KnownOne, KnownZero; @@ -108,42 +95,48 @@ public: /// register number offset by 'FirstVirtualRegister'. std::vector<LiveOutInfo> LiveOutRegInfo; + /// PHINodesToUpdate - A list of phi instructions whose operand list will + /// be updated after processing the current basic block. + /// TODO: This isn't per-function state, it's per-basic-block state. But + /// there's no other convenient place for it to live right now. + std::vector<std::pair<MachineInstr*, unsigned> > PHINodesToUpdate; + + explicit FunctionLoweringInfo(const TargetLowering &TLI); + + /// set - Initialize this FunctionLoweringInfo with the given Function + /// and its associated MachineFunction. + /// + void set(const Function &Fn, MachineFunction &MF, bool EnableFastISel); + /// clear - Clear out all the function-specific state. This returns this /// FunctionLoweringInfo to an empty state, ready to be used for a /// different function. void clear(); -}; -/// ComputeLinearIndex - Given an LLVM IR aggregate type and a sequence -/// of insertvalue or extractvalue indices that identify a member, return -/// the linearized index of the start of the member. -/// -unsigned ComputeLinearIndex(const TargetLowering &TLI, const Type *Ty, - const unsigned *Indices, - const unsigned *IndicesEnd, - unsigned CurIndex = 0); - -/// ComputeValueVTs - Given an LLVM IR type, compute a sequence of -/// EVTs that represent all the individual underlying -/// non-aggregate types that comprise it. -/// -/// If Offsets is non-null, it points to a vector to be filled in -/// with the in-memory offsets of each of the individual values. -/// -void ComputeValueVTs(const TargetLowering &TLI, const Type *Ty, - SmallVectorImpl<EVT> &ValueVTs, - SmallVectorImpl<uint64_t> *Offsets = 0, - uint64_t StartingOffset = 0); + unsigned MakeReg(EVT VT); + + /// isExportedInst - Return true if the specified value is an instruction + /// exported from its block. + bool isExportedInst(const Value *V) { + return ValueMap.count(V); + } -/// ExtractTypeInfo - Returns the type info, possibly bitcast, encoded in V. -GlobalVariable *ExtractTypeInfo(Value *V); + unsigned CreateRegForValue(const Value *V); + + unsigned InitializeRegForValue(const Value *V) { + unsigned &R = ValueMap[V]; + assert(R == 0 && "Already initialized this value register!"); + return R = CreateRegForValue(V); + } +}; /// AddCatchInfo - Extract the personality and type infos from an eh.selector /// call, and add them to the specified machine basic block. -void AddCatchInfo(CallInst &I, MachineModuleInfo *MMI, MachineBasicBlock *MBB); +void AddCatchInfo(const CallInst &I, + MachineModuleInfo *MMI, MachineBasicBlock *MBB); /// CopyCatchInfo - Copy catch information from DestBB to SrcBB. 
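PHINodesToUpdate, newly hoisted into FunctionLoweringInfo here, records (machine PHI, virtual register) pairs while a block is being selected. A hedged sketch of how such a list is usually drained once the predecessor's machine block exists; everything except the member name is illustrative:

    for (unsigned i = 0, e = FuncInfo.PHINodesToUpdate.size(); i != e; ++i) {
      MachineInstr *PHI = FuncInfo.PHINodesToUpdate[i].first;
      unsigned Reg = FuncInfo.PHINodesToUpdate[i].second;
      PHI->addOperand(MachineOperand::CreateReg(Reg, /*isDef=*/false));
      PHI->addOperand(MachineOperand::CreateMBB(MBB)); // MBB: the just-emitted predecessor
    }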
-void CopyCatchInfo(BasicBlock *SrcBB, BasicBlock *DestBB, +void CopyCatchInfo(const BasicBlock *SrcBB, const BasicBlock *DestBB, MachineModuleInfo *MMI, FunctionLoweringInfo &FLI); } // end namespace llvm diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp index 28ba343..c5dae82 100644 --- a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp +++ b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp @@ -296,8 +296,19 @@ InstrEmitter::AddRegisterOperand(MachineInstr *MI, SDValue Op, } } + // If this value has only one use, that use is a kill. This is a + // conservative approximation. Tied operands are never killed, so we need + // to check that. And that means we need to determine the index of the + // operand. + unsigned Idx = MI->getNumOperands(); + while (Idx > 0 && + MI->getOperand(Idx-1).isReg() && MI->getOperand(Idx-1).isImplicit()) + --Idx; + bool isTied = MI->getDesc().getOperandConstraint(Idx, TOI::TIED_TO) != -1; + bool isKill = Op.hasOneUse() && !isTied && !IsDebug; + MI->addOperand(MachineOperand::CreateReg(VReg, isOptDef, - false/*isImp*/, false/*isKill*/, + false/*isImp*/, isKill, false/*isDead*/, false/*isUndef*/, false/*isEarlyClobber*/, 0/*SubReg*/, IsDebug)); @@ -440,8 +451,7 @@ void InstrEmitter::EmitSubregNode(SDNode *Node, unsigned SubIdx = cast<ConstantSDNode>(N2)->getZExtValue(); const TargetRegisterClass *TRC = MRI->getRegClass(SubReg); const TargetRegisterClass *SRC = - getSuperRegisterRegClass(TRC, SubIdx, - Node->getValueType(0)); + getSuperRegisterRegClass(TRC, SubIdx, Node->getValueType(0)); // Figure out the register class to create for the destreg. // Note that if we're going to directly use an existing register, @@ -504,41 +514,83 @@ InstrEmitter::EmitCopyToRegClassNode(SDNode *Node, assert(isNew && "Node emitted out of order - early"); } +/// EmitRegSequence - Generate machine code for REG_SEQUENCE nodes. +/// +void InstrEmitter::EmitRegSequence(SDNode *Node, + DenseMap<SDValue, unsigned> &VRBaseMap) { + const TargetRegisterClass *RC = TLI->getRegClassFor(Node->getValueType(0)); + unsigned NewVReg = MRI->createVirtualRegister(RC); + MachineInstr *MI = BuildMI(*MF, Node->getDebugLoc(), + TII->get(TargetOpcode::REG_SEQUENCE), NewVReg); + unsigned NumOps = Node->getNumOperands(); + assert((NumOps & 1) == 0 && + "REG_SEQUENCE must have an even number of operands!"); + const TargetInstrDesc &II = TII->get(TargetOpcode::REG_SEQUENCE); + for (unsigned i = 0; i != NumOps; ++i) { + SDValue Op = Node->getOperand(i); +#ifndef NDEBUG + if (i & 1) { + unsigned SubIdx = cast<ConstantSDNode>(Op)->getZExtValue(); + unsigned SubReg = getVR(Node->getOperand(i-1), VRBaseMap); + const TargetRegisterClass *TRC = MRI->getRegClass(SubReg); + const TargetRegisterClass *SRC = + getSuperRegisterRegClass(TRC, SubIdx, Node->getValueType(0)); + assert(SRC == RC && "Invalid subregister index in REG_SEQUENCE"); + } +#endif + AddOperand(MI, Op, i+1, &II, VRBaseMap); + } + + MBB->insert(InsertPos, MI); + SDValue Op(Node, 0); + bool isNew = VRBaseMap.insert(std::make_pair(Op, NewVReg)).second; + isNew = isNew; // Silence compiler warning. + assert(isNew && "Node emitted out of order - early"); +} + /// EmitDbgValue - Generate machine instruction for a dbg_value node. 
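The new EmitRegSequence above expects REG_SEQUENCE operands to alternate between a value and a sub-register index, with the result register class covering every input. A hypothetical way a target selector might build such a node; the value type and sub-register indices are assumptions, not taken from this patch:

    SDValue Ops[] = { V0, CurDAG->getTargetConstant(SubIdx0, MVT::i32),
                      V1, CurDAG->getTargetConstant(SubIdx1, MVT::i32) };
    SDNode *Seq = CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE,
                                         N->getDebugLoc(), VT, Ops, 4);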
/// -MachineInstr *InstrEmitter::EmitDbgValue(SDDbgValue *SD, - MachineBasicBlock *InsertBB, - DenseMap<SDValue, unsigned> &VRBaseMap, - DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) { +MachineInstr * +InstrEmitter::EmitDbgValue(SDDbgValue *SD, + DenseMap<SDValue, unsigned> &VRBaseMap) { uint64_t Offset = SD->getOffset(); MDNode* MDPtr = SD->getMDPtr(); DebugLoc DL = SD->getDebugLoc(); + if (SD->getKind() == SDDbgValue::FRAMEIX) { + // Stack address; this needs to be lowered in target-dependent fashion. + // EmitTargetCodeForFrameDebugValue is responsible for allocation. + unsigned FrameIx = SD->getFrameIx(); + return TII->emitFrameIndexDebugValue(*MF, FrameIx, Offset, MDPtr, DL); + } + // Otherwise, we're going to create an instruction here. const TargetInstrDesc &II = TII->get(TargetOpcode::DBG_VALUE); MachineInstrBuilder MIB = BuildMI(*MF, DL, II); if (SD->getKind() == SDDbgValue::SDNODE) { - AddOperand(&*MIB, SDValue(SD->getSDNode(), SD->getResNo()), - (*MIB).getNumOperands(), &II, VRBaseMap, true /*IsDebug*/); + SDNode *Node = SD->getSDNode(); + SDValue Op = SDValue(Node, SD->getResNo()); + // It's possible we replaced this SDNode with other(s) and therefore + // didn't generate code for it. It's better to catch these cases where + // they happen and transfer the debug info, but trying to guarantee that + // in all cases would be very fragile; this is a safeguard for any + // that were missed. + DenseMap<SDValue, unsigned>::iterator I = VRBaseMap.find(Op); + if (I==VRBaseMap.end()) + MIB.addReg(0U); // undef + else + AddOperand(&*MIB, Op, (*MIB).getNumOperands(), &II, VRBaseMap, + true /*IsDebug*/); } else if (SD->getKind() == SDDbgValue::CONST) { - Value *V = SD->getConst(); - if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) { + const Value *V = SD->getConst(); + if (const ConstantInt *CI = dyn_cast<ConstantInt>(V)) { MIB.addImm(CI->getSExtValue()); - } else if (ConstantFP *CF = dyn_cast<ConstantFP>(V)) { + } else if (const ConstantFP *CF = dyn_cast<ConstantFP>(V)) { MIB.addFPImm(CF); } else { // Could be an Undef. In any case insert an Undef so we can see what we // dropped. MIB.addReg(0U); } - } else if (SD->getKind() == SDDbgValue::FRAMEIX) { - unsigned FrameIx = SD->getFrameIx(); - // Stack address; this needs to be lowered in target-dependent fashion. - // FIXME test that the target supports this somehow; if not emit Undef. - // Create a pseudo for EmitInstrWithCustomInserter's consumption. - MIB.addImm(FrameIx).addImm(Offset).addMetadata(MDPtr); - abort(); - TLI->EmitInstrWithCustomInserter(&*MIB, InsertBB, EM); - return 0; } else { // Insert an Undef so we can see what we dropped. MIB.addReg(0U); @@ -553,8 +605,7 @@ MachineInstr *InstrEmitter::EmitDbgValue(SDDbgValue *SD, /// void InstrEmitter:: EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned, - DenseMap<SDValue, unsigned> &VRBaseMap, - DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) { + DenseMap<SDValue, unsigned> &VRBaseMap) { unsigned Opc = Node->getMachineOpcode(); // Handle subreg insert/extract specially @@ -571,6 +622,12 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned, return; } + // Handle REG_SEQUENCE specially. + if (Opc == TargetOpcode::REG_SEQUENCE) { + EmitRegSequence(Node, VRBaseMap); + return; + } + if (Opc == TargetOpcode::IMPLICIT_DEF) // We want a unique VR for each IMPLICIT_DEF use. 
return; @@ -615,7 +672,7 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned, if (II.usesCustomInsertionHook()) { // Insert this instruction into the basic block using a target // specific inserter which may returns a new basic block. - MBB = TLI->EmitInstrWithCustomInserter(MI, MBB, EM); + MBB = TLI->EmitInstrWithCustomInserter(MI, MBB); InsertPos = MBB->end(); return; } @@ -646,7 +703,6 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned, i != e; ++i) MI->addRegisterDead(IDList[i-II.getNumDefs()], TRI); } - return; } /// EmitSpecialNode - Generate machine code for a target-independent node and @@ -720,12 +776,12 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned, TII->get(TargetOpcode::INLINEASM)); // Add the asm string as an external symbol operand. - const char *AsmStr = - cast<ExternalSymbolSDNode>(Node->getOperand(1))->getSymbol(); + SDValue AsmStrV = Node->getOperand(InlineAsm::Op_AsmString); + const char *AsmStr = cast<ExternalSymbolSDNode>(AsmStrV)->getSymbol(); MI->addOperand(MachineOperand::CreateES(AsmStr)); // Add all of the operand registers to the instruction. - for (unsigned i = 2; i != NumOps;) { + for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) { unsigned Flags = cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue(); unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags); @@ -733,24 +789,24 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned, MI->addOperand(MachineOperand::CreateImm(Flags)); ++i; // Skip the ID value. - switch (Flags & 7) { + switch (InlineAsm::getKind(Flags)) { default: llvm_unreachable("Bad flags!"); - case 2: // Def of register. + case InlineAsm::Kind_RegDef: for (; NumVals; --NumVals, ++i) { unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg(); MI->addOperand(MachineOperand::CreateReg(Reg, true)); } break; - case 6: // Def of earlyclobber register. + case InlineAsm::Kind_RegDefEarlyClobber: for (; NumVals; --NumVals, ++i) { unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg(); MI->addOperand(MachineOperand::CreateReg(Reg, true, false, false, false, false, true)); } break; - case 1: // Use of register. - case 3: // Immediate. - case 4: // Addressing mode. + case InlineAsm::Kind_RegUse: // Use of register. + case InlineAsm::Kind_Imm: // Immediate. + case InlineAsm::Kind_Mem: // Addressing mode. // The addressing mode has been selected, just add all of the // operands to the machine instruction. for (; NumVals; --NumVals, ++i) @@ -758,6 +814,13 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned, break; } } + + // Get the mdnode from the asm if it exists and add it to the instruction. + SDValue MDV = Node->getOperand(InlineAsm::Op_MDNode); + const MDNode *MD = cast<MDNodeSDNode>(MDV)->getMD(); + if (MD) + MI->addOperand(MachineOperand::CreateMetadata(MD)); + MBB->insert(InsertPos, MI); break; } diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.h b/lib/CodeGen/SelectionDAG/InstrEmitter.h index baabb75..c7e7c71 100644 --- a/lib/CodeGen/SelectionDAG/InstrEmitter.h +++ b/lib/CodeGen/SelectionDAG/InstrEmitter.h @@ -88,6 +88,9 @@ class InstrEmitter { void EmitCopyToRegClassNode(SDNode *Node, DenseMap<SDValue, unsigned> &VRBaseMap); + /// EmitRegSequence - Generate machine code for REG_SEQUENCE nodes. 
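The inline-asm hunk above replaces the open-coded "Flags & 7" and shift arithmetic with the named InlineAsm helpers. The decode step in isolation, assuming the surrounding loop's Node and index i:

    unsigned Flags   = cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue();
    unsigned Kind    = InlineAsm::getKind(Flags);                // Kind_RegDef, Kind_RegUse, Kind_Imm, Kind_Mem, ...
    unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags); // operands that follow this flag word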
+ /// + void EmitRegSequence(SDNode *Node, DenseMap<SDValue, unsigned> &VRBaseMap); public: /// CountResults - The results of target nodes have register or immediate /// operands first, then an optional chain, and optional flag operands @@ -103,17 +106,14 @@ public: /// EmitDbgValue - Generate machine instruction for a dbg_value node. /// MachineInstr *EmitDbgValue(SDDbgValue *SD, - MachineBasicBlock *InsertBB, - DenseMap<SDValue, unsigned> &VRBaseMap, - DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM); + DenseMap<SDValue, unsigned> &VRBaseMap); /// EmitNode - Generate machine code for a node and needed dependencies. /// void EmitNode(SDNode *Node, bool IsClone, bool IsCloned, - DenseMap<SDValue, unsigned> &VRBaseMap, - DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) { + DenseMap<SDValue, unsigned> &VRBaseMap) { if (Node->isMachineOpcode()) - EmitMachineNode(Node, IsClone, IsCloned, VRBaseMap, EM); + EmitMachineNode(Node, IsClone, IsCloned, VRBaseMap); else EmitSpecialNode(Node, IsClone, IsCloned, VRBaseMap); } @@ -130,8 +130,7 @@ public: private: void EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned, - DenseMap<SDValue, unsigned> &VRBaseMap, - DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM); + DenseMap<SDValue, unsigned> &VRBaseMap); void EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned, DenseMap<SDValue, unsigned> &VRBaseMap); }; diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index d35f0da..bedfa57 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -32,13 +32,11 @@ #include "llvm/LLVMContext.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/SmallPtrSet.h" -#include <map> using namespace llvm; //===----------------------------------------------------------------------===// @@ -55,7 +53,8 @@ using namespace llvm; /// namespace { class SelectionDAGLegalize { - TargetLowering &TLI; + const TargetMachine &TM; + const TargetLowering &TLI; SelectionDAG &DAG; CodeGenOpt::Level OptLevel; @@ -213,7 +212,8 @@ SelectionDAGLegalize::ShuffleWithNarrowerEltType(EVT NVT, EVT VT, DebugLoc dl, SelectionDAGLegalize::SelectionDAGLegalize(SelectionDAG &dag, CodeGenOpt::Level ol) - : TLI(dag.getTargetLoweringInfo()), DAG(dag), OptLevel(ol), + : TM(dag.getTarget()), TLI(dag.getTargetLoweringInfo()), + DAG(dag), OptLevel(ol), ValueTypeActions(TLI.getValueTypeActions()) { assert(MVT::LAST_VALUETYPE <= MVT::MAX_ALLOWED_VALUETYPE && "Too many value types for ValueTypeActions to hold!"); @@ -409,7 +409,9 @@ SDValue ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG, // to the final destination using (unaligned) integer loads and stores. EVT StoredVT = ST->getMemoryVT(); EVT RegVT = - TLI.getRegisterType(*DAG.getContext(), EVT::getIntegerVT(*DAG.getContext(), StoredVT.getSizeInBits())); + TLI.getRegisterType(*DAG.getContext(), + EVT::getIntegerVT(*DAG.getContext(), + StoredVT.getSizeInBits())); unsigned StoredBytes = StoredVT.getSizeInBits() / 8; unsigned RegBytes = RegVT.getSizeInBits() / 8; unsigned NumRegs = (StoredBytes + RegBytes - 1) / RegBytes; @@ -445,7 +447,8 @@ SDValue ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG, // The last store may be partial. Do a truncating store. 
On big-endian // machines this requires an extending load from the stack slot to ensure // that the bits are in the right place. - EVT MemVT = EVT::getIntegerVT(*DAG.getContext(), 8 * (StoredBytes - Offset)); + EVT MemVT = EVT::getIntegerVT(*DAG.getContext(), + 8 * (StoredBytes - Offset)); // Load from the stack slot. SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, RegVT, Store, StackPtr, @@ -548,7 +551,8 @@ SDValue ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG, } // The last copy may be partial. Do an extending load. - EVT MemVT = EVT::getIntegerVT(*DAG.getContext(), 8 * (LoadedBytes - Offset)); + EVT MemVT = EVT::getIntegerVT(*DAG.getContext(), + 8 * (LoadedBytes - Offset)); SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, RegVT, Chain, Ptr, LD->getSrcValue(), SVOffset + Offset, MemVT, LD->isVolatile(), @@ -968,11 +972,11 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { Node->dump( &DAG); dbgs() << "\n"; #endif - llvm_unreachable("Do not know how to legalize this operator!"); + assert(0 && "Do not know how to legalize this operator!"); case ISD::BUILD_VECTOR: switch (TLI.getOperationAction(ISD::BUILD_VECTOR, Node->getValueType(0))) { - default: llvm_unreachable("This action is not supported yet!"); + default: assert(0 && "This action is not supported yet!"); case TargetLowering::Custom: Tmp3 = TLI.LowerOperation(Result, DAG); if (Tmp3.getNode()) { @@ -1089,7 +1093,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { Tmp4 = Result.getValue(1); switch (TLI.getOperationAction(Node->getOpcode(), VT)) { - default: llvm_unreachable("This action is not supported yet!"); + default: assert(0 && "This action is not supported yet!"); case TargetLowering::Legal: // If this is an unaligned load and the target doesn't support it, // expand it. @@ -1259,7 +1263,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { Tmp2 = LegalizeOp(Ch); } else { switch (TLI.getLoadExtAction(ExtType, SrcVT)) { - default: llvm_unreachable("This action is not supported yet!"); + default: assert(0 && "This action is not supported yet!"); case TargetLowering::Custom: isCustom = true; // FALLTHROUGH @@ -1357,7 +1361,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { EVT VT = Tmp3.getValueType(); switch (TLI.getOperationAction(ISD::STORE, VT)) { - default: llvm_unreachable("This action is not supported yet!"); + default: assert(0 && "This action is not supported yet!"); case TargetLowering::Legal: // If this is an unaligned store and the target doesn't support it, // expand it. @@ -1394,7 +1398,8 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { // Promote to a byte-sized store with upper bits zero if not // storing an integral number of bytes. For example, promote // TRUNCSTORE:i1 X -> TRUNCSTORE:i8 (and X, 1) - EVT NVT = EVT::getIntegerVT(*DAG.getContext(), StVT.getStoreSizeInBits()); + EVT NVT = EVT::getIntegerVT(*DAG.getContext(), + StVT.getStoreSizeInBits()); Tmp3 = DAG.getZeroExtendInReg(Tmp3, dl, StVT); Result = DAG.getTruncStore(Tmp1, dl, Tmp3, Tmp2, ST->getSrcValue(), SVOffset, NVT, isVolatile, isNonTemporal, @@ -1459,7 +1464,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { ST->getOffset()); switch (TLI.getTruncStoreAction(ST->getValue().getValueType(), StVT)) { - default: llvm_unreachable("This action is not supported yet!"); + default: assert(0 && "This action is not supported yet!"); case TargetLowering::Legal: // If this is an unaligned store and the target doesn't support it, // expand it. 
@@ -1658,8 +1663,7 @@ void SelectionDAGLegalize::ExpandDYNAMIC_STACKALLOC(SDNode* Node, SDValue SP = DAG.getCopyFromReg(Chain, dl, SPReg, VT); Chain = SP.getValue(1); unsigned Align = cast<ConstantSDNode>(Tmp3)->getZExtValue(); - unsigned StackAlign = - TLI.getTargetMachine().getFrameInfo()->getStackAlignment(); + unsigned StackAlign = TM.getFrameInfo()->getStackAlignment(); if (Align > StackAlign) SP = DAG.getNode(ISD::AND, dl, VT, SP, DAG.getConstant(-(uint64_t)Align, VT)); @@ -1683,7 +1687,7 @@ void SelectionDAGLegalize::LegalizeSetCCCondCode(EVT VT, EVT OpVT = LHS.getValueType(); ISD::CondCode CCCode = cast<CondCodeSDNode>(CC)->get(); switch (TLI.getCondCodeAction(CCCode, OpVT)) { - default: llvm_unreachable("Unknown condition code action!"); + default: assert(0 && "Unknown condition code action!"); case TargetLowering::Legal: // Nothing to do. break; @@ -1691,7 +1695,7 @@ void SelectionDAGLegalize::LegalizeSetCCCondCode(EVT VT, ISD::CondCode CC1 = ISD::SETCC_INVALID, CC2 = ISD::SETCC_INVALID; unsigned Opc = 0; switch (CCCode) { - default: llvm_unreachable("Don't know how to expand this condition!"); + default: assert(0 && "Don't know how to expand this condition!"); case ISD::SETOEQ: CC1 = ISD::SETEQ; CC2 = ISD::SETO; Opc = ISD::AND; break; case ISD::SETOGT: CC1 = ISD::SETGT; CC2 = ISD::SETO; Opc = ISD::AND; break; case ISD::SETOGE: CC1 = ISD::SETGE; CC2 = ISD::SETO; Opc = ISD::AND; break; @@ -1738,8 +1742,8 @@ SDValue SelectionDAGLegalize::EmitStackConvert(SDValue SrcOp, unsigned SrcSize = SrcOp.getValueType().getSizeInBits(); unsigned SlotSize = SlotVT.getSizeInBits(); unsigned DestSize = DestVT.getSizeInBits(); - unsigned DestAlign = - TLI.getTargetData()->getPrefTypeAlignment(DestVT.getTypeForEVT(*DAG.getContext())); + const Type *DestType = DestVT.getTypeForEVT(*DAG.getContext()); + unsigned DestAlign = TLI.getTargetData()->getPrefTypeAlignment(DestType); // Emit a store to the stack slot. Use a truncstore if the input value is // later than DestVT. @@ -1930,7 +1934,7 @@ SDValue SelectionDAGLegalize::ExpandFPLibCall(SDNode* Node, RTLIB::Libcall Call_PPCF128) { RTLIB::Libcall LC; switch (Node->getValueType(0).getSimpleVT().SimpleTy) { - default: llvm_unreachable("Unexpected request for libcall!"); + default: assert(0 && "Unexpected request for libcall!"); case MVT::f32: LC = Call_F32; break; case MVT::f64: LC = Call_F64; break; case MVT::f80: LC = Call_F80; break; @@ -1947,7 +1951,7 @@ SDValue SelectionDAGLegalize::ExpandIntLibCall(SDNode* Node, bool isSigned, RTLIB::Libcall Call_I128) { RTLIB::Libcall LC; switch (Node->getValueType(0).getSimpleVT().SimpleTy) { - default: llvm_unreachable("Unexpected request for libcall!"); + default: assert(0 && "Unexpected request for libcall!"); case MVT::i8: LC = Call_I8; break; case MVT::i16: LC = Call_I16; break; case MVT::i32: LC = Call_I32; break; @@ -2062,7 +2066,7 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, // offset depending on the data type. 
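The DYNAMIC_STACKALLOC expansion touched above keeps rounding the stack pointer with an AND against the negated alignment. A hedged numeric illustration with made-up values:

    uint64_t SP      = 0x7fff1234ULL;
    uint64_t Align   = 64;                      // requested alignment, larger than the stack alignment
    uint64_t Aligned = SP & -(uint64_t)Align;   // 0x7fff1200: rounded down to a 64-byte boundary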
uint64_t FF; switch (Op0.getValueType().getSimpleVT().SimpleTy) { - default: llvm_unreachable("Unsupported integer type!"); + default: assert(0 && "Unsupported integer type!"); case MVT::i8 : FF = 0x43800000ULL; break; // 2^8 (as a float) case MVT::i16: FF = 0x47800000ULL; break; // 2^16 (as a float) case MVT::i32: FF = 0x4F800000ULL; break; // 2^32 (as a float) @@ -2182,7 +2186,7 @@ SDValue SelectionDAGLegalize::ExpandBSWAP(SDValue Op, DebugLoc dl) { EVT SHVT = TLI.getShiftAmountTy(); SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5, Tmp6, Tmp7, Tmp8; switch (VT.getSimpleVT().SimpleTy) { - default: llvm_unreachable("Unhandled Expand type in BSWAP!"); + default: assert(0 && "Unhandled Expand type in BSWAP!"); case MVT::i16: Tmp2 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(8, SHVT)); Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, SHVT)); @@ -2227,7 +2231,7 @@ SDValue SelectionDAGLegalize::ExpandBSWAP(SDValue Op, DebugLoc dl) { SDValue SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op, DebugLoc dl) { switch (Opc) { - default: llvm_unreachable("Cannot expand this yet!"); + default: assert(0 && "Cannot expand this yet!"); case ISD::CTPOP: { static const uint64_t mask[6] = { 0x5555555555555555ULL, 0x3333333333333333ULL, @@ -2333,10 +2337,10 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, EVT VT = Node->getValueType(0); if (VT.isInteger()) Results.push_back(DAG.getConstant(0, VT)); - else if (VT.isFloatingPoint()) + else { + assert(VT.isFloatingPoint() && "Unknown value type!"); Results.push_back(DAG.getConstantFP(0, VT)); - else - llvm_unreachable("Unknown value type!"); + } break; } case ISD::TRAP: { @@ -2431,7 +2435,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, // Increment the pointer, VAList, to the next vaarg Tmp3 = DAG.getNode(ISD::ADD, dl, TLI.getPointerTy(), VAList, DAG.getConstant(TLI.getTargetData()-> - getTypeAllocSize(VT.getTypeForEVT(*DAG.getContext())), + getTypeAllocSize(VT.getTypeForEVT(*DAG.getContext())), TLI.getPointerTy())); // Store the incremented VAList to the legalized pointer Tmp3 = DAG.getStore(VAList.getValue(1), dl, Tmp3, Tmp2, V, 0, @@ -2842,7 +2846,14 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, BottomHalf = DAG.getNode(Ops[isSigned][1], dl, DAG.getVTList(VT, VT), LHS, RHS); TopHalf = BottomHalf.getValue(1); - } else if (TLI.isTypeLegal(EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits() * 2))) { + } else { + // FIXME: We should be able to fall back to a libcall with an illegal + // type in some cases. + // Also, we can fall back to a division in some cases, but that's a big + // performance hit in the general case. + assert(TLI.isTypeLegal(EVT::getIntegerVT(*DAG.getContext(), + VT.getSizeInBits() * 2)) && + "Don't know how to expand this operation yet!"); EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits() * 2); LHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, LHS); RHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, RHS); @@ -2851,12 +2862,6 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, DAG.getIntPtrConstant(0)); TopHalf = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, VT, Tmp1, DAG.getIntPtrConstant(1)); - } else { - // FIXME: We should be able to fall back to a libcall with an illegal - // type in some cases. - // Also, we can fall back to a division in some cases, but that's a big - // performance hit in the general case. 
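The FF table in the INT_TO_FP hunk above is simply 2^N encoded as an IEEE-754 single: sign 0, biased exponent 127 + N, zero mantissa. A standalone check, not part of the patch:

    uint32_t PowerOfTwoAsFloat(unsigned N) { return (127u + N) << 23; }
    // PowerOfTwoAsFloat(8)  == 0x43800000   (2^8)
    // PowerOfTwoAsFloat(16) == 0x47800000   (2^16)
    // PowerOfTwoAsFloat(32) == 0x4F800000   (2^32)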
- llvm_unreachable("Don't know how to expand this operation yet!"); } if (isSigned) { Tmp1 = DAG.getConstant(VT.getSizeInBits() - 1, TLI.getShiftAmountTy()); @@ -2916,7 +2921,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, PseudoSourceValue::getJumpTable(), 0, MemVT, false, false, 0); Addr = LD; - if (TLI.getTargetMachine().getRelocationModel() == Reloc::PIC_) { + if (TM.getRelocationModel() == Reloc::PIC_) { // For PIC, the sequence is: // BRIND(load(Jumptable + index) + RelocBase) // RelocBase can be JumpTable, GOT or some sort of global base. @@ -3078,11 +3083,10 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node, if (OVT.isVector()) { ExtOp = ISD::BIT_CONVERT; TruncOp = ISD::BIT_CONVERT; - } else if (OVT.isInteger()) { + } else { + assert(OVT.isInteger() && "Cannot promote logic operation"); ExtOp = ISD::ANY_EXTEND; TruncOp = ISD::TRUNCATE; - } else { - llvm_report_error("Cannot promote logic operation"); } // Promote each of the values to the new type. Tmp1 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(0)); diff --git a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp index 665b21f..e3eb949 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp @@ -109,14 +109,16 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_BIT_CONVERT(SDNode *N) { SDValue DAGTypeLegalizer::SoftenFloatRes_BUILD_PAIR(SDNode *N) { // Convert the inputs to integers, and build a new pair out of them. return DAG.getNode(ISD::BUILD_PAIR, N->getDebugLoc(), - TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)), + TLI.getTypeToTransformTo(*DAG.getContext(), + N->getValueType(0)), BitConvertToInteger(N->getOperand(0)), BitConvertToInteger(N->getOperand(1))); } SDValue DAGTypeLegalizer::SoftenFloatRes_ConstantFP(ConstantFPSDNode *N) { return DAG.getConstant(N->getValueAPF().bitcastToAPInt(), - TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0))); + TLI.getTypeToTransformTo(*DAG.getContext(), + N->getValueType(0))); } SDValue DAGTypeLegalizer::SoftenFloatRes_EXTRACT_VECTOR_ELT(SDNode *N) { @@ -338,7 +340,8 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FP_EXTEND(SDNode *N) { SDValue DAGTypeLegalizer::SoftenFloatRes_FP16_TO_FP32(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Op = N->getOperand(0); - return MakeLibCall(RTLIB::FPEXT_F16_F32, NVT, &Op, 1, false, N->getDebugLoc()); + return MakeLibCall(RTLIB::FPEXT_F16_F32, NVT, &Op, 1, false, + N->getDebugLoc()); } SDValue DAGTypeLegalizer::SoftenFloatRes_FP_ROUND(SDNode *N) { @@ -489,7 +492,8 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_SELECT_CC(SDNode *N) { } SDValue DAGTypeLegalizer::SoftenFloatRes_UNDEF(SDNode *N) { - return DAG.getUNDEF(TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0))); + return DAG.getUNDEF(TLI.getTypeToTransformTo(*DAG.getContext(), + N->getValueType(0))); } SDValue DAGTypeLegalizer::SoftenFloatRes_VAARG(SDNode *N) { @@ -531,7 +535,8 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_XINT_TO_FP(SDNode *N) { // Sign/zero extend the argument if the libcall takes a larger type. SDValue Op = DAG.getNode(Signed ? 
ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, dl, NVT, N->getOperand(0)); - return MakeLibCall(LC, TLI.getTypeToTransformTo(*DAG.getContext(), RVT), &Op, 1, false, dl); + return MakeLibCall(LC, TLI.getTypeToTransformTo(*DAG.getContext(), RVT), + &Op, 1, false, dl); } @@ -1403,7 +1408,8 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_STORE(SDNode *N, unsigned OpNo) { SDValue Chain = ST->getChain(); SDValue Ptr = ST->getBasePtr(); - EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), ST->getValue().getValueType()); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), + ST->getValue().getValueType()); assert(NVT.isByteSized() && "Expanded type not byte sized!"); assert(ST->getMemoryVT().bitsLE(NVT) && "Float type not round?"); diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index 48f64c3..548454c 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -204,7 +204,8 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BIT_CONVERT(SDNode *N) { std::swap(Lo, Hi); InOp = DAG.getNode(ISD::ANY_EXTEND, dl, - EVT::getIntegerVT(*DAG.getContext(), NOutVT.getSizeInBits()), + EVT::getIntegerVT(*DAG.getContext(), + NOutVT.getSizeInBits()), JoinIntegers(Lo, Hi)); return DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, InOp); } @@ -464,7 +465,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SETCC(SDNode *N) { SDValue DAGTypeLegalizer::PromoteIntRes_SHL(SDNode *N) { return DAG.getNode(ISD::SHL, N->getDebugLoc(), - TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)), + TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)), GetPromotedInteger(N->getOperand(0)), N->getOperand(1)); } @@ -555,7 +556,8 @@ SDValue DAGTypeLegalizer::PromoteIntRes_UDIV(SDNode *N) { } SDValue DAGTypeLegalizer::PromoteIntRes_UNDEF(SDNode *N) { - return DAG.getUNDEF(TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0))); + return DAG.getUNDEF(TLI.getTypeToTransformTo(*DAG.getContext(), + N->getValueType(0))); } SDValue DAGTypeLegalizer::PromoteIntRes_VAARG(SDNode *N) { @@ -1383,7 +1385,8 @@ void DAGTypeLegalizer::ExpandIntRes_AssertSext(SDNode *N, if (NVTBits < EVTBits) { Hi = DAG.getNode(ISD::AssertSext, dl, NVT, Hi, - DAG.getValueType(EVT::getIntegerVT(*DAG.getContext(), EVTBits - NVTBits))); + DAG.getValueType(EVT::getIntegerVT(*DAG.getContext(), + EVTBits - NVTBits))); } else { Lo = DAG.getNode(ISD::AssertSext, dl, NVT, Lo, DAG.getValueType(EVT)); // The high part replicates the sign bit of Lo, make it explicit. @@ -1403,7 +1406,8 @@ void DAGTypeLegalizer::ExpandIntRes_AssertZext(SDNode *N, if (NVTBits < EVTBits) { Hi = DAG.getNode(ISD::AssertZext, dl, NVT, Hi, - DAG.getValueType(EVT::getIntegerVT(*DAG.getContext(), EVTBits - NVTBits))); + DAG.getValueType(EVT::getIntegerVT(*DAG.getContext(), + EVTBits - NVTBits))); } else { Lo = DAG.getNode(ISD::AssertZext, dl, NVT, Lo, DAG.getValueType(EVT)); // The high part must be zero, make it explicit. 
@@ -1846,7 +1850,8 @@ void DAGTypeLegalizer::ExpandIntRes_SIGN_EXTEND(SDNode *N, unsigned ExcessBits = Op.getValueType().getSizeInBits() - NVT.getSizeInBits(); Hi = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, Hi.getValueType(), Hi, - DAG.getValueType(EVT::getIntegerVT(*DAG.getContext(), ExcessBits))); + DAG.getValueType(EVT::getIntegerVT(*DAG.getContext(), + ExcessBits))); } } @@ -1968,7 +1973,8 @@ void DAGTypeLegalizer::ExpandIntRes_ZERO_EXTEND(SDNode *N, SplitInteger(Res, Lo, Hi); unsigned ExcessBits = Op.getValueType().getSizeInBits() - NVT.getSizeInBits(); - Hi = DAG.getZeroExtendInReg(Hi, dl, EVT::getIntegerVT(*DAG.getContext(), ExcessBits)); + Hi = DAG.getZeroExtendInReg(Hi, dl, + EVT::getIntegerVT(*DAG.getContext(), ExcessBits)); } } @@ -2269,7 +2275,8 @@ SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) { unsigned EBytes = ExtVT.getStoreSize(); unsigned IncrementSize = NVT.getSizeInBits()/8; unsigned ExcessBits = (EBytes - IncrementSize)*8; - EVT HiVT = EVT::getIntegerVT(*DAG.getContext(), ExtVT.getSizeInBits() - ExcessBits); + EVT HiVT = EVT::getIntegerVT(*DAG.getContext(), + ExtVT.getSizeInBits() - ExcessBits); if (ExcessBits < NVT.getSizeInBits()) { // Transfer high bits from the top of Lo to the bottom of Hi. diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp index f3e7ca4f..17f131b 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp @@ -721,7 +721,8 @@ void DAGTypeLegalizer::ReplaceValueWith(SDValue From, SDValue To) { } void DAGTypeLegalizer::SetPromotedInteger(SDValue Op, SDValue Result) { - assert(Result.getValueType() == TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType()) && + assert(Result.getValueType() == + TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType()) && "Invalid type for promoted integer"); AnalyzeNewValue(Result); @@ -731,7 +732,8 @@ void DAGTypeLegalizer::SetPromotedInteger(SDValue Op, SDValue Result) { } void DAGTypeLegalizer::SetSoftenedFloat(SDValue Op, SDValue Result) { - assert(Result.getValueType() == TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType()) && + assert(Result.getValueType() == + TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType()) && "Invalid type for softened float"); AnalyzeNewValue(Result); @@ -762,7 +764,8 @@ void DAGTypeLegalizer::GetExpandedInteger(SDValue Op, SDValue &Lo, void DAGTypeLegalizer::SetExpandedInteger(SDValue Op, SDValue Lo, SDValue Hi) { - assert(Lo.getValueType() == TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType()) && + assert(Lo.getValueType() == + TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType()) && Hi.getValueType() == Lo.getValueType() && "Invalid type for expanded integer"); // Lo/Hi may have been newly allocated, if so, add nodeid's as relevant. @@ -788,7 +791,8 @@ void DAGTypeLegalizer::GetExpandedFloat(SDValue Op, SDValue &Lo, void DAGTypeLegalizer::SetExpandedFloat(SDValue Op, SDValue Lo, SDValue Hi) { - assert(Lo.getValueType() == TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType()) && + assert(Lo.getValueType() == + TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType()) && Hi.getValueType() == Lo.getValueType() && "Invalid type for expanded float"); // Lo/Hi may have been newly allocated, if so, add nodeid's as relevant. 
@@ -832,7 +836,8 @@ void DAGTypeLegalizer::SetSplitVector(SDValue Op, SDValue Lo, } void DAGTypeLegalizer::SetWidenedVector(SDValue Op, SDValue Result) { - assert(Result.getValueType() == TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType()) && + assert(Result.getValueType() == + TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType()) && "Invalid type for widened vector"); AnalyzeNewValue(Result); @@ -940,7 +945,8 @@ void DAGTypeLegalizer::GetSplitDestVTs(EVT InVT, EVT &LoVT, EVT &HiVT) { } else { unsigned NumElements = InVT.getVectorNumElements(); assert(!(NumElements & 1) && "Splitting vector, but not in half!"); - LoVT = HiVT = EVT::getVectorVT(*DAG.getContext(), InVT.getVectorElementType(), NumElements/2); + LoVT = HiVT = EVT::getVectorVT(*DAG.getContext(), + InVT.getVectorElementType(), NumElements/2); } } @@ -980,7 +986,8 @@ SDValue DAGTypeLegalizer::JoinIntegers(SDValue Lo, SDValue Hi) { DebugLoc dlLo = Lo.getDebugLoc(); EVT LVT = Lo.getValueType(); EVT HVT = Hi.getValueType(); - EVT NVT = EVT::getIntegerVT(*DAG.getContext(), LVT.getSizeInBits() + HVT.getSizeInBits()); + EVT NVT = EVT::getIntegerVT(*DAG.getContext(), + LVT.getSizeInBits() + HVT.getSizeInBits()); Lo = DAG.getNode(ISD::ZERO_EXTEND, dlLo, NVT, Lo); Hi = DAG.getNode(ISD::ANY_EXTEND, dlHi, NVT, Hi); @@ -1082,7 +1089,8 @@ void DAGTypeLegalizer::SplitInteger(SDValue Op, /// type half the size of Op's. void DAGTypeLegalizer::SplitInteger(SDValue Op, SDValue &Lo, SDValue &Hi) { - EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), Op.getValueType().getSizeInBits()/2); + EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), + Op.getValueType().getSizeInBits()/2); SplitInteger(Op, HalfVT, HalfVT, Lo, Hi); } diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/lib/CodeGen/SelectionDAG/LegalizeTypes.h index 9dd9796..d60ad60 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -33,7 +33,7 @@ namespace llvm { /// into small values. /// class VISIBILITY_HIDDEN DAGTypeLegalizer { - TargetLowering &TLI; + const TargetLowering &TLI; SelectionDAG &DAG; public: // NodeIdFlags - This pass uses the NodeId on the SDNodes to hold information diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp index 5e83b4b..88e1e62 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp @@ -173,8 +173,9 @@ void DAGTypeLegalizer::ExpandRes_EXTRACT_VECTOR_ELT(SDNode *N, SDValue &Lo, EVT NewVT = TLI.getTypeToTransformTo(*DAG.getContext(), OldVT); SDValue NewVec = DAG.getNode(ISD::BIT_CONVERT, dl, - EVT::getVectorVT(*DAG.getContext(), NewVT, 2*OldElts), - OldVec); + EVT::getVectorVT(*DAG.getContext(), + NewVT, 2*OldElts), + OldVec); // Extract the elements at 2 * Idx and 2 * Idx + 1 from the new vector. SDValue Idx = N->getOperand(1); @@ -268,7 +269,9 @@ SDValue DAGTypeLegalizer::ExpandOp_BIT_CONVERT(SDNode *N) { // is no point, and it might create expansion loops). For example, on // x86 this turns v1i64 = BIT_CONVERT i64 into v1i64 = BIT_CONVERT v2i32. 
EVT OVT = N->getOperand(0).getValueType(); - EVT NVT = EVT::getVectorVT(*DAG.getContext(), TLI.getTypeToTransformTo(*DAG.getContext(), OVT), 2); + EVT NVT = EVT::getVectorVT(*DAG.getContext(), + TLI.getTypeToTransformTo(*DAG.getContext(), OVT), + 2); if (isTypeLegal(NVT)) { SDValue Parts[2]; @@ -312,8 +315,9 @@ SDValue DAGTypeLegalizer::ExpandOp_BUILD_VECTOR(SDNode *N) { } SDValue NewVec = DAG.getNode(ISD::BUILD_VECTOR, dl, - EVT::getVectorVT(*DAG.getContext(), NewVT, NewElts.size()), - &NewElts[0], NewElts.size()); + EVT::getVectorVT(*DAG.getContext(), + NewVT, NewElts.size()), + &NewElts[0], NewElts.size()); // Convert the new vector to the old vector type. return DAG.getNode(ISD::BIT_CONVERT, dl, VecVT, NewVec); @@ -380,7 +384,8 @@ SDValue DAGTypeLegalizer::ExpandOp_NormalStore(SDNode *N, unsigned OpNo) { DebugLoc dl = N->getDebugLoc(); StoreSDNode *St = cast<StoreSDNode>(N); - EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), St->getValue().getValueType()); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), + St->getValue().getValueType()); SDValue Chain = St->getChain(); SDValue Ptr = St->getBasePtr(); int SVOffset = St->getSrcValueOffset(); diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp index b5f84c0..0e2bd02 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -34,7 +34,7 @@ using namespace llvm; namespace { class VectorLegalizer { SelectionDAG& DAG; - TargetLowering& TLI; + const TargetLowering &TLI; bool Changed; // Keep track of whether anything changed /// LegalizedNodes - For nodes that are of legal width, and that have more diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index ed5f24c..7efeea1 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -705,8 +705,9 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, // Store the new element. This may be larger than the vector element type, // so use a truncating store. 
SDValue EltPtr = GetVectorElementPointer(StackPtr, EltVT, Idx); + const Type *VecType = VecVT.getTypeForEVT(*DAG.getContext()); unsigned Alignment = - TLI.getTargetData()->getPrefTypeAlignment(VecVT.getTypeForEVT(*DAG.getContext())); + TLI.getTargetData()->getPrefTypeAlignment(VecType); Store = DAG.getTruncStore(Store, dl, Elt, EltPtr, NULL, 0, EltVT, false, false, 0); @@ -1419,7 +1420,8 @@ SDValue DAGTypeLegalizer::WidenVecRes_Shift(SDNode *N) { ShOp = GetWidenedVector(ShOp); ShVT = ShOp.getValueType(); } - EVT ShWidenVT = EVT::getVectorVT(*DAG.getContext(), ShVT.getVectorElementType(), + EVT ShWidenVT = EVT::getVectorVT(*DAG.getContext(), + ShVT.getVectorElementType(), WidenVT.getVectorNumElements()); if (ShVT != ShWidenVT) ShOp = ModifyToType(ShOp, ShWidenVT); @@ -1493,7 +1495,8 @@ SDValue DAGTypeLegalizer::WidenVecRes_BIT_CONVERT(SDNode *N) { unsigned NewNumElts = WidenSize / InSize; if (InVT.isVector()) { EVT InEltVT = InVT.getVectorElementType(); - NewInVT= EVT::getVectorVT(*DAG.getContext(), InEltVT, WidenSize / InEltVT.getSizeInBits()); + NewInVT= EVT::getVectorVT(*DAG.getContext(), InEltVT, + WidenSize / InEltVT.getSizeInBits()); } else { NewInVT = EVT::getVectorVT(*DAG.getContext(), InVT, NewNumElts); } @@ -1617,7 +1620,8 @@ SDValue DAGTypeLegalizer::WidenVecRes_CONVERT_RNDSAT(SDNode *N) { SDValue RndOp = N->getOperand(3); SDValue SatOp = N->getOperand(4); - EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), + N->getValueType(0)); unsigned WidenNumElts = WidenVT.getVectorNumElements(); EVT InVT = InOp.getValueType(); @@ -1791,7 +1795,8 @@ SDValue DAGTypeLegalizer::WidenVecRes_SELECT(SDNode *N) { EVT CondVT = Cond1.getValueType(); if (CondVT.isVector()) { EVT CondEltVT = CondVT.getVectorElementType(); - EVT CondWidenVT = EVT::getVectorVT(*DAG.getContext(), CondEltVT, WidenNumElts); + EVT CondWidenVT = EVT::getVectorVT(*DAG.getContext(), + CondEltVT, WidenNumElts); if (getTypeAction(CondVT) == WidenVector) Cond1 = GetWidenedVector(Cond1); @@ -1859,7 +1864,8 @@ SDValue DAGTypeLegalizer::WidenVecRes_VSETCC(SDNode *N) { SDValue InOp1 = N->getOperand(0); EVT InVT = InOp1.getValueType(); assert(InVT.isVector() && "can not widen non vector type"); - EVT WidenInVT = EVT::getVectorVT(*DAG.getContext(), InVT.getVectorElementType(), WidenNumElts); + EVT WidenInVT = EVT::getVectorVT(*DAG.getContext(), + InVT.getVectorElementType(), WidenNumElts); InOp1 = GetWidenedVector(InOp1); SDValue InOp2 = GetWidenedVector(N->getOperand(1)); @@ -2124,7 +2130,7 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVector<SDValue, 16>& LdChain, // The routines chops the vector into the largest vector loads with the same // element type or scalar loads and then recombines it to the widen vector // type. 
- EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), LD->getValueType(0)); + EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(),LD->getValueType(0)); unsigned WidenWidth = WidenVT.getSizeInBits(); EVT LdVT = LD->getMemoryVT(); DebugLoc dl = LD->getDebugLoc(); diff --git a/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h b/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h index 9d1568f..ac2d338 100644 --- a/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h +++ b/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h @@ -41,7 +41,7 @@ private: SDNode *Node; // valid for expressions unsigned ResNo; // valid for expressions } s; - Value *Const; // valid for constants + const Value *Const; // valid for constants unsigned FrameIx; // valid for stack objects } u; MDNode *mdPtr; @@ -60,7 +60,8 @@ public: } // Constructor for constants. - SDDbgValue(MDNode *mdP, Value *C, uint64_t off, DebugLoc dl, unsigned O) : + SDDbgValue(MDNode *mdP, const Value *C, uint64_t off, DebugLoc dl, + unsigned O) : mdPtr(mdP), Offset(off), DL(dl), Order(O), Invalid(false) { kind = CONST; u.Const = C; @@ -86,7 +87,7 @@ public: unsigned getResNo() { assert (kind==SDNODE); return u.s.ResNo; } // Returns the Value* for a constant - Value *getConst() { assert (kind==CONST); return u.Const; } + const Value *getConst() { assert (kind==CONST); return u.Const; } // Returns the FrameIx for a stack object unsigned getFrameIx() { assert (kind==FRAMEIX); return u.FrameIx; } diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp index 3f1766d..da02850 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp @@ -17,18 +17,19 @@ #define DEBUG_TYPE "pre-RA-sched" #include "ScheduleDAGSDNodes.h" +#include "llvm/InlineAsm.h" #include "llvm/CodeGen/SchedulerRegistry.h" #include "llvm/CodeGen/SelectionDAGISel.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/ErrorHandling.h" #include "llvm/ADT/PriorityQueue.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include <climits> using namespace llvm; @@ -647,13 +648,14 @@ bool ScheduleDAGRRList::DelayForLiveRegsBottomUp(SUnit *SU, if (Node->getOperand(NumOps-1).getValueType() == MVT::Flag) --NumOps; // Ignore the flag operand. - for (unsigned i = 2; i != NumOps;) { + for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) { unsigned Flags = cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue(); - unsigned NumVals = (Flags & 0xffff) >> 3; + unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags); ++i; // Skip the ID value. - if ((Flags & 7) == 2 || (Flags & 7) == 6) { + if (InlineAsm::isRegDefKind(Flags) || + InlineAsm::isRegDefEarlyClobberKind(Flags)) { // Check for def of register or earlyclobber register. for (; NumVals; --NumVals, ++i) { unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg(); diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp index e7ab2f0..76e4771 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp @@ -353,8 +353,8 @@ void ScheduleDAGSDNodes::AddSchedEdges() { const SDep& dep = SDep(OpSU, isChain ? 
SDep::Order : SDep::Data, OpSU->Latency, PhysReg); if (!isChain && !UnitLatencies) { - ComputeOperandLatency(OpSU, SU, (SDep &)dep); - ST.adjustSchedDependency(OpSU, SU, (SDep &)dep); + ComputeOperandLatency(OpSU, SU, const_cast<SDep &>(dep)); + ST.adjustSchedDependency(OpSU, SU, const_cast<SDep &>(dep)); } SU->addPred(dep); @@ -422,7 +422,6 @@ namespace { // instructions in the right order. static void ProcessSourceNode(SDNode *N, SelectionDAG *DAG, InstrEmitter &Emitter, - DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM, DenseMap<SDValue, unsigned> &VRBaseMap, SmallVector<std::pair<unsigned, MachineInstr*>, 32> &Orders, SmallSet<unsigned, 8> &Seen) { @@ -449,9 +448,11 @@ static void ProcessSourceNode(SDNode *N, SelectionDAG *DAG, continue; unsigned DVOrder = DVs[i]->getOrder(); if (DVOrder == ++Order) { - MachineInstr *DbgMI = Emitter.EmitDbgValue(DVs[i], BB, VRBaseMap, EM); - Orders.push_back(std::make_pair(DVOrder, DbgMI)); - BB->insert(InsertPos, DbgMI); + MachineInstr *DbgMI = Emitter.EmitDbgValue(DVs[i], VRBaseMap); + if (DbgMI) { + Orders.push_back(std::make_pair(DVOrder, DbgMI)); + BB->insert(InsertPos, DbgMI); + } DVs[i]->setIsInvalidated(); } } @@ -459,8 +460,7 @@ static void ProcessSourceNode(SDNode *N, SelectionDAG *DAG, /// EmitSchedule - Emit the machine code in scheduled order. -MachineBasicBlock *ScheduleDAGSDNodes:: -EmitSchedule(DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) { +MachineBasicBlock *ScheduleDAGSDNodes::EmitSchedule() { InstrEmitter Emitter(BB, InsertPos); DenseMap<SDValue, unsigned> VRBaseMap; DenseMap<SUnit*, unsigned> CopyVRBaseMap; @@ -468,6 +468,17 @@ EmitSchedule(DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) { SmallSet<unsigned, 8> Seen; bool HasDbg = DAG->hasDebugValues(); + // If this is the first BB, emit byval parameter dbg_value's. + if (HasDbg && BB->getParent()->begin() == MachineFunction::iterator(BB)) { + SDDbgInfo::DbgIterator PDI = DAG->ByvalParmDbgBegin(); + SDDbgInfo::DbgIterator PDE = DAG->ByvalParmDbgEnd(); + for (; PDI != PDE; ++PDI) { + MachineInstr *DbgMI= Emitter.EmitDbgValue(*PDI, VRBaseMap); + if (DbgMI) + BB->insert(BB->end(), DbgMI); + } + } + for (unsigned i = 0, e = Sequence.size(); i != e; i++) { SUnit *SU = Sequence[i]; if (!SU) { @@ -491,21 +502,21 @@ EmitSchedule(DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) { while (!FlaggedNodes.empty()) { SDNode *N = FlaggedNodes.back(); Emitter.EmitNode(FlaggedNodes.back(), SU->OrigNode != SU, SU->isCloned, - VRBaseMap, EM); - // Remember the the source order of the inserted instruction. + VRBaseMap); + // Remember the source order of the inserted instruction. if (HasDbg) - ProcessSourceNode(N, DAG, Emitter, EM, VRBaseMap, Orders, Seen); + ProcessSourceNode(N, DAG, Emitter, VRBaseMap, Orders, Seen); FlaggedNodes.pop_back(); } Emitter.EmitNode(SU->getNode(), SU->OrigNode != SU, SU->isCloned, - VRBaseMap, EM); - // Remember the the source order of the inserted instruction. + VRBaseMap); + // Remember the source order of the inserted instruction. if (HasDbg) - ProcessSourceNode(SU->getNode(), DAG, Emitter, EM, VRBaseMap, Orders, + ProcessSourceNode(SU->getNode(), DAG, Emitter, VRBaseMap, Orders, Seen); } - // Insert all the dbg_value which have not already been inserted in source + // Insert all the dbg_values which have not already been inserted in source // order sequence. if (HasDbg) { MachineBasicBlock::iterator BBBegin = BB->empty() ? 
BB->end() : BB->begin(); @@ -540,13 +551,15 @@ EmitSchedule(DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) { #endif if ((*DI)->isInvalidated()) continue; - MachineInstr *DbgMI = Emitter.EmitDbgValue(*DI, MIBB, VRBaseMap, EM); - if (!LastOrder) - // Insert to start of the BB (after PHIs). - BB->insert(BBBegin, DbgMI); - else { - MachineBasicBlock::iterator Pos = MI; - MIBB->insert(llvm::next(Pos), DbgMI); + MachineInstr *DbgMI = Emitter.EmitDbgValue(*DI, VRBaseMap); + if (DbgMI) { + if (!LastOrder) + // Insert to start of the BB (after PHIs). + BB->insert(BBBegin, DbgMI); + else { + MachineBasicBlock::iterator Pos = MI; + MIBB->insert(llvm::next(Pos), DbgMI); + } } } LastOrder = Order; @@ -558,8 +571,9 @@ EmitSchedule(DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) { MachineBasicBlock *InsertBB = Emitter.getBlock(); MachineBasicBlock::iterator Pos= Emitter.getBlock()->getFirstTerminator(); if (!(*DI)->isInvalidated()) { - MachineInstr *DbgMI= Emitter.EmitDbgValue(*DI, InsertBB, VRBaseMap, EM); - InsertBB->insert(Pos, DbgMI); + MachineInstr *DbgMI= Emitter.EmitDbgValue(*DI, VRBaseMap); + if (DbgMI) + InsertBB->insert(Pos, DbgMI); } ++DI; } diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h index 6b829b6..7ae8ec2 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h @@ -59,7 +59,8 @@ namespace llvm { if (isa<JumpTableSDNode>(Node)) return true; if (isa<ExternalSymbolSDNode>(Node)) return true; if (isa<BlockAddressSDNode>(Node)) return true; - if (Node->getOpcode() == ISD::EntryToken) return true; + if (Node->getOpcode() == ISD::EntryToken || + isa<MDNodeSDNode>(Node)) return true; return false; } @@ -93,8 +94,7 @@ namespace llvm { /// virtual void ComputeLatency(SUnit *SU); - virtual MachineBasicBlock * - EmitSchedule(DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM); + virtual MachineBasicBlock *EmitSchedule(); /// Schedule - Order nodes according to selected style, filling /// in the Sequence member. diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 8c0554d..e6df742 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -304,10 +304,6 @@ ISD::CondCode ISD::getSetCCAndOperation(ISD::CondCode Op1, ISD::CondCode Op2, return Result; } -const TargetMachine &SelectionDAG::getTarget() const { - return MF->getTarget(); -} - //===----------------------------------------------------------------------===// // SDNode Profile Support //===----------------------------------------------------------------------===// @@ -792,8 +788,8 @@ unsigned SelectionDAG::getEVTAlignment(EVT VT) const { } // EntryNode could meaningfully have debug info if we can find it... 
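EmitDbgValue can now legitimately return null, for example when a target leaves emitFrameIndexDebugValue at its default, so every insertion site in the scheduler above gains an if (DbgMI) guard. The pattern, assuming the surrounding emitter context:

    if (MachineInstr *DbgMI = Emitter.EmitDbgValue(SD, VRBaseMap))
      InsertBB->insert(Pos, DbgMI);   // otherwise the dbg_value is silently dropped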
-SelectionDAG::SelectionDAG(TargetLowering &tli, FunctionLoweringInfo &fli) - : TLI(tli), FLI(fli), +SelectionDAG::SelectionDAG(const TargetMachine &tm, FunctionLoweringInfo &fli) + : TM(tm), TLI(*tm.getTargetLowering()), FLI(fli), EntryNode(ISD::EntryToken, DebugLoc(), getVTList(MVT::Other)), Root(getEntryNode()), Ordering(0) { AllNodes.push_back(&EntryNode); @@ -1048,7 +1044,7 @@ SDValue SelectionDAG::getJumpTable(int JTI, EVT VT, bool isTarget, return SDValue(N, 0); } -SDValue SelectionDAG::getConstantPool(Constant *C, EVT VT, +SDValue SelectionDAG::getConstantPool(const Constant *C, EVT VT, unsigned Alignment, int Offset, bool isTarget, unsigned char TargetFlags) { @@ -1319,7 +1315,7 @@ SDValue SelectionDAG::getEHLabel(DebugLoc dl, SDValue Root, MCSymbol *Label) { } -SDValue SelectionDAG::getBlockAddress(BlockAddress *BA, EVT VT, +SDValue SelectionDAG::getBlockAddress(const BlockAddress *BA, EVT VT, bool isTarget, unsigned char TargetFlags) { unsigned Opc = isTarget ? ISD::TargetBlockAddress : ISD::BlockAddress; @@ -1356,6 +1352,23 @@ SDValue SelectionDAG::getSrcValue(const Value *V) { return SDValue(N, 0); } +/// getMDNode - Return an MDNodeSDNode which holds an MDNode. +SDValue SelectionDAG::getMDNode(const MDNode *MD) { + FoldingSetNodeID ID; + AddNodeIDNode(ID, ISD::MDNODE_SDNODE, getVTList(MVT::Other), 0, 0); + ID.AddPointer(MD); + + void *IP = 0; + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + return SDValue(E, 0); + + SDNode *N = new (NodeAllocator) MDNodeSDNode(MD); + CSEMap.InsertNode(N, IP); + AllNodes.push_back(N); + return SDValue(N, 0); +} + + /// getShiftAmountOperand - Return the specified value casted to /// the target's desired shift amount type. SDValue SelectionDAG::getShiftAmountOperand(SDValue Op) { @@ -1904,7 +1917,8 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, // Output known-0 bits are known if clear or set in both the low clear bits // common to both LHS & RHS. For example, 8+(X<<3) is known to have the // low 3 bits clear. - APInt Mask2 = APInt::getLowBitsSet(BitWidth, Mask.countTrailingOnes()); + APInt Mask2 = APInt::getLowBitsSet(BitWidth, + BitWidth - Mask.countLeadingZeros()); ComputeMaskedBits(Op.getOperand(0), Mask2, KnownZero2, KnownOne2, Depth+1); assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); unsigned KnownZeroOut = KnownZero2.countTrailingOnes(); @@ -2253,7 +2267,7 @@ bool SelectionDAG::isVerifiedDebugInfoDesc(SDValue Op) const { GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op); if (!GA) return false; if (GA->getOffset() != 0) return false; - GlobalVariable *GV = dyn_cast<GlobalVariable>(GA->getGlobal()); + const GlobalVariable *GV = dyn_cast<GlobalVariable>(GA->getGlobal()); if (!GV) return false; return MF->getMMI().hasDebugInfo(); } @@ -2778,14 +2792,19 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, // If the indices are the same, return the inserted element else // if the indices are known different, extract the element from // the original vector. 
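The ADD/SUB mask change above widens the recursive ComputeMaskedBits query to every bit below the highest demanded bit, since carries propagate upward; with the old mask, a query that only demanded high bits asked for nothing at all. A worked 16-bit example with an assumed demanded mask:

    APInt Mask(16, 0xFF00);   // caller demands bits 8..15
    APInt Old = APInt::getLowBitsSet(16, Mask.countTrailingOnes());       // low 0 bits  -> 0x0000
    APInt New = APInt::getLowBitsSet(16, 16 - Mask.countLeadingZeros());  // low 16 bits -> 0xFFFF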
- if (N1.getOperand(2) == N2) { - if (VT == N1.getOperand(1).getValueType()) - return N1.getOperand(1); - else - return getSExtOrTrunc(N1.getOperand(1), DL, VT); - } else if (isa<ConstantSDNode>(N1.getOperand(2)) && - isa<ConstantSDNode>(N2)) + SDValue N1Op2 = N1.getOperand(2); + ConstantSDNode *N1Op2C = dyn_cast<ConstantSDNode>(N1Op2.getNode()); + + if (N1Op2C && N2C) { + if (N1Op2C->getZExtValue() == N2C->getZExtValue()) { + if (VT == N1.getOperand(1).getValueType()) + return N1.getOperand(1); + else + return getSExtOrTrunc(N1.getOperand(1), DL, VT); + } + return getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, N1.getOperand(0), N2); + } } break; case ISD::EXTRACT_ELEMENT: @@ -3178,7 +3197,7 @@ static bool isMemSrcFromString(SDValue Src, std::string &Str) { if (!G) return false; - GlobalVariable *GV = dyn_cast<GlobalVariable>(G->getGlobal()); + const GlobalVariable *GV = dyn_cast<GlobalVariable>(G->getGlobal()); if (GV && GetConstantStringInfo(GV, Str, SrcDelta, false)) return true; @@ -3193,6 +3212,7 @@ static bool FindOptimalMemOpLowering(std::vector<EVT> &MemOps, unsigned Limit, uint64_t Size, unsigned DstAlign, unsigned SrcAlign, bool NonScalarIntSafe, + bool MemcpyStrSrc, SelectionDAG &DAG, const TargetLowering &TLI) { assert((SrcAlign == 0 || SrcAlign >= DstAlign) && @@ -3201,9 +3221,12 @@ static bool FindOptimalMemOpLowering(std::vector<EVT> &MemOps, // the value, i.e. memset or memcpy from constant string. Otherwise, it's // the inferred alignment of the source. 'DstAlign', on the other hand, is the // specified alignment of the memory operation. If it is zero, that means - // it's possible to change the alignment of the destination. + // it's possible to change the alignment of the destination. 'MemcpyStrSrc' + // indicates whether the memcpy source is constant so it does not need to be + // loaded. EVT VT = TLI.getOptimalMemOpType(Size, DstAlign, SrcAlign, - NonScalarIntSafe, DAG); + NonScalarIntSafe, MemcpyStrSrc, + DAG.getMachineFunction()); if (VT == MVT::Other) { if (DstAlign >= TLI.getTargetData()->getPointerPrefAlignment() || @@ -3269,9 +3292,6 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl, // below a certain threshold. const TargetLowering &TLI = DAG.getTargetLoweringInfo(); std::vector<EVT> MemOps; - uint64_t Limit = -1ULL; - if (!AlwaysInline) - Limit = TLI.getMaxStoresPerMemcpy(); bool DstAlignCanChange = false; MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst); @@ -3283,9 +3303,13 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl, std::string Str; bool CopyFromStr = isMemSrcFromString(Src, Str); bool isZeroStr = CopyFromStr && Str.empty(); + uint64_t Limit = -1ULL; + if (!AlwaysInline) + Limit = TLI.getMaxStoresPerMemcpy(); if (!FindOptimalMemOpLowering(MemOps, Limit, Size, (DstAlignCanChange ? 0 : Align), - (isZeroStr ? 0 : SrcAlign), true, DAG, TLI)) + (isZeroStr ? 0 : SrcAlign), + true, CopyFromStr, DAG, TLI)) return SDValue(); if (DstAlignCanChange) { @@ -3373,7 +3397,7 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, DebugLoc dl, if (!FindOptimalMemOpLowering(MemOps, Limit, Size, (DstAlignCanChange ? 
0 : Align), - SrcAlign, true, DAG, TLI)) + SrcAlign, true, false, DAG, TLI)) return SDValue(); if (DstAlignCanChange) { @@ -3445,7 +3469,7 @@ static SDValue getMemsetStores(SelectionDAG &DAG, DebugLoc dl, isa<ConstantSDNode>(Src) && cast<ConstantSDNode>(Src)->isNullValue(); if (!FindOptimalMemOpLowering(MemOps, TLI.getMaxStoresPerMemset(), Size, (DstAlignCanChange ? 0 : Align), 0, - NonScalarIntSafe, DAG, TLI)) + NonScalarIntSafe, false, DAG, TLI)) return SDValue(); if (DstAlignCanChange) { @@ -3571,8 +3595,10 @@ SDValue SelectionDAG::getMemmove(SDValue Chain, DebugLoc dl, SDValue Dst, if (Result.getNode()) return Result; + // FIXME: If the memmove is volatile, lowering it to plain libc memmove may + // not be safe. See memcpy above for more details. + // Emit a library call. - assert(!isVol && "library memmove does not support volatile"); TargetLowering::ArgListTy Args; TargetLowering::ArgListEntry Entry; Entry.Ty = TLI.getTargetData()->getIntPtrType(*getContext()); @@ -3620,8 +3646,7 @@ SDValue SelectionDAG::getMemset(SDValue Chain, DebugLoc dl, SDValue Dst, if (Result.getNode()) return Result; - // Emit a library call. - assert(!isVol && "library memset does not support volatile"); + // Emit a library call. const Type *IntPtrTy = TLI.getTargetData()->getIntPtrType(*getContext()); TargetLowering::ArgListTy Args; TargetLowering::ArgListEntry Entry; @@ -4913,7 +4938,7 @@ SelectionDAG::getDbgValue(MDNode *MDPtr, SDNode *N, unsigned R, uint64_t Off, } SDDbgValue * -SelectionDAG::getDbgValue(MDNode *MDPtr, Value *C, uint64_t Off, +SelectionDAG::getDbgValue(MDNode *MDPtr, const Value *C, uint64_t Off, DebugLoc DL, unsigned O) { return new (Allocator) SDDbgValue(MDPtr, C, Off, DL, O); } @@ -5321,8 +5346,8 @@ unsigned SelectionDAG::GetOrdering(const SDNode *SD) const { /// AddDbgValue - Add a dbg_value SDNode. If SD is non-null that means the /// value is produced by SD. 
-void SelectionDAG::AddDbgValue(SDDbgValue *DB, SDNode *SD) { - DbgInfo->add(DB, SD); +void SelectionDAG::AddDbgValue(SDDbgValue *DB, SDNode *SD, bool isParameter) { + DbgInfo->add(DB, SD, isParameter); if (SD) SD->setHasDebugValue(true); } @@ -5338,7 +5363,7 @@ HandleSDNode::~HandleSDNode() { GlobalAddressSDNode::GlobalAddressSDNode(unsigned Opc, const GlobalValue *GA, EVT VT, int64_t o, unsigned char TF) : SDNode(Opc, DebugLoc(), getSDVTList(VT)), Offset(o), TargetFlags(TF) { - TheGlobal = const_cast<GlobalValue*>(GA); + TheGlobal = GA; } MemSDNode::MemSDNode(unsigned Opc, DebugLoc dl, SDVTList VTs, EVT memvt, @@ -5558,6 +5583,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::PCMARKER: return "PCMarker"; case ISD::READCYCLECOUNTER: return "ReadCycleCounter"; case ISD::SRCVALUE: return "SrcValue"; + case ISD::MDNODE_SDNODE: return "MDNode"; case ISD::EntryToken: return "EntryToken"; case ISD::TokenFactor: return "TokenFactor"; case ISD::AssertSext: return "AssertSext"; @@ -5926,6 +5952,11 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const { OS << "<" << M->getValue() << ">"; else OS << "<null>"; + } else if (const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(this)) { + if (MD->getMD()) + OS << "<" << MD->getMD() << ">"; + else + OS << "<null>"; } else if (const VTSDNode *N = dyn_cast<VTSDNode>(this)) { OS << ":" << N->getVT().getEVTString(); } @@ -6063,7 +6094,7 @@ SDValue SelectionDAG::UnrollVectorOp(SDNode *N, unsigned ResNE) { unsigned i; for (i= 0; i != NE; ++i) { - for (unsigned j = 0; j != N->getNumOperands(); ++j) { + for (unsigned j = 0, e = N->getNumOperands(); j != e; ++j) { SDValue Operand = N->getOperand(j); EVT OperandVT = Operand.getValueType(); if (OperandVT.isVector()) { @@ -6141,8 +6172,8 @@ bool SelectionDAG::isConsecutiveLoad(LoadSDNode *LD, LoadSDNode *Base, return true; } - GlobalValue *GV1 = NULL; - GlobalValue *GV2 = NULL; + const GlobalValue *GV1 = NULL; + const GlobalValue *GV2 = NULL; int64_t Offset1 = 0; int64_t Offset2 = 0; bool isGA1 = TLI.isGAPlusOffset(Loc.getNode(), GV1, Offset1); @@ -6157,14 +6188,14 @@ bool SelectionDAG::isConsecutiveLoad(LoadSDNode *LD, LoadSDNode *Base, /// it cannot be inferred. unsigned SelectionDAG::InferPtrAlignment(SDValue Ptr) const { // If this is a GlobalAddress + cst, return the alignment. - GlobalValue *GV; + const GlobalValue *GV; int64_t GVOffset = 0; if (TLI.isGAPlusOffset(Ptr.getNode(), GV, GVOffset)) { // If GV has specified alignment, then use it. Otherwise, use the preferred // alignment. unsigned Align = GV->getAlignment(); if (!Align) { - if (GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV)) { + if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV)) { if (GVar->hasInitializer()) { const TargetData *TD = TLI.getTargetData(); Align = TD->getPreferredAlignment(GVar); @@ -6326,8 +6357,8 @@ bool BuildVectorSDNode::isConstantSplat(APInt &SplatValue, if (OpVal.getOpcode() == ISD::UNDEF) SplatUndef |= APInt::getBitsSet(sz, BitPos, BitPos + EltBitSize); else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) - SplatValue |= (APInt(CN->getAPIntValue()).zextOrTrunc(EltBitSize). - zextOrTrunc(sz) << BitPos); + SplatValue |= APInt(CN->getAPIntValue()).zextOrTrunc(EltBitSize). 
+ zextOrTrunc(sz) << BitPos; else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) SplatValue |= CN->getValueAPF().bitcastToAPInt().zextOrTrunc(sz) <<BitPos; else diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 4bbb3de..a38b204 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -28,7 +28,9 @@ #include "llvm/Instructions.h" #include "llvm/Intrinsics.h" #include "llvm/IntrinsicInst.h" +#include "llvm/LLVMContext.h" #include "llvm/Module.h" +#include "llvm/CodeGen/Analysis.h" #include "llvm/CodeGen/FastISel.h" #include "llvm/CodeGen/GCStrategy.h" #include "llvm/CodeGen/GCMetadata.h" @@ -168,7 +170,7 @@ namespace { /// AddInlineAsmOperands - Add this value to the specified inlineasm node /// operand list. This adds the code marker, matching input operand index /// (if applicable), and includes the number of values added into it. - void AddInlineAsmOperands(unsigned Code, + void AddInlineAsmOperands(unsigned Kind, bool HasMatching, unsigned MatchingIdx, SelectionDAG &DAG, std::vector<SDValue> &Ops) const; @@ -533,7 +535,7 @@ void SelectionDAGBuilder::init(GCFunctionInfo *gfi, AliasAnalysis &aa) { TD = DAG.getTarget().getTargetData(); } -/// clear - Clear out the curret SelectionDAG and the associated +/// clear - Clear out the current SelectionDAG and the associated /// state and prepare this SelectionDAGBuilder object to be used /// for a new block. This doesn't clear out information about /// additional blocks that are needed to complete switch lowering @@ -543,8 +545,6 @@ void SelectionDAGBuilder::clear() { NodeMap.clear(); PendingLoads.clear(); PendingExports.clear(); - EdgeMapping.clear(); - DAG.clear(); CurDebugLoc = DebugLoc(); HasTailCall = false; } @@ -612,11 +612,26 @@ void SelectionDAGBuilder::AssignOrderingToNode(const SDNode *Node) { AssignOrderingToNode(Node->getOperand(I).getNode()); } -void SelectionDAGBuilder::visit(Instruction &I) { +void SelectionDAGBuilder::visit(const Instruction &I) { + // Set up outgoing PHI node register values before emitting the terminator. + if (isa<TerminatorInst>(&I)) + HandlePHINodesInSuccessorBlocks(I.getParent()); + + CurDebugLoc = I.getDebugLoc(); + visit(I.getOpcode(), I); + + if (!isa<TerminatorInst>(&I) && !HasTailCall) + CopyToExportRegsIfNeeded(&I); + + CurDebugLoc = DebugLoc(); } -void SelectionDAGBuilder::visit(unsigned Opcode, User &I) { +void SelectionDAGBuilder::visitPHI(const PHINode &) { + llvm_unreachable("SelectionDAGBuilder shouldn't visit PHI nodes!"); +} + +void SelectionDAGBuilder::visit(unsigned Opcode, const User &I) { // Note: this doesn't use InstVisitor, because it has to work with // ConstantExpr's in addition to instructions. 
switch (Opcode) { @@ -638,28 +653,28 @@ SDValue SelectionDAGBuilder::getValue(const Value *V) { SDValue &N = NodeMap[V]; if (N.getNode()) return N; - if (Constant *C = const_cast<Constant*>(dyn_cast<Constant>(V))) { + if (const Constant *C = dyn_cast<Constant>(V)) { EVT VT = TLI.getValueType(V->getType(), true); - if (ConstantInt *CI = dyn_cast<ConstantInt>(C)) + if (const ConstantInt *CI = dyn_cast<ConstantInt>(C)) return N = DAG.getConstant(*CI, VT); - if (GlobalValue *GV = dyn_cast<GlobalValue>(C)) + if (const GlobalValue *GV = dyn_cast<GlobalValue>(C)) return N = DAG.getGlobalAddress(GV, VT); if (isa<ConstantPointerNull>(C)) return N = DAG.getConstant(0, TLI.getPointerTy()); - if (ConstantFP *CFP = dyn_cast<ConstantFP>(C)) + if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C)) return N = DAG.getConstantFP(*CFP, VT); if (isa<UndefValue>(C) && !V->getType()->isAggregateType()) return N = DAG.getUNDEF(VT); - if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) { + if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) { visit(CE->getOpcode(), *CE); SDValue N1 = NodeMap[V]; - assert(N1.getNode() && "visit didn't populate the ValueMap!"); + assert(N1.getNode() && "visit didn't populate the NodeMap!"); return N1; } @@ -704,7 +719,7 @@ SDValue SelectionDAGBuilder::getValue(const Value *V) { getCurDebugLoc()); } - if (BlockAddress *BA = dyn_cast<BlockAddress>(C)) + if (const BlockAddress *BA = dyn_cast<BlockAddress>(C)) return DAG.getBlockAddress(BA, VT); const VectorType *VecTy = cast<VectorType>(V->getType()); @@ -713,7 +728,7 @@ SDValue SelectionDAGBuilder::getValue(const Value *V) { // Now that we know the number and type of the elements, get that number of // elements into the Ops array based on what kind of constant it is. SmallVector<SDValue, 16> Ops; - if (ConstantVector *CP = dyn_cast<ConstantVector>(C)) { + if (const ConstantVector *CP = dyn_cast<ConstantVector>(C)) { for (unsigned i = 0; i != NumElements; ++i) Ops.push_back(getValue(CP->getOperand(i))); } else { @@ -756,7 +771,7 @@ SDValue SelectionDAGBuilder::getValue(const Value *V) { static void getReturnInfo(const Type* ReturnType, Attributes attr, SmallVectorImpl<EVT> &OutVTs, SmallVectorImpl<ISD::ArgFlagsTy> &OutFlags, - TargetLowering &TLI, + const TargetLowering &TLI, SmallVectorImpl<uint64_t> *Offsets = 0) { SmallVector<EVT, 4> ValueVTs; ComputeValueVTs(TLI, ReturnType, ValueVTs); @@ -811,7 +826,7 @@ static void getReturnInfo(const Type* ReturnType, } } -void SelectionDAGBuilder::visitRet(ReturnInst &I) { +void SelectionDAGBuilder::visitRet(const ReturnInst &I) { SDValue Chain = getControlRoot(); SmallVector<ISD::OutputArg, 8> Outs; FunctionLoweringInfo &FLI = DAG.getFunctionLoweringInfo(); @@ -916,18 +931,18 @@ void SelectionDAGBuilder::visitRet(ReturnInst &I) { /// CopyToExportRegsIfNeeded - If the given value has virtual registers /// created for it, emit nodes to copy the value into the virtual /// registers. 
-void SelectionDAGBuilder::CopyToExportRegsIfNeeded(Value *V) { - if (!V->use_empty()) { - DenseMap<const Value *, unsigned>::iterator VMI = FuncInfo.ValueMap.find(V); - if (VMI != FuncInfo.ValueMap.end()) - CopyValueToVirtualRegister(V, VMI->second); +void SelectionDAGBuilder::CopyToExportRegsIfNeeded(const Value *V) { + DenseMap<const Value *, unsigned>::iterator VMI = FuncInfo.ValueMap.find(V); + if (VMI != FuncInfo.ValueMap.end()) { + assert(!V->use_empty() && "Unused value assigned virtual registers!"); + CopyValueToVirtualRegister(V, VMI->second); } } /// ExportFromCurrentBlock - If this condition isn't known to be exported from /// the current basic block, add it to ValueMap now so that we'll get a /// CopyTo/FromReg. -void SelectionDAGBuilder::ExportFromCurrentBlock(Value *V) { +void SelectionDAGBuilder::ExportFromCurrentBlock(const Value *V) { // No need to export constants. if (!isa<Instruction>(V) && !isa<Argument>(V)) return; @@ -938,11 +953,11 @@ void SelectionDAGBuilder::ExportFromCurrentBlock(Value *V) { CopyValueToVirtualRegister(V, Reg); } -bool SelectionDAGBuilder::isExportableFromCurrentBlock(Value *V, +bool SelectionDAGBuilder::isExportableFromCurrentBlock(const Value *V, const BasicBlock *FromBB) { // The operands of the setcc have to be in this block. We don't know // how to export them from some other block. - if (Instruction *VI = dyn_cast<Instruction>(V)) { + if (const Instruction *VI = dyn_cast<Instruction>(V)) { // Can export from current BB. if (VI->getParent() == FromBB) return true; @@ -971,85 +986,31 @@ static bool InBlock(const Value *V, const BasicBlock *BB) { return true; } -/// getFCmpCondCode - Return the ISD condition code corresponding to -/// the given LLVM IR floating-point condition code. This includes -/// consideration of global floating-point math flags. -/// -static ISD::CondCode getFCmpCondCode(FCmpInst::Predicate Pred) { - ISD::CondCode FPC, FOC; - switch (Pred) { - case FCmpInst::FCMP_FALSE: FOC = FPC = ISD::SETFALSE; break; - case FCmpInst::FCMP_OEQ: FOC = ISD::SETEQ; FPC = ISD::SETOEQ; break; - case FCmpInst::FCMP_OGT: FOC = ISD::SETGT; FPC = ISD::SETOGT; break; - case FCmpInst::FCMP_OGE: FOC = ISD::SETGE; FPC = ISD::SETOGE; break; - case FCmpInst::FCMP_OLT: FOC = ISD::SETLT; FPC = ISD::SETOLT; break; - case FCmpInst::FCMP_OLE: FOC = ISD::SETLE; FPC = ISD::SETOLE; break; - case FCmpInst::FCMP_ONE: FOC = ISD::SETNE; FPC = ISD::SETONE; break; - case FCmpInst::FCMP_ORD: FOC = FPC = ISD::SETO; break; - case FCmpInst::FCMP_UNO: FOC = FPC = ISD::SETUO; break; - case FCmpInst::FCMP_UEQ: FOC = ISD::SETEQ; FPC = ISD::SETUEQ; break; - case FCmpInst::FCMP_UGT: FOC = ISD::SETGT; FPC = ISD::SETUGT; break; - case FCmpInst::FCMP_UGE: FOC = ISD::SETGE; FPC = ISD::SETUGE; break; - case FCmpInst::FCMP_ULT: FOC = ISD::SETLT; FPC = ISD::SETULT; break; - case FCmpInst::FCMP_ULE: FOC = ISD::SETLE; FPC = ISD::SETULE; break; - case FCmpInst::FCMP_UNE: FOC = ISD::SETNE; FPC = ISD::SETUNE; break; - case FCmpInst::FCMP_TRUE: FOC = FPC = ISD::SETTRUE; break; - default: - llvm_unreachable("Invalid FCmp predicate opcode!"); - FOC = FPC = ISD::SETFALSE; - break; - } - if (FiniteOnlyFPMath()) - return FOC; - else - return FPC; -} - -/// getICmpCondCode - Return the ISD condition code corresponding to -/// the given LLVM IR integer condition code. 
-/// -static ISD::CondCode getICmpCondCode(ICmpInst::Predicate Pred) { - switch (Pred) { - case ICmpInst::ICMP_EQ: return ISD::SETEQ; - case ICmpInst::ICMP_NE: return ISD::SETNE; - case ICmpInst::ICMP_SLE: return ISD::SETLE; - case ICmpInst::ICMP_ULE: return ISD::SETULE; - case ICmpInst::ICMP_SGE: return ISD::SETGE; - case ICmpInst::ICMP_UGE: return ISD::SETUGE; - case ICmpInst::ICMP_SLT: return ISD::SETLT; - case ICmpInst::ICMP_ULT: return ISD::SETULT; - case ICmpInst::ICMP_SGT: return ISD::SETGT; - case ICmpInst::ICMP_UGT: return ISD::SETUGT; - default: - llvm_unreachable("Invalid ICmp predicate opcode!"); - return ISD::SETNE; - } -} - /// EmitBranchForMergedCondition - Helper method for FindMergedConditions. /// This function emits a branch and is used at the leaves of an OR or an /// AND operator tree. /// void -SelectionDAGBuilder::EmitBranchForMergedCondition(Value *Cond, +SelectionDAGBuilder::EmitBranchForMergedCondition(const Value *Cond, MachineBasicBlock *TBB, MachineBasicBlock *FBB, - MachineBasicBlock *CurBB) { + MachineBasicBlock *CurBB, + MachineBasicBlock *SwitchBB) { const BasicBlock *BB = CurBB->getBasicBlock(); // If the leaf of the tree is a comparison, merge the condition into // the caseblock. - if (CmpInst *BOp = dyn_cast<CmpInst>(Cond)) { + if (const CmpInst *BOp = dyn_cast<CmpInst>(Cond)) { // The operands of the cmp have to be in this block. We don't know // how to export them from some other block. If this is the first block // of the sequence, no exporting is needed. - if (CurBB == CurMBB || + if (CurBB == SwitchBB || (isExportableFromCurrentBlock(BOp->getOperand(0), BB) && isExportableFromCurrentBlock(BOp->getOperand(1), BB))) { ISD::CondCode Condition; - if (ICmpInst *IC = dyn_cast<ICmpInst>(Cond)) { + if (const ICmpInst *IC = dyn_cast<ICmpInst>(Cond)) { Condition = getICmpCondCode(IC->getPredicate()); - } else if (FCmpInst *FC = dyn_cast<FCmpInst>(Cond)) { + } else if (const FCmpInst *FC = dyn_cast<FCmpInst>(Cond)) { Condition = getFCmpCondCode(FC->getPredicate()); } else { Condition = ISD::SETEQ; // silence warning. @@ -1070,19 +1031,20 @@ SelectionDAGBuilder::EmitBranchForMergedCondition(Value *Cond, } /// FindMergedConditions - If Cond is an expression like -void SelectionDAGBuilder::FindMergedConditions(Value *Cond, +void SelectionDAGBuilder::FindMergedConditions(const Value *Cond, MachineBasicBlock *TBB, MachineBasicBlock *FBB, MachineBasicBlock *CurBB, + MachineBasicBlock *SwitchBB, unsigned Opc) { // If this node is not part of the or/and tree, emit it as a branch. - Instruction *BOp = dyn_cast<Instruction>(Cond); + const Instruction *BOp = dyn_cast<Instruction>(Cond); if (!BOp || !(isa<BinaryOperator>(BOp) || isa<CmpInst>(BOp)) || (unsigned)BOp->getOpcode() != Opc || !BOp->hasOneUse() || BOp->getParent() != CurBB->getBasicBlock() || !InBlock(BOp->getOperand(0), CurBB->getBasicBlock()) || !InBlock(BOp->getOperand(1), CurBB->getBasicBlock())) { - EmitBranchForMergedCondition(Cond, TBB, FBB, CurBB); + EmitBranchForMergedCondition(Cond, TBB, FBB, CurBB, SwitchBB); return; } @@ -1102,10 +1064,10 @@ void SelectionDAGBuilder::FindMergedConditions(Value *Cond, // // Emit the LHS condition. - FindMergedConditions(BOp->getOperand(0), TBB, TmpBB, CurBB, Opc); + FindMergedConditions(BOp->getOperand(0), TBB, TmpBB, CurBB, SwitchBB, Opc); // Emit the RHS condition into TmpBB. 
- FindMergedConditions(BOp->getOperand(1), TBB, FBB, TmpBB, Opc); + FindMergedConditions(BOp->getOperand(1), TBB, FBB, TmpBB, SwitchBB, Opc); } else { assert(Opc == Instruction::And && "Unknown merge op!"); // Codegen X & Y as: @@ -1118,10 +1080,10 @@ void SelectionDAGBuilder::FindMergedConditions(Value *Cond, // This requires creation of TmpBB after CurBB. // Emit the LHS condition. - FindMergedConditions(BOp->getOperand(0), TmpBB, FBB, CurBB, Opc); + FindMergedConditions(BOp->getOperand(0), TmpBB, FBB, CurBB, SwitchBB, Opc); // Emit the RHS condition into TmpBB. - FindMergedConditions(BOp->getOperand(1), TBB, FBB, TmpBB, Opc); + FindMergedConditions(BOp->getOperand(1), TBB, FBB, TmpBB, SwitchBB, Opc); } } @@ -1156,19 +1118,21 @@ SelectionDAGBuilder::ShouldEmitAsBranches(const std::vector<CaseBlock> &Cases){ return true; } -void SelectionDAGBuilder::visitBr(BranchInst &I) { +void SelectionDAGBuilder::visitBr(const BranchInst &I) { + MachineBasicBlock *BrMBB = FuncInfo.MBBMap[I.getParent()]; + // Update machine-CFG edges. MachineBasicBlock *Succ0MBB = FuncInfo.MBBMap[I.getSuccessor(0)]; // Figure out which block is immediately after the current one. MachineBasicBlock *NextBlock = 0; - MachineFunction::iterator BBI = CurMBB; + MachineFunction::iterator BBI = BrMBB; if (++BBI != FuncInfo.MF->end()) NextBlock = BBI; if (I.isUnconditional()) { // Update machine-CFG edges. - CurMBB->addSuccessor(Succ0MBB); + BrMBB->addSuccessor(Succ0MBB); // If this is not a fall-through branch, emit the branch. if (Succ0MBB != NextBlock) @@ -1181,7 +1145,7 @@ void SelectionDAGBuilder::visitBr(BranchInst &I) { // If this condition is one of the special cases we handle, do special stuff // now. - Value *CondVal = I.getCondition(); + const Value *CondVal = I.getCondition(); MachineBasicBlock *Succ1MBB = FuncInfo.MBBMap[I.getSuccessor(1)]; // If this is a series of conditions that are or'd or and'd together, emit @@ -1199,15 +1163,16 @@ void SelectionDAGBuilder::visitBr(BranchInst &I) { // cmp D, E // jle foo // - if (BinaryOperator *BOp = dyn_cast<BinaryOperator>(CondVal)) { + if (const BinaryOperator *BOp = dyn_cast<BinaryOperator>(CondVal)) { if (BOp->hasOneUse() && (BOp->getOpcode() == Instruction::And || BOp->getOpcode() == Instruction::Or)) { - FindMergedConditions(BOp, Succ0MBB, Succ1MBB, CurMBB, BOp->getOpcode()); + FindMergedConditions(BOp, Succ0MBB, Succ1MBB, BrMBB, BrMBB, + BOp->getOpcode()); // If the compares in later blocks need to use values not currently // exported from this block, export them now. This block should always // be the first entry. - assert(SwitchCases[0].ThisBB == CurMBB && "Unexpected lowering!"); + assert(SwitchCases[0].ThisBB == BrMBB && "Unexpected lowering!"); // Allow some cases to be rejected. if (ShouldEmitAsBranches(SwitchCases)) { @@ -1217,7 +1182,7 @@ void SelectionDAGBuilder::visitBr(BranchInst &I) { } // Emit the branch for this block. - visitSwitchCase(SwitchCases[0]); + visitSwitchCase(SwitchCases[0], BrMBB); SwitchCases.erase(SwitchCases.begin()); return; } @@ -1233,16 +1198,17 @@ void SelectionDAGBuilder::visitBr(BranchInst &I) { // Create a CaseBlock record representing this branch. CaseBlock CB(ISD::SETEQ, CondVal, ConstantInt::getTrue(*DAG.getContext()), - NULL, Succ0MBB, Succ1MBB, CurMBB); + NULL, Succ0MBB, Succ1MBB, BrMBB); // Use visitSwitchCase to actually insert the fast branch sequence for this // cond branch. 
- visitSwitchCase(CB); + visitSwitchCase(CB, BrMBB); } /// visitSwitchCase - Emits the necessary code to represent a single node in /// the binary search tree resulting from lowering a switch instruction. -void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB) { +void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB, + MachineBasicBlock *SwitchBB) { SDValue Cond; SDValue CondLHS = getValue(CB.CmpLHS); DebugLoc dl = getCurDebugLoc(); @@ -1281,13 +1247,13 @@ void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB) { } // Update successor info - CurMBB->addSuccessor(CB.TrueBB); - CurMBB->addSuccessor(CB.FalseBB); + SwitchBB->addSuccessor(CB.TrueBB); + SwitchBB->addSuccessor(CB.FalseBB); // Set NextBlock to be the MBB immediately after the current one, if any. // This is used to avoid emitting unnecessary branches to the next block. MachineBasicBlock *NextBlock = 0; - MachineFunction::iterator BBI = CurMBB; + MachineFunction::iterator BBI = SwitchBB; if (++BBI != FuncInfo.MF->end()) NextBlock = BBI; @@ -1305,11 +1271,11 @@ void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB) { // If the branch was constant folded, fix up the CFG. if (BrCond.getOpcode() == ISD::BR) { - CurMBB->removeSuccessor(CB.FalseBB); + SwitchBB->removeSuccessor(CB.FalseBB); } else { // Otherwise, go ahead and insert the false branch. if (BrCond == getControlRoot()) - CurMBB->removeSuccessor(CB.TrueBB); + SwitchBB->removeSuccessor(CB.TrueBB); if (CB.FalseBB != NextBlock) BrCond = DAG.getNode(ISD::BR, dl, MVT::Other, BrCond, @@ -1336,7 +1302,8 @@ void SelectionDAGBuilder::visitJumpTable(JumpTable &JT) { /// visitJumpTableHeader - This function emits necessary code to produce index /// in the JumpTable from switch case. void SelectionDAGBuilder::visitJumpTableHeader(JumpTable &JT, - JumpTableHeader &JTH) { + JumpTableHeader &JTH, + MachineBasicBlock *SwitchBB) { // Subtract the lowest switch case value from the value being switched on and // conditional branch to default mbb if the result is greater than the // difference between smallest and largest cases. @@ -1368,7 +1335,7 @@ void SelectionDAGBuilder::visitJumpTableHeader(JumpTable &JT, // Set NextBlock to be the MBB immediately after the current one, if any. // This is used to avoid emitting unnecessary branches to the next block. MachineBasicBlock *NextBlock = 0; - MachineFunction::iterator BBI = CurMBB; + MachineFunction::iterator BBI = SwitchBB; if (++BBI != FuncInfo.MF->end()) NextBlock = BBI; @@ -1386,7 +1353,8 @@ void SelectionDAGBuilder::visitJumpTableHeader(JumpTable &JT, /// visitBitTestHeader - This function emits necessary code to produce value /// suitable for "bit tests" -void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B) { +void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B, + MachineBasicBlock *SwitchBB) { // Subtract the minimum value SDValue SwitchOp = getValue(B.SValue); EVT VT = SwitchOp.getValueType(); @@ -1409,14 +1377,14 @@ void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B) { // Set NextBlock to be the MBB immediately after the current one, if any. // This is used to avoid emitting unnecessary branches to the next block. 
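For context, the NextBlock bookkeeping that recurs in these switch helpers only detects fall-through; a minimal sketch of that check in plain C++ (integer block indices standing in for MachineBasicBlocks, an assumption made purely for illustration) is:

    #include <optional>

    // Emit an unconditional branch only when the target block is not laid out
    // immediately after the current block; otherwise rely on fall-through.
    std::optional<int> branchTargetIfNeeded(int CurBlockIndex, int TargetIndex,
                                            int NumBlocks) {
      int NextIndex = CurBlockIndex + 1 < NumBlocks ? CurBlockIndex + 1 : -1;
      if (TargetIndex == NextIndex)
        return std::nullopt;   // fall through, no branch emitted
      return TargetIndex;      // an explicit BR to this block is required
    }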
MachineBasicBlock *NextBlock = 0; - MachineFunction::iterator BBI = CurMBB; + MachineFunction::iterator BBI = SwitchBB; if (++BBI != FuncInfo.MF->end()) NextBlock = BBI; MachineBasicBlock* MBB = B.Cases[0].ThisBB; - CurMBB->addSuccessor(B.Default); - CurMBB->addSuccessor(MBB); + SwitchBB->addSuccessor(B.Default); + SwitchBB->addSuccessor(MBB); SDValue BrRange = DAG.getNode(ISD::BRCOND, getCurDebugLoc(), MVT::Other, CopyTo, RangeCmp, @@ -1432,7 +1400,8 @@ void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B) { /// visitBitTestCase - this function produces one "bit test" void SelectionDAGBuilder::visitBitTestCase(MachineBasicBlock* NextMBB, unsigned Reg, - BitTestCase &B) { + BitTestCase &B, + MachineBasicBlock *SwitchBB) { // Make desired shift SDValue ShiftOp = DAG.getCopyFromReg(getControlRoot(), getCurDebugLoc(), Reg, TLI.getPointerTy()); @@ -1450,8 +1419,8 @@ void SelectionDAGBuilder::visitBitTestCase(MachineBasicBlock* NextMBB, AndOp, DAG.getConstant(0, TLI.getPointerTy()), ISD::SETNE); - CurMBB->addSuccessor(B.TargetBB); - CurMBB->addSuccessor(NextMBB); + SwitchBB->addSuccessor(B.TargetBB); + SwitchBB->addSuccessor(NextMBB); SDValue BrAnd = DAG.getNode(ISD::BRCOND, getCurDebugLoc(), MVT::Other, getControlRoot(), @@ -1460,7 +1429,7 @@ void SelectionDAGBuilder::visitBitTestCase(MachineBasicBlock* NextMBB, // Set NextBlock to be the MBB immediately after the current one, if any. // This is used to avoid emitting unnecessary branches to the next block. MachineBasicBlock *NextBlock = 0; - MachineFunction::iterator BBI = CurMBB; + MachineFunction::iterator BBI = SwitchBB; if (++BBI != FuncInfo.MF->end()) NextBlock = BBI; @@ -1471,7 +1440,9 @@ void SelectionDAGBuilder::visitBitTestCase(MachineBasicBlock* NextMBB, DAG.setRoot(BrAnd); } -void SelectionDAGBuilder::visitInvoke(InvokeInst &I) { +void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) { + MachineBasicBlock *InvokeMBB = FuncInfo.MBBMap[I.getParent()]; + // Retrieve successors. MachineBasicBlock *Return = FuncInfo.MBBMap[I.getSuccessor(0)]; MachineBasicBlock *LandingPad = FuncInfo.MBBMap[I.getSuccessor(1)]; @@ -1487,8 +1458,8 @@ void SelectionDAGBuilder::visitInvoke(InvokeInst &I) { CopyToExportRegsIfNeeded(&I); // Update successor info - CurMBB->addSuccessor(Return); - CurMBB->addSuccessor(LandingPad); + InvokeMBB->addSuccessor(Return); + InvokeMBB->addSuccessor(LandingPad); // Drop into normal successor. DAG.setRoot(DAG.getNode(ISD::BR, getCurDebugLoc(), @@ -1496,15 +1467,16 @@ void SelectionDAGBuilder::visitInvoke(InvokeInst &I) { DAG.getBasicBlock(Return))); } -void SelectionDAGBuilder::visitUnwind(UnwindInst &I) { +void SelectionDAGBuilder::visitUnwind(const UnwindInst &I) { } /// handleSmallSwitchCaseRange - Emit a series of specific tests (suitable for /// small case ranges). bool SelectionDAGBuilder::handleSmallSwitchRange(CaseRec& CR, CaseRecVector& WorkList, - Value* SV, - MachineBasicBlock* Default) { + const Value* SV, + MachineBasicBlock *Default, + MachineBasicBlock *SwitchBB) { Case& BackCase = *(CR.Range.second-1); // Size is the number of Cases represented by this range. @@ -1557,7 +1529,7 @@ bool SelectionDAGBuilder::handleSmallSwitchRange(CaseRec& CR, FallThrough = Default; } - Value *RHS, *LHS, *MHS; + const Value *RHS, *LHS, *MHS; ISD::CondCode CC; if (I->High == I->Low) { // This is just small small case range :) containing exactly 1 case @@ -1573,8 +1545,8 @@ bool SelectionDAGBuilder::handleSmallSwitchRange(CaseRec& CR, // code into the current block. 
Otherwise, push the CaseBlock onto the // vector to be later processed by SDISel, and insert the node's MBB // before the next MBB. - if (CurBlock == CurMBB) - visitSwitchCase(CB); + if (CurBlock == SwitchBB) + visitSwitchCase(CB, SwitchBB); else SwitchCases.push_back(CB); @@ -1600,8 +1572,9 @@ static APInt ComputeRange(const APInt &First, const APInt &Last) { /// handleJTSwitchCase - Emit jumptable for current switch case range bool SelectionDAGBuilder::handleJTSwitchCase(CaseRec& CR, CaseRecVector& WorkList, - Value* SV, - MachineBasicBlock* Default) { + const Value* SV, + MachineBasicBlock* Default, + MachineBasicBlock *SwitchBB) { Case& FrontCase = *CR.Range.first; Case& BackCase = *(CR.Range.second-1); @@ -1613,7 +1586,7 @@ bool SelectionDAGBuilder::handleJTSwitchCase(CaseRec& CR, I!=E; ++I) TSize += I->size(); - if (!areJTsAllowed(TLI) || TSize.ult(APInt(First.getBitWidth(), 4))) + if (!areJTsAllowed(TLI) || TSize.ult(4)) return false; APInt Range = ComputeRange(First, Last); @@ -1682,9 +1655,9 @@ bool SelectionDAGBuilder::handleJTSwitchCase(CaseRec& CR, // Set the jump table information so that we can codegen it as a second // MachineBasicBlock JumpTable JT(-1U, JTI, JumpTableBB, Default); - JumpTableHeader JTH(First, Last, SV, CR.CaseBB, (CR.CaseBB == CurMBB)); - if (CR.CaseBB == CurMBB) - visitJumpTableHeader(JT, JTH); + JumpTableHeader JTH(First, Last, SV, CR.CaseBB, (CR.CaseBB == SwitchBB)); + if (CR.CaseBB == SwitchBB) + visitJumpTableHeader(JT, JTH, SwitchBB); JTCases.push_back(JumpTableBlock(JTH, JT)); @@ -1695,8 +1668,9 @@ bool SelectionDAGBuilder::handleJTSwitchCase(CaseRec& CR, /// 2 subtrees. bool SelectionDAGBuilder::handleBTSplitSwitchCase(CaseRec& CR, CaseRecVector& WorkList, - Value* SV, - MachineBasicBlock* Default) { + const Value* SV, + MachineBasicBlock *Default, + MachineBasicBlock *SwitchBB) { // Get the MachineFunction which holds the current MBB. This is used when // inserting any additional MBBs necessary to represent the switch. MachineFunction *CurMF = FuncInfo.MF; @@ -1810,8 +1784,8 @@ bool SelectionDAGBuilder::handleBTSplitSwitchCase(CaseRec& CR, // Otherwise, branch to LHS. CaseBlock CB(ISD::SETLT, SV, C, NULL, TrueBB, FalseBB, CR.CaseBB); - if (CR.CaseBB == CurMBB) - visitSwitchCase(CB); + if (CR.CaseBB == SwitchBB) + visitSwitchCase(CB, SwitchBB); else SwitchCases.push_back(CB); @@ -1823,8 +1797,9 @@ bool SelectionDAGBuilder::handleBTSplitSwitchCase(CaseRec& CR, /// of masks and emit bit tests with these masks. bool SelectionDAGBuilder::handleBitTestsSwitchCase(CaseRec& CR, CaseRecVector& WorkList, - Value* SV, - MachineBasicBlock* Default){ + const Value* SV, + MachineBasicBlock* Default, + MachineBasicBlock *SwitchBB){ EVT PTy = TLI.getPointerTy(); unsigned IntPtrBits = PTy.getSizeInBits(); @@ -1867,7 +1842,7 @@ bool SelectionDAGBuilder::handleBitTestsSwitchCase(CaseRec& CR, << "Low bound: " << minValue << '\n' << "High bound: " << maxValue << '\n'); - if (cmpRange.uge(APInt(cmpRange.getBitWidth(), IntPtrBits)) || + if (cmpRange.uge(IntPtrBits) || (!(Dests.size() == 1 && numCmps >= 3) && !(Dests.size() == 2 && numCmps >= 5) && !(Dests.size() >= 3 && numCmps >= 6))) @@ -1879,8 +1854,7 @@ bool SelectionDAGBuilder::handleBitTestsSwitchCase(CaseRec& CR, // Optimize the case where all the case values fit in a // word without having to subtract minValue. In this case, // we can optimize away the subtraction. 
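As background for the hunk below, the bit-test lowering these thresholds guard turns a small dense switch into one bitmask per destination, each tested with a single shift; a hedged sketch with made-up case values (1, 4, 9 to one destination, 2, 3 to another), assuming no minValue subtraction is needed:

    #include <cstdint>

    enum Dest { DestA, DestB, DefaultDest };

    Dest lowerSmallDenseSwitch(uint64_t X) {
      // Per-destination masks over case values already known to be < 64.
      const uint64_t MaskA = (1ULL << 1) | (1ULL << 4) | (1ULL << 9); // cases 1,4,9
      const uint64_t MaskB = (1ULL << 2) | (1ULL << 3);               // cases 2,3

      if (X >= 64)                  // one range check replaces the per-case compares
        return DefaultDest;
      if ((1ULL << X) & MaskA)
        return DestA;
      if ((1ULL << X) & MaskB)
        return DestB;
      return DefaultDest;
    }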
- if (minValue.isNonNegative() && - maxValue.slt(APInt(maxValue.getBitWidth(), IntPtrBits))) { + if (minValue.isNonNegative() && maxValue.slt(IntPtrBits)) { cmpRange = maxValue; } else { lowBound = minValue; @@ -1940,11 +1914,11 @@ bool SelectionDAGBuilder::handleBitTestsSwitchCase(CaseRec& CR, } BitTestBlock BTB(lowBound, cmpRange, SV, - -1U, (CR.CaseBB == CurMBB), + -1U, (CR.CaseBB == SwitchBB), CR.CaseBB, Default, BTC); - if (CR.CaseBB == CurMBB) - visitBitTestHeader(BTB); + if (CR.CaseBB == SwitchBB) + visitBitTestHeader(BTB, SwitchBB); BitTestCases.push_back(BTB); @@ -1994,7 +1968,9 @@ size_t SelectionDAGBuilder::Clusterify(CaseVector& Cases, return numCmps; } -void SelectionDAGBuilder::visitSwitch(SwitchInst &SI) { +void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) { + MachineBasicBlock *SwitchMBB = FuncInfo.MBBMap[SI.getParent()]; + // Figure out which block is immediately after the current one. MachineBasicBlock *NextBlock = 0; MachineBasicBlock *Default = FuncInfo.MBBMap[SI.getDefaultDest()]; @@ -2005,7 +1981,7 @@ void SelectionDAGBuilder::visitSwitch(SwitchInst &SI) { // Update machine-CFG edges. // If this is not a fall-through branch, emit the branch. - CurMBB->addSuccessor(Default); + SwitchMBB->addSuccessor(Default); if (Default != NextBlock) DAG.setRoot(DAG.getNode(ISD::BR, getCurDebugLoc(), MVT::Other, getControlRoot(), @@ -2026,38 +2002,41 @@ void SelectionDAGBuilder::visitSwitch(SwitchInst &SI) { // Get the Value to be switched on and default basic blocks, which will be // inserted into CaseBlock records, representing basic blocks in the binary // search tree. - Value *SV = SI.getOperand(0); + const Value *SV = SI.getOperand(0); // Push the initial CaseRec onto the worklist CaseRecVector WorkList; - WorkList.push_back(CaseRec(CurMBB,0,0,CaseRange(Cases.begin(),Cases.end()))); + WorkList.push_back(CaseRec(SwitchMBB,0,0, + CaseRange(Cases.begin(),Cases.end()))); while (!WorkList.empty()) { // Grab a record representing a case range to process off the worklist CaseRec CR = WorkList.back(); WorkList.pop_back(); - if (handleBitTestsSwitchCase(CR, WorkList, SV, Default)) + if (handleBitTestsSwitchCase(CR, WorkList, SV, Default, SwitchMBB)) continue; // If the range has few cases (two or less) emit a series of specific // tests. - if (handleSmallSwitchRange(CR, WorkList, SV, Default)) + if (handleSmallSwitchRange(CR, WorkList, SV, Default, SwitchMBB)) continue; // If the switch has more than 5 blocks, and at least 40% dense, and the // target supports indirect branches, then emit a jump table rather than // lowering the switch to a binary tree of conditional branches. - if (handleJTSwitchCase(CR, WorkList, SV, Default)) + if (handleJTSwitchCase(CR, WorkList, SV, Default, SwitchMBB)) continue; // Emit binary tree. We need to pick a pivot, and push left and right ranges // onto the worklist. Leafs are handled via handleSmallSwitchRange() call. - handleBTSplitSwitchCase(CR, WorkList, SV, Default); + handleBTSplitSwitchCase(CR, WorkList, SV, Default, SwitchMBB); } } -void SelectionDAGBuilder::visitIndirectBr(IndirectBrInst &I) { +void SelectionDAGBuilder::visitIndirectBr(const IndirectBrInst &I) { + MachineBasicBlock *IndirectBrMBB = FuncInfo.MBBMap[I.getParent()]; + // Update machine-CFG edges with unique successors. 
SmallVector<BasicBlock*, 32> succs; succs.reserve(I.getNumSuccessors()); @@ -2066,14 +2045,14 @@ void SelectionDAGBuilder::visitIndirectBr(IndirectBrInst &I) { array_pod_sort(succs.begin(), succs.end()); succs.erase(std::unique(succs.begin(), succs.end()), succs.end()); for (unsigned i = 0, e = succs.size(); i != e; ++i) - CurMBB->addSuccessor(FuncInfo.MBBMap[succs[i]]); + IndirectBrMBB->addSuccessor(FuncInfo.MBBMap[succs[i]]); DAG.setRoot(DAG.getNode(ISD::BRIND, getCurDebugLoc(), MVT::Other, getControlRoot(), getValue(I.getAddress()))); } -void SelectionDAGBuilder::visitFSub(User &I) { +void SelectionDAGBuilder::visitFSub(const User &I) { // -0.0 - X --> fneg const Type *Ty = I.getType(); if (Ty->isVectorTy()) { @@ -2103,14 +2082,14 @@ void SelectionDAGBuilder::visitFSub(User &I) { visitBinary(I, ISD::FSUB); } -void SelectionDAGBuilder::visitBinary(User &I, unsigned OpCode) { +void SelectionDAGBuilder::visitBinary(const User &I, unsigned OpCode) { SDValue Op1 = getValue(I.getOperand(0)); SDValue Op2 = getValue(I.getOperand(1)); setValue(&I, DAG.getNode(OpCode, getCurDebugLoc(), Op1.getValueType(), Op1, Op2)); } -void SelectionDAGBuilder::visitShift(User &I, unsigned Opcode) { +void SelectionDAGBuilder::visitShift(const User &I, unsigned Opcode) { SDValue Op1 = getValue(I.getOperand(0)); SDValue Op2 = getValue(I.getOperand(1)); if (!I.getType()->isVectorTy() && @@ -2144,11 +2123,11 @@ void SelectionDAGBuilder::visitShift(User &I, unsigned Opcode) { Op1.getValueType(), Op1, Op2)); } -void SelectionDAGBuilder::visitICmp(User &I) { +void SelectionDAGBuilder::visitICmp(const User &I) { ICmpInst::Predicate predicate = ICmpInst::BAD_ICMP_PREDICATE; - if (ICmpInst *IC = dyn_cast<ICmpInst>(&I)) + if (const ICmpInst *IC = dyn_cast<ICmpInst>(&I)) predicate = IC->getPredicate(); - else if (ConstantExpr *IC = dyn_cast<ConstantExpr>(&I)) + else if (const ConstantExpr *IC = dyn_cast<ConstantExpr>(&I)) predicate = ICmpInst::Predicate(IC->getPredicate()); SDValue Op1 = getValue(I.getOperand(0)); SDValue Op2 = getValue(I.getOperand(1)); @@ -2158,11 +2137,11 @@ void SelectionDAGBuilder::visitICmp(User &I) { setValue(&I, DAG.getSetCC(getCurDebugLoc(), DestVT, Op1, Op2, Opcode)); } -void SelectionDAGBuilder::visitFCmp(User &I) { +void SelectionDAGBuilder::visitFCmp(const User &I) { FCmpInst::Predicate predicate = FCmpInst::BAD_FCMP_PREDICATE; - if (FCmpInst *FC = dyn_cast<FCmpInst>(&I)) + if (const FCmpInst *FC = dyn_cast<FCmpInst>(&I)) predicate = FC->getPredicate(); - else if (ConstantExpr *FC = dyn_cast<ConstantExpr>(&I)) + else if (const ConstantExpr *FC = dyn_cast<ConstantExpr>(&I)) predicate = FCmpInst::Predicate(FC->getPredicate()); SDValue Op1 = getValue(I.getOperand(0)); SDValue Op2 = getValue(I.getOperand(1)); @@ -2171,7 +2150,7 @@ void SelectionDAGBuilder::visitFCmp(User &I) { setValue(&I, DAG.getSetCC(getCurDebugLoc(), DestVT, Op1, Op2, Condition)); } -void SelectionDAGBuilder::visitSelect(User &I) { +void SelectionDAGBuilder::visitSelect(const User &I) { SmallVector<EVT, 4> ValueVTs; ComputeValueVTs(TLI, I.getType(), ValueVTs); unsigned NumValues = ValueVTs.size(); @@ -2196,14 +2175,14 @@ void SelectionDAGBuilder::visitSelect(User &I) { &Values[0], NumValues)); } -void SelectionDAGBuilder::visitTrunc(User &I) { +void SelectionDAGBuilder::visitTrunc(const User &I) { // TruncInst cannot be a no-op cast because sizeof(src) > sizeof(dest). 
SDValue N = getValue(I.getOperand(0)); EVT DestVT = TLI.getValueType(I.getType()); setValue(&I, DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(), DestVT, N)); } -void SelectionDAGBuilder::visitZExt(User &I) { +void SelectionDAGBuilder::visitZExt(const User &I) { // ZExt cannot be a no-op cast because sizeof(src) < sizeof(dest). // ZExt also can't be a cast to bool for same reason. So, nothing much to do SDValue N = getValue(I.getOperand(0)); @@ -2211,7 +2190,7 @@ void SelectionDAGBuilder::visitZExt(User &I) { setValue(&I, DAG.getNode(ISD::ZERO_EXTEND, getCurDebugLoc(), DestVT, N)); } -void SelectionDAGBuilder::visitSExt(User &I) { +void SelectionDAGBuilder::visitSExt(const User &I) { // SExt cannot be a no-op cast because sizeof(src) < sizeof(dest). // SExt also can't be a cast to bool for same reason. So, nothing much to do SDValue N = getValue(I.getOperand(0)); @@ -2219,7 +2198,7 @@ void SelectionDAGBuilder::visitSExt(User &I) { setValue(&I, DAG.getNode(ISD::SIGN_EXTEND, getCurDebugLoc(), DestVT, N)); } -void SelectionDAGBuilder::visitFPTrunc(User &I) { +void SelectionDAGBuilder::visitFPTrunc(const User &I) { // FPTrunc is never a no-op cast, no need to check SDValue N = getValue(I.getOperand(0)); EVT DestVT = TLI.getValueType(I.getType()); @@ -2227,42 +2206,42 @@ void SelectionDAGBuilder::visitFPTrunc(User &I) { DestVT, N, DAG.getIntPtrConstant(0))); } -void SelectionDAGBuilder::visitFPExt(User &I){ +void SelectionDAGBuilder::visitFPExt(const User &I){ // FPTrunc is never a no-op cast, no need to check SDValue N = getValue(I.getOperand(0)); EVT DestVT = TLI.getValueType(I.getType()); setValue(&I, DAG.getNode(ISD::FP_EXTEND, getCurDebugLoc(), DestVT, N)); } -void SelectionDAGBuilder::visitFPToUI(User &I) { +void SelectionDAGBuilder::visitFPToUI(const User &I) { // FPToUI is never a no-op cast, no need to check SDValue N = getValue(I.getOperand(0)); EVT DestVT = TLI.getValueType(I.getType()); setValue(&I, DAG.getNode(ISD::FP_TO_UINT, getCurDebugLoc(), DestVT, N)); } -void SelectionDAGBuilder::visitFPToSI(User &I) { +void SelectionDAGBuilder::visitFPToSI(const User &I) { // FPToSI is never a no-op cast, no need to check SDValue N = getValue(I.getOperand(0)); EVT DestVT = TLI.getValueType(I.getType()); setValue(&I, DAG.getNode(ISD::FP_TO_SINT, getCurDebugLoc(), DestVT, N)); } -void SelectionDAGBuilder::visitUIToFP(User &I) { +void SelectionDAGBuilder::visitUIToFP(const User &I) { // UIToFP is never a no-op cast, no need to check SDValue N = getValue(I.getOperand(0)); EVT DestVT = TLI.getValueType(I.getType()); setValue(&I, DAG.getNode(ISD::UINT_TO_FP, getCurDebugLoc(), DestVT, N)); } -void SelectionDAGBuilder::visitSIToFP(User &I){ +void SelectionDAGBuilder::visitSIToFP(const User &I){ // SIToFP is never a no-op cast, no need to check SDValue N = getValue(I.getOperand(0)); EVT DestVT = TLI.getValueType(I.getType()); setValue(&I, DAG.getNode(ISD::SINT_TO_FP, getCurDebugLoc(), DestVT, N)); } -void SelectionDAGBuilder::visitPtrToInt(User &I) { +void SelectionDAGBuilder::visitPtrToInt(const User &I) { // What to do depends on the size of the integer and the size of the pointer. // We can either truncate, zero extend, or no-op, accordingly. SDValue N = getValue(I.getOperand(0)); @@ -2271,7 +2250,7 @@ void SelectionDAGBuilder::visitPtrToInt(User &I) { setValue(&I, DAG.getZExtOrTrunc(N, getCurDebugLoc(), DestVT)); } -void SelectionDAGBuilder::visitIntToPtr(User &I) { +void SelectionDAGBuilder::visitIntToPtr(const User &I) { // What to do depends on the size of the integer and the size of the pointer. 
// We can either truncate, zero extend, or no-op, accordingly. SDValue N = getValue(I.getOperand(0)); @@ -2280,7 +2259,7 @@ void SelectionDAGBuilder::visitIntToPtr(User &I) { setValue(&I, DAG.getZExtOrTrunc(N, getCurDebugLoc(), DestVT)); } -void SelectionDAGBuilder::visitBitCast(User &I) { +void SelectionDAGBuilder::visitBitCast(const User &I) { SDValue N = getValue(I.getOperand(0)); EVT DestVT = TLI.getValueType(I.getType()); @@ -2293,7 +2272,7 @@ void SelectionDAGBuilder::visitBitCast(User &I) { setValue(&I, N); // noop cast. } -void SelectionDAGBuilder::visitInsertElement(User &I) { +void SelectionDAGBuilder::visitInsertElement(const User &I) { SDValue InVec = getValue(I.getOperand(0)); SDValue InVal = getValue(I.getOperand(1)); SDValue InIdx = DAG.getNode(ISD::ZERO_EXTEND, getCurDebugLoc(), @@ -2304,7 +2283,7 @@ void SelectionDAGBuilder::visitInsertElement(User &I) { InVec, InVal, InIdx)); } -void SelectionDAGBuilder::visitExtractElement(User &I) { +void SelectionDAGBuilder::visitExtractElement(const User &I) { SDValue InVec = getValue(I.getOperand(0)); SDValue InIdx = DAG.getNode(ISD::ZERO_EXTEND, getCurDebugLoc(), TLI.getPointerTy(), @@ -2323,7 +2302,7 @@ static bool SequentialMask(SmallVectorImpl<int> &Mask, unsigned SIndx) { return true; } -void SelectionDAGBuilder::visitShuffleVector(User &I) { +void SelectionDAGBuilder::visitShuffleVector(const User &I) { SmallVector<int, 8> Mask; SDValue Src1 = getValue(I.getOperand(0)); SDValue Src2 = getValue(I.getOperand(1)); @@ -2504,7 +2483,7 @@ void SelectionDAGBuilder::visitShuffleVector(User &I) { VT, &Ops[0], Ops.size())); } -void SelectionDAGBuilder::visitInsertValue(InsertValueInst &I) { +void SelectionDAGBuilder::visitInsertValue(const InsertValueInst &I) { const Value *Op0 = I.getOperand(0); const Value *Op1 = I.getOperand(1); const Type *AggTy = I.getType(); @@ -2545,7 +2524,7 @@ void SelectionDAGBuilder::visitInsertValue(InsertValueInst &I) { &Values[0], NumAggValues)); } -void SelectionDAGBuilder::visitExtractValue(ExtractValueInst &I) { +void SelectionDAGBuilder::visitExtractValue(const ExtractValueInst &I) { const Value *Op0 = I.getOperand(0); const Type *AggTy = Op0->getType(); const Type *ValTy = I.getType(); @@ -2573,13 +2552,13 @@ void SelectionDAGBuilder::visitExtractValue(ExtractValueInst &I) { &Values[0], NumValValues)); } -void SelectionDAGBuilder::visitGetElementPtr(User &I) { +void SelectionDAGBuilder::visitGetElementPtr(const User &I) { SDValue N = getValue(I.getOperand(0)); const Type *Ty = I.getOperand(0)->getType(); - for (GetElementPtrInst::op_iterator OI = I.op_begin()+1, E = I.op_end(); + for (GetElementPtrInst::const_op_iterator OI = I.op_begin()+1, E = I.op_end(); OI != E; ++OI) { - Value *Idx = *OI; + const Value *Idx = *OI; if (const StructType *StTy = dyn_cast<StructType>(Ty)) { unsigned Field = cast<ConstantInt>(Idx)->getZExtValue(); if (Field) { @@ -2599,7 +2578,7 @@ void SelectionDAGBuilder::visitGetElementPtr(User &I) { Ty = cast<SequentialType>(Ty)->getElementType(); // If this is a constant subscript, handle it quickly. 
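The constant-subscript fast path named in the comment just above folds elementSize * index straight into the pointer offset; a stand-alone model (plain integers instead of TargetData and SDValues, used here only as an illustration) could be:

    #include <cstdint>

    // Add one constant GEP subscript to a running byte offset. The index is
    // signed (it came from getSExtValue()); unsigned wraparound matches how
    // pointer offsets behave.
    uint64_t addConstantIndex(uint64_t RunningOffset,
                              uint64_t ElementAllocSize,
                              int64_t Index) {
      if (Index == 0)
        return RunningOffset;                  // nothing to add for a zero index
      return RunningOffset +
             ElementAllocSize * static_cast<uint64_t>(Index);
    }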
- if (ConstantInt *CI = dyn_cast<ConstantInt>(Idx)) { + if (const ConstantInt *CI = dyn_cast<ConstantInt>(Idx)) { if (CI->getZExtValue() == 0) continue; uint64_t Offs = TD->getTypeAllocSize(Ty)*cast<ConstantInt>(CI)->getSExtValue(); @@ -2650,7 +2629,7 @@ void SelectionDAGBuilder::visitGetElementPtr(User &I) { setValue(&I, N); } -void SelectionDAGBuilder::visitAlloca(AllocaInst &I) { +void SelectionDAGBuilder::visitAlloca(const AllocaInst &I) { // If this is a fixed sized alloca in the entry block of the function, // allocate it statically on the stack. if (FuncInfo.StaticAllocaMap.count(&I)) @@ -2674,8 +2653,7 @@ void SelectionDAGBuilder::visitAlloca(AllocaInst &I) { // Handle alignment. If the requested alignment is less than or equal to // the stack alignment, ignore it. If the size is greater than or equal to // the stack alignment, we note this in the DYNAMIC_STACKALLOC node. - unsigned StackAlign = - TLI.getTargetMachine().getFrameInfo()->getStackAlignment(); + unsigned StackAlign = TM.getFrameInfo()->getStackAlignment(); if (Align <= StackAlign) Align = 0; @@ -2702,7 +2680,7 @@ void SelectionDAGBuilder::visitAlloca(AllocaInst &I) { FuncInfo.MF->getFrameInfo()->CreateVariableSizedObject(); } -void SelectionDAGBuilder::visitLoad(LoadInst &I) { +void SelectionDAGBuilder::visitLoad(const LoadInst &I) { const Value *SV = I.getOperand(0); SDValue Ptr = getValue(SV); @@ -2762,9 +2740,9 @@ void SelectionDAGBuilder::visitLoad(LoadInst &I) { &Values[0], NumValues)); } -void SelectionDAGBuilder::visitStore(StoreInst &I) { - Value *SrcV = I.getOperand(0); - Value *PtrV = I.getOperand(1); +void SelectionDAGBuilder::visitStore(const StoreInst &I) { + const Value *SrcV = I.getOperand(0); + const Value *PtrV = I.getOperand(1); SmallVector<EVT, 4> ValueVTs; SmallVector<uint64_t, 4> Offsets; @@ -2801,7 +2779,7 @@ void SelectionDAGBuilder::visitStore(StoreInst &I) { /// visitTargetIntrinsic - Lower a call of a target intrinsic to an INTRINSIC /// node. -void SelectionDAGBuilder::visitTargetIntrinsic(CallInst &I, +void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I, unsigned Intrinsic) { bool HasChain = !I.doesNotAccessMemory(); bool OnlyLoad = HasChain && I.onlyReadsMemory(); @@ -2927,7 +2905,8 @@ getF32Constant(SelectionDAG &DAG, unsigned Flt) { /// visitIntrinsicCall: I is a call instruction /// Op is the associated NodeType for I const char * -SelectionDAGBuilder::implVisitBinaryAtomic(CallInst& I, ISD::NodeType Op) { +SelectionDAGBuilder::implVisitBinaryAtomic(const CallInst& I, + ISD::NodeType Op) { SDValue Root = getRoot(); SDValue L = DAG.getAtomic(Op, getCurDebugLoc(), @@ -2943,7 +2922,7 @@ SelectionDAGBuilder::implVisitBinaryAtomic(CallInst& I, ISD::NodeType Op) { // implVisitAluOverflow - Lower arithmetic overflow instrinsics. const char * -SelectionDAGBuilder::implVisitAluOverflow(CallInst &I, ISD::NodeType Op) { +SelectionDAGBuilder::implVisitAluOverflow(const CallInst &I, ISD::NodeType Op) { SDValue Op1 = getValue(I.getOperand(1)); SDValue Op2 = getValue(I.getOperand(2)); @@ -2955,7 +2934,7 @@ SelectionDAGBuilder::implVisitAluOverflow(CallInst &I, ISD::NodeType Op) { /// visitExp - Lower an exp intrinsic. Handles the special sequences for /// limited-precision mode. void -SelectionDAGBuilder::visitExp(CallInst &I) { +SelectionDAGBuilder::visitExp(const CallInst &I) { SDValue result; DebugLoc dl = getCurDebugLoc(); @@ -3081,7 +3060,7 @@ SelectionDAGBuilder::visitExp(CallInst &I) { /// visitLog - Lower a log intrinsic. 
Handles the special sequences for /// limited-precision mode. void -SelectionDAGBuilder::visitLog(CallInst &I) { +SelectionDAGBuilder::visitLog(const CallInst &I) { SDValue result; DebugLoc dl = getCurDebugLoc(); @@ -3191,7 +3170,7 @@ SelectionDAGBuilder::visitLog(CallInst &I) { /// visitLog2 - Lower a log2 intrinsic. Handles the special sequences for /// limited-precision mode. void -SelectionDAGBuilder::visitLog2(CallInst &I) { +SelectionDAGBuilder::visitLog2(const CallInst &I) { SDValue result; DebugLoc dl = getCurDebugLoc(); @@ -3300,7 +3279,7 @@ SelectionDAGBuilder::visitLog2(CallInst &I) { /// visitLog10 - Lower a log10 intrinsic. Handles the special sequences for /// limited-precision mode. void -SelectionDAGBuilder::visitLog10(CallInst &I) { +SelectionDAGBuilder::visitLog10(const CallInst &I) { SDValue result; DebugLoc dl = getCurDebugLoc(); @@ -3402,7 +3381,7 @@ SelectionDAGBuilder::visitLog10(CallInst &I) { /// visitExp2 - Lower an exp2 intrinsic. Handles the special sequences for /// limited-precision mode. void -SelectionDAGBuilder::visitExp2(CallInst &I) { +SelectionDAGBuilder::visitExp2(const CallInst &I) { SDValue result; DebugLoc dl = getCurDebugLoc(); @@ -3516,9 +3495,9 @@ SelectionDAGBuilder::visitExp2(CallInst &I) { /// visitPow - Lower a pow intrinsic. Handles the special sequences for /// limited-precision mode with x == 10.0f. void -SelectionDAGBuilder::visitPow(CallInst &I) { +SelectionDAGBuilder::visitPow(const CallInst &I) { SDValue result; - Value *Val = I.getOperand(1); + const Value *Val = I.getOperand(1); DebugLoc dl = getCurDebugLoc(); bool IsExp10 = false; @@ -3664,7 +3643,7 @@ static SDValue ExpandPowI(DebugLoc DL, SDValue LHS, SDValue RHS, if (Val == 0) return DAG.getConstantFP(1.0, LHS.getValueType()); - Function *F = DAG.getMachineFunction().getFunction(); + const Function *F = DAG.getMachineFunction().getFunction(); if (!F->hasFnAttr(Attribute::OptimizeForSize) || // If optimizing for size, don't insert too many multiplies. This // inserts up to 5 multiplies. @@ -3700,12 +3679,58 @@ static SDValue ExpandPowI(DebugLoc DL, SDValue LHS, SDValue RHS, return DAG.getNode(ISD::FPOWI, DL, LHS.getValueType(), LHS, RHS); } +/// EmitFuncArgumentDbgValue - If the DbgValueInst is a dbg_value of a function +/// argument, create the corresponding DBG_VALUE machine instruction for it now. +/// At the end of instruction selection, they will be inserted to the entry BB. +bool +SelectionDAGBuilder::EmitFuncArgumentDbgValue(const DbgValueInst &DI, + const Value *V, MDNode *Variable, + uint64_t Offset, + const SDValue &N) { + if (!isa<Argument>(V)) + return false; + + MachineFunction &MF = DAG.getMachineFunction(); + // Ignore inlined function arguments here. 
+ DIVariable DV(Variable); + if (DV.isInlinedFnArgument(MF.getFunction())) + return false; + + MachineBasicBlock *MBB = FuncInfo.MBBMap[DI.getParent()]; + if (MBB != &MF.front()) + return false; + + unsigned Reg = 0; + if (N.getOpcode() == ISD::CopyFromReg) { + Reg = cast<RegisterSDNode>(N.getOperand(1))->getReg(); + if (Reg && TargetRegisterInfo::isVirtualRegister(Reg)) { + MachineRegisterInfo &RegInfo = MF.getRegInfo(); + unsigned PR = RegInfo.getLiveInPhysReg(Reg); + if (PR) + Reg = PR; + } + } + + if (!Reg) { + DenseMap<const Value *, unsigned>::iterator VMI = FuncInfo.ValueMap.find(V); + if (VMI == FuncInfo.ValueMap.end()) + return false; + Reg = VMI->second; + } + + const TargetInstrInfo *TII = DAG.getTarget().getInstrInfo(); + MachineInstrBuilder MIB = BuildMI(MF, getCurDebugLoc(), + TII->get(TargetOpcode::DBG_VALUE)) + .addReg(Reg, RegState::Debug).addImm(Offset).addMetadata(Variable); + FuncInfo.ArgDbgValues.push_back(&*MIB); + return true; +} /// visitIntrinsicCall - Lower the call to the specified intrinsic function. If /// we want to emit this as a call to a named external function, return the name /// otherwise lower it and return null. const char * -SelectionDAGBuilder::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) { +SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { DebugLoc dl = getCurDebugLoc(); SDValue Res; @@ -3792,44 +3817,76 @@ SelectionDAGBuilder::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) { return 0; } case Intrinsic::dbg_declare: { - // FIXME: currently, we get here only if OptLevel != CodeGenOpt::None. - // The real handling of this intrinsic is in FastISel. - if (OptLevel != CodeGenOpt::None) - // FIXME: Variable debug info is not supported here. - return 0; - DbgDeclareInst &DI = cast<DbgDeclareInst>(I); + const DbgDeclareInst &DI = cast<DbgDeclareInst>(I); if (!DIDescriptor::ValidDebugInfo(DI.getVariable(), CodeGenOpt::None)) return 0; MDNode *Variable = DI.getVariable(); - Value *Address = DI.getAddress(); + // Parameters are handled specially. + bool isParameter = + DIVariable(Variable).getTag() == dwarf::DW_TAG_arg_variable; + const Value *Address = DI.getAddress(); if (!Address) return 0; - if (BitCastInst *BCI = dyn_cast<BitCastInst>(Address)) + if (const BitCastInst *BCI = dyn_cast<BitCastInst>(Address)) Address = BCI->getOperand(0); - AllocaInst *AI = dyn_cast<AllocaInst>(Address); - // Don't handle byval struct arguments or VLAs, for example. - if (!AI) - return 0; - DenseMap<const AllocaInst*, int>::iterator SI = - FuncInfo.StaticAllocaMap.find(AI); - if (SI == FuncInfo.StaticAllocaMap.end()) - return 0; // VLAs. - int FI = SI->second; + const AllocaInst *AI = dyn_cast<AllocaInst>(Address); + if (AI) { + // Don't handle byval arguments or VLAs, for example. + // Non-byval arguments are handled here (they refer to the stack temporary + // alloca at this point). + DenseMap<const AllocaInst*, int>::iterator SI = + FuncInfo.StaticAllocaMap.find(AI); + if (SI == FuncInfo.StaticAllocaMap.end()) + return 0; // VLAs. + int FI = SI->second; + + MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI(); + if (!DI.getDebugLoc().isUnknown() && MMI.hasDebugInfo()) + MMI.setVariableDbgInfo(Variable, FI, DI.getDebugLoc()); + } - MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI(); - if (!DI.getDebugLoc().isUnknown() && MMI.hasDebugInfo()) - MMI.setVariableDbgInfo(Variable, FI, DI.getDebugLoc()); + // Build an entry in DbgOrdering. 
Debug info input nodes get an SDNodeOrder + // but do not always have a corresponding SDNode built. The SDNodeOrder + // absolute, but not relative, values are different depending on whether + // debug info exists. + ++SDNodeOrder; + SDValue &N = NodeMap[Address]; + SDDbgValue *SDV; + if (N.getNode()) { + if (isParameter && !AI) { + FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(N.getNode()); + if (FINode) + // Byval parameter. We have a frame index at this point. + SDV = DAG.getDbgValue(Variable, FINode->getIndex(), + 0, dl, SDNodeOrder); + else + // Can't do anything with other non-AI cases yet. This might be a + // parameter of a callee function that got inlined, for example. + return 0; + } else if (AI) + SDV = DAG.getDbgValue(Variable, N.getNode(), N.getResNo(), + 0, dl, SDNodeOrder); + else + // Can't do anything with other non-AI cases yet. + return 0; + DAG.AddDbgValue(SDV, N.getNode(), isParameter); + } else { + // This isn't useful, but it shows what we're missing. + SDV = DAG.getDbgValue(Variable, UndefValue::get(Address->getType()), + 0, dl, SDNodeOrder); + DAG.AddDbgValue(SDV, 0, isParameter); + } return 0; } case Intrinsic::dbg_value: { - DbgValueInst &DI = cast<DbgValueInst>(I); + const DbgValueInst &DI = cast<DbgValueInst>(I); if (!DIDescriptor::ValidDebugInfo(DI.getVariable(), CodeGenOpt::None)) return 0; MDNode *Variable = DI.getVariable(); uint64_t Offset = DI.getOffset(); - Value *V = DI.getValue(); + const Value *V = DI.getValue(); if (!V) return 0; @@ -3838,26 +3895,31 @@ SelectionDAGBuilder::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) { // absolute, but not relative, values are different depending on whether // debug info exists. ++SDNodeOrder; + SDDbgValue *SDV; if (isa<ConstantInt>(V) || isa<ConstantFP>(V)) { - DAG.AddDbgValue(DAG.getDbgValue(Variable, V, Offset, dl, SDNodeOrder)); + SDV = DAG.getDbgValue(Variable, V, Offset, dl, SDNodeOrder); + DAG.AddDbgValue(SDV, 0, false); } else { SDValue &N = NodeMap[V]; - if (N.getNode()) - DAG.AddDbgValue(DAG.getDbgValue(Variable, N.getNode(), - N.getResNo(), Offset, dl, SDNodeOrder), - N.getNode()); - else + if (N.getNode()) { + if (!EmitFuncArgumentDbgValue(DI, V, Variable, Offset, N)) { + SDV = DAG.getDbgValue(Variable, N.getNode(), + N.getResNo(), Offset, dl, SDNodeOrder); + DAG.AddDbgValue(SDV, N.getNode(), false); + } + } else { // We may expand this to cover more cases. One case where we have no // data available is an unreferenced parameter; we need this fallback. - DAG.AddDbgValue(DAG.getDbgValue(Variable, - UndefValue::get(V->getType()), - Offset, dl, SDNodeOrder)); + SDV = DAG.getDbgValue(Variable, UndefValue::get(V->getType()), + Offset, dl, SDNodeOrder); + DAG.AddDbgValue(SDV, 0, false); + } } // Build a debug info table entry. - if (BitCastInst *BCI = dyn_cast<BitCastInst>(V)) + if (const BitCastInst *BCI = dyn_cast<BitCastInst>(V)) V = BCI->getOperand(0); - AllocaInst *AI = dyn_cast<AllocaInst>(V); + const AllocaInst *AI = dyn_cast<AllocaInst>(V); // Don't handle byval struct arguments or VLAs, for example. if (!AI) return 0; @@ -3874,7 +3936,8 @@ SelectionDAGBuilder::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) { } case Intrinsic::eh_exception: { // Insert the EXCEPTIONADDR instruction. 
- assert(CurMBB->isLandingPad() &&"Call to eh.exception not in landing pad!"); + assert(FuncInfo.MBBMap[I.getParent()]->isLandingPad() && + "Call to eh.exception not in landing pad!"); SDVTList VTs = DAG.getVTList(TLI.getPointerTy(), MVT::Other); SDValue Ops[1]; Ops[0] = DAG.getRoot(); @@ -3885,16 +3948,17 @@ SelectionDAGBuilder::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) { } case Intrinsic::eh_selector: { + MachineBasicBlock *CallMBB = FuncInfo.MBBMap[I.getParent()]; MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI(); - if (CurMBB->isLandingPad()) - AddCatchInfo(I, &MMI, CurMBB); + if (CallMBB->isLandingPad()) + AddCatchInfo(I, &MMI, CallMBB); else { #ifndef NDEBUG FuncInfo.CatchInfoLost.insert(&I); #endif // FIXME: Mark exception selector register as live in. Hack for PR1508. unsigned Reg = TLI.getExceptionSelectorRegister(); - if (Reg) CurMBB->addLiveIn(Reg); + if (Reg) FuncInfo.MBBMap[I.getParent()]->addLiveIn(Reg); } // Insert the EHSELECTION instruction. @@ -3977,7 +4041,7 @@ SelectionDAGBuilder::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) { case Intrinsic::convertuu: Code = ISD::CVT_UU; break; } EVT DestVT = TLI.getValueType(I.getType()); - Value *Op1 = I.getOperand(1); + const Value *Op1 = I.getOperand(1); Res = DAG.getConvertRndSat(DestVT, getCurDebugLoc(), getValue(Op1), DAG.getValueType(DestVT), DAG.getValueType(getValue(Op1).getValueType()), @@ -4146,8 +4210,8 @@ SelectionDAGBuilder::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) { } case Intrinsic::gcroot: if (GFI) { - Value *Alloca = I.getOperand(1); - Constant *TypeMap = cast<Constant>(I.getOperand(2)); + const Value *Alloca = I.getOperand(1); + const Constant *TypeMap = cast<Constant>(I.getOperand(2)); FrameIndexSDNode *FI = cast<FrameIndexSDNode>(getValue(Alloca).getNode()); GFI->addStackRoot(FI->getIndex(), TypeMap); @@ -4244,93 +4308,7 @@ SelectionDAGBuilder::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) { } } -/// Test if the given instruction is in a position to be optimized -/// with a tail-call. This roughly means that it's in a block with -/// a return and there's nothing that needs to be scheduled -/// between it and the return. -/// -/// This function only tests target-independent requirements. -static bool -isInTailCallPosition(CallSite CS, Attributes CalleeRetAttr, - const TargetLowering &TLI) { - const Instruction *I = CS.getInstruction(); - const BasicBlock *ExitBB = I->getParent(); - const TerminatorInst *Term = ExitBB->getTerminator(); - const ReturnInst *Ret = dyn_cast<ReturnInst>(Term); - const Function *F = ExitBB->getParent(); - - // The block must end in a return statement or unreachable. - // - // FIXME: Decline tailcall if it's not guaranteed and if the block ends in - // an unreachable, for now. The way tailcall optimization is currently - // implemented means it will add an epilogue followed by a jump. That is - // not profitable. Also, if the callee is a special function (e.g. - // longjmp on x86), it can end up causing miscompilation that has not - // been fully understood. - if (!Ret && - (!GuaranteedTailCallOpt || !isa<UnreachableInst>(Term))) return false; - - // If I will have a chain, make sure no other instruction that will have a - // chain interposes between I and the return. - if (I->mayHaveSideEffects() || I->mayReadFromMemory() || - !I->isSafeToSpeculativelyExecute()) - for (BasicBlock::const_iterator BBI = prior(prior(ExitBB->end())); ; - --BBI) { - if (&*BBI == I) - break; - // Debug info intrinsics do not get in the way of tail call optimization. 
- if (isa<DbgInfoIntrinsic>(BBI)) - continue; - if (BBI->mayHaveSideEffects() || BBI->mayReadFromMemory() || - !BBI->isSafeToSpeculativelyExecute()) - return false; - } - - // If the block ends with a void return or unreachable, it doesn't matter - // what the call's return type is. - if (!Ret || Ret->getNumOperands() == 0) return true; - - // If the return value is undef, it doesn't matter what the call's - // return type is. - if (isa<UndefValue>(Ret->getOperand(0))) return true; - - // Conservatively require the attributes of the call to match those of - // the return. Ignore noalias because it doesn't affect the call sequence. - unsigned CallerRetAttr = F->getAttributes().getRetAttributes(); - if ((CalleeRetAttr ^ CallerRetAttr) & ~Attribute::NoAlias) - return false; - - // It's not safe to eliminate the sign / zero extension of the return value. - if ((CallerRetAttr & Attribute::ZExt) || (CallerRetAttr & Attribute::SExt)) - return false; - - // Otherwise, make sure the unmodified return value of I is the return value. - for (const Instruction *U = dyn_cast<Instruction>(Ret->getOperand(0)); ; - U = dyn_cast<Instruction>(U->getOperand(0))) { - if (!U) - return false; - if (!U->hasOneUse()) - return false; - if (U == I) - break; - // Check for a truly no-op truncate. - if (isa<TruncInst>(U) && - TLI.isTruncateFree(U->getOperand(0)->getType(), U->getType())) - continue; - // Check for a truly no-op bitcast. - if (isa<BitCastInst>(U) && - (U->getOperand(0)->getType() == U->getType() || - (U->getOperand(0)->getType()->isPointerTy() && - U->getType()->isPointerTy()))) - continue; - // Otherwise it's not a true no-op. - return false; - } - - return true; -} - -void SelectionDAGBuilder::LowerCallTo(CallSite CS, SDValue Callee, +void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, bool isTailCall, MachineBasicBlock *LandingPad) { const PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType()); @@ -4378,7 +4356,7 @@ void SelectionDAGBuilder::LowerCallTo(CallSite CS, SDValue Callee, RetTy = Type::getVoidTy(FTy->getContext()); } - for (CallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end(); + for (ImmutableCallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end(); i != e; ++i) { SDValue ArgNode = getValue(*i); Entry.Node = ArgNode; Entry.Ty = (*i)->getType(); @@ -4509,12 +4487,12 @@ void SelectionDAGBuilder::LowerCallTo(CallSite CS, SDValue Callee, /// IsOnlyUsedInZeroEqualityComparison - Return true if it only matters that the /// value is equal or not-equal to zero. -static bool IsOnlyUsedInZeroEqualityComparison(Value *V) { - for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); +static bool IsOnlyUsedInZeroEqualityComparison(const Value *V) { + for (Value::const_use_iterator UI = V->use_begin(), E = V->use_end(); UI != E; ++UI) { - if (ICmpInst *IC = dyn_cast<ICmpInst>(*UI)) + if (const ICmpInst *IC = dyn_cast<ICmpInst>(*UI)) if (IC->isEquality()) - if (Constant *C = dyn_cast<Constant>(IC->getOperand(1))) + if (const Constant *C = dyn_cast<Constant>(IC->getOperand(1))) if (C->isNullValue()) continue; // Unknown instruction. @@ -4523,17 +4501,20 @@ static bool IsOnlyUsedInZeroEqualityComparison(Value *V) { return true; } -static SDValue getMemCmpLoad(Value *PtrVal, MVT LoadVT, const Type *LoadTy, +static SDValue getMemCmpLoad(const Value *PtrVal, MVT LoadVT, + const Type *LoadTy, SelectionDAGBuilder &Builder) { // Check to see if this load can be trivially constant folded, e.g. if the // input is from a string literal. 
- if (Constant *LoadInput = dyn_cast<Constant>(PtrVal)) { + if (const Constant *LoadInput = dyn_cast<Constant>(PtrVal)) { // Cast pointer to the type we really want to load. - LoadInput = ConstantExpr::getBitCast(LoadInput, + LoadInput = ConstantExpr::getBitCast(const_cast<Constant *>(LoadInput), PointerType::getUnqual(LoadTy)); - if (Constant *LoadCst = ConstantFoldLoadFromConstPtr(LoadInput, Builder.TD)) + if (const Constant *LoadCst = + ConstantFoldLoadFromConstPtr(const_cast<Constant *>(LoadInput), + Builder.TD)) return Builder.getValue(LoadCst); } @@ -4566,18 +4547,18 @@ static SDValue getMemCmpLoad(Value *PtrVal, MVT LoadVT, const Type *LoadTy, /// visitMemCmpCall - See if we can lower a call to memcmp in an optimized form. /// If so, return true and lower it, otherwise return false and it will be /// lowered like a normal call. -bool SelectionDAGBuilder::visitMemCmpCall(CallInst &I) { +bool SelectionDAGBuilder::visitMemCmpCall(const CallInst &I) { // Verify that the prototype makes sense. int memcmp(void*,void*,size_t) if (I.getNumOperands() != 4) return false; - Value *LHS = I.getOperand(1), *RHS = I.getOperand(2); + const Value *LHS = I.getOperand(1), *RHS = I.getOperand(2); if (!LHS->getType()->isPointerTy() || !RHS->getType()->isPointerTy() || !I.getOperand(3)->getType()->isIntegerTy() || !I.getType()->isIntegerTy()) return false; - ConstantInt *Size = dyn_cast<ConstantInt>(I.getOperand(3)); + const ConstantInt *Size = dyn_cast<ConstantInt>(I.getOperand(3)); // memcmp(S1,S2,2) != 0 -> (*(short*)LHS != *(short*)RHS) != 0 // memcmp(S1,S2,4) != 0 -> (*(int*)LHS != *(int*)RHS) != 0 @@ -4643,11 +4624,11 @@ bool SelectionDAGBuilder::visitMemCmpCall(CallInst &I) { } -void SelectionDAGBuilder::visitCall(CallInst &I) { +void SelectionDAGBuilder::visitCall(const CallInst &I) { const char *RenameFn = 0; if (Function *F = I.getCalledFunction()) { if (F->isDeclaration()) { - const TargetIntrinsicInfo *II = TLI.getTargetMachine().getIntrinsicInfo(); + const TargetIntrinsicInfo *II = TM.getIntrinsicInfo(); if (II) { if (unsigned IID = II->getIntrinsicID(F)) { RenameFn = visitIntrinsicCall(I, IID); @@ -4871,14 +4852,13 @@ void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG, DebugLoc dl, /// AddInlineAsmOperands - Add this value to the specified inlineasm node /// operand list. This adds the code marker and includes the number of /// values added into it. -void RegsForValue::AddInlineAsmOperands(unsigned Code, - bool HasMatching,unsigned MatchingIdx, +void RegsForValue::AddInlineAsmOperands(unsigned Code, bool HasMatching, + unsigned MatchingIdx, SelectionDAG &DAG, std::vector<SDValue> &Ops) const { - assert(Regs.size() < (1 << 13) && "Too many inline asm outputs!"); - unsigned Flag = Code | (Regs.size() << 3); + unsigned Flag = InlineAsm::getFlagWord(Code, Regs.size()); if (HasMatching) - Flag |= 0x80000000 | (MatchingIdx << 16); + Flag = InlineAsm::getFlagWordForMatchingOp(Flag, MatchingIdx); SDValue Res = DAG.getTargetConstant(Flag, MVT::i32); Ops.push_back(Res); @@ -4994,7 +4974,7 @@ public: if (isIndirect) { const llvm::PointerType *PtrTy = dyn_cast<PointerType>(OpTy); if (!PtrTy) - llvm_report_error("Indirect operand for inline asm not a pointer!"); + report_fatal_error("Indirect operand for inline asm not a pointer!"); OpTy = PtrTy->getElementType(); } @@ -5214,31 +5194,10 @@ GetRegistersForValue(SDISelAsmOperandInfo &OpInfo, // Otherwise, we couldn't allocate enough registers for this. 
} -/// hasInlineAsmMemConstraint - Return true if the inline asm instruction being -/// processed uses a memory 'm' constraint. -static bool -hasInlineAsmMemConstraint(std::vector<InlineAsm::ConstraintInfo> &CInfos, - const TargetLowering &TLI) { - for (unsigned i = 0, e = CInfos.size(); i != e; ++i) { - InlineAsm::ConstraintInfo &CI = CInfos[i]; - for (unsigned j = 0, ee = CI.Codes.size(); j != ee; ++j) { - TargetLowering::ConstraintType CType = TLI.getConstraintType(CI.Codes[j]); - if (CType == TargetLowering::C_Memory) - return true; - } - - // Indirect operand accesses access memory. - if (CI.isIndirect) - return true; - } - - return false; -} - /// visitInlineAsm - Handle a call to an InlineAsm object. /// -void SelectionDAGBuilder::visitInlineAsm(CallSite CS) { - InlineAsm *IA = cast<InlineAsm>(CS.getCalledValue()); +void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { + const InlineAsm *IA = cast<InlineAsm>(CS.getCalledValue()); /// ConstraintOperands - Information about all of the constraints. std::vector<SDISelAsmOperandInfo> ConstraintOperands; @@ -5274,7 +5233,7 @@ void SelectionDAGBuilder::visitInlineAsm(CallSite CS) { case InlineAsm::isOutput: // Indirect outputs just consume an argument. if (OpInfo.isIndirect) { - OpInfo.CallOperandVal = CS.getArgument(ArgNo++); + OpInfo.CallOperandVal = const_cast<Value *>(CS.getArgument(ArgNo++)); break; } @@ -5291,7 +5250,7 @@ void SelectionDAGBuilder::visitInlineAsm(CallSite CS) { ++ResNo; break; case InlineAsm::isInput: - OpInfo.CallOperandVal = CS.getArgument(ArgNo++); + OpInfo.CallOperandVal = const_cast<Value *>(CS.getArgument(ArgNo++)); break; case InlineAsm::isClobber: // Nothing to do. @@ -5304,7 +5263,7 @@ void SelectionDAGBuilder::visitInlineAsm(CallSite CS) { // Strip bitcasts, if any. This mostly comes up for functions. OpInfo.CallOperandVal = OpInfo.CallOperandVal->stripPointerCasts(); - if (BasicBlock *BB = dyn_cast<BasicBlock>(OpInfo.CallOperandVal)) { + if (const BasicBlock *BB = dyn_cast<BasicBlock>(OpInfo.CallOperandVal)) { OpInfo.CallOperand = DAG.getBasicBlock(FuncInfo.MBBMap[BB]); } else { OpInfo.CallOperand = getValue(OpInfo.CallOperandVal); @@ -5327,14 +5286,15 @@ void SelectionDAGBuilder::visitInlineAsm(CallSite CS) { // error. if (OpInfo.hasMatchingInput()) { SDISelAsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput]; + if (OpInfo.ConstraintVT != Input.ConstraintVT) { if ((OpInfo.ConstraintVT.isInteger() != Input.ConstraintVT.isInteger()) || (OpInfo.ConstraintVT.getSizeInBits() != Input.ConstraintVT.getSizeInBits())) { - llvm_report_error("Unsupported asm: input constraint" - " with a matching output constraint of incompatible" - " type!"); + report_fatal_error("Unsupported asm: input constraint" + " with a matching output constraint of" + " incompatible type!"); } Input.ConstraintVT = OpInfo.ConstraintVT; } @@ -5356,7 +5316,7 @@ void SelectionDAGBuilder::visitInlineAsm(CallSite CS) { // If the operand is a float, integer, or vector constant, spill to a // constant pool entry to get its address. 
- Value *OpVal = OpInfo.CallOperandVal; + const Value *OpVal = OpInfo.CallOperandVal; if (isa<ConstantFP>(OpVal) || isa<ConstantInt>(OpVal) || isa<ConstantVector>(OpVal)) { OpInfo.CallOperand = DAG.getConstantPool(cast<Constant>(OpVal), @@ -5409,6 +5369,11 @@ void SelectionDAGBuilder::visitInlineAsm(CallSite CS) { DAG.getTargetExternalSymbol(IA->getAsmString().c_str(), TLI.getPointerTy())); + // If we have a !srcloc metadata node associated with it, we want to attach + // this to the ultimately generated inline asm machineinstr. To do this, we + // pass in the third operand as this (potentially null) inline asm MDNode. + const MDNode *SrcLoc = CS.getInstruction()->getMetadata("srcloc"); + AsmNodeOperands.push_back(DAG.getMDNode(SrcLoc)); // Loop over all of the inputs, copying the operand values into the // appropriate registers and processing the output regs. @@ -5428,8 +5393,8 @@ void SelectionDAGBuilder::visitInlineAsm(CallSite CS) { assert(OpInfo.isIndirect && "Memory output must be indirect operand"); // Add information to the INLINEASM node to know about this output. - unsigned ResOpType = 4/*MEM*/ | (1<<3); - AsmNodeOperands.push_back(DAG.getTargetConstant(ResOpType, + unsigned OpFlags = InlineAsm::getFlagWord(InlineAsm::Kind_Mem, 1); + AsmNodeOperands.push_back(DAG.getTargetConstant(OpFlags, TLI.getPointerTy())); AsmNodeOperands.push_back(OpInfo.CallOperand); break; @@ -5439,10 +5404,9 @@ void SelectionDAGBuilder::visitInlineAsm(CallSite CS) { // Copy the output from the appropriate register. Find a register that // we can use. - if (OpInfo.AssignedRegs.Regs.empty()) { - llvm_report_error("Couldn't allocate output reg for" - " constraint '" + OpInfo.ConstraintCode + "'!"); - } + if (OpInfo.AssignedRegs.Regs.empty()) + report_fatal_error("Couldn't allocate output reg for constraint '" + + Twine(OpInfo.ConstraintCode) + "'!"); // If this is an indirect operand, store through the pointer after the // asm. @@ -5459,8 +5423,8 @@ void SelectionDAGBuilder::visitInlineAsm(CallSite CS) { // Add information to the INLINEASM node to know that this register is // set. OpInfo.AssignedRegs.AddInlineAsmOperands(OpInfo.isEarlyClobber ? - 6 /* EARLYCLOBBER REGDEF */ : - 2 /* REGDEF */ , + InlineAsm::Kind_RegDefEarlyClobber : + InlineAsm::Kind_RegDef, false, 0, DAG, @@ -5477,27 +5441,30 @@ void SelectionDAGBuilder::visitInlineAsm(CallSite CS) { // Scan until we find the definition we already emitted of this operand. // When we find it, create a RegsForValue operand. - unsigned CurOp = 2; // The first operand. + unsigned CurOp = InlineAsm::Op_FirstOperand; for (; OperandNo; --OperandNo) { // Advance to the next operand. unsigned OpFlag = cast<ConstantSDNode>(AsmNodeOperands[CurOp])->getZExtValue(); - assert(((OpFlag & 7) == 2 /*REGDEF*/ || - (OpFlag & 7) == 6 /*EARLYCLOBBER REGDEF*/ || - (OpFlag & 7) == 4 /*MEM*/) && - "Skipped past definitions?"); + assert((InlineAsm::isRegDefKind(OpFlag) || + InlineAsm::isRegDefEarlyClobberKind(OpFlag) || + InlineAsm::isMemKind(OpFlag)) && "Skipped past definitions?"); CurOp += InlineAsm::getNumOperandRegisters(OpFlag)+1; } unsigned OpFlag = cast<ConstantSDNode>(AsmNodeOperands[CurOp])->getZExtValue(); - if ((OpFlag & 7) == 2 /*REGDEF*/ - || (OpFlag & 7) == 6 /* EARLYCLOBBER REGDEF */) { + if (InlineAsm::isRegDefKind(OpFlag) || + InlineAsm::isRegDefEarlyClobberKind(OpFlag)) { // Add (OpFlag&0xffff)>>3 registers to MatchedRegs. 
if (OpInfo.isIndirect) { - llvm_report_error("Don't know how to handle tied indirect " - "register inputs yet!"); + // This happens on gcc/testsuite/gcc.dg/pr8788-1.c + LLVMContext &Ctx = *DAG.getContext(); + Ctx.emitError(CS.getInstruction(), "inline asm not supported yet:" + " don't know how to handle tied " + "indirect register inputs"); } + RegsForValue MatchedRegs; MatchedRegs.TLI = &TLI; MatchedRegs.ValueVTs.push_back(InOperandVal.getValueType()); @@ -5512,22 +5479,23 @@ void SelectionDAGBuilder::visitInlineAsm(CallSite CS) { // Use the produced MatchedRegs object to MatchedRegs.getCopyToRegs(InOperandVal, DAG, getCurDebugLoc(), Chain, &Flag); - MatchedRegs.AddInlineAsmOperands(1 /*REGUSE*/, + MatchedRegs.AddInlineAsmOperands(InlineAsm::Kind_RegUse, true, OpInfo.getMatchedOperand(), DAG, AsmNodeOperands); break; - } else { - assert(((OpFlag & 7) == 4) && "Unknown matching constraint!"); - assert((InlineAsm::getNumOperandRegisters(OpFlag)) == 1 && - "Unexpected number of operands"); - // Add information to the INLINEASM node to know about this input. - // See InlineAsm.h isUseOperandTiedToDef. - OpFlag |= 0x80000000 | (OpInfo.getMatchedOperand() << 16); - AsmNodeOperands.push_back(DAG.getTargetConstant(OpFlag, - TLI.getPointerTy())); - AsmNodeOperands.push_back(AsmNodeOperands[CurOp+1]); - break; } + + assert(InlineAsm::isMemKind(OpFlag) && "Unknown matching constraint!"); + assert(InlineAsm::getNumOperandRegisters(OpFlag) == 1 && + "Unexpected number of operands"); + // Add information to the INLINEASM node to know about this input. + // See InlineAsm.h isUseOperandTiedToDef. + OpFlag = InlineAsm::getFlagWordForMatchingOp(OpFlag, + OpInfo.getMatchedOperand()); + AsmNodeOperands.push_back(DAG.getTargetConstant(OpFlag, + TLI.getPointerTy())); + AsmNodeOperands.push_back(AsmNodeOperands[CurOp+1]); + break; } if (OpInfo.ConstraintType == TargetLowering::C_Other) { @@ -5537,24 +5505,26 @@ void SelectionDAGBuilder::visitInlineAsm(CallSite CS) { std::vector<SDValue> Ops; TLI.LowerAsmOperandForConstraint(InOperandVal, OpInfo.ConstraintCode[0], hasMemory, Ops, DAG); - if (Ops.empty()) { - llvm_report_error("Invalid operand for inline asm" - " constraint '" + OpInfo.ConstraintCode + "'!"); - } + if (Ops.empty()) + report_fatal_error("Invalid operand for inline asm constraint '" + + Twine(OpInfo.ConstraintCode) + "'!"); // Add information to the INLINEASM node to know about this input. - unsigned ResOpType = 3 /*IMM*/ | (Ops.size() << 3); + unsigned ResOpType = + InlineAsm::getFlagWord(InlineAsm::Kind_Imm, Ops.size()); AsmNodeOperands.push_back(DAG.getTargetConstant(ResOpType, TLI.getPointerTy())); AsmNodeOperands.insert(AsmNodeOperands.end(), Ops.begin(), Ops.end()); break; - } else if (OpInfo.ConstraintType == TargetLowering::C_Memory) { + } + + if (OpInfo.ConstraintType == TargetLowering::C_Memory) { assert(OpInfo.isIndirect && "Operand must be indirect to be a mem!"); assert(InOperandVal.getValueType() == TLI.getPointerTy() && "Memory operands expect pointer values"); // Add information to the INLINEASM node to know about this input. - unsigned ResOpType = 4/*MEM*/ | (1<<3); + unsigned ResOpType = InlineAsm::getFlagWord(InlineAsm::Kind_Mem, 1); AsmNodeOperands.push_back(DAG.getTargetConstant(ResOpType, TLI.getPointerTy())); AsmNodeOperands.push_back(InOperandVal); @@ -5569,15 +5539,14 @@ void SelectionDAGBuilder::visitInlineAsm(CallSite CS) { // Copy the input into the appropriate registers. 
if (OpInfo.AssignedRegs.Regs.empty() || - !OpInfo.AssignedRegs.areValueTypesLegal()) { - llvm_report_error("Couldn't allocate input reg for" - " constraint '"+ OpInfo.ConstraintCode +"'!"); - } + !OpInfo.AssignedRegs.areValueTypesLegal()) + report_fatal_error("Couldn't allocate input reg for constraint '" + + Twine(OpInfo.ConstraintCode) + "'!"); OpInfo.AssignedRegs.getCopyToRegs(InOperandVal, DAG, getCurDebugLoc(), Chain, &Flag); - OpInfo.AssignedRegs.AddInlineAsmOperands(1/*REGUSE*/, false, 0, + OpInfo.AssignedRegs.AddInlineAsmOperands(InlineAsm::Kind_RegUse, false, 0, DAG, AsmNodeOperands); break; } @@ -5585,7 +5554,8 @@ void SelectionDAGBuilder::visitInlineAsm(CallSite CS) { // Add the clobbered value to the operand list, so that the register // allocator is aware that the physreg got clobbered. if (!OpInfo.AssignedRegs.Regs.empty()) - OpInfo.AssignedRegs.AddInlineAsmOperands(6 /* EARLYCLOBBER REGDEF */, + OpInfo.AssignedRegs.AddInlineAsmOperands( + InlineAsm::Kind_RegDefEarlyClobber, false, 0, DAG, AsmNodeOperands); break; @@ -5593,7 +5563,7 @@ void SelectionDAGBuilder::visitInlineAsm(CallSite CS) { } } - // Finish up input operands. + // Finish up input operands. Set the input chain and add the flag last. AsmNodeOperands[0] = Chain; if (Flag.getNode()) AsmNodeOperands.push_back(Flag); @@ -5638,17 +5608,16 @@ void SelectionDAGBuilder::visitInlineAsm(CallSite CS) { return; } - std::vector<std::pair<SDValue, Value*> > StoresToEmit; + std::vector<std::pair<SDValue, const Value *> > StoresToEmit; // Process indirect outputs, first output all of the flagged copies out of // physregs. for (unsigned i = 0, e = IndirectStoresToEmit.size(); i != e; ++i) { RegsForValue &OutRegs = IndirectStoresToEmit[i].first; - Value *Ptr = IndirectStoresToEmit[i].second; + const Value *Ptr = IndirectStoresToEmit[i].second; SDValue OutVal = OutRegs.getCopyFromRegs(DAG, getCurDebugLoc(), Chain, &Flag); StoresToEmit.push_back(std::make_pair(OutVal, Ptr)); - } // Emit the non-flagged stores from the physregs. @@ -5669,14 +5638,14 @@ void SelectionDAGBuilder::visitInlineAsm(CallSite CS) { DAG.setRoot(Chain); } -void SelectionDAGBuilder::visitVAStart(CallInst &I) { +void SelectionDAGBuilder::visitVAStart(const CallInst &I) { DAG.setRoot(DAG.getNode(ISD::VASTART, getCurDebugLoc(), MVT::Other, getRoot(), getValue(I.getOperand(1)), DAG.getSrcValue(I.getOperand(1)))); } -void SelectionDAGBuilder::visitVAArg(VAArgInst &I) { +void SelectionDAGBuilder::visitVAArg(const VAArgInst &I) { SDValue V = DAG.getVAArg(TLI.getValueType(I.getType()), getCurDebugLoc(), getRoot(), getValue(I.getOperand(0)), DAG.getSrcValue(I.getOperand(0))); @@ -5684,14 +5653,14 @@ void SelectionDAGBuilder::visitVAArg(VAArgInst &I) { DAG.setRoot(V.getValue(1)); } -void SelectionDAGBuilder::visitVAEnd(CallInst &I) { +void SelectionDAGBuilder::visitVAEnd(const CallInst &I) { DAG.setRoot(DAG.getNode(ISD::VAEND, getCurDebugLoc(), MVT::Other, getRoot(), getValue(I.getOperand(1)), DAG.getSrcValue(I.getOperand(1)))); } -void SelectionDAGBuilder::visitVACopy(CallInst &I) { +void SelectionDAGBuilder::visitVACopy(const CallInst &I) { DAG.setRoot(DAG.getNode(ISD::VACOPY, getCurDebugLoc(), MVT::Other, getRoot(), getValue(I.getOperand(1)), @@ -5711,7 +5680,8 @@ TargetLowering::LowerCallTo(SDValue Chain, const Type *RetTy, CallingConv::ID CallConv, bool isTailCall, bool isReturnValueUsed, SDValue Callee, - ArgListTy &Args, SelectionDAG &DAG, DebugLoc dl) { + ArgListTy &Args, SelectionDAG &DAG, + DebugLoc dl) const { // Handle all of the outgoing arguments. 
SmallVector<ISD::OutputArg, 32> Outs; for (unsigned i = 0, e = Args.size(); i != e; ++i) { @@ -5862,18 +5832,19 @@ TargetLowering::LowerCallTo(SDValue Chain, const Type *RetTy, void TargetLowering::LowerOperationWrapper(SDNode *N, SmallVectorImpl<SDValue> &Results, - SelectionDAG &DAG) { + SelectionDAG &DAG) const { SDValue Res = LowerOperation(SDValue(N, 0), DAG); if (Res.getNode()) Results.push_back(Res); } -SDValue TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) { +SDValue TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { llvm_unreachable("LowerOperation not implemented for this target!"); return SDValue(); } -void SelectionDAGBuilder::CopyValueToVirtualRegister(Value *V, unsigned Reg) { +void +SelectionDAGBuilder::CopyValueToVirtualRegister(const Value *V, unsigned Reg) { SDValue Op = getValue(V); assert((Op.getOpcode() != ISD::CopyFromReg || cast<RegisterSDNode>(Op.getOperand(1))->getReg() != Reg) && @@ -5888,9 +5859,9 @@ void SelectionDAGBuilder::CopyValueToVirtualRegister(Value *V, unsigned Reg) { #include "llvm/CodeGen/SelectionDAGISel.h" -void SelectionDAGISel::LowerArguments(BasicBlock *LLVMBB) { +void SelectionDAGISel::LowerArguments(const BasicBlock *LLVMBB) { // If this is the entry block, emit arguments. - Function &F = *LLVMBB->getParent(); + const Function &F = *LLVMBB->getParent(); SelectionDAG &DAG = SDB->DAG; SDValue OldRoot = DAG.getRoot(); DebugLoc dl = SDB->getCurDebugLoc(); @@ -5915,14 +5886,14 @@ void SelectionDAGISel::LowerArguments(BasicBlock *LLVMBB) { // or one register. ISD::ArgFlagsTy Flags; Flags.setSRet(); - EVT RegisterVT = TLI.getRegisterType(*CurDAG->getContext(), ValueVTs[0]); + EVT RegisterVT = TLI.getRegisterType(*DAG.getContext(), ValueVTs[0]); ISD::InputArg RetArg(Flags, RegisterVT, true); Ins.push_back(RetArg); } // Set up the incoming argument description vector. unsigned Idx = 1; - for (Function::arg_iterator I = F.arg_begin(), E = F.arg_end(); + for (Function::const_arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E; ++I, ++Idx) { SmallVector<EVT, 4> ValueVTs; ComputeValueVTs(TLI, I->getType(), ValueVTs); @@ -6024,7 +5995,7 @@ void SelectionDAGISel::LowerArguments(BasicBlock *LLVMBB) { ++i; } - for (Function::arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E; + for (Function::const_arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E; ++I, ++Idx) { SmallVector<SDValue, 4> ArgValues; SmallVector<EVT, 4> ValueVTs; @@ -6067,7 +6038,7 @@ void SelectionDAGISel::LowerArguments(BasicBlock *LLVMBB) { // Finally, if the target has anything special to do, allow it to do so. // FIXME: this should insert code into the DAG! - EmitFunctionEntryCode(F, SDB->DAG.getMachineFunction()); + EmitFunctionEntryCode(); } /// Handle PHI nodes in successor blocks. Emit code into the SelectionDAG to @@ -6078,51 +6049,50 @@ void SelectionDAGISel::LowerArguments(BasicBlock *LLVMBB) { /// the end. /// void -SelectionDAGISel::HandlePHINodesInSuccessorBlocks(BasicBlock *LLVMBB) { - TerminatorInst *TI = LLVMBB->getTerminator(); +SelectionDAGBuilder::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) { + const TerminatorInst *TI = LLVMBB->getTerminator(); SmallPtrSet<MachineBasicBlock *, 4> SuccsHandled; // Check successor nodes' PHI nodes that expect a constant to be available // from this block. 
for (unsigned succ = 0, e = TI->getNumSuccessors(); succ != e; ++succ) { - BasicBlock *SuccBB = TI->getSuccessor(succ); + const BasicBlock *SuccBB = TI->getSuccessor(succ); if (!isa<PHINode>(SuccBB->begin())) continue; - MachineBasicBlock *SuccMBB = FuncInfo->MBBMap[SuccBB]; + MachineBasicBlock *SuccMBB = FuncInfo.MBBMap[SuccBB]; // If this terminator has multiple identical successors (common for // switches), only handle each succ once. if (!SuccsHandled.insert(SuccMBB)) continue; MachineBasicBlock::iterator MBBI = SuccMBB->begin(); - PHINode *PN; // At this point we know that there is a 1-1 correspondence between LLVM PHI // nodes and Machine PHI nodes, but the incoming operands have not been // emitted yet. - for (BasicBlock::iterator I = SuccBB->begin(); - (PN = dyn_cast<PHINode>(I)); ++I) { + for (BasicBlock::const_iterator I = SuccBB->begin(); + const PHINode *PN = dyn_cast<PHINode>(I); ++I) { // Ignore dead phi's. if (PN->use_empty()) continue; unsigned Reg; - Value *PHIOp = PN->getIncomingValueForBlock(LLVMBB); + const Value *PHIOp = PN->getIncomingValueForBlock(LLVMBB); - if (Constant *C = dyn_cast<Constant>(PHIOp)) { - unsigned &RegOut = SDB->ConstantsOut[C]; + if (const Constant *C = dyn_cast<Constant>(PHIOp)) { + unsigned &RegOut = ConstantsOut[C]; if (RegOut == 0) { - RegOut = FuncInfo->CreateRegForValue(C); - SDB->CopyValueToVirtualRegister(C, RegOut); + RegOut = FuncInfo.CreateRegForValue(C); + CopyValueToVirtualRegister(C, RegOut); } Reg = RegOut; } else { - Reg = FuncInfo->ValueMap[PHIOp]; + Reg = FuncInfo.ValueMap[PHIOp]; if (Reg == 0) { assert(isa<AllocaInst>(PHIOp) && - FuncInfo->StaticAllocaMap.count(cast<AllocaInst>(PHIOp)) && + FuncInfo.StaticAllocaMap.count(cast<AllocaInst>(PHIOp)) && "Didn't codegen value into a register!??"); - Reg = FuncInfo->CreateRegForValue(PHIOp); - SDB->CopyValueToVirtualRegister(PHIOp, Reg); + Reg = FuncInfo.CreateRegForValue(PHIOp); + CopyValueToVirtualRegister(PHIOp, Reg); } } @@ -6132,77 +6102,12 @@ SelectionDAGISel::HandlePHINodesInSuccessorBlocks(BasicBlock *LLVMBB) { ComputeValueVTs(TLI, PN->getType(), ValueVTs); for (unsigned vti = 0, vte = ValueVTs.size(); vti != vte; ++vti) { EVT VT = ValueVTs[vti]; - unsigned NumRegisters = TLI.getNumRegisters(*CurDAG->getContext(), VT); + unsigned NumRegisters = TLI.getNumRegisters(*DAG.getContext(), VT); for (unsigned i = 0, e = NumRegisters; i != e; ++i) - SDB->PHINodesToUpdate.push_back(std::make_pair(MBBI++, Reg+i)); + FuncInfo.PHINodesToUpdate.push_back(std::make_pair(MBBI++, Reg+i)); Reg += NumRegisters; } } } - SDB->ConstantsOut.clear(); -} - -/// This is the Fast-ISel version of HandlePHINodesInSuccessorBlocks. It only -/// supports legal types, and it emits MachineInstrs directly instead of -/// creating SelectionDAG nodes. -/// -bool -SelectionDAGISel::HandlePHINodesInSuccessorBlocksFast(BasicBlock *LLVMBB, - FastISel *F) { - TerminatorInst *TI = LLVMBB->getTerminator(); - - SmallPtrSet<MachineBasicBlock *, 4> SuccsHandled; - unsigned OrigNumPHINodesToUpdate = SDB->PHINodesToUpdate.size(); - - // Check successor nodes' PHI nodes that expect a constant to be available - // from this block. - for (unsigned succ = 0, e = TI->getNumSuccessors(); succ != e; ++succ) { - BasicBlock *SuccBB = TI->getSuccessor(succ); - if (!isa<PHINode>(SuccBB->begin())) continue; - MachineBasicBlock *SuccMBB = FuncInfo->MBBMap[SuccBB]; - - // If this terminator has multiple identical successors (common for - // switches), only handle each succ once. 
- if (!SuccsHandled.insert(SuccMBB)) continue; - - MachineBasicBlock::iterator MBBI = SuccMBB->begin(); - PHINode *PN; - - // At this point we know that there is a 1-1 correspondence between LLVM PHI - // nodes and Machine PHI nodes, but the incoming operands have not been - // emitted yet. - for (BasicBlock::iterator I = SuccBB->begin(); - (PN = dyn_cast<PHINode>(I)); ++I) { - // Ignore dead phi's. - if (PN->use_empty()) continue; - - // Only handle legal types. Two interesting things to note here. First, - // by bailing out early, we may leave behind some dead instructions, - // since SelectionDAG's HandlePHINodesInSuccessorBlocks will insert its - // own moves. Second, this check is necessary becuase FastISel doesn't - // use CreateRegForValue to create registers, so it always creates - // exactly one register for each non-void instruction. - EVT VT = TLI.getValueType(PN->getType(), /*AllowUnknown=*/true); - if (VT == MVT::Other || !TLI.isTypeLegal(VT)) { - // Promote MVT::i1. - if (VT == MVT::i1) - VT = TLI.getTypeToTransformTo(*CurDAG->getContext(), VT); - else { - SDB->PHINodesToUpdate.resize(OrigNumPHINodesToUpdate); - return false; - } - } - - Value *PHIOp = PN->getIncomingValueForBlock(LLVMBB); - - unsigned Reg = F->getRegForValue(PHIOp); - if (Reg == 0) { - SDB->PHINodesToUpdate.resize(OrigNumPHINodesToUpdate); - return false; - } - SDB->PHINodesToUpdate.push_back(std::make_pair(MBBI++, Reg)); - } - } - - return true; + ConstantsOut.clear(); } diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h index fdcba0f..3fcd4b9 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h @@ -36,6 +36,7 @@ class BasicBlock; class BitCastInst; class BranchInst; class CallInst; +class DbgValueInst; class ExtractElementInst; class ExtractValueInst; class FCmpInst; @@ -58,6 +59,7 @@ class LoadInst; class MachineBasicBlock; class MachineInstr; class MachineRegisterInfo; +class MDNode; class PHINode; class PtrToIntInst; class ReturnInst; @@ -80,11 +82,8 @@ class ZExtInst; //===----------------------------------------------------------------------===// /// SelectionDAGBuilder - This is the common target-independent lowering /// implementation that is parameterized by a TargetLowering object. -/// Also, targets can overload any lowering method. /// class SelectionDAGBuilder { - MachineBasicBlock *CurMBB; - /// CurDebugLoc - current file + line number. Changes as we build the DAG. DebugLoc CurDebugLoc; @@ -143,15 +142,16 @@ private: /// CaseRec - A struct with ctor used in lowering switches to a binary tree /// of conditional branches. struct CaseRec { - CaseRec(MachineBasicBlock *bb, Constant *lt, Constant *ge, CaseRange r) : + CaseRec(MachineBasicBlock *bb, const Constant *lt, const Constant *ge, + CaseRange r) : CaseBB(bb), LT(lt), GE(ge), Range(r) {} /// CaseBB - The MBB in which to emit the compare and branch MachineBasicBlock *CaseBB; /// LT, GE - If nonzero, we know the current case value must be less-than or /// greater-than-or-equal-to these Constants. - Constant *LT; - Constant *GE; + const Constant *LT; + const Constant *GE; /// Range - A pair of iterators representing the range of case values to be /// processed at this point in the binary search tree. CaseRange Range; @@ -182,7 +182,8 @@ private: /// SelectionDAGBuilder and SDISel for the code generation of additional basic /// blocks needed by multi-case switch statements. 
struct CaseBlock { - CaseBlock(ISD::CondCode cc, Value *cmplhs, Value *cmprhs, Value *cmpmiddle, + CaseBlock(ISD::CondCode cc, const Value *cmplhs, const Value *cmprhs, + const Value *cmpmiddle, MachineBasicBlock *truebb, MachineBasicBlock *falsebb, MachineBasicBlock *me) : CC(cc), CmpLHS(cmplhs), CmpMHS(cmpmiddle), CmpRHS(cmprhs), @@ -192,7 +193,7 @@ private: // CmpLHS/CmpRHS/CmpMHS - The LHS/MHS/RHS of the comparison to emit. // Emit by default LHS op RHS. MHS is used for range comparisons: // If MHS is not null: (LHS <= MHS) and (MHS <= RHS). - Value *CmpLHS, *CmpMHS, *CmpRHS; + const Value *CmpLHS, *CmpMHS, *CmpRHS; // TrueBB/FalseBB - the block to branch to if the setcc is true/false. MachineBasicBlock *TrueBB, *FalseBB; // ThisBB - the block into which to emit the code for the setcc and branches @@ -214,12 +215,12 @@ private: MachineBasicBlock *Default; }; struct JumpTableHeader { - JumpTableHeader(APInt F, APInt L, Value *SV, MachineBasicBlock *H, + JumpTableHeader(APInt F, APInt L, const Value *SV, MachineBasicBlock *H, bool E = false): First(F), Last(L), SValue(SV), HeaderBB(H), Emitted(E) {} APInt First; APInt Last; - Value *SValue; + const Value *SValue; MachineBasicBlock *HeaderBB; bool Emitted; }; @@ -236,7 +237,7 @@ private: typedef SmallVector<BitTestCase, 3> BitTestInfo; struct BitTestBlock { - BitTestBlock(APInt F, APInt R, Value* SV, + BitTestBlock(APInt F, APInt R, const Value* SV, unsigned Rg, bool E, MachineBasicBlock* P, MachineBasicBlock* D, const BitTestInfo& C): @@ -244,7 +245,7 @@ private: Parent(P), Default(D), Cases(C) { } APInt First; APInt Range; - Value *SValue; + const Value *SValue; unsigned Reg; bool Emitted; MachineBasicBlock *Parent; @@ -256,7 +257,8 @@ public: // TLI - This is information that describes the available target features we // need for lowering. This indicates when operations are unavailable, // implemented with a libcall, etc. - TargetLowering &TLI; + const TargetMachine &TM; + const TargetLowering &TLI; SelectionDAG &DAG; const TargetData *TD; AliasAnalysis *AA; @@ -271,17 +273,9 @@ public: /// SwitchInst code generation information. std::vector<BitTestBlock> BitTestCases; - /// PHINodesToUpdate - A list of phi instructions whose operand list will - /// be updated after processing the current basic block. - std::vector<std::pair<MachineInstr*, unsigned> > PHINodesToUpdate; - - /// EdgeMapping - If an edge from CurMBB to any MBB is changed (e.g. due to - /// scheduler custom lowering), track the change here. - DenseMap<MachineBasicBlock*, MachineBasicBlock*> EdgeMapping; - // Emit PHI-node-operand constants only once even if used by multiple // PHI nodes. - DenseMap<Constant*, unsigned> ConstantsOut; + DenseMap<const Constant *, unsigned> ConstantsOut; /// FuncInfo - Information about the function as a whole. 
/// @@ -302,16 +296,16 @@ public: LLVMContext *Context; - SelectionDAGBuilder(SelectionDAG &dag, TargetLowering &tli, - FunctionLoweringInfo &funcinfo, + SelectionDAGBuilder(SelectionDAG &dag, FunctionLoweringInfo &funcinfo, CodeGenOpt::Level ol) - : SDNodeOrder(0), TLI(tli), DAG(dag), FuncInfo(funcinfo), OptLevel(ol), + : SDNodeOrder(0), TM(dag.getTarget()), TLI(dag.getTargetLoweringInfo()), + DAG(dag), FuncInfo(funcinfo), OptLevel(ol), HasTailCall(false), Context(dag.getContext()) { } void init(GCFunctionInfo *gfi, AliasAnalysis &aa); - /// clear - Clear out the curret SelectionDAG and the associated + /// clear - Clear out the current SelectionDAG and the associated /// state and prepare this SelectionDAGBuilder object to be used /// for a new block. This doesn't clear out information about /// additional blocks that are needed to complete switch lowering @@ -333,22 +327,19 @@ public: SDValue getControlRoot(); DebugLoc getCurDebugLoc() const { return CurDebugLoc; } - void setCurDebugLoc(DebugLoc dl) { CurDebugLoc = dl; } unsigned getSDNodeOrder() const { return SDNodeOrder; } - void CopyValueToVirtualRegister(Value *V, unsigned Reg); + void CopyValueToVirtualRegister(const Value *V, unsigned Reg); /// AssignOrderingToNode - Assign an ordering to the node. The order is gotten /// from how the code appeared in the source. The ordering is used by the /// scheduler to effectively turn off scheduling. void AssignOrderingToNode(const SDNode *Node); - void visit(Instruction &I); - - void visit(unsigned Opcode, User &I); + void visit(const Instruction &I); - void setCurrentBasicBlock(MachineBasicBlock *MBB) { CurMBB = MBB; } + void visit(unsigned Opcode, const User &I); SDValue getValue(const Value *V); @@ -362,136 +353,154 @@ public: std::set<unsigned> &OutputRegs, std::set<unsigned> &InputRegs); - void FindMergedConditions(Value *Cond, MachineBasicBlock *TBB, + void FindMergedConditions(const Value *Cond, MachineBasicBlock *TBB, MachineBasicBlock *FBB, MachineBasicBlock *CurBB, - unsigned Opc); - void EmitBranchForMergedCondition(Value *Cond, MachineBasicBlock *TBB, + MachineBasicBlock *SwitchBB, unsigned Opc); + void EmitBranchForMergedCondition(const Value *Cond, MachineBasicBlock *TBB, MachineBasicBlock *FBB, - MachineBasicBlock *CurBB); + MachineBasicBlock *CurBB, + MachineBasicBlock *SwitchBB); bool ShouldEmitAsBranches(const std::vector<CaseBlock> &Cases); - bool isExportableFromCurrentBlock(Value *V, const BasicBlock *FromBB); - void CopyToExportRegsIfNeeded(Value *V); - void ExportFromCurrentBlock(Value *V); - void LowerCallTo(CallSite CS, SDValue Callee, bool IsTailCall, + bool isExportableFromCurrentBlock(const Value *V, const BasicBlock *FromBB); + void CopyToExportRegsIfNeeded(const Value *V); + void ExportFromCurrentBlock(const Value *V); + void LowerCallTo(ImmutableCallSite CS, SDValue Callee, bool IsTailCall, MachineBasicBlock *LandingPad = NULL); private: // Terminator instructions. 
- void visitRet(ReturnInst &I); - void visitBr(BranchInst &I); - void visitSwitch(SwitchInst &I); - void visitIndirectBr(IndirectBrInst &I); - void visitUnreachable(UnreachableInst &I) { /* noop */ } + void visitRet(const ReturnInst &I); + void visitBr(const BranchInst &I); + void visitSwitch(const SwitchInst &I); + void visitIndirectBr(const IndirectBrInst &I); + void visitUnreachable(const UnreachableInst &I) { /* noop */ } // Helpers for visitSwitch bool handleSmallSwitchRange(CaseRec& CR, CaseRecVector& WorkList, - Value* SV, - MachineBasicBlock* Default); + const Value* SV, + MachineBasicBlock* Default, + MachineBasicBlock *SwitchBB); bool handleJTSwitchCase(CaseRec& CR, CaseRecVector& WorkList, - Value* SV, - MachineBasicBlock* Default); + const Value* SV, + MachineBasicBlock* Default, + MachineBasicBlock *SwitchBB); bool handleBTSplitSwitchCase(CaseRec& CR, CaseRecVector& WorkList, - Value* SV, - MachineBasicBlock* Default); + const Value* SV, + MachineBasicBlock* Default, + MachineBasicBlock *SwitchBB); bool handleBitTestsSwitchCase(CaseRec& CR, CaseRecVector& WorkList, - Value* SV, - MachineBasicBlock* Default); + const Value* SV, + MachineBasicBlock* Default, + MachineBasicBlock *SwitchBB); public: - void visitSwitchCase(CaseBlock &CB); - void visitBitTestHeader(BitTestBlock &B); + void visitSwitchCase(CaseBlock &CB, + MachineBasicBlock *SwitchBB); + void visitBitTestHeader(BitTestBlock &B, MachineBasicBlock *SwitchBB); void visitBitTestCase(MachineBasicBlock* NextMBB, unsigned Reg, - BitTestCase &B); + BitTestCase &B, + MachineBasicBlock *SwitchBB); void visitJumpTable(JumpTable &JT); - void visitJumpTableHeader(JumpTable &JT, JumpTableHeader &JTH); + void visitJumpTableHeader(JumpTable &JT, JumpTableHeader &JTH, + MachineBasicBlock *SwitchBB); private: // These all get lowered before this pass. 
- void visitInvoke(InvokeInst &I); - void visitUnwind(UnwindInst &I); - - void visitBinary(User &I, unsigned OpCode); - void visitShift(User &I, unsigned Opcode); - void visitAdd(User &I) { visitBinary(I, ISD::ADD); } - void visitFAdd(User &I) { visitBinary(I, ISD::FADD); } - void visitSub(User &I) { visitBinary(I, ISD::SUB); } - void visitFSub(User &I); - void visitMul(User &I) { visitBinary(I, ISD::MUL); } - void visitFMul(User &I) { visitBinary(I, ISD::FMUL); } - void visitURem(User &I) { visitBinary(I, ISD::UREM); } - void visitSRem(User &I) { visitBinary(I, ISD::SREM); } - void visitFRem(User &I) { visitBinary(I, ISD::FREM); } - void visitUDiv(User &I) { visitBinary(I, ISD::UDIV); } - void visitSDiv(User &I) { visitBinary(I, ISD::SDIV); } - void visitFDiv(User &I) { visitBinary(I, ISD::FDIV); } - void visitAnd (User &I) { visitBinary(I, ISD::AND); } - void visitOr (User &I) { visitBinary(I, ISD::OR); } - void visitXor (User &I) { visitBinary(I, ISD::XOR); } - void visitShl (User &I) { visitShift(I, ISD::SHL); } - void visitLShr(User &I) { visitShift(I, ISD::SRL); } - void visitAShr(User &I) { visitShift(I, ISD::SRA); } - void visitICmp(User &I); - void visitFCmp(User &I); + void visitInvoke(const InvokeInst &I); + void visitUnwind(const UnwindInst &I); + + void visitBinary(const User &I, unsigned OpCode); + void visitShift(const User &I, unsigned Opcode); + void visitAdd(const User &I) { visitBinary(I, ISD::ADD); } + void visitFAdd(const User &I) { visitBinary(I, ISD::FADD); } + void visitSub(const User &I) { visitBinary(I, ISD::SUB); } + void visitFSub(const User &I); + void visitMul(const User &I) { visitBinary(I, ISD::MUL); } + void visitFMul(const User &I) { visitBinary(I, ISD::FMUL); } + void visitURem(const User &I) { visitBinary(I, ISD::UREM); } + void visitSRem(const User &I) { visitBinary(I, ISD::SREM); } + void visitFRem(const User &I) { visitBinary(I, ISD::FREM); } + void visitUDiv(const User &I) { visitBinary(I, ISD::UDIV); } + void visitSDiv(const User &I) { visitBinary(I, ISD::SDIV); } + void visitFDiv(const User &I) { visitBinary(I, ISD::FDIV); } + void visitAnd (const User &I) { visitBinary(I, ISD::AND); } + void visitOr (const User &I) { visitBinary(I, ISD::OR); } + void visitXor (const User &I) { visitBinary(I, ISD::XOR); } + void visitShl (const User &I) { visitShift(I, ISD::SHL); } + void visitLShr(const User &I) { visitShift(I, ISD::SRL); } + void visitAShr(const User &I) { visitShift(I, ISD::SRA); } + void visitICmp(const User &I); + void visitFCmp(const User &I); // Visit the conversion instructions - void visitTrunc(User &I); - void visitZExt(User &I); - void visitSExt(User &I); - void visitFPTrunc(User &I); - void visitFPExt(User &I); - void visitFPToUI(User &I); - void visitFPToSI(User &I); - void visitUIToFP(User &I); - void visitSIToFP(User &I); - void visitPtrToInt(User &I); - void visitIntToPtr(User &I); - void visitBitCast(User &I); - - void visitExtractElement(User &I); - void visitInsertElement(User &I); - void visitShuffleVector(User &I); - - void visitExtractValue(ExtractValueInst &I); - void visitInsertValue(InsertValueInst &I); - - void visitGetElementPtr(User &I); - void visitSelect(User &I); - - void visitAlloca(AllocaInst &I); - void visitLoad(LoadInst &I); - void visitStore(StoreInst &I); - void visitPHI(PHINode &I) { } // PHI nodes are handled specially. 
- void visitCall(CallInst &I); - bool visitMemCmpCall(CallInst &I); + void visitTrunc(const User &I); + void visitZExt(const User &I); + void visitSExt(const User &I); + void visitFPTrunc(const User &I); + void visitFPExt(const User &I); + void visitFPToUI(const User &I); + void visitFPToSI(const User &I); + void visitUIToFP(const User &I); + void visitSIToFP(const User &I); + void visitPtrToInt(const User &I); + void visitIntToPtr(const User &I); + void visitBitCast(const User &I); + + void visitExtractElement(const User &I); + void visitInsertElement(const User &I); + void visitShuffleVector(const User &I); + + void visitExtractValue(const ExtractValueInst &I); + void visitInsertValue(const InsertValueInst &I); + + void visitGetElementPtr(const User &I); + void visitSelect(const User &I); + + void visitAlloca(const AllocaInst &I); + void visitLoad(const LoadInst &I); + void visitStore(const StoreInst &I); + void visitPHI(const PHINode &I); + void visitCall(const CallInst &I); + bool visitMemCmpCall(const CallInst &I); - void visitInlineAsm(CallSite CS); - const char *visitIntrinsicCall(CallInst &I, unsigned Intrinsic); - void visitTargetIntrinsic(CallInst &I, unsigned Intrinsic); - - void visitPow(CallInst &I); - void visitExp2(CallInst &I); - void visitExp(CallInst &I); - void visitLog(CallInst &I); - void visitLog2(CallInst &I); - void visitLog10(CallInst &I); - - void visitVAStart(CallInst &I); - void visitVAArg(VAArgInst &I); - void visitVAEnd(CallInst &I); - void visitVACopy(CallInst &I); - - void visitUserOp1(Instruction &I) { + void visitInlineAsm(ImmutableCallSite CS); + const char *visitIntrinsicCall(const CallInst &I, unsigned Intrinsic); + void visitTargetIntrinsic(const CallInst &I, unsigned Intrinsic); + + void visitPow(const CallInst &I); + void visitExp2(const CallInst &I); + void visitExp(const CallInst &I); + void visitLog(const CallInst &I); + void visitLog2(const CallInst &I); + void visitLog10(const CallInst &I); + + void visitVAStart(const CallInst &I); + void visitVAArg(const VAArgInst &I); + void visitVAEnd(const CallInst &I); + void visitVACopy(const CallInst &I); + + void visitUserOp1(const Instruction &I) { llvm_unreachable("UserOp1 should not exist at instruction selection time!"); } - void visitUserOp2(Instruction &I) { + void visitUserOp2(const Instruction &I) { llvm_unreachable("UserOp2 should not exist at instruction selection time!"); } - const char *implVisitBinaryAtomic(CallInst& I, ISD::NodeType Op); - const char *implVisitAluOverflow(CallInst &I, ISD::NodeType Op); + const char *implVisitBinaryAtomic(const CallInst& I, ISD::NodeType Op); + const char *implVisitAluOverflow(const CallInst &I, ISD::NodeType Op); + + void HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB); + + /// EmitFuncArgumentDbgValue - If the DbgValueInst is a dbg_value of a + /// function argument, create the corresponding DBG_VALUE machine instruction + /// for it now. At the end of instruction selection, they will be inserted to + /// the entry BB. 
+ bool EmitFuncArgumentDbgValue(const DbgValueInst &DI, + const Value *V, MDNode *Variable, + uint64_t Offset, const SDValue &N); }; } // end namespace llvm diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index 9b137a5..422cb7a 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -19,10 +19,7 @@ #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/DebugInfo.h" #include "llvm/Constants.h" -#include "llvm/CallingConv.h" -#include "llvm/DerivedTypes.h" #include "llvm/Function.h" -#include "llvm/GlobalVariable.h" #include "llvm/InlineAsm.h" #include "llvm/Instructions.h" #include "llvm/Intrinsics.h" @@ -32,18 +29,13 @@ #include "llvm/CodeGen/GCStrategy.h" #include "llvm/CodeGen/GCMetadata.h" #include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineFunctionAnalysis.h" -#include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/ScheduleHazardRecognizer.h" #include "llvm/CodeGen/SchedulerRegistry.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Target/TargetData.h" -#include "llvm/Target/TargetFrameInfo.h" #include "llvm/Target/TargetIntrinsicInfo.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetLowering.h" @@ -52,7 +44,6 @@ #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/MathExtras.h" #include "llvm/Support/Timer.h" #include "llvm/Support/raw_ostream.h" #include "llvm/ADT/Statistic.h" @@ -69,10 +60,6 @@ EnableFastISelVerbose("fast-isel-verbose", cl::Hidden, static cl::opt<bool> EnableFastISelAbort("fast-isel-abort", cl::Hidden, cl::desc("Enable abort calls when \"fast\" instruction fails")); -static cl::opt<bool> -SchedLiveInCopies("schedule-livein-copies", cl::Hidden, - cl::desc("Schedule copies of livein registers"), - cl::init(false)); #ifndef NDEBUG static cl::opt<bool> @@ -161,9 +148,9 @@ namespace llvm { // When new basic blocks are inserted and the edges from MBB to its successors // are modified, the method should insert pairs of <OldSucc, NewSucc> into the // DenseMap. -MachineBasicBlock *TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, - MachineBasicBlock *MBB, - DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const { +MachineBasicBlock * +TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, + MachineBasicBlock *MBB) const { #ifndef NDEBUG dbgs() << "If a target marks an instruction with " "'usesCustomInserter', it must implement " @@ -173,115 +160,15 @@ MachineBasicBlock *TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, return 0; } -/// EmitLiveInCopy - Emit a copy for a live in physical register. If the -/// physical register has only a single copy use, then coalesced the copy -/// if possible. 
-static void EmitLiveInCopy(MachineBasicBlock *MBB, - MachineBasicBlock::iterator &InsertPos, - unsigned VirtReg, unsigned PhysReg, - const TargetRegisterClass *RC, - DenseMap<MachineInstr*, unsigned> &CopyRegMap, - const MachineRegisterInfo &MRI, - const TargetRegisterInfo &TRI, - const TargetInstrInfo &TII) { - unsigned NumUses = 0; - MachineInstr *UseMI = NULL; - for (MachineRegisterInfo::use_iterator UI = MRI.use_begin(VirtReg), - UE = MRI.use_end(); UI != UE; ++UI) { - UseMI = &*UI; - if (++NumUses > 1) - break; - } - - // If the number of uses is not one, or the use is not a move instruction, - // don't coalesce. Also, only coalesce away a virtual register to virtual - // register copy. - bool Coalesced = false; - unsigned SrcReg, DstReg, SrcSubReg, DstSubReg; - if (NumUses == 1 && - TII.isMoveInstr(*UseMI, SrcReg, DstReg, SrcSubReg, DstSubReg) && - TargetRegisterInfo::isVirtualRegister(DstReg)) { - VirtReg = DstReg; - Coalesced = true; - } - - // Now find an ideal location to insert the copy. - MachineBasicBlock::iterator Pos = InsertPos; - while (Pos != MBB->begin()) { - MachineInstr *PrevMI = prior(Pos); - DenseMap<MachineInstr*, unsigned>::iterator RI = CopyRegMap.find(PrevMI); - // copyRegToReg might emit multiple instructions to do a copy. - unsigned CopyDstReg = (RI == CopyRegMap.end()) ? 0 : RI->second; - if (CopyDstReg && !TRI.regsOverlap(CopyDstReg, PhysReg)) - // This is what the BB looks like right now: - // r1024 = mov r0 - // ... - // r1 = mov r1024 - // - // We want to insert "r1025 = mov r1". Inserting this copy below the - // move to r1024 makes it impossible for that move to be coalesced. - // - // r1025 = mov r1 - // r1024 = mov r0 - // ... - // r1 = mov 1024 - // r2 = mov 1025 - break; // Woot! Found a good location. - --Pos; - } - - bool Emitted = TII.copyRegToReg(*MBB, Pos, VirtReg, PhysReg, RC, RC); - assert(Emitted && "Unable to issue a live-in copy instruction!\n"); - (void) Emitted; - - CopyRegMap.insert(std::make_pair(prior(Pos), VirtReg)); - if (Coalesced) { - if (&*InsertPos == UseMI) ++InsertPos; - MBB->erase(UseMI); - } -} - -/// EmitLiveInCopies - If this is the first basic block in the function, -/// and if it has live ins that need to be copied into vregs, emit the -/// copies into the block. -static void EmitLiveInCopies(MachineBasicBlock *EntryMBB, - const MachineRegisterInfo &MRI, - const TargetRegisterInfo &TRI, - const TargetInstrInfo &TII) { - if (SchedLiveInCopies) { - // Emit the copies at a heuristically-determined location in the block. - DenseMap<MachineInstr*, unsigned> CopyRegMap; - MachineBasicBlock::iterator InsertPos = EntryMBB->begin(); - for (MachineRegisterInfo::livein_iterator LI = MRI.livein_begin(), - E = MRI.livein_end(); LI != E; ++LI) - if (LI->second) { - const TargetRegisterClass *RC = MRI.getRegClass(LI->second); - EmitLiveInCopy(EntryMBB, InsertPos, LI->second, LI->first, - RC, CopyRegMap, MRI, TRI, TII); - } - } else { - // Emit the copies into the top of the block. 
- for (MachineRegisterInfo::livein_iterator LI = MRI.livein_begin(), - E = MRI.livein_end(); LI != E; ++LI) - if (LI->second) { - const TargetRegisterClass *RC = MRI.getRegClass(LI->second); - bool Emitted = TII.copyRegToReg(*EntryMBB, EntryMBB->begin(), - LI->second, LI->first, RC, RC); - assert(Emitted && "Unable to issue a live-in copy instruction!\n"); - (void) Emitted; - } - } -} - //===----------------------------------------------------------------------===// // SelectionDAGISel code //===----------------------------------------------------------------------===// -SelectionDAGISel::SelectionDAGISel(TargetMachine &tm, CodeGenOpt::Level OL) : +SelectionDAGISel::SelectionDAGISel(const TargetMachine &tm, CodeGenOpt::Level OL) : MachineFunctionPass(&ID), TM(tm), TLI(*tm.getTargetLowering()), FuncInfo(new FunctionLoweringInfo(TLI)), - CurDAG(new SelectionDAG(TLI, *FuncInfo)), - SDB(new SelectionDAGBuilder(*CurDAG, TLI, *FuncInfo, OL)), + CurDAG(new SelectionDAG(tm, *FuncInfo)), + SDB(new SelectionDAGBuilder(*CurDAG, *FuncInfo, OL)), GFI(), OptLevel(OL), DAGSize(0) @@ -293,10 +180,6 @@ SelectionDAGISel::~SelectionDAGISel() { delete FuncInfo; } -unsigned SelectionDAGISel::MakeReg(EVT VT) { - return RegInfo->createVirtualRegister(TLI.getRegClassFor(VT)); -} - void SelectionDAGISel::getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired<AliasAnalysis>(); AU.addPreserved<AliasAnalysis>(); @@ -306,129 +189,75 @@ void SelectionDAGISel::getAnalysisUsage(AnalysisUsage &AU) const { } bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { - Function &Fn = *mf.getFunction(); - // Do some sanity-checking on the command-line options. assert((!EnableFastISelVerbose || EnableFastISel) && "-fast-isel-verbose requires -fast-isel"); assert((!EnableFastISelAbort || EnableFastISel) && "-fast-isel-abort requires -fast-isel"); - // Get alias analysis for load/store combining. - AA = &getAnalysis<AliasAnalysis>(); - - MF = &mf; + const Function &Fn = *mf.getFunction(); const TargetInstrInfo &TII = *TM.getInstrInfo(); const TargetRegisterInfo &TRI = *TM.getRegisterInfo(); - if (Fn.hasGC()) - GFI = &getAnalysis<GCModuleInfo>().getFunctionInfo(Fn); - else - GFI = 0; + MF = &mf; RegInfo = &MF->getRegInfo(); + AA = &getAnalysis<AliasAnalysis>(); + GFI = Fn.hasGC() ? &getAnalysis<GCModuleInfo>().getFunctionInfo(Fn) : 0; + DEBUG(dbgs() << "\n\n\n=== " << Fn.getName() << "\n"); CurDAG->init(*MF); FuncInfo->set(Fn, *MF, EnableFastISel); SDB->init(GFI, *AA); - for (Function::iterator I = Fn.begin(), E = Fn.end(); I != E; ++I) - if (InvokeInst *Invoke = dyn_cast<InvokeInst>(I->getTerminator())) - // Mark landing pad. - FuncInfo->MBBMap[Invoke->getSuccessor(1)]->setIsLandingPad(); - - SelectAllBasicBlocks(Fn, *MF, TII); + SelectAllBasicBlocks(Fn); // If the first basic block in the function has live ins that need to be // copied into vregs, emit the copies into the top of the block before // emitting the code for the block. - EmitLiveInCopies(MF->begin(), *RegInfo, TRI, TII); - - // Add function live-ins to entry block live-in set. - for (MachineRegisterInfo::livein_iterator I = RegInfo->livein_begin(), - E = RegInfo->livein_end(); I != E; ++I) - MF->begin()->addLiveIn(I->first); - -#ifndef NDEBUG - assert(FuncInfo->CatchInfoFound.size() == FuncInfo->CatchInfoLost.size() && - "Not all catch info was assigned to a landing pad!"); -#endif + MachineBasicBlock *EntryMBB = MF->begin(); + RegInfo->EmitLiveInCopies(EntryMBB, TRI, TII); + + // Insert DBG_VALUE instructions for function arguments to the entry block. 
+ for (unsigned i = 0, e = FuncInfo->ArgDbgValues.size(); i != e; ++i) { + MachineInstr *MI = FuncInfo->ArgDbgValues[e-i-1]; + unsigned Reg = MI->getOperand(0).getReg(); + if (TargetRegisterInfo::isPhysicalRegister(Reg)) + EntryMBB->insert(EntryMBB->begin(), MI); + else { + MachineInstr *Def = RegInfo->getVRegDef(Reg); + MachineBasicBlock::iterator InsertPos = Def; + // FIXME: VR def may not be in entry block. + Def->getParent()->insert(llvm::next(InsertPos), MI); + } + } + // Release function-specific state. SDB and CurDAG are already cleared + // at this point. FuncInfo->clear(); return true; } -/// SetDebugLoc - Update MF's and SDB's DebugLocs if debug information is -/// attached with this instruction. -static void SetDebugLoc(Instruction *I, SelectionDAGBuilder *SDB, - FastISel *FastIS, MachineFunction *MF) { - DebugLoc DL = I->getDebugLoc(); - if (DL.isUnknown()) return; - - SDB->setCurDebugLoc(DL); - - if (FastIS) - FastIS->setCurDebugLoc(DL); - - // If the function doesn't have a default debug location yet, set - // it. This is kind of a hack. - if (MF->getDefaultDebugLoc().isUnknown()) - MF->setDefaultDebugLoc(DL); -} - -/// ResetDebugLoc - Set MF's and SDB's DebugLocs to Unknown. -static void ResetDebugLoc(SelectionDAGBuilder *SDB, FastISel *FastIS) { - SDB->setCurDebugLoc(DebugLoc()); - if (FastIS) - FastIS->setCurDebugLoc(DebugLoc()); -} - -void SelectionDAGISel::SelectBasicBlock(BasicBlock *LLVMBB, - BasicBlock::iterator Begin, - BasicBlock::iterator End, - bool &HadTailCall) { - SDB->setCurrentBasicBlock(BB); - +MachineBasicBlock * +SelectionDAGISel::SelectBasicBlock(MachineBasicBlock *BB, + const BasicBlock *LLVMBB, + BasicBlock::const_iterator Begin, + BasicBlock::const_iterator End, + bool &HadTailCall) { // Lower all of the non-terminator instructions. If a call is emitted - // as a tail call, cease emitting nodes for this block. - for (BasicBlock::iterator I = Begin; I != End && !SDB->HasTailCall; ++I) { - SetDebugLoc(I, SDB, 0, MF); - - if (!isa<TerminatorInst>(I)) { - SDB->visit(*I); - - // Set the current debug location back to "unknown" so that it doesn't - // spuriously apply to subsequent instructions. - ResetDebugLoc(SDB, 0); - } - } - - if (!SDB->HasTailCall) { - // Ensure that all instructions which are used outside of their defining - // blocks are available as virtual registers. Invoke is handled elsewhere. - for (BasicBlock::iterator I = Begin; I != End; ++I) - if (!isa<PHINode>(I) && !isa<InvokeInst>(I)) - SDB->CopyToExportRegsIfNeeded(I); - - // Handle PHI nodes in successor blocks. - if (End == LLVMBB->end()) { - HandlePHINodesInSuccessorBlocks(LLVMBB); - - // Lower the terminator after the copies are emitted. - SetDebugLoc(LLVMBB->getTerminator(), SDB, 0, MF); - SDB->visit(*LLVMBB->getTerminator()); - ResetDebugLoc(SDB, 0); - } - } + // as a tail call, cease emitting nodes for this block. Terminators + // are handled below. + for (BasicBlock::const_iterator I = Begin; I != End && !SDB->HasTailCall; ++I) + SDB->visit(*I); // Make sure the root of the DAG is up-to-date. CurDAG->setRoot(SDB->getControlRoot()); - - // Final step, emit the lowered DAG as machine code. - CodeGenAndEmitDAG(); HadTailCall = SDB->HasTailCall; SDB->clear(); + + // Final step, emit the lowered DAG as machine code. 
+ return CodeGenAndEmitDAG(BB); } namespace { @@ -493,7 +322,7 @@ void SelectionDAGISel::ShrinkDemandedOps() { InWorklist.insert(I); } - TargetLowering::TargetLoweringOpt TLO(*CurDAG, true); + TargetLowering::TargetLoweringOpt TLO(*CurDAG, true, true, true); while (!Worklist.empty()) { SDNode *N = Worklist.pop_back_val(); InWorklist.erase(N); @@ -613,7 +442,7 @@ void SelectionDAGISel::ComputeLiveOutVRegInfo() { } while (!Worklist.empty()); } -void SelectionDAGISel::CodeGenAndEmitDAG() { +MachineBasicBlock *SelectionDAGISel::CodeGenAndEmitDAG(MachineBasicBlock *BB) { std::string GroupName; if (TimePassesIsEnabled) GroupName = "Instruction Selection and Scheduling"; @@ -763,9 +592,9 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { // inserted into. if (TimePassesIsEnabled) { NamedRegionTimer T("Instruction Creation", GroupName); - BB = Scheduler->EmitSchedule(&SDB->EdgeMapping); + BB = Scheduler->EmitSchedule(); } else { - BB = Scheduler->EmitSchedule(&SDB->EdgeMapping); + BB = Scheduler->EmitSchedule(); } // Free the scheduler state. @@ -776,8 +605,10 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { delete Scheduler; } - DEBUG(dbgs() << "Selected machine code:\n"); - DEBUG(BB->dump()); + // Free the SelectionDAG state, now that we're finished with it. + CurDAG->clear(); + + return BB; } void SelectionDAGISel::DoInstructionSelection() { @@ -836,128 +667,94 @@ void SelectionDAGISel::DoInstructionSelection() { PostprocessISelDAG(); } +/// PrepareEHLandingPad - Emit an EH_LABEL, set up live-in registers, and +/// do other setup for EH landing-pad blocks. +void SelectionDAGISel::PrepareEHLandingPad(MachineBasicBlock *BB) { + // Add a label to mark the beginning of the landing pad. Deletion of the + // landing pad can thus be detected via the MachineModuleInfo. + MCSymbol *Label = MF->getMMI().addLandingPad(BB); + + const TargetInstrDesc &II = TM.getInstrInfo()->get(TargetOpcode::EH_LABEL); + BuildMI(BB, SDB->getCurDebugLoc(), II).addSym(Label); + + // Mark exception register as live in. + unsigned Reg = TLI.getExceptionAddressRegister(); + if (Reg) BB->addLiveIn(Reg); + + // Mark exception selector register as live in. + Reg = TLI.getExceptionSelectorRegister(); + if (Reg) BB->addLiveIn(Reg); + + // FIXME: Hack around an exception handling flaw (PR1508): the personality + // function and list of typeids logically belong to the invoke (or, if you + // like, the basic block containing the invoke), and need to be associated + // with it in the dwarf exception handling tables. Currently however the + // information is provided by an intrinsic (eh.selector) that can be moved + // to unexpected places by the optimizers: if the unwind edge is critical, + // then breaking it can result in the intrinsics being in the successor of + // the landing pad, not the landing pad itself. This results + // in exceptions not being caught because no typeids are associated with + // the invoke. This may not be the only way things can go wrong, but it + // is the only way we try to work around for the moment. + const BasicBlock *LLVMBB = BB->getBasicBlock(); + const BranchInst *Br = dyn_cast<BranchInst>(LLVMBB->getTerminator()); + + if (Br && Br->isUnconditional()) { // Critical edge? + BasicBlock::const_iterator I, E; + for (I = LLVMBB->begin(), E = --LLVMBB->end(); I != E; ++I) + if (isa<EHSelectorInst>(I)) + break; -void SelectionDAGISel::SelectAllBasicBlocks(Function &Fn, - MachineFunction &MF, - const TargetInstrInfo &TII) { + if (I == E) + // No catch info found - try to extract some from the successor. 
+ CopyCatchInfo(Br->getSuccessor(0), LLVMBB, &MF->getMMI(), *FuncInfo); + } +} + +void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { // Initialize the Fast-ISel state, if needed. FastISel *FastIS = 0; if (EnableFastISel) - FastIS = TLI.createFastISel(MF, FuncInfo->ValueMap, FuncInfo->MBBMap, - FuncInfo->StaticAllocaMap + FastIS = TLI.createFastISel(*MF, FuncInfo->ValueMap, FuncInfo->MBBMap, + FuncInfo->StaticAllocaMap, + FuncInfo->PHINodesToUpdate #ifndef NDEBUG , FuncInfo->CatchInfoLost #endif ); // Iterate over all basic blocks in the function. - for (Function::iterator I = Fn.begin(), E = Fn.end(); I != E; ++I) { - BasicBlock *LLVMBB = &*I; - BB = FuncInfo->MBBMap[LLVMBB]; + for (Function::const_iterator I = Fn.begin(), E = Fn.end(); I != E; ++I) { + const BasicBlock *LLVMBB = &*I; + MachineBasicBlock *BB = FuncInfo->MBBMap[LLVMBB]; - BasicBlock::iterator const Begin = LLVMBB->begin(); - BasicBlock::iterator const End = LLVMBB->end(); - BasicBlock::iterator BI = Begin; + BasicBlock::const_iterator const Begin = LLVMBB->getFirstNonPHI(); + BasicBlock::const_iterator const End = LLVMBB->end(); + BasicBlock::const_iterator BI = Begin; // Lower any arguments needed in this block if this is the entry block. - bool SuppressFastISel = false; - if (LLVMBB == &Fn.getEntryBlock()) { + if (LLVMBB == &Fn.getEntryBlock()) LowerArguments(LLVMBB); - // If any of the arguments has the byval attribute, forgo - // fast-isel in the entry block. - if (FastIS) { - unsigned j = 1; - for (Function::arg_iterator I = Fn.arg_begin(), E = Fn.arg_end(); - I != E; ++I, ++j) - if (Fn.paramHasAttr(j, Attribute::ByVal)) { - if (EnableFastISelVerbose || EnableFastISelAbort) - dbgs() << "FastISel skips entry block due to byval argument\n"; - SuppressFastISel = true; - break; - } - } - } - - if (BB->isLandingPad()) { - // Add a label to mark the beginning of the landing pad. Deletion of the - // landing pad can thus be detected via the MachineModuleInfo. - MCSymbol *Label = MF.getMMI().addLandingPad(BB); - - const TargetInstrDesc &II = TII.get(TargetOpcode::EH_LABEL); - BuildMI(BB, SDB->getCurDebugLoc(), II).addSym(Label); - - // Mark exception register as live in. - unsigned Reg = TLI.getExceptionAddressRegister(); - if (Reg) BB->addLiveIn(Reg); - - // Mark exception selector register as live in. - Reg = TLI.getExceptionSelectorRegister(); - if (Reg) BB->addLiveIn(Reg); - - // FIXME: Hack around an exception handling flaw (PR1508): the personality - // function and list of typeids logically belong to the invoke (or, if you - // like, the basic block containing the invoke), and need to be associated - // with it in the dwarf exception handling tables. Currently however the - // information is provided by an intrinsic (eh.selector) that can be moved - // to unexpected places by the optimizers: if the unwind edge is critical, - // then breaking it can result in the intrinsics being in the successor of - // the landing pad, not the landing pad itself. This results - // in exceptions not being caught because no typeids are associated with - // the invoke. This may not be the only way things can go wrong, but it - // is the only way we try to work around for the moment. - BranchInst *Br = dyn_cast<BranchInst>(LLVMBB->getTerminator()); - - if (Br && Br->isUnconditional()) { // Critical edge? - BasicBlock::iterator I, E; - for (I = LLVMBB->begin(), E = --LLVMBB->end(); I != E; ++I) - if (isa<EHSelectorInst>(I)) - break; - - if (I == E) - // No catch info found - try to extract some from the successor. 
- CopyCatchInfo(Br->getSuccessor(0), LLVMBB, &MF.getMMI(), *FuncInfo); - } - } - + // Setup an EH landing-pad block. + if (BB->isLandingPad()) + PrepareEHLandingPad(BB); + // Before doing SelectionDAG ISel, see if FastISel has been requested. - if (FastIS && !SuppressFastISel) { + if (FastIS) { // Emit code for any incoming arguments. This must happen before // beginning FastISel on the entry block. if (LLVMBB == &Fn.getEntryBlock()) { CurDAG->setRoot(SDB->getControlRoot()); - CodeGenAndEmitDAG(); SDB->clear(); + BB = CodeGenAndEmitDAG(BB); } FastIS->startNewBlock(BB); // Do FastISel on as many instructions as possible. for (; BI != End; ++BI) { - // Just before the terminator instruction, insert instructions to - // feed PHI nodes in successor blocks. - if (isa<TerminatorInst>(BI)) - if (!HandlePHINodesInSuccessorBlocksFast(LLVMBB, FastIS)) { - ++NumFastIselFailures; - ResetDebugLoc(SDB, FastIS); - if (EnableFastISelVerbose || EnableFastISelAbort) { - dbgs() << "FastISel miss: "; - BI->dump(); - } - assert(!EnableFastISelAbort && - "FastISel didn't handle a PHI in a successor"); - break; - } - - SetDebugLoc(BI, SDB, FastIS, &MF); - // Try to select the instruction with FastISel. - if (FastIS->SelectInstruction(BI)) { - ResetDebugLoc(SDB, FastIS); + if (FastIS->SelectInstruction(BI)) continue; - } - - // Clear out the debug location so that it doesn't carry over to - // unrelated instructions. - ResetDebugLoc(SDB, FastIS); // Then handle certain instructions as single-LLVM-Instruction blocks. if (isa<CallInst>(BI)) { @@ -967,14 +764,14 @@ void SelectionDAGISel::SelectAllBasicBlocks(Function &Fn, BI->dump(); } - if (!BI->getType()->isVoidTy()) { + if (!BI->getType()->isVoidTy() && !BI->use_empty()) { unsigned &R = FuncInfo->ValueMap[BI]; if (!R) R = FuncInfo->CreateRegForValue(BI); } bool HadTailCall = false; - SelectBasicBlock(LLVMBB, BI, llvm::next(BI), HadTailCall); + BB = SelectBasicBlock(BB, LLVMBB, BI, llvm::next(BI), HadTailCall); // If the call was emitted as a tail call, we're done with the block. if (HadTailCall) { @@ -1010,44 +807,41 @@ void SelectionDAGISel::SelectAllBasicBlocks(Function &Fn, // block. if (BI != End) { bool HadTailCall; - SelectBasicBlock(LLVMBB, BI, End, HadTailCall); + BB = SelectBasicBlock(BB, LLVMBB, BI, End, HadTailCall); } - FinishBasicBlock(); + FinishBasicBlock(BB); + FuncInfo->PHINodesToUpdate.clear(); } delete FastIS; } void -SelectionDAGISel::FinishBasicBlock() { - - DEBUG(dbgs() << "Target-post-processed machine code:\n"); - DEBUG(BB->dump()); +SelectionDAGISel::FinishBasicBlock(MachineBasicBlock *BB) { DEBUG(dbgs() << "Total amount of phi nodes to update: " - << SDB->PHINodesToUpdate.size() << "\n"); - DEBUG(for (unsigned i = 0, e = SDB->PHINodesToUpdate.size(); i != e; ++i) + << FuncInfo->PHINodesToUpdate.size() << "\n"); + DEBUG(for (unsigned i = 0, e = FuncInfo->PHINodesToUpdate.size(); i != e; ++i) dbgs() << "Node " << i << " : (" - << SDB->PHINodesToUpdate[i].first - << ", " << SDB->PHINodesToUpdate[i].second << ")\n"); + << FuncInfo->PHINodesToUpdate[i].first + << ", " << FuncInfo->PHINodesToUpdate[i].second << ")\n"); // Next, now that we know what the last MBB the LLVM BB expanded is, update // PHI nodes in successors. 
if (SDB->SwitchCases.empty() && SDB->JTCases.empty() && SDB->BitTestCases.empty()) { - for (unsigned i = 0, e = SDB->PHINodesToUpdate.size(); i != e; ++i) { - MachineInstr *PHI = SDB->PHINodesToUpdate[i].first; + for (unsigned i = 0, e = FuncInfo->PHINodesToUpdate.size(); i != e; ++i) { + MachineInstr *PHI = FuncInfo->PHINodesToUpdate[i].first; assert(PHI->isPHI() && "This is not a machine PHI node that we are updating!"); if (!BB->isSuccessor(PHI->getParent())) continue; - PHI->addOperand(MachineOperand::CreateReg(SDB->PHINodesToUpdate[i].second, - false)); + PHI->addOperand( + MachineOperand::CreateReg(FuncInfo->PHINodesToUpdate[i].second, false)); PHI->addOperand(MachineOperand::CreateMBB(BB)); } - SDB->PHINodesToUpdate.clear(); return; } @@ -1056,37 +850,38 @@ SelectionDAGISel::FinishBasicBlock() { if (!SDB->BitTestCases[i].Emitted) { // Set the current basic block to the mbb we wish to insert the code into BB = SDB->BitTestCases[i].Parent; - SDB->setCurrentBasicBlock(BB); // Emit the code - SDB->visitBitTestHeader(SDB->BitTestCases[i]); + SDB->visitBitTestHeader(SDB->BitTestCases[i], BB); CurDAG->setRoot(SDB->getRoot()); - CodeGenAndEmitDAG(); SDB->clear(); + BB = CodeGenAndEmitDAG(BB); } for (unsigned j = 0, ej = SDB->BitTestCases[i].Cases.size(); j != ej; ++j) { // Set the current basic block to the mbb we wish to insert the code into BB = SDB->BitTestCases[i].Cases[j].ThisBB; - SDB->setCurrentBasicBlock(BB); // Emit the code if (j+1 != ej) SDB->visitBitTestCase(SDB->BitTestCases[i].Cases[j+1].ThisBB, SDB->BitTestCases[i].Reg, - SDB->BitTestCases[i].Cases[j]); + SDB->BitTestCases[i].Cases[j], + BB); else SDB->visitBitTestCase(SDB->BitTestCases[i].Default, SDB->BitTestCases[i].Reg, - SDB->BitTestCases[i].Cases[j]); + SDB->BitTestCases[i].Cases[j], + BB); CurDAG->setRoot(SDB->getRoot()); - CodeGenAndEmitDAG(); SDB->clear(); + BB = CodeGenAndEmitDAG(BB); } // Update PHI Nodes - for (unsigned pi = 0, pe = SDB->PHINodesToUpdate.size(); pi != pe; ++pi) { - MachineInstr *PHI = SDB->PHINodesToUpdate[pi].first; + for (unsigned pi = 0, pe = FuncInfo->PHINodesToUpdate.size(); + pi != pe; ++pi) { + MachineInstr *PHI = FuncInfo->PHINodesToUpdate[pi].first; MachineBasicBlock *PHIBB = PHI->getParent(); assert(PHI->isPHI() && "This is not a machine PHI node that we are updating!"); @@ -1094,10 +889,12 @@ SelectionDAGISel::FinishBasicBlock() { // from last "case" BB. if (PHIBB == SDB->BitTestCases[i].Default) { PHI->addOperand(MachineOperand:: - CreateReg(SDB->PHINodesToUpdate[pi].second, false)); + CreateReg(FuncInfo->PHINodesToUpdate[pi].second, + false)); PHI->addOperand(MachineOperand::CreateMBB(SDB->BitTestCases[i].Parent)); PHI->addOperand(MachineOperand:: - CreateReg(SDB->PHINodesToUpdate[pi].second, false)); + CreateReg(FuncInfo->PHINodesToUpdate[pi].second, + false)); PHI->addOperand(MachineOperand::CreateMBB(SDB->BitTestCases[i].Cases. 
back().ThisBB)); } @@ -1107,7 +904,8 @@ SelectionDAGISel::FinishBasicBlock() { MachineBasicBlock* cBB = SDB->BitTestCases[i].Cases[j].ThisBB; if (cBB->isSuccessor(PHIBB)) { PHI->addOperand(MachineOperand:: - CreateReg(SDB->PHINodesToUpdate[pi].second, false)); + CreateReg(FuncInfo->PHINodesToUpdate[pi].second, + false)); PHI->addOperand(MachineOperand::CreateMBB(cBB)); } } @@ -1123,40 +921,42 @@ SelectionDAGISel::FinishBasicBlock() { if (!SDB->JTCases[i].first.Emitted) { // Set the current basic block to the mbb we wish to insert the code into BB = SDB->JTCases[i].first.HeaderBB; - SDB->setCurrentBasicBlock(BB); // Emit the code - SDB->visitJumpTableHeader(SDB->JTCases[i].second, SDB->JTCases[i].first); + SDB->visitJumpTableHeader(SDB->JTCases[i].second, SDB->JTCases[i].first, + BB); CurDAG->setRoot(SDB->getRoot()); - CodeGenAndEmitDAG(); SDB->clear(); + BB = CodeGenAndEmitDAG(BB); } // Set the current basic block to the mbb we wish to insert the code into BB = SDB->JTCases[i].second.MBB; - SDB->setCurrentBasicBlock(BB); // Emit the code SDB->visitJumpTable(SDB->JTCases[i].second); CurDAG->setRoot(SDB->getRoot()); - CodeGenAndEmitDAG(); SDB->clear(); + BB = CodeGenAndEmitDAG(BB); // Update PHI Nodes - for (unsigned pi = 0, pe = SDB->PHINodesToUpdate.size(); pi != pe; ++pi) { - MachineInstr *PHI = SDB->PHINodesToUpdate[pi].first; + for (unsigned pi = 0, pe = FuncInfo->PHINodesToUpdate.size(); + pi != pe; ++pi) { + MachineInstr *PHI = FuncInfo->PHINodesToUpdate[pi].first; MachineBasicBlock *PHIBB = PHI->getParent(); assert(PHI->isPHI() && "This is not a machine PHI node that we are updating!"); // "default" BB. We can go there only from header BB. if (PHIBB == SDB->JTCases[i].second.Default) { PHI->addOperand - (MachineOperand::CreateReg(SDB->PHINodesToUpdate[pi].second, false)); + (MachineOperand::CreateReg(FuncInfo->PHINodesToUpdate[pi].second, + false)); PHI->addOperand (MachineOperand::CreateMBB(SDB->JTCases[i].first.HeaderBB)); } // JT BB. Just iterate over successors here if (BB->isSuccessor(PHIBB)) { PHI->addOperand - (MachineOperand::CreateReg(SDB->PHINodesToUpdate[pi].second, false)); + (MachineOperand::CreateReg(FuncInfo->PHINodesToUpdate[pi].second, + false)); PHI->addOperand(MachineOperand::CreateMBB(BB)); } } @@ -1165,13 +965,13 @@ SelectionDAGISel::FinishBasicBlock() { // If the switch block involved a branch to one of the actual successors, we // need to update PHI nodes in that block. - for (unsigned i = 0, e = SDB->PHINodesToUpdate.size(); i != e; ++i) { - MachineInstr *PHI = SDB->PHINodesToUpdate[i].first; + for (unsigned i = 0, e = FuncInfo->PHINodesToUpdate.size(); i != e; ++i) { + MachineInstr *PHI = FuncInfo->PHINodesToUpdate[i].first; assert(PHI->isPHI() && "This is not a machine PHI node that we are updating!"); if (BB->isSuccessor(PHI->getParent())) { - PHI->addOperand(MachineOperand::CreateReg(SDB->PHINodesToUpdate[i].second, - false)); + PHI->addOperand( + MachineOperand::CreateReg(FuncInfo->PHINodesToUpdate[i].second, false)); PHI->addOperand(MachineOperand::CreateMBB(BB)); } } @@ -1181,26 +981,26 @@ SelectionDAGISel::FinishBasicBlock() { for (unsigned i = 0, e = SDB->SwitchCases.size(); i != e; ++i) { // Set the current basic block to the mbb we wish to insert the code into MachineBasicBlock *ThisBB = BB = SDB->SwitchCases[i].ThisBB; - SDB->setCurrentBasicBlock(BB); - // Emit the code - SDB->visitSwitchCase(SDB->SwitchCases[i]); + // Determine the unique successors. 
+ SmallVector<MachineBasicBlock *, 2> Succs; + Succs.push_back(SDB->SwitchCases[i].TrueBB); + if (SDB->SwitchCases[i].TrueBB != SDB->SwitchCases[i].FalseBB) + Succs.push_back(SDB->SwitchCases[i].FalseBB); + + // Emit the code. Note that this could result in ThisBB being split, so + // we need to check for updates. + SDB->visitSwitchCase(SDB->SwitchCases[i], BB); CurDAG->setRoot(SDB->getRoot()); - CodeGenAndEmitDAG(); + SDB->clear(); + ThisBB = CodeGenAndEmitDAG(BB); // Handle any PHI nodes in successors of this chunk, as if we were coming // from the original BB before switch expansion. Note that PHI nodes can // occur multiple times in PHINodesToUpdate. We have to be very careful to // handle them the right number of times. - while ((BB = SDB->SwitchCases[i].TrueBB)) { // Handle LHS and RHS. - // If new BB's are created during scheduling, the edges may have been - // updated. That is, the edge from ThisBB to BB may have been split and - // BB's predecessor is now another block. - DenseMap<MachineBasicBlock*, MachineBasicBlock*>::iterator EI = - SDB->EdgeMapping.find(BB); - if (EI != SDB->EdgeMapping.end()) - ThisBB = EI->second; - + for (unsigned i = 0, e = Succs.size(); i != e; ++i) { + BB = Succs[i]; // BB may have been removed from the CFG if a branch was constant folded. if (ThisBB->isSuccessor(BB)) { for (MachineBasicBlock::iterator Phi = BB->begin(); @@ -1208,11 +1008,11 @@ SelectionDAGISel::FinishBasicBlock() { ++Phi) { // This value for this PHI node is recorded in PHINodesToUpdate. for (unsigned pn = 0; ; ++pn) { - assert(pn != SDB->PHINodesToUpdate.size() && + assert(pn != FuncInfo->PHINodesToUpdate.size() && "Didn't find PHI entry!"); - if (SDB->PHINodesToUpdate[pn].first == Phi) { + if (FuncInfo->PHINodesToUpdate[pn].first == Phi) { Phi->addOperand(MachineOperand:: - CreateReg(SDB->PHINodesToUpdate[pn].second, + CreateReg(FuncInfo->PHINodesToUpdate[pn].second, false)); Phi->addOperand(MachineOperand::CreateMBB(ThisBB)); break; @@ -1220,21 +1020,9 @@ SelectionDAGISel::FinishBasicBlock() { } } } - - // Don't process RHS if same block as LHS. - if (BB == SDB->SwitchCases[i].FalseBB) - SDB->SwitchCases[i].FalseBB = 0; - - // If we haven't handled the RHS, do so now. Otherwise, we're done. - SDB->SwitchCases[i].TrueBB = SDB->SwitchCases[i].FalseBB; - SDB->SwitchCases[i].FalseBB = 0; } - assert(SDB->SwitchCases[i].TrueBB == 0 && SDB->SwitchCases[i].FalseBB == 0); - SDB->clear(); } SDB->SwitchCases.clear(); - - SDB->PHINodesToUpdate.clear(); } @@ -1333,16 +1121,17 @@ SelectInlineAsmMemoryOperands(std::vector<SDValue> &Ops) { std::vector<SDValue> InOps; std::swap(InOps, Ops); - Ops.push_back(InOps[0]); // input chain. - Ops.push_back(InOps[1]); // input asm string. + Ops.push_back(InOps[InlineAsm::Op_InputChain]); // 0 + Ops.push_back(InOps[InlineAsm::Op_AsmString]); // 1 + Ops.push_back(InOps[InlineAsm::Op_MDNode]); // 2, !srcloc - unsigned i = 2, e = InOps.size(); + unsigned i = InlineAsm::Op_FirstOperand, e = InOps.size(); if (InOps[e-1].getValueType() == MVT::Flag) --e; // Don't process a flag operand if it is here. while (i != e) { unsigned Flags = cast<ConstantSDNode>(InOps[i])->getZExtValue(); - if ((Flags & 7) != 4 /*MEM*/) { + if (!InlineAsm::isMemKind(Flags)) { // Just skip over this operand, copying the operands verbatim. 
Ops.insert(Ops.end(), InOps.begin()+i, InOps.begin()+i+InlineAsm::getNumOperandRegisters(Flags) + 1); @@ -1352,14 +1141,14 @@ SelectInlineAsmMemoryOperands(std::vector<SDValue> &Ops) { "Memory operand with multiple values?"); // Otherwise, this is a memory operand. Ask the target to select it. std::vector<SDValue> SelOps; - if (SelectInlineAsmMemoryOperand(InOps[i+1], 'm', SelOps)) { - llvm_report_error("Could not match memory address. Inline asm" - " failure!"); - } + if (SelectInlineAsmMemoryOperand(InOps[i+1], 'm', SelOps)) + report_fatal_error("Could not match memory address. Inline asm" + " failure!"); // Add this to the output node. - Ops.push_back(CurDAG->getTargetConstant(4/*MEM*/ | (SelOps.size()<< 3), - MVT::i32)); + unsigned NewFlags = + InlineAsm::getFlagWord(InlineAsm::Kind_Mem, SelOps.size()); + Ops.push_back(CurDAG->getTargetConstant(NewFlags, MVT::i32)); Ops.insert(Ops.end(), SelOps.begin(), SelOps.end()); i += 2; } @@ -1436,7 +1225,8 @@ bool SelectionDAGISel::IsProfitableToFold(SDValue N, SDNode *U, /// IsLegalToFold - Returns true if the specific operand node N of /// U can be folded during instruction selection that starts at Root. bool SelectionDAGISel::IsLegalToFold(SDValue N, SDNode *U, SDNode *Root, - bool IgnoreChains) const { + CodeGenOpt::Level OptLevel, + bool IgnoreChains) { if (OptLevel == CodeGenOpt::None) return false; // If Root use can somehow reach N through a path that that doesn't contain @@ -2011,6 +1801,7 @@ static unsigned IsPredicateKnownToFail(const unsigned char *Table, } } +namespace { struct MatchScope { /// FailIndex - If this match fails, this is the index to continue with. @@ -2032,6 +1823,8 @@ struct MatchScope { bool HasChainNodesMatched, HasFlagResultNodesMatched; }; +} + SDNode *SelectionDAGISel:: SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, unsigned TableSize) { @@ -2045,6 +1838,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, //case ISD::VALUETYPE: //case ISD::CONDCODE: case ISD::HANDLENODE: + case ISD::MDNODE_SDNODE: case ISD::TargetConstant: case ISD::TargetConstantFP: case ISD::TargetConstantPool: @@ -2383,7 +2177,8 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, if (!IsProfitableToFold(N, NodeStack[NodeStack.size()-2].getNode(), NodeToMatch) || !IsLegalToFold(N, NodeStack[NodeStack.size()-2].getNode(), - NodeToMatch, true/*We validate our own chains*/)) + NodeToMatch, OptLevel, + true/*We validate our own chains*/)) break; continue; @@ -2776,7 +2571,7 @@ void SelectionDAGISel::CannotYetSelect(SDNode *N) { else Msg << "unknown intrinsic #" << iid; } - llvm_report_error(Msg.str()); + report_fatal_error(Msg.str()); } char SelectionDAGISel::ID = 0; diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp index ea2ff2f..8a4a1b1 100644 --- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -480,7 +480,8 @@ static void InitCmpLibcallCCs(ISD::CondCode *CCs) { } /// NOTE: The constructor takes ownership of TLOF. -TargetLowering::TargetLowering(TargetMachine &tm,TargetLoweringObjectFile *tlof) +TargetLowering::TargetLowering(const TargetMachine &tm, + const TargetLoweringObjectFile *tlof) : TM(tm), TD(TM.getTargetData()), TLOF(*tlof) { // All operations default to being supported. 
memset(OpActions, 0, sizeof(OpActions)); @@ -720,7 +721,7 @@ void TargetLowering::computeRegisterProperties() { unsigned NElts = VT.getVectorNumElements(); for (unsigned nVT = i+1; nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) { EVT SVT = (MVT::SimpleValueType)nVT; - if (isTypeLegal(SVT) && SVT.getVectorElementType() == EltVT && + if (isTypeSynthesizable(SVT) && SVT.getVectorElementType() == EltVT && SVT.getVectorNumElements() > NElts && NElts != 1) { TransformToType[i] = SVT; ValueTypeActions.setTypeAction(VT, Promote); @@ -1279,8 +1280,9 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, // variable. The low bit of the shift cannot be an input sign bit unless // the shift amount is >= the size of the datatype, which is undefined. if (DemandedMask == 1) - return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, Op.getValueType(), - Op.getOperand(0), Op.getOperand(1))); + return TLO.CombineTo(Op, + TLO.DAG.getNode(ISD::SRL, dl, Op.getValueType(), + Op.getOperand(0), Op.getOperand(1))); if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { EVT VT = Op.getValueType(); @@ -1465,23 +1467,29 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, case ISD::SRL: // Shrink SRL by a constant if none of the high bits shifted in are // demanded. - if (ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(In.getOperand(1))){ - APInt HighBits = APInt::getHighBitsSet(OperandBitWidth, - OperandBitWidth - BitWidth); - HighBits = HighBits.lshr(ShAmt->getZExtValue()); - HighBits.trunc(BitWidth); - - if (ShAmt->getZExtValue() < BitWidth && !(HighBits & NewMask)) { - // None of the shifted in bits are needed. Add a truncate of the - // shift input, then shift it. - SDValue NewTrunc = TLO.DAG.getNode(ISD::TRUNCATE, dl, - Op.getValueType(), - In.getOperand(0)); - return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, - Op.getValueType(), - NewTrunc, - In.getOperand(1))); - } + if (TLO.LegalTypes() && + !isTypeDesirableForOp(ISD::SRL, Op.getValueType())) + // Do not turn (vt1 truncate (vt2 srl)) into (vt1 srl) if vt1 is + // undesirable. + break; + ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(In.getOperand(1)); + if (!ShAmt) + break; + APInt HighBits = APInt::getHighBitsSet(OperandBitWidth, + OperandBitWidth - BitWidth); + HighBits = HighBits.lshr(ShAmt->getZExtValue()); + HighBits.trunc(BitWidth); + + if (ShAmt->getZExtValue() < BitWidth && !(HighBits & NewMask)) { + // None of the shifted in bits are needed. Add a truncate of the + // shift input, then shift it. + SDValue NewTrunc = TLO.DAG.getNode(ISD::TRUNCATE, dl, + Op.getValueType(), + In.getOperand(0)); + return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, + Op.getValueType(), + NewTrunc, + In.getOperand(1))); } break; } @@ -1874,10 +1882,15 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, isa<ConstantSDNode>(Op0.getOperand(1)) && cast<ConstantSDNode>(Op0.getOperand(1))->getAPIntValue() == 1) { // If this is (X&1) == / != 1, normalize it to (X&1) != / == 0. - if (Op0.getValueType() != VT) + if (Op0.getValueType().bitsGT(VT)) Op0 = DAG.getNode(ISD::AND, dl, VT, DAG.getNode(ISD::TRUNCATE, dl, VT, Op0.getOperand(0)), DAG.getConstant(1, VT)); + else if (Op0.getValueType().bitsLT(VT)) + Op0 = DAG.getNode(ISD::AND, dl, VT, + DAG.getNode(ISD::ANY_EXTEND, dl, VT, Op0.getOperand(0)), + DAG.getConstant(1, VT)); + return DAG.getSetCC(dl, VT, Op0, DAG.getConstant(0, Op0.getValueType()), Cond == ISD::SETEQ ? 
ISD::SETNE : ISD::SETEQ); @@ -2245,7 +2258,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, /// isGAPlusOffset - Returns true (and the GlobalValue and the offset) if the /// node is a GlobalAddress + offset. -bool TargetLowering::isGAPlusOffset(SDNode *N, GlobalValue* &GA, +bool TargetLowering::isGAPlusOffset(SDNode *N, const GlobalValue* &GA, int64_t &Offset) const { if (isa<GlobalAddressSDNode>(N)) { GlobalAddressSDNode *GASD = cast<GlobalAddressSDNode>(N); diff --git a/lib/CodeGen/SelectionDAG/TargetSelectionDAGInfo.cpp b/lib/CodeGen/SelectionDAG/TargetSelectionDAGInfo.cpp new file mode 100644 index 0000000..d20477f --- /dev/null +++ b/lib/CodeGen/SelectionDAG/TargetSelectionDAGInfo.cpp @@ -0,0 +1,21 @@ +//===-- TargetSelectionDAGInfo.cpp - SelectionDAG Info --------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This implements the TargetSelectionDAGInfo class. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Target/TargetSelectionDAGInfo.h" +using namespace llvm; + +TargetSelectionDAGInfo::TargetSelectionDAGInfo() { +} + +TargetSelectionDAGInfo::~TargetSelectionDAGInfo() { +} diff --git a/lib/CodeGen/ShadowStackGC.cpp b/lib/CodeGen/ShadowStackGC.cpp index 0e6d479..5240bef 100644 --- a/lib/CodeGen/ShadowStackGC.cpp +++ b/lib/CodeGen/ShadowStackGC.cpp @@ -160,7 +160,7 @@ namespace { Args.clear(); Args.append(CI->op_begin() + 1, CI->op_end()); - InvokeInst *II = InvokeInst::Create(CI->getOperand(0), + InvokeInst *II = InvokeInst::Create(CI->getCalledValue(), NewBB, CleanupBB, Args.begin(), Args.end(), CI->getName(), CallBB); diff --git a/lib/CodeGen/SimpleRegisterCoalescing.cpp b/lib/CodeGen/SimpleRegisterCoalescing.cpp index 15ca374..1f68a6f 100644 --- a/lib/CodeGen/SimpleRegisterCoalescing.cpp +++ b/lib/CodeGen/SimpleRegisterCoalescing.cpp @@ -179,7 +179,7 @@ bool SimpleRegisterCoalescing::AdjustCopiesBackFrom(LiveInterval &IntA, for (const unsigned* SR = tri_->getSubRegisters(IntB.reg); *SR; ++SR) if (li_->hasInterval(*SR) && IntA.overlaps(li_->getInterval(*SR))) { DEBUG({ - dbgs() << "Interfere with sub-register "; + dbgs() << "\t\tInterfere with sub-register "; li_->getInterval(*SR).print(dbgs(), tri_); }); return false; @@ -187,7 +187,7 @@ bool SimpleRegisterCoalescing::AdjustCopiesBackFrom(LiveInterval &IntA, } DEBUG({ - dbgs() << "\nExtending: "; + dbgs() << "Extending: "; IntB.print(dbgs(), tri_); }); @@ -236,7 +236,7 @@ bool SimpleRegisterCoalescing::AdjustCopiesBackFrom(LiveInterval &IntA, // If the copy instruction was killing the destination register before the // merge, find the last use and trim the live range. That will also add the // isKill marker. - if (CopyMI->killsRegister(IntA.reg)) + if (ALR->valno->isKill(CopyIdx)) TrimLiveIntervalToLastUse(CopyUseIdx, CopyMI->getParent(), IntA, ALR); ++numExtends; @@ -259,6 +259,9 @@ bool SimpleRegisterCoalescing::HasOtherReachingDefs(LiveInterval &IntA, for (; BI != IntB.ranges.end() && AI->end >= BI->start; ++BI) { if (BI->valno == BValNo) continue; + // When BValNo is null, we're looking for a dummy clobber-value for a subreg. 
+ if (!BValNo && !BI->valno->isDefAccurate() && !BI->valno->getCopy()) + continue; if (BI->start <= AI->start && BI->end > AI->start) return true; if (BI->start > AI->start && BI->start < AI->end) @@ -369,6 +372,17 @@ bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(LiveInterval &IntA, if (HasOtherReachingDefs(IntA, IntB, AValNo, BValNo)) return false; + bool BHasSubRegs = false; + if (TargetRegisterInfo::isPhysicalRegister(IntB.reg)) + BHasSubRegs = *tri_->getSubRegisters(IntB.reg); + + // Abort if the subregisters of IntB.reg have values that are not simply the + // clobbers from the superreg. + if (BHasSubRegs) + for (const unsigned *SR = tri_->getSubRegisters(IntB.reg); *SR; ++SR) + if (HasOtherReachingDefs(IntA, li_->getInterval(*SR), AValNo, 0)) + return false; + // If some of the uses of IntA.reg is already coalesced away, return false. // It's not possible to determine whether it's safe to perform the coalescing. for (MachineRegisterInfo::use_nodbg_iterator UI = @@ -417,9 +431,6 @@ bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(LiveInterval &IntA, BExtend[ALR->end] = BLR->end; // Update uses of IntA of the specific Val# with IntB. - bool BHasSubRegs = false; - if (TargetRegisterInfo::isPhysicalRegister(IntB.reg)) - BHasSubRegs = *tri_->getSubRegisters(IntB.reg); for (MachineRegisterInfo::use_iterator UI = mri_->use_begin(IntA.reg), UE = mri_->use_end(); UI != UE;) { MachineOperand &UseMO = UI.getOperand(); @@ -470,7 +481,7 @@ bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(LiveInterval &IntA, // We need to insert a new liverange: [ALR.start, LastUse). It may be we can // simply extend BLR if CopyMI doesn't end the range. DEBUG({ - dbgs() << "\nExtending: "; + dbgs() << "Extending: "; IntB.print(dbgs(), tri_); }); @@ -523,7 +534,6 @@ bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(LiveInterval &IntA, DEBUG({ dbgs() << " result = "; IntB.print(dbgs(), tri_); - dbgs() << '\n'; dbgs() << "\nShortening: "; IntA.print(dbgs(), tri_); }); @@ -709,7 +719,7 @@ bool SimpleRegisterCoalescing::ReMaterializeTrivialDef(LiveInterval &SrcInt, // kill. bool checkForDeadDef = false; MachineBasicBlock *MBB = CopyMI->getParent(); - if (CopyMI->killsRegister(SrcInt.reg)) + if (SrcLR->valno->isKill(DefIdx)) if (!TrimLiveIntervalToLastUse(CopyIdx, MBB, SrcInt, SrcLR)) { checkForDeadDef = true; } @@ -804,7 +814,8 @@ SimpleRegisterCoalescing::UpdateRegDefsUses(unsigned SrcReg, unsigned DstReg, CopySrcReg == SrcReg && CopyDstReg != UseDstReg) { // If the use is a copy and it won't be coalesced away, and its source // is defined by a trivial computation, try to rematerialize it instead. - if (ReMaterializeTrivialDef(li_->getInterval(SrcReg), CopyDstReg, + if (!JoinedCopies.count(UseMI) && + ReMaterializeTrivialDef(li_->getInterval(SrcReg), CopyDstReg, CopyDstSubIdx, UseMI)) continue; } @@ -824,6 +835,8 @@ SimpleRegisterCoalescing::UpdateRegDefsUses(unsigned SrcReg, unsigned DstReg, UseMI->isRegTiedToDefOperand(&O-&UseMI->getOperand(0)))) UseMI->addRegisterKilled(DstReg, tri_, true); } + DEBUG(dbgs() << "\t\tupdated: " << li_->getInstructionIndex(UseMI) + << "\t" << *UseMI); continue; } @@ -836,11 +849,11 @@ SimpleRegisterCoalescing::UpdateRegDefsUses(unsigned SrcReg, unsigned DstReg, assert(OldSubIdx < SubIdx && "Conflicting sub-register index!"); else if (SubIdx) O.setSubReg(SubIdx); - // Remove would-be duplicated kill marker. 
- if (O.isKill() && UseMI->killsRegister(DstReg)) - O.setIsKill(false); O.setReg(DstReg); + DEBUG(dbgs() << "\t\tupdated: " << li_->getInstructionIndex(UseMI) + << "\t" << *UseMI); + // After updating the operand, check if the machine instruction has // become a copy. If so, update its val# information. if (JoinedCopies.count(UseMI)) @@ -865,38 +878,6 @@ SimpleRegisterCoalescing::UpdateRegDefsUses(unsigned SrcReg, unsigned DstReg, } } -/// RemoveUnnecessaryKills - Remove kill markers that are no longer accurate -/// due to live range lengthening as the result of coalescing. -void SimpleRegisterCoalescing::RemoveUnnecessaryKills(unsigned Reg, - LiveInterval &LI) { - for (MachineRegisterInfo::use_iterator UI = mri_->use_begin(Reg), - UE = mri_->use_end(); UI != UE; ++UI) { - MachineOperand &UseMO = UI.getOperand(); - if (!UseMO.isKill()) - continue; - MachineInstr *UseMI = UseMO.getParent(); - SlotIndex UseIdx = - li_->getInstructionIndex(UseMI).getUseIndex(); - const LiveRange *LR = LI.getLiveRangeContaining(UseIdx); - if (!LR || - (!LR->valno->isKill(UseIdx.getDefIndex()) && - LR->valno->def != UseIdx.getDefIndex())) { - // Interesting problem. After coalescing reg1027's def and kill are both - // at the same point: %reg1027,0.000000e+00 = [56,814:0) 0@70-(814) - // - // bb5: - // 60 %reg1027<def> = t2MOVr %reg1027, 14, %reg0, %reg0 - // 68 %reg1027<def> = t2LDRi12 %reg1027<kill>, 8, 14, %reg0 - // 76 t2CMPzri %reg1038<kill,undef>, 0, 14, %reg0, %CPSR<imp-def> - // 84 %reg1027<def> = t2MOVr %reg1027, 14, %reg0, %reg0 - // 96 t2Bcc mbb<bb5,0x2030910>, 1, %CPSR<kill> - // - // Do not remove the kill marker on t2LDRi12. - UseMO.setIsKill(false); - } - } -} - /// removeIntervalIfEmpty - Check if the live interval of a physical register /// is empty, if so remove it and also remove the empty intervals of its /// sub-registers. Return true if live interval is removed. @@ -1064,9 +1045,8 @@ SimpleRegisterCoalescing::isWinToJoinVRWithSrcPhysReg(MachineInstr *CopyMI, unsigned Threshold = allocatableRCRegs_[RC].count() * 2; unsigned Length = li_->getApproximateInstructionCount(DstInt); if (Length > Threshold && - (((float)std::distance(mri_->use_nodbg_begin(DstInt.reg), - mri_->use_nodbg_end()) / Length) < - (1.0 / Threshold))) + std::distance(mri_->use_nodbg_begin(DstInt.reg), + mri_->use_nodbg_end()) * Threshold < Length) return false; // If the virtual register live interval extends into a loop, turn down @@ -1122,9 +1102,8 @@ SimpleRegisterCoalescing::isWinToJoinVRWithDstPhysReg(MachineInstr *CopyMI, unsigned Threshold = allocatableRCRegs_[RC].count() * 2; unsigned Length = li_->getApproximateInstructionCount(SrcInt); if (Length > Threshold && - (((float)std::distance(mri_->use_nodbg_begin(SrcInt.reg), - mri_->use_nodbg_end()) / Length) < - (1.0 / Threshold))) + std::distance(mri_->use_nodbg_begin(SrcInt.reg), + mri_->use_nodbg_end()) * Threshold < Length) return false; if (SrcInt.empty()) @@ -1168,20 +1147,42 @@ SimpleRegisterCoalescing::isWinToJoinVRWithDstPhysReg(MachineInstr *CopyMI, /// isWinToJoinCrossClass - Return true if it's profitable to coalesce /// two virtual registers from different register classes. bool -SimpleRegisterCoalescing::isWinToJoinCrossClass(unsigned LargeReg, - unsigned SmallReg, - unsigned Threshold) { - // Then make sure the intervals are *short*. 
- LiveInterval &LargeInt = li_->getInterval(LargeReg); - LiveInterval &SmallInt = li_->getInterval(SmallReg); - unsigned LargeSize = li_->getApproximateInstructionCount(LargeInt); - unsigned SmallSize = li_->getApproximateInstructionCount(SmallInt); - if (LargeSize > Threshold) { - unsigned SmallUses = std::distance(mri_->use_nodbg_begin(SmallReg), - mri_->use_nodbg_end()); - unsigned LargeUses = std::distance(mri_->use_nodbg_begin(LargeReg), - mri_->use_nodbg_end()); - if (SmallUses*LargeSize < LargeUses*SmallSize) +SimpleRegisterCoalescing::isWinToJoinCrossClass(unsigned SrcReg, + unsigned DstReg, + const TargetRegisterClass *SrcRC, + const TargetRegisterClass *DstRC, + const TargetRegisterClass *NewRC) { + unsigned NewRCCount = allocatableRCRegs_[NewRC].count(); + // This heuristic is good enough in practice, but it's obviously not *right*. + // 4 is a magic number that works well enough for x86, ARM, etc. It filters + // out all but the most restrictive register classes. + if (NewRCCount > 4 || + // Early exit if the function is fairly small, coalesce aggressively if + // that's the case. For really special register classes with 3 or + // fewer registers, be a bit more careful. + (li_->getFuncInstructionCount() / NewRCCount) < 8) + return true; + LiveInterval &SrcInt = li_->getInterval(SrcReg); + LiveInterval &DstInt = li_->getInterval(DstReg); + unsigned SrcSize = li_->getApproximateInstructionCount(SrcInt); + unsigned DstSize = li_->getApproximateInstructionCount(DstInt); + if (SrcSize <= NewRCCount && DstSize <= NewRCCount) + return true; + // Estimate *register use density*. If it doubles or more, abort. + unsigned SrcUses = std::distance(mri_->use_nodbg_begin(SrcReg), + mri_->use_nodbg_end()); + unsigned DstUses = std::distance(mri_->use_nodbg_begin(DstReg), + mri_->use_nodbg_end()); + unsigned NewUses = SrcUses + DstUses; + unsigned NewSize = SrcSize + DstSize; + if (SrcRC != NewRC && SrcSize > NewRCCount) { + unsigned SrcRCCount = allocatableRCRegs_[SrcRC].count(); + if (NewUses*SrcSize*SrcRCCount > 2*SrcUses*NewSize*NewRCCount) + return false; + } + if (DstRC != NewRC && DstSize > NewRCCount) { + unsigned DstRCCount = allocatableRCRegs_[DstRC].count(); + if (NewUses*DstSize*DstRCCount > 2*DstUses*NewSize*NewRCCount) + return false; } return true; @@ -1263,7 +1264,7 @@ SimpleRegisterCoalescing::CanJoinExtractSubRegToPhysReg(unsigned DstReg, if (li_->hasInterval(RealDstReg) && RHS.overlaps(li_->getInterval(RealDstReg))) { DEBUG({ - dbgs() << "Interfere with register "; + dbgs() << "\t\tInterfere with register "; li_->getInterval(RealDstReg).print(dbgs(), tri_); }); return false; // Not coalescable @@ -1275,7 +1276,7 @@ SimpleRegisterCoalescing::CanJoinExtractSubRegToPhysReg(unsigned DstReg, !tri_->isSubRegister(DstReg, *SR) && li_->hasInterval(*SR) && RHS.overlaps(li_->getInterval(*SR))) { DEBUG({ - dbgs() << "Interfere with sub-register "; + dbgs() << "\t\tInterfere with sub-register "; li_->getInterval(*SR).print(dbgs(), tri_); }); return false; // Not coalescable @@ -1298,7 +1299,7 @@ SimpleRegisterCoalescing::CanJoinInsertSubRegToPhysReg(unsigned DstReg, if (li_->hasInterval(RealSrcReg) && LHS.overlaps(li_->getInterval(RealSrcReg))) { DEBUG({ - dbgs() << "Interfere with register "; + dbgs() << "\t\tInterfere with register "; li_->getInterval(RealSrcReg).print(dbgs(), tri_); }); return false; // Not coalescable @@ -1310,7 +1311,7 @@ SimpleRegisterCoalescing::CanJoinInsertSubRegToPhysReg(unsigned DstReg, !tri_->isSubRegister(SrcReg, *SR) && li_->hasInterval(*SR) &&
LHS.overlaps(li_->getInterval(*SR))) { DEBUG({ - dbgs() << "Interfere with sub-register "; + dbgs() << "\t\tInterfere with sub-register "; li_->getInterval(*SR).print(dbgs(), tri_); }); return false; // Not coalescable @@ -1400,6 +1401,13 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { return false; // Not coalescable. } + // We cannot handle dual subreg indices and mismatched classes at the same + // time. + if (SrcSubIdx && DstSubIdx && differingRegisterClasses(SrcReg, DstReg)) { + DEBUG(dbgs() << "\tCannot handle subreg indices and mismatched classes.\n"); + return false; + } + // Check that a physical source register is compatible with dst regclass if (SrcIsPhys) { unsigned SrcSubReg = SrcSubIdx ? @@ -1517,10 +1525,11 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { return false; // Not coalescable } - unsigned LargeReg = isExtSubReg ? SrcReg : DstReg; - unsigned SmallReg = isExtSubReg ? DstReg : SrcReg; - unsigned Limit= allocatableRCRegs_[mri_->getRegClass(SmallReg)].count(); - if (!isWinToJoinCrossClass(LargeReg, SmallReg, Limit)) { + if (!isWinToJoinCrossClass(SrcReg, DstReg, SrcRC, DstRC, NewRC)) { + DEBUG(dbgs() << "\tAvoid coalescing to constrained register class: " + << SrcRC->getName() << "/" + << DstRC->getName() << " -> " + << NewRC->getName() << ".\n"); Again = true; // May be possible to coalesce later. return false; } @@ -1568,49 +1577,40 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { } } - unsigned LargeReg = SrcReg; - unsigned SmallReg = DstReg; - // Now determine the register class of the joined register. - if (isExtSubReg) { - if (SubIdx && DstRC && DstRC->isASubClass()) { - // This is a move to a sub-register class. However, the source is a - // sub-register of a larger register class. We don't know what should - // the register class be. FIXME. - Again = true; - return false; + if (!SrcIsPhys && !DstIsPhys) { + if (isExtSubReg) { + NewRC = + SubIdx ? tri_->getMatchingSuperRegClass(SrcRC, DstRC, SubIdx) : SrcRC; + } else if (isInsSubReg) { + NewRC = + SubIdx ? tri_->getMatchingSuperRegClass(DstRC, SrcRC, SubIdx) : DstRC; + } else { + NewRC = getCommonSubClass(SrcRC, DstRC); } - if (!DstIsPhys && !SrcIsPhys) - NewRC = SrcRC; - } else if (!SrcIsPhys && !DstIsPhys) { - NewRC = getCommonSubClass(SrcRC, DstRC); + if (!NewRC) { DEBUG(dbgs() << "\tDisjoint regclasses: " << SrcRC->getName() << ", " << DstRC->getName() << ".\n"); return false; // Not coalescable. } - if (DstRC->getSize() > SrcRC->getSize()) - std::swap(LargeReg, SmallReg); - } - // If we are joining two virtual registers and the resulting register - // class is more restrictive (fewer register, smaller size). Check if it's - // worth doing the merge. - if (!SrcIsPhys && !DstIsPhys && - (isExtSubReg || DstRC->isASubClass()) && - !isWinToJoinCrossClass(LargeReg, SmallReg, - allocatableRCRegs_[NewRC].count())) { - DEBUG(dbgs() << "\tSrc/Dest are different register classes: " - << SrcRC->getName() << "/" - << DstRC->getName() << " -> " - << NewRC->getName() << ".\n"); - // Allow the coalescer to try again in case either side gets coalesced to - // a physical register that's compatible with the other side. e.g. - // r1024 = MOV32to32_ r1025 - // But later r1024 is assigned EAX then r1025 may be coalesced with EAX. - Again = true; // May be possible to coalesce later. - return false; + // If we are joining two virtual registers and the resulting register + // class is more restrictive (fewer register, smaller size). 
Check if it's + // worth doing the merge. + if (!isWinToJoinCrossClass(SrcReg, DstReg, SrcRC, DstRC, NewRC)) { + DEBUG(dbgs() << "\tAvoid coalescing to constrained register class: " + << SrcRC->getName() << "/" + << DstRC->getName() << " -> " + << NewRC->getName() << ".\n"); + // Allow the coalescer to try again in case either side gets coalesced to + // a physical register that's compatible with the other side. e.g. + // r1024 = MOV32to32_ r1025 + // But later r1024 is assigned EAX then r1025 may be coalesced with EAX. + Again = true; // May be possible to coalesce later. + return false; + } } } @@ -1626,9 +1626,13 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { "Register mapping is horribly broken!"); DEBUG({ - dbgs() << "\t\tInspecting "; SrcInt.print(dbgs(), tri_); - dbgs() << " and "; DstInt.print(dbgs(), tri_); - dbgs() << ": "; + dbgs() << "\t\tInspecting "; + if (SrcRC) dbgs() << SrcRC->getName() << ": "; + SrcInt.print(dbgs(), tri_); + dbgs() << "\n\t\t and "; + if (DstRC) dbgs() << DstRC->getName() << ": "; + DstInt.print(dbgs(), tri_); + dbgs() << "\n"; }); // Save a copy of the virtual register live interval. We'll manually @@ -1672,10 +1676,9 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { const TargetRegisterClass *RC = mri_->getRegClass(JoinVReg); unsigned Threshold = allocatableRCRegs_[RC].count() * 2; unsigned Length = li_->getApproximateInstructionCount(JoinVInt); - float Ratio = 1.0 / Threshold; if (Length > Threshold && - (((float)std::distance(mri_->use_nodbg_begin(JoinVReg), - mri_->use_nodbg_end()) / Length) < Ratio)) { + std::distance(mri_->use_nodbg_begin(JoinVReg), + mri_->use_nodbg_end()) * Threshold < Length) { // Before giving up coalescing, if definition of source is defined by // trivial computation, try rematerializing it. if (ReMaterializeTrivialDef(SrcInt, DstReg, DstSubIdx, CopyMI)) @@ -1701,7 +1704,7 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { // Only coalesce an empty interval (defined by implicit_def) with // another interval which has a valno defined by the CopyMI and the CopyMI // is a kill of the implicit def. - DEBUG(dbgs() << "Not profitable!\n"); + DEBUG(dbgs() << "\tNot profitable!\n"); return false; } } else if (!JoinIntervals(DstInt, SrcInt, Swapped)) { @@ -1718,12 +1721,12 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { (AdjustCopiesBackFrom(SrcInt, DstInt, CopyMI) || RemoveCopyByCommutingDef(SrcInt, DstInt, CopyMI))) { JoinedCopies.insert(CopyMI); - DEBUG(dbgs() << "Trivial!\n"); + DEBUG(dbgs() << "\tTrivial!\n"); return true; } // Otherwise, we are unable to join the intervals. - DEBUG(dbgs() << "Interference!\n"); + DEBUG(dbgs() << "\tInterference!\n"); Again = true; // May be possible to coalesce later. return false; } @@ -1794,12 +1797,6 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { // Remember to delete the copy instruction. JoinedCopies.insert(CopyMI); - // Some live range has been lengthened due to colaescing, eliminate the - // unnecessary kills. - RemoveUnnecessaryKills(SrcReg, *ResDstInt); - if (TargetRegisterInfo::isVirtualRegister(DstReg)) - RemoveUnnecessaryKills(DstReg, *ResDstInt); - UpdateRegDefsUses(SrcReg, DstReg, SubIdx); // If we have extended the live range of a physical register, make sure we @@ -1843,7 +1840,7 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { } DEBUG({ - dbgs() << "\n\t\tJoined. Result = "; + dbgs() << "\t\tJoined. 
Result = "; ResDstInt->print(dbgs(), tri_); dbgs() << "\n"; }); @@ -2198,7 +2195,7 @@ SimpleRegisterCoalescing::JoinIntervals(LiveInterval &LHS, LiveInterval &RHS, for (const unsigned* SR = tri_->getSubRegisters(LHS.reg); *SR; ++SR) if (li_->hasInterval(*SR) && RHS.overlaps(li_->getInterval(*SR))) { DEBUG({ - dbgs() << "Interfere with sub-register "; + dbgs() << "\tInterfere with sub-register "; li_->getInterval(*SR).print(dbgs(), tri_); }); return false; @@ -2215,7 +2212,7 @@ SimpleRegisterCoalescing::JoinIntervals(LiveInterval &LHS, LiveInterval &RHS, for (const unsigned* SR = tri_->getSubRegisters(RHS.reg); *SR; ++SR) if (li_->hasInterval(*SR) && LHS.overlaps(li_->getInterval(*SR))) { DEBUG({ - dbgs() << "Interfere with sub-register "; + dbgs() << "\tInterfere with sub-register "; li_->getInterval(*SR).print(dbgs(), tri_); }); return false; @@ -2673,13 +2670,6 @@ SimpleRegisterCoalescing::lastRegisterUse(SlotIndex Start, return NULL; } -void SimpleRegisterCoalescing::printRegName(unsigned reg) const { - if (TargetRegisterInfo::isPhysicalRegister(reg)) - dbgs() << tri_->getName(reg); - else - dbgs() << "%reg" << reg; -} - void SimpleRegisterCoalescing::releaseMemory() { JoinedCopies.clear(); ReMatCopies.clear(); @@ -2744,7 +2734,7 @@ bool SimpleRegisterCoalescing::runOnMachineFunction(MachineFunction &fn) { // delete them later. DoDelete = false; } - if (MI->registerDefIsDead(DstReg)) { + if (MI->allDefsAreDead()) { LiveInterval &li = li_->getInterval(DstReg); if (!ShortenDeadCopySrcLiveRange(li, MI)) ShortenDeadCopyLiveRange(li, MI); @@ -2808,8 +2798,25 @@ bool SimpleRegisterCoalescing::runOnMachineFunction(MachineFunction &fn) { li_->RemoveMachineInstrFromMaps(MI); mii = mbbi->erase(mii); ++numPeep; - } else { - ++mii; + continue; + } + + ++mii; + + // Check for now unnecessary kill flags. + if (li_->isNotInMIMap(MI)) continue; + SlotIndex UseIdx = li_->getInstructionIndex(MI).getUseIndex(); + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg() || !MO.isKill()) continue; + unsigned reg = MO.getReg(); + if (!reg || !li_->hasInterval(reg)) continue; + LiveInterval &LI = li_->getInterval(reg); + const LiveRange *LR = LI.getLiveRangeContaining(UseIdx); + if (!LR || + (!LR->valno->isKill(UseIdx.getDefIndex()) && + LR->valno->def != UseIdx.getDefIndex())) + MO.setIsKill(false); } } } diff --git a/lib/CodeGen/SimpleRegisterCoalescing.h b/lib/CodeGen/SimpleRegisterCoalescing.h index f668064..1be04f3 100644 --- a/lib/CodeGen/SimpleRegisterCoalescing.h +++ b/lib/CodeGen/SimpleRegisterCoalescing.h @@ -179,8 +179,11 @@ namespace llvm { /// isWinToJoinCrossClass - Return true if it's profitable to coalesce /// two virtual registers from different register classes. - bool isWinToJoinCrossClass(unsigned LargeReg, unsigned SmallReg, - unsigned Threshold); + bool isWinToJoinCrossClass(unsigned SrcReg, + unsigned DstReg, + const TargetRegisterClass *SrcRC, + const TargetRegisterClass *DstRC, + const TargetRegisterClass *NewRC); /// HasIncompatibleSubRegDefUse - If we are trying to coalesce a virtual /// register with a physical register, check if any of the virtual register @@ -220,10 +223,6 @@ namespace llvm { /// subregister. void UpdateRegDefsUses(unsigned SrcReg, unsigned DstReg, unsigned SubIdx); - /// RemoveUnnecessaryKills - Remove kill markers that are no longer accurate - /// due to live range lengthening as the result of coalescing. 
- void RemoveUnnecessaryKills(unsigned Reg, LiveInterval &LI); - /// ShortenDeadCopyLiveRange - Shorten a live range defined by a dead copy. /// Return true if live interval is removed. bool ShortenDeadCopyLiveRange(LiveInterval &li, MachineInstr *CopyMI); @@ -243,8 +242,6 @@ namespace llvm { /// cycles Start and End or NULL if there are no uses. MachineOperand *lastRegisterUse(SlotIndex Start, SlotIndex End, unsigned Reg, SlotIndex &LastUseIdx) const; - - void printRegName(unsigned reg) const; }; } // End llvm namespace diff --git a/lib/CodeGen/Spiller.cpp b/lib/CodeGen/Spiller.cpp index 7ba4403..63c5554 100644 --- a/lib/CodeGen/Spiller.cpp +++ b/lib/CodeGen/Spiller.cpp @@ -46,7 +46,6 @@ namespace { /// Utility class for spillers. class SpillerBase : public Spiller { protected: - MachineFunction *mf; LiveIntervals *lis; MachineFrameInfo *mfi; @@ -160,9 +159,11 @@ protected: return added; } - }; +} // end anonymous namespace + +namespace { /// Spills any live range using the spill-everywhere method with no attempt at /// folding. @@ -178,9 +179,12 @@ public: // Ignore spillIs - we don't use it. return trivialSpillEverywhere(li); } - }; +} // end anonymous namespace + +namespace { + /// Falls back on LiveIntervals::addIntervalsForSpills. class StandardSpiller : public Spiller { protected: @@ -198,9 +202,12 @@ public: SlotIndex*) { return lis->addIntervalsForSpills(*li, spillIs, loopInfo, *vrm); } - }; +} // end anonymous namespace + +namespace { + /// When a call to spill is placed this spiller will first try to break the /// interval up into its component values (one new interval per value). /// If this fails, or if a call is placed to spill a previously split interval @@ -513,15 +520,16 @@ private: }; -} +} // end anonymous namespace + llvm::Spiller* llvm::createSpiller(MachineFunction *mf, LiveIntervals *lis, const MachineLoopInfo *loopInfo, VirtRegMap *vrm) { switch (spillerOpt) { - case trivial: return new TrivialSpiller(mf, lis, vrm); break; - case standard: return new StandardSpiller(lis, loopInfo, vrm); break; - case splitting: return new SplittingSpiller(mf, lis, loopInfo, vrm); break; - default: llvm_unreachable("Unreachable!"); break; + default: assert(0 && "unknown spiller"); + case trivial: return new TrivialSpiller(mf, lis, vrm); + case standard: return new StandardSpiller(lis, loopInfo, vrm); + case splitting: return new SplittingSpiller(mf, lis, loopInfo, vrm); } } diff --git a/lib/CodeGen/StackSlotColoring.cpp b/lib/CodeGen/StackSlotColoring.cpp index 12d38f0..42dfd7f 100644 --- a/lib/CodeGen/StackSlotColoring.cpp +++ b/lib/CodeGen/StackSlotColoring.cpp @@ -182,7 +182,8 @@ void StackSlotColoring::ScanForSpillSlotRefs(MachineFunction &MF) { if (!LS->hasInterval(FI)) continue; LiveInterval &li = LS->getInterval(FI); - li.weight += LiveIntervals::getSpillWeight(false, true, loopDepth); + if (!MI->isDebugValue()) + li.weight += LiveIntervals::getSpillWeight(false, true, loopDepth); SSRefs[FI].push_back(MI); } } diff --git a/lib/CodeGen/TargetInstrInfoImpl.cpp b/lib/CodeGen/TargetInstrInfoImpl.cpp index e9e998f..0ad6619 100644 --- a/lib/CodeGen/TargetInstrInfoImpl.cpp +++ b/lib/CodeGen/TargetInstrInfoImpl.cpp @@ -40,7 +40,7 @@ MachineInstr *TargetInstrInfoImpl::commuteInstruction(MachineInstr *MI, std::string msg; raw_string_ostream Msg(msg); Msg << "Don't know how to commute: " << *MI; - llvm_report_error(Msg.str()); + report_fatal_error(Msg.str()); } assert(MI->getOperand(Idx1).isReg() && MI->getOperand(Idx2).isReg() && diff --git a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp 
b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp index d6bdb10..9f95993 100644 --- a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp +++ b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp @@ -38,110 +38,88 @@ using namespace dwarf; //===----------------------------------------------------------------------===// // ELF //===----------------------------------------------------------------------===// -typedef StringMap<const MCSectionELF*> ELFUniqueMapTy; - -TargetLoweringObjectFileELF::~TargetLoweringObjectFileELF() { - // If we have the section uniquing map, free it. - delete (ELFUniqueMapTy*)UniquingMap; -} - -const MCSection *TargetLoweringObjectFileELF:: -getELFSection(StringRef Section, unsigned Type, unsigned Flags, - SectionKind Kind, bool IsExplicit) const { - if (UniquingMap == 0) - UniquingMap = new ELFUniqueMapTy(); - ELFUniqueMapTy &Map = *(ELFUniqueMapTy*)UniquingMap; - - // Do the lookup, if we have a hit, return it. - StringMapEntry<const MCSectionELF*> &Entry = Map.GetOrCreateValue(Section); - if (Entry.getValue()) return Entry.getValue(); - - MCSectionELF *Result = MCSectionELF::Create(Entry.getKey(), Type, Flags, Kind, - IsExplicit, getContext()); - Entry.setValue(Result); - return Result; -} void TargetLoweringObjectFileELF::Initialize(MCContext &Ctx, const TargetMachine &TM) { - if (UniquingMap != 0) - ((ELFUniqueMapTy*)UniquingMap)->clear(); TargetLoweringObjectFile::Initialize(Ctx, TM); BSSSection = - getELFSection(".bss", MCSectionELF::SHT_NOBITS, - MCSectionELF::SHF_WRITE | MCSectionELF::SHF_ALLOC, - SectionKind::getBSS()); + getContext().getELFSection(".bss", MCSectionELF::SHT_NOBITS, + MCSectionELF::SHF_WRITE |MCSectionELF::SHF_ALLOC, + SectionKind::getBSS()); TextSection = - getELFSection(".text", MCSectionELF::SHT_PROGBITS, - MCSectionELF::SHF_EXECINSTR | MCSectionELF::SHF_ALLOC, - SectionKind::getText()); + getContext().getELFSection(".text", MCSectionELF::SHT_PROGBITS, + MCSectionELF::SHF_EXECINSTR | + MCSectionELF::SHF_ALLOC, + SectionKind::getText()); DataSection = - getELFSection(".data", MCSectionELF::SHT_PROGBITS, - MCSectionELF::SHF_WRITE | MCSectionELF::SHF_ALLOC, - SectionKind::getDataRel()); + getContext().getELFSection(".data", MCSectionELF::SHT_PROGBITS, + MCSectionELF::SHF_WRITE |MCSectionELF::SHF_ALLOC, + SectionKind::getDataRel()); ReadOnlySection = - getELFSection(".rodata", MCSectionELF::SHT_PROGBITS, - MCSectionELF::SHF_ALLOC, - SectionKind::getReadOnly()); + getContext().getELFSection(".rodata", MCSectionELF::SHT_PROGBITS, + MCSectionELF::SHF_ALLOC, + SectionKind::getReadOnly()); TLSDataSection = - getELFSection(".tdata", MCSectionELF::SHT_PROGBITS, - MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_TLS | - MCSectionELF::SHF_WRITE, SectionKind::getThreadData()); + getContext().getELFSection(".tdata", MCSectionELF::SHT_PROGBITS, + MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_TLS | + MCSectionELF::SHF_WRITE, + SectionKind::getThreadData()); TLSBSSSection = - getELFSection(".tbss", MCSectionELF::SHT_NOBITS, - MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_TLS | - MCSectionELF::SHF_WRITE, SectionKind::getThreadBSS()); + getContext().getELFSection(".tbss", MCSectionELF::SHT_NOBITS, + MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_TLS | + MCSectionELF::SHF_WRITE, + SectionKind::getThreadBSS()); DataRelSection = - getELFSection(".data.rel", MCSectionELF::SHT_PROGBITS, - MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_WRITE, - SectionKind::getDataRel()); + getContext().getELFSection(".data.rel", MCSectionELF::SHT_PROGBITS, + MCSectionELF::SHF_ALLOC |MCSectionELF::SHF_WRITE, + 
SectionKind::getDataRel()); DataRelLocalSection = - getELFSection(".data.rel.local", MCSectionELF::SHT_PROGBITS, - MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_WRITE, - SectionKind::getDataRelLocal()); + getContext().getELFSection(".data.rel.local", MCSectionELF::SHT_PROGBITS, + MCSectionELF::SHF_ALLOC |MCSectionELF::SHF_WRITE, + SectionKind::getDataRelLocal()); DataRelROSection = - getELFSection(".data.rel.ro", MCSectionELF::SHT_PROGBITS, - MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_WRITE, - SectionKind::getReadOnlyWithRel()); + getContext().getELFSection(".data.rel.ro", MCSectionELF::SHT_PROGBITS, + MCSectionELF::SHF_ALLOC |MCSectionELF::SHF_WRITE, + SectionKind::getReadOnlyWithRel()); DataRelROLocalSection = - getELFSection(".data.rel.ro.local", MCSectionELF::SHT_PROGBITS, - MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_WRITE, - SectionKind::getReadOnlyWithRelLocal()); + getContext().getELFSection(".data.rel.ro.local", MCSectionELF::SHT_PROGBITS, + MCSectionELF::SHF_ALLOC |MCSectionELF::SHF_WRITE, + SectionKind::getReadOnlyWithRelLocal()); MergeableConst4Section = - getELFSection(".rodata.cst4", MCSectionELF::SHT_PROGBITS, - MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_MERGE, - SectionKind::getMergeableConst4()); + getContext().getELFSection(".rodata.cst4", MCSectionELF::SHT_PROGBITS, + MCSectionELF::SHF_ALLOC |MCSectionELF::SHF_MERGE, + SectionKind::getMergeableConst4()); MergeableConst8Section = - getELFSection(".rodata.cst8", MCSectionELF::SHT_PROGBITS, - MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_MERGE, - SectionKind::getMergeableConst8()); + getContext().getELFSection(".rodata.cst8", MCSectionELF::SHT_PROGBITS, + MCSectionELF::SHF_ALLOC |MCSectionELF::SHF_MERGE, + SectionKind::getMergeableConst8()); MergeableConst16Section = - getELFSection(".rodata.cst16", MCSectionELF::SHT_PROGBITS, - MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_MERGE, - SectionKind::getMergeableConst16()); + getContext().getELFSection(".rodata.cst16", MCSectionELF::SHT_PROGBITS, + MCSectionELF::SHF_ALLOC |MCSectionELF::SHF_MERGE, + SectionKind::getMergeableConst16()); StaticCtorSection = - getELFSection(".ctors", MCSectionELF::SHT_PROGBITS, - MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_WRITE, - SectionKind::getDataRel()); + getContext().getELFSection(".ctors", MCSectionELF::SHT_PROGBITS, + MCSectionELF::SHF_ALLOC |MCSectionELF::SHF_WRITE, + SectionKind::getDataRel()); StaticDtorSection = - getELFSection(".dtors", MCSectionELF::SHT_PROGBITS, - MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_WRITE, - SectionKind::getDataRel()); + getContext().getELFSection(".dtors", MCSectionELF::SHT_PROGBITS, + MCSectionELF::SHF_ALLOC |MCSectionELF::SHF_WRITE, + SectionKind::getDataRel()); // Exception Handling Sections. @@ -150,47 +128,48 @@ void TargetLoweringObjectFileELF::Initialize(MCContext &Ctx, // runtime hit for C++ apps. Either the contents of the LSDA need to be // adjusted or this should be a data section. LSDASection = - getELFSection(".gcc_except_table", MCSectionELF::SHT_PROGBITS, - MCSectionELF::SHF_ALLOC, SectionKind::getReadOnly()); + getContext().getELFSection(".gcc_except_table", MCSectionELF::SHT_PROGBITS, + MCSectionELF::SHF_ALLOC, + SectionKind::getReadOnly()); EHFrameSection = - getELFSection(".eh_frame", MCSectionELF::SHT_PROGBITS, - MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_WRITE, - SectionKind::getDataRel()); + getContext().getELFSection(".eh_frame", MCSectionELF::SHT_PROGBITS, + MCSectionELF::SHF_ALLOC |MCSectionELF::SHF_WRITE, + SectionKind::getDataRel()); // Debug Info Sections. 
DwarfAbbrevSection = - getELFSection(".debug_abbrev", MCSectionELF::SHT_PROGBITS, 0, - SectionKind::getMetadata()); + getContext().getELFSection(".debug_abbrev", MCSectionELF::SHT_PROGBITS, 0, + SectionKind::getMetadata()); DwarfInfoSection = - getELFSection(".debug_info", MCSectionELF::SHT_PROGBITS, 0, - SectionKind::getMetadata()); + getContext().getELFSection(".debug_info", MCSectionELF::SHT_PROGBITS, 0, + SectionKind::getMetadata()); DwarfLineSection = - getELFSection(".debug_line", MCSectionELF::SHT_PROGBITS, 0, - SectionKind::getMetadata()); + getContext().getELFSection(".debug_line", MCSectionELF::SHT_PROGBITS, 0, + SectionKind::getMetadata()); DwarfFrameSection = - getELFSection(".debug_frame", MCSectionELF::SHT_PROGBITS, 0, - SectionKind::getMetadata()); + getContext().getELFSection(".debug_frame", MCSectionELF::SHT_PROGBITS, 0, + SectionKind::getMetadata()); DwarfPubNamesSection = - getELFSection(".debug_pubnames", MCSectionELF::SHT_PROGBITS, 0, - SectionKind::getMetadata()); + getContext().getELFSection(".debug_pubnames", MCSectionELF::SHT_PROGBITS, 0, + SectionKind::getMetadata()); DwarfPubTypesSection = - getELFSection(".debug_pubtypes", MCSectionELF::SHT_PROGBITS, 0, - SectionKind::getMetadata()); + getContext().getELFSection(".debug_pubtypes", MCSectionELF::SHT_PROGBITS, 0, + SectionKind::getMetadata()); DwarfStrSection = - getELFSection(".debug_str", MCSectionELF::SHT_PROGBITS, 0, - SectionKind::getMetadata()); + getContext().getELFSection(".debug_str", MCSectionELF::SHT_PROGBITS, 0, + SectionKind::getMetadata()); DwarfLocSection = - getELFSection(".debug_loc", MCSectionELF::SHT_PROGBITS, 0, - SectionKind::getMetadata()); + getContext().getELFSection(".debug_loc", MCSectionELF::SHT_PROGBITS, 0, + SectionKind::getMetadata()); DwarfARangesSection = - getELFSection(".debug_aranges", MCSectionELF::SHT_PROGBITS, 0, - SectionKind::getMetadata()); + getContext().getELFSection(".debug_aranges", MCSectionELF::SHT_PROGBITS, 0, + SectionKind::getMetadata()); DwarfRangesSection = - getELFSection(".debug_ranges", MCSectionELF::SHT_PROGBITS, 0, - SectionKind::getMetadata()); + getContext().getELFSection(".debug_ranges", MCSectionELF::SHT_PROGBITS, 0, + SectionKind::getMetadata()); DwarfMacroInfoSection = - getELFSection(".debug_macinfo", MCSectionELF::SHT_PROGBITS, 0, - SectionKind::getMetadata()); + getContext().getELFSection(".debug_macinfo", MCSectionELF::SHT_PROGBITS, 0, + SectionKind::getMetadata()); } @@ -279,9 +258,9 @@ getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind, // Infer section flags from the section name if we can. Kind = getELFKindForNamedSection(SectionName, Kind); - return getELFSection(SectionName, - getELFSectionType(SectionName, Kind), - getELFSectionFlags(Kind), Kind, true); + return getContext().getELFSection(SectionName, + getELFSectionType(SectionName, Kind), + getELFSectionFlags(Kind), Kind, true); } static const char *getSectionPrefixForUniqueGlobal(SectionKind Kind) { @@ -300,19 +279,54 @@ static const char *getSectionPrefixForUniqueGlobal(SectionKind Kind) { return ".gnu.linkonce.d.rel.ro."; } +/// getSectionPrefixForGlobal - Return the section prefix name used by options +/// FunctionsSections and DataSections. 
+static const char *getSectionPrefixForGlobal(SectionKind Kind) { + if (Kind.isText()) return ".text."; + if (Kind.isReadOnly()) return ".rodata."; + + if (Kind.isThreadData()) return ".tdata."; + if (Kind.isThreadBSS()) return ".tbss."; + + if (Kind.isDataNoRel()) return ".data."; + if (Kind.isDataRelLocal()) return ".data.rel.local."; + if (Kind.isDataRel()) return ".data.rel."; + if (Kind.isReadOnlyWithRelLocal()) return ".data.rel.ro.local."; + + assert(Kind.isReadOnlyWithRel() && "Unknown section kind"); + return ".data.rel.ro."; +} + + const MCSection *TargetLoweringObjectFileELF:: SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, Mangler *Mang, const TargetMachine &TM) const { + // If we have -ffunction-section or -fdata-section then we should emit the + // global value to a uniqued section specifically for it. + bool EmitUniquedSection; + if (Kind.isText()) + EmitUniquedSection = TM.getFunctionSections(); + else + EmitUniquedSection = TM.getDataSections(); // If this global is linkonce/weak and the target handles this by emitting it // into a 'uniqued' section name, create and return the section now. - if (GV->isWeakForLinker() && !Kind.isCommon() && !Kind.isBSS()) { - const char *Prefix = getSectionPrefixForUniqueGlobal(Kind); + if ((GV->isWeakForLinker() || EmitUniquedSection) && + !Kind.isCommon() && !Kind.isBSS()) { + const char *Prefix; + if (GV->isWeakForLinker()) + Prefix = getSectionPrefixForUniqueGlobal(Kind); + else { + assert(EmitUniquedSection); + Prefix = getSectionPrefixForGlobal(Kind); + } + SmallString<128> Name(Prefix, Prefix+strlen(Prefix)); MCSymbol *Sym = Mang->getSymbol(GV); Name.append(Sym->getName().begin(), Sym->getName().end()); - return getELFSection(Name.str(), getELFSectionType(Name.str(), Kind), - getELFSectionFlags(Kind), Kind); + return getContext().getELFSection(Name.str(), + getELFSectionType(Name.str(), Kind), + getELFSectionFlags(Kind), Kind); } if (Kind.isText()) return TextSection; @@ -337,11 +351,11 @@ SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, std::string Name = SizeSpec + utostr(Align); - return getELFSection(Name, MCSectionELF::SHT_PROGBITS, - MCSectionELF::SHF_ALLOC | - MCSectionELF::SHF_MERGE | - MCSectionELF::SHF_STRINGS, - Kind); + return getContext().getELFSection(Name, MCSectionELF::SHT_PROGBITS, + MCSectionELF::SHF_ALLOC | + MCSectionELF::SHF_MERGE | + MCSectionELF::SHF_STRINGS, + Kind); } if (Kind.isMergeableConst()) { @@ -426,43 +440,6 @@ getExprForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang, // MachO //===----------------------------------------------------------------------===// -typedef StringMap<const MCSectionMachO*> MachOUniqueMapTy; - -TargetLoweringObjectFileMachO::~TargetLoweringObjectFileMachO() { - // If we have the MachO uniquing map, free it. - delete (MachOUniqueMapTy*)UniquingMap; -} - - -const MCSectionMachO *TargetLoweringObjectFileMachO:: -getMachOSection(StringRef Segment, StringRef Section, - unsigned TypeAndAttributes, - unsigned Reserved2, SectionKind Kind) const { - // We unique sections by their segment/section pair. The returned section - // may not have the same flags as the requested section, if so this should be - // diagnosed by the client as an error. - - // Create the map if it doesn't already exist. - if (UniquingMap == 0) - UniquingMap = new MachOUniqueMapTy(); - MachOUniqueMapTy &Map = *(MachOUniqueMapTy*)UniquingMap; - - // Form the name to look up. 
- SmallString<64> Name; - Name += Segment; - Name.push_back(','); - Name += Section; - - // Do the lookup, if we have a hit, return it. - const MCSectionMachO *&Entry = Map[Name.str()]; - if (Entry) return Entry; - - // Otherwise, return a new section. - return Entry = MCSectionMachO::Create(Segment, Section, TypeAndAttributes, - Reserved2, Kind, getContext()); -} - - void TargetLoweringObjectFileMachO::Initialize(MCContext &Ctx, const TargetMachine &TM) { // _foo.eh symbols are currently always exported so that the linker knows @@ -473,29 +450,31 @@ void TargetLoweringObjectFileMachO::Initialize(MCContext &Ctx, IsFunctionEHFrameSymbolPrivate = false; SupportsWeakOmittedEHFrame = false; - if (UniquingMap != 0) - ((MachOUniqueMapTy*)UniquingMap)->clear(); TargetLoweringObjectFile::Initialize(Ctx, TM); TextSection // .text - = getMachOSection("__TEXT", "__text", - MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS, - SectionKind::getText()); + = getContext().getMachOSection("__TEXT", "__text", + MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS, + SectionKind::getText()); DataSection // .data - = getMachOSection("__DATA", "__data", 0, SectionKind::getDataRel()); + = getContext().getMachOSection("__DATA", "__data", 0, + SectionKind::getDataRel()); CStringSection // .cstring - = getMachOSection("__TEXT", "__cstring", MCSectionMachO::S_CSTRING_LITERALS, - SectionKind::getMergeable1ByteCString()); + = getContext().getMachOSection("__TEXT", "__cstring", + MCSectionMachO::S_CSTRING_LITERALS, + SectionKind::getMergeable1ByteCString()); UStringSection - = getMachOSection("__TEXT","__ustring", 0, - SectionKind::getMergeable2ByteCString()); + = getContext().getMachOSection("__TEXT","__ustring", 0, + SectionKind::getMergeable2ByteCString()); FourByteConstantSection // .literal4 - = getMachOSection("__TEXT", "__literal4", MCSectionMachO::S_4BYTE_LITERALS, - SectionKind::getMergeableConst4()); + = getContext().getMachOSection("__TEXT", "__literal4", + MCSectionMachO::S_4BYTE_LITERALS, + SectionKind::getMergeableConst4()); EightByteConstantSection // .literal8 - = getMachOSection("__TEXT", "__literal8", MCSectionMachO::S_8BYTE_LITERALS, - SectionKind::getMergeableConst8()); + = getContext().getMachOSection("__TEXT", "__literal8", + MCSectionMachO::S_8BYTE_LITERALS, + SectionKind::getMergeableConst8()); // ld_classic doesn't support .literal16 in 32-bit mode, and ld64 falls back // to using it in -static mode. 
@@ -503,110 +482,130 @@ void TargetLoweringObjectFileMachO::Initialize(MCContext &Ctx, if (TM.getRelocationModel() != Reloc::Static && TM.getTargetData()->getPointerSize() == 32) SixteenByteConstantSection = // .literal16 - getMachOSection("__TEXT", "__literal16",MCSectionMachO::S_16BYTE_LITERALS, - SectionKind::getMergeableConst16()); + getContext().getMachOSection("__TEXT", "__literal16", + MCSectionMachO::S_16BYTE_LITERALS, + SectionKind::getMergeableConst16()); ReadOnlySection // .const - = getMachOSection("__TEXT", "__const", 0, SectionKind::getReadOnly()); + = getContext().getMachOSection("__TEXT", "__const", 0, + SectionKind::getReadOnly()); TextCoalSection - = getMachOSection("__TEXT", "__textcoal_nt", - MCSectionMachO::S_COALESCED | - MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS, - SectionKind::getText()); + = getContext().getMachOSection("__TEXT", "__textcoal_nt", + MCSectionMachO::S_COALESCED | + MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS, + SectionKind::getText()); ConstTextCoalSection - = getMachOSection("__TEXT", "__const_coal", MCSectionMachO::S_COALESCED, - SectionKind::getText()); + = getContext().getMachOSection("__TEXT", "__const_coal", + MCSectionMachO::S_COALESCED, + SectionKind::getText()); ConstDataCoalSection - = getMachOSection("__DATA","__const_coal", MCSectionMachO::S_COALESCED, - SectionKind::getText()); + = getContext().getMachOSection("__DATA","__const_coal", + MCSectionMachO::S_COALESCED, + SectionKind::getText()); ConstDataSection // .const_data - = getMachOSection("__DATA", "__const", 0, - SectionKind::getReadOnlyWithRel()); + = getContext().getMachOSection("__DATA", "__const", 0, + SectionKind::getReadOnlyWithRel()); DataCoalSection - = getMachOSection("__DATA","__datacoal_nt", MCSectionMachO::S_COALESCED, - SectionKind::getDataRel()); + = getContext().getMachOSection("__DATA","__datacoal_nt", + MCSectionMachO::S_COALESCED, + SectionKind::getDataRel()); DataCommonSection - = getMachOSection("__DATA","__common", MCSectionMachO::S_ZEROFILL, - SectionKind::getBSS()); + = getContext().getMachOSection("__DATA","__common", + MCSectionMachO::S_ZEROFILL, + SectionKind::getBSS()); DataBSSSection - = getMachOSection("__DATA","__bss", MCSectionMachO::S_ZEROFILL, - SectionKind::getBSS()); + = getContext().getMachOSection("__DATA","__bss", MCSectionMachO::S_ZEROFILL, + SectionKind::getBSS()); LazySymbolPointerSection - = getMachOSection("__DATA", "__la_symbol_ptr", - MCSectionMachO::S_LAZY_SYMBOL_POINTERS, - SectionKind::getMetadata()); + = getContext().getMachOSection("__DATA", "__la_symbol_ptr", + MCSectionMachO::S_LAZY_SYMBOL_POINTERS, + SectionKind::getMetadata()); NonLazySymbolPointerSection - = getMachOSection("__DATA", "__nl_symbol_ptr", - MCSectionMachO::S_NON_LAZY_SYMBOL_POINTERS, - SectionKind::getMetadata()); + = getContext().getMachOSection("__DATA", "__nl_symbol_ptr", + MCSectionMachO::S_NON_LAZY_SYMBOL_POINTERS, + SectionKind::getMetadata()); if (TM.getRelocationModel() == Reloc::Static) { StaticCtorSection - = getMachOSection("__TEXT", "__constructor", 0,SectionKind::getDataRel()); + = getContext().getMachOSection("__TEXT", "__constructor", 0, + SectionKind::getDataRel()); StaticDtorSection - = getMachOSection("__TEXT", "__destructor", 0, SectionKind::getDataRel()); + = getContext().getMachOSection("__TEXT", "__destructor", 0, + SectionKind::getDataRel()); } else { StaticCtorSection - = getMachOSection("__DATA", "__mod_init_func", - MCSectionMachO::S_MOD_INIT_FUNC_POINTERS, - SectionKind::getDataRel()); + = getContext().getMachOSection("__DATA", 
"__mod_init_func", + MCSectionMachO::S_MOD_INIT_FUNC_POINTERS, + SectionKind::getDataRel()); StaticDtorSection - = getMachOSection("__DATA", "__mod_term_func", - MCSectionMachO::S_MOD_TERM_FUNC_POINTERS, - SectionKind::getDataRel()); + = getContext().getMachOSection("__DATA", "__mod_term_func", + MCSectionMachO::S_MOD_TERM_FUNC_POINTERS, + SectionKind::getDataRel()); } // Exception Handling. - LSDASection = getMachOSection("__TEXT", "__gcc_except_tab", 0, - SectionKind::getReadOnlyWithRel()); + LSDASection = getContext().getMachOSection("__TEXT", "__gcc_except_tab", 0, + SectionKind::getReadOnlyWithRel()); EHFrameSection = - getMachOSection("__TEXT", "__eh_frame", - MCSectionMachO::S_COALESCED | - MCSectionMachO::S_ATTR_NO_TOC | - MCSectionMachO::S_ATTR_STRIP_STATIC_SYMS | - MCSectionMachO::S_ATTR_LIVE_SUPPORT, - SectionKind::getReadOnly()); + getContext().getMachOSection("__TEXT", "__eh_frame", + MCSectionMachO::S_COALESCED | + MCSectionMachO::S_ATTR_NO_TOC | + MCSectionMachO::S_ATTR_STRIP_STATIC_SYMS | + MCSectionMachO::S_ATTR_LIVE_SUPPORT, + SectionKind::getReadOnly()); // Debug Information. DwarfAbbrevSection = - getMachOSection("__DWARF", "__debug_abbrev", MCSectionMachO::S_ATTR_DEBUG, - SectionKind::getMetadata()); + getContext().getMachOSection("__DWARF", "__debug_abbrev", + MCSectionMachO::S_ATTR_DEBUG, + SectionKind::getMetadata()); DwarfInfoSection = - getMachOSection("__DWARF", "__debug_info", MCSectionMachO::S_ATTR_DEBUG, - SectionKind::getMetadata()); + getContext().getMachOSection("__DWARF", "__debug_info", + MCSectionMachO::S_ATTR_DEBUG, + SectionKind::getMetadata()); DwarfLineSection = - getMachOSection("__DWARF", "__debug_line", MCSectionMachO::S_ATTR_DEBUG, - SectionKind::getMetadata()); + getContext().getMachOSection("__DWARF", "__debug_line", + MCSectionMachO::S_ATTR_DEBUG, + SectionKind::getMetadata()); DwarfFrameSection = - getMachOSection("__DWARF", "__debug_frame", MCSectionMachO::S_ATTR_DEBUG, - SectionKind::getMetadata()); + getContext().getMachOSection("__DWARF", "__debug_frame", + MCSectionMachO::S_ATTR_DEBUG, + SectionKind::getMetadata()); DwarfPubNamesSection = - getMachOSection("__DWARF", "__debug_pubnames", MCSectionMachO::S_ATTR_DEBUG, - SectionKind::getMetadata()); + getContext().getMachOSection("__DWARF", "__debug_pubnames", + MCSectionMachO::S_ATTR_DEBUG, + SectionKind::getMetadata()); DwarfPubTypesSection = - getMachOSection("__DWARF", "__debug_pubtypes", MCSectionMachO::S_ATTR_DEBUG, - SectionKind::getMetadata()); + getContext().getMachOSection("__DWARF", "__debug_pubtypes", + MCSectionMachO::S_ATTR_DEBUG, + SectionKind::getMetadata()); DwarfStrSection = - getMachOSection("__DWARF", "__debug_str", MCSectionMachO::S_ATTR_DEBUG, - SectionKind::getMetadata()); + getContext().getMachOSection("__DWARF", "__debug_str", + MCSectionMachO::S_ATTR_DEBUG, + SectionKind::getMetadata()); DwarfLocSection = - getMachOSection("__DWARF", "__debug_loc", MCSectionMachO::S_ATTR_DEBUG, - SectionKind::getMetadata()); + getContext().getMachOSection("__DWARF", "__debug_loc", + MCSectionMachO::S_ATTR_DEBUG, + SectionKind::getMetadata()); DwarfARangesSection = - getMachOSection("__DWARF", "__debug_aranges", MCSectionMachO::S_ATTR_DEBUG, - SectionKind::getMetadata()); + getContext().getMachOSection("__DWARF", "__debug_aranges", + MCSectionMachO::S_ATTR_DEBUG, + SectionKind::getMetadata()); DwarfRangesSection = - getMachOSection("__DWARF", "__debug_ranges", MCSectionMachO::S_ATTR_DEBUG, - SectionKind::getMetadata()); + getContext().getMachOSection("__DWARF", "__debug_ranges", + 
MCSectionMachO::S_ATTR_DEBUG, + SectionKind::getMetadata()); DwarfMacroInfoSection = - getMachOSection("__DWARF", "__debug_macinfo", MCSectionMachO::S_ATTR_DEBUG, - SectionKind::getMetadata()); + getContext().getMachOSection("__DWARF", "__debug_macinfo", + MCSectionMachO::S_ATTR_DEBUG, + SectionKind::getMetadata()); DwarfDebugInlineSection = - getMachOSection("__DWARF", "__debug_inlined", MCSectionMachO::S_ATTR_DEBUG, - SectionKind::getMetadata()); + getContext().getMachOSection("__DWARF", "__debug_inlined", + MCSectionMachO::S_ATTR_DEBUG, + SectionKind::getMetadata()); } const MCSection *TargetLoweringObjectFileMachO:: @@ -619,8 +618,8 @@ getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind, MCSectionMachO::ParseSectionSpecifier(GV->getSection(), Segment, Section, TAA, StubSize); if (!ErrorCode.empty()) { - // If invalid, report the error with llvm_report_error. - llvm_report_error("Global variable '" + GV->getNameStr() + + // If invalid, report the error with report_fatal_error. + report_fatal_error("Global variable '" + GV->getNameStr() + "' has an invalid section specifier '" + GV->getSection()+ "': " + ErrorCode + "."); // Fall back to dropping it into the data section. @@ -629,14 +628,14 @@ getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind, // Get the section. const MCSectionMachO *S = - getMachOSection(Segment, Section, TAA, StubSize, Kind); + getContext().getMachOSection(Segment, Section, TAA, StubSize, Kind); // Okay, now that we got the section, verify that the TAA & StubSize agree. // If the user declared multiple globals with different section flags, we need // to reject it here. if (S->getTypeAndAttributes() != TAA || S->getStubSize() != StubSize) { - // If invalid, report the error with llvm_report_error. - llvm_report_error("Global variable '" + GV->getNameStr() + + // If invalid, report the error with report_fatal_error. + report_fatal_error("Global variable '" + GV->getNameStr() + "' section type or attributes does not match previous" " section specifier"); } @@ -806,7 +805,7 @@ const MCSection *TargetLoweringObjectFileCOFF:: getCOFFSection(StringRef Name, bool isDirective, SectionKind Kind) const { // Create the map if it doesn't already exist. if (UniquingMap == 0) - UniquingMap = new MachOUniqueMapTy(); + UniquingMap = new COFFUniqueMapTy(); COFFUniqueMapTy &Map = *(COFFUniqueMapTy*)UniquingMap; // Do the lookup, if we have a hit, return it. diff --git a/lib/CodeGen/VirtRegRewriter.cpp b/lib/CodeGen/VirtRegRewriter.cpp index 0b7fde7..7f0412c 100644 --- a/lib/CodeGen/VirtRegRewriter.cpp +++ b/lib/CodeGen/VirtRegRewriter.cpp @@ -9,7 +9,9 @@ #define DEBUG_TYPE "virtregrewriter" #include "VirtRegRewriter.h" +#include "VirtRegMap.h" #include "llvm/Function.h" +#include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" @@ -893,7 +895,7 @@ unsigned ReuseInfo::GetRegForReload(const TargetRegisterClass *RC, bool DoReMat = NewOp.StackSlotOrReMat > VirtRegMap::MAX_STACK_SLOT; int SSorRMId = DoReMat - ? VRM.getReMatId(NewOp.VirtReg) : NewOp.StackSlotOrReMat; + ? VRM.getReMatId(NewOp.VirtReg) : (int) NewOp.StackSlotOrReMat; // Back-schedule reloads and remats. 
MachineBasicBlock::iterator InsertLoc = @@ -1064,6 +1066,7 @@ class LocalRewriter : public VirtRegRewriter { VirtRegMap *VRM; BitVector AllocatableRegs; DenseMap<MachineInstr*, unsigned> DistanceMap; + DenseMap<int, SmallVector<MachineInstr*,4> > Slot2DbgValues; MachineBasicBlock *MBB; // Basic block currently being processed. @@ -1188,12 +1191,24 @@ bool LocalRewriter::runOnMachineFunction(MachineFunction &MF, VirtRegMap &vrm, // Mark unused spill slots. MachineFrameInfo *MFI = MF.getFrameInfo(); int SS = VRM->getLowSpillSlot(); - if (SS != VirtRegMap::NO_STACK_SLOT) - for (int e = VRM->getHighSpillSlot(); SS <= e; ++SS) + if (SS != VirtRegMap::NO_STACK_SLOT) { + for (int e = VRM->getHighSpillSlot(); SS <= e; ++SS) { + SmallVector<MachineInstr*, 4> &DbgValues = Slot2DbgValues[SS]; if (!VRM->isSpillSlotUsed(SS)) { MFI->RemoveStackObject(SS); + for (unsigned j = 0, ee = DbgValues.size(); j != ee; ++j) { + MachineInstr *DVMI = DbgValues[j]; + MachineBasicBlock *DVMBB = DVMI->getParent(); + DEBUG(dbgs() << "Removing debug info referencing FI#" << SS << '\n'); + VRM->RemoveMachineInstrFromMaps(DVMI); + DVMBB->erase(DVMI); + } ++NumDSS; } + DbgValues.clear(); + } + } + Slot2DbgValues.clear(); return true; } @@ -1903,6 +1918,10 @@ LocalRewriter::RewriteMBB(LiveIntervals *LIs, bool BackTracked = false; MachineInstr &MI = *MII; + // Remember DbgValue's which reference stack slots. + if (MI.isDebugValue() && MI.getOperand(0).isFI()) + Slot2DbgValues[MI.getOperand(0).getIndex()].push_back(&MI); + /// ReusedOperands - Keep track of operand reuse in case we need to undo /// reuse. ReuseInfo ReusedOperands(MI, TRI); diff --git a/lib/CodeGen/VirtRegRewriter.h b/lib/CodeGen/VirtRegRewriter.h index 44f9df6..93474e0 100644 --- a/lib/CodeGen/VirtRegRewriter.h +++ b/lib/CodeGen/VirtRegRewriter.h @@ -10,11 +10,10 @@ #ifndef LLVM_CODEGEN_VIRTREGREWRITER_H #define LLVM_CODEGEN_VIRTREGREWRITER_H -#include "llvm/CodeGen/LiveIntervalAnalysis.h" -#include "llvm/CodeGen/MachineFunction.h" -#include "VirtRegMap.h" - namespace llvm { + class LiveIntervals; + class MachineFunction; + class VirtRegMap; /// VirtRegRewriter interface: Implementations of this interface assign /// spilled virtual registers to stack slots, rewriting the code. 
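
Note on the VirtRegRewriter.h hunk above: the header drops whole-header includes in favor of forward declarations inside namespace llvm, since the interface only mentions these types by pointer or reference. A minimal sketch of that pattern, with purely illustrative names (Rewriter.h and the Rewriter class are not part of this change):

    // Rewriter.h -- illustrative header using the same forward-declaration trick.
    #ifndef REWRITER_H
    #define REWRITER_H

    namespace llvm {
      // Declared, not included: clients of this header never need the full
      // definitions, only code in the implementing .cpp does.
      class LiveIntervals;
      class MachineFunction;
      class VirtRegMap;

      class Rewriter {
      public:
        virtual ~Rewriter() {}
        // Incomplete types are fine in a declaration; only the .cpp that
        // implements or calls this needs the real headers.
        virtual bool runOnMachineFunction(MachineFunction &MF, VirtRegMap &VRM,
                                          LiveIntervals *LIs) = 0;
      };
    } // end namespace llvm

    #endif

This keeps every file that includes the interface header from transitively pulling in LiveIntervalAnalysis.h, MachineFunction.h, and VirtRegMap.h.
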
diff --git a/lib/ExecutionEngine/ExecutionEngine.cpp b/lib/ExecutionEngine/ExecutionEngine.cpp index da21c2d..b17827e 100644 --- a/lib/ExecutionEngine/ExecutionEngine.cpp +++ b/lib/ExecutionEngine/ExecutionEngine.cpp @@ -379,27 +379,27 @@ int ExecutionEngine::runFunctionAsMain(Function *Fn, switch (NumArgs) { case 3: if (FTy->getParamType(2) != PPInt8Ty) { - llvm_report_error("Invalid type for third argument of main() supplied"); + report_fatal_error("Invalid type for third argument of main() supplied"); } // FALLS THROUGH case 2: if (FTy->getParamType(1) != PPInt8Ty) { - llvm_report_error("Invalid type for second argument of main() supplied"); + report_fatal_error("Invalid type for second argument of main() supplied"); } // FALLS THROUGH case 1: if (!FTy->getParamType(0)->isIntegerTy(32)) { - llvm_report_error("Invalid type for first argument of main() supplied"); + report_fatal_error("Invalid type for first argument of main() supplied"); } // FALLS THROUGH case 0: if (!FTy->getReturnType()->isIntegerTy() && !FTy->getReturnType()->isVoidTy()) { - llvm_report_error("Invalid return type of main() supplied"); + report_fatal_error("Invalid return type of main() supplied"); } break; default: - llvm_report_error("Invalid number of arguments of main() supplied"); + report_fatal_error("Invalid number of arguments of main() supplied"); } ArgvArray CArgv; @@ -771,7 +771,7 @@ GenericValue ExecutionEngine::getConstantValue(const Constant *C) { std::string msg; raw_string_ostream Msg(msg); Msg << "ConstantExpr not handled: " << *CE; - llvm_report_error(Msg.str()); + report_fatal_error(Msg.str()); } GenericValue Result; @@ -807,7 +807,7 @@ GenericValue ExecutionEngine::getConstantValue(const Constant *C) { std::string msg; raw_string_ostream Msg(msg); Msg << "ERROR: Constant unimplemented for type: " << *C->getType(); - llvm_report_error(Msg.str()); + report_fatal_error(Msg.str()); } return Result; } @@ -935,7 +935,7 @@ void ExecutionEngine::LoadValueFromMemory(GenericValue &Result, std::string msg; raw_string_ostream Msg(msg); Msg << "Cannot load value of type " << *Ty << "!"; - llvm_report_error(Msg.str()); + report_fatal_error(Msg.str()); } } @@ -1051,7 +1051,7 @@ void ExecutionEngine::emitGlobals() { sys::DynamicLibrary::SearchForAddressOfSymbol(I->getName())) addGlobalMapping(I, SymAddr); else { - llvm_report_error("Could not resolve external global address: " + report_fatal_error("Could not resolve external global address: " +I->getName()); } } diff --git a/lib/ExecutionEngine/Interpreter/Execution.cpp b/lib/ExecutionEngine/Interpreter/Execution.cpp index a2aad5a..0748b54 100644 --- a/lib/ExecutionEngine/Interpreter/Execution.cpp +++ b/lib/ExecutionEngine/Interpreter/Execution.cpp @@ -631,7 +631,7 @@ void Interpreter::visitUnwindInst(UnwindInst &I) { do { ECStack.pop_back(); if (ECStack.empty()) - llvm_report_error("Empty stack during unwind!"); + report_fatal_error("Empty stack during unwind!"); Inst = ECStack.back().Caller.getInstruction(); } while (!(Inst && isa<InvokeInst>(Inst))); @@ -644,7 +644,7 @@ void Interpreter::visitUnwindInst(UnwindInst &I) { } void Interpreter::visitUnreachableInst(UnreachableInst &I) { - llvm_report_error("Program executed an 'unreachable' instruction!"); + report_fatal_error("Program executed an 'unreachable' instruction!"); } void Interpreter::visitBranchInst(BranchInst &I) { diff --git a/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp b/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp index 3ba783b..26a53b5 100644 --- 
a/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp +++ b/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp @@ -126,7 +126,7 @@ static ffi_type *ffiTypeFor(const Type *Ty) { default: break; } // TODO: Support other types such as StructTyID, ArrayTyID, OpaqueTyID, etc. - llvm_report_error("Type could not be mapped for use with libffi."); + report_fatal_error("Type could not be mapped for use with libffi."); return NULL; } @@ -174,7 +174,7 @@ static void *ffiValueFor(const Type *Ty, const GenericValue &AV, default: break; } // TODO: Support other types such as StructTyID, ArrayTyID, OpaqueTyID, etc. - llvm_report_error("Type value could not be mapped for use with libffi."); + report_fatal_error("Type value could not be mapped for use with libffi."); return NULL; } @@ -188,7 +188,7 @@ static bool ffiInvoke(RawFunc Fn, Function *F, // TODO: We don't have type information about the remaining arguments, because // this information is never passed into ExecutionEngine::runFunction(). if (ArgVals.size() > NumArgs && F->isVarArg()) { - llvm_report_error("Calling external var arg function '" + F->getName() + report_fatal_error("Calling external var arg function '" + F->getName() + "' is not supported by the Interpreter."); } @@ -284,7 +284,7 @@ GenericValue Interpreter::callExternalFunction(Function *F, errs() << "Tried to execute an unknown external function: " << F->getType()->getDescription() << " __main\n"; else - llvm_report_error("Tried to execute an unknown external function: " + + report_fatal_error("Tried to execute an unknown external function: " + F->getType()->getDescription() + " " +F->getName()); #ifndef USE_LIBFFI errs() << "Recompiling LLVM with --enable-libffi might help.\n"; @@ -325,7 +325,7 @@ GenericValue lle_X_exit(const FunctionType *FT, GenericValue lle_X_abort(const FunctionType *FT, const std::vector<GenericValue> &Args) { //FIXME: should we report or raise here? - //llvm_report_error("Interpreted program raised SIGABRT"); + //report_fatal_error("Interpreted program raised SIGABRT"); raise (SIGABRT); return GenericValue(); } diff --git a/lib/ExecutionEngine/Interpreter/Interpreter.h b/lib/ExecutionEngine/Interpreter/Interpreter.h index bc4200b..564e9ab 100644 --- a/lib/ExecutionEngine/Interpreter/Interpreter.h +++ b/lib/ExecutionEngine/Interpreter/Interpreter.h @@ -190,12 +190,10 @@ public: return &(ECStack.back ().VarArgs[0]); } - //FIXME: private: -public: +private: // Helper functions GenericValue executeGEPOperation(Value *Ptr, gep_type_iterator I, gep_type_iterator E, ExecutionContext &SF); -private: // Helper functions // SwitchToNewBasicBlock - Start execution in a new basic block and run any // PHI nodes in the top of the block. This is used for intraprocedural // control flow. 
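
The ExecutionEngine and Interpreter hunks above are part of a mechanical rename of llvm_report_error to report_fatal_error. A small sketch of the calling pattern those sites use, assuming only what the hunks themselves show (ErrorHandling.h plus a raw_string_ostream-built message); the helper name is hypothetical:

    #include "llvm/Support/ErrorHandling.h"
    #include "llvm/Support/raw_ostream.h"
    #include <string>

    // Hypothetical helper mirroring the rewritten call sites: build the
    // message incrementally, then hand it to report_fatal_error, which
    // prints it and aborts -- it does not return.
    static void failOnUnhandled(const char *What) {
      std::string msg;
      llvm::raw_string_ostream Msg(msg);
      Msg << "not handled: " << What;
      llvm::report_fatal_error(Msg.str());
    }
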
diff --git a/lib/ExecutionEngine/JIT/Intercept.cpp b/lib/ExecutionEngine/JIT/Intercept.cpp index c00b60a..b367033 100644 --- a/lib/ExecutionEngine/JIT/Intercept.cpp +++ b/lib/ExecutionEngine/JIT/Intercept.cpp @@ -142,7 +142,7 @@ void *JIT::getPointerToNamedFunction(const std::string &Name, return RP; if (AbortOnFailure) { - llvm_report_error("Program used external function '"+Name+ + report_fatal_error("Program used external function '"+Name+ "' which could not be resolved!"); } return 0; diff --git a/lib/ExecutionEngine/JIT/JIT.cpp b/lib/ExecutionEngine/JIT/JIT.cpp index dd74d73..546d2b2 100644 --- a/lib/ExecutionEngine/JIT/JIT.cpp +++ b/lib/ExecutionEngine/JIT/JIT.cpp @@ -304,7 +304,7 @@ JIT::JIT(Module *M, TargetMachine &tm, TargetJITInfo &tji, // Turn the machine code intermediate representation into bytes in memory that // may be executed. if (TM.addPassesToEmitMachineCode(PM, *JCE, OptLevel)) { - llvm_report_error("Target does not support machine code emission!"); + report_fatal_error("Target does not support machine code emission!"); } // Register routine for informing unwinding runtime about new EH frames @@ -352,7 +352,7 @@ void JIT::addModule(Module *M) { // Turn the machine code intermediate representation into bytes in memory // that may be executed. if (TM.addPassesToEmitMachineCode(PM, *JCE, CodeGenOpt::Default)) { - llvm_report_error("Target does not support machine code emission!"); + report_fatal_error("Target does not support machine code emission!"); } // Initialize passes. @@ -383,7 +383,7 @@ bool JIT::removeModule(Module *M) { // Turn the machine code intermediate representation into bytes in memory // that may be executed. if (TM.addPassesToEmitMachineCode(PM, *JCE, CodeGenOpt::Default)) { - llvm_report_error("Target does not support machine code emission!"); + report_fatal_error("Target does not support machine code emission!"); } // Initialize passes. @@ -665,7 +665,7 @@ void *JIT::getPointerToFunction(Function *F) { // exists in this Module. std::string ErrorMsg; if (F->Materialize(&ErrorMsg)) { - llvm_report_error("Error reading function '" + F->getName()+ + report_fatal_error("Error reading function '" + F->getName()+ "' from bitcode file: " + ErrorMsg); } @@ -704,7 +704,7 @@ void *JIT::getOrEmitGlobalVariable(const GlobalVariable *GV) { #endif Ptr = sys::DynamicLibrary::SearchForAddressOfSymbol(GV->getName()); if (Ptr == 0) { - llvm_report_error("Could not resolve external global address: " + report_fatal_error("Could not resolve external global address: " +GV->getName()); } addGlobalMapping(GV, Ptr); @@ -754,7 +754,7 @@ char* JIT::getMemoryForGV(const GlobalVariable* GV) { // situation. It's returned in the same block of memory as code which may // not be writable. if (isGVCompilationDisabled() && !GV->isConstant()) { - llvm_report_error("Compilation of non-internal GlobalValue is disabled!"); + report_fatal_error("Compilation of non-internal GlobalValue is disabled!"); } // Some applications require globals and code to live together, so they may diff --git a/lib/ExecutionEngine/JIT/JITDebugRegisterer.cpp b/lib/ExecutionEngine/JIT/JITDebugRegisterer.cpp index 68471bd..749a57d 100644 --- a/lib/ExecutionEngine/JIT/JITDebugRegisterer.cpp +++ b/lib/ExecutionEngine/JIT/JITDebugRegisterer.cpp @@ -80,7 +80,7 @@ std::string JITDebugRegisterer::MakeELF(const Function *F, DebugInfo &I) { // Copy the binary into the .text section. This isn't necessary, but it's // useful to be able to disassemble the ELF by hand. 
- ELFSection &Text = EW.getTextSection((Function *)F); + ELFSection &Text = EW.getTextSection(const_cast<Function *>(F)); Text.Addr = (uint64_t)I.FnStart; // TODO: We could eliminate this copy if we somehow used a pointer/size pair // instead of a vector. diff --git a/lib/ExecutionEngine/JIT/JITDwarfEmitter.cpp b/lib/ExecutionEngine/JIT/JITDwarfEmitter.cpp index 2f42e6b..4b3ca87 100644 --- a/lib/ExecutionEngine/JIT/JITDwarfEmitter.cpp +++ b/lib/ExecutionEngine/JIT/JITDwarfEmitter.cpp @@ -52,7 +52,7 @@ unsigned char* JITDwarfEmitter::EmitDwarfTable(MachineFunction& F, unsigned char* Result = 0; - const std::vector<Function *> Personalities = MMI->getPersonalities(); + const std::vector<const Function *> Personalities = MMI->getPersonalities(); EHFramePtr = EmitCommonEHFrame(Personalities[MMI->getPersonalityIndex()]); Result = EmitEHFrame(Personalities[MMI->getPersonalityIndex()], EHFramePtr, @@ -75,7 +75,7 @@ JITDwarfEmitter::EmitFrameMoves(intptr_t BaseLabelPtr, MCSymbol *Label = Move.getLabel(); // Throw out move if the label is invalid. - if (Label && !Label->isDefined()) + if (Label && (*JCE->getLabelLocations())[Label] == 0) continue; intptr_t LabelPtr = 0; @@ -199,9 +199,9 @@ unsigned char* JITDwarfEmitter::EmitExceptionTable(MachineFunction* MF, assert(MMI && "MachineModuleInfo not registered!"); // Map all labels and get rid of any dead landing pads. - MMI->TidyLandingPads(); + MMI->TidyLandingPads(JCE->getLabelLocations()); - const std::vector<GlobalVariable *> &TypeInfos = MMI->getTypeInfos(); + const std::vector<const GlobalVariable *> &TypeInfos = MMI->getTypeInfos(); const std::vector<unsigned> &FilterIds = MMI->getFilterIds(); const std::vector<LandingPadInfo> &PadInfos = MMI->getLandingPads(); if (PadInfos.empty()) return 0; @@ -450,7 +450,7 @@ unsigned char* JITDwarfEmitter::EmitExceptionTable(MachineFunction* MF, // Emit the type ids. for (unsigned M = TypeInfos.size(); M; --M) { - GlobalVariable *GV = TypeInfos[M - 1]; + const GlobalVariable *GV = TypeInfos[M - 1]; if (GV) { if (TD->getPointerSize() == sizeof(int32_t)) @@ -609,7 +609,7 @@ unsigned JITDwarfEmitter::GetDwarfTableSizeInBytes(MachineFunction& F, FinalSize += GetExceptionTableSizeInBytes(&F); - const std::vector<Function *> Personalities = MMI->getPersonalities(); + const std::vector<const Function *> Personalities = MMI->getPersonalities(); FinalSize += GetCommonEHFrameSizeInBytes(Personalities[MMI->getPersonalityIndex()]); @@ -711,7 +711,7 @@ JITDwarfEmitter::GetFrameMovesSizeInBytes(intptr_t BaseLabelPtr, MCSymbol *Label = Move.getLabel(); // Throw out move if the label is invalid. - if (Label && !Label->isDefined()) + if (Label && (*JCE->getLabelLocations())[Label] == 0) continue; intptr_t LabelPtr = 0; @@ -780,9 +780,9 @@ JITDwarfEmitter::GetExceptionTableSizeInBytes(MachineFunction* MF) const { unsigned FinalSize = 0; // Map all labels and get rid of any dead landing pads. 
- MMI->TidyLandingPads(); + MMI->TidyLandingPads(JCE->getLabelLocations()); - const std::vector<GlobalVariable *> &TypeInfos = MMI->getTypeInfos(); + const std::vector<const GlobalVariable *> &TypeInfos = MMI->getTypeInfos(); const std::vector<unsigned> &FilterIds = MMI->getFilterIds(); const std::vector<LandingPadInfo> &PadInfos = MMI->getLandingPads(); if (PadInfos.empty()) return 0; diff --git a/lib/ExecutionEngine/JIT/JITEmitter.cpp b/lib/ExecutionEngine/JIT/JITEmitter.cpp index 7b8ab9e..e3855b2 100644 --- a/lib/ExecutionEngine/JIT/JITEmitter.cpp +++ b/lib/ExecutionEngine/JIT/JITEmitter.cpp @@ -23,6 +23,7 @@ #include "llvm/Analysis/DebugInfo.h" #include "llvm/CodeGen/JITCodeEmitter.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineCodeInfo.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/CodeGen/MachineModuleInfo.h" @@ -30,8 +31,8 @@ #include "llvm/ExecutionEngine/GenericValue.h" #include "llvm/ExecutionEngine/JITEventListener.h" #include "llvm/ExecutionEngine/JITMemoryManager.h" -#include "llvm/CodeGen/MachineCodeInfo.h" #include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetJITInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" @@ -43,7 +44,6 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/System/Disassembler.h" #include "llvm/System/Memory.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" @@ -369,7 +369,7 @@ namespace { ValueMap<const Function *, EmittedCode, EmittedFunctionConfig> EmittedFunctions; - DILocation PrevDLT; + DebugLoc PrevDL; /// Instance of the JIT JIT *TheJIT; @@ -377,14 +377,14 @@ namespace { public: JITEmitter(JIT &jit, JITMemoryManager *JMM, TargetMachine &TM) : SizeEstimate(0), Resolver(jit, *this), MMI(0), CurFn(0), - EmittedFunctions(this), PrevDLT(NULL), TheJIT(&jit) { + EmittedFunctions(this), TheJIT(&jit) { MemMgr = JMM ? JMM : JITMemoryManager::CreateDefaultMemManager(); if (jit.getJITInfo().needsGOT()) { MemMgr->AllocateGOT(); DEBUG(dbgs() << "JIT is managing a GOT\n"); } - if (DwarfExceptionHandling || JITEmitDebugInfo) { + if (JITExceptionHandling || JITEmitDebugInfo) { DE.reset(new JITDwarfEmitter(jit)); } if (JITEmitDebugInfo) { @@ -463,6 +463,10 @@ namespace { LabelLocations[Label] = getCurrentPCValue(); } + virtual DenseMap<MCSymbol*, uintptr_t> *getLabelLocations() { + return &LabelLocations; + } + virtual uintptr_t getLabelAddress(MCSymbol *Label) const { assert(LabelLocations.count(Label) && "Label not emitted!"); return LabelLocations.find(Label)->second; @@ -737,7 +741,7 @@ void *JITResolver::JITCompilerFn(void *Stub) { // If lazy compilation is disabled, emit a useful error message and abort. if (!JR->TheJIT->isCompilingLazily()) { - llvm_report_error("LLVM JIT requested to do lazy compilation of function '" + report_fatal_error("LLVM JIT requested to do lazy compilation of function '" + F->getName() + "' when lazy compiles are disabled!"); } @@ -823,19 +827,17 @@ void *JITEmitter::getPointerToGVIndirectSym(GlobalValue *V, void *Reference) { void JITEmitter::processDebugLoc(DebugLoc DL, bool BeforePrintingInsn) { if (DL.isUnknown()) return; if (!BeforePrintingInsn) return; - - // FIXME: This is horribly inefficient. 
- DILocation CurDLT(DL.getAsMDNode( - EmissionDetails.MF->getFunction()->getContext())); - if (CurDLT.getScope().getNode() != 0 && PrevDLT.getNode() !=CurDLT.getNode()){ + const LLVMContext& Context = EmissionDetails.MF->getFunction()->getContext(); + + if (DL.getScope(Context) != 0 && PrevDL != DL) { JITEvent_EmittedFunctionDetails::LineStart NextLine; NextLine.Address = getCurrentPCValue(); NextLine.Loc = DL; EmissionDetails.LineStarts.push_back(NextLine); } - PrevDLT = CurDLT; + PrevDL = DL; } static unsigned GetConstantPoolSizeInBytes(MachineConstantPool *MCP, @@ -945,7 +947,7 @@ unsigned JITEmitter::addSizeOfGlobalsInConstantVal( std::string msg; raw_string_ostream Msg(msg); Msg << "ConstantExpr not handled: " << *CE; - llvm_report_error(Msg.str()); + report_fatal_error(Msg.str()); } } } @@ -997,12 +999,13 @@ unsigned JITEmitter::GetSizeOfGlobalsInBytes(MachineFunction &MF) { for (unsigned CurOp = 0; CurOp < NumOps; CurOp++) { const MachineOperand &MO = MI.getOperand(CurOp); if (MO.isGlobal()) { - GlobalValue* V = MO.getGlobal(); + const GlobalValue* V = MO.getGlobal(); const GlobalVariable *GV = dyn_cast<const GlobalVariable>(V); if (!GV) continue; // If seen in previous function, it will have an entry here. - if (TheJIT->getPointerToGlobalIfAvailable(GV)) + if (TheJIT->getPointerToGlobalIfAvailable( + const_cast<GlobalVariable *>(GV))) continue; // If seen earlier in this function, it will have an entry here. // FIXME: it should be possible to combine these tables, by @@ -1212,6 +1215,9 @@ bool JITEmitter::finishFunction(MachineFunction &F) { TheJIT->NotifyFunctionEmitted(*F.getFunction(), FnStart, FnEnd-FnStart, EmissionDetails); + // Reset the previous debug location. + PrevDL = DebugLoc(); + DEBUG(dbgs() << "JIT: Finished CodeGen of [" << (void*)FnStart << "] Function: " << F.getFunction()->getName() << ": " << (FnEnd-FnStart) << " bytes of text, " @@ -1223,45 +1229,44 @@ bool JITEmitter::finishFunction(MachineFunction &F) { // Mark code region readable and executable if it's not so already. 
MemMgr->setMemoryExecutable(); - DEBUG( - if (sys::hasDisassembler()) { - dbgs() << "JIT: Disassembled code:\n"; - dbgs() << sys::disassembleBuffer(FnStart, FnEnd-FnStart, - (uintptr_t)FnStart); - } else { - dbgs() << "JIT: Binary code:\n"; - uint8_t* q = FnStart; - for (int i = 0; q < FnEnd; q += 4, ++i) { - if (i == 4) - i = 0; - if (i == 0) - dbgs() << "JIT: " << (long)(q - FnStart) << ": "; - bool Done = false; - for (int j = 3; j >= 0; --j) { - if (q + j >= FnEnd) - Done = true; - else - dbgs() << (unsigned short)q[j]; + DEBUG({ + if (sys::hasDisassembler()) { + dbgs() << "JIT: Disassembled code:\n"; + dbgs() << sys::disassembleBuffer(FnStart, FnEnd-FnStart, + (uintptr_t)FnStart); + } else { + dbgs() << "JIT: Binary code:\n"; + uint8_t* q = FnStart; + for (int i = 0; q < FnEnd; q += 4, ++i) { + if (i == 4) + i = 0; + if (i == 0) + dbgs() << "JIT: " << (long)(q - FnStart) << ": "; + bool Done = false; + for (int j = 3; j >= 0; --j) { + if (q + j >= FnEnd) + Done = true; + else + dbgs() << (unsigned short)q[j]; + } + if (Done) + break; + dbgs() << ' '; + if (i == 3) + dbgs() << '\n'; } - if (Done) - break; - dbgs() << ' '; - if (i == 3) - dbgs() << '\n'; + dbgs()<< '\n'; } - dbgs()<< '\n'; - } - ); + }); - if (DwarfExceptionHandling || JITEmitDebugInfo) { + if (JITExceptionHandling || JITEmitDebugInfo) { uintptr_t ActualSize = 0; SavedBufferBegin = BufferBegin; SavedBufferEnd = BufferEnd; SavedCurBufferPtr = CurBufferPtr; - if (MemMgr->NeedsExactSize()) { + if (MemMgr->NeedsExactSize()) ActualSize = DE->GetDwarfTableSizeInBytes(F, *this, FnStart, FnEnd); - } BufferBegin = CurBufferPtr = MemMgr->startExceptionTable(F.getFunction(), ActualSize); @@ -1277,7 +1282,7 @@ bool JITEmitter::finishFunction(MachineFunction &F) { BufferEnd = SavedBufferEnd; CurBufferPtr = SavedCurBufferPtr; - if (DwarfExceptionHandling) { + if (JITExceptionHandling) { TheJIT->RegisterTable(FrameRegister); } @@ -1375,7 +1380,7 @@ void JITEmitter::emitConstantPool(MachineConstantPool *MCP) { ConstPoolAddresses.push_back(CAddr); if (CPE.isMachineConstantPoolEntry()) { // FIXME: add support to lower machine constant pool values into bytes! - llvm_report_error("Initialize memory with machine specific constant pool" + report_fatal_error("Initialize memory with machine specific constant pool" "entry has not been implemented!"); } TheJIT->InitializeMemory(CPE.Val.ConstVal, (void*)CAddr); diff --git a/lib/ExecutionEngine/JIT/JITMemoryManager.cpp b/lib/ExecutionEngine/JIT/JITMemoryManager.cpp index a17caa1..653e6f1 100644 --- a/lib/ExecutionEngine/JIT/JITMemoryManager.cpp +++ b/lib/ExecutionEngine/JIT/JITMemoryManager.cpp @@ -15,6 +15,7 @@ #include "llvm/ExecutionEngine/JITMemoryManager.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" +#include "llvm/ADT/Twine.h" #include "llvm/GlobalValue.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/Compiler.h" @@ -22,12 +23,9 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include "llvm/System/Memory.h" -#include <map> #include <vector> #include <cassert> #include <climits> -#include <cstdio> -#include <cstdlib> #include <cstring> using namespace llvm; @@ -614,8 +612,8 @@ sys::MemoryBlock DefaultJITMemoryManager::allocateNewSlab(size_t size) { sys::MemoryBlock *LastSlabPtr = LastSlab.base() ? 
&LastSlab : 0; sys::MemoryBlock B = sys::Memory::AllocateRWX(size, LastSlabPtr, &ErrMsg); if (B.base() == 0) { - llvm_report_error("Allocation failed when allocating new memory in the" - " JIT\n" + ErrMsg); + report_fatal_error("Allocation failed when allocating new memory in the" + " JIT\n" + Twine(ErrMsg)); } LastSlab = B; ++NumSlabs; diff --git a/lib/ExecutionEngine/JIT/OProfileJITEventListener.cpp b/lib/ExecutionEngine/JIT/OProfileJITEventListener.cpp index 9c01b73..1ca084b 100644 --- a/lib/ExecutionEngine/JIT/OProfileJITEventListener.cpp +++ b/lib/ExecutionEngine/JIT/OProfileJITEventListener.cpp @@ -114,26 +114,43 @@ void OProfileJITEventListener::NotifyFunctionEmitted( return; } - // Now we convert the line number information from the address/DebugLoc format - // in Details to the address/filename/lineno format that OProfile expects. - // OProfile 0.9.4 (and maybe later versions) has a bug that causes it to - // ignore line numbers for addresses above 4G. - FilenameCache Filenames; - std::vector<debug_line_info> LineInfo; - LineInfo.reserve(1 + Details.LineStarts.size()); - if (!Details.MF->getDefaultDebugLoc().isUnknown()) { - LineInfo.push_back(LineStartToOProfileFormat( - *Details.MF, Filenames, - reinterpret_cast<uintptr_t>(FnStart), - Details.MF->getDefaultDebugLoc())); - } - for (std::vector<EmittedFunctionDetails::LineStart>::const_iterator + if (!Details.LineStarts.empty()) { + // Now we convert the line number information from the address/DebugLoc + // format in Details to the address/filename/lineno format that OProfile + // expects. Note that OProfile 0.9.4 has a bug that causes it to ignore + // line numbers for addresses above 4G. + FilenameCache Filenames; + std::vector<debug_line_info> LineInfo; + LineInfo.reserve(1 + Details.LineStarts.size()); + + DebugLoc FirstLoc = Details.LineStarts[0].Loc; + assert(!FirstLoc.isUnknown() + && "LineStarts should not contain unknown DebugLocs"); + MDNode *FirstLocScope = FirstLoc.getScope(F.getContext()); + DISubprogram FunctionDI = getDISubprogram(FirstLocScope); + if (FunctionDI.Verify()) { + // If we have debug info for the function itself, use that as the line + // number of the first several instructions. Otherwise, after filling + // LineInfo, we'll adjust the address of the first line number to point at + // the start of the function. + debug_line_info line_info; + line_info.vma = reinterpret_cast<uintptr_t>(FnStart); + line_info.lineno = FunctionDI.getLineNumber(); + line_info.filename = Filenames.getFilename(FirstLocScope); + LineInfo.push_back(line_info); + } + + for (std::vector<EmittedFunctionDetails::LineStart>::const_iterator I = Details.LineStarts.begin(), E = Details.LineStarts.end(); - I != E; ++I) { - LineInfo.push_back(LineStartToOProfileFormat( - *Details.MF, Filenames, I->Address, I->Loc)); - } - if (!LineInfo.empty()) { + I != E; ++I) { + LineInfo.push_back(LineStartToOProfileFormat( + *Details.MF, Filenames, I->Address, I->Loc)); + } + + // In case the function didn't have line info of its own, adjust the first + // line info's address to include the start of the function. 
+ LineInfo[0].vma = reinterpret_cast<uintptr_t>(FnStart); + if (op_write_debug_line_info(Agent, FnStart, LineInfo.size(), &*LineInfo.begin()) == -1) { DEBUG(dbgs() diff --git a/lib/MC/MCAsmInfo.cpp b/lib/MC/MCAsmInfo.cpp index f0da694..2b23994 100644 --- a/lib/MC/MCAsmInfo.cpp +++ b/lib/MC/MCAsmInfo.cpp @@ -35,6 +35,7 @@ MCAsmInfo::MCAsmInfo() { AssemblerDialect = 0; AllowQuotesInName = false; AllowNameToStartWithDigit = false; + AllowPeriodsInName = true; ZeroDirective = "\t.zero\t"; AsciiDirective = "\t.ascii\t"; AscizDirective = "\t.asciz\t"; diff --git a/lib/MC/MCAssembler.cpp b/lib/MC/MCAssembler.cpp index 03b8bd3..69afcc8 100644 --- a/lib/MC/MCAssembler.cpp +++ b/lib/MC/MCAssembler.cpp @@ -212,7 +212,7 @@ static bool isScatteredFixupFullyResolvedSimple(const MCAssembler &Asm, // resolved. This also works in conjunction with absolutized .set, which // requires the compiler to use .set to absolutize the differences between // symbols which the compiler knows to be assembly time constants, so we don't - // need to worry about consider symbol differences fully resolved. + // need to worry about considering symbol differences fully resolved. // Non-relative fixups are only resolved if constant. if (!BaseSection) @@ -342,7 +342,7 @@ bool MCAssembler::EvaluateFixup(const MCAsmLayout &Layout, ++stats::EvaluateFixup; if (!Fixup.Value->EvaluateAsRelocatable(Target, &Layout)) - llvm_report_error("expected relocatable expression"); + report_fatal_error("expected relocatable expression"); // FIXME: How do non-scattered symbols work in ELF? I presume the linker // doesn't support small relocations, but then under what criteria does the @@ -466,12 +466,12 @@ uint64_t MCAssembler::LayoutSection(MCSectionData &SD, int64_t TargetLocation; if (!OF.getOffset().EvaluateAsAbsolute(TargetLocation, &Layout)) - llvm_report_error("expected assembly-time absolute expression"); + report_fatal_error("expected assembly-time absolute expression"); // FIXME: We need a way to communicate this error. int64_t Offset = TargetLocation - FragmentOffset; if (Offset < 0) - llvm_report_error("invalid .org offset '" + Twine(TargetLocation) + + report_fatal_error("invalid .org offset '" + Twine(TargetLocation) + "' (at offset '" + Twine(FragmentOffset) + "'"); EffectiveSize = Offset; @@ -526,7 +526,7 @@ static void WriteFragmentData(const MCAssembler &Asm, const MCAsmLayout &Layout, // multiple .align directives to enforce the semantics it wants), but is // severe enough that we want to report it. How to handle this? if (Count * AF.getValueSize() != FragmentSize) - llvm_report_error("undefined .align directive, value size '" + + report_fatal_error("undefined .align directive, value size '" + Twine(AF.getValueSize()) + "' is not a divisor of padding size '" + Twine(FragmentSize) + "'"); @@ -537,7 +537,7 @@ static void WriteFragmentData(const MCAssembler &Asm, const MCAsmLayout &Layout, // If we are aligning with nops, ask that target to emit the right data. 
if (AF.getEmitNops()) { if (!Asm.getBackend().WriteNopData(Count, OW)) - llvm_report_error("unable to write nop sequence of " + + report_fatal_error("unable to write nop sequence of " + Twine(Count) + " bytes"); break; } @@ -662,7 +662,7 @@ void MCAssembler::Finish() { uint64_t StartOffset = OS.tell(); llvm::OwningPtr<MCObjectWriter> Writer(getBackend().createObjectWriter(OS)); if (!Writer) - llvm_report_error("unable to create object writer!"); + report_fatal_error("unable to create object writer!"); // Allow the object writer a chance to perform post-layout binding (for // example, to set the index fields in the symbol data). @@ -715,6 +715,8 @@ bool MCAssembler::FixupNeedsRelaxation(const MCAsmFixup &Fixup, return true; // Otherwise, relax if the value is too big for a (signed) i8. + // + // FIXME: This is target dependent! return int64_t(Value) != int64_t(int8_t(Value)); } diff --git a/lib/MC/MCContext.cpp b/lib/MC/MCContext.cpp index e02cbc7..dc757bb 100644 --- a/lib/MC/MCContext.cpp +++ b/lib/MC/MCContext.cpp @@ -9,20 +9,35 @@ #include "llvm/MC/MCContext.h" #include "llvm/MC/MCAsmInfo.h" -#include "llvm/MC/MCSection.h" +#include "llvm/MC/MCSectionMachO.h" +#include "llvm/MC/MCSectionELF.h" #include "llvm/MC/MCSymbol.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/Twine.h" using namespace llvm; +typedef StringMap<const MCSectionMachO*> MachOUniqueMapTy; +typedef StringMap<const MCSectionELF*> ELFUniqueMapTy; + + MCContext::MCContext(const MCAsmInfo &mai) : MAI(mai), NextUniqueID(0) { + MachOUniquingMap = 0; + ELFUniquingMap = 0; } MCContext::~MCContext() { - // NOTE: The sections are all allocated out of a bump pointer allocator, + // NOTE: The symbols are all allocated out of a bump pointer allocator, // we don't need to free them here. + + // If we have the MachO uniquing map, free it. + delete (MachOUniqueMapTy*)MachOUniquingMap; + delete (ELFUniqueMapTy*)ELFUniquingMap; } +//===----------------------------------------------------------------------===// +// Symbol Manipulation +//===----------------------------------------------------------------------===// + MCSymbol *MCContext::GetOrCreateSymbol(StringRef Name) { assert(!Name.empty() && "Normal symbols cannot be unnamed!"); @@ -55,3 +70,56 @@ MCSymbol *MCContext::CreateTempSymbol() { MCSymbol *MCContext::LookupSymbol(StringRef Name) const { return Symbols.lookup(Name); } + +//===----------------------------------------------------------------------===// +// Section Management +//===----------------------------------------------------------------------===// + +const MCSectionMachO *MCContext:: +getMachOSection(StringRef Segment, StringRef Section, + unsigned TypeAndAttributes, + unsigned Reserved2, SectionKind Kind) { + + // We unique sections by their segment/section pair. The returned section + // may not have the same flags as the requested section, if so this should be + // diagnosed by the client as an error. + + // Create the map if it doesn't already exist. + if (MachOUniquingMap == 0) + MachOUniquingMap = new MachOUniqueMapTy(); + MachOUniqueMapTy &Map = *(MachOUniqueMapTy*)MachOUniquingMap; + + // Form the name to look up. + SmallString<64> Name; + Name += Segment; + Name.push_back(','); + Name += Section; + + // Do the lookup, if we have a hit, return it. + const MCSectionMachO *&Entry = Map[Name.str()]; + if (Entry) return Entry; + + // Otherwise, return a new section. 
+ return Entry = new (*this) MCSectionMachO(Segment, Section, TypeAndAttributes, + Reserved2, Kind); +} + + +const MCSection *MCContext:: +getELFSection(StringRef Section, unsigned Type, unsigned Flags, + SectionKind Kind, bool IsExplicit) { + if (ELFUniquingMap == 0) + ELFUniquingMap = new ELFUniqueMapTy(); + ELFUniqueMapTy &Map = *(ELFUniqueMapTy*)ELFUniquingMap; + + // Do the lookup, if we have a hit, return it. + StringMapEntry<const MCSectionELF*> &Entry = Map.GetOrCreateValue(Section); + if (Entry.getValue()) return Entry.getValue(); + + MCSectionELF *Result = new (*this) MCSectionELF(Entry.getKey(), Type, Flags, + Kind, IsExplicit); + Entry.setValue(Result); + return Result; +} + + diff --git a/lib/MC/MCParser/AsmLexer.cpp b/lib/MC/MCParser/AsmLexer.cpp index 22c8d76..1183312 100644 --- a/lib/MC/MCParser/AsmLexer.cpp +++ b/lib/MC/MCParser/AsmLexer.cpp @@ -74,6 +74,11 @@ AsmToken AsmLexer::LexIdentifier() { while (isalnum(*CurPtr) || *CurPtr == '_' || *CurPtr == '$' || *CurPtr == '.' || *CurPtr == '@') ++CurPtr; + + // Handle . as a special case. + if (CurPtr == TokStart+1 && TokStart[0] == '.') + return AsmToken(AsmToken::Dot, StringRef(TokStart, 1)); + return AsmToken(AsmToken::Identifier, StringRef(TokStart, CurPtr - TokStart)); } diff --git a/lib/MC/MCParser/AsmParser.cpp b/lib/MC/MCParser/AsmParser.cpp index 4e62689..a63d2e4 100644 --- a/lib/MC/MCParser/AsmParser.cpp +++ b/lib/MC/MCParser/AsmParser.cpp @@ -30,16 +30,10 @@ using namespace llvm; enum { DEFAULT_ADDRSPACE = 0 }; -// Mach-O section uniquing. -// -// FIXME: Figure out where this should live, it should be shared by -// TargetLoweringObjectFile. -typedef StringMap<const MCSectionMachO*> MachOUniqueMapTy; - AsmParser::AsmParser(SourceMgr &_SM, MCContext &_Ctx, MCStreamer &_Out, const MCAsmInfo &_MAI) : Lexer(_MAI), Ctx(_Ctx), Out(_Out), SrcMgr(_SM), TargetParser(0), - CurBuffer(0), SectionUniquingMap(0) { + CurBuffer(0) { Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer)); // Debugging directives. @@ -51,39 +45,6 @@ AsmParser::AsmParser(SourceMgr &_SM, MCContext &_Ctx, MCStreamer &_Out, AsmParser::~AsmParser() { - // If we have the MachO uniquing map, free it. - delete (MachOUniqueMapTy*)SectionUniquingMap; -} - -const MCSection *AsmParser::getMachOSection(const StringRef &Segment, - const StringRef &Section, - unsigned TypeAndAttributes, - unsigned Reserved2, - SectionKind Kind) const { - // We unique sections by their segment/section pair. The returned section - // may not have the same flags as the requested section, if so this should be - // diagnosed by the client as an error. - - // Create the map if it doesn't already exist. - if (SectionUniquingMap == 0) - SectionUniquingMap = new MachOUniqueMapTy(); - MachOUniqueMapTy &Map = *(MachOUniqueMapTy*)SectionUniquingMap; - - // Form the name to look up. - SmallString<64> Name; - Name += Segment; - Name.push_back(','); - Name += Section; - - // Do the lookup, if we have a hit, return it. - const MCSectionMachO *&Entry = Map[Name.str()]; - - // FIXME: This should validate the type and attributes. - if (Entry) return Entry; - - // Otherwise, return a new section. - return Entry = MCSectionMachO::Create(Segment, Section, TypeAndAttributes, - Reserved2, Kind, Ctx); } void AsmParser::Warning(SMLoc L, const Twine &Msg) { @@ -143,7 +104,7 @@ bool AsmParser::Run(bool NoInitialTextSection, bool NoFinalize) { // // FIXME: Target hook & command line option for initial section. 
if (!NoInitialTextSection) - Out.SwitchSection(getMachOSection("__TEXT", "__text", + Out.SwitchSection(Ctx.getMachOSection("__TEXT", "__text", MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS, 0, SectionKind::getText())); @@ -156,29 +117,6 @@ bool AsmParser::Run(bool NoInitialTextSection, bool NoFinalize) { // While we have input, parse each statement. while (Lexer.isNot(AsmToken::Eof)) { - // Handle conditional assembly here before calling ParseStatement() - if (Lexer.getKind() == AsmToken::Identifier) { - // If we have an identifier, handle it as the key symbol. - AsmToken ID = getTok(); - SMLoc IDLoc = ID.getLoc(); - StringRef IDVal = ID.getString(); - - if (IDVal == ".if" || - IDVal == ".elseif" || - IDVal == ".else" || - IDVal == ".endif") { - if (!ParseConditionalAssemblyDirectives(IDVal, IDLoc)) - continue; - HadError = true; - EatToEndOfStatement(); - continue; - } - } - if (TheCondState.Ignore) { - EatToEndOfStatement(); - continue; - } - if (!ParseStatement()) continue; // We had an error, remember it and recover by skipping to the next line. @@ -198,21 +136,6 @@ bool AsmParser::Run(bool NoInitialTextSection, bool NoFinalize) { return HadError; } -/// ParseConditionalAssemblyDirectives - parse the conditional assembly -/// directives -bool AsmParser::ParseConditionalAssemblyDirectives(StringRef Directive, - SMLoc DirectiveLoc) { - if (Directive == ".if") - return ParseDirectiveIf(DirectiveLoc); - if (Directive == ".elseif") - return ParseDirectiveElseIf(DirectiveLoc); - if (Directive == ".else") - return ParseDirectiveElse(DirectiveLoc); - if (Directive == ".endif") - return ParseDirectiveEndIf(DirectiveLoc); - return true; -} - /// EatToEndOfStatement - Throw away the rest of the line for testing purposes. void AsmParser::EatToEndOfStatement() { while (Lexer.isNot(AsmToken::EndOfStatement) && @@ -248,6 +171,7 @@ MCSymbol *AsmParser::CreateSymbol(StringRef Name) { /// primaryexpr ::= (parenexpr /// primaryexpr ::= symbol /// primaryexpr ::= number +/// primaryexpr ::= '.' /// primaryexpr ::= ~,+,- primaryexpr bool AsmParser::ParsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) { switch (Lexer.getKind()) { @@ -292,6 +216,17 @@ bool AsmParser::ParsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) { EndLoc = Lexer.getLoc(); Lex(); // Eat token. return false; + case AsmToken::Dot: { + // This is a '.' reference, which references the current PC. Emit a + // temporary label to the streamer and refer to it. + MCSymbol *Sym = Ctx.CreateTempSymbol(); + Out.EmitLabel(Sym); + Res = MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_None, getContext()); + EndLoc = Lexer.getLoc(); + Lex(); // Eat identifier. + return false; + } + case AsmToken::LParen: Lex(); // Eat the '('. return ParseParenExpr(Res, EndLoc); @@ -484,9 +419,30 @@ bool AsmParser::ParseStatement() { AsmToken ID = getTok(); SMLoc IDLoc = ID.getLoc(); StringRef IDVal; - if (ParseIdentifier(IDVal)) - return TokError("unexpected token at start of statement"); + if (ParseIdentifier(IDVal)) { + if (!TheCondState.Ignore) + return TokError("unexpected token at start of statement"); + IDVal = ""; + } + // Handle conditional assembly here before checking for skipping. We + // have to do this so that .endif isn't skipped in a ".if 0" block for + // example. 
+ if (IDVal == ".if") + return ParseDirectiveIf(IDLoc); + if (IDVal == ".elseif") + return ParseDirectiveElseIf(IDLoc); + if (IDVal == ".else") + return ParseDirectiveElse(IDLoc); + if (IDVal == ".endif") + return ParseDirectiveEndIf(IDLoc); + + // If we are in a ".if 0" block, ignore this statement. + if (TheCondState.Ignore) { + EatToEndOfStatement(); + return false; + } + // FIXME: Recurse on local labels? // See what kind of statement we have. @@ -773,39 +729,38 @@ bool AsmParser::ParseStatement() { return false; } - SmallVector<MCParsedAsmOperand*, 8> ParsedOperands; - if (getTargetParser().ParseInstruction(IDVal, IDLoc, ParsedOperands)) - // FIXME: Leaking ParsedOperands on failure. - return true; - - if (Lexer.isNot(AsmToken::EndOfStatement)) - // FIXME: Leaking ParsedOperands on failure. - return TokError("unexpected token in argument list"); - - // Eat the end of statement marker. - Lex(); - - - MCInst Inst; + bool HadError = getTargetParser().ParseInstruction(IDVal, IDLoc, + ParsedOperands); + if (!HadError && Lexer.isNot(AsmToken::EndOfStatement)) + HadError = TokError("unexpected token in argument list"); + + // If parsing succeeded, match the instruction. + if (!HadError) { + MCInst Inst; + if (!getTargetParser().MatchInstruction(ParsedOperands, Inst)) { + // Emit the instruction on success. + Out.EmitInstruction(Inst); + } else { + // Otherwise emit a diagnostic about the match failure and set the error + // flag. + // + // FIXME: We should give nicer diagnostics about the exact failure. + Error(IDLoc, "unrecognized instruction"); + HadError = true; + } + } - bool MatchFail = getTargetParser().MatchInstruction(ParsedOperands, Inst); + // If there was no error, consume the end-of-statement token. Otherwise this + // will be done by our caller. + if (!HadError) + Lex(); // Free any parsed operands. for (unsigned i = 0, e = ParsedOperands.size(); i != e; ++i) delete ParsedOperands[i]; - if (MatchFail) { - // FIXME: We should give nicer diagnostics about the exact failure. - Error(IDLoc, "unrecognized instruction"); - return true; - } - - // Instruction is good, process it. - Out.EmitInstruction(Inst); - - // Skip to end of line for now. - return false; + return HadError; } bool AsmParser::ParseAssignment(const StringRef &Name) { @@ -919,9 +874,9 @@ bool AsmParser::ParseDirectiveDarwinSection() { // FIXME: Arch specific. bool isText = Segment == "__TEXT"; // FIXME: Hack. - Out.SwitchSection(getMachOSection(Segment, Section, TAA, StubSize, - isText ? SectionKind::getText() - : SectionKind::getDataRel())); + Out.SwitchSection(Ctx.getMachOSection(Segment, Section, TAA, StubSize, + isText ? SectionKind::getText() + : SectionKind::getDataRel())); return false; } @@ -936,9 +891,9 @@ bool AsmParser::ParseDirectiveSectionSwitch(const char *Segment, // FIXME: Arch specific. bool isText = StringRef(Segment) == "__TEXT"; // FIXME: Hack. - Out.SwitchSection(getMachOSection(Segment, Section, TAA, StubSize, - isText ? SectionKind::getText() - : SectionKind::getDataRel())); + Out.SwitchSection(Ctx.getMachOSection(Segment, Section, TAA, StubSize, + isText ? SectionKind::getText() + : SectionKind::getDataRel())); // Set the implicit alignment, if any. // @@ -1374,9 +1329,9 @@ bool AsmParser::ParseDirectiveComm(bool IsLocal) { // '.lcomm' is equivalent to '.zerofill'. 
// Create the Symbol as a common or local common with Size and Pow2Alignment if (IsLocal) { - Out.EmitZerofill(getMachOSection("__DATA", "__bss", - MCSectionMachO::S_ZEROFILL, 0, - SectionKind::getBSS()), + Out.EmitZerofill(Ctx.getMachOSection("__DATA", "__bss", + MCSectionMachO::S_ZEROFILL, 0, + SectionKind::getBSS()), Sym, Size, 1 << Pow2Alignment); return false; } @@ -1410,9 +1365,9 @@ bool AsmParser::ParseDirectiveDarwinZerofill() { // the section but with no symbol. if (Lexer.is(AsmToken::EndOfStatement)) { // Create the zerofill section but no symbol - Out.EmitZerofill(getMachOSection(Segment, Section, - MCSectionMachO::S_ZEROFILL, 0, - SectionKind::getBSS())); + Out.EmitZerofill(Ctx.getMachOSection(Segment, Section, + MCSectionMachO::S_ZEROFILL, 0, + SectionKind::getBSS())); return false; } @@ -1468,9 +1423,9 @@ bool AsmParser::ParseDirectiveDarwinZerofill() { // Create the zerofill Symbol with Size and Pow2Alignment // // FIXME: Arch specific. - Out.EmitZerofill(getMachOSection(Segment, Section, - MCSectionMachO::S_ZEROFILL, 0, - SectionKind::getBSS()), + Out.EmitZerofill(Ctx.getMachOSection(Segment, Section, + MCSectionMachO::S_ZEROFILL, 0, + SectionKind::getBSS()), Sym, Size, 1 << Pow2Alignment); return false; @@ -1604,9 +1559,6 @@ bool AsmParser::ParseDirectiveDarwinDumpOrLoad(SMLoc IDLoc, bool IsDump) { /// ParseDirectiveIf /// ::= .if expression bool AsmParser::ParseDirectiveIf(SMLoc DirectiveLoc) { - // Consume the identifier that was the .if directive - Lex(); - TheCondStack.push_back(TheCondState); TheCondState.TheCond = AsmCond::IfCond; if(TheCondState.Ignore) { @@ -1638,9 +1590,6 @@ bool AsmParser::ParseDirectiveElseIf(SMLoc DirectiveLoc) { " an .elseif"); TheCondState.TheCond = AsmCond::ElseIfCond; - // Consume the identifier that was the .elseif directive - Lex(); - bool LastIgnoreState = false; if (!TheCondStack.empty()) LastIgnoreState = TheCondStack.back().Ignore; @@ -1667,9 +1616,6 @@ bool AsmParser::ParseDirectiveElseIf(SMLoc DirectiveLoc) { /// ParseDirectiveElse /// ::= .else bool AsmParser::ParseDirectiveElse(SMLoc DirectiveLoc) { - // Consume the identifier that was the .else directive - Lex(); - if (Lexer.isNot(AsmToken::EndOfStatement)) return TokError("unexpected token in '.else' directive"); @@ -1694,9 +1640,6 @@ bool AsmParser::ParseDirectiveElse(SMLoc DirectiveLoc) { /// ParseDirectiveEndIf /// ::= .endif bool AsmParser::ParseDirectiveEndIf(SMLoc DirectiveLoc) { - // Consume the identifier that was the .endif directive - Lex(); - if (Lexer.isNot(AsmToken::EndOfStatement)) return TokError("unexpected token in '.endif' directive"); diff --git a/lib/MC/MCSectionELF.cpp b/lib/MC/MCSectionELF.cpp index ebfe269..a7599de 100644 --- a/lib/MC/MCSectionELF.cpp +++ b/lib/MC/MCSectionELF.cpp @@ -14,11 +14,7 @@ #include "llvm/Support/raw_ostream.h" using namespace llvm; -MCSectionELF *MCSectionELF:: -Create(StringRef Section, unsigned Type, unsigned Flags, - SectionKind K, bool isExplicit, MCContext &Ctx) { - return new (Ctx) MCSectionELF(Section, Type, Flags, K, isExplicit); -} +MCSectionELF::~MCSectionELF() {} // anchor. 
// ShouldOmitSectionDirective - Decides whether a '.section' directive // should be printed before the section name @@ -62,59 +58,63 @@ void MCSectionELF::PrintSwitchToSection(const MCAsmInfo &MAI, OS << ",#write"; if (Flags & MCSectionELF::SHF_TLS) OS << ",#tls"; - } else { - OS << ",\""; - if (Flags & MCSectionELF::SHF_ALLOC) - OS << 'a'; - if (Flags & MCSectionELF::SHF_EXECINSTR) - OS << 'x'; - if (Flags & MCSectionELF::SHF_WRITE) - OS << 'w'; - if (Flags & MCSectionELF::SHF_MERGE) - OS << 'M'; - if (Flags & MCSectionELF::SHF_STRINGS) - OS << 'S'; - if (Flags & MCSectionELF::SHF_TLS) - OS << 'T'; - - // If there are target-specific flags, print them. - if (Flags & ~MCSectionELF::TARGET_INDEP_SHF) - PrintTargetSpecificSectionFlags(MAI, OS); - - OS << '"'; + OS << '\n'; + return; + } + + OS << ",\""; + if (Flags & MCSectionELF::SHF_ALLOC) + OS << 'a'; + if (Flags & MCSectionELF::SHF_EXECINSTR) + OS << 'x'; + if (Flags & MCSectionELF::SHF_WRITE) + OS << 'w'; + if (Flags & MCSectionELF::SHF_MERGE) + OS << 'M'; + if (Flags & MCSectionELF::SHF_STRINGS) + OS << 'S'; + if (Flags & MCSectionELF::SHF_TLS) + OS << 'T'; + + // If there are target-specific flags, print them. + if (Flags & MCSectionELF::XCORE_SHF_CP_SECTION) + OS << 'c'; + if (Flags & MCSectionELF::XCORE_SHF_DP_SECTION) + OS << 'd'; + + OS << '"'; - if (ShouldPrintSectionType(Type)) { - OS << ','; - - // If comment string is '@', e.g. as on ARM - use '%' instead - if (MAI.getCommentString()[0] == '@') - OS << '%'; - else - OS << '@'; - - if (Type == MCSectionELF::SHT_INIT_ARRAY) - OS << "init_array"; - else if (Type == MCSectionELF::SHT_FINI_ARRAY) - OS << "fini_array"; - else if (Type == MCSectionELF::SHT_PREINIT_ARRAY) - OS << "preinit_array"; - else if (Type == MCSectionELF::SHT_NOBITS) - OS << "nobits"; - else if (Type == MCSectionELF::SHT_PROGBITS) - OS << "progbits"; - - if (getKind().isMergeable1ByteCString()) { - OS << ",1"; - } else if (getKind().isMergeable2ByteCString()) { - OS << ",2"; - } else if (getKind().isMergeable4ByteCString() || - getKind().isMergeableConst4()) { - OS << ",4"; - } else if (getKind().isMergeableConst8()) { - OS << ",8"; - } else if (getKind().isMergeableConst16()) { - OS << ",16"; - } + if (ShouldPrintSectionType(Type)) { + OS << ','; + + // If comment string is '@', e.g. 
as on ARM - use '%' instead + if (MAI.getCommentString()[0] == '@') + OS << '%'; + else + OS << '@'; + + if (Type == MCSectionELF::SHT_INIT_ARRAY) + OS << "init_array"; + else if (Type == MCSectionELF::SHT_FINI_ARRAY) + OS << "fini_array"; + else if (Type == MCSectionELF::SHT_PREINIT_ARRAY) + OS << "preinit_array"; + else if (Type == MCSectionELF::SHT_NOBITS) + OS << "nobits"; + else if (Type == MCSectionELF::SHT_PROGBITS) + OS << "progbits"; + + if (getKind().isMergeable1ByteCString()) { + OS << ",1"; + } else if (getKind().isMergeable2ByteCString()) { + OS << ",2"; + } else if (getKind().isMergeable4ByteCString() || + getKind().isMergeableConst4()) { + OS << ",4"; + } else if (getKind().isMergeableConst8()) { + OS << ",8"; + } else if (getKind().isMergeableConst16()) { + OS << ",16"; } } diff --git a/lib/MC/MCSectionMachO.cpp b/lib/MC/MCSectionMachO.cpp index 6cc67a2..3a18cee 100644 --- a/lib/MC/MCSectionMachO.cpp +++ b/lib/MC/MCSectionMachO.cpp @@ -64,14 +64,22 @@ ENTRY(0 /*FIXME*/, S_ATTR_LOC_RELOC) { AttrFlagEnd, 0, 0 } }; - -MCSectionMachO *MCSectionMachO:: -Create(StringRef Segment, StringRef Section, - unsigned TypeAndAttributes, unsigned Reserved2, - SectionKind K, MCContext &Ctx) { - // S_SYMBOL_STUBS must be set for Reserved2 to be non-zero. - return new (Ctx) MCSectionMachO(Segment, Section, TypeAndAttributes, - Reserved2, K); +MCSectionMachO::MCSectionMachO(StringRef Segment, StringRef Section, + unsigned TAA, unsigned reserved2, SectionKind K) + : MCSection(K), TypeAndAttributes(TAA), Reserved2(reserved2) { + assert(Segment.size() <= 16 && Section.size() <= 16 && + "Segment or section string too long"); + for (unsigned i = 0; i != 16; ++i) { + if (i < Segment.size()) + SegmentName[i] = Segment[i]; + else + SegmentName[i] = 0; + + if (i < Section.size()) + SectionName[i] = Section[i]; + else + SectionName[i] = 0; + } } void MCSectionMachO::PrintSwitchToSection(const MCAsmInfo &MAI, diff --git a/lib/MC/MachObjectWriter.cpp b/lib/MC/MachObjectWriter.cpp index e073eb5..a533ccf 100644 --- a/lib/MC/MachObjectWriter.cpp +++ b/lib/MC/MachObjectWriter.cpp @@ -418,7 +418,7 @@ public: unsigned Log2Size = Log2_32(Align); assert((1U << Log2Size) == Align && "Invalid 'common' alignment!"); if (Log2Size > 15) - llvm_report_error("invalid 'common' alignment '" + + report_fatal_error("invalid 'common' alignment '" + Twine(Align) + "'"); // FIXME: Keep this mask with the SymbolFlags enumeration. Flags = (Flags & 0xF0FF) | (Log2Size << 8); @@ -477,7 +477,7 @@ public: // actual expression addend without the PCrel bias. However, instructions // with data following the relocation are not accomodated for (see comment // below regarding SIGNED{1,2,4}), so it isn't exactly that either. - Value += 1 << Log2Size; + Value += 1LL << Log2Size; } if (Target.isAbsolute()) { // constant @@ -506,23 +506,23 @@ public: // Neither symbol can be modified. if (Target.getSymA()->getKind() != MCSymbolRefExpr::VK_None || Target.getSymB()->getKind() != MCSymbolRefExpr::VK_None) - llvm_report_error("unsupported relocation of modified symbol"); + report_fatal_error("unsupported relocation of modified symbol"); // We don't support PCrel relocations of differences. Darwin 'as' doesn't // implement most of these correctly. if (IsPCRel) - llvm_report_error("unsupported pc-relative relocation of difference"); + report_fatal_error("unsupported pc-relative relocation of difference"); // We don't currently support any situation where one or both of the // symbols would require a local relocation. 
This is almost certainly // unused and may not be possible to encode correctly. if (!A_Base || !B_Base) - llvm_report_error("unsupported local relocations in difference"); + report_fatal_error("unsupported local relocations in difference"); // Darwin 'as' doesn't emit correct relocations for this (it ends up with // a single SIGNED relocation); reject it for now. if (A_Base == B_Base) - llvm_report_error("unsupported relocation with identical base"); + report_fatal_error("unsupported relocation with identical base"); Value += Layout.getSymbolAddress(&A_SD) - Layout.getSymbolAddress(A_Base); Value -= Layout.getSymbolAddress(&B_SD) - Layout.getSymbolAddress(B_Base); @@ -580,12 +580,12 @@ public: else Type = RIT_X86_64_GOT; } else if (Modifier != MCSymbolRefExpr::VK_None) - llvm_report_error("unsupported symbol modifier in relocation"); + report_fatal_error("unsupported symbol modifier in relocation"); else Type = RIT_X86_64_Signed; } else { if (Modifier != MCSymbolRefExpr::VK_None) - llvm_report_error("unsupported symbol modifier in branch " + report_fatal_error("unsupported symbol modifier in branch " "relocation"); Type = RIT_X86_64_Branch; @@ -605,7 +605,7 @@ public: // well based on the actual encoded instruction (the additional bias), // but instead appear to just look at the final offset. if (IsRIPRel) { - switch (-(Target.getConstant() + (1 << Log2Size))) { + switch (-(Target.getConstant() + (1LL << Log2Size))) { case 1: Type = RIT_X86_64_Signed1; break; case 2: Type = RIT_X86_64_Signed2; break; case 4: Type = RIT_X86_64_Signed4; break; @@ -622,7 +622,7 @@ public: Type = RIT_X86_64_GOT; IsPCRel = 1; } else if (Modifier != MCSymbolRefExpr::VK_None) - llvm_report_error("unsupported symbol modifier in relocation"); + report_fatal_error("unsupported symbol modifier in relocation"); else Type = RIT_X86_64_Unsigned; } @@ -657,7 +657,7 @@ public: MCSymbolData *A_SD = &Asm.getSymbolData(*A); if (!A_SD->getFragment()) - llvm_report_error("symbol '" + A->getName() + + report_fatal_error("symbol '" + A->getName() + "' can not be undefined in a subtraction expression"); uint32_t Value = Layout.getSymbolAddress(A_SD); @@ -667,7 +667,7 @@ public: MCSymbolData *B_SD = &Asm.getSymbolData(B->getSymbol()); if (!B_SD->getFragment()) - llvm_report_error("symbol '" + B->getSymbol().getName() + + report_fatal_error("symbol '" + B->getSymbol().getName() + "' can not be undefined in a subtraction expression"); // Select the appropriate difference relocation type. diff --git a/lib/Support/Allocator.cpp b/lib/Support/Allocator.cpp index 31b45c8..90df262 100644 --- a/lib/Support/Allocator.cpp +++ b/lib/Support/Allocator.cpp @@ -23,9 +23,7 @@ namespace llvm { BumpPtrAllocator::BumpPtrAllocator(size_t size, size_t threshold, SlabAllocator &allocator) : SlabSize(size), SizeThreshold(threshold), Allocator(allocator), - CurSlab(0), BytesAllocated(0) { - StartNewSlab(); -} + CurSlab(0), BytesAllocated(0) { } BumpPtrAllocator::~BumpPtrAllocator() { DeallocateSlabs(CurSlab); @@ -72,6 +70,8 @@ void BumpPtrAllocator::DeallocateSlabs(MemSlab *Slab) { /// Reset - Deallocate all but the current slab and reset the current pointer /// to the beginning of it, freeing all memory allocated so far. void BumpPtrAllocator::Reset() { + if (!CurSlab) + return; DeallocateSlabs(CurSlab->NextPtr); CurSlab->NextPtr = 0; CurPtr = (char*)(CurSlab + 1); @@ -81,6 +81,9 @@ void BumpPtrAllocator::Reset() { /// Allocate - Allocate space at the specified alignment. 
/// void *BumpPtrAllocator::Allocate(size_t Size, size_t Alignment) { + if (!CurSlab) // Start a new slab if we haven't allocated one already. + StartNewSlab(); + // Keep track of how many bytes we've allocated. BytesAllocated += Size; diff --git a/lib/Support/Debug.cpp b/lib/Support/Debug.cpp index eccfa0b..7f48f8a 100644 --- a/lib/Support/Debug.cpp +++ b/lib/Support/Debug.cpp @@ -51,12 +51,19 @@ DebugBufferSize("debug-buffer-size", cl::init(0)); static std::string CurrentDebugType; -static struct DebugOnlyOpt { + +namespace { + +struct DebugOnlyOpt { void operator=(const std::string &Val) const { DebugFlag |= !Val.empty(); CurrentDebugType = Val; } -} DebugOnlyOptLoc; +}; + +} + +static DebugOnlyOpt DebugOnlyOptLoc; static cl::opt<DebugOnlyOpt, true, cl::parser<std::string> > DebugOnly("debug-only", cl::desc("Enable a specific type of debug output"), diff --git a/lib/Support/Dwarf.cpp b/lib/Support/Dwarf.cpp index d1230b9..c19c2d6 100644 --- a/lib/Support/Dwarf.cpp +++ b/lib/Support/Dwarf.cpp @@ -196,8 +196,9 @@ const char *llvm::dwarf::AttributeString(unsigned Attribute) { case DW_AT_APPLE_flags: return "DW_AT_APPLE_flags"; case DW_AT_APPLE_isa: return "DW_AT_APPLE_isa"; case DW_AT_APPLE_block: return "DW_AT_APPLE_block"; - case DW_AT_APPLE_major_runtime_vers: return "DW_AT_APPLE_major_runtime_vers"; + case DW_AT_APPLE_major_runtime_vers: return "DW_AT_APPLE_major_runtime_vers"; case DW_AT_APPLE_runtime_class: return "DW_AT_APPLE_runtime_class"; + case DW_AT_APPLE_omit_frame_ptr: return "DW_AT_APPLE_omit_frame_ptr"; } return 0; } diff --git a/lib/Support/ErrorHandling.cpp b/lib/Support/ErrorHandling.cpp index 4412cb2..56a171c 100644 --- a/lib/Support/ErrorHandling.cpp +++ b/lib/Support/ErrorHandling.cpp @@ -1,4 +1,4 @@ -//===- lib/Support/ErrorHandling.cpp - Callbacks for errors -----*- C++ -*-===// +//===- lib/Support/ErrorHandling.cpp - Callbacks for errors ---------------===// // // The LLVM Compiler Infrastructure // @@ -7,9 +7,9 @@ // //===----------------------------------------------------------------------===// // -// This file defines an API for error handling, it supersedes cerr+abort(), and -// cerr+exit() style error handling. -// Callbacks can be registered for these errors through this API. +// This file defines an API used to indicate fatal error conditions. Non-fatal +// errors (most of them) should be handled through LLVMContext. 
+// //===----------------------------------------------------------------------===// #include "llvm/ADT/Twine.h" @@ -19,16 +19,14 @@ #include "llvm/System/Threading.h" #include <cassert> #include <cstdlib> - using namespace llvm; using namespace std; -static llvm_error_handler_t ErrorHandler = 0; +static fatal_error_handler_t ErrorHandler = 0; static void *ErrorHandlerUserData = 0; -namespace llvm { -void llvm_install_error_handler(llvm_error_handler_t handler, - void *user_data) { +void llvm::install_fatal_error_handler(fatal_error_handler_t handler, + void *user_data) { assert(!llvm_is_multithreaded() && "Cannot register error handlers after starting multithreaded mode!\n"); assert(!ErrorHandler && "Error handler already registered!\n"); @@ -36,19 +34,19 @@ void llvm_install_error_handler(llvm_error_handler_t handler, ErrorHandlerUserData = user_data; } -void llvm_remove_error_handler() { +void llvm::remove_fatal_error_handler() { ErrorHandler = 0; } -void llvm_report_error(const char *reason) { - llvm_report_error(Twine(reason)); +void llvm::report_fatal_error(const char *reason) { + report_fatal_error(Twine(reason)); } -void llvm_report_error(const std::string &reason) { - llvm_report_error(Twine(reason)); +void llvm::report_fatal_error(const std::string &reason) { + report_fatal_error(Twine(reason)); } -void llvm_report_error(const Twine &reason) { +void llvm::report_fatal_error(const Twine &reason) { if (!ErrorHandler) { errs() << "LLVM ERROR: " << reason << "\n"; } else { @@ -57,8 +55,8 @@ void llvm_report_error(const Twine &reason) { exit(1); } -void llvm_unreachable_internal(const char *msg, const char *file, - unsigned line) { +void llvm::llvm_unreachable_internal(const char *msg, const char *file, + unsigned line) { // This code intentionally doesn't call the ErrorHandler callback, because // llvm_unreachable is intended to be used to indicate "impossible" // situations, and not legitimate runtime errors. @@ -70,4 +68,3 @@ void llvm_unreachable_internal(const char *msg, const char *file, dbgs() << "!\n"; abort(); } -} diff --git a/lib/Support/GraphWriter.cpp b/lib/Support/GraphWriter.cpp index ec84f9b..fdd6285 100644 --- a/lib/Support/GraphWriter.cpp +++ b/lib/Support/GraphWriter.cpp @@ -130,28 +130,28 @@ void llvm::DisplayGraph(const sys::Path &Filename, bool wait, if (sys::Program::ExecuteAndWait(prog, &args[0], 0, 0, 0, 0, &ErrMsg)) { errs() << "Error viewing graph " << Filename.str() << ": '" << ErrMsg << "\n"; - } else { - errs() << " done. \n"; + return; + } + errs() << " done. 
\n"; - sys::Path gv(LLVM_PATH_GV); - args.clear(); - args.push_back(gv.c_str()); - args.push_back(PSFilename.c_str()); - args.push_back("--spartan"); - args.push_back(0); - - ErrMsg.clear(); - if (wait) { - if (sys::Program::ExecuteAndWait(gv, &args[0],0,0,0,0,&ErrMsg)) - errs() << "Error viewing graph: " << ErrMsg << "\n"; - Filename.eraseFromDisk(); - PSFilename.eraseFromDisk(); - } - else { - sys::Program::ExecuteNoWait(gv, &args[0],0,0,0,&ErrMsg); - errs() << "Remember to erase graph files: " << Filename.str() << " " - << PSFilename.str() << "\n"; - } + sys::Path gv(LLVM_PATH_GV); + args.clear(); + args.push_back(gv.c_str()); + args.push_back(PSFilename.c_str()); + args.push_back("--spartan"); + args.push_back(0); + + ErrMsg.clear(); + if (wait) { + if (sys::Program::ExecuteAndWait(gv, &args[0],0,0,0,0,&ErrMsg)) + errs() << "Error viewing graph: " << ErrMsg << "\n"; + Filename.eraseFromDisk(); + PSFilename.eraseFromDisk(); + } + else { + sys::Program::ExecuteNoWait(gv, &args[0],0,0,0,&ErrMsg); + errs() << "Remember to erase graph files: " << Filename.str() << " " + << PSFilename.str() << "\n"; } #elif HAVE_DOTTY sys::Path dotty(LLVM_PATH_DOTTY); @@ -166,7 +166,8 @@ void llvm::DisplayGraph(const sys::Path &Filename, bool wait, errs() << "Error viewing graph " << Filename.str() << ": " << ErrMsg << "\n"; } else { -#ifdef __MINGW32__ // Dotty spawns another app and doesn't wait until it returns +// Dotty spawns another app and doesn't wait until it returns +#if defined (__MINGW32__) || defined (_WINDOWS) return; #endif Filename.eraseFromDisk(); diff --git a/lib/Support/SourceMgr.cpp b/lib/Support/SourceMgr.cpp index 4e7520c..da5681c 100644 --- a/lib/Support/SourceMgr.cpp +++ b/lib/Support/SourceMgr.cpp @@ -168,7 +168,7 @@ SMDiagnostic SourceMgr::GetMessage(SMLoc Loc, const std::string &Msg, } PrintedMsg += Msg; - return SMDiagnostic(Loc, + return SMDiagnostic(*this, Loc, CurMB->getBufferIdentifier(), FindLineNumber(Loc, CurBuf), Loc.getPointer()-LineStart, PrintedMsg, LineStr, ShowLine); diff --git a/lib/Support/Timer.cpp b/lib/Support/Timer.cpp index 4fac073..481f6ba 100644 --- a/lib/Support/Timer.cpp +++ b/lib/Support/Timer.cpp @@ -190,6 +190,8 @@ void TimeRecord::print(const TimeRecord &Total, raw_ostream &OS) const { // NamedRegionTimer Implementation //===----------------------------------------------------------------------===// +namespace { + typedef StringMap<Timer> Name2TimerMap; class Name2PairMap { @@ -216,6 +218,8 @@ public: } }; +} + static ManagedStatic<Name2TimerMap> NamedTimers; static ManagedStatic<Name2PairMap> NamedGroupedTimers; diff --git a/lib/Support/circular_raw_ostream.cpp b/lib/Support/circular_raw_ostream.cpp index e52996d..ca0d30d 100644 --- a/lib/Support/circular_raw_ostream.cpp +++ b/lib/Support/circular_raw_ostream.cpp @@ -1,4 +1,4 @@ -//===- circulat_raw_ostream.cpp - Implement the circular_raw_ostream class -===// +//===- circular_raw_ostream.cpp - Implement circular_raw_ostream ----------===// // // The LLVM Compiler Infrastructure // @@ -12,9 +12,7 @@ //===----------------------------------------------------------------------===// #include "llvm/Support/circular_raw_ostream.h" - #include <algorithm> - using namespace llvm; void circular_raw_ostream::write_impl(const char *Ptr, size_t Size) { @@ -25,7 +23,8 @@ void circular_raw_ostream::write_impl(const char *Ptr, size_t Size) { // Write into the buffer, wrapping if necessary. 
while (Size != 0) { - unsigned Bytes = std::min(Size, BufferSize - (Cur - BufferArray)); + unsigned Bytes = + std::min(unsigned(Size), unsigned(BufferSize - (Cur - BufferArray))); memcpy(Cur, Ptr, Bytes); Size -= Bytes; Cur += Bytes; @@ -37,11 +36,10 @@ void circular_raw_ostream::write_impl(const char *Ptr, size_t Size) { } } -void circular_raw_ostream::flushBufferWithBanner(void) { +void circular_raw_ostream::flushBufferWithBanner() { if (BufferSize != 0) { // Write out the buffer - int num = std::strlen(Banner); - TheStream->write(Banner, num); + TheStream->write(Banner, std::strlen(Banner)); flushBuffer(); } } diff --git a/lib/Support/raw_ostream.cpp b/lib/Support/raw_ostream.cpp index f59bd0d..0b05c54 100644 --- a/lib/Support/raw_ostream.cpp +++ b/lib/Support/raw_ostream.cpp @@ -57,11 +57,11 @@ raw_ostream::~raw_ostream() { delete [] OutBufStart; // If there are any pending errors, report them now. Clients wishing - // to avoid llvm_report_error calls should check for errors with + // to avoid report_fatal_error calls should check for errors with // has_error() and clear the error flag with clear_error() before // destructing raw_ostream objects which may have errors. if (Error) - llvm_report_error("IO failure on output stream."); + report_fatal_error("IO failure on output stream."); } // An out of line virtual method to provide a home for the class vtable. @@ -442,7 +442,8 @@ uint64_t raw_fd_ostream::seek(uint64_t off) { } size_t raw_fd_ostream::preferred_buffer_size() const { -#if !defined(_MSC_VER) && !defined(__MINGW32__) // Windows has no st_blksize. +#if !defined(_MSC_VER) && !defined(__MINGW32__) && !defined(_MINIX) + // Windows and Minix have no st_blksize. assert(FD >= 0 && "File not yet open!"); struct stat statbuf; if (fstat(FD, &statbuf) != 0) diff --git a/lib/Support/regengine.inc b/lib/Support/regengine.inc index bf55543..7e41f96 100644 --- a/lib/Support/regengine.inc +++ b/lib/Support/regengine.inc @@ -185,7 +185,7 @@ matcher(struct re_guts *g, const char *string, size_t nmatch, endp = fast(m, start, stop, gf, gl); if (endp == NULL) { /* a miss */ free(m->pmatch); - free(m->lastpos); + free((void*)m->lastpos); STATETEARDOWN(m); return(REG_NOMATCH); } diff --git a/lib/System/DynamicLibrary.cpp b/lib/System/DynamicLibrary.cpp index d6f3140..6f6890c 100644 --- a/lib/System/DynamicLibrary.cpp +++ b/lib/System/DynamicLibrary.cpp @@ -24,12 +24,18 @@ // Collection of symbol name/value pairs to be searched prior to any libraries. 
static std::map<std::string, void*> *ExplicitSymbols = 0; -static struct ExplicitSymbolsDeleter { +namespace { + +struct ExplicitSymbolsDeleter { ~ExplicitSymbolsDeleter() { if (ExplicitSymbols) delete ExplicitSymbols; } -} Dummy; +}; + +} + +static ExplicitSymbolsDeleter Dummy; void llvm::sys::DynamicLibrary::AddSymbol(const char* symbolName, void *symbolValue) { @@ -44,6 +50,7 @@ void llvm::sys::DynamicLibrary::AddSymbol(const char* symbolName, #else +#if HAVE_DLFCN_H #include <dlfcn.h> using namespace llvm; using namespace llvm::sys; @@ -68,6 +75,17 @@ bool DynamicLibrary::LoadLibraryPermanently(const char *Filename, OpenedHandles->push_back(H); return false; } +#else + +using namespace llvm; +using namespace llvm::sys; + +bool DynamicLibrary::LoadLibraryPermanently(const char *Filename, + std::string *ErrMsg) { + if (ErrMsg) *ErrMsg = "dlopen() not supported on this platform"; + return true; +} +#endif namespace llvm { void *SearchForAddressOfSpecialSymbol(const char* symbolName); @@ -84,6 +102,7 @@ void* DynamicLibrary::SearchForAddressOfSymbol(const char* symbolName) { return I->second; } +#if HAVE_DLFCN_H // Now search the libraries. if (OpenedHandles) { for (std::vector<void *>::iterator I = OpenedHandles->begin(), @@ -95,6 +114,7 @@ void* DynamicLibrary::SearchForAddressOfSymbol(const char* symbolName) { } } } +#endif if (void *Result = llvm::SearchForAddressOfSpecialSymbol(symbolName)) return Result; diff --git a/lib/System/Program.cpp b/lib/System/Program.cpp index a3049d4..cd58c2c 100644 --- a/lib/System/Program.cpp +++ b/lib/System/Program.cpp @@ -13,8 +13,7 @@ #include "llvm/System/Program.h" #include "llvm/Config/config.h" - -namespace llvm { +using namespace llvm; using namespace sys; //===----------------------------------------------------------------------===// @@ -48,9 +47,6 @@ Program::ExecuteNoWait(const Path& path, prg.Execute(path, args, envp, redirects, memoryLimit, ErrMsg); } - -} - // Include the platform-specific parts of this class. #ifdef LLVM_ON_UNIX #include "Unix/Program.inc" diff --git a/lib/System/Unix/Path.inc b/lib/System/Unix/Path.inc index 52253b3..74596dc 100644 --- a/lib/System/Unix/Path.inc +++ b/lib/System/Unix/Path.inc @@ -858,15 +858,20 @@ Path::makeUnique(bool reuse_current, std::string* ErrMsg) { // Append an XXXXXX pattern to the end of the file for use with mkstemp, // mktemp or our own implementation. - std::string Buf(path); + // This uses std::vector instead of SmallVector to avoid a dependence on + // libSupport. And performance isn't critical here. + std::vector<char> Buf; + Buf.resize(path.size()+8); + char *FNBuffer = &Buf[0]; + path.copy(FNBuffer,path.size()); if (isDirectory()) - Buf += "/XXXXXX"; + strcpy(FNBuffer+path.size(), "/XXXXXX"); else - Buf += "-XXXXXX"; + strcpy(FNBuffer+path.size(), "-XXXXXX"); #if defined(HAVE_MKSTEMP) int TempFD; - if ((TempFD = mkstemp((char*)Buf.c_str())) == -1) + if ((TempFD = mkstemp(FNBuffer)) == -1) return MakeErrMsg(ErrMsg, path + ": can't make unique filename"); // We don't need to hold the temp file descriptor... we will trust that no one @@ -874,21 +879,21 @@ Path::makeUnique(bool reuse_current, std::string* ErrMsg) { close(TempFD); // Save the name - path = Buf; + path = FNBuffer; #elif defined(HAVE_MKTEMP) // If we don't have mkstemp, use the old and obsolete mktemp function. 
- if (mktemp(Buf.c_str()) == 0) + if (mktemp(FNBuffer) == 0) return MakeErrMsg(ErrMsg, path + ": can't make unique filename"); // Save the name - path = Buf; + path = FNBuffer; #else // Okay, looks like we have to do it all by our lonesome. static unsigned FCounter = 0; unsigned offset = path.size() + 1; - while (FCounter < 999999 && exists()) { - sprintf(Buf.data()+offset, "%06u", ++FCounter); - path = Buf; + while ( FCounter < 999999 && exists()) { + sprintf(FNBuffer+offset,"%06u",++FCounter); + path = FNBuffer; } if (FCounter > 999999) return MakeErrMsg(ErrMsg, diff --git a/lib/System/Unix/Program.inc b/lib/System/Unix/Program.inc index b4cc875..358415f 100644 --- a/lib/System/Unix/Program.inc +++ b/lib/System/Unix/Program.inc @@ -30,6 +30,14 @@ #if HAVE_FCNTL_H #include <fcntl.h> #endif +#ifdef HAVE_POSIX_SPAWN +#include <spawn.h> +#if !defined(__APPLE__) + extern char **environ; +#else +#include <crt_externs.h> // _NSGetEnviron +#endif +#endif namespace llvm { using namespace sys; @@ -94,20 +102,19 @@ Program::FindProgramByName(const std::string& progName) { } static bool RedirectIO(const Path *Path, int FD, std::string* ErrMsg) { - if (Path == 0) - // Noop + if (Path == 0) // Noop return false; - std::string File; + const char *File; if (Path->isEmpty()) // Redirect empty paths to /dev/null File = "/dev/null"; else - File = Path->str(); + File = Path->c_str(); // Open the file - int InFD = open(File.c_str(), FD == 0 ? O_RDONLY : O_WRONLY|O_CREAT, 0666); + int InFD = open(File, FD == 0 ? O_RDONLY : O_WRONLY|O_CREAT, 0666); if (InFD == -1) { - MakeErrMsg(ErrMsg, "Cannot open file '" + File + "' for " + MakeErrMsg(ErrMsg, "Cannot open file '" + std::string(File) + "' for " + (FD == 0 ? "input" : "output")); return true; } @@ -122,6 +129,25 @@ static bool RedirectIO(const Path *Path, int FD, std::string* ErrMsg) { return false; } +#ifdef HAVE_POSIX_SPAWN +static bool RedirectIO_PS(const Path *Path, int FD, std::string *ErrMsg, + posix_spawn_file_actions_t &FileActions) { + if (Path == 0) // Noop + return false; + const char *File; + if (Path->isEmpty()) + // Redirect empty paths to /dev/null + File = "/dev/null"; + else + File = Path->c_str(); + + if (int Err = posix_spawn_file_actions_addopen(&FileActions, FD, + File, FD == 0 ? O_RDONLY : O_WRONLY|O_CREAT, 0666)) + return MakeErrMsg(ErrMsg, "Cannot dup2", Err); + return false; +} +#endif + static void TimeOutHandler(int Sig) { } @@ -151,13 +177,55 @@ static void SetMemoryLimits (unsigned size) } bool -Program::Execute(const Path& path, - const char** args, - const char** envp, - const Path** redirects, - unsigned memoryLimit, - std::string* ErrMsg) -{ +Program::Execute(const Path &path, const char **args, const char **envp, + const Path **redirects, unsigned memoryLimit, + std::string *ErrMsg) { + // If this OS has posix_spawn and there is no memory limit being implied, use + // posix_spawn. It is more efficient than fork/exec. +#ifdef HAVE_POSIX_SPAWN + if (memoryLimit == 0) { + posix_spawn_file_actions_t FileActions; + posix_spawn_file_actions_init(&FileActions); + + if (redirects) { + // Redirect stdin/stdout. 
+ if (RedirectIO_PS(redirects[0], 0, ErrMsg, FileActions) || + RedirectIO_PS(redirects[1], 1, ErrMsg, FileActions)) + return false; + if (redirects[1] == 0 || redirects[2] == 0 || + *redirects[1] != *redirects[2]) { + // Just redirect stderr + if (RedirectIO_PS(redirects[2], 2, ErrMsg, FileActions)) return false; + } else { + // If stdout and stderr should go to the same place, redirect stderr + // to the FD already open for stdout. + if (int Err = posix_spawn_file_actions_adddup2(&FileActions, 1, 2)) + return !MakeErrMsg(ErrMsg, "Can't redirect stderr to stdout", Err); + } + } + + if (!envp) +#if !defined(__APPLE__) + envp = const_cast<const char **>(environ); +#else + // environ is missing in dylibs. + envp = const_cast<const char **>(*_NSGetEnviron()); +#endif + + pid_t PID; + int Err = posix_spawn(&PID, path.c_str(), &FileActions, /*attrp*/0, + const_cast<char **>(args), const_cast<char **>(envp)); + + posix_spawn_file_actions_destroy(&FileActions); + + if (Err) + return !MakeErrMsg(ErrMsg, "posix_spawn failed", Err); + + Data_ = reinterpret_cast<void*>(PID); + return true; + } +#endif + if (!path.canExecute()) { if (ErrMsg) *ErrMsg = path.str() + " is not executable"; @@ -201,9 +269,12 @@ Program::Execute(const Path& path, // Execute! if (envp != 0) - execve(path.c_str(), (char**)args, (char**)envp); + execve(path.c_str(), + const_cast<char **>(args), + const_cast<char **>(envp)); else - execv(path.c_str(), (char**)args); + execv(path.c_str(), + const_cast<char **>(args)); // If the execve() failed, we should exit. Follow Unix protocol and // return 127 if the executable was not found, and 126 otherwise. // Use _exit rather than exit so that atexit functions and static @@ -239,7 +310,9 @@ Program::Wait(unsigned secondsToWait, // fact of having a handler at all causes the wait below to return with EINTR, // unlike if we used SIG_IGN. if (secondsToWait) { +#ifndef __HAIKU__ Act.sa_sigaction = 0; +#endif Act.sa_handler = TimeOutHandler; sigemptyset(&Act.sa_mask); Act.sa_flags = 0; diff --git a/lib/Target/ARM/ARM.td b/lib/Target/ARM/ARM.td index 8d9c622..b4dec0c 100644 --- a/lib/Target/ARM/ARM.td +++ b/lib/Target/ARM/ARM.td @@ -124,7 +124,8 @@ def : Processor<"arm1156t2f-s", ARMV6Itineraries, def : Processor<"cortex-a8", CortexA8Itineraries, [ArchV7A, FeatureThumb2, FeatureNEON, FeatureHasSlowVMLx, FeatureNEONForFP]>; -def : ProcNoItin<"cortex-a9", [ArchV7A, FeatureThumb2, FeatureNEON]>; +def : Processor<"cortex-a9", CortexA9Itineraries, + [ArchV7A, FeatureThumb2, FeatureNEON]>; //===----------------------------------------------------------------------===// // Register File Description diff --git a/lib/Target/ARM/ARMAddressingModes.h b/lib/Target/ARM/ARMAddressingModes.h index ea62c33..e68354a 100644 --- a/lib/Target/ARM/ARMAddressingModes.h +++ b/lib/Target/ARM/ARMAddressingModes.h @@ -151,22 +151,13 @@ namespace ARM_AM { if ((rotr32(Imm, RotAmt) & ~255U) == 0) return (32-RotAmt)&31; // HW rotates right, not left. - // For values like 0xF000000F, we should skip the first run of ones, then + // For values like 0xF000000F, we should ignore the low 6 bits, then // retry the hunt. - if (Imm & 1) { - unsigned TrailingOnes = CountTrailingZeros_32(~Imm); - if (TrailingOnes != 32) { // Avoid overflow on 0xFFFFFFFF - // Restart the search for a high-order bit after the initial seconds of - // ones. - unsigned TZ2 = CountTrailingZeros_32(Imm & ~((1 << TrailingOnes)-1)); - - // Rotate amount must be even. - unsigned RotAmt2 = TZ2 & ~1; - - // If this fits, use it. 
- if (RotAmt2 != 32 && (rotr32(Imm, RotAmt2) & ~255U) == 0) - return (32-RotAmt2)&31; // HW rotates right, not left. - } + if (Imm & 63U) { + unsigned TZ2 = CountTrailingZeros_32(Imm & ~63U); + unsigned RotAmt2 = TZ2 & ~1; + if ((rotr32(Imm, RotAmt2) & ~255U) == 0) + return (32-RotAmt2)&31; // HW rotates right, not left. } // Otherwise, we have no way to cover this span of bits with a single diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp index 1995f79..a193858 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -467,6 +467,7 @@ unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const { case TargetOpcode::KILL: case TargetOpcode::DBG_LABEL: case TargetOpcode::EH_LABEL: + case TargetOpcode::DBG_VALUE: return 0; } break; @@ -481,10 +482,11 @@ unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const { // operand #2. return MI->getOperand(2).getImm(); case ARM::Int_eh_sjlj_setjmp: + case ARM::Int_eh_sjlj_setjmp_nofp: return 24; case ARM::tInt_eh_sjlj_setjmp: - return 14; case ARM::t2Int_eh_sjlj_setjmp: + case ARM::t2Int_eh_sjlj_setjmp_nofp: return 14; case ARM::BR_JTr: case ARM::BR_JTm: @@ -815,6 +817,16 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, } } +MachineInstr* +ARMBaseInstrInfo::emitFrameIndexDebugValue(MachineFunction &MF, + int FrameIx, uint64_t Offset, + const MDNode *MDPtr, + DebugLoc DL) const { + MachineInstrBuilder MIB = BuildMI(MF, DL, get(ARM::DBG_VALUE)) + .addFrameIndex(FrameIx).addImm(0).addImm(Offset).addMetadata(MDPtr); + return &*MIB; +} + MachineInstr *ARMBaseInstrInfo:: foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI, const SmallVectorImpl<unsigned> &Ops, int FI) const { diff --git a/lib/Target/ARM/ARMBaseInstrInfo.h b/lib/Target/ARM/ARMBaseInstrInfo.h index 292c498..7a5630e 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.h +++ b/lib/Target/ARM/ARMBaseInstrInfo.h @@ -269,6 +269,12 @@ public: unsigned DestReg, int FrameIndex, const TargetRegisterClass *RC) const; + virtual MachineInstr *emitFrameIndexDebugValue(MachineFunction &MF, + int FrameIx, + uint64_t Offset, + const MDNode *MDPtr, + DebugLoc DL) const; + virtual bool canFoldMemoryOperand(const MachineInstr *MI, const SmallVectorImpl<unsigned> &Ops) const; diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp index f162546..bc12187 100644 --- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp +++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp @@ -38,11 +38,14 @@ #include "llvm/ADT/BitVector.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Support/CommandLine.h" -using namespace llvm; +namespace llvm { cl::opt<bool> ReuseFrameIndexVals("arm-reuse-frame-index-vals", cl::Hidden, cl::init(true), cl::desc("Reuse repeated frame index values")); +} + +using namespace llvm; unsigned ARMBaseRegisterInfo::getRegisterNumbering(unsigned RegEnum, bool *isSPVFP) { @@ -478,7 +481,7 @@ ARMBaseRegisterInfo::UpdateRegAllocHint(unsigned Reg, unsigned NewReg, /// bool ARMBaseRegisterInfo::hasFP(const MachineFunction &MF) const { const MachineFrameInfo *MFI = MF.getFrameInfo(); - return ((NoFramePointerElim && MFI->hasCalls())|| + return ((DisableFramePointerElim(MF) && MFI->hasCalls())|| needsStackRealignment(MF) || MFI->hasVarSizedObjects() || MFI->isFrameAddressTaken()); @@ -506,7 +509,7 @@ needsStackRealignment(const MachineFunction &MF) const { bool ARMBaseRegisterInfo:: cannotEliminateFrame(const MachineFunction &MF) const { const 
MachineFrameInfo *MFI = MF.getFrameInfo(); - if (NoFramePointerElim && MFI->hasCalls()) + if (DisableFramePointerElim(MF) && MFI->hasCalls()) return true; return MFI->hasVarSizedObjects() || MFI->isFrameAddressTaken() || needsStackRealignment(MF); @@ -1050,7 +1053,7 @@ emitLoadConstPool(MachineBasicBlock &MBB, unsigned PredReg) const { MachineFunction &MF = *MBB.getParent(); MachineConstantPool *ConstantPool = MF.getConstantPool(); - Constant *C = + const Constant *C = ConstantInt::get(Type::getInt32Ty(MF.getFunction()->getContext()), Val); unsigned Idx = ConstantPool->getConstantPoolIndex(C, 4); @@ -1180,6 +1183,13 @@ ARMBaseRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, SPAdj = 0; Offset += SPAdj; + // Special handling of dbg_value instructions. + if (MI.isDebugValue()) { + MI.getOperand(i). ChangeToRegister(FrameReg, false /*isDef*/); + MI.getOperand(i+1).ChangeToImmediate(Offset); + return 0; + } + // Modify MI as necessary to handle as much of 'Offset' as possible bool Done = false; if (!AFI->isThumbFunction()) diff --git a/lib/Target/ARM/ARMCodeEmitter.cpp b/lib/Target/ARM/ARMCodeEmitter.cpp index e7aa0c8..f84f85a 100644 --- a/lib/Target/ARM/ARMCodeEmitter.cpp +++ b/lib/Target/ARM/ARMCodeEmitter.cpp @@ -64,7 +64,8 @@ namespace { static char ID; public: ARMCodeEmitter(TargetMachine &tm, JITCodeEmitter &mce) - : MachineFunctionPass(&ID), JTI(0), II((ARMInstrInfo*)tm.getInstrInfo()), + : MachineFunctionPass(&ID), JTI(0), + II((const ARMInstrInfo *)tm.getInstrInfo()), TD(tm.getTargetData()), TM(tm), MCE(mce), MCPEs(0), MJTEs(0), IsPIC(TM.getRelocationModel() == Reloc::PIC_) {} @@ -150,7 +151,7 @@ namespace { /// Routines that handle operands which add machine relocations which are /// fixed up by the relocation stage. - void emitGlobalAddress(GlobalValue *GV, unsigned Reloc, + void emitGlobalAddress(const GlobalValue *GV, unsigned Reloc, bool MayNeedFarStub, bool Indirect, intptr_t ACPV = 0); void emitExternalSymbolAddress(const char *ES, unsigned Reloc); @@ -174,9 +175,9 @@ bool ARMCodeEmitter::runOnMachineFunction(MachineFunction &MF) { assert((MF.getTarget().getRelocationModel() != Reloc::Default || MF.getTarget().getRelocationModel() != Reloc::Static) && "JIT relocation model must be set to static or default!"); - JTI = ((ARMTargetMachine&)MF.getTarget()).getJITInfo(); - II = ((ARMTargetMachine&)MF.getTarget()).getInstrInfo(); - TD = ((ARMTargetMachine&)MF.getTarget()).getTargetData(); + JTI = ((ARMTargetMachine &)MF.getTarget()).getJITInfo(); + II = ((const ARMTargetMachine &)MF.getTarget()).getInstrInfo(); + TD = ((const ARMTargetMachine &)MF.getTarget()).getTargetData(); Subtarget = &TM.getSubtarget<ARMSubtarget>(); MCPEs = &MF.getConstantPool()->getConstants(); MJTEs = 0; @@ -249,14 +250,16 @@ unsigned ARMCodeEmitter::getMachineOpValue(const MachineInstr &MI, /// emitGlobalAddress - Emit the specified address to the code stream. /// -void ARMCodeEmitter::emitGlobalAddress(GlobalValue *GV, unsigned Reloc, +void ARMCodeEmitter::emitGlobalAddress(const GlobalValue *GV, unsigned Reloc, bool MayNeedFarStub, bool Indirect, intptr_t ACPV) { MachineRelocation MR = Indirect ? 
MachineRelocation::getIndirectSymbol(MCE.getCurrentPCOffset(), Reloc, - GV, ACPV, MayNeedFarStub) + const_cast<GlobalValue *>(GV), + ACPV, MayNeedFarStub) : MachineRelocation::getGV(MCE.getCurrentPCOffset(), Reloc, - GV, ACPV, MayNeedFarStub); + const_cast<GlobalValue *>(GV), ACPV, + MayNeedFarStub); MCE.addRelocation(MR); } @@ -391,7 +394,7 @@ void ARMCodeEmitter::emitConstPoolInstruction(const MachineInstr &MI) { << (void*)MCE.getCurrentPCValue() << " " << *ACPV << '\n'); assert(ACPV->isGlobalValue() && "unsupported constant pool value"); - GlobalValue *GV = ACPV->getGV(); + const GlobalValue *GV = ACPV->getGV(); if (GV) { Reloc::Model RelocM = TM.getRelocationModel(); emitGlobalAddress(GV, ARM::reloc_arm_machine_cp_entry, @@ -403,7 +406,7 @@ void ARMCodeEmitter::emitConstPoolInstruction(const MachineInstr &MI) { } emitWordLE(0); } else { - Constant *CV = MCPE.Val.ConstVal; + const Constant *CV = MCPE.Val.ConstVal; DEBUG({ errs() << " ** Constant pool #" << CPI << " @ " @@ -415,7 +418,7 @@ void ARMCodeEmitter::emitConstPoolInstruction(const MachineInstr &MI) { errs() << '\n'; }); - if (GlobalValue *GV = dyn_cast<GlobalValue>(CV)) { + if (const GlobalValue *GV = dyn_cast<GlobalValue>(CV)) { emitGlobalAddress(GV, ARM::reloc_arm_absolute, isa<Function>(GV), false); emitWordLE(0); } else if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV)) { @@ -559,7 +562,7 @@ void ARMCodeEmitter::emitPseudoInstruction(const MachineInstr &MI) { // We allow inline assembler nodes with empty bodies - they can // implicitly define registers, which is ok for JIT. if (MI.getOperand(0).getSymbolName()[0]) { - llvm_report_error("JIT does not support inline asm!"); + report_fatal_error("JIT does not support inline asm!"); } break; } @@ -704,7 +707,7 @@ void ARMCodeEmitter::emitDataProcessingInstruction(const MachineInstr &MI, const TargetInstrDesc &TID = MI.getDesc(); if (TID.Opcode == ARM::BFC) { - llvm_report_error("ARMv6t2 JIT is not yet supported."); + report_fatal_error("ARMv6t2 JIT is not yet supported."); } // Part of binary is determined by TableGn. 
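The lib/System/Unix/Program.inc hunks earlier in this diff switch Program::Execute over to posix_spawn (with posix_spawn_file_actions handling the I/O redirection) whenever no memory limit is requested, on the grounds that it is more efficient than fork/exec. A minimal standalone sketch of that same pattern, independent of the patch itself — the program path, output file name, and helper name below are illustrative, and a POSIX system providing <spawn.h> is assumed:

#include <spawn.h>
#include <fcntl.h>
#include <sys/wait.h>

extern char **environ;

// Spawn a program with stdout redirected to a file, mirroring the
// RedirectIO_PS + posix_spawn sequence used by Program::Execute.
static int SpawnWithRedirect(const char *Prog, char *const Argv[],
                             const char *OutFile) {
  posix_spawn_file_actions_t FileActions;
  posix_spawn_file_actions_init(&FileActions);

  // Equivalent of RedirectIO_PS: open OutFile and install it as fd 1.
  posix_spawn_file_actions_addopen(&FileActions, 1, OutFile,
                                   O_WRONLY | O_CREAT, 0666);
  // Send stderr to the same place, like the adddup2(1, 2) case in the patch.
  posix_spawn_file_actions_adddup2(&FileActions, 1, 2);

  pid_t PID;
  int Err = posix_spawn(&PID, Prog, &FileActions, /*attrp=*/0,
                        Argv, environ);
  posix_spawn_file_actions_destroy(&FileActions);
  if (Err != 0)
    return Err;               // posix_spawn reports failures directly.

  int Status = 0;
  waitpid(PID, &Status, 0);   // Program::Wait does this with a timeout.
  return Status;
}

int main() {
  char *const Args[] = { const_cast<char *>("/bin/ls"), 0 };
  return SpawnWithRedirect("/bin/ls", Args, "ls.out");
}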
diff --git a/lib/Target/ARM/ARMConstantPoolValue.cpp b/lib/Target/ARM/ARMConstantPoolValue.cpp index 90dd0c7..f13ccc6 100644 --- a/lib/Target/ARM/ARMConstantPoolValue.cpp +++ b/lib/Target/ARM/ARMConstantPoolValue.cpp @@ -21,7 +21,7 @@ #include <cstdlib> using namespace llvm; -ARMConstantPoolValue::ARMConstantPoolValue(Constant *cval, unsigned id, +ARMConstantPoolValue::ARMConstantPoolValue(const Constant *cval, unsigned id, ARMCP::ARMCPKind K, unsigned char PCAdj, const char *Modif, @@ -39,16 +39,17 @@ ARMConstantPoolValue::ARMConstantPoolValue(LLVMContext &C, CVal(NULL), S(strdup(s)), LabelId(id), Kind(ARMCP::CPExtSymbol), PCAdjust(PCAdj), Modifier(Modif), AddCurrentAddress(AddCA) {} -ARMConstantPoolValue::ARMConstantPoolValue(GlobalValue *gv, const char *Modif) +ARMConstantPoolValue::ARMConstantPoolValue(const GlobalValue *gv, + const char *Modif) : MachineConstantPoolValue((const Type*)Type::getInt32Ty(gv->getContext())), CVal(gv), S(NULL), LabelId(0), Kind(ARMCP::CPValue), PCAdjust(0), Modifier(Modif) {} -GlobalValue *ARMConstantPoolValue::getGV() const { +const GlobalValue *ARMConstantPoolValue::getGV() const { return dyn_cast_or_null<GlobalValue>(CVal); } -BlockAddress *ARMConstantPoolValue::getBlockAddress() const { +const BlockAddress *ARMConstantPoolValue::getBlockAddress() const { return dyn_cast_or_null<BlockAddress>(CVal); } diff --git a/lib/Target/ARM/ARMConstantPoolValue.h b/lib/Target/ARM/ARMConstantPoolValue.h index 741acde..6f4eddf 100644 --- a/lib/Target/ARM/ARMConstantPoolValue.h +++ b/lib/Target/ARM/ARMConstantPoolValue.h @@ -36,7 +36,7 @@ namespace ARMCP { /// represent PC-relative displacement between the address of the load /// instruction and the constant being loaded, i.e. (&GV-(LPIC+8)). class ARMConstantPoolValue : public MachineConstantPoolValue { - Constant *CVal; // Constant being loaded. + const Constant *CVal; // Constant being loaded. const char *S; // ExtSymbol being loaded. unsigned LabelId; // Label id of the load. ARMCP::ARMCPKind Kind; // Kind of constant. 
@@ -46,20 +46,20 @@ class ARMConstantPoolValue : public MachineConstantPoolValue { bool AddCurrentAddress; public: - ARMConstantPoolValue(Constant *cval, unsigned id, + ARMConstantPoolValue(const Constant *cval, unsigned id, ARMCP::ARMCPKind Kind = ARMCP::CPValue, unsigned char PCAdj = 0, const char *Modifier = NULL, bool AddCurrentAddress = false); ARMConstantPoolValue(LLVMContext &C, const char *s, unsigned id, unsigned char PCAdj = 0, const char *Modifier = NULL, bool AddCurrentAddress = false); - ARMConstantPoolValue(GlobalValue *GV, const char *Modifier); + ARMConstantPoolValue(const GlobalValue *GV, const char *Modifier); ARMConstantPoolValue(); ~ARMConstantPoolValue(); - GlobalValue *getGV() const; + const GlobalValue *getGV() const; const char *getSymbol() const { return S; } - BlockAddress *getBlockAddress() const; + const BlockAddress *getBlockAddress() const; const char *getModifier() const { return Modifier; } bool hasModifier() const { return Modifier != NULL; } bool mustAddCurrentAddress() const { return AddCurrentAddress; } diff --git a/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/lib/Target/ARM/ARMExpandPseudoInsts.cpp index 1b8727d..845d088 100644 --- a/lib/Target/ARM/ARMExpandPseudoInsts.cpp +++ b/lib/Target/ARM/ARMExpandPseudoInsts.cpp @@ -91,7 +91,7 @@ bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) { LO16 = LO16.addImm(Lo16); HI16 = HI16.addImm(Hi16); } else { - GlobalValue *GV = MO.getGlobal(); + const GlobalValue *GV = MO.getGlobal(); unsigned TF = MO.getTargetFlags(); LO16 = LO16.addGlobalAddress(GV, MO.getOffset(), TF | ARMII::MO_LO16); HI16 = HI16.addGlobalAddress(GV, MO.getOffset(), TF | ARMII::MO_HI16); diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp index 7d48663..36a1827 100644 --- a/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -13,7 +13,6 @@ #include "ARM.h" #include "ARMAddressingModes.h" -#include "ARMISelLowering.h" #include "ARMTargetMachine.h" #include "llvm/CallingConv.h" #include "llvm/Constants.h" @@ -121,9 +120,6 @@ private: SDNode *SelectARMIndexedLoad(SDNode *N); SDNode *SelectT2IndexedLoad(SDNode *N); - /// SelectDYN_ALLOC - Select dynamic alloc for Thumb. - SDNode *SelectDYN_ALLOC(SDNode *N); - /// SelectVLD - Select NEON load intrinsics. NumVecs should be /// 1, 2, 3 or 4. The opcode arrays specify the instructions used for /// loads of D registers and even subregs and odd subregs of Q registers. @@ -146,7 +142,7 @@ private: unsigned *QOpcodes1); /// SelectV6T2BitfieldExtractOp - Select SBFX/UBFX instructions for ARM. - SDNode *SelectV6T2BitfieldExtractOp(SDNode *N, unsigned Opc); + SDNode *SelectV6T2BitfieldExtractOp(SDNode *N, bool isSigned); /// SelectCMOVOp - Select CMOV instructions for ARM. SDNode *SelectCMOVOp(SDNode *N); @@ -939,59 +935,6 @@ SDNode *ARMDAGToDAGISel::SelectT2IndexedLoad(SDNode *N) { return NULL; } -SDNode *ARMDAGToDAGISel::SelectDYN_ALLOC(SDNode *N) { - DebugLoc dl = N->getDebugLoc(); - EVT VT = N->getValueType(0); - SDValue Chain = N->getOperand(0); - SDValue Size = N->getOperand(1); - SDValue Align = N->getOperand(2); - SDValue SP = CurDAG->getRegister(ARM::SP, MVT::i32); - int32_t AlignVal = cast<ConstantSDNode>(Align)->getSExtValue(); - if (AlignVal < 0) - // We need to align the stack. Use Thumb1 tAND which is the only thumb - // instruction that can read and write SP. This matches to a pseudo - // instruction that has a chain to ensure the result is written back to - // the stack pointer. 
- SP = SDValue(CurDAG->getMachineNode(ARM::tANDsp, dl, VT, SP, Align), 0); - - bool isC = isa<ConstantSDNode>(Size); - uint32_t C = isC ? cast<ConstantSDNode>(Size)->getZExtValue() : ~0UL; - // Handle the most common case for both Thumb1 and Thumb2: - // tSUBspi - immediate is between 0 ... 508 inclusive. - if (C <= 508 && ((C & 3) == 0)) - // FIXME: tSUBspi encode scale 4 implicitly. - return CurDAG->SelectNodeTo(N, ARM::tSUBspi_, VT, MVT::Other, SP, - CurDAG->getTargetConstant(C/4, MVT::i32), - Chain); - - if (Subtarget->isThumb1Only()) { - // Use tADDspr since Thumb1 does not have a sub r, sp, r. ARMISelLowering - // should have negated the size operand already. FIXME: We can't insert - // new target independent node at this stage so we are forced to negate - // it earlier. Is there a better solution? - return CurDAG->SelectNodeTo(N, ARM::tADDspr_, VT, MVT::Other, SP, Size, - Chain); - } else if (Subtarget->isThumb2()) { - if (isC && Predicate_t2_so_imm(Size.getNode())) { - // t2SUBrSPi - SDValue Ops[] = { SP, CurDAG->getTargetConstant(C, MVT::i32), Chain }; - return CurDAG->SelectNodeTo(N, ARM::t2SUBrSPi_, VT, MVT::Other, Ops, 3); - } else if (isC && Predicate_imm0_4095(Size.getNode())) { - // t2SUBrSPi12 - SDValue Ops[] = { SP, CurDAG->getTargetConstant(C, MVT::i32), Chain }; - return CurDAG->SelectNodeTo(N, ARM::t2SUBrSPi12_, VT, MVT::Other, Ops, 3); - } else { - // t2SUBrSPs - SDValue Ops[] = { SP, Size, - getI32Imm(ARM_AM::getSORegOpc(ARM_AM::lsl,0)), Chain }; - return CurDAG->SelectNodeTo(N, ARM::t2SUBrSPs_, VT, MVT::Other, Ops, 4); - } - } - - // FIXME: Add ADD / SUB sp instructions for ARM. - return 0; -} - /// PairDRegs - Insert a pair of double registers into an implicit def to /// form a quad register. SDNode *ARMDAGToDAGISel::PairDRegs(EVT VT, SDValue V0, SDValue V1) { @@ -1052,7 +995,7 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, unsigned NumVecs, break; } - SDValue Pred = CurDAG->getTargetConstant(14, MVT::i32); + SDValue Pred = getAL(CurDAG); SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); if (is64BitVector) { unsigned Opc = DOpcodes[OpcodeIndex]; @@ -1142,7 +1085,7 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, unsigned NumVecs, break; } - SDValue Pred = CurDAG->getTargetConstant(14, MVT::i32); + SDValue Pred = getAL(CurDAG); SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); SmallVector<SDValue, 10> Ops; @@ -1249,7 +1192,7 @@ SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, case MVT::v4i32: OpcodeIndex = 1; break; } - SDValue Pred = CurDAG->getTargetConstant(14, MVT::i32); + SDValue Pred = getAL(CurDAG); SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); SmallVector<SDValue, 10> Ops; @@ -1305,10 +1248,42 @@ SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, } SDNode *ARMDAGToDAGISel::SelectV6T2BitfieldExtractOp(SDNode *N, - unsigned Opc) { + bool isSigned) { if (!Subtarget->hasV6T2Ops()) return NULL; + unsigned Opc = isSigned ? (Subtarget->isThumb() ? ARM::t2SBFX : ARM::SBFX) + : (Subtarget->isThumb() ? 
ARM::t2UBFX : ARM::UBFX); + + + // For unsigned extracts, check for a shift right and mask + unsigned And_imm = 0; + if (N->getOpcode() == ISD::AND) { + if (isOpcWithIntImmediate(N, ISD::AND, And_imm)) { + + // The immediate is a mask of the low bits iff imm & (imm+1) == 0 + if (And_imm & (And_imm + 1)) + return NULL; + + unsigned Srl_imm = 0; + if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRL, + Srl_imm)) { + assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!"); + + unsigned Width = CountTrailingOnes_32(And_imm); + unsigned LSB = Srl_imm; + SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); + SDValue Ops[] = { N->getOperand(0).getOperand(0), + CurDAG->getTargetConstant(LSB, MVT::i32), + CurDAG->getTargetConstant(Width, MVT::i32), + getAL(CurDAG), Reg0 }; + return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 5); + } + } + return NULL; + } + + // Otherwise, we're looking for a shift of a shift unsigned Shl_imm = 0; if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SHL, Shl_imm)) { assert(Shl_imm > 0 && Shl_imm < 32 && "bad amount in shift node!"); @@ -1531,7 +1506,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { SDNode *ResNode; if (Subtarget->isThumb1Only()) { - SDValue Pred = CurDAG->getTargetConstant(14, MVT::i32); + SDValue Pred = getAL(CurDAG); SDValue PredReg = CurDAG->getRegister(0, MVT::i32); SDValue Ops[] = { CPIdx, Pred, PredReg, CurDAG->getEntryNode() }; ResNode = CurDAG->getMachineNode(ARM::tLDRcp, dl, MVT::i32, MVT::Other, @@ -1571,16 +1546,12 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 5); } } - case ARMISD::DYN_ALLOC: - return SelectDYN_ALLOC(N); case ISD::SRL: - if (SDNode *I = SelectV6T2BitfieldExtractOp(N, - Subtarget->isThumb() ? ARM::t2UBFX : ARM::UBFX)) + if (SDNode *I = SelectV6T2BitfieldExtractOp(N, false)) return I; break; case ISD::SRA: - if (SDNode *I = SelectV6T2BitfieldExtractOp(N, - Subtarget->isThumb() ? ARM::t2SBFX : ARM::SBFX)) + if (SDNode *I = SelectV6T2BitfieldExtractOp(N, true)) return I; break; case ISD::MUL: @@ -1624,6 +1595,10 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { } break; case ISD::AND: { + // Check for unsigned bitfield extract + if (SDNode *I = SelectV6T2BitfieldExtractOp(N, false)) + return I; + // (and (or x, c2), c1) and top 16-bits of c1 and c2 match, lower 16-bits // of c1 are 0xffff, and lower 16-bit of c2 are 0. 
That is, the top 16-bits // are entirely contributed by c2 and lower 16-bits are entirely contributed @@ -1708,7 +1683,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { SDValue Chain = N->getOperand(0); SDValue AM5Opc = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::ia, 4), MVT::i32); - SDValue Pred = CurDAG->getTargetConstant(14, MVT::i32); + SDValue Pred = getAL(CurDAG); SDValue PredReg = CurDAG->getRegister(0, MVT::i32); SDValue Ops[] = { N->getOperand(1), AM5Opc, Pred, PredReg, Chain }; return CurDAG->getMachineNode(ARM::VLDMQ, dl, MVT::v2f64, MVT::Other, @@ -1724,7 +1699,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { SDValue Chain = N->getOperand(0); SDValue AM5Opc = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::ia, 4), MVT::i32); - SDValue Pred = CurDAG->getTargetConstant(14, MVT::i32); + SDValue Pred = getAL(CurDAG); SDValue PredReg = CurDAG->getRegister(0, MVT::i32); SDValue Ops[] = { N->getOperand(1), N->getOperand(2), AM5Opc, Pred, PredReg, Chain }; @@ -1816,7 +1791,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { case MVT::v4f32: case MVT::v4i32: Opc = ARM::VZIPq32; break; } - SDValue Pred = CurDAG->getTargetConstant(14, MVT::i32); + SDValue Pred = getAL(CurDAG); SDValue PredReg = CurDAG->getRegister(0, MVT::i32); SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg }; return CurDAG->getMachineNode(Opc, dl, VT, VT, Ops, 4); @@ -1835,7 +1810,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { case MVT::v4f32: case MVT::v4i32: Opc = ARM::VUZPq32; break; } - SDValue Pred = CurDAG->getTargetConstant(14, MVT::i32); + SDValue Pred = getAL(CurDAG); SDValue PredReg = CurDAG->getRegister(0, MVT::i32); SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg }; return CurDAG->getMachineNode(Opc, dl, VT, VT, Ops, 4); @@ -1854,7 +1829,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { case MVT::v4f32: case MVT::v4i32: Opc = ARM::VTRNq32; break; } - SDValue Pred = CurDAG->getTargetConstant(14, MVT::i32); + SDValue Pred = getAL(CurDAG); SDValue PredReg = CurDAG->getRegister(0, MVT::i32); SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg }; return CurDAG->getMachineNode(Opc, dl, VT, VT, Ops, 4); diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index 77fb0c3..d3842a6 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -40,12 +40,18 @@ #include "llvm/MC/MCSectionMachO.h" #include "llvm/Target/TargetOptions.h" #include "llvm/ADT/VectorExtras.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" #include <sstream> using namespace llvm; +static cl::opt<bool> +EnableARMLongCalls("arm-long-calls", cl::Hidden, + cl::desc("Generate calls via indirect call instructions."), + cl::init(false)); + static bool CC_ARM_APCS_Custom_f64(unsigned &ValNo, EVT &ValVT, EVT &LocVT, CCValAssign::LocInfo &LocInfo, ISD::ArgFlagsTy &ArgFlags, @@ -90,6 +96,8 @@ void ARMTargetLowering::addTypeForNEON(EVT VT, EVT PromotedLdStVT, setOperationAction(ISD::VECTOR_SHUFFLE, VT.getSimpleVT(), Custom); setOperationAction(ISD::CONCAT_VECTORS, VT.getSimpleVT(), Custom); setOperationAction(ISD::EXTRACT_SUBVECTOR, VT.getSimpleVT(), Expand); + setOperationAction(ISD::SELECT, VT.getSimpleVT(), Expand); + setOperationAction(ISD::SELECT_CC, VT.getSimpleVT(), Expand); if (VT.isInteger()) { setOperationAction(ISD::SHL, VT.getSimpleVT(), Custom); setOperationAction(ISD::SRA, VT.getSimpleVT(), Custom); @@ 
-376,10 +384,7 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) // FIXME: Shouldn't need this, since no register is used, but the legalizer // doesn't yet know how to not do that for SjLj. setExceptionSelectorRegister(ARM::R0); - if (Subtarget->isThumb()) - setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom); - else - setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand); + setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand); setOperationAction(ISD::MEMBARRIER, MVT::Other, Custom); if (!Subtarget->hasV6Ops() && !Subtarget->isThumb2()) { @@ -783,7 +788,7 @@ ARMTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals) { + SmallVectorImpl<SDValue> &InVals) const { // Assign locations to each value returned by this call. SmallVector<CCValAssign, 16> RVLocs; @@ -871,7 +876,7 @@ ARMTargetLowering::LowerMemOpCallTo(SDValue Chain, SDValue StackPtr, SDValue Arg, DebugLoc dl, SelectionDAG &DAG, const CCValAssign &VA, - ISD::ArgFlagsTy Flags) { + ISD::ArgFlagsTy Flags) const { unsigned LocMemOffset = VA.getLocMemOffset(); SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset); PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff); @@ -889,7 +894,7 @@ void ARMTargetLowering::PassF64ArgInRegs(DebugLoc dl, SelectionDAG &DAG, CCValAssign &VA, CCValAssign &NextVA, SDValue &StackPtr, SmallVector<SDValue, 8> &MemOpChains, - ISD::ArgFlagsTy Flags) { + ISD::ArgFlagsTy Flags) const { SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32), Arg); @@ -918,7 +923,7 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals) { + SmallVectorImpl<SDValue> &InVals) const { // ARM target does not yet support tail call optimization. isTailCall = false; @@ -1025,8 +1030,44 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee, bool isLocalARMFunc = false; MachineFunction &MF = DAG.getMachineFunction(); ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); - if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) { - GlobalValue *GV = G->getGlobal(); + + if (EnableARMLongCalls) { + assert (getTargetMachine().getRelocationModel() == Reloc::Static + && "long-calls with non-static relocation model!"); + // Handle a global address or an external symbol. If it's not one of + // those, the target's already in a register, so we don't need to do + // anything extra. 
+ if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) { + const GlobalValue *GV = G->getGlobal(); + // Create a constant pool entry for the callee address + unsigned ARMPCLabelIndex = AFI->createConstPoolEntryUId(); + ARMConstantPoolValue *CPV = new ARMConstantPoolValue(GV, + ARMPCLabelIndex, + ARMCP::CPValue, 0); + // Get the address of the callee into a register + SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4); + CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); + Callee = DAG.getLoad(getPointerTy(), dl, + DAG.getEntryNode(), CPAddr, + PseudoSourceValue::getConstantPool(), 0, + false, false, 0); + } else if (ExternalSymbolSDNode *S=dyn_cast<ExternalSymbolSDNode>(Callee)) { + const char *Sym = S->getSymbol(); + + // Create a constant pool entry for the callee address + unsigned ARMPCLabelIndex = AFI->createConstPoolEntryUId(); + ARMConstantPoolValue *CPV = new ARMConstantPoolValue(*DAG.getContext(), + Sym, ARMPCLabelIndex, 0); + // Get the address of the callee into a register + SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4); + CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); + Callee = DAG.getLoad(getPointerTy(), dl, + DAG.getEntryNode(), CPAddr, + PseudoSourceValue::getConstantPool(), 0, + false, false, 0); + } + } else if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) { + const GlobalValue *GV = G->getGlobal(); isDirect = true; bool isExt = GV->isDeclaration() || GV->isWeakForLinker(); bool isStub = (isExt && Subtarget->isTargetDarwin()) && @@ -1049,7 +1090,7 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee, SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); Callee = DAG.getNode(ARMISD::PIC_ADD, dl, getPointerTy(), Callee, PICLabel); - } else + } else Callee = DAG.getTargetGlobalAddress(GV, getPointerTy()); } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) { isDirect = true; @@ -1125,7 +1166,7 @@ SDValue ARMTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::OutputArg> &Outs, - DebugLoc dl, SelectionDAG &DAG) { + DebugLoc dl, SelectionDAG &DAG) const { // CCValAssign - represent the assignment of the return value to a location. SmallVector<CCValAssign, 16> RVLocs; @@ -1232,13 +1273,14 @@ static SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) { return DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Res); } -SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) { +SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op, + SelectionDAG &DAG) const { MachineFunction &MF = DAG.getMachineFunction(); ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); unsigned ARMPCLabelIndex = 0; DebugLoc DL = Op.getDebugLoc(); EVT PtrVT = getPointerTy(); - BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress(); + const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress(); Reloc::Model RelocM = getTargetMachine().getRelocationModel(); SDValue CPAddr; if (RelocM == Reloc::Static) { @@ -1264,7 +1306,7 @@ SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) { // Lower ISD::GlobalTLSAddress using the "general dynamic" model SDValue ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA, - SelectionDAG &DAG) { + SelectionDAG &DAG) const { DebugLoc dl = GA->getDebugLoc(); EVT PtrVT = getPointerTy(); unsigned char PCAdj = Subtarget->isThumb() ? 
4 : 8; @@ -1303,8 +1345,8 @@ ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA, // "local exec" model. SDValue ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA, - SelectionDAG &DAG) { - GlobalValue *GV = GA->getGlobal(); + SelectionDAG &DAG) const { + const GlobalValue *GV = GA->getGlobal(); DebugLoc dl = GA->getDebugLoc(); SDValue Offset; SDValue Chain = DAG.getEntryNode(); @@ -1350,7 +1392,7 @@ ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA, } SDValue -ARMTargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) { +ARMTargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const { // TODO: implement the "local dynamic" model assert(Subtarget->isTargetELF() && "TLS not implemented for non-ELF targets"); @@ -1364,10 +1406,10 @@ ARMTargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) { } SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op, - SelectionDAG &DAG) { + SelectionDAG &DAG) const { EVT PtrVT = getPointerTy(); DebugLoc dl = Op.getDebugLoc(); - GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); + const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); Reloc::Model RelocM = getTargetMachine().getRelocationModel(); if (RelocM == Reloc::PIC_) { bool UseGOTOFF = GV->hasLocalLinkage() || GV->hasHiddenVisibility(); @@ -1404,13 +1446,13 @@ SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op, } SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op, - SelectionDAG &DAG) { + SelectionDAG &DAG) const { MachineFunction &MF = DAG.getMachineFunction(); ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); unsigned ARMPCLabelIndex = 0; EVT PtrVT = getPointerTy(); DebugLoc dl = Op.getDebugLoc(); - GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); + const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); Reloc::Model RelocM = getTargetMachine().getRelocationModel(); SDValue CPAddr; if (RelocM == Reloc::Static) @@ -1443,7 +1485,7 @@ SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op, } SDValue ARMTargetLowering::LowerGLOBAL_OFFSET_TABLE(SDValue Op, - SelectionDAG &DAG){ + SelectionDAG &DAG) const { assert(Subtarget->isTargetELF() && "GLOBAL OFFSET TABLE not implemented for non-ELF targets"); MachineFunction &MF = DAG.getMachineFunction(); @@ -1466,7 +1508,8 @@ SDValue ARMTargetLowering::LowerGLOBAL_OFFSET_TABLE(SDValue Op, SDValue ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG, - const ARMSubtarget *Subtarget) { + const ARMSubtarget *Subtarget) + const { unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); DebugLoc dl = Op.getDebugLoc(); switch (IntNo) { @@ -1533,20 +1576,23 @@ static SDValue LowerMEMBARRIER(SDValue Op, SelectionDAG &DAG, return Res; } -static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG, - unsigned VarArgsFrameIndex) { +static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) { + MachineFunction &MF = DAG.getMachineFunction(); + ARMFunctionInfo *FuncInfo = MF.getInfo<ARMFunctionInfo>(); + // vastart just stores the address of the VarArgsFrameIndex slot into the // memory location argument. 
DebugLoc dl = Op.getDebugLoc(); EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); - SDValue FR = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT); + SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT); const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1), SV, 0, false, false, 0); } SDValue -ARMTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) { +ARMTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, + SelectionDAG &DAG) const { SDNode *Node = Op.getNode(); DebugLoc dl = Node->getDebugLoc(); EVT VT = Node->getValueType(0); @@ -1595,7 +1641,7 @@ ARMTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) { SDValue ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA, SDValue &Root, SelectionDAG &DAG, - DebugLoc dl) { + DebugLoc dl) const { MachineFunction &MF = DAG.getMachineFunction(); ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); @@ -1611,10 +1657,8 @@ ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA, SDValue ArgValue2; if (NextVA.isMemLoc()) { - unsigned ArgSize = NextVA.getLocVT().getSizeInBits()/8; MachineFrameInfo *MFI = MF.getFrameInfo(); - int FI = MFI->CreateFixedObject(ArgSize, NextVA.getLocMemOffset(), - true, false); + int FI = MFI->CreateFixedObject(4, NextVA.getLocMemOffset(), true, false); // Create load node to retrieve arguments from the stack. SDValue FIN = DAG.getFrameIndex(FI, getPointerTy()); @@ -1635,7 +1679,8 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals) { + SmallVectorImpl<SDValue> &InVals) + const { MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); @@ -1663,14 +1708,22 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain, if (VA.needsCustom()) { // f64 and vector types are split up into multiple registers or // combinations of registers and stack slots. - RegVT = MVT::i32; - if (VA.getLocVT() == MVT::v2f64) { SDValue ArgValue1 = GetF64FormalArgument(VA, ArgLocs[++i], Chain, DAG, dl); VA = ArgLocs[++i]; // skip ahead to next loc - SDValue ArgValue2 = GetF64FormalArgument(VA, ArgLocs[++i], - Chain, DAG, dl); + SDValue ArgValue2; + if (VA.isMemLoc()) { + int FI = MFI->CreateFixedObject(8, VA.getLocMemOffset(), + true, false); + SDValue FIN = DAG.getFrameIndex(FI, getPointerTy()); + ArgValue2 = DAG.getLoad(MVT::f64, dl, Chain, FIN, + PseudoSourceValue::getFixedStack(FI), 0, + false, false, 0); + } else { + ArgValue2 = GetF64FormalArgument(VA, ArgLocs[++i], + Chain, DAG, dl); + } ArgValue = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64); ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, ArgValue, ArgValue1, DAG.getIntPtrConstant(0)); @@ -1758,10 +1811,12 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain, // to their spots on the stack so that they may be loaded by deferencing // the result of va_next. 
AFI->setVarArgsRegSaveSize(VARegSaveSize); - VarArgsFrameIndex = MFI->CreateFixedObject(VARegSaveSize, ArgOffset + - VARegSaveSize - VARegSize, - true, false); - SDValue FIN = DAG.getFrameIndex(VarArgsFrameIndex, getPointerTy()); + AFI->setVarArgsFrameIndex( + MFI->CreateFixedObject(VARegSaveSize, + ArgOffset + VARegSaveSize - VARegSize, + true, false)); + SDValue FIN = DAG.getFrameIndex(AFI->getVarArgsFrameIndex(), + getPointerTy()); SmallVector<SDValue, 4> MemOps; for (; NumGPRs < 4; ++NumGPRs) { @@ -1773,9 +1828,10 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain, unsigned VReg = MF.addLiveIn(GPRArgRegs[NumGPRs], RC); SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32); - SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, - PseudoSourceValue::getFixedStack(VarArgsFrameIndex), 0, - false, false, 0); + SDValue Store = + DAG.getStore(Val.getValue(1), dl, Val, FIN, + PseudoSourceValue::getFixedStack(AFI->getVarArgsFrameIndex()), 0, + false, false, 0); MemOps.push_back(Store); FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), FIN, DAG.getConstant(4, getPointerTy())); @@ -1785,7 +1841,8 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain, &MemOps[0], MemOps.size()); } else // This will point to the next argument passed via stack. - VarArgsFrameIndex = MFI->CreateFixedObject(4, ArgOffset, true, false); + AFI->setVarArgsFrameIndex(MFI->CreateFixedObject(4, ArgOffset, + true, false)); } return Chain; @@ -1800,7 +1857,7 @@ static bool isFloatingPointZero(SDValue Op) { if (Op.getOperand(1).getOpcode() == ARMISD::Wrapper) { SDValue WrapperOp = Op.getOperand(1).getOperand(0); if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(WrapperOp)) - if (ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal())) + if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal())) return CFP->getValueAPF().isPosZero(); } } @@ -1811,7 +1868,8 @@ static bool isFloatingPointZero(SDValue Op) { /// the given operands. 
SDValue ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC, - SDValue &ARMCC, SelectionDAG &DAG, DebugLoc dl) { + SDValue &ARMCC, SelectionDAG &DAG, + DebugLoc dl) const { if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS.getNode())) { unsigned C = RHSC->getZExtValue(); if (!isLegalICmpImmediate(C)) { @@ -1877,7 +1935,7 @@ static SDValue getVFPCmp(SDValue LHS, SDValue RHS, SelectionDAG &DAG, return DAG.getNode(ARMISD::FMSTAT, dl, MVT::Flag, Cmp); } -SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) { +SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { EVT VT = Op.getValueType(); SDValue LHS = Op.getOperand(0); SDValue RHS = Op.getOperand(1); @@ -1911,7 +1969,7 @@ SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) { return Result; } -SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) { +SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const { SDValue Chain = Op.getOperand(0); ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get(); SDValue LHS = Op.getOperand(2); @@ -1945,7 +2003,7 @@ SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) { return Res; } -SDValue ARMTargetLowering::LowerBR_JT(SDValue Op, SelectionDAG &DAG) { +SDValue ARMTargetLowering::LowerBR_JT(SDValue Op, SelectionDAG &DAG) const { SDValue Chain = Op.getOperand(0); SDValue Table = Op.getOperand(1); SDValue Index = Op.getOperand(2); @@ -2034,7 +2092,7 @@ static SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) { return DAG.getNode(ARMISD::CNEG, dl, VT, AbsVal, AbsVal, ARMCC, CCR, Cmp); } -SDValue ARMTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) { +SDValue ARMTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const { MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); MFI->setFrameAddressIsTaken(true); EVT VT = Op.getValueType(); @@ -2055,8 +2113,10 @@ ARMTargetLowering::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl, SDValue Dst, SDValue Src, SDValue Size, unsigned Align, bool isVolatile, bool AlwaysInline, - const Value *DstSV, uint64_t DstSVOff, - const Value *SrcSV, uint64_t SrcSVOff){ + const Value *DstSV, + uint64_t DstSVOff, + const Value *SrcSV, + uint64_t SrcSVOff) const { // Do repeated 4-byte loads and stores. To be improved. // This requires 4-byte alignment. if ((Align & 3) != 0) @@ -2157,11 +2217,25 @@ ARMTargetLowering::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl, return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i); } +/// ExpandBIT_CONVERT - If the target supports VFP, this function is called to +/// expand a bit convert where either the source or destination type is i64 to +/// use a VMOVDRR or VMOVRRD node. This should not be done when the non-i64 +/// operand type is illegal (e.g., v2f32 for a target that doesn't support +/// vectors), since the legalizer won't know what to do with that. static SDValue ExpandBIT_CONVERT(SDNode *N, SelectionDAG &DAG) { - SDValue Op = N->getOperand(0); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); DebugLoc dl = N->getDebugLoc(); - if (N->getValueType(0) == MVT::f64) { - // Turn i64->f64 into VMOVDRR. + SDValue Op = N->getOperand(0); + + // This function is only supposed to be called for i64 types, either as the + // source or destination of the bit convert. 
+ EVT SrcVT = Op.getValueType(); + EVT DstVT = N->getValueType(0); + assert((SrcVT == MVT::i64 || DstVT == MVT::i64) && + "ExpandBIT_CONVERT called for non-i64 type"); + + // Turn i64->f64 into VMOVDRR. + if (SrcVT == MVT::i64 && TLI.isTypeLegal(DstVT)) { SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op, DAG.getConstant(0, MVT::i32)); SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op, @@ -2170,11 +2244,14 @@ static SDValue ExpandBIT_CONVERT(SDNode *N, SelectionDAG &DAG) { } // Turn f64->i64 into VMOVRRD. - SDValue Cvt = DAG.getNode(ARMISD::VMOVRRD, dl, - DAG.getVTList(MVT::i32, MVT::i32), &Op, 1); + if (DstVT == MVT::i64 && TLI.isTypeLegal(SrcVT)) { + SDValue Cvt = DAG.getNode(ARMISD::VMOVRRD, dl, + DAG.getVTList(MVT::i32, MVT::i32), &Op, 1); + // Merge the pieces into a single i64 value. + return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Cvt, Cvt.getValue(1)); + } - // Merge the pieces into a single i64 value. - return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Cvt, Cvt.getValue(1)); + return SDValue(); } /// getZeroVector - Returns a vector of specified type with all zero elements. @@ -2227,7 +2304,8 @@ static SDValue getOnesVector(EVT VT, SelectionDAG &DAG, DebugLoc dl) { /// LowerShiftRightParts - Lower SRA_PARTS, which returns two /// i32 values and take a 2 x i32 value to shift plus a shift amount. -SDValue ARMTargetLowering::LowerShiftRightParts(SDValue Op, SelectionDAG &DAG) { +SDValue ARMTargetLowering::LowerShiftRightParts(SDValue Op, + SelectionDAG &DAG) const { assert(Op.getNumOperands() == 3 && "Not a double-shift!"); EVT VT = Op.getValueType(); unsigned VTBits = VT.getSizeInBits(); @@ -2262,7 +2340,8 @@ SDValue ARMTargetLowering::LowerShiftRightParts(SDValue Op, SelectionDAG &DAG) { /// LowerShiftLeftParts - Lower SHL_PARTS, which returns two /// i32 values and take a 2 x i32 value to shift plus a shift amount. -SDValue ARMTargetLowering::LowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) { +SDValue ARMTargetLowering::LowerShiftLeftParts(SDValue Op, + SelectionDAG &DAG) const { assert(Op.getNumOperands() == 3 && "Not a double-shift!"); EVT VT = Op.getValueType(); unsigned VTBits = VT.getSizeInBits(); @@ -3059,7 +3138,7 @@ static SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) { return DAG.getNode(ISD::BIT_CONVERT, dl, Op.getValueType(), Val); } -SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) { +SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { switch (Op.getOpcode()) { default: llvm_unreachable("Don't know how to custom lower this!"); case ISD::ConstantPool: return LowerConstantPool(Op, DAG); @@ -3072,7 +3151,7 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) { case ISD::BR_CC: return LowerBR_CC(Op, DAG); case ISD::BR_JT: return LowerBR_JT(Op, DAG); case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG); - case ISD::VASTART: return LowerVASTART(Op, DAG, VarArgsFrameIndex); + case ISD::VASTART: return LowerVASTART(Op, DAG); case ISD::MEMBARRIER: return LowerMEMBARRIER(Op, DAG, Subtarget); case ISD::SINT_TO_FP: case ISD::UINT_TO_FP: return LowerINT_TO_FP(Op, DAG); @@ -3105,22 +3184,22 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) { /// type with new values built out of custom code. 
void ARMTargetLowering::ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results, - SelectionDAG &DAG) { + SelectionDAG &DAG) const { + SDValue Res; switch (N->getOpcode()) { default: llvm_unreachable("Don't know how to custom expand this!"); - return; + break; case ISD::BIT_CONVERT: - Results.push_back(ExpandBIT_CONVERT(N, DAG)); - return; + Res = ExpandBIT_CONVERT(N, DAG); + break; case ISD::SRL: - case ISD::SRA: { - SDValue Res = LowerShift(N, DAG, Subtarget); - if (Res.getNode()) - Results.push_back(Res); - return; - } + case ISD::SRA: + Res = LowerShift(N, DAG, Subtarget); + break; } + if (Res.getNode()) + Results.push_back(Res); } //===----------------------------------------------------------------------===// @@ -3302,8 +3381,7 @@ ARMTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB, MachineBasicBlock * ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, - MachineBasicBlock *BB, - DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const { + MachineBasicBlock *BB) const { const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); DebugLoc dl = MI->getDebugLoc(); bool isThumb2 = Subtarget->isThumb2(); @@ -3387,12 +3465,9 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, F->insert(It, sinkMBB); // Update machine-CFG edges by first adding all successors of the current // block to the new block which will contain the Phi node for the select. - // Also inform sdisel of the edge changes. for (MachineBasicBlock::succ_iterator I = BB->succ_begin(), - E = BB->succ_end(); I != E; ++I) { - EM->insert(std::make_pair(*I, sinkMBB)); + E = BB->succ_end(); I != E; ++I) sinkMBB->addSuccessor(*I); - } // Next, remove all successors of the current block, and add the true // and fallthrough blocks as its successors. while (!BB->succ_empty()) diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h index fa33ad3..d8a230f 100644 --- a/lib/Target/ARM/ARMISelLowering.h +++ b/lib/Target/ARM/ARMISelLowering.h @@ -159,25 +159,24 @@ namespace llvm { // ARMTargetLowering - ARM Implementation of the TargetLowering interface class ARMTargetLowering : public TargetLowering { - int VarArgsFrameIndex; // FrameIndex for start of varargs area. public: explicit ARMTargetLowering(TargetMachine &TM); - virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG); + virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const; /// ReplaceNodeResults - Replace the results of node with an illegal result /// type with new values built out of custom code. /// virtual void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results, - SelectionDAG &DAG); + SelectionDAG &DAG) const; virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const; virtual const char *getTargetNodeName(unsigned Opcode) const; - virtual MachineBasicBlock *EmitInstrWithCustomInserter(MachineInstr *MI, - MachineBasicBlock *MBB, - DenseMap<MachineBasicBlock*, MachineBasicBlock*>*) const; + virtual MachineBasicBlock * + EmitInstrWithCustomInserter(MachineInstr *MI, + MachineBasicBlock *MBB) const; /// allowsUnalignedMemoryAccesses - Returns true if the target allows /// unaligned memory accesses. of the specified type. 
@@ -237,7 +236,7 @@ namespace llvm { std::vector<SDValue> &Ops, SelectionDAG &DAG) const; - virtual const ARMSubtarget* getSubtarget() { + virtual const ARMSubtarget* getSubtarget() const { return Subtarget; } @@ -272,54 +271,57 @@ namespace llvm { CCValAssign &VA, CCValAssign &NextVA, SDValue &StackPtr, SmallVector<SDValue, 8> &MemOpChains, - ISD::ArgFlagsTy Flags); + ISD::ArgFlagsTy Flags) const; SDValue GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA, - SDValue &Root, SelectionDAG &DAG, DebugLoc dl); + SDValue &Root, SelectionDAG &DAG, + DebugLoc dl) const; CCAssignFn *CCAssignFnForNode(CallingConv::ID CC, bool Return, bool isVarArg) const; SDValue LowerMemOpCallTo(SDValue Chain, SDValue StackPtr, SDValue Arg, DebugLoc dl, SelectionDAG &DAG, const CCValAssign &VA, - ISD::ArgFlagsTy Flags); - SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG); + ISD::ArgFlagsTy Flags) const; + SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const; SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG, - const ARMSubtarget *Subtarget); - SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG); - SDValue LowerGlobalAddressDarwin(SDValue Op, SelectionDAG &DAG); - SDValue LowerGlobalAddressELF(SDValue Op, SelectionDAG &DAG); - SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG); + const ARMSubtarget *Subtarget) const; + SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerGlobalAddressDarwin(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerGlobalAddressELF(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const; SDValue LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA, - SelectionDAG &DAG); + SelectionDAG &DAG) const; SDValue LowerToTLSExecModels(GlobalAddressSDNode *GA, - SelectionDAG &DAG); - SDValue LowerGLOBAL_OFFSET_TABLE(SDValue Op, SelectionDAG &DAG); - SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG); - SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG); - SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG); - SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG); - SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG); - SDValue LowerShiftRightParts(SDValue Op, SelectionDAG &DAG); - SDValue LowerShiftLeftParts(SDValue Op, SelectionDAG &DAG); + SelectionDAG &DAG) const; + SDValue LowerGLOBAL_OFFSET_TABLE(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerShiftRightParts(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) const; SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl, SDValue Chain, SDValue Dst, SDValue Src, SDValue Size, unsigned Align, bool isVolatile, bool AlwaysInline, - const Value *DstSV, uint64_t DstSVOff, - const Value *SrcSV, uint64_t SrcSVOff); + const Value *DstSV, + uint64_t DstSVOff, + const Value *SrcSV, + uint64_t SrcSVOff) const; SDValue LowerCallResult(SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals); + SmallVectorImpl<SDValue> &InVals) const; virtual SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool 
isVarArg, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals); + SmallVectorImpl<SDValue> &InVals) const; virtual SDValue LowerCall(SDValue Chain, SDValue Callee, @@ -328,16 +330,16 @@ namespace llvm { const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals); + SmallVectorImpl<SDValue> &InVals) const; virtual SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::OutputArg> &Outs, - DebugLoc dl, SelectionDAG &DAG); + DebugLoc dl, SelectionDAG &DAG) const; SDValue getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC, - SDValue &ARMCC, SelectionDAG &DAG, DebugLoc dl); + SDValue &ARMCC, SelectionDAG &DAG, DebugLoc dl) const; MachineBasicBlock *EmitAtomicCmpSwap(MachineInstr *MI, MachineBasicBlock *BB, diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td index f2ab06f..ce5f2f8 100644 --- a/lib/Target/ARM/ARMInstrInfo.td +++ b/lib/Target/ARM/ARMInstrInfo.td @@ -124,6 +124,7 @@ def HasV6 : Predicate<"Subtarget->hasV6Ops()">; def HasV6T2 : Predicate<"Subtarget->hasV6T2Ops()">; def NoV6T2 : Predicate<"!Subtarget->hasV6T2Ops()">; def HasV7 : Predicate<"Subtarget->hasV7Ops()">; +def NoVFP : Predicate<"!Subtarget->hasVFP2()">; def HasVFP2 : Predicate<"Subtarget->hasVFP2()">; def HasVFP3 : Predicate<"Subtarget->hasVFP3()">; def HasNEON : Predicate<"Subtarget->hasNEON()">; @@ -1231,7 +1232,7 @@ def LDRBT : AI2ldbpo<(outs GPR:$dst, GPR:$base_wb), } def LDRSBT : AI3ldsbpo<(outs GPR:$dst, GPR:$base_wb), - (ins GPR:$base,am2offset:$offset), LdMiscFrm, IIC_iLoadru, + (ins GPR:$base,am3offset:$offset), LdMiscFrm, IIC_iLoadru, "ldrsbt", "\t$dst, [$base], $offset", "$base = $base_wb", []> { let Inst{21} = 1; // overwrite } @@ -2533,7 +2534,23 @@ let Defs = "mov\tr0, #0\n\t" "add\tpc, pc, #0\n\t" "mov\tr0, #1 @ eh_setjmp end", "", - [(set R0, (ARMeh_sjlj_setjmp GPR:$src, GPR:$val))]>; + [(set R0, (ARMeh_sjlj_setjmp GPR:$src, GPR:$val))]>, + Requires<[IsARM, HasVFP2]>; +} + +let Defs = + [ R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, LR ] in { + def Int_eh_sjlj_setjmp_nofp : XI<(outs), (ins GPR:$src, GPR:$val), + AddrModeNone, SizeSpecial, IndexModeNone, + Pseudo, NoItinerary, + "str\tsp, [$src, #+8] @ eh_setjmp begin\n\t" + "add\t$val, pc, #8\n\t" + "str\t$val, [$src, #+4]\n\t" + "mov\tr0, #0\n\t" + "add\tpc, pc, #0\n\t" + "mov\tr0, #1 @ eh_setjmp end", "", + [(set R0, (ARMeh_sjlj_setjmp GPR:$src, GPR:$val))]>, + Requires<[IsARM, NoVFP]>; } //===----------------------------------------------------------------------===// @@ -2747,7 +2764,7 @@ multiclass LdStCop<bits<4> op31_28, bit load, string opc> { def L_OFFSET : ACI<(outs), (ins nohash_imm:$cop, nohash_imm:$CRd, addrmode2:$addr), - opc, "l\tp$cop, cr$CRd, $addr"> { + !strconcat(opc, "l"), "\tp$cop, cr$CRd, $addr"> { let Inst{31-28} = op31_28; let Inst{24} = 1; // P = 1 let Inst{21} = 0; // W = 0 @@ -2757,7 +2774,7 @@ multiclass LdStCop<bits<4> op31_28, bit load, string opc> { def L_PRE : ACI<(outs), (ins nohash_imm:$cop, nohash_imm:$CRd, addrmode2:$addr), - opc, "l\tp$cop, cr$CRd, $addr!"> { + !strconcat(opc, "l"), "\tp$cop, cr$CRd, $addr!"> { let Inst{31-28} = op31_28; let Inst{24} = 1; // P = 1 let Inst{21} = 1; // W = 1 @@ -2767,7 +2784,7 @@ multiclass LdStCop<bits<4> op31_28, bit load, string opc> { def L_POST : ACI<(outs), (ins nohash_imm:$cop, nohash_imm:$CRd, GPR:$base, am2offset:$offset), - opc, "l\tp$cop, cr$CRd, 
[$base], $offset"> { + !strconcat(opc, "l"), "\tp$cop, cr$CRd, [$base], $offset"> { let Inst{31-28} = op31_28; let Inst{24} = 0; // P = 0 let Inst{21} = 1; // W = 1 @@ -2777,7 +2794,7 @@ multiclass LdStCop<bits<4> op31_28, bit load, string opc> { def L_OPTION : ACI<(outs), (ins nohash_imm:$cop, nohash_imm:$CRd, GPR:$base, nohash_imm:$option), - opc, "l\tp$cop, cr$CRd, [$base], $option"> { + !strconcat(opc, "l"), "\tp$cop, cr$CRd, [$base], $option"> { let Inst{31-28} = op31_28; let Inst{24} = 0; // P = 0 let Inst{23} = 1; // U = 1 diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td index ed9d31d..d5ce2b8 100644 --- a/lib/Target/ARM/ARMInstrNEON.td +++ b/lib/Target/ARM/ARMInstrNEON.td @@ -1621,12 +1621,13 @@ multiclass N3VNInt_HSD<bit op24, bit op23, bits<4> op11_8, bit op4, // First with only element sizes of 16 and 32 bits: multiclass N3VLInt_HS<bit op24, bit op23, bits<4> op11_8, bit op4, - InstrItinClass itin, string OpcodeStr, string Dt, + InstrItinClass itin16, InstrItinClass itin32, + string OpcodeStr, string Dt, Intrinsic IntOp, bit Commutable = 0> { - def v4i32 : N3VLInt<op24, op23, 0b01, op11_8, op4, itin, + def v4i32 : N3VLInt<op24, op23, 0b01, op11_8, op4, itin16, OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, IntOp, Commutable>; - def v2i64 : N3VLInt<op24, op23, 0b10, op11_8, op4, itin, + def v2i64 : N3VLInt<op24, op23, 0b10, op11_8, op4, itin32, OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, IntOp, Commutable>; } @@ -1642,11 +1643,12 @@ multiclass N3VLIntSL_HS<bit op24, bits<4> op11_8, // ....then also with element size of 8 bits: multiclass N3VLInt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4, - InstrItinClass itin, string OpcodeStr, string Dt, + InstrItinClass itin16, InstrItinClass itin32, + string OpcodeStr, string Dt, Intrinsic IntOp, bit Commutable = 0> - : N3VLInt_HS<op24, op23, op11_8, op4, itin, OpcodeStr, Dt, + : N3VLInt_HS<op24, op23, op11_8, op4, itin16, itin32, OpcodeStr, Dt, IntOp, Commutable> { - def v8i16 : N3VLInt<op24, op23, 0b00, op11_8, op4, itin, + def v8i16 : N3VLInt<op24, op23, 0b00, op11_8, op4, itin16, OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, IntOp, Commutable>; } @@ -1711,21 +1713,22 @@ multiclass N3VMulOpSL_HS<bits<4> op11_8, // Neon 3-argument intrinsics, // element sizes of 8, 16 and 32 bits: multiclass N3VInt3_QHS<bit op24, bit op23, bits<4> op11_8, bit op4, + InstrItinClass itinD, InstrItinClass itinQ, string OpcodeStr, string Dt, Intrinsic IntOp> { // 64-bit vector types. - def v8i8 : N3VDInt3<op24, op23, 0b00, op11_8, op4, IIC_VMACi16D, + def v8i8 : N3VDInt3<op24, op23, 0b00, op11_8, op4, itinD, OpcodeStr, !strconcat(Dt, "8"), v8i8, v8i8, IntOp>; - def v4i16 : N3VDInt3<op24, op23, 0b01, op11_8, op4, IIC_VMACi16D, + def v4i16 : N3VDInt3<op24, op23, 0b01, op11_8, op4, itinD, OpcodeStr, !strconcat(Dt, "16"), v4i16, v4i16, IntOp>; - def v2i32 : N3VDInt3<op24, op23, 0b10, op11_8, op4, IIC_VMACi32D, + def v2i32 : N3VDInt3<op24, op23, 0b10, op11_8, op4, itinD, OpcodeStr, !strconcat(Dt, "32"), v2i32, v2i32, IntOp>; // 128-bit vector types. 
- def v16i8 : N3VQInt3<op24, op23, 0b00, op11_8, op4, IIC_VMACi16Q, + def v16i8 : N3VQInt3<op24, op23, 0b00, op11_8, op4, itinQ, OpcodeStr, !strconcat(Dt, "8"), v16i8, v16i8, IntOp>; - def v8i16 : N3VQInt3<op24, op23, 0b01, op11_8, op4, IIC_VMACi16Q, + def v8i16 : N3VQInt3<op24, op23, 0b01, op11_8, op4, itinQ, OpcodeStr, !strconcat(Dt, "16"), v8i16, v8i16, IntOp>; - def v4i32 : N3VQInt3<op24, op23, 0b10, op11_8, op4, IIC_VMACi32Q, + def v4i32 : N3VQInt3<op24, op23, 0b10, op11_8, op4, itinQ, OpcodeStr, !strconcat(Dt, "32"), v4i32, v4i32, IntOp>; } @@ -1734,10 +1737,11 @@ multiclass N3VInt3_QHS<bit op24, bit op23, bits<4> op11_8, bit op4, // First with only element sizes of 16 and 32 bits: multiclass N3VLInt3_HS<bit op24, bit op23, bits<4> op11_8, bit op4, + InstrItinClass itin16, InstrItinClass itin32, string OpcodeStr, string Dt, Intrinsic IntOp> { - def v4i32 : N3VLInt3<op24, op23, 0b01, op11_8, op4, IIC_VMACi16D, + def v4i32 : N3VLInt3<op24, op23, 0b01, op11_8, op4, itin16, OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, IntOp>; - def v2i64 : N3VLInt3<op24, op23, 0b10, op11_8, op4, IIC_VMACi16D, + def v2i64 : N3VLInt3<op24, op23, 0b10, op11_8, op4, itin32, OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, IntOp>; } @@ -1751,9 +1755,10 @@ multiclass N3VLInt3SL_HS<bit op24, bits<4> op11_8, // ....then also with element size of 8 bits: multiclass N3VLInt3_QHS<bit op24, bit op23, bits<4> op11_8, bit op4, + InstrItinClass itin16, InstrItinClass itin32, string OpcodeStr, string Dt, Intrinsic IntOp> - : N3VLInt3_HS<op24, op23, op11_8, op4, OpcodeStr, Dt, IntOp> { - def v8i16 : N3VLInt3<op24, op23, 0b00, op11_8, op4, IIC_VMACi16D, + : N3VLInt3_HS<op24, op23, op11_8, op4, itin16, itin32, OpcodeStr, Dt, IntOp> { + def v8i16 : N3VLInt3<op24, op23, 0b00, op11_8, op4, itin16, OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, IntOp>; } @@ -2001,10 +2006,10 @@ def VADDfd : N3VD<0, 0, 0b00, 0b1101, 0, IIC_VBIND, "vadd", "f32", def VADDfq : N3VQ<0, 0, 0b00, 0b1101, 0, IIC_VBINQ, "vadd", "f32", v4f32, v4f32, fadd, 1>; // VADDL : Vector Add Long (Q = D + D) -defm VADDLs : N3VLInt_QHS<0,1,0b0000,0, IIC_VSHLiD, "vaddl", "s", - int_arm_neon_vaddls, 1>; -defm VADDLu : N3VLInt_QHS<1,1,0b0000,0, IIC_VSHLiD, "vaddl", "u", - int_arm_neon_vaddlu, 1>; +defm VADDLs : N3VLInt_QHS<0,1,0b0000,0, IIC_VSHLiD, IIC_VSHLiD, + "vaddl", "s", int_arm_neon_vaddls, 1>; +defm VADDLu : N3VLInt_QHS<1,1,0b0000,0, IIC_VSHLiD, IIC_VSHLiD, + "vaddl", "u", int_arm_neon_vaddlu, 1>; // VADDW : Vector Add Wide (Q = Q + D) defm VADDWs : N3VWInt_QHS<0,1,0b0001,0, "vaddw", "s", int_arm_neon_vaddws, 0>; defm VADDWu : N3VWInt_QHS<1,1,0b0001,0, "vaddw", "u", int_arm_neon_vaddwu, 0>; @@ -2118,10 +2123,10 @@ def : Pat<(v4i32 (int_arm_neon_vqrdmulh (v4i32 QPR:$src1), (SubReg_i32_lane imm:$lane)))>; // VMULL : Vector Multiply Long (integer and polynomial) (Q = D * D) -defm VMULLs : N3VLInt_QHS<0,1,0b1100,0, IIC_VMULi16D, "vmull", "s", - int_arm_neon_vmulls, 1>; -defm VMULLu : N3VLInt_QHS<1,1,0b1100,0, IIC_VMULi16D, "vmull", "u", - int_arm_neon_vmullu, 1>; +defm VMULLs : N3VLInt_QHS<0,1,0b1100,0, IIC_VMULi16D, IIC_VMULi32D, + "vmull", "s", int_arm_neon_vmulls, 1>; +defm VMULLu : N3VLInt_QHS<1,1,0b1100,0, IIC_VMULi16D, IIC_VMULi32D, + "vmull", "u", int_arm_neon_vmullu, 1>; def VMULLp : N3VLInt<0, 1, 0b00, 0b1110, 0, IIC_VMULi16D, "vmull", "p8", v8i16, v8i8, int_arm_neon_vmullp, 1>; defm VMULLsls : N3VLIntSL_HS<0, 0b1010, IIC_VMULi16D, "vmull", "s", @@ -2130,10 +2135,10 @@ defm VMULLslu : N3VLIntSL_HS<1, 0b1010, IIC_VMULi16D, "vmull", "u", int_arm_neon_vmullu>; 
// VQDMULL : Vector Saturating Doubling Multiply Long (Q = D * D) -defm VQDMULL : N3VLInt_HS<0,1,0b1101,0, IIC_VMULi16D, "vqdmull", "s", - int_arm_neon_vqdmull, 1>; -defm VQDMULLsl: N3VLIntSL_HS<0, 0b1011, IIC_VMULi16D, "vqdmull", "s", - int_arm_neon_vqdmull>; +defm VQDMULL : N3VLInt_HS<0,1,0b1101,0, IIC_VMULi16D, IIC_VMULi32D, + "vqdmull", "s", int_arm_neon_vqdmull, 1>; +defm VQDMULLsl: N3VLIntSL_HS<0, 0b1011, IIC_VMULi16D, + "vqdmull", "s", int_arm_neon_vqdmull>; // Vector Multiply-Accumulate and Multiply-Subtract Operations. @@ -2177,15 +2182,17 @@ def : Pat<(v4f32 (fadd (v4f32 QPR:$src1), (SubReg_i32_lane imm:$lane)))>; // VMLAL : Vector Multiply Accumulate Long (Q += D * D) -defm VMLALs : N3VLInt3_QHS<0,1,0b1000,0, "vmlal", "s", int_arm_neon_vmlals>; -defm VMLALu : N3VLInt3_QHS<1,1,0b1000,0, "vmlal", "u", int_arm_neon_vmlalu>; +defm VMLALs : N3VLInt3_QHS<0,1,0b1000,0, IIC_VMACi16D, IIC_VMACi32D, + "vmlal", "s", int_arm_neon_vmlals>; +defm VMLALu : N3VLInt3_QHS<1,1,0b1000,0, IIC_VMACi16D, IIC_VMACi32D, + "vmlal", "u", int_arm_neon_vmlalu>; defm VMLALsls : N3VLInt3SL_HS<0, 0b0010, "vmlal", "s", int_arm_neon_vmlals>; defm VMLALslu : N3VLInt3SL_HS<1, 0b0010, "vmlal", "u", int_arm_neon_vmlalu>; // VQDMLAL : Vector Saturating Doubling Multiply Accumulate Long (Q += D * D) -defm VQDMLAL : N3VLInt3_HS<0, 1, 0b1001, 0, "vqdmlal", "s", - int_arm_neon_vqdmlal>; +defm VQDMLAL : N3VLInt3_HS<0, 1, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D, + "vqdmlal", "s", int_arm_neon_vqdmlal>; defm VQDMLALsl: N3VLInt3SL_HS<0, 0b0011, "vqdmlal", "s", int_arm_neon_vqdmlal>; // VMLS : Vector Multiply Subtract (integer and floating-point) @@ -2227,15 +2234,17 @@ def : Pat<(v4f32 (fsub (v4f32 QPR:$src1), (SubReg_i32_lane imm:$lane)))>; // VMLSL : Vector Multiply Subtract Long (Q -= D * D) -defm VMLSLs : N3VLInt3_QHS<0,1,0b1010,0, "vmlsl", "s", int_arm_neon_vmlsls>; -defm VMLSLu : N3VLInt3_QHS<1,1,0b1010,0, "vmlsl", "u", int_arm_neon_vmlslu>; +defm VMLSLs : N3VLInt3_QHS<0,1,0b1010,0, IIC_VMACi16D, IIC_VMACi32D, + "vmlsl", "s", int_arm_neon_vmlsls>; +defm VMLSLu : N3VLInt3_QHS<1,1,0b1010,0, IIC_VMACi16D, IIC_VMACi32D, + "vmlsl", "u", int_arm_neon_vmlslu>; defm VMLSLsls : N3VLInt3SL_HS<0, 0b0110, "vmlsl", "s", int_arm_neon_vmlsls>; defm VMLSLslu : N3VLInt3SL_HS<1, 0b0110, "vmlsl", "u", int_arm_neon_vmlslu>; // VQDMLSL : Vector Saturating Doubling Multiply Subtract Long (Q -= D * D) -defm VQDMLSL : N3VLInt3_HS<0, 1, 0b1011, 0, "vqdmlsl", "s", - int_arm_neon_vqdmlsl>; +defm VQDMLSL : N3VLInt3_HS<0, 1, 0b1011, 0, IIC_VMACi16D, IIC_VMACi32D, + "vqdmlsl", "s", int_arm_neon_vqdmlsl>; defm VQDMLSLsl: N3VLInt3SL_HS<0, 0b111, "vqdmlsl", "s", int_arm_neon_vqdmlsl>; // Vector Subtract Operations. 
@@ -2248,26 +2257,26 @@ def VSUBfd : N3VD<0, 0, 0b10, 0b1101, 0, IIC_VBIND, "vsub", "f32", def VSUBfq : N3VQ<0, 0, 0b10, 0b1101, 0, IIC_VBINQ, "vsub", "f32", v4f32, v4f32, fsub, 0>; // VSUBL : Vector Subtract Long (Q = D - D) -defm VSUBLs : N3VLInt_QHS<0,1,0b0010,0, IIC_VSHLiD, "vsubl", "s", - int_arm_neon_vsubls, 1>; -defm VSUBLu : N3VLInt_QHS<1,1,0b0010,0, IIC_VSHLiD, "vsubl", "u", - int_arm_neon_vsublu, 1>; +defm VSUBLs : N3VLInt_QHS<0,1,0b0010,0, IIC_VSHLiD, IIC_VSHLiD, + "vsubl", "s", int_arm_neon_vsubls, 1>; +defm VSUBLu : N3VLInt_QHS<1,1,0b0010,0, IIC_VSHLiD, IIC_VSHLiD, + "vsubl", "u", int_arm_neon_vsublu, 1>; // VSUBW : Vector Subtract Wide (Q = Q - D) defm VSUBWs : N3VWInt_QHS<0,1,0b0011,0, "vsubw", "s", int_arm_neon_vsubws, 0>; defm VSUBWu : N3VWInt_QHS<1,1,0b0011,0, "vsubw", "u", int_arm_neon_vsubwu, 0>; // VHSUB : Vector Halving Subtract defm VHSUBs : N3VInt_QHS<0, 0, 0b0010, 0, N3RegFrm, - IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q, + IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q, "vhsub", "s", int_arm_neon_vhsubs, 0>; defm VHSUBu : N3VInt_QHS<1, 0, 0b0010, 0, N3RegFrm, - IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q, + IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q, "vhsub", "u", int_arm_neon_vhsubu, 0>; // VQSUB : Vector Saturing Subtract defm VQSUBs : N3VInt_QHSD<0, 0, 0b0010, 1, N3RegFrm, - IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q, + IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q, "vqsub", "s", int_arm_neon_vqsubs, 0>; defm VQSUBu : N3VInt_QHSD<1, 0, 0b0010, 1, N3RegFrm, - IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q, + IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q, "vqsub", "u", int_arm_neon_vqsubu, 0>; // VSUBHN : Vector Subtract and Narrow Returning High Half (D = Q - Q) defm VSUBHN : N3VNInt_HSD<0,1,0b0110,0, "vsubhn", "i", @@ -2279,8 +2288,8 @@ defm VRSUBHN : N3VNInt_HSD<1,1,0b0110,0, "vrsubhn", "i", // Vector Comparisons. 
// VCEQ : Vector Compare Equal -defm VCEQ : N3V_QHS<1, 0, 0b1000, 1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, - IIC_VBINi4Q, "vceq", "i", NEONvceq, 1>; +defm VCEQ : N3V_QHS<1, 0, 0b1000, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, + IIC_VSUBi4Q, "vceq", "i", NEONvceq, 1>; def VCEQfd : N3VD<0,0,0b00,0b1110,0, IIC_VBIND, "vceq", "f32", v2i32, v2f32, NEONvceq, 1>; def VCEQfq : N3VQ<0,0,0b00,0b1110,0, IIC_VBINQ, "vceq", "f32", v4i32, v4f32, @@ -2290,10 +2299,10 @@ defm VCEQz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00010, 0, "vceq", "i", "$dst, $src, #0">; // VCGE : Vector Compare Greater Than or Equal -defm VCGEs : N3V_QHS<0, 0, 0b0011, 1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, - IIC_VBINi4Q, "vcge", "s", NEONvcge, 0>; -defm VCGEu : N3V_QHS<1, 0, 0b0011, 1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, - IIC_VBINi4Q, "vcge", "u", NEONvcgeu, 0>; +defm VCGEs : N3V_QHS<0, 0, 0b0011, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, + IIC_VSUBi4Q, "vcge", "s", NEONvcge, 0>; +defm VCGEu : N3V_QHS<1, 0, 0b0011, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, + IIC_VSUBi4Q, "vcge", "u", NEONvcgeu, 0>; def VCGEfd : N3VD<1,0,0b00,0b1110,0, IIC_VBIND, "vcge", "f32", v2i32, v2f32, NEONvcge, 0>; def VCGEfq : N3VQ<1,0,0b00,0b1110,0, IIC_VBINQ, "vcge", "f32", v4i32, v4f32, @@ -2306,10 +2315,10 @@ defm VCLEz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00011, 0, "vcle", "s", "$dst, $src, #0">; // VCGT : Vector Compare Greater Than -defm VCGTs : N3V_QHS<0, 0, 0b0011, 0, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, - IIC_VBINi4Q, "vcgt", "s", NEONvcgt, 0>; -defm VCGTu : N3V_QHS<1, 0, 0b0011, 0, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, - IIC_VBINi4Q, "vcgt", "u", NEONvcgtu, 0>; +defm VCGTs : N3V_QHS<0, 0, 0b0011, 0, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, + IIC_VSUBi4Q, "vcgt", "s", NEONvcgt, 0>; +defm VCGTu : N3V_QHS<1, 0, 0b0011, 0, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, + IIC_VSUBi4Q, "vcgt", "u", NEONvcgtu, 0>; def VCGTfd : N3VD<1,0,0b10,0b1110,0, IIC_VBIND, "vcgt", "f32", v2i32, v2f32, NEONvcgt, 0>; def VCGTfq : N3VQ<1,0,0b10,0b1110,0, IIC_VBINQ, "vcgt", "f32", v4i32, v4f32, @@ -2387,11 +2396,11 @@ def VORNq : N3VX<0, 0, 0b11, 0b0001, 1, 1, (outs QPR:$dst), // VMVN : Vector Bitwise NOT def VMVNd : N2VX<0b11, 0b11, 0b00, 0b00, 0b01011, 0, 0, - (outs DPR:$dst), (ins DPR:$src), IIC_VSHLiD, + (outs DPR:$dst), (ins DPR:$src), IIC_VSUBiD, "vmvn", "$dst, $src", "", [(set DPR:$dst, (v2i32 (vnot8 DPR:$src)))]>; def VMVNq : N2VX<0b11, 0b11, 0b00, 0b00, 0b01011, 1, 0, - (outs QPR:$dst), (ins QPR:$src), IIC_VSHLiD, + (outs QPR:$dst), (ins QPR:$src), IIC_VSUBiD, "vmvn", "$dst, $src", "", [(set QPR:$dst, (v4i32 (vnot16 QPR:$src)))]>; def : Pat<(v2i32 (vnot8 DPR:$src)), (VMVNd DPR:$src)>; @@ -2447,10 +2456,10 @@ def VBITq : N3VX<1, 0, 0b10, 0b0001, 1, 1, // VABD : Vector Absolute Difference defm VABDs : N3VInt_QHS<0, 0, 0b0111, 0, N3RegFrm, - IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q, + IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q, "vabd", "s", int_arm_neon_vabds, 0>; defm VABDu : N3VInt_QHS<1, 0, 0b0111, 0, N3RegFrm, - IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q, + IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q, "vabd", "u", int_arm_neon_vabdu, 0>; def VABDfd : N3VDInt<1, 0, 0b10, 0b1101, 0, N3RegFrm, IIC_VBIND, "vabd", "f32", v2f32, v2f32, int_arm_neon_vabds, 0>; @@ -2458,56 +2467,68 @@ def VABDfq : N3VQInt<1, 0, 0b10, 0b1101, 0, N3RegFrm, IIC_VBINQ, "vabd", "f32", v4f32, v4f32, int_arm_neon_vabds, 0>; // VABDL : Vector Absolute Difference Long (Q = | D - D |) -defm VABDLs : N3VLInt_QHS<0,1,0b0111,0, IIC_VBINi4Q, +defm VABDLs : 
N3VLInt_QHS<0,1,0b0111,0, IIC_VSUBi4Q, IIC_VSUBi4Q, "vabdl", "s", int_arm_neon_vabdls, 0>; -defm VABDLu : N3VLInt_QHS<1,1,0b0111,0, IIC_VBINi4Q, +defm VABDLu : N3VLInt_QHS<1,1,0b0111,0, IIC_VSUBi4Q, IIC_VSUBi4Q, "vabdl", "u", int_arm_neon_vabdlu, 0>; // VABA : Vector Absolute Difference and Accumulate -defm VABAs : N3VInt3_QHS<0,0,0b0111,1, "vaba", "s", int_arm_neon_vabas>; -defm VABAu : N3VInt3_QHS<1,0,0b0111,1, "vaba", "u", int_arm_neon_vabau>; +defm VABAs : N3VInt3_QHS<0,0,0b0111,1, IIC_VABAD, IIC_VABAQ, + "vaba", "s", int_arm_neon_vabas>; +defm VABAu : N3VInt3_QHS<1,0,0b0111,1, IIC_VABAD, IIC_VABAQ, + "vaba", "u", int_arm_neon_vabau>; // VABAL : Vector Absolute Difference and Accumulate Long (Q += | D - D |) -defm VABALs : N3VLInt3_QHS<0,1,0b0101,0, "vabal", "s", int_arm_neon_vabals>; -defm VABALu : N3VLInt3_QHS<1,1,0b0101,0, "vabal", "u", int_arm_neon_vabalu>; +defm VABALs : N3VLInt3_QHS<0,1,0b0101,0, IIC_VABAD, IIC_VABAD, + "vabal", "s", int_arm_neon_vabals>; +defm VABALu : N3VLInt3_QHS<1,1,0b0101,0, IIC_VABAD, IIC_VABAD, + "vabal", "u", int_arm_neon_vabalu>; // Vector Maximum and Minimum. // VMAX : Vector Maximum defm VMAXs : N3VInt_QHS<0, 0, 0b0110, 0, N3RegFrm, - IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q, + IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q, "vmax", "s", int_arm_neon_vmaxs, 1>; defm VMAXu : N3VInt_QHS<1, 0, 0b0110, 0, N3RegFrm, - IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q, + IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q, "vmax", "u", int_arm_neon_vmaxu, 1>; -def VMAXfd : N3VDInt<0, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VBIND, "vmax", - "f32", v2f32, v2f32, int_arm_neon_vmaxs, 1>; -def VMAXfq : N3VQInt<0, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VBINQ, "vmax", - "f32", v4f32, v4f32, int_arm_neon_vmaxs, 1>; +def VMAXfd : N3VDInt<0, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VBIND, + "vmax", "f32", + v2f32, v2f32, int_arm_neon_vmaxs, 1>; +def VMAXfq : N3VQInt<0, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VBINQ, + "vmax", "f32", + v4f32, v4f32, int_arm_neon_vmaxs, 1>; // VMIN : Vector Minimum defm VMINs : N3VInt_QHS<0, 0, 0b0110, 1, N3RegFrm, - IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q, + IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q, "vmin", "s", int_arm_neon_vmins, 1>; defm VMINu : N3VInt_QHS<1, 0, 0b0110, 1, N3RegFrm, - IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q, + IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q, "vmin", "u", int_arm_neon_vminu, 1>; -def VMINfd : N3VDInt<0, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VBIND, "vmin", - "f32", v2f32, v2f32, int_arm_neon_vmins, 1>; -def VMINfq : N3VQInt<0, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VBINQ, "vmin", - "f32", v4f32, v4f32, int_arm_neon_vmins, 1>; +def VMINfd : N3VDInt<0, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VBIND, + "vmin", "f32", + v2f32, v2f32, int_arm_neon_vmins, 1>; +def VMINfq : N3VQInt<0, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VBINQ, + "vmin", "f32", + v4f32, v4f32, int_arm_neon_vmins, 1>; // Vector Pairwise Operations. 
// VPADD : Vector Pairwise Add -def VPADDi8 : N3VDInt<0, 0, 0b00, 0b1011, 1, N3RegFrm, IIC_VBINiD, "vpadd", - "i8", v8i8, v8i8, int_arm_neon_vpadd, 0>; -def VPADDi16 : N3VDInt<0, 0, 0b01, 0b1011, 1, N3RegFrm, IIC_VBINiD, "vpadd", - "i16", v4i16, v4i16, int_arm_neon_vpadd, 0>; -def VPADDi32 : N3VDInt<0, 0, 0b10, 0b1011, 1, N3RegFrm, IIC_VBINiD, "vpadd", - "i32", v2i32, v2i32, int_arm_neon_vpadd, 0>; -def VPADDf : N3VDInt<1, 0, 0b00, 0b1101, 0, N3RegFrm, IIC_VBIND, "vpadd", - "f32", v2f32, v2f32, int_arm_neon_vpadd, 0>; +def VPADDi8 : N3VDInt<0, 0, 0b00, 0b1011, 1, N3RegFrm, IIC_VSHLiD, + "vpadd", "i8", + v8i8, v8i8, int_arm_neon_vpadd, 0>; +def VPADDi16 : N3VDInt<0, 0, 0b01, 0b1011, 1, N3RegFrm, IIC_VSHLiD, + "vpadd", "i16", + v4i16, v4i16, int_arm_neon_vpadd, 0>; +def VPADDi32 : N3VDInt<0, 0, 0b10, 0b1011, 1, N3RegFrm, IIC_VSHLiD, + "vpadd", "i32", + v2i32, v2i32, int_arm_neon_vpadd, 0>; +def VPADDf : N3VDInt<1, 0, 0b00, 0b1101, 0, N3RegFrm, + IIC_VBIND, "vpadd", "f32", + v2f32, v2f32, int_arm_neon_vpadd, 0>; // VPADDL : Vector Pairwise Add Long defm VPADDLs : N2VPLInt_QHS<0b11, 0b11, 0b00, 0b00100, 0, "vpaddl", "s", @@ -2522,35 +2543,35 @@ defm VPADALu : N2VPLInt2_QHS<0b11, 0b11, 0b00, 0b01101, 0, "vpadal", "u", int_arm_neon_vpadalu>; // VPMAX : Vector Pairwise Maximum -def VPMAXs8 : N3VDInt<0, 0, 0b00, 0b1010, 0, N3RegFrm, IIC_VBINi4D, "vpmax", +def VPMAXs8 : N3VDInt<0, 0, 0b00, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax", "s8", v8i8, v8i8, int_arm_neon_vpmaxs, 0>; -def VPMAXs16 : N3VDInt<0, 0, 0b01, 0b1010, 0, N3RegFrm, IIC_VBINi4D, "vpmax", +def VPMAXs16 : N3VDInt<0, 0, 0b01, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax", "s16", v4i16, v4i16, int_arm_neon_vpmaxs, 0>; -def VPMAXs32 : N3VDInt<0, 0, 0b10, 0b1010, 0, N3RegFrm, IIC_VBINi4D, "vpmax", +def VPMAXs32 : N3VDInt<0, 0, 0b10, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax", "s32", v2i32, v2i32, int_arm_neon_vpmaxs, 0>; -def VPMAXu8 : N3VDInt<1, 0, 0b00, 0b1010, 0, N3RegFrm, IIC_VBINi4D, "vpmax", +def VPMAXu8 : N3VDInt<1, 0, 0b00, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax", "u8", v8i8, v8i8, int_arm_neon_vpmaxu, 0>; -def VPMAXu16 : N3VDInt<1, 0, 0b01, 0b1010, 0, N3RegFrm, IIC_VBINi4D, "vpmax", +def VPMAXu16 : N3VDInt<1, 0, 0b01, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax", "u16", v4i16, v4i16, int_arm_neon_vpmaxu, 0>; -def VPMAXu32 : N3VDInt<1, 0, 0b10, 0b1010, 0, N3RegFrm, IIC_VBINi4D, "vpmax", +def VPMAXu32 : N3VDInt<1, 0, 0b10, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax", "u32", v2i32, v2i32, int_arm_neon_vpmaxu, 0>; -def VPMAXf : N3VDInt<1, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VBINi4D, "vpmax", +def VPMAXf : N3VDInt<1, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VSUBi4D, "vpmax", "f32", v2f32, v2f32, int_arm_neon_vpmaxs, 0>; // VPMIN : Vector Pairwise Minimum -def VPMINs8 : N3VDInt<0, 0, 0b00, 0b1010, 1, N3RegFrm, IIC_VBINi4D, "vpmin", +def VPMINs8 : N3VDInt<0, 0, 0b00, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin", "s8", v8i8, v8i8, int_arm_neon_vpmins, 0>; -def VPMINs16 : N3VDInt<0, 0, 0b01, 0b1010, 1, N3RegFrm, IIC_VBINi4D, "vpmin", +def VPMINs16 : N3VDInt<0, 0, 0b01, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin", "s16", v4i16, v4i16, int_arm_neon_vpmins, 0>; -def VPMINs32 : N3VDInt<0, 0, 0b10, 0b1010, 1, N3RegFrm, IIC_VBINi4D, "vpmin", +def VPMINs32 : N3VDInt<0, 0, 0b10, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin", "s32", v2i32, v2i32, int_arm_neon_vpmins, 0>; -def VPMINu8 : N3VDInt<1, 0, 0b00, 0b1010, 1, N3RegFrm, IIC_VBINi4D, "vpmin", +def VPMINu8 : N3VDInt<1, 0, 0b00, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin", "u8", v8i8, v8i8, int_arm_neon_vpminu, 0>; -def 
VPMINu16 : N3VDInt<1, 0, 0b01, 0b1010, 1, N3RegFrm, IIC_VBINi4D, "vpmin", +def VPMINu16 : N3VDInt<1, 0, 0b01, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin", "u16", v4i16, v4i16, int_arm_neon_vpminu, 0>; -def VPMINu32 : N3VDInt<1, 0, 0b10, 0b1010, 1, N3RegFrm, IIC_VBINi4D, "vpmin", +def VPMINu32 : N3VDInt<1, 0, 0b10, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin", "u32", v2i32, v2i32, int_arm_neon_vpminu, 0>; -def VPMINf : N3VDInt<1, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VBINi4D, "vpmin", +def VPMINf : N3VDInt<1, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VSUBi4D, "vpmin", "f32", v2f32, v2f32, int_arm_neon_vpmins, 0>; // Vector Reciprocal and Reciprocal Square Root Estimate and Step. diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td index 262aae4..742bd40 100644 --- a/lib/Target/ARM/ARMInstrThumb2.td +++ b/lib/Target/ARM/ARMInstrThumb2.td @@ -2386,9 +2386,25 @@ let Defs = "\tb\t1f\n" "\tmovs\tr0, #1\t@ end eh.setjmp\n" "1:", "", - [(set R0, (ARMeh_sjlj_setjmp GPR:$src, tGPR:$val))]>; + [(set R0, (ARMeh_sjlj_setjmp GPR:$src, tGPR:$val))]>, + Requires<[IsThumb2, HasVFP2]>; } +let Defs = + [ R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, LR ] in { + def t2Int_eh_sjlj_setjmp_nofp : Thumb2XI<(outs), (ins GPR:$src, tGPR:$val), + AddrModeNone, SizeSpecial, NoItinerary, + "str\t$val, [$src, #8]\t@ begin eh.setjmp\n" + "\tmov\t$val, pc\n" + "\tadds\t$val, #9\n" + "\tstr\t$val, [$src, #4]\n" + "\tmovs\tr0, #0\n" + "\tb\t1f\n" + "\tmovs\tr0, #1\t@ end eh.setjmp\n" + "1:", "", + [(set R0, (ARMeh_sjlj_setjmp GPR:$src, tGPR:$val))]>, + Requires<[IsThumb2, NoVFP]>; +} //===----------------------------------------------------------------------===// diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td index 0458389..36fcaa1 100644 --- a/lib/Target/ARM/ARMInstrVFP.td +++ b/lib/Target/ARM/ARMInstrVFP.td @@ -256,25 +256,25 @@ def VCVTSD : VFPAI<(outs SPR:$dst), (ins DPR:$a), VFPUnaryFrm, // Between half-precision and single-precision. For disassembly only. 
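Returning briefly to the ARMInstrThumb2.td hunk above: the eh.setjmp pseudo is now duplicated into a VFP and a no-VFP flavor that differ in their Requires lists, so only the variant matching the current subtarget is ever selectable. A rough sketch of that pattern, assuming the surrounding Thumb2 definitions are in scope (the EXAMPLE_* names, empty asm strings, and minimal clobber list are placeholders, not part of the patch):

    let Defs = [ R0 ] in {
      def EXAMPLE_setjmp_vfp  : Thumb2XI<(outs), (ins GPR:$src, tGPR:$val),
                                         AddrModeNone, SizeSpecial, NoItinerary,
                                         "", "", []>,
                                Requires<[IsThumb2, HasVFP2]>;  // picked when VFP2 is available
      def EXAMPLE_setjmp_nofp : Thumb2XI<(outs), (ins GPR:$src, tGPR:$val),
                                         AddrModeNone, SizeSpecial, NoItinerary,
                                         "", "", []>,
                                Requires<[IsThumb2, NoVFP]>;    // mutually exclusive with the above
    }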
def VCVTBSH : ASuI<0b11101, 0b11, 0b0010, 0b01, 0, (outs SPR:$dst), (ins SPR:$a), - /* FIXME */ IIC_fpCVTDS, "vcvtb", ".f32.f16\t$dst, $a", + /* FIXME */ IIC_fpCVTSH, "vcvtb", ".f32.f16\t$dst, $a", [/* For disassembly only; pattern left blank */]>; def : ARMPat<(f32_to_f16 SPR:$a), (i32 (COPY_TO_REGCLASS (VCVTBSH SPR:$a), GPR))>; def VCVTBHS : ASuI<0b11101, 0b11, 0b0011, 0b01, 0, (outs SPR:$dst), (ins SPR:$a), - /* FIXME */ IIC_fpCVTDS, "vcvtb", ".f16.f32\t$dst, $a", + /* FIXME */ IIC_fpCVTHS, "vcvtb", ".f16.f32\t$dst, $a", [/* For disassembly only; pattern left blank */]>; def : ARMPat<(f16_to_f32 GPR:$a), (VCVTBHS (COPY_TO_REGCLASS GPR:$a, SPR))>; def VCVTTSH : ASuI<0b11101, 0b11, 0b0010, 0b11, 0, (outs SPR:$dst), (ins SPR:$a), - /* FIXME */ IIC_fpCVTDS, "vcvtt", ".f32.f16\t$dst, $a", + /* FIXME */ IIC_fpCVTSH, "vcvtt", ".f32.f16\t$dst, $a", [/* For disassembly only; pattern left blank */]>; def VCVTTHS : ASuI<0b11101, 0b11, 0b0011, 0b11, 0, (outs SPR:$dst), (ins SPR:$a), - /* FIXME */ IIC_fpCVTDS, "vcvtt", ".f16.f32\t$dst, $a", + /* FIXME */ IIC_fpCVTHS, "vcvtt", ".f16.f32\t$dst, $a", [/* For disassembly only; pattern left blank */]>; let neverHasSideEffects = 1 in { @@ -306,23 +306,23 @@ def VSQRTS : ASuI<0b11101, 0b11, 0b0001, 0b11, 0, (outs SPR:$dst), (ins SPR:$a), // def VMOVRS : AVConv2I<0b11100001, 0b1010, (outs GPR:$dst), (ins SPR:$src), - IIC_VMOVSI, "vmov", "\t$dst, $src", + IIC_fpMOVSI, "vmov", "\t$dst, $src", [(set GPR:$dst, (bitconvert SPR:$src))]>; def VMOVSR : AVConv4I<0b11100000, 0b1010, (outs SPR:$dst), (ins GPR:$src), - IIC_VMOVIS, "vmov", "\t$dst, $src", + IIC_fpMOVIS, "vmov", "\t$dst, $src", [(set SPR:$dst, (bitconvert GPR:$src))]>; def VMOVRRD : AVConv3I<0b11000101, 0b1011, (outs GPR:$wb, GPR:$dst2), (ins DPR:$src), - IIC_VMOVDI, "vmov", "\t$wb, $dst2, $src", + IIC_fpMOVDI, "vmov", "\t$wb, $dst2, $src", [/* FIXME: Can't write pattern for multiple result instr*/]> { let Inst{7-6} = 0b00; } def VMOVRRS : AVConv3I<0b11000101, 0b1010, (outs GPR:$wb, GPR:$dst2), (ins SPR:$src1, SPR:$src2), - IIC_VMOVDI, "vmov", "\t$wb, $dst2, $src1, $src2", + IIC_fpMOVDI, "vmov", "\t$wb, $dst2, $src1, $src2", [/* For disassembly only; pattern left blank */]> { let Inst{7-6} = 0b00; } @@ -332,14 +332,14 @@ def VMOVRRS : AVConv3I<0b11000101, 0b1010, def VMOVDRR : AVConv5I<0b11000100, 0b1011, (outs DPR:$dst), (ins GPR:$src1, GPR:$src2), - IIC_VMOVID, "vmov", "\t$dst, $src1, $src2", + IIC_fpMOVID, "vmov", "\t$dst, $src1, $src2", [(set DPR:$dst, (arm_fmdrr GPR:$src1, GPR:$src2))]> { let Inst{7-6} = 0b00; } def VMOVSRR : AVConv5I<0b11000100, 0b1010, (outs SPR:$dst1, SPR:$dst2), (ins GPR:$src1, GPR:$src2), - IIC_VMOVID, "vmov", "\t$dst1, $dst2, $src1, $src2", + IIC_fpMOVID, "vmov", "\t$dst1, $dst2, $src1, $src2", [/* For disassembly only; pattern left blank */]> { let Inst{7-6} = 0b00; } @@ -678,7 +678,7 @@ def VMSR : VFPAI<(outs), (ins GPR:$src), VFPMiscFrm, IIC_fpSTAT, "vmsr", // Materialize FP immediates. VFP3 only. 
let isReMaterializable = 1 in { def FCONSTD : VFPAI<(outs DPR:$dst), (ins vfp_f64imm:$imm), - VFPMiscFrm, IIC_VMOVImm, + VFPMiscFrm, IIC_fpUNA64, "vmov", ".f64\t$dst, $imm", [(set DPR:$dst, vfp_f64imm:$imm)]>, Requires<[HasVFP3]> { let Inst{27-23} = 0b11101; @@ -689,7 +689,7 @@ def FCONSTD : VFPAI<(outs DPR:$dst), (ins vfp_f64imm:$imm), } def FCONSTS : VFPAI<(outs SPR:$dst), (ins vfp_f32imm:$imm), - VFPMiscFrm, IIC_VMOVImm, + VFPMiscFrm, IIC_fpUNA32, "vmov", ".f32\t$dst, $imm", [(set SPR:$dst, vfp_f32imm:$imm)]>, Requires<[HasVFP3]> { let Inst{27-23} = 0b11101; diff --git a/lib/Target/ARM/ARMJITInfo.cpp b/lib/Target/ARM/ARMJITInfo.cpp index 8c0b720..b31a4fa 100644 --- a/lib/Target/ARM/ARMJITInfo.cpp +++ b/lib/Target/ARM/ARMJITInfo.cpp @@ -27,7 +27,7 @@ using namespace llvm; void ARMJITInfo::replaceMachineCodeForFunction(void *Old, void *New) { - llvm_report_error("ARMJITInfo::replaceMachineCodeForFunction"); + report_fatal_error("ARMJITInfo::replaceMachineCodeForFunction"); } /// JITCompilerFunction - This contains the address of the JIT function used to diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp index cb762a4..8585c1e 100644 --- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp +++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp @@ -1358,7 +1358,7 @@ ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, return false; unsigned Align = (*Op0->memoperands_begin())->getAlignment(); - Function *Func = MF->getFunction(); + const Function *Func = MF->getFunction(); unsigned ReqAlign = STI->hasV6Ops() ? TD->getPrefTypeAlignment(Type::getInt64Ty(Func->getContext())) : 8; // Pre-v6 need 8-byte align diff --git a/lib/Target/ARM/ARMMachineFunctionInfo.h b/lib/Target/ARM/ARMMachineFunctionInfo.h index c998ede..0134276 100644 --- a/lib/Target/ARM/ARMMachineFunctionInfo.h +++ b/lib/Target/ARM/ARMMachineFunctionInfo.h @@ -85,6 +85,9 @@ class ARMFunctionInfo : public MachineFunctionInfo { unsigned ConstPoolEntryUId; + /// VarArgsFrameIndex - FrameIndex for start of varargs area. 
+ int VarArgsFrameIndex; + public: ARMFunctionInfo() : isThumb(false), @@ -94,7 +97,7 @@ public: FramePtrSpillOffset(0), GPRCS1Offset(0), GPRCS2Offset(0), DPRCSOffset(0), GPRCS1Size(0), GPRCS2Size(0), DPRCSSize(0), GPRCS1Frames(0), GPRCS2Frames(0), DPRCSFrames(0), - JumpTableUId(0), ConstPoolEntryUId(0) {} + JumpTableUId(0), ConstPoolEntryUId(0), VarArgsFrameIndex(0) {} explicit ARMFunctionInfo(MachineFunction &MF) : isThumb(MF.getTarget().getSubtarget<ARMSubtarget>().isThumb()), @@ -105,7 +108,7 @@ public: GPRCS1Size(0), GPRCS2Size(0), DPRCSSize(0), GPRCS1Frames(32), GPRCS2Frames(32), DPRCSFrames(32), SpilledCSRegs(MF.getTarget().getRegisterInfo()->getNumRegs()), - JumpTableUId(0), ConstPoolEntryUId(0) {} + JumpTableUId(0), ConstPoolEntryUId(0), VarArgsFrameIndex(0) {} bool isThumbFunction() const { return isThumb; } bool isThumb1OnlyFunction() const { return isThumb && !hasThumb2; } @@ -223,6 +226,9 @@ public: unsigned createConstPoolEntryUId() { return ConstPoolEntryUId++; } + + int getVarArgsFrameIndex() const { return VarArgsFrameIndex; } + void setVarArgsFrameIndex(int Index) { VarArgsFrameIndex = Index; } }; } // End llvm namespace diff --git a/lib/Target/ARM/ARMSchedule.td b/lib/Target/ARM/ARMSchedule.td index fc4c5f5..b60ccca 100644 --- a/lib/Target/ARM/ARMSchedule.td +++ b/lib/Target/ARM/ARMSchedule.td @@ -8,17 +8,6 @@ //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// -// Functional units across ARM processors -// -def FU_Issue : FuncUnit; // issue -def FU_Pipe0 : FuncUnit; // pipeline 0 -def FU_Pipe1 : FuncUnit; // pipeline 1 -def FU_LdSt0 : FuncUnit; // pipeline 0 load/store -def FU_LdSt1 : FuncUnit; // pipeline 1 load/store -def FU_NPipe : FuncUnit; // NEON ALU/MUL pipe -def FU_NLSPipe : FuncUnit; // NEON LS pipe - -//===----------------------------------------------------------------------===// // Instruction Itinerary classes used for ARM // def IIC_iALUx : InstrItinClass; @@ -69,10 +58,16 @@ def IIC_fpCMP32 : InstrItinClass; def IIC_fpCMP64 : InstrItinClass; def IIC_fpCVTSD : InstrItinClass; def IIC_fpCVTDS : InstrItinClass; +def IIC_fpCVTSH : InstrItinClass; +def IIC_fpCVTHS : InstrItinClass; def IIC_fpCVTIS : InstrItinClass; def IIC_fpCVTID : InstrItinClass; def IIC_fpCVTSI : InstrItinClass; def IIC_fpCVTDI : InstrItinClass; +def IIC_fpMOVIS : InstrItinClass; +def IIC_fpMOVID : InstrItinClass; +def IIC_fpMOVSI : InstrItinClass; +def IIC_fpMOVDI : InstrItinClass; def IIC_fpALU32 : InstrItinClass; def IIC_fpALU64 : InstrItinClass; def IIC_fpMUL32 : InstrItinClass; @@ -125,6 +120,10 @@ def IIC_VSUBiD : InstrItinClass; def IIC_VSUBiQ : InstrItinClass; def IIC_VBINi4D : InstrItinClass; def IIC_VBINi4Q : InstrItinClass; +def IIC_VSUBi4D : InstrItinClass; +def IIC_VSUBi4Q : InstrItinClass; +def IIC_VABAD : InstrItinClass; +def IIC_VABAQ : InstrItinClass; def IIC_VSHLiD : InstrItinClass; def IIC_VSHLiQ : InstrItinClass; def IIC_VSHLi4D : InstrItinClass; @@ -153,8 +152,8 @@ def IIC_VTBX4 : InstrItinClass; //===----------------------------------------------------------------------===// // Processor instruction itineraries. 
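The ARMSchedule.td hunk above only declares itinerary classes (IIC_fpCVTSH, IIC_fpMOVSI, IIC_VSUBi4D, IIC_VABAD, and so on) and moves the functional-unit definitions out into the per-CPU files added below; the actual timing is bound to those classes by ProcessorItineraries definitions such as CortexA8Itineraries. A minimal, self-contained sketch of how the pieces fit together (EX_Pipe, IIC_Example and ExampleItineraries are made-up names, not from the patch; the GenericItineraries definition just below shows the same two-list ProcessorItineraries form with both lists empty):

    // A functional unit and an itinerary class, normally declared in a
    // Schedule.td file.
    def EX_Pipe     : FuncUnit;
    def IIC_Example : InstrItinClass;

    // A processor itinerary binds pipeline stages and operand cycles to the
    // class: one cycle on EX_Pipe, the result (operand 0) ready in cycle 2,
    // both source operands read in cycle 1.
    def ExampleItineraries : ProcessorItineraries<[EX_Pipe], [
      InstrItinData<IIC_Example, [InstrStage<1, [EX_Pipe]>], [2, 1, 1]>
    ]>;

Instruction definitions then only name the class -- for example the NEON defs above now pass IIC_VSUBi4D / IIC_VSUBi4Q instead of IIC_VBINi4D / IIC_VBINi4Q -- and pick up whatever timing the selected processor itinerary supplies.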
-def GenericItineraries : ProcessorItineraries<[]>; - +def GenericItineraries : ProcessorItineraries<[], []>; include "ARMScheduleV6.td" -include "ARMScheduleV7.td" +include "ARMScheduleA8.td" +include "ARMScheduleA9.td" diff --git a/lib/Target/ARM/ARMScheduleA8.td b/lib/Target/ARM/ARMScheduleA8.td new file mode 100644 index 0000000..bbfc0b2 --- /dev/null +++ b/lib/Target/ARM/ARMScheduleA8.td @@ -0,0 +1,618 @@ +//=- ARMScheduleA8.td - ARM Cortex-A8 Scheduling Definitions -*- tablegen -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the itinerary class data for the ARM Cortex A8 processors. +// +//===----------------------------------------------------------------------===// + +// +// Scheduling information derived from "Cortex-A8 Technical Reference Manual". +// Functional Units. +def A8_Issue : FuncUnit; // issue +def A8_Pipe0 : FuncUnit; // pipeline 0 +def A8_Pipe1 : FuncUnit; // pipeline 1 +def A8_LdSt0 : FuncUnit; // pipeline 0 load/store +def A8_LdSt1 : FuncUnit; // pipeline 1 load/store +def A8_NPipe : FuncUnit; // NEON ALU/MUL pipe +def A8_NLSPipe : FuncUnit; // NEON LS pipe +// +// Dual issue pipeline represented by A8_Pipe0 | A8_Pipe1 +// +def CortexA8Itineraries : ProcessorItineraries< + [A8_Issue, A8_Pipe0, A8_Pipe1, A8_LdSt0, A8_LdSt1, A8_NPipe, A8_NLSPipe], [ + // Two fully-pipelined integer ALU pipelines + // + // No operand cycles + InstrItinData<IIC_iALUx , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>]>, + // + // Binary Instructions that produce a result + InstrItinData<IIC_iALUi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2]>, + InstrItinData<IIC_iALUr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2, 2]>, + InstrItinData<IIC_iALUsi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2, 1]>, + InstrItinData<IIC_iALUsr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2, 1, 1]>, + // + // Unary Instructions that produce a result + InstrItinData<IIC_iUNAr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2]>, + InstrItinData<IIC_iUNAsi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1]>, + InstrItinData<IIC_iUNAsr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1, 1]>, + // + // Compare instructions + InstrItinData<IIC_iCMPi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2]>, + InstrItinData<IIC_iCMPr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2]>, + InstrItinData<IIC_iCMPsi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1]>, + InstrItinData<IIC_iCMPsr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1, 1]>, + // + // Move instructions, unconditional + InstrItinData<IIC_iMOVi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1]>, + InstrItinData<IIC_iMOVr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1, 1]>, + InstrItinData<IIC_iMOVsi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1, 1]>, + InstrItinData<IIC_iMOVsr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1, 1, 1]>, + // + // Move instructions, conditional + InstrItinData<IIC_iCMOVi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2]>, + InstrItinData<IIC_iCMOVr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1]>, + InstrItinData<IIC_iCMOVsi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1]>, + InstrItinData<IIC_iCMOVsr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1, 1]>, + + // Integer multiply pipeline + // Result written in E5, but that is relative to the last cycle of multicycle, + // so we use 6 for those cases + // + InstrItinData<IIC_iMUL16 , [InstrStage<1, 
[A8_Pipe0]>], [5, 1, 1]>, + InstrItinData<IIC_iMAC16 , [InstrStage<1, [A8_Pipe1], 0>, + InstrStage<2, [A8_Pipe0]>], [6, 1, 1, 4]>, + InstrItinData<IIC_iMUL32 , [InstrStage<1, [A8_Pipe1], 0>, + InstrStage<2, [A8_Pipe0]>], [6, 1, 1]>, + InstrItinData<IIC_iMAC32 , [InstrStage<1, [A8_Pipe1], 0>, + InstrStage<2, [A8_Pipe0]>], [6, 1, 1, 4]>, + InstrItinData<IIC_iMUL64 , [InstrStage<2, [A8_Pipe1], 0>, + InstrStage<3, [A8_Pipe0]>], [6, 6, 1, 1]>, + InstrItinData<IIC_iMAC64 , [InstrStage<2, [A8_Pipe1], 0>, + InstrStage<3, [A8_Pipe0]>], [6, 6, 1, 1]>, + + // Integer load pipeline + // + // loads have an extra cycle of latency, but are fully pipelined + // use A8_Issue to enforce the 1 load/store per cycle limit + // + // Immediate offset + InstrItinData<IIC_iLoadi , [InstrStage<1, [A8_Issue], 0>, + InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_LdSt0]>], [3, 1]>, + // + // Register offset + InstrItinData<IIC_iLoadr , [InstrStage<1, [A8_Issue], 0>, + InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_LdSt0]>], [3, 1, 1]>, + // + // Scaled register offset, issues over 2 cycles + InstrItinData<IIC_iLoadsi , [InstrStage<2, [A8_Issue], 0>, + InstrStage<1, [A8_Pipe0], 0>, + InstrStage<1, [A8_Pipe1]>, + InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_LdSt0]>], [4, 1, 1]>, + // + // Immediate offset with update + InstrItinData<IIC_iLoadiu , [InstrStage<1, [A8_Issue], 0>, + InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_LdSt0]>], [3, 2, 1]>, + // + // Register offset with update + InstrItinData<IIC_iLoadru , [InstrStage<1, [A8_Issue], 0>, + InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_LdSt0]>], [3, 2, 1, 1]>, + // + // Scaled register offset with update, issues over 2 cycles + InstrItinData<IIC_iLoadsiu , [InstrStage<2, [A8_Issue], 0>, + InstrStage<1, [A8_Pipe0], 0>, + InstrStage<1, [A8_Pipe1]>, + InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_LdSt0]>], [4, 3, 1, 1]>, + // + // Load multiple + InstrItinData<IIC_iLoadm , [InstrStage<2, [A8_Issue], 0>, + InstrStage<2, [A8_Pipe0], 0>, + InstrStage<2, [A8_Pipe1]>, + InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_LdSt0]>]>, + + // Integer store pipeline + // + // use A8_Issue to enforce the 1 load/store per cycle limit + // + // Immediate offset + InstrItinData<IIC_iStorei , [InstrStage<1, [A8_Issue], 0>, + InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_LdSt0]>], [3, 1]>, + // + // Register offset + InstrItinData<IIC_iStorer , [InstrStage<1, [A8_Issue], 0>, + InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_LdSt0]>], [3, 1, 1]>, + // + // Scaled register offset, issues over 2 cycles + InstrItinData<IIC_iStoresi , [InstrStage<2, [A8_Issue], 0>, + InstrStage<1, [A8_Pipe0], 0>, + InstrStage<1, [A8_Pipe1]>, + InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_LdSt0]>], [3, 1, 1]>, + // + // Immediate offset with update + InstrItinData<IIC_iStoreiu , [InstrStage<1, [A8_Issue], 0>, + InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_LdSt0]>], [2, 3, 1]>, + // + // Register offset with update + InstrItinData<IIC_iStoreru , [InstrStage<1, [A8_Issue], 0>, + InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_LdSt0]>], [2, 3, 1, 1]>, + // + // Scaled register offset with update, issues over 2 cycles + InstrItinData<IIC_iStoresiu, [InstrStage<2, [A8_Issue], 0>, + InstrStage<1, [A8_Pipe0], 0>, + InstrStage<1, [A8_Pipe1]>, + InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_LdSt0]>], [3, 3, 1, 1]>, + // + // Store multiple + InstrItinData<IIC_iStorem , [InstrStage<2, 
[A8_Issue], 0>, + InstrStage<2, [A8_Pipe0], 0>, + InstrStage<2, [A8_Pipe1]>, + InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_LdSt0]>]>, + + // Branch + // + // no delay slots, so the latency of a branch is unimportant + InstrItinData<IIC_Br , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>]>, + + // VFP + // Issue through integer pipeline, and execute in NEON unit. We assume + // RunFast mode so that NFP pipeline is used for single-precision when + // possible. + // + // FP Special Register to Integer Register File Move + InstrItinData<IIC_fpSTAT , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_NLSPipe]>]>, + // + // Single-precision FP Unary + InstrItinData<IIC_fpUNA32 , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_NPipe]>], [7, 1]>, + // + // Double-precision FP Unary + InstrItinData<IIC_fpUNA64 , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<4, [A8_NPipe], 0>, + InstrStage<4, [A8_NLSPipe]>], [4, 1]>, + // + // Single-precision FP Compare + InstrItinData<IIC_fpCMP32 , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_NPipe]>], [1, 1]>, + // + // Double-precision FP Compare + InstrItinData<IIC_fpCMP64 , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<4, [A8_NPipe], 0>, + InstrStage<4, [A8_NLSPipe]>], [4, 1]>, + // + // Single to Double FP Convert + InstrItinData<IIC_fpCVTSD , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<7, [A8_NPipe], 0>, + InstrStage<7, [A8_NLSPipe]>], [7, 1]>, + // + // Double to Single FP Convert + InstrItinData<IIC_fpCVTDS , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<5, [A8_NPipe], 0>, + InstrStage<5, [A8_NLSPipe]>], [5, 1]>, + // + // Single-Precision FP to Integer Convert + InstrItinData<IIC_fpCVTSI , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_NPipe]>], [7, 1]>, + // + // Double-Precision FP to Integer Convert + InstrItinData<IIC_fpCVTDI , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<8, [A8_NPipe], 0>, + InstrStage<8, [A8_NLSPipe]>], [8, 1]>, + // + // Integer to Single-Precision FP Convert + InstrItinData<IIC_fpCVTIS , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_NPipe]>], [7, 1]>, + // + // Integer to Double-Precision FP Convert + InstrItinData<IIC_fpCVTID , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<8, [A8_NPipe], 0>, + InstrStage<8, [A8_NLSPipe]>], [8, 1]>, + // + // Single-precision FP ALU + InstrItinData<IIC_fpALU32 , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_NPipe]>], [7, 1, 1]>, + // + // Double-precision FP ALU + InstrItinData<IIC_fpALU64 , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<9, [A8_NPipe], 0>, + InstrStage<9, [A8_NLSPipe]>], [9, 1, 1]>, + // + // Single-precision FP Multiply + InstrItinData<IIC_fpMUL32 , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_NPipe]>], [7, 1, 1]>, + // + // Double-precision FP Multiply + InstrItinData<IIC_fpMUL64 , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<11, [A8_NPipe], 0>, + InstrStage<11, [A8_NLSPipe]>], [11, 1, 1]>, + // + // Single-precision FP MAC + InstrItinData<IIC_fpMAC32 , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_NPipe]>], [7, 2, 1, 1]>, + // + // Double-precision FP MAC + InstrItinData<IIC_fpMAC64 , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<19, [A8_NPipe], 0>, + InstrStage<19, [A8_NLSPipe]>], [19, 2, 1, 1]>, + // + // Single-precision FP DIV + InstrItinData<IIC_fpDIV32 , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<20, [A8_NPipe], 0>, + InstrStage<20, [A8_NLSPipe]>], [20, 1, 1]>, + // + // Double-precision FP DIV + InstrItinData<IIC_fpDIV64 , 
[InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<29, [A8_NPipe], 0>, + InstrStage<29, [A8_NLSPipe]>], [29, 1, 1]>, + // + // Single-precision FP SQRT + InstrItinData<IIC_fpSQRT32, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<19, [A8_NPipe], 0>, + InstrStage<19, [A8_NLSPipe]>], [19, 1]>, + // + // Double-precision FP SQRT + InstrItinData<IIC_fpSQRT64, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<29, [A8_NPipe], 0>, + InstrStage<29, [A8_NLSPipe]>], [29, 1]>, + // + // Single-precision FP Load + // use A8_Issue to enforce the 1 load/store per cycle limit + InstrItinData<IIC_fpLoad32, [InstrStage<1, [A8_Issue], 0>, + InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_LdSt0], 0>, + InstrStage<1, [A8_NLSPipe]>]>, + // + // Double-precision FP Load + // use A8_Issue to enforce the 1 load/store per cycle limit + InstrItinData<IIC_fpLoad64, [InstrStage<2, [A8_Issue], 0>, + InstrStage<1, [A8_Pipe0], 0>, + InstrStage<1, [A8_Pipe1]>, + InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_LdSt0], 0>, + InstrStage<1, [A8_NLSPipe]>]>, + // + // FP Load Multiple + // use A8_Issue to enforce the 1 load/store per cycle limit + InstrItinData<IIC_fpLoadm, [InstrStage<3, [A8_Issue], 0>, + InstrStage<2, [A8_Pipe0], 0>, + InstrStage<2, [A8_Pipe1]>, + InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_LdSt0], 0>, + InstrStage<1, [A8_NLSPipe]>]>, + // + // Single-precision FP Store + // use A8_Issue to enforce the 1 load/store per cycle limit + InstrItinData<IIC_fpStore32,[InstrStage<1, [A8_Issue], 0>, + InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_LdSt0], 0>, + InstrStage<1, [A8_NLSPipe]>]>, + // + // Double-precision FP Store + // use A8_Issue to enforce the 1 load/store per cycle limit + InstrItinData<IIC_fpStore64,[InstrStage<2, [A8_Issue], 0>, + InstrStage<1, [A8_Pipe0], 0>, + InstrStage<1, [A8_Pipe1]>, + InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_LdSt0], 0>, + InstrStage<1, [A8_NLSPipe]>]>, + // + // FP Store Multiple + // use A8_Issue to enforce the 1 load/store per cycle limit + InstrItinData<IIC_fpStorem, [InstrStage<3, [A8_Issue], 0>, + InstrStage<2, [A8_Pipe0], 0>, + InstrStage<2, [A8_Pipe1]>, + InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_LdSt0], 0>, + InstrStage<1, [A8_NLSPipe]>]>, + + // NEON + // Issue through integer pipeline, and execute in NEON unit. 
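Many of the A8 VFP entries above pair two stages where the first has a cycle increment of 0, which makes the second stage start in the same cycle, so both NEON units stay busy for the full duration of the operation. An annotated copy of the single-precision divide entry from above, for reference (the trailing comments are editorial and are not in the file):

    InstrItinData<IIC_fpDIV32 , [InstrStage<1,  [A8_Pipe0, A8_Pipe1]>, // one issue cycle in either ALU pipe
                                 InstrStage<20, [A8_NPipe], 0>,        // increment 0: next stage starts this cycle,
                                 InstrStage<20, [A8_NLSPipe]>],        // so both NEON pipes are held for 20 cycles
                                [20, 1, 1]>,                           // result ready in cycle 20; sources read in cycle 1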
+ // + // VLD1 + // FIXME: We don't model this instruction properly + InstrItinData<IIC_VLD1, [InstrStage<1, [A8_Issue], 0>, + InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_LdSt0], 0>, + InstrStage<1, [A8_NLSPipe]>]>, + // + // VLD2 + // FIXME: We don't model this instruction properly + InstrItinData<IIC_VLD2, [InstrStage<1, [A8_Issue], 0>, + InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_LdSt0], 0>, + InstrStage<1, [A8_NLSPipe]>], [2, 2, 1]>, + // + // VLD3 + // FIXME: We don't model this instruction properly + InstrItinData<IIC_VLD3, [InstrStage<1, [A8_Issue], 0>, + InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_LdSt0], 0>, + InstrStage<1, [A8_NLSPipe]>], [2, 2, 2, 1]>, + // + // VLD4 + // FIXME: We don't model this instruction properly + InstrItinData<IIC_VLD4, [InstrStage<1, [A8_Issue], 0>, + InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_LdSt0], 0>, + InstrStage<1, [A8_NLSPipe]>], [2, 2, 2, 2, 1]>, + // + // VST + // FIXME: We don't model this instruction properly + InstrItinData<IIC_VST, [InstrStage<1, [A8_Issue], 0>, + InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_LdSt0], 0>, + InstrStage<1, [A8_NLSPipe]>]>, + // + // Double-register FP Unary + InstrItinData<IIC_VUNAD, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_NPipe]>], [5, 2]>, + // + // Quad-register FP Unary + // Result written in N5, but that is relative to the last cycle of multicycle, + // so we use 6 for those cases + InstrItinData<IIC_VUNAQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<2, [A8_NPipe]>], [6, 2]>, + // + // Double-register FP Binary + InstrItinData<IIC_VBIND, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_NPipe]>], [5, 2, 2]>, + // + // Quad-register FP Binary + // Result written in N5, but that is relative to the last cycle of multicycle, + // so we use 6 for those cases + InstrItinData<IIC_VBINQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<2, [A8_NPipe]>], [6, 2, 2]>, + // + // Move Immediate + InstrItinData<IIC_VMOVImm, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_NPipe]>], [3]>, + // + // Double-register Permute Move + InstrItinData<IIC_VMOVD, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_NLSPipe]>], [2, 1]>, + // + // Quad-register Permute Move + // Result written in N2, but that is relative to the last cycle of multicycle, + // so we use 3 for those cases + InstrItinData<IIC_VMOVQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<2, [A8_NLSPipe]>], [3, 1]>, + // + // Integer to Single-precision Move + InstrItinData<IIC_VMOVIS , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_NLSPipe]>], [2, 1]>, + // + // Integer to Double-precision Move + InstrItinData<IIC_VMOVID , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_NLSPipe]>], [2, 1, 1]>, + // + // Single-precision to Integer Move + InstrItinData<IIC_VMOVSI , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_NLSPipe]>], [20, 1]>, + // + // Double-precision to Integer Move + InstrItinData<IIC_VMOVDI , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_NLSPipe]>], [20, 20, 1]>, + // + // Integer to Lane Move + InstrItinData<IIC_VMOVISL , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<2, [A8_NLSPipe]>], [3, 1, 1]>, + // + // Double-register Permute + InstrItinData<IIC_VPERMD, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_NLSPipe]>], [2, 2, 1, 1]>, + // + // Quad-register Permute + // Result written in N2, but that is relative to the last cycle of multicycle, + // so we use 3 for those cases + 
InstrItinData<IIC_VPERMQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<2, [A8_NLSPipe]>], [3, 3, 1, 1]>, + // + // Quad-register Permute (3 cycle issue) + // Result written in N2, but that is relative to the last cycle of multicycle, + // so we use 4 for those cases + InstrItinData<IIC_VPERMQ3, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_NLSPipe]>, + InstrStage<1, [A8_NPipe], 0>, + InstrStage<2, [A8_NLSPipe]>], [4, 4, 1, 1]>, + // + // Double-register FP Multiple-Accumulate + InstrItinData<IIC_VMACD, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_NPipe]>], [9, 3, 2, 2]>, + // + // Quad-register FP Multiple-Accumulate + // Result written in N9, but that is relative to the last cycle of multicycle, + // so we use 10 for those cases + InstrItinData<IIC_VMACQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<2, [A8_NPipe]>], [10, 3, 2, 2]>, + // + // Double-register Reciprical Step + InstrItinData<IIC_VRECSD, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_NPipe]>], [9, 2, 2]>, + // + // Quad-register Reciprical Step + InstrItinData<IIC_VRECSQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<2, [A8_NPipe]>], [10, 2, 2]>, + // + // Double-register Integer Count + InstrItinData<IIC_VCNTiD, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_NPipe]>], [3, 2, 2]>, + // + // Quad-register Integer Count + // Result written in N3, but that is relative to the last cycle of multicycle, + // so we use 4 for those cases + InstrItinData<IIC_VCNTiQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<2, [A8_NPipe]>], [4, 2, 2]>, + // + // Double-register Integer Unary + InstrItinData<IIC_VUNAiD, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_NPipe]>], [4, 2]>, + // + // Quad-register Integer Unary + InstrItinData<IIC_VUNAiQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_NPipe]>], [4, 2]>, + // + // Double-register Integer Q-Unary + InstrItinData<IIC_VQUNAiD, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_NPipe]>], [4, 1]>, + // + // Quad-register Integer CountQ-Unary + InstrItinData<IIC_VQUNAiQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_NPipe]>], [4, 1]>, + // + // Double-register Integer Binary + InstrItinData<IIC_VBINiD, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_NPipe]>], [3, 2, 2]>, + // + // Quad-register Integer Binary + InstrItinData<IIC_VBINiQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_NPipe]>], [3, 2, 2]>, + // + // Double-register Integer Binary (4 cycle) + InstrItinData<IIC_VBINi4D, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_NPipe]>], [4, 2, 1]>, + // + // Quad-register Integer Binary (4 cycle) + InstrItinData<IIC_VBINi4Q, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_NPipe]>], [4, 2, 1]>, + + // + // Double-register Integer Subtract + InstrItinData<IIC_VSUBiD, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_NPipe]>], [3, 2, 1]>, + // + // Quad-register Integer Subtract + InstrItinData<IIC_VSUBiQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_NPipe]>], [3, 2, 1]>, + // + // Double-register Integer Subtract + InstrItinData<IIC_VSUBi4D, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_NPipe]>], [4, 2, 1]>, + // + // Quad-register Integer Subtract + InstrItinData<IIC_VSUBi4Q, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_NPipe]>], [4, 2, 1]>, + // + // Double-register Integer Shift + InstrItinData<IIC_VSHLiD, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_NPipe]>], [3, 1, 1]>, + // + // 
Quad-register Integer Shift + InstrItinData<IIC_VSHLiQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<2, [A8_NPipe]>], [4, 1, 1]>, + // + // Double-register Integer Shift (4 cycle) + InstrItinData<IIC_VSHLi4D, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_NPipe]>], [4, 1, 1]>, + // + // Quad-register Integer Shift (4 cycle) + InstrItinData<IIC_VSHLi4Q, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<2, [A8_NPipe]>], [5, 1, 1]>, + // + // Double-register Integer Pair Add Long + InstrItinData<IIC_VPALiD, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_NPipe]>], [6, 3, 1]>, + // + // Quad-register Integer Pair Add Long + InstrItinData<IIC_VPALiQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<2, [A8_NPipe]>], [7, 3, 1]>, + // + // Double-register Absolute Difference and Accumulate + InstrItinData<IIC_VABAD, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_NPipe]>], [6, 3, 2, 1]>, + // + // Quad-register Absolute Difference and Accumulate + InstrItinData<IIC_VABAQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<2, [A8_NPipe]>], [6, 3, 2, 1]>, + + // + // Double-register Integer Multiply (.8, .16) + InstrItinData<IIC_VMULi16D, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_NPipe]>], [6, 2, 2]>, + // + // Double-register Integer Multiply (.32) + InstrItinData<IIC_VMULi32D, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<2, [A8_NPipe]>], [7, 2, 1]>, + // + // Quad-register Integer Multiply (.8, .16) + InstrItinData<IIC_VMULi16Q, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<2, [A8_NPipe]>], [7, 2, 2]>, + // + // Quad-register Integer Multiply (.32) + InstrItinData<IIC_VMULi32Q, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_NPipe]>, + InstrStage<2, [A8_NLSPipe], 0>, + InstrStage<3, [A8_NPipe]>], [9, 2, 1]>, + // + // Double-register Integer Multiply-Accumulate (.8, .16) + InstrItinData<IIC_VMACi16D, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_NPipe]>], [6, 3, 2, 2]>, + // + // Double-register Integer Multiply-Accumulate (.32) + InstrItinData<IIC_VMACi32D, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<2, [A8_NPipe]>], [7, 3, 2, 1]>, + // + // Quad-register Integer Multiply-Accumulate (.8, .16) + InstrItinData<IIC_VMACi16Q, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<2, [A8_NPipe]>], [7, 3, 2, 2]>, + // + // Quad-register Integer Multiply-Accumulate (.32) + InstrItinData<IIC_VMACi32Q, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_NPipe]>, + InstrStage<2, [A8_NLSPipe], 0>, + InstrStage<3, [A8_NPipe]>], [9, 3, 2, 1]>, + // + // Double-register VEXT + InstrItinData<IIC_VEXTD, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_NLSPipe]>], [2, 1, 1]>, + // + // Quad-register VEXT + InstrItinData<IIC_VEXTQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<2, [A8_NLSPipe]>], [3, 1, 1]>, + // + // VTB + InstrItinData<IIC_VTB1, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<2, [A8_NLSPipe]>], [3, 2, 1]>, + InstrItinData<IIC_VTB2, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<2, [A8_NLSPipe]>], [3, 2, 2, 1]>, + InstrItinData<IIC_VTB3, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_NLSPipe]>, + InstrStage<1, [A8_NPipe], 0>, + InstrStage<2, [A8_NLSPipe]>], [4, 2, 2, 3, 1]>, + InstrItinData<IIC_VTB4, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_NLSPipe]>, + InstrStage<1, [A8_NPipe], 0>, + InstrStage<2, [A8_NLSPipe]>], [4, 2, 2, 3, 3, 1]>, + // + // VTBX + InstrItinData<IIC_VTBX1, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<2, [A8_NLSPipe]>], [3, 1, 
2, 1]>,
+  InstrItinData<IIC_VTBX2, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+                            InstrStage<2, [A8_NLSPipe]>], [3, 1, 2, 2, 1]>,
+  InstrItinData<IIC_VTBX3, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+                            InstrStage<1, [A8_NLSPipe]>,
+                            InstrStage<1, [A8_NPipe], 0>,
+                            InstrStage<2, [A8_NLSPipe]>], [4, 1, 2, 2, 3, 1]>,
+  InstrItinData<IIC_VTBX4, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+                            InstrStage<1, [A8_NLSPipe]>,
+                            InstrStage<1, [A8_NPipe], 0>,
+                            InstrStage<2, [A8_NLSPipe]>], [4, 1, 2, 2, 3, 3, 1]>
+]>;
diff --git a/lib/Target/ARM/ARMScheduleA9.td b/lib/Target/ARM/ARMScheduleA9.td
new file mode 100644
index 0000000..75320d9
--- /dev/null
+++ b/lib/Target/ARM/ARMScheduleA9.td
@@ -0,0 +1,749 @@
+//=- ARMScheduleA9.td - ARM Cortex-A9 Scheduling Definitions -*- tablegen -*-=//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the itinerary class data for the ARM Cortex A9 processors.
+//
+//===----------------------------------------------------------------------===//
+
+//
+// Ad-hoc scheduling information derived from the rather vague "Cortex-A9
+// Technical Reference Manual".
+//
+// Functional units
+def A9_Issue   : FuncUnit; // issue
+def A9_Pipe0   : FuncUnit; // pipeline 0
+def A9_Pipe1   : FuncUnit; // pipeline 1
+def A9_LSPipe  : FuncUnit; // LS pipe
+def A9_NPipe   : FuncUnit; // NEON ALU/MUL pipe
+def A9_DRegsVFP: FuncUnit; // FP register set, VFP side
+def A9_DRegsN  : FuncUnit; // FP register set, NEON side
+
+// Dual issue pipeline represented by A9_Pipe0 | A9_Pipe1
+//
+def CortexA9Itineraries : ProcessorItineraries<
+  [A9_NPipe, A9_DRegsN, A9_DRegsVFP, A9_LSPipe, A9_Pipe0, A9_Pipe1, A9_Issue], [
+  // VFP and NEON share the same register file. This means that every VFP
+  // instruction should wait for full completion of the preceding NEON
+  // instruction and vice-versa. We model this behavior with two artificial FUs:
+  // DRegsVFP and DRegsN.
+  //
+  // Every VFP instruction:
+  //  - Acquires DRegsVFP resource for 1 cycle
+  //  - Reserves DRegsN resource for the whole duration (including time to
+  //    register file writeback!).
+  // Every NEON instruction does the same but with FUs swapped.
+  //
+  // Since the reserved FU cannot be acquired, this models "cross-domain"
+  // stalls precisely.
+
+  // VFP
+  // Issue through integer pipeline, and execute in NEON unit.
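To make the cross-domain rule above concrete, here is an annotated copy of the single-precision FP unary entry that appears a few lines below (the annotations are editorial; the entry itself is unchanged):

    InstrItinData<IIC_fpUNA32 , [InstrStage<1, [A9_DRegsVFP], 0, Required>, // must actually acquire the VFP side
                                 InstrStage<3, [A9_DRegsN],   0, Reserved>, // NEON side only held, for 3 cycles
                                                                            // (execute plus the 2-cycle writeback),
                                                                            // so a following NEON op stalls that long
                                 InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
                                 InstrStage<1, [A9_NPipe]>], [1, 1]>,

The NEON entries further down mirror this with A9_DRegsN as Required and A9_DRegsVFP as Reserved, which is how the back-to-back VFP/NEON penalty is expressed without any special-casing in the scheduler.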
+ + // FP Special Register to Integer Register File Move + InstrItinData<IIC_fpSTAT , [InstrStage<1, [A9_DRegsVFP], 0, Required>, + InstrStage<2, [A9_DRegsN], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>]>, + // + // Single-precision FP Unary + InstrItinData<IIC_fpUNA32 , [InstrStage<1, [A9_DRegsVFP], 0, Required>, + // Extra latency cycles since wbck is 2 cycles + InstrStage<3, [A9_DRegsN], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [1, 1]>, + // + // Double-precision FP Unary + InstrItinData<IIC_fpUNA64 , [InstrStage<1, [A9_DRegsVFP], 0, Required>, + // Extra latency cycles since wbck is 2 cycles + InstrStage<3, [A9_DRegsN], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [1, 1]>, + + // + // Single-precision FP Compare + InstrItinData<IIC_fpCMP32 , [InstrStage<1, [A9_DRegsVFP], 0, Required>, + // Extra latency cycles since wbck is 4 cycles + InstrStage<5, [A9_DRegsN], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [1, 1]>, + // + // Double-precision FP Compare + InstrItinData<IIC_fpCMP64 , [InstrStage<1, [A9_DRegsVFP], 0, Required>, + // Extra latency cycles since wbck is 4 cycles + InstrStage<5, [A9_DRegsN], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [1, 1]>, + // + // Single to Double FP Convert + InstrItinData<IIC_fpCVTSD , [InstrStage<1, [A9_DRegsVFP], 0, Required>, + InstrStage<5, [A9_DRegsN], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [4, 1]>, + // + // Double to Single FP Convert + InstrItinData<IIC_fpCVTDS , [InstrStage<1, [A9_DRegsVFP], 0, Required>, + InstrStage<5, [A9_DRegsN], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [4, 1]>, + + // + // Single to Half FP Convert + InstrItinData<IIC_fpCVTSH , [InstrStage<1, [A9_DRegsVFP], 0, Required>, + InstrStage<5, [A9_DRegsN], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [4, 1]>, + // + // Half to Single FP Convert + InstrItinData<IIC_fpCVTHS , [InstrStage<1, [A9_DRegsVFP], 0, Required>, + InstrStage<3, [A9_DRegsN], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [2, 1]>, + + // + // Single-Precision FP to Integer Convert + InstrItinData<IIC_fpCVTSI , [InstrStage<1, [A9_DRegsVFP], 0, Required>, + InstrStage<5, [A9_DRegsN], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [4, 1]>, + // + // Double-Precision FP to Integer Convert + InstrItinData<IIC_fpCVTDI , [InstrStage<1, [A9_DRegsVFP], 0, Required>, + InstrStage<5, [A9_DRegsN], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [4, 1]>, + // + // Integer to Single-Precision FP Convert + InstrItinData<IIC_fpCVTIS , [InstrStage<1, [A9_DRegsVFP], 0, Required>, + InstrStage<5, [A9_DRegsN], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [4, 1]>, + // + // Integer to Double-Precision FP Convert + InstrItinData<IIC_fpCVTID , [InstrStage<1, [A9_DRegsVFP], 0, Required>, + InstrStage<5, [A9_DRegsN], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [4, 1]>, + // + // Single-precision FP ALU + InstrItinData<IIC_fpALU32 , [InstrStage<1, [A9_DRegsVFP], 0, Required>, + InstrStage<5, [A9_DRegsN], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [4, 1, 1]>, + // + // Double-precision FP ALU + 
InstrItinData<IIC_fpALU64 , [InstrStage<1, [A9_DRegsVFP], 0, Required>, + InstrStage<5, [A9_DRegsN], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [4, 1, 1]>, + // + // Single-precision FP Multiply + InstrItinData<IIC_fpMUL32 , [InstrStage<1, [A9_DRegsVFP], 0, Required>, + InstrStage<6, [A9_DRegsN], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [5, 1, 1]>, + // + // Double-precision FP Multiply + InstrItinData<IIC_fpMUL64 , [InstrStage<1, [A9_DRegsVFP], 0, Required>, + InstrStage<7, [A9_DRegsN], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<2, [A9_NPipe]>], [6, 1, 1]>, + // + // Single-precision FP MAC + InstrItinData<IIC_fpMAC32 , [InstrStage<1, [A9_DRegsVFP], 0, Required>, + InstrStage<9, [A9_DRegsN], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [8, 0, 1, 1]>, + // + // Double-precision FP MAC + InstrItinData<IIC_fpMAC64 , [InstrStage<1, [A9_DRegsVFP], 0, Required>, + InstrStage<10, [A9_DRegsN], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<2, [A9_NPipe]>], [9, 0, 1, 1]>, + // + // Single-precision FP DIV + InstrItinData<IIC_fpDIV32 , [InstrStage<1, [A9_DRegsVFP], 0, Required>, + InstrStage<16, [A9_DRegsN], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<10, [A9_NPipe]>], [15, 1, 1]>, + // + // Double-precision FP DIV + InstrItinData<IIC_fpDIV64 , [InstrStage<1, [A9_DRegsVFP], 0, Required>, + InstrStage<26, [A9_DRegsN], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<20, [A9_NPipe]>], [25, 1, 1]>, + // + // Single-precision FP SQRT + InstrItinData<IIC_fpSQRT32, [InstrStage<1, [A9_DRegsVFP], 0, Required>, + InstrStage<18, [A9_DRegsN], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<13, [A9_NPipe]>], [17, 1]>, + // + // Double-precision FP SQRT + InstrItinData<IIC_fpSQRT64, [InstrStage<1, [A9_DRegsVFP], 0, Required>, + InstrStage<33, [A9_DRegsN], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<28, [A9_NPipe]>], [32, 1]>, + + // + // Integer to Single-precision Move + InstrItinData<IIC_fpMOVIS, [InstrStage<1, [A9_DRegsVFP], 0, Required>, + // Extra 1 latency cycle since wbck is 2 cycles + InstrStage<3, [A9_DRegsN], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [1, 1]>, + // + // Integer to Double-precision Move + InstrItinData<IIC_fpMOVID, [InstrStage<1, [A9_DRegsVFP], 0, Required>, + // Extra 1 latency cycle since wbck is 2 cycles + InstrStage<3, [A9_DRegsN], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [1, 1, 1]>, + // + // Single-precision to Integer Move + InstrItinData<IIC_fpMOVSI, [InstrStage<1, [A9_DRegsVFP], 0, Required>, + InstrStage<2, [A9_DRegsN], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [1, 1]>, + // + // Double-precision to Integer Move + InstrItinData<IIC_fpMOVDI, [InstrStage<1, [A9_DRegsVFP], 0, Required>, + InstrStage<2, [A9_DRegsN], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [1, 1, 1]>, + // + // Single-precision FP Load + // use A9_Issue to enforce the 1 load/store per cycle limit + InstrItinData<IIC_fpLoad32, [InstrStage<1, [A9_DRegsVFP], 0, Required>, + InstrStage<2, [A9_DRegsN], 0, Reserved>, + InstrStage<1, [A9_Issue], 0>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_LSPipe], 0>, + InstrStage<1, [A9_NPipe]>]>, + // + // Double-precision FP Load + // use A9_Issue to enforce the 1 
load/store per cycle limit + InstrItinData<IIC_fpLoad64, [InstrStage<1, [A9_DRegsVFP], 0, Required>, + InstrStage<2, [A9_DRegsN], 0, Reserved>, + InstrStage<1, [A9_Issue], 0>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_LSPipe], 0>, + InstrStage<1, [A9_NPipe]>]>, + // + // FP Load Multiple + // use A9_Issue to enforce the 1 load/store per cycle limit + InstrItinData<IIC_fpLoadm, [InstrStage<1, [A9_DRegsVFP], 0, Required>, + InstrStage<2, [A9_DRegsN], 0, Reserved>, + InstrStage<1, [A9_Issue], 0>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_LSPipe], 0>, + InstrStage<1, [A9_NPipe]>]>, + // + // Single-precision FP Store + // use A9_Issue to enforce the 1 load/store per cycle limit + InstrItinData<IIC_fpStore32,[InstrStage<1, [A9_DRegsVFP], 0, Required>, + InstrStage<2, [A9_DRegsN], 0, Reserved>, + InstrStage<1, [A9_Issue], 0>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_LSPipe], 0>, + InstrStage<1, [A9_NPipe]>]>, + // + // Double-precision FP Store + // use A9_Issue to enforce the 1 load/store per cycle limit + InstrItinData<IIC_fpStore64,[InstrStage<1, [A9_DRegsVFP], 0, Required>, + InstrStage<2, [A9_DRegsN], 0, Reserved>, + InstrStage<1, [A9_Issue], 0>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_LSPipe], 0>, + InstrStage<1, [A9_NPipe]>]>, + // + // FP Store Multiple + // use A9_Issue to enforce the 1 load/store per cycle limit + InstrItinData<IIC_fpStorem, [InstrStage<1, [A9_DRegsVFP], 0, Required>, + InstrStage<2, [A9_DRegsN], 0, Reserved>, + InstrStage<1, [A9_Issue], 0>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_LSPipe], 0>, + InstrStage<1, [A9_NPipe]>]>, + // NEON + // Issue through integer pipeline, and execute in NEON unit. + // FIXME: Neon pipeline and LdSt unit are multiplexed. + // Add some syntactic sugar to model this! 
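Before the NEON entries start, note that every FP load/store entry above also spends a zero-increment cycle on A9_Issue; since there is only one A9_Issue unit, at most one load or store can acquire it per cycle, which is how the "1 load/store per cycle limit" comments are enforced. An annotated copy of the single-precision load entry from above (comments editorial, not in the file):

    InstrItinData<IIC_fpLoad32, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
                                 InstrStage<2, [A9_DRegsN],   0, Reserved>,
                                 InstrStage<1, [A9_Issue], 0>,            // single shared issue token: only one
                                                                          // memory op may acquire it per cycle
                                 InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
                                 InstrStage<1, [A9_LSPipe], 0>,
                                 InstrStage<1, [A9_NPipe]>]>,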
+ // VLD1 + // FIXME: We don't model this instruction properly + InstrItinData<IIC_VLD1, [InstrStage<1, [A9_DRegsN], 0, Required>, + InstrStage<7, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Issue], 0>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_LSPipe], 0>, + InstrStage<1, [A9_NPipe]>]>, + // + // VLD2 + // FIXME: We don't model this instruction properly + InstrItinData<IIC_VLD2, [InstrStage<1, [A9_DRegsN], 0, Required>, + // Extra latency cycles since wbck is 6 cycles + InstrStage<7, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Issue], 0>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_LSPipe], 0>, + InstrStage<1, [A9_NPipe]>], [2, 2, 1]>, + // + // VLD3 + // FIXME: We don't model this instruction properly + InstrItinData<IIC_VLD3, [InstrStage<1, [A9_DRegsN], 0, Required>, + // Extra latency cycles since wbck is 6 cycles + InstrStage<7, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Issue], 0>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_LSPipe], 0>, + InstrStage<1, [A9_NPipe]>], [2, 2, 2, 1]>, + // + // VLD4 + // FIXME: We don't model this instruction properly + InstrItinData<IIC_VLD4, [InstrStage<1, [A9_DRegsN], 0, Required>, + // Extra latency cycles since wbck is 6 cycles + InstrStage<7, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Issue], 0>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_LSPipe], 0>, + InstrStage<1, [A9_NPipe]>], [2, 2, 2, 2, 1]>, + // + // VST + // FIXME: We don't model this instruction properly + InstrItinData<IIC_VST, [InstrStage<1, [A9_DRegsN], 0, Required>, + // Extra latency cycles since wbck is 6 cycles + InstrStage<7, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Issue], 0>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_LSPipe], 0>, + InstrStage<1, [A9_NPipe]>]>, + // + // Double-register Integer Unary + InstrItinData<IIC_VUNAiD, [InstrStage<1, [A9_DRegsN], 0, Required>, + // Extra latency cycles since wbck is 6 cycles + InstrStage<7, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [4, 2]>, + // + // Quad-register Integer Unary + InstrItinData<IIC_VUNAiQ, [InstrStage<1, [A9_DRegsN], 0, Required>, + // Extra latency cycles since wbck is 6 cycles + InstrStage<7, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [4, 2]>, + // + // Double-register Integer Q-Unary + InstrItinData<IIC_VQUNAiD, [InstrStage<1, [A9_DRegsN], 0, Required>, + // Extra latency cycles since wbck is 6 cycles + InstrStage<7, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [4, 1]>, + // + // Quad-register Integer CountQ-Unary + InstrItinData<IIC_VQUNAiQ, [InstrStage<1, [A9_DRegsN], 0, Required>, + // Extra latency cycles since wbck is 6 cycles + InstrStage<7, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [4, 1]>, + // + // Double-register Integer Binary + InstrItinData<IIC_VBINiD, [InstrStage<1, [A9_DRegsN], 0, Required>, + // Extra latency cycles since wbck is 6 cycles + InstrStage<7, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [3, 2, 2]>, + // + // Quad-register Integer Binary + InstrItinData<IIC_VBINiQ, [InstrStage<1, [A9_DRegsN], 0, Required>, + // Extra latency cycles since wbck is 6 cycles + InstrStage<7, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [3, 2, 2]>, + // + // Double-register Integer 
Subtract
+  InstrItinData<IIC_VSUBiD,   [InstrStage<1, [A9_DRegsN],   0, Required>,
+                               // Extra latency cycles since wbck is 6 cycles
+                               InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+                               InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+                               InstrStage<1, [A9_NPipe]>], [3, 2, 1]>,
+  //
+  // Quad-register Integer Subtract
+  InstrItinData<IIC_VSUBiQ,   [InstrStage<1, [A9_DRegsN],   0, Required>,
+                               // Extra latency cycles since wbck is 6 cycles
+                               InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+                               InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+                               InstrStage<1, [A9_NPipe]>], [3, 2, 1]>,
+  //
+  // Double-register Integer Shift
+  InstrItinData<IIC_VSHLiD,   [InstrStage<1, [A9_DRegsN],   0, Required>,
+                               // Extra latency cycles since wbck is 6 cycles
+                               InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+                               InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+                               InstrStage<1, [A9_NPipe]>], [3, 1, 1]>,
+  //
+  // Quad-register Integer Shift
+  InstrItinData<IIC_VSHLiQ,   [InstrStage<1, [A9_DRegsN],   0, Required>,
+                               // Extra latency cycles since wbck is 6 cycles
+                               InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+                               InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+                               InstrStage<1, [A9_NPipe]>], [3, 1, 1]>,
+  //
+  // Double-register Integer Shift (4 cycle)
+  InstrItinData<IIC_VSHLi4D,  [InstrStage<1, [A9_DRegsN],   0, Required>,
+                               // Extra latency cycles since wbck is 6 cycles
+                               InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+                               InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+                               InstrStage<1, [A9_NPipe]>], [4, 1, 1]>,
+  //
+  // Quad-register Integer Shift (4 cycle)
+  InstrItinData<IIC_VSHLi4Q,  [InstrStage<1, [A9_DRegsN],   0, Required>,
+                               // Extra latency cycles since wbck is 6 cycles
+                               InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+                               InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+                               InstrStage<1, [A9_NPipe]>], [4, 1, 1]>,
+  //
+  // Double-register Integer Binary (4 cycle)
+  InstrItinData<IIC_VBINi4D,  [InstrStage<1, [A9_DRegsN],   0, Required>,
+                               // Extra latency cycles since wbck is 6 cycles
+                               InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+                               InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+                               InstrStage<1, [A9_NPipe]>], [4, 2, 2]>,
+  //
+  // Quad-register Integer Binary (4 cycle)
+  InstrItinData<IIC_VBINi4Q,  [InstrStage<1, [A9_DRegsN],   0, Required>,
+                               // Extra latency cycles since wbck is 6 cycles
+                               InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+                               InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+                               InstrStage<1, [A9_NPipe]>], [4, 2, 2]>,
+  //
+  // Double-register Integer Subtract (4 cycle)
+  InstrItinData<IIC_VSUBi4D,  [InstrStage<1, [A9_DRegsN],   0, Required>,
+                               // Extra latency cycles since wbck is 6 cycles
+                               InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+                               InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+                               InstrStage<1, [A9_NPipe]>], [4, 2, 1]>,
+  //
+  // Quad-register Integer Subtract (4 cycle)
+  InstrItinData<IIC_VSUBi4Q,  [InstrStage<1, [A9_DRegsN],   0, Required>,
+                               // Extra latency cycles since wbck is 6 cycles
+                               InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+                               InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+                               InstrStage<1, [A9_NPipe]>], [4, 2, 1]>,
+
+  //
+  // Double-register Integer Count
+  InstrItinData<IIC_VCNTiD,   [InstrStage<1, [A9_DRegsN],   0, Required>,
+                               // Extra latency cycles since wbck is 6 cycles
+                               InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+                               InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+                               InstrStage<1, [A9_NPipe]>], [3, 2, 2]>,
+  //
+  // Quad-register Integer Count
+  // Result written in N3, but that is relative to the last cycle of multicycle,
+  // so we use 4 for those cases
+  InstrItinData<IIC_VCNTiQ,   [InstrStage<1, [A9_DRegsN],   0, Required>,
+                               // Extra latency cycles since wbck is 7 cycles
+                               InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+                               InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+                               InstrStage<2,
[A9_NPipe]>], [4, 2, 2]>, + // + // Double-register Absolute Difference and Accumulate + InstrItinData<IIC_VABAD, [InstrStage<1, [A9_DRegsN], 0, Required>, + // Extra latency cycles since wbck is 6 cycles + InstrStage<7, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [6, 3, 2, 1]>, + // + // Quad-register Absolute Difference and Accumulate + InstrItinData<IIC_VABAQ, [InstrStage<1, [A9_DRegsN], 0, Required>, + // Extra latency cycles since wbck is 6 cycles + InstrStage<7, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<2, [A9_NPipe]>], [6, 3, 2, 1]>, + // + // Double-register Integer Pair Add Long + InstrItinData<IIC_VPALiD, [InstrStage<1, [A9_DRegsN], 0, Required>, + // Extra latency cycles since wbck is 6 cycles + InstrStage<7, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [6, 3, 1]>, + // + // Quad-register Integer Pair Add Long + InstrItinData<IIC_VPALiQ, [InstrStage<1, [A9_DRegsN], 0, Required>, + // Extra latency cycles since wbck is 6 cycles + InstrStage<7, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<2, [A9_NPipe]>], [6, 3, 1]>, + + // + // Double-register Integer Multiply (.8, .16) + InstrItinData<IIC_VMULi16D, [InstrStage<1, [A9_DRegsN], 0, Required>, + // Extra latency cycles since wbck is 6 cycles + InstrStage<7, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [6, 2, 2]>, + // + // Quad-register Integer Multiply (.8, .16) + InstrItinData<IIC_VMULi16Q, [InstrStage<1, [A9_DRegsN], 0, Required>, + // Extra latency cycles since wbck is 7 cycles + InstrStage<8, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<2, [A9_NPipe]>], [7, 2, 2]>, + + // + // Double-register Integer Multiply (.32) + InstrItinData<IIC_VMULi32D, [InstrStage<1, [A9_DRegsN], 0, Required>, + // Extra latency cycles since wbck is 7 cycles + InstrStage<8, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<2, [A9_NPipe]>], [7, 2, 1]>, + // + // Quad-register Integer Multiply (.32) + InstrItinData<IIC_VMULi32Q, [InstrStage<1, [A9_DRegsN], 0, Required>, + // Extra latency cycles since wbck is 9 cycles + InstrStage<10, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<4, [A9_NPipe]>], [9, 2, 1]>, + // + // Double-register Integer Multiply-Accumulate (.8, .16) + InstrItinData<IIC_VMACi16D, [InstrStage<1, [A9_DRegsN], 0, Required>, + // Extra latency cycles since wbck is 6 cycles + InstrStage<7, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [6, 3, 2, 2]>, + // + // Double-register Integer Multiply-Accumulate (.32) + InstrItinData<IIC_VMACi32D, [InstrStage<1, [A9_DRegsN], 0, Required>, + // Extra latency cycles since wbck is 7 cycles + InstrStage<8, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<2, [A9_NPipe]>], [7, 3, 2, 1]>, + // + // Quad-register Integer Multiply-Accumulate (.8, .16) + InstrItinData<IIC_VMACi16Q, [InstrStage<1, [A9_DRegsN], 0, Required>, + // Extra latency cycles since wbck is 7 cycles + InstrStage<8, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<2, [A9_NPipe]>], [7, 3, 2, 2]>, + // + // Quad-register Integer Multiply-Accumulate (.32) + InstrItinData<IIC_VMACi32Q, [InstrStage<1, [A9_DRegsN], 0, Required>, + // Extra latency cycles since wbck is 9 cycles + InstrStage<10, 
[A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<4, [A9_NPipe]>], [9, 3, 2, 1]>, + // + // Move Immediate + InstrItinData<IIC_VMOVImm, [InstrStage<1, [A9_DRegsN], 0, Required>, + // Extra latency cycles since wbck is 6 cycles + InstrStage<7, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [3]>, + // + // Double-register Permute Move + InstrItinData<IIC_VMOVD, [InstrStage<1, [A9_DRegsN], 0, Required>, + // FIXME: all latencies are arbitrary, no information is available + InstrStage<3, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_LSPipe]>], [2, 1]>, + // + // Quad-register Permute Move + // Result written in N2, but that is relative to the last cycle of multicycle, + // so we use 3 for those cases + InstrItinData<IIC_VMOVQ, [InstrStage<1, [A9_DRegsN], 0, Required>, + // FIXME: all latencies are arbitrary, no information is available + InstrStage<4, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<2, [A9_NPipe]>], [3, 1]>, + // + // Integer to Single-precision Move + InstrItinData<IIC_VMOVIS , [InstrStage<1, [A9_DRegsN], 0, Required>, + // FIXME: all latencies are arbitrary, no information is available + InstrStage<3, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [2, 1]>, + // + // Integer to Double-precision Move + InstrItinData<IIC_VMOVID , [InstrStage<1, [A9_DRegsN], 0, Required>, + // FIXME: all latencies are arbitrary, no information is available + InstrStage<3, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [2, 1, 1]>, + // + // Single-precision to Integer Move + InstrItinData<IIC_VMOVSI , [InstrStage<1, [A9_DRegsN], 0, Required>, + // FIXME: all latencies are arbitrary, no information is available + InstrStage<3, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [2, 1]>, + // + // Double-precision to Integer Move + InstrItinData<IIC_VMOVDI , [InstrStage<1, [A9_DRegsN], 0, Required>, + // FIXME: all latencies are arbitrary, no information is available + InstrStage<3, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [2, 2, 1]>, + // + // Integer to Lane Move + InstrItinData<IIC_VMOVISL , [InstrStage<1, [A9_DRegsN], 0, Required>, + // FIXME: all latencies are arbitrary, no information is available + InstrStage<4, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<2, [A9_NPipe]>], [3, 1, 1]>, + + // + // Double-register FP Unary + InstrItinData<IIC_VUNAD, [InstrStage<1, [A9_DRegsN], 0, Required>, + // Extra latency cycles since wbck is 6 cycles + InstrStage<7, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [5, 2]>, + // + // Quad-register FP Unary + // Result written in N5, but that is relative to the last cycle of multicycle, + // so we use 6 for those cases + InstrItinData<IIC_VUNAQ, [InstrStage<1, [A9_DRegsN], 0, Required>, + // Extra latency cycles since wbck is 7 cycles + InstrStage<8, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<2, [A9_NPipe]>], [6, 2]>, + // + // Double-register FP Binary + // FIXME: We're using this itin for many instructions and [2, 2] here is too + // optimistic. 
+ InstrItinData<IIC_VBIND, [InstrStage<1, [A9_DRegsN], 0, Required>, + // Extra latency cycles since wbck is 7 cycles + InstrStage<7, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [5, 2, 2]>, + // + // Quad-register FP Binary + // Result written in N5, but that is relative to the last cycle of multicycle, + // so we use 6 for those cases + // FIXME: We're using this itin for many instructions and [2, 2] here is too + // optimistic. + InstrItinData<IIC_VBINQ, [InstrStage<1, [A9_DRegsN], 0, Required>, + // Extra latency cycles since wbck is 8 cycles + InstrStage<8, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<2, [A9_NPipe]>], [6, 2, 2]>, + // + // Double-register FP Multiple-Accumulate + InstrItinData<IIC_VMACD, [InstrStage<1, [A9_DRegsN], 0, Required>, + // Extra latency cycles since wbck is 7 cycles + InstrStage<8, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<2, [A9_NPipe]>], [6, 3, 2, 1]>, + // + // Quad-register FP Multiple-Accumulate + // Result written in N9, but that is relative to the last cycle of multicycle, + // so we use 10 for those cases + InstrItinData<IIC_VMACQ, [InstrStage<1, [A9_DRegsN], 0, Required>, + // Extra latency cycles since wbck is 9 cycles + InstrStage<10, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<4, [A9_NPipe]>], [8, 4, 2, 1]>, + // + // Double-register Reciprical Step + InstrItinData<IIC_VRECSD, [InstrStage<1, [A9_DRegsN], 0, Required>, + // Extra latency cycles since wbck is 7 cycles + InstrStage<8, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<2, [A9_NPipe]>], [6, 2, 2]>, + // + // Quad-register Reciprical Step + InstrItinData<IIC_VRECSQ, [InstrStage<1, [A9_DRegsN], 0, Required>, + // Extra latency cycles since wbck is 9 cycles + InstrStage<10, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<4, [A9_NPipe]>], [8, 2, 2]>, + // + // Double-register Permute + InstrItinData<IIC_VPERMD, [InstrStage<1, [A9_DRegsN], 0, Required>, + // Extra latency cycles since wbck is 6 cycles + InstrStage<7, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [2, 2, 1, 1]>, + // + // Quad-register Permute + // Result written in N2, but that is relative to the last cycle of multicycle, + // so we use 3 for those cases + InstrItinData<IIC_VPERMQ, [InstrStage<1, [A9_DRegsN], 0, Required>, + // Extra latency cycles since wbck is 7 cycles + InstrStage<8, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<2, [A9_NPipe]>], [3, 3, 1, 1]>, + // + // Quad-register Permute (3 cycle issue) + // Result written in N2, but that is relative to the last cycle of multicycle, + // so we use 4 for those cases + InstrItinData<IIC_VPERMQ3, [InstrStage<1, [A9_DRegsN], 0, Required>, + // Extra latency cycles since wbck is 8 cycles + InstrStage<9, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<3, [A9_LSPipe]>], [4, 4, 1, 1]>, + + // + // Double-register VEXT + InstrItinData<IIC_VEXTD, [InstrStage<1, [A9_DRegsN], 0, Required>, + // Extra latency cycles since wbck is 7 cycles + InstrStage<7, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [2, 1, 1]>, + // + // Quad-register VEXT + InstrItinData<IIC_VEXTQ, [InstrStage<1, [A9_DRegsN], 0, Required>, + // Extra latency cycles since wbck is 9 cycles + InstrStage<8, [A9_DRegsVFP], 
0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<2, [A9_NPipe]>], [3, 1, 1]>, + // + // VTB + InstrItinData<IIC_VTB1, [InstrStage<1, [A9_DRegsN], 0, Required>, + // Extra latency cycles since wbck is 7 cycles + InstrStage<8, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<2, [A9_NPipe]>], [3, 2, 1]>, + InstrItinData<IIC_VTB2, [InstrStage<2, [A9_DRegsN], 0, Required>, + // Extra latency cycles since wbck is 7 cycles + InstrStage<8, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<2, [A9_NPipe]>], [3, 2, 2, 1]>, + InstrItinData<IIC_VTB3, [InstrStage<2, [A9_DRegsN], 0, Required>, + // Extra latency cycles since wbck is 8 cycles + InstrStage<9, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<3, [A9_NPipe]>], [4, 2, 2, 3, 1]>, + InstrItinData<IIC_VTB4, [InstrStage<1, [A9_DRegsN], 0, Required>, + // Extra latency cycles since wbck is 8 cycles + InstrStage<9, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<3, [A9_NPipe]>], [4, 2, 2, 3, 3, 1]>, + // + // VTBX + InstrItinData<IIC_VTBX1, [InstrStage<1, [A9_DRegsN], 0, Required>, + // Extra latency cycles since wbck is 7 cycles + InstrStage<8, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<2, [A9_NPipe]>], [3, 1, 2, 1]>, + InstrItinData<IIC_VTBX2, [InstrStage<1, [A9_DRegsN], 0, Required>, + // Extra latency cycles since wbck is 7 cycles + InstrStage<8, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<2, [A9_NPipe]>], [3, 1, 2, 2, 1]>, + InstrItinData<IIC_VTBX3, [InstrStage<1, [A9_DRegsN], 0, Required>, + // Extra latency cycles since wbck is 8 cycles + InstrStage<9, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<3, [A9_NPipe]>], [4, 1, 2, 2, 3, 1]>, + InstrItinData<IIC_VTBX4, [InstrStage<1, [A9_DRegsN], 0, Required>, + // Extra latency cycles since wbck is 8 cycles + InstrStage<9, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<2, [A9_NPipe]>], [4, 1, 2, 2, 3, 3, 1]> +]>; diff --git a/lib/Target/ARM/ARMScheduleV6.td b/lib/Target/ARM/ARMScheduleV6.td index 0fef466..f813022 100644 --- a/lib/Target/ARM/ARMScheduleV6.td +++ b/lib/Target/ARM/ARMScheduleV6.td @@ -13,103 +13,107 @@ // Model based on ARM1176 // +// Functional Units +def V6_Pipe : FuncUnit; // pipeline + // Scheduling information derived from "ARM1176JZF-S Technical Reference Manual". 
// -def ARMV6Itineraries : ProcessorItineraries<[ +def ARMV6Itineraries : ProcessorItineraries< + [V6_Pipe], [ // // No operand cycles - InstrItinData<IIC_iALUx , [InstrStage<1, [FU_Pipe0]>]>, + InstrItinData<IIC_iALUx , [InstrStage<1, [V6_Pipe]>]>, // // Binary Instructions that produce a result - InstrItinData<IIC_iALUi , [InstrStage<1, [FU_Pipe0]>], [2, 2]>, - InstrItinData<IIC_iALUr , [InstrStage<1, [FU_Pipe0]>], [2, 2, 2]>, - InstrItinData<IIC_iALUsi , [InstrStage<1, [FU_Pipe0]>], [2, 2, 1]>, - InstrItinData<IIC_iALUsr , [InstrStage<2, [FU_Pipe0]>], [3, 3, 2, 1]>, + InstrItinData<IIC_iALUi , [InstrStage<1, [V6_Pipe]>], [2, 2]>, + InstrItinData<IIC_iALUr , [InstrStage<1, [V6_Pipe]>], [2, 2, 2]>, + InstrItinData<IIC_iALUsi , [InstrStage<1, [V6_Pipe]>], [2, 2, 1]>, + InstrItinData<IIC_iALUsr , [InstrStage<2, [V6_Pipe]>], [3, 3, 2, 1]>, // // Unary Instructions that produce a result - InstrItinData<IIC_iUNAr , [InstrStage<1, [FU_Pipe0]>], [2, 2]>, - InstrItinData<IIC_iUNAsi , [InstrStage<1, [FU_Pipe0]>], [2, 1]>, - InstrItinData<IIC_iUNAsr , [InstrStage<2, [FU_Pipe0]>], [3, 2, 1]>, + InstrItinData<IIC_iUNAr , [InstrStage<1, [V6_Pipe]>], [2, 2]>, + InstrItinData<IIC_iUNAsi , [InstrStage<1, [V6_Pipe]>], [2, 1]>, + InstrItinData<IIC_iUNAsr , [InstrStage<2, [V6_Pipe]>], [3, 2, 1]>, // // Compare instructions - InstrItinData<IIC_iCMPi , [InstrStage<1, [FU_Pipe0]>], [2]>, - InstrItinData<IIC_iCMPr , [InstrStage<1, [FU_Pipe0]>], [2, 2]>, - InstrItinData<IIC_iCMPsi , [InstrStage<1, [FU_Pipe0]>], [2, 1]>, - InstrItinData<IIC_iCMPsr , [InstrStage<2, [FU_Pipe0]>], [3, 2, 1]>, + InstrItinData<IIC_iCMPi , [InstrStage<1, [V6_Pipe]>], [2]>, + InstrItinData<IIC_iCMPr , [InstrStage<1, [V6_Pipe]>], [2, 2]>, + InstrItinData<IIC_iCMPsi , [InstrStage<1, [V6_Pipe]>], [2, 1]>, + InstrItinData<IIC_iCMPsr , [InstrStage<2, [V6_Pipe]>], [3, 2, 1]>, // // Move instructions, unconditional - InstrItinData<IIC_iMOVi , [InstrStage<1, [FU_Pipe0]>], [2]>, - InstrItinData<IIC_iMOVr , [InstrStage<1, [FU_Pipe0]>], [2, 2]>, - InstrItinData<IIC_iMOVsi , [InstrStage<1, [FU_Pipe0]>], [2, 1]>, - InstrItinData<IIC_iMOVsr , [InstrStage<2, [FU_Pipe0]>], [3, 2, 1]>, + InstrItinData<IIC_iMOVi , [InstrStage<1, [V6_Pipe]>], [2]>, + InstrItinData<IIC_iMOVr , [InstrStage<1, [V6_Pipe]>], [2, 2]>, + InstrItinData<IIC_iMOVsi , [InstrStage<1, [V6_Pipe]>], [2, 1]>, + InstrItinData<IIC_iMOVsr , [InstrStage<2, [V6_Pipe]>], [3, 2, 1]>, // // Move instructions, conditional - InstrItinData<IIC_iCMOVi , [InstrStage<1, [FU_Pipe0]>], [3]>, - InstrItinData<IIC_iCMOVr , [InstrStage<1, [FU_Pipe0]>], [3, 2]>, - InstrItinData<IIC_iCMOVsi , [InstrStage<1, [FU_Pipe0]>], [3, 1]>, - InstrItinData<IIC_iCMOVsr , [InstrStage<1, [FU_Pipe0]>], [4, 2, 1]>, + InstrItinData<IIC_iCMOVi , [InstrStage<1, [V6_Pipe]>], [3]>, + InstrItinData<IIC_iCMOVr , [InstrStage<1, [V6_Pipe]>], [3, 2]>, + InstrItinData<IIC_iCMOVsi , [InstrStage<1, [V6_Pipe]>], [3, 1]>, + InstrItinData<IIC_iCMOVsr , [InstrStage<1, [V6_Pipe]>], [4, 2, 1]>, // Integer multiply pipeline // - InstrItinData<IIC_iMUL16 , [InstrStage<1, [FU_Pipe0]>], [4, 1, 1]>, - InstrItinData<IIC_iMAC16 , [InstrStage<1, [FU_Pipe0]>], [4, 1, 1, 2]>, - InstrItinData<IIC_iMUL32 , [InstrStage<2, [FU_Pipe0]>], [5, 1, 1]>, - InstrItinData<IIC_iMAC32 , [InstrStage<2, [FU_Pipe0]>], [5, 1, 1, 2]>, - InstrItinData<IIC_iMUL64 , [InstrStage<3, [FU_Pipe0]>], [6, 1, 1]>, - InstrItinData<IIC_iMAC64 , [InstrStage<3, [FU_Pipe0]>], [6, 1, 1, 2]>, + InstrItinData<IIC_iMUL16 , [InstrStage<1, [V6_Pipe]>], [4, 1, 1]>, + InstrItinData<IIC_iMAC16 , 
[InstrStage<1, [V6_Pipe]>], [4, 1, 1, 2]>, + InstrItinData<IIC_iMUL32 , [InstrStage<2, [V6_Pipe]>], [5, 1, 1]>, + InstrItinData<IIC_iMAC32 , [InstrStage<2, [V6_Pipe]>], [5, 1, 1, 2]>, + InstrItinData<IIC_iMUL64 , [InstrStage<3, [V6_Pipe]>], [6, 1, 1]>, + InstrItinData<IIC_iMAC64 , [InstrStage<3, [V6_Pipe]>], [6, 1, 1, 2]>, // Integer load pipeline // // Immediate offset - InstrItinData<IIC_iLoadi , [InstrStage<1, [FU_Pipe0]>], [4, 1]>, + InstrItinData<IIC_iLoadi , [InstrStage<1, [V6_Pipe]>], [4, 1]>, // // Register offset - InstrItinData<IIC_iLoadr , [InstrStage<1, [FU_Pipe0]>], [4, 1, 1]>, + InstrItinData<IIC_iLoadr , [InstrStage<1, [V6_Pipe]>], [4, 1, 1]>, // // Scaled register offset, issues over 2 cycles - InstrItinData<IIC_iLoadsi , [InstrStage<2, [FU_Pipe0]>], [5, 2, 1]>, + InstrItinData<IIC_iLoadsi , [InstrStage<2, [V6_Pipe]>], [5, 2, 1]>, // // Immediate offset with update - InstrItinData<IIC_iLoadiu , [InstrStage<1, [FU_Pipe0]>], [4, 2, 1]>, + InstrItinData<IIC_iLoadiu , [InstrStage<1, [V6_Pipe]>], [4, 2, 1]>, // // Register offset with update - InstrItinData<IIC_iLoadru , [InstrStage<1, [FU_Pipe0]>], [4, 2, 1, 1]>, + InstrItinData<IIC_iLoadru , [InstrStage<1, [V6_Pipe]>], [4, 2, 1, 1]>, // // Scaled register offset with update, issues over 2 cycles - InstrItinData<IIC_iLoadsiu , [InstrStage<2, [FU_Pipe0]>], [5, 2, 2, 1]>, + InstrItinData<IIC_iLoadsiu , [InstrStage<2, [V6_Pipe]>], [5, 2, 2, 1]>, // // Load multiple - InstrItinData<IIC_iLoadm , [InstrStage<3, [FU_Pipe0]>]>, + InstrItinData<IIC_iLoadm , [InstrStage<3, [V6_Pipe]>]>, // Integer store pipeline // // Immediate offset - InstrItinData<IIC_iStorei , [InstrStage<1, [FU_Pipe0]>], [2, 1]>, + InstrItinData<IIC_iStorei , [InstrStage<1, [V6_Pipe]>], [2, 1]>, // // Register offset - InstrItinData<IIC_iStorer , [InstrStage<1, [FU_Pipe0]>], [2, 1, 1]>, + InstrItinData<IIC_iStorer , [InstrStage<1, [V6_Pipe]>], [2, 1, 1]>, // // Scaled register offset, issues over 2 cycles - InstrItinData<IIC_iStoresi , [InstrStage<2, [FU_Pipe0]>], [2, 2, 1]>, + InstrItinData<IIC_iStoresi , [InstrStage<2, [V6_Pipe]>], [2, 2, 1]>, // // Immediate offset with update - InstrItinData<IIC_iStoreiu , [InstrStage<1, [FU_Pipe0]>], [2, 2, 1]>, + InstrItinData<IIC_iStoreiu , [InstrStage<1, [V6_Pipe]>], [2, 2, 1]>, // // Register offset with update - InstrItinData<IIC_iStoreru , [InstrStage<1, [FU_Pipe0]>], [2, 2, 1, 1]>, + InstrItinData<IIC_iStoreru , [InstrStage<1, [V6_Pipe]>], [2, 2, 1, 1]>, // // Scaled register offset with update, issues over 2 cycles - InstrItinData<IIC_iStoresiu, [InstrStage<2, [FU_Pipe0]>], [2, 2, 2, 1]>, + InstrItinData<IIC_iStoresiu, [InstrStage<2, [V6_Pipe]>], [2, 2, 2, 1]>, // // Store multiple - InstrItinData<IIC_iStorem , [InstrStage<3, [FU_Pipe0]>]>, + InstrItinData<IIC_iStorem , [InstrStage<3, [V6_Pipe]>]>, // Branch // // no delay slots, so the latency of a branch is unimportant - InstrItinData<IIC_Br , [InstrStage<1, [FU_Pipe0]>]>, + InstrItinData<IIC_Br , [InstrStage<1, [V6_Pipe]>]>, // VFP // Issue through integer pipeline, and execute in NEON unit. We assume @@ -117,84 +121,84 @@ def ARMV6Itineraries : ProcessorItineraries<[ // possible. 
// // FP Special Register to Integer Register File Move - InstrItinData<IIC_fpSTAT , [InstrStage<1, [FU_Pipe0]>], [3]>, + InstrItinData<IIC_fpSTAT , [InstrStage<1, [V6_Pipe]>], [3]>, // // Single-precision FP Unary - InstrItinData<IIC_fpUNA32 , [InstrStage<1, [FU_Pipe0]>], [5, 2]>, + InstrItinData<IIC_fpUNA32 , [InstrStage<1, [V6_Pipe]>], [5, 2]>, // // Double-precision FP Unary - InstrItinData<IIC_fpUNA64 , [InstrStage<1, [FU_Pipe0]>], [5, 2]>, + InstrItinData<IIC_fpUNA64 , [InstrStage<1, [V6_Pipe]>], [5, 2]>, // // Single-precision FP Compare - InstrItinData<IIC_fpCMP32 , [InstrStage<1, [FU_Pipe0]>], [2, 2]>, + InstrItinData<IIC_fpCMP32 , [InstrStage<1, [V6_Pipe]>], [2, 2]>, // // Double-precision FP Compare - InstrItinData<IIC_fpCMP64 , [InstrStage<1, [FU_Pipe0]>], [2, 2]>, + InstrItinData<IIC_fpCMP64 , [InstrStage<1, [V6_Pipe]>], [2, 2]>, // // Single to Double FP Convert - InstrItinData<IIC_fpCVTSD , [InstrStage<1, [FU_Pipe0]>], [5, 2]>, + InstrItinData<IIC_fpCVTSD , [InstrStage<1, [V6_Pipe]>], [5, 2]>, // // Double to Single FP Convert - InstrItinData<IIC_fpCVTDS , [InstrStage<1, [FU_Pipe0]>], [5, 2]>, + InstrItinData<IIC_fpCVTDS , [InstrStage<1, [V6_Pipe]>], [5, 2]>, // // Single-Precision FP to Integer Convert - InstrItinData<IIC_fpCVTSI , [InstrStage<1, [FU_Pipe0]>], [9, 2]>, + InstrItinData<IIC_fpCVTSI , [InstrStage<1, [V6_Pipe]>], [9, 2]>, // // Double-Precision FP to Integer Convert - InstrItinData<IIC_fpCVTDI , [InstrStage<1, [FU_Pipe0]>], [9, 2]>, + InstrItinData<IIC_fpCVTDI , [InstrStage<1, [V6_Pipe]>], [9, 2]>, // // Integer to Single-Precision FP Convert - InstrItinData<IIC_fpCVTIS , [InstrStage<1, [FU_Pipe0]>], [9, 2]>, + InstrItinData<IIC_fpCVTIS , [InstrStage<1, [V6_Pipe]>], [9, 2]>, // // Integer to Double-Precision FP Convert - InstrItinData<IIC_fpCVTID , [InstrStage<1, [FU_Pipe0]>], [9, 2]>, + InstrItinData<IIC_fpCVTID , [InstrStage<1, [V6_Pipe]>], [9, 2]>, // // Single-precision FP ALU - InstrItinData<IIC_fpALU32 , [InstrStage<1, [FU_Pipe0]>], [9, 2, 2]>, + InstrItinData<IIC_fpALU32 , [InstrStage<1, [V6_Pipe]>], [9, 2, 2]>, // // Double-precision FP ALU - InstrItinData<IIC_fpALU64 , [InstrStage<1, [FU_Pipe0]>], [9, 2, 2]>, + InstrItinData<IIC_fpALU64 , [InstrStage<1, [V6_Pipe]>], [9, 2, 2]>, // // Single-precision FP Multiply - InstrItinData<IIC_fpMUL32 , [InstrStage<1, [FU_Pipe0]>], [9, 2, 2]>, + InstrItinData<IIC_fpMUL32 , [InstrStage<1, [V6_Pipe]>], [9, 2, 2]>, // // Double-precision FP Multiply - InstrItinData<IIC_fpMUL64 , [InstrStage<2, [FU_Pipe0]>], [9, 2, 2]>, + InstrItinData<IIC_fpMUL64 , [InstrStage<2, [V6_Pipe]>], [9, 2, 2]>, // // Single-precision FP MAC - InstrItinData<IIC_fpMAC32 , [InstrStage<1, [FU_Pipe0]>], [9, 2, 2, 2]>, + InstrItinData<IIC_fpMAC32 , [InstrStage<1, [V6_Pipe]>], [9, 2, 2, 2]>, // // Double-precision FP MAC - InstrItinData<IIC_fpMAC64 , [InstrStage<2, [FU_Pipe0]>], [9, 2, 2, 2]>, + InstrItinData<IIC_fpMAC64 , [InstrStage<2, [V6_Pipe]>], [9, 2, 2, 2]>, // // Single-precision FP DIV - InstrItinData<IIC_fpDIV32 , [InstrStage<15, [FU_Pipe0]>], [20, 2, 2]>, + InstrItinData<IIC_fpDIV32 , [InstrStage<15, [V6_Pipe]>], [20, 2, 2]>, // // Double-precision FP DIV - InstrItinData<IIC_fpDIV64 , [InstrStage<29, [FU_Pipe0]>], [34, 2, 2]>, + InstrItinData<IIC_fpDIV64 , [InstrStage<29, [V6_Pipe]>], [34, 2, 2]>, // // Single-precision FP SQRT - InstrItinData<IIC_fpSQRT32 , [InstrStage<15, [FU_Pipe0]>], [20, 2, 2]>, + InstrItinData<IIC_fpSQRT32 , [InstrStage<15, [V6_Pipe]>], [20, 2, 2]>, // // Double-precision FP SQRT - InstrItinData<IIC_fpSQRT64 , 
[InstrStage<29, [FU_Pipe0]>], [34, 2, 2]>, + InstrItinData<IIC_fpSQRT64 , [InstrStage<29, [V6_Pipe]>], [34, 2, 2]>, // // Single-precision FP Load - InstrItinData<IIC_fpLoad32 , [InstrStage<1, [FU_Pipe0]>], [5, 2, 2]>, + InstrItinData<IIC_fpLoad32 , [InstrStage<1, [V6_Pipe]>], [5, 2, 2]>, // // Double-precision FP Load - InstrItinData<IIC_fpLoad64 , [InstrStage<1, [FU_Pipe0]>], [5, 2, 2]>, + InstrItinData<IIC_fpLoad64 , [InstrStage<1, [V6_Pipe]>], [5, 2, 2]>, // // FP Load Multiple - InstrItinData<IIC_fpLoadm , [InstrStage<3, [FU_Pipe0]>]>, + InstrItinData<IIC_fpLoadm , [InstrStage<3, [V6_Pipe]>]>, // // Single-precision FP Store - InstrItinData<IIC_fpStore32 , [InstrStage<1, [FU_Pipe0]>], [2, 2, 2]>, + InstrItinData<IIC_fpStore32 , [InstrStage<1, [V6_Pipe]>], [2, 2, 2]>, // // Double-precision FP Store // use FU_Issue to enforce the 1 load/store per cycle limit - InstrItinData<IIC_fpStore64 , [InstrStage<1, [FU_Pipe0]>], [2, 2, 2]>, + InstrItinData<IIC_fpStore64 , [InstrStage<1, [V6_Pipe]>], [2, 2, 2]>, // // FP Store Multiple - InstrItinData<IIC_fpStorem , [InstrStage<3, [FU_Pipe0]>]> + InstrItinData<IIC_fpStorem , [InstrStage<3, [V6_Pipe]>]> ]>; diff --git a/lib/Target/ARM/ARMScheduleV7.td b/lib/Target/ARM/ARMScheduleV7.td deleted file mode 100644 index bbbf413..0000000 --- a/lib/Target/ARM/ARMScheduleV7.td +++ /dev/null @@ -1,587 +0,0 @@ -//===- ARMScheduleV7.td - ARM v7 Scheduling Definitions ----*- tablegen -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file defines the itinerary class data for the ARM v7 processors. -// -//===----------------------------------------------------------------------===// - -// -// Scheduling information derived from "Cortex-A8 Technical Reference Manual". 
-// -// Dual issue pipeline represented by FU_Pipe0 | FU_Pipe1 -// -def CortexA8Itineraries : ProcessorItineraries<[ - - // Two fully-pipelined integer ALU pipelines - // - // No operand cycles - InstrItinData<IIC_iALUx , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>]>, - // - // Binary Instructions that produce a result - InstrItinData<IIC_iALUi , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>], [2, 2]>, - InstrItinData<IIC_iALUr , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>], [2, 2, 2]>, - InstrItinData<IIC_iALUsi , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>], [2, 2, 1]>, - InstrItinData<IIC_iALUsr , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>], [2, 2, 1, 1]>, - // - // Unary Instructions that produce a result - InstrItinData<IIC_iUNAr , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>], [2, 2]>, - InstrItinData<IIC_iUNAsi , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>], [2, 1]>, - InstrItinData<IIC_iUNAsr , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>], [2, 1, 1]>, - // - // Compare instructions - InstrItinData<IIC_iCMPi , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>], [2]>, - InstrItinData<IIC_iCMPr , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>], [2, 2]>, - InstrItinData<IIC_iCMPsi , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>], [2, 1]>, - InstrItinData<IIC_iCMPsr , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>], [2, 1, 1]>, - // - // Move instructions, unconditional - InstrItinData<IIC_iMOVi , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>], [1]>, - InstrItinData<IIC_iMOVr , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>], [1, 1]>, - InstrItinData<IIC_iMOVsi , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>], [1, 1]>, - InstrItinData<IIC_iMOVsr , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>], [1, 1, 1]>, - // - // Move instructions, conditional - InstrItinData<IIC_iCMOVi , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>], [2]>, - InstrItinData<IIC_iCMOVr , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>], [2, 1]>, - InstrItinData<IIC_iCMOVsi , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>], [2, 1]>, - InstrItinData<IIC_iCMOVsr , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>], [2, 1, 1]>, - - // Integer multiply pipeline - // Result written in E5, but that is relative to the last cycle of multicycle, - // so we use 6 for those cases - // - InstrItinData<IIC_iMUL16 , [InstrStage<1, [FU_Pipe0]>], [5, 1, 1]>, - InstrItinData<IIC_iMAC16 , [InstrStage<1, [FU_Pipe1], 0>, - InstrStage<2, [FU_Pipe0]>], [6, 1, 1, 4]>, - InstrItinData<IIC_iMUL32 , [InstrStage<1, [FU_Pipe1], 0>, - InstrStage<2, [FU_Pipe0]>], [6, 1, 1]>, - InstrItinData<IIC_iMAC32 , [InstrStage<1, [FU_Pipe1], 0>, - InstrStage<2, [FU_Pipe0]>], [6, 1, 1, 4]>, - InstrItinData<IIC_iMUL64 , [InstrStage<2, [FU_Pipe1], 0>, - InstrStage<3, [FU_Pipe0]>], [6, 6, 1, 1]>, - InstrItinData<IIC_iMAC64 , [InstrStage<2, [FU_Pipe1], 0>, - InstrStage<3, [FU_Pipe0]>], [6, 6, 1, 1]>, - - // Integer load pipeline - // - // loads have an extra cycle of latency, but are fully pipelined - // use FU_Issue to enforce the 1 load/store per cycle limit - // - // Immediate offset - InstrItinData<IIC_iLoadi , [InstrStage<1, [FU_Issue], 0>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_LdSt0]>], [3, 1]>, - // - // Register offset - InstrItinData<IIC_iLoadr , [InstrStage<1, [FU_Issue], 0>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_LdSt0]>], [3, 1, 1]>, - // - // Scaled register offset, issues over 2 cycles - InstrItinData<IIC_iLoadsi , [InstrStage<2, [FU_Issue], 0>, - InstrStage<1, [FU_Pipe0], 0>, - InstrStage<1, [FU_Pipe1]>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_LdSt0]>], [4, 1, 1]>, - // - // Immediate offset with update - InstrItinData<IIC_iLoadiu , [InstrStage<1, [FU_Issue], 0>, - 
InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_LdSt0]>], [3, 2, 1]>, - // - // Register offset with update - InstrItinData<IIC_iLoadru , [InstrStage<1, [FU_Issue], 0>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_LdSt0]>], [3, 2, 1, 1]>, - // - // Scaled register offset with update, issues over 2 cycles - InstrItinData<IIC_iLoadsiu , [InstrStage<2, [FU_Issue], 0>, - InstrStage<1, [FU_Pipe0], 0>, - InstrStage<1, [FU_Pipe1]>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_LdSt0]>], [4, 3, 1, 1]>, - // - // Load multiple - InstrItinData<IIC_iLoadm , [InstrStage<2, [FU_Issue], 0>, - InstrStage<2, [FU_Pipe0], 0>, - InstrStage<2, [FU_Pipe1]>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_LdSt0]>]>, - - // Integer store pipeline - // - // use FU_Issue to enforce the 1 load/store per cycle limit - // - // Immediate offset - InstrItinData<IIC_iStorei , [InstrStage<1, [FU_Issue], 0>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_LdSt0]>], [3, 1]>, - // - // Register offset - InstrItinData<IIC_iStorer , [InstrStage<1, [FU_Issue], 0>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_LdSt0]>], [3, 1, 1]>, - // - // Scaled register offset, issues over 2 cycles - InstrItinData<IIC_iStoresi , [InstrStage<2, [FU_Issue], 0>, - InstrStage<1, [FU_Pipe0], 0>, - InstrStage<1, [FU_Pipe1]>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_LdSt0]>], [3, 1, 1]>, - // - // Immediate offset with update - InstrItinData<IIC_iStoreiu , [InstrStage<1, [FU_Issue], 0>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_LdSt0]>], [2, 3, 1]>, - // - // Register offset with update - InstrItinData<IIC_iStoreru , [InstrStage<1, [FU_Issue], 0>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_LdSt0]>], [2, 3, 1, 1]>, - // - // Scaled register offset with update, issues over 2 cycles - InstrItinData<IIC_iStoresiu, [InstrStage<2, [FU_Issue], 0>, - InstrStage<1, [FU_Pipe0], 0>, - InstrStage<1, [FU_Pipe1]>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_LdSt0]>], [3, 3, 1, 1]>, - // - // Store multiple - InstrItinData<IIC_iStorem , [InstrStage<2, [FU_Issue], 0>, - InstrStage<2, [FU_Pipe0], 0>, - InstrStage<2, [FU_Pipe1]>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_LdSt0]>]>, - - // Branch - // - // no delay slots, so the latency of a branch is unimportant - InstrItinData<IIC_Br , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>]>, - - // VFP - // Issue through integer pipeline, and execute in NEON unit. We assume - // RunFast mode so that NFP pipeline is used for single-precision when - // possible. 
- // - // FP Special Register to Integer Register File Move - InstrItinData<IIC_fpSTAT , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NLSPipe]>]>, - // - // Single-precision FP Unary - InstrItinData<IIC_fpUNA32 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NPipe]>], [7, 1]>, - // - // Double-precision FP Unary - InstrItinData<IIC_fpUNA64 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<4, [FU_NPipe], 0>, - InstrStage<4, [FU_NLSPipe]>], [4, 1]>, - // - // Single-precision FP Compare - InstrItinData<IIC_fpCMP32 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NPipe]>], [1, 1]>, - // - // Double-precision FP Compare - InstrItinData<IIC_fpCMP64 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<4, [FU_NPipe], 0>, - InstrStage<4, [FU_NLSPipe]>], [4, 1]>, - // - // Single to Double FP Convert - InstrItinData<IIC_fpCVTSD , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<7, [FU_NPipe], 0>, - InstrStage<7, [FU_NLSPipe]>], [7, 1]>, - // - // Double to Single FP Convert - InstrItinData<IIC_fpCVTDS , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<5, [FU_NPipe], 0>, - InstrStage<5, [FU_NLSPipe]>], [5, 1]>, - // - // Single-Precision FP to Integer Convert - InstrItinData<IIC_fpCVTSI , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NPipe]>], [7, 1]>, - // - // Double-Precision FP to Integer Convert - InstrItinData<IIC_fpCVTDI , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<8, [FU_NPipe], 0>, - InstrStage<8, [FU_NLSPipe]>], [8, 1]>, - // - // Integer to Single-Precision FP Convert - InstrItinData<IIC_fpCVTIS , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NPipe]>], [7, 1]>, - // - // Integer to Double-Precision FP Convert - InstrItinData<IIC_fpCVTID , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<8, [FU_NPipe], 0>, - InstrStage<8, [FU_NLSPipe]>], [8, 1]>, - // - // Single-precision FP ALU - InstrItinData<IIC_fpALU32 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NPipe]>], [7, 1, 1]>, - // - // Double-precision FP ALU - InstrItinData<IIC_fpALU64 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<9, [FU_NPipe], 0>, - InstrStage<9, [FU_NLSPipe]>], [9, 1, 1]>, - // - // Single-precision FP Multiply - InstrItinData<IIC_fpMUL32 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NPipe]>], [7, 1, 1]>, - // - // Double-precision FP Multiply - InstrItinData<IIC_fpMUL64 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<11, [FU_NPipe], 0>, - InstrStage<11, [FU_NLSPipe]>], [11, 1, 1]>, - // - // Single-precision FP MAC - InstrItinData<IIC_fpMAC32 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NPipe]>], [7, 2, 1, 1]>, - // - // Double-precision FP MAC - InstrItinData<IIC_fpMAC64 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<19, [FU_NPipe], 0>, - InstrStage<19, [FU_NLSPipe]>], [19, 2, 1, 1]>, - // - // Single-precision FP DIV - InstrItinData<IIC_fpDIV32 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<20, [FU_NPipe], 0>, - InstrStage<20, [FU_NLSPipe]>], [20, 1, 1]>, - // - // Double-precision FP DIV - InstrItinData<IIC_fpDIV64 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<29, [FU_NPipe], 0>, - InstrStage<29, [FU_NLSPipe]>], [29, 1, 1]>, - // - // Single-precision FP SQRT - InstrItinData<IIC_fpSQRT32, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<19, [FU_NPipe], 0>, - InstrStage<19, [FU_NLSPipe]>], [19, 1]>, - // - // Double-precision FP SQRT - InstrItinData<IIC_fpSQRT64, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<29, [FU_NPipe], 0>, - InstrStage<29, [FU_NLSPipe]>], 
[29, 1]>, - // - // Single-precision FP Load - // use FU_Issue to enforce the 1 load/store per cycle limit - InstrItinData<IIC_fpLoad32, [InstrStage<1, [FU_Issue], 0>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_LdSt0], 0>, - InstrStage<1, [FU_NLSPipe]>]>, - // - // Double-precision FP Load - // use FU_Issue to enforce the 1 load/store per cycle limit - InstrItinData<IIC_fpLoad64, [InstrStage<2, [FU_Issue], 0>, - InstrStage<1, [FU_Pipe0], 0>, - InstrStage<1, [FU_Pipe1]>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_LdSt0], 0>, - InstrStage<1, [FU_NLSPipe]>]>, - // - // FP Load Multiple - // use FU_Issue to enforce the 1 load/store per cycle limit - InstrItinData<IIC_fpLoadm, [InstrStage<3, [FU_Issue], 0>, - InstrStage<2, [FU_Pipe0], 0>, - InstrStage<2, [FU_Pipe1]>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_LdSt0], 0>, - InstrStage<1, [FU_NLSPipe]>]>, - // - // Single-precision FP Store - // use FU_Issue to enforce the 1 load/store per cycle limit - InstrItinData<IIC_fpStore32,[InstrStage<1, [FU_Issue], 0>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_LdSt0], 0>, - InstrStage<1, [FU_NLSPipe]>]>, - // - // Double-precision FP Store - // use FU_Issue to enforce the 1 load/store per cycle limit - InstrItinData<IIC_fpStore64,[InstrStage<2, [FU_Issue], 0>, - InstrStage<1, [FU_Pipe0], 0>, - InstrStage<1, [FU_Pipe1]>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_LdSt0], 0>, - InstrStage<1, [FU_NLSPipe]>]>, - // - // FP Store Multiple - // use FU_Issue to enforce the 1 load/store per cycle limit - InstrItinData<IIC_fpStorem, [InstrStage<3, [FU_Issue], 0>, - InstrStage<2, [FU_Pipe0], 0>, - InstrStage<2, [FU_Pipe1]>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_LdSt0], 0>, - InstrStage<1, [FU_NLSPipe]>]>, - - // NEON - // Issue through integer pipeline, and execute in NEON unit. 
- // - // VLD1 - InstrItinData<IIC_VLD1, [InstrStage<1, [FU_Issue], 0>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_LdSt0], 0>, - InstrStage<1, [FU_NLSPipe]>]>, - // - // VLD2 - InstrItinData<IIC_VLD2, [InstrStage<1, [FU_Issue], 0>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_LdSt0], 0>, - InstrStage<1, [FU_NLSPipe]>], [2, 2, 1]>, - // - // VLD3 - InstrItinData<IIC_VLD3, [InstrStage<1, [FU_Issue], 0>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_LdSt0], 0>, - InstrStage<1, [FU_NLSPipe]>], [2, 2, 2, 1]>, - // - // VLD4 - InstrItinData<IIC_VLD4, [InstrStage<1, [FU_Issue], 0>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_LdSt0], 0>, - InstrStage<1, [FU_NLSPipe]>], [2, 2, 2, 2, 1]>, - // - // VST - InstrItinData<IIC_VST, [InstrStage<1, [FU_Issue], 0>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_LdSt0], 0>, - InstrStage<1, [FU_NLSPipe]>]>, - // - // Double-register FP Unary - InstrItinData<IIC_VUNAD, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NPipe]>], [5, 2]>, - // - // Quad-register FP Unary - // Result written in N5, but that is relative to the last cycle of multicycle, - // so we use 6 for those cases - InstrItinData<IIC_VUNAQ, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<2, [FU_NPipe]>], [6, 2]>, - // - // Double-register FP Binary - InstrItinData<IIC_VBIND, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NPipe]>], [5, 2, 2]>, - // - // Quad-register FP Binary - // Result written in N5, but that is relative to the last cycle of multicycle, - // so we use 6 for those cases - InstrItinData<IIC_VBINQ, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<2, [FU_NPipe]>], [6, 2, 2]>, - // - // Move Immediate - InstrItinData<IIC_VMOVImm, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NPipe]>], [3]>, - // - // Double-register Permute Move - InstrItinData<IIC_VMOVD, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NLSPipe]>], [2, 1]>, - // - // Quad-register Permute Move - // Result written in N2, but that is relative to the last cycle of multicycle, - // so we use 3 for those cases - InstrItinData<IIC_VMOVQ, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<2, [FU_NLSPipe]>], [3, 1]>, - // - // Integer to Single-precision Move - InstrItinData<IIC_VMOVIS , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NLSPipe]>], [2, 1]>, - // - // Integer to Double-precision Move - InstrItinData<IIC_VMOVID , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NLSPipe]>], [2, 1, 1]>, - // - // Single-precision to Integer Move - InstrItinData<IIC_VMOVSI , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NLSPipe]>], [20, 1]>, - // - // Double-precision to Integer Move - InstrItinData<IIC_VMOVDI , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NLSPipe]>], [20, 20, 1]>, - // - // Integer to Lane Move - InstrItinData<IIC_VMOVISL , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<2, [FU_NLSPipe]>], [3, 1, 1]>, - // - // Double-register Permute - InstrItinData<IIC_VPERMD, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NLSPipe]>], [2, 2, 1, 1]>, - // - // Quad-register Permute - // Result written in N2, but that is relative to the last cycle of multicycle, - // so we use 3 for those cases - InstrItinData<IIC_VPERMQ, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<2, [FU_NLSPipe]>], [3, 3, 1, 1]>, - // - // Quad-register Permute (3 cycle issue) - // Result written in N2, but that is relative to the last cycle of multicycle, - // so we use 4 for 
those cases - InstrItinData<IIC_VPERMQ3, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NLSPipe]>, - InstrStage<1, [FU_NPipe], 0>, - InstrStage<2, [FU_NLSPipe]>], [4, 4, 1, 1]>, - // - // Double-register FP Multiple-Accumulate - InstrItinData<IIC_VMACD, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NPipe]>], [9, 2, 2, 3]>, - // - // Quad-register FP Multiple-Accumulate - // Result written in N9, but that is relative to the last cycle of multicycle, - // so we use 10 for those cases - InstrItinData<IIC_VMACQ, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<2, [FU_NPipe]>], [10, 2, 2, 3]>, - // - // Double-register Reciprical Step - InstrItinData<IIC_VRECSD, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NPipe]>], [9, 2, 2]>, - // - // Quad-register Reciprical Step - InstrItinData<IIC_VRECSQ, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<2, [FU_NPipe]>], [10, 2, 2]>, - // - // Double-register Integer Count - InstrItinData<IIC_VCNTiD, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NPipe]>], [3, 2, 2]>, - // - // Quad-register Integer Count - // Result written in N3, but that is relative to the last cycle of multicycle, - // so we use 4 for those cases - InstrItinData<IIC_VCNTiQ, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<2, [FU_NPipe]>], [4, 2, 2]>, - // - // Double-register Integer Unary - InstrItinData<IIC_VUNAiD, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NPipe]>], [4, 2]>, - // - // Quad-register Integer Unary - InstrItinData<IIC_VUNAiQ, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NPipe]>], [4, 2]>, - // - // Double-register Integer Q-Unary - InstrItinData<IIC_VQUNAiD, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NPipe]>], [4, 1]>, - // - // Quad-register Integer CountQ-Unary - InstrItinData<IIC_VQUNAiQ, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NPipe]>], [4, 1]>, - // - // Double-register Integer Binary - InstrItinData<IIC_VBINiD, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NPipe]>], [3, 2, 2]>, - // - // Quad-register Integer Binary - InstrItinData<IIC_VBINiQ, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NPipe]>], [3, 2, 2]>, - // - // Double-register Integer Binary (4 cycle) - InstrItinData<IIC_VBINi4D, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NPipe]>], [4, 2, 1]>, - // - // Quad-register Integer Binary (4 cycle) - InstrItinData<IIC_VBINi4Q, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NPipe]>], [4, 2, 1]>, - // - // Double-register Integer Subtract - InstrItinData<IIC_VSUBiD, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NPipe]>], [3, 2, 1]>, - // - // Quad-register Integer Subtract - InstrItinData<IIC_VSUBiQ, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NPipe]>], [3, 2, 1]>, - // - // Double-register Integer Shift - InstrItinData<IIC_VSHLiD, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NPipe]>], [3, 1, 1]>, - // - // Quad-register Integer Shift - InstrItinData<IIC_VSHLiQ, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<2, [FU_NPipe]>], [4, 1, 1]>, - // - // Double-register Integer Shift (4 cycle) - InstrItinData<IIC_VSHLi4D, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NPipe]>], [4, 1, 1]>, - // - // Quad-register Integer Shift (4 cycle) - InstrItinData<IIC_VSHLi4Q, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<2, [FU_NPipe]>], [5, 1, 1]>, - // - // Double-register Integer Pair Add Long - InstrItinData<IIC_VPALiD, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - 
InstrStage<1, [FU_NPipe]>], [6, 3, 2, 1]>, - // - // Quad-register Integer Pair Add Long - InstrItinData<IIC_VPALiQ, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<2, [FU_NPipe]>], [7, 3, 2, 1]>, - // - // Double-register Integer Multiply (.8, .16) - InstrItinData<IIC_VMULi16D, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NPipe]>], [6, 2, 2]>, - // - // Double-register Integer Multiply (.32) - InstrItinData<IIC_VMULi32D, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<2, [FU_NPipe]>], [7, 2, 1]>, - // - // Quad-register Integer Multiply (.8, .16) - InstrItinData<IIC_VMULi16Q, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<2, [FU_NPipe]>], [7, 2, 2]>, - // - // Quad-register Integer Multiply (.32) - InstrItinData<IIC_VMULi32Q, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NPipe]>, - InstrStage<2, [FU_NLSPipe], 0>, - InstrStage<3, [FU_NPipe]>], [9, 2, 1]>, - // - // Double-register Integer Multiply-Accumulate (.8, .16) - InstrItinData<IIC_VMACi16D, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NPipe]>], [6, 2, 2, 3]>, - // - // Double-register Integer Multiply-Accumulate (.32) - InstrItinData<IIC_VMACi32D, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<2, [FU_NPipe]>], [7, 2, 1, 3]>, - // - // Quad-register Integer Multiply-Accumulate (.8, .16) - InstrItinData<IIC_VMACi16Q, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<2, [FU_NPipe]>], [7, 2, 2, 3]>, - // - // Quad-register Integer Multiply-Accumulate (.32) - InstrItinData<IIC_VMACi32Q, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NPipe]>, - InstrStage<2, [FU_NLSPipe], 0>, - InstrStage<3, [FU_NPipe]>], [9, 2, 1, 3]>, - // - // Double-register VEXT - InstrItinData<IIC_VEXTD, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NLSPipe]>], [2, 1, 1]>, - // - // Quad-register VEXT - InstrItinData<IIC_VEXTQ, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<2, [FU_NLSPipe]>], [3, 1, 1]>, - // - // VTB - InstrItinData<IIC_VTB1, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<2, [FU_NLSPipe]>], [3, 2, 1]>, - InstrItinData<IIC_VTB2, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<2, [FU_NLSPipe]>], [3, 2, 2, 1]>, - InstrItinData<IIC_VTB3, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NLSPipe]>, - InstrStage<1, [FU_NPipe], 0>, - InstrStage<2, [FU_NLSPipe]>], [4, 2, 2, 3, 1]>, - InstrItinData<IIC_VTB4, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NLSPipe]>, - InstrStage<1, [FU_NPipe], 0>, - InstrStage<2, [FU_NLSPipe]>], [4, 2, 2, 3, 3, 1]>, - // - // VTBX - InstrItinData<IIC_VTBX1, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<2, [FU_NLSPipe]>], [3, 1, 2, 1]>, - InstrItinData<IIC_VTBX2, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<2, [FU_NLSPipe]>], [3, 1, 2, 2, 1]>, - InstrItinData<IIC_VTBX3, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NLSPipe]>, - InstrStage<1, [FU_NPipe], 0>, - InstrStage<2, [FU_NLSPipe]>], [4, 1, 2, 2, 3, 1]>, - InstrItinData<IIC_VTBX4, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NLSPipe]>, - InstrStage<1, [FU_NPipe], 0>, - InstrStage<2, [FU_NLSPipe]>], [4, 1, 2, 2, 3, 3, 1]> -]>; diff --git a/lib/Target/ARM/ARMSelectionDAGInfo.cpp b/lib/Target/ARM/ARMSelectionDAGInfo.cpp new file mode 100644 index 0000000..c04ee38 --- /dev/null +++ b/lib/Target/ARM/ARMSelectionDAGInfo.cpp @@ -0,0 +1,22 @@ +//===-- ARMSelectionDAGInfo.cpp - ARM SelectionDAG Info -------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source 
+// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the ARMSelectionDAGInfo class. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "arm-selectiondag-info" +#include "ARMSelectionDAGInfo.h" +using namespace llvm; + +ARMSelectionDAGInfo::ARMSelectionDAGInfo() { +} + +ARMSelectionDAGInfo::~ARMSelectionDAGInfo() { +} diff --git a/lib/Target/ARM/ARMSelectionDAGInfo.h b/lib/Target/ARM/ARMSelectionDAGInfo.h new file mode 100644 index 0000000..afe9a47 --- /dev/null +++ b/lib/Target/ARM/ARMSelectionDAGInfo.h @@ -0,0 +1,29 @@ +//===-- ARMSelectionDAGInfo.h - ARM SelectionDAG Info -----------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the ARM subclass for TargetSelectionDAGInfo. +// +//===----------------------------------------------------------------------===// + +#ifndef ARMSELECTIONDAGINFO_H +#define ARMSELECTIONDAGINFO_H + +#include "llvm/Target/TargetSelectionDAGInfo.h" + +namespace llvm { + +class ARMSelectionDAGInfo : public TargetSelectionDAGInfo { +public: + ARMSelectionDAGInfo(); + ~ARMSelectionDAGInfo(); +}; + +} + +#endif diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp index 9e55cd8..b11580a 100644 --- a/lib/Target/ARM/ARMSubtarget.cpp +++ b/lib/Target/ARM/ARMSubtarget.cpp @@ -116,7 +116,8 @@ ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &FS, /// GVIsIndirectSymbol - true if the GV will be accessed via an indirect symbol. bool -ARMSubtarget::GVIsIndirectSymbol(GlobalValue *GV, Reloc::Model RelocM) const { +ARMSubtarget::GVIsIndirectSymbol(const GlobalValue *GV, + Reloc::Model RelocM) const { if (RelocM == Reloc::Static) return false; diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h index fa56a91..288a19a 100644 --- a/lib/Target/ARM/ARMSubtarget.h +++ b/lib/Target/ARM/ARMSubtarget.h @@ -160,7 +160,7 @@ protected: /// GVIsIndirectSymbol - true if the GV will be accessed via an indirect /// symbol. - bool GVIsIndirectSymbol(GlobalValue *GV, Reloc::Model RelocM) const; + bool GVIsIndirectSymbol(const GlobalValue *GV, Reloc::Model RelocM) const; }; } // End llvm namespace diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp index 95f57b7..662e61e 100644 --- a/lib/Target/ARM/ARMTargetMachine.cpp +++ b/lib/Target/ARM/ARMTargetMachine.cpp @@ -102,8 +102,12 @@ bool ARMBaseTargetMachine::addPreRegAlloc(PassManagerBase &PM, bool ARMBaseTargetMachine::addPreSched2(PassManagerBase &PM, CodeGenOpt::Level OptLevel) { // FIXME: temporarily disabling load / store optimization pass for Thumb1. - if (OptLevel != CodeGenOpt::None && !Subtarget.isThumb1Only()) - PM.add(createARMLoadStoreOptimizationPass()); + if (OptLevel != CodeGenOpt::None) { + if (!Subtarget.isThumb1Only()) + PM.add(createARMLoadStoreOptimizationPass()); + if (Subtarget.hasNEON()) + PM.add(createNEONMoveFixPass()); + } // Expand some pseudo instructions into multiple instructions to allow // proper scheduling. 
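The two ARMTargetMachine.cpp hunks around this point move the NEON move-fix pass from the pre-emit stage into addPreSched2, next to the load/store optimizer. As a rough illustration of the gating logic only (placeholder strings, not the real PassManagerBase or pass-creation API), a minimal standalone sketch:

```cpp
// Standalone model of the addPreSched2 gating shown above: both passes are
// skipped at -O0, the load/store optimizer is skipped for Thumb1-only
// subtargets, and the NEON move fix is only queued when NEON is available.
// Pass names here are illustrative strings, not the real LLVM pass objects.
#include <iostream>
#include <string>
#include <vector>

struct Subtarget {
  bool Thumb1Only;
  bool NEON;
};

static void addPreSched2(std::vector<std::string> &PM, const Subtarget &ST,
                         bool Optimizing) {
  if (Optimizing) {
    if (!ST.Thumb1Only)
      PM.push_back("ARMLoadStoreOptimization");
    if (ST.NEON)
      PM.push_back("NEONMoveFix"); // moved here from addPreEmitPass below
  }
}

int main() {
  std::vector<std::string> PM;
  addPreSched2(PM, Subtarget{/*Thumb1Only=*/false, /*NEON=*/true},
               /*Optimizing=*/true);
  for (const std::string &P : PM)
    std::cout << P << '\n'; // ARMLoadStoreOptimization, then NEONMoveFix
}
```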
@@ -118,8 +122,6 @@ bool ARMBaseTargetMachine::addPreEmitPass(PassManagerBase &PM, if (OptLevel != CodeGenOpt::None) { if (!Subtarget.isThumb1Only()) PM.add(createIfConverterPass()); - if (Subtarget.hasNEON()) - PM.add(createNEONMoveFixPass()); } if (Subtarget.isThumb2()) { diff --git a/lib/Target/ARM/ARMTargetMachine.h b/lib/Target/ARM/ARMTargetMachine.h index c32f16c..4e205df 100644 --- a/lib/Target/ARM/ARMTargetMachine.h +++ b/lib/Target/ARM/ARMTargetMachine.h @@ -71,8 +71,8 @@ public: return &InstrInfo.getRegisterInfo(); } - virtual ARMTargetLowering *getTargetLowering() const { - return const_cast<ARMTargetLowering*>(&TLInfo); + virtual const ARMTargetLowering *getTargetLowering() const { + return &TLInfo; } virtual const ARMInstrInfo *getInstrInfo() const { return &InstrInfo; } @@ -97,8 +97,8 @@ public: return &InstrInfo->getRegisterInfo(); } - virtual ARMTargetLowering *getTargetLowering() const { - return const_cast<ARMTargetLowering*>(&TLInfo); + virtual const ARMTargetLowering *getTargetLowering() const { + return &TLInfo; } /// returns either Thumb1InstrInfo or Thumb2InstrInfo diff --git a/lib/Target/ARM/ARMTargetObjectFile.cpp b/lib/Target/ARM/ARMTargetObjectFile.cpp index 680d032..091a3b3 100644 --- a/lib/Target/ARM/ARMTargetObjectFile.cpp +++ b/lib/Target/ARM/ARMTargetObjectFile.cpp @@ -9,6 +9,7 @@ #include "ARMTargetObjectFile.h" #include "ARMSubtarget.h" +#include "llvm/MC/MCContext.h" #include "llvm/MC/MCSectionELF.h" #include "llvm/Support/Dwarf.h" #include "llvm/Target/TargetMachine.h" @@ -25,12 +26,14 @@ void ARMElfTargetObjectFile::Initialize(MCContext &Ctx, if (TM.getSubtarget<ARMSubtarget>().isAAPCS_ABI()) { StaticCtorSection = - getELFSection(".init_array", MCSectionELF::SHT_INIT_ARRAY, - MCSectionELF::SHF_WRITE | MCSectionELF::SHF_ALLOC, - SectionKind::getDataRel()); + getContext().getELFSection(".init_array", MCSectionELF::SHT_INIT_ARRAY, + MCSectionELF::SHF_WRITE | + MCSectionELF::SHF_ALLOC, + SectionKind::getDataRel()); StaticDtorSection = - getELFSection(".fini_array", MCSectionELF::SHT_FINI_ARRAY, - MCSectionELF::SHF_WRITE | MCSectionELF::SHF_ALLOC, - SectionKind::getDataRel()); + getContext().getELFSection(".fini_array", MCSectionELF::SHT_FINI_ARRAY, + MCSectionELF::SHF_WRITE | + MCSectionELF::SHF_ALLOC, + SectionKind::getDataRel()); } } diff --git a/lib/Target/ARM/AsmParser/ARMAsmLexer.cpp b/lib/Target/ARM/AsmParser/ARMAsmLexer.cpp new file mode 100644 index 0000000..f859d1b --- /dev/null +++ b/lib/Target/ARM/AsmParser/ARMAsmLexer.cpp @@ -0,0 +1,140 @@ +//===-- ARMAsmLexer.cpp - Tokenize ARM assembly to AsmTokens --------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "ARM.h" +#include "ARMTargetMachine.h" + +#include "llvm/ADT/OwningPtr.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringExtras.h" + +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCParser/MCAsmLexer.h" +#include "llvm/MC/MCParser/MCParsedAsmOperand.h" + +#include "llvm/Target/TargetAsmLexer.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegistry.h" + +#include <string> +#include <map> + +using namespace llvm; + +namespace { + + class ARMBaseAsmLexer : public TargetAsmLexer { + const MCAsmInfo &AsmInfo; + + const AsmToken &lexDefinite() { + return getLexer()->Lex(); + } + + AsmToken LexTokenUAL(); + protected: + typedef std::map <std::string, unsigned> rmap_ty; + + rmap_ty RegisterMap; + + void InitRegisterMap(const TargetRegisterInfo *info) { + unsigned numRegs = info->getNumRegs(); + + for (unsigned i = 0; i < numRegs; ++i) { + const char *regName = info->getName(i); + if (regName) + RegisterMap[regName] = i; + } + } + + unsigned MatchRegisterName(StringRef Name) { + rmap_ty::iterator iter = RegisterMap.find(Name.str()); + if (iter != RegisterMap.end()) + return iter->second; + else + return 0; + } + + AsmToken LexToken() { + if (!Lexer) { + SetError(SMLoc(), "No MCAsmLexer installed"); + return AsmToken(AsmToken::Error, "", 0); + } + + switch (AsmInfo.getAssemblerDialect()) { + default: + SetError(SMLoc(), "Unhandled dialect"); + return AsmToken(AsmToken::Error, "", 0); + case 0: + return LexTokenUAL(); + } + } + public: + ARMBaseAsmLexer(const Target &T, const MCAsmInfo &MAI) + : TargetAsmLexer(T), AsmInfo(MAI) { + } + }; + + class ARMAsmLexer : public ARMBaseAsmLexer { + public: + ARMAsmLexer(const Target &T, const MCAsmInfo &MAI) + : ARMBaseAsmLexer(T, MAI) { + std::string tripleString("arm-unknown-unknown"); + std::string featureString; + OwningPtr<const TargetMachine> + targetMachine(T.createTargetMachine(tripleString, featureString)); + InitRegisterMap(targetMachine->getRegisterInfo()); + } + }; + + class ThumbAsmLexer : public ARMBaseAsmLexer { + public: + ThumbAsmLexer(const Target &T, const MCAsmInfo &MAI) + : ARMBaseAsmLexer(T, MAI) { + std::string tripleString("thumb-unknown-unknown"); + std::string featureString; + OwningPtr<const TargetMachine> + targetMachine(T.createTargetMachine(tripleString, featureString)); + InitRegisterMap(targetMachine->getRegisterInfo()); + } + }; +} + +AsmToken ARMBaseAsmLexer::LexTokenUAL() { + const AsmToken &lexedToken = lexDefinite(); + + switch (lexedToken.getKind()) { + default: + return AsmToken(lexedToken); + case AsmToken::Error: + SetError(Lexer->getErrLoc(), Lexer->getErr()); + return AsmToken(lexedToken); + case AsmToken::Identifier: + { + std::string upperCase = lexedToken.getString().str(); + std::string lowerCase = LowercaseString(upperCase); + StringRef lowerRef(lowerCase); + + unsigned regID = MatchRegisterName(lowerRef); + + if (regID) { + return AsmToken(AsmToken::Register, + lexedToken.getString(), + static_cast<int64_t>(regID)); + } else { + return AsmToken(lexedToken); + } + } + } +} + +extern "C" void LLVMInitializeARMAsmLexer() { + RegisterAsmLexer<ARMAsmLexer> X(TheARMTarget); + RegisterAsmLexer<ThumbAsmLexer> Y(TheThumbTarget); +} + diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index cf55377..bfa89c4 100644 --- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -812,8 +812,11 @@ bool 
ARMAsmParser::ParseDirectiveCode(SMLoc L) { return false; } +extern "C" void LLVMInitializeARMAsmLexer(); + /// Force static initialization. extern "C" void LLVMInitializeARMAsmParser() { RegisterAsmParser<ARMAsmParser> X(TheARMTarget); RegisterAsmParser<ARMAsmParser> Y(TheThumbTarget); + LLVMInitializeARMAsmLexer(); } diff --git a/lib/Target/ARM/AsmParser/CMakeLists.txt b/lib/Target/ARM/AsmParser/CMakeLists.txt index 308c6cf..9ba7c01 100644 --- a/lib/Target/ARM/AsmParser/CMakeLists.txt +++ b/lib/Target/ARM/AsmParser/CMakeLists.txt @@ -1,6 +1,7 @@ include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. ) add_llvm_library(LLVMARMAsmParser + ARMAsmLexer.cpp ARMAsmParser.cpp ) diff --git a/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp b/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp index 15c5294..80a9d2d 100644 --- a/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp +++ b/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp @@ -21,6 +21,7 @@ #include "ARMMachineFunctionInfo.h" #include "ARMMCInstLower.h" #include "ARMTargetMachine.h" +#include "llvm/Analysis/DebugInfo.h" #include "llvm/Constants.h" #include "llvm/Module.h" #include "llvm/Type.h" @@ -239,7 +240,7 @@ namespace { } else if (ACPV->isBlockAddress()) { O << *GetBlockAddressSymbol(ACPV->getBlockAddress()); } else if (ACPV->isGlobalValue()) { - GlobalValue *GV = ACPV->getGV(); + const GlobalValue *GV = ACPV->getGV(); bool isIndirect = Subtarget->isTargetDarwin() && Subtarget->GVIsIndirectSymbol(GV, TM.getRelocationModel()); if (!isIndirect) @@ -352,7 +353,7 @@ void ARMAsmPrinter::printOperand(const MachineInstr *MI, int OpNum, return; case MachineOperand::MO_GlobalAddress: { bool isCallOp = Modifier && !strcmp(Modifier, "call"); - GlobalValue *GV = MO.getGlobal(); + const GlobalValue *GV = MO.getGlobal(); if ((Modifier && strcmp(Modifier, "lo16") == 0) || (TF & ARMII::MO_LO16)) @@ -504,7 +505,6 @@ void ARMAsmPrinter::printAddrMode2OffsetOperand(const MachineInstr *MI, int Op, if (!MO1.getReg()) { unsigned ImmOffs = ARM_AM::getAM2Offset(MO2.getImm()); - assert(ImmOffs && "Malformed indexed load / store!"); O << "#" << ARM_AM::getAddrOpcStr(ARM_AM::getAM2Op(MO2.getImm())) << ImmOffs; @@ -556,7 +556,6 @@ void ARMAsmPrinter::printAddrMode3OffsetOperand(const MachineInstr *MI, int Op, } unsigned ImmOffs = ARM_AM::getAM3Offset(MO2.getImm()); - assert(ImmOffs && "Malformed indexed load / store!"); O << "#" << ARM_AM::getAddrOpcStr(ARM_AM::getAM3Op(MO2.getImm())) << ImmOffs; @@ -1110,6 +1109,24 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { SmallString<128> Str; raw_svector_ostream OS(Str); + if (MI->getOpcode() == ARM::DBG_VALUE) { + unsigned NOps = MI->getNumOperands(); + assert(NOps==4); + OS << '\t' << MAI->getCommentString() << "DEBUG_VALUE: "; + // cast away const; DIetc do not take const operands for some reason. + DIVariable V(const_cast<MDNode *>(MI->getOperand(NOps-1).getMetadata())); + OS << V.getName(); + OS << " <- "; + // Frame address. Currently handles register +- offset only. 
+ assert(MI->getOperand(0).isReg() && MI->getOperand(1).isImm()); + OS << '['; printOperand(MI, 0, OS); OS << '+'; printOperand(MI, 1, OS); + OS << ']'; + OS << "+"; + printOperand(MI, NOps-2, OS); + OutStreamer.EmitRawText(OS.str()); + return; + } + printInstruction(MI, OS); OutStreamer.EmitRawText(OS.str()); @@ -1129,22 +1146,23 @@ void ARMAsmPrinter::EmitStartOfAsmFile(Module &M) { // avoid out-of-range branches that are due a fundamental limitation of // the way symbol offsets are encoded with the current Darwin ARM // relocations. - TargetLoweringObjectFileMachO &TLOFMacho = - static_cast<TargetLoweringObjectFileMachO &>(getObjFileLowering()); + const TargetLoweringObjectFileMachO &TLOFMacho = + static_cast<const TargetLoweringObjectFileMachO &>( + getObjFileLowering()); OutStreamer.SwitchSection(TLOFMacho.getTextSection()); OutStreamer.SwitchSection(TLOFMacho.getTextCoalSection()); OutStreamer.SwitchSection(TLOFMacho.getConstTextCoalSection()); if (RelocM == Reloc::DynamicNoPIC) { const MCSection *sect = - TLOFMacho.getMachOSection("__TEXT", "__symbol_stub4", - MCSectionMachO::S_SYMBOL_STUBS, - 12, SectionKind::getText()); + OutContext.getMachOSection("__TEXT", "__symbol_stub4", + MCSectionMachO::S_SYMBOL_STUBS, + 12, SectionKind::getText()); OutStreamer.SwitchSection(sect); } else { const MCSection *sect = - TLOFMacho.getMachOSection("__TEXT", "__picsymbolstub4", - MCSectionMachO::S_SYMBOL_STUBS, - 16, SectionKind::getText()); + OutContext.getMachOSection("__TEXT", "__picsymbolstub4", + MCSectionMachO::S_SYMBOL_STUBS, + 16, SectionKind::getText()); OutStreamer.SwitchSection(sect); } } @@ -1201,8 +1219,8 @@ void ARMAsmPrinter::EmitStartOfAsmFile(Module &M) { void ARMAsmPrinter::EmitEndOfAsmFile(Module &M) { if (Subtarget->isTargetDarwin()) { // All darwin targets use mach-o. 
- TargetLoweringObjectFileMachO &TLOFMacho = - static_cast<TargetLoweringObjectFileMachO &>(getObjFileLowering()); + const TargetLoweringObjectFileMachO &TLOFMacho = + static_cast<const TargetLoweringObjectFileMachO &>(getObjFileLowering()); MachineModuleInfoMachO &MMIMacho = MMI->getObjFileInfo<MachineModuleInfoMachO>(); diff --git a/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp b/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp index ef5ead6..ac6331f 100644 --- a/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp +++ b/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp @@ -18,6 +18,7 @@ #include "llvm/MC/MCInst.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCExpr.h" +#include "llvm/ADT/StringExtras.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -330,7 +331,6 @@ void ARMInstPrinter::printAddrMode2OffsetOperand(const MCInst *MI, if (!MO1.getReg()) { unsigned ImmOffs = ARM_AM::getAM2Offset(MO2.getImm()); - assert(ImmOffs && "Malformed indexed load / store!"); O << '#' << ARM_AM::getAddrOpcStr(ARM_AM::getAM2Op(MO2.getImm())) << ImmOffs; @@ -380,7 +380,6 @@ void ARMInstPrinter::printAddrMode3OffsetOperand(const MCInst *MI, } unsigned ImmOffs = ARM_AM::getAM3Offset(MO2.getImm()); - assert(ImmOffs && "Malformed indexed load / store!"); O << '#' << ARM_AM::getAddrOpcStr(ARM_AM::getAM3Op(MO2.getImm())) << ImmOffs; @@ -779,3 +778,22 @@ void ARMInstPrinter::printVFPf64ImmOperand(const MCInst *MI, unsigned OpNum, O << '#' << MI->getOperand(OpNum).getImm(); } +void ARMInstPrinter::printHex8ImmOperand(const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + O << "#0x" << utohexstr(MI->getOperand(OpNum).getImm() & 0xff); +} + +void ARMInstPrinter::printHex16ImmOperand(const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + O << "#0x" << utohexstr(MI->getOperand(OpNum).getImm() & 0xffff); +} + +void ARMInstPrinter::printHex32ImmOperand(const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + O << "#0x" << utohexstr(MI->getOperand(OpNum).getImm() & 0xffffffff); +} + +void ARMInstPrinter::printHex64ImmOperand(const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + O << "#0x" << utohexstr(MI->getOperand(OpNum).getImm()); +} diff --git a/lib/Target/ARM/AsmPrinter/ARMInstPrinter.h b/lib/Target/ARM/AsmPrinter/ARMInstPrinter.h index dd006fc..be0b7c1 100644 --- a/lib/Target/ARM/AsmPrinter/ARMInstPrinter.h +++ b/lib/Target/ARM/AsmPrinter/ARMInstPrinter.h @@ -104,10 +104,10 @@ public: void printNoHashImmediate(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printVFPf32ImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printVFPf64ImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printHex8ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O) {} - void printHex16ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O) {} - void printHex32ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O) {} - void printHex64ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O) {} + void printHex8ImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); + void printHex16ImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); + void printHex32ImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); + void printHex64ImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printPCLabel(const MCInst *MI, unsigned OpNum, raw_ostream &O); // FIXME: Implement. 
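The new printHex8/16/32ImmOperand bodies mask the immediate before handing it to utohexstr because MCOperand::getImm() returns a full 64-bit signed value, so a small negative immediate would otherwise print with all upper bits set. Below is a minimal standalone sketch of that masking idea; it uses plain printf instead of the LLVM utohexstr helper purely to stay self-contained, and is not part of the patch.

// Illustrative sketch only. It mimics the masking done in the new
// printHex*ImmOperand implementations: truncate a (possibly sign-extended)
// 64-bit immediate to the operand width before formatting it as hex.
#include <cstdint>
#include <cstdio>

static void printHexImm(int64_t Imm, uint64_t Mask) {
  // Masking first keeps e.g. Imm = -1 from printing as 0xffffffffffffffff
  // when the operand is only 8 bits wide.
  std::printf("#0x%llx\n", (unsigned long long)((uint64_t)Imm & Mask));
}

int main() {
  printHexImm(-1, 0xffu);           // prints #0xff    (8-bit operand)
  printHexImm(-1, 0xffffu);         // prints #0xffff  (16-bit operand)
  printHexImm(0x1234, 0xffffffffu); // prints #0x1234  (32-bit operand)
  return 0;
}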
diff --git a/lib/Target/ARM/CMakeLists.txt b/lib/Target/ARM/CMakeLists.txt index bbc0095..29e66e1 100644 --- a/lib/Target/ARM/CMakeLists.txt +++ b/lib/Target/ARM/CMakeLists.txt @@ -10,6 +10,7 @@ tablegen(ARMGenAsmWriter.inc -gen-asm-writer) tablegen(ARMGenDAGISel.inc -gen-dag-isel) tablegen(ARMGenCallingConv.inc -gen-callingconv) tablegen(ARMGenSubtarget.inc -gen-subtarget) +tablegen(ARMGenEDInfo.inc -gen-enhanced-disassembly-info) add_llvm_target(ARMCodeGen ARMBaseInstrInfo.cpp @@ -36,6 +37,7 @@ add_llvm_target(ARMCodeGen Thumb2InstrInfo.cpp Thumb2RegisterInfo.cpp Thumb2SizeReduction.cpp + ARMSelectionDAGInfo.cpp ) target_link_libraries (LLVMARMCodeGen LLVMSelectionDAG) diff --git a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp index 47c3104..4de697e 100644 --- a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp +++ b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp @@ -18,6 +18,7 @@ #include "ARMDisassembler.h" #include "ARMDisassemblerCore.h" +#include "llvm/MC/EDInstInfo.h" #include "llvm/MC/MCInst.h" #include "llvm/Target/TargetRegistry.h" #include "llvm/Support/Debug.h" @@ -38,7 +39,9 @@ /// #include "../ARMGenDecoderTables.inc" -namespace llvm { +#include "../ARMGenEDInfo.inc" + +using namespace llvm; /// showBitVector - Use the raw_ostream to log a diagnostic message describing /// the inidividual bits of the instruction. @@ -247,27 +250,27 @@ static unsigned T2Morph2LoadLiteral(unsigned Opcode) { case ARM::t2LDR_POST: case ARM::t2LDR_PRE: case ARM::t2LDRi12: case ARM::t2LDRi8: - case ARM::t2LDRs: + case ARM::t2LDRs: case ARM::t2LDRT: return ARM::t2LDRpci; case ARM::t2LDRB_POST: case ARM::t2LDRB_PRE: case ARM::t2LDRBi12: case ARM::t2LDRBi8: - case ARM::t2LDRBs: + case ARM::t2LDRBs: case ARM::t2LDRBT: return ARM::t2LDRBpci; case ARM::t2LDRH_POST: case ARM::t2LDRH_PRE: case ARM::t2LDRHi12: case ARM::t2LDRHi8: - case ARM::t2LDRHs: + case ARM::t2LDRHs: case ARM::t2LDRHT: return ARM::t2LDRHpci; case ARM::t2LDRSB_POST: case ARM::t2LDRSB_PRE: case ARM::t2LDRSBi12: case ARM::t2LDRSBi8: - case ARM::t2LDRSBs: + case ARM::t2LDRSBs: case ARM::t2LDRSBT: return ARM::t2LDRSBpci; case ARM::t2LDRSH_POST: case ARM::t2LDRSH_PRE: case ARM::t2LDRSHi12: case ARM::t2LDRSHi8: - case ARM::t2LDRSHs: + case ARM::t2LDRSHs: case ARM::t2LDRSHT: return ARM::t2LDRSHpci; } } @@ -404,7 +407,6 @@ bool ARMDisassembler::getInstruction(MCInst &MI, }); ARMBasicMCBuilder *Builder = CreateMCBuilder(Opcode, Format); - if (!Builder) return false; @@ -492,11 +494,11 @@ bool ThumbDisassembler::getInstruction(MCInst &MI, }); ARMBasicMCBuilder *Builder = CreateMCBuilder(Opcode, Format); - Builder->setSession(const_cast<Session *>(&SO)); - if (!Builder) return false; + Builder->SetSession(const_cast<Session *>(&SO)); + if (!Builder->Build(MI, insn)) return false; @@ -506,17 +508,37 @@ bool ThumbDisassembler::getInstruction(MCInst &MI, } // A8.6.50 +// Valid return values are {1, 2, 3, 4}, with 0 signifying an error condition. static unsigned short CountITSize(unsigned ITMask) { // First count the trailing zeros of the IT mask. unsigned TZ = CountTrailingZeros_32(ITMask); - assert(TZ <= 3 && "Encoding error"); + if (TZ > 3) { + DEBUG(errs() << "Encoding error: IT Mask '0000'"); + return 0; + } return (4 - TZ); } -/// Init ITState. -void Session::InitIT(unsigned short bits7_0) { +/// Init ITState. Note that at least one bit is always 1 in mask. 
+bool Session::InitIT(unsigned short bits7_0) { ITCounter = CountITSize(slice(bits7_0, 3, 0)); + if (ITCounter == 0) + return false; + + // A8.6.50 IT + unsigned short FirstCond = slice(bits7_0, 7, 4); + if (FirstCond == 0xF) { + DEBUG(errs() << "Encoding error: IT FirstCond '1111'"); + return false; + } + if (FirstCond == 0xE && ITCounter != 1) { + DEBUG(errs() << "Encoding error: IT FirstCond '1110' && Mask != '1000'"); + return false; + } + ITState = bits7_0; + + return true; } /// Update ITState if necessary. @@ -547,4 +569,10 @@ extern "C" void LLVMInitializeARMDisassembler() { createThumbDisassembler); } -} // namespace llvm +EDInstInfo *ARMDisassembler::getEDInfo() const { + return instInfoARM; +} + +EDInstInfo *ThumbDisassembler::getEDInfo() const { + return instInfoARM; +} diff --git a/lib/Target/ARM/Disassembler/ARMDisassembler.h b/lib/Target/ARM/Disassembler/ARMDisassembler.h index 44592e0..0a74a38 100644 --- a/lib/Target/ARM/Disassembler/ARMDisassembler.h +++ b/lib/Target/ARM/Disassembler/ARMDisassembler.h @@ -24,6 +24,8 @@ class MCInst; class MemoryObject; class raw_ostream; +struct EDInstInfo; + /// ARMDisassembler - ARM disassembler for all ARM platforms. class ARMDisassembler : public MCDisassembler { public: @@ -42,6 +44,9 @@ public: const MemoryObject ®ion, uint64_t address, raw_ostream &vStream) const; + + /// getEDInfo - See MCDisassembler. + EDInstInfo *getEDInfo() const; private: }; @@ -55,7 +60,7 @@ public: Session() : ITCounter(0), ITState(0) {} ~Session() {} /// InitIT - Initializes ITCounter/ITState. - void InitIT(unsigned short bits7_0); + bool InitIT(unsigned short bits7_0); /// UpdateIT - Updates ITCounter/ITState as IT Block progresses. void UpdateIT(); @@ -82,6 +87,9 @@ public: const MemoryObject ®ion, uint64_t address, raw_ostream &vStream) const; + + /// getEDInfo - See MCDisassembler. + EDInstInfo *getEDInfo() const; private: Session SO; }; diff --git a/lib/Target/ARM/Disassembler/ARMDisassemblerCore.cpp b/lib/Target/ARM/Disassembler/ARMDisassemblerCore.cpp index db921ef..adb7795 100644 --- a/lib/Target/ARM/Disassembler/ARMDisassemblerCore.cpp +++ b/lib/Target/ARM/Disassembler/ARMDisassemblerCore.cpp @@ -13,8 +13,12 @@ // //===----------------------------------------------------------------------===// +#define DEBUG_TYPE "arm-disassembler" + #include "ARMDisassemblerCore.h" #include "ARMAddressingModes.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" /// ARMGenInstrInfo.inc - ARMGenInstrInfo.inc contains the static const /// TargetInstrDesc ARMInsts[] definition and the TargetOperandInfo[]'s @@ -75,7 +79,7 @@ const char *ARMUtils::OpcodeName(unsigned Opcode) { // Return the register enum Based on RegClass and the raw register number. // For DRegPair, see comments below. // FIXME: Auto-gened? -static unsigned getRegisterEnum(unsigned RegClassID, unsigned RawRegister, +static unsigned getRegisterEnum(BO B, unsigned RegClassID, unsigned RawRegister, bool DRegPair = false) { if (DRegPair && RegClassID == ARM::QPRRegClassID) { @@ -345,7 +349,9 @@ static unsigned getRegisterEnum(unsigned RegClassID, unsigned RawRegister, } break; } - assert(0 && "Invalid (RegClassID, RawRegister) combination"); + DEBUG(errs() << "Invalid (RegClassID, RawRegister) combination\n"); + // Encoding error. Mark the builder with error code != 0. 
+ B->SetErr(-1); return 0; } @@ -509,7 +515,7 @@ static bool DisassemblePseudo(MCInst &MI, unsigned Opcode, uint32_t insn, // Inst{3-0} => Rm // Inst{11-8} => Rs static bool DisassembleMulFrm(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO) { + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { const TargetInstrDesc &TID = ARMInsts[Opcode]; unsigned short NumDefs = TID.getNumDefs(); @@ -529,26 +535,26 @@ static bool DisassembleMulFrm(MCInst &MI, unsigned Opcode, uint32_t insn, if (NumDefs == 2) { assert(NumOps >= 4 && OpInfo[3].RegClass == ARM::GPRRegClassID && "Expect 4th register operand"); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRd(insn)))); ++OpIdx; } // The destination register: RdHi{19-16} or Rd{19-16}. - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRn(insn)))); // The two src regsiters: Rn{3-0}, then Rm{11-8}. - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRm(insn)))); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRs(insn)))); OpIdx += 3; // Many multiply instructions (e.g., MLA) have three src registers. // The third register operand is Ra{15-12}. if (OpIdx < NumOps && OpInfo[OpIdx].RegClass == ARM::GPRRegClassID) { - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRd(insn)))); ++OpIdx; } @@ -610,7 +616,7 @@ static inline unsigned GetCopOpc(uint32_t insn) { // and friends // static bool DisassembleCoprocessor(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded) { + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { assert(NumOps >= 5 && "Num of operands >= 5 for coprocessor instr"); @@ -631,7 +637,7 @@ static bool DisassembleCoprocessor(MCInst &MI, unsigned Opcode, uint32_t insn, MI.addOperand(MCOperand::CreateImm(decodeRd(insn))); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRn(insn)))); if (PW) { @@ -651,11 +657,11 @@ static bool DisassembleCoprocessor(MCInst &MI, unsigned Opcode, uint32_t insn, MI.addOperand(NoGPR ? MCOperand::CreateImm(decodeRd(insn)) : MCOperand::CreateReg( - getRegisterEnum(ARM::GPRRegClassID, + getRegisterEnum(B, ARM::GPRRegClassID, decodeRd(insn)))); MI.addOperand(OneCopOpc ? 
MCOperand::CreateReg( - getRegisterEnum(ARM::GPRRegClassID, + getRegisterEnum(B, ARM::GPRRegClassID, decodeRn(insn))) : MCOperand::CreateImm(decodeRn(insn))); @@ -688,18 +694,19 @@ static bool DisassembleCoprocessor(MCInst &MI, unsigned Opcode, uint32_t insn, // SRSW/SRS: addrmode4:$addr mode_imm // RFEW/RFE: addrmode4:$addr Rn static bool DisassembleBrFrm(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO) { + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { if (CoprocessorOpcode(Opcode)) - return DisassembleCoprocessor(MI, Opcode, insn, NumOps, NumOpsAdded); + return DisassembleCoprocessor(MI, Opcode, insn, NumOps, NumOpsAdded, B); const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; + if (!OpInfo) return false; // MRS and MRSsys take one GPR reg Rd. if (Opcode == ARM::MRS || Opcode == ARM::MRSsys) { assert(NumOps >= 1 && OpInfo[0].RegClass == ARM::GPRRegClassID && "Reg operand expected"); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRd(insn)))); NumOpsAdded = 1; return true; @@ -708,7 +715,7 @@ static bool DisassembleBrFrm(MCInst &MI, unsigned Opcode, uint32_t insn, if (Opcode == ARM::BXJ) { assert(NumOps >= 1 && OpInfo[0].RegClass == ARM::GPRRegClassID && "Reg operand expected"); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRm(insn)))); NumOpsAdded = 1; return true; @@ -717,7 +724,7 @@ static bool DisassembleBrFrm(MCInst &MI, unsigned Opcode, uint32_t insn, if (Opcode == ARM::MSR || Opcode == ARM::MSRsys) { assert(NumOps >= 1 && OpInfo[0].RegClass == ARM::GPRRegClassID && "Reg operand expected"); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRm(insn)))); MI.addOperand(MCOperand::CreateImm(slice(insn, 19, 16))); NumOpsAdded = 2; @@ -748,7 +755,7 @@ static bool DisassembleBrFrm(MCInst &MI, unsigned Opcode, uint32_t insn, if (Opcode == ARM::SRSW || Opcode == ARM::SRS) MI.addOperand(MCOperand::CreateImm(slice(insn, 4, 0))); else - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRn(insn)))); NumOpsAdded = 3; return true; @@ -791,9 +798,11 @@ static bool DisassembleBrFrm(MCInst &MI, unsigned Opcode, uint32_t insn, // BLXr9, BXr9 // BRIND, BX_RET static bool DisassembleBrMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO) { + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; + if (!OpInfo) return false; + unsigned &OpIdx = NumOpsAdded; OpIdx = 0; @@ -806,7 +815,7 @@ static bool DisassembleBrMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn, if (Opcode == ARM::BLXr9 || Opcode == ARM::BRIND) { assert(NumOps >= 1 && OpInfo[OpIdx].RegClass == ARM::GPRRegClassID && "Reg operand expected"); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRm(insn)))); OpIdx = 1; return true; @@ -817,9 +826,9 @@ static bool DisassembleBrMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn, // InOperandList with GPR:$target and GPR:$idx regs. 
assert(NumOps == 4 && "Expect 4 operands"); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRn(insn)))); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRm(insn)))); // Fill in the two remaining imm operands to signify build completion. @@ -835,7 +844,7 @@ static bool DisassembleBrMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn, // InOperandList with GPR::$target reg. assert(NumOps == 3 && "Expect 3 operands"); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRm(insn)))); // Fill in the two remaining imm operands to signify build completion. @@ -852,13 +861,13 @@ static bool DisassembleBrMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn, // See also ARMAddressingModes.h (Addressing Mode #2). assert(NumOps == 5 && getIBit(insn) == 1 && "Expect 5 operands && I-bit=1"); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRn(insn)))); ARM_AM::AddrOpc AddrOpcode = getUBit(insn) ? ARM_AM::add : ARM_AM::sub; // Disassemble the offset reg (Rm), shift type, and immediate shift length. - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRm(insn)))); // Inst{6-5} encodes the shift opcode. ARM_AM::ShiftOpc ShOp = getShiftOpcForBits(slice(insn, 6, 5)); @@ -882,14 +891,19 @@ static bool DisassembleBrMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn, return false; } -static inline uint32_t getBFCInvMask(uint32_t insn) { +static inline bool getBFCInvMask(uint32_t insn, uint32_t &mask) { uint32_t lsb = slice(insn, 11, 7); uint32_t msb = slice(insn, 20, 16); uint32_t Val = 0; - assert(lsb <= msb && "Encoding error: lsb > msb"); + if (msb < lsb) { + DEBUG(errs() << "Encoding error: msb < lsb\n"); + return false; + } + for (uint32_t i = lsb; i <= msb; ++i) Val |= (1 << i); - return ~Val; + mask = ~Val; + return true; } static inline bool SaturateOpcode(unsigned Opcode) { @@ -924,7 +938,7 @@ static inline unsigned decodeSaturatePos(unsigned Opcode, uint32_t insn) { // operations have Rd Rm Rn, instead of the "normal" Rd Rn Rm. // They are QADD, QDADD, QDSUB, and QSUB. static bool DisassembleDPFrm(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO) { + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { const TargetInstrDesc &TID = ARMInsts[Opcode]; unsigned short NumDefs = TID.getNumDefs(); @@ -936,7 +950,7 @@ static bool DisassembleDPFrm(MCInst &MI, unsigned Opcode, uint32_t insn, // Disassemble register def if there is one. 
if (NumDefs && (OpInfo[OpIdx].RegClass == ARM::GPRRegClassID)) { - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRd(insn)))); ++OpIdx; } @@ -949,7 +963,7 @@ static bool DisassembleDPFrm(MCInst &MI, unsigned Opcode, uint32_t insn, if (SaturateOpcode(Opcode)) { MI.addOperand(MCOperand::CreateImm(decodeSaturatePos(Opcode, insn))); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRm(insn)))); if (Opcode == ARM::SSAT16 || Opcode == ARM::USAT16) { @@ -977,14 +991,18 @@ static bool DisassembleDPFrm(MCInst &MI, unsigned Opcode, uint32_t insn, if (Opcode == ARM::BFC || Opcode == ARM::BFI) { // TIED_TO operand skipped for BFC and Inst{3-0} (Reg) for BFI. MI.addOperand(MCOperand::CreateReg(Opcode == ARM::BFC ? 0 - : getRegisterEnum(ARM::GPRRegClassID, + : getRegisterEnum(B, ARM::GPRRegClassID, decodeRm(insn)))); - MI.addOperand(MCOperand::CreateImm(getBFCInvMask(insn))); + uint32_t mask = 0; + if (!getBFCInvMask(insn, mask)) + return false; + + MI.addOperand(MCOperand::CreateImm(mask)); OpIdx += 2; return true; } if (Opcode == ARM::SBFX || Opcode == ARM::UBFX) { - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRm(insn)))); MI.addOperand(MCOperand::CreateImm(slice(insn, 11, 7))); MI.addOperand(MCOperand::CreateImm(slice(insn, 20, 16) + 1)); @@ -1000,7 +1018,7 @@ static bool DisassembleDPFrm(MCInst &MI, unsigned Opcode, uint32_t insn, assert(OpInfo[OpIdx].RegClass == ARM::GPRRegClassID && "Reg operand expected"); MI.addOperand(MCOperand::CreateReg( - getRegisterEnum(ARM::GPRRegClassID, + getRegisterEnum(B, ARM::GPRRegClassID, RmRn ? decodeRm(insn) : decodeRn(insn)))); ++OpIdx; } @@ -1021,7 +1039,7 @@ static bool DisassembleDPFrm(MCInst &MI, unsigned Opcode, uint32_t insn, // routed here as well. // assert(getIBit(insn) == 0 && "I_Bit != '0' reg/reg form"); MI.addOperand(MCOperand::CreateReg( - getRegisterEnum(ARM::GPRRegClassID, + getRegisterEnum(B, ARM::GPRRegClassID, RmRn? decodeRn(insn) : decodeRm(insn)))); ++OpIdx; } else if (Opcode == ARM::MOVi16 || Opcode == ARM::MOVTi16) { @@ -1046,7 +1064,7 @@ static bool DisassembleDPFrm(MCInst &MI, unsigned Opcode, uint32_t insn, } static bool DisassembleDPSoRegFrm(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO) { + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { const TargetInstrDesc &TID = ARMInsts[Opcode]; unsigned short NumDefs = TID.getNumDefs(); @@ -1058,7 +1076,7 @@ static bool DisassembleDPSoRegFrm(MCInst &MI, unsigned Opcode, uint32_t insn, // Disassemble register def if there is one. 
if (NumDefs && (OpInfo[OpIdx].RegClass == ARM::GPRRegClassID)) { - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRd(insn)))); ++OpIdx; } @@ -1071,7 +1089,7 @@ static bool DisassembleDPSoRegFrm(MCInst &MI, unsigned Opcode, uint32_t insn, if (!isUnary) { assert(OpInfo[OpIdx].RegClass == ARM::GPRRegClassID && "Reg operand expected"); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRn(insn)))); ++OpIdx; } @@ -1094,11 +1112,11 @@ static bool DisassembleDPSoRegFrm(MCInst &MI, unsigned Opcode, uint32_t insn, // Register-controlled shifts have Inst{7} = 0 and Inst{4} = 1. unsigned Rs = slice(insn, 4, 4); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRm(insn)))); if (Rs) { // Register-controlled shifts: [Rm, Rs, shift]. - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRs(insn)))); // Inst{6-5} encodes the shift opcode. ARM_AM::ShiftOpc ShOp = getShiftOpcForBits(slice(insn, 6, 5)); @@ -1121,24 +1139,26 @@ static bool DisassembleDPSoRegFrm(MCInst &MI, unsigned Opcode, uint32_t insn, } static bool DisassembleLdStFrm(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, bool isStore) { + unsigned short NumOps, unsigned &NumOpsAdded, bool isStore, BO B) { const TargetInstrDesc &TID = ARMInsts[Opcode]; - unsigned short NumDefs = TID.getNumDefs(); bool isPrePost = isPrePostLdSt(TID.TSFlags); const TargetOperandInfo *OpInfo = TID.OpInfo; + if (!OpInfo) return false; + unsigned &OpIdx = NumOpsAdded; OpIdx = 0; - assert(((!isStore && NumDefs > 0) || (isStore && (NumDefs == 0 || isPrePost))) + assert(((!isStore && TID.getNumDefs() > 0) || + (isStore && (TID.getNumDefs() == 0 || isPrePost))) && "Invalid arguments"); // Operand 0 of a pre- and post-indexed store is the address base writeback. 
if (isPrePost && isStore) { assert(OpInfo[OpIdx].RegClass == ARM::GPRRegClassID && "Reg operand expected"); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRn(insn)))); ++OpIdx; } @@ -1149,7 +1169,7 @@ static bool DisassembleLdStFrm(MCInst &MI, unsigned Opcode, uint32_t insn, assert(OpInfo[OpIdx].RegClass == ARM::GPRRegClassID && "Reg operand expected"); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRd(insn)))); ++OpIdx; @@ -1157,7 +1177,7 @@ static bool DisassembleLdStFrm(MCInst &MI, unsigned Opcode, uint32_t insn, if (isPrePost && !isStore) { assert(OpInfo[OpIdx].RegClass == ARM::GPRRegClassID && "Reg operand expected"); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRn(insn)))); ++OpIdx; } @@ -1170,7 +1190,7 @@ static bool DisassembleLdStFrm(MCInst &MI, unsigned Opcode, uint32_t insn, "Reg operand expected"); assert((!isPrePost || (TID.getOperandConstraint(OpIdx, TOI::TIED_TO) != -1)) && "Index mode or tied_to operand expected"); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRn(insn)))); ++OpIdx; @@ -1194,7 +1214,7 @@ static bool DisassembleLdStFrm(MCInst &MI, unsigned Opcode, uint32_t insn, MI.addOperand(MCOperand::CreateImm(Offset)); } else { // Disassemble the offset reg (Rm), shift type, and immediate shift length. - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRm(insn)))); // Inst{6-5} encodes the shift opcode. 
ARM_AM::ShiftOpc ShOp = getShiftOpcForBits(slice(insn, 6, 5)); @@ -1212,13 +1232,13 @@ static bool DisassembleLdStFrm(MCInst &MI, unsigned Opcode, uint32_t insn, } static bool DisassembleLdFrm(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO) { - return DisassembleLdStFrm(MI, Opcode, insn, NumOps, NumOpsAdded, false); + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { + return DisassembleLdStFrm(MI, Opcode, insn, NumOps, NumOpsAdded, false, B); } static bool DisassembleStFrm(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO) { - return DisassembleLdStFrm(MI, Opcode, insn, NumOps, NumOpsAdded, true); + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { + return DisassembleLdStFrm(MI, Opcode, insn, NumOps, NumOpsAdded, true, B); } static bool HasDualReg(unsigned Opcode) { @@ -1232,24 +1252,26 @@ static bool HasDualReg(unsigned Opcode) { } static bool DisassembleLdStMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, bool isStore) { + unsigned short NumOps, unsigned &NumOpsAdded, bool isStore, BO B) { const TargetInstrDesc &TID = ARMInsts[Opcode]; - unsigned short NumDefs = TID.getNumDefs(); bool isPrePost = isPrePostLdSt(TID.TSFlags); const TargetOperandInfo *OpInfo = TID.OpInfo; + if (!OpInfo) return false; + unsigned &OpIdx = NumOpsAdded; OpIdx = 0; - assert(((!isStore && NumDefs > 0) || (isStore && (NumDefs == 0 || isPrePost))) + assert(((!isStore && TID.getNumDefs() > 0) || + (isStore && (TID.getNumDefs() == 0 || isPrePost))) && "Invalid arguments"); // Operand 0 of a pre- and post-indexed store is the address base writeback. if (isPrePost && isStore) { assert(OpInfo[OpIdx].RegClass == ARM::GPRRegClassID && "Reg operand expected"); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRn(insn)))); ++OpIdx; } @@ -1262,13 +1284,13 @@ static bool DisassembleLdStMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn, assert(OpInfo[OpIdx].RegClass == ARM::GPRRegClassID && "Reg operand expected"); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRd(insn)))); ++OpIdx; // Fill in LDRD and STRD's second operand. 
if (DualReg) { - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRd(insn) + 1))); ++OpIdx; } @@ -1277,7 +1299,7 @@ static bool DisassembleLdStMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn, if (isPrePost && !isStore) { assert(OpInfo[OpIdx].RegClass == ARM::GPRRegClassID && "Reg operand expected"); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRn(insn)))); ++OpIdx; } @@ -1290,7 +1312,7 @@ static bool DisassembleLdStMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn, "Reg operand expected"); assert((!isPrePost || (TID.getOperandConstraint(OpIdx, TOI::TIED_TO) != -1)) && "Index mode or tied_to operand expected"); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRn(insn)))); ++OpIdx; @@ -1315,7 +1337,7 @@ static bool DisassembleLdStMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn, MI.addOperand(MCOperand::CreateImm(Offset)); } else { // Disassemble the offset reg (Rm). - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRm(insn)))); unsigned Offset = ARM_AM::getAM3Opc(AddrOpcode, 0); MI.addOperand(MCOperand::CreateImm(Offset)); @@ -1326,13 +1348,14 @@ static bool DisassembleLdStMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn, } static bool DisassembleLdMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO) { - return DisassembleLdStMiscFrm(MI, Opcode, insn, NumOps, NumOpsAdded, false); + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { + return DisassembleLdStMiscFrm(MI, Opcode, insn, NumOps, NumOpsAdded, false, + B); } static bool DisassembleStMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO) { - return DisassembleLdStMiscFrm(MI, Opcode, insn, NumOps, NumOpsAdded, true); + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { + return DisassembleLdStMiscFrm(MI, Opcode, insn, NumOps, NumOpsAdded, true, B); } // The algorithm for disassembly of LdStMulFrm is different from others because @@ -1340,7 +1363,7 @@ static bool DisassembleStMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn, // and operand 1 (the AM4 mode imm). After operand 3, we need to populate the // reglist with each affected register encoded as an MCOperand. static bool DisassembleLdStMulFrm(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO) { + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { assert(NumOps >= 5 && "LdStMulFrm expects NumOps >= 5"); @@ -1348,7 +1371,7 @@ static bool DisassembleLdStMulFrm(MCInst &MI, unsigned Opcode, uint32_t insn, OpIdx = 0; - unsigned Base = getRegisterEnum(ARM::GPRRegClassID, decodeRn(insn)); + unsigned Base = getRegisterEnum(B, ARM::GPRRegClassID, decodeRn(insn)); // Writeback to base, if necessary. 
if (Opcode == ARM::LDM_UPD || Opcode == ARM::STM_UPD) { @@ -1372,7 +1395,7 @@ static bool DisassembleLdStMulFrm(MCInst &MI, unsigned Opcode, uint32_t insn, unsigned RegListBits = insn & ((1 << 16) - 1); for (unsigned i = 0; i < 16; ++i) { if ((RegListBits >> i) & 1) { - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, i))); ++OpIdx; } @@ -1388,9 +1411,11 @@ static bool DisassembleLdStMulFrm(MCInst &MI, unsigned Opcode, uint32_t insn, // // SWP, SWPB: Rd Rm Rn static bool DisassembleLdStExFrm(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO) { + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; + if (!OpInfo) return false; + unsigned &OpIdx = NumOpsAdded; OpIdx = 0; @@ -1404,29 +1429,29 @@ static bool DisassembleLdStExFrm(MCInst &MI, unsigned Opcode, uint32_t insn, bool isDW = (Opcode == ARM::LDREXD || Opcode == ARM::STREXD); // Add the destination operand. - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRd(insn)))); ++OpIdx; // Store register Exclusive needs a source operand. if (isStore) { - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRm(insn)))); ++OpIdx; if (isDW) { - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRm(insn)+1))); ++OpIdx; } } else if (isDW) { - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRd(insn)+1))); ++OpIdx; } // Finally add the pointer operand. - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRn(insn)))); ++OpIdx; @@ -1438,7 +1463,7 @@ static bool DisassembleLdStExFrm(MCInst &MI, unsigned Opcode, uint32_t insn, // PKHBT, PKHTB: Rd Rn Rm , LSL/ASR #imm5 // RBIT, REV, REV16, REVSH: Rd Rm static bool DisassembleArithMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO) { + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; unsigned &OpIdx = NumOpsAdded; @@ -1452,18 +1477,18 @@ static bool DisassembleArithMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn, bool ThreeReg = NumOps > 2 && OpInfo[2].RegClass == ARM::GPRRegClassID; - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRd(insn)))); ++OpIdx; if (ThreeReg) { assert(NumOps >= 4 && "Expect >= 4 operands"); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRn(insn)))); ++OpIdx; } - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRm(insn)))); ++OpIdx; @@ -1485,7 +1510,7 @@ static bool DisassembleArithMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn, // The 2nd operand register is Rn and the 3rd operand regsiter is Rm for the // three register operand form. 
Otherwise, Rn=0b1111 and only Rm is used. static bool DisassembleExtFrm(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO) { + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; unsigned &OpIdx = NumOpsAdded; @@ -1499,17 +1524,17 @@ static bool DisassembleExtFrm(MCInst &MI, unsigned Opcode, uint32_t insn, bool ThreeReg = NumOps > 2 && OpInfo[2].RegClass == ARM::GPRRegClassID; - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRd(insn)))); ++OpIdx; if (ThreeReg) { - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRn(insn)))); ++OpIdx; } - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRm(insn)))); ++OpIdx; @@ -1591,7 +1616,7 @@ static uint64_t VFPExpandImm(unsigned char byte, unsigned N) { // VCVTDS, VCVTSD: converts between double-precision and single-precision // The rest of the instructions have homogeneous [VFP]Rd and [VFP]Rm registers. static bool DisassembleVFPUnaryFrm(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO) { + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { assert(NumOps >= 1 && "VFPUnaryFrm expects NumOps >= 1"); @@ -1606,7 +1631,7 @@ static bool DisassembleVFPUnaryFrm(MCInst &MI, unsigned Opcode, uint32_t insn, bool isSP = (RegClass == ARM::SPRRegClassID); MI.addOperand(MCOperand::CreateReg( - getRegisterEnum(RegClass, decodeVFPRd(insn, isSP)))); + getRegisterEnum(B, RegClass, decodeVFPRd(insn, isSP)))); ++OpIdx; // Early return for compare with zero instructions. @@ -1620,7 +1645,7 @@ static bool DisassembleVFPUnaryFrm(MCInst &MI, unsigned Opcode, uint32_t insn, isSP = (RegClass == ARM::SPRRegClassID); MI.addOperand(MCOperand::CreateReg( - getRegisterEnum(RegClass, decodeVFPRm(insn, isSP)))); + getRegisterEnum(B, RegClass, decodeVFPRm(insn, isSP)))); ++OpIdx; return true; @@ -1631,7 +1656,7 @@ static bool DisassembleVFPUnaryFrm(MCInst &MI, unsigned Opcode, uint32_t insn, // InOperandList to that of the dst. As far as asm printing is concerned, this // tied_to operand is simply skipped. static bool DisassembleVFPBinaryFrm(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO) { + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { assert(NumOps >= 3 && "VFPBinaryFrm expects NumOps >= 3"); @@ -1647,7 +1672,7 @@ static bool DisassembleVFPBinaryFrm(MCInst &MI, unsigned Opcode, uint32_t insn, bool isSP = (RegClass == ARM::SPRRegClassID); MI.addOperand(MCOperand::CreateReg( - getRegisterEnum(RegClass, decodeVFPRd(insn, isSP)))); + getRegisterEnum(B, RegClass, decodeVFPRd(insn, isSP)))); ++OpIdx; // Skip tied_to operand constraint. 
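The common thread in the hunks above is that getRegisterEnum now takes the builder pointer (BO B), so an invalid (RegClassID, RawRegister) combination can be recorded with B->SetErr(-1) and the instruction rejected later, rather than asserting inside the disassembler. The following is a rough sketch of that reporting pattern; only the SetErr idea comes from the patch, everything else (the Builder type, the toy register mapping) is invented for illustration and is not LLVM API.

// Simplified sketch of the error-reporting pattern used throughout these
// hunks: flag the builder on a bad encoding instead of asserting.
#include <cstdio>

struct Builder {
  int Err = 0;
  void SetErr(int Code) { Err = Code; }   // remember that decoding failed
};

// On a bad raw register number, mark the builder and return a harmless
// value; the caller checks Err before trusting the built MCInst.
static unsigned getRegisterEnumSketch(Builder *B, unsigned RawReg) {
  if (RawReg > 15) {          // pretend 0..15 are the only valid encodings
    B->SetErr(-1);
    return 0;
  }
  return RawReg + 1;          // dummy mapping standing in for the real table
}

int main() {
  Builder B;
  unsigned R = getRegisterEnumSketch(&B, 42);
  if (B.Err)
    std::printf("encoding error, instruction rejected\n");
  else
    std::printf("register enum %u\n", R);
  return 0;
}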
@@ -1658,11 +1683,11 @@ static bool DisassembleVFPBinaryFrm(MCInst &MI, unsigned Opcode, uint32_t insn, } MI.addOperand(MCOperand::CreateReg( - getRegisterEnum(RegClass, decodeVFPRn(insn, isSP)))); + getRegisterEnum(B, RegClass, decodeVFPRn(insn, isSP)))); ++OpIdx; MI.addOperand(MCOperand::CreateReg( - getRegisterEnum(RegClass, decodeVFPRm(insn, isSP)))); + getRegisterEnum(B, RegClass, decodeVFPRm(insn, isSP)))); ++OpIdx; return true; @@ -1675,12 +1700,13 @@ static bool DisassembleVFPBinaryFrm(MCInst &MI, unsigned Opcode, uint32_t insn, // A8.6.297 vcvt (floating-point and fixed-point) // Dd|Sd Dd|Sd(TIED_TO) #fbits(= 16|32 - UInt(imm4:i)) static bool DisassembleVFPConv1Frm(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO) { + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { assert(NumOps >= 2 && "VFPConv1Frm expects NumOps >= 2"); const TargetInstrDesc &TID = ARMInsts[Opcode]; const TargetOperandInfo *OpInfo = TID.OpInfo; + if (!OpInfo) return false; bool SP = slice(insn, 8, 8) == 0; // A8.6.295 & A8.6.297 bool fixed_point = slice(insn, 17, 17) == 1; // A8.6.297 @@ -1692,7 +1718,7 @@ static bool DisassembleVFPConv1Frm(MCInst &MI, unsigned Opcode, uint32_t insn, int size = slice(insn, 7, 7) == 0 ? 16 : 32; int fbits = size - (slice(insn,3,0) << 1 | slice(insn,5,5)); MI.addOperand(MCOperand::CreateReg( - getRegisterEnum(RegClassID, + getRegisterEnum(B, RegClassID, decodeVFPRd(insn, SP)))); assert(TID.getOperandConstraint(1, TOI::TIED_TO) != -1 && @@ -1712,15 +1738,15 @@ static bool DisassembleVFPConv1Frm(MCInst &MI, unsigned Opcode, uint32_t insn, if (slice(insn, 18, 18) == 1) { // to_integer operation d = decodeVFPRd(insn, true /* Is Single Precision */); MI.addOperand(MCOperand::CreateReg( - getRegisterEnum(ARM::SPRRegClassID, d))); + getRegisterEnum(B, ARM::SPRRegClassID, d))); m = decodeVFPRm(insn, SP); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(RegClassID, m))); + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, RegClassID, m))); } else { d = decodeVFPRd(insn, SP); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(RegClassID, d))); + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, RegClassID, d))); m = decodeVFPRm(insn, true /* Is Single Precision */); MI.addOperand(MCOperand::CreateReg( - getRegisterEnum(ARM::SPRRegClassID, m))); + getRegisterEnum(B, ARM::SPRRegClassID, m))); } NumOpsAdded = 2; } @@ -1731,13 +1757,13 @@ static bool DisassembleVFPConv1Frm(MCInst &MI, unsigned Opcode, uint32_t insn, // VMOVRS - A8.6.330 // Rt => Rd; Sn => UInt(Vn:N) static bool DisassembleVFPConv2Frm(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO) { + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { assert(NumOps >= 2 && "VFPConv2Frm expects NumOps >= 2"); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRd(insn)))); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::SPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::SPRRegClassID, decodeVFPRn(insn, true)))); NumOpsAdded = 2; return true; @@ -1749,29 +1775,29 @@ static bool DisassembleVFPConv2Frm(MCInst &MI, unsigned Opcode, uint32_t insn, // VMOVRRS - A8.6.331 // Rt => Rd; Rt2 => Rn; Sm => UInt(Vm:M); Sm1 = Sm+1 static bool DisassembleVFPConv3Frm(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO) { + unsigned short NumOps, unsigned &NumOpsAdded, 
BO B) { assert(NumOps >= 3 && "VFPConv3Frm expects NumOps >= 3"); const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; unsigned &OpIdx = NumOpsAdded; - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRd(insn)))); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRn(insn)))); OpIdx = 2; if (OpInfo[OpIdx].RegClass == ARM::SPRRegClassID) { unsigned Sm = decodeVFPRm(insn, true); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::SPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::SPRRegClassID, Sm))); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::SPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::SPRRegClassID, Sm+1))); OpIdx += 2; } else { MI.addOperand(MCOperand::CreateReg( - getRegisterEnum(ARM::DPRRegClassID, + getRegisterEnum(B, ARM::DPRRegClassID, decodeVFPRm(insn, false)))); ++OpIdx; } @@ -1781,13 +1807,13 @@ static bool DisassembleVFPConv3Frm(MCInst &MI, unsigned Opcode, uint32_t insn, // VMOVSR - A8.6.330 // Rt => Rd; Sn => UInt(Vn:N) static bool DisassembleVFPConv4Frm(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO) { + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { assert(NumOps >= 2 && "VFPConv4Frm expects NumOps >= 2"); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::SPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::SPRRegClassID, decodeVFPRn(insn, true)))); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRd(insn)))); NumOpsAdded = 2; return true; @@ -1799,7 +1825,7 @@ static bool DisassembleVFPConv4Frm(MCInst &MI, unsigned Opcode, uint32_t insn, // VMOVRRS - A8.6.331 // Rt => Rd; Rt2 => Rn; Sm => UInt(Vm:M); Sm1 = Sm+1 static bool DisassembleVFPConv5Frm(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO) { + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { assert(NumOps >= 3 && "VFPConv5Frm expects NumOps >= 3"); @@ -1810,21 +1836,21 @@ static bool DisassembleVFPConv5Frm(MCInst &MI, unsigned Opcode, uint32_t insn, if (OpInfo[OpIdx].RegClass == ARM::SPRRegClassID) { unsigned Sm = decodeVFPRm(insn, true); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::SPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::SPRRegClassID, Sm))); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::SPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::SPRRegClassID, Sm+1))); OpIdx += 2; } else { MI.addOperand(MCOperand::CreateReg( - getRegisterEnum(ARM::DPRRegClassID, + getRegisterEnum(B, ARM::DPRRegClassID, decodeVFPRm(insn, false)))); ++OpIdx; } - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRd(insn)))); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRn(insn)))); OpIdx += 2; return true; @@ -1833,7 +1859,7 @@ static bool DisassembleVFPConv5Frm(MCInst &MI, unsigned Opcode, uint32_t insn, // VFP Load/Store Instructions. 
// VLDRD, VLDRS, VSTRD, VSTRS static bool DisassembleVFPLdStFrm(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO) { + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { assert(NumOps >= 3 && "VFPLdStFrm expects NumOps >= 3"); @@ -1843,9 +1869,9 @@ static bool DisassembleVFPLdStFrm(MCInst &MI, unsigned Opcode, uint32_t insn, // Extract Dd/Sd for operand 0. unsigned RegD = decodeVFPRd(insn, isSPVFP); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(RegClassID, RegD))); + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, RegClassID, RegD))); - unsigned Base = getRegisterEnum(ARM::GPRRegClassID, decodeRn(insn)); + unsigned Base = getRegisterEnum(B, ARM::GPRRegClassID, decodeRn(insn)); MI.addOperand(MCOperand::CreateReg(Base)); // Next comes the AM5 Opcode. @@ -1865,7 +1891,7 @@ static bool DisassembleVFPLdStFrm(MCInst &MI, unsigned Opcode, uint32_t insn, // // VLDMD[_UPD], VLDMS[_UPD], VSTMD[_UPD], VSTMS[_UPD] static bool DisassembleVFPLdStMulFrm(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO) { + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { assert(NumOps >= 5 && "VFPLdStMulFrm expects NumOps >= 5"); @@ -1873,7 +1899,7 @@ static bool DisassembleVFPLdStMulFrm(MCInst &MI, unsigned Opcode, uint32_t insn, OpIdx = 0; - unsigned Base = getRegisterEnum(ARM::GPRRegClassID, decodeRn(insn)); + unsigned Base = getRegisterEnum(B, ARM::GPRRegClassID, decodeRn(insn)); // Writeback to base, if necessary. if (Opcode == ARM::VLDMD_UPD || Opcode == ARM::VLDMS_UPD || @@ -1886,6 +1912,12 @@ static bool DisassembleVFPLdStMulFrm(MCInst &MI, unsigned Opcode, uint32_t insn, // Next comes the AM5 Opcode. ARM_AM::AMSubMode SubMode = getAMSubModeForBits(getPUBits(insn)); + // Must be either "ia" or "db" submode. + if (SubMode != ARM_AM::ia && SubMode != ARM_AM::db) { + DEBUG(errs() << "Illegal addressing mode 5 sub-mode!\n"); + return false; + } + unsigned char Imm8 = insn & 0xFF; MI.addOperand(MCOperand::CreateImm(ARM_AM::getAM5Opc(SubMode, Imm8))); @@ -1906,7 +1938,7 @@ static bool DisassembleVFPLdStMulFrm(MCInst &MI, unsigned Opcode, uint32_t insn, // Fill the variadic part of reglist. unsigned Regs = isSPVFP ? 
Imm8 : Imm8/2; for (unsigned i = 0; i < Regs; ++i) { - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(RegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, RegClassID, RegD + i))); ++OpIdx; } @@ -1920,7 +1952,7 @@ static bool DisassembleVFPLdStMulFrm(MCInst &MI, unsigned Opcode, uint32_t insn, // FCONSTS (SPR and a VFPf32Imm operand) // VMRS/VMSR (GPR operand) static bool DisassembleVFPMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO) { + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; unsigned &OpIdx = NumOpsAdded; @@ -1935,13 +1967,13 @@ static bool DisassembleVFPMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn, unsigned RegEnum = 0; switch (OpInfo[0].RegClass) { case ARM::DPRRegClassID: - RegEnum = getRegisterEnum(ARM::DPRRegClassID, decodeVFPRd(insn, false)); + RegEnum = getRegisterEnum(B, ARM::DPRRegClassID, decodeVFPRd(insn, false)); break; case ARM::SPRRegClassID: - RegEnum = getRegisterEnum(ARM::SPRRegClassID, decodeVFPRd(insn, true)); + RegEnum = getRegisterEnum(B, ARM::SPRRegClassID, decodeVFPRd(insn, true)); break; case ARM::GPRRegClassID: - RegEnum = getRegisterEnum(ARM::GPRRegClassID, decodeRd(insn)); + RegEnum = getRegisterEnum(B, ARM::GPRRegClassID, decodeRd(insn)); break; default: assert(0 && "Invalid reg class id"); @@ -1986,7 +2018,7 @@ static bool DisassembleVFPMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn, // D = Inst{22}, Vd = Inst{15-12} static unsigned decodeNEONRd(uint32_t insn) { return ((insn >> ARMII::NEON_D_BitShift) & 1) << 4 - | (insn >> ARMII::NEON_RegRdShift) & ARMII::NEONRegMask; + | ((insn >> ARMII::NEON_RegRdShift) & ARMII::NEONRegMask); } // Extract/Decode NEON N/Vn: @@ -1997,7 +2029,7 @@ static unsigned decodeNEONRd(uint32_t insn) { // N = Inst{7}, Vn = Inst{19-16} static unsigned decodeNEONRn(uint32_t insn) { return ((insn >> ARMII::NEON_N_BitShift) & 1) << 4 - | (insn >> ARMII::NEON_RegRnShift) & ARMII::NEONRegMask; + | ((insn >> ARMII::NEON_RegRnShift) & ARMII::NEONRegMask); } // Extract/Decode NEON M/Vm: @@ -2008,7 +2040,7 @@ static unsigned decodeNEONRn(uint32_t insn) { // M = Inst{5}, Vm = Inst{3-0} static unsigned decodeNEONRm(uint32_t insn) { return ((insn >> ARMII::NEON_M_BitShift) & 1) << 4 - | (insn >> ARMII::NEON_RegRmShift) & ARMII::NEONRegMask; + | ((insn >> ARMII::NEON_RegRmShift) & ARMII::NEONRegMask); } namespace { @@ -2072,7 +2104,7 @@ static uint64_t decodeN1VImm(uint32_t insn, ElemSize esize) { case ESize64: { for (unsigned i = 0; i < 8; ++i) if ((Imm8 >> i) & 1) - Imm64 |= 0xFF << 8*i; + Imm64 |= (uint64_t)0xFF << 8*i; break; } default: @@ -2200,6 +2232,22 @@ static unsigned decodeN3VImm(uint32_t insn) { return (insn >> 8) & 0xF; } +static bool UseDRegPair(unsigned Opcode) { + switch (Opcode) { + default: + return false; + case ARM::VLD1q8_UPD: + case ARM::VLD1q16_UPD: + case ARM::VLD1q32_UPD: + case ARM::VLD1q64_UPD: + case ARM::VST1q8_UPD: + case ARM::VST1q16_UPD: + case ARM::VST1q32_UPD: + case ARM::VST1q64_UPD: + return true; + } +} + // VLD* // D[d] D[d2] ... Rn [TIED_TO Rn] align [Rm] // VLD*LN* @@ -2211,7 +2259,8 @@ static unsigned decodeN3VImm(uint32_t insn) { // // Correctly set VLD*/VST*'s TIED_TO GPR, as the asm printer needs it. 
static bool DisassembleNLdSt0(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, bool Store, bool DblSpaced) { + unsigned short NumOps, unsigned &NumOpsAdded, bool Store, bool DblSpaced, + BO B) { const TargetInstrDesc &TID = ARMInsts[Opcode]; const TargetOperandInfo *OpInfo = TID.OpInfo; @@ -2239,7 +2288,7 @@ static bool DisassembleNLdSt0(MCInst &MI, unsigned Opcode, uint32_t insn, // LLVM Addressing Mode #6. unsigned RmEnum = 0; if (WB && Rm != 13) - RmEnum = getRegisterEnum(ARM::GPRRegClassID, Rm); + RmEnum = getRegisterEnum(B, ARM::GPRRegClassID, Rm); if (Store) { // Consume possible WB, AddrMode6, possible increment reg, the DPR/QPR's, @@ -2248,14 +2297,14 @@ static bool DisassembleNLdSt0(MCInst &MI, unsigned Opcode, uint32_t insn, "Reg operand expected"); if (WB) { - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, Rn))); ++OpIdx; } assert((OpIdx+1) < NumOps && OpInfo[OpIdx].RegClass == ARM::GPRRegClassID && OpInfo[OpIdx + 1].RegClass == 0 && "Addrmode #6 Operands expected"); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, Rn))); MI.addOperand(MCOperand::CreateImm(0)); // Alignment ignored? OpIdx += 2; @@ -2272,10 +2321,9 @@ static bool DisassembleNLdSt0(MCInst &MI, unsigned Opcode, uint32_t insn, RegClass = OpInfo[OpIdx].RegClass; while (OpIdx < NumOps && OpInfo[OpIdx].RegClass == RegClass) { - if (Opcode >= ARM::VST1q16 && Opcode <= ARM::VST1q8) - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(RegClass,Rd,true))); - else - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(RegClass,Rd))); + MI.addOperand(MCOperand::CreateReg( + getRegisterEnum(B, RegClass, Rd, + UseDRegPair(Opcode)))); Rd += Inc; ++OpIdx; } @@ -2293,23 +2341,22 @@ static bool DisassembleNLdSt0(MCInst &MI, unsigned Opcode, uint32_t insn, RegClass = OpInfo[0].RegClass; while (OpIdx < NumOps && OpInfo[OpIdx].RegClass == RegClass) { - if (Opcode >= ARM::VLD1q16 && Opcode <= ARM::VLD1q8) - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(RegClass,Rd,true))); - else - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(RegClass,Rd))); + MI.addOperand(MCOperand::CreateReg( + getRegisterEnum(B, RegClass, Rd, + UseDRegPair(Opcode)))); Rd += Inc; ++OpIdx; } if (WB) { - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, Rn))); ++OpIdx; } assert((OpIdx+1) < NumOps && OpInfo[OpIdx].RegClass == ARM::GPRRegClassID && OpInfo[OpIdx + 1].RegClass == 0 && "Addrmode #6 Operands expected"); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, Rn))); MI.addOperand(MCOperand::CreateImm(0)); // Alignment ignored? OpIdx += 2; @@ -2342,7 +2389,7 @@ static bool DisassembleNLdSt0(MCInst &MI, unsigned Opcode, uint32_t insn, // Find out about double-spaced-ness of the Opcode and pass it on to // DisassembleNLdSt0(). 
static bool DisassembleNLdSt(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO) { + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { const StringRef Name = ARMInsts[Opcode].Name; bool DblSpaced = false; @@ -2377,13 +2424,13 @@ static bool DisassembleNLdSt(MCInst &MI, unsigned Opcode, uint32_t insn, } return DisassembleNLdSt0(MI, Opcode, insn, NumOps, NumOpsAdded, - slice(insn, 21, 21) == 0, DblSpaced); + slice(insn, 21, 21) == 0, DblSpaced, B); } // VMOV (immediate) // Qd/Dd imm static bool DisassembleN1RegModImmFrm(MCInst &MI, unsigned Opcode, - uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO) { + uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) { const TargetInstrDesc &TID = ARMInsts[Opcode]; const TargetOperandInfo *OpInfo = TID.OpInfo; @@ -2395,7 +2442,7 @@ static bool DisassembleN1RegModImmFrm(MCInst &MI, unsigned Opcode, "Expect 1 reg operand followed by 1 imm operand"); // Qd/Dd = Inst{22:15-12} => NEON Rd - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(OpInfo[0].RegClass, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, OpInfo[0].RegClass, decodeNEONRd(insn)))); ElemSize esize = ESizeNA; @@ -2415,6 +2462,7 @@ static bool DisassembleN1RegModImmFrm(MCInst &MI, unsigned Opcode, case ARM::VMOVv1i64: case ARM::VMOVv2i64: esize = ESize64; + break; default: assert(0 && "Unreachable code!"); return false; @@ -2451,7 +2499,7 @@ enum N2VFlag { // // Others static bool DisassembleNVdVmOptImm(MCInst &MI, unsigned Opc, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, N2VFlag Flag = N2V_None) { + unsigned short NumOps, unsigned &NumOpsAdded, N2VFlag Flag, BO B) { const TargetInstrDesc &TID = ARMInsts[Opc]; const TargetOperandInfo *OpInfo = TID.OpInfo; @@ -2478,7 +2526,7 @@ static bool DisassembleNVdVmOptImm(MCInst &MI, unsigned Opc, uint32_t insn, } // Qd/Dd = Inst{22:15-12} => NEON Rd - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(OpInfo[OpIdx].RegClass, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, OpInfo[OpIdx].RegClass, decodeNEONRd(insn)))); ++OpIdx; @@ -2490,7 +2538,7 @@ static bool DisassembleNVdVmOptImm(MCInst &MI, unsigned Opc, uint32_t insn, } // Dm = Inst{5:3-0} => NEON Rm - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(OpInfo[OpIdx].RegClass, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, OpInfo[OpIdx].RegClass, decodeNEONRm(insn)))); ++OpIdx; @@ -2523,21 +2571,22 @@ static bool DisassembleNVdVmOptImm(MCInst &MI, unsigned Opc, uint32_t insn, } static bool DisassembleN2RegFrm(MCInst &MI, unsigned Opc, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO) { + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - return DisassembleNVdVmOptImm(MI, Opc, insn, NumOps, NumOpsAdded); + return DisassembleNVdVmOptImm(MI, Opc, insn, NumOps, NumOpsAdded, + N2V_None, B); } static bool DisassembleNVCVTFrm(MCInst &MI, unsigned Opc, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO) { + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { return DisassembleNVdVmOptImm(MI, Opc, insn, NumOps, NumOpsAdded, - N2V_VectorConvert_Between_Float_Fixed); + N2V_VectorConvert_Between_Float_Fixed, B); } static bool DisassembleNVecDupLnFrm(MCInst &MI, unsigned Opc, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO) { + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { return DisassembleNVdVmOptImm(MI, Opc, insn, NumOps, NumOpsAdded, - N2V_VectorDupLane); + N2V_VectorDupLane, B); } // Vector Shift [Accumulate] Instructions. 
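One small but real fix above is the break added after the ARM::VMOVv1i64/VMOVv2i64 case in DisassembleN1RegModImmFrm: without it, control falls through into the default branch and the instruction is reported as unreachable even though esize was already set. Here is a minimal sketch of that fall-through hazard; the types and the withBreak flag are toy constructions for illustration only, not the patch's code.

// Minimal illustration of the switch fall-through fixed above: without the
// break, the 64-bit case keeps executing the default branch and the lookup
// is (wrongly) reported as a failure.
#include <cstdio>

enum ElemSize { ESizeNA = 0, ESize8 = 8, ESize64 = 64 };

static bool classify(int Kind, ElemSize &esize, bool withBreak) {
  switch (Kind) {
  case 0:
    esize = ESize8;
    break;
  case 1:
    esize = ESize64;
    if (withBreak) break;     // mirrors the one-line fix from the patch
    // falls through when withBreak is false
  default:
    esize = ESizeNA;
    return false;             // the "Unreachable code" rejection path
  }
  return true;
}

int main() {
  ElemSize E;
  std::printf("without break: %s\n", classify(1, E, false) ? "ok" : "rejected");
  std::printf("with break:    %s\n", classify(1, E, true)  ? "ok" : "rejected");
  return 0;
}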
@@ -2547,7 +2596,7 @@ static bool DisassembleNVecDupLnFrm(MCInst &MI, unsigned Opc, uint32_t insn, // VSHLLi16, VSHLLi32, VSHLLi8: Qd Dm imm (== size) // static bool DisassembleNVectorShift(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, bool LeftShift) { + unsigned short NumOps, unsigned &NumOpsAdded, bool LeftShift, BO B) { const TargetInstrDesc &TID = ARMInsts[Opcode]; const TargetOperandInfo *OpInfo = TID.OpInfo; @@ -2564,7 +2613,7 @@ static bool DisassembleNVectorShift(MCInst &MI, unsigned Opcode, uint32_t insn, OpIdx = 0; // Qd/Dd = Inst{22:15-12} => NEON Rd - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(OpInfo[OpIdx].RegClass, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, OpInfo[OpIdx].RegClass, decodeNEONRd(insn)))); ++OpIdx; @@ -2579,7 +2628,7 @@ static bool DisassembleNVectorShift(MCInst &MI, unsigned Opcode, uint32_t insn, "Reg operand expected"); // Qm/Dm = Inst{5:3-0} => NEON Rm - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(OpInfo[OpIdx].RegClass, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, OpInfo[OpIdx].RegClass, decodeNEONRm(insn)))); ++OpIdx; @@ -2611,15 +2660,17 @@ static bool DisassembleNVectorShift(MCInst &MI, unsigned Opcode, uint32_t insn, // Left shift instructions. static bool DisassembleN2RegVecShLFrm(MCInst &MI, unsigned Opcode, - uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO) { + uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - return DisassembleNVectorShift(MI, Opcode, insn, NumOps, NumOpsAdded, true); + return DisassembleNVectorShift(MI, Opcode, insn, NumOps, NumOpsAdded, true, + B); } // Right shift instructions have different shift amount interpretation. static bool DisassembleN2RegVecShRFrm(MCInst &MI, unsigned Opcode, - uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO) { + uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - return DisassembleNVectorShift(MI, Opcode, insn, NumOps, NumOpsAdded, false); + return DisassembleNVectorShift(MI, Opcode, insn, NumOps, NumOpsAdded, false, + B); } namespace { @@ -2644,7 +2695,7 @@ enum N3VFlag { // // Others static bool DisassembleNVdVnVmOptImm(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, N3VFlag Flag = N3V_None) { + unsigned short NumOps, unsigned &NumOpsAdded, N3VFlag Flag, BO B) { const TargetInstrDesc &TID = ARMInsts[Opcode]; const TargetOperandInfo *OpInfo = TID.OpInfo; @@ -2673,7 +2724,7 @@ static bool DisassembleNVdVnVmOptImm(MCInst &MI, unsigned Opcode, uint32_t insn, } // Qd/Dd = Inst{22:15-12} => NEON Rd - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(OpInfo[OpIdx].RegClass, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, OpInfo[OpIdx].RegClass, decodeNEONRd(insn)))); ++OpIdx; @@ -2688,7 +2739,7 @@ static bool DisassembleNVdVnVmOptImm(MCInst &MI, unsigned Opcode, uint32_t insn, // or // Dm = Inst{5:3-0} => NEON Rm MI.addOperand(MCOperand::CreateReg( - getRegisterEnum(OpInfo[OpIdx].RegClass, + getRegisterEnum(B, OpInfo[OpIdx].RegClass, VdVnVm ? 
decodeNEONRn(insn) : decodeNEONRm(insn)))); ++OpIdx; @@ -2708,7 +2759,7 @@ static bool DisassembleNVdVnVmOptImm(MCInst &MI, unsigned Opcode, uint32_t insn, : decodeNEONRn(insn); MI.addOperand(MCOperand::CreateReg( - getRegisterEnum(OpInfo[OpIdx].RegClass, m))); + getRegisterEnum(B, OpInfo[OpIdx].RegClass, m))); ++OpIdx; if (OpIdx < NumOps && OpInfo[OpIdx].RegClass == 0 @@ -2732,27 +2783,28 @@ static bool DisassembleNVdVnVmOptImm(MCInst &MI, unsigned Opcode, uint32_t insn, } static bool DisassembleN3RegFrm(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO) { + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - return DisassembleNVdVnVmOptImm(MI, Opcode, insn, NumOps, NumOpsAdded); + return DisassembleNVdVnVmOptImm(MI, Opcode, insn, NumOps, NumOpsAdded, + N3V_None, B); } static bool DisassembleN3RegVecShFrm(MCInst &MI, unsigned Opcode, - uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO) { + uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) { return DisassembleNVdVnVmOptImm(MI, Opcode, insn, NumOps, NumOpsAdded, - N3V_VectorShift); + N3V_VectorShift, B); } static bool DisassembleNVecExtractFrm(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO) { + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { return DisassembleNVdVnVmOptImm(MI, Opcode, insn, NumOps, NumOpsAdded, - N3V_VectorExtract); + N3V_VectorExtract, B); } static bool DisassembleNVecMulScalarFrm(MCInst &MI, unsigned Opcode, - uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO) { + uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) { return DisassembleNVdVnVmOptImm(MI, Opcode, insn, NumOps, NumOpsAdded, - N3V_Multiply_By_Scalar); + N3V_Multiply_By_Scalar, B); } // Vector Table Lookup @@ -2762,10 +2814,11 @@ static bool DisassembleNVecMulScalarFrm(MCInst &MI, unsigned Opcode, // VTBL3, VTBX3: Dd [Dd(TIED_TO)] Dn Dn+1 Dn+2 Dm // VTBL4, VTBX4: Dd [Dd(TIED_TO)] Dn Dn+1 Dn+2 Dn+3 Dm static bool DisassembleNVTBLFrm(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO) { + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { const TargetInstrDesc &TID = ARMInsts[Opcode]; const TargetOperandInfo *OpInfo = TID.OpInfo; + if (!OpInfo) return false; assert(NumOps >= 3 && OpInfo[0].RegClass == ARM::DPRRegClassID && @@ -2786,7 +2839,7 @@ static bool DisassembleNVTBLFrm(MCInst &MI, unsigned Opcode, uint32_t insn, unsigned Len = slice(insn, 9, 8) + 1; // Dd (the destination vector) - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::DPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::DPRRegClassID, decodeNEONRd(insn)))); ++OpIdx; @@ -2801,7 +2854,7 @@ static bool DisassembleNVTBLFrm(MCInst &MI, unsigned Opcode, uint32_t insn, for (unsigned i = 0; i < Len; ++i) { assert(OpIdx < NumOps && OpInfo[OpIdx].RegClass == ARM::DPRRegClassID && "Reg operand expected"); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::DPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::DPRRegClassID, Rn + i))); ++OpIdx; } @@ -2809,7 +2862,7 @@ static bool DisassembleNVTBLFrm(MCInst &MI, unsigned Opcode, uint32_t insn, // Dm (the index vector) assert(OpIdx < NumOps && OpInfo[OpIdx].RegClass == ARM::DPRRegClassID && "Reg operand (index vector) expected"); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::DPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::DPRRegClassID, 
decodeNEONRm(insn)))); ++OpIdx; @@ -2825,13 +2878,13 @@ static bool DisassembleNEONFrm(MCInst &MI, unsigned Opcode, uint32_t insn, // Vector Get Lane (move scalar to ARM core register) Instructions. // VGETLNi32, VGETLNs16, VGETLNs8, VGETLNu16, VGETLNu8: Rt Dn index static bool DisassembleNEONGetLnFrm(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO) { + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { const TargetInstrDesc &TID = ARMInsts[Opcode]; - unsigned short NumDefs = TID.getNumDefs(); const TargetOperandInfo *OpInfo = TID.OpInfo; + if (!OpInfo) return false; - assert(NumDefs == 1 && NumOps >= 3 && + assert(TID.getNumDefs() == 1 && NumOps >= 3 && OpInfo[0].RegClass == ARM::GPRRegClassID && OpInfo[1].RegClass == ARM::DPRRegClassID && OpInfo[2].RegClass == 0 && @@ -2843,11 +2896,11 @@ static bool DisassembleNEONGetLnFrm(MCInst &MI, unsigned Opcode, uint32_t insn, : ESize32); // Rt = Inst{15-12} => ARM Rd - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRd(insn)))); // Dn = Inst{7:19-16} => NEON Rn - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::DPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::DPRRegClassID, decodeNEONRn(insn)))); MI.addOperand(MCOperand::CreateImm(decodeNVLaneOpIndex(insn, esize))); @@ -2859,13 +2912,13 @@ static bool DisassembleNEONGetLnFrm(MCInst &MI, unsigned Opcode, uint32_t insn, // Vector Set Lane (move ARM core register to scalar) Instructions. // VSETLNi16, VSETLNi32, VSETLNi8: Dd Dd (TIED_TO) Rt index static bool DisassembleNEONSetLnFrm(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO) { + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { const TargetInstrDesc &TID = ARMInsts[Opcode]; - unsigned short NumDefs = TID.getNumDefs(); const TargetOperandInfo *OpInfo = TID.OpInfo; + if (!OpInfo) return false; - assert(NumDefs == 1 && NumOps >= 3 && + assert(TID.getNumDefs() == 1 && NumOps >= 3 && OpInfo[0].RegClass == ARM::DPRRegClassID && OpInfo[1].RegClass == ARM::DPRRegClassID && TID.getOperandConstraint(1, TOI::TIED_TO) != -1 && @@ -2879,14 +2932,14 @@ static bool DisassembleNEONSetLnFrm(MCInst &MI, unsigned Opcode, uint32_t insn, : ESize32); // Dd = Inst{7:19-16} => NEON Rn - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::DPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::DPRRegClassID, decodeNEONRn(insn)))); // TIED_TO operand. MI.addOperand(MCOperand::CreateReg(0)); // Rt = Inst{15-12} => ARM Rd - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRd(insn)))); MI.addOperand(MCOperand::CreateImm(decodeNVLaneOpIndex(insn, esize))); @@ -2898,7 +2951,7 @@ static bool DisassembleNEONSetLnFrm(MCInst &MI, unsigned Opcode, uint32_t insn, // Vector Duplicate Instructions (from ARM core register to all elements). 
// VDUP8d, VDUP16d, VDUP32d, VDUP8q, VDUP16q, VDUP32q: Qd/Dd Rt static bool DisassembleNEONDupFrm(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO) { + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; @@ -2911,11 +2964,11 @@ static bool DisassembleNEONDupFrm(MCInst &MI, unsigned Opcode, uint32_t insn, unsigned RegClass = OpInfo[0].RegClass; // Qd/Dd = Inst{7:19-16} => NEON Rn - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(RegClass, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, RegClass, decodeNEONRn(insn)))); // Rt = Inst{15-12} => ARM Rd - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRd(insn)))); NumOpsAdded = 2; @@ -2945,13 +2998,13 @@ static inline bool PreLoadOpcode(unsigned Opcode) { } static bool DisassemblePreLoadFrm(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded) { + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { // Preload Data/Instruction requires either 2 or 4 operands. // PLDi, PLDWi, PLIi: Rn [+/-]imm12 add = (U == '1') // PLDr[a|m], PLDWr[a|m], PLIr[a|m]: Rn Rm addrmode2_opc - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRn(insn)))); if (Opcode == ARM::PLDi || Opcode == ARM::PLDWi || Opcode == ARM::PLIi) { @@ -2961,7 +3014,7 @@ static bool DisassemblePreLoadFrm(MCInst &MI, unsigned Opcode, uint32_t insn, MI.addOperand(MCOperand::CreateImm(Offset)); NumOpsAdded = 2; } else { - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRm(insn)))); ARM_AM::AddrOpc AddrOpcode = getUBit(insn) ? ARM_AM::add : ARM_AM::sub; @@ -2982,7 +3035,7 @@ static bool DisassemblePreLoadFrm(MCInst &MI, unsigned Opcode, uint32_t insn, } static bool DisassembleMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO) { + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { if (MemBarrierInstr(insn)) return true; @@ -3031,7 +3084,7 @@ static bool DisassembleMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn, } if (PreLoadOpcode(Opcode)) - return DisassemblePreLoadFrm(MI, Opcode, insn, NumOps, NumOpsAdded); + return DisassemblePreLoadFrm(MI, Opcode, insn, NumOps, NumOpsAdded, B); assert(0 && "Unexpected misc instruction!"); return false; @@ -3147,7 +3200,7 @@ bool ARMBasicMCBuilder::BuildIt(MCInst &MI, uint32_t insn) { unsigned NumOpsAdded = 0; bool OK = (*Disasm)(MI, Opcode, insn, NumOps, NumOpsAdded, this); - if (!OK) return false; + if (!OK || this->Err != 0) return false; if (NumOpsAdded >= NumOps) return true; @@ -3156,6 +3209,49 @@ bool ARMBasicMCBuilder::BuildIt(MCInst &MI, uint32_t insn) { return TryPredicateAndSBitModifier(MI, Opcode, insn, NumOps - NumOpsAdded); } +// A8.3 Conditional execution +// A8.3.1 Pseudocode details of conditional execution +// Condition bits '111x' indicate the instruction is always executed. +static uint32_t CondCode(uint32_t CondField) { + if (CondField == 0xF) + return ARMCC::AL; + return CondField; +} + +/// DoPredicateOperands - DoPredicateOperands process the predicate operands +/// of some Thumb instructions which come before the reglist operands. It +/// returns true if the two predicate operands have been processed. 
+bool ARMBasicMCBuilder::DoPredicateOperands(MCInst& MI, unsigned Opcode, + uint32_t /* insn */, unsigned short NumOpsRemaining) { + + assert(NumOpsRemaining > 0 && "Invalid argument"); + + const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; + unsigned Idx = MI.getNumOperands(); + + // First, we check whether this instr specifies the PredicateOperand through + // a pair of TargetOperandInfos with isPredicate() property. + if (NumOpsRemaining >= 2 && + OpInfo[Idx].isPredicate() && OpInfo[Idx+1].isPredicate() && + OpInfo[Idx].RegClass == 0 && OpInfo[Idx+1].RegClass == ARM::CCRRegClassID) + { + // If we are inside an IT block, get the IT condition bits maintained via + // ARMBasicMCBuilder::ITState[7:0], through ARMBasicMCBuilder::GetITCond(). + // See also A2.5.2. + if (InITBlock()) + MI.addOperand(MCOperand::CreateImm(GetITCond())); + else + MI.addOperand(MCOperand::CreateImm(ARMCC::AL)); + MI.addOperand(MCOperand::CreateReg(ARM::CPSR)); + return true; + } + + return false; +} + +/// TryPredicateAndSBitModifier - TryPredicateAndSBitModifier tries to process +/// the possible Predicate and SBitModifier, to build the remaining MCOperand +/// constituents. bool ARMBasicMCBuilder::TryPredicateAndSBitModifier(MCInst& MI, unsigned Opcode, uint32_t insn, unsigned short NumOpsRemaining) { @@ -3183,27 +3279,24 @@ bool ARMBasicMCBuilder::TryPredicateAndSBitModifier(MCInst& MI, unsigned Opcode, // // A8.6.16 B if (Name == "t2Bcc") - MI.addOperand(MCOperand::CreateImm(slice(insn, 25, 22))); + MI.addOperand(MCOperand::CreateImm(CondCode(slice(insn, 25, 22)))); else if (Name == "tBcc") - MI.addOperand(MCOperand::CreateImm(slice(insn, 11, 8))); + MI.addOperand(MCOperand::CreateImm(CondCode(slice(insn, 11, 8)))); else MI.addOperand(MCOperand::CreateImm(ARMCC::AL)); } else { - // ARM Instructions. Check condition field. - int64_t CondVal = getCondField(insn); - if (CondVal == 0xF) - MI.addOperand(MCOperand::CreateImm(ARMCC::AL)); - else - MI.addOperand(MCOperand::CreateImm(CondVal)); + // ARM instructions get their condition field from Inst{31-28}. + MI.addOperand(MCOperand::CreateImm(CondCode(getCondField(insn)))); } } MI.addOperand(MCOperand::CreateReg(ARM::CPSR)); Idx += 2; NumOpsRemaining -= 2; - if (NumOpsRemaining == 0) - return true; } + if (NumOpsRemaining == 0) + return true; + // Next, if OptionalDefOperand exists, we check whether the 'S' bit is set. if (OpInfo[Idx].isOptionalDef() && OpInfo[Idx].RegClass==ARM::CCRRegClassID) { MI.addOperand(MCOperand::CreateReg(getSBit(insn) == 1 ? ARM::CPSR : 0)); @@ -3224,7 +3317,7 @@ bool ARMBasicMCBuilder::RunBuildAfterHook(bool Status, MCInst &MI, if (!SP) return Status; if (Opcode == ARM::t2IT) - SP->InitIT(slice(insn, 7, 0)); + Status = SP->InitIT(slice(insn, 7, 0)) ? Status : false; else if (InITBlock()) SP->UpdateIT(); @@ -3234,7 +3327,7 @@ bool ARMBasicMCBuilder::RunBuildAfterHook(bool Status, MCInst &MI, /// Opcode, Format, and NumOperands make up an ARM Basic MCBuilder. ARMBasicMCBuilder::ARMBasicMCBuilder(unsigned opc, ARMFormat format, unsigned short num) - : Opcode(opc), Format(format), NumOps(num), SP(0) { + : Opcode(opc), Format(format), NumOps(num), SP(0), Err(0) { unsigned Idx = (unsigned)format; assert(Idx < (array_lengthof(FuncPtrs) - 1) && "Unknown format"); Disasm = FuncPtrs[Idx]; @@ -3246,6 +3339,11 @@ ARMBasicMCBuilder::ARMBasicMCBuilder(unsigned opc, ARMFormat format, /// are responsible for freeing up of the allocated memory. Cacheing can be /// performed by the API clients to improve performance. 
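Note: the new CondCode helper above folds the '1111' condition field into ARMCC::AL per A8.3.1, so downstream code only ever sees a valid predicate. A standalone sketch of the same normalization, assuming the usual ARM numbering in which AL is 0b1110 (the enum values below are borrowed for illustration only):

  #include <cassert>
  #include <cstdint>

  // Assumed condition-code values (as in the ARM ARM); only EQ and AL are
  // needed for the demonstration.
  enum CondCodes { EQ = 0, AL = 14 };

  // Condition bits '111x' mean "always executed" (A8.3.1), so a raw field of
  // 0b1111 is mapped to the AL predicate; everything else passes through.
  static uint32_t condCode(uint32_t condField) {
    return condField == 0xF ? AL : condField;
  }

  int main() {
    assert(condCode(0xF) == AL); // '1111' normalized to AL
    assert(condCode(0x0) == EQ); // ordinary condition codes unchanged
    return 0;
  }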
ARMBasicMCBuilder *llvm::CreateMCBuilder(unsigned Opcode, ARMFormat Format) { + // For "Unknown format", fail by returning a NULL pointer. + if ((unsigned)Format >= (array_lengthof(FuncPtrs) - 1)) { + DEBUG(errs() << "Unknown format\n"); + return 0; + } return new ARMBasicMCBuilder(Opcode, Format, ARMInsts[Opcode].getNumOperands()); diff --git a/lib/Target/ARM/Disassembler/ARMDisassemblerCore.h b/lib/Target/ARM/Disassembler/ARMDisassemblerCore.h index 3075230..b1d90df 100644 --- a/lib/Target/ARM/Disassembler/ARMDisassemblerCore.h +++ b/lib/Target/ARM/Disassembler/ARMDisassemblerCore.h @@ -171,30 +171,51 @@ typedef ARMBasicMCBuilder *BO; typedef bool (*DisassembleFP)(MCInst &MI, unsigned Opcode, uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO Builder); +/// CreateMCBuilder - Return an ARMBasicMCBuilder that can build up the MC +/// infrastructure of an MCInst given the Opcode and Format of the instr. +/// Return NULL if it fails to create/return a proper builder. API clients +/// are responsible for freeing up of the allocated memory. Cacheing can be +/// performed by the API clients to improve performance. +extern ARMBasicMCBuilder *CreateMCBuilder(unsigned Opcode, ARMFormat Format); + /// ARMBasicMCBuilder - ARMBasicMCBuilder represents an ARM MCInst builder that /// knows how to build up the MCOperand list. class ARMBasicMCBuilder { + friend ARMBasicMCBuilder *CreateMCBuilder(unsigned Opcode, ARMFormat Format); unsigned Opcode; ARMFormat Format; unsigned short NumOps; DisassembleFP Disasm; Session *SP; + int Err; // !=0 if the builder encounters some error condition during build. + +private: + /// Opcode, Format, and NumOperands make up an ARM Basic MCBuilder. + ARMBasicMCBuilder(unsigned opc, ARMFormat format, unsigned short num); public: ARMBasicMCBuilder(ARMBasicMCBuilder &B) : Opcode(B.Opcode), Format(B.Format), NumOps(B.NumOps), Disasm(B.Disasm), - SP(B.SP) - {} - - /// Opcode, Format, and NumOperands make up an ARM Basic MCBuilder. - ARMBasicMCBuilder(unsigned opc, ARMFormat format, unsigned short num); + SP(B.SP) { + Err = 0; + } virtual ~ARMBasicMCBuilder() {} - void setSession(Session *sp) { + void SetSession(Session *sp) { SP = sp; } + void SetErr(int ErrCode) { + Err = ErrCode; + } + + /// DoPredicateOperands - DoPredicateOperands process the predicate operands + /// of some Thumb instructions which come before the reglist operands. It + /// returns true if the two predicate operands have been processed. + bool DoPredicateOperands(MCInst& MI, unsigned Opcode, + uint32_t insn, unsigned short NumOpsRemaning); + /// TryPredicateAndSBitModifier - TryPredicateAndSBitModifier tries to process /// the possible Predicate and SBitModifier, to build the remaining MCOperand /// constituents. @@ -236,13 +257,6 @@ private: } }; -/// CreateMCBuilder - Return an ARMBasicMCBuilder that can build up the MC -/// infrastructure of an MCInst given the Opcode and Format of the instr. -/// Return NULL if it fails to create/return a proper builder. API clients -/// are responsible for freeing up of the allocated memory. Cacheing can be -/// performed by the API clients to improve performance. 
-extern ARMBasicMCBuilder *CreateMCBuilder(unsigned Opcode, ARMFormat Format); - } // namespace llvm #endif diff --git a/lib/Target/ARM/Disassembler/Makefile b/lib/Target/ARM/Disassembler/Makefile new file mode 100644 index 0000000..031b6ac --- /dev/null +++ b/lib/Target/ARM/Disassembler/Makefile @@ -0,0 +1,16 @@ +##===- lib/Target/ARM/Disassembler/Makefile ----------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## + +LEVEL = ../../../.. +LIBRARYNAME = LLVMARMDisassembler + +# Hack: we need to include 'main' arm target directory to grab private headers +CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. + +include $(LEVEL)/Makefile.common diff --git a/lib/Target/ARM/Disassembler/ThumbDisassemblerCore.h b/lib/Target/ARM/Disassembler/ThumbDisassemblerCore.h index 481f25d..4b2e308 100644 --- a/lib/Target/ARM/Disassembler/ThumbDisassemblerCore.h +++ b/lib/Target/ARM/Disassembler/ThumbDisassemblerCore.h @@ -193,14 +193,18 @@ static inline unsigned getShiftAmtBits(uint32_t insn) { // A8.6.17 BFC // Encoding T1 ARMv6T2, ARMv7 // LLVM-specific encoding for #<lsb> and #<width> -static inline uint32_t getBitfieldInvMask(uint32_t insn) { +static inline bool getBitfieldInvMask(uint32_t insn, uint32_t &mask) { uint32_t lsb = getImm3(insn) << 2 | getImm2(insn); uint32_t msb = getMsb(insn); uint32_t Val = 0; - assert(lsb <= msb && "Encoding error: lsb > msb"); + if (msb < lsb) { + DEBUG(errs() << "Encoding error: msb < lsb\n"); + return false; + } for (uint32_t i = lsb; i <= msb; ++i) Val |= (1 << i); - return ~Val; + mask = ~Val; + return true; } // A8.4 Shifts applied to a register @@ -342,7 +346,7 @@ static inline unsigned decodeRotate(uint32_t insn) { // Special case: // tMOVSr: tRd tRn static bool DisassembleThumb1General(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO Builder) { + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; unsigned &OpIdx = NumOpsAdded; @@ -360,14 +364,14 @@ static bool DisassembleThumb1General(MCInst &MI, unsigned Opcode, uint32_t insn, // Add the destination operand. MI.addOperand(MCOperand::CreateReg( - getRegisterEnum(ARM::tGPRRegClassID, + getRegisterEnum(B, ARM::tGPRRegClassID, UseRt ? getT1tRt(insn) : getT1tRd(insn)))); ++OpIdx; // Check whether the next operand to be added is a CCR Register. if (OpInfo[OpIdx].RegClass == ARM::CCRRegClassID) { assert(OpInfo[OpIdx].isOptionalDef() && "Optional def operand expected"); - MI.addOperand(MCOperand::CreateReg(Builder->InITBlock() ? 0 : ARM::CPSR)); + MI.addOperand(MCOperand::CreateReg(B->InITBlock() ? 0 : ARM::CPSR)); ++OpIdx; } @@ -376,7 +380,7 @@ static bool DisassembleThumb1General(MCInst &MI, unsigned Opcode, uint32_t insn, if (OpInfo[OpIdx].RegClass == ARM::tGPRRegClassID) { // For UseRt, the reg operand is tied to the first reg operand. MI.addOperand(MCOperand::CreateReg( - getRegisterEnum(ARM::tGPRRegClassID, + getRegisterEnum(B, ARM::tGPRRegClassID, UseRt ? getT1tRt(insn) : getT1tRn(insn)))); ++OpIdx; } @@ -388,7 +392,7 @@ static bool DisassembleThumb1General(MCInst &MI, unsigned Opcode, uint32_t insn, // The next available operand is either a reg operand or an imm operand. if (OpInfo[OpIdx].RegClass == ARM::tGPRRegClassID) { // Three register operand instructions. 
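Note: earlier in this chunk, getBitfieldInvMask is changed to report an encoding error instead of asserting when msb < lsb. A standalone sketch of the same computation for BFC's LLVM-specific <lsb>/<width> encoding: build the mask of bits lsb..msb, invert it, and reject msb < lsb.

  #include <cassert>
  #include <cstdint>

  // Compute the inverted mask used by BFC: bits lsb..msb form the field to be
  // cleared, so the operand is the complement of that contiguous run.
  // Returns false for the invalid encoding msb < lsb instead of asserting.
  static bool bitfieldInvMask(uint32_t lsb, uint32_t msb, uint32_t &mask) {
    if (msb < lsb)
      return false;
    uint32_t val = 0;
    for (uint32_t i = lsb; i <= msb; ++i)
      val |= (1u << i);
    mask = ~val;
    return true;
  }

  int main() {
    uint32_t mask = 0;
    bool ok = bitfieldInvMask(8, 15, mask);
    assert(ok && mask == 0xFFFF00FFu);    // clearing bits 8..15
    assert(!bitfieldInvMask(7, 3, mask)); // msb < lsb is rejected
    return 0;
  }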
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::tGPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::tGPRRegClassID, getT1tRm(insn)))); } else { assert(OpInfo[OpIdx].RegClass == 0 && @@ -409,7 +413,7 @@ static bool DisassembleThumb1General(MCInst &MI, unsigned Opcode, uint32_t insn, // tMVN, tRSB: tRd CPSR tRn // Others: tRd CPSR tRd(TIED_TO) tRn static bool DisassembleThumb1DP(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO Builder) { + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { const TargetInstrDesc &TID = ARMInsts[Opcode]; const TargetOperandInfo *OpInfo = TID.OpInfo; @@ -423,14 +427,14 @@ static bool DisassembleThumb1DP(MCInst &MI, unsigned Opcode, uint32_t insn, && "Invalid arguments"); // Add the destination operand. - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::tGPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::tGPRRegClassID, getT1tRd(insn)))); ++OpIdx; // Check whether the next operand to be added is a CCR Register. if (OpInfo[OpIdx].RegClass == ARM::CCRRegClassID) { assert(OpInfo[OpIdx].isOptionalDef() && "Optional def operand expected"); - MI.addOperand(MCOperand::CreateReg(Builder->InITBlock() ? 0 : ARM::CPSR)); + MI.addOperand(MCOperand::CreateReg(B->InITBlock() ? 0 : ARM::CPSR)); ++OpIdx; } @@ -449,7 +453,7 @@ static bool DisassembleThumb1DP(MCInst &MI, unsigned Opcode, uint32_t insn, // Process possible next reg operand. if (OpIdx < NumOps && OpInfo[OpIdx].RegClass == ARM::tGPRRegClassID) { // Add tRn operand. - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::tGPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::tGPRRegClassID, getT1tRn(insn)))); ++OpIdx; } @@ -466,7 +470,7 @@ static bool DisassembleThumb1DP(MCInst &MI, unsigned Opcode, uint32_t insn, // tBX_RET_vararg: Rm // tBLXr_r9: Rm static bool DisassembleThumb1Special(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded) { + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { // tBX_RET has 0 operand. if (NumOps == 0) @@ -474,7 +478,7 @@ static bool DisassembleThumb1Special(MCInst &MI, unsigned Opcode, uint32_t insn, // BX/BLX has 1 reg operand: Rm. if (NumOps == 1) { - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, getT1Rm(insn)))); NumOpsAdded = 1; return true; @@ -489,7 +493,7 @@ static bool DisassembleThumb1Special(MCInst &MI, unsigned Opcode, uint32_t insn, // Add the destination operand. unsigned RegClass = OpInfo[OpIdx].RegClass; MI.addOperand(MCOperand::CreateReg( - getRegisterEnum(RegClass, + getRegisterEnum(B, RegClass, IsGPR(RegClass) ? getT1Rd(insn) : getT1tRd(insn)))); ++OpIdx; @@ -509,7 +513,7 @@ static bool DisassembleThumb1Special(MCInst &MI, unsigned Opcode, uint32_t insn, assert(OpIdx < NumOps && "More operands expected"); RegClass = OpInfo[OpIdx].RegClass; MI.addOperand(MCOperand::CreateReg( - getRegisterEnum(RegClass, + getRegisterEnum(B, RegClass, IsGPR(RegClass) ? 
getT1Rm(insn) : getT1tRn(insn)))); ++OpIdx; @@ -521,9 +525,10 @@ static bool DisassembleThumb1Special(MCInst &MI, unsigned Opcode, uint32_t insn, // // tLDRpci: tRt imm8*4 static bool DisassembleThumb1LdPC(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded) { + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; + if (!OpInfo) return false; assert(NumOps >= 2 && OpInfo[0].RegClass == ARM::tGPRRegClassID && (OpInfo[1].RegClass == 0 && @@ -532,7 +537,7 @@ static bool DisassembleThumb1LdPC(MCInst &MI, unsigned Opcode, uint32_t insn, && "Invalid arguments"); // Add the destination operand. - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::tGPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::tGPRRegClassID, getT1tRt(insn)))); // And the (imm8 << 2) operand. @@ -564,7 +569,7 @@ static bool DisassembleThumb1LdPC(MCInst &MI, unsigned Opcode, uint32_t insn, // Load/Store Register (reg|imm): tRd tRn imm5 tRm // Load Register Signed Byte|Halfword: tRd tRn tRm static bool DisassembleThumb1LdSt(unsigned opA, MCInst &MI, unsigned Opcode, - uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded) { + uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) { const TargetInstrDesc &TID = ARMInsts[Opcode]; const TargetOperandInfo *OpInfo = TID.OpInfo; @@ -581,9 +586,9 @@ static bool DisassembleThumb1LdSt(unsigned opA, MCInst &MI, unsigned Opcode, && "Expect >= 2 operands and first two as thumb reg operands"); // Add the destination reg and the base reg. - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::tGPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::tGPRRegClassID, getT1tRd(insn)))); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::tGPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::tGPRRegClassID, getT1tRn(insn)))); OpIdx = 2; @@ -603,9 +608,10 @@ static bool DisassembleThumb1LdSt(unsigned opA, MCInst &MI, unsigned Opcode, // The next reg operand is tRm, the offset. assert(OpIdx < NumOps && OpInfo[OpIdx].RegClass == ARM::tGPRRegClassID && "Thumb reg operand expected"); - MI.addOperand(MCOperand::CreateReg(Imm5 ? 0 - : getRegisterEnum(ARM::tGPRRegClassID, - getT1tRm(insn)))); + MI.addOperand(MCOperand::CreateReg( + Imm5 ? 
0 + : getRegisterEnum(B, ARM::tGPRRegClassID, + getT1tRm(insn)))); ++OpIdx; return true; @@ -615,12 +621,13 @@ static bool DisassembleThumb1LdSt(unsigned opA, MCInst &MI, unsigned Opcode, // // Load/Store Register SP relative: tRt ARM::SP imm8 static bool DisassembleThumb1LdStSP(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded) { + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { assert((Opcode == ARM::tLDRspi || Opcode == ARM::tSTRspi) - && "Invalid opcode"); + && "Unexpected opcode"); const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; + if (!OpInfo) return false; assert(NumOps >= 3 && OpInfo[0].RegClass == ARM::tGPRRegClassID && @@ -630,7 +637,7 @@ static bool DisassembleThumb1LdStSP(MCInst &MI, unsigned Opcode, uint32_t insn, !OpInfo[2].isOptionalDef()) && "Invalid arguments"); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::tGPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::tGPRRegClassID, getT1tRt(insn)))); MI.addOperand(MCOperand::CreateReg(ARM::SP)); MI.addOperand(MCOperand::CreateImm(getT1Imm8(insn))); @@ -643,11 +650,12 @@ static bool DisassembleThumb1LdStSP(MCInst &MI, unsigned Opcode, uint32_t insn, // // tADDrPCi: tRt imm8 static bool DisassembleThumb1AddPCi(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded) { + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - assert(Opcode == ARM::tADDrPCi && "Invalid opcode"); + assert(Opcode == ARM::tADDrPCi && "Unexpected opcode"); const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; + if (!OpInfo) return false; assert(NumOps >= 2 && OpInfo[0].RegClass == ARM::tGPRRegClassID && (OpInfo[1].RegClass == 0 && @@ -655,7 +663,7 @@ static bool DisassembleThumb1AddPCi(MCInst &MI, unsigned Opcode, uint32_t insn, !OpInfo[1].isOptionalDef()) && "Invalid arguments"); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::tGPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::tGPRRegClassID, getT1tRt(insn)))); MI.addOperand(MCOperand::CreateImm(getT1Imm8(insn))); NumOpsAdded = 2; @@ -667,11 +675,12 @@ static bool DisassembleThumb1AddPCi(MCInst &MI, unsigned Opcode, uint32_t insn, // // tADDrSPi: tRt ARM::SP imm8 static bool DisassembleThumb1AddSPi(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded) { + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - assert(Opcode == ARM::tADDrSPi && "Invalid opcode"); + assert(Opcode == ARM::tADDrSPi && "Unexpected opcode"); const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; + if (!OpInfo) return false; assert(NumOps >= 3 && OpInfo[0].RegClass == ARM::tGPRRegClassID && @@ -681,7 +690,7 @@ static bool DisassembleThumb1AddSPi(MCInst &MI, unsigned Opcode, uint32_t insn, !OpInfo[2].isOptionalDef()) && "Invalid arguments"); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::tGPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::tGPRRegClassID, getT1tRt(insn)))); MI.addOperand(MCOperand::CreateReg(ARM::SP)); MI.addOperand(MCOperand::CreateImm(getT1Imm8(insn))); @@ -697,23 +706,27 @@ static bool DisassembleThumb1AddSPi(MCInst &MI, unsigned Opcode, uint32_t insn, // "low registers" is specified by Inst{7-0} // lr|pc is specified by Inst{8} static bool DisassembleThumb1PushPop(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded) { + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - assert((Opcode == ARM::tPUSH || Opcode == ARM::tPOP) 
&& "Invalid opcode"); + assert((Opcode == ARM::tPUSH || Opcode == ARM::tPOP) && "Unexpected opcode"); unsigned &OpIdx = NumOpsAdded; // Handling the two predicate operands before the reglist. - MI.addOperand(MCOperand::CreateImm(ARMCC::AL)); - MI.addOperand(MCOperand::CreateReg(ARM::CPSR)); - OpIdx = 2; + if (B->DoPredicateOperands(MI, Opcode, insn, NumOps)) + OpIdx += 2; + else { + DEBUG(errs() << "Expected predicate operands not found.\n"); + return false; + } - // Fill the variadic part of reglist. unsigned RegListBits = slice(insn, 8, 8) << (Opcode == ARM::tPUSH ? 14 : 15) | slice(insn, 7, 0); + + // Fill the variadic part of reglist. for (unsigned i = 0; i < 16; ++i) { if ((RegListBits >> i) & 1) { - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, i))); ++OpIdx; } @@ -735,13 +748,13 @@ static bool DisassembleThumb1PushPop(MCInst &MI, unsigned Opcode, uint32_t insn, // no operand // Others: tRd tRn static bool DisassembleThumb1Misc(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded) { + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { if (NumOps == 0) return true; if (Opcode == ARM::tPUSH || Opcode == ARM::tPOP) - return DisassembleThumb1PushPop(MI, Opcode, insn, NumOps, NumOpsAdded); + return DisassembleThumb1PushPop(MI, Opcode, insn, NumOps, NumOpsAdded, B); const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; @@ -799,16 +812,16 @@ static bool DisassembleThumb1Misc(MCInst &MI, unsigned Opcode, uint32_t insn, && "Expect >=2 operands"); // Add the destination operand. - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::tGPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::tGPRRegClassID, getT1tRd(insn)))); if (OpInfo[1].RegClass == ARM::tGPRRegClassID) { // Two register instructions. - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::tGPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::tGPRRegClassID, getT1tRn(insn)))); } else { // CBNZ, CBZ - assert((Opcode == ARM::tCBNZ || Opcode == ARM::tCBZ) && "Invalid opcode"); + assert((Opcode == ARM::tCBNZ || Opcode == ARM::tCBZ) &&"Unexpected opcode"); MI.addOperand(MCOperand::CreateImm(getT1Imm6(insn) * 2)); } @@ -823,42 +836,47 @@ static bool DisassembleThumb1Misc(MCInst &MI, unsigned Opcode, uint32_t insn, // tLDM_UPD/tSTM_UPD: tRt tRt AM4ModeImm Pred-Imm Pred-CCR register_list // tLDM: tRt AM4ModeImm Pred-Imm Pred-CCR register_list static bool DisassembleThumb1LdStMul(bool Ld, MCInst &MI, unsigned Opcode, - uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded) { + uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) { assert((Opcode == ARM::tLDM || Opcode == ARM::tLDM_UPD || - Opcode == ARM::tSTM_UPD) && "Invalid opcode"); + Opcode == ARM::tSTM_UPD) && "Unexpected opcode"); unsigned &OpIdx = NumOpsAdded; unsigned tRt = getT1tRt(insn); - unsigned RegListBits = slice(insn, 7, 0); OpIdx = 0; // WB register, if necessary. 
if (Opcode == ARM::tLDM_UPD || Opcode == ARM::tSTM_UPD) { - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, tRt))); ++OpIdx; } - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, tRt))); ++OpIdx; // A8.6.53 LDM / LDMIA / LDMFD - Encoding T1 + // A8.6.53 STM / STMIA / STMEA - Encoding T1 MI.addOperand(MCOperand::CreateImm(ARM_AM::getAM4ModeImm(ARM_AM::ia))); ++OpIdx; // Handling the two predicate operands before the reglist. - MI.addOperand(MCOperand::CreateImm(ARMCC::AL)); - MI.addOperand(MCOperand::CreateReg(ARM::CPSR)); - OpIdx += 2; + if (B->DoPredicateOperands(MI, Opcode, insn, NumOps)) + OpIdx += 2; + else { + DEBUG(errs() << "Expected predicate operands not found.\n"); + return false; + } + + unsigned RegListBits = slice(insn, 7, 0); // Fill the variadic part of reglist. for (unsigned i = 0; i < 8; ++i) { if ((RegListBits >> i) & 1) { - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::tGPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::tGPRRegClassID, i))); ++OpIdx; } @@ -868,13 +886,15 @@ static bool DisassembleThumb1LdStMul(bool Ld, MCInst &MI, unsigned Opcode, } static bool DisassembleThumb1LdMul(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded) { - return DisassembleThumb1LdStMul(true, MI, Opcode, insn, NumOps, NumOpsAdded); + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { + return DisassembleThumb1LdStMul(true, MI, Opcode, insn, NumOps, NumOpsAdded, + B); } static bool DisassembleThumb1StMul(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded) { - return DisassembleThumb1LdStMul(false, MI, Opcode, insn, NumOps, NumOpsAdded); + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { + return DisassembleThumb1LdStMul(false, MI, Opcode, insn, NumOps, NumOpsAdded, + B); } // A8.6.16 B Encoding T1 @@ -885,12 +905,14 @@ static bool DisassembleThumb1StMul(MCInst &MI, unsigned Opcode, uint32_t insn, // tSVC: imm8 Pred-Imm Pred-CCR // tTRAP: 0 operand (early return) static bool DisassembleThumb1CondBr(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded) { + unsigned short NumOps, unsigned &NumOpsAdded, BO) { if (Opcode == ARM::tTRAP) return true; const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; + if (!OpInfo) return false; + assert(NumOps == 3 && OpInfo[0].RegClass == 0 && OpInfo[1].isPredicate() && OpInfo[2].RegClass == ARM::CCRRegClassID && "Exactly 3 operands expected"); @@ -912,9 +934,11 @@ static bool DisassembleThumb1CondBr(MCInst &MI, unsigned Opcode, uint32_t insn, // // tB: offset static bool DisassembleThumb1Br(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded) { + unsigned short NumOps, unsigned &NumOpsAdded, BO) { const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; + if (!OpInfo) return false; + assert(NumOps == 1 && OpInfo[0].RegClass == 0 && "1 imm operand expected"); unsigned Imm11 = getT1Imm11(insn); @@ -952,9 +976,8 @@ static bool DisassembleThumb1Br(MCInst &MI, unsigned Opcode, uint32_t insn, // 1101xx Conditional branch, and Supervisor Call on page A6-13 // 11100x Unconditional Branch, see B on page A8-44 // -static bool DisassembleThumb1(uint16_t op, - MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO Builder) { +static 
bool DisassembleThumb1(uint16_t op, MCInst &MI, unsigned Opcode, + uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) { unsigned op1 = slice(op, 5, 4); unsigned op2 = slice(op, 3, 2); @@ -963,27 +986,27 @@ static bool DisassembleThumb1(uint16_t op, switch (op1) { case 0: // A6.2.1 Shift (immediate), add, subtract, move, and compare - return DisassembleThumb1General(MI, Opcode, insn, NumOps, NumOpsAdded, - Builder); + return DisassembleThumb1General(MI, Opcode, insn, NumOps, NumOpsAdded, B); case 1: switch (op2) { case 0: switch (op3) { case 0: // A6.2.2 Data-processing - return DisassembleThumb1DP(MI, Opcode, insn, NumOps, NumOpsAdded, - Builder); + return DisassembleThumb1DP(MI, Opcode, insn, NumOps, NumOpsAdded, B); case 1: // A6.2.3 Special data instructions and branch and exchange - return DisassembleThumb1Special(MI, Opcode, insn, NumOps, NumOpsAdded); + return DisassembleThumb1Special(MI, Opcode, insn, NumOps, NumOpsAdded, + B); default: // A8.6.59 LDR (literal) - return DisassembleThumb1LdPC(MI, Opcode, insn, NumOps, NumOpsAdded); + return DisassembleThumb1LdPC(MI, Opcode, insn, NumOps, NumOpsAdded, B); } break; default: // A6.2.4 Load/store single data item - return DisassembleThumb1LdSt(opA, MI, Opcode, insn, NumOps, NumOpsAdded); + return DisassembleThumb1LdSt(opA, MI, Opcode, insn, NumOps, NumOpsAdded, + B); break; } break; @@ -991,21 +1014,24 @@ static bool DisassembleThumb1(uint16_t op, switch (op2) { case 0: // A6.2.4 Load/store single data item - return DisassembleThumb1LdSt(opA, MI, Opcode, insn, NumOps, NumOpsAdded); + return DisassembleThumb1LdSt(opA, MI, Opcode, insn, NumOps, NumOpsAdded, + B); case 1: // A6.2.4 Load/store single data item - return DisassembleThumb1LdStSP(MI, Opcode, insn, NumOps, NumOpsAdded); + return DisassembleThumb1LdStSP(MI, Opcode, insn, NumOps, NumOpsAdded, B); case 2: if (op3 <= 1) { // A8.6.10 ADR - return DisassembleThumb1AddPCi(MI, Opcode, insn, NumOps, NumOpsAdded); + return DisassembleThumb1AddPCi(MI, Opcode, insn, NumOps, NumOpsAdded, + B); } else { // A8.6.8 ADD (SP plus immediate) - return DisassembleThumb1AddSPi(MI, Opcode, insn, NumOps, NumOpsAdded); + return DisassembleThumb1AddSPi(MI, Opcode, insn, NumOps, NumOpsAdded, + B); } default: // A6.2.5 Miscellaneous 16-bit instructions - return DisassembleThumb1Misc(MI, Opcode, insn, NumOps, NumOpsAdded); + return DisassembleThumb1Misc(MI, Opcode, insn, NumOps, NumOpsAdded, B); } break; case 3: @@ -1013,17 +1039,17 @@ static bool DisassembleThumb1(uint16_t op, case 0: if (op3 <= 1) { // A8.6.189 STM / STMIA / STMEA - return DisassembleThumb1StMul(MI, Opcode, insn, NumOps, NumOpsAdded); + return DisassembleThumb1StMul(MI, Opcode, insn, NumOps, NumOpsAdded, B); } else { // A8.6.53 LDM / LDMIA / LDMFD - return DisassembleThumb1LdMul(MI, Opcode, insn, NumOps, NumOpsAdded); + return DisassembleThumb1LdMul(MI, Opcode, insn, NumOps, NumOpsAdded, B); } case 1: // A6.2.6 Conditional branch, and Supervisor Call - return DisassembleThumb1CondBr(MI, Opcode, insn, NumOps, NumOpsAdded); + return DisassembleThumb1CondBr(MI, Opcode, insn, NumOps, NumOpsAdded, B); case 2: // Unconditional Branch, see B on page A8-44 - return DisassembleThumb1Br(MI, Opcode, insn, NumOps, NumOpsAdded); + return DisassembleThumb1Br(MI, Opcode, insn, NumOps, NumOpsAdded, B); default: assert(0 && "Unreachable code"); break; @@ -1079,32 +1105,32 @@ static bool DisassembleThumb2SRS(MCInst &MI, unsigned Opcode, uint32_t insn, // t2RFE[IA|DB]W/t2RFE[IA|DB]: Rn static bool DisassembleThumb2RFE(MCInst &MI, unsigned 
Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded) { - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRn(insn)))); NumOpsAdded = 1; return true; } static bool DisassembleThumb2LdStMul(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded) { + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { if (Thumb2SRSOpcode(Opcode)) return DisassembleThumb2SRS(MI, Opcode, insn, NumOps, NumOpsAdded); if (Thumb2RFEOpcode(Opcode)) - return DisassembleThumb2RFE(MI, Opcode, insn, NumOps, NumOpsAdded); + return DisassembleThumb2RFE(MI, Opcode, insn, NumOps, NumOpsAdded, B); assert((Opcode == ARM::t2LDM || Opcode == ARM::t2LDM_UPD || Opcode == ARM::t2STM || Opcode == ARM::t2STM_UPD) - && "Invalid opcode"); + && "Unexpected opcode"); assert(NumOps >= 5 && "Thumb2 LdStMul expects NumOps >= 5"); unsigned &OpIdx = NumOpsAdded; OpIdx = 0; - unsigned Base = getRegisterEnum(ARM::GPRRegClassID, decodeRn(insn)); + unsigned Base = getRegisterEnum(B, ARM::GPRRegClassID, decodeRn(insn)); // Writeback to base. if (Opcode == ARM::t2LDM_UPD || Opcode == ARM::t2STM_UPD) { @@ -1120,15 +1146,19 @@ static bool DisassembleThumb2LdStMul(MCInst &MI, unsigned Opcode, uint32_t insn, ++OpIdx; // Handling the two predicate operands before the reglist. - MI.addOperand(MCOperand::CreateImm(ARMCC::AL)); - MI.addOperand(MCOperand::CreateReg(ARM::CPSR)); - OpIdx += 2; + if (B->DoPredicateOperands(MI, Opcode, insn, NumOps)) + OpIdx += 2; + else { + DEBUG(errs() << "Expected predicate operands not found.\n"); + return false; + } - // Fill the variadic part of reglist. unsigned RegListBits = insn & ((1 << 16) - 1); + + // Fill the variadic part of reglist. for (unsigned i = 0; i < 16; ++i) { if ((RegListBits >> i) & 1) { - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, i))); ++OpIdx; } @@ -1144,9 +1174,11 @@ static bool DisassembleThumb2LdStMul(MCInst &MI, unsigned Opcode, uint32_t insn, // t2STREXD: Rm Rd Rs Rn // t2STREXB, t2STREXH: Rm Rd Rn static bool DisassembleThumb2LdStEx(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded) { + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; + if (!OpInfo) return false; + unsigned &OpIdx = NumOpsAdded; OpIdx = 0; @@ -1163,25 +1195,25 @@ static bool DisassembleThumb2LdStEx(MCInst &MI, unsigned Opcode, uint32_t insn, // Add the destination operand for store. if (isStore) { MI.addOperand(MCOperand::CreateReg( - getRegisterEnum(ARM::GPRRegClassID, + getRegisterEnum(B, ARM::GPRRegClassID, isSW ? decodeRs(insn) : decodeRm(insn)))); ++OpIdx; } // Source operand for store and destination operand for load. - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRd(insn)))); ++OpIdx; // Thumb2 doubleword complication: with an extra source/destination operand. if (isDW) { - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRs(insn)))); ++OpIdx; } // Finally add the pointer operand. 
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRn(insn)))); ++OpIdx; @@ -1198,9 +1230,10 @@ static bool DisassembleThumb2LdStEx(MCInst &MI, unsigned Opcode, uint32_t insn, // Ditto for t2LDRD_PRE, t2LDRD_POST, t2STRD_PRE, t2STRD_POST, which are for // disassembly only and do not have a tied_to writeback base register operand. static bool DisassembleThumb2LdStDual(MCInst &MI, unsigned Opcode, - uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded) { + uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) { const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; + if (!OpInfo) return false; assert(NumOps >= 4 && OpInfo[0].RegClass == ARM::GPRRegClassID @@ -1210,11 +1243,11 @@ static bool DisassembleThumb2LdStDual(MCInst &MI, unsigned Opcode, && "Expect >= 4 operands and first 3 as reg operands"); // Add the <Rt> <Rt2> operands. - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRd(insn)))); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRs(insn)))); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRn(insn)))); // Finally add (+/-)imm8*4, depending on the U bit. @@ -1235,15 +1268,15 @@ static bool DisassembleThumb2LdStDual(MCInst &MI, unsigned Opcode, // // t2TBBgen, t2TBHgen: Rn Rm Pred-Imm Pred-CCR static bool DisassembleThumb2TB(MCInst &MI, unsigned Opcode, - uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded) { + uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) { assert(NumOps >= 2 && "Expect >= 2 operands"); // The generic version of TBB/TBH needs a base register. - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRn(insn)))); // Add the index register. - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRm(insn)))); NumOpsAdded = 2; @@ -1278,7 +1311,7 @@ static inline bool Thumb2ShiftOpcode(unsigned Opcode) { // nothing else, because the shift amount is already specified. // Similar case holds for t2MOVrx, t2ADDrr, ..., etc. static bool DisassembleThumb2DPSoReg(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded) { + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { const TargetInstrDesc &TID = ARMInsts[Opcode]; const TargetOperandInfo *OpInfo = TID.OpInfo; @@ -1293,7 +1326,7 @@ static bool DisassembleThumb2DPSoReg(MCInst &MI, unsigned Opcode, uint32_t insn, && OpInfo[3].RegClass == 0 && "Exactlt 4 operands expect and first two as reg operands"); // Only need to populate the src reg operand. - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRm(insn)))); MI.addOperand(MCOperand::CreateReg(0)); MI.addOperand(MCOperand::CreateImm(0)); @@ -1315,7 +1348,7 @@ static bool DisassembleThumb2DPSoReg(MCInst &MI, unsigned Opcode, uint32_t insn, // Build the register operands, followed by the constant shift specifier. 
MI.addOperand(MCOperand::CreateReg( - getRegisterEnum(ARM::GPRRegClassID, + getRegisterEnum(B, ARM::GPRRegClassID, NoDstReg ? decodeRn(insn) : decodeRs(insn)))); ++OpIdx; @@ -1324,15 +1357,18 @@ static bool DisassembleThumb2DPSoReg(MCInst &MI, unsigned Opcode, uint32_t insn, if ((Idx = TID.getOperandConstraint(OpIdx, TOI::TIED_TO)) != -1) { // Process tied_to operand constraint. MI.addOperand(MI.getOperand(Idx)); - } else { - assert(!NoDstReg && "Internal error"); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + ++OpIdx; + } else if (!NoDstReg) { + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRn(insn)))); + ++OpIdx; + } else { + DEBUG(errs() << "Thumb2 encoding error: d==15 for three-reg operands.\n"); + return false; } - ++OpIdx; } - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRm(insn)))); ++OpIdx; @@ -1373,7 +1409,7 @@ static bool DisassembleThumb2DPSoReg(MCInst &MI, unsigned Opcode, uint32_t insn, // // ModImm = ThumbExpandImm(i:imm3:imm8) static bool DisassembleThumb2DPModImm(MCInst &MI, unsigned Opcode, - uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded) { + uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) { const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; unsigned &OpIdx = NumOpsAdded; @@ -1389,13 +1425,16 @@ static bool DisassembleThumb2DPModImm(MCInst &MI, unsigned Opcode, // Build the register operands, followed by the modified immediate. MI.addOperand(MCOperand::CreateReg( - getRegisterEnum(ARM::GPRRegClassID, + getRegisterEnum(B, ARM::GPRRegClassID, NoDstReg ? decodeRn(insn) : decodeRs(insn)))); ++OpIdx; if (TwoReg) { - assert(!NoDstReg && "Internal error"); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + if (NoDstReg) { + DEBUG(errs()<<"Thumb2 encoding error: d==15 for DPModImm 2-reg instr.\n"); + return false; + } + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRn(insn)))); ++OpIdx; } @@ -1437,7 +1476,7 @@ static inline unsigned decodeThumb2SaturatePos(unsigned Opcode, uint32_t insn) { case ARM::t2USAT16: return slice(insn, 3, 0); default: - assert(0 && "Invalid opcode passed in"); + assert(0 && "Unexpected opcode"); return 0; } } @@ -1459,7 +1498,7 @@ static inline unsigned decodeThumb2SaturatePos(unsigned Opcode, uint32_t insn) { // o t2SSAT[lsl|asr], t2USAT[lsl|asr]: Rs sat_pos Rn shamt // o t2SSAT16, t2USAT16: Rs sat_pos Rn static bool DisassembleThumb2DPBinImm(MCInst &MI, unsigned Opcode, - uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded) { + uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) { const TargetInstrDesc &TID = ARMInsts[Opcode]; const TargetOperandInfo *OpInfo = TID.OpInfo; @@ -1474,7 +1513,7 @@ static bool DisassembleThumb2DPBinImm(MCInst &MI, unsigned Opcode, // Build the register operand(s), followed by the immediate(s). 
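Note: the DPModImm comment above records that the modified immediate is ThumbExpandImm(i:imm3:imm8). The expansion itself is not part of this patch; the sketch below follows the A6.3.2 pseudocode for the 12-bit field (byte-repeat patterns when the top two bits are '00', otherwise an 8-bit value with a forced leading 1 rotated right):

  #include <cassert>
  #include <cstdint>

  static uint32_t ror32(uint32_t v, unsigned amt) {
    amt &= 31;
    return amt ? (v >> amt) | (v << (32 - amt)) : v;
  }

  // Sketch of ThumbExpandImm (ARM ARM A6.3.2) for imm12 = i:imm3:imm8.
  static uint32_t thumbExpandImm(uint32_t imm12) {
    uint32_t imm8 = imm12 & 0xFF;
    if (((imm12 >> 10) & 0x3) == 0) {
      switch ((imm12 >> 8) & 0x3) {
      case 0:  return imm8;                                   // 00 00 00 XY
      case 1:  return (imm8 << 16) | imm8;                    // 00 XY 00 XY
      case 2:  return (imm8 << 24) | (imm8 << 8);             // XY 00 XY 00
      default: return (imm8 << 24) | (imm8 << 16) | (imm8 << 8) | imm8;
      }
    }
    // Otherwise: an 8-bit value with bit 7 forced to 1, rotated right by
    // imm12<11:7> (always >= 8 on this path).
    uint32_t unrotated = 0x80 | (imm12 & 0x7F);
    return ror32(unrotated, (imm12 >> 7) & 0x1F);
  }

  int main() {
    assert(thumbExpandImm(0x0FF) == 0x000000FFu);
    assert(thumbExpandImm(0x355) == 0x55555555u);
    assert(thumbExpandImm(0x4FF) == 0x7F800000u); // 0xFF rotated right by 9
    return 0;
  }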
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRs(insn)))); ++OpIdx; @@ -1482,7 +1521,7 @@ static bool DisassembleThumb2DPBinImm(MCInst &MI, unsigned Opcode, if (Thumb2SaturateOpcode(Opcode)) { MI.addOperand(MCOperand::CreateImm(decodeThumb2SaturatePos(Opcode, insn))); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRn(insn)))); if (Opcode == ARM::t2SSAT16 || Opcode == ARM::t2USAT16) { @@ -1510,7 +1549,7 @@ static bool DisassembleThumb2DPBinImm(MCInst &MI, unsigned Opcode, MI.addOperand(MI.getOperand(Idx)); } else { // Add src reg operand. - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRn(insn)))); } ++OpIdx; @@ -1528,15 +1567,22 @@ static bool DisassembleThumb2DPBinImm(MCInst &MI, unsigned Opcode, MI.addOperand(MCOperand::CreateImm(getIImm3Imm8(insn))); else if (Opcode == ARM::t2MOVi16 || Opcode == ARM::t2MOVTi16) MI.addOperand(MCOperand::CreateImm(getImm16(insn))); - else if (Opcode == ARM::t2BFC) - MI.addOperand(MCOperand::CreateImm(getBitfieldInvMask(insn))); - else { + else if (Opcode == ARM::t2BFC) { + uint32_t mask = 0; + if (getBitfieldInvMask(insn, mask)) + MI.addOperand(MCOperand::CreateImm(mask)); + else + return false; + } else { // Handle the case of: lsb width assert((Opcode == ARM::t2SBFX || Opcode == ARM::t2UBFX || - Opcode == ARM::t2BFI) && "Invalid opcode"); + Opcode == ARM::t2BFI) && "Unexpected opcode"); MI.addOperand(MCOperand::CreateImm(getLsb(insn))); if (Opcode == ARM::t2BFI) { - assert(getMsb(insn) >= getLsb(insn) && "Encoding error"); + if (getMsb(insn) < getLsb(insn)) { + DEBUG(errs() << "Encoding error: msb < lsb\n"); + return false; + } MI.addOperand(MCOperand::CreateImm(getMsb(insn) - getLsb(insn) + 1)); } else MI.addOperand(MCOperand::CreateImm(getWidthMinus1(insn) + 1)); @@ -1585,7 +1631,7 @@ static inline bool t2MiscCtrlInstr(uint32_t insn) { // t2MSR/t2MSRsys -> Rn mask=Inst{11-8} // t2SMC -> imm4 = Inst{19-16} static bool DisassembleThumb2BrMiscCtrl(MCInst &MI, unsigned Opcode, - uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded) { + uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) { if (NumOps == 0) return true; @@ -1627,21 +1673,21 @@ static bool DisassembleThumb2BrMiscCtrl(MCInst &MI, unsigned Opcode, // MRS and MRSsys take one GPR reg Rs. if (Opcode == ARM::t2MRS || Opcode == ARM::t2MRSsys) { - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRs(insn)))); NumOpsAdded = 1; return true; } // BXJ takes one GPR reg Rn. if (Opcode == ARM::t2BXJ) { - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRn(insn)))); NumOpsAdded = 1; return true; } // MSR and MSRsys take one GPR reg Rn, followed by the mask. 
if (Opcode == ARM::t2MSR || Opcode == ARM::t2MSRsys || Opcode == ARM::t2BXJ) { - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRn(insn)))); MI.addOperand(MCOperand::CreateImm(slice(insn, 11, 8))); NumOpsAdded = 2; @@ -1659,7 +1705,7 @@ static bool DisassembleThumb2BrMiscCtrl(MCInst &MI, unsigned Opcode, switch (Opcode) { default: - assert(0 && "Unreachable code"); + assert(0 && "Unexpected opcode"); return false; case ARM::t2B: Offset = decodeImm32_B_EncodingT4(insn); @@ -1700,7 +1746,7 @@ static inline bool Thumb2PreloadOpcode(unsigned Opcode) { } static bool DisassembleThumb2PreLoad(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded) { + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { // Preload Data/Instruction requires either 2 or 3 operands. // t2PLDi12, t2PLDi8, t2PLDpci: Rn [+/-]imm12/imm8 @@ -1718,12 +1764,12 @@ static bool DisassembleThumb2PreLoad(MCInst &MI, unsigned Opcode, uint32_t insn, OpInfo[0].RegClass == ARM::GPRRegClassID && "Expect >= 2 operands and first one as reg operand"); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRn(insn)))); ++OpIdx; if (OpInfo[OpIdx].RegClass == ARM::GPRRegClassID) { - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRm(insn)))); } else { assert(OpInfo[OpIdx].RegClass == 0 && !OpInfo[OpIdx].isPredicate() @@ -1765,9 +1811,10 @@ static bool DisassembleThumb2PreLoad(MCInst &MI, unsigned Opcode, uint32_t insn, // These instrs calculate an address from the PC value and an immediate offset. // Rd Rn=PC (+/-)imm12 (+ if Inst{23} == 0b1) static bool DisassembleThumb2Ldpci(MCInst &MI, unsigned Opcode, - uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded) { + uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) { const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; + if (!OpInfo) return false; assert(NumOps >= 2 && OpInfo[0].RegClass == ARM::GPRRegClassID && @@ -1776,7 +1823,7 @@ static bool DisassembleThumb2Ldpci(MCInst &MI, unsigned Opcode, // Build the register operand, followed by the (+/-)imm12 immediate. - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRd(insn)))); MI.addOperand(MCOperand::CreateImm(decodeImm12(insn))); @@ -1812,16 +1859,16 @@ static bool DisassembleThumb2Ldpci(MCInst &MI, unsigned Opcode, // Delegates to DisassembleThumb2PreLoad() for preload data/instruction. // Delegates to DisassembleThumb2Ldpci() for load * literal operations. static bool DisassembleThumb2LdSt(bool Load, MCInst &MI, unsigned Opcode, - uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded) { + uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) { unsigned Rn = decodeRn(insn); if (Thumb2PreloadOpcode(Opcode)) - return DisassembleThumb2PreLoad(MI, Opcode, insn, NumOps, NumOpsAdded); + return DisassembleThumb2PreLoad(MI, Opcode, insn, NumOps, NumOpsAdded, B); // See, for example, A6.3.7 Load word: Table A6-18 Load word. 
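Note: the t2B case above calls decodeImm32_B_EncodingT4, which reconstructs the branch offset from the S, J1, J2, imm10 and imm11 fields (A8.6.16, encoding T4). The field extraction is not shown in this hunk, so the sketch below works on already-extracted fields and only demonstrates the I1/I2 inversion and the sign extension:

  #include <cassert>
  #include <cstdint>

  // Encoding T4 of B: imm32 = SignExtend(S:I1:I2:imm10:imm11:'0'), where
  // I1 = NOT(J1 EOR S) and I2 = NOT(J2 EOR S).
  static int32_t branchImm32T4(uint32_t S, uint32_t J1, uint32_t J2,
                               uint32_t imm10, uint32_t imm11) {
    uint32_t I1 = (~(J1 ^ S)) & 1;
    uint32_t I2 = (~(J2 ^ S)) & 1;
    uint32_t imm25 = (S << 24) | (I1 << 23) | (I2 << 22) |
                     (imm10 << 12) | (imm11 << 1);
    if (imm25 & (1u << 24))
      imm25 |= 0xFE000000u; // sign-extend from bit 24
    return (int32_t)imm25;
  }

  int main() {
    // All-zero fields except S: the most negative offset, -16 MiB.
    assert(branchImm32T4(1, 0, 0, 0, 0) == -(1 << 24));
    // S = 0 with J1 = J2 = 1 gives a small positive forward offset.
    assert(branchImm32T4(0, 1, 1, 0, 2) == 4);
    return 0;
  }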
if (Load && Rn == 15) - return DisassembleThumb2Ldpci(MI, Opcode, insn, NumOps, NumOpsAdded); + return DisassembleThumb2Ldpci(MI, Opcode, insn, NumOps, NumOpsAdded, B); const TargetInstrDesc &TID = ARMInsts[Opcode]; const TargetOperandInfo *OpInfo = TID.OpInfo; @@ -1870,13 +1917,16 @@ static bool DisassembleThumb2LdSt(bool Load, MCInst &MI, unsigned Opcode, Imm = decodeImm8(insn); } - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, R0))); + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, + R0))); ++OpIdx; - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, R1))); + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, + R1))); ++OpIdx; if (ThreeReg) { - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID,R2))); + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, + R2))); ++OpIdx; } @@ -1900,7 +1950,7 @@ static bool DisassembleThumb2LdSt(bool Load, MCInst &MI, unsigned Opcode, // // Miscellaneous operations: Rs [Rn] Rm static bool DisassembleThumb2DPReg(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded) { + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { const TargetInstrDesc &TID = ARMInsts[Opcode]; const TargetOperandInfo *OpInfo = TID.OpInfo; @@ -1917,17 +1967,17 @@ static bool DisassembleThumb2DPReg(MCInst &MI, unsigned Opcode, uint32_t insn, bool ThreeReg = NumOps > 2 && OpInfo[2].RegClass == ARM::GPRRegClassID; - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRs(insn)))); ++OpIdx; if (ThreeReg) { - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRn(insn)))); ++OpIdx; } - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRm(insn)))); ++OpIdx; @@ -1954,7 +2004,7 @@ static bool DisassembleThumb2DPReg(MCInst &MI, unsigned Opcode, uint32_t insn, // Unsigned Sum of Absolute Differences [and Accumulate] // Rs Rn Rm [Ra=Inst{15-12}] static bool DisassembleThumb2Mul(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded) { + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; @@ -1968,17 +2018,17 @@ static bool DisassembleThumb2Mul(MCInst &MI, unsigned Opcode, uint32_t insn, bool FourReg = NumOps > 3 && OpInfo[3].RegClass == ARM::GPRRegClassID; - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRs(insn)))); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRn(insn)))); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRm(insn)))); if (FourReg) - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRd(insn)))); NumOpsAdded = FourReg ? 
4 : 3; @@ -1999,7 +2049,7 @@ static bool DisassembleThumb2Mul(MCInst &MI, unsigned Opcode, uint32_t insn, // // Signed/Unsigned divide: t2SDIV, t2UDIV: Rs Rn Rm static bool DisassembleThumb2LongMul(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded) { + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; @@ -2014,16 +2064,16 @@ static bool DisassembleThumb2LongMul(MCInst &MI, unsigned Opcode, uint32_t insn, // Build the register operands. if (FourReg) - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRd(insn)))); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRs(insn)))); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRn(insn)))); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRm(insn)))); if (FourReg) @@ -2059,38 +2109,41 @@ static bool DisassembleThumb2LongMul(MCInst &MI, unsigned Opcode, uint32_t insn, // 1xxxxxx - Coprocessor instructions on page A6-40 // static bool DisassembleThumb2(uint16_t op1, uint16_t op2, uint16_t op, - MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded) { + MCInst &MI, unsigned Opcode, uint32_t insn, unsigned short NumOps, + unsigned &NumOpsAdded, BO B) { switch (op1) { case 1: if (slice(op2, 6, 5) == 0) { if (slice(op2, 2, 2) == 0) { // Load/store multiple. - return DisassembleThumb2LdStMul(MI, Opcode, insn, NumOps, NumOpsAdded); + return DisassembleThumb2LdStMul(MI, Opcode, insn, NumOps, NumOpsAdded, + B); } // Load/store dual, load/store exclusive, table branch, otherwise. - assert(slice(op2, 2, 2) == 1 && "Encoding error"); + assert(slice(op2, 2, 2) == 1 && "Thumb2 encoding error!"); if ((ARM::t2LDREX <= Opcode && Opcode <= ARM::t2LDREXH) || (ARM::t2STREX <= Opcode && Opcode <= ARM::t2STREXH)) { // Load/store exclusive. - return DisassembleThumb2LdStEx(MI, Opcode, insn, NumOps, NumOpsAdded); + return DisassembleThumb2LdStEx(MI, Opcode, insn, NumOps, NumOpsAdded, + B); } if (Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2LDRD_PRE || Opcode == ARM::t2LDRD_POST || Opcode == ARM::t2STRDi8 || Opcode == ARM::t2STRD_PRE || Opcode == ARM::t2STRD_POST) { // Load/store dual. - return DisassembleThumb2LdStDual(MI, Opcode, insn, NumOps, NumOpsAdded); + return DisassembleThumb2LdStDual(MI, Opcode, insn, NumOps, NumOpsAdded, + B); } if (Opcode == ARM::t2TBBgen || Opcode == ARM::t2TBHgen) { // Table branch. - return DisassembleThumb2TB(MI, Opcode, insn, NumOps, NumOpsAdded); + return DisassembleThumb2TB(MI, Opcode, insn, NumOps, NumOpsAdded, B); } } else if (slice(op2, 6, 5) == 1) { // Data-processing (shifted register). 
- return DisassembleThumb2DPSoReg(MI, Opcode, insn, NumOps, NumOpsAdded); + return DisassembleThumb2DPSoReg(MI, Opcode, insn, NumOps, NumOpsAdded, B); } // FIXME: A6.3.18 Coprocessor instructions @@ -2101,14 +2154,17 @@ static bool DisassembleThumb2(uint16_t op1, uint16_t op2, uint16_t op, if (op == 0) { if (slice(op2, 5, 5) == 0) { // Data-processing (modified immediate) - return DisassembleThumb2DPModImm(MI, Opcode, insn, NumOps, NumOpsAdded); + return DisassembleThumb2DPModImm(MI, Opcode, insn, NumOps, NumOpsAdded, + B); } else { // Data-processing (plain binary immediate) - return DisassembleThumb2DPBinImm(MI, Opcode, insn, NumOps, NumOpsAdded); + return DisassembleThumb2DPBinImm(MI, Opcode, insn, NumOps, NumOpsAdded, + B); } } else { // Branches and miscellaneous control on page A6-20. - return DisassembleThumb2BrMiscCtrl(MI, Opcode, insn, NumOps, NumOpsAdded); + return DisassembleThumb2BrMiscCtrl(MI, Opcode, insn, NumOps, NumOpsAdded, + B); } break; @@ -2119,7 +2175,8 @@ static bool DisassembleThumb2(uint16_t op1, uint16_t op2, uint16_t op, if (slice(op2, 0, 0) == 0) { if (slice(op2, 4, 4) == 0) { // Store single data item on page A6-30 - return DisassembleThumb2LdSt(false, MI,Opcode,insn,NumOps,NumOpsAdded); + return DisassembleThumb2LdSt(false, MI,Opcode,insn,NumOps,NumOpsAdded, + B); } else { // FIXME: Advanced SIMD element or structure load/store instructions. // But see ThumbDisassembler::getInstruction(). @@ -2127,19 +2184,20 @@ static bool DisassembleThumb2(uint16_t op1, uint16_t op2, uint16_t op, } } else { // Table A6-9 32-bit Thumb instruction encoding: Load byte|halfword|word - return DisassembleThumb2LdSt(true, MI,Opcode,insn,NumOps,NumOpsAdded); + return DisassembleThumb2LdSt(true, MI,Opcode,insn,NumOps,NumOpsAdded, B); } break; case 1: if (slice(op2, 4, 4) == 0) { // A6.3.12 Data-processing (register) - return DisassembleThumb2DPReg(MI, Opcode, insn, NumOps, NumOpsAdded); + return DisassembleThumb2DPReg(MI, Opcode, insn, NumOps, NumOpsAdded, B); } else if (slice(op2, 3, 3) == 0) { // A6.3.16 Multiply, multiply accumulate, and absolute difference - return DisassembleThumb2Mul(MI, Opcode, insn, NumOps, NumOpsAdded); + return DisassembleThumb2Mul(MI, Opcode, insn, NumOps, NumOpsAdded, B); } else { // A6.3.17 Long multiply, long multiply accumulate, and divide - return DisassembleThumb2LongMul(MI, Opcode, insn, NumOps, NumOpsAdded); + return DisassembleThumb2LongMul(MI, Opcode, insn, NumOps, NumOpsAdded, + B); } break; default: @@ -2151,7 +2209,7 @@ static bool DisassembleThumb2(uint16_t op1, uint16_t op2, uint16_t op, break; default: - assert(0 && "Encoding error for Thumb2 instruction!"); + assert(0 && "Thumb2 encoding error!"); break; } @@ -2174,8 +2232,10 @@ static bool DisassembleThumbFrm(MCInst &MI, unsigned Opcode, uint32_t insn, unsigned bits15_11 = slice(HalfWord, 15, 11); // A6.1 Thumb instruction set encoding - assert((bits15_11 == 0x1D || bits15_11 == 0x1E || bits15_11 == 0x1F) && - "Bits [15:11] of first halfword of a Thumb2 instruction out of range"); + if (!(bits15_11 == 0x1D || bits15_11 == 0x1E || bits15_11 == 0x1F)) { + assert("Bits[15:11] first halfword of Thumb2 instruction is out of range"); + return false; + } // A6.3 32-bit Thumb instruction encoding @@ -2183,5 +2243,6 @@ static bool DisassembleThumbFrm(MCInst &MI, unsigned Opcode, uint32_t insn, uint16_t op2 = slice(HalfWord, 10, 4); uint16_t op = slice(insn, 15, 15); - return DisassembleThumb2(op1, op2, op, MI, Opcode, insn, NumOps, NumOpsAdded); + return DisassembleThumb2(op1, op2, op, MI, Opcode, 
insn, NumOps, NumOpsAdded, + Builder); } diff --git a/lib/Target/ARM/Makefile b/lib/Target/ARM/Makefile index a8dd38c..9e3ff29 100644 --- a/lib/Target/ARM/Makefile +++ b/lib/Target/ARM/Makefile @@ -16,8 +16,9 @@ BUILT_SOURCES = ARMGenRegisterInfo.h.inc ARMGenRegisterNames.inc \ ARMGenRegisterInfo.inc ARMGenInstrNames.inc \ ARMGenInstrInfo.inc ARMGenAsmWriter.inc \ ARMGenDAGISel.inc ARMGenSubtarget.inc \ - ARMGenCodeEmitter.inc ARMGenCallingConv.inc + ARMGenCodeEmitter.inc ARMGenCallingConv.inc \ + ARMGenDecoderTables.inc ARMGenEDInfo.inc -DIRS = AsmPrinter AsmParser TargetInfo +DIRS = AsmPrinter AsmParser Disassembler TargetInfo include $(LEVEL)/Makefile.common diff --git a/lib/Target/ARM/Thumb1RegisterInfo.cpp b/lib/Target/ARM/Thumb1RegisterInfo.cpp index a5dfcb3..2f635fe 100644 --- a/lib/Target/ARM/Thumb1RegisterInfo.cpp +++ b/lib/Target/ARM/Thumb1RegisterInfo.cpp @@ -36,9 +36,12 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" -using namespace llvm; +namespace llvm { extern cl::opt<bool> ReuseFrameIndexVals; +} + +using namespace llvm; Thumb1RegisterInfo::Thumb1RegisterInfo(const ARMBaseInstrInfo &tii, const ARMSubtarget &sti) @@ -56,7 +59,7 @@ void Thumb1RegisterInfo::emitLoadConstPool(MachineBasicBlock &MBB, unsigned PredReg) const { MachineFunction &MF = *MBB.getParent(); MachineConstantPool *ConstantPool = MF.getConstantPool(); - Constant *C = ConstantInt::get( + const Constant *C = ConstantInt::get( Type::getInt32Ty(MBB.getParent()->getFunction()->getContext()), Val); unsigned Idx = ConstantPool->getConstantPoolIndex(C, 4); @@ -461,6 +464,13 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, Offset -= AFI->getFramePtrSpillOffset(); } + // Special handling of dbg_value instructions. + if (MI.isDebugValue()) { + MI.getOperand(i). 
ChangeToRegister(FrameReg, false /*isDef*/); + MI.getOperand(i+1).ChangeToImmediate(Offset); + return 0; + } + unsigned Opcode = MI.getOpcode(); const TargetInstrDesc &Desc = MI.getDesc(); unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask); diff --git a/lib/Target/ARM/Thumb2InstrInfo.cpp b/lib/Target/ARM/Thumb2InstrInfo.cpp index de46056..b143bd9 100644 --- a/lib/Target/ARM/Thumb2InstrInfo.cpp +++ b/lib/Target/ARM/Thumb2InstrInfo.cpp @@ -44,18 +44,22 @@ Thumb2InstrInfo::copyRegToReg(MachineBasicBlock &MBB, DebugLoc DL; if (I != MBB.end()) DL = I->getDebugLoc(); - if (DestRC == ARM::GPRRegisterClass && - SrcRC == ARM::GPRRegisterClass) { - BuildMI(MBB, I, DL, get(ARM::tMOVgpr2gpr), DestReg).addReg(SrcReg); - return true; - } else if (DestRC == ARM::GPRRegisterClass && - SrcRC == ARM::tGPRRegisterClass) { - BuildMI(MBB, I, DL, get(ARM::tMOVtgpr2gpr), DestReg).addReg(SrcReg); - return true; - } else if (DestRC == ARM::tGPRRegisterClass && - SrcRC == ARM::GPRRegisterClass) { - BuildMI(MBB, I, DL, get(ARM::tMOVgpr2tgpr), DestReg).addReg(SrcReg); - return true; + if (DestRC == ARM::GPRRegisterClass) { + if (SrcRC == ARM::GPRRegisterClass) { + BuildMI(MBB, I, DL, get(ARM::tMOVgpr2gpr), DestReg).addReg(SrcReg); + return true; + } else if (SrcRC == ARM::tGPRRegisterClass) { + BuildMI(MBB, I, DL, get(ARM::tMOVtgpr2gpr), DestReg).addReg(SrcReg); + return true; + } + } else if (DestRC == ARM::tGPRRegisterClass) { + if (SrcRC == ARM::GPRRegisterClass) { + BuildMI(MBB, I, DL, get(ARM::tMOVgpr2tgpr), DestReg).addReg(SrcReg); + return true; + } else if (SrcRC == ARM::tGPRRegisterClass) { + BuildMI(MBB, I, DL, get(ARM::tMOVr), DestReg).addReg(SrcReg); + return true; + } } // Handle SPR, DPR, and QPR copies. diff --git a/lib/Target/ARM/Thumb2RegisterInfo.cpp b/lib/Target/ARM/Thumb2RegisterInfo.cpp index f24d3e2..07dd0be 100644 --- a/lib/Target/ARM/Thumb2RegisterInfo.cpp +++ b/lib/Target/ARM/Thumb2RegisterInfo.cpp @@ -52,7 +52,7 @@ void Thumb2RegisterInfo::emitLoadConstPool(MachineBasicBlock &MBB, unsigned PredReg) const { MachineFunction &MF = *MBB.getParent(); MachineConstantPool *ConstantPool = MF.getConstantPool(); - Constant *C = ConstantInt::get( + const Constant *C = ConstantInt::get( Type::getInt32Ty(MBB.getParent()->getFunction()->getContext()), Val); unsigned Idx = ConstantPool->getConstantPoolIndex(C, 4); diff --git a/lib/Target/ARM/Thumb2SizeReduction.cpp b/lib/Target/ARM/Thumb2SizeReduction.cpp index 2bc75f2..8fe2e42 100644 --- a/lib/Target/ARM/Thumb2SizeReduction.cpp +++ b/lib/Target/ARM/Thumb2SizeReduction.cpp @@ -656,15 +656,8 @@ static bool UpdateCPSRUse(MachineInstr &MI, bool LiveCPSR) { bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) { bool Modified = false; - bool LiveCPSR = false; // Yes, CPSR could be livein. 
- for (MachineBasicBlock::const_livein_iterator I = MBB.livein_begin(), - E = MBB.livein_end(); I != E; ++I) { - if (*I == ARM::CPSR) { - LiveCPSR = true; - break; - } - } + bool LiveCPSR = MBB.isLiveIn(ARM::CPSR); MachineBasicBlock::iterator MII = MBB.begin(), E = MBB.end(); MachineBasicBlock::iterator NextMII; diff --git a/lib/Target/Alpha/AlphaCodeEmitter.cpp b/lib/Target/Alpha/AlphaCodeEmitter.cpp index eb5e429..a6c6f52 100644 --- a/lib/Target/Alpha/AlphaCodeEmitter.cpp +++ b/lib/Target/Alpha/AlphaCodeEmitter.cpp @@ -192,10 +192,13 @@ unsigned AlphaCodeEmitter::getMachineOpValue(const MachineInstr &MI, llvm_unreachable("unknown relocatable instruction"); } if (MO.isGlobal()) - MCE.addRelocation(MachineRelocation::getGV(MCE.getCurrentPCOffset(), - Reloc, MO.getGlobal(), Offset, - isa<Function>(MO.getGlobal()), - useGOT)); + MCE.addRelocation(MachineRelocation::getGV( + MCE.getCurrentPCOffset(), + Reloc, + const_cast<GlobalValue *>(MO.getGlobal()), + Offset, + isa<Function>(MO.getGlobal()), + useGOT)); else if (MO.isSymbol()) MCE.addRelocation(MachineRelocation::getExtSym(MCE.getCurrentPCOffset(), Reloc, MO.getSymbolName(), diff --git a/lib/Target/Alpha/AlphaISelDAGToDAG.cpp b/lib/Target/Alpha/AlphaISelDAGToDAG.cpp index 5303d85..d526dc0 100644 --- a/lib/Target/Alpha/AlphaISelDAGToDAG.cpp +++ b/lib/Target/Alpha/AlphaISelDAGToDAG.cpp @@ -14,7 +14,6 @@ #include "Alpha.h" #include "AlphaTargetMachine.h" -#include "AlphaISelLowering.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" @@ -309,7 +308,7 @@ SDNode *AlphaDAGToDAGISel::Select(SDNode *N) { T, CurDAG->getRegister(Alpha::F31, T), CurDAG->getRegister(Alpha::F31, T)); } else { - llvm_report_error("Unhandled FP constant type"); + report_fatal_error("Unhandled FP constant type"); } break; } diff --git a/lib/Target/Alpha/AlphaISelLowering.cpp b/lib/Target/Alpha/AlphaISelLowering.cpp index 94c6f80..1d85f12 100644 --- a/lib/Target/Alpha/AlphaISelLowering.cpp +++ b/lib/Target/Alpha/AlphaISelLowering.cpp @@ -13,6 +13,7 @@ #include "AlphaISelLowering.h" #include "AlphaTargetMachine.h" +#include "AlphaMachineFunctionInfo.h" #include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" @@ -225,7 +226,7 @@ AlphaTargetLowering::LowerCall(SDValue Chain, SDValue Callee, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals) { + SmallVectorImpl<SDValue> &InVals) const { // Alpha target does not yet support tail call optimization. isTailCall = false; @@ -342,7 +343,7 @@ AlphaTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals) { + SmallVectorImpl<SDValue> &InVals) const { // Assign locations to each value returned by this call. 
SmallVector<CCValAssign, 16> RVLocs; @@ -385,10 +386,12 @@ AlphaTargetLowering::LowerFormalArguments(SDValue Chain, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals) { + SmallVectorImpl<SDValue> &InVals) + const { MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); + AlphaMachineFunctionInfo *FuncInfo = MF.getInfo<AlphaMachineFunctionInfo>(); unsigned args_int[] = { Alpha::R16, Alpha::R17, Alpha::R18, Alpha::R19, Alpha::R20, Alpha::R21}; @@ -435,14 +438,14 @@ AlphaTargetLowering::LowerFormalArguments(SDValue Chain, // If the functions takes variable number of arguments, copy all regs to stack if (isVarArg) { - VarArgsOffset = Ins.size() * 8; + FuncInfo->setVarArgsOffset(Ins.size() * 8); std::vector<SDValue> LS; for (int i = 0; i < 6; ++i) { if (TargetRegisterInfo::isPhysicalRegister(args_int[i])) args_int[i] = AddLiveIn(MF, args_int[i], &Alpha::GPRCRegClass); SDValue argt = DAG.getCopyFromReg(Chain, dl, args_int[i], MVT::i64); int FI = MFI->CreateFixedObject(8, -8 * (6 - i), true, false); - if (i == 0) VarArgsBase = FI; + if (i == 0) FuncInfo->setVarArgsBase(FI); SDValue SDFI = DAG.getFrameIndex(FI, MVT::i64); LS.push_back(DAG.getStore(Chain, dl, argt, SDFI, NULL, 0, false, false, 0)); @@ -467,7 +470,7 @@ SDValue AlphaTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::OutputArg> &Outs, - DebugLoc dl, SelectionDAG &DAG) { + DebugLoc dl, SelectionDAG &DAG) const { SDValue Copy = DAG.getCopyToReg(Chain, dl, Alpha::R26, DAG.getNode(AlphaISD::GlobalRetAddr, @@ -525,7 +528,8 @@ AlphaTargetLowering::LowerReturn(SDValue Chain, } void AlphaTargetLowering::LowerVAARG(SDNode *N, SDValue &Chain, - SDValue &DataPtr, SelectionDAG &DAG) { + SDValue &DataPtr, + SelectionDAG &DAG) const { Chain = N->getOperand(0); SDValue VAListP = N->getOperand(1); const Value *VAListS = cast<SrcValueSDNode>(N->getOperand(2))->getValue(); @@ -556,7 +560,8 @@ void AlphaTargetLowering::LowerVAARG(SDNode *N, SDValue &Chain, /// LowerOperation - Provide custom lowering hooks for some operations. 
/// -SDValue AlphaTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) { +SDValue AlphaTargetLowering::LowerOperation(SDValue Op, + SelectionDAG &DAG) const { DebugLoc dl = Op.getDebugLoc(); switch (Op.getOpcode()) { default: llvm_unreachable("Wasn't expecting to be able to lower this!"); @@ -624,7 +629,7 @@ SDValue AlphaTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) { } case ISD::ConstantPool: { ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op); - Constant *C = CP->getConstVal(); + const Constant *C = CP->getConstVal(); SDValue CPI = DAG.getTargetConstantPool(C, MVT::i64, CP->getAlignment()); // FIXME there isn't really any debug info here @@ -637,7 +642,7 @@ SDValue AlphaTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) { llvm_unreachable("TLS not implemented for Alpha."); case ISD::GlobalAddress: { GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op); - GlobalValue *GV = GSDN->getGlobal(); + const GlobalValue *GV = GSDN->getGlobal(); SDValue GA = DAG.getTargetGlobalAddress(GV, MVT::i64, GSDN->getOffset()); // FIXME there isn't really any debug info here @@ -725,17 +730,22 @@ SDValue AlphaTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) { false, false, 0); } case ISD::VASTART: { + MachineFunction &MF = DAG.getMachineFunction(); + AlphaMachineFunctionInfo *FuncInfo = MF.getInfo<AlphaMachineFunctionInfo>(); + SDValue Chain = Op.getOperand(0); SDValue VAListP = Op.getOperand(1); const Value *VAListS = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); // vastart stores the address of the VarArgsBase and VarArgsOffset - SDValue FR = DAG.getFrameIndex(VarArgsBase, MVT::i64); + SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsBase(), MVT::i64); SDValue S1 = DAG.getStore(Chain, dl, FR, VAListP, VAListS, 0, false, false, 0); SDValue SA2 = DAG.getNode(ISD::ADD, dl, MVT::i64, VAListP, DAG.getConstant(8, MVT::i64)); - return DAG.getTruncStore(S1, dl, DAG.getConstant(VarArgsOffset, MVT::i64), + return DAG.getTruncStore(S1, dl, + DAG.getConstant(FuncInfo->getVarArgsOffset(), + MVT::i64), SA2, NULL, 0, MVT::i32, false, false, 0); } case ISD::RETURNADDR: @@ -749,7 +759,7 @@ SDValue AlphaTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) { void AlphaTargetLowering::ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results, - SelectionDAG &DAG) { + SelectionDAG &DAG) const { DebugLoc dl = N->getDebugLoc(); assert(N->getValueType(0) == MVT::i32 && N->getOpcode() == ISD::VAARG && @@ -822,8 +832,7 @@ getRegClassForInlineAsmConstraint(const std::string &Constraint, MachineBasicBlock * AlphaTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, - MachineBasicBlock *BB, - DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const { + MachineBasicBlock *BB) const { const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); assert((MI->getOpcode() == Alpha::CAS32 || MI->getOpcode() == Alpha::CAS64 || @@ -854,11 +863,6 @@ AlphaTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *llscMBB = F->CreateMachineBasicBlock(LLVM_BB); MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB); - // Inform sdisel of the edge changes. 
- for (MachineBasicBlock::succ_iterator I = BB->succ_begin(), - E = BB->succ_end(); I != E; ++I) - EM->insert(std::make_pair(*I, sinkMBB)); - sinkMBB->transferSuccessors(thisMBB); F->insert(It, llscMBB); diff --git a/lib/Target/Alpha/AlphaISelLowering.h b/lib/Target/Alpha/AlphaISelLowering.h index 0f17025..7ee823a 100644 --- a/lib/Target/Alpha/AlphaISelLowering.h +++ b/lib/Target/Alpha/AlphaISelLowering.h @@ -60,8 +60,6 @@ namespace llvm { } class AlphaTargetLowering : public TargetLowering { - int VarArgsOffset; // What is the offset to the first vaarg - int VarArgsBase; // What is the base FrameIndex public: explicit AlphaTargetLowering(TargetMachine &TM); @@ -70,13 +68,13 @@ namespace llvm { /// LowerOperation - Provide custom lowering hooks for some operations. /// - virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG); + virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const; /// ReplaceNodeResults - Replace the results of node with an illegal result /// type with new values built out of custom code. /// virtual void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results, - SelectionDAG &DAG); + SelectionDAG &DAG) const; // Friendly names for dumps const char *getTargetNodeName(unsigned Opcode) const; @@ -85,7 +83,7 @@ namespace llvm { CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals); + SmallVectorImpl<SDValue> &InVals) const; ConstraintType getConstraintType(const std::string &Constraint) const; @@ -93,9 +91,9 @@ namespace llvm { getRegClassForInlineAsmConstraint(const std::string &Constraint, EVT VT) const; - MachineBasicBlock *EmitInstrWithCustomInserter(MachineInstr *MI, - MachineBasicBlock *BB, - DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const; + MachineBasicBlock * + EmitInstrWithCustomInserter(MachineInstr *MI, + MachineBasicBlock *BB) const; virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const; @@ -110,14 +108,14 @@ namespace llvm { private: // Helpers for custom lowering. 
void LowerVAARG(SDNode *N, SDValue &Chain, SDValue &DataPtr, - SelectionDAG &DAG); + SelectionDAG &DAG) const; virtual SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals); + SmallVectorImpl<SDValue> &InVals) const; virtual SDValue LowerCall(SDValue Chain, SDValue Callee, @@ -125,13 +123,13 @@ namespace llvm { const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals); + SmallVectorImpl<SDValue> &InVals) const; virtual SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::OutputArg> &Outs, - DebugLoc dl, SelectionDAG &DAG); + DebugLoc dl, SelectionDAG &DAG) const; }; } diff --git a/lib/Target/Alpha/AlphaJITInfo.cpp b/lib/Target/Alpha/AlphaJITInfo.cpp index cb8eb51..12685ed 100644 --- a/lib/Target/Alpha/AlphaJITInfo.cpp +++ b/lib/Target/Alpha/AlphaJITInfo.cpp @@ -103,7 +103,7 @@ extern "C" { asm( ".text\n" - ".globl AlphaComilationCallbackC\n" + ".globl AlphaCompilationCallbackC\n" ".align 4\n" ".globl AlphaCompilationCallback\n" ".ent AlphaCompilationCallback\n" diff --git a/lib/Target/Alpha/AlphaMachineFunctionInfo.h b/lib/Target/Alpha/AlphaMachineFunctionInfo.h index 8221fc7..186738c 100644 --- a/lib/Target/Alpha/AlphaMachineFunctionInfo.h +++ b/lib/Target/Alpha/AlphaMachineFunctionInfo.h @@ -30,17 +30,31 @@ class AlphaMachineFunctionInfo : public MachineFunctionInfo { /// the return address value. unsigned GlobalRetAddr; + /// VarArgsOffset - What is the offset to the first vaarg + int VarArgsOffset; + /// VarArgsBase - What is the base FrameIndex + int VarArgsBase; + public: - AlphaMachineFunctionInfo() : GlobalBaseReg(0), GlobalRetAddr(0) {} + AlphaMachineFunctionInfo() : GlobalBaseReg(0), GlobalRetAddr(0), + VarArgsOffset(0), VarArgsBase(0) {} explicit AlphaMachineFunctionInfo(MachineFunction &MF) : GlobalBaseReg(0), - GlobalRetAddr(0) {} + GlobalRetAddr(0), + VarArgsOffset(0), + VarArgsBase(0) {} unsigned getGlobalBaseReg() const { return GlobalBaseReg; } void setGlobalBaseReg(unsigned Reg) { GlobalBaseReg = Reg; } unsigned getGlobalRetAddr() const { return GlobalRetAddr; } void setGlobalRetAddr(unsigned Reg) { GlobalRetAddr = Reg; } + + int getVarArgsOffset() const { return VarArgsOffset; } + void setVarArgsOffset(int Offset) { VarArgsOffset = Offset; } + + int getVarArgsBase() const { return VarArgsBase; } + void setVarArgsBase(int Base) { VarArgsBase = Base; } }; } // End llvm namespace diff --git a/lib/Target/Alpha/AlphaRegisterInfo.cpp b/lib/Target/Alpha/AlphaRegisterInfo.cpp index 16a23cc..c083d8c 100644 --- a/lib/Target/Alpha/AlphaRegisterInfo.cpp +++ b/lib/Target/Alpha/AlphaRegisterInfo.cpp @@ -212,15 +212,14 @@ void AlphaRegisterInfo::emitPrologue(MachineFunction &MF) const { //handle GOP offset BuildMI(MBB, MBBI, dl, TII.get(Alpha::LDAHg), Alpha::R29) - .addGlobalAddress(const_cast<Function*>(MF.getFunction())) + .addGlobalAddress(MF.getFunction()) .addReg(Alpha::R27).addImm(++curgpdist); BuildMI(MBB, MBBI, dl, TII.get(Alpha::LDAg), Alpha::R29) - .addGlobalAddress(const_cast<Function*>(MF.getFunction())) + .addGlobalAddress(MF.getFunction()) .addReg(Alpha::R29).addImm(curgpdist); - //evil const_cast until MO stuff setup to handle const BuildMI(MBB, MBBI, dl, TII.get(Alpha::ALTENT)) - .addGlobalAddress(const_cast<Function*>(MF.getFunction())); + .addGlobalAddress(MF.getFunction()); // Get 
the number of bytes to allocate from the FrameInfo long NumBytes = MFI->getStackSize(); @@ -248,10 +247,7 @@ void AlphaRegisterInfo::emitPrologue(MachineFunction &MF) const { BuildMI(MBB, MBBI, dl, TII.get(Alpha::LDA), Alpha::R30) .addImm(getLower16(NumBytes)).addReg(Alpha::R30); } else { - std::string msg; - raw_string_ostream Msg(msg); - Msg << "Too big a stack frame at " << NumBytes; - llvm_report_error(Msg.str()); + report_fatal_error("Too big a stack frame at " + Twine(NumBytes)); } //now if we need to, save the old FP and set the new @@ -300,10 +296,7 @@ void AlphaRegisterInfo::emitEpilogue(MachineFunction &MF, BuildMI(MBB, MBBI, dl, TII.get(Alpha::LDA), Alpha::R30) .addImm(getLower16(NumBytes)).addReg(Alpha::R30); } else { - std::string msg; - raw_string_ostream Msg(msg); - Msg << "Too big a stack frame at " << NumBytes; - llvm_report_error(Msg.str()); + report_fatal_error("Too big a stack frame at " + Twine(NumBytes)); } } } diff --git a/lib/Target/Alpha/AlphaSchedule.td b/lib/Target/Alpha/AlphaSchedule.td index b7b4560..4dc04b8 100644 --- a/lib/Target/Alpha/AlphaSchedule.td +++ b/lib/Target/Alpha/AlphaSchedule.td @@ -53,7 +53,8 @@ def s_pseudo : InstrItinClass; //Table 24 Instruction Class Latency in Cycles //modified some -def Alpha21264Itineraries : ProcessorItineraries<[ +def Alpha21264Itineraries : ProcessorItineraries< + [L0, L1, FST0, FST1, U0, U1, FA, FM], [ InstrItinData<s_ild , [InstrStage<3, [L0, L1]>]>, InstrItinData<s_fld , [InstrStage<4, [L0, L1]>]>, InstrItinData<s_ist , [InstrStage<0, [L0, L1]>]>, diff --git a/lib/Target/Alpha/AlphaSelectionDAGInfo.cpp b/lib/Target/Alpha/AlphaSelectionDAGInfo.cpp new file mode 100644 index 0000000..0eb7b8f --- /dev/null +++ b/lib/Target/Alpha/AlphaSelectionDAGInfo.cpp @@ -0,0 +1,22 @@ +//===-- AlphaSelectionDAGInfo.cpp - Alpha SelectionDAG Info ---------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the AlphaSelectionDAGInfo class. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "alpha-selectiondag-info" +#include "AlphaSelectionDAGInfo.h" +using namespace llvm; + +AlphaSelectionDAGInfo::AlphaSelectionDAGInfo() { +} + +AlphaSelectionDAGInfo::~AlphaSelectionDAGInfo() { +} diff --git a/lib/Target/Alpha/AlphaSelectionDAGInfo.h b/lib/Target/Alpha/AlphaSelectionDAGInfo.h new file mode 100644 index 0000000..70889ae --- /dev/null +++ b/lib/Target/Alpha/AlphaSelectionDAGInfo.h @@ -0,0 +1,29 @@ +//===-- AlphaSelectionDAGInfo.h - Alpha SelectionDAG Info -------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the Alpha subclass for TargetSelectionDAGInfo. 
+// +//===----------------------------------------------------------------------===// + +#ifndef ALPHASELECTIONDAGINFO_H +#define ALPHASELECTIONDAGINFO_H + +#include "llvm/Target/TargetSelectionDAGInfo.h" + +namespace llvm { + +class AlphaSelectionDAGInfo : public TargetSelectionDAGInfo { +public: + AlphaSelectionDAGInfo(); + ~AlphaSelectionDAGInfo(); +}; + +} + +#endif diff --git a/lib/Target/Alpha/AlphaTargetMachine.h b/lib/Target/Alpha/AlphaTargetMachine.h index 6f3a774..0990f6d 100644 --- a/lib/Target/Alpha/AlphaTargetMachine.h +++ b/lib/Target/Alpha/AlphaTargetMachine.h @@ -44,8 +44,8 @@ public: virtual const AlphaRegisterInfo *getRegisterInfo() const { return &InstrInfo.getRegisterInfo(); } - virtual AlphaTargetLowering* getTargetLowering() const { - return const_cast<AlphaTargetLowering*>(&TLInfo); + virtual const AlphaTargetLowering* getTargetLowering() const { + return &TLInfo; } virtual const TargetData *getTargetData() const { return &DataLayout; } virtual AlphaJITInfo* getJITInfo() { diff --git a/lib/Target/Alpha/AsmPrinter/AlphaAsmPrinter.cpp b/lib/Target/Alpha/AsmPrinter/AlphaAsmPrinter.cpp index 2a1f5559..9f4aff6 100644 --- a/lib/Target/Alpha/AsmPrinter/AlphaAsmPrinter.cpp +++ b/lib/Target/Alpha/AsmPrinter/AlphaAsmPrinter.cpp @@ -73,7 +73,7 @@ namespace { void AlphaAsmPrinter::printOperand(const MachineInstr *MI, int opNum, raw_ostream &O) { const MachineOperand &MO = MI->getOperand(opNum); - if (MO.getType() == MachineOperand::MO_Register) { + if (MO.isReg()) { assert(TargetRegisterInfo::isPhysicalRegister(MO.getReg()) && "Not physreg??"); O << getRegisterName(MO.getReg()); diff --git a/lib/Target/Alpha/CMakeLists.txt b/lib/Target/Alpha/CMakeLists.txt index b4f41ae..fbf7f3a 100644 --- a/lib/Target/Alpha/CMakeLists.txt +++ b/lib/Target/Alpha/CMakeLists.txt @@ -23,6 +23,7 @@ add_llvm_target(AlphaCodeGen AlphaRegisterInfo.cpp AlphaSubtarget.cpp AlphaTargetMachine.cpp + AlphaSelectionDAGInfo.cpp ) target_link_libraries (LLVMAlphaCodeGen LLVMSelectionDAG) diff --git a/lib/Target/Blackfin/BlackfinISelDAGToDAG.cpp b/lib/Target/Blackfin/BlackfinISelDAGToDAG.cpp index c8d71aa..b4da96c 100644 --- a/lib/Target/Blackfin/BlackfinISelDAGToDAG.cpp +++ b/lib/Target/Blackfin/BlackfinISelDAGToDAG.cpp @@ -12,7 +12,6 @@ //===----------------------------------------------------------------------===// #include "Blackfin.h" -#include "BlackfinISelLowering.h" #include "BlackfinTargetMachine.h" #include "BlackfinRegisterInfo.h" #include "llvm/Intrinsics.h" diff --git a/lib/Target/Blackfin/BlackfinISelLowering.cpp b/lib/Target/Blackfin/BlackfinISelLowering.cpp index 5ce2013..adf2118 100644 --- a/lib/Target/Blackfin/BlackfinISelLowering.cpp +++ b/lib/Target/Blackfin/BlackfinISelLowering.cpp @@ -139,15 +139,16 @@ MVT::SimpleValueType BlackfinTargetLowering::getSetCCResultType(EVT VT) const { } SDValue BlackfinTargetLowering::LowerGlobalAddress(SDValue Op, - SelectionDAG &DAG) { + SelectionDAG &DAG) const { DebugLoc DL = Op.getDebugLoc(); - GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); + const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); Op = DAG.getTargetGlobalAddress(GV, MVT::i32); return DAG.getNode(BFISD::Wrapper, DL, MVT::i32, Op); } -SDValue BlackfinTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) { +SDValue BlackfinTargetLowering::LowerJumpTable(SDValue Op, + SelectionDAG &DAG) const { DebugLoc DL = Op.getDebugLoc(); int JTI = cast<JumpTableSDNode>(Op)->getIndex(); @@ -161,7 +162,8 @@ BlackfinTargetLowering::LowerFormalArguments(SDValue Chain, const 
SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals) { + SmallVectorImpl<SDValue> &InVals) + const { MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); @@ -218,7 +220,7 @@ SDValue BlackfinTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::OutputArg> &Outs, - DebugLoc dl, SelectionDAG &DAG) { + DebugLoc dl, SelectionDAG &DAG) const { // CCValAssign - represent the assignment of the return value to locations. SmallVector<CCValAssign, 16> RVLocs; @@ -278,7 +280,7 @@ BlackfinTargetLowering::LowerCall(SDValue Chain, SDValue Callee, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals) { + SmallVectorImpl<SDValue> &InVals) const { // Blackfin target does not yet support tail call optimization. isTailCall = false; @@ -414,7 +416,7 @@ BlackfinTargetLowering::LowerCall(SDValue Chain, SDValue Callee, // Expansion of ADDE / SUBE. This is a bit involved since blackfin doesn't have // add-with-carry instructions. -SDValue BlackfinTargetLowering::LowerADDE(SDValue Op, SelectionDAG &DAG) { +SDValue BlackfinTargetLowering::LowerADDE(SDValue Op, SelectionDAG &DAG) const { // Operands: lhs, rhs, carry-in (AC0 flag) // Results: sum, carry-out (AC0 flag) DebugLoc dl = Op.getDebugLoc(); @@ -448,7 +450,8 @@ SDValue BlackfinTargetLowering::LowerADDE(SDValue Op, SelectionDAG &DAG) { return DAG.getMergeValues(ops, 2, dl); } -SDValue BlackfinTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) { +SDValue BlackfinTargetLowering::LowerOperation(SDValue Op, + SelectionDAG &DAG) const { switch (Op.getOpcode()) { default: Op.getNode()->dump(); @@ -468,7 +471,7 @@ SDValue BlackfinTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) { void BlackfinTargetLowering::ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results, - SelectionDAG &DAG) { + SelectionDAG &DAG) const { DebugLoc dl = N->getDebugLoc(); switch (N->getOpcode()) { default: diff --git a/lib/Target/Blackfin/BlackfinISelLowering.h b/lib/Target/Blackfin/BlackfinISelLowering.h index 5f39910..a784248 100644 --- a/lib/Target/Blackfin/BlackfinISelLowering.h +++ b/lib/Target/Blackfin/BlackfinISelLowering.h @@ -30,16 +30,13 @@ namespace llvm { } class BlackfinTargetLowering : public TargetLowering { - int VarArgsFrameOffset; // Frame offset to start of varargs area. 
public: BlackfinTargetLowering(TargetMachine &TM); virtual MVT::SimpleValueType getSetCCResultType(EVT VT) const; - virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG); + virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const; virtual void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results, - SelectionDAG &DAG); - - int getVarArgsFrameOffset() const { return VarArgsFrameOffset; } + SelectionDAG &DAG) const; ConstraintType getConstraintType(const std::string &Constraint) const; std::pair<unsigned, const TargetRegisterClass*> @@ -52,29 +49,29 @@ namespace llvm { unsigned getFunctionAlignment(const Function *F) const; private: - SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG); - SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG); - SDValue LowerADDE(SDValue Op, SelectionDAG &DAG); + SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerADDE(SDValue Op, SelectionDAG &DAG) const; virtual SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals); + SmallVectorImpl<SDValue> &InVals) const; virtual SDValue LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg, bool &isTailCall, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals); + SmallVectorImpl<SDValue> &InVals) const; virtual SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::OutputArg> &Outs, - DebugLoc dl, SelectionDAG &DAG); + DebugLoc dl, SelectionDAG &DAG) const; }; } // end namespace llvm diff --git a/lib/Target/Blackfin/BlackfinRegisterInfo.cpp b/lib/Target/Blackfin/BlackfinRegisterInfo.cpp index 6fd610f..2512c9b 100644 --- a/lib/Target/Blackfin/BlackfinRegisterInfo.cpp +++ b/lib/Target/Blackfin/BlackfinRegisterInfo.cpp @@ -110,7 +110,8 @@ BlackfinRegisterInfo::getPhysicalRegisterRegClass(unsigned reg, EVT VT) const { // if frame pointer elimination is disabled. bool BlackfinRegisterInfo::hasFP(const MachineFunction &MF) const { const MachineFrameInfo *MFI = MF.getFrameInfo(); - return NoFramePointerElim || MFI->hasCalls() || MFI->hasVarSizedObjects(); + return DisableFramePointerElim(MF) || + MFI->hasCalls() || MFI->hasVarSizedObjects(); } bool BlackfinRegisterInfo:: diff --git a/lib/Target/Blackfin/BlackfinSelectionDAGInfo.cpp b/lib/Target/Blackfin/BlackfinSelectionDAGInfo.cpp new file mode 100644 index 0000000..f4bb25f --- /dev/null +++ b/lib/Target/Blackfin/BlackfinSelectionDAGInfo.cpp @@ -0,0 +1,22 @@ +//===-- BlackfinSelectionDAGInfo.cpp - Blackfin SelectionDAG Info ---------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the BlackfinSelectionDAGInfo class. 
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "blackfin-selectiondag-info" +#include "BlackfinSelectionDAGInfo.h" +using namespace llvm; + +BlackfinSelectionDAGInfo::BlackfinSelectionDAGInfo() { +} + +BlackfinSelectionDAGInfo::~BlackfinSelectionDAGInfo() { +} diff --git a/lib/Target/Blackfin/BlackfinSelectionDAGInfo.h b/lib/Target/Blackfin/BlackfinSelectionDAGInfo.h new file mode 100644 index 0000000..a620330 --- /dev/null +++ b/lib/Target/Blackfin/BlackfinSelectionDAGInfo.h @@ -0,0 +1,29 @@ +//===-- BlackfinSelectionDAGInfo.h - Blackfin SelectionDAG Info -*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the Blackfin subclass for TargetSelectionDAGInfo. +// +//===----------------------------------------------------------------------===// + +#ifndef BLACKFINSELECTIONDAGINFO_H +#define BLACKFINSELECTIONDAGINFO_H + +#include "llvm/Target/TargetSelectionDAGInfo.h" + +namespace llvm { + +class BlackfinSelectionDAGInfo : public TargetSelectionDAGInfo { +public: + BlackfinSelectionDAGInfo(); + ~BlackfinSelectionDAGInfo(); +}; + +} + +#endif diff --git a/lib/Target/Blackfin/BlackfinTargetMachine.h b/lib/Target/Blackfin/BlackfinTargetMachine.h index a14052b..07e7394 100644 --- a/lib/Target/Blackfin/BlackfinTargetMachine.h +++ b/lib/Target/Blackfin/BlackfinTargetMachine.h @@ -43,8 +43,8 @@ namespace llvm { virtual const BlackfinRegisterInfo *getRegisterInfo() const { return &InstrInfo.getRegisterInfo(); } - virtual BlackfinTargetLowering* getTargetLowering() const { - return const_cast<BlackfinTargetLowering*>(&TLInfo); + virtual const BlackfinTargetLowering* getTargetLowering() const { + return &TLInfo; } virtual const TargetData *getTargetData() const { return &DataLayout; } virtual bool addInstSelector(PassManagerBase &PM, diff --git a/lib/Target/Blackfin/CMakeLists.txt b/lib/Target/Blackfin/CMakeLists.txt index deb005d..f8847d0 100644 --- a/lib/Target/Blackfin/CMakeLists.txt +++ b/lib/Target/Blackfin/CMakeLists.txt @@ -20,4 +20,5 @@ add_llvm_target(BlackfinCodeGen BlackfinRegisterInfo.cpp BlackfinSubtarget.cpp BlackfinTargetMachine.cpp + BlackfinSelectionDAGInfo.cpp ) diff --git a/lib/Target/CBackend/CBackend.cpp b/lib/Target/CBackend/CBackend.cpp index 0c265ad..67f513b 100644 --- a/lib/Target/CBackend/CBackend.cpp +++ b/lib/Target/CBackend/CBackend.cpp @@ -274,8 +274,8 @@ namespace { // isInlineAsm - Check if the instruction is a call to an inline asm chunk static bool isInlineAsm(const Instruction& I) { - if (isa<CallInst>(&I) && isa<InlineAsm>(I.getOperand(0))) - return true; + if (const CallInst *CI = dyn_cast<CallInst>(&I)) + return isa<InlineAsm>(CI->getCalledValue()); return false; } @@ -473,8 +473,9 @@ void CWriter::printStructReturnPointerFunctionType(raw_ostream &Out, PrintedType = true; } if (FTy->isVarArg()) { - if (PrintedType) - FunctionInnards << ", ..."; + if (!PrintedType) + FunctionInnards << " int"; //dummy argument for empty vararg functs + FunctionInnards << ", ..."; } else if (!PrintedType) { FunctionInnards << "void"; } @@ -568,8 +569,9 @@ raw_ostream &CWriter::printType(raw_ostream &Out, const Type *Ty, ++Idx; } if (FTy->isVarArg()) { - if (FTy->getNumParams()) - FunctionInnards << ", ..."; + if (!FTy->getNumParams()) + FunctionInnards << " int"; //dummy argument for empty 
vaarg functs + FunctionInnards << ", ..."; } else if (!FTy->getNumParams()) { FunctionInnards << "void"; } @@ -1344,7 +1346,7 @@ void CWriter::writeInstComputationInline(Instruction &I) { Ty!=Type::getInt16Ty(I.getContext()) && Ty!=Type::getInt32Ty(I.getContext()) && Ty!=Type::getInt64Ty(I.getContext()))) { - llvm_report_error("The C backend does not currently support integer " + report_fatal_error("The C backend does not currently support integer " "types of widths other than 1, 8, 16, 32, 64.\n" "This is being tracked as PR 4158."); } @@ -2237,12 +2239,16 @@ void CWriter::printFunctionSignature(const Function *F, bool Prototype) { } } + if (!PrintedArg && FT->isVarArg()) { + FunctionInnards << "int vararg_dummy_arg"; + PrintedArg = true; + } + // Finish printing arguments... if this is a vararg function, print the ..., // unless there are no known types, in which case, we just emit (). // if (FT->isVarArg() && PrintedArg) { - if (PrintedArg) FunctionInnards << ", "; - FunctionInnards << "..."; // Output varargs portion of signature! + FunctionInnards << ",..."; // Output varargs portion of signature! } else if (!FT->isVarArg() && !PrintedArg) { FunctionInnards << "void"; // ret() -> ret(void) in C. } @@ -2858,7 +2864,7 @@ void CWriter::lowerIntrinsics(Function &F) { } void CWriter::visitCallInst(CallInst &I) { - if (isa<InlineAsm>(I.getOperand(0))) + if (isa<InlineAsm>(I.getCalledValue())) return visitInlineAsm(I); bool WroteCallee = false; @@ -2928,6 +2934,12 @@ void CWriter::visitCallInst(CallInst &I) { Out << '('; + bool PrintedArg = false; + if(FTy->isVarArg() && !FTy->getNumParams()) { + Out << "0 /*dummy arg*/"; + PrintedArg = true; + } + unsigned NumDeclaredParams = FTy->getNumParams(); CallSite::arg_iterator AI = I.op_begin()+1, AE = I.op_end(); @@ -2937,7 +2949,7 @@ void CWriter::visitCallInst(CallInst &I) { ++ArgNo; } - bool PrintedArg = false; + for (; AI != AE; ++AI, ++ArgNo) { if (PrintedArg) Out << ", "; if (ArgNo < NumDeclaredParams && @@ -2987,15 +2999,10 @@ bool CWriter::visitBuiltinCall(CallInst &I, Intrinsic::ID ID, writeOperand(I.getOperand(1)); Out << ", "; // Output the last argument to the enclosing function. - if (I.getParent()->getParent()->arg_empty()) { - std::string msg; - raw_string_ostream Msg(msg); - Msg << "The C backend does not currently support zero " - << "argument varargs functions, such as '" - << I.getParent()->getParent()->getName() << "'!"; - llvm_report_error(Msg.str()); - } - writeOperand(--I.getParent()->getParent()->arg_end()); + if (I.getParent()->getParent()->arg_empty()) + Out << "vararg_dummy_arg"; + else + writeOperand(--I.getParent()->getParent()->arg_end()); Out << ')'; return true; case Intrinsic::vaend: @@ -3165,7 +3172,7 @@ static std::string gccifyAsm(std::string asmstr) { //TODO: assumptions about what consume arguments from the call are likely wrong // handle communitivity void CWriter::visitInlineAsm(CallInst &CI) { - InlineAsm* as = cast<InlineAsm>(CI.getOperand(0)); + InlineAsm* as = cast<InlineAsm>(CI.getCalledValue()); std::vector<InlineAsm::ConstraintInfo> Constraints = as->ParseConstraints(); std::vector<std::pair<Value*, int> > ResultVals; diff --git a/lib/Target/CellSPU/AsmPrinter/CMakeLists.txt b/lib/Target/CellSPU/AsmPrinter/CMakeLists.txt index 1e508fe..8a2b59a 100644 --- a/lib/Target/CellSPU/AsmPrinter/CMakeLists.txt +++ b/lib/Target/CellSPU/AsmPrinter/CMakeLists.txt @@ -1,9 +1,9 @@ -include_directories(
- ${CMAKE_CURRENT_BINARY_DIR}/..^M
- ${CMAKE_CURRENT_SOURCE_DIR}/..^M
- )^M
-^M
-add_llvm_library(LLVMCellSPUAsmPrinter^M
+include_directories( + ${CMAKE_CURRENT_BINARY_DIR}/.. + ${CMAKE_CURRENT_SOURCE_DIR}/.. + ) + +add_llvm_library(LLVMCellSPUAsmPrinter SPUAsmPrinter.cpp - )^M
-add_dependencies(LLVMCellSPUAsmPrinter CellSPUCodeGenTable_gen)^M
\ No newline at end of file + ) +add_dependencies(LLVMCellSPUAsmPrinter CellSPUCodeGenTable_gen) diff --git a/lib/Target/CellSPU/AsmPrinter/SPUAsmPrinter.cpp b/lib/Target/CellSPU/AsmPrinter/SPUAsmPrinter.cpp index 0ef36e5..3e95531 100644 --- a/lib/Target/CellSPU/AsmPrinter/SPUAsmPrinter.cpp +++ b/lib/Target/CellSPU/AsmPrinter/SPUAsmPrinter.cpp @@ -280,7 +280,7 @@ namespace { void SPUAsmPrinter::printOp(const MachineOperand &MO, raw_ostream &O) { switch (MO.getType()) { case MachineOperand::MO_Immediate: - llvm_report_error("printOp() does not handle immediate values"); + report_fatal_error("printOp() does not handle immediate values"); return; case MachineOperand::MO_MachineBasicBlock: @@ -307,7 +307,7 @@ void SPUAsmPrinter::printOp(const MachineOperand &MO, raw_ostream &O) { // External or weakly linked global variables need non-lazily-resolved // stubs if (TM.getRelocationModel() != Reloc::Static) { - GlobalValue *GV = MO.getGlobal(); + const GlobalValue *GV = MO.getGlobal(); if (((GV->isDeclaration() || GV->hasWeakLinkage() || GV->hasLinkOnceLinkage() || GV->hasCommonLinkage()))) { O << *GetSymbolWithGlobalValueBase(GV, "$non_lazy_ptr"); diff --git a/lib/Target/CellSPU/CMakeLists.txt b/lib/Target/CellSPU/CMakeLists.txt index 0cb6676..ddfca37 100644 --- a/lib/Target/CellSPU/CMakeLists.txt +++ b/lib/Target/CellSPU/CMakeLists.txt @@ -21,6 +21,7 @@ add_llvm_target(CellSPUCodeGen SPURegisterInfo.cpp SPUSubtarget.cpp SPUTargetMachine.cpp + SPUSelectionDAGInfo.cpp ) target_link_libraries (LLVMCellSPUCodeGen LLVMSelectionDAG) diff --git a/lib/Target/CellSPU/SPUISelDAGToDAG.cpp b/lib/Target/CellSPU/SPUISelDAGToDAG.cpp index 90f8310..c3c2b39 100644 --- a/lib/Target/CellSPU/SPUISelDAGToDAG.cpp +++ b/lib/Target/CellSPU/SPUISelDAGToDAG.cpp @@ -14,7 +14,6 @@ #include "SPU.h" #include "SPUTargetMachine.h" -#include "SPUISelLowering.h" #include "SPUHazardRecognizers.h" #include "SPUFrameInfo.h" #include "SPURegisterNames.h" @@ -194,11 +193,8 @@ namespace { #ifndef NDEBUG if (retval == 0) { - std::string msg; - raw_string_ostream Msg(msg); - Msg << "SPUISelDAGToDAG.cpp: getValueTypeMapEntry returns NULL for " - << VT.getEVTString(); - llvm_report_error(Msg.str()); + report_fatal_error("SPUISelDAGToDAG.cpp: getValueTypeMapEntry returns" + "NULL for " + Twine(VT.getEVTString())); } #endif @@ -242,8 +238,8 @@ namespace { class SPUDAGToDAGISel : public SelectionDAGISel { - SPUTargetMachine &TM; - SPUTargetLowering &SPUtli; + const SPUTargetMachine &TM; + const SPUTargetLowering &SPUtli; unsigned GlobalBaseReg; public: @@ -305,16 +301,15 @@ namespace { std::vector<Constant*> CV; for (size_t i = 0; i < bvNode->getNumOperands(); ++i) { - ConstantSDNode *V = dyn_cast<ConstantSDNode > (bvNode->getOperand(i)); + ConstantSDNode *V = cast<ConstantSDNode > (bvNode->getOperand(i)); CV.push_back(const_cast<ConstantInt *>(V->getConstantIntValue())); } - Constant *CP = ConstantVector::get(CV); + const Constant *CP = ConstantVector::get(CV); SDValue CPIdx = CurDAG->getConstantPool(CP, SPUtli.getPointerTy()); unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment(); SDValue CGPoolOffset = - SPU::LowerConstantPool(CPIdx, *CurDAG, - SPUtli.getSPUTargetMachine()); + SPU::LowerConstantPool(CPIdx, *CurDAG, TM); HandleSDNode Dummy(CurDAG->getLoad(vecVT, dl, CurDAG->getEntryNode(), CGPoolOffset, @@ -433,13 +428,13 @@ SPUDAGToDAGISel::SelectAFormAddr(SDNode *Op, SDValue N, SDValue &Base, case ISD::Constant: case ISD::ConstantPool: case ISD::GlobalAddress: - llvm_report_error("SPU SelectAFormAddr: Constant/Pool/Global 
not lowered."); + report_fatal_error("SPU SelectAFormAddr: Constant/Pool/Global not lowered."); /*NOTREACHED*/ case ISD::TargetConstant: case ISD::TargetGlobalAddress: case ISD::TargetJumpTable: - llvm_report_error("SPUSelectAFormAddr: Target Constant/Pool/Global " + report_fatal_error("SPUSelectAFormAddr: Target Constant/Pool/Global " "not wrapped as A-form address."); /*NOTREACHED*/ @@ -457,7 +452,7 @@ SPUDAGToDAGISel::SelectAFormAddr(SDNode *Op, SDValue N, SDValue &Base, case ISD::TargetGlobalAddress: { GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op0); - GlobalValue *GV = GSDN->getGlobal(); + const GlobalValue *GV = GSDN->getGlobal(); if (GV->getAlignment() == 16) { Base = Op0; Index = Zero; @@ -510,7 +505,7 @@ SPUDAGToDAGISel::DFormAddressPredicate(SDNode *Op, SDValue N, SDValue &Base, if (Opc == ISD::FrameIndex) { // Stack frame index must be less than 512 (divided by 16): - FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(N); + FrameIndexSDNode *FIN = cast<FrameIndexSDNode>(N); int FI = int(FIN->getIndex()); DEBUG(errs() << "SelectDFormAddr: ISD::FrameIndex = " << FI << "\n"); @@ -531,11 +526,11 @@ SPUDAGToDAGISel::DFormAddressPredicate(SDNode *Op, SDValue N, SDValue &Base, return true; } else if (Op1.getOpcode() == ISD::Constant || Op1.getOpcode() == ISD::TargetConstant) { - ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Op1); + ConstantSDNode *CN = cast<ConstantSDNode>(Op1); int32_t offset = int32_t(CN->getSExtValue()); if (Op0.getOpcode() == ISD::FrameIndex) { - FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Op0); + FrameIndexSDNode *FIN = cast<FrameIndexSDNode>(Op0); int FI = int(FIN->getIndex()); DEBUG(errs() << "SelectDFormAddr: ISD::ADD offset = " << offset << " frame index = " << FI << "\n"); @@ -552,11 +547,11 @@ SPUDAGToDAGISel::DFormAddressPredicate(SDNode *Op, SDValue N, SDValue &Base, } } else if (Op0.getOpcode() == ISD::Constant || Op0.getOpcode() == ISD::TargetConstant) { - ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Op0); + ConstantSDNode *CN = cast<ConstantSDNode>(Op0); int32_t offset = int32_t(CN->getSExtValue()); if (Op1.getOpcode() == ISD::FrameIndex) { - FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Op1); + FrameIndexSDNode *FIN = cast<FrameIndexSDNode>(Op1); int FI = int(FIN->getIndex()); DEBUG(errs() << "SelectDFormAddr: ISD::ADD offset = " << offset << " frame index = " << FI << "\n"); @@ -725,7 +720,7 @@ SPUDAGToDAGISel::Select(SDNode *N) { switch (Op0VT.getSimpleVT().SimpleTy) { default: - llvm_report_error("CellSPU Select: Unhandled zero/any extend EVT"); + report_fatal_error("CellSPU Select: Unhandled zero/any extend EVT"); /*NOTREACHED*/ case MVT::i32: shufMask = CurDAG->getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, @@ -915,11 +910,8 @@ SPUDAGToDAGISel::Select(SDNode *N) { const valtype_map_s *vtm = getValueTypeMapEntry(VT); if (vtm->ldresult_ins == 0) { - std::string msg; - raw_string_ostream Msg(msg); - Msg << "LDRESULT for unsupported type: " - << VT.getEVTString(); - llvm_report_error(Msg.str()); + report_fatal_error("LDRESULT for unsupported type: " + + Twine(VT.getEVTString())); } Opc = vtm->ldresult_ins; @@ -1252,7 +1244,7 @@ SDNode *SPUDAGToDAGISel::SelectI64Constant(uint64_t Value64, EVT OpVT, return CurDAG->getMachineNode(SPU::ORi64_v2i64, dl, OpVT, SDValue(emitBuildVector(i64vec.getNode()), 0)); } else { - llvm_report_error("SPUDAGToDAGISel::SelectI64Constant: Unhandled i64vec" + report_fatal_error("SPUDAGToDAGISel::SelectI64Constant: Unhandled i64vec" "condition"); } } diff --git a/lib/Target/CellSPU/SPUISelLowering.cpp 
b/lib/Target/CellSPU/SPUISelLowering.cpp index 4b0d442..5e04454 100644 --- a/lib/Target/CellSPU/SPUISelLowering.cpp +++ b/lib/Target/CellSPU/SPUISelLowering.cpp @@ -15,6 +15,7 @@ #include "SPUISelLowering.h" #include "SPUTargetMachine.h" #include "SPUFrameInfo.h" +#include "SPUMachineFunction.h" #include "llvm/Constants.h" #include "llvm/Function.h" #include "llvm/Intrinsics.h" @@ -71,11 +72,8 @@ namespace { #ifndef NDEBUG if (retval == 0) { - std::string msg; - raw_string_ostream Msg(msg); - Msg << "getValueTypeMapEntry returns NULL for " - << VT.getEVTString(); - llvm_report_error(Msg.str()); + report_fatal_error("getValueTypeMapEntry returns NULL for " + + Twine(VT.getEVTString())); } #endif @@ -91,7 +89,7 @@ namespace { SDValue ExpandLibCall(RTLIB::Libcall LC, SDValue Op, SelectionDAG &DAG, - bool isSigned, SDValue &Hi, SPUTargetLowering &TLI) { + bool isSigned, SDValue &Hi, const SPUTargetLowering &TLI) { // The input chain to this libcall is the entry node of the function. // Legalizing the call will automatically add the previous call to the // dependence. @@ -714,12 +712,9 @@ LowerLOAD(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) { case ISD::POST_DEC: case ISD::LAST_INDEXED_MODE: { - std::string msg; - raw_string_ostream Msg(msg); - Msg << "LowerLOAD: Got a LoadSDNode with an addr mode other than " - "UNINDEXED\n"; - Msg << (unsigned) LN->getAddressingMode(); - llvm_report_error(Msg.str()); + report_fatal_error("LowerLOAD: Got a LoadSDNode with an addr mode other " + "than UNINDEXED\n" + + Twine((unsigned)LN->getAddressingMode())); /*NOTREACHED*/ } } @@ -884,12 +879,9 @@ LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) { case ISD::POST_DEC: case ISD::LAST_INDEXED_MODE: { - std::string msg; - raw_string_ostream Msg(msg); - Msg << "LowerLOAD: Got a LoadSDNode with an addr mode other than " - "UNINDEXED\n"; - Msg << (unsigned) SN->getAddressingMode(); - llvm_report_error(Msg.str()); + report_fatal_error("LowerLOAD: Got a LoadSDNode with an addr mode other " + "than UNINDEXED\n" + + Twine((unsigned)SN->getAddressingMode())); /*NOTREACHED*/ } } @@ -902,7 +894,7 @@ static SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) { EVT PtrVT = Op.getValueType(); ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op); - Constant *C = CP->getConstVal(); + const Constant *C = CP->getConstVal(); SDValue CPI = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment()); SDValue Zero = DAG.getConstant(0, PtrVT); const TargetMachine &TM = DAG.getTarget(); @@ -960,7 +952,7 @@ static SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) { EVT PtrVT = Op.getValueType(); GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op); - GlobalValue *GV = GSDN->getGlobal(); + const GlobalValue *GV = GSDN->getGlobal(); SDValue GA = DAG.getTargetGlobalAddress(GV, PtrVT, GSDN->getOffset()); const TargetMachine &TM = DAG.getTarget(); SDValue Zero = DAG.getConstant(0, PtrVT); @@ -976,7 +968,7 @@ LowerGlobalAddress(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) { return DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Hi, Lo); } } else { - llvm_report_error("LowerGlobalAddress: Relocation model other than static" + report_fatal_error("LowerGlobalAddress: Relocation model other than static" "not supported."); /*NOTREACHED*/ } @@ -1013,11 +1005,13 @@ SPUTargetLowering::LowerFormalArguments(SDValue Chain, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals) { + 
SmallVectorImpl<SDValue> &InVals) + const { MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); MachineRegisterInfo &RegInfo = MF.getRegInfo(); + SPUFunctionInfo *FuncInfo = MF.getInfo<SPUFunctionInfo>(); const unsigned *ArgRegs = SPURegisterInfo::getArgRegs(); const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs(); @@ -1038,13 +1032,9 @@ SPUTargetLowering::LowerFormalArguments(SDValue Chain, const TargetRegisterClass *ArgRegClass; switch (ObjectVT.getSimpleVT().SimpleTy) { - default: { - std::string msg; - raw_string_ostream Msg(msg); - Msg << "LowerFormalArguments Unhandled argument type: " - << ObjectVT.getEVTString(); - llvm_report_error(Msg.str()); - } + default: + report_fatal_error("LowerFormalArguments Unhandled argument type: " + + Twine(ObjectVT.getEVTString())); case MVT::i8: ArgRegClass = &SPU::R8CRegClass; break; @@ -1104,9 +1094,10 @@ SPUTargetLowering::LowerFormalArguments(SDValue Chain, // Create the frame slot for (; ArgRegIdx != NumArgRegs; ++ArgRegIdx) { - VarArgsFrameIndex = MFI->CreateFixedObject(StackSlotSize, ArgOffset, - true, false); - SDValue FIN = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT); + FuncInfo->setVarArgsFrameIndex( + MFI->CreateFixedObject(StackSlotSize, ArgOffset, + true, false)); + SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT); unsigned VReg = MF.addLiveIn(ArgRegs[ArgRegIdx], &SPU::R32CRegClass); SDValue ArgVal = DAG.getRegister(VReg, MVT::v16i8); SDValue Store = DAG.getStore(Chain, dl, ArgVal, FIN, NULL, 0, @@ -1146,7 +1137,7 @@ SPUTargetLowering::LowerCall(SDValue Chain, SDValue Callee, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals) { + SmallVectorImpl<SDValue> &InVals) const { // CellSPU target does not yet support tail call optimization. isTailCall = false; @@ -1255,7 +1246,7 @@ SPUTargetLowering::LowerCall(SDValue Chain, SDValue Callee, // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol // node so that legalize doesn't hack it. 
if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) { - GlobalValue *GV = G->getGlobal(); + const GlobalValue *GV = G->getGlobal(); EVT CalleeVT = Callee.getValueType(); SDValue Zero = DAG.getConstant(0, PtrVT); SDValue GA = DAG.getTargetGlobalAddress(GV, CalleeVT); @@ -1339,22 +1330,12 @@ SPUTargetLowering::LowerCall(SDValue Chain, SDValue Callee, InVals.push_back(Chain.getValue(0)); } break; + case MVT::i8: + case MVT::i16: case MVT::i64: - Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, MVT::i64, - InFlag).getValue(1); - InVals.push_back(Chain.getValue(0)); - break; case MVT::i128: - Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, MVT::i128, - InFlag).getValue(1); - InVals.push_back(Chain.getValue(0)); - break; case MVT::f32: case MVT::f64: - Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, Ins[0].VT, - InFlag).getValue(1); - InVals.push_back(Chain.getValue(0)); - break; case MVT::v2f64: case MVT::v2i64: case MVT::v4f32: @@ -1374,7 +1355,7 @@ SDValue SPUTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::OutputArg> &Outs, - DebugLoc dl, SelectionDAG &DAG) { + DebugLoc dl, SelectionDAG &DAG) const { SmallVector<CCValAssign, 16> RVLocs; CCState CCInfo(CallConv, isVarArg, getTargetMachine(), @@ -1581,14 +1562,10 @@ LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) { uint64_t SplatBits = APSplatBits.getZExtValue(); switch (VT.getSimpleVT().SimpleTy) { - default: { - std::string msg; - raw_string_ostream Msg(msg); - Msg << "CellSPU: Unhandled VT in LowerBUILD_VECTOR, VT = " - << VT.getEVTString(); - llvm_report_error(Msg.str()); + default: + report_fatal_error("CellSPU: Unhandled VT in LowerBUILD_VECTOR, VT = " + + Twine(VT.getEVTString())); /*NOTREACHED*/ - } case MVT::v4f32: { uint32_t Value32 = uint32_t(SplatBits); assert(SplatBitSize == 32 @@ -2004,7 +1981,7 @@ static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) { // slot 0 across the vector EVT VecVT = N.getValueType(); if (!VecVT.isSimple() || !VecVT.isVector() || !VecVT.is128BitVector()) { - llvm_report_error("LowerEXTRACT_VECTOR_ELT: Must have a simple, 128-bit" + report_fatal_error("LowerEXTRACT_VECTOR_ELT: Must have a simple, 128-bit" "vector type!"); } @@ -2032,7 +2009,7 @@ static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) { switch (VT.getSimpleVT().SimpleTy) { default: - llvm_report_error("LowerEXTRACT_VECTOR_ELT(varable): Unhandled vector" + report_fatal_error("LowerEXTRACT_VECTOR_ELT(varable): Unhandled vector" "type"); /*NOTREACHED*/ case MVT::i8: { @@ -2368,7 +2345,7 @@ static SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) { All conversions to i64 are expanded to a libcall. */ static SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG, - SPUTargetLowering &TLI) { + const SPUTargetLowering &TLI) { EVT OpVT = Op.getValueType(); SDValue Op0 = Op.getOperand(0); EVT Op0VT = Op0.getValueType(); @@ -2394,7 +2371,7 @@ static SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG, All conversions from i64 are expanded to a libcall. 
*/ static SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG, - SPUTargetLowering &TLI) { + const SPUTargetLowering &TLI) { EVT OpVT = Op.getValueType(); SDValue Op0 = Op.getOperand(0); EVT Op0VT = Op0.getValueType(); @@ -2515,7 +2492,7 @@ static SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG, case ISD::SETONE: compareOp = ISD::SETNE; break; default: - llvm_report_error("CellSPU ISel Select: unimplemented f64 condition"); + report_fatal_error("CellSPU ISel Select: unimplemented f64 condition"); } SDValue result = @@ -2670,7 +2647,7 @@ static SDValue LowerSIGN_EXTEND(SDValue Op, SelectionDAG &DAG) lowering of nodes. */ SDValue -SPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) +SPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { unsigned Opc = (unsigned) Op.getOpcode(); EVT VT = Op.getValueType(); @@ -2766,7 +2743,7 @@ SPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) void SPUTargetLowering::ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results, - SelectionDAG &DAG) + SelectionDAG &DAG) const { #if 0 unsigned Opc = (unsigned) N->getOpcode(); diff --git a/lib/Target/CellSPU/SPUISelLowering.h b/lib/Target/CellSPU/SPUISelLowering.h index 3c51177..9ebd442 100644 --- a/lib/Target/CellSPU/SPUISelLowering.h +++ b/lib/Target/CellSPU/SPUISelLowering.h @@ -109,11 +109,11 @@ namespace llvm { virtual MVT::SimpleValueType getSetCCResultType(EVT VT) const; //! Custom lowering hooks - virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG); + virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const; //! Custom lowering hook for nodes with illegal result types. virtual void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results, - SelectionDAG &DAG); + SelectionDAG &DAG) const; virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const; @@ -153,7 +153,7 @@ namespace llvm { CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals); + SmallVectorImpl<SDValue> &InVals) const; virtual SDValue LowerCall(SDValue Chain, SDValue Callee, @@ -162,13 +162,13 @@ namespace llvm { const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals); + SmallVectorImpl<SDValue> &InVals) const; virtual SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::OutputArg> &Outs, - DebugLoc dl, SelectionDAG &DAG); + DebugLoc dl, SelectionDAG &DAG) const; }; } diff --git a/lib/Target/CellSPU/SPUMCAsmInfo.cpp b/lib/Target/CellSPU/SPUMCAsmInfo.cpp index 3e17a51..68445cf 100644 --- a/lib/Target/CellSPU/SPUMCAsmInfo.cpp +++ b/lib/Target/CellSPU/SPUMCAsmInfo.cpp @@ -18,7 +18,6 @@ SPULinuxMCAsmInfo::SPULinuxMCAsmInfo(const Target &T, const StringRef &TT) { ZeroDirective = "\t.space\t"; Data64bitsDirective = "\t.quad\t"; AlignmentIsInBytes = false; - HasLCOMMDirective = true; PCSymbol = "."; CommentString = "#"; diff --git a/lib/Target/CellSPU/SPUMachineFunction.h b/lib/Target/CellSPU/SPUMachineFunction.h index 6a66967..3ef3ccb 100644 --- a/lib/Target/CellSPU/SPUMachineFunction.h +++ b/lib/Target/CellSPU/SPUMachineFunction.h @@ -26,14 +26,20 @@ private: /// bool UsesLR; + // VarArgsFrameIndex - FrameIndex for start of varargs area. 
+ int VarArgsFrameIndex; + public: SPUFunctionInfo(MachineFunction& MF) - : UsesLR(false) + : UsesLR(false), + VarArgsFrameIndex(0) {} void setUsesLR(bool U) { UsesLR = U; } bool usesLR() { return UsesLR; } + int getVarArgsFrameIndex() const { return VarArgsFrameIndex; } + void setVarArgsFrameIndex(int Index) { VarArgsFrameIndex = Index; } }; } // end of namespace llvm diff --git a/lib/Target/CellSPU/SPURegisterInfo.cpp b/lib/Target/CellSPU/SPURegisterInfo.cpp index ffac581..fdbe10f 100644 --- a/lib/Target/CellSPU/SPURegisterInfo.cpp +++ b/lib/Target/CellSPU/SPURegisterInfo.cpp @@ -179,7 +179,7 @@ unsigned SPURegisterInfo::getRegisterNumbering(unsigned RegEnum) { case SPU::R126: return 126; case SPU::R127: return 127; default: - llvm_report_error("Unhandled reg in SPURegisterInfo::getRegisterNumbering"); + report_fatal_error("Unhandled reg in SPURegisterInfo::getRegisterNumbering"); } } @@ -303,7 +303,7 @@ BitVector SPURegisterInfo::getReservedRegs(const MachineFunction &MF) const { // static bool needsFP(const MachineFunction &MF) { const MachineFrameInfo *MFI = MF.getFrameInfo(); - return NoFramePointerElim || MFI->hasVarSizedObjects(); + return DisableFramePointerElim(MF) || MFI->hasVarSizedObjects(); } //-------------------------------------------------------------------------- @@ -509,10 +509,7 @@ void SPURegisterInfo::emitPrologue(MachineFunction &MF) const .addReg(SPU::R2) .addReg(SPU::R1); } else { - std::string msg; - raw_string_ostream Msg(msg); - Msg << "Unhandled frame size: " << FrameSize; - llvm_report_error(Msg.str()); + report_fatal_error("Unhandled frame size: " + Twine(FrameSize)); } if (hasDebugInfo) { @@ -605,10 +602,7 @@ SPURegisterInfo::emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const .addReg(SPU::R2) .addReg(SPU::R1); } else { - std::string msg; - raw_string_ostream Msg(msg); - Msg << "Unhandled frame size: " << FrameSize; - llvm_report_error(Msg.str()); + report_fatal_error("Unhandled frame size: " + Twine(FrameSize)); } } } diff --git a/lib/Target/CellSPU/SPUSchedule.td b/lib/Target/CellSPU/SPUSchedule.td index 785dc46..a0b581f 100644 --- a/lib/Target/CellSPU/SPUSchedule.td +++ b/lib/Target/CellSPU/SPUSchedule.td @@ -36,7 +36,7 @@ def RotateShift : InstrItinClass; // EVEN_UNIT def ImmLoad : InstrItinClass; // EVEN_UNIT /* Note: The itinerary for the Cell SPU is somewhat contrived... */ -def SPUItineraries : ProcessorItineraries<[ +def SPUItineraries : ProcessorItineraries<[ODD_UNIT, EVEN_UNIT], [ InstrItinData<LoadStore , [InstrStage<6, [ODD_UNIT]>]>, InstrItinData<BranchHints , [InstrStage<6, [ODD_UNIT]>]>, InstrItinData<BranchResolv, [InstrStage<4, [ODD_UNIT]>]>, diff --git a/lib/Target/CellSPU/SPUSelectionDAGInfo.cpp b/lib/Target/CellSPU/SPUSelectionDAGInfo.cpp new file mode 100644 index 0000000..ca2a4bf --- /dev/null +++ b/lib/Target/CellSPU/SPUSelectionDAGInfo.cpp @@ -0,0 +1,22 @@ +//===-- SPUSelectionDAGInfo.cpp - CellSPU SelectionDAG Info ---------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the SPUSelectionDAGInfo class. 
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "cellspu-selectiondag-info" +#include "SPUSelectionDAGInfo.h" +using namespace llvm; + +SPUSelectionDAGInfo::SPUSelectionDAGInfo() { +} + +SPUSelectionDAGInfo::~SPUSelectionDAGInfo() { +} diff --git a/lib/Target/CellSPU/SPUSelectionDAGInfo.h b/lib/Target/CellSPU/SPUSelectionDAGInfo.h new file mode 100644 index 0000000..0a6b4c1 --- /dev/null +++ b/lib/Target/CellSPU/SPUSelectionDAGInfo.h @@ -0,0 +1,29 @@ +//===-- SPUSelectionDAGInfo.h - CellSPU SelectionDAG Info -------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the CellSPU subclass for TargetSelectionDAGInfo. +// +//===----------------------------------------------------------------------===// + +#ifndef CELLSPUSELECTIONDAGINFO_H +#define CELLSPUSELECTIONDAGINFO_H + +#include "llvm/Target/TargetSelectionDAGInfo.h" + +namespace llvm { + +class SPUSelectionDAGInfo : public TargetSelectionDAGInfo { +public: + SPUSelectionDAGInfo(); + ~SPUSelectionDAGInfo(); +}; + +} + +#endif diff --git a/lib/Target/CellSPU/SPUTargetMachine.h b/lib/Target/CellSPU/SPUTargetMachine.h index 9fdcfe9..37e7cd2 100644 --- a/lib/Target/CellSPU/SPUTargetMachine.h +++ b/lib/Target/CellSPU/SPUTargetMachine.h @@ -57,8 +57,8 @@ public: return NULL; } - virtual SPUTargetLowering *getTargetLowering() const { - return const_cast<SPUTargetLowering*>(&TLInfo); + virtual const SPUTargetLowering *getTargetLowering() const { + return &TLInfo; } virtual const SPURegisterInfo *getRegisterInfo() const { diff --git a/lib/Target/CppBackend/CPPBackend.cpp b/lib/Target/CppBackend/CPPBackend.cpp index 9c5893c..e739b26 100644 --- a/lib/Target/CppBackend/CPPBackend.cpp +++ b/lib/Target/CppBackend/CPPBackend.cpp @@ -210,7 +210,7 @@ namespace { } void CppWriter::error(const std::string& msg) { - llvm_report_error(msg); + report_fatal_error(msg); } // printCFP - Print a floating point constant .. very carefully :) @@ -1082,8 +1082,9 @@ namespace { // Before we emit this instruction, we need to take care of generating any // forward references. 
So, we get the names of all the operands in advance - std::string* opNames = new std::string[I->getNumOperands()]; - for (unsigned i = 0; i < I->getNumOperands(); i++) { + const unsigned Ops(I->getNumOperands()); + std::string* opNames = new std::string[Ops]; + for (unsigned i = 0; i < Ops; i++) { opNames[i] = getOpName(I->getOperand(i)); } @@ -1144,15 +1145,15 @@ namespace { const InvokeInst* inv = cast<InvokeInst>(I); Out << "std::vector<Value*> " << iName << "_params;"; nl(Out); - for (unsigned i = 3; i < inv->getNumOperands(); ++i) { + for (unsigned i = 0; i < inv->getNumOperands() - 3; ++i) { Out << iName << "_params.push_back(" << opNames[i] << ");"; nl(Out); } Out << "InvokeInst *" << iName << " = InvokeInst::Create(" - << opNames[0] << ", " - << opNames[1] << ", " - << opNames[2] << ", " + << opNames[Ops - 3] << ", " + << opNames[Ops - 2] << ", " + << opNames[Ops - 1] << ", " << iName << "_params.begin(), " << iName << "_params.end(), \""; printEscapedString(inv->getName()); Out << "\", " << bbname << ");"; diff --git a/lib/Target/MBlaze/AsmPrinter/CMakeLists.txt b/lib/Target/MBlaze/AsmPrinter/CMakeLists.txt index cfb2fc8..fac2c19 100644 --- a/lib/Target/MBlaze/AsmPrinter/CMakeLists.txt +++ b/lib/Target/MBlaze/AsmPrinter/CMakeLists.txt @@ -1,9 +1,9 @@ -include_directories(
- ${CMAKE_CURRENT_BINARY_DIR}/..
- ${CMAKE_CURRENT_SOURCE_DIR}/..
- )
-
-add_llvm_library(LLVMMBlazeAsmPrinter
+include_directories( + ${CMAKE_CURRENT_BINARY_DIR}/.. + ${CMAKE_CURRENT_SOURCE_DIR}/.. + ) + +add_llvm_library(LLVMMBlazeAsmPrinter MBlazeAsmPrinter.cpp - )
-add_dependencies(LLVMMBlazeAsmPrinter MBlazeCodeGenTable_gen)
\ No newline at end of file + ) +add_dependencies(LLVMMBlazeAsmPrinter MBlazeCodeGenTable_gen) diff --git a/lib/Target/MBlaze/AsmPrinter/MBlazeAsmPrinter.cpp b/lib/Target/MBlaze/AsmPrinter/MBlazeAsmPrinter.cpp index b1df926..04dfb0a 100644 --- a/lib/Target/MBlaze/AsmPrinter/MBlazeAsmPrinter.cpp +++ b/lib/Target/MBlaze/AsmPrinter/MBlazeAsmPrinter.cpp @@ -268,7 +268,7 @@ void MBlazeAsmPrinter::printOperand(const MachineInstr *MI, int opNum, void MBlazeAsmPrinter::printUnsignedImm(const MachineInstr *MI, int opNum, raw_ostream &O) { const MachineOperand &MO = MI->getOperand(opNum); - if (MO.getType() == MachineOperand::MO_Immediate) + if (MO.isImm()) O << (unsigned int)MO.getImm(); else printOperand(MI, opNum, O); @@ -277,7 +277,7 @@ void MBlazeAsmPrinter::printUnsignedImm(const MachineInstr *MI, int opNum, void MBlazeAsmPrinter::printFSLImm(const MachineInstr *MI, int opNum, raw_ostream &O) { const MachineOperand &MO = MI->getOperand(opNum); - if (MO.getType() == MachineOperand::MO_Immediate) + if (MO.isImm()) O << "rfsl" << (unsigned int)MO.getImm(); else printOperand(MI, opNum, O); diff --git a/lib/Target/MBlaze/CMakeLists.txt b/lib/Target/MBlaze/CMakeLists.txt index c93e3df..7f85bf8 100644 --- a/lib/Target/MBlaze/CMakeLists.txt +++ b/lib/Target/MBlaze/CMakeLists.txt @@ -22,6 +22,7 @@ add_llvm_target(MBlazeCodeGen MBlazeTargetMachine.cpp MBlazeTargetObjectFile.cpp MBlazeIntrinsicInfo.cpp + MBlazeSelectionDAGInfo.cpp ) target_link_libraries (LLVMMBlazeCodeGen LLVMSelectionDAG) diff --git a/lib/Target/MBlaze/MBlazeISelDAGToDAG.cpp b/lib/Target/MBlaze/MBlazeISelDAGToDAG.cpp index 7e59c4a..c7cd5f4 100644 --- a/lib/Target/MBlaze/MBlazeISelDAGToDAG.cpp +++ b/lib/Target/MBlaze/MBlazeISelDAGToDAG.cpp @@ -13,7 +13,6 @@ #define DEBUG_TYPE "mblaze-isel" #include "MBlaze.h" -#include "MBlazeISelLowering.h" #include "MBlazeMachineFunction.h" #include "MBlazeRegisterInfo.h" #include "MBlazeSubtarget.h" diff --git a/lib/Target/MBlaze/MBlazeISelLowering.cpp b/lib/Target/MBlaze/MBlazeISelLowering.cpp index f0864d0..23889b1 100644 --- a/lib/Target/MBlaze/MBlazeISelLowering.cpp +++ b/lib/Target/MBlaze/MBlazeISelLowering.cpp @@ -185,7 +185,8 @@ unsigned MBlazeTargetLowering::getFunctionAlignment(const Function *) const { return 2; } -SDValue MBlazeTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) { +SDValue MBlazeTargetLowering::LowerOperation(SDValue Op, + SelectionDAG &DAG) const { switch (Op.getOpcode()) { case ISD::ConstantPool: return LowerConstantPool(Op, DAG); @@ -201,10 +202,9 @@ SDValue MBlazeTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) { //===----------------------------------------------------------------------===// // Lower helper functions //===----------------------------------------------------------------------===// -MachineBasicBlock* MBlazeTargetLowering:: -EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *BB, - DenseMap<MachineBasicBlock*, - MachineBasicBlock*> *EM) const { +MachineBasicBlock* +MBlazeTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, + MachineBasicBlock *BB) const { const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); DebugLoc dl = MI->getDebugLoc(); @@ -254,12 +254,9 @@ EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *BB, // Update machine-CFG edges by first adding all successors of the current // block to the new block which will contain the Phi node for the select. - // Also inform sdisel of the edge changes. 
for(MachineBasicBlock::succ_iterator i = BB->succ_begin(), - e = BB->succ_end(); i != e; ++i) { - EM->insert(std::make_pair(*i, finish)); + e = BB->succ_end(); i != e; ++i) finish->addSuccessor(*i); - } // Next, remove all successors of the current block, and add the true // and fallthrough blocks as its successors. @@ -350,12 +347,9 @@ EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *BB, // Update machine-CFG edges by first adding all successors of the current // block to the new block which will contain the Phi node for the select. - // Also inform sdisel of the edge changes. for(MachineBasicBlock::succ_iterator i = BB->succ_begin(), - e = BB->succ_end(); i != e; ++i) { - EM->insert(std::make_pair(*i, dneBB)); + e = BB->succ_end(); i != e; ++i) dneBB->addSuccessor(*i); - } // Next, remove all successors of the current block, and add the true // and fallthrough blocks as its successors. @@ -387,7 +381,8 @@ EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *BB, //===----------------------------------------------------------------------===// // -SDValue MBlazeTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) { +SDValue MBlazeTargetLowering::LowerSELECT_CC(SDValue Op, + SelectionDAG &DAG) const { SDValue LHS = Op.getOperand(0); SDValue RHS = Op.getOperand(1); SDValue TrueVal = Op.getOperand(2); @@ -409,23 +404,23 @@ SDValue MBlazeTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) { } SDValue MBlazeTargetLowering:: -LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) { +LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const { // FIXME there isn't actually debug info here DebugLoc dl = Op.getDebugLoc(); - GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); + const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); SDValue GA = DAG.getTargetGlobalAddress(GV, MVT::i32); return DAG.getNode(MBlazeISD::Wrap, dl, MVT::i32, GA); } SDValue MBlazeTargetLowering:: -LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) { +LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const { llvm_unreachable("TLS not implemented for MicroBlaze."); return SDValue(); // Not reached } SDValue MBlazeTargetLowering:: -LowerJumpTable(SDValue Op, SelectionDAG &DAG) { +LowerJumpTable(SDValue Op, SelectionDAG &DAG) const { SDValue ResNode; SDValue HiPart; // FIXME there isn't actually debug info here @@ -442,11 +437,11 @@ LowerJumpTable(SDValue Op, SelectionDAG &DAG) { } SDValue MBlazeTargetLowering:: -LowerConstantPool(SDValue Op, SelectionDAG &DAG) { +LowerConstantPool(SDValue Op, SelectionDAG &DAG) const { SDValue ResNode; EVT PtrVT = Op.getValueType(); ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Op); - Constant *C = N->getConstVal(); + const Constant *C = N->getConstVal(); SDValue Zero = DAG.getConstant(0, PtrVT); DebugLoc dl = Op.getDebugLoc(); @@ -455,9 +450,14 @@ LowerConstantPool(SDValue Op, SelectionDAG &DAG) { return DAG.getNode(MBlazeISD::Wrap, dl, MVT::i32, CP); } -SDValue MBlazeTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) { +SDValue MBlazeTargetLowering::LowerVASTART(SDValue Op, + SelectionDAG &DAG) const { + MachineFunction &MF = DAG.getMachineFunction(); + MBlazeFunctionInfo *FuncInfo = MF.getInfo<MBlazeFunctionInfo>(); + DebugLoc dl = Op.getDebugLoc(); - SDValue FI = DAG.getFrameIndex(VarArgsFrameIndex, getPointerTy()); + SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), + getPointerTy()); // vastart just stores the address of the VarArgsFrameIndex slot into the // memory location argument. 
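Note on the LowerVASTART/LowerFormalArguments hunks above: with the lowering hooks becoming const in this patch, per-function state such as the var-args frame index can no longer live as a mutable TargetLowering member, so it moves into the target's MachineFunctionInfo subclass (MBlazeFunctionInfo here, SPUFunctionInfo in the CellSPU hunks earlier). A minimal sketch of the resulting pattern, using only the accessors added in this diff:

    // Recorded once in LowerFormalArguments, read back in LowerVASTART.
    MachineFunction &MF = DAG.getMachineFunction();
    MBlazeFunctionInfo *FuncInfo = MF.getInfo<MBlazeFunctionInfo>();
    if (!FuncInfo->getVarArgsFrameIndex())
      FuncInfo->setVarArgsFrameIndex(FI);          // FI from CreateFixedObject
    SDValue Slot = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
                                     getPointerTy());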
@@ -533,7 +533,7 @@ LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals) { + SmallVectorImpl<SDValue> &InVals) const { // MBlaze does not yet support tail call optimization isTailCall = false; @@ -669,7 +669,7 @@ SDValue MBlazeTargetLowering:: LowerCallResult(SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals) { + SmallVectorImpl<SDValue> &InVals) const { // Assign locations to each value returned by this call. SmallVector<CCValAssign, 16> RVLocs; CCState CCInfo(CallConv, isVarArg, getTargetMachine(), @@ -699,13 +699,13 @@ SDValue MBlazeTargetLowering:: LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals) { + SmallVectorImpl<SDValue> &InVals) const { MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); MBlazeFunctionInfo *MBlazeFI = MF.getInfo<MBlazeFunctionInfo>(); unsigned StackReg = MF.getTarget().getRegisterInfo()->getFrameRegister(MF); - VarArgsFrameIndex = 0; + MBlazeFI->setVarArgsFrameIndex(0); // Used with vargs to acumulate store chains. std::vector<SDValue> OutChains; @@ -818,8 +818,8 @@ LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, // Record the frame index of the first variable argument // which is a value necessary to VASTART. - if (!VarArgsFrameIndex) - VarArgsFrameIndex = FI; + if (!MBlazeFI->getVarArgsFrameIndex()) + MBlazeFI->setVarArgsFrameIndex(FI); } } @@ -841,7 +841,7 @@ LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, SDValue MBlazeTargetLowering:: LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::OutputArg> &Outs, - DebugLoc dl, SelectionDAG &DAG) { + DebugLoc dl, SelectionDAG &DAG) const { // CCValAssign - represent the assignment of // the return value to a location SmallVector<CCValAssign, 16> RVLocs; diff --git a/lib/Target/MBlaze/MBlazeISelLowering.h b/lib/Target/MBlaze/MBlazeISelLowering.h index f8b1470..9f9ac89 100644 --- a/lib/Target/MBlaze/MBlazeISelLowering.h +++ b/lib/Target/MBlaze/MBlazeISelLowering.h @@ -63,14 +63,11 @@ namespace llvm { //===--------------------------------------------------------------------===// class MBlazeTargetLowering : public TargetLowering { - int VarArgsFrameIndex; // FrameIndex for start of varargs area. - public: - explicit MBlazeTargetLowering(MBlazeTargetMachine &TM); /// LowerOperation - Provide custom lowering hooks for some operations. - virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG); + virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const; /// getTargetNodeName - This method returns the name of a target specific // DAG node. 
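Note on the header hunk that follows: EmitInstrWithCustomInserter drops its sdisel edge-map parameter, matching the EM->insert removals in the .cpp hunks above; successor edges are now added directly on the new blocks rather than being recorded in a DenseMap for the selector. The MSP430 hunks further down make the same change. Sketch of the signature change as it appears in this patch:

    // Before:
    virtual MachineBasicBlock *
    EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *MBB,
        DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const;
    // After:
    virtual MachineBasicBlock *
    EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *MBB) const;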
@@ -90,22 +87,22 @@ namespace llvm { CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals); + SmallVectorImpl<SDValue> &InVals) const; // Lower Operand specifics - SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG); - SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG); - SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG); - SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG); - SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG); - SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG); + SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const; virtual SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals); + SmallVectorImpl<SDValue> &InVals) const; virtual SDValue LowerCall(SDValue Chain, SDValue Callee, @@ -114,17 +111,17 @@ namespace llvm { const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals); + SmallVectorImpl<SDValue> &InVals) const; virtual SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::OutputArg> &Outs, - DebugLoc dl, SelectionDAG &DAG); + DebugLoc dl, SelectionDAG &DAG) const; - virtual MachineBasicBlock *EmitInstrWithCustomInserter(MachineInstr *MI, - MachineBasicBlock *MBB, - DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const; + virtual MachineBasicBlock * + EmitInstrWithCustomInserter(MachineInstr *MI, + MachineBasicBlock *MBB) const; // Inline asm support ConstraintType getConstraintType(const std::string &Constraint) const; diff --git a/lib/Target/MBlaze/MBlazeMachineFunction.h b/lib/Target/MBlaze/MBlazeMachineFunction.h index 08d4dca..1f956c1 100644 --- a/lib/Target/MBlaze/MBlazeMachineFunction.h +++ b/lib/Target/MBlaze/MBlazeMachineFunction.h @@ -79,11 +79,14 @@ private: /// relocation models. unsigned GlobalBaseReg; + // VarArgsFrameIndex - FrameIndex for start of varargs area. + int VarArgsFrameIndex; + public: MBlazeFunctionInfo(MachineFunction& MF) : FPStackOffset(0), RAStackOffset(0), CPUTopSavedRegOff(0), GPHolder(-1,-1), HasLoadArgs(false), HasStoreVarArgs(false), - SRetReturnReg(0), GlobalBaseReg(0) + SRetReturnReg(0), GlobalBaseReg(0), VarArgsFrameIndex(0) {} int getFPStackOffset() const { return FPStackOffset; } @@ -129,6 +132,9 @@ public: unsigned getGlobalBaseReg() const { return GlobalBaseReg; } void setGlobalBaseReg(unsigned Reg) { GlobalBaseReg = Reg; } + + int getVarArgsFrameIndex() const { return VarArgsFrameIndex; } + void setVarArgsFrameIndex(int Index) { VarArgsFrameIndex = Index; } }; } // end of namespace llvm diff --git a/lib/Target/MBlaze/MBlazeRegisterInfo.cpp b/lib/Target/MBlaze/MBlazeRegisterInfo.cpp index a12310a..e15176e 100644 --- a/lib/Target/MBlaze/MBlazeRegisterInfo.cpp +++ b/lib/Target/MBlaze/MBlazeRegisterInfo.cpp @@ -243,7 +243,7 @@ void MBlazeRegisterInfo::adjustMBlazeStackFrame(MachineFunction &MF) const { // if frame pointer elimination is disabled. 
bool MBlazeRegisterInfo::hasFP(const MachineFunction &MF) const { const MachineFrameInfo *MFI = MF.getFrameInfo(); - return NoFramePointerElim || MFI->hasVarSizedObjects(); + return DisableFramePointerElim(MF) || MFI->hasVarSizedObjects(); } // This function eliminate ADJCALLSTACKDOWN, diff --git a/lib/Target/MBlaze/MBlazeSchedule.td b/lib/Target/MBlaze/MBlazeSchedule.td index 6a94491..1fec9e6 100644 --- a/lib/Target/MBlaze/MBlazeSchedule.td +++ b/lib/Target/MBlaze/MBlazeSchedule.td @@ -40,7 +40,8 @@ def IIPseudo : InstrItinClass; //===----------------------------------------------------------------------===// // MBlaze Generic instruction itineraries. //===----------------------------------------------------------------------===// -def MBlazeGenericItineraries : ProcessorItineraries<[ +def MBlazeGenericItineraries : ProcessorItineraries< + [ALU, IMULDIV], [ InstrItinData<IIAlu , [InstrStage<1, [ALU]>]>, InstrItinData<IILoad , [InstrStage<3, [ALU]>]>, InstrItinData<IIStore , [InstrStage<1, [ALU]>]>, diff --git a/lib/Target/MBlaze/MBlazeSelectionDAGInfo.cpp b/lib/Target/MBlaze/MBlazeSelectionDAGInfo.cpp new file mode 100644 index 0000000..105e42a --- /dev/null +++ b/lib/Target/MBlaze/MBlazeSelectionDAGInfo.cpp @@ -0,0 +1,22 @@ +//===-- MBlazeSelectionDAGInfo.cpp - MBlaze SelectionDAG Info -------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the MBlazeSelectionDAGInfo class. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "mblaze-selectiondag-info" +#include "MBlazeSelectionDAGInfo.h" +using namespace llvm; + +MBlazeSelectionDAGInfo::MBlazeSelectionDAGInfo() { +} + +MBlazeSelectionDAGInfo::~MBlazeSelectionDAGInfo() { +} diff --git a/lib/Target/MBlaze/MBlazeSelectionDAGInfo.h b/lib/Target/MBlaze/MBlazeSelectionDAGInfo.h new file mode 100644 index 0000000..11e6879 --- /dev/null +++ b/lib/Target/MBlaze/MBlazeSelectionDAGInfo.h @@ -0,0 +1,29 @@ +//===-- MBlazeSelectionDAGInfo.h - MBlaze SelectionDAG Info -----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the MBlaze subclass for TargetSelectionDAGInfo. 
+// +//===----------------------------------------------------------------------===// + +#ifndef MBLAZESELECTIONDAGINFO_H +#define MBLAZESELECTIONDAGINFO_H + +#include "llvm/Target/TargetSelectionDAGInfo.h" + +namespace llvm { + +class MBlazeSelectionDAGInfo : public TargetSelectionDAGInfo { +public: + MBlazeSelectionDAGInfo(); + ~MBlazeSelectionDAGInfo(); +}; + +} + +#endif diff --git a/lib/Target/MBlaze/MBlazeTargetMachine.h b/lib/Target/MBlaze/MBlazeTargetMachine.h index 85c975c..9bf9898 100644 --- a/lib/Target/MBlaze/MBlazeTargetMachine.h +++ b/lib/Target/MBlaze/MBlazeTargetMachine.h @@ -51,8 +51,8 @@ namespace llvm { virtual const MBlazeRegisterInfo *getRegisterInfo() const { return &InstrInfo.getRegisterInfo(); } - virtual MBlazeTargetLowering *getTargetLowering() const - { return const_cast<MBlazeTargetLowering*>(&TLInfo); } + virtual const MBlazeTargetLowering *getTargetLowering() const + { return &TLInfo; } const TargetIntrinsicInfo *getIntrinsicInfo() const { return &IntrinsicInfo; } diff --git a/lib/Target/MBlaze/MBlazeTargetObjectFile.cpp b/lib/Target/MBlaze/MBlazeTargetObjectFile.cpp index 79c9494..05c01ef 100644 --- a/lib/Target/MBlaze/MBlazeTargetObjectFile.cpp +++ b/lib/Target/MBlaze/MBlazeTargetObjectFile.cpp @@ -11,6 +11,7 @@ #include "MBlazeSubtarget.h" #include "llvm/DerivedTypes.h" #include "llvm/GlobalVariable.h" +#include "llvm/MC/MCContext.h" #include "llvm/MC/MCSectionELF.h" #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetMachine.h" @@ -22,14 +23,14 @@ Initialize(MCContext &Ctx, const TargetMachine &TM) { TargetLoweringObjectFileELF::Initialize(Ctx, TM); SmallDataSection = - getELFSection(".sdata", MCSectionELF::SHT_PROGBITS, - MCSectionELF::SHF_WRITE | MCSectionELF::SHF_ALLOC, - SectionKind::getDataRel()); + getContext().getELFSection(".sdata", MCSectionELF::SHT_PROGBITS, + MCSectionELF::SHF_WRITE |MCSectionELF::SHF_ALLOC, + SectionKind::getDataRel()); SmallBSSSection = - getELFSection(".sbss", MCSectionELF::SHT_NOBITS, - MCSectionELF::SHF_WRITE | MCSectionELF::SHF_ALLOC, - SectionKind::getBSS()); + getContext().getELFSection(".sbss", MCSectionELF::SHT_NOBITS, + MCSectionELF::SHF_WRITE |MCSectionELF::SHF_ALLOC, + SectionKind::getBSS()); } diff --git a/lib/Target/MSP430/AsmPrinter/MSP430MCInstLower.cpp b/lib/Target/MSP430/AsmPrinter/MSP430MCInstLower.cpp index f4d7d8a..d1d9a11 100644 --- a/lib/Target/MSP430/AsmPrinter/MSP430MCInstLower.cpp +++ b/lib/Target/MSP430/AsmPrinter/MSP430MCInstLower.cpp @@ -78,6 +78,16 @@ GetConstantPoolIndexSymbol(const MachineOperand &MO) const { return Ctx.GetOrCreateSymbol(Name.str()); } +MCSymbol *MSP430MCInstLower:: +GetBlockAddressSymbol(const MachineOperand &MO) const { + switch (MO.getTargetFlags()) { + default: assert(0 && "Unknown target flag on GV operand"); + case 0: break; + } + + return Printer.GetBlockAddressSymbol(MO.getBlockAddress()); +} + MCOperand MSP430MCInstLower:: LowerSymbolOperand(const MachineOperand &MO, MCSymbol *Sym) const { // FIXME: We would like an efficient form for this, so we don't have to do a @@ -131,6 +141,8 @@ void MSP430MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const { case MachineOperand::MO_ConstantPoolIndex: MCOp = LowerSymbolOperand(MO, GetConstantPoolIndexSymbol(MO)); break; + case MachineOperand::MO_BlockAddress: + MCOp = LowerSymbolOperand(MO, GetBlockAddressSymbol(MO)); } OutMI.addOperand(MCOp); diff --git a/lib/Target/MSP430/AsmPrinter/MSP430MCInstLower.h b/lib/Target/MSP430/AsmPrinter/MSP430MCInstLower.h index a2b99ae..f9620e8 100644 --- 
a/lib/Target/MSP430/AsmPrinter/MSP430MCInstLower.h +++ b/lib/Target/MSP430/AsmPrinter/MSP430MCInstLower.h @@ -42,6 +42,7 @@ public: MCSymbol *GetExternalSymbolSymbol(const MachineOperand &MO) const; MCSymbol *GetJumpTableSymbol(const MachineOperand &MO) const; MCSymbol *GetConstantPoolIndexSymbol(const MachineOperand &MO) const; + MCSymbol *GetBlockAddressSymbol(const MachineOperand &MO) const; }; } diff --git a/lib/Target/MSP430/CMakeLists.txt b/lib/Target/MSP430/CMakeLists.txt index 29abe46..a3f60d2 100644 --- a/lib/Target/MSP430/CMakeLists.txt +++ b/lib/Target/MSP430/CMakeLists.txt @@ -19,6 +19,7 @@ add_llvm_target(MSP430CodeGen MSP430RegisterInfo.cpp MSP430Subtarget.cpp MSP430TargetMachine.cpp + MSP430SelectionDAGInfo.cpp ) target_link_libraries (LLVMMSP430CodeGen LLVMSelectionDAG) diff --git a/lib/Target/MSP430/MSP430BranchSelector.cpp b/lib/Target/MSP430/MSP430BranchSelector.cpp index 836e425..68cb342 100644 --- a/lib/Target/MSP430/MSP430BranchSelector.cpp +++ b/lib/Target/MSP430/MSP430BranchSelector.cpp @@ -157,7 +157,7 @@ bool MSP430BSel::runOnMachineFunction(MachineFunction &Fn) { NewSize = 6; } // Uncond branch to the real destination. - I = BuildMI(MBB, I, dl, TII->get(MSP430::B)).addMBB(Dest); + I = BuildMI(MBB, I, dl, TII->get(MSP430::Bi)).addMBB(Dest); // Remove the old branch from the function. OldBranch->eraseFromParent(); diff --git a/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp b/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp index 911cfcb..7b328bb 100644 --- a/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp +++ b/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp @@ -12,7 +12,6 @@ //===----------------------------------------------------------------------===// #include "MSP430.h" -#include "MSP430ISelLowering.h" #include "MSP430TargetMachine.h" #include "llvm/DerivedTypes.h" #include "llvm/Function.h" @@ -45,9 +44,9 @@ namespace { } Base; int16_t Disp; - GlobalValue *GV; - Constant *CP; - BlockAddress *BlockAddr; + const GlobalValue *GV; + const Constant *CP; + const BlockAddress *BlockAddr; const char *ES; int JT; unsigned Align; // CP alignment. 
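Note on the MSP430ISelDAGToDAG hunk above: the cached GV/CP/BlockAddr pointers become const, which, together with the identical edits in the CellSPU and MBlaze hunks, suggests the SelectionDAG node accessors now hand out const pointers. A one-line sketch of the adjusted usage, taken from the pattern in this diff:

    const GlobalValue *GV = cast<GlobalAddressSDNode>(N)->getGlobal();
    const Constant    *C  = cast<ConstantPoolSDNode>(N)->getConstVal();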
@@ -100,7 +99,7 @@ namespace { /// namespace { class MSP430DAGToDAGISel : public SelectionDAGISel { - MSP430TargetLowering &Lowering; + const MSP430TargetLowering &Lowering; const MSP430Subtarget &Subtarget; public: @@ -364,7 +363,7 @@ SDNode *MSP430DAGToDAGISel::SelectIndexedBinOp(SDNode *Op, unsigned Opc8, unsigned Opc16) { if (N1.getOpcode() == ISD::LOAD && N1.hasOneUse() && - IsLegalToFold(N1, Op, Op)) { + IsLegalToFold(N1, Op, Op, OptLevel)) { LoadSDNode *LD = cast<LoadSDNode>(N1); if (!isValidIndexedLoad(LD)) return NULL; diff --git a/lib/Target/MSP430/MSP430ISelLowering.cpp b/lib/Target/MSP430/MSP430ISelLowering.cpp index e6c7e1e..c3e2bdf7 100644 --- a/lib/Target/MSP430/MSP430ISelLowering.cpp +++ b/lib/Target/MSP430/MSP430ISelLowering.cpp @@ -110,8 +110,8 @@ MSP430TargetLowering::MSP430TargetLowering(MSP430TargetMachine &tm) : setOperationAction(ISD::ROTR, MVT::i16, Expand); setOperationAction(ISD::GlobalAddress, MVT::i16, Custom); setOperationAction(ISD::ExternalSymbol, MVT::i16, Custom); + setOperationAction(ISD::BlockAddress, MVT::i16, Custom); setOperationAction(ISD::BR_JT, MVT::Other, Expand); - setOperationAction(ISD::BRIND, MVT::Other, Expand); setOperationAction(ISD::BR_CC, MVT::i8, Custom); setOperationAction(ISD::BR_CC, MVT::i16, Custom); setOperationAction(ISD::BRCOND, MVT::Other, Expand); @@ -176,12 +176,14 @@ MSP430TargetLowering::MSP430TargetLowering(MSP430TargetMachine &tm) : } } -SDValue MSP430TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) { +SDValue MSP430TargetLowering::LowerOperation(SDValue Op, + SelectionDAG &DAG) const { switch (Op.getOpcode()) { case ISD::SHL: // FALLTHROUGH case ISD::SRL: case ISD::SRA: return LowerShifts(Op, DAG); case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG); + case ISD::BlockAddress: return LowerBlockAddress(Op, DAG); case ISD::ExternalSymbol: return LowerExternalSymbol(Op, DAG); case ISD::SETCC: return LowerSETCC(Op, DAG); case ISD::BR_CC: return LowerBR_CC(Op, DAG); @@ -252,7 +254,8 @@ MSP430TargetLowering::LowerFormalArguments(SDValue Chain, &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals) { + SmallVectorImpl<SDValue> &InVals) + const { switch (CallConv) { default: @@ -264,7 +267,7 @@ MSP430TargetLowering::LowerFormalArguments(SDValue Chain, if (Ins.empty()) return Chain; else { - llvm_report_error("ISRs cannot have arguments"); + report_fatal_error("ISRs cannot have arguments"); return SDValue(); } } @@ -277,7 +280,7 @@ MSP430TargetLowering::LowerCall(SDValue Chain, SDValue Callee, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals) { + SmallVectorImpl<SDValue> &InVals) const { // MSP430 target does not yet support tail call optimization. 
isTailCall = false; @@ -289,7 +292,7 @@ MSP430TargetLowering::LowerCall(SDValue Chain, SDValue Callee, return LowerCCCCallTo(Chain, Callee, CallConv, isVarArg, isTailCall, Outs, Ins, dl, DAG, InVals); case CallingConv::MSP430_INTR: - llvm_report_error("ISRs cannot be called directly"); + report_fatal_error("ISRs cannot be called directly"); return SDValue(); } } @@ -306,7 +309,8 @@ MSP430TargetLowering::LowerCCCArguments(SDValue Chain, &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals) { + SmallVectorImpl<SDValue> &InVals) + const { MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); MachineRegisterInfo &RegInfo = MF.getRegInfo(); @@ -383,14 +387,14 @@ SDValue MSP430TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::OutputArg> &Outs, - DebugLoc dl, SelectionDAG &DAG) { + DebugLoc dl, SelectionDAG &DAG) const { // CCValAssign - represent the assignment of the return value to a location SmallVector<CCValAssign, 16> RVLocs; // ISRs cannot return any value. if (CallConv == CallingConv::MSP430_INTR && !Outs.empty()) { - llvm_report_error("ISRs cannot return any value"); + report_fatal_error("ISRs cannot return any value"); return SDValue(); } @@ -445,7 +449,7 @@ MSP430TargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee, &Outs, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals) { + SmallVectorImpl<SDValue> &InVals) const { // Analyze operands of the call, assigning locations to each operand. SmallVector<CCValAssign, 16> ArgLocs; CCState CCInfo(CallConv, isVarArg, getTargetMachine(), @@ -568,7 +572,7 @@ MSP430TargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals) { + SmallVectorImpl<SDValue> &InVals) const { // Assign locations to each value returned by this call. 
SmallVector<CCValAssign, 16> RVLocs; @@ -589,7 +593,7 @@ MSP430TargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag, } SDValue MSP430TargetLowering::LowerShifts(SDValue Op, - SelectionDAG &DAG) { + SelectionDAG &DAG) const { unsigned Opc = Op.getOpcode(); SDNode* N = Op.getNode(); EVT VT = Op.getValueType(); @@ -632,7 +636,8 @@ SDValue MSP430TargetLowering::LowerShifts(SDValue Op, return Victim; } -SDValue MSP430TargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) { +SDValue MSP430TargetLowering::LowerGlobalAddress(SDValue Op, + SelectionDAG &DAG) const { const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); int64_t Offset = cast<GlobalAddressSDNode>(Op)->getOffset(); @@ -643,7 +648,7 @@ SDValue MSP430TargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) } SDValue MSP430TargetLowering::LowerExternalSymbol(SDValue Op, - SelectionDAG &DAG) { + SelectionDAG &DAG) const { DebugLoc dl = Op.getDebugLoc(); const char *Sym = cast<ExternalSymbolSDNode>(Op)->getSymbol(); SDValue Result = DAG.getTargetExternalSymbol(Sym, getPointerTy()); @@ -651,6 +656,15 @@ SDValue MSP430TargetLowering::LowerExternalSymbol(SDValue Op, return DAG.getNode(MSP430ISD::Wrapper, dl, getPointerTy(), Result);; } +SDValue MSP430TargetLowering::LowerBlockAddress(SDValue Op, + SelectionDAG &DAG) const { + DebugLoc dl = Op.getDebugLoc(); + const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress(); + SDValue Result = DAG.getBlockAddress(BA, getPointerTy(), /*isTarget=*/true); + + return DAG.getNode(MSP430ISD::Wrapper, dl, getPointerTy(), Result);; +} + static SDValue EmitCMP(SDValue &LHS, SDValue &RHS, SDValue &TargetCC, ISD::CondCode CC, DebugLoc dl, SelectionDAG &DAG) { @@ -734,7 +748,7 @@ static SDValue EmitCMP(SDValue &LHS, SDValue &RHS, SDValue &TargetCC, } -SDValue MSP430TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) { +SDValue MSP430TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const { SDValue Chain = Op.getOperand(0); ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get(); SDValue LHS = Op.getOperand(2); @@ -749,8 +763,7 @@ SDValue MSP430TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) { Chain, Dest, TargetCC, Flag); } - -SDValue MSP430TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) { +SDValue MSP430TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const { SDValue LHS = Op.getOperand(0); SDValue RHS = Op.getOperand(1); DebugLoc dl = Op.getDebugLoc(); @@ -830,7 +843,8 @@ SDValue MSP430TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) { } } -SDValue MSP430TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) { +SDValue MSP430TargetLowering::LowerSELECT_CC(SDValue Op, + SelectionDAG &DAG) const { SDValue LHS = Op.getOperand(0); SDValue RHS = Op.getOperand(1); SDValue TrueV = Op.getOperand(2); @@ -852,7 +866,7 @@ SDValue MSP430TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) { } SDValue MSP430TargetLowering::LowerSIGN_EXTEND(SDValue Op, - SelectionDAG &DAG) { + SelectionDAG &DAG) const { SDValue Val = Op.getOperand(0); EVT VT = Op.getValueType(); DebugLoc dl = Op.getDebugLoc(); @@ -864,7 +878,8 @@ SDValue MSP430TargetLowering::LowerSIGN_EXTEND(SDValue Op, DAG.getValueType(Val.getValueType())); } -SDValue MSP430TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) { +SDValue +MSP430TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) const { MachineFunction &MF = DAG.getMachineFunction(); MSP430MachineFunctionInfo *FuncInfo = 
MF.getInfo<MSP430MachineFunctionInfo>(); int ReturnAddrIndex = FuncInfo->getRAIndex(); @@ -880,7 +895,8 @@ SDValue MSP430TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) { return DAG.getFrameIndex(ReturnAddrIndex, getPointerTy()); } -SDValue MSP430TargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) { +SDValue MSP430TargetLowering::LowerRETURNADDR(SDValue Op, + SelectionDAG &DAG) const { unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); DebugLoc dl = Op.getDebugLoc(); @@ -900,7 +916,8 @@ SDValue MSP430TargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) { RetAddrFI, NULL, 0, false, false, 0); } -SDValue MSP430TargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) { +SDValue MSP430TargetLowering::LowerFRAMEADDR(SDValue Op, + SelectionDAG &DAG) const { MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); MFI->setFrameAddressIsTaken(true); EVT VT = Op.getValueType(); @@ -999,8 +1016,7 @@ bool MSP430TargetLowering::isZExtFree(EVT VT1, EVT VT2) const { MachineBasicBlock* MSP430TargetLowering::EmitShiftInstr(MachineInstr *MI, - MachineBasicBlock *BB, - DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const { + MachineBasicBlock *BB) const { MachineFunction *F = BB->getParent(); MachineRegisterInfo &RI = F->getRegInfo(); DebugLoc dl = MI->getDebugLoc(); @@ -1052,11 +1068,6 @@ MSP430TargetLowering::EmitShiftInstr(MachineInstr *MI, // block to the block containing instructions after shift. RemBB->transferSuccessors(BB); - // Inform sdisel of the edge changes. - for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(), - SE = BB->succ_end(); SI != SE; ++SI) - EM->insert(std::make_pair(*SI, RemBB)); - // Add adges BB => LoopBB => RemBB, BB => RemBB, LoopBB => LoopBB BB->addSuccessor(LoopBB); BB->addSuccessor(RemBB); @@ -1111,14 +1122,13 @@ MSP430TargetLowering::EmitShiftInstr(MachineInstr *MI, MachineBasicBlock* MSP430TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, - MachineBasicBlock *BB, - DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const { + MachineBasicBlock *BB) const { unsigned Opc = MI->getOpcode(); if (Opc == MSP430::Shl8 || Opc == MSP430::Shl16 || Opc == MSP430::Sra8 || Opc == MSP430::Sra16 || Opc == MSP430::Srl8 || Opc == MSP430::Srl16) - return EmitShiftInstr(MI, BB, EM); + return EmitShiftInstr(MI, BB); const TargetInstrInfo &TII = *getTargetMachine().getInstrInfo(); DebugLoc dl = MI->getDebugLoc(); @@ -1149,10 +1159,6 @@ MSP430TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, .addImm(MI->getOperand(3).getImm()); F->insert(I, copy0MBB); F->insert(I, copy1MBB); - // Inform sdisel of the edge changes. - for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(), - SE = BB->succ_end(); SI != SE; ++SI) - EM->insert(std::make_pair(*SI, copy1MBB)); // Update machine-CFG edges by transferring all successors of the current // block to the new block which will contain the Phi node for the select. copy1MBB->transferSuccessors(BB); diff --git a/lib/Target/MSP430/MSP430ISelLowering.h b/lib/Target/MSP430/MSP430ISelLowering.h index 87a790b..01c5071 100644 --- a/lib/Target/MSP430/MSP430ISelLowering.h +++ b/lib/Target/MSP430/MSP430ISelLowering.h @@ -74,7 +74,7 @@ namespace llvm { explicit MSP430TargetLowering(MSP430TargetMachine &TM); /// LowerOperation - Provide custom lowering hooks for some operations. 
- virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG); + virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const; /// getTargetNodeName - This method returns the name of a target specific /// DAG node. @@ -83,16 +83,17 @@ namespace llvm { /// getFunctionAlignment - Return the Log2 alignment of this function. virtual unsigned getFunctionAlignment(const Function *F) const; - SDValue LowerShifts(SDValue Op, SelectionDAG &DAG); - SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG); - SDValue LowerExternalSymbol(SDValue Op, SelectionDAG &DAG); - SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG); - SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG); - SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG); - SDValue LowerSIGN_EXTEND(SDValue Op, SelectionDAG &DAG); - SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG); - SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG); - SDValue getReturnAddressFrameIndex(SelectionDAG &DAG); + SDValue LowerShifts(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerSIGN_EXTEND(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const; + SDValue getReturnAddressFrameIndex(SelectionDAG &DAG) const; TargetLowering::ConstraintType getConstraintType(const std::string &Constraint) const; @@ -117,11 +118,9 @@ namespace llvm { virtual bool isZExtFree(EVT VT1, EVT VT2) const; MachineBasicBlock* EmitInstrWithCustomInserter(MachineInstr *MI, - MachineBasicBlock *BB, - DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const; + MachineBasicBlock *BB) const; MachineBasicBlock* EmitShiftInstr(MachineInstr *MI, - MachineBasicBlock *BB, - DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const; + MachineBasicBlock *BB) const; private: SDValue LowerCCCCallTo(SDValue Chain, SDValue Callee, @@ -130,7 +129,7 @@ namespace llvm { const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals); + SmallVectorImpl<SDValue> &InVals) const; SDValue LowerCCCArguments(SDValue Chain, CallingConv::ID CallConv, @@ -138,33 +137,33 @@ namespace llvm { const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals); + SmallVectorImpl<SDValue> &InVals) const; SDValue LowerCallResult(SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals); + SmallVectorImpl<SDValue> &InVals) const; virtual SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals); + SmallVectorImpl<SDValue> &InVals) const; virtual SDValue LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg, bool &isTailCall, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals); + SmallVectorImpl<SDValue> 
&InVals) const; virtual SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::OutputArg> &Outs, - DebugLoc dl, SelectionDAG &DAG); + DebugLoc dl, SelectionDAG &DAG) const; virtual bool getPostIndexedAddressParts(SDNode *N, SDNode *Op, SDValue &Base, diff --git a/lib/Target/MSP430/MSP430InstrInfo.cpp b/lib/Target/MSP430/MSP430InstrInfo.cpp index 0381904..2b09b3d 100644 --- a/lib/Target/MSP430/MSP430InstrInfo.cpp +++ b/lib/Target/MSP430/MSP430InstrInfo.cpp @@ -176,7 +176,9 @@ unsigned MSP430InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const { if (I->isDebugValue()) continue; if (I->getOpcode() != MSP430::JMP && - I->getOpcode() != MSP430::JCC) + I->getOpcode() != MSP430::JCC && + I->getOpcode() != MSP430::Br && + I->getOpcode() != MSP430::Bm) break; // Remove the branch. I->eraseFromParent(); @@ -256,6 +258,11 @@ bool MSP430InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, if (!I->getDesc().isBranch()) return true; + // Cannot handle indirect branches. + if (I->getOpcode() == MSP430::Br || + I->getOpcode() == MSP430::Bm) + return true; + // Handle unconditional branches. if (I->getOpcode() == MSP430::JMP) { if (!AllowModify) { @@ -365,6 +372,7 @@ unsigned MSP430InstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const { case TargetOpcode::EH_LABEL: case TargetOpcode::IMPLICIT_DEF: case TargetOpcode::KILL: + case TargetOpcode::DBG_VALUE: return 0; case TargetOpcode::INLINEASM: { const MachineFunction *MF = MI->getParent()->getParent(); diff --git a/lib/Target/MSP430/MSP430InstrInfo.td b/lib/Target/MSP430/MSP430InstrInfo.td index 144ba26..6b9a2f2 100644 --- a/lib/Target/MSP430/MSP430InstrInfo.td +++ b/lib/Target/MSP430/MSP430InstrInfo.td @@ -76,8 +76,8 @@ def memdst : Operand<i16> { let MIOperandInfo = (ops GR16, i16imm); } -// Branch targets have OtherVT type. -def brtarget : Operand<OtherVT> { +// Short jump targets have OtherVT type and are printed as pcrel imm values. 
+def jmptarget : Operand<OtherVT> { let PrintMethod = "printPCRelImmOperand"; } @@ -169,21 +169,27 @@ let isBranch = 1, isTerminator = 1 in { // Direct branch let isBarrier = 1 in { // Short branch - def JMP : CJForm<0, 0, - (outs), (ins brtarget:$dst), + def JMP : CJForm<0, 0, (outs), (ins jmptarget:$dst), "jmp\t$dst", [(br bb:$dst)]>; - // Long branch - def B : I16ri<0, - (outs), (ins brtarget:$dst), - "br\t$dst", - []>; + let isIndirectBranch = 1 in { + // Long branches + def Bi : I16ri<0, (outs), (ins i16imm:$brdst), + "br\t$brdst", + [(brind tblockaddress:$brdst)]>; + def Br : I16rr<0, (outs), (ins GR16:$brdst), + "mov.w\t{$brdst, pc}", + [(brind GR16:$brdst)]>; + def Bm : I16rm<0, (outs), (ins memsrc:$brdst), + "mov.w\t{$brdst, pc}", + [(brind (load addr:$brdst))]>; + } } // Conditional branches let Uses = [SRW] in def JCC : CJForm<0, 0, - (outs), (ins brtarget:$dst, cc:$cc), + (outs), (ins jmptarget:$dst, cc:$cc), "j$cc\t$dst", [(MSP430brcc bb:$dst, imm:$cc)]>; } // isBranch, isTerminator @@ -1126,16 +1132,21 @@ def : Pat<(i8 (trunc GR16:$src)), // GlobalAddress, ExternalSymbol def : Pat<(i16 (MSP430Wrapper tglobaladdr:$dst)), (MOV16ri tglobaladdr:$dst)>; def : Pat<(i16 (MSP430Wrapper texternalsym:$dst)), (MOV16ri texternalsym:$dst)>; +def : Pat<(i16 (MSP430Wrapper tblockaddress:$dst)), (MOV16ri tblockaddress:$dst)>; def : Pat<(add GR16:$src1, (MSP430Wrapper tglobaladdr :$src2)), (ADD16ri GR16:$src1, tglobaladdr:$src2)>; def : Pat<(add GR16:$src1, (MSP430Wrapper texternalsym:$src2)), (ADD16ri GR16:$src1, texternalsym:$src2)>; +def : Pat<(add GR16:$src1, (MSP430Wrapper tblockaddress:$src2)), + (ADD16ri GR16:$src1, tblockaddress:$src2)>; def : Pat<(store (i16 (MSP430Wrapper tglobaladdr:$src)), addr:$dst), (MOV16mi addr:$dst, tglobaladdr:$src)>; def : Pat<(store (i16 (MSP430Wrapper texternalsym:$src)), addr:$dst), (MOV16mi addr:$dst, texternalsym:$src)>; +def : Pat<(store (i16 (MSP430Wrapper tblockaddress:$src)), addr:$dst), + (MOV16mi addr:$dst, tblockaddress:$src)>; // calls def : Pat<(MSP430call (i16 tglobaladdr:$dst)), diff --git a/lib/Target/MSP430/MSP430RegisterInfo.cpp b/lib/Target/MSP430/MSP430RegisterInfo.cpp index d91783a..0cae267 100644 --- a/lib/Target/MSP430/MSP430RegisterInfo.cpp +++ b/lib/Target/MSP430/MSP430RegisterInfo.cpp @@ -138,7 +138,7 @@ MSP430RegisterInfo::getPointerRegClass(unsigned Kind) const { bool MSP430RegisterInfo::hasFP(const MachineFunction &MF) const { const MachineFrameInfo *MFI = MF.getFrameInfo(); - return (NoFramePointerElim || + return (DisableFramePointerElim(MF) || MF.getFrameInfo()->hasVarSizedObjects() || MFI->isFrameAddressTaken()); } diff --git a/lib/Target/MSP430/MSP430SelectionDAGInfo.cpp b/lib/Target/MSP430/MSP430SelectionDAGInfo.cpp new file mode 100644 index 0000000..a54c929 --- /dev/null +++ b/lib/Target/MSP430/MSP430SelectionDAGInfo.cpp @@ -0,0 +1,22 @@ +//===-- MSP430SelectionDAGInfo.cpp - MSP430 SelectionDAG Info -------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the MSP430SelectionDAGInfo class. 
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "msp430-selectiondag-info" +#include "MSP430SelectionDAGInfo.h" +using namespace llvm; + +MSP430SelectionDAGInfo::MSP430SelectionDAGInfo() { +} + +MSP430SelectionDAGInfo::~MSP430SelectionDAGInfo() { +} diff --git a/lib/Target/MSP430/MSP430SelectionDAGInfo.h b/lib/Target/MSP430/MSP430SelectionDAGInfo.h new file mode 100644 index 0000000..c952ab7 --- /dev/null +++ b/lib/Target/MSP430/MSP430SelectionDAGInfo.h @@ -0,0 +1,29 @@ +//===-- MSP430SelectionDAGInfo.h - MSP430 SelectionDAG Info -----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the MSP430 subclass for TargetSelectionDAGInfo. +// +//===----------------------------------------------------------------------===// + +#ifndef MSP430SELECTIONDAGINFO_H +#define MSP430SELECTIONDAGINFO_H + +#include "llvm/Target/TargetSelectionDAGInfo.h" + +namespace llvm { + +class MSP430SelectionDAGInfo : public TargetSelectionDAGInfo { +public: + MSP430SelectionDAGInfo(); + ~MSP430SelectionDAGInfo(); +}; + +} + +#endif diff --git a/lib/Target/MSP430/MSP430TargetMachine.h b/lib/Target/MSP430/MSP430TargetMachine.h index d93ac5c..68bde9a 100644 --- a/lib/Target/MSP430/MSP430TargetMachine.h +++ b/lib/Target/MSP430/MSP430TargetMachine.h @@ -50,8 +50,8 @@ public: return &InstrInfo.getRegisterInfo(); } - virtual MSP430TargetLowering *getTargetLowering() const { - return const_cast<MSP430TargetLowering*>(&TLInfo); + virtual const MSP430TargetLowering *getTargetLowering() const { + return &TLInfo; } virtual bool addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel); diff --git a/lib/Target/Mangler.cpp b/lib/Target/Mangler.cpp index 1d5c511..4ef017a 100644 --- a/lib/Target/Mangler.cpp +++ b/lib/Target/Mangler.cpp @@ -22,11 +22,12 @@ #include "llvm/ADT/Twine.h" using namespace llvm; -static bool isAcceptableChar(char C) { +static bool isAcceptableChar(char C, bool AllowPeriod) { if ((C < 'a' || C > 'z') && (C < 'A' || C > 'Z') && (C < '0' || C > '9') && - C != '_' && C != '$' && C != '.' && C != '@') + C != '_' && C != '$' && C != '@' && + !(AllowPeriod && C == '.')) return false; return true; } @@ -54,8 +55,9 @@ static bool NameNeedsEscaping(StringRef Str, const MCAsmInfo &MAI) { // If any of the characters in the string is an unacceptable character, force // quotes. + bool AllowPeriod = MAI.doesAllowPeriodsInName(); for (unsigned i = 0, e = Str.size(); i != e; ++i) - if (!isAcceptableChar(Str[i])) + if (!isAcceptableChar(Str[i], AllowPeriod)) return true; return false; } @@ -70,9 +72,10 @@ static void appendMangledName(SmallVectorImpl<char> &OutName, StringRef Str, MangleLetter(OutName, Str[0]); Str = Str.substr(1); } - + + bool AllowPeriod = MAI.doesAllowPeriodsInName(); for (unsigned i = 0, e = Str.size(); i != e; ++i) { - if (!isAcceptableChar(Str[i])) + if (!isAcceptableChar(Str[i], AllowPeriod)) MangleLetter(OutName, Str[i]); else OutName.push_back(Str[i]); diff --git a/lib/Target/Mips/AsmPrinter/CMakeLists.txt b/lib/Target/Mips/AsmPrinter/CMakeLists.txt index 56c68a6..d3099d2 100644 --- a/lib/Target/Mips/AsmPrinter/CMakeLists.txt +++ b/lib/Target/Mips/AsmPrinter/CMakeLists.txt @@ -1,9 +1,9 @@ -include_directories(
- ${CMAKE_CURRENT_BINARY_DIR}/..
- ${CMAKE_CURRENT_SOURCE_DIR}/..
- )
-
-add_llvm_library(LLVMMipsAsmPrinter
+include_directories( + ${CMAKE_CURRENT_BINARY_DIR}/.. + ${CMAKE_CURRENT_SOURCE_DIR}/.. + ) + +add_llvm_library(LLVMMipsAsmPrinter MipsAsmPrinter.cpp - )
-add_dependencies(LLVMMipsAsmPrinter MipsCodeGenTable_gen)
\ No newline at end of file + ) +add_dependencies(LLVMMipsAsmPrinter MipsCodeGenTable_gen) diff --git a/lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp b/lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp index 6974371..d269153 100644 --- a/lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp +++ b/lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp @@ -306,7 +306,7 @@ void MipsAsmPrinter::printOperand(const MachineInstr *MI, int opNum, void MipsAsmPrinter::printUnsignedImm(const MachineInstr *MI, int opNum, raw_ostream &O) { const MachineOperand &MO = MI->getOperand(opNum); - if (MO.getType() == MachineOperand::MO_Immediate) + if (MO.isImm()) O << (unsigned short int)MO.getImm(); else printOperand(MI, opNum, O); diff --git a/lib/Target/Mips/CMakeLists.txt b/lib/Target/Mips/CMakeLists.txt index 0e3bf5a..a77802a 100644 --- a/lib/Target/Mips/CMakeLists.txt +++ b/lib/Target/Mips/CMakeLists.txt @@ -20,6 +20,7 @@ add_llvm_target(MipsCodeGen MipsSubtarget.cpp MipsTargetMachine.cpp MipsTargetObjectFile.cpp + MipsSelectionDAGInfo.cpp ) target_link_libraries (LLVMMipsCodeGen LLVMSelectionDAG) diff --git a/lib/Target/Mips/MipsISelDAGToDAG.cpp b/lib/Target/Mips/MipsISelDAGToDAG.cpp index c4746db..ee85a3f3 100644 --- a/lib/Target/Mips/MipsISelDAGToDAG.cpp +++ b/lib/Target/Mips/MipsISelDAGToDAG.cpp @@ -13,7 +13,6 @@ #define DEBUG_TYPE "mips-isel" #include "Mips.h" -#include "MipsISelLowering.h" #include "MipsMachineFunction.h" #include "MipsRegisterInfo.h" #include "MipsSubtarget.h" diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp index 584b887..e979c3f 100644 --- a/lib/Target/Mips/MipsISelLowering.cpp +++ b/lib/Target/Mips/MipsISelLowering.cpp @@ -166,7 +166,7 @@ unsigned MipsTargetLowering::getFunctionAlignment(const Function *) const { } SDValue MipsTargetLowering:: -LowerOperation(SDValue Op, SelectionDAG &DAG) +LowerOperation(SDValue Op, SelectionDAG &DAG) const { switch (Op.getOpcode()) { @@ -252,8 +252,7 @@ static Mips::CondCode FPCondCCodeToFCC(ISD::CondCode CC) { MachineBasicBlock * MipsTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, - MachineBasicBlock *BB, - DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const { + MachineBasicBlock *BB) const { const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); bool isFPCmp = false; DebugLoc dl = MI->getDebugLoc(); @@ -301,12 +300,9 @@ MipsTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, F->insert(It, sinkMBB); // Update machine-CFG edges by first adding all successors of the current // block to the new block which will contain the Phi node for the select. - // Also inform sdisel of the edge changes. for(MachineBasicBlock::succ_iterator i = BB->succ_begin(), - e = BB->succ_end(); i != e; ++i) { - EM->insert(std::make_pair(*i, sinkMBB)); + e = BB->succ_end(); i != e; ++i) sinkMBB->addSuccessor(*i); - } // Next, remove all successors of the current block, and add the true // and fallthrough blocks as its successors. 
while(!BB->succ_empty()) @@ -341,7 +337,7 @@ MipsTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, //===----------------------------------------------------------------------===// SDValue MipsTargetLowering:: -LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) +LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) const { if (!Subtarget->isMips1()) return Op; @@ -374,7 +370,7 @@ LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) } SDValue MipsTargetLowering:: -LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) +LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const { SDValue Chain = Op.getOperand(0); SDValue Size = Op.getOperand(1); @@ -398,7 +394,7 @@ LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) } SDValue MipsTargetLowering:: -LowerANDOR(SDValue Op, SelectionDAG &DAG) +LowerANDOR(SDValue Op, SelectionDAG &DAG) const { SDValue LHS = Op.getOperand(0); SDValue RHS = Op.getOperand(1); @@ -419,7 +415,7 @@ LowerANDOR(SDValue Op, SelectionDAG &DAG) } SDValue MipsTargetLowering:: -LowerBRCOND(SDValue Op, SelectionDAG &DAG) +LowerBRCOND(SDValue Op, SelectionDAG &DAG) const { // The first operand is the chain, the second is the condition, the third is // the block to branch to if the condition is true. @@ -441,7 +437,7 @@ LowerBRCOND(SDValue Op, SelectionDAG &DAG) } SDValue MipsTargetLowering:: -LowerSETCC(SDValue Op, SelectionDAG &DAG) +LowerSETCC(SDValue Op, SelectionDAG &DAG) const { // The operands to this are the left and right operands to compare (ops #0, // and #1) and the condition code to compare them with (op #2) as a @@ -457,7 +453,7 @@ LowerSETCC(SDValue Op, SelectionDAG &DAG) } SDValue MipsTargetLowering:: -LowerSELECT(SDValue Op, SelectionDAG &DAG) +LowerSELECT(SDValue Op, SelectionDAG &DAG) const { SDValue Cond = Op.getOperand(0); SDValue True = Op.getOperand(1); @@ -481,10 +477,11 @@ LowerSELECT(SDValue Op, SelectionDAG &DAG) Cond, True, False, CCNode); } -SDValue MipsTargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) { +SDValue MipsTargetLowering::LowerGlobalAddress(SDValue Op, + SelectionDAG &DAG) const { // FIXME there isn't actually debug info here DebugLoc dl = Op.getDebugLoc(); - GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); + const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); if (getTargetMachine().getRelocationModel() != Reloc::PIC_) { SDVTList VTs = DAG.getVTList(MVT::i32); @@ -525,14 +522,14 @@ SDValue MipsTargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) { } SDValue MipsTargetLowering:: -LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) +LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const { llvm_unreachable("TLS not implemented for MIPS."); return SDValue(); // Not reached } SDValue MipsTargetLowering:: -LowerJumpTable(SDValue Op, SelectionDAG &DAG) +LowerJumpTable(SDValue Op, SelectionDAG &DAG) const { SDValue ResNode; SDValue HiPart; @@ -560,11 +557,11 @@ LowerJumpTable(SDValue Op, SelectionDAG &DAG) } SDValue MipsTargetLowering:: -LowerConstantPool(SDValue Op, SelectionDAG &DAG) +LowerConstantPool(SDValue Op, SelectionDAG &DAG) const { SDValue ResNode; ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Op); - Constant *C = N->getConstVal(); + const Constant *C = N->getConstVal(); // FIXME there isn't actually debug info here DebugLoc dl = Op.getDebugLoc(); @@ -596,9 +593,13 @@ LowerConstantPool(SDValue Op, SelectionDAG &DAG) return ResNode; } -SDValue MipsTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) { +SDValue MipsTargetLowering::LowerVASTART(SDValue Op, SelectionDAG 
&DAG) const { + MachineFunction &MF = DAG.getMachineFunction(); + MipsFunctionInfo *FuncInfo = MF.getInfo<MipsFunctionInfo>(); + DebugLoc dl = Op.getDebugLoc(); - SDValue FI = DAG.getFrameIndex(VarArgsFrameIndex, getPointerTy()); + SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), + getPointerTy()); // vastart just stores the address of the VarArgsFrameIndex slot into the // memory location argument. @@ -769,7 +770,7 @@ MipsTargetLowering::LowerCall(SDValue Chain, SDValue Callee, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals) { + SmallVectorImpl<SDValue> &InVals) const { // MIPs target does not yet support tail call optimization. isTailCall = false; @@ -963,7 +964,7 @@ MipsTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals) { + SmallVectorImpl<SDValue> &InVals) const { // Assign locations to each value returned by this call. SmallVector<CCValAssign, 16> RVLocs; @@ -995,14 +996,15 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals) { + SmallVectorImpl<SDValue> &InVals) + const { MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>(); unsigned StackReg = MF.getTarget().getRegisterInfo()->getFrameRegister(MF); - VarArgsFrameIndex = 0; + MipsFI->setVarArgsFrameIndex(0); // Used with vargs to acumulate store chains. std::vector<SDValue> OutChains; @@ -1143,8 +1145,8 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain, // Record the frame index of the first variable argument // which is a value necessary to VASTART. - if (!VarArgsFrameIndex) - VarArgsFrameIndex = FI; + if (!MipsFI->getVarArgsFrameIndex()) + MipsFI->setVarArgsFrameIndex(FI); } } @@ -1167,7 +1169,7 @@ SDValue MipsTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::OutputArg> &Outs, - DebugLoc dl, SelectionDAG &DAG) { + DebugLoc dl, SelectionDAG &DAG) const { // CCValAssign - represent the assignment of // the return value to a location diff --git a/lib/Target/Mips/MipsISelLowering.h b/lib/Target/Mips/MipsISelLowering.h index 7256617..f2de489 100644 --- a/lib/Target/Mips/MipsISelLowering.h +++ b/lib/Target/Mips/MipsISelLowering.h @@ -68,14 +68,11 @@ namespace llvm { //===--------------------------------------------------------------------===// class MipsTargetLowering : public TargetLowering { - int VarArgsFrameIndex; // FrameIndex for start of varargs area. - public: - explicit MipsTargetLowering(MipsTargetMachine &TM); /// LowerOperation - Provide custom lowering hooks for some operations. - virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG); + virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const; /// getTargetNodeName - This method returns the name of a target specific // DAG node. 
@@ -96,27 +93,27 @@ namespace llvm { CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals); + SmallVectorImpl<SDValue> &InVals) const; // Lower Operand specifics - SDValue LowerANDOR(SDValue Op, SelectionDAG &DAG); - SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG); - SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG); - SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG); - SDValue LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG); - SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG); - SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG); - SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG); - SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG); - SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG); - SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG); + SDValue LowerANDOR(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const; virtual SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals); + SmallVectorImpl<SDValue> &InVals) const; virtual SDValue LowerCall(SDValue Chain, SDValue Callee, @@ -125,17 +122,17 @@ namespace llvm { const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals); + SmallVectorImpl<SDValue> &InVals) const; virtual SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::OutputArg> &Outs, - DebugLoc dl, SelectionDAG &DAG); + DebugLoc dl, SelectionDAG &DAG) const; - virtual MachineBasicBlock *EmitInstrWithCustomInserter(MachineInstr *MI, - MachineBasicBlock *MBB, - DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const; + virtual MachineBasicBlock * + EmitInstrWithCustomInserter(MachineInstr *MI, + MachineBasicBlock *MBB) const; // Inline asm support ConstraintType getConstraintType(const std::string &Constraint) const; diff --git a/lib/Target/Mips/MipsMachineFunction.h b/lib/Target/Mips/MipsMachineFunction.h index a300f49..5723f9e 100644 --- a/lib/Target/Mips/MipsMachineFunction.h +++ b/lib/Target/Mips/MipsMachineFunction.h @@ -80,11 +80,15 @@ private: /// relocation models. unsigned GlobalBaseReg; + /// VarArgsFrameIndex - FrameIndex for start of varargs area. 
+ int VarArgsFrameIndex; + public: MipsFunctionInfo(MachineFunction& MF) : FPStackOffset(0), RAStackOffset(0), CPUTopSavedRegOff(0), FPUTopSavedRegOff(0), GPHolder(-1,-1), HasLoadArgs(false), - HasStoreVarArgs(false), SRetReturnReg(0), GlobalBaseReg(0) + HasStoreVarArgs(false), SRetReturnReg(0), GlobalBaseReg(0), + VarArgsFrameIndex(0) {} int getFPStackOffset() const { return FPStackOffset; } @@ -133,6 +137,9 @@ public: unsigned getGlobalBaseReg() const { return GlobalBaseReg; } void setGlobalBaseReg(unsigned Reg) { GlobalBaseReg = Reg; } + + int getVarArgsFrameIndex() const { return VarArgsFrameIndex; } + void setVarArgsFrameIndex(int Index) { VarArgsFrameIndex = Index; } }; } // end of namespace llvm diff --git a/lib/Target/Mips/MipsRegisterInfo.cpp b/lib/Target/Mips/MipsRegisterInfo.cpp index f43e69b..478da84 100644 --- a/lib/Target/Mips/MipsRegisterInfo.cpp +++ b/lib/Target/Mips/MipsRegisterInfo.cpp @@ -338,7 +338,7 @@ void MipsRegisterInfo::adjustMipsStackFrame(MachineFunction &MF) const bool MipsRegisterInfo:: hasFP(const MachineFunction &MF) const { const MachineFrameInfo *MFI = MF.getFrameInfo(); - return NoFramePointerElim || MFI->hasVarSizedObjects(); + return DisableFramePointerElim(MF) || MFI->hasVarSizedObjects(); } // This function eliminate ADJCALLSTACKDOWN, diff --git a/lib/Target/Mips/MipsSchedule.td b/lib/Target/Mips/MipsSchedule.td index 0c3ca573..616a79b 100644 --- a/lib/Target/Mips/MipsSchedule.td +++ b/lib/Target/Mips/MipsSchedule.td @@ -40,7 +40,7 @@ def IIPseudo : InstrItinClass; //===----------------------------------------------------------------------===// // Mips Generic instruction itineraries. //===----------------------------------------------------------------------===// -def MipsGenericItineraries : ProcessorItineraries<[ +def MipsGenericItineraries : ProcessorItineraries<[ALU, IMULDIV], [ InstrItinData<IIAlu , [InstrStage<1, [ALU]>]>, InstrItinData<IILoad , [InstrStage<3, [ALU]>]>, InstrItinData<IIStore , [InstrStage<1, [ALU]>]>, diff --git a/lib/Target/Mips/MipsSelectionDAGInfo.cpp b/lib/Target/Mips/MipsSelectionDAGInfo.cpp new file mode 100644 index 0000000..72c149d --- /dev/null +++ b/lib/Target/Mips/MipsSelectionDAGInfo.cpp @@ -0,0 +1,22 @@ +//===-- MipsSelectionDAGInfo.cpp - Mips SelectionDAG Info -----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the MipsSelectionDAGInfo class. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "mips-selectiondag-info" +#include "MipsSelectionDAGInfo.h" +using namespace llvm; + +MipsSelectionDAGInfo::MipsSelectionDAGInfo() { +} + +MipsSelectionDAGInfo::~MipsSelectionDAGInfo() { +} diff --git a/lib/Target/Mips/MipsSelectionDAGInfo.h b/lib/Target/Mips/MipsSelectionDAGInfo.h new file mode 100644 index 0000000..6eaf0c9 --- /dev/null +++ b/lib/Target/Mips/MipsSelectionDAGInfo.h @@ -0,0 +1,29 @@ +//===-- MipsSelectionDAGInfo.h - Mips SelectionDAG Info ---------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the Mips subclass for TargetSelectionDAGInfo. 
+// +//===----------------------------------------------------------------------===// + +#ifndef MIPSSELECTIONDAGINFO_H +#define MIPSSELECTIONDAGINFO_H + +#include "llvm/Target/TargetSelectionDAGInfo.h" + +namespace llvm { + +class MipsSelectionDAGInfo : public TargetSelectionDAGInfo { +public: + MipsSelectionDAGInfo(); + ~MipsSelectionDAGInfo(); +}; + +} + +#endif diff --git a/lib/Target/Mips/MipsTargetMachine.h b/lib/Target/Mips/MipsTargetMachine.h index c3428be..cd671cf 100644 --- a/lib/Target/Mips/MipsTargetMachine.h +++ b/lib/Target/Mips/MipsTargetMachine.h @@ -47,8 +47,8 @@ namespace llvm { return &InstrInfo.getRegisterInfo(); } - virtual MipsTargetLowering *getTargetLowering() const { - return const_cast<MipsTargetLowering*>(&TLInfo); + virtual const MipsTargetLowering *getTargetLowering() const { + return &TLInfo; } // Pass Pipeline Configuration diff --git a/lib/Target/Mips/MipsTargetObjectFile.cpp b/lib/Target/Mips/MipsTargetObjectFile.cpp index 0fb423d..405f419 100644 --- a/lib/Target/Mips/MipsTargetObjectFile.cpp +++ b/lib/Target/Mips/MipsTargetObjectFile.cpp @@ -11,6 +11,7 @@ #include "MipsSubtarget.h" #include "llvm/DerivedTypes.h" #include "llvm/GlobalVariable.h" +#include "llvm/MC/MCContext.h" #include "llvm/MC/MCSectionELF.h" #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetMachine.h" @@ -26,14 +27,14 @@ void MipsTargetObjectFile::Initialize(MCContext &Ctx, const TargetMachine &TM){ TargetLoweringObjectFileELF::Initialize(Ctx, TM); SmallDataSection = - getELFSection(".sdata", MCSectionELF::SHT_PROGBITS, - MCSectionELF::SHF_WRITE | MCSectionELF::SHF_ALLOC, - SectionKind::getDataRel()); + getContext().getELFSection(".sdata", MCSectionELF::SHT_PROGBITS, + MCSectionELF::SHF_WRITE |MCSectionELF::SHF_ALLOC, + SectionKind::getDataRel()); SmallBSSSection = - getELFSection(".sbss", MCSectionELF::SHT_NOBITS, - MCSectionELF::SHF_WRITE | MCSectionELF::SHF_ALLOC, - SectionKind::getBSS()); + getContext().getELFSection(".sbss", MCSectionELF::SHT_NOBITS, + MCSectionELF::SHF_WRITE |MCSectionELF::SHF_ALLOC, + SectionKind::getBSS()); } diff --git a/lib/Target/PIC16/AsmPrinter/CMakeLists.txt b/lib/Target/PIC16/AsmPrinter/CMakeLists.txt index 2e1b809..d36bb8e 100644 --- a/lib/Target/PIC16/AsmPrinter/CMakeLists.txt +++ b/lib/Target/PIC16/AsmPrinter/CMakeLists.txt @@ -1,9 +1,9 @@ -include_directories(
- ${CMAKE_CURRENT_BINARY_DIR}/..
- ${CMAKE_CURRENT_SOURCE_DIR}/..
- )
-
-add_llvm_library(LLVMPIC16AsmPrinter
+include_directories( + ${CMAKE_CURRENT_BINARY_DIR}/.. + ${CMAKE_CURRENT_SOURCE_DIR}/.. + ) + +add_llvm_library(LLVMPIC16AsmPrinter PIC16AsmPrinter.cpp - )
+ ) add_dependencies(LLVMPIC16AsmPrinter PIC16CodeGenTable_gen) diff --git a/lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.cpp b/lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.cpp index c46db46..b665817 100644 --- a/lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.cpp +++ b/lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.cpp @@ -16,6 +16,7 @@ #include "PIC16AsmPrinter.h" #include "PIC16Section.h" #include "PIC16MCAsmInfo.h" +#include "PIC16MachineFunctionInfo.h" #include "llvm/DerivedTypes.h" #include "llvm/Function.h" #include "llvm/Module.h" @@ -36,9 +37,8 @@ using namespace llvm; PIC16AsmPrinter::PIC16AsmPrinter(TargetMachine &TM, MCStreamer &Streamer) : AsmPrinter(TM, Streamer), DbgInfo(Streamer, TM.getMCAsmInfo()) { - PTLI = static_cast<PIC16TargetLowering*>(TM.getTargetLowering()); PMAI = static_cast<const PIC16MCAsmInfo*>(TM.getMCAsmInfo()); - PTOF = (PIC16TargetObjectFile *)&PTLI->getObjFileLowering(); + PTOF = &getObjFileLowering(); } void PIC16AsmPrinter::EmitInstruction(const MachineInstr *MI) { @@ -379,6 +379,8 @@ bool PIC16AsmPrinter::doFinalization(Module &M) { void PIC16AsmPrinter::EmitFunctionFrame(MachineFunction &MF) { const Function *F = MF.getFunction(); const TargetData *TD = TM.getTargetData(); + PIC16MachineFunctionInfo *FuncInfo = MF.getInfo<PIC16MachineFunctionInfo>(); + // Emit the data section name. PIC16Section *fPDataSection = @@ -420,7 +422,7 @@ void PIC16AsmPrinter::EmitFunctionFrame(MachineFunction &MF) { Twine(" RES ") + Twine(ArgSize)); // Emit temporary space - int TempSize = PTLI->GetTmpSize(); + int TempSize = FuncInfo->getTmpSize(); if (TempSize > 0) OutStreamer.EmitRawText(PAN::getTempdataLabel(CurrentFnSym->getName()) + Twine(" RES ") + Twine(TempSize)); diff --git a/lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.h b/lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.h index e27778f..a424c27 100644 --- a/lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.h +++ b/lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.h @@ -37,8 +37,8 @@ namespace llvm { return "PIC16 Assembly Printer"; } - PIC16TargetObjectFile &getObjFileLowering() const { - return (PIC16TargetObjectFile &)AsmPrinter::getObjFileLowering(); + const PIC16TargetObjectFile &getObjFileLowering() const { + return (const PIC16TargetObjectFile &)AsmPrinter::getObjFileLowering(); } bool runOnMachineFunction(MachineFunction &F); @@ -76,8 +76,7 @@ namespace llvm { } private: - PIC16TargetObjectFile *PTOF; - PIC16TargetLowering *PTLI; + const PIC16TargetObjectFile *PTOF; PIC16DbgInfo DbgInfo; const PIC16MCAsmInfo *PMAI; std::set<std::string> LibcallDecls; // Sorted & uniqued set of extern decls. 
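[Note] The MSP430, Mips and PIC16 hunks above all apply the same two-part refactoring: the TargetLowering hooks (LowerOperation, LowerFormalArguments, LowerCall, LowerReturn, EmitInstrWithCustomInserter, etc.) become const, and any per-function mutable state those hooks used to keep on the lowering object (Mips's VarArgsFrameIndex; PIC16's TmpSize, FiTmpOffsetMap and ReservedFrameCount) moves into the target's MachineFunctionInfo subclass, reached through MachineFunction::getInfo<>(). The sketch below only illustrates that pattern; the Foo* names and rememberVarArgsSlot are hypothetical stand-ins, not the actual LLVM classes or API.

// Minimal sketch (simplified, hypothetical names) of the pattern used in
// these hunks: per-function state moves out of the now-const lowering
// object and into a MachineFunctionInfo-style object owned by the
// MachineFunction.
#include <map>

namespace sketch {

// Stand-in for a target's MachineFunctionInfo subclass.
struct FooFunctionInfo {
  int VarArgsFrameIndex = 0;                   // cf. MipsFunctionInfo
  unsigned TmpSize = 0;                        // cf. PIC16MachineFunctionInfo
  std::map<unsigned, unsigned> FiTmpOffsetMap;

  int getVarArgsFrameIndex() const { return VarArgsFrameIndex; }
  void setVarArgsFrameIndex(int FI) { VarArgsFrameIndex = FI; }
};

// Stand-in for MachineFunction::getInfo<T>().
struct MachineFunction {
  FooFunctionInfo Info;
  template <typename T> T *getInfo() { return &Info; }
};

// A const lowering hook can still record per-function facts, because the
// state now lives on the MachineFunction rather than on the lowering object.
struct FooTargetLowering {
  void rememberVarArgsSlot(MachineFunction &MF, int FI) const {
    FooFunctionInfo *FuncInfo = MF.getInfo<FooFunctionInfo>();
    if (!FuncInfo->getVarArgsFrameIndex())
      FuncInfo->setVarArgsFrameIndex(FI);
  }
};

} // namespace sketch

This mirrors, for example, how the Mips LowerFormalArguments hunk records the first vararg slot via MipsFI->setVarArgsFrameIndex(FI) instead of assigning to a member of MipsTargetLowering, and how PIC16's GetTmpOffsetForFI now takes a MachineFunction so it can consult the per-function FiTmpOffsetMap.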
diff --git a/lib/Target/PIC16/CMakeLists.txt b/lib/Target/PIC16/CMakeLists.txt index 208b067..cd4afe8 100644 --- a/lib/Target/PIC16/CMakeLists.txt +++ b/lib/Target/PIC16/CMakeLists.txt @@ -22,4 +22,5 @@ add_llvm_target(PIC16 PIC16Subtarget.cpp PIC16TargetMachine.cpp PIC16TargetObjectFile.cpp + PIC16SelectionDAGInfo.cpp ) diff --git a/lib/Target/PIC16/PIC16.h b/lib/Target/PIC16/PIC16.h index 8d067de..cee55f4 100644 --- a/lib/Target/PIC16/PIC16.h +++ b/lib/Target/PIC16/PIC16.h @@ -21,6 +21,7 @@ #include <sstream> #include <cstring> #include <string> +#include <vector> namespace llvm { class PIC16TargetMachine; @@ -52,17 +53,34 @@ namespace PIC16CC { UDATA_SHR }; + class ESNames { + std::vector<char*> stk; + ESNames() {} + public: + ~ESNames() { + std::vector<char*>::iterator it = stk.end(); + it--; + while(stk.end() != stk.begin()) + { + char* p = *it; + delete [] p; + it--; + stk.pop_back(); + } + } - // External symbol names require memory to live till the program end. - // So we have to allocate it and keep. - // FIXME: Don't leak the allocated strings. - inline static const char *createESName (const std::string &name) { - char *tmpName = new char[name.size() + 1]; - memcpy(tmpName, name.c_str(), name.size() + 1); - return tmpName; - } - + // External symbol names require memory to live till the program end. + // So we have to allocate it and keep. Push all such allocations into a + // vector so that they get freed up on termination. + inline static const char *createESName (const std::string &name) { + static ESNames esn; + char *tmpName = new char[name.size() + 1]; + memcpy(tmpName, name.c_str(), name.size() + 1); + esn.stk.push_back(tmpName); + return tmpName; + } + }; inline static const char *PIC16CondCodeToString(PIC16CC::CondCodes CC) { switch (CC) { diff --git a/lib/Target/PIC16/PIC16ISelDAGToDAG.h b/lib/Target/PIC16/PIC16ISelDAGToDAG.h index 8ed5bf7..f1fcec5 100644 --- a/lib/Target/PIC16/PIC16ISelDAGToDAG.h +++ b/lib/Target/PIC16/PIC16ISelDAGToDAG.h @@ -14,9 +14,9 @@ #define DEBUG_TYPE "pic16-isel" #include "PIC16.h" -#include "PIC16ISelLowering.h" #include "PIC16RegisterInfo.h" #include "PIC16TargetMachine.h" +#include "PIC16MachineFunctionInfo.h" #include "llvm/CodeGen/SelectionDAGISel.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/raw_ostream.h" @@ -29,19 +29,16 @@ namespace { class VISIBILITY_HIDDEN PIC16DAGToDAGISel : public SelectionDAGISel { /// TM - Keep a reference to PIC16TargetMachine. - PIC16TargetMachine &TM; + const PIC16TargetMachine &TM; /// PIC16Lowering - This object fully describes how to lower LLVM code to an /// PIC16-specific SelectionDAG. 
- PIC16TargetLowering &PIC16Lowering; + const PIC16TargetLowering &PIC16Lowering; public: explicit PIC16DAGToDAGISel(PIC16TargetMachine &tm) : SelectionDAGISel(tm), - TM(tm), PIC16Lowering(*TM.getTargetLowering()) { - // Keep PIC16 specific DAGISel to use during the lowering - PIC16Lowering.ISel = this; - } + TM(tm), PIC16Lowering(*TM.getTargetLowering()) {} // Pass Name virtual const char *getPassName() const { diff --git a/lib/Target/PIC16/PIC16ISelLowering.cpp b/lib/Target/PIC16/PIC16ISelLowering.cpp index d17abb9..f479f46 100644 --- a/lib/Target/PIC16/PIC16ISelLowering.cpp +++ b/lib/Target/PIC16/PIC16ISelLowering.cpp @@ -16,6 +16,7 @@ #include "PIC16ISelLowering.h" #include "PIC16TargetObjectFile.h" #include "PIC16TargetMachine.h" +#include "PIC16MachineFunctionInfo.h" #include "llvm/DerivedTypes.h" #include "llvm/GlobalValue.h" #include "llvm/Function.h" @@ -24,6 +25,7 @@ #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/SelectionDAGISel.h" #include "llvm/Support/ErrorHandling.h" @@ -116,7 +118,7 @@ static const char *getIntrinsicName(unsigned opcode) { std::string Fullname = prefix + tagname + Basename; // The name has to live through program life. - return createESName(Fullname); + return ESNames::createESName(Fullname); } // getStdLibCallName - Get the name for the standard library function. @@ -139,12 +141,12 @@ static const char *getStdLibCallName(unsigned opcode) { std::string LibCallName = prefix + BaseName; // The name has to live through program life. - return createESName(LibCallName); + return ESNames::createESName(LibCallName); } // PIC16TargetLowering Constructor. PIC16TargetLowering::PIC16TargetLowering(PIC16TargetMachine &TM) - : TargetLowering(TM, new PIC16TargetObjectFile()), TmpSize(0) { + : TargetLowering(TM, new PIC16TargetObjectFile()) { Subtarget = &TM.getSubtarget<PIC16Subtarget>(); @@ -321,18 +323,29 @@ static SDValue getOutFlag(SDValue &Op) { return Flag; } // Get the TmpOffset for FrameIndex -unsigned PIC16TargetLowering::GetTmpOffsetForFI(unsigned FI, unsigned size) { +unsigned PIC16TargetLowering::GetTmpOffsetForFI(unsigned FI, unsigned size, + MachineFunction &MF) const { + PIC16MachineFunctionInfo *FuncInfo = MF.getInfo<PIC16MachineFunctionInfo>(); + std::map<unsigned, unsigned> &FiTmpOffsetMap = FuncInfo->getFiTmpOffsetMap(); + std::map<unsigned, unsigned>::iterator MapIt = FiTmpOffsetMap.find(FI); if (MapIt != FiTmpOffsetMap.end()) return MapIt->second; // This FI (FrameIndex) is not yet mapped, so map it - FiTmpOffsetMap[FI] = TmpSize; - TmpSize += size; + FiTmpOffsetMap[FI] = FuncInfo->getTmpSize(); + FuncInfo->setTmpSize(FuncInfo->getTmpSize() + size); return FiTmpOffsetMap[FI]; } +void PIC16TargetLowering::ResetTmpOffsetMap(SelectionDAG &DAG) const { + MachineFunction &MF = DAG.getMachineFunction(); + PIC16MachineFunctionInfo *FuncInfo = MF.getInfo<PIC16MachineFunctionInfo>(); + FuncInfo->getFiTmpOffsetMap().clear(); + FuncInfo->setTmpSize(0); +} + // To extract chain value from the SDValue Nodes // This function will help to maintain the chain extracting // code at one place. 
In case of any change in future it will @@ -390,7 +403,7 @@ PIC16TargetLowering::setPIC16LibcallName(PIC16ISD::PIC16Libcall Call, } const char * -PIC16TargetLowering::getPIC16LibcallName(PIC16ISD::PIC16Libcall Call) { +PIC16TargetLowering::getPIC16LibcallName(PIC16ISD::PIC16Libcall Call) const { return PIC16LibcallNames[Call]; } @@ -398,7 +411,7 @@ SDValue PIC16TargetLowering::MakePIC16Libcall(PIC16ISD::PIC16Libcall Call, EVT RetVT, const SDValue *Ops, unsigned NumOps, bool isSigned, - SelectionDAG &DAG, DebugLoc dl) { + SelectionDAG &DAG, DebugLoc dl) const { TargetLowering::ArgListTy Args; Args.reserve(NumOps); @@ -456,7 +469,7 @@ const char *PIC16TargetLowering::getTargetNodeName(unsigned Opcode) const { void PIC16TargetLowering::ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results, - SelectionDAG &DAG) { + SelectionDAG &DAG) const { switch (N->getOpcode()) { case ISD::GlobalAddress: @@ -483,7 +496,8 @@ void PIC16TargetLowering::ReplaceNodeResults(SDNode *N, } } -SDValue PIC16TargetLowering::ExpandFrameIndex(SDNode *N, SelectionDAG &DAG) { +SDValue PIC16TargetLowering::ExpandFrameIndex(SDNode *N, + SelectionDAG &DAG) const { // Currently handling FrameIndex of size MVT::i16 only // One example of this scenario is when return value is written on @@ -518,7 +532,7 @@ SDValue PIC16TargetLowering::ExpandFrameIndex(SDNode *N, SelectionDAG &DAG) { } -SDValue PIC16TargetLowering::ExpandStore(SDNode *N, SelectionDAG &DAG) { +SDValue PIC16TargetLowering::ExpandStore(SDNode *N, SelectionDAG &DAG) const { StoreSDNode *St = cast<StoreSDNode>(N); SDValue Chain = St->getChain(); SDValue Src = St->getValue(); @@ -636,8 +650,9 @@ SDValue PIC16TargetLowering::ExpandStore(SDNode *N, SelectionDAG &DAG) { } } -SDValue PIC16TargetLowering::ExpandExternalSymbol(SDNode *N, SelectionDAG &DAG) -{ +SDValue PIC16TargetLowering::ExpandExternalSymbol(SDNode *N, + SelectionDAG &DAG) + const { ExternalSymbolSDNode *ES = dyn_cast<ExternalSymbolSDNode>(SDValue(N, 0)); // FIXME there isn't really debug info here DebugLoc dl = ES->getDebugLoc(); @@ -651,7 +666,8 @@ SDValue PIC16TargetLowering::ExpandExternalSymbol(SDNode *N, SelectionDAG &DAG) } // ExpandGlobalAddress - -SDValue PIC16TargetLowering::ExpandGlobalAddress(SDNode *N, SelectionDAG &DAG) { +SDValue PIC16TargetLowering::ExpandGlobalAddress(SDNode *N, + SelectionDAG &DAG) const { GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(SDValue(N, 0)); // FIXME there isn't really debug info here DebugLoc dl = G->getDebugLoc(); @@ -666,7 +682,7 @@ SDValue PIC16TargetLowering::ExpandGlobalAddress(SDNode *N, SelectionDAG &DAG) { return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i16, Lo, Hi); } -bool PIC16TargetLowering::isDirectAddress(const SDValue &Op) { +bool PIC16TargetLowering::isDirectAddress(const SDValue &Op) const { assert (Op.getNode() != NULL && "Can't operate on NULL SDNode!!"); if (Op.getOpcode() == ISD::BUILD_PAIR) { @@ -677,7 +693,7 @@ bool PIC16TargetLowering::isDirectAddress(const SDValue &Op) { } // Return true if DirectAddress is in ROM_SPACE -bool PIC16TargetLowering::isRomAddress(const SDValue &Op) { +bool PIC16TargetLowering::isRomAddress(const SDValue &Op) const { // RomAddress is a GlobalAddress in ROM_SPACE_ // If the Op is not a GlobalAddress return NULL without checking @@ -703,7 +719,7 @@ bool PIC16TargetLowering::isRomAddress(const SDValue &Op) { // parts of Op in Lo and Hi. 
void PIC16TargetLowering::GetExpandedParts(SDValue Op, SelectionDAG &DAG, - SDValue &Lo, SDValue &Hi) { + SDValue &Lo, SDValue &Hi) const { SDNode *N = Op.getNode(); DebugLoc dl = N->getDebugLoc(); EVT NewVT = getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); @@ -720,11 +736,12 @@ void PIC16TargetLowering::GetExpandedParts(SDValue Op, SelectionDAG &DAG, // Legalize FrameIndex into ExternalSymbol and offset. void PIC16TargetLowering::LegalizeFrameIndex(SDValue Op, SelectionDAG &DAG, - SDValue &ES, int &Offset) { + SDValue &ES, int &Offset) const { MachineFunction &MF = DAG.getMachineFunction(); const Function *Func = MF.getFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); + PIC16MachineFunctionInfo *FuncInfo = MF.getInfo<PIC16MachineFunctionInfo>(); const std::string Name = Func->getName(); FrameIndexSDNode *FR = dyn_cast<FrameIndexSDNode>(Op); @@ -736,8 +753,8 @@ PIC16TargetLowering::LegalizeFrameIndex(SDValue Op, SelectionDAG &DAG, // the list and add their requested size. unsigned FIndex = FR->getIndex(); const char *tmpName; - if (FIndex < ReservedFrameCount) { - tmpName = createESName(PAN::getFrameLabel(Name)); + if (FIndex < FuncInfo->getReservedFrameCount()) { + tmpName = ESNames::createESName(PAN::getFrameLabel(Name)); ES = DAG.getTargetExternalSymbol(tmpName, MVT::i8); Offset = 0; for (unsigned i=0; i<FIndex ; ++i) { @@ -745,9 +762,9 @@ PIC16TargetLowering::LegalizeFrameIndex(SDValue Op, SelectionDAG &DAG, } } else { // FrameIndex has been made for some temporary storage - tmpName = createESName(PAN::getTempdataLabel(Name)); + tmpName = ESNames::createESName(PAN::getTempdataLabel(Name)); ES = DAG.getTargetExternalSymbol(tmpName, MVT::i8); - Offset = GetTmpOffsetForFI(FIndex, MFI->getObjectSize(FIndex)); + Offset = GetTmpOffsetForFI(FIndex, MFI->getObjectSize(FIndex), MF); } return; @@ -767,7 +784,7 @@ PIC16TargetLowering::LegalizeFrameIndex(SDValue Op, SelectionDAG &DAG, void PIC16TargetLowering::LegalizeAddress(SDValue Ptr, SelectionDAG &DAG, SDValue &Lo, SDValue &Hi, - unsigned &Offset, DebugLoc dl) { + unsigned &Offset, DebugLoc dl) const { // Offset, by default, should be 0 Offset = 0; @@ -846,7 +863,7 @@ void PIC16TargetLowering::LegalizeAddress(SDValue Ptr, SelectionDAG &DAG, return; } -SDValue PIC16TargetLowering::ExpandLoad(SDNode *N, SelectionDAG &DAG) { +SDValue PIC16TargetLowering::ExpandLoad(SDNode *N, SelectionDAG &DAG) const { LoadSDNode *LD = dyn_cast<LoadSDNode>(SDValue(N, 0)); SDValue Chain = LD->getChain(); SDValue Ptr = LD->getBasePtr(); @@ -961,7 +978,7 @@ SDValue PIC16TargetLowering::ExpandLoad(SDNode *N, SelectionDAG &DAG) { return DAG.getNode(ISD::MERGE_VALUES, dl, Tys, BP, Chain); } -SDValue PIC16TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) { +SDValue PIC16TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) const { // We should have handled larger operands in type legalizer itself. assert (Op.getValueType() == MVT::i8 && "illegal shift to lower"); @@ -991,7 +1008,7 @@ SDValue PIC16TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) { return Call; } -SDValue PIC16TargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) { +SDValue PIC16TargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const { // We should have handled larger operands in type legalizer itself. 
assert (Op.getValueType() == MVT::i8 && "illegal multiply to lower"); @@ -1007,7 +1024,7 @@ SDValue PIC16TargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) { void PIC16TargetLowering::LowerOperationWrapper(SDNode *N, SmallVectorImpl<SDValue>&Results, - SelectionDAG &DAG) { + SelectionDAG &DAG) const { SDValue Op = SDValue(N, 0); SDValue Res; unsigned i; @@ -1031,7 +1048,8 @@ PIC16TargetLowering::LowerOperationWrapper(SDNode *N, } } -SDValue PIC16TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) { +SDValue PIC16TargetLowering::LowerOperation(SDValue Op, + SelectionDAG &DAG) const { switch (Op.getOpcode()) { case ISD::ADD: case ISD::ADDC: @@ -1065,7 +1083,7 @@ SDValue PIC16TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) { SDValue PIC16TargetLowering::ConvertToMemOperand(SDValue Op, SelectionDAG &DAG, - DebugLoc dl) { + DebugLoc dl) const { assert (Op.getValueType() == MVT::i8 && "illegal value type to store on stack."); @@ -1077,7 +1095,7 @@ SDValue PIC16TargetLowering::ConvertToMemOperand(SDValue Op, // Put the value on stack. // Get a stack slot index and convert to es. int FI = MF.getFrameInfo()->CreateStackObject(1, 1, false); - const char *tmpName = createESName(PAN::getTempdataLabel(FuncName)); + const char *tmpName = ESNames::createESName(PAN::getTempdataLabel(FuncName)); SDValue ES = DAG.getTargetExternalSymbol(tmpName, MVT::i8); // Store the value to ES. @@ -1085,14 +1103,14 @@ SDValue PIC16TargetLowering::ConvertToMemOperand(SDValue Op, DAG.getEntryNode(), Op, ES, DAG.getConstant (1, MVT::i8), // Banksel. - DAG.getConstant (GetTmpOffsetForFI(FI, 1), + DAG.getConstant (GetTmpOffsetForFI(FI, 1, MF), MVT::i8)); // Load the value from ES. SDVTList Tys = DAG.getVTList(MVT::i8, MVT::Other); SDValue Load = DAG.getNode(PIC16ISD::PIC16Load, dl, Tys, Store, ES, DAG.getConstant (1, MVT::i8), - DAG.getConstant (GetTmpOffsetForFI(FI, 1), + DAG.getConstant (GetTmpOffsetForFI(FI, 1, MF), MVT::i8)); return Load.getValue(0); @@ -1103,7 +1121,7 @@ LowerIndirectCallArguments(SDValue Chain, SDValue InFlag, SDValue DataAddr_Lo, SDValue DataAddr_Hi, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<ISD::InputArg> &Ins, - DebugLoc dl, SelectionDAG &DAG) { + DebugLoc dl, SelectionDAG &DAG) const { unsigned NumOps = Outs.size(); // If call has no arguments then do nothing and return. @@ -1140,7 +1158,7 @@ LowerIndirectCallArguments(SDValue Chain, SDValue InFlag, SDValue PIC16TargetLowering:: LowerDirectCallArguments(SDValue ArgLabel, SDValue Chain, SDValue InFlag, const SmallVectorImpl<ISD::OutputArg> &Outs, - DebugLoc dl, SelectionDAG &DAG) { + DebugLoc dl, SelectionDAG &DAG) const { unsigned NumOps = Outs.size(); std::string Name; SDValue Arg, StoreAt; @@ -1197,7 +1215,7 @@ LowerIndirectCallReturn(SDValue Chain, SDValue InFlag, SDValue DataAddr_Lo, SDValue DataAddr_Hi, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals) { + SmallVectorImpl<SDValue> &InVals) const { unsigned RetVals = Ins.size(); // If call does not have anything to return @@ -1224,7 +1242,7 @@ SDValue PIC16TargetLowering:: LowerDirectCallReturn(SDValue RetLabel, SDValue Chain, SDValue InFlag, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals) { + SmallVectorImpl<SDValue> &InVals) const { // Currently handling primitive types only. 
They will come in // i8 parts @@ -1264,7 +1282,7 @@ SDValue PIC16TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::OutputArg> &Outs, - DebugLoc dl, SelectionDAG &DAG) { + DebugLoc dl, SelectionDAG &DAG) const { // Number of values to return unsigned NumRet = Outs.size(); @@ -1275,7 +1293,7 @@ PIC16TargetLowering::LowerReturn(SDValue Chain, const Function *F = MF.getFunction(); std::string FuncName = F->getName(); - const char *tmpName = createESName(PAN::getFrameLabel(FuncName)); + const char *tmpName = ESNames::createESName(PAN::getFrameLabel(FuncName)); SDValue ES = DAG.getTargetExternalSymbol(tmpName, MVT::i8); SDValue BS = DAG.getConstant(1, MVT::i8); SDValue RetVal; @@ -1292,7 +1310,7 @@ PIC16TargetLowering::LowerReturn(SDValue Chain, void PIC16TargetLowering:: GetDataAddress(DebugLoc dl, SDValue Callee, SDValue &Chain, SDValue &DataAddr_Lo, SDValue &DataAddr_Hi, - SelectionDAG &DAG) { + SelectionDAG &DAG) const { assert (Callee.getOpcode() == PIC16ISD::PIC16Connect && "Don't know what to do of such callee!!"); SDValue ZeroOperand = DAG.getConstant(0, MVT::i8); @@ -1358,7 +1376,7 @@ PIC16TargetLowering::LowerCall(SDValue Chain, SDValue Callee, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals) { + SmallVectorImpl<SDValue> &InVals) const { // PIC16 target does not yet support tail call optimization. isTailCall = false; @@ -1409,7 +1427,7 @@ PIC16TargetLowering::LowerCall(SDValue Chain, SDValue Callee, if (IsDirectCall) { // Considering the GlobalAddressNode case here. if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) { - GlobalValue *GV = G->getGlobal(); + const GlobalValue *GV = G->getGlobal(); Callee = DAG.getTargetGlobalAddress(GV, MVT::i8); Name = G->getGlobal()->getName(); } else {// Considering the ExternalSymbol case here @@ -1419,11 +1437,11 @@ PIC16TargetLowering::LowerCall(SDValue Chain, SDValue Callee, } // Label for argument passing - const char *argFrame = createESName(PAN::getArgsLabel(Name)); + const char *argFrame = ESNames::createESName(PAN::getArgsLabel(Name)); ArgLabel = DAG.getTargetExternalSymbol(argFrame, MVT::i8); // Label for reading return value - const char *retName = createESName(PAN::getRetvalLabel(Name)); + const char *retName = ESNames::createESName(PAN::getRetvalLabel(Name)); RetLabel = DAG.getTargetExternalSymbol(retName, MVT::i8); } else { // if indirect call @@ -1476,7 +1494,7 @@ PIC16TargetLowering::LowerCall(SDValue Chain, SDValue Callee, DataAddr_Hi, Ins, dl, DAG, InVals); } -bool PIC16TargetLowering::isDirectLoad(const SDValue Op) { +bool PIC16TargetLowering::isDirectLoad(const SDValue Op) const { if (Op.getOpcode() == PIC16ISD::PIC16Load) if (Op.getOperand(1).getOpcode() == ISD::TargetGlobalAddress || Op.getOperand(1).getOpcode() == ISD::TargetExternalSymbol) @@ -1490,7 +1508,7 @@ bool PIC16TargetLowering::isDirectLoad(const SDValue Op) { // no instruction that can operation on two registers. Most insns take // one register and one memory operand (addwf) / Constant (addlw). bool PIC16TargetLowering::NeedToConvertToMemOp(SDValue Op, unsigned &MemOp, - SelectionDAG &DAG) { + SelectionDAG &DAG) const { // If one of the operand is a constant, return false. 
if (Op.getOperand(0).getOpcode() == ISD::Constant || Op.getOperand(1).getOpcode() == ISD::Constant) @@ -1512,7 +1530,9 @@ bool PIC16TargetLowering::NeedToConvertToMemOp(SDValue Op, unsigned &MemOp, // Direct load operands are folded in binary operations. But before folding // verify if this folding is legal. Fold only if it is legal otherwise // convert this direct load to a separate memory operation. - if(ISel->IsLegalToFold(Op.getOperand(0), Op.getNode(), Op.getNode())) + if (SelectionDAGISel::IsLegalToFold(Op.getOperand(0), + Op.getNode(), Op.getNode(), + CodeGenOpt::Default)) return false; else MemOp = 0; @@ -1539,7 +1559,9 @@ bool PIC16TargetLowering::NeedToConvertToMemOp(SDValue Op, unsigned &MemOp, // Direct load operands are folded in binary operations. But before folding // verify if this folding is legal. Fold only if it is legal otherwise // convert this direct load to a separate memory operation. - if(ISel->IsLegalToFold(Op.getOperand(1), Op.getNode(), Op.getNode())) + if (SelectionDAGISel::IsLegalToFold(Op.getOperand(1), + Op.getNode(), Op.getNode(), + CodeGenOpt::Default)) return false; else MemOp = 1; @@ -1550,7 +1572,7 @@ bool PIC16TargetLowering::NeedToConvertToMemOp(SDValue Op, unsigned &MemOp, // LowerBinOp - Lower a commutative binary operation that does not // affect status flag carry. -SDValue PIC16TargetLowering::LowerBinOp(SDValue Op, SelectionDAG &DAG) { +SDValue PIC16TargetLowering::LowerBinOp(SDValue Op, SelectionDAG &DAG) const { DebugLoc dl = Op.getDebugLoc(); // We should have handled larger operands in type legalizer itself. @@ -1571,7 +1593,7 @@ SDValue PIC16TargetLowering::LowerBinOp(SDValue Op, SelectionDAG &DAG) { // LowerADD - Lower all types of ADD operations including the ones // that affects carry. -SDValue PIC16TargetLowering::LowerADD(SDValue Op, SelectionDAG &DAG) { +SDValue PIC16TargetLowering::LowerADD(SDValue Op, SelectionDAG &DAG) const { // We should have handled larger operands in type legalizer itself. assert (Op.getValueType() == MVT::i8 && "illegal add to lower"); DebugLoc dl = Op.getDebugLoc(); @@ -1600,7 +1622,7 @@ SDValue PIC16TargetLowering::LowerADD(SDValue Op, SelectionDAG &DAG) { return Op; } -SDValue PIC16TargetLowering::LowerSUB(SDValue Op, SelectionDAG &DAG) { +SDValue PIC16TargetLowering::LowerSUB(SDValue Op, SelectionDAG &DAG) const { DebugLoc dl = Op.getDebugLoc(); // We should have handled larger operands in type legalizer itself. 
assert (Op.getValueType() == MVT::i8 && "illegal sub to lower"); @@ -1647,15 +1669,19 @@ SDValue PIC16TargetLowering::LowerSUB(SDValue Op, SelectionDAG &DAG) { return Op; } -void PIC16TargetLowering::InitReservedFrameCount(const Function *F) { +void PIC16TargetLowering::InitReservedFrameCount(const Function *F, + SelectionDAG &DAG) const { + MachineFunction &MF = DAG.getMachineFunction(); + PIC16MachineFunctionInfo *FuncInfo = MF.getInfo<PIC16MachineFunctionInfo>(); + unsigned NumArgs = F->arg_size(); bool isVoidFunc = (F->getReturnType()->getTypeID() == Type::VoidTyID); if (isVoidFunc) - ReservedFrameCount = NumArgs; + FuncInfo->setReservedFrameCount(NumArgs); else - ReservedFrameCount = NumArgs + 1; + FuncInfo->setReservedFrameCount(NumArgs + 1); } // LowerFormalArguments - Argument values are loaded from the @@ -1669,7 +1695,8 @@ PIC16TargetLowering::LowerFormalArguments(SDValue Chain, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals) { + SmallVectorImpl<SDValue> &InVals) + const { unsigned NumArgVals = Ins.size(); // Get the callee's name to create the <fname>.args label to pass args. @@ -1678,12 +1705,12 @@ PIC16TargetLowering::LowerFormalArguments(SDValue Chain, std::string FuncName = F->getName(); // Reset the map of FI and TmpOffset - ResetTmpOffsetMap(); + ResetTmpOffsetMap(DAG); // Initialize the ReserveFrameCount - InitReservedFrameCount(F); + InitReservedFrameCount(F, DAG); // Create the <fname>.args external symbol. - const char *tmpName = createESName(PAN::getArgsLabel(FuncName)); + const char *tmpName = ESNames::createESName(PAN::getArgsLabel(FuncName)); SDValue ES = DAG.getTargetExternalSymbol(tmpName, MVT::i8); // Load arg values from the label + offset. @@ -1782,7 +1809,7 @@ static void LookThroughSetCC(SDValue &LHS, SDValue &RHS, // Returns appropriate CMP insn and corresponding condition code in PIC16CC SDValue PIC16TargetLowering::getPIC16Cmp(SDValue LHS, SDValue RHS, unsigned CC, SDValue &PIC16CC, - SelectionDAG &DAG, DebugLoc dl) { + SelectionDAG &DAG, DebugLoc dl) const { PIC16CC::CondCodes CondCode = (PIC16CC::CondCodes) CC; // PIC16 sub is literal - W. So Swap the operands and condition if needed. @@ -1846,7 +1873,8 @@ SDValue PIC16TargetLowering::getPIC16Cmp(SDValue LHS, SDValue RHS, } -SDValue PIC16TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) { +SDValue PIC16TargetLowering::LowerSELECT_CC(SDValue Op, + SelectionDAG &DAG) const { SDValue LHS = Op.getOperand(0); SDValue RHS = Op.getOperand(1); ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get(); @@ -1874,8 +1902,7 @@ SDValue PIC16TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) { MachineBasicBlock * PIC16TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, - MachineBasicBlock *BB, - DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const { + MachineBasicBlock *BB) const { const TargetInstrInfo &TII = *getTargetMachine().getInstrInfo(); unsigned CC = (PIC16CC::CondCodes)MI->getOperand(3).getImm(); DebugLoc dl = MI->getDebugLoc(); @@ -1903,12 +1930,9 @@ PIC16TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, // Update machine-CFG edges by first adding all successors of the current // block to the new block which will contain the Phi node for the select. - // Also inform sdisel of the edge changes. 
for (MachineBasicBlock::succ_iterator I = BB->succ_begin(), - E = BB->succ_end(); I != E; ++I) { - EM->insert(std::make_pair(*I, sinkMBB)); + E = BB->succ_end(); I != E; ++I) sinkMBB->addSuccessor(*I); - } // Next, remove all successors of the current block, and add the true // and fallthrough blocks as its successors. while (!BB->succ_empty()) @@ -1938,7 +1962,7 @@ PIC16TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, } -SDValue PIC16TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) { +SDValue PIC16TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const { SDValue Chain = Op.getOperand(0); ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get(); SDValue LHS = Op.getOperand(2); // LHS of the condition. diff --git a/lib/Target/PIC16/PIC16ISelLowering.h b/lib/Target/PIC16/PIC16ISelLowering.h index de14520..eea17f8 100644 --- a/lib/Target/PIC16/PIC16ISelLowering.h +++ b/lib/Target/PIC16/PIC16ISelLowering.h @@ -18,7 +18,6 @@ #include "PIC16.h" #include "PIC16Subtarget.h" #include "llvm/CodeGen/SelectionDAG.h" -#include "llvm/CodeGen/SelectionDAGISel.h" #include "llvm/Target/TargetLowering.h" #include <map> @@ -85,53 +84,52 @@ namespace llvm { /// getSetCCResultType - Return the ISD::SETCC ValueType virtual MVT::SimpleValueType getSetCCResultType(EVT ValType) const; virtual MVT::SimpleValueType getCmpLibcallReturnType() const; - SDValue LowerShift(SDValue Op, SelectionDAG &DAG); - SDValue LowerMUL(SDValue Op, SelectionDAG &DAG); - SDValue LowerADD(SDValue Op, SelectionDAG &DAG); - SDValue LowerSUB(SDValue Op, SelectionDAG &DAG); - SDValue LowerBinOp(SDValue Op, SelectionDAG &DAG); + SDValue LowerShift(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerADD(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerSUB(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerBinOp(SDValue Op, SelectionDAG &DAG) const; // Call returns SDValue LowerDirectCallReturn(SDValue RetLabel, SDValue Chain, SDValue InFlag, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals); + SmallVectorImpl<SDValue> &InVals) const; SDValue LowerIndirectCallReturn(SDValue Chain, SDValue InFlag, SDValue DataAddr_Lo, SDValue DataAddr_Hi, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals); + SmallVectorImpl<SDValue> &InVals) const; // Call arguments SDValue LowerDirectCallArguments(SDValue ArgLabel, SDValue Chain, SDValue InFlag, const SmallVectorImpl<ISD::OutputArg> &Outs, - DebugLoc dl, SelectionDAG &DAG); + DebugLoc dl, SelectionDAG &DAG) const; SDValue LowerIndirectCallArguments(SDValue Chain, SDValue InFlag, SDValue DataAddr_Lo, SDValue DataAddr_Hi, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<ISD::InputArg> &Ins, - DebugLoc dl, SelectionDAG &DAG); + DebugLoc dl, SelectionDAG &DAG) const; - SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG); - SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG); + SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const; SDValue getPIC16Cmp(SDValue LHS, SDValue RHS, unsigned OrigCC, SDValue &CC, - SelectionDAG &DAG, DebugLoc dl); - virtual MachineBasicBlock *EmitInstrWithCustomInserter(MachineInstr *MI, - MachineBasicBlock *MBB, - DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const; + SelectionDAG &DAG, DebugLoc dl) const; + virtual MachineBasicBlock * + EmitInstrWithCustomInserter(MachineInstr 
*MI, + MachineBasicBlock *MBB) const; - - virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG); + virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const; virtual void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results, - SelectionDAG &DAG); + SelectionDAG &DAG) const; virtual void LowerOperationWrapper(SDNode *N, SmallVectorImpl<SDValue> &Results, - SelectionDAG &DAG); + SelectionDAG &DAG) const; virtual SDValue LowerFormalArguments(SDValue Chain, @@ -139,7 +137,7 @@ namespace llvm { bool isVarArg, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals); + SmallVectorImpl<SDValue> &InVals) const; virtual SDValue LowerCall(SDValue Chain, SDValue Callee, @@ -147,19 +145,19 @@ namespace llvm { const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals); + SmallVectorImpl<SDValue> &InVals) const; virtual SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::OutputArg> &Outs, - DebugLoc dl, SelectionDAG &DAG); + DebugLoc dl, SelectionDAG &DAG) const; - SDValue ExpandStore(SDNode *N, SelectionDAG &DAG); - SDValue ExpandLoad(SDNode *N, SelectionDAG &DAG); - SDValue ExpandGlobalAddress(SDNode *N, SelectionDAG &DAG); - SDValue ExpandExternalSymbol(SDNode *N, SelectionDAG &DAG); - SDValue ExpandFrameIndex(SDNode *N, SelectionDAG &DAG); + SDValue ExpandStore(SDNode *N, SelectionDAG &DAG) const; + SDValue ExpandLoad(SDNode *N, SelectionDAG &DAG) const; + SDValue ExpandGlobalAddress(SDNode *N, SelectionDAG &DAG) const; + SDValue ExpandExternalSymbol(SDNode *N, SelectionDAG &DAG) const; + SDValue ExpandFrameIndex(SDNode *N, SelectionDAG &DAG) const; SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const; SDValue PerformPIC16LoadCombine(SDNode *N, DAGCombinerInfo &DCI) const; @@ -168,13 +166,11 @@ namespace llvm { // This function returns the Tmp Offset for FrameIndex. If any TmpOffset // already exists for the FI then it returns the same else it creates the // new offset and returns. - unsigned GetTmpOffsetForFI(unsigned FI, unsigned slot_size); - void ResetTmpOffsetMap() { FiTmpOffsetMap.clear(); SetTmpSize(0); } - void InitReservedFrameCount(const Function *F); - - // Return the size of Tmp variable - unsigned GetTmpSize() { return TmpSize; } - void SetTmpSize(unsigned Size) { TmpSize = Size; } + unsigned GetTmpOffsetForFI(unsigned FI, unsigned slot_size, + MachineFunction &MF) const; + void ResetTmpOffsetMap(SelectionDAG &DAG) const; + void InitReservedFrameCount(const Function *F, + SelectionDAG &DAG) const; /// getFunctionAlignment - Return the Log2 alignment of this function. virtual unsigned getFunctionAlignment(const Function *) const { @@ -184,43 +180,45 @@ namespace llvm { private: // If the Node is a BUILD_PAIR representing a direct Address, // then this function will return true. - bool isDirectAddress(const SDValue &Op); + bool isDirectAddress(const SDValue &Op) const; // If the Node is a DirectAddress in ROM_SPACE then this // function will return true - bool isRomAddress(const SDValue &Op); + bool isRomAddress(const SDValue &Op) const; // Extract the Lo and Hi component of Op. void GetExpandedParts(SDValue Op, SelectionDAG &DAG, SDValue &Lo, - SDValue &Hi); + SDValue &Hi) const; // Load pointer can be a direct or indirect address. In PIC16 direct // addresses need Banksel and Indirect addresses need to be loaded to // FSR first. 
Handle address specific cases here. void LegalizeAddress(SDValue Ptr, SelectionDAG &DAG, SDValue &Chain, - SDValue &NewPtr, unsigned &Offset, DebugLoc dl); + SDValue &NewPtr, unsigned &Offset, DebugLoc dl) const; // FrameIndex should be broken down into ExternalSymbol and FrameOffset. void LegalizeFrameIndex(SDValue Op, SelectionDAG &DAG, SDValue &ES, - int &Offset); + int &Offset) const; // For indirect calls data address of the callee frame need to be // extracted. This function fills the arguments DataAddr_Lo and // DataAddr_Hi with the address of the callee frame. void GetDataAddress(DebugLoc dl, SDValue Callee, SDValue &Chain, SDValue &DataAddr_Lo, SDValue &DataAddr_Hi, - SelectionDAG &DAG); + SelectionDAG &DAG) const; // We can not have both operands of a binary operation in W. // This function is used to put one operand on stack and generate a load. - SDValue ConvertToMemOperand(SDValue Op, SelectionDAG &DAG, DebugLoc dl); + SDValue ConvertToMemOperand(SDValue Op, SelectionDAG &DAG, + DebugLoc dl) const; // This function checks if we need to put an operand of an operation on // stack and generate a load or not. // DAG parameter is required to access DAG information during // analysis. - bool NeedToConvertToMemOp(SDValue Op, unsigned &MemOp, SelectionDAG &DAG); + bool NeedToConvertToMemOp(SDValue Op, unsigned &MemOp, + SelectionDAG &DAG) const; /// Subtarget - Keep a pointer to the PIC16Subtarget around so that we can /// make the right decision when generating code for different targets. @@ -233,31 +231,15 @@ namespace llvm { // To set and retrieve the lib call names. void setPIC16LibcallName(PIC16ISD::PIC16Libcall Call, const char *Name); - const char *getPIC16LibcallName(PIC16ISD::PIC16Libcall Call); + const char *getPIC16LibcallName(PIC16ISD::PIC16Libcall Call) const; // Make PIC16 Libcall. SDValue MakePIC16Libcall(PIC16ISD::PIC16Libcall Call, EVT RetVT, const SDValue *Ops, unsigned NumOps, bool isSigned, - SelectionDAG &DAG, DebugLoc dl); + SelectionDAG &DAG, DebugLoc dl) const; // Check if operation has a direct load operand. - inline bool isDirectLoad(const SDValue Op); - - public: - // Keep a pointer to SelectionDAGISel to access its public - // interface (It is required during legalization) - SelectionDAGISel *ISel; - - private: - // The frameindexes generated for spill/reload are stack based. - // This maps maintain zero based indexes for these FIs. - std::map<unsigned, unsigned> FiTmpOffsetMap; - unsigned TmpSize; - - // These are the frames for return value and argument passing - // These FrameIndices will be expanded to foo.frame external symbol - // and all others will be expanded to foo.tmp external symbol. 
- unsigned ReservedFrameCount; + inline bool isDirectLoad(const SDValue Op) const; }; } // namespace llvm diff --git a/lib/Target/PIC16/PIC16InstrInfo.cpp b/lib/Target/PIC16/PIC16InstrInfo.cpp index 365e8b2..9e415e0 100644 --- a/lib/Target/PIC16/PIC16InstrInfo.cpp +++ b/lib/Target/PIC16/PIC16InstrInfo.cpp @@ -71,14 +71,14 @@ void PIC16InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, unsigned SrcReg, bool isKill, int FI, const TargetRegisterClass *RC) const { - PIC16TargetLowering *PTLI = TM.getTargetLowering(); + const PIC16TargetLowering *PTLI = TM.getTargetLowering(); DebugLoc DL; if (I != MBB.end()) DL = I->getDebugLoc(); const Function *Func = MBB.getParent()->getFunction(); const std::string FuncName = Func->getName(); - const char *tmpName = createESName(PAN::getTempdataLabel(FuncName)); + const char *tmpName = ESNames::createESName(PAN::getTempdataLabel(FuncName)); // On the order of operands here: think "movwf SrcReg, tmp_slot, offset". if (RC == PIC16::GPRRegisterClass) { @@ -86,7 +86,7 @@ void PIC16InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, //MachineRegisterInfo &RI = MF.getRegInfo(); BuildMI(MBB, I, DL, get(PIC16::movwf)) .addReg(SrcReg, getKillRegState(isKill)) - .addImm(PTLI->GetTmpOffsetForFI(FI, 1)) + .addImm(PTLI->GetTmpOffsetForFI(FI, 1, *MBB.getParent())) .addExternalSymbol(tmpName) .addImm(1); // Emit banksel for it. } @@ -101,7 +101,7 @@ void PIC16InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, : PIC16::save_fsr1; BuildMI(MBB, I, DL, get(opcode)) .addReg(SrcReg, getKillRegState(isKill)) - .addImm(PTLI->GetTmpOffsetForFI(FI, 3)) + .addImm(PTLI->GetTmpOffsetForFI(FI, 3, *MBB.getParent())) .addExternalSymbol(tmpName) .addImm(1); // Emit banksel for it. } @@ -113,21 +113,21 @@ void PIC16InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, unsigned DestReg, int FI, const TargetRegisterClass *RC) const { - PIC16TargetLowering *PTLI = TM.getTargetLowering(); + const PIC16TargetLowering *PTLI = TM.getTargetLowering(); DebugLoc DL; if (I != MBB.end()) DL = I->getDebugLoc(); const Function *Func = MBB.getParent()->getFunction(); const std::string FuncName = Func->getName(); - const char *tmpName = createESName(PAN::getTempdataLabel(FuncName)); + const char *tmpName = ESNames::createESName(PAN::getTempdataLabel(FuncName)); // On the order of operands here: think "movf FrameIndex, W". if (RC == PIC16::GPRRegisterClass) { //MachineFunction &MF = *MBB.getParent(); //MachineRegisterInfo &RI = MF.getRegInfo(); BuildMI(MBB, I, DL, get(PIC16::movf), DestReg) - .addImm(PTLI->GetTmpOffsetForFI(FI, 1)) + .addImm(PTLI->GetTmpOffsetForFI(FI, 1, *MBB.getParent())) .addExternalSymbol(tmpName) .addImm(1); // Emit banksel for it. } @@ -141,7 +141,7 @@ void PIC16InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, unsigned opcode = (DestReg == PIC16::FSR0) ? PIC16::restore_fsr0 : PIC16::restore_fsr1; BuildMI(MBB, I, DL, get(opcode), DestReg) - .addImm(PTLI->GetTmpOffsetForFI(FI, 3)) + .addImm(PTLI->GetTmpOffsetForFI(FI, 3, *MBB.getParent())) .addExternalSymbol(tmpName) .addImm(1); // Emit banksel for it. 
} diff --git a/lib/Target/PIC16/PIC16MachineFunctionInfo.h b/lib/Target/PIC16/PIC16MachineFunctionInfo.h new file mode 100644 index 0000000..bdf5086 --- /dev/null +++ b/lib/Target/PIC16/PIC16MachineFunctionInfo.h @@ -0,0 +1,52 @@ +//====- PIC16MachineFuctionInfo.h - PIC16 machine function info -*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file declares PIC16-specific per-machine-function information. +// +//===----------------------------------------------------------------------===// + +#ifndef PIC16MACHINEFUNCTIONINFO_H +#define PIC16MACHINEFUNCTIONINFO_H + +#include "llvm/CodeGen/MachineFunction.h" + +namespace llvm { + +/// PIC16MachineFunctionInfo - This class is derived from MachineFunction +/// private PIC16 target-specific information for each MachineFunction. +class PIC16MachineFunctionInfo : public MachineFunctionInfo { + // The frameindexes generated for spill/reload are stack based. + // This maps maintain zero based indexes for these FIs. + std::map<unsigned, unsigned> FiTmpOffsetMap; + unsigned TmpSize; + + // These are the frames for return value and argument passing + // These FrameIndices will be expanded to foo.frame external symbol + // and all others will be expanded to foo.tmp external symbol. + unsigned ReservedFrameCount; + +public: + PIC16MachineFunctionInfo() + : TmpSize(0), ReservedFrameCount(0) {} + + explicit PIC16MachineFunctionInfo(MachineFunction &MF) + : TmpSize(0), ReservedFrameCount(0) {} + + std::map<unsigned, unsigned> &getFiTmpOffsetMap() { return FiTmpOffsetMap; } + + unsigned getTmpSize() const { return TmpSize; } + void setTmpSize(unsigned Size) { TmpSize = Size; } + + unsigned getReservedFrameCount() const { return ReservedFrameCount; } + void setReservedFrameCount(unsigned Count) { ReservedFrameCount = Count; } +}; + +} // End llvm namespace + +#endif diff --git a/lib/Target/PIC16/PIC16Passes/PIC16Cloner.cpp b/lib/Target/PIC16/PIC16Passes/PIC16Cloner.cpp index 865da35..c282521 100644 --- a/lib/Target/PIC16/PIC16Passes/PIC16Cloner.cpp +++ b/lib/Target/PIC16/PIC16Passes/PIC16Cloner.cpp @@ -172,7 +172,7 @@ void PIC16Cloner::CloneAutos(Function *F) { VarName = I->getName().str(); if (PAN::isLocalToFunc(FnName, VarName)) { // Auto variable for current function found. Clone it. - GlobalVariable *GV = I; + const GlobalVariable *GV = I; const Type *InitTy = GV->getInitializer()->getType(); GlobalVariable *ClonedGV = diff --git a/lib/Target/PIC16/PIC16SelectionDAGInfo.cpp b/lib/Target/PIC16/PIC16SelectionDAGInfo.cpp new file mode 100644 index 0000000..76c6c60 --- /dev/null +++ b/lib/Target/PIC16/PIC16SelectionDAGInfo.cpp @@ -0,0 +1,22 @@ +//===-- PIC16SelectionDAGInfo.cpp - PIC16 SelectionDAG Info ---------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the PIC16SelectionDAGInfo class. 
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "pic16-selectiondag-info" +#include "PIC16SelectionDAGInfo.h" +using namespace llvm; + +PIC16SelectionDAGInfo::PIC16SelectionDAGInfo() { +} + +PIC16SelectionDAGInfo::~PIC16SelectionDAGInfo() { +} diff --git a/lib/Target/PIC16/PIC16SelectionDAGInfo.h b/lib/Target/PIC16/PIC16SelectionDAGInfo.h new file mode 100644 index 0000000..112480e5 --- /dev/null +++ b/lib/Target/PIC16/PIC16SelectionDAGInfo.h @@ -0,0 +1,29 @@ +//===-- PIC16SelectionDAGInfo.h - PIC16 SelectionDAG Info -------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the PIC16 subclass for TargetSelectionDAGInfo. +// +//===----------------------------------------------------------------------===// + +#ifndef PIC16SELECTIONDAGINFO_H +#define PIC16SELECTIONDAGINFO_H + +#include "llvm/Target/TargetSelectionDAGInfo.h" + +namespace llvm { + +class PIC16SelectionDAGInfo : public TargetSelectionDAGInfo { +public: + PIC16SelectionDAGInfo(); + ~PIC16SelectionDAGInfo(); +}; + +} + +#endif diff --git a/lib/Target/PIC16/PIC16TargetMachine.h b/lib/Target/PIC16/PIC16TargetMachine.h index b11fdd5..849845a 100644 --- a/lib/Target/PIC16/PIC16TargetMachine.h +++ b/lib/Target/PIC16/PIC16TargetMachine.h @@ -50,8 +50,8 @@ public: return &(InstrInfo.getRegisterInfo()); } - virtual PIC16TargetLowering *getTargetLowering() const { - return const_cast<PIC16TargetLowering*>(&TLInfo); + virtual const PIC16TargetLowering *getTargetLowering() const { + return &TLInfo; } virtual bool addInstSelector(PassManagerBase &PM, diff --git a/lib/Target/PIC16/PIC16TargetObjectFile.cpp b/lib/Target/PIC16/PIC16TargetObjectFile.cpp index b891c18..ff0f971 100644 --- a/lib/Target/PIC16/PIC16TargetObjectFile.cpp +++ b/lib/Target/PIC16/PIC16TargetObjectFile.cpp @@ -8,7 +8,6 @@ //===----------------------------------------------------------------------===// #include "PIC16TargetObjectFile.h" -#include "PIC16ISelLowering.h" #include "PIC16TargetMachine.h" #include "PIC16Section.h" #include "llvm/DerivedTypes.h" @@ -27,7 +26,7 @@ PIC16TargetObjectFile::~PIC16TargetObjectFile() { /// Find a pic16 section. Return null if not found. Do not create one. PIC16Section *PIC16TargetObjectFile:: -findPIC16Section(const std::string &Name) { +findPIC16Section(const std::string &Name) const { /// Return if we have an already existing one. PIC16Section *Entry = SectionsByName[Name]; if (Entry) @@ -134,7 +133,7 @@ void PIC16TargetObjectFile::Initialize(MCContext &Ctx, const TargetMachine &tm){ const MCSection * PIC16TargetObjectFile::allocateUDATA(const GlobalVariable *GV) const { assert(GV->hasInitializer() && "This global doesn't need space"); - Constant *C = GV->getInitializer(); + const Constant *C = GV->getInitializer(); assert(C->isNullValue() && "Unitialized globals has non-zero initializer"); // Find how much space this global needs. 
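The PIC16 hunks above move the spill-slot bookkeeping (FiTmpOffsetMap, TmpSize, ReservedFrameCount) out of PIC16TargetLowering and into the new PIC16MachineFunctionInfo, which is why GetTmpOffsetForFI now takes a MachineFunction and why the PIC16InstrInfo callers pass *MBB.getParent(). The actual body of GetTmpOffsetForFI is not shown in this excerpt, so the following is only a sketch of the pattern, built from the accessors declared in PIC16MachineFunctionInfo.h above; the real implementation may differ in detail:

    unsigned PIC16TargetLowering::GetTmpOffsetForFI(unsigned FI, unsigned slot_size,
                                                    MachineFunction &MF) const {
      // Per-function state now lives on the MachineFunction, not on the shared,
      // const TargetLowering object.
      PIC16MachineFunctionInfo *FuncInfo = MF.getInfo<PIC16MachineFunctionInfo>();
      std::map<unsigned, unsigned> &FiTmpOffsetMap = FuncInfo->getFiTmpOffsetMap();

      // Reuse the slot if this frame index was already assigned one.
      std::map<unsigned, unsigned>::iterator It = FiTmpOffsetMap.find(FI);
      if (It != FiTmpOffsetMap.end())
        return It->second;

      // Otherwise carve a new zero-based slot out of the temp area and grow it.
      unsigned NewOffset = FuncInfo->getTmpSize();
      FiTmpOffsetMap[FI] = NewOffset;
      FuncInfo->setTmpSize(NewOffset + slot_size);
      return NewOffset;
    }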
@@ -169,7 +168,7 @@ PIC16TargetObjectFile::allocateUDATA(const GlobalVariable *GV) const { const MCSection * PIC16TargetObjectFile::allocateIDATA(const GlobalVariable *GV) const{ assert(GV->hasInitializer() && "This global doesn't need space"); - Constant *C = GV->getInitializer(); + const Constant *C = GV->getInitializer(); assert(!C->isNullValue() && "initialized globals has zero initializer"); assert(GV->getType()->getAddressSpace() == PIC16ISD::RAM_SPACE && "can allocate initialized RAM data only"); diff --git a/lib/Target/PIC16/PIC16TargetObjectFile.h b/lib/Target/PIC16/PIC16TargetObjectFile.h index cf8bf84..b1eb9f9 100644 --- a/lib/Target/PIC16/PIC16TargetObjectFile.h +++ b/lib/Target/PIC16/PIC16TargetObjectFile.h @@ -122,7 +122,7 @@ namespace llvm { void Initialize(MCContext &Ctx, const TargetMachine &TM); /// Return the section with the given Name. Null if not found. - PIC16Section *findPIC16Section(const std::string &Name); + PIC16Section *findPIC16Section(const std::string &Name) const; /// Override section allocations for user specified sections. virtual const MCSection * diff --git a/lib/Target/PowerPC/AsmPrinter/CMakeLists.txt b/lib/Target/PowerPC/AsmPrinter/CMakeLists.txt index 236b264..42cd486 100644 --- a/lib/Target/PowerPC/AsmPrinter/CMakeLists.txt +++ b/lib/Target/PowerPC/AsmPrinter/CMakeLists.txt @@ -3,4 +3,4 @@ include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/ add_llvm_library(LLVMPowerPCAsmPrinter PPCAsmPrinter.cpp ) -add_dependencies(LLVMPowerPCAsmPrinter PowerPCCodeGenTable_gen)
\ No newline at end of file +add_dependencies(LLVMPowerPCAsmPrinter PowerPCCodeGenTable_gen) diff --git a/lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp b/lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp index 6056564..e35dc57 100644 --- a/lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp +++ b/lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp @@ -21,6 +21,7 @@ #include "PPCPredicates.h" #include "PPCTargetMachine.h" #include "PPCSubtarget.h" +#include "llvm/Analysis/DebugInfo.h" #include "llvm/Constants.h" #include "llvm/DerivedTypes.h" #include "llvm/Module.h" @@ -199,8 +200,8 @@ namespace { raw_ostream &O) { const MachineOperand &MO = MI->getOperand(OpNo); if (TM.getRelocationModel() != Reloc::Static) { - if (MO.getType() == MachineOperand::MO_GlobalAddress) { - GlobalValue *GV = MO.getGlobal(); + if (MO.isGlobal()) { + const GlobalValue *GV = MO.getGlobal(); if (GV->isDeclaration() || GV->isWeakForLinker()) { // Dynamically-resolved functions need a stub for the function. MCSymbol *Sym = GetSymbolWithGlobalValueBase(GV, "$stub"); @@ -213,7 +214,7 @@ namespace { return; } } - if (MO.getType() == MachineOperand::MO_ExternalSymbol) { + if (MO.isSymbol()) { SmallString<128> TempNameStr; TempNameStr += StringRef(MO.getSymbolName()); TempNameStr += StringRef("$stub"); @@ -311,7 +312,7 @@ namespace { void printTOCEntryLabel(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) { const MachineOperand &MO = MI->getOperand(OpNo); - assert(MO.getType() == MachineOperand::MO_GlobalAddress); + assert(MO.isGlobal()); MCSymbol *Sym = Mang->getSymbol(MO.getGlobal()); // Map symbol -> label of TOC entry. @@ -405,7 +406,7 @@ void PPCAsmPrinter::printOp(const MachineOperand &MO, raw_ostream &O) { } case MachineOperand::MO_GlobalAddress: { // Computing the address of a global symbol, not calling it. - GlobalValue *GV = MO.getGlobal(); + const GlobalValue *GV = MO.getGlobal(); MCSymbol *SymToPrint; // External or weakly linked global variables need non-lazily-resolved stubs @@ -535,6 +536,23 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { SmallString<128> Str; raw_svector_ostream O(Str); + if (MI->getOpcode() == TargetOpcode::DBG_VALUE) { + unsigned NOps = MI->getNumOperands(); + assert(NOps==4); + O << '\t' << MAI->getCommentString() << "DEBUG_VALUE: "; + // cast away const; DIetc do not take const operands for some reason. + DIVariable V(const_cast<MDNode *>(MI->getOperand(NOps-1).getMetadata())); + O << V.getName(); + O << " <- "; + // Frame address. Currently handles register +- offset only. + assert(MI->getOperand(0).isReg() && MI->getOperand(1).isImm()); + O << '['; printOperand(MI, 0, O); O << '+'; printOperand(MI, 1, O); + O << ']'; + O << "+"; + printOperand(MI, NOps-2, O); + OutStreamer.EmitRawText(O.str()); + return; + } // Check for slwi/srwi mnemonics. if (MI->getOpcode() == PPC::RLWINM) { unsigned char SH = MI->getOperand(2).getImm(); @@ -649,18 +667,18 @@ void PPCDarwinAsmPrinter::EmitStartOfAsmFile(Module &M) { // Prime text sections so they are adjacent. This reduces the likelihood a // large data or debug section causes a branch to exceed 16M limit. 
- TargetLoweringObjectFileMachO &TLOFMacho = - static_cast<TargetLoweringObjectFileMachO &>(getObjFileLowering()); + const TargetLoweringObjectFileMachO &TLOFMacho = + static_cast<const TargetLoweringObjectFileMachO &>(getObjFileLowering()); OutStreamer.SwitchSection(TLOFMacho.getTextCoalSection()); if (TM.getRelocationModel() == Reloc::PIC_) { OutStreamer.SwitchSection( - TLOFMacho.getMachOSection("__TEXT", "__picsymbolstub1", + OutContext.getMachOSection("__TEXT", "__picsymbolstub1", MCSectionMachO::S_SYMBOL_STUBS | MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS, 32, SectionKind::getText())); } else if (TM.getRelocationModel() == Reloc::DynamicNoPIC) { OutStreamer.SwitchSection( - TLOFMacho.getMachOSection("__TEXT","__symbol_stub1", + OutContext.getMachOSection("__TEXT","__symbol_stub1", MCSectionMachO::S_SYMBOL_STUBS | MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS, 16, SectionKind::getText())); @@ -686,8 +704,8 @@ void PPCDarwinAsmPrinter:: EmitFunctionStubs(const MachineModuleInfoMachO::SymbolListTy &Stubs) { bool isPPC64 = TM.getTargetData()->getPointerSizeInBits() == 64; - TargetLoweringObjectFileMachO &TLOFMacho = - static_cast<TargetLoweringObjectFileMachO &>(getObjFileLowering()); + const TargetLoweringObjectFileMachO &TLOFMacho = + static_cast<const TargetLoweringObjectFileMachO &>(getObjFileLowering()); // .lazy_symbol_pointer const MCSection *LSPSection = TLOFMacho.getLazySymbolPointerSection(); @@ -695,10 +713,10 @@ EmitFunctionStubs(const MachineModuleInfoMachO::SymbolListTy &Stubs) { // Output stubs for dynamically-linked functions if (TM.getRelocationModel() == Reloc::PIC_) { const MCSection *StubSection = - TLOFMacho.getMachOSection("__TEXT", "__picsymbolstub1", - MCSectionMachO::S_SYMBOL_STUBS | - MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS, - 32, SectionKind::getText()); + OutContext.getMachOSection("__TEXT", "__picsymbolstub1", + MCSectionMachO::S_SYMBOL_STUBS | + MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS, + 32, SectionKind::getText()); for (unsigned i = 0, e = Stubs.size(); i != e; ++i) { OutStreamer.SwitchSection(StubSection); EmitAlignment(4); @@ -742,10 +760,10 @@ EmitFunctionStubs(const MachineModuleInfoMachO::SymbolListTy &Stubs) { } const MCSection *StubSection = - TLOFMacho.getMachOSection("__TEXT","__symbol_stub1", - MCSectionMachO::S_SYMBOL_STUBS | - MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS, - 16, SectionKind::getText()); + OutContext.getMachOSection("__TEXT","__symbol_stub1", + MCSectionMachO::S_SYMBOL_STUBS | + MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS, + 16, SectionKind::getText()); for (unsigned i = 0, e = Stubs.size(); i != e; ++i) { MCSymbol *Stub = Stubs[i].first; MCSymbol *RawSym = Stubs[i].second.getPointer(); @@ -782,8 +800,8 @@ bool PPCDarwinAsmPrinter::doFinalization(Module &M) { bool isPPC64 = TM.getTargetData()->getPointerSizeInBits() == 64; // Darwin/PPC always uses mach-o. - TargetLoweringObjectFileMachO &TLOFMacho = - static_cast<TargetLoweringObjectFileMachO &>(getObjFileLowering()); + const TargetLoweringObjectFileMachO &TLOFMacho = + static_cast<const TargetLoweringObjectFileMachO &>(getObjFileLowering()); MachineModuleInfoMachO &MMIMacho = MMI->getObjFileInfo<MachineModuleInfoMachO>(); @@ -794,8 +812,8 @@ bool PPCDarwinAsmPrinter::doFinalization(Module &M) { if (MAI->doesSupportExceptionHandling() && MMI) { // Add the (possibly multiple) personalities to the set of global values. // Only referenced functions get into the Personalities list. 
- const std::vector<Function *> &Personalities = MMI->getPersonalities(); - for (std::vector<Function *>::const_iterator I = Personalities.begin(), + const std::vector<const Function*> &Personalities = MMI->getPersonalities(); + for (std::vector<const Function*>::const_iterator I = Personalities.begin(), E = Personalities.end(); I != E; ++I) { if (*I) { MCSymbol *NLPSym = GetSymbolWithGlobalValueBase(*I, "$non_lazy_ptr"); diff --git a/lib/Target/PowerPC/CMakeLists.txt b/lib/Target/PowerPC/CMakeLists.txt index c997c5c..7ffc5eb 100644 --- a/lib/Target/PowerPC/CMakeLists.txt +++ b/lib/Target/PowerPC/CMakeLists.txt @@ -24,6 +24,7 @@ add_llvm_target(PowerPCCodeGen PPCRegisterInfo.cpp PPCSubtarget.cpp PPCTargetMachine.cpp + PPCSelectionDAGInfo.cpp ) target_link_libraries (LLVMPowerPCCodeGen LLVMSelectionDAG) diff --git a/lib/Target/PowerPC/PPCCodeEmitter.cpp b/lib/Target/PowerPC/PPCCodeEmitter.cpp index f7c27d4..361fa70 100644 --- a/lib/Target/PowerPC/PPCCodeEmitter.cpp +++ b/lib/Target/PowerPC/PPCCodeEmitter.cpp @@ -202,7 +202,7 @@ unsigned PPCCodeEmitter::getMachineOpValue(const MachineInstr &MI, MachineRelocation R; if (MO.isGlobal()) { R = MachineRelocation::getGV(MCE.getCurrentPCOffset(), Reloc, - MO.getGlobal(), 0, + const_cast<GlobalValue *>(MO.getGlobal()), 0, isa<Function>(MO.getGlobal())); } else if (MO.isSymbol()) { R = MachineRelocation::getExtSym(MCE.getCurrentPCOffset(), diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp index 1e32384..3d9f8aa 100644 --- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -16,7 +16,6 @@ #include "PPC.h" #include "PPCPredicates.h" #include "PPCTargetMachine.h" -#include "PPCISelLowering.h" #include "PPCHazardRecognizers.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineFunction.h" @@ -41,8 +40,8 @@ namespace { /// instructions for SelectionDAG operations. /// class PPCDAGToDAGISel : public SelectionDAGISel { - PPCTargetMachine &TM; - PPCTargetLowering &PPCLowering; + const PPCTargetMachine &TM; + const PPCTargetLowering &PPCLowering; const PPCSubtarget &PPCSubTarget; unsigned GlobalBaseReg; public: diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index 9cd01be..6f11953 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -397,7 +397,7 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) /// getByValTypeAlignment - Return the desired alignment for ByVal aggregate /// function arguments in the caller parameter area. unsigned PPCTargetLowering::getByValTypeAlignment(const Type *Ty) const { - TargetMachine &TM = getTargetMachine(); + const TargetMachine &TM = getTargetMachine(); // Darwin passes everything on 4 byte boundary. if (TM.getSubtarget<PPCSubtarget>().isDarwin()) return 4; @@ -476,7 +476,7 @@ static bool isFloatingPointZero(SDValue Op) { else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) { // Maybe this has already been legalized into the constant pool? 
if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op.getOperand(1))) - if (ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal())) + if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal())) return CFP->getValueAPF().isZero(); } return false; @@ -1095,10 +1095,10 @@ bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base, //===----------------------------------------------------------------------===// SDValue PPCTargetLowering::LowerConstantPool(SDValue Op, - SelectionDAG &DAG) { + SelectionDAG &DAG) const { EVT PtrVT = Op.getValueType(); ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op); - Constant *C = CP->getConstVal(); + const Constant *C = CP->getConstVal(); SDValue CPI = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment()); SDValue Zero = DAG.getConstant(0, PtrVT); // FIXME there isn't really any debug info here @@ -1129,7 +1129,7 @@ SDValue PPCTargetLowering::LowerConstantPool(SDValue Op, return Lo; } -SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) { +SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const { EVT PtrVT = Op.getValueType(); JumpTableSDNode *JT = cast<JumpTableSDNode>(Op); SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT); @@ -1163,16 +1163,17 @@ SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) { } SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op, - SelectionDAG &DAG) { + SelectionDAG &DAG) const { llvm_unreachable("TLS not implemented for PPC."); return SDValue(); // Not reached } -SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) { +SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op, + SelectionDAG &DAG) const { EVT PtrVT = Op.getValueType(); DebugLoc DL = Op.getDebugLoc(); - BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress(); + const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress(); SDValue TgtBA = DAG.getBlockAddress(BA, PtrVT, /*isTarget=*/true); SDValue Zero = DAG.getConstant(0, PtrVT); SDValue Hi = DAG.getNode(PPCISD::Hi, DL, PtrVT, TgtBA, Zero); @@ -1199,10 +1200,10 @@ SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) { } SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op, - SelectionDAG &DAG) { + SelectionDAG &DAG) const { EVT PtrVT = Op.getValueType(); GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op); - GlobalValue *GV = GSDN->getGlobal(); + const GlobalValue *GV = GSDN->getGlobal(); SDValue GA = DAG.getTargetGlobalAddress(GV, PtrVT, GSDN->getOffset()); SDValue Zero = DAG.getConstant(0, PtrVT); // FIXME there isn't really any debug info here @@ -1247,7 +1248,7 @@ SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op, false, false, 0); } -SDValue PPCTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) { +SDValue PPCTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const { ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get(); DebugLoc dl = Op.getDebugLoc(); @@ -1291,17 +1292,14 @@ SDValue PPCTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) { } SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG, - int VarArgsFrameIndex, - int VarArgsStackOffset, - unsigned VarArgsNumGPR, - unsigned VarArgsNumFPR, - const PPCSubtarget &Subtarget) { + const PPCSubtarget &Subtarget) const { llvm_unreachable("VAARG not yet implemented for the SVR4 ABI!"); return SDValue(); // Not reached } -SDValue PPCTargetLowering::LowerTRAMPOLINE(SDValue Op, SelectionDAG &DAG) { +SDValue 
PPCTargetLowering::LowerTRAMPOLINE(SDValue Op, + SelectionDAG &DAG) const { SDValue Chain = Op.getOperand(0); SDValue Trmp = Op.getOperand(1); // trampoline SDValue FPtr = Op.getOperand(2); // nested function @@ -1343,18 +1341,17 @@ SDValue PPCTargetLowering::LowerTRAMPOLINE(SDValue Op, SelectionDAG &DAG) { } SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG, - int VarArgsFrameIndex, - int VarArgsStackOffset, - unsigned VarArgsNumGPR, - unsigned VarArgsNumFPR, - const PPCSubtarget &Subtarget) { + const PPCSubtarget &Subtarget) const { + MachineFunction &MF = DAG.getMachineFunction(); + PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); + DebugLoc dl = Op.getDebugLoc(); if (Subtarget.isDarwinABI() || Subtarget.isPPC64()) { // vastart just stores the address of the VarArgsFrameIndex slot into the // memory location argument. EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); - SDValue FR = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT); + SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT); const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1), SV, 0, false, false, 0); @@ -1385,14 +1382,16 @@ SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG, // } va_list[1]; - SDValue ArgGPR = DAG.getConstant(VarArgsNumGPR, MVT::i32); - SDValue ArgFPR = DAG.getConstant(VarArgsNumFPR, MVT::i32); + SDValue ArgGPR = DAG.getConstant(FuncInfo->getVarArgsNumGPR(), MVT::i32); + SDValue ArgFPR = DAG.getConstant(FuncInfo->getVarArgsNumFPR(), MVT::i32); EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); - SDValue StackOffsetFI = DAG.getFrameIndex(VarArgsStackOffset, PtrVT); - SDValue FR = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT); + SDValue StackOffsetFI = DAG.getFrameIndex(FuncInfo->getVarArgsStackOffset(), + PtrVT); + SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), + PtrVT); uint64_t FrameOffset = PtrVT.getSizeInBits()/8; SDValue ConstFrameOffset = DAG.getConstant(FrameOffset, PtrVT); @@ -1525,7 +1524,8 @@ PPCTargetLowering::LowerFormalArguments(SDValue Chain, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals) { + SmallVectorImpl<SDValue> &InVals) + const { if (PPCSubTarget.isSVR4ABI() && !PPCSubTarget.isPPC64()) { return LowerFormalArguments_SVR4(Chain, CallConv, isVarArg, Ins, dl, DAG, InVals); @@ -1542,7 +1542,7 @@ PPCTargetLowering::LowerFormalArguments_SVR4( const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals) { + SmallVectorImpl<SDValue> &InVals) const { // 32-bit SVR4 ABI Stack Frame Layout: // +-----------------------------------+ @@ -1575,6 +1575,7 @@ PPCTargetLowering::LowerFormalArguments_SVR4( MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); + PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); // Potential tail calls could cause overwriting of argument stack slots. 
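Most of this patch is the mechanical part of making the SelectionDAG lowering entry points const. The practical consequence, visible in the VASTART/VAARG and LowerFormalArguments hunks around here, is that per-function data the hooks used to cache in PPCTargetLowering members (the VarArgs* fields deleted from PPCISelLowering.h further down) now lives in PPCFunctionInfo and is reached through the MachineFunction. A minimal sketch of that pattern (the helper name is invented for illustration; it uses only APIs and accessors that appear in this patch):

    // A const lowering hook can still record per-function facts: it goes
    // through the MachineFunction's target-specific info object.
    static SDValue rememberVarArgsArea(SelectionDAG &DAG, EVT PtrVT,
                                       int Depth /* bytes to reserve */) {
      MachineFunction &MF = DAG.getMachineFunction();
      PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();

      // Reserve the register-save area and remember where it is.
      int FI = MF.getFrameInfo()->CreateStackObject(Depth, 8, false);
      FuncInfo->setVarArgsFrameIndex(FI);

      // Later users (LowerVASTART, for example) read it back from the same place.
      return DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
    }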
@@ -1688,24 +1689,27 @@ PPCTargetLowering::LowerFormalArguments_SVR4( }; const unsigned NumFPArgRegs = array_lengthof(FPArgRegs); - VarArgsNumGPR = CCInfo.getFirstUnallocated(GPArgRegs, NumGPArgRegs); - VarArgsNumFPR = CCInfo.getFirstUnallocated(FPArgRegs, NumFPArgRegs); + FuncInfo->setVarArgsNumGPR(CCInfo.getFirstUnallocated(GPArgRegs, + NumGPArgRegs)); + FuncInfo->setVarArgsNumFPR(CCInfo.getFirstUnallocated(FPArgRegs, + NumFPArgRegs)); // Make room for NumGPArgRegs and NumFPArgRegs. int Depth = NumGPArgRegs * PtrVT.getSizeInBits()/8 + NumFPArgRegs * EVT(MVT::f64).getSizeInBits()/8; - VarArgsStackOffset = MFI->CreateFixedObject(PtrVT.getSizeInBits()/8, - CCInfo.getNextStackOffset(), - true, false); + FuncInfo->setVarArgsStackOffset( + MFI->CreateFixedObject(PtrVT.getSizeInBits()/8, + CCInfo.getNextStackOffset(), + true, false)); - VarArgsFrameIndex = MFI->CreateStackObject(Depth, 8, false); - SDValue FIN = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT); + FuncInfo->setVarArgsFrameIndex(MFI->CreateStackObject(Depth, 8, false)); + SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT); // The fixed integer arguments of a variadic function are // stored to the VarArgsFrameIndex on the stack. unsigned GPRIndex = 0; - for (; GPRIndex != VarArgsNumGPR; ++GPRIndex) { + for (; GPRIndex != FuncInfo->getVarArgsNumGPR(); ++GPRIndex) { SDValue Val = DAG.getRegister(GPArgRegs[GPRIndex], PtrVT); SDValue Store = DAG.getStore(Chain, dl, Val, FIN, NULL, 0, false, false, 0); @@ -1736,7 +1740,7 @@ PPCTargetLowering::LowerFormalArguments_SVR4( // The double arguments are stored to the VarArgsFrameIndex // on the stack. unsigned FPRIndex = 0; - for (FPRIndex = 0; FPRIndex != VarArgsNumFPR; ++FPRIndex) { + for (FPRIndex = 0; FPRIndex != FuncInfo->getVarArgsNumFPR(); ++FPRIndex) { SDValue Val = DAG.getRegister(FPArgRegs[FPRIndex], MVT::f64); SDValue Store = DAG.getStore(Chain, dl, Val, FIN, NULL, 0, false, false, 0); @@ -1775,11 +1779,12 @@ PPCTargetLowering::LowerFormalArguments_Darwin( const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals) { + SmallVectorImpl<SDValue> &InVals) const { // TODO: add description of PPC stack frame format, or at least some docs. // MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); + PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); bool isPPC64 = PtrVT == MVT::i64; @@ -2090,9 +2095,10 @@ PPCTargetLowering::LowerFormalArguments_Darwin( if (isVarArg) { int Depth = ArgOffset; - VarArgsFrameIndex = MFI->CreateFixedObject(PtrVT.getSizeInBits()/8, - Depth, true, false); - SDValue FIN = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT); + FuncInfo->setVarArgsFrameIndex( + MFI->CreateFixedObject(PtrVT.getSizeInBits()/8, + Depth, true, false)); + SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT); // If this function is vararg, store any remaining integer argument regs // to their spots on the stack so that they may be loaded by deferencing the @@ -2359,7 +2365,7 @@ SDValue PPCTargetLowering::EmitTailCallLoadFPAndRetAddr(SelectionDAG & DAG, SDValue &LROpOut, SDValue &FPOpOut, bool isDarwinABI, - DebugLoc dl) { + DebugLoc dl) const { if (SPDiff) { // Load the LR and FP stack slot for later adjusting. EVT VT = PPCSubTarget.isPPC64() ? 
MVT::i64 : MVT::i32; @@ -2582,7 +2588,7 @@ PPCTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals) { + SmallVectorImpl<SDValue> &InVals) const { SmallVector<CCValAssign, 16> RVLocs; CCState CCRetInfo(CallConv, isVarArg, getTargetMachine(), @@ -2613,7 +2619,7 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, DebugLoc dl, SDValue &Callee, int SPDiff, unsigned NumBytes, const SmallVectorImpl<ISD::InputArg> &Ins, - SmallVectorImpl<SDValue> &InVals) { + SmallVectorImpl<SDValue> &InVals) const { std::vector<EVT> NodeTys; SmallVector<SDValue, 8> Ops; unsigned CallOpc = PrepareCall(DAG, Callee, InFlag, Chain, dl, SPDiff, @@ -2701,7 +2707,7 @@ PPCTargetLowering::LowerCall(SDValue Chain, SDValue Callee, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals) { + SmallVectorImpl<SDValue> &InVals) const { if (isTailCall) isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, isVarArg, Ins, DAG); @@ -2724,7 +2730,7 @@ PPCTargetLowering::LowerCall_SVR4(SDValue Chain, SDValue Callee, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals) { + SmallVectorImpl<SDValue> &InVals) const { // See PPCTargetLowering::LowerFormalArguments_SVR4() for a description // of the 32-bit SVR4 ABI stack frame layout. @@ -2930,7 +2936,7 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals) { + SmallVectorImpl<SDValue> &InVals) const { unsigned NumOps = Outs.size(); @@ -3291,7 +3297,7 @@ SDValue PPCTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::OutputArg> &Outs, - DebugLoc dl, SelectionDAG &DAG) { + DebugLoc dl, SelectionDAG &DAG) const { SmallVector<CCValAssign, 16> RVLocs; CCState CCInfo(CallConv, isVarArg, getTargetMachine(), @@ -3323,7 +3329,7 @@ PPCTargetLowering::LowerReturn(SDValue Chain, } SDValue PPCTargetLowering::LowerSTACKRESTORE(SDValue Op, SelectionDAG &DAG, - const PPCSubtarget &Subtarget) { + const PPCSubtarget &Subtarget) const { // When we pop the dynamic allocation we need to restore the SP link. DebugLoc dl = Op.getDebugLoc(); @@ -3407,7 +3413,7 @@ PPCTargetLowering::getFramePointerFrameIndex(SelectionDAG & DAG) const { SDValue PPCTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG, - const PPCSubtarget &Subtarget) { + const PPCSubtarget &Subtarget) const { // Get the inputs. SDValue Chain = Op.getOperand(0); SDValue Size = Op.getOperand(1); @@ -3428,7 +3434,7 @@ SDValue PPCTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, /// LowerSELECT_CC - Lower floating point select_cc's into fsel instruction when /// possible. -SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) { +SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { // Not FP? Not a fsel. if (!Op.getOperand(0).getValueType().isFloatingPoint() || !Op.getOperand(2).getValueType().isFloatingPoint()) @@ -3502,7 +3508,7 @@ SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) { // FIXME: Split this code up when LegalizeDAGTypes lands. 
SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG, - DebugLoc dl) { + DebugLoc dl) const { assert(Op.getOperand(0).getValueType().isFloatingPoint()); SDValue Src = Op.getOperand(0); if (Src.getValueType() == MVT::f32) @@ -3537,7 +3543,8 @@ SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG, false, false, 0); } -SDValue PPCTargetLowering::LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) { +SDValue PPCTargetLowering::LowerSINT_TO_FP(SDValue Op, + SelectionDAG &DAG) const { DebugLoc dl = Op.getDebugLoc(); // Don't handle ppc_fp128 here; let it be lowered to a libcall. if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64) @@ -3586,7 +3593,8 @@ SDValue PPCTargetLowering::LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) { return FP; } -SDValue PPCTargetLowering::LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) { +SDValue PPCTargetLowering::LowerFLT_ROUNDS_(SDValue Op, + SelectionDAG &DAG) const { DebugLoc dl = Op.getDebugLoc(); /* The rounding mode is in bits 30:31 of FPSR, and has the following @@ -3649,7 +3657,7 @@ SDValue PPCTargetLowering::LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) { ISD::TRUNCATE : ISD::ZERO_EXTEND), dl, VT, RetVal); } -SDValue PPCTargetLowering::LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) { +SDValue PPCTargetLowering::LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) const { EVT VT = Op.getValueType(); unsigned BitWidth = VT.getSizeInBits(); DebugLoc dl = Op.getDebugLoc(); @@ -3678,7 +3686,7 @@ SDValue PPCTargetLowering::LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) { return DAG.getMergeValues(OutOps, 2, dl); } -SDValue PPCTargetLowering::LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) { +SDValue PPCTargetLowering::LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) const { EVT VT = Op.getValueType(); DebugLoc dl = Op.getDebugLoc(); unsigned BitWidth = VT.getSizeInBits(); @@ -3707,7 +3715,7 @@ SDValue PPCTargetLowering::LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) { return DAG.getMergeValues(OutOps, 2, dl); } -SDValue PPCTargetLowering::LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) { +SDValue PPCTargetLowering::LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const { DebugLoc dl = Op.getDebugLoc(); EVT VT = Op.getValueType(); unsigned BitWidth = VT.getSizeInBits(); @@ -3808,7 +3816,8 @@ static SDValue BuildVSLDOI(SDValue LHS, SDValue RHS, unsigned Amt, // selects to a single instruction, return Op. Otherwise, if we can codegen // this case more efficiently than a constant pool load, lower it to the // sequence of ops that should be used. -SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) { +SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op, + SelectionDAG &DAG) const { DebugLoc dl = Op.getDebugLoc(); BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode()); assert(BVN != 0 && "Expected a BuildVectorSDNode in LowerBUILD_VECTOR"); @@ -4050,7 +4059,7 @@ static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS, /// return the code it can be lowered into. Worst case, it can always be /// lowered into a vperm. SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, - SelectionDAG &DAG) { + SelectionDAG &DAG) const { DebugLoc dl = Op.getDebugLoc(); SDValue V1 = Op.getOperand(0); SDValue V2 = Op.getOperand(1); @@ -4216,7 +4225,7 @@ static bool getAltivecCompareInfo(SDValue Intrin, int &CompareOpc, /// LowerINTRINSIC_WO_CHAIN - If this is an intrinsic that we want to custom /// lower, do it, otherwise return null. 
SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, - SelectionDAG &DAG) { + SelectionDAG &DAG) const { // If this is a lowered altivec predicate compare, CompareOpc is set to the // opcode number of the comparison. DebugLoc dl = Op.getDebugLoc(); @@ -4284,12 +4293,12 @@ SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, } SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op, - SelectionDAG &DAG) { + SelectionDAG &DAG) const { DebugLoc dl = Op.getDebugLoc(); // Create a stack slot that is 16-byte aligned. MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo(); int FrameIdx = FrameInfo->CreateStackObject(16, 16, false); - EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); + EVT PtrVT = getPointerTy(); SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT); // Store the input value into Value#0 of the stack slot. @@ -4301,7 +4310,7 @@ SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op, false, false, 0); } -SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) { +SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const { DebugLoc dl = Op.getDebugLoc(); if (Op.getValueType() == MVT::v4i32) { SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1); @@ -4362,7 +4371,7 @@ SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) { /// LowerOperation - Provide custom lowering hooks for some operations. /// -SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) { +SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { switch (Op.getOpcode()) { default: llvm_unreachable("Wasn't expecting to be able to lower this!"); case ISD::ConstantPool: return LowerConstantPool(Op, DAG); @@ -4373,12 +4382,10 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) { case ISD::SETCC: return LowerSETCC(Op, DAG); case ISD::TRAMPOLINE: return LowerTRAMPOLINE(Op, DAG); case ISD::VASTART: - return LowerVASTART(Op, DAG, VarArgsFrameIndex, VarArgsStackOffset, - VarArgsNumGPR, VarArgsNumFPR, PPCSubTarget); + return LowerVASTART(Op, DAG, PPCSubTarget); case ISD::VAARG: - return LowerVAARG(Op, DAG, VarArgsFrameIndex, VarArgsStackOffset, - VarArgsNumGPR, VarArgsNumFPR, PPCSubTarget); + return LowerVAARG(Op, DAG, PPCSubTarget); case ISD::STACKRESTORE: return LowerSTACKRESTORE(Op, DAG, PPCSubTarget); case ISD::DYNAMIC_STACKALLOC: @@ -4412,7 +4419,7 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) { void PPCTargetLowering::ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results, - SelectionDAG &DAG) { + SelectionDAG &DAG) const { DebugLoc dl = N->getDebugLoc(); switch (N->getOpcode()) { default: @@ -4677,8 +4684,7 @@ PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr *MI, MachineBasicBlock * PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, - MachineBasicBlock *BB, - DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const { + MachineBasicBlock *BB) const { const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); // To "insert" these instructions we actually have to insert their @@ -4716,12 +4722,9 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, F->insert(It, sinkMBB); // Update machine-CFG edges by first adding all successors of the current // block to the new block which will contain the Phi node for the select. - // Also inform sdisel of the edge changes. 
for (MachineBasicBlock::succ_iterator I = BB->succ_begin(), - E = BB->succ_end(); I != E; ++I) { - EM->insert(std::make_pair(*I, sinkMBB)); + E = BB->succ_end(); I != E; ++I) sinkMBB->addSuccessor(*I); - } // Next, remove all successors of the current block, and add the true // and fallthrough blocks as its successors. while (!BB->succ_empty()) @@ -5032,7 +5035,7 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const { - TargetMachine &TM = getTargetMachine(); + const TargetMachine &TM = getTargetMachine(); SelectionDAG &DAG = DCI.DAG; DebugLoc dl = N->getDebugLoc(); switch (N->getOpcode()) { @@ -5491,46 +5494,59 @@ bool PPCTargetLowering::isLegalAddressImmediate(llvm::GlobalValue* GV) const { return false; } -SDValue PPCTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) { +SDValue PPCTargetLowering::LowerRETURNADDR(SDValue Op, + SelectionDAG &DAG) const { DebugLoc dl = Op.getDebugLoc(); - // Depths > 0 not supported yet! - if (cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue() > 0) - return SDValue(); + unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); + // Make sure the function does not optimize away the store of the RA to + // the stack. MachineFunction &MF = DAG.getMachineFunction(); PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); + FuncInfo->setLRStoreRequired(); + bool isPPC64 = PPCSubTarget.isPPC64(); + bool isDarwinABI = PPCSubTarget.isDarwinABI(); + + if (Depth > 0) { + SDValue FrameAddr = LowerFRAMEADDR(Op, DAG); + SDValue Offset = + + DAG.getConstant(PPCFrameInfo::getReturnSaveOffset(isPPC64, isDarwinABI), + isPPC64? MVT::i64 : MVT::i32); + return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), + DAG.getNode(ISD::ADD, dl, getPointerTy(), + FrameAddr, Offset), + NULL, 0, false, false, 0); + } // Just load the return address off the stack. SDValue RetAddrFI = getReturnAddrFrameIndex(DAG); - - // Make sure the function really does not optimize away the store of the RA - // to the stack. - FuncInfo->setLRStoreRequired(); - return DAG.getLoad(getPointerTy(), dl, - DAG.getEntryNode(), RetAddrFI, NULL, 0, - false, false, 0); + return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), + RetAddrFI, NULL, 0, false, false, 0); } -SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) { +SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op, + SelectionDAG &DAG) const { DebugLoc dl = Op.getDebugLoc(); - // Depths > 0 not supported yet! - if (cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue() > 0) - return SDValue(); + unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); bool isPPC64 = PtrVT == MVT::i64; MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); - bool is31 = (NoFramePointerElim || MFI->hasVarSizedObjects()) - && MFI->getStackSize(); - - if (isPPC64) - return DAG.getCopyFromReg(DAG.getEntryNode(), dl, is31 ? PPC::X31 : PPC::X1, - MVT::i64); - else - return DAG.getCopyFromReg(DAG.getEntryNode(), dl, is31 ? PPC::R31 : PPC::R1, - MVT::i32); + MFI->setFrameAddressIsTaken(true); + bool is31 = (DisableFramePointerElim(MF) || MFI->hasVarSizedObjects()) && + MFI->getStackSize() && + !MF.getFunction()->hasFnAttr(Attribute::Naked); + unsigned FrameReg = isPPC64 ? (is31 ? PPC::X31 : PPC::X1) : + (is31 ? 
PPC::R31 : PPC::R1); + SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, + PtrVT); + while (Depth--) + FrameAddr = DAG.getLoad(Op.getValueType(), dl, DAG.getEntryNode(), + FrameAddr, NULL, 0, false, false, 0); + return FrameAddr; } bool @@ -5547,12 +5563,15 @@ PPCTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const { /// probably because the source does not need to be loaded. If /// 'NonScalarIntSafe' is true, that means it's safe to return a /// non-scalar-integer type, e.g. empty string source, constant, or loaded -/// from memory. It returns EVT::Other if SelectionDAG should be responsible -/// for determining it. +/// from memory. 'MemcpyStrSrc' indicates whether the memcpy source is +/// constant so it does not need to be loaded. +/// It returns EVT::Other if the type should be determined using generic +/// target-independent logic. EVT PPCTargetLowering::getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign, bool NonScalarIntSafe, - SelectionDAG &DAG) const { + bool MemcpyStrSrc, + MachineFunction &MF) const { if (this->PPCSubTarget.isPPC64()) { return MVT::i64; } else { diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h index f816bdd..1d05f3d 100644 --- a/lib/Target/PowerPC/PPCISelLowering.h +++ b/lib/Target/PowerPC/PPCISelLowering.h @@ -233,14 +233,8 @@ namespace llvm { } class PPCTargetLowering : public TargetLowering { - int VarArgsFrameIndex; // FrameIndex for start of varargs area. - int VarArgsStackOffset; // StackOffset for start of stack - // arguments. - unsigned VarArgsNumGPR; // Index of the first unused integer - // register for parameter passing. - unsigned VarArgsNumFPR; // Index of the first unused double - // register for parameter passing. const PPCSubtarget &PPCSubTarget; + public: explicit PPCTargetLowering(PPCTargetMachine &TM); @@ -285,13 +279,13 @@ namespace llvm { /// LowerOperation - Provide custom lowering hooks for some operations. /// - virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG); + virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const; /// ReplaceNodeResults - Replace the results of node with an illegal result /// type with new values built out of custom code. /// virtual void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results, - SelectionDAG &DAG); + SelectionDAG &DAG) const; virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const; @@ -302,9 +296,9 @@ namespace llvm { const SelectionDAG &DAG, unsigned Depth = 0) const; - virtual MachineBasicBlock *EmitInstrWithCustomInserter(MachineInstr *MI, - MachineBasicBlock *MBB, - DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const; + virtual MachineBasicBlock * + EmitInstrWithCustomInserter(MachineInstr *MI, + MachineBasicBlock *MBB) const; MachineBasicBlock *EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *MBB, bool is64Bit, unsigned BinOpcode) const; @@ -355,12 +349,14 @@ namespace llvm { /// probably because the source does not need to be loaded. If /// 'NonScalarIntSafe' is true, that means it's safe to return a /// non-scalar-integer type, e.g. empty string source, constant, or loaded - /// from memory. It returns EVT::Other if SelectionDAG should be responsible - /// for determining it. + /// from memory. 'MemcpyStrSrc' indicates whether the memcpy source is + /// constant so it does not need to be loaded. + /// It returns EVT::Other if the type should be determined using generic + /// target-independent logic. 
virtual EVT - getOptimalMemOpType(uint64_t Size, - unsigned DstAlign, unsigned SrcAlign, - bool NonScalarIntSafe, SelectionDAG &DAG) const; + getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign, + bool NonScalarIntSafe, bool MemcpyStrSrc, + MachineFunction &MF) const; /// getFunctionAlignment - Return the Log2 alignment of this function. virtual unsigned getFunctionAlignment(const Function *F) const; @@ -382,46 +378,43 @@ namespace llvm { SDValue &LROpOut, SDValue &FPOpOut, bool isDarwinABI, - DebugLoc dl); - - SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG); - SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG); - SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG); - SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG); - SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG); - SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG); - SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG); - SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG); - SDValue LowerTRAMPOLINE(SDValue Op, SelectionDAG &DAG); + DebugLoc dl) const; + + SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerTRAMPOLINE(SDValue Op, SelectionDAG &DAG) const; SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG, - int VarArgsFrameIndex, int VarArgsStackOffset, - unsigned VarArgsNumGPR, unsigned VarArgsNumFPR, - const PPCSubtarget &Subtarget); - SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG, int VarArgsFrameIndex, - int VarArgsStackOffset, unsigned VarArgsNumGPR, - unsigned VarArgsNumFPR, const PPCSubtarget &Subtarget); + const PPCSubtarget &Subtarget) const; + SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG, + const PPCSubtarget &Subtarget) const; SDValue LowerSTACKRESTORE(SDValue Op, SelectionDAG &DAG, - const PPCSubtarget &Subtarget); + const PPCSubtarget &Subtarget) const; SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG, - const PPCSubtarget &Subtarget); - SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG); - SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG, DebugLoc dl); - SDValue LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG); - SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG); - SDValue LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG); - SDValue LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG); - SDValue LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG); - SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG); - SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG); - SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG); - SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG); - SDValue LowerMUL(SDValue Op, SelectionDAG &DAG); + const PPCSubtarget &Subtarget) const; + SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG, DebugLoc dl) const; + SDValue LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) const; + 
SDValue LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) const; SDValue LowerCallResult(SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals); + SmallVectorImpl<SDValue> &InVals) const; SDValue FinishCall(CallingConv::ID CallConv, DebugLoc dl, bool isTailCall, bool isVarArg, SelectionDAG &DAG, @@ -431,14 +424,14 @@ namespace llvm { SDValue &Callee, int SPDiff, unsigned NumBytes, const SmallVectorImpl<ISD::InputArg> &Ins, - SmallVectorImpl<SDValue> &InVals); + SmallVectorImpl<SDValue> &InVals) const; virtual SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals); + SmallVectorImpl<SDValue> &InVals) const; virtual SDValue LowerCall(SDValue Chain, SDValue Callee, @@ -446,26 +439,26 @@ namespace llvm { const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals); + SmallVectorImpl<SDValue> &InVals) const; virtual SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::OutputArg> &Outs, - DebugLoc dl, SelectionDAG &DAG); + DebugLoc dl, SelectionDAG &DAG) const; SDValue LowerFormalArguments_Darwin(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals); + SmallVectorImpl<SDValue> &InVals) const; SDValue LowerFormalArguments_SVR4(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals); + SmallVectorImpl<SDValue> &InVals) const; SDValue LowerCall_Darwin(SDValue Chain, SDValue Callee, @@ -473,14 +466,14 @@ namespace llvm { const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals); + SmallVectorImpl<SDValue> &InVals) const; SDValue LowerCall_SVR4(SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg, bool isTailCall, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals); + SmallVectorImpl<SDValue> &InVals) const; }; } diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp index 6b0a282..ae1fbd8 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.cpp +++ b/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -24,10 +24,13 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include "llvm/MC/MCAsmInfo.h" -using namespace llvm; +namespace llvm { extern cl::opt<bool> EnablePPC32RS; // FIXME (64-bit): See PPCRegisterInfo.cpp. extern cl::opt<bool> EnablePPC64RS; // FIXME (64-bit): See PPCRegisterInfo.cpp. 
+} + +using namespace llvm; PPCInstrInfo::PPCInstrInfo(PPCTargetMachine &tm) : TargetInstrInfoImpl(PPCInsts, array_lengthof(PPCInsts)), TM(tm), @@ -642,6 +645,16 @@ PPCInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, MBB.insert(MI, NewMIs[i]); } +MachineInstr* +PPCInstrInfo::emitFrameIndexDebugValue(MachineFunction &MF, + int FrameIx, uint64_t Offset, + const MDNode *MDPtr, + DebugLoc DL) const { + MachineInstrBuilder MIB = BuildMI(MF, DL, get(PPC::DBG_VALUE)); + addFrameReference(MIB, FrameIx, 0, false).addImm(Offset).addMetadata(MDPtr); + return &*MIB; +} + /// foldMemoryOperand - PowerPC (like most RISC's) can only fold spills into /// copy instructions, turning them into load/store instructions. MachineInstr *PPCInstrInfo::foldMemoryOperandImpl(MachineFunction &MF, @@ -778,6 +791,7 @@ unsigned PPCInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const { case PPC::DBG_LABEL: case PPC::EH_LABEL: case PPC::GC_LABEL: + case PPC::DBG_VALUE: return 0; default: return 4; // PowerPC instructions are all 4 bytes diff --git a/lib/Target/PowerPC/PPCInstrInfo.h b/lib/Target/PowerPC/PPCInstrInfo.h index 57facac..9fb6e7d 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.h +++ b/lib/Target/PowerPC/PPCInstrInfo.h @@ -126,6 +126,12 @@ public: unsigned DestReg, int FrameIndex, const TargetRegisterClass *RC) const; + virtual MachineInstr *emitFrameIndexDebugValue(MachineFunction &MF, + int FrameIx, + uint64_t Offset, + const MDNode *MDPtr, + DebugLoc DL) const; + /// foldMemoryOperand - PowerPC (like most RISC's) can only fold spills into /// copy instructions, turning them into load/store instructions. virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF, diff --git a/lib/Target/PowerPC/PPCMachineFunctionInfo.h b/lib/Target/PowerPC/PPCMachineFunctionInfo.h index b359dd3..e2649c8 100644 --- a/lib/Target/PowerPC/PPCMachineFunctionInfo.h +++ b/lib/Target/PowerPC/PPCMachineFunctionInfo.h @@ -58,6 +58,18 @@ private: /// how the caller's stack pointer should be calculated (epilog/dynamicalloc). bool HasFastCall; + /// VarArgsFrameIndex - FrameIndex for start of varargs area. + int VarArgsFrameIndex; + /// VarArgsStackOffset - StackOffset for start of stack + /// arguments. + int VarArgsStackOffset; + /// VarArgsNumGPR - Index of the first unused integer + /// register for parameter passing. + unsigned VarArgsNumGPR; + /// VarArgsNumFPR - Index of the first unused double + /// register for parameter passing. 
+ unsigned VarArgsNumFPR; + public: explicit PPCFunctionInfo(MachineFunction &MF) : FramePointerSaveIndex(0), @@ -66,7 +78,11 @@ public: LRStoreRequired(false), MinReservedArea(0), TailCallSPDelta(0), - HasFastCall(false) {} + HasFastCall(false), + VarArgsFrameIndex(0), + VarArgsStackOffset(0), + VarArgsNumGPR(0), + VarArgsNumFPR(0) {} int getFramePointerSaveIndex() const { return FramePointerSaveIndex; } void setFramePointerSaveIndex(int Idx) { FramePointerSaveIndex = Idx; } @@ -96,6 +112,18 @@ public: void setHasFastCall() { HasFastCall = true; } bool hasFastCall() const { return HasFastCall;} + + int getVarArgsFrameIndex() const { return VarArgsFrameIndex; } + void setVarArgsFrameIndex(int Index) { VarArgsFrameIndex = Index; } + + int getVarArgsStackOffset() const { return VarArgsStackOffset; } + void setVarArgsStackOffset(int Offset) { VarArgsStackOffset = Offset; } + + unsigned getVarArgsNumGPR() const { return VarArgsNumGPR; } + void setVarArgsNumGPR(unsigned Num) { VarArgsNumGPR = Num; } + + unsigned getVarArgsNumFPR() const { return VarArgsNumFPR; } + void setVarArgsNumFPR(unsigned Num) { VarArgsNumFPR = Num; } }; } // end of namespace llvm diff --git a/lib/Target/PowerPC/PPCRegisterInfo.cpp b/lib/Target/PowerPC/PPCRegisterInfo.cpp index 52d87cd..5f1e04e 100644 --- a/lib/Target/PowerPC/PPCRegisterInfo.cpp +++ b/lib/Target/PowerPC/PPCRegisterInfo.cpp @@ -43,7 +43,6 @@ #include "llvm/ADT/BitVector.h" #include "llvm/ADT/STLExtras.h" #include <cstdlib> -using namespace llvm; // FIXME This disables some code that aligns the stack to a boundary // bigger than the default (16 bytes on Darwin) when there is a stack local @@ -56,14 +55,19 @@ using namespace llvm; #define ALIGN_STACK 0 // FIXME (64-bit): Eventually enable by default. +namespace llvm { cl::opt<bool> EnablePPC32RS("enable-ppc32-regscavenger", - cl::init(false), - cl::desc("Enable PPC32 register scavenger"), - cl::Hidden); + cl::init(false), + cl::desc("Enable PPC32 register scavenger"), + cl::Hidden); cl::opt<bool> EnablePPC64RS("enable-ppc64-regscavenger", - cl::init(false), - cl::desc("Enable PPC64 register scavenger"), - cl::Hidden); + cl::init(false), + cl::desc("Enable PPC64 register scavenger"), + cl::Hidden); +} + +using namespace llvm; + #define EnableRegisterScavenging \ ((EnablePPC32RS && !Subtarget.isPPC64()) || \ (EnablePPC64RS && Subtarget.isPPC64())) @@ -405,7 +409,10 @@ PPCRegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const { // static bool needsFP(const MachineFunction &MF) { const MachineFrameInfo *MFI = MF.getFrameInfo(); - return NoFramePointerElim || MFI->hasVarSizedObjects() || + // Naked functions have no stack frame pushed, so we don't have a frame pointer. + if (MF.getFunction()->hasFnAttr(Attribute::Naked)) + return false; + return DisableFramePointerElim(MF) || MFI->hasVarSizedObjects() || (GuaranteedTailCallOpt && MF.getInfo<PPCFunctionInfo>()->hasFastCall()); } @@ -790,7 +797,10 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, // If we're not using a Frame Pointer that has been set to the value of the // SP before having the stack size subtracted from it, then add the stack size // to Offset to get the correct offset. - Offset += MFI->getStackSize(); + // Naked functions have stack size 0, although getStackSize may not reflect that + // because we didn't call all the pieces that compute it for naked functions. + if (!MF.getFunction()->hasFnAttr(Attribute::Naked)) + Offset += MFI->getStackSize(); // If we can, encode the offset directly into the instruction. 
If this is a // normal PPC "ri" instruction, any 16-bit value can be safely encoded. If diff --git a/lib/Target/PowerPC/PPCSchedule.td b/lib/Target/PowerPC/PPCSchedule.td index d589414..9664f14 100644 --- a/lib/Target/PowerPC/PPCSchedule.td +++ b/lib/Target/PowerPC/PPCSchedule.td @@ -15,8 +15,6 @@ def SLU : FuncUnit; // Store/load unit def SRU : FuncUnit; // special register unit def IU1 : FuncUnit; // integer unit 1 (simple) def IU2 : FuncUnit; // integer unit 2 (complex) -def IU3 : FuncUnit; // integer unit 3 (7450 simple) -def IU4 : FuncUnit; // integer unit 4 (7450 simple) def FPU1 : FuncUnit; // floating point unit 1 def FPU2 : FuncUnit; // floating point unit 2 def VPU : FuncUnit; // vector permutation unit @@ -24,7 +22,6 @@ def VIU1 : FuncUnit; // vector integer unit 1 (simple) def VIU2 : FuncUnit; // vector integer unit 2 (complex) def VFPU : FuncUnit; // vector floating point unit - //===----------------------------------------------------------------------===// // Instruction Itinerary classes used for PowerPC // diff --git a/lib/Target/PowerPC/PPCScheduleG3.td b/lib/Target/PowerPC/PPCScheduleG3.td index f72194d..7344763 100644 --- a/lib/Target/PowerPC/PPCScheduleG3.td +++ b/lib/Target/PowerPC/PPCScheduleG3.td @@ -12,7 +12,8 @@ //===----------------------------------------------------------------------===// -def G3Itineraries : ProcessorItineraries<[ +def G3Itineraries : ProcessorItineraries< + [IU1, IU2, FPU1, BPU, SRU, SLU], [ InstrItinData<IntGeneral , [InstrStage<1, [IU1, IU2]>]>, InstrItinData<IntCompare , [InstrStage<1, [IU1, IU2]>]>, InstrItinData<IntDivW , [InstrStage<19, [IU1]>]>, diff --git a/lib/Target/PowerPC/PPCScheduleG4.td b/lib/Target/PowerPC/PPCScheduleG4.td index 92ed20f..7efc693 100644 --- a/lib/Target/PowerPC/PPCScheduleG4.td +++ b/lib/Target/PowerPC/PPCScheduleG4.td @@ -11,7 +11,8 @@ // //===----------------------------------------------------------------------===// -def G4Itineraries : ProcessorItineraries<[ +def G4Itineraries : ProcessorItineraries< + [IU1, IU2, SLU, SRU, BPU, FPU1, VIU1, VIU2, VPU, VFPU], [ InstrItinData<IntGeneral , [InstrStage<1, [IU1, IU2]>]>, InstrItinData<IntCompare , [InstrStage<1, [IU1, IU2]>]>, InstrItinData<IntDivW , [InstrStage<19, [IU1]>]>, diff --git a/lib/Target/PowerPC/PPCScheduleG4Plus.td b/lib/Target/PowerPC/PPCScheduleG4Plus.td index 7474ba4..15056c0 100644 --- a/lib/Target/PowerPC/PPCScheduleG4Plus.td +++ b/lib/Target/PowerPC/PPCScheduleG4Plus.td @@ -11,7 +11,11 @@ // //===----------------------------------------------------------------------===// -def G4PlusItineraries : ProcessorItineraries<[ +def IU3 : FuncUnit; // integer unit 3 (7450 simple) +def IU4 : FuncUnit; // integer unit 4 (7450 simple) + +def G4PlusItineraries : ProcessorItineraries< + [IU1, IU2, IU3, IU4, BPU, SLU, FPU1, VFPU, VIU1, VIU2, VPU], [ InstrItinData<IntGeneral , [InstrStage<1, [IU1, IU2, IU3, IU4]>]>, InstrItinData<IntCompare , [InstrStage<1, [IU1, IU2, IU3, IU4]>]>, InstrItinData<IntDivW , [InstrStage<23, [IU2]>]>, diff --git a/lib/Target/PowerPC/PPCScheduleG5.td b/lib/Target/PowerPC/PPCScheduleG5.td index d282147..2dffc48 100644 --- a/lib/Target/PowerPC/PPCScheduleG5.td +++ b/lib/Target/PowerPC/PPCScheduleG5.td @@ -11,7 +11,8 @@ // //===----------------------------------------------------------------------===// -def G5Itineraries : ProcessorItineraries<[ +def G5Itineraries : ProcessorItineraries< + [IU1, IU2, SLU, BPU, FPU1, FPU2, VFPU, VIU1, VIU2, VPU], [ InstrItinData<IntGeneral , [InstrStage<2, [IU1, IU2]>]>, InstrItinData<IntCompare , 
[InstrStage<3, [IU1, IU2]>]>, InstrItinData<IntDivD , [InstrStage<68, [IU1]>]>, diff --git a/lib/Target/PowerPC/PPCSelectionDAGInfo.cpp b/lib/Target/PowerPC/PPCSelectionDAGInfo.cpp new file mode 100644 index 0000000..c0004a9 --- /dev/null +++ b/lib/Target/PowerPC/PPCSelectionDAGInfo.cpp @@ -0,0 +1,22 @@ +//===-- PPCSelectionDAGInfo.cpp - PowerPC SelectionDAG Info ---------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the PPCSelectionDAGInfo class. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "powerpc-selectiondag-info" +#include "PPCSelectionDAGInfo.h" +using namespace llvm; + +PPCSelectionDAGInfo::PPCSelectionDAGInfo() { +} + +PPCSelectionDAGInfo::~PPCSelectionDAGInfo() { +} diff --git a/lib/Target/PowerPC/PPCSelectionDAGInfo.h b/lib/Target/PowerPC/PPCSelectionDAGInfo.h new file mode 100644 index 0000000..3ad3418 --- /dev/null +++ b/lib/Target/PowerPC/PPCSelectionDAGInfo.h @@ -0,0 +1,29 @@ +//===-- PPCSelectionDAGInfo.h - PowerPC SelectionDAG Info -------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the PowerPC subclass for TargetSelectionDAGInfo. +// +//===----------------------------------------------------------------------===// + +#ifndef POWERPCCSELECTIONDAGINFO_H +#define POWERPCCSELECTIONDAGINFO_H + +#include "llvm/Target/TargetSelectionDAGInfo.h" + +namespace llvm { + +class PPCSelectionDAGInfo : public TargetSelectionDAGInfo { +public: + PPCSelectionDAGInfo(); + ~PPCSelectionDAGInfo(); +}; + +} + +#endif diff --git a/lib/Target/PowerPC/PPCTargetMachine.h b/lib/Target/PowerPC/PPCTargetMachine.h index ac9ae2b..35e33a2 100644 --- a/lib/Target/PowerPC/PPCTargetMachine.h +++ b/lib/Target/PowerPC/PPCTargetMachine.h @@ -44,8 +44,8 @@ public: virtual const PPCInstrInfo *getInstrInfo() const { return &InstrInfo; } virtual const PPCFrameInfo *getFrameInfo() const { return &FrameInfo; } virtual PPCJITInfo *getJITInfo() { return &JITInfo; } - virtual PPCTargetLowering *getTargetLowering() const { - return const_cast<PPCTargetLowering*>(&TLInfo); + virtual const PPCTargetLowering *getTargetLowering() const { + return &TLInfo; } virtual const PPCRegisterInfo *getRegisterInfo() const { return &InstrInfo.getRegisterInfo(); diff --git a/lib/Target/README.txt b/lib/Target/README.txt index 052a575..144bf5d 100644 --- a/lib/Target/README.txt +++ b/lib/Target/README.txt @@ -263,19 +263,6 @@ if anyone cared enough about sincos. 
//===---------------------------------------------------------------------===// -Turn this into a single byte store with no load (the other 3 bytes are -unmodified): - -define void @test(i32* %P) { - %tmp = load i32* %P - %tmp14 = or i32 %tmp, 3305111552 - %tmp15 = and i32 %tmp14, 3321888767 - store i32 %tmp15, i32* %P - ret void -} - -//===---------------------------------------------------------------------===// - quantum_sigma_x in 462.libquantum contains the following loop: for(i=0; i<reg->size; i++) @@ -1843,3 +1830,21 @@ entry: //===---------------------------------------------------------------------===// +We should use DSE + llvm.lifetime.end to delete dead vtable pointer updates. +See GCC PR34949 + +//===---------------------------------------------------------------------===// + +In this code: + +long foo(long x) { + return x > 1 ? x : 1; +} + +LLVM emits a comparison with 1 instead of 0. 0 would be equivalent +and cheaper on most targets. + +LLVM prefers comparisons with zero over non-zero in general, but in this +case it chooses instead to keep the max operation obvious. + +//===---------------------------------------------------------------------===// diff --git a/lib/Target/Sparc/AsmPrinter/CMakeLists.txt b/lib/Target/Sparc/AsmPrinter/CMakeLists.txt index e3ca18e..da629f6 100644 --- a/lib/Target/Sparc/AsmPrinter/CMakeLists.txt +++ b/lib/Target/Sparc/AsmPrinter/CMakeLists.txt @@ -3,4 +3,4 @@ include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/ add_llvm_library(LLVMSparcAsmPrinter SparcAsmPrinter.cpp ) -add_dependencies(LLVMSparcAsmPrinter SparcCodeGenTable_gen)
\ No newline at end of file +add_dependencies(LLVMSparcAsmPrinter SparcCodeGenTable_gen) diff --git a/lib/Target/Sparc/CMakeLists.txt b/lib/Target/Sparc/CMakeLists.txt index 74f320a..684cadf 100644 --- a/lib/Target/Sparc/CMakeLists.txt +++ b/lib/Target/Sparc/CMakeLists.txt @@ -20,6 +20,7 @@ add_llvm_target(SparcCodeGen SparcRegisterInfo.cpp SparcSubtarget.cpp SparcTargetMachine.cpp + SparcSelectionDAGInfo.cpp ) target_link_libraries (LLVMSparcCodeGen LLVMSelectionDAG) diff --git a/lib/Target/Sparc/SparcISelDAGToDAG.cpp b/lib/Target/Sparc/SparcISelDAGToDAG.cpp index a7d1805..698923e 100644 --- a/lib/Target/Sparc/SparcISelDAGToDAG.cpp +++ b/lib/Target/Sparc/SparcISelDAGToDAG.cpp @@ -11,7 +11,6 @@ // //===----------------------------------------------------------------------===// -#include "SparcISelLowering.h" #include "SparcTargetMachine.h" #include "llvm/Intrinsics.h" #include "llvm/CodeGen/SelectionDAGISel.h" @@ -68,7 +67,6 @@ private: } // end anonymous namespace SDNode* SparcDAGToDAGISel::getGlobalBaseReg() { - MachineFunction *MF = BB->getParent(); unsigned GlobalBaseReg = TM.getInstrInfo()->getGlobalBaseReg(MF); return CurDAG->getRegister(GlobalBaseReg, TLI.getPointerTy()).getNode(); } diff --git a/lib/Target/Sparc/SparcISelLowering.cpp b/lib/Target/Sparc/SparcISelLowering.cpp index 4e93ef0..f47e53a 100644 --- a/lib/Target/Sparc/SparcISelLowering.cpp +++ b/lib/Target/Sparc/SparcISelLowering.cpp @@ -14,6 +14,7 @@ #include "SparcISelLowering.h" #include "SparcTargetMachine.h" +#include "SparcMachineFunctionInfo.h" #include "llvm/Function.h" #include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/MachineFrameInfo.h" @@ -37,7 +38,7 @@ SDValue SparcTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::OutputArg> &Outs, - DebugLoc dl, SelectionDAG &DAG) { + DebugLoc dl, SelectionDAG &DAG) const { // CCValAssign - represent the assignment of the return value to locations. SmallVector<CCValAssign, 16> RVLocs; @@ -85,10 +86,12 @@ SparcTargetLowering::LowerFormalArguments(SDValue Chain, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals) { + SmallVectorImpl<SDValue> &InVals) + const { MachineFunction &MF = DAG.getMachineFunction(); MachineRegisterInfo &RegInfo = MF.getRegInfo(); + SparcMachineFunctionInfo *FuncInfo = MF.getInfo<SparcMachineFunctionInfo>(); // Assign locations to all of the incoming arguments. SmallVector<CCValAssign, 16> ArgLocs; @@ -226,7 +229,7 @@ SparcTargetLowering::LowerFormalArguments(SDValue Chain, // Store remaining ArgRegs to the stack if this is a varargs function. if (isVarArg) { // Remember the vararg offset for the va_start implementation. - VarArgsFrameOffset = ArgOffset; + FuncInfo->setVarArgsFrameOffset(ArgOffset); std::vector<SDValue> OutChains; @@ -261,7 +264,7 @@ SparcTargetLowering::LowerCall(SDValue Chain, SDValue Callee, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals) { + SmallVectorImpl<SDValue> &InVals) const { // Sparc target does not yet support tail call optimization. 
isTailCall = false; @@ -752,8 +755,8 @@ static void LookThroughSetCC(SDValue &LHS, SDValue &RHS, } SDValue SparcTargetLowering::LowerGlobalAddress(SDValue Op, - SelectionDAG &DAG) { - GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); + SelectionDAG &DAG) const { + const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); // FIXME there isn't really any debug info here DebugLoc dl = Op.getDebugLoc(); SDValue GA = DAG.getTargetGlobalAddress(GV, MVT::i32); @@ -773,11 +776,11 @@ SDValue SparcTargetLowering::LowerGlobalAddress(SDValue Op, } SDValue SparcTargetLowering::LowerConstantPool(SDValue Op, - SelectionDAG &DAG) { + SelectionDAG &DAG) const { ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Op); // FIXME there isn't really any debug info here DebugLoc dl = Op.getDebugLoc(); - Constant *C = N->getConstVal(); + const Constant *C = N->getConstVal(); SDValue CP = DAG.getTargetConstantPool(C, MVT::i32, N->getAlignment()); SDValue Hi = DAG.getNode(SPISD::Hi, dl, MVT::i32, CP); SDValue Lo = DAG.getNode(SPISD::Lo, dl, MVT::i32, CP); @@ -873,14 +876,18 @@ static SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) { } static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG, - SparcTargetLowering &TLI) { + const SparcTargetLowering &TLI) { + MachineFunction &MF = DAG.getMachineFunction(); + SparcMachineFunctionInfo *FuncInfo = MF.getInfo<SparcMachineFunctionInfo>(); + // vastart just stores the address of the VarArgsFrameIndex slot into the // memory location argument. DebugLoc dl = Op.getDebugLoc(); - SDValue Offset = DAG.getNode(ISD::ADD, dl, MVT::i32, - DAG.getRegister(SP::I6, MVT::i32), - DAG.getConstant(TLI.getVarArgsFrameOffset(), - MVT::i32)); + SDValue Offset = + DAG.getNode(ISD::ADD, dl, MVT::i32, + DAG.getRegister(SP::I6, MVT::i32), + DAG.getConstant(FuncInfo->getVarArgsFrameOffset(), + MVT::i32)); const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); return DAG.getStore(Op.getOperand(0), dl, Offset, Op.getOperand(1), SV, 0, false, false, 0); @@ -939,7 +946,7 @@ static SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) { SDValue SparcTargetLowering:: -LowerOperation(SDValue Op, SelectionDAG &DAG) { +LowerOperation(SDValue Op, SelectionDAG &DAG) const { switch (Op.getOpcode()) { default: llvm_unreachable("Should not custom lower this!"); // Frame & Return address. Currently unimplemented @@ -961,8 +968,7 @@ LowerOperation(SDValue Op, SelectionDAG &DAG) { MachineBasicBlock * SparcTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, - MachineBasicBlock *BB, - DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const { + MachineBasicBlock *BB) const { const TargetInstrInfo &TII = *getTargetMachine().getInstrInfo(); unsigned BROpcode; unsigned CC; @@ -1006,12 +1012,9 @@ SparcTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, F->insert(It, sinkMBB); // Update machine-CFG edges by first adding all successors of the current // block to the new block which will contain the Phi node for the select. - // Also inform sdisel of the edge changes. for (MachineBasicBlock::succ_iterator I = BB->succ_begin(), - E = BB->succ_end(); I != E; ++I) { - EM->insert(std::make_pair(*I, sinkMBB)); + E = BB->succ_end(); I != E; ++I) sinkMBB->addSuccessor(*I); - } // Next, remove all successors of the current block, and add the true // and fallthrough blocks as its successors. 
while (!BB->succ_empty()) diff --git a/lib/Target/Sparc/SparcISelLowering.h b/lib/Target/Sparc/SparcISelLowering.h index 2ee73c1..5ebdcac 100644 --- a/lib/Target/Sparc/SparcISelLowering.h +++ b/lib/Target/Sparc/SparcISelLowering.h @@ -41,12 +41,9 @@ namespace llvm { } class SparcTargetLowering : public TargetLowering { - int VarArgsFrameOffset; // Frame offset to start of varargs area. public: SparcTargetLowering(TargetMachine &TM); - virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG); - - int getVarArgsFrameOffset() const { return VarArgsFrameOffset; } + virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const; /// computeMaskedBitsForTargetNode - Determine which of the bits specified /// in Mask are known to be either zero or one and return them in the @@ -58,9 +55,9 @@ namespace llvm { const SelectionDAG &DAG, unsigned Depth = 0) const; - virtual MachineBasicBlock *EmitInstrWithCustomInserter(MachineInstr *MI, - MachineBasicBlock *MBB, - DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const; + virtual MachineBasicBlock * + EmitInstrWithCustomInserter(MachineInstr *MI, + MachineBasicBlock *MBB) const; virtual const char *getTargetNodeName(unsigned Opcode) const; @@ -82,7 +79,7 @@ namespace llvm { bool isVarArg, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals); + SmallVectorImpl<SDValue> &InVals) const; virtual SDValue LowerCall(SDValue Chain, SDValue Callee, @@ -91,16 +88,16 @@ namespace llvm { const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals); + SmallVectorImpl<SDValue> &InVals) const; virtual SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::OutputArg> &Outs, - DebugLoc dl, SelectionDAG &DAG); + DebugLoc dl, SelectionDAG &DAG) const; - SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG); - SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG); + SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const; }; } // end namespace llvm diff --git a/lib/Target/Sparc/SparcMachineFunctionInfo.h b/lib/Target/Sparc/SparcMachineFunctionInfo.h index 56d8708..e34c131 100644 --- a/lib/Target/Sparc/SparcMachineFunctionInfo.h +++ b/lib/Target/Sparc/SparcMachineFunctionInfo.h @@ -20,12 +20,20 @@ namespace llvm { class SparcMachineFunctionInfo : public MachineFunctionInfo { private: unsigned GlobalBaseReg; + + /// VarArgsFrameOffset - Frame offset to start of varargs area. 
+ int VarArgsFrameOffset; + public: - SparcMachineFunctionInfo() : GlobalBaseReg(0) {} - explicit SparcMachineFunctionInfo(MachineFunction &MF) : GlobalBaseReg(0) {} + SparcMachineFunctionInfo() : GlobalBaseReg(0), VarArgsFrameOffset(0) {} + explicit SparcMachineFunctionInfo(MachineFunction &MF) + : GlobalBaseReg(0), VarArgsFrameOffset(0) {} unsigned getGlobalBaseReg() const { return GlobalBaseReg; } void setGlobalBaseReg(unsigned Reg) { GlobalBaseReg = Reg; } + + int getVarArgsFrameOffset() const { return VarArgsFrameOffset; } + void setVarArgsFrameOffset(int Offset) { VarArgsFrameOffset = Offset; } }; } diff --git a/lib/Target/Sparc/SparcSelectionDAGInfo.cpp b/lib/Target/Sparc/SparcSelectionDAGInfo.cpp new file mode 100644 index 0000000..4825aa9 --- /dev/null +++ b/lib/Target/Sparc/SparcSelectionDAGInfo.cpp @@ -0,0 +1,22 @@ +//===-- SparcSelectionDAGInfo.cpp - Sparc SelectionDAG Info ---------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the SparcSelectionDAGInfo class. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "sparc-selectiondag-info" +#include "SparcSelectionDAGInfo.h" +using namespace llvm; + +SparcSelectionDAGInfo::SparcSelectionDAGInfo() { +} + +SparcSelectionDAGInfo::~SparcSelectionDAGInfo() { +} diff --git a/lib/Target/Sparc/SparcSelectionDAGInfo.h b/lib/Target/Sparc/SparcSelectionDAGInfo.h new file mode 100644 index 0000000..bc1b561 --- /dev/null +++ b/lib/Target/Sparc/SparcSelectionDAGInfo.h @@ -0,0 +1,29 @@ +//===-- SparcSelectionDAGInfo.h - Sparc SelectionDAG Info -------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the Sparc subclass for TargetSelectionDAGInfo. 
+// +//===----------------------------------------------------------------------===// + +#ifndef SPARCSELECTIONDAGINFO_H +#define SPARCSELECTIONDAGINFO_H + +#include "llvm/Target/TargetSelectionDAGInfo.h" + +namespace llvm { + +class SparcSelectionDAGInfo : public TargetSelectionDAGInfo { +public: + SparcSelectionDAGInfo(); + ~SparcSelectionDAGInfo(); +}; + +} + +#endif diff --git a/lib/Target/Sparc/SparcTargetMachine.h b/lib/Target/Sparc/SparcTargetMachine.h index 5834d08..1367a31 100644 --- a/lib/Target/Sparc/SparcTargetMachine.h +++ b/lib/Target/Sparc/SparcTargetMachine.h @@ -39,8 +39,8 @@ public: virtual const SparcRegisterInfo *getRegisterInfo() const { return &InstrInfo.getRegisterInfo(); } - virtual SparcTargetLowering* getTargetLowering() const { - return const_cast<SparcTargetLowering*>(&TLInfo); + virtual const SparcTargetLowering* getTargetLowering() const { + return &TLInfo; } virtual const TargetData *getTargetData() const { return &DataLayout; } diff --git a/lib/Target/SystemZ/CMakeLists.txt b/lib/Target/SystemZ/CMakeLists.txt index 81e51d8..880e56f 100644 --- a/lib/Target/SystemZ/CMakeLists.txt +++ b/lib/Target/SystemZ/CMakeLists.txt @@ -18,6 +18,7 @@ add_llvm_target(SystemZCodeGen SystemZRegisterInfo.cpp SystemZSubtarget.cpp SystemZTargetMachine.cpp + SystemZSelectionDAGInfo.cpp ) target_link_libraries (LLVMSystemZCodeGen LLVMSelectionDAG) diff --git a/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp b/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp index 8152e1d..75d563b 100644 --- a/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp +++ b/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp @@ -12,7 +12,6 @@ //===----------------------------------------------------------------------===// #include "SystemZ.h" -#include "SystemZISelLowering.h" #include "SystemZTargetMachine.h" #include "llvm/DerivedTypes.h" #include "llvm/Function.h" @@ -85,7 +84,7 @@ namespace { /// namespace { class SystemZDAGToDAGISel : public SelectionDAGISel { - SystemZTargetLowering &Lowering; + const SystemZTargetLowering &Lowering; const SystemZSubtarget &Subtarget; void getAddressOperandsRI(const SystemZRRIAddressMode &AM, @@ -588,7 +587,7 @@ bool SystemZDAGToDAGISel::SelectLAAddr(SDNode *Op, SDValue Addr, bool SystemZDAGToDAGISel::TryFoldLoad(SDNode *P, SDValue N, SDValue &Base, SDValue &Disp, SDValue &Index) { if (ISD::isNON_EXTLoad(N.getNode()) && - IsLegalToFold(N, P, P)) + IsLegalToFold(N, P, P, OptLevel)) return SelectAddrRRI20(P, N.getOperand(1), Base, Disp, Index); return false; } diff --git a/lib/Target/SystemZ/SystemZISelLowering.cpp b/lib/Target/SystemZ/SystemZISelLowering.cpp index 6f4b30f..e98f18b 100644 --- a/lib/Target/SystemZ/SystemZISelLowering.cpp +++ b/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -158,7 +158,8 @@ SystemZTargetLowering::SystemZTargetLowering(SystemZTargetMachine &tm) : setTruncStoreAction(MVT::f64, MVT::f32, Expand); } -SDValue SystemZTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) { +SDValue SystemZTargetLowering::LowerOperation(SDValue Op, + SelectionDAG &DAG) const { switch (Op.getOpcode()) { case ISD::BR_CC: return LowerBR_CC(Op, DAG); case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG); @@ -236,7 +237,8 @@ SystemZTargetLowering::LowerFormalArguments(SDValue Chain, &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals) { + SmallVectorImpl<SDValue> &InVals) + const { switch (CallConv) { default: @@ -254,7 +256,7 @@ SystemZTargetLowering::LowerCall(SDValue Chain, SDValue Callee, const SmallVectorImpl<ISD::OutputArg> &Outs, const 
SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals) { + SmallVectorImpl<SDValue> &InVals) const { // SystemZ target does not yet support tail call optimization. isTailCall = false; @@ -280,7 +282,8 @@ SystemZTargetLowering::LowerCCCArguments(SDValue Chain, &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals) { + SmallVectorImpl<SDValue> &InVals) + const { MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); @@ -293,7 +296,7 @@ SystemZTargetLowering::LowerCCCArguments(SDValue Chain, CCInfo.AnalyzeFormalArguments(Ins, CC_SystemZ); if (isVarArg) - llvm_report_error("Varargs not supported yet"); + report_fatal_error("Varargs not supported yet"); for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { SDValue ArgValue; @@ -371,7 +374,7 @@ SystemZTargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee, &Outs, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals) { + SmallVectorImpl<SDValue> &InVals) const { MachineFunction &MF = DAG.getMachineFunction(); @@ -505,7 +508,7 @@ SystemZTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals) { + SmallVectorImpl<SDValue> &InVals) const { // Assign locations to each value returned by this call. SmallVector<CCValAssign, 16> RVLocs; @@ -547,7 +550,7 @@ SDValue SystemZTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::OutputArg> &Outs, - DebugLoc dl, SelectionDAG &DAG) { + DebugLoc dl, SelectionDAG &DAG) const { // CCValAssign - represent the assignment of the return value to a location SmallVector<CCValAssign, 16> RVLocs; @@ -600,7 +603,7 @@ SystemZTargetLowering::LowerReturn(SDValue Chain, SDValue SystemZTargetLowering::EmitCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC, SDValue &SystemZCC, - SelectionDAG &DAG) { + SelectionDAG &DAG) const { // FIXME: Emit a test if RHS is zero bool isUnsigned = false; @@ -678,7 +681,7 @@ SDValue SystemZTargetLowering::EmitCmp(SDValue LHS, SDValue RHS, } -SDValue SystemZTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) { +SDValue SystemZTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const { SDValue Chain = Op.getOperand(0); ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get(); SDValue LHS = Op.getOperand(2); @@ -692,7 +695,8 @@ SDValue SystemZTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) { Chain, Dest, SystemZCC, Flag); } -SDValue SystemZTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) { +SDValue SystemZTargetLowering::LowerSELECT_CC(SDValue Op, + SelectionDAG &DAG) const { SDValue LHS = Op.getOperand(0); SDValue RHS = Op.getOperand(1); SDValue TrueV = Op.getOperand(2); @@ -714,9 +718,9 @@ SDValue SystemZTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) { } SDValue SystemZTargetLowering::LowerGlobalAddress(SDValue Op, - SelectionDAG &DAG) { + SelectionDAG &DAG) const { DebugLoc dl = Op.getDebugLoc(); - GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); + const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); int64_t Offset = cast<GlobalAddressSDNode>(Op)->getOffset(); bool IsPic = getTargetMachine().getRelocationModel() == Reloc::PIC_; @@ -753,7 +757,7 @@ SDValue SystemZTargetLowering::LowerGlobalAddress(SDValue Op, // FIXME: PIC here SDValue 
SystemZTargetLowering::LowerJumpTable(SDValue Op, - SelectionDAG &DAG) { + SelectionDAG &DAG) const { DebugLoc dl = Op.getDebugLoc(); JumpTableSDNode *JT = cast<JumpTableSDNode>(Op); SDValue Result = DAG.getTargetJumpTable(JT->getIndex(), getPointerTy()); @@ -765,7 +769,7 @@ SDValue SystemZTargetLowering::LowerJumpTable(SDValue Op, // FIXME: PIC here // FIXME: This is just dirty hack. We need to lower cpool properly SDValue SystemZTargetLowering::LowerConstantPool(SDValue Op, - SelectionDAG &DAG) { + SelectionDAG &DAG) const { DebugLoc dl = Op.getDebugLoc(); ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op); @@ -795,8 +799,7 @@ const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const { MachineBasicBlock* SystemZTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, - MachineBasicBlock *BB, - DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const { + MachineBasicBlock *BB) const { const SystemZInstrInfo &TII = *TM.getInstrInfo(); DebugLoc dl = MI->getDebugLoc(); assert((MI->getOpcode() == SystemZ::Select32 || @@ -827,10 +830,6 @@ SystemZTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, BuildMI(BB, dl, TII.getBrCond(CC)).addMBB(copy1MBB); F->insert(I, copy0MBB); F->insert(I, copy1MBB); - // Inform sdisel of the edge changes. - for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(), - SE = BB->succ_end(); SI != SE; ++SI) - EM->insert(std::make_pair(*SI, copy1MBB)); // Update machine-CFG edges by transferring all successors of the current // block to the new block which will contain the Phi node for the select. copy1MBB->transferSuccessors(BB); diff --git a/lib/Target/SystemZ/SystemZISelLowering.h b/lib/Target/SystemZ/SystemZISelLowering.h index 36ff994..94bd906 100644 --- a/lib/Target/SystemZ/SystemZISelLowering.h +++ b/lib/Target/SystemZ/SystemZISelLowering.h @@ -58,7 +58,7 @@ namespace llvm { explicit SystemZTargetLowering(SystemZTargetMachine &TM); /// LowerOperation - Provide custom lowering hooks for some operations. - virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG); + virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const; /// getTargetNodeName - This method returns the name of a target specific /// DAG node. @@ -74,20 +74,19 @@ namespace llvm { TargetLowering::ConstraintType getConstraintType(const std::string &Constraint) const; - SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG); - SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG); - SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG); - SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG); - SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG); + SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const; SDValue EmitCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC, SDValue &SystemZCC, - SelectionDAG &DAG); + SelectionDAG &DAG) const; MachineBasicBlock* EmitInstrWithCustomInserter(MachineInstr *MI, - MachineBasicBlock *BB, - DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const; + MachineBasicBlock *BB) const; /// isFPImmLegal - Returns true if the target can instruction select the /// specified FP immediate natively. 
If false, the legalizer will @@ -101,7 +100,7 @@ namespace llvm { const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals); + SmallVectorImpl<SDValue> &InVals) const; SDValue LowerCCCArguments(SDValue Chain, CallingConv::ID CallConv, @@ -109,33 +108,33 @@ namespace llvm { const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals); + SmallVectorImpl<SDValue> &InVals) const; SDValue LowerCallResult(SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals); + SmallVectorImpl<SDValue> &InVals) const; virtual SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals); + SmallVectorImpl<SDValue> &InVals) const; virtual SDValue LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg, bool &isTailCall, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals); + SmallVectorImpl<SDValue> &InVals) const; virtual SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::OutputArg> &Outs, - DebugLoc dl, SelectionDAG &DAG); + DebugLoc dl, SelectionDAG &DAG) const; const SystemZSubtarget &Subtarget; const SystemZTargetMachine &TM; diff --git a/lib/Target/SystemZ/SystemZInstrBuilder.h b/lib/Target/SystemZ/SystemZInstrBuilder.h index b69d2f6..fa87061 100644 --- a/lib/Target/SystemZ/SystemZInstrBuilder.h +++ b/lib/Target/SystemZ/SystemZInstrBuilder.h @@ -44,7 +44,7 @@ struct SystemZAddressMode { unsigned IndexReg; int32_t Disp; - GlobalValue *GV; + const GlobalValue *GV; SystemZAddressMode() : BaseType(RegBase), IndexReg(0), Disp(0) { Base.Reg = 0; diff --git a/lib/Target/SystemZ/SystemZMCAsmInfo.cpp b/lib/Target/SystemZ/SystemZMCAsmInfo.cpp index 1a09206..f9ccc47 100644 --- a/lib/Target/SystemZ/SystemZMCAsmInfo.cpp +++ b/lib/Target/SystemZ/SystemZMCAsmInfo.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "SystemZMCAsmInfo.h" +#include "llvm/MC/MCContext.h" #include "llvm/MC/MCSectionELF.h" using namespace llvm; @@ -21,7 +22,8 @@ SystemZMCAsmInfo::SystemZMCAsmInfo(const Target &T, const StringRef &TT) { PCSymbol = "."; } -MCSection *SystemZMCAsmInfo::getNonexecutableStackSection(MCContext &Ctx) const{ - return MCSectionELF::Create(".note.GNU-stack", MCSectionELF::SHT_PROGBITS, - 0, SectionKind::getMetadata(), false, Ctx); +const MCSection *SystemZMCAsmInfo:: +getNonexecutableStackSection(MCContext &Ctx) const{ + return Ctx.getELFSection(".note.GNU-stack", MCSectionELF::SHT_PROGBITS, + 0, SectionKind::getMetadata(), false); } diff --git a/lib/Target/SystemZ/SystemZMCAsmInfo.h b/lib/Target/SystemZ/SystemZMCAsmInfo.h index 00cb99b..87908f2 100644 --- a/lib/Target/SystemZ/SystemZMCAsmInfo.h +++ b/lib/Target/SystemZ/SystemZMCAsmInfo.h @@ -22,7 +22,7 @@ namespace llvm { struct SystemZMCAsmInfo : public MCAsmInfo { explicit SystemZMCAsmInfo(const Target &T, const StringRef &TT); - virtual MCSection *getNonexecutableStackSection(MCContext &Ctx) const; + virtual const MCSection *getNonexecutableStackSection(MCContext &Ctx) const; }; } // namespace llvm diff --git 
a/lib/Target/SystemZ/SystemZRegisterInfo.cpp b/lib/Target/SystemZ/SystemZRegisterInfo.cpp index 302c418..638fd17 100644 --- a/lib/Target/SystemZ/SystemZRegisterInfo.cpp +++ b/lib/Target/SystemZ/SystemZRegisterInfo.cpp @@ -77,7 +77,7 @@ BitVector SystemZRegisterInfo::getReservedRegs(const MachineFunction &MF) const /// allocas or if frame pointer elimination is disabled. bool SystemZRegisterInfo::hasFP(const MachineFunction &MF) const { const MachineFrameInfo *MFI = MF.getFrameInfo(); - return NoFramePointerElim || MFI->hasVarSizedObjects(); + return DisableFramePointerElim(MF) || MFI->hasVarSizedObjects(); } void SystemZRegisterInfo:: @@ -200,7 +200,7 @@ void emitSPUpdate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, uint64_t ThisVal = (Offset > Chunk) ? Chunk : Offset; MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(Opc), SystemZ::R15D) - .addReg(SystemZ::R15D).addImm((isSub ? -(int64_t)ThisVal : ThisVal)); + .addReg(SystemZ::R15D).addImm(isSub ? -ThisVal : ThisVal); // The PSW implicit def is dead. MI->getOperand(3).setIsDead(); Offset -= ThisVal; diff --git a/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp b/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp new file mode 100644 index 0000000..87c831b --- /dev/null +++ b/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp @@ -0,0 +1,22 @@ +//===-- SystemZSelectionDAGInfo.cpp - SystemZ SelectionDAG Info -----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the SystemZSelectionDAGInfo class. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "systemz-selectiondag-info" +#include "SystemZSelectionDAGInfo.h" +using namespace llvm; + +SystemZSelectionDAGInfo::SystemZSelectionDAGInfo() { +} + +SystemZSelectionDAGInfo::~SystemZSelectionDAGInfo() { +} diff --git a/lib/Target/SystemZ/SystemZSelectionDAGInfo.h b/lib/Target/SystemZ/SystemZSelectionDAGInfo.h new file mode 100644 index 0000000..5292de9 --- /dev/null +++ b/lib/Target/SystemZ/SystemZSelectionDAGInfo.h @@ -0,0 +1,29 @@ +//===-- SystemZSelectionDAGInfo.h - SystemZ SelectionDAG Info ---*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the SystemZ subclass for TargetSelectionDAGInfo. 
+// +//===----------------------------------------------------------------------===// + +#ifndef SYSTEMZSELECTIONDAGINFO_H +#define SYSTEMZSELECTIONDAGINFO_H + +#include "llvm/Target/TargetSelectionDAGInfo.h" + +namespace llvm { + +class SystemZSelectionDAGInfo : public TargetSelectionDAGInfo { +public: + SystemZSelectionDAGInfo(); + ~SystemZSelectionDAGInfo(); +}; + +} + +#endif diff --git a/lib/Target/SystemZ/SystemZTargetMachine.h b/lib/Target/SystemZ/SystemZTargetMachine.h index 551aeb5..d3357cc 100644 --- a/lib/Target/SystemZ/SystemZTargetMachine.h +++ b/lib/Target/SystemZ/SystemZTargetMachine.h @@ -49,8 +49,8 @@ public: return &InstrInfo.getRegisterInfo(); } - virtual SystemZTargetLowering *getTargetLowering() const { - return const_cast<SystemZTargetLowering*>(&TLInfo); + virtual const SystemZTargetLowering *getTargetLowering() const { + return &TLInfo; } virtual bool addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel); diff --git a/lib/Target/TargetData.cpp b/lib/Target/TargetData.cpp index 643b397..5870d8a 100644 --- a/lib/Target/TargetData.cpp +++ b/lib/Target/TargetData.cpp @@ -228,7 +228,7 @@ void TargetData::init(StringRef Desc) { /// @note This has to exist, because this is a pass, but it should never be /// used. TargetData::TargetData() : ImmutablePass(&ID) { - llvm_report_error("Bad TargetData ctor used. " + report_fatal_error("Bad TargetData ctor used. " "Tool did not specify a TargetData to use?"); } @@ -269,18 +269,8 @@ unsigned TargetData::getAlignmentInfo(AlignTypeEnum AlignType, return ABIInfo ? Alignments[i].ABIAlign : Alignments[i].PrefAlign; // The best match so far depends on what we're looking for. - if (AlignType == VECTOR_ALIGN && Alignments[i].AlignType == VECTOR_ALIGN) { - // If this is a specification for a smaller vector type, we will fall back - // to it. This happens because <128 x double> can be implemented in terms - // of 64 <2 x double>. - if (Alignments[i].TypeBitWidth < BitWidth) { - // Verify that we pick the biggest of the fallbacks. - if (BestMatchIdx == -1 || - Alignments[BestMatchIdx].TypeBitWidth < Alignments[i].TypeBitWidth) - BestMatchIdx = i; - } - } else if (AlignType == INTEGER_ALIGN && - Alignments[i].AlignType == INTEGER_ALIGN) { + if (AlignType == INTEGER_ALIGN && + Alignments[i].AlignType == INTEGER_ALIGN) { // The "best match" for integers is the smallest size that is larger than // the BitWidth requested. if (Alignments[i].TypeBitWidth > BitWidth && (BestMatchIdx == -1 || @@ -303,10 +293,15 @@ unsigned TargetData::getAlignmentInfo(AlignTypeEnum AlignType, } else { assert(AlignType == VECTOR_ALIGN && "Unknown alignment type!"); - // If we didn't find a vector size that is smaller or equal to this type, - // then we will end up scalarizing this to its element type. Just return - // the alignment of the element. - return getAlignment(cast<VectorType>(Ty)->getElementType(), ABIInfo); + // By default, use natural alignment for vector types. This is consistent + // with what clang and llvm-gcc do. + unsigned Align = getTypeAllocSize(cast<VectorType>(Ty)->getElementType()); + Align *= cast<VectorType>(Ty)->getNumElements(); + // If the alignment is not a power of 2, round up to the next power of 2. + // This happens for non-power-of-2 length vectors. 
+ if (Align & (Align-1)) + Align = llvm::NextPowerOf2(Align); + return Align; } } @@ -630,8 +625,8 @@ uint64_t TargetData::getIndexedOffset(const Type *ptrTy, Value* const* Indices, Ty = cast<SequentialType>(Ty)->getElementType(); // Get the array index and the size of each array element. - int64_t arrayIdx = cast<ConstantInt>(Indices[CurIDX])->getSExtValue(); - Result += arrayIdx * (int64_t)getTypeAllocSize(Ty); + if (int64_t arrayIdx = cast<ConstantInt>(Indices[CurIDX])->getSExtValue()) + Result += arrayIdx * (int64_t)getTypeAllocSize(Ty); } } diff --git a/lib/Target/TargetLoweringObjectFile.cpp b/lib/Target/TargetLoweringObjectFile.cpp index 44722b3..b9372d0 100644 --- a/lib/Target/TargetLoweringObjectFile.cpp +++ b/lib/Target/TargetLoweringObjectFile.cpp @@ -310,7 +310,7 @@ getExprForDwarfReference(const MCSymbol *Sym, Mangler *Mang, switch (Encoding & 0xF0) { default: - llvm_report_error("We do not support this DWARF encoding yet!"); + report_fatal_error("We do not support this DWARF encoding yet!"); case dwarf::DW_EH_PE_absptr: // Do nothing special return Res; diff --git a/lib/Target/TargetMachine.cpp b/lib/Target/TargetMachine.cpp index 88871e3..ac67c91 100644 --- a/lib/Target/TargetMachine.cpp +++ b/lib/Target/TargetMachine.cpp @@ -11,6 +11,8 @@ // //===----------------------------------------------------------------------===// +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" @@ -25,6 +27,7 @@ namespace llvm { bool LessPreciseFPMADOption; bool PrintMachineCode; bool NoFramePointerElim; + bool NoFramePointerElimNonLeaf; bool NoExcessFPPrecision; bool UnsafeFPMath; bool FiniteOnlyFPMathOption; @@ -33,8 +36,7 @@ namespace llvm { FloatABI::ABIType FloatABIType; bool NoImplicitFloat; bool NoZerosInBSS; - bool DwarfExceptionHandling; - bool SjLjExceptionHandling; + bool JITExceptionHandling; bool JITEmitDebugInfo; bool JITEmitDebugInfoToDisk; bool UnwindTablesMandatory; @@ -58,6 +60,11 @@ DisableFPElim("disable-fp-elim", cl::location(NoFramePointerElim), cl::init(false)); static cl::opt<bool, true> +DisableFPElimNonLeaf("disable-non-leaf-fp-elim", + cl::desc("Disable frame pointer elimination optimization for non-leaf funcs"), + cl::location(NoFramePointerElimNonLeaf), + cl::init(false)); +static cl::opt<bool, true> DisableExcessPrecision("disable-excess-fp-precision", cl::desc("Disable optimizations that may increase FP precision"), cl::location(NoExcessFPPrecision), @@ -107,14 +114,9 @@ DontPlaceZerosInBSS("nozero-initialized-in-bss", cl::location(NoZerosInBSS), cl::init(false)); static cl::opt<bool, true> -EnableDwarfExceptionHandling("enable-eh", - cl::desc("Emit DWARF exception handling (default if target supports)"), - cl::location(DwarfExceptionHandling), - cl::init(false)); -static cl::opt<bool, true> -EnableSjLjExceptionHandling("enable-sjlj-eh", - cl::desc("Emit SJLJ exception handling (default if target supports)"), - cl::location(SjLjExceptionHandling), +EnableJITExceptionHandling("jit-enable-eh", + cl::desc("Emit exception handling information"), + cl::location(JITExceptionHandling), cl::init(false)); // In debug builds, make this default to true. 
#ifdef NDEBUG @@ -197,7 +199,14 @@ EnableStrongPHIElim(cl::Hidden, "strong-phi-elim", cl::desc("Use strong PHI elimination."), cl::location(StrongPHIElim), cl::init(false)); - +static cl::opt<bool> +DataSections("fdata-sections", + cl::desc("Emit data into separate sections"), + cl::init(false)); +static cl::opt<bool> +FunctionSections("ffunction-sections", + cl::desc("Emit functions into separate sections"), + cl::init(false)); //--------------------------------------------------------------------------- // TargetMachine Class // @@ -244,7 +253,35 @@ void TargetMachine::setAsmVerbosityDefault(bool V) { AsmVerbosityDefault = V; } +bool TargetMachine::getFunctionSections() { + return FunctionSections; +} + +bool TargetMachine::getDataSections() { + return DataSections; +} + +void TargetMachine::setFunctionSections(bool V) { + FunctionSections = V; +} + +void TargetMachine::setDataSections(bool V) { + DataSections = V; +} + namespace llvm { + /// DisableFramePointerElim - This returns true if frame pointer elimination + /// optimization should be disabled for the given machine function. + bool DisableFramePointerElim(const MachineFunction &MF) { + if (NoFramePointerElim) + return true; + if (NoFramePointerElimNonLeaf) { + const MachineFrameInfo *MFI = MF.getFrameInfo(); + return MFI->hasCalls(); + } + return false; + } + /// LessPreciseFPMAD - This flag return true when -enable-fp-mad option /// is specified on the command line. When this flag is off(default), the /// code generator is not allowed to generate mad (multiply add) if the diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp index 47873d1..da01350 100644 --- a/lib/Target/X86/AsmParser/X86AsmParser.cpp +++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp @@ -44,7 +44,7 @@ private: bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc); X86Operand *ParseOperand(); - X86Operand *ParseMemOperand(); + X86Operand *ParseMemOperand(unsigned SegReg, SMLoc StartLoc); bool ParseDirectiveWord(unsigned Size, SMLoc L); @@ -368,14 +368,22 @@ bool X86ATTAsmParser::ParseRegister(unsigned &RegNo, X86Operand *X86ATTAsmParser::ParseOperand() { switch (getLexer().getKind()) { default: - return ParseMemOperand(); + // Parse a memory operand with no segment register. + return ParseMemOperand(0, Parser.getTok().getLoc()); case AsmToken::Percent: { - // FIXME: if a segment register, this could either be just the seg reg, or - // the start of a memory operand. + // Read the register. unsigned RegNo; SMLoc Start, End; if (ParseRegister(RegNo, Start, End)) return 0; - return X86Operand::CreateReg(RegNo, Start, End); + + // If this is a segment register followed by a ':', then this is the start + // of a memory reference, otherwise this is a normal register reference. + if (getLexer().isNot(AsmToken::Colon)) + return X86Operand::CreateReg(RegNo, Start, End); + + + getParser().Lex(); // Eat the colon. + return ParseMemOperand(RegNo, Start); } case AsmToken::Dollar: { // $42 -> immediate. @@ -389,13 +397,10 @@ X86Operand *X86ATTAsmParser::ParseOperand() { } } -/// ParseMemOperand: segment: disp(basereg, indexreg, scale) -X86Operand *X86ATTAsmParser::ParseMemOperand() { - SMLoc MemStart = Parser.getTok().getLoc(); - - // FIXME: If SegReg ':' (e.g. %gs:), eat and remember. - unsigned SegReg = 0; - +/// ParseMemOperand: segment: disp(basereg, indexreg, scale). The '%ds:' prefix +/// has already been parsed if present. 
+X86Operand *X86ATTAsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) { + // We have to disambiguate a parenthesized expression "(4+5)" from the start // of a memory operand with a missing displacement "(%ebx)" or "(,%eax)". The // only way to do this without lookahead is to eat the '(' and see what is diff --git a/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp b/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp index f592396..8b0ed1c 100644 --- a/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp +++ b/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp @@ -474,9 +474,6 @@ void X86AsmPrinter::EmitStartOfAsmFile(Module &M) { void X86AsmPrinter::EmitEndOfAsmFile(Module &M) { if (Subtarget->isTargetDarwin()) { // All darwin targets use mach-o. - TargetLoweringObjectFileMachO &TLOFMacho = - static_cast<TargetLoweringObjectFileMachO &>(getObjFileLowering()); - MachineModuleInfoMachO &MMIMacho = MMI->getObjFileInfo<MachineModuleInfoMachO>(); @@ -486,11 +483,11 @@ void X86AsmPrinter::EmitEndOfAsmFile(Module &M) { Stubs = MMIMacho.GetFnStubList(); if (!Stubs.empty()) { const MCSection *TheSection = - TLOFMacho.getMachOSection("__IMPORT", "__jump_table", - MCSectionMachO::S_SYMBOL_STUBS | - MCSectionMachO::S_ATTR_SELF_MODIFYING_CODE | - MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS, - 5, SectionKind::getMetadata()); + OutContext.getMachOSection("__IMPORT", "__jump_table", + MCSectionMachO::S_SYMBOL_STUBS | + MCSectionMachO::S_ATTR_SELF_MODIFYING_CODE | + MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS, + 5, SectionKind::getMetadata()); OutStreamer.SwitchSection(TheSection); for (unsigned i = 0, e = Stubs.size(); i != e; ++i) { @@ -512,9 +509,9 @@ void X86AsmPrinter::EmitEndOfAsmFile(Module &M) { Stubs = MMIMacho.GetGVStubList(); if (!Stubs.empty()) { const MCSection *TheSection = - TLOFMacho.getMachOSection("__IMPORT", "__pointers", - MCSectionMachO::S_NON_LAZY_SYMBOL_POINTERS, - SectionKind::getMetadata()); + OutContext.getMachOSection("__IMPORT", "__pointers", + MCSectionMachO::S_NON_LAZY_SYMBOL_POINTERS, + SectionKind::getMetadata()); OutStreamer.SwitchSection(TheSection); for (unsigned i = 0, e = Stubs.size(); i != e; ++i) { @@ -587,8 +584,8 @@ void X86AsmPrinter::EmitEndOfAsmFile(Module &M) { // Necessary for dllexport support std::vector<const MCSymbol*> DLLExportedFns, DLLExportedGlobals; - TargetLoweringObjectFileCOFF &TLOFCOFF = - static_cast<TargetLoweringObjectFileCOFF&>(getObjFileLowering()); + const TargetLoweringObjectFileCOFF &TLOFCOFF = + static_cast<const TargetLoweringObjectFileCOFF&>(getObjFileLowering()); for (Module::const_iterator I = M.begin(), E = M.end(); I != E; ++I) if (I->hasDLLExportLinkage()) @@ -617,8 +614,8 @@ void X86AsmPrinter::EmitEndOfAsmFile(Module &M) { } if (Subtarget->isTargetELF()) { - TargetLoweringObjectFileELF &TLOFELF = - static_cast<TargetLoweringObjectFileELF &>(getObjFileLowering()); + const TargetLoweringObjectFileELF &TLOFELF = + static_cast<const TargetLoweringObjectFileELF &>(getObjFileLowering()); MachineModuleInfoELF &MMIELF = MMI->getObjFileInfo<MachineModuleInfoELF>(); diff --git a/lib/Target/X86/AsmPrinter/X86AsmPrinter.h b/lib/Target/X86/AsmPrinter/X86AsmPrinter.h index ee59289..95984b2 100644 --- a/lib/Target/X86/AsmPrinter/X86AsmPrinter.h +++ b/lib/Target/X86/AsmPrinter/X86AsmPrinter.h @@ -80,6 +80,8 @@ class VISIBILITY_HIDDEN X86AsmPrinter : public AsmPrinter { bool runOnMachineFunction(MachineFunction &F); void PrintDebugValueComment(const MachineInstr *MI, raw_ostream &OS); + + MachineLocation getDebugValueLocation(const MachineInstr *MI) const; }; } // end 
namespace llvm diff --git a/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp b/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp index a290eb0..effc8ed 100644 --- a/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp +++ b/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp @@ -169,6 +169,15 @@ MCOperand X86MCInstLower::LowerSymbolOperand(const MachineOperand &MO, Expr = MCBinaryExpr::CreateSub(Expr, MCSymbolRefExpr::Create(GetPICBaseSymbol(), Ctx), Ctx); + if (MO.isJTI() && AsmPrinter.MAI->hasSetDirective()) { + // If .set directive is supported, use it to reduce the number of + // relocations the assembler will generate for differences between + // local labels. This is only safe when the symbols are in the same + // section so we are restricting it to jumptable references. + MCSymbol *Label = Ctx.CreateTempSymbol(); + AsmPrinter.OutStreamer.EmitAssignment(Label, Expr); + Expr = MCSymbolRefExpr::Create(Label, Ctx); + } break; } @@ -328,61 +337,36 @@ void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const { void X86AsmPrinter::PrintDebugValueComment(const MachineInstr *MI, raw_ostream &O) { - // FIXME: if this is implemented for another target before it goes - // away completely, the common part should be moved into AsmPrinter. - O << '\t' << MAI->getCommentString() << "DEBUG_VALUE: "; + // Only the target-dependent form of DBG_VALUE should get here. + // Referencing the offset and metadata as NOps-2 and NOps-1 is + // probably portable to other targets; frame pointer location is not. unsigned NOps = MI->getNumOperands(); + assert(NOps==7); + O << '\t' << MAI->getCommentString() << "DEBUG_VALUE: "; // cast away const; DIetc do not take const operands for some reason. - DIVariable V((MDNode*)(MI->getOperand(NOps-1).getMetadata())); + DIVariable V(const_cast<MDNode *>(MI->getOperand(NOps-1).getMetadata())); + if (V.getContext().isSubprogram()) + O << DISubprogram(V.getContext().getNode()).getDisplayName() << ":"; O << V.getName(); O << " <- "; - if (NOps==3) { - // Register or immediate value. Register 0 means undef. - assert(MI->getOperand(0).isReg() || - MI->getOperand(0).isImm() || - MI->getOperand(0).isFPImm()); - if (MI->getOperand(0).isReg() && MI->getOperand(0).getReg() == 0) { - // Suppress offset in this case, it is not meaningful. - O << "undef"; - OutStreamer.AddBlankLine(); - return; - } - - if (MI->getOperand(0).isFPImm()) { - // This is more naturally done in printOperand, but since the only use - // of such an operand is in this comment and that is temporary (and it's - // ugly), we prefer to keep this localized. - // The include of Type.h may be removable when this code is. - if (MI->getOperand(0).getFPImm()->getType()->isFloatTy() || - MI->getOperand(0).getFPImm()->getType()->isDoubleTy()) - MI->getOperand(0).print(O, &TM); - else { - // There is no good way to print long double. Convert a copy to - // double. Ah well, it's only a comment. - bool ignored; - APFloat APF = APFloat(MI->getOperand(0).getFPImm()->getValueAPF()); - APF.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven, - &ignored); - O << "(long double) " << APF.convertToDouble(); - } - } else - printOperand(MI, 0, O); - } else { - if (MI->getOperand(0).isReg() && MI->getOperand(0).getReg() == 0) { - // Suppress offset in this case, it is not meaningful. - O << "undef"; - OutStreamer.AddBlankLine(); - return; - } - // Frame address. Currently handles register +- offset only. 
- assert(MI->getOperand(0).isReg() && MI->getOperand(3).isImm()); - O << '['; printOperand(MI, 0, O); O << '+'; printOperand(MI, 3, O); - O << ']'; - } + // Frame address. Currently handles register +- offset only. + assert(MI->getOperand(0).isReg() && MI->getOperand(3).isImm()); + O << '['; printOperand(MI, 0, O); O << '+'; printOperand(MI, 3, O); + O << ']'; O << "+"; printOperand(MI, NOps-2, O); } +MachineLocation +X86AsmPrinter::getDebugValueLocation(const MachineInstr *MI) const { + MachineLocation Location; + assert (MI->getNumOperands() == 7 && "Invalid no. of machine operands!"); + // Frame address. Currently handles register +- offset only. + assert(MI->getOperand(0).isReg() && MI->getOperand(3).isImm()); + Location.set(MI->getOperand(0).getReg(), MI->getOperand(3).getImm()); + return Location; +} + void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) { X86MCInstLower MCInstLowering(OutContext, Mang, *this); @@ -395,7 +379,7 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) { OutStreamer.EmitRawText(StringRef(OS.str())); } return; - + case X86::MOVPC32r: { MCInst TmpInst; // This is a pseudo op for a two instruction sequence with a label, which diff --git a/lib/Target/X86/CMakeLists.txt b/lib/Target/X86/CMakeLists.txt index 22285f1..da6bb91 100644 --- a/lib/Target/X86/CMakeLists.txt +++ b/lib/Target/X86/CMakeLists.txt @@ -13,6 +13,7 @@ tablegen(X86GenDAGISel.inc -gen-dag-isel) tablegen(X86GenFastISel.inc -gen-fast-isel) tablegen(X86GenCallingConv.inc -gen-callingconv) tablegen(X86GenSubtarget.inc -gen-subtarget) +tablegen(X86GenEDInfo.inc -gen-enhanced-disassembly-info) set(sources SSEDomainFix.cpp @@ -33,6 +34,7 @@ set(sources X86TargetMachine.cpp X86TargetObjectFile.cpp X86FastISel.cpp + X86SelectionDAGInfo.cpp ) if( CMAKE_CL_64 ) diff --git a/lib/Target/X86/Disassembler/CMakeLists.txt b/lib/Target/X86/Disassembler/CMakeLists.txt index 2a83a9c..9f91060 100644 --- a/lib/Target/X86/Disassembler/CMakeLists.txt +++ b/lib/Target/X86/Disassembler/CMakeLists.txt @@ -4,4 +4,11 @@ add_llvm_library(LLVMX86Disassembler X86Disassembler.cpp X86DisassemblerDecoder.c ) +# workaround for hanging compilation on MSVC9 +if( MSVC_VERSION EQUAL 1500 ) +set_property( + SOURCE X86Disassembler.cpp + PROPERTY COMPILE_FLAGS "/Od" + ) +endif() add_dependencies(LLVMX86Disassembler X86CodeGenTable_gen) diff --git a/lib/Target/X86/Disassembler/X86Disassembler.cpp b/lib/Target/X86/Disassembler/X86Disassembler.cpp index 7328dc0..62e7357 100644 --- a/lib/Target/X86/Disassembler/X86Disassembler.cpp +++ b/lib/Target/X86/Disassembler/X86Disassembler.cpp @@ -17,6 +17,7 @@ #include "X86Disassembler.h" #include "X86DisassemblerDecoder.h" +#include "llvm/MC/EDInstInfo.h" #include "llvm/MC/MCDisassembler.h" #include "llvm/MC/MCDisassembler.h" #include "llvm/MC/MCInst.h" @@ -26,6 +27,7 @@ #include "llvm/Support/raw_ostream.h" #include "X86GenRegisterNames.inc" +#include "X86GenEDInfo.inc" using namespace llvm; using namespace llvm::X86Disassembler; @@ -69,6 +71,10 @@ X86GenericDisassembler::X86GenericDisassembler(DisassemblerMode mode) : X86GenericDisassembler::~X86GenericDisassembler() { } +EDInstInfo *X86GenericDisassembler::getEDInfo() const { + return instInfoX86; +} + /// regionReader - a callback function that wraps the readByte method from /// MemoryObject. 
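An aside on the DBG_VALUE hunks above: both PrintDebugValueComment and the new getDebugValueLocation assume the target-dependent 7-operand form, with the frame register in operand 0 and the immediate offset in operand 3. A minimal standalone sketch of that extraction, using mock types rather than LLVM's MachineInstr/MachineLocation, might look like this:

// Standalone sketch of the operand layout the DBG_VALUE hunks above assume:
// a 7-operand target form where operand 0 is the frame register and
// operand 3 is the immediate offset. The types here are mocks, not LLVM's.
#include <cassert>
#include <cstdio>
#include <vector>

struct MockOperand {
  enum Kind { Reg, Imm, Other } kind;
  long long value; // register number or immediate, depending on kind
};

struct MockLocation {
  unsigned reg = 0;
  long long offset = 0;
};

// Mirrors the "register +- offset" case of getDebugValueLocation above.
MockLocation getDebugValueLocation(const std::vector<MockOperand> &ops) {
  assert(ops.size() == 7 && "Invalid no. of operands for this sketch");
  assert(ops[0].kind == MockOperand::Reg && ops[3].kind == MockOperand::Imm);
  MockLocation loc;
  loc.reg = static_cast<unsigned>(ops[0].value);
  loc.offset = ops[3].value;
  return loc;
}

int main() {
  // e.g. a variable living at [frame-reg - 8]
  std::vector<MockOperand> ops(7, MockOperand{MockOperand::Other, 0});
  ops[0] = MockOperand{MockOperand::Reg, 6};
  ops[3] = MockOperand{MockOperand::Imm, -8};
  MockLocation loc = getDebugValueLocation(ops);
  std::printf("reg %u, offset %lld\n", loc.reg, loc.offset);
  return 0;
}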
/// diff --git a/lib/Target/X86/Disassembler/X86Disassembler.h b/lib/Target/X86/Disassembler/X86Disassembler.h index 0e6e0b0..9c54262 100644 --- a/lib/Target/X86/Disassembler/X86Disassembler.h +++ b/lib/Target/X86/Disassembler/X86Disassembler.h @@ -94,6 +94,8 @@ namespace llvm { class MCInst; class MemoryObject; class raw_ostream; + +struct EDInstInfo; namespace X86Disassembler { @@ -115,6 +117,9 @@ public: const MemoryObject ®ion, uint64_t address, raw_ostream &vStream) const; + + /// getEDInfo - See MCDisassembler. + EDInstInfo *getEDInfo() const; private: DisassemblerMode fMode; }; diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c index db694bc..64f6b2d 100644 --- a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c +++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c @@ -1277,6 +1277,9 @@ static int readOperands(struct InternalInstruction* insn) { case ENCODING_IB: if (readImmediate(insn, 1)) return -1; + if (insn->spec->operands[index].type == TYPE_IMM3 && + insn->immediates[insn->numImmediatesConsumed - 1] > 7) + return -1; break; case ENCODING_IW: if (readImmediate(insn, 2)) @@ -1293,6 +1296,7 @@ static int readOperands(struct InternalInstruction* insn) { case ENCODING_Iv: if (readImmediate(insn, insn->immediateSize)) return -1; + break; case ENCODING_Ia: if (readImmediate(insn, insn->addressSize)) return -1; diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h b/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h index c213f89..4a7cd57 100644 --- a/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h +++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h @@ -236,6 +236,7 @@ struct ContextDecision { ENUM_ENTRY(TYPE_IMM16, "2-byte") \ ENUM_ENTRY(TYPE_IMM32, "4-byte") \ ENUM_ENTRY(TYPE_IMM64, "8-byte") \ + ENUM_ENTRY(TYPE_IMM3, "1-byte immediate operand between 0 and 7") \ ENUM_ENTRY(TYPE_RM8, "1-byte register or memory operand") \ ENUM_ENTRY(TYPE_RM16, "2-byte") \ ENUM_ENTRY(TYPE_RM32, "4-byte") \ diff --git a/lib/Target/X86/SSEDomainFix.cpp b/lib/Target/X86/SSEDomainFix.cpp index 4b54676..5e80845 100644 --- a/lib/Target/X86/SSEDomainFix.cpp +++ b/lib/Target/X86/SSEDomainFix.cpp @@ -159,7 +159,7 @@ int SSEDomainFixPass::RegIndex(unsigned reg) { // We just need them to be consecutive, ordering doesn't matter. assert(X86::XMM9 == X86::XMM0+NumRegs-1 && "Unexpected sort"); reg -= X86::XMM0; - return reg < NumRegs ? reg : -1; + return reg < NumRegs ? (int) reg : -1; } DomainValue *SSEDomainFixPass::Alloc(int domain) { @@ -216,8 +216,15 @@ void SSEDomainFixPass::Force(int rx, unsigned domain) { if (LiveRegs && (dv = LiveRegs[rx])) { if (dv->isCollapsed()) dv->addDomain(domain); - else + else if (dv->hasDomain(domain)) Collapse(dv, domain); + else { + // This is an incompatible open DomainValue. Collapse it to whatever and force + // the new value into domain. This costs a domain crossing. + Collapse(dv, dv->getFirstDomain()); + assert(LiveRegs[rx] && "Not live after collapse?"); + LiveRegs[rx]->addDomain(domain); + } } else { // Set up basic collapsed DomainValue. SetLiveReg(rx, Alloc(domain)); @@ -263,7 +270,7 @@ bool SSEDomainFixPass::Merge(DomainValue *A, DomainValue *B) { void SSEDomainFixPass::enterBasicBlock() { // Try to coalesce live-out registers from predecessors. 
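Backing up to the X86DisassemblerDecoder hunk above: the new TYPE_IMM3 operand class only admits byte immediates in the range 0-7, and readOperands now rejects anything larger (the same hunk also adds the missing break after ENCODING_Iv). A hedged, self-contained sketch of that range check, with a toy enum standing in for the decoder's real tables:

// Minimal sketch (not the real decoder's data structures) of the TYPE_IMM3
// range check added above: a 3-bit immediate encoded in a full byte must
// still be in [0, 7], otherwise the byte sequence is not a valid instruction.
#include <cassert>
#include <cstdint>

enum ToyOperandType { TOY_IMM8, TOY_IMM3 };

// Returns 0 on success, -1 if the immediate is out of range for its type,
// mirroring the readOperands() convention in the hunk above.
int checkImmediate(ToyOperandType type, uint8_t byte) {
  if (type == TOY_IMM3 && byte > 7)
    return -1; // reject encodings whose 3-bit field carries a larger value
  return 0;
}

int main() {
  assert(checkImmediate(TOY_IMM3, 5) == 0);
  assert(checkImmediate(TOY_IMM3, 9) == -1);
  assert(checkImmediate(TOY_IMM8, 200) == 0);
  return 0;
}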
- for (MachineBasicBlock::const_livein_iterator i = MBB->livein_begin(), + for (MachineBasicBlock::livein_iterator i = MBB->livein_begin(), e = MBB->livein_end(); i != e; ++i) { int rx = RegIndex(*i); if (rx < 0) continue; @@ -281,8 +288,9 @@ void SSEDomainFixPass::enterBasicBlock() { // We have a live DomainValue from more than one predecessor. if (LiveRegs[rx]->isCollapsed()) { // We are already collapsed, but predecessor is not. Force him. - if (!pdv->isCollapsed()) - Collapse(pdv, LiveRegs[rx]->getFirstDomain()); + unsigned domain = LiveRegs[rx]->getFirstDomain(); + if (!pdv->isCollapsed() && pdv->hasDomain(domain)) + Collapse(pdv, domain); continue; } @@ -290,7 +298,7 @@ void SSEDomainFixPass::enterBasicBlock() { if (!pdv->isCollapsed()) Merge(LiveRegs[rx], pdv); else - Collapse(LiveRegs[rx], pdv->getFirstDomain()); + Force(rx, pdv->getFirstDomain()); } } } diff --git a/lib/Target/X86/X86.h b/lib/Target/X86/X86.h index 9be38a4..22e89a5 100644 --- a/lib/Target/X86/X86.h +++ b/lib/Target/X86/X86.h @@ -69,6 +69,12 @@ TargetAsmBackend *createX86_64AsmBackend(const Target &, const std::string &); /// FunctionPass *createEmitX86CodeToMemory(); +/// createX86MaxStackAlignmentHeuristicPass - This function returns a pass +/// which determines whether the frame pointer register should be +/// reserved in case dynamic stack alignment is later required. +/// +FunctionPass *createX86MaxStackAlignmentHeuristicPass(); + extern Target TheX86_32Target, TheX86_64Target; } // End llvm namespace diff --git a/lib/Target/X86/X86AsmBackend.cpp b/lib/Target/X86/X86AsmBackend.cpp index 8e2928c3..ba9c1d0 100644 --- a/lib/Target/X86/X86AsmBackend.cpp +++ b/lib/Target/X86/X86AsmBackend.cpp @@ -111,7 +111,7 @@ void X86AsmBackend::RelaxInstruction(const MCInstFragment *IF, SmallString<256> Tmp; raw_svector_ostream OS(Tmp); IF->getInst().dump_pretty(OS); - llvm_report_error("unexpected instruction to relax: " + OS.str()); + report_fatal_error("unexpected instruction to relax: " + OS.str()); } Res = IF->getInst(); diff --git a/lib/Target/X86/X86CodeEmitter.cpp b/lib/Target/X86/X86CodeEmitter.cpp index 6638e11..8f02604 100644 --- a/lib/Target/X86/X86CodeEmitter.cpp +++ b/lib/Target/X86/X86CodeEmitter.cpp @@ -79,7 +79,7 @@ namespace { private: void emitPCRelativeBlockAddress(MachineBasicBlock *MBB); - void emitGlobalAddress(GlobalValue *GV, unsigned Reloc, + void emitGlobalAddress(const GlobalValue *GV, unsigned Reloc, intptr_t Disp = 0, intptr_t PCAdj = 0, bool Indirect = false); void emitExternalSymbolAddress(const char *ES, unsigned Reloc); @@ -163,7 +163,8 @@ void Emitter<CodeEmitter>::emitPCRelativeBlockAddress(MachineBasicBlock *MBB) { /// this is part of a "take the address of a global" instruction. /// template<class CodeEmitter> -void Emitter<CodeEmitter>::emitGlobalAddress(GlobalValue *GV, unsigned Reloc, +void Emitter<CodeEmitter>::emitGlobalAddress(const GlobalValue *GV, + unsigned Reloc, intptr_t Disp /* = 0 */, intptr_t PCAdj /* = 0 */, bool Indirect /* = false */) { @@ -174,9 +175,10 @@ void Emitter<CodeEmitter>::emitGlobalAddress(GlobalValue *GV, unsigned Reloc, RelocCST = PCAdj; MachineRelocation MR = Indirect ? 
MachineRelocation::getIndirectSymbol(MCE.getCurrentPCOffset(), Reloc, - GV, RelocCST, false) + const_cast<GlobalValue *>(GV), + RelocCST, false) : MachineRelocation::getGV(MCE.getCurrentPCOffset(), Reloc, - GV, RelocCST, false); + const_cast<GlobalValue *>(GV), RelocCST, false); MCE.addRelocation(MR); // The relocated value will be added to the displacement if (Reloc == X86::reloc_absolute_dword) @@ -378,6 +380,16 @@ void Emitter<CodeEmitter>::emitMemModRMByte(const MachineInstr &MI, const MachineOperand &IndexReg = MI.getOperand(Op+2); unsigned BaseReg = Base.getReg(); + + // Handle %rip relative addressing. + if (BaseReg == X86::RIP || + (Is64BitMode && DispForReloc)) { // [disp32+RIP] in X86-64 mode + assert(IndexReg.getReg() == 0 && Is64BitMode && + "Invalid rip-relative address"); + MCE.emitByte(ModRMByte(0, RegOpcodeField, 5)); + emitDisplacementField(DispForReloc, DispVal, PCAdj, true); + return; + } // Indicate that the displacement will use an pcrel or absolute reference // by default. MCEs able to resolve addresses on-the-fly use pcrel by default @@ -445,7 +457,7 @@ void Emitter<CodeEmitter>::emitMemModRMByte(const MachineInstr &MI, // Emit the normal disp32 encoding. MCE.emitByte(ModRMByte(2, RegOpcodeField, 4)); ForceDisp32 = true; - } else if (DispVal == 0 && getX86RegNum(BaseReg) != N86::EBP) { + } else if (DispVal == 0 && BaseRegNo != N86::EBP) { // Emit no displacement ModR/M byte MCE.emitByte(ModRMByte(0, RegOpcodeField, 4)); } else if (isDisp8(DispVal)) { @@ -600,7 +612,7 @@ void Emitter<CodeEmitter>::emitInstruction(const MachineInstr &MI, // We allow inline assembler nodes with empty bodies - they can // implicitly define registers, which is ok for JIT. if (MI.getOperand(0).getSymbolName()[0]) - llvm_report_error("JIT does not support inline asm!"); + report_fatal_error("JIT does not support inline asm!"); break; case TargetOpcode::DBG_LABEL: case TargetOpcode::GC_LABEL: diff --git a/lib/Target/X86/X86ELFWriterInfo.cpp b/lib/Target/X86/X86ELFWriterInfo.cpp index 1597d2b3..f84995d 100644 --- a/lib/Target/X86/X86ELFWriterInfo.cpp +++ b/lib/Target/X86/X86ELFWriterInfo.cpp @@ -26,7 +26,6 @@ using namespace llvm; X86ELFWriterInfo::X86ELFWriterInfo(TargetMachine &TM) : TargetELFWriterInfo(TM) { - bool is64Bit = TM.getTargetData()->getPointerSizeInBits() == 64; EMachine = is64Bit ? 
EM_X86_64 : EM_386; } diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp index 7849b51..ff9208c 100644 --- a/lib/Target/X86/X86FastISel.cpp +++ b/lib/Target/X86/X86FastISel.cpp @@ -15,7 +15,6 @@ #include "X86.h" #include "X86InstrBuilder.h" -#include "X86ISelLowering.h" #include "X86RegisterInfo.h" #include "X86Subtarget.h" #include "X86TargetMachine.h" @@ -56,12 +55,13 @@ public: explicit X86FastISel(MachineFunction &mf, DenseMap<const Value *, unsigned> &vm, DenseMap<const BasicBlock *, MachineBasicBlock *> &bm, - DenseMap<const AllocaInst *, int> &am + DenseMap<const AllocaInst *, int> &am, + std::vector<std::pair<MachineInstr*, unsigned> > &pn #ifndef NDEBUG - , SmallSet<Instruction*, 8> &cil + , SmallSet<const Instruction *, 8> &cil #endif ) - : FastISel(mf, vm, bm, am + : FastISel(mf, vm, bm, am, pn #ifndef NDEBUG , cil #endif @@ -72,16 +72,16 @@ public: X86ScalarSSEf32 = Subtarget->hasSSE1(); } - virtual bool TargetSelectInstruction(Instruction *I); + virtual bool TargetSelectInstruction(const Instruction *I); #include "X86GenFastISel.inc" private: - bool X86FastEmitCompare(Value *LHS, Value *RHS, EVT VT); + bool X86FastEmitCompare(const Value *LHS, const Value *RHS, EVT VT); bool X86FastEmitLoad(EVT VT, const X86AddressMode &AM, unsigned &RR); - bool X86FastEmitStore(EVT VT, Value *Val, + bool X86FastEmitStore(EVT VT, const Value *Val, const X86AddressMode &AM); bool X86FastEmitStore(EVT VT, unsigned Val, const X86AddressMode &AM); @@ -89,32 +89,32 @@ private: bool X86FastEmitExtend(ISD::NodeType Opc, EVT DstVT, unsigned Src, EVT SrcVT, unsigned &ResultReg); - bool X86SelectAddress(Value *V, X86AddressMode &AM); - bool X86SelectCallAddress(Value *V, X86AddressMode &AM); + bool X86SelectAddress(const Value *V, X86AddressMode &AM); + bool X86SelectCallAddress(const Value *V, X86AddressMode &AM); - bool X86SelectLoad(Instruction *I); + bool X86SelectLoad(const Instruction *I); - bool X86SelectStore(Instruction *I); + bool X86SelectStore(const Instruction *I); - bool X86SelectCmp(Instruction *I); + bool X86SelectCmp(const Instruction *I); - bool X86SelectZExt(Instruction *I); + bool X86SelectZExt(const Instruction *I); - bool X86SelectBranch(Instruction *I); + bool X86SelectBranch(const Instruction *I); - bool X86SelectShift(Instruction *I); + bool X86SelectShift(const Instruction *I); - bool X86SelectSelect(Instruction *I); + bool X86SelectSelect(const Instruction *I); - bool X86SelectTrunc(Instruction *I); + bool X86SelectTrunc(const Instruction *I); - bool X86SelectFPExt(Instruction *I); - bool X86SelectFPTrunc(Instruction *I); + bool X86SelectFPExt(const Instruction *I); + bool X86SelectFPTrunc(const Instruction *I); - bool X86SelectExtractValue(Instruction *I); + bool X86SelectExtractValue(const Instruction *I); - bool X86VisitIntrinsicCall(IntrinsicInst &I); - bool X86SelectCall(Instruction *I); + bool X86VisitIntrinsicCall(const IntrinsicInst &I); + bool X86SelectCall(const Instruction *I); CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool isTailCall = false); @@ -125,9 +125,9 @@ private: return static_cast<const X86TargetMachine *>(&TM); } - unsigned TargetMaterializeConstant(Constant *C); + unsigned TargetMaterializeConstant(const Constant *C); - unsigned TargetMaterializeAlloca(AllocaInst *C); + unsigned TargetMaterializeAlloca(const AllocaInst *C); /// isScalarFPTypeInSSEReg - Return true if the specified scalar FP type is /// computed in an SSE register, not on the X87 floating point stack. 
@@ -280,14 +280,14 @@ X86FastISel::X86FastEmitStore(EVT VT, unsigned Val, return true; } -bool X86FastISel::X86FastEmitStore(EVT VT, Value *Val, +bool X86FastISel::X86FastEmitStore(EVT VT, const Value *Val, const X86AddressMode &AM) { // Handle 'null' like i32/i64 0. if (isa<ConstantPointerNull>(Val)) Val = Constant::getNullValue(TD.getIntPtrType(Val->getContext())); // If this is a store of a simple constant, fold the constant into the store. - if (ConstantInt *CI = dyn_cast<ConstantInt>(Val)) { + if (const ConstantInt *CI = dyn_cast<ConstantInt>(Val)) { unsigned Opc = 0; bool Signed = true; switch (VT.getSimpleVT().SimpleTy) { @@ -305,7 +305,7 @@ bool X86FastISel::X86FastEmitStore(EVT VT, Value *Val, if (Opc) { addFullAddress(BuildMI(MBB, DL, TII.get(Opc)), AM) - .addImm(Signed ? CI->getSExtValue() : + .addImm(Signed ? (uint64_t) CI->getSExtValue() : CI->getZExtValue()); return true; } @@ -335,13 +335,13 @@ bool X86FastISel::X86FastEmitExtend(ISD::NodeType Opc, EVT DstVT, /// X86SelectAddress - Attempt to fill in an address from the given value. /// -bool X86FastISel::X86SelectAddress(Value *V, X86AddressMode &AM) { - User *U = NULL; +bool X86FastISel::X86SelectAddress(const Value *V, X86AddressMode &AM) { + const User *U = NULL; unsigned Opcode = Instruction::UserOp1; - if (Instruction *I = dyn_cast<Instruction>(V)) { + if (const Instruction *I = dyn_cast<Instruction>(V)) { Opcode = I->getOpcode(); U = I; - } else if (ConstantExpr *C = dyn_cast<ConstantExpr>(V)) { + } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(V)) { Opcode = C->getOpcode(); U = C; } @@ -378,7 +378,7 @@ bool X86FastISel::X86SelectAddress(Value *V, X86AddressMode &AM) { case Instruction::Add: { // Adds of constants are common and easy enough. - if (ConstantInt *CI = dyn_cast<ConstantInt>(U->getOperand(1))) { + if (const ConstantInt *CI = dyn_cast<ConstantInt>(U->getOperand(1))) { uint64_t Disp = (int32_t)AM.Disp + (uint64_t)CI->getSExtValue(); // They have to fit in the 32-bit signed displacement field though. if (isInt<32>(Disp)) { @@ -399,16 +399,16 @@ bool X86FastISel::X86SelectAddress(Value *V, X86AddressMode &AM) { gep_type_iterator GTI = gep_type_begin(U); // Iterate through the indices, folding what we can. Constants can be // folded, and one dynamic index can be handled, if the scale is supported. - for (User::op_iterator i = U->op_begin() + 1, e = U->op_end(); + for (User::const_op_iterator i = U->op_begin() + 1, e = U->op_end(); i != e; ++i, ++GTI) { - Value *Op = *i; + const Value *Op = *i; if (const StructType *STy = dyn_cast<StructType>(*GTI)) { const StructLayout *SL = TD.getStructLayout(STy); unsigned Idx = cast<ConstantInt>(Op)->getZExtValue(); Disp += SL->getElementOffset(Idx); } else { uint64_t S = TD.getTypeAllocSize(GTI.getIndexedType()); - if (ConstantInt *CI = dyn_cast<ConstantInt>(Op)) { + if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) { // Constant-offset addressing. Disp += CI->getSExtValue() * S; } else if (IndexReg == 0 && @@ -446,7 +446,7 @@ bool X86FastISel::X86SelectAddress(Value *V, X86AddressMode &AM) { } // Handle constant address. - if (GlobalValue *GV = dyn_cast<GlobalValue>(V)) { + if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) { // Can't handle alternate code models yet. if (TM.getCodeModel() != CodeModel::Small) return false; @@ -457,7 +457,7 @@ bool X86FastISel::X86SelectAddress(Value *V, X86AddressMode &AM) { return false; // Can't handle TLS yet. 
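As an illustration of the constant-GEP folding in X86SelectAddress above (struct field offsets and constant indices accumulate into the displacement, which must stay within a signed 32-bit field), here is a small standalone sketch with plain integers standing in for TargetData and StructLayout:

// Sketch of the constant-GEP folding above: constant indices fold into the
// displacement as index * element-size, and the running total must still
// fit in the 32-bit signed displacement field of an x86 addressing mode.
#include <cstdint>
#include <iostream>
#include <utility>
#include <vector>

bool fitsInSignedDisp32(int64_t v) {
  return v >= INT32_MIN && v <= INT32_MAX;
}

// Accumulate disp += index * size for each constant index; fail if the
// result no longer fits the displacement, as X86SelectAddress does.
bool foldConstantIndices(const std::vector<std::pair<int64_t, uint64_t> > &idx,
                         int64_t &disp) {
  for (size_t i = 0; i != idx.size(); ++i) {
    disp += idx[i].first * static_cast<int64_t>(idx[i].second);
    if (!fitsInSignedDisp32(disp))
      return false;
  }
  return true;
}

int main() {
  int64_t disp = 16;  // e.g. a struct field offset already folded in
  // index 3 into 8-byte elements, then index 2 into 4-byte elements
  std::vector<std::pair<int64_t, uint64_t> > indices;
  indices.push_back(std::make_pair(3, 8));
  indices.push_back(std::make_pair(2, 4));
  if (foldConstantIndices(indices, disp))
    std::cout << "folded displacement: " << disp << "\n";  // prints 48
  return 0;
}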
- if (GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV)) + if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV)) if (GVar->isThreadLocal()) return false; @@ -544,13 +544,13 @@ bool X86FastISel::X86SelectAddress(Value *V, X86AddressMode &AM) { /// X86SelectCallAddress - Attempt to fill in an address from the given value. /// -bool X86FastISel::X86SelectCallAddress(Value *V, X86AddressMode &AM) { - User *U = NULL; +bool X86FastISel::X86SelectCallAddress(const Value *V, X86AddressMode &AM) { + const User *U = NULL; unsigned Opcode = Instruction::UserOp1; - if (Instruction *I = dyn_cast<Instruction>(V)) { + if (const Instruction *I = dyn_cast<Instruction>(V)) { Opcode = I->getOpcode(); U = I; - } else if (ConstantExpr *C = dyn_cast<ConstantExpr>(V)) { + } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(V)) { Opcode = C->getOpcode(); U = C; } @@ -575,7 +575,7 @@ bool X86FastISel::X86SelectCallAddress(Value *V, X86AddressMode &AM) { } // Handle constant address. - if (GlobalValue *GV = dyn_cast<GlobalValue>(V)) { + if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) { // Can't handle alternate code models yet. if (TM.getCodeModel() != CodeModel::Small) return false; @@ -586,7 +586,7 @@ bool X86FastISel::X86SelectCallAddress(Value *V, X86AddressMode &AM) { return false; // Can't handle TLS or DLLImport. - if (GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV)) + if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV)) if (GVar->isThreadLocal() || GVar->hasDLLImportLinkage()) return false; @@ -627,7 +627,7 @@ bool X86FastISel::X86SelectCallAddress(Value *V, X86AddressMode &AM) { /// X86SelectStore - Select and emit code to implement store instructions. -bool X86FastISel::X86SelectStore(Instruction* I) { +bool X86FastISel::X86SelectStore(const Instruction *I) { EVT VT; if (!isTypeLegal(I->getOperand(0)->getType(), VT, /*AllowI1=*/true)) return false; @@ -641,7 +641,7 @@ bool X86FastISel::X86SelectStore(Instruction* I) { /// X86SelectLoad - Select and emit code to implement load instructions. /// -bool X86FastISel::X86SelectLoad(Instruction *I) { +bool X86FastISel::X86SelectLoad(const Instruction *I) { EVT VT; if (!isTypeLegal(I->getType(), VT, /*AllowI1=*/true)) return false; @@ -673,7 +673,7 @@ static unsigned X86ChooseCmpOpcode(EVT VT) { /// X86ChooseCmpImmediateOpcode - If we have a comparison with RHS as the RHS /// of the comparison, return an opcode that works for the compare (e.g. /// CMP32ri) otherwise return 0. -static unsigned X86ChooseCmpImmediateOpcode(EVT VT, ConstantInt *RHSC) { +static unsigned X86ChooseCmpImmediateOpcode(EVT VT, const ConstantInt *RHSC) { switch (VT.getSimpleVT().SimpleTy) { // Otherwise, we can't fold the immediate into this comparison. default: return 0; @@ -689,7 +689,8 @@ static unsigned X86ChooseCmpImmediateOpcode(EVT VT, ConstantInt *RHSC) { } } -bool X86FastISel::X86FastEmitCompare(Value *Op0, Value *Op1, EVT VT) { +bool X86FastISel::X86FastEmitCompare(const Value *Op0, const Value *Op1, + EVT VT) { unsigned Op0Reg = getRegForValue(Op0); if (Op0Reg == 0) return false; @@ -700,7 +701,7 @@ bool X86FastISel::X86FastEmitCompare(Value *Op0, Value *Op1, EVT VT) { // We have two options: compare with register or immediate. If the RHS of // the compare is an immediate that we can fold into this compare, use // CMPri, otherwise use CMPrr. 
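The compare lowering above chooses between an immediate form and a register-register compare depending on whether the RHS is a constant that can be folded. The sketch below mirrors that decision with a toy opcode enum; the 64-bit sign-extended-imm32 restriction shown here is an assumption, since the hunk only spells out the smaller widths:

// Hedged sketch of the "compare with register or immediate" choice in
// X86FastEmitCompare above, using a toy opcode enum rather than the real
// X86 instruction definitions.
#include <cstdint>
#include <iostream>
#include <optional>

enum class CmpOpc { CMP8ri, CMP16ri, CMP32ri, CMP64ri32, CMPrr };

// Pick an immediate form when the constant RHS fits; otherwise fall back
// to the register-register compare, as in the hunk above.
CmpOpc chooseCmp(unsigned bits, std::optional<int64_t> rhsImm) {
  if (rhsImm) {
    switch (bits) {
    case 8:  return CmpOpc::CMP8ri;
    case 16: return CmpOpc::CMP16ri;
    case 32: return CmpOpc::CMP32ri;
    case 64:
      // Assumed: 64-bit compares only take a sign-extended 32-bit immediate.
      if (*rhsImm >= INT32_MIN && *rhsImm <= INT32_MAX)
        return CmpOpc::CMP64ri32;
      break;
    }
  }
  return CmpOpc::CMPrr;
}

int main() {
  std::cout << (chooseCmp(32, 42) == CmpOpc::CMP32ri) << "\n";             // 1
  std::cout << (chooseCmp(64, int64_t(1) << 40) == CmpOpc::CMPrr) << "\n"; // 1
  return 0;
}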
- if (ConstantInt *Op1C = dyn_cast<ConstantInt>(Op1)) { + if (const ConstantInt *Op1C = dyn_cast<ConstantInt>(Op1)) { if (unsigned CompareImmOpc = X86ChooseCmpImmediateOpcode(VT, Op1C)) { BuildMI(MBB, DL, TII.get(CompareImmOpc)).addReg(Op0Reg) .addImm(Op1C->getSExtValue()); @@ -718,8 +719,8 @@ bool X86FastISel::X86FastEmitCompare(Value *Op0, Value *Op1, EVT VT) { return true; } -bool X86FastISel::X86SelectCmp(Instruction *I) { - CmpInst *CI = cast<CmpInst>(I); +bool X86FastISel::X86SelectCmp(const Instruction *I) { + const CmpInst *CI = cast<CmpInst>(I); EVT VT; if (!isTypeLegal(I->getOperand(0)->getType(), VT)) @@ -781,7 +782,7 @@ bool X86FastISel::X86SelectCmp(Instruction *I) { return false; } - Value *Op0 = CI->getOperand(0), *Op1 = CI->getOperand(1); + const Value *Op0 = CI->getOperand(0), *Op1 = CI->getOperand(1); if (SwapArgs) std::swap(Op0, Op1); @@ -794,7 +795,7 @@ bool X86FastISel::X86SelectCmp(Instruction *I) { return true; } -bool X86FastISel::X86SelectZExt(Instruction *I) { +bool X86FastISel::X86SelectZExt(const Instruction *I) { // Handle zero-extension from i1 to i8, which is common. if (I->getType()->isIntegerTy(8) && I->getOperand(0)->getType()->isIntegerTy(1)) { @@ -811,15 +812,15 @@ bool X86FastISel::X86SelectZExt(Instruction *I) { } -bool X86FastISel::X86SelectBranch(Instruction *I) { +bool X86FastISel::X86SelectBranch(const Instruction *I) { // Unconditional branches are selected by tablegen-generated code. // Handle a conditional branch. - BranchInst *BI = cast<BranchInst>(I); + const BranchInst *BI = cast<BranchInst>(I); MachineBasicBlock *TrueMBB = MBBMap[BI->getSuccessor(0)]; MachineBasicBlock *FalseMBB = MBBMap[BI->getSuccessor(1)]; // Fold the common case of a conditional branch with a comparison. - if (CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) { + if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) { if (CI->hasOneUse()) { EVT VT = TLI.getValueType(CI->getOperand(0)->getType()); @@ -866,7 +867,7 @@ bool X86FastISel::X86SelectBranch(Instruction *I) { return false; } - Value *Op0 = CI->getOperand(0), *Op1 = CI->getOperand(1); + const Value *Op0 = CI->getOperand(0), *Op1 = CI->getOperand(1); if (SwapArgs) std::swap(Op0, Op1); @@ -901,7 +902,8 @@ bool X86FastISel::X86SelectBranch(Instruction *I) { // looking for the SETO/SETB instruction. If an instruction modifies the // EFLAGS register before we reach the SETO/SETB instruction, then we can't // convert the branch into a JO/JB instruction. - if (IntrinsicInst *CI = dyn_cast<IntrinsicInst>(EI->getAggregateOperand())){ + if (const IntrinsicInst *CI = + dyn_cast<IntrinsicInst>(EI->getAggregateOperand())){ if (CI->getIntrinsicID() == Intrinsic::sadd_with_overflow || CI->getIntrinsicID() == Intrinsic::uadd_with_overflow) { const MachineInstr *SetMI = 0; @@ -956,7 +958,7 @@ bool X86FastISel::X86SelectBranch(Instruction *I) { return true; } -bool X86FastISel::X86SelectShift(Instruction *I) { +bool X86FastISel::X86SelectShift(const Instruction *I) { unsigned CReg = 0, OpReg = 0, OpImm = 0; const TargetRegisterClass *RC = NULL; if (I->getType()->isIntegerTy(8)) { @@ -1007,7 +1009,7 @@ bool X86FastISel::X86SelectShift(Instruction *I) { if (Op0Reg == 0) return false; // Fold immediate in shl(x,3). 
- if (ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1))) { + if (const ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1))) { unsigned ResultReg = createResultReg(RC); BuildMI(MBB, DL, TII.get(OpImm), ResultReg).addReg(Op0Reg).addImm(CI->getZExtValue() & 0xff); @@ -1032,7 +1034,7 @@ bool X86FastISel::X86SelectShift(Instruction *I) { return true; } -bool X86FastISel::X86SelectSelect(Instruction *I) { +bool X86FastISel::X86SelectSelect(const Instruction *I) { EVT VT = TLI.getValueType(I->getType(), /*HandleUnknown=*/true); if (VT == MVT::Other || !isTypeLegal(I->getType(), VT)) return false; @@ -1066,11 +1068,11 @@ bool X86FastISel::X86SelectSelect(Instruction *I) { return true; } -bool X86FastISel::X86SelectFPExt(Instruction *I) { +bool X86FastISel::X86SelectFPExt(const Instruction *I) { // fpext from float to double. if (Subtarget->hasSSE2() && I->getType()->isDoubleTy()) { - Value *V = I->getOperand(0); + const Value *V = I->getOperand(0); if (V->getType()->isFloatTy()) { unsigned OpReg = getRegForValue(V); if (OpReg == 0) return false; @@ -1084,10 +1086,10 @@ bool X86FastISel::X86SelectFPExt(Instruction *I) { return false; } -bool X86FastISel::X86SelectFPTrunc(Instruction *I) { +bool X86FastISel::X86SelectFPTrunc(const Instruction *I) { if (Subtarget->hasSSE2()) { if (I->getType()->isFloatTy()) { - Value *V = I->getOperand(0); + const Value *V = I->getOperand(0); if (V->getType()->isDoubleTy()) { unsigned OpReg = getRegForValue(V); if (OpReg == 0) return false; @@ -1102,7 +1104,7 @@ bool X86FastISel::X86SelectFPTrunc(Instruction *I) { return false; } -bool X86FastISel::X86SelectTrunc(Instruction *I) { +bool X86FastISel::X86SelectTrunc(const Instruction *I) { if (Subtarget->is64Bit()) // All other cases should be handled by the tblgen generated code. return false; @@ -1139,11 +1141,11 @@ bool X86FastISel::X86SelectTrunc(Instruction *I) { return true; } -bool X86FastISel::X86SelectExtractValue(Instruction *I) { - ExtractValueInst *EI = cast<ExtractValueInst>(I); - Value *Agg = EI->getAggregateOperand(); +bool X86FastISel::X86SelectExtractValue(const Instruction *I) { + const ExtractValueInst *EI = cast<ExtractValueInst>(I); + const Value *Agg = EI->getAggregateOperand(); - if (IntrinsicInst *CI = dyn_cast<IntrinsicInst>(Agg)) { + if (const IntrinsicInst *CI = dyn_cast<IntrinsicInst>(Agg)) { switch (CI->getIntrinsicID()) { default: break; case Intrinsic::sadd_with_overflow: @@ -1160,7 +1162,7 @@ bool X86FastISel::X86SelectExtractValue(Instruction *I) { return false; } -bool X86FastISel::X86VisitIntrinsicCall(IntrinsicInst &I) { +bool X86FastISel::X86VisitIntrinsicCall(const IntrinsicInst &I) { // FIXME: Handle more intrinsics. switch (I.getIntrinsicID()) { default: return false; @@ -1168,8 +1170,8 @@ bool X86FastISel::X86VisitIntrinsicCall(IntrinsicInst &I) { // Emit code inline code to store the stack guard onto the stack. EVT PtrTy = TLI.getPointerTy(); - Value *Op1 = I.getOperand(1); // The guard's value. - AllocaInst *Slot = cast<AllocaInst>(I.getOperand(2)); + const Value *Op1 = I.getOperand(1); // The guard's value. + const AllocaInst *Slot = cast<AllocaInst>(I.getOperand(2)); // Grab the frame index. 
X86AddressMode AM; @@ -1204,7 +1206,7 @@ bool X86FastISel::X86VisitIntrinsicCall(IntrinsicInst &I) { return true; } case Intrinsic::dbg_declare: { - DbgDeclareInst *DI = cast<DbgDeclareInst>(&I); + const DbgDeclareInst *DI = cast<DbgDeclareInst>(&I); X86AddressMode AM; assert(DI->getAddress() && "Null address should be checked earlier!"); if (!X86SelectAddress(DI->getAddress(), AM)) @@ -1235,8 +1237,8 @@ bool X86FastISel::X86VisitIntrinsicCall(IntrinsicInst &I) { if (!isTypeLegal(RetTy, VT)) return false; - Value *Op1 = I.getOperand(1); - Value *Op2 = I.getOperand(2); + const Value *Op1 = I.getOperand(1); + const Value *Op2 = I.getOperand(2); unsigned Reg1 = getRegForValue(Op1); unsigned Reg2 = getRegForValue(Op2); @@ -1277,20 +1279,20 @@ bool X86FastISel::X86VisitIntrinsicCall(IntrinsicInst &I) { } } -bool X86FastISel::X86SelectCall(Instruction *I) { - CallInst *CI = cast<CallInst>(I); - Value *Callee = I->getOperand(0); +bool X86FastISel::X86SelectCall(const Instruction *I) { + const CallInst *CI = cast<CallInst>(I); + const Value *Callee = I->getOperand(0); // Can't handle inline asm yet. if (isa<InlineAsm>(Callee)) return false; // Handle intrinsic calls. - if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI)) + if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI)) return X86VisitIntrinsicCall(*II); // Handle only C and fastcc calling conventions for now. - CallSite CS(CI); + ImmutableCallSite CS(CI); CallingConv::ID CC = CS.getCallingConv(); if (CC != CallingConv::C && CC != CallingConv::Fast && @@ -1322,7 +1324,7 @@ bool X86FastISel::X86SelectCall(Instruction *I) { if (!X86SelectCallAddress(Callee, CalleeAM)) return false; unsigned CalleeOp = 0; - GlobalValue *GV = 0; + const GlobalValue *GV = 0; if (CalleeAM.GV != 0) { GV = CalleeAM.GV; } else if (CalleeAM.Base.Reg != 0) { @@ -1338,7 +1340,7 @@ bool X86FastISel::X86SelectCall(Instruction *I) { } // Deal with call operands first. - SmallVector<Value*, 8> ArgVals; + SmallVector<const Value *, 8> ArgVals; SmallVector<unsigned, 8> Args; SmallVector<EVT, 8> ArgVTs; SmallVector<ISD::ArgFlagsTy, 8> ArgFlags; @@ -1346,7 +1348,7 @@ bool X86FastISel::X86SelectCall(Instruction *I) { ArgVals.reserve(CS.arg_size()); ArgVTs.reserve(CS.arg_size()); ArgFlags.reserve(CS.arg_size()); - for (CallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end(); + for (ImmutableCallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end(); i != e; ++i) { unsigned Arg = getRegForValue(*i); if (Arg == 0) @@ -1454,7 +1456,7 @@ bool X86FastISel::X86SelectCall(Instruction *I) { X86AddressMode AM; AM.Base.Reg = StackPtr; AM.Disp = LocMemOffset; - Value *ArgVal = ArgVals[VA.getValNo()]; + const Value *ArgVal = ArgVals[VA.getValNo()]; // If this is a really simple value, emit this with the Value* version of // X86FastEmitStore. 
If it isn't simple, we don't want to do this, as it @@ -1585,7 +1587,7 @@ bool X86FastISel::X86SelectCall(Instruction *I) { bool -X86FastISel::TargetSelectInstruction(Instruction *I) { +X86FastISel::TargetSelectInstruction(const Instruction *I) { switch (I->getOpcode()) { default: break; case Instruction::Load: @@ -1633,7 +1635,7 @@ X86FastISel::TargetSelectInstruction(Instruction *I) { return false; } -unsigned X86FastISel::TargetMaterializeConstant(Constant *C) { +unsigned X86FastISel::TargetMaterializeConstant(const Constant *C) { EVT VT; if (!isTypeLegal(C->getType(), VT)) return false; @@ -1728,7 +1730,7 @@ unsigned X86FastISel::TargetMaterializeConstant(Constant *C) { return ResultReg; } -unsigned X86FastISel::TargetMaterializeAlloca(AllocaInst *C) { +unsigned X86FastISel::TargetMaterializeAlloca(const AllocaInst *C) { // Fail on dynamic allocas. At this point, getRegForValue has already // checked its CSE maps, so if we're here trying to handle a dynamic // alloca, we're not going to succeed. X86SelectAddress has a @@ -1753,12 +1755,13 @@ namespace llvm { llvm::FastISel *X86::createFastISel(MachineFunction &mf, DenseMap<const Value *, unsigned> &vm, DenseMap<const BasicBlock *, MachineBasicBlock *> &bm, - DenseMap<const AllocaInst *, int> &am + DenseMap<const AllocaInst *, int> &am, + std::vector<std::pair<MachineInstr*, unsigned> > &pn #ifndef NDEBUG - , SmallSet<Instruction*, 8> &cil + , SmallSet<const Instruction *, 8> &cil #endif ) { - return new X86FastISel(mf, vm, bm, am + return new X86FastISel(mf, vm, bm, am, pn #ifndef NDEBUG , cil #endif diff --git a/lib/Target/X86/X86FloatingPoint.cpp b/lib/Target/X86/X86FloatingPoint.cpp index 6d6fe77..93460ef 100644 --- a/lib/Target/X86/X86FloatingPoint.cpp +++ b/lib/Target/X86/X86FloatingPoint.cpp @@ -1088,8 +1088,7 @@ void FPS::handleSpecialFP(MachineBasicBlock::iterator &I) { // 'f' constraint. These should be turned into the current ST(x) register // in the machine instr. Also, any kills should be explicitly popped after // the inline asm. - unsigned Kills[7]; - unsigned NumKills = 0; + unsigned Kills = 0; for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { MachineOperand &Op = MI->getOperand(i); if (!Op.isReg() || Op.getReg() < X86::FP0 || Op.getReg() > X86::FP6) @@ -1103,7 +1102,7 @@ void FPS::handleSpecialFP(MachineBasicBlock::iterator &I) { // asm. We just remember it for now, and pop them all off at the end in // a batch. if (Op.isKill()) - Kills[NumKills++] = FPReg; + Kills |= 1U << FPReg; } // If this asm kills any FP registers (is the last use of them) we must @@ -1114,9 +1113,11 @@ void FPS::handleSpecialFP(MachineBasicBlock::iterator &I) { // Note: this might be a non-optimal pop sequence. We might be able to do // better by trying to pop in stack order or something. MachineBasicBlock::iterator InsertPt = MI; - while (NumKills) - freeStackSlotAfter(InsertPt, Kills[--NumKills]); - + while (Kills) { + unsigned FPReg = CountTrailingZeros_32(Kills); + freeStackSlotAfter(InsertPt, FPReg); + Kills &= ~(1U << FPReg); + } // Don't delete the inline asm! 
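The X86FloatingPoint change above replaces the fixed Kills[7] array with a bitmask and pops killed registers by peeling off the lowest set bit. A self-contained illustration of that loop, using __builtin_ctz (GCC/Clang) as a stand-in for LLVM's CountTrailingZeros_32:

// Self-contained sketch of the bitmask rewrite above: record killed FP
// registers as bits and pop them by repeatedly taking the lowest set bit.
#include <cstdio>

int main() {
  unsigned kills = 0;
  kills |= 1u << 2;   // FP2 killed by the inline asm
  kills |= 1u << 5;   // FP5 killed as well

  while (kills) {
    unsigned fpReg = static_cast<unsigned>(__builtin_ctz(kills)); // lowest set bit
    std::printf("freeing stack slot for FP%u\n", fpReg);
    kills &= ~(1u << fpReg); // clear it and continue with the next kill
  }
  return 0;
}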
return; } diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp index da45dac..fd8bb1e 100644 --- a/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -15,12 +15,10 @@ #define DEBUG_TYPE "x86-isel" #include "X86.h" #include "X86InstrBuilder.h" -#include "X86ISelLowering.h" #include "X86MachineFunctionInfo.h" #include "X86RegisterInfo.h" #include "X86Subtarget.h" #include "X86TargetMachine.h" -#include "llvm/GlobalValue.h" #include "llvm/Instructions.h" #include "llvm/Intrinsics.h" #include "llvm/Support/CFG.h" @@ -57,25 +55,24 @@ namespace { FrameIndexBase } BaseType; - struct { // This is really a union, discriminated by BaseType! - SDValue Reg; - int FrameIndex; - } Base; + // This is really a union, discriminated by BaseType! + SDValue Base_Reg; + int Base_FrameIndex; unsigned Scale; SDValue IndexReg; int32_t Disp; SDValue Segment; - GlobalValue *GV; - Constant *CP; - BlockAddress *BlockAddr; + const GlobalValue *GV; + const Constant *CP; + const BlockAddress *BlockAddr; const char *ES; int JT; unsigned Align; // CP alignment. unsigned char SymbolFlags; // X86II::MO_* X86ISelAddressMode() - : BaseType(RegBase), Scale(1), IndexReg(), Disp(0), + : BaseType(RegBase), Base_FrameIndex(0), Scale(1), IndexReg(), Disp(0), Segment(), GV(0), CP(0), BlockAddr(0), ES(0), JT(-1), Align(0), SymbolFlags(X86II::MO_NO_FLAG) { } @@ -85,7 +82,7 @@ namespace { } bool hasBaseOrIndexReg() const { - return IndexReg.getNode() != 0 || Base.Reg.getNode() != 0; + return IndexReg.getNode() != 0 || Base_Reg.getNode() != 0; } /// isRIPRelative - Return true if this addressing mode is already RIP @@ -93,24 +90,24 @@ namespace { bool isRIPRelative() const { if (BaseType != RegBase) return false; if (RegisterSDNode *RegNode = - dyn_cast_or_null<RegisterSDNode>(Base.Reg.getNode())) + dyn_cast_or_null<RegisterSDNode>(Base_Reg.getNode())) return RegNode->getReg() == X86::RIP; return false; } void setBaseReg(SDValue Reg) { BaseType = RegBase; - Base.Reg = Reg; + Base_Reg = Reg; } void dump() { dbgs() << "X86ISelAddressMode " << this << '\n'; - dbgs() << "Base.Reg "; - if (Base.Reg.getNode() != 0) - Base.Reg.getNode()->dump(); + dbgs() << "Base_Reg "; + if (Base_Reg.getNode() != 0) + Base_Reg.getNode()->dump(); else dbgs() << "nul"; - dbgs() << " Base.FrameIndex " << Base.FrameIndex << '\n' + dbgs() << " Base.FrameIndex " << Base_FrameIndex << '\n' << " Scale" << Scale << '\n' << "IndexReg "; if (IndexReg.getNode() != 0) @@ -162,7 +159,7 @@ namespace { class X86DAGToDAGISel : public SelectionDAGISel { /// X86Lowering - This object fully describes how to lower LLVM code to an /// X86-specific SelectionDAG. - X86TargetLowering &X86Lowering; + const X86TargetLowering &X86Lowering; /// Subtarget - Keep a pointer to the X86Subtarget around so that we can /// make the right decision when generating code for different targets. @@ -183,7 +180,7 @@ namespace { return "X86 DAG->DAG Instruction Selection"; } - virtual void EmitFunctionEntryCode(Function &Fn, MachineFunction &MF); + virtual void EmitFunctionEntryCode(); virtual bool IsProfitableToFold(SDValue N, SDNode *U, SDNode *Root) const; @@ -235,8 +232,8 @@ namespace { SDValue &Scale, SDValue &Index, SDValue &Disp, SDValue &Segment) { Base = (AM.BaseType == X86ISelAddressMode::FrameIndexBase) ? 
- CurDAG->getTargetFrameIndex(AM.Base.FrameIndex, TLI.getPointerTy()) : - AM.Base.Reg; + CurDAG->getTargetFrameIndex(AM.Base_FrameIndex, TLI.getPointerTy()) : + AM.Base_Reg; Scale = getI8Imm(AM.Scale); Index = AM.IndexReg; // These are 32-bit even in 64-bit mode since RIP relative offset @@ -546,11 +543,11 @@ void X86DAGToDAGISel::EmitSpecialCodeForMain(MachineBasicBlock *BB, TII->get(X86::CALLpcrel32)).addExternalSymbol("__main"); } -void X86DAGToDAGISel::EmitFunctionEntryCode(Function &Fn, MachineFunction &MF) { +void X86DAGToDAGISel::EmitFunctionEntryCode() { // If this is main, emit special code for main. - MachineBasicBlock *BB = MF.begin(); - if (Fn.hasExternalLinkage() && Fn.getName() == "main") - EmitSpecialCodeForMain(BB, MF.getFrameInfo()); + if (const Function *Fn = MF->getFunction()) + if (Fn->hasExternalLinkage() && Fn->getName() == "main") + EmitSpecialCodeForMain(MF->begin(), MF->getFrameInfo()); } @@ -675,8 +672,8 @@ bool X86DAGToDAGISel::MatchAddress(SDValue N, X86ISelAddressMode &AM) { // a smaller encoding and avoids a scaled-index. if (AM.Scale == 2 && AM.BaseType == X86ISelAddressMode::RegBase && - AM.Base.Reg.getNode() == 0) { - AM.Base.Reg = AM.IndexReg; + AM.Base_Reg.getNode() == 0) { + AM.Base_Reg = AM.IndexReg; AM.Scale = 1; } @@ -687,15 +684,34 @@ bool X86DAGToDAGISel::MatchAddress(SDValue N, X86ISelAddressMode &AM) { Subtarget->is64Bit() && AM.Scale == 1 && AM.BaseType == X86ISelAddressMode::RegBase && - AM.Base.Reg.getNode() == 0 && + AM.Base_Reg.getNode() == 0 && AM.IndexReg.getNode() == 0 && AM.SymbolFlags == X86II::MO_NO_FLAG && AM.hasSymbolicDisplacement()) - AM.Base.Reg = CurDAG->getRegister(X86::RIP, MVT::i64); + AM.Base_Reg = CurDAG->getRegister(X86::RIP, MVT::i64); return false; } +/// isLogicallyAddWithConstant - Return true if this node is semantically an +/// add of a value with a constantint. +static bool isLogicallyAddWithConstant(SDValue V, SelectionDAG *CurDAG) { + // Check for (add x, Cst) + if (V->getOpcode() == ISD::ADD) + return isa<ConstantSDNode>(V->getOperand(1)); + + // Check for (or x, Cst), where Cst & x == 0. + if (V->getOpcode() != ISD::OR || + !isa<ConstantSDNode>(V->getOperand(1))) + return false; + + // Handle "X | C" as "X + C" iff X is known to have C bits clear. + ConstantSDNode *CN = cast<ConstantSDNode>(V->getOperand(1)); + + // Check to see if the LHS & C is zero. + return CurDAG->MaskedValueIsZero(V->getOperand(0), CN->getAPIntValue()); +} + bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM, X86ISelListener &DeadNodes, unsigned Depth) { @@ -762,9 +778,9 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM, case ISD::FrameIndex: if (AM.BaseType == X86ISelAddressMode::RegBase - && AM.Base.Reg.getNode() == 0) { + && AM.Base_Reg.getNode() == 0) { AM.BaseType = X86ISelAddressMode::FrameIndexBase; - AM.Base.FrameIndex = cast<FrameIndexSDNode>(N)->getIndex(); + AM.Base_FrameIndex = cast<FrameIndexSDNode>(N)->getIndex(); return false; } break; @@ -787,8 +803,7 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM, // Okay, we know that we have a scale by now. However, if the scaled // value is an add of something and a constant, we can fold the // constant into the disp field here. 
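The isLogicallyAddWithConstant helper added above lets address matching treat (or x, C) like (x + C) whenever MaskedValueIsZero proves x and C share no set bits. A quick numeric check of that invariant:

// Numeric illustration of the invariant isLogicallyAddWithConstant relies
// on: when no bits of x overlap the constant c (x & c == 0), "x | c" and
// "x + c" produce the same value, so the OR can fold into an addressing
// mode exactly like an ADD.
#include <cassert>
#include <cstdint>

bool orActsLikeAdd(uint64_t x, uint64_t c) {
  return (x & c) == 0; // the condition MaskedValueIsZero proves symbolically
}

int main() {
  uint64_t base = 0xFFF0;  // low bits known clear (e.g. 16-byte aligned)
  uint64_t cst  = 0x7;
  assert(orActsLikeAdd(base, cst));
  assert((base | cst) == (base + cst));

  uint64_t other = 0xFFF4; // overlaps bit 2, so the rewrite would be wrong
  assert(!orActsLikeAdd(other, cst));
  assert((other | cst) != (other + cst));
  return 0;
}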
- if (ShVal.getNode()->getOpcode() == ISD::ADD && - isa<ConstantSDNode>(ShVal.getNode()->getOperand(1))) { + if (isLogicallyAddWithConstant(ShVal, CurDAG)) { AM.IndexReg = ShVal.getNode()->getOperand(0); ConstantSDNode *AddVal = cast<ConstantSDNode>(ShVal.getNode()->getOperand(1)); @@ -816,7 +831,7 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM, case X86ISD::MUL_IMM: // X*[3,5,9] -> X+X*[2,4,8] if (AM.BaseType == X86ISelAddressMode::RegBase && - AM.Base.Reg.getNode() == 0 && + AM.Base_Reg.getNode() == 0 && AM.IndexReg.getNode() == 0) { if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N.getNode()->getOperand(1))) @@ -847,7 +862,7 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM, Reg = N.getNode()->getOperand(0); } - AM.IndexReg = AM.Base.Reg = Reg; + AM.IndexReg = AM.Base_Reg = Reg; return false; } } @@ -892,8 +907,8 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM, // If the base is a register with multiple uses, this // transformation may save a mov. if ((AM.BaseType == X86ISelAddressMode::RegBase && - AM.Base.Reg.getNode() && - !AM.Base.Reg.getNode()->hasOneUse()) || + AM.Base_Reg.getNode() && + !AM.Base_Reg.getNode()->hasOneUse()) || AM.BaseType == X86ISelAddressMode::FrameIndexBase) --Cost; // If the folded LHS was interesting, this transformation saves @@ -963,9 +978,9 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM, // see if we can just put each operand into a register and fold at least // the add. if (AM.BaseType == X86ISelAddressMode::RegBase && - !AM.Base.Reg.getNode() && + !AM.Base_Reg.getNode() && !AM.IndexReg.getNode()) { - AM.Base.Reg = N.getNode()->getOperand(0); + AM.Base_Reg = N.getNode()->getOperand(0); AM.IndexReg = N.getNode()->getOperand(1); AM.Scale = 1; return false; @@ -975,14 +990,11 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM, case ISD::OR: // Handle "X | C" as "X + C" iff X is known to have C bits clear. - if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N.getOperand(1))) { + if (isLogicallyAddWithConstant(N, CurDAG)) { X86ISelAddressMode Backup = AM; + ConstantSDNode *CN = cast<ConstantSDNode>(N.getOperand(1)); uint64_t Offset = CN->getSExtValue(); - // Check to see if the LHS & C is zero. - if (!CurDAG->MaskedValueIsZero(N.getOperand(0), CN->getAPIntValue())) - break; - // Start with the LHS as an addr mode. if (!MatchAddressRecursively(N.getOperand(0), AM, DeadNodes, Depth+1) && // Address could not have picked a GV address for the displacement. @@ -1127,7 +1139,7 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM, /// specified addressing mode without any further recursion. bool X86DAGToDAGISel::MatchAddressBase(SDValue N, X86ISelAddressMode &AM) { // Is the base register already occupied? - if (AM.BaseType != X86ISelAddressMode::RegBase || AM.Base.Reg.getNode()) { + if (AM.BaseType != X86ISelAddressMode::RegBase || AM.Base_Reg.getNode()) { // If so, check to see if the scale index register is set. if (AM.IndexReg.getNode() == 0) { AM.IndexReg = N; @@ -1141,7 +1153,7 @@ bool X86DAGToDAGISel::MatchAddressBase(SDValue N, X86ISelAddressMode &AM) { // Default, generate it as a register. 
AM.BaseType = X86ISelAddressMode::RegBase; - AM.Base.Reg = N; + AM.Base_Reg = N; return false; } @@ -1157,8 +1169,8 @@ bool X86DAGToDAGISel::SelectAddr(SDNode *Op, SDValue N, SDValue &Base, EVT VT = N.getValueType(); if (AM.BaseType == X86ISelAddressMode::RegBase) { - if (!AM.Base.Reg.getNode()) - AM.Base.Reg = CurDAG->getRegister(0, VT); + if (!AM.Base_Reg.getNode()) + AM.Base_Reg = CurDAG->getRegister(0, VT); } if (!AM.IndexReg.getNode()) @@ -1185,7 +1197,7 @@ bool X86DAGToDAGISel::SelectScalarSSELoad(SDNode *Root, if (ISD::isNON_EXTLoad(PatternNodeWithChain.getNode()) && PatternNodeWithChain.hasOneUse() && IsProfitableToFold(N.getOperand(0), N.getNode(), Root) && - IsLegalToFold(N.getOperand(0), N.getNode(), Root)) { + IsLegalToFold(N.getOperand(0), N.getNode(), Root, OptLevel)) { LoadSDNode *LD = cast<LoadSDNode>(PatternNodeWithChain); if (!SelectAddr(Root, LD->getBasePtr(), Base, Scale, Index, Disp,Segment)) return false; @@ -1202,7 +1214,7 @@ bool X86DAGToDAGISel::SelectScalarSSELoad(SDNode *Root, ISD::isNON_EXTLoad(N.getOperand(0).getOperand(0).getNode()) && N.getOperand(0).getOperand(0).hasOneUse() && IsProfitableToFold(N.getOperand(0), N.getNode(), Root) && - IsLegalToFold(N.getOperand(0), N.getNode(), Root)) { + IsLegalToFold(N.getOperand(0), N.getNode(), Root, OptLevel)) { // Okay, this is a zero extending load. Fold it. LoadSDNode *LD = cast<LoadSDNode>(N.getOperand(0).getOperand(0)); if (!SelectAddr(Root, LD->getBasePtr(), Base, Scale, Index, Disp, Segment)) @@ -1234,10 +1246,10 @@ bool X86DAGToDAGISel::SelectLEAAddr(SDNode *Op, SDValue N, EVT VT = N.getValueType(); unsigned Complexity = 0; if (AM.BaseType == X86ISelAddressMode::RegBase) - if (AM.Base.Reg.getNode()) + if (AM.Base_Reg.getNode()) Complexity = 1; else - AM.Base.Reg = CurDAG->getRegister(0, VT); + AM.Base_Reg = CurDAG->getRegister(0, VT); else if (AM.BaseType == X86ISelAddressMode::FrameIndexBase) Complexity = 4; @@ -1265,7 +1277,7 @@ bool X86DAGToDAGISel::SelectLEAAddr(SDNode *Op, SDValue N, Complexity += 2; } - if (AM.Disp && (AM.Base.Reg.getNode() || AM.IndexReg.getNode())) + if (AM.Disp && (AM.Base_Reg.getNode() || AM.IndexReg.getNode())) Complexity++; // If it isn't worth using an LEA, reject it. @@ -1287,7 +1299,7 @@ bool X86DAGToDAGISel::SelectTLSADDRAddr(SDNode *Op, SDValue N, SDValue &Base, X86ISelAddressMode AM; AM.GV = GA->getGlobal(); AM.Disp += GA->getOffset(); - AM.Base.Reg = CurDAG->getRegister(0, N.getValueType()); + AM.Base_Reg = CurDAG->getRegister(0, N.getValueType()); AM.SymbolFlags = GA->getTargetFlags(); if (N.getValueType() == MVT::i32) { @@ -1309,7 +1321,7 @@ bool X86DAGToDAGISel::TryFoldLoad(SDNode *P, SDValue N, SDValue &Segment) { if (!ISD::isNON_EXTLoad(N.getNode()) || !IsProfitableToFold(N, P, P) || - !IsLegalToFold(N, P, P)) + !IsLegalToFold(N, P, P, OptLevel)) return false; return SelectAddr(P, N.getOperand(1), Base, Scale, Index, Disp, Segment); @@ -1841,6 +1853,9 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { // Look for (X86cmp (and $op, $imm), 0) and see if we can convert it to // use a smaller encoding. + if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse()) + // Look past the truncate if CMP is the only use of it. 
+ N0 = N0.getOperand(0); if (N0.getNode()->getOpcode() == ISD::AND && N0.getNode()->hasOneUse() && N0.getValueType() != MVT::i8 && X86::isZeroNode(N1)) { diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 64702f1..6ce9ab7 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -57,14 +57,6 @@ STATISTIC(NumTailCalls, "Number of tail calls"); static cl::opt<bool> DisableMMX("disable-mmx", cl::Hidden, cl::desc("Disable use of MMX")); -// Disable16Bit - 16-bit operations typically have a larger encoding than -// corresponding 32-bit instructions, and 16-bit code is slow on some -// processors. This is an experimental flag to disable 16-bit operations -// (which forces them to be Legalized to 32-bit operations). -static cl::opt<bool> -Disable16Bit("disable-16bit", cl::Hidden, - cl::desc("Disable use of 16-bit instructions")); - // Forward declarations. static SDValue getMOVL(SelectionDAG &DAG, DebugLoc dl, EVT VT, SDValue V1, SDValue V2); @@ -120,8 +112,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) // Set up the register classes. addRegisterClass(MVT::i8, X86::GR8RegisterClass); - if (!Disable16Bit) - addRegisterClass(MVT::i16, X86::GR16RegisterClass); + addRegisterClass(MVT::i16, X86::GR16RegisterClass); addRegisterClass(MVT::i32, X86::GR32RegisterClass); if (Subtarget->is64Bit()) addRegisterClass(MVT::i64, X86::GR64RegisterClass); @@ -130,11 +121,9 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) // We don't accept any truncstore of integer registers. setTruncStoreAction(MVT::i64, MVT::i32, Expand); - if (!Disable16Bit) - setTruncStoreAction(MVT::i64, MVT::i16, Expand); + setTruncStoreAction(MVT::i64, MVT::i16, Expand); setTruncStoreAction(MVT::i64, MVT::i8 , Expand); - if (!Disable16Bit) - setTruncStoreAction(MVT::i32, MVT::i16, Expand); + setTruncStoreAction(MVT::i32, MVT::i16, Expand); setTruncStoreAction(MVT::i32, MVT::i8 , Expand); setTruncStoreAction(MVT::i16, MVT::i8, Expand); @@ -285,13 +274,8 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::CTTZ , MVT::i8 , Custom); setOperationAction(ISD::CTLZ , MVT::i8 , Custom); setOperationAction(ISD::CTPOP , MVT::i16 , Expand); - if (Disable16Bit) { - setOperationAction(ISD::CTTZ , MVT::i16 , Expand); - setOperationAction(ISD::CTLZ , MVT::i16 , Expand); - } else { - setOperationAction(ISD::CTTZ , MVT::i16 , Custom); - setOperationAction(ISD::CTLZ , MVT::i16 , Custom); - } + setOperationAction(ISD::CTTZ , MVT::i16 , Custom); + setOperationAction(ISD::CTLZ , MVT::i16 , Custom); setOperationAction(ISD::CTPOP , MVT::i32 , Expand); setOperationAction(ISD::CTTZ , MVT::i32 , Custom); setOperationAction(ISD::CTLZ , MVT::i32 , Custom); @@ -308,19 +292,13 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::SELECT , MVT::i1 , Promote); // X86 wants to expand cmov itself. 
setOperationAction(ISD::SELECT , MVT::i8 , Custom); - if (Disable16Bit) - setOperationAction(ISD::SELECT , MVT::i16 , Expand); - else - setOperationAction(ISD::SELECT , MVT::i16 , Custom); + setOperationAction(ISD::SELECT , MVT::i16 , Custom); setOperationAction(ISD::SELECT , MVT::i32 , Custom); setOperationAction(ISD::SELECT , MVT::f32 , Custom); setOperationAction(ISD::SELECT , MVT::f64 , Custom); setOperationAction(ISD::SELECT , MVT::f80 , Custom); setOperationAction(ISD::SETCC , MVT::i8 , Custom); - if (Disable16Bit) - setOperationAction(ISD::SETCC , MVT::i16 , Expand); - else - setOperationAction(ISD::SETCC , MVT::i16 , Custom); + setOperationAction(ISD::SETCC , MVT::i16 , Custom); setOperationAction(ISD::SETCC , MVT::i32 , Custom); setOperationAction(ISD::SETCC , MVT::f32 , Custom); setOperationAction(ISD::SETCC , MVT::f64 , Custom); @@ -623,11 +601,11 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) // FIXME: In order to prevent SSE instructions being expanded to MMX ones // with -msoft-float, disable use of MMX as well. if (!UseSoftFloat && !DisableMMX && Subtarget->hasMMX()) { - addRegisterClass(MVT::v8i8, X86::VR64RegisterClass); - addRegisterClass(MVT::v4i16, X86::VR64RegisterClass); - addRegisterClass(MVT::v2i32, X86::VR64RegisterClass); - addRegisterClass(MVT::v2f32, X86::VR64RegisterClass); - addRegisterClass(MVT::v1i64, X86::VR64RegisterClass); + addRegisterClass(MVT::v8i8, X86::VR64RegisterClass, false); + addRegisterClass(MVT::v4i16, X86::VR64RegisterClass, false); + addRegisterClass(MVT::v2i32, X86::VR64RegisterClass, false); + addRegisterClass(MVT::v2f32, X86::VR64RegisterClass, false); + addRegisterClass(MVT::v1i64, X86::VR64RegisterClass, false); setOperationAction(ISD::ADD, MVT::v8i8, Legal); setOperationAction(ISD::ADD, MVT::v4i16, Legal); @@ -1067,23 +1045,27 @@ unsigned X86TargetLowering::getByValTypeAlignment(const Type *Ty) const { } /// getOptimalMemOpType - Returns the target specific optimal type for load -/// and store operations as a result of memset, memcpy, and memmove lowering. -/// If DstAlign is zero that means it's safe to destination alignment can -/// satisfy any constraint. Similarly if SrcAlign is zero it means there -/// isn't a need to check it against alignment requirement, probably because -/// the source does not need to be loaded. If 'NonScalarIntSafe' is true, that -/// means it's safe to return a non-scalar-integer type, e.g. constant string -/// source or loaded from memory. It returns EVT::Other if SelectionDAG should -/// be responsible for determining it. +/// and store operations as a result of memset, memcpy, and memmove +/// lowering. If DstAlign is zero that means it's safe to destination +/// alignment can satisfy any constraint. Similarly if SrcAlign is zero it +/// means there isn't a need to check it against alignment requirement, +/// probably because the source does not need to be loaded. If +/// 'NonScalarIntSafe' is true, that means it's safe to return a +/// non-scalar-integer type, e.g. empty string source, constant, or loaded +/// from memory. 'MemcpyStrSrc' indicates whether the memcpy source is +/// constant so it does not need to be loaded. +/// It returns EVT::Other if the type should be determined using generic +/// target-independent logic. 
EVT X86TargetLowering::getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign, bool NonScalarIntSafe, - SelectionDAG &DAG) const { + bool MemcpyStrSrc, + MachineFunction &MF) const { // FIXME: This turns off use of xmm stores for memset/memcpy on targets like // linux. This is because the stack realignment code can't handle certain // cases like PR2962. This should be removed when PR2962 is fixed. - const Function *F = DAG.getMachineFunction().getFunction(); + const Function *F = MF.getFunction(); if (NonScalarIntSafe && !F->hasFnAttr(Attribute::NoImplicitFloat)) { if (Size >= 16 && @@ -1095,11 +1077,14 @@ X86TargetLowering::getOptimalMemOpType(uint64_t Size, return MVT::v4i32; if (Subtarget->hasSSE1()) return MVT::v4f32; - } else if (Size >= 8 && + } else if (!MemcpyStrSrc && Size >= 8 && !Subtarget->is64Bit() && Subtarget->getStackAlignment() >= 8 && - Subtarget->hasSSE2()) + Subtarget->hasSSE2()) { + // Do not use f64 to lower memcpy if source is string constant. It's + // better to use i32 to avoid the loads. return MVT::f64; + } } if (Subtarget->is64Bit() && Size >= 8) return MVT::i64; @@ -1182,7 +1167,7 @@ bool X86TargetLowering::CanLowerReturn(CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<EVT> &OutTys, const SmallVectorImpl<ISD::ArgFlagsTy> &ArgsFlags, - SelectionDAG &DAG) { + SelectionDAG &DAG) const { SmallVector<CCValAssign, 16> RVLocs; CCState CCInfo(CallConv, isVarArg, getTargetMachine(), RVLocs, *DAG.getContext()); @@ -1193,7 +1178,9 @@ SDValue X86TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::OutputArg> &Outs, - DebugLoc dl, SelectionDAG &DAG) { + DebugLoc dl, SelectionDAG &DAG) const { + MachineFunction &MF = DAG.getMachineFunction(); + X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>(); SmallVector<CCValAssign, 16> RVLocs; CCState CCInfo(CallConv, isVarArg, getTargetMachine(), @@ -1211,7 +1198,8 @@ X86TargetLowering::LowerReturn(SDValue Chain, SmallVector<SDValue, 6> RetOps; RetOps.push_back(Chain); // Operand #0 = Chain (updated below) // Operand #1 = Bytes To Pop - RetOps.push_back(DAG.getTargetConstant(getBytesToPopOnReturn(), MVT::i16)); + RetOps.push_back(DAG.getTargetConstant(FuncInfo->getBytesToPopOnReturn(), + MVT::i16)); // Copy the result values into the output registers. for (unsigned i = 0; i != RVLocs.size(); ++i) { @@ -1287,7 +1275,7 @@ X86TargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals) { + SmallVectorImpl<SDValue> &InVals) const { // Assign locations to each value returned by this call. SmallVector<CCValAssign, 16> RVLocs; @@ -1304,7 +1292,7 @@ X86TargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag, // If this is x86-64, and we disabled SSE, we can't return FP values if ((CopyVT == MVT::f32 || CopyVT == MVT::f64) && ((Is64Bit || Ins[i].Flags.isInReg()) && !Subtarget->hasSSE1())) { - llvm_report_error("SSE register return with SSE disabled"); + report_fatal_error("SSE register return with SSE disabled"); } // If this is a call to a function that returns an fp value on the floating @@ -1384,7 +1372,8 @@ ArgsAreStructReturn(const SmallVectorImpl<ISD::InputArg> &Ins) { /// IsCalleePop - Determines whether the callee is required to pop its /// own arguments. Callee pop is necessary to support tail calls. 
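To make the control flow of the getOptimalMemOpType hunk above easier to follow, here is a heavily simplified standalone mirror of its policy; the subtarget queries and the elided 16-byte preconditions are collapsed into plain boolean parameters, so this is a sketch of the shape of the decision, not the real API:

// Simplified mirror of the memop-type policy above. 'canUseXmm' stands in
// for the 16-byte-store preconditions elided from the hunk; the other
// flags replace the subtarget queries.
#include <cstdint>
#include <iostream>

enum class MemChunk { V4I32, F64, I64, I32 };

MemChunk chooseMemOpType(uint64_t size, bool canUseXmm, bool hasSSE2,
                         bool is64Bit, bool stackAlignedTo8,
                         bool memcpyStrSrc) {
  if (canUseXmm && size >= 16)
    return MemChunk::V4I32;  // one 16-byte store per chunk
  // Avoid f64 when the source is a constant string: i32 loads fold better.
  if (!memcpyStrSrc && size >= 8 && !is64Bit && stackAlignedTo8 && hasSSE2)
    return MemChunk::F64;
  if (is64Bit && size >= 8)
    return MemChunk::I64;
  return MemChunk::I32;
}

int main() {
  // 32-byte memcpy on a 32-bit SSE2 target with an 8-byte-aligned stack
  std::cout << int(chooseMemOpType(32, false, true, false, true, false))
            << "\n"; // 1 == F64
  return 0;
}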
-bool X86TargetLowering::IsCalleePop(bool IsVarArg, CallingConv::ID CallingConv){ +bool X86TargetLowering::IsCalleePop(bool IsVarArg, + CallingConv::ID CallingConv) const { if (IsVarArg) return false; @@ -1457,7 +1446,7 @@ X86TargetLowering::LowerMemArgument(SDValue Chain, DebugLoc dl, SelectionDAG &DAG, const CCValAssign &VA, MachineFrameInfo *MFI, - unsigned i) { + unsigned i) const { // Create the nodes corresponding to a load from this parameter slot. ISD::ArgFlagsTy Flags = Ins[i].Flags; bool AlwaysUseMutable = FuncIsMadeTailCallSafe(CallConv); @@ -1496,7 +1485,8 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals) { + SmallVectorImpl<SDValue> &InVals) + const { MachineFunction &MF = DAG.getMachineFunction(); X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>(); @@ -1607,7 +1597,8 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, // the start of the first vararg value... for expansion of llvm.va_start. if (isVarArg) { if (Is64Bit || CallConv != CallingConv::X86_FastCall) { - VarArgsFrameIndex = MFI->CreateFixedObject(1, StackSize, true, false); + FuncInfo->setVarArgsFrameIndex(MFI->CreateFixedObject(1, StackSize, + true, false)); } if (Is64Bit) { unsigned TotalNumIntRegs = 0, TotalNumXMMRegs = 0; @@ -1655,16 +1646,17 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, // For X86-64, if there are vararg parameters that are passed via // registers, then we must store them to their spots on the stack so they // may be loaded by deferencing the result of va_next. - VarArgsGPOffset = NumIntRegs * 8; - VarArgsFPOffset = TotalNumIntRegs * 8 + NumXMMRegs * 16; - RegSaveFrameIndex = MFI->CreateStackObject(TotalNumIntRegs * 8 + - TotalNumXMMRegs * 16, 16, - false); + FuncInfo->setVarArgsGPOffset(NumIntRegs * 8); + FuncInfo->setVarArgsFPOffset(TotalNumIntRegs * 8 + NumXMMRegs * 16); + FuncInfo->setRegSaveFrameIndex( + MFI->CreateStackObject(TotalNumIntRegs * 8 + TotalNumXMMRegs * 16, 16, + false)); // Store the integer parameter registers. 
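// A standalone illustration of the offsets being recorded just above.  Under
// the SysV x86-64 ABI there are 6 integer argument registers (8 bytes of save
// area each) and 8 XMM registers (16 bytes each); gp_offset/fp_offset record
// how much of each block the named arguments already consumed, so va_arg can
// start after them.  Sketch only, not the LLVM code.
#include <cassert>

struct RegSaveOffsets { unsigned GPOffset, FPOffset, AreaSize; };

static RegSaveOffsets computeRegSaveOffsets(unsigned NumIntRegsUsed,
                                            unsigned NumXMMRegsUsed) {
  const unsigned TotalIntRegs = 6, TotalXMMRegs = 8;
  assert(NumIntRegsUsed <= TotalIntRegs && NumXMMRegsUsed <= TotalXMMRegs);
  RegSaveOffsets R;
  R.GPOffset = NumIntRegsUsed * 8;                      // VarArgsGPOffset
  R.FPOffset = TotalIntRegs * 8 + NumXMMRegsUsed * 16;  // VarArgsFPOffset
  R.AreaSize = TotalIntRegs * 8 + TotalXMMRegs * 16;    // reg-save stack object
  return R;
}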
SmallVector<SDValue, 8> MemOps; - SDValue RSFIN = DAG.getFrameIndex(RegSaveFrameIndex, getPointerTy()); - unsigned Offset = VarArgsGPOffset; + SDValue RSFIN = DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(), + getPointerTy()); + unsigned Offset = FuncInfo->getVarArgsGPOffset(); for (; NumIntRegs != TotalNumIntRegs; ++NumIntRegs) { SDValue FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), RSFIN, DAG.getIntPtrConstant(Offset)); @@ -1673,7 +1665,8 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64); SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, - PseudoSourceValue::getFixedStack(RegSaveFrameIndex), + PseudoSourceValue::getFixedStack( + FuncInfo->getRegSaveFrameIndex()), Offset, false, false, 0); MemOps.push_back(Store); Offset += 8; @@ -1688,8 +1681,10 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, SDValue ALVal = DAG.getCopyFromReg(DAG.getEntryNode(), dl, AL, MVT::i8); SaveXMMOps.push_back(ALVal); - SaveXMMOps.push_back(DAG.getIntPtrConstant(RegSaveFrameIndex)); - SaveXMMOps.push_back(DAG.getIntPtrConstant(VarArgsFPOffset)); + SaveXMMOps.push_back(DAG.getIntPtrConstant( + FuncInfo->getRegSaveFrameIndex())); + SaveXMMOps.push_back(DAG.getIntPtrConstant( + FuncInfo->getVarArgsFPOffset())); for (; NumXMMRegs != TotalNumXMMRegs; ++NumXMMRegs) { unsigned VReg = MF.addLiveIn(XMMArgRegs[NumXMMRegs], @@ -1710,22 +1705,22 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, // Some CCs need callee pop. if (IsCalleePop(isVarArg, CallConv)) { - BytesToPopOnReturn = StackSize; // Callee pops everything. + FuncInfo->setBytesToPopOnReturn(StackSize); // Callee pops everything. } else { - BytesToPopOnReturn = 0; // Callee pops nothing. + FuncInfo->setBytesToPopOnReturn(0); // Callee pops nothing. // If this is an sret function, the return should pop the hidden pointer. if (!Is64Bit && !IsTailCallConvention(CallConv) && ArgsAreStructReturn(Ins)) - BytesToPopOnReturn = 4; + FuncInfo->setBytesToPopOnReturn(4); } if (!Is64Bit) { - RegSaveFrameIndex = 0xAAAAAAA; // RegSaveFrameIndex is X86-64 only. + // RegSaveFrameIndex is X86-64 only. + FuncInfo->setRegSaveFrameIndex(0xAAAAAAA); if (CallConv == CallingConv::X86_FastCall) - VarArgsFrameIndex = 0xAAAAAAA; // fastcc functions can't have varargs. + // fastcc functions can't have varargs. + FuncInfo->setVarArgsFrameIndex(0xAAAAAAA); } - FuncInfo->setBytesToPopOnReturn(BytesToPopOnReturn); - return Chain; } @@ -1734,7 +1729,7 @@ X86TargetLowering::LowerMemOpCallTo(SDValue Chain, SDValue StackPtr, SDValue Arg, DebugLoc dl, SelectionDAG &DAG, const CCValAssign &VA, - ISD::ArgFlagsTy Flags) { + ISD::ArgFlagsTy Flags) const { const unsigned FirstStackArgOffset = (Subtarget->isTargetWin64() ? 32 : 0); unsigned LocMemOffset = FirstStackArgOffset + VA.getLocMemOffset(); SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset); @@ -1753,7 +1748,7 @@ SDValue X86TargetLowering::EmitTailCallLoadRetAddr(SelectionDAG &DAG, SDValue &OutRetAddr, SDValue Chain, bool IsTailCall, bool Is64Bit, - int FPDiff, DebugLoc dl) { + int FPDiff, DebugLoc dl) const { // Adjust the Return address stack slot. 
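// The recurring change in these hunks is that per-function state (varargs
// frame index, register-save index, bytes to pop) now lives in
// X86MachineFunctionInfo, fetched via MF.getInfo<>(), rather than in mutable
// members of the lowering class; that is what allows the lowering methods to
// become const.  A minimal sketch of the pattern, with illustrative names
// rather than the real LLVM types:
struct PerFunctionInfoSketch {
  int VarArgsFrameIndex = 0;
  int RegSaveFrameIndex = 0;
  unsigned BytesToPopOnReturn = 0;

  void setBytesToPopOnReturn(unsigned N) { BytesToPopOnReturn = N; }
  unsigned getBytesToPopOnReturn() const { return BytesToPopOnReturn; }
};

struct LoweringSketch {
  // A const method can still record per-function facts, because they are
  // stored on the function-info object rather than on *this.
  void recordCalleePop(PerFunctionInfoSketch &FI, unsigned Bytes) const {
    FI.setBytesToPopOnReturn(Bytes);
  }
};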
EVT VT = getPointerTy(); OutRetAddr = getReturnAddressFrameIndex(DAG); @@ -1790,7 +1785,7 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals) { + SmallVectorImpl<SDValue> &InVals) const { MachineFunction &MF = DAG.getMachineFunction(); bool Is64Bit = Subtarget->is64Bit(); bool IsStructRet = CallIsStructReturn(Outs); @@ -2060,7 +2055,7 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee, // We should use extra load for direct calls to dllimported functions in // non-JIT mode. - GlobalValue *GV = G->getGlobal(); + const GlobalValue *GV = G->getGlobal(); if (!GV->hasDLLImportLinkage()) { unsigned char OpFlags = 0; @@ -2219,8 +2214,9 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee, /// GetAlignedArgumentStackSize - Make the stack size align e.g 16n + 12 aligned /// for a 16 byte align requirement. -unsigned X86TargetLowering::GetAlignedArgumentStackSize(unsigned StackSize, - SelectionDAG& DAG) { +unsigned +X86TargetLowering::GetAlignedArgumentStackSize(unsigned StackSize, + SelectionDAG& DAG) const { MachineFunction &MF = DAG.getMachineFunction(); const TargetMachine &TM = MF.getTarget(); const TargetFrameInfo &TFI = *TM.getFrameInfo(); @@ -2308,9 +2304,11 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, // If -tailcallopt is specified, make fastcc functions tail-callable. const MachineFunction &MF = DAG.getMachineFunction(); const Function *CallerF = DAG.getMachineFunction().getFunction(); + CallingConv::ID CallerCC = CallerF->getCallingConv(); + bool CCMatch = CallerCC == CalleeCC; + if (GuaranteedTailCallOpt) { - if (IsTailCallConvention(CalleeCC) && - CallerF->getCallingConv() == CalleeCC) + if (IsTailCallConvention(CalleeCC) && CCMatch) return true; return false; } @@ -2348,13 +2346,43 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, CCState CCInfo(CalleeCC, false, getTargetMachine(), RVLocs, *DAG.getContext()); CCInfo.AnalyzeCallResult(Ins, RetCC_X86); - for (unsigned i = 0; i != RVLocs.size(); ++i) { + for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) { CCValAssign &VA = RVLocs[i]; if (VA.getLocReg() == X86::ST0 || VA.getLocReg() == X86::ST1) return false; } } + // If the calling conventions do not match, then we'd better make sure the + // results are returned in the same way as what the caller expects. + if (!CCMatch) { + SmallVector<CCValAssign, 16> RVLocs1; + CCState CCInfo1(CalleeCC, false, getTargetMachine(), + RVLocs1, *DAG.getContext()); + CCInfo1.AnalyzeCallResult(Ins, RetCC_X86); + + SmallVector<CCValAssign, 16> RVLocs2; + CCState CCInfo2(CallerCC, false, getTargetMachine(), + RVLocs2, *DAG.getContext()); + CCInfo2.AnalyzeCallResult(Ins, RetCC_X86); + + if (RVLocs1.size() != RVLocs2.size()) + return false; + for (unsigned i = 0, e = RVLocs1.size(); i != e; ++i) { + if (RVLocs1[i].isRegLoc() != RVLocs2[i].isRegLoc()) + return false; + if (RVLocs1[i].getLocInfo() != RVLocs2[i].getLocInfo()) + return false; + if (RVLocs1[i].isRegLoc()) { + if (RVLocs1[i].getLocReg() != RVLocs2[i].getLocReg()) + return false; + } else { + if (RVLocs1[i].getLocMemOffset() != RVLocs2[i].getLocMemOffset()) + return false; + } + } + } + // If the callee takes no arguments then go on to check the results of the // call. 
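// A sketch of the cross-convention check added above: a tail call between
// different calling conventions is only allowed when both conventions assign
// every return value to exactly the same place.  Loc is a stand-in for
// CCValAssign; the real code also compares the LocInfo kind.
#include <cstddef>
#include <vector>

struct Loc {
  bool InReg;
  unsigned Reg;        // valid when InReg
  unsigned MemOffset;  // valid when !InReg
};

static bool sameReturnLocations(const std::vector<Loc> &A,
                                const std::vector<Loc> &B) {
  if (A.size() != B.size())
    return false;
  for (std::size_t i = 0; i != A.size(); ++i) {
    if (A[i].InReg != B[i].InReg)
      return false;
    if (A[i].InReg ? A[i].Reg != B[i].Reg
                   : A[i].MemOffset != B[i].MemOffset)
      return false;
  }
  return true;
}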
if (!Outs.empty()) { @@ -2401,12 +2429,13 @@ FastISel * X86TargetLowering::createFastISel(MachineFunction &mf, DenseMap<const Value *, unsigned> &vm, DenseMap<const BasicBlock*, MachineBasicBlock*> &bm, - DenseMap<const AllocaInst *, int> &am + DenseMap<const AllocaInst *, int> &am, + std::vector<std::pair<MachineInstr*, unsigned> > &pn #ifndef NDEBUG - , SmallSet<Instruction*, 8> &cil + , SmallSet<const Instruction *, 8> &cil #endif - ) { - return X86::createFastISel(mf, vm, bm, am + ) const { + return X86::createFastISel(mf, vm, bm, am, pn #ifndef NDEBUG , cil #endif @@ -2419,7 +2448,7 @@ X86TargetLowering::createFastISel(MachineFunction &mf, //===----------------------------------------------------------------------===// -SDValue X86TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) { +SDValue X86TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) const { MachineFunction &MF = DAG.getMachineFunction(); X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>(); int ReturnAddrIndex = FuncInfo->getRAIndex(); @@ -3440,7 +3469,7 @@ unsigned getNumOfConsecutiveZeros(ShuffleVectorSDNode *SVOp, int NumElems, /// FIXME: split into pslldqi, psrldqi, palignr variants. static bool isVectorShift(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG, bool &isLeft, SDValue &ShVal, unsigned &ShAmt) { - int NumElems = SVOp->getValueType(0).getVectorNumElements(); + unsigned NumElems = SVOp->getValueType(0).getVectorNumElements(); isLeft = true; unsigned NumZeros = getNumOfConsecutiveZeros(SVOp, NumElems, true, DAG); @@ -3452,11 +3481,12 @@ static bool isVectorShift(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG, } bool SeenV1 = false; bool SeenV2 = false; - for (int i = NumZeros; i < NumElems; ++i) { - int Val = isLeft ? (i - NumZeros) : i; - int Idx = SVOp->getMaskElt(isLeft ? i : (i - NumZeros)); - if (Idx < 0) + for (unsigned i = NumZeros; i < NumElems; ++i) { + unsigned Val = isLeft ? (i - NumZeros) : i; + int Idx_ = SVOp->getMaskElt(isLeft ? i : (i - NumZeros)); + if (Idx_ < 0) continue; + unsigned Idx = (unsigned) Idx_; if (Idx < NumElems) SeenV1 = true; else { @@ -3479,7 +3509,8 @@ static bool isVectorShift(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG, /// static SDValue LowerBuildVectorv16i8(SDValue Op, unsigned NonZeros, unsigned NumNonZero, unsigned NumZero, - SelectionDAG &DAG, TargetLowering &TLI) { + SelectionDAG &DAG, + const TargetLowering &TLI) { if (NumNonZero > 8) return SDValue(); @@ -3524,8 +3555,9 @@ static SDValue LowerBuildVectorv16i8(SDValue Op, unsigned NonZeros, /// LowerBuildVectorv8i16 - Custom lower build_vector of v8i16. /// static SDValue LowerBuildVectorv8i16(SDValue Op, unsigned NonZeros, - unsigned NumNonZero, unsigned NumZero, - SelectionDAG &DAG, TargetLowering &TLI) { + unsigned NumNonZero, unsigned NumZero, + SelectionDAG &DAG, + const TargetLowering &TLI) { if (NumNonZero > 4) return SDValue(); @@ -3567,7 +3599,7 @@ static SDValue getVShift(bool isLeft, EVT VT, SDValue SrcOp, SDValue X86TargetLowering::LowerAsSplatVectorLoad(SDValue SrcOp, EVT VT, DebugLoc dl, - SelectionDAG &DAG) { + SelectionDAG &DAG) const { // Check if the scalar load can be widened into a vector load. 
And if // the address is "base + cst" see if the cst can be "absorbed" into @@ -3699,7 +3731,7 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, SmallVectorImpl<SDValue> &Elts, } SDValue -X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) { +X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { DebugLoc dl = Op.getDebugLoc(); // All zero's are handled with pxor, all one's are handled with pcmpeqd. if (ISD::isBuildVectorAllZeros(Op.getNode()) @@ -3965,7 +3997,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) { } SDValue -X86TargetLowering::LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) { +X86TargetLowering::LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const { // We support concatenate two MMX registers and place them in a MMX // register. This is better than doing a stack convert. DebugLoc dl = Op.getDebugLoc(); @@ -3998,7 +4030,8 @@ X86TargetLowering::LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) { // 4. [all] mov + pshuflw + pshufhw + N x (pextrw + pinsrw) static SDValue LowerVECTOR_SHUFFLEv8i16(ShuffleVectorSDNode *SVOp, - SelectionDAG &DAG, X86TargetLowering &TLI) { + SelectionDAG &DAG, + const X86TargetLowering &TLI) { SDValue V1 = SVOp->getOperand(0); SDValue V2 = SVOp->getOperand(1); DebugLoc dl = SVOp->getDebugLoc(); @@ -4241,7 +4274,8 @@ SDValue LowerVECTOR_SHUFFLEv8i16(ShuffleVectorSDNode *SVOp, // 3. [all] v8i16 shuffle + N x pextrw + rotate + pinsrw static SDValue LowerVECTOR_SHUFFLEv16i8(ShuffleVectorSDNode *SVOp, - SelectionDAG &DAG, X86TargetLowering &TLI) { + SelectionDAG &DAG, + const X86TargetLowering &TLI) { SDValue V1 = SVOp->getOperand(0); SDValue V2 = SVOp->getOperand(1); DebugLoc dl = SVOp->getDebugLoc(); @@ -4387,7 +4421,7 @@ SDValue LowerVECTOR_SHUFFLEv16i8(ShuffleVectorSDNode *SVOp, static SDValue RewriteAsNarrowerShuffle(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG, - TargetLowering &TLI, DebugLoc dl) { + const TargetLowering &TLI, DebugLoc dl) { EVT VT = SVOp->getValueType(0); SDValue V1 = SVOp->getOperand(0); SDValue V2 = SVOp->getOperand(1); @@ -4619,7 +4653,7 @@ LowerVECTOR_SHUFFLE_4wide(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG) { } SDValue -X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { +X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op); SDValue V1 = Op.getOperand(0); SDValue V2 = Op.getOperand(1); @@ -4806,7 +4840,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { SDValue X86TargetLowering::LowerEXTRACT_VECTOR_ELT_SSE4(SDValue Op, - SelectionDAG &DAG) { + SelectionDAG &DAG) const { EVT VT = Op.getValueType(); DebugLoc dl = Op.getDebugLoc(); if (VT.getSizeInBits() == 8) { @@ -4860,7 +4894,8 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT_SSE4(SDValue Op, SDValue -X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) { +X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op, + SelectionDAG &DAG) const { if (!isa<ConstantSDNode>(Op.getOperand(1))) return SDValue(); @@ -4924,7 +4959,8 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) { } SDValue -X86TargetLowering::LowerINSERT_VECTOR_ELT_SSE4(SDValue Op, SelectionDAG &DAG){ +X86TargetLowering::LowerINSERT_VECTOR_ELT_SSE4(SDValue Op, + SelectionDAG &DAG) const { EVT VT = Op.getValueType(); EVT EltVT = VT.getVectorElementType(); DebugLoc dl = Op.getDebugLoc(); @@ -4973,7 +5009,7 @@ X86TargetLowering::LowerINSERT_VECTOR_ELT_SSE4(SDValue Op, SelectionDAG 
&DAG){ } SDValue -X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) { +X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const { EVT VT = Op.getValueType(); EVT EltVT = VT.getVectorElementType(); @@ -5002,7 +5038,7 @@ X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) { } SDValue -X86TargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) { +X86TargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const { DebugLoc dl = Op.getDebugLoc(); if (Op.getValueType() == MVT::v2f32) return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2f32, @@ -5033,7 +5069,7 @@ X86TargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) { // be used to form addressing mode. These wrapped nodes will be selected // into MOV32ri. SDValue -X86TargetLowering::LowerConstantPool(SDValue Op, SelectionDAG &DAG) { +X86TargetLowering::LowerConstantPool(SDValue Op, SelectionDAG &DAG) const { ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op); // In PIC mode (unless we're in RIPRel PIC mode) we add an offset to the @@ -5066,7 +5102,7 @@ X86TargetLowering::LowerConstantPool(SDValue Op, SelectionDAG &DAG) { return Result; } -SDValue X86TargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) { +SDValue X86TargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const { JumpTableSDNode *JT = cast<JumpTableSDNode>(Op); // In PIC mode (unless we're in RIPRel PIC mode) we add an offset to the @@ -5100,7 +5136,7 @@ SDValue X86TargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) { } SDValue -X86TargetLowering::LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) { +X86TargetLowering::LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const { const char *Sym = cast<ExternalSymbolSDNode>(Op)->getSymbol(); // In PIC mode (unless we're in RIPRel PIC mode) we add an offset to the @@ -5136,12 +5172,12 @@ X86TargetLowering::LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) { } SDValue -X86TargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) { +X86TargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const { // Create the TargetBlockAddressAddress node. 
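// A sketch of the shared recipe in the constant-pool, jump-table,
// external-symbol and block-address lowerings above: wrap the target node,
// use the RIP-relative wrapper on x86-64 small/kernel code models, and in
// 32-bit PIC mode add the PIC base register afterwards because the wrapped
// address alone is not position-independent.  Names are illustrative
// assumptions, not LLVM API.
struct WrappedAddr {
  bool RIPRelative;   // select the WrapperRIP form
  bool AddPICBase;    // follow with an ADD of the global base register
};

static WrappedAddr classifyWrappedAddress(bool PICEnabled, bool Is64Bit,
                                          bool SmallOrKernelCodeModel) {
  WrappedAddr A;
  A.RIPRelative = Is64Bit && SmallOrKernelCodeModel;
  A.AddPICBase  = PICEnabled && !Is64Bit;   // no RIP-relative form on ia32
  return A;
}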
unsigned char OpFlags = Subtarget->ClassifyBlockAddressReference(); CodeModel::Model M = getTargetMachine().getCodeModel(); - BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress(); + const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress(); DebugLoc dl = Op.getDebugLoc(); SDValue Result = DAG.getBlockAddress(BA, getPointerTy(), /*isTarget=*/true, OpFlags); @@ -5210,7 +5246,7 @@ X86TargetLowering::LowerGlobalAddress(const GlobalValue *GV, DebugLoc dl, } SDValue -X86TargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) { +X86TargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const { const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); int64_t Offset = cast<GlobalAddressSDNode>(Op)->getOffset(); return LowerGlobalAddress(GV, Op.getDebugLoc(), Offset, DAG); @@ -5310,7 +5346,7 @@ static SDValue LowerToTLSExecModel(GlobalAddressSDNode *GA, SelectionDAG &DAG, } SDValue -X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) { +X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const { // TODO: implement the "local dynamic" model // TODO: implement the "initial exec"model for pic executables assert(Subtarget->isTargetELF() && @@ -5346,7 +5382,7 @@ X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) { /// LowerShift - Lower SRA_PARTS and friends, which return two i32 values and /// take a 2 x i32 value to shift plus a shift amount. -SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) { +SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) const { assert(Op.getNumOperands() == 3 && "Not a double-shift!"); EVT VT = Op.getValueType(); unsigned VTBits = VT.getSizeInBits(); @@ -5390,7 +5426,8 @@ SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) { return DAG.getMergeValues(Ops, 2, dl); } -SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) { +SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op, + SelectionDAG &DAG) const { EVT SrcVT = Op.getOperand(0).getValueType(); if (SrcVT.isVector()) { @@ -5426,7 +5463,7 @@ SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) { SDValue X86TargetLowering::BuildFILD(SDValue Op, EVT SrcVT, SDValue Chain, SDValue StackSlot, - SelectionDAG &DAG) { + SelectionDAG &DAG) const { // Build the FILD DebugLoc dl = Op.getDebugLoc(); SDVTList Tys; @@ -5463,7 +5500,8 @@ SDValue X86TargetLowering::BuildFILD(SDValue Op, EVT SrcVT, SDValue Chain, } // LowerUINT_TO_FP_i64 - 64-bit unsigned integer to double expansion. -SDValue X86TargetLowering::LowerUINT_TO_FP_i64(SDValue Op, SelectionDAG &DAG) { +SDValue X86TargetLowering::LowerUINT_TO_FP_i64(SDValue Op, + SelectionDAG &DAG) const { // This algorithm is not obvious. Here it is in C code, more or less: /* double uint64_to_double( uint32_t hi, uint32_t lo ) { @@ -5547,7 +5585,8 @@ SDValue X86TargetLowering::LowerUINT_TO_FP_i64(SDValue Op, SelectionDAG &DAG) { } // LowerUINT_TO_FP_i32 - 32-bit unsigned integer to float expansion. -SDValue X86TargetLowering::LowerUINT_TO_FP_i32(SDValue Op, SelectionDAG &DAG) { +SDValue X86TargetLowering::LowerUINT_TO_FP_i32(SDValue Op, + SelectionDAG &DAG) const { DebugLoc dl = Op.getDebugLoc(); // FP constant to bias correct the final result. 
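// A note on the 0x4330000000000000 constant used below: it is the bit pattern
// of 2^52, so OR-ing a 32-bit value into the low mantissa bits yields exactly
// 2^52 + x, and subtracting 2^52 afterwards leaves x converted to double
// without any int-to-fp instruction.  Plain C++ demonstration of the trick:
#include <cstdint>
#include <cstring>

static double uint32ToDouble(uint32_t X) {
  uint64_t Bits = 0x4330000000000000ULL | X;  // exponent of 2^52, mantissa = X
  double D;
  std::memcpy(&D, &Bits, sizeof(D));          // reinterpret the bits
  return D - 4503599627370496.0;              // subtract 2^52 exactly
}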
SDValue Bias = DAG.getConstantFP(BitsToDouble(0x4330000000000000ULL), @@ -5592,7 +5631,8 @@ SDValue X86TargetLowering::LowerUINT_TO_FP_i32(SDValue Op, SelectionDAG &DAG) { return Sub; } -SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) { +SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op, + SelectionDAG &DAG) const { SDValue N0 = Op.getOperand(0); DebugLoc dl = Op.getDebugLoc(); @@ -5628,7 +5668,7 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) { } std::pair<SDValue,SDValue> X86TargetLowering:: -FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, bool IsSigned) { +FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, bool IsSigned) const { DebugLoc dl = Op.getDebugLoc(); EVT DstTy = Op.getValueType(); @@ -5690,7 +5730,8 @@ FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, bool IsSigned) { return std::make_pair(FIST, StackSlot); } -SDValue X86TargetLowering::LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) { +SDValue X86TargetLowering::LowerFP_TO_SINT(SDValue Op, + SelectionDAG &DAG) const { if (Op.getValueType().isVector()) { if (Op.getValueType() == MVT::v2i32 && Op.getOperand(0).getValueType() == MVT::v2f64) { @@ -5709,7 +5750,8 @@ SDValue X86TargetLowering::LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) { FIST, StackSlot, NULL, 0, false, false, 0); } -SDValue X86TargetLowering::LowerFP_TO_UINT(SDValue Op, SelectionDAG &DAG) { +SDValue X86TargetLowering::LowerFP_TO_UINT(SDValue Op, + SelectionDAG &DAG) const { std::pair<SDValue,SDValue> Vals = FP_TO_INTHelper(Op, DAG, false); SDValue FIST = Vals.first, StackSlot = Vals.second; assert(FIST.getNode() && "Unexpected failure"); @@ -5719,7 +5761,8 @@ SDValue X86TargetLowering::LowerFP_TO_UINT(SDValue Op, SelectionDAG &DAG) { FIST, StackSlot, NULL, 0, false, false, 0); } -SDValue X86TargetLowering::LowerFABS(SDValue Op, SelectionDAG &DAG) { +SDValue X86TargetLowering::LowerFABS(SDValue Op, + SelectionDAG &DAG) const { LLVMContext *Context = DAG.getContext(); DebugLoc dl = Op.getDebugLoc(); EVT VT = Op.getValueType(); @@ -5746,7 +5789,7 @@ SDValue X86TargetLowering::LowerFABS(SDValue Op, SelectionDAG &DAG) { return DAG.getNode(X86ISD::FAND, dl, VT, Op.getOperand(0), Mask); } -SDValue X86TargetLowering::LowerFNEG(SDValue Op, SelectionDAG &DAG) { +SDValue X86TargetLowering::LowerFNEG(SDValue Op, SelectionDAG &DAG) const { LLVMContext *Context = DAG.getContext(); DebugLoc dl = Op.getDebugLoc(); EVT VT = Op.getValueType(); @@ -5781,7 +5824,7 @@ SDValue X86TargetLowering::LowerFNEG(SDValue Op, SelectionDAG &DAG) { } } -SDValue X86TargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) { +SDValue X86TargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const { LLVMContext *Context = DAG.getContext(); SDValue Op0 = Op.getOperand(0); SDValue Op1 = Op.getOperand(1); @@ -5857,7 +5900,7 @@ SDValue X86TargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) { /// Emit nodes that will be selected as "test Op0,Op0", or something /// equivalent. SDValue X86TargetLowering::EmitTest(SDValue Op, unsigned X86CC, - SelectionDAG &DAG) { + SelectionDAG &DAG) const { DebugLoc dl = Op.getDebugLoc(); // CF and OF aren't always set the way we want. Determine which @@ -5885,15 +5928,20 @@ SDValue X86TargetLowering::EmitTest(SDValue Op, unsigned X86CC, unsigned NumOperands = 0; switch (Op.getNode()->getOpcode()) { case ISD::ADD: - // Due to an isel shortcoming, be conservative if this add is likely to - // be selected as part of a load-modify-store instruction. 
When the root - // node in a match is a store, isel doesn't know how to remap non-chain - // non-flag uses of other nodes in the match, such as the ADD in this - // case. This leads to the ADD being left around and reselected, with - // the result being two adds in the output. + // Due to an isel shortcoming, be conservative if this add is + // likely to be selected as part of a load-modify-store + // instruction. When the root node in a match is a store, isel + // doesn't know how to remap non-chain non-flag uses of other + // nodes in the match, such as the ADD in this case. This leads + // to the ADD being left around and reselected, with the result + // being two adds in the output. Alas, even if none our users + // are stores, that doesn't prove we're O.K. Ergo, if we have + // any parents that aren't CopyToReg or SETCC, eschew INC/DEC. + // A better fix seems to require climbing the DAG back to the + // root, and it doesn't seem to be worth the effort. for (SDNode::use_iterator UI = Op.getNode()->use_begin(), - UE = Op.getNode()->use_end(); UI != UE; ++UI) - if (UI->getOpcode() == ISD::STORE) + UE = Op.getNode()->use_end(); UI != UE; ++UI) + if (UI->getOpcode() != ISD::CopyToReg && UI->getOpcode() != ISD::SETCC) goto default_case; if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getNode()->getOperand(1))) { @@ -5988,7 +6036,7 @@ SDValue X86TargetLowering::EmitTest(SDValue Op, unsigned X86CC, /// Emit nodes that will be selected as "cmp Op0,Op1", or something /// equivalent. SDValue X86TargetLowering::EmitCmp(SDValue Op0, SDValue Op1, unsigned X86CC, - SelectionDAG &DAG) { + SelectionDAG &DAG) const { if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op1)) if (C->getAPIntValue() == 0) return EmitTest(Op0, X86CC, DAG); @@ -5999,8 +6047,8 @@ SDValue X86TargetLowering::EmitCmp(SDValue Op0, SDValue Op1, unsigned X86CC, /// LowerToBT - Result of 'and' is compared against zero. Turn it into a BT node /// if it's possible. -static SDValue LowerToBT(SDValue And, ISD::CondCode CC, - DebugLoc dl, SelectionDAG &DAG) { +SDValue X86TargetLowering::LowerToBT(SDValue And, ISD::CondCode CC, + DebugLoc dl, SelectionDAG &DAG) const { SDValue Op0 = And.getOperand(0); SDValue Op1 = And.getOperand(1); if (Op0.getOpcode() == ISD::TRUNCATE) @@ -6031,11 +6079,13 @@ static SDValue LowerToBT(SDValue And, ISD::CondCode CC, } if (LHS.getNode()) { - // If LHS is i8, promote it to i16 with any_extend. There is no i8 BT + // If LHS is i8, promote it to i32 with any_extend. There is no i8 BT // instruction. Since the shift amount is in-range-or-undefined, we know - // that doing a bittest on the i16 value is ok. We extend to i32 because + // that doing a bittest on the i32 value is ok. We extend to i32 because // the encoding for the i16 version is larger than the i32 version. - if (LHS.getValueType() == MVT::i8) + // Also promote i16 to i32 for performance / code size reason. + if (LHS.getValueType() == MVT::i8 || + LHS.getValueType() == MVT::i16) LHS = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, LHS); // If the operand types disagree, extend the shift amount to match. 
Since @@ -6052,7 +6102,7 @@ static SDValue LowerToBT(SDValue And, ISD::CondCode CC, return SDValue(); } -SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) { +SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const { assert(Op.getValueType() == MVT::i8 && "SetCC type must be 8-bit integer"); SDValue Op0 = Op.getOperand(0); SDValue Op1 = Op.getOperand(1); @@ -6088,7 +6138,7 @@ SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) { DAG.getConstant(CCode, MVT::i8), Op0.getOperand(1)); } - bool isFP = Op.getOperand(1).getValueType().isFloatingPoint(); + bool isFP = Op1.getValueType().isFloatingPoint(); unsigned X86CC = TranslateX86CC(CC, isFP, Op0, Op1, DAG); if (X86CC == X86::COND_INVALID) return SDValue(); @@ -6106,7 +6156,7 @@ SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) { DAG.getConstant(X86CC, MVT::i8), Cond); } -SDValue X86TargetLowering::LowerVSETCC(SDValue Op, SelectionDAG &DAG) { +SDValue X86TargetLowering::LowerVSETCC(SDValue Op, SelectionDAG &DAG) const { SDValue Cond; SDValue Op0 = Op.getOperand(0); SDValue Op1 = Op.getOperand(1); @@ -6242,7 +6292,7 @@ static bool isX86LogicalCmp(SDValue Op) { return false; } -SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) { +SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { bool addTest = true; SDValue Cond = Op.getOperand(0); DebugLoc dl = Op.getDebugLoc(); @@ -6362,7 +6412,7 @@ static bool isXor1OfSetCC(SDValue Op) { return false; } -SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) { +SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const { bool addTest = true; SDValue Chain = Op.getOperand(0); SDValue Cond = Op.getOperand(1); @@ -6514,7 +6564,7 @@ SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) { // correct sequence. SDValue X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, - SelectionDAG &DAG) { + SelectionDAG &DAG) const { assert(Subtarget->isTargetCygMing() && "This should be used only on Cygwin/Mingw targets"); DebugLoc dl = Op.getDebugLoc(); @@ -6550,7 +6600,7 @@ X86TargetLowering::EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl, SDValue Size, unsigned Align, bool isVolatile, const Value *DstSV, - uint64_t DstSVOff) { + uint64_t DstSVOff) const { ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size); // If not DWORD aligned or size is more than the threshold, call the library. @@ -6689,8 +6739,10 @@ X86TargetLowering::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl, SDValue Chain, SDValue Dst, SDValue Src, SDValue Size, unsigned Align, bool isVolatile, bool AlwaysInline, - const Value *DstSV, uint64_t DstSVOff, - const Value *SrcSV, uint64_t SrcSVOff) { + const Value *DstSV, + uint64_t DstSVOff, + const Value *SrcSV, + uint64_t SrcSVOff) const { // This requires the copy size to be a constant, preferrably // within a subtarget-specific limit. ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size); @@ -6720,7 +6772,7 @@ X86TargetLowering::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl, Count, InFlag); InFlag = Chain.getValue(1); Chain = DAG.getCopyToReg(Chain, dl, Subtarget->is64Bit() ? X86::RDI : - X86::EDI, + X86::EDI, Dst, InFlag); InFlag = Chain.getValue(1); Chain = DAG.getCopyToReg(Chain, dl, Subtarget->is64Bit() ? 
X86::RSI : @@ -6756,14 +6808,18 @@ X86TargetLowering::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl, &Results[0], Results.size()); } -SDValue X86TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) { +SDValue X86TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const { + MachineFunction &MF = DAG.getMachineFunction(); + X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>(); + const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); DebugLoc dl = Op.getDebugLoc(); if (!Subtarget->is64Bit()) { // vastart just stores the address of the VarArgsFrameIndex slot into the // memory location argument. - SDValue FR = DAG.getFrameIndex(VarArgsFrameIndex, getPointerTy()); + SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), + getPointerTy()); return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1), SV, 0, false, false, 0); } @@ -6777,7 +6833,8 @@ SDValue X86TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) { SDValue FIN = Op.getOperand(1); // Store gp_offset SDValue Store = DAG.getStore(Op.getOperand(0), dl, - DAG.getConstant(VarArgsGPOffset, MVT::i32), + DAG.getConstant(FuncInfo->getVarArgsGPOffset(), + MVT::i32), FIN, SV, 0, false, false, 0); MemOps.push_back(Store); @@ -6785,14 +6842,16 @@ SDValue X86TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) { FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), FIN, DAG.getIntPtrConstant(4)); Store = DAG.getStore(Op.getOperand(0), dl, - DAG.getConstant(VarArgsFPOffset, MVT::i32), + DAG.getConstant(FuncInfo->getVarArgsFPOffset(), + MVT::i32), FIN, SV, 0, false, false, 0); MemOps.push_back(Store); // Store ptr to overflow_arg_area FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), FIN, DAG.getIntPtrConstant(4)); - SDValue OVFIN = DAG.getFrameIndex(VarArgsFrameIndex, getPointerTy()); + SDValue OVFIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), + getPointerTy()); Store = DAG.getStore(Op.getOperand(0), dl, OVFIN, FIN, SV, 0, false, false, 0); MemOps.push_back(Store); @@ -6800,7 +6859,8 @@ SDValue X86TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) { // Store ptr to reg_save_area. FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), FIN, DAG.getIntPtrConstant(8)); - SDValue RSFIN = DAG.getFrameIndex(RegSaveFrameIndex, getPointerTy()); + SDValue RSFIN = DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(), + getPointerTy()); Store = DAG.getStore(Op.getOperand(0), dl, RSFIN, FIN, SV, 0, false, false, 0); MemOps.push_back(Store); @@ -6808,18 +6868,18 @@ SDValue X86TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) { &MemOps[0], MemOps.size()); } -SDValue X86TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) { +SDValue X86TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const { // X86-64 va_list is a struct { i32, i32, i8*, i8* }. assert(Subtarget->is64Bit() && "This code only handles 64-bit va_arg!"); SDValue Chain = Op.getOperand(0); SDValue SrcPtr = Op.getOperand(1); SDValue SrcSV = Op.getOperand(2); - llvm_report_error("VAArgInst is not yet implemented for x86-64!"); + report_fatal_error("VAArgInst is not yet implemented for x86-64!"); return SDValue(); } -SDValue X86TargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) { +SDValue X86TargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) const { // X86-64 va_list is a struct { i32, i32, i8*, i8* }. 
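// A sketch of the SysV x86-64 va_list that the LowerVASTART stores above
// populate; the field offsets (0, 4, 8, 16) match the +4/+4/+8 pointer
// increments used when writing the four fields.
#include <cstdint>

struct VAListX86_64 {
  uint32_t gp_offset;          // offset 0:  next GP slot in the save area
  uint32_t fp_offset;          // offset 4:  next XMM slot in the save area
  void    *overflow_arg_area;  // offset 8:  next stack-passed argument
  void    *reg_save_area;      // offset 16: base of the register save area
};

static_assert(sizeof(VAListX86_64) == 24, "i32, i32, i8*, i8* layout");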
assert(Subtarget->is64Bit() && "This code only handles 64-bit va_copy!"); SDValue Chain = Op.getOperand(0); @@ -6835,7 +6895,7 @@ SDValue X86TargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) { } SDValue -X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) { +X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const { DebugLoc dl = Op.getDebugLoc(); unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); switch (IntNo) { @@ -7076,7 +7136,8 @@ X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) { } } -SDValue X86TargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) { +SDValue X86TargetLowering::LowerRETURNADDR(SDValue Op, + SelectionDAG &DAG) const { unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); DebugLoc dl = Op.getDebugLoc(); @@ -7097,7 +7158,7 @@ SDValue X86TargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) { RetAddrFI, NULL, 0, false, false, 0); } -SDValue X86TargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) { +SDValue X86TargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const { MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); MFI->setFrameAddressIsTaken(true); EVT VT = Op.getValueType(); @@ -7112,12 +7173,11 @@ SDValue X86TargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) { } SDValue X86TargetLowering::LowerFRAME_TO_ARGS_OFFSET(SDValue Op, - SelectionDAG &DAG) { + SelectionDAG &DAG) const { return DAG.getIntPtrConstant(2*TD->getPointerSize()); } -SDValue X86TargetLowering::LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) -{ +SDValue X86TargetLowering::LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const { MachineFunction &MF = DAG.getMachineFunction(); SDValue Chain = Op.getOperand(0); SDValue Offset = Op.getOperand(1); @@ -7141,7 +7201,7 @@ SDValue X86TargetLowering::LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) } SDValue X86TargetLowering::LowerTRAMPOLINE(SDValue Op, - SelectionDAG &DAG) { + SelectionDAG &DAG) const { SDValue Root = Op.getOperand(0); SDValue Trmp = Op.getOperand(1); // trampoline SDValue FPtr = Op.getOperand(2); // nested function @@ -7232,7 +7292,7 @@ SDValue X86TargetLowering::LowerTRAMPOLINE(SDValue Op, InRegCount += (TD->getTypeSizeInBits(*I) + 31) / 32; if (InRegCount > 2) { - llvm_report_error("Nest register in use - reduce number of inreg parameters!"); + report_fatal_error("Nest register in use - reduce number of inreg parameters!"); } } break; @@ -7281,7 +7341,8 @@ SDValue X86TargetLowering::LowerTRAMPOLINE(SDValue Op, } } -SDValue X86TargetLowering::LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) { +SDValue X86TargetLowering::LowerFLT_ROUNDS_(SDValue Op, + SelectionDAG &DAG) const { /* The rounding mode is in bits 11:10 of FPSR, and has the following settings: @@ -7343,7 +7404,7 @@ SDValue X86TargetLowering::LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) { ISD::TRUNCATE : ISD::ZERO_EXTEND), dl, VT, RetVal); } -SDValue X86TargetLowering::LowerCTLZ(SDValue Op, SelectionDAG &DAG) { +SDValue X86TargetLowering::LowerCTLZ(SDValue Op, SelectionDAG &DAG) const { EVT VT = Op.getValueType(); EVT OpVT = VT; unsigned NumBits = VT.getSizeInBits(); @@ -7377,7 +7438,7 @@ SDValue X86TargetLowering::LowerCTLZ(SDValue Op, SelectionDAG &DAG) { return Op; } -SDValue X86TargetLowering::LowerCTTZ(SDValue Op, SelectionDAG &DAG) { +SDValue X86TargetLowering::LowerCTTZ(SDValue Op, SelectionDAG &DAG) const { EVT VT = Op.getValueType(); EVT OpVT = VT; unsigned NumBits = VT.getSizeInBits(); @@ -7407,7 
+7468,7 @@ SDValue X86TargetLowering::LowerCTTZ(SDValue Op, SelectionDAG &DAG) { return Op; } -SDValue X86TargetLowering::LowerMUL_V2I64(SDValue Op, SelectionDAG &DAG) { +SDValue X86TargetLowering::LowerMUL_V2I64(SDValue Op, SelectionDAG &DAG) const { EVT VT = Op.getValueType(); assert(VT == MVT::v2i64 && "Only know how to lower V2I64 multiply"); DebugLoc dl = Op.getDebugLoc(); @@ -7452,7 +7513,7 @@ SDValue X86TargetLowering::LowerMUL_V2I64(SDValue Op, SelectionDAG &DAG) { } -SDValue X86TargetLowering::LowerXALUO(SDValue Op, SelectionDAG &DAG) { +SDValue X86TargetLowering::LowerXALUO(SDValue Op, SelectionDAG &DAG) const { // Lower the "add/sub/mul with overflow" instruction into a regular ins plus // a "setcc" instruction that checks the overflow flag. The "brcond" lowering // looks for this combo and may remove the "setcc" instruction if the "setcc" @@ -7520,7 +7581,7 @@ SDValue X86TargetLowering::LowerXALUO(SDValue Op, SelectionDAG &DAG) { return Sum; } -SDValue X86TargetLowering::LowerCMP_SWAP(SDValue Op, SelectionDAG &DAG) { +SDValue X86TargetLowering::LowerCMP_SWAP(SDValue Op, SelectionDAG &DAG) const { EVT T = Op.getValueType(); DebugLoc dl = Op.getDebugLoc(); unsigned Reg = 0; @@ -7551,7 +7612,7 @@ SDValue X86TargetLowering::LowerCMP_SWAP(SDValue Op, SelectionDAG &DAG) { } SDValue X86TargetLowering::LowerREADCYCLECOUNTER(SDValue Op, - SelectionDAG &DAG) { + SelectionDAG &DAG) const { assert(Subtarget->is64Bit() && "Result not type legalized?"); SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag); SDValue TheChain = Op.getOperand(0); @@ -7569,7 +7630,7 @@ SDValue X86TargetLowering::LowerREADCYCLECOUNTER(SDValue Op, return DAG.getMergeValues(Ops, 2, dl); } -SDValue X86TargetLowering::LowerLOAD_SUB(SDValue Op, SelectionDAG &DAG) { +SDValue X86TargetLowering::LowerLOAD_SUB(SDValue Op, SelectionDAG &DAG) const { SDNode *Node = Op.getNode(); DebugLoc dl = Node->getDebugLoc(); EVT T = Node->getValueType(0); @@ -7585,7 +7646,7 @@ SDValue X86TargetLowering::LowerLOAD_SUB(SDValue Op, SelectionDAG &DAG) { /// LowerOperation - Provide custom lowering hooks for some operations. /// -SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) { +SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { switch (Op.getOpcode()) { default: llvm_unreachable("Should not custom lower this!"); case ISD::ATOMIC_CMP_SWAP: return LowerCMP_SWAP(Op,DAG); @@ -7643,7 +7704,7 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) { void X86TargetLowering:: ReplaceATOMIC_BINARY_64(SDNode *Node, SmallVectorImpl<SDValue>&Results, - SelectionDAG &DAG, unsigned NewOp) { + SelectionDAG &DAG, unsigned NewOp) const { EVT T = Node->getValueType(0); DebugLoc dl = Node->getDebugLoc(); assert (T == MVT::i64 && "Only know how to expand i64 atomics"); @@ -7668,7 +7729,7 @@ ReplaceATOMIC_BINARY_64(SDNode *Node, SmallVectorImpl<SDValue>&Results, /// with a new node built out of custom code. void X86TargetLowering::ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results, - SelectionDAG &DAG) { + SelectionDAG &DAG) const { DebugLoc dl = N->getDebugLoc(); switch (N->getOpcode()) { default: @@ -7941,9 +8002,9 @@ bool X86TargetLowering::isNarrowingProfitable(EVT VT1, EVT VT2) const { bool X86TargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &M, EVT VT) const { - // Only do shuffles on 128-bit vector types for now. + // Very little shuffling can be done for 64-bit vectors right now. 
if (VT.getSizeInBits() == 64) - return false; + return isPALIGNRMask(M, VT, Subtarget->hasSSSE3()); // FIXME: pshufb, blends, shifts. return (VT.getVectorNumElements() == 2 || @@ -8448,8 +8509,7 @@ X86TargetLowering::EmitVAStartSaveXMMRegsWithCustomInserter( MachineBasicBlock * X86TargetLowering::EmitLoweredSelect(MachineInstr *MI, - MachineBasicBlock *BB, - DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const { + MachineBasicBlock *BB) const { const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); DebugLoc DL = MI->getDebugLoc(); @@ -8478,12 +8538,9 @@ X86TargetLowering::EmitLoweredSelect(MachineInstr *MI, F->insert(It, sinkMBB); // Update machine-CFG edges by first adding all successors of the current // block to the new block which will contain the Phi node for the select. - // Also inform sdisel of the edge changes. for (MachineBasicBlock::succ_iterator I = BB->succ_begin(), - E = BB->succ_end(); I != E; ++I) { - EM->insert(std::make_pair(*I, sinkMBB)); + E = BB->succ_end(); I != E; ++I) sinkMBB->addSuccessor(*I); - } // Next, remove all successors of the current block, and add the true // and fallthrough blocks as its successors. while (!BB->succ_empty()) @@ -8495,27 +8552,22 @@ X86TargetLowering::EmitLoweredSelect(MachineInstr *MI, // copy0MBB: // %FalseValue = ... // # fallthrough to sinkMBB - BB = copy0MBB; - - // Update machine-CFG edges - BB->addSuccessor(sinkMBB); + copy0MBB->addSuccessor(sinkMBB); // sinkMBB: // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ] // ... - BB = sinkMBB; - BuildMI(BB, DL, TII->get(X86::PHI), MI->getOperand(0).getReg()) + BuildMI(sinkMBB, DL, TII->get(X86::PHI), MI->getOperand(0).getReg()) .addReg(MI->getOperand(1).getReg()).addMBB(copy0MBB) .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB); F->DeleteMachineInstr(MI); // The pseudo instruction is gone now. - return BB; + return sinkMBB; } MachineBasicBlock * X86TargetLowering::EmitLoweredMingwAlloca(MachineInstr *MI, - MachineBasicBlock *BB, - DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const { + MachineBasicBlock *BB) const { const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); DebugLoc DL = MI->getDebugLoc(); MachineFunction *F = BB->getParent(); @@ -8538,12 +8590,11 @@ X86TargetLowering::EmitLoweredMingwAlloca(MachineInstr *MI, MachineBasicBlock * X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, - MachineBasicBlock *BB, - DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const { + MachineBasicBlock *BB) const { switch (MI->getOpcode()) { default: assert(false && "Unexpected instr type to insert"); case X86::MINGW_ALLOCA: - return EmitLoweredMingwAlloca(MI, BB, EM); + return EmitLoweredMingwAlloca(MI, BB); case X86::CMOV_GR8: case X86::CMOV_V1I64: case X86::CMOV_FR32: @@ -8556,7 +8607,7 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, case X86::CMOV_RFP32: case X86::CMOV_RFP64: case X86::CMOV_RFP80: - return EmitLoweredSelect(MI, BB, EM); + return EmitLoweredSelect(MI, BB); case X86::FP32_TO_INT16_IN_MEM: case X86::FP32_TO_INT32_IN_MEM: @@ -8638,21 +8689,6 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, F->DeleteMachineInstr(MI); // The pseudo instruction is gone now. return BB; } - // DBG_VALUE. Only the frame index case is done here. 
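// A sketch of what EmitLoweredSelect() above builds for a CMOV_* pseudo:
// since these register classes have no real conditional-move instruction, the
// pseudo "Result = Cond ? TrueVal : FalseVal" is expanded into a diamond of
// blocks with a PHI at the join, as its in-code comments describe:
//
//   thisMBB:   conditional branch to sinkMBB
//   copy0MBB:  compute FalseValue, fall through
//   sinkMBB:   Result = phi [FalseValue, copy0MBB], [TrueValue, thisMBB]
//
// The scalar semantics of that expansion, for reference:
template <typename T>
static T selectExpandedAsDiamond(bool Cond, T TrueVal, T FalseVal) {
  if (Cond)
    return TrueVal;   // reached sinkMBB directly from thisMBB
  return FalseVal;    // reached sinkMBB through copy0MBB
}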
- case X86::DBG_VALUE: { - const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); - DebugLoc DL = MI->getDebugLoc(); - X86AddressMode AM; - MachineFunction *F = BB->getParent(); - AM.BaseType = X86AddressMode::FrameIndexBase; - AM.Base.FrameIndex = MI->getOperand(0).getImm(); - addFullAddress(BuildMI(BB, DL, TII->get(X86::DBG_VALUE)), AM). - addImm(MI->getOperand(1).getImm()). - addMetadata(MI->getOperand(2).getMetadata()); - F->DeleteMachineInstr(MI); // Remove pseudo. - return BB; - } - // String/text processing lowering. case X86::PCMPISTRM128REG: return EmitPCMP(MI, BB, 3, false /* in-mem */); @@ -8874,7 +8910,8 @@ void X86TargetLowering::computeMaskedBitsForTargetNode(const SDValue Op, /// isGAPlusOffset - Returns true (and the GlobalValue and the offset) if the /// node is a GlobalAddress + offset. bool X86TargetLowering::isGAPlusOffset(SDNode *N, - GlobalValue* &GA, int64_t &Offset) const{ + const GlobalValue* &GA, + int64_t &Offset) const { if (N->getOpcode() == X86ISD::Wrapper) { if (isa<GlobalAddressSDNode>(N->getOperand(0))) { GA = cast<GlobalAddressSDNode>(N->getOperand(0))->getGlobal(); @@ -9558,9 +9595,13 @@ static SDValue PerformShiftCombine(SDNode* N, SelectionDAG &DAG, } static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG, + TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget *Subtarget) { + if (DCI.isBeforeLegalizeOps()) + return SDValue(); + EVT VT = N->getValueType(0); - if (VT != MVT::i64 || !Subtarget->is64Bit()) + if (VT != MVT::i16 && VT != MVT::i32 && VT != MVT::i64) return SDValue(); // fold (or (x << c) | (y >> (64 - c))) ==> (shld64 x, y, c) @@ -9570,6 +9611,8 @@ static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG, std::swap(N0, N1); if (N0.getOpcode() != ISD::SHL || N1.getOpcode() != ISD::SRL) return SDValue(); + if (!N0.hasOneUse() || !N1.hasOneUse()) + return SDValue(); SDValue ShAmt0 = N0.getOperand(1); if (ShAmt0.getValueType() != MVT::i8) @@ -9592,10 +9635,11 @@ static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG, std::swap(ShAmt0, ShAmt1); } + unsigned Bits = VT.getSizeInBits(); if (ShAmt1.getOpcode() == ISD::SUB) { SDValue Sum = ShAmt1.getOperand(0); if (ConstantSDNode *SumC = dyn_cast<ConstantSDNode>(Sum)) { - if (SumC->getSExtValue() == 64 && + if (SumC->getSExtValue() == Bits && ShAmt1.getOperand(1) == ShAmt0) return DAG.getNode(Opc, DL, VT, Op0, Op1, @@ -9605,7 +9649,7 @@ static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG, } else if (ConstantSDNode *ShAmt1C = dyn_cast<ConstantSDNode>(ShAmt1)) { ConstantSDNode *ShAmt0C = dyn_cast<ConstantSDNode>(ShAmt0); if (ShAmt0C && - ShAmt0C->getSExtValue() + ShAmt1C->getSExtValue() == 64) + ShAmt0C->getSExtValue() + ShAmt1C->getSExtValue() == Bits) return DAG.getNode(Opc, DL, VT, N0.getOperand(0), N1.getOperand(0), DAG.getNode(ISD::TRUNCATE, DL, @@ -9769,8 +9813,9 @@ static SDValue PerformBTCombine(SDNode *N, unsigned BitWidth = Op1.getValueSizeInBits(); APInt DemandedMask = APInt::getLowBitsSet(BitWidth, Log2_32(BitWidth)); APInt KnownZero, KnownOne; - TargetLowering::TargetLoweringOpt TLO(DAG); - TargetLowering &TLI = DAG.getTargetLoweringInfo(); + TargetLowering::TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(), + !DCI.isBeforeLegalizeOps()); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); if (TLO.ShrinkDemandedConstant(Op1, DemandedMask) || TLI.SimplifyDemandedBits(Op1, DemandedMask, KnownZero, KnownOne, TLO)) DCI.CommitTargetLoweringOpt(TLO); @@ -9883,7 +9928,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, case ISD::SHL: case 
ISD::SRA: case ISD::SRL: return PerformShiftCombine(N, DAG, Subtarget); - case ISD::OR: return PerformOrCombine(N, DAG, Subtarget); + case ISD::OR: return PerformOrCombine(N, DAG, DCI, Subtarget); case ISD::STORE: return PerformSTORECombine(N, DAG, Subtarget); case X86ISD::FXOR: case X86ISD::FOR: return PerformFORCombine(N, DAG); @@ -9897,6 +9942,111 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, return SDValue(); } +/// isTypeDesirableForOp - Return true if the target has native support for +/// the specified value type and it is 'desirable' to use the type for the +/// given node type. e.g. On x86 i16 is legal, but undesirable since i16 +/// instruction encodings are longer and some i16 instructions are slow. +bool X86TargetLowering::isTypeDesirableForOp(unsigned Opc, EVT VT) const { + if (!isTypeLegal(VT)) + return false; + if (VT != MVT::i16) + return true; + + switch (Opc) { + default: + return true; + case ISD::LOAD: + case ISD::SIGN_EXTEND: + case ISD::ZERO_EXTEND: + case ISD::ANY_EXTEND: + case ISD::SHL: + case ISD::SRL: + case ISD::SUB: + case ISD::ADD: + case ISD::MUL: + case ISD::AND: + case ISD::OR: + case ISD::XOR: + return false; + } +} + +static bool MayFoldLoad(SDValue Op) { + return Op.hasOneUse() && ISD::isNormalLoad(Op.getNode()); +} + +static bool MayFoldIntoStore(SDValue Op) { + return Op.hasOneUse() && ISD::isNormalStore(*Op.getNode()->use_begin()); +} + +/// IsDesirableToPromoteOp - This method query the target whether it is +/// beneficial for dag combiner to promote the specified node. If true, it +/// should return the desired promotion type by reference. +bool X86TargetLowering::IsDesirableToPromoteOp(SDValue Op, EVT &PVT) const { + EVT VT = Op.getValueType(); + if (VT != MVT::i16) + return false; + + bool Promote = false; + bool Commute = false; + switch (Op.getOpcode()) { + default: break; + case ISD::LOAD: { + LoadSDNode *LD = cast<LoadSDNode>(Op); + // If the non-extending load has a single use and it's not live out, then it + // might be folded. + if (LD->getExtensionType() == ISD::NON_EXTLOAD /*&& + Op.hasOneUse()*/) { + for (SDNode::use_iterator UI = Op.getNode()->use_begin(), + UE = Op.getNode()->use_end(); UI != UE; ++UI) { + // The only case where we'd want to promote LOAD (rather then it being + // promoted as an operand is when it's only use is liveout. + if (UI->getOpcode() != ISD::CopyToReg) + return false; + } + } + Promote = true; + break; + } + case ISD::SIGN_EXTEND: + case ISD::ZERO_EXTEND: + case ISD::ANY_EXTEND: + Promote = true; + break; + case ISD::SHL: + case ISD::SRL: { + SDValue N0 = Op.getOperand(0); + // Look out for (store (shl (load), x)). + if (MayFoldLoad(N0) && MayFoldIntoStore(Op)) + return false; + Promote = true; + break; + } + case ISD::ADD: + case ISD::MUL: + case ISD::AND: + case ISD::OR: + case ISD::XOR: + Commute = true; + // fallthrough + case ISD::SUB: { + SDValue N0 = Op.getOperand(0); + SDValue N1 = Op.getOperand(1); + if (!Commute && MayFoldLoad(N1)) + return false; + // Avoid disabling potential load folding opportunities. 
+ if (MayFoldLoad(N0) && (!isa<ConstantSDNode>(N1) || MayFoldIntoStore(Op))) + return false; + if (MayFoldLoad(N1) && (!isa<ConstantSDNode>(N0) || MayFoldIntoStore(Op))) + return false; + Promote = true; + } + } + + PVT = MVT::i32; + return Promote; +} + //===----------------------------------------------------------------------===// // X86 Inline Assembly Support //===----------------------------------------------------------------------===// @@ -10159,7 +10309,7 @@ void X86TargetLowering::LowerAsmOperandForConstraint(SDValue Op, return; } - GlobalValue *GV = GA->getGlobal(); + const GlobalValue *GV = GA->getGlobal(); // If we require an extra load to get this address, as in PIC mode, we // can't accept it. if (isGlobalStubReference(Subtarget->ClassifyGlobalReference(GV, diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index 1026480..440601f9 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -374,12 +374,6 @@ namespace llvm { //===--------------------------------------------------------------------===// // X86TargetLowering - X86 Implementation of the TargetLowering interface class X86TargetLowering : public TargetLowering { - int VarArgsFrameIndex; // FrameIndex for start of varargs area. - int RegSaveFrameIndex; // X86-64 vararg func register save area. - unsigned VarArgsGPOffset; // X86-64 vararg func int reg offset. - unsigned VarArgsFPOffset; // X86-64 vararg func fp reg offset. - int BytesToPopOnReturn; // Number of arg bytes ret should pop. - public: explicit X86TargetLowering(X86TargetMachine &TM); @@ -401,11 +395,6 @@ namespace llvm { getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI, MCContext &Ctx) const; - // Return the number of bytes that a function should pop when it returns (in - // addition to the space used by the return address). - // - unsigned getBytesToPopOnReturn() const { return BytesToPopOnReturn; } - /// getStackPtrReg - Return the stack pointer register we are using: either /// ESP or RSP. unsigned getStackPtrReg() const { return X86StackPtr; } @@ -424,12 +413,14 @@ namespace llvm { /// probably because the source does not need to be loaded. If /// 'NonScalarIntSafe' is true, that means it's safe to return a /// non-scalar-integer type, e.g. empty string source, constant, or loaded - /// from memory. It returns EVT::Other if SelectionDAG should be responsible - /// for determining it. + /// from memory. 'MemcpyStrSrc' indicates whether the memcpy source is + /// constant so it does not need to be loaded. + /// It returns EVT::Other if the type should be determined using generic + /// target-independent logic. virtual EVT - getOptimalMemOpType(uint64_t Size, - unsigned DstAlign, unsigned SrcAlign, - bool NonScalarIntSafe, SelectionDAG &DAG) const; + getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign, + bool NonScalarIntSafe, bool MemcpyStrSrc, + MachineFunction &MF) const; /// allowsUnalignedMemoryAccesses - Returns true if the target allows /// unaligned memory accesses. of the specified type. @@ -439,20 +430,32 @@ namespace llvm { /// LowerOperation - Provide custom lowering hooks for some operations. /// - virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG); + virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const; /// ReplaceNodeResults - Replace the results of node with an illegal result /// type with new values built out of custom code. 
/// virtual void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results, - SelectionDAG &DAG); + SelectionDAG &DAG) const; virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const; - virtual MachineBasicBlock *EmitInstrWithCustomInserter(MachineInstr *MI, - MachineBasicBlock *MBB, - DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const; + /// isTypeDesirableForOp - Return true if the target has native support for + /// the specified value type and it is 'desirable' to use the type for the + /// given node type. e.g. On x86 i16 is legal, but undesirable since i16 + /// instruction encodings are longer and some i16 instructions are slow. + virtual bool isTypeDesirableForOp(unsigned Opc, EVT VT) const; + + /// isTypeDesirable - Return true if the target has native support for the + /// specified value type and it is 'desirable' to use the type. e.g. On x86 + /// i16 is legal, but undesirable since i16 instruction encodings are longer + /// and some i16 instructions are slow. + virtual bool IsDesirableToPromoteOp(SDValue Op, EVT &PVT) const; + + virtual MachineBasicBlock * + EmitInstrWithCustomInserter(MachineInstr *MI, + MachineBasicBlock *MBB) const; /// getTargetNodeName - This method returns the name of a target specific @@ -473,9 +476,9 @@ namespace llvm { unsigned Depth = 0) const; virtual bool - isGAPlusOffset(SDNode *N, GlobalValue* &GA, int64_t &Offset) const; + isGAPlusOffset(SDNode *N, const GlobalValue* &GA, int64_t &Offset) const; - SDValue getReturnAddressFrameIndex(SelectionDAG &DAG); + SDValue getReturnAddressFrameIndex(SelectionDAG &DAG) const; virtual bool ExpandInlineAsm(CallInst *CI) const; @@ -560,7 +563,7 @@ namespace llvm { return !X86ScalarSSEf64 || VT == MVT::f80; } - virtual const X86Subtarget* getSubtarget() { + virtual const X86Subtarget* getSubtarget() const { return Subtarget; } @@ -577,11 +580,12 @@ namespace llvm { createFastISel(MachineFunction &mf, DenseMap<const Value *, unsigned> &, DenseMap<const BasicBlock *, MachineBasicBlock *> &, - DenseMap<const AllocaInst *, int> & + DenseMap<const AllocaInst *, int> &, + std::vector<std::pair<MachineInstr*, unsigned> > & #ifndef NDEBUG - , SmallSet<Instruction*, 8> & + , SmallSet<const Instruction *, 8> & #endif - ); + ) const; /// getFunctionAlignment - Return the Log2 alignment of this function. virtual unsigned getFunctionAlignment(const Function *F) const; @@ -616,17 +620,17 @@ namespace llvm { CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals); + SmallVectorImpl<SDValue> &InVals) const; SDValue LowerMemArgument(SDValue Chain, CallingConv::ID CallConv, const SmallVectorImpl<ISD::InputArg> &ArgInfo, DebugLoc dl, SelectionDAG &DAG, const CCValAssign &VA, MachineFrameInfo *MFI, - unsigned i); + unsigned i) const; SDValue LowerMemOpCallTo(SDValue Chain, SDValue StackPtr, SDValue Arg, DebugLoc dl, SelectionDAG &DAG, const CCValAssign &VA, - ISD::ArgFlagsTy Flags); + ISD::ArgFlagsTy Flags) const; // Call lowering helpers. 
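// A plain-C++ illustration of what the isTypeDesirableForOp /
// IsDesirableToPromoteOp hooks declared above enable: even though i16 is a
// legal type, 16-bit arithmetic carries an operand-size prefix and can be
// slower, so the DAG combiner is told to widen such operations to i32 and
// truncate the result.  Sketch of the equivalence being exploited.
#include <cstdint>

static uint16_t addPromotedToI32(uint16_t A, uint16_t B) {
  uint32_t Wide = uint32_t(A) + uint32_t(B);  // do the arithmetic in i32
  return uint16_t(Wide);                      // truncate back to i16
}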
@@ -641,114 +645,120 @@ namespace llvm { const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG& DAG) const; - bool IsCalleePop(bool isVarArg, CallingConv::ID CallConv); + bool IsCalleePop(bool isVarArg, CallingConv::ID CallConv) const; SDValue EmitTailCallLoadRetAddr(SelectionDAG &DAG, SDValue &OutRetAddr, SDValue Chain, bool IsTailCall, bool Is64Bit, - int FPDiff, DebugLoc dl); + int FPDiff, DebugLoc dl) const; CCAssignFn *CCAssignFnForNode(CallingConv::ID CallConv) const; - unsigned GetAlignedArgumentStackSize(unsigned StackSize, SelectionDAG &DAG); + unsigned GetAlignedArgumentStackSize(unsigned StackSize, + SelectionDAG &DAG) const; std::pair<SDValue,SDValue> FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, - bool isSigned); + bool isSigned) const; SDValue LowerAsSplatVectorLoad(SDValue SrcOp, EVT VT, DebugLoc dl, - SelectionDAG &DAG); - SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG); - SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG); - SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG); - SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG); - SDValue LowerEXTRACT_VECTOR_ELT_SSE4(SDValue Op, SelectionDAG &DAG); - SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG); - SDValue LowerINSERT_VECTOR_ELT_SSE4(SDValue Op, SelectionDAG &DAG); - SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG); - SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG); - SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG); + SelectionDAG &DAG) const; + SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerEXTRACT_VECTOR_ELT_SSE4(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerINSERT_VECTOR_ELT_SSE4(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const; SDValue LowerGlobalAddress(const GlobalValue *GV, DebugLoc dl, int64_t Offset, SelectionDAG &DAG) const; - SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG); - SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG); - SDValue LowerExternalSymbol(SDValue Op, SelectionDAG &DAG); - SDValue LowerShift(SDValue Op, SelectionDAG &DAG); + SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerShift(SDValue Op, SelectionDAG &DAG) const; SDValue BuildFILD(SDValue Op, EVT SrcVT, SDValue Chain, SDValue StackSlot, - SelectionDAG &DAG); - SDValue LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG); - SDValue LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG); - SDValue LowerUINT_TO_FP_i64(SDValue Op, SelectionDAG &DAG); - SDValue LowerUINT_TO_FP_i32(SDValue Op, SelectionDAG &DAG); - SDValue LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG); - SDValue LowerFP_TO_UINT(SDValue Op, SelectionDAG &DAG); - SDValue LowerFABS(SDValue Op, SelectionDAG &DAG); - SDValue LowerFNEG(SDValue Op, SelectionDAG &DAG); - SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG); - SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG); - SDValue 
LowerVSETCC(SDValue Op, SelectionDAG &DAG); - SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG); - SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG); - SDValue LowerMEMSET(SDValue Op, SelectionDAG &DAG); - SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG); - SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG); - SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG); - SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG); - SDValue LowerVACOPY(SDValue Op, SelectionDAG &DAG); - SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG); - SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG); - SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG); - SDValue LowerFRAME_TO_ARGS_OFFSET(SDValue Op, SelectionDAG &DAG); - SDValue LowerEH_RETURN(SDValue Op, SelectionDAG &DAG); - SDValue LowerTRAMPOLINE(SDValue Op, SelectionDAG &DAG); - SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG); - SDValue LowerCTLZ(SDValue Op, SelectionDAG &DAG); - SDValue LowerCTTZ(SDValue Op, SelectionDAG &DAG); - SDValue LowerMUL_V2I64(SDValue Op, SelectionDAG &DAG); - SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG); - - SDValue LowerCMP_SWAP(SDValue Op, SelectionDAG &DAG); - SDValue LowerLOAD_SUB(SDValue Op, SelectionDAG &DAG); - SDValue LowerREADCYCLECOUNTER(SDValue Op, SelectionDAG &DAG); + SelectionDAG &DAG) const; + SDValue LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerUINT_TO_FP_i64(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerUINT_TO_FP_i32(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerFP_TO_UINT(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerFABS(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerFNEG(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerToBT(SDValue And, ISD::CondCode CC, + DebugLoc dl, SelectionDAG &DAG) const; + SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerMEMSET(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerVACOPY(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerFRAME_TO_ARGS_OFFSET(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerTRAMPOLINE(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerCTLZ(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerCTTZ(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerMUL_V2I64(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG) const; + + SDValue LowerCMP_SWAP(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerLOAD_SUB(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerREADCYCLECOUNTER(SDValue Op, SelectionDAG &DAG) const; virtual SDValue LowerFormalArguments(SDValue 
Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals); + SmallVectorImpl<SDValue> &InVals) const; virtual SDValue LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg, bool &isTailCall, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals); + SmallVectorImpl<SDValue> &InVals) const; virtual SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::OutputArg> &Outs, - DebugLoc dl, SelectionDAG &DAG); + DebugLoc dl, SelectionDAG &DAG) const; virtual bool CanLowerReturn(CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<EVT> &OutTys, const SmallVectorImpl<ISD::ArgFlagsTy> &ArgsFlags, - SelectionDAG &DAG); + SelectionDAG &DAG) const; void ReplaceATOMIC_BINARY_64(SDNode *N, SmallVectorImpl<SDValue> &Results, - SelectionDAG &DAG, unsigned NewOp); + SelectionDAG &DAG, unsigned NewOp) const; SDValue EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl, SDValue Chain, SDValue Dst, SDValue Src, SDValue Size, unsigned Align, bool isVolatile, - const Value *DstSV, uint64_t DstSVOff); + const Value *DstSV, + uint64_t DstSVOff) const; SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl, SDValue Chain, SDValue Dst, SDValue Src, SDValue Size, unsigned Align, bool isVolatile, bool AlwaysInline, - const Value *DstSV, uint64_t DstSVOff, - const Value *SrcSV, uint64_t SrcSVOff); + const Value *DstSV, + uint64_t DstSVOff, + const Value *SrcSV, + uint64_t SrcSVOff) const; /// Utility function to emit string processing sse4.2 instructions /// that return in xmm0. @@ -796,30 +806,29 @@ namespace llvm { MachineBasicBlock *BB) const; MachineBasicBlock *EmitLoweredSelect(MachineInstr *I, - MachineBasicBlock *BB, - DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const; + MachineBasicBlock *BB) const; MachineBasicBlock *EmitLoweredMingwAlloca(MachineInstr *MI, - MachineBasicBlock *BB, - DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const; + MachineBasicBlock *BB) const; /// Emit nodes that will be selected as "test Op0,Op0", or something /// equivalent, for use with the given x86 condition code. - SDValue EmitTest(SDValue Op0, unsigned X86CC, SelectionDAG &DAG); + SDValue EmitTest(SDValue Op0, unsigned X86CC, SelectionDAG &DAG) const; /// Emit nodes that will be selected as "cmp Op0,Op1", or something /// equivalent, for use with the given x86 condition code. 
SDValue EmitCmp(SDValue Op0, SDValue Op1, unsigned X86CC, - SelectionDAG &DAG); + SelectionDAG &DAG) const; }; namespace X86 { FastISel *createFastISel(MachineFunction &mf, DenseMap<const Value *, unsigned> &, DenseMap<const BasicBlock *, MachineBasicBlock *> &, - DenseMap<const AllocaInst *, int> & + DenseMap<const AllocaInst *, int> &, + std::vector<std::pair<MachineInstr*, unsigned> > & #ifndef NDEBUG - , SmallSet<Instruction*, 8> & + , SmallSet<const Instruction*, 8> & #endif ); } diff --git a/lib/Target/X86/X86Instr64bit.td b/lib/Target/X86/X86Instr64bit.td index eef2ca0..f5c3dbf 100644 --- a/lib/Target/X86/X86Instr64bit.td +++ b/lib/Target/X86/X86Instr64bit.td @@ -2086,6 +2086,11 @@ def : Pat<(and (srl_su GR32:$src, (i8 8)), (i32 255)), (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, GR32_ABCD)), x86_subreg_8bit_hi))>, Requires<[In64BitMode]>; +def : Pat<(srl (and_su GR32:$src, 0xff00), (i8 8)), + (MOVZX32_NOREXrr8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, + GR32_ABCD)), + x86_subreg_8bit_hi))>, + Requires<[In64BitMode]>; def : Pat<(srl GR16:$src, (i8 8)), (EXTRACT_SUBREG (MOVZX32_NOREXrr8 @@ -2156,21 +2161,6 @@ def : Pat<(sra GR64:$src1, (and CL, 63)), def : Pat<(store (sra (loadi64 addr:$dst), (and CL, 63)), addr:$dst), (SAR64mCL addr:$dst)>; -// Double shift patterns -def : Pat<(shrd GR64:$src1, (i8 imm:$amt1), GR64:$src2, (i8 imm)), - (SHRD64rri8 GR64:$src1, GR64:$src2, (i8 imm:$amt1))>; - -def : Pat<(store (shrd (loadi64 addr:$dst), (i8 imm:$amt1), - GR64:$src2, (i8 imm)), addr:$dst), - (SHRD64mri8 addr:$dst, GR64:$src2, (i8 imm:$amt1))>; - -def : Pat<(shld GR64:$src1, (i8 imm:$amt1), GR64:$src2, (i8 imm)), - (SHLD64rri8 GR64:$src1, GR64:$src2, (i8 imm:$amt1))>; - -def : Pat<(store (shld (loadi64 addr:$dst), (i8 imm:$amt1), - GR64:$src2, (i8 imm)), addr:$dst), - (SHLD64mri8 addr:$dst, GR64:$src2, (i8 imm:$amt1))>; - // (or x1, x2) -> (add x1, x2) if two operands are known not to share bits. let AddedComplexity = 5 in { // Try this before the selecting to OR def : Pat<(or_is_add GR64:$src1, i64immSExt8:$src2), @@ -2294,7 +2284,7 @@ def MOVPQIto64rr : RPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src), def MOV64toSDrr : RPDI<0x6E, MRMSrcReg, (outs FR64:$dst), (ins GR64:$src), "mov{d|q}\t{$src, $dst|$dst, $src}", [(set FR64:$dst, (bitconvert GR64:$src))]>; -def MOV64toSDrm : RPDI<0x6E, MRMSrcMem, (outs FR64:$dst), (ins i64mem:$src), +def MOV64toSDrm : S3SI<0x7E, MRMSrcMem, (outs FR64:$dst), (ins i64mem:$src), "movq\t{$src, $dst|$dst, $src}", [(set FR64:$dst, (bitconvert (loadi64 addr:$src)))]>; @@ -2347,15 +2337,3 @@ let isTwoAddress = 1 in { } defm PINSRQ : SS41I_insert64<0x22, "pinsrq">; - -// -disable-16bit support. 
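[Editor's note on the (srl (and_su GR32:$src, 0xff00), (i8 8)) pattern added above; illustrative sketch only, not part of the patch.] The existing h-register pattern matched the shift-then-mask form, (and (srl x, 8), 255); the new one catches the equivalent mask-then-shift form that the optimizer can also produce, e.g.:

    #include <cstdint>
    // Both forms extract the second-lowest byte; with these patterns either
    // one can be selected as a single zero-extending move from the high 8-bit
    // subregister (roughly "movzbl %ah, %eax") instead of an and plus a shift.
    uint32_t second_byte(uint32_t x) { return (x & 0xff00u) >> 8; }

MOVZX32_NOREXrr8 is used in the 64-bit-mode pattern because the high-byte registers (AH, BH, CH, DH) cannot be encoded in an instruction that carries a REX prefix.
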
-def : Pat<(truncstorei16 (i16 imm:$src), addr:$dst), - (MOV16mi addr:$dst, imm:$src)>; -def : Pat<(truncstorei16 GR64:$src, addr:$dst), - (MOV16mr addr:$dst, (EXTRACT_SUBREG GR64:$src, x86_subreg_16bit))>; -def : Pat<(i64 (sextloadi16 addr:$dst)), - (MOVSX64rm16 addr:$dst)>; -def : Pat<(i64 (zextloadi16 addr:$dst)), - (MOVZX64rm16 addr:$dst)>; -def : Pat<(i64 (extloadi16 addr:$dst)), - (MOVZX64rm16 addr:$dst)>; diff --git a/lib/Target/X86/X86InstrBuilder.h b/lib/Target/X86/X86InstrBuilder.h index c475b56..5a82a7b 100644 --- a/lib/Target/X86/X86InstrBuilder.h +++ b/lib/Target/X86/X86InstrBuilder.h @@ -49,7 +49,7 @@ struct X86AddressMode { unsigned Scale; unsigned IndexReg; int Disp; - GlobalValue *GV; + const GlobalValue *GV; unsigned GVOpFlags; X86AddressMode() diff --git a/lib/Target/X86/X86InstrFPStack.td b/lib/Target/X86/X86InstrFPStack.td index e6d1fee..0aae4a8 100644 --- a/lib/Target/X86/X86InstrFPStack.td +++ b/lib/Target/X86/X86InstrFPStack.td @@ -327,8 +327,8 @@ def TST_F : FPI<0xE4, RawFrm, (outs), (ins), "ftst">, D9; // Versions of FP instructions that take a single memory operand. Added for the // disassembler; remove as they are included with patterns elsewhere. -def FCOM32m : FPI<0xD8, MRM2m, (outs), (ins f32mem:$src), "fcom{l}\t$src">; -def FCOMP32m : FPI<0xD8, MRM3m, (outs), (ins f32mem:$src), "fcomp{l}\t$src">; +def FCOM32m : FPI<0xD8, MRM2m, (outs), (ins f32mem:$src), "fcom{s}\t$src">; +def FCOMP32m : FPI<0xD8, MRM3m, (outs), (ins f32mem:$src), "fcomp{s}\t$src">; def FLDENVm : FPI<0xD9, MRM4m, (outs), (ins f32mem:$src), "fldenv\t$src">; def FSTENVm : FPI<0xD9, MRM6m, (outs f32mem:$dst), (ins), "fnstenv\t$dst">; @@ -336,15 +336,15 @@ def FSTENVm : FPI<0xD9, MRM6m, (outs f32mem:$dst), (ins), "fnstenv\t$dst">; def FICOM32m : FPI<0xDA, MRM2m, (outs), (ins i32mem:$src), "ficom{l}\t$src">; def FICOMP32m: FPI<0xDA, MRM3m, (outs), (ins i32mem:$src), "ficomp{l}\t$src">; -def FCOM64m : FPI<0xDC, MRM2m, (outs), (ins f64mem:$src), "fcom{ll}\t$src">; -def FCOMP64m : FPI<0xDC, MRM3m, (outs), (ins f64mem:$src), "fcomp{ll}\t$src">; +def FCOM64m : FPI<0xDC, MRM2m, (outs), (ins f64mem:$src), "fcom{l}\t$src">; +def FCOMP64m : FPI<0xDC, MRM3m, (outs), (ins f64mem:$src), "fcomp{l}\t$src">; def FRSTORm : FPI<0xDD, MRM4m, (outs f32mem:$dst), (ins), "frstor\t$dst">; def FSAVEm : FPI<0xDD, MRM6m, (outs f32mem:$dst), (ins), "fnsave\t$dst">; def FNSTSWm : FPI<0xDD, MRM7m, (outs f32mem:$dst), (ins), "fnstsw\t$dst">; -def FICOM16m : FPI<0xDE, MRM2m, (outs), (ins i16mem:$src), "ficom{w}\t$src">; -def FICOMP16m: FPI<0xDE, MRM3m, (outs), (ins i16mem:$src), "ficomp{w}\t$src">; +def FICOM16m : FPI<0xDE, MRM2m, (outs), (ins i16mem:$src), "ficom{s}\t$src">; +def FICOMP16m: FPI<0xDE, MRM3m, (outs), (ins i16mem:$src), "ficomp{s}\t$src">; def FBLDm : FPI<0xDF, MRM4m, (outs), (ins f32mem:$src), "fbld\t$src">; def FBSTPm : FPI<0xDF, MRM6m, (outs f32mem:$dst), (ins), "fbstp\t$dst">; diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp index ccb7b05..a21bfb9 100644 --- a/lib/Target/X86/X86InstrInfo.cpp +++ b/lib/Target/X86/X86InstrInfo.cpp @@ -27,6 +27,7 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/LiveVariables.h" #include "llvm/CodeGen/PseudoSourceValue.h" +#include "llvm/MC/MCInst.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" @@ -1684,6 +1685,7 @@ bool X86InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, // Start from the bottom of the block and work up, examining the // terminator 
instructions. MachineBasicBlock::iterator I = MBB.end(); + MachineBasicBlock::iterator UnCondBrIter = MBB.end(); while (I != MBB.begin()) { --I; if (I->isDebugValue()) @@ -1701,6 +1703,8 @@ bool X86InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, // Handle unconditional branches. if (I->getOpcode() == X86::JMP_4) { + UnCondBrIter = I; + if (!AllowModify) { TBB = I->getOperand(0).getMBB(); continue; @@ -1718,10 +1722,11 @@ bool X86InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, TBB = 0; I->eraseFromParent(); I = MBB.end(); + UnCondBrIter = MBB.end(); continue; } - // TBB is used to indicate the unconditinal destination. + // TBB is used to indicate the unconditional destination. TBB = I->getOperand(0).getMBB(); continue; } @@ -1733,6 +1738,45 @@ bool X86InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, // Working from the bottom, handle the first conditional branch. if (Cond.empty()) { + MachineBasicBlock *TargetBB = I->getOperand(0).getMBB(); + if (AllowModify && UnCondBrIter != MBB.end() && + MBB.isLayoutSuccessor(TargetBB)) { + // If we can modify the code and it ends in something like: + // + // jCC L1 + // jmp L2 + // L1: + // ... + // L2: + // + // Then we can change this to: + // + // jnCC L2 + // L1: + // ... + // L2: + // + // Which is a bit more efficient. + // We conditionally jump to the fall-through block. + BranchCode = GetOppositeBranchCondition(BranchCode); + unsigned JNCC = GetCondBranchFromCond(BranchCode); + MachineBasicBlock::iterator OldInst = I; + + BuildMI(MBB, UnCondBrIter, MBB.findDebugLoc(I), get(JNCC)) + .addMBB(UnCondBrIter->getOperand(0).getMBB()); + BuildMI(MBB, UnCondBrIter, MBB.findDebugLoc(I), get(X86::JMP_4)) + .addMBB(TargetBB); + MBB.addSuccessor(TargetBB); + + OldInst->eraseFromParent(); + UnCondBrIter->eraseFromParent(); + + // Restart the analysis. + UnCondBrIter = MBB.end(); + I = MBB.end(); + continue; + } + FBB = TBB; TBB = I->getOperand(0).getMBB(); Cond.push_back(MachineOperand::CreateImm(BranchCode)); @@ -2276,6 +2320,19 @@ bool X86InstrInfo::restoreCalleeSavedRegisters(MachineBasicBlock &MBB, return true; } +MachineInstr* +X86InstrInfo::emitFrameIndexDebugValue(MachineFunction &MF, + int FrameIx, uint64_t Offset, + const MDNode *MDPtr, + DebugLoc DL) const { + X86AddressMode AM; + AM.BaseType = X86AddressMode::FrameIndexBase; + AM.Base.FrameIndex = FrameIx; + MachineInstrBuilder MIB = BuildMI(MF, DL, get(X86::DBG_VALUE)); + addFullAddress(MIB, AM).addImm(Offset).addMetadata(MDPtr); + return &*MIB; +} + static MachineInstr *FuseTwoAddrInst(MachineFunction &MF, unsigned Opcode, const SmallVectorImpl<MachineOperand> &MOs, MachineInstr *MI, @@ -2586,7 +2643,7 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, Ty = Type::getDoubleTy(MF.getFunction()->getContext()); else Ty = VectorType::get(Type::getInt32Ty(MF.getFunction()->getContext()), 4); - Constant *C = LoadMI->getOpcode() == X86::V_SETALLONES ? + const Constant *C = LoadMI->getOpcode() == X86::V_SETALLONES ? 
Constant::getAllOnesValue(Ty) : Constant::getNullValue(Ty); unsigned CPI = MCP.getConstantPoolIndex(C, Alignment); @@ -3406,6 +3463,7 @@ static unsigned GetInstSizeWithDesc(const MachineInstr &MI, } case TargetOpcode::DBG_LABEL: case TargetOpcode::EH_LABEL: + case TargetOpcode::DBG_VALUE: break; case TargetOpcode::IMPLICIT_DEF: case TargetOpcode::KILL: @@ -3603,7 +3661,7 @@ static unsigned GetInstSizeWithDesc(const MachineInstr &MI, std::string msg; raw_string_ostream Msg(msg); Msg << "Cannot determine size: " << MI; - llvm_report_error(Msg.str()); + report_fatal_error(Msg.str()); } @@ -3709,3 +3767,9 @@ void X86InstrInfo::SetSSEDomain(MachineInstr *MI, unsigned Domain) const { assert(table && "Cannot change domain"); MI->setDesc(get(table[Domain-1])); } + +/// getNoopForMachoTarget - Return the noop instruction to use for a noop. +void X86InstrInfo::getNoopForMachoTarget(MCInst &NopInst) const { + NopInst.setOpcode(X86::NOOP); +} + diff --git a/lib/Target/X86/X86InstrInfo.h b/lib/Target/X86/X86InstrInfo.h index f0bdd06..df99c7f 100644 --- a/lib/Target/X86/X86InstrInfo.h +++ b/lib/Target/X86/X86InstrInfo.h @@ -623,6 +623,12 @@ public: MachineBasicBlock::iterator MI, const std::vector<CalleeSavedInfo> &CSI) const; + virtual + MachineInstr *emitFrameIndexDebugValue(MachineFunction &MF, + int FrameIx, uint64_t Offset, + const MDNode *MDPtr, + DebugLoc DL) const; + /// foldMemoryOperand - If this target supports it, fold a load or store of /// the specified stack slot into the specified machine instruction for the /// specified operand(s). If this is possible, the target should perform the @@ -687,6 +693,8 @@ public: int64_t Offset1, int64_t Offset2, unsigned NumLoads) const; + virtual void getNoopForMachoTarget(MCInst &NopInst) const; + virtual bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const; diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td index 940b439..a2754ea 100644 --- a/lib/Target/X86/X86InstrInfo.td +++ b/lib/Target/X86/X86InstrInfo.td @@ -487,34 +487,6 @@ def or_is_add : PatFrag<(ops node:$lhs, node:$rhs), (or node:$lhs, node:$rhs),[{ return (~KnownZero0 & ~KnownZero1) == 0; }]>; -// 'shld' and 'shrd' instruction patterns. Note that even though these have -// the srl and shl in their patterns, the C++ code must still check for them, -// because predicates are tested before children nodes are explored. - -def shrd : PatFrag<(ops node:$src1, node:$amt1, node:$src2, node:$amt2), - (or (srl node:$src1, node:$amt1), - (shl node:$src2, node:$amt2)), [{ - assert(N->getOpcode() == ISD::OR); - return N->getOperand(0).getOpcode() == ISD::SRL && - N->getOperand(1).getOpcode() == ISD::SHL && - isa<ConstantSDNode>(N->getOperand(0).getOperand(1)) && - isa<ConstantSDNode>(N->getOperand(1).getOperand(1)) && - N->getOperand(0).getConstantOperandVal(1) == - N->getValueSizeInBits(0) - N->getOperand(1).getConstantOperandVal(1); -}]>; - -def shld : PatFrag<(ops node:$src1, node:$amt1, node:$src2, node:$amt2), - (or (shl node:$src1, node:$amt1), - (srl node:$src2, node:$amt2)), [{ - assert(N->getOpcode() == ISD::OR); - return N->getOperand(0).getOpcode() == ISD::SHL && - N->getOperand(1).getOpcode() == ISD::SRL && - isa<ConstantSDNode>(N->getOperand(0).getOperand(1)) && - isa<ConstantSDNode>(N->getOperand(1).getOperand(1)) && - N->getOperand(0).getConstantOperandVal(1) == - N->getValueSizeInBits(0) - N->getOperand(1).getConstantOperandVal(1); -}]>; - //===----------------------------------------------------------------------===// // Instruction list... 
// @@ -781,11 +753,11 @@ def PUSH32rmm: I<0xFF, MRM6m, (outs), (ins i32mem:$src), "push{l}\t$src",[]>; } let Defs = [ESP], Uses = [ESP], neverHasSideEffects = 1, mayStore = 1 in { -def PUSH32i8 : Ii8<0x6a, RawFrm, (outs), (ins i8imm:$imm), - "push{l}\t$imm", []>; -def PUSH32i16 : Ii16<0x68, RawFrm, (outs), (ins i16imm:$imm), +def PUSHi8 : Ii8<0x6a, RawFrm, (outs), (ins i32i8imm:$imm), "push{l}\t$imm", []>; -def PUSH32i32 : Ii32<0x68, RawFrm, (outs), (ins i32imm:$imm), +def PUSHi16 : Ii16<0x68, RawFrm, (outs), (ins i16imm:$imm), + "push{w}\t$imm", []>, OpSize; +def PUSHi32 : Ii32<0x68, RawFrm, (outs), (ins i32imm:$imm), "push{l}\t$imm", []>; } @@ -809,10 +781,11 @@ let isTwoAddress = 1 in // GR32 = bswap GR32 let Defs = [EFLAGS] in { def BSF16rr : I<0xBC, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src), "bsf{w}\t{$src, $dst|$dst, $src}", - [(set GR16:$dst, EFLAGS, (X86bsf GR16:$src))]>, TB; + [(set GR16:$dst, EFLAGS, (X86bsf GR16:$src))]>, TB, OpSize; def BSF16rm : I<0xBC, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src), "bsf{w}\t{$src, $dst|$dst, $src}", - [(set GR16:$dst, EFLAGS, (X86bsf (loadi16 addr:$src)))]>, TB; + [(set GR16:$dst, EFLAGS, (X86bsf (loadi16 addr:$src)))]>, TB, + OpSize; def BSF32rr : I<0xBC, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src), "bsf{l}\t{$src, $dst|$dst, $src}", [(set GR32:$dst, EFLAGS, (X86bsf GR32:$src))]>, TB; @@ -822,10 +795,11 @@ def BSF32rm : I<0xBC, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src), def BSR16rr : I<0xBD, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src), "bsr{w}\t{$src, $dst|$dst, $src}", - [(set GR16:$dst, EFLAGS, (X86bsr GR16:$src))]>, TB; + [(set GR16:$dst, EFLAGS, (X86bsr GR16:$src))]>, TB, OpSize; def BSR16rm : I<0xBD, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src), "bsr{w}\t{$src, $dst|$dst, $src}", - [(set GR16:$dst, EFLAGS, (X86bsr (loadi16 addr:$src)))]>, TB; + [(set GR16:$dst, EFLAGS, (X86bsr (loadi16 addr:$src)))]>, TB, + OpSize; def BSR32rr : I<0xBD, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src), "bsr{l}\t{$src, $dst|$dst, $src}", [(set GR32:$dst, EFLAGS, (X86bsr GR32:$src))]>, TB; @@ -4476,7 +4450,11 @@ def : Pat<(extloadi32i16 addr:$src), (MOVZX32rm16 addr:$src)>; // avoid partial-register updates. def : Pat<(i16 (anyext GR8 :$src)), (MOVZX16rr8 GR8 :$src)>; def : Pat<(i32 (anyext GR8 :$src)), (MOVZX32rr8 GR8 :$src)>; -def : Pat<(i32 (anyext GR16:$src)), (MOVZX32rr16 GR16:$src)>; + +// Except for i16 -> i32 since isel expect i16 ops to be promoted to i32. 
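[Editor's note; the pattern this comment introduces follows immediately below — aside for illustration only, not part of the patch.] Since instruction selection now expects i16 operations to already have been promoted to i32 (see the promotion hooks earlier in this diff), a plain (i32 (anyext GR16)) is selected as a subregister insert rather than a real zero-extension:

    // Before:  (i32 (anyext GR16:$src)) -> MOVZX32rr16
    //          => e.g. "movzwl %ax, %eax"
    // After:   (i32 (anyext GR16:$src)) -> INSERT_SUBREG(IMPLICIT_DEF, $src)
    //          => typically no code at all; the upper 16 bits are left
    //          undefined, which is exactly what anyext permits.
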
+def : Pat<(i32 (anyext GR16:$src)), + (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR16:$src, x86_subreg_16bit)>; + //===----------------------------------------------------------------------===// // Some peepholes @@ -4537,11 +4515,11 @@ def : Pat<(i8 (trunc GR16:$src)), // h-register tricks def : Pat<(i8 (trunc (srl_su GR16:$src, (i8 8)))), - (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)), + (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)), x86_subreg_8bit_hi)>, Requires<[In32BitMode]>; def : Pat<(i8 (trunc (srl_su GR32:$src, (i8 8)))), - (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR32:$src, GR32_ABCD)), + (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, GR32_ABCD)), x86_subreg_8bit_hi)>, Requires<[In32BitMode]>; def : Pat<(srl GR16:$src, (i8 8)), @@ -4566,6 +4544,11 @@ def : Pat<(and (srl_su GR32:$src, (i8 8)), (i32 255)), GR32_ABCD)), x86_subreg_8bit_hi))>, Requires<[In32BitMode]>; +def : Pat<(srl (and_su GR32:$src, 0xff00), (i8 8)), + (MOVZX32rr8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, + GR32_ABCD)), + x86_subreg_8bit_hi))>, + Requires<[In32BitMode]>; // (shl x, 1) ==> (add x, x) def : Pat<(shl GR8 :$src1, (i8 1)), (ADD8rr GR8 :$src1, GR8 :$src1)>; @@ -4612,111 +4595,13 @@ def : Pat<(store (sra (loadi16 addr:$dst), (and CL, 31)), addr:$dst), def : Pat<(store (sra (loadi32 addr:$dst), (and CL, 31)), addr:$dst), (SAR32mCL addr:$dst)>; -// (or (x >> c) | (y << (32 - c))) ==> (shrd32 x, y, c) -def : Pat<(or (srl GR32:$src1, CL:$amt), - (shl GR32:$src2, (sub 32, CL:$amt))), - (SHRD32rrCL GR32:$src1, GR32:$src2)>; - -def : Pat<(store (or (srl (loadi32 addr:$dst), CL:$amt), - (shl GR32:$src2, (sub 32, CL:$amt))), addr:$dst), - (SHRD32mrCL addr:$dst, GR32:$src2)>; - -def : Pat<(or (srl GR32:$src1, (i8 (trunc ECX:$amt))), - (shl GR32:$src2, (i8 (trunc (sub 32, ECX:$amt))))), - (SHRD32rrCL GR32:$src1, GR32:$src2)>; - -def : Pat<(store (or (srl (loadi32 addr:$dst), (i8 (trunc ECX:$amt))), - (shl GR32:$src2, (i8 (trunc (sub 32, ECX:$amt))))), - addr:$dst), - (SHRD32mrCL addr:$dst, GR32:$src2)>; - -def : Pat<(shrd GR32:$src1, (i8 imm:$amt1), GR32:$src2, (i8 imm/*:$amt2*/)), - (SHRD32rri8 GR32:$src1, GR32:$src2, (i8 imm:$amt1))>; - -def : Pat<(store (shrd (loadi32 addr:$dst), (i8 imm:$amt1), - GR32:$src2, (i8 imm/*:$amt2*/)), addr:$dst), - (SHRD32mri8 addr:$dst, GR32:$src2, (i8 imm:$amt1))>; - -// (or (x << c) | (y >> (32 - c))) ==> (shld32 x, y, c) -def : Pat<(or (shl GR32:$src1, CL:$amt), - (srl GR32:$src2, (sub 32, CL:$amt))), - (SHLD32rrCL GR32:$src1, GR32:$src2)>; - -def : Pat<(store (or (shl (loadi32 addr:$dst), CL:$amt), - (srl GR32:$src2, (sub 32, CL:$amt))), addr:$dst), - (SHLD32mrCL addr:$dst, GR32:$src2)>; - -def : Pat<(or (shl GR32:$src1, (i8 (trunc ECX:$amt))), - (srl GR32:$src2, (i8 (trunc (sub 32, ECX:$amt))))), - (SHLD32rrCL GR32:$src1, GR32:$src2)>; - -def : Pat<(store (or (shl (loadi32 addr:$dst), (i8 (trunc ECX:$amt))), - (srl GR32:$src2, (i8 (trunc (sub 32, ECX:$amt))))), - addr:$dst), - (SHLD32mrCL addr:$dst, GR32:$src2)>; - -def : Pat<(shld GR32:$src1, (i8 imm:$amt1), GR32:$src2, (i8 imm/*:$amt2*/)), - (SHLD32rri8 GR32:$src1, GR32:$src2, (i8 imm:$amt1))>; - -def : Pat<(store (shld (loadi32 addr:$dst), (i8 imm:$amt1), - GR32:$src2, (i8 imm/*:$amt2*/)), addr:$dst), - (SHLD32mri8 addr:$dst, GR32:$src2, (i8 imm:$amt1))>; - -// (or (x >> c) | (y << (16 - c))) ==> (shrd16 x, y, c) -def : Pat<(or (srl GR16:$src1, CL:$amt), - (shl GR16:$src2, (sub 16, CL:$amt))), - (SHRD16rrCL GR16:$src1, GR16:$src2)>; - -def : Pat<(store (or (srl (loadi16 addr:$dst), 
CL:$amt), - (shl GR16:$src2, (sub 16, CL:$amt))), addr:$dst), - (SHRD16mrCL addr:$dst, GR16:$src2)>; - -def : Pat<(or (srl GR16:$src1, (i8 (trunc CX:$amt))), - (shl GR16:$src2, (i8 (trunc (sub 16, CX:$amt))))), - (SHRD16rrCL GR16:$src1, GR16:$src2)>; - -def : Pat<(store (or (srl (loadi16 addr:$dst), (i8 (trunc CX:$amt))), - (shl GR16:$src2, (i8 (trunc (sub 16, CX:$amt))))), - addr:$dst), - (SHRD16mrCL addr:$dst, GR16:$src2)>; - -def : Pat<(shrd GR16:$src1, (i8 imm:$amt1), GR16:$src2, (i8 imm/*:$amt2*/)), - (SHRD16rri8 GR16:$src1, GR16:$src2, (i8 imm:$amt1))>; - -def : Pat<(store (shrd (loadi16 addr:$dst), (i8 imm:$amt1), - GR16:$src2, (i8 imm/*:$amt2*/)), addr:$dst), - (SHRD16mri8 addr:$dst, GR16:$src2, (i8 imm:$amt1))>; - -// (or (x << c) | (y >> (16 - c))) ==> (shld16 x, y, c) -def : Pat<(or (shl GR16:$src1, CL:$amt), - (srl GR16:$src2, (sub 16, CL:$amt))), - (SHLD16rrCL GR16:$src1, GR16:$src2)>; - -def : Pat<(store (or (shl (loadi16 addr:$dst), CL:$amt), - (srl GR16:$src2, (sub 16, CL:$amt))), addr:$dst), - (SHLD16mrCL addr:$dst, GR16:$src2)>; - -def : Pat<(or (shl GR16:$src1, (i8 (trunc CX:$amt))), - (srl GR16:$src2, (i8 (trunc (sub 16, CX:$amt))))), - (SHLD16rrCL GR16:$src1, GR16:$src2)>; - -def : Pat<(store (or (shl (loadi16 addr:$dst), (i8 (trunc CX:$amt))), - (srl GR16:$src2, (i8 (trunc (sub 16, CX:$amt))))), - addr:$dst), - (SHLD16mrCL addr:$dst, GR16:$src2)>; - -def : Pat<(shld GR16:$src1, (i8 imm:$amt1), GR16:$src2, (i8 imm/*:$amt2*/)), - (SHLD16rri8 GR16:$src1, GR16:$src2, (i8 imm:$amt1))>; - -def : Pat<(store (shld (loadi16 addr:$dst), (i8 imm:$amt1), - GR16:$src2, (i8 imm/*:$amt2*/)), addr:$dst), - (SHLD16mri8 addr:$dst, GR16:$src2, (i8 imm:$amt1))>; - // (anyext (setcc_carry)) -> (setcc_carry) def : Pat<(i16 (anyext (i8 (X86setcc_c X86_COND_B, EFLAGS)))), (SETB_C16r)>; def : Pat<(i32 (anyext (i8 (X86setcc_c X86_COND_B, EFLAGS)))), (SETB_C32r)>; +def : Pat<(i32 (anyext (i16 (X86setcc_c X86_COND_B, EFLAGS)))), + (SETB_C32r)>; // (or x1, x2) -> (add x1, x2) if two operands are known not to share bits. let AddedComplexity = 5 in { // Try this before the selecting to OR @@ -4907,18 +4792,6 @@ def : Pat<(and GR16:$src1, i16immSExt8:$src2), def : Pat<(and GR32:$src1, i32immSExt8:$src2), (AND32ri8 GR32:$src1, i32immSExt8:$src2)>; -// -disable-16bit support. 
-def : Pat<(truncstorei16 (i16 imm:$src), addr:$dst), - (MOV16mi addr:$dst, imm:$src)>; -def : Pat<(truncstorei16 GR32:$src, addr:$dst), - (MOV16mr addr:$dst, (EXTRACT_SUBREG GR32:$src, x86_subreg_16bit))>; -def : Pat<(i32 (sextloadi16 addr:$dst)), - (MOVSX32rm16 addr:$dst)>; -def : Pat<(i32 (zextloadi16 addr:$dst)), - (MOVZX32rm16 addr:$dst)>; -def : Pat<(i32 (extloadi16 addr:$dst)), - (MOVZX32rm16 addr:$dst)>; - //===----------------------------------------------------------------------===// // Floating Point Stack Support //===----------------------------------------------------------------------===// diff --git a/lib/Target/X86/X86InstrMMX.td b/lib/Target/X86/X86InstrMMX.td index 1c81c5e..744af50 100644 --- a/lib/Target/X86/X86InstrMMX.td +++ b/lib/Target/X86/X86InstrMMX.td @@ -117,7 +117,7 @@ def MMX_MOVD64mr : MMXI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, VR64:$src), "movd\t{$src, $dst|$dst, $src}", []>; def MMX_MOVD64grr : MMXI<0x7E, MRMDestReg, (outs), (ins GR32:$dst, VR64:$src), "movd\t{$src, $dst|$dst, $src}", []>; -def MMX_MOVQ64gmr : MMXRI<0x7E, MRMDestMem, (outs), +def MMX_MOVQ64gmr : MMXRI<0x7F, MRMDestMem, (outs), (ins i64mem:$dst, VR64:$src), "movq\t{$src, $dst|$dst, $src}", []>; @@ -167,6 +167,9 @@ let neverHasSideEffects = 1 in def MMX_MOVQ2FR64rr: SSDIi8<0xD6, MRMSrcReg, (outs FR64:$dst), (ins VR64:$src), "movq2dq\t{$src, $dst|$dst, $src}", []>; +def MMX_MOVFR642Qrr: SSDIi8<0xD6, MRMSrcReg, (outs VR64:$dst), (ins FR64:$src), + "movdq2q\t{$src, $dst|$dst, $src}", []>; + def MMX_MOVNTQmr : MMXI<0xE7, MRMDestMem, (outs), (ins i64mem:$dst, VR64:$src), "movntq\t{$src, $dst|$dst, $src}", [(int_x86_mmx_movnt_dq addr:$dst, VR64:$src)]>; @@ -569,6 +572,14 @@ def : Pat<(f64 (bitconvert (v4i16 VR64:$src))), (MMX_MOVQ2FR64rr VR64:$src)>; def : Pat<(f64 (bitconvert (v8i8 VR64:$src))), (MMX_MOVQ2FR64rr VR64:$src)>; +def : Pat<(v1i64 (bitconvert (f64 FR64:$src))), + (MMX_MOVFR642Qrr FR64:$src)>; +def : Pat<(v2i32 (bitconvert (f64 FR64:$src))), + (MMX_MOVFR642Qrr FR64:$src)>; +def : Pat<(v4i16 (bitconvert (f64 FR64:$src))), + (MMX_MOVFR642Qrr FR64:$src)>; +def : Pat<(v8i8 (bitconvert (f64 FR64:$src))), + (MMX_MOVFR642Qrr FR64:$src)>; let AddedComplexity = 20 in { def : Pat<(v2i32 (X86vzmovl (bc_v2i32 (load_mmx addr:$src)))), diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index 11f7e27..2129580 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -2878,6 +2878,7 @@ let Constraints = "$src1 = $dst" in { } } +let ImmT = NoImm in { // None of these have i8 immediate fields. defm PHADDW : SS3I_binop_rm_int_16<0x01, "phaddw", int_x86_ssse3_phadd_w, int_x86_ssse3_phadd_w_128>; @@ -2902,6 +2903,7 @@ defm PMADDUBSW : SS3I_binop_rm_int_8 <0x04, "pmaddubsw", defm PMULHRSW : SS3I_binop_rm_int_16<0x0B, "pmulhrsw", int_x86_ssse3_pmul_hr_sw, int_x86_ssse3_pmul_hr_sw_128, 1>; + defm PSHUFB : SS3I_binop_rm_int_8 <0x00, "pshufb", int_x86_ssse3_pshuf_b, int_x86_ssse3_pshuf_b_128>; @@ -2914,7 +2916,9 @@ defm PSIGNW : SS3I_binop_rm_int_16<0x09, "psignw", defm PSIGND : SS3I_binop_rm_int_32<0x0A, "psignd", int_x86_ssse3_psign_d, int_x86_ssse3_psign_d_128>; +} +// palignr patterns. let Constraints = "$src1 = $dst" in { def PALIGNR64rr : SS3AI<0x0F, MRMSrcReg, (outs VR64:$dst), (ins VR64:$src1, VR64:$src2, i8imm:$src3), @@ -2935,26 +2939,29 @@ let Constraints = "$src1 = $dst" in { []>, OpSize; } -// palignr patterns. 
-def : Pat<(int_x86_ssse3_palign_r VR64:$src1, VR64:$src2, (i8 imm:$src3)), - (PALIGNR64rr VR64:$src1, VR64:$src2, (BYTE_imm imm:$src3))>, +let AddedComplexity = 5 in { + +def : Pat<(v1i64 (palign:$src3 VR64:$src1, VR64:$src2)), + (PALIGNR64rr VR64:$src2, VR64:$src1, + (SHUFFLE_get_palign_imm VR64:$src3))>, Requires<[HasSSSE3]>; -def : Pat<(int_x86_ssse3_palign_r VR64:$src1, - (memop64 addr:$src2), - (i8 imm:$src3)), - (PALIGNR64rm VR64:$src1, addr:$src2, (BYTE_imm imm:$src3))>, +def : Pat<(v2i32 (palign:$src3 VR64:$src1, VR64:$src2)), + (PALIGNR64rr VR64:$src2, VR64:$src1, + (SHUFFLE_get_palign_imm VR64:$src3))>, Requires<[HasSSSE3]>; - -def : Pat<(int_x86_ssse3_palign_r_128 VR128:$src1, VR128:$src2, (i8 imm:$src3)), - (PALIGNR128rr VR128:$src1, VR128:$src2, (BYTE_imm imm:$src3))>, +def : Pat<(v2f32 (palign:$src3 VR64:$src1, VR64:$src2)), + (PALIGNR64rr VR64:$src2, VR64:$src1, + (SHUFFLE_get_palign_imm VR64:$src3))>, Requires<[HasSSSE3]>; -def : Pat<(int_x86_ssse3_palign_r_128 VR128:$src1, - (memopv2i64 addr:$src2), - (i8 imm:$src3)), - (PALIGNR128rm VR128:$src1, addr:$src2, (BYTE_imm imm:$src3))>, +def : Pat<(v4i16 (palign:$src3 VR64:$src1, VR64:$src2)), + (PALIGNR64rr VR64:$src2, VR64:$src1, + (SHUFFLE_get_palign_imm VR64:$src3))>, + Requires<[HasSSSE3]>; +def : Pat<(v8i8 (palign:$src3 VR64:$src1, VR64:$src2)), + (PALIGNR64rr VR64:$src2, VR64:$src1, + (SHUFFLE_get_palign_imm VR64:$src3))>, Requires<[HasSSSE3]>; -let AddedComplexity = 5 in { def : Pat<(v4i32 (palign:$src3 VR128:$src1, VR128:$src2)), (PALIGNR128rr VR128:$src2, VR128:$src1, (SHUFFLE_get_palign_imm VR128:$src3))>, @@ -3510,7 +3517,7 @@ defm DPPS : SS41I_binop_rmi_int<0x40, "dpps", defm DPPD : SS41I_binop_rmi_int<0x41, "dppd", int_x86_sse41_dppd, 1>; defm MPSADBW : SS41I_binop_rmi_int<0x42, "mpsadbw", - int_x86_sse41_mpsadbw, 1>; + int_x86_sse41_mpsadbw, 0>; /// SS41I_ternary_int - SSE 4.1 ternary operator diff --git a/lib/Target/X86/X86MCAsmInfo.cpp b/lib/Target/X86/X86MCAsmInfo.cpp index d257ee3..2b8720b 100644 --- a/lib/Target/X86/X86MCAsmInfo.cpp +++ b/lib/Target/X86/X86MCAsmInfo.cpp @@ -14,6 +14,7 @@ #include "X86MCAsmInfo.h" #include "X86TargetMachine.h" #include "llvm/ADT/Triple.h" +#include "llvm/MC/MCContext.h" #include "llvm/MC/MCSectionELF.h" #include "llvm/Support/CommandLine.h" using namespace llvm; @@ -95,9 +96,10 @@ X86ELFMCAsmInfo::X86ELFMCAsmInfo(const Triple &T) { Data64bitsDirective = 0; } -MCSection *X86ELFMCAsmInfo::getNonexecutableStackSection(MCContext &Ctx) const { - return MCSectionELF::Create(".note.GNU-stack", MCSectionELF::SHT_PROGBITS, - 0, SectionKind::getMetadata(), false, Ctx); +const MCSection *X86ELFMCAsmInfo:: +getNonexecutableStackSection(MCContext &Ctx) const { + return Ctx.getELFSection(".note.GNU-stack", MCSectionELF::SHT_PROGBITS, + 0, SectionKind::getMetadata(), false); } X86MCAsmInfoCOFF::X86MCAsmInfoCOFF(const Triple &Triple) { diff --git a/lib/Target/X86/X86MCAsmInfo.h b/lib/Target/X86/X86MCAsmInfo.h index 69716bf..5815225 100644 --- a/lib/Target/X86/X86MCAsmInfo.h +++ b/lib/Target/X86/X86MCAsmInfo.h @@ -27,7 +27,7 @@ namespace llvm { struct X86ELFMCAsmInfo : public MCAsmInfo { explicit X86ELFMCAsmInfo(const Triple &Triple); - virtual MCSection *getNonexecutableStackSection(MCContext &Ctx) const; + virtual const MCSection *getNonexecutableStackSection(MCContext &Ctx) const; }; struct X86MCAsmInfoCOFF : public MCAsmInfoCOFF { diff --git a/lib/Target/X86/X86MachineFunctionInfo.h b/lib/Target/X86/X86MachineFunctionInfo.h index 4b2529b..06043ec 100644 --- 
a/lib/Target/X86/X86MachineFunctionInfo.h +++ b/lib/Target/X86/X86MachineFunctionInfo.h @@ -31,7 +31,8 @@ class X86MachineFunctionInfo : public MachineFunctionInfo { /// stack frame in bytes. unsigned CalleeSavedFrameSize; - /// BytesToPopOnReturn - Number of bytes function pops on return. + /// BytesToPopOnReturn - Number of bytes function pops on return (in addition + /// to the space used by the return address). /// Used on windows platform for stdcall & fastcall name decoration unsigned BytesToPopOnReturn; @@ -52,6 +53,19 @@ class X86MachineFunctionInfo : public MachineFunctionInfo { /// relocation models. unsigned GlobalBaseReg; + /// ReserveFP - whether the function should reserve the frame pointer + /// when allocating, even if there may not actually be a frame pointer used. + bool ReserveFP; + + /// VarArgsFrameIndex - FrameIndex for start of varargs area. + int VarArgsFrameIndex; + /// RegSaveFrameIndex - X86-64 vararg func register save area. + int RegSaveFrameIndex; + /// VarArgsGPOffset - X86-64 vararg func int reg offset. + unsigned VarArgsGPOffset; + /// VarArgsFPOffset - X86-64 vararg func fp reg offset. + unsigned VarArgsFPOffset; + public: X86MachineFunctionInfo() : ForceFramePointer(false), CalleeSavedFrameSize(0), @@ -59,7 +73,11 @@ public: ReturnAddrIndex(0), TailCallReturnAddrDelta(0), SRetReturnReg(0), - GlobalBaseReg(0) {} + GlobalBaseReg(0), + VarArgsFrameIndex(0), + RegSaveFrameIndex(0), + VarArgsGPOffset(0), + VarArgsFPOffset(0) {} explicit X86MachineFunctionInfo(MachineFunction &MF) : ForceFramePointer(false), @@ -68,7 +86,12 @@ public: ReturnAddrIndex(0), TailCallReturnAddrDelta(0), SRetReturnReg(0), - GlobalBaseReg(0) {} + GlobalBaseReg(0), + ReserveFP(false), + VarArgsFrameIndex(0), + RegSaveFrameIndex(0), + VarArgsGPOffset(0), + VarArgsFPOffset(0) {} bool getForceFramePointer() const { return ForceFramePointer;} void setForceFramePointer(bool forceFP) { ForceFramePointer = forceFP; } @@ -90,6 +113,21 @@ public: unsigned getGlobalBaseReg() const { return GlobalBaseReg; } void setGlobalBaseReg(unsigned Reg) { GlobalBaseReg = Reg; } + + bool getReserveFP() const { return ReserveFP; } + void setReserveFP(bool reserveFP) { ReserveFP = reserveFP; } + + int getVarArgsFrameIndex() const { return VarArgsFrameIndex; } + void setVarArgsFrameIndex(int Idx) { VarArgsFrameIndex = Idx; } + + int getRegSaveFrameIndex() const { return RegSaveFrameIndex; } + void setRegSaveFrameIndex(int Idx) { RegSaveFrameIndex = Idx; } + + unsigned getVarArgsGPOffset() const { return VarArgsGPOffset; } + void setVarArgsGPOffset(unsigned Offset) { VarArgsGPOffset = Offset; } + + unsigned getVarArgsFPOffset() const { return VarArgsFPOffset; } + void setVarArgsFPOffset(unsigned Offset) { VarArgsFPOffset = Offset; } }; } // End llvm namespace diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp index 32f28a5..a3e04b0 100644 --- a/lib/Target/X86/X86RegisterInfo.cpp +++ b/lib/Target/X86/X86RegisterInfo.cpp @@ -439,7 +439,7 @@ bool X86RegisterInfo::hasFP(const MachineFunction &MF) const { const MachineFrameInfo *MFI = MF.getFrameInfo(); const MachineModuleInfo &MMI = MF.getMMI(); - return (NoFramePointerElim || + return (DisableFramePointerElim(MF) || needsStackRealignment(MF) || MFI->hasVarSizedObjects() || MFI->isFrameAddressTaken() || @@ -464,7 +464,7 @@ bool X86RegisterInfo::needsStackRealignment(const MachineFunction &MF) const { // variable-sized allocas. // FIXME: Temporary disable the error - it seems to be too conservative. 
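[Editor's note on the BytesToPopOnReturn comment in the X86MachineFunctionInfo.h hunk above; hypothetical example for illustration only.] For callee-pops conventions the value ends up as the immediate operand of the RET:

    // int __stdcall f(int a, int b);   // 8 bytes of stack arguments
    // The callee clears its own arguments on return:
    //     ret $8
    // A cdecl function would instead end in a plain "ret" and leave those
    // 8 bytes for the caller to pop.
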
if (0 && requiresRealignment && MFI->hasVarSizedObjects()) - llvm_report_error( + report_fatal_error( "Stack realignment in presense of dynamic allocas is not supported"); return (requiresRealignment && !MFI->hasVarSizedObjects()); @@ -608,8 +608,12 @@ X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, int FrameIndex = MI.getOperand(i).getIndex(); unsigned BasePtr; + unsigned Opc = MI.getOpcode(); + bool AfterFPPop = Opc == X86::TAILJMPm64 || Opc == X86::TAILJMPm; if (needsStackRealignment(MF)) BasePtr = (FrameIndex < 0 ? FramePtr : StackPtr); + else if (AfterFPPop) + BasePtr = StackPtr; else BasePtr = (hasFP(MF) ? FramePtr : StackPtr); @@ -618,16 +622,22 @@ X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, MI.getOperand(i).ChangeToRegister(BasePtr, false); // Now add the frame object offset to the offset from EBP. + int FIOffset; + if (AfterFPPop) { + // Tail call jmp happens after FP is popped. + const TargetFrameInfo &TFI = *MF.getTarget().getFrameInfo(); + const MachineFrameInfo *MFI = MF.getFrameInfo(); + FIOffset = MFI->getObjectOffset(FrameIndex) - TFI.getOffsetOfLocalArea(); + } else + FIOffset = getFrameIndexOffset(MF, FrameIndex); + if (MI.getOperand(i+3).isImm()) { // Offset is a 32-bit integer. - int Offset = getFrameIndexOffset(MF, FrameIndex) + - (int)(MI.getOperand(i + 3).getImm()); - + int Offset = FIOffset + (int)(MI.getOperand(i + 3).getImm()); MI.getOperand(i + 3).ChangeToImmediate(Offset); } else { // Offset is symbolic. This is extremely rare. - uint64_t Offset = getFrameIndexOffset(MF, FrameIndex) + - (uint64_t)MI.getOperand(i+3).getOffset(); + uint64_t Offset = FIOffset + (uint64_t)MI.getOperand(i+3).getOffset(); MI.getOperand(i+3).setOffset(Offset); } return 0; @@ -1487,3 +1497,46 @@ unsigned getX86SubSuperRegister(unsigned Reg, EVT VT, bool High) { } #include "X86GenRegisterInfo.inc" + +namespace { + struct MSAH : public MachineFunctionPass { + static char ID; + MSAH() : MachineFunctionPass(&ID) {} + + virtual bool runOnMachineFunction(MachineFunction &MF) { + const X86TargetMachine *TM = + static_cast<const X86TargetMachine *>(&MF.getTarget()); + const X86RegisterInfo *X86RI = TM->getRegisterInfo(); + MachineRegisterInfo &RI = MF.getRegInfo(); + X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>(); + unsigned StackAlignment = X86RI->getStackAlignment(); + + // Be over-conservative: scan over all vreg defs and find whether vector + // registers are used. If yes, there is a possibility that vector register + // will be spilled and thus require dynamic stack realignment. 
+ for (unsigned RegNum = TargetRegisterInfo::FirstVirtualRegister; + RegNum < RI.getLastVirtReg(); ++RegNum) + if (RI.getRegClass(RegNum)->getAlignment() > StackAlignment) { + FuncInfo->setReserveFP(true); + return true; + } + + // Nothing to do + return false; + } + + virtual const char *getPassName() const { + return "X86 Maximal Stack Alignment Check"; + } + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); + MachineFunctionPass::getAnalysisUsage(AU); + } + }; + + char MSAH::ID = 0; +} + +FunctionPass* +llvm::createX86MaxStackAlignmentHeuristicPass() { return new MSAH(); } diff --git a/lib/Target/X86/X86RegisterInfo.td b/lib/Target/X86/X86RegisterInfo.td index 76b8f7a..49a6ca0 100644 --- a/lib/Target/X86/X86RegisterInfo.td +++ b/lib/Target/X86/X86RegisterInfo.td @@ -352,11 +352,12 @@ def GR8 : RegisterClass<"X86", [i8], 8, const TargetMachine &TM = MF.getTarget(); const TargetRegisterInfo *RI = TM.getRegisterInfo(); const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>(); + const X86MachineFunctionInfo *MFI = MF.getInfo<X86MachineFunctionInfo>(); // Does the function dedicate RBP / EBP to being a frame ptr? if (!Subtarget.is64Bit()) // In 32-mode, none of the 8-bit registers aliases EBP or ESP. return begin() + 8; - else if (RI->hasFP(MF)) + else if (RI->hasFP(MF) || MFI->getReserveFP()) // If so, don't allocate SPL or BPL. return array_endof(X86_GR8_AO_64) - 1; else @@ -396,9 +397,10 @@ def GR16 : RegisterClass<"X86", [i16], 16, const TargetMachine &TM = MF.getTarget(); const TargetRegisterInfo *RI = TM.getRegisterInfo(); const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>(); + const X86MachineFunctionInfo *MFI = MF.getInfo<X86MachineFunctionInfo>(); if (Subtarget.is64Bit()) { // Does the function dedicate RBP to being a frame ptr? - if (RI->hasFP(MF)) + if (RI->hasFP(MF) || MFI->getReserveFP()) // If so, don't allocate SP or BP. return array_endof(X86_GR16_AO_64) - 1; else @@ -406,7 +408,7 @@ def GR16 : RegisterClass<"X86", [i16], 16, return array_endof(X86_GR16_AO_64); } else { // Does the function dedicate EBP to being a frame ptr? - if (RI->hasFP(MF)) + if (RI->hasFP(MF) || MFI->getReserveFP()) // If so, don't allocate SP or BP. return begin() + 6; else @@ -447,9 +449,10 @@ def GR32 : RegisterClass<"X86", [i32], 32, const TargetMachine &TM = MF.getTarget(); const TargetRegisterInfo *RI = TM.getRegisterInfo(); const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>(); + const X86MachineFunctionInfo *MFI = MF.getInfo<X86MachineFunctionInfo>(); if (Subtarget.is64Bit()) { // Does the function dedicate RBP to being a frame ptr? - if (RI->hasFP(MF)) + if (RI->hasFP(MF) || MFI->getReserveFP()) // If so, don't allocate ESP or EBP. return array_endof(X86_GR32_AO_64) - 1; else @@ -457,7 +460,7 @@ def GR32 : RegisterClass<"X86", [i32], 32, return array_endof(X86_GR32_AO_64); } else { // Does the function dedicate EBP to being a frame ptr? - if (RI->hasFP(MF)) + if (RI->hasFP(MF) || MFI->getReserveFP()) // If so, don't allocate ESP or EBP. return begin() + 6; else @@ -484,9 +487,11 @@ def GR64 : RegisterClass<"X86", [i64], 64, const TargetMachine &TM = MF.getTarget(); const TargetRegisterInfo *RI = TM.getRegisterInfo(); const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>(); + const X86MachineFunctionInfo *MFI = MF.getInfo<X86MachineFunctionInfo>(); if (!Subtarget.is64Bit()) return begin(); // None of these are allocatable in 32-bit. - if (RI->hasFP(MF)) // Does the function dedicate RBP to being a frame ptr? 
+ // Does the function dedicate RBP to being a frame ptr? + if (RI->hasFP(MF) || MFI->getReserveFP()) return end()-3; // If so, don't allocate RIP, RSP or RBP else return end()-2; // If not, just don't allocate RIP or RSP @@ -589,8 +594,9 @@ def GR16_NOREX : RegisterClass<"X86", [i16], 16, GR16_NOREXClass::allocation_order_end(const MachineFunction &MF) const { const TargetMachine &TM = MF.getTarget(); const TargetRegisterInfo *RI = TM.getRegisterInfo(); + const X86MachineFunctionInfo *MFI = MF.getInfo<X86MachineFunctionInfo>(); // Does the function dedicate RBP / EBP to being a frame ptr? - if (RI->hasFP(MF)) + if (RI->hasFP(MF) || MFI->getReserveFP()) // If so, don't allocate SP or BP. return end() - 2; else @@ -611,8 +617,9 @@ def GR32_NOREX : RegisterClass<"X86", [i32], 32, GR32_NOREXClass::allocation_order_end(const MachineFunction &MF) const { const TargetMachine &TM = MF.getTarget(); const TargetRegisterInfo *RI = TM.getRegisterInfo(); + const X86MachineFunctionInfo *MFI = MF.getInfo<X86MachineFunctionInfo>(); // Does the function dedicate RBP / EBP to being a frame ptr? - if (RI->hasFP(MF)) + if (RI->hasFP(MF) || MFI->getReserveFP()) // If so, don't allocate ESP or EBP. return end() - 2; else @@ -633,8 +640,9 @@ def GR64_NOREX : RegisterClass<"X86", [i64], 64, GR64_NOREXClass::allocation_order_end(const MachineFunction &MF) const { const TargetMachine &TM = MF.getTarget(); const TargetRegisterInfo *RI = TM.getRegisterInfo(); + const X86MachineFunctionInfo *MFI = MF.getInfo<X86MachineFunctionInfo>(); // Does the function dedicate RBP to being a frame ptr? - if (RI->hasFP(MF)) + if (RI->hasFP(MF) || MFI->getReserveFP()) // If so, don't allocate RIP, RSP or RBP. return end() - 3; else @@ -675,9 +683,10 @@ def GR32_NOSP : RegisterClass<"X86", [i32], 32, const TargetMachine &TM = MF.getTarget(); const TargetRegisterInfo *RI = TM.getRegisterInfo(); const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>(); + const X86MachineFunctionInfo *MFI = MF.getInfo<X86MachineFunctionInfo>(); if (Subtarget.is64Bit()) { // Does the function dedicate RBP to being a frame ptr? - if (RI->hasFP(MF)) + if (RI->hasFP(MF) || MFI->getReserveFP()) // If so, don't allocate EBP. return array_endof(X86_GR32_NOSP_AO_64) - 1; else @@ -685,7 +694,7 @@ def GR32_NOSP : RegisterClass<"X86", [i32], 32, return array_endof(X86_GR32_NOSP_AO_64); } else { // Does the function dedicate EBP to being a frame ptr? - if (RI->hasFP(MF)) + if (RI->hasFP(MF) || MFI->getReserveFP()) // If so, don't allocate EBP. return begin() + 6; else @@ -710,9 +719,11 @@ def GR64_NOSP : RegisterClass<"X86", [i64], 64, const TargetMachine &TM = MF.getTarget(); const TargetRegisterInfo *RI = TM.getRegisterInfo(); const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>(); + const X86MachineFunctionInfo *MFI = MF.getInfo<X86MachineFunctionInfo>(); if (!Subtarget.is64Bit()) return begin(); // None of these are allocatable in 32-bit. - if (RI->hasFP(MF)) // Does the function dedicate RBP to being a frame ptr? + // Does the function dedicate RBP to being a frame ptr? + if (RI->hasFP(MF) || MFI->getReserveFP()) return end()-1; // If so, don't allocate RBP else return end(); // If not, any reg in this class is ok. @@ -733,8 +744,9 @@ def GR64_NOREX_NOSP : RegisterClass<"X86", [i64], 64, { const TargetMachine &TM = MF.getTarget(); const TargetRegisterInfo *RI = TM.getRegisterInfo(); + const X86MachineFunctionInfo *MFI = MF.getInfo<X86MachineFunctionInfo>(); // Does the function dedicate RBP to being a frame ptr? 
- if (RI->hasFP(MF)) + if (RI->hasFP(MF) || MFI->getReserveFP()) // If so, don't allocate RBP. return end() - 1; else diff --git a/lib/Target/X86/X86SelectionDAGInfo.cpp b/lib/Target/X86/X86SelectionDAGInfo.cpp new file mode 100644 index 0000000..cd87b82 --- /dev/null +++ b/lib/Target/X86/X86SelectionDAGInfo.cpp @@ -0,0 +1,22 @@ +//===-- X86SelectionDAGInfo.cpp - X86 SelectionDAG Info -------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the X86SelectionDAGInfo class. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "x86-selectiondag-info" +#include "X86SelectionDAGInfo.h" +using namespace llvm; + +X86SelectionDAGInfo::X86SelectionDAGInfo() { +} + +X86SelectionDAGInfo::~X86SelectionDAGInfo() { +} diff --git a/lib/Target/X86/X86SelectionDAGInfo.h b/lib/Target/X86/X86SelectionDAGInfo.h new file mode 100644 index 0000000..9834754 --- /dev/null +++ b/lib/Target/X86/X86SelectionDAGInfo.h @@ -0,0 +1,29 @@ +//===-- X86SelectionDAGInfo.h - X86 SelectionDAG Info -----------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the X86 subclass for TargetSelectionDAGInfo. +// +//===----------------------------------------------------------------------===// + +#ifndef X86SELECTIONDAGINFO_H +#define X86SELECTIONDAGINFO_H + +#include "llvm/Target/TargetSelectionDAGInfo.h" + +namespace llvm { + +class X86SelectionDAGInfo : public TargetSelectionDAGInfo { +public: + X86SelectionDAGInfo(); + ~X86SelectionDAGInfo(); +}; + +} + +#endif diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h index 8a873f0..646af91 100644 --- a/lib/Target/X86/X86Subtarget.h +++ b/lib/Target/X86/X86Subtarget.h @@ -85,8 +85,7 @@ protected: bool IsUAMemFast; /// HasVectorUAMem - True if SIMD operations can have unaligned memory - /// operands. This may require setting a feature bit in the - /// processor. + /// operands. This may require setting a feature bit in the processor. 
bool HasVectorUAMem; /// DarwinVers - Nonzero if this is a darwin platform: the numeric diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp index c608e56..f39904e 100644 --- a/lib/Target/X86/X86TargetMachine.cpp +++ b/lib/Target/X86/X86TargetMachine.cpp @@ -17,12 +17,17 @@ #include "llvm/PassManager.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/Passes.h" -#include "llvm/Support/CommandLine.h" #include "llvm/Support/FormattedStream.h" #include "llvm/Target/TargetOptions.h" #include "llvm/Target/TargetRegistry.h" +#include "llvm/Support/CommandLine.h" + using namespace llvm; +static cl::opt<bool> DisableSSEDomain("disable-sse-domain", + cl::init(false), cl::Hidden, + cl::desc("Disable SSE Domain Fixing")); + static MCAsmInfo *createMCAsmInfo(const Target &T, StringRef TT) { Triple TheTriple(TT); switch (TheTriple.getOS()) { @@ -161,6 +166,7 @@ bool X86TargetMachine::addInstSelector(PassManagerBase &PM, bool X86TargetMachine::addPreRegAlloc(PassManagerBase &PM, CodeGenOpt::Level OptLevel) { + PM.add(createX86MaxStackAlignmentHeuristicPass()); return false; // -print-machineinstr shouldn't print after this. } @@ -172,7 +178,8 @@ bool X86TargetMachine::addPostRegAlloc(PassManagerBase &PM, bool X86TargetMachine::addPreEmitPass(PassManagerBase &PM, CodeGenOpt::Level OptLevel) { - if (OptLevel != CodeGenOpt::None && Subtarget.hasSSE2()) { + if (OptLevel != CodeGenOpt::None && Subtarget.hasSSE2() && + !DisableSSEDomain) { PM.add(createSSEDomainFixPass()); return true; } diff --git a/lib/Target/X86/X86TargetMachine.h b/lib/Target/X86/X86TargetMachine.h index ae7b5b2..dc4234c 100644 --- a/lib/Target/X86/X86TargetMachine.h +++ b/lib/Target/X86/X86TargetMachine.h @@ -51,8 +51,8 @@ public: virtual const TargetFrameInfo *getFrameInfo() const { return &FrameInfo; } virtual X86JITInfo *getJITInfo() { return &JITInfo; } virtual const X86Subtarget *getSubtargetImpl() const{ return &Subtarget; } - virtual X86TargetLowering *getTargetLowering() const { - return const_cast<X86TargetLowering*>(&TLInfo); + virtual const X86TargetLowering *getTargetLowering() const { + return &TLInfo; } virtual const X86RegisterInfo *getRegisterInfo() const { return &InstrInfo.getRegisterInfo(); diff --git a/lib/Target/XCore/AsmPrinter/XCoreAsmPrinter.cpp b/lib/Target/XCore/AsmPrinter/XCoreAsmPrinter.cpp index 5801b40..c100c59 100644 --- a/lib/Target/XCore/AsmPrinter/XCoreAsmPrinter.cpp +++ b/lib/Target/XCore/AsmPrinter/XCoreAsmPrinter.cpp @@ -123,7 +123,7 @@ void XCoreAsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { switch (GV->getLinkage()) { case GlobalValue::AppendingLinkage: - llvm_report_error("AppendingLinkage is not supported by this target!"); + report_fatal_error("AppendingLinkage is not supported by this target!"); case GlobalValue::LinkOnceAnyLinkage: case GlobalValue::LinkOnceODRLinkage: case GlobalValue::WeakAnyLinkage: @@ -148,7 +148,7 @@ void XCoreAsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { llvm_unreachable("Unknown linkage type!"); } - EmitAlignment(Align, GV, 2); + EmitAlignment(Align > 2 ? 
Align : 2, GV); unsigned Size = TD->getTypeAllocSize(C->getType()); if (GV->isThreadLocal()) { diff --git a/lib/Target/XCore/CMakeLists.txt b/lib/Target/XCore/CMakeLists.txt index 0965323..1b8e7ed 100644 --- a/lib/Target/XCore/CMakeLists.txt +++ b/lib/Target/XCore/CMakeLists.txt @@ -11,7 +11,6 @@ tablegen(XCoreGenCallingConv.inc -gen-callingconv) tablegen(XCoreGenSubtarget.inc -gen-subtarget) add_llvm_target(XCore - MCSectionXCore.cpp XCoreFrameInfo.cpp XCoreInstrInfo.cpp XCoreISelDAGToDAG.cpp @@ -21,4 +20,5 @@ add_llvm_target(XCore XCoreSubtarget.cpp XCoreTargetMachine.cpp XCoreTargetObjectFile.cpp + XCoreSelectionDAGInfo.cpp ) diff --git a/lib/Target/XCore/MCSectionXCore.cpp b/lib/Target/XCore/MCSectionXCore.cpp deleted file mode 100644 index 5acceaf..0000000 --- a/lib/Target/XCore/MCSectionXCore.cpp +++ /dev/null @@ -1,35 +0,0 @@ -//===- MCSectionXCore.cpp - XCore-specific section representation ---------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements the MCSectionXCore class. -// -//===----------------------------------------------------------------------===// - -#include "MCSectionXCore.h" -#include "llvm/MC/MCContext.h" -#include "llvm/Support/raw_ostream.h" -using namespace llvm; - -MCSectionXCore * -MCSectionXCore::Create(const StringRef &Section, unsigned Type, - unsigned Flags, SectionKind K, - bool isExplicit, MCContext &Ctx) { - return new (Ctx) MCSectionXCore(Section, Type, Flags, K, isExplicit); -} - - -/// PrintTargetSpecificSectionFlags - This handles the XCore-specific cp/dp -/// section flags. -void MCSectionXCore::PrintTargetSpecificSectionFlags(const MCAsmInfo &MAI, - raw_ostream &OS) const { - if (getFlags() & MCSectionXCore::SHF_CP_SECTION) - OS << 'c'; - if (getFlags() & MCSectionXCore::SHF_DP_SECTION) - OS << 'd'; -} diff --git a/lib/Target/XCore/MCSectionXCore.h b/lib/Target/XCore/MCSectionXCore.h deleted file mode 100644 index 02f8f95..0000000 --- a/lib/Target/XCore/MCSectionXCore.h +++ /dev/null @@ -1,54 +0,0 @@ -//===- MCSectionXCore.h - XCore-specific section representation -*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file declares the MCSectionXCore class. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_MCSECTION_XCORE_H -#define LLVM_MCSECTION_XCORE_H - -#include "llvm/MC/MCSectionELF.h" - -namespace llvm { - -class MCSectionXCore : public MCSectionELF { - MCSectionXCore(const StringRef &Section, unsigned Type, unsigned Flags, - SectionKind K, bool isExplicit) - : MCSectionELF(Section, Type, Flags, K, isExplicit) {} - -public: - - enum { - /// SHF_CP_SECTION - All sections with the "c" flag are grouped together - /// by the linker to form the constant pool and the cp register is set to - /// the start of the constant pool by the boot code. - SHF_CP_SECTION = FIRST_TARGET_DEP_FLAG, - - /// SHF_DP_SECTION - All sections with the "d" flag are grouped together - /// by the linker to form the data section and the dp register is set to - /// the start of the section by the boot code. 
- SHF_DP_SECTION = FIRST_TARGET_DEP_FLAG << 1 - }; - - static MCSectionXCore *Create(const StringRef &Section, unsigned Type, - unsigned Flags, SectionKind K, - bool isExplicit, MCContext &Ctx); - - - /// PrintTargetSpecificSectionFlags - This handles the XCore-specific cp/dp - /// section flags. - virtual void PrintTargetSpecificSectionFlags(const MCAsmInfo &MAI, - raw_ostream &OS) const; - -}; - -} // end namespace llvm - -#endif diff --git a/lib/Target/XCore/XCoreISelDAGToDAG.cpp b/lib/Target/XCore/XCoreISelDAGToDAG.cpp index 1615547..5564ddf 100644 --- a/lib/Target/XCore/XCoreISelDAGToDAG.cpp +++ b/lib/Target/XCore/XCoreISelDAGToDAG.cpp @@ -12,7 +12,6 @@ //===----------------------------------------------------------------------===// #include "XCore.h" -#include "XCoreISelLowering.h" #include "XCoreTargetMachine.h" #include "llvm/DerivedTypes.h" #include "llvm/Function.h" @@ -40,7 +39,7 @@ using namespace llvm; /// namespace { class XCoreDAGToDAGISel : public SelectionDAGISel { - XCoreTargetLowering &Lowering; + const XCoreTargetLowering &Lowering; const XCoreSubtarget &Subtarget; public: diff --git a/lib/Target/XCore/XCoreISelLowering.cpp b/lib/Target/XCore/XCoreISelLowering.cpp index 27e5233..3990b8b 100644 --- a/lib/Target/XCore/XCoreISelLowering.cpp +++ b/lib/Target/XCore/XCoreISelLowering.cpp @@ -158,7 +158,7 @@ XCoreTargetLowering::XCoreTargetLowering(XCoreTargetMachine &XTM) } SDValue XCoreTargetLowering:: -LowerOperation(SDValue Op, SelectionDAG &DAG) { +LowerOperation(SDValue Op, SelectionDAG &DAG) const { switch (Op.getOpcode()) { case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG); @@ -187,7 +187,7 @@ LowerOperation(SDValue Op, SelectionDAG &DAG) { /// type with new values built out of custom code. void XCoreTargetLowering::ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results, - SelectionDAG &DAG) { + SelectionDAG &DAG) const { switch (N->getOpcode()) { default: llvm_unreachable("Don't know how to custom expand this!"); @@ -210,7 +210,7 @@ getFunctionAlignment(const Function *) const { //===----------------------------------------------------------------------===// SDValue XCoreTargetLowering:: -LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) +LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { DebugLoc dl = Op.getDebugLoc(); SDValue Cond = DAG.getNode(ISD::SETCC, dl, MVT::i32, Op.getOperand(2), @@ -220,7 +220,8 @@ LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) } SDValue XCoreTargetLowering:: -getGlobalAddressWrapper(SDValue GA, GlobalValue *GV, SelectionDAG &DAG) +getGlobalAddressWrapper(SDValue GA, const GlobalValue *GV, + SelectionDAG &DAG) const { // FIXME there is no actual debug info here DebugLoc dl = GA.getDebugLoc(); @@ -241,9 +242,9 @@ getGlobalAddressWrapper(SDValue GA, GlobalValue *GV, SelectionDAG &DAG) } SDValue XCoreTargetLowering:: -LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) +LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const { - GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); + const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); SDValue GA = DAG.getTargetGlobalAddress(GV, MVT::i32); // If it's a debug information descriptor, don't mess with it. 
if (DAG.isVerifiedDebugInfoDesc(Op)) @@ -262,12 +263,12 @@ static inline bool isZeroLengthArray(const Type *Ty) { } SDValue XCoreTargetLowering:: -LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) +LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const { // FIXME there isn't really debug info here DebugLoc dl = Op.getDebugLoc(); // transform to label + getid() * size - GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); + const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); SDValue GA = DAG.getTargetGlobalAddress(GV, MVT::i32); const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV); if (!GVar) { @@ -296,18 +297,18 @@ LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) } SDValue XCoreTargetLowering:: -LowerBlockAddress(SDValue Op, SelectionDAG &DAG) +LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const { DebugLoc DL = Op.getDebugLoc(); - BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress(); + const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress(); SDValue Result = DAG.getBlockAddress(BA, getPointerTy(), /*isTarget=*/true); return DAG.getNode(XCoreISD::PCRelativeWrapper, DL, getPointerTy(), Result); } SDValue XCoreTargetLowering:: -LowerConstantPool(SDValue Op, SelectionDAG &DAG) +LowerConstantPool(SDValue Op, SelectionDAG &DAG) const { ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op); // FIXME there isn't really debug info here @@ -329,7 +330,7 @@ unsigned XCoreTargetLowering::getJumpTableEncoding() const { } SDValue XCoreTargetLowering:: -LowerBR_JT(SDValue Op, SelectionDAG &DAG) +LowerBR_JT(SDValue Op, SelectionDAG &DAG) const { SDValue Chain = Op.getOperand(0); SDValue Table = Op.getOperand(1); @@ -391,7 +392,7 @@ IsWordAlignedBasePlusConstantOffset(SDValue Addr, SDValue &AlignedBase, } SDValue XCoreTargetLowering:: -LowerLOAD(SDValue Op, SelectionDAG &DAG) +LowerLOAD(SDValue Op, SelectionDAG &DAG) const { LoadSDNode *LD = cast<LoadSDNode>(Op); assert(LD->getExtensionType() == ISD::NON_EXTLOAD && @@ -494,7 +495,7 @@ LowerLOAD(SDValue Op, SelectionDAG &DAG) } SDValue XCoreTargetLowering:: -LowerSTORE(SDValue Op, SelectionDAG &DAG) +LowerSTORE(SDValue Op, SelectionDAG &DAG) const { StoreSDNode *ST = cast<StoreSDNode>(Op); assert(!ST->isTruncatingStore() && "Unexpected store type"); @@ -554,7 +555,7 @@ LowerSTORE(SDValue Op, SelectionDAG &DAG) } SDValue XCoreTargetLowering:: -LowerSMUL_LOHI(SDValue Op, SelectionDAG &DAG) +LowerSMUL_LOHI(SDValue Op, SelectionDAG &DAG) const { assert(Op.getValueType() == MVT::i32 && Op.getOpcode() == ISD::SMUL_LOHI && "Unexpected operand to lower!"); @@ -571,7 +572,7 @@ LowerSMUL_LOHI(SDValue Op, SelectionDAG &DAG) } SDValue XCoreTargetLowering:: -LowerUMUL_LOHI(SDValue Op, SelectionDAG &DAG) +LowerUMUL_LOHI(SDValue Op, SelectionDAG &DAG) const { assert(Op.getValueType() == MVT::i32 && Op.getOpcode() == ISD::UMUL_LOHI && "Unexpected operand to lower!"); @@ -647,7 +648,7 @@ isADDADDMUL(SDValue Op, SDValue &Mul0, SDValue &Mul1, SDValue &Addend0, } SDValue XCoreTargetLowering:: -TryExpandADDWithMul(SDNode *N, SelectionDAG &DAG) +TryExpandADDWithMul(SDNode *N, SelectionDAG &DAG) const { SDValue Mul; SDValue Other; @@ -707,7 +708,7 @@ TryExpandADDWithMul(SDNode *N, SelectionDAG &DAG) } SDValue XCoreTargetLowering:: -ExpandADDSUB(SDNode *N, SelectionDAG &DAG) +ExpandADDSUB(SDNode *N, SelectionDAG &DAG) const { assert(N->getValueType(0) == MVT::i64 && (N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) && @@ -747,7 +748,7 @@ ExpandADDSUB(SDNode *N, SelectionDAG &DAG) } SDValue 
XCoreTargetLowering:: -LowerVAARG(SDValue Op, SelectionDAG &DAG) +LowerVAARG(SDValue Op, SelectionDAG &DAG) const { llvm_unreachable("unimplemented"); // FIX Arguments passed by reference need a extra dereference. @@ -769,7 +770,7 @@ LowerVAARG(SDValue Op, SelectionDAG &DAG) } SDValue XCoreTargetLowering:: -LowerVASTART(SDValue Op, SelectionDAG &DAG) +LowerVASTART(SDValue Op, SelectionDAG &DAG) const { DebugLoc dl = Op.getDebugLoc(); // vastart stores the address of the VarArgsFrameIndex slot into the @@ -782,7 +783,8 @@ LowerVASTART(SDValue Op, SelectionDAG &DAG) false, false, 0); } -SDValue XCoreTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) { +SDValue XCoreTargetLowering::LowerFRAMEADDR(SDValue Op, + SelectionDAG &DAG) const { DebugLoc dl = Op.getDebugLoc(); // Depths > 0 not supported yet! if (cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue() > 0) @@ -812,7 +814,7 @@ XCoreTargetLowering::LowerCall(SDValue Chain, SDValue Callee, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals) { + SmallVectorImpl<SDValue> &InVals) const { // XCore target does not yet support tail call optimization. isTailCall = false; @@ -839,7 +841,7 @@ XCoreTargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals) { + SmallVectorImpl<SDValue> &InVals) const { // Analyze operands of the call, assigning locations to each operand. SmallVector<CCValAssign, 16> ArgLocs; @@ -962,7 +964,7 @@ XCoreTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals) { + SmallVectorImpl<SDValue> &InVals) const { // Assign locations to each value returned by this call. 
SmallVector<CCValAssign, 16> RVLocs; @@ -994,7 +996,8 @@ XCoreTargetLowering::LowerFormalArguments(SDValue Chain, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals) { + SmallVectorImpl<SDValue> &InVals) + const { switch (CallConv) { default: @@ -1018,7 +1021,7 @@ XCoreTargetLowering::LowerCCCArguments(SDValue Chain, &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals) { + SmallVectorImpl<SDValue> &InVals) const { MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); MachineRegisterInfo &RegInfo = MF.getRegInfo(); @@ -1132,7 +1135,7 @@ bool XCoreTargetLowering:: CanLowerReturn(CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<EVT> &OutTys, const SmallVectorImpl<ISD::ArgFlagsTy> &ArgsFlags, - SelectionDAG &DAG) { + SelectionDAG &DAG) const { SmallVector<CCValAssign, 16> RVLocs; CCState CCInfo(CallConv, isVarArg, getTargetMachine(), RVLocs, *DAG.getContext()); @@ -1143,7 +1146,7 @@ SDValue XCoreTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::OutputArg> &Outs, - DebugLoc dl, SelectionDAG &DAG) { + DebugLoc dl, SelectionDAG &DAG) const { // CCValAssign - represent the assignment of // the return value to a location @@ -1194,8 +1197,7 @@ XCoreTargetLowering::LowerReturn(SDValue Chain, MachineBasicBlock * XCoreTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, - MachineBasicBlock *BB, - DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const { + MachineBasicBlock *BB) const { const TargetInstrInfo &TII = *getTargetMachine().getInstrInfo(); DebugLoc dl = MI->getDebugLoc(); assert((MI->getOpcode() == XCore::SELECT_CC) && @@ -1225,12 +1227,9 @@ XCoreTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, F->insert(It, sinkMBB); // Update machine-CFG edges by first adding all successors of the current // block to the new block which will contain the Phi node for the select. - // Also inform sdisel of the edge changes. for (MachineBasicBlock::succ_iterator I = BB->succ_begin(), - E = BB->succ_end(); I != E; ++I) { - EM->insert(std::make_pair(*I, sinkMBB)); + E = BB->succ_end(); I != E; ++I) sinkMBB->addSuccessor(*I); - } // Next, remove all successors of the current block, and add the true // and fallthrough blocks as its successors. while (!BB->succ_empty()) diff --git a/lib/Target/XCore/XCoreISelLowering.h b/lib/Target/XCore/XCoreISelLowering.h index 3ccdeec..d8d2a3a 100644 --- a/lib/Target/XCore/XCoreISelLowering.h +++ b/lib/Target/XCore/XCoreISelLowering.h @@ -83,21 +83,21 @@ namespace llvm { virtual unsigned getJumpTableEncoding() const; /// LowerOperation - Provide custom lowering hooks for some operations. - virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG); + virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const; /// ReplaceNodeResults - Replace the results of node with an illegal result /// type with new values built out of custom code. /// virtual void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results, - SelectionDAG &DAG); + SelectionDAG &DAG) const; /// getTargetNodeName - This method returns the name of a target specific // DAG node. 
virtual const char *getTargetNodeName(unsigned Opcode) const; - virtual MachineBasicBlock *EmitInstrWithCustomInserter(MachineInstr *MI, - MachineBasicBlock *MBB, - DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const; + virtual MachineBasicBlock * + EmitInstrWithCustomInserter(MachineInstr *MI, + MachineBasicBlock *MBB) const; virtual bool isLegalAddressingMode(const AddrMode &AM, const Type *Ty) const; @@ -115,37 +115,37 @@ namespace llvm { bool isVarArg, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals); + SmallVectorImpl<SDValue> &InVals) const; SDValue LowerCCCCallTo(SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg, bool isTailCall, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals); + SmallVectorImpl<SDValue> &InVals) const; SDValue LowerCallResult(SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals); - SDValue getReturnAddressFrameIndex(SelectionDAG &DAG); - SDValue getGlobalAddressWrapper(SDValue GA, GlobalValue *GV, - SelectionDAG &DAG); + SmallVectorImpl<SDValue> &InVals) const; + SDValue getReturnAddressFrameIndex(SelectionDAG &DAG) const; + SDValue getGlobalAddressWrapper(SDValue GA, const GlobalValue *GV, + SelectionDAG &DAG) const; // Lower Operand specifics - SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG); - SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG); - SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG); - SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG); - SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG); - SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG); - SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG); - SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG); - SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG); - SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG); - SDValue LowerUMUL_LOHI(SDValue Op, SelectionDAG &DAG); - SDValue LowerSMUL_LOHI(SDValue Op, SelectionDAG &DAG); - SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG); + SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerUMUL_LOHI(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerSMUL_LOHI(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const; // Inline asm support std::vector<unsigned> @@ -153,8 +153,8 @@ namespace llvm { EVT VT) const; // Expand specifics - SDValue TryExpandADDWithMul(SDNode *Op, SelectionDAG &DAG); - SDValue ExpandADDSUB(SDNode *Op, SelectionDAG &DAG); + SDValue TryExpandADDWithMul(SDNode *Op, SelectionDAG &DAG) const; + SDValue ExpandADDSUB(SDNode *Op, SelectionDAG &DAG) const; virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const; @@ -171,7 +171,7 @@ namespace llvm { bool 
isVarArg, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals); + SmallVectorImpl<SDValue> &InVals) const; virtual SDValue LowerCall(SDValue Chain, SDValue Callee, @@ -180,19 +180,19 @@ namespace llvm { const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals); + SmallVectorImpl<SDValue> &InVals) const; virtual SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::OutputArg> &Outs, - DebugLoc dl, SelectionDAG &DAG); + DebugLoc dl, SelectionDAG &DAG) const; virtual bool CanLowerReturn(CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<EVT> &OutTys, const SmallVectorImpl<ISD::ArgFlagsTy> &ArgsFlags, - SelectionDAG &DAG); + SelectionDAG &DAG) const; }; } diff --git a/lib/Target/XCore/XCoreRegisterInfo.cpp b/lib/Target/XCore/XCoreRegisterInfo.cpp index ab71d05..0cfb358 100644 --- a/lib/Target/XCore/XCoreRegisterInfo.cpp +++ b/lib/Target/XCore/XCoreRegisterInfo.cpp @@ -113,7 +113,7 @@ XCoreRegisterInfo::requiresRegisterScavenging(const MachineFunction &MF) const { } bool XCoreRegisterInfo::hasFP(const MachineFunction &MF) const { - return NoFramePointerElim || MF.getFrameInfo()->hasVarSizedObjects(); + return DisableFramePointerElim(MF) || MF.getFrameInfo()->hasVarSizedObjects(); } // This function eliminates ADJCALLSTACKDOWN, @@ -225,12 +225,9 @@ XCoreRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, unsigned FramePtr = XCore::R10; if (!isUs) { - if (!RS) { - std::string msg; - raw_string_ostream Msg(msg); - Msg << "eliminateFrameIndex Frame size too big: " << Offset; - llvm_report_error(Msg.str()); - } + if (!RS) + report_fatal_error("eliminateFrameIndex Frame size too big: " + + Twine(Offset)); unsigned ScratchReg = RS->scavengeRegister(XCore::GRRegsRegisterClass, II, SPAdj); loadConstant(MBB, II, ScratchReg, Offset, dl); @@ -278,12 +275,9 @@ XCoreRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, } } else { bool isU6 = isImmU6(Offset); - if (!isU6 && !isImmU16(Offset)) { - std::string msg; - raw_string_ostream Msg(msg); - Msg << "eliminateFrameIndex Frame size too big: " << Offset; - llvm_report_error(Msg.str()); - } + if (!isU6 && !isImmU16(Offset)) + report_fatal_error("eliminateFrameIndex Frame size too big: " + + Twine(Offset)); switch (MI.getOpcode()) { int NewOpcode; @@ -360,10 +354,7 @@ loadConstant(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, // TODO use mkmsk if possible. if (!isImmU16(Value)) { // TODO use constant pool. - std::string msg; - raw_string_ostream Msg(msg); - Msg << "loadConstant value too big " << Value; - llvm_report_error(Msg.str()); + report_fatal_error("loadConstant value too big " + Twine(Value)); } int Opcode = isImmU6(Value) ? XCore::LDC_ru6 : XCore::LDC_lru6; BuildMI(MBB, I, dl, TII.get(Opcode), DstReg).addImm(Value); @@ -375,12 +366,8 @@ storeToStack(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, assert(Offset%4 == 0 && "Misaligned stack offset"); Offset/=4; bool isU6 = isImmU6(Offset); - if (!isU6 && !isImmU16(Offset)) { - std::string msg; - raw_string_ostream Msg(msg); - Msg << "storeToStack offset too big " << Offset; - llvm_report_error(Msg.str()); - } + if (!isU6 && !isImmU16(Offset)) + report_fatal_error("storeToStack offset too big " + Twine(Offset)); int Opcode = isU6 ? 
XCore::STWSP_ru6 : XCore::STWSP_lru6; BuildMI(MBB, I, dl, TII.get(Opcode)) .addReg(SrcReg) @@ -393,12 +380,8 @@ loadFromStack(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, assert(Offset%4 == 0 && "Misaligned stack offset"); Offset/=4; bool isU6 = isImmU6(Offset); - if (!isU6 && !isImmU16(Offset)) { - std::string msg; - raw_string_ostream Msg(msg); - Msg << "loadFromStack offset too big " << Offset; - llvm_report_error(Msg.str()); - } + if (!isU6 && !isImmU16(Offset)) + report_fatal_error("loadFromStack offset too big " + Twine(Offset)); int Opcode = isU6 ? XCore::LDWSP_ru6 : XCore::LDWSP_lru6; BuildMI(MBB, I, dl, TII.get(Opcode), DstReg) .addImm(Offset); @@ -425,10 +408,7 @@ void XCoreRegisterInfo::emitPrologue(MachineFunction &MF) const { if (!isU6 && !isImmU16(FrameSize)) { // FIXME could emit multiple instructions. - std::string msg; - raw_string_ostream Msg(msg); - Msg << "emitPrologue Frame size too big: " << FrameSize; - llvm_report_error(Msg.str()); + report_fatal_error("emitPrologue Frame size too big: " + Twine(FrameSize)); } bool emitFrameMoves = needsFrameMoves(MF); @@ -549,10 +529,7 @@ void XCoreRegisterInfo::emitEpilogue(MachineFunction &MF, if (!isU6 && !isImmU16(FrameSize)) { // FIXME could emit multiple instructions. - std::string msg; - raw_string_ostream Msg(msg); - Msg << "emitEpilogue Frame size too big: " << FrameSize; - llvm_report_error(Msg.str()); + report_fatal_error("emitEpilogue Frame size too big: " + Twine(FrameSize)); } if (FrameSize) { diff --git a/lib/Target/XCore/XCoreSelectionDAGInfo.cpp b/lib/Target/XCore/XCoreSelectionDAGInfo.cpp new file mode 100644 index 0000000..6aac237 --- /dev/null +++ b/lib/Target/XCore/XCoreSelectionDAGInfo.cpp @@ -0,0 +1,22 @@ +//===-- XCoreSelectionDAGInfo.cpp - XCore SelectionDAG Info ---------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the XCoreSelectionDAGInfo class. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "xcore-selectiondag-info" +#include "XCoreSelectionDAGInfo.h" +using namespace llvm; + +XCoreSelectionDAGInfo::XCoreSelectionDAGInfo() { +} + +XCoreSelectionDAGInfo::~XCoreSelectionDAGInfo() { +} diff --git a/lib/Target/XCore/XCoreSelectionDAGInfo.h b/lib/Target/XCore/XCoreSelectionDAGInfo.h new file mode 100644 index 0000000..fd96716 --- /dev/null +++ b/lib/Target/XCore/XCoreSelectionDAGInfo.h @@ -0,0 +1,29 @@ +//===-- XCoreSelectionDAGInfo.h - XCore SelectionDAG Info -------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the XCore subclass for TargetSelectionDAGInfo. 
+// +//===----------------------------------------------------------------------===// + +#ifndef XCORESELECTIONDAGINFO_H +#define XCORESELECTIONDAGINFO_H + +#include "llvm/Target/TargetSelectionDAGInfo.h" + +namespace llvm { + +class XCoreSelectionDAGInfo : public TargetSelectionDAGInfo { +public: + XCoreSelectionDAGInfo(); + ~XCoreSelectionDAGInfo(); +}; + +} + +#endif diff --git a/lib/Target/XCore/XCoreTargetMachine.h b/lib/Target/XCore/XCoreTargetMachine.h index b0b1464..701a6f1 100644 --- a/lib/Target/XCore/XCoreTargetMachine.h +++ b/lib/Target/XCore/XCoreTargetMachine.h @@ -36,8 +36,8 @@ public: virtual const XCoreInstrInfo *getInstrInfo() const { return &InstrInfo; } virtual const XCoreFrameInfo *getFrameInfo() const { return &FrameInfo; } virtual const XCoreSubtarget *getSubtargetImpl() const { return &Subtarget; } - virtual XCoreTargetLowering *getTargetLowering() const { - return const_cast<XCoreTargetLowering*>(&TLInfo); + virtual const XCoreTargetLowering *getTargetLowering() const { + return &TLInfo; } virtual const TargetRegisterInfo *getRegisterInfo() const { diff --git a/lib/Target/XCore/XCoreTargetObjectFile.cpp b/lib/Target/XCore/XCoreTargetObjectFile.cpp index 7de3b55..cdf5a53 100644 --- a/lib/Target/XCore/XCoreTargetObjectFile.cpp +++ b/lib/Target/XCore/XCoreTargetObjectFile.cpp @@ -9,7 +9,8 @@ #include "XCoreTargetObjectFile.h" #include "XCoreSubtarget.h" -#include "MCSectionXCore.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCSectionELF.h" #include "llvm/Target/TargetMachine.h" using namespace llvm; @@ -18,34 +19,31 @@ void XCoreTargetObjectFile::Initialize(MCContext &Ctx, const TargetMachine &TM){ TargetLoweringObjectFileELF::Initialize(Ctx, TM); DataSection = - MCSectionXCore::Create(".dp.data", MCSectionELF::SHT_PROGBITS, - MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_WRITE | - MCSectionXCore::SHF_DP_SECTION, - SectionKind::getDataRel(), false, getContext()); + Ctx.getELFSection(".dp.data", MCSectionELF::SHT_PROGBITS, + MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_WRITE | + MCSectionELF::XCORE_SHF_DP_SECTION, + SectionKind::getDataRel(), false); BSSSection = - MCSectionXCore::Create(".dp.bss", MCSectionELF::SHT_NOBITS, - MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_WRITE | - MCSectionXCore::SHF_DP_SECTION, - SectionKind::getBSS(), false, getContext()); + Ctx.getELFSection(".dp.bss", MCSectionELF::SHT_NOBITS, + MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_WRITE | + MCSectionELF::XCORE_SHF_DP_SECTION, + SectionKind::getBSS(), false); MergeableConst4Section = - MCSectionXCore::Create(".cp.rodata.cst4", MCSectionELF::SHT_PROGBITS, - MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_MERGE | - MCSectionXCore::SHF_CP_SECTION, - SectionKind::getMergeableConst4(), false, - getContext()); + Ctx.getELFSection(".cp.rodata.cst4", MCSectionELF::SHT_PROGBITS, + MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_MERGE | + MCSectionELF::XCORE_SHF_CP_SECTION, + SectionKind::getMergeableConst4(), false); MergeableConst8Section = - MCSectionXCore::Create(".cp.rodata.cst8", MCSectionELF::SHT_PROGBITS, - MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_MERGE | - MCSectionXCore::SHF_CP_SECTION, - SectionKind::getMergeableConst8(), false, - getContext()); + Ctx.getELFSection(".cp.rodata.cst8", MCSectionELF::SHT_PROGBITS, + MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_MERGE | + MCSectionELF::XCORE_SHF_CP_SECTION, + SectionKind::getMergeableConst8(), false); MergeableConst16Section = - MCSectionXCore::Create(".cp.rodata.cst16", MCSectionELF::SHT_PROGBITS, - MCSectionELF::SHF_ALLOC | 
MCSectionELF::SHF_MERGE | - MCSectionXCore::SHF_CP_SECTION, - SectionKind::getMergeableConst16(), false, - getContext()); + Ctx.getELFSection(".cp.rodata.cst16", MCSectionELF::SHT_PROGBITS, + MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_MERGE | + MCSectionELF::XCORE_SHF_CP_SECTION, + SectionKind::getMergeableConst16(), false); // TLS globals are lowered in the backend to arrays indexed by the current // thread id. After lowering they require no special handling by the linker @@ -54,11 +52,10 @@ void XCoreTargetObjectFile::Initialize(MCContext &Ctx, const TargetMachine &TM){ TLSBSSSection = BSSSection; ReadOnlySection = - MCSectionXCore::Create(".cp.rodata", MCSectionELF::SHT_PROGBITS, - MCSectionELF::SHF_ALLOC | - MCSectionXCore::SHF_CP_SECTION, - SectionKind::getReadOnlyWithRel(), false, - getContext()); + Ctx.getELFSection(".cp.rodata", MCSectionELF::SHT_PROGBITS, + MCSectionELF::SHF_ALLOC | + MCSectionELF::XCORE_SHF_CP_SECTION, + SectionKind::getReadOnlyWithRel(), false); // Dynamic linking is not supported. Data with relocations is placed in the // same section as data without relocations. diff --git a/lib/Transforms/IPO/ArgumentPromotion.cpp b/lib/Transforms/IPO/ArgumentPromotion.cpp index 40a87e8..89f213e 100644 --- a/lib/Transforms/IPO/ArgumentPromotion.cpp +++ b/lib/Transforms/IPO/ArgumentPromotion.cpp @@ -64,7 +64,7 @@ namespace { CallGraphSCCPass::getAnalysisUsage(AU); } - virtual bool runOnSCC(std::vector<CallGraphNode *> &SCC); + virtual bool runOnSCC(CallGraphSCC &SCC); static char ID; // Pass identification, replacement for typeid explicit ArgPromotion(unsigned maxElements = 3) : CallGraphSCCPass(&ID), maxElements(maxElements) {} @@ -91,20 +91,21 @@ Pass *llvm::createArgumentPromotionPass(unsigned maxElements) { return new ArgPromotion(maxElements); } -bool ArgPromotion::runOnSCC(std::vector<CallGraphNode *> &SCC) { +bool ArgPromotion::runOnSCC(CallGraphSCC &SCC) { bool Changed = false, LocalChange; do { // Iterate until we stop promoting from this SCC. LocalChange = false; // Attempt to promote arguments from all functions in this SCC. - for (unsigned i = 0, e = SCC.size(); i != e; ++i) - if (CallGraphNode *CGN = PromoteArguments(SCC[i])) { + for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) { + if (CallGraphNode *CGN = PromoteArguments(*I)) { LocalChange = true; - SCC[i] = CGN; + SCC.ReplaceNode(*I, CGN); } + } Changed |= LocalChange; // Remember that we changed something. } while (LocalChange); - + return Changed; } @@ -873,8 +874,14 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F, NF_CGN->stealCalledFunctionsFrom(CG[F]); - // Now that the old function is dead, delete it. - delete CG.removeFunctionFromModule(F); + // Now that the old function is dead, delete it. If there is a dangling + // reference to the CallgraphNode, just leave the dead function around for + // someone else to nuke. 
+ CallGraphNode *CGN = CG[F]; + if (CGN->getNumReferences() == 0) + delete CG.removeFunctionFromModule(CGN); + else + F->setLinkage(Function::ExternalLinkage); return NF_CGN; } diff --git a/lib/Transforms/IPO/CMakeLists.txt b/lib/Transforms/IPO/CMakeLists.txt index 92bef3b..65483e8 100644 --- a/lib/Transforms/IPO/CMakeLists.txt +++ b/lib/Transforms/IPO/CMakeLists.txt @@ -23,3 +23,5 @@ add_llvm_library(LLVMipo StripSymbols.cpp StructRetPromotion.cpp ) + +target_link_libraries (LLVMipo LLVMScalarOpts LLVMInstCombine) diff --git a/lib/Transforms/IPO/DeadArgumentElimination.cpp b/lib/Transforms/IPO/DeadArgumentElimination.cpp index 227602d..6443dd4 100644 --- a/lib/Transforms/IPO/DeadArgumentElimination.cpp +++ b/lib/Transforms/IPO/DeadArgumentElimination.cpp @@ -243,6 +243,9 @@ bool DAE::DeleteDeadVarargs(Function &Fn) { if (cast<CallInst>(Call)->isTailCall()) cast<CallInst>(New)->setTailCall(); } + if (MDNode *N = Call->getDbgMetadata()) + New->setDbgMetadata(N); + Args.clear(); if (!Call->use_empty()) @@ -694,18 +697,6 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) { AttrListPtr NewPAL = AttrListPtr::get(AttributesVec.begin(), AttributesVec.end()); - // Work around LLVM bug PR56: the CWriter cannot emit varargs functions which - // have zero fixed arguments. - // - // Note that we apply this hack for a vararg fuction that does not have any - // arguments anymore, but did have them before (so don't bother fixing - // functions that were already broken wrt CWriter). - bool ExtraArgHack = false; - if (Params.empty() && FTy->isVarArg() && FTy->getNumParams() != 0) { - ExtraArgHack = true; - Params.push_back(Type::getInt32Ty(F->getContext())); - } - // Create the new function type based on the recomputed parameters. FunctionType *NFTy = FunctionType::get(NRetTy, Params, FTy->isVarArg()); @@ -755,9 +746,6 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) { AttributesVec.push_back(AttributeWithIndex::get(Args.size(), Attrs)); } - if (ExtraArgHack) - Args.push_back(UndefValue::get(Type::getInt32Ty(F->getContext()))); - // Push any varargs arguments on the list. Don't forget their attributes. for (CallSite::arg_iterator E = CS.arg_end(); I != E; ++I, ++i) { Args.push_back(*I); @@ -785,6 +773,9 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) { if (cast<CallInst>(Call)->isTailCall()) cast<CallInst>(New)->setTailCall(); } + if (MDNode *N = Call->getDbgMetadata()) + New->setDbgMetadata(N); + Args.clear(); if (!Call->use_empty()) { diff --git a/lib/Transforms/IPO/FunctionAttrs.cpp b/lib/Transforms/IPO/FunctionAttrs.cpp index 298d5cf..9bd7af6 100644 --- a/lib/Transforms/IPO/FunctionAttrs.cpp +++ b/lib/Transforms/IPO/FunctionAttrs.cpp @@ -44,20 +44,20 @@ namespace { FunctionAttrs() : CallGraphSCCPass(&ID) {} // runOnSCC - Analyze the SCC, performing the transformation if possible. - bool runOnSCC(std::vector<CallGraphNode *> &SCC); + bool runOnSCC(CallGraphSCC &SCC); // AddReadAttrs - Deduce readonly/readnone attributes for the SCC. - bool AddReadAttrs(const std::vector<CallGraphNode *> &SCC); + bool AddReadAttrs(const CallGraphSCC &SCC); // AddNoCaptureAttrs - Deduce nocapture attributes for the SCC. - bool AddNoCaptureAttrs(const std::vector<CallGraphNode *> &SCC); + bool AddNoCaptureAttrs(const CallGraphSCC &SCC); // IsFunctionMallocLike - Does this function allocate new memory? bool IsFunctionMallocLike(Function *F, SmallPtrSet<Function*, 8> &) const; // AddNoAliasAttrs - Deduce noalias attributes for the SCC. 
- bool AddNoAliasAttrs(const std::vector<CallGraphNode *> &SCC); + bool AddNoAliasAttrs(const CallGraphSCC &SCC); virtual void getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesCFG(); @@ -123,19 +123,19 @@ bool FunctionAttrs::PointsToLocalMemory(Value *V) { } /// AddReadAttrs - Deduce readonly/readnone attributes for the SCC. -bool FunctionAttrs::AddReadAttrs(const std::vector<CallGraphNode *> &SCC) { +bool FunctionAttrs::AddReadAttrs(const CallGraphSCC &SCC) { SmallPtrSet<Function*, 8> SCCNodes; // Fill SCCNodes with the elements of the SCC. Used for quickly // looking up whether a given CallGraphNode is in this SCC. - for (unsigned i = 0, e = SCC.size(); i != e; ++i) - SCCNodes.insert(SCC[i]->getFunction()); + for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) + SCCNodes.insert((*I)->getFunction()); // Check if any of the functions in the SCC read or write memory. If they // write memory then they can't be marked readnone or readonly. bool ReadsMemory = false; - for (unsigned i = 0, e = SCC.size(); i != e; ++i) { - Function *F = SCC[i]->getFunction(); + for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) { + Function *F = (*I)->getFunction(); if (F == 0) // External node - may write memory. Just give up. @@ -210,8 +210,8 @@ bool FunctionAttrs::AddReadAttrs(const std::vector<CallGraphNode *> &SCC) { // Success! Functions in this SCC do not access memory, or only read memory. // Give them the appropriate attribute. bool MadeChange = false; - for (unsigned i = 0, e = SCC.size(); i != e; ++i) { - Function *F = SCC[i]->getFunction(); + for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) { + Function *F = (*I)->getFunction(); if (F->doesNotAccessMemory()) // Already perfect! @@ -239,13 +239,13 @@ bool FunctionAttrs::AddReadAttrs(const std::vector<CallGraphNode *> &SCC) { } /// AddNoCaptureAttrs - Deduce nocapture attributes for the SCC. -bool FunctionAttrs::AddNoCaptureAttrs(const std::vector<CallGraphNode *> &SCC) { +bool FunctionAttrs::AddNoCaptureAttrs(const CallGraphSCC &SCC) { bool Changed = false; // Check each function in turn, determining which pointer arguments are not // captured. - for (unsigned i = 0, e = SCC.size(); i != e; ++i) { - Function *F = SCC[i]->getFunction(); + for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) { + Function *F = (*I)->getFunction(); if (F == 0) // External node - skip it; @@ -334,18 +334,18 @@ bool FunctionAttrs::IsFunctionMallocLike(Function *F, } /// AddNoAliasAttrs - Deduce noalias attributes for the SCC. -bool FunctionAttrs::AddNoAliasAttrs(const std::vector<CallGraphNode *> &SCC) { +bool FunctionAttrs::AddNoAliasAttrs(const CallGraphSCC &SCC) { SmallPtrSet<Function*, 8> SCCNodes; // Fill SCCNodes with the elements of the SCC. Used for quickly // looking up whether a given CallGraphNode is in this SCC. - for (unsigned i = 0, e = SCC.size(); i != e; ++i) - SCCNodes.insert(SCC[i]->getFunction()); + for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) + SCCNodes.insert((*I)->getFunction()); // Check each function in turn, determining which functions return noalias // pointers. 
- for (unsigned i = 0, e = SCC.size(); i != e; ++i) { - Function *F = SCC[i]->getFunction(); + for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) { + Function *F = (*I)->getFunction(); if (F == 0) // External node - skip it; @@ -370,8 +370,8 @@ bool FunctionAttrs::AddNoAliasAttrs(const std::vector<CallGraphNode *> &SCC) { } bool MadeChange = false; - for (unsigned i = 0, e = SCC.size(); i != e; ++i) { - Function *F = SCC[i]->getFunction(); + for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) { + Function *F = (*I)->getFunction(); if (F->doesNotAlias(0) || !F->getReturnType()->isPointerTy()) continue; @@ -383,7 +383,7 @@ bool FunctionAttrs::AddNoAliasAttrs(const std::vector<CallGraphNode *> &SCC) { return MadeChange; } -bool FunctionAttrs::runOnSCC(std::vector<CallGraphNode *> &SCC) { +bool FunctionAttrs::runOnSCC(CallGraphSCC &SCC) { bool Changed = AddReadAttrs(SCC); Changed |= AddNoCaptureAttrs(SCC); Changed |= AddNoAliasAttrs(SCC); diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp index ddff5ef..b429213 100644 --- a/lib/Transforms/IPO/GlobalOpt.cpp +++ b/lib/Transforms/IPO/GlobalOpt.cpp @@ -143,7 +143,8 @@ struct GlobalStatus { static bool SafeToDestroyConstant(const Constant *C) { if (isa<GlobalValue>(C)) return false; - for (Value::const_use_iterator UI = C->use_begin(), E = C->use_end(); UI != E; ++UI) + for (Value::const_use_iterator UI = C->use_begin(), E = C->use_end(); UI != E; + ++UI) if (const Constant *CU = dyn_cast<Constant>(*UI)) { if (!SafeToDestroyConstant(CU)) return false; } else @@ -158,7 +159,8 @@ static bool SafeToDestroyConstant(const Constant *C) { /// static bool AnalyzeGlobal(const Value *V, GlobalStatus &GS, SmallPtrSet<const PHINode*, 16> &PHIUsers) { - for (Value::const_use_iterator UI = V->use_begin(), E = V->use_end(); UI != E; ++UI) + for (Value::const_use_iterator UI = V->use_begin(), E = V->use_end(); UI != E; + ++UI) if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(*UI)) { GS.HasNonInstructionUser = true; @@ -185,7 +187,8 @@ static bool AnalyzeGlobal(const Value *V, GlobalStatus &GS, // value, not an aggregate), keep more specific information about // stores. if (GS.StoredType != GlobalStatus::isStored) { - if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(SI->getOperand(1))){ + if (const GlobalVariable *GV = dyn_cast<GlobalVariable>( + SI->getOperand(1))) { Value *StoredVal = SI->getOperand(0); if (StoredVal == GV->getInitializer()) { if (GS.StoredType < GlobalStatus::isInitializerStored) @@ -610,62 +613,69 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const TargetData &TD) { /// AllUsesOfValueWillTrapIfNull - Return true if all users of the specified /// value will trap if the value is dynamically null. PHIs keeps track of any /// phi nodes we've seen to avoid reprocessing them. -static bool AllUsesOfValueWillTrapIfNull(Value *V, - SmallPtrSet<PHINode*, 8> &PHIs) { - for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E; ++UI) - if (isa<LoadInst>(*UI)) { +static bool AllUsesOfValueWillTrapIfNull(const Value *V, + SmallPtrSet<const PHINode*, 8> &PHIs) { + for (Value::const_use_iterator UI = V->use_begin(), E = V->use_end(); UI != E; + ++UI) { + const User *U = *UI; + + if (isa<LoadInst>(U)) { // Will trap. 
- } else if (StoreInst *SI = dyn_cast<StoreInst>(*UI)) { + } else if (const StoreInst *SI = dyn_cast<StoreInst>(U)) { if (SI->getOperand(0) == V) { - //cerr << "NONTRAPPING USE: " << **UI; + //cerr << "NONTRAPPING USE: " << *U; return false; // Storing the value. } - } else if (CallInst *CI = dyn_cast<CallInst>(*UI)) { + } else if (const CallInst *CI = dyn_cast<CallInst>(U)) { if (CI->getCalledValue() != V) { - //cerr << "NONTRAPPING USE: " << **UI; + //cerr << "NONTRAPPING USE: " << *U; return false; // Not calling the ptr } - } else if (InvokeInst *II = dyn_cast<InvokeInst>(*UI)) { + } else if (const InvokeInst *II = dyn_cast<InvokeInst>(U)) { if (II->getCalledValue() != V) { - //cerr << "NONTRAPPING USE: " << **UI; + //cerr << "NONTRAPPING USE: " << *U; return false; // Not calling the ptr } - } else if (BitCastInst *CI = dyn_cast<BitCastInst>(*UI)) { + } else if (const BitCastInst *CI = dyn_cast<BitCastInst>(U)) { if (!AllUsesOfValueWillTrapIfNull(CI, PHIs)) return false; - } else if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(*UI)) { + } else if (const GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(U)) { if (!AllUsesOfValueWillTrapIfNull(GEPI, PHIs)) return false; - } else if (PHINode *PN = dyn_cast<PHINode>(*UI)) { + } else if (const PHINode *PN = dyn_cast<PHINode>(U)) { // If we've already seen this phi node, ignore it, it has already been // checked. if (PHIs.insert(PN) && !AllUsesOfValueWillTrapIfNull(PN, PHIs)) return false; - } else if (isa<ICmpInst>(*UI) && + } else if (isa<ICmpInst>(U) && isa<ConstantPointerNull>(UI->getOperand(1))) { // Ignore icmp X, null } else { - //cerr << "NONTRAPPING USE: " << **UI; + //cerr << "NONTRAPPING USE: " << *U; return false; } + } return true; } /// AllUsesOfLoadedValueWillTrapIfNull - Return true if all uses of any loads /// from GV will trap if the loaded value is null. Note that this also permits /// comparisons of the loaded value against null, as a special case. -static bool AllUsesOfLoadedValueWillTrapIfNull(GlobalVariable *GV) { - for (Value::use_iterator UI = GV->use_begin(), E = GV->use_end(); UI!=E; ++UI) - if (LoadInst *LI = dyn_cast<LoadInst>(*UI)) { - SmallPtrSet<PHINode*, 8> PHIs; +static bool AllUsesOfLoadedValueWillTrapIfNull(const GlobalVariable *GV) { + for (Value::const_use_iterator UI = GV->use_begin(), E = GV->use_end(); + UI != E; ++UI) { + const User *U = *UI; + + if (const LoadInst *LI = dyn_cast<LoadInst>(U)) { + SmallPtrSet<const PHINode*, 8> PHIs; if (!AllUsesOfValueWillTrapIfNull(LI, PHIs)) return false; - } else if (isa<StoreInst>(*UI)) { + } else if (isa<StoreInst>(U)) { // Ignore stores to the global. } else { // We don't know or understand this user, bail out. - //cerr << "UNKNOWN USER OF GLOBAL!: " << **UI; + //cerr << "UNKNOWN USER OF GLOBAL!: " << *U; return false; } - + } return true; } @@ -682,16 +692,17 @@ static bool OptimizeAwayTrappingUsesOfValue(Value *V, Constant *NewV) { Changed = true; } } else if (isa<CallInst>(I) || isa<InvokeInst>(I)) { - if (I->getOperand(0) == V) { + CallSite CS(I); + if (CS.getCalledValue() == V) { // Calling through the pointer! Turn into a direct call, but be careful // that the pointer is not also being passed as an argument. 
- I->setOperand(0, NewV); + CS.setCalledFunction(NewV); Changed = true; bool PassedAsArg = false; - for (unsigned i = 1, e = I->getNumOperands(); i != e; ++i) - if (I->getOperand(i) == V) { + for (unsigned i = 0, e = CS.arg_size(); i != e; ++i) + if (CS.getArgument(i) == V) { PassedAsArg = true; - I->setOperand(i, NewV); + CS.setArgument(i, NewV); } if (PassedAsArg) { @@ -938,29 +949,31 @@ static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV, /// to make sure that there are no complex uses of V. We permit simple things /// like dereferencing the pointer, but not storing through the address, unless /// it is to the specified global. -static bool ValueIsOnlyUsedLocallyOrStoredToOneGlobal(Instruction *V, - GlobalVariable *GV, - SmallPtrSet<PHINode*, 8> &PHIs) { - for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;++UI){ - Instruction *Inst = cast<Instruction>(*UI); - +static bool ValueIsOnlyUsedLocallyOrStoredToOneGlobal(const Instruction *V, + const GlobalVariable *GV, + SmallPtrSet<const PHINode*, 8> &PHIs) { + for (Value::const_use_iterator UI = V->use_begin(), E = V->use_end(); + UI != E; ++UI) { + const Instruction *Inst = cast<Instruction>(*UI); + if (isa<LoadInst>(Inst) || isa<CmpInst>(Inst)) { continue; // Fine, ignore. } - if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) { + if (const StoreInst *SI = dyn_cast<StoreInst>(Inst)) { if (SI->getOperand(0) == V && SI->getOperand(1) != GV) return false; // Storing the pointer itself... bad. continue; // Otherwise, storing through it, or storing into GV... fine. } - if (isa<GetElementPtrInst>(Inst)) { + // Must index into the array and into the struct. + if (isa<GetElementPtrInst>(Inst) && Inst->getNumOperands() >= 3) { if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(Inst, GV, PHIs)) return false; continue; } - if (PHINode *PN = dyn_cast<PHINode>(Inst)) { + if (const PHINode *PN = dyn_cast<PHINode>(Inst)) { // PHIs are ok if all uses are ok. Don't infinitely recurse through PHI // cycles. if (PHIs.insert(PN)) @@ -969,7 +982,7 @@ static bool ValueIsOnlyUsedLocallyOrStoredToOneGlobal(Instruction *V, continue; } - if (BitCastInst *BCI = dyn_cast<BitCastInst>(Inst)) { + if (const BitCastInst *BCI = dyn_cast<BitCastInst>(Inst)) { if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(BCI, GV, PHIs)) return false; continue; @@ -1029,11 +1042,12 @@ static void ReplaceUsesOfMallocWithGlobal(Instruction *Alloc, /// of a load) are simple enough to perform heap SRA on. This permits GEP's /// that index through the array and struct field, icmps of null, and PHIs. static bool LoadUsesSimpleEnoughForHeapSRA(const Value *V, - SmallPtrSet<const PHINode*, 32> &LoadUsingPHIs, - SmallPtrSet<const PHINode*, 32> &LoadUsingPHIsPerLoad) { + SmallPtrSet<const PHINode*, 32> &LoadUsingPHIs, + SmallPtrSet<const PHINode*, 32> &LoadUsingPHIsPerLoad) { // We permit two users of the load: setcc comparing against the null // pointer, and a getelementptr of a specific form. - for (Value::const_use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;++UI){ + for (Value::const_use_iterator UI = V->use_begin(), E = V->use_end(); UI != E; + ++UI) { const Instruction *User = cast<Instruction>(*UI); // Comparison against null is ok. 
@@ -1084,8 +1098,8 @@ static bool AllGlobalLoadUsesSimpleEnoughForHeapSRA(const GlobalVariable *GV, Instruction *StoredVal) { SmallPtrSet<const PHINode*, 32> LoadUsingPHIs; SmallPtrSet<const PHINode*, 32> LoadUsingPHIsPerLoad; - for (Value::const_use_iterator UI = GV->use_begin(), E = GV->use_end(); UI != E; - ++UI) + for (Value::const_use_iterator UI = GV->use_begin(), E = GV->use_end(); + UI != E; ++UI) if (const LoadInst *LI = dyn_cast<LoadInst>(*UI)) { if (!LoadUsesSimpleEnoughForHeapSRA(LI, LoadUsingPHIs, LoadUsingPHIsPerLoad)) @@ -1098,8 +1112,8 @@ static bool AllGlobalLoadUsesSimpleEnoughForHeapSRA(const GlobalVariable *GV, // that all inputs the to the PHI nodes are in the same equivalence sets. // Check to verify that all operands of the PHIs are either PHIS that can be // transformed, loads from GV, or MI itself. - for (SmallPtrSet<const PHINode*, 32>::const_iterator I = LoadUsingPHIs.begin(), - E = LoadUsingPHIs.end(); I != E; ++I) { + for (SmallPtrSet<const PHINode*, 32>::const_iterator I = LoadUsingPHIs.begin() + , E = LoadUsingPHIs.end(); I != E; ++I) { const PHINode *PN = *I; for (unsigned op = 0, e = PN->getNumIncomingValues(); op != e; ++op) { Value *InVal = PN->getIncomingValue(op); @@ -1448,6 +1462,9 @@ static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV, const Type *AllocTy, Module::global_iterator &GVI, TargetData *TD) { + if (!TD) + return false; + // If this is a malloc of an abstract type, don't touch it. if (!AllocTy->isSized()) return false; @@ -1466,66 +1483,66 @@ static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV, // malloc to be stored into the specified global, loaded setcc'd, and // GEP'd. These are all things we could transform to using the global // for. - { - SmallPtrSet<PHINode*, 8> PHIs; - if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(CI, GV, PHIs)) - return false; - } + SmallPtrSet<const PHINode*, 8> PHIs; + if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(CI, GV, PHIs)) + return false; // If we have a global that is only initialized with a fixed size malloc, // transform the program to use global memory instead of malloc'd memory. // This eliminates dynamic allocation, avoids an indirection accessing the // data, and exposes the resultant global to further GlobalOpt. // We cannot optimize the malloc if we cannot determine malloc array size. - if (Value *NElems = getMallocArraySize(CI, TD, true)) { - if (ConstantInt *NElements = dyn_cast<ConstantInt>(NElems)) - // Restrict this transformation to only working on small allocations - // (2048 bytes currently), as we don't want to introduce a 16M global or - // something. - if (TD && - NElements->getZExtValue() * TD->getTypeAllocSize(AllocTy) < 2048) { - GVI = OptimizeGlobalAddressOfMalloc(GV, CI, AllocTy, NElements, TD); - return true; - } + Value *NElems = getMallocArraySize(CI, TD, true); + if (!NElems) + return false; + + if (ConstantInt *NElements = dyn_cast<ConstantInt>(NElems)) + // Restrict this transformation to only working on small allocations + // (2048 bytes currently), as we don't want to introduce a 16M global or + // something. + if (NElements->getZExtValue() * TD->getTypeAllocSize(AllocTy) < 2048) { + GVI = OptimizeGlobalAddressOfMalloc(GV, CI, AllocTy, NElements, TD); + return true; + } - // If the allocation is an array of structures, consider transforming this - // into multiple malloc'd arrays, one for each field. This is basically - // SRoA for malloc'd memory. 
- - // If this is an allocation of a fixed size array of structs, analyze as a - // variable size array. malloc [100 x struct],1 -> malloc struct, 100 - if (NElems == ConstantInt::get(CI->getOperand(1)->getType(), 1)) - if (const ArrayType *AT = dyn_cast<ArrayType>(AllocTy)) - AllocTy = AT->getElementType(); + // If the allocation is an array of structures, consider transforming this + // into multiple malloc'd arrays, one for each field. This is basically + // SRoA for malloc'd memory. + + // If this is an allocation of a fixed size array of structs, analyze as a + // variable size array. malloc [100 x struct],1 -> malloc struct, 100 + if (NElems == ConstantInt::get(CI->getOperand(1)->getType(), 1)) + if (const ArrayType *AT = dyn_cast<ArrayType>(AllocTy)) + AllocTy = AT->getElementType(); - if (const StructType *AllocSTy = dyn_cast<StructType>(AllocTy)) { - // This the structure has an unreasonable number of fields, leave it - // alone. - if (AllocSTy->getNumElements() <= 16 && AllocSTy->getNumElements() != 0 && - AllGlobalLoadUsesSimpleEnoughForHeapSRA(GV, CI)) { - - // If this is a fixed size array, transform the Malloc to be an alloc of - // structs. malloc [100 x struct],1 -> malloc struct, 100 - if (const ArrayType *AT = - dyn_cast<ArrayType>(getMallocAllocatedType(CI))) { - const Type *IntPtrTy = TD->getIntPtrType(CI->getContext()); - unsigned TypeSize = TD->getStructLayout(AllocSTy)->getSizeInBytes(); - Value *AllocSize = ConstantInt::get(IntPtrTy, TypeSize); - Value *NumElements = ConstantInt::get(IntPtrTy, AT->getNumElements()); - Instruction *Malloc = CallInst::CreateMalloc(CI, IntPtrTy, AllocSTy, - AllocSize, NumElements, - CI->getName()); - Instruction *Cast = new BitCastInst(Malloc, CI->getType(), "tmp", CI); - CI->replaceAllUsesWith(Cast); - CI->eraseFromParent(); - CI = dyn_cast<BitCastInst>(Malloc) ? - extractMallocCallFromBitCast(Malloc) : cast<CallInst>(Malloc); - } - - GVI = PerformHeapAllocSRoA(GV, CI, getMallocArraySize(CI, TD, true),TD); - return true; - } + const StructType *AllocSTy = dyn_cast<StructType>(AllocTy); + if (!AllocSTy) + return false; + + // This the structure has an unreasonable number of fields, leave it + // alone. + if (AllocSTy->getNumElements() <= 16 && AllocSTy->getNumElements() != 0 && + AllGlobalLoadUsesSimpleEnoughForHeapSRA(GV, CI)) { + + // If this is a fixed size array, transform the Malloc to be an alloc of + // structs. malloc [100 x struct],1 -> malloc struct, 100 + if (const ArrayType *AT = dyn_cast<ArrayType>(getMallocAllocatedType(CI))) { + const Type *IntPtrTy = TD->getIntPtrType(CI->getContext()); + unsigned TypeSize = TD->getStructLayout(AllocSTy)->getSizeInBytes(); + Value *AllocSize = ConstantInt::get(IntPtrTy, TypeSize); + Value *NumElements = ConstantInt::get(IntPtrTy, AT->getNumElements()); + Instruction *Malloc = CallInst::CreateMalloc(CI, IntPtrTy, AllocSTy, + AllocSize, NumElements, + CI->getName()); + Instruction *Cast = new BitCastInst(Malloc, CI->getType(), "tmp", CI); + CI->replaceAllUsesWith(Cast); + CI->eraseFromParent(); + CI = dyn_cast<BitCastInst>(Malloc) ? 
+ extractMallocCallFromBitCast(Malloc) : cast<CallInst>(Malloc); } + + GVI = PerformHeapAllocSRoA(GV, CI, getMallocArraySize(CI, TD, true),TD); + return true; } return false; @@ -1689,8 +1706,8 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV, if (GS.StoredType == GlobalStatus::isStoredOnce && GS.StoredOnceValue) DEBUG(dbgs() << " StoredOnceValue = " << *GS.StoredOnceValue << "\n"); if (GS.AccessingFunction && !GS.HasMultipleAccessingFunctions) - DEBUG(dbgs() << " AccessingFunction = " << GS.AccessingFunction->getName() - << "\n"); + DEBUG(dbgs() << " AccessingFunction = " + << GS.AccessingFunction->getName() << "\n"); DEBUG(dbgs() << " HasMultipleAccessingFunctions = " << GS.HasMultipleAccessingFunctions << "\n"); DEBUG(dbgs() << " HasNonInstructionUser = " @@ -2278,10 +2295,10 @@ static bool EvaluateFunction(Function *F, Constant *&RetVal, } // Cannot handle inline asm. - if (isa<InlineAsm>(CI->getOperand(0))) return false; + if (isa<InlineAsm>(CI->getCalledValue())) return false; // Resolve function pointers. - Function *Callee = dyn_cast<Function>(getVal(Values, CI->getOperand(0))); + Function *Callee = dyn_cast<Function>(getVal(Values, CI->getCalledValue())); if (!Callee) return false; // Cannot resolve. SmallVector<Constant*, 8> Formals; @@ -2500,7 +2517,7 @@ bool GlobalOpt::OptimizeGlobalAliases(Module &M) { continue; // Do not perform the transform if multiple aliases potentially target the - // aliasee. This check also ensures that it is safe to replace the section + // aliasee. This check also ensures that it is safe to replace the section // and other attributes of the aliasee with those of the alias. if (!hasOneUse) continue; diff --git a/lib/Transforms/IPO/IPO.cpp b/lib/Transforms/IPO/IPO.cpp index 83e8624..340b70e 100644 --- a/lib/Transforms/IPO/IPO.cpp +++ b/lib/Transforms/IPO/IPO.cpp @@ -62,6 +62,15 @@ void LLVMAddPruneEHPass(LLVMPassManagerRef PM) { unwrap(PM)->add(createPruneEHPass()); } +void LLVMAddIPSCCPPass(LLVMPassManagerRef PM) { + unwrap(PM)->add(createIPSCCPPass()); +} + +void LLVMAddInternalizePass(LLVMPassManagerRef PM, unsigned AllButMain) { + unwrap(PM)->add(createInternalizePass(AllButMain != 0)); +} + + void LLVMAddRaiseAllocationsPass(LLVMPassManagerRef PM) { // FIXME: Remove in LLVM 3.0. } diff --git a/lib/Transforms/IPO/Inliner.cpp b/lib/Transforms/IPO/Inliner.cpp index 03ec72c..b785bb0 100644 --- a/lib/Transforms/IPO/Inliner.cpp +++ b/lib/Transforms/IPO/Inliner.cpp @@ -73,16 +73,14 @@ InlinedArrayAllocasTy; /// available from other functions inlined into the caller. If we are able to /// inline this call site we attempt to reuse already available allocas or add /// any new allocas to the set if not possible. -static bool InlineCallIfPossible(CallSite CS, CallGraph &CG, - const TargetData *TD, +static bool InlineCallIfPossible(CallSite CS, InlineFunctionInfo &IFI, InlinedArrayAllocasTy &InlinedArrayAllocas) { Function *Callee = CS.getCalledFunction(); Function *Caller = CS.getCaller(); // Try to inline the function. Get the list of static allocas that were // inlined. - SmallVector<AllocaInst*, 16> StaticAllocas; - if (!InlineFunction(CS, &CG, TD, &StaticAllocas)) + if (!InlineFunction(CS, IFI)) return false; // If the inlined function had a higher stack protection level than the @@ -119,9 +117,9 @@ static bool InlineCallIfPossible(CallSite CS, CallGraph &CG, // Loop over all the allocas we have so far and see if they can be merged with // a previously inlined alloca. If not, remember that we had it. 
- for (unsigned AllocaNo = 0, e = StaticAllocas.size(); + for (unsigned AllocaNo = 0, e = IFI.StaticAllocas.size(); AllocaNo != e; ++AllocaNo) { - AllocaInst *AI = StaticAllocas[AllocaNo]; + AllocaInst *AI = IFI.StaticAllocas[AllocaNo]; // Don't bother trying to merge array allocations (they will usually be // canonicalized to be an allocation *of* an array), or allocations whose @@ -292,14 +290,29 @@ bool Inliner::shouldInline(CallSite CS) { return true; } -bool Inliner::runOnSCC(std::vector<CallGraphNode*> &SCC) { +/// InlineHistoryIncludes - Return true if the specified inline history ID +/// indicates an inline history that includes the specified function. +static bool InlineHistoryIncludes(Function *F, int InlineHistoryID, + const SmallVectorImpl<std::pair<Function*, int> > &InlineHistory) { + while (InlineHistoryID != -1) { + assert(unsigned(InlineHistoryID) < InlineHistory.size() && + "Invalid inline history ID"); + if (InlineHistory[InlineHistoryID].first == F) + return true; + InlineHistoryID = InlineHistory[InlineHistoryID].second; + } + return false; +} + + +bool Inliner::runOnSCC(CallGraphSCC &SCC) { CallGraph &CG = getAnalysis<CallGraph>(); const TargetData *TD = getAnalysisIfAvailable<TargetData>(); SmallPtrSet<Function*, 8> SCCFunctions; DEBUG(dbgs() << "Inliner visiting SCC:"); - for (unsigned i = 0, e = SCC.size(); i != e; ++i) { - Function *F = SCC[i]->getFunction(); + for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) { + Function *F = (*I)->getFunction(); if (F) SCCFunctions.insert(F); DEBUG(dbgs() << " " << (F ? F->getName() : "INDIRECTNODE")); } @@ -307,10 +320,16 @@ bool Inliner::runOnSCC(std::vector<CallGraphNode*> &SCC) { // Scan through and identify all call sites ahead of time so that we only // inline call sites in the original functions, not call sites that result // from inlining other functions. - SmallVector<CallSite, 16> CallSites; - - for (unsigned i = 0, e = SCC.size(); i != e; ++i) { - Function *F = SCC[i]->getFunction(); + SmallVector<std::pair<CallSite, int>, 16> CallSites; + + // When inlining a callee produces new call sites, we want to keep track of + // the fact that they were inlined from the callee. This allows us to avoid + // infinite inlining in some obscure cases. To represent this, we use an + // index into the InlineHistory vector. + SmallVector<std::pair<Function*, int>, 8> InlineHistory; + + for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) { + Function *F = (*I)->getFunction(); if (!F) continue; for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) @@ -327,22 +346,27 @@ bool Inliner::runOnSCC(std::vector<CallGraphNode*> &SCC) { if (CS.getCalledFunction() && CS.getCalledFunction()->isDeclaration()) continue; - CallSites.push_back(CS); + CallSites.push_back(std::make_pair(CS, -1)); } } DEBUG(dbgs() << ": " << CallSites.size() << " call sites.\n"); + // If there are no calls in this function, exit early. + if (CallSites.empty()) + return false; + // Now that we have all of the call sites, move the ones to functions in the // current SCC to the end of the list. 
unsigned FirstCallInSCC = CallSites.size(); for (unsigned i = 0; i < FirstCallInSCC; ++i) - if (Function *F = CallSites[i].getCalledFunction()) + if (Function *F = CallSites[i].first.getCalledFunction()) if (SCCFunctions.count(F)) std::swap(CallSites[i--], CallSites[--FirstCallInSCC]); InlinedArrayAllocasTy InlinedArrayAllocas; + InlineFunctionInfo InlineInfo(&CG, TD); // Now that we have all of the call sites, loop over them and inline them if // it looks profitable to do so. @@ -353,7 +377,7 @@ bool Inliner::runOnSCC(std::vector<CallGraphNode*> &SCC) { // Iterate over the outer loop because inlining functions can cause indirect // calls to become direct calls. for (unsigned CSi = 0; CSi != CallSites.size(); ++CSi) { - CallSite CS = CallSites[CSi]; + CallSite CS = CallSites[CSi].first; Function *Caller = CS.getCaller(); Function *Callee = CS.getCalledFunction(); @@ -375,16 +399,42 @@ bool Inliner::runOnSCC(std::vector<CallGraphNode*> &SCC) { // We can only inline direct calls to non-declarations. if (Callee == 0 || Callee->isDeclaration()) continue; + // If this call sites was obtained by inlining another function, verify + // that the include path for the function did not include the callee + // itself. If so, we'd be recursively inlinling the same function, + // which would provide the same callsites, which would cause us to + // infinitely inline. + int InlineHistoryID = CallSites[CSi].second; + if (InlineHistoryID != -1 && + InlineHistoryIncludes(Callee, InlineHistoryID, InlineHistory)) + continue; + + // If the policy determines that we should inline this function, // try to do so. if (!shouldInline(CS)) continue; - // Attempt to inline the function... - if (!InlineCallIfPossible(CS, CG, TD, InlinedArrayAllocas)) + // Attempt to inline the function. + if (!InlineCallIfPossible(CS, InlineInfo, InlinedArrayAllocas)) continue; ++NumInlined; - + + // If inlining this function gave us any new call sites, throw them + // onto our worklist to process. They are useful inline candidates. + if (!InlineInfo.InlinedCalls.empty()) { + // Create a new inline history entry for this, so that we remember + // that these new callsites came about due to inlining Callee. + int NewHistoryID = InlineHistory.size(); + InlineHistory.push_back(std::make_pair(Callee, InlineHistoryID)); + + for (unsigned i = 0, e = InlineInfo.InlinedCalls.size(); + i != e; ++i) { + Value *Ptr = InlineInfo.InlinedCalls[i]; + CallSites.push_back(std::make_pair(CallSite(Ptr), NewHistoryID)); + } + } + // Update the cached cost info with the inlined call. growCachedCostInfo(Caller, Callee); } @@ -417,7 +467,7 @@ bool Inliner::runOnSCC(std::vector<CallGraphNode*> &SCC) { // swap/pop_back for efficiency, but do not use it if doing so would // move a call site to a function in this SCC before the // 'FirstCallInSCC' barrier. - if (SCC.size() == 1) { + if (SCC.isSingular()) { std::swap(CallSites[CSi], CallSites.back()); CallSites.pop_back(); } else { diff --git a/lib/Transforms/IPO/PartialInlining.cpp b/lib/Transforms/IPO/PartialInlining.cpp index f8ec722..07525ea 100644 --- a/lib/Transforms/IPO/PartialInlining.cpp +++ b/lib/Transforms/IPO/PartialInlining.cpp @@ -120,15 +120,17 @@ Function* PartialInliner::unswitchFunction(Function* F) { // Extract the body of the if. Function* extractedFunction = ExtractCodeRegion(DT, toExtract); + InlineFunctionInfo IFI; + // Inline the top-level if test into all callers. 
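The inline-history bookkeeping added to the Inliner above is a parent-pointer chain: each entry records which callee's inlining produced a batch of new call sites and the index of the entry it was itself derived from, with -1 marking an original call site. A minimal standalone sketch of the same ancestry walk, using std::string in place of Function* (all names here are illustrative, not the pass's types):

#include <cassert>
#include <string>
#include <utility>
#include <vector>

// Each entry: (function that was inlined, index of the parent entry, or -1).
typedef std::vector<std::pair<std::string, int> > InlineHistoryTy;

// Walk the chain rooted at ID and report whether F already appears in it.
static bool historyIncludes(const std::string &F, int ID,
                            const InlineHistoryTy &History) {
  while (ID != -1) {
    assert(unsigned(ID) < History.size() && "invalid inline history ID");
    if (History[ID].first == F)
      return true;
    ID = History[ID].second;   // hop to the entry we were inlined from
  }
  return false;
}

int main() {
  InlineHistoryTy History;
  // An original call site to "foo" carries history ID -1.  Inlining "foo"
  // creates new call sites; they get a fresh entry whose parent is -1.
  History.push_back(std::make_pair("foo", -1));   // ID 0
  // One of those new sites calls "bar"; inlining it chains onto entry 0.
  History.push_back(std::make_pair("bar", 0));    // ID 1
  // A call back to "foo" discovered under ID 1 must not be inlined again.
  assert(historyIncludes("foo", 1, History));
  assert(!historyIncludes("baz", 1, History));
  return 0;
}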
std::vector<User*> Users(duplicateFunction->use_begin(), duplicateFunction->use_end()); for (std::vector<User*>::iterator UI = Users.begin(), UE = Users.end(); UI != UE; ++UI) - if (CallInst* CI = dyn_cast<CallInst>(*UI)) - InlineFunction(CI); - else if (InvokeInst* II = dyn_cast<InvokeInst>(*UI)) - InlineFunction(II); + if (CallInst *CI = dyn_cast<CallInst>(*UI)) + InlineFunction(CI, IFI); + else if (InvokeInst *II = dyn_cast<InvokeInst>(*UI)) + InlineFunction(II, IFI); // Ditch the duplicate, since we're done with it, and rewrite all remaining // users (function pointers, etc.) back to the original function. diff --git a/lib/Transforms/IPO/PruneEH.cpp b/lib/Transforms/IPO/PruneEH.cpp index 161246b..de6099c 100644 --- a/lib/Transforms/IPO/PruneEH.cpp +++ b/lib/Transforms/IPO/PruneEH.cpp @@ -40,7 +40,7 @@ namespace { PruneEH() : CallGraphSCCPass(&ID) {} // runOnSCC - Analyze the SCC, performing the transformation if possible. - bool runOnSCC(std::vector<CallGraphNode *> &SCC); + bool runOnSCC(CallGraphSCC &SCC); bool SimplifyFunction(Function *F); void DeleteBasicBlock(BasicBlock *BB); @@ -54,20 +54,20 @@ X("prune-eh", "Remove unused exception handling info"); Pass *llvm::createPruneEHPass() { return new PruneEH(); } -bool PruneEH::runOnSCC(std::vector<CallGraphNode *> &SCC) { +bool PruneEH::runOnSCC(CallGraphSCC &SCC) { SmallPtrSet<CallGraphNode *, 8> SCCNodes; CallGraph &CG = getAnalysis<CallGraph>(); bool MadeChange = false; // Fill SCCNodes with the elements of the SCC. Used for quickly // looking up whether a given CallGraphNode is in this SCC. - for (unsigned i = 0, e = SCC.size(); i != e; ++i) - SCCNodes.insert(SCC[i]); + for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) + SCCNodes.insert(*I); // First pass, scan all of the functions in the SCC, simplifying them // according to what we know. - for (unsigned i = 0, e = SCC.size(); i != e; ++i) - if (Function *F = SCC[i]->getFunction()) + for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) + if (Function *F = (*I)->getFunction()) MadeChange |= SimplifyFunction(F); // Next, check to see if any callees might throw or if there are any external @@ -78,9 +78,9 @@ bool PruneEH::runOnSCC(std::vector<CallGraphNode *> &SCC) { // obviously the SCC might throw. // bool SCCMightUnwind = false, SCCMightReturn = false; - for (unsigned i = 0, e = SCC.size(); - (!SCCMightUnwind || !SCCMightReturn) && i != e; ++i) { - Function *F = SCC[i]->getFunction(); + for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); + (!SCCMightUnwind || !SCCMightReturn) && I != E; ++I) { + Function *F = (*I)->getFunction(); if (F == 0) { SCCMightUnwind = true; SCCMightReturn = true; @@ -132,7 +132,7 @@ bool PruneEH::runOnSCC(std::vector<CallGraphNode *> &SCC) { // If the SCC doesn't unwind or doesn't throw, note this fact. 
if (!SCCMightUnwind || !SCCMightReturn) - for (unsigned i = 0, e = SCC.size(); i != e; ++i) { + for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) { Attributes NewAttributes = Attribute::None; if (!SCCMightUnwind) @@ -140,19 +140,20 @@ bool PruneEH::runOnSCC(std::vector<CallGraphNode *> &SCC) { if (!SCCMightReturn) NewAttributes |= Attribute::NoReturn; - const AttrListPtr &PAL = SCC[i]->getFunction()->getAttributes(); + Function *F = (*I)->getFunction(); + const AttrListPtr &PAL = F->getAttributes(); const AttrListPtr &NPAL = PAL.addAttr(~0, NewAttributes); if (PAL != NPAL) { MadeChange = true; - SCC[i]->getFunction()->setAttributes(NPAL); + F->setAttributes(NPAL); } } - for (unsigned i = 0, e = SCC.size(); i != e; ++i) { + for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) { // Convert any invoke instructions to non-throwing functions in this node // into call instructions with a branch. This makes the exception blocks // dead. - if (Function *F = SCC[i]->getFunction()) + if (Function *F = (*I)->getFunction()) MadeChange |= SimplifyFunction(F); } diff --git a/lib/Transforms/IPO/StructRetPromotion.cpp b/lib/Transforms/IPO/StructRetPromotion.cpp index dda32d0..473e83c 100644 --- a/lib/Transforms/IPO/StructRetPromotion.cpp +++ b/lib/Transforms/IPO/StructRetPromotion.cpp @@ -48,7 +48,7 @@ namespace { CallGraphSCCPass::getAnalysisUsage(AU); } - virtual bool runOnSCC(std::vector<CallGraphNode *> &SCC); + virtual bool runOnSCC(CallGraphSCC &SCC); static char ID; // Pass identification, replacement for typeid SRETPromotion() : CallGraphSCCPass(&ID) {} @@ -69,12 +69,12 @@ Pass *llvm::createStructRetPromotionPass() { return new SRETPromotion(); } -bool SRETPromotion::runOnSCC(std::vector<CallGraphNode *> &SCC) { +bool SRETPromotion::runOnSCC(CallGraphSCC &SCC) { bool Changed = false; - for (unsigned i = 0, e = SCC.size(); i != e; ++i) - if (CallGraphNode *NewNode = PromoteReturn(SCC[i])) { - SCC[i] = NewNode; + for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) + if (CallGraphNode *NewNode = PromoteReturn(*I)) { + SCC.ReplaceNode(*I, NewNode); Changed = true; } diff --git a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp index 3fb3de7..8586054 100644 --- a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp +++ b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp @@ -1735,16 +1735,12 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) { if (ConstantInt *RHS = dyn_cast<ConstantInt>(Op1)) { - if (RHS->isOne() && Op0->hasOneUse()) { + if (RHS->isOne() && Op0->hasOneUse()) // xor (cmp A, B), true = not (cmp A, B) = !cmp A, B - if (ICmpInst *ICI = dyn_cast<ICmpInst>(Op0)) - return new ICmpInst(ICI->getInversePredicate(), - ICI->getOperand(0), ICI->getOperand(1)); - - if (FCmpInst *FCI = dyn_cast<FCmpInst>(Op0)) - return new FCmpInst(FCI->getInversePredicate(), - FCI->getOperand(0), FCI->getOperand(1)); - } + if (CmpInst *CI = dyn_cast<CmpInst>(Op0)) + return CmpInst::Create(CI->getOpcode(), + CI->getInversePredicate(), + CI->getOperand(0), CI->getOperand(1)); // fold (xor(zext(cmp)), 1) and (xor(sext(cmp)), -1) to ext(!cmp). 
if (CastInst *Op0C = dyn_cast<CastInst>(Op0)) { diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp index e025b05..38e7b6e 100644 --- a/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -59,29 +59,32 @@ static unsigned EnforceKnownAlignment(Value *V, // Treat this like a bitcast. return EnforceKnownAlignment(U->getOperand(0), Align, PrefAlign); } - break; + return Align; + } + case Instruction::Alloca: { + AllocaInst *AI = cast<AllocaInst>(V); + // If there is a requested alignment and if this is an alloca, round up. + if (AI->getAlignment() >= PrefAlign) + return AI->getAlignment(); + AI->setAlignment(PrefAlign); + return PrefAlign; } } if (GlobalValue *GV = dyn_cast<GlobalValue>(V)) { // If there is a large requested alignment and we can, bump up the alignment // of the global. - if (!GV->isDeclaration()) { - if (GV->getAlignment() >= PrefAlign) - Align = GV->getAlignment(); - else { - GV->setAlignment(PrefAlign); - Align = PrefAlign; - } - } - } else if (AllocaInst *AI = dyn_cast<AllocaInst>(V)) { - // If there is a requested alignment and if this is an alloca, round up. - if (AI->getAlignment() >= PrefAlign) - Align = AI->getAlignment(); - else { - AI->setAlignment(PrefAlign); - Align = PrefAlign; - } + if (GV->isDeclaration()) return Align; + + if (GV->getAlignment() >= PrefAlign) + return GV->getAlignment(); + // We can only increase the alignment of the global if it has no alignment + // specified or if it is not assigned a section. If it is assigned a + // section, the global could be densely packed with other objects in the + // section, increasing the alignment could cause padding issues. + if (!GV->hasSection() || GV->getAlignment() == 0) + GV->setAlignment(PrefAlign); + return GV->getAlignment(); } return Align; @@ -287,7 +290,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { const Type *Tys[3] = { CI.getOperand(1)->getType(), CI.getOperand(2)->getType(), CI.getOperand(3)->getType() }; - CI.setOperand(0, + CI.setCalledFunction( Intrinsic::getDeclaration(M, MemCpyID, Tys, 3)); Changed = true; } @@ -526,7 +529,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { // X + 0 -> {X, false} if (RHS->isZero()) { Constant *V[] = { - UndefValue::get(II->getOperand(0)->getType()), + UndefValue::get(II->getCalledValue()->getType()), ConstantInt::getFalse(II->getContext()) }; Constant *Struct = ConstantStruct::get(II->getContext(), V, 2, false); diff --git a/lib/Transforms/InstCombine/InstCombineCasts.cpp b/lib/Transforms/InstCombine/InstCombineCasts.cpp index a68fc6d..eb7628e 100644 --- a/lib/Transforms/InstCombine/InstCombineCasts.cpp +++ b/lib/Transforms/InstCombine/InstCombineCasts.cpp @@ -1323,7 +1323,7 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) { if (ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(Src)) { // Okay, we have (bitcast (shuffle ..)). Check to see if this is - // a bitconvert to a vector with the same # elts. + // a bitcast to a vector with the same # elts. 
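The reworked EnforceKnownAlignment above follows a small set of rules: declarations are never touched, an alloca can always be rounded up to the preferred alignment, and a global is bumped only when it has no explicit section or no alignment was specified, since a sectioned global may be packed tightly against its neighbours. A hedged sketch of just that decision, using a hypothetical enforceAlignment helper over plain flags rather than LLVM values:

#include <cassert>

// Decide what alignment a definition can be assumed to have after an
// optional bump to PrefAlign, mirroring the rules described above.
static unsigned enforceAlignment(unsigned CurAlign, unsigned PrefAlign,
                                 bool IsDeclaration, bool IsAlloca,
                                 bool HasSection) {
  if (IsDeclaration)
    return CurAlign;            // layout is owned by another module
  if (CurAlign >= PrefAlign)
    return CurAlign;            // already at least as aligned as requested
  if (IsAlloca || !HasSection || CurAlign == 0)
    return PrefAlign;           // safe to bump
  return CurAlign;              // sectioned global with explicit alignment
}

int main() {
  assert(enforceAlignment(4, 16, false, true,  false) == 16); // alloca: bump
  assert(enforceAlignment(4, 16, true,  false, false) == 4);  // declaration
  assert(enforceAlignment(4, 16, false, false, true)  == 4);  // sectioned global
  assert(enforceAlignment(0, 16, false, false, true)  == 16); // no align given
  return 0;
}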
if (SVI->hasOneUse() && DestTy->isVectorTy() && cast<VectorType>(DestTy)->getNumElements() == SVI->getType()->getNumElements() && diff --git a/lib/Transforms/InstCombine/InstCombineCompares.cpp b/lib/Transforms/InstCombine/InstCombineCompares.cpp index 72fd558..861cf92 100644 --- a/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -1325,7 +1325,7 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI, // If we have a signed (X % (2^c)) == 0, turn it into an unsigned one. if (RHSV == 0 && isa<ConstantInt>(BO->getOperand(1)) &&BO->hasOneUse()){ const APInt &V = cast<ConstantInt>(BO->getOperand(1))->getValue(); - if (V.sgt(APInt(V.getBitWidth(), 1)) && V.isPowerOf2()) { + if (V.sgt(1) && V.isPowerOf2()) { Value *NewRem = Builder->CreateURem(BO->getOperand(0), BO->getOperand(1), BO->getName()); diff --git a/lib/Transforms/InstCombine/InstCombineSelect.cpp b/lib/Transforms/InstCombine/InstCombineSelect.cpp index 2fc9325..c958cde 100644 --- a/lib/Transforms/InstCombine/InstCombineSelect.cpp +++ b/lib/Transforms/InstCombine/InstCombineSelect.cpp @@ -13,6 +13,7 @@ #include "InstCombine.h" #include "llvm/Support/PatternMatch.h" +#include "llvm/Analysis/InstructionSimplify.h" using namespace llvm; using namespace PatternMatch; @@ -421,49 +422,30 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { Value *TrueVal = SI.getTrueValue(); Value *FalseVal = SI.getFalseValue(); - // select true, X, Y -> X - // select false, X, Y -> Y - if (ConstantInt *C = dyn_cast<ConstantInt>(CondVal)) - return ReplaceInstUsesWith(SI, C->getZExtValue() ? TrueVal : FalseVal); - - // select C, X, X -> X - if (TrueVal == FalseVal) - return ReplaceInstUsesWith(SI, TrueVal); - - if (isa<UndefValue>(TrueVal)) // select C, undef, X -> X - return ReplaceInstUsesWith(SI, FalseVal); - if (isa<UndefValue>(FalseVal)) // select C, X, undef -> X - return ReplaceInstUsesWith(SI, TrueVal); - if (isa<UndefValue>(CondVal)) { // select undef, X, Y -> X or Y - if (isa<Constant>(TrueVal)) - return ReplaceInstUsesWith(SI, TrueVal); - else - return ReplaceInstUsesWith(SI, FalseVal); - } + if (Value *V = SimplifySelectInst(CondVal, TrueVal, FalseVal, TD)) + return ReplaceInstUsesWith(SI, V); if (SI.getType()->isIntegerTy(1)) { if (ConstantInt *C = dyn_cast<ConstantInt>(TrueVal)) { if (C->getZExtValue()) { // Change: A = select B, true, C --> A = or B, C return BinaryOperator::CreateOr(CondVal, FalseVal); - } else { - // Change: A = select B, false, C --> A = and !B, C - Value *NotCond = - InsertNewInstBefore(BinaryOperator::CreateNot(CondVal, - "not."+CondVal->getName()), SI); - return BinaryOperator::CreateAnd(NotCond, FalseVal); } + // Change: A = select B, false, C --> A = and !B, C + Value *NotCond = + InsertNewInstBefore(BinaryOperator::CreateNot(CondVal, + "not."+CondVal->getName()), SI); + return BinaryOperator::CreateAnd(NotCond, FalseVal); } else if (ConstantInt *C = dyn_cast<ConstantInt>(FalseVal)) { if (C->getZExtValue() == false) { // Change: A = select B, C, false --> A = and B, C return BinaryOperator::CreateAnd(CondVal, TrueVal); - } else { - // Change: A = select B, C, true --> A = or !B, C - Value *NotCond = - InsertNewInstBefore(BinaryOperator::CreateNot(CondVal, - "not."+CondVal->getName()), SI); - return BinaryOperator::CreateOr(NotCond, TrueVal); } + // Change: A = select B, C, true --> A = or !B, C + Value *NotCond = + InsertNewInstBefore(BinaryOperator::CreateNot(CondVal, + "not."+CondVal->getName()), SI); + return 
BinaryOperator::CreateOr(NotCond, TrueVal); } // select a, b, a -> a&b diff --git a/lib/Transforms/Makefile b/lib/Transforms/Makefile index ea4a115..e527be2 100644 --- a/lib/Transforms/Makefile +++ b/lib/Transforms/Makefile @@ -13,7 +13,7 @@ PARALLEL_DIRS = Utils Instrumentation Scalar InstCombine IPO Hello include $(LEVEL)/Makefile.config # No support for plugins on windows targets -ifeq ($(HOST_OS), $(filter $(HOST_OS), Cygwin MingW)) +ifeq ($(HOST_OS), $(filter $(HOST_OS), Cygwin MingW Minix)) PARALLEL_DIRS := $(filter-out Hello, $(PARALLEL_DIRS)) endif diff --git a/lib/Transforms/Scalar/CMakeLists.txt b/lib/Transforms/Scalar/CMakeLists.txt index 683c1c2..5778864 100644 --- a/lib/Transforms/Scalar/CMakeLists.txt +++ b/lib/Transforms/Scalar/CMakeLists.txt @@ -21,7 +21,6 @@ add_llvm_library(LLVMScalarOpts Reassociate.cpp Reg2Mem.cpp SCCP.cpp - SCCVN.cpp Scalar.cpp ScalarReplAggregates.cpp SimplifyCFGPass.cpp diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp index 642d59d..321def7 100644 --- a/lib/Transforms/Scalar/GVN.cpp +++ b/lib/Transforms/Scalar/GVN.cpp @@ -1217,7 +1217,7 @@ static Value *GetMemInstValueForLoad(MemIntrinsic *SrcInst, unsigned Offset, return ConstantFoldLoadFromConstPtr(Src, &TD); } - +namespace { struct AvailableValueInBlock { /// BB - The basic block in question. @@ -1291,6 +1291,8 @@ struct AvailableValueInBlock { } }; +} + /// ConstructSSAForLoadSet - Given a set of loads specified by ValuesPerBlock, /// construct SSA form, allowing us to eliminate LI. This returns the value /// that should be used at LI's definition site. @@ -1333,8 +1335,8 @@ static Value *ConstructSSAForLoadSet(LoadInst *LI, return V; } -static bool isLifetimeStart(Instruction *Inst) { - if (IntrinsicInst* II = dyn_cast<IntrinsicInst>(Inst)) +static bool isLifetimeStart(const Instruction *Inst) { + if (const IntrinsicInst* II = dyn_cast<IntrinsicInst>(Inst)) return II->getIntrinsicID() == Intrinsic::lifetime_start; return false; } diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp index 6605666..36bea67 100644 --- a/lib/Transforms/Scalar/IndVarSimplify.cpp +++ b/lib/Transforms/Scalar/IndVarSimplify.cpp @@ -97,6 +97,8 @@ namespace { private: + void EliminateIVComparisons(); + void EliminateIVRemainders(); void RewriteNonIntegerIVs(Loop *L); ICmpInst *LinearFunctionTestReplace(Loop *L, const SCEV *BackedgeTakenCount, @@ -133,6 +135,24 @@ ICmpInst *IndVarSimplify::LinearFunctionTestReplace(Loop *L, BasicBlock *ExitingBlock, BranchInst *BI, SCEVExpander &Rewriter) { + // Special case: If the backedge-taken count is a UDiv, it's very likely a + // UDiv that ScalarEvolution produced in order to compute a precise + // expression, rather than a UDiv from the user's code. If we can't find a + // UDiv in the code with some simple searching, assume the former and forego + // rewriting the loop. + if (isa<SCEVUDivExpr>(BackedgeTakenCount)) { + ICmpInst *OrigCond = dyn_cast<ICmpInst>(BI->getCondition()); + if (!OrigCond) return 0; + const SCEV *R = SE->getSCEV(OrigCond->getOperand(1)); + R = SE->getMinusSCEV(R, SE->getConstant(R->getType(), 1)); + if (R != BackedgeTakenCount) { + const SCEV *L = SE->getSCEV(OrigCond->getOperand(0)); + L = SE->getMinusSCEV(L, SE->getConstant(L->getType(), 1)); + if (L != BackedgeTakenCount) + return 0; + } + } + // If the exiting block is not the same as the backedge block, we must compare // against the preincremented value, otherwise we prefer to compare against // the post-incremented value. 
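The boolean select folds in the InstCombineSelect hunk above (select B, true, C -> or B, C; select B, false, C -> and !B, C; select B, C, false -> and B, C; select B, C, true -> or !B, C) are ordinary i1 identities. A small exhaustive check over both operand values:

#include <cassert>

int main() {
  // Exhaustively verify the i1 select folds for every (B, C) combination.
  for (int b = 0; b <= 1; ++b)
    for (int c = 0; c <= 1; ++c) {
      bool B = b, C = c;
      assert((B ? true  : C) == (B | C));    // select B, true, C  -> or B, C
      assert((B ? false : C) == (!B & C));   // select B, false, C -> and !B, C
      assert((B ? C : false) == (B & C));    // select B, C, false -> and B, C
      assert((B ? C : true ) == (!B | C));   // select B, C, true  -> or !B, C
    }
  return 0;
}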
@@ -142,12 +162,12 @@ ICmpInst *IndVarSimplify::LinearFunctionTestReplace(Loop *L, // Add one to the "backedge-taken" count to get the trip count. // If this addition may overflow, we have to be more pessimistic and // cast the induction variable before doing the add. - const SCEV *Zero = SE->getIntegerSCEV(0, BackedgeTakenCount->getType()); + const SCEV *Zero = SE->getConstant(BackedgeTakenCount->getType(), 0); const SCEV *N = SE->getAddExpr(BackedgeTakenCount, - SE->getIntegerSCEV(1, BackedgeTakenCount->getType())); + SE->getConstant(BackedgeTakenCount->getType(), 1)); if ((isa<SCEVConstant>(N) && !N->isZero()) || - SE->isLoopGuardedByCond(L, ICmpInst::ICMP_NE, N, Zero)) { + SE->isLoopEntryGuardedByCond(L, ICmpInst::ICMP_NE, N, Zero)) { // No overflow. Cast the sum. RHS = SE->getTruncateOrZeroExtend(N, IndVar->getType()); } else { @@ -155,7 +175,7 @@ ICmpInst *IndVarSimplify::LinearFunctionTestReplace(Loop *L, RHS = SE->getTruncateOrZeroExtend(BackedgeTakenCount, IndVar->getType()); RHS = SE->getAddExpr(RHS, - SE->getIntegerSCEV(1, IndVar->getType())); + SE->getConstant(IndVar->getType(), 1)); } // The BackedgeTaken expression contains the number of times that the @@ -336,6 +356,116 @@ void IndVarSimplify::RewriteNonIntegerIVs(Loop *L) { SE->forgetLoop(L); } +void IndVarSimplify::EliminateIVComparisons() { + SmallVector<WeakVH, 16> DeadInsts; + + // Look for ICmp users. + for (IVUsers::iterator I = IU->begin(), E = IU->end(); I != E; ++I) { + IVStrideUse &UI = *I; + ICmpInst *ICmp = dyn_cast<ICmpInst>(UI.getUser()); + if (!ICmp) continue; + + bool Swapped = UI.getOperandValToReplace() == ICmp->getOperand(1); + ICmpInst::Predicate Pred = ICmp->getPredicate(); + if (Swapped) Pred = ICmpInst::getSwappedPredicate(Pred); + + // Get the SCEVs for the ICmp operands. + const SCEV *S = IU->getReplacementExpr(UI); + const SCEV *X = SE->getSCEV(ICmp->getOperand(!Swapped)); + + // Simplify unnecessary loops away. + const Loop *ICmpLoop = LI->getLoopFor(ICmp->getParent()); + S = SE->getSCEVAtScope(S, ICmpLoop); + X = SE->getSCEVAtScope(X, ICmpLoop); + + // If the condition is always true or always false, replace it with + // a constant value. + if (SE->isKnownPredicate(Pred, S, X)) + ICmp->replaceAllUsesWith(ConstantInt::getTrue(ICmp->getContext())); + else if (SE->isKnownPredicate(ICmpInst::getInversePredicate(Pred), S, X)) + ICmp->replaceAllUsesWith(ConstantInt::getFalse(ICmp->getContext())); + else + continue; + + DEBUG(dbgs() << "INDVARS: Eliminated comparison: " << *ICmp << '\n'); + DeadInsts.push_back(ICmp); + } + + // Now that we're done iterating through lists, clean up any instructions + // which are now dead. + while (!DeadInsts.empty()) + if (Instruction *Inst = + dyn_cast_or_null<Instruction>(DeadInsts.pop_back_val())) + RecursivelyDeleteTriviallyDeadInstructions(Inst); +} + +void IndVarSimplify::EliminateIVRemainders() { + SmallVector<WeakVH, 16> DeadInsts; + + // Look for SRem and URem users. + for (IVUsers::iterator I = IU->begin(), E = IU->end(); I != E; ++I) { + IVStrideUse &UI = *I; + BinaryOperator *Rem = dyn_cast<BinaryOperator>(UI.getUser()); + if (!Rem) continue; + + bool isSigned = Rem->getOpcode() == Instruction::SRem; + if (!isSigned && Rem->getOpcode() != Instruction::URem) + continue; + + // We're only interested in the case where we know something about + // the numerator. + if (UI.getOperandValToReplace() != Rem->getOperand(0)) + continue; + + // Get the SCEVs for the ICmp operands. 
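EliminateIVComparisons above leans on ScalarEvolution's isKnownPredicate: if the predicate provably holds (or provably fails) for every value the induction variable takes, the icmp is replaced outright by true or false. A standalone sketch of the underlying range reasoning with plain integers; classifySLT is a hypothetical helper, not part of the patch:

#include <cassert>

enum Tri { AlwaysFalse, AlwaysTrue, Unknown };

// Given that an induction variable only takes values in the half-open
// range [Lo, Hi), classify the comparison "iv < X" for a loop-invariant X.
static Tri classifySLT(long Lo, long Hi, long X) {
  if (Hi <= X)  return AlwaysTrue;   // every value the IV takes is < X
  if (Lo >= X)  return AlwaysFalse;  // no value the IV takes is < X
  return Unknown;
}

int main() {
  // for (i = 0; i != 10; ++i)  =>  i is in [0, 10)
  assert(classifySLT(0, 10, 10) == AlwaysTrue);   // "i < 10" folds to true
  assert(classifySLT(0, 10, 0)  == AlwaysFalse);  // "i < 0" folds to false
  assert(classifySLT(0, 10, 5)  == Unknown);      // genuinely varies
  return 0;
}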
+ const SCEV *S = SE->getSCEV(Rem->getOperand(0)); + const SCEV *X = SE->getSCEV(Rem->getOperand(1)); + + // Simplify unnecessary loops away. + const Loop *ICmpLoop = LI->getLoopFor(Rem->getParent()); + S = SE->getSCEVAtScope(S, ICmpLoop); + X = SE->getSCEVAtScope(X, ICmpLoop); + + // i % n --> i if i is in [0,n). + if ((!isSigned || SE->isKnownNonNegative(S)) && + SE->isKnownPredicate(isSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT, + S, X)) + Rem->replaceAllUsesWith(Rem->getOperand(0)); + else { + // (i+1) % n --> (i+1)==n?0:(i+1) if i is in [0,n). + const SCEV *LessOne = + SE->getMinusSCEV(S, SE->getConstant(S->getType(), 1)); + if ((!isSigned || SE->isKnownNonNegative(LessOne)) && + SE->isKnownPredicate(isSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT, + LessOne, X)) { + ICmpInst *ICmp = new ICmpInst(Rem, ICmpInst::ICMP_EQ, + Rem->getOperand(0), Rem->getOperand(1), + "tmp"); + SelectInst *Sel = + SelectInst::Create(ICmp, + ConstantInt::get(Rem->getType(), 0), + Rem->getOperand(0), "tmp", Rem); + Rem->replaceAllUsesWith(Sel); + } else + continue; + } + + // Inform IVUsers about the new users. + if (Instruction *I = dyn_cast<Instruction>(Rem->getOperand(0))) + IU->AddUsersIfInteresting(I); + + DEBUG(dbgs() << "INDVARS: Simplified rem: " << *Rem << '\n'); + DeadInsts.push_back(Rem); + } + + // Now that we're done iterating through lists, clean up any instructions + // which are now dead. + while (!DeadInsts.empty()) + if (Instruction *Inst = + dyn_cast_or_null<Instruction>(DeadInsts.pop_back_val())) + RecursivelyDeleteTriviallyDeadInstructions(Inst); +} + bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) { IU = &getAnalysis<IVUsers>(); LI = &getAnalysis<LoopInfo>(); @@ -362,6 +492,12 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) { if (!isa<SCEVCouldNotCompute>(BackedgeTakenCount)) RewriteLoopExitValues(L, Rewriter); + // Simplify ICmp IV users. + EliminateIVComparisons(); + + // Simplify SRem and URem IV users. + EliminateIVRemainders(); + // Compute the type of the largest recurrence expression, and decide whether // a canonical induction variable should be inserted. const Type *LargestType = 0; @@ -454,6 +590,46 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) { return Changed; } +// FIXME: It is an extremely bad idea to indvar substitute anything more +// complex than affine induction variables. Doing so will put expensive +// polynomial evaluations inside of the loop, and the str reduction pass +// currently can only reduce affine polynomials. For now just disable +// indvar subst on anything more complex than an affine addrec, unless +// it can be expanded to a trivial value. +static bool isSafe(const SCEV *S, const Loop *L) { + // Loop-invariant values are safe. + if (S->isLoopInvariant(L)) return true; + + // Affine addrecs are safe. Non-affine are not, because LSR doesn't know how + // to transform them into efficient code. + if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) + return AR->isAffine(); + + // An add is safe it all its operands are safe. + if (const SCEVCommutativeExpr *Commutative = dyn_cast<SCEVCommutativeExpr>(S)) { + for (SCEVCommutativeExpr::op_iterator I = Commutative->op_begin(), + E = Commutative->op_end(); I != E; ++I) + if (!isSafe(*I, L)) return false; + return true; + } + + // A cast is safe if its operand is. + if (const SCEVCastExpr *C = dyn_cast<SCEVCastExpr>(S)) + return isSafe(C->getOperand(), L); + + // A udiv is safe if its operands are. 
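The two remainder rewrites above rest on simple identities once ScalarEvolution proves the numerator lies in [0, n): i % n is just i, and (i+1) % n is (i+1)==n ? 0 : i+1. An exhaustive check over small values:

#include <cassert>

int main() {
  // Check both remainder rewrites for every i in [0, n).
  for (int n = 1; n <= 8; ++n)
    for (int i = 0; i < n; ++i) {
      // i % n --> i when i is known to lie in [0, n).
      assert(i % n == i);
      // (i+1) % n --> (i+1)==n ? 0 : (i+1) when i lies in [0, n).
      assert((i + 1) % n == ((i + 1) == n ? 0 : i + 1));
    }
  return 0;
}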
+ if (const SCEVUDivExpr *UD = dyn_cast<SCEVUDivExpr>(S)) + return isSafe(UD->getLHS(), L) && + isSafe(UD->getRHS(), L); + + // SCEVUnknown is always safe. + if (isa<SCEVUnknown>(S)) + return true; + + // Nothing else is safe. + return false; +} + void IndVarSimplify::RewriteIVExpressions(Loop *L, SCEVExpander &Rewriter) { SmallVector<WeakVH, 16> DeadInsts; @@ -465,7 +641,6 @@ void IndVarSimplify::RewriteIVExpressions(Loop *L, SCEVExpander &Rewriter) { // the need for the code evaluation methods to insert induction variables // of different sizes. for (IVUsers::iterator UI = IU->begin(), E = IU->end(); UI != E; ++UI) { - const SCEV *Stride = UI->getStride(); Value *Op = UI->getOperandValToReplace(); const Type *UseTy = Op->getType(); Instruction *User = UI->getUser(); @@ -486,7 +661,7 @@ void IndVarSimplify::RewriteIVExpressions(Loop *L, SCEVExpander &Rewriter) { // currently can only reduce affine polynomials. For now just disable // indvar subst on anything more complex than an affine addrec, unless // it can be expanded to a trivial value. - if (!AR->isLoopInvariant(L) && !Stride->isLoopInvariant(L)) + if (!isSafe(AR, L)) continue; // Determine the insertion point for this user. By default, insert diff --git a/lib/Transforms/Scalar/JumpThreading.cpp b/lib/Transforms/Scalar/JumpThreading.cpp index a6489ec..df05b71 100644 --- a/lib/Transforms/Scalar/JumpThreading.cpp +++ b/lib/Transforms/Scalar/JumpThreading.cpp @@ -670,8 +670,10 @@ bool JumpThreading::ProcessBranchOnDuplicateCond(BasicBlock *PredBB, Value *OldCond = DestBI->getCondition(); DestBI->setCondition(ConstantInt::get(Type::getInt1Ty(BB->getContext()), BranchDir)); - ConstantFoldTerminator(BB); + // Delete dead instructions before we fold the branch. Folding the branch + // can eliminate edges from the CFG which can end up deleting OldCond. RecursivelyDeleteTriviallyDeadInstructions(OldCond); + ConstantFoldTerminator(BB); return true; } diff --git a/lib/Transforms/Scalar/LICM.cpp b/lib/Transforms/Scalar/LICM.cpp index d7ace342..7347395 100644 --- a/lib/Transforms/Scalar/LICM.cpp +++ b/lib/Transforms/Scalar/LICM.cpp @@ -683,16 +683,18 @@ void LICM::PromoteValuesInLoop() { // to LI as we are loading or storing. Since we know that the value is // stored in this loop, this will always succeed. for (Value::use_iterator UI = Ptr->use_begin(), E = Ptr->use_end(); - UI != E; ++UI) - if (LoadInst *LI = dyn_cast<LoadInst>(*UI)) { + UI != E; ++UI) { + User *U = *UI; + if (LoadInst *LI = dyn_cast<LoadInst>(U)) { LoadValue = LI; break; - } else if (StoreInst *SI = dyn_cast<StoreInst>(*UI)) { + } else if (StoreInst *SI = dyn_cast<StoreInst>(U)) { if (SI->getOperand(1) == Ptr) { LoadValue = SI->getOperand(0); break; } } + } assert(LoadValue && "No store through the pointer found!"); PointerValueNumbers.push_back(LoadValue); // Remember this for later. } diff --git a/lib/Transforms/Scalar/LoopIndexSplit.cpp b/lib/Transforms/Scalar/LoopIndexSplit.cpp index 16d3f2f..101ff5b 100644 --- a/lib/Transforms/Scalar/LoopIndexSplit.cpp +++ b/lib/Transforms/Scalar/LoopIndexSplit.cpp @@ -948,6 +948,25 @@ bool LoopIndexSplit::splitLoop() { if (!IVBasedValues.count(SplitCondition->getOperand(!SVOpNum))) return false; + // Check for side effects. 
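The isSafe() guard above exists because only affine recurrences can be kept up to date cheaply inside the loop. A small illustration of that cost argument, under the assumption of a simple integer IV (the strength-reduced update is a single add per iteration, which is exactly what the current strength-reduction pass can produce):

#include <cassert>

int main() {
  // An affine IV expression {b,+,s} (i.e. b + i*s) needs only one add per
  // iteration to stay current, so expanding it inside the loop is cheap.
  const int b = 7, s = 3, n = 100;
  int affine = b;                      // running value of b + i*s
  for (int i = 0; i < n; ++i) {
    assert(affine == b + i * s);       // what the expanded expression denotes
    affine += s;                       // strength-reduced update: one add
  }
  // A non-affine expression such as i*i has no such single-add update that
  // the current strength-reduction pass knows how to form, which is why
  // isSafe() rejects it and leaves the IV use alone.
  return 0;
}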
+ for (Loop::block_iterator I = L->block_begin(), E = L->block_end(); + I != E; ++I) { + BasicBlock *BB = *I; + + assert(DT->dominates(Header, BB)); + if (DT->properlyDominates(SplitCondition->getParent(), BB)) + continue; + + for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); + BI != BE; ++BI) { + Instruction *Inst = BI; + + if (!Inst->isSafeToSpeculativelyExecute() && !isa<PHINode>(Inst) + && !isa<BranchInst>(Inst) && !isa<DbgInfoIntrinsic>(Inst)) + return false; + } + } + // Normalize loop conditions so that it is easier to calculate new loop // bounds. if (IVisGT(*ExitCondition) || IVisGE(*ExitCondition)) { diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp index 625a75d..cf3d16f 100644 --- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -221,7 +221,7 @@ static void DoInitialMatch(const SCEV *S, Loop *L, if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) if (!AR->getStart()->isZero()) { DoInitialMatch(AR->getStart(), L, Good, Bad, SE, DT); - DoInitialMatch(SE.getAddRecExpr(SE.getIntegerSCEV(0, AR->getType()), + DoInitialMatch(SE.getAddRecExpr(SE.getConstant(AR->getType(), 0), AR->getStepRecurrence(SE), AR->getLoop()), L, Good, Bad, SE, DT); @@ -262,11 +262,15 @@ void Formula::InitialMatch(const SCEV *S, Loop *L, SmallVector<const SCEV *, 4> Bad; DoInitialMatch(S, L, Good, Bad, SE, DT); if (!Good.empty()) { - BaseRegs.push_back(SE.getAddExpr(Good)); + const SCEV *Sum = SE.getAddExpr(Good); + if (!Sum->isZero()) + BaseRegs.push_back(Sum); AM.HasBaseReg = true; } if (!Bad.empty()) { - BaseRegs.push_back(SE.getAddExpr(Bad)); + const SCEV *Sum = SE.getAddExpr(Bad); + if (!Sum->isZero()) + BaseRegs.push_back(Sum); AM.HasBaseReg = true; } } @@ -375,7 +379,7 @@ static const SCEV *getExactSDiv(const SCEV *LHS, const SCEV *RHS, bool IgnoreSignificantBits = false) { // Handle the trivial case, which works for any SCEV type. if (LHS == RHS) - return SE.getIntegerSCEV(1, LHS->getType()); + return SE.getConstant(LHS->getType(), 1); // Handle x /s -1 as x * -1, to give ScalarEvolution a chance to do some // folding. @@ -450,7 +454,7 @@ static const SCEV *getExactSDiv(const SCEV *LHS, const SCEV *RHS, static int64_t ExtractImmediate(const SCEV *&S, ScalarEvolution &SE) { if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S)) { if (C->getValue()->getValue().getMinSignedBits() <= 64) { - S = SE.getIntegerSCEV(0, C->getType()); + S = SE.getConstant(C->getType(), 0); return C->getValue()->getSExtValue(); } } else if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) { @@ -473,7 +477,7 @@ static int64_t ExtractImmediate(const SCEV *&S, ScalarEvolution &SE) { static GlobalValue *ExtractSymbol(const SCEV *&S, ScalarEvolution &SE) { if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) { if (GlobalValue *GV = dyn_cast<GlobalValue>(U->getValue())) { - S = SE.getIntegerSCEV(0, GV->getType()); + S = SE.getConstant(GV->getType(), 0); return GV; } } else if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) { @@ -781,10 +785,10 @@ struct LSRFixup { /// will be replaced. Value *OperandValToReplace; - /// PostIncLoop - If this user is to use the post-incremented value of an + /// PostIncLoops - If this user is to use the post-incremented value of an /// induction variable, this variable is non-null and holds the loop /// associated with the induction variable. 
- const Loop *PostIncLoop; + PostIncLoopSet PostIncLoops; /// LUIdx - The index of the LSRUse describing the expression which /// this fixup needs, minus an offset (below). @@ -795,6 +799,8 @@ struct LSRFixup { /// offsets, for example in an unrolled loop. int64_t Offset; + bool isUseFullyOutsideLoop(const Loop *L) const; + LSRFixup(); void print(raw_ostream &OS) const; @@ -804,9 +810,24 @@ struct LSRFixup { } LSRFixup::LSRFixup() - : UserInst(0), OperandValToReplace(0), PostIncLoop(0), + : UserInst(0), OperandValToReplace(0), LUIdx(~size_t(0)), Offset(0) {} +/// isUseFullyOutsideLoop - Test whether this fixup always uses its +/// value outside of the given loop. +bool LSRFixup::isUseFullyOutsideLoop(const Loop *L) const { + // PHI nodes use their value in their incoming blocks. + if (const PHINode *PN = dyn_cast<PHINode>(UserInst)) { + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) + if (PN->getIncomingValue(i) == OperandValToReplace && + L->contains(PN->getIncomingBlock(i))) + return false; + return true; + } + + return !L->contains(UserInst); +} + void LSRFixup::print(raw_ostream &OS) const { OS << "UserInst="; // Store is common and interesting enough to be worth special-casing. @@ -821,9 +842,10 @@ void LSRFixup::print(raw_ostream &OS) const { OS << ", OperandValToReplace="; WriteAsOperand(OS, OperandValToReplace, /*PrintType=*/false); - if (PostIncLoop) { + for (PostIncLoopSet::const_iterator I = PostIncLoops.begin(), + E = PostIncLoops.end(); I != E; ++I) { OS << ", PostIncLoop="; - WriteAsOperand(OS, PostIncLoop->getHeader(), /*PrintType=*/false); + WriteAsOperand(OS, (*I)->getHeader(), /*PrintType=*/false); } if (LUIdx != ~size_t(0)) @@ -1135,6 +1157,7 @@ class LSRInstance { IVUsers &IU; ScalarEvolution &SE; DominatorTree &DT; + LoopInfo &LI; const TargetLowering *const TLI; Loop *const L; bool Changed; @@ -1214,6 +1237,13 @@ public: DenseSet<const SCEV *> &VisitedRegs) const; void Solve(SmallVectorImpl<const Formula *> &Solution) const; + BasicBlock::iterator + HoistInsertPosition(BasicBlock::iterator IP, + const SmallVectorImpl<Instruction *> &Inputs) const; + BasicBlock::iterator AdjustInsertPositionForExpand(BasicBlock::iterator IP, + const LSRFixup &LF, + const LSRUse &LU) const; + Value *Expand(const LSRFixup &LF, const Formula &F, BasicBlock::iterator IP, @@ -1427,16 +1457,30 @@ ICmpInst *LSRInstance::OptimizeMax(ICmpInst *Cond, IVStrideUse* &CondUse) { const SCEV *BackedgeTakenCount = SE.getBackedgeTakenCount(L); if (isa<SCEVCouldNotCompute>(BackedgeTakenCount)) return Cond; - const SCEV *One = SE.getIntegerSCEV(1, BackedgeTakenCount->getType()); + const SCEV *One = SE.getConstant(BackedgeTakenCount->getType(), 1); // Add one to the backedge-taken count to get the trip count. const SCEV *IterationCount = SE.getAddExpr(BackedgeTakenCount, One); - - // Check for a max calculation that matches the pattern. - if (!isa<SCEVSMaxExpr>(IterationCount) && !isa<SCEVUMaxExpr>(IterationCount)) + if (IterationCount != SE.getSCEV(Sel)) return Cond; + + // Check for a max calculation that matches the pattern. There's no check + // for ICMP_ULE here because the comparison would be with zero, which + // isn't interesting. 
+ CmpInst::Predicate Pred = ICmpInst::BAD_ICMP_PREDICATE; + const SCEVNAryExpr *Max = 0; + if (const SCEVSMaxExpr *S = dyn_cast<SCEVSMaxExpr>(BackedgeTakenCount)) { + Pred = ICmpInst::ICMP_SLE; + Max = S; + } else if (const SCEVSMaxExpr *S = dyn_cast<SCEVSMaxExpr>(IterationCount)) { + Pred = ICmpInst::ICMP_SLT; + Max = S; + } else if (const SCEVUMaxExpr *U = dyn_cast<SCEVUMaxExpr>(IterationCount)) { + Pred = ICmpInst::ICMP_ULT; + Max = U; + } else { + // No match; bail. return Cond; - const SCEVNAryExpr *Max = cast<SCEVNAryExpr>(IterationCount); - if (Max != SE.getSCEV(Sel)) return Cond; + } // To handle a max with more than two operands, this optimization would // require additional checking and setup. @@ -1445,7 +1489,13 @@ ICmpInst *LSRInstance::OptimizeMax(ICmpInst *Cond, IVStrideUse* &CondUse) { const SCEV *MaxLHS = Max->getOperand(0); const SCEV *MaxRHS = Max->getOperand(1); - if (!MaxLHS || MaxLHS != One) return Cond; + + // ScalarEvolution canonicalizes constants to the left. For < and >, look + // for a comparison with 1. For <= and >=, a comparison with zero. + if (!MaxLHS || + (ICmpInst::isTrueWhenEqual(Pred) ? !MaxLHS->isZero() : (MaxLHS != One))) + return Cond; + // Check the relevant induction variable for conformance to // the pattern. const SCEV *IV = SE.getSCEV(Cond->getOperand(0)); @@ -1461,16 +1511,29 @@ ICmpInst *LSRInstance::OptimizeMax(ICmpInst *Cond, IVStrideUse* &CondUse) { // Check the right operand of the select, and remember it, as it will // be used in the new comparison instruction. Value *NewRHS = 0; - if (SE.getSCEV(Sel->getOperand(1)) == MaxRHS) + if (ICmpInst::isTrueWhenEqual(Pred)) { + // Look for n+1, and grab n. + if (AddOperator *BO = dyn_cast<AddOperator>(Sel->getOperand(1))) + if (isa<ConstantInt>(BO->getOperand(1)) && + cast<ConstantInt>(BO->getOperand(1))->isOne() && + SE.getSCEV(BO->getOperand(0)) == MaxRHS) + NewRHS = BO->getOperand(0); + if (AddOperator *BO = dyn_cast<AddOperator>(Sel->getOperand(2))) + if (isa<ConstantInt>(BO->getOperand(1)) && + cast<ConstantInt>(BO->getOperand(1))->isOne() && + SE.getSCEV(BO->getOperand(0)) == MaxRHS) + NewRHS = BO->getOperand(0); + if (!NewRHS) + return Cond; + } else if (SE.getSCEV(Sel->getOperand(1)) == MaxRHS) NewRHS = Sel->getOperand(1); else if (SE.getSCEV(Sel->getOperand(2)) == MaxRHS) NewRHS = Sel->getOperand(2); - if (!NewRHS) return Cond; + else + llvm_unreachable("Max doesn't match expected pattern!"); // Determine the new comparison opcode. It may be signed or unsigned, // and the original comparison may be either equality or inequality. - CmpInst::Predicate Pred = - isa<SCEVSMaxExpr>(Max) ? CmpInst::ICMP_SLT : CmpInst::ICMP_ULT; if (Cond->getPredicate() == CmpInst::ICMP_EQ) Pred = CmpInst::getInversePredicate(Pred); @@ -1545,8 +1608,9 @@ LSRInstance::OptimizeLoopTermCond() { !DT.properlyDominates(UI->getUser()->getParent(), ExitingBlock)) { // Conservatively assume there may be reuse if the quotient of their // strides could be a legal scale. - const SCEV *A = CondUse->getStride(); - const SCEV *B = UI->getStride(); + const SCEV *A = IU.getStride(*CondUse, L); + const SCEV *B = IU.getStride(*UI, L); + if (!A || !B) continue; if (SE.getTypeSizeInBits(A->getType()) != SE.getTypeSizeInBits(B->getType())) { if (SE.getTypeSizeInBits(A->getType()) > @@ -1598,8 +1662,7 @@ LSRInstance::OptimizeLoopTermCond() { ExitingBlock->getInstList().insert(TermBr, Cond); // Clone the IVUse, as the old use still exists! 
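The max-pattern handling above lets the exit test drop an smax(n, 1) trip count: comparing the incremented IV for inequality against the max can be replaced by a strict signed comparison against plain n, after which the max computation usually becomes dead. A standalone check that both exit tests run the body the same number of times, for the signed strict-inequality case only:

#include <algorithm>
#include <cassert>

// Count iterations of "do { body } while (++i != smax(n, 1))" versus the
// rewritten form "do { body } while (++i < n)", for an IV starting at 0.
static int tripWithMax(int n) {
  int i = 0, trips = 0;
  do { ++trips; } while (++i != std::max(n, 1));
  return trips;
}

static int tripWithSlt(int n) {
  int i = 0, trips = 0;
  do { ++trips; } while (++i < n);
  return trips;
}

int main() {
  for (int n = -3; n <= 10; ++n)
    assert(tripWithMax(n) == tripWithSlt(n));   // both execute max(n,1) times
  return 0;
}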
- CondUse = &IU.AddUser(CondUse->getStride(), CondUse->getOffset(), - Cond, CondUse->getOperandValToReplace()); + CondUse = &IU.AddUser(Cond, CondUse->getOperandValToReplace()); TermBr->replaceUsesOfWith(OldCond, Cond); } } @@ -1607,9 +1670,7 @@ LSRInstance::OptimizeLoopTermCond() { // If we get to here, we know that we can transform the setcc instruction to // use the post-incremented version of the IV, allowing us to coalesce the // live ranges for the IV correctly. - CondUse->setOffset(SE.getMinusSCEV(CondUse->getOffset(), - CondUse->getStride())); - CondUse->setIsUseOfPostIncrementedValue(true); + CondUse->transformToPostInc(L); Changed = true; PostIncs.insert(Cond); @@ -1717,19 +1778,24 @@ void LSRInstance::CollectInterestingTypesAndFactors() { SmallSetVector<const SCEV *, 4> Strides; // Collect interesting types and strides. + SmallVector<const SCEV *, 4> Worklist; for (IVUsers::const_iterator UI = IU.begin(), E = IU.end(); UI != E; ++UI) { - const SCEV *Stride = UI->getStride(); + const SCEV *Expr = IU.getExpr(*UI); // Collect interesting types. - Types.insert(SE.getEffectiveSCEVType(Stride->getType())); - - // Add the stride for this loop. - Strides.insert(Stride); - - // Add strides for other mentioned loops. - for (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(UI->getOffset()); - AR; AR = dyn_cast<SCEVAddRecExpr>(AR->getStart())) - Strides.insert(AR->getStepRecurrence(SE)); + Types.insert(SE.getEffectiveSCEVType(Expr->getType())); + + // Add strides for mentioned loops. + Worklist.push_back(Expr); + do { + const SCEV *S = Worklist.pop_back_val(); + if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) { + Strides.insert(AR->getStepRecurrence(SE)); + Worklist.push_back(AR->getStart()); + } else if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) { + Worklist.insert(Worklist.end(), Add->op_begin(), Add->op_end()); + } + } while (!Worklist.empty()); } // Compute interesting factors from the set of interesting strides. @@ -1776,8 +1842,7 @@ void LSRInstance::CollectFixupsAndInitialFormulae() { LSRFixup &LF = getNewFixup(); LF.UserInst = UI->getUser(); LF.OperandValToReplace = UI->getOperandValToReplace(); - if (UI->isUseOfPostIncrementedValue()) - LF.PostIncLoop = L; + LF.PostIncLoops = UI->getPostIncLoops(); LSRUse::KindType Kind = LSRUse::Basic; const Type *AccessTy = 0; @@ -1786,7 +1851,7 @@ void LSRInstance::CollectFixupsAndInitialFormulae() { AccessTy = getAccessType(LF.UserInst); } - const SCEV *S = IU.getCanonicalExpr(*UI); + const SCEV *S = IU.getExpr(*UI); // Equality (== and !=) ICmps are special. We can rewrite (i == N) as // (N - i == 0), and this allows (N - i) to be the expression that we work @@ -1824,7 +1889,7 @@ void LSRInstance::CollectFixupsAndInitialFormulae() { LF.LUIdx = P.first; LF.Offset = P.second; LSRUse &LU = Uses[LF.LUIdx]; - LU.AllFixupsOutsideLoop &= !L->contains(LF.UserInst); + LU.AllFixupsOutsideLoop &= LF.isUseFullyOutsideLoop(L); // If this is the first use of this LSRUse, give it a formula. if (LU.Formulae.empty()) { @@ -1918,9 +1983,17 @@ LSRInstance::CollectLoopInvariantFixupsAndFormulae() { continue; // Ignore uses which are part of other SCEV expressions, to avoid // analyzing them multiple times. - if (SE.isSCEVable(UserInst->getType()) && - !isa<SCEVUnknown>(SE.getSCEV(const_cast<Instruction *>(UserInst)))) - continue; + if (SE.isSCEVable(UserInst->getType())) { + const SCEV *UserS = SE.getSCEV(const_cast<Instruction *>(UserInst)); + // If the user is a no-op, look through to its uses. 
+ if (!isa<SCEVUnknown>(UserS)) + continue; + if (UserS == U) { + Worklist.push_back( + SE.getUnknown(const_cast<Instruction *>(UserInst))); + continue; + } + } // Ignore icmp instructions which are already being analyzed. if (const ICmpInst *ICI = dyn_cast<ICmpInst>(UserInst)) { unsigned OtherIdx = !UI.getOperandNo(); @@ -1936,7 +2009,7 @@ LSRInstance::CollectLoopInvariantFixupsAndFormulae() { LF.LUIdx = P.first; LF.Offset = P.second; LSRUse &LU = Uses[LF.LUIdx]; - LU.AllFixupsOutsideLoop &= L->contains(LF.UserInst); + LU.AllFixupsOutsideLoop &= LF.isUseFullyOutsideLoop(L); InsertSupplementalFormula(U, LU, LF.LUIdx); CountRegisters(LU.Formulae.back(), Uses.size() - 1); break; @@ -1959,7 +2032,7 @@ static void CollectSubexprs(const SCEV *S, const SCEVConstant *C, } else if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) { // Split a non-zero base out of an addrec. if (!AR->getStart()->isZero()) { - CollectSubexprs(SE.getAddRecExpr(SE.getIntegerSCEV(0, AR->getType()), + CollectSubexprs(SE.getAddRecExpr(SE.getConstant(AR->getType(), 0), AR->getStepRecurrence(SE), AR->getLoop()), C, Ops, SE); CollectSubexprs(AR->getStart(), C, Ops, SE); @@ -2020,8 +2093,11 @@ void LSRInstance::GenerateReassociations(LSRUse &LU, unsigned LUIdx, LU.Kind, LU.AccessTy, TLI, SE)) continue; + const SCEV *InnerSum = SE.getAddExpr(InnerAddOps); + if (InnerSum->isZero()) + continue; Formula F = Base; - F.BaseRegs[i] = SE.getAddExpr(InnerAddOps); + F.BaseRegs[i] = InnerSum; F.BaseRegs.push_back(*J); if (InsertFormula(LU, LUIdx, F)) // If that formula hadn't been seen before, recurse to find more like @@ -2102,7 +2178,7 @@ void LSRInstance::GenerateConstantOffsets(LSRUse &LU, unsigned LUIdx, F.AM.BaseOffs = (uint64_t)Base.AM.BaseOffs - *I; if (isLegalUse(F.AM, LU.MinOffset - *I, LU.MaxOffset - *I, LU.Kind, LU.AccessTy, TLI)) { - F.BaseRegs[i] = SE.getAddExpr(G, SE.getIntegerSCEV(*I, G->getType())); + F.BaseRegs[i] = SE.getAddExpr(G, SE.getConstant(G->getType(), *I)); (void)InsertFormula(LU, LUIdx, F); } @@ -2165,7 +2241,7 @@ void LSRInstance::GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx, // Compensate for the use having MinOffset built into it. F.AM.BaseOffs = (uint64_t)F.AM.BaseOffs + Offset - LU.MinOffset; - const SCEV *FactorS = SE.getIntegerSCEV(Factor, IntTy); + const SCEV *FactorS = SE.getConstant(IntTy, Factor); // Check that multiplying with each base register doesn't overflow. for (size_t i = 0, e = F.BaseRegs.size(); i != e; ++i) { @@ -2227,7 +2303,7 @@ void LSRInstance::GenerateScales(LSRUse &LU, unsigned LUIdx, for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i) if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Base.BaseRegs[i])) { - const SCEV *FactorS = SE.getIntegerSCEV(Factor, IntTy); + const SCEV *FactorS = SE.getConstant(IntTy, Factor); if (FactorS->isZero()) continue; // Divide out the factor, ignoring high bits, since we'll be @@ -2426,7 +2502,7 @@ void LSRInstance::GenerateCrossUseConstantOffsets() { if (C->getValue()->getValue().isNegative() != (NewF.AM.BaseOffs < 0) && (C->getValue()->getValue().abs() * APInt(BitWidth, F.AM.Scale)) - .ule(APInt(BitWidth, NewF.AM.BaseOffs).abs())) + .ule(abs64(NewF.AM.BaseOffs))) continue; // OK, looks good. @@ -2454,7 +2530,7 @@ void LSRInstance::GenerateCrossUseConstantOffsets() { if (C->getValue()->getValue().isNegative() != (NewF.AM.BaseOffs < 0) && C->getValue()->getValue().abs() - .ule(APInt(BitWidth, NewF.AM.BaseOffs).abs())) + .ule(abs64(NewF.AM.BaseOffs))) goto skip_formula; // Ok, looks good. 
@@ -2776,37 +2852,33 @@ static BasicBlock *getImmediateDominator(BasicBlock *BB, DominatorTree &DT) { return Node->getBlock(); } -Value *LSRInstance::Expand(const LSRFixup &LF, - const Formula &F, - BasicBlock::iterator IP, - SCEVExpander &Rewriter, - SmallVectorImpl<WeakVH> &DeadInsts) const { - const LSRUse &LU = Uses[LF.LUIdx]; - - // Then, collect some instructions which we will remain dominated by when - // expanding the replacement. These must be dominated by any operands that - // will be required in the expansion. - SmallVector<Instruction *, 4> Inputs; - if (Instruction *I = dyn_cast<Instruction>(LF.OperandValToReplace)) - Inputs.push_back(I); - if (LU.Kind == LSRUse::ICmpZero) - if (Instruction *I = - dyn_cast<Instruction>(cast<ICmpInst>(LF.UserInst)->getOperand(1))) - Inputs.push_back(I); - if (LF.PostIncLoop) { - if (!L->contains(LF.UserInst)) - Inputs.push_back(L->getLoopLatch()->getTerminator()); - else - Inputs.push_back(IVIncInsertPos); - } - - // Then, climb up the immediate dominator tree as far as we can go while - // still being dominated by the input positions. +/// HoistInsertPosition - Helper for AdjustInsertPositionForExpand. Climb up +/// the dominator tree far as we can go while still being dominated by the +/// input positions. This helps canonicalize the insert position, which +/// encourages sharing. +BasicBlock::iterator +LSRInstance::HoistInsertPosition(BasicBlock::iterator IP, + const SmallVectorImpl<Instruction *> &Inputs) + const { for (;;) { + const Loop *IPLoop = LI.getLoopFor(IP->getParent()); + unsigned IPLoopDepth = IPLoop ? IPLoop->getLoopDepth() : 0; + + BasicBlock *IDom; + for (BasicBlock *Rung = IP->getParent(); ; Rung = IDom) { + IDom = getImmediateDominator(Rung, DT); + if (!IDom) return IP; + + // Don't climb into a loop though. + const Loop *IDomLoop = LI.getLoopFor(IDom); + unsigned IDomDepth = IDomLoop ? IDomLoop->getLoopDepth() : 0; + if (IDomDepth <= IPLoopDepth && + (IDomDepth != IPLoopDepth || IDomLoop == IPLoop)) + break; + } + bool AllDominate = true; Instruction *BetterPos = 0; - BasicBlock *IDom = getImmediateDominator(IP->getParent(), DT); - if (!IDom) break; Instruction *Tentative = IDom->getTerminator(); for (SmallVectorImpl<Instruction *>::const_iterator I = Inputs.begin(), E = Inputs.end(); I != E; ++I) { @@ -2815,6 +2887,8 @@ Value *LSRInstance::Expand(const LSRFixup &LF, AllDominate = false; break; } + // Attempt to find an insert position in the middle of the block, + // instead of at the end, so that it can be used for other expansions. if (IDom == Inst->getParent() && (!BetterPos || DT.dominates(BetterPos, Inst))) BetterPos = next(BasicBlock::iterator(Inst)); @@ -2826,12 +2900,77 @@ Value *LSRInstance::Expand(const LSRFixup &LF, else IP = Tentative; } + + return IP; +} + +/// AdjustInsertPositionForExpand - Determine an input position which will be +/// dominated by the operands and which will dominate the result. +BasicBlock::iterator +LSRInstance::AdjustInsertPositionForExpand(BasicBlock::iterator IP, + const LSRFixup &LF, + const LSRUse &LU) const { + // Collect some instructions which must be dominated by the + // expanding replacement. These must be dominated by any operands that + // will be required in the expansion. 
+ SmallVector<Instruction *, 4> Inputs; + if (Instruction *I = dyn_cast<Instruction>(LF.OperandValToReplace)) + Inputs.push_back(I); + if (LU.Kind == LSRUse::ICmpZero) + if (Instruction *I = + dyn_cast<Instruction>(cast<ICmpInst>(LF.UserInst)->getOperand(1))) + Inputs.push_back(I); + if (LF.PostIncLoops.count(L)) { + if (LF.isUseFullyOutsideLoop(L)) + Inputs.push_back(L->getLoopLatch()->getTerminator()); + else + Inputs.push_back(IVIncInsertPos); + } + // The expansion must also be dominated by the increment positions of any + // loops it for which it is using post-inc mode. + for (PostIncLoopSet::const_iterator I = LF.PostIncLoops.begin(), + E = LF.PostIncLoops.end(); I != E; ++I) { + const Loop *PIL = *I; + if (PIL == L) continue; + + // Be dominated by the loop exit. + SmallVector<BasicBlock *, 4> ExitingBlocks; + PIL->getExitingBlocks(ExitingBlocks); + if (!ExitingBlocks.empty()) { + BasicBlock *BB = ExitingBlocks[0]; + for (unsigned i = 1, e = ExitingBlocks.size(); i != e; ++i) + BB = DT.findNearestCommonDominator(BB, ExitingBlocks[i]); + Inputs.push_back(BB->getTerminator()); + } + } + + // Then, climb up the immediate dominator tree as far as we can go while + // still being dominated by the input positions. + IP = HoistInsertPosition(IP, Inputs); + + // Don't insert instructions before PHI nodes. while (isa<PHINode>(IP)) ++IP; + + // Ignore debug intrinsics. while (isa<DbgInfoIntrinsic>(IP)) ++IP; + return IP; +} + +Value *LSRInstance::Expand(const LSRFixup &LF, + const Formula &F, + BasicBlock::iterator IP, + SCEVExpander &Rewriter, + SmallVectorImpl<WeakVH> &DeadInsts) const { + const LSRUse &LU = Uses[LF.LUIdx]; + + // Determine an input position which will be dominated by the operands and + // which will dominate the result. + IP = AdjustInsertPositionForExpand(IP, LF, LU); + // Inform the Rewriter if we have a post-increment use, so that it can // perform an advantageous expansion. - Rewriter.setPostInc(LF.PostIncLoop); + Rewriter.setPostInc(LF.PostIncLoops); // This is the type that the user actually needs. const Type *OpTy = LF.OperandValToReplace->getType(); @@ -2855,24 +2994,11 @@ Value *LSRInstance::Expand(const LSRFixup &LF, const SCEV *Reg = *I; assert(!Reg->isZero() && "Zero allocated in a base register!"); - // If we're expanding for a post-inc user for the add-rec's loop, make the - // post-inc adjustment. - const SCEV *Start = Reg; - while (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Start)) { - if (AR->getLoop() == LF.PostIncLoop) { - Reg = SE.getAddExpr(Reg, AR->getStepRecurrence(SE)); - // If the user is inside the loop, insert the code after the increment - // so that it is dominated by its operand. If the original insert point - // was already dominated by the increment, keep it, because there may - // be loop-variant operands that need to be respected also. - if (L->contains(LF.UserInst) && !DT.dominates(IVIncInsertPos, IP)) { - IP = IVIncInsertPos; - while (isa<DbgInfoIntrinsic>(IP)) ++IP; - } - break; - } - Start = AR->getStart(); - } + // If we're expanding for a post-inc user, make the post-inc adjustment. 
+ PostIncLoopSet &Loops = const_cast<PostIncLoopSet &>(LF.PostIncLoops); + Reg = TransformForPostIncUse(Denormalize, Reg, + LF.UserInst, LF.OperandValToReplace, + Loops, SE, DT); Ops.push_back(SE.getUnknown(Rewriter.expandCodeFor(Reg, 0, IP))); } @@ -2889,11 +3015,11 @@ Value *LSRInstance::Expand(const LSRFixup &LF, if (F.AM.Scale != 0) { const SCEV *ScaledS = F.ScaledReg; - // If we're expanding for a post-inc user for the add-rec's loop, make the - // post-inc adjustment. - if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(ScaledS)) - if (AR->getLoop() == LF.PostIncLoop) - ScaledS = SE.getAddExpr(ScaledS, AR->getStepRecurrence(SE)); + // If we're expanding for a post-inc user, make the post-inc adjustment. + PostIncLoopSet &Loops = const_cast<PostIncLoopSet &>(LF.PostIncLoops); + ScaledS = TransformForPostIncUse(Denormalize, ScaledS, + LF.UserInst, LF.OperandValToReplace, + Loops, SE, DT); if (LU.Kind == LSRUse::ICmpZero) { // An interesting way of "folding" with an icmp is to use a negated @@ -2907,8 +3033,7 @@ Value *LSRInstance::Expand(const LSRFixup &LF, // which is expected to be matched as part of the address. ScaledS = SE.getUnknown(Rewriter.expandCodeFor(ScaledS, 0, IP)); ScaledS = SE.getMulExpr(ScaledS, - SE.getIntegerSCEV(F.AM.Scale, - ScaledS->getType())); + SE.getConstant(ScaledS->getType(), F.AM.Scale)); Ops.push_back(ScaledS); // Flush the operand list to suppress SCEVExpander hoisting. @@ -2949,12 +3074,12 @@ Value *LSRInstance::Expand(const LSRFixup &LF, // Emit instructions summing all the operands. const SCEV *FullS = Ops.empty() ? - SE.getIntegerSCEV(0, IntTy) : + SE.getConstant(IntTy, 0) : SE.getAddExpr(Ops); Value *FullV = Rewriter.expandCodeFor(FullS, Ty, IP); // We're done expanding now, so reset the rewriter. - Rewriter.setPostInc(0); + Rewriter.clearPostInc(); // An ICmpZero Formula represents an ICmp which we're handling as a // comparison against zero. Now that we've expanded an expression for that @@ -3118,6 +3243,7 @@ LSRInstance::LSRInstance(const TargetLowering *tli, Loop *l, Pass *P) : IU(P->getAnalysis<IVUsers>()), SE(P->getAnalysis<ScalarEvolution>()), DT(P->getAnalysis<DominatorTree>()), + LI(P->getAnalysis<LoopInfo>()), TLI(tli), L(l), Changed(false), IVIncInsertPos(0) { // If LoopSimplify form is not available, stay out of trouble. @@ -3274,9 +3400,10 @@ void LoopStrengthReduce::getAnalysisUsage(AnalysisUsage &AU) const { // We split critical edges, so we change the CFG. However, we do update // many analyses if they are around. AU.addPreservedID(LoopSimplifyID); - AU.addPreserved<LoopInfo>(); AU.addPreserved("domfrontier"); + AU.addRequired<LoopInfo>(); + AU.addPreserved<LoopInfo>(); AU.addRequiredID(LoopSimplifyID); AU.addRequired<DominatorTree>(); AU.addPreserved<DominatorTree>(); diff --git a/lib/Transforms/Scalar/LoopUnswitch.cpp b/lib/Transforms/Scalar/LoopUnswitch.cpp index 3918738..ae7bf40 100644 --- a/lib/Transforms/Scalar/LoopUnswitch.cpp +++ b/lib/Transforms/Scalar/LoopUnswitch.cpp @@ -34,6 +34,7 @@ #include "llvm/Instructions.h" #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/InlineCost.h" +#include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/Dominators.h" @@ -677,15 +678,22 @@ void LoopUnswitch::UnswitchNontrivialCondition(Value *LIC, Constant *Val, LoopProcessWorklist.push_back(NewLoop); redoLoop = true; + // Keep a WeakVH holding onto LIC. 
If the first call to RewriteLoopBody + // deletes the instruction (for example by simplifying a PHI that feeds into + // the condition that we're unswitching on), we don't rewrite the second + // iteration. + WeakVH LICHandle(LIC); + // Now we rewrite the original code to know that the condition is true and the // new code to know that the condition is false. RewriteLoopBodyWithConditionConstant(L, LIC, Val, false); - - // It's possible that simplifying one loop could cause the other to be - // deleted. If so, don't simplify it. - if (!LoopProcessWorklist.empty() && LoopProcessWorklist.back() == NewLoop) - RewriteLoopBodyWithConditionConstant(NewLoop, LIC, Val, true); + // It's possible that simplifying one loop could cause the other to be + // changed to another value or a constant. If its a constant, don't simplify + // it. + if (!LoopProcessWorklist.empty() && LoopProcessWorklist.back() == NewLoop && + LICHandle && !isa<Constant>(LICHandle)) + RewriteLoopBodyWithConditionConstant(NewLoop, LICHandle, Val, true); } /// RemoveFromWorklist - Remove all instances of I from the worklist vector @@ -981,45 +989,16 @@ void LoopUnswitch::SimplifyCode(std::vector<Instruction*> &Worklist, Loop *L) { continue; } + // See if instruction simplification can hack this up. This is common for + // things like "select false, X, Y" after unswitching made the condition be + // 'false'. + if (Value *V = SimplifyInstruction(I)) { + ReplaceUsesOfWith(I, V, Worklist, L, LPM); + continue; + } + // Special case hacks that appear commonly in unswitched code. - switch (I->getOpcode()) { - case Instruction::Select: - if (ConstantInt *CB = dyn_cast<ConstantInt>(I->getOperand(0))) { - ReplaceUsesOfWith(I, I->getOperand(!CB->getZExtValue()+1), Worklist, L, - LPM); - continue; - } - break; - case Instruction::And: - if (isa<ConstantInt>(I->getOperand(0)) && - // constant -> RHS - I->getOperand(0)->getType()->isIntegerTy(1)) - cast<BinaryOperator>(I)->swapOperands(); - if (ConstantInt *CB = dyn_cast<ConstantInt>(I->getOperand(1))) - if (CB->getType()->isIntegerTy(1)) { - if (CB->isOne()) // X & 1 -> X - ReplaceUsesOfWith(I, I->getOperand(0), Worklist, L, LPM); - else // X & 0 -> 0 - ReplaceUsesOfWith(I, I->getOperand(1), Worklist, L, LPM); - continue; - } - break; - case Instruction::Or: - if (isa<ConstantInt>(I->getOperand(0)) && - // constant -> RHS - I->getOperand(0)->getType()->isIntegerTy(1)) - cast<BinaryOperator>(I)->swapOperands(); - if (ConstantInt *CB = dyn_cast<ConstantInt>(I->getOperand(1))) - if (CB->getType()->isIntegerTy(1)) { - if (CB->isOne()) // X | 1 -> 1 - ReplaceUsesOfWith(I, I->getOperand(1), Worklist, L, LPM); - else // X | 0 -> X - ReplaceUsesOfWith(I, I->getOperand(0), Worklist, L, LPM); - continue; - } - break; - case Instruction::Br: { - BranchInst *BI = cast<BranchInst>(I); + if (BranchInst *BI = dyn_cast<BranchInst>(I)) { if (BI->isUnconditional()) { // If BI's parent is the only pred of the successor, fold the two blocks // together. @@ -1052,13 +1031,13 @@ void LoopUnswitch::SimplifyCode(std::vector<Instruction*> &Worklist, Loop *L) { LPM->deleteSimpleAnalysisValue(Succ, L); Succ->eraseFromParent(); ++NumSimplify; - break; + continue; } if (ConstantInt *CB = dyn_cast<ConstantInt>(BI->getCondition())){ // Conditional branch. Turn it into an unconditional branch, then // remove dead blocks. - break; // FIXME: Enable. + continue; // FIXME: Enable. 
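The WeakVH guard above protects the second rewrite from touching a condition value that the first rewrite may have deleted or folded away. The same shape, sketched with std::weak_ptr standing in for WeakVH (an analogy only; WeakVH is an LLVM value handle, not a smart pointer, and this sketch uses C++11 for brevity):

#include <cassert>
#include <memory>

struct Cond { bool Val; };

// First rewrite: may end up destroying the condition object entirely
// (simulated here by releasing the only owning reference).
static void rewriteFirstLoop(std::shared_ptr<Cond> &C, bool simplifiesAway) {
  if (simplifiesAway)
    C.reset();            // the "instruction" is deleted
}

int main() {
  std::shared_ptr<Cond> LIC(new Cond());
  std::weak_ptr<Cond> Handle(LIC);   // plays the role of the WeakVH

  rewriteFirstLoop(LIC, /*simplifiesAway=*/true);

  // Run the second rewrite only if the handle still points at a live
  // object, mirroring the "LICHandle && !isa<Constant>(LICHandle)" test.
  if (std::shared_ptr<Cond> Live = Handle.lock()) {
    (void)Live;           // would call RewriteLoopBodyWithConditionConstant
    assert(false && "not reached: the condition was deleted");
  }
  return 0;
}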
DEBUG(dbgs() << "Folded branch: " << *BI); BasicBlock *DeadSucc = BI->getSuccessor(CB->getZExtValue()); @@ -1072,8 +1051,7 @@ void LoopUnswitch::SimplifyCode(std::vector<Instruction*> &Worklist, Loop *L) { RemoveBlockIfDead(DeadSucc, Worklist, L); } - break; - } + continue; } } } diff --git a/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/lib/Transforms/Scalar/MemCpyOptimizer.cpp index 3b305ae..3611b8e 100644 --- a/lib/Transforms/Scalar/MemCpyOptimizer.cpp +++ b/lib/Transforms/Scalar/MemCpyOptimizer.cpp @@ -744,7 +744,7 @@ bool MemCpyOpt::processMemMove(MemMoveInst *M) { const Type *ArgTys[3] = { M->getRawDest()->getType(), M->getRawSource()->getType(), M->getLength()->getType() }; - M->setOperand(0,Intrinsic::getDeclaration(Mod, Intrinsic::memcpy, ArgTys, 3)); + M->setCalledFunction(Intrinsic::getDeclaration(Mod, Intrinsic::memcpy, ArgTys, 3)); // MemDep may have over conservative information about this instruction, just // conservatively flush it from the cache. diff --git a/lib/Transforms/Scalar/Reg2Mem.cpp b/lib/Transforms/Scalar/Reg2Mem.cpp index 7a6eec3..13222ac 100644 --- a/lib/Transforms/Scalar/Reg2Mem.cpp +++ b/lib/Transforms/Scalar/Reg2Mem.cpp @@ -46,10 +46,11 @@ namespace { bool valueEscapes(const Instruction *Inst) const { const BasicBlock *BB = Inst->getParent(); for (Value::const_use_iterator UI = Inst->use_begin(),E = Inst->use_end(); - UI != E; ++UI) - if (cast<Instruction>(*UI)->getParent() != BB || - isa<PHINode>(*UI)) + UI != E; ++UI) { + const Instruction *I = cast<Instruction>(*UI); + if (I->getParent() != BB || isa<PHINode>(I)) return true; + } return false; } diff --git a/lib/Transforms/Scalar/SCCP.cpp b/lib/Transforms/Scalar/SCCP.cpp index 4f09bee..907ece8 100644 --- a/lib/Transforms/Scalar/SCCP.cpp +++ b/lib/Transforms/Scalar/SCCP.cpp @@ -317,7 +317,10 @@ private: void markConstant(LatticeVal &IV, Value *V, Constant *C) { if (!IV.markConstant(C)) return; DEBUG(dbgs() << "markConstant: " << *C << ": " << *V << '\n'); - InstWorkList.push_back(V); + if (IV.isOverdefined()) + OverdefinedInstWorkList.push_back(V); + else + InstWorkList.push_back(V); } void markConstant(Value *V, Constant *C) { @@ -327,9 +330,13 @@ private: void markForcedConstant(Value *V, Constant *C) { assert(!V->getType()->isStructTy() && "Should use other method"); - ValueState[V].markForcedConstant(C); + LatticeVal &IV = ValueState[V]; + IV.markForcedConstant(C); DEBUG(dbgs() << "markForcedConstant: " << *C << ": " << *V << '\n'); - InstWorkList.push_back(V); + if (IV.isOverdefined()) + OverdefinedInstWorkList.push_back(V); + else + InstWorkList.push_back(V); } @@ -1445,6 +1452,8 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) { // After a zero extend, we know the top part is zero. SExt doesn't have // to be handled here, because we don't know whether the top part is 1's // or 0's. + case Instruction::SIToFP: // some FP values are not possible, just use 0. + case Instruction::UIToFP: // some FP values are not possible, just use 0. markForcedConstant(I, Constant::getNullValue(ITy)); return true; case Instruction::Mul: diff --git a/lib/Transforms/Scalar/SCCVN.cpp b/lib/Transforms/Scalar/SCCVN.cpp deleted file mode 100644 index 9685a29..0000000 --- a/lib/Transforms/Scalar/SCCVN.cpp +++ /dev/null @@ -1,716 +0,0 @@ -//===- SCCVN.cpp - Eliminate redundant values -----------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. 
-// -//===----------------------------------------------------------------------===// -// -// This pass performs global value numbering to eliminate fully redundant -// instructions. This is based on the paper "SCC-based Value Numbering" -// by Cooper, et al. -// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "sccvn" -#include "llvm/Transforms/Scalar.h" -#include "llvm/BasicBlock.h" -#include "llvm/Constants.h" -#include "llvm/DerivedTypes.h" -#include "llvm/Function.h" -#include "llvm/Operator.h" -#include "llvm/Value.h" -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/DepthFirstIterator.h" -#include "llvm/ADT/PostOrderIterator.h" -#include "llvm/ADT/SmallPtrSet.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/SparseBitVector.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/Analysis/Dominators.h" -#include "llvm/Support/CFG.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Transforms/Utils/SSAUpdater.h" -using namespace llvm; - -STATISTIC(NumSCCVNInstr, "Number of instructions deleted by SCCVN"); -STATISTIC(NumSCCVNPhi, "Number of phis deleted by SCCVN"); - -//===----------------------------------------------------------------------===// -// ValueTable Class -//===----------------------------------------------------------------------===// - -/// This class holds the mapping between values and value numbers. It is used -/// as an efficient mechanism to determine the expression-wise equivalence of -/// two values. -namespace { - struct Expression { - enum ExpressionOpcode { ADD, FADD, SUB, FSUB, MUL, FMUL, - UDIV, SDIV, FDIV, UREM, SREM, - FREM, SHL, LSHR, ASHR, AND, OR, XOR, ICMPEQ, - ICMPNE, ICMPUGT, ICMPUGE, ICMPULT, ICMPULE, - ICMPSGT, ICMPSGE, ICMPSLT, ICMPSLE, FCMPOEQ, - FCMPOGT, FCMPOGE, FCMPOLT, FCMPOLE, FCMPONE, - FCMPORD, FCMPUNO, FCMPUEQ, FCMPUGT, FCMPUGE, - FCMPULT, FCMPULE, FCMPUNE, EXTRACT, INSERT, - SHUFFLE, SELECT, TRUNC, ZEXT, SEXT, FPTOUI, - FPTOSI, UITOFP, SITOFP, FPTRUNC, FPEXT, - PTRTOINT, INTTOPTR, BITCAST, GEP, CALL, CONSTANT, - INSERTVALUE, EXTRACTVALUE, EMPTY, TOMBSTONE }; - - ExpressionOpcode opcode; - const Type* type; - SmallVector<uint32_t, 4> varargs; - - Expression() { } - Expression(ExpressionOpcode o) : opcode(o) { } - - bool operator==(const Expression &other) const { - if (opcode != other.opcode) - return false; - else if (opcode == EMPTY || opcode == TOMBSTONE) - return true; - else if (type != other.type) - return false; - else { - if (varargs.size() != other.varargs.size()) - return false; - - for (size_t i = 0; i < varargs.size(); ++i) - if (varargs[i] != other.varargs[i]) - return false; - - return true; - } - } - - bool operator!=(const Expression &other) const { - return !(*this == other); - } - }; - - class ValueTable { - private: - DenseMap<Value*, uint32_t> valueNumbering; - DenseMap<Expression, uint32_t> expressionNumbering; - DenseMap<Value*, uint32_t> constantsNumbering; - - uint32_t nextValueNumber; - - Expression::ExpressionOpcode getOpcode(BinaryOperator* BO); - Expression::ExpressionOpcode getOpcode(CmpInst* C); - Expression::ExpressionOpcode getOpcode(CastInst* C); - Expression create_expression(BinaryOperator* BO); - Expression create_expression(CmpInst* C); - Expression create_expression(ShuffleVectorInst* V); - Expression create_expression(ExtractElementInst* C); - Expression create_expression(InsertElementInst* V); - Expression create_expression(SelectInst* V); - Expression 
create_expression(CastInst* C); - Expression create_expression(GetElementPtrInst* G); - Expression create_expression(CallInst* C); - Expression create_expression(Constant* C); - Expression create_expression(ExtractValueInst* C); - Expression create_expression(InsertValueInst* C); - public: - ValueTable() : nextValueNumber(1) { } - uint32_t computeNumber(Value *V); - uint32_t lookup(Value *V); - void add(Value *V, uint32_t num); - void clear(); - void clearExpressions(); - void erase(Value *v); - unsigned size(); - void verifyRemoved(const Value *) const; - }; -} - -namespace llvm { -template <> struct DenseMapInfo<Expression> { - static inline Expression getEmptyKey() { - return Expression(Expression::EMPTY); - } - - static inline Expression getTombstoneKey() { - return Expression(Expression::TOMBSTONE); - } - - static unsigned getHashValue(const Expression e) { - unsigned hash = e.opcode; - - hash = ((unsigned)((uintptr_t)e.type >> 4) ^ - (unsigned)((uintptr_t)e.type >> 9)); - - for (SmallVector<uint32_t, 4>::const_iterator I = e.varargs.begin(), - E = e.varargs.end(); I != E; ++I) - hash = *I + hash * 37; - - return hash; - } - static bool isEqual(const Expression &LHS, const Expression &RHS) { - return LHS == RHS; - } -}; -template <> -struct isPodLike<Expression> { static const bool value = true; }; - -} - -//===----------------------------------------------------------------------===// -// ValueTable Internal Functions -//===----------------------------------------------------------------------===// -Expression::ExpressionOpcode ValueTable::getOpcode(BinaryOperator* BO) { - switch(BO->getOpcode()) { - default: // THIS SHOULD NEVER HAPPEN - llvm_unreachable("Binary operator with unknown opcode?"); - case Instruction::Add: return Expression::ADD; - case Instruction::FAdd: return Expression::FADD; - case Instruction::Sub: return Expression::SUB; - case Instruction::FSub: return Expression::FSUB; - case Instruction::Mul: return Expression::MUL; - case Instruction::FMul: return Expression::FMUL; - case Instruction::UDiv: return Expression::UDIV; - case Instruction::SDiv: return Expression::SDIV; - case Instruction::FDiv: return Expression::FDIV; - case Instruction::URem: return Expression::UREM; - case Instruction::SRem: return Expression::SREM; - case Instruction::FRem: return Expression::FREM; - case Instruction::Shl: return Expression::SHL; - case Instruction::LShr: return Expression::LSHR; - case Instruction::AShr: return Expression::ASHR; - case Instruction::And: return Expression::AND; - case Instruction::Or: return Expression::OR; - case Instruction::Xor: return Expression::XOR; - } -} - -Expression::ExpressionOpcode ValueTable::getOpcode(CmpInst* C) { - if (isa<ICmpInst>(C)) { - switch (C->getPredicate()) { - default: // THIS SHOULD NEVER HAPPEN - llvm_unreachable("Comparison with unknown predicate?"); - case ICmpInst::ICMP_EQ: return Expression::ICMPEQ; - case ICmpInst::ICMP_NE: return Expression::ICMPNE; - case ICmpInst::ICMP_UGT: return Expression::ICMPUGT; - case ICmpInst::ICMP_UGE: return Expression::ICMPUGE; - case ICmpInst::ICMP_ULT: return Expression::ICMPULT; - case ICmpInst::ICMP_ULE: return Expression::ICMPULE; - case ICmpInst::ICMP_SGT: return Expression::ICMPSGT; - case ICmpInst::ICMP_SGE: return Expression::ICMPSGE; - case ICmpInst::ICMP_SLT: return Expression::ICMPSLT; - case ICmpInst::ICMP_SLE: return Expression::ICMPSLE; - } - } else { - switch (C->getPredicate()) { - default: // THIS SHOULD NEVER HAPPEN - llvm_unreachable("Comparison with unknown predicate?"); - 
case FCmpInst::FCMP_OEQ: return Expression::FCMPOEQ; - case FCmpInst::FCMP_OGT: return Expression::FCMPOGT; - case FCmpInst::FCMP_OGE: return Expression::FCMPOGE; - case FCmpInst::FCMP_OLT: return Expression::FCMPOLT; - case FCmpInst::FCMP_OLE: return Expression::FCMPOLE; - case FCmpInst::FCMP_ONE: return Expression::FCMPONE; - case FCmpInst::FCMP_ORD: return Expression::FCMPORD; - case FCmpInst::FCMP_UNO: return Expression::FCMPUNO; - case FCmpInst::FCMP_UEQ: return Expression::FCMPUEQ; - case FCmpInst::FCMP_UGT: return Expression::FCMPUGT; - case FCmpInst::FCMP_UGE: return Expression::FCMPUGE; - case FCmpInst::FCMP_ULT: return Expression::FCMPULT; - case FCmpInst::FCMP_ULE: return Expression::FCMPULE; - case FCmpInst::FCMP_UNE: return Expression::FCMPUNE; - } - } -} - -Expression::ExpressionOpcode ValueTable::getOpcode(CastInst* C) { - switch(C->getOpcode()) { - default: // THIS SHOULD NEVER HAPPEN - llvm_unreachable("Cast operator with unknown opcode?"); - case Instruction::Trunc: return Expression::TRUNC; - case Instruction::ZExt: return Expression::ZEXT; - case Instruction::SExt: return Expression::SEXT; - case Instruction::FPToUI: return Expression::FPTOUI; - case Instruction::FPToSI: return Expression::FPTOSI; - case Instruction::UIToFP: return Expression::UITOFP; - case Instruction::SIToFP: return Expression::SITOFP; - case Instruction::FPTrunc: return Expression::FPTRUNC; - case Instruction::FPExt: return Expression::FPEXT; - case Instruction::PtrToInt: return Expression::PTRTOINT; - case Instruction::IntToPtr: return Expression::INTTOPTR; - case Instruction::BitCast: return Expression::BITCAST; - } -} - -Expression ValueTable::create_expression(CallInst* C) { - Expression e; - - e.type = C->getType(); - e.opcode = Expression::CALL; - - e.varargs.push_back(lookup(C->getCalledFunction())); - for (CallInst::op_iterator I = C->op_begin()+1, E = C->op_end(); - I != E; ++I) - e.varargs.push_back(lookup(*I)); - - return e; -} - -Expression ValueTable::create_expression(BinaryOperator* BO) { - Expression e; - e.varargs.push_back(lookup(BO->getOperand(0))); - e.varargs.push_back(lookup(BO->getOperand(1))); - e.type = BO->getType(); - e.opcode = getOpcode(BO); - - return e; -} - -Expression ValueTable::create_expression(CmpInst* C) { - Expression e; - - e.varargs.push_back(lookup(C->getOperand(0))); - e.varargs.push_back(lookup(C->getOperand(1))); - e.type = C->getType(); - e.opcode = getOpcode(C); - - return e; -} - -Expression ValueTable::create_expression(CastInst* C) { - Expression e; - - e.varargs.push_back(lookup(C->getOperand(0))); - e.type = C->getType(); - e.opcode = getOpcode(C); - - return e; -} - -Expression ValueTable::create_expression(ShuffleVectorInst* S) { - Expression e; - - e.varargs.push_back(lookup(S->getOperand(0))); - e.varargs.push_back(lookup(S->getOperand(1))); - e.varargs.push_back(lookup(S->getOperand(2))); - e.type = S->getType(); - e.opcode = Expression::SHUFFLE; - - return e; -} - -Expression ValueTable::create_expression(ExtractElementInst* E) { - Expression e; - - e.varargs.push_back(lookup(E->getOperand(0))); - e.varargs.push_back(lookup(E->getOperand(1))); - e.type = E->getType(); - e.opcode = Expression::EXTRACT; - - return e; -} - -Expression ValueTable::create_expression(InsertElementInst* I) { - Expression e; - - e.varargs.push_back(lookup(I->getOperand(0))); - e.varargs.push_back(lookup(I->getOperand(1))); - e.varargs.push_back(lookup(I->getOperand(2))); - e.type = I->getType(); - e.opcode = Expression::INSERT; - - return e; -} - -Expression 
ValueTable::create_expression(SelectInst* I) { - Expression e; - - e.varargs.push_back(lookup(I->getCondition())); - e.varargs.push_back(lookup(I->getTrueValue())); - e.varargs.push_back(lookup(I->getFalseValue())); - e.type = I->getType(); - e.opcode = Expression::SELECT; - - return e; -} - -Expression ValueTable::create_expression(GetElementPtrInst* G) { - Expression e; - - e.varargs.push_back(lookup(G->getPointerOperand())); - e.type = G->getType(); - e.opcode = Expression::GEP; - - for (GetElementPtrInst::op_iterator I = G->idx_begin(), E = G->idx_end(); - I != E; ++I) - e.varargs.push_back(lookup(*I)); - - return e; -} - -Expression ValueTable::create_expression(ExtractValueInst* E) { - Expression e; - - e.varargs.push_back(lookup(E->getAggregateOperand())); - for (ExtractValueInst::idx_iterator II = E->idx_begin(), IE = E->idx_end(); - II != IE; ++II) - e.varargs.push_back(*II); - e.type = E->getType(); - e.opcode = Expression::EXTRACTVALUE; - - return e; -} - -Expression ValueTable::create_expression(InsertValueInst* E) { - Expression e; - - e.varargs.push_back(lookup(E->getAggregateOperand())); - e.varargs.push_back(lookup(E->getInsertedValueOperand())); - for (InsertValueInst::idx_iterator II = E->idx_begin(), IE = E->idx_end(); - II != IE; ++II) - e.varargs.push_back(*II); - e.type = E->getType(); - e.opcode = Expression::INSERTVALUE; - - return e; -} - -//===----------------------------------------------------------------------===// -// ValueTable External Functions -//===----------------------------------------------------------------------===// - -/// add - Insert a value into the table with a specified value number. -void ValueTable::add(Value *V, uint32_t num) { - valueNumbering[V] = num; -} - -/// computeNumber - Returns the value number for the specified value, assigning -/// it a new number if it did not have one before. 
-uint32_t ValueTable::computeNumber(Value *V) { - if (uint32_t v = valueNumbering[V]) - return v; - else if (uint32_t v= constantsNumbering[V]) - return v; - - if (!isa<Instruction>(V)) { - constantsNumbering[V] = nextValueNumber; - return nextValueNumber++; - } - - Instruction* I = cast<Instruction>(V); - Expression exp; - switch (I->getOpcode()) { - case Instruction::Add: - case Instruction::FAdd: - case Instruction::Sub: - case Instruction::FSub: - case Instruction::Mul: - case Instruction::FMul: - case Instruction::UDiv: - case Instruction::SDiv: - case Instruction::FDiv: - case Instruction::URem: - case Instruction::SRem: - case Instruction::FRem: - case Instruction::Shl: - case Instruction::LShr: - case Instruction::AShr: - case Instruction::And: - case Instruction::Or : - case Instruction::Xor: - exp = create_expression(cast<BinaryOperator>(I)); - break; - case Instruction::ICmp: - case Instruction::FCmp: - exp = create_expression(cast<CmpInst>(I)); - break; - case Instruction::Trunc: - case Instruction::ZExt: - case Instruction::SExt: - case Instruction::FPToUI: - case Instruction::FPToSI: - case Instruction::UIToFP: - case Instruction::SIToFP: - case Instruction::FPTrunc: - case Instruction::FPExt: - case Instruction::PtrToInt: - case Instruction::IntToPtr: - case Instruction::BitCast: - exp = create_expression(cast<CastInst>(I)); - break; - case Instruction::Select: - exp = create_expression(cast<SelectInst>(I)); - break; - case Instruction::ExtractElement: - exp = create_expression(cast<ExtractElementInst>(I)); - break; - case Instruction::InsertElement: - exp = create_expression(cast<InsertElementInst>(I)); - break; - case Instruction::ShuffleVector: - exp = create_expression(cast<ShuffleVectorInst>(I)); - break; - case Instruction::ExtractValue: - exp = create_expression(cast<ExtractValueInst>(I)); - break; - case Instruction::InsertValue: - exp = create_expression(cast<InsertValueInst>(I)); - break; - case Instruction::GetElementPtr: - exp = create_expression(cast<GetElementPtrInst>(I)); - break; - default: - valueNumbering[V] = nextValueNumber; - return nextValueNumber++; - } - - uint32_t& e = expressionNumbering[exp]; - if (!e) e = nextValueNumber++; - valueNumbering[V] = e; - - return e; -} - -/// lookup - Returns the value number of the specified value. Returns 0 if -/// the value has not yet been numbered. -uint32_t ValueTable::lookup(Value *V) { - if (!isa<Instruction>(V)) { - if (!constantsNumbering.count(V)) - constantsNumbering[V] = nextValueNumber++; - return constantsNumbering[V]; - } - - return valueNumbering[V]; -} - -/// clear - Remove all entries from the ValueTable -void ValueTable::clear() { - valueNumbering.clear(); - expressionNumbering.clear(); - constantsNumbering.clear(); - nextValueNumber = 1; -} - -void ValueTable::clearExpressions() { - expressionNumbering.clear(); - constantsNumbering.clear(); - nextValueNumber = 1; -} - -/// erase - Remove a value from the value numbering -void ValueTable::erase(Value *V) { - valueNumbering.erase(V); -} - -/// verifyRemoved - Verify that the value is removed from all internal data -/// structures. 
-void ValueTable::verifyRemoved(const Value *V) const { - for (DenseMap<Value*, uint32_t>::const_iterator - I = valueNumbering.begin(), E = valueNumbering.end(); I != E; ++I) { - assert(I->first != V && "Inst still occurs in value numbering map!"); - } -} - -//===----------------------------------------------------------------------===// -// SCCVN Pass -//===----------------------------------------------------------------------===// - -namespace { - - struct ValueNumberScope { - ValueNumberScope* parent; - DenseMap<uint32_t, Value*> table; - SparseBitVector<128> availIn; - SparseBitVector<128> availOut; - - ValueNumberScope(ValueNumberScope* p) : parent(p) { } - }; - - class SCCVN : public FunctionPass { - bool runOnFunction(Function &F); - public: - static char ID; // Pass identification, replacement for typeid - SCCVN() : FunctionPass(&ID) { } - - private: - ValueTable VT; - DenseMap<BasicBlock*, ValueNumberScope*> BBMap; - - // This transformation requires dominator postdominator info - virtual void getAnalysisUsage(AnalysisUsage &AU) const { - AU.addRequired<DominatorTree>(); - - AU.addPreserved<DominatorTree>(); - AU.setPreservesCFG(); - } - }; - - char SCCVN::ID = 0; -} - -// createSCCVNPass - The public interface to this file... -FunctionPass *llvm::createSCCVNPass() { return new SCCVN(); } - -static RegisterPass<SCCVN> X("sccvn", - "SCC Value Numbering"); - -static Value *lookupNumber(ValueNumberScope *Locals, uint32_t num) { - while (Locals) { - DenseMap<uint32_t, Value*>::iterator I = Locals->table.find(num); - if (I != Locals->table.end()) - return I->second; - Locals = Locals->parent; - } - - return 0; -} - -bool SCCVN::runOnFunction(Function& F) { - // Implement the RPO version of the SCCVN algorithm. Conceptually, - // we optimisitically assume that all instructions with the same opcode have - // the same VN. Then we deepen our comparison by one level, to all - // instructions whose operands have the same opcodes get the same VN. We - // iterate this process until the partitioning stops changing, at which - // point we have computed a full numbering. - ReversePostOrderTraversal<Function*> RPOT(&F); - bool done = false; - while (!done) { - done = true; - VT.clearExpressions(); - for (ReversePostOrderTraversal<Function*>::rpo_iterator I = RPOT.begin(), - E = RPOT.end(); I != E; ++I) { - BasicBlock* BB = *I; - for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); - BI != BE; ++BI) { - uint32_t origVN = VT.lookup(BI); - uint32_t newVN = VT.computeNumber(BI); - if (origVN != newVN) - done = false; - } - } - } - - // Now, do a dominator walk, eliminating simple, dominated redundancies as we - // go. Also, build the ValueNumberScope structure that will be used for - // computing full availability. 
- DominatorTree& DT = getAnalysis<DominatorTree>(); - bool changed = false; - for (df_iterator<DomTreeNode*> DI = df_begin(DT.getRootNode()), - DE = df_end(DT.getRootNode()); DI != DE; ++DI) { - BasicBlock* BB = DI->getBlock(); - if (DI->getIDom()) - BBMap[BB] = new ValueNumberScope(BBMap[DI->getIDom()->getBlock()]); - else - BBMap[BB] = new ValueNumberScope(0); - - for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ) { - uint32_t num = VT.lookup(I); - Value* repl = lookupNumber(BBMap[BB], num); - - if (repl) { - if (isa<PHINode>(I)) - ++NumSCCVNPhi; - else - ++NumSCCVNInstr; - I->replaceAllUsesWith(repl); - Instruction* OldInst = I; - ++I; - BBMap[BB]->table[num] = repl; - OldInst->eraseFromParent(); - changed = true; - } else { - BBMap[BB]->table[num] = I; - BBMap[BB]->availOut.set(num); - - ++I; - } - } - } - - // Perform a forward data-flow to compute availability at all points on - // the CFG. - do { - changed = false; - for (ReversePostOrderTraversal<Function*>::rpo_iterator I = RPOT.begin(), - E = RPOT.end(); I != E; ++I) { - BasicBlock* BB = *I; - ValueNumberScope *VNS = BBMap[BB]; - - SparseBitVector<128> preds; - bool first = true; - for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB); - PI != PE; ++PI) { - if (first) { - preds = BBMap[*PI]->availOut; - first = false; - } else { - preds &= BBMap[*PI]->availOut; - } - } - - changed |= (VNS->availIn |= preds); - changed |= (VNS->availOut |= preds); - } - } while (changed); - - // Use full availability information to perform non-dominated replacements. - SSAUpdater SSU; - for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) { - if (!BBMap.count(FI)) continue; - for (BasicBlock::iterator BI = FI->begin(), BE = FI->end(); - BI != BE; ) { - uint32_t num = VT.lookup(BI); - if (!BBMap[FI]->availIn.test(num)) { - ++BI; - continue; - } - - SSU.Initialize(BI); - - SmallPtrSet<BasicBlock*, 8> visited; - SmallVector<BasicBlock*, 8> stack; - visited.insert(FI); - for (pred_iterator PI = pred_begin(FI), PE = pred_end(FI); - PI != PE; ++PI) - if (!visited.count(*PI)) - stack.push_back(*PI); - - while (!stack.empty()) { - BasicBlock* CurrBB = stack.pop_back_val(); - visited.insert(CurrBB); - - ValueNumberScope* S = BBMap[CurrBB]; - if (S->table.count(num)) { - SSU.AddAvailableValue(CurrBB, S->table[num]); - } else { - for (pred_iterator PI = pred_begin(CurrBB), PE = pred_end(CurrBB); - PI != PE; ++PI) - if (!visited.count(*PI)) - stack.push_back(*PI); - } - } - - Value* repl = SSU.GetValueInMiddleOfBlock(FI); - BI->replaceAllUsesWith(repl); - Instruction* CurInst = BI; - ++BI; - BBMap[FI]->table[num] = repl; - if (isa<PHINode>(CurInst)) - ++NumSCCVNPhi; - else - ++NumSCCVNInstr; - - CurInst->eraseFromParent(); - } - } - - VT.clear(); - for (DenseMap<BasicBlock*, ValueNumberScope*>::iterator - I = BBMap.begin(), E = BBMap.end(); I != E; ++I) - delete I->second; - BBMap.clear(); - - return changed; -} diff --git a/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/lib/Transforms/Scalar/ScalarReplAggregates.cpp index 6211beb..5ca9ce3 100644 --- a/lib/Transforms/Scalar/ScalarReplAggregates.cpp +++ b/lib/Transforms/Scalar/ScalarReplAggregates.cpp @@ -130,14 +130,7 @@ namespace { void RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocaInst *AI, SmallVector<AllocaInst*, 32> &NewElts); - bool CanConvertToScalar(Value *V, bool &IsNotTrivial, const Type *&VecTy, - bool &SawVec, uint64_t Offset, unsigned AllocaSize); - void ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, uint64_t Offset); - Value 
*ConvertScalar_ExtractValue(Value *NV, const Type *ToType, - uint64_t Offset, IRBuilder<> &Builder); - Value *ConvertScalar_InsertValue(Value *StoredVal, Value *ExistingVal, - uint64_t Offset, IRBuilder<> &Builder); - static Instruction *isOnlyCopiedFromConstantGlobal(AllocaInst *AI); + static MemTransferInst *isOnlyCopiedFromConstantGlobal(AllocaInst *AI); }; } @@ -150,6 +143,596 @@ FunctionPass *llvm::createScalarReplAggregatesPass(signed int Threshold) { } +//===----------------------------------------------------------------------===// +// Convert To Scalar Optimization. +//===----------------------------------------------------------------------===// + +namespace { +/// ConvertToScalarInfo - This class implements the "Convert To Scalar" +/// optimization, which scans the uses of an alloca and determines if it can +/// rewrite it in terms of a single new alloca that can be mem2reg'd. +class ConvertToScalarInfo { + /// AllocaSize - The size of the alloca being considered. + unsigned AllocaSize; + const TargetData &TD; + + /// IsNotTrivial - This is set to true if there is some access to the object + /// which means that mem2reg can't promote it. + bool IsNotTrivial; + + /// VectorTy - This tracks the type that we should promote the vector to if + /// it is possible to turn it into a vector. This starts out null, and if it + /// isn't possible to turn into a vector type, it gets set to VoidTy. + const Type *VectorTy; + + /// HadAVector - True if there is at least one vector access to the alloca. + /// We don't want to turn random arrays into vectors and use vector element + /// insert/extract, but if there are element accesses to something that is + /// also declared as a vector, we do want to promote to a vector. + bool HadAVector; + +public: + explicit ConvertToScalarInfo(unsigned Size, const TargetData &td) + : AllocaSize(Size), TD(td) { + IsNotTrivial = false; + VectorTy = 0; + HadAVector = false; + } + + AllocaInst *TryConvert(AllocaInst *AI); + +private: + bool CanConvertToScalar(Value *V, uint64_t Offset); + void MergeInType(const Type *In, uint64_t Offset); + void ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, uint64_t Offset); + + Value *ConvertScalar_ExtractValue(Value *NV, const Type *ToType, + uint64_t Offset, IRBuilder<> &Builder); + Value *ConvertScalar_InsertValue(Value *StoredVal, Value *ExistingVal, + uint64_t Offset, IRBuilder<> &Builder); +}; +} // end anonymous namespace. + +/// TryConvert - Analyze the specified alloca, and if it is safe to do so, +/// rewrite it to be a new alloca which is mem2reg'able. This returns the new +/// alloca if possible or null if not. +AllocaInst *ConvertToScalarInfo::TryConvert(AllocaInst *AI) { + // If we can't convert this scalar, or if mem2reg can trivially do it, bail + // out. + if (!CanConvertToScalar(AI, 0) || !IsNotTrivial) + return 0; + + // If we were able to find a vector type that can handle this with + // insert/extract elements, and if there was at least one use that had + // a vector type, promote this to a vector. We don't want to promote + // random stuff that doesn't use vectors (e.g. <9 x double>) because then + // we just get a lot of insert/extracts. If at least one vector is + // involved, then we probably really do have a union of vector/array. + const Type *NewTy; + if (VectorTy && VectorTy->isVectorTy() && HadAVector) { + DEBUG(dbgs() << "CONVERT TO VECTOR: " << *AI << "\n TYPE = " + << *VectorTy << '\n'); + NewTy = VectorTy; // Use the vector type. 
+ } else { + DEBUG(dbgs() << "CONVERT TO SCALAR INTEGER: " << *AI << "\n"); + // Create and insert the integer alloca. + NewTy = IntegerType::get(AI->getContext(), AllocaSize*8); + } + AllocaInst *NewAI = new AllocaInst(NewTy, 0, "", AI->getParent()->begin()); + ConvertUsesToScalar(AI, NewAI, 0); + return NewAI; +} + +/// MergeInType - Add the 'In' type to the accumulated vector type (VectorTy) +/// so far at the offset specified by Offset (which is specified in bytes). +/// +/// There are two cases we handle here: +/// 1) A union of vector types of the same size and potentially its elements. +/// Here we turn element accesses into insert/extract element operations. +/// This promotes a <4 x float> with a store of float to the third element +/// into a <4 x float> that uses insert element. +/// 2) A fully general blob of memory, which we turn into some (potentially +/// large) integer type with extract and insert operations where the loads +/// and stores would mutate the memory. We mark this by setting VectorTy +/// to VoidTy. +void ConvertToScalarInfo::MergeInType(const Type *In, uint64_t Offset) { + // If we already decided to turn this into a blob of integer memory, there is + // nothing to be done. + if (VectorTy && VectorTy->isVoidTy()) + return; + + // If this could be contributing to a vector, analyze it. + + // If the In type is a vector that is the same size as the alloca, see if it + // matches the existing VectorTy. + if (const VectorType *VInTy = dyn_cast<VectorType>(In)) { + // Remember if we saw a vector type. + HadAVector = true; + + if (VInTy->getBitWidth()/8 == AllocaSize && Offset == 0) { + // If we're storing/loading a vector of the right size, allow it as a + // vector. If this is the first vector we see, remember the type so that + // we know the element size. If this is a subsequent access, ignore it + // even if it is a differing type but the same size. Worst case we can + // bitcast the resultant vectors. + if (VectorTy == 0) + VectorTy = VInTy; + return; + } + } else if (In->isFloatTy() || In->isDoubleTy() || + (In->isIntegerTy() && In->getPrimitiveSizeInBits() >= 8 && + isPowerOf2_32(In->getPrimitiveSizeInBits()))) { + // If we're accessing something that could be an element of a vector, see + // if the implied vector agrees with what we already have and if Offset is + // compatible with it. + unsigned EltSize = In->getPrimitiveSizeInBits()/8; + if (Offset % EltSize == 0 && AllocaSize % EltSize == 0 && + (VectorTy == 0 || + cast<VectorType>(VectorTy)->getElementType() + ->getPrimitiveSizeInBits()/8 == EltSize)) { + if (VectorTy == 0) + VectorTy = VectorType::get(In, AllocaSize/EltSize); + return; + } + } + + // Otherwise, we have a case that we can't handle with an optimized vector + // form. We can still turn this into a large integer. + VectorTy = Type::getVoidTy(In->getContext()); +} + +/// CanConvertToScalar - V is a pointer. If we can convert the pointee and all +/// its accesses to a single vector type, return true and set VectorTy to +/// the new type. If we could convert the alloca into a single promotable +/// integer, return true but set VectorTy to VoidTy. Further, if the use is not a +/// completely trivial use that mem2reg could promote, set IsNotTrivial. Offset +/// is the current offset from the base of the alloca being analyzed. +/// +/// If we see at least one access to the value that is of a vector type, set the +/// HadAVector flag.
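+/// +/// A rough illustration of these rules (an illustrative sketch, not text from +/// the patch itself): a 16-byte alloca whose uses are <4 x float> loads/stores +/// plus a float store at byte offset 8 settles on VectorTy = <4 x float> with +/// HadAVector set, and the float store is later rewritten into an +/// insertelement of element 2; if the same alloca also had an i16 access at +/// byte offset 1, that offset is not element-aligned, so VectorTy is forced to +/// VoidTy and the alloca is instead treated as a single i128 integer.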
+bool ConvertToScalarInfo::CanConvertToScalar(Value *V, uint64_t Offset) { + for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI!=E; ++UI) { + Instruction *User = cast<Instruction>(*UI); + + if (LoadInst *LI = dyn_cast<LoadInst>(User)) { + // Don't break volatile loads. + if (LI->isVolatile()) + return false; + MergeInType(LI->getType(), Offset); + continue; + } + + if (StoreInst *SI = dyn_cast<StoreInst>(User)) { + // Storing the pointer, not into the value? + if (SI->getOperand(0) == V || SI->isVolatile()) return false; + MergeInType(SI->getOperand(0)->getType(), Offset); + continue; + } + + if (BitCastInst *BCI = dyn_cast<BitCastInst>(User)) { + IsNotTrivial = true; // Can't be mem2reg'd. + if (!CanConvertToScalar(BCI, Offset)) + return false; + continue; + } + + if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(User)) { + // If this is a GEP with variable indices, we can't handle it. + if (!GEP->hasAllConstantIndices()) + return false; + + // Compute the offset that this GEP adds to the pointer. + SmallVector<Value*, 8> Indices(GEP->op_begin()+1, GEP->op_end()); + uint64_t GEPOffset = TD.getIndexedOffset(GEP->getPointerOperandType(), + &Indices[0], Indices.size()); + // See if all uses can be converted. + if (!CanConvertToScalar(GEP, Offset+GEPOffset)) + return false; + IsNotTrivial = true; // Can't be mem2reg'd. + continue; + } + + // If this is a constant sized memset of a constant value (e.g. 0) we can + // handle it. + if (MemSetInst *MSI = dyn_cast<MemSetInst>(User)) { + // Store of constant value and constant size. + if (!isa<ConstantInt>(MSI->getValue()) || + !isa<ConstantInt>(MSI->getLength())) + return false; + IsNotTrivial = true; // Can't be mem2reg'd. + continue; + } + + // If this is a memcpy or memmove into or out of the whole allocation, we + // can handle it like a load or store of the scalar type. + if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(User)) { + ConstantInt *Len = dyn_cast<ConstantInt>(MTI->getLength()); + if (Len == 0 || Len->getZExtValue() != AllocaSize || Offset != 0) + return false; + + IsNotTrivial = true; // Can't be mem2reg'd. + continue; + } + + // Otherwise, we cannot handle this! + return false; + } + + return true; +} + +/// ConvertUsesToScalar - Convert all of the users of Ptr to use the new alloca +/// directly. This happens when we are converting an "integer union" to a +/// single integer scalar, or when we are converting a "vector union" to a +/// vector with insert/extractelement instructions. +/// +/// Offset is an offset from the original alloca, in bits that need to be +/// shifted to the right. By the end of this, there should be no uses of Ptr. +void ConvertToScalarInfo::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, + uint64_t Offset) { + while (!Ptr->use_empty()) { + Instruction *User = cast<Instruction>(Ptr->use_back()); + + if (BitCastInst *CI = dyn_cast<BitCastInst>(User)) { + ConvertUsesToScalar(CI, NewAI, Offset); + CI->eraseFromParent(); + continue; + } + + if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(User)) { + // Compute the offset that this GEP adds to the pointer.
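+ // (Worked example of the unit conversion: getIndexedOffset returns a byte + // offset, while ConvertUsesToScalar tracks Offset in bits, hence the + // multiply by 8 just below. A GEP to element 2 of a [4 x i32] alloca yields + // GEPOffset == 8 bytes, i.e. an additional 64 bits.)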
+ SmallVector<Value*, 8> Indices(GEP->op_begin()+1, GEP->op_end()); + uint64_t GEPOffset = TD.getIndexedOffset(GEP->getPointerOperandType(), + &Indices[0], Indices.size()); + ConvertUsesToScalar(GEP, NewAI, Offset+GEPOffset*8); + GEP->eraseFromParent(); + continue; + } + + IRBuilder<> Builder(User->getParent(), User); + + if (LoadInst *LI = dyn_cast<LoadInst>(User)) { + // The load is a bit extract from NewAI shifted right by Offset bits. + Value *LoadedVal = Builder.CreateLoad(NewAI, "tmp"); + Value *NewLoadVal + = ConvertScalar_ExtractValue(LoadedVal, LI->getType(), Offset, Builder); + LI->replaceAllUsesWith(NewLoadVal); + LI->eraseFromParent(); + continue; + } + + if (StoreInst *SI = dyn_cast<StoreInst>(User)) { + assert(SI->getOperand(0) != Ptr && "Consistency error!"); + Instruction *Old = Builder.CreateLoad(NewAI, NewAI->getName()+".in"); + Value *New = ConvertScalar_InsertValue(SI->getOperand(0), Old, Offset, + Builder); + Builder.CreateStore(New, NewAI); + SI->eraseFromParent(); + + // If the load we just inserted is now dead, then the inserted store + // overwrote the entire thing. + if (Old->use_empty()) + Old->eraseFromParent(); + continue; + } + + // If this is a constant sized memset of a constant value (e.g. 0) we can + // transform it into a store of the expanded constant value. + if (MemSetInst *MSI = dyn_cast<MemSetInst>(User)) { + assert(MSI->getRawDest() == Ptr && "Consistency error!"); + unsigned NumBytes = cast<ConstantInt>(MSI->getLength())->getZExtValue(); + if (NumBytes != 0) { + unsigned Val = cast<ConstantInt>(MSI->getValue())->getZExtValue(); + + // Compute the value replicated the right number of times. + APInt APVal(NumBytes*8, Val); + + // Splat the value if non-zero. + if (Val) + for (unsigned i = 1; i != NumBytes; ++i) + APVal |= APVal << 8; + + Instruction *Old = Builder.CreateLoad(NewAI, NewAI->getName()+".in"); + Value *New = ConvertScalar_InsertValue( + ConstantInt::get(User->getContext(), APVal), + Old, Offset, Builder); + Builder.CreateStore(New, NewAI); + + // If the load we just inserted is now dead, then the memset overwrote + // the entire thing. + if (Old->use_empty()) + Old->eraseFromParent(); + } + MSI->eraseFromParent(); + continue; + } + + // If this is a memcpy or memmove into or out of the whole allocation, we + // can handle it like a load or store of the scalar type. + if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(User)) { + assert(Offset == 0 && "must be store to start of alloca"); + + // If the source and destination are both to the same alloca, then this is + // a noop copy-to-self, just delete it. Otherwise, emit a load and store + // as appropriate. + AllocaInst *OrigAI = cast<AllocaInst>(Ptr->getUnderlyingObject(0)); + + if (MTI->getSource()->getUnderlyingObject(0) != OrigAI) { + // Dest must be OrigAI, change this to be a load from the original + // pointer (bitcasted), then a store to our new alloca. + assert(MTI->getRawDest() == Ptr && "Neither use is of pointer?"); + Value *SrcPtr = MTI->getSource(); + SrcPtr = Builder.CreateBitCast(SrcPtr, NewAI->getType()); + + LoadInst *SrcVal = Builder.CreateLoad(SrcPtr, "srcval"); + SrcVal->setAlignment(MTI->getAlignment()); + Builder.CreateStore(SrcVal, NewAI); + } else if (MTI->getDest()->getUnderlyingObject(0) != OrigAI) { + // Src must be OrigAI, change this to be a load from NewAI then a store + // through the original dest pointer (bitcasted). 
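+ // (Sketch of the intended result, with illustrative names, assuming NewAI + // has type <4 x float>*: a memcpy of the whole 16-byte alloca out to %dst + // becomes roughly %srcval = load <4 x float>* %NewAI, followed by + // %dst.v = bitcast i8* %dst to <4 x float>* and + // store <4 x float> %srcval, <4 x float>* %dst.v, with the store carrying + // the original memcpy's alignment.)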
+ assert(MTI->getRawSource() == Ptr && "Neither use is of pointer?"); + LoadInst *SrcVal = Builder.CreateLoad(NewAI, "srcval"); + + Value *DstPtr = Builder.CreateBitCast(MTI->getDest(), NewAI->getType()); + StoreInst *NewStore = Builder.CreateStore(SrcVal, DstPtr); + NewStore->setAlignment(MTI->getAlignment()); + } else { + // Noop transfer. Src == Dst + } + + MTI->eraseFromParent(); + continue; + } + + llvm_unreachable("Unsupported operation!"); + } +} + +/// ConvertScalar_ExtractValue - Extract a value of type ToType from an integer +/// or vector value FromVal, extracting the bits from the offset specified by +/// Offset. This returns the value, which is of type ToType. +/// +/// This happens when we are converting an "integer union" to a single +/// integer scalar, or when we are converting a "vector union" to a vector with +/// insert/extractelement instructions. +/// +/// Offset is an offset from the original alloca, in bits that need to be +/// shifted to the right. +Value *ConvertToScalarInfo:: +ConvertScalar_ExtractValue(Value *FromVal, const Type *ToType, + uint64_t Offset, IRBuilder<> &Builder) { + // If the load is of the whole new alloca, no conversion is needed. + if (FromVal->getType() == ToType && Offset == 0) + return FromVal; + + // If the result alloca is a vector type, this is either an element + // access or a bitcast to another vector type of the same size. + if (const VectorType *VTy = dyn_cast<VectorType>(FromVal->getType())) { + if (ToType->isVectorTy()) + return Builder.CreateBitCast(FromVal, ToType, "tmp"); + + // Otherwise it must be an element access. + unsigned Elt = 0; + if (Offset) { + unsigned EltSize = TD.getTypeAllocSizeInBits(VTy->getElementType()); + Elt = Offset/EltSize; + assert(EltSize*Elt == Offset && "Invalid modulus in validity checking"); + } + // Return the element extracted out of it. + Value *V = Builder.CreateExtractElement(FromVal, ConstantInt::get( + Type::getInt32Ty(FromVal->getContext()), Elt), "tmp"); + if (V->getType() != ToType) + V = Builder.CreateBitCast(V, ToType, "tmp"); + return V; + } + + // If ToType is a first class aggregate, extract out each of the pieces and + // use insertvalue's to form the FCA. + if (const StructType *ST = dyn_cast<StructType>(ToType)) { + const StructLayout &Layout = *TD.getStructLayout(ST); + Value *Res = UndefValue::get(ST); + for (unsigned i = 0, e = ST->getNumElements(); i != e; ++i) { + Value *Elt = ConvertScalar_ExtractValue(FromVal, ST->getElementType(i), + Offset+Layout.getElementOffsetInBits(i), + Builder); + Res = Builder.CreateInsertValue(Res, Elt, i, "tmp"); + } + return Res; + } + + if (const ArrayType *AT = dyn_cast<ArrayType>(ToType)) { + uint64_t EltSize = TD.getTypeAllocSizeInBits(AT->getElementType()); + Value *Res = UndefValue::get(AT); + for (unsigned i = 0, e = AT->getNumElements(); i != e; ++i) { + Value *Elt = ConvertScalar_ExtractValue(FromVal, AT->getElementType(), + Offset+i*EltSize, Builder); + Res = Builder.CreateInsertValue(Res, Elt, i, "tmp"); + } + return Res; + } + + // Otherwise, this must be a union that was converted to an integer value. + const IntegerType *NTy = cast<IntegerType>(FromVal->getType()); + + // If this is a big-endian system and the load is narrower than the + // full alloca type, we need to do a shift to get the right bits. + int ShAmt = 0; + if (TD.isBigEndian()) { + // On big-endian machines, the lowest bit is stored at the bit offset + // from the pointer given by getTypeStoreSizeInBits. 
This matters for + // integers with a bitwidth that is not a multiple of 8. + ShAmt = TD.getTypeStoreSizeInBits(NTy) - + TD.getTypeStoreSizeInBits(ToType) - Offset; + } else { + ShAmt = Offset; + } + + // Note: we support negative bitwidths (with shl) which are not defined. + // We do this to support (f.e.) loads off the end of a structure where + // only some bits are used. + if (ShAmt > 0 && (unsigned)ShAmt < NTy->getBitWidth()) + FromVal = Builder.CreateLShr(FromVal, + ConstantInt::get(FromVal->getType(), + ShAmt), "tmp"); + else if (ShAmt < 0 && (unsigned)-ShAmt < NTy->getBitWidth()) + FromVal = Builder.CreateShl(FromVal, + ConstantInt::get(FromVal->getType(), + -ShAmt), "tmp"); + + // Finally, unconditionally truncate the integer to the right width. + unsigned LIBitWidth = TD.getTypeSizeInBits(ToType); + if (LIBitWidth < NTy->getBitWidth()) + FromVal = + Builder.CreateTrunc(FromVal, IntegerType::get(FromVal->getContext(), + LIBitWidth), "tmp"); + else if (LIBitWidth > NTy->getBitWidth()) + FromVal = + Builder.CreateZExt(FromVal, IntegerType::get(FromVal->getContext(), + LIBitWidth), "tmp"); + + // If the result is an integer, this is a trunc or bitcast. + if (ToType->isIntegerTy()) { + // Should be done. + } else if (ToType->isFloatingPointTy() || ToType->isVectorTy()) { + // Just do a bitcast, we know the sizes match up. + FromVal = Builder.CreateBitCast(FromVal, ToType, "tmp"); + } else { + // Otherwise must be a pointer. + FromVal = Builder.CreateIntToPtr(FromVal, ToType, "tmp"); + } + assert(FromVal->getType() == ToType && "Didn't convert right?"); + return FromVal; +} + +/// ConvertScalar_InsertValue - Insert the value "SV" into the existing integer +/// or vector value "Old" at the offset specified by Offset. +/// +/// This happens when we are converting an "integer union" to a +/// single integer scalar, or when we are converting a "vector union" to a +/// vector with insert/extractelement instructions. +/// +/// Offset is an offset from the original alloca, in bits that need to be +/// shifted to the right. +Value *ConvertToScalarInfo:: +ConvertScalar_InsertValue(Value *SV, Value *Old, + uint64_t Offset, IRBuilder<> &Builder) { + // Convert the stored type to the actual type, shift it left to insert + // then 'or' into place. + const Type *AllocaType = Old->getType(); + LLVMContext &Context = Old->getContext(); + + if (const VectorType *VTy = dyn_cast<VectorType>(AllocaType)) { + uint64_t VecSize = TD.getTypeAllocSizeInBits(VTy); + uint64_t ValSize = TD.getTypeAllocSizeInBits(SV->getType()); + + // Changing the whole vector with memset or with an access of a different + // vector type? + if (ValSize == VecSize) + return Builder.CreateBitCast(SV, AllocaType, "tmp"); + + uint64_t EltSize = TD.getTypeAllocSizeInBits(VTy->getElementType()); + + // Must be an element insertion. + unsigned Elt = Offset/EltSize; + + if (SV->getType() != VTy->getElementType()) + SV = Builder.CreateBitCast(SV, VTy->getElementType(), "tmp"); + + SV = Builder.CreateInsertElement(Old, SV, + ConstantInt::get(Type::getInt32Ty(SV->getContext()), Elt), + "tmp"); + return SV; + } + + // If SV is a first-class aggregate value, insert each value recursively. 
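+ // (For example: storing a {float, i32} value into an alloca that was + // promoted to i64 extracts each field with extractvalue and re-inserts it + // at its StructLayout offset, 0 and 32 bits here, so the aggregate store + // becomes two scalar insertions.)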
+ if (const StructType *ST = dyn_cast<StructType>(SV->getType())) { + const StructLayout &Layout = *TD.getStructLayout(ST); + for (unsigned i = 0, e = ST->getNumElements(); i != e; ++i) { + Value *Elt = Builder.CreateExtractValue(SV, i, "tmp"); + Old = ConvertScalar_InsertValue(Elt, Old, + Offset+Layout.getElementOffsetInBits(i), + Builder); + } + return Old; + } + + if (const ArrayType *AT = dyn_cast<ArrayType>(SV->getType())) { + uint64_t EltSize = TD.getTypeAllocSizeInBits(AT->getElementType()); + for (unsigned i = 0, e = AT->getNumElements(); i != e; ++i) { + Value *Elt = Builder.CreateExtractValue(SV, i, "tmp"); + Old = ConvertScalar_InsertValue(Elt, Old, Offset+i*EltSize, Builder); + } + return Old; + } + + // If SV is a float, convert it to the appropriate integer type. + // If it is a pointer, do the same. + unsigned SrcWidth = TD.getTypeSizeInBits(SV->getType()); + unsigned DestWidth = TD.getTypeSizeInBits(AllocaType); + unsigned SrcStoreWidth = TD.getTypeStoreSizeInBits(SV->getType()); + unsigned DestStoreWidth = TD.getTypeStoreSizeInBits(AllocaType); + if (SV->getType()->isFloatingPointTy() || SV->getType()->isVectorTy()) + SV = Builder.CreateBitCast(SV, + IntegerType::get(SV->getContext(),SrcWidth), "tmp"); + else if (SV->getType()->isPointerTy()) + SV = Builder.CreatePtrToInt(SV, TD.getIntPtrType(SV->getContext()), "tmp"); + + // Zero extend or truncate the value if needed. + if (SV->getType() != AllocaType) { + if (SV->getType()->getPrimitiveSizeInBits() < + AllocaType->getPrimitiveSizeInBits()) + SV = Builder.CreateZExt(SV, AllocaType, "tmp"); + else { + // Truncation may be needed if storing more than the alloca can hold + // (undefined behavior). + SV = Builder.CreateTrunc(SV, AllocaType, "tmp"); + SrcWidth = DestWidth; + SrcStoreWidth = DestStoreWidth; + } + } + + // If this is a big-endian system and the store is narrower than the + // full alloca type, we need to do a shift to get the right bits. + int ShAmt = 0; + if (TD.isBigEndian()) { + // On big-endian machines, the lowest bit is stored at the bit offset + // from the pointer given by getTypeStoreSizeInBits. This matters for + // integers with a bitwidth that is not a multiple of 8. + ShAmt = DestStoreWidth - SrcStoreWidth - Offset; + } else { + ShAmt = Offset; + } + + // Note: we support negative bitwidths (with shr) which are not defined. + // We do this to support (f.e.) stores off the end of a structure where + // only some bits in the structure are set. + APInt Mask(APInt::getLowBitsSet(DestWidth, SrcWidth)); + if (ShAmt > 0 && (unsigned)ShAmt < DestWidth) { + SV = Builder.CreateShl(SV, ConstantInt::get(SV->getType(), + ShAmt), "tmp"); + Mask <<= ShAmt; + } else if (ShAmt < 0 && (unsigned)-ShAmt < DestWidth) { + SV = Builder.CreateLShr(SV, ConstantInt::get(SV->getType(), + -ShAmt), "tmp"); + Mask = Mask.lshr(-ShAmt); + } + + // Mask out the bits we are about to insert from the old value, and or + // in the new bits. 
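+ // (Worked example, little-endian: inserting an i8 at byte offset 3 of an + // i32 alloca has Offset == 24, so SV is zero-extended to i32 and shifted + // left by 24, Mask becomes 0xFF000000, and the combine below produces + // or (and Old, 0x00FFFFFF), <shifted SV>. On a big-endian target the shift + // amount would instead be DestStoreWidth - SrcStoreWidth - Offset, i.e. + // 32 - 8 - 24 = 0.)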
+ if (SrcWidth != DestWidth) { + assert(DestWidth > SrcWidth); + Old = Builder.CreateAnd(Old, ConstantInt::get(Context, ~Mask), "mask"); + SV = Builder.CreateOr(Old, SV, "ins"); + } + return SV; +} + + +//===----------------------------------------------------------------------===// +// SRoA Driver +//===----------------------------------------------------------------------===// + + bool SROA::runOnFunction(Function &F) { TD = getAnalysisIfAvailable<TargetData>(); @@ -202,6 +785,7 @@ bool SROA::performPromotion(Function &F) { return Changed; } + /// ShouldAttemptScalarRepl - Decide if an alloca is a good candidate for /// SROA. It must be a struct or array type with a small number of elements. static bool ShouldAttemptScalarRepl(AllocaInst *AI) { @@ -216,6 +800,7 @@ static bool ShouldAttemptScalarRepl(AllocaInst *AI) { return false; } + // performScalarRepl - This algorithm is a simple worklist driven algorithm, // which runs on all of the malloc/alloca instructions in the function, removing // them if they are only used by getelementptr instructions. @@ -223,7 +808,7 @@ static bool ShouldAttemptScalarRepl(AllocaInst *AI) { bool SROA::performScalarRepl(Function &F) { std::vector<AllocaInst*> WorkList; - // Scan the entry basic block, adding any alloca's and mallocs to the worklist + // Scan the entry basic block, adding allocas to the worklist. BasicBlock &BB = F.getEntryBlock(); for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E; ++I) if (AllocaInst *A = dyn_cast<AllocaInst>(I)) @@ -239,6 +824,7 @@ bool SROA::performScalarRepl(Function &F) { // with unused elements. if (AI->use_empty()) { AI->eraseFromParent(); + Changed = true; continue; } @@ -251,10 +837,10 @@ bool SROA::performScalarRepl(Function &F) { // the constant global instead. This is commonly produced by the CFE by // constructs like "void foo() { int A[] = {1,2,3,4,5,6,7,8,9...}; }" if 'A' // is only subsequently read. - if (Instruction *TheCopy = isOnlyCopiedFromConstantGlobal(AI)) { + if (MemTransferInst *TheCopy = isOnlyCopiedFromConstantGlobal(AI)) { DEBUG(dbgs() << "Found alloca equal to global: " << *AI << '\n'); DEBUG(dbgs() << " memcpy = " << *TheCopy << '\n'); - Constant *TheSrc = cast<Constant>(TheCopy->getOperand(2)); + Constant *TheSrc = cast<Constant>(TheCopy->getSource()); AI->replaceAllUsesWith(ConstantExpr::getBitCast(TheSrc, AI->getType())); TheCopy->eraseFromParent(); // Don't mutate the global. AI->eraseFromParent(); @@ -271,7 +857,10 @@ bool SROA::performScalarRepl(Function &F) { // Do not promote [0 x %struct]. if (AllocaSize == 0) continue; - + + // Do not promote any struct whose size is too big. + if (AllocaSize > SRThreshold) continue; + // If the alloca looks like a good candidate for scalar replacement, and if // all its users can be transformed, then split up the aggregate into its // separate elements. @@ -281,48 +870,20 @@ bool SROA::performScalarRepl(Function &F) { continue; } - // Do not promote any struct whose size is too big. - if (AllocaSize > SRThreshold) continue; - // If we can turn this aggregate value (potentially with casts) into a // simple scalar value that can be mem2reg'd into a register value. // IsNotTrivial tracks whether this is something that mem2reg could have // promoted itself. If so, we don't want to transform it needlessly. Note // that we can't just check based on the type: the alloca may be of an i32 // but that has pointer arithmetic to set byte 3 of it or something. 
- bool IsNotTrivial = false; - const Type *VectorTy = 0; - bool HadAVector = false; - if (CanConvertToScalar(AI, IsNotTrivial, VectorTy, HadAVector, - 0, unsigned(AllocaSize)) && IsNotTrivial) { - AllocaInst *NewAI; - // If we were able to find a vector type that can handle this with - // insert/extract elements, and if there was at least one use that had - // a vector type, promote this to a vector. We don't want to promote - // random stuff that doesn't use vectors (e.g. <9 x double>) because then - // we just get a lot of insert/extracts. If at least one vector is - // involved, then we probably really do have a union of vector/array. - if (VectorTy && VectorTy->isVectorTy() && HadAVector) { - DEBUG(dbgs() << "CONVERT TO VECTOR: " << *AI << "\n TYPE = " - << *VectorTy << '\n'); - - // Create and insert the vector alloca. - NewAI = new AllocaInst(VectorTy, 0, "", AI->getParent()->begin()); - ConvertUsesToScalar(AI, NewAI, 0); - } else { - DEBUG(dbgs() << "CONVERT TO SCALAR INTEGER: " << *AI << "\n"); - - // Create and insert the integer alloca. - const Type *NewTy = IntegerType::get(AI->getContext(), AllocaSize*8); - NewAI = new AllocaInst(NewTy, 0, "", AI->getParent()->begin()); - ConvertUsesToScalar(AI, NewAI, 0); - } + if (AllocaInst *NewAI = + ConvertToScalarInfo((unsigned)AllocaSize, *TD).TryConvert(AI)) { NewAI->takeName(AI); AI->eraseFromParent(); ++NumConverted; Changed = true; continue; - } + } // Otherwise, couldn't process this alloca. } @@ -698,7 +1259,6 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst, // that doesn't have anything to do with the alloca that we are promoting. For // memset, this Value* stays null. Value *OtherPtr = 0; - LLVMContext &Context = MI->getContext(); unsigned MemAlignment = MI->getAlignment(); if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(MI)) { // memmove/memcopy if (Inst == MTI->getRawDest()) @@ -756,7 +1316,7 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst, } // Process each element of the aggregate. - Value *TheFn = MI->getOperand(0); + Value *TheFn = MI->getCalledValue(); const Type *BytePtrTy = MI->getRawDest()->getType(); bool SROADest = MI->getRawDest() == Inst; @@ -775,12 +1335,11 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst, MI); uint64_t EltOffset; const PointerType *OtherPtrTy = cast<PointerType>(OtherPtr->getType()); - if (const StructType *ST = - dyn_cast<StructType>(OtherPtrTy->getElementType())) { + const Type *OtherTy = OtherPtrTy->getElementType(); + if (const StructType *ST = dyn_cast<StructType>(OtherTy)) { EltOffset = TD->getStructLayout(ST)->getElementOffset(i); } else { - const Type *EltTy = - cast<SequentialType>(OtherPtr->getType())->getElementType(); + const Type *EltTy = cast<SequentialType>(OtherTy)->getElementType(); EltOffset = TD->getTypeAllocSize(EltTy)*i; } @@ -832,7 +1391,7 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst, } // Convert the integer value to the appropriate type. - StoreVal = ConstantInt::get(Context, TotalVal); + StoreVal = ConstantInt::get(CI->getContext(), TotalVal); if (ValTy->isPointerTy()) StoreVal = ConstantExpr::getIntToPtr(StoreVal, ValTy); else if (ValTy->isFloatingPointTy()) @@ -1174,509 +1733,6 @@ bool SROA::isSafeAllocaToScalarRepl(AllocaInst *AI) { return true; } -/// MergeInType - Add the 'In' type to the accumulated type (Accum) so far at -/// the offset specified by Offset (which is specified in bytes). 
-/// -/// There are two cases we handle here: -/// 1) A union of vector types of the same size and potentially its elements. -/// Here we turn element accesses into insert/extract element operations. -/// This promotes a <4 x float> with a store of float to the third element -/// into a <4 x float> that uses insert element. -/// 2) A fully general blob of memory, which we turn into some (potentially -/// large) integer type with extract and insert operations where the loads -/// and stores would mutate the memory. -static void MergeInType(const Type *In, uint64_t Offset, const Type *&VecTy, - unsigned AllocaSize, const TargetData &TD, - LLVMContext &Context) { - // If this could be contributing to a vector, analyze it. - if (VecTy != Type::getVoidTy(Context)) { // either null or a vector type. - - // If the In type is a vector that is the same size as the alloca, see if it - // matches the existing VecTy. - if (const VectorType *VInTy = dyn_cast<VectorType>(In)) { - if (VInTy->getBitWidth()/8 == AllocaSize && Offset == 0) { - // If we're storing/loading a vector of the right size, allow it as a - // vector. If this the first vector we see, remember the type so that - // we know the element size. - if (VecTy == 0) - VecTy = VInTy; - return; - } - } else if (In->isFloatTy() || In->isDoubleTy() || - (In->isIntegerTy() && In->getPrimitiveSizeInBits() >= 8 && - isPowerOf2_32(In->getPrimitiveSizeInBits()))) { - // If we're accessing something that could be an element of a vector, see - // if the implied vector agrees with what we already have and if Offset is - // compatible with it. - unsigned EltSize = In->getPrimitiveSizeInBits()/8; - if (Offset % EltSize == 0 && - AllocaSize % EltSize == 0 && - (VecTy == 0 || - cast<VectorType>(VecTy)->getElementType() - ->getPrimitiveSizeInBits()/8 == EltSize)) { - if (VecTy == 0) - VecTy = VectorType::get(In, AllocaSize/EltSize); - return; - } - } - } - - // Otherwise, we have a case that we can't handle with an optimized vector - // form. We can still turn this into a large integer. - VecTy = Type::getVoidTy(Context); -} - -/// CanConvertToScalar - V is a pointer. If we can convert the pointee and all -/// its accesses to a single vector type, return true and set VecTy to -/// the new type. If we could convert the alloca into a single promotable -/// integer, return true but set VecTy to VoidTy. Further, if the use is not a -/// completely trivial use that mem2reg could promote, set IsNotTrivial. Offset -/// is the current offset from the base of the alloca being analyzed. -/// -/// If we see at least one access to the value that is as a vector type, set the -/// SawVec flag. -bool SROA::CanConvertToScalar(Value *V, bool &IsNotTrivial, const Type *&VecTy, - bool &SawVec, uint64_t Offset, - unsigned AllocaSize) { - for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI!=E; ++UI) { - Instruction *User = cast<Instruction>(*UI); - - if (LoadInst *LI = dyn_cast<LoadInst>(User)) { - // Don't break volatile loads. - if (LI->isVolatile()) - return false; - MergeInType(LI->getType(), Offset, VecTy, - AllocaSize, *TD, V->getContext()); - SawVec |= LI->getType()->isVectorTy(); - continue; - } - - if (StoreInst *SI = dyn_cast<StoreInst>(User)) { - // Storing the pointer, not into the value? 
- if (SI->getOperand(0) == V || SI->isVolatile()) return 0; - MergeInType(SI->getOperand(0)->getType(), Offset, - VecTy, AllocaSize, *TD, V->getContext()); - SawVec |= SI->getOperand(0)->getType()->isVectorTy(); - continue; - } - - if (BitCastInst *BCI = dyn_cast<BitCastInst>(User)) { - if (!CanConvertToScalar(BCI, IsNotTrivial, VecTy, SawVec, Offset, - AllocaSize)) - return false; - IsNotTrivial = true; - continue; - } - - if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(User)) { - // If this is a GEP with a variable indices, we can't handle it. - if (!GEP->hasAllConstantIndices()) - return false; - - // Compute the offset that this GEP adds to the pointer. - SmallVector<Value*, 8> Indices(GEP->op_begin()+1, GEP->op_end()); - uint64_t GEPOffset = TD->getIndexedOffset(GEP->getPointerOperandType(), - &Indices[0], Indices.size()); - // See if all uses can be converted. - if (!CanConvertToScalar(GEP, IsNotTrivial, VecTy, SawVec,Offset+GEPOffset, - AllocaSize)) - return false; - IsNotTrivial = true; - continue; - } - - // If this is a constant sized memset of a constant value (e.g. 0) we can - // handle it. - if (MemSetInst *MSI = dyn_cast<MemSetInst>(User)) { - // Store of constant value and constant size. - if (isa<ConstantInt>(MSI->getValue()) && - isa<ConstantInt>(MSI->getLength())) { - IsNotTrivial = true; - continue; - } - } - - // If this is a memcpy or memmove into or out of the whole allocation, we - // can handle it like a load or store of the scalar type. - if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(User)) { - if (ConstantInt *Len = dyn_cast<ConstantInt>(MTI->getLength())) - if (Len->getZExtValue() == AllocaSize && Offset == 0) { - IsNotTrivial = true; - continue; - } - } - - // Otherwise, we cannot handle this! - return false; - } - - return true; -} - -/// ConvertUsesToScalar - Convert all of the users of Ptr to use the new alloca -/// directly. This happens when we are converting an "integer union" to a -/// single integer scalar, or when we are converting a "vector union" to a -/// vector with insert/extractelement instructions. -/// -/// Offset is an offset from the original alloca, in bits that need to be -/// shifted to the right. By the end of this, there should be no uses of Ptr. -void SROA::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, uint64_t Offset) { - while (!Ptr->use_empty()) { - Instruction *User = cast<Instruction>(Ptr->use_back()); - - if (BitCastInst *CI = dyn_cast<BitCastInst>(User)) { - ConvertUsesToScalar(CI, NewAI, Offset); - CI->eraseFromParent(); - continue; - } - - if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(User)) { - // Compute the offset that this GEP adds to the pointer. - SmallVector<Value*, 8> Indices(GEP->op_begin()+1, GEP->op_end()); - uint64_t GEPOffset = TD->getIndexedOffset(GEP->getPointerOperandType(), - &Indices[0], Indices.size()); - ConvertUsesToScalar(GEP, NewAI, Offset+GEPOffset*8); - GEP->eraseFromParent(); - continue; - } - - IRBuilder<> Builder(User->getParent(), User); - - if (LoadInst *LI = dyn_cast<LoadInst>(User)) { - // The load is a bit extract from NewAI shifted right by Offset bits. 
- Value *LoadedVal = Builder.CreateLoad(NewAI, "tmp"); - Value *NewLoadVal - = ConvertScalar_ExtractValue(LoadedVal, LI->getType(), Offset, Builder); - LI->replaceAllUsesWith(NewLoadVal); - LI->eraseFromParent(); - continue; - } - - if (StoreInst *SI = dyn_cast<StoreInst>(User)) { - assert(SI->getOperand(0) != Ptr && "Consistency error!"); - Instruction *Old = Builder.CreateLoad(NewAI, NewAI->getName()+".in"); - Value *New = ConvertScalar_InsertValue(SI->getOperand(0), Old, Offset, - Builder); - Builder.CreateStore(New, NewAI); - SI->eraseFromParent(); - - // If the load we just inserted is now dead, then the inserted store - // overwrote the entire thing. - if (Old->use_empty()) - Old->eraseFromParent(); - continue; - } - - // If this is a constant sized memset of a constant value (e.g. 0) we can - // transform it into a store of the expanded constant value. - if (MemSetInst *MSI = dyn_cast<MemSetInst>(User)) { - assert(MSI->getRawDest() == Ptr && "Consistency error!"); - unsigned NumBytes = cast<ConstantInt>(MSI->getLength())->getZExtValue(); - if (NumBytes != 0) { - unsigned Val = cast<ConstantInt>(MSI->getValue())->getZExtValue(); - - // Compute the value replicated the right number of times. - APInt APVal(NumBytes*8, Val); - - // Splat the value if non-zero. - if (Val) - for (unsigned i = 1; i != NumBytes; ++i) - APVal |= APVal << 8; - - Instruction *Old = Builder.CreateLoad(NewAI, NewAI->getName()+".in"); - Value *New = ConvertScalar_InsertValue( - ConstantInt::get(User->getContext(), APVal), - Old, Offset, Builder); - Builder.CreateStore(New, NewAI); - - // If the load we just inserted is now dead, then the memset overwrote - // the entire thing. - if (Old->use_empty()) - Old->eraseFromParent(); - } - MSI->eraseFromParent(); - continue; - } - - // If this is a memcpy or memmove into or out of the whole allocation, we - // can handle it like a load or store of the scalar type. - if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(User)) { - assert(Offset == 0 && "must be store to start of alloca"); - - // If the source and destination are both to the same alloca, then this is - // a noop copy-to-self, just delete it. Otherwise, emit a load and store - // as appropriate. - AllocaInst *OrigAI = cast<AllocaInst>(Ptr->getUnderlyingObject(0)); - - if (MTI->getSource()->getUnderlyingObject(0) != OrigAI) { - // Dest must be OrigAI, change this to be a load from the original - // pointer (bitcasted), then a store to our new alloca. - assert(MTI->getRawDest() == Ptr && "Neither use is of pointer?"); - Value *SrcPtr = MTI->getSource(); - SrcPtr = Builder.CreateBitCast(SrcPtr, NewAI->getType()); - - LoadInst *SrcVal = Builder.CreateLoad(SrcPtr, "srcval"); - SrcVal->setAlignment(MTI->getAlignment()); - Builder.CreateStore(SrcVal, NewAI); - } else if (MTI->getDest()->getUnderlyingObject(0) != OrigAI) { - // Src must be OrigAI, change this to be a load from NewAI then a store - // through the original dest pointer (bitcasted). - assert(MTI->getRawSource() == Ptr && "Neither use is of pointer?"); - LoadInst *SrcVal = Builder.CreateLoad(NewAI, "srcval"); - - Value *DstPtr = Builder.CreateBitCast(MTI->getDest(), NewAI->getType()); - StoreInst *NewStore = Builder.CreateStore(SrcVal, DstPtr); - NewStore->setAlignment(MTI->getAlignment()); - } else { - // Noop transfer. 
Src == Dst - } - - MTI->eraseFromParent(); - continue; - } - - llvm_unreachable("Unsupported operation!"); - } -} - -/// ConvertScalar_ExtractValue - Extract a value of type ToType from an integer -/// or vector value FromVal, extracting the bits from the offset specified by -/// Offset. This returns the value, which is of type ToType. -/// -/// This happens when we are converting an "integer union" to a single -/// integer scalar, or when we are converting a "vector union" to a vector with -/// insert/extractelement instructions. -/// -/// Offset is an offset from the original alloca, in bits that need to be -/// shifted to the right. -Value *SROA::ConvertScalar_ExtractValue(Value *FromVal, const Type *ToType, - uint64_t Offset, IRBuilder<> &Builder) { - // If the load is of the whole new alloca, no conversion is needed. - if (FromVal->getType() == ToType && Offset == 0) - return FromVal; - - // If the result alloca is a vector type, this is either an element - // access or a bitcast to another vector type of the same size. - if (const VectorType *VTy = dyn_cast<VectorType>(FromVal->getType())) { - if (ToType->isVectorTy()) - return Builder.CreateBitCast(FromVal, ToType, "tmp"); - - // Otherwise it must be an element access. - unsigned Elt = 0; - if (Offset) { - unsigned EltSize = TD->getTypeAllocSizeInBits(VTy->getElementType()); - Elt = Offset/EltSize; - assert(EltSize*Elt == Offset && "Invalid modulus in validity checking"); - } - // Return the element extracted out of it. - Value *V = Builder.CreateExtractElement(FromVal, ConstantInt::get( - Type::getInt32Ty(FromVal->getContext()), Elt), "tmp"); - if (V->getType() != ToType) - V = Builder.CreateBitCast(V, ToType, "tmp"); - return V; - } - - // If ToType is a first class aggregate, extract out each of the pieces and - // use insertvalue's to form the FCA. - if (const StructType *ST = dyn_cast<StructType>(ToType)) { - const StructLayout &Layout = *TD->getStructLayout(ST); - Value *Res = UndefValue::get(ST); - for (unsigned i = 0, e = ST->getNumElements(); i != e; ++i) { - Value *Elt = ConvertScalar_ExtractValue(FromVal, ST->getElementType(i), - Offset+Layout.getElementOffsetInBits(i), - Builder); - Res = Builder.CreateInsertValue(Res, Elt, i, "tmp"); - } - return Res; - } - - if (const ArrayType *AT = dyn_cast<ArrayType>(ToType)) { - uint64_t EltSize = TD->getTypeAllocSizeInBits(AT->getElementType()); - Value *Res = UndefValue::get(AT); - for (unsigned i = 0, e = AT->getNumElements(); i != e; ++i) { - Value *Elt = ConvertScalar_ExtractValue(FromVal, AT->getElementType(), - Offset+i*EltSize, Builder); - Res = Builder.CreateInsertValue(Res, Elt, i, "tmp"); - } - return Res; - } - - // Otherwise, this must be a union that was converted to an integer value. - const IntegerType *NTy = cast<IntegerType>(FromVal->getType()); - - // If this is a big-endian system and the load is narrower than the - // full alloca type, we need to do a shift to get the right bits. - int ShAmt = 0; - if (TD->isBigEndian()) { - // On big-endian machines, the lowest bit is stored at the bit offset - // from the pointer given by getTypeStoreSizeInBits. This matters for - // integers with a bitwidth that is not a multiple of 8. - ShAmt = TD->getTypeStoreSizeInBits(NTy) - - TD->getTypeStoreSizeInBits(ToType) - Offset; - } else { - ShAmt = Offset; - } - - // Note: we support negative bitwidths (with shl) which are not defined. - // We do this to support (f.e.) loads off the end of a structure where - // only some bits are used. 
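  // Worked example (illustrative, little-endian): loading a float that lives
  // at byte offset 4 of an i64-backed alloca reaches this point with
  // Offset == 32, so the value is materialized roughly as
  //   %t  = lshr i64 %FromVal, 32
  //   %t2 = trunc i64 %t to i32
  //   %f  = bitcast i32 %t2 to float
  // On big-endian targets ShAmt is instead derived from the store sizes
  // computed above, and a negative ShAmt selects the shl path.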
- if (ShAmt > 0 && (unsigned)ShAmt < NTy->getBitWidth()) - FromVal = Builder.CreateLShr(FromVal, - ConstantInt::get(FromVal->getType(), - ShAmt), "tmp"); - else if (ShAmt < 0 && (unsigned)-ShAmt < NTy->getBitWidth()) - FromVal = Builder.CreateShl(FromVal, - ConstantInt::get(FromVal->getType(), - -ShAmt), "tmp"); - - // Finally, unconditionally truncate the integer to the right width. - unsigned LIBitWidth = TD->getTypeSizeInBits(ToType); - if (LIBitWidth < NTy->getBitWidth()) - FromVal = - Builder.CreateTrunc(FromVal, IntegerType::get(FromVal->getContext(), - LIBitWidth), "tmp"); - else if (LIBitWidth > NTy->getBitWidth()) - FromVal = - Builder.CreateZExt(FromVal, IntegerType::get(FromVal->getContext(), - LIBitWidth), "tmp"); - - // If the result is an integer, this is a trunc or bitcast. - if (ToType->isIntegerTy()) { - // Should be done. - } else if (ToType->isFloatingPointTy() || ToType->isVectorTy()) { - // Just do a bitcast, we know the sizes match up. - FromVal = Builder.CreateBitCast(FromVal, ToType, "tmp"); - } else { - // Otherwise must be a pointer. - FromVal = Builder.CreateIntToPtr(FromVal, ToType, "tmp"); - } - assert(FromVal->getType() == ToType && "Didn't convert right?"); - return FromVal; -} - -/// ConvertScalar_InsertValue - Insert the value "SV" into the existing integer -/// or vector value "Old" at the offset specified by Offset. -/// -/// This happens when we are converting an "integer union" to a -/// single integer scalar, or when we are converting a "vector union" to a -/// vector with insert/extractelement instructions. -/// -/// Offset is an offset from the original alloca, in bits that need to be -/// shifted to the right. -Value *SROA::ConvertScalar_InsertValue(Value *SV, Value *Old, - uint64_t Offset, IRBuilder<> &Builder) { - - // Convert the stored type to the actual type, shift it left to insert - // then 'or' into place. - const Type *AllocaType = Old->getType(); - LLVMContext &Context = Old->getContext(); - - if (const VectorType *VTy = dyn_cast<VectorType>(AllocaType)) { - uint64_t VecSize = TD->getTypeAllocSizeInBits(VTy); - uint64_t ValSize = TD->getTypeAllocSizeInBits(SV->getType()); - - // Changing the whole vector with memset or with an access of a different - // vector type? - if (ValSize == VecSize) - return Builder.CreateBitCast(SV, AllocaType, "tmp"); - - uint64_t EltSize = TD->getTypeAllocSizeInBits(VTy->getElementType()); - - // Must be an element insertion. - unsigned Elt = Offset/EltSize; - - if (SV->getType() != VTy->getElementType()) - SV = Builder.CreateBitCast(SV, VTy->getElementType(), "tmp"); - - SV = Builder.CreateInsertElement(Old, SV, - ConstantInt::get(Type::getInt32Ty(SV->getContext()), Elt), - "tmp"); - return SV; - } - - // If SV is a first-class aggregate value, insert each value recursively. 
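  // For example (illustrative): storing a {i32, float} first-class aggregate
  // at Offset 0 is decomposed below into an extractvalue of each field
  // followed by a recursive ConvertScalar_InsertValue at bit offsets 0 and
  // Layout.getElementOffsetInBits(1) (32 bits under the usual layout of this
  // struct, though that is target-dependent).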
- if (const StructType *ST = dyn_cast<StructType>(SV->getType())) { - const StructLayout &Layout = *TD->getStructLayout(ST); - for (unsigned i = 0, e = ST->getNumElements(); i != e; ++i) { - Value *Elt = Builder.CreateExtractValue(SV, i, "tmp"); - Old = ConvertScalar_InsertValue(Elt, Old, - Offset+Layout.getElementOffsetInBits(i), - Builder); - } - return Old; - } - - if (const ArrayType *AT = dyn_cast<ArrayType>(SV->getType())) { - uint64_t EltSize = TD->getTypeAllocSizeInBits(AT->getElementType()); - for (unsigned i = 0, e = AT->getNumElements(); i != e; ++i) { - Value *Elt = Builder.CreateExtractValue(SV, i, "tmp"); - Old = ConvertScalar_InsertValue(Elt, Old, Offset+i*EltSize, Builder); - } - return Old; - } - - // If SV is a float, convert it to the appropriate integer type. - // If it is a pointer, do the same. - unsigned SrcWidth = TD->getTypeSizeInBits(SV->getType()); - unsigned DestWidth = TD->getTypeSizeInBits(AllocaType); - unsigned SrcStoreWidth = TD->getTypeStoreSizeInBits(SV->getType()); - unsigned DestStoreWidth = TD->getTypeStoreSizeInBits(AllocaType); - if (SV->getType()->isFloatingPointTy() || SV->getType()->isVectorTy()) - SV = Builder.CreateBitCast(SV, - IntegerType::get(SV->getContext(),SrcWidth), "tmp"); - else if (SV->getType()->isPointerTy()) - SV = Builder.CreatePtrToInt(SV, TD->getIntPtrType(SV->getContext()), "tmp"); - - // Zero extend or truncate the value if needed. - if (SV->getType() != AllocaType) { - if (SV->getType()->getPrimitiveSizeInBits() < - AllocaType->getPrimitiveSizeInBits()) - SV = Builder.CreateZExt(SV, AllocaType, "tmp"); - else { - // Truncation may be needed if storing more than the alloca can hold - // (undefined behavior). - SV = Builder.CreateTrunc(SV, AllocaType, "tmp"); - SrcWidth = DestWidth; - SrcStoreWidth = DestStoreWidth; - } - } - - // If this is a big-endian system and the store is narrower than the - // full alloca type, we need to do a shift to get the right bits. - int ShAmt = 0; - if (TD->isBigEndian()) { - // On big-endian machines, the lowest bit is stored at the bit offset - // from the pointer given by getTypeStoreSizeInBits. This matters for - // integers with a bitwidth that is not a multiple of 8. - ShAmt = DestStoreWidth - SrcStoreWidth - Offset; - } else { - ShAmt = Offset; - } - - // Note: we support negative bitwidths (with shr) which are not defined. - // We do this to support (f.e.) stores off the end of a structure where - // only some bits in the structure are set. - APInt Mask(APInt::getLowBitsSet(DestWidth, SrcWidth)); - if (ShAmt > 0 && (unsigned)ShAmt < DestWidth) { - SV = Builder.CreateShl(SV, ConstantInt::get(SV->getType(), - ShAmt), "tmp"); - Mask <<= ShAmt; - } else if (ShAmt < 0 && (unsigned)-ShAmt < DestWidth) { - SV = Builder.CreateLShr(SV, ConstantInt::get(SV->getType(), - -ShAmt), "tmp"); - Mask = Mask.lshr(-ShAmt); - } - - // Mask out the bits we are about to insert from the old value, and or - // in the new bits. - if (SrcWidth != DestWidth) { - assert(DestWidth > SrcWidth); - Old = Builder.CreateAnd(Old, ConstantInt::get(Context, ~Mask), "mask"); - SV = Builder.CreateOr(Old, SV, "ins"); - } - return SV; -} - /// PointsToConstantGlobal - Return true if V (possibly indirectly) points to @@ -1699,21 +1755,23 @@ static bool PointsToConstantGlobal(Value *V) { /// the uses. If we see a memcpy/memmove that targets an unoffseted pointer to /// the alloca, and if the source pointer is a pointer to a constant global, we /// can optimize this. 
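// A C-level illustration (not from this patch) of the pattern being detected:
//
//   static const int Table[8] = {1, 2, 3, 4, 5, 6, 7, 8};
//   int sum(void) {
//     int Buf[8];
//     memcpy(Buf, Table, sizeof(Buf));
//     int S = 0;
//     for (int i = 0; i != 8; ++i) S += Buf[i];
//     return S;
//   }
//
// The alloca for Buf is written only by the memcpy from the constant global
// Table and is otherwise only read, so every use of Buf can be rewritten to
// use Table directly and both the alloca and the copy become dead.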
-static bool isOnlyCopiedFromConstantGlobal(Value *V, Instruction *&TheCopy, +static bool isOnlyCopiedFromConstantGlobal(Value *V, MemTransferInst *&TheCopy, bool isOffset) { for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI!=E; ++UI) { - if (LoadInst *LI = dyn_cast<LoadInst>(*UI)) + User *U = cast<Instruction>(*UI); + + if (LoadInst *LI = dyn_cast<LoadInst>(U)) // Ignore non-volatile loads, they are always ok. if (!LI->isVolatile()) continue; - if (BitCastInst *BCI = dyn_cast<BitCastInst>(*UI)) { + if (BitCastInst *BCI = dyn_cast<BitCastInst>(U)) { // If uses of the bitcast are ok, we are ok. if (!isOnlyCopiedFromConstantGlobal(BCI, TheCopy, isOffset)) return false; continue; } - if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(*UI)) { + if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(U)) { // If the GEP has all zero indices, it doesn't offset the pointer. If it // doesn't, it does. if (!isOnlyCopiedFromConstantGlobal(GEP, TheCopy, @@ -1724,7 +1782,8 @@ static bool isOnlyCopiedFromConstantGlobal(Value *V, Instruction *&TheCopy, // If this is isn't our memcpy/memmove, reject it as something we can't // handle. - if (!isa<MemTransferInst>(*UI)) + MemTransferInst *MI = dyn_cast<MemTransferInst>(U); + if (MI == 0) return false; // If we already have seen a copy, reject the second one. @@ -1737,10 +1796,8 @@ static bool isOnlyCopiedFromConstantGlobal(Value *V, Instruction *&TheCopy, // If the memintrinsic isn't using the alloca as the dest, reject it. if (UI.getOperandNo() != 1) return false; - MemIntrinsic *MI = cast<MemIntrinsic>(*UI); - // If the source of the memcpy/move is not a constant global, reject it. - if (!PointsToConstantGlobal(MI->getOperand(2))) + if (!PointsToConstantGlobal(MI->getSource())) return false; // Otherwise, the transform is safe. Remember the copy instruction. @@ -1752,8 +1809,8 @@ static bool isOnlyCopiedFromConstantGlobal(Value *V, Instruction *&TheCopy, /// isOnlyCopiedFromConstantGlobal - Return true if the specified alloca is only /// modified by a copy from a constant global. If we can prove this, we can /// replace any uses of the alloca with uses of the global directly. -Instruction *SROA::isOnlyCopiedFromConstantGlobal(AllocaInst *AI) { - Instruction *TheCopy = 0; +MemTransferInst *SROA::isOnlyCopiedFromConstantGlobal(AllocaInst *AI) { + MemTransferInst *TheCopy = 0; if (::isOnlyCopiedFromConstantGlobal(AI, TheCopy, false)) return TheCopy; return 0; diff --git a/lib/Transforms/Scalar/SimplifyHalfPowrLibCalls.cpp b/lib/Transforms/Scalar/SimplifyHalfPowrLibCalls.cpp index 4464961..c3408e7 100644 --- a/lib/Transforms/Scalar/SimplifyHalfPowrLibCalls.cpp +++ b/lib/Transforms/Scalar/SimplifyHalfPowrLibCalls.cpp @@ -93,7 +93,8 @@ InlineHalfPowrs(const std::vector<Instruction *> &HalfPowrs, // Inline the call, taking care of what code ends up where. 
NewBlock = SplitBlock(NextInst->getParent(), NextInst, this); - bool B = InlineFunction(Call, 0, TD); + InlineFunctionInfo IFI(0, TD); + bool B = InlineFunction(Call, IFI); assert(B && "half_powr didn't inline?"); B=B; BasicBlock *NewBody = NewBlock->getSinglePredecessor(); diff --git a/lib/Transforms/Scalar/TailRecursionElimination.cpp b/lib/Transforms/Scalar/TailRecursionElimination.cpp index 162d902..5ad5de2 100644 --- a/lib/Transforms/Scalar/TailRecursionElimination.cpp +++ b/lib/Transforms/Scalar/TailRecursionElimination.cpp @@ -59,6 +59,8 @@ #include "llvm/Instructions.h" #include "llvm/Pass.h" #include "llvm/Analysis/CaptureTracking.h" +#include "llvm/Analysis/InlineCost.h" +#include "llvm/Support/CallSite.h" #include "llvm/Support/CFG.h" #include "llvm/ADT/Statistic.h" using namespace llvm; @@ -328,15 +330,6 @@ bool TailCallElim::ProcessReturningBlock(ReturnInst *Ret, BasicBlock *&OldEntry, if (&BB->front() == Ret) // Make sure there is something before the ret... return false; - // If the return is in the entry block, then making this transformation would - // turn infinite recursion into an infinite loop. This transformation is ok - // in theory, but breaks some code like: - // double fabs(double f) { return __builtin_fabs(f); } // a 'fabs' call - // disable this xform in this case, because the code generator will lower the - // call to fabs into inline code. - if (BB == &F->getEntryBlock()) - return false; - // Scan backwards from the return, checking to see if there is a tail call in // this block. If so, set CI to it. CallInst *CI; @@ -356,6 +349,25 @@ bool TailCallElim::ProcessReturningBlock(ReturnInst *Ret, BasicBlock *&OldEntry, if (CI->isTailCall() && CannotTailCallElimCallsMarkedTail) return false; + // As a special case, detect code like this: + // double fabs(double f) { return __builtin_fabs(f); } // a 'fabs' call + // and disable this xform in this case, because the code generator will + // lower the call to fabs into inline code. + if (BB == &F->getEntryBlock() && + &BB->front() == CI && &*++BB->begin() == Ret && + callIsSmall(F)) { + // A single-block function with just a call and a return. Check that + // the arguments match. + CallSite::arg_iterator I = CallSite(CI).arg_begin(), + E = CallSite(CI).arg_end(); + Function::arg_iterator FI = F->arg_begin(), + FE = F->arg_end(); + for (; I != E && FI != FE; ++I, ++FI) + if (*I != &*FI) break; + if (I == E && FI == FE) + return false; + } + // If we are introducing accumulator recursion to eliminate associative // operations after the call instruction, this variable contains the initial // value for the accumulator. If this value is set, we actually perform diff --git a/lib/Transforms/Utils/AddrModeMatcher.cpp b/lib/Transforms/Utils/AddrModeMatcher.cpp index c70bab5..ea9d1c1 100644 --- a/lib/Transforms/Utils/AddrModeMatcher.cpp +++ b/lib/Transforms/Utils/AddrModeMatcher.cpp @@ -434,19 +434,21 @@ static bool FindAllMemoryUses(Instruction *I, // Loop over all the uses, recursively processing them. for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI != E; ++UI) { - if (LoadInst *LI = dyn_cast<LoadInst>(*UI)) { + User *U = *UI; + + if (LoadInst *LI = dyn_cast<LoadInst>(U)) { MemoryUses.push_back(std::make_pair(LI, UI.getOperandNo())); continue; } - if (StoreInst *SI = dyn_cast<StoreInst>(*UI)) { + if (StoreInst *SI = dyn_cast<StoreInst>(U)) { unsigned opNo = UI.getOperandNo(); if (opNo == 0) return true; // Storing addr, not into addr. 
MemoryUses.push_back(std::make_pair(SI, opNo)); continue; } - if (CallInst *CI = dyn_cast<CallInst>(*UI)) { + if (CallInst *CI = dyn_cast<CallInst>(U)) { InlineAsm *IA = dyn_cast<InlineAsm>(CI->getCalledValue()); if (IA == 0) return true; @@ -456,7 +458,7 @@ static bool FindAllMemoryUses(Instruction *I, continue; } - if (FindAllMemoryUses(cast<Instruction>(*UI), MemoryUses, ConsideredInsts, + if (FindAllMemoryUses(cast<Instruction>(U), MemoryUses, ConsideredInsts, TLI)) return true; } diff --git a/lib/Transforms/Utils/BasicInliner.cpp b/lib/Transforms/Utils/BasicInliner.cpp index c580b8f..f0e31ef 100644 --- a/lib/Transforms/Utils/BasicInliner.cpp +++ b/lib/Transforms/Utils/BasicInliner.cpp @@ -129,7 +129,8 @@ void BasicInlinerImpl::inlineFunctions() { } // Inline - if (InlineFunction(CS, NULL, TD)) { + InlineFunctionInfo IFI(0, TD); + if (InlineFunction(CS, IFI)) { if (Callee->use_empty() && (Callee->hasLocalLinkage() || Callee->hasAvailableExternallyLinkage())) DeadFunctions.insert(Callee); diff --git a/lib/Transforms/Utils/BuildLibCalls.cpp b/lib/Transforms/Utils/BuildLibCalls.cpp index fff8179..767fa3a 100644 --- a/lib/Transforms/Utils/BuildLibCalls.cpp +++ b/lib/Transforms/Utils/BuildLibCalls.cpp @@ -90,15 +90,15 @@ Value *llvm::EmitStrCpy(Value *Dst, Value *Src, IRBuilder<> &B, /// EmitStrNCpy - Emit a call to the strncpy function to the builder, for the /// specified pointer arguments. Value *llvm::EmitStrNCpy(Value *Dst, Value *Src, Value *Len, - IRBuilder<> &B, const TargetData *TD) { + IRBuilder<> &B, const TargetData *TD, StringRef Name) { Module *M = B.GetInsertBlock()->getParent()->getParent(); AttributeWithIndex AWI[2]; AWI[0] = AttributeWithIndex::get(2, Attribute::NoCapture); AWI[1] = AttributeWithIndex::get(~0u, Attribute::NoUnwind); const Type *I8Ptr = B.getInt8PtrTy(); - Value *StrNCpy = M->getOrInsertFunction("strncpy", AttrListPtr::get(AWI, 2), - I8Ptr, I8Ptr, I8Ptr, - Len->getType(), NULL); + Value *StrNCpy = M->getOrInsertFunction(Name, AttrListPtr::get(AWI, 2), + I8Ptr, I8Ptr, I8Ptr, + Len->getType(), NULL); CallInst *CI = B.CreateCall3(StrNCpy, CastToCStr(Dst, B), CastToCStr(Src, B), Len, "strncpy"); if (const Function *F = dyn_cast<Function>(StrNCpy->stripPointerCasts())) @@ -373,15 +373,29 @@ void llvm::EmitFWrite(Value *Ptr, Value *Size, Value *File, SimplifyFortifiedLibCalls::~SimplifyFortifiedLibCalls() { } bool SimplifyFortifiedLibCalls::fold(CallInst *CI, const TargetData *TD) { + // We really need TargetData for later. + if (!TD) return false; + this->CI = CI; - StringRef Name = CI->getCalledFunction()->getName(); + Function *Callee = CI->getCalledFunction(); + StringRef Name = Callee->getName(); + const FunctionType *FT = Callee->getFunctionType(); BasicBlock *BB = CI->getParent(); - IRBuilder<> B(CI->getParent()->getContext()); + LLVMContext &Context = CI->getParent()->getContext(); + IRBuilder<> B(Context); // Set the builder to the instruction after the call. B.SetInsertPoint(BB, CI); if (Name == "__memcpy_chk") { + // Check if this has the right signature. 
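    // For reference, the signature enforced here matches the usual fortified
    // prototype (assumed, not introduced by this patch):
    //   void *__memcpy_chk(void *dst, const void *src, size_t len, size_t dstlen);
    // i.e. a pointer return equal to the first parameter's type, two pointer
    // parameters, and two intptr-sized size parameters.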
+ if (FT->getNumParams() != 4 || FT->getReturnType() != FT->getParamType(0) || + !FT->getParamType(0)->isPointerTy() || + !FT->getParamType(1)->isPointerTy() || + FT->getParamType(2) != TD->getIntPtrType(Context) || + FT->getParamType(3) != TD->getIntPtrType(Context)) + return false; + if (isFoldable(4, 3, false)) { EmitMemCpy(CI->getOperand(1), CI->getOperand(2), CI->getOperand(3), 1, false, B, TD); @@ -397,6 +411,14 @@ bool SimplifyFortifiedLibCalls::fold(CallInst *CI, const TargetData *TD) { } if (Name == "__memmove_chk") { + // Check if this has the right signature. + if (FT->getNumParams() != 4 || FT->getReturnType() != FT->getParamType(0) || + !FT->getParamType(0)->isPointerTy() || + !FT->getParamType(1)->isPointerTy() || + FT->getParamType(2) != TD->getIntPtrType(Context) || + FT->getParamType(3) != TD->getIntPtrType(Context)) + return false; + if (isFoldable(4, 3, false)) { EmitMemMove(CI->getOperand(1), CI->getOperand(2), CI->getOperand(3), 1, false, B, TD); @@ -407,6 +429,14 @@ bool SimplifyFortifiedLibCalls::fold(CallInst *CI, const TargetData *TD) { } if (Name == "__memset_chk") { + // Check if this has the right signature. + if (FT->getNumParams() != 4 || FT->getReturnType() != FT->getParamType(0) || + !FT->getParamType(0)->isPointerTy() || + !FT->getParamType(1)->isIntegerTy() || + FT->getParamType(2) != TD->getIntPtrType(Context) || + FT->getParamType(3) != TD->getIntPtrType(Context)) + return false; + if (isFoldable(4, 3, false)) { Value *Val = B.CreateIntCast(CI->getOperand(2), B.getInt8Ty(), false); @@ -418,6 +448,15 @@ bool SimplifyFortifiedLibCalls::fold(CallInst *CI, const TargetData *TD) { } if (Name == "__strcpy_chk" || Name == "__stpcpy_chk") { + // Check if this has the right signature. + if (FT->getNumParams() != 3 || + FT->getReturnType() != FT->getParamType(0) || + FT->getParamType(0) != FT->getParamType(1) || + FT->getParamType(0) != Type::getInt8PtrTy(Context) || + FT->getParamType(2) != TD->getIntPtrType(Context)) + return 0; + + // If a) we don't have any length information, or b) we know this will // fit then just lower to a plain st[rp]cpy. Otherwise we'll keep our // st[rp]cpy_chk call which may fail at runtime if the size is too long. @@ -432,10 +471,18 @@ bool SimplifyFortifiedLibCalls::fold(CallInst *CI, const TargetData *TD) { return false; } - if (Name == "__strncpy_chk") { + if (Name == "__strncpy_chk" || Name == "__stpncpy_chk") { + // Check if this has the right signature. 
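    // Assumed prototypes for the two entry points handled here:
    //   char *__strncpy_chk(char *dst, const char *src, size_t n, size_t dstlen);
    //   char *__stpncpy_chk(char *dst, const char *src, size_t n, size_t dstlen);
    // Name.substr(2, 7) below strips the "__" prefix and the "_chk" suffix,
    // so EmitStrNCpy is asked to emit either "strncpy" or "stpncpy".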
+ if (FT->getNumParams() != 4 || FT->getReturnType() != FT->getParamType(0) || + FT->getParamType(0) != FT->getParamType(1) || + FT->getParamType(0) != Type::getInt8PtrTy(Context) || + !FT->getParamType(2)->isIntegerTy() || + FT->getParamType(3) != TD->getIntPtrType(Context)) + return false; + if (isFoldable(4, 3, false)) { Value *Ret = EmitStrNCpy(CI->getOperand(1), CI->getOperand(2), - CI->getOperand(3), B, TD); + CI->getOperand(3), B, TD, Name.substr(2, 7)); replaceCall(Ret); return true; } diff --git a/lib/Transforms/Utils/CloneFunction.cpp b/lib/Transforms/Utils/CloneFunction.cpp index 62fc2ec..8ad66dd 100644 --- a/lib/Transforms/Utils/CloneFunction.cpp +++ b/lib/Transforms/Utils/CloneFunction.cpp @@ -23,7 +23,7 @@ #include "llvm/LLVMContext.h" #include "llvm/Metadata.h" #include "llvm/Support/CFG.h" -#include "llvm/Transforms/Utils/ValueMapper.h" +#include "ValueMapper.h" #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/DebugInfo.h" #include "llvm/ADT/SmallVector.h" diff --git a/lib/Transforms/Utils/CloneModule.cpp b/lib/Transforms/Utils/CloneModule.cpp index a163f89..b87c082 100644 --- a/lib/Transforms/Utils/CloneModule.cpp +++ b/lib/Transforms/Utils/CloneModule.cpp @@ -17,7 +17,7 @@ #include "llvm/DerivedTypes.h" #include "llvm/TypeSymbolTable.h" #include "llvm/Constant.h" -#include "llvm/Transforms/Utils/ValueMapper.h" +#include "ValueMapper.h" using namespace llvm; /// CloneModule - Return an exact copy of the specified module. This is not as diff --git a/lib/Transforms/Utils/CodeExtractor.cpp b/lib/Transforms/Utils/CodeExtractor.cpp index b208494..b51f751 100644 --- a/lib/Transforms/Utils/CodeExtractor.cpp +++ b/lib/Transforms/Utils/CodeExtractor.cpp @@ -751,7 +751,7 @@ ExtractCodeRegion(const std::vector<BasicBlock*> &code) { // verifyFunction(*oldFunction); DEBUG(if (verifyFunction(*newFunction)) - llvm_report_error("verifyFunction failed!")); + report_fatal_error("verifyFunction failed!")); return newFunction; } diff --git a/lib/Transforms/Utils/InlineFunction.cpp b/lib/Transforms/Utils/InlineFunction.cpp index 75c9ccd..91390bc 100644 --- a/lib/Transforms/Utils/InlineFunction.cpp +++ b/lib/Transforms/Utils/InlineFunction.cpp @@ -15,7 +15,6 @@ #include "llvm/Transforms/Utils/Cloning.h" #include "llvm/Constants.h" #include "llvm/DerivedTypes.h" -#include "llvm/LLVMContext.h" #include "llvm/Module.h" #include "llvm/Instructions.h" #include "llvm/IntrinsicInst.h" @@ -29,13 +28,11 @@ #include "llvm/Support/CallSite.h" using namespace llvm; -bool llvm::InlineFunction(CallInst *CI, CallGraph *CG, const TargetData *TD, - SmallVectorImpl<AllocaInst*> *StaticAllocas) { - return InlineFunction(CallSite(CI), CG, TD, StaticAllocas); +bool llvm::InlineFunction(CallInst *CI, InlineFunctionInfo &IFI) { + return InlineFunction(CallSite(CI), IFI); } -bool llvm::InlineFunction(InvokeInst *II, CallGraph *CG, const TargetData *TD, - SmallVectorImpl<AllocaInst*> *StaticAllocas) { - return InlineFunction(CallSite(II), CG, TD, StaticAllocas); +bool llvm::InlineFunction(InvokeInst *II, InlineFunctionInfo &IFI) { + return InlineFunction(CallSite(II), IFI); } @@ -75,7 +72,7 @@ static void HandleCallsInBlockInlinedThroughInvoke(BasicBlock *BB, II->setAttributes(CI->getAttributes()); // Make sure that anything using the call now uses the invoke! This also - // updates the CallGraph if present. + // updates the CallGraph if present, because it uses a WeakVH. 
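    // (That is, the call graph's reference to the original CallInst is held
    // through a value handle, so the replaceAllUsesWith below retargets the
    // edge to the new InvokeInst without an explicit call-graph update.)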
CI->replaceAllUsesWith(II); // Delete the unconditional branch inserted by splitBasicBlock @@ -173,7 +170,8 @@ static void HandleInlinedInvoke(InvokeInst *II, BasicBlock *FirstNewBlock, static void UpdateCallGraphAfterInlining(CallSite CS, Function::iterator FirstNewBlock, DenseMap<const Value*, Value*> &ValueMap, - CallGraph &CG) { + InlineFunctionInfo &IFI) { + CallGraph &CG = *IFI.CG; const Function *Caller = CS.getInstruction()->getParent()->getParent(); const Function *Callee = CS.getCalledFunction(); CallGraphNode *CalleeNode = CG[Callee]; @@ -201,8 +199,27 @@ static void UpdateCallGraphAfterInlining(CallSite CS, // If the call was inlined, but then constant folded, there is no edge to // add. Check for this case. - if (Instruction *NewCall = dyn_cast<Instruction>(VMI->second)) - CallerNode->addCalledFunction(CallSite::get(NewCall), I->second); + Instruction *NewCall = dyn_cast<Instruction>(VMI->second); + if (NewCall == 0) continue; + + // Remember that this call site got inlined for the client of + // InlineFunction. + IFI.InlinedCalls.push_back(NewCall); + + // It's possible that inlining the callsite will cause it to go from an + // indirect to a direct call by resolving a function pointer. If this + // happens, set the callee of the new call site to a more precise + // destination. This can also happen if the call graph node of the caller + // was just unnecessarily imprecise. + if (I->second->getFunction() == 0) + if (Function *F = CallSite(NewCall).getCalledFunction()) { + // Indirect call site resolved to direct call. + CallerNode->addCalledFunction(CallSite::get(NewCall), CG[F]); + + continue; + } + + CallerNode->addCalledFunction(CallSite::get(NewCall), I->second); } // Update the call graph by deleting the edge from Callee to Caller. We must @@ -219,13 +236,15 @@ static void UpdateCallGraphAfterInlining(CallSite CS, // exists in the instruction stream. Similiarly this will inline a recursive // function by one level. // -bool llvm::InlineFunction(CallSite CS, CallGraph *CG, const TargetData *TD, - SmallVectorImpl<AllocaInst*> *StaticAllocas) { +bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI) { Instruction *TheCall = CS.getInstruction(); LLVMContext &Context = TheCall->getContext(); assert(TheCall->getParent() && TheCall->getParent()->getParent() && "Instruction not in function!"); + // If IFI has any state in it, zap it before we fill it in. + IFI.reset(); + const Function *CalledFunc = CS.getCalledFunction(); if (CalledFunc == 0 || // Can't inline external function or indirect CalledFunc->isDeclaration() || // call, or call to a vararg function! @@ -292,7 +311,7 @@ bool llvm::InlineFunction(CallSite CS, CallGraph *CG, const TargetData *TD, // Create the alloca. If we have TargetData, use nice alignment. unsigned Align = 1; - if (TD) Align = TD->getPrefTypeAlignment(AggTy); + if (IFI.TD) Align = IFI.TD->getPrefTypeAlignment(AggTy); Value *NewAlloca = new AllocaInst(AggTy, 0, Align, I->getName(), &*Caller->begin()->begin()); @@ -305,11 +324,11 @@ bool llvm::InlineFunction(CallSite CS, CallGraph *CG, const TargetData *TD, Value *SrcCast = new BitCastInst(*AI, VoidPtrTy, "tmp", TheCall); Value *Size; - if (TD == 0) + if (IFI.TD == 0) Size = ConstantExpr::getSizeOf(AggTy); else Size = ConstantInt::get(Type::getInt64Ty(Context), - TD->getTypeStoreSize(AggTy)); + IFI.TD->getTypeStoreSize(AggTy)); // Always generate a memcpy of alignment 1 here because we don't know // the alignment of the src pointer. 
Other optimizations can infer @@ -323,7 +342,7 @@ bool llvm::InlineFunction(CallSite CS, CallGraph *CG, const TargetData *TD, CallInst::Create(MemCpyFn, CallArgs, CallArgs+5, "", TheCall); // If we have a call graph, update it. - if (CG) { + if (CallGraph *CG = IFI.CG) { CallGraphNode *MemCpyCGN = CG->getOrInsertFunction(MemCpyFn); CallGraphNode *CallerNode = (*CG)[Caller]; CallerNode->addCalledFunction(TheMemCpy, MemCpyCGN); @@ -342,14 +361,14 @@ bool llvm::InlineFunction(CallSite CS, CallGraph *CG, const TargetData *TD, // (which can happen, e.g., because an argument was constant), but we'll be // happy with whatever the cloner can do. CloneAndPruneFunctionInto(Caller, CalledFunc, ValueMap, Returns, ".i", - &InlinedFunctionInfo, TD, TheCall); + &InlinedFunctionInfo, IFI.TD, TheCall); // Remember the first block that is newly cloned over. FirstNewBlock = LastBlock; ++FirstNewBlock; // Update the callgraph if requested. - if (CG) - UpdateCallGraphAfterInlining(CS, FirstNewBlock, ValueMap, *CG); + if (IFI.CG) + UpdateCallGraphAfterInlining(CS, FirstNewBlock, ValueMap, IFI); } // If there are any alloca instructions in the block that used to be the entry @@ -376,13 +395,13 @@ bool llvm::InlineFunction(CallSite CS, CallGraph *CG, const TargetData *TD, // Keep track of the static allocas that we inline into the caller if the // StaticAllocas pointer is non-null. - if (StaticAllocas) StaticAllocas->push_back(AI); + IFI.StaticAllocas.push_back(AI); // Scan for the block of allocas that we can move over, and move them // all at once. while (isa<AllocaInst>(I) && isa<Constant>(cast<AllocaInst>(I)->getArraySize())) { - if (StaticAllocas) StaticAllocas->push_back(cast<AllocaInst>(I)); + IFI.StaticAllocas.push_back(cast<AllocaInst>(I)); ++I; } @@ -406,7 +425,7 @@ bool llvm::InlineFunction(CallSite CS, CallGraph *CG, const TargetData *TD, // If we are preserving the callgraph, add edges to the stacksave/restore // functions for the calls we insert. CallGraphNode *StackSaveCGN = 0, *StackRestoreCGN = 0, *CallerNode = 0; - if (CG) { + if (CallGraph *CG = IFI.CG) { StackSaveCGN = CG->getOrInsertFunction(StackSave); StackRestoreCGN = CG->getOrInsertFunction(StackRestore); CallerNode = (*CG)[Caller]; @@ -415,13 +434,13 @@ bool llvm::InlineFunction(CallSite CS, CallGraph *CG, const TargetData *TD, // Insert the llvm.stacksave. CallInst *SavedPtr = CallInst::Create(StackSave, "savedstack", FirstNewBlock->begin()); - if (CG) CallerNode->addCalledFunction(SavedPtr, StackSaveCGN); + if (IFI.CG) CallerNode->addCalledFunction(SavedPtr, StackSaveCGN); // Insert a call to llvm.stackrestore before any return instructions in the // inlined function. for (unsigned i = 0, e = Returns.size(); i != e; ++i) { CallInst *CI = CallInst::Create(StackRestore, SavedPtr, "", Returns[i]); - if (CG) CallerNode->addCalledFunction(CI, StackRestoreCGN); + if (IFI.CG) CallerNode->addCalledFunction(CI, StackRestoreCGN); } // Count the number of StackRestore calls we insert. 
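// Illustrative sketch (not part of this patch) of driving the reworked
// interface from a client pass; the helper name is invented for the example,
// and it assumes the declarations from llvm/Transforms/Utils/Cloning.h and
// llvm/Support/CallSite.h as used by the call sites above.
static bool InlineAndCollectAllocas(CallSite CS, const TargetData *TD,
                                    SmallVectorImpl<AllocaInst*> &Allocas) {
  InlineFunctionInfo IFI(0, TD);     // no call graph in this example
  if (!InlineFunction(CS, IFI))
    return false;
  // Static allocas moved into the caller (and any inlined call sites) are now
  // reported back through IFI instead of through extra out-parameters.
  Allocas.append(IFI.StaticAllocas.begin(), IFI.StaticAllocas.end());
  return true;
}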
@@ -434,7 +453,7 @@ bool llvm::InlineFunction(CallSite CS, CallGraph *CG, const TargetData *TD, BB != E; ++BB) if (UnwindInst *UI = dyn_cast<UnwindInst>(BB->getTerminator())) { CallInst *CI = CallInst::Create(StackRestore, SavedPtr, "", UI); - if (CG) CallerNode->addCalledFunction(CI, StackRestoreCGN); + if (IFI.CG) CallerNode->addCalledFunction(CI, StackRestoreCGN); ++NumStackRestores; } } diff --git a/lib/Transforms/Utils/LoopUnroll.cpp b/lib/Transforms/Utils/LoopUnroll.cpp index ac59b4d..84fd1eb 100644 --- a/lib/Transforms/Utils/LoopUnroll.cpp +++ b/lib/Transforms/Utils/LoopUnroll.cpp @@ -183,8 +183,8 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, LoopInfo* LI, LPPassManager* LPM) // For the first iteration of the loop, we should use the precloned values for // PHI nodes. Insert associations now. - typedef DenseMap<const Value*, Value*> ValueMapTy; - ValueMapTy LastValueMap; + typedef DenseMap<const Value*, Value*> ValueToValueMapTy; + ValueToValueMapTy LastValueMap; std::vector<PHINode*> OrigPHINode; for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) { PHINode *PN = cast<PHINode>(I); @@ -205,7 +205,7 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, LoopInfo* LI, LPPassManager* LPM) for (std::vector<BasicBlock*>::iterator BB = LoopBlocks.begin(), E = LoopBlocks.end(); BB != E; ++BB) { - ValueMapTy ValueMap; + ValueToValueMapTy ValueMap; BasicBlock *New = CloneBasicBlock(*BB, ValueMap, "." + Twine(It)); Header->getParent()->getBasicBlockList().push_back(New); @@ -224,7 +224,7 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, LoopInfo* LI, LPPassManager* LPM) // Update our running map of newest clones LastValueMap[*BB] = New; - for (ValueMapTy::iterator VI = ValueMap.begin(), VE = ValueMap.end(); + for (ValueToValueMapTy::iterator VI = ValueMap.begin(), VE = ValueMap.end(); VI != VE; ++VI) LastValueMap[VI->first] = VI->second; diff --git a/lib/Transforms/Utils/LowerInvoke.cpp b/lib/Transforms/Utils/LowerInvoke.cpp index bbbcc1a..0ed8c72 100644 --- a/lib/Transforms/Utils/LowerInvoke.cpp +++ b/lib/Transforms/Utils/LowerInvoke.cpp @@ -70,15 +70,18 @@ namespace { // Used for expensive EH support. const Type *JBLinkTy; GlobalVariable *JBListHead; - Constant *SetJmpFn, *LongJmpFn; + Constant *SetJmpFn, *LongJmpFn, *StackSaveFn, *StackRestoreFn; + bool useExpensiveEHSupport; // We peek in TLI to grab the target's jmp_buf size and alignment const TargetLowering *TLI; public: static char ID; // Pass identification, replacement for typeid - explicit LowerInvoke(const TargetLowering *tli = NULL) - : FunctionPass(&ID), TLI(tli) { } + explicit LowerInvoke(const TargetLowering *tli = NULL, + bool useExpensiveEHSupport = ExpensiveEHSupport) + : FunctionPass(&ID), useExpensiveEHSupport(useExpensiveEHSupport), + TLI(tli) { } bool doInitialization(Module &M); bool runOnFunction(Function &F); @@ -94,7 +97,8 @@ namespace { bool insertCheapEHSupport(Function &F); void splitLiveRangesLiveAcrossInvokes(std::vector<InvokeInst*> &Invokes); void rewriteExpensiveInvoke(InvokeInst *II, unsigned InvokeNo, - AllocaInst *InvokeNum, SwitchInst *CatchSwitch); + AllocaInst *InvokeNum, AllocaInst *StackPtr, + SwitchInst *CatchSwitch); bool insertExpensiveEHSupport(Function &F); }; } @@ -107,7 +111,11 @@ const PassInfo *const llvm::LowerInvokePassID = &X; // Public Interface To the LowerInvoke pass. 
FunctionPass *llvm::createLowerInvokePass(const TargetLowering *TLI) { - return new LowerInvoke(TLI); + return new LowerInvoke(TLI, ExpensiveEHSupport); +} +FunctionPass *llvm::createLowerInvokePass(const TargetLowering *TLI, + bool useExpensiveEHSupport) { + return new LowerInvoke(TLI, useExpensiveEHSupport); } // doInitialization - Make sure that there is a prototype for abort in the @@ -116,7 +124,7 @@ bool LowerInvoke::doInitialization(Module &M) { const Type *VoidPtrTy = Type::getInt8PtrTy(M.getContext()); AbortMessage = 0; - if (ExpensiveEHSupport) { + if (useExpensiveEHSupport) { // Insert a type for the linked list of jump buffers. unsigned JBSize = TLI ? TLI->getJumpBufSize() : 0; JBSize = JBSize ? JBSize : 200; @@ -160,6 +168,8 @@ bool LowerInvoke::doInitialization(Module &M) { #endif LongJmpFn = Intrinsic::getDeclaration(&M, Intrinsic::longjmp); + StackSaveFn = Intrinsic::getDeclaration(&M, Intrinsic::stacksave); + StackRestoreFn = Intrinsic::getDeclaration(&M, Intrinsic::stackrestore); } // We need the 'write' and 'abort' functions for both models. @@ -175,7 +185,7 @@ bool LowerInvoke::doInitialization(Module &M) { } void LowerInvoke::createAbortMessage(Module *M) { - if (ExpensiveEHSupport) { + if (useExpensiveEHSupport) { // The abort message for expensive EH support tells the user that the // program 'unwound' without an 'invoke' instruction. Constant *Msg = @@ -229,7 +239,8 @@ bool LowerInvoke::insertCheapEHSupport(Function &F) { std::vector<Value*> CallArgs(II->op_begin(), II->op_end() - 3); // Insert a normal call instruction... CallInst *NewCall = CallInst::Create(II->getCalledValue(), - CallArgs.begin(), CallArgs.end(), "",II); + CallArgs.begin(), CallArgs.end(), + "",II); NewCall->takeName(II); NewCall->setCallingConv(II->getCallingConv()); NewCall->setAttributes(II->getAttributes()); @@ -270,6 +281,7 @@ bool LowerInvoke::insertCheapEHSupport(Function &F) { /// specified invoke instruction with a call. void LowerInvoke::rewriteExpensiveInvoke(InvokeInst *II, unsigned InvokeNo, AllocaInst *InvokeNum, + AllocaInst *StackPtr, SwitchInst *CatchSwitch) { ConstantInt *InvokeNoC = ConstantInt::get(Type::getInt32Ty(II->getContext()), InvokeNo); @@ -288,12 +300,22 @@ void LowerInvoke::rewriteExpensiveInvoke(InvokeInst *II, unsigned InvokeNo, // Insert a store of the invoke num before the invoke and store zero into the // location afterward. new StoreInst(InvokeNoC, InvokeNum, true, II); // volatile + + // Insert a store of the stack ptr before the invoke, so we can restore it + // later in the exception case. + CallInst* StackSaveRet = CallInst::Create(StackSaveFn, "ssret", II); + new StoreInst(StackSaveRet, StackPtr, true, II); // volatile BasicBlock::iterator NI = II->getNormalDest()->getFirstNonPHI(); // nonvolatile. new StoreInst(Constant::getNullValue(Type::getInt32Ty(II->getContext())), InvokeNum, false, NI); + Instruction* StackPtrLoad = new LoadInst(StackPtr, "stackptr.restore", true, + II->getUnwindDest()->getFirstNonPHI() + ); + CallInst::Create(StackRestoreFn, StackPtrLoad, "")->insertAfter(StackPtrLoad); + // Add a switch case to our unwind block. CatchSwitch->addCase(InvokeNoC, II->getUnwindDest()); @@ -500,6 +522,12 @@ bool LowerInvoke::insertExpensiveEHSupport(Function &F) { BasicBlock *CatchBB = BasicBlock::Create(F.getContext(), "setjmp.catch", &F); + // Create an alloca which keeps track of the stack pointer before every + // invoke, this allows us to properly restore the stack pointer after + // long jumping. 
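  // Sketch of what rewriteExpensiveInvoke above now emits around each invoke
  // (illustrative; names match the ones it creates):
  //   %ssret = call i8* @llvm.stacksave()     ; before the invoke, stored
  //                                           ; (volatile) into %stackptr
  //   ...
  //   ; at the head of the unwind destination:
  //   %stackptr.restore = (volatile) load of %stackptr
  //   call void @llvm.stackrestore(i8* %stackptr.restore)
  // so dynamic stack allocations made after the save are released whenever
  // the exception path is taken.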
+ AllocaInst *StackPtr = new AllocaInst(Type::getInt8PtrTy(F.getContext()), 0, + "stackptr", EntryBB->begin()); + // Create an alloca which keeps track of which invoke is currently // executing. For normal calls it contains zero. AllocaInst *InvokeNum = new AllocaInst(Type::getInt32Ty(F.getContext()), 0, @@ -546,7 +574,7 @@ bool LowerInvoke::insertExpensiveEHSupport(Function &F) { // At this point, we are all set up, rewrite each invoke instruction. for (unsigned i = 0, e = Invokes.size(); i != e; ++i) - rewriteExpensiveInvoke(Invokes[i], i+1, InvokeNum, CatchSwitch); + rewriteExpensiveInvoke(Invokes[i], i+1, InvokeNum, StackPtr, CatchSwitch); } // We know that there is at least one unwind. @@ -622,7 +650,7 @@ bool LowerInvoke::insertExpensiveEHSupport(Function &F) { } bool LowerInvoke::runOnFunction(Function &F) { - if (ExpensiveEHSupport) + if (useExpensiveEHSupport) return insertExpensiveEHSupport(F); else return insertCheapEHSupport(F); diff --git a/lib/Transforms/Utils/SSAUpdater.cpp b/lib/Transforms/Utils/SSAUpdater.cpp index a31235a..25d50db 100644 --- a/lib/Transforms/Utils/SSAUpdater.cpp +++ b/lib/Transforms/Utils/SSAUpdater.cpp @@ -11,34 +11,52 @@ // //===----------------------------------------------------------------------===// +#define DEBUG_TYPE "ssaupdater" #include "llvm/Transforms/Utils/SSAUpdater.h" #include "llvm/Instructions.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/Support/AlignOf.h" +#include "llvm/Support/Allocator.h" #include "llvm/Support/CFG.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/ValueHandle.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; -typedef DenseMap<BasicBlock*, TrackingVH<Value> > AvailableValsTy; -typedef std::vector<std::pair<BasicBlock*, TrackingVH<Value> > > - IncomingPredInfoTy; - +/// BBInfo - Per-basic block information used internally by SSAUpdater. +/// The predecessors of each block are cached here since pred_iterator is +/// slow and we need to iterate over the blocks at least a few times. +class SSAUpdater::BBInfo { +public: + BasicBlock *BB; // Back-pointer to the corresponding block. + Value *AvailableVal; // Value to use in this block. + BBInfo *DefBB; // Block that defines the available value. + int BlkNum; // Postorder number. + BBInfo *IDom; // Immediate dominator. + unsigned NumPreds; // Number of predecessor blocks. + BBInfo **Preds; // Array[NumPreds] of predecessor blocks. + PHINode *PHITag; // Marker for existing PHIs that match. + + BBInfo(BasicBlock *ThisBB, Value *V) + : BB(ThisBB), AvailableVal(V), DefBB(V ? 
this : 0), BlkNum(0), IDom(0), + NumPreds(0), Preds(0), PHITag(0) { } +}; + +typedef DenseMap<BasicBlock*, SSAUpdater::BBInfo*> BBMapTy; + +typedef DenseMap<BasicBlock*, Value*> AvailableValsTy; static AvailableValsTy &getAvailableVals(void *AV) { return *static_cast<AvailableValsTy*>(AV); } -static IncomingPredInfoTy &getIncomingPredInfo(void *IPI) { - return *static_cast<IncomingPredInfoTy*>(IPI); +static BBMapTy *getBBMap(void *BM) { + return static_cast<BBMapTy*>(BM); } - SSAUpdater::SSAUpdater(SmallVectorImpl<PHINode*> *NewPHI) - : AV(0), PrototypeValue(0), IPI(0), InsertedPHIs(NewPHI) {} + : AV(0), PrototypeValue(0), BM(0), InsertedPHIs(NewPHI) {} SSAUpdater::~SSAUpdater() { delete &getAvailableVals(AV); - delete &getIncomingPredInfo(IPI); } /// Initialize - Reset this object to get ready for a new set of SSA @@ -48,11 +66,6 @@ void SSAUpdater::Initialize(Value *ProtoValue) { AV = new AvailableValsTy(); else getAvailableVals(AV).clear(); - - if (IPI == 0) - IPI = new IncomingPredInfoTy(); - else - getIncomingPredInfo(IPI).clear(); PrototypeValue = ProtoValue; } @@ -73,7 +86,7 @@ void SSAUpdater::AddAvailableValue(BasicBlock *BB, Value *V) { /// IsEquivalentPHI - Check if PHI has the same incoming value as specified /// in ValueMapping for each predecessor block. -static bool IsEquivalentPHI(PHINode *PHI, +static bool IsEquivalentPHI(PHINode *PHI, DenseMap<BasicBlock*, Value*> &ValueMapping) { unsigned PHINumValues = PHI->getNumIncomingValues(); if (PHINumValues != ValueMapping.size()) @@ -89,38 +102,12 @@ static bool IsEquivalentPHI(PHINode *PHI, return true; } -/// GetExistingPHI - Check if BB already contains a phi node that is equivalent -/// to the specified mapping from predecessor blocks to incoming values. -static Value *GetExistingPHI(BasicBlock *BB, - DenseMap<BasicBlock*, Value*> &ValueMapping) { - PHINode *SomePHI; - for (BasicBlock::iterator It = BB->begin(); - (SomePHI = dyn_cast<PHINode>(It)); ++It) { - if (IsEquivalentPHI(SomePHI, ValueMapping)) - return SomePHI; - } - return 0; -} - -/// GetExistingPHI - Check if BB already contains an equivalent phi node. -/// The InputIt type must be an iterator over std::pair<BasicBlock*, Value*> -/// objects that specify the mapping from predecessor blocks to incoming values. -template<typename InputIt> -static Value *GetExistingPHI(BasicBlock *BB, const InputIt &I, - const InputIt &E) { - // Avoid create the mapping if BB has no phi nodes at all. - if (!isa<PHINode>(BB->begin())) - return 0; - DenseMap<BasicBlock*, Value*> ValueMapping(I, E); - return GetExistingPHI(BB, ValueMapping); -} - /// GetValueAtEndOfBlock - Construct SSA form, materializing a value that is /// live at the end of the specified block. Value *SSAUpdater::GetValueAtEndOfBlock(BasicBlock *BB) { - assert(getIncomingPredInfo(IPI).empty() && "Unexpected Internal State"); + assert(BM == 0 && "Unexpected Internal State"); Value *Res = GetValueAtEndOfBlockInternal(BB); - assert(getIncomingPredInfo(IPI).empty() && "Unexpected Internal State"); + assert(BM == 0 && "Unexpected Internal State"); return Res; } @@ -146,7 +133,7 @@ Value *SSAUpdater::GetValueAtEndOfBlock(BasicBlock *BB) { Value *SSAUpdater::GetValueInMiddleOfBlock(BasicBlock *BB) { // If there is no definition of the renamed variable in this block, just use // GetValueAtEndOfBlock to do our work. - if (!getAvailableVals(AV).count(BB)) + if (!HasValueForBlock(BB)) return GetValueAtEndOfBlock(BB); // Otherwise, we have the hard case. 
Get the live-in values for each @@ -193,10 +180,18 @@ Value *SSAUpdater::GetValueInMiddleOfBlock(BasicBlock *BB) { if (SingularValue != 0) return SingularValue; - // Otherwise, we do need a PHI. - if (Value *ExistingPHI = GetExistingPHI(BB, PredValues.begin(), - PredValues.end())) - return ExistingPHI; + // Otherwise, we do need a PHI: check to see if we already have one available + // in this block that produces the right value. + if (isa<PHINode>(BB->begin())) { + DenseMap<BasicBlock*, Value*> ValueMapping(PredValues.begin(), + PredValues.end()); + PHINode *SomePHI; + for (BasicBlock::iterator It = BB->begin(); + (SomePHI = dyn_cast<PHINode>(It)); ++It) { + if (IsEquivalentPHI(SomePHI, ValueMapping)) + return SomePHI; + } + } // Ok, we have no way out, insert a new one now. PHINode *InsertedPHI = PHINode::Create(PrototypeValue->getType(), @@ -226,7 +221,7 @@ Value *SSAUpdater::GetValueInMiddleOfBlock(BasicBlock *BB) { /// which use their value in the corresponding predecessor. void SSAUpdater::RewriteUse(Use &U) { Instruction *User = cast<Instruction>(U.getUser()); - + Value *V; if (PHINode *UserPN = dyn_cast<PHINode>(User)) V = GetValueAtEndOfBlock(UserPN->getIncomingBlock(U)); @@ -236,161 +231,427 @@ void SSAUpdater::RewriteUse(Use &U) { U.set(V); } - /// GetValueAtEndOfBlockInternal - Check to see if AvailableVals has an entry /// for the specified BB and if so, return it. If not, construct SSA form by -/// walking predecessors inserting PHI nodes as needed until we get to a block -/// where the value is available. -/// +/// first calculating the required placement of PHIs and then inserting new +/// PHIs where needed. Value *SSAUpdater::GetValueAtEndOfBlockInternal(BasicBlock *BB) { AvailableValsTy &AvailableVals = getAvailableVals(AV); + if (Value *V = AvailableVals[BB]) + return V; + + // Pool allocation used internally by GetValueAtEndOfBlock. + BumpPtrAllocator Allocator; + BBMapTy BBMapObj; + BM = &BBMapObj; + + SmallVector<BBInfo*, 100> BlockList; + BuildBlockList(BB, &BlockList, &Allocator); - // Query AvailableVals by doing an insertion of null. - std::pair<AvailableValsTy::iterator, bool> InsertRes = - AvailableVals.insert(std::make_pair(BB, TrackingVH<Value>())); - - // Handle the case when the insertion fails because we have already seen BB. - if (!InsertRes.second) { - // If the insertion failed, there are two cases. The first case is that the - // value is already available for the specified block. If we get this, just - // return the value. - if (InsertRes.first->second != 0) - return InsertRes.first->second; - - // Otherwise, if the value we find is null, then this is the value is not - // known but it is being computed elsewhere in our recursion. This means - // that we have a cycle. Handle this by inserting a PHI node and returning - // it. When we get back to the first instance of the recursion we will fill - // in the PHI node. - return InsertRes.first->second = - PHINode::Create(PrototypeValue->getType(), PrototypeValue->getName(), - &BB->front()); + // Special case: bail out if BB is unreachable. + if (BlockList.size() == 0) { + BM = 0; + return UndefValue::get(PrototypeValue->getType()); } - // Okay, the value isn't in the map and we just inserted a null in the entry - // to indicate that we're processing the block. Since we have no idea what - // value is in this block, we have to recurse through our predecessors. - // - // While we're walking our predecessors, we keep track of them in a vector, - // then insert a PHI node in the end if we actually need one. 
We could use a - // smallvector here, but that would take a lot of stack space for every level - // of the recursion, just use IncomingPredInfo as an explicit stack. - IncomingPredInfoTy &IncomingPredInfo = getIncomingPredInfo(IPI); - unsigned FirstPredInfoEntry = IncomingPredInfo.size(); - - // As we're walking the predecessors, keep track of whether they are all - // producing the same value. If so, this value will capture it, if not, it - // will get reset to null. We distinguish the no-predecessor case explicitly - // below. - TrackingVH<Value> ExistingValue; + FindDominators(&BlockList); + FindPHIPlacement(&BlockList); + FindAvailableVals(&BlockList); - // We can get our predecessor info by walking the pred_iterator list, but it - // is relatively slow. If we already have PHI nodes in this block, walk one - // of them to get the predecessor list instead. + BM = 0; + return BBMapObj[BB]->DefBB->AvailableVal; +} + +/// FindPredecessorBlocks - Put the predecessors of Info->BB into the Preds +/// vector, set Info->NumPreds, and allocate space in Info->Preds. +static void FindPredecessorBlocks(SSAUpdater::BBInfo *Info, + SmallVectorImpl<BasicBlock*> *Preds, + BumpPtrAllocator *Allocator) { + // We can get our predecessor info by walking the pred_iterator list, + // but it is relatively slow. If we already have PHI nodes in this + // block, walk one of them to get the predecessor list instead. + BasicBlock *BB = Info->BB; if (PHINode *SomePhi = dyn_cast<PHINode>(BB->begin())) { - for (unsigned i = 0, e = SomePhi->getNumIncomingValues(); i != e; ++i) { - BasicBlock *PredBB = SomePhi->getIncomingBlock(i); - Value *PredVal = GetValueAtEndOfBlockInternal(PredBB); - IncomingPredInfo.push_back(std::make_pair(PredBB, PredVal)); + for (unsigned PI = 0, E = SomePhi->getNumIncomingValues(); PI != E; ++PI) + Preds->push_back(SomePhi->getIncomingBlock(PI)); + } else { + for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) + Preds->push_back(*PI); + } - // Set ExistingValue to singular value from all predecessors so far. - if (i == 0) - ExistingValue = PredVal; - else if (PredVal != ExistingValue) - ExistingValue = 0; + Info->NumPreds = Preds->size(); + Info->Preds = static_cast<SSAUpdater::BBInfo**> + (Allocator->Allocate(Info->NumPreds * sizeof(SSAUpdater::BBInfo*), + AlignOf<SSAUpdater::BBInfo*>::Alignment)); +} + +/// BuildBlockList - Starting from the specified basic block, traverse back +/// through its predecessors until reaching blocks with known values. Create +/// BBInfo structures for the blocks and append them to the block list. +void SSAUpdater::BuildBlockList(BasicBlock *BB, BlockListTy *BlockList, + BumpPtrAllocator *Allocator) { + AvailableValsTy &AvailableVals = getAvailableVals(AV); + BBMapTy *BBMap = getBBMap(BM); + SmallVector<BBInfo*, 10> RootList; + SmallVector<BBInfo*, 64> WorkList; + + BBInfo *Info = new (*Allocator) BBInfo(BB, 0); + (*BBMap)[BB] = Info; + WorkList.push_back(Info); + + // Search backward from BB, creating BBInfos along the way and stopping when + // reaching blocks that define the value. Record those defining blocks on + // the RootList. + SmallVector<BasicBlock*, 10> Preds; + while (!WorkList.empty()) { + Info = WorkList.pop_back_val(); + Preds.clear(); + FindPredecessorBlocks(Info, &Preds, Allocator); + + // Treat an unreachable predecessor as a definition with 'undef'. 
+ if (Info->NumPreds == 0) { + Info->AvailableVal = UndefValue::get(PrototypeValue->getType()); + Info->DefBB = Info; + RootList.push_back(Info); + continue; } - } else { - bool isFirstPred = true; - for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) { - BasicBlock *PredBB = *PI; - Value *PredVal = GetValueAtEndOfBlockInternal(PredBB); - IncomingPredInfo.push_back(std::make_pair(PredBB, PredVal)); - // Set ExistingValue to singular value from all predecessors so far. - if (isFirstPred) { - ExistingValue = PredVal; - isFirstPred = false; - } else if (PredVal != ExistingValue) - ExistingValue = 0; + for (unsigned p = 0; p != Info->NumPreds; ++p) { + BasicBlock *Pred = Preds[p]; + // Check if BBMap already has a BBInfo for the predecessor block. + BBMapTy::value_type &BBMapBucket = BBMap->FindAndConstruct(Pred); + if (BBMapBucket.second) { + Info->Preds[p] = BBMapBucket.second; + continue; + } + + // Create a new BBInfo for the predecessor. + Value *PredVal = AvailableVals.lookup(Pred); + BBInfo *PredInfo = new (*Allocator) BBInfo(Pred, PredVal); + BBMapBucket.second = PredInfo; + Info->Preds[p] = PredInfo; + + if (PredInfo->AvailableVal) { + RootList.push_back(PredInfo); + continue; + } + WorkList.push_back(PredInfo); + } + } + + // Now that we know what blocks are backwards-reachable from the starting + // block, do a forward depth-first traversal to assign postorder numbers + // to those blocks. + BBInfo *PseudoEntry = new (*Allocator) BBInfo(0, 0); + unsigned BlkNum = 1; + + // Initialize the worklist with the roots from the backward traversal. + while (!RootList.empty()) { + Info = RootList.pop_back_val(); + Info->IDom = PseudoEntry; + Info->BlkNum = -1; + WorkList.push_back(Info); + } + + while (!WorkList.empty()) { + Info = WorkList.back(); + + if (Info->BlkNum == -2) { + // All the successors have been handled; assign the postorder number. + Info->BlkNum = BlkNum++; + // If not a root, put it on the BlockList. + if (!Info->AvailableVal) + BlockList->push_back(Info); + WorkList.pop_back(); + continue; + } + + // Leave this entry on the worklist, but set its BlkNum to mark that its + // successors have been put on the worklist. When it returns to the top + // the list, after handling its successors, it will be assigned a number. + Info->BlkNum = -2; + + // Add unvisited successors to the work list. + for (succ_iterator SI = succ_begin(Info->BB), E = succ_end(Info->BB); + SI != E; ++SI) { + BBInfo *SuccInfo = (*BBMap)[*SI]; + if (!SuccInfo || SuccInfo->BlkNum) + continue; + SuccInfo->BlkNum = -1; + WorkList.push_back(SuccInfo); } } + PseudoEntry->BlkNum = BlkNum; +} - // If there are no predecessors, then we must have found an unreachable block - // just return 'undef'. Since there are no predecessors, InsertRes must not - // be invalidated. - if (IncomingPredInfo.size() == FirstPredInfoEntry) - return InsertRes.first->second = UndefValue::get(PrototypeValue->getType()); - - /// Look up BB's entry in AvailableVals. 'InsertRes' may be invalidated. If - /// this block is involved in a loop, a no-entry PHI node will have been - /// inserted as InsertedVal. Otherwise, we'll still have the null we inserted - /// above. - TrackingVH<Value> &InsertedVal = AvailableVals[BB]; - - // If the predecessor values are not all the same, then check to see if there - // is an existing PHI that can be used. 
- if (!ExistingValue) - ExistingValue = GetExistingPHI(BB, - IncomingPredInfo.begin()+FirstPredInfoEntry, - IncomingPredInfo.end()); - - // If there is an existing value we can use, then we don't need to insert a - // PHI. This is the simple and common case. - if (ExistingValue) { - // If a PHI node got inserted, replace it with the existing value and delete - // it. - if (InsertedVal) { - PHINode *OldVal = cast<PHINode>(InsertedVal); - // Be careful about dead loops. These RAUW's also update InsertedVal. - if (InsertedVal != ExistingValue) - OldVal->replaceAllUsesWith(ExistingValue); - else - OldVal->replaceAllUsesWith(UndefValue::get(InsertedVal->getType())); - OldVal->eraseFromParent(); - } else { - InsertedVal = ExistingValue; +/// IntersectDominators - This is the dataflow lattice "meet" operation for +/// finding dominators. Given two basic blocks, it walks up the dominator +/// tree until it finds a common dominator of both. It uses the postorder +/// number of the blocks to determine how to do that. +static SSAUpdater::BBInfo *IntersectDominators(SSAUpdater::BBInfo *Blk1, + SSAUpdater::BBInfo *Blk2) { + while (Blk1 != Blk2) { + while (Blk1->BlkNum < Blk2->BlkNum) { + Blk1 = Blk1->IDom; + if (!Blk1) + return Blk2; } + while (Blk2->BlkNum < Blk1->BlkNum) { + Blk2 = Blk2->IDom; + if (!Blk2) + return Blk1; + } + } + return Blk1; +} - // Either path through the 'if' should have set InsertedVal -> ExistingVal. - assert((InsertedVal == ExistingValue || isa<UndefValue>(InsertedVal)) && - "RAUW didn't change InsertedVal to be ExistingValue"); +/// FindDominators - Calculate the dominator tree for the subset of the CFG +/// corresponding to the basic blocks on the BlockList. This uses the +/// algorithm from: "A Simple, Fast Dominance Algorithm" by Cooper, Harvey and +/// Kennedy, published in Software--Practice and Experience, 2001, 4:1-10. +/// Because the CFG subset does not include any edges leading into blocks that +/// define the value, the results are not the usual dominator tree. The CFG +/// subset has a single pseudo-entry node with edges to a set of root nodes +/// for blocks that define the value. The dominators for this subset CFG are +/// not the standard dominators but they are adequate for placing PHIs within +/// the subset CFG. +void SSAUpdater::FindDominators(BlockListTy *BlockList) { + bool Changed; + do { + Changed = false; + // Iterate over the list in reverse order, i.e., forward on CFG edges. + for (BlockListTy::reverse_iterator I = BlockList->rbegin(), + E = BlockList->rend(); I != E; ++I) { + BBInfo *Info = *I; + + // Start with the first predecessor. + assert(Info->NumPreds > 0 && "unreachable block"); + BBInfo *NewIDom = Info->Preds[0]; + + // Iterate through the block's other predecessors. + for (unsigned p = 1; p != Info->NumPreds; ++p) { + BBInfo *Pred = Info->Preds[p]; + NewIDom = IntersectDominators(NewIDom, Pred); + } + + // Check if the IDom value has changed. + if (NewIDom != Info->IDom) { + Info->IDom = NewIDom; + Changed = true; + } + } + } while (Changed); +} - // Drop the entries we added in IncomingPredInfo to restore the stack. - IncomingPredInfo.erase(IncomingPredInfo.begin()+FirstPredInfoEntry, - IncomingPredInfo.end()); - return ExistingValue; +/// IsDefInDomFrontier - Search up the dominator tree from Pred to IDom for +/// any blocks containing definitions of the value. If one is found, then the +/// successor of Pred is in the dominance frontier for the definition, and +/// this function returns true. 
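As a self-contained illustration of the intersection step used by FindDominators above (walk whichever block has the smaller postorder number up its tentative IDom chain until the two meet), here is a sketch on a four-node diamond; the Node struct and the hard-coded postorder numbers are assumptions, unrelated to BBInfo:

#include <cstdio>

// Simplified stand-in for BBInfo: a larger BlkNum means closer to the entry,
// matching the postorder numbering computed by BuildBlockList.
struct Node { const char *Name; unsigned BlkNum; Node *IDom; };

// The same "two-finger" meet as IntersectDominators, minus the null checks.
static Node *intersect(Node *A, Node *B) {
  while (A != B) {
    while (A->BlkNum < B->BlkNum) A = A->IDom;
    while (B->BlkNum < A->BlkNum) B = B->IDom;
  }
  return A;
}

int main() {
  // Diamond CFG: Entry -> {Left, Right} -> Exit.
  Node Entry = { "Entry", 4, 0 };
  Node Left  = { "Left",  3, &Entry };
  Node Right = { "Right", 2, &Entry };
  Node Exit  = { "Exit",  1, &Left };        // tentative IDom from the first pred
  Exit.IDom = intersect(Exit.IDom, &Right);  // meet with the second pred
  std::printf("idom(Exit) = %s\n", Exit.IDom->Name);  // prints "Entry"
  return 0;
}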
+static bool IsDefInDomFrontier(const SSAUpdater::BBInfo *Pred, + const SSAUpdater::BBInfo *IDom) { + for (; Pred != IDom; Pred = Pred->IDom) { + if (Pred->DefBB == Pred) + return true; } + return false; +} - // Otherwise, we do need a PHI: insert one now if we don't already have one. - if (InsertedVal == 0) - InsertedVal = PHINode::Create(PrototypeValue->getType(), - PrototypeValue->getName(), &BB->front()); +/// FindPHIPlacement - PHIs are needed in the iterated dominance frontiers of +/// the known definitions. Iteratively add PHIs in the dom frontiers until +/// nothing changes. Along the way, keep track of the nearest dominating +/// definitions for non-PHI blocks. +void SSAUpdater::FindPHIPlacement(BlockListTy *BlockList) { + bool Changed; + do { + Changed = false; + // Iterate over the list in reverse order, i.e., forward on CFG edges. + for (BlockListTy::reverse_iterator I = BlockList->rbegin(), + E = BlockList->rend(); I != E; ++I) { + BBInfo *Info = *I; + + // If this block already needs a PHI, there is nothing to do here. + if (Info->DefBB == Info) + continue; + + // Default to use the same def as the immediate dominator. + BBInfo *NewDefBB = Info->IDom->DefBB; + for (unsigned p = 0; p != Info->NumPreds; ++p) { + if (IsDefInDomFrontier(Info->Preds[p], Info->IDom)) { + // Need a PHI here. + NewDefBB = Info; + break; + } + } + + // Check if anything changed. + if (NewDefBB != Info->DefBB) { + Info->DefBB = NewDefBB; + Changed = true; + } + } + } while (Changed); +} - PHINode *InsertedPHI = cast<PHINode>(InsertedVal); - InsertedPHI->reserveOperandSpace(IncomingPredInfo.size()-FirstPredInfoEntry); +/// FindAvailableVal - If this block requires a PHI, first check if an existing +/// PHI matches the PHI placement and reaching definitions computed earlier, +/// and if not, create a new PHI. Visit all the block's predecessors to +/// calculate the available value for each one and fill in the incoming values +/// for a new PHI. +void SSAUpdater::FindAvailableVals(BlockListTy *BlockList) { + AvailableValsTy &AvailableVals = getAvailableVals(AV); - // Fill in all the predecessors of the PHI. - for (IncomingPredInfoTy::iterator I = - IncomingPredInfo.begin()+FirstPredInfoEntry, - E = IncomingPredInfo.end(); I != E; ++I) - InsertedPHI->addIncoming(I->second, I->first); + // Go through the worklist in forward order (i.e., backward through the CFG) + // and check if existing PHIs can be used. If not, create empty PHIs where + // they are needed. + for (BlockListTy::iterator I = BlockList->begin(), E = BlockList->end(); + I != E; ++I) { + BBInfo *Info = *I; + // Check if there needs to be a PHI in BB. + if (Info->DefBB != Info) + continue; + + // Look for an existing PHI. + FindExistingPHI(Info->BB, BlockList); + if (Info->AvailableVal) + continue; + + PHINode *PHI = PHINode::Create(PrototypeValue->getType(), + PrototypeValue->getName(), + &Info->BB->front()); + PHI->reserveOperandSpace(Info->NumPreds); + Info->AvailableVal = PHI; + AvailableVals[Info->BB] = PHI; + } - // Drop the entries we added in IncomingPredInfo to restore the stack. - IncomingPredInfo.erase(IncomingPredInfo.begin()+FirstPredInfoEntry, - IncomingPredInfo.end()); + // Now go back through the worklist in reverse order to fill in the arguments + // for any new PHIs added in the forward traversal. 
+ for (BlockListTy::reverse_iterator I = BlockList->rbegin(), + E = BlockList->rend(); I != E; ++I) { + BBInfo *Info = *I; + + if (Info->DefBB != Info) { + // Record the available value at join nodes to speed up subsequent + // uses of this SSAUpdater for the same value. + if (Info->NumPreds > 1) + AvailableVals[Info->BB] = Info->DefBB->AvailableVal; + continue; + } - // See if the PHI node can be merged to a single value. This can happen in - // loop cases when we get a PHI of itself and one other value. - if (Value *ConstVal = InsertedPHI->hasConstantValue()) { - InsertedPHI->replaceAllUsesWith(ConstVal); - InsertedPHI->eraseFromParent(); - InsertedVal = ConstVal; - } else { - DEBUG(dbgs() << " Inserted PHI: " << *InsertedPHI << "\n"); + // Check if this block contains a newly added PHI. + PHINode *PHI = dyn_cast<PHINode>(Info->AvailableVal); + if (!PHI || PHI->getNumIncomingValues() == Info->NumPreds) + continue; + + // Iterate through the block's predecessors. + for (unsigned p = 0; p != Info->NumPreds; ++p) { + BBInfo *PredInfo = Info->Preds[p]; + BasicBlock *Pred = PredInfo->BB; + // Skip to the nearest preceding definition. + if (PredInfo->DefBB != PredInfo) + PredInfo = PredInfo->DefBB; + PHI->addIncoming(PredInfo->AvailableVal, Pred); + } + + DEBUG(dbgs() << " Inserted PHI: " << *PHI << "\n"); // If the client wants to know about all new instructions, tell it. - if (InsertedPHIs) InsertedPHIs->push_back(InsertedPHI); + if (InsertedPHIs) InsertedPHIs->push_back(PHI); + } +} + +/// FindExistingPHI - Look through the PHI nodes in a block to see if any of +/// them match what is needed. +void SSAUpdater::FindExistingPHI(BasicBlock *BB, BlockListTy *BlockList) { + PHINode *SomePHI; + for (BasicBlock::iterator It = BB->begin(); + (SomePHI = dyn_cast<PHINode>(It)); ++It) { + if (CheckIfPHIMatches(SomePHI)) { + RecordMatchingPHI(SomePHI); + break; + } + // Match failed: clear all the PHITag values. + for (BlockListTy::iterator I = BlockList->begin(), E = BlockList->end(); + I != E; ++I) + (*I)->PHITag = 0; + } +} + +/// CheckIfPHIMatches - Check if a PHI node matches the placement and values +/// in the BBMap. +bool SSAUpdater::CheckIfPHIMatches(PHINode *PHI) { + BBMapTy *BBMap = getBBMap(BM); + SmallVector<PHINode*, 20> WorkList; + WorkList.push_back(PHI); + + // Mark that the block containing this PHI has been visited. + (*BBMap)[PHI->getParent()]->PHITag = PHI; + + while (!WorkList.empty()) { + PHI = WorkList.pop_back_val(); + + // Iterate through the PHI's incoming values. + for (unsigned i = 0, e = PHI->getNumIncomingValues(); i != e; ++i) { + Value *IncomingVal = PHI->getIncomingValue(i); + BBInfo *PredInfo = (*BBMap)[PHI->getIncomingBlock(i)]; + // Skip to the nearest preceding definition. + if (PredInfo->DefBB != PredInfo) + PredInfo = PredInfo->DefBB; + + // Check if it matches the expected value. + if (PredInfo->AvailableVal) { + if (IncomingVal == PredInfo->AvailableVal) + continue; + return false; + } + + // Check if the value is a PHI in the correct block. + PHINode *IncomingPHIVal = dyn_cast<PHINode>(IncomingVal); + if (!IncomingPHIVal || IncomingPHIVal->getParent() != PredInfo->BB) + return false; + + // If this block has already been visited, check if this PHI matches. 
+ if (PredInfo->PHITag) { + if (IncomingPHIVal == PredInfo->PHITag) + continue; + return false; + } + PredInfo->PHITag = IncomingPHIVal; + + WorkList.push_back(IncomingPHIVal); + } } + return true; +} - return InsertedVal; +/// RecordMatchingPHI - For a PHI node that matches, record it and its input +/// PHIs in both the BBMap and the AvailableVals mapping. +void SSAUpdater::RecordMatchingPHI(PHINode *PHI) { + BBMapTy *BBMap = getBBMap(BM); + AvailableValsTy &AvailableVals = getAvailableVals(AV); + SmallVector<PHINode*, 20> WorkList; + WorkList.push_back(PHI); + + // Record this PHI. + BasicBlock *BB = PHI->getParent(); + AvailableVals[BB] = PHI; + (*BBMap)[BB]->AvailableVal = PHI; + + while (!WorkList.empty()) { + PHI = WorkList.pop_back_val(); + + // Iterate through the PHI's incoming values. + for (unsigned i = 0, e = PHI->getNumIncomingValues(); i != e; ++i) { + PHINode *IncomingPHIVal = dyn_cast<PHINode>(PHI->getIncomingValue(i)); + if (!IncomingPHIVal) continue; + BB = IncomingPHIVal->getParent(); + BBInfo *Info = (*BBMap)[BB]; + if (!Info || Info->AvailableVal) + continue; + + // Record the PHI and add it to the worklist. + AvailableVals[BB] = IncomingPHIVal; + Info->AvailableVal = IncomingPHIVal; + WorkList.push_back(IncomingPHIVal); + } + } } diff --git a/lib/Transforms/Utils/ValueMapper.cpp b/lib/Transforms/Utils/ValueMapper.cpp index 6045048..87ce631 100644 --- a/lib/Transforms/Utils/ValueMapper.cpp +++ b/lib/Transforms/Utils/ValueMapper.cpp @@ -12,7 +12,7 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Transforms/Utils/ValueMapper.h" +#include "ValueMapper.h" #include "llvm/Type.h" #include "llvm/Constants.h" #include "llvm/Function.h" @@ -20,7 +20,7 @@ #include "llvm/ADT/SmallVector.h" using namespace llvm; -Value *llvm::MapValue(const Value *V, ValueMapTy &VM) { +Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM) { Value *&VMSlot = VM[V]; if (VMSlot) return VMSlot; // Does it exist in the map yet? @@ -127,7 +127,7 @@ Value *llvm::MapValue(const Value *V, ValueMapTy &VM) { /// RemapInstruction - Convert the instruction operands from referencing the /// current values into those specified by ValueMap. /// -void llvm::RemapInstruction(Instruction *I, ValueMapTy &ValueMap) { +void llvm::RemapInstruction(Instruction *I, ValueToValueMapTy &ValueMap) { for (User::op_iterator op = I->op_begin(), E = I->op_end(); op != E; ++op) { Value *V = MapValue(*op, ValueMap); assert(V && "Referenced value not in value map!"); diff --git a/lib/Transforms/Utils/ValueMapper.h b/lib/Transforms/Utils/ValueMapper.h new file mode 100644 index 0000000..d61c24c --- /dev/null +++ b/lib/Transforms/Utils/ValueMapper.h @@ -0,0 +1,29 @@ +//===- ValueMapper.h - Interface shared by lib/Transforms/Utils -*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the MapValue interface which is used by various parts of +// the Transforms/Utils library to implement cloning and linking facilities. 
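A brief sketch of how this interface is typically used by the cloning utilities it mentions; cloneAndRemap and its arguments are assumed names, shown for illustration only:

#include "ValueMapper.h"
#include "llvm/Instruction.h"
using namespace llvm;

// Hypothetical example: clone one instruction and retarget its operands using
// an already-populated old-value -> new-value map, as a cloning pass would.
static Instruction *cloneAndRemap(const Instruction *OldInst,
                                  ValueToValueMapTy &VMap) {
  Instruction *NewInst = OldInst->clone();  // operands still point at old values
  RemapInstruction(NewInst, VMap);          // rewrites each operand via MapValue
  return NewInst;
}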
+// +//===----------------------------------------------------------------------===// + +#ifndef VALUEMAPPER_H +#define VALUEMAPPER_H + +#include "llvm/ADT/DenseMap.h" + +namespace llvm { + class Value; + class Instruction; + typedef DenseMap<const Value *, Value *> ValueToValueMapTy; + + Value *MapValue(const Value *V, ValueToValueMapTy &VM); + void RemapInstruction(Instruction *I, ValueToValueMapTy &VM); +} // End llvm namespace + +#endif diff --git a/lib/VMCore/AsmWriter.cpp b/lib/VMCore/AsmWriter.cpp index f6a6076..6c1aa5e 100644 --- a/lib/VMCore/AsmWriter.cpp +++ b/lib/VMCore/AsmWriter.cpp @@ -227,13 +227,15 @@ void TypePrinting::CalcTypeName(const Type *Ty, const StructType *STy = cast<StructType>(Ty); if (STy->isPacked()) OS << '<'; - OS << "{ "; + OS << '{'; for (StructType::element_iterator I = STy->element_begin(), E = STy->element_end(); I != E; ++I) { + OS << ' '; CalcTypeName(*I, TypeStack, OS); - if (next(I) != STy->element_end()) + if (next(I) == STy->element_end()) + OS << ' '; + else OS << ','; - OS << ' '; } OS << '}'; if (STy->isPacked()) @@ -242,13 +244,15 @@ void TypePrinting::CalcTypeName(const Type *Ty, } case Type::UnionTyID: { const UnionType *UTy = cast<UnionType>(Ty); - OS << "union { "; + OS << "union {"; for (StructType::element_iterator I = UTy->element_begin(), E = UTy->element_end(); I != E; ++I) { + OS << ' '; CalcTypeName(*I, TypeStack, OS); - if (next(I) != UTy->element_end()) + if (next(I) == UTy->element_end()) + OS << ' '; + else OS << ','; - OS << ' '; } OS << '}'; break; diff --git a/lib/VMCore/AutoUpgrade.cpp b/lib/VMCore/AutoUpgrade.cpp index 4d06b66..0144210 100644 --- a/lib/VMCore/AutoUpgrade.cpp +++ b/lib/VMCore/AutoUpgrade.cpp @@ -19,6 +19,7 @@ #include "llvm/IntrinsicInst.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/IRBuilder.h" #include <cstring> using namespace llvm; @@ -277,8 +278,13 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) { // Calls to these intrinsics are transformed into vector multiplies. NewFn = 0; return true; + } else if (Name.compare(5, 18, "x86.ssse3.palign.r", 18) == 0 || + Name.compare(5, 22, "x86.ssse3.palign.r.128", 22) == 0) { + // Calls to these intrinsics are transformed into vector shuffles, shifts, + // or 0. + NewFn = 0; + return true; } - break; } @@ -420,6 +426,118 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) { // Remove upgraded multiply. CI->eraseFromParent(); + } else if (F->getName() == "llvm.x86.ssse3.palign.r") { + Value *Op1 = CI->getOperand(1); + Value *Op2 = CI->getOperand(2); + Value *Op3 = CI->getOperand(3); + unsigned shiftVal = cast<ConstantInt>(Op3)->getZExtValue(); + Value *Rep; + IRBuilder<> Builder(C); + Builder.SetInsertPoint(CI->getParent(), CI); + + // If palignr is shifting the pair of input vectors less than 9 bytes, + // emit a shuffle instruction. 
+ if (shiftVal <= 8) { + const Type *IntTy = Type::getInt32Ty(C); + const Type *EltTy = Type::getInt8Ty(C); + const Type *VecTy = VectorType::get(EltTy, 8); + + Op2 = Builder.CreateBitCast(Op2, VecTy); + Op1 = Builder.CreateBitCast(Op1, VecTy); + + llvm::SmallVector<llvm::Constant*, 8> Indices; + for (unsigned i = 0; i != 8; ++i) + Indices.push_back(ConstantInt::get(IntTy, shiftVal + i)); + + Value *SV = ConstantVector::get(Indices.begin(), Indices.size()); + Rep = Builder.CreateShuffleVector(Op2, Op1, SV, "palignr"); + Rep = Builder.CreateBitCast(Rep, F->getReturnType()); + } + + // If palignr is shifting the pair of input vectors more than 8 but less + // than 16 bytes, emit a logical right shift of the destination. + else if (shiftVal < 16) { + // MMX has these as 1 x i64 vectors for some odd optimization reasons. + const Type *EltTy = Type::getInt64Ty(C); + const Type *VecTy = VectorType::get(EltTy, 1); + + Op1 = Builder.CreateBitCast(Op1, VecTy, "cast"); + Op2 = ConstantInt::get(VecTy, (shiftVal-8) * 8); + + // create i32 constant + Function *I = + Intrinsic::getDeclaration(F->getParent(), Intrinsic::x86_mmx_psrl_q); + Rep = Builder.CreateCall2(I, Op1, Op2, "palignr"); + } + + // If palignr is shifting the pair of vectors more than 32 bytes, emit zero. + else { + Rep = Constant::getNullValue(F->getReturnType()); + } + + // Replace any uses with our new instruction. + if (!CI->use_empty()) + CI->replaceAllUsesWith(Rep); + + // Remove upgraded instruction. + CI->eraseFromParent(); + + } else if (F->getName() == "llvm.x86.ssse3.palign.r.128") { + Value *Op1 = CI->getOperand(1); + Value *Op2 = CI->getOperand(2); + Value *Op3 = CI->getOperand(3); + unsigned shiftVal = cast<ConstantInt>(Op3)->getZExtValue(); + Value *Rep; + IRBuilder<> Builder(C); + Builder.SetInsertPoint(CI->getParent(), CI); + + // If palignr is shifting the pair of input vectors less than 17 bytes, + // emit a shuffle instruction. + if (shiftVal <= 16) { + const Type *IntTy = Type::getInt32Ty(C); + const Type *EltTy = Type::getInt8Ty(C); + const Type *VecTy = VectorType::get(EltTy, 16); + + Op2 = Builder.CreateBitCast(Op2, VecTy); + Op1 = Builder.CreateBitCast(Op1, VecTy); + + llvm::SmallVector<llvm::Constant*, 16> Indices; + for (unsigned i = 0; i != 16; ++i) + Indices.push_back(ConstantInt::get(IntTy, shiftVal + i)); + + Value *SV = ConstantVector::get(Indices.begin(), Indices.size()); + Rep = Builder.CreateShuffleVector(Op2, Op1, SV, "palignr"); + Rep = Builder.CreateBitCast(Rep, F->getReturnType()); + } + + // If palignr is shifting the pair of input vectors more than 16 but less + // than 32 bytes, emit a logical right shift of the destination. + else if (shiftVal < 32) { + const Type *EltTy = Type::getInt64Ty(C); + const Type *VecTy = VectorType::get(EltTy, 2); + const Type *IntTy = Type::getInt32Ty(C); + + Op1 = Builder.CreateBitCast(Op1, VecTy, "cast"); + Op2 = ConstantInt::get(IntTy, (shiftVal-16) * 8); + + // create i32 constant + Function *I = + Intrinsic::getDeclaration(F->getParent(), Intrinsic::x86_sse2_psrl_dq); + Rep = Builder.CreateCall2(I, Op1, Op2, "palignr"); + } + + // If palignr is shifting the pair of vectors more than 32 bytes, emit zero. + else { + Rep = Constant::getNullValue(F->getReturnType()); + } + + // Replace any uses with our new instruction. + if (!CI->use_empty()) + CI->replaceAllUsesWith(Rep); + + // Remove upgraded instruction. 
+ CI->eraseFromParent(); + } else { llvm_unreachable("Unknown function for CallInst upgrade."); } diff --git a/lib/VMCore/Constants.cpp b/lib/VMCore/Constants.cpp index 1553bd5..00b0094 100644 --- a/lib/VMCore/Constants.cpp +++ b/lib/VMCore/Constants.cpp @@ -1224,20 +1224,20 @@ Constant *ConstantExpr::getCast(unsigned oc, Constant *C, const Type *Ty) { Constant *ConstantExpr::getZExtOrBitCast(Constant *C, const Type *Ty) { if (C->getType()->getScalarSizeInBits() == Ty->getScalarSizeInBits()) - return getCast(Instruction::BitCast, C, Ty); - return getCast(Instruction::ZExt, C, Ty); + return getBitCast(C, Ty); + return getZExt(C, Ty); } Constant *ConstantExpr::getSExtOrBitCast(Constant *C, const Type *Ty) { if (C->getType()->getScalarSizeInBits() == Ty->getScalarSizeInBits()) - return getCast(Instruction::BitCast, C, Ty); - return getCast(Instruction::SExt, C, Ty); + return getBitCast(C, Ty); + return getSExt(C, Ty); } Constant *ConstantExpr::getTruncOrBitCast(Constant *C, const Type *Ty) { if (C->getType()->getScalarSizeInBits() == Ty->getScalarSizeInBits()) - return getCast(Instruction::BitCast, C, Ty); - return getCast(Instruction::Trunc, C, Ty); + return getBitCast(C, Ty); + return getTrunc(C, Ty); } Constant *ConstantExpr::getPointerCast(Constant *S, const Type *Ty) { @@ -1245,8 +1245,8 @@ Constant *ConstantExpr::getPointerCast(Constant *S, const Type *Ty) { assert((Ty->isIntegerTy() || Ty->isPointerTy()) && "Invalid cast"); if (Ty->isIntegerTy()) - return getCast(Instruction::PtrToInt, S, Ty); - return getCast(Instruction::BitCast, S, Ty); + return getPtrToInt(S, Ty); + return getBitCast(S, Ty); } Constant *ConstantExpr::getIntegerCast(Constant *C, const Type *Ty, @@ -1450,12 +1450,6 @@ Constant *ConstantExpr::getCompareTy(unsigned short predicate, Constant *ConstantExpr::get(unsigned Opcode, Constant *C1, Constant *C2, unsigned Flags) { - // API compatibility: Adjust integer opcodes to floating-point opcodes. 
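To make the shuffle-based lowering in the palignr upgrade above concrete, here is a standalone sketch that prints which source byte each shuffle mask element selects for an assumed immediate; it mirrors the ConstantInt::get(IntTy, shiftVal + i) mask construction in the 128-bit case but is not part of the patch:

#include <cstdio>

int main() {
  unsigned shiftVal = 5;  // assumed example immediate, within the shuffle range
  for (unsigned i = 0; i != 16; ++i) {
    // Mask element i is shiftVal + i, indexing the 32 bytes of <Op2, Op1>:
    // 0..15 select from Op2 (the first shuffle operand), 16..31 from Op1.
    unsigned Idx = shiftVal + i;
    std::printf("result[%2u] <- %s[%u]\n", i,
                Idx < 16 ? "Op2" : "Op1", Idx < 16 ? Idx : Idx - 16);
  }
  return 0;
}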
- if (C1->getType()->isFPOrFPVectorTy()) { - if (Opcode == Instruction::Add) Opcode = Instruction::FAdd; - else if (Opcode == Instruction::Sub) Opcode = Instruction::FSub; - else if (Opcode == Instruction::Mul) Opcode = Instruction::FMul; - } #ifndef NDEBUG switch (Opcode) { case Instruction::Add: @@ -1523,8 +1517,8 @@ Constant* ConstantExpr::getSizeOf(const Type* Ty) { Constant *GEPIdx = ConstantInt::get(Type::getInt32Ty(Ty->getContext()), 1); Constant *GEP = getGetElementPtr( Constant::getNullValue(PointerType::getUnqual(Ty)), &GEPIdx, 1); - return getCast(Instruction::PtrToInt, GEP, - Type::getInt64Ty(Ty->getContext())); + return getPtrToInt(GEP, + Type::getInt64Ty(Ty->getContext())); } Constant* ConstantExpr::getAlignOf(const Type* Ty) { @@ -1537,8 +1531,8 @@ Constant* ConstantExpr::getAlignOf(const Type* Ty) { Constant *One = ConstantInt::get(Type::getInt32Ty(Ty->getContext()), 1); Constant *Indices[2] = { Zero, One }; Constant *GEP = getGetElementPtr(NullPtr, Indices, 2); - return getCast(Instruction::PtrToInt, GEP, - Type::getInt64Ty(Ty->getContext())); + return getPtrToInt(GEP, + Type::getInt64Ty(Ty->getContext())); } Constant* ConstantExpr::getOffsetOf(const StructType* STy, unsigned FieldNo) { @@ -1555,8 +1549,8 @@ Constant* ConstantExpr::getOffsetOf(const Type* Ty, Constant *FieldNo) { }; Constant *GEP = getGetElementPtr( Constant::getNullValue(PointerType::getUnqual(Ty)), GEPIdx, 2); - return getCast(Instruction::PtrToInt, GEP, - Type::getInt64Ty(Ty->getContext())); + return getPtrToInt(GEP, + Type::getInt64Ty(Ty->getContext())); } Constant *ConstantExpr::getCompare(unsigned short pred, @@ -1840,9 +1834,6 @@ Constant *ConstantExpr::getExtractValue(Constant *Agg, } Constant* ConstantExpr::getNeg(Constant* C) { - // API compatibility: Adjust integer opcodes to floating-point opcodes. - if (C->getType()->isFPOrFPVectorTy()) - return getFNeg(C); assert(C->getType()->isIntOrIntVectorTy() && "Cannot NEG a nonintegral value!"); return get(Instruction::Sub, diff --git a/lib/VMCore/Core.cpp b/lib/VMCore/Core.cpp index 634407c..bbf1375 100644 --- a/lib/VMCore/Core.cpp +++ b/lib/VMCore/Core.cpp @@ -119,6 +119,11 @@ void LLVMDumpModule(LLVMModuleRef M) { unwrap(M)->dump(); } +/*--.. Operations on inline assembler ......................................--*/ +void LLVMSetModuleInlineAsm(LLVMModuleRef M, const char *Asm) { + unwrap(M)->setModuleInlineAsm(StringRef(Asm)); +} + /*===-- Operations on types -----------------------------------------------===*/ @@ -322,8 +327,7 @@ LLVMTypeRef LLVMUnionTypeInContext(LLVMContextRef C, LLVMTypeRef *ElementTypes, return wrap(UnionType::get(&Tys[0], Tys.size())); } -LLVMTypeRef LLVMUnionType(LLVMTypeRef *ElementTypes, - unsigned ElementCount, int Packed) { +LLVMTypeRef LLVMUnionType(LLVMTypeRef *ElementTypes, unsigned ElementCount) { return LLVMUnionTypeInContext(LLVMGetGlobalContext(), ElementTypes, ElementCount); } diff --git a/lib/VMCore/Dominators.cpp b/lib/VMCore/Dominators.cpp index 3441750..10a866f 100644 --- a/lib/VMCore/Dominators.cpp +++ b/lib/VMCore/Dominators.cpp @@ -30,9 +30,9 @@ using namespace llvm; // Always verify dominfo if expensive checking is enabled. 
#ifdef XDEBUG -bool VerifyDomInfo = true; +static bool VerifyDomInfo = true; #else -bool VerifyDomInfo = false; +static bool VerifyDomInfo = false; #endif static cl::opt<bool,true> VerifyDomInfoX("verify-dom-info", cl::location(VerifyDomInfo), @@ -119,7 +119,7 @@ void DominanceFrontier::verifyAnalysis() const { assert(!compare(OtherDF) && "Invalid DominanceFrontier info!"); } -// NewBB is split and now it has one successor. Update dominace frontier to +// NewBB is split and now it has one successor. Update dominance frontier to // reflect this change. void DominanceFrontier::splitBlock(BasicBlock *NewBB) { assert(NewBB->getTerminator()->getNumSuccessors() == 1 @@ -129,7 +129,7 @@ void DominanceFrontier::splitBlock(BasicBlock *NewBB) { SmallVector<BasicBlock*, 8> PredBlocks; for (pred_iterator PI = pred_begin(NewBB), PE = pred_end(NewBB); PI != PE; ++PI) - PredBlocks.push_back(*PI); + PredBlocks.push_back(*PI); if (PredBlocks.empty()) // If NewBB does not have any predecessors then it is a entry block. diff --git a/lib/VMCore/Instructions.cpp b/lib/VMCore/Instructions.cpp index 4609a64..f64b220 100644 --- a/lib/VMCore/Instructions.cpp +++ b/lib/VMCore/Instructions.cpp @@ -30,80 +30,6 @@ using namespace llvm; // CallSite Class //===----------------------------------------------------------------------===// -#define CALLSITE_DELEGATE_GETTER(METHOD) \ - Instruction *II = getInstruction(); \ - return isCall() \ - ? cast<CallInst>(II)->METHOD \ - : cast<InvokeInst>(II)->METHOD - -#define CALLSITE_DELEGATE_SETTER(METHOD) \ - Instruction *II = getInstruction(); \ - if (isCall()) \ - cast<CallInst>(II)->METHOD; \ - else \ - cast<InvokeInst>(II)->METHOD - -CallingConv::ID CallSite::getCallingConv() const { - CALLSITE_DELEGATE_GETTER(getCallingConv()); -} -void CallSite::setCallingConv(CallingConv::ID CC) { - CALLSITE_DELEGATE_SETTER(setCallingConv(CC)); -} -const AttrListPtr &CallSite::getAttributes() const { - CALLSITE_DELEGATE_GETTER(getAttributes()); -} -void CallSite::setAttributes(const AttrListPtr &PAL) { - CALLSITE_DELEGATE_SETTER(setAttributes(PAL)); -} -bool CallSite::paramHasAttr(uint16_t i, Attributes attr) const { - CALLSITE_DELEGATE_GETTER(paramHasAttr(i, attr)); -} -uint16_t CallSite::getParamAlignment(uint16_t i) const { - CALLSITE_DELEGATE_GETTER(getParamAlignment(i)); -} - -/// @brief Return true if the call should not be inlined. 
-bool CallSite::isNoInline() const { - CALLSITE_DELEGATE_GETTER(isNoInline()); -} - -void CallSite::setIsNoInline(bool Value) { - CALLSITE_DELEGATE_GETTER(setIsNoInline(Value)); -} - - -bool CallSite::doesNotAccessMemory() const { - CALLSITE_DELEGATE_GETTER(doesNotAccessMemory()); -} -void CallSite::setDoesNotAccessMemory(bool doesNotAccessMemory) { - CALLSITE_DELEGATE_SETTER(setDoesNotAccessMemory(doesNotAccessMemory)); -} -bool CallSite::onlyReadsMemory() const { - CALLSITE_DELEGATE_GETTER(onlyReadsMemory()); -} -void CallSite::setOnlyReadsMemory(bool onlyReadsMemory) { - CALLSITE_DELEGATE_SETTER(setOnlyReadsMemory(onlyReadsMemory)); -} -bool CallSite::doesNotReturn() const { - CALLSITE_DELEGATE_GETTER(doesNotReturn()); -} -void CallSite::setDoesNotReturn(bool doesNotReturn) { - CALLSITE_DELEGATE_SETTER(setDoesNotReturn(doesNotReturn)); -} -bool CallSite::doesNotThrow() const { - CALLSITE_DELEGATE_GETTER(doesNotThrow()); -} -void CallSite::setDoesNotThrow(bool doesNotThrow) { - CALLSITE_DELEGATE_SETTER(setDoesNotThrow(doesNotThrow)); -} - -bool CallSite::hasArgument(const Value *Arg) const { - for (arg_iterator AI = this->arg_begin(), E = this->arg_end(); AI != E; ++AI) - if (AI->get() == Arg) - return true; - return false; -} - User::op_iterator CallSite::getCallee() const { Instruction *II(getInstruction()); return isCall() @@ -111,9 +37,6 @@ User::op_iterator CallSite::getCallee() const { : cast<InvokeInst>(II)->op_end() - 3; // Skip BB, BB, Function } -#undef CALLSITE_DELEGATE_GETTER -#undef CALLSITE_DELEGATE_SETTER - //===----------------------------------------------------------------------===// // TerminatorInst Class //===----------------------------------------------------------------------===// @@ -1639,43 +1562,29 @@ const Type* ExtractValueInst::getIndexedType(const Type *Agg, // BinaryOperator Class //===----------------------------------------------------------------------===// -/// AdjustIType - Map Add, Sub, and Mul to FAdd, FSub, and FMul when the -/// type is floating-point, to help provide compatibility with an older API. -/// -static BinaryOperator::BinaryOps AdjustIType(BinaryOperator::BinaryOps iType, - const Type *Ty) { - // API compatibility: Adjust integer opcodes to floating-point opcodes. 
- if (Ty->isFPOrFPVectorTy()) { - if (iType == BinaryOperator::Add) iType = BinaryOperator::FAdd; - else if (iType == BinaryOperator::Sub) iType = BinaryOperator::FSub; - else if (iType == BinaryOperator::Mul) iType = BinaryOperator::FMul; - } - return iType; -} - BinaryOperator::BinaryOperator(BinaryOps iType, Value *S1, Value *S2, const Type *Ty, const Twine &Name, Instruction *InsertBefore) - : Instruction(Ty, AdjustIType(iType, Ty), + : Instruction(Ty, iType, OperandTraits<BinaryOperator>::op_begin(this), OperandTraits<BinaryOperator>::operands(this), InsertBefore) { Op<0>() = S1; Op<1>() = S2; - init(AdjustIType(iType, Ty)); + init(iType); setName(Name); } BinaryOperator::BinaryOperator(BinaryOps iType, Value *S1, Value *S2, const Type *Ty, const Twine &Name, BasicBlock *InsertAtEnd) - : Instruction(Ty, AdjustIType(iType, Ty), + : Instruction(Ty, iType, OperandTraits<BinaryOperator>::op_begin(this), OperandTraits<BinaryOperator>::operands(this), InsertAtEnd) { Op<0>() = S1; Op<1>() = S2; - init(AdjustIType(iType, Ty)); + init(iType); setName(Name); } @@ -2060,7 +1969,7 @@ unsigned CastInst::isEliminableCastPair( // FPEXT < FloatPt n/a FloatPt n/a // PTRTOINT n/a Pointer n/a Integral Unsigned // INTTOPTR n/a Integral Unsigned Pointer n/a - // BITCONVERT = FirstClass n/a FirstClass n/a + // BITCAST = FirstClass n/a FirstClass n/a // // NOTE: some transforms are safe, but we consider them to be non-profitable. // For example, we could merge "fptoui double to i32" + "zext i32 to i64", diff --git a/lib/VMCore/LLVMContext.cpp b/lib/VMCore/LLVMContext.cpp index 3244f28..4d61363 100644 --- a/lib/VMCore/LLVMContext.cpp +++ b/lib/VMCore/LLVMContext.cpp @@ -17,6 +17,7 @@ #include "llvm/Constants.h" #include "llvm/Instruction.h" #include "llvm/Support/ManagedStatic.h" +#include "llvm/Support/SourceMgr.h" #include "LLVMContextImpl.h" using namespace llvm; @@ -33,6 +34,10 @@ LLVMContext::LLVMContext() : pImpl(new LLVMContextImpl(*this)) { } LLVMContext::~LLVMContext() { delete pImpl; } +//===----------------------------------------------------------------------===// +// Recoverable Backend Errors +//===----------------------------------------------------------------------===// + void LLVMContext::setInlineAsmDiagnosticHandler(void *DiagHandler, void *DiagContext) { pImpl->InlineAsmDiagHandler = DiagHandler; @@ -51,6 +56,39 @@ void *LLVMContext::getInlineAsmDiagnosticContext() const { return pImpl->InlineAsmDiagContext; } +void LLVMContext::emitError(StringRef ErrorStr) { + emitError(0U, ErrorStr); +} + +void LLVMContext::emitError(const Instruction *I, StringRef ErrorStr) { + unsigned LocCookie = 0; + if (const MDNode *SrcLoc = I->getMetadata("srcloc")) { + if (SrcLoc->getNumOperands() != 0) + if (const ConstantInt *CI = dyn_cast<ConstantInt>(SrcLoc->getOperand(0))) + LocCookie = CI->getZExtValue(); + } + return emitError(LocCookie, ErrorStr); +} + +void LLVMContext::emitError(unsigned LocCookie, StringRef ErrorStr) { + // If there is no error handler installed, just print the error and exit. + if (pImpl->InlineAsmDiagHandler == 0) { + errs() << "error: " << ErrorStr << "\n"; + exit(1); + } + + // If we do have an error handler, we can report the error and keep going. 
+ SMDiagnostic Diag("", "error: " + ErrorStr.str()); + + ((SourceMgr::DiagHandlerTy)(intptr_t)pImpl->InlineAsmDiagHandler) + (Diag, pImpl->InlineAsmDiagContext, LocCookie); + +} + +//===----------------------------------------------------------------------===// +// Metadata Kind Uniquing +//===----------------------------------------------------------------------===// + #ifndef NDEBUG /// isValidName - Return true if Name is a valid custom metadata handler name. static bool isValidName(StringRef MDName) { diff --git a/lib/VMCore/LLVMContextImpl.cpp b/lib/VMCore/LLVMContextImpl.cpp index e71157f..9e41a08 100644 --- a/lib/VMCore/LLVMContextImpl.cpp +++ b/lib/VMCore/LLVMContextImpl.cpp @@ -13,6 +13,7 @@ #include "LLVMContextImpl.h" #include <algorithm> +using namespace llvm; LLVMContextImpl::LLVMContextImpl(LLVMContext &C) : TheTrueVal(0), TheFalseVal(0), diff --git a/lib/VMCore/LeaksContext.h b/lib/VMCore/LeaksContext.h index abff090..b9e59d4 100644 --- a/lib/VMCore/LeaksContext.h +++ b/lib/VMCore/LeaksContext.h @@ -14,7 +14,8 @@ #include "llvm/Value.h" #include "llvm/ADT/SmallPtrSet.h" -using namespace llvm; + +namespace llvm { template <class T> struct PrinterTrait { @@ -87,3 +88,5 @@ private: const T* Cache; const char* Name; }; + +} diff --git a/lib/VMCore/Makefile b/lib/VMCore/Makefile index 4395ecf..03a4fc7 100644 --- a/lib/VMCore/Makefile +++ b/lib/VMCore/Makefile @@ -1,4 +1,4 @@ -##===- lib/VMCore/Makefile ------------------------------*- Makefile -*-===## +##===- lib/VMCore/Makefile ---------------------------------*- Makefile -*-===## # # The LLVM Compiler Infrastructure # diff --git a/lib/VMCore/Metadata.cpp b/lib/VMCore/Metadata.cpp index 72de032..092fe00 100644 --- a/lib/VMCore/Metadata.cpp +++ b/lib/VMCore/Metadata.cpp @@ -178,6 +178,13 @@ void MDNode::destroy() { free(this); } +/// isFunctionLocalValue - Return true if this is a value that would require a +/// function-local MDNode. +static bool isFunctionLocalValue(Value *V) { + return isa<Instruction>(V) || isa<Argument>(V) || isa<BasicBlock>(V) || + (isa<MDNode>(V) && cast<MDNode>(V)->isFunctionLocal()); +} + MDNode *MDNode::getMDNode(LLVMContext &Context, Value *const *Vals, unsigned NumVals, FunctionLocalness FL, bool Insert) { @@ -188,8 +195,7 @@ MDNode *MDNode::getMDNode(LLVMContext &Context, Value *const *Vals, for (unsigned i = 0; i != NumVals; ++i) { Value *V = Vals[i]; if (!V) continue; - if (isa<Instruction>(V) || isa<Argument>(V) || isa<BasicBlock>(V) || - (isa<MDNode>(V) && cast<MDNode>(V)->isFunctionLocal())) { + if (isFunctionLocalValue(V)) { isFunctionLocal = true; break; } @@ -262,6 +268,13 @@ void MDNode::setIsNotUniqued() { void MDNode::replaceOperand(MDNodeOperand *Op, Value *To) { Value *From = *Op; + // If is possible that someone did GV->RAUW(inst), replacing a global variable + // with an instruction or some other function-local object. If this is a + // non-function-local MDNode, it can't point to a function-local object. + // Handle this case by implicitly dropping the MDNode reference to null. + if (!isFunctionLocal() && To && isFunctionLocalValue(To)) + To = 0; + if (From == To) return; diff --git a/lib/VMCore/Pass.cpp b/lib/VMCore/Pass.cpp index 6b941f3..a60877d 100644 --- a/lib/VMCore/Pass.cpp +++ b/lib/VMCore/Pass.cpp @@ -318,6 +318,8 @@ static PassRegistrar *getPassRegistrar() { return PassRegistrarObj; } +namespace { + // FIXME: We use ManagedCleanup to erase the pass registrar on shutdown. 
// Unfortunately, passes are registered with static ctors, and having // llvm_shutdown clear this map prevents successful ressurection after @@ -329,7 +331,9 @@ void cleanupPassRegistrar(void*) { PassRegistrarObj = 0; } } -ManagedCleanup<&cleanupPassRegistrar> registrarCleanup; +ManagedCleanup<&cleanupPassRegistrar> registrarCleanup ATTRIBUTE_USED; + +} // getPassInfo - Return the PassInfo data structure that corresponds to this // pass... diff --git a/lib/VMCore/PassManager.cpp b/lib/VMCore/PassManager.cpp index 6ca35ac..b28fdeb 100644 --- a/lib/VMCore/PassManager.cpp +++ b/lib/VMCore/PassManager.cpp @@ -1293,9 +1293,8 @@ void FunctionPassManager::add(Pass *P) { bool FunctionPassManager::run(Function &F) { if (F.isMaterializable()) { std::string errstr; - if (F.Materialize(&errstr)) { - llvm_report_error("Error reading bitcode file: " + errstr); - } + if (F.Materialize(&errstr)) + report_fatal_error("Error reading bitcode file: " + Twine(errstr)); } return FPM->run(F); } diff --git a/lib/VMCore/Type.cpp b/lib/VMCore/Type.cpp index 5f9c11f..845b523 100644 --- a/lib/VMCore/Type.cpp +++ b/lib/VMCore/Type.cpp @@ -380,6 +380,10 @@ const Type *Type::getPPC_FP128Ty(LLVMContext &C) { return &C.pImpl->PPC_FP128Ty; } +const IntegerType *Type::getIntNTy(LLVMContext &C, unsigned N) { + return IntegerType::get(C, N); +} + const IntegerType *Type::getInt1Ty(LLVMContext &C) { return &C.pImpl->Int1Ty; } @@ -420,6 +424,10 @@ const PointerType *Type::getPPC_FP128PtrTy(LLVMContext &C, unsigned AS) { return getPPC_FP128Ty(C)->getPointerTo(AS); } +const PointerType *Type::getIntNPtrTy(LLVMContext &C, unsigned N, unsigned AS) { + return getIntNTy(C, N)->getPointerTo(AS); +} + const PointerType *Type::getInt1PtrTy(LLVMContext &C, unsigned AS) { return getInt1Ty(C)->getPointerTo(AS); } diff --git a/lib/VMCore/TypeSymbolTable.cpp b/lib/VMCore/TypeSymbolTable.cpp index b4daf0f..d68a44b 100644 --- a/lib/VMCore/TypeSymbolTable.cpp +++ b/lib/VMCore/TypeSymbolTable.cpp @@ -126,13 +126,15 @@ void TypeSymbolTable::refineAbstractType(const DerivedType *OldType, // faster to remove them all in one pass. // for (iterator I = begin(), E = end(); I != E; ++I) { - if (I->second == (Type*)OldType) { // FIXME when Types aren't const. + // FIXME when Types aren't const. + if (I->second == const_cast<DerivedType *>(OldType)) { #if DEBUG_ABSTYPE dbgs() << "Removing type " << OldType->getDescription() << "\n"; #endif OldType->removeAbstractTypeUser(this); - I->second = (Type*)NewType; // TODO FIXME when types aren't const + // TODO FIXME when types aren't const + I->second = const_cast<Type *>(NewType); if (NewType->isAbstract()) { #if DEBUG_ABSTYPE dbgs() << "Added type " << NewType->getDescription() << "\n"; diff --git a/lib/VMCore/Verifier.cpp b/lib/VMCore/Verifier.cpp index c18168d..6ad4272 100644 --- a/lib/VMCore/Verifier.cpp +++ b/lib/VMCore/Verifier.cpp @@ -93,7 +93,7 @@ namespace { // Anonymous namespace for class } if (Broken) - llvm_report_error("Broken module, no Basic Block terminator!"); + report_fatal_error("Broken module, no Basic Block terminator!"); return false; } @@ -176,6 +176,10 @@ namespace { /// Types - keep track of the types that have been checked already. TypeSet Types; + /// MDNodes - keep track of the metadata nodes that have been checked + /// already. 
+ SmallPtrSet<MDNode *, 32> MDNodes; + Verifier() : FunctionPass(&ID), Broken(false), RealPass(true), action(AbortProcessAction), @@ -244,6 +248,10 @@ namespace { I != E; ++I) visitGlobalAlias(*I); + for (Module::named_metadata_iterator I = M.named_metadata_begin(), + E = M.named_metadata_end(); I != E; ++I) + visitNamedMDNode(*I); + // If the module is broken, abort at this time. return abortIfBroken(); } @@ -284,6 +292,8 @@ namespace { void visitGlobalValue(GlobalValue &GV); void visitGlobalVariable(GlobalVariable &GV); void visitGlobalAlias(GlobalAlias &GA); + void visitNamedMDNode(NamedMDNode &NMD); + void visitMDNode(MDNode &MD, Function *F); void visitFunction(Function &F); void visitBasicBlock(BasicBlock &BB); using InstVisitor<Verifier>::visit; @@ -333,8 +343,6 @@ namespace { int VT, unsigned ArgNo, std::string &Suffix); void VerifyIntrinsicPrototype(Intrinsic::ID ID, Function *F, unsigned RetNum, unsigned ParamNum, ...); - void VerifyFunctionLocalMetadata(MDNode *N, Function *F, - SmallPtrSet<MDNode *, 32> &Visited); void VerifyParameterAttrs(Attributes Attrs, const Type *Ty, bool isReturnValue, const Value *V); void VerifyFunctionAttrs(const FunctionType *FT, const AttrListPtr &Attrs, @@ -489,6 +497,54 @@ void Verifier::visitGlobalAlias(GlobalAlias &GA) { visitGlobalValue(GA); } +void Verifier::visitNamedMDNode(NamedMDNode &NMD) { + for (unsigned i = 0, e = NMD.getNumOperands(); i != e; ++i) { + MDNode *MD = NMD.getOperand(i); + if (!MD) + continue; + + Assert2(!MD->isFunctionLocal(), + "Named metadata operand cannot be function local!", &NMD, MD); + visitMDNode(*MD, 0); + } +} + +void Verifier::visitMDNode(MDNode &MD, Function *F) { + // Only visit each node once. Metadata can be mutually recursive, so this + // avoids infinite recursion here, as well as being an optimization. + if (!MDNodes.insert(&MD)) + return; + + for (unsigned i = 0, e = MD.getNumOperands(); i != e; ++i) { + Value *Op = MD.getOperand(i); + if (!Op) + continue; + if (isa<Constant>(Op) || isa<MDString>(Op) || isa<NamedMDNode>(Op)) + continue; + if (MDNode *N = dyn_cast<MDNode>(Op)) { + Assert2(MD.isFunctionLocal() || !N->isFunctionLocal(), + "Global metadata operand cannot be function local!", &MD, N); + visitMDNode(*N, F); + continue; + } + Assert2(MD.isFunctionLocal(), "Invalid operand for global metadata!", &MD, Op); + + // If this was an instruction, bb, or argument, verify that it is in the + // function that we expect. + Function *ActualF = 0; + if (Instruction *I = dyn_cast<Instruction>(Op)) + ActualF = I->getParent()->getParent(); + else if (BasicBlock *BB = dyn_cast<BasicBlock>(Op)) + ActualF = BB->getParent(); + else if (Argument *A = dyn_cast<Argument>(Op)) + ActualF = A->getParent(); + assert(ActualF && "Unimplemented function local metadata case!"); + + Assert2(ActualF == F, "function-local metadata used in wrong function", + &MD, Op); + } +} + void Verifier::verifyTypeSymbolTable(TypeSymbolTable &ST) { for (TypeSymbolTable::iterator I = ST.begin(), E = ST.end(); I != E; ++I) VerifyType(I->second); @@ -1553,38 +1609,6 @@ void Verifier::VerifyType(const Type *Ty) { } } -/// VerifyFunctionLocalMetadata - Verify that the specified MDNode is local to -/// specified Function. -void Verifier::VerifyFunctionLocalMetadata(MDNode *N, Function *F, - SmallPtrSet<MDNode *, 32> &Visited) { - assert(N->isFunctionLocal() && "Should only be called on function-local MD"); - - // Only visit each node once. 
- if (!Visited.insert(N)) - return; - - for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { - Value *V = N->getOperand(i); - if (!V) continue; - - Function *ActualF = 0; - if (Instruction *I = dyn_cast<Instruction>(V)) - ActualF = I->getParent()->getParent(); - else if (BasicBlock *BB = dyn_cast<BasicBlock>(V)) - ActualF = BB->getParent(); - else if (Argument *A = dyn_cast<Argument>(V)) - ActualF = A->getParent(); - else if (MDNode *MD = dyn_cast<MDNode>(V)) - if (MD->isFunctionLocal()) - VerifyFunctionLocalMetadata(MD, F, Visited); - - // If this was an instruction, bb, or argument, verify that it is in the - // function that we expect. - Assert1(ActualF == 0 || ActualF == F, - "function-local metadata used in wrong function", N); - } -} - // Flags used by TableGen to mark intrinsic parameters with the // LLVMExtendedElementVectorType and LLVMTruncatedElementVectorType classes. static const unsigned ExtendedElementVectorType = 0x40000000; @@ -1604,11 +1628,8 @@ void Verifier::visitIntrinsicFunctionCall(Intrinsic::ID ID, CallInst &CI) { // If the intrinsic takes MDNode arguments, verify that they are either global // or are local to *this* function. for (unsigned i = 1, e = CI.getNumOperands(); i != e; ++i) - if (MDNode *MD = dyn_cast<MDNode>(CI.getOperand(i))) { - if (!MD->isFunctionLocal()) continue; - SmallPtrSet<MDNode *, 32> Visited; - VerifyFunctionLocalMetadata(MD, CI.getParent()->getParent(), Visited); - } + if (MDNode *MD = dyn_cast<MDNode>(CI.getOperand(i))) + visitMDNode(*MD, CI.getParent()->getParent()); switch (ID) { default: @@ -1933,7 +1954,9 @@ FunctionPass *llvm::createVerifierPass(VerifierFailureAction action) { } -// verifyFunction - Create +/// verifyFunction - Check a function for errors, printing messages on stderr. +/// Return true if the function is corrupt. +/// bool llvm::verifyFunction(const Function &f, VerifierFailureAction action) { Function &F = const_cast<Function&>(f); assert(!F.isDeclaration() && "Cannot verify external functions"); |
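A small sketch of how verifyFunction is commonly called from client code, assuming the ReturnStatusAction enumerator of VerifierFailureAction; checkFunction is a made-up wrapper:

#include "llvm/Analysis/Verifier.h"
using namespace llvm;

// Hypothetical wrapper: verify a freshly constructed function and report the
// result to the caller instead of aborting the process.
static bool checkFunction(const Function &F) {
  // verifyFunction returns true if it finds the function to be corrupt.
  return !verifyFunction(F, ReturnStatusAction);
}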